The following are assumptions and definitions that limit the scope of the task: <br><br>
<b>Word</b>: To simplify, a word is a sequence of one or more characters between 'a' and 'z' or between 'A' and 'Z', for example “agdfBh”. <br>
<b>Letter Case</b>: When counting frequencies, we are interested in the case-insensitive frequency (e.g. in the text “The sun shines over the lake”, the library should count 2 occurrences for any of the words “the”, “The”, “tHE”, etc.). <br>
<b>Input Text</b>: The input text contains words separated by various separator characters. Note that the characters from 'a' to 'z' and from 'A' to 'Z' can only appear within words. <br>
<b>Available Memory</b>: There is enough memory to store the whole input text.

In [1]:
import os.path

class FrequencyAnalyzer(object):
    """Count case-insensitive word frequencies for a tokenized text.

    Every token is normalized (commas and full stops removed, then
    lower-cased) and tallied once at construction time. The query methods
    only read that tally.

    Attributes:
        tokens: The token sequence given to the constructor (may be None).
        hash_map: Mapping of normalized word -> number of occurrences.
    """

    # Class-level default kept for backward compatibility; __init__ always
    # rebinds a fresh per-instance dict, so this one is never mutated.
    hash_map = {}

    def __init__(self, tokens):
        """Build the frequency table.

        Args:
            tokens: Iterable of word strings, or None for an empty analyzer.
        """
        self.tokens = tokens
        self.hash_map = {}

        if tokens is not None:
            for element in tokens:
                word = self._normalize(element)
                # A token made only of punctuation (e.g. ".") is not a word.
                if not word:
                    continue
                self.hash_map[word] = self.hash_map.get(word, 0) + 1

    @staticmethod
    def _normalize(token):
        """Return *token* lower-cased with commas and full stops removed."""
        return token.replace(",", "").replace(".", "").lower()

    def calculate_highest_frequency(self, tokens):
        """Return the word(s) that occur most often.

        Args:
            tokens: Unused; accepted only to keep the existing call signature.

        Returns:
            A list of every word sharing the highest count, in first-seen
            order. Empty when no words were counted.
        """
        if not self.hash_map:
            return []
        # Compute the maximum once instead of once per table entry.
        highest = max(self.hash_map.values())
        return [word for word, count in self.hash_map.items() if count == highest]

    def calculate_frequency_for_word(self, tokens, word):
        """Return the frequency of the specified word.

        The lookup uses the same normalization as the table, so the query is
        case-insensitive and ignores commas and full stops.

        Args:
            tokens: Unused; accepted only to keep the existing call signature.
            word: The word to look up.

        Returns:
            The occurrence count, or None when the word was never counted.
        """
        if word is None:
            return None
        return self.hash_map.get(self._normalize(word))

    def calculate_most_frequent_n_words(self, tokens, n):
        """Return the *n* most frequent words with their counts.

        Args:
            tokens: Unused; accepted only to keep the existing call signature.
            n: Maximum number of words to return. Negative values yield an
                empty result; None returns every word.

        Returns:
            A dict of word -> count ordered by descending count, with ties
            broken alphabetically.
        """
        ranked = sorted(self.hash_map.items(), key=lambda item: (-item[1], item[0]))
        limit = None if n is None else max(n, 0)
        return dict(ranked[:limit])
       

In [2]:
def main():
    """Run an interactive word-frequency session on the console.

    Asks whether the text is typed in or read from a file, then prints the
    most frequent word(s), the frequency of a word chosen by the user and
    the top-n words. Returns early, after printing a message, when the menu
    choice is not 1 or 2.
    """
    # Give choice to user; a non-numeric answer is treated as an invalid option.
    try:
        choice = int(input("1. Enter one for entering a text string.\n 2. Enter two for entering a text file. "))
    except ValueError:
        choice = None

    if choice == 1:
        text = input("Enter text. ")
    elif choice == 2:
        # Keep asking until a readable file is supplied.
        while True:
            file_path = input("Enter filepath of text file: ")
            try:
                # Single open inside `with`, so the handle is always closed.
                with open(file_path) as infile:
                    text = infile.read()
                break
            except IOError:
                print("Path of the file is Invalid! Note that this is a relative path.")
    else:
        print("Not a valid option.")
        # There is no text to analyse, so stop instead of failing later.
        return

    # Tokenize: lower-case for case-insensitive counting, split on whitespace.
    words = text.lower().split()

    freq_analyser = FrequencyAnalyzer(words)

    # Output the highest-frequency word(s)
    max_occurance = freq_analyser.calculate_highest_frequency(words)
    print(f"Highest frequency Word(s): {max_occurance}")

    # Output the frequency for a given word
    word = input("Enter a of word from text: ")
    freq_count = freq_analyser.calculate_frequency_for_word(words, word)
    print(f"Frequency of {word} is {freq_count}")

    # Output the top n frequency words; re-prompt until n is an integer.
    while True:
        try:
            n = int(input("Enter value of n: "))
            break
        except ValueError:
            print("That's not a valid option!")

    top_words = freq_analyser.calculate_most_frequent_n_words(words, n)
    print(f"Top {n} words are: ")
    print(top_words)

In [3]:
# Start the interactive session only when this code is executed directly;
# importing it (as the test cell does to get FrequencyAnalyzer) must not
# trigger the prompts.
if __name__ == "__main__":
    main()

1. Enter one for entering a text string.
 2. Enter one for entering a text file. 2
Enter filepath of text file: s.txt
Highest frequency Word(s): ['and']
Enter a of word from text: fast
Frequency of fast is 4
Enter value of n: 6
Top 6 words are: 
{'and': 490, 'the': 431, 'to': 408, 'my': 390, 'of': 369, 'i': 339}


<b>Test cases</b> using unittest
Prerequisite: install pytest and import_ipynb (the latter is needed to import the notebook as a module)

In [4]:
import unittest
import import_ipynb
from frequencyanalyzer import FrequencyAnalyzer

class TestIWordFrequencyAnalyzer(unittest.TestCase):
    """Unit tests for the three query methods of FrequencyAnalyzer."""

    # Shared fixture: 'the' and 'over' each occur three times, the rest once.
    WORDS = ['the', 'sun', 'shines', 'over', 'the', 'lake', 'the', 'over', 'over']

    def setUp(self):
        # Fresh copy per test so no test can affect another through the list.
        self.words = list(self.WORDS)
        self.analyzer = FrequencyAnalyzer(self.words)

    def test_calculate_highest_frequency(self):
        """Both words tied for the highest count are returned, first-seen first."""
        self.assertEqual(
            self.analyzer.calculate_highest_frequency(self.words),
            ['the', 'over'],
        )

    def test_calculate_frequency_for_word(self):
        """A word present in the text reports its occurrence count."""
        self.assertEqual(
            self.analyzer.calculate_frequency_for_word(self.words, "over"),
            3,
        )

    def test_calculate_most_frequent_n_words(self):
        """The top four words are returned with their counts."""
        self.assertDictEqual(
            self.analyzer.calculate_most_frequent_n_words(self.words, 4),
            {'over': 3, 'the': 3, 'lake': 1, 'shines': 1},
        )
        
        
# argv=[''] stops unittest from parsing the host process's command-line
# arguments, and exit=False keeps it from calling sys.exit() after the run,
# so execution continues past this line. verbosity=2 prints one line per test.
unittest.main(argv=[''], verbosity=2, exit=False)


----------------------------------------------------------------------
Ran 0 tests in 0.000s

OK
test_calculate_frequency_for_word (__main__.TestIWordFrequencyAnalyzer) ... ok
test_calculate_highest_frequency (__main__.TestIWordFrequencyAnalyzer) ... ok
test_calculate_most_frequent_n_words (__main__.TestIWordFrequencyAnalyzer) ... 

importing Jupyter notebook from frequencyanalyzer.ipynb
{'the': 3, 'sun': 1, 'shines': 1, 'over': 3, 'lake': 1}
{'the': 3, 'sun': 1, 'shines': 1, 'over': 3, 'lake': 1}
{'the': 3, 'sun': 1, 'shines': 1, 'over': 3, 'lake': 1}


ok

----------------------------------------------------------------------
Ran 3 tests in 0.004s

OK


<unittest.main.TestProgram at 0x27e5af88a20>