Solving Anagrams.

Created by: Tony Held
Created on: 2021-03-15

References & Acknowledgements:
1) Inspired by `Impractical Python Projects` Chapter 3

In [37]:
# **********************************************
#     Jupyter Interactive Mode Settings
# **********************************************
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last_expr_or_assign"

# **********************************************
#     Allows autocomplete to work properly
# **********************************************
%config Completer.use_jedi = False

In [38]:
"""
Solving Anagrams.

Created by: Tony Held
Created on: 2021-03-15

References & Acknowledgements:
1) Inspired by `Impractical Python Projects` Chapter 3

Notes
-----
1) multi word finder
2) select each letter,
    see if it is in the dict, if so process remainder
3) select two letters
"""
import collections

class word_list:
    """Stores a list of words that are indexed and easy to search for anagrams"""

    def __init__(self, file_name):
        """file_name : location of file with a word list"""
        self.file_name = file_name
        self.words = load_dictionary(self.file_name)
        self.words_sorted = listify_words(self.words)
        self.words_indexed, self.frequencies = index_list(self.words_sorted)

    def print_most_frequent(self, n):
        print(f'\nThe {n} most frequently occurring anagrams in the word list are:')

        for words in self.frequencies[:n]:
            print(f'{len(words)} anagrams were found for these words:')
            for i in words:
                print(f'\t{self.words[i]}')

    def find_anagrams(self, word):
        """find the anagrams for a given word"""
        print(f'\nAnagrams for the word: {word}')
        word_sorted_tuple = tuple(sorted(word.strip().lower()))
        matches = self.words_indexed.get(word_sorted_tuple, [])
        if matches:
            for i, index in enumerate(matches):
                print(f'\t{i+1}) {self.words[index]}')
        else:
            print('\tWord not found in dictionary.')


def load_dictionary(file_name):
    """load contents of text file into attribute self.words"""
    with open(file_name) as fn:
        words_raw = fn.readlines()
    words = [i.strip().lower() for i in words_raw]

    print(f'Dictionary with {len(words)} entries loaded.')
    print(f'The first and last 5 entries are:')
    print(f'{words[:5]}\n{words[-5:]}')

    return words

def listify_words(words):
    """Take a list of words and convert to a list of tuple of letters.
     The letters in the tuple are sorted alphabetically."""

    words_sorted = [tuple(sorted(i)) for i in words]

    print(f'\nThe first and last 5 entries are:')
    print(f'{words_sorted[:5]}\n{words_sorted[-5:]}')

    return words_sorted

def index_list(words):
    """Create a dictionary of occurrences of a list, and the frequency of each word, to speed up future searches.
    Returns
    -------
    dict_words - dict
        The key is the tuple of letters in a word,
        the value is the location(s) of that tuple in the original word list.
    frequencies - list
        list of frequencies of each word occurring in the word list

    """
    dict_words = collections.defaultdict(list)

    for i, word in enumerate(words):
        dict_words[word].append(i)

    print(f'\nThe first 5 entries are:')
    for i, (k, v) in enumerate(dict_words.items()):
        if i > 5: break
        print(f'{k}: {v}')

    """Find the most n most frequently occurring words"""
    frequencies = sorted(dict_words.values(), key=lambda x: len(x), reverse=True)

    print(f'The 5 most frequent entries are:')
    print(f'{frequencies[:5]}')

    return dict_words, frequencies


def main():
    file_name = 'dictionaries/2of4brif.txt'
    words = word_list(file_name)
    words.print_most_frequent(5)
    words.find_anagrams('bear')
    words.find_anagrams('polyglot')
    words.find_anagrams('qwertyuiop')

if __name__ == '__main__':
    main()


Dictionary with 60388 entries loaded.
The first and last 5 entries are:
['aah', 'aardvark', 'aardvarks', 'abacus', 'abacuses']
['zucchini', 'zucchinis', 'zydeco', 'zygote', 'zygotes']

The first and last 5 entries are:
[('a', 'a', 'h'), ('a', 'a', 'a', 'd', 'k', 'r', 'r', 'v'), ('a', 'a', 'a', 'd', 'k', 'r', 'r', 's', 'v'), ('a', 'a', 'b', 'c', 's', 'u'), ('a', 'a', 'b', 'c', 'e', 's', 's', 'u')]
[('c', 'c', 'h', 'i', 'i', 'n', 'u', 'z'), ('c', 'c', 'h', 'i', 'i', 'n', 's', 'u', 'z'), ('c', 'd', 'e', 'o', 'y', 'z'), ('e', 'g', 'o', 't', 'y', 'z'), ('e', 'g', 'o', 's', 't', 'y', 'z')]

The first 5 entries are:
('a', 'a', 'h'): [0, 1081]
('a', 'a', 'a', 'd', 'k', 'r', 'r', 'v'): [1]
('a', 'a', 'a', 'd', 'k', 'r', 'r', 's', 'v'): [2]
('a', 'a', 'b', 'c', 's', 'u'): [3]
('a', 'a', 'b', 'c', 'e', 's', 's', 'u'): [4]
('a', 'a', 'b', 'e', 'l', 'n', 'o'): [5]
The 5 most frequent entries are:
[[37517, 37583, 37953, 42605, 42901, 50023, 50092], [2415, 2544, 16755, 18191, 46890, 47335], [7579, 76

In [48]:
def split_list(my_list, indices):
    """Return the values of my_list at the given indicies and the remaining members of the list."""
    inverse_indices = [i for i in range(len(my_list)) if i not in indices]
    list_selected = [my_list[i] for i in indices]
    list_remaining = [my_list[i] for i in inverse_indices]
    return somelist

In [43]:
phrase = ' cowboY farts1';
# extract the sorted letters of the phrase
phrase2 = sorted([i for i in phrase.lower() if i.isalpha()])
print(phrase, phrase2)

 cowboY farts1 ['a', 'b', 'c', 'f', 'o', 'o', 'r', 's', 't', 'w', 'y']


In [46]:
working_word = phrase2

print(f'extracted letter {working_word.pop([0, 1])}.  remaining phrase {working_word} ')

TypeError: 'list' object cannot be interpreted as an integer

In [None]:
dict_search = {}
working_word = phrase2

# make a search pattern for a single letter
for i in len(phrase2):
    if i not in dict_search:
        key = working_word.pop(1)
        dict_search
        