# 03_03: Finding Anagrams

In [10]:
import math
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

%matplotlib inline

In [11]:
# Load the dictionary: one entry per line, whitespace-stripped, lower-cased,
# de-duplicated through a set, then sorted into a list.
# The `with` block closes the file as soon as it has been read; the original
# bare open() left the handle open until garbage collection.
with open('words.txt', 'r') as word_file:
    words = sorted({line.strip().lower() for line in word_file})

In [17]:
def signature(word):
    """Return the anagram signature of `word`: its characters in sorted order.

    Two words are anagrams of each other exactly when their signatures match.
    """
    ordered_letters = sorted(word)
    return ''.join(ordered_letters)

In [18]:
def get_reverse(word):
    """Return `word` read back to front (used for the palindromic checks)."""
    # A slice with step -1 walks the sequence from its last item to its first.
    return word[::-1]

In [19]:
get_reverse('anything')

'gnihtyna'

In [20]:
def find_anagram(myword):
    """Print every dictionary word that is an anagram of `myword`.

    Brute-force search: the signature of each entry in the module-level
    `words` list is recomputed and compared with the signature of `myword`.
    Matches are printed one per line in dictionary order (including `myword`
    itself when it is in the dictionary). Nothing is returned.
    """
    target = signature(myword)

    # Lazy generator, so each match is printed as soon as the scan reaches it.
    matches = (candidate for candidate in words if target == signature(candidate))
    for match in matches:
        print(match)

In [21]:
find_anagram('dictionary')

dictionary
indicatory


In [22]:
%time find_anagram('dictionary')

dictionary
indicatory
CPU times: user 212 ms, sys: 302 µs, total: 213 ms
Wall time: 215 ms


In [30]:
%time get_reverse('dictionary')

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 6.68 µs


'yranoitcid'

In [23]:
# Group the dictionary by signature: each key is a signature string and each
# value is the set of all words sharing it. Every key is created by adding a
# word, so no set is empty.

words_by_sig = collections.defaultdict(set)

for word in words:
    anagram_group = words_by_sig[signature(word)]  # defaultdict creates the set on first use
    anagram_group.add(word)

In [24]:
# Signature -> set of words with that signature, intended as the input for
# the palindromic-pair search (each set is a group of candidate words).
# Every key is created by adding a word, so no set is empty.
# NOTE(review): this is built exactly like words_by_sig, so it holds the same
# mapping; in the cells shown here its only use is a commented-out line.

words_palidromic = collections.defaultdict(set)

for word in words:
    key = signature(word)
    words_palidromic[key].add(word)

In [33]:
# Drop the signatures that belong to a single word: only groups of two or
# more words are actual anagram sets. The result is a plain dict (not a
# defaultdict), in the same key order as words_by_sig.

anagrams_by_sig = {
    key: group
    for key, group in words_by_sig.items()
    if len(group) >= 2
}

In [53]:
anagrams_by_sig

{'aal': {'aal', 'ala'},
 'aam': {'aam', 'ama'},
 'aacinor': {'aaronic', 'nicarao', 'ocarina'},
 'aaeinort': {'aaronite', 'aeration'},
 'aaru': {'aaru', 'aura'},
 'ab': {'ab', 'ba'},
 'aab': {'aba', 'baa'},
 'aabc': {'abac', 'caba'},
 'aabcort': {'abactor', 'acrobat'},
 'aabft': {'abaft', 'bafta'},
 'aabelno': {'abalone', 'balonea'},
 'aabdennor': {'abandoner', 'reabandon'},
 'aabcin': {'abanic', 'bianca'},
 'aabirs': {'abaris', 'arabis'},
 'aabs': {'abas', 'saba'},
 'aabers': {'abaser', 'abrase'},
 'aabet': {'abate', 'ateba', 'batea', 'beata'},
 'aabert': {'abater', 'artabe', 'eartab', 'trabea'},
 'abb': {'abb', 'bab'},
 'aabb': {'abba', 'baba'},
 'abbey': {'abbey', 'bebay'},
 'abby': {'abby', 'baby'},
 'aabdt': {'abdat', 'batad'},
 'abdeil': {'abdiel', 'baldie'},
 'aaabdgiilmnnoov': {'abdominovaginal', 'vaginoabdominal'},
 'aabcdeiilmnoosv': {'abdominovesical', 'vesicoabdominal'},
 'abe': {'abe', 'bae', 'bea'},
 'abde': {'abed', 'bade', 'bead'},
 'abel': {'abel', 'able', 'albe', 'bale

In [70]:
def palindromic_pair(w1, w2):
    """Return True when `w2` is `w1` spelled backwards, False otherwise."""
    return get_reverse(w1) == w2

In [77]:
# HSTH attempt 1

def palidromic_wordset(wordset):
    """Collect the palindromic findings within one collection of words.

    Returns a list with two kinds of entries, in iteration order:
      * a plain string for each word that reads the same in both directions
        (a true palindrome);
      * a `(word1, word2)` tuple for each pair where `word2` is `word1`
        reversed. A mirrored pair is found from both sides, so both
        `(a, b)` and `(b, a)` appear.
    Words of length 1 are skipped.
    """
    found = []
    for candidate in wordset:
        if len(candidate) == 1:
            continue

        if candidate == get_reverse(candidate):  # true palindrome
            found.append(candidate)
            continue

        # Not a palindrome itself: look for its mirror image among the others.
        for other in wordset:
            if palindromic_pair(candidate, other):
                found.append((candidate, other))

    return found

In [48]:
# keep only the key/value pairs where the set has more than one element;
# cannot be palindromic if only 1 word.
# this is now a regular dict

# SOLUTION NOT BY A COMPREHENSION...

# palindromic_by_sig = {pal: wordset for pal, wordset in words_palidromic.items() if palidromic_wordset(wordset)}

In [83]:
# Within every anagram group, compare each word against each word (itself
# included) and keep the pairs where one is the other reversed. Because the
# comparison runs over all ordered pairs, a true palindrome shows up paired
# with itself and a mirrored pair shows up in both orders.
suggested_palidromics = [
    (word1, word2)
    for wordset in anagrams_by_sig.values()
    for word1 in wordset
    for word2 in wordset
    if word1 == word2[::-1]  # palindromic
]


In [84]:
suggested_palidromics

[('ala', 'ala'),
 ('ama', 'ama'),
 ('ab', 'ba'),
 ('ba', 'ab'),
 ('aba', 'aba'),
 ('caba', 'abac'),
 ('abac', 'caba'),
 ('saba', 'abas'),
 ('abas', 'saba'),
 ('bab', 'bab'),
 ('abba', 'abba'),
 ('yalb', 'blay'),
 ('blay', 'yalb'),
 ('absi', 'isba'),
 ('isba', 'absi'),
 ('tuba', 'abut'),
 ('abut', 'tuba'),
 ('araca', 'acara'),
 ('acara', 'araca'),
 ('acca', 'acca'),
 ('dirca', 'acrid'),
 ('acrid', 'dirca'),
 ('da', 'ad'),
 ('ad', 'da'),
 ('adad', 'dada'),
 ('dada', 'adad'),
 ('adda', 'adda'),
 ('adar', 'rada'),
 ('rada', 'adar'),
 ('dad', 'dad'),
 ('daud', 'duad'),
 ('duad', 'daud'),
 ('teda', 'adet'),
 ('adet', 'teda'),
 ('dian', 'naid'),
 ('naid', 'dian'),
 ('adman', 'namda'),
 ('namda', 'adman'),
 ('ado', 'oda'),
 ('oda', 'ado'),
 ('yard', 'dray'),
 ('dray', 'yard'),
 ('yad', 'day'),
 ('day', 'yad'),
 ('ea', 'ae'),
 ('ae', 'ea'),
 ('era', 'are'),
 ('are', 'era'),
 ('rea', 'aer'),
 ('aer', 'rea'),
 ('aes', 'sea'),
 ('sea', 'aes'),
 ('alga', 'agla'),
 ('agla', 'alga'),
 ('agama', 'amag

# Copied from solution / next lesson from here:

In [93]:
# NOTE(review): imported mid-notebook; the cells below this one need it.
import itertools

# list all combinations of two different elements from the set {1,2,3};
# combinations() yields each unordered pair exactly once, as a tuple, so
# (2, 1) does not appear alongside (1, 2) and no element is paired with itself
list(itertools.combinations({1,2,3}, 2))

[(1, 2), (1, 3), (2, 3)]

In [96]:
# Same search as the nested-loop version, but itertools.combinations visits
# each unordered pair of distinct words once, so self-pairs and mirrored
# duplicates do not appear in the result.
elegant_palidromics = [
    (word1, word2)
    for wordset in anagrams_by_sig.values()
    for word1, word2 in itertools.combinations(wordset, 2)
    if word1 == word2[::-1]  # palindromic
]

In [97]:
elegant_palidromics

[('ab', 'ba'),
 ('caba', 'abac'),
 ('saba', 'abas'),
 ('yalb', 'blay'),
 ('absi', 'isba'),
 ('tuba', 'abut'),
 ('araca', 'acara'),
 ('dirca', 'acrid'),
 ('da', 'ad'),
 ('adad', 'dada'),
 ('adar', 'rada'),
 ('daud', 'duad'),
 ('teda', 'adet'),
 ('dian', 'naid'),
 ('adman', 'namda'),
 ('ado', 'oda'),
 ('yard', 'dray'),
 ('yad', 'day'),
 ('ea', 'ae'),
 ('era', 'are'),
 ('rea', 'aer'),
 ('aes', 'sea'),
 ('alga', 'agla'),
 ('agama', 'amaga'),
 ('agar', 'raga'),
 ('agger', 'regga'),
 ('agib', 'biga'),
 ('morga', 'agrom'),
 ('agust', 'tsuga'),
 ('ah', 'ha'),
 ('moha', 'ahom'),
 ('tha', 'aht'),
 ('redia', 'aider'),
 ('elia', 'aile'),
 ('ami', 'ima'),
 ('ria', 'air'),
 ('aria', 'aira'),
 ('eria', 'aire'),
 ('ati', 'ita'),
 ('raja', 'ajar'),
 ('ak', 'ka'),
 ('oka', 'ako'),
 ('kua', 'auk'),
 ('al', 'la'),
 ('lana', 'anal'),
 ('bal', 'lab'),
 ('nabla', 'alban'),
 ('laban', 'nabal'),
 ('alem', 'mela'),
 ('lean', 'nael'),
 ('anil', 'lina'),
 ('yalla', 'allay'),
 ('alle', 'ella'),
 ('allium', 'muilla