# A simple program to find anagrams

In [4]:
import pandas as pd
import numpy as np
import collections

In [5]:
# Read every entry of the word list file into a Python list.
#
# words_alpha.txt is a list of English words containing only alphabetic
# characters (no symbols, digits, etc.), one word per line.

# Use a context manager so the file handle is closed as soon as reading
# finishes (or fails) instead of lingering until garbage collection.
with open('words_alpha.txt', 'r') as word_file:
    # Strip surrounding whitespace/newlines and fold everything to lower case.
    words = [line.strip().lower() for line in word_file]

In [6]:
# Convert to a set to remove duplicate entries (a set stores each value once).
# Note: this rebinds `words`, so from here on it is a set, not a list.
words = set(words)

In [7]:
# Turn the collection back into a list, then order it in place
# (default string ordering, ascending).
words = list(words)
words.sort()

In [8]:
# `words` is now a sorted list with no duplicates. Display it to check.
# NOTE(review): this echoes the entire list; a slice such as words[:20]
# would keep the notebook output short.
words

['a',
 'aa',
 'aaa',
 'aah',
 'aahed',
 'aahing',
 'aahs',
 'aal',
 'aalii',
 'aaliis',
 'aals',
 'aam',
 'aani',
 'aardvark',
 'aardvarks',
 'aardwolf',
 'aardwolves',
 'aargh',
 'aaron',
 'aaronic',
 'aaronical',
 'aaronite',
 'aaronitic',
 'aarrgh',
 'aarrghh',
 'aaru',
 'aas',
 'aasvogel',
 'aasvogels',
 'ab',
 'aba',
 'ababdeh',
 'ababua',
 'abac',
 'abaca',
 'abacas',
 'abacate',
 'abacaxi',
 'abacay',
 'abaci',
 'abacinate',
 'abacination',
 'abacisci',
 'abaciscus',
 'abacist',
 'aback',
 'abacli',
 'abacot',
 'abacterial',
 'abactinal',
 'abactinally',
 'abaction',
 'abactor',
 'abaculi',
 'abaculus',
 'abacus',
 'abacuses',
 'abada',
 'abaddon',
 'abadejo',
 'abadengo',
 'abadia',
 'abadite',
 'abaff',
 'abaft',
 'abaisance',
 'abaised',
 'abaiser',
 'abaisse',
 'abaissed',
 'abaka',
 'abakas',
 'abalation',
 'abalienate',
 'abalienated',
 'abalienating',
 'abalienation',
 'abalone',
 'abalones',
 'abama',
 'abamp',
 'abampere',
 'abamperes',
 'abamps',
 'aband',
 'abandon',


In [105]:
# Define signature function (the word's letters in sorted order, joined into one string)
def signature(word):
    """Return the characters of `word` sorted and concatenated into a string.

    Two words are anagrams of each other exactly when their signatures are
    equal, which makes the signature usable as a dictionary key.
    """
    return ''.join(sorted(word))

In [106]:
# Group the words by signature. defaultdict(set) creates an empty set the
# first time a signature is looked up, so words sharing a signature end up
# collected under the same key.
wordsBySig = collections.defaultdict(set)
for word in words:
    bucket = wordsBySig[signature(word)]
    bucket.add(word)

In [108]:
# Keep only the signatures shared by at least two words -- those are the
# actual anagram groups.
anagramsBySig = {
    sig: group
    for sig, group in wordsBySig.items()
    if len(group) >= 2
}

In [113]:
# Define FASTER anagram finder -- a single dictionary lookup by signature
def fasterAnagramFinder(myword: str):
    """Look up the anagram group of `myword` in `anagramsBySig`.

    The input is stripped and lower-cased before its signature is computed,
    mirroring how the word list itself was normalised when it was loaded, so
    'Listen' and ' listen ' find the same group as 'listen'.

    Parameters
    ----------
    myword : str
        The word to look up.

    Returns
    -------
    set or None
        The set of words sharing the signature of `myword` (the set stored in
        `anagramsBySig`, which includes `myword` itself when it is in the word
        list), or None when no group exists for that signature.

    Notes
    -----
    `anagramsBySig` only holds signatures shared by more than one word, so the
    fallback message is also printed for words that simply have no anagram.
    """
    mysig = signature(myword.strip().lower())

    matches = anagramsBySig.get(mysig)
    if matches is None:
        print('This word doesn\'t appear to be in the dictionary. Hmmm.')
        return None
    return matches


In [114]:
# Try a nonsense string: there is no anagram group for it, so the
# fallback message is printed.
fasterAnagramFinder('dfdkjfd')

This word doesn't appear to be in the dictionary. Hmmm.


In [121]:
# Which anagram signatures are the longest? List every signature, longest first.
# (Iterating a dict yields its keys.)
sorted(anagramsBySig, key=len, reverse=True)

['aceefhhillmnoooorrrttu',
 'ccddeehlmnooooossttuyy',
 'acddeehiimmnoopprrruuy',
 'aaaaccghiillmnooooptt',
 'acghhhhilmooooopprrtt',
 'aaccddeeemnnoooprttuy',
 'aaabegghilllnoooprssy',
 'aceehhiilmnnooorrrttt',
 'aaccghiiilmnoooopsty',
 'acceeeeeghillmnnnoop',
 'aaabeggillllnooorssy',
 'aaaccghiilmnooooptt',
 'aacccghiiilllnooopt',
 'aceeeghiiilmnnnopst',
 'aaegghmooooprssstty',
 'bceiiiilnnoorrtttvy',
 'aacccdeehiiinopprrr',
 'accghhiilloooppssyy',
 'abceehhlmmnoooorrt',
 'aaccdhmmnoooorrsxy',
 'cdehiiiiinooorrstt',
 'accghhhinoooopprrt',
 'ceeeeehlmmoorrrttt',
 'addeiimmooopsstvyy',
 'aaeeeghlmmnoorrttv',
 'aadeeehhiknorrsttx',
 'aagghiilnnoprrstyy',
 'aaceghlmnooorrttyy',
 'acceghillnoooprsuy',
 'aacghhiilloooppsty',
 'aaccghhlmooopprsyy',
 'aacdeeehiillmntty',
 'aaaabchiimnoprrst',
 'aaccceeiilloorssu',
 'aceeghiiilmnnopst',
 'bceeegiiimnnorrst',
 'aacccceehhiilmmno',
 'acccehhilmoopprty',
 'acghhhilmoooprrty',
 'acghhhmoooopprrty',
 'acghhhnoooopprrty',
 'aciiilmnnoosstttu',
 'acd

In [122]:
# Same ordering as above (longest signature first), but show the anagram
# groups themselves instead of their signatures.
[
    group
    for sig, group in sorted(
        anagramsBySig.items(),
        key=lambda item: len(item[0]),
        reverse=True,
    )
]

[{'chlorotrifluoromethane', 'trifluorochloromethane'},
 {'cholecystoduodenostomy', 'duodenocholecystostomy'},
 {'hydropneumopericardium', 'pneumohydropericardium'},
 {'anatomicopathological', 'pathologicoanatomical'},
 {'chromophotolithograph', 'photochromolithograph'},
 {'duodenopancreatectomy', 'pancreatoduodenectomy'},
 {'glossolabiopharyngeal', 'labioglossopharyngeal'},
 {'nitrotrichloromethane', 'trichloronitromethane'},
 {'anatomicophysiologic', 'physiologicoanatomic'},
 {'encephalomeningocele', 'meningoencephalocele'},
 {'glossolabiolaryngeal', 'labioglossolaryngeal'},
 {'anatomicopathologic', 'pathologicoanatomic'},
 {'clinicopathological', 'pathologicoclinical'},
 {'encephalomeningitis', 'meningoencephalitis'},
 {'esophagogastrostomy', 'gastroesophagostomy'},
 {'incontrovertibility', 'introconvertibility'},
 {'pericardiacophrenic', 'phrenicopericardiac'},
 {'physiopsychological', 'psychophysiological'},
 {'bromochloromethane', 'chlorobromomethane'},
 {'chondromyxosarcoma', 'my

In [135]:
# Show the built-in help text for the pandas.api package.
# NOTE(review): this does not appear to be used by the anagram code above --
# looks like an exploratory leftover; confirm whether it should stay.
help(pd.api)

Help on package pandas.api in pandas:

NAME
    pandas.api - public toolkit API

PACKAGE CONTENTS
    extensions (package)
    indexers (package)
    types (package)

FILE
    /Users/aepperly/opt/anaconda3/lib/python3.9/site-packages/pandas/api/__init__.py




In [136]:
# Display the version string of the installed pandas package.
pd.__version__

'1.4.2'