In [46]:
# Baseline approach: start from an empty list and append each square inside an explicit loop.
squares = []

for i in range(10):
    squares.append(i**2)
    
squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [47]:
squares = [i**2 for i in range(10)] #comprehension to create a list in a single line. 
squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [48]:
squares3 = [i**2 for i in range(10) if i%3 == 0] #create a filter with if in comprehension
squares3

[0, 9, 36, 81]

In [49]:
# Dict comprehension (key: value form) with an if filter: map each multiple of 3 below 30 to its square.
square3_dict = {i: i**2 for i in range(30) if i % 3 == 0}
square3_dict

{0: 0,
 3: 9,
 6: 36,
 9: 81,
 12: 144,
 15: 225,
 18: 324,
 21: 441,
 24: 576,
 27: 729}

In [50]:
# Lookup table: country -> capital city
capitals = {
    'US': 'Washington DC',
    'Italy': 'Rome',
    'Singapore': 'Singapore',
}

In [51]:
# Dict comprehension that swaps keys and values: capital city -> country
capitals_bycapital = {city: country for country, city in capitals.items()}
capitals_bycapital

{'Rome': 'Italy', 'Singapore': 'Singapore', 'Washington DC': 'US'}

In [52]:
sum([i**2 for i in range(10)])

285

In [53]:
sum(i**2 for i in range(10)) # generator expression (comprehension syntax without brackets): values are fed to sum() one at a time, so no intermediate list is built

285

In [54]:
# Open the word-list file for reading in text mode. Despite its name, `word` is the file object,
# and it stays open until it is explicitly closed.
word = open('words', 'r')

In [55]:
word

<_io.TextIOWrapper name='words' mode='r' encoding='UTF-8'>

In [56]:
# readlines() loads every line of the file (trailing '\n' included) into a list of strings.
wordlist = word.readlines()
word.close()  # everything is in memory now, so release the file handle instead of leaking it


In [57]:
wordlist[:10]

['A\n',
 'a\n',
 'aa\n',
 'aal\n',
 'aalii\n',
 'aam\n',
 'Aani\n',
 'aardvark\n',
 'aardwolf\n',
 'Aaron\n']

In [58]:
len(wordlist)

235886

In [59]:
'aalii\n'.strip() #removes newline

'aalii'

In [60]:
'Aani'.lower() #converts to lowercase

'aani'

In [61]:
wordclean = [word.strip().lower() for word in wordlist] #list comprehension to strip newline and convert to lowercase

In [62]:
wordclean[:10]

['a',
 'a',
 'aa',
 'aal',
 'aalii',
 'aam',
 'aani',
 'aardvark',
 'aardwolf',
 'aaron']

In [63]:
# Deduplicate through a set (lowercasing produced repeats such as 'a'), then convert back to a list.
# A set has no defined order, so the resulting list order is arbitrary.
wordunique = list(set(wordclean))

In [64]:
wordunique[:10]

['dillyman',
 'tomboyish',
 'perilless',
 'electromassage',
 'jozy',
 'bilinguar',
 'disunionist',
 'huse',
 'ologist',
 'boildown']

In [65]:
# list.sort() sorts the list in place and returns None, so this cell displays nothing.
wordunique.sort()

In [66]:
wordunique[:10]

['a',
 'aa',
 'aal',
 'aalii',
 'aam',
 'aani',
 'aardvark',
 'aardwolf',
 'aaron',
 'aaronic']

In [67]:
# Iterate over the file object directly (it yields one line at a time), skipping the
# intermediate readlines() list. The with-block closes the file as soon as the
# comprehension has consumed it, instead of leaking an anonymous open handle.
with open('words', 'r') as wordfile:
    wordclean = [line.strip().lower() for line in wordfile]

In [68]:
wordclean[:10]

['a',
 'a',
 'aa',
 'aal',
 'aalii',
 'aam',
 'aani',
 'aardvark',
 'aardwolf',
 'aaron']

In [69]:
# One-step cleanup: strip + lowercase every line, deduplicate with a set comprehension,
# and sort. sorted() accepts any iterable and always returns a new list, so no
# intermediate list() is needed. The with-block guarantees the file is closed.
with open('words', 'r') as wordfile:
    wordclean = sorted({line.strip().lower() for line in wordfile})

In [70]:
wordclean[:10]

['a',
 'aa',
 'aal',
 'aalii',
 'aam',
 'aani',
 'aardvark',
 'aardwolf',
 'aaron',
 'aaronic']

In [71]:
sorted(['test', 'cat', 'aeroplane'])

['aeroplane', 'cat', 'test']

In [72]:
sorted('lives')

['e', 'i', 'l', 's', 'v']

In [73]:
sorted('lives') == sorted('elvis')

True

In [74]:
sorted('hate') == sorted('love')

False

In [75]:
def signature(word):
    """Return the characters of `word` in sorted order, joined into a single string.

    Two words are anagrams of each other exactly when their signatures are equal.
    """
    ordered_chars = sorted(word)
    # Join on the empty string so nothing is inserted between the characters.
    return ''.join(ordered_chars)

In [76]:
signature('lives')

'eilsv'

In [77]:
'/'.join(['1','2','3'])

'1/2/3'

In [78]:
signature('213')

'123'

In [79]:
def anagram(myword):
    """Return every word in the global `wordclean` list that is an anagram of `myword`.

    This is a linear scan: the signature of each dictionary word is compared with
    the signature of `myword`. `myword` itself is part of the result whenever it
    appears in `wordclean`.
    """
    # The target signature does not change during the scan, so compute it once
    # instead of re-sorting `myword` for every dictionary word.
    target = signature(myword)
    return [word for word in wordclean if signature(word) == target]

In [80]:
anagram('dictionary')

['dictionary', 'indicatory']

In [81]:
%timeit anagram('dictionary')

963 ms ± 84.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [82]:
words_bysig = {}

# Deliberate failure demo: a plain dict has no list to append to the first time a
# signature is seen, so the very first lookup raises KeyError. Catch and print it so
# the notebook still survives "Restart Kernel -> Run All".
try:
    for word in wordclean:
        words_bysig[signature(word)].append(word)
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'a'

In [83]:
import collections

In [84]:
# defaultdict(list) supplies an empty list as the default value when a key doesn't exist yet,
# so the append below works the first time a signature is seen.
words_bysig = collections.defaultdict(list)

# Index every word under its signature: signature -> list of words sharing it.
for word in wordclean:
    words_bysig[signature(word)].append(word)

In [85]:
def anagram_fast(myword):
    """Fast anagram lookup using a dictionary with the signature as key.

    Reads the global `words_bysig` mapping (signature -> list of words) and returns
    the list stored under the signature of `myword`, or an empty list when no
    indexed word shares that signature.
    """
    # Use .get() rather than indexing: indexing a defaultdict with an unknown
    # signature would silently insert a new empty-list entry on every miss,
    # growing the index with junk keys.
    return words_bysig.get(signature(myword), [])

In [86]:
anagram_fast('dictionary')

['dictionary', 'indicatory']

In [87]:
%timeit anagram_fast('dictionary')

2.11 µs ± 63 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [88]:
# Map every word that has at least one other anagram to its whole anagram group
# (the group includes the word itself). Each word is looked up once.
anagram_all = {
    word: matches
    for word, matches in zip(wordclean, map(anagram_fast, wordclean))
    if len(matches) > 1
}

In [89]:
len(anagram_all)

32890

### Challenge

1. Given the list of dictionary words, group the words into classes by length.
2. Within each class of same-length words, find all anagrams.
3. Count the total number of anagrams in each class.

In [90]:
import collections

In [91]:
# Step 1: group the words by length (key = word length, value = list of words of that length).
# defaultdict(list) creates the empty list the first time a length is seen.
words_bylen = collections.defaultdict(list)

for word in wordclean:
    words_bylen[len(word)].append(word)

In [None]:
words_bylen

In [97]:
# Step 2: for each length class, map every word that has at least one other anagram
# (group size > 1) to the list returned by anagram_fast for it.
anagram_bylen = {}

for length, words in words_bylen.items():
    anagram_bylen[length] = {word: anagram_fast(word) for word in words if len(anagram_fast(word)) > 1}

In [None]:
anagram_bylen

In [99]:
# Same grouping as before, but keep only a count per word: the size of its anagram
# group minus one, i.e. how many *other* words share its signature.
anagram_bylen = {}

for length, words in words_bylen.items():
    anagram_bylen[length] = {word: len(anagram_fast(word)) - 1 for word in words if len(anagram_fast(word)) > 1}

In [None]:
anagram_bylen

In [102]:
# Step 3: total number of anagram pairs per word length.
anagram_bylen = {}

for length, words in words_bylen.items():
    # For each word, the number of other words sharing its signature.
    partner_counts = [len(anagram_fast(word)) - 1 for word in words]
    # A group of k mutual anagrams contributes k * (k - 1) to the sum (each of the k words
    # has k - 1 partners), so halving it counts every unordered pair once.
    # True division keeps the totals as floats.
    anagram_bylen[length] = sum(count for count in partner_counts if count > 0) / 2

In [103]:
anagram_bylen

{1: 0.0,
 2: 40.0,
 3: 554.0,
 4: 2780.0,
 5: 4247.0,
 6: 5153.0,
 7: 4220.0,
 8: 3097.0,
 9: 2100.0,
 10: 1168.0,
 11: 584.0,
 12: 288.0,
 13: 137.0,
 14: 70.0,
 15: 49.0,
 16: 35.0,
 17: 22.0,
 18: 10.0,
 19: 7.0,
 20: 3.0,
 21: 4.0,
 22: 2.0,
 23: 0.0,
 24: 0.0}