In [None]:
import requests
import os

# Remote location of the word list, and the local path it is stored at once
# downloaded (load_words() only downloads when this file does not exist yet).
WORDLIST_URL = 'https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english-no-swears.txt'
WORDLIST_LOCAL_PATH = './wordlist'

def download_words():
    """Download the word list from WORDLIST_URL into WORDLIST_LOCAL_PATH.

    The response body is written verbatim (as bytes) to the local file.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(WORDLIST_URL, timeout=30)
    # Fail loudly instead of caching an error page as if it were the word list.
    response.raise_for_status()
    # The context manager guarantees the file is flushed and closed.
    with open(WORDLIST_LOCAL_PATH, 'wb') as fh:
        fh.write(response.content)
    
def load_words():
    """Return the word list as a list of stripped, non-empty lines.

    The list is read from WORDLIST_LOCAL_PATH; if that file does not exist
    yet it is first fetched with download_words().

    Returns:
        list[str]: one entry per non-blank line, surrounding whitespace removed.
    """
    if not os.path.exists(WORDLIST_LOCAL_PATH):
        # Was misspelled `dowload_words`, which raised NameError on first use.
        download_words()
    # Read inside a context manager so the file handle is closed promptly.
    with open(WORDLIST_LOCAL_PATH) as fh:
        return [line for line in map(str.strip, fh) if line]

In [None]:
# Read the word list into memory (load_words() downloads it if the local file is missing).
words = load_words()

In [None]:
# Display how many words were loaded.
len(words)

In [None]:
# Set of the loaded words, used by is_word() for hash-based membership tests.
wordset = set(words)

In [None]:
def is_word(word):
    """Report whether ``word`` is a member of the module-level ``wordset``."""
    known = word in wordset
    return known

In [None]:
# Spot-check the lookup with one ordinary English word and one nonsense string.
is_word('potato'), is_word('kldjffffs')

In [None]:
def charsets(word):
    """Yield every non-empty subsequence of ``word``.

    Characters keep their original relative order. For a first character
    ``c`` and remainder ``rest`` the output order is: ``c`` alone, then ``c``
    prepended to each subsequence of ``rest``, then each subsequence of
    ``rest`` on its own. Repeated letters in ``word`` produce repeated
    results; nothing is de-duplicated.

    An empty ``word`` yields nothing (it used to raise IndexError).
    """
    if not word:
        return
    head, tail = word[0], word[1:]
    yield head
    # The tail is enumerated twice on purpose: it keeps the generator lazy
    # instead of holding all 2**len(tail) subsequences in memory at once.
    for sub in charsets(tail):
        yield head + sub
    yield from charsets(tail)

In [None]:
def swapped(word, i, j):
    """Return ``word`` with the characters at positions ``i`` and ``j`` exchanged.

    Built from slices around the two positions, so it is written for
    ``i < j`` (which is how permutations() calls it).
    """
    before, between, after = word[:i], word[i + 1:j], word[j + 1:]
    return "".join((before, word[j], between, word[i], after))

def permutations(word):
    """Yield ``word`` itself, then each string made by swapping one pair of positions.

    Pairs are visited as (i, j) with i < j, i ascending then j ascending.

    NOTE(review): despite the name this does not enumerate every ordering of
    the letters, only the original plus all single transpositions. Confirm
    whether full permutations were intended.
    """
    yield word
    size = len(word)
    for left in range(size - 1):
        yield from (swapped(word, left, right) for right in range(left + 1, size))

def candidates(word):
    """Yield everything permutations() produces for each result of charsets(word)."""
    for subset in charsets(word):
        yield from permutations(subset)

In [None]:
def embedded_words(word):
    """Yield each string from candidates(word) that is_word() accepts.

    Results come out in candidate order and are not de-duplicated.
    """
    yield from (option for option in candidates(word) if is_word(option))

In [None]:
# Number of distinct candidate strings generated for the 10-letter word 'scoreboard'.
len(set(candidates('scoreboard')))

In [None]:
def timeit(f):
    """Call ``f()`` once and return ``(result, elapsed_seconds)``.

    Args:
        f: zero-argument callable to run.

    Returns:
        tuple: the value returned by ``f`` and the elapsed time in seconds
        as measured by ``time.perf_counter``.
    """
    import time
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended high-resolution replacement for timing code.
    start = time.perf_counter()
    res = f()
    return res, time.perf_counter() - start

In [None]:
# Time the single-argument embedded_words() on alphabet prefixes of length
# 1..17, printing: prefix length, number of words found, elapsed time.
for i in range(1,18):
    n, t = timeit(lambda: len(list(embedded_words('abcdefghijklmnopqrstuvwxyz'[:i]))))
    print(i, n, t)


In [None]:
# part 2 - build a trie
def add_to_trie(word: str, trie: dict):
    """Insert ``word`` into ``trie`` in place.

    The trie is a nested dict keyed by single characters; a ``None`` key
    mapped to ``True`` marks the node where a complete word ends.

    Args:
        word: the word to insert; an empty string marks ``trie`` itself
            as a word end.
        trie: the root node (a dict) to insert into; it is mutated.
    """
    # Walk down iteratively: the recursive form copied word[1:] at every level
    # (quadratic in the word length) and hit the recursion limit on very
    # long inputs.
    node = trie
    for ch in word:
        node = node.setdefault(ch, {})
    node[None] = True
        
def build_trie(words):
    """Return a new trie (nested dict) containing every entry of ``words``."""
    root = {}
    for entry in words:
        add_to_trie(entry, root)
    return root

def is_word(trie, word):
    """Return True if ``word`` is stored in ``trie`` as a complete word.

    Args:
        trie: nested dict keyed by characters, where a ``None`` key marks
            the end of a stored word.
        word: the string to look up.

    Returns:
        bool: True only when every character of ``word`` can be followed
        from the root and the node reached carries the ``None`` end marker.
    """
    # Iterative descent: avoids one recursive call and one word[1:] copy per
    # character, which made long lookups quadratic and recursion-limited.
    node = trie
    for ch in word:
        if ch not in node:
            return False
        node = node[ch]
    return None in node

In [None]:
# Build the trie from the loaded word list.
dictionary = build_trie(words)

In [None]:
# Spot-check the trie lookup with one ordinary English word and one nonsense string.
is_word(dictionary, 'potato'), is_word(dictionary, 'kjosdffj')

In [None]:
from collections import Counter

def walk_trie(path, trie, counts):
    """Yield every word reachable from ``trie`` using only letters left in ``counts``.

    ``path`` is the prefix spelled so far. Each step down the trie consumes
    one occurrence of the letter from ``counts`` and gives it back once that
    branch has been fully explored, so ``counts`` is unchanged after the
    generator is exhausted.
    """
    for letter, subtrie in trie.items():
        if letter is None:
            # End-of-word marker: the prefix built so far is a complete word.
            yield path
        if not counts[letter]:
            continue
        counts[letter] -= 1
        yield from walk_trie(path + letter, subtrie, counts)
        counts[letter] += 1
            
def embedded_words(trie, word):
    """Return a generator over the words in ``trie`` spellable from the letters of ``word``.

    Each letter may be used at most as many times as it occurs in ``word``.
    """
    available = Counter(word)
    return walk_trie('', trie, available)

In [None]:
# Show every dictionary word that can be spelled from the letters of 'cats'.
list(embedded_words(dictionary, 'cats'))

In [None]:
# Time the trie-based embedded_words() on alphabet prefixes of length 1..17,
# printing: prefix length, number of words found, elapsed time.
for i in range(1,18):
    n, t = timeit(lambda: len(list(embedded_words(dictionary, 'abcdefghijklmnopqrstuvwxyz'[:i]))))
    print(i, n, t)
