In [7]:
import time
import sys
from collections import defaultdict, Counter
import re

In [2]:
class Trie:
    """A node of a prefix tree keyed by single letters.

    Attributes:
        word: the complete string that ends at this node, or None when no
            inserted string ends here.
        children: mapping from a letter to the child node; a missing letter
            is created on first access because the mapping is a defaultdict.
    """

    def __init__(self):
        self.children = defaultdict(Trie)
        self.word = None

    def insert(self, word):
        """Add `word` to the tree, creating intermediate nodes as needed."""
        cursor = self
        for symbol in word:
            # defaultdict materialises the child the first time it is touched.
            cursor = cursor.children[symbol]
        # Mark the terminal node with the full string it spells.
        cursor.word = word

In [3]:
class LevenshteinAutomaton:
    """Incremental edit-distance matcher for a fixed target word.

    A state is a sequence with one entry per prefix of `word`: entry `i` is
    the edit distance between the first `i` letters of `word` and the input
    consumed so far. Feeding one input letter with `step` yields the next
    state.
    """

    def __init__(self, word, max_dist):
        self.word, self.max_dist = word, max_dist

    def start(self):
        """Return the state before any input: entry `i` equals `i`."""
        return range(1 + len(self.word))

    def step(self, state, letter):
        """Return the state reached from `state` after consuming `letter`."""
        # Column 0 compares the empty prefix of the word with the input,
        # so every consumed letter costs one more edit.
        row = [state[0] + 1]
        for col in range(1, len(state)):
            substitution_cost = 0 if self.word[col - 1] == letter else 1
            candidates = (
                row[col - 1] + 1,                      # extend from the left cell
                state[col - 1] + substitution_cost,    # diagonal: match or replace
                state[col] + 1,                        # extend from the cell above
            )
            row.append(min(candidates))
        return row

    def is_match(self, state):
        """True when the whole word is within `max_dist` of the input so far."""
        final_cost = state[-1]
        return final_cost <= self.max_dist

    def can_match(self, state):
        """True when some prefix of the word is still within `max_dist`."""
        cheapest = min(state)
        return cheapest <= self.max_dist

In [4]:
class WordSearcher:
    """Finds the words stored in a trie that lie within `max_dist` edits of a query.

    Attributes:
        trie: root node; it must expose `children` (letter -> node) and each
            node must expose `word` (the stored string or None).
        max_dist: largest edit distance that still counts as a hit.
        words: `(word, distance)` pairs found by the most recent `search`.
    """

    def __init__(self, trie, max_dist=1):
        self.trie = trie
        self.max_dist = max_dist
        self.words = []

    def search(self, word):
        """Collect every stored word within `max_dist` edits of `word`.

        The result replaces `self.words` and is also returned, so hits from an
        earlier query never leak into a later one.

        Returns:
            list of `(word, distance)` tuples in trie traversal order.
        """
        # Bind a fresh list: a list handed out by a previous call stays intact.
        self.words = []
        automaton = LevenshteinAutomaton(word, self.max_dist)
        self.search_recursive(self.trie.children, automaton, automaton.start())
        return self.words

    def search_recursive(self, node, automaton, state):
        """Walk the `node` mapping depth-first, appending hits to `self.words`.

        Args:
            node: mapping from letter to child trie node.
            automaton: object providing `step`, `is_match` and `can_match`.
            state: automaton state for the prefix that leads to `node`.
        """
        for letter in node:
            child = node[letter]
            new_state = automaton.step(state, letter)
            if automaton.is_match(new_state) and child.word is not None:
                self.words.append((child.word, new_state[-1]))
            # Descend only while some continuation can still be a hit.
            if automaton.can_match(new_state):
                self.search_recursive(child.children, automaton, new_state)

In [25]:
class SpellChecker:
    """Spell-checker scaffold that loads a word list into a trie.

    Only the `rus_dictionary` file is read at present; `eng_dictionary` is
    accepted for interface stability but not used yet.
    """

    def __init__(self, rus_dictionary, eng_dictionary, max_dist=1):
        """Build `self.rus_trie` from the whitespace-separated words in a file.

        Args:
            rus_dictionary: path of the text file whose words fill `rus_trie`.
            eng_dictionary: currently unused.
            max_dist: edit-distance budget stored on the instance.
        """
        self.rus_trie = Trie()
        # The context manager closes the file even if reading or inserting fails.
        with open(rus_dictionary, "rt") as dictionary_file:
            for word in dictionary_file.read().split():
                # Insert into this instance's trie, not a notebook-level global.
                self.rus_trie.insert(word)
        self.max_dist = max_dist

    # TODO: implement check(self, text)
        

In [191]:
# Small hand-made trie used to try the fuzzy search out.
trie = Trie()
for word in ('a', 'as', 'asd', 'ae', 'awer'):
    trie.insert(word)

In [194]:
# Query the demo trie with a generous edit budget and display the
# (word, distance) pairs that were collected.
searcher = WordSearcher(trie, max_dist=5)
searcher.search('asde')
searcher.words

[('a', 3), ('awer', 3), ('ae', 2), ('as', 2), ('asd', 1)]

In [8]:
def words(text):
    """Return the lower-cased `\\w+` tokens of `text`, in order of appearance."""
    lowered = text.lower()
    tokens = re.findall(r'\w+', lowered)
    return tokens

In [23]:
# Token frequency tables for the two corpora. Each file is opened in a
# context manager so its handle is closed as soon as the text has been read.
with open('../data/eng.txt') as eng_file:
    eng = Counter(words(eng_file.read()))
with open('../data/rus.txt') as rus_file:
    rus = Counter(words(rus_file.read()))