In [1]:
import re

def words(text):
    """Tokenize ``text`` into lowercase words.

    The text is lowercased first, then every maximal run of ``\\w``
    characters is returned, in order of appearance.
    """
    lowered = text.lower()
    return re.findall(r'\w+', lowered)

def create_word_counts(text):
    """Build a word -> occurrence-count dictionary for ``text``.

    The text is tokenized with ``words()``, so counting is done on the
    lowercased tokens that function returns.

    Args:
        text: The text to count words in.

    Returns:
        A plain ``dict`` mapping each distinct token to the number of times
        it occurs. Keys appear in order of first occurrence.
    """
    # Function-scope import keeps this block self-contained; Counter does the
    # tallying that was previously hand-rolled.
    from collections import Counter

    # Convert back to a plain dict so callers see exactly the same type
    # (e.g. a missing key still raises KeyError on subscript).
    return dict(Counter(words(text)))

# Load corpus and create frequency dictionary.
# Runs at module load: reads all of 'big.txt' (a relative path, so it is
# resolved against the current working directory) and stores the resulting
# word -> count mapping in the module-level WORDS used by P() and known().
# NOTE(review): open() is called without an explicit encoding, so the
# platform default is used -- confirm that matches the corpus file.
with open('big.txt', 'r') as f:
    WORDS = create_word_counts(f.read())

def P(word, total_words=sum(WORDS.values())):
    """Return the relative frequency of ``word`` in the corpus.

    ``total_words`` defaults to the sum of all counts in ``WORDS``; that
    default is evaluated once, when this function is defined. A word that is
    not in ``WORDS`` has a count of 0.
    """
    occurrences = WORDS.get(word, 0)
    return occurrences / total_words

def correction(word):
    """Return the candidate correction for ``word`` with the highest ``P``.

    If there are no candidates at all, ``word`` itself is returned.
    """
    options = candidates(word)
    return max(options, key=P, default=word)

def known(words):
    """Return the set of entries in ``words`` that are keys of ``WORDS``."""
    return {candidate for candidate in words if candidate in WORDS}

def candidates(word):
    """Return the possible corrections for ``word``.

    The first non-empty result wins, tried in this order: the word itself if
    it is known, known words one edit away, known words two edits away.
    When none of those yields anything, a list holding just ``word`` is
    returned.
    """
    exact = known([word])
    if exact:
        return exact

    one_edit = known(edits1(word))
    if one_edit:
        return one_edit

    two_edits = known(edits2(word))
    if two_edits:
        return two_edits

    return [word]



def edits1(word):
    """Return the set of strings exactly one simple edit away from ``word``.

    An edit is one of: deleting a character, swapping two adjacent
    characters, replacing a character with a lowercase ASCII letter, or
    inserting a lowercase ASCII letter at any position.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    variants = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]

        # An insertion is possible at every cut point, including the end.
        for ch in alphabet:
            variants.add(head + ch + tail)

        # The remaining edits all need at least one character after the cut.
        if not tail:
            continue

        rest = tail[1:]
        variants.add(head + rest)  # deletion of tail[0]
        for ch in alphabet:
            variants.add(head + ch + rest)  # replacement of tail[0]

        if len(tail) > 1:
            # Transposition of the two characters following the cut.
            variants.add(head + tail[1] + tail[0] + tail[2:])
    return variants

def edits2(word):
    """Lazily yield every string reachable from ``word`` by two edits.

    Each result of ``edits1(word)`` is passed through ``edits1`` again. The
    returned generator may yield the same string more than once.
    """
    first_pass = edits1(word)
    return (twice for once in first_pass for twice in edits1(once))

# Demo: when this module is executed directly, print the suggested
# correction for a handful of sample misspellings.
if __name__ == "__main__":
    test_words = ["speling", "korrect", "thn", "wrriten"]
    for word in test_words:
        # One line per sample: "<input> -> <suggested correction>".
        print(f"{word} -> {correction(word)}")

speling -> spelling
korrect -> correct
thn -> the
wrriten -> written
