In [2]:
import re, collections

In [3]:
def words(text):
    """Split `text` into lowercase word tokens.

    The text is lowercased first; every maximal run of the letters a-z is
    then returned, in order of appearance. Digits, punctuation, whitespace
    and any non a-z characters act as separators and are dropped.
    """
    token_pattern = re.compile('[a-z]+')
    lowered = text.lower()
    return token_pattern.findall(lowered)

def train(features):
    """Count how often each item occurs in `features`, with add-one smoothing.

    Returns a defaultdict whose factory yields 1, so an item seen k times is
    stored as k + 1, and indexing an unseen item with [] yields (and stores) 1.
    Note that `.get()` and `in` do not trigger the default factory.
    """
    counts = collections.defaultdict(lambda: 1)
    for token in features:
        counts[token] = counts[token] + 1
    return counts

In [4]:
# Build the word-frequency model from the corpus file 'big.txt'.
# The file is opened in a `with` block so the handle is closed as soon as the
# text has been read (the bare open(...).read() left it to the garbage collector).
# 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if one is present.
with open('big.txt', mode='r', buffering=-1, encoding='utf-8-sig') as corpus_file:
    NWORDS = train(words(corpus_file.read()))

In [5]:
# Letters tried at each position when generating replacement and insertion
# edits (lowercase ASCII only).
alphabet = 'abcdefghijklmnopqrstuvwxyz'

In [6]:
def edits1(word):
    """Return the set of all strings exactly one simple edit away from `word`.

    An edit is one of: deleting a character, swapping two adjacent characters,
    replacing a character with a letter of `alphabet`, or inserting a letter of
    `alphabet` at any position (including both ends). Replacing a letter with
    itself is not filtered out, so `word` can appear in the result.
    """
    variants = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        if tail:
            # deletion: drop the first character of the tail
            variants.add(head + tail[1:])
            # replacement: swap that character for each candidate letter
            for letter in alphabet:
                variants.add(head + letter + tail[1:])
        if len(tail) > 1:
            # transposition: exchange the first two characters of the tail
            variants.add(head + tail[1] + tail[0] + tail[2:])
        # insertion: put each candidate letter at the cut point
        for letter in alphabet:
            variants.add(head + letter + tail)
    return variants

def known_edits2(word):
    """Return the set of NWORDS entries reachable from `word` by two edits.

    Applies edits1 to every result of edits1(word) and keeps only the strings
    that are keys of NWORDS.
    """
    reachable = set()
    for first_step in edits1(word):
        for second_step in edits1(first_step):
            if second_step in NWORDS:
                reachable.add(second_step)
    return reachable

def known(words):
    """Return the subset of `words` (as a set) that are keys of NWORDS."""
    recognised = set()
    for candidate in words:
        if candidate in NWORDS:
            recognised.add(candidate)
    return recognised

def correct(word):
    """Return the most frequent plausible spelling of a single `word`.

    Candidate sets are tried in order and the first non-empty one wins:
      1. `word` itself, if it is in NWORDS;
      2. known words one edit away;
      3. known words two edits away;
      4. `word` unchanged, when nothing known was found.
    Among the winning candidates the one with the highest NWORDS count is
    returned. The argument is used as given (no lowercasing or splitting here).
    """
    candidates = known([word])
    if not candidates:
        candidates = known(edits1(word))
    if not candidates:
        candidates = known_edits2(word)
    if not candidates:
        # Nothing recognisable nearby: fall back to the input itself.
        candidates = [word]
    return max(candidates, key=NWORDS.get)

In [7]:
def splits(word):
    """Return every (prefix, suffix) pair of `word`, from ('', word) to (word, '')."""
    pairs = []
    for cut in range(len(word) + 1):
        pairs.append((word[:cut], word[cut:]))
    return pairs
def deletes(word):
    """Return the strings obtained by removing one character from `word`,
    ordered by the position of the removed character."""
    shortened = []
    for pos in range(len(word)):
        shortened.append(word[:pos] + word[pos + 1:])
    return shortened
def transposes(word):
    """Return the strings obtained by swapping one pair of adjacent characters
    in `word`, ordered by the position of the swapped pair."""
    swapped = []
    for pos in range(len(word) - 1):
        swapped.append(word[:pos] + word[pos + 1] + word[pos] + word[pos + 2:])
    return swapped
def replaces(word):
    """Return the strings obtained by substituting one character of `word`
    with each letter of `alphabet` (position-major, then alphabet order).

    The original letter is not skipped, so `word` itself appears in the list
    whenever one of its characters is in `alphabet`.
    """
    substituted = []
    for pos in range(len(word)):
        head, rest = word[:pos], word[pos + 1:]
        for letter in alphabet:
            substituted.append(head + letter + rest)
    return substituted
def inserts(word):
    """Return the strings obtained by inserting each letter of `alphabet` at
    every position of `word`, including before the first and after the last
    character (position-major, then alphabet order). Duplicates are kept."""
    lengthened = []
    for pos in range(len(word) + 1):
        for letter in alphabet:
            lengthened.append(word[:pos] + letter + word[pos:])
    return lengthened

In [8]:
# Every way to cut 'friend' into a (prefix, suffix) pair, empty ends included.
splits('friend')

[('', 'friend'),
 ('f', 'riend'),
 ('fr', 'iend'),
 ('fri', 'end'),
 ('frie', 'nd'),
 ('frien', 'd'),
 ('friend', '')]

In [9]:
# 'friend' with one character removed, one result per position.
deletes('friend')

['riend', 'fiend', 'frend', 'frind', 'fried', 'frien']

In [10]:
# 'friend' with one pair of adjacent characters swapped.
transposes('friend')

['rfiend', 'firend', 'freind', 'frined', 'friedn']

In [11]:
# 'friend' with one letter inserted: 26 letters x 7 positions = 182 entries,
# duplicates included (e.g. 'ffriend' is produced at two positions).
inserts('friend')

['afriend',
 'bfriend',
 'cfriend',
 'dfriend',
 'efriend',
 'ffriend',
 'gfriend',
 'hfriend',
 'ifriend',
 'jfriend',
 'kfriend',
 'lfriend',
 'mfriend',
 'nfriend',
 'ofriend',
 'pfriend',
 'qfriend',
 'rfriend',
 'sfriend',
 'tfriend',
 'ufriend',
 'vfriend',
 'wfriend',
 'xfriend',
 'yfriend',
 'zfriend',
 'fariend',
 'fbriend',
 'fcriend',
 'fdriend',
 'feriend',
 'ffriend',
 'fgriend',
 'fhriend',
 'firiend',
 'fjriend',
 'fkriend',
 'flriend',
 'fmriend',
 'fnriend',
 'foriend',
 'fpriend',
 'fqriend',
 'frriend',
 'fsriend',
 'ftriend',
 'furiend',
 'fvriend',
 'fwriend',
 'fxriend',
 'fyriend',
 'fzriend',
 'fraiend',
 'frbiend',
 'frciend',
 'frdiend',
 'freiend',
 'frfiend',
 'frgiend',
 'frhiend',
 'friiend',
 'frjiend',
 'frkiend',
 'frliend',
 'frmiend',
 'frniend',
 'froiend',
 'frpiend',
 'frqiend',
 'frriend',
 'frsiend',
 'frtiend',
 'fruiend',
 'frviend',
 'frwiend',
 'frxiend',
 'fryiend',
 'frziend',
 'friaend',
 'fribend',
 'fricend',
 'fridend',
 'frieend',
 'fr

In [12]:
# All distinct strings one edit (delete, transpose, replace, insert) from 'friend'.
edits1('friend')

{'afriend',
 'ariend',
 'bfriend',
 'briend',
 'cfriend',
 'criend',
 'dfriend',
 'driend',
 'efriend',
 'eriend',
 'faiend',
 'fariend',
 'fbiend',
 'fbriend',
 'fciend',
 'fcriend',
 'fdiend',
 'fdriend',
 'feiend',
 'feriend',
 'ffiend',
 'ffriend',
 'fgiend',
 'fgriend',
 'fhiend',
 'fhriend',
 'fiend',
 'fiiend',
 'firend',
 'firiend',
 'fjiend',
 'fjriend',
 'fkiend',
 'fkriend',
 'fliend',
 'flriend',
 'fmiend',
 'fmriend',
 'fniend',
 'fnriend',
 'foiend',
 'foriend',
 'fpiend',
 'fpriend',
 'fqiend',
 'fqriend',
 'fraend',
 'fraiend',
 'frbend',
 'frbiend',
 'frcend',
 'frciend',
 'frdend',
 'frdiend',
 'freend',
 'freiend',
 'freind',
 'frend',
 'frfend',
 'frfiend',
 'frgend',
 'frgiend',
 'frhend',
 'frhiend',
 'friaend',
 'friand',
 'fribend',
 'fribnd',
 'fricend',
 'fricnd',
 'fridend',
 'fridnd',
 'friead',
 'frieand',
 'friebd',
 'friebnd',
 'friecd',
 'friecnd',
 'fried',
 'friedd',
 'friedn',
 'friednd',
 'frieed',
 'frieend',
 'friefd',
 'friefnd',
 'friegd',
 'frie

In [13]:
# Corpus words reachable from 'friend' by two successive edits; 'friend' itself
# is among them because an edit can be undone by the next one.
known_edits2('friend')

{'befriend',
 'cried',
 'dried',
 'feind',
 'field',
 'fiend',
 'fiends',
 'find',
 'fined',
 'fired',
 'fred',
 'freed',
 'friant',
 'fried',
 'friend',
 'friendly',
 'friends',
 'frieze',
 'frigid',
 'fringed',
 'fwiend',
 'grind',
 'orient',
 'pried',
 'rien',
 'trend',
 'tried'}

In [26]:
# Correct a sentence one word at a time: correct() works on single words, so
# the sentence is split on whitespace and each token is lowercased before lookup.
sentence = 'Hello how are you my frend'.split()
result = ''
for word in map(str.lower, sentence):
    result = result + correct(word) + ' '
print(result)

hello how are you my friend 


In [29]:
# correct() treats its argument as ONE word, so the whole sentence is looked up
# as a single token and, as the recorded output shows, comes back unchanged.
# Split the sentence and correct each word separately to fix 'fend'.
correct('how are you my fend')

'how are you my fend'