### Spell check

In [25]:
from spellchecker import SpellChecker
import json

In [79]:
# Build the checker with distance=1 (presumably the maximum edit distance
# considered when generating corrections — confirm against the pyspellchecker docs).
spell = SpellChecker(distance=1)


# Words from the list that are NOT found in the checker's dictionary.
unknown = spell.unknown(['abc','hi','you'])

# Words from the list that ARE found in the checker's dictionary.
known = spell.known(['abc','hi','you'])
# Drop "waring" from the word-frequency data before asking for its correction below.
spell.word_frequency.remove("waring")
print("unknown words : ",unknown)
print("known words : ",known)

# Single best correction for the word.
print(spell.correction("waring"))

# Full set of candidate corrections for the word (stored in `cands`, not printed).
cands = spell.candidates("waring")


    

unknown words :  {'abc'}
known words :  {'you', 'hi'}
wearing


In [81]:
def suggestions(word):
    """Return spelling suggestions for *word* using the module-level ``spell``.

    Args:
        word: The word to check.

    Returns:
        ``1`` when *word* is already known to the checker. Otherwise a
        ``(best, matches)`` tuple, where ``best`` is the checker's single
        best correction and ``matches`` is the sorted list of candidates for
        ``best`` that start with the same letter as ``best``. When the
        checker offers no correction at all, ``(word, [])`` is returned.
    """
    if spell.known([word]):
        return 1

    best = spell.correction(word)
    if not best:
        # The checker may report "no correction" as None; there is nothing
        # to filter in that case.
        return word, []

    # Use distinct names for the loop variable and the corrected word: the
    # previous version reused ``word`` for both, which made the prefix test
    # compare each candidate with itself and returned the last candidate
    # iterated instead of the correction.
    first_letter = best[:1]
    candidates = spell.candidates(best) or ()
    matches = sorted(c for c in candidates if c.startswith(first_letter))

    return best, matches


result = suggestions('waring')
print(result)

('wearing', ['wearing'])


In [21]:
# Look up the checker's stored value for "replacement" (the cell output
# below shows 8745; presumably its frequency count — confirm in the docs).
spell['replacement']

8745

In [15]:
# Count how many entries iterating over the checker yields.
i = 0
for i, word in enumerate(spell, start=1):
    pass
i

138581

### Adding custom data to spell checker

In [31]:
# Read the custom vocabulary; the JSON document is expected to contain a
# "words" list. The encoding is explicit so the result does not depend on
# the platform default.
with open("custom_dict.json", "r", encoding="utf-8") as file:
    custom_words_data = json.load(file)

# Register all custom words in a single call once the file is closed,
# instead of one `add` call per word while the file is still open.
spell.word_frequency.load_words(custom_words_data['words'])


In [32]:
# Re-count the entries yielded by the checker after loading the custom words.
i = 0
for i, word in enumerate(spell, start=1):
    pass
i

138584

### Custom data preparation

In [38]:
# Walk through every single-edit variant of one sample word.
word = "arun"
alphabet = "abcdefghijklmnopqrstuvwxyz"

# Every way to cut the word into a (left, right) pair, including empty halves.
splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
print("splits : ",splits)

# Remove the first character of the right half.
deletes = [left + right[1:] for left, right in splits if right]
print(f"deletes : {len(deletes)} \n {deletes}")

# Swap the first two characters of the right half.
transposes = [left + right[1] + right[0] + right[2:] for left, right in splits if len(right) > 1]
print(f"transposes : {len(transposes)}\n {transposes}")

# Substitute the first character of the right half with each letter.
replaces = [left + c + right[1:] for left, right in splits if right for c in alphabet]
print(f"replaces : {len(replaces)} \n {replaces}")

# Insert each letter at the cut point.
inserts = [left + c + right for left, right in splits for c in alphabet]
# This line previously printed the inserts under the "replaces" label.
print(f"inserts : {len(inserts)} \n {inserts}")

splits :  [('', 'arun'), ('a', 'run'), ('ar', 'un'), ('aru', 'n'), ('arun', '')]
deletes : 4 
 ['run', 'aun', 'arn', 'aru']
transposes : 3
 ['raun', 'aurn', 'arnu']
replaces : 104 
 ['arun', 'brun', 'crun', 'drun', 'erun', 'frun', 'grun', 'hrun', 'irun', 'jrun', 'krun', 'lrun', 'mrun', 'nrun', 'orun', 'prun', 'qrun', 'rrun', 'srun', 'trun', 'urun', 'vrun', 'wrun', 'xrun', 'yrun', 'zrun', 'aaun', 'abun', 'acun', 'adun', 'aeun', 'afun', 'agun', 'ahun', 'aiun', 'ajun', 'akun', 'alun', 'amun', 'anun', 'aoun', 'apun', 'aqun', 'arun', 'asun', 'atun', 'auun', 'avun', 'awun', 'axun', 'ayun', 'azun', 'aran', 'arbn', 'arcn', 'ardn', 'aren', 'arfn', 'argn', 'arhn', 'arin', 'arjn', 'arkn', 'arln', 'armn', 'arnn', 'aron', 'arpn', 'arqn', 'arrn', 'arsn', 'artn', 'arun', 'arvn', 'arwn', 'arxn', 'aryn', 'arzn', 'arua', 'arub', 'aruc', 'arud', 'arue', 'aruf', 'arug', 'aruh', 'arui', 'aruj', 'aruk', 'arul', 'arum', 'arun', 'aruo', 'arup', 'aruq', 'arur', 'arus', 'arut', 'aruu', 'aruv', 'aruw', 'arux', '

In [39]:
# Union of every single-edit variant generated above; the set drops duplicates.
dictionary = {*deletes, *transposes, *replaces, *inserts}
print(len(dictionary))

234


In [59]:
class Checker:
    """Edit-distance spelling corrector over a fixed vocabulary.

    The vocabulary may be any iterable of words, or a ``dict`` mapping each
    word to a frequency count. When frequencies are available they are used
    to rank competing corrections.
    """

    ALPHABET = "abcdefghijklmnopqrstuvwxyz"

    def __init__(self, word_list):
        """Store the vocabulary in a container with fast membership tests.

        Args:
            word_list: Iterable of valid words, or a ``dict`` of
                ``word -> frequency``.
        """
        if isinstance(word_list, (dict, set, frozenset)):
            self.words = word_list
        else:
            # Lists/tuples would make every ``in`` test linear; the edit
            # search performs a very large number of such tests.
            self.words = set(word_list)

    def _edits1(self, word):
        """Return the set of strings exactly one edit away from *word*.

        An edit is a deletion, an adjacent transposition, a single-letter
        replacement, or a single-letter insertion (lowercase a-z only).
        """
        alphabet = self.ALPHABET
        splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
        deletes = [left + right[1:] for left, right in splits if right]
        transposes = [
            left + right[1] + right[0] + right[2:]
            for left, right in splits
            if len(right) > 1
        ]
        replaces = [
            left + c + right[1:] for left, right in splits if right for c in alphabet
        ]
        inserts = [left + c + right for left, right in splits for c in alphabet]

        return set(deletes + transposes + replaces + inserts)

    def _known_edits2(self, word):
        """Return vocabulary words that are two edits away from *word*."""
        return {
            e2
            for e1 in self._edits1(word)
            for e2 in self._edits1(e1)
            if e2 in self.words
        }

    def _known(self, words):
        """Return the subset of *words* present in the vocabulary."""
        return {w for w in words if w in self.words}

    def _frequency(self, word):
        """Return the stored frequency of *word*, or 0 when none is available."""
        if isinstance(self.words, dict):
            return self.words.get(word, 0)
        return 0

    def correct(self, word):
        """Return the best correction for *word*.

        Candidates are taken from the first non-empty group of: the word
        itself (if known), known words one edit away, known words two edits
        away. If none exist, *word* is returned unchanged.

        Within the chosen group the most frequent candidate wins; ties (and
        vocabularies without frequencies) are broken alphabetically so the
        result is deterministic. The previous implementation used the whole
        vocabulary as the sort key for every candidate, which made the choice
        arbitrary for sets and raised ``TypeError`` for dict vocabularies.
        """
        candidates = (
            self._known([word])
            or self._known(self._edits1(word))
            or self._known_edits2(word)
            or [word]
        )
        return min(candidates, key=lambda w: (-self._frequency(w), w))
    

# Small in-memory vocabulary used to exercise Checker.
word_list = {"apple", "banana", "orange", "grape", "peach","replacement","injector"}  # Sample dictionary of valid words
spell_checker = Checker(word_list)

# "injictor" differs from the vocabulary word "injector" by one substituted letter.
word = "injictor"
corrected_word = spell_checker.correct(word)
print(f"Corrected word for '{word}': {corrected_word}")

Corrected word for 'injictor': injector


## textblob

In [82]:
from textblob import TextBlob

In [83]:
def text_correction(word):
    """Wrap *word* in a ``TextBlob`` and return the result of its ``correct()`` call."""
    return TextBlob(word).correct()

In [97]:
# text_correction() already returns the output of TextBlob.correct(); the
# extra .correct() call below runs correction a second time on that result.
result = text_correction("coolint")
result.correct()

TextBlob("cooling")