### 1. Using TextBlob

In [3]:
from textblob import TextBlob

In [4]:
# Sample sentence containing deliberate misspellings, used as the corrector's input.
incorrect_text = "Ths is a smple txt with som speling erors."

In [5]:
# Wrap the sentence in a TextBlob, run its correct() step, and leave the
# corrected string as the cell's displayed value.
textBlb = TextBlob(incorrect_text)
corrected_blob = textBlb.correct()
corrected_blob.string

'The is a smile txt with so spelling errors.'

### 2. Using autocorrect library

In [6]:
!pip install autocorrect

Collecting autocorrect
  Using cached autocorrect-2.6.1.tar.gz (622 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'error'


  error: subprocess-exited-with-error
  
  × python setup.py egg_info did not run successfully.
  │ exit code: 1
  ╰─> [1 lines of output]
      ERROR: Can not execute `setup.py` since setuptools is not available in the build environment.
      [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
error: metadata-generation-failed

× Encountered error while generating package metadata.
╰─> See above for output.

note: This is an issue with the package mentioned above, not pip.
hint: See above for details.


In [7]:
from autocorrect import Speller

# Input first, then the English speller that will be applied to it.
incorrect_text = "Ths is a smple txt with som speling erors."
spell = Speller(lang='en')

# The Speller instance is called directly with the text to correct.
corrected_text = spell(incorrect_text)
print(corrected_text)


ModuleNotFoundError: No module named 'autocorrect'

### 3. Using pyspellchecker

In [None]:
from spellchecker import SpellChecker

spell = SpellChecker()
incorrect_text = "Ths is a smple txt with som speling erors."

# correction() yields None when it has no candidate for a token; keep the
# original token in that case so the join below never receives None.
corrected_words = [spell.correction(word) or word for word in incorrect_text.split()]
corrected_text = " ".join(corrected_words)
print(corrected_text)


ModuleNotFoundError: No module named 'spellchecker'

### 4. Using gingerit (Ginger API)

In [None]:
from gingerit.gingerit import GingerIt

incorrect_text = "Ths is a smple txt with som speling erors."

# parse() hands back a subscriptable response; the corrected sentence is read
# from its 'result' entry.
parser = GingerIt()
result = parser.parse(incorrect_text)
corrected_sentence = result['result']
print(corrected_sentence)


ModuleNotFoundError: No module named 'gingerit'

### 5. SymSpell (symspellpy)

In [None]:
import importlib.resources

from symspellpy import SymSpell, Verbosity

sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

# Locate the frequency dictionary bundled inside the symspellpy package using
# the standard library. pkg_resources is part of setuptools and cannot be
# imported in an environment where setuptools is not installed.
dictionary_path = importlib.resources.files("symspellpy") / "frequency_dictionary_en_82_765.txt"
sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)

incorrect_text = "Ths is a smple txt with som speling erors."
words = incorrect_text.split()

corrected_words = []
for word in words:
    suggestions = sym_spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
    # lookup() can come back empty when nothing lies within the edit distance;
    # fall back to the original token instead of indexing an empty list.
    corrected_words.append(suggestions[0].term if suggestions else word)

corrected_text = " ".join(corrected_words)
print(corrected_text)


### 6. JamSpell

In [None]:
import jamspell

incorrect_text = "Ths is a smple txt with som speling erors."

# Build the corrector and load the language model file into it.
corrector = jamspell.TSpellCorrector()
corrector.LoadLangModel("en.bin")  # pre-trained model file

# The whole sentence is passed to FixFragment in one call.
corrected_text = corrector.FixFragment(incorrect_text)
print(corrected_text)


### 7. Norvig’s Spell Corrector

In [None]:
# Simple implementation using Peter Norvig's approach
import re, collections

def words(text):
    """Return every run of word characters in ``text``, lowercased, in order."""
    lowered = text.lower()
    return re.findall(r'\w+', lowered)
# Count how often each word occurs in the corpus. The context manager closes
# the file handle as soon as the text has been read.
with open('big.txt') as corpus_file:
    WORDS = collections.Counter(words(corpus_file.read()))  # big corpus file

def P(word, N=sum(WORDS.values())):
    """Return the relative frequency of ``word`` in ``WORDS``.

    ``N`` defaults to the sum of all counts in ``WORDS``, evaluated once when
    this function is defined.
    """
    occurrences = WORDS[word]
    return occurrences / N

def correction(word):
    """Return the candidate for ``word`` with the highest ``P`` value."""
    options = candidates(word)
    return max(options, key=P)

def candidates(word):
    """Return possible corrections for ``word``.

    Preference order: the word itself if it is known, otherwise the known
    words one edit away, otherwise a single-element list holding the word.
    """
    exact = known([word])
    if exact:
        return exact
    one_edit_away = known(edits1(word))
    if one_edit_away:
        return one_edit_away
    return [word]

def known(words):
    """Return, as a set, the members of ``words`` that appear in ``WORDS``."""
    return {candidate for candidate in words if candidate in WORDS}

def edits1(word):
    """Return the set of all strings exactly one simple edit away from ``word``.

    An edit is one of: deleting a character, swapping two adjacent characters,
    replacing a character with a lowercase ASCII letter, or inserting a
    lowercase ASCII letter at any position.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    one_edit = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        # An insertion is possible at every cut point, including the very end.
        for letter in alphabet:
            one_edit.add(head + letter + tail)
        if not tail:
            continue
        # Delete the character right after the cut.
        one_edit.add(head + tail[1:])
        # Replace the character right after the cut.
        for letter in alphabet:
            one_edit.add(head + letter + tail[1:])
        # Swap the two characters right after the cut, when both exist.
        if len(tail) > 1:
            one_edit.add(head + tail[1] + tail[0] + tail[2:])
    return one_edit

incorrect_text = "Ths is a smple txt with som speling erors."


def _correct_match(match):
    """Correct one alphabetic token, restoring a leading capital if it had one."""
    token = match.group(0)
    fixed = correction(token.lower())
    return fixed[:1].upper() + fixed[1:] if token[0].isupper() else fixed


# WORDS is built from lowercased \w+ tokens, so a plain whitespace split would
# hand correction() strings such as "Ths" and "erors." that are unlikely to
# match anything. Correct only the alphabetic runs, in lowercase, and leave the
# surrounding spacing and punctuation untouched.
corrected_text = re.sub(r'[A-Za-z]+', _correct_match, incorrect_text)
print(corrected_text)


### 8. Hunspell

In [None]:
from hunspell import HunSpell

hobj = HunSpell('/usr/share/hunspell/en_US.dic', '/usr/share/hunspell/en_US.aff')
incorrect_text = "Ths is a smple txt with som speling erors."

corrected_words = []
for word in incorrect_text.split():
    # Query the suggestions once per word and reuse the result for both the
    # emptiness check and the pick.
    suggestions = hobj.suggest(word)
    corrected_words.append(suggestions[0] if suggestions else word)

corrected_text = " ".join(corrected_words)
print(corrected_text)


### 9. LanguageTool (language_tool_python)

In [None]:
import language_tool_python

incorrect_text = "Ths is a smple txt with som speling erors."
tool = language_tool_python.LanguageTool('en-US')

# Collect the matches for the text first, then apply them to the text through
# the utils helper.
matches = tool.check(incorrect_text)
corrected_text = language_tool_python.utils.correct(incorrect_text, matches)
print(corrected_text)


### 10. Transformer-based (seq2seq) correction (HuggingFace)

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Tokenizer and model are loaded from the same checkpoint name.
checkpoint = "prithivida/grammar_error_correcter_v1"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

incorrect_text = "Ths is a smple txt with som speling erors."

# The sentence is prefixed with "gec: " before being encoded.
prompt = "gec: " + incorrect_text
inputs = tokenizer.encode(prompt, return_tensors="pt")
outputs = model.generate(inputs, max_length=128)

# Decode the first generated sequence, dropping special tokens.
corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(corrected_text)


In [None]:
import nltk
nltk.download('punkt')


from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker

spell = SpellChecker()
text = "Ths is a smple txt with som speling erors."

# Tokenize first using NLTK
words = word_tokenize(text)

# Correct each token. correction() yields None when it has no candidate, so
# fall back to the original token to keep the join below from failing.
corrected_words = [spell.correction(word) or word for word in words]
corrected_text = " ".join(corrected_words)
print(corrected_text)
