# Spell checker built on the NLTK `words` and `stopwords` corpora

In [15]:
import nltk
from nltk.corpus import words as nltk_words, stopwords
from string import punctuation
from nltk.metrics import edit_distance
from nltk import WordNetLemmatizer

In [4]:
def fetch_valid_words():
    """Return the entries of the NLTK ``words`` corpus as a set."""
    vocabulary = set()
    vocabulary.update(nltk_words.words())
    return vocabulary

In [5]:
def standardize_casing(term):
    """Return ``term`` converted to lower case."""
    lowered = term.lower()
    return lowered

In [6]:
def create_unique_word_list(word_list):
    """Return the distinct items of ``word_list`` as a list.

    Duplicates are dropped and the first occurrence of each item keeps its
    position, so the result is the same on every run. (Going through a
    ``set`` would yield strings in an order that varies with hash
    randomisation.)

    Args:
        word_list: Any iterable of hashable items.

    Returns:
        A new list holding each distinct item once, in first-seen order.
    """
    # dict keys are unique and preserve insertion order.
    return list(dict.fromkeys(word_list))

In [7]:
def obtain_stop_words():
    """Return the NLTK English stop words plus every single punctuation character."""
    ignorable_tokens = set(stopwords.words('english'))
    # Iterating the ``punctuation`` string adds its characters one by one.
    ignorable_tokens.update(punctuation)
    return ignorable_tokens

In [16]:
def find_correct_word(target_word, valid_words_set):
    """Return the vocabulary lemma closest to ``target_word`` by edit distance.

    Both the target and every vocabulary word are lemmatized with
    ``WordNetLemmatizer`` before comparison.

    Args:
        target_word: The (possibly misspelled) word to look up.
        valid_words_set: Iterable of known-good words.

    Returns:
        ``target_word`` unchanged when its lemma is already among the
        lemmatized vocabulary, or when the vocabulary is empty and there is
        nothing to suggest. Otherwise the lemmatized vocabulary entry with
        the smallest edit distance to the target's lemma; note this is a
        lemma, which may differ from the surface forms in
        ``valid_words_set``.
    """
    lemmatizer = WordNetLemmatizer()
    target_lemma = lemmatizer.lemmatize(target_word)

    valid_lemmas = {lemmatizer.lemmatize(word) for word in valid_words_set}

    # An empty vocabulary offers no candidates; hand the word back instead
    # of failing on an empty candidate list.
    if not valid_lemmas or target_lemma in valid_lemmas:
        return target_word

    # min() needs only one pass and, like a stable sort followed by [0],
    # keeps the first candidate encountered among equal distances.
    return min(
        valid_lemmas,
        key=lambda candidate: edit_distance(target_lemma, candidate),
    )

In [17]:
def correct_word(word, valid_words_set, stop_words_set):
    """Return ``word`` itself or its closest known spelling.

    A token is returned unchanged when it is a known word, a stop word, or
    contains no alphabetic character at all. The last case covers numbers,
    multi-character punctuation such as ``...``, and the empty string, none
    of which has a meaningful "nearest word". Every other token is passed to
    ``find_correct_word``.

    Args:
        word: The token to check.
        valid_words_set: Collection of known-good words.
        stop_words_set: Collection of tokens that must never be corrected.

    Returns:
        The original token, or the suggestion from ``find_correct_word``.
    """
    if word in valid_words_set or word in stop_words_set:
        return word

    # Tokens without any letter cannot be misspelled words; searching the
    # vocabulary for them would replace them with an unrelated word.
    if not any(char.isalpha() for char in word):
        return word

    return find_correct_word(word, valid_words_set)


In [18]:
def perform_spelling_correction(input_sentence, valid_words_set, stop_words_set):
    """Return ``input_sentence`` lower-cased, tokenized and spell-corrected.

    The sentence is lower-cased, split with ``nltk.word_tokenize``, each
    token is passed through ``standardize_casing`` and ``correct_word``, and
    the results are joined with single spaces.
    """
    normalized_tokens = map(
        standardize_casing,
        nltk.word_tokenize(input_sentence.lower()),
    )
    return ' '.join(
        correct_word(token, valid_words_set, stop_words_set)
        for token in normalized_tokens
    )

In [19]:
# Vocabulary as returned by fetch_valid_words().
english_words_set = fetch_valid_words()
# The same vocabulary lower-cased, with duplicates removed.
unique_normalized_words = create_unique_word_list(map(standardize_casing, english_words_set))
# Tokens that are never spell-corrected.
stop_words_collection = obtain_stop_words()

In [25]:

# Demo: correct a short sentence containing two misspellings.
input_text = "I lve Python. It is an amzing language."
# perform_spelling_correction lower-cases every token before lookup, so the
# tokens are checked against the lower-cased vocabulary rather than the raw
# set. A set keeps each membership test O(1).
output_text = perform_spelling_correction(
    input_text,
    set(unique_normalized_words),
    stop_words_collection,
)
print("Input Text:", input_text)
print("Output Text:", output_text)

Input Text: I lve Python. It is an amzing language.
Output Text: i love python . it is an amazing language .
