In [5]:
import nltk
import string
from nltk.corpus import words
from nltk.util import ngrams
from collections import Counter

# Download NLTK resources
nltk.download('words')
nltk.download('stopwords')

# Get the list of valid words in the English language
valid_words = set(words.words())

# Check the casing of the first 20 words in the list
print("First 20 words in the list:", list(valid_words)[:20])

# Normalize the casing for all the terms
valid_words = set(word.lower() for word in valid_words)

# Create a list of stop words
stop_words = set(nltk.corpus.stopwords.words('english'))

# Add punctuation to the stop words list
stop_words.update(set(string.punctuation))

# Combine stop words and punctuation to create the final stop words list
stop_words_set = set(stop_words)



def get_correct_term(target_term):
    # Get the first 20,000 entries from the valid word list
    valid_words_subset = list(valid_words)[:20000]

    # Calculate the edit distance for each term in the valid word list
    distances = {term: nltk.edit_distance(target_term, term) for term in valid_words_subset}

    # Sort the dictionary by edit distance in ascending order
    sorted_distances = sorted(distances.items(), key=lambda x: x[1])

    # Return the term with the minimum edit distance
    return sorted_distances[0][0]

def correct_spelling(sentence):
    # Tokenize the sentence and make all terms lowercase
    tokenized_sentence = nltk.word_tokenize(sentence.lower())

    # Correct spelling for each term in the tokenized sentence
    corrected_sentence = [term if term in valid_words else get_correct_term(term) for term in tokenized_sentence]

    # Return the joined string as output
    return ' '.join(corrected_sentence)

# Test the function
input_sentence = "The new abacos is great"
output_sentence = correct_spelling(input_sentence)
print("Input Sentence:", input_sentence)
print("Output Sentence:", output_sentence)


[nltk_data] Downloading package words to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


First 20 words in the list: ['Macrocystis', 'Opuntia', 'Cystophora', 'zygophoric', 'iodobromite', 'ironhard', 'amaranth', 'Bruchidae', 'clat', 'stiffening', 'soleiform', 'dorsocervically', 'sublot', 'unperceptibly', 'Bessemerize', 'posterosuperior', 'fut', 'pizzle', 'clumsily', 'banked']
Input Sentence: The new abacos is great
Output Sentence: the new aback is great
