In [4]:
import pandas as pd
import re
import unicodedata
import sys

def normalizar(texto):
    """Return *texto* lowercased with its diacritical marks stripped.

    The text is lowercased, decomposed with Unicode NFD, and every
    combining mark (category ``Mn``) is discarded, so accented letters
    collapse to their base letter. All other characters -- hyphens,
    digits, punctuation -- pass through unchanged.

    Args:
        texto: Value to normalize.

    Returns:
        The normalized string, or ``""`` when *texto* is not a ``str``.
    """
    if isinstance(texto, str):
        descompuesto = unicodedata.normalize('NFD', texto.lower())
        # Keep everything except the combining marks split off by NFD.
        sin_marcas = [car for car in descompuesto if unicodedata.category(car) != 'Mn']
        return ''.join(sin_marcas)
    return ""

def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between *s1* and *s2*.

    Only two rows of the dynamic-programming table are kept at a time;
    the shorter operand spans the row so the rows stay as small as
    possible.

    Args:
        s1: First sequence to compare.
        s2: Second sequence to compare.

    Returns:
        The minimum number of single-element insertions, deletions and
        substitutions needed to turn one sequence into the other.
    """
    corta, larga = (s1, s2) if len(s1) <= len(s2) else (s2, s1)

    # Row for the empty prefix of `larga`: cost of inserting each prefix of `corta`.
    fila_previa = list(range(len(corta) + 1))
    for fila, car_larga in enumerate(larga, start=1):
        fila_actual = [fila]
        for col, car_corta in enumerate(corta):
            if car_corta == car_larga:
                # Matching characters carry the diagonal cost over unchanged.
                costo = fila_previa[col]
            else:
                costo = 1 + min(fila_actual[col], fila_previa[col], fila_previa[col + 1])
            fila_actual.append(costo)
        fila_previa = fila_actual
    return fila_previa[-1]

def corregir_sentencia(sentencia, diccionario, max_distancia=2):
    """Correct misspelled words in a sentence using the Levenshtein distance.

    Each word (a run of word characters and hyphens) is normalized with
    ``normalizar``. A word whose normalized form is in the dictionary is
    kept exactly as typed. Otherwise it is replaced by the closest
    dictionary entry, provided that entry is within *max_distancia* edits;
    if no entry is close enough the word is kept as typed.

    Args:
        sentencia: The sentence to correct.
        diccionario: Collection of known words. Non-string entries are
            ignored when searching for a replacement.
        max_distancia: Largest edit distance at which a replacement is
            accepted. Defaults to 2.

    Returns:
        The (possibly corrected) words joined by single spaces. Only the
        matched words are kept, so punctuation and the original spacing
        are not preserved.
    """
    palabras_corregidas = []

    for palabra in re.findall(r'\b[\w-]+\b', sentencia):
        palabra_normalizada = normalizar(palabra)

        # If the word is already in the dictionary, it is not corrected.
        if palabra_normalizada in diccionario:
            palabras_corregidas.append(palabra)
            continue

        longitud = len(palabra_normalizada)
        mejor_coincidencia = None
        min_distancia = max_distancia + 1

        # Look up the closest word in the dictionary.
        for palabra_dic in diccionario:
            # Cells read from a spreadsheet may be numbers; they cannot be compared.
            if not isinstance(palabra_dic, str):
                continue
            # The edit distance is never smaller than the length difference,
            # so such candidates can be rejected without computing it.
            if abs(len(palabra_dic) - longitud) > max_distancia:
                continue
            dist = levenshtein_distance(palabra_normalizada, palabra_dic)
            if dist > max_distancia:
                continue
            # Break ties alphabetically: set iteration order changes between
            # runs, and the result must not depend on it.
            if dist < min_distancia or (
                dist == min_distancia and palabra_dic < mejor_coincidencia
            ):
                min_distancia = dist
                mejor_coincidencia = palabra_dic

        # Correct only if a sufficiently close dictionary word was found.
        if mejor_coincidencia is None:
            palabras_corregidas.append(palabra)
        else:
            palabras_corregidas.append(mejor_coincidencia)

    return ' '.join(palabras_corregidas)

if __name__ == "__main__":

    # 1. Load the dictionary from an Excel file.
    diccionario_file = "lista_palabras.xlsx"
    columna_palabras = 'Palabras en el Diccionario'
    try:
        df_diccionario = pd.read_excel(diccionario_file)
        # Convert the column of words into a set for quick searching. Only
        # string cells are kept: numeric cells cannot be compared as words.
        diccionario = {
            palabra
            for palabra in df_diccionario[columna_palabras].dropna()
            if isinstance(palabra, str)
        }
    except FileNotFoundError:
        print(f"Error: The dictionary file was not found.: {diccionario_file}")
        print("Asegúrate de haber generado el diccionario con el script anterior.")
        sys.exit(1)
    except KeyError:
        # The workbook exists but does not contain the expected column.
        print(f"Error: column '{columna_palabras}' was not found in {diccionario_file}")
        sys.exit(1)

    print("Dictionary successfully loaded.")

    # 2. Ask the user to enter a sentence.
    sentencia_a_corregir = input("Please enter a sentence to correct: ")

    # 3. Correct the sentence.
    sentencia_corregida = corregir_sentencia(sentencia_a_corregir, diccionario)

    # 4. Print the results.
    print("\n--- Correction results ---")
    print(f"Original Sentence: {sentencia_a_corregir}")
    print(f"Sentencia corregida: {sentencia_corregida}")

Dictionary successfully loaded.


Please enter a sentence to correct:  te voy a matar si no pagas



--- Correction results ---
Original Sentence: te voy a matar si no pagas
Sentencia corregida: te voy a matar si no pagas
