In [5]:
from symspellpy import SymSpell, Verbosity
from pkg_resources import resource_filename
import re
import os, sys

In [6]:
# ---------------------------------------------
# Load SymSpell with edit distance up to 5
# ---------------------------------------------
def load_symspell(max_distance=5):
    sym_spell = SymSpell(max_dictionary_edit_distance=max_distance, prefix_length=7)

    # Încercăm mai întâi din pachet
    try:
        dictionary_path = resource_filename(
            "symspellpy", "frequency_dictionary_en_82_765.txt"
        )
        print("→ English Dictionary loaded with success from SymSpell library!")
    except Exception:
        dictionary_path = "frequency_dictionary_en_82_765.txt"
        print("→ English Dictionary loaded with success from current directory!")

    if not os.path.exists(dictionary_path):
        print(f"[EROARE] Nu găsesc dicționarul SymSpell la: {dictionary_path}")
        print(
            "→ Descarcă `frequency_dictionary_en_82_765.txt` și pune-l în același director cu acest script/notebook."
        )
        print("→ Apoi rulează din nou.")
        sys.exit(1)

    if not sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1):
        print("[EROARE] Nu am putut încărca dicționarul SymSpell.")
        sys.exit(1)

    return sym_spell


# ---------------------------------------------
# Default text with up to 5 misspellings
# ---------------------------------------------
default_text = (
    "Thiss cod chekks speling in text using SymSpell and BERRT ranking.\n"
    "It corects words by two stepps: dictionnary lookup and conttext moddel.\n"
    "SymmSpell findd candidattes from a big dictonary faast.\n"
    "Then a BERRT maskked language modele chooces the best suggetion in context.\n"
    "The input can be a defaalt texxt or custtom lines typped by the usser.\n"
    "In the defalt texxt we add mispelled wordss to testt the code.\n"
    "This codee prints the correced texte and a raport of all wrong wordt.\n"
    "Only CPU ussage is needded, usingg a smaler BERT model for speeed."
)


def get_input_text():
    choice = input("Use default text? (yes/no): ").strip().lower()
    if choice == "yes":
        print("\nUsing default text:\n")
        print(default_text)
        return default_text

    print("\nEnter your text (finish with empty line):")
    lines = []
    while True:
        line = input()
        if line == "":
            break
        lines.append(line)
    return "\n".join(lines)


# ---------------------------------------------
# Spell correction using SymSpell
# ---------------------------------------------
def correct_text(sym_spell, text):
    words = re.findall(r"[A-Za-z']+", text)
    corrected_text = text
    corrections = {}

    for word in words:
        suggestions = sym_spell.lookup(word, Verbosity.TOP, max_edit_distance=5)
        if suggestions:
            best = suggestions[0].term
            if best.lower() != word.lower():
                corrections[word] = best
                # înlocuim doar cuvântul exact, nu substring-uri
                corrected_text = re.sub(rf"\b{word}\b", best, corrected_text)

    return corrected_text, corrections


def main():
    sym_spell = load_symspell()
    text = get_input_text()

    corrected_text, corrections = correct_text(sym_spell, text)

    print("\n---------------------------------------")
    print("Corrected Text:")
    print("---------------------------------------")
    print(corrected_text)

    print("\n---------------------------------------")
    print("Correction Report:")
    print("---------------------------------------")

    if corrections:
        for wrong, correct in corrections.items():
            print(f"{wrong} → {correct}")
    else:
        print("No corrections needed.")

    print(f"\nTotal wrong words found: {len(corrections)}")


if __name__ == "__main__":
    main()

→ English Dictionary loaded with success from SymSpell library!

Using default text:

Thiss cod chekks speling in text using SymSpell and BERRT ranking.
It corects words by two stepps: dictionnary lookup and conttext moddel.
SymmSpell findd candidattes from a big dictonary faast.
Then a BERRT maskked language modele chooces the best suggetion in context.
The input can be a defaalt texxt or custtom lines typped by the usser.
In the defalt texxt we add mispelled wordss to testt the code.
This codee prints the correced texte and a raport of all wrong wordt.
Only CPU ussage is needded, usingg a smaler BERT model for speeed.

---------------------------------------
Corrected Text:
---------------------------------------
hiss cod checks spelling in text using campbell and the ranking.
It corrects words by two steps: dictionary lookup and context model.
campbell find candidates from a big dictionary fast.
when a the masked language model choices the best suggestion in context.
The input can b