In [2]:
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
import itertools
import re

# Define phonetic variations
variation_map = {
    "aa": ["a", "aa"],
    "a": ["a", "aa"],
    "ah": ["a", "ah", "aa"],
    "w": ["a", "w"],
    "e": ["e", "ee"],
    "ee": ["e", "ee"],
    "i": ["i", "ii"],
    "o": ["o"],
    "oo": ["u", "uu", "oo"],
    "u": ["u", "uu"],
    "uu": ["u", "uu"],
    "ae": ["ai"],
    "s": ["sh", "shh"],
    "sh": ["s", "shh"],
    "shh": ["s", "sh"]
}

# Additional consonant variations
consonant_map = {
    "chh": ["x", "ch", "xw", "xah"],
    "e": ["ye", "ya"],
    "pha": ["fa"],
    "a": ["w",],
    'T': ["t", "tt"],
    'Th': ["Th"],
    'D': ["d"],
    'Dh': ["dh"],
    'v': ['b', 'bh'],
    'b': ['v'],
    'bh': ['v']
}

last_word_check_list = ['w','ah']

def handle_repeated_chars(roman_word):
        """Reduce long repeated vowels (e.g., 'aaaa' тЖТ 'aa')."""
        optimized_word = []
        i = 0
        while i < len(roman_word):
            match = re.match(r"([a-zA-Z])\1{2,}", roman_word[i:])
            if match:
                char = match.group(1)
                optimized_word.append(char)  # Keep one repetition (aa, ee, etc.)
                i += len(match.group(0))
            else:
                optimized_word.append(roman_word[i])
                i += 1
        return "".join(optimized_word)



def generate_variations(word):
    normalize_word = handle_repeated_chars(word)

    new_words = [normalize_word]

    for key, variations in consonant_map.items():
        for variation in variations:
            if variation in normalize_word and (key not in normalize_word or key in ["e"]):
                normalize_word = normalize_word.replace(variation, key)
    new_words.append(normalize_word)
    # print('new_words', new_words)

    new_variations = list(new_words)
    # print('new_variations', new_variations)
    for roman_word in new_words:
        tokens = []
        i = 0
        while i < len(roman_word):
            # print('roman_word', roman_word)
            if i + 1 < len(roman_word) and roman_word[i:i+2] in variation_map:
                if (i + 2 < len(roman_word)) and roman_word[i:i+2] in last_word_check_list:
                    if roman_word[i] in variation_map:
                        tokens.append(variation_map[roman_word[i]])
                    else:
                        tokens.append([roman_word[i]])
                    i += 1
                else:
                    tokens.append(variation_map[roman_word[i:i+2]])
                    i += 2
            elif roman_word[i] in variation_map:
                if (i + 2 < len(roman_word)) and roman_word[i] in last_word_check_list:
                    tokens.append([roman_word[i]])
                else:
                    tokens.append(variation_map[roman_word[i]])
                i += 1
            else:
                tokens.append([roman_word[i]])
                i += 1
            # print('tokens', tokens)

        variations = ["".join(variant) for variant in itertools.product(*tokens)]
        new_variations.extend(variations)

    return set(new_variations)

# Example usage

def generate_devnagri_variations(variant):
    latin_variations = generate_variations(variant)
    # print('latin_variations', latin_variations)
    devanagari_variations = []
    print('Romanized variations', list(latin_variations))
    for variant in latin_variations:
        nepali = transliterate(variant, sanscript.ITRANS, sanscript.DEVANAGARI)
        devanagari_variations.append(nepali)
    return devanagari_variations


variant = "bigriyako"
# variant = "vahirw"
print('word:', variant)
devanagari_variations = generate_devnagri_variations(variant)
print('Devanagari variations:', devanagari_variations)

word: bigriyako
Romanized variations ['biigriieko', 'biigrieko', 'biigriyako', 'bigrieko', 'biigriiyaako', 'bigriyaako', 'biigrieeko', 'biigriiyako', 'bigriieeko', 'biigriieeko', 'bigriiyaako', 'bigriyako', 'bigrieeko', 'bigriiyako', 'biigriyaako', 'bigriieko']
Devanagari variations: ['рдмреАрдЧреНрд░реАрдПрдХреЛ', 'рдмреАрдЧреНрд░рд┐рдПрдХреЛ', 'рдмреАрдЧреНрд░рд┐рдпрдХреЛ', 'рдмрд┐рдЧреНрд░рд┐рдПрдХреЛ', 'рдмреАрдЧреНрд░реАрдпрд╛рдХреЛ', 'рдмрд┐рдЧреНрд░рд┐рдпрд╛рдХреЛ', 'рдмреАрдЧреНрд░рд┐рдИрдХреЛ', 'рдмреАрдЧреНрд░реАрдпрдХреЛ', 'рдмрд┐рдЧреНрд░реАрдИрдХреЛ', 'рдмреАрдЧреНрд░реАрдИрдХреЛ', 'рдмрд┐рдЧреНрд░реАрдпрд╛рдХреЛ', 'рдмрд┐рдЧреНрд░рд┐рдпрдХреЛ', 'рдмрд┐рдЧреНрд░рд┐рдИрдХреЛ', 'рдмрд┐рдЧреНрд░реАрдпрдХреЛ', 'рдмреАрдЧреНрд░рд┐рдпрд╛рдХреЛ', 'рдмрд┐рдЧреНрд░реАрдПрдХреЛ']


In [3]:
import pickle
import os

PICKLE_FILE = "dictionary/word_count.pkl"  # Path to the saved pickle file

def load_saved_word_count():
    """Load and return the word frequency dictionary from the pickle file."""
    if os.path.exists(PICKLE_FILE):
        with open(PICKLE_FILE, "rb") as f:
            return pickle.load(f)
    else:
        print("No saved word count dictionary found.")
        return {}  # Return an empty dictionary if the file doesn't exist

# Usage
saved_word_count = load_saved_word_count()

No saved word count dictionary found.


In [226]:
len(saved_word_count)

240683

In [227]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def rank_variants_by_similarity(variants, nepali_dictionary):
    """
    Rank the generated variants by their similarity to words in the Nepali dictionary using TF-IDF.

    :param variants: List of transliterated Nepali variants.
    :param nepali_dictionary: List of words in the Nepali dictionary.
    :return: List of variants ordered by similarity along with matched dictionary word and similarity score.
    """
    # Ensure both dictionary and variants are lists
    nepali_dictionary = list(nepali_dictionary)

    # Combine variants and dictionary into a single corpus
    corpus = nepali_dictionary + variants

    # Compute TF-IDF vectors
    # vectorizer = TfidfVectorizer(analyzer='char')
    vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(2, 4))
    tfidf_matrix = vectorizer.fit_transform(corpus)

    # Calculate similarity of each variant to the dictionary words
    dictionary_size = len(nepali_dictionary)
    variant_vectors = tfidf_matrix[dictionary_size:]
    dictionary_vectors = tfidf_matrix[:dictionary_size]

    similarities = cosine_similarity(variant_vectors, dictionary_vectors)

    # Rank variants by maximum similarity to any dictionary word
    ranked_variants = []
    for i, variant in enumerate(variants):
        max_sim_index = similarities[i].argmax()
        matched_word = nepali_dictionary[max_sim_index]
        similarity_score = similarities[i, max_sim_index]
        ranked_variants.append((variant, matched_word, similarity_score))

    ranked_variants.sort(key=lambda x: x[2], reverse=True)

    return ranked_variants

In [228]:

def generate_devnagri_variations(variant):
    latin_variations = generate_variations(variant)
    devanagari_variations = []
    print('Romanized variations', list(latin_variations))
    for variant in latin_variations:
        nepali = transliterate(variant, sanscript.ITRANS, sanscript.DEVANAGARI)
        devanagari_variations.append(nepali)
    return devanagari_variations



variant = "tikosa" # "choto"
devanagari_variations = generate_devnagri_variations(variant)
print('Devanagari variations:', devanagari_variations)

NEPALI_DICTIONARY= saved_word_count
top_matched_words = rank_variants_by_similarity(devanagari_variations, NEPALI_DICTIONARY)

print("\nTop 5 Matched Words:")
top_matched_words

Romanized variations ['tiikoshaa', 'Tikoshhaa', 'Tikosha', 'Tikoshaa', 'tikoshaa', 'Tiikosha', 'tikoshhaa', 'tiikoshha', 'tiikosha', 'tikoshha', 'Tikoshha', 'Tiikoshaa', 'Tiikoshha', 'tikosa', 'Tiikoshhaa', 'tiikoshhaa', 'tikosha', 'Tikosa']
Devanagari variations: ['рддреАрдХреЛрд╢рд╛', 'рдЯрд┐рдХреЛрд╖рд╛', 'рдЯрд┐рдХреЛрд╢', 'рдЯрд┐рдХреЛрд╢рд╛', 'рддрд┐рдХреЛрд╢рд╛', 'рдЯреАрдХреЛрд╢', 'рддрд┐рдХреЛрд╖рд╛', 'рддреАрдХреЛрд╖', 'рддреАрдХреЛрд╢', 'рддрд┐рдХреЛрд╖', 'рдЯрд┐рдХреЛрд╖', 'рдЯреАрдХреЛрд╢рд╛', 'рдЯреАрдХреЛрд╖', 'рддрд┐рдХреЛрд╕', 'рдЯреАрдХреЛрд╖рд╛', 'рддреАрдХреЛрд╖рд╛', 'рддрд┐рдХреЛрд╢', 'рдЯрд┐рдХреЛрд╕']

Top 5 Matched Words:


[('рддрд┐рдХреЛрд╖', 'рдЕрд╕реНрддрд┐рдХреЛрд╖', np.float64(0.7247021189837836)),
 ('рддрд┐рдХреЛрд╕', 'рдирд┐рдХреЛрд╕', np.float64(0.6912723641612005)),
 ('рдЯреАрдХреЛрд╖', 'рдЯреАрдХреЛ', np.float64(0.6833905808526253)),
 ('рдЯреАрдХреЛрд╢', 'рдЯреАрдХреЛ', np.float64(0.6830596542246341)),
 ('рддреАрдХреЛрд╖', 'рддреАрдХреЛ', np.float64(0.6659021962415224)),
 ('рддреАрдХреЛрд╢', 'рддреАрдХреЛ', np.float64(0.6655654726023579)),
 ('рдЯрд┐рдХреЛрд╕', 'рдирд┐рдХреЛрд╕', np.float64(0.6514994386051814)),
 ('рддрд┐рдХреЛрд╖рд╛', 'рдЕрд╕реНрддрд┐рдХреЛрд╖', np.float64(0.5658756902513336)),
 ('рддрд┐рдХреЛрд╢', 'рдХреЛрд╢', np.float64(0.5596815239968708)),
 ('рдЯреАрдХреЛрд╢рд╛', 'рдЯреАрдХреЛ', np.float64(0.5518158239419589)),
 ('рддреАрдХреЛрд╖рд╛', 'рдХреБрддреНрд▓реАрдХреЛрд╖рд╛', np.float64(0.5481520192791927)),
 ('рдЯреАрдХреЛрд╖рд╛', 'рдЯреАрдХреЛ', np.float64(0.5469707154553786)),
 ('рддреАрдХреЛрд╢рд╛', 'рддреАрдХреЛ', np.float64(0.5335996154201536)),
 ('рдЯрд┐рдХреЛрд╖', 'рдХреЛрд

In [229]:
NEPALI_DICTIONARY= saved_word_count
top_matched_words = rank_variants_by_similarity(devanagari_variations, NEPALI_DICTIONARY)

print("\nTop 5 Matched Words:")
top_matched_words


Top 5 Matched Words:


[('рддрд┐рдХреЛрд╖', 'рдЕрд╕реНрддрд┐рдХреЛрд╖', np.float64(0.7247021189837836)),
 ('рддрд┐рдХреЛрд╕', 'рдирд┐рдХреЛрд╕', np.float64(0.6912723641612005)),
 ('рдЯреАрдХреЛрд╖', 'рдЯреАрдХреЛ', np.float64(0.6833905808526253)),
 ('рдЯреАрдХреЛрд╢', 'рдЯреАрдХреЛ', np.float64(0.6830596542246341)),
 ('рддреАрдХреЛрд╖', 'рддреАрдХреЛ', np.float64(0.6659021962415224)),
 ('рддреАрдХреЛрд╢', 'рддреАрдХреЛ', np.float64(0.6655654726023579)),
 ('рдЯрд┐рдХреЛрд╕', 'рдирд┐рдХреЛрд╕', np.float64(0.6514994386051814)),
 ('рддрд┐рдХреЛрд╖рд╛', 'рдЕрд╕реНрддрд┐рдХреЛрд╖', np.float64(0.5658756902513336)),
 ('рддрд┐рдХреЛрд╢', 'рдХреЛрд╢', np.float64(0.5596815239968708)),
 ('рдЯреАрдХреЛрд╢рд╛', 'рдЯреАрдХреЛ', np.float64(0.5518158239419589)),
 ('рддреАрдХреЛрд╖рд╛', 'рдХреБрддреНрд▓реАрдХреЛрд╖рд╛', np.float64(0.5481520192791927)),
 ('рдЯреАрдХреЛрд╖рд╛', 'рдЯреАрдХреЛ', np.float64(0.5469707154553786)),
 ('рддреАрдХреЛрд╢рд╛', 'рддреАрдХреЛ', np.float64(0.5335996154201536)),
 ('рдЯрд┐рдХреЛрд╖', 'рдХреЛрд

In [230]:
from collections import defaultdict
import pandas as pd
import unicodedata
from metaphone import doublemetaphone

# Load the CSV file
df = pd.read_csv("files/multi_words_devnagri_root.csv")

In [231]:
df.head()

Unnamed: 0,Words,Devanagari,Root
0,raamrai,рд░рд╛рдореНрд░реИ,рд░рд╛рдореНрд░рд╛
1,raamro,рд░рд╛рдореНрд░реЛ,рд░рд╛рдореНрд░рд╛
2,fohor,рдлреЛрд╣реЛрд░,рдлреЛрд╣реЛрд░
3,xodyo,рдЫреЛрдбреНрдпреЛ,рдЫреЛрдбреНрдпреЛ
4,sidhai,рд╕рд┐рдзреИ,рд╕рд┐рдзрд╛


In [232]:
grouped = df.groupby('Devanagari')['Words'].apply(list).reset_index()

# Filter to only include entries with more than one word
filtered = grouped[grouped['Words'].apply(len) > 1]

In [233]:
filtered

Unnamed: 0,Devanagari,Words
0,рдЦреБрд▓реЗрдХреЛ,"[khuuleko, khuleko]"
1,рдЫреИрди,"['xhaina', 'xiana', 'xaena', chhaina]"
2,рдЯрд┐рдХреНрдЫ,"[tikcha, tikxa]"
3,рдкрд░рд┐рдпреЛ,"[pariyo, pareyo]"
4,рдкрд░реНрдпреЛ,"[paryoo, paryo]"
...,...,...
113,рд╣реБрдиреЗ,"[hune, hunay]"
114,рд╣реБрдиреЗрдЫ,"[hunexa, hunexw, hunexa]"
115,рд╣реБрдиреНрдЫ,"[hunchha, hunxa]"
116,рд╣реИ,"[haii, hae]"


In [214]:
# from collections import Counter
# total = 0
# accurate = 0
# not_accurate = 0
# NEPALI_DICTIONARY = saved_word_count

# for nepali_word, variants in zip(filtered['Devanagari'], filtered['Words']):
#     # print("nepali_word", nepali_word, 'variants', variants)
#     nepali_variations = []
#     for variant in variants:
#       total +=1
#       latin_variations = generate_variations(variant)
#       devanagari_variations = []
#       for new_variant in latin_variations:
#           nepali = transliterate(new_variant, sanscript.ITRANS, sanscript.DEVANAGARI)
#           devanagari_variations.append(nepali)
#       top_matched_words = rank_variants_by_similarity(devanagari_variations, NEPALI_DICTIONARY)
#       nepali = top_matched_words[0][0] if top_matched_words else None
#       nepali_variations.append((nepali,))
#     # Count occurrences of each mapped Nepali word
#     counter = Counter(nepali_variations)
#     most_common_word, count = counter.most_common(1)[0]

#     if len(counter) == 1:
#         print(f"тЬЕ All variants map to the same Nepali word: '{most_common_word}' тАФ Adding {count}")
#         accurate += count
#     else:
#         print(f"тЭМ Multiple mappings found: {dict(counter)}")
#         print(f"тЬЕ Keeping only the most frequent one: '{most_common_word}' тАФ Adding {count}")
#         accurate += count
# print(f'accurate: {accurate} total: {total} percent{accurate/total}')

In [235]:
from collections import Counter

total = 0
accurate = 0
not_accurate = 0
NEPALI_DICTIONARY = saved_word_count

for nepali_word, variants in zip(filtered['Devanagari'], filtered['Words']):
    nepali_variations = []
    for variant in variants:
        total += 1
        latin_variations = generate_variations(variant)
        devanagari_variations = []
        for new_variant in latin_variations:
            nepali = transliterate(new_variant, sanscript.ITRANS, sanscript.DEVANAGARI)
            devanagari_variations.append(nepali)
        top_matched_words = rank_variants_by_similarity(devanagari_variations, NEPALI_DICTIONARY)
        nepali = top_matched_words[0][1] if top_matched_words else None
        nepali_variations.append((nepali, variant))  # store variant too for debugging

    # Find the most common mapped word
    mapped_words_only = [x[0] for x in nepali_variations]
    counter = Counter(mapped_words_only)
    most_common_word, count = counter.most_common(1)[0]

    # Now keep only those variants where mapped word == most_common_word
    filtered_nepali_variations = [
        (mapped_word, variant) for mapped_word, variant in nepali_variations if mapped_word == most_common_word
    ]

    if len(counter) == 1:
        print(f"тЬЕ All variants map to the same Nepali word: '{most_common_word}' тАФ Adding {count}")
        accurate += count
    else:
        print(f"тЭМ Multiple mappings found: {dict(counter)}")
        print(f"тЬЕ Keeping only the most frequent one: '{most_common_word}' тАФ Adding {count}")
        accurate += count

    # Now check mismatches based on filtered mappings
    for mapped_word, variant in nepali_variations:
        if mapped_word != nepali_word:
            print(f"ЁЯФБ Mismatch: Variant '{variant}' тЖТ Mapped '{mapped_word}', Expected '{nepali_word}'")
            not_accurate += 1


print(f'accurate: {accurate}, total: {total}, percent: {accurate / total:.2f}')


тЬЕ All variants map to the same Nepali word: 'рдЦреБрд▓реЗрдХреЛ' тАФ Adding 2
ЁЯФБ Mismatch: Variant 'khuuleko' тЖТ Mapped 'рдЦреБрд▓реЗрдХреЛ', Expected ' рдЦреБрд▓реЗрдХреЛ'
ЁЯФБ Mismatch: Variant 'khuleko' тЖТ Mapped 'рдЦреБрд▓реЗрдХреЛ', Expected ' рдЦреБрд▓реЗрдХреЛ'
тЭМ Multiple mappings found: {'рд╣рд╛рдЗрдирд╛': 1, 'рдХреБрд▓рд┐рдЖрдирд╛': 1, 'рдЫреИрди': 2}
тЬЕ Keeping only the most frequent one: 'рдЫреИрди' тАФ Adding 2
ЁЯФБ Mismatch: Variant ''xhaina'' тЖТ Mapped 'рд╣рд╛рдЗрдирд╛', Expected ' рдЫреИрди'
ЁЯФБ Mismatch: Variant ''xiana'' тЖТ Mapped 'рдХреБрд▓рд┐рдЖрдирд╛', Expected ' рдЫреИрди'
ЁЯФБ Mismatch: Variant ''xaena'' тЖТ Mapped 'рдЫреИрди', Expected ' рдЫреИрди'
ЁЯФБ Mismatch: Variant 'chhaina' тЖТ Mapped 'рдЫреИрди', Expected ' рдЫреИрди'
тЬЕ All variants map to the same Nepali word: 'рдЯрд┐рдХреНрдЫ' тАФ Adding 2
ЁЯФБ Mismatch: Variant 'tikcha' тЖТ Mapped 'рдЯрд┐рдХреНрдЫ', Expected ' рдЯрд┐рдХреНрдЫ'
ЁЯФБ Mismatch: Variant 'tikxa' тЖТ Mapped 'рдЯрд┐рдХреНрдЫ', E

In [216]:

from collections import Counter

total = 0
accurate = 0
not_accurate = 0
NEPALI_DICTIONARY = saved_word_count

for nepali_word, variants in zip(filtered['Devanagari'], filtered['Words']):
    nepali_variations = []
    for variant in variants:
        total += 1
        latin_variations = generate_variations(variant)
        devanagari_variations = []
        for new_variant in latin_variations:
            nepali = transliterate(new_variant, sanscript.ITRANS, sanscript.DEVANAGARI)
            devanagari_variations.append(nepali)
        top_matched_words = rank_variants_by_similarity(devanagari_variations, NEPALI_DICTIONARY)
        nepali = top_matched_words[0][0] if top_matched_words else None
        nepali_variations.append((nepali, variant))  # store variant too for debugging

    # Find the most common mapped word
    mapped_words_only = [x[0] for x in nepali_variations]
    counter = Counter(mapped_words_only)
    most_common_word, count = counter.most_common(1)[0]

    # Now keep only those variants where mapped word == most_common_word
    filtered_nepali_variations = [
        (mapped_word, variant) for mapped_word, variant in nepali_variations if mapped_word == most_common_word
    ]

    if len(counter) == 1:
        # print(f"тЬЕ All variants map to the same Nepali word: '{most_common_word}' тАФ Adding {count}")
        accurate += count
    else:
        # print(f"тЭМ Multiple mappings found: {dict(counter)}")
        # print(f"тЬЕ Keeping only the most frequent one: '{most_common_word}' тАФ Adding {count}")
        accurate += count

    # Now check mismatches based on filtered mappings
    for mapped_word, variant in nepali_variations:
        if mapped_word != nepali_word:
            print(f"ЁЯФБ Mismatch: Variant '{variant}' тЖТ Mapped '{mapped_word}', Expected '{nepali_word}'")
            not_accurate += 1


print(f'accurate: {accurate}, total: {total}, percent: {accurate / total:.2f}')


тЬЕ All variants map to the same Nepali word: 'рдЦреБрд▓реЗрдХреЛ' тАФ Adding 2
ЁЯФБ Mismatch: Variant 'khuuleko' тЖТ Mapped 'рдЦреБрд▓реЗрдХреЛ', Expected ' рдЦреБрд▓реЗрдХреЛ'
ЁЯФБ Mismatch: Variant 'khuleko' тЖТ Mapped 'рдЦреБрд▓реЗрдХреЛ', Expected ' рдЦреБрд▓реЗрдХреЛ'
тЭМ Multiple mappings found: {"'рдЫреНрд╣рд╛рдЗрдирд╛'": 1, "'рдЫрд┐рдЖрдирд╛'": 1, "'рдЫреИрди'": 1, 'рдЫреИрди': 1}
тЬЕ Keeping only the most frequent one: ''рдЫреНрд╣рд╛рдЗрдирд╛'' тАФ Adding 1
ЁЯФБ Mismatch: Variant ''xhaina'' тЖТ Mapped ''рдЫреНрд╣рд╛рдЗрдирд╛'', Expected ' рдЫреИрди'
ЁЯФБ Mismatch: Variant ''xiana'' тЖТ Mapped ''рдЫрд┐рдЖрдирд╛'', Expected ' рдЫреИрди'
ЁЯФБ Mismatch: Variant ''xaena'' тЖТ Mapped ''рдЫреИрди'', Expected ' рдЫреИрди'
ЁЯФБ Mismatch: Variant 'chhaina' тЖТ Mapped 'рдЫреИрди', Expected ' рдЫреИрди'
тЬЕ All variants map to the same Nepali word: 'рдЯрд┐рдХреНрдЫ' тАФ Adding 2
ЁЯФБ Mismatch: Variant 'tikcha' тЖТ Mapped 'рдЯрд┐рдХреНрдЫ', Expected ' рдЯрд┐рдХреНрдЫ'
ЁЯФБ Mismatch: Varia

In [None]:
from collections import Counter
import unicodedata

total = 0
accurate = 0
not_accurate = 0
NEPALI_DICTIONARY = saved_word_count



def clean_text(text):
    if text is None:
        return None
    return unicodedata.normalize('NFC', text.strip())

for nepali_word, variants in zip(filtered['Devanagari'], filtered['Words']):
    nepali_variations = []
    for variant in variants:
        total += 1
        latin_variations = generate_variations(variant)
        devanagari_variations = []
        for new_variant in latin_variations:
            nepali = transliterate(new_variant, sanscript.ITRANS, sanscript.DEVANAGARI)
            devanagari_variations.append(nepali)
        top_matched_words = rank_variants_by_similarity(devanagari_variations, NEPALI_DICTIONARY)
        nepali = top_matched_words[0][0] if top_matched_words else None
        if clean_text(nepali) == clean_text(nepali_word):
            accurate +=1
        else:
            not_accurate +=1
            print(f'{variant}-->{nepali_word}-->{nepali}')

print(f'accurate: {accurate}, total: {total}, percent: {accurate / total:.2f}')
