In [None]:
import snowballstemmer

# Build the Snowball stemmer for Nepali once; later cells reuse `nepali_stemmer`.
try:
    nepali_stemmer = snowballstemmer.stemmer('nepali')
except KeyError as e:
    # snowballstemmer.stemmer() raises KeyError for an unknown algorithm name.
    # Show what is available, then re-raise so the notebook stops at the real
    # cause instead of failing later with a NameError on `nepali_stemmer`.
    print(f"❌ Could not create stemmer. Is 'nepali' a supported language? Error: {e}")
    print("Supported languages are:", snowballstemmer.algorithms())
    raise
else:
    # Only reached when the stemmer was actually created.
    print("✅ Nepali stemmer created successfully!")

✅ Nepali stemmer created successfully!


In [None]:
# Sample Nepali sentence whose words carry different inflections.
# English: "The boys were studying well in school"
nepali_sentence = "केटाहरू विद्यालयमा राम्रोसँग पढ्दैथिए"

# The individual words of the sentence, each with its gloss.
words_to_stem = [
    "केटाहरू",     # "boys": plural of केटा (boy)
    "विद्यालयमा",  # "in the school": from विद्यालय (school)
    "राम्रोसँग",   # "nicely": from राम्रो (good)
    "पढ्दैथिए",    # "were studying": from पढ्नु (to study)
]

# Reduce every word to its stem in a single call.
stemmed_words = nepali_stemmer.stemWords(words_to_stem)

# Two-column row layout: word left-aligned in a 15-character column, then the stem.
format_row = "{:<15} | {}".format

print("--- Nepali Stemming in Action ---\n")
print(format_row("Original Word", "Stemmed Root"))
print("-" * 30)

for word, root in zip(words_to_stem, stemmed_words):
    print(format_row(word, root))

--- Nepali Stemming in Action ---

Original Word   | Stemmed Root
------------------------------
केटाहरू         | केटा
विद्यालयमा      | विद्यालय
राम्रोसँग       | राम्रो
पढ्दैथिए        | पढ्


In [3]:
# A broader sample of Nepali words, grouped by the kind of morphology involved.
words_to_test = [
    # Nouns carrying a plural or case suffix
    "किताबहरू",    # books (plural)
    "घरमा",        # in the house (case marker 'ma')
    "रामको",       # Ram's (case marker 'ko')
    "साथीलाई",     # to the friend (case marker 'lai')
    "शहरबाट",      # from the city (case marker 'bata')
    "मानिसले",     # by the person (case marker 'le')

    # Verbs in various tenses and forms
    "गर्दैछु",      # am doing (present continuous)
    "खायो",        # ate (simple past)
    "जानेछ",       # will go (future)
    "पढ्नुहुन्छ",   # studies/reads (formal present)
    "खेलेका",      # have played (past participle)
    "हिँड्दैथिए",   # were walking (past continuous)
    "हाँस्यो",      # laughed (simple past)

    # Derived and compound words
    "सुन्दरता",     # beauty (derived from 'sundar' - beautiful)
    "नेपाली",      # Nepali (derived from 'Nepal')
    "मित्रता",      # friendship (derived from 'mitra' - friend)
    "छिटोसँग",     # quickly (adverbial form)
    "बालकपन",      # childhood

    # Words expected to change little, if at all
    "विद्यालय",     # school (root word)
    "खुशी",        # happy
    "आज",          # today
]

# Stem the whole list in one call.
stemmed_results = nepali_stemmer.stemWords(words_to_test)

# Three-column row layout: running number, original word, stem.
format_row = "{:<4} | {:<15} | {}".format

print("--- Testing Nepali Stemmer with 20+ Examples ---")
print(format_row("No.", "Original Word", "Stemmed Root"))
print("-" * 45)

# Number the rows from 1 so they match the printed "No." column.
for number, (word, root) in enumerate(zip(words_to_test, stemmed_results), start=1):
    print(format_row(number, word, root))

print("\n--- Test Complete ---")

--- Testing Nepali Stemmer with 20+ Examples ---
No.  | Original Word   | Stemmed Root
---------------------------------------------
1    | किताबहरू        | किताब
2    | घरमा            | घर
3    | रामको           | राम
4    | साथीलाई         | सा
5    | शहरबाट          | शहरबाट
6    | मानिसले         | मानिस
7    | गर्दैछु         | गर्
8    | खायो            | खा
9    | जानेछ           | जा
10   | पढ्नुहुन्छ      | पढ्
11   | खेलेका          | खेल
12   | हिँड्दैथिए      | हिँड्
13   | हाँस्यो         | हाँस्
14   | सुन्दरता        | सुन्दरता
15   | नेपाली          | नेपाली
16   | मित्रता         | मित्रता
17   | छिटोसँग         | छिटो
18   | बालकपन          | बालकपन
19   | विद्यालय        | विद्यालय
20   | खुशी            | खुशी
21   | आज              | आज

--- Test Complete ---
