### Without Libraries 

In [None]:
'''
We will implement stemming (a data preprocessing technique)

1. Without Library 
2. With Library nltk,a popular Python package, provides many preprocessing tools for data in NLP. In our case, we will use PorterStemmer for stemming. 
'''

In [1]:
def porter_stemmer(word):
    if len(word) <= 2:
        return word

    step2_suffixes = ["ational", "tional", "enci", "anci", "izer", "bli", "alli", "entli", "eli", "ousli", "ization", "ation", "ator", "alism", "iveness", "fulness", "ousness", "aliti", "iviti", "biliti", "logi"]
    step3_suffixes = ["icate", "ative", "alize", "iciti", "ical", "ful", "ness"]

    # Step 1a
    if word.endswith("sses"):
        word = word[:-2]
    elif word.endswith("ies"):
        word = word[:-2]
    elif word.endswith("ss"):
        pass
    elif word.endswith("s"):
        word = word[:-1]

    # Step 1b
    if word.endswith("eed"):
        if len(word[:-3]) > 0 and word[:-3][-1] in "aeiou":
            word = word[:-1]
    elif word.endswith(("ed", "ing")):
        for suffix in ["ed", "ing"]:
            if word.endswith(suffix):
                if any(char in word[:-len(suffix)] for char in "aeiou"):
                    word = word[:-len(suffix)]
                    break

    # Step 1c
    if word.endswith("y"):
        if len(word[:-1]) > 0 and word[:-1][-1] in "aeiou":
            word = word[:-1]

    # Step 2
    for suffix in step2_suffixes:
        if word.endswith(suffix):
            if len(word[:-len(suffix)]) > 0 and word[:-len(suffix)][-1] in "aeiou":
                word = word[:-len(suffix)]
            break

    # Step 3
    for suffix in step3_suffixes:
        if word.endswith(suffix):
            if len(word[:-len(suffix)]) > 0 and word[:-len(suffix)][-1] in "aeiou":
                word = word[:-len(suffix)]
            break

    # Step 4
    if word.endswith("sion") or word.endswith("tion"):
        if len(word[:-3]) > 0 and word[:-3][-1] in "aeiou":
            word = word[:-3]

    # Step 5a
    if word.endswith("e"):
        if len(word[:-1]) > 1 and word[:-1][-1] in "aeiou":
            word = word[:-1]

    # Step 5b
    if word.endswith("ll") and len(word[:-1]) > 1 and word[:-1][-1] in "aeiou":
        word = word[:-1]

    return word


In [16]:
if __name__ == "__main__":
    words_to_stem = ["Jumping", "Barking", "Helping", "Learning", "Stemming", "Beautiful", "Engineering"]
    for word in words_to_stem:
        print(f"{word} -> {porter_stemmer(word)}")

Jumping -> Jump
Barking -> Bark
Helping -> Help
Learning -> Learn
Stemming -> Stemm
Beautiful -> Beauti
Engineering -> Engineer


### With Libraries 

In [7]:
import nltk
from nltk.stem import PorterStemmer

In [17]:
def porter_stemmer_nltk(word):
    porter_stemmer = PorterStemmer()
    return porter_stemmer.stem(word)

if __name__ == "__main__":
    words_to_stem = ["Jumping", "Barking", "Helping", "Learning", "Stemming", "Beautiful", "Engineering"]
    for word in words_to_stem:
        print(f"{word} -> {porter_stemmer_nltk(word)}")

Jumping -> jump
Barking -> bark
Helping -> help
Learning -> learn
Stemming -> stem
Beautiful -> beauti
Engineering -> engin
