In [None]:
import re
import nltk
from nltk.stem import PorterStemmer

nltk.download('punkt')

porter = PorterStemmer()

In [None]:

def porter_stemmer(word):
    def apply_rule(word, suffix, replacement, condition):
        if condition(word):
            stem = re.sub(suffix + "$", replacement, word)
            return stem
        return word

    def step1a(word):
        if word.endswith("sses"):
            return word[:-2]
        elif word.endswith("ies"):
            return word[:-2]
        elif word.endswith("ss"):
            return word
        elif word.endswith("s"):
            return word[:-1]
        return word

    def step1b(word):
        if word.endswith("eed"):
            if re.search(r"eed$", word) and len(word) > 4:
                return word[:-1]
        elif word.endswith("ed"):
            if re.search(r"ed$", word):
                return apply_rule(word[:-2], "ed", "", lambda w: re.search(r"[aeiouy]", w))
        elif word.endswith("ing"):
            if re.search(r"ing$", word):
                return apply_rule(word[:-3], "ing", "", lambda w: re.search(r"[aeiouy]", w))
        return word

    def step1c(word):
        if word.endswith("y"):
            if re.search(r"y$", word):
                return apply_rule(word[:-1], "y", "i", lambda w: re.search(r"[aeiouy]", w))
        return word

    def step2(word):
        if re.search(r"(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti)$", word):
            return apply_rule(word, r"(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti)$", "", lambda w: re.search(r"[aeiouy]", w))
        return word

    def step3(word):
        if re.search(r"(icate|ative|alize|iciti|ical|ful|ness)$", word):
            return apply_rule(word, r"(icate|ative|alize|iciti|ical|ful|ness)$", "", lambda w: re.search(r"[aeiouy]", w))
        return word

    def step4(word):
        if re.search(r"(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ism|ate|iti|ous|ive|ize)$", word):
            return apply_rule(word, r"(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ism|ate|iti|ous|ive|ize)$", "", lambda w: len(w) > 1)
        elif word.endswith("sion") or word.endswith("tion"):
            return apply_rule(word, r"(sion|tion)$", "", lambda w: re.search(r"[aeiouy]", w))
        return word

    def step5a(word):
        if re.search(r"e$", word):
            return apply_rule(word[:-1], "e", "", lambda w: m(w) > 1 or (m(w) == 1 and not cvc(w[:-1])))
        return word

    def step5b(word):
        if m(word) > 1 and re.search(r"ll$", word):
            return word[:-1]
        return word

    def m(word):
        return len(re.findall(r"[aeiouy]+", word))

    def cvc(word):
        return re.search(r"[aeiouy][^aeiouy][aeiouy]$", word) and not re.search(r"[wxy]$", word)

    word = word.lower()
    word = step1a(word)
    word = step1b(word)
    word = step1c(word)
    word = step2(word)
    word = step3(word)
    word = step4(word)
    word = step5a(word)
    word = step5b(word)

    return word


In [None]:
# Example usage
words = ["ruined", "filler", "studies", "happier", "agreement", "saying", "strange"]
for word in words:
    print(f"{word}: {porter_stemmer(word)}")



ruined: ruin
filler: fill
studies: studi
happier: happi
agreement: agr
saying: sa
strange: strang


In [None]:
# Apply Porter stemmer
for word in words:
    stemmed_word = porter.stem(word)
    print(f"{word}: {stemmed_word}")


ruined: ruin
filler: filler
studies: studi
happier: happier
agreement: agreement
saying: say
strange: strang
