In [1]:
# Sample vocabulary: inflected forms grouped by the root they share
words = [
    "eating", "eats", "eaten",
    "writing", "writes",
    "programming", "programs",
    "finally", "finalize",
]

### Porter Stemmer

In [2]:
from nltk.stem import PorterStemmer

# One Porter stemmer instance, reused by every Porter example in this notebook
ps=PorterStemmer()

In [3]:
# Show every word next to the stem the Porter stemmer produces for it
for original, stemmed in zip(words, map(ps.stem, words)):
    print(f"{original} --> {stemmed}")

eating --> eat
eats --> eat
eaten --> eaten
writing --> write
writes --> write
programming --> program
programs --> program
finally --> final
finalize --> final


In [4]:
# A major disadvantage of stemming is that it can produce non-existent words,
# e.g. "congratulations" is stemmed to "congratul"
ps.stem("congratulations")

'congratul'

### RegexpStemmer class

In [5]:
from nltk.stem import RegexpStemmer

# The pattern lists the suffixes to strip; every alternative ends in `$`, so it
# only matches at the end of a word. `min=4` leaves words shorter than four
# characters untouched.
reg_stemmer=RegexpStemmer("ing$|s$|e$|able$",min=4)

In [6]:
# "eating" ends in "ing", one of the suffixes in the stemmer's pattern
reg_stemmer.stem("eating")

'eat'

#### The suffix 'ing' matches the pattern `ing$`, so it is removed from the word.

In [7]:
# a word shorter than `min` characters is returned unchanged,
# so "was" (3 characters, min=4) keeps its trailing "s"
reg_stemmer.stem("was")

'was'

#### The 's' in 'was' was not trimmed because 'was' has only 3 characters, fewer than `min=4`.

### Snowball Stemmer

In [8]:
# The Snowball stemmer is an improvement over the Porter stemmer
from nltk.stem import SnowballStemmer

# The constructor argument names the language whose stemming rules are used
snowball_stemmer=SnowballStemmer("english")

In [9]:
# Print each word alongside its Snowball stem
index = 0
while index < len(words):
    current = words[index]
    print(f"{current} --> {snowball_stemmer.stem(current)}")
    index += 1

eating --> eat
eats --> eat
eaten --> eaten
writing --> write
writes --> write
programming --> program
programs --> program
finally --> final
finalize --> final


In [10]:
# Let's see how the Porter and Snowball stemmers differ.
# First, the Porter stemmer on 'fairly' and 'sportingly':
tuple(ps.stem(adverb) for adverb in ("fairly", "sportingly"))

('fairli', 'sportingli')

In [11]:
# The same two adverbs through the Snowball stemmer
tuple(map(snowball_stemmer.stem, ("fairly", "sportingly")))

('fair', 'sport')

In [None]:
# The problem of non-existent stems still persists with the Snowball stemmer
snowball_stemmer.stem("congratulations")

'congratul'