In [44]:
import nltk
import pandas as pd

# Scan nltk's top-level namespace for attribute names ending in
# 'Stemmer' / 'Lemmatizer' to see which implementations are on offer.
stemmer_names = [attr for attr in dir(nltk) if attr.endswith('Stemmer')]
lemmatizer_names = [attr for attr in dir(nltk) if attr.endswith('Lemmatizer')]
print(stemmer_names)
print(lemmatizer_names)

['ISRIStemmer', 'LancasterStemmer', 'PorterStemmer', 'RSLPStemmer', 'RegexpStemmer', 'SnowballStemmer']
['WordNetLemmatizer']


In [45]:
# Sample vocabulary: four surface forms built on the same root.
words = ['describes', 'describe', 'descriptor', 'description']

# Instantiate every candidate (construction order: ISRI, Lancaster, Porter,
# RSLP, Snowball). Snowball is the only one that takes a language argument.
isri, lancaster, porter, rslp, snowball = (
    nltk.ISRIStemmer(),
    nltk.LancasterStemmer(),
    nltk.PorterStemmer(),
    nltk.RSLPStemmer(),
    nltk.SnowballStemmer('english'),
)

# The lemmatizer is kept separate from the stemmers because it is invoked
# through a different method (lemmatize rather than stem).
wordnet = nltk.WordNetLemmatizer()

# Stemmers that take part in the comparison; isri is instantiated above but
# is not included here.
# NOTE(review): per the NLTK docs ISRI targets Arabic and RSLP targets
# Portuguese — confirm that RSLP belongs in a comparison on English words.
stemmers = [lancaster, porter, rslp, snowball]

## Compare all stemmers of interest

In [48]:
# Collect one row per input word, laid out as
#   [word, <stem from each entry of stemmers, in order>, <WordNet lemma>]
# Rows go into a plain list first so the DataFrame can be built in one step
# later instead of being grown row by row.
ls = []
for word in words:
    stems = [stemmer.stem(word) for stemmer in stemmers]
    ls.append([word, *stems, wordnet.lemmatize(word)])
print(ls)

[['describes', 'describ', 'describ', 'describ', 'describ', 'describes'], ['describe', 'describ', 'describ', 'describ', 'describ', 'describe'], ['descriptor', 'describ', 'descriptor', 'descrip', 'descriptor', 'descriptor'], ['description', 'describ', 'descript', 'description', 'descript', 'description']]


In [49]:
# Column headers mirror the row layout: the original word, then one column
# per stemmer (named after its class), then the lemmatizer column.
columns = [
    'Word',
    *(type(stemmer).__name__ for stemmer in stemmers),
    'WordNetLemmatizer',
]
df = pd.DataFrame(ls, columns=columns)
print(df)

          Word LancasterStemmer PorterStemmer  RSLPStemmer SnowballStemmer  \
0    describes          describ       describ      describ         describ   
1     describe          describ       describ      describ         describ   
2   descriptor          describ    descriptor      descrip      descriptor   
3  description          describ      descript  description        descript   

  WordNetLemmatizer  
0         describes  
1          describe  
2        descriptor  
3       description  


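The result shows that, of the compared stemmers, only Lancaster produces the desired output, i.e. the same stem (`describ`) for all four supplied variations; the other stemmers yield at least two different stems, and the WordNet lemmatizer leaves every word unchanged.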
