In [29]:
import nltk
from nltk.stem import PorterStemmer , LancasterStemmer , WordNetLemmatizer 
from nltk.stem.snowball import SnowballStemmer 
from nltk.tokenize import sent_tokenize, word_tokenize 
import spacy

# Load spaCy's small English pipeline; later cells call `nlp(...)` to get tokens with
# part-of-speech tags and lemmas.
nlp = spacy.load('en_core_web_sm') 
# Fetch the WordNet corpus for the WordNetLemmatizer cell. This is the cell's last
# expression, so its return value is echoed as the cell output.
nltk.download("wordnet")

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\aakam\AppData\Roaming\nltk_data...


True

In [16]:
# Porter stemmer applied to inflections of "run" and two "-ly" adverbs.
p_stemmer = PorterStemmer()

words = ['run', 'runner', 'running', 'ran', 'runs', 'easily', 'fairly']
for word in words:
    print(f'{word} --> {p_stemmer.stem(word)}')

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fairli


In [6]:
# The same word list run through the English Snowball stemmer.
s_stemmer = SnowballStemmer(language='english')
words = ['run', 'runner', 'running', 'ran', 'runs', 'easily', 'fairly']
for word in words:
    stemmed = s_stemmer.stem(word)
    print(' --> '.join((word, stemmed)))

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fair


In [7]:
# Snowball stems for the "generous" / "generate" word families
# (`s_stemmer` is created in an earlier cell).
words = ['generous', 'generation', 'generously', 'generate']
for word in words:
    print('{} --> {}'.format(word, s_stemmer.stem(word)))

generous --> generous
generation --> generat
generously --> generous
generate --> generat


In [None]:
# Porter stemmer on forms of "to be". `ps` is reused by later cells.
ps = PorterStemmer()

words = ["is", "was", "be", "been", "are", "were"]

for w in words:
    stem = ps.stem(w)
    print(stem)

is
wa
be
been
are
were


In [11]:
# Lancaster stemmer on the forms of "to be". `ls` is reused by later cells.
ls = LancasterStemmer()

# Spell the list out here instead of inheriting whatever `words` was last bound to:
# nearly every cell rebinds `words`, so relying on the leftover value makes this cell's
# output depend on execution order.
words = ["is", "was", "be", "been", "are", "were"]

for w in words:
    print(ls.stem(w))

is
was
be
been
ar
wer


In [12]:
# Stem the "book" family with Porter first, then Lancaster.
# Output is one stem per line, with no separator between the two runs.
words = ["book", "booking", "booked", "books", "booker", "bookstore"]
for stemmer in (ps, ls):
    for w in words:
        print(stemmer.stem(w))

book
book
book
book
booker
bookstor
book
book
book
book
book
bookst


In [13]:
# Tokenize a whole sentence and Porter-stem every token, punctuation included.
# NOTE(review): word_tokenize relies on NLTK tokenizer data ("punkt"); none of the
# visible cells download it, so confirm it is installed before a fresh run.
sentence = (
    'had you booked the air booking yet ? if not try to book it ASAP since '
    'booking will be out of books'
)
words = word_tokenize(sentence)
for w in words:
    print(ps.stem(w))

had
you
book
the
air
book
yet
?
if
not
tri
to
book
it
asap
sinc
book
will
be
out
of
book


In [14]:
# Side-by-side Porter vs. Lancaster stems, printed as three left-aligned
# 20-character columns.
word_list = [
    "friend", "friendship", "friends", "friendships",
    "stabil", "destabilize", "misunderstanding",
    "railroad", "moonlight", "football",
]

# One shared row formatter keeps the header and the data rows aligned.
row = "{0:20}{1:20}{2:20}".format
print(row("Word", "Porter Stemmer", "lancaster Stemmer"))
for word in word_list:
    print(row(word, ps.stem(word), ls.stem(word)))

Word                Porter Stemmer      lancaster Stemmer   
friend              friend              friend              
friendship          friendship          friend              
friends             friend              friend              
friendships         friendship          friend              
stabil              stabil              stabl               
destabilize         destabil            dest                
misunderstanding    misunderstand       misunderstand       
railroad            railroad            railroad            
moonlight           moonlight           moonlight           
football            footbal             footbal             


In [30]:
# WordNet lemmatization with lemmatize()'s default part of speech.
# Irregular plurals map to their singular; "speech" and "runner" come back unchanged.
lemmatizer = WordNetLemmatizer()
words = ["cats", "cacti", "radii", "feet", "speech", "runner"]

for word in words:
    lemma = lemmatizer.lemmatize(word)
    print(lemma)

cat
cactus
radius
foot
speech
runner


In [None]:
def show_lemmas(text):
    """Print one aligned row per token of ``text``.

    ``text`` is any iterable of token-like objects exposing ``text``, ``pos_``,
    ``lemma`` and ``lemma_`` (the notebook passes the result of ``nlp(...)``).
    Each row holds, separated by ``' \\t '``: the token text and ``pos_`` padded
    to 20 characters, ``lemma`` left-aligned in 25 characters, and ``lemma_``
    unpadded. Returns ``None``; an empty iterable prints nothing.
    """
    for tok in text:
        columns = (
            f'{tok.text:20}',
            f'{tok.pos_:20}',
            f'{tok.lemma:<25}',
            f'{tok.lemma_}',
        )
        print(' \t '.join(columns))

In [24]:
# Run a sentence containing several forms of "run" through the spaCy pipeline and
# print the per-token table (text, POS tag, lemma id, lemma string).
doc1 = nlp("I am a runner running in a race because I love to run since I ran today")

# Use the show_lemmas helper instead of repeating its loop body inline; the row
# format is the same, so the printed table is unchanged.
show_lemmas(doc1)


I                    	 PRON                 	 4690420944186131903       	 I
am                   	 AUX                  	 10382539506755952630      	 be
a                    	 DET                  	 11901859001352538922      	 a
runner               	 NOUN                 	 12640964157389618806      	 runner
running              	 VERB                 	 12767647472892411841      	 run
in                   	 ADP                  	 3002984154512732771       	 in
a                    	 DET                  	 11901859001352538922      	 a
race                 	 NOUN                 	 8048469955494714898       	 race
because              	 SCONJ                	 16950148841647037698      	 because
I                    	 PRON                 	 4690420944186131903       	 I
love                 	 VERB                 	 3702023516439754181       	 love
to                   	 PART                 	 3791531372978436496       	 to
run                  	 VERB                 	 12767647472892411841

In [25]:
# Two more examples: an irregular past tense and plural ("saw", "mice"), then
# "meeting" appearing twice in one sentence.
doc2 = nlp("I saw eighteen mice today!")
doc3 = nlp("I am meeting him tomorrow at the meeting.")

# Print the tables in the same order as before: doc2 first, then doc3.
for doc in (doc2, doc3):
    show_lemmas(doc)

I                    	 PRON                 	 4690420944186131903       	 I
saw                  	 VERB                 	 11925638236994514241      	 see
eighteen             	 NUM                  	 9609336664675087640       	 eighteen
mice                 	 NOUN                 	 1384165645700560590       	 mouse
today                	 NOUN                 	 11042482332948150395      	 today
!                    	 PUNCT                	 17494803046312582752      	 !
I                    	 PRON                 	 4690420944186131903       	 I
am                   	 AUX                  	 10382539506755952630      	 be
meeting              	 VERB                 	 6880656908171229526       	 meet
him                  	 PRON                 	 1655312771067108281       	 he
tomorrow             	 NOUN                 	 3573583789758258062       	 tomorrow
at                   	 ADP                  	 11667289587015813222      	 at
the                  	 DET                  	 742598569962