In [59]:
import spacy
# Load spaCy's small English pipeline; the cells below reuse `nlp`.
nlp = spacy.load('en_core_web_sm')
doc2 = nlp(u"Hello. We are performing stemming")
# One token per line; the full stop after "Hello" comes out as its own token.
print(*doc2, sep='\n')

Hello
.
We
are
performing
stemming


In [60]:
# Tokenize a sentence with an amount and a currency sign; the output below
# shows "145" and "$" as separate tokens.
doc3 = nlp(u'The PKR is nowdays 145$')
for token in doc3:
    print(token)

The
PKR
is
nowdays
145
$


In [61]:
# Tokenize a sentence with a contraction and an abbreviation-like "Pir.";
# the output below shows 's and each full stop as separate tokens.
doc4 = nlp(u"Let's visit Pir. Chinasi in the upcoming year.")
print(*doc4, sep='\n')

Let
's
visit
Pir
.
Chinasi
in
the
upcoming
year
.


In [62]:
 # ----   Porter Stemmer   ------

In [63]:
# Import the toolkit and the Porter stemmer class.
# The class is imported by name rather than with a wildcard so that it is
# obvious where `PorterStemmer` comes from and nothing else leaks into the
# notebook namespace.
import nltk
from nltk.stem.porter import PorterStemmer

In [64]:
# Porter stemmer instance used by the stemming cells that follow.
p_stemmer = PorterStemmer()

In [65]:
# Stem one word; per the output below the result comes back lowercased.
x="Flyin"
single_stem = p_stemmer.stem(x)
print(single_stem)

flyin


In [66]:
# Pass a whole sentence to the stemmer in one call. Per the output below,
# only the case and the very end of the string change, so the words are not
# stemmed individually.
x=" Flying high in the sky"
sentence_stem = p_stemmer.stem(x)
print(sentence_stem)

 flying high in the ski


In [67]:
# Stem word by word instead: split on whitespace, then stem each piece.
x="Flying high in the ski"
print(*map(p_stemmer.stem, x.split()), sep='\n')

fli
high
in
the
ski


In [68]:
# Compare each word with its Porter stem, one "word --> stem" pair per line.
words = ['run','runner','running','ran','runs','easily','fairly']
for w in words:
    print(f'{w} --> {p_stemmer.stem(w)}')

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fairli


In [69]:
# Snowball stemmer: a second stemmer to compare against the Porter results above.
from nltk.stem.snowball import SnowballStemmer
# The Snowball stemmer requires that you pass a language parameter; English is used here.
s_stemmer = SnowballStemmer(language='english')

In [70]:
# Same word list as in the Porter cell above, repeated for the Snowball comparison.
words = ['run','runner','running','ran','runs','easily','fairly']

In [71]:
# Print each word next to its Snowball stem.
for w in words:
    print(f'{w} --> {s_stemmer.stem(w)}')

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fair


In [72]:
# Snowball stems for a family of related words ("generous", "generate", ...).
x = ['generous','generation','generously','generate', 'generated' ]
print(*(f'{w} --> {s_stemmer.stem(w)}' for w in x), sep='\n')


generous --> generous
generation --> generat
generously --> generous
generate --> generat
generated --> generat


In [73]:
# ---- Some more practice ----

In [74]:
# Kept as a one-element list because the next two cells iterate over `word`.
word = ['consolingly']

In [75]:
# Porter result for the practice word(s) in `word`.
print('Porter Stemmer:')
for term in word:
    print(f'{term} --> {p_stemmer.stem(term)}')

Porter Stemmer:
consolingly --> consolingli


In [76]:
# Snowball result for the same practice word(s), for comparison with Porter.
print('SnowBall Stemmer:')
for term in word:
    print(f'{term} --> {s_stemmer.stem(term)}')

SnowBall Stemmer:
consolingly --> consol


In [77]:
# For every word print two lines: the Porter stem first, then the Snowball stem.
x = ['Looked', 'were','surprisingly', 'waiting', 'anxiousness']
for i in x:
    for stemmer in (p_stemmer, s_stemmer):
        print(f'{i}--> {stemmer.stem(i)}')

Looked--> look
Looked--> look
were--> were
were--> were
surprisingly--> surprisingli
surprisingly--> surpris
waiting--> wait
waiting--> wait
anxiousness--> anxious
anxiousness--> anxious


In [78]:
# Mixed practice words shared by the next two cells.
# NOTE(review): ' enumoruos' has a leading space (and looks misspelled) and
# 'surprising-ly' contains a hyphen; the outputs below show both strings being
# handed to the stemmers as-is — confirm whether that is intentional.
x = ['heartedly', 'hurting',' enumoruos', 'jolly', 'surprising-ly', 'jolly', 'better', 'was']

In [79]:
# Porter stems for the practice list defined in the previous cell.
for entry in x:
    print(f'{entry}--> {p_stemmer.stem(entry)}')

heartedly--> heartedli
hurting--> hurt
 enumoruos-->  enumoruo
jolly--> jolli
surprising-ly--> surprising-li
jolly--> jolli
better--> better
was--> wa


In [80]:
# Snowball stems for the same practice list.
for entry in x:
    print(f'{entry}--> {s_stemmer.stem(entry)}')

heartedly--> heart
hurting--> hurt
 enumoruos-->  enumoruo
jolly--> jolli
surprising-ly--> surprising-li
jolly--> jolli
better--> better
was--> was


In [81]:
# Stemming has its drawbacks. Given the token "saw", a stemmer will always return "saw",
# whereas a lemmatizer would likely return either "see" or "saw", depending on whether
# the token was used as a verb or as a noun. As an example, consider the following:

phrase = 'I went to meet her tomorrow at the meeting'
# Whitespace-split the phrase and show the Porter stem of every piece.
for tok in phrase.split():
    print(f'{tok} --> {p_stemmer.stem(tok)}')

I --> i
went --> went
to --> to
meet --> meet
her --> her
tomorrow --> tomorrow
at --> at
the --> the
meeting --> meet


In [82]:
# Perform standard imports and load the small English spaCy pipeline.
# This repeats the import/load from the first cell — presumably so the
# lemmatization cells below can be run on their own.
import spacy
nlp = spacy.load('en_core_web_sm')

In [83]:
var1 = nlp(u"John Adam is one the researcher who invent the direction of way towards success!")

# One row per token: text, POS tag, integer lemma value and lemma string,
# separated by " <tab> ".
for tok in var1:
    fields = (tok.text, tok.pos_, tok.lemma, tok.lemma_)
    print(*fields, sep=' \t ')

John 	 PROPN 	 11174346320140919546 	 John
Adam 	 PROPN 	 14264057329400597350 	 Adam
is 	 AUX 	 10382539506755952630 	 be
one 	 NUM 	 17454115351911680600 	 one
the 	 DET 	 7425985699627899538 	 the
researcher 	 NOUN 	 1317581537614213870 	 researcher
who 	 PRON 	 3876862883474502309 	 who
invent 	 VERB 	 5373681334090504585 	 invent
the 	 DET 	 7425985699627899538 	 the
direction 	 NOUN 	 895834437038626927 	 direction
of 	 ADP 	 886050111519832510 	 of
way 	 NOUN 	 6878210874361030284 	 way
towards 	 ADP 	 9315050841437086371 	 towards
success 	 NOUN 	 16089821935113899987 	 success
! 	 PUNCT 	 17494803046312582752 	 !


In [89]:
def show_lemmas(text):
    """Print a fixed-width table with one row per item of `text`.

    `text` is iterated once; every item must expose the attributes `text`,
    `pos_`, `lemma` and `lemma_` (the spaCy docs processed in this notebook
    do). The columns are: token text (width 12), POS tag (width 6), the
    `lemma` value left-aligned in 22 characters (printed under the
    "lemma_address" heading) and the `lemma_` string. Nothing is returned.
    """
    header_template = '{:12} {:6} {:<22} {} \n'
    row_template = '{:12} {:6} {:<22} {}'

    # The "\n" at the end of the header template leaves a blank line under it.
    print(header_template.format('text', 'pos', 'lemma_address', 'lemma'))
    for tok in text:
        print(row_template.format(tok.text, tok.pos_, tok.lemma, tok.lemma_))

In [90]:
# Same sentence as the previous example, now rendered as an aligned table.
sentence = u"John Adam is one the researcher who invent the direction of way towards success!"
var2 = nlp(sentence)
show_lemmas(var2)

text         pos    lemma_address          lemma 

John         PROPN  11174346320140919546   John
Adam         PROPN  14264057329400597350   Adam
is           AUX    10382539506755952630   be
one          NUM    17454115351911680600   one
the          DET    7425985699627899538    the
researcher   NOUN   1317581537614213870    researcher
who          PRON   3876862883474502309    who
invent       VERB   5373681334090504585    invent
the          DET    7425985699627899538    the
direction    NOUN   895834437038626927     direction
of           ADP    886050111519832510     of
way          NOUN   6878210874361030284    way
towards      ADP    9315050841437086371    towards
success      NOUN   16089821935113899987   success
!            PUNCT  17494803046312582752   !


In [22]:
# "meeting" occurs twice; the output below shows it lemmatized to "meet" as a
# verb and left as "meeting" as a noun.
sentence = u"I am meeting him tomorrow at the meeting."
var3 = nlp(sentence)
show_lemmas(var3)

I            PRON   561228191312463089     -PRON-
am           AUX    10382539506755952630   be
meeting      VERB   6880656908171229526    meet
him          PRON   561228191312463089     -PRON-
tomorrow     NOUN   3573583789758258062    tomorrow
at           ADP    11667289587015813222   at
the          DET    7425985699627899538    the
meeting      NOUN   14798207169164081740   meeting
.            PUNCT  12646065887601541794   .


In [23]:
# A sentence containing the misspelling "greate"; the output below shows the
# lemma column repeating it unchanged.
sentence = u"That's of the greate person in the world"
var4 = nlp(sentence)
show_lemmas(var4)

That         DET    4380130941430378203    that
's           AUX    10382539506755952630   be
of           ADP    886050111519832510     of
the          DET    7425985699627899538    the
greate       ADJ    4429768169814447593    greate
person       NOUN   14800503047316267216   person
in           ADP    3002984154512732771    in
the          DET    7425985699627899538    the
world        NOUN   1703489418272052182    world


In [1]:
# Import the toolkit and the Porter stemmer class.
# The class is imported by name rather than with a wildcard so that it is
# obvious where `PorterStemmer` comes from and nothing else leaks into the
# notebook namespace.
import nltk
from nltk.stem.porter import PorterStemmer
# Snowball stemmer
from nltk.stem.snowball import SnowballStemmer

In [2]:
# The Snowball stemmer requires that you pass a language parameter.
s_stemmer = SnowballStemmer(language='english')
# The Porter stemmer is constructed without arguments here.
p_stemmer = PorterStemmer()

In [58]:
# Side-by-side table: each word with its Porter stem, then the same word with
# its Snowball stem, in fixed-width columns (15 / 25 / 15 / 25).
x = ['Looked', 'were', 'waiting', 'anxiousness','surprisingly']
print(" Porter Stemmer\t Output \t\tSnowball Stemmer  Output \t\n  ")
row_template = '{0:15} {1:25} {0:15} {2:25}'
for i in x:
    porter_stem = p_stemmer.stem(i)
    snowball_stem = s_stemmer.stem(i)
    print(row_template.format(i, porter_stem, snowball_stem))

 Porter Stemmer	 Output 		Snowball Stemmer  Output 	
  
Looked          look                      Looked          look                     
were            were                      were            were                     
waiting         wait                      waiting         wait                     
anxiousness     anxious                   anxiousness     anxious                  
surprisingly    surprisingli              surprisingly    surpris                  
