## Stemming and Lemmatization

In [1]:
import nltk
import spacy

### Stemming

In [4]:
from nltk.stem import PorterStemmer

# One Porter stemmer instance, reused by the stemming cells that follow.
stemmer = PorterStemmer()

In [5]:
# Inflected and derived forms to run through the Porter stemmer.
words = [
    'eating', 'eats', 'eat', 'ate', 'adjustable',
    'rafting', 'ability', 'meeting', 'went', 'goes',
]

# Show every word next to the stem produced for it.
for word in words:
    stemmed = stemmer.stem(word)
    print(word, " | ", stemmed)

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  ate
adjustable  |  adjust
rafting  |  raft
ability  |  abil
meeting  |  meet
went  |  went
goes  |  goe


### Lemmatization

In [9]:
# Load spaCy's small English pipeline; the cells below reuse this `nlp` object.
nlp = spacy.load("en_core_web_sm")

doc = nlp("eating eats ate adjustable rafting ability meeting better went goes")

# Print each token's text beside its lemma.
for token in doc:
    print(token.text, "|", token.lemma_)

eating | eat
eats | eat
ate | eat
adjustable | adjustable
rafting | raft
ability | ability
meeting | meeting
better | well
went | go
goes | go


In [10]:
# `nlp` was already loaded in the previous cell, so the pipeline is reused here
# rather than being loaded from disk a second time.
doc = nlp("eating eats ate adjustable rafting ability meeting better went goes")

# token.lemma_ is the lemma as text; token.lemma is the integer hash ID of that
# same lemma string. Tokens that share a lemma therefore share the ID
# (e.g. "eating", "eats" and "ate" all print the ID of "eat").
for token in doc:
    print(token, "|", token.lemma_, "|", token.lemma)

eating | eat | 9837207709914848172
eats | eat | 9837207709914848172
ate | eat | 9837207709914848172
adjustable | adjustable | 6033511944150694480
rafting | raft | 7154368781129989833
ability | ability | 11565809527369121409
meeting | meeting | 14798207169164081740
better | well | 4525988469032889948
went | go | 8004577259940138793
goes | go | 8004577259940138793


In [34]:

# NOTE(review): "althoungh" looks like a typo for "although"; it is left as-is
# so that the recorded output below still matches the input.
doc = nlp("Mando talked for 3 hours althoungh talking isn't his thing he became talkative")

# Print each token beside its lemma.
for token in doc:
    print(token, "|", token.lemma_)

Mando | Mando
talked | talk
for | for
3 | 3
hours | hour
althoungh | althoungh
talking | talking
is | be
n't | not
his | his
thing | thing
he | he
became | become
talkative | talkative


In [14]:
# Informal sentence containing slang tokens ("Bro", "Brah", "wanna").
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

# Print every token (punctuation included) beside its lemma.
for token in doc:
    lemma = token.lemma_
    print(token.text, "|", lemma)

Bro | bro
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brah
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [13]:
# Show the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [16]:
# Fetch the pipeline's attribute ruler so a custom lemma rule can be added to it.
ar = nlp.get_pipe('attribute_ruler')

# Tokens whose text is exactly "Bro" or "Brah" get the lemma "Brother".
slang_patterns = [[{"TEXT": "Bro"}], [{"TEXT": "Brah"}]]
ar.add(slang_patterns, {"LEMMA": "Brother"})

# Re-run the same sentence to see the custom lemma take effect.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

for token in doc:
    lemma = token.lemma_
    print(token.text, "|", lemma)

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


#### Exercise 1:

Convert this list of words into their base forms using stemming and lemmatization, and observe the transformations.
Write a short note on the words whose base forms differ between stemming and lemmatization.

In [17]:
# Stemming with NLTK's Porter stemmer
lst_words = [
    'running', 'painting', 'walking', 'dressing', 'likely',
    'children', 'whom', 'good', 'ate', 'fishing',
]

# Show every word next to the stem produced for it.
for word in lst_words:
    stemmed = stemmer.stem(word)
    print(word, " | ", stemmed)

running  |  run
painting  |  paint
walking  |  walk
dressing  |  dress
likely  |  like
children  |  children
whom  |  whom
good  |  good
ate  |  ate
fishing  |  fish


In [18]:
# Lemmatization with spaCy

# NOTE(review): this sentence contains "who" while the stemming word list uses
# "whom" - confirm which one is intended before comparing the two results.
doc = nlp("running painting walking dressing likely children who good ate fishing")

# Print each token beside its lemma.
for token in doc:
    lemma = token.lemma_
    print(token.text, "|", lemma)

running | run
painting | paint
walking | walk
dressing | dress
likely | likely
children | child
who | who
good | good
ate | eat
fishing | fishing


#### Exercise 2:

Convert the given text into its base form using both stemming and lemmatization.

In [54]:
text = """Latha is very multi talented girl.She is good at many skills like dancing, running, singing, playing.She also likes eating Pav Bhagi. she has a 
habit of fishing and swimming too.Besides all this, she is a wonderful at cooking too.
"""
# NOTE(review): several sentences have no space after the full stop
# (e.g. "girl.She"); the recorded stemmed output shows these surviving as
# single tokens such as "girl.sh".

# Stemming with NLTK

# Step 1: split the text into word tokens
tokens = nltk.word_tokenize(text)

# Step 2: reduce every token to its stem
base_word = [stemmer.stem(token) for token in tokens]

# Step 3: stitch the stems back into one space-separated string
final_text = ' '.join(base_word)
print(final_text)

latha is veri multi talent girl.sh is good at mani skill like danc , run , sing , playing.sh also like eat pav bhagi . she ha a habit of fish and swim too.besid all thi , she is a wonder at cook too .


In [55]:
# Lemmatization with spaCy

# Step 1: run the text through the spaCy pipeline
doc = nlp(text)

# Step 2: collect the lemma of every token
base_words = [token.lemma_ for token in doc]

# Step 3: stitch the lemmas back into one space-separated string
final_texts = ' '.join(base_words)
print(final_texts)

Latha be very multi talented girl . she be good at many skill like dancing , running , singing , play . she also like eat Pav Bhagi . she have a 
 habit of fishing and swim too . besides all this , she be a wonderful at cook too . 

