#### Stemming in NLTK

In [1]:
from nltk.stem import PorterStemmer
# Porter stemmer instance used by the stemming loop in the next cell.
stemmer = PorterStemmer()

In [2]:
# Inflected verbs, an irregular past tense ("ate") and a few suffixed words.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
]

# Show each word next to the stem the Porter stemmer produces for it.
for original in words:
    stemmed = stemmer.stem(original)
    print(original, "|", stemmed)

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet


#### Lemmatization in spaCy

In [3]:
import spacy

In [4]:
# Load the en_core_web_sm English pipeline once; later cells reuse `nlp`.
nlp = spacy.load("en_core_web_sm")

# Same words as the stemming demo, plus "better".
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")
for tok in doc:
    lemma = tok.lemma_
    print(tok, " | ", lemma)

eating  |  eating
eats  |  eat
eat  |  eat
ate  |  eat
adjustable  |  adjustable
rafting  |  raft
ability  |  ability
meeting  |  meeting
better  |  well


In [5]:
# Reuse the `nlp` pipeline loaded in the previous cell: loading the same
# model a second time only costs time and yields an equivalent pipeline.
doc = nlp("Mando talked for 3 hours although talking isn't his thing")

# Print every token of a full sentence next to its lemma.
for token in doc:
    print(token, " | ", token.lemma_)

Mando  |  mando
talked  |  talk
for  |  for
3  |  3
hours  |  hour
although  |  although
talking  |  talking
is  |  be
n't  |  not
his  |  his
thing  |  thing


#### Customizing lemmatizer

In [6]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Fetch the pipeline's attribute ruler so custom token attributes can be added.
ar = nlp.get_pipe('attribute_ruler')

# Tokens whose text is exactly "Bro" or "Brah" get the lemma "Brother".
slang_patterns = [[{"TEXT":"Bro"}],[{"TEXT":"Brah"}]]
ar.add(slang_patterns, {"LEMMA":"Brother"})

doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")

# Print each token's text with its (possibly customised) lemma.
for tok in doc:
    print(tok.text, "|", tok.lemma_)

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [8]:
# Token at index 6 of the sentence processed above (the "Brah" token).
doc[6]

Brah

In [9]:
# Lemma of token 6 ("Brah"): the custom value assigned through the attribute ruler.
doc[6].lemma_

'Brother'

### Practice

#### 1. Using stemming in NLTK

In [10]:
# Practice vocabulary: -ing forms, an adverb, an irregular plural,
# a pronoun, an adjective and an irregular past tense.
words = [
    'running',
    'painting',
    'walking',
    'dressing',
    'likely',
    'children',
    'whom',
    'good',
    'ate',
    'fishing',
]

In [11]:
# `nltk` itself is needed further down for nltk.word_tokenize.
import nltk
from nltk.stem import PorterStemmer
# Stemmer instance for the practice section.
stemmer = PorterStemmer()

In [12]:
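# Download skipped: the NLTK data is already installed on this machine.
# On a fresh environment, uncomment the line below before running the tokenizer cells.
#nltk.download('all')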

In [13]:
# Show each practice word alongside its Porter stem.
for word in words:
    stem = stemmer.stem(word)
    print(f"{word} | {stem}")

running | run
painting | paint
walking | walk
dressing | dress
likely | like
children | children
whom | whom
good | good
ate | ate
fishing | fish


#### 2. Using lemmatization in spaCy

In [14]:
# Run the practice words through the spaCy pipeline as one space-separated string.
# NOTE(review): this string contains "who" while the practice word list above has "whom" — confirm which is intended.
doc = nlp("running painting walking dressing likely children who good ate fishing")

In [15]:
# Print each practice token with the lemma spaCy assigned to it.
for tok in doc:
    lemma = tok.lemma_
    print(tok, " | ", lemma)

running  |  run
painting  |  painting
walking  |  walking
dressing  |  dress
likely  |  likely
children  |  child
who  |  who
good  |  good
ate  |  eat
fishing  |  fishing


#### 3. Convert the given text into its base form using both stemming and lemmatization

In [16]:
# Practice paragraph. Several sentences have no space after the full stop
# (e.g. "girl.She"), which is why tokens such as 'girl.sh' appear in the
# stemmed output further down.
text = """Latha is very multi talented girl.She is good at many skills like dancing, running, singing, playing.She also likes eating Pav Bhagi. she has a 
habit of fishing and swimming too.Besides all this, she is a wonderful at cooking too.
"""

#### using stemming

In [17]:
# Step 1: Word tokenizing
all_word_tokens = nltk.word_tokenize(text)
all_base_words = []

In [18]:
# Step 2: getting the base form for each token using stemmer
for token in all_word_tokens:
    base_form = stemmer.stem(token)
    all_base_words.append(base_form)

In [19]:
#step3: joining all words in a list into string using 'join()'
final_base_text = ' '.join(all_base_words)
final_base_text

'latha is veri multi talent girl.sh is good at mani skill like danc , run , sing , playing.sh also like eat pav bhagi . she ha a habit of fish and swim too.besid all thi , she is a wonder at cook too .'

#### using lemmatization

In [20]:
#step1: Creating the object for the given text
doc = nlp(text)
all_base_words = []

In [21]:
#step2: getting the base form for each token using spacy 'lemma_'
for token in doc:
    base_word =  token.lemma_
    all_base_words.append(base_word)

In [22]:
#step3: joining all words in a list into string using 'join()'
final_base_text = ' '.join(all_base_words)
print(final_base_text)

Latha be very multi talented girl . she be good at many skill like dancing , running , singing , play . she also like eat Pav Bhagi . she have a 
 habit of fishing and swim too . besides all this , she be a wonderful at cook too . 

