**Stemming in NLTK**

In [1]:
from nltk.stem import PorterStemmer
stemmer=PorterStemmer()


In [2]:
words=['eating','eats','eat','ate','adjustable','rafting','ability','meeting']
for word in words:
  print(word,'|',stemmer.stem(word))

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet


**Lemmmatiztion in Spacy**

In [3]:
import spacy

In [4]:
nlp=spacy.load('en_core_web_sm')

In [5]:
doc = nlp("Mando talked for 3 hours although talking isn't his thing")
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")

In [6]:
for token in doc:
  print(token,'|',token.lemma_)

eating | eat
eats | eat
eat | eat
ate | eat
adjustable | adjustable
rafting | raft
ability | ability
meeting | meeting
better | well


**Customizing Lemmatizer**

In [7]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [10]:
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
  print(token.text,'|',token.lemma_)

Bro | bro
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brah
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [13]:
nlp.get_pipe('attribute_ruler')
ar.add([[{"TEXT":"Bro"}],[{"TEXT":"Brah"}]],{'LEMMA':'Brother'})
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
  print(token.text,'|',token.lemma_)

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [14]:
doc[6]

Brah

In [15]:
doc[6].lemma_

'Brother'

**Examples**

In [18]:
import nltk
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
nltk.download('all')
import spacy
spacy.load('en_core_web_sm')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping grammars/basque_grammars.zip.
[nltk_data]    | Downloading package bcp47 to /root/nltk_data...
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   U

<spacy.lang.en.English at 0x7bcf0061a9e0>

In [20]:
#using stemming in nltk
lst_words = ['running', 'painting', 'walking', 'dressing', 'likely', 'children', 'whom', 'good', 'ate', 'fishing']
for word in lst_words:
  print(word,'|',stemmer.stem(word))

running | run
painting | paint
walking | walk
dressing | dress
likely | like
children | children
whom | whom
good | good
ate | ate
fishing | fish


In [26]:
#Using lemmatization in spacy
doc=nlp('running painting walking dressing likely children whom good ate fishing')
for word in doc:
  print(word,'|',word.lemma_)

running | run
painting | paint
walking | walk
dressing | dress
likely | likely
children | child
whom | whom
good | good
ate | eat
fishing | fishing


In [27]:
text = """Latha is very multi talented girl.She is good at many skills like dancing, running, singing, playing.She also likes eating Pav Bhagi. she has a
habit of fishing and swimming too.Besides all this, she is a wonderful at cooking too.
"""

In [31]:
#Using stemming in nltk
all_base_words=[]
for words in text:
  base_form=stemmer.stem(words)
  all_base_words.append(base_form)
  final_base_text=''.join(all_base_words)
final_base_text


'latha is very multi talented girl.she is good at many skills like dancing, running, singing, playing.she also likes eating pav bhagi. she has a \nhabit of fishing and swimming too.besides all this, she is a wonderful at cooking too.\n'

In [37]:
#Using lemmatization in spacy
doc=nlp(text)
all_base_words=[]
for words in doc:
  base_form = words.lemma_
  all_base_words.append(base_form)
  final_base_text=' '.join(all_base_words)
final_base_text

'Latha be very multi talented girl . she be good at many skill like dancing , running , singing , play . she also like eat Pav Bhagi . she have a \n habit of fishing and swim too . besides all this , she be a wonderful at cook too . \n'