# **Author: Asad Tariq Sheikh**

In [1]:
import nltk
import spacy

In [2]:
from nltk.stem import PorterStemmer

# Shared Porter stemmer instance; the stemming cell below calls stemmer.stem() on each word.
stemmer = PorterStemmer()

# **Stemming: applies a fixed set of rules, e.g. stripping suffixes such as `ing`, `able`, `ity`, without any knowledge of the language (dumb)**

In [3]:
# Inflected and derived word forms used to see how the rule-based stemmer behaves.
words = ["eating", "eats", "eat", "ate", "adjustable", "rafting", "ability", "meeting"]

# Print every word next to the stem the Porter stemmer produces for it.
for word in words:
    stem = stemmer.stem(word)
    print(word, "|", stem)

eating | eat
eats | eat
eat | eat
ate | ate
adjustable | adjust
rafting | raft
ability | abil
meeting | meet


# **Lemmatization: uses a real language model, such as spaCy's English language processing pipeline `en_core_web_sm`, to map each word to its base form (lemma)**

In [4]:
# Load spaCy's small English pipeline; its lemmatizer produces the lemmas shown below.
nlp = spacy.load("en_core_web_sm")

# Run the sample words (including the irregular form "better") through the pipeline.
sample_text = "eating eats eat ate adjustable rafting ability meeting better"
doc = nlp(sample_text)

# For each token show the lemma as a string (lemma_) and as spaCy's integer hash (lemma).
for token in doc:
    lemma_text, lemma_hash = token.lemma_, token.lemma
    print(token, "|", lemma_text, "|", lemma_hash)

eating | eat | 9837207709914848172
eats | eat | 9837207709914848172
eat | eat | 9837207709914848172
ate | eat | 9837207709914848172
adjustable | adjustable | 6033511944150694480
rafting | raft | 7154368781129989833
ability | ability | 11565809527369121409
meeting | meeting | 14798207169164081740
better | well | 4525988469032889948


# **Lemmatization: How to customize the model?**
## **Slang words, e.g. `Bro`, `Brah`**

In [5]:
# Sentence containing slang ("Bro", "Brah") that the stock pipeline does not map to a common lemma.
slang_text = "Bro you wanna go? Brah, don't say no! I am exhausted"
doc = nlp(slang_text)

# List each token's text beside the lemma assigned by the unmodified pipeline.
for token in doc:
    lemma_text = token.lemma_
    print(token.text, "|", lemma_text)

Bro | bro
you | you
wanna | wanna
go | go
? | ?
Brah | Brah
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [6]:
# First token of the parsed slang sentence.
doc[0]

Bro

In [7]:
# Lemma (string form) the pipeline assigned to the first token before any customization.
doc[0].lemma_

'bro'

In [8]:
# Names of the components in the loaded pipeline; used to locate the attribute_ruler.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [9]:
# Fetch the pipeline's attribute_ruler component so custom token-attribute rules can be added to it.
ar = nlp.get_pipe("attribute_ruler")
ar

<spacy.pipeline.attributeruler.AttributeRuler at 0x7bb0257de800>

In [10]:
# Register two single-token patterns ("Bro" and "Brah") and set the lemma of any match to "Brother".
# TEXT matches the exact token text, so differently-cased forms such as "bro" are not covered.
# NOTE(review): this mutates the shared `nlp` pipeline, so every later nlp(...) call is affected.
ar.add([[{"TEXT": "Bro"}], [{"TEXT": "Brah"}]], {"LEMMA": "Brother"})

# **Let's try again!**
## **Slang words, e.g. `Bro`, `Brah`**

In [11]:
# Re-parse the same slang sentence now that the attribute ruler carries the custom lemma rules.
slang_text = "Bro you wanna go? Brah, don't say no! I am exhausted"
doc = nlp(slang_text)

# List each token's text beside its lemma; "Bro" and "Brah" should now both show "Brother".
for token in doc:
    lemma_text = token.lemma_
    print(token.text, "|", lemma_text)

Bro | Brother
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust
