### Stemming and Lemmatization

In [1]:
import spacy
import nltk

### Stemming 

In [8]:
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()


In [10]:
words = ["eating", "eats", "eat", "ate", "adjustable", "rafting", "ability", "meeting", "enjoying", "fencing"]
for word in words:
    print(word, " | ", stemmer.stem(word))

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  ate
adjustable  |  adjust
rafting  |  raft
ability  |  abil
meeting  |  meet
enjoying  |  enjoy
fencing  |  fenc


#### Stemming with Lemmatization to make correct base words

In [23]:
# load a pre-trained English pipeline
nlp = spacy.load("en_core_web_sm")

doc = nlp("eating eats eat ate adjustable rafting ability meeting enjoying fencing")
for token in doc:
    print(token.text, " | ", token.lemma_)

eating  |  eat
eats  |  eat
eat  |  eat
ate  |  eat
adjustable  |  adjustable
rafting  |  raft
ability  |  ability
meeting  |  meeting
enjoying  |  enjoy
fencing  |  fence


In [26]:
nlp = spacy.load("en_core_web_sm")

doc = nlp("He was working until late night, then he reached home at morning when the work was finished, all of the workers wrre tired but they wanted to complete their duty before they leave. The manager then paid them with a more wage they expected to receive.")
for token in doc:
    print(token.text, " | ", token.lemma_)

He  |  he
was  |  be
working  |  work
until  |  until
late  |  late
night  |  night
,  |  ,
then  |  then
he  |  he
reached  |  reach
home  |  home
at  |  at
morning  |  morning
when  |  when
the  |  the
work  |  work
was  |  be
finished  |  finish
,  |  ,
all  |  all
of  |  of
the  |  the
workers  |  worker
wrre  |  wrre
tired  |  tired
but  |  but
they  |  they
wanted  |  want
to  |  to
complete  |  complete
their  |  their
duty  |  duty
before  |  before
they  |  they
leave  |  leave
.  |  .
The  |  the
manager  |  manager
then  |  then
paid  |  pay
them  |  they
with  |  with
a  |  a
more  |  more
wage  |  wage
they  |  they
expected  |  expect
to  |  to
receive  |  receive
.  |  .


In [28]:
# load a pre-trained English pipeline
nlp = spacy.load("en_core_web_sm")
doc = nlp("Despite the researchers’ ongoing investigations into adjustable methodologies and evolving frameworks, the committee reconvened to reassess previously established findings, debating the applicability of theoretical constructs and the feasibility of implementing scalable solutions. Participants were analyzing, negotiating, and documenting their observations while simultaneously coordinating interdisciplinary meetings, forecasting potential disruptions, and evaluating the sustainability of proposed interventions. Activities such as brainstorming, prototyping, and reengineering were enthusiastically embraced, although some members expressed reservations about the practicality of integrating emerging technologies into legacy systems.")
for token in doc:
    print(token.text, " | ", token.lemma_)

Despite  |  despite
the  |  the
researchers  |  researcher
’  |  '
ongoing  |  ongoing
investigations  |  investigation
into  |  into
adjustable  |  adjustable
methodologies  |  methodology
and  |  and
evolving  |  evolve
frameworks  |  framework
,  |  ,
the  |  the
committee  |  committee
reconvened  |  reconvene
to  |  to
reassess  |  reassess
previously  |  previously
established  |  establish
findings  |  finding
,  |  ,
debating  |  debate
the  |  the
applicability  |  applicability
of  |  of
theoretical  |  theoretical
constructs  |  construct
and  |  and
the  |  the
feasibility  |  feasibility
of  |  of
implementing  |  implement
scalable  |  scalable
solutions  |  solution
.  |  .
Participants  |  participant
were  |  be
analyzing  |  analyze
,  |  ,
negotiating  |  negotiate
,  |  ,
and  |  and
documenting  |  document
their  |  their
observations  |  observation
while  |  while
simultaneously  |  simultaneously
coordinating  |  coordinate
interdisciplinary  |  interdisciplina

### custom Attribute ruler 

In [30]:
nlp = spacy.load("en_core_web_sm")
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [39]:
nlp = spacy.load("en_core_web_sm")

ar = nlp.get_pipe('attribute_ruler')

#ar.add([[{"TEXT": "bro"}], {"LEMMA": "brother"}])
ar.add(patterns=[[{"TEXT": "bro"}], [{"TEXT": "Bra"}]],attrs={"LEMMA": "brother"})
ar.add(patterns=[[{"TEXT": "wanna"}]],attrs={"LEMMA": "want to"})
ar.add(patterns=[[{"TEXT": "gonna"}]],attrs={"LEMMA": " going to"})
ar.add(patterns=[[{"TEXT": "gotta"}]],attrs={"LEMMA": "got to"})        
ar.add(patterns=[[{"TEXT": "lemme"}]],attrs={"LEMMA": "let me"})
ar.add(patterns=[[{"TEXT": "kinda"}]],attrs={"LEMMA": "kind of"})
ar.add(patterns=[[{"TEXT": "outta"}]],attrs={"LEMMA": "out of"})
ar.add(patterns=[[{"TEXT": "cuz"}]],attrs={"LEMMA": "   because"})
ar.add(patterns=[[{"TEXT": "chill"}]],attrs={"LEMMA": " relax"})
ar.add(patterns=[[{"TEXT": "brews"}]],attrs={"LEMMA": " beers"})
ar.add(patterns=[[{"TEXT": "hit up"}]],attrs={"LEMMA": " visit"})
ar.add(patterns=[[{"TEXT": "joint"}]],attrs={"LEMMA": " place"})
ar.add(patterns=[[{"TEXT": "sick"}]],attrs={"LEMMA": " great"})
ar.add(patterns=[[{"TEXT": "bounce"}]],attrs={"LEMMA": " go"})
ar.add(patterns=[[{"TEXT": "wreck"}]],attrs={"LEMMA": " defeat"})




doc = nlp("Yo bro, I was gonna hit up that new joint downtown with Bra, but he said he’s outta town for the weekend. I was like, “Cuz, you gotta be kidding me!” I really wanna chill, grab some brews, and wreck some noobs on the PS5. Lemme know if you're down — it's been a minute since we had a proper bro night. The place has sick deals on wings, and the vibe is kinda unreal. I heard they even added a rooftop lounge, which sounds great for kicking back. After that, we can bounce to my place, maybe watch a movie or just relax and talk. Honestly, I’m exhausted from work and need to unwind. You in or what?")

for token in doc:
    print(token.text, " | ", token.lemma_)  

Yo  |  Yo
bro  |  brother
,  |  ,
I  |  I
was  |  be
gon  |  go
na  |  to
hit  |  hit
up  |  up
that  |  that
new  |  new
joint  |   place
downtown  |  downtown
with  |  with
Bra  |  brother
,  |  ,
but  |  but
he  |  he
said  |  say
he  |  he
’s  |  ’
outta  |  out of
town  |  town
for  |  for
the  |  the
weekend  |  weekend
.  |  .
I  |  I
was  |  be
like  |  like
,  |  ,
“  |  "
Cuz  |  cuz
,  |  ,
you  |  you
got  |  get
ta  |  to
be  |  be
kidding  |  kid
me  |  I
!  |  !
”  |  "
I  |  I
really  |  really
wanna  |  want to
chill  |   relax
,  |  ,
grab  |  grab
some  |  some
brews  |   beers
,  |  ,
and  |  and
wreck  |   defeat
some  |  some
noobs  |  noob
on  |  on
the  |  the
PS5  |  PS5
.  |  .
Lemme  |  Lemme
know  |  know
if  |  if
you  |  you
're  |  be
down  |  down
—  |  —
it  |  it
's  |  be
been  |  be
a  |  a
minute  |  minute
since  |  since
we  |  we
had  |  have
a  |  a
proper  |  proper
bro  |  brother
night  |  night
.  |  .
The  |  the
place  |  place
has  |  hav

# Worked on Stemming and Lemmatization.