In [11]:
import spacy

In [12]:
# Start from a blank English pipeline and run the sample paragraph through it.
nlp = spacy.blank("en")
doc = nlp(
    "The sun slowly set over the tranquil horizon, casting a golden glow across the shimmering waves. "
    "Seagulls glided gracefully through the air, their cries echoing in the distance. "
    "As the evening grew darker, the stars emerged one by one, dotting the night sky like diamonds. "
    "A gentle breeze rustled through the leaves, carrying with it the scent of freshly bloomed flowers. "
    "It was a peaceful scene, a moment of serenity amidst the chaos of everyday life."
)

In [13]:
# Iterating over a Doc yields its tokens; show the text of each on its own line.
for token in doc:
    print(token.text)

The
sun
slowly
set
over
the
tranquil
horizon
,
casting
a
golden
glow
across
the
shimmering
waves
.
Seagulls
glided
gracefully
through
the
air
,
their
cries
echoing
in
the
distance
.
As
the
evening
grew
darker
,
the
stars
emerged
one
by
one
,
dotting
the
night
sky
like
diamonds
.
A
gentle
breeze
rustled
through
the
leaves
,
carrying
with
it
the
scent
of
freshly
bloomed
flowers
.
It
was
a
peaceful
scene
,
a
moment
of
serenity
amidst
the
chaos
of
everyday
life
.


In [14]:
# Show the names of the components in the current pipeline.
# For the blank pipeline created above this is an empty list.
nlp.pipe_names

[]

In [15]:
# Rebind `nlp` to the pretrained small English pipeline (replaces the blank one).
nlp = spacy.load("en_core_web_sm")

In [16]:
# Show the component names of the loaded pipeline (no longer empty, unlike the blank one).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [17]:
# Re-process the same sample paragraph, this time with the trained pipeline.
doc = nlp(
    "The sun slowly set over the tranquil horizon, casting a golden glow across the shimmering waves. "
    "Seagulls glided gracefully through the air, their cries echoing in the distance. "
    "As the evening grew darker, the stars emerged one by one, dotting the night sky like diamonds. "
    "A gentle breeze rustled through the leaves, carrying with it the scent of freshly bloomed flowers. "
    "It was a peaceful scene, a moment of serenity amidst the chaos of everyday life."
)

In [18]:
# One row per token: the token itself, its part-of-speech tag (pos_) and its
# lemma (lemma_), i.e. the base form of the word.
for token in doc:
    print(f"{token}  |  {token.pos_}  |  {token.lemma_}")

The  |  DET  |  the
sun  |  NOUN  |  sun
slowly  |  ADV  |  slowly
set  |  VERB  |  set
over  |  ADP  |  over
the  |  DET  |  the
tranquil  |  NOUN  |  tranquil
horizon  |  NOUN  |  horizon
,  |  PUNCT  |  ,
casting  |  VERB  |  cast
a  |  DET  |  a
golden  |  ADJ  |  golden
glow  |  NOUN  |  glow
across  |  ADP  |  across
the  |  DET  |  the
shimmering  |  VERB  |  shimmer
waves  |  NOUN  |  wave
.  |  PUNCT  |  .
Seagulls  |  NOUN  |  seagull
glided  |  VERB  |  glide
gracefully  |  ADV  |  gracefully
through  |  ADP  |  through
the  |  DET  |  the
air  |  NOUN  |  air
,  |  PUNCT  |  ,
their  |  PRON  |  their
cries  |  NOUN  |  cry
echoing  |  VERB  |  echo
in  |  ADP  |  in
the  |  DET  |  the
distance  |  NOUN  |  distance
.  |  PUNCT  |  .
As  |  SCONJ  |  as
the  |  DET  |  the
evening  |  NOUN  |  evening
grew  |  VERB  |  grow
darker  |  NOUN  |  darker
,  |  PUNCT  |  ,
the  |  DET  |  the
stars  |  NOUN  |  star
emerged  |  VERB  |  emerge
one  |  NUM  |  one
by  |  ADP  | 

In [32]:
# doc.ents holds the named entities the pipeline found in the text.
# For each entity print three columns:
#   ent.text                   - the entity's text
#   ent.label_                 - the entity type
#   spacy.explain(ent.label_)  - a human-readable explanation of that type
doc = nlp("My name is Ashish Yadav. I have 20 cars, namely ently, toyota land cruiser")
for ent in doc.ents:
    print(f"{ent.text}  |  {ent.label_}  |  {spacy.explain(ent.label_)}")

Ashish Yadav  |  PERSON  |  People, including fictional
20  |  CARDINAL  |  Numerals that do not fall under another type
toyota  |  ORG  |  Companies, agencies, institutions, etc.


In [34]:
# Build a pipeline that contains one specific component only:
# start from an empty (blank) English pipeline and add the component
# from an already-trained pipeline by passing it as `source`.
source_nlp = spacy.load('en_core_web_sm')
nlp = spacy.blank('en')
# 'ner' stands for Named Entity Recognition (the component that produces
# doc.ents); it is taken from source_nlp.
nlp.add_pipe('ner',source = source_nlp)
# Display the component names; 'ner' is the only one present.
nlp.pipe_names

['ner']

## Stemming and Lemmatization

In [38]:
import nltk

In [39]:
from nltk.stem import PorterStemmer

In [41]:
# Stem a handful of words with NLTK's Porter stemmer and show word -> stem.
# Regular suffixes are stripped ('eating' -> 'eat', 'adjustable' -> 'adjust'),
# while irregular forms such as 'ate' and 'better' come back unchanged.
stemmer = PorterStemmer()
words = ["eating", "running", "eat", "ate", "adjustable", "better"]
for word in words:
    print(f"{word}  |  {stemmer.stem(word)}")

eating  |  eat
running  |  run
eat  |  eat
ate  |  ate
adjustable  |  adjust
better  |  better


In [42]:
# In spaCy the base form of a word is read from token.lemma_.
# Reload the full pretrained pipeline first: `nlp` was last rebound to a
# pipeline that contains only the 'ner' component.
nlp = spacy.load('en_core_web_sm')

In [48]:
# The pretrained model does not know slang, so teach it a custom lemma.
# Fetch the pipeline's 'attribute_ruler' component and register one rule:
# a token whose exact text is 'bro' or 'brah' gets the lemma 'brother'.
# The rule is added to the component inside `nlp`, so it applies to every
# text processed with `nlp` afterwards.
ar = nlp.get_pipe("attribute_ruler")
ar.add(
    patterns=[[{"TEXT": "bro"}], [{"TEXT": "brah"}]],
    attrs={"LEMMA": "brother"},
)
doc = nlp(" Hello bro! , brah")
for token in doc:
    print(f"{token}  |  {token.lemma_}")

   |   
Hello  |  hello
bro  |  brother
!  |  !
,  |  ,
brah  |  brother
