In [2]:
import spacy 

In [2]:
# Start from a blank English pipeline and use it only to split the sample
# passage into tokens.
nlp = spacy.blank("en")
passage = "It was established in 1992 and is administered by the Dr. Ambedkar foundation to people or organizations for their outstanding work. The award symbolizes the vision of Babasaheb Ambedkar for social understanding and national integrity. The money constituent of this award is 1 million rupees and a citation. $"
doc = nlp(passage)

# Emit every token on its own line.
print(*doc, sep="\n")

It
was
established
in
1992
and
is
administered
by
the
Dr.
Ambedkar
foundation
to
people
or
organizations
for
their
outstanding
work
.
The
award
symbolizes
the
vision
of
Babasaheb
Ambedkar
for
social
understanding
and
national
integrity
.
The
money
constituent
of
this
award
is
1
million
rupees
and
a
citation
.
$


In [3]:
# Show the names of the components in the current pipeline
# (the blank pipeline created above reports an empty list).
nlp.pipe_names

[]

In [4]:
# Replace the blank pipeline with the packaged "en_core_web_sm" pipeline.
nlp= spacy.load("en_core_web_sm")

In [5]:
# Inspect the pipeline as a list of (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x1cc0b758f40>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1cc0b758e80>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x1cc0b5b2510>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1cc0b7a8200>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1cc0b73e740>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x1cc0b5b2350>)]

In [6]:
# Same pipeline as above, but showing only the component names in order.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [7]:
# Process the passage with the current `nlp` pipeline, then list each token
# together with its part-of-speech tag and its lemma.
doc = nlp("It was established in 1992 and is administered by the Dr. Ambedkar foundation to people or organizations for their outstanding work. The award symbolizes the vision of Babasaheb Ambedkar for social understanding and national integrity. The money constituent of this award is 1 million rupees and a citation.")
for token in doc:
    print(f"{token} | {token.pos_} | {token.lemma_}")

It | PRON | it
was | AUX | be
established | VERB | establish
in | ADP | in
1992 | NUM | 1992
and | CCONJ | and
is | AUX | be
administered | VERB | administer
by | ADP | by
the | DET | the
Dr. | PROPN | Dr.
Ambedkar | PROPN | Ambedkar
foundation | NOUN | foundation
to | ADP | to
people | NOUN | people
or | CCONJ | or
organizations | NOUN | organization
for | ADP | for
their | PRON | their
outstanding | ADJ | outstanding
work | NOUN | work
. | PUNCT | .
The | DET | the
award | NOUN | award
symbolizes | VERB | symbolize
the | DET | the
vision | NOUN | vision
of | ADP | of
Babasaheb | PROPN | Babasaheb
Ambedkar | PROPN | Ambedkar
for | ADP | for
social | ADJ | social
understanding | NOUN | understanding
and | CCONJ | and
national | ADJ | national
integrity | NOUN | integrity
. | PUNCT | .
The | DET | the
money | NOUN | money
constituent | NOUN | constituent
of | ADP | of
this | DET | this
award | NOUN | award
is | AUX | be
1 | NUM | 1
million | NUM | million
rupees | NOUN | rupee
and | CCO

In [8]:
# Re-process the passage and list every named entity with its label and
# spaCy's human-readable explanation of that label.
doc = nlp("It was established in 1992 and is administered by the Dr. Ambedkar foundation to people or organizations for their outstanding work. The award symbolizes the vision of Babasaheb Ambedkar for social understanding and national integrity. The money constituent of this award is 1 million rupees and a citation. $")
# The loop variable keeps the name `ent`; it stays defined after the loop.
for ent in doc.ents:
    label = ent.label_
    print(f"{ent.text} | {label} | {spacy.explain(label)}")

1992 | DATE | Absolute or relative dates or periods
Ambedkar | PERSON | People, including fictional
1 million | CARDINAL | Numerals that do not fall under another type


In [9]:
# Visualize the entities of `doc` with displaCy's entity style.
from spacy import displacy
displacy.render(doc,style= "ent")

In [10]:
# Build a pipeline that contains only a named-entity recognizer:
# load the trained pipeline as a source, start from a blank English pipeline,
# and copy the "ner" component across from the source.
source_nlp = spacy.load("en_core_web_sm")
nlp = spacy.blank("en")
nlp.add_pipe("ner", source=source_nlp)
# Confirm which components the new pipeline holds.
nlp.pipe_names

['ner']

In [12]:
# Run the current pipeline over a second passage and list whatever entities
# it finds (nothing is printed when no entity is recognized).
doc1 = nlp("including a wide array of ingredients, herbs, spices, techniques, and dishes. As cultures have mixed through forces like international trade and globalization, ")
for ent in doc1.ents:
    label = ent.label_
    print(f"{ent.text} | {label} | {spacy.explain(label)}")

In [13]:
    # Print the last entity recognized in `doc` by indexing it explicitly,
    # instead of reusing an `ent` loop variable left over from an earlier
    # cell (which only works when the cells run in one particular order).
    last_ent = doc.ents[-1]
    print(last_ent.text,"|",last_ent.label_, "|",spacy.explain(last_ent.label_))

1 million | CARDINAL | Numerals that do not fall under another type


### Stemming & lemmatization

In [14]:
import nltk
import spacy

In [16]:
# Create a single Porter stemmer instance from NLTK for the stemming cells.
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()

In [18]:
# Stem a handful of inflected forms and show each word beside its stem.
words = ["Eating", "talking", "talk", "talked", "Eats", "Eat", "ate"]
stems = map(stemmer.stem, words)
for original, stem in zip(words, stems):
    print(f"{original} | {stem}")

Eating | eat
talking | talk
talk | talk
talked | talk
Eats | eat
Eat | eat
ate | ate


In [20]:
# Reload the trained pipeline and lemmatize a few inflected words.
nlp = spacy.load("en_core_web_sm")
doc = nlp("eating eats eat ate adjustable rafting ability meeting better")
# `lemma_` is the lemma as text; `lemma` is the integer value spaCy holds for it.
for token in doc:
    print(f"{token} | {token.lemma_} {token.lemma}")

eating | eating 12092082220177030354
eats | eat 9837207709914848172
eat | eat 9837207709914848172
ate | eat 9837207709914848172
adjustable | adjustable 6033511944150694480
rafting | raft 7154368781129989833
ability | ability 11565809527369121409
meeting | meeting 14798207169164081740
better | well 4525988469032889948


In [23]:
# Lemmatize a full sentence and show each token next to its lemma.
doc = nlp("Mando talked for 3 hours although talking isn't his thing he become talketive")
for token in doc:
    print("{} | {}".format(token, token.lemma_))

Mando | mando
talked | talk
for | for
3 | 3
hours | hour
although | although
talking | talking
is | be
n't | not
his | his
thing | thing
he | he
become | become
talketive | talketive


In [24]:
# List the component names of the current pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [28]:
# The attribute_ruler lets us assign custom attributes to particular tokens.
# Here the exact token texts "Bro" and "Brah" are both given the lemma "Brother".
ar = nlp.get_pipe("attribute_ruler")
slang_patterns = [[{"TEXT": "Bro"}], [{"TEXT": "Brah"}]]
lemma_override = {"LEMMA": "Brother"}
ar.add(slang_patterns, lemma_override)

# Process a sentence containing both words and show each token with its lemma.
doc = nlp("Bro , you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
    print(f"{token.text} | {token.lemma_}")

Bro | Brother
, | ,
you | you
wanna | wanna
go | go
? | ?
Brah | Brother
, | ,
do | do
n't | not
say | say
no | no
! | !
I | I
am | be
exhausted | exhaust


In [30]:
# First token of the most recently processed `doc`.
doc[0]

Bro

In [29]:
# Lemma text of that first token.
doc[0].lemma_

'Brother'

###  POS tagging

In [4]:
# Reload the trained pipeline, process the sample passage, and print each
# token with its part-of-speech tag, one pair per line.
nlp = spacy.load("en_core_web_sm")
doc = nlp("It was established in 1992 and is administered by the Dr. Ambedkar foundation to people or organizations for their outstanding work. The award symbolizes the vision of Babasaheb Ambedkar for social understanding and national integrity. The money constituent of this award is 1 million rupees and a citation. $")

pos_lines = (f"{token} | {token.pos_}" for token in doc)
print("\n".join(pos_lines))

It | PRON
was | AUX
established | VERB
in | ADP
1992 | NUM
and | CCONJ
is | AUX
administered | VERB
by | ADP
the | DET
Dr. | PROPN
Ambedkar | PROPN
foundation | NOUN
to | ADP
people | NOUN
or | CCONJ
organizations | NOUN
for | ADP
their | PRON
outstanding | ADJ
work | NOUN
. | PUNCT
The | DET
award | NOUN
symbolizes | VERB
the | DET
vision | NOUN
of | ADP
Babasaheb | PROPN
Ambedkar | PROPN
for | ADP
social | ADJ
understanding | NOUN
and | CCONJ
national | ADJ
integrity | NOUN
. | PUNCT
The | DET
money | NOUN
constituent | NOUN
of | ADP
this | DET
award | NOUN
is | AUX
1 | NUM
million | NUM
rupees | NOUN
and | CCONJ
a | DET
citation | NOUN
. | PUNCT
$ | SYM
