In [1]:
import spacy
from spacy import displacy

In [2]:
# Start from an empty English pipeline: it tokenizes text but carries no
# other processing components.
nlp = spacy.blank("en")
doc = nlp("Captain america ate 100$ samosa. Then he said I can do this all day.")

# Make the token boundaries visible by writing every token followed by "_".
print(*doc, sep="_", end="_")

Captain_america_ate_100_$_samosa_._Then_he_said_I_can_do_this_all_day_._

In [3]:
# Show the component names of the current pipeline; for the blank English
# pipeline created earlier this displays an empty list.
nlp.pipe_names

[]

In [4]:
# Rebind `nlp` to the packaged "en_core_web_sm" pipeline; every later cell
# that calls `nlp(...)` uses this pipeline rather than the blank one.
nlp = spacy.load("en_core_web_sm")

# Display the names of the components this pipeline runs on each text.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [6]:
doc = nlp("Captain america ate 100$ samosa. Then he said I can do this all day.")

# One row per token: surface text, part-of-speech tag, and lemma
# (the base form of the word, e.g. "ate" -> "eat").
for token in doc:
    print(token.text, token.pos_, token.lemma_, sep="  |  ")

Captain  |  PROPN  |  Captain
america  |  PROPN  |  america
ate  |  VERB  |  eat
100  |  NUM  |  100
$  |  SYM  |  $
samosa  |  NOUN  |  samosa
.  |  PUNCT  |  .
Then  |  ADV  |  then
he  |  PRON  |  he
said  |  VERB  |  say
I  |  PRON  |  I
can  |  AUX  |  can
do  |  VERB  |  do
this  |  PRON  |  this
all  |  DET  |  all
day  |  NOUN  |  day
.  |  PUNCT  |  .


In [12]:
doc2 = nlp("Tesla Inc is going to acquire Twitter Inc of $45 billion")

# Named Entity Recognition: the pipeline's "ner" component fills doc2.ents.
# One row per entity: its text, its label, and spaCy's description of that label.
for ent in doc2.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit


In [13]:
# Visualize the entities of doc2 with displaCy's "ent" style instead of the
# plain-text listing. `displacy` comes from the notebook's import cell so that
# all dependencies are declared in one place and survive Restart & Run All.
displacy.render(doc2, style="ent")

In [14]:
# Reuse a single trained component inside an otherwise empty pipeline.

# Target: an English pipeline with no components yet.
blank_nlp = spacy.blank("en")

# Donor: the packaged pipeline that the "ner" component is copied from.
source_nlp = spacy.load("en_core_web_sm")

# Copy only the "ner" component from the donor into the blank pipeline.
blank_nlp.add_pipe("ner", source=source_nlp)

# Display the component names of the assembled pipeline.
blank_nlp.pipe_names

['ner']

In [15]:
doc_blank_nlp = blank_nlp("Tesla Inc is going to acquire Twitter Inc of $45 billion")

# List what the pipeline holding only the sourced "ner" component finds:
# entity text, label, and spaCy's description of the label.
for ent in doc_blank_nlp.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep="  |  ")

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter Inc  |  ORG  |  Companies, agencies, institutions, etc.
$45 billion  |  MONEY  |  Monetary values, including unit
