In [None]:
import spacy

In [None]:
# Load the pretrained "en_core_web_sm" pipeline; the cells below use it as `nlp`.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [None]:
# The same components as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7f7b8f26fe20>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x7f7b8f038ac0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7f7b8f3a86d0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x7f7b8f017740>),
 ('lemmatizer',
  <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7f7b8f2a00c0>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7f7b8f3a8970>)]

In [None]:
doc = nlp("Captain america is a clown with 100 $ plate. Marvel is and will be for kids.")

# One line per token: surface text | part-of-speech tag | lemma.
for token in doc:
    print(f"{token.text} | {token.pos_} | {token.lemma_}")

Captain | PROPN | Captain
america | PROPN | america
is | AUX | be
a | DET | a
clown | NOUN | clown
with | ADP | with
100 | NUM | 100
$ | SYM | $
plate | NOUN | plate
. | PUNCT | .
Marvel | PROPN | Marvel
is | AUX | be
and | CCONJ | and
will | AUX | will
be | AUX | be
for | ADP | for
kids | NOUN | kid
. | PUNCT | .


In [None]:
doc = nlp("Tesla Org will buy Twitter for $45 Billion")

# One line per recognized entity: span text | label | description of the label.
for entity in doc.ents:
    print(f"{entity.text} | {entity.label_} | {spacy.explain(entity.label_)}")

Tesla Org | ORG | Companies, agencies, institutions, etc.
$45 Billion | MONEY | Monetary values, including unit


In [None]:
from spacy import displacy

# Highlight the entities of `doc` inline in the notebook.
# jupyter=True forces notebook mode: without it, displaCy relies on
# auto-detection, and when that fails render() just returns the raw HTML
# markup as a string (shown as a quoted repr) instead of displaying it.
displacy.render(doc, style="ent", jupyter=True)

'<div class="entities" style="line-height: 2.5; direction: ltr">\n<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Tesla Org\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>\n</mark>\n will buy Twitter for \n<mark class="entity" style="background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    $45 Billion\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">MONEY</span>\n</mark>\n</div>'

## Pipeline with only the components required for our task

In [None]:
# Load the pretrained pipeline only as a source of trained components.
source_nlp = spacy.load("en_core_web_sm")

# Start from a blank English pipeline; note this rebinds `nlp`, replacing
# the full pipeline used by the earlier cells.
nlp = spacy.blank("en")

# Copy just the trained "ner" component over from the source pipeline.
nlp.add_pipe("ner", source=source_nlp)
# Display the resulting component names to confirm what the pipeline contains.
nlp.pipe_names

['ner']

In [None]:
doc = nlp("Tesla Org will buy GE for $45 Billion after Musk's approval")

# One line per recognized entity: span text | label | description of the label.
for entity in doc.ents:
    print(f"{entity.text} | {entity.label_} | {spacy.explain(entity.label_)}")

Tesla Org | ORG | Companies, agencies, institutions, etc.
GE | ORG | Companies, agencies, institutions, etc.
$45 Billion | MONEY | Monetary values, including unit
Musk | PERSON | People, including fictional
