### Named Entity Recognition

In [1]:
import spacy
# Load the pretrained "en_core_web_sm" pipeline once; `nlp` is the callable
# that the cells below apply to raw text to obtain a Doc.
nlp = spacy.load("en_core_web_sm")


In [2]:
# List the component names of the loaded pipeline; the output includes 'ner',
# the component this notebook is about.
nlp.pipe_names


['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [3]:
# Run the pipeline on a sample sentence; recognised entities are exposed on doc.ents.
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billions")

# One line per entity: its text, its label, and spaCy's description of that label.
for ent in doc.ents:
    print(
        ent.text,
        " | ",
        ent.label_,
        spacy.explain(ent.label_),
    )

Tesla Inc  |  ORG Companies, agencies, institutions, etc.
Twitter Inc  |  PERSON People, including fictional
$45 billions  |  MONEY Monetary values, including unit


In [4]:
from spacy import displacy 
# Visualise the entities of the current `doc` with displaCy's entity ("ent") style.
displacy.render(doc, style="ent")


In [5]:
# Show the labels registered for the 'ner' component of this pipeline.
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [6]:
# A second example mixing person, organisation, place and date mentions.
doc = nlp("Mitchell Bloomberg founded Bloomberg LP in 1981. He was the mayor of New York City from 2002 to 2013.")

# Print each entity's text, label, and the label's description.
for ent in doc.ents:
    print(
        ent.text,
        " | ",
        ent.label_,
        spacy.explain(ent.label_),
    )

Mitchell Bloomberg  |  PERSON People, including fictional
Bloomberg LP  |  ORG Companies, agencies, institutions, etc.
1981  |  DATE Absolute or relative dates or periods
New York City  |  GPE Countries, cities, states
2002  |  DATE Absolute or relative dates or periods
2013  |  DATE Absolute or relative dates or periods


In [7]:
# Same sentence as the first example, but without the "Inc" suffixes.
doc = nlp("Tesla is going to acquire Twitter for $45 billions")

# Print each entity's text, label, and the label's description.
for ent in doc.ents:
    print(
        ent.text,
        " | ",
        ent.label_,
        spacy.explain(ent.label_),
    )

Tesla  |  ORG Companies, agencies, institutions, etc.
Twitter  |  PERSON People, including fictional
$45 billions  |  MONEY Monetary values, including unit


In [11]:
# Slicing a Doc works on token indices: this selects tokens 0, 1 and 2.
doc[0:3]

Tesla is going

In [12]:
# A slice of a Doc is a Span object (see the output), not a plain list of tokens.
type(doc[2:5])

spacy.tokens.span.Span

In [13]:
from spacy.tokens import Span
# Hand-label two single-token spans as ORG: token 0 ("Tesla") and token 5
# ("Twitter"). Assumes `doc` is still the "Tesla is going to acquire Twitter ..."
# sentence parsed in the earlier cell.
s1, s2 = (Span(doc, start, start + 1, label="ORG") for start in (0, 5))

# Write the spans onto the doc; default="unmodified" keeps entities that the
# new spans do not cover as they were.
doc.set_ents([s1, s2], default="unmodified")


In [15]:
# Re-list the entities after the manual override to confirm the new labels.
for ent in doc.ents:
    print(
        ent.text,
        " | ",
        ent.label_,
    )

Tesla  |  ORG
Twitter  |  ORG
$45 billions  |  MONEY
