## Named Entity Recognition

In [51]:
import spacy
# Load the pipeline by name; `nlp` is reused by every cell below.
MODEL_NAME = 'en_core_web_sm'
nlp = spacy.load(MODEL_NAME)

In [52]:
# Names of the components in the loaded pipeline; 'ner' is the one queried
# for its labels further down.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [53]:
# Process a sample sentence with the loaded pipeline, then print each
# detected entity next to its label.
text = "Tesla INC is going to acquire Twitter Inc for $44 billion"
doc = nlp(text)
for ent in doc.ents:
    print(f'{ent.text}: {ent.label_}')

Tesla INC: ORG
Twitter Inc: ORG
$44 billion: MONEY


In [54]:
# Print every label the 'ner' component can assign, followed by spaCy's
# built-in description of that label.
ner_labels = nlp.pipe_labels['ner']
for label in ner_labels:
    print(f'{label}: {spacy.explain(label)}')

CARDINAL: Numerals that do not fall under another type
DATE: Absolute or relative dates or periods
EVENT: Named hurricanes, battles, wars, sports events, etc.
FAC: Buildings, airports, highways, bridges, etc.
GPE: Countries, cities, states
LANGUAGE: Any named language
LAW: Named documents made into laws.
LOC: Non-GPE locations, mountain ranges, bodies of water
MONEY: Monetary values, including unit
NORP: Nationalities or religious or political groups
ORDINAL: "first", "second", etc.
ORG: Companies, agencies, institutions, etc.
PERCENT: Percentage, including "%"
PERSON: People, including fictional
PRODUCT: Objects, vehicles, foods, etc. (not services)
QUANTITY: Measurements, as of weight or distance
TIME: Times smaller than a day
WORK_OF_ART: Titles of books, songs, etc.


In [55]:
# "Tesla" appears both as a company and as a surname here; the output shows
# the model labelling the first as ORG and the second as PERSON.
text = "Tesla INC is founded by Mr. Nikola Tesla"
doc = nlp(text)
for ent in doc.ents:
    print(f'{ent.text}: {ent.label_}')

Tesla INC: ORG
Nikola Tesla: PERSON


In [61]:
# All-lowercase input: the output shows only "tesla" being recognised, while
# "twitter" is missed. This `doc` is the one patched with custom spans below.
text = "tesla is going to acquire twitter"
doc = nlp(text)
for ent in doc.ents:
    print(f'{ent.text}: {ent.label_}')

tesla: ORG


In [62]:
# Span is used below to add custom entity annotations to a Doc
from spacy.tokens import Span

In [63]:
# Span(doc, start, end) covers tokens start .. end-1, so a one-token span is
# (i, i + 1). In "tesla is going to acquire twitter", token 0 is "tesla" and
# token 5 is "twitter".
org_token_indices = (0, 5)
s1, s2 = (
    Span(doc, start, start + 1, label="ORG")
    for start in org_token_indices
)
# Per spaCy's Doc.set_ents docs, default='unmodified' preserves the existing
# annotation on tokens outside the spans passed in.
doc.set_ents([s1, s2], default='unmodified')

In [64]:
# Re-list the entities on the patched doc: "twitter" now appears as ORG
# alongside "tesla".
for ent in doc.ents:
    print(f'{ent.text}: {ent.label_}')

tesla: ORG
twitter: ORG
