# Named Entity Recognition

In [1]:
import spacy
from spacy import displacy

## Load the model

In [2]:
# Load the spaCy pipeline package 'en_core_web_sm'. The resulting `nlp` object is
# called on raw text below to produce an annotated Doc.
# NOTE: the package has to be installed in the kernel's environment first
# (e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load('en_core_web_sm')

## Analyze the text

In [3]:
# Sample sentence mixing a person, a quantity, a company and a year.
text = "Sam bought 400 shares of Amazon.com Inc in 2000"
# Run the loaded pipeline on the sentence; every later cell inspects this `doc`.
doc = nlp(text)

In [4]:
# One row per token: text, pos_, tag_, dep_ and entity type, separated by " \t ".
# NOTE: `ent_type` (no trailing underscore) is the integer ID of the entity label,
# which is why the last column prints numbers (0 for tokens outside any entity);
# `token.ent_type_` is the string form (e.g. 'PERSON').
for token in doc:
    print(token, token.pos_, token.tag_, token.dep_, token.ent_type, sep=' \t ')

Sam 	 PROPN 	 NNP 	 nsubj 	 380
bought 	 VERB 	 VBD 	 ROOT 	 0
400 	 NUM 	 CD 	 nummod 	 397
shares 	 NOUN 	 NNS 	 dobj 	 0
of 	 ADP 	 IN 	 prep 	 0
Amazon.com 	 PROPN 	 NNP 	 compound 	 383
Inc 	 PROPN 	 NNP 	 pobj 	 383
in 	 ADP 	 IN 	 prep 	 0
2000 	 NUM 	 CD 	 pobj 	 391


In [5]:
# Look up the human-readable description of 'NNP', a value seen in `token.tag_`.
spacy.explain('NNP')

'noun, proper singular'

In [6]:
# Look up the human-readable description of 'PROPN', a value seen in `token.pos_`.
spacy.explain('PROPN')

'proper noun'

In [7]:
# `doc.ents` holds the entity spans the pipeline recognized; multi-token entities
# such as "Amazon.com Inc" appear as a single span.
print(doc.ents)

(Sam, 400, Amazon.com Inc, 2000)


## Check the entities

In [8]:
# For each recognized entity, print its character offsets into `text` and its label.
# `end_char` is exclusive: "Sam" spans 0..3.
for ent in doc.ents:
    print(f"{ent.start_char} {ent.end_char} {ent.label_}")

0 3 PERSON
11 14 CARDINAL
25 39 ORG
43 47 DATE


## Render Visualization

In [9]:
# Draw the sentence with its named entities highlighted (style='ent');
# jupyter=True asks displaCy to display the markup inline in the notebook.
displacy.render(doc, style='ent', jupyter=True)

In [10]:
# Show the plain dict displaCy builds from the dependency parse: a list of words
# (text + tag), a list of arcs (start/end token index, label, direction) and settings.
print(displacy.parse_deps(doc))

{'words': [{'text': 'Sam', 'tag': 'PROPN'}, {'text': 'bought', 'tag': 'VERB'}, {'text': '400', 'tag': 'NUM'}, {'text': 'shares', 'tag': 'NOUN'}, {'text': 'of', 'tag': 'ADP'}, {'text': 'Amazon.com', 'tag': 'PROPN'}, {'text': 'Inc', 'tag': 'PROPN'}, {'text': 'in', 'tag': 'ADP'}, {'text': '2000', 'tag': 'NUM'}], 'arcs': [{'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'}, {'start': 2, 'end': 3, 'label': 'nummod', 'dir': 'left'}, {'start': 1, 'end': 3, 'label': 'dobj', 'dir': 'right'}, {'start': 3, 'end': 4, 'label': 'prep', 'dir': 'right'}, {'start': 5, 'end': 6, 'label': 'compound', 'dir': 'left'}, {'start': 4, 'end': 6, 'label': 'pobj', 'dir': 'right'}, {'start': 1, 'end': 7, 'label': 'prep', 'dir': 'right'}, {'start': 7, 'end': 8, 'label': 'pobj', 'dir': 'right'}], 'settings': {'lang': 'en', 'direction': 'ltr'}}


In [11]:
# Draw the dependency tree (style='dep') inline in the notebook.
# Per the displaCy docs, `distance` is the spacing between words in pixels;
# 110 is passed here explicitly instead of relying on the library default.
displacy.render(doc, style='dep', jupyter=True, options={'distance':110})