# <mark> Named Entity Recognition

In [42]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [43]:
doc1 = nlp(u'Tata Motors Limited, a USD 35 billion organisation, CEO being Ratanji')

In [44]:
for token in doc1:
    print(token.text, end=' | ')

Tata | Motors | Limited | , | a | USD | 35 | billion | organisation | , | CEO | being | Ratanji | 

In [45]:
doc1.ents

(Tata Motors Limited, USD 35 billion, Ratanji)

In [46]:
for ent in doc1.ents:
    print(ent.text)
    # print(ent.label)
    print(ent.label_)
    print(str(spacy.explain(ent.label_)))
    print(ent.start)
    print(ent.end)
    # print(ent.start_char)
    # print(ent.end_char)
    print()

Tata Motors Limited
ORG
Companies, agencies, institutions, etc.
0
3

USD 35 billion
MONEY
Monetary values, including unit
5
8

Ratanji
PERSON
People, including fictional
12
13



In [47]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+' - '+ ent.label_ + ' - ' + str(spacy.explain(ent.label_)))
            print('-----------------------------')
    else:
        print('No named entities found.')

In [48]:
show_ents(nlp('hey, are you fine?'))

No named entities found.


In [49]:
show_ents(nlp('Patrick was owner of Mercedes in London'))

Patrick - PERSON - People, including fictional
-----------------------------
Mercedes - ORG - Companies, agencies, institutions, etc.
-----------------------------
London - GPE - Countries, cities, states
-----------------------------


In [50]:
## Adding a new entity to current document

from spacy.tokens import Span
doc = nlp('CPRO to build a U.K. factory for $6 million')

ORG = doc.vocab.strings[u'ORG']  
new_ent = Span(doc, 0, 1, label=ORG)

new_ent.text, new_ent.label_, spacy.explain(ent.label_)

('CPRO', 'ORG', 'People, including fictional')

In [51]:
doc =nlp('CPRO to build a U.K. factory for $6 million')
doc.ents = list(doc.ents) + [new_ent]

show_ents(doc)

CPRO - ORG - Companies, agencies, institutions, etc.
-----------------------------
U.K. - GPE - Countries, cities, states
-----------------------------
$6 million - MONEY - Monetary values, including unit
-----------------------------


In [52]:
doc2 = nlp(u"Our manufacturing facilities are certified for world-class manufacturing and quality standards.")

for chunk in doc2.noun_chunks:
    print(chunk.text)
    print(chunk.root.text)
    # print(chunk.root.dep_)
    print(spacy.explain(chunk.root.dep_))
    print(chunk.label_)
    print(spacy.explain(chunk.label_))
    print(chunk.start)
    print(chunk.end)
    print()

Our manufacturing facilities
facilities
nominal subject (passive)
NP
noun phrase
0
3

world-class manufacturing and quality standards
standards
object of preposition
NP
noun phrase
6
13



In [53]:
doc3 = nlp('''
Cristiano Ronaldo dos Santos Aveiro GOIH ComM is a Portuguese professional footballer 
who plays as a forward for and 
captains both Saudi Pro League club Al Nassr and the Portugal national team.
''')

In [54]:
for chunk in doc3.noun_chunks:
    print(chunk.text,' : ', chunk.root.text,' : ', chunk.root.dep_,' : ', spacy.explain(chunk.root.dep_),' : ', chunk.label,' : ', chunk.label_,' : ',spacy.explain(chunk.label_))
    print()


Cristiano Ronaldo  :  Ronaldo  :  nsubj  :  nominal subject  :  3342607623747562680  :  NP  :  noun phrase

ComM  :  ComM  :  nsubj  :  nominal subject  :  3342607623747562680  :  NP  :  noun phrase

a Portuguese professional footballer  :  footballer  :  attr  :  attribute  :  3342607623747562680  :  NP  :  noun phrase

who  :  who  :  nsubj  :  nominal subject  :  3342607623747562680  :  NP  :  noun phrase

a forward  :  forward  :  pobj  :  object of preposition  :  3342607623747562680  :  NP  :  noun phrase

captains  :  captains  :  attr  :  attribute  :  3342607623747562680  :  NP  :  noun phrase

both Saudi Pro League club Al Nassr  :  Nassr  :  dobj  :  direct object  :  3342607623747562680  :  NP  :  noun phrase

the Portugal national team  :  team  :  conj  :  conjunct  :  3342607623747562680  :  NP  :  noun phrase

