In [1]:
import spacy
# Load the small English pipeline once; the cells below reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [2]:
def show_entity(doc):
    """Print one line per named entity in ``doc``.

    Each line has the form
    ``text - label - label description - start end start_char end_char``,
    where ``start``/``end`` are token indices and ``start_char``/``end_char``
    are character offsets. When ``doc.ents`` is empty, a single
    ``No entity found`` line is printed instead.
    """
    entities = doc.ents
    if not entities:
        print('No entity found')
        return

    for span in entities:
        description = str(spacy.explain(span.label_))
        # The first four fields are joined with ' - '; the remaining offsets are
        # passed as separate print() arguments so they are space separated.
        head = ' - '.join((span.text, span.label_, description, str(span.start)))
        print(head, str(span.end), str(span.start_char), str(span.end_char))
            

In [8]:
# A sentence without any named entity, used to exercise the empty case.
doc = nlp("how are you")


In [9]:
show_entity(doc)

No entity found


In [10]:
doc.ents

()

In [5]:
# Parse a travel sentence and list every entity the model finds in it.
doc = nlp('i am planning to go India to see Tajmahal at next winter')
show_entity(doc)

India - GPE - Countries, cities, states - 5 6 20 25
Tajmahal - PERSON - People, including fictional - 8 9 33 41
next winter - DATE - Absolute or relative dates or periods - 10 12 45 56


In [6]:
# Parse a business sentence and list every entity the model finds in it.
doc = nlp('HawkeyT to build a U.K. factory for 6$ million')
show_entity(doc)

U.K. - GPE - Countries, cities, states - 4 5 19 23
6$ million - MONEY - Monetary values, including unit - 7 10 36 46


## In the example above, 'HawkeyT' is not recognised as an entity, but we want it to be included in the entity list. The process for adding it is shown below.

In [118]:
from spacy.tokens import Span

In [119]:
# Look up the ID that the vocabulary's string store associates with 'ORG'.
ORG = doc.vocab.strings['ORG']

In [120]:
# Wrap the first token (start 0, end 1) in a Span that carries the ORG label.
new_ent = Span(doc,0,1,label=ORG)


In [121]:
# Show the container type, the current entities and the candidate span,
# one per line.
print(type(doc.ents), doc.ents, new_ent, sep='\n')

<class 'tuple'>
(U.K., 6$ million)
HawkeyT


In [122]:
# doc.ents is a tuple, so build a new list holding the existing entities
# plus the new span and assign it back.
doc.ents = [*doc.ents, new_ent]

In [123]:
# Re-inspect after the assignment: container type, entities and the added
# span, one per line.
print(type(doc.ents), doc.ents, new_ent, sep='\n')

<class 'tuple'>
(HawkeyT, U.K., 6$ million)
HawkeyT


In [124]:
show_entity(doc)

HawkeyT - ORG - Companies, agencies, institutions, etc. - 0 1 0 7
U.K. - GPE - Countries, cities, states - 4 5 19 23
6$ million - MONEY - Monetary values, including unit - 7 10 36 46


## 'HawkeyT' is now added as expected, labelled as ORG

---
### What if there are several terms that need to be added to the entity list? Let's look at that case as well.

In [161]:
# NOTE(review): the two adjacent literals are concatenated with no separating
# space, so the parsed text reads "...in market.Vaccum-cleaner is...". Confirm
# whether a space after the full stop was intended; adding one shifts the
# character offsets of everything after it.
doc = nlp(
    "This vacuum cleaner is one of the best in market."
    "Vaccum-cleaner is required in every house"
)

In [149]:
show_entity(doc)

No entity found


In [139]:
# phrase matcher

In [140]:
from spacy.matcher import PhraseMatcher

In [141]:
# Build a phrase matcher that shares the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [142]:
# Surface forms that should be recognised as entities; the spelling matches
# the text being parsed exactly.
phrase_list = [
    'vacuum cleaner',
    'Vaccum-cleaner',
]

In [143]:
# Patterns only need to be tokenized, so use nlp.make_doc rather than running
# the whole pipeline on every phrase; the resulting Docs display the same text.
phrase_pattern = [nlp.make_doc(text) for text in phrase_list]

In [144]:
phrase_pattern

[vacuum cleaner, Vaccum-cleaner]

In [145]:
# Register all phrase Docs under the single match key 'newproduct'.
# NOTE(review): the `None` presumably fills the on_match callback slot of the
# older add() signature; confirm against the installed spaCy version.
matcher.add('newproduct',None,*phrase_pattern)

In [162]:
# Run the matcher over the parsed text; each hit is a (match_id, start, end)
# triple, as unpacked further down when the spans are built.
found_matches  = matcher(doc)

In [175]:
found_matches

[(2689272359382549672, 1, 3), (2689272359382549672, 11, 14)]

In [164]:
doc

This vacuum cleaner is one of the best in market.Vaccum-cleaner is required in every house

In [165]:
# now span
from spacy.tokens import Span

In [166]:
# Look up the integer ID that the vocabulary's string store uses for 'PRODUCT'.
PROD = doc.vocab.strings['PRODUCT']

In [167]:
PROD

384

In [177]:
# Turn every (match_id, start, end) hit into a PRODUCT-labelled Span. The match
# id is not needed, and a comprehension keeps the loop variables out of the
# notebook's global namespace.
new_ent = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [179]:
# Assign back the existing entities followed by the newly built spans.
doc.ents = [*doc.ents, *new_ent]

In [180]:
show_entity(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services) - 1 3 5 19
Vaccum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services) - 11 14 49 63


## Finding the number of entities of a specific type

In [183]:
# Sentence with two dollar amounts, used below to filter and count MONEY entities.
doc = nlp('I bought Apple phone in $1000  but my friend suggested about one cheaper one which was of $900 ')

In [184]:
# Keep only the entities whose label is MONEY.
[span for span in doc.ents if span.label_ == 'MONEY']

[1000, 900]

In [185]:
# Count the MONEY entities without building an intermediate list.
sum(1 for span in doc.ents if span.label_ == 'MONEY')

2

In [198]:
# Same sentence with the amounts written as "<number> euro", used below to
# inspect the CARDINAL entities.
doc = nlp('I bought Apple phone in 1000 euro  but my friend suggested about one cheaper one which was in 800 euro ')

In [199]:
# Keep only the entities whose label is CARDINAL.
[span for span in doc.ents if span.label_ == 'CARDINAL']

[1000, about one, one, 800]