In [1]:
import spacy

In [2]:
# Load the small English pipeline once; `nlp` is reused for every doc created below.
nlp = spacy.load('en_core_web_sm')

In [9]:
def show_ents(doc):
    """Print one line per named entity found in ``doc``.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is the result of ``spacy.explain`` for the entity label,
    passed through ``str()`` before joining.  When ``doc.ents`` is empty a
    single fallback message is printed instead.  Nothing is returned.
    """
    if not doc.ents:
        print('No entities found')
        return
    for entity in doc.ents:
        fields = (entity.text, entity.label_, str(spacy.explain(entity.label_)))
        print(' - '.join(fields))

In [5]:
doc = nlp(u'Hi how are you?')

In [6]:
show_ents(doc)

No entities found


In [7]:
# "May" occurs here both as a modal verb and as a month name, and "Washington"
# both in a place name and in a monument name.
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")

In [10]:
show_ents(doc)

Washington - GPE - Countries, cities, states
DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [11]:
doc = nlp("Can I please have 500 dollars of Microsoft Stock?")
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft Stock - ORG - Companies, agencies, institutions, etc.


In [12]:
# Example in which "Tesla" is not picked up: the entity listing printed for this doc
# in the next cell contains only "U.K." and "$6 million".
doc = nlp(u'Tesla to build a U.K. Factory for $6 million')

In [13]:
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [14]:
from spacy.tokens import Span

In [15]:
# Look up the integer ID that the vocab's string store holds for the label "ORG";
# this ID is what gets passed as `label=` when the Span is built below.
ORG = doc.vocab.strings[u"ORG"]
ORG

383

In [16]:
# Span over tokens [0, 1) of `doc` -- the single token "Tesla" (see the echoed value) -- labelled ORG.
new_ent = Span(doc,0,1,label=ORG)
new_ent

Tesla

In [18]:
doc.ents

(U.K., $6 million)

In [19]:
# doc.ents is displayed as a tuple above, so it is converted to a list before the new span is appended.
# NOTE(review): this cell is not idempotent -- running it again would try to add the
# same "Tesla" span a second time; presumably spaCy rejects overlapping entities -- confirm.
doc.ents = list(doc.ents)+ [new_ent]
doc.ents

(Tesla, U.K., $6 million)

In [21]:
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [23]:
# How to add multiple named entities

In [24]:
# The first literal ends with a space: adjacent string literals are concatenated
# verbatim, so without it the text would read "cleaner.This".
# The spelling "vaccum" is kept as-is because the phrase list matched against
# this doc further down uses the same spelling.
doc = nlp(u"Our company created a brand new vaccum cleaner. "
          u"This new vaccum-cleaner is the best in show.")

In [26]:
show_ents(doc)

No entities found


In [27]:
from spacy.matcher import PhraseMatcher

In [29]:
matcher = PhraseMatcher(nlp.vocab)

In [30]:
phrase_list = ['vaccum cleaner','vaccum-cleaner']

In [31]:
# Run each phrase through the pipeline so the matcher is given Doc objects as patterns.
phrase_pattern = [nlp(text) for text in phrase_list]

In [32]:
# Register every phrase Doc under the single key 'newproduct'.
# NOTE(review): the (key, None, *patterns) call form looks like the spaCy v2 signature
# (second positional presumably the on_match callback); spaCy v3 expects
# matcher.add(key, patterns) -- confirm against the installed version.
matcher.add('newproduct',None,*phrase_pattern)

In [33]:
# Each match is a 3-tuple; the last two items are used further down as the start and
# end token indices of a Span. Both hits carry the same first item, presumably because
# both phrases were registered under one key -- confirm.
found_matches = matcher(doc)
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [34]:
from spacy.tokens import Span

In [36]:
PROD = doc.vocab.strings[u"PRODUCT"]

In [37]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [41]:
# Wrap every matcher hit in a PRODUCT-labelled Span; the first tuple item is not needed here.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [42]:
# Append the new PRODUCT spans to whatever entities the doc already has.
doc.ents = list(doc.ents) + new_ents

In [43]:
show_ents(doc)

vaccum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vaccum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [44]:
doc = nlp(u'Originally priced at $29.50,\nthe sweater was marked down to five dollars.')

In [47]:
[ent for ent in doc.ents if ent.label_ == "MONEY"]

[29.50, five dollars]

In [48]:
# Count the MONEY entities without materialising an intermediate list.
sum(1 for ent in doc.ents if ent.label_ == "MONEY")

2

In [49]:
# Visualizing named entity recognition

In [51]:
from spacy import displacy

In [50]:
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. '
         u'By contrast, Sony sold only 7 thousand Walkman music players.')

In [52]:
displacy.render(doc,style='ent',jupyter=True)

In [53]:
# Render one visualisation per sentence. Each sentence's text is run through `nlp`
# again, so a fresh doc is built per sentence rather than reusing the parse held in `doc`.
for sent in doc.sents:
    displacy.render(nlp(sent.text),style='ent',jupyter=True)

In [58]:
# Entity labels handed to the visualiser through its `options` argument.
options = {
    "ents": ["PRODUCT", "ORG"],
}

In [59]:
displacy.render(doc,style='ent',jupyter=True,options=options)

In [64]:
# Visualiser options: the two entity labels plus a plain named colour for ORG.
colors = {"ORG": "red"}
options = {
    "ents": ["PRODUCT", "ORG"],
    "colors": colors,
}

In [65]:
displacy.render(doc,style='ent',jupyter=True,options=options)

In [66]:
# Visualiser options: the two entity labels plus a hex colour for ORG.
colors = {"ORG": "#aa9cfc"}
options = {
    "ents": ["PRODUCT", "ORG"],
    "colors": colors,
}

In [67]:
displacy.render(doc,style='ent',jupyter=True,options=options)

In [68]:
# Visualiser options: the two entity labels plus a radial-gradient value for ORG.
colors = {"ORG": "radial-gradient(yellow,green)"}
options = {
    "ents": ["PRODUCT", "ORG"],
    "colors": colors,
}

In [69]:
displacy.render(doc,style='ent',jupyter=True,options=options)

In [72]:
# Visualiser options: the two entity labels plus a left-to-right gradient for ORG.
# Both gradient stops must be valid hex colours: the second stop used to be the
# five-digit "#fc9e7", which is not a valid CSS colour and invalidates the whole
# gradient, so it is corrected to "#fc9ce7".
colors = {'ORG': 'linear-gradient(90deg,#aa9cfc,#fc9ce7)'}
options = {'ents': ['PRODUCT', 'ORG'], 'colors': colors}

In [73]:
displacy.render(doc,style='ent',jupyter=True,options=options)

In [None]:
# Serve the visualisation over HTTP instead of rendering it inline.
# NOTE(review): the captured output shows the server listening on 0.0.0.0:5000 and the
# cell has no execution count, so this call presumably blocks until interrupted -- keep it
# as the last cell and confirm whether binding to all interfaces is intended.
displacy.serve(doc,style='ent',options=options)




Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

