In [1]:
import spacy

In [2]:
# Load the small English pipeline; later cells parse their sample text through `nlp`.
nlp = spacy.load('en_core_web_sm')

In [13]:
def show_ents(doc):
    """Print one line per named entity in ``doc``.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is whatever ``spacy.explain`` returns for the label
    (rendered via ``str``, so an unknown label prints ``None``).
    When ``doc.ents`` is empty, a single "No entities found" line is printed.
    """
    entities = doc.ents
    if not entities:
        print("No entities found")
        return
    for entity in entities:
        label = entity.label_
        description = str(spacy.explain(label))
        print(' - '.join([entity.text, label, description]))

In [14]:
# A sentence with no named entities, to exercise the "No entities found" branch.
doc = nlp(u"Hi how are you")

In [15]:
show_ents(doc)

No entities found


In [16]:
# "May" appears both as a verb and as a month, and "Washington" both as a city
# and inside a monument name, so this shows how the model separates them.
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument")

In [17]:
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [18]:
# Sample text containing a monetary amount and a company name.
doc = nlp(u"Can i please 500 dollars of Microsoft Dollars")

In [19]:
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [20]:
# In the recorded run the model tags "U.K." and "$6 million" here but not "Tesla".
doc = nlp(u"Tesla to build a U.K. factory for $6 million")

In [21]:
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [25]:
# List the tokens in order; "Tesla" is the first one, i.e. token index 0.
for token in doc:
    print(token.text)

Tesla
to
build
a
U.K.
factory
for
$
6
million


In [26]:
from spacy.tokens import Span

In [27]:
# Look up the integer ID that the vocab's string store assigns to the "ORG" label.
ORG = doc.vocab.strings[u"ORG"]

In [28]:
ORG

383

In [29]:
# Span over tokens [0, 1) of doc, i.e. the single token "Tesla", labelled ORG.
new_ent = Span(doc,0,1,label=ORG)

In [31]:
# Register the hand-made span as an entity. The doc.ents setter rejects
# overlapping spans, so any existing entity that shares tokens with new_ent
# is dropped first; this also lets the cell be re-run without raising.
doc.ents = [ent for ent in doc.ents
            if ent.end <= new_ent.start or ent.start >= new_ent.end] + [new_ent]

In [32]:
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [34]:
# Two-sentence sample text; the recorded run finds no entities in it.
# The first literal ends with a space because adjacent string literals are
# joined with no separator; without it the text would read "cleaner.This".
doc = nlp(u"Our company created a brand new vacuum cleaner. "
          u"This new vacuum-cleaner is the best in show.")

In [35]:
show_ents(doc)

No entities found


In [36]:
from spacy.matcher import PhraseMatcher

In [37]:
# Build a PhraseMatcher on the pipeline's vocab.
matcher = PhraseMatcher(nlp.vocab)

In [39]:
# The two spellings of the product name to search for (spaced and hyphenated).
phrase_list = ['vacuum cleaner','vacuum-cleaner']

In [40]:
# Run each phrase through the pipeline so the matcher receives parsed objects.
# NOTE(review): the name is misspelled ("pattrens"); renaming it requires
# updating the matcher.add cell that reads it.
phrase_pattrens = [nlp(text) for text in phrase_list]

In [41]:
# Register all patterns under the key "newProduct"; the None argument fills
# the on_match callback slot of the spaCy v2 signature add(key, on_match, *docs).
# NOTE(review): spaCy v3 changed this to add(key, docs); confirm the
# installed version before re-running.
matcher.add("newProduct",None,*phrase_pattrens)

In [42]:
# Each match is a (match_id, start, end) tuple; start/end are token offsets into doc.
found_matches = matcher(doc)

In [43]:
found_matches

[(4452177204818730156, 6, 8), (4452177204818730156, 11, 14)]

In [44]:
from spacy.tokens import Span

In [45]:
# Integer ID of the "PRODUCT" label in the vocab's string store.
PROD = doc.vocab.strings[u"PRODUCT"]

In [46]:
# One PRODUCT-labelled Span per match, built from each match's start/end offsets.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [48]:
# Add the matched spans as entities. The doc.ents setter rejects overlapping
# spans, so existing entities that share tokens with any new span are dropped
# first; this also lets the cell be re-run without raising.
doc.ents = [ent for ent in doc.ents
            if all(ent.end <= new.start or ent.start >= new.end
                   for new in new_ents)] + new_ents

In [49]:
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [56]:
# Two amounts written differently: one with a "$" prefix, one spelled out with "dollars".
doc = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars")

In [57]:
show_ents(doc)

29.95 - MONEY - Monetary values, including unit
10 dollars - MONEY - Monetary values, including unit


In [58]:
from spacy import displacy

In [65]:
# Two-sentence sample text for the displaCy entity visualizer.
# The first literal ends with ". " because adjacent string literals are joined
# with no separator; without it the text would read "$6 millionBy contrast".
doc = nlp(u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. "
          u"By contrast Sony only sold 8 thousand Walkman music players")

In [66]:
displacy.render(doc,style='ent')

In [67]:
# displaCy option: restrict the entity rendering to the PRODUCT label.
options = {'ents':['PRODUCT']}

In [69]:
displacy.render(doc,style='ent',options=options)