In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; the cells below reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [3]:
def show_ents(doc):
    """Print each entity of ``doc`` on its own line as ``text - label - explanation``.

    When ``doc.ents`` is empty, the single line 'No entities found' is
    printed instead. Nothing is returned.
    """
    if not doc.ents:
        print('No entities found')
        return
    for entity in doc.ents:
        # str() keeps the line printable even when explain() yields a non-string.
        fields = (entity.text, entity.label_, str(spacy.explain(entity.label_)))
        print(' - '.join(fields))

In [4]:
# Baseline input: a plain greeting run through the pipeline.
doc = nlp(u'Hi How are you ?')

In [5]:
# Report the entities of the greeting (the recorded run found none).
show_ents(doc)

No entities found


In [6]:
# Sample with lower-cased place names and the word "may" in two different roles.
doc = nlp(u"May I go to washington,DC next may to see the washington monuments ? ")

In [7]:
# List what the model tagged in the Washington sample.
show_ents(doc)

washington - GPE - Countries, cities, states
DC - GPE - Countries, cities, states
washington - GPE - Countries, cities, states


In [8]:
# Sample that mixes a lower-cased place name with a year expression.
doc = nlp(u"I bought a pen in new york in the year of 2008 ! ")

In [9]:
# Show the place and date entities found in the sample above.
show_ents(doc)

new york - GPE - Countries, cities, states
the year of 2008 - DATE - Absolute or relative dates or periods


In [10]:
# Headline-style sample; the cells below add an entity to this doc by hand.
doc = nlp(u"Tesla to build a U.K to factory for $6 million ")

In [11]:
# Entities predicted by the model alone; in the recorded run "Tesla" is not among them.
show_ents(doc)

U.K - ORG - Companies, agencies, institutions, etc.
$6 million - MONEY - Monetary values, including unit


In [12]:
from spacy.tokens import Span

In [13]:
# Look up the integer ID that the vocab's string store holds for the label "ORG".
ORG = doc.vocab.strings[u"ORG"]

In [14]:
# Echo the looked-up label ID.
ORG

383

In [15]:
# Span over token 0 only (the end index 1 is exclusive), labelled with the ORG ID.
new_ent = Span(doc,0,1,label=ORG)

In [16]:
# Give the hand-made span priority: drop any existing entity that shares a
# token with it, then add it. A token may belong to only one entity, so
# without this filter the assignment fails when the cell is re-run or when
# the model has already tagged the same tokens.
doc.ents = [
    ent for ent in doc.ents
    if ent.end <= new_ent.start or ent.start >= new_ent.end
] + [new_ent]

In [17]:
# Confirm the manually added span now appears alongside the model's entities.
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K - ORG - Companies, agencies, institutions, etc.
$6 million - MONEY - Monetary values, including unit


In [18]:
# Two-sentence sample that uses both spellings of the product name.
# Adjacent string literals are joined with no separator, so the first one
# ends with an explicit space to keep the sentences apart.
doc = nlp(
    u"Our company created a brand new vaccum cleaner. "
    u"This new vaccum-cleaner is the best in show."
)

In [19]:
# Entities predicted by the model before any product spans are added.
show_ents(doc)

No entities found


In [20]:
from spacy.matcher import PhraseMatcher

In [21]:
# Phrase matcher built on the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [22]:
# Surface forms to look for: the spaced and the hyphenated spelling.
phrase_list = ['vaccum cleaner','vaccum-cleaner']

In [23]:
# The matcher was created with its default settings, which compare token
# text only, so tokenizing each phrase is enough. nlp.make_doc() does just
# that and skips the remaining pipeline components that nlp(text) would run.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [24]:
# Register every pattern under the key 'newproduct'. In this call form the
# second argument is the on-match callback slot; None means no callback.
matcher.add('newproduct',None,*phrase_patterns)

In [25]:
# Run the matcher over doc; each hit is a (match_id, start, end) tuple.
found_matches = matcher(doc)

In [26]:
# Inspect the raw match tuples.
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [27]:
from spacy.tokens import Span

In [28]:
# Look up the integer ID that the vocab's string store holds for the label "PRODUCT".
PROD = doc.vocab.strings[u"PRODUCT"]

In [29]:
# Show the match tuples again before turning them into spans.
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [31]:
# Turn each (match_id, start, end) hit into a span over doc labelled with PROD.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [35]:
# Give the matched product spans priority: drop every existing entity that
# shares a token with one of them, then add the new spans. A token may belong
# to only one entity, so without this filter the assignment fails when the
# cell is re-run or when the model has already tagged part of a product name.
doc.ents = [
    ent for ent in doc.ents
    if all(ent.end <= span.start or ent.start >= span.end for span in new_ents)
] + new_ents

In [36]:
# Confirm the PRODUCT spans now appear among the entities.
show_ents(doc)

Our - ORG - Companies, agencies, institutions, etc.
vaccum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vaccum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [37]:
# Sample with two amounts of money, one written with "$" and one with "dollars".
doc = nlp(u"Originally I paid $29.95 for this car toy,but now it is mark down by 10 dollars.")

In [39]:
# Keep only the entities whose label is "MONEY".
list(filter(lambda entity: entity.label_ == "MONEY", doc.ents))

[29.95, 10 dollars]

In [40]:
# Count the "MONEY" entities without building an intermediate list.
sum(1 for entity in doc.ents if entity.label_ == "MONEY")

2