In [1]:
import spacy
# Load the 'en_core_web_sm' pipeline once; the later cells all reuse `nlp`.
# NOTE(review): presumably the model package has to be installed beforehand
# (e.g. `python -m spacy download en_core_web_sm`) -- confirm for your setup.
nlp = spacy.load('en_core_web_sm')

In [2]:
def show_ents(doc):
    """Print the named entities of ``doc``, one per line.

    Each line has the form ``<entity text> - <label> - <explanation>``, where
    the explanation is whatever ``spacy.explain`` returns for the label
    (rendered through ``str``, so a missing explanation prints as ``None``).
    When ``doc.ents`` is empty a single notice line is printed instead.

    Args:
        doc: Object exposing an ``ents`` sequence whose items have ``text``
            and ``label_`` string attributes (e.g. a processed spaCy Doc).

    Returns:
        None. The function only writes to standard output.
    """
    entities = doc.ents

    # Guard clause: nothing to list.
    if not entities:
        print('No named entities found.')
        return

    for entity in entities:
        text = entity.text
        label = entity.label_
        print(' - '.join([text, label, str(spacy.explain(label))]))

In [3]:
# First example sentence: "May" appears both as a modal verb and as a month,
# and "Washington" appears in a place name and in a monument name.
doc = nlp("May I go to Washington, DC next May to see the Washington Monument?")

In [4]:
# List the entities found in the sentence above. In the recorded output the
# leading "May" is not tagged while "next May" is a DATE, and "the Washington
# Monument" comes back as ORG -- these labels are the model's predictions.
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [5]:
# Second example sentence: contains a money amount and a company name.
doc = nlp("Can I please borrow 500 dollars from you to buy some Microsoft stock?")

In [6]:
# List the entities for the sentence above; the recorded output shows
# "500 dollars" as MONEY and "Microsoft" as ORG.
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [7]:
# Third example sentence: a company name, a country abbreviation and a
# currency amount written with a "$" sign.
doc = nlp("Tesla to build a U.K. factory for $6 million")

In [8]:
# List the entities for the sentence above.
# NOTE: the recorded output labels "Tesla" as ORDINAL. That is a misprediction
# by the loaded model, not a formatting problem in show_ents.
show_ents(doc)

Tesla - ORDINAL - "first", "second", etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [9]:
# Two-sentence example that mentions "vacuum cleaner" twice. The two adjacent
# literals are concatenated into one string before being processed.
doc = nlp(
    "Our company plans to introduce a new vacuum cleaner. "
    "If successful, the vacuum cleaner will be our first product."
)

In [10]:
# List the entities for the two-sentence text above. The recorded output only
# contains "first" (ORDINAL); neither "vacuum cleaner" mention is tagged.
show_ents(doc)

first - ORDINAL - "first", "second", etc.


In [11]:
from spacy.matcher import PhraseMatcher

In [12]:
# Build a phrase matcher that shares the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [13]:
# Surface forms of the product name to search for: spaced and hyphenated.
phrase_list = [
    "vacuum cleaner",
    "vacuum-cleaner",
]

In [14]:
# Turn each phrase into a pattern Doc for the matcher.
# nlp.make_doc runs only the tokenizer, which is all a PhraseMatcher created
# without an `attr` argument needs (it compares verbatim token text). Calling
# the full pipeline with nlp(text) would tag/parse/NER every phrase for no
# benefit, and newer spaCy versions warn about such processed pattern Docs.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [15]:
# Register all phrase patterns under the key "newproduct". The second
# positional argument is passed as None, and the pattern Docs are unpacked
# as the remaining positional arguments.
# NOTE(review): this looks like spaCy's older `add(key, on_match, *docs)` call
# form; newer releases document `matcher.add(key, docs)` -- confirm against the
# installed version before changing it.
matcher.add("newproduct", None, *phrase_patterns)

In [16]:
# Run the matcher over the current `doc` (the vacuum-cleaner text) and keep
# the result for the entity-building cells below.
found_matches = matcher(doc)

In [17]:
# Display the raw matches. The recorded output is a list of 3-tuples; the
# second and third items are later used as the start/end arguments of Span,
# and the first item is the same large integer for both matches.
found_matches

[(2689272359382549672, 7, 9), (2689272359382549672, 14, 16)]

In [19]:
from spacy.tokens import Span

In [20]:
# Look up the "PRODUCT" label string in the doc's vocab string store; the
# result is passed as the `label` of the new spans.
# NOTE(review): the name `PROB` looks like a typo for `PROD`; it is kept
# because a later cell refers to it by this name.
PROB = doc.vocab.strings["PRODUCT"]

In [24]:
# Wrap every phrase match in a Span labelled with PROB. Each match is a
# 3-tuple; only its second and third items (span start / end) are needed.
new_ents = [
    Span(doc, start, end, label=PROB)
    for _match_id, start, end in found_matches
]

In [25]:
# Add the matched PRODUCT spans to the entities the model already found.
# doc.ents does not accept overlapping spans, so a plain concatenation raises
# as soon as this cell is executed a second time (the new spans are then
# already part of doc.ents). filter_spans removes duplicate/overlapping spans,
# preferring the first longest one, which keeps the cell safe to re-run and
# leaves the first-run result unchanged.
doc.ents = spacy.util.filter_spans(list(doc.ents) + new_ents)

In [26]:
# List the entities again after the update: the recorded output now shows both
# "vacuum cleaner" mentions as PRODUCT alongside the original "first" ORDINAL.
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
first - ORDINAL - "first", "second", etc.
