In [2]:
import spacy
# Load the "en_core_web_sm" pipeline once; the cells below call nlp(...) to build each Doc.
nlp = spacy.load("en_core_web_sm")

In [6]:
def show_ents(doc):
    """Print every named entity in ``doc``, one per line.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is the result of ``spacy.explain(label)`` passed through
    ``str`` so that a non-string result can still be concatenated. When
    ``doc.ents`` is empty, the single line ``No Entities found`` is printed
    instead.

    Args:
        doc: Object exposing an ``ents`` iterable whose items have ``text``
            and ``label_`` string attributes (e.g. a spaCy ``Doc``).

    Returns:
        None. The function only writes to standard output.
    """
    # Guard clause: nothing to list.
    if not doc.ents:
        print("No Entities found")
        return

    for entity in doc.ents:
        fields = [entity.text, entity.label_, str(spacy.explain(entity.label_))]
        print(" - ".join(fields))

In [7]:
# Small talk with no names, places or amounts in it.
doc = nlp("Hi how are you?")

In [8]:
# Exercises the "no entities" branch of show_ents for this sentence.
show_ents(doc)

No Entities found


In [11]:
# "May" appears as both a modal verb and a month; "Washington" as both a city and part of a monument name.
doc = nlp("May I go to Washington, DC next May to see the Washington Monument?")

In [12]:
# Show which spans of the sentence were tagged, and with which labels.
show_ents(doc)

Washington - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [17]:
# A made-up project name, to see how the model labels it.
doc = nlp("The name of the project is Jarvis")

In [18]:
# Check which label the model assigns to the project name.
show_ents(doc)

Jarvis - PERSON - People, including fictional


In [19]:
# Part-of-speech tag for every token of the current doc.
for token in doc:
    print(token.text, token.pos_)

The DET
name NOUN
of ADP
the DET
project NOUN
is AUX
Jarvis PROPN


In [20]:
# Sentence containing a monetary amount and a company name.
doc = nlp("can I please have 500 dollars of Microsoft stock?")

In [21]:
# List the entities found in the stock request.
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [22]:
# NOTE(review): "factoru" looks like a typo for "factory". It is left untouched
# because changing the text may change the model's predictions, and the cells
# below rely on "Tesla" not being detected so it can be added by hand.
doc = nlp(u"Tesla to build a U.K. factoru for $6 million")

In [23]:
# Per the recorded output, "Tesla" is not among the detected entities.
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [27]:
from spacy.tokens import Span

In [28]:
# Look up the ID that the vocab's string store holds for the label "ORG".
ORG = doc.vocab.strings["ORG"]

In [29]:
# Display the looked-up value: an integer ID, not the string "ORG".
ORG

383

In [30]:
# Tokens [0, 1) cover just the first token of the sentence ("Tesla"); label that span as ORG.
new_ent = Span(doc, 0, 1, label=ORG)

In [31]:
# Append the manual span to the model's entities. Assigning overlapping spans
# to doc.ents raises, which is what happened when this cell was run a second
# time (the "Tesla" span was already present). filter_spans removes
# duplicate/overlapping spans, preferring the longest and then the first one,
# so the cell can safely be re-run.
doc.ents = spacy.util.filter_spans(list(doc.ents) + [new_ent])

In [32]:
# Inspect the entity tuple after the assignment.
doc.ents

(Tesla, U.K., $6 million)

In [33]:
# The manually added span is now reported with the ORG label next to the model's own entities.
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [34]:
# Two sentences joined by implicit string-literal concatenation. The first
# literal ends with a space so the text does not read "cleaner.This".
# The spelling "vaccum" is kept because the phrase patterns below use it too.
doc = nlp("Our company created a brand new vaccum cleaner. "
          "This new vaccum-cleaner is the best in show.")

In [35]:
# Per the recorded output, neither spelling of the product is detected as an entity.
show_ents(doc)

No Entities found


In [36]:
from spacy.matcher import PhraseMatcher
# Phrase matcher built on the pipeline's vocab.
matcher = PhraseMatcher(nlp.vocab)

In [38]:
# Surface forms to match; the spelling matches the one used in the sample text.
phrase_list = ["vaccum cleaner", "vaccum-cleaner"]

In [39]:
# Turn each phrase into a Doc pattern. The matcher is created with default
# settings (verbatim token text), so tokenization is all that is needed:
# nlp.make_doc skips the tagger/parser/NER that a full nlp(text) call would run.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [40]:
# Register the patterns under the key "newproduct". The patterns are passed as
# one list: the older add(key, None, *docs) call style is rejected by spaCy v3,
# while this form is accepted from v2.2.2 onwards.
matcher.add("newproduct", phrase_patterns)

In [46]:
# Run the matcher over the doc; each result is a (match_id, start, end) tuple
# whose start/end are used as token indices further down.
found_matches = matcher(doc)

In [47]:
# Show the raw (match_id, start, end) tuples.
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [48]:
from spacy.tokens import Span

In [49]:
# Look up the ID that the vocab's string store holds for the label "PRODUCT".
PROD = doc.vocab.strings["PRODUCT"]

In [50]:
# One PRODUCT span per match, built from the match's start/end token indices.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [51]:
# Add the matched product spans to the doc's entities. Assigning overlapping
# spans to doc.ents raises, which happens if this cell is re-run (the spans are
# already present) or if two matches overlap. filter_spans removes
# duplicate/overlapping spans, preferring the longest and then the first one.
doc.ents = spacy.util.filter_spans(list(doc.ents) + new_ents)

In [52]:
# Both product mentions are now listed as PRODUCT entities.
show_ents(doc)

vaccum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vaccum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [53]:
from spacy import displacy

In [61]:
# Two sentences joined by implicit string-literal concatenation. The first
# literal ends with a space so the text does not read "million.By".
doc = nlp("Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. "
          "By contrast Sony only sold 8 thousand Walkman music players.")

In [62]:
# Render the whole doc with the "ent" style, inline in the notebook.
displacy.render(doc, style="ent", jupyter=True)

In [63]:
# Render each sentence separately. sent.as_doc() copies the sentence,
# including its entity annotations, into a standalone Doc. This avoids running
# the pipeline a second time on sent.text and keeps the entities identical to
# the ones shown in the doc-level render.
for sent in doc.sents:
    displacy.render(sent.as_doc(), style="ent", jupyter=True)

In [64]:
# displaCy options: restrict the visualisation to entities labelled PRODUCT.
options = {"ents": ["PRODUCT"]}

In [65]:
# Same "ent" rendering of the doc, this time passing the `options` dict defined above.
displacy.render(doc, style="ent", jupyter=True, options=options)