In [1]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+' - '+ent.label_+ ' - '+str(spacy.explain(ent.label_)))
    else:
        print("No entites Found")

In [3]:
doc = nlp(u"Hi, How are you");
show_ents(doc)

No entites Found


In [4]:
doc = nlp(u'Tesla May I go to India, next Monday to visit Mahatma Gandhi, to buy $500 worth Indian stock')
show_ents(doc)

India - GPE - Countries, cities, states
next Monday - DATE - Absolute or relative dates or periods
Mahatma Gandhi - PERSON - People, including fictional
500 - MONEY - Monetary values, including unit
Indian - NORP - Nationalities or religious or political groups


In [5]:
from spacy.tokens import Span

In [6]:
ORG = doc.vocab.strings[u"ORG"]
ORG

383

In [7]:
new_entity = Span(doc, 0,1, label=ORG)
doc.ents = list(doc.ents) + [new_entity]

In [8]:
doc = nlp(u'Tesla May I go to India, next Monday to visit Mahatma Gandhi, to buy $500 worth Indian stock')
show_ents(doc)

India - GPE - Countries, cities, states
next Monday - DATE - Absolute or relative dates or periods
Mahatma Gandhi - PERSON - People, including fictional
500 - MONEY - Monetary values, including unit
Indian - NORP - Nationalities or religious or political groups


In [34]:
doc = nlp(u"Our company created a brand new vaccum cleaner."
          u"This new vaccum-cleaner is best in the shop")

In [35]:
show_ents(doc)

No entites Found


In [36]:
from spacy.matcher import PhraseMatcher

In [37]:
matcher = PhraseMatcher(nlp.vocab)

In [38]:
phrase_list = ['vaccum cleaner','vaccum-cleaner']
phrase_patterns = [nlp(text) for text in phrase_list]

In [39]:
matcher.add('newProduct', None, *phrase_patterns)

In [40]:
found_matches = matcher(doc)
found_matches

[(4452177204818730156, 6, 8), (4452177204818730156, 11, 14)]

In [41]:
doc.text

'Our company created a brand new vaccum cleaner.This new vaccum-cleaner is best in the shop'

In [42]:
from spacy.tokens import Span

In [43]:
PROD = doc.vocab.strings[u"PRODUCT"]
new_ents = [Span(doc,match[1], match[2], label=PROD) for match in found_matches]

In [44]:
doc.ents = list(doc.ents) + new_ents

In [45]:
show_ents(doc)

vaccum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vaccum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [48]:
doc = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by $10")

In [51]:
len([ent for ent in doc.ents if ent.label_ == 'MONEY'])

2

In [52]:
from spacy import displacy

In [60]:
doc = nlp(u"Over the last quarter Apple sold nealy 20 thousands ipods for a profit of $6 million"
         u"By Contract Sony only sold 8 thousand Walkman music players")

In [61]:
displacy.render(doc, style='ent', jupyter=True)

In [81]:
for sent in doc.sents:
        displacy.render(sent, style='ent', jupyter=True, options=options)

In [80]:
options = {'ents':['PRODUCT','ORG'], 'colors':{'ORG':'radial-gradient(yellow,green)'}}