In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")

In [2]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text + " - " + ent.label_ + " - " + str(spacy.explain(ent.label_)))
    else:
        print("No Entities Found")

In [3]:
doc = nlp(u"Hii how are you?")

In [4]:
show_ents(doc)

No Entities Found


In [5]:
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?") 

In [6]:
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [7]:
doc = nlp(u"Can i please have 500 dollars of Microsoft stocks?")

In [8]:
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


<br>
__________________________________________________________________________________________________________________________

# Define our own variable

In [9]:
doc = nlp(u"Test to built a U.K. factory for $6 Million")

In [10]:
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 Million - MONEY - Monetary values, including unit


Here spacy didn't recognized the ***Tesla*** as a company.

In [11]:
from spacy.tokens import Span

ORG = doc.vocab.strings[u"ORG"]

In [12]:
ORG

383

In [13]:
new_entity = Span(doc, 0, 1, label = ORG) 
# 0: Start index position, 1: end index position which is not included

In [14]:
doc.ents = list(doc.ents) + [new_entity]

In [15]:
show_ents(doc)

Test - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 Million - MONEY - Monetary values, including unit


Now ***Tesla*** is considered as company.

<br>
__________________________________________________________________________________________________________________________

# Add Several items too add NERs

In [16]:
doc = nlp(u"Our company created a brand new vacuum cleaner."
         u"This new vacuum-cleaner is the best in show.")

In [17]:
show_ents(doc)

No Entities Found


In [18]:
from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)

phrase_list = ['vacuum cleaner', 'vacuum-cleaner']

phrase_pattern = [nlp(text) for text in phrase_list]

In [19]:
# apply patterns to match our matcher obj

matcher.add('newproduct', None, *phrase_pattern) # * : Means all

In [20]:
found_matches = matcher(doc)
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [21]:
from spacy.tokens import Span

PROD = doc.vocab.strings[u"PRODUCT"]

In [22]:
new_ents = [Span(doc, match[1], match[2], label=PROD) for match in found_matches]

doc.ents = list(doc.ents) + new_ents

In [23]:
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


<br>
__________________________________________________________________________________________________________________________

# Find out How may time any Named Entity is mentioned 

In [24]:
doc = nlp(u"Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars")

In [25]:
[ent for ent in doc.ents if ent.label_ == 'MONEY']

[29.95, 10 dollars]

In [26]:
len([ent for ent in doc.ents if ent.label_ == 'MONEY'])

2