In [1]:
import spacy

In [2]:
# Load spaCy's small English pipeline; every `nlp(...)` call below runs text through it.
nlp = spacy.load('en_core_web_sm')

In [8]:
def show_ents(doc):
    """Print every named entity of ``doc`` with its label and the label's description.

    One line is written per entity, formatted as
    ``text ---> LABEL ---> explanation`` where the explanation comes from
    ``spacy.explain``. If ``doc.ents`` is empty, a single fallback message
    is printed instead.
    """
    entities = doc.ents
    if not entities:
        print('No entities found')
        return
    for entity in entities:
        label = entity.label_
        print(f'{entity.text} ---> {label} ---> {spacy.explain(label)}')

In [10]:
# Baseline: a sentence in which the model finds no named entities.
doc = nlp(u'How are you?')

In [11]:
show_ents(doc)

No entities found


In [14]:
# "May" occurs twice (modal verb, then month) and "Washington" twice (city, then monument),
# so this sentence exercises context-dependent entity recognition.
doc = nlp(u'May i go to Washington, DC next May to see the Washington Monument?')

In [15]:
# In the output below only the second "May" is tagged (inside the DATE "next May"),
# and "the Washington Monument" is labelled ORG by this model.
show_ents(doc)

Washington, DC ---> GPE ---> Countries, cities, states
next May ---> DATE ---> Absolute or relative dates or periods
the Washington Monument ---> ORG ---> Companies, agencies, institutions, etc.


In [16]:
# A sentence containing a monetary amount and a company name.
doc = nlp(u'Can i please have 500 dollars of Microsoft stock.')

In [17]:
show_ents(doc)

500 dollars ---> MONEY ---> Monetary values, including unit
Microsoft ---> ORG ---> Companies, agencies, institutions, etc.


In [18]:
# "Tesla" is the first token of this sentence; later cells add it as an ORG entity by hand.
doc = nlp(u'Tesla to build a U.K. factory for $6 million.')

In [19]:
# Note that "Tesla" is absent from the output below: the model does not tag it here.
show_ents(doc)

U.K. ---> GPE ---> Countries, cities, states
$6 million ---> MONEY ---> Monetary values, including unit


In [20]:
# Defining Custom Named Entity

from spacy.tokens import Span

In [21]:
# Look up the integer ID that the vocabulary's string store holds for the label 'ORG'.
ORG = doc.vocab.strings[u'ORG']

In [22]:
ORG

381

In [23]:
# Create a span for the new entity: token index 0 up to (but excluding) 1, labelled ORG.
# With the 'Tesla to build ...' sentence loaded in `doc`, that span is "Tesla".

new_ent = Span(doc, 0, 1, label=ORG)

In [25]:
# Assign a fresh entity list: the entities already on the doc plus the new span.
# NOTE(review): this cell is not idempotent -- running it a second time would add the same
# span again, which spaCy may reject as an overlapping entity; confirm before re-running.
doc.ents = list(doc.ents) + [new_ent]

In [26]:
# "Tesla" is now listed as ORG alongside the entities the model found itself.
show_ents(doc)

Tesla ---> ORG ---> Companies, agencies, institutions, etc.
U.K. ---> GPE ---> Countries, cities, states
$6 million ---> MONEY ---> Monetary values, including unit


In [37]:
# Adding multiple phrases as named entities

# Adjacent string literals are concatenated as-is, so the first literal ends with a
# space; without it the two sentences fuse into "cleaner.This".
# ("vaccum" is kept as spelled because the phrase patterns defined later match that spelling.)
doc = nlp(u'Our company has created a brand new vaccum cleaner. '
          u'This vaccum-cleaner is the best in show.')

In [28]:
# The model reports no entities for this text (see output below).
show_ents(doc)

No entities found


In [29]:
from spacy.matcher import PhraseMatcher

In [30]:
# Phrase-level matcher built on the pipeline's vocabulary.
phraseMatcher = PhraseMatcher(nlp.vocab)

In [31]:
# Surface forms to look for: the spaced and the hyphenated spelling.
phrase_list = ['vaccum cleaner', 'vaccum-cleaner']

In [32]:
# Run each phrase through the pipeline; the resulting objects are what gets
# registered with the matcher as patterns.
phrase_patterns = [nlp(text) for text in phrase_list]

In [36]:
phrase_patterns

[vaccum cleaner, vaccum-cleaner]

In [33]:
# Register all patterns under the key 'NewProduct'. In the spaCy 2.x signature used here
# the second argument is the on-match callback (None = no callback).
# NOTE(review): spaCy 3 changed this call to `add(key, patterns)` -- confirm the installed version.
phraseMatcher.add('NewProduct', None, *phrase_patterns)

In [38]:
# Apply the matcher to the current `doc` and keep the list of matches.
found_matches = phraseMatcher(doc)

In [39]:
# Each match is a (match_id, start_token, end_token) tuple; the end index is exclusive.
found_matches

[(17436358318007586288, 7, 9), (17436358318007586288, 11, 14)]

In [40]:
from spacy.tokens import Span

In [41]:
# Integer ID of the 'PRODUCT' label, used below to tag the matched spans.
PROD = doc.vocab.strings[u'PRODUCT']

In [44]:
# One PRODUCT-labelled span per match; the match ID itself is not needed here.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [45]:
# Assign a fresh entity list: the entities already on the doc plus the matched spans.
# NOTE(review): not idempotent -- re-running would add the same spans again, which
# spaCy may reject as overlapping entities; confirm before re-running.
doc.ents = list(doc.ents) + new_ents

In [46]:
# Both spellings are now reported as PRODUCT entities.
show_ents(doc)

vaccum cleaner ---> PRODUCT ---> Objects, vehicles, foods, etc. (not services)
vaccum-cleaner ---> PRODUCT ---> Objects, vehicles, foods, etc. (not services)


In [47]:
# To find how often a specific entity type occurs in a document

doc = nlp(u'Originally i paid $29.95 for car and it has marked down to $10 now.')

In [49]:
# Keep only the entities whose label is MONEY.
[ent for ent in doc.ents if ent.label_ == 'MONEY']

[29.95, 10]

In [50]:
# Count the MONEY entities without materialising an intermediate list.
sum(1 for entity in doc.ents if entity.label_ == 'MONEY')

2

In [51]:
# Visualizing Named Entity Recognition
# Sample sentence to render with displaCy.

doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')

In [52]:
from spacy import displacy

In [53]:
# Draw the entity ('ent') visualisation inline in the notebook.
displacy.render(doc, style='ent', jupyter=True)

In [56]:
# Adjacent string literals are concatenated as-is, so the first literal ends with a
# space; without it the two sentences fuse into "million.By".
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. '
          u'By contrast, Sony sold 8 thousand walkman music players.')

In [57]:
# Render one visualisation per sentence: each sentence's text is re-processed
# into its own Doc before being handed to displaCy.
for sentence in doc.sents:
    sentence_doc = nlp(sentence.text)
    displacy.render(sentence_doc, style='ent', jupyter=True)

In [58]:
# To highlight only particular entities:
# the 'ents' option lists the entity labels to show.

options = {'ents': ['PRODUCT', 'ORG']}

In [60]:
# Per-sentence rendering again, this time passing `options` to the visualizer.
for sentence in doc.sents:
    sentence_doc = nlp(sentence.text)
    displacy.render(sentence_doc, style='ent', jupyter=True, options=options)

In [63]:
# To assign customized colors for entities: map a label to a CSS color value.
# A plain color name should work as well, e.g. {'ORG': 'red'}.
colors = {'ORG': 'radial-gradient(yellow, green)'}
options = {'ents': ['PRODUCT', 'ORG'], 'colors':colors}

In [64]:
# Per-sentence rendering using the current `options` (label filter plus custom colors).
for sentence in doc.sents:
    sentence_doc = nlp(sentence.text)
    displacy.render(sentence_doc, style='ent', jupyter=True, options=options)

In [65]:
# Rendering on external server
# Starts a local web server (the log below shows port 5000); the cell keeps running
# until the server is shut down.

displacy.serve(doc, style='ent', options=options)


[93m    Serving on port 5000...[0m
    Using the 'ent' visualizer



127.0.0.1 - - [22/Aug/2019 12:43:17] "GET / HTTP/1.1" 200 1748
127.0.0.1 - - [22/Aug/2019 12:43:18] "GET /favicon.ico HTTP/1.1" 200 1748



    Shutting down server on port 5000.

