In [1]:
import spacy

In [3]:
nlp = spacy.load('en_core_web_sm')

In [10]:
def show_entities(doc):
    if (doc.ents):
        for ent in doc.ents:
            print(f'{ent.text:{16}} | {ent.label_:{10}} | {str(spacy.explain(ent.label_)):{16}}')
            
    else:
        print('No entities gefund')
    

In [5]:
doc = nlp(u'May I go to Bega next year? to see the big Bega cheese factories?')

In [11]:
show_entities(doc)

Bega next year   | DATE       | Absolute or relative dates or periods
Bega             | PERSON     | People, including fictional


In [12]:
doc = nlp('Can I please have 500 dollars of Microsoft stock.')

In [13]:
show_entities(doc)

500 dollars      | MONEY      | Monetary values, including unit
Microsoft        | ORG        | Companies, agencies, institutions, etc.


In [14]:
doc = nlp('Tesla to build a UK factory for $6 million')

In [15]:
show_entities(doc)

UK               | GPE        | Countries, cities, states
$6 million       | MONEY      | Monetary values, including unit


In [20]:
from spacy.tokens import Span

In [17]:
org = doc.vocab.strings[u'ORG']

In [18]:
org

383

In [21]:
# Create a new span and give a label to it
new_ent = Span(doc, 0, 1, label='ORG')

In [23]:
# We add an entity to the doc
doc.ents = list(doc.ents) + [new_ent]

ValueError: [E1010] Unable to set entity information for token 0 which is included in more than one span in entities, blocked, missing or outside.

In [24]:
show_entities(doc)

Tesla            | ORG        | Companies, agencies, institutions, etc.
UK               | GPE        | Countries, cities, states
$6 million       | MONEY      | Monetary values, including unit


In [54]:
# Add multiple NERs...
doc = nlp(u'Our company created a brand new vacuum cleaner! ' u'This new vacuum-cleaner is the best in the world')

In [55]:
doc

Our company created a brand new vacuum cleaner! This new vacuum-cleaner is the best in the world

In [56]:
show_entities(doc)

No entities gefund


In [29]:
from spacy.matcher import PhraseMatcher

In [30]:
matcher = PhraseMatcher(nlp.vocab)

In [57]:
phrases = ['vacuum cleaner', 'vacuum-cleaner']

In [58]:
phrase_patterns = [nlp(text) for text in phrases]

In [59]:
matcher.add('newproduct', None, *phrase_patterns)

In [60]:
found_matches = matcher(doc)

In [61]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [37]:
from spacy.tokens import Span

In [62]:
PROD = doc.vocab.strings[u'PRODUCT']

In [63]:
new_ents = [Span(doc, match[1], match[2], label=PROD) for match in found_matches]

In [64]:
new_ents

[vacuum cleaner, vacuum-cleaner]

In [65]:
doc.ents = list(doc.ents) + new_ents

In [66]:
show_entities(doc)

vacuum cleaner   | PRODUCT    | Objects, vehicles, foods, etc. (not services)
vacuum-cleaner   | PRODUCT    | Objects, vehicles, foods, etc. (not services)


In [67]:
doc3 = nlp(u'Originally I paid $29.95 for this candle, but now the candle is marked down by $10')

In [68]:
doc3

Originally I paid $29.95 for this candle, but now the candle is marked down by $10

In [73]:
# Count amount of a particular named entity.
money_ents = [ent for ent in doc3.ents if ent.label_ == 'MONEY']

In [74]:
money_ents

[29.95, 10]

In [75]:
# Visualise!
from spacy import displacy

In [83]:
doc = nlp(u'Over the last quater, Apple sold over 20 thousand iPods for a profit of $6 Million'
         u'\nBy contrast, Sony only sold 8 thousand Walkman music players')

In [84]:
displacy.render(doc, style='ent', jupyter=True)

In [85]:
# Seperate into sent
for sent in doc.sents:
    displacy.render(nlp(sent.text), style='ent', jupyter=True)

In [100]:
# Only products please!
colours = { 'PRODUCT': 'red', 'ORG': 'linear-gradient(45deg, orange, red)'}
options = { 'ents': ['PRODUCT', 'ORG'], 'colors': colours}

In [101]:
displacy.render(doc, style='ent', jupyter=True, options=options)

In [102]:
# If in .py script
# displacy.serve(doc, style='ent', options=options)