In [1]:
import spacy



In [2]:
# Load the 'en_core_web_sm' pipeline once; the later cells all reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Process a sample sentence; `doc` is what the displacy cells below render.
doc = nlp(u"The quick brown fox jumped over the lazy dog.")

In [4]:
from spacy import displacy

In [5]:
# Draw the dependency parse (style='dep') with notebook output enabled.
displacy.render(doc,style='dep',jupyter=True)

In [6]:
# Visualizer settings passed to displacy.render in the next cell.
# 'compact' is a real boolean: the string 'True' only worked because any
# non-empty string is truthy, which means 'False' would have enabled it too.
options = {
    'distance': 110,
    'compact': True,
    'color': 'yellow',
    'bg': '#09a3d5',
    'font': 'Times',
}

In [7]:
# Same dependency rendering as before, now styled with the custom `options` dict.
displacy.render(doc,style='dep',jupyter=True,options=options)

In [8]:
# A text made of two sentences, used for the per-sentence view below.
doc2 = nlp(u"This is a sentence. This is another sentence, possibly longer than the other. ")

In [9]:
# Collect the document's sentences into a list so they can be handed to displacy together.
spans = list(doc2.sents)

In [10]:
# Serve the per-sentence dependency views from a local web server rather than
# rendering inline (the recorded output shows it listening on port 5000).
# NOTE(review): this call presumably blocks until the server is stopped, so a
# Restart & Run All would stall here — consider displacy.render(..., jupyter=True).
displacy.serve(spans,style='dep',options={'distance':110})


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [11]:
def show_ents(doc):
    """Print every named entity of ``doc`` as ``text - label - explanation``.

    If ``doc.ents`` is empty, a single 'No entities found' line is printed
    instead. The explanation is whatever ``spacy.explain`` returns for the
    label, converted with ``str`` before being joined.
    """
    if not doc.ents:
        print('No entities found')
        return

    for entity in doc.ents:
        fields = [entity.text, entity.label_, str(spacy.explain(entity.label_))]
        print(' - '.join(fields))

In [12]:
# A greeting with nothing entity-like in it, to exercise the empty branch of show_ents.
doc = nlp(u"Hi how are you?")

In [13]:
# Expect the 'No entities found' fallback here.
show_ents(doc)

No entities found


In [14]:
# 'May' occurs both as a verb and as a month, and 'Washington' both as a city
# and inside a landmark name.
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")

In [15]:
# Per the recorded output, only 'next May' is tagged DATE (the leading 'May'
# is not), and the monument comes back labelled ORG.
show_ents(doc)

Washington - GPE - Countries, cities, states
DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [16]:
# Sentence containing a money amount and a company name.
doc = nlp(u"Can I please have 500 dollars of Microsoft stock?")

In [17]:
# Recorded output: '500 dollars' as MONEY and 'Microsoft' as ORG.
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [20]:
# Example where the model misses an entity: 'Tesla' is absent from the
# entities printed by the following cell.
doc = nlp(u"Tesla to build a U.K. factory for $6 million")

In [21]:
# Recorded output lists only 'U.K.' and '$6 million'; 'Tesla' is not tagged.
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [22]:
from spacy.tokens import Span

In [23]:
# Look up the integer ID that the vocab's string table holds for the label 'ORG'.
ORG = doc.vocab.strings[u"ORG"]

In [24]:
# Show the looked-up ID (an integer; 383 in the recorded output).
ORG

383

In [25]:
# Span with start 0 and end 1 over `doc`, labelled ORG; the later output shows
# it covers the single token 'Tesla'.
new_ent = Span(doc,0,1,label=ORG)

In [26]:
# doc.ents is reassigned as a whole: the existing entities are copied into a
# list and the new span is appended.
# NOTE(review): re-running this cell appends the same span a second time —
# presumably rejected as an overlapping entity; re-create `doc` first. Verify.
doc.ents = list(doc.ents) + [new_ent]

In [27]:
# 'Tesla' should now be listed alongside the entities the model found itself.
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [28]:
# Adjacent string literals are concatenated as-is, so the first one needs a
# trailing space; without it the text reads "...vacuum cleaner.This new...".
doc = nlp(u"Our company created a brand new vacuum cleaner. "
          u"This new vacuum-cleaner is the best in show.")

In [29]:
# Recorded output: the model reports no entities for this text.
show_ents(doc)

No entities found


In [30]:
from spacy.matcher import PhraseMatcher

In [31]:
# Phrase matcher constructed from the pipeline's vocabulary.
matcher = PhraseMatcher(nlp.vocab) 

In [32]:
# Both spellings of the product name that should be matched.
phrase_list = ['vacuum cleaner','vacuum-cleaner']

In [33]:
# Phrase patterns only need tokenization, so build them with nlp.make_doc
# instead of running every pipeline component on each phrase.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [34]:
# Register the pattern docs under the 'newproduct' key.
matcher.add('newproduct', phrase_patterns)

In [35]:
# Run the matcher over the document and keep the resulting matches.
found_matches = matcher(doc)

In [36]:
# Each entry is a 3-tuple; positions 1 and 2 are used further down as the
# start and end of a Span. Both hits carry the same first value.
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [37]:
from spacy.tokens import Span

In [38]:
# Look up the vocab string-table entry for the label 'PRODUCT'.
PROD = doc.vocab.strings[u"PRODUCT"]

In [39]:
# Displayed again for reference before spans are built from the start/end indices.
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [42]:
# Turn every (match id, start, end) hit into a span labelled with PROD.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [43]:
# Replace doc.ents with the existing entities plus the newly built product spans.
# NOTE(review): not idempotent — re-running adds the same spans again; presumably
# an error for overlapping entities. Re-create `doc` before re-running. Verify.
doc.ents = list(doc.ents) + new_ents

In [44]:
# Both product mentions should now be reported with the PRODUCT label.
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [45]:
# Sentence with two money amounts written differently ('$29.99' and '10 dollars').
doc = nlp(u"Originally I paid $29.99 for this car toy, but now it is marked down by 10 dollars.")

In [50]:
# Keep only the entities whose label is MONEY.
[entity for entity in doc.ents if entity.label_ == "MONEY"]

[29.99, 10 dollars]