In [None]:
import spacy
# Load the 'en_core_web_sm' pipeline once; the cells below call this `nlp` object on each example text.
nlp = spacy.load('en_core_web_sm')

In [None]:
def show_ents(doc):
  """Print the named entities of ``doc``, one per line.

  Each line has the form ``<text> - <label> - <explanation>``, where the
  explanation is whatever ``spacy.explain`` returns for the label,
  converted with ``str``. If ``doc.ents`` is empty, the single line
  'No entities found' is printed instead.
  """
  # Guard clause: nothing to list.
  if not doc.ents:
    print('No entities found')
    return

  for ent in doc.ents:
    explanation = str(spacy.explain(ent.label_))
    print(' - '.join([ent.text, ent.label_, explanation]))

In [None]:
# 'May' and 'Washington' each appear twice in this sentence; list what the pipeline tags.
doc = nlp("May I go to Washington, DC next May to see the Washington Monument?")
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [None]:
doc = nlp("Can I please have 500 dollars of Microsoft stock?")

# For every entity: text, label, token span (start, end) and character span (start_char, end_char).
for ent in doc.ents:
  print(f"{ent.text} {ent.label_} {ent.start} {ent.end} {ent.start_char} {ent.end_char}")

500 dollars MONEY 4 6 18 29
Microsoft ORG 7 8 33 42


NER Tags

In [None]:
# Per-token view: the token text, its entity type and its IOB tag.
for token in doc:
  print(f"{token.text} {token.ent_type_} {token.ent_iob_}")

Can  O
I  O
please  O
have  O
500 MONEY B
dollars MONEY I
of  O
Microsoft ORG B
stock  O
?  O


In [None]:
# List the entities the pipeline finds in a short headline-style sentence.
doc = nlp("Tesla to build a U.K. factory for $6 million")
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


Problem with line breaks

In [None]:
# Display the installed spaCy version.
# NOTE(review): presumably recorded because the line-break example that follows is version-dependent — confirm.
spacy.__version__

'3.8.4'

In [None]:
# The text has an embedded newline between the two clauses; list the entities found.
doc = nlp("Originally priced at $29.50,\nthe sweater was marked down to five dollars.")
show_ents(doc)

29.50 - MONEY - Monetary values, including unit
five dollars - MONEY - Monetary values, including unit


Noun Chunks

In [None]:
doc = nlp("Autonomus cars shift insurance liability toward manufacturers.")

# For each noun chunk: chunk text, its root token, the root's dependency label, and the root's head.
for chunk in doc.noun_chunks:
  root = chunk.root
  print('-'.join([chunk.text, root.text, root.dep_, root.head.text]))

Autonomus cars-cars-nsubj-shift
insurance liability-liability-dobj-shift
manufacturers-manufacturers-pobj-toward


In [None]:
# `doc.noun_chunks` is consumed as an iterator here, so count its items one by one.
sum(1 for _ in doc.noun_chunks)

3

Visualizing named entities

In [None]:
from spacy import displacy

In [None]:
doc = nlp(
    'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. '
    'By contrast, Sony only sold 8 thousand Walkman music players.'
)

# Render the whole two-sentence document with displaCy's entity style.
displacy.render(doc, style='ent', jupyter=True)

In [None]:
# Render each sentence on its own line of output.
# The sentence Span is handed to displaCy directly instead of re-running
# `nlp` on `sent.text`: this avoids a second pipeline pass per sentence and
# shows the entities already predicted on `doc`, rather than a fresh
# prediction made without the surrounding context.
for sent in doc.sents:
  displacy.render(sent, style='ent', jupyter=True)

In [None]:
# Variant of the previous text: the second sentence's subject is 'my kids' instead of 'Sony'.
doc2 = nlp(
    'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. '
    'By contrast, my kids only sold 8 thousand Walkman music players.'
)

In [None]:
# Render each sentence of `doc2` separately.
# The sentence Span is passed to displaCy as-is rather than re-parsing
# `sent.text` with `nlp`, so no extra pipeline pass is made and the entities
# displayed are those already predicted on `doc2`.
for sent in doc2.sents:
  displacy.render(sent, style='ent', jupyter=True)