In [8]:
import spacy

In [9]:
# Load the small English pipeline once; every later cell parses text through `sp`.
sp = spacy.load("en_core_web_sm")

In [10]:
# Tokenise and tag a two-sentence example, then echo the raw text.
sen = sp("I like to play football. I hated it in my childhood though")
print(sen.text)

# Token 7 is "hated": show its coarse POS, its fine-grained tag, and the
# human-readable description of that tag, one per line.
hated = sen[7]
for detail in (hated.pos_, hated.tag_, spacy.explain(hated.tag_)):
    print(detail)

# One aligned row per token: text, coarse POS, fine-grained tag, tag description.
for token in sen:
    tag_gloss = spacy.explain(token.tag_)
    print(f'{token.text:12} {token.pos_:10} {token.tag_:8} {tag_gloss}')

I like to play football. I hated it in my childhood though
VERB
VBD
verb, past tense
I            PRON       PRP      pronoun, personal
like         VERB       VBP      verb, non-3rd person singular present
to           PART       TO       infinitival "to"
play         VERB       VB       verb, base form
football     NOUN       NN       noun, singular or mass
.            PUNCT      .        punctuation mark, sentence closer
I            PRON       PRP      pronoun, personal
hated        VERB       VBD      verb, past tense
it           PRON       PRP      pronoun, personal
in           ADP        IN       conjunction, subordinating or preposition
my           PRON       PRP$     pronoun, possessive
childhood    NOUN       NN       noun, singular or mass
though       ADV        RB       adverb


In [11]:
# Parse the example sentence again so this cell builds its own Doc.
sen = sp("I like to play football. I hated it in my childhood though")

# Tally tokens per coarse POS tag. The resulting dict is keyed by integer
# attribute IDs, not by tag names; the names are resolved through the vocab later.
pos_attr = spacy.attrs.POS
num_pos = sen.count_by(pos_attr)
num_pos

{95: 4, 100: 3, 94: 1, 92: 2, 97: 1, 85: 1, 86: 1}

In [12]:
# Walk the POS IDs in ascending order and resolve each one to its tag name
# through the Doc's vocab before printing "<id>. <name>: <count>".
for pos_id in sorted(num_pos):
    pos_name = sen.vocab[pos_id].text
    print(f'{pos_id}. {pos_name:8}: {num_pos[pos_id]}')

85. ADP     : 1
86. ADV     : 1
92. NOUN    : 2
94. PART    : 1
95. PRON    : 4
97. PUNCT   : 1
100. VERB    : 3


In [13]:
from spacy import displacy

# Draw the dependency parse of the example inline in the notebook.
sen = sp("I like to play football. I hated it in my childhood though")
displacy.render(
    sen,
    style='dep',
    options={'distance': 85},  # displaCy layout option; see the displaCy docs for units
    jupyter=True,
)

In [15]:
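# Disabled alternative to the inline render above: displacy.serve() hosts the
# visualisation on a local web server instead of drawing it in the notebook.
# displacy.serve(sen, style='dep', options={'distance': 120})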

In [16]:
# Run named-entity recognition on a single sentence.
sen = sp('Manchester United is looking to sign Harry Kane for $90 million')

# First the raw tuple of entity spans, then one "text - label - description" line each.
print(sen.ents)
for entity in sen.ents:
    label_gloss = spacy.explain(entity.label_)
    print(f'{entity.text} - {entity.label_} - {label_gloss}')

(Manchester United, Harry Kane, $90 million)
Manchester United - GPE - Countries, cities, states
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit


In [18]:
from spacy.tokens import Span
sen = sp('Nesfruita is setting up a new company in India')

# Entities predicted by the model on its own.
for entity in sen.ents:
    print(f'{entity.text} - {entity.label_} - {spacy.explain(entity.label_)}')

# Hand-built ORG span over token 0 ("Nesfruita"), for the case where the model
# does not tag it.
ORG = sen.vocab.strings['ORG']
new_entity = Span(sen, 0, 1, label=ORG)

# Doc.ents does not accept overlapping spans (the assignment raises), so the
# manual span is appended only when no predicted entity already covers any of
# its tokens. When it is appended, the entity list is printed again so the
# effect is visible.
already_tagged = any(
    ent.start < new_entity.end and new_entity.start < ent.end
    for ent in sen.ents
)
if not already_tagged:
    sen.ents = list(sen.ents) + [new_entity]
    for entity in sen.ents:
        print(f'{entity.text} - {entity.label_} - {spacy.explain(entity.label_)}')

Nesfruita - ORG - Companies, agencies, institutions, etc.
India - GPE - Countries, cities, states


In [19]:
# Two sentences this time, so the Doc carries entities from both.
sen = sp('Manchester United is looking to sign Harry Kane for $90 million. David demand 100 Million Dollars')

# One "text - label - description" line per recognised entity.
for entity in sen.ents:
    label_gloss = spacy.explain(entity.label_)
    print(f'{entity.text} - {entity.label_} - {label_gloss}')

Manchester United - GPE - Countries, cities, states
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit
David - PERSON - People, including fictional
100 Million Dollars - MONEY - Monetary values, including unit


In [20]:
# Number of entities in the current Doc that are labelled PERSON.
sum(1 for ent in sen.ents if ent.label_ == 'PERSON')


2

In [21]:
from spacy import displacy
# Parse the two-sentence example and highlight every recognised entity inline.
sen = sp('Manchester United is looking to sign Harry Kane for $90 million. David demand 100 Million Dollars')
displacy.render(
    sen,
    jupyter=True,
    style='ent',
)

In [23]:
# Restrict the entity visualisation to ORG labels only.
# The options dict is named `ent_filter` rather than `filter` so that Python's
# built-in filter() is not shadowed for the rest of the kernel session.
ent_filter = {'ents': ['ORG']}
displacy.render(sen, style='ent', jupyter=True, options=ent_filter)