In [1]:
# POS = Parts of speech

In [2]:
import spacy

In [3]:
nlp = spacy.load('en_core_web_sm')

In [4]:
doc = nlp(u'The quick brown fox jumped over the fox hound\'s big back')

In [5]:
print(doc.text)

The quick brown fox jumped over the fox hound's big back


In [6]:
print(doc[4])

jumped


In [7]:
print(doc[4].pos_) # coarse tag

VERB


In [8]:
print(doc[4].tag_) # fine grained tag

VBD


In [9]:
# _ suffix gives the string tag, not numerical

In [18]:
def explain_token(token):
    print(f'{token.text:{10}} | {token.pos_:{10}} | {token.tag_:{10}} | {spacy.explain(token.tag_)}')
        
for token in doc: explain_token(token)

I          | PRON       | PRP        | pronoun, personal
read       | VERB       | VBP        | verb, non-3rd person singular present
books      | NOUN       | NNS        | noun, plural
on         | ADP        | IN         | conjunction, subordinating or preposition
nlp        | NOUN       | NN         | noun, singular or mass


In [13]:
doc = nlp(u'I read books on nlp')

In [16]:
word = doc[1]

In [17]:
word

read

In [19]:
explain_token(word)

read       | VERB       | VBP        | verb, non-3rd person singular present


In [20]:
doc = nlp(u'I read a book on nlp')

In [21]:
word = doc[1]

In [22]:
explain_token(word)

read       | VERB       | VBD        | verb, past tense


In [23]:
# Get freq count of attributes...
doc = nlp(u'The quick brown fox jumped over the fox hound\'s big back')

In [24]:
pos_counts = doc.count_by(spacy.attrs.POS)

In [25]:
pos_counts

{90: 2, 84: 3, 92: 2, 100: 1, 85: 1, 96: 1, 94: 1, 86: 1}

In [26]:
pos_counts = doc.count_by(spacy.attrs.POS_)

AttributeError: module 'spacy.attrs' has no attribute 'POS_'

In [28]:
doc.vocab[83].text

'LANG'

In [29]:
doc[2].pos_

'ADJ'

In [30]:
doc[2].pos

84

In [31]:
doc.vocab[84].text

'ADJ'

In [37]:
def get_name(code):
    return doc.vocab[code].text

for k, v in sorted(pos_counts.items()):
    print(f'{get_name(k):{10}}| {v:{10}}')

ADJ       |          3
ADP       |          1
ADV       |          1
DET       |          2
NOUN      |          2
PART      |          1
PROPN     |          1
VERB      |          1


In [39]:
tag_counts = doc.count_by(spacy.attrs.TAG)
for k, v in sorted(tag_counts.items()):
    print(f'{get_name(k):{10}}| {v:{10}}')

POS       |          1
RB        |          1
IN        |          1
JJ        |          3
DT        |          2
NN        |          2
NNP       |          1
VBD       |          1


In [40]:
dep_counts = doc.count_by(spacy.attrs.DEP)
for k, v in sorted(dep_counts.items()):
    print(f'{get_name(k):{10}}| {v:{10}}')

advmod    |          1
amod      |          3
det       |          2
nsubj     |          1
pobj      |          1
prep      |          1
compound  |          1
case      |          1
ROOT      |          1


In [41]:
# Visualise!

In [42]:
doc = nlp(u'The quick blue fox jumped over the lazy lethargic dog')

In [43]:
from spacy import displacy

In [44]:
displacy.render(doc, style='dep', jupyter=True)

In [47]:
options = { 'distance': 110, 'compact': 'True', 'color': 'white', 'bg': '#09a3d5', 'font': 'Times'}

In [48]:
displacy.render(doc, style='dep', jupyter=True, options=options)

In [49]:
doc2 = nlp(u'This is a sentence. This is another sentence. This is another.')

In [53]:
spans = list(doc2.sents)

In [None]:
displacy.serve(spans, style='dep', port=3111)