In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; later cells call `nlp(...)` on raw text.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Run the example sentence through the pipeline (plain str literal; the u-prefix is a no-op on Python 3).
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [4]:
# Show the raw text the doc was built from.
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [5]:
# Index the doc like a sequence to get a single token (index 4 is the fifth token).
print(doc[4])

jumped


In [6]:
# tag_ is the token's fine-grained tag as a string.
print(doc[4].tag_)

VBD


In [7]:
# pos_ is the token's part-of-speech label as a string.
print(doc[4].pos_)

VERB


In [8]:
# One row per token: text, POS label and fine-grained tag (each padded to
# 10 columns), followed by spaCy's description of the tag.
for token in doc:
    print(
        token.text.ljust(10),
        token.pos_.ljust(10),
        token.tag_.ljust(10),
        spacy.explain(token.tag_),
    )

The        DET        DT         determiner
quick      ADJ        JJ         adjective
brown      ADJ        JJ         adjective
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [17]:
# New example: "read" followed by a plural object.
doc = nlp("I read books on NLP.")

In [18]:
# Second token of the sentence (index 1), kept as `word` for the following cells.
word = doc[1]

In [19]:
# Confirm which token was picked; the bare expression is echoed as the cell output.
word.text

'read'

In [20]:
# Same row layout as the per-token table, for the single token held in `word`.
token = word
print(
    token.text.ljust(10),
    token.pos_.ljust(10),
    token.tag_.ljust(10),
    spacy.explain(token.tag_),
)

read       VERB       VBP        verb, non-3rd person singular present


In [26]:
# Same word "read", but now followed by a singular object ("a book").
doc = nlp("I read a book on NLP.")
print(doc.text)

I read a book on NLP.


In [27]:
# Tags are predicted from context, so "read" in this sentence may receive a
# different tag than before (e.g. past tense, VBD). NOTE: in the run recorded
# below the output is unchanged (VERB / VBP), i.e. this pipeline did not
# distinguish the two sentences.
word = doc[1]
token = word
print(
    token.text.ljust(10),
    token.pos_.ljust(10),
    token.tag_.ljust(10),
    spacy.explain(token.tag_),
)

read       VERB       VBP        verb, non-3rd person singular present


In [28]:
# Back to the first example sentence for the attribute-count cells.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [30]:
# Tally the tokens of `doc` by their POS attribute; the keys are integer ids, not labels.
POS_counts = doc.count_by(spacy.attrs.POS)

In [31]:
# Maps each POS id (an integer, not a token id) to the number of tokens in `doc` with that POS.
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [33]:
# Look up id 84 (one of the POS_counts keys) in the vocab to recover its label.
doc.vocab[84].text

'ADJ'

In [34]:
# Third token of the sentence; the next cells compare its .pos and .pos_.
doc[2]

brown

In [35]:
# .pos (no trailing underscore) is the integer id of the token's part of speech.
doc[2].pos

84

In [36]:
# .pos_ (trailing underscore) is the same part of speech as a readable string.
doc[2].pos_

'ADJ'

In [38]:
# Part-of-speech counts in ascending id order: "<id>. <label padded to 5> <count>".
# The vocab lookup turns each integer id back into its label.
for k in sorted(POS_counts):
    v = POS_counts[k]
    print("{}. {:5} {}".format(k, doc.vocab[k].text, v))

84. ADJ   3
85. ADP   1
90. DET   2
92. NOUN  3
94. PART  1
97. PUNCT 1
100. VERB  1


In [40]:
# Fine-grained tag counts in ascending id order: "<id>. <tag padded to 5> <count>".
TAG_counts = doc.count_by(spacy.attrs.TAG)

for k in sorted(TAG_counts):
    v = TAG_counts[k]
    print("{}. {:5} {}".format(k, doc.vocab[k].text, v))

74. POS   1
1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    3
17109001835818727656. VBD   1


In [42]:
# Syntactic dependency counts in ascending id order: "<id>. <label padded to 5> <count>".
DEP_counts = doc.count_by(spacy.attrs.DEP)

for k in sorted(DEP_counts):
    v = DEP_counts[k]
    print("{}. {:5} {}".format(k, doc.vocab[k].text, v))

402. amod  3
415. det   2
429. nsubj 1
439. pobj  1
440. poss  1
443. prep  1
445. punct 1
8110129090154140942. case  1
8206900633647566924. ROOT  1


### TIPS:

### To get a human-readable explanation of a specific tag
`spacy.explain(token.tag_)`

### To count how many tokens in a doc share each value of an attribute
`doc.count_by(spacy.attrs.POS)`

`doc.count_by(spacy.attrs.TAG)`

`doc.count_by(spacy.attrs.DEP)`