In [1]:
import spacy

In [2]:
# Load the `en_core_web_sm` pipeline once; `nlp` is then called on raw text
# in the cells below to produce processed documents.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Run the pipeline over a sample sentence. The `u` prefix is redundant in
# Python 3, where every str literal is already Unicode.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [4]:
# Echo the text of the processed document (matches the sentence passed to nlp).
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [5]:
# Coarse-grained part-of-speech tag of the token at index 4 ("jumped").
# The trailing underscore selects the string form of the attribute.
print(doc[4].pos_)

VERB


In [10]:
# Fine-grained part-of-speech tag of the same token ("jumped").
# VBD = verb, past tense.
print(doc[4].tag_)

VBD


In [13]:
# One row per token: text, coarse POS, fine-grained tag (each left-aligned in
# a 10-character column), followed by spaCy's description of the tag.
for token in doc:
    print(
        f"{token.text:<10}",
        f"{token.pos_:<10}",
        f"{token.tag_:<10}",
        spacy.explain(token.tag_),
    )

The        DET        DT         determiner
quick      ADJ        JJ         adjective
brown      ADJ        JJ         adjective
fox        PROPN      NNP        noun, proper singular
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [39]:
# Re-bind `doc` to a sentence in which "read" appears as a base-form verb.
doc = nlp("Hey, read a book on NLP.")

In [40]:
# Pick the token at index 2 of the current doc (printed below as "read").
word = doc[2]

In [41]:
# Alias `word` as `token` so the formatted print that follows can refer to `token`.
token = word

In [42]:
# Single table row for `token`: text, coarse POS, fine-grained tag (each
# left-aligned in a 10-character column), then the tag's description.
print(
    f"{token.text:<10}",
    f"{token.pos_:<10}",
    f"{token.tag_:<10}",
    spacy.explain(token.tag_),
)

read       VERB       VB         verb, base form


In [43]:
# Same word "read" in a different context, to compare the tag it receives.
doc = nlp("I read a book on NLP")

In [46]:
# Bind both names to the token at index 1 ("read"), then print its row:
# text, coarse POS, fine-grained tag, and the tag's description.
token = word = doc[1]
print(
    f"{token.text:<10}",
    f"{token.pos_:<10}",
    f"{token.tag_:<10}",
    spacy.explain(token.tag_),
)

read       VERB       VBD        verb, past tense


In [47]:
# Switch back to the original sample sentence for the frequency counts below.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [48]:
# Tally how many tokens in `doc` carry each coarse POS tag.
# The result is a dict keyed by integer attribute IDs, not by tag strings.
POS_counts = doc.count_by(spacy.attrs.POS)

In [50]:
# Display the raw {POS ID: token count} mapping.
POS_counts

{90: 2, 84: 3, 96: 1, 100: 1, 85: 1, 92: 2, 94: 1, 97: 1}

In [52]:
# Walk the counts in ascending ID order; doc.vocab[k].text turns each integer
# ID back into its readable POS label.
for k in sorted(POS_counts):
    v = POS_counts[k]
    print(f"{k}. {doc.vocab[k].text:<5} {v}")

84. ADJ   3
85. ADP   1
90. DET   2
92. NOUN  2
94. PART  1
96. PROPN 1
97. PUNCT 1
100. VERB  1


In [54]:
# Same frequency tally, this time over fine-grained tags. Keys are integer
# IDs, so each one is resolved to its tag string through doc.vocab.
TAG_counts = doc.count_by(spacy.attrs.TAG)
for k in sorted(TAG_counts):
    v = TAG_counts[k]
    print(f"{k}. {doc.vocab[k].text:<5} {v}")

74. POS   1
1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    2
15794550382381185553. NNP   1
17109001835818727656. VBD   1
