In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; the cells below call NLP on text to build Docs.
NLP = spacy.load('en_core_web_sm')

In [3]:
# The u prefix marks a unicode string literal. Python 3 strings are already
# unicode, so the prefix is accepted but has no effect.
Doc = NLP(u'The quick brown fox jumped over the lazy dog\'s back.')

In [4]:
# Doc.text gives back the full sentence the Doc was built from.
print(Doc.text)

The quick brown fox jumped over the lazy dog's back.


In [5]:
# Indexing a Doc yields a single token; position 4 is the word "jumped".
print(Doc[4])

jumped


In [6]:
# pos_ is the token's coarse-grained part-of-speech label as a string.
print(Doc[4].pos_)

VERB


In [7]:
# dep_ is the token's syntactic dependency label; "jumped" is the ROOT of this sentence.
print(Doc[4].dep_)

ROOT


In [8]:
# tag_ is the token's fine-grained tag as a string (more specific than pos_).
print(Doc[4].tag_)

VBD


In [9]:
# Print one row per token: text, coarse POS, fine-grained tag, and the
# plain-English description of the tag returned by spacy.explain.
# The nested {10}, {15} and {5} fields are the column widths.
# Note: the loop variable Token stays bound in the kernel after the loop ends.
for Token in Doc:
    print(f'{Token.text:{10}} {Token.pos_:{15}} {Token.tag_:{5}} {spacy.explain(Token.tag_)}')

The        DET             DT    determiner
quick      ADJ             JJ    adjective
brown      ADJ             JJ    adjective
fox        NOUN            NN    noun, singular or mass
jumped     VERB            VBD   verb, past tense
over       ADP             IN    conjunction, subordinating or preposition
the        DET             DT    determiner
lazy       ADJ             JJ    adjective
dog        NOUN            NN    noun, singular or mass
's         PART            POS   possessive ending
back       NOUN            NN    noun, singular or mass
.          PUNCT           .     punctuation mark, sentence closer


In [10]:
# First of two near-identical sentences used to compare how "read" gets tagged.
Doc1 = NLP('I read books on NLP.')

In [11]:
# Pick out the second token of Doc1 ("read") and display it.
Word1 = Doc1[1]
Word1

read

In [12]:
# The token's text as a plain Python string.
Word1.text

'read'

In [13]:
# Print the same text / POS / tag / explanation row for the token taken from Doc1.
Token = Word1
print(f'{Token.text:{10}} {Token.pos_:{15}} {Token.tag_:{5}} {spacy.explain(Token.tag_)}')

read       VERB            VBD   verb, past tense


In [14]:
# Second sentence of the comparison: only "books" -> "book" differs from Doc1.
Doc2 = NLP('I read book on NLP.')

# Bind the second token ("read") to both names, then print its row in the
# same text / POS / tag / explanation layout.
Word2 = Token = Doc2[1]
print(f'{Token.text:{10}} {Token.pos_:{15}} {Token.tag_:{5}} {spacy.explain(Token.tag_)}')

read       VERB            VBP   verb, non-3rd person singular present


In [15]:
# Coarse part-of-speech frequencies for Doc.
# count_by returns a dict mapping each integer POS ID to its token count.
POS_Count = Doc.count_by(spacy.attrs.POS)
POS_Count

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [16]:
# Look an integer ID up in the vocab to recover its string form (84 is 'ADJ').
Doc.vocab[84].text

'ADJ'

In [17]:
# pos (no trailing underscore) is the integer ID of the token's coarse POS.
Doc[2].pos

84

In [18]:
# pos_ (trailing underscore) is the same attribute as a readable string.
Doc[2].pos_

'ADJ'

In [19]:
# Report every coarse POS in ascending ID order: the numeric ID, its name
# (left-aligned in a 5-character column) and the number of tokens carrying it.
for Key in sorted(POS_Count):
    Value = POS_Count[Key]
    print(f'{Key} {Doc.vocab[Key].text:<{5}} {Value}')

84 ADJ   3
85 ADP   1
90 DET   2
92 NOUN  3
94 PART  1
97 PUNCT 1
100 VERB  1


In [20]:
# Fine-grained tag frequencies for Doc.
# count_by returns a dict mapping each integer tag ID to its token count.
TAG_Count = Doc.count_by(spacy.attrs.TAG)
TAG_Count

{15267657372422890137: 2,
 10554686591937588953: 3,
 15308085513773655218: 3,
 17109001835818727656: 1,
 1292078113972184607: 1,
 74: 1,
 12646065887601541794: 1}

In [21]:
# Report every fine-grained tag in ascending ID order. The tag name is
# right-aligned in 25 columns padded with '-', and the count is right-aligned
# in 10 columns padded with '.'.
for Key in sorted(TAG_Count):
    Value = TAG_Count[Key]
    print(f'{Key} {Doc.vocab[Key].text:->{25}} {Value:.>{10}}')

74 ----------------------POS .........1
1292078113972184607 -----------------------IN .........1
10554686591937588953 -----------------------JJ .........3
12646065887601541794 ------------------------. .........1
15267657372422890137 -----------------------DT .........2
15308085513773655218 -----------------------NN .........3
17109001835818727656 ----------------------VBD .........1


In [22]:
# Syntactic dependency label frequencies for Doc.
# count_by returns a dict mapping each integer dependency ID to its token count.
DEP_Count = Doc.count_by(spacy.attrs.DEP)
DEP_Count

{415: 2,
 402: 3,
 429: 1,
 8206900633647566924: 1,
 443: 1,
 440: 1,
 8110129090154140942: 1,
 439: 1,
 445: 1}

In [23]:
# Report every dependency label in ascending ID order. The label is
# right-aligned in 10 columns padded with '.', and the count is right-aligned
# in 5 columns padded with '-'.
for Key in sorted(DEP_Count):
    Value = DEP_Count[Key]
    print(f'{Key} {Doc.vocab[Key].text:.>{10}} {Value:->{5}}')

402 ......amod ----3
415 .......det ----2
429 .....nsubj ----1
439 ......pobj ----1
440 ......poss ----1
443 ......prep ----1
445 .....punct ----1
8110129090154140942 ......case ----1
8206900633647566924 ......ROOT ----1
