In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; every document below is parsed with it.
nlp = spacy.load('en_core_web_sm')

In [26]:
# Parse the example sentence into a Doc.
# NOTE(review): this cell's execution count (26) is out of sequence; it appears
# to have been re-run after `doc` was rebound by later cells. Confirm the
# notebook reproduces the same outputs under Restart & Run All.
doc = nlp(u'The quick brown fox jumped over the lazy dog\'s back.')

In [4]:
# Print the text of the whole document

print(doc)

The quick brown fox jumped over the lazy dog's back.


In [5]:
# Grab a particular token by indexing into the Doc

print(doc[4])

jumped


In [6]:
# Coarse-grained part-of-speech tag of the token

print(doc[4].pos_)

VERB


In [7]:
# Fine-grained tag of the token

print(doc[4].tag_)

VBD


In [8]:
# Print one aligned row per token: text, coarse POS, fine-grained tag and the
# human-readable description of that tag.
for token in doc:
    # spacy.explain() returns None for a tag that is missing from its glossary,
    # and None cannot be formatted with a width spec (TypeError), so fall back
    # to an empty description instead of aborting the whole table.
    description = spacy.explain(token.tag_) or ''
    print(f'{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {description:{20}}')

The        DET        DT         determiner          
quick      ADJ        JJ         adjective           
brown      ADJ        JJ         adjective           
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense    
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner          
lazy       ADJ        JJ         adjective           
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending   
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [9]:
# Rebind `doc` to a sentence in which "read" is tagged VBP in the recorded output below.
doc = nlp(u'I read books on NLP.')

In [10]:
# Show the row (text, coarse POS, fine-grained tag, tag description) for the
# second token of the current `doc`.
token = word = doc[1]
print(f'{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_):{20}}')

read       VERB       VBP        verb, non-3rd person singular present


In [11]:
# Rebind `doc` again; in this sentence "read" is tagged VBD in the recorded output below.
doc = nlp(u'I read a book on NLP.')

In [12]:
# Same inspection as before, now against the re-parsed sentence: print the
# text, coarse POS, fine-grained tag and tag description of the second token.
token = word = doc[1]
print(f'{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_):{20}}')

read       VERB       VBD        verb, past tense    


In [13]:
# Count how often each coarse-grained POS occurs in `doc`.
# The result is a dict keyed by integer attribute IDs (see the output), not by names.

POS_Counts = doc.count_by(spacy.attrs.POS)

POS_Counts

{96: 1, 99: 1, 84: 1, 89: 1, 91: 1, 94: 1, 95: 1}

In [16]:
# List every POS id in ascending order together with its name (looked up in
# the vocab) and how many times it occurred.
for key in sorted(POS_Counts):
    value = POS_Counts[key]
    print(f'{key} {doc.vocab[key].text:{6}} {value}')

84 ADP    1
89 DET    1
91 NOUN   1
94 PRON   1
95 PROPN  1
96 PUNCT  1
99 VERB   1


In [18]:
# The same frequency count also works for fine-grained tags

TAG_Counts = doc.count_by(spacy.attrs.TAG)

TAG_Counts

{15794550382381185553: 1,
 15308085513773655218: 1,
 12646065887601541794: 1,
 17109001835818727656: 1,
 15267657372422890137: 1,
 13656873538139661788: 1,
 1292078113972184607: 1}

In [21]:
# Walk the tag ids in ascending order and print id, tag name (resolved through
# the vocab) and frequency in aligned columns.
tag_ids_in_order = sorted(TAG_Counts)
for key in tag_ids_in_order:
    value = TAG_Counts[key]
    print(f'{key:{20}} {doc.vocab[key].text:{10}} {value}')

 1292078113972184607 IN         1
12646065887601541794 .          1
13656873538139661788 PRP        1
15267657372422890137 DT         1
15308085513773655218 NN         1
15794550382381185553 NNP        1
17109001835818727656 VBD        1


In [22]:
# The same count can be done for syntactic dependency labels

DEP_Counts = doc.count_by(spacy.attrs.DEP)

DEP_Counts

{436: 1, 440: 1, 426: 1, 442: 1, 8206900633647566924: 1, 412: 1, 413: 1}

In [24]:
# Print each dependency id (left-aligned), its label from the vocab and its
# count, ordered by id.
for key in sorted(DEP_Counts.keys()):
    value = DEP_Counts[key]
    print(f'{key:<{20}} {doc.vocab[key].text:{10}} {value}')

412                  det        1
413                  dobj       1
426                  nsubj      1
436                  pobj       1
440                  prep       1
442                  punct      1
8206900633647566924  ROOT       1


In [25]:
# Visualizing the Parts of speech

from spacy import displacy

In [27]:
# Draw the dependency parse of `doc` inline in the notebook.
# NOTE(review): `doc` is whatever was assigned last. In the saved run that was
# the In [26] cell, but a top-to-bottom run would render the
# "I read a book on NLP." doc instead — confirm which sentence is intended.
displacy.render(doc, style='dep', jupyter=True)

In [28]:
# Appearance settings handed to displacy.render through `options=`.
# `compact` is an on/off switch, so it must be a real bool: the former string
# 'True' only worked because any non-empty string is truthy (the string
# 'False' would have switched compact mode on as well).
options = {
    'color': 'yellow',
    'distance': 110,
    'compact': True,
    'bg': '#09a3d5',
    'font': 'Times',
}

In [29]:
# Render the dependency parse again, this time applying the custom `options` dict.
displacy.render(doc, style='dep', jupyter=True, options=options)

In [30]:
# Build a second Doc whose sentences will be visualized through displacy.serve

doc2 = nlp(u'This is a sentence. This is another long sentence.')

In [32]:
# Materialize the sentences into a list. `doc2.sents` is a one-shot generator:
# kept as-is it is exhausted after its first use, so re-running the serving
# cell without re-running this one would hand displaCy no sentences at all.
spans = list(doc2.sents)

In [33]:
# Start displaCy's web server for the sentence spans; the recorded output shows
# it serving on port 5000 and handling requests until the server was shut down.
displacy.serve(spans, style='dep', options={'distance': 110})


[93m    Serving on port 5000...[0m
    Using the 'dep' visualizer



127.0.0.1 - - [22/Aug/2019 10:28:26] "GET / HTTP/1.1" 200 6570
127.0.0.1 - - [22/Aug/2019 10:28:27] "GET /favicon.ico HTTP/1.1" 200 6570



    Shutting down server on port 5000.

