"""In corpus linguistics, part-of-speech tagging (POS tagging or PoS tagging or POST), also called grammatical tagging is the process of marking up a word in a text (corpus) as corresponding to a particular part of speech, based on both its definition and its context."""

In [1]:
import spacy 

In [2]:
# Load the small English pipeline once; `nlp` is the callable that turns raw text into the docs used below.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Run the pipeline on a sample sentence; `doc` is indexed, iterated and rendered in the cells below.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [4]:
# Show the text of the processed document.
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [5]:
# Index the doc to get its fifth token (0-based index 4); as the cell's last expression it is displayed as output.
doc[4]

jumped

In [6]:
# Coarse-grained part-of-speech label of that token (e.g. VERB).
print(doc[4].pos_)

VERB


In [7]:
# Fine-grained tag of the same token (e.g. VBD); spacy.explain() can turn it into a description.
print(doc[4].tag_)

VBD


In [None]:
### Part-of-speech tag of every word in the sentence

In [8]:
# One row per token: text, coarse POS and fine-grained tag, each left-aligned
# in a 10-character column, followed by spaCy's description of the tag.
for token in doc:
    print(
        token.text.ljust(10),
        token.pos_.ljust(10),
        token.tag_.ljust(10),
        spacy.explain(token.tag_),
    )

The        DET        DT         determiner
quick      ADJ        JJ         adjective
brown      ADJ        JJ         adjective
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [None]:
## POS - differentiate between present tense and past tense

In [9]:
# First example sentence for the tense comparison (plural object).
doc1 = nlp("I read books on NLP.")

In [10]:
# One row per token of doc1: text, coarse POS and fine-grained tag, each
# left-aligned in a 10-character column, then the tag's description.
for token in doc1:
    print(
        token.text.ljust(10),
        token.pos_.ljust(10),
        token.tag_.ljust(10),
        spacy.explain(token.tag_),
    )

I          PRON       PRP        pronoun, personal
read       VERB       VBD        verb, past tense
books      NOUN       NNS        noun, plural
on         ADP        IN         conjunction, subordinating or preposition
NLP        PROPN      NNP        noun, proper singular
.          PUNCT      .          punctuation mark, sentence closer


In [11]:
# Second example sentence for the tense comparison (same verb, singular object).
doc2 = nlp("I read a book on NLP.")

In [12]:
# One row per token of doc2: text, coarse POS and fine-grained tag, each
# left-aligned in a 10-character column, then the tag's description.
for token in doc2:
    print(
        token.text.ljust(10),
        token.pos_.ljust(10),
        token.tag_.ljust(10),
        spacy.explain(token.tag_),
    )

I          PRON       PRP        pronoun, personal
read       VERB       VBD        verb, past tense
a          DET        DT         determiner
book       NOUN       NN         noun, singular or mass
on         ADP        IN         conjunction, subordinating or preposition
NLP        PROPN      NNP        noun, proper singular
.          PUNCT      .          punctuation mark, sentence closer


In [None]:
### Part-of-speech count

In [13]:
# Re-display the sentence whose tags are counted below.
print(doc)

The quick brown fox jumped over the lazy dog's back.


In [14]:
# Count tokens per coarse POS; the result maps integer POS ids to frequencies.
POS_count = doc.count_by(spacy.attrs.POS)

In [15]:
# Display the raw counts; the keys are integer ids, not label strings.
POS_count

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [16]:
# Resolve two of the integer ids back to their label strings through the vocab.
doc.vocab[90].text,doc.vocab[84].text

('DET', 'ADJ')

In [17]:
# List the POS ids in ascending order with their label (padded to 5 characters)
# and how often each occurs in the doc.
for k in sorted(POS_count):
    v = POS_count[k]
    print(k, doc.vocab[k].text.ljust(5), v)

84 ADJ   3
85 ADP   1
90 DET   2
92 NOUN  3
94 PART  1
97 PUNCT 1
100 VERB  1


In [None]:
## Tag Count 

In [18]:
# Count tokens per fine-grained tag; keys are integer ids that the next cell
# resolves back to tag strings through the vocab.
TAG_count = doc.count_by(spacy.attrs.TAG)
# Last expression of the cell, so the mapping is displayed.
TAG_count

{15267657372422890137: 2,
 10554686591937588953: 3,
 15308085513773655218: 3,
 17109001835818727656: 1,
 1292078113972184607: 1,
 74: 1,
 12646065887601541794: 1}

In [19]:
# List the tag ids in ascending order with their tag string (padded to 5
# characters) and how often each occurs in the doc.
for k in sorted(TAG_count):
    v = TAG_count[k]
    print(k, doc.vocab[k].text.ljust(5), v)

74 POS   1
1292078113972184607 IN    1
10554686591937588953 JJ    3
12646065887601541794 .     1
15267657372422890137 DT    2
15308085513773655218 NN    3
17109001835818727656 VBD   1


In [None]:
### Part-of-speech visualization -- word-level visualization

In [20]:
from spacy import displacy

In [21]:
# Draw the dependency parse of `doc` inline in the notebook with default settings.
displacy.render(doc, style='dep', jupyter=True)

In [24]:
# Display settings handed to displacy.render through its `options` argument.
# `compact` must be a real boolean: a string such as "True" or "False" is
# always truthy, so a string value could never switch compact mode off.
options = {
    'distance': 110,
    'compact': True,
    'color': 'yellow',
    'bg': '#09a3d5',
    'font': 'Times',
}

In [25]:
# Same dependency rendering of `doc`, this time with the custom `options` applied.
displacy.render(doc, style='dep', jupyter=True, options=options)

In [None]:
## Part-of-speech -- sentence visualization

In [26]:
# Two-sentence example used to render each sentence separately.
# The first sentence reads "My name is ..." (it was "I name is ...", a typo
# that also distorts the dependency parse being visualized).
new_doc = nlp("My name is Anurag Joshi. I am a Data Scientist")

In [27]:
# Materialize the sentence spans of new_doc into a list so they can be passed to displacy together.
sent = [sentence for sentence in new_doc.sents]

In [28]:
# Render the dependency parse of the sentences in `sent` inline, using the custom `options`.
displacy.render(sent, style='dep',options=options, jupyter=True)

In [None]:
### We can also visualize it on a local server

In [None]:
#displacy.serve(sent, style='dep', options=options)