In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' English pipeline; every text below is processed through `nlp`.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Run the pipeline over a two-sentence sample text; `doc` is reused by the tagging,
# counting and visualisation cells that follow.
doc = nlp(u'The intelligent dog would walk upto his human and sits on his lap whenever he is upset. Quite a human thing !.')

In [4]:
# One row per token: text, pos_, tag_ (padded to fixed widths) and
# spacy.explain's description of the tag.
for token in doc:
    print("{:13} {:10} {:10} {}".format(
        token.text, token.pos_, token.tag_, spacy.explain(token.tag_)))

The           DET        DT         determiner
intelligent   ADJ        JJ         adjective
dog           NOUN       NN         noun, singular or mass
would         VERB       MD         verb, modal auxiliary
walk          VERB       VB         verb, base form
upto          VERB       VB         verb, base form
his           DET        PRP$       pronoun, possessive
human         NOUN       NN         noun, singular or mass
and           CCONJ      CC         conjunction, coordinating
sits          VERB       VBZ        verb, 3rd person singular present
on            ADP        IN         conjunction, subordinating or preposition
his           DET        PRP$       pronoun, possessive
lap           NOUN       NN         noun, singular or mass
whenever      ADV        WRB        wh-adverb
he            PRON       PRP        pronoun, personal
is            AUX        VBZ        verb, 3rd person singular present
upset         ADJ        JJ         adjective
.             PUNCT      .    

In [5]:
# Pick out a single token by position (index 4, 0-based) for closer inspection.
word = doc[4]

In [6]:
# Display the selected token.
word

walk

In [7]:
# Print the same text / pos_ / tag_ / explanation row for the single selected token.
token = word
print(token.text.ljust(13), token.pos_.ljust(10), token.tag_.ljust(10), spacy.explain(token.tag_))

walk          VERB       VB         verb, base form


In [8]:
# Count how many tokens in the doc carry each part-of-speech (POS) attribute value.
# The result is a dict keyed by the integer POS ID, not by the readable name.
pos_counts = doc.count_by(spacy.attrs.POS)


In [9]:
# Inspect the raw mapping: integer POS ID -> number of tokens with that POS.
pos_counts

{90: 5, 84: 3, 92: 4, 100: 4, 89: 1, 85: 1, 86: 1, 95: 1, 87: 1, 97: 3}

In [10]:
# Text of the token at index 1, i.e. the second token (indexing is 0-based).
doc[1].text

'intelligent'

In [11]:
# Integer ID of that token's part of speech (`pos` without the trailing underscore
# gives the number; `pos_` gives the readable string).
doc[1].pos

84

In [12]:
# Look up the readable name behind a numeric POS ID through the vocab.
# The ID is taken from the token itself rather than hard-coded (previously 84),
# so the lookup does not depend on a number copied from an earlier output.
doc.vocab[doc[1].pos].text

'ADJ'

In [13]:
# List the POS counts in ascending ID order, resolving each ID to its name via the vocab.
for pos_id in sorted(pos_counts):
    print(f"{pos_id:5} {doc.vocab[pos_id].text:10} {pos_counts[pos_id]}")

   84 ADJ        3
   85 ADP        1
   86 ADV        1
   87 AUX        1
   89 CCONJ      1
   90 DET        5
   92 NOUN       4
   95 PRON       1
   97 PUNCT      3
  100 VERB       4


In [14]:
# Count tokens per TAG attribute value and list them by ascending ID,
# with each ID resolved to its tag name through the vocab.
tags_counts = doc.count_by(spacy.attrs.TAG)
for tag_id, tag_total in sorted(tags_counts.items()):
    tag_name = doc.vocab[tag_id].text
    print(tag_id, tag_name.ljust(10), tag_total)

1292078113972184607 IN         1
4062917326063685704 PRP$       2
7477624496606277342 PDT        1
10554686591937588953 JJ         3
12646065887601541794 .          3
13656873538139661788 PRP        1
13927759927860985106 VBZ        2
14200088355797579614 VB         2
15267657372422890137 DT         2
15308085513773655218 NN         4
16235386156175103506 MD         1
17524233984504158541 WRB        1
17571114184892886314 CC         1


In [15]:
# Count tokens per syntactic dependency label (DEP attribute) and list them by
# ascending ID, with each ID resolved to its label name through the vocab.
deps_counts = doc.count_by(spacy.attrs.DEP)
for dep_id, dep_total in sorted(deps_counts.items()):
    print("{:<25} {:<10} {}".format(dep_id, doc.vocab[dep_id].text, dep_total))

398                       acomp      1
399                       advcl      1
400                       advmod     1
402                       amod       2
405                       aux        1
407                       cc         1
410                       conj       1
415                       det        2
416                       dobj       1
429                       nsubj      2
439                       pobj       1
440                       poss       2
443                       prep       1
445                       punct      3
450                       xcomp      1
8206900633647566924       ROOT       2
13323405159917154080      predet     1


In [16]:
from spacy import displacy

In [17]:
# Draw the dependency parse of `doc` inline in the notebook (style='dep', jupyter=True).
displacy.render(doc,style='dep', jupyter=True )

In [18]:
# Rendering options for the dependency visualiser: arc distance, compact mode,
# text colour, background colour and font.
# 'compact' is a boolean flag. The string 'True' only worked because any
# non-empty string is truthy (the string 'False' would have enabled it too).
options = {'distance': 100, 'compact': True, 'color': 'yellow', 'bg': 'blue', 'font': 'Times'}
displacy.render(doc, style="dep", jupyter=True, options=options)

In [19]:
# To visualise a paragraph sentence by sentence, split the processed doc into
# sentences with the `.sents` attribute and collect them into a list of spans.
doc1 = nlp(u'I hate front end. i love Backend. I dodge balls. I love pussy.')
spans = list(doc1.sents)

In [20]:
# Inspect the sentence spans produced by the split.
spans

[I hate front end., i love Backend., I dodge balls., I love pussy.]

In [None]:
# displacy.serve starts a web server so the visualisation can be viewed in a browser,
# outside the notebook; here it is given the list of sentence spans.
# NOTE(review): this call appears to keep the cell running until interrupted (the
# cell has no execution count and the output reports "Serving on ...") — confirm
# before relying on Restart & Run All.
displacy.serve(spans,style='dep',options={'distance':110})




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...



In [21]:
def show_entitiy(doc):
    """Print the named entities found in ``doc``.

    For every entity in ``doc.ents`` two lines are printed, each holding the
    entity text, its label and ``spacy.explain``'s description of the label:
    first separated by single spaces, then joined with ' - '.
    If ``doc.ents`` is empty, "No entity found" is printed instead.

    Args:
        doc: an object exposing ``ents``, an iterable of entities with
            ``text`` and ``label_`` string attributes.

    Returns:
        None.
    """
    if not doc.ents:
        print("No entity found")
        return

    for entity in doc.ents:
        # str() keeps the output printable when explain() has no description (None).
        description = str(spacy.explain(entity.label_))
        print(entity.text, entity.label_, description)
        print(' - '.join((entity.text, entity.label_, description)))
        

In [22]:
# Sample text for entity recognition; it contains dotted abbreviations
# (S.B.I, I.C.I.C.I) and a lowercase country name.
doc2=nlp(u'Actuaries are the professionals who study possibilities on the basis of the maths. S.B.I is a \
government owned bank. I.C.I.C.I is the largest private owned bank in india.')

In [23]:
# List the entities detected in doc2. In the recorded output only 'S.B.I' and
# 'india' are reported (both labelled GPE); 'I.C.I.C.I' is not reported.
show_entitiy(doc2)

S.B.I GPE Countries, cities, states
S.B.I - GPE - Countries, cities, states
india GPE Countries, cities, states
india - GPE - Countries, cities, states
