In [1]:
# Perform standard imports and load the 'en_core_web_sm' pipeline.
# `nlp` is the callable used below to turn raw text into Doc objects.
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
# Run the pipeline over one sample sentence to obtain a Doc
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [3]:
# Print the Doc's full text:
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [4]:
# Inspect the fifth token (index 4): its text, coarse POS, fine-grained tag,
# lemma, and spaCy's human-readable description of that tag.
fifth = doc[4]
print(fifth.text, fifth.pos_, fifth.tag_, fifth.lemma_, spacy.explain(fifth.tag_))

jumped VERB VBD jump verb, past tense


In [8]:
# One row per token: text, coarse POS, lemma, fine-grained tag, tag description.
# Fixed column widths keep the rows aligned (lemma and tag columns are adjacent,
# with no separator between them).
for token in doc:
    row = f'{token.text:10} {token.pos_:8} {token.lemma_:15}{token.tag_:6} {spacy.explain(token.tag_)}'
    print(row)

The        DET      the            DT     determiner
quick      ADJ      quick          JJ     adjective (English), other noun-modifier (Chinese)
brown      ADJ      brown          JJ     adjective (English), other noun-modifier (Chinese)
fox        NOUN     fox            NN     noun, singular or mass
jumped     VERB     jump           VBD    verb, past tense
over       ADP      over           IN     conjunction, subordinating or preposition
the        DET      the            DT     determiner
lazy       ADJ      lazy           JJ     adjective (English), other noun-modifier (Chinese)
dog        NOUN     dog            NN     noun, singular or mass
's         PART     's             POS    possessive ending
back       NOUN     back           NN     noun, singular or mass
.          PUNCT    .              .      punctuation mark, sentence closer


In [9]:
# First "read" example: inspect how the second token (index 1) is tagged
doc = nlp('I read books on NLP.')
r = doc[1]

# text, coarse POS, fine-grained tag, tag description
print(f'{r.text:10} {r.pos_:8} {r.tag_:6} {spacy.explain(r.tag_)}')

read       VERB     VBP    verb, non-3rd person singular present


In [10]:
# Second "read" example, with a different object phrase: inspect the same token again
doc = nlp('I read a book on NLP.')
r = doc[1]

# text, coarse POS, fine-grained tag, tag description
print(f'{r.text:10} {r.pos_:8} {r.tag_:6} {spacy.explain(r.tag_)}')

read       VERB     VBD    verb, past tense


In [11]:
# Re-parse the original sentence (doc was reassigned by the "read" examples)
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

# Tally tokens per coarse-grained POS tag; the keys are integer IDs, not names
POS_counts = doc.count_by(spacy.attrs.POS)
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [12]:
# Look up the text behind an arbitrary integer ID in the vocab.
# Note 83 is not one of the POS IDs counted above.
doc.vocab[83].text

'LANG'

In [14]:
# Translate each integer POS ID back to its name and show its count
for pos_id, count in POS_counts.items():
    print(doc.vocab[pos_id].text, count)

DET 2
ADJ 3
NOUN 3
VERB 1
ADP 1
PART 1
PUNCT 1


In [18]:
# Same tally as POS_counts, but keyed by POS name instead of integer ID
data = {doc.vocab[pos_id].text: count for pos_id, count in POS_counts.items()}

In [19]:
# Display the POS-name -> count mapping built in the previous cell
data

{'DET': 2, 'ADJ': 3, 'NOUN': 3, 'VERB': 1, 'ADP': 1, 'PART': 1, 'PUNCT': 1}

In [20]:
# Tally tokens per fine-grained tag, keyed by integer ID
TAG_counts = doc.count_by(spacy.attrs.TAG)

# Print "ID. name: count" rows in ascending ID order
for tag_id in sorted(TAG_counts):
    print(f'{tag_id}. {doc.vocab[tag_id].text:4}: {TAG_counts[tag_id]}')

74. POS : 1
1292078113972184607. IN  : 1
10554686591937588953. JJ  : 3
12646065887601541794. .   : 1
15267657372422890137. DT  : 2
15308085513773655218. NN  : 3
17109001835818727656. VBD : 1


In [21]:
# Tally tokens per syntactic dependency label, keyed by integer ID
DEP_counts = doc.count_by(spacy.attrs.DEP)

# Print "ID. label: count" rows in ascending ID order.
# NOTE: the label column is only 4 wide, so longer labels push the colon right.
for dep_id in sorted(DEP_counts):
    print(f'{dep_id}. {doc.vocab[dep_id].text:4}: {DEP_counts[dep_id]}')

400. advmod: 1
402. amod: 3
415. det : 2
429. nsubj: 1
439. pobj: 1
443. prep: 1
445. punct: 1
8110129090154140942. case: 1
8206900633647566924. ROOT: 1


# Show the raw text of the current Doc
doc.text

In [23]:
# Show the spacy.attrs module object (its repr includes the file it was loaded from)
print(spacy.attrs)

<module 'spacy.attrs' from '/Users/ujjwal/codingpoint/pycourses/venvnlp/lib/python3.11/site-packages/spacy/attrs.cpython-311-darwin.so'>


In [24]:
# spacy.attrs.TAG is itself just an integer ID
spacy.attrs.TAG

75

In [25]:
# Number of entries in this Doc's vocab
len(doc.vocab)

801

# Visualizing Parts of Speech and Dependencies with displaCy

In [27]:
# Perform standard imports
# NOTE(review): spacy and the same model were already loaded in the first cell;
# presumably repeated so this section can run on its own — confirm.
import spacy
nlp = spacy.load('en_core_web_sm')

# Import the displaCy library (spaCy's visualizer, used by the render/serve calls below)
from spacy import displacy

In [28]:
# Parse the sample sentence that the visualizations below will draw
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [29]:
# Draw the dependency parse inline in the notebook, overriding only 'distance'
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 110},
)

In [31]:
# Same inline dependency render, now with compact layout and custom
# text color, background color, and font
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={
        'distance': 110,
        'compact': True,
        'color': 'yellow',
        'bg': '#09a3d5',
        'font': 'Times',
    },
)

In [32]:
# One row per token: text, coarse POS, dependency label, label description
for token in doc:
    row = f'{token.text:10} {token.pos_:7} {token.dep_:7} {spacy.explain(token.dep_)}'
    print(row)

The        DET     det     determiner
quick      ADJ     amod    adjectival modifier
brown      ADJ     amod    adjectival modifier
fox        NOUN    nsubj   nominal subject
jumped     VERB    ROOT    root
over       ADP     prep    prepositional modifier
the        DET     det     determiner
lazy       ADJ     amod    adjectival modifier
dog        NOUN    pobj    object of preposition
's         PART    case    case marking
back       NOUN    advmod  adverbial modifier
.          PUNCT   punct   punctuation


In [35]:
# Serve the dependency visualization over HTTP on port 5050 instead of rendering inline.
# The log output below shows the server handling requests until it is shut down.
displacy.serve(doc, style='dep', options={'distance': 110}, port=5050)




Using the 'dep' visualizer
Serving on http://0.0.0.0:5050 ...



127.0.0.1 - - [30/Jul/2024 12:46:39] "GET / HTTP/1.1" 200 9192
127.0.0.1 - - [30/Jul/2024 12:46:56] "GET / HTTP/1.1" 200 9192


Shutting down server on port 5050.


In [34]:
# Multi-sentence text, later split into sentences for visualization.
# NOTE(review): the '.,' after "another sentence" looks like a typo — confirm it is intended.
doc2 = nlp("This is a sentence. This is another sentence., possibly longer than first one.")

In [38]:
# Materialize the sentence spans of doc2 into a list
spans = [sentence for sentence in doc2.sents]

In [41]:
# Visualizer settings. 'compact' must be a real boolean: the string 'True'
# only appeared to work because any non-empty string is truthy, so 'False'
# would have enabled compact mode as well. This now matches the inline
# render call earlier in the notebook, which passes True.
options = {
    'distance': 110,
    'compact': True,
    'color': 'yellow',
    'bg': '#09a3d5',
    'font': 'Times',
}

# Serve the dependency visualization for the sentence spans over HTTP on port 5050.
displacy.serve(spans, style='dep', port=5050, options=options)


Using the 'dep' visualizer
Serving on http://0.0.0.0:5050 ...



127.0.0.1 - - [30/Jul/2024 12:48:46] "GET / HTTP/1.1" 200 10877


Shutting down server on port 5050.
