In [1]:
# 1. Create a Doc object from the file peterrabbit.txt
import spacy

# Load the 'en_core_web_sm' pipeline once; later cells call `nlp` on the story text.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Read the whole story into memory; the file is closed as soon as the text is read.
with open(r"/content/peterrabbit.txt", 'r', encoding='utf-8') as f:
    d = f.read()

# Run the pipeline over the raw text to build the Doc that the later cells query.
doc = nlp(d)

In [3]:
# 2. For every token in the third sentence, print the token text, the POS tag, the fine-grained TAG tag, and the description of the fine-grained tag.

In [4]:
# Materialise the sentence generator so individual sentences can be indexed.
sentences = list(doc.sents)

In [5]:
# Index 2 is the third sentence (zero-based list).
token_test = sentences[2]

In [6]:
# Echo the selected sentence so the token table below can be checked against it.
print('3rd Sentence:', token_test)

3rd Sentence: They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.




In [7]:
# One row per token of the third sentence: surface text, coarse POS tag,
# fine-grained tag, and spaCy's explanation of the fine-grained tag.
for token in token_test:
    # The sentence spans a line break in the source text, which spaCy keeps as a
    # whitespace token. Printing it raw would split the row across two lines, so
    # whitespace tokens are shown in escaped form (e.g. '\n') instead.
    shown_text = repr(token.text) if token.is_space else token.text
    print(f"Token {shown_text:<10} POS {token.pos_:<10} TAG {token.tag_:<10} DESCRIPTION {spacy.explain(token.tag_)}")

Token They       POS PRON       TAG PRP        DESCRIPTION pronoun, personal
Token lived      POS VERB       TAG VBD        DESCRIPTION verb, past tense
Token with       POS ADP        TAG IN         DESCRIPTION conjunction, subordinating or preposition
Token their      POS PRON       TAG PRP$       DESCRIPTION pronoun, possessive
Token Mother     POS PROPN      TAG NNP        DESCRIPTION noun, proper singular
Token in         POS ADP        TAG IN         DESCRIPTION conjunction, subordinating or preposition
Token a          POS DET        TAG DT         DESCRIPTION determiner
Token sand       POS NOUN       TAG NN         DESCRIPTION noun, singular or mass
Token -          POS PUNCT      TAG HYPH       DESCRIPTION punctuation mark, hyphen
Token bank       POS NOUN       TAG NN         DESCRIPTION noun, singular or mass
Token ,          POS PUNCT      TAG ,          DESCRIPTION punctuation mark, comma
Token underneath POS ADP        TAG IN         DESCRIPTION conjunction, subordinatin

In [8]:
# 3. Provide a frequency list of POS tags from the entire document

In [9]:
import spacy
from collections import Counter

In [10]:
# Tally the coarse-grained POS tag of every token in the document.
pos = Counter(tok.pos_ for tok in doc)

In [12]:
# Print the POS-tag counts held in `pos`, one "TAG : count" row per tag.
# The heading ends with "\n" (the original had "/n", which printed literally)
# so that a blank line separates it from the table.
print("Frequency of pos tags:\n")
for p, freq in pos.items():
    print(f"{p: <10}: {freq}")

Frequency of pos tags:/n
DET       : 90
PROPN     : 75
ADP       : 124
PUNCT     : 172
NUM       : 8
SPACE     : 99
ADV       : 65
SCONJ     : 20
NOUN      : 173
PRON      : 108
VERB      : 131
ADJ       : 54
CCONJ     : 61
AUX       : 50
PART      : 28


In [13]:
# 4. CHALLENGE: What percentage of tokens are nouns?

In [14]:
# Keep only "word-like" tokens: drop anything flagged as punctuation or whitespace.
tokens = [tok for tok in doc if not (tok.is_punct or tok.is_space)]

In [15]:
# Count both common nouns and proper nouns as "nouns".
noun = [tok for tok in tokens if tok.pos_ in ("NOUN", "PROPN")]

In [16]:
# Share of nouns among the filtered tokens, as a percentage.
# An empty token list yields 0 rather than a division by zero.
if tokens:
    percentage = len(noun) / len(tokens) * 100
else:
    percentage = 0

In [17]:
# Report the noun share rounded to two decimal places.
print(f"Nouns percentage: {percentage:.2f}%")

Nouns percentage: 25.08%


In [18]:
# 5. Display the Dependency Parse for the third sentence.

In [19]:
# Sentence list for the dependency-parse exercise.
d_parse = list(doc.sents)

In [20]:
# Take the third sentence from the list built in the previous cell (`d_parse`),
# so this section no longer depends on `sentences` from a much earlier cell and
# `d_parse` is no longer an unused variable.
token_test = d_parse[2]

In [21]:
# Show the sentence whose dependency parse is listed next.
print('3rd Sentence:', token_test)

3rd Sentence: They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.




In [22]:
# One row per token: text, dependency label, coarse POS tag, and the text of
# the token's syntactic head.
for token in token_test:
    # The line break inside this sentence is a whitespace token; printed raw it
    # emits a newline and breaks the table, so show it escaped (e.g. '\n').
    shown_text = repr(token.text) if token.is_space else token.text
    print(f'{shown_text:12} {token.dep_:<10} {token.pos_:<10} {token.head.text:<10}')

They         nsubj      PRON       lived     
lived        ROOT       VERB       lived     
with         prep       ADP        lived     
their        poss       PRON       Mother    
Mother       pobj       PROPN      with      
in           prep       ADP        lived     
a            det        DET        bank      
sand         compound   NOUN       bank      
-            punct      PUNCT      bank      
bank         pobj       NOUN       in        
,            punct      PUNCT      bank      
underneath   prep       ADP        bank      
the          det        DET        root      
root         pobj       NOUN       underneath
of           prep       ADP        root      
a            det        DET        tree      

            dep        SPACE      a         
very         advmod     ADV        big       
big          amod       ADJ        tree      
fir          compound   NOUN       tree      
-            punct      PUNCT      tree      
tree         pobj       NOUN      

In [25]:
from spacy import displacy

# Render the dependency parse of the third sentence inline in the notebook.
displacy.render(
    token_test,
    style='dep',
    jupyter=True,
)

In [26]:
# 6. Show the first two named entities from Beatrix Potter's The Tale of Peter Rabbit

In [27]:
# Collect (text, label) for every named entity in the document, then print the
# first two. The comprehension variable is named `span` so that it does not
# shadow the resulting list `ent`.
ent = [(span.text, span.label_) for span in doc.ents]
print('First two named entities')
for ent_text, ent_label in ent[:2]:
    print(f'{ent_text} |{ent_label}')

First two named entities
The Tale of Peter Rabbit |WORK_OF_ART
Beatrix Potter |PERSON


In [28]:
# 7. How many sentences are contained in The Tale of Peter Rabbit?

In [29]:
# Rebuild the sentence list so this section can be run on its own.
sentences = list(doc.sents)

In [30]:
# The number of sentences spaCy segmented the document into.
print("No. of sentences:", len(sentences))

No. of sentences: 57


In [31]:
# Preview the first three sentences, numbered from 1, with surrounding
# whitespace stripped.
for i, sent in enumerate(sentences[:3], 1):
    print(f"Sentence {i}= {sent.text.strip()}")

Sentence 1= The Tale of Peter Rabbit, by Beatrix Potter (1902).
Sentence 2= Once upon a time there were four little Rabbits, and their names
were--

          Flopsy,
       Mopsy,
   Cotton-tail,
and Peter.
Sentence 3= They lived with their Mother in a sand-bank, underneath the root of a
very big fir-tree.


In [32]:
# 8. CHALLENGE: How many sentences contain named entities?

In [33]:
sentences = list(doc.sents)
# Keep the sentences that contain at least one named entity. `Span.ents` gives
# the entities lying completely inside the sentence, which replaces the manual
# start/end comparison that rescanned `doc.ents` for every sentence.
entities = [sent for sent in sentences if sent.ents]

In [34]:
# Compare the total sentence count with the number of sentences that hold
# at least one named entity.
print(f"Total sentences: {len(sentences)}")
print(f"Sentences containing named entities: {len(entities)}")

Total sentences: 57
Sentences containing named entities: 38


In [35]:
# For each entity-bearing sentence, print the sentence and the texts of the
# entities inside it.
for sent in entities:
    # `sent.ents` is the set of entities fully contained in this sentence; it
    # replaces the hand-written bounds check against every entity in the doc.
    ents = [ent.text for ent in sent.ents]
    print(f"\nSentence: {sent.text.strip()}")
    print(f"Named Entities: {ents}")


Sentence: The Tale of Peter Rabbit, by Beatrix Potter (1902).
Named Entities: ['The Tale of Peter Rabbit', 'Beatrix Potter', '1902']

Sentence: Once upon a time there were four little Rabbits, and their names
were--

          Flopsy,
       Mopsy,
   Cotton-tail,
and Peter.
Named Entities: ['four', 'Mopsy', 'Cotton-tail', 'Peter']

Sentence: 'Now my dears,' said old Mrs. Rabbit one morning, 'you may go into
the fields or down the lane, but don't go into Mr. McGregor's garden:
your Father had an accident there; he was put in a pie by Mrs.
McGregor.'

'Now run along, and don't get into mischief.
Named Entities: ['Rabbit', 'one morning', 'McGregor', 'McGregor']

Sentence: Then old Mrs. Rabbit took a basket and her umbrella, and went through
the wood to the baker's.
Named Entities: ['Rabbit']

Sentence: She bought a loaf of brown bread and five
currant buns.
Named Entities: ['five']

Sentence: Flopsy, Mopsy, and Cottontail, who were good little bunnies, went
down the lane to gather black

In [36]:
# 9. Display the named entity visualization for list_of_sents[0] from the previous problem

In [37]:
# Sentence list used to pick the sentence for the entity visualisation.
l = list(doc.sents)

In [38]:
# First sentence of the document; its entities are visualised in the next cell.
n_entity = l[0]
print("Sentence to visualise", n_entity.text)

Sentence to visualise The Tale of Peter Rabbit, by Beatrix Potter (1902).




In [40]:
# Highlight the named entities of the chosen sentence inline in the notebook.
displacy.render(
    n_entity,
    style='ent',
    jupyter=True,
)