In [None]:
# Implement problems on Natural Language processing - Part of Speech tagging, N-grams and Chunking using NLTK.
import nltk
from nltk import word_tokenize, pos_tag, ngrams, RegexpParser

In [20]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [26]:
# --- Step 1: Take user input ---
sentence = input("Enter a sentence: ")
# --- Step 2: Tokenize the sentence ---
tokens = word_tokenize(sentence)
# --- Step 3: POS tagging ---
tags = pos_tag(tokens)
# --- Step 4: Display POS tagging results ---
print("\nPart of Speech Tags:\n")
for word, tag in tags:
  print(f"{word:15} --> {tag}")

Enter a sentence: Hello my name is Chotta Bheem

Part of Speech Tags:

Hello           --> NNP
my              --> PRP$
name            --> NN
is              --> VBZ
Chotta          --> NNP
Bheem           --> NNP


In [27]:
# --- Step 5: N-gram generation ---
n = int(input("\nEnter n for n-grams (e.g., 2 for bigrams, 3 for trigrams): "))
ngram_list = list(ngrams(tokens, n))
print(f"\n{n}-grams:\n")
for gram in ngram_list:
  print(gram)


Enter n for n-grams (e.g., 2 for bigrams, 3 for trigrams): 2

2-grams:

('Hello', 'my')
('my', 'name')
('name', 'is')
('is', 'Chotta')
('Chotta', 'Bheem')


In [28]:
# --- Step 6: Define a simple chunk grammar ---
# Grammar rules:
# NP = Noun Phrase, VP = Verb Phrase, PP = Prepositional Phrase
grammar = r"""
NP: {<DT>?<JJ>*<NN.*>} # Noun Phrase
VP: {<VB.*><NP|PP|CLAUSE>+$} # Verb Phrase
PP: {<IN><NP>} # Prepositional Phrase
"""
# --- Step 7: Create a chunk parser ---
chunk_parser = RegexpParser(grammar)
# --- Step 8: Parse the tagged sentence ---
tree = chunk_parser.parse(tags)
# --- Step 9: Display the chunk tree ---
print("\nChunked Phrases:\n")
print(tree)


Chunked Phrases:

(S
  (NP Hello/NNP)
  my/PRP$
  (NP name/NN)
  (VP is/VBZ (NP Chotta/NNP) (NP Bheem/NNP)))
