In [None]:
import re
from collections import Counter

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize

In [None]:
# Fetch every NLTK resource the cells below rely on.
# 'punkt_tab' and 'averaged_perceptron_tagger_eng' are the resource names that
# newer NLTK releases look up for the tokenizer and the English POS tagger;
# the legacy names are kept so older installations keep working as well.
for resource in (
    'punkt',
    'punkt_tab',
    'stopwords',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
story = """
Mr Oliver, an Anglo-Indian teacher, was returning to his school late one night, on the outskirts of the hill station of Simla. From before Kipling’s time,
the school had been run on English public school lines and the boys, most of them from wealthy Indian families, wore blazers, caps and ties. Life magazine,
in a feature on India, had once called it the ‘Eton of the East’. Mr Oliver had been teaching in the school for several years.

The Simla bazaar, with its cinemas and restaurants, was about three miles from the school and Mr Oliver, a bachelor, usually strolled into the town in the

evening, returning after dark, when he would take a short cut through the pine forest.

When there was a strong wind the pine trees made sad, eerie sounds that kept most people to the main road. But Mr Oliver was not a nervous or imaginative man.
He carried a torch and its gleam—the batteries were running down—moved fitfully down the narrow forest path. When its flickering light fell on the figure of a
boy, who was sitting alone on a rock, Mr Oliver stopped. Boys were not supposed to be out after dark.

‘What are you doing out here, boy?’ asked Mr Oliver sharply, moving closer so that he could recognize the miscreant. But even as he approached the boy, Mr Oliver
sensed that something was wrong. The boy appeared to be crying. His head hung down, he held his face in his hands and his body shook convulsively. It was a strange,
soundless weeping and Mr Oliver felt distinctly uneasy.

‘Well, what’s the matter?’ he asked, his anger giving way to concern. ‘What are you crying for?’ The boy would not answer or look up. His body continued to be racked
with silent sobbing. ‘Come on, boy, you shouldn’t be out here at this hour. Tell me the trouble. Look up!’ The boy looked up. He took his hands from his face and
looked up at his teacher. The light from Mr Oliver’s torch fell on the boy’s face—if you could call it a face.

It had no eyes, ears, nose or mouth. It was just a round smooth head—with a school cap on top of it! And that’s where the story should end. But for Mr Oliver it
did not end here.

The torch fell from his trembling hand. He turned and scrambled down the path, running blindly through the trees and calling for help. He was still running towards
the school buildings when he saw a lantern swinging in the middle of the path. Mr Oliver stumbled up to the watchman, gasping for breath. ‘What is it, sahib?’ asked
the watchman. ‘Has there been an accident? Why are you running?’

‘I saw something—something horrible—a boy weeping in the forest—and he had no face!’

‘No face, sahib?’

‘No eyes, nose, mouth—nothing!’

‘Do you mean it was like this, sahib?’ asked the watchman and raised the lamp to his own face. The watchman had no eyes, no ears, no features at all—not even an
eyebrow! And that’s when the wind blew the lamp out.
"""

In [None]:
# Sentence tokenization: split the story into sentences and preview the first five.
sentences = sent_tokenize(story)
print("Sentence Tokenization:", sentences[:5])

Sentence Tokenization: ['\nMr Oliver, an Anglo-Indian teacher, was returning to his school late one night, on the outskirts of the hill station of Simla.', 'From before Kipling’s time, \nthe school had been run on English public school lines and the boys, most of them from wealthy Indian families, wore blazers, caps and ties.', 'Life magazine, \nin a feature on India, had once called it the ‘Eton of the East’.', 'Mr Oliver had been teaching in the school for several years.', 'The Simla bazaar, with its cinemas and restaurants, was about three miles from the school and Mr Oliver, a bachelor, usually strolled into the town in the \n\nevening, returning after dark, when he would take a short cut through the pine forest.']


In [None]:
# Word tokenization: break the raw story text into word and punctuation tokens.
tokens = word_tokenize(story)

# Show only the leading 20 tokens so the cell output stays short.
print("Tokenization:", tokens[0:20])

Tokenization: ['Mr', 'Oliver', ',', 'an', 'Anglo-Indian', 'teacher', ',', 'was', 'returning', 'to', 'his', 'school', 'late', 'one', 'night', ',', 'on', 'the', 'outskirts', 'of']


In [None]:
# Removing stopwords
# English stopwords plus the comma and full stop. The list is built in a single
# expression rather than extended in place.
stop_words = stopwords.words('english') + [',', '.']

# A set copy gives constant-time membership checks; `stop_words` itself stays a
# list so any other cell that uses it as a list is unaffected.
stop_word_lookup = set(stop_words)

# Comparison is done on the lower-cased token; the original casing is kept in
# the result. NOTE(review): other punctuation tokens (quotes, '?', '!') are not
# in the stop list and therefore survive this filter.
filtered_tokens = [word for word in tokens if word.lower() not in stop_word_lookup]

print("Filtered Tokens (Stopwords Removed):", filtered_tokens[:20])

Filtered Tokens (Stopwords Removed): ['Mr', 'Oliver', 'Anglo-Indian', 'teacher', 'returning', 'school', 'late', 'one', 'night', 'outskirts', 'hill', 'station', 'Simla', 'Kipling', '’', 'time', 'school', 'run', 'English', 'public']


In [None]:
# Stemming: run every filtered token through the Porter stemmer.
porter = PorterStemmer()
stemmed_tokens = list(map(porter.stem, filtered_tokens))

# Preview the first 15 stems.
print("Stemmed Tokens:", stemmed_tokens[0:15])

Stemmed Tokens: ['mr', 'oliv', 'anglo-indian', 'teacher', 'return', 'school', 'late', 'one', 'night', 'outskirt', 'hill', 'station', 'simla', 'kipl', '’']


In [None]:
# Lemmatization: map every filtered token to its WordNet lemma.
# No part-of-speech argument is passed, so the lemmatizer's default is used
# for every token.
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = list(map(lemmatizer.lemmatize, filtered_tokens))

# Preview the first 15 lemmas.
print("Lemmatized Tokens:", lemmatized_tokens[0:15])

Lemmatized Tokens: ['Mr', 'Oliver', 'Anglo-Indian', 'teacher', 'returning', 'school', 'late', 'one', 'night', 'outskirt', 'hill', 'station', 'Simla', 'Kipling', '’']


In [None]:
# Frequency distribution: count how often each lemmatized token occurs.
frequency_distribution = Counter()
frequency_distribution.update(lemmatized_tokens)

# Report the ten most frequent tokens together with their counts.
top_ten = frequency_distribution.most_common(10)
print("Frequency Distribution:", top_ten)

Frequency Distribution: [('’', 18), ('Mr', 11), ('Oliver', 11), ('‘', 11), ('boy', 10), ('?', 8), ('school', 7), ('face', 6), ('!', 5), ('running', 4)]


In [None]:
# POS tagging
# Tag the complete token stream first so each word is tagged alongside its
# original neighbours, then drop the stopwords. Tagging the already-filtered
# list would hand the tagger a sequence with those neighbouring words removed.
# The result holds the same words, in the same order, as `filtered_tokens`.
tagged_tokens = [
    (word, tag)
    for word, tag in nltk.pos_tag(tokens)
    if word.lower() not in stop_words
]

In [None]:
# Function to extract main character using POS tags
def extract_main_character(tagged_tokens, character_names=("Oliver", "Watchman")):
    """Return the candidate name that is tagged as a proper noun most often.

    Args:
        tagged_tokens: Iterable of ``(word, tag)`` pairs, such as the output
            of ``nltk.pos_tag``.
        character_names: Candidate names to look for. Matching is
            case-sensitive. Defaults to the two names that were previously
            hard-coded in this function, so existing calls behave the same.

    Returns:
        The candidate with the highest count among tokens whose tag starts
        with ``'NNP'``, or ``None`` when no candidate occurs with such a tag.
    """
    # Set membership keeps the per-token check cheap for long candidate lists.
    candidates = frozenset(character_names)
    character_counts = Counter(
        word
        for word, tag in tagged_tokens
        if tag.startswith('NNP') and word in candidates
    )
    if not character_counts:
        return None
    return character_counts.most_common(1)[0][0]

# Pick the main character from the POS-tagged tokens and report it.
main_character = extract_main_character(tagged_tokens=tagged_tokens)
print("Main Character:", main_character)

Main Character: Oliver
