In [1]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import PorterStemmer, SnowballStemmer, LancasterStemmer, RegexpStemmer

In [2]:
# sent_tokenize / word_tokenize need the Punkt tokenizer data. NLTK 3.9 and
# later load it from the "punkt_tab" package rather than the older pickled
# "punkt" package, so fetch both: the notebook then runs on old and new NLTK
# installs alike. Packages that are already present are simply reported as
# up-to-date.
nltk.download("punkt")
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vasan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Sample passage used by every later cell. It is written as adjacent string
# literals so the trailing space before each line break is visible instead of
# hiding at the end of a source line.
text = (
    "The children were playing happily in the gardens. \n"
    "They had been running and jumping for hours before it started raining. \n"
    "Later, they decided to go home and eat their favorite meals."
)

In [11]:
# 1. Sentence segmentation
# -------------------------------------------------------------
# Split the raw passage into sentences and list them with 1-based numbering.
# `sentences` is reused by the stemming cell further down.
sentences = sent_tokenize(text)
print("=== Sentence Segmentation ===")
for i, sentence in enumerate(sentences):
    # print() inserts the single space between the number and the sentence
    print(f"{i + 1}.", sentence)

=== Sentence Segmentation ===
1. The children were playing happily in the gardens.
2. They had been running and jumping for hours before it started raining.
3. Later, they decided to go home and eat their favorite meals.


In [12]:
# 2. Initialise the stemmers to compare
# -------------------------------------------------------------
# One instance of each algorithm; the three constructors are independent of
# one another, so their order does not matter.
lancaster = LancasterStemmer()
snowball = SnowballStemmer("english")  # Snowball needs an explicit language
porter = PorterStemmer()

In [7]:
# Tokenise every sentence into words and run each token through all three
# stemmers, then print the original tokens and the three stem lists together
# so the algorithms can be compared row by row.
print("\n=== Stemming Results ===")
for i, sentence in enumerate(sentences, start=1):
    words = word_tokenize(sentence)  # punctuation comes back as its own token
    porter_stems = list(map(porter.stem, words))
    snow_stems = list(map(snowball.stem, words))
    lanc_stems = list(map(lancaster.stem, words))

    print(f"\nSentence {i}: {sentence}")
    # Labels are emitted exactly as written; rows appear in table order.
    for label, row in (
        ("Original Words :", words),
        ("Porter Stemmer :", porter_stems),
        ("Snowball Stemmer:", snow_stems),
        ("Lancaster Stemmer:", lanc_stems),
    ):
        print(label, row)


=== Stemming Results ===

Sentence 1: The children were playing happily in the gardens.
Original Words : ['The', 'children', 'were', 'playing', 'happily', 'in', 'the', 'gardens', '.']
Porter Stemmer : ['the', 'children', 'were', 'play', 'happili', 'in', 'the', 'garden', '.']
Snowball Stemmer: ['the', 'children', 'were', 'play', 'happili', 'in', 'the', 'garden', '.']
Lancaster Stemmer: ['the', 'childr', 'wer', 'play', 'happy', 'in', 'the', 'gard', '.']

Sentence 2: They had been running and jumping for hours before it started raining.
Original Words : ['They', 'had', 'been', 'running', 'and', 'jumping', 'for', 'hours', 'before', 'it', 'started', 'raining', '.']
Porter Stemmer : ['they', 'had', 'been', 'run', 'and', 'jump', 'for', 'hour', 'befor', 'it', 'start', 'rain', '.']
Snowball Stemmer: ['they', 'had', 'been', 'run', 'and', 'jump', 'for', 'hour', 'befor', 'it', 'start', 'rain', '.']
Lancaster Stemmer: ['they', 'had', 'been', 'run', 'and', 'jump', 'for', 'hour', 'bef', 'it', 'start