In [None]:
# Mount Google Drive into the Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


## Importing tokenizer and stemmer libraries

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.lancaster import LancasterStemmer

In [None]:
# word_tokenize needs the Punkt tokenizer data. Newer NLTK releases (3.8.2+)
# load it from the 'punkt_tab' resource rather than the pickled 'punkt' models,
# so fetch both to keep the notebook runnable on a fresh kernel with either
# generation of NLTK. 'punkt' stays last so the cell still displays its result.
nltk.download('punkt_tab')
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Defining Inputs

In [89]:
# Sample sentences shared by the tokenization, stemming and lemmatization cells.
# Each entry is a single string; adjacent literals are joined by Python.
input_sentences = [
    (
        "professor lectures were renowned for their profundity "
        "often delving into abstruse topics"
    ),
    (
        "tempestuous relationship between two lovers "
        "was fraught with acrimony and recrimination"
    ),
    (
        "enigmatic manuscript discovered dusty attic "
        "contained cryptic symbols that defied interpretation"
    ),
    (
        "arcane rituals performed indigenous tribe during equinox "
        "were shrouded mystery and intrigue"
    ),
    (
        "hermit lived reclusive life hinterlands "
        "surrounded arcane artifacts and ancient texts"
    ),
]

In [90]:
# Split every input sentence into word tokens: one token list per sentence.
tokenized_sentences = list(map(word_tokenize, input_sentences))

## Comparison of Stemmers

In [91]:
def _stem_sentences(stemmer, sentences):
    """Return `sentences` with every token replaced by `stemmer.stem(token)`."""
    return [[stemmer.stem(token) for token in tokens] for tokens in sentences]


# One instance of each stemmer being compared.
porter_stemmer = PorterStemmer()
snowball_stemmer = SnowballStemmer(language='english')
lancaster_stemmer = LancasterStemmer()

# Run the same tokenized sentences through each stemmer so the results can be
# compared sentence by sentence.
stemmed_porter = _stem_sentences(porter_stemmer, tokenized_sentences)
stemmed_snowball = _stem_sentences(snowball_stemmer, tokenized_sentences)
stemmed_lancaster = _stem_sentences(lancaster_stemmer, tokenized_sentences)

In [92]:
def print_stemmed(label, stemmed_sentences):
    """Print a heading for one stemmer followed by its numbered stemmed sentences.

    Args:
        label: Stemmer name inserted into the "Using ... Stemmer: " heading.
        stemmed_sentences: Iterable of stemmed token lists, one per sentence.
    """
    print(f"Using {label} Stemmer: ")
    # Sentence numbering starts at 1 to match how the sentences are presented.
    for index, stemmed_sentence in enumerate(stemmed_sentences, 1):
        print(f"Sentence {index}: {stemmed_sentence}")


# Report every stemmer through the same helper instead of three copied loops.
for label, stemmed_sentences in (
    ("Porter", stemmed_porter),
    ("Snowball", stemmed_snowball),
    ("Lancaster", stemmed_lancaster),
):
    print_stemmed(label, stemmed_sentences)

Using Porter Stemmer: 
Sentence 1: ['professor', 'lectur', 'were', 'renown', 'for', 'their', 'profund', 'often', 'delv', 'into', 'abstrus', 'topic']
Sentence 2: ['tempestu', 'relationship', 'between', 'two', 'lover', 'wa', 'fraught', 'with', 'acrimoni', 'and', 'recrimin']
Sentence 3: ['enigmat', 'manuscript', 'discov', 'dusti', 'attic', 'contain', 'cryptic', 'symbol', 'that', 'defi', 'interpret']
Sentence 4: ['arcan', 'ritual', 'perform', 'indigen', 'tribe', 'dure', 'equinox', 'were', 'shroud', 'mysteri', 'and', 'intrigu']
Sentence 5: ['hermit', 'live', 'reclus', 'life', 'hinterland', 'surround', 'arcan', 'artifact', 'and', 'ancient', 'text']
Using Snowball Stemmer: 
Sentence 1: ['professor', 'lectur', 'were', 'renown', 'for', 'their', 'profund', 'often', 'delv', 'into', 'abstrus', 'topic']
Sentence 2: ['tempestu', 'relationship', 'between', 'two', 'lover', 'was', 'fraught', 'with', 'acrimoni', 'and', 'recrimin']
Sentence 3: ['enigmat', 'manuscript', 'discov', 'dusti', 'attic', 'contai

## Applying Lemmatization

In [93]:
# Bring in the WordNet-based lemmatizer and fetch the 'wordnet' data package
# it is used with in the cells below.
from nltk.stem import WordNetLemmatizer

nltk.download("wordnet")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [94]:
lemmatizer = WordNetLemmatizer()

# Lemmatize token by token. Only the token is passed to lemmatize(), i.e. no
# part-of-speech hint is supplied for any word.
lemmatized_sentence = [
    list(map(lemmatizer.lemmatize, tokens)) for tokens in tokenized_sentences
]

In [99]:
# Show the lemmatized tokens of each sentence, numbered from 1.
print("Using WordNet Lemmatization: ")
for sentence_number, lemmas in enumerate(lemmatized_sentence, start=1):
    print("Sentence {}: {}".format(sentence_number, lemmas))

Using WordNet Lemmatization: 
Sentence 1: ['professor', 'lecture', 'were', 'renowned', 'for', 'their', 'profundity', 'often', 'delving', 'into', 'abstruse', 'topic']
Sentence 2: ['tempestuous', 'relationship', 'between', 'two', 'lover', 'wa', 'fraught', 'with', 'acrimony', 'and', 'recrimination']
Sentence 3: ['enigmatic', 'manuscript', 'discovered', 'dusty', 'attic', 'contained', 'cryptic', 'symbol', 'that', 'defied', 'interpretation']
Sentence 4: ['arcane', 'ritual', 'performed', 'indigenous', 'tribe', 'during', 'equinox', 'were', 'shrouded', 'mystery', 'and', 'intrigue']
Sentence 5: ['hermit', 'lived', 'reclusive', 'life', 'hinterland', 'surrounded', 'arcane', 'artifact', 'and', 'ancient', 'text']
