<a href="https://colab.research.google.com/github/ayush9h/MLOps/blob/main/NLP/Comparison-Stemmers/NLP_Lab2(AyushKumar).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing stemmer libraries

In [66]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.lancaster import LancasterStemmer

In [67]:
# word_tokenize relies on NLTK's Punkt sentence-tokenizer models. Newer NLTK
# releases (3.8.2 and later) look for the 'punkt_tab' package instead of the
# pickled 'punkt' one, so both are requested to keep the notebook runnable
# across versions. An id the installed NLTK does not know is reported by
# nltk.download, not raised.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Defining Inputs

In [68]:
# Five sample English sentences; this list is the shared input that is
# tokenized once and then fed to every stemmer and lemmatizer below.
input_sentences = [
    "professor lectures were renowned for their profundity often delving into abstruse topics",
    "tempestuous relationship between two lovers was fraught with acrimony and recrimination",
    "The striped bats were hanging on their feet and ate best fishes",
    "arcane rituals performed indigenous tribe during equinox were shrouded mystery and intrigue",
    "hermit lived reclusive life hinterlands surrounded arcane artifacts and ancient texts"
]

In [69]:
# Split each input sentence into a list of word tokens (one list per sentence).
tokenized_sentences = list(map(word_tokenize, input_sentences))

## Comparison of Stemmers

In [70]:
porter_stemmer = PorterStemmer()
snowball_stemmer = SnowballStemmer(language='english')
lancaster_stemmer = LancasterStemmer()


def _stem_corpus(stemmer, sentences):
    """Return `sentences` with every token replaced by `stemmer.stem(token)`.

    The nesting is preserved: one list of stems per input token list.
    """
    return [list(map(stemmer.stem, tokens)) for tokens in sentences]


# Run the same tokenized corpus through each stemmer so the results line up.
stemmed_porter = _stem_corpus(porter_stemmer, tokenized_sentences)
stemmed_snowball = _stem_corpus(snowball_stemmer, tokenized_sentences)
stemmed_lancaster = _stem_corpus(lancaster_stemmer, tokenized_sentences)

In [71]:
# Heading / result pairs, listed in the order they should be printed.
stemmer_outputs = (
    ("Using Porter Stemmer: ", stemmed_porter),
    ("Using Snowball Stemmer: ", stemmed_snowball),
    ("Using Lancaster Stemmer: ", stemmed_lancaster),
)

for heading, stemmed_corpus in stemmer_outputs:
    print(heading)
    # Sentences are numbered from 1 for readability.
    for position, stems in enumerate(stemmed_corpus, start=1):
        print(f"Sentence {position}: {stems}")

Using Porter Stemmer: 
Sentence 1: ['professor', 'lectur', 'were', 'renown', 'for', 'their', 'profund', 'often', 'delv', 'into', 'abstrus', 'topic']
Sentence 2: ['tempestu', 'relationship', 'between', 'two', 'lover', 'wa', 'fraught', 'with', 'acrimoni', 'and', 'recrimin']
Sentence 3: ['the', 'stripe', 'bat', 'were', 'hang', 'on', 'their', 'feet', 'and', 'ate', 'best', 'fish']
Sentence 4: ['arcan', 'ritual', 'perform', 'indigen', 'tribe', 'dure', 'equinox', 'were', 'shroud', 'mysteri', 'and', 'intrigu']
Sentence 5: ['hermit', 'live', 'reclus', 'life', 'hinterland', 'surround', 'arcan', 'artifact', 'and', 'ancient', 'text']
Using Snowball Stemmer: 
Sentence 1: ['professor', 'lectur', 'were', 'renown', 'for', 'their', 'profund', 'often', 'delv', 'into', 'abstrus', 'topic']
Sentence 2: ['tempestu', 'relationship', 'between', 'two', 'lover', 'was', 'fraught', 'with', 'acrimoni', 'and', 'recrimin']
Sentence 3: ['the', 'stripe', 'bat', 'were', 'hang', 'on', 'their', 'feet', 'and', 'ate', 'bes

## Applying Lemmatization

In [72]:
from nltk.stem import WordNetLemmatizer
# WordNetLemmatizer reads the WordNet corpus, so make sure it is available locally.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [73]:
lemmatizer = WordNetLemmatizer()

# lemmatize() is called with the token only, i.e. with its default
# part-of-speech argument, for every token of every sentence.
lemmatized_sentence = [
    list(map(lemmatizer.lemmatize, tokens))
    for tokens in tokenized_sentences
]

### TextBlob Lemmatization

In [74]:
!pip install textblob



In [75]:
from textblob import TextBlob,Word

In [76]:
# Lemmatize token by token with TextBlob's Word wrapper, keeping one output
# list per input sentence.
lemmatized_sentence_blob = []
for tokens in tokenized_sentences:
    lemmatized_sentence_blob.append([Word(token).lemmatize() for token in tokens])

### spaCy Lemmatization

In [77]:
import spacy

In [78]:
spac_lemmat = spacy.load('en_core_web_sm')

# The pipeline takes raw text, so each token list is joined back into a
# space-separated string; the pipeline then tokenizes that string itself.
lemmatized_sentence_spacy = []
for tokens in tokenized_sentences:
    doc = spac_lemmat(" ".join(tokens))
    lemmatized_sentence_spacy.append([token.lemma_ for token in doc])

In [79]:
# Print every lemmatizer's output through one shared loop so all three
# listings use exactly the same "Sentence N: [...]" line format.
lemmatizer_outputs = [
    ("Using WordNet Lemmatization: ", lemmatized_sentence),
    ("Using TextBlob Lemmatization: ", lemmatized_sentence_blob),
    ("Using Spacy Lemmatization: ", lemmatized_sentence_spacy),
]

for heading, lemmatized_corpus in lemmatizer_outputs:
    print(heading)
    # Sentences are numbered from 1 for readability.
    for i, lemmatize_sentence in enumerate(lemmatized_corpus, 1):
        print(f"Sentence {i}: {lemmatize_sentence}")

Using WordNet Lemmatization: 
Sentence 1: ['professor', 'lecture', 'were', 'renowned', 'for', 'their', 'profundity', 'often', 'delving', 'into', 'abstruse', 'topic']
Sentence 2: ['tempestuous', 'relationship', 'between', 'two', 'lover', 'wa', 'fraught', 'with', 'acrimony', 'and', 'recrimination']
Sentence 3: ['The', 'striped', 'bat', 'were', 'hanging', 'on', 'their', 'foot', 'and', 'ate', 'best', 'fish']
Sentence 4: ['arcane', 'ritual', 'performed', 'indigenous', 'tribe', 'during', 'equinox', 'were', 'shrouded', 'mystery', 'and', 'intrigue']
Sentence 5: ['hermit', 'lived', 'reclusive', 'life', 'hinterland', 'surrounded', 'arcane', 'artifact', 'and', 'ancient', 'text']
Using TextBlob Lemmatization: 
Sentence 1 : ['professor', 'lecture', 'were', 'renowned', 'for', 'their', 'profundity', 'often', 'delving', 'into', 'abstruse', 'topic']
Sentence 2 : ['tempestuous', 'relationship', 'between', 'two', 'lover', 'wa', 'fraught', 'with', 'acrimony', 'and', 'recrimination']
Sentence 3 : ['The', '

## Comparison Table for Stemmers

In [80]:
import pandas as pd

In [81]:
# One column per stemmer next to the raw sentence; each cell of a stemmer
# column holds that sentence's list of stems.
d = {
    'Input Sentences': input_sentences,
    'Porter Stemmer': stemmed_porter,
    'Lancaster Stemmer': stemmed_lancaster,
    'Snowball Stemmer': stemmed_snowball,
}
df_stemmers = pd.DataFrame(d)

In [82]:
# Bare last expression: the notebook renders the stemmer comparison table.
df_stemmers

Unnamed: 0,Input Sentences,Porter Stemmer,Lancaster Stemmer,Snowball Stemmer
0,professor lectures were renowned for their pro...,"[professor, lectur, were, renown, for, their, ...","[profess, lect, wer, renown, for, their, profu...","[professor, lectur, were, renown, for, their, ..."
1,tempestuous relationship between two lovers wa...,"[tempestu, relationship, between, two, lover, ...","[tempestu, rel, between, two, lov, was, fraugh...","[tempestu, relationship, between, two, lover, ..."
2,The striped bats were hanging on their feet an...,"[the, stripe, bat, were, hang, on, their, feet...","[the, striped, bat, wer, hang, on, their, feet...","[the, stripe, bat, were, hang, on, their, feet..."
3,arcane rituals performed indigenous tribe duri...,"[arcan, ritual, perform, indigen, tribe, dure,...","[arc, rit, perform, indig, trib, dur, equinox,...","[arcan, ritual, perform, indigen, tribe, dure,..."
4,hermit lived reclusive life hinterlands surrou...,"[hermit, live, reclus, life, hinterland, surro...","[hermit, liv, reclud, lif, hinterland, surroun...","[hermit, live, reclus, life, hinterland, surro..."


## Comparison Table for Lemmatizers

In [83]:
# One column per lemmatizer next to the raw sentence; each cell of a
# lemmatizer column holds that sentence's list of lemmas.
d = {
    'Input Sentences': input_sentences,
    'Word Net Lemmatizer': lemmatized_sentence,
    'TextBlob Lemmatizer': lemmatized_sentence_blob,
    "Spacy Lemmatization": lemmatized_sentence_spacy,
}
df_lemmatizers = pd.DataFrame(d)

In [84]:
# Bare last expression: the notebook renders the lemmatizer comparison table.
df_lemmatizers

Unnamed: 0,Input Sentences,Word Net Lemmatizer,TextBlob Lemmatizer,Spacy Lemmatization
0,professor lectures were renowned for their pro...,"[professor, lecture, were, renowned, for, thei...","[professor, lecture, were, renowned, for, thei...","[professor, lecture, be, renowne, for, their, ..."
1,tempestuous relationship between two lovers wa...,"[tempestuous, relationship, between, two, love...","[tempestuous, relationship, between, two, love...","[tempestuous, relationship, between, two, love..."
2,The striped bats were hanging on their feet an...,"[The, striped, bat, were, hanging, on, their, ...","[The, striped, bat, were, hanging, on, their, ...","[the, stripe, bat, be, hang, on, their, foot, ..."
3,arcane rituals performed indigenous tribe duri...,"[arcane, ritual, performed, indigenous, tribe,...","[arcane, ritual, performed, indigenous, tribe,...","[arcane, ritual, perform, indigenous, tribe, d..."
4,hermit lived reclusive life hinterlands surrou...,"[hermit, lived, reclusive, life, hinterland, s...","[hermit, lived, reclusive, life, hinterland, s...","[hermit, live, reclusive, life, hinterland, su..."
