In [None]:
#In which scenarios do we use stemming?
#Classification problems where the exact form of the word is not important, but the root meaning is.
#For example, in spam detection, sentiment analysis, or topic categorization, stemming can help group similar words together.
#Information retrieval systems, such as search engines, where users may search for different forms of a word.
#By stemming words to their root form, the system can match queries with relevant documents more effectively.
#Reducing dimensionality in text data for machine learning models.
#By stemming words, we can reduce the number of unique tokens in the dataset, which can help improve model performance and reduce overfitting.
#Preprocessing step in natural language processing (NLP) pipelines to standardize words and improve the efficiency of subsequent analysis.
#However, it's important to note that stemming can sometimes lead to loss of meaning or context, so it may not be suitable for all NLP tasks, especially those requiring precise understanding of word forms, such as machine translation or named entity recognition.  
#Using WordPunct Tokenizer


In [1]:
#Sample vocabulary for the stemming demos: verb forms, adverbs, regular and
#irregular plurals, a possessive, comparatives and related noun/adjective pairs.
words = [
    "running",
    "runner",
    "ran",
    "easily",
    "fairly",
    "cats",
    "cat's",
    "children",
    "geese",
    "better",
    "best",
    "happily",
    "happiness",
    "emotion",
    "emotional",
]

In [None]:
#Porter Stemmer
#Stemming may lose some of a word's meaning, and the stem is not always a real word, e.g. easily --> easili
from nltk.stem import PorterStemmer
porter_stemmer = PorterStemmer()
#Print every sample word next to the stem the Porter stemmer produces for it.
for word in words:
    stem = porter_stemmer.stem(word)
    print(f"Porter Stemmer: {word} --> {stem}")

Porter Stemmer: running --> run
Porter Stemmer: runner --> runner
Porter Stemmer: ran --> ran
Porter Stemmer: easily --> easili
Porter Stemmer: fairly --> fairli
Porter Stemmer: cats --> cat
Porter Stemmer: cat's --> cat'
Porter Stemmer: children --> children
Porter Stemmer: geese --> gees
Porter Stemmer: better --> better
Porter Stemmer: best --> best
Porter Stemmer: happily --> happili
Porter Stemmer: happiness --> happi
Porter Stemmer: emotion --> emot
Porter Stemmer: emotional --> emot


In [None]:
#RegexpStemmer Class
#This stemmer applies custom stemming rules written as a regular expression.
#The part of a word matched by the pattern is removed; here every alternative is anchored with $,
#so only the listed suffixes (ing, s, ed, ly, es, tion, 's) are stripped.
#Rules that are too aggressive or poorly defined lose meaning, e.g. emotion --> emo.
from nltk.stem import RegexpStemmer
regexp_stemmer = RegexpStemmer("ing$|s$|ed$|ly$|es$|tion$|'s$")
#Print every sample word next to the result of applying the suffix rules.
for word in words:
    stem = regexp_stemmer.stem(word)
    print(f"Regexp Stemmer: {word} --> {stem}")

Regexp Stemmer: running --> runn
Regexp Stemmer: runner --> runner
Regexp Stemmer: ran --> ran
Regexp Stemmer: easily --> easi
Regexp Stemmer: fairly --> fair
Regexp Stemmer: cats --> cat
Regexp Stemmer: cat's --> cat
Regexp Stemmer: children --> children
Regexp Stemmer: geese --> geese
Regexp Stemmer: better --> better
Regexp Stemmer: best --> best
Regexp Stemmer: happily --> happi
Regexp Stemmer: happiness --> happines
Regexp Stemmer: emotion --> emo
Regexp Stemmer: emotional --> emotional


In [None]:
#Snowball Stemmer
#An improved version of the Porter Stemmer that is more aggressive in its stemming approach.
#It is designed to handle multiple languages; the language is chosen when the stemmer is created ('english' here).
#For many words it gives better results than the Porter Stemmer, e.g. fairly --> fair instead of fairli.
#It is generally preferred over the Porter Stemmer for its improved accuracy and language support.
#Like other stemming algorithms, it may still lose some meaning in certain cases, e.g. easily --> easili.
from nltk.stem import SnowballStemmer
snowball_stemmer = SnowballStemmer('english')
#Print every sample word next to the stem the Snowball stemmer produces for it.
for word in words:
    stem = snowball_stemmer.stem(word)
    print(f"Snowball Stemmer: {word} --> {stem}")

Snowball Stemmer: running --> run
Snowball Stemmer: runner --> runner
Snowball Stemmer: ran --> ran
Snowball Stemmer: easily --> easili
Snowball Stemmer: fairly --> fair
Snowball Stemmer: cats --> cat
Snowball Stemmer: cat's --> cat
Snowball Stemmer: children --> children
Snowball Stemmer: geese --> gees
Snowball Stemmer: better --> better
Snowball Stemmer: best --> best
Snowball Stemmer: happily --> happili
Snowball Stemmer: happiness --> happi
Snowball Stemmer: emotion --> emot
Snowball Stemmer: emotional --> emot
