In [1]:
## Lemmatization in NLP
# Lemmatization is the process of reducing a word to its base or root form.
# Unlike stemming, which may produce non-words, lemmatization results in a valid word.
# For example, the lemma of "running" is "run", and the lemma of "better" is "good".
# Lemmatization considers the context and converts the word to its meaningful base form.
# It is often used in natural language processing (NLP) tasks such as text analysis, information retrieval, and machine learning.
# In this code, we will demonstrate how to perform lemmatization using the NLTK library in Python.

In [14]:
## Use Cases for Lemmatization
# 1. Text Preprocessing: Lemmatization is commonly used in text preprocessing to reduce words to their base forms, making it easier to analyze and compare text data.
# 2. Information Retrieval: In search engines, lemmatization helps improve search results by matching different forms of a word to its base form.
# 3. Sentiment Analysis: Lemmatization can enhance sentiment analysis by ensuring that different forms of a word are treated as the same entity, improving the accuracy of sentiment classification.
# 4. Machine Translation: In machine translation, lemmatization helps in translating words to their base forms, improving the quality of translations.


In [2]:
words = ["running", "runner", "ran", "easily", "fairly", 
"cats", "cacti", "eating", "eats", "eaten", "ate", "eater", "eaters", "eating", "eats"
"finally", "finally", "finalize", "finalizes", "finalizing", 
"finalized", "finalizer", "finalizers"]


In [5]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/bhaveshg/nltk_data...


True

In [6]:
## WordNetLemmatizer
from nltk.stem import WordNetLemmatizer
word_net_lemmatizer = WordNetLemmatizer()
for word in words:
    print(f"Lemmatized form of '{word}': {word_net_lemmatizer.lemmatize(word)}")

Lemmatized form of 'running': running
Lemmatized form of 'runner': runner
Lemmatized form of 'ran': ran
Lemmatized form of 'easily': easily
Lemmatized form of 'fairly': fairly
Lemmatized form of 'cats': cat
Lemmatized form of 'cacti': cactus
Lemmatized form of 'eating': eating
Lemmatized form of 'eats': eats
Lemmatized form of 'eaten': eaten
Lemmatized form of 'ate': ate
Lemmatized form of 'eater': eater
Lemmatized form of 'eaters': eater
Lemmatized form of 'eating': eating
Lemmatized form of 'eatsfinally': eatsfinally
Lemmatized form of 'finally': finally
Lemmatized form of 'finalize': finalize
Lemmatized form of 'finalizes': finalizes
Lemmatized form of 'finalizing': finalizing
Lemmatized form of 'finalized': finalized
Lemmatized form of 'finalizer': finalizer
Lemmatized form of 'finalizers': finalizers


In [12]:
## POS Tagging
# The lemmatizer can also take a part of speech (POS) tag to improve accuracy.
# valid POS tags include 'n' for noun, 'v' for verb, 'a' for adjective, and 'r' for adverb.
print(word_net_lemmatizer.lemmatize("going", pos='v')) # 'go'
print(word_net_lemmatizer.lemmatize("better", pos='a')) # 'good'
print(word_net_lemmatizer.lemmatize("cats", pos='n')) # 'cat'
print(word_net_lemmatizer.lemmatize("cacti", pos='n')) # 'cactus'
print(word_net_lemmatizer.lemmatize("eating", pos='v')) # 'eat'
# adverb example
print(word_net_lemmatizer.lemmatize("finally", pos='r')) # 'finally'

go
good
cat
cactus
eat
finally


In [13]:
## WordNetLemmatizer
from nltk.stem import WordNetLemmatizer
word_net_lemmatizer = WordNetLemmatizer()
for word in words:
    print(f"Lemmatized form of '{word}': {word_net_lemmatizer.lemmatize(word, pos='v')}")

Lemmatized form of 'running': run
Lemmatized form of 'runner': runner
Lemmatized form of 'ran': run
Lemmatized form of 'easily': easily
Lemmatized form of 'fairly': fairly
Lemmatized form of 'cats': cat
Lemmatized form of 'cacti': cacti
Lemmatized form of 'eating': eat
Lemmatized form of 'eats': eat
Lemmatized form of 'eaten': eat
Lemmatized form of 'ate': eat
Lemmatized form of 'eater': eater
Lemmatized form of 'eaters': eaters
Lemmatized form of 'eating': eat
Lemmatized form of 'eatsfinally': eatsfinally
Lemmatized form of 'finally': finally
Lemmatized form of 'finalize': finalize
Lemmatized form of 'finalizes': finalize
Lemmatized form of 'finalizing': finalize
Lemmatized form of 'finalized': finalize
Lemmatized form of 'finalizer': finalizer
Lemmatized form of 'finalizers': finalizers
