## Installing NLTK

In [17]:
! pip install nltk



In [18]:
import nltk

# Download essential datasets and models
nltk.download('punkt')  # Tokenizers for sentence and word tokenization
nltk.download('stopwords')  # List of common stop words
nltk.download('wordnet')  # WordNet lexical database for lemmatization
nltk.download('averaged_perceptron_tagger_eng')  # Part-of-speech tagger
nltk.download('maxent_ne_chunker_tab')  # Named Entity Recognition model
nltk.download('words')  # Word corpus for NER
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker_tab.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

## Text Preprocessing

In [19]:
import string
from nltk.tokenize import word_tokenize, sent_tokenize

text = "Natural Language Processing (NLP) is cool! Let's explore it."

# Remove punctuation using string.punctuation
cleaned_text = ''.join(char for char in text if char not in string.punctuation)
print("Text without punctuation:", cleaned_text)

# Sentence Tokenization
sentences = sent_tokenize(cleaned_text)
print("Sentences:", sentences)

# Word Tokenization
words = word_tokenize(cleaned_text)
print("Words:", words)


Text without punctuation: Natural Language Processing NLP is cool Lets explore it
Sentences: ['Natural Language Processing NLP is cool Lets explore it']
Words: ['Natural', 'Language', 'Processing', 'NLP', 'is', 'cool', 'Lets', 'explore', 'it']


In [20]:
from nltk.corpus import stopwords

# Load NLTK's stopwords list
stop_words = set(stopwords.words('english'))

# Filter out stop words
filtered_words = [word for word in words if word.lower() not in stop_words]
print("Filtered Words:", filtered_words)


Filtered Words: ['Natural', 'Language', 'Processing', 'NLP', 'cool', 'Lets', 'explore']


In [21]:
from nltk.stem import PorterStemmer

# Initialize the Porter Stemmer
stemmer = PorterStemmer()

# Apply stemming to filtered words
stemmed_words = [stemmer.stem(word) for word in filtered_words]
print("Stemmed Words:", stemmed_words)


Stemmed Words: ['natur', 'languag', 'process', 'nlp', 'cool', 'let', 'explor']


## Lemmatization

In [22]:
from nltk.stem import WordNetLemmatizer

# Initialize the Lemmatizer
lemmatizer = WordNetLemmatizer()

# Lemmatize each word
lemmatized_words = [lemmatizer.lemmatize(word, pos='v') for word in filtered_words]
print("Lemmatized Words:", lemmatized_words)


Lemmatized Words: ['Natural', 'Language', 'Processing', 'NLP', 'cool', 'Lets', 'explore']


## Part-of-Speech (POS) Tagging

In [23]:
from nltk import pos_tag

# Tokenize the text into words
text = "She enjoys playing soccer on weekends."

# Tokenization (words)
words = word_tokenize(text)

# POS tagging
tagged_words = pos_tag(words)
print("Tagged Words:", tagged_words)


Tagged Words: [('She', 'PRP'), ('enjoys', 'VBZ'), ('playing', 'VBG'), ('soccer', 'NN'), ('on', 'IN'), ('weekends', 'NNS'), ('.', '.')]


## Named Entity Recognition (NER)

In [24]:
from nltk import ne_chunk, pos_tag, word_tokenize

# Sample text
text = "We shall visit the Eiffel Tower on our vacation to Paris."

# Tokenize the text into words
words = word_tokenize(text)

# Part-of-speech tagging
tagged_words = pos_tag(words)

# Named Entity Recognition
named_entities = ne_chunk(tagged_words)
print("Named Entities:", named_entities)


Named Entities: (S
  We/PRP
  shall/MD
  visit/VB
  the/DT
  (ORGANIZATION Eiffel/NNP Tower/NNP)
  on/IN
  our/PRP$
  vacation/NN
  to/TO
  (GPE Paris/NNP)
  ./.)
