## Break text into words, remove stopwords, and find word roots.

In [None]:
import nltk

In [None]:
# Download the NLTK datasets and models used by the cells below.
# A package that is already installed is reported as "already up-to-date".
nltk.download('punkt')        # Tokenizer models for tokenization (older NLTK releases)
nltk.download('punkt_tab')    # Tokenizer tables that newer NLTK releases load instead of 'punkt'
nltk.download('stopwords')    # Stopword lists for stopwords removal
nltk.download('wordnet')      # WordNet data for lemmatization
nltk.download('vader_lexicon')  # Lexicon used by SentimentIntensityAnalyzer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\harsh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\harsh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\harsh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\harsh\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

## 📌 1. Tokenization

Split raw text into word-level and sentence-level tokens.


In [None]:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize

# Two-sentence sample so both tokenizers have something to split.
text = "AI is revolutionizing the world of technology! NLP is a key part of AI."

# Break the sample into word-level tokens and into sentences.
word_tokens = word_tokenize(text)
sentence_tokens = sent_tokenize(text)

# Show the word tokens first, then the sentences.
print(f"Word Tokens: {word_tokens}")
print(f"Sentence Tokens: {sentence_tokens}")


Word Tokens: ['AI', 'is', 'revolutionizing', 'the', 'world', 'of', 'technology', '!', 'NLP', 'is', 'a', 'key', 'part', 'of', 'AI', '.']
Sentence Tokens: ['AI is revolutionizing the world of technology!', 'NLP is a key part of AI.']


## 📌 2. Stopwords Removal
Drop very common words (such as "is" or "an") that carry little meaning on their own.

In [None]:
from nltk.corpus import stopwords

# English stopword list, held in a set for fast membership tests.
stop_words = set(stopwords.words('english'))

# Tokenize the example sentence.
tokens = word_tokenize("This is an amazing NLP tutorial for beginners!")

# Keep a token only when its lowercase form is not a stopword;
# the token itself is kept in its original casing.
filtered_tokens = list(filter(lambda tok: tok.lower() not in stop_words, tokens))

print(f"Original Tokens: {tokens}")
print(f"Filtered Tokens (Stopwords removed): {filtered_tokens}")

Original Tokens: ['This', 'is', 'an', 'amazing', 'NLP', 'tutorial', 'for', 'beginners', '!']
Filtered Tokens (Stopwords removed): ['amazing', 'NLP', 'tutorial', 'beginners', '!']


## 📌 3. Stemming


In [None]:
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

# Inflected sample words to reduce to their stems.
words = ["running", "studies", "beautiful", "flies", "crying"]

# Apply the Porter stemmer to every word, preserving order.
stemmed_words = list(map(stemmer.stem, words))

print(f"Stemmed Words: {stemmed_words}")


Stemmed Words: ['run', 'studi', 'beauti', 'fli', 'cri']


## 📌 4. Lemmatization


In [None]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

# Sample words, including irregular forms ("mice", "went").
words = ["running", "studies", "mice", "went", "crying"]

# Each word is lemmatized on its own, with no part-of-speech argument supplied.
lemmatized_words = list(map(lemmatizer.lemmatize, words))

print(f"Lemmatized Words: {lemmatized_words}")

Lemmatized Words: ['running', 'study', 'mouse', 'went', 'cry']


## 📌 5. Sentiment Analysis

In [None]:
from nltk.sentiment import SentimentIntensityAnalyzer

In [None]:
# VADER-based analyzer (relies on the 'vader_lexicon' download).
sia = SentimentIntensityAnalyzer()

text = "I absolutely love this AI course! It's amazing."

# polarity_scores returns a mapping of score names to values for the text.
sentiment = sia.polarity_scores(text)
print(f"Sentiment Scores: {sentiment}")

Sentiment Scores: {'neg': 0.0, 'neu': 0.368, 'pos': 0.632, 'compound': 0.862}


## 📌 6. Named Entity Recognition (NER)

In [None]:
import spacy

In [None]:
# Load the small English spaCy pipeline.
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over a sentence containing people, organisations and dates.
text = "Elon Musk founded SpaceX in 2002 and Tesla in 2003."
doc = nlp(text)

# One line per detected entity: its text followed by its label.
for ent in doc.ents:
    print(f"{ent.text} - {ent.label_}")

Elon Musk - PERSON
2002 - DATE
Tesla - ORG
2003 - DATE


## 📌 7. Build a Simple Chatbot

In [None]:
from nltk.chat.util import Chat, reflections

# Each pair is [regex pattern, list of candidate replies]. Chat compiles the
# patterns case-insensitively and answers with a reply from the first pattern
# that matches the start of the user's input, so order matters.
pairs = [
    # \b stops "hi" from matching the start of longer words such as "history".
    [r"(?:hi|hello)\b", ["Hello!", "Hi there!"]],
    # "?" is a regex quantifier, so it is escaped and made optional explicitly.
    [r"how are you\??", ["I'm a bot, I'm always good!"]],
    [r"what's your name\??", ["I'm a simple chatbot!"]],
    # converse() still asks for a reply to the quit keyword before it stops;
    # without this pair the session would end by printing "None".
    # (?-i:...) keeps this pattern case-sensitive, matching converse()'s exact "quit" check.
    [r"(?-i:quit)$", ["Goodbye!"]],
    # Catch-all, kept last: any other input gets a reply instead of "None".
    [r".*", ["Sorry, I didn't understand that. Type 'quit' to exit."]],
]

chatbot = Chat(pairs, reflections)

# Interactive loop: reads from standard input until the user types "quit".
chatbot.converse()

None


## 📌 8. Text Summarization

In [None]:
from transformers import pipeline

In [None]:
# Pin the checkpoint and revision explicitly. These are the values the pipeline
# previously fell back to by default, so the results stay the same while the
# notebook no longer depends on whatever the library's default happens to be.
summarizer = pipeline("summarization", model="google-t5/t5-small", revision="df1b051")

No model was supplied, defaulted to google-t5/t5-small and revision df1b051 (https://huggingface.co/google-t5/t5-small).
Using a pipeline without specifying a model name and revision in production is not recommended.






All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
Device set to use 0


In [None]:
# Passage to summarize, written as adjacent literals (one per sentence) that
# Python joins into a single string.
text = (
    "The `ps` command, which stands for “process status,” is like a computer tool that helps you see what’s happening inside your Linux computer. "
    "Imagine your computer is doing several things simultaneously, like running different programs or apps. "
    "These are the processes and the `ps` command lets you take a quick look at them. "
    "When you use it without any special instructions, it shows you the processes that are connected to the window or screen you are currently using. "
    "But here’s where it gets interesting: you can make the ps command show you exactly what you want to know by giving it special instructions, called options. "
    "These options let you customize the information you see, like finding out which programs are using the most computer power or checking what a specific user is doing. "
    "So, while it can give you a basic overview, the ps command’s real strength is in letting you choose exactly what details you want to see about the processes on your computer."
)

# Length bounds are passed through to the pipeline; sampling is disabled.
summary = summarizer(text, min_length=20, max_length=50, do_sample=False)

# The pipeline returns a list; the first item carries the summary text.
print(f"Summary: {summary[0]['summary_text']}")

Summary: the ps command, which stands for “process status,” is like a computer tool that helps you see what’s happening inside your Linux computer . you can make the ps command show you exactly what you want
