In [None]:
%pip install nltk


# Off-the-shelf sentiment analysis pipeline (Hugging Face Transformers)

In [None]:
# Load the off-the-shelf sentiment analysis pipeline

from transformers import pipeline

# Name the checkpoint explicitly instead of relying on the task's implicit
# default: an unpinned pipeline can silently switch models between library
# releases, which would change the labels and scores printed below.
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

# Example texts

texts = ["I loved this movie", "This movie was terrible", "This movie was the worst movie I have ever seen"]

# Perform sentiment analysis (one result dict per input text, in input order)

results_off_the_shelf = sentiment_analysis(texts)

# Print the results

for text, result in zip(texts, results_off_the_shelf):
    print(f"Text: {text}")
    print(f"Sentiment: {result['label']}")
    print(f"Confidence: {result['score']}")
    print("-------------------------")

# Sentiment analysis with the TextBlob library

In [None]:
%pip install textblob


In [17]:
from textblob import TextBlob
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np


In [None]:

# Dataset configuration for the IMDb reviews
max_words = 10_000  # vocabulary cap: keep only the 10,000 most frequent words
max_len = 200  # target length (in tokens) used when padding sequences

# Reviews arrive as integer-encoded sequences with binary labels
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

# Invert the word -> index mapping so integer ids can be turned back into
# words for TextBlob
word_index = imdb.get_word_index()
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

# Turn an integer-encoded review back into a plain-text string
def decode_review(text_sequence):
    """Return the space-joined words for an integer-encoded review.

    Each id is shifted down by 3 before being looked up in
    ``reverse_word_index``; ids with no entry after the shift are rendered
    as ``'?'``.
    """
    decoded_words = (
        reverse_word_index.get(token_id - 3, '?') for token_id in text_sequence
    )
    return ' '.join(decoded_words)

# Number of test reviews scored with TextBlob. Defined once so the decoded
# texts and the ground-truth slice used for the accuracy always line up.
n_eval_reviews = 1000

# Decode the raw (not yet padded) test sequences so TextBlob receives text
X_test_texts = [decode_review(sequence) for sequence in X_test[:n_eval_reviews]]

# Prepare input data for deep learning models (LSTM/GRU)
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

# Use TextBlob for sentiment analysis on the test data.
# TextBlob's polarity is a float in [-1.0, 1.0]; it is thresholded at zero to
# get a binary label (1: positive, 0: negative), so a neutral polarity of
# exactly 0 is counted as negative.
textblob_sentiments = np.array(
    [1 if TextBlob(review).sentiment.polarity > 0 else 0 for review in X_test_texts]
)

# Evaluate TextBlob accuracy against the labels of the same reviews
textblob_accuracy = np.mean(textblob_sentiments == y_test[:len(X_test_texts)])
print(f"TextBlob Sentiment Analysis Accuracy: {textblob_accuracy * 100:.2f}%")


# NLTK Naive Bayes model

In [1]:
import nltk
from nltk.corpus import movie_reviews
from nltk.corpus import stopwords
from nltk.classify.util import accuracy as nltk_accuracy
from nltk.classify import NaiveBayesClassifier
import random

# Download necessary NLTK data
import os
import sys

# Search for NLTK data inside the active interpreter's environment rather
# than a user-specific absolute path, so the notebook runs on any machine.
# The membership check keeps re-runs of this cell from appending duplicates.
env_nltk_data = os.path.join(sys.prefix, "nltk_data")
if env_nltk_data not in nltk.data.path:
    nltk.data.path.append(env_nltk_data)

# movie_reviews: training corpus; stopwords: filtering at inference time;
# punkt / punkt_tab: tokenizer data for nltk.word_tokenize (which of the two
# is needed presumably depends on the installed NLTK version).
for package in ("movie_reviews", "stopwords", "punkt", "punkt_tab"):
    nltk.download(package)

[nltk_data] Downloading package movie_reviews to
[nltk_data]     /Users/hi/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
[nltk_data] Downloading package stopwords to /Users/hi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/hi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /Users/hi/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [2]:
def extract_features(words):
    """Build a bag-of-words feature dict mapping each distinct word to True.

    Duplicate words collapse into a single key; key order follows first
    occurrence in ``words``.
    """
    return dict.fromkeys(words, True)

# Pair every review's token list with its category label
documents = [
    (list(movie_reviews.words(fileid)), category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
]

# Shuffle with a dedicated, seeded RNG so the train/test split below, and
# therefore the reported accuracy and feature ranking, is the same on every
# run. A private Random instance leaves the global random state untouched.
random.Random(42).shuffle(documents)

featuresets = [(extract_features(d), c) for (d, c) in documents]

# First `train_size` shuffled documents train the model; the rest are held out
train_size = 1600
train_set, test_set = featuresets[:train_size], featuresets[train_size:]

classifier = NaiveBayesClassifier.train(train_set)

accuracy = nltk_accuracy(classifier, test_set)
print(f"Accuracy: {accuracy * 100:.2f}")

classifier.show_most_informative_features(10)

Accuracy: 73.75
Most Informative Features
                 idiotic = True              neg : pos    =     19.0 : 1.0
               marvelous = True              pos : neg    =     14.3 : 1.0
               ludicrous = True              neg : pos    =     12.6 : 1.0
               stupidity = True              neg : pos    =     11.4 : 1.0
             outstanding = True              pos : neg    =     11.4 : 1.0
              astounding = True              pos : neg    =     11.0 : 1.0
                  avoids = True              pos : neg    =     11.0 : 1.0
             fascination = True              pos : neg    =      9.6 : 1.0
              henstridge = True              neg : pos    =      9.0 : 1.0
                  random = True              neg : pos    =      9.0 : 1.0


In [5]:
def analyse_sentiment(text):
    """Classify ``text`` with the trained classifier and return its label.

    The text is lower-cased and tokenized; tokens that are not purely
    alphabetic (punctuation, numbers, contractions fragments) or that are
    English stopwords are dropped before the remaining words are converted
    to features.
    """
    english_stopwords = set(stopwords.words("english"))

    kept_tokens = []
    for token in nltk.word_tokenize(text.lower()):
        if not token.isalpha():
            continue  # skip punctuation and other non-alphabetic tokens
        if token in english_stopwords:
            continue
        kept_tokens.append(token)

    return classifier.classify(extract_features(kept_tokens))


In [8]:
# Hand-written sentences used to spot-check the trained classifier
test_inputs = [
    "I love this movie",
    "I hate this movie",
    "the movie was terrible",
    "the movie was horrible",
    "I didn't like the movie",
    "fantastic movie",
]

# Print each sentence with its predicted label, separated by a divider line
for sentence in test_inputs:
    print(f"Sentence: {sentence}")
    predicted_label = analyse_sentiment(sentence)
    print(f"Sentiment: {predicted_label}")
    print("-" * 25)

Sentence: I love this movie
Sentiment: pos
-------------------------
Sentence: I hate this movie
Sentiment: neg
-------------------------
Sentence: the movie was terrible
Sentiment: neg
-------------------------
Sentence: the movie was horrible
Sentiment: neg
-------------------------
Sentence: I didn't like the movie
Sentiment: neg
-------------------------
Sentence: fantastic movie
Sentiment: pos
-------------------------
