In [2]:
#import libraries
import nltk
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import stopwords
from nltk.classify.util import accuracy as nltk_accuracy
import random


In [None]:
#download the nltk data files (corpus, tokenizer models, stopword list)

nltk.download("movie_reviews")
nltk.download("punkt")
# Newer NLTK releases load the tokenizer used by nltk.word_tokenize from the
# "punkt_tab" resource instead of the pickled "punkt" models. Fetch both so the
# tokenization step later in this notebook works on old and new releases.
nltk.download("punkt_tab")
nltk.download("stopwords")

In [4]:
#preprocess the dataset and extract features

def extract_features(words):
    """Turn an iterable of tokens into a bag-of-words feature dict.

    Every distinct item in ``words`` becomes a key mapped to ``True``;
    repeated items collapse into a single key.
    """
    return dict.fromkeys(words, True)

In [6]:
#Load the movie_reviews dataset from NLTK

# Each entry is a (token list, category) pair, one per corpus file. The
# comprehension walks the corpus category by category, so the raw list is
# grouped by label.
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

#shuffle so that a positional train/test split mixes the categories.
#A fixed seed on a private Random instance makes the split (and therefore the
#reported accuracy) reproducible across kernel restarts without altering the
#global random state.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(documents)


In [16]:
#prepare the dataset for training and testing

# Percentage of the (already shuffled) documents used for training; the rest
# is held out for evaluation. Deriving the cut-off from the dataset size
# instead of a hard-coded 1600 keeps the ratio intact if the number of
# documents changes; for 2000 documents the cut-off is still 1600.
TRAIN_PERCENT = 80

feature_sets = [(extract_features(tokens), label) for (tokens, label) in documents]
# Integer arithmetic avoids any float rounding in the split position.
split_index = len(feature_sets) * TRAIN_PERCENT // 100
train_set, test_set = feature_sets[:split_index], feature_sets[split_index:]

In [17]:
#train the naive bayes classifier on the training portion of the feature sets

classifier = NaiveBayesClassifier.train(train_set)

In [None]:
#evaluate the classifier on the held-out test set and print the accuracy as a
#percentage with two decimals

accuracy = nltk_accuracy(classifier, test_set)
print(f"Accuracy : {accuracy * 100 :.2f}%")


In [None]:
#show the 10 most informative features of the trained classifier

classifier.show_most_informative_features(10)

In [21]:
#test on  new input sentences

def analyze_sentiment(text):
    """Classify the sentiment of a piece of free text.

    The text is tokenized with ``nltk.word_tokenize``, tokens are lowercased,
    English stopwords are dropped, and the remaining tokens are converted to a
    feature dict with ``extract_features`` and passed to the module-level
    ``classifier``.

    Args:
        text: The review text to classify.

    Returns:
        Whatever ``classifier.classify`` returns for the extracted features.
    """
    # Build the stopword collection once per call. The original evaluated
    # stopwords.words("english") inside the comprehension, i.e. once per token,
    # and then scanned the resulting list linearly; a set gives O(1) lookups.
    english_stopwords = set(stopwords.words("english"))

    #tokenize, normalize case and remove stopwords
    # NOTE(review): tokens are lowercased so that mixed-case input produces the
    # same features as lowercase input (the stopword test was already
    # case-insensitive). This assumes the training vocabulary is lowercase —
    # confirm against the corpus.
    tokens = (token.lower() for token in nltk.word_tokenize(text))
    words = [token for token in tokens if token not in english_stopwords]

    #predict sentiment
    features = extract_features(words)
    return classifier.classify(features)

In [None]:
#test the classifier with some custom text inputs
#(interactive: keeps prompting until the user answers anything other than "y")
while True:
    test_sentence = input("Enter your review: ").lower()
    # A blank review yields no tokens, so the classifier would be asked to
    # label an empty feature dict; prompt again instead of printing a
    # meaningless sentiment.
    if not test_sentence.strip():
        print("Please enter a non-empty review.")
        continue
    print(f"Review: {test_sentence}")
    print(f"Sentiment : {analyze_sentiment(test_sentence)}")
    next_review = input("Do you want to enter another review? (y/n) : ")
    # strip() so answers with stray whitespace such as "y " still count as yes
    if next_review.strip().lower() != "y":
        print("Goodbye!")
        break