# *Sentiment Analysis for Movie Reviews using IMDb Movie Review Dataset*

# Mounting Google Drive and changing to the project folder

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this Colab runtime.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Make the project folder on Drive the working directory; the relative paths
# used later to save and load the model and vectorizer resolve against it.
%cd /content/drive/MyDrive/Sentiment Analysis

/content/drive/MyDrive/Sentiment Analysis


# Load and Prepare Dataset

In [None]:
import pandas as pd

# Read the IMDb reviews CSV from the Drive project folder into a DataFrame,
# then preview the first rows.
df = pd.read_csv('/content/drive/MyDrive/Sentiment Analysis/IMDB Dataset.csv')
df.head()


Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


# Encode Sentiment Labels

In [None]:
# Encode the sentiment labels as integers: positive -> 1, negative -> 0.
# The identity entries (1 -> 1, 0 -> 0) make this cell idempotent: re-running it
# on an already-encoded column leaves the values unchanged instead of mapping
# every label to NaN.
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0, 1: 1, 0: 0})

# Any value outside the mapping becomes NaN; fail loudly rather than train on it.
assert df['sentiment'].notna().all(), "unexpected value in the 'sentiment' column"
df.head()


Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


# Preprocess Text

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources used below: the stopword lists and the WordNet
# corpus that the lemmatizer looks words up in.
nltk.download('stopwords')
nltk.download('wordnet')

# English stopwords held in a set for fast membership tests, plus a single
# lemmatizer instance shared by every call to preprocess().
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    """Normalize a raw review into a space-separated string of lemmatized tokens.

    Steps: drop HTML line-break tags, replace every non-letter character with
    a space, lowercase, split on whitespace, remove English stopwords, and
    lemmatize what is left.

    Args:
        text: Raw review text.

    Returns:
        The cleaned review as one space-joined string (empty if no tokens remain).
    """
    # The raw reviews contain literal "<br />" tags. Without this step the
    # letters-only filter below turns each tag into a spurious "br" token that
    # ends up in the TF-IDF vocabulary.
    text = re.sub(r'<br\s*/?>', ' ', text, flags=re.IGNORECASE)
    text = re.sub('[^a-zA-Z]', ' ', text)
    tokens = text.lower().split()
    return ' '.join(lemmatizer.lemmatize(word) for word in tokens if word not in stop_words)

# Clean every review and store the result in a new column, leaving the raw
# 'review' text untouched.
df['cleaned_review'] = df['review'].apply(preprocess)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


# Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Features are the cleaned review strings; targets are the encoded sentiment labels.
X = df['cleaned_review']
y = df['sentiment']

# Hold out 20% of the reviews for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# TF-IDF Vectorizer

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Cap the vocabulary at 5000 terms.
vectorizer = TfidfVectorizer(max_features=5000)
# Fit on the training split only, then reuse the fitted vectorizer to transform
# the test split so nothing from the test reviews feeds into the vocabulary.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


# Train Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with scikit-learn's default settings, fitted on the
# TF-IDF matrix of the training split.
model = LogisticRegression()
model.fit(X_train_vec, y_train)


# Evaluate

In [None]:
from sklearn.metrics import classification_report

# Predict labels for the held-out split and print the per-class metrics report.
y_pred = model.predict(X_test_vec)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.90      0.88      0.89      4961
           1       0.88      0.90      0.89      5039

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000



# Save Model & Vectorizer

In [None]:
import joblib

# Persist both fitted objects: the app section reloads the vectorizer alongside
# the model to turn new text into features. The relative file names resolve
# against the current working directory.
joblib.dump(model, 'movie_review_model_binary.joblib')
joblib.dump(vectorizer, 'movie_vectorizer_binary.joblib')


['movie_vectorizer_binary.joblib']

# Gradio App

In [None]:
!pip install --upgrade gradio
!pip install wordcloud matplotlib nltk





In [None]:
import re
import tempfile

import gradio as gr
import joblib
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from wordcloud import WordCloud

# Load the model and vectorizer written by the "Save Model & Vectorizer" step.
# The relative paths resolve against the current working directory.
# NOTE: joblib files are pickle-based; only load files you created or trust.
model = joblib.load("movie_review_model_binary.joblib")
vectorizer = joblib.load("movie_vectorizer_binary.joblib")

# Inference must tokenize text the same way the training data was tokenized.
# The saved vectorizer's vocabulary was built from lemmatized tokens with the
# full English stopword list removed. Stemming here, or keeping negation words
# that were dropped before fitting, can produce tokens the vectorizer has never
# seen; those are silently ignored at prediction time. To make the model
# negation-aware, change the training preprocessing and retrain instead.
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    """Normalize review text the same way the training pipeline does.

    Replaces every non-letter character with a space, lowercases, splits on
    whitespace, removes English stopwords, and lemmatizes the remaining words.

    Args:
        text: Raw review text.

    Returns:
        The cleaned review as one space-joined string (empty if no tokens remain).
    """
    text = re.sub('[^a-zA-Z]', ' ', text)
    tokens = text.lower().split()
    return ' '.join(lemmatizer.lemmatize(word) for word in tokens if word not in stop_words)

def analyze_sentiment(review):
    """Classify a review as positive or negative with the loaded model.

    Args:
        review: Raw review text from the UI (None is treated as empty).

    Returns:
        "Positive 😊" when the model predicts class 1, otherwise "Negative 😞".

    Raises:
        gr.Error: If the review contains no word known to the vectorizer
            (this includes empty input), so there is nothing to classify.
    """
    clean = preprocess(review or "")
    vec = vectorizer.transform([clean])

    # An all-zero feature vector carries no information about the review; the
    # prediction would come from the model's intercept alone, so showing it
    # as the review's sentiment would be misleading.
    if vec.nnz == 0:
        raise gr.Error("Please enter a review with some recognizable words to analyze.")

    pred = model.predict(vec)[0]
    return "Positive 😊" if pred == 1 else "Negative 😞"

def create_wordcloud(review):
    """Render a word cloud of the cleaned review and return the image file path.

    Args:
        review: Raw review text from the UI (None is treated as empty).

    Returns:
        Path to a PNG file containing the 500x500 word cloud drawn on labelled
        pixel axes.

    Raises:
        gr.Error: If no words are left to plot after cleaning.
    """
    clean = preprocess(review or "")

    # WordCloud.generate raises ValueError when it has no words to draw;
    # surface that as a readable message in the UI instead of a bare failure.
    try:
        wc = WordCloud(width=500, height=500, background_color="white", colormap="viridis").generate(clean)
    except ValueError as exc:
        raise gr.Error("Please enter a review with some meaningful words to build a word cloud.") from exc

    # A unique temp file per request avoids rewriting one shared
    # "wordcloud.png" in the working directory on every click. The file is
    # intentionally left on disk because it is read after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        out_path = tmp.name

    # Work on an explicit figure rather than pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(5, 5))
    try:
        ax.imshow(wc, interpolation="bilinear")

        # Show axes with ticks and labels
        ax.axis("on")
        ax.set_xlabel("X Axis (pixels)")
        ax.set_ylabel("Y Axis (pixels)")

        # Set ticks every 100 pixels
        ax.set_xticks(range(0, 501, 100))
        ax.set_yticks(range(0, 501, 100))

        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)

    return out_path

# Interface layout: a Blocks app using the Soft theme with a purple primary hue.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as demo:
    # Centered page title rendered from inline HTML.
    gr.Markdown("<h1 style='text-align: center; font-size: 32px;'> IMDb Movie Review Sentiment Classifier</h1>")

    # First row: the review input next to the predicted-sentiment label.
    with gr.Row():
        review_input = gr.Textbox(label="Enter Movie Review", placeholder="Type your review here...", lines=4, show_label=True)
        sentiment_output = gr.Label(label="Predicted Sentiment")

    # Second row: one button per action.
    # NOTE(review): elem_classes attaches CSS class names, but no matching CSS
    # is defined in this cell - confirm whether styling was intended.
    with gr.Row():
        analyze_button = gr.Button("Analyze Review ", elem_classes=["analyze-btn"])
        wordcloud_button = gr.Button(" Generate WordCloud", elem_classes=["wordcloud-btn"])

    # create_wordcloud returns a file path, which this image component displays.
    wordcloud_output = gr.Image(label="WordCloud", type="filepath")

    # Wire each button to its handler; both read the same review textbox.
    analyze_button.click(analyze_sentiment, inputs=review_input, outputs=sentiment_output)
    wordcloud_button.click(create_wordcloud, inputs=review_input, outputs=wordcloud_output)

# Launch the app; share=True also requests a temporary public URL.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b4bef89a30b435dd4e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


