<a href="https://colab.research.google.com/github/divishamb123/Movie-Reviewer/blob/main/Movie_Review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install gradio scikit-learn pandas numpy matplotlib



In [2]:
import re
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split

In [3]:
# Flow: user text -> regex cleaning -> TF-IDF -> ML model (logistic regression or naive Bayes) -> prediction -> Gradio UI

In [4]:
# Toy labelled corpus used for training.
# Each entry pairs a review with its label: 1 = positive, 0 = negative.
labelled_reviews = [
    ("I loved this movie, it was fantastic!", 1),
    ("Worst movie ever, totally boring", 0),
    ("Amazing acting and great storyline", 1),
    ("I did not like the film at all", 0),
    ("Best movie of the year", 1),
    ("Terrible plot and bad acting", 0),
    ("Absolutely wonderful experience", 1),
    ("Waste of time and money", 0),
    ("Brilliant direction and screenplay", 1),
    ("Horrible movie", 0),
]

# Column-oriented view of the same pairs, in the original order.
data = {
    "review": [review_text for review_text, _ in labelled_reviews],
    "sentiment": [label for _, label in labelled_reviews],
}
df = pd.DataFrame(data)

In [5]:
def clean_text(text):
    """Normalise raw text before it is vectorised.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is replaced by a space, and runs of whitespace are collapsed.

    Punctuation is replaced by a space rather than deleted so that words
    separated only by punctuation (e.g. "great,but") stay separate tokens
    instead of being fused into one ("greatbut").

    Args:
        text: Raw input string. ``None`` is treated as empty text.

    Returns:
        The cleaned, lower-case string with single spaces between words and
        no leading or trailing whitespace.
    """
    if text is None:
        return ""
    text = text.lower()
    # Upper-case letters cannot occur after lower(), so a-z is sufficient.
    text = re.sub(r'[^a-z\s]', ' ', text)
    # Collapse the gaps left by the substituted characters.
    return re.sub(r'\s+', ' ', text).strip()

# Run every review through clean_text so the training text gets the same
# normalisation that predict_sentiment applies to user input.
df["review"] = df["review"].map(clean_text)



In [6]:
# Logistic-regression pipeline: TF-IDF features (English stop words removed)
# feeding a LogisticRegression classifier with default settings.
lr_Pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer(stop_words="english")),
        ("classifier", LogisticRegression()),
    ]
)

In [7]:
# Naive-Bayes pipeline: TF-IDF features (English stop words removed)
# feeding a MultinomialNB classifier with default settings.
nb_Pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer(stop_words="english")),
        ("classifier", MultinomialNB()),
    ]
)


In [8]:
# Fit both pipelines on the same review texts and sentiment labels.
train_texts, train_labels = df["review"], df["sentiment"]
lr_Pipeline.fit(train_texts, train_labels)
nb_Pipeline.fit(train_texts, train_labels)

In [9]:
def predict_sentiment(text, model_choice):
    """Classify a piece of text and chart the class probabilities.

    Args:
        text: Raw user text; it is passed through ``clean_text`` before
            prediction.
        model_choice: ``"Logistic Regression"`` selects ``lr_Pipeline``; any
            other value selects ``nb_Pipeline``.

    Returns:
        A 3-tuple ``(sentiment, confidence, figure)``: the predicted label
        string, the highest class probability formatted as a percentage
        string, and a matplotlib figure with one bar per class. When the
        input is empty or nothing is left after cleaning, a prompt message,
        ``"N/A"`` and ``None`` are returned instead.
    """
    cleaned = clean_text(text) if text else ""
    if not cleaned.strip():
        # Nothing to classify: skip the model rather than report a
        # meaningless score for empty input.
        return "Please enter some text to analyse.", "N/A", None

    model = lr_Pipeline if model_choice == "Logistic Regression" else nb_Pipeline

    prediction = model.predict([cleaned])[0]
    probabilities = model.predict_proba([cleaned])[0]

    # Emoji literals restored from mojibake (UTF-8 bytes mis-decoded as cp1252).
    sentiment = "Positive 😊" if prediction == 1 else "Negative 😞"
    confidence = round(np.max(probabilities) * 100, 2)

    # Visualization
    # Bar order assumes predict_proba columns are ordered as class 0
    # (negative) then class 1 (positive).
    fig, ax = plt.subplots()
    ax.bar(["Negative", "Positive"], probabilities)
    ax.set_ylim(0, 1)
    ax.set_ylabel("Probability")
    ax.set_title("Sentiment Probability Distribution")

    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself is still returned.
    plt.close(fig)

    return sentiment, f"{confidence}%", fig

In [10]:
# Input widgets: a free-text box for the review and a model selector that
# defaults to logistic regression.
review_input = gr.Textbox(
    lines=4,
    placeholder="Type a movie review, tweet, or feedback...",
)
model_selector = gr.Radio(
    ["Logistic Regression", "Naive Bayes"],
    label="Choose Model",
    value="Logistic Regression",
)

# Output widgets, in the order predict_sentiment returns its values.
result_components = [
    gr.Textbox(label="Predicted Sentiment"),
    gr.Textbox(label="Confidence Score"),
    gr.Plot(label="Sentiment Probability"),
]

# Wire the prediction function to the widgets and start the app.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=[review_input, model_selector],
    outputs=result_components,
)
interface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c5d9318c2a637c681f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


