# **LOADING DATASET**

In [None]:
from google.colab import drive
import pandas as pd

# Make Google Drive visible to the Colab runtime
drive.mount('/content/drive')

# Read the IMDb reviews CSV from Drive; rows pandas cannot parse are skipped
file_path = '/content/drive/MyDrive/Movie Analysis/IMDb_Reviews.csv'
df = pd.read_csv(
    file_path,
    encoding='utf-8',
    on_bad_lines='skip',
)

# Preview the first five rows
df.head(5)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


# **PREPROCESSING THE TEXT**

In [None]:
import pandas as pd
import re
import string
from sklearn.feature_extraction.text import TfidfVectorizer

# Load dataset from the Drive path defined in the loading cell. The bare
# relative name 'IMDb_Reviews.csv' only resolves when a copy has been placed
# in the working directory by hand, which breaks Restart & Run All.
df = pd.read_csv(file_path, on_bad_lines='skip', encoding='utf-8')

# Simple preprocessing function (no NLTK!)
def preprocess(text):
    """
    Normalize a raw review string before vectorization.

    Steps, in order: lowercase, replace HTML tags with a space, delete
    ASCII punctuation, delete digit runs, collapse whitespace, strip the ends.

    Args:
        text (str): Raw review text.

    Returns:
        str: Lowercased text with tags, ASCII punctuation and digits removed,
        and with words separated by single spaces.
    """
    text = text.lower()
    # Replace tags with a space rather than '': a '<br />' sitting directly
    # between two sentences ("end.<br /><br />Next") would otherwise fuse the
    # neighbouring words into one token ("endnext") once punctuation is removed.
    text = re.sub(r'<.*?>', ' ', text)
    # re.escape keeps ']', '\', '^' and '-' literal inside the character class
    # instead of depending on where they happen to sit in string.punctuation.
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    text = re.sub(r'\d+', '', text)  # remove numbers
    text = re.sub(r'\s+', ' ', text)  # collapse whitespace runs to one space
    return text.strip()

# Clean every review (coerced to str first) and keep the result in a new column
df['cleaned_review'] = df['review'].astype(str).map(preprocess)

# Show raw and cleaned text side by side
df.loc[:, ['review', 'cleaned_review']].head()


Unnamed: 0,review,cleaned_review
0,One of the other reviewers has mentioned that ...,one of the other reviewers has mentioned that ...
1,A wonderful little production. <br /><br />The...,a wonderful little production the filming tech...
2,I thought this was a wonderful way to spend ti...,i thought this was a wonderful way to spend ti...
3,Basically there's a family where a little boy ...,basically theres a family where a little boy j...
4,"Petter Mattei's ""Love in the Time of Money"" is...",petter matteis love in the time of money is a ...


# **VECTORIZATION**

In [None]:
# Integer labels: 1 for 'positive', 0 for 'negative'
y = df['sentiment'].map({'negative': 0, 'positive': 1})

# TF-IDF features over the cleaned text, capped at 5000 terms
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(df['cleaned_review'])


# **TRAINING MODEL**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Split the cleaned text rather than the pre-computed matrix X: X was produced
# by a vectorizer fitted on every review, so its vocabulary and IDF weights
# already contain information from the rows that end up in the test set.
# Same test_size/random_state as before, so the row partition is unchanged.
train_text, test_text, y_train, y_test = train_test_split(
    df['cleaned_review'], y, test_size=0.2, random_state=42
)

# Refit the shared vectorizer on training reviews only, then reuse it as-is
# for the held-out reviews (and for any later single-review predictions).
X_train = tfidf.fit_transform(train_text)
X_test = tfidf.transform(test_text)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)


# **EVALUATING MODEL**

In [None]:
# Score the test-set predictions
f1 = f1_score(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

# Report both metrics, one per line, rounded to two decimals
print(f"Accuracy: {acc:.2f}", f"F1-Score: {f1:.2f}", sep="\n")


Accuracy: 0.90
F1-Score: 0.90


In [None]:
def analyze_movie_review(review):
    """
    Analyze the sentiment of a movie review.

    Uses the notebook-level `preprocess`, `tfidf` and `model` objects, so the
    cells defining them must have been run first.

    Args:
        review (str): The review text entered by the user.

    Returns:
        str: Predicted sentiment, followed by the model's probability for that
        sentiment when the model exposes `predict_proba`. An error message is
        returned instead when the input is not a string, is blank, or has no
        text left after preprocessing.
    """
    invalid_message = "❌ Invalid input. Please enter a non-empty movie review."

    if not isinstance(review, str) or not review.strip():
        return invalid_message

    # Preprocess the input
    cleaned_review = preprocess(review)
    # Input made only of punctuation, digits or markup cleans down to ''.
    # Vectorizing that gives an all-zero row, and the model would still report
    # a confident-looking sentiment for it, so treat it as invalid input.
    if not cleaned_review:
        return invalid_message
    vectorized_review = tfidf.transform([cleaned_review])

    # Predict sentiment
    prediction = model.predict(vectorized_review)[0]
    probability = None
    if hasattr(model, 'predict_proba'):
        # predict_proba columns follow the order of model.classes_; look the
        # column up by label instead of assuming the label equals its index.
        class_index = list(model.classes_).index(prediction)
        probability = model.predict_proba(vectorized_review)[0][class_index]

    # Format the result
    sentiment = "🎬 Positive Review 😊" if prediction == 1 else "🎬 Negative Review 😞"
    confidence = f" (Confidence: {probability:.2f})" if probability is not None else ""

    return f"{sentiment}{confidence}"


# **EXAMPLE USAGE**

In [None]:
# Run three sample inputs through the analyzer; the empty string exercises
# the invalid-input message
print(
    *(
        analyze_movie_review(text)
        for text in (
            "Absolutely loved it! Great direction and cast.",
            "Waste of time. Really bad editing and acting.",
            "",
        )
    ),
    sep="\n",
)


🎬 Positive Review 😊 (Confidence: 0.99)
🎬 Negative Review 😞 (Confidence: 1.00)
❌ Invalid input. Please enter a non-empty movie review.
