**IMPORT REQUIRED LIBRARIES**

In [28]:
import pandas as pd
import numpy as np
import re
import string
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob

**LOADING THE DATASET**

In [30]:
df = pd.read_csv("imdb_top_1000.csv")
df.head()

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


**STEP 1 : DATA PREPROCESSING**

In [33]:
df = pd.read_csv("imdb_top_1000.csv")
df = df[['Series_Title', 'Overview']].dropna()

In [34]:
#function to preprocess text (tokenization, removing stopwards, lemmatization)

def clean_and_normalize(text):
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # Remove digits
    text = text.translate(str.maketrans('', '', string.punctuation))  # Strip punctuation
    tokens = word_tokenize(text)  # Tokenize
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words and word.isalpha()]
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(tokens)


In [35]:
#Saving cleaned overview

df['Cleaned_Overview'] = df['Overview'].apply(clean_and_normalize)

In [37]:
df.head()

Unnamed: 0,Series_Title,Overview,Cleaned_Overview
0,The Shawshank Redemption,Two imprisoned men bond over a number of years...,two imprisoned men bond number year finding so...
1,The Godfather,An organized crime dynasty's aging patriarch t...,organized crime dynasty aging patriarch transf...
2,The Dark Knight,When the menace known as the Joker wreaks havo...,menace known joker wreaks havoc chaos people g...
3,The Godfather: Part II,The early life and career of Vito Corleone in ...,early life career vito corleone new york city ...
4,12 Angry Men,A jury holdout attempts to prevent a miscarria...,jury holdout attempt prevent miscarriage justi...


In [38]:
#Sentiment Analysis (text) using textblob

def analyze_sentiment(text):
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'positive'
    elif polarity < 0:
        return 'negative'
    else:
        return 'neutral'

In [39]:
# Saving Cleaned Overview's Sentiment

df['Sentiment'] = df['Cleaned_Overview'].apply(analyze_sentiment)


In [40]:
df.head()

Unnamed: 0,Series_Title,Overview,Cleaned_Overview,Sentiment
0,The Shawshank Redemption,Two imprisoned men bond over a number of years...,two imprisoned men bond number year finding so...,negative
1,The Godfather,An organized crime dynasty's aging patriarch t...,organized crime dynasty aging patriarch transf...,neutral
2,The Dark Knight,When the menace known as the Joker wreaks havo...,menace known joker wreaks havoc chaos people g...,positive
3,The Godfather: Part II,The early life and career of Vito Corleone in ...,early life career vito corleone new york city ...,positive
4,12 Angry Men,A jury holdout attempts to prevent a miscarria...,jury holdout attempt prevent miscarriage justi...,neutral


**STEP 2 FEATURE ENGINEERING**

In [41]:
vectorizer = TfidfVectorizer(max_features=500)
X = vectorizer.fit_transform(df['Cleaned_Overview']).toarray()
y = df['Sentiment'].map({'negative': 0, 'neutral': 1, 'positive': 2})

**STEP 3 : MODEL TRAINING | LOGISTICS REGRESSION**

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)

**STEP 4 : MODEL EVALUATION**

In [42]:
y_pred = model.predict(X_test)
print("Model Evaluation Report:")
print(classification_report(y_test, y_pred, target_names=["Negative", "Neutral", "Positive"]))

Model Evaluation Report:
              precision    recall  f1-score   support

    Negative       0.53      0.62      0.57        66
     Neutral       0.34      0.22      0.27        46
    Positive       0.65      0.68      0.66        88

    accuracy                           0.56       200
   macro avg       0.51      0.51      0.50       200
weighted avg       0.54      0.56      0.54       200



In [44]:
df['Predicted_Sentiment'] = model.predict(vectorizer.transform(df['Cleaned_Overview']).toarray())
df.to_csv("sentiment_analysis_results.csv", index=False)
print("Sentiment predictions saved to 'sentiment_analysis_results.csv'.")

Sentiment predictions saved to 'sentiment_analysis_results.csv'.


**LOADING THE NEW DATASET**

In [45]:
data=pd.read_csv("sentiment_analysis_results.csv")
data.head()

Unnamed: 0,Series_Title,Overview,Cleaned_Overview,Sentiment,Predicted_Sentiment
0,The Shawshank Redemption,Two imprisoned men bond over a number of years...,two imprisoned men bond number year finding so...,negative,0
1,The Godfather,An organized crime dynasty's aging patriarch t...,organized crime dynasty aging patriarch transf...,neutral,1
2,The Dark Knight,When the menace known as the Joker wreaks havo...,menace known joker wreaks havoc chaos people g...,positive,2
3,The Godfather: Part II,The early life and career of Vito Corleone in ...,early life career vito corleone new york city ...,positive,2
4,12 Angry Men,A jury holdout attempts to prevent a miscarria...,jury holdout attempt prevent miscarriage justi...,neutral,1
