In [None]:
from flask import Flask, request, jsonify
import pandas as pd
import spacy
from sklearn.model_selection import train_test_split
import re

In [None]:
# WSGI application object; the /predict route defined later is registered on it.
app = Flask(import_name=__name__)

In [None]:
# Relative path to the IMDB reviews CSV.
path = "IMDB Dataset.csv"
# Read the full CSV into a DataFrame; later cells use its `review` and `sentiment` columns.
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Count how many reviews carry each sentiment label.
df["sentiment"].value_counts()

positive    25000
negative    25000
Name: sentiment, dtype: int64

In [None]:
# Peek at the first four rows of the raw data.
df.head(n=4)

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative


In [None]:
# Load the small English spaCy pipeline. The dependency parser and the entity
# recogniser are disabled because preprocessing only reads each token's lemma
# and its stop-word / punctuation flags; skipping those two components makes
# processing every review noticeably cheaper without changing the lemmas.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

NameError: name 'spacy' is not defined

In [None]:
def preprocess_text(text):
    """Normalise a raw review into a space-separated string of lemmas.

    HTML tags (e.g. ``<br />``) are replaced by a space, the text is run
    through the module-level spaCy pipeline ``nlp``, and stop words,
    punctuation and whitespace tokens are dropped.

    Args:
        text: Raw review text; expected to be a ``str``.

    Returns:
        str: The lower-cased lemmas of the remaining tokens, joined by single
        spaces.
    """
    # Substitute a space rather than the empty string so the words on either
    # side of a tag are not glued together ("great<br />film" -> "great film").
    text = re.sub(r'<[^>]+>', ' ', text)
    doc = nlp(text)
    tokens = [
        token.lemma_.lower()
        for token in doc
        # is_space filters the whitespace tokens spaCy emits for runs of
        # spaces, which the tag substitution above can introduce.
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return ' '.join(tokens)

In [None]:
# Clean every review; preprocess_text is called once per row, so this can take a while.
df['cleaned_review'] = df['review'].map(preprocess_text)

In [None]:
# Encode the text labels as integers: positive -> 1, negative -> 0.
label_ids = {"positive": 1, "negative": 0}
df["sentiments_map"] = df["sentiment"].map(label_ids)

In [None]:
# Inspect the first three rows, now including the derived columns.
df.head(n=3)

Unnamed: 0,review,sentiment,cleaned_review,sentiments_map
0,One of the other reviewers has mentioned that ...,positive,"[reviewer, mention, watch, 1, oz, episode, hoo...",1
1,A wonderful little production. <br /><br />The...,positive,"[wonderful, little, production, film, techniqu...",1
2,I thought this was a wonderful way to spend ti...,positive,"[think, wonderful, way, spend, time, hot, summ...",1


In [None]:
# Features are the cleaned review text; targets are the integer sentiment labels.
X, y = df['cleaned_review'], df['sentiments_map']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer


def _as_document(tokens):
    """Return one review as a single string, whether it is a str or a sequence of tokens."""
    # A review that is already a string must pass through untouched:
    # ' '.join("good film") yields "g o o d   f i l m", and CountVectorizer's
    # default token pattern ignores single-character tokens.
    return tokens if isinstance(tokens, str) else ' '.join(tokens)


# Make sure every document is a plain string before vectorising
X_train_str = [_as_document(tokens) for tokens in X_train]
X_test_str = [_as_document(tokens) for tokens in X_test]

# Convert text to numeric format (token counts); the vocabulary is learned
# from the training split only and then reused for the test split
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train_str)
X_test_vec = vectorizer.transform(X_test_str)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Fit a logistic-regression classifier on the vectorised training reviews.
# max_iter is raised to 1000 — presumably so the solver has room to converge; confirm if tuning.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=['Negative', 'Positive'])

print(f'Accuracy: {accuracy}')
print(report)


Accuracy: 0.8806
              precision    recall  f1-score   support

    Negative       0.89      0.87      0.88      4961
    Positive       0.88      0.89      0.88      5039

    accuracy                           0.88     10000
   macro avg       0.88      0.88      0.88     10000
weighted avg       0.88      0.88      0.88     10000



In [None]:
def predict_sentiment(review):
    """Classify a single raw review as ``'Positive'`` or ``'Negative'``.

    The review is cleaned with ``preprocess_text``, vectorised with the fitted
    module-level ``vectorizer`` and scored by the module-level ``model``.

    Args:
        review: Raw review text.

    Returns:
        str: ``'Positive'`` when the model predicts label 1, otherwise
        ``'Negative'``.
    """
    features = vectorizer.transform([preprocess_text(review)])
    predicted_label = model.predict(features)[0]

    if predicted_label == 1:
        return 'Positive'
    return 'Negative'


In [None]:
# new_review = "I didn't like the film at all. It was boring and predictable."
# predicted_sentiment_2 = predict_sentiment(new_review)
# print(f"The sentiment of the review is : {predicted_sentiment_2}")

The sentiment of the review is : Negative


In [None]:
@app.route('/predict', methods=['POST'])
def predict():
    """Handle ``POST /predict``: classify the review sent in the JSON body.

    Expects a JSON object such as ``{"review": "some text"}``.

    Returns:
        A JSON response ``{"sentiment": "Positive" | "Negative"}`` on success,
        or ``{"error": "No review provided"}`` with HTTP 400 when the body is
        not a JSON object or ``review`` is missing, blank or not a string.
    """
    # silent=True yields None instead of raising when the body is missing,
    # malformed or not sent as JSON, so bad requests get the 400 below rather
    # than an unhandled exception.
    data = request.get_json(silent=True)
    review = data.get('review', '') if isinstance(data, dict) else ''

    # Non-string values (numbers, lists, ...) would crash the regex
    # substitution in preprocess_text, so reject them up front.
    if not isinstance(review, str) or not review.strip():
        return jsonify({'error': 'No review provided'}), 400

    sentiment = predict_sentiment(review)
    return jsonify({'sentiment': sentiment})

# Run the app
if __name__ == '__main__':
    # Debug mode is left off: its auto-reloader re-executes the script, which
    # does not work from inside a notebook kernel, and the interactive debugger
    # it enables can run arbitrary code for anyone who can reach the server.
    app.run(debug=False, use_reloader=False)