In [65]:
# 📚 Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import re
import string

In [66]:

df = pd.read_csv('customer_reviews.csv')
df.head()

Unnamed: 0,Review,Sentiment
0,I loved the product! Highly recommend.,Positive
1,Terrible experience. Wouldn't buy again.,Negative
2,"The quality is okay, not the best.",Negative
3,Excellent service and fast delivery!,Positive
4,Not worth the money.,Negative


In [85]:
df.isna

<bound method DataFrame.isna of                                       Review Sentiment  \
0     I loved the product! Highly recommend.  Positive   
1   Terrible experience. Wouldn't buy again.  Negative   
2         The quality is okay, not the best.  Negative   
3       Excellent service and fast delivery!  Positive   
4                       Not worth the money.  Negative   
..                                       ...       ...   
95             Didn't find it useful at all.  Negative   
96                       Poor response time.  Negative   
97           No refund or replacement given.  Negative   
98              Design is ugly and outdated.  Negative   
99            Stopped charging after a week.  Negative   

                             clean_review  label  
0    i loved the product highly recommend    NaN  
1   terrible experience wouldnt buy again    NaN  
2        the quality is okay not the best    NaN  
3     excellent service and fast delivery    NaN  
4               

In [81]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'<.*?>', '', text)  # Remove HTML
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.strip()
    return text

df['clean_review'] = df['Review'].apply(clean_text)

In [86]:
# Convert sentiments to binary labels
df['label'] = df['Sentiment'].map({'Positive': 1, 'Negative': 0})

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(df['clean_review'], df['label'], test_size=0.3, random_state=42)

# TF-IDF Vectorization
tfidf = TfidfVectorizer(stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)


In [87]:
print("X_train_tfidf shape:", X_train_tfidf.shape)
print("y_train shape:", y_train.shape)


X_train_tfidf shape: (70, 153)
y_train shape: (70,)


In [88]:
# Logistic Regression
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Predict
y_pred = model.predict(X_test_tfidf)


In [89]:
# Accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Classification Report
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.7333333333333333
Confusion Matrix:
 [[10  7]
 [ 1 12]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.59      0.71        17
           1       0.63      0.92      0.75        13

    accuracy                           0.73        30
   macro avg       0.77      0.76      0.73        30
weighted avg       0.79      0.73      0.73        30



In [90]:
def predict_sentiment(review):
    review_clean = clean_text(review)
    vector = tfidf.transform([review_clean])
    prediction = model.predict(vector)[0]
    return "Positive 😊" if prediction == 1 else "Negative 😞"

# Try a custom review
predict_sentiment("This is the best product I have ever used!")

'Positive 😊'