<a href="https://colab.research.google.com/github/anjali-0404/AIML-practice/blob/main/Customer_Review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================
# 📦 Import Required Libraries
# ===============================
import pandas as pd
import numpy as np
import re
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

# ===============================
# 📥 Load Predefined Dataset
# ===============================
# Dataset source: the Twitter sentiment CSV referenced by `url` below (not an
# Amazon review dump). It is used only as a convenient corpus of short texts.
url = "https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/train.csv"
df = pd.read_csv(url)
df = df.rename(columns={'tweet': 'review', 'label': 'rating'})  # Rename for uniformity

# Simulate 1–5 star ratings (for demonstration).
# The dataset's own binary 0/1 label is discarded here.
# NOTE(review): confirm what that 0/1 label actually encodes before reusing it.
#
# The ratings are drawn uniformly at random, so they carry no information
# about the review text: any model trained on them can only reach chance-level
# accuracy (about 20% over five classes).
#
# A seeded generator keeps the simulated ratings reproducible from run to run,
# in line with the fixed random_state used for the train/test split, and a
# single vectorised draw replaces one Python-level lambda call per row.
rng = np.random.default_rng(42)
df['rating'] = rng.integers(1, 6, size=len(df))  # upper bound is exclusive -> 1..5

print("Sample data:\n", df.head())

# ===============================
# 🧹 Clean and Preprocess Text
# ===============================
def clean_text(text):
    """Normalize a raw review into lowercase, letters-only, single-spaced text.

    Args:
        text: The raw review. Non-string values are converted with ``str``.
            ``None`` and float NaN (how pandas represents a missing cell)
            are treated as an empty review.

    Returns:
        The cleaned string: lowercased, URLs removed, every character other
        than ASCII letters and whitespace dropped, whitespace runs collapsed
        to one space, and leading/trailing whitespace stripped.
    """
    # Without this guard a missing value would be stringified into the
    # spurious vocabulary tokens "none" / "nan".
    if text is None or (isinstance(text, float) and text != text):
        return ''

    text = str(text).lower()
    # Anchor URL detection at a word boundary and require "://" or "www." so
    # that ordinary words such as "awwww" are not partially deleted.
    text = re.sub(r"\b(?:https?://\S+|www\.\S+)", '', text)
    # The text is already lowercase, so only a-z needs to be preserved.
    text = re.sub(r"[^a-z\s]", '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Run every raw review through clean_text and keep the result in a new column.
df['clean_review'] = df['review'].map(clean_text)

# ===============================
# 💬 Create Sentiment Label
# ===============================
def get_sentiment(rating):
    """Map a star rating to a coarse sentiment label.

    Args:
        rating: A numeric rating.

    Returns:
        "Negative" for ratings of 2 or below, "Neutral" for exactly 3,
        and "Positive" for every other value.
    """
    if rating == 3:
        return "Neutral"
    return "Negative" if rating <= 2 else "Positive"

# Derive the coarse sentiment label for each row from its star rating.
df['sentiment'] = df['rating'].map(get_sentiment)

# ===============================
# 🔤 Tokenization and Padding
# ===============================
vocab_size = 10000
max_len = 100

# Fit the vocabulary on the cleaned reviews; '<OOV>' is the placeholder token
# configured for words outside it.
tokenizer = Tokenizer(num_words=vocab_size, oov_token='<OOV>')
tokenizer.fit_on_texts(df['clean_review'])

# Integer-encode every review, then pad / truncate at the end so that each
# row is exactly max_len ids long.
X = pad_sequences(
    tokenizer.texts_to_sequences(df['clean_review']),
    maxlen=max_len,
    padding='post',
    truncating='post',
)

# Encode ratings and sentiments as integer class ids. The fitted encoders are
# kept so predictions can be mapped back to the original labels later.
label_encoder_rating = LabelEncoder()
y_rating = label_encoder_rating.fit_transform(df['rating'])

label_encoder_sentiment = LabelEncoder()
y_sentiment = label_encoder_sentiment.fit_transform(df['sentiment'])

# ===============================
# 🧩 Train-Test Split
# ===============================
# One call splits features and both label arrays together so their rows stay
# aligned; 20% is held out, with a fixed seed for repeatability.
(
    X_train, X_test,
    y_rating_train, y_rating_test,
    y_sent_train, y_sent_test,
) = train_test_split(X, y_rating, y_sentiment, test_size=0.2, random_state=42)

# ===============================
# 🧠 Build LSTM Model
# ===============================
embedding_dim = 64

# Size the output layer from the fitted encoder instead of hard-coding 5, so
# the model and the label set cannot drift apart.
num_rating_classes = len(label_encoder_rating.classes_)

model = Sequential([
    # An explicit input spec replaces Embedding's deprecated `input_length`
    # argument and lets model.summary() report concrete output shapes.
    tf.keras.Input(shape=(max_len,)),
    Embedding(vocab_size, embedding_dim),
    LSTM(128, return_sequences=False),  # only the final state feeds the classifier
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(num_rating_classes, activation='softmax'),  # one unit per rating class
])

# The sparse loss is used because targets are integer class ids, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# ===============================
# 🚀 Train Model
# ===============================
# Fit on the encoded rating labels; 20% of the training rows are set aside
# for per-epoch validation. The returned History object is kept in `history`.
history = model.fit(
    X_train,
    y_rating_train,
    epochs=3,
    batch_size=64,
    validation_split=0.2,
)

# ===============================
# 🎯 Evaluate Model
# ===============================
# argmax over the class axis turns each row of probabilities into the
# predicted encoded class id.
pred_rating = np.argmax(model.predict(X_test), axis=1)
print("\nRating Prediction Accuracy:", accuracy_score(y_rating_test, pred_rating))

# Report rows are labelled with the original ratings rather than the encoder's
# internal ids. `labels` pins the row order to the encoder's classes so
# `target_names` always lines up, even if a class is absent from the test
# split. zero_division=0 reports 0.0 for never-predicted classes without
# emitting an undefined-metric warning per class.
rating_class_ids = np.arange(len(label_encoder_rating.classes_))
print(classification_report(
    y_rating_test,
    pred_rating,
    labels=rating_class_ids,
    target_names=[str(c) for c in label_encoder_rating.classes_],
    zero_division=0,
))

# ===============================
# 💬 Sentiment Prediction (Derived)
# ===============================
# pred_rating and y_rating_test hold LabelEncoder class ids, not star ratings.
# Decode them back to the original ratings first; applying the star-rating
# thresholds directly to the ids shifts every bucket by one class.
pred_stars = label_encoder_rating.inverse_transform(pred_rating)
true_stars = label_encoder_rating.inverse_transform(y_rating_test)

# Reuse get_sentiment so evaluation uses exactly the same rating -> sentiment
# rule that produced the 'sentiment' column.
pred_sentiment = [get_sentiment(stars) for stars in pred_stars]
true_sentiment = [get_sentiment(stars) for stars in true_stars]

print("\nSentiment Accuracy:", accuracy_score(true_sentiment, pred_sentiment))
# zero_division=0 avoids an undefined-metric warning for sentiments that are
# never predicted.
print(classification_report(true_sentiment, pred_sentiment, zero_division=0))

# ===============================
# 🧾 Test with Custom Review
# ===============================
test_review = ["The product quality is excellent and delivery was quick!"]

# Inference must mirror the training pipeline: the same text cleaning before
# tokenisation, and the same post-padding / post-truncation settings.
cleaned_review = [clean_text(review) for review in test_review]
seq = tokenizer.texts_to_sequences(cleaned_review)
padded = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

# Take the most probable class for the single review in the batch, then map
# the encoded class id back to the original rating.
pred = np.argmax(model.predict(padded), axis=1)[0]
pred_rating_label = label_encoder_rating.inverse_transform([pred])[0]
pred_sent_label = get_sentiment(pred_rating_label)

print("\n🗣️ Review:", test_review[0])
print("⭐ Predicted Rating:", pred_rating_label)
print("🙂 Predicted Sentiment:", pred_sent_label)


Sample data:
    id  rating                                             review
0   1       2   @user when a father is dysfunctional and is s...
1   2       5  @user @user thanks for #lyft credit i can't us...
2   3       2                                bihday your majesty
3   4       5  #model   i love u take with u all the time in ...
4   5       1             factsguide: society now    #motivation




Epoch 1/3
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 187ms/step - accuracy: 0.2022 - loss: 1.6106 - val_accuracy: 0.1989 - val_loss: 1.6115
Epoch 2/3
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 188ms/step - accuracy: 0.2020 - loss: 1.6104 - val_accuracy: 0.1981 - val_loss: 1.6096
Epoch 3/3
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 184ms/step - accuracy: 0.2032 - loss: 1.6096 - val_accuracy: 0.1989 - val_loss: 1.6097
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step

Rating Prediction Accuracy: 0.19959330517753793
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1278
           1       0.00      0.00      0.00      1277
           2       0.20      1.00      0.33      1276
           3       0.00      0.00      0.00      1282
           4       0.00      0.00      0.00      1280

    accuracy                           0.20      6393
   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
