In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [None]:
# -----------------------------
# 1. Load Dataset
# -----------------------------
# Read the raw reviews table; later steps read its "rating" and "review"
# columns.
data = pd.read_csv(filepath_or_buffer="reviews.csv")

# -----------------------------
# 2. Convert ratings → sentiment
# -----------------------------
def rating_to_sentiment(r):
    """Map a numeric rating to a sentiment label.

    Args:
        r: Numeric rating (int or float).

    Returns:
        "Positive" for ratings >= 4, "Neutral" for ratings in [3, 4),
        and "Negative" for everything else.

    Note:
        The neutral band is a range rather than an exact match on 3 so
        that fractional ratings such as 3.5 are not labelled "Negative".
        A NaN rating compares false against every threshold and therefore
        falls through to "Negative".
    """
    if r >= 4:
        return "Positive"
    if r >= 3:
        return "Neutral"
    return "Negative"

# Drop rows with a missing rating or review before labelling: a NaN rating
# would otherwise be labelled "Negative", and a NaN review is a float that
# the text tokenizer cannot process. `.copy()` detaches the filtered frame
# so the column assignment below does not write into a view.
data = data.dropna(subset=["rating", "review"]).copy()

data["sentiment"] = data["rating"].apply(rating_to_sentiment)

# Coerce reviews to str so purely numeric review cells are tokenized as text.
texts = data["review"].astype(str).values
labels = data["sentiment"].values

# -----------------------------
# 3. Encode labels
# -----------------------------
# Fit the encoder on the full, fixed label set instead of on the labels
# present in the data. This keeps the index mapping stable
# (Negative=0, Neutral=1, Positive=2) and keeps the one-hot width at 3,
# matching the 3-unit softmax output layer, even when the dataset happens
# to contain no examples of one sentiment.
le = LabelEncoder()
le.fit(["Negative", "Neutral", "Positive"])
labels_encoded = le.transform(labels)
labels_cat = to_categorical(labels_encoded, num_classes=len(le.classes_))

# -----------------------------
# 4. Text Tokenization
# -----------------------------
# vocab_size caps the tokenizer vocabulary; max_len is the fixed length
# every sequence is padded/truncated to.
vocab_size, max_len = 5000, 100

# Words outside the vocabulary are mapped to the "<OOV>" token.
tokenizer = Tokenizer(
    num_words=vocab_size,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(texts)

# Convert each review to a list of word indices, then pad at the end.
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    padding="post",
)

# -----------------------------
# 5. Train-Test Split
# -----------------------------
# Hold out 20% of the samples; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels_cat,
    random_state=42,
    test_size=0.2,
)

# -----------------------------
# 6. LSTM Model
# -----------------------------
# mask_zero=True marks index 0 (the value pad_sequences fills with) as
# padding so the LSTM skips those timesteps. Without it, post-padded
# reviews make the LSTM's final state the result of processing a long run
# of padding after the real words.
# `input_length` is omitted: it is deprecated in recent Keras releases and
# is not required by the Embedding layer.
model = Sequential([
    Embedding(vocab_size, 128, mask_zero=True),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(3, activation="softmax")  # Positive / Neutral / Negative
])

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

# Build explicitly so summary() can report output shapes and parameter
# counts now that the input length is no longer given to the Embedding.
model.build(input_shape=(None, max_len))
model.summary()

# -----------------------------
# 7. Train Model
# -----------------------------
# Five passes over the training split in mini-batches of 32; the held-out
# split is evaluated after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_test, y_test),
)

# -----------------------------
# 8. Prediction Function
# -----------------------------
def predict_sentiment(text):
    """Predict the sentiment label(s) for one review or a batch of reviews.

    Args:
        text: A single review string, or an iterable of review strings.

    Returns:
        An array of decoded sentiment labels, one per input review. A single
        string yields a one-element array, so ``predict_sentiment(s)[0]`` is
        the label for ``s``.
    """
    batch = [text] if isinstance(text, str) else list(text)
    if not batch:
        # Nothing to predict; skip the model call on an empty batch.
        return np.array([], dtype=le.classes_.dtype)

    seq = tokenizer.texts_to_sequences(batch)
    # Same length and padding side as the training sequences.
    pad = pad_sequences(seq, maxlen=max_len, padding="post")
    pred = model.predict(pad)
    # argmax per row: without axis=1 the array is flattened, which is only
    # correct when there is exactly one row.
    return le.inverse_transform(np.argmax(pred, axis=1))

# -----------------------------
# 9. Test
# -----------------------------
# Run one hand-written review through the trained model and show the result.
text = "I absolutely love this product! It exceeded all my expectations."
print(f"Input: {text}")
print(f"Predicted Sentiment: {predict_sentiment(text)[0]}")



Epoch 1/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 3s 3s/step - accuracy: 0.3750 - loss: 1.0906 - val_accuracy: 0.5000 - val_loss: 1.0778
Epoch 2/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 173ms/step - accuracy: 0.2500 - loss: 1.1132 - val_accuracy: 0.5000 - val_loss: 1.0629
Epoch 3/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 164ms/step - accuracy: 0.3750 - loss: 1.0838 - val_accuracy: 0.5000 - val_loss: 1.0508
Epoch 4/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 305ms/step - accuracy: 0.2500 - loss: 1.0932 - val_accuracy: 0.5000 - val_loss: 1.0373
Epoch 5/5
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 162ms/step - accuracy: 0.2500 - loss: 1.1070 - val_accuracy: 0.5000 - val_loss: 1.0286
Input: I absolutely love this product! It exceeded all my expectations.
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 187ms/step
Predicted Sentiment: Positive
