In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping





In [38]:
df = pd.read_csv("../data/clean_reviews.csv")

df = df[["clean_review", "sentiment"]]
df.head()


Unnamed: 0,clean_review,sentiment
0,not satisfied many bugs and issues,Negative
1,amazing quality and userfriendly interface,Positive
2,terrible experience needs major improvements,Negative
3,poor performance and not userfriendly,Negative
4,not satisfied many bugs and issues,Negative


In [39]:
label_encoder = LabelEncoder()
df["sentiment_encoded"] = label_encoder.fit_transform(df["sentiment"])

label_encoder.classes_


array(['Negative', 'Neutral', 'Positive'], dtype=object)

In [40]:
X_train, X_test, y_train, y_test = train_test_split(
    df["clean_review"],
    df["sentiment_encoded"],
    test_size=0.2,
    random_state=42,
    stratify=df["sentiment_encoded"]
)


In [41]:
MAX_WORDS = 10000
MAX_LEN = 100

tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq  = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=MAX_LEN, padding="post")
X_test_pad  = pad_sequences(X_test_seq,  maxlen=MAX_LEN, padding="post")


In [42]:
model = Sequential([
    Embedding(input_dim=MAX_WORDS, output_dim=128, input_length=MAX_LEN),
    LSTM(64, return_sequences=False),
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dense(3, activation="softmax")   # 3 sentiment classes
])

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

model.summary()


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 100, 128)          1280000   
                                                                 
 lstm_6 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_6 (Dropout)         (None, 64)                0         
                                                                 
 dense_7 (Dense)             (None, 32)                2080      
                                                                 
 dense_8 (Dense)             (None, 3)                 99        
                                                                 
Total params: 1331587 (5.08 MB)
Trainable params: 1331587 (5.08 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [43]:
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=2,
    restore_best_weights=True
)

history = model.fit(
    X_train_pad,
    y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=64,
    callbacks=[early_stop]
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5


In [44]:
loss, acc = model.evaluate(X_test_pad, y_test)
print("Test Accuracy:", acc)


Test Accuracy: 0.4000000059604645


In [45]:
model.save("../models/lstm_model")

with open("../models/lstm_tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

with open("../models/lstm_label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)

print("✅ LSTM model saved successfully")


INFO:tensorflow:Assets written to: ../models/lstm_model\assets


INFO:tensorflow:Assets written to: ../models/lstm_model\assets


✅ LSTM model saved successfully
