In [1]:
import re
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load tokenizer
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Load trained LSTM model
model = load_model("lstm_model.h5")

# Load and clean test data
df = pd.read_csv("test.csv")
df.columns = ['overall', 'title', 'reviewText']
df.dropna(subset=['reviewText'], inplace=True)
df['sentiment'] = df['overall'].apply(lambda r: 'negative' if r == 1 else 'positive')

# Clean text
def clean_text(text):
    text = re.sub(r'[^a-zA-Z\s]', '', str(text))
    text = text.lower().split()
    return ' '.join(text)

df['cleaned_review'] = df['reviewText'].apply(clean_text)

# Tokenize and pad
X_text = df['cleaned_review'].tolist()
X_seq = tokenizer.texts_to_sequences(X_text)
X_pad = pad_sequences(X_seq, maxlen=100, padding='post', truncating='post')

# Predict
y_probs = model.predict(X_pad)
y_pred = np.argmax(y_probs, axis=1)
y_pred = ['negative' if i == 0 else 'positive' for i in y_pred]
df['lstm_prediction'] = y_pred

# Evaluation
y_true = df['sentiment'].tolist()
label_map = {'negative': 1, 'positive': 2}
y_true_num = [label_map[y] for y in y_true]
y_pred_num = [label_map[y] for y in y_pred]

print("\n📋 LSTM Classification Report:")
print(classification_report(y_true_num, y_pred_num))

# Save predictions
df.to_csv("lstm_predictions.csv", index=False)
print("✅ Predictions saved to 'lstm_predictions.csv'")




[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 12ms/step

📋 LSTM Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           1       0.50      1.00      0.67    200000
           2       0.00      0.00      0.00    199999

    accuracy                           0.50    399999
   macro avg       0.25      0.50      0.33    399999
weighted avg       0.25      0.50      0.33    399999

✅ Predictions saved to 'lstm_predictions.csv'
