In [1]:
from tensorflow.keras.models import load_model
import pickle

# Restore the Keras model saved as 'deep_balanced_model4.h5'
balanced_model = load_model('deep_balanced_model4.h5')

# Restore the pickled tokenizer that accompanies the balanced model.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open('deep_tokenizer_balanced.pkl', 'rb') as tokenizer_file:
    tokenizer_bal = pickle.load(tokenizer_file)

# Restore the pickled maximum sequence length used for padding
with open('maxlen_balanced1.pkl', 'rb') as maxlen_file:
    max_len_bal = pickle.load(maxlen_file)




In [2]:
import pandas as pd

# Test features and labels, stored as two separate CSV files
X_test = pd.read_csv("X_test_imbalanced.csv")
y_test = pd.read_csv("y_test_imbalanced.csv")

# Fail fast: features and labels are paired purely by row position, and a
# length mismatch would otherwise only surface when the metrics are computed,
# after the slow preprocessing and prediction steps.
assert len(X_test) == len(y_test), (
    f"X_test has {len(X_test)} rows but y_test has {len(y_test)} rows"
)

# Fix labels to start from 0 (i.e., 1–5 → 0–4)
y_test['Rating'] -= 1

# Separate features and labels
X_imbalanced_test = X_test['review_text']
y_imbalanced_test = y_test['Rating']


In [3]:
import spacy
# The dependency parser and NER components are switched off: the
# preprocessing only reads lemma_, is_stop and is_alpha from each token.
nlp = spacy.load(
    "en_core_web_sm",
    disable=["parser", "ner"],
)

def spacy_preprocess_pipe(texts, batch_size=1000):
    """Lemmatise texts with the module-level spaCy pipeline ``nlp``.

    Every text is streamed through ``nlp.pipe``. Tokens flagged as stop words
    and tokens that are not purely alphabetic are dropped; the lemmas of the
    remaining tokens are joined with single spaces.

    Args:
        texts: Iterable of review strings. ``None`` and float NaN entries
            (e.g. empty cells loaded from a CSV) are treated as empty text
            instead of being handed to spaCy, so the result always has one
            entry per input and stays aligned with the labels.
        batch_size: Number of texts ``nlp.pipe`` buffers per batch.
            Defaults to 1000, the value that used to be hard-coded.

    Returns:
        list[str]: One cleaned string per input text, in input order.
    """
    def _as_text(value):
        # NaN is the only float that compares unequal to itself.
        is_missing = value is None or (isinstance(value, float) and value != value)
        return "" if is_missing else value

    # A generator keeps the input streaming; nlp.pipe batches it internally.
    docs = nlp.pipe((_as_text(text) for text in texts), batch_size=batch_size)
    return [
        " ".join(
            token.lemma_ for token in doc if not token.is_stop and token.is_alpha
        )
        for doc in docs
    ]

# Lemmatise every test review, then wrap the resulting list in a Series
cleaned_reviews = spacy_preprocess_pipe(X_imbalanced_test)
X_imbalanced_test_cleaned = pd.Series(cleaned_reviews)


In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Convert each cleaned review to a sequence using the loaded tokenizer
X_test_seq = tokenizer_bal.texts_to_sequences(X_imbalanced_test_cleaned)

# Pad (or cut) every sequence at its end so all have length max_len_bal
X_test_pad = pad_sequences(
    X_test_seq,
    maxlen=max_len_bal,
    padding='post',
    truncating='post',
)


In [5]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Run the model on the padded sequences; the predicted class for each row
# is the column index holding the largest output value.
y_pred_probs = balanced_model.predict(X_test_pad)
y_pred = np.argmax(y_pred_probs, axis=1)

# Score the predictions against the zero-based true labels
accuracy = accuracy_score(y_imbalanced_test, y_pred)
print("Accuracy:", accuracy)

report = classification_report(y_imbalanced_test, y_pred, digits=2)
print(report)

conf_matrix = confusion_matrix(y_imbalanced_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 134ms/step
Accuracy: 0.5528
              precision    recall  f1-score   support

           0       0.44      0.82      0.58      2000
           1       0.53      0.33      0.40      3000
           2       0.61      0.54      0.57      5000
           3       0.62      0.48      0.54      6000
           4       0.53      0.72      0.61      4000

    accuracy                           0.55     20000
   macro avg       0.55      0.58      0.54     20000
weighted avg       0.57      0.55      0.55     20000

Confusion Matrix:
 [[1645   89   99   60  107]
 [ 774  975  921  192  138]
 [ 578  653 2706  763  300]
 [ 463   83  610 2864 1980]
 [ 254   27  105  748 2866]]
