In [9]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [10]:
model1 = load_model("facial_expressions_model.h5")             # CNN
model2 = load_model("facial_expressions_model_MobileNetV2.h5")  # MobileNetV2



In [11]:
# CNN model (grayscale, 48x48)
test_datagen1 = ImageDataGenerator(rescale=1./255)
test_generator1 = test_datagen1.flow_from_directory(
    "images/validation",          
    target_size=(48, 48),
    color_mode="grayscale",
    class_mode="categorical",
    shuffle=False,
    batch_size=32
)

# MobileNetV2 model (RGB, 128x128)
test_datagen2 = ImageDataGenerator(rescale=1./255)
test_generator2 = test_datagen2.flow_from_directory(
    "images/validation",         
    target_size=(128, 128),
    color_mode="rgb",
    class_mode="categorical",
    shuffle=False,
    batch_size=32
)

Found 7066 images belonging to 7 classes.
Found 7066 images belonging to 7 classes.


In [12]:
# Model 1
y_pred1 = model1.predict(test_generator1)
y_pred_classes1 = np.argmax(y_pred1, axis=1)
y_true1 = test_generator1.classes

# Model 2
y_pred2 = model2.predict(test_generator2)
y_pred_classes2 = np.argmax(y_pred2, axis=1)
y_true2 = test_generator2.classes

[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 137ms/step


In [13]:
# Metrics function
def print_metrics(y_true, y_pred, model_name):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average="weighted")
    rec = recall_score(y_true, y_pred, average="weighted")
    f1 = f1_score(y_true, y_pred, average="weighted")
    
    print(f"---- {model_name} Performance ----")
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1-Score : {f1:.4f}")
    print("\nClassification Report:\n")
    print(classification_report(y_true, y_pred, target_names=list(test_generator1.class_indices.keys())))
    print("--------------------------------\n")

In [14]:
print_metrics(y_true1, y_pred_classes1, "CNN Model")
print_metrics(y_true2, y_pred_classes2, "MobileNetV2 Model")

---- CNN Model Performance ----
Accuracy : 0.6349
Precision: 0.6335
Recall   : 0.6349
F1-Score : 0.6259

Classification Report:

              precision    recall  f1-score   support

       angry       0.55      0.53      0.54       960
     disgust       0.78      0.42      0.55       111
        fear       0.55      0.30      0.39      1018
       happy       0.83      0.86      0.85      1825
     neutral       0.53      0.70      0.60      1216
         sad       0.51      0.51      0.51      1139
    surprise       0.70      0.77      0.74       797

    accuracy                           0.63      7066
   macro avg       0.64      0.59      0.60      7066
weighted avg       0.63      0.63      0.63      7066

--------------------------------

---- MobileNetV2 Model Performance ----
Accuracy : 0.4316
Precision: 0.4553
Recall   : 0.4316
F1-Score : 0.3986

Classification Report:

              precision    recall  f1-score   support

       angry       0.52      0.06      0.11     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
