In [None]:
"""evaluate_model.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1UGwuNxcfik7S_3z3Fm-e-qVN4sewjQoc

# Model Evaluation and Analysis

This notebook evaluates the trained face recognition model and provides detailed analysis of its performance.
"""

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import os
import cv2
from glob import glob
import pickle

In [None]:
"""## 1. Load Trained Model"""

In [None]:
# Progress banner for the model-loading section.
loading_banner = "🔄 Loading trained model..."
print(loading_banner)

In [None]:
# Deserialize the saved Keras model from its .h5 file under models/.
best_model_path = 'models/best_face_model.h5'
model = load_model(best_model_path)

In [None]:
# Restore the pickled label encoder that sits next to the model file.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# only load encoder files produced by a trusted training run.
with open('models/enhanced_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.loads(encoder_file.read())

In [None]:
# Report what was loaded: the input/output shapes exposed by the model and
# the number of entries in the encoder's `classes_` attribute.
print("✅ Model loaded successfully!")
print(f"📊 Model input shape: {model.input_shape}")
print(f"📊 Model output shape: {model.output_shape}")
num_classes = len(label_encoder.classes_)
print(f"🏷️ Number of classes: {num_classes}")

In [None]:
"""## 2. Load Validation Data"""

In [None]:
# Read the validation split from .npy files in the working directory.
# Both the image array and the label array are mandatory, so fail fast if
# either one is missing.
if not os.path.exists('X_val.npy') or not os.path.exists('y_val.npy'):
    raise FileNotFoundError("Validation set files 'X_val.npy' and 'y_val.npy' not found. Please run training first.")

X_val = np.load('X_val.npy')
y_val = np.load('y_val.npy')

# The distortion-type file is optional; when it is absent a placeholder list
# with one 'unknown' entry per label is used instead.
# NOTE(review): allow_pickle=True unpickles object arrays — only load files
# from a trusted source.
distortion_path = 'distortion_types_val.npy'
distortion_types = (
    np.load(distortion_path, allow_pickle=True)
    if os.path.exists(distortion_path)
    else ['unknown'] * len(y_val)
)

print(f"✅ Loaded validation set from disk: {X_val.shape[0]} samples")

In [None]:
"""## 3. Preprocess Data"""

In [None]:
# Normalize images by scaling pixel values by 1/255.
# The array is converted to float32 once and scaled in place: dividing an
# integer array directly by 255.0 would allocate a float64 result, which is
# twice the size of the float32 buffer used here. `astype` always returns a
# new array, so X_val itself is left untouched and re-running this cell is safe.
# NOTE(review): assumes X_val holds raw 0-255 pixel values — confirm the
# training notebook saved the split before scaling.
X_val_normalized = X_val.astype(np.float32)
X_val_normalized /= 255.0

In [None]:
# The labels are expected to be integer class ids already (encoded during
# training); this only makes an integer-typed copy for the metric functions.
y_val_encoded = np.array(y_val, dtype=int)

In [None]:
# Show the shapes of the arrays that will be fed to the model and the metrics.
images_shape = X_val_normalized.shape
labels_shape = y_val_encoded.shape
print(f"📊 Validation data shape: {images_shape}")
print(f"📊 Labels shape: {labels_shape}")

In [None]:
"""## 4. Model Evaluation"""

In [None]:
# Progress banner for the evaluation section.
evaluation_banner = "🎯 Evaluating model performance..."
print(evaluation_banner)

In [None]:
# Run inference on the normalized validation images. For every sample keep
# the index of the largest output (the predicted class) and the value of that
# output (used later as the confidence score).
predictions = model.predict(X_val_normalized)
predicted_classes = predictions.argmax(axis=1)
prediction_probabilities = predictions.max(axis=1)

In [None]:
# Top-1 accuracy: fraction of samples whose predicted class equals the label.
accuracy = accuracy_score(y_true=y_val_encoded, y_pred=predicted_classes)

In [None]:
# Macro F1: per-class F1 scores averaged with equal weight for every class.
macro_f1 = f1_score(
    y_true=y_val_encoded,
    y_pred=predicted_classes,
    average='macro',
)

In [None]:
# Headline metrics: accuracy as a fraction and as a percentage, plus macro F1.
accuracy_percent = accuracy * 100
print(f"🏆 Top-1 Accuracy: {accuracy:.4f} ({accuracy_percent:.2f}%)")
print(f"⭐ Macro-averaged F1-Score: {macro_f1:.4f}")

In [None]:
"""## 5. Detailed Performance Analysis"""

In [None]:
# Classification report, limited to the first 10 classes to keep it readable.
# scikit-learn requires len(target_names) to match the number of reported
# labels, so the label subset is passed explicitly. Without `labels=` the call
# raises ValueError whenever the data contains a number of classes other
# than 10.
# Class names are converted to str because the report formats them as text.
# Assumes the integer labels are indices into label_encoder.classes_ — TODO confirm.
report_class_names = [str(name) for name in label_encoder.classes_[:10]]
report_labels = np.arange(len(report_class_names))

print("\n📋 Classification Report:")
print(classification_report(y_val_encoded, predicted_classes,
                            labels=report_labels,
                            target_names=report_class_names,
                            zero_division=0))

In [None]:
"""## 6. Confusion Matrix"""

In [None]:
# Confusion matrix for the first 10 classes.
# The matrix is restricted with `labels=` so its rows/columns line up with
# the tick labels; computing it over every class while labelling only ten
# produces a mismatched (or unreadable) heatmap when there are more classes.
# Assumes the integer labels are indices into label_encoder.classes_ — TODO confirm.
shown_class_names = [str(name) for name in label_encoder.classes_[:10]]
shown_labels = np.arange(len(shown_class_names))
cm = confusion_matrix(y_val_encoded, predicted_classes, labels=shown_labels)

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=shown_class_names,
            yticklabels=shown_class_names,
            ax=ax)
ax.set_title(f'Confusion Matrix (First {len(shown_class_names)} Classes)',
             fontsize=16, fontweight='bold')
ax.set_xlabel('Predicted', fontsize=14)
ax.set_ylabel('Actual', fontsize=14)
ax.tick_params(axis='x', labelrotation=45)
ax.tick_params(axis='y', labelrotation=0)
fig.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
fig.savefig('results/confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
"""## 7. Confidence Analysis"""

In [None]:
# Confidence analysis figure, built and saved inside a single cell.
# With the notebook inline backend a figure is rendered and closed at the end
# of the cell that created it, so creating the figure, drawing each panel and
# calling savefig in separate cells saves a blank image. Keeping everything
# in one cell (and using explicit Axes objects) avoids that.
# The layout is 1x2 because only two panels are drawn.
fig, (ax_hist, ax_scatter) = plt.subplots(1, 2, figsize=(15, 5))

# Confidence distribution
ax_hist.hist(prediction_probabilities, bins=50, alpha=0.7,
             color='skyblue', edgecolor='black')
ax_hist.set_title('Prediction Confidence Distribution', fontsize=14, fontweight='bold')
ax_hist.set_xlabel('Confidence Score')
ax_hist.set_ylabel('Frequency')
ax_hist.grid(True, alpha=0.3)

# Accuracy vs Confidence: correct predictions are drawn at y=1, incorrect at y=0.
correct_mask = y_val_encoded == predicted_classes
correct_confidence = prediction_probabilities[correct_mask]
incorrect_confidence = prediction_probabilities[~correct_mask]
ax_scatter.scatter(correct_confidence, np.ones_like(correct_confidence),
                   alpha=0.6, color='green', label='Correct')
ax_scatter.scatter(incorrect_confidence, np.zeros_like(incorrect_confidence),
                   alpha=0.6, color='red', label='Incorrect')
ax_scatter.set_title('Accuracy vs Confidence', fontsize=14, fontweight='bold')
ax_scatter.set_xlabel('Confidence Score')
ax_scatter.set_ylabel('Correct (1) / Incorrect (0)')
ax_scatter.legend()
ax_scatter.grid(True, alpha=0.3)

fig.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
fig.savefig('results/confidence_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
"""## 8. Save Evaluation Results"""

In [None]:
# Progress banner for the results-saving section.
saving_banner = "\n💾 Saving evaluation results..."
print(saving_banner)

In [None]:
# Write the headline metrics to a plain-text summary file.
# All lines are formatted before the file is opened, so a formatting error
# cannot leave a half-written report behind. The directory is created first
# because open() does not create missing parent directories.
os.makedirs('results', exist_ok=True)

summary_lines = [
    "Face Recognition Model Evaluation Results",
    "==========================================",
    f"Top-1 Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)",
    f"Macro-averaged F1-Score: {macro_f1:.4f}",
    f"Total Validation Images: {len(X_val)}",
    f"Number of Classes: {len(label_encoder.classes_)}",
    "",  # keeps the trailing blank line at the end of the report
]

with open('results/evaluation_results.txt', 'w', encoding='utf-8') as f:
    f.write("\n".join(summary_lines) + "\n")

In [None]:
# Final status message followed by the list of artifact paths, one per line.
print("✅ Evaluation completed!")
print("📁 Results saved to:")
for artifact_path in (
    "results/confusion_matrix.png",
    "results/confidence_analysis.png",
    "results/evaluation_results.txt",
):
    print(f"  • {artifact_path}")