# Model Evaluation

### Evaluate on Validation/Test Set
- **Compute Metrics:**  
  Calculate accuracy, precision, recall, and F1-score (using libraries like scikit-learn).
- **Generate Classification Report:**  
  Provide detailed per-class performance metrics.

### Confusion Matrix
- **Visualization:**  
  Use seaborn’s `heatmap` or similar tools to plot the confusion matrix.
- **Analysis:**  
  Identify classes where the model performs well or struggles.

### Performance Analysis
- **Overfitting/Underfitting Check:**  
  Compare training and validation metrics.
- **Adjustments:**  
  Consider regularization techniques or architectural changes if necessary.

---

In [8]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os


# Define dataset directory 
dataset_dir = r"..\data\raw-img" 

# Set parameters for image dimensions and batch size
img_height = 150
img_width = 150
batch_size = 32

# Load test dataset (using image_dataset_from_directory without data augmentation)
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_dir,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=False  # Ensure consistent ordering during evaluation
)

# normalization layer as used during training
normalization_layer = tf.keras.layers.Rescaling(1./255)

# Apply normalization to the test dataset
normalized_test_ds = test_ds.map(lambda x, y: (normalization_layer(x), y))
normalized_test_ds = normalized_test_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

# Get class names for future reference
class_names = test_ds.class_names
print("Class names:", class_names)

Found 26179 files belonging to 10 classes.
Class names: ['cane', 'cavallo', 'elefante', 'farfalla', 'gallina', 'gatto', 'mucca', 'pecora', 'ragno', 'scoiattolo']


### Evaluate on Validation/Test Set

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load the saved model
model = load_model('my_cnn_model_20250218_234853.keras')

# Prepare lists to collect true labels and predictions
y_true = []
y_pred = []

# Iterate over the normalized test dataset batches
for images, labels in normalized_test_ds:
    # Predict probabilities for the batch of images
    preds = model.predict(images)
    # Convert predicted probabilities to class indices
    preds = np.argmax(preds, axis=1)
    
    # Append predictions and true labels
    y_pred.extend(preds)
    y_true.extend(labels.numpy())

# Convert lists to numpy arrays for metric calculations
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Compute performance metrics with the zero_division parameter
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

# Print overall performance metrics
print("Accuracy: {:.2f}%".format(accuracy * 100))
print("Precision: {:.2f}".format(precision))
print("Recall: {:.2f}".format(recall))
print("F1 Score: {:.2f}".format(f1))

# Generate a detailed classification report with zero_division set to 0
report = classification_report(y_true, y_pred, target_names=class_names, zero_division=0)
print("\nClassification Report:\n", report)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 458ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 