# Model Evaluation

### Evaluate on Validation/Test Set
- **Compute Metrics:**  
  Calculate accuracy, precision, recall, and F1-score (using libraries like scikit-learn).
- **Generate Classification Report:**  
  Provide detailed per-class performance metrics.

### Confusion Matrix
- **Visualization:**  
  Use seaborn’s `heatmap` or similar tools to plot the confusion matrix.
- **Analysis:**  
  Identify classes where the model performs well or struggles.

### Performance Analysis
- **Overfitting/Underfitting Check:**  
  Compare training and validation metrics.
- **Adjustments:**  
  Consider regularization techniques or architectural changes if necessary.

---

In [3]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# TFRecord inputs for the validation and test splits
val_tfrecord = "balanced_val_20250220_235108.tfrecord"
test_tfrecord = "balanced_test_20250220_235108.tfrecord"

# Schema used to parse each serialized example: a scalar byte string holding
# the encoded image and a scalar int64 class label
image_feature = tf.io.FixedLenFeature([], tf.string)
label_feature = tf.io.FixedLenFeature([], tf.int64)
feature_description = {'image': image_feature, 'label': label_feature}

def _parse_function(example_proto):
    """Turn one serialized TFRecord example into an ``(image, label)`` pair.

    The record is parsed with the module-level ``feature_description``. The
    ``'image'`` bytes are decoded as a 3-channel JPEG, resized to 150x150 and
    divided by 255.0; the ``'label'`` value is cast to ``tf.int32``.
    """
    features = tf.io.parse_single_example(example_proto, feature_description)
    decoded = tf.io.decode_jpeg(features['image'], channels=3)
    # Resize first, then scale the pixel values by 1/255
    scaled = tf.image.resize(decoded, [150, 150]) / 255.0
    return scaled, tf.cast(features['label'], tf.int32)

# Input pipeline settings
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE


def _build_eval_dataset(tfrecord_path):
    """Return a parsed, batched and prefetched dataset read from one TFRecord file."""
    records = tf.data.TFRecordDataset(tfrecord_path)
    parsed = records.map(_parse_function, num_parallel_calls=AUTOTUNE)
    return parsed.batch(BATCH_SIZE).prefetch(AUTOTUNE)


# The validation and test splits go through the same pipeline
val_dataset = _build_eval_dataset(val_tfrecord)
test_dataset = _build_eval_dataset(test_tfrecord)

# Load the saved model from its native Keras-format file
model = load_model('best_trained_model.keras')



### Evaluate on Validation/Test Set

In [4]:
# Score the validation split and report its loss and accuracy
val_loss, val_accuracy = model.evaluate(val_dataset)
for caption, value in (("Validation Loss:", val_loss),
                       ("Validation Accuracy:", val_accuracy)):
    print(caption, value)

# Score the test split and report its loss and accuracy
test_loss, test_accuracy = model.evaluate(test_dataset)
for caption, value in (("Test Loss:", test_loss),
                       ("Test Accuracy:", test_accuracy)):
    print(caption, value)


122/122 ━━━━━━━━━━━━━━━━━━━━ 25s 193ms/step - accuracy: 0.3632 - loss: 3.2622
Validation Loss: 3.3417835235595703
Validation Accuracy: 0.35297131538391113
124/124 ━━━━━━━━━━━━━━━━━━━━ 26s 204ms/step - accuracy: 0.3494 - loss: 3.1629
Test Loss: 3.16951322555542
Test Accuracy: 0.3490733802318573


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Collect ground-truth and predicted class indices over the whole test set.
# Labels and predictions are gathered in the same pass so they stay aligned.
y_true = []
y_pred = []

for images, labels in test_dataset:
    # predict_on_batch runs one forward pass on this batch; calling
    # model.predict() here would add per-call overhead and print a
    # progress bar for every batch.
    scores = model.predict_on_batch(images)
    # Predicted class = index of the highest score along the class axis
    # (assumes the model emits one score per class -- TODO confirm)
    y_pred.extend(np.argmax(scores, axis=1))
    y_true.extend(labels.numpy())

# Convert lists to numpy arrays for metric computations
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Support-weighted averages across classes. zero_division=0 scores a class
# with no predicted/true samples as 0 explicitly instead of emitting an
# UndefinedMetricWarning (the numeric result is the same as the default).
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Score: {f1:.4f}")

# Use human-readable class names when an earlier cell defined `class_names`
# (e.g. class_names = ['cat', 'dog', 'elephant', ...]); otherwise fall back to
# the integer labels rather than failing with a NameError after the whole
# prediction loop has already run.
try:
    report_names = list(class_names)
except NameError:
    report_names = None

print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=report_names, zero_division=0))
