# 📗 Notebook 03: Model Evaluation
This notebook evaluates trained models on the held-out validation split, printing a per-class classification report and plotting a confusion-matrix heatmap.

In [None]:
# 🧩 Imports
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from src.data.utils import load_filtered_rotated_dataset

In [None]:
# 📦 Load validation/test set
# Only the second element of the returned pair is needed for evaluation;
# the first one is discarded.
# NOTE(review): presumably train_ratio=0.8 leaves the remaining 20% as the
# held-out split -- confirm in src.data.utils.
_, val_set = load_filtered_rotated_dataset(train_ratio=0.8)

# shuffle=False is DataLoader's default; it is spelled out so the batch order
# used for evaluation is explicit.
val_loader = DataLoader(dataset=val_set, batch_size=64, shuffle=False)

In [None]:
# 🧪 Evaluate function
def evaluate_model(model, data_loader):
    """Print a classification report and plot a confusion matrix for ``model``.

    The model is switched to evaluation mode (and left in that mode), every
    batch from ``data_loader`` is run through it with gradient tracking
    disabled, and the index of the largest output along dimension 1 is taken
    as the predicted class.

    Args:
        model: A ``torch.nn.Module``. Its output must be reducible with
            ``torch.max(outputs, 1)``.
        data_loader: Iterable yielding ``(images, labels)`` batches, where
            ``labels`` is a tensor.

    Returns:
        None. The report is printed and the heatmap is shown via
        ``plt.show()``.
    """
    model.eval()

    # Run each batch on whatever device the model's weights live on, so the
    # function works for GPU-resident models as well as CPU ones. A model
    # without parameters gives no device hint, so inputs are left untouched.
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else None

    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in data_loader:
            if device is not None and isinstance(images, torch.Tensor):
                images = images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            # Tensor.numpy() only works on CPU tensors; .cpu() is a no-op
            # when the tensor is already there.
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    print(classification_report(all_labels, all_preds))

    cm = confusion_matrix(all_labels, all_preds)
    # fmt='d' renders the cell annotations as integer counts.
    sns.heatmap(cm, annot=True, fmt='d')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()