# Evaluation of the model on the test dataset

In [None]:
from pathlib import Path

import numpy as np
import torch
from scipy.stats import entropy
from sklearn.metrics import f1_score, accuracy_score
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from tqdm import tqdm

from ml_utils import CustomDataset
from model.resnet_model import CNNClassifier

In [None]:
device = "mps"

In [3]:
test_dt = CustomDataset(data_path=Path("../datasets/car_dataset"), portion="test")
test_loader = DataLoader(test_dt, batch_size=64, shuffle=True, num_workers=4)

cls = CNNClassifier(num_classes=196, pretrained=True, model_name="resnet18")
cls.load_state_dict(torch.load("../webapp_result/r18_pretrained_freeze0/model.pth", weights_only=True))
cls.eval()
cls = cls.to(device)

In [4]:
losses, preds, trues, entropy_list = [], [], [], []
criterion = CrossEntropyLoss().to(device)

with torch.no_grad():
    for idx, (x, y) in enumerate(tqdm(test_loader), 1):
        if device != "cpu":
            x, y = x.to(device), y.to(device)
        logits = cls(x)
        loss = criterion(logits, y)
        entropy_list.append(entropy(torch.softmax(logits, dim=1).cpu().numpy()))
        predictions = torch.argmax(logits, dim=1)

        losses.append(loss)
        preds.append(predictions)
        trues.append(y)

    # Concatenate all predictions, true labels, and losses
    preds = torch.cat(preds).cpu().numpy()
    trues = torch.cat(trues).cpu().numpy()
    losses_tensor = torch.stack(losses)
    entropy_list = torch.tensor(np.asarray(entropy_list))

# Calculate and store metrics
loss_mean = losses_tensor.mean().item()
loss_std = losses_tensor.std().item()
f1_mean = f1_score(trues, preds, average='weighted')
acc_mean = accuracy_score(trues, preds)

100%|██████████| 126/126 [01:10<00:00,  1.78it/s]


In [5]:
print(entropy_list.mean(), entropy_list.std())

tensor(1.4588) tensor(0.8639)


In [6]:
loss_mean

0.5138686895370483

In [7]:
f1_mean

0.8835541083535506

In [8]:
acc_mean

0.8838452928740207