Libraries (don't change)

In [1]:
!pip -q install torchinfo
!pip -q install mlflow
from dataclasses import dataclass
from typing import List, Callable, Optional, Tuple

import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torchinfo import summary
import mlflow

from sklearn.metrics import confusion_matrix, classification_report

import matplotlib.pyplot as plt
%matplotlib inline


Device (don't change)

In [2]:

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
use_amp = (DEVICE == "cuda")

print(f"Using device: {DEVICE}")
print(f"Mixed precision (AMP): {use_amp}")

Using device: cpu
Mixed precision (AMP): False


Data (don't change)

In [3]:

class DataManager:
    def __init__(self, dataset_class, root: str = "./data", val_fraction: float = 0.1,
                 batch_size: int = 32, seed: int = 42):
        self.dataset_class = dataset_class
        self.root = root
        self.val_fraction = val_fraction
        self.batch_size = batch_size
        self.seed = seed

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1918,), (0.3483,))
        ])

    def get_loaders(self) -> Tuple[DataLoader, DataLoader, DataLoader]:
        full_train = self.dataset_class(root=self.root, train=True,
                                        download=True, transform=self.transform)
        test_ds = self.dataset_class(root=self.root, train=False,
                                     download=True, transform=self.transform)

        val_size = int(len(full_train) * self.val_fraction)
        train_size = len(full_train) - val_size

        generator = torch.Generator().manual_seed(self.seed)
        train_ds, val_ds = random_split(full_train, [train_size, val_size], generator=generator)

        train_loader = DataLoader(train_ds, batch_size=self.batch_size,
                                  shuffle=True, num_workers=2, pin_memory=True)
        val_loader   = DataLoader(val_ds,   batch_size=self.batch_size,
                                  shuffle=False, num_workers=2, pin_memory=True)
        test_loader  = DataLoader(test_ds,  batch_size=self.batch_size,
                                  shuffle=False, num_workers=2, pin_memory=True)

        print(f"Train: {len(train_ds)} | Val: {len(val_ds)} | Test: {len(test_ds)}")
        return train_loader, val_loader, test_loader

Configurations (don't change)

In [4]:

@dataclass
class LayerSpec:
    out_dim: int
    activation: Callable[[torch.Tensor], torch.Tensor] = F.relu
    dropout: float = 0.0
    batch_norm: bool = True
    weight_decay: float = 0.0

@dataclass
class ModelConfig:
    input_dim: Tuple[int, int, int] = (1, 28, 28)
    num_classes: int = 10
    layers: List[LayerSpec] = None

@dataclass
class TrainConfig:
    batch_size: int = 64
    epochs: int = 100
    lr: float = 1e-4
    patience: int = 15
    min_delta: float = 1e-4
    val_fraction: float = 0.1
    seed: int = 42


Model

In [5]:

class MLPFromConfig(nn.Module):
    def __init__(self, config: ModelConfig):
        super().__init__()
        flat_dim = config.input_dim[0] * config.input_dim[1] * config.input_dim[2]
        self.layers_specs = config.layers
        layers = []
        prev_dim = flat_dim

        for i, spec in enumerate(config.layers):
            linear = nn.Linear(prev_dim, spec.out_dim)

            layers.append(linear)
            if spec.batch_norm:
                layers.append(nn.BatchNorm1d(spec.out_dim))
            if spec.dropout > 0:
                layers.append(nn.Dropout(spec.dropout))
            layers.append(spec.activation())
            prev_dim = spec.out_dim

        # Final classifier layer
        self.final_linear = nn.Linear(prev_dim, config.num_classes)
        layers.append(self.final_linear)

        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.view(x.size(0), -1)
        return self.net(x)

    def get_layer_params(self):
        param_groups = []
        for i, spec in enumerate(self.layers_specs):
            linear_layer = self.net[i * (4 if spec.batch_norm or spec.dropout > 0 else 3)]
            pass
        return self.layers_specs

Early Stopping (don't change)

In [6]:

class EarlyStopping:
    def __init__(self, patience: int = 10, min_delta: float = 1e-4):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = float('inf')
        self.should_stop = False

    def __call__(self, val_loss: float) -> bool:
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True
        return self.should_stop

Trainer (don't change)

In [7]:

class Trainer:
    def __init__(self, model: nn.Module, config: TrainConfig):
        self.model = model.to(DEVICE)
        self.config = config
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = self._build_optimizer()
        self.scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
        self.early_stopping = EarlyStopping(patience=config.patience,
                                            min_delta=config.min_delta)

        self.history = {"train_loss": [], "train_acc": [],
                        "val_loss": [], "val_acc": []}

    def _build_optimizer(self):

        # Collect all Linear layers in the order they appear
        linear_layers = []
        for name, module in self.model.named_modules():
            if isinstance(module, nn.Linear):
                linear_layers.append((name, module))

        param_groups = []

        for i, spec in enumerate(self.model.layers_specs):
            name, layer = linear_layers[i]
            param_groups.append({
                'params': layer.parameters(),
                'weight_decay': spec.weight_decay
            })

        final_name, final_layer = linear_layers[-1]
        param_groups.append({
            'params': final_layer.parameters(),
            'weight_decay': 0.0
        })

        return torch.optim.SGD(param_groups, momentum=0.9, nesterov=True, lr=self.config.lr)

    def _train_epoch(self, loader: DataLoader):
        self.model.train()
        total_loss = 0.0
        correct = 0
        total = 0

        for data, target in loader:
            data, target = data.to(DEVICE), target.to(DEVICE)

            self.optimizer.zero_grad()
            with torch.cuda.amp.autocast(enabled=use_amp):
                output = self.model(data)
                loss = self.criterion(output, target)

            self.scaler.scale(loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()

            total_loss += loss.item() * data.size(0)
            correct += (output.argmax(1) == target).sum().item()
            total += data.size(0)

        return total_loss / total, correct / total

    @torch.no_grad()
    def _eval_epoch(self, loader: DataLoader):
        self.model.eval()
        total_loss = 0.0
        correct = 0
        total = 0

        for data, target in loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            with torch.cuda.amp.autocast(enabled=use_amp):
                output = self.model(data)
                loss = self.criterion(output, target)

            total_loss += loss.item() * data.size(0)
            correct += (output.argmax(1) == target).sum().item()
            total += data.size(0)

        return total_loss / total, correct / total

    def fit(self, train_loader: DataLoader, val_loader: DataLoader):
        print("üöÄ Starting training...\n")
        for epoch in range(1, self.config.epochs + 1):
            train_loss, train_acc = self._train_epoch(train_loader)
            val_loss, val_acc     = self._eval_epoch(val_loader)

            self.history["train_loss"].append(train_loss)
            self.history["train_acc"].append(train_acc)
            self.history["val_loss"].append(val_loss)
            self.history["val_acc"].append(val_acc)

            print(f"Epoch {epoch:3d} | "
                  f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

            if self.early_stopping(val_loss):
                print(f"\nüõë Early stopping triggered at epoch {epoch}")
                break

        print("\n‚úÖ Training complete!")

    @torch.no_grad()
    def evaluate(self, loader: DataLoader):
        return self._eval_epoch(loader)

    @torch.no_grad()
    def predict_all(self, loader: DataLoader):
        self.model.eval()
        all_preds, all_targets = [], []
        for x, y in loader:
            x = x.to(DEVICE, non_blocking=True)
            logits = self.model(x)
            preds = logits.argmax(dim=1).cpu().numpy()
            all_preds.append(preds)
            all_targets.append(y.numpy())
        return np.concatenate(all_preds), np.concatenate(all_targets)


    def save(self, path: str = "mlp_best.pt"):
        torch.save(self.model.state_dict(), path)
        print(f"üíæ Model saved to {path}")



Run (do change)

In [None]:
import pandas as pd
train_cfg = TrainConfig(
    batch_size=128,
    epochs=100,
    lr=1e-4,
    patience=5,
    val_fraction=0.1
)

data_mgr = DataManager(
    dataset_class=datasets.KMNIST,
    val_fraction=train_cfg.val_fraction,
    batch_size=train_cfg.batch_size,
    seed=train_cfg.seed
)

train_loader, val_loader, test_loader = data_mgr.get_loaders()

experiments = [
    {
        "layers": [
            LayerSpec(out_dim=32, dropout=0.0, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=64, dropout=0.0, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=128, dropout=0.0, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=256, dropout=0.0, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=64, dropout=0.1, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=64, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=64, dropout=0.3, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=128, dropout=0.0, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=64, dropout=0.0, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=128, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=64, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=128, dropout=0.0, batch_norm=False, activation=nn.ReLU),
            LayerSpec(out_dim=64, dropout=0.0, batch_norm=False, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=256, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=128, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=64, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
    {
        "layers": [
            LayerSpec(out_dim=128, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=128, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=64, dropout=0.2, batch_norm=True, activation=nn.ReLU),
            LayerSpec(out_dim=10, dropout=0.1, batch_norm=False, activation=nn.ReLU),
        ]
    },
]

mlflow.set_experiment("KMNIST_Assigment")
excel_results = []
best_macro_f1 = -1
best_trainer = None
best_experiment_idx = None


for i, exp in enumerate(experiments):
    model_cfg = ModelConfig(layers=exp["layers"])
    model = MLPFromConfig(model_cfg)
    trainer = Trainer(model, train_cfg)

    mlflow.start_run(run_name=f"exp_{i}")

    mlflow.log_param("layers", [spec.out_dim for spec in model_cfg.layers])
    mlflow.log_param("dropout", [spec.dropout for spec in model_cfg.layers])
    mlflow.log_param("batch_norm", [spec.batch_norm for spec in model_cfg.layers])

    trainer.fit(train_loader, val_loader)

    preds, targets = trainer.predict_all(test_loader)
    report = classification_report(targets, preds, digits=4, output_dict=True)

    mlflow.log_metric("test_accuracy", report["accuracy"])
    mlflow.log_metric("macro_f1", report["macro avg"]["f1-score"])
    mlflow.log_metric("macro_precision", report["macro avg"]["precision"])
    mlflow.log_metric("macro_recall", report["macro avg"]["recall"])

    trainer.save(f"model_exp_{i}.pt")


    excel_results.append({
    "experiment": f"exp_{i}",
    "layers": [spec.out_dim for spec in model_cfg.layers],
    "test_accuracy": report["accuracy"],
    "macro_f1": report["macro avg"]["f1-score"],
    "macro_precision": report["macro avg"]["precision"],
    "macro_recall": report["macro avg"]["recall"],
})
    current_f1 = report["macro avg"]["f1-score"]

    if current_f1 > best_macro_f1:
        best_macro_f1 = current_f1
        best_trainer = trainer
        best_experiment_idx = i
    mlflow.end_run()
df = pd.DataFrame(excel_results)
df.to_excel("results.xlsx", index=False)

df

Train: 54000 | Val: 6000 | Test: 10000


2026/01/14 09:50:29 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/14 09:50:29 INFO mlflow.store.db.utils: Updating database tables
2026/01/14 09:50:29 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/14 09:50:29 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/14 09:50:29 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/14 09:50:29 INFO alembic.runtime.migration: Will assume non-transactional DDL.
  self.scaler = torch.cuda.amp.GradScaler(enabled=use_amp)


üöÄ Starting training...



  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):


Epoch   1 | Train Loss: 2.2145 Acc: 0.2194 | Val Loss: 2.1455 Acc: 0.2862
Epoch   2 | Train Loss: 2.1030 Acc: 0.2919 | Val Loss: 2.0569 Acc: 0.3257
Epoch   3 | Train Loss: 2.0282 Acc: 0.3173 | Val Loss: 1.9830 Acc: 0.3470
Epoch   4 | Train Loss: 1.9612 Acc: 0.3369 | Val Loss: 1.9130 Acc: 0.3663
Epoch   5 | Train Loss: 1.8995 Acc: 0.3545 | Val Loss: 1.8487 Acc: 0.3833
Epoch   6 | Train Loss: 1.8411 Acc: 0.3736 | Val Loss: 1.7890 Acc: 0.4112
Epoch   7 | Train Loss: 1.7846 Acc: 0.3953 | Val Loss: 1.7311 Acc: 0.4405
Epoch   8 | Train Loss: 1.7269 Acc: 0.4252 | Val Loss: 1.6644 Acc: 0.4805
Epoch   9 | Train Loss: 1.6736 Acc: 0.4522 | Val Loss: 1.6092 Acc: 0.5075
Epoch  10 | Train Loss: 1.6231 Acc: 0.4789 | Val Loss: 1.5531 Acc: 0.5332
Epoch  11 | Train Loss: 1.5710 Acc: 0.5025 | Val Loss: 1.4983 Acc: 0.5575
Epoch  12 | Train Loss: 1.5227 Acc: 0.5212 | Val Loss: 1.4493 Acc: 0.5728
Epoch  13 | Train Loss: 1.4806 Acc: 0.5342 | Val Loss: 1.3978 Acc: 0.5957
Epoch  14 | Train Loss: 1.4369 Acc: 0.

  self.scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):


Epoch   1 | Train Loss: 2.2833 Acc: 0.1298 | Val Loss: 2.2320 Acc: 0.1948
Epoch   2 | Train Loss: 2.1871 Acc: 0.2460 | Val Loss: 2.1342 Acc: 0.3167
Epoch   3 | Train Loss: 2.0922 Acc: 0.3361 | Val Loss: 2.0365 Acc: 0.4012
Epoch   4 | Train Loss: 1.9985 Acc: 0.4210 | Val Loss: 1.9423 Acc: 0.4932
Epoch   5 | Train Loss: 1.9089 Acc: 0.4619 | Val Loss: 1.8495 Acc: 0.5262
Epoch   6 | Train Loss: 1.8204 Acc: 0.4841 | Val Loss: 1.7542 Acc: 0.5563
Epoch   7 | Train Loss: 1.7342 Acc: 0.5110 | Val Loss: 1.6621 Acc: 0.5953
Epoch   8 | Train Loss: 1.6474 Acc: 0.5434 | Val Loss: 1.5697 Acc: 0.6253
Epoch   9 | Train Loss: 1.5660 Acc: 0.5706 | Val Loss: 1.4835 Acc: 0.6517
Epoch  10 | Train Loss: 1.4931 Acc: 0.5894 | Val Loss: 1.3969 Acc: 0.6738
Epoch  11 | Train Loss: 1.4224 Acc: 0.6070 | Val Loss: 1.3242 Acc: 0.6928
Epoch  12 | Train Loss: 1.3590 Acc: 0.6198 | Val Loss: 1.2556 Acc: 0.7023
Epoch  13 | Train Loss: 1.2999 Acc: 0.6338 | Val Loss: 1.1952 Acc: 0.7138
Epoch  14 | Train Loss: 1.2516 Acc: 0.

  self.scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):


Epoch   1 | Train Loss: 2.2834 Acc: 0.1354 | Val Loss: 2.2058 Acc: 0.2098
Epoch   2 | Train Loss: 2.1442 Acc: 0.2564 | Val Loss: 2.0681 Acc: 0.3355
Epoch   3 | Train Loss: 2.0252 Acc: 0.3379 | Val Loss: 1.9499 Acc: 0.3893
Epoch   4 | Train Loss: 1.9225 Acc: 0.3811 | Val Loss: 1.8469 Acc: 0.4278
Epoch   5 | Train Loss: 1.8270 Acc: 0.4174 | Val Loss: 1.7532 Acc: 0.4717
Epoch   6 | Train Loss: 1.7415 Acc: 0.4536 | Val Loss: 1.6630 Acc: 0.5078
Epoch   7 | Train Loss: 1.6578 Acc: 0.4897 | Val Loss: 1.5759 Acc: 0.5567
Epoch   8 | Train Loss: 1.5769 Acc: 0.5293 | Val Loss: 1.4911 Acc: 0.5933
Epoch   9 | Train Loss: 1.4977 Acc: 0.5590 | Val Loss: 1.4073 Acc: 0.6255
Epoch  10 | Train Loss: 1.4234 Acc: 0.5830 | Val Loss: 1.3323 Acc: 0.6488
Epoch  11 | Train Loss: 1.3524 Acc: 0.6039 | Val Loss: 1.2577 Acc: 0.6692
Epoch  12 | Train Loss: 1.2924 Acc: 0.6188 | Val Loss: 1.1938 Acc: 0.6850
Epoch  13 | Train Loss: 1.2365 Acc: 0.6311 | Val Loss: 1.1345 Acc: 0.6955
Epoch  14 | Train Loss: 1.1864 Acc: 0.

  self.scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):


Epoch   1 | Train Loss: 2.2395 Acc: 0.1773 | Val Loss: 2.1190 Acc: 0.2810
Epoch   2 | Train Loss: 2.0220 Acc: 0.3325 | Val Loss: 1.9065 Acc: 0.4102
Epoch   3 | Train Loss: 1.8390 Acc: 0.4356 | Val Loss: 1.7297 Acc: 0.5303
Epoch   4 | Train Loss: 1.6828 Acc: 0.5119 | Val Loss: 1.5659 Acc: 0.6012
Epoch   5 | Train Loss: 1.5477 Acc: 0.5526 | Val Loss: 1.4263 Acc: 0.6418
Epoch   6 | Train Loss: 1.4326 Acc: 0.5809 | Val Loss: 1.3118 Acc: 0.6695
Epoch   7 | Train Loss: 1.3384 Acc: 0.6063 | Val Loss: 1.2135 Acc: 0.6923
Epoch   8 | Train Loss: 1.2596 Acc: 0.6284 | Val Loss: 1.1266 Acc: 0.7207
Epoch   9 | Train Loss: 1.1887 Acc: 0.6446 | Val Loss: 1.0597 Acc: 0.7303
Epoch  10 | Train Loss: 1.1309 Acc: 0.6583 | Val Loss: 0.9978 Acc: 0.7422
Epoch  11 | Train Loss: 1.0802 Acc: 0.6683 | Val Loss: 0.9422 Acc: 0.7523
Epoch  12 | Train Loss: 1.0314 Acc: 0.6848 | Val Loss: 0.8979 Acc: 0.7660
Epoch  13 | Train Loss: 0.9985 Acc: 0.6907 | Val Loss: 0.8590 Acc: 0.7752
Epoch  14 | Train Loss: 0.9618 Acc: 0.

  self.scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):


Epoch   1 | Train Loss: 2.2769 Acc: 0.1205 | Val Loss: 2.2227 Acc: 0.1488
Epoch   2 | Train Loss: 2.1841 Acc: 0.1960 | Val Loss: 2.1233 Acc: 0.2547
Epoch   3 | Train Loss: 2.0880 Acc: 0.2594 | Val Loss: 2.0240 Acc: 0.2995
Epoch   4 | Train Loss: 1.9952 Acc: 0.3051 | Val Loss: 1.9279 Acc: 0.3480
Epoch   5 | Train Loss: 1.9090 Acc: 0.3441 | Val Loss: 1.8412 Acc: 0.3903
Epoch   6 | Train Loss: 1.8302 Acc: 0.3831 | Val Loss: 1.7630 Acc: 0.4318
Epoch   7 | Train Loss: 1.7579 Acc: 0.4227 | Val Loss: 1.6845 Acc: 0.4713
Epoch   8 | Train Loss: 1.6893 Acc: 0.4640 | Val Loss: 1.6161 Acc: 0.5157
Epoch   9 | Train Loss: 1.6269 Acc: 0.4992 | Val Loss: 1.5535 Acc: 0.5538
Epoch  10 | Train Loss: 1.5672 Acc: 0.5307 | Val Loss: 1.4847 Acc: 0.5787
Epoch  11 | Train Loss: 1.5079 Acc: 0.5530 | Val Loss: 1.4293 Acc: 0.6097
Epoch  12 | Train Loss: 1.4534 Acc: 0.5730 | Val Loss: 1.3648 Acc: 0.6288
Epoch  13 | Train Loss: 1.4001 Acc: 0.5869 | Val Loss: 1.3071 Acc: 0.6447
Epoch  14 | Train Loss: 1.3524 Acc: 0.

  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):


Epoch  48 | Train Loss: 0.7512 Acc: 0.7630 | Val Loss: 0.5876 Acc: 0.8415
Epoch  49 | Train Loss: 0.7407 Acc: 0.7664 | Val Loss: 0.5812 Acc: 0.8437
Epoch  50 | Train Loss: 0.7376 Acc: 0.7659 | Val Loss: 0.5732 Acc: 0.8450
Epoch  51 | Train Loss: 0.7348 Acc: 0.7684 | Val Loss: 0.5702 Acc: 0.8455
Epoch  52 | Train Loss: 0.7298 Acc: 0.7688 | Val Loss: 0.5651 Acc: 0.8483
Epoch  53 | Train Loss: 0.7168 Acc: 0.7724 | Val Loss: 0.5584 Acc: 0.8477
Epoch  54 | Train Loss: 0.7147 Acc: 0.7741 | Val Loss: 0.5516 Acc: 0.8500
Epoch  55 | Train Loss: 0.7102 Acc: 0.7744 | Val Loss: 0.5481 Acc: 0.8498
Epoch  56 | Train Loss: 0.7032 Acc: 0.7759 | Val Loss: 0.5452 Acc: 0.8532
Epoch  57 | Train Loss: 0.6999 Acc: 0.7777 | Val Loss: 0.5372 Acc: 0.8542
Epoch  58 | Train Loss: 0.6950 Acc: 0.7774 | Val Loss: 0.5361 Acc: 0.8535


Visuazize the train


In [None]:

history = best_trainer.history

plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history["train_loss"], label="Train Loss")
plt.plot(history["val_loss"],   label="Val Loss")
plt.title("Loss")
plt.legend()
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(history["train_acc"], label="Train Acc")
plt.plot(history["val_acc"],   label="Val Acc")
plt.title("Accuracy")
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()




Visualized the test (do change)

In [None]:
test_loss, test_acc = best_trainer.evaluate(test_loader)
print(f"üèÜ Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f}")

preds, targets = best_trainer.predict_all(test_loader)

cm = confusion_matrix(targets, preds)
plt.figure()
plt.imshow(cm)
plt.title("Confusion Matrix (Test)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.colorbar()
plt.show()

print("Classification report (Test):")
print(classification_report(targets, preds, digits=4))