In [1]:
import numpy as np
import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import TensorDataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

from tqdm.auto import tqdm

import wandb

import logging
import sys
from torchinfo import summary

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def setup_logger(name=__name__):
    """
    Sets up a logger that outputs to the console (stdout).
    """
    logger = logging.getLogger(name)
    if not logger.handlers:
        logger.setLevel(logging.INFO)
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

logger = setup_logger()

In [3]:
# Load the dataset
data_folder = "../data"
preped_folder = os.path.join(data_folder, "_preped")

train_data = pd.read_csv(os.path.join(data_folder, 'train_data.csv')).values.tolist()
test_data = pd.read_csv(os.path.join(data_folder, 'test_data.csv')).values.tolist()

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to consistent size
    transforms.ToTensor(),           # Convert to tensor [0, 1]
    transforms.Normalize(mean=[0.5], std=[0.5])
])

x_train = []
y_train = []

for img_name, label in train_data:
    img_path = os.path.join(preped_folder, img_name)
    try:
        img = Image.open(img_path).convert('L') # Convert to grayscale
        img_tensor = transform(img)
        x_train.append(img_tensor)
        y_train.append(label)
    except Exception as e:
        logger.info(f"Error loading {img_name}: {e}")

# Stack into tensors
x_train_tensor = torch.stack(x_train)
logger.info(f"Training images shape: {x_train_tensor.shape}")

# Encode labels to integers
label_to_idx = {label: idx for idx, label in enumerate(np.unique(y_train))}
y_train_encoded = [label_to_idx[label] for label in y_train]
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.long)

logger.info(f"Training labels shape: {y_train_tensor.shape}")
logger.info(f"Label mapping: {label_to_idx}")

2025-12-13 08:58:26,128 - INFO - Training images shape: torch.Size([241, 1, 224, 224])
2025-12-13 08:58:26,130 - INFO - Training labels shape: torch.Size([241])
2025-12-13 08:58:26,130 - INFO - Label mapping: {np.str_('1_Pronacio'): 0, np.str_('2_Neutralis'): 1, np.str_('3_Szupinacio'): 2}


In [4]:
x_test = []
y_test = []

for img_name, label in test_data:
    img_path = os.path.join(preped_folder, img_name)
    try:
        img = Image.open(img_path).convert('L') # Convert to grayscale
        img_tensor = transform(img)
        x_test.append(img_tensor)
        y_test.append(label)
    except Exception as e:
        logger.info(f"Error loading {img_name}: {e}")

x_test_tensor = torch.stack(x_test)
logger.info(f"Test images shape: {x_test_tensor.shape}")
y_test_encoded = [label_to_idx[label] for label in y_test]
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.long)

logger.info(f"Test labels shape: {y_test_tensor.shape}")

2025-12-13 08:58:32,729 - INFO - Test images shape: torch.Size([49, 1, 224, 224])
2025-12-13 08:58:32,730 - INFO - Test labels shape: torch.Size([49])


In [5]:
if torch.cuda.is_available():
    logger.info(f"CUDA available: {torch.cuda.is_available()}")
    logger.info(f"Number of GPUs: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        logger.info(f"\nGPU {i}: {torch.cuda.get_device_name(i)}")
        props = torch.cuda.get_device_properties(i)
        logger.info(f"  Memory: {props.total_memory / 1024**3:.2f} GB")
        logger.info(f"  Compute Capability: {props.major}.{props.minor}")
else:
    logger.info("CUDA not available")

2025-12-13 08:58:32,791 - INFO - CUDA available: True
2025-12-13 08:58:32,792 - INFO - Number of GPUs: 1
2025-12-13 08:58:32,800 - INFO - 
GPU 0: NVIDIA GeForce RTX 4060
2025-12-13 08:58:32,801 - INFO -   Memory: 8.00 GB
2025-12-13 08:58:32,802 - INFO -   Compute Capability: 8.9


In [6]:
batch_size = 16
num_epochs = 70
device = 'cuda' 

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [23]:
# wandb login an init
# Login to wandb with API key
load_dotenv()
wandb.login(key=os.getenv("wandbKey"))

def init_wandb():
    # Initialize wandb project
    wandb.init(
        project="ankle-align-inc-model",
        config={
            "batch_size": batch_size,
            "num_epochs": num_epochs,
      
            "architecture": "Custom CNN",
            "dataset": "AnkleAlign",
            "optimizer": "Adam"
        }
    )

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Win 10\_netrc


In [None]:
net0 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, kernel_size=3, stride=2, padding=1),   # 224x224 -> 112x112
    torch.nn.ReLU(),
    torch.nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1),  # 112x112 -> 56x56
    torch.nn.ReLU(),
    torch.nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # 56x56 -> 28x28
    torch.nn.ReLU(),
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(32, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 3)                       # Output layer     
).to(device)


def init_weights(m):
    if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear):
        torch.nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
        if m.bias is not None:
            torch.nn.init.constant_(m.bias, 0)

net0.apply(init_weights)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net0.parameters(), lr=0.01)

summary(net0, input_size=(batch_size, 1, 224, 224))


Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [16, 3]                   --
├─Conv2d: 1-1                            [16, 8, 112, 112]         80
├─ReLU: 1-2                              [16, 8, 112, 112]         --
├─Conv2d: 1-3                            [16, 16, 56, 56]          1,168
├─ReLU: 1-4                              [16, 16, 56, 56]          --
├─Conv2d: 1-5                            [16, 32, 28, 28]          4,640
├─ReLU: 1-6                              [16, 32, 28, 28]          --
├─AdaptiveAvgPool2d: 1-7                 [16, 32, 1, 1]            --
├─Flatten: 1-8                           [16, 32]                  --
├─Linear: 1-9                            [16, 128]                 4,224
├─ReLU: 1-10                             [16, 128]                 --
├─Linear: 1-11                           [16, 64]                  8,256
├─ReLU: 1-12                             [16, 64]                  --
├─L

In [9]:
# Trying to overfit one batch
init_wandb()
one_batch = next(iter(train_loader))
images, labels = one_batch

images = images.to(device)
labels = labels.to(device)

loss_values = []
net0.train()
for epoch in tqdm(range(num_epochs), desc='Training model'):
        pred_logits = net0(images)
        loss = loss_fn(pred_logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_values.append(loss.item())
        wandb.log({
                "epoch": epoch + 1,
                "train_loss": loss.item()
            })
        
wandb.finish()
print(loss_values)

Training model: 100%|██████████| 70/70 [00:00<00:00, 108.06it/s]
[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇▇███
train_loss,█▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▂▂▃▃▄▂▂▁▁

0,1
epoch,70.0
train_loss,0.20578


[0.9189960956573486, 0.9109243750572205, 0.6084246039390564, 0.6110831499099731, 0.5959535837173462, 0.6036111116409302, 0.5776468515396118, 0.5964195132255554, 0.5777697563171387, 0.5779669880867004, 0.5845142006874084, 0.5704132318496704, 0.57173091173172, 0.5740042924880981, 0.562248706817627, 0.5676491260528564, 0.5616258978843689, 0.5558769702911377, 0.5586780309677124, 0.5480696558952332, 0.551421046257019, 0.5429296493530273, 0.5432425737380981, 0.5348362922668457, 0.5359674692153931, 0.524548351764679, 0.530306339263916, 0.5163533091545105, 0.5102657079696655, 0.511725664138794, 0.49767816066741943, 0.48764994740486145, 0.49501532316207886, 0.50182044506073, 0.47447913885116577, 0.45350348949432373, 0.4503435790538788, 0.46593379974365234, 0.5171388387680054, 0.44671016931533813, 0.4155682325363159, 0.4388939142227173, 0.43282079696655273, 0.4105982780456543, 0.3897345960140228, 0.4066815972328186, 0.4314323365688324, 0.3709186315536499, 0.3621132969856262, 0.4061894416809082, 

Net0 looks the most simply CNN which could learn on a 16 image batch, and overfit on this data. Smaller networks were not sifficent enaught to learon on 16 images.

In [10]:
def train_model(network, optimizer, loss_fn, enable_early_stopping=False, patience=5):
    torch.cuda.empty_cache()

    loss_values = []

    if enable_early_stopping:
        early_stopping = EarlyStopping(patience=patience, verbose=True)

    network.train()
    for epoch in tqdm(range(num_epochs), desc='Training model'):
        network.train()
        epoch_loss = 0.0
        num_batches = 0
        for images, target_labels in train_loader:
            images = images.to(device)
            target_labels = target_labels.to(device)

            pred_logits = network(images)
            loss = loss_fn(pred_logits, target_labels)
            epoch_loss += loss.item()
            num_batches += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        avg_train_loss = epoch_loss / num_batches

        if enable_early_stopping:
            network.eval()
            val_loss = 0.0
            val_batches = 0
            correct = 0
            total = 0
            with torch.no_grad():
                for images, target_labels in val_loader:
                    images = images.to(device)
                    target_labels = target_labels.to(device)
                    
                    pred_logits = network(images)
                    loss = loss_fn(pred_logits, target_labels)
                    val_loss += loss.item()
                    val_batches += 1
                    
                    _, predicted = torch.max(pred_logits, 1)
                    total += target_labels.size(0)
                    correct += (predicted == target_labels).sum().item()
            
            avg_val_loss = val_loss / val_batches
            val_accuracy = correct / total

        # Log metrics
        if enable_early_stopping:
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": avg_train_loss,
                "val_loss": avg_val_loss,
                "val_accuracy": val_accuracy
            })
        else:
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": avg_train_loss
            })
        loss_values.append(avg_train_loss)
        
        if enable_early_stopping:
            logger.info(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
        else:
            logger.info(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}")

        # Early stopping check
        if enable_early_stopping:
            early_stopping(avg_val_loss, network)
            if early_stopping.early_stop:
                logger.info("Early stopping triggered")
                network.load_state_dict(early_stopping.best_model)
                break
    
    # Load best model
    if enable_early_stopping and early_stopping.best_model is not None:
        network.load_state_dict(early_stopping.best_model)
        logger.info("Loaded best model weights")

    logger.info(loss_values)

In [11]:
def evaluate_model(network):
    # Training score
    true_labels = y_test_encoded
    predicted_labels = []
    network.eval()
    with torch.no_grad():
        for images, _ in test_loader:
            images = images.to(device)
            outputs = network(images)
            _, predicted = torch.max(outputs, 1)
            predicted_labels.extend(predicted.cpu().numpy())

    accuracy = np.mean([true == pred for true, pred in zip(true_labels, predicted_labels)])
    precision = precision_score(true_labels, predicted_labels, average='weighted')
    recall = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')

    logger.info(f"network accuracy: {accuracy * 100:.2f}%")
    logger.info(f"network precision: {precision * 100:.2f}%")
    logger.info(f"network recall: {recall * 100:.2f}%")
    logger.info(f"network F1 score: {f1 * 100:.2f}%")

    logger.info(f"Detailed Classification Report: \n{classification_report(true_labels, predicted_labels)}")

    # Log test metrics
    wandb.log({
        "test_accuracy": accuracy,
        "test_precision": precision,
        "test_recall": recall,
        "test_f1": f1
    })

    wandb.finish()

In [12]:
init_wandb()
train_model(net0, optimizer, loss_fn, enable_early_stopping=False)

Training model:   0%|          | 0/70 [00:00<?, ?it/s]

2025-12-13 08:58:42,088 - INFO - Epoch 1/70, Train Loss: 1.3676


Training model:   1%|▏         | 1/70 [00:00<00:12,  5.48it/s]

2025-12-13 08:58:42,183 - INFO - Epoch 2/70, Train Loss: 1.0051
2025-12-13 08:58:42,270 - INFO - Epoch 3/70, Train Loss: 1.0193


Training model:   4%|▍         | 3/70 [00:00<00:07,  8.75it/s]

2025-12-13 08:58:42,395 - INFO - Epoch 4/70, Train Loss: 0.9792


Training model:   6%|▌         | 4/70 [00:00<00:07,  8.44it/s]

2025-12-13 08:58:42,527 - INFO - Epoch 5/70, Train Loss: 0.9988


Training model:   7%|▋         | 5/70 [00:00<00:07,  8.14it/s]

2025-12-13 08:58:42,649 - INFO - Epoch 6/70, Train Loss: 1.0225


Training model:   9%|▊         | 6/70 [00:00<00:07,  8.17it/s]

2025-12-13 08:58:42,790 - INFO - Epoch 7/70, Train Loss: 0.9923


Training model:  10%|█         | 7/70 [00:00<00:08,  7.79it/s]

2025-12-13 08:58:42,882 - INFO - Epoch 8/70, Train Loss: 1.0017
2025-12-13 08:58:42,973 - INFO - Epoch 9/70, Train Loss: 0.9875


Training model:  13%|█▎        | 9/70 [00:01<00:06,  9.02it/s]

2025-12-13 08:58:43,066 - INFO - Epoch 10/70, Train Loss: 0.9763
2025-12-13 08:58:43,188 - INFO - Epoch 11/70, Train Loss: 1.0099


Training model:  16%|█▌        | 11/70 [00:01<00:06,  9.12it/s]

2025-12-13 08:58:43,330 - INFO - Epoch 12/70, Train Loss: 0.9834


Training model:  17%|█▋        | 12/70 [00:01<00:06,  8.56it/s]

2025-12-13 08:58:43,452 - INFO - Epoch 13/70, Train Loss: 0.9767


Training model:  19%|█▊        | 13/70 [00:01<00:06,  8.48it/s]

2025-12-13 08:58:43,572 - INFO - Epoch 14/70, Train Loss: 1.0197


Training model:  20%|██        | 14/70 [00:01<00:06,  8.42it/s]

2025-12-13 08:58:43,662 - INFO - Epoch 15/70, Train Loss: 0.9612
2025-12-13 08:58:43,763 - INFO - Epoch 16/70, Train Loss: 0.9506


Training model:  23%|██▎       | 16/70 [00:01<00:05,  9.20it/s]

2025-12-13 08:58:43,861 - INFO - Epoch 17/70, Train Loss: 0.9390
2025-12-13 08:58:43,956 - INFO - Epoch 18/70, Train Loss: 1.0396


Training model:  26%|██▌       | 18/70 [00:02<00:05,  9.59it/s]

2025-12-13 08:58:44,049 - INFO - Epoch 19/70, Train Loss: 1.0614
2025-12-13 08:58:44,147 - INFO - Epoch 20/70, Train Loss: 0.9847


Training model:  29%|██▊       | 20/70 [00:02<00:05,  9.89it/s]

2025-12-13 08:58:44,243 - INFO - Epoch 21/70, Train Loss: 0.9499
2025-12-13 08:58:44,329 - INFO - Epoch 22/70, Train Loss: 1.0370


Training model:  31%|███▏      | 22/70 [00:02<00:04, 10.23it/s]

2025-12-13 08:58:44,421 - INFO - Epoch 23/70, Train Loss: 0.9701
2025-12-13 08:58:44,512 - INFO - Epoch 24/70, Train Loss: 0.9628


Training model:  34%|███▍      | 24/70 [00:02<00:04, 10.44it/s]

2025-12-13 08:58:44,600 - INFO - Epoch 25/70, Train Loss: 0.9306
2025-12-13 08:58:44,691 - INFO - Epoch 26/70, Train Loss: 0.9516


Training model:  37%|███▋      | 26/70 [00:02<00:04, 10.67it/s]

2025-12-13 08:58:44,777 - INFO - Epoch 27/70, Train Loss: 0.9399
2025-12-13 08:58:44,867 - INFO - Epoch 28/70, Train Loss: 0.9338


Training model:  40%|████      | 28/70 [00:02<00:03, 10.88it/s]

2025-12-13 08:58:44,960 - INFO - Epoch 29/70, Train Loss: 0.9297
2025-12-13 08:58:45,052 - INFO - Epoch 30/70, Train Loss: 0.9591


Training model:  43%|████▎     | 30/70 [00:03<00:03, 10.84it/s]

2025-12-13 08:58:45,148 - INFO - Epoch 31/70, Train Loss: 0.8830
2025-12-13 08:58:45,251 - INFO - Epoch 32/70, Train Loss: 0.9720


Training model:  46%|████▌     | 32/70 [00:03<00:03, 10.58it/s]

2025-12-13 08:58:45,364 - INFO - Epoch 33/70, Train Loss: 0.9695
2025-12-13 08:58:45,469 - INFO - Epoch 34/70, Train Loss: 0.9415


Training model:  49%|████▊     | 34/70 [00:03<00:03, 10.12it/s]

2025-12-13 08:58:45,583 - INFO - Epoch 35/70, Train Loss: 0.8798
2025-12-13 08:58:45,708 - INFO - Epoch 36/70, Train Loss: 0.8725


Training model:  51%|█████▏    | 36/70 [00:03<00:03,  9.53it/s]

2025-12-13 08:58:45,817 - INFO - Epoch 37/70, Train Loss: 0.8441


Training model:  53%|█████▎    | 37/70 [00:03<00:03,  9.44it/s]

2025-12-13 08:58:45,932 - INFO - Epoch 38/70, Train Loss: 0.8001


Training model:  54%|█████▍    | 38/70 [00:04<00:03,  9.30it/s]

2025-12-13 08:58:46,050 - INFO - Epoch 39/70, Train Loss: 0.8207


Training model:  56%|█████▌    | 39/70 [00:04<00:03,  9.10it/s]

2025-12-13 08:58:46,153 - INFO - Epoch 40/70, Train Loss: 0.7307


Training model:  57%|█████▋    | 40/70 [00:04<00:03,  9.23it/s]

2025-12-13 08:58:46,261 - INFO - Epoch 41/70, Train Loss: 1.3516


Training model:  59%|█████▊    | 41/70 [00:04<00:03,  9.25it/s]

2025-12-13 08:58:46,367 - INFO - Epoch 42/70, Train Loss: 1.1947


Training model:  60%|██████    | 42/70 [00:04<00:03,  9.29it/s]

2025-12-13 08:58:46,472 - INFO - Epoch 43/70, Train Loss: 0.9482


Training model:  61%|██████▏   | 43/70 [00:04<00:02,  9.35it/s]

2025-12-13 08:58:46,580 - INFO - Epoch 44/70, Train Loss: 0.9477


Training model:  63%|██████▎   | 44/70 [00:04<00:02,  9.33it/s]

2025-12-13 08:58:46,684 - INFO - Epoch 45/70, Train Loss: 0.8608


Training model:  64%|██████▍   | 45/70 [00:04<00:02,  9.42it/s]

2025-12-13 08:58:46,790 - INFO - Epoch 46/70, Train Loss: 0.8280


Training model:  66%|██████▌   | 46/70 [00:04<00:02,  9.42it/s]

2025-12-13 08:58:46,895 - INFO - Epoch 47/70, Train Loss: 0.8271


Training model:  67%|██████▋   | 47/70 [00:04<00:02,  9.44it/s]

2025-12-13 08:58:47,003 - INFO - Epoch 48/70, Train Loss: 0.7395


Training model:  69%|██████▊   | 48/70 [00:05<00:02,  9.40it/s]

2025-12-13 08:58:47,109 - INFO - Epoch 49/70, Train Loss: 0.7451


Training model:  70%|███████   | 49/70 [00:05<00:02,  9.39it/s]

2025-12-13 08:58:47,211 - INFO - Epoch 50/70, Train Loss: 1.4151


Training model:  71%|███████▏  | 50/70 [00:05<00:02,  9.52it/s]

2025-12-13 08:58:47,313 - INFO - Epoch 51/70, Train Loss: 1.0156


Training model:  73%|███████▎  | 51/70 [00:05<00:01,  9.59it/s]

2025-12-13 08:58:47,419 - INFO - Epoch 52/70, Train Loss: 0.9837


Training model:  74%|███████▍  | 52/70 [00:05<00:01,  9.54it/s]

2025-12-13 08:58:47,559 - INFO - Epoch 53/70, Train Loss: 0.9138


Training model:  76%|███████▌  | 53/70 [00:05<00:01,  8.67it/s]

2025-12-13 08:58:47,758 - INFO - Epoch 54/70, Train Loss: 0.8647


Training model:  77%|███████▋  | 54/70 [00:05<00:02,  7.13it/s]

2025-12-13 08:58:47,862 - INFO - Epoch 55/70, Train Loss: 0.8239


Training model:  79%|███████▊  | 55/70 [00:05<00:01,  7.74it/s]

2025-12-13 08:58:47,964 - INFO - Epoch 56/70, Train Loss: 0.7883


Training model:  80%|████████  | 56/70 [00:06<00:01,  8.26it/s]

2025-12-13 08:58:48,060 - INFO - Epoch 57/70, Train Loss: 0.7467
2025-12-13 08:58:48,159 - INFO - Epoch 58/70, Train Loss: 0.7822


Training model:  83%|████████▎ | 58/70 [00:06<00:01,  9.05it/s]

2025-12-13 08:58:48,280 - INFO - Epoch 59/70, Train Loss: 0.7376


Training model:  84%|████████▍ | 59/70 [00:06<00:01,  8.87it/s]

2025-12-13 08:58:48,405 - INFO - Epoch 60/70, Train Loss: 0.6778


Training model:  86%|████████▌ | 60/70 [00:06<00:01,  8.62it/s]

2025-12-13 08:58:48,512 - INFO - Epoch 61/70, Train Loss: 0.6672


Training model:  87%|████████▋ | 61/70 [00:06<00:01,  8.80it/s]

2025-12-13 08:58:48,619 - INFO - Epoch 62/70, Train Loss: 0.7251


Training model:  89%|████████▊ | 62/70 [00:06<00:00,  8.94it/s]

2025-12-13 08:58:48,754 - INFO - Epoch 63/70, Train Loss: 0.6653


Training model:  90%|█████████ | 63/70 [00:06<00:00,  8.45it/s]

2025-12-13 08:58:48,874 - INFO - Epoch 64/70, Train Loss: 0.7124


Training model:  91%|█████████▏| 64/70 [00:06<00:00,  8.41it/s]

2025-12-13 08:58:49,011 - INFO - Epoch 65/70, Train Loss: 0.6708


Training model:  93%|█████████▎| 65/70 [00:07<00:00,  8.06it/s]

2025-12-13 08:58:49,126 - INFO - Epoch 66/70, Train Loss: 0.7083


Training model:  94%|█████████▍| 66/70 [00:07<00:00,  8.23it/s]

2025-12-13 08:58:49,239 - INFO - Epoch 67/70, Train Loss: 0.5661


Training model:  96%|█████████▌| 67/70 [00:07<00:00,  8.41it/s]

2025-12-13 08:58:49,350 - INFO - Epoch 68/70, Train Loss: 0.5048


Training model:  97%|█████████▋| 68/70 [00:07<00:00,  8.58it/s]

2025-12-13 08:58:49,461 - INFO - Epoch 69/70, Train Loss: 0.4799


Training model:  99%|█████████▊| 69/70 [00:07<00:00,  8.68it/s]

2025-12-13 08:58:49,565 - INFO - Epoch 70/70, Train Loss: 0.4845


Training model: 100%|██████████| 70/70 [00:07<00:00,  9.14it/s]

2025-12-13 08:58:49,567 - INFO - [1.367609478533268, 1.0050833337008953, 1.019287422299385, 0.9792229644954205, 0.9987912774085999, 1.0225254148244858, 0.9923433512449265, 1.0016814656555653, 0.9874717779457569, 0.9762842319905758, 1.0099237561225891, 0.9834324643015862, 0.9767465516924858, 1.0197370164096355, 0.9611853286623955, 0.9505730979144573, 0.9390123076736927, 1.0395533852279186, 1.0613774731755257, 0.9847182855010033, 0.949910644441843, 1.0370331592857838, 0.9700660593807697, 0.9628367051482201, 0.9305558316409588, 0.9515724293887615, 0.9399220794439316, 0.9337956011295319, 0.9297220185399055, 0.9591145627200603, 0.8830360732972622, 0.9720307663083076, 0.9694576673209667, 0.9415075182914734, 0.8797676488757133, 0.8724886253476143, 0.8441466502845287, 0.8001011405140162, 0.8206600099802017, 0.7307148575782776, 1.3516437225043774, 1.1947062239050865, 0.9481813460588455, 0.9477338530123234, 0.8607966676354408, 0.8279795423150063, 0.8270625714212656, 0.7394652627408504, 0.7450531




Net0 can learn on one batch of 16 images, but can not learn on all the provided train data, to simple for 200+ images.

In [None]:
net1 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, kernel_size=3, stride=2, padding=1),   # 224x224 -> 112x112   // (3x3x1)x8
    torch.nn.ReLU(),
    torch.nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1),  # 112x112 -> 56x56     // (3x3x8)x16
    torch.nn.ReLU(),
    torch.nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # 56x56 -> 28x28      // (3x3x16)x32
    torch.nn.ReLU(),
    torch.nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),  # 28x28 -> 14x14      // (3x3x32)x32
    torch.nn.ReLU(),
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(32, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 3)                       # Output layer     
).to(device)

net1.apply(init_weights)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net1.parameters(), lr=0.001)

summary(net1, input_size=(batch_size, 1, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [16, 3]                   --
├─Conv2d: 1-1                            [16, 8, 112, 112]         80
├─ReLU: 1-2                              [16, 8, 112, 112]         --
├─Conv2d: 1-3                            [16, 16, 56, 56]          1,168
├─ReLU: 1-4                              [16, 16, 56, 56]          --
├─Conv2d: 1-5                            [16, 32, 28, 28]          4,640
├─ReLU: 1-6                              [16, 32, 28, 28]          --
├─Conv2d: 1-7                            [16, 32, 14, 14]          9,248
├─ReLU: 1-8                              [16, 32, 14, 14]          --
├─AdaptiveAvgPool2d: 1-9                 [16, 32, 1, 1]            --
├─Flatten: 1-10                          [16, 32]                  --
├─Linear: 1-11                           [16, 128]                 4,224
├─ReLU: 1-12                             [16, 128]                 --
├─L

In [14]:
init_wandb()
train_model(net1, optimizer, loss_fn, enable_early_stopping=False)
wandb.finish()

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
train_loss,█▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▅▅▅▄▄▄▃█▅▅▄▃▃▅▄▃▃▃▂▃▂▃▁

0,1
epoch,70.0
train_loss,0.48453


Training model:   0%|          | 0/70 [00:00<?, ?it/s]

2025-12-13 08:58:53,286 - INFO - Epoch 1/70, Train Loss: 1.0399


Training model:   1%|▏         | 1/70 [00:00<00:12,  5.31it/s]

2025-12-13 08:58:53,488 - INFO - Epoch 2/70, Train Loss: 1.0588


Training model:   3%|▎         | 2/70 [00:00<00:13,  5.08it/s]

2025-12-13 08:58:53,660 - INFO - Epoch 3/70, Train Loss: 1.0044


Training model:   4%|▍         | 3/70 [00:00<00:12,  5.41it/s]

2025-12-13 08:58:53,798 - INFO - Epoch 4/70, Train Loss: 0.9897


Training model:   6%|▌         | 4/70 [00:00<00:11,  6.00it/s]

2025-12-13 08:58:53,945 - INFO - Epoch 5/70, Train Loss: 0.9756


Training model:   7%|▋         | 5/70 [00:00<00:10,  6.28it/s]

2025-12-13 08:58:54,099 - INFO - Epoch 6/70, Train Loss: 0.9911


Training model:   9%|▊         | 6/70 [00:01<00:10,  6.34it/s]

2025-12-13 08:58:54,238 - INFO - Epoch 7/70, Train Loss: 0.9647


Training model:  10%|█         | 7/70 [00:01<00:09,  6.59it/s]

2025-12-13 08:58:54,364 - INFO - Epoch 8/70, Train Loss: 1.0426


Training model:  11%|█▏        | 8/70 [00:01<00:08,  6.97it/s]

2025-12-13 08:58:54,487 - INFO - Epoch 9/70, Train Loss: 0.9857


Training model:  13%|█▎        | 9/70 [00:01<00:08,  7.31it/s]

2025-12-13 08:58:54,607 - INFO - Epoch 10/70, Train Loss: 0.9359


Training model:  14%|█▍        | 10/70 [00:01<00:07,  7.60it/s]

2025-12-13 08:58:54,755 - INFO - Epoch 11/70, Train Loss: 0.9648


Training model:  16%|█▌        | 11/70 [00:01<00:08,  7.30it/s]

2025-12-13 08:58:54,917 - INFO - Epoch 12/70, Train Loss: 0.9730


Training model:  17%|█▋        | 12/70 [00:01<00:08,  6.92it/s]

2025-12-13 08:58:55,063 - INFO - Epoch 13/70, Train Loss: 0.9674


Training model:  19%|█▊        | 13/70 [00:01<00:08,  6.90it/s]

2025-12-13 08:58:55,165 - INFO - Epoch 14/70, Train Loss: 0.9942


Training model:  20%|██        | 14/70 [00:02<00:07,  7.58it/s]

2025-12-13 08:58:55,265 - INFO - Epoch 15/70, Train Loss: 0.9509


Training model:  21%|██▏       | 15/70 [00:02<00:06,  8.18it/s]

2025-12-13 08:58:55,367 - INFO - Epoch 16/70, Train Loss: 0.9442


Training model:  23%|██▎       | 16/70 [00:02<00:06,  8.62it/s]

2025-12-13 08:58:55,466 - INFO - Epoch 17/70, Train Loss: 0.9756
2025-12-13 08:58:55,560 - INFO - Epoch 18/70, Train Loss: 0.9841


Training model:  26%|██▌       | 18/70 [00:02<00:05,  9.34it/s]

2025-12-13 08:58:55,657 - INFO - Epoch 19/70, Train Loss: 0.9559
2025-12-13 08:58:55,749 - INFO - Epoch 20/70, Train Loss: 0.9507


Training model:  29%|██▊       | 20/70 [00:02<00:05,  9.80it/s]

2025-12-13 08:58:55,852 - INFO - Epoch 21/70, Train Loss: 0.9481


Training model:  30%|███       | 21/70 [00:02<00:05,  9.78it/s]

2025-12-13 08:58:55,957 - INFO - Epoch 22/70, Train Loss: 0.9402


Training model:  31%|███▏      | 22/70 [00:02<00:04,  9.72it/s]

2025-12-13 08:58:56,057 - INFO - Epoch 23/70, Train Loss: 0.9618
2025-12-13 08:58:56,156 - INFO - Epoch 24/70, Train Loss: 0.9211


Training model:  34%|███▍      | 24/70 [00:03<00:04,  9.85it/s]

2025-12-13 08:58:56,255 - INFO - Epoch 25/70, Train Loss: 0.9276
2025-12-13 08:58:56,346 - INFO - Epoch 26/70, Train Loss: 0.9469


Training model:  37%|███▋      | 26/70 [00:03<00:04, 10.08it/s]

2025-12-13 08:58:56,442 - INFO - Epoch 27/70, Train Loss: 0.9129
2025-12-13 08:58:56,535 - INFO - Epoch 28/70, Train Loss: 0.8862


Training model:  40%|████      | 28/70 [00:03<00:04, 10.24it/s]

2025-12-13 08:58:56,632 - INFO - Epoch 29/70, Train Loss: 0.9156
2025-12-13 08:58:56,743 - INFO - Epoch 30/70, Train Loss: 0.9107


Training model:  43%|████▎     | 30/70 [00:03<00:03, 10.04it/s]

2025-12-13 08:58:56,846 - INFO - Epoch 31/70, Train Loss: 0.8601
2025-12-13 08:58:56,951 - INFO - Epoch 32/70, Train Loss: 0.8930


Training model:  46%|████▌     | 32/70 [00:03<00:03,  9.89it/s]

2025-12-13 08:58:57,062 - INFO - Epoch 33/70, Train Loss: 0.9370


Training model:  47%|████▋     | 33/70 [00:03<00:03,  9.73it/s]

2025-12-13 08:58:57,160 - INFO - Epoch 34/70, Train Loss: 0.8763
2025-12-13 08:58:57,290 - INFO - Epoch 35/70, Train Loss: 0.8895


Training model:  50%|█████     | 35/70 [00:04<00:03,  9.37it/s]

2025-12-13 08:58:57,388 - INFO - Epoch 36/70, Train Loss: 0.8099
2025-12-13 08:58:57,493 - INFO - Epoch 37/70, Train Loss: 0.8024


Training model:  53%|█████▎    | 37/70 [00:04<00:03,  9.53it/s]

2025-12-13 08:58:57,586 - INFO - Epoch 38/70, Train Loss: 0.8345
2025-12-13 08:58:57,679 - INFO - Epoch 39/70, Train Loss: 0.8260


Training model:  56%|█████▌    | 39/70 [00:04<00:03,  9.89it/s]

2025-12-13 08:58:57,771 - INFO - Epoch 40/70, Train Loss: 0.8054
2025-12-13 08:58:57,864 - INFO - Epoch 41/70, Train Loss: 0.7939


Training model:  59%|█████▊    | 41/70 [00:04<00:02, 10.16it/s]

2025-12-13 08:58:58,174 - INFO - Epoch 42/70, Train Loss: 0.8448
2025-12-13 08:58:58,331 - INFO - Epoch 43/70, Train Loss: 0.7519


Training model:  61%|██████▏   | 43/70 [00:05<00:03,  7.13it/s]

2025-12-13 08:58:58,477 - INFO - Epoch 44/70, Train Loss: 0.7166


Training model:  63%|██████▎   | 44/70 [00:05<00:03,  7.07it/s]

2025-12-13 08:58:58,609 - INFO - Epoch 45/70, Train Loss: 0.7608


Training model:  64%|██████▍   | 45/70 [00:05<00:03,  7.17it/s]

2025-12-13 08:58:58,730 - INFO - Epoch 46/70, Train Loss: 0.7264


Training model:  66%|██████▌   | 46/70 [00:05<00:03,  7.39it/s]

2025-12-13 08:58:58,854 - INFO - Epoch 47/70, Train Loss: 0.6664


Training model:  67%|██████▋   | 47/70 [00:05<00:03,  7.56it/s]

2025-12-13 08:58:58,969 - INFO - Epoch 48/70, Train Loss: 0.6700


Training model:  69%|██████▊   | 48/70 [00:05<00:02,  7.81it/s]

2025-12-13 08:58:59,093 - INFO - Epoch 49/70, Train Loss: 0.6238


Training model:  70%|███████   | 49/70 [00:05<00:02,  7.87it/s]

2025-12-13 08:58:59,311 - INFO - Epoch 50/70, Train Loss: 0.6302


Training model:  71%|███████▏  | 50/70 [00:06<00:03,  6.57it/s]

2025-12-13 08:58:59,437 - INFO - Epoch 51/70, Train Loss: 0.6543


Training model:  73%|███████▎  | 51/70 [00:06<00:02,  6.92it/s]

2025-12-13 08:58:59,539 - INFO - Epoch 52/70, Train Loss: 0.6036


Training model:  74%|███████▍  | 52/70 [00:06<00:02,  7.55it/s]

2025-12-13 08:58:59,648 - INFO - Epoch 53/70, Train Loss: 0.7145


Training model:  76%|███████▌  | 53/70 [00:06<00:02,  7.98it/s]

2025-12-13 08:58:59,747 - INFO - Epoch 54/70, Train Loss: 0.6118


Training model:  77%|███████▋  | 54/70 [00:06<00:01,  8.48it/s]

2025-12-13 08:58:59,908 - INFO - Epoch 55/70, Train Loss: 0.5830


Training model:  79%|███████▊  | 55/70 [00:06<00:01,  7.65it/s]

2025-12-13 08:59:00,077 - INFO - Epoch 56/70, Train Loss: 0.4940


Training model:  80%|████████  | 56/70 [00:06<00:01,  7.05it/s]

2025-12-13 08:59:00,228 - INFO - Epoch 57/70, Train Loss: 0.5946


Training model:  81%|████████▏ | 57/70 [00:07<00:01,  6.91it/s]

2025-12-13 08:59:00,362 - INFO - Epoch 58/70, Train Loss: 0.5347


Training model:  83%|████████▎ | 58/70 [00:07<00:01,  7.07it/s]

2025-12-13 08:59:00,525 - INFO - Epoch 59/70, Train Loss: 0.5133


Training model:  84%|████████▍ | 59/70 [00:07<00:01,  6.76it/s]

2025-12-13 08:59:00,654 - INFO - Epoch 60/70, Train Loss: 0.4753


Training model:  86%|████████▌ | 60/70 [00:07<00:01,  7.04it/s]

2025-12-13 08:59:00,764 - INFO - Epoch 61/70, Train Loss: 0.6728


Training model:  87%|████████▋ | 61/70 [00:07<00:01,  7.54it/s]

2025-12-13 08:59:00,911 - INFO - Epoch 62/70, Train Loss: 0.5184


Training model:  89%|████████▊ | 62/70 [00:07<00:01,  7.30it/s]

2025-12-13 08:59:01,037 - INFO - Epoch 63/70, Train Loss: 0.4909


Training model:  90%|█████████ | 63/70 [00:07<00:00,  7.48it/s]

2025-12-13 08:59:01,147 - INFO - Epoch 64/70, Train Loss: 0.4347


Training model:  91%|█████████▏| 64/70 [00:08<00:00,  7.89it/s]

2025-12-13 08:59:01,272 - INFO - Epoch 65/70, Train Loss: 0.3701


Training model:  93%|█████████▎| 65/70 [00:08<00:00,  7.94it/s]

2025-12-13 08:59:01,419 - INFO - Epoch 66/70, Train Loss: 0.3803


Training model:  94%|█████████▍| 66/70 [00:08<00:00,  7.54it/s]

2025-12-13 08:59:01,598 - INFO - Epoch 67/70, Train Loss: 0.5007


Training model:  96%|█████████▌| 67/70 [00:08<00:00,  6.83it/s]

2025-12-13 08:59:01,747 - INFO - Epoch 68/70, Train Loss: 0.5196


Training model:  97%|█████████▋| 68/70 [00:08<00:00,  6.79it/s]

2025-12-13 08:59:01,862 - INFO - Epoch 69/70, Train Loss: 0.4329


Training model:  99%|█████████▊| 69/70 [00:08<00:00,  7.28it/s]

2025-12-13 08:59:01,971 - INFO - Epoch 70/70, Train Loss: 0.4017


Training model: 100%|██████████| 70/70 [00:08<00:00,  7.89it/s]

2025-12-13 08:59:01,973 - INFO - [1.0399250909686089, 1.0587920546531677, 1.0044136866927147, 0.9897469095885754, 0.9755896776914597, 0.991124402731657, 0.9646955281496048, 1.04264210537076, 0.9857311733067036, 0.9359408151358366, 0.9648342840373516, 0.9729919619858265, 0.9673570320010185, 0.9942279644310474, 0.9509303942322731, 0.9442183263599873, 0.9756307825446129, 0.9840870201587677, 0.9559366554021835, 0.9507177956402302, 0.948088962584734, 0.9401596039533615, 0.9618076607584953, 0.9211361669003963, 0.927639152854681, 0.9469483271241188, 0.9128565788269043, 0.8862023800611496, 0.9155723638832569, 0.9106504619121552, 0.8601327519863844, 0.8929817080497742, 0.9369538389146328, 0.876258909702301, 0.8895190432667732, 0.8098578378558159, 0.8024081028997898, 0.8345138542354107, 0.8259834088385105, 0.8053876645863056, 0.7939435169100761, 0.8447823598980904, 0.7518744012340903, 0.7166483849287033, 0.7607695125043392, 0.7263949736952782, 0.6663938499987125, 0.6699719466269016, 0.6237642664




0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇██
train_loss,██▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▅▅▆▅▅▅▄▄▄▃▃▃▂▃▂▁▁▂

0,1
epoch,70.0
train_loss,0.40171


Net1 could learn on all the provided train data, but it took the model over 50 epoch to converge below 0.5 loss on train set. So I think I will create 1 more little bit more complex network.

In [None]:
net2 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, kernel_size=3, stride=2, padding=1),   # 224x224 -> 112x112   // (3x3x1)x8
    torch.nn.ReLU(),
    torch.nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1),  # 112x112 -> 56x56     // (3x3x8)x16
    torch.nn.ReLU(),
    torch.nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # 56x56 -> 28x28      // (3x3x16)x32
    torch.nn.ReLU(),
    torch.nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),  # 28x28 -> 14x14      // (3x3x32)x64
    torch.nn.ReLU(),
    torch.nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),  # 14x14 -> 7x7        // (3x3x64)x32
    torch.nn.ReLU(),
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(32, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 3)                       # Output layer     
).to(device)

net2.apply(init_weights)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net2.parameters(), lr=0.001)

summary(net2, input_size=(batch_size, 1, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [16, 3]                   --
├─Conv2d: 1-1                            [16, 8, 112, 112]         80
├─ReLU: 1-2                              [16, 8, 112, 112]         --
├─Conv2d: 1-3                            [16, 16, 56, 56]          1,168
├─ReLU: 1-4                              [16, 16, 56, 56]          --
├─Conv2d: 1-5                            [16, 32, 28, 28]          4,640
├─ReLU: 1-6                              [16, 32, 28, 28]          --
├─Conv2d: 1-7                            [16, 64, 14, 14]          18,496
├─ReLU: 1-8                              [16, 64, 14, 14]          --
├─Conv2d: 1-9                            [16, 32, 7, 7]            18,464
├─ReLU: 1-10                             [16, 32, 7, 7]            --
├─AdaptiveAvgPool2d: 1-11                [16, 32, 1, 1]            --
├─Flatten: 1-12                          [16, 32]                  --
├

In [16]:
init_wandb()
train_model(net2, optimizer, loss_fn, enable_early_stopping=False)
evaluate_model(net2)

Training model:   0%|          | 0/70 [00:00<?, ?it/s]

2025-12-13 08:59:04,957 - INFO - Epoch 1/70, Train Loss: 0.9957


Training model:   1%|▏         | 1/70 [00:00<00:17,  4.04it/s]

2025-12-13 08:59:05,129 - INFO - Epoch 2/70, Train Loss: 1.0129


Training model:   3%|▎         | 2/70 [00:00<00:13,  4.92it/s]

2025-12-13 08:59:05,281 - INFO - Epoch 3/70, Train Loss: 1.0745


Training model:   4%|▍         | 3/70 [00:00<00:12,  5.57it/s]

2025-12-13 08:59:05,391 - INFO - Epoch 4/70, Train Loss: 1.0224


Training model:   6%|▌         | 4/70 [00:00<00:10,  6.57it/s]

2025-12-13 08:59:05,504 - INFO - Epoch 5/70, Train Loss: 0.9866


Training model:   7%|▋         | 5/70 [00:00<00:08,  7.25it/s]

2025-12-13 08:59:05,750 - INFO - Epoch 6/70, Train Loss: 0.9619


Training model:   9%|▊         | 6/70 [00:01<00:11,  5.72it/s]

2025-12-13 08:59:05,860 - INFO - Epoch 7/70, Train Loss: 0.9556


Training model:  10%|█         | 7/70 [00:01<00:09,  6.51it/s]

2025-12-13 08:59:05,966 - INFO - Epoch 8/70, Train Loss: 0.9642


Training model:  11%|█▏        | 8/70 [00:01<00:08,  7.21it/s]

2025-12-13 08:59:06,068 - INFO - Epoch 9/70, Train Loss: 0.9375


Training model:  13%|█▎        | 9/70 [00:01<00:07,  7.87it/s]

2025-12-13 08:59:06,176 - INFO - Epoch 10/70, Train Loss: 0.9424


Training model:  14%|█▍        | 10/70 [00:01<00:07,  8.25it/s]

2025-12-13 08:59:06,311 - INFO - Epoch 11/70, Train Loss: 0.9132


Training model:  16%|█▌        | 11/70 [00:01<00:07,  7.98it/s]

2025-12-13 08:59:06,439 - INFO - Epoch 12/70, Train Loss: 0.9232


Training model:  17%|█▋        | 12/70 [00:01<00:07,  7.93it/s]

2025-12-13 08:59:06,546 - INFO - Epoch 13/70, Train Loss: 0.9043


Training model:  19%|█▊        | 13/70 [00:01<00:06,  8.31it/s]

2025-12-13 08:59:06,648 - INFO - Epoch 14/70, Train Loss: 0.8974


Training model:  20%|██        | 14/70 [00:01<00:06,  8.70it/s]

2025-12-13 08:59:06,775 - INFO - Epoch 15/70, Train Loss: 0.8278


Training model:  21%|██▏       | 15/70 [00:02<00:06,  8.44it/s]

2025-12-13 08:59:06,887 - INFO - Epoch 16/70, Train Loss: 0.8497


Training model:  23%|██▎       | 16/70 [00:02<00:06,  8.59it/s]

2025-12-13 08:59:06,989 - INFO - Epoch 17/70, Train Loss: 0.8351


Training model:  24%|██▍       | 17/70 [00:02<00:05,  8.92it/s]

2025-12-13 08:59:07,116 - INFO - Epoch 18/70, Train Loss: 0.7468


Training model:  26%|██▌       | 18/70 [00:02<00:06,  8.58it/s]

2025-12-13 08:59:07,262 - INFO - Epoch 19/70, Train Loss: 0.6241


Training model:  27%|██▋       | 19/70 [00:02<00:06,  7.97it/s]

2025-12-13 08:59:07,382 - INFO - Epoch 20/70, Train Loss: 0.5798


Training model:  29%|██▊       | 20/70 [00:02<00:06,  8.07it/s]

2025-12-13 08:59:07,481 - INFO - Epoch 21/70, Train Loss: 0.6294
2025-12-13 08:59:07,583 - INFO - Epoch 22/70, Train Loss: 0.6197


Training model:  31%|███▏      | 22/70 [00:02<00:05,  8.84it/s]

2025-12-13 08:59:07,691 - INFO - Epoch 23/70, Train Loss: 0.5585


Training model:  33%|███▎      | 23/70 [00:02<00:05,  8.94it/s]

2025-12-13 08:59:07,795 - INFO - Epoch 24/70, Train Loss: 0.4672


Training model:  34%|███▍      | 24/70 [00:03<00:05,  9.12it/s]

2025-12-13 08:59:07,908 - INFO - Epoch 25/70, Train Loss: 0.3649


Training model:  36%|███▌      | 25/70 [00:03<00:04,  9.04it/s]

2025-12-13 08:59:08,040 - INFO - Epoch 26/70, Train Loss: 0.3274


Training model:  37%|███▋      | 26/70 [00:03<00:05,  8.56it/s]

2025-12-13 08:59:08,150 - INFO - Epoch 27/70, Train Loss: 0.2922


Training model:  39%|███▊      | 27/70 [00:03<00:04,  8.72it/s]

2025-12-13 08:59:08,263 - INFO - Epoch 28/70, Train Loss: 0.2215


Training model:  40%|████      | 28/70 [00:03<00:04,  8.73it/s]

2025-12-13 08:59:08,378 - INFO - Epoch 29/70, Train Loss: 0.2425


Training model:  41%|████▏     | 29/70 [00:03<00:04,  8.74it/s]

2025-12-13 08:59:08,491 - INFO - Epoch 30/70, Train Loss: 0.3481


Training model:  43%|████▎     | 30/70 [00:03<00:04,  8.78it/s]

2025-12-13 08:59:08,601 - INFO - Epoch 31/70, Train Loss: 0.3120


Training model:  44%|████▍     | 31/70 [00:03<00:04,  8.86it/s]

2025-12-13 08:59:08,712 - INFO - Epoch 32/70, Train Loss: 0.2684


Training model:  46%|████▌     | 32/70 [00:04<00:04,  8.92it/s]

2025-12-13 08:59:08,838 - INFO - Epoch 33/70, Train Loss: 0.1758


Training model:  47%|████▋     | 33/70 [00:04<00:04,  8.60it/s]

2025-12-13 08:59:08,959 - INFO - Epoch 34/70, Train Loss: 0.1362


Training model:  49%|████▊     | 34/70 [00:04<00:04,  8.49it/s]

2025-12-13 08:59:09,078 - INFO - Epoch 35/70, Train Loss: 0.1156


Training model:  50%|█████     | 35/70 [00:04<00:04,  8.46it/s]

2025-12-13 08:59:09,200 - INFO - Epoch 36/70, Train Loss: 0.0907


Training model:  51%|█████▏    | 36/70 [00:04<00:04,  8.39it/s]

2025-12-13 08:59:09,317 - INFO - Epoch 37/70, Train Loss: 0.1079


Training model:  53%|█████▎    | 37/70 [00:04<00:03,  8.42it/s]

2025-12-13 08:59:09,432 - INFO - Epoch 38/70, Train Loss: 0.1017


Training model:  54%|█████▍    | 38/70 [00:04<00:03,  8.50it/s]

2025-12-13 08:59:09,545 - INFO - Epoch 39/70, Train Loss: 0.0976


Training model:  56%|█████▌    | 39/70 [00:04<00:03,  8.62it/s]

2025-12-13 08:59:09,670 - INFO - Epoch 40/70, Train Loss: 0.1114


Training model:  57%|█████▋    | 40/70 [00:04<00:03,  8.42it/s]

2025-12-13 08:59:09,802 - INFO - Epoch 41/70, Train Loss: 0.0924


Training model:  59%|█████▊    | 41/70 [00:05<00:03,  8.14it/s]

2025-12-13 08:59:09,907 - INFO - Epoch 42/70, Train Loss: 0.0689


Training model:  60%|██████    | 42/70 [00:05<00:03,  8.52it/s]

2025-12-13 08:59:10,029 - INFO - Epoch 43/70, Train Loss: 0.0689


Training model:  61%|██████▏   | 43/70 [00:05<00:03,  8.36it/s]

2025-12-13 08:59:10,214 - INFO - Epoch 44/70, Train Loss: 0.0731


Training model:  63%|██████▎   | 44/70 [00:05<00:03,  7.24it/s]

2025-12-13 08:59:10,508 - INFO - Epoch 45/70, Train Loss: 0.0680


Training model:  64%|██████▍   | 45/70 [00:05<00:04,  5.40it/s]

2025-12-13 08:59:10,717 - INFO - Epoch 46/70, Train Loss: 0.0611


Training model:  66%|██████▌   | 46/70 [00:06<00:04,  5.19it/s]

2025-12-13 08:59:10,844 - INFO - Epoch 47/70, Train Loss: 0.0731


Training model:  67%|██████▋   | 47/70 [00:06<00:03,  5.79it/s]

2025-12-13 08:59:10,962 - INFO - Epoch 48/70, Train Loss: 0.0872


Training model:  69%|██████▊   | 48/70 [00:06<00:03,  6.40it/s]

2025-12-13 08:59:11,092 - INFO - Epoch 49/70, Train Loss: 0.1097


Training model:  70%|███████   | 49/70 [00:06<00:03,  6.74it/s]

2025-12-13 08:59:11,206 - INFO - Epoch 50/70, Train Loss: 0.0838


Training model:  71%|███████▏  | 50/70 [00:06<00:02,  7.12it/s]

2025-12-13 08:59:11,349 - INFO - Epoch 51/70, Train Loss: 0.0844


Training model:  73%|███████▎  | 51/70 [00:06<00:02,  7.21it/s]

2025-12-13 08:59:11,527 - INFO - Epoch 52/70, Train Loss: 0.0802


Training model:  74%|███████▍  | 52/70 [00:06<00:02,  6.65it/s]

2025-12-13 08:59:11,691 - INFO - Epoch 53/70, Train Loss: 0.0757


Training model:  76%|███████▌  | 53/70 [00:06<00:02,  6.46it/s]

2025-12-13 08:59:11,867 - INFO - Epoch 54/70, Train Loss: 0.0641


Training model:  77%|███████▋  | 54/70 [00:07<00:02,  6.20it/s]

2025-12-13 08:59:12,020 - INFO - Epoch 55/70, Train Loss: 0.0519


Training model:  79%|███████▊  | 55/70 [00:07<00:02,  6.30it/s]

2025-12-13 08:59:12,232 - INFO - Epoch 56/70, Train Loss: 0.0793


Training model:  80%|████████  | 56/70 [00:07<00:02,  5.73it/s]

2025-12-13 08:59:12,367 - INFO - Epoch 57/70, Train Loss: 0.0848


Training model:  81%|████████▏ | 57/70 [00:07<00:02,  6.15it/s]

2025-12-13 08:59:12,511 - INFO - Epoch 58/70, Train Loss: 0.0619


Training model:  83%|████████▎ | 58/70 [00:07<00:01,  6.37it/s]

2025-12-13 08:59:12,629 - INFO - Epoch 59/70, Train Loss: 0.0657


Training model:  84%|████████▍ | 59/70 [00:07<00:01,  6.87it/s]

2025-12-13 08:59:12,735 - INFO - Epoch 60/70, Train Loss: 0.0644


Training model:  86%|████████▌ | 60/70 [00:08<00:01,  7.48it/s]

2025-12-13 08:59:12,873 - INFO - Epoch 61/70, Train Loss: 0.0662


Training model:  87%|████████▋ | 61/70 [00:08<00:01,  7.41it/s]

2025-12-13 08:59:12,975 - INFO - Epoch 62/70, Train Loss: 0.0551


Training model:  89%|████████▊ | 62/70 [00:08<00:01,  8.00it/s]

2025-12-13 08:59:13,076 - INFO - Epoch 63/70, Train Loss: 0.0378


Training model:  90%|█████████ | 63/70 [00:08<00:00,  8.50it/s]

2025-12-13 08:59:13,183 - INFO - Epoch 64/70, Train Loss: 0.0389


Training model:  91%|█████████▏| 64/70 [00:08<00:00,  8.72it/s]

2025-12-13 08:59:13,343 - INFO - Epoch 65/70, Train Loss: 0.0398


Training model:  93%|█████████▎| 65/70 [00:08<00:00,  7.81it/s]

2025-12-13 08:59:13,473 - INFO - Epoch 66/70, Train Loss: 0.0497


Training model:  94%|█████████▍| 66/70 [00:08<00:00,  7.76it/s]

2025-12-13 08:59:13,596 - INFO - Epoch 67/70, Train Loss: 0.0465


Training model:  96%|█████████▌| 67/70 [00:08<00:00,  7.86it/s]

2025-12-13 08:59:13,713 - INFO - Epoch 68/70, Train Loss: 0.0658


Training model:  97%|█████████▋| 68/70 [00:09<00:00,  8.07it/s]

2025-12-13 08:59:13,836 - INFO - Epoch 69/70, Train Loss: 0.0458


Training model:  99%|█████████▊| 69/70 [00:09<00:00,  8.08it/s]

2025-12-13 08:59:13,958 - INFO - Epoch 70/70, Train Loss: 0.0513


Training model: 100%|██████████| 70/70 [00:09<00:00,  7.57it/s]

2025-12-13 08:59:13,960 - INFO - [0.9956907853484154, 1.0128592774271965, 1.0744765773415565, 1.0224140472710133, 0.9865722917020321, 0.9618925675749779, 0.9555903635919094, 0.9642202518880367, 0.9374573156237602, 0.9424333684146404, 0.9132138900458813, 0.9232466984540224, 0.9042926207184792, 0.897444412112236, 0.8278295639902353, 0.8496574871242046, 0.8351486176252365, 0.7468331698328257, 0.6240808647125959, 0.5798384752124548, 0.6294200960546732, 0.6197226643562317, 0.5584932754281908, 0.4672447012271732, 0.3648925491143018, 0.32740815496072173, 0.29216101253405213, 0.2215290897875093, 0.24248065007850528, 0.3481039176695049, 0.31198797561228275, 0.26836151140742004, 0.1757689490239045, 0.13624115788843483, 0.11561787932259904, 0.09073313918088388, 0.10790391359478235, 0.1017022569428434, 0.09758510446408764, 0.11143907031510025, 0.09241706322063692, 0.0689141603392045, 0.06891027855454013, 0.07312596451083664, 0.06802698915998917, 0.06105668313102797, 0.0730885010707425, 0.087200891


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


2025-12-13 08:59:14,010 - INFO - network F1 score: 34.71%
2025-12-13 08:59:14,023 - INFO - Detailed Classification Report: 
              precision    recall  f1-score   support

           0       0.22      0.86      0.35         7
           1       0.90      0.21      0.35        42
           2       0.00      0.00      0.00         0

    accuracy                           0.31        49
   macro avg       0.37      0.36      0.23        49
weighted avg       0.80      0.31      0.35        49



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇███
test_accuracy,▁
test_f1,▁
test_precision,▁
test_recall,▁
train_loss,███▇▇▇▇▇▇▆▆▅▅▅▅▃▃▃▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,70.0
test_accuracy,0.30612
test_f1,0.34712
test_precision,0.80317
test_recall,0.30612
train_loss,0.05129


Net2 seems the best in convergence but it definitely overfits, so the next step is to solve this with net3.

In [17]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.0001, verbose=True):
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_model = None
        
    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.best_model = model.state_dict().copy()
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.verbose:
                logger.info(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.best_model = model.state_dict().copy()
            self.counter = 0


In [18]:
batch_size = 32
net3 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=1),      # (3x3x1)x8
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 224x224 -> 112x112

    torch.nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),       # (3x3x8)x16
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 112x112 -> 56x56

    torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),       # (3x3x16)x32
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 56x56 -> 28x28 

    torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),       # (3x3x32)x64
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 28x28 -> 14x14 

    torch.nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),       # (3x3x64)x32
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 14x14 -> 7x7

    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(32, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 3)                       # Output layer     
).to(device)

net3.apply(init_weights)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net3.parameters(), lr=0.001)

summary(net3, input_size=(batch_size, 1, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [32, 3]                   --
├─Conv2d: 1-1                            [32, 8, 224, 224]         80
├─ReLU: 1-2                              [32, 8, 224, 224]         --
├─MaxPool2d: 1-3                         [32, 8, 112, 112]         --
├─Conv2d: 1-4                            [32, 16, 112, 112]        1,168
├─ReLU: 1-5                              [32, 16, 112, 112]        --
├─MaxPool2d: 1-6                         [32, 16, 56, 56]          --
├─Conv2d: 1-7                            [32, 32, 56, 56]          4,640
├─ReLU: 1-8                              [32, 32, 56, 56]          --
├─MaxPool2d: 1-9                         [32, 32, 28, 28]          --
├─Conv2d: 1-10                           [32, 64, 28, 28]          18,496
├─ReLU: 1-11                             [32, 64, 28, 28]          --
├─MaxPool2d: 1-12                        [32, 64, 14, 14]          --
├─Con

In [19]:
init_wandb()
train_model(net3, optimizer, loss_fn, enable_early_stopping=False)
evaluate_model(net3)

Training model:   0%|          | 0/70 [00:00<?, ?it/s]

2025-12-13 08:59:17,196 - INFO - Epoch 1/70, Train Loss: 1.1933


Training model:   1%|▏         | 1/70 [00:00<00:29,  2.33it/s]

2025-12-13 08:59:17,335 - INFO - Epoch 2/70, Train Loss: 1.0336


Training model:   3%|▎         | 2/70 [00:00<00:17,  3.87it/s]

2025-12-13 08:59:17,466 - INFO - Epoch 3/70, Train Loss: 0.9807


Training model:   4%|▍         | 3/70 [00:00<00:13,  4.99it/s]

2025-12-13 08:59:17,626 - INFO - Epoch 4/70, Train Loss: 1.0428


Training model:   6%|▌         | 4/70 [00:00<00:12,  5.43it/s]

2025-12-13 08:59:17,782 - INFO - Epoch 5/70, Train Loss: 0.9761


Training model:   7%|▋         | 5/70 [00:01<00:11,  5.74it/s]

2025-12-13 08:59:17,929 - INFO - Epoch 6/70, Train Loss: 0.9912


Training model:   9%|▊         | 6/70 [00:01<00:10,  6.06it/s]

2025-12-13 08:59:18,062 - INFO - Epoch 7/70, Train Loss: 0.9713


Training model:  10%|█         | 7/70 [00:01<00:09,  6.48it/s]

2025-12-13 08:59:18,191 - INFO - Epoch 8/70, Train Loss: 0.9816


Training model:  11%|█▏        | 8/70 [00:01<00:09,  6.82it/s]

2025-12-13 08:59:18,364 - INFO - Epoch 9/70, Train Loss: 1.0057


Training model:  13%|█▎        | 9/70 [00:01<00:09,  6.47it/s]

2025-12-13 08:59:18,492 - INFO - Epoch 10/70, Train Loss: 0.9502


Training model:  14%|█▍        | 10/70 [00:01<00:08,  6.83it/s]

2025-12-13 08:59:18,632 - INFO - Epoch 11/70, Train Loss: 0.9660


Training model:  16%|█▌        | 11/70 [00:01<00:08,  6.93it/s]

2025-12-13 08:59:18,779 - INFO - Epoch 12/70, Train Loss: 1.0056


Training model:  17%|█▋        | 12/70 [00:02<00:08,  6.88it/s]

2025-12-13 08:59:18,947 - INFO - Epoch 13/70, Train Loss: 0.9349


Training model:  19%|█▊        | 13/70 [00:02<00:08,  6.58it/s]

2025-12-13 08:59:19,107 - INFO - Epoch 14/70, Train Loss: 0.9265


Training model:  20%|██        | 14/70 [00:02<00:08,  6.47it/s]

2025-12-13 08:59:19,295 - INFO - Epoch 15/70, Train Loss: 0.9070


Training model:  21%|██▏       | 15/70 [00:02<00:09,  6.08it/s]

2025-12-13 08:59:19,457 - INFO - Epoch 16/70, Train Loss: 0.9526


Training model:  23%|██▎       | 16/70 [00:02<00:08,  6.05it/s]

2025-12-13 08:59:19,616 - INFO - Epoch 17/70, Train Loss: 0.8708


Training model:  24%|██▍       | 17/70 [00:02<00:08,  6.18it/s]

2025-12-13 08:59:19,866 - INFO - Epoch 18/70, Train Loss: 0.8231


Training model:  26%|██▌       | 18/70 [00:03<00:09,  5.31it/s]

2025-12-13 08:59:20,015 - INFO - Epoch 19/70, Train Loss: 0.8253


Training model:  27%|██▋       | 19/70 [00:03<00:09,  5.66it/s]

2025-12-13 08:59:20,154 - INFO - Epoch 20/70, Train Loss: 0.8179


Training model:  29%|██▊       | 20/70 [00:03<00:08,  6.05it/s]

2025-12-13 08:59:20,291 - INFO - Epoch 21/70, Train Loss: 0.7937


Training model:  30%|███       | 21/70 [00:03<00:07,  6.38it/s]

2025-12-13 08:59:20,428 - INFO - Epoch 22/70, Train Loss: 0.8644


Training model:  31%|███▏      | 22/70 [00:03<00:07,  6.63it/s]

2025-12-13 08:59:20,563 - INFO - Epoch 23/70, Train Loss: 0.8070


Training model:  33%|███▎      | 23/70 [00:03<00:06,  6.86it/s]

2025-12-13 08:59:20,696 - INFO - Epoch 24/70, Train Loss: 0.8449


Training model:  34%|███▍      | 24/70 [00:03<00:06,  7.03it/s]

2025-12-13 08:59:20,840 - INFO - Epoch 25/70, Train Loss: 0.7694


Training model:  36%|███▌      | 25/70 [00:04<00:06,  7.02it/s]

2025-12-13 08:59:20,998 - INFO - Epoch 26/70, Train Loss: 0.7061


Training model:  37%|███▋      | 26/70 [00:04<00:06,  6.79it/s]

2025-12-13 08:59:21,160 - INFO - Epoch 27/70, Train Loss: 0.7387


Training model:  39%|███▊      | 27/70 [00:04<00:06,  6.58it/s]

2025-12-13 08:59:21,295 - INFO - Epoch 28/70, Train Loss: 0.8054


Training model:  40%|████      | 28/70 [00:04<00:06,  6.82it/s]

2025-12-13 08:59:21,437 - INFO - Epoch 29/70, Train Loss: 0.7324


Training model:  41%|████▏     | 29/70 [00:04<00:05,  6.89it/s]

2025-12-13 08:59:21,574 - INFO - Epoch 30/70, Train Loss: 0.9051


Training model:  43%|████▎     | 30/70 [00:04<00:05,  7.00it/s]

2025-12-13 08:59:21,710 - INFO - Epoch 31/70, Train Loss: 0.8510


Training model:  44%|████▍     | 31/70 [00:04<00:05,  7.10it/s]

2025-12-13 08:59:21,839 - INFO - Epoch 32/70, Train Loss: 0.7616


Training model:  46%|████▌     | 32/70 [00:05<00:05,  7.28it/s]

2025-12-13 08:59:21,969 - INFO - Epoch 33/70, Train Loss: 0.7051


Training model:  47%|████▋     | 33/70 [00:05<00:04,  7.41it/s]

2025-12-13 08:59:22,097 - INFO - Epoch 34/70, Train Loss: 0.6489


Training model:  49%|████▊     | 34/70 [00:05<00:04,  7.51it/s]

2025-12-13 08:59:22,228 - INFO - Epoch 35/70, Train Loss: 0.5855


Training model:  50%|█████     | 35/70 [00:05<00:04,  7.56it/s]

2025-12-13 08:59:22,360 - INFO - Epoch 36/70, Train Loss: 0.5620


Training model:  51%|█████▏    | 36/70 [00:05<00:04,  7.55it/s]

2025-12-13 08:59:22,510 - INFO - Epoch 37/70, Train Loss: 0.4858


Training model:  53%|█████▎    | 37/70 [00:05<00:04,  7.28it/s]

2025-12-13 08:59:22,662 - INFO - Epoch 38/70, Train Loss: 0.6041


Training model:  54%|█████▍    | 38/70 [00:05<00:04,  7.04it/s]

2025-12-13 08:59:22,825 - INFO - Epoch 39/70, Train Loss: 0.5159


Training model:  56%|█████▌    | 39/70 [00:06<00:04,  6.74it/s]

2025-12-13 08:59:22,997 - INFO - Epoch 40/70, Train Loss: 0.5747


Training model:  57%|█████▋    | 40/70 [00:06<00:04,  6.44it/s]

2025-12-13 08:59:23,166 - INFO - Epoch 41/70, Train Loss: 0.7559


Training model:  59%|█████▊    | 41/70 [00:06<00:04,  6.27it/s]

2025-12-13 08:59:23,326 - INFO - Epoch 42/70, Train Loss: 0.5732


Training model:  60%|██████    | 42/70 [00:06<00:04,  6.26it/s]

2025-12-13 08:59:23,469 - INFO - Epoch 43/70, Train Loss: 0.4554


Training model:  61%|██████▏   | 43/70 [00:06<00:04,  6.47it/s]

2025-12-13 08:59:23,611 - INFO - Epoch 44/70, Train Loss: 0.4313


Training model:  63%|██████▎   | 44/70 [00:06<00:03,  6.63it/s]

2025-12-13 08:59:23,750 - INFO - Epoch 45/70, Train Loss: 0.4593


Training model:  64%|██████▍   | 45/70 [00:06<00:03,  6.79it/s]

2025-12-13 08:59:23,883 - INFO - Epoch 46/70, Train Loss: 0.3624


Training model:  66%|██████▌   | 46/70 [00:07<00:03,  6.99it/s]

2025-12-13 08:59:24,045 - INFO - Epoch 47/70, Train Loss: 0.4621


Training model:  67%|██████▋   | 47/70 [00:07<00:03,  6.73it/s]

2025-12-13 08:59:24,286 - INFO - Epoch 48/70, Train Loss: 0.5371


Training model:  69%|██████▊   | 48/70 [00:07<00:03,  5.66it/s]

2025-12-13 08:59:24,520 - INFO - Epoch 49/70, Train Loss: 0.3731


Training model:  70%|███████   | 49/70 [00:07<00:04,  5.17it/s]

2025-12-13 08:59:24,704 - INFO - Epoch 50/70, Train Loss: 0.3727


Training model:  71%|███████▏  | 50/70 [00:07<00:03,  5.24it/s]

2025-12-13 08:59:24,862 - INFO - Epoch 51/70, Train Loss: 0.2793


Training model:  73%|███████▎  | 51/70 [00:08<00:03,  5.52it/s]

2025-12-13 08:59:25,016 - INFO - Epoch 52/70, Train Loss: 0.2507


Training model:  74%|███████▍  | 52/70 [00:08<00:03,  5.79it/s]

2025-12-13 08:59:25,144 - INFO - Epoch 53/70, Train Loss: 0.2060


Training model:  76%|███████▌  | 53/70 [00:08<00:02,  6.26it/s]

2025-12-13 08:59:25,274 - INFO - Epoch 54/70, Train Loss: 0.2421


Training model:  77%|███████▋  | 54/70 [00:08<00:02,  6.65it/s]

2025-12-13 08:59:25,438 - INFO - Epoch 55/70, Train Loss: 0.2000


Training model:  79%|███████▊  | 55/70 [00:08<00:02,  6.47it/s]

2025-12-13 08:59:25,583 - INFO - Epoch 56/70, Train Loss: 0.1799


Training model:  80%|████████  | 56/70 [00:08<00:02,  6.59it/s]

2025-12-13 08:59:25,761 - INFO - Epoch 57/70, Train Loss: 0.1672


Training model:  81%|████████▏ | 57/70 [00:08<00:02,  6.27it/s]

2025-12-13 08:59:25,909 - INFO - Epoch 58/70, Train Loss: 0.1296


Training model:  83%|████████▎ | 58/70 [00:09<00:01,  6.40it/s]

2025-12-13 08:59:26,234 - INFO - Epoch 59/70, Train Loss: 0.1348


Training model:  84%|████████▍ | 59/70 [00:09<00:02,  4.82it/s]

2025-12-13 08:59:26,408 - INFO - Epoch 60/70, Train Loss: 0.1345


Training model:  86%|████████▌ | 60/70 [00:09<00:01,  5.08it/s]

2025-12-13 08:59:26,550 - INFO - Epoch 61/70, Train Loss: 0.1280


Training model:  87%|████████▋ | 61/70 [00:09<00:01,  5.54it/s]

2025-12-13 08:59:26,691 - INFO - Epoch 62/70, Train Loss: 0.0944


Training model:  89%|████████▊ | 62/70 [00:09<00:01,  5.94it/s]

2025-12-13 08:59:26,831 - INFO - Epoch 63/70, Train Loss: 0.1339


Training model:  90%|█████████ | 63/70 [00:10<00:01,  6.25it/s]

2025-12-13 08:59:26,982 - INFO - Epoch 64/70, Train Loss: 0.1308


Training model:  91%|█████████▏| 64/70 [00:10<00:00,  6.36it/s]

2025-12-13 08:59:27,132 - INFO - Epoch 65/70, Train Loss: 0.1072


Training model:  93%|█████████▎| 65/70 [00:10<00:00,  6.45it/s]

2025-12-13 08:59:27,291 - INFO - Epoch 66/70, Train Loss: 0.2542


Training model:  94%|█████████▍| 66/70 [00:10<00:00,  6.40it/s]

2025-12-13 08:59:27,455 - INFO - Epoch 67/70, Train Loss: 0.1175


Training model:  96%|█████████▌| 67/70 [00:10<00:00,  6.30it/s]

2025-12-13 08:59:27,603 - INFO - Epoch 68/70, Train Loss: 0.0937


Training model:  97%|█████████▋| 68/70 [00:10<00:00,  6.44it/s]

2025-12-13 08:59:27,754 - INFO - Epoch 69/70, Train Loss: 0.1077


Training model:  99%|█████████▊| 69/70 [00:10<00:00,  6.48it/s]

2025-12-13 08:59:27,905 - INFO - Epoch 70/70, Train Loss: 0.1125


Training model: 100%|██████████| 70/70 [00:11<00:00,  6.28it/s]

2025-12-13 08:59:27,908 - INFO - [1.1932735219597816, 1.0335881970822811, 0.9807348139584064, 1.042787965387106, 0.976081408560276, 0.9911840967833996, 0.971342895179987, 0.9815533012151718, 1.005672175437212, 0.9501973241567612, 0.9659802876412868, 1.0056226253509521, 0.9348855875432491, 0.9265397377312183, 0.9070290513336658, 0.9525636471807957, 0.870769340544939, 0.8230959130451083, 0.8252803646028042, 0.8178792037069798, 0.7936655506491661, 0.8643611613661051, 0.8070287359878421, 0.8448962569236755, 0.7694230210036039, 0.706070626154542, 0.7387008685618639, 0.8054158389568329, 0.7323718369007111, 0.9050506241619587, 0.8509819768369198, 0.7616352587938309, 0.7051434237509966, 0.6489369096234441, 0.5854946076869965, 0.5620360033353791, 0.48575551621615887, 0.6040581102715805, 0.5158599717542529, 0.5746682249009609, 0.7559193912893534, 0.5731692090630531, 0.4553545406088233, 0.43126018065959215, 0.459331464022398, 0.36239993944764093, 0.4620612794533372, 0.5370859894901514, 0.37309488


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


2025-12-13 08:59:27,944 - INFO - network precision: 89.71%
2025-12-13 08:59:27,945 - INFO - network recall: 51.02%
2025-12-13 08:59:27,946 - INFO - network F1 score: 57.68%
2025-12-13 08:59:27,962 - INFO - Detailed Classification Report: 
              precision    recall  f1-score   support

           0       0.28      1.00      0.44         7
           1       1.00      0.43      0.60        42
           2       0.00      0.00      0.00         0

    accuracy                           0.51        49
   macro avg       0.43      0.48      0.35        49
weighted avg       0.90      0.51      0.58        49



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
test_accuracy,▁
test_f1,▁
test_precision,▁
test_recall,▁
train_loss,█▇▇▇▇▇▆▇▆▆▆▆▆▆▅▆▅▅▅▅▄▄▄▅▄▃▃▃▄▃▂▁▁▁▁▁▁▁▁▁

0,1
epoch,70.0
test_accuracy,0.5102
test_f1,0.57679
test_precision,0.89714
test_recall,0.5102
train_loss,0.11251


Net3 intorduced maxpool layers after each conv layer and acheved better generalisation, more over batch size could be inreased to 32, and still perfomed godd on it unlike previous networks.

In [20]:
# Use train test split, for getting validation metrics during training
x_train_tensor, x_val_tensor, y_train_tensor, y_val_tensor = train_test_split(
    x_train_tensor, y_train_tensor, test_size=0.2, random_state=42, stratify=y_train_tensor)

val_dataset = TensorDataset(x_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [29]:
def init_weights(m):
    if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear):
        torch.nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
        if m.bias is not None:
            torch.nn.init.constant_(m.bias, 0)
    elif isinstance(m, torch.nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)

In [None]:
net4 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=1),      # (3x3x1)x8
    torch.nn.BatchNorm2d(8),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 224x224 -> 112x112

    torch.nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),       # (3x3x8)x16
    torch.nn.BatchNorm2d(16),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 112x112 -> 56x56

    torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),       # (3x3x16)x32
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 56x56 -> 28x28 

    torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),       # (3x3x32)x64
    torch.nn.BatchNorm2d(64),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 28x28 -> 14x14 

    torch.nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),       # (3x3x64)x32
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 14x14 -> 7x7

    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(32, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 3)                       # Output layer     
).to(device)

net4.apply(init_weights)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net4.parameters(), lr=0.0005)

summary(net4, input_size=(batch_size, 1, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [32, 3]                   --
├─Conv2d: 1-1                            [32, 8, 224, 224]         80
├─BatchNorm2d: 1-2                       [32, 8, 224, 224]         16
├─ReLU: 1-3                              [32, 8, 224, 224]         --
├─MaxPool2d: 1-4                         [32, 8, 112, 112]         --
├─Conv2d: 1-5                            [32, 16, 112, 112]        1,168
├─BatchNorm2d: 1-6                       [32, 16, 112, 112]        32
├─ReLU: 1-7                              [32, 16, 112, 112]        --
├─MaxPool2d: 1-8                         [32, 16, 56, 56]          --
├─Conv2d: 1-9                            [32, 32, 56, 56]          4,640
├─BatchNorm2d: 1-10                      [32, 32, 56, 56]          64
├─ReLU: 1-11                             [32, 32, 56, 56]          --
├─MaxPool2d: 1-12                        [32, 32, 28, 28]          --
├─Conv2d:

In [37]:
init_wandb()
train_model(net4, optimizer, loss_fn, enable_early_stopping=True, patience=5)
evaluate_model(net4)

Training model:   0%|          | 0/70 [00:00<?, ?it/s]

2025-12-13 09:15:41,900 - INFO - Epoch 1/70, Train Loss: 1.0242, Val Loss: 1.0488, Val Acc: 0.4082


Training model:   1%|▏         | 1/70 [00:00<00:29,  2.36it/s]

2025-12-13 09:15:42,036 - INFO - Epoch 2/70, Train Loss: 0.9303, Val Loss: 1.0724, Val Acc: 0.4694
2025-12-13 09:15:42,036 - INFO - EarlyStopping counter: 1 out of 5


Training model:   3%|▎         | 2/70 [00:00<00:17,  3.93it/s]

2025-12-13 09:15:42,166 - INFO - Epoch 3/70, Train Loss: 0.8768, Val Loss: 1.0739, Val Acc: 0.4082
2025-12-13 09:15:42,167 - INFO - EarlyStopping counter: 2 out of 5


Training model:   4%|▍         | 3/70 [00:00<00:13,  5.06it/s]

2025-12-13 09:15:42,285 - INFO - Epoch 4/70, Train Loss: 0.8448, Val Loss: 1.0468, Val Acc: 0.4286


Training model:   6%|▌         | 4/70 [00:00<00:10,  6.01it/s]

2025-12-13 09:15:42,400 - INFO - Epoch 5/70, Train Loss: 0.8033, Val Loss: 1.0372, Val Acc: 0.5102


Training model:   7%|▋         | 5/70 [00:00<00:09,  6.76it/s]

2025-12-13 09:15:42,514 - INFO - Epoch 6/70, Train Loss: 0.7948, Val Loss: 1.0355, Val Acc: 0.5510


Training model:   9%|▊         | 6/70 [00:01<00:08,  7.33it/s]

2025-12-13 09:15:42,631 - INFO - Epoch 7/70, Train Loss: 0.7572, Val Loss: 1.0421, Val Acc: 0.6122
2025-12-13 09:15:42,631 - INFO - EarlyStopping counter: 1 out of 5


Training model:  10%|█         | 7/70 [00:01<00:08,  7.69it/s]

2025-12-13 09:15:42,754 - INFO - Epoch 8/70, Train Loss: 0.7276, Val Loss: 1.0444, Val Acc: 0.6122
2025-12-13 09:15:42,754 - INFO - EarlyStopping counter: 2 out of 5


Training model:  11%|█▏        | 8/70 [00:01<00:07,  7.85it/s]

2025-12-13 09:15:42,874 - INFO - Epoch 9/70, Train Loss: 0.7008, Val Loss: 1.0422, Val Acc: 0.5510
2025-12-13 09:15:42,875 - INFO - EarlyStopping counter: 3 out of 5


Training model:  13%|█▎        | 9/70 [00:01<00:07,  7.98it/s]

2025-12-13 09:15:43,092 - INFO - Epoch 10/70, Train Loss: 0.6738, Val Loss: 1.0671, Val Acc: 0.6122
2025-12-13 09:15:43,092 - INFO - EarlyStopping counter: 4 out of 5


Training model:  14%|█▍        | 10/70 [00:01<00:09,  6.50it/s]

2025-12-13 09:15:43,234 - INFO - Epoch 11/70, Train Loss: 0.6508, Val Loss: 1.0178, Val Acc: 0.6327


Training model:  16%|█▌        | 11/70 [00:01<00:08,  6.65it/s]

2025-12-13 09:15:43,368 - INFO - Epoch 12/70, Train Loss: 0.6047, Val Loss: 1.0315, Val Acc: 0.4898
2025-12-13 09:15:43,369 - INFO - EarlyStopping counter: 1 out of 5


Training model:  17%|█▋        | 12/70 [00:01<00:08,  6.87it/s]

2025-12-13 09:15:43,492 - INFO - Epoch 13/70, Train Loss: 0.5815, Val Loss: 1.0364, Val Acc: 0.6939
2025-12-13 09:15:43,493 - INFO - EarlyStopping counter: 2 out of 5


Training model:  19%|█▊        | 13/70 [00:02<00:07,  7.21it/s]

2025-12-13 09:15:43,620 - INFO - Epoch 14/70, Train Loss: 0.5419, Val Loss: 1.0601, Val Acc: 0.5918
2025-12-13 09:15:43,621 - INFO - EarlyStopping counter: 3 out of 5


Training model:  20%|██        | 14/70 [00:02<00:07,  7.38it/s]

2025-12-13 09:15:43,748 - INFO - Epoch 15/70, Train Loss: 0.5240, Val Loss: 1.0407, Val Acc: 0.6122
2025-12-13 09:15:43,749 - INFO - EarlyStopping counter: 4 out of 5


Training model:  21%|██▏       | 15/70 [00:02<00:07,  7.49it/s]

2025-12-13 09:15:43,943 - INFO - Epoch 16/70, Train Loss: 0.4987, Val Loss: 1.0972, Val Acc: 0.6531
2025-12-13 09:15:43,944 - INFO - EarlyStopping counter: 5 out of 5
2025-12-13 09:15:43,944 - INFO - Early stopping triggered


Training model:  21%|██▏       | 15/70 [00:02<00:09,  6.06it/s]

2025-12-13 09:15:43,957 - INFO - Loaded best model weights
2025-12-13 09:15:43,958 - INFO - [1.0241894920667012, 0.9302704135576884, 0.8767741024494171, 0.8448319633801779, 0.8033467233181, 0.7947917580604553, 0.7572037974993387, 0.7275828321774801, 0.7008424401283264, 0.6737854778766632, 0.6508257488409678, 0.6046920021375021, 0.5815143783887228, 0.541930745045344, 0.5239527573188146, 0.49871138234933216]
2025-12-13 09:15:44,010 - INFO - network accuracy: 46.94%
2025-12-13 09:15:44,011 - INFO - network precision: 80.39%
2025-12-13 09:15:44,011 - INFO - network recall: 46.94%
2025-12-13 09:15:44,012 - INFO - network F1 score: 54.70%
2025-12-13 09:15:44,029 - INFO - Detailed Classification Report: 
              precision    recall  f1-score   support

           0       0.23      0.71      0.34         7
           1       0.90      0.43      0.58        42
           2       0.00      0.00      0.00         0

    accuracy                           0.47        49
   macro avg       0.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
test_accuracy,▁
test_f1,▁
test_precision,▁
test_recall,▁
train_loss,█▇▆▆▅▅▄▄▄▃▃▂▂▂▁▁
val_accuracy,▁▂▁▁▃▄▆▆▄▆▆▃█▅▆▇
val_loss,▄▆▆▄▃▃▃▃▃▅▁▂▃▅▃█

0,1
epoch,16.0
test_accuracy,0.46939
test_f1,0.54696
test_precision,0.8039
test_recall,0.46939
train_loss,0.49871
val_accuracy,0.65306
val_loss,1.09716


Net4 introduced some batch normalization to help with the overfitting problem. The learning rate also got decreased.

In [None]:
net5 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=1),      # (3x3x1)x8
    torch.nn.BatchNorm2d(8),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 224x224 -> 112x112

    torch.nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),       # (3x3x8)x16
    torch.nn.BatchNorm2d(16),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 112x112 -> 56x56

    torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),       # (3x3x16)x32
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 56x56 -> 28x28 

    torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),       # (3x3x32)x64
    torch.nn.BatchNorm2d(64),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 28x28 -> 14x14 

    torch.nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),       # (3x3x64)x32
    torch.nn.BatchNorm2d(32),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2, stride=2),    # 14x14 -> 7x7

    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(32, 128),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.3),
    torch.nn.Linear(128, 64),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.3),
    torch.nn.Linear(64, 3)                       # Output layer     
).to(device)

net5.apply(init_weights)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net5.parameters(), lr=0.0005)

summary(net5, input_size=(batch_size, 1, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [32, 3]                   --
├─Conv2d: 1-1                            [32, 8, 224, 224]         80
├─BatchNorm2d: 1-2                       [32, 8, 224, 224]         16
├─ReLU: 1-3                              [32, 8, 224, 224]         --
├─MaxPool2d: 1-4                         [32, 8, 112, 112]         --
├─Conv2d: 1-5                            [32, 16, 112, 112]        1,168
├─BatchNorm2d: 1-6                       [32, 16, 112, 112]        32
├─ReLU: 1-7                              [32, 16, 112, 112]        --
├─MaxPool2d: 1-8                         [32, 16, 56, 56]          --
├─Conv2d: 1-9                            [32, 32, 56, 56]          4,640
├─BatchNorm2d: 1-10                      [32, 32, 56, 56]          64
├─ReLU: 1-11                             [32, 32, 56, 56]          --
├─MaxPool2d: 1-12                        [32, 32, 28, 28]          --
├─Conv2d:

In [45]:
init_wandb()
train_model(net5, optimizer, loss_fn, enable_early_stopping=True, patience=5)
evaluate_model(net5)

Training model:   0%|          | 0/70 [00:00<?, ?it/s]

2025-12-13 09:48:41,084 - INFO - Epoch 1/70, Train Loss: 1.4772, Val Loss: 1.0236, Val Acc: 0.4694


Training model:   1%|▏         | 1/70 [00:00<00:28,  2.41it/s]

2025-12-13 09:48:41,225 - INFO - Epoch 2/70, Train Loss: 1.3053, Val Loss: 1.0214, Val Acc: 0.4694


Training model:   3%|▎         | 2/70 [00:00<00:17,  3.93it/s]

2025-12-13 09:48:41,368 - INFO - Epoch 3/70, Train Loss: 1.2189, Val Loss: 1.0154, Val Acc: 0.4694


Training model:   4%|▍         | 3/70 [00:00<00:13,  4.93it/s]

2025-12-13 09:48:41,494 - INFO - Epoch 4/70, Train Loss: 1.1031, Val Loss: 1.0122, Val Acc: 0.4286


Training model:   6%|▌         | 4/70 [00:00<00:11,  5.80it/s]

2025-12-13 09:48:41,616 - INFO - Epoch 5/70, Train Loss: 1.1549, Val Loss: 1.0199, Val Acc: 0.4286
2025-12-13 09:48:41,617 - INFO - EarlyStopping counter: 1 out of 5


Training model:   7%|▋         | 5/70 [00:00<00:10,  6.49it/s]

2025-12-13 09:48:41,737 - INFO - Epoch 6/70, Train Loss: 1.1303, Val Loss: 1.0337, Val Acc: 0.5306
2025-12-13 09:48:41,738 - INFO - EarlyStopping counter: 2 out of 5


Training model:   9%|▊         | 6/70 [00:01<00:09,  6.99it/s]

2025-12-13 09:48:41,860 - INFO - Epoch 7/70, Train Loss: 1.1344, Val Loss: 1.0554, Val Acc: 0.5510
2025-12-13 09:48:41,861 - INFO - EarlyStopping counter: 3 out of 5


Training model:  10%|█         | 7/70 [00:01<00:08,  7.34it/s]

2025-12-13 09:48:41,982 - INFO - Epoch 8/70, Train Loss: 1.1339, Val Loss: 1.0740, Val Acc: 0.5510
2025-12-13 09:48:41,982 - INFO - EarlyStopping counter: 4 out of 5


Training model:  11%|█▏        | 8/70 [00:01<00:08,  7.60it/s]

2025-12-13 09:48:42,100 - INFO - Epoch 9/70, Train Loss: 1.1417, Val Loss: 1.0764, Val Acc: 0.5714
2025-12-13 09:48:42,100 - INFO - EarlyStopping counter: 5 out of 5
2025-12-13 09:48:42,100 - INFO - Early stopping triggered


Training model:  11%|█▏        | 8/70 [00:01<00:11,  5.59it/s]

2025-12-13 09:48:42,104 - INFO - Loaded best model weights
2025-12-13 09:48:42,105 - INFO - [1.4771883487701416, 1.3052693406740825, 1.2189250588417053, 1.1031062801678975, 1.1549132664998372, 1.1303402582804363, 1.1344122886657715, 1.1339189608891804, 1.1417464415232341]
2025-12-13 09:48:42,130 - INFO - network accuracy: 67.35%
2025-12-13 09:48:42,131 - INFO - network precision: 83.76%
2025-12-13 09:48:42,132 - INFO - network recall: 67.35%
2025-12-13 09:48:42,132 - INFO - network F1 score: 72.16%
2025-12-13 09:48:42,141 - INFO - Detailed Classification Report: 
              precision    recall  f1-score   support

           0       0.26      0.71      0.38         7
           1       0.93      0.67      0.78        42

    accuracy                           0.67        49
   macro avg       0.60      0.69      0.58        49
weighted avg       0.84      0.67      0.72        49




[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,▁▂▃▄▅▅▆▇█
test_accuracy,▁
test_f1,▁
test_precision,▁
test_recall,▁
train_loss,█▅▃▁▂▂▂▂▂
val_accuracy,▃▃▃▁▁▆▇▇█
val_loss,▂▂▁▁▂▃▆██

0,1
epoch,9.0
test_accuracy,0.67347
test_f1,0.72161
test_precision,0.83759
test_recall,0.67347
train_loss,1.14175
val_accuracy,0.57143
val_loss,1.07642


Net5 introdiced dropout to the fully connected layers but it did not imrpoved the model's performance compared to the previous net4.