In [2]:
import os
import random

import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.nn.functional as F
import torchaudio.transforms as T
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset, DataLoader

from time import time
from tqdm import tqdm

from src.model.model import SpeakerCountCNN
import src.model.model as m


class SpectrogramDataset(Dataset):
    def __init__(self, csv_file, data_dir):

        self.data = pd.read_csv(csv_file)
        self.data_dir = data_dir
        self.labels = self.data['speaker_count'].astype(int).tolist()  # <- add this


    def __len__(self):
    
        return len(self.data)
    

    def __getitem__(self, idx):
    
        row = self.data.iloc[idx]
        tensor_path = os.path.join(self.data_dir, row['spectrogram'])
        spectrogram = torch.load(tensor_path).unsqueeze(0).float();  # shape: [1, H, W]
        label = int(row['speaker_count'])
        return spectrogram, label

dataset = SpectrogramDataset('data\spectrogram_labels_aug.csv','data\spectrograms_aug')

# Model with additional conv layer
class SpeakerCountCNN_test(nn.Module):
    def __init__(self, conv1_out, conv2_out, conv3_out, conv4_out, fc_hidden, dropout_prob, input_height=96, input_width=64, num_classes=6):
        super(SpeakerCountCNN_test, self).__init__()

        self.conv1 = nn.Conv2d(1, conv1_out, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(conv1_out)

        self.conv2 = nn.Conv2d(conv1_out, conv2_out, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(conv2_out)

        self.conv3 = nn.Conv2d(conv2_out, conv3_out, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(conv3_out)

        self.conv4 = nn.Conv2d(conv3_out, conv4_out, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(conv4_out)

        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(dropout_prob)

        # Dynamically compute flattened feature size
        with torch.no_grad():
            dummy = torch.zeros(1, 1, input_height, input_width)
            x = self.pool(F.relu(self.bn1(self.conv1(dummy))))
            x = self.pool(F.relu(self.bn2(self.conv2(x))))
            x = self.pool(F.relu(self.bn3(self.conv3(x))))
            x = self.pool(F.relu(self.bn4(self.conv4(x))))
            flattened_size = x.view(1, -1).shape[1]

        self.fc1 = nn.Linear(flattened_size, fc_hidden)
        self.fc2 = nn.Linear(fc_hidden, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [5]:
# Data loaders for full data set
def get_loaders(dataset, train_frac=0.8, bs=32, seed=42):
    n = len(dataset)
    t = int(train_frac * n)
    train_ds, val_ds = random_split(dataset, [t, n-t], generator=torch.Generator().manual_seed(seed))
    return (DataLoader(train_ds, batch_size=bs, shuffle=True),
            DataLoader(val_ds, batch_size=bs, shuffle=False))



# Data loaders for subset, makes hp tuning quicker (stratified sampling prevents class imbalance)
def get_stratified_loaders(
    dataset,
    subset_frac: float = 0.3,
    train_frac: float = 0.8,
    batch_size: int = 32,
    random_state: int = 42,
    shuffle_train: bool = True
):

    # 1. Stratified sampling of the full dataset
    full_indices = list(range(len(dataset)))
    labels = dataset.labels
    subset_idx, _ = train_test_split(
        full_indices,
        train_size=subset_frac,
        stratify=labels,
        random_state=random_state
    )
    reduced_ds = Subset(dataset, subset_idx)

    # 2. Stratified train/val split of the reduced dataset
    reduced_labels = [labels[i] for i in subset_idx]
    train_idx, val_idx = train_test_split(
        list(range(len(reduced_ds))),
        train_size=train_frac,
        stratify=reduced_labels,
        random_state=random_state
    )

    # 3. Build DataLoaders
    train_loader = DataLoader(
        Subset(reduced_ds, train_idx),
        batch_size=batch_size,
        shuffle=shuffle_train
    )
    val_loader = DataLoader(
        Subset(reduced_ds, val_idx),
        batch_size=batch_size,
        shuffle=False
    )

    return train_loader, val_loader



# Model builder
def build_model(cfg, input_h, input_w):
    return SpeakerCountCNN(
        input_height=input_h, input_width=input_w,
        conv1_out=cfg['conv1_out'], conv2_out=cfg['conv2_out'],
        conv3_out=cfg['conv3_out'], fc_hidden=cfg['fc_hidden'],
        dropout_prob=cfg['dropout']
    )



# One‐epoch training with tqdm
from tqdm import tqdm
from time import time
def train_one_epoch(model, loader, opt, crit, device):
    model.train()
    total = 0
    for x, y in tqdm(loader, desc="Training", unit="batch"):
        x, y = x.to(device), y.to(device)
        opt.zero_grad()
        loss = crit(model(x), y)
        loss.backward()
        opt.step()
        total += loss.item()
    return total / len(loader)



# Evaluate model
def validate(model, loader, device):
    model.eval()
    correct = 0
    with torch.no_grad():
        for x,y in loader:
            x,y = x.to(device), y.to(device)
            preds = model(x).argmax(1)
            correct += (preds==y).sum().item()
    return correct / len(loader.dataset)

In [7]:
# 1. Dataset
dataset = SpectrogramDataset(csv_file="data/spectrogram_labels.csv", data_dir="data/spectrograms")
dataset_aug = SpectrogramDataset(csv_file="data/spectrogram_labels_aug.csv", data_dir="data/spectrograms_aug")

from torch.utils.data import ConcatDataset
dataset_comb = ConcatDataset([dataset,dataset_aug])


# 2. Model config and init
config = {
    'lr': 0.001,
    'dropout': 0.3,
    'fc_hidden': 128,
    'conv1_out': 32,
    'conv2_out': 16,
    'conv3_out': 64,
    'conv4_out': 64
}



model = SpeakerCountCNN_test(
    conv1_out=config['conv1_out'],
    conv2_out=config['conv2_out'],
    conv3_out=config['conv3_out'],
    conv4_out=config['conv4_out'],
    fc_hidden=config['fc_hidden'],
    dropout_prob=config['dropout'],
    input_height=96,
    input_width=64
)



# 3. Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



# 4. DataLoaders

# train_loader, val_loader = get_stratified_loaders(
#     dataset, subset_frac=0.3, train_frac=0.8, batch_size=32, random_state=42
# )


train_loader, val_loader = get_loaders(dataset_comb)



# 5. Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config['lr'])



# 6. Training loop with early stopping
# if loss doesn't improve for 3 epochs, stop training.
loss_list = []
strike_count = 0
threshold = 0.001

for epoch in range(50):
    start_time = time()
    avg_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    duration = time() - start_time
    loss_list.append(avg_loss)

    print(f"Epoch {epoch+1} — Loss: {avg_loss:.4f} — Time: {duration:.2f}s")

    if epoch > 0:
        if loss_list[epoch] < loss_list[epoch - 1] - threshold:
            strike_count = 0  # Reset on improvement
        else:
            strike_count += 1
            print(f"No significant improvement — strike {strike_count}/3")

        if strike_count == 3:
            print("Early stopping triggered after 3 consecutive low-improvement epochs.")
            break



# 7. Final Validation (refactored to use helper)
acc = validate(model, val_loader, device)
print(f"Validation Accuracy: {acc:.2%}")



# 8. Save model
torch.save(model.state_dict(), 'SpeakerCountCNN_comb_v1.pt')

Training: 100%|██████████| 1772/1772 [23:54<00:00,  1.24batch/s]


Epoch 1 — Loss: 0.5837 — Time: 1434.07s


Training: 100%|██████████| 1772/1772 [20:22<00:00,  1.45batch/s]


Epoch 2 — Loss: 0.4982 — Time: 1222.04s


Training: 100%|██████████| 1772/1772 [25:00<00:00,  1.18batch/s]


Epoch 3 — Loss: 0.4644 — Time: 1500.24s


Training: 100%|██████████| 1772/1772 [26:50<00:00,  1.10batch/s]


Epoch 4 — Loss: 0.4456 — Time: 1610.23s


Training: 100%|██████████| 1772/1772 [26:37<00:00,  1.11batch/s]


Epoch 5 — Loss: 0.4306 — Time: 1597.09s


Training: 100%|██████████| 1772/1772 [26:09<00:00,  1.13batch/s]


Epoch 6 — Loss: 0.4146 — Time: 1569.74s


Training: 100%|██████████| 1772/1772 [25:08<00:00,  1.17batch/s]


Epoch 7 — Loss: 0.3992 — Time: 1508.57s


Training: 100%|██████████| 1772/1772 [25:31<00:00,  1.16batch/s]


Epoch 8 — Loss: 0.3882 — Time: 1531.25s


Training: 100%|██████████| 1772/1772 [26:12<00:00,  1.13batch/s]


Epoch 9 — Loss: 0.3745 — Time: 1572.06s


Training: 100%|██████████| 1772/1772 [25:08<00:00,  1.17batch/s]


Epoch 10 — Loss: 0.3636 — Time: 1508.14s


Training: 100%|██████████| 1772/1772 [25:28<00:00,  1.16batch/s]


Epoch 11 — Loss: 0.3517 — Time: 1528.22s


Training: 100%|██████████| 1772/1772 [27:34<00:00,  1.07batch/s]


Epoch 12 — Loss: 0.3397 — Time: 1654.02s


Training: 100%|██████████| 1772/1772 [27:29<00:00,  1.07batch/s]


Epoch 13 — Loss: 0.3298 — Time: 1649.45s


Training: 100%|██████████| 1772/1772 [26:55<00:00,  1.10batch/s]


Epoch 14 — Loss: 0.3171 — Time: 1615.63s


Training: 100%|██████████| 1772/1772 [26:55<00:00,  1.10batch/s]


Epoch 15 — Loss: 0.3084 — Time: 1615.31s


Training: 100%|██████████| 1772/1772 [27:16<00:00,  1.08batch/s]


Epoch 16 — Loss: 0.2923 — Time: 1636.11s


Training: 100%|██████████| 1772/1772 [18:43<00:00,  1.58batch/s]


Epoch 17 — Loss: 0.2866 — Time: 1123.42s


Training: 100%|██████████| 1772/1772 [27:24<00:00,  1.08batch/s]


Epoch 18 — Loss: 0.2724 — Time: 1644.03s


Training: 100%|██████████| 1772/1772 [17:57<00:00,  1.64batch/s]


Epoch 19 — Loss: 0.2671 — Time: 1077.25s


Training: 100%|██████████| 1772/1772 [26:37<00:00,  1.11batch/s]


Epoch 20 — Loss: 0.2561 — Time: 1597.09s


Training: 100%|██████████| 1772/1772 [27:26<00:00,  1.08batch/s]


Epoch 21 — Loss: 0.2480 — Time: 1646.88s


Training: 100%|██████████| 1772/1772 [26:43<00:00,  1.10batch/s]


Epoch 22 — Loss: 0.2402 — Time: 1603.83s


Training: 100%|██████████| 1772/1772 [25:46<00:00,  1.15batch/s]


Epoch 23 — Loss: 0.2289 — Time: 1546.45s


Training: 100%|██████████| 1772/1772 [25:42<00:00,  1.15batch/s]


Epoch 24 — Loss: 0.2258 — Time: 1542.89s


Training: 100%|██████████| 1772/1772 [17:10<00:00,  1.72batch/s]


Epoch 25 — Loss: 0.2152 — Time: 1030.72s


Training: 100%|██████████| 1772/1772 [27:02<00:00,  1.09batch/s]


Epoch 26 — Loss: 0.2091 — Time: 1622.03s


Training: 100%|██████████| 1772/1772 [18:37<00:00,  1.59batch/s]


Epoch 27 — Loss: 0.2046 — Time: 1117.47s


Training: 100%|██████████| 1772/1772 [24:45<00:00,  1.19batch/s]


Epoch 28 — Loss: 0.1949 — Time: 1485.63s


Training: 100%|██████████| 1772/1772 [22:19<00:00,  1.32batch/s]


Epoch 29 — Loss: 0.1915 — Time: 1339.39s


Training: 100%|██████████| 1772/1772 [15:59<00:00,  1.85batch/s]


Epoch 30 — Loss: 0.1853 — Time: 959.70s


Training: 100%|██████████| 1772/1772 [14:14<00:00,  2.07batch/s]


Epoch 31 — Loss: 0.1812 — Time: 854.64s


Training: 100%|██████████| 1772/1772 [20:24<00:00,  1.45batch/s]


Epoch 32 — Loss: 0.1768 — Time: 1224.42s


Training: 100%|██████████| 1772/1772 [23:43<00:00,  1.24batch/s]


Epoch 33 — Loss: 0.1700 — Time: 1423.42s


Training: 100%|██████████| 1772/1772 [21:16<00:00,  1.39batch/s]


Epoch 34 — Loss: 0.1655 — Time: 1276.97s


Training: 100%|██████████| 1772/1772 [11:37<00:00,  2.54batch/s]


Epoch 35 — Loss: 0.1657 — Time: 697.81s
No significant improvement — strike 1/3


Training: 100%|██████████| 1772/1772 [09:52<00:00,  2.99batch/s]


Epoch 36 — Loss: 0.1569 — Time: 592.29s


Training: 100%|██████████| 1772/1772 [13:29<00:00,  2.19batch/s]


Epoch 37 — Loss: 0.1528 — Time: 809.05s


Training: 100%|██████████| 1772/1772 [16:54<00:00,  1.75batch/s]


Epoch 38 — Loss: 0.1556 — Time: 1014.66s
No significant improvement — strike 1/3


Training: 100%|██████████| 1772/1772 [13:52<00:00,  2.13batch/s]


Epoch 39 — Loss: 0.1481 — Time: 832.96s


Training: 100%|██████████| 1772/1772 [18:49<00:00,  1.57batch/s]


Epoch 40 — Loss: 0.1454 — Time: 1129.24s


Training: 100%|██████████| 1772/1772 [04:07<00:00,  7.17batch/s]


Epoch 41 — Loss: 0.1415 — Time: 247.10s


Training: 100%|██████████| 1772/1772 [04:37<00:00,  6.39batch/s]


Epoch 42 — Loss: 0.1362 — Time: 277.35s


Training: 100%|██████████| 1772/1772 [04:52<00:00,  6.06batch/s]


Epoch 43 — Loss: 0.1343 — Time: 292.51s


Training: 100%|██████████| 1772/1772 [04:41<00:00,  6.29batch/s]


Epoch 44 — Loss: 0.1297 — Time: 281.90s


Training: 100%|██████████| 1772/1772 [06:10<00:00,  4.79batch/s]


Epoch 45 — Loss: 0.1290 — Time: 370.07s
No significant improvement — strike 1/3


Training: 100%|██████████| 1772/1772 [11:55<00:00,  2.48batch/s]


Epoch 46 — Loss: 0.1245 — Time: 715.27s


Training: 100%|██████████| 1772/1772 [11:54<00:00,  2.48batch/s]


Epoch 47 — Loss: 0.1247 — Time: 714.49s
No significant improvement — strike 1/3


Training: 100%|██████████| 1772/1772 [23:10<00:00,  1.27batch/s]


Epoch 48 — Loss: 0.1194 — Time: 1390.52s


Training: 100%|██████████| 1772/1772 [28:12<00:00,  1.05batch/s]


Epoch 49 — Loss: 0.1182 — Time: 1692.98s


Training: 100%|██████████| 1772/1772 [27:44<00:00,  1.06batch/s]


Epoch 50 — Loss: 0.1169 — Time: 1664.82s
Validation Accuracy: 83.13%


In [None]:
import optuna

# SH trial over 5 epochs
def objective(trial):

    cfg = {
      'lr': trial.suggest_categorical('lr', [5e-5,1e-4,5e-4]),
      'dropout': trial.suggest_categorical('dropout', [0.25,0.3,0.35]),
      'fc_hidden': trial.suggest_categorical('fc_hidden',[64,128,256]),
      'conv1_out': trial.suggest_categorical('conv1_out',[16,32]),
      'conv2_out': trial.suggest_categorical('conv2_out',[16,32,64]),
      'conv3_out': trial.suggest_categorical('conv3_out',[64,128]),
      'conv4_out': trial.suggest_categorical('conv4_out',[32,64,128])
    }

    model = SpeakerCountCNN_test(
        conv1_out=cfg['conv1_out'],
        conv2_out=cfg['conv2_out'],
        conv3_out=cfg['conv3_out'],
        conv4_out=cfg['conv4_out'],
        fc_hidden=cfg['fc_hidden'],
        dropout_prob=cfg['dropout'],
        input_height=96,
        input_width=64
    )

    # Adam optim
    opt   = optim.Adam(model.parameters(), lr=cfg['lr'])

    # Cross entropy objective
    crit  = nn.CrossEntropyLoss()

    # Training and comparing to existing models
    for epoch in range(5):
        train_one_epoch(model, train_loader, opt, crit, device)
        val_acc = validate(model, val_loader, device)
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return val_acc


# Creates a stratified subset, makes hp tuning quicker
train_loader, val_loader = get_stratified_loaders(
    dataset,
    subset_frac=0.3,   
    train_frac=0.8,    
    batch_size=32,
    random_state=42
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

study  = optuna.create_study(direction='maximize',
            pruner=optuna.pruners.SuccessiveHalvingPruner(), study_name='0-5_Speaker_4layer_HPTune')

study.optimize(objective, n_trials=100)

best_cfg = study.best_trial.params
print("Best HPs:", best_cfg)

[I 2025-07-26 23:39:56,823] A new study created in memory with name: 0-2_Speaker_4layer_HPTune
[I 2025-07-26 23:50:23,323] Trial 0 finished with value: 0.8416206261510129 and parameters: {'lr': 5e-05, 'dropout': 0.3, 'fc_hidden': 128, 'conv1_out': 16, 'conv2_out': 64, 'conv3_out': 128, 'conv4_out': 128}. Best is trial 0 with value: 0.8416206261510129.
[I 2025-07-27 00:03:14,606] Trial 1 finished with value: 0.8402394106813996 and parameters: {'lr': 0.0001, 'dropout': 0.35, 'fc_hidden': 256, 'conv1_out': 16, 'conv2_out': 16, 'conv3_out': 64, 'conv4_out': 128}. Best is trial 0 with value: 0.8416206261510129.
[I 2025-07-27 00:11:47,945] Trial 2 pruned. 
[I 2025-07-27 00:17:50,820] Trial 3 pruned. 
[I 2025-07-27 00:27:27,250] Trial 4 pruned. 
[I 2025-07-27 00:36:11,932] Trial 5 pruned. 
[I 2025-07-27 00:44:33,715] Trial 6 pruned. 
[I 2025-07-27 00:51:24,357] Trial 7 pruned. 
[I 2025-07-27 01:00:34,121] Trial 8 pruned. 
[I 2025-07-27 01:20:09,370] Trial 9 finished with value: 0.848066298342

Best HPs: {'lr': 0.0005, 'dropout': 0.3, 'fc_hidden': 128, 'conv1_out': 16, 'conv2_out': 64, 'conv3_out': 128, 'conv4_out': 128}


In [53]:
study.best_trial.value

0.85451197053407

In [55]:
# Model with config from Optuna study
cfg = {'lr': 0.0005, 'dropout': 0.3, 'fc_hidden': 128, 'conv1_out': 16, 'conv2_out': 64, 'conv3_out': 128, 'conv4_out': 128}


final_model = build_model(cfg, 96, 64).to(device)



# Adam optimizer
opt = optim.Adam(final_model.parameters(), lr=best_cfg['lr'])



# Cross Entropy loss
crit = nn.CrossEntropyLoss()



# Loaders
train_loader, val_loader = get_loaders(dataset)



# Train
num_epochs = 50
for epoch in range(num_epochs):
    t0 = time()
    loss = train_one_epoch(final_model, train_loader, opt, crit, device)  # tqdm is inside this
    dur = time() - t0
    print(f"Epoch {epoch+1} — Loss {loss:.4f} — {dur:.1f}s")



# Validate
val_acc = validate(final_model, val_loader, device)
print(f"Final val accuracy: {val_acc:.2%}")
torch.save(final_model.state_dict(), r"src/model/best_0-2_3layer.pt")

Training: 100%|██████████| 905/905 [11:16<00:00,  1.34batch/s]


Epoch 1 — Loss 0.5115 — 676.3s


Training: 100%|██████████| 905/905 [15:15<00:00,  1.01s/batch]


Epoch 2 — Loss 0.4429 — 915.4s


Training: 100%|██████████| 905/905 [15:19<00:00,  1.02s/batch]


Epoch 3 — Loss 0.4223 — 919.7s


Training: 100%|██████████| 905/905 [06:19<00:00,  2.38batch/s]


Epoch 4 — Loss 0.4012 — 379.5s


Training: 100%|██████████| 905/905 [06:21<00:00,  2.37batch/s]


Epoch 5 — Loss 0.3809 — 381.2s


Training: 100%|██████████| 905/905 [14:21<00:00,  1.05batch/s]


Epoch 6 — Loss 0.3653 — 861.7s


Training: 100%|██████████| 905/905 [14:11<00:00,  1.06batch/s]


Epoch 7 — Loss 0.3487 — 851.7s


Training: 100%|██████████| 905/905 [13:41<00:00,  1.10batch/s]


Epoch 8 — Loss 0.3398 — 821.1s


Training: 100%|██████████| 905/905 [13:37<00:00,  1.11batch/s]


Epoch 9 — Loss 0.3283 — 817.1s


Training: 100%|██████████| 905/905 [13:45<00:00,  1.10batch/s]


Epoch 10 — Loss 0.3116 — 825.6s


Training: 100%|██████████| 905/905 [13:42<00:00,  1.10batch/s]


Epoch 11 — Loss 0.2935 — 822.8s


Training: 100%|██████████| 905/905 [11:28<00:00,  1.31batch/s]


Epoch 12 — Loss 0.2754 — 688.4s


Training: 100%|██████████| 905/905 [13:58<00:00,  1.08batch/s]


Epoch 13 — Loss 0.2597 — 838.0s


Training: 100%|██████████| 905/905 [15:02<00:00,  1.00batch/s]


Epoch 14 — Loss 0.2468 — 902.3s


Training: 100%|██████████| 905/905 [10:18<00:00,  1.46batch/s]


Epoch 15 — Loss 0.2266 — 618.3s


Training: 100%|██████████| 905/905 [11:13<00:00,  1.34batch/s]


Epoch 16 — Loss 0.2156 — 673.1s


Training: 100%|██████████| 905/905 [12:39<00:00,  1.19batch/s]


Epoch 17 — Loss 0.1950 — 759.4s


Training: 100%|██████████| 905/905 [13:47<00:00,  1.09batch/s]


Epoch 18 — Loss 0.1812 — 827.7s


Training: 100%|██████████| 905/905 [14:04<00:00,  1.07batch/s]


Epoch 19 — Loss 0.1672 — 844.3s


Training: 100%|██████████| 905/905 [14:06<00:00,  1.07batch/s]


Epoch 20 — Loss 0.1563 — 846.7s


Training: 100%|██████████| 905/905 [06:37<00:00,  2.28batch/s]


Epoch 21 — Loss 0.1454 — 397.1s


Training: 100%|██████████| 905/905 [08:11<00:00,  1.84batch/s]


Epoch 22 — Loss 0.1349 — 491.2s


Training: 100%|██████████| 905/905 [15:07<00:00,  1.00s/batch]


Epoch 23 — Loss 0.1197 — 907.5s


Training: 100%|██████████| 905/905 [14:44<00:00,  1.02batch/s]


Epoch 24 — Loss 0.1152 — 885.0s


Training: 100%|██████████| 905/905 [06:43<00:00,  2.24batch/s]


Epoch 25 — Loss 0.1038 — 403.8s


Training: 100%|██████████| 905/905 [07:38<00:00,  1.97batch/s]


Epoch 26 — Loss 0.0995 — 458.4s


Training: 100%|██████████| 905/905 [15:18<00:00,  1.01s/batch]


Epoch 27 — Loss 0.0979 — 918.1s


Training: 100%|██████████| 905/905 [06:41<00:00,  2.26batch/s]


Epoch 28 — Loss 0.0872 — 401.2s


Training: 100%|██████████| 905/905 [06:38<00:00,  2.27batch/s]


Epoch 29 — Loss 0.0789 — 398.4s


Training: 100%|██████████| 905/905 [16:10<00:00,  1.07s/batch]


Epoch 30 — Loss 0.0765 — 970.5s


Training: 100%|██████████| 905/905 [15:58<00:00,  1.06s/batch]


Epoch 31 — Loss 0.0721 — 958.3s


Training: 100%|██████████| 905/905 [15:28<00:00,  1.03s/batch]


Epoch 32 — Loss 0.0672 — 928.6s


Training: 100%|██████████| 905/905 [15:19<00:00,  1.02s/batch]


Epoch 33 — Loss 0.0661 — 919.8s


Training: 100%|██████████| 905/905 [15:07<00:00,  1.00s/batch]


Epoch 34 — Loss 0.0616 — 907.7s


Training: 100%|██████████| 905/905 [15:15<00:00,  1.01s/batch]


Epoch 35 — Loss 0.0598 — 915.2s


Training: 100%|██████████| 905/905 [14:53<00:00,  1.01batch/s]


Epoch 36 — Loss 0.0575 — 893.2s


Training: 100%|██████████| 905/905 [15:00<00:00,  1.00batch/s]


Epoch 37 — Loss 0.0564 — 900.5s


Training: 100%|██████████| 905/905 [15:02<00:00,  1.00batch/s]


Epoch 38 — Loss 0.0607 — 902.9s


Training: 100%|██████████| 905/905 [15:10<00:00,  1.01s/batch]


Epoch 39 — Loss 0.0499 — 910.3s


Training: 100%|██████████| 905/905 [14:55<00:00,  1.01batch/s]


Epoch 40 — Loss 0.0511 — 895.0s


Training: 100%|██████████| 905/905 [15:06<00:00,  1.00s/batch]


Epoch 41 — Loss 0.0461 — 906.1s


Training: 100%|██████████| 905/905 [14:58<00:00,  1.01batch/s]


Epoch 42 — Loss 0.0457 — 898.2s


Training: 100%|██████████| 905/905 [15:04<00:00,  1.00batch/s]


Epoch 43 — Loss 0.0469 — 904.4s


Training: 100%|██████████| 905/905 [15:17<00:00,  1.01s/batch]


Epoch 44 — Loss 0.0442 — 917.7s


Training: 100%|██████████| 905/905 [15:10<00:00,  1.01s/batch]


Epoch 45 — Loss 0.0435 — 910.7s


Training: 100%|██████████| 905/905 [15:12<00:00,  1.01s/batch]


Epoch 46 — Loss 0.0383 — 912.4s


Training: 100%|██████████| 905/905 [15:00<00:00,  1.01batch/s]


Epoch 47 — Loss 0.0442 — 900.1s


Training: 100%|██████████| 905/905 [14:42<00:00,  1.02batch/s]


Epoch 48 — Loss 0.0406 — 883.0s


Training: 100%|██████████| 905/905 [14:58<00:00,  1.01batch/s]


Epoch 49 — Loss 0.0345 — 898.4s


Training: 100%|██████████| 905/905 [15:13<00:00,  1.01s/batch]


Epoch 50 — Loss 0.0416 — 913.7s
Final val accuracy: 86.05%


## repeating with large epochs (early stopping on val accuracy)

WTF