In [113]:
import os
import time
import copy
from tqdm import tqdm
from pathlib import Path
import pickle
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
import torch
from torchsummary import summary
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.amp import autocast, GradScaler

import torchvision
from torchvision import datasets, models, transforms

from torch.utils.data import Dataset, DataLoader, random_split

In [114]:
# #load files
if not os.path.exists("/content/drive"):
  from google.colab import drive
  drive.mount('/content/drive')
if not os.path.exists("/content/AppliedMLProject"):
  !git clone https://YasinKaryagdi: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@github.com/YasinKaryagdi/AppliedMLProject.git
if not os.path.exists("/content/augmented_set.zip"):
  !cp -r /content/drive/MyDrive/Machinelearning_files/augmented_set.zip /content/
  !unzip augmented_set.zip
if not os.path.exists("/content/validate_split.csv"):
  !cp -r /content/drive/MyDrive/Machinelearning_files/validate_split.csv /content/
  !cp -r /content/drive/MyDrive/Machinelearning_files/train_augmented.csv /content/
  !cp -r /content/drive/MyDrive/Machinelearning_files/train_split.csv /content/
  !cp -r /content/drive/MyDrive/Machinelearning_files/train_balanced.csv /content/

In [115]:
cwd = Path.cwd()
gitpath = cwd / "AppliedMLProject"
dirpath = gitpath / "aml-2025-feathers-in-focus"
train_images_csv = dirpath / "train_images.csv"
train_images_folder = dirpath / "train_images"
image_classes = dirpath / "class_names.npy"
drive_path = cwd / "drive" / "MyDrive" / "Machinelearning files"
val_images_csv = cwd / "validate_split.csv"
train_balanced_csv = cwd / "train_balanced.csv"


In [116]:
#Defining model and training variables
#use augmented trainingset and if so, use balanced set?
use_augmented = True
if use_augmented:
  use_balanced = True
  augmentations = []
#model
model_name = "MODERNRES" # <- modelname goes here
#possible models: "M3MAX", "SIMPLE1", "CLASSIC1", "CLASSICRES", "MODERNRES"
#use model transformations or standard
use_model_transforms = False
#use_scaler
use_scaler = True
#earlystop
early_stopping = False
patience = 10
min_delta = 0
#training batchsize
train_batch_size = 64
#validation & testing batchsize
val_batch_size = 64
#Epochs
num_epochs = 30
#Optimizer build:
#learningrate
learning_rate = 0.001
#momentum
moment = 0.9
#weight decay
wd = 0.001
#resize to:
size = (256,256)
#use pretrained or not
use_pretrained = True
classes = np.load(image_classes, allow_pickle=True).item()
num_classes = len(classes)
#train-test split
split = 0.85
#model save name
model_save_name = (model_name + "_" +
                   ("_aug" if use_augmented else "noaug")+
                   ("_bal" if use_balanced else "")
                   )
model_save_name
#use seed?
use_seed = True
seed = 42
SEEDS = [3,4,5,6,7,8,9]

Test model classes go here

In [117]:
class ModelM3MAX(nn.Module):
    def __init__(self):
        super(ModelM3MAX, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1, stride = 1, bias=False)
        self.conv1_bn = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 48, 3, padding=1, bias=False)
        self.conv2_bn = nn.BatchNorm2d(48)

        self.conv3 = nn.Conv2d(48, 64, 3, padding=1, bias=False)
        self.conv3_bn = nn.BatchNorm2d(64)

        self.conv4 = nn.Conv2d(64, 80, 3, padding=1, bias=False)
        self.conv4_bn = nn.BatchNorm2d(80)

        self.conv5 = nn.Conv2d(80, 96, 3, padding=1, bias=False)
        self.conv5_bn = nn.BatchNorm2d(96)

        self.conv6 = nn.Conv2d(96, 112, 3, padding=1, bias=False)
        self.conv6_bn = nn.BatchNorm2d(112)

        self.conv7 = nn.Conv2d(112, 128, 3, padding=1, bias=False)
        self.conv7_bn = nn.BatchNorm2d(128)

        self.conv8 = nn.Conv2d(128, 144, 3, padding=1, bias=False)
        self.conv8_bn = nn.BatchNorm2d(144)

        self.conv9 = nn.Conv2d(144, 160, 3, padding=1, bias=False)
        self.conv9_bn = nn.BatchNorm2d(160)

        self.conv10 = nn.Conv2d(160, 176, 3, padding=1, bias=False)
        self.conv10_bn = nn.BatchNorm2d(176)

        # unchanged: 176 * 8 * 8 = 11264
        self.fc1 = nn.Linear(11264, 200, bias=False)
        self.fc1_bn = nn.BatchNorm1d(200)

    def get_logits(self, x):
        x = (x - 0.5) * 2.0

        conv1 = F.relu(self.conv1_bn(self.conv1(x)))
        conv2 = F.relu(self.conv2_bn(self.conv2(conv1)))
        conv2 = F.max_pool2d(conv2, 2)  # 256 -> 128

        conv3 = F.relu(self.conv3_bn(self.conv3(conv2)))
        conv4 = F.relu(self.conv4_bn(self.conv4(conv3)))
        conv4 = F.max_pool2d(conv4, 2)  # 128 -> 64

        conv5 = F.relu(self.conv5_bn(self.conv5(conv4)))
        conv6 = F.relu(self.conv6_bn(self.conv6(conv5)))
        conv6 = F.max_pool2d(conv6, 2)  # 64 -> 32

        conv7 = F.relu(self.conv7_bn(self.conv7(conv6)))
        conv8 = F.relu(self.conv8_bn(self.conv8(conv7)))
        conv8 = F.max_pool2d(conv8, 2)  # 32 -> 16

        conv9 = F.relu(self.conv9_bn(self.conv9(conv8)))
        conv10 = F.relu(self.conv10_bn(self.conv10(conv9)))
        conv10 = F.max_pool2d(conv10, 2)  # 16 -> 8

        # Now conv10 is (batch, 176, 8, 8)
        flat = torch.flatten(conv10.permute(0, 2, 3, 1), 1)
        logits = self.fc1_bn(self.fc1(flat))
        return logits

    def forward(self, x):
        logits = self.get_logits(x)
        return logits

In [118]:
class SIMPLE1(nn.Module):
    def __init__(self):
        super(SIMPLE1, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1, stride = 1, bias=False)
        self.conv1_bn = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 64, 3, padding=1, bias=False)
        self.conv2_bn = nn.BatchNorm2d(64)

        self.conv3 = nn.Conv2d(64, 128, 3, padding=1, bias=False)
        self.conv3_bn = nn.BatchNorm2d(128)

        self.conv4 = nn.Conv2d(128, 164, 3, padding=1, bias=False)
        self.conv4_bn = nn.BatchNorm2d(164)

        self.conv5 = nn.Conv2d(164, 176, 3, padding=1, bias=False)
        self.conv5_bn = nn.BatchNorm2d(176)

        # unchanged: 176 * 8 * 8 = 11264
        self.fc1 = nn.Linear(11264, 200, bias=False)
        self.fc1_bn = nn.BatchNorm1d(200)

    def get_logits(self, x):
        x = (x - 0.5) * 2.0

        #conv 1
        x = F.relu(self.conv1_bn(self.conv1(x)))
        x = F.max_pool2d(x, 2) #256 -> 128

        #conv 2
        x = F.relu(self.conv2_bn(self.conv2(x)))
        x = F.max_pool2d(x, 2) #128 -> 64

        #conv 3
        x = F.relu(self.conv3_bn(self.conv3(x)))
        x = F.max_pool2d(x, 2) #64 -> 32

        # Conv 4
        x = F.relu(self.conv4_bn(self.conv4(x)))
        x = F.max_pool2d(x, 2)   # 32 -> 16

        # Conv 5
        x = F.relu(self.conv5_bn(self.conv5(x)))
        x = F.max_pool2d(x, 2)   # 16 -> 8

        # x is now (batch, 176, 8, 8)
        x = torch.flatten(x, 1)  # (batch, 11264)

        logits = self.fc1_bn(self.fc1(x))
        return logits

    def forward(self, x):
        logits = self.get_logits(x)
        return logits

In [119]:
class CLASSIC1(nn.Module):
    def __init__(self):
        super(CLASSIC1, self).__init__()

        # 5 stages with double conv + pooling
        self.stage1 = self.conv_block(3, 32)
        self.stage2 = self.conv_block(32, 64)
        self.stage3 = self.conv_block(64, 128)
        self.stage4 = self.conv_block(128, 256) #adapt to 256 and 512 to conform to memory norms (powers of 2)
        self.stage5 = self.conv_block(256, 512)

        self.gap = nn.AdaptiveAvgPool2d(1)   # Global Average Pooling reduces parameters and betters generalization
        self.dropout = nn.Dropout(p=0.4) # Prevents overfitting, with some reduced probability to allow quicker learning and not over-regularize
        self.fc1 = nn.Linear(512, 200)

    @staticmethod
    def conv_block(in_ch, out_ch): #first building block for conv layers (stack of 2)
        # Standard pattern: Conv -> BN -> ReLU -> Conv -> BN -> ReLU -> MaxPool
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),

            nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),

            nn.MaxPool2d(2)
        )

    def forward(self, x): #replaced forward + logit with just forward
        x = (x - 0.5) * 2.0

        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)
        # x is now (Batch_Size, 512, 1, 1) assuming input was 32x32 (5 max pools)

        x = self.gap(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        logits = self.fc1(x)
        return logits

In [120]:
class ResidualBlock(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv1 = nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)

        # shortcut: identity if channels match, otherwise 1x1 conv
        self.shortcut = nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1, bias=False)

        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity
        out = self.relu(out)
        out = self.pool(out)
        return out

class CLASSICRES(nn.Module):
    def __init__(self, num_classes=200):
        super().__init__()

        # Stage-level residual blocks
        self.stage1 = ResidualBlock(3, 32)       # 256 -> 128
        self.stage2 = ResidualBlock(32, 64)      # 128 -> 64
        self.stage3 = ResidualBlock(64, 128)     # 64 -> 32
        self.stage4 = ResidualBlock(128, 256)    # 32 -> 16
        self.stage5 = ResidualBlock(256, 512)    # 16 -> 8

        # Classifier
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = (x - 0.5) * 2.0  # normalize to [-1, 1]

        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)

        x = self.gap(x)                  # (B, 512, 1, 1)
        x = torch.flatten(x, 1)          # (B, 512)
        x = self.dropout(x)
        logits = self.fc(x)
        return logits

In [121]:
class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.fc1 = nn.Linear(channels, channels // reduction)
        self.fc2 = nn.Linear(channels // reduction, channels)
        self.activation = nn.ReLU()  # <- changed back from SiLU
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        b, c, h, w = x.size()
        # Squeeze: global average pooling
        y = x.mean(dim=(2, 3))           # (B, C)
        # Excitation: MLP
        y = self.fc2(self.activation(self.fc1(y)))  # (B, C)
        y = self.sigmoid(y).view(b, c, 1, 1)
        # Scale: multiply original feature map
        return x * y


class ResidualBlock(nn.Module):
    def __init__(self, in_ch, out_ch, use_se=False):
        super().__init__()
        self.use_se = use_se

        self.conv1 = nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)

        self.shortcut = nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1, bias=False)
        self.act = nn.ReLU(inplace=True)  # <- changed back from SiLU
        self.pool = nn.MaxPool2d(2)

        if use_se:
            self.se = SEBlock(out_ch)

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.act(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity

        if self.use_se:
            out = self.se(out)

        out = self.act(out)
        out = self.pool(out)
        return out


class MODERNRES(nn.Module):
    def __init__(self, num_classes=200):
        super().__init__()

        # Stage-level residual blocks
        self.stage1 = ResidualBlock(3, 32, use_se=True)
        self.stage2 = ResidualBlock(32, 64, use_se=True)
        self.stage3 = ResidualBlock(64, 96, use_se=True)
        self.stage4 = ResidualBlock(96, 128, use_se=True)
        self.stage5 = ResidualBlock(128, 160, use_se=True)

        # Classifier
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(160, num_classes)

    def forward(self, x):
        x = (x - 0.5) * 2.0  # normalize to [-1, 1]

        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)

        x = self.gap(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        logits = self.fc(x)
        return logits

In [122]:
class EnsembleModel(nn.Module):
    def __init__(self, modelList):
        super().__init__()
        self.models = nn.ModuleList(modelList)
        self.classifier = nn.Linear(200 * len(modelList), 200)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        outputs = []
        for model in self.models:
            outputs.append(model(x))

        x_cat = torch.cat(outputs, dim=1)
        x_cat = self.dropout(x_cat)
        out = self.classifier(x_cat)
        return out

Test model activations go here

In [123]:
# define test model+transforms here
if model_name == "M3MAX":
  custom_model = ModelM3MAX()
#gets 12.99 percent with: balanced augdata, lr 0.001, moment 0.9, wd 0.001, batchsize 32, epochs 15,
if model_name == "SIMP1":
  custom_model = SIMPLE1()
#gets 12.7 percent with: balanced augdata, lr 0.001, moment 0.9, wd 0.001, batchsize 32, epochs 15 (but fewer params, not much though)
if model_name == "CLASSIC1":
  custom_model = CLASSIC1()
#gets 15.6 percent with: balanced augdata, lr 0.001, moment 0.9, wd 0.001, batchsize 32, epochs 15 (but fewer params, not much though)
#but has quite some more potential with more epochs/higher learning rate/higher batch size
if model_name == "CLASSICRES":
  custom_model = CLASSICRES()
#gets 23 percent with: balanced augdata, lr 0.001, moment 0.9, wd 0.001, batchsize 64, epochs 15 (but fewer params, not much though)
if model_name == "MODERNRES":
  custom_model = MODERNRES()


Class and function definitions goes here

In [124]:
#remove randomness for benchmarking
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
if use_seed:
  set_seed(seed)

In [125]:
#defining dataclass
class CSVDataset(Dataset):
    def __init__(self,
                 csv_file,
                 base_dir,
                 transform=None,
                 return_id=False,
                 augmentation_tags=None): # Added augmentation_tags parameter
        self.df = pd.read_csv(csv_file)

        # Apply augmentation filtering if tags are provided
        if augmentation_tags is not None:
            # Ensure 'original' is always included
            all_tags_to_include = list(set(augmentation_tags + ['original']))

            mask = pd.Series([False] * len(self.df), index=self.df.index)
            for tag in all_tags_to_include:
                # Check if the image_path contains the augmentation tag
                mask = mask | self.df['image_path'].str.contains(f'_{tag}.jpg', regex=False)
            self.df = self.df[mask].copy()

        self.base_dir = base_dir
        self.transform = transform
        self.return_id = return_id

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # extract fields
        img_id = row['id'] if self.return_id else None
        relative_path = row['image_path'].lstrip('/')  # safe
        label = row['label'] - 1   # shift to 0-based indexing

        # build full path
        img_path = os.path.join(self.base_dir, relative_path)

        # load
        image = Image.open(img_path).convert('RGB')

        # transform
        if self.transform:
            image = self.transform(image)

        # optionally return id
        if self.return_id:
            return image, label, img_id

        return image, label

In [126]:
def train_model(model,
                train_loader,
                val_loader,
                criterion,
                optimizer,
                schedular=None,
                num_epochs=10,
                early_stopping=False,
                epochs_no_improve=0,
                patience=5,
                min_delta=0.0,
                device="cuda"):

    dataloaders_dict = {"train": train_loader, "val": val_loader}
    since = time.time()
    model.to(device)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Initialize history dictionary
    hist = {
        "train_loss": [],
        "val_loss": [],
        "train_acc": [],
        "val_acc": []
    }

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm(dataloaders_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders_dict[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders_dict[phase].dataset)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Save metrics in history
            if phase == 'train':
                hist['train_loss'].append(epoch_loss)
                hist['train_acc'].append(epoch_acc.item())
            else:
                hist['val_loss'].append(epoch_loss)
                hist['val_acc'].append(epoch_acc.item())

                # Early stopping logic
                if epoch_acc > best_acc + min_delta:
                    print(f"Validation improved ({best_acc:.4f} → {epoch_acc:.4f})")
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    epochs_no_improve = 0
                else:
                    epochs_no_improve += 1
                    print(f"No improvement for {epochs_no_improve} epoch(s).")

        if epochs_no_improve >= patience and early_stopping:
            print(f"Early stopping triggered at epoch {epoch+1}!")
            break

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, hist

In [127]:
def train_model(model,
                train_loader,
                val_loader,
                criterion,
                optimizer,
                schedular=None,
                num_epochs=10,
                early_stopping=False,
                epochs_no_improve=0,
                patience=5,
                min_delta=0.0,
                device="cuda"):

    dataloaders_dict = {"train": train_loader, "val": val_loader}
    since = time.time()
    model.to(device)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    if use_scaler:
      scaler = GradScaler()

    # Initialize history dictionary
    hist = {
        "train_loss": [],
        "val_loss": [],
        "train_acc": [],
        "val_acc": []
    }

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm(dataloaders_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                if use_scaler:
                  with torch.set_grad_enabled(phase == 'train'):
                    with autocast("cuda"):  # Mixed precision context
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        scaler.scale(loss).backward()
                        scaler.step(optimizer)
                        scaler.update()
                else:
                  with torch.set_grad_enabled(phase == 'train'):
                      outputs = model(inputs)
                      loss = criterion(outputs, labels)

                      _, preds = torch.max(outputs, 1)

                      if phase == 'train':
                          loss.backward()
                          optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders_dict[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders_dict[phase].dataset)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Save metrics in history
            if phase == 'train':
                hist['train_loss'].append(epoch_loss)
                hist['train_acc'].append(epoch_acc.item())
            else:
                hist['val_loss'].append(epoch_loss)
                hist['val_acc'].append(epoch_acc.item())

                # Early stopping logic
                if epoch_acc > best_acc + min_delta:
                    print(f"Validation improved ({best_acc:.4f} → {epoch_acc:.4f})")
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    epochs_no_improve = 0
                else:
                    epochs_no_improve += 1
                    print(f"No improvement for {epochs_no_improve} epoch(s).")

        if epochs_no_improve >= patience and early_stopping:
            print(f"Early stopping triggered at epoch {epoch+1}!")
            break

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, hist

Most basic transformations, standardizes rgb values, resizes images to set values and converts image to tensor

In [128]:
#Define some standard transformations
transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((size)),
    transforms.Normalize(mean = (0.5,0.5,0.5), std = (0.5,0.5,0.5))
    ])
## Probably better to follow the original resnet transformations
#See: (model.ResNet152_Weights.IMAGENET1K_V1.transforms)
if not use_model_transforms:
  model_transforms = transformations


Define datasets based on augmented or not

In [129]:
if use_augmented == False:
  full_dataset = CSVDataset(
      csv_file=str(dirpath / "train_images.csv"),
      base_dir=str(dirpath),
      transform = model_transforms,
      return_id=False
  )
  train_size = int(split * len(full_dataset))
  val_size = len(full_dataset) - train_size
  train_dataset, val_dataset = random_split(
      full_dataset,
      [train_size, val_size],
      generator=torch.Generator().manual_seed(seed)
  )
  loader = DataLoader(full_dataset, batch_size=train_batch_size, shuffle=True)

In [130]:
if use_augmented == True:
  if use_balanced:
    train_dataset = CSVDataset(
        csv_file=str(cwd / "train_balanced.csv"),
        base_dir=str(cwd),
        transform = model_transforms,
        return_id=False
    )
  else:
    train_dataset = CSVDataset(
        csv_file=str(cwd / "train_augmented.csv"),
        base_dir=str(cwd),
        transform = model_transforms,
        return_id=False
    )
  val_dataset = CSVDataset(
      csv_file=str(val_images_csv),
      base_dir=str(dirpath),
      transform = model_transforms,
      return_id=False
  )

In [131]:
#define dataloaders
# data loaders
#create full loader
train_loader = DataLoader(train_dataset,
                          batch_size=train_batch_size,
                          shuffle=True,
                          num_workers=2,
                          pin_memory=True,
                          prefetch_factor=2,
                          persistent_workers=True)
val_loader = DataLoader(val_dataset,
                        batch_size=val_batch_size,
                        shuffle=False,
                        num_workers=2,
                        pin_memory=True,
                        prefetch_factor=2,
                        persistent_workers=True
                        )

In [132]:
# Detect if we have a GPU available
torch.cuda.empty_cache()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [133]:
#gather optimizable parameters
params_to_update = custom_model.parameters()
#Design optimzer
# optimizer = optim.SGD(params_to_update, lr=learning_rate, momentum=moment,
#                       weight_decay=wd
#                       )
optimizer = optim.AdamW(params_to_update, lr=learning_rate)
# Setup the loss func
criterion = nn.CrossEntropyLoss()

In [134]:
#Get models:
import re

model_dir = "/content/drive/MyDrive/Test/"

# Ensure the directory exists to avoid FileNotFoundError during os.listdir
os.makedirs(model_dir, exist_ok=True)

# Initialize a list to store the details of each model file
model_files_data = []

model_pattern = re.compile(rf"^{model_name}_(\d+)\.pth$")

print(f"Scanning directory: {model_dir} for files matching pattern: {model_name}_<seed>.pth")

# Iterate through all files in the specified directory
for filename in os.listdir(model_dir):
    # Check if the file is a .pth file and matches the expected pattern
    match = model_pattern.match(filename)
    if match:
        seed = match.group(1)
        full_path = os.path.join(model_dir, filename)
        model_files_data.append({
            "model_name": model_name, # Using the global model_name variable as per instruction
            "seed": seed,
            "path": full_path
        })

# Print the list of extracted model details
if model_files_data:
    print("Found model files:")
    for model_info in model_files_data:
        print(model_info)
else:
    print("No model files found matching the pattern in the specified directory.")

Scanning directory: /content/drive/MyDrive/Test/ for files matching pattern: MODERNRES_<seed>.pth
Found model files:
{'model_name': 'MODERNRES', 'seed': '0', 'path': '/content/drive/MyDrive/Test/MODERNRES_0.pth'}
{'model_name': 'MODERNRES', 'seed': '1', 'path': '/content/drive/MyDrive/Test/MODERNRES_1.pth'}
{'model_name': 'MODERNRES', 'seed': '2', 'path': '/content/drive/MyDrive/Test/MODERNRES_2.pth'}
{'model_name': 'MODERNRES', 'seed': '3', 'path': '/content/drive/MyDrive/Test/MODERNRES_3.pth'}
{'model_name': 'MODERNRES', 'seed': '4', 'path': '/content/drive/MyDrive/Test/MODERNRES_4.pth'}
{'model_name': 'MODERNRES', 'seed': '5', 'path': '/content/drive/MyDrive/Test/MODERNRES_5.pth'}
{'model_name': 'MODERNRES', 'seed': '6', 'path': '/content/drive/MyDrive/Test/MODERNRES_6.pth'}


In [135]:
#initialize model helpers
# init model list
models = []
model_paths = [pred_model['path'] for pred_model in model_files_data]
print(model_paths)
for path in model_paths:
    model = MODERNRES().to(device)
    model.load_state_dict(torch.load(path, map_location=device))
    models.append(model)


# init ensem model
ensemble_model = EnsembleModel(models)

# freeze parameters of individual models
for param in ensemble_model.parameters():
    param.requires_grad = False

# unfreeze parameters of the classifier
for param in ensemble_model.classifier.parameters():
    param.requires_grad = True
model = ensemble_model.to(device)

summary(model, (3, 256, 256)) # this is optional, it prints the model, its params and its trainable params.



#initialize optimzer and criterion
# Initialize optimizer and criterion
params_to_update = ensemble_model.parameters()
optimizer = optim.AdamW(params_to_update, lr=learning_rate)
criterion = nn.CrossEntropyLoss()

#train model
model_trained, hist = train_model(ensemble_model,
                          train_loader,
                          val_loader,
                          criterion,
                          optimizer,
                          early_stopping = early_stopping,
                          patience = patience,
                          min_delta = min_delta,
                          schedular=None,
                          num_epochs=num_epochs,
                          device=device)
torch.save(model_trained.state_dict(), f"/content/drive/MyDrive/Test/ensemble.pth")
with open(f"/content/drive/MyDrive/Test/ensemble_acc.pkl", "wb") as f:
    pickle.dump(hist, f)

print(f"Seed {seed}: model and history saved.")


['/content/drive/MyDrive/Test/MODERNRES_0.pth', '/content/drive/MyDrive/Test/MODERNRES_1.pth', '/content/drive/MyDrive/Test/MODERNRES_2.pth', '/content/drive/MyDrive/Test/MODERNRES_3.pth', '/content/drive/MyDrive/Test/MODERNRES_4.pth', '/content/drive/MyDrive/Test/MODERNRES_5.pth', '/content/drive/MyDrive/Test/MODERNRES_6.pth']
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]              96
            Conv2d-2         [-1, 32, 256, 256]             864
       BatchNorm2d-3         [-1, 32, 256, 256]              64
              ReLU-4         [-1, 32, 256, 256]               0
            Conv2d-5         [-1, 32, 256, 256]           9,216
       BatchNorm2d-6         [-1, 32, 256, 256]              64
            Linear-7                    [-1, 2]              66
              ReLU-8                    [-1, 2]               0
            Linear-9         

100%|██████████| 313/313 [02:17<00:00,  2.28it/s]


train Loss: 18.0981 Acc: 0.2525


100%|██████████| 13/13 [00:05<00:00,  2.37it/s]


val Loss: 20.0225 Acc: 0.1947
Validation improved (0.0000 → 0.1947)

Epoch 2/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 7.6854 Acc: 0.5467


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 22.2941 Acc: 0.2354
Validation improved (0.1947 → 0.2354)

Epoch 3/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 5.8688 Acc: 0.6508


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 25.2038 Acc: 0.2417
Validation improved (0.2354 → 0.2417)

Epoch 4/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 5.1169 Acc: 0.6995


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 26.7546 Acc: 0.2468
Validation improved (0.2417 → 0.2468)

Epoch 5/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.8485 Acc: 0.7324


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 29.5831 Acc: 0.2392
No improvement for 1 epoch(s).

Epoch 6/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.5614 Acc: 0.7487


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 32.7901 Acc: 0.2494
Validation improved (0.2468 → 0.2494)

Epoch 7/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.2745 Acc: 0.7773


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 35.0556 Acc: 0.2684
Validation improved (0.2494 → 0.2684)

Epoch 8/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.4411 Acc: 0.7842


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 35.8627 Acc: 0.2481
No improvement for 1 epoch(s).

Epoch 9/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.1602 Acc: 0.7989


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 36.9924 Acc: 0.2748
Validation improved (0.2684 → 0.2748)

Epoch 10/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.1445 Acc: 0.8054


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 38.3315 Acc: 0.2748
No improvement for 1 epoch(s).

Epoch 11/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.1847 Acc: 0.8125


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 41.2578 Acc: 0.2595
No improvement for 2 epoch(s).

Epoch 12/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.2177 Acc: 0.8151


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 43.2808 Acc: 0.2621
No improvement for 3 epoch(s).

Epoch 13/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.1622 Acc: 0.8245


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 45.7961 Acc: 0.2723
No improvement for 4 epoch(s).

Epoch 14/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.0491 Acc: 0.8307


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 47.3449 Acc: 0.2621
No improvement for 5 epoch(s).

Epoch 15/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.0058 Acc: 0.8381


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 48.8797 Acc: 0.2748
No improvement for 6 epoch(s).

Epoch 16/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.0482 Acc: 0.8419


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 50.9665 Acc: 0.2634
No improvement for 7 epoch(s).

Epoch 17/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.1341 Acc: 0.8407


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 48.9070 Acc: 0.2659
No improvement for 8 epoch(s).

Epoch 18/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.28it/s]


train Loss: 4.0648 Acc: 0.8499


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 52.2020 Acc: 0.2735
No improvement for 9 epoch(s).

Epoch 19/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.0663 Acc: 0.8494


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 54.7500 Acc: 0.2646
No improvement for 10 epoch(s).

Epoch 20/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.9553 Acc: 0.8588


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 56.4520 Acc: 0.2748
No improvement for 11 epoch(s).

Epoch 21/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 4.0703 Acc: 0.8566


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 58.6838 Acc: 0.2774
Validation improved (0.2748 → 0.2774)

Epoch 22/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.8642 Acc: 0.8607


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 56.8934 Acc: 0.2837
Validation improved (0.2774 → 0.2837)

Epoch 23/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.9583 Acc: 0.8667


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 59.4660 Acc: 0.2799
No improvement for 1 epoch(s).

Epoch 24/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.9875 Acc: 0.8627


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 59.3680 Acc: 0.2913
Validation improved (0.2837 → 0.2913)

Epoch 25/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.8263 Acc: 0.8690


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 62.9398 Acc: 0.2863
No improvement for 1 epoch(s).

Epoch 26/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.9548 Acc: 0.8670


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 62.4745 Acc: 0.2697
No improvement for 2 epoch(s).

Epoch 27/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.8496 Acc: 0.8721


100%|██████████| 13/13 [00:05<00:00,  2.39it/s]


val Loss: 66.4044 Acc: 0.2850
No improvement for 3 epoch(s).

Epoch 28/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.9701 Acc: 0.8743


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 67.4483 Acc: 0.2595
No improvement for 4 epoch(s).

Epoch 29/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.28it/s]


train Loss: 4.2590 Acc: 0.8651


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 64.9944 Acc: 0.2850
No improvement for 5 epoch(s).

Epoch 30/30
----------


100%|██████████| 313/313 [02:17<00:00,  2.27it/s]


train Loss: 3.6547 Acc: 0.8825


100%|██████████| 13/13 [00:05<00:00,  2.40it/s]


val Loss: 68.1277 Acc: 0.2672
No improvement for 6 epoch(s).

Training complete in 71m 32s
Best val Acc: 0.2913
Seed 6: model and history saved.
