In [1]:
import os
import time
import copy
from tqdm import tqdm
from pathlib import Path
import pickle
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms

from torch.utils.data import Dataset, DataLoader, random_split

In [2]:
num_classes = 200
#model_name = "efficientnetV2s" 
#model_path = "finetuned_models/efficientnetV2s_nofreeze_aug.pth"

curr_seed = 0
model_name = "MODERNRES" 
model_path = f"Test/MODERNRES_{curr_seed}.pth"

#model_name = "ensemble" 
#model_path = "Test/ensemble.pth"

Define models

In [3]:
class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.fc1 = nn.Linear(channels, channels // reduction)
        self.fc2 = nn.Linear(channels // reduction, channels)
        self.activation = nn.ReLU()  # <- changed back from SiLU
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        b, c, h, w = x.size()
        # Squeeze: global average pooling
        y = x.mean(dim=(2, 3))           # (B, C)
        # Excitation: MLP
        y = self.fc2(self.activation(self.fc1(y)))  # (B, C)
        y = self.sigmoid(y).view(b, c, 1, 1)
        # Scale: multiply original feature map
        return x * y


In [4]:
class ResidualBlock(nn.Module):
    def __init__(self, in_ch, out_ch, use_se=False, use_pool=False):
        super().__init__()
        self.use_se = use_se
        self.use_pool = use_pool

        self.conv1 = nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)

        self.shortcut = nn.Identity() if in_ch == out_ch else nn.Conv2d(in_ch, out_ch, 1, bias=False)
        self.act = nn.SiLU(inplace=True)
        if use_pool:
            self.pool = nn.MaxPool2d(2)
        if use_se:
            self.se = SEBlock(out_ch)

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.act(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity

        if self.use_se:
            out = self.se(out)

        out = self.act(out)
        if self.use_pool:
            out = self.pool(out)
        return out


In [5]:
class MODERNRES(nn.Module):
    def __init__(self, num_classes=200):
        super().__init__()

        # Stage-level residual blocks
        self.stage1 = ResidualBlock(3, 32, use_se=True)
        self.stage2 = ResidualBlock(32, 64, use_se=True)
        self.stage3 = ResidualBlock(64, 96, use_se=True)
        self.stage4 = ResidualBlock(96, 128, use_se=True)
        self.stage5 = ResidualBlock(128, 160, use_se=True)

        # Classifier
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(160, num_classes)

    def forward(self, x):
        x = (x - 0.5) * 2.0  # normalize to [-1, 1]

        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.stage5(x)

        x = self.gap(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        logits = self.fc(x)
        return logits

In [6]:
class EnsembleModel(nn.Module):
    def __init__(self, modelList):
        super().__init__()
        self.models = nn.ModuleList(modelList)
        self.classifier = nn.Linear(200 * len(modelList), 200)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        outputs = []
        for model in self.models:
            outputs.append(model(x))

        x_cat = torch.cat(outputs, dim=1)
        x_cat = self.dropout(x_cat)
        out = self.classifier(x_cat)
        return out

Code used for loading and predicting test

In [7]:
class CSVDataset(Dataset):
    def __init__(self, csv_file, base_dir, transform=None, return_id=False):
        self.df = pd.read_csv(csv_file)
        self.base_dir = base_dir
        self.transform = transform
        self.return_id = return_id  # Useful for test set where no labels exist

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # extract fields
        img_id = row['id'] if self.return_id else None
        relative_path = row['image_path'].lstrip('/')  # safe
        label = row['label'] - 1   # shift to 0-based indexing

        # build full path
        img_path = os.path.join(self.base_dir, relative_path)

        # load
        image = Image.open(img_path).convert('RGB')

        # transform
        if self.transform:
            image = self.transform(image)

        # optionally return id
        if self.return_id:
            return image, label, img_id

        return image, label

In [8]:
def predict(model, test_loader, device="cuda"):
    model.eval()
    preds_list = []
    ids_list = []

    with torch.no_grad():
        for inputs, labels, img_ids in tqdm(test_loader):
            inputs = inputs.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            #Re-add 1 to label to give right predictions
            preds_list.extend(preds.cpu().numpy()+1)
            ids_list.extend(img_ids.numpy())

    return ids_list, preds_list

In [9]:
torch.cuda.empty_cache()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

Genererate submission files

In [10]:
if model_name == "efficientnetV2s":
  """EfficientnetV2s"""
  efficientnet_v2_s_weights = models.EfficientNet_V2_S_Weights.DEFAULT
  model_transforms = efficientnet_v2_s_weights.transforms()
  finetuned_model = models.efficientnet_v2_s()
  num_ftrs = finetuned_model.classifier[1].in_features
  finetuned_model.classifier[1] = nn.Linear(num_ftrs, num_classes)
  finetuned_model.num_classes = num_classes
  finetuned_model.load_state_dict(torch.load(model_path))
  finetuned_model.to(device)
elif model_name == "MODERNRES":
  """Our model"""
  transformations = transforms.Compose([
  transforms.ToTensor(),
  transforms.Resize((size)),
  transforms.Normalize(mean = (0.5,0.5,0.5), std = (0.5,0.5,0.5))
  ])
  model_transform = transformations
  finetuned_model = MODERNRES(num_classes=num_classes)
  finetuned_model.load_state_dict(torch.load(model_path))
  finetuned_model.to(device)
elif model_name == "ensemble":
  """Ensemble of 7 MODERNRES models"""
  modelList = []
  model_path_base = "Test/MODERNRES"
  for i in range(7):
      model = MODERNRES(num_classes=num_classes)
      model_path_i = model_path_base + f'_{i+1}.pth'
      model.load_state_dict(torch.load(model_path_i))
      model.to(device)
      modelList.append(model)
  finetuned_model = EnsembleModel(modelList)
  finetuned_model.load_state_dict(torch.load(model_path))
  finetuned_model.to(device)

  finetuned_model.load_state_dict(torch.load(model_path))


In [11]:
#Get Test set
dirpath = "aml-2025-feathers-in-focus"
test_dataset = CSVDataset(
    csv_file= "aml-2025-feathers-in-focus/test_images_path.csv",
    base_dir= dirpath ,
    transform = model_transforms,
    return_id=True
)
test_image_ids = test_dataset.df['id'].tolist()
#Create dataloader
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

NameError: name 'model_transforms' is not defined

In [None]:
# Run ensemble prediction
test_ids, test_preds = predict(finetuned_model, test_loader, device=device)

In [None]:
# Generate submission.csv for ensemble
submission = pd.DataFrame({
    "id": test_ids,
    "label": test_preds
})

submission.to_csv(f"submissions/{model_name}.csv", index=False)