In [1]:
import os, random, copy, math
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

from sklearn.model_selection import train_test_split
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights

In [2]:
# 1.  Dataset definitions
class MultiClassImageDataset(Dataset):
    def __init__(self, df, img_dir, transform):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = Image.open(os.path.join(self.img_dir, row['image'])).convert('RGB')
        if self.transform: img = self.transform(img)
        return img, row['superclass_index'], row['subclass_index']

class ImageOnlyDataset(Dataset):
    def __init__(self, img_dir, transform):
        self.img_dir = img_dir
        self.transform = transform
        self.names = sorted(os.listdir(img_dir))

    def __len__(self): return len(self.names)

    def __getitem__(self, idx):
        img = Image.open(os.path.join(self.img_dir, self.names[idx])).convert('RGB')
        if self.transform: img = self.transform(img)
        return img, self.names[idx]

In [3]:
# 2.  Data split with held-out novel sub-classes
FULL_CSV  = 'train_data.csv'
IMG_DIR   = 'train_shuffle'
TEST_DIR  = 'test_shuffle'
BATCH_SZ  = 64
NUM_WORK  = 4

full_df = pd.read_csv(FULL_CSV)

# pick 5 sub-classes to simulate novelty
novel_ids = [7, 9, 14, 18, 43]
val_novel_df = full_df[full_df['subclass_index'].isin(novel_ids)]
seen_df      = full_df[~full_df['subclass_index'].isin(novel_ids)]

train_df, val_df = train_test_split(
    seen_df, test_size=0.10, stratify=seen_df['superclass_index'],
    random_state=42
)

print(f'Train {len(train_df)}, Val {len(val_df)}, Val-Novel {len(val_novel_df)}')

Train 5300, Val 589, Val-Novel 399


In [4]:
# 3.  Transforms  +  MixUp helper
IM_MU, IM_SIG = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

train_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.7,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.25,0.25,0.25,0.1),
    transforms.ToTensor(),
    transforms.Normalize(IM_MU, IM_SIG),
])

test_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IM_MU, IM_SIG),
])

def mixup_data(x, y1, y2, alpha=1.0):
    if alpha <= 0.0: return x, y1, y2, 1.0
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size, device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    y1_a, y1_b = y1, y1[index]
    y2_a, y2_b = y2, y2[index]
    return mixed_x, (y1_a, y1_b, y2_a, y2_b), lam

def mixup_criterion(crit, pred, targets, lam):
    sup_out, sub_out = pred
    y1a,y1b,y2a,y2b = targets
    return lam * (crit(sup_out, y1a)+crit(sub_out, y2a)) + \
           (1-lam)*(crit(sup_out, y1b)+crit(sub_out, y2b))

In [8]:
# 4.  Model
class ResNet50Multi(nn.Module):
    def __init__(self, n_super=3, n_sub=88, pretrained=True):
        super().__init__()
        weights = ResNet50_Weights.IMAGENET1K_V2 if pretrained else None
        self.backbone = resnet50(weights=weights)
        in_feats = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        self.shared = nn.Sequential(nn.Linear(in_feats,512), nn.ReLU())
        self.super_head = nn.Linear(512, n_super)
        self.sub_head   = nn.Linear(512, n_sub)

    def forward(self, x, return_feat=False):
        feat = self.shared(self.backbone(x))
        if return_feat: return feat
        return self.super_head(feat), self.sub_head(feat)

In [5]:
# !unzip -q test_shuffle.zip
# !unzip -q train_shuffle.zip

In [6]:
# 5.  Dataloaders
train_ds = MultiClassImageDataset(train_df, IMG_DIR, train_tf)
val_ds   = MultiClassImageDataset(val_df,   IMG_DIR, test_tf)
novel_ds = MultiClassImageDataset(val_novel_df, IMG_DIR, test_tf)
test_ds  = ImageOnlyDataset(TEST_DIR, test_tf)

train_ld = DataLoader(train_ds,  BATCH_SZ, shuffle=True,  num_workers=NUM_WORK)
val_ld   = DataLoader(val_ds,    BATCH_SZ, shuffle=False, num_workers=NUM_WORK)
novel_ld = DataLoader(novel_ds,  BATCH_SZ, shuffle=False, num_workers=NUM_WORK)
test_ld  = DataLoader(test_ds,   1,        shuffle=False)



In [14]:
# 6.  Training loop with two phases
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ResNet50Multi().to(device)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

for p in model.backbone.parameters():
    if not p.requires_grad: continue
    p.requires_grad = False
for n, p in model.backbone.named_parameters():
    if n.startswith('layer4'): p.requires_grad = True

opt = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=1e-4)

def run_epoch(phase):
    is_train = phase=='train'
    model.train(is_train)
    loader = train_ld if is_train else val_ld
    tot, correct_s, correct_sub, loss_sum = 0,0,0,0
    for x,sup,sub in loader:
        x,sup,sub = x.to(device),sup.to(device),sub.to(device)
        if is_train:
            x, targets, lam = mixup_data(x,sup,sub,alpha=0.4)
            opt.zero_grad()
            out = model(x)
            loss = mixup_criterion(criterion,out,targets,lam)
            loss.backward()
            opt.step()
        else:
            sup_out, sub_out = model(x)
            loss = criterion(sup_out,sup)+criterion(sub_out,sub)
        loss_sum += loss.item()
        with torch.no_grad():
            sup_p = model(x)[0].argmax(1)
            sub_p = model(x)[1].argmax(1)
            correct_s += (sup_p==sup).sum().item()
            correct_sub += (sub_p==sub).sum().item()
            tot += sup.size(0)
    return loss_sum/len(loader), 100*correct_s/tot, 100*correct_sub/tot

print('Phase 1: train heads + layer4 (5 epochs)')
for ep in range(5):
    tr = run_epoch('train')
    vl = run_epoch('val')
    print(f'E{ep+1}: TrainLoss {tr[0]:.3f} | ValSup {vl[1]:.2f}%  ValSub {vl[2]:.2f}%')

# full-fine-tune phase
for p in model.parameters(): p.requires_grad = True
opt = optim.AdamW(model.parameters(), lr=3e-5, weight_decay=1e-4)
sched = CosineAnnealingLR(opt, T_max=15)

print('Phase 2: full fine-tune + cosine LR (15 epochs)')
best_loss, best_state = 1e9, None
for ep in range(15):
    tr = run_epoch('train')
    vl = run_epoch('val')
    sched.step()
    print(f'E{ep+6}: TrainLoss {tr[0]:.3f} | ValSup {vl[1]:.2f}%  ValSub {vl[2]:.2f}%')
    if vl[0] < best_loss:
        best_loss, best_state = vl[0], copy.deepcopy(model.state_dict())

model.load_state_dict(best_state)
model.eval()

In [20]:
# 7.  Build prototypes & choose novelty threshold θ
print('Building prototypes and calibrating θ')
proto = {}
with torch.no_grad():
    for x,_,sub in DataLoader(train_ds, BATCH_SZ):
        feat = model(x.to(device), return_feat=True).cpu()
        for f, idx in zip(feat, sub):
            idx=int(idx); proto.setdefault(idx,[]).append(f)
proto = {k: torch.stack(v).mean(0) for k,v in proto.items()}

def min_cos_dist(feat):
    """feat: (B,512) → (B,) minimum cosine distance to any prototype"""
    dists = [1-F.cosine_similarity(feat, p.to(feat.device), dim=1)
             for p in proto.values()]
    return torch.stack(dists).min(0)[0]

# collect distances on seen and novel validation sets
seen_d, novel_d = [],[]
with torch.no_grad():
    for ld,bucket in [(val_ld, seen_d), (novel_ld, novel_d)]:
        for x,_,_ in ld:
            d = min_cos_dist(model(x.to(device),return_feat=True))
            bucket.extend(d.cpu())

seen_d, novel_d = np.array(seen_d), np.array(novel_d)
# theta = 0.5
theta = np.percentile(seen_d, 99)
print(f'Chosen θ = {theta:.3f}   (median novel distance = {np.median(novel_d):.3f})')

==> Building prototypes and calibrating θ …




Chosen θ = 0.500   (median novel distance = 0.056)


In [22]:
# 8.  Inference on hidden test set
print('Running inference on test set …')
# theta = 0.99
rows=[]
with torch.no_grad():
    for x,name in test_ld:
        x=x.to(device)
        feat = model(x, return_feat=True)
        sup_out, sub_out = model(x)
        sup_idx = sup_out.argmax(1).item()
        sub_idx = sub_out.argmax(1).item()
        if min_cos_dist(feat).item() > theta:
            sub_idx = 87
        rows.append((name, sup_idx, sub_idx))

subm = pd.DataFrame(rows, columns=['image','superclass_index','subclass_index'])
subm.to_csv('submission.csv', index=False)
print('Saved submission.csv with', len(subm), 'rows.')

==> Running inference on test set …
Saved submission.csv with 11180 rows.
