In [None]:
import os
import time
import random
import numpy as np
import torch

# CONFIGURATION

# Central settings dictionary; the cells below look their parameters up here by key.
CONFIG = dict(
    seed=42,
    model_names=["wide_resnet50_2", "resnext50_32x4d", "efficientnet_b0", "convnext_tiny"],
    model_seeds=[42, 43, 44, 45],
    batch=128,
    num_epochs=120,
    lr=3e-4,
    wd=1e-4,
    mix_alpha=0.8,
    lbl_smooth=0.1,
    classes=10,
    img_res=224,
    weights_dir="/content/models",
    out_dir="/content/results",
)

# Make sure both output folders exist; exist_ok keeps a re-run of this cell harmless.
for _dir_key in ("weights_dir", "out_dir"):
    os.makedirs(CONFIG[_dir_key], exist_ok=True)

# Stop immediately when no GPU is visible; every later cell uses `device`.
assert torch.cuda.is_available(), "CUDA is required."
device = torch.device("cuda")

# SEED CONTROL
def set_global_seed(s):
    """Seed every random number generator used in this notebook with ``s``.

    Covers Python's ``random``, NumPy's global generator and PyTorch
    (CPU plus all CUDA devices), then switches cuDNN to deterministic
    mode with benchmarking disabled.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(s)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Seed everything once up front, then report which GPU (device index 0) is in use.
set_global_seed(CONFIG["seed"])
print(f"Using GPU => {torch.cuda.get_device_name(0)}")


GPU: Tesla T4


In [None]:
# IMPORTS

import timm
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, datasets
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Subset
from torch.cuda.amp import autocast, GradScaler
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import confusion_matrix

In [None]:
# MIXUP
def mixup_inputs(x, y, strength):
    """Blend each sample of a batch with another, randomly chosen sample of the same batch.

    Args:
        x: Input batch; its first dimension is the batch dimension.
        y: Labels aligned with ``x`` along the first dimension.
        strength: Parameter of the symmetric Beta(strength, strength)
            distribution the mixing coefficient is drawn from. A value
            ``<= 0`` disables mixing.

    Returns:
        Tuple ``(mixed_x, y_a, y_b, lam)`` where
        ``mixed_x = lam * x + (1 - lam) * x[perm]``, ``y_a`` is ``y``,
        ``y_b`` is ``y[perm]`` and ``lam`` is a Python float.
    """
    # np.random.beta raises for non-positive parameters, so treat that case as
    # "mixup off": the batch is returned untouched and lam is 1.
    if strength <= 0:
        return x, y, y, 1.0

    # Cast to a plain float so downstream tensor arithmetic sees an ordinary scalar.
    lam = float(np.random.beta(strength, strength))
    # Build the permutation on the same device as x so indexing needs no transfer.
    idx = torch.randperm(x.size(0), device=x.device)
    return lam * x + (1 - lam) * x[idx], y, y[idx], lam

def mixup_objective(criterion, outputs, y_a, y_b, lam):
    """Return the ``lam``-weighted combination of the loss against both label sets.

    ``criterion`` is called twice on the same ``outputs``: once with ``y_a``
    (weight ``lam``) and once with ``y_b`` (weight ``1 - lam``).
    """
    loss_a = criterion(outputs, y_a)
    loss_b = criterion(outputs, y_b)
    return lam * loss_a + (1 - lam) * loss_b

def compute_accuracy(logits, labels):
    """Return the fraction of rows whose arg-max over dim 1 equals the label, as a Python float."""
    predicted = logits.argmax(dim=1)
    hits = predicted.eq(labels)
    return hits.float().mean().item()

# TRANSFORMS

# Training pipeline: padded random crop at 32 px, upscale to the configured
# resolution, random flip + RandAugment, then tensor conversion and normalisation.
# The resolution is read from CONFIG["img_res"], the key the config cell defines.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.Resize((CONFIG["img_res"], CONFIG["img_res"])),
    transforms.RandomHorizontalFlip(),
    transforms.RandAugment(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

# Evaluation pipeline: no random augmentation, only resize + the same normalisation.
test_transform = transforms.Compose([
    transforms.Resize((CONFIG["img_res"], CONFIG["img_res"])),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])


In [None]:
# DATASET
full_train = CIFAR10("./data", train=True, download=True, transform=train_transform)
test_set = CIFAR10("./data", train=False, download=True, transform=test_transform)

# Shuffle with a private generator seeded from the config instead of the global
# `random` state, so re-running this cell always reproduces the same split.
indices = list(range(len(full_train)))
random.Random(CONFIG["seed"]).shuffle(indices)

train_ids = indices[:40000]
val_ids = indices[40000:45000]
# NOTE(review): indices[45000:] end up in neither split -- confirm this hold-out is intentional.

# The training subset reuses `full_train` (already built with train_transform).
# Validation needs its own dataset object because it uses the un-augmented transform.
train_data = Subset(full_train, train_ids)
val_data = Subset(CIFAR10("./data", train=True, transform=test_transform), val_ids)

train_loader = DataLoader(train_data, batch_size=CONFIG["batch"], shuffle=True, num_workers=4)
val_loader = DataLoader(val_data, batch_size=CONFIG["batch"], shuffle=False, num_workers=4)
test_loader = DataLoader(test_set, batch_size=CONFIG["batch"], shuffle=False, num_workers=4)

print("Train:", len(train_data), "Val:", len(val_data), "Test:", len(test_set))

# MODEL BUILDER
def create_model(model_name, pretrained=True):
    """Build a timm model with a ``CONFIG["classes"]``-way head and move it to ``device``.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``
            (the previous behaviour); pass ``False`` when the weights are
            going to be overwritten by a saved checkpoint anyway.

    Returns:
        The model, already placed on ``device``.
    """
    # The class count is stored under CONFIG["classes"].
    model = timm.create_model(model_name, pretrained=pretrained, num_classes=CONFIG["classes"])
    return model.to(device)


In [None]:

# TRAINING
def train_loop(model, seed, name):
    """Train ``model`` with mixup + mixed precision and keep its best checkpoint.

    Every epoch runs one pass over ``train_loader`` followed by an evaluation
    on ``val_loader``. Whenever the validation accuracy improves, the model's
    state dict is written to ``CONFIG["weights_dir"]/<name>.pth``.

    Args:
        model: Network to optimise (trained in place).
        seed: Seed passed to ``set_global_seed`` before training starts.
        name: Label used in the progress print-outs and as the checkpoint
            file stem.

    Returns:
        Best validation accuracy observed over all epochs (``0.0`` if no
        epoch ever scored above zero, in which case nothing is saved).
    """
    set_global_seed(seed)
    optimizer = optim.AdamW(model.parameters(), lr=CONFIG["lr"], weight_decay=CONFIG["wd"])
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, CONFIG["num_epochs"])
    criterion = nn.CrossEntropyLoss(label_smoothing=CONFIG["lbl_smooth"])
    scaler = GradScaler()

    best_score = 0.0

    for epoch in range(CONFIG["num_epochs"]):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            mixed_x, y_a, y_b, lam = mixup_inputs(xb, yb, CONFIG["mix_alpha"])

            optimizer.zero_grad()
            with autocast():
                preds = model(mixed_x)
                loss = mixup_objective(criterion, preds, y_a, y_b, lam)

            # GradScaler: scale the loss for backward, then step and update the scale factor.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        model.eval()
        # Count correct predictions over the whole split. Averaging per-batch
        # accuracies would give the (smaller) final batch the same weight as a
        # full one and bias the reported number.
        correct, seen = 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                correct += (model(xb).argmax(dim=1) == yb).sum().item()
                seen += yb.size(0)

        # max(..., 1) only guards against an empty validation loader.
        val_acc = correct / max(seen, 1)
        scheduler.step()

        if val_acc > best_score:
            best_score = val_acc
            torch.save(model.state_dict(), f"{CONFIG['weights_dir']}/{name}.pth")

        print(name, "Epoch", epoch+1, "ValAcc", round(val_acc, 4))

    return best_score


In [None]:

# MULTI RUN
saved_models = []

# zip() stops at the shorter list, so a length mismatch would silently skip models.
assert len(CONFIG["model_names"]) == len(CONFIG["model_seeds"]), \
    "model_names and model_seeds must have the same length"

# Train every configured architecture with its own seed.
# The per-model seeds are stored under CONFIG["model_seeds"].
for mname, s in zip(CONFIG["model_names"], CONFIG["model_seeds"]):
    mdl = create_model(mname)
    score = train_loop(mdl, s, mname)
    saved_models.append(mname)
    print(mname, "BEST VAL:", score)

print("CHECKPOINTS:", saved_models)
# Checkpoints live in CONFIG["weights_dir"], the directory the config cell creates.
print(os.listdir(CONFIG["weights_dir"]))

# ENSEMBLE
# Names of the trained checkpoints whose test-set probabilities are averaged below.
# Only one architecture is listed, so the "ensemble" currently reduces to that single model.
chosen = ["wide_resnet50_2"]

def get_test_probs(model):
    """Return softmax probabilities of ``model`` for every sample served by ``test_loader``.

    The model is put in eval mode and run without gradient tracking; each
    batch's probabilities are moved to the CPU and the batches are
    concatenated along dim 0 in loader order.
    """
    model.eval()
    with torch.no_grad():
        per_batch = [
            F.softmax(model(images.to(device)), dim=1).cpu()
            for images, _ in test_loader
        ]
    return torch.cat(per_batch, dim=0)

# Collect test-set probabilities from every selected checkpoint.
all_outputs = []
for name in chosen:
    mdl = create_model(name)
    # Checkpoints are stored in CONFIG["weights_dir"]; map_location places the
    # tensors on the active device regardless of where they were saved from.
    checkpoint = torch.load(f"{CONFIG['weights_dir']}/{name}.pth", map_location=device)
    mdl.load_state_dict(checkpoint)
    all_outputs.append(get_test_probs(mdl))

# Average the per-model probabilities, then take the most likely class.
ensemble_output = torch.stack(all_outputs).mean(dim=0)
ensemble_preds = ensemble_output.argmax(dim=1)
true_labels = torch.tensor(test_set.targets)
test_accuracy = (ensemble_preds == true_labels).float().mean().item()

print("ENSEMBLE TEST ACC:", test_accuracy)

# Pin the label set so the matrix is always classes x classes, even if some
# class is never predicted, and take the class count from the config.
class_ids = list(range(CONFIG["classes"]))
cm = confusion_matrix(true_labels.numpy(), ensemble_preds.numpy(), labels=class_ids)

fig, ax = plt.subplots()
im = ax.imshow(cm)
fig.colorbar(im, ax=ax)
ax.set(title="Ensemble confusion matrix (test set)", xlabel="Predicted class", ylabel="True class")
plt.show()

# Per-class accuracy: diagonal entry divided by the number of true samples of that class.
for c in class_ids:
    print(c, cm[c, c] / cm[c].sum())


In [None]:

# SAVE RESULTS
# One row per test sample: position, ground truth, ensemble prediction and the
# probability the ensemble assigned to that prediction.
df = pd.DataFrame({
    "index": range(len(true_labels)),
    "true": true_labels.numpy(),
    "pred": ensemble_preds.numpy(),
    "prob": ensemble_output.max(dim=1).values.numpy()
})

# Results go to CONFIG["out_dir"], the directory the config cell creates.
df.to_csv(f"{CONFIG['out_dir']}/predictions.csv", index=False)

# Record the packages this notebook depends on (written to the current working directory).
with open("requirements.txt", "w") as f:
    f.write("\n".join([
        "torch",
        "torchvision",
        "timm",
        "numpy",
        "pandas",
        "scikit-learn",
        "matplotlib",
        "tqdm"
    ]))

print("Saved outputs")