In [1]:
!pip install -q timm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler 
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torch.optim.swa_utils import AveragedModel 
from tqdm.notebook import tqdm
import timm
import timm.utils # For ModelEmaV2
import pandas as pd
import numpy as np
import zipfile
import io
from PIL import Image
import os

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} with timm version {timm.__version__}")



Using cuda with timm version 1.0.20


In [2]:
MODEL_NAME = 'eva02_base_patch14_448.mim_in22k_ft_in1k' # High performance EVA-02
IMG_SIZE = 448
BATCH_SIZE = 16  # Small batch size due to huge 448px resolution
GRAD_ACCUM = 4  # Gradient Accumulation to simulate Batch Size = 32
EPOCHS = 6

# 1. Custom Loss: Focal + BCE (Binary Cross Entropy)
class FocalLossBCE(nn.Module):
    def __init__(self, alpha=0.25, gamma=2):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, pred, target):
        bce_loss = self.bce(pred, target)
        probas = torch.sigmoid(pred)
        pt = torch.where(target == 1, probas, 1 - probas)
        # Focal term: (1 - pt)^gamma
        focal_loss = self.alpha * (1 - pt).pow(self.gamma) * bce_loss
        return focal_loss.mean()

# 2. Build Model & LLRD Optimizer
def build_model_optimizer():
    # Load EVA-02 Model
    print(f"Loading {MODEL_NAME}...")
    model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=5)
    model = model.to(device)
    
    # Layer-wise Learning Rate Decay (LLRD)
    # We assign lower LR to early layers (backbone) and higher LR to the head
    param_groups = [
        {'params': model.patch_embed.parameters(), 'lr': 1e-5}, # Early layers: Low LR
        {'params': model.blocks.parameters(), 'lr': 5e-5},      # Middle layers: Med LR
        {'params': model.head.parameters(), 'lr': 3e-4}         # Head: High LR
    ]
    
    optimizer = optim.AdamW(param_groups, weight_decay=0.05)
    
    # Exponential Moving Average (EMA) setup
    # Keeps a smoother copy of the model to stabilize training
    model_ema = timm.utils.ModelEmaV2(model, decay=0.999)
    
    return model, optimizer, model_ema

# 3. Strong Augmentations (Geometric + Photometric)
train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.8, 1.2)), # Geometric
    transforms.ColorJitter(brightness=0.2, contrast=0.2), # Photometric
    transforms.ToTensor(),
    transforms.Normalize([0.481, 0.457, 0.408], [0.268, 0.261, 0.275]) # CLIP/EVA Stats
])

val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.481, 0.457, 0.408], [0.268, 0.261, 0.275])
])

In [3]:
class ZipDataset(Dataset):
    def __init__(self, zip_path, csv_df, transform=None):
        self.zip_path = zip_path
        self.labels = csv_df
        self.transform = transform
        # Create map: 'Lung Opacity' -> 0, 'Cardiomegaly' -> 1, etc.
        self.cls_map = {name: i for i, name in enumerate(sorted(csv_df['label'].unique()))}
        self.num_classes = len(self.cls_map)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        row = self.labels.iloc[idx]
        img_name = str(row['Image_name'])
        
        # Read directly from zip
        with zipfile.ZipFile(self.zip_path, 'r') as zf:
            with zf.open(img_name) as f:
                image = Image.open(io.BytesIO(f.read())).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
            
        return image, self.cls_map[row['label']]

In [None]:
# Re-initialize Dataset (Using ZipDataset)
ZIP_PATH = '/kaggle/input/train-val-test-split/train_val_data.zip'
CSV_PATH = '/kaggle/input/train-val-test-split/train_val.csv'

# Load metadata & recreate label column
df = pd.read_csv(CSV_PATH)
pathology_cols = ['Pneumothorax', 'Cardiomegaly', 'Lung Opacity', 'Pleural Effusion', 'Support Devices']
df['label'] = df[pathology_cols].idxmax(axis=1)

# Split
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(df, test_size=0.15, stratify=df['label'], random_state=42)

# Loaders
train_ds = ZipDataset(ZIP_PATH, train_df, transform=train_transforms)
val_ds = ZipDataset(ZIP_PATH, val_df, transform=val_transforms)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
# We use one-hot encoding for BCE Loss
train_ds.num_classes = 5

# Initialize Everything
model, optimizer, model_ema = build_model_optimizer()
criterion = FocalLossBCE()
scaler = GradScaler() # For Mixed Precision
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

print(f"Starting EVA-X Training for {EPOCHS} Epochs...")

for epoch in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    train_loss = 0
    
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for step, (images, labels) in enumerate(pbar):
        images, labels = images.to(device), labels.to(device)
        
        # Convert integer labels to One-Hot for BCE
        labels_one_hot = torch.nn.functional.one_hot(labels, num_classes=5).float()
        
        # 1. Mixed Precision Forward Pass
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels_one_hot) / GRAD_ACCUM
            
        # 2. Backward & Scaler Step
        scaler.scale(loss).backward()
        
        # Gradient Accumulation Step
        if (step + 1) % GRAD_ACCUM == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            model_ema.update(model) # Update EMA model
            
        train_loss += loss.item() * GRAD_ACCUM
        pbar.set_postfix({'loss': f"{loss.item() * GRAD_ACCUM:.4f}"})
        
    scheduler.step()
    
    # Save Checkpoint for Epoch 5 and 6
    if epoch >= 4: # Epochs are 0-indexed, so 4 is epoch 5
        torch.save(model_ema.module.state_dict(), f'eva_epoch_{epoch+1}.pth')
        print(f"Saved Checkpoint: eva_epoch_{epoch+1}.pth")

print("Training Complete.")

Loading eva02_base_patch14_448.mim_in22k_ft_in1k...
Starting EVA-X Training for 6 Epochs...


  scaler = GradScaler() # For Mixed Precision


Epoch 1/6:   0%|          | 0/563 [00:00<?, ?it/s]

  with autocast():


In [None]:
print("Averaging Checkpoints (Epoch 5 + 6)...")

# 1. Load the two checkpoints
state_dict_5 = torch.load('eva_epoch_5.pth', map_location=device)
state_dict_6 = torch.load('eva_epoch_6.pth', map_location=device)

# 2. Average the weights
avg_state_dict = {}
for key in state_dict_5.keys():
    # Simple mathematical average of the tensors
    avg_state_dict[key] = (state_dict_5[key] + state_dict_6[key]) / 2.0

# 3. Load into model and save final
model.load_state_dict(avg_state_dict)
torch.save(model.state_dict(), 'eva_x_final_soup.pth')

print("Final Averaged Model Saved: eva_x_final_soup.pth")