In [None]:
#!pip install -q clearml optuna torchmetrics

In [None]:
import os

from kaggle_secrets import UserSecretsClient

# Copy the ClearML credentials from Kaggle's secret store into environment
# variables carrying the same names as the secrets.
user_secrets = UserSecretsClient()
for _secret_name in ("CLEARML_API_ACCESS_KEY", "CLEARML_API_SECRET_KEY"):
    os.environ[_secret_name] = user_secrets.get_secret(_secret_name)

In [42]:
%env CLEARML_WEB_HOST=https://app.clear.ml/
%env CLEARML_API_HOST=https://api.clear.ml
%env CLEARML_FILES_HOST=https://files.clear.ml

env: CLEARML_WEB_HOST=https://app.clear.ml/
env: CLEARML_API_HOST=https://api.clear.ml
env: CLEARML_FILES_HOST=https://files.clear.ml


In [None]:
import glob
import random
import xml.etree.ElementTree as ET
from types import SimpleNamespace

import numpy as np
import optuna
import torch
import torch.nn as nn
from clearml import Task
from optuna.exceptions import TrialPruned
from PIL import Image
from torch.optim import SGD
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.models import ResNet50_Weights
from torchvision.models.detection import RetinaNet_ResNet50_FPN_Weights, retinanet_resnet50_fpn
from torchvision.transforms import ColorJitter, Compose, InterpolationMode, Normalize, RandomHorizontalFlip, ToTensor
from torchvision.transforms import functional as F
from tqdm import tqdm


In [None]:
import random
import numpy as np
import torch
# One seed value shared by every random number generator used in the notebook.
SEED = 42

os.environ["PYTHONHASHSEED"] = str(SEED)

# Python, NumPy and PyTorch (CPU + current CUDA device) generators.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Request deterministic cuDNN behaviour and switch off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [44]:
# Central run configuration, read as attributes (args.epochs, ...) by the
# dataset, transform, and Optuna objective code in the cells below.
args = SimpleNamespace(
    voc_root="/kaggle/input/pascal-voc-2012/VOC2012",  # dataset root; ImageSets/Main, Annotations and JPEGImages are read from it
    output_dir="/kaggle/working/optuna_outputs",  # checkpoints, the Optuna SQLite DB and best_params.txt are written here
    train_set="train",  # name of the ImageSets/Main/<name>.txt split used for training
    val_set='val',  # split used for the validation mAP
    epochs=6,  # epochs run inside each Optuna trial
    batch_size=4,  # used for both the train and the validation DataLoader
    momentum=0.9,  # SGD momentum
    min_short_size=400,  # training: the short image side is drawn from [min_short_size, max_short_size]
    max_short_size=800,
    max_long_size=1333,  # training: upper bound on the long side after resizing
    val_short_size=600,  # validation: fixed short side
    val_long_size=1000,  # validation: upper bound on the long side
    workers=2,  # DataLoader num_workers
    trainable_backbone_layers=3,  # forwarded to retinanet_resnet50_fpn
    resume_checkpoint=None,  # optional checkpoint path; when it exists, every trial restores its state from it
    n_trials=10,  # number of Optuna trials to run
    study_name="retina_optuna"  # Optuna study name, also used as the SQLite file stem
)
# Make sure the output directory exists before anything tries to write to it.
os.makedirs(args.output_dir, exist_ok=True)

# Create the ClearML task that collects this notebook's scalars and artifacts.
task = Task.init(
    project_name="CMT318-Object-Detection",
    task_name="RetinaNet_Optuna_Tuning",
    output_uri=None,
    tags=["retinanet","voc2012","kaggle","author:hussain","account:kiran","hyperparameter-tuning"]
)
# Register the configuration values with the task.
task.connect(vars(args))
# Module-level logger used by train_one_epoch() and objective().
logger = task.get_logger()

Jupyter Notebook auto-logging failed, could not access: /kaggle/working/__notebook_source__.ipynb


ClearML Task: created new task id=da8c7f696aef4279887bddd977577201
ClearML results page: https://app.clear.ml/projects/31ab205b5fdb489d9ad1b4ed44a65563/experiments/da8c7f696aef4279887bddd977577201/output/log


In [45]:
class IdentityTransform(nn.Module):
    """No-op transform module: passes images and targets through untouched."""

    def forward(self, images, targets):
        """Return the pair ``(images, targets)`` exactly as it was received."""
        passthrough = (images, targets)
        return passthrough

# Object categories recognised by the dataset loader; the position in this
# list determines each category's integer label.
CLASS_NAMES = [
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat',
    'chair', 'cow', 'diningtable', 'dog',
    'horse', 'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor',
]
# Labels are 1-based: the first name maps to 1, the last to len(CLASS_NAMES).
CLASS_NAME_TO_IDX = dict(zip(CLASS_NAMES, range(1, len(CLASS_NAMES) + 1)))
# One more than the number of names, so the largest label is a valid index.
NUM_CLASSES = 1 + len(CLASS_NAMES)

class VOCDataset(Dataset):
    """Detection dataset built from a VOC-style directory tree.

    At construction time every annotation XML listed in
    ``<root>/ImageSets/Main/<image_set>.txt`` is parsed. Objects whose class
    is not in ``CLASS_NAME_TO_IDX`` and boxes with non-positive width or
    height are dropped; images left without any object are skipped.

    Args:
        root: dataset root directory.
        image_set: split name (file stem inside ``ImageSets/Main``).
        transforms: optional callable ``(PIL image, target) -> (image, target)``.

    Raises:
        AssertionError: if no image of the split keeps at least one object.
    """

    def __init__(self, root, image_set, transforms=None):
        self.root = root
        split_path = os.path.join(root, 'ImageSets', 'Main', f"{image_set}.txt")
        with open(split_path) as handle:
            stripped = (line.strip() for line in handle)
            image_ids = [name for name in stripped if name]

        self.annotations = []
        for image_id in image_ids:
            boxes, labels = self._read_objects(
                os.path.join(root, 'Annotations', f"{image_id}.xml")
            )
            if labels:
                self.annotations.append({
                    'id': image_id,
                    'boxes': torch.tensor(boxes, dtype=torch.float32),
                    'labels': torch.tensor(labels, dtype=torch.int64)
                })
        assert self.annotations, f"No annotations for {image_set}!"
        self.transforms = transforms

    @staticmethod
    def _read_objects(xml_path):
        """Parse one annotation file into ``(boxes, labels)`` Python lists."""
        boxes, labels = [], []
        for node in ET.parse(xml_path).getroot().findall('object'):
            label = CLASS_NAME_TO_IDX.get(node.find('name').text)
            if label is None:
                # class not part of CLASS_NAMES
                continue
            bb = node.find('bndbox')
            xmin, ymin, xmax, ymax = (
                float(bb.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            if xmax <= xmin or ymax <= ymin:
                # degenerate box: zero or negative extent
                continue
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(label)
        return boxes, labels

    def __len__(self):
        """Number of images that kept at least one object."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(image, target)``.

        ``target`` holds fresh clones of the cached ``boxes`` and ``labels``
        tensors, so transforms cannot alter the cached annotations.
        """
        record = self.annotations[idx]
        image_path = os.path.join(self.root, 'JPEGImages', f"{record['id']}.jpg")
        img = Image.open(image_path).convert('RGB')
        target = {key: record[key].clone() for key in ('boxes', 'labels')}
        if not self.transforms:
            return img, target
        img, target = self.transforms(img, target)
        return img, target

# Per-channel statistics handed to Normalize by the train/val transforms.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Colour augmentation applied to training images only.
train_color_aug = Compose([
    ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
])

def get_train_transform(min_s, max_s_short, max_l_long, mean, std):
    """Build the training transform applied to ``(PIL image, target)`` pairs.

    The returned callable:
      1. resizes the image so its short side is a random integer in
         ``[min_s, max_s_short]``, shrinking further if the long side would
         exceed ``max_l_long``;
      2. rescales the boxes by the same factors;
      3. applies ``train_color_aug`` and, with probability 0.5, a horizontal
         flip of both image and boxes;
      4. clamps boxes to the image and drops boxes with no positive area;
      5. converts the image to a tensor and normalises it with ``mean``/``std``.

    Args:
        min_s: smallest short-side length that may be sampled.
        max_s_short: largest short-side length that may be sampled.
        max_l_long: maximum allowed long-side length after resizing.
        mean: per-channel means for ``Normalize``.
        std: per-channel standard deviations for ``Normalize``.

    Returns:
        A function ``transform(img, target) -> (tensor_image, target)``;
        ``target['boxes']`` and ``target['labels']`` are replaced in the
        passed dict.
    """
    # Stateless helpers: build them once instead of once per sample.
    to_tensor = ToTensor()
    normalize = Normalize(mean, std)

    def transform(img, target):
        w, h = img.size
        short = random.randint(min_s, max_s_short)
        if w < h:
            new_w, new_h = short, int(short * h / w)
        else:
            new_h, new_w = short, int(short * w / h)
        long_dim = max(new_w, new_h)
        if long_dim > max_l_long:
            scale = max_l_long / long_dim
            new_w, new_h = int(new_w * scale), int(new_h * scale)
        img = F.resize(img, [new_h, new_w], interpolation=InterpolationMode.BILINEAR)
        sx, sy = new_w / w, new_h / h

        boxes = target['boxes'].clone()
        boxes[:, [0, 2]] *= sx
        boxes[:, [1, 3]] *= sy

        img = train_color_aug(img)
        if random.random() < 0.5:
            img = F.hflip(img)
            x1, x2 = boxes[:, 0].clone(), boxes[:, 2].clone()
            boxes[:, 0] = new_w - x2
            boxes[:, 2] = new_w - x1

        # Clamp through strided slices: ``boxes[:, 0::2]`` is a view, so the
        # in-place clamp reaches ``boxes``. Indexing with a list
        # (``boxes[:, [0, 2]]``) yields a copy and the clamp would be lost.
        boxes[:, 0::2].clamp_(min=0, max=new_w)
        boxes[:, 1::2].clamp_(min=0, max=new_h)
        keep = (boxes[:, 2] - boxes[:, 0] > 0) & (boxes[:, 3] - boxes[:, 1] > 0)
        boxes = boxes[keep]
        target['labels'] = target['labels'][keep]

        target['boxes'] = boxes
        img = to_tensor(img)
        img = normalize(img)
        return img, target
    return transform


def get_val_transform(val_s, max_l_long, mean, std):
    """Build the deterministic validation transform for ``(PIL image, target)``.

    The returned callable resizes the image so its short side equals
    ``val_s`` (shrinking further if the long side would exceed
    ``max_l_long``), rescales the boxes accordingly, clamps them to the
    image, drops boxes with no positive area, then converts the image to a
    tensor and normalises it with ``mean``/``std``.

    Args:
        val_s: target short-side length.
        max_l_long: maximum allowed long-side length after resizing.
        mean: per-channel means for ``Normalize``.
        std: per-channel standard deviations for ``Normalize``.

    Returns:
        A function ``transform(img, target) -> (tensor_image, target)``;
        ``target['boxes']`` and ``target['labels']`` are replaced in the
        passed dict.
    """
    # Stateless helpers: build them once instead of once per sample.
    to_tensor = ToTensor()
    normalize = Normalize(mean, std)

    def transform(img, target):
        w, h = img.size
        if w < h:
            new_w, new_h = val_s, int(val_s * h / w)
        else:
            new_h, new_w = val_s, int(val_s * w / h)
        long_dim = max(new_w, new_h)
        if long_dim > max_l_long:
            scale = max_l_long / long_dim
            new_w, new_h = int(new_w * scale), int(new_h * scale)
        img = F.resize(img, [new_h, new_w], interpolation=InterpolationMode.BILINEAR)
        sx, sy = new_w / w, new_h / h

        boxes = target['boxes'].clone()
        boxes[:, [0, 2]] *= sx
        boxes[:, [1, 3]] *= sy
        # Clamp through strided slices: ``boxes[:, 0::2]`` is a view, so the
        # in-place clamp reaches ``boxes``. Indexing with a list
        # (``boxes[:, [0, 2]]``) yields a copy and the clamp would be lost.
        boxes[:, 0::2].clamp_(min=0, max=new_w)
        boxes[:, 1::2].clamp_(min=0, max=new_h)
        keep = (boxes[:, 2] - boxes[:, 0] > 0) & (boxes[:, 3] - boxes[:, 1] > 0)
        boxes = boxes[keep]
        target['labels'] = target['labels'][keep]
        target['boxes'] = boxes

        img = to_tensor(img)
        img = normalize(img)
        return img, target
    return transform

def collate_fn(batch):
    """Transpose a list of samples into one tuple per sample field.

    For ``(image, target)`` samples this yields ``(images, targets)`` without
    stacking anything, so images of different sizes can share a batch.
    """
    columns = zip(*batch)
    return tuple(columns)

@torch.no_grad()
def evaluate_map(model, loader, device):
    """Run the model over ``loader`` in eval mode and compute detection mAP.

    Images are moved to ``device``; predictions are moved back to the CPU
    (only the ``boxes``, ``scores`` and ``labels`` entries are kept) before
    being fed, together with the loader's targets, into a
    ``MeanAveragePrecision`` metric using ``xyxy`` boxes.

    Returns:
        Whatever ``MeanAveragePrecision.compute()`` returns.
    """
    model.eval()
    metric = MeanAveragePrecision(box_format='xyxy')
    wanted = ('boxes', 'scores', 'labels')
    for images, targets in loader:
        outputs = model([image.to(device) for image in images])
        batch_preds = [
            {key: value.cpu() for key, value in output.items() if key in wanted}
            for output in outputs
        ]
        metric.update(batch_preds, targets)
    return metric.compute()

def train_one_epoch(model, optimizer, loader, device, scheduler, epoch, scaler):
    """Train ``model`` for one pass over ``loader`` with autocast + grad scaling.

    For every batch: move data to ``device``, compute the summed loss under
    autocast, back-propagate through ``scaler``, step the optimizer, update
    the scaler and step the LR scheduler. Afterwards the mean batch loss and
    the scheduler's last learning rate are reported to the module-level
    ClearML ``logger`` at iteration ``epoch`` and printed.
    """
    model.train()
    amp_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    loss_sum = 0.0
    n_batches = 0
    progress = tqdm(loader, desc=f"Train Ep{epoch}", leave=False)
    for images, targets in progress:
        images = [image.to(device) for image in images]
        targets = [
            {name: tensor.to(device) for name, tensor in tgt.items()}
            for tgt in targets
        ]
        optimizer.zero_grad()
        with torch.amp.autocast(amp_device):
            loss_dict = model(images, targets)
            loss = sum(loss_dict.values())
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # The scheduler is stepped once per batch, not once per epoch.
        scheduler.step()
        loss_sum += loss.item()
        n_batches += 1

    avg = loss_sum / n_batches
    lr = scheduler.get_last_lr()[0]
    for title, value in (("Loss", avg), ("LearningRate", lr)):
        logger.report_scalar(title, "train", iteration=epoch, value=value)
    print(f"Epoch {epoch} done, lr={lr:.6f}, avg_loss={avg:.4f}")

# Transforms built from the configured image sizes and normalisation stats.
train_transform = get_train_transform(
    min_s=args.min_short_size,
    max_s_short=args.max_short_size,
    max_l_long=args.max_long_size,
    mean=mean,
    std=std,
)
val_transform = get_val_transform(
    val_s=args.val_short_size,
    max_l_long=args.val_long_size,
    mean=mean,
    std=std,
)

# Both datasets parse their annotation files eagerly at construction.
train_ds = VOCDataset(root=args.voc_root, image_set=args.train_set, transforms=train_transform)
val_ds = VOCDataset(root=args.voc_root, image_set=args.val_set, transforms=val_transform)

# Per-image sampling weights for WeightedRandomSampler.
# Step 1: count how many boxes each class label has in the training set.
counts = dict.fromkeys(range(1, NUM_CLASSES), 0)
for ann in train_ds.annotations:
    for lbl in ann['labels'].tolist():
        counts[lbl] += 1
# Classes that never occur get a count of 1 so the reciprocal stays defined.
counts = {label: (total or 1) for label, total in counts.items()}

# Step 2: an image's weight is the mean inverse class frequency of its boxes.
weights = [
    sum(1.0 / counts[lbl] for lbl in image_labels) / len(image_labels)
    for image_labels in (ann['labels'].tolist() for ann in train_ds.annotations)
]

def objective(trial):
    """Optuna objective: train RetinaNet for one trial and return its best mAP.

    Samples ``lr`` and ``weight_decay`` (both log-uniform), builds fresh data
    loaders, model, SGD optimizer, OneCycleLR scheduler and grad scaler,
    optionally restores state from ``args.resume_checkpoint``, then trains for
    up to ``args.epochs`` epochs. After every epoch the validation mAP is
    reported to Optuna; the trial's best epoch is checkpointed to
    ``args.output_dir``.

    Args:
        trial: the ``optuna.trial.Trial`` driving this run.

    Returns:
        The highest validation ``map`` value seen during the trial (0.0 if no
        epoch scored above zero).

    Raises:
        TrialPruned: when Optuna asks to prune; a resume checkpoint is written
            first.
        AssertionError: if a restored scheduler's ``total_steps`` does not
            match ``args.epochs * len(train_loader)``.
    """
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    wd = trial.suggest_float('weight_decay', 1e-5, 1e-3, log=True)
    task.connect({'lr':lr,'weight_decay':wd})

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    train_loader = DataLoader(
        train_ds, batch_size=args.batch_size,
        sampler=WeightedRandomSampler(weights, len(weights), True),
        num_workers=args.workers, pin_memory=use_cuda, collate_fn=collate_fn
    )
    val_loader = DataLoader(
        val_ds, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=use_cuda, collate_fn=collate_fn
    )
    steps = len(train_loader)

    model = retinanet_resnet50_fpn(
        weights=None,
        weights_backbone=ResNet50_Weights.IMAGENET1K_V1,
        num_classes=NUM_CLASSES,
        trainable_backbone_layers=args.trainable_backbone_layers
    ).to(device)

    optimizer = SGD(model.parameters(), lr=lr, momentum=args.momentum, weight_decay=wd)
    scheduler = OneCycleLR(
        optimizer, max_lr=lr, epochs=args.epochs, steps_per_epoch=steps, pct_start=0.3, div_factor=25
    )
    scaler = torch.amp.GradScaler('cuda' if use_cuda else 'cpu')

    def snapshot(epoch):
        """Collect everything needed to resume training after ``epoch``."""
        return {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'scaler_state_dict': scaler.state_dict(),
            'epoch': epoch
        }

    start_ep = 1
    if args.resume_checkpoint and os.path.exists(args.resume_checkpoint):
        # map_location keeps the restore working when the checkpoint was
        # written on a different device than the one used now.
        ck = torch.load(args.resume_checkpoint, map_location=device)
        model.load_state_dict(ck['model_state_dict'])
        optimizer.load_state_dict(ck['optimizer_state_dict'])
        scheduler.load_state_dict(ck['scheduler_state_dict'])
        # An enabled GradScaler rejects an empty state dict, so only restore
        # the scaler when the checkpoint actually carries a non-empty state.
        scaler_state = ck.get('scaler_state_dict')
        if scaler_state:
            scaler.load_state_dict(scaler_state)
        start_ep = ck.get('epoch', 1) + 1
        if hasattr(scheduler, 'total_steps'):
            expected = args.epochs * steps
            assert scheduler.total_steps == expected, \
                f"Scheduler total_steps {scheduler.total_steps} != expected {expected}"

    best_map = 0.0
    best_stats = None
    for ep in range(start_ep, args.epochs+1):
        train_one_epoch(model, optimizer, train_loader, device, scheduler, ep, scaler)
        mAPs = evaluate_map(model, val_loader, device)
        overall = mAPs['map'].item()
        trial.report(overall, ep)
        if trial.should_prune():
            torch.save(
                snapshot(ep),
                os.path.join(args.output_dir, f'resume_trial{trial.number}_ep{ep}.pth')
            )
            del model; torch.cuda.empty_cache()
            raise TrialPruned()
        for k,v in mAPs.items():
            # The metric dict also holds non-scalar entries (e.g. the tensor
            # of observed class ids); ``.item()`` would raise on those.
            if v.numel() != 1:
                continue
            logger.report_scalar('DetectionMetrics', k, iteration=ep, value=v.item())
        if overall > best_map:
            best_map, best_stats = overall, mAPs
            path = os.path.join(args.output_dir, f'trial{trial.number}_best_ep{ep}.pth')
            torch.save(snapshot(ep), path)

    del model; torch.cuda.empty_cache()
    logger.report_text(f"Trial {trial.number} done: best_map={best_map:.4f}")
    logger.report_text(f"Best stats: {best_stats}")
    return best_map



In [None]:
import shutil

# Persist the study in a SQLite file inside the output directory so that
# re-running the cell continues the same study (load_if_exists=True).
study_storage = f"sqlite:///{os.path.join(args.output_dir,args.study_name)}.db"
study = optuna.create_study(direction='maximize', storage=study_storage, study_name=args.study_name, load_if_exists=True)
study.optimize(objective, n_trials=args.n_trials)

best = study.best_trial
print(f"Best lr={best.params['lr']:.4e}, wd={best.params['weight_decay']:.4e} -> mAP={best.value:.4f}")
logger.report_text(f"Study best: {best.params}, mAP={best.value:.4f}")

# objective() writes one 'trial<N>_best_ep<E>.pth' per improving epoch; the
# file with the highest epoch number is the best checkpoint of that trial.
pattern = f"trial{best.number}_best_ep*.pth"
matches = glob.glob(os.path.join(args.output_dir,pattern))
if matches:
    # Compare epochs numerically: a plain string sort would rank 'ep9' after
    # 'ep10' once a trial runs for ten or more epochs.
    ckpt = max(
        matches,
        key=lambda p: int(os.path.splitext(os.path.basename(p))[0].rsplit('_ep', 1)[1])
    )
    dst = os.path.join(args.output_dir,'global_best_model.pth')
    # Plain file copy: no need to deserialise the checkpoint just to
    # write the same content back out under another name.
    shutil.copyfile(ckpt, dst)
    task.upload_artifact(name='global_best_model.pth', artifact_object=dst)
    print(f"Global best model saved to {dst}")
with open(os.path.join(args.output_dir,'best_params.txt'),'w') as f:
    f.write(str(best.params))

task.close()

[I 2025-04-29 23:25:45,999] A new study created in RDB with name: retina_optuna
Train Ep1:   4%|▍         | 54/1430 [00:37<16:03,  1.43it/s]