# **Football Player Segmentation**

In [4]:
import torch

# Default to the CPU and switch to the GPU only when CUDA reports one as usable.
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
device

device(type='cuda')

## I. Pre-processing

### I.1. Dataset class

In [None]:
import os
import json
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as T
# from torchvision import transforms
from pycocotools import mask as coco_mask
import torch.nn.functional as F_nn


class FootballSegmentDataset(Dataset):
    """Binary segmentation dataset backed by a COCO-format annotation file.

    Each sample is an RGB image plus one mask in which a pixel is 1 when it is
    covered by at least one annotation of that image and 0 otherwise.

    Args:
        root_dir: Directory the ``file_name`` entries of the annotation file
            are joined onto.
        annotation_file: Path to a COCO-style JSON file containing ``images``
            and ``annotations`` lists.
        transforms: Optional callable ``(PIL image, PIL mask) -> (img, mask)``.
            When omitted, the image goes through ``T.ToTensor()`` and the mask
            is returned as a ``torch.long`` tensor.
        frame_step: Keep every ``frame_step``-th annotated image id.
    """

    def __init__(self, root_dir, annotation_file, transforms=None, frame_step=1):
        self.root_dir = root_dir
        self.transforms = transforms
        self.frame_step = frame_step

        with open(annotation_file, 'r') as f:
            self.coco = json.load(f)

        self.image_id_to_info = {img['id']: img for img in self.coco['images']}

        # Group annotations by the image they belong to.
        self.id_to_annotations = {}
        for ann in self.coco['annotations']:
            self.id_to_annotations.setdefault(ann['image_id'], []).append(ann)

        # Only images that own at least one annotation are indexed; ids keep the
        # order in which they first appear in the annotation list.
        full_ids = list(self.id_to_annotations.keys())
        self.image_ids = full_ids[::self.frame_step]

    def __len__(self):
        return len(self.image_ids)

    @staticmethod
    def _segmentation_to_rle(segmentation, height, width):
        """Turn one COCO ``segmentation`` entry into a compressed RLE.

        Handles the three encodings the COCO format allows (same dispatch as
        pycocotools' ``COCO.annToRLE``). Returns ``None`` for an empty polygon
        list so the caller can skip it instead of crashing.
        """
        if isinstance(segmentation, list):
            if not segmentation:
                return None
            # Polygons: one RLE per polygon part, merged into a single region.
            return coco_mask.merge(coco_mask.frPyObjects(segmentation, height, width))
        if isinstance(segmentation.get('counts'), list):
            # Uncompressed RLE: frPyObjects already yields a single RLE here,
            # so there is nothing to merge.
            return coco_mask.frPyObjects(segmentation, height, width)
        # Already a compressed RLE.
        return segmentation

    def __getitem__(self, idx):
        img_id = self.image_ids[idx]
        img_info = self.image_id_to_info[img_id]
        img_path = os.path.join(self.root_dir, img_info['file_name'])
        img = Image.open(img_path).convert("RGB")

        height, width = img_info['height'], img_info['width']
        masks = []
        for ann in self.id_to_annotations[img_id]:
            rle = self._segmentation_to_rle(ann['segmentation'], height, width)
            if rle is not None:
                masks.append(coco_mask.decode(rle))

        if masks:
            # Union of all instance masks -> single foreground/background mask.
            mask = np.any(np.stack(masks, axis=0), axis=0).astype(np.uint8)
        else:
            mask = np.zeros((height, width), dtype=np.uint8)

        mask = Image.fromarray(mask)

        if self.transforms:
            img, mask = self.transforms(img, mask)
        else:
            img = T.ToTensor()(img)
            mask = torch.from_numpy(np.array(mask)).long()

        return img, mask


### I.2. Transform

In [2]:
import random
from torchvision.transforms import functional as F

# Augmentations for TRAINING only
class JointTrainTransform:
    """Paired image/mask pipeline for the training split.

    Resizes both inputs, applies the same random horizontal and vertical flips
    to image and mask, colour-jitters the image only, then converts the image
    to a normalized tensor and the mask to a ``torch.long`` tensor.

    Args:
        resize: Target size passed to ``F.resize``.
        hflip_prob: Probability of a horizontal flip.
        vflip_prob: Probability of a vertical flip.
    """

    def __init__(self, resize=(256, 256), hflip_prob=0.5, vflip_prob=0.5):
        self.resize = resize
        self.hflip_prob = hflip_prob
        self.vflip_prob = vflip_prob
        self.color_jitter = T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)
        # Channel statistics commonly used for ImageNet-pretrained backbones.
        self.normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def __call__(self, img, mask):
        img = F.resize(img, self.resize)
        # Nearest-neighbour keeps the mask's label values intact.
        mask = F.resize(mask, self.resize, interpolation=Image.NEAREST)

        # One random draw per flip direction, horizontal first, so that image
        # and mask always receive the identical geometric change.
        flips = ((self.hflip_prob, F.hflip), (self.vflip_prob, F.vflip))
        for prob, flip in flips:
            if random.random() < prob:
                img, mask = flip(img), flip(mask)

        img_tensor = F.to_tensor(self.color_jitter(img))
        mask_tensor = torch.from_numpy(np.array(mask)).long()
        return self.normalize(img_tensor), mask_tensor

# Minimal transform for VAL & TEST
class JointEvalTransform:
    """Deterministic image/mask pipeline for validation and test data.

    Resizes both inputs, converts the image to a normalized tensor and the mask
    to a ``torch.long`` tensor. No random augmentation is applied.

    Args:
        resize: Target size passed to ``F.resize``.
    """

    def __init__(self, resize=(256, 256)):
        self.resize = resize
        # Channel statistics commonly used for ImageNet-pretrained backbones.
        self.normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def __call__(self, img, mask):
        resized_img = F.resize(img, self.resize)
        # Nearest-neighbour keeps the mask's label values intact.
        resized_mask = F.resize(mask, self.resize, interpolation=Image.NEAREST)

        img_tensor = self.normalize(F.to_tensor(resized_img))
        mask_tensor = torch.from_numpy(np.array(resized_mask)).long()
        return img_tensor, mask_tensor

### I.3. Dataset

In [3]:
import torch.utils.data as data
from torchvision import utils
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Subset

resize = (256, 256)
frame_step = 5

# ---- Datasets ----
# Three views over the same images and annotations; they differ only in the
# transform, so augmentation is applied to the training samples alone.
_dataset_args = dict(
    root_dir='dataset1/images',
    annotation_file='dataset1/annotations/instances_default.json',
    frame_step=frame_step,
)

train_dataset = FootballSegmentDataset(transforms=JointTrainTransform(resize=resize), **_dataset_args)
val_dataset = FootballSegmentDataset(transforms=JointEvalTransform(resize=resize), **_dataset_args)
test_dataset = FootballSegmentDataset(transforms=JointEvalTransform(resize=resize), **_dataset_args)

# ---- Split: Train / Val / Test = 70 / 15 / 15 ----
#   Train: 70%
#   Val:   15%, used to compare models
#   Test:  15%, used for the final estimate of model performance
total_size = len(train_dataset)
indices = list(range(total_size))
np.random.seed(42)  # fixed seed so the split is reproducible
np.random.shuffle(indices)

train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

# The same shuffled index list is cut into three disjoint ranges.
val_end = train_size + val_size
train_idx, val_idx, test_idx = indices[:train_size], indices[train_size:val_end], indices[val_end:]

trainset = Subset(train_dataset, train_idx)
valset = Subset(val_dataset, val_idx)
testset = Subset(test_dataset, test_idx)

## II. Supporting functions

### Loss function

In [5]:
import torch.nn as nn

class BCEJaccardWithLogitsLoss(nn.Module):
    """Binary cross-entropy on logits minus a weighted log soft-Jaccard term.

    Args:
        jaccard_weight: Multiplier of the ``log(jaccard)`` term; a falsy value
            disables it and leaves plain BCE.
        smooth: Constant added to both intersection and union.
    """

    def __init__(self, jaccard_weight=1, smooth=1):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()
        self.jaccard_weight = jaccard_weight
        self.smooth = smooth

    def forward(self, outputs, targets):
        """Return the scalar loss for logits ``outputs`` and same-sized ``targets``.

        Raises:
            ValueError: If ``outputs`` and ``targets`` differ in size.
        """
        if outputs.size() != targets.size():
            raise ValueError("size mismatch, {} != {}".format(outputs.size(), targets.size()))

        bce_term = self.bce(outputs, targets)
        if not self.jaccard_weight:
            return bce_term

        # Only pixels labelled exactly 1 count as foreground.
        truth = (targets == 1.0).float().view(-1)
        probs = torch.sigmoid(outputs).view(-1)

        intersection = (truth * probs).sum()
        union = probs.sum() + truth.sum() - intersection
        soft_jaccard = (intersection + self.smooth) / (union + self.smooth)

        return bce_term - self.jaccard_weight * torch.log(soft_jaccard)

class BCEDiceWithLogitsLoss(nn.Module):
    """Binary cross-entropy on logits minus a weighted log soft-Dice term.

    Args:
        dice_weight: Multiplier of the ``log(dice)`` term; a falsy value
            disables it and leaves plain BCE.
        smooth: Constant added to numerator and denominator of the Dice ratio.
    """

    def __init__(self, dice_weight=1, smooth=1):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()
        self.dice_weight = dice_weight
        self.smooth = smooth

    # Implemented as ``forward`` (not ``__call__``) so nn.Module hooks keep
    # working; ``criterion(outputs, targets)`` is still the way to invoke it.
    def forward(self, outputs, targets):
        """Return the scalar loss for logits ``outputs`` and same-sized ``targets``.

        Raises:
            ValueError: If ``outputs`` and ``targets`` differ in size.
        """
        if outputs.size() != targets.size():
            raise ValueError("size mismatch, {} != {}".format(outputs.size(), targets.size()))

        loss = self.bce(outputs, targets)
        if not self.dice_weight:
            return loss

        # Only pixels labelled exactly 1 count as foreground.
        truth = (targets == 1.0).float().view(-1)
        probs = torch.sigmoid(outputs).view(-1)

        intersection = (probs * truth).sum()
        # The smoothing constant is added once on each side so the ratio stays
        # within (0, 1]; doubling it in the numerator lets dice exceed 1 and
        # turns the log term into a negative loss contribution.
        dice = (2.0 * intersection + self.smooth) / (truth.sum() + probs.sum() + self.smooth)

        return loss - self.dice_weight * torch.log(dice)
    
class FocalWithLogitsLoss(nn.Module):
    """Focal loss on logits: per-element BCE scaled by ``alpha * (1 - p_t) ** gamma``.

    ``p_t`` is the predicted probability of the true class of each element.
    ``alpha`` is applied uniformly to every element (it is not class-balanced).

    Args:
        alpha: Global scale of the modulating factor.
        gamma: Focusing exponent; larger values down-weight easy elements more.
    """

    def __init__(self, alpha=0.25, gamma=2):
        super().__init__()
        # Unreduced BCE: the focal weight has to be applied element by element.
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        self.alpha = alpha
        self.gamma = gamma

    # Implemented as ``forward`` (not ``__call__``) so nn.Module hooks keep
    # working; ``criterion(outputs, targets)`` is still the way to invoke it.
    def forward(self, outputs, targets):
        """Return the mean focal loss for logits ``outputs`` and same-sized ``targets``.

        Raises:
            ValueError: If ``outputs`` and ``targets`` differ in size.
        """
        if outputs.size() != targets.size():
            raise ValueError("size mismatch, {} != {}".format(outputs.size(), targets.size()))

        per_element_bce = self.bce(outputs, targets)

        probs = torch.sigmoid(outputs)
        # Probability assigned to the true class of every element.
        p_true = torch.where(targets == 1.0, probs, 1 - probs)
        focal_weight = self.alpha * (1 - p_true) ** self.gamma

        # Weight first, average afterwards. Multiplying the mean BCE by the mean
        # weight would rescale the whole batch without focusing on hard elements.
        return (focal_weight * per_element_bce).mean()

def dice_loss(input, target):
    """Return the smoothed soft Dice coefficient of logits ``input`` against ``target``.

    Despite its name, the returned value is the coefficient itself (larger means
    more overlap), not ``1 - dice``. A smoothing constant of 1.0 is added to the
    numerator and the denominator.
    """
    smooth = 1.0

    probs = torch.sigmoid(input).view(-1)
    truth = target.view(-1)

    overlap = (probs * truth).sum()
    numerator = 2.0 * overlap + smooth
    denominator = probs.sum() + truth.sum() + smooth
    return numerator / denominator

class FocalLoss(nn.Module):
    """Focal loss on logits, computed with a numerically stable BCE formulation.

    Args:
        gamma: Focusing exponent applied to the per-element modulating factor.
    """

    def __init__(self, gamma):
        super().__init__()
        self.gamma = gamma

    def forward(self, input, target):
        """Return the mean focal loss for logits ``input`` and same-sized ``target``.

        Raises:
            ValueError: If ``input`` and ``target`` differ in size.
        """
        if target.size() != input.size():
            raise ValueError("Target size ({}) must be the same as input size ({})"
                             .format(target.size(), input.size()))

        # BCE-with-logits written in log-sum-exp form: subtracting the clamp
        # before exponentiating keeps both exp() arguments non-positive.
        shift = (-input).clamp(min=0)
        log_sum = ((-shift).exp() + (-input - shift).exp()).log()
        bce = input - input * target + shift + log_sum

        # log(1 - p_t), with the target mapped from {0, 1} to {-1, +1}.
        signed_target = target * 2.0 - 1.0
        log_one_minus_pt = F_nn.logsigmoid(-input * signed_target)
        modulator = (log_one_minus_pt * self.gamma).exp()

        return (modulator * bce).mean()

class MixedLoss(nn.Module):
    """Weighted focal loss combined with the negative log of the soft Dice coefficient.

    Args:
        alpha: Multiplier applied to the focal term.
        gamma: Focusing exponent forwarded to ``FocalLoss``.
    """

    def __init__(self, alpha, gamma):
        super().__init__()
        self.alpha = alpha
        self.focal = FocalLoss(gamma)

    def forward(self, input, target):
        """Return ``alpha * focal(input, target) - log(dice(input, target))`` as a scalar."""
        focal_term = self.alpha * self.focal(input, target)
        log_dice = torch.log(dice_loss(input, target))
        combined = focal_term - log_dice
        return combined.mean()

In [6]:
import os
import json
import gc
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score

# ========== 1. Define training and validation loop ==========
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the mean per-batch loss.

    The model is trained when ``optimizer`` is given; otherwise it is put in
    eval mode and run without gradients. Returns ``nan`` when the loader yields
    no batch.
    """
    training = optimizer is not None
    model.train(training)

    total_loss, n_batches = 0.0, 0
    with torch.set_grad_enabled(training):
        for images, masks in loader:
            images = images.to(device, non_blocking=True)
            # Masks arrive without a channel axis; add one so they line up with
            # the model output, and cast to float for the criterion.
            masks = masks.to(device, non_blocking=True).unsqueeze(1).float()

            if training:
                optimizer.zero_grad()
            loss = criterion(model(images), masks)
            if training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            n_batches += 1

    return total_loss / n_batches if n_batches else float('nan')


def train_and_validate(model, trainloader, testloader, criterion, optimizer, num_epochs=10, device='cuda'):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: Network to optimise; moved to ``device``.
        trainloader: Iterable of ``(images, masks)`` training batches.
        testloader: Iterable of ``(images, masks)`` batches used for the
            per-epoch validation loss.
        criterion: Loss called as ``criterion(outputs, masks)``.
        optimizer: Optimizer already bound to the model parameters.
        num_epochs: Number of passes over ``trainloader``.
        device: Device the model and batches are moved to.

    Returns:
        ``(model, train_losses, val_losses)``; each list holds one mean
        per-batch loss per epoch (``nan`` for a loader without batches).
    """
    model = model.to(device)
    train_losses, val_losses = [], []

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")

        train_loss = _run_epoch(model, trainloader, criterion, device, optimizer=optimizer)
        train_losses.append(train_loss)

        val_loss = _run_epoch(model, testloader, criterion, device)
        val_losses.append(val_loss)

        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

        # Housekeeping once per epoch. Doing this on every batch forces a full
        # garbage collection and a CUDA cache flush per step, which slows the
        # loop considerably.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return model, train_losses, val_losses

# ========== 2. Evaluation ==========
def evaluate_segmentation_model(model, dataloader, device, threshold=0.5):
    """Compute binary-segmentation metrics, averaged over the batches of ``dataloader``.

    Each batch is flattened to one pixel vector, its metrics are computed, and
    the per-batch values are averaged. Masks are expected to hold only 0 and 1.

    Args:
        model: Network returning one logit per pixel.
        dataloader: Iterable of ``(images, masks)`` batches.
        device: Device the batches are moved to.
        threshold: Probability above which a pixel is predicted as foreground.

    Returns:
        Dict with the keys ``IoU``, ``Dice``, ``Precision``, ``Recall``,
        ``F1 Score``, ``Pixel Accuracy`` and ``MAE``.

    Raises:
        ValueError: If a batch has a different number of predicted and
            ground-truth pixels.
    """
    model.eval()
    iou_scores, dice_scores, precisions, recalls, f1s, pixel_accuracies, mae_scores = [], [], [], [], [], [], []
    with torch.no_grad():
        for images, masks in dataloader:
            images = images.to(device)
            masks = masks.to(device).float()
            probs = torch.sigmoid(model(images))
            preds_flat = (probs > threshold).float().reshape(-1).cpu().numpy()
            masks_flat = masks.reshape(-1).cpu().numpy()
            if preds_flat.shape != masks_flat.shape:
                raise ValueError("size mismatch, {} != {}".format(preds_flat.shape, masks_flat.shape))

            pred_fg = preds_flat.astype(bool)
            true_fg = masks_flat.astype(bool)
            intersection = np.logical_and(pred_fg, true_fg).sum()  # true positives
            union = np.logical_or(pred_fg, true_fg).sum()
            n_pred = pred_fg.sum()   # true positives + false positives
            n_true = true_fg.sum()   # true positives + false negatives

            iou = intersection / union if union != 0 else 0
            dice = (2 * intersection) / (n_pred + n_true + 1e-8)
            pixel_acc = (preds_flat == masks_flat).mean()
            mae = np.abs(preds_flat - masks_flat).mean()

            # Precision, recall and F1 come straight from the counts above, with
            # 0 for an empty denominator. This avoids three extra full passes
            # over the pixel vector per batch.
            precision = intersection / n_pred if n_pred else 0.0
            recall = intersection / n_true if n_true else 0.0
            f1 = (2 * intersection) / (n_pred + n_true) if (n_pred + n_true) else 0.0

            iou_scores.append(iou)
            dice_scores.append(dice)
            pixel_accuracies.append(pixel_acc)
            mae_scores.append(mae)
            precisions.append(precision)
            recalls.append(recall)
            f1s.append(f1)

    metrics = {
        "IoU": float(np.mean(iou_scores)),
        "Dice": float(np.mean(dice_scores)),
        "Precision": float(np.mean(precisions)),
        "Recall": float(np.mean(recalls)),
        "F1 Score": float(np.mean(f1s)),
        "Pixel Accuracy": float(np.mean(pixel_accuracies)),
        "MAE": float(np.mean(mae_scores))
    }
    return metrics

# ========== 3. Run experiment and save results ==========
def run_experiment(model_class, model_name, learning_rate, batch_size, num_epochs, loss_function,
                   trainset, valset, device="cuda", output_dir="outputs/UNet"):
    """Train one configuration, evaluate it on ``valset`` and save its weights.

    Args:
        model_class: Zero-argument callable returning a fresh model.
        model_name: Name used for the weight file and stored in the result.
        learning_rate: Learning rate of the Adam optimizer.
        batch_size: Batch size of both data loaders.
        num_epochs: Number of training epochs.
        loss_function: Criterion passed to the training loop.
        trainset: Dataset used for training (shuffled).
        valset: Dataset used for validation loss and final metrics.
        device: Device to train on.
        output_dir: Directory (created if missing) that receives
            ``{model_name}_model.pt``.

    Returns:
        ``(result, model_save_path)`` where ``result`` holds the
        hyper-parameters, the per-epoch loss lists and the evaluation metrics.
    """
    os.makedirs(output_dir, exist_ok=True)

    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=12)
    valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=1)

    model = model_class().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model, train_losses, val_losses = train_and_validate(
        model, trainloader, valloader, loss_function, optimizer,
        num_epochs=num_epochs,
        device=device,
    )

    # Metrics are taken from the model as it stands after the last epoch.
    metrics = evaluate_segmentation_model(model, valloader, device)

    model_save_path = os.path.join(output_dir, f"{model_name}_model.pt")
    torch.save(model.state_dict(), model_save_path)

    result = dict(
        model_name=model_name,
        learning_rate=learning_rate,
        batch_size=batch_size,
        num_epochs=num_epochs,
        loss_function=loss_function.__class__.__name__,
        train_losses=train_losses,
        val_losses=val_losses,
    )
    result.update(metrics)

    return result, model_save_path

# ========== 4. Full grid search tuning function ==========
def tune_model(model_class, model_name, param_grid, loss_function_map, trainset, valset, device="cuda"):
    """Run a full grid search and persist the collected results as JSON.

    Every combination of the values in ``param_grid`` is trained through
    ``run_experiment``. Results go to
    ``outputs/<model_name>/gridsearch_results.json``; the file is rewritten
    after each finished configuration, so an interrupted search keeps the runs
    completed so far.

    Args:
        model_class: Zero-argument callable returning a fresh model.
        model_name: Used for the output directory and experiment names.
        param_grid: Dict with the keys ``learning_rate``, ``batch_size``,
            ``num_epochs`` and ``loss_function``, each mapping to a list of
            candidate values.
        loss_function_map: Maps the loss names in ``param_grid`` to criterion
            objects.
        trainset: Training dataset.
        valset: Validation dataset.
        device: Device to train on.

    Returns:
        List with one result dict per configuration, each including
        ``model_path``.
    """
    results = []
    grid = list(itertools.product(*param_grid.values()))
    keys = list(param_grid.keys())
    output_dir = os.path.join("outputs", model_name)
    os.makedirs(output_dir, exist_ok=True)
    result_path = os.path.join(output_dir, "gridsearch_results.json")

    def _save_results():
        with open(result_path, "w") as f:
            json.dump(results, f, indent=4)

    for i, values in enumerate(grid, 1):
        params = dict(zip(keys, values))
        print(f"\nRunning configuration {i}/{len(grid)}: {params}")
        loss_function = loss_function_map[params["loss_function"]]
        exp_name = f"{model_name}_lr{params['learning_rate']}_bs{params['batch_size']}_ep{params['num_epochs']}_{params['loss_function']}"

        result, model_path = run_experiment(
            model_class=model_class,
            model_name=exp_name,
            learning_rate=params["learning_rate"],
            batch_size=params["batch_size"],
            num_epochs=params["num_epochs"],
            loss_function=loss_function,
            trainset=trainset,
            valset=valset,
            device=device,
            output_dir=output_dir
        )
        result["model_path"] = model_path
        results.append(result)

        # Checkpoint after each configuration: a crash or interrupt later in
        # the grid must not discard the runs already finished.
        _save_results()

    # Final write so the file also exists when the grid is empty.
    _save_results()

    return results

In [7]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import os
import math

# Function to load JSON summary and return dataframe
def load_experiment_summary(path, drop_losses=True):
    """Load a grid-search result JSON file into a DataFrame.

    Args:
        path: Path to a JSON file holding a list of result records.
        drop_losses: When true, remove the ``train_losses``, ``val_losses`` and
            ``model_path`` columns so that only hyper-parameters and metrics
            remain.

    Returns:
        ``pandas.DataFrame`` with one row per record.
    """
    with open(path, "r") as f:
        records = json.load(f)

    df = pd.DataFrame(records)
    if drop_losses:
        # errors="ignore": a record set lacking one of these columns (for
        # example results saved without ``model_path``) must not raise KeyError.
        df = df.drop(columns=["train_losses", "val_losses", "model_path"], errors="ignore")
    return df

# Function to plot all loss curves in a grid layout (3 per row)
def plot_loss_curves_from_json_grid(path, columns=3):
    """Plot train and validation loss curves of every experiment in a JSON file.

    One subplot is drawn per experiment, laid out row by row in a grid with
    ``columns`` columns; unused cells of the last row are hidden.

    Args:
        path: Path to a JSON file holding a list of records with the keys
            ``model_name``, ``train_losses`` and ``val_losses``.
        columns: Number of subplots per row.
    """
    with open(path, "r") as f:
        experiments = json.load(f)

    total = len(experiments)
    if total == 0:
        # plt.subplots cannot build a grid with zero rows.
        print(f"No experiments found in {path}")
        return

    rows = math.ceil(total / columns)

    # squeeze=False always yields a 2-D array of axes, so single-row,
    # single-column and 1x1 layouts are indexed the same way.
    fig, axes = plt.subplots(rows, columns, figsize=(6 * columns, 4 * rows), squeeze=False)
    cells = list(axes.flat)  # row-major order, matching the experiment order

    for ax, exp in zip(cells, experiments):
        ax.plot(exp["train_losses"], label="Train")
        ax.plot(exp["val_losses"], label="Val")
        ax.set_title(exp["model_name"])
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.legend()
        ax.grid(True)

    # Hide any empty subplots
    for ax in cells[total:]:
        ax.axis("off")

    plt.tight_layout()
    plt.show()

## III. Model and training

### III.1. TinyUNet

In [None]:
import torch.nn as nn

class Block(nn.Module):
    """Two bias-free 3x3 convolutions (padding 1), each followed by a ReLU.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        layers = []
        for conv_in in (in_channels, out_channels):
            layers.append(nn.Conv2d(conv_in, out_channels, kernel_size=3, padding=1, bias=False))
            layers.append(nn.ReLU())
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the conv/ReLU stack; padding 1 keeps the spatial size."""
        return self.block(x)
    
class Down(nn.Module):
    """Encoder stage: a conv ``Block`` followed by 2x2 max pooling.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the conv block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.block = Block(in_channels, out_channels)
        self.down = nn.MaxPool2d(2)

    def forward(self, x):
        """Return ``(features, pooled)``: the skip tensor and its 2x downsampled version."""
        features = self.block(x)
        return features, self.down(features)
    
class Up(nn.Module):
    """Decoder stage: 2x transposed-conv upsampling, skip concatenation, conv ``Block``.

    The transposed convolution keeps ``in_channels`` channels, so the
    concatenated tensor has ``2 * in_channels`` channels; the skip tensor is
    therefore expected to carry ``in_channels`` channels as well.

    Args:
        in_channels: Channels of the tensor being upsampled (and of the skip).
        out_channels: Channels produced by the conv block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, in_channels, kernel_size=2, stride=2)
        self.block = Block(in_channels + in_channels, out_channels)

    def forward(self, x, skip):
        """Upsample ``x``, align it with ``skip``, concatenate on channels and convolve."""
        x = self.up(x)

        # Max pooling floors odd sizes, so the upsampled map can be one pixel
        # short of the skip tensor. Zero-pad it to the skip's spatial size;
        # nothing happens when the sizes already match.
        diff_h = skip.size(2) - x.size(2)
        diff_w = skip.size(3) - x.size(3)
        if diff_h or diff_w:
            x = nn.functional.pad(x, [diff_w // 2, diff_w - diff_w // 2,
                                      diff_h // 2, diff_h - diff_h // 2])

        x = torch.cat([x, skip], dim=1)
        return self.block(x)

class TinyUnet(nn.Module):
    """Three-level U-Net: 64/128/256 encoder channels, 256-channel bottleneck, 1x1 output conv.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels of the output map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Encoder
        self.down1 = Down(in_channels, 64)
        self.down2 = Down(64, 128)
        self.down3 = Down(128, 256)
        # Bottleneck
        self.middle = Block(256, 256)
        # Decoder
        self.up3 = Up(256, 128)
        self.up2 = Up(128, 64)
        self.up1 = Up(64, 32)
        # Per-pixel projection to the output channels
        self.conv = nn.Conv2d(32, out_channels, kernel_size=1)

    def forward(self, x):
        """Map ``(B, in_channels, H, W)`` to ``(B, out_channels, H, W)``."""
        skip1, h = self.down1(x)   # skip1: (B, 64, H, W),      h: (B, 64, H/2, W/2)
        skip2, h = self.down2(h)   # skip2: (B, 128, H/2, W/2), h: (B, 128, H/4, W/4)
        skip3, h = self.down3(h)   # skip3: (B, 256, H/4, W/4), h: (B, 256, H/8, W/8)

        h = self.middle(h)         # (B, 256, H/8, W/8)

        h = self.up3(h, skip3)     # (B, 128, H/4, W/4)
        h = self.up2(h, skip2)     # (B, 64, H/2, W/2)
        h = self.up1(h, skip1)     # (B, 32, H, W)

        return self.conv(h)        # (B, out_channels, H, W)

In [9]:
from torch import nn

# Hyper-parameter grid for TinyUNet: 3 learning rates x 2 batch sizes x 2 epoch budgets.
param_grid = dict(
    learning_rate=[1e-5, 1e-4, 1e-3],
    batch_size=[4, 8],
    num_epochs=[20, 40],
    loss_function=["BCEWithLogitsLoss"],
)

# Maps the loss names used in the grid to the criterion objects handed to training.
loss_function_map = dict(BCEWithLogitsLoss=nn.BCEWithLogitsLoss())

results_tinyunet = tune_model(
    lambda: TinyUnet(in_channels=3, out_channels=1),
    "TinyUNet-5",
    param_grid=param_grid,
    loss_function_map=loss_function_map,
    trainset=trainset,
    valset=valset,
    device=device,
)


Running configuration 1/12: {'learning_rate': 1e-05, 'batch_size': 4, 'num_epochs': 20, 'loss_function': 'BCEWithLogitsLoss'}

Epoch 1/20
Train Loss: 0.6734 | Val Loss: 0.6712

Epoch 2/20
Train Loss: 0.6683 | Val Loss: 0.6664

Epoch 3/20
Train Loss: 0.6622 | Val Loss: 0.6600

Epoch 4/20
Train Loss: 0.6539 | Val Loss: 0.6510

Epoch 5/20
Train Loss: 0.6416 | Val Loss: 0.6377

Epoch 6/20
Train Loss: 0.6238 | Val Loss: 0.6164

Epoch 7/20
Train Loss: 0.5883 | Val Loss: 0.5624

Epoch 8/20
Train Loss: 0.4500 | Val Loss: 0.2919

Epoch 9/20
Train Loss: 0.1721 | Val Loss: 0.1359

Epoch 10/20
Train Loss: 0.1317 | Val Loss: 0.1264

Epoch 11/20
Train Loss: 0.1244 | Val Loss: 0.1176

Epoch 12/20
Train Loss: 0.1219 | Val Loss: 0.1154

Epoch 13/20
Train Loss: 0.1193 | Val Loss: 0.1128

Epoch 14/20
Train Loss: 0.1162 | Val Loss: 0.1113

Epoch 15/20
Train Loss: 0.1146 | Val Loss: 0.1094

Epoch 16/20
Train Loss: 0.1146 | Val Loss: 0.1064

Epoch 17/20
Train Loss: 0.1125 | Val Loss: 0.1052

Epoch 18/20
Tr

### III.2. UNet

In [16]:
import torch
import torch.nn as nn

class Block(nn.Module):
    """Two bias-free 3x3 convolutions (padding 1), each followed by an in-place ReLU.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        layers = []
        for conv_in in (in_channels, out_channels):
            layers.append(nn.Conv2d(conv_in, out_channels, kernel_size=3, padding=1, bias=False))
            layers.append(nn.ReLU(inplace=True))
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the conv/ReLU stack; padding 1 keeps the spatial size."""
        return self.block(x)

class Down(nn.Module):
    """Encoder stage: a conv ``Block`` followed by 2x2 max pooling.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the conv block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.block = Block(in_channels, out_channels)
        self.down = nn.MaxPool2d(2)

    def forward(self, x):
        """Return ``(features, pooled)``: the skip tensor and its 2x downsampled version."""
        features = self.block(x)
        pooled = self.down(features)
        return features, pooled

class Up(nn.Module):
    """Decoder stage: transposed-conv upsampling, padding to the skip size, concat, conv ``Block``.

    Args:
        up_in_channels: Channels of the tensor being upsampled.
        skip_in_channels: Channels of the skip tensor; the transposed
            convolution also outputs this many channels.
        out_channels: Channels produced by the conv block.
    """

    def __init__(self, up_in_channels, skip_in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(up_in_channels, skip_in_channels, kernel_size=2, stride=2)
        self.block = Block(2 * skip_in_channels, out_channels)

    def forward(self, x, skip):
        """Upsample ``x``, pad it to ``skip``'s height and width, concatenate and convolve."""
        upsampled = self.up(x)

        # Spread the size difference over both sides; any odd remainder goes
        # to the right/bottom edge.
        gap_h = skip.shape[2] - upsampled.shape[2]
        gap_w = skip.shape[3] - upsampled.shape[3]
        left, top = gap_w // 2, gap_h // 2
        upsampled = nn.functional.pad(upsampled, [left, gap_w - left, top, gap_h - top])

        merged = torch.cat([upsampled, skip], dim=1)
        return self.block(merged)

class UNet(nn.Module):
    """Four-level U-Net: 64/128/256/512 encoder channels, 1024-channel bottleneck, 1x1 output conv.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels of the output map.
    """

    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()

        # Encoder
        self.down1 = Down(in_channels, 64)
        self.down2 = Down(64, 128)
        self.down3 = Down(128, 256)
        self.down4 = Down(256, 512)

        # Bottleneck
        self.middle = Block(512, 1024)

        # Decoder: Up(channels in, skip channels, channels out)
        self.up4 = Up(1024, 512, 512)
        self.up3 = Up(512, 256, 256)
        self.up2 = Up(256, 128, 128)
        self.up1 = Up(128, 64, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        """Run encoder, bottleneck and decoder; return one value per pixel and output channel."""
        skip1, h = self.down1(x)   # skip1: 64 channels
        skip2, h = self.down2(h)   # skip2: 128 channels
        skip3, h = self.down3(h)   # skip3: 256 channels
        skip4, h = self.down4(h)   # skip4: 512 channels

        h = self.middle(h)         # 1024 channels

        h = self.up4(h, skip4)     # up 1024 -> 512, concat 512 => 1024 -> 512
        h = self.up3(h, skip3)     # up 512 -> 256, concat 256 => 512 -> 256
        h = self.up2(h, skip2)     # up 256 -> 128, concat 128 => 256 -> 128
        h = self.up1(h, skip1)     # up 128 -> 64, concat 64 => 128 -> 64

        logits = self.final_conv(h)
        return logits


In [17]:
from torch import nn

# Hyper-parameter grid for UNet: 3 learning rates x 2 batch sizes x 2 epoch budgets.
# (Use num_epochs=[1] for a quick smoke run.)
param_grid = dict(
    learning_rate=[1e-5, 1e-4, 1e-3],
    batch_size=[4, 8],
    num_epochs=[20, 40],
    loss_function=["BCEWithLogitsLoss"],
)

# Maps the loss names used in the grid to the criterion objects handed to training.
loss_function_map = dict(BCEWithLogitsLoss=nn.BCEWithLogitsLoss())

results_unet = tune_model(
    lambda: UNet(in_channels=3, out_channels=1),
    "UNet-5",
    param_grid=param_grid,
    loss_function_map=loss_function_map,
    trainset=trainset,
    valset=valset,
    device=device,
)


Running configuration 1/12: {'learning_rate': 1e-05, 'batch_size': 4, 'num_epochs': 20, 'loss_function': 'BCEWithLogitsLoss'}

Epoch 1/20
Train Loss: 0.7230 | Val Loss: 0.7210

Epoch 2/20
Train Loss: 0.7190 | Val Loss: 0.7167

Epoch 3/20
Train Loss: 0.7138 | Val Loss: 0.7107

Epoch 4/20
Train Loss: 0.7063 | Val Loss: 0.7014

Epoch 5/20
Train Loss: 0.6934 | Val Loss: 0.6834

Epoch 6/20
Train Loss: 0.6533 | Val Loss: 0.5835

Epoch 7/20
Train Loss: 0.2955 | Val Loss: 0.1573

Epoch 8/20
Train Loss: 0.1388 | Val Loss: 0.1196

Epoch 9/20
Train Loss: 0.1262 | Val Loss: 0.1155

Epoch 10/20
Train Loss: 0.1254 | Val Loss: 0.1125

Epoch 11/20
Train Loss: 0.1221 | Val Loss: 0.1108

Epoch 12/20
Train Loss: 0.1203 | Val Loss: 0.1108

Epoch 13/20
Train Loss: 0.1164 | Val Loss: 0.1074

Epoch 14/20
Train Loss: 0.1139 | Val Loss: 0.1044

Epoch 15/20
Train Loss: 0.1131 | Val Loss: 0.1018

Epoch 16/20
Train Loss: 0.1093 | Val Loss: 0.0992

Epoch 17/20
Train Loss: 0.1073 | Val Loss: 0.0990

Epoch 18/20
Tr

### III.3. UNet++

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F_nn

class ConvBlock(nn.Module):
    """Two rounds of bias-free 3x3 conv (padding 1) -> BatchNorm -> in-place ReLU.

    Args:
        in_ch: Channels of the input feature map.
        out_ch: Channels produced by both convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        layers = []
        for conv_in in (in_ch, out_ch):
            layers += [
                nn.Conv2d(conv_in, out_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the conv/BN/ReLU stack; padding 1 keeps the spatial size."""
        return self.block(x)

class UNetPlusPlus(nn.Module):
    """UNet++ with a five-level encoder (64..1024 channels) and nested dense skip connections.

    Node ``x{i}_{j}`` sits at depth ``i`` (resolution ``1 / 2**i``) and column
    ``j`` of the nested decoder. Each decoder node takes all previous nodes of
    its row plus the bilinearly upsampled node from the row below.

    Args:
        in_ch: Channels of the input image.
        out_ch: Channels of each output map.
        deep_supervision: When true, ``forward`` returns a list of four maps
            taken from ``x0_1`` .. ``x0_4``; otherwise a single map from
            ``x0_4``.
    """

    def __init__(self, in_ch=3, out_ch=1, deep_supervision=False):
        super().__init__()
        self.deep_supervision = deep_supervision
        n1 = 64
        filters = [n1, n1*2, n1*4, n1*8, n1*16]

        # Encoder
        self.conv0_0 = ConvBlock(in_ch, filters[0])
        self.conv1_0 = ConvBlock(filters[0], filters[1])
        self.conv2_0 = ConvBlock(filters[1], filters[2])
        self.conv3_0 = ConvBlock(filters[2], filters[3])
        self.conv4_0 = ConvBlock(filters[3], filters[4])

        # Decoder (nested): input channels = j same-row nodes + one upsampled node from below
        self.conv0_1 = ConvBlock(filters[0]+filters[1], filters[0])
        self.conv1_1 = ConvBlock(filters[1]+filters[2], filters[1])
        self.conv2_1 = ConvBlock(filters[2]+filters[3], filters[2])
        self.conv3_1 = ConvBlock(filters[3]+filters[4], filters[3])

        self.conv0_2 = ConvBlock(filters[0]*2 + filters[1], filters[0])
        self.conv1_2 = ConvBlock(filters[1]*2 + filters[2], filters[1])
        self.conv2_2 = ConvBlock(filters[2]*2 + filters[3], filters[2])

        self.conv0_3 = ConvBlock(filters[0]*3 + filters[1], filters[0])
        self.conv1_3 = ConvBlock(filters[1]*3 + filters[2], filters[1])

        self.conv0_4 = ConvBlock(filters[0]*4 + filters[1], filters[0])

        # Final convolution layers
        if self.deep_supervision:
            self.final = nn.ModuleList([nn.Conv2d(filters[0], out_ch, kernel_size=1) for _ in range(4)])
        else:
            self.final = nn.Conv2d(filters[0], out_ch, kernel_size=1)

        self.pool = nn.MaxPool2d(2)

    # A method rather than a lambda stored on the instance: a lambda attribute
    # cannot be pickled, which breaks saving the whole module with torch.save
    # and sending it to spawned worker processes.
    @staticmethod
    def up(x, scale_factor=2):
        """Bilinearly upsample ``x`` by ``scale_factor`` (``align_corners=True``)."""
        return F_nn.interpolate(x, scale_factor=scale_factor, mode='bilinear', align_corners=True)

    def forward(self, x):
        """Return one output map, or a list of four when deep supervision is enabled."""
        # Encoder
        x0_0 = self.conv0_0(x)
        x1_0 = self.conv1_0(self.pool(x0_0))
        x2_0 = self.conv2_0(self.pool(x1_0))
        x3_0 = self.conv3_0(self.pool(x2_0))
        x4_0 = self.conv4_0(self.pool(x3_0))

        # Decoder
        x0_1 = self.conv0_1(torch.cat([x0_0, self.up(x1_0)], 1))
        x1_1 = self.conv1_1(torch.cat([x1_0, self.up(x2_0)], 1))
        x2_1 = self.conv2_1(torch.cat([x2_0, self.up(x3_0)], 1))
        x3_1 = self.conv3_1(torch.cat([x3_0, self.up(x4_0)], 1))

        x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, self.up(x1_1)], 1))
        x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, self.up(x2_1)], 1))
        x2_2 = self.conv2_2(torch.cat([x2_0, x2_1, self.up(x3_1)], 1))

        x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, self.up(x1_2)], 1))
        x1_3 = self.conv1_3(torch.cat([x1_0, x1_1, x1_2, self.up(x2_2)], 1))

        x0_4 = self.conv0_4(torch.cat([x0_0, x0_1, x0_2, x0_3, self.up(x1_3)], 1))

        if self.deep_supervision:
            # One head per full-resolution decoder node.
            heads = [x0_1, x0_2, x0_3, x0_4]
            return [self.final[i](node) for i, node in enumerate(heads)]
        return self.final(x0_4)

In [None]:
from torch import nn

# Hyper-parameter grid for UNet++: 3 learning rates x 2 batch sizes x 2 epoch budgets.
# (Use num_epochs=[1] for a quick smoke run.)
param_grid = dict(
    learning_rate=[1e-5, 1e-4, 1e-3],
    batch_size=[2, 4],
    num_epochs=[20, 40],
    loss_function=["BCEWithLogitsLoss"],
)

# Maps the loss names used in the grid to the criterion objects handed to training.
loss_function_map = dict(BCEWithLogitsLoss=nn.BCEWithLogitsLoss())

results_unetpp = tune_model(
    lambda: UNetPlusPlus(in_ch=3, out_ch=1, deep_supervision=False),
    "UNetpp-5",
    param_grid=param_grid,
    loss_function_map=loss_function_map,
    trainset=trainset,
    valset=valset,
    device=device,
)

### III.4. ResUNet

In [18]:
# ResUNet architecture

class ResidualBlock(nn.Module):
    """Residual unit: conv-BN-ReLU-conv-BN plus a shortcut, followed by ReLU.

    The shortcut is the identity when input and output channel counts match,
    and a 1x1 convolution otherwise.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels of the output feature map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.same_channels = (in_channels == out_channels)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        if self.same_channels:
            self.res_conv = nn.Identity()
        else:
            # 1x1 projection so the shortcut matches the main branch's channels.
            self.res_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut = self.res_conv(x)

        out = self.conv1(x)
        out = self.relu(self.bn1(out))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)

class ResUNet(nn.Module):
    """U-Net whose encoder, bottleneck and decoder stages are ``ResidualBlock`` units.

    Channel widths are 64/128/256/512 with a 1024-channel centre. Upsampling
    uses 2x transposed convolutions and a 1x1 convolution produces the output.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels of the output map.
    """

    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()
        # Encoder and centre
        self.enc1 = ResidualBlock(in_channels, 64)
        self.enc2 = ResidualBlock(64, 128)
        self.enc3 = ResidualBlock(128, 256)
        self.enc4 = ResidualBlock(256, 512)
        self.center = ResidualBlock(512, 1024)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Decoder: each level halves the channels while upsampling, then
        # processes the upsampled map concatenated with the encoder skip.
        self.up4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.dec4 = ResidualBlock(1024, 512)
        self.up3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec3 = ResidualBlock(512, 256)
        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = ResidualBlock(256, 128)
        self.up1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = ResidualBlock(128, 64)

        self.final = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        """Run encoder, centre and decoder; return one value per pixel and output channel."""
        # Encoder path; each output is kept as a skip connection.
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        e3 = self.enc3(self.pool(e2))
        e4 = self.enc4(self.pool(e3))

        h = self.center(self.pool(e4))

        # Decoder path: upsample, concatenate the matching skip, refine.
        stages = (
            (self.up4, self.dec4, e4),
            (self.up3, self.dec3, e3),
            (self.up2, self.dec2, e2),
            (self.up1, self.dec1, e1),
        )
        for upsample, decode, skip in stages:
            h = decode(torch.cat([upsample(h), skip], dim=1))

        return self.final(h)

In [19]:
from torch import nn

# Hyper-parameter grid handed to tune_model for the ResUNet architecture.
# 3 learning rates x 2 batch sizes x 2 epoch counts x 1 loss = 12 combinations.
param_grid = {
    "learning_rate": [1e-5, 1e-4, 1e-3],
    "batch_size": [4, 8],
    "num_epochs": [20, 40],
    "loss_function": ["BCEWithLogitsLoss"]
}

# Maps the loss names listed in param_grid to the loss modules to use.
loss_function_map = {
    "BCEWithLogitsLoss": nn.BCEWithLogitsLoss(),
}

# Store the results under a model-specific name: the later tuning cells also
# assign to `results_tinyunet`, which would otherwise discard these results.
results_resunet = tune_model(
    # A factory rather than an instance is passed; presumably tune_model
    # builds a fresh model per configuration -- confirm against tune_model.
    model_class=lambda: ResUNet(in_channels=3, out_channels=1),
    model_name="ResUNet-5",
    param_grid=param_grid,
    loss_function_map=loss_function_map,
    trainset=trainset,
    valset=valset,
    device=device
)
# Backward-compatible alias for the name this cell originally assigned.
results_tinyunet = results_resunet


Running configuration 1/12: {'learning_rate': 1e-05, 'batch_size': 4, 'num_epochs': 20, 'loss_function': 'BCEWithLogitsLoss'}

Epoch 1/20
Train Loss: 0.6887 | Val Loss: 0.6705

Epoch 2/20
Train Loss: 0.6390 | Val Loss: 0.6137

Epoch 3/20
Train Loss: 0.5950 | Val Loss: 0.5653

Epoch 4/20
Train Loss: 0.5538 | Val Loss: 0.5408

Epoch 5/20
Train Loss: 0.5209 | Val Loss: 0.5049

Epoch 6/20
Train Loss: 0.4861 | Val Loss: 0.4771

Epoch 7/20
Train Loss: 0.4559 | Val Loss: 0.4444

Epoch 8/20
Train Loss: 0.4311 | Val Loss: 0.4205

Epoch 9/20
Train Loss: 0.3990 | Val Loss: 0.4013

Epoch 10/20
Train Loss: 0.3805 | Val Loss: 0.3740

Epoch 11/20
Train Loss: 0.3559 | Val Loss: 0.3444

Epoch 12/20
Train Loss: 0.3304 | Val Loss: 0.3221

Epoch 13/20
Train Loss: 0.2960 | Val Loss: 0.2910

Epoch 14/20
Train Loss: 0.2468 | Val Loss: 0.2456

Epoch 15/20
Train Loss: 0.1547 | Val Loss: 0.1354

Epoch 16/20
Train Loss: 0.0665 | Val Loss: 0.0491

Epoch 17/20
Train Loss: 0.0367 | Val Loss: 0.0350

Epoch 18/20
Tr

### III.5. ResUNet Deeper (Custom architecture)

In [None]:
import numpy as np
import random # Used for transforms (to allow transform on both images and mask)
import matplotlib.pyplot as plt
import gc
# ResUNet architecture

class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages with an additive shortcut.

    Both convolutions use ``padding=1`` with the default stride, so only the
    channel count changes. The shortcut is an identity when ``in_channels``
    equals ``out_channels`` and a 1x1 convolution otherwise.

    NOTE(review): this appears to repeat the ``ResidualBlock`` used by the
    ResUNet section earlier in the notebook -- consider defining it once.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels of the returned tensor.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.same_channels = (in_channels == out_channels)

        # Main branch: conv -> BN -> ReLU -> conv -> BN.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm2d(out_channels)
        self.relu  = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm2d(out_channels)

        # Shortcut branch: project with a 1x1 conv only when channels differ.
        if self.same_channels:
            self.res_conv = nn.Identity()
        else:
            self.res_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        features = self.conv1(x)
        features = self.bn1(features)
        features = self.relu(features)
        features = self.conv2(features)
        features = self.bn2(features)
        # The shortcut is accumulated in place into the main branch.
        features += self.res_conv(x)
        return self.relu(features)

class ResUNet_deeper(nn.Module):
    """Five-level U-Net-style encoder/decoder built from ``ResidualBlock``.

    Five encoder stages (64 up to 1024 channels) and a 2048-channel centre
    block are separated by 2x2 max-pooling. Each decoder stage upsamples
    with a stride-2 transposed convolution, concatenates the encoder output
    of the same level along the channel axis and feeds the result to a
    ``ResidualBlock``. A final 1x1 convolution maps the 64 decoder channels
    to ``out_channels``; no activation is applied to the returned tensor.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of channels produced by the final 1x1 conv.
    """

    def __init__(self, in_channels=3, out_channels=1):
        super(ResUNet_deeper, self).__init__()
        # Encoder path.
        self.enc1 = ResidualBlock(in_channels, 64)
        self.enc2 = ResidualBlock(64, 128)
        self.enc3 = ResidualBlock(128, 256)
        self.enc4 = ResidualBlock(256, 512)
        self.enc5 = ResidualBlock(512, 1024)

        self.center = ResidualBlock(1024, 2048)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Decoder path: every dec block sees upsampled + skip channels.
        self.up5 = nn.ConvTranspose2d(2048, 1024, kernel_size=2, stride=2)
        self.dec5 = ResidualBlock(2048, 1024)

        self.up4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.dec4 = ResidualBlock(1024, 512)

        self.up3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec3 = ResidualBlock(512, 256)

        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = ResidualBlock(256, 128)

        self.up1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = ResidualBlock(128, 64)

        self.final = nn.Conv2d(64, out_channels, kernel_size=1)

    @staticmethod
    def _upsample_and_concat(up, x, skip):
        """Upsample ``x`` with ``up`` and concatenate it with ``skip``.

        Max-pooling floors odd spatial sizes, so for inputs whose height or
        width is not a multiple of 32 the transposed convolution does not
        reproduce the skip tensor's size and ``torch.cat`` would raise. In
        that case the upsampled map is resized to the skip's spatial size.
        When the sizes already agree this is exactly ``cat([up(x), skip])``.
        """
        x = up(x)
        if x.shape[-2:] != skip.shape[-2:]:
            x = torch.nn.functional.interpolate(
                x, size=skip.shape[-2:], mode="bilinear", align_corners=False
            )
        return torch.cat([x, skip], dim=1)

    def forward(self, x):
        """Return the un-activated output map for the image batch ``x``.

        The output has the same spatial size as ``x`` and ``out_channels``
        channels.
        """
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        e3 = self.enc3(self.pool(e2))
        e4 = self.enc4(self.pool(e3))
        e5 = self.enc5(self.pool(e4))

        center = self.center(self.pool(e5))

        d5 = self.dec5(self._upsample_and_concat(self.up5, center, e5))
        d4 = self.dec4(self._upsample_and_concat(self.up4, d5, e4))
        d3 = self.dec3(self._upsample_and_concat(self.up3, d4, e3))
        d2 = self.dec2(self._upsample_and_concat(self.up2, d3, e2))
        d1 = self.dec1(self._upsample_and_concat(self.up1, d2, e1))

        return self.final(d1)


In [None]:
from torch import nn

# Hyper-parameter grid handed to tune_model for the deeper ResUNet.
# 3 learning rates x 2 batch sizes x 2 epoch counts x 1 loss = 12 combinations.
param_grid = {
    "learning_rate": [1e-5, 1e-4, 1e-3],
    "batch_size": [2, 4],
    "num_epochs": [20, 40],
    "loss_function": ["BCEWithLogitsLoss"]
}

# Maps the loss names listed in param_grid to the loss modules to use.
loss_function_map = {
    "BCEWithLogitsLoss": nn.BCEWithLogitsLoss(),
}

# Store the results under a model-specific name: other tuning cells also
# assign to `results_tinyunet`, so that name alone cannot keep every run.
results_resunet_deeper = tune_model(
    # A factory rather than an instance is passed; presumably tune_model
    # builds a fresh model per configuration -- confirm against tune_model.
    model_class=lambda: ResUNet_deeper(in_channels=3, out_channels=1),
    model_name="ResUNet_deeper-5",
    param_grid=param_grid,
    loss_function_map=loss_function_map,
    trainset=trainset,
    valset=valset,
    device=device
)
# Backward-compatible alias for the name this cell originally assigned.
results_tinyunet = results_resunet_deeper

### III.6. ResUNet++ (Custom architecture)

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F_nn

class ConvBlock(nn.Module):
    """Residual double-convolution block that only changes the channel count.

    Two 3x3 convolutions (``padding=1``, default stride), each followed by
    batch-norm, form the main branch; the spatial size is left untouched.
    The shortcut is an identity when ``in_channels == out_channels`` and a
    1x1 convolution otherwise.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels of the returned tensor.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.same_channels = (in_channels == out_channels)

        # Main branch layers.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm2d(out_channels)
        self.relu  = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm2d(out_channels)

        # Shortcut: a 1x1 projection is needed only when the channels differ.
        if self.same_channels:
            self.res_conv = nn.Identity()
        else:
            self.res_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        shortcut_input = x
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        # In-place accumulation of the shortcut into the main branch.
        main += self.res_conv(shortcut_input)
        return self.relu(main)

class ResUNetPlusPlus(nn.Module):
    """Nested (UNet++-style) encoder/decoder whose nodes are ``ConvBlock``s.

    Node ``conv{i}_{j}`` lives at depth ``i`` (the input max-pooled ``i``
    times) and column ``j``. Column 0 is the encoder; every other node
    consumes all earlier nodes of its own depth concatenated with the
    bilinearly upsampled node ``(i + 1, j - 1)``.

    Args:
        in_ch: Number of channels of the input image.
        out_ch: Number of channels produced by each final 1x1 convolution.
        deep_supervision: If True, ``forward`` returns a list with one output
            per depth-0 decoder node (``x0_1`` .. ``x0_4``), each with its own
            1x1 head; otherwise only the head on ``x0_4`` is returned.
    """

    def __init__(self, in_ch=3, out_ch=1, deep_supervision=False):
        super().__init__()
        self.deep_supervision = deep_supervision
        n1 = 64
        filters = [n1, n1*2, n1*4, n1*8, n1*16]

        # Encoder
        self.conv0_0 = ConvBlock(in_ch, filters[0])
        self.conv1_0 = ConvBlock(filters[0], filters[1])
        self.conv2_0 = ConvBlock(filters[1], filters[2])
        self.conv3_0 = ConvBlock(filters[2], filters[3])
        self.conv4_0 = ConvBlock(filters[3], filters[4])

        # Decoder (nested)
        self.conv0_1 = ConvBlock(filters[0]+filters[1], filters[0]) # 64+128 -> 64
        self.conv1_1 = ConvBlock(filters[1]+filters[2], filters[1]) # 128+256 -> 128
        self.conv2_1 = ConvBlock(filters[2]+filters[3], filters[2]) # 256+512 -> 256
        self.conv3_1 = ConvBlock(filters[3]+filters[4], filters[3]) # 512+1024 -> 512

        self.conv0_2 = ConvBlock(filters[0]*2 + filters[1], filters[0]) # 64*2+128=256 -> 64
        self.conv1_2 = ConvBlock(filters[1]*2 + filters[2], filters[1]) # 128*2+256=512 -> 128
        self.conv2_2 = ConvBlock(filters[2]*2 + filters[3], filters[2]) # 256*2+512=1024 -> 256

        self.conv0_3 = ConvBlock(filters[0]*3 + filters[1], filters[0]) # 64*3+128 -> 64
        self.conv1_3 = ConvBlock(filters[1]*3 + filters[2], filters[1]) # 128*3+256 -> 128

        self.conv0_4 = ConvBlock(filters[0]*4 + filters[1], filters[0]) # 64*4+128 -> 64

        # Final convolution layers
        if self.deep_supervision:
            self.final = nn.ModuleList([nn.Conv2d(filters[0], out_ch, kernel_size=1) for _ in range(4)])
        else:
            self.final = nn.Conv2d(filters[0], out_ch, kernel_size=1)

        self.pool = nn.MaxPool2d(2)

    def up(self, x, scale_factor=2):
        """Bilinearly upsample ``x`` by ``scale_factor`` (align_corners=True).

        Defined as a method instead of a lambda stored on the instance: a
        lambda attribute cannot be pickled, which breaks saving the whole
        module object.
        """
        return F_nn.interpolate(x, scale_factor=scale_factor, mode='bilinear', align_corners=True)

    @staticmethod
    def _up_like(x, ref):
        """Bilinearly upsample ``x`` to the spatial size of ``ref``.

        For an exact doubling this gives the same result as ``up`` (with
        ``align_corners=True`` the sampling grid depends only on the input
        and output sizes). Targeting the skip tensor's size additionally
        keeps ``torch.cat`` valid when max-pooling floored an odd size,
        i.e. for inputs whose height/width is not a multiple of 16.
        """
        return F_nn.interpolate(x, size=ref.shape[-2:], mode='bilinear', align_corners=True)

    def forward(self, x):
        """Return the un-activated output map(s) for the image batch ``x``.

        Returns a single tensor, or a list of four tensors when
        ``deep_supervision`` is enabled. Shape comments below assume a
        256x256 input.
        """
        up = self._up_like

        # Encoder
        x0_0 = self.conv0_0(x)               # c=64,   h=256
        x1_0 = self.conv1_0(self.pool(x0_0)) # c=128,  h=128
        x2_0 = self.conv2_0(self.pool(x1_0)) # c=256,  h=64
        x3_0 = self.conv3_0(self.pool(x2_0)) # c=512,  h=32
        x4_0 = self.conv4_0(self.pool(x3_0)) # c=1024, h=16

        # Decoder, column 1.
        # x0_0 (c=64, h=256) + up(x1_0) (c=128, h=256) -> concat c=192, h=256 -> x0_1 c=64, h=256
        x0_1 = self.conv0_1(torch.cat([x0_0, up(x1_0, x0_0)], 1))
        x1_1 = self.conv1_1(torch.cat([x1_0, up(x2_0, x1_0)], 1))
        x2_1 = self.conv2_1(torch.cat([x2_0, up(x3_0, x2_0)], 1))
        x3_1 = self.conv3_1(torch.cat([x3_0, up(x4_0, x3_0)], 1))

        # Column 2.
        x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, up(x1_1, x0_0)], 1))
        x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, up(x2_1, x1_0)], 1))
        x2_2 = self.conv2_2(torch.cat([x2_0, x2_1, up(x3_1, x2_0)], 1))

        # Column 3.
        x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, up(x1_2, x0_0)], 1))
        x1_3 = self.conv1_3(torch.cat([x1_0, x1_1, x1_2, up(x2_2, x1_0)], 1))

        # Column 4.
        x0_4 = self.conv0_4(torch.cat([x0_0, x0_1, x0_2, x0_3, up(x1_3, x0_0)], 1))

        if self.deep_supervision:
            # One 1x1 head per depth-0 decoder node.
            return [head(feat) for head, feat in zip(self.final, (x0_1, x0_2, x0_3, x0_4))]
        return self.final(x0_4)

In [11]:
from torch import nn

# Hyper-parameter grid handed to tune_model for the ResUNet++ architecture.
# 3 learning rates x 2 batch sizes x 2 epoch counts x 1 loss = 12 combinations.
param_grid = {
    "learning_rate": [1e-5, 1e-4, 1e-3],
    "batch_size": [2, 4],
    "num_epochs": [20, 40],
    "loss_function": ["BCEWithLogitsLoss"]
}

# Maps the loss names listed in param_grid to the loss modules to use.
loss_function_map = {
    "BCEWithLogitsLoss": nn.BCEWithLogitsLoss(),
}

# Store the results under a model-specific name: other tuning cells also
# assign to `results_tinyunet`, so that name alone cannot keep every run.
results_resunetplusplus = tune_model(
    model_class=lambda: ResUNetPlusPlus(in_ch=3, out_ch=1, deep_supervision=False),
    # Was "ResUNet_deeper-5", copied from the ResUNet_deeper cell; anything
    # tune_model keys on model_name would have been attributed to (or have
    # overwritten) that other model -- TODO confirm how tune_model uses it.
    model_name="ResUNetPlusPlus-5",
    param_grid=param_grid,
    loss_function_map=loss_function_map,
    trainset=trainset,
    valset=valset,
    device=device
)
# Backward-compatible alias for the name this cell originally assigned.
results_tinyunet = results_resunetplusplus


Running configuration 1/12: {'learning_rate': 1e-05, 'batch_size': 2, 'num_epochs': 20, 'loss_function': 'BCEWithLogitsLoss'}

Epoch 1/20
Train Loss: 0.4406 | Val Loss: 0.3567

Epoch 2/20
Train Loss: 0.2302 | Val Loss: 0.1959

Epoch 3/20
Train Loss: 0.1451 | Val Loss: 0.1276

Epoch 4/20
Train Loss: 0.1022 | Val Loss: 0.0872

Epoch 5/20
Train Loss: 0.0751 | Val Loss: 0.0640

Epoch 6/20
Train Loss: 0.0547 | Val Loss: 0.0454

Epoch 7/20
Train Loss: 0.0409 | Val Loss: 0.0354

Epoch 8/20
Train Loss: 0.0316 | Val Loss: 0.0272

Epoch 9/20
Train Loss: 0.0253 | Val Loss: 0.0219

Epoch 10/20
Train Loss: 0.0221 | Val Loss: 0.0197

Epoch 11/20
Train Loss: 0.0191 | Val Loss: 0.0174

Epoch 12/20
Train Loss: 0.0175 | Val Loss: 0.0157

Epoch 13/20
Train Loss: 0.0158 | Val Loss: 0.0147

Epoch 14/20
Train Loss: 0.0147 | Val Loss: 0.0144

Epoch 15/20
Train Loss: 0.0140 | Val Loss: 0.0134

Epoch 16/20
Train Loss: 0.0135 | Val Loss: 0.0127

Epoch 17/20
Train Loss: 0.0131 | Val Loss: 0.0128

Epoch 18/20
Tr