In [1]:
!pip install segmentation-models-pytorch

Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.5.0-py3-none-any.whl.metadata (17 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8->segmentation-models-pytorch)
  Downloading nvidia_cublas_cu12-12.4.5.8-

In [2]:
import os
import glob
import random
from PIL import Image
import tifffile

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as T
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.metrics import iou_score, f1_score
from tqdm import tqdm

import albumentations as A
from albumentations.pytorch import ToTensorV2



In [3]:
class LGGDataset(Dataset):
    def __init__(self, dirs, transform=None):
        self.samples = []
        self.transform = transform

        for patient_dir in dirs:            
            images = [os.path.join(patient_dir, img) for img in os.listdir(patient_dir) if "_mask.tif" not in img]

            for img in images:
                mask = img.replace(".tif", "_mask.tif")

                image = tifffile.imread(img).astype(np.float32)
                mask = tifffile.imread(mask).astype(np.float32)

                image = image / 255.0
                mask = mask / 255.0

                image = np.array(image)
                mask = np.array(mask)
                
                self.samples.append((image, mask))
                
    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        image, mask = self.samples[idx]

        if self.transform is not None:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        else:
            image = torch.tensor(image).permute(2,0,1).float()
            mask = torch.tensor(mask).float()
            
        if mask.ndim == 2:  
            mask = mask.unsqueeze(0)
        
        return image, mask

In [10]:
root = "/kaggle/input/lgg-mri-segmentation/kaggle_3m/"

patients = sorted([os.path.join(root, p) for p in os.listdir(root)])
patients = [patient for patient in patients if os.path.isdir(patient)]
random.shuffle(patients)

train_patients = patients[:90]
val_patients = patients[90:]

train_transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.RandomRotate90(p=0.5),
    A.Affine(
        translate_percent=(0.05, 0.05),
        scale=(0.95, 1.05),
        rotate=(-20, 20),
        p=0.5
    ),
    A.ElasticTransform(alpha=50, sigma=50, p=0.3),
    A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.3),
    A.RandomBrightnessContrast(p=0.3),
    #A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

val_transform = A.Compose([
    #A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

train_ds = LGGDataset(train_patients, transform=train_transform)
val_ds = LGGDataset(val_patients, transform=val_transform)

train_loader = DataLoader(train_ds, batch_size=4, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=4, shuffle=False)

In [11]:
models = {
    "FPN_resnet34": smp.FPN(
        encoder_name='resnet34',
        encoder_weights="imagenet",
        in_channels=3,
        classes=1,
        activation=None,
    ),
    "FPN_efficientnet": smp.FPN(
        encoder_name='efficientnet-b0',
        encoder_weights="imagenet",
        in_channels=3,
        classes=1,
        activation=None,
    ),
    "FPN_vgg16": smp.FPN(
        encoder_name='vgg16',
        encoder_weights="imagenet",
        in_channels=3,
        classes=1
    ),
}

In [12]:
metrics = {'FPN_resnet34':[],'FPN_efficientnet':[],'FPN_vgg16':[]}

In [13]:
class DiceBCELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.dice = smp.losses.DiceLoss(mode="binary")
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, pred, target):
        return self.dice(pred, target) + self.bce(pred, target)

In [None]:
import csv

device = "cuda"
num_epochs = 35

for name in models.keys():
    with open(f"{name}_metrics.csv", mode='w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['epoch', 'train_loss', 'val_loss', 'IoU', 'Dice'])

patience = 5
min_delta = 1e-4

for name, model in models.items():
    print(f"\nTraining: {name}\n{'-'*40}")
    
    model.to(device)
    loss_val = DiceBCELoss()

    if name == "FPN_vgg16":
        lr = 1e-4
        scheduler_type = "Cosine"
    else:
        lr = 5e-3
        scheduler_type = "ReduceLROnPlateau"
        
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    if scheduler_type == "Cosine":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=num_epochs, eta_min=1e-6
        )
    else:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=2
        )

    best_val_loss = np.inf
    epochs_no_improve = 0
    
    for epoch in range(num_epochs):
        model.train()
        train_losses = []

        for img, mask in tqdm(train_loader, desc=f"Epoch {epoch:02d} [Train]", leave=True):
            img, mask = img.to(device), mask.to(device)
            pred = model(img)
            loss = loss_val(pred, mask)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            train_losses.append(loss.item())
        
        model.eval()
        val_losses, ious, dices = [], [], []

        with torch.no_grad():
            for img, mask in tqdm(val_loader, desc=f"Epoch {epoch:02d} [Val]", leave=True):
                img, mask = img.to(device), mask.to(device)
                pred = model(img)
                loss = loss_val(pred, mask)
                val_losses.append(loss.item())

                tp, fp, fn, tn = smp.metrics.get_stats(pred, mask.int(), mode='binary', threshold=0.5)
                iou = iou_score(tp, fp, fn, tn, reduction="micro")
                dice = f1_score(tp, fp, fn, tn, reduction="micro")

                ious.append(iou.item())
                dices.append(dice.item())
        
        train_loss_avg = np.mean(train_losses)
        val_loss_avg = np.mean(val_losses)
        iou_avg = np.nanmean(ious)
        dice_avg = np.nanmean(dices)

        print(
            f"Epoch {epoch:02d} | "
            f"train_loss={train_loss_avg:.4f} | "
            f"val_loss={val_loss_avg:.4f} | "
            f"IoU={iou_avg:.4f} | "
            f"Dice={dice_avg:.4f} | "
            f"lr={scheduler.get_last_lr()[0]:.6f}"
        )

        metrics[name].append((train_loss_avg, val_loss_avg, iou_avg, dice_avg))

        with open(f"{name}_metrics.csv", mode='a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([epoch, train_loss_avg, val_loss_avg, iou_avg, dice_avg])

        if val_loss_avg + min_delta < best_val_loss:
            best_val_loss = val_loss_avg
            epochs_no_improve = 0
            torch.save(model.state_dict(), f"{name}_best_weights.pth")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break
            
        if scheduler_type == "Cosine":
            scheduler.step()
        else:
            scheduler.step(val_loss_avg)

In [14]:
import matplotlib.pyplot as plt

model_names = ["FPN_resnet34", "FPN_efficientnet", "FPN_vgg16"]

metrics = {}
for name in model_names:
    metrics[name] = []
    with open(f"{name}_metrics.csv", newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            train_loss = float(row['train_loss'])
            val_loss = float(row['val_loss'])
            iou = float(row['IoU'])
            dice = float(row['Dice'])
            metrics[name].append((train_loss, val_loss, iou, dice))

fig, axs = plt.subplots(2, 2, figsize=(14, 10))
axs = axs.ravel()

titles = ["Train Loss", "Val Loss", "IoU", "Dice"]
metric_idx = [0, 1, 2, 3]

for ax, title, idx in zip(axs, titles, metric_idx):
    for model in metrics.keys():
        values = [epoch[idx] for epoch in metrics[model]]
        ax.plot(values, label=model, linewidth=2)

    ax.set_title(title, fontsize=14)
    ax.set_xlabel("Epoch")
    ax.grid(True, alpha=0.3)
    ax.legend()

plt.tight_layout()
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'FPN_resnet34_metrics.csv'

In [15]:
for name, model in models.items():
    starter = torch.cuda.Event(enable_timing=True)
    ender   = torch.cuda.Event(enable_timing=True)
    
    repeats = 1000
    model.to('cuda')
    model.eval()
    
    for img, mask in train_loader:
        img, mask = img.to('cuda'), mask.to('cuda')
    
        with torch.no_grad():
            starter.record()
            for _ in range(repeats):
                model(img)
            ender.record()
        
        torch.cuda.synchronize()
        total_time = starter.elapsed_time(ender)  
        latency = total_time / repeats
        
        print(f"Encoder: {name}, Latency: {latency:.3f} ms")
        break

Encoder: FPN_resnet34, Latency: 16.075 ms
Encoder: FPN_efficientnet, Latency: 13.235 ms
Encoder: FPN_vgg16, Latency: 37.278 ms
