## **Medical Image Processing**
### Lab 3 - Deep Learning


In [2]:
# Google Drive
# Mount Google Drive at /content/drive so that the lab folder referenced later
# (under /content/drive/MyDrive/...) is reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# ---------------------------------------------- Part #2 ----------------------------------------------

**1. Import useful libraries**


In [4]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm

In [5]:
# Show versioning of deep learning libraries
import torch, torchvision
# Prints the installed PyTorch version followed by whether a CUDA device is available.
# NOTE(review): torchvision is imported here but its version is not printed.
print(torch.__version__, torch.cuda.is_available())

2.8.0+cu126 True


In [6]:
# Directory that contains all the data/script of this lab
# (a folder on the Google Drive mounted at /content/drive; the checkpoint and
# dataset paths defined later are built relative to it).
current_dir = '/content/drive/MyDrive/Colab Notebooks/eim/lab3'

**2. Define U-Net model**

In [7]:
import torch.nn as nn
import torch.nn.functional as F

# U-Net is an encoder-decoder architecture with skip connections

"""
Input Image (3, 256, 256)
    ↓
┌─────────────────────────────────────┐
│  ENCODER (Downsampling)             │
│  DoubleConv → MaxPool → DoubleConv  │
│  ┌────────────┐                     │
│  │SkipConnect │ (copies features)   │
│  └────────────┘                     │
└─────────────────────────────────────┘
    ↓
┌─────────────────────────────────────┐
│  BOTTLENECK (deepest level)         │
└─────────────────────────────────────┘
    ↓
┌─────────────────────────────────────┐
│  DECODER (Upsampling)               │
│  Upsample → Concatenate skip        │
│  → DoubleConv                       │
└─────────────────────────────────────┘
    ↓
Output Mask (1, 256, 256)
"""

class DoubleConv(nn.Module):
    """Applies two consecutive conv-batchnorm-relu layers.

    Reusable building block: stacking two 3x3 convolutions (each followed by
    batch normalisation and ReLU) lets the block extract more complex,
    non-linear features than a single convolution would.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        layers = []
        # The first stage maps in_channels -> out_channels, the second keeps
        # out_channels. A 3x3 kernel with 1 pixel of padding preserves the
        # spatial size of the feature map.
        for stage_in in (in_channels, out_channels):
            layers.append(nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))  # normalise the activations
            layers.append(nn.ReLU(inplace=True))
        # Keep the attribute name and layer order so that state_dict keys
        # ("double_conv.0.weight", ...) stay unchanged.
        self.double_conv = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input through both conv-batchnorm-relu stages."""
        return self.double_conv(x)

class UNet(nn.Module):
    """Configurable U-Net: encoder, bottleneck and decoder with skip connections.

    Args:
        in_channels: Channels of the input image (default 3).
        out_channels: Channels of the output map (default 1, i.e. one mask).
        init_filters: Filters of the first encoder block; the count doubles at
            every encoder level (64, 128, 256, 512, ... with the default).
        depth: Number of downsampling levels (with the default of 4 a 256x256
            input goes 256 -> 128 -> 64 -> 32 -> 16).
        bilinear: If True, upsample with bilinear interpolation followed by a
            1x1 convolution that halves the channels; otherwise use a
            transposed convolution, which has more parameters and learns how
            to upsample.

    The forward pass returns the output of the final 1x1 convolution; no
    activation is applied to it.
    """

    def __init__(self,
                 in_channels=3,
                 out_channels=1,
                 init_filters=64,
                 depth=4,
                 bilinear=True):
        super().__init__()
        self.depth = depth
        self.down_layers = nn.ModuleList()
        self.up_layers = nn.ModuleList()
        self.pool = nn.MaxPool2d(2)

        # Encoder: one DoubleConv per level, doubling the filter count each time.
        width = init_filters
        prev_channels = in_channels
        for _ in range(depth):
            self.down_layers.append(DoubleConv(prev_channels, width))
            prev_channels = width
            width *= 2

        # Bottleneck: deepest level, doubles the channels once more.
        self.bottleneck = DoubleConv(prev_channels, width)

        # Decoder: halve the filters at every level (1024 -> 512 -> 256 -> 128 -> 64
        # with the defaults).
        for _ in range(depth):
            width //= 2
            doubled = width * 2
            if bilinear:
                # Double the spatial size by interpolation, then reduce the
                # channels with a 1x1 convolution.
                upsampler = nn.Sequential(
                    nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
                    nn.Conv2d(doubled, width, kernel_size=1),
                )
            else:
                # Transposed convolution: learnable upsampling.
                upsampler = nn.ConvTranspose2d(doubled, width, kernel_size=2, stride=2)
            # The DoubleConv takes `doubled` channels because the upsampled map
            # is concatenated with the skip connection of the same level.
            stage = nn.ModuleDict({
                'up': upsampler,
                'conv': DoubleConv(doubled, width),
            })
            self.up_layers.append(stage)

        # Output layer: 1x1 convolution from init_filters channels to
        # out_channels; a kernel of size 1 leaves the spatial size unchanged.
        self.out_conv = nn.Conv2d(init_filters, out_channels, kernel_size=1)

    def forward(self, x):
        """Encode, pass through the bottleneck, then decode using the skips."""
        saved = []
        for encoder_block in self.down_layers:
            x = encoder_block(x)
            saved.append(x)  # keep the pre-pooling features for the decoder
            x = self.pool(x)

        # Deepest level; with the defaults and a 256x256 input:
        # (512, 16, 16) -> (1024, 16, 16).
        x = self.bottleneck(x)

        for level in range(self.depth):
            stage = self.up_layers[level]
            # The decoder walks bottom-up while the skips were stored top-down,
            # so -(level + 1) picks them in reverse order (last saved first).
            skip = saved[-(level + 1)]
            upsampled = stage['up'](x)
            if upsampled.size() != skip.size():
                # Resize in case of odd size mismatch
                upsampled = F.interpolate(upsampled, size=skip.shape[2:])
            merged = torch.cat([skip, upsampled], dim=1)
            x = stage['conv'](merged)

        return self.out_conv(x)

        # Author's note: more filters in the encoder capture more complex
        # features, while the decoder, with fewer filters, reconstructs details.

Define and test a sample model with a random input tensor

In [6]:
# Test the model
# Smoke test: push one random RGB image through a small U-Net and verify that
# the output keeps the input's spatial size with a single channel.
model = UNet(in_channels=3, out_channels=1, init_filters=32, depth=3)
x = torch.randn(1, 3, 256, 256)  # example input
out = model(x)
print(out.shape)  # should be [1, 1, 256, 256]
# Make the expectation executable so a regression fails loudly on "Run All"
# instead of relying on someone reading the printed shape.
assert out.shape == (1, 1, 256, 256), f"Unexpected output shape: {tuple(out.shape)}"

torch.Size([1, 1, 256, 256])


In [7]:
import torch
from torchsummary import summary

# Pick the GPU when one is available, otherwise fall back to the CPU.
# NOTE: `device` is defined in this cell and reused by the later training cells.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Shallower model (depth=2) built directly on the selected device; it replaces
# the `model` created in the previous cell.
model = UNet(in_channels=3, out_channels=1, init_filters=32, depth=2).to(device)

# Layer-by-layer output shapes and parameter counts for a 3x256x256 input.
summary(model, input_size=(3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             896
       BatchNorm2d-2         [-1, 32, 256, 256]              64
              ReLU-3         [-1, 32, 256, 256]               0
            Conv2d-4         [-1, 32, 256, 256]           9,248
       BatchNorm2d-5         [-1, 32, 256, 256]              64
              ReLU-6         [-1, 32, 256, 256]               0
        DoubleConv-7         [-1, 32, 256, 256]               0
         MaxPool2d-8         [-1, 32, 128, 128]               0
            Conv2d-9         [-1, 64, 128, 128]          18,496
      BatchNorm2d-10         [-1, 64, 128, 128]             128
             ReLU-11         [-1, 64, 128, 128]               0
           Conv2d-12         [-1, 64, 128, 128]          36,928
      BatchNorm2d-13         [-1, 64, 128, 128]             128
             ReLU-14         [-1, 64, 1

**3. Define hyperparameters for network training**

In [8]:
# Network architecture
input_size = (256,256)
in_channels = 3    # 1 for grayscale, 3 for RGB images
out_channels = 1   # 1 for binary segmentation, N for multiclass segmentation
init_filters = 32
depth = 3

# Training hyperparameters
criterion = nn.BCEWithLogitsLoss()  # loss function (expects raw logits)
n_epochs = 10  # starting point; increase to 20-30 for better results
batch_size = 4 # between 2 and 8
learning_rate = 1e-3
checkpoint_freq = 1  # save a checkpoint every `checkpoint_freq` epochs
checkpoint_dir = os.path.join(current_dir,'checkpoints')  # path where checkpoints will be saved

# Create the checkpoint folder if needed. exist_ok=True makes this idempotent
# and avoids the check-then-create race of `os.path.exists` + `os.makedirs`.
os.makedirs(checkpoint_dir, exist_ok=True)

# Paths for dataset
base_dir = os.path.join(current_dir,'HAM1000')

train_img_dir = os.path.join(base_dir, 'train', 'image')
train_mask_dir = os.path.join(base_dir, 'train', 'manual')

val_img_dir = os.path.join(base_dir, 'val', 'image')
val_mask_dir = os.path.join(base_dir, 'val', 'manual')

In [9]:
# Save config file with the user-defined parameters of the current run
import json

# Snapshot of the user-defined settings of this run, written as JSON next to
# the checkpoints so each run's configuration is recorded.
params = dict(
    input_size=input_size,
    in_channels=in_channels,
    out_channels=out_channels,
    init_filters=init_filters,
    depth=depth,
    n_epochs=n_epochs,
    batch_size=batch_size,
    learning_rate=learning_rate,
    checkpoint_freq=checkpoint_freq,
)

params_path = os.path.join(checkpoint_dir, 'training_params.json')
with open(params_path, 'w') as f:
    # Serialise first, then write: same file content as json.dump(params, f, indent=4).
    f.write(json.dumps(params, indent=4))
print(f"Training parameters saved to {params_path}")

Training parameters saved to /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/training_params.json


Dataloader definition

In [10]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Data-loading pipeline for U-Net training
class DermaDataset(Dataset):
    """Paired image / segmentation-mask dataset read from two directories.

    Images and masks are paired by position after sorting both directory
    listings, so the two folders must contain the same number of files and
    sort in the same order.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the ground-truth masks.
        transform: Optional callable applied to each RGB image and, unless
            ``mask_transform`` is given, also to each mask. It is called
            separately on image and mask, so random transforms are not
            synchronised between the two.
        mask_transform: Optional callable applied to the mask instead of
            ``transform`` (for example a resize that uses nearest-neighbour
            interpolation). Defaults to ``None``, which reuses ``transform``.

    ``__getitem__`` returns ``(image, mask)``. The mask is always a float
    tensor holding only 0.0 and 1.0; the image is whatever ``transform``
    returns (the raw PIL image when ``transform`` is ``None``).
    """

    def __init__(self, image_dir, mask_dir, transform=None, mask_transform=None):
        # Store the folder paths and the transformations.
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mask_transform = mask_transform
        self.image_list = sorted(os.listdir(image_dir))
        self.mask_list = sorted(os.listdir(mask_dir))
        assert len(self.image_list) == len(self.mask_list), "Number of images and masks do not match"

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.image_list)

    @staticmethod
    def _to_binary_mask(mask):
        """Convert a mask (tensor, PIL image or array) to a 0/1 float tensor."""
        if not torch.is_tensor(mask):
            # Without a tensor-producing transform the mask is still a PIL
            # image (or array); comparing it with `> 0` directly would raise.
            mask = torch.from_numpy(np.array(mask))
            if mask.dim() == 2:
                mask = mask.unsqueeze(0)  # add a channel axis: (H, W) -> (1, H, W)
        # Any non-zero pixel counts as foreground.
        return (mask > 0).float()

    def __getitem__(self, idx):
        """Load, transform and return the ``idx``-th (image, mask) pair."""
        # Called every time the DataLoader fetches a sample.
        img_path = os.path.join(self.image_dir, self.image_list[idx])
        mask_path = os.path.join(self.mask_dir, self.mask_list[idx])

        image = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')  # grayscale mask

        if self.transform:
            image = self.transform(image)

        # Use the dedicated mask transform when provided, else fall back to
        # the shared one (original behaviour).
        mask_tf = self.mask_transform if self.mask_transform is not None else self.transform
        if mask_tf:
            mask = mask_tf(mask)

        # Convert mask to binary float tensor (0 or 1)
        mask = self._to_binary_mask(mask)

        return image, mask


# Transformations applied to both images and masks
# Resize to `input_size`, then convert the PIL image to a tensor.
# NOTE(review): the same Resize is applied to the masks, which DermaDataset then
# binarises with `mask > 0`. If Resize interpolates (torchvision's default is
# bilinear - confirm for the installed version), blended edge pixels become
# foreground; nearest-neighbour resizing is usually preferred for masks.
transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
])

# Initialize datasets and corresponding dataloaders
train_dataset = DermaDataset(train_img_dir, train_mask_dir, transform=transform)
val_dataset = DermaDataset(val_img_dir, val_mask_dir, transform=transform)

# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

**4. Actual training**

In [11]:
import torch.optim as optim

# Model initialization
# Build the U-Net from the hyperparameters defined above and move it to `device`.
# NOTE: `device` is created in the earlier torchsummary cell, so that cell must
# have been run before this one.
model = UNet(in_channels, out_channels, init_filters, depth).to(device)
# Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [12]:
from sklearn.metrics import jaccard_score

# Train for `n_epochs` epochs; after each one evaluate on the validation set
# (loss + macro-averaged IoU) and save a checkpoint every `checkpoint_freq` epochs.
for epoch in range(n_epochs):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0  # running sum of per-sample losses for this epoch

    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{n_epochs} - Training")
    for images, masks in progress:
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)  # logits
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so that dividing by the
        # dataset size below yields the average loss per sample.
        train_loss += loss.item() * images.size(0)

    train_loss /= len(train_loader.dataset)
    print(f"Epoch {epoch+1} Train Loss: {train_loss:.4f}")

    # ---- Validation pass ----
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_masks = []

    with torch.no_grad():
        for images, masks in val_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)

            loss = criterion(outputs, masks)
            val_loss += loss.item() * images.size(0)

            # Ground-truth labels as integers (identical for both cases below).
            true = masks.long()
            if out_channels == 1:
                # Binary segmentation: sigmoid + threshold
                probs = torch.sigmoid(outputs)
                preds = (probs > 0.5).long()
            else:
                # Multi-class segmentation: argmax
                preds = torch.argmax(outputs, dim=1)

            # Collect per-pixel labels as flat arrays for the epoch-level metric.
            all_preds.append(preds.cpu().numpy().ravel())
            all_masks.append(true.cpu().numpy().ravel())

    val_loss /= len(val_loader.dataset)

    # mIoU (Jaccard) metric over all validation pixels, averaged across classes
    y_true = np.concatenate(all_masks)
    y_pred = np.concatenate(all_preds)
    iou = jaccard_score(y_true, y_pred, average='macro')

    print(f"Epoch {epoch+1} Val Loss: {val_loss:.4f} - Val IoU: {iou:.4f}")

    # Save checkpoint every [checkpoint_freq] epochs (model weights only)
    if (epoch + 1) % checkpoint_freq == 0:
        checkpoint_path = os.path.join(checkpoint_dir, f"model_epoch_{epoch+1}.pt")
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Checkpoint saved at {checkpoint_path}")

print("Training complete!")


Epoch 1/10 - Training: 100%|██████████| 52/52 [00:38<00:00,  1.35it/s]

Epoch 1 Train Loss: 0.4285





Epoch 1 Val Loss: 0.5409 - Val IoU: 0.6115
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_1.pt


Epoch 2/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 11.65it/s]

Epoch 2 Train Loss: 0.3625





Epoch 2 Val Loss: 0.3996 - Val IoU: 0.6514
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_2.pt


Epoch 3/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 12.26it/s]

Epoch 3 Train Loss: 0.3583





Epoch 3 Val Loss: 0.4390 - Val IoU: 0.6268
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_3.pt


Epoch 4/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 12.15it/s]

Epoch 4 Train Loss: 0.3128





Epoch 4 Val Loss: 0.3619 - Val IoU: 0.6272
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_4.pt


Epoch 5/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 11.82it/s]

Epoch 5 Train Loss: 0.3210





Epoch 5 Val Loss: 0.4203 - Val IoU: 0.6551
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_5.pt


Epoch 6/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 12.12it/s]

Epoch 6 Train Loss: 0.3204





Epoch 6 Val Loss: 0.4137 - Val IoU: 0.5974
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_6.pt


Epoch 7/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 11.39it/s]

Epoch 7 Train Loss: 0.2956





Epoch 7 Val Loss: 0.3428 - Val IoU: 0.6710
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_7.pt


Epoch 8/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 12.13it/s]

Epoch 8 Train Loss: 0.2902





Epoch 8 Val Loss: 0.3843 - Val IoU: 0.6889
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_8.pt


Epoch 9/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 11.67it/s]

Epoch 9 Train Loss: 0.3042





Epoch 9 Val Loss: 0.3344 - Val IoU: 0.6924
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_9.pt


Epoch 10/10 - Training: 100%|██████████| 52/52 [00:04<00:00, 12.05it/s]

Epoch 10 Train Loss: 0.2835





Epoch 10 Val Loss: 0.3618 - Val IoU: 0.6828
Checkpoint saved at /content/drive/MyDrive/Colab Notebooks/eim/lab3/checkpoints/model_epoch_10.pt
Training complete!


Extra (do not run until the end of Part #3)

In [8]:
class DiceLoss(nn.Module):
    """Soft Dice loss computed from raw logits.

    The logits are passed through a sigmoid, predictions and targets are
    flattened over all elements (batch included), and a single Dice
    coefficient is computed::

        dice = (2 * sum(p * t) + smooth) / (sum(p) + sum(t) + smooth)

    The returned loss is ``1 - dice``.

    Args:
        smooth: Constant added to numerator and denominator so the ratio stays
            defined when both sums are zero.
    """

    def __init__(self, smooth=1e-6):
        super().__init__()
        self.smooth = smooth

    def forward(self, inputs, targets):
        """Return the Dice loss between ``inputs`` (logits) and ``targets``.

        Args:
            inputs: Raw logits; a sigmoid is applied here.
            targets: Ground-truth tensor with the same number of elements.

        Returns:
            A scalar tensor equal to ``1 - dice``.
        """
        # inputs: raw logits -> apply sigmoid
        probs = torch.sigmoid(inputs)

        # Flatten. reshape (rather than view) also accepts non-contiguous
        # tensors, e.g. transposed or sliced ones, where view(-1) raises.
        probs = probs.reshape(-1)
        targets = targets.reshape(-1)

        intersection = (probs * targets).sum()
        dice = (2. * intersection + self.smooth) / (
            probs.sum() + targets.sum() + self.smooth
        )

        return 1 - dice  # Dice Loss

In [9]:
# Replace the loss used for training (BCEWithLogitsLoss in the hyperparameter
# cell) with the Dice loss. The training cell reads `criterion` when it runs,
# so it must be re-run for this change to take effect; re-run the model
# initialization cell first to train from scratch.
criterion = DiceLoss()