In [4]:
!pip install segmentation-models-pytorch==0.3.3 --quiet
!pip install --upgrade torch torchvision --quiet

import torch
import segmentation_models_pytorch as smp
from torch import nn

# Recommended safety flags
import torch.backends.cudnn as cudnn
cudnn.enabled = True
cudnn.benchmark = False
cudnn.deterministic = True

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Recreate model AFTER setting device
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
)
model = model.to(device)


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/58.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m58.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 272MB/s]


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm
import numpy as np
import os
import pandas as pd


In [6]:
# Dummy tensor
x = torch.randn(1, 3, 256, 256).to(device)
with torch.no_grad():
    y = model(x)
print("Output shape:", y.shape)


Output shape: torch.Size([1, 1, 256, 256])


In [7]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("cuDNN Version:", torch.backends.cudnn.version())


CUDA Available: True
cuDNN Version: 90100


In [8]:
# 📦 Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# 🛤️ Set base path and dataset names
import os
dataset_names = ['CVC-ClinicDB', 'CVC-ColonDB', 'ETIS-LaribPolypDB', 'Kvasir-SEG']
base_path = '/content/drive/My Drive/GT/DL/GroupProject/Datasets/'

# 📁 Build paths for each dataset
all_dataset_paths = []

for dataset in dataset_names:
    dataset_path = os.path.join(base_path, dataset)
    paths = {
        'name': dataset,
        'train': {
            'images': os.path.join(dataset_path, 'train', 'images'),
            'masks': os.path.join(dataset_path, 'train', 'masks')
        },
        'val': {
            'images': os.path.join(dataset_path, 'validation', 'images'),
            'masks': os.path.join(dataset_path, 'validation', 'masks')
        },
        'test': {
            'images': os.path.join(dataset_path, 'test', 'images'),
            'masks': os.path.join(dataset_path, 'test', 'masks')
        }
    }
    all_dataset_paths.append(paths)

# ✅ Print all paths for each dataset
for dataset in all_dataset_paths:
    print(f"\n📂 DATASET: {dataset['name']}")
    for split in ['train', 'val', 'test']:
        print(f"{split.upper()} IMAGES PATH: {dataset[split]['images']}")
        print(f"{split.upper()} MASKS PATH:  {dataset[split]['masks']}")


Mounted at /content/drive

📂 DATASET: CVC-ClinicDB
TRAIN IMAGES PATH: /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ClinicDB/train/images
TRAIN MASKS PATH:  /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ClinicDB/train/masks
VAL IMAGES PATH: /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ClinicDB/validation/images
VAL MASKS PATH:  /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ClinicDB/validation/masks
TEST IMAGES PATH: /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ClinicDB/test/images
TEST MASKS PATH:  /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ClinicDB/test/masks

📂 DATASET: CVC-ColonDB
TRAIN IMAGES PATH: /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ColonDB/train/images
TRAIN MASKS PATH:  /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ColonDB/train/masks
VAL IMAGES PATH: /content/drive/My Drive/GT/DL/GroupProject/Datasets/CVC-ColonDB/validation/images
VAL MASKS PATH:  /content/drive/My Drive/GT/DL/GroupPr

In [9]:
# import cv2
# import os
# import matplotlib.pyplot as plt

# # Visualize sample pairs for each split
# for dataset in all_dataset_paths:
#   for split in ['train', 'val', 'test']:
#       print(f"\n📂 DATASET: {dataset['name']}")

#       image_dir = dataset[split]['images']
#       mask_dir = dataset[split]['masks']

#       # Get sorted file lists
#       image_files = sorted(os.listdir(image_dir))
#       mask_files = sorted(os.listdir(mask_dir))

#       # Pick a sample index safely (in case folders are small)
#       sample_idx = min(30, len(image_files) - 1)

#       # Read image and mask
#       image_path = os.path.join(image_dir, image_files[sample_idx])
#       mask_path = os.path.join(mask_dir, mask_files[sample_idx])

#       image = cv2.imread(image_path)
#       image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

#       mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

#       # Plot
#       plt.figure(figsize=(10, 4))
#       plt.suptitle(f"{split.upper()} SET SAMPLE", fontsize=14)

#       plt.subplot(1, 2, 1)
#       plt.imshow(image)
#       plt.title("Image")
#       plt.axis("off")

#       plt.subplot(1, 2, 2)
#       plt.imshow(mask, cmap='gray')
#       plt.title("Mask")
#       plt.axis("off")

#       plt.show()


In [10]:
import os
from torch.utils.data import Dataset
from PIL import Image

class PolypSegmentationDataset(Dataset):
    def __init__(self, images_dir, masks_dir, transform=None, mask_transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.image_filenames = sorted(os.listdir(images_dir))
        self.mask_filenames = sorted(os.listdir(masks_dir))
        self.transform = transform
        self.mask_transform = mask_transform

        assert len(self.image_filenames) == len(self.mask_filenames), \
            "Mismatch between number of images and masks."

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        # Load image and mask
        img_path = os.path.join(self.images_dir, self.image_filenames[idx])
        mask_path = os.path.join(self.masks_dir, self.mask_filenames[idx])

        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")  # grayscale mask

        # Apply transformations
        if self.transform:
            image = self.transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)

        return image, mask


In [11]:
image_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

mask_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),  # Grayscale mask in [0,1]
])


In [12]:
from torch.utils.data import DataLoader, ConcatDataset

def get_data_loaders(all_dataset_paths, image_transform, mask_transform, batch_size=16, num_workers=2, pin_memory=True, combine_datasets=False):
    """
    Returns dataloaders either per dataset or combined, depending on `combine_datasets`.

    Args:
        all_dataset_paths: list of dicts with dataset paths.
        image_transform: transform function for input images.
        mask_transform: transform function for target masks.
        batch_size: batch size for loaders.
        num_workers: number of subprocesses to use for data loading.
        pin_memory: whether to pin memory during data transfer to GPU.
        combine_datasets: if True, returns combined loaders; otherwise returns one set per dataset.

    Returns:
        List of loaders or single dictionary of loaders (if combined).
    """
    if combine_datasets:
        train_datasets, val_datasets, test_datasets = [], [], []

        for dataset_paths in all_dataset_paths:
            train_datasets.append(PolypSegmentationDataset(
                dataset_paths['train']['images'],
                dataset_paths['train']['masks'],
                image_transform,
                mask_transform
            ))
            val_datasets.append(PolypSegmentationDataset(
                dataset_paths['val']['images'],
                dataset_paths['val']['masks'],
                image_transform,
                mask_transform
            ))
            test_datasets.append(PolypSegmentationDataset(
                dataset_paths['test']['images'],
                dataset_paths['test']['masks'],
                image_transform,
                mask_transform
            ))

        train_dataset = ConcatDataset(train_datasets)
        val_dataset = ConcatDataset(val_datasets)
        test_dataset = ConcatDataset(test_datasets)

        loaders = {
            'name': 'CombinedDataset',
            'train_dataset': train_dataset,
            'val_dataset': val_dataset,
            'test_dataset': test_dataset,
            'train_loader': DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory),
            'val_loader': DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory),
            'test_loader': DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory)
        }

        # ✅ Print summary for combined dataset
        print(f"\n✅ Combined dataset loaded:")
        print(f"Train samples: {len(train_dataset)}")
        print(f"Val samples:   {len(val_dataset)}")
        print(f"Test samples:  {len(test_dataset)}")

        return loaders

    else:
        all_data_loaders = []

        for dataset_paths in all_dataset_paths:
            dataset_name = dataset_paths['name']

            train_dataset = PolypSegmentationDataset(
                dataset_paths['train']['images'],
                dataset_paths['train']['masks'],
                image_transform,
                mask_transform
            )
            val_dataset = PolypSegmentationDataset(
                dataset_paths['val']['images'],
                dataset_paths['val']['masks'],
                image_transform,
                mask_transform
            )
            test_dataset = PolypSegmentationDataset(
                dataset_paths['test']['images'],
                dataset_paths['test']['masks'],
                image_transform,
                mask_transform
            )

            loaders = {
                'name': dataset_name,
                'train_dataset': train_dataset,
                'val_dataset': val_dataset,
                'test_dataset': test_dataset,
                'train_loader': DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory),
                'val_loader': DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory),
                'test_loader': DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory)
            }

            all_data_loaders.append(loaders)

            # ✅ Print summary per dataset
            print(f"\n📦 {dataset_name} loaded:")
            print(f"Train samples: {len(train_dataset)}")
            print(f"Val samples:   {len(val_dataset)}")
            print(f"Test samples:  {len(test_dataset)}")

        return all_data_loaders


In [13]:
combined_loaders = get_data_loaders(all_dataset_paths, image_transform, mask_transform,batch_size = 32, combine_datasets=True)



✅ Combined dataset loaded:
Train samples: 1748
Val samples:   220
Test samples:  220


In [14]:
separate_loaders = get_data_loaders(all_dataset_paths, image_transform, mask_transform, combine_datasets=False)



📦 CVC-ClinicDB loaded:
Train samples: 488
Val samples:   62
Test samples:  62

📦 CVC-ColonDB loaded:
Train samples: 304
Val samples:   38
Test samples:  38

📦 ETIS-LaribPolypDB loaded:
Train samples: 156
Val samples:   20
Test samples:  20

📦 Kvasir-SEG loaded:
Train samples: 800
Val samples:   100
Test samples:  100


In [15]:
from torch.utils.data import DataLoader, ConcatDataset

# 🧠 Assume: all_dataset_paths is already defined as in the previous cell
# and image_transform, mask_transform are also defined

combined_train_datasets = []
all_data_loaders = []

for dataset_paths in all_dataset_paths:
    dataset_name = dataset_paths['name']

    # Create datasets
    train_dataset = PolypSegmentationDataset(
        dataset_paths['train']['images'],
        dataset_paths['train']['masks'],
        image_transform,
        mask_transform
    )
    val_dataset = PolypSegmentationDataset(
        dataset_paths['val']['images'],
        dataset_paths['val']['masks'],
        image_transform,
        mask_transform
    )
    test_dataset = PolypSegmentationDataset(
        dataset_paths['test']['images'],
        dataset_paths['test']['masks'],
        image_transform,
        mask_transform
    )

    combined_train_datasets.append(train_dataset)

    # Create dataloaders
    loaders = {
        'name': dataset_name,
        'train_dataset': train_dataset,
        'val_dataset': val_dataset,
        'test_dataset': test_dataset,
        'train_loader': DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers = 2, pin_memory = True),
        'val_loader': DataLoader(val_dataset, batch_size=16, num_workers = 2, pin_memory = True),
        'test_loader': DataLoader(test_dataset, batch_size=16, num_workers = 2, pin_memory = True)
    }

    all_data_loaders.append(loaders)

full_train_dataset = ConcatDataset(combined_train_datasets)
train_loader = DataLoader(full_train_dataset, batch_size=16, shuffle=True, num_workers=2, pin_memory=True)

#all_data_loaders.append(train_loader)
print(f"\n🧠 Combined Train Dataset: {len(full_train_dataset)} samples")

# ✅ Print confirmation for each dataset
for loaders in all_data_loaders:
    print(f"\n📦 {loaders['name']} loaded:")
    print(f"Train samples: {len(loaders['train_dataset'])}")
    print(f"Val samples:   {len(loaders['val_dataset'])}")
    print(f"Test samples:  {len(loaders['test_dataset'])}")



🧠 Combined Train Dataset: 1748 samples

📦 CVC-ClinicDB loaded:
Train samples: 488
Val samples:   62
Test samples:  62

📦 CVC-ColonDB loaded:
Train samples: 304
Val samples:   38
Test samples:  38

📦 ETIS-LaribPolypDB loaded:
Train samples: 156
Val samples:   20
Test samples:  20

📦 Kvasir-SEG loaded:
Train samples: 800
Val samples:   100
Test samples:  100


# U-Net



In [17]:
# Dice + BCE Loss
# dice loss is based on dice coefficient, which is a measure of overlap between two samples
#  - commonly used in image segmentation tasks
#  - Dice loss = 1 - Dice Coefficient
#         - Dice coefficient ranges from 0 (no overlap) to 1 (perfect overlap)
#
def dice_loss(pred, target, smooth=1.):
    #
    pred = torch.sigmoid(pred).view(-1)

    # flatten both tensors to prep them for comparison
    pred = pred.view(-1)
    target = target.view(-1)

    intersection = (pred * target).sum() # pixel wise product
    return 1 - ((2. * intersection + smooth) / (pred.sum() + target.sum() + smooth))

# binary cross entropy since this is a segmentation task
bce_loss = nn.BCEWithLogitsLoss()

def combined_loss(pred, target):
    return bce_loss(pred, target) + dice_loss(pred, target)

def iou_score(preds, masks, threshold=0.5):
    preds = torch.sigmoid(preds) > threshold
    masks = masks > 0.5
    preds = preds.view(-1)
    masks = masks.view(-1)

    intersection = (preds & masks).float().sum()
    union = (preds | masks).float().sum()
    return ((intersection + 1e-6) / (union + 1e-6)).item()


def compute_metrics(preds, masks, threshold=0.5):
    # transform preds from logits to probabilities using torch.sigmoid
    # then transform from probabilities to binary using threshold
    preds = torch.sigmoid(preds) > threshold

    masks = masks > 0.5 # binarize the mask

    # flatten the predicted segmentations for the entire batch
    # from [B, 1, H, W] to a 1D vector. Basically, treating the entire batch as
    # 1 giant image
    preds = preds.view(-1)
    masks = masks.view(-1)

    # use logical & to compute figures necessary for confusion matrix calculations
    TP = (preds & masks).sum().float() # bitwise 1 and 1 match
    FP = (preds & ~masks).sum().float() # bitwise pred 1 yet mask was 0 (ie false positive)
    FN = (~preds & masks).sum().float() # bitwise pred 0 yet mask was 1 (ie false negative)
    TN = (~preds & ~masks).sum().float() # bitwise pred 0 and mask 0 (ie true negative)

    epsilon = 1e-6 # add epsilon to make sure there's no division by 0
    precision = TP / (TP + FP + epsilon)
    recall = TP / (TP + FN + epsilon)
    accuracy = (TP + TN) / (TP + TN + FP + FN + epsilon)
    dice = 2 * TP / (2 * TP + FP + FN + epsilon)
    jaccard = TP / (TP + FP + FN + epsilon)

    return {
        'precision': precision.item(),
        'recall': recall.item(),
        'accuracy': accuracy.item(),
        'dice': dice.item(),
        'jaccard': jaccard.item()
    }



In [18]:
def train_epoch(model, loader, optimizer):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    running_loss = 0
    for imgs, masks in tqdm(loader):
        imgs = imgs.cuda() if torch.cuda.is_available() else imgs
        masks = masks.cuda() if torch.cuda.is_available() else masks
        # print(f"Image batch shape: {imgs.shape}")
        # print(f"Mask batch shape: {masks.shape}")
        # print(f"Image dtype: {imgs.dtype}, device: {imgs.device}")
        # print(f"Mask dtype: {masks.dtype}, device: {masks.device}")
        preds = model(imgs)
        loss = combined_loss(preds, masks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(loader)
def validate_epoch(model, loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    val_loss = 0
    val_iou = 0
    metrics_accumulator = {'precision': 0, 'recall': 0, 'accuracy': 0, 'dice': 0, 'jaccard': 0}

    with torch.no_grad():
        for imgs, masks in loader:
            imgs = imgs.to(device)
            masks = masks.to(device)

            preds = model(imgs)
            val_loss += combined_loss(preds, masks).item()
            val_iou += iou_score(preds, masks)

            metrics = compute_metrics(preds, masks)
            for k in metrics_accumulator:
                metrics_accumulator[k] += metrics[k]

    avg_metrics = {k: v / len(loader) for k, v in metrics_accumulator.items()}
    avg_metrics['loss'] = val_loss / len(loader)
    avg_metrics['iou'] = val_iou / len(loader)

    return avg_metrics



In [19]:
if torch.cuda.is_available():
    print(f"🔋 Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("⚠️ GPU not available. Using CPU.")


🔋 Using GPU: Tesla T4


In [20]:
import time
import torch
import pandas as pd
import torch.optim as optim

def train_and_cross_test(model_class, all_data_loaders, num_epochs=10, lr=1e-4, patience=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    final_results = []

    # ⏱️ Track total training + testing time
    total_start_time = time.time()

    for i, train_val_data in enumerate(all_data_loaders):
        print(f"\n🚀 Training on: {train_val_data['name']}")
        model = model_class().to(device)
        optimizer = optim.Adam(model.parameters(), lr=lr)

        train_loader = train_val_data['train_loader']
        val_loader = train_val_data['val_loader']

        best_val_loss = float('inf')
        best_model_state = None
        epochs_without_improvement = 0

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch+1}/{num_epochs}")

            # ⏱️ Epoch timing
            epoch_start_time = time.time()
            batch_times = []

            model.train()
            running_loss = 0
            for batch_idx, (imgs, masks) in enumerate(train_loader):
                batch_start_time = time.time()

                imgs = imgs.to(device)
                masks = masks.to(device)

                preds = model(imgs)
                loss = combined_loss(preds, masks)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                batch_duration = time.time() - batch_start_time
                batch_times.append(batch_duration)

                print(f"  🧪 Batch {batch_idx+1}/{len(train_loader)} — Loss: {loss.item():.4f} — Time: {batch_duration:.2f}s")

            train_loss = running_loss / len(train_loader)
            val_metrics = validate_epoch(model, val_loader)
            val_loss = val_metrics['loss']

            epoch_duration = time.time() - epoch_start_time
            avg_batch_time = sum(batch_times) / len(batch_times) if batch_times else 0

            print(f"🕒 Epoch Time: {epoch_duration:.2f}s | Avg Batch Time: {avg_batch_time:.2f}s")
            print(f"📊 Train Loss: {train_loss:.4f} | "
                  f"Val Loss: {val_loss:.4f} | "
                  f"IoU: {val_metrics['iou']:.4f} | "
                  f"Dice: {val_metrics['dice']:.4f} | "
                  f"Jaccard: {val_metrics['jaccard']:.4f} | "
                  f"Precision: {val_metrics['precision']:.4f} | "
                  f"Recall: {val_metrics['recall']:.4f} | "
                  f"Accuracy: {val_metrics['accuracy']:.4f}")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model_state = model.state_dict()
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    print(f"⏹️ Early stopping triggered after {epoch+1} epochs.")
                    break

        # Restore the best model before testing
        if best_model_state is not None:
            model.load_state_dict(best_model_state)

        print(f"\n🧪 Testing model trained on {train_val_data['name']} on all datasets...")

        test_results = []
        for j, test_data in enumerate(all_data_loaders):
            test_loader = test_data['test_loader']
            test_metrics = validate_epoch(model, test_loader)

            result = {
                "Trained On": train_val_data['name'],
                "Tested On": test_data['name'],
                "Loss": test_metrics['loss'],
                "IoU": test_metrics['iou'],
                "Dice": test_metrics['dice'],
                "Jaccard": test_metrics['jaccard'],
                "Precision": test_metrics['precision'],
                "Recall": test_metrics['recall'],
                "Accuracy": test_metrics['accuracy'],
            }

            test_results.append(result)
            final_results.append(result)

        df = pd.DataFrame(test_results)
        print(df.to_markdown(index=False))

    # ⏱️ Print total time
    total_duration = time.time() - total_start_time
    print(f"\n⏱️ Total Time for Training + Testing: {total_duration:.2f} seconds")

    print("\n📋 Final Cross-Dataset Testing Summary:")
    final_df = pd.DataFrame(final_results)
    print(final_df.to_markdown(index=False))
    return final_df, model


In [21]:
import time
import torch
import pandas as pd

def evaluate_model_on_datasets(trained_model, all_data_loaders):
    """
    Evaluates a trained model on all test sets in the provided data loaders.

    Args:
        trained_model: A PyTorch model (with weights already loaded).
        all_data_loaders: List of dicts containing 'name' and 'test_loader'.

    Returns:
        DataFrame with test metrics per dataset + appended row of average scores.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    trained_model = trained_model.to(device)
    trained_model.eval()

    print("\n🧪 Evaluating trained model on all test sets...\n")
    all_test_results = []

    test_start_time = time.time()

    for test_data in all_data_loaders:
        test_loader = test_data['test_loader']
        dataset_name = test_data['name']

        print(f"🔍 Testing on: {dataset_name}")
        test_metrics = validate_epoch(trained_model, test_loader)

        result = {
            "Tested On": dataset_name,
            "Loss": test_metrics['loss'],
            "IoU": test_metrics['iou'],
            "Dice": test_metrics['dice'],
            "Jaccard": test_metrics['jaccard'],
            "Precision": test_metrics['precision'],
            "Recall": test_metrics['recall'],
            "Accuracy": test_metrics['accuracy'],
        }

        all_test_results.append(result)

    test_duration = time.time() - test_start_time
    print(f"\n⏱️ Total Evaluation Time: {test_duration:.2f} seconds")

    result_df = pd.DataFrame(all_test_results)

    # 🔢 Compute average across all numeric columns
    avg_row = result_df.drop(columns=["Tested On"]).mean()
    avg_row["Tested On"] = "Average"

    # Add the average row at the end
    result_df = pd.concat([result_df, pd.DataFrame([avg_row])], ignore_index=True)

    print("\n📋 Test Results Across Datasets (with Average):")
    print(result_df.to_markdown(index=False))

    return result_df


# U-Net

In [27]:
!pip install segmentation-models-pytorch --quiet

def unet_model_factory():
    return smp.Unet(
        encoder_name="efficientnet-b0",     # encoder backbone
        encoder_weights="imagenet",  # pretrained weights
        in_channels=3,               # rgb images
        classes=1,                   # binary segmentation
    )


In [28]:
u_net_results, trained_model = train_and_cross_test(model_class=unet_model_factory, all_data_loaders=[combined_loaders], num_epochs=100)


🚀 Training on: CombinedDataset


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 288MB/s]


Epoch 1/100





  🧪 Batch 1/55 — Loss: 1.8745 — Time: 0.96s
  🧪 Batch 2/55 — Loss: 1.8415 — Time: 0.47s
  🧪 Batch 3/55 — Loss: 1.8808 — Time: 0.49s
  🧪 Batch 4/55 — Loss: 1.8054 — Time: 0.47s
  🧪 Batch 5/55 — Loss: 1.7843 — Time: 0.53s
  🧪 Batch 6/55 — Loss: 1.7477 — Time: 0.46s
  🧪 Batch 7/55 — Loss: 1.7971 — Time: 0.52s
  🧪 Batch 8/55 — Loss: 1.8287 — Time: 0.44s
  🧪 Batch 9/55 — Loss: 1.7617 — Time: 0.53s
  🧪 Batch 10/55 — Loss: 1.7694 — Time: 0.46s
  🧪 Batch 11/55 — Loss: 1.7631 — Time: 0.52s
  🧪 Batch 12/55 — Loss: 1.7226 — Time: 0.44s
  🧪 Batch 13/55 — Loss: 1.7748 — Time: 0.47s
  🧪 Batch 14/55 — Loss: 1.6163 — Time: 0.46s
  🧪 Batch 15/55 — Loss: 1.6513 — Time: 0.44s
  🧪 Batch 16/55 — Loss: 1.7000 — Time: 0.46s
  🧪 Batch 17/55 — Loss: 1.6433 — Time: 0.44s
  🧪 Batch 18/55 — Loss: 1.6629 — Time: 0.45s
  🧪 Batch 19/55 — Loss: 1.7349 — Time: 0.45s
  🧪 Batch 20/55 — Loss: 1.6336 — Time: 0.50s
  🧪 Batch 21/55 — Loss: 1.6076 — Time: 0.46s
  🧪 Batch 22/55 — Loss: 1.6307 — Time: 0.48s
  🧪 Batch 23/55 — L

In [29]:
evaluation_df = evaluate_model_on_datasets(trained_model, separate_loaders)


🧪 Evaluating trained model on all test sets...

🔍 Testing on: CVC-ClinicDB
🔍 Testing on: CVC-ColonDB
🔍 Testing on: ETIS-LaribPolypDB
🔍 Testing on: Kvasir-SEG

⏱️ Total Evaluation Time: 3.05 seconds

📋 Test Results Across Datasets (with Average):
| Tested On         |     Loss |      IoU |     Dice |   Jaccard |   Precision |   Recall |   Accuracy |
|:------------------|---------:|---------:|---------:|----------:|------------:|---------:|-----------:|
| CVC-ClinicDB      | 0.173286 | 0.846071 | 0.915336 |  0.846071 |    0.944182 | 0.892699 |   0.984647 |
| CVC-ColonDB       | 0.138843 | 0.85811  | 0.923507 |  0.85811  |    0.93774  | 0.909704 |   0.992436 |
| ETIS-LaribPolypDB | 0.464115 | 0.51178  | 0.638788 |  0.51178  |    0.938224 | 0.540285 |   0.984364 |
| Kvasir-SEG        | 0.198417 | 0.848067 | 0.9172   |  0.848067 |    0.933992 | 0.901806 |   0.975417 |
| Average           | 0.243665 | 0.766007 | 0.848708 |  0.766007 |    0.938534 | 0.811123 |   0.984216 |


# U-Net++ (no pretrained weights)


In [25]:
def unet_no_PT_weights_model_factory():
    return smp.UnetPlusPlus(
        encoder_name="resnet34",     # Same encoder as your U-Net
        encoder_weights=None,  # pretrained on ImageNet
        in_channels=3,               # rgb input
        classes=1,                   # Binary segmentation (e.g., polyps)
    )

In [26]:
u_net_results, trained_model = train_and_cross_test(model_class=unet_no_PT_weights_model_factory, all_data_loaders=[combined_loaders], num_epochs=1)


🚀 Training on: CombinedDataset

Epoch 1/1
  🧪 Batch 1/55 — Loss: 1.5803 — Time: 1.50s
  🧪 Batch 2/55 — Loss: 1.5544 — Time: 0.81s
  🧪 Batch 3/55 — Loss: 1.5796 — Time: 0.85s
  🧪 Batch 4/55 — Loss: 1.5413 — Time: 0.81s
  🧪 Batch 5/55 — Loss: 1.3381 — Time: 0.87s
  🧪 Batch 6/55 — Loss: 1.4447 — Time: 0.81s
  🧪 Batch 7/55 — Loss: 1.4720 — Time: 0.87s
  🧪 Batch 8/55 — Loss: 1.3836 — Time: 0.81s
  🧪 Batch 9/55 — Loss: 1.4174 — Time: 0.86s
  🧪 Batch 10/55 — Loss: 1.4023 — Time: 0.81s
  🧪 Batch 11/55 — Loss: 1.4084 — Time: 0.86s
  🧪 Batch 12/55 — Loss: 1.4311 — Time: 0.81s
  🧪 Batch 13/55 — Loss: 1.4196 — Time: 0.85s
  🧪 Batch 14/55 — Loss: 1.3889 — Time: 0.81s
  🧪 Batch 15/55 — Loss: 1.3947 — Time: 0.86s
  🧪 Batch 16/55 — Loss: 1.3385 — Time: 0.81s
  🧪 Batch 17/55 — Loss: 1.2801 — Time: 0.86s
  🧪 Batch 18/55 — Loss: 1.3026 — Time: 0.81s
  🧪 Batch 19/55 — Loss: 1.3147 — Time: 0.86s
  🧪 Batch 20/55 — Loss: 1.3848 — Time: 0.81s
  🧪 Batch 21/55 — Loss: 1.3280 — Time: 0.85s
  🧪 Batch 22/55 — Los

In [None]:
evaluation_df = evaluate_model_on_datasets(trained_model, separate_loaders)

# U-Net++

In [None]:
# This crashes the runtime, which restarts the whole Colab environment
import os
os.kill(os.getpid(), 9)


In [None]:
!nvidia-smi

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()



Mon Apr 21 20:02:49 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   75C    P0             33W /   70W |   15092MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
def unetpp_model_factory():
    return smp.UnetPlusPlus(
        encoder_name="resnet34",     # Same encoder as your U-Net
        encoder_weights="imagenet",  # pretrained on ImageNet
        in_channels=3,               # rgb input
        classes=1,                   # Binary segmentation (e.g., polyps)
    )


In [None]:
u_net_pp_results, trained_model = train_and_cross_test(model_class=unetpp_model_factory, all_data_loaders=[combined_loaders], num_epochs=100)
#u_net_pp_results = train_and_cross_test(model_class=unetpp_model_factory, all_data_loaders=all_data_loaders, num_epochs=10)
print('U-Net Plus Plus Results')
print(u_net_pp_results)


🚀 Training on: CombinedDataset

Epoch 1/100
  🧪 Batch 1/55 — Loss: 1.4446 — Time: 0.91s
  🧪 Batch 2/55 — Loss: 1.3799 — Time: 0.89s
  🧪 Batch 3/55 — Loss: 1.3521 — Time: 0.88s
  🧪 Batch 4/55 — Loss: 1.3426 — Time: 0.87s
  🧪 Batch 5/55 — Loss: 1.3372 — Time: 0.89s
  🧪 Batch 6/55 — Loss: 1.2106 — Time: 0.88s
  🧪 Batch 7/55 — Loss: 1.2402 — Time: 0.88s
  🧪 Batch 8/55 — Loss: 1.2324 — Time: 0.89s
  🧪 Batch 9/55 — Loss: 1.2138 — Time: 0.89s
  🧪 Batch 10/55 — Loss: 1.1926 — Time: 0.89s
  🧪 Batch 11/55 — Loss: 1.2037 — Time: 0.89s
  🧪 Batch 12/55 — Loss: 1.1893 — Time: 0.89s
  🧪 Batch 13/55 — Loss: 1.1141 — Time: 0.89s
  🧪 Batch 14/55 — Loss: 1.1224 — Time: 0.91s
  🧪 Batch 15/55 — Loss: 1.1830 — Time: 0.90s
  🧪 Batch 16/55 — Loss: 1.1139 — Time: 0.91s
  🧪 Batch 17/55 — Loss: 1.1111 — Time: 0.91s
  🧪 Batch 18/55 — Loss: 1.0969 — Time: 0.91s
  🧪 Batch 19/55 — Loss: 1.0349 — Time: 0.91s
  🧪 Batch 20/55 — Loss: 1.0250 — Time: 0.90s
  🧪 Batch 21/55 — Loss: 1.0195 — Time: 0.91s
  🧪 Batch 22/55 — L

In [None]:
evaluation_df = evaluate_model_on_datasets(trained_model, separate_loaders)


🧪 Evaluating trained model on all test sets...

🔍 Testing on: CVC-ClinicDB
🔍 Testing on: CVC-ColonDB
🔍 Testing on: ETIS-LaribPolypDB
🔍 Testing on: Kvasir-SEG

⏱️ Total Evaluation Time: 4.15 seconds

📋 Test Results Across Datasets (with Average):
| Tested On         |     Loss |      IoU |     Dice |   Jaccard |   Precision |   Recall |   Accuracy |
|:------------------|---------:|---------:|---------:|----------:|------------:|---------:|-----------:|
| CVC-ClinicDB      | 0.121548 | 0.854252 | 0.942273 |  0.891018 |    0.950179 | 0.935242 |   0.989891 |
| CVC-ColonDB       | 0.146963 | 0.76528  | 0.920163 |  0.853685 |    0.94232  | 0.901489 |   0.992375 |
| ETIS-LaribPolypDB | 0.500283 | 0.486214 | 0.601018 |  0.507218 |    0.97153  | 0.528764 |   0.986279 |
| Kvasir-SEG        | 0.210686 | 0.864189 | 0.915843 |  0.845795 |    0.947629 | 0.888326 |   0.975459 |
| Average           | 0.24487  | 0.742484 | 0.844824 |  0.774429 |    0.952914 | 0.813456 |   0.986001 |


# DeepLabV3

In [None]:
def deeplabv3_model_factory():
    return smp.DeepLabV3(
        encoder_name="resnet34",
        encoder_weights="imagenet",
        in_channels=3,
        classes=1,
    )

# BetterNet

In [None]:
import sys
sys.path.append("/content/drive/My Drive/GT/DL/GroupProject/BetterNet")

# sys.path.append("../BetterNet")  # add the BetterNet folder to the import path

from model import BetterNet  # import the model definition

# Define model parameters
params = {
    "img_height": 224,
    "img_width": 224,
    "img_channels": 3,
    "mask_channels": 1  # binary segmentation
}

# Initialize the BetterNet model
model = BetterNet(
    input_shape=(params["img_height"], params["img_width"], params["img_channels"]),
    num_classes=params["mask_channels"],
    dropout_rate=0.5
)

# Move to GPU if available
model = model.cuda() if torch.cuda.is_available() else model
