**Imports and Config**

In [None]:
# ============================================
# Module 1: Imports and Config
# ============================================

import os
import random
import numpy as np
from glob import glob
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models

!pip install pytorch-lightning
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor

from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from sklearn.metrics import accuracy_score
from torchmetrics.classification import JaccardIndex

# ------------------------
# Configuration
# ------------------------
# Hyperparameters shared by every stage of the notebook.
# NOTE(review): in the code visible in this notebook, "set_size" and
# "sampling_width" are never read by any model code; they are only
# overwritten in the ablation cells. Confirm whether the set-sampling logic
# lives elsewhere before relying on those ablations.
CONFIG = {
    "pretrain_image_size": 128,   # DS-SimCLR default
    "linear_image_size": 224,     # Linear eval
    "set_size": 5,                # DeepSet set size
    "sampling_width": 0.5,        # fraction for sampling width
    "batch_size": 32,
    "epochs_pretrain": 2,
    "epochs_linear": 2,
    "lr_pretrain": 0.07,
    "weight_decay": 1e-4,
    "ds_use_mlp": True            # DeepSet MLP vs Identity
}

# Dataset paths
# Each directory is searched recursively for *.png files by the dataset
# classes below; a placeholder image is written later if none are found.
NLST_PATH = "/content/datasets/NLST/"
PCAM_PATH = "/content/datasets/PCAM/"
CRC_PATH = "/content/datasets/CRC/"
COVIDX_PATH = "/content/datasets/CovidX-CT/"
CHEXPERT_PATH = "/content/datasets/CheXpert/"
KVASIR_PATH = "/content/datasets/Kvasir-Instruments/"
TBX11_PATH = "/content/datasets/TBX11/"




**Dataset Classes**

In [None]:
# ============================================
# Module 2: Dataset Classes
# ============================================

class ImageFolderDataset(Dataset):
    """Generic PNG image dataset with optional labels.

    Args:
        folder_path: Directory searched recursively for ``*.png`` files.
        transform: Optional callable applied to each loaded RGB image.
        max_images: If truthy, keep only the first ``max_images`` files
            (in sorted path order).
        labels: Optional sequence indexed like ``self.files``. When omitted,
            a random 0/1 label is drawn for every file, so any accuracy
            computed on such a dataset is meaningless.

    Raises:
        ValueError: If ``labels`` is given but shorter than the file list.
    """

    def __init__(self, folder_path, transform=None, max_images=None, labels=None):
        # glob() returns paths in arbitrary filesystem order; sorting makes
        # the max_images truncation and the file/label pairing reproducible.
        self.files = sorted(glob(os.path.join(folder_path, "**/*.png"), recursive=True))
        if max_images:
            self.files = self.files[:max_images]
        self.transform = transform
        if labels is None:
            labels = [random.randint(0, 1) for _ in self.files]
        elif len(labels) < len(self.files):
            # Fail early instead of raising IndexError mid-epoch.
            raise ValueError(
                f"got {len(labels)} labels for {len(self.files)} images"
            )
        self.labels = labels

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``."""
        img = Image.open(self.files[idx]).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, self.labels[idx]

class SegmentationDataset(Dataset):
    """Kvasir semantic segmentation dataset with dummy (random) masks.

    No real annotations are loaded: every mask is filled with random class
    ids in ``[0, 8)``.

    Args:
        folder_path: Directory searched recursively for ``*.png`` files.
        transform: Optional callable applied to each loaded RGB image.
        max_images: Maximum number of files kept (in sorted path order).
    """

    def __init__(self, folder_path, transform=None, max_images=200):
        # Sort before truncating so the selected subset is reproducible.
        self.files = sorted(glob(os.path.join(folder_path, "**/*.png"), recursive=True))[:max_images]
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(image, mask)``.

        With a transform, the mask is a ``LongTensor`` whose height and width
        match the transformed image (so a resizing transform no longer
        yields a mask of the original, un-resized size). Without a transform
        both image and mask are PIL images, as before.
        """
        img = Image.open(self.files[idx]).convert("RGB")
        if not self.transform:
            width, height = img.size
            mask = Image.fromarray(np.random.randint(0, 8, (height, width), dtype=np.uint8))
            return img, mask

        img = self.transform(img)
        if torch.is_tensor(img):
            # Tensor images are (..., H, W).
            height, width = img.shape[-2:]
        else:
            # Transform returned a PIL image; PIL reports (W, H).
            width, height = img.size
        mask = np.random.randint(0, 8, (height, width), dtype=np.uint8)
        return img, torch.from_numpy(mask).long()

class DetectionDataset(Dataset):
    """TBX11 detection dataset with a dummy bounding box per image.

    No real annotations are loaded: every sample gets the same fixed box
    ``[10, 10, 50, 50]`` with class label ``1``, independent of the image
    content or size.

    Args:
        folder_path: Directory searched recursively for ``*.png`` files.
        transform: Optional callable applied to each loaded RGB image.
        max_images: Maximum number of files kept (in sorted path order).
    """

    def __init__(self, folder_path, transform=None, max_images=200):
        # Sort before truncating so the selected subset is reproducible.
        self.files = sorted(glob(os.path.join(folder_path, "**/*.png"), recursive=True))[:max_images]
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(image, target)`` where target holds "boxes" and "labels"."""
        img = Image.open(self.files[idx]).convert("RGB")
        if self.transform:
            img = self.transform(img)
        target = {
            "boxes": torch.tensor([[10, 10, 50, 50]], dtype=torch.float32),
            "labels": torch.tensor([1], dtype=torch.int64),
        }
        return img, target


**DS-SimCLR Architecture**

In [None]:
# ============================================
# Module 3: DS-SimCLR Architecture
# ============================================

class ResNet50Encoder(nn.Module):
    """ResNet-50 feature extractor that returns one flat vector per image.

    The torchvision ResNet-50 is built and its last child module is dropped;
    the remaining children are chained in ``self.encoder``.

    Args:
        pretrained: When true, load ``ResNet50_Weights.DEFAULT``; otherwise
            the network is randomly initialised.
    """

    def __init__(self, pretrained=False):
        super().__init__()
        chosen_weights = models.ResNet50_Weights.DEFAULT if pretrained else None
        resnet = models.resnet50(weights=chosen_weights)
        trunk = list(resnet.children())[:-1]
        self.encoder = nn.Sequential(*trunk)

    def forward(self, x):
        # Flatten everything after the batch dimension.
        features = self.encoder(x)
        return torch.flatten(features, 1)

class ProjectionHead(nn.Module):
    """Two-layer MLP (Linear -> ReLU -> Linear) mapping features to embeddings.

    Args:
        in_dim: Size of the incoming feature vector.
        hidden_dim: Width of the hidden layer.
        out_dim: Size of the produced embedding.
    """

    def __init__(self, in_dim=2048, hidden_dim=2048, out_dim=128):
        super().__init__()
        layers = [
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, out_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        projected = self.net(x)
        return projected

class DSSimCLRProjection(nn.Module):
    """Optional extra projection applied on top of the projection head.

    Args:
        in_dim: Input embedding size (used only when ``use_mlp`` is true).
        out_dim: Output embedding size (used only when ``use_mlp`` is true).
        use_mlp: If true, apply Linear followed by ReLU; otherwise the module
            is an identity mapping.
    """

    def __init__(self, in_dim=128, out_dim=128, use_mlp=True):
        super().__init__()
        if not use_mlp:
            self.net = nn.Identity()
            return
        self.net = nn.Sequential(
            nn.Linear(in_dim, out_dim),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        transformed = self.net(x)
        return transformed


**DS-SimCLR Pretraining Lightning**

In [None]:
# ============================================
# Module 4: Pretraining Lightning Module
# ============================================

class DSSimCLR(pl.LightningModule):
    """SimCLR-style contrastive pretraining module.

    Pipeline: ResNet-50 encoder -> projection head -> optional DeepSet
    projection. Two independently augmented views of every image in the
    batch are embedded and pulled together with an InfoNCE loss.

    Args:
        config: Mapping providing "ds_use_mlp", "lr_pretrain" and
            "weight_decay". "pretrain_image_size" is read when present and
            otherwise taken from the module-level CONFIG.

    NOTE(review): "set_size" and "sampling_width" from the config are not
    used anywhere in this class.
    """

    def __init__(self, config):
        super().__init__()
        self.encoder = ResNet50Encoder()
        self.proj_head = ProjectionHead()
        self.ds_proj = DSSimCLRProjection(use_mlp=config['ds_use_mlp'])
        self.lr = config['lr_pretrain']
        self.weight_decay = config['weight_decay']
        # Build the augmentation pipeline once instead of on every step, and
        # honour the config passed in rather than only the global CONFIG.
        image_size = config.get('pretrain_image_size', CONFIG['pretrain_image_size'])
        self.augment = transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)
        ])

    def info_nce_loss(self, z_i, z_j, temperature=0.5):
        """Return the InfoNCE (NT-Xent) loss between two batches of views.

        Args:
            z_i: Embeddings of the first view, one row per sample.
            z_j: Embeddings of the second view, row-aligned with ``z_i``.
            temperature: Softmax temperature dividing the cosine similarities.

        Returns:
            Scalar loss tensor averaged over all ``2 * batch`` anchors.
        """
        z_i = F.normalize(z_i, dim=1)
        z_j = F.normalize(z_j, dim=1)
        batch_size = z_i.size(0)
        representations = torch.cat([z_i, z_j], dim=0)
        logits = torch.matmul(representations, representations.T) / temperature

        # An anchor must never be matched with itself: push the diagonal to
        # -inf so it contributes zero probability mass.
        self_mask = torch.eye(2 * batch_size, dtype=torch.bool, device=logits.device)
        logits = logits.masked_fill(self_mask, float("-inf"))

        # Row k's positive is the other view of the same sample: k + B for
        # the first half and k - B for the second half. (Using arange(B) for
        # both halves after deleting the diagonal, as before, pointed the
        # first-half anchors at an unrelated sample.)
        anchors = torch.arange(2 * batch_size, device=logits.device)
        targets = (anchors + batch_size) % (2 * batch_size)
        return F.cross_entropy(logits, targets)

    def forward(self, x):
        h = self.encoder(x)
        z = self.proj_head(h)
        ds_z = self.ds_proj(z)
        return ds_z

    def training_step(self, batch, batch_idx):
        """Embed two random augmentations of each image and return the loss."""
        images, _ = batch  # labels are ignored during self-supervised pretraining
        # Augment per sample so every image receives its own random parameters.
        x_i = torch.stack([self.augment(img) for img in images])
        x_j = torch.stack([self.augment(img) for img in images])
        z_i = self(x_i)
        z_j = self(x_j)
        loss = self.info_nce_loss(z_i, z_j)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        # NOTE(review): T_max is fixed at 100 scheduler steps regardless of
        # how many epochs the Trainer actually runs.
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
        return [optimizer], [scheduler]


**Linear Evaluation**

In [None]:
# ============================================
# Module 5: Linear Evaluation Lightning Module
# ============================================

class LinearEval(pl.LightningModule):
    """Linear probe: a trainable linear classifier on a frozen encoder.

    Args:
        encoder: Feature extractor producing 2048-dimensional vectors. Its
            parameters are frozen in place (``requires_grad = False``).
        num_classes: Number of output classes of the linear layer.
        lr: Adam learning rate for the classifier.
    """

    def __init__(self, encoder, num_classes=2, lr=0.001):
        super().__init__()
        self.encoder = encoder
        self.encoder.eval()
        for p in self.encoder.parameters():
            p.requires_grad = False
        self.classifier = nn.Linear(2048, num_classes)
        self.lr = lr

    def train(self, mode=True):
        """Switch modes but keep the frozen encoder in eval mode.

        ``nn.Module.train()`` recurses into every child, which would put the
        encoder's BatchNorm layers back into training mode and let their
        running statistics drift during the probe. Depending on the
        Lightning version, ``fit`` may call ``train()`` on the module, so the
        ``eval()`` call in ``__init__`` alone is not enough.
        """
        super().train(mode)
        self.encoder.eval()
        return self

    def forward(self, x):
        # No graph is needed through the frozen encoder.
        with torch.no_grad():
            h = self.encoder(x)
        return self.classifier(h)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(1) == y).float().mean()
        self.log("train_loss", loss)
        self.log("train_acc", acc)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        acc = (logits.argmax(1) == y).float().mean()
        self.log("val_acc", acc)
        return acc

    def configure_optimizers(self):
        # Only the classifier is trainable; do not hand frozen encoder
        # parameters to the optimizer.
        return torch.optim.Adam(self.classifier.parameters(), lr=self.lr)


**Fine-tuning (Segmentation & Detection)**

In [None]:
# ============================================
# Module 6: Segmentation and Detection
# ============================================

class SegmentationHead(nn.Module):
    """Small convolutional head producing per-pixel class logits.

    A 3x3 convolution (padding 1) to 512 channels, ReLU, then a 1x1
    convolution to ``num_classes`` channels.

    Args:
        in_channels: Channel count of the incoming feature map.
        num_classes: Number of output channels (one per class).
    """

    def __init__(self, in_channels=2048, num_classes=8):
        super().__init__()
        conv_in = nn.Conv2d(in_channels, 512, kernel_size=3, padding=1)
        activation = nn.ReLU(inplace=True)
        conv_out = nn.Conv2d(512, num_classes, kernel_size=1)
        self.head = nn.Sequential(conv_in, activation, conv_out)

    def forward(self, x):
        logits = self.head(x)
        return logits

def build_detection_model(num_classes=2):
    """Build a Faster R-CNN (ResNet-50 FPN) with a fresh box predictor.

    Args:
        num_classes: Number of classes for the new ``FastRCNNPredictor``
            (presumably including background — confirm against torchvision
            conventions).

    Returns:
        The detection model with its box predictor replaced.
    """
    # `pretrained=` is deprecated in favour of `weights=` (the API this
    # notebook already uses for ResNet-50). `weights=None` means no
    # detection checkpoint is loaded, same as `pretrained=False`.
    model = fasterrcnn_resnet50_fpn(weights=None)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


**Evaluation Utilities**

In [None]:
# ============================================
# Module 7: Evaluation Utilities
# ============================================

from torchvision.ops import box_iou

def evaluate_linear(model, loader):
    """Return top-1 accuracy of ``model`` over every sample in ``loader``.

    The model is switched to eval mode and left in it.

    Args:
        model: Module mapping an input batch to class logits; must own at
            least one parameter (used to find the target device).
        loader: Iterable of ``(inputs, labels)`` tensor batches.

    Returns:
        Fraction of correctly classified samples as a Python float.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    # Look the device up once rather than twice per batch.
    device = next(model.parameters()).device
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            preds = model(x).argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.numel()
    if total == 0:
        raise ValueError("evaluate_linear received a loader that yielded no samples")
    return correct / total

def evaluate_segmentation(model, loader, num_classes=8):
    """Return the multiclass Jaccard index (IoU) of ``model`` over ``loader``.

    Args:
        model: Module mapping an image batch to per-pixel class logits with
            the class dimension at index 1.
        loader: Iterable of ``(images, masks)`` tensor batches.
        num_classes: Number of segmentation classes.

    Returns:
        The metric value as a Python float.
    """
    model.eval()
    device = next(model.parameters()).device
    # `MeanIoU` was never imported in this notebook (NameError at call
    # time); JaccardIndex is the imported torchmetrics equivalent.
    metric = JaccardIndex(task="multiclass", num_classes=num_classes)
    with torch.no_grad():
        for imgs, masks in loader:
            preds = model(imgs.to(device)).argmax(dim=1)
            # The metric lives on the CPU, so move both operands there.
            metric.update(preds.cpu(), masks.cpu())
    return metric.compute().item()

def evaluate_detection(model, loader):
    """Return the mean best-match IoU between predictions and ground truth.

    For every image, each ground-truth box is scored by its highest IoU with
    any predicted box; those scores are averaged per image and then across
    images. Note that this is an IoU score, not mean average precision.

    Args:
        model: Detection model that, in eval mode, maps a list of images to
            a list of dicts containing "boxes".
        loader: Iterable of ``(images, targets)`` where each target is a
            dict containing "boxes".

    Returns:
        Mean IoU as a float; ``0.0`` when the loader yields no images.
    """
    model.eval()
    device = next(model.parameters()).device
    all_ious = []
    with torch.no_grad():
        for imgs, targets in loader:
            imgs = [img.to(device) for img in imgs]
            outputs = model(imgs)
            for out, target in zip(outputs, targets):
                # Pairwise IoU matrix: rows = predictions, cols = ground truth.
                iou = box_iou(out['boxes'].cpu(), target['boxes'].cpu())
                if iou.numel() == 0:
                    # No predictions or no ground-truth boxes for this image.
                    all_ious.append(0.0)
                    continue
                # Taking the diagonal would pair prediction k with ground
                # truth k, which is arbitrary; use each ground-truth box's
                # best-overlapping prediction instead.
                all_ious.append(iou.max(dim=0).values.mean().item())
    return float(np.mean(all_ious)) if all_ious else 0.0


**Training & Experiment Pipeline (Pretrain → Evaluate → Fine-tune)**

In [None]:
# ============================================
# Module 8: Training & Experiment Pipeline
# ============================================

import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor

# Pick the GPU when one is visible to PyTorch, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", DEVICE)

# -------------------------------
# 8.1 Data Transforms
# -------------------------------

# Pretraining inputs: resize to a fixed square and convert to a tensor.
# Random augmentations are applied later, inside DSSimCLR.training_step.
pretrain_transform = transforms.Compose([
    transforms.Resize((CONFIG["pretrain_image_size"], CONFIG["pretrain_image_size"])),
    transforms.ToTensor()
])

# Downstream inputs (linear eval, segmentation, detection): same recipe at
# the larger "linear_image_size" resolution. No normalisation is applied.
linear_transform = transforms.Compose([
    transforms.Resize((CONFIG["linear_image_size"], CONFIG["linear_image_size"])),
    transforms.ToTensor()
])

# Helper function to create a dummy image if a path is empty
def create_dummy_image_if_empty(path, image_size=(128, 128)):
    """Write a solid red placeholder PNG into ``path`` if it holds no PNGs.

    The directory is searched recursively; when at least one ``*.png`` is
    found nothing happens. Otherwise the directory is created if needed and
    ``dummy_image.png`` of the given size is saved into it.
    """
    existing_pngs = glob(os.path.join(path, "**/*.png"), recursive=True)
    if existing_pngs:
        return

    print(f"Path ({path}) is empty. Creating a dummy image.")
    os.makedirs(path, exist_ok=True)
    target_file = os.path.join(path, 'dummy_image.png')
    placeholder = Image.new('RGB', image_size, color='red')
    placeholder.save(target_file)

# -------------------------------
# 8.2 Pretraining Dataset (NLST)
# -------------------------------

# Guarantee at least one PNG exists so the dataset below is never empty.
create_dummy_image_if_empty(NLST_PATH, image_size=(CONFIG["pretrain_image_size"], CONFIG["pretrain_image_size"]))

# No labels are passed, so ImageFolderDataset assigns random 0/1 labels;
# they are ignored by DSSimCLR.training_step.
nlst_dataset = ImageFolderDataset(
    NLST_PATH,
    transform=pretrain_transform,
    max_images=2000   # as specified in paper
)

# NOTE(review): with drop_last=True the loader yields zero batches whenever
# the dataset has fewer than CONFIG["batch_size"] images (e.g. when only the
# single placeholder image exists), in which case pretraining does nothing.
pretrain_loader = DataLoader(
    nlst_dataset,
    batch_size=CONFIG["batch_size"],
    shuffle=True,
    num_workers=4,
    drop_last=True
)

# -------------------------------
# 8.3 Pretrain DS-SimCLR
# -------------------------------

ds_simclr = DSSimCLR(CONFIG)

trainer_pretrain = pl.Trainer(
    accelerator="gpu" if DEVICE=="cuda" else "cpu",
    devices=1,
    max_epochs=CONFIG["epochs_pretrain"],
    callbacks=[LearningRateMonitor(logging_interval="epoch")],
    log_every_n_steps=10
)

print("\nüöÄ Starting DS-SimCLR Pretraining...")
trainer_pretrain.fit(ds_simclr, pretrain_loader)

# Freeze encoder after pretraining
# Only the encoder is carried forward; the projection heads are discarded.
encoder = ds_simclr.encoder
encoder.eval()
for p in encoder.parameters():
    p.requires_grad = False

# -------------------------------
# 8.4 Linear Evaluation Datasets
# -------------------------------

# Make sure every downstream dataset directory holds at least one PNG.
create_dummy_image_if_empty(PCAM_PATH, image_size=(CONFIG["linear_image_size"], CONFIG["linear_image_size"]))
create_dummy_image_if_empty(CRC_PATH, image_size=(CONFIG["linear_image_size"], CONFIG["linear_image_size"]))
create_dummy_image_if_empty(COVIDX_PATH, image_size=(CONFIG["linear_image_size"], CONFIG["linear_image_size"]))
create_dummy_image_if_empty(CHEXPERT_PATH, image_size=(CONFIG["linear_image_size"], CONFIG["linear_image_size"]))

# NOTE(review): no `labels` argument is supplied, so ImageFolderDataset
# generates random 0/1 labels for every image. The accuracies computed below
# therefore do not measure real classification performance.
linear_datasets = {
    "PCAM": ImageFolderDataset(PCAM_PATH, linear_transform, max_images=1000),
    "CRC": ImageFolderDataset(CRC_PATH, linear_transform, max_images=1000),
    "CovidX-CT": ImageFolderDataset(COVIDX_PATH, linear_transform, max_images=1000),
    "CheXpert": ImageFolderDataset(CHEXPERT_PATH, linear_transform, max_images=1000),
}

linear_loaders = {
    name: DataLoader(ds, batch_size=64, shuffle=True, num_workers=4)
    for name, ds in linear_datasets.items()
}

# -------------------------------
# 8.5 Run Linear Evaluation
# -------------------------------

# Maps dataset name -> accuracy returned by evaluate_linear.
linear_results = {}

for name, loader in linear_loaders.items():
    print(f"\nüìä Linear evaluation on {name}")

    # Every probe wraps the same frozen `encoder` object; only the linear
    # classifier is created fresh per dataset.
    linear_model = LinearEval(encoder, num_classes=2)

    trainer_linear = pl.Trainer(
        accelerator="gpu" if DEVICE=="cuda" else "cpu",
        devices=1,
        max_epochs=CONFIG["epochs_linear"],
        log_every_n_steps=10
    )

    # NOTE(review): the same loader is used for training, validation and the
    # final evaluation, so the reported number is a training-set accuracy.
    trainer_linear.fit(linear_model, loader, loader)

    acc = evaluate_linear(linear_model, loader)
    linear_results[name] = acc

    print(f"‚úÖ {name} Accuracy: {acc:.4f}")

# -------------------------------
# 8.6 Semantic Segmentation (Kvasir)
# -------------------------------

print("\nüß† Fine-tuning for Semantic Segmentation (Kvasir)")

create_dummy_image_if_empty(KVASIR_PATH, image_size=(CONFIG["linear_image_size"], CONFIG["linear_image_size"]))

# SegmentationDataset produces random masks, not real annotations.
kvasir_dataset = SegmentationDataset(
    KVASIR_PATH,
    transform=linear_transform,
    max_images=200
)

kvasir_loader = DataLoader(
    kvasir_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=4
)

# The encoder emits one flat 2048-vector per image. It is reshaped to a
# 2048x1x1 map and upsampled by a factor of 224 to 2048x224x224 before the
# head runs, so every pixel carries the same feature vector before the head's
# 3x3 convolution.
# NOTE(review): a float32 activation of shape (8, 2048, 224, 224) is roughly
# 3.3 GB per batch — confirm this fits in memory.
segmentation_model = nn.Sequential(
    encoder,
    nn.Unflatten(1, (2048, 1, 1)),
    nn.Upsample(scale_factor=224, mode="bilinear"),
    SegmentationHead(num_classes=8)
).to(DEVICE)

# Need to replace MeanIoU with JaccardIndex as per earlier fix
from torchmetrics.classification import JaccardIndex

def evaluate_segmentation(model, loader, num_classes=8):
    """Compute the multiclass Jaccard index of ``model`` over ``loader``.

    The model is put into eval mode, predictions are the argmax over
    dimension 1 of the model output, and the metric is accumulated on the
    CPU across all batches.
    """
    model.eval()
    iou_metric = JaccardIndex(task="multiclass", num_classes=num_classes)
    with torch.no_grad():
        for batch_images, batch_masks in loader:
            target_device = next(model.parameters()).device
            logits = model(batch_images.to(target_device))
            predicted = logits.argmax(dim=1)
            # Both operands are moved to the CPU before updating the metric.
            iou_metric.update(predicted.cpu(), batch_masks.cpu())
    score = iou_metric.compute()
    return score.item()


# NOTE(review): no training step runs between building segmentation_model
# and this evaluation, so the head is scored with its initial weights
# against the dataset's random masks.
seg_miou = evaluate_segmentation(segmentation_model, kvasir_loader)
print(f"‚úÖ Kvasir mIoU: {seg_miou:.4f}")

# -------------------------------
# 8.7 Object Detection (TBX11)
# -------------------------------

print("\nüéØ Fine-tuning for Object Detection (TBX11)")

create_dummy_image_if_empty(TBX11_PATH, image_size=(CONFIG["linear_image_size"], CONFIG["linear_image_size"]))

# DetectionDataset attaches the same fixed dummy box to every image.
tbx_dataset = DetectionDataset(
    TBX11_PATH,
    transform=linear_transform,
    max_images=200
)

# The collate function turns a list of (image, target) pairs into a tuple of
# images and a tuple of targets instead of stacking them.
# NOTE(review): a lambda collate_fn cannot be pickled, which may break
# num_workers > 0 on platforms that spawn worker processes — confirm.
tbx_loader = DataLoader(
    tbx_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=lambda x: tuple(zip(*x))
)

# NOTE(review): the model is evaluated right after construction; no
# fine-tuning happens in this cell despite the message printed above.
det_model = build_detection_model(num_classes=2).to(DEVICE)

# NOTE(review): evaluate_detection averages box IoU values; the result is
# labelled "mAP" here but is not a mean-average-precision score.
det_map = evaluate_detection(det_model, tbx_loader)
print(f"‚úÖ TBX11 mAP: {det_map:.4f}")

# -------------------------------
# 8.8 Summary Tables
# -------------------------------

import pandas as pd

# One row per dataset, plus a "Mean" row averaging the rows above it.
df_linear = pd.DataFrame.from_dict(linear_results, orient="index", columns=["Accuracy"])
df_linear.loc["Mean"] = df_linear.mean()

# Single-row overview combining the three downstream results.
summary = {
    "Linear Mean Accuracy": df_linear.loc["Mean"].values[0],
    "Segmentation mIoU (Kvasir)": seg_miou,
    "Detection mAP (TBX11)": det_map
}

df_summary = pd.DataFrame(summary, index=["DS-SimCLR"])

print("\nüìå Linear Evaluation Results")
print(df_linear)

print("\nüìå Overall Summary")
print(df_summary)


**Statistical Analysis, Ablations & Runtime Profiling**

**Utilities: Confidence Intervals & Timing**

In [None]:
# ============================================
# Module 9.1: Statistics Utilities
# ============================================

import time
import numpy as np
from scipy import stats

def confidence_interval(data, confidence=0.95):
    """Return the sample mean and the half-width of its Student-t interval.

    Args:
        data: Sequence of numeric observations.
        confidence: Two-sided confidence level, e.g. ``0.95``.

    Returns:
        ``(mean, h)`` such that the interval is ``mean ± h``. With fewer
        than two observations the interval is undefined and ``h`` is NaN;
        for empty input the mean is NaN as well.
    """
    data = np.asarray(data, dtype=float)
    n = len(data)
    if n == 0:
        # Nothing to average; avoid numpy's "mean of empty slice" warning.
        return float("nan"), float("nan")
    mean = data.mean()
    if n < 2:
        # The standard error needs at least two samples (n - 1 degrees of
        # freedom); report an undefined width explicitly.
        return mean, float("nan")
    sem = stats.sem(data)
    h = sem * stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, h

def time_training_step(model, loader, steps=100):
    """Time ``steps`` forward/backward passes of ``model.training_step``.

    The loader is restarted whenever it is exhausted. No optimizer step is
    taken, so gradients accumulate on the parameters across iterations.

    Args:
        model: Object exposing ``train()`` and ``training_step(batch, idx)``
            returning a loss with a ``backward()`` method.
        loader: Re-iterable source of batches.
        steps: Number of training steps to run.

    Returns:
        Elapsed seconds. If the loader yields no batches at all, profiling
        stops early and the time elapsed so far is returned.
    """
    model.train()
    # perf_counter is monotonic and intended for measuring durations.
    start = time.perf_counter()
    it = iter(loader)
    for _ in range(steps):
        try:
            batch = next(it)
        except StopIteration:
            # Loader exhausted: start another pass over it.
            it = iter(loader)
            try:
                batch = next(it)
            except StopIteration:
                # Still nothing after a restart: the loader is empty. Without
                # this guard StopIteration would escape from the handler.
                print("Warning: DataLoader is empty. Stopping profiling.")
                break
        loss = model.training_step(batch, 0)
        loss.backward()
    return time.perf_counter() - start


**Linear Evaluation with 5 / 15 Trials (Table 1)**

In [None]:
# ============================================
# Module 9.2: Linear Evaluation (Multi-Trial)
# ============================================

# Number of repeated linear-probe runs per dataset.
TRIALS = {
    "PCAM": 5,
    "CRC": 5,
    "CovidX-CT": 5,
    "CheXpert": 15
}

# Maps dataset name -> (mean accuracy, 95% CI half-width).
linear_ci_results = {}

for dataset, loader in linear_loaders.items():
    trials = TRIALS.get(dataset, 5)  # default to 5 trials for unlisted datasets
    scores = []

    print(f"\nüîÅ {dataset} ‚Äî {trials} trials")

    for t in range(trials):
        # Each trial builds a new classifier on the same frozen encoder; no
        # seed is set here, so trials differ only by random initialisation
        # and shuffling.
        linear_model = LinearEval(encoder, num_classes=2)
        trainer = pl.Trainer(
            accelerator="gpu" if DEVICE=="cuda" else "cpu",
            devices=1,
            max_epochs=CONFIG["epochs_linear"],
            enable_checkpointing=False,
            logger=False
        )
        # NOTE(review): training, validation and scoring all use the same
        # loader, so these are training-set accuracies.
        trainer.fit(linear_model, loader, loader)
        acc = evaluate_linear(linear_model, loader)
        scores.append(acc)

    mean, ci = confidence_interval(scores)
    linear_ci_results[dataset] = (mean, ci)
    print(f"‚úÖ {dataset}: {mean:.4f} ¬± {ci:.4f}")


**Linear Evaluation with 5 / 15 Trials**


In [None]:
# ============================================
# Module 9.2: Linear Evaluation (Multi-Trial)
# ============================================

# NOTE(review): this cell repeats the Module 9.2 code of the preceding cell
# verbatim; running both re-trains every probe and overwrites
# linear_ci_results with the second run's values.
TRIALS = {
    "PCAM": 5,
    "CRC": 5,
    "CovidX-CT": 5,
    "CheXpert": 15
}

# Maps dataset name -> (mean accuracy, 95% CI half-width).
linear_ci_results = {}

for dataset, loader in linear_loaders.items():
    trials = TRIALS.get(dataset, 5)  # default to 5 trials for unlisted datasets
    scores = []

    print(f"\nüîÅ {dataset} ‚Äî {trials} trials")

    for t in range(trials):
        # A new classifier per trial on the same frozen encoder.
        linear_model = LinearEval(encoder, num_classes=2)
        trainer = pl.Trainer(
            accelerator="gpu" if DEVICE=="cuda" else "cpu",
            devices=1,
            max_epochs=CONFIG["epochs_linear"],
            enable_checkpointing=False,
            logger=False
        )
        # The same loader serves for training, validation and scoring.
        trainer.fit(linear_model, loader, loader)
        acc = evaluate_linear(linear_model, loader)
        scores.append(acc)

    mean, ci = confidence_interval(scores)
    linear_ci_results[dataset] = (mean, ci)
    print(f"‚úÖ {dataset}: {mean:.4f} ¬± {ci:.4f}")


**Set Size Ablation**

In [None]:
# ============================================
# Module 9.3: DS-SimCLR Set Size Ablation
# ============================================

SET_SIZES = [2, 3, 5]
set_size_results = []

for s in SET_SIZES:
    print(f"\nüß™ Set size = {s}")
    cfg = CONFIG.copy()
    # NOTE(review): DSSimCLR does not read "set_size" in the code visible in
    # this notebook, so the runs below differ only by random seed.
    cfg["set_size"] = s
    cfg["ds_use_mlp"] = False  # DS = id as in paper

    trainer_kwargs = dict(
        accelerator="gpu" if DEVICE=="cuda" else "cpu",
        devices=1,
        max_epochs=1,
        enable_checkpointing=False,
        logger=False,
    )

    ds_model = DSSimCLR(cfg)
    trainer = pl.Trainer(**trainer_kwargs)
    trainer.fit(ds_model, pretrain_loader)

    linear_model = LinearEval(ds_model.encoder, num_classes=2)
    # Use a fresh Trainer for the probe: the pretraining Trainer has already
    # reached max_epochs, and Lightning keeps that epoch counter on the
    # Trainer, so a second fit() on it would likely return without training
    # the classifier.
    linear_trainer = pl.Trainer(**trainer_kwargs)
    linear_trainer.fit(linear_model, linear_loaders["CRC"])
    acc = evaluate_linear(linear_model, linear_loaders["CRC"])

    set_size_results.append({"Set Size": s, "CRC Accuracy": acc})
    print(f"CRC Acc = {acc:.4f}")


**Sampling Width Ablation**

In [None]:
# ============================================
# Module 9.4: Sampling Width Ablation
# ============================================

WIDTHS = [0.5, 0.7, 0.8]
sampling_results = []

for w in WIDTHS:
    print(f"\nüß™ Sampling width = {w}")
    cfg = CONFIG.copy()
    # NOTE(review): DSSimCLR does not read "sampling_width" in the code
    # visible in this notebook, so the runs below differ only by random seed.
    cfg["sampling_width"] = w

    trainer_kwargs = dict(
        accelerator="gpu" if DEVICE=="cuda" else "cpu",
        devices=1,
        max_epochs=1,
        enable_checkpointing=False,
        logger=False,
    )

    ds_model = DSSimCLR(cfg)
    trainer = pl.Trainer(**trainer_kwargs)
    trainer.fit(ds_model, pretrain_loader)

    linear_model = LinearEval(ds_model.encoder, num_classes=2)
    # Use a fresh Trainer for the probe: the pretraining Trainer has already
    # reached max_epochs, and Lightning keeps that epoch counter on the
    # Trainer, so a second fit() on it would likely return without training
    # the classifier.
    linear_trainer = pl.Trainer(**trainer_kwargs)
    linear_trainer.fit(linear_model, linear_loaders["PCAM"])
    acc = evaluate_linear(linear_model, linear_loaders["PCAM"])

    sampling_results.append({"Width": w, "PCAM Accuracy": acc})
    print(f"PCAM Acc = {acc:.4f}")


**DeepSet MLP vs Identity**


In [None]:
# ============================================
# Module 9.5: DeepSet MLP vs Identity
# ============================================

mlp_ablation = []

for use_mlp in [False, True]:
    print(f"\nüß† DeepSet = {'MLP' if use_mlp else 'Identity'}")

    cfg = CONFIG.copy()
    # This flag selects Linear+ReLU versus Identity inside DSSimCLRProjection.
    cfg["ds_use_mlp"] = use_mlp

    trainer_kwargs = dict(
        accelerator="gpu" if DEVICE=="cuda" else "cpu",
        devices=1,
        max_epochs=1,
        enable_checkpointing=False,
        logger=False,
    )

    ds_model = DSSimCLR(cfg)
    trainer = pl.Trainer(**trainer_kwargs)
    trainer.fit(ds_model, pretrain_loader)

    # Only the encoder is evaluated; the projection modules are discarded.
    linear_model = LinearEval(ds_model.encoder, num_classes=2)
    # Use a fresh Trainer for the probe: the pretraining Trainer has already
    # reached max_epochs, and Lightning keeps that epoch counter on the
    # Trainer, so a second fit() on it would likely return without training
    # the classifier.
    linear_trainer = pl.Trainer(**trainer_kwargs)
    linear_trainer.fit(linear_model, linear_loaders["CovidX-CT"])
    acc = evaluate_linear(linear_model, linear_loaders["CovidX-CT"])

    mlp_ablation.append({
        "DeepSet": "MLP" if use_mlp else "Identity",
        "CovidX-CT Acc": acc
    })

    print(f"Acc = {acc:.4f}")


**Runtime Profiling**

In [None]:
# ============================================
# Module 9.6: Runtime Profiling
# ============================================

print("\n‚è±Ô∏è Runtime profiling")

# Redefine time_training_step locally to make it robust against empty loaders
def time_training_step_robust(model, loader, steps=100):
    """Time up to ``steps`` forward/backward passes, tolerating empty loaders.

    The loader is restarted whenever it is exhausted. If a restart still
    yields no batch, a warning is printed and profiling stops early.

    Args:
        model: Object exposing ``train()`` and ``training_step(batch, idx)``
            returning a loss with a ``backward()`` method.
        loader: Re-iterable source of batches.
        steps: Number of training steps to run.

    Returns:
        Elapsed seconds for the steps that were actually executed.
    """
    model.train()
    # perf_counter is monotonic and intended for measuring durations.
    start = time.perf_counter()
    it = iter(loader)
    completed = 0
    while completed < steps:
        try:
            batch = next(it)
        except StopIteration:
            # If the loader is exhausted, try resetting it
            it = iter(loader)
            try:
                batch = next(it)
            except StopIteration:
                # No batch even after a reset: the loader is empty, so there
                # is nothing more to profile.
                print(f"Warning: DataLoader provided no batches after {completed} steps. Stopping profiling.")
                return time.perf_counter() - start
            # A successful restart yields a valid batch, so it is profiled
            # like any other. (The previous `i == 0` early return discarded
            # that batch and reported an empty loader although data was
            # available.)

        loss = model.training_step(batch, 0)
        loss.backward()
        completed += 1
    return time.perf_counter() - start

# NOTE(review): both models are built by the same class from the same
# CONFIG, so "Baseline SimCLR" and "DS-SimCLR" are architecturally identical
# here and the timing comparison cannot show a difference between methods.
baseline_model = DSSimCLR(CONFIG)
ds_model = DSSimCLR(CONFIG)

# Both models stay on the device they were created on; they are not moved
# to DEVICE before timing.
baseline_time = time_training_step_robust(baseline_model, pretrain_loader)
ds_time = time_training_step_robust(ds_model, pretrain_loader)

# The column header assumes all 100 steps ran; time_training_step_robust may
# stop early when the loader yields no batches.
runtim_df = pd.DataFrame({
    "Model": ["Baseline SimCLR", "DS-SimCLR"],
    "Time (100 steps, sec)": [baseline_time, ds_time]
})

print(runtim_df)



⏱️ Runtime profiling




             Model  Time (100 steps, sec)
0  Baseline SimCLR               0.644608
1        DS-SimCLR               0.631780


**Final Paper-Style Tables**

In [None]:
# ============================================
# Module 9.7: Tables
# ============================================

# One row per dataset: the mean accuracy over trials and the half-width of
# its 95% confidence interval, unpacked from the (mean, ci) tuples.
df_linear_ci = pd.DataFrame([
    {"Dataset": k, "Mean Acc": v[0], "95% CI": v[1]}
    for k, v in linear_ci_results.items()
])

# Ablation results were collected as lists of dicts, one dict per setting.
df_set = pd.DataFrame(set_size_results)
df_width = pd.DataFrame(sampling_results)
df_mlp = pd.DataFrame(mlp_ablation)

print("\nüìä Table 1 ‚Äî Linear Evaluation")
print(df_linear_ci)

print("\nüìä Table 3 ‚Äî Set Size Ablation")
print(df_set)

print("\nüìä Table 4 ‚Äî Sampling Width Ablation")
print(df_width)

print("\nüìä Table 5 ‚Äî DeepSet MLP vs Identity")
print(df_mlp)



📊 Table 1 — Linear Evaluation
     Dataset  Mean Acc  95% CI
0       PCAM       1.0     0.0
1        CRC       1.0     0.0
2  CovidX-CT       1.0     0.0
3   CheXpert       1.0     0.0

📊 Table 3 — Set Size Ablation
   Set Size  CRC Accuracy
0         2           1.0
1         3           1.0
2         5           1.0

📊 Table 4 — Sampling Width Ablation
   Width  PCAM Accuracy
0    0.5            1.0
1    0.7            1.0
2    0.8            1.0

📊 Table 5 — DeepSet MLP vs Identity
    DeepSet  CovidX-CT Acc
0  Identity            1.0
1       MLP            1.0
