1. Mount Google Drive in Colab.

In [None]:
# Colab-only: expose the user's Google Drive to the notebook's file system.
from google.colab import drive
# Every dataset/result path in this notebook lives under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


2) Inspect the Source - Number of Images Per Class, Sizes of Images Per Class, Channels of Images Per Class, and Total Number of Images.

In [None]:
import os
from PIL import Image
from collections import Counter, defaultdict

# ── Configuration ──────────────────────────────────────────────────────────
# Root of the source dataset; inspect_directory() below treats each immediate
# sub-directory of this folder as one class.
src_dir = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Source/Step 1 - Moved, Final Source Data"
# ── End configuration ──────────────────────────────────────────────────────

def inspect_directory(path):
    """Print a per-class summary of the image files stored under ``path``.

    Every immediate sub-directory of ``path`` is treated as one class; entries
    that are not directories are ignored. For each class the report lists the
    number of image files, the PIL modes encountered, and the three most common
    ``width×height`` resolutions, followed by the total image count.

    Files are selected by extension only. A file that matches but cannot be
    opened is still included in the class count; the error is printed and the
    file contributes nothing to the mode/resolution tallies.

    Args:
        path: Directory whose immediate sub-directories are the classes.

    Returns:
        None. The report is written to stdout.
    """
    # '.tif' is included so this report counts the same files as the other
    # image-listing cells of this notebook, which accept both TIFF spellings.
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif')

    class_counts = Counter()
    class_modes  = defaultdict(Counter)
    class_res    = defaultdict(Counter)

    for cls in sorted(os.listdir(path)):
        cls_path = os.path.join(path, cls)
        if not os.path.isdir(cls_path):
            continue
        # Sorted so the order of printed modes and errors does not depend on
        # the (unspecified) order in which the filesystem lists the files.
        for fname in sorted(os.listdir(cls_path)):
            if not fname.lower().endswith(image_exts):
                continue
            class_counts[cls] += 1
            img_path = os.path.join(cls_path, fname)
            try:
                # Only header information (mode, size) is read here.
                with Image.open(img_path) as img:
                    class_modes[cls][img.mode] += 1
                    class_res[cls][img.size] += 1
            except Exception as e:
                # Best effort: report the unreadable file and keep scanning.
                print(f"❌ Error opening {img_path}: {e}")

    total = sum(class_counts.values())
    print(f"\nInspection of `{path}`")
    print(f"{'Class':<12}  Images  Modes               Top Resolutions")
    print("-"*60)
    for cls, cnt in class_counts.items():
        modes_s = ", ".join(f"{m}:{n}" for m,n in class_modes[cls].items())
        top3    = class_res[cls].most_common(3)
        res_s   = ", ".join(f"{w}×{h}:{n}" for (w,h),n in top3)
        print(f"{cls:<12}  {cnt:<6}  {modes_s:<18}  {res_s}")
    print(f"\n→ Total images: {total}\n")

# Run the inspection on the source dataset root configured above; the report is
# printed to stdout and the call itself returns nothing.
inspect_directory(src_dir)


Inspection of `/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Source/Step 1 - Moved, Final Source Data`
Class         Images  Modes               Top Resolutions
------------------------------------------------------------
AbdomenCT     500     RGB:500             224×224:500
BreastMRI     500     RGB:500             224×224:500
ChestCT       500     RGB:500             224×224:500
ChestXR       500     RGB:500             224×224:500
HandXR        500     RGB:500             224×224:500
HeadCT        500     RGB:500             224×224:500

→ Total images: 3000



3) Inspect the target (unlabelled) dataset to verify that it contains 3000 images, all of size 224×224 with 3 channels.

In [None]:
import os
from PIL import Image
from collections import Counter

# ── Configuration ──────────────────────────────────────────────────────────
unlabelled_dir = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Images"
# ── End configuration ──────────────────────────────────────────────────────

# Extensions treated as images. '.tif' is included so that this check counts
# the same files as the later target-inspection and normalization cells, which
# accept both TIFF spellings.
image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif')

# Counters for total, modes, and resolutions
total_images = 0
mode_counts  = Counter()
size_counts  = Counter()

# Iterate through all images (flat folder, no recursion). Sorted so the order of
# printed modes/errors does not depend on the filesystem's listing order.
for fname in sorted(os.listdir(unlabelled_dir)):
    if not fname.lower().endswith(image_suffixes):
        continue
    # Counted before opening: an unreadable image still counts towards the total.
    total_images += 1
    img_path = os.path.join(unlabelled_dir, fname)
    try:
        with Image.open(img_path) as img:
            mode_counts[img.mode] += 1
            size_counts[img.size] += 1
    except Exception as e:
        # Best effort: report the unreadable file and keep scanning.
        print(f"❌ Error opening {img_path}: {e}")

# Display results
print(f"\nInspection of unlabelled target directory: `{unlabelled_dir}`")
print(f"→ Total images: {total_images}\n")

print("Channel modes:")
for mode, cnt in mode_counts.items():
    print(f"  {mode}: {cnt}")

print("\nImage resolutions:")
for (w, h), cnt in size_counts.most_common():
    print(f"  {w}×{h}: {cnt}")


Inspection of unlabelled target directory: `/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Images`
→ Total images: 3000

Channel modes:
  RGB: 3000

Image resolutions:
  224×224: 3000


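4) Let us compute the per-channel normalization statistics (mean and std) of the source dataset and verify them after normalization.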

In [None]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

# ── Configuration ────────────────────────────────────────────────────────────
data_dir = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Source/Step 1 - Moved, Final Source Data"
batch_size = 64   # for batch-level stats
num_workers = 4
# ── End configuration ─────────────────────────────────────────────────────────


def _channel_stats(loader, n_channels=3):
    """Return per-channel ``(mean, var, std)`` over every pixel yielded by ``loader``.

    Args:
        loader: Iterable of batches. A batch is either an image tensor of shape
            [B, C, H, W] or a list/tuple whose first element is such a tensor
            (e.g. the ``(images, labels)`` pairs produced for an ImageFolder).
        n_channels: Number of channels C in the images.

    Returns:
        Three float32 tensors of shape [C]: mean, population variance and
        population standard deviation.

    Raises:
        ValueError: If ``loader`` yields no images.
    """
    pixel_count = 0
    # float64 accumulators: the sums run over every pixel of the dataset, and
    # E[x^2] - E[x]^2 subtracts two close numbers, so float32 sums lose digits.
    total = torch.zeros(n_channels, dtype=torch.float64)
    total_sq = torch.zeros(n_channels, dtype=torch.float64)

    for batch in loader:
        imgs = batch[0] if isinstance(batch, (list, tuple)) else batch
        b, _, h, w = imgs.shape
        pixel_count += b * h * w
        total += imgs.sum(dim=[0, 2, 3], dtype=torch.float64)
        total_sq += (imgs ** 2).sum(dim=[0, 2, 3], dtype=torch.float64)

    if pixel_count == 0:
        raise ValueError("loader yielded no images; cannot compute statistics")

    mean = total / pixel_count
    # Rounding can push the variance of a (near-)constant channel slightly
    # below zero; clamp so the square root never produces NaN.
    var = torch.clamp(total_sq / pixel_count - mean ** 2, min=0.0)
    std = torch.sqrt(var)
    return mean.to(torch.float32), var.to(torch.float32), std.to(torch.float32)


# 1) Pre-normalization: load as Tensor [0,1] but no Normalize
pre_transform = transforms.Compose([
    transforms.ToTensor()   # converts to [C,H,W] in [0.0,1.0]
])
pre_ds = datasets.ImageFolder(data_dir, transform=pre_transform)
pre_loader = DataLoader(pre_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)

n_channels = 3
mean_pre, var_pre, std_pre = _channel_stats(tqdm(pre_loader, desc="Pre-norm stats"), n_channels)

print("\nPre-normalization mean:", mean_pre)
print("Pre-normalization std: ", std_pre)


# 2) One-batch post-normalization: take first batch with Normalize()
normalize = transforms.Normalize(mean=mean_pre.tolist(), std=std_pre.tolist())
batch_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize
])
batch_ds = datasets.ImageFolder(data_dir, transform=batch_transform)
batch_loader = DataLoader(batch_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# Get one batch. With shuffle=False this is the first `batch_size` samples in
# ImageFolder order, so its statistics need not match the dataset-wide 0 / 1.
imgs_batch, _ = next(iter(batch_loader))  # [B, C, H, W]
mean_batch = imgs_batch.mean(dim=[0,2,3])
std_batch  = imgs_batch.std(dim=[0,2,3])

print("\nOne-batch post-norm mean:", mean_batch)
print("One-batch post-norm std: ", std_batch)


# 3) Full-dataset post-normalization: entire loader with Normalize
#    (expected result: mean ≈ 0 and std ≈ 1 per channel)
post_loader = DataLoader(batch_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)

mean_post, var_post, std_post = _channel_stats(tqdm(post_loader, desc="Post-norm full stats"), n_channels)

print("\nPost-normalization mean:", mean_post)
print("Post-normalization std: ", std_post)

Pre-norm stats: 100%|██████████| 47/47 [00:21<00:00,  2.14it/s]



Pre-normalization mean: tensor([0.2800, 0.2800, 0.2800])
Pre-normalization std:  tensor([0.3013, 0.3013, 0.3013])

One-batch post-norm mean: tensor([0.1287, 0.1287, 0.1287])
One-batch post-norm std:  tensor([0.6009, 0.6009, 0.6009])


Post-norm full stats: 100%|██████████| 47/47 [00:21<00:00,  2.16it/s]


Post-normalization mean: tensor([8.7193e-08, 8.7193e-08, 8.7193e-08])
Post-normalization std:  tensor([1.0000, 1.0000, 1.0000])





5) Let us build the MobileNet-V2 architecture.
Based on: https://openaccess.thecvf.com/content_cvpr_2018/papers/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.pdf

In [None]:
# MobileNet-V2 (CVPR 2018) using PyTorch + summary (Colab-friendly)
# Matches the original design: inverted residuals + linear bottlenecks, ReLU6, 1280-d head.
# Handy forward_features() returns a 1280-d vector for DA heads (e.g., DANN/ADDA).

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

# ---- Utilities ----
def _make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # ensure round does not go down by >10%
        new_v += divisor
    return new_v

class ConvBNReLU6(nn.Sequential):
    """Bias-free Conv2d -> BatchNorm2d -> in-place ReLU6, as one sequential unit.

    Padding is ``(kernel_size - 1) // 2``. The three layers sit at indices
    0, 1 and 2 of the sequence.
    """
    def __init__(self, in_chs, out_chs, kernel_size=3, stride=1, groups=1):
        conv = nn.Conv2d(
            in_chs,
            out_chs,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            bias=False,  # BatchNorm right after provides the shift term
        )
        norm = nn.BatchNorm2d(out_chs)
        act = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, act)

# ---- Inverted Residual Block (Linear Bottleneck) ----
class InvertedResidual(nn.Module):
    """
    MobileNetV2 inverted-residual block with a linear bottleneck.

    Layout: optional 1x1 expansion (ReLU6, skipped when expand_ratio == 1)
            -> 3x3 depthwise conv with the given stride (ReLU6)
            -> 1x1 projection followed by BatchNorm, with no activation.
    The input is added to the output only when stride == 1 and in_chs == out_chs.
    """
    def __init__(self, in_chs, out_chs, stride, expand_ratio):
        super().__init__()
        assert stride in [1, 2], "stride must be 1 or 2"
        self.use_residual = (in_chs == out_chs) and (stride == 1)

        expands = expand_ratio != 1
        hidden_dim = int(round(in_chs * expand_ratio)) if expands else in_chs

        stages = []
        if expands:
            # 1) Pointwise expansion to the wider hidden representation
            stages.append(ConvBNReLU6(in_chs, hidden_dim, kernel_size=1, stride=1, groups=1))
        stages.extend([
            # 2) Depthwise 3x3: groups == channels, one filter per channel
            ConvBNReLU6(hidden_dim, hidden_dim, kernel_size=3, stride=stride, groups=hidden_dim),
            # 3) Linear projection back to out_chs (deliberately no ReLU6)
            nn.Conv2d(hidden_dim, out_chs, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_chs),
        ])
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        out = self.block(x)
        if self.use_residual:
            return x + out
        return out

# ---- MobileNet-V2 ----
class MobileNetV2(nn.Module):
    """
    MobileNetV2 classifier built from inverted-residual blocks.

    Layout:
      - Stem: 3x3 conv, 32 channels, stride 2
      - Inverted-residual stages, one row per stage as (t, c, n, s)
        = (expansion ratio, output channels, number of blocks, stride of first block):
            (1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2),
            (6, 96, 3, 1), (6,160, 3, 2), (6,320, 1, 1)
      - Head: 1x1 conv to 1280 channels, global average pool, dropout, linear layer

    Channel counts are scaled by ``width_mult`` and rounded to multiples of
    ``round_nearest``; the head width is never scaled below 1280.
    ``forward_features`` returns the pooled feature vector that feeds the classifier.
    """
    def __init__(self, num_classes=6, width_mult=1.0, round_nearest=8, dropout=0.2):
        super().__init__()

        # One row per stage: (t, c, n, s)
        stage_cfgs = (
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        )

        stem_width = _make_divisible(32 * width_mult, round_nearest)
        self.last_channel = _make_divisible(1280 * max(1.0, width_mult), round_nearest)

        # Stem
        layers = [ConvBNReLU6(3, stem_width, kernel_size=3, stride=2, groups=1)]

        # Inverted-residual stages: only the first block of a stage uses stride s
        in_width = stem_width
        for t, c, n, s in stage_cfgs:
            out_width = _make_divisible(c * width_mult, round_nearest)
            for block_idx in range(n):
                block_stride = s if block_idx == 0 else 1
                layers.append(InvertedResidual(in_width, out_width, block_stride, expand_ratio=t))
                in_width = out_width

        # Head: 1x1 conv up to the final feature width
        layers.append(ConvBNReLU6(in_width, self.last_channel, kernel_size=1, stride=1, groups=1))
        self.features = nn.Sequential(*layers)

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.drop = nn.Dropout(p=dropout) if dropout and dropout > 0 else nn.Identity()
        self.classifier = nn.Linear(self.last_channel, num_classes)

        self._init_weights()

    def _init_weights(self):
        """Kaiming-normal convs, unit-scale/zero-shift BatchNorm, N(0, 0.01) linear weights."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
                continue
            if isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    # handy for DA heads
    def forward_features(self, x):
        """Return the globally pooled feature vector, shape [B, last_channel]."""
        return self.pool(self.features(x)).flatten(1)

    def forward(self, x):
        """Return class logits, shape [B, num_classes]."""
        return self.classifier(self.drop(self.forward_features(x)))

# --- Smoke test + Summary ---
# In a notebook __name__ is "__main__", so this runs whenever the cell is executed.
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    m = MobileNetV2(num_classes=6).to(device)

    # try to import torchinfo; auto-install if missing (Colab-friendly).
    # Only a missing package triggers the install; any other failure surfaces.
    try:
        from torchinfo import summary
    except ImportError:
        import sys, subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "torchinfo"])
        from torchinfo import summary

    # Keras-like summary. verbose=0 makes summary() return the statistics
    # object without printing it, so the print() below shows the table exactly
    # once (verbose=1 would print it a second time inside summary()).
    print(summary(
        m,
        input_size=(1, 3, 224, 224),
        device=device,
        col_names=("input_size", "output_size", "num_params", "kernel_size"),
        depth=3,
        verbose=0
    ))

    # Quick forward check; no gradients are needed for a shape check.
    with torch.no_grad():
        x = torch.randn(2, 3, 224, 224, device=device)
        y = m(x)
    print("Output shape:", y.shape)  # expect: torch.Size([2, 6])

Layer (type:depth-idx)                        Input Shape               Output Shape              Param #                   Kernel Shape
MobileNetV2                                   [1, 3, 224, 224]          [1, 6]                    --                        --
├─Sequential: 1-1                             [1, 3, 224, 224]          [1, 1280, 7, 7]           --                        --
│    └─ConvBNReLU6: 2-1                       [1, 3, 224, 224]          [1, 32, 112, 112]         --                        --
│    │    └─Conv2d: 3-1                       [1, 3, 224, 224]          [1, 32, 112, 112]         864                       [3, 3]
│    │    └─BatchNorm2d: 3-2                  [1, 32, 112, 112]         [1, 32, 112, 112]         64                        --
│    │    └─ReLU6: 3-3                        [1, 32, 112, 112]         [1, 32, 112, 112]         --                        --
│    └─InvertedResidual: 2-2                  [1, 32, 112, 112]         [1, 16, 112, 112]        

6) Let us prepare the train/validation/test split of the source dataset and the corresponding data loaders.

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

import os, torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset

# Location of the labelled source images (one folder per class)
source_dir = '/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Source/Step 1 - Moved, Final Source Data'

# Per-channel statistics of the source set, computed earlier in this notebook
mean = [0.2800, 0.2800, 0.2800]
std  = [0.3013, 0.3013, 0.3013]

# Training adds light augmentation in front of the tensor conversion + normalization;
# validation and test use the tensor conversion + normalization only.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(8),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
val_test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# 1) Transform-free view of the dataset: used only for its length and class mapping
base_ds = ImageFolder(source_dir, transform=None)

# 2) Seeded 75 / 10 / 15 split over a single shuffled index permutation
torch.manual_seed(1906525)
n_total = len(base_ds)
n_train, n_val = int(0.75 * n_total), int(0.10 * n_total)
n_test = n_total - n_train - n_val   # remainder, so the three parts always sum to n_total

perm = torch.randperm(n_total)
train_idx, val_idx, test_idx = torch.split(perm, [n_train, n_val, n_test])

# 3) One ImageFolder per split so each split carries its own transform,
#    each restricted to its share of the permutation
train_full, val_full, test_full = (
    ImageFolder(source_dir, transform=split_tf)
    for split_tf in (train_transform, val_test_transform, val_test_transform)
)
train_ds, val_ds, test_ds = (
    Subset(full_ds, split_idx)
    for full_ds, split_idx in (
        (train_full, train_idx),
        (val_full, val_idx),
        (test_full, test_idx),
    )
)

# 4) DataLoaders: only the training loader shuffles
batch_size, num_workers = 32, 4
loader_kwargs = {
    "batch_size": batch_size,
    "num_workers": num_workers,
    "pin_memory": True,
    "persistent_workers": (num_workers > 0),
}

train_loader = DataLoader(train_ds, shuffle=True,  **loader_kwargs)
val_loader   = DataLoader(val_ds,   shuffle=False, **loader_kwargs)
test_loader  = DataLoader(test_ds,  shuffle=False, **loader_kwargs)

# 5) Report the label mapping and the split sizes
print("Classes (alphabetical order):", base_ds.classes)
print("class_to_idx:", base_ds.class_to_idx)
print(f"Train size: {len(train_ds)}, Val size: {len(val_ds)}, Test size: {len(test_ds)}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Classes (alphabetical order): ['AbdomenCT', 'BreastMRI', 'ChestCT', 'ChestXR', 'HandXR', 'HeadCT']
class_to_idx: {'AbdomenCT': 0, 'BreastMRI': 1, 'ChestCT': 2, 'ChestXR': 3, 'HandXR': 4, 'HeadCT': 5}
Train size: 2250, Val size: 300, Test size: 450


7) Let us start modelling for the source dataset (main) with MobileNet-V2 and save the checkpoints.

In [None]:
# ─── 0) Mount & Imports ─────────────────────────────────────────────────────
import os, random, numpy as np
import torch, torch.nn as nn, torch.backends.cudnn as cudnn
import pandas as pd, seaborn as sns, matplotlib.pyplot as plt

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset
from torch.optim import SGD, Adam, RMSprop, Adagrad, AdamW
import torch.optim.lr_scheduler as lr_scheduler

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, mean_squared_error, confusion_matrix
)

# NOTE: assumes MobileNetV2 class is already defined above in this notebook.

# ─── 1) Determinism ─────────────────────────────────────────────────────────
# One seed for every RNG used by this cell: Python's `random`, NumPy, and
# PyTorch (CPU and all CUDA devices). The same SEED also drives the data split below.
SEED = 1906525
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
cudnn.deterministic = True   # deterministic conv algorithms
cudnn.benchmark = False      # avoid non-deterministic autotuner
# See: https://pytorch.org/docs/stable/notes/randomness.html  (reproducibility notes)

# ─── 2) Paths & Hyperparams ─────────────────────────────────────────────────
# Labelled source images, one sub-folder per class (read through ImageFolder below).
SRC_DIR = '/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Source/Step 1 - Moved, Final Source Data'

# Output folders on Drive: model state_dicts, confusion-matrix PNGs, metrics workbook.
CKPT_DIR    = '/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Source Training/Checkpoints'
CM_DIR      = '/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Source Training/Confusion Matrices'
METRICS_DIR = '/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Source Training/Performance Metrics'
os.makedirs(CKPT_DIR, exist_ok=True)
os.makedirs(CM_DIR, exist_ok=True)
os.makedirs(METRICS_DIR, exist_ok=True)

# Per-channel source statistics; these are the rounded values printed by the
# source-normalization cell earlier in this notebook.
MEAN = [0.2800, 0.2800, 0.2800]
STD  = [0.3013, 0.3013, 0.3013]

# Training: light augmentation (random horizontal flip, rotation of up to 8 degrees)
# followed by tensor conversion and normalization.
train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(8),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])
# Validation/test: tensor conversion and normalization only (no augmentation).
val_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])

# Experiment grid: every optimizer is trained once per entry of EPOCHS_LIST,
# always with the same initial learning rate LR.
OPTIMIZERS  = [SGD, Adam, RMSprop, Adagrad, AdamW]
BATCH_SIZE  = 32
EPOCHS_LIST = [5, 10, 20, 40, 80]
LR          = 1e-3
DEVICE      = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ─── 3) Prepare DataLoaders (deterministic split, per-split transforms) ─────
# Transform-free view: only its length and its class mapping are used.
base_ds = ImageFolder(SRC_DIR, transform=None)

# 75 / 10 / 15 split; the test share is the remainder so the parts sum to n_total.
n_total = len(base_ds)
n_train, n_val = int(0.75 * n_total), int(0.10 * n_total)
n_test = n_total - n_train - n_val

# A dedicated generator keeps the permutation independent of the global RNG state.
g = torch.Generator().manual_seed(SEED)
perm = torch.randperm(n_total, generator=g)
train_idx, val_idx, test_idx = torch.split(perm, [n_train, n_val, n_test])

# One ImageFolder per split so that each split carries its own transform.
train_full, val_full, test_full = (
    ImageFolder(SRC_DIR, transform=split_tf)
    for split_tf in (train_tf, val_tf, val_tf)
)
train_ds, val_ds, test_ds = (
    Subset(full_ds, split_idx)
    for full_ds, split_idx in (
        (train_full, train_idx),
        (val_full, val_idx),
        (test_full, test_idx),
    )
)

# Only the training loader shuffles.
loader_kwargs = {"batch_size": BATCH_SIZE, "num_workers": 4, "pin_memory": True}
train_loader = DataLoader(train_ds, shuffle=True,  **loader_kwargs)
val_loader   = DataLoader(val_ds,   shuffle=False, **loader_kwargs)
test_loader  = DataLoader(test_ds,  shuffle=False, **loader_kwargs)

# Sanity print: class names (alphabetical) and the folder -> label mapping.
print("Classes:", base_ds.classes)
print("class_to_idx:", base_ds.class_to_idx)

# ─── 4) Metrics / Eval helper ───────────────────────────────────────────────
def _dataset_classes(dataset):
    """Return the ``classes`` list of ``dataset``, looking through ``.dataset`` wrappers.

    Handles a plain dataset that exposes ``classes`` as well as any depth of
    wrappers (such as ``Subset``) that keep the wrapped dataset in ``.dataset``.

    Raises:
        AttributeError: If no dataset in the chain has a ``classes`` attribute.
    """
    current = dataset
    while current is not None:
        classes = getattr(current, 'classes', None)
        if classes is not None:
            return classes
        current = getattr(current, 'dataset', None)
    raise AttributeError(
        "could not find a `classes` attribute on the loader's dataset or on any dataset it wraps"
    )


def evaluate_split(model, loader, criterion=None, plot_cm=False, cm_name=None):
    """Evaluate ``model`` on every batch of ``loader`` and return summary metrics.

    Args:
        model: Network returning class logits of shape [B, num_classes].
        loader: DataLoader yielding ``(inputs, labels)`` batches. Its dataset, or
            a dataset it wraps via ``.dataset``, must expose ``classes``.
        criterion: Optional loss function; when given, the sample-weighted mean
            loss over the split is returned as the last element.
        plot_cm: When True and ``cm_name`` is set, save the confusion matrix as a
            heatmap to ``CM_DIR/cm_name``.
        cm_name: File name of the confusion-matrix image.

    Returns:
        Tuple ``(acc, prec, rec, spec, f1, auc, mse, loss)``. Precision, recall,
        F1 and specificity are macro averages over classes; ``auc`` is the
        one-vs-one multiclass ROC AUC, or NaN when it cannot be computed;
        ``mse`` is the mean squared error between integer class indices;
        ``loss`` is None when no ``criterion`` is given.

    The model is left in eval mode.
    """
    class_names = _dataset_classes(loader.dataset)

    model.eval()
    all_preds, all_labels, all_probs = [], [], []
    total_loss, total_samples = 0.0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            logits = model(x)
            if criterion is not None:
                # Weight by batch size so a smaller last batch is not over-counted.
                total_loss += criterion(logits, y).item() * x.size(0)
            probs = torch.softmax(logits, dim=1).cpu().numpy()
            preds = logits.argmax(1).cpu().numpy()
            all_probs.extend(probs)
            all_preds.extend(preds)
            all_labels.extend(y.cpu().numpy())
            total_samples += x.size(0)

    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    rec = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    # Multiclass AUC via one-vs-one (ovo); requires probability scores
    try:
        auc = roc_auc_score(all_labels, np.array(all_probs), multi_class='ovo')
    except Exception:
        # Deliberate best effort: AUC is undefined e.g. when a class is missing
        # from the split; report NaN instead of aborting the whole evaluation.
        auc = float('nan')
    # Computed on class indices; the labels are categorical, so treat this as a
    # rough error indicator rather than a distance.
    mse = mean_squared_error(all_labels, all_preds)

    # Specificity (macro over classes) from the confusion matrix:
    # per class, TN / (TN + FP), taken as 0 when the class has no negatives.
    cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))
    tp = np.diag(cm)
    fp = cm.sum(axis=0) - tp
    fn = cm.sum(axis=1) - tp
    tn = cm.sum() - tp - fp - fn
    negatives = tn + fp
    specs = np.where(negatives > 0, tn / np.maximum(negatives, 1), 0.0)
    spec = float(np.mean(specs))

    if plot_cm and cm_name:
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names,
                    yticklabels=class_names,
                    ax=ax)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        fig.tight_layout()
        fig.savefig(os.path.join(CM_DIR, cm_name))
        plt.close(fig)  # release the figure; many runs would otherwise pile up

    loss = (total_loss / total_samples) if criterion is not None else None
    return acc, prec, rec, spec, f1, auc, mse, loss

# AUC API ref: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
# Multiclass ROC example: https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html

# ─── 5) Training & Logging ─────────────────────────────────────────────────
# One independent training run per (optimizer, epoch budget) pair. After every
# run the checkpoint is saved and the metrics workbook is rewritten, so an
# interrupted session keeps the results of all finished runs.
master = []
metrics_file = os.path.join(METRICS_DIR, 'MNV2_MainPretrain.xlsx')

for opt_cls in OPTIMIZERS:
    for num_epochs in EPOCHS_LIST:
        run_name = f"{opt_cls.__name__}_ep{num_epochs}"
        print(f"\n▶ Run: {run_name}")

        # Re-seed at the start of every run: each run then begins from the same
        # weight initialization and data order, can be reproduced without
        # replaying the runs before it, and differs from the others only in
        # optimizer and epoch budget.
        random.seed(SEED)
        np.random.seed(SEED)
        torch.manual_seed(SEED)
        torch.cuda.manual_seed_all(SEED)

        model = MobileNetV2(num_classes=len(base_ds.classes)).to(DEVICE)
        criterion = nn.CrossEntropyLoss()
        optimizer = opt_cls(model.parameters(), lr=LR)
        # Cut the learning rate by 10x after 5 epochs without validation-loss improvement.
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

        # gradient norm for curiosity (before & after)
        def get_grad_norm():
            """Return the global L2 norm of the loss gradient on one training batch.

            Uses the current run's ``model`` and ``criterion``. The model is put
            in eval mode, so BatchNorm statistics are not updated and dropout is
            off. Gradients are left populated; the training loop clears them
            before each step.
            """
            model.eval(); model.zero_grad()
            x, y = next(iter(train_loader))
            x, y = x.to(DEVICE), y.to(DEVICE)
            loss = criterion(model(x), y)
            loss.backward()
            total = 0.0
            for p in model.parameters():
                if p.grad is not None:
                    grad = p.grad.detach()
                    total += float(torch.norm(grad)**2)
            return total**0.5

        grad_before = get_grad_norm()

        for epoch in range(1, num_epochs + 1):
            model.train()
            for x, y in train_loader:
                x, y = x.to(DEVICE), y.to(DEVICE)
                optimizer.zero_grad()
                loss = criterion(model(x), y)
                loss.backward()
                optimizer.step()

            # validation & LR schedule
            _, _, _, _, _, _, _, val_loss = evaluate_split(model, val_loader, criterion)
            scheduler.step(val_loss)

        # final metrics on train/val/test
        # NOTE: train_loader applies the random training augmentations and
        # shuffles, so the train_* metrics are measured on augmented samples.
        train_acc, train_prec, train_rec, train_spec, train_f1, train_auc, train_mse, _ = evaluate_split(model, train_loader)
        val_acc, val_prec, val_rec, val_spec, val_f1, val_auc, val_mse, _ = evaluate_split(model, val_loader)
        # The test confusion matrix is saved only for the 40-epoch runs. It is
        # produced by this final test pass: the weights are the ones reached
        # after epoch 40, so no separate evaluation inside the epoch loop is needed.
        save_cm = (num_epochs == 40)
        test_acc, test_prec, test_rec, test_spec, test_f1, test_auc, test_mse, _ = evaluate_split(
            model, test_loader,
            plot_cm=save_cm,
            cm_name=f"{opt_cls.__name__}_ep40.png" if save_cm else None,
        )
        grad_after = get_grad_norm()

        print(f"Completed {run_name} → Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

        # save checkpoint
        torch.save(model.state_dict(), os.path.join(CKPT_DIR, f"{run_name}.pth"))

        # log row (precision/sensitivity/specificity/f1/auc are the TEST-split values)
        master.append({
            'Model': 'MobileNet-V2',
            'Optimizer': opt_cls.__name__, 'Epochs': num_epochs,
            'train_acc': train_acc, 'val_acc': val_acc, 'test_acc': test_acc,
            'train_mse': train_mse, 'val_mse': val_mse, 'test_mse': test_mse,
            'precision': test_prec, 'sensitivity': test_rec,
            'specificity': test_spec, 'f1': test_f1, 'auc': test_auc,
            'grad_before': grad_before, 'grad_after': grad_after
        })

        # Persist after every run so a disconnect never loses finished results.
        pd.DataFrame(master).to_excel(metrics_file, index=False)

# write metrics Excel (final state; also covers an empty experiment grid)
pd.DataFrame(master).to_excel(metrics_file, index=False)
print("Training completed. Saved checkpoints, confusion matrices, and metrics for MobileNetV2 (source).")

Classes: ['AbdomenCT', 'BreastMRI', 'ChestCT', 'ChestXR', 'HandXR', 'HeadCT']
class_to_idx: {'AbdomenCT': 0, 'BreastMRI': 1, 'ChestCT': 2, 'ChestXR': 3, 'HandXR': 4, 'HeadCT': 5}

▶ Run: SGD_ep5
Completed SGD_ep5 → Train Acc: 0.7151, Test Acc: 0.6889

▶ Run: SGD_ep10
Completed SGD_ep10 → Train Acc: 0.8573, Test Acc: 0.8467

▶ Run: SGD_ep20
Completed SGD_ep20 → Train Acc: 0.9964, Test Acc: 0.9956

▶ Run: SGD_ep40
Completed SGD_ep40 → Train Acc: 0.8818, Test Acc: 0.8711

▶ Run: SGD_ep80
Completed SGD_ep80 → Train Acc: 0.9996, Test Acc: 1.0000

▶ Run: Adam_ep5
Completed Adam_ep5 → Train Acc: 0.9969, Test Acc: 0.9889

▶ Run: Adam_ep10
Completed Adam_ep10 → Train Acc: 0.9929, Test Acc: 0.9911

▶ Run: Adam_ep20
Completed Adam_ep20 → Train Acc: 1.0000, Test Acc: 1.0000

▶ Run: Adam_ep40
Completed Adam_ep40 → Train Acc: 1.0000, Test Acc: 1.0000

▶ Run: Adam_ep80
Completed Adam_ep80 → Train Acc: 1.0000, Test Acc: 1.0000

▶ Run: RMSprop_ep5
Completed RMSprop_ep5 → Train Acc: 1.0000, Test Acc: 0.

8) Let us inspect the target (unlabelled) dataset's channels and image sizes.

In [None]:
import os
from pathlib import Path
from collections import Counter
from PIL import Image

# Folder holding the unlabelled target images; searched recursively below.
TARGET_DIR = Path("/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Images")

# Lower-case suffixes accepted as image files.
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}

# Collect every regular file under TARGET_DIR whose suffix is an image suffix.
files = []
for candidate in TARGET_DIR.rglob("*"):
    if candidate.suffix.lower() in IMAGE_EXTS and candidate.is_file():
        files.append(candidate)

print(f"Total image files: {len(files)}")

# Tallies of PIL modes and (width, height) pairs, plus the per-image size list.
mode_counter = Counter()
size_counter = Counter()
dimensions = []

for img_file in files:
    try:
        with Image.open(img_file) as img:
            mode = img.mode  # e.g., 'RGB', 'L', etc.
            width, height = img.size
            dims = (width, height)
            mode_counter[mode] += 1
            size_counter[dims] += 1
            dimensions.append(dims)
    except Exception as e:
        # Unreadable files are reported and left out of the tallies.
        print(f"Error with {img_file}: {e}")

print("\nImage modes (counts):")
for seen_mode, n_images in mode_counter.items():
    print(f"  Mode: {seen_mode}, Count: {n_images}")

print("\nUnique image dimensions (width × height) and their counts:")
for (w, h), n_images in size_counter.most_common():
    print(f"  {w}×{h}: {n_images} images")

def channels_from_mode(mode):
    """Return the number of bands (channels) of a PIL image in the given mode."""
    # A throw-away 1x1 image of that mode reports its band names.
    probe = Image.new(mode, (1, 1))
    return len(probe.getbands())

# Report the channel count of every mode found during the inspection above.
print("\nChannels per mode:")
for seen_mode in mode_counter:
    print(f"  Mode: {seen_mode}, Channels: {channels_from_mode(seen_mode)}")

Total image files: 3000

Image modes (counts):
  Mode: RGB, Count: 3000

Unique image dimensions (width × height) and their counts:
  224×224: 3000 images

Channels per mode:
  Mode: RGB, Channels: 3


9) Let us do normalization for the target (unlabelled) dataset.

In [None]:
import os
from pathlib import Path
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

class UnlabeledImageDataset(Dataset):
    """Label-free dataset over the image files located directly in one folder.

    Only regular files whose lower-cased suffix is in ``IMAGE_SUFFIXES`` are
    used; sub-folders are not searched. ``image_paths`` is sorted, so an index
    always refers to the same file regardless of the order in which the
    filesystem lists the directory.

    Args:
        root_dir: Folder containing the images.
        transform: Optional callable applied to each RGB ``PIL.Image`` before
            it is returned.
    """

    # Suffixes (lower-case, with dot) accepted as images.
    IMAGE_SUFFIXES = frozenset({'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'})

    def __init__(self, root_dir, transform=None):
        self.root_dir = Path(root_dir)
        self.transform = transform
        # iterdir() yields entries in arbitrary order; sorting makes the
        # sample order (and therefore every batch) deterministic.
        self.image_paths = sorted(
            p for p in self.root_dir.iterdir()
            if p.suffix.lower() in self.IMAGE_SUFFIXES and p.is_file()
        )

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return image ``idx`` as RGB, passed through ``transform`` if one was given."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted, instead of leaving the handle to the GC.
        with Image.open(self.image_paths[idx]) as src:
            img = src.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img

# 1. Define paths
root = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Images"


def _channel_stats(loader, n_channels=3):
    """Return per-channel ``(mean, var, std)`` over every pixel yielded by ``loader``.

    Args:
        loader: Iterable of batches. A batch is either an image tensor of shape
            [B, C, H, W] or a list/tuple whose first element is such a tensor.
        n_channels: Number of channels C in the images.

    Returns:
        Three float32 tensors of shape [C]: mean, population variance and
        population standard deviation.

    Raises:
        ValueError: If ``loader`` yields no images.
    """
    pixel_count = 0
    # float64 accumulators: the sums run over every pixel of the dataset, and
    # E[x^2] - E[x]^2 subtracts two close numbers, so float32 sums lose digits.
    total = torch.zeros(n_channels, dtype=torch.float64)
    total_sq = torch.zeros(n_channels, dtype=torch.float64)

    for batch in loader:
        imgs = batch[0] if isinstance(batch, (list, tuple)) else batch
        b, _, h, w = imgs.shape
        pixel_count += b * h * w
        total += imgs.sum(dim=[0, 2, 3], dtype=torch.float64)
        total_sq += (imgs ** 2).sum(dim=[0, 2, 3], dtype=torch.float64)

    if pixel_count == 0:
        raise ValueError("loader yielded no images; cannot compute statistics")

    mean = total / pixel_count
    # Rounding can push the variance of a (near-)constant channel slightly
    # below zero; clamp so the square root never produces NaN.
    var = torch.clamp(total_sq / pixel_count - mean ** 2, min=0.0)
    std = torch.sqrt(var)
    return mean.to(torch.float32), var.to(torch.float32), std.to(torch.float32)


# 2. Base loader to compute pre-normalization mean & std
# NOTE(review): `base_ds` is also the name of the source ImageFolder used by the
# source-training cell (it reads `base_ds.classes`); re-running that training
# loop after this cell would pick up this dataset instead — consider renaming.
base_ds = UnlabeledImageDataset(root, transform=transforms.ToTensor())
base_loader = DataLoader(base_ds, batch_size=64, shuffle=False, num_workers=2)

# [::2] keeps (mean, std) and drops the variance in the middle.
mean_pre, std_pre = _channel_stats(base_loader)[::2]

print("Pre-normalization mean:", mean_pre)
print("Pre-normalization std :", std_pre)

# 3. Transformation including normalization
norm_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_pre.tolist(), std_pre.tolist())
])

# 4. Loader for post-normalization checks
norm_ds = UnlabeledImageDataset(root, transform=norm_transform)
norm_loader = DataLoader(norm_ds, batch_size=64, shuffle=False, num_workers=2)

# 5. One-batch check (first 64 images only, so not expected to be exactly 0 / 1)
imgs = next(iter(norm_loader))
print("One-batch post-normalization mean:", imgs.mean(dim=[0, 2, 3]))
print("One-batch post-normalization std :", imgs.std(dim=[0, 2, 3]))

# 6. Full dataset post-normalization stats (expected: mean ≈ 0, std ≈ 1 per channel)
mean_post, std_post = _channel_stats(norm_loader)[::2]

print("Post-normalization mean (whole dataset):", mean_post)
print("Post-normalization std  (whole dataset):", std_post)

Pre-normalization mean: tensor([0.3018, 0.3018, 0.3018])
Pre-normalization std : tensor([0.3210, 0.3210, 0.3210])
One-batch post-normalization mean: tensor([0.9964, 0.9964, 0.9964])
One-batch post-normalization std : tensor([0.8414, 0.8414, 0.8414])
Post-normalization mean (whole dataset): tensor([4.1521e-09, 4.1521e-09, 4.1521e-09])
Post-normalization std  (whole dataset): tensor([1., 1., 1.])


10) Let us do Domain Adaptation with DANN (according to https://arxiv.org/abs/1505.07818) using MobileNetV2's feature extractor.

In [None]:
# DANN with MobileNetV2 feature extractor
import os, random, numpy as np, pandas as pd, math
from PIL import Image
from tqdm import tqdm

import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets

import matplotlib.pyplot as plt, seaborn as sns
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, roc_auc_score

# ─── Reproducibility ──────────────────────────────────────
# One fixed seed is pushed into every RNG this cell relies on.
SEED = 1906525
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
# assignment presumably only influences processes spawned later — confirm.
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Request deterministic cuDNN kernels and switch off its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ─── Paths ───────────────────────────────
# Inputs: labelled source images, source-trained checkpoints, and the target
# images with their label CSV.
SRC_DIR = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Source/Step 1 - Moved, Final Source Data"
CKPT_DIR = "/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/Source Checkpoints"
TGT_IMG_ROOT = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Images"
TGT_CSV = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Target Labels.csv"

# Outputs: confusion-matrix images and the metrics workbook for the DANN runs.
CONF_DIR = "/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/DANN/Confusion Matrices"
METRICS_DIR = "/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/DANN/Performance Metrics"
for out_dir in (CONF_DIR, METRICS_DIR):
    os.makedirs(out_dir, exist_ok=True)

# ─── Normalization (per-branch) ──────────────────────────
# Source and target images are standardised with separate statistics.
# The target values equal the "Pre-normalization" mean/std printed earlier
# in this notebook.
mean_src, std_src = [0.2800] * 3, [0.3013] * 3
mean_tgt, std_tgt = [0.3018] * 3, [0.3210] * 3

tx_src = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_src, std_src),
])
tx_tgt = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_tgt, std_tgt),
])

# ─── Import / assume MobileNetV2 backbone is defined above ────────────────
# Assumes `class MobileNetV2(nn.Module)` with .forward_features(x)->(B,1280) exists.

# ---- Minimal wrapper that exposes 1280-d pooled features
class MobileNetV2_FE(nn.Module):
    """Adapter that makes `net.forward_features` the module's forward pass.

    Args:
        net: Network object exposing a `forward_features(x)` method.
    """

    def __init__(self, net):
        super().__init__()
        self.net = net

    def forward(self, x):
        # Per the note above this class, the result is expected to be a
        # (B, 1280) pooled feature tensor.
        pooled = self.net.forward_features(x)
        return pooled

def load_mobilenetv2_features(ckpt_path, num_classes):
    """Build a MobileNetV2 and load a checkpoint into it without the classifier head.

    Args:
        ckpt_path: Path to a file that `torch.load` turns into a state dict.
        num_classes: Forwarded to the `MobileNetV2` constructor.

    Returns:
        MobileNetV2_FE: Wrapper around the loaded network that returns its
        `forward_features` output.
    """
    backbone = MobileNetV2(num_classes=num_classes)
    state = torch.load(ckpt_path, map_location='cpu')

    # Drop every `classifier.*` entry so only the remaining weights are restored.
    head_keys = [key for key in state if key.startswith('classifier.')]
    for key in head_keys:
        del state[key]

    # strict=False tolerates the keys that were just removed.
    missing_keys, unexpected_keys = backbone.load_state_dict(state, strict=False)
    print(f"[load_mobilenetv2_features] missing={len(missing_keys)} unexpected={len(unexpected_keys)}")
    return MobileNetV2_FE(backbone)

# ─── GRL (λ scales reversed gradient; λ is NOT multiplied on loss) ────────
from torch.autograd import Function
class GradReverse(Function):
    """Gradient reversal layer: identity forward, negated and scaled gradient backward."""

    @staticmethod
    def forward(ctx, x, lambd):
        # Remember the coefficient for the backward pass; the data passes through unchanged.
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        flipped = grad_output.neg()
        # The second return value is the (non-existent) gradient for `lambd`.
        return flipped * ctx.lambd, None


def grad_reverse(x, lambd=1.0):
    """Apply `GradReverse` to `x` with reversal coefficient `lambd`."""
    return GradReverse.apply(x, lambd)

# ─── DANN head (MobileNetV2 features → bottleneck → task/domain heads) ────
class DANN_MobileNetV2(nn.Module):
    """DANN model: feature extractor, shared bottleneck, class head and domain head.

    Args:
        features: Module whose output feeds a `Linear(1280, ...)` layer, so it
            must produce 1280 values per sample.
        num_classes: Output width of the class head.
        bottleneck_dim: Width of the shared bottleneck representation.
        p_drop: Dropout probability applied before the class head's linear layer.
    """

    def __init__(self, features, num_classes, bottleneck_dim=256, p_drop=0.2):
        super().__init__()
        self.features = features
        # Layers are created in the order bottleneck -> class head -> domain
        # head so that seeded weight initialisation is unchanged.
        self.bottleneck = nn.Sequential(
            nn.Linear(in_features=1280, out_features=bottleneck_dim),
            nn.BatchNorm1d(num_features=bottleneck_dim),
            nn.ReLU(inplace=True),
        )
        self.cls_head = nn.Sequential(
            nn.Dropout(p=p_drop),
            nn.Linear(in_features=bottleneck_dim, out_features=num_classes),
        )
        self.dom_head = nn.Sequential(
            nn.Linear(in_features=bottleneck_dim, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=2),
        )

    def forward(self, x, lambda_=0.0):
        """Return `(class_logits, domain_logits)` for the batch `x`.

        `lambda_` is the gradient-reversal coefficient applied between the
        bottleneck and the domain head.
        """
        shared = self.bottleneck(self.features(x))
        class_logits = self.cls_head(shared)
        # The domain head sees the same activations; only its gradient is reversed.
        domain_logits = self.dom_head(grad_reverse(shared, lambda_))
        return class_logits, domain_logits

# ─── Datasets & loaders ───────────────────────────────────────
# Source labeled
# Labelled source images; class ids follow the ordering of `src_ds.classes`.
src_ds = datasets.ImageFolder(SRC_DIR, transform=tx_src)
CLASS_NAMES = src_ds.classes
CLASS_TO_IDX = dict(zip(CLASS_NAMES, range(len(CLASS_NAMES))))
N_CLASSES = len(CLASS_NAMES)

def src_loader(bs=32, shuffle=True, num_workers=2):
    """Return a new DataLoader over the source dataset `src_ds`.

    Args:
        bs: Batch size.
        shuffle: Whether to reshuffle every epoch.
        num_workers: Number of loader worker processes.
    """
    loader_kwargs = dict(batch_size=bs, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
    return DataLoader(src_ds, **loader_kwargs)

# Target flat folder + CSV with true labels (for evaluation only)
class TargetFlatCSV(Dataset):
    """Target images stored in one flat folder, labelled through a CSV file.

    The CSV's first column is read as the image file name (joined onto `root`)
    and its second column as the class name. Class names are converted to
    integer ids with the module-level `CLASS_TO_IDX`.

    Args:
        root: Folder containing the image files.
        csv_path: Path of the CSV file.
        transform: Callable applied to each RGB PIL image.

    Raises:
        KeyError: If the CSV contains a class name missing from `CLASS_TO_IDX`.
    """
    def __init__(self, root, csv_path, transform):
        self.root = root; self.transform = transform
        df = pd.read_csv(csv_path)
        self.names = df.iloc[:,0].astype(str).tolist()
        self.cls_names = df.iloc[:,1].astype(str).tolist()
        # Fail early with a readable message instead of a bare KeyError on
        # the first unknown label.
        unknown = sorted(set(self.cls_names) - set(CLASS_TO_IDX))
        if unknown:
            raise KeyError(
                f"{csv_path}: class names not present in the source classes: {unknown}"
            )
        self.cls_idx = [CLASS_TO_IDX[c] for c in self.cls_names]

    def __len__(self): return len(self.names)

    def __getitem__(self, i):
        """Return `(transformed_image, class_index, file_name)` for row `i`."""
        fp = os.path.join(self.root, self.names[i])
        # The context manager closes the file handle deterministically;
        # convert() returns an independent in-memory image.
        with Image.open(fp) as raw:
            img = raw.convert("RGB")
        return self.transform(img), self.cls_idx[i], self.names[i]

# One shared target dataset, used for adversarial training and for evaluation.
tgt_ds = TargetFlatCSV(root=TGT_IMG_ROOT, csv_path=TGT_CSV, transform=tx_tgt)

def tgt_loader(bs=32, shuffle=True, num_workers=2):
    """Return a new DataLoader over the target dataset `tgt_ds`.

    Args:
        bs: Batch size.
        shuffle: Whether to reshuffle every epoch.
        num_workers: Number of loader worker processes.
    """
    loader_kwargs = dict(batch_size=bs, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
    return DataLoader(tgt_ds, **loader_kwargs)

# ─── Metrics helpers ──────────────────────────────────────────
def save_confusion_matrix(y_true, y_pred, class_names, save_path):
    """Compute the confusion matrix, save it as a heatmap image and return it.

    Args:
        y_true: True class ids.
        y_pred: Predicted class ids.
        class_names: Class names; their positions define the label ids
            0..len(class_names)-1 and the tick labels.
        save_path: Destination image path.

    Returns:
        The matrix returned by `confusion_matrix` (rows labelled "Actual",
        columns labelled "Predicted" in the plot).
    """
    label_ids = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    plt.figure(figsize=(10,8))
    heatmap_kwargs = dict(
        annot=True, fmt='d', cmap='Blues',
        xticklabels=class_names, yticklabels=class_names,
        cbar=False, linewidths=.5,
    )
    ax = sns.heatmap(cm, **heatmap_kwargs)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix (Target)')

    plt.tight_layout()
    plt.savefig(save_path, dpi=220)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()
    return cm

def specificity_from_cm(cm):
    """Per-class specificity TN / (TN + FP) from a square confusion matrix.

    Args:
        cm: 2-D NumPy array of counts with rows as actual classes and
            columns as predicted classes.

    Returns:
        np.ndarray: One specificity value per class.
    """
    tp = np.diag(cm)
    fp = cm.sum(axis=0) - tp   # predicted as k but actually another class
    fn = cm.sum(axis=1) - tp   # actually k but predicted as another class
    tn = cm.sum() - (tp + fp + fn)
    # The tiny epsilon keeps the ratio defined when a class has TN + FP == 0.
    return tn / (tn + fp + 1e-12)

def macro_auc(y_true, y_proba, n_classes):
    """Macro-averaged ROC AUC from integer labels and per-class scores.

    Args:
        y_true: Sequence of integer class ids in [0, n_classes).
        y_proba: Per-sample class scores, passed unchanged to
            `roc_auc_score` — presumably shape (N, n_classes).
        n_classes: Number of classes; sets the width of the one-hot targets.

    Returns:
        The value returned by `sklearn.metrics.roc_auc_score`.
    """
    labels = np.asarray(y_true)
    one_hot = np.eye(n_classes)[labels]   # row i is the one-hot vector of labels[i]
    return roc_auc_score(one_hot, y_proba, average="macro", multi_class="ovr")

# ─── One run: (checkpoint, λ) train + evaluate ────────────────────────────
def dann_train_eval(ckpt_path, ckpt_name, lambda_val, epochs=10, bs=32, lr=1e-4):
    """Adapt one source checkpoint with DANN at a fixed λ and score it on the target set.

    A fresh `DANN_MobileNetV2` is built on the backbone loaded from
    `ckpt_path`, trained on labelled source batches plus target batches whose
    labels are discarded, and then evaluated on every target image.

    Args:
        ckpt_path: Path of the source-trained checkpoint.
        ckpt_name: Identifier used in the result row and in the
            confusion-matrix file name.
        lambda_val: Gradient-reversal coefficient for the domain branch; it
            scales the reversed gradient, not the loss.
        epochs: Number of passes over the source loader.
        bs: Batch size of both training loaders.
        lr: Adam learning rate.

    Returns:
        dict: One metrics row for this (checkpoint, λ) pair. The AUC entry is
        NaN when it cannot be computed; the reason is printed.

    Side effects:
        Saves the confusion-matrix image `<ckpt_name>_<lambda_val>.png` in `CONF_DIR`.
    """
    # loaders
    Ls = src_loader(bs=bs, shuffle=True)
    Lt = tgt_loader(bs=bs, shuffle=True)

    # backbone + heads
    feats = load_mobilenetv2_features(ckpt_path, N_CLASSES)
    model = DANN_MobileNetV2(feats, N_CLASSES).to(device)
    opt = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    ce = nn.CrossEntropyLoss()

    model.train()
    # The target iterator lives across epochs and is restarted whenever it runs dry,
    # so one "epoch" is defined by the source loader only.
    it_tgt = iter(Lt)
    for _ in range(epochs):
        for x_s, y_s in Ls:
            x_s, y_s = x_s.to(device), y_s.to(device)
            try:
                x_t, _, _ = next(it_tgt)      # target labels are never used for training
            except StopIteration:
                it_tgt = iter(Lt)
                x_t, _, _ = next(it_tgt)
            x_t = x_t.to(device)

            # 1) Task loss on labeled source (no GRL)
            y_logits_s, _ = model(x_s, lambda_=0.0)
            L_cls = ce(y_logits_s, y_s)

            # 2) Domain loss on concat(source,target) with GRL(λ)
            # NOTE(review): the source batch is forwarded a second time here, so
            # BatchNorm statistics are updated twice per step for source images.
            x_dom = torch.cat([x_s, x_t], dim=0)
            _, d_logits = model(x_dom, lambda_=lambda_val)   # GRL scales gradients only
            d_labels = torch.cat([
                torch.zeros(x_s.size(0), dtype=torch.long),   # 0 = source
                torch.ones(x_t.size(0), dtype=torch.long)     # 1 = target
            ], dim=0).to(device)
            L_dom = ce(d_logits, d_labels)

            loss = L_cls + L_dom
            opt.zero_grad(); loss.backward(); opt.step()

    # ── Evaluate on ALL target images (order from dataset)
    model.eval()
    all_true, all_pred, all_prob = [], [], []
    with torch.no_grad():
        for xb, yb, _names in DataLoader(tgt_ds, batch_size=64, shuffle=False, num_workers=2, pin_memory=True):
            xb = xb.to(device)
            y_logits, _ = model(xb, lambda_=0.0)
            probs = torch.softmax(y_logits, dim=1)
            pred = probs.argmax(dim=1)
            all_true.extend(yb.tolist())
            all_pred.extend(pred.cpu().tolist())
            all_prob.append(probs.cpu().numpy())
    all_prob = np.concatenate(all_prob, axis=0)

    # ── Metrics
    correct = int(np.sum(np.array(all_true)==np.array(all_pred)))
    total = len(all_true)
    acc = correct / total
    miss = 1.0 - acc

    prec, rec, f1, _ = precision_recall_fscore_support(
        all_true, all_pred, labels=list(range(N_CLASSES)),
        average='macro', zero_division=0
    )
    cm = save_confusion_matrix(
        all_true, all_pred, CLASS_NAMES,
        os.path.join(CONF_DIR, f"{ckpt_name}_{lambda_val}.png")
    )
    spec_macro = specificity_from_cm(cm).mean()

    # AUC is best-effort: keep the run alive, but say why the value is missing
    # instead of silently writing NaN.
    try:
        auc_macro = macro_auc(all_true, all_prob, N_CLASSES)
    except Exception as err:
        print(f"[dann_train_eval] AUC unavailable for {ckpt_name} (lambda={lambda_val}): {err!r}")
        auc_macro = float('nan')

    row = {
        "Checkpoint": ckpt_name,
        "GAN Type": "DANN",
        "DA Hyperparameter": "lambda",
        "DA Hyperparameter Value": lambda_val,
        "Correctly Identified Images": correct,
        "Incorrectly Identified Images": total - correct,
        "Image Classification Accuracy": acc,
        "Image Miss Rate": miss,
        "Precision (macro)": prec,
        "Sensitivity/Recall (macro)": rec,
        "Specificity (macro)": spec_macro,
        "F1-Score (macro)": f1,
        "AUC-ROC (macro OvR)": auc_macro
    }
    return row

# ─── Grid: 5 checkpoints × 5 λ values ─────────────────────────
# Every checkpoint found in CKPT_DIR is adapted once per λ value.
lambda_vals = [0.01, 0.05, 0.1, 0.5, 1.0]
ckpt_files = [name for name in os.listdir(CKPT_DIR) if name.endswith(".pth")]
results = []

for fname in tqdm(sorted(ckpt_files), desc="Checkpoints x lambdas (MobileNetV2-DANN)"):
    ckpt_path = os.path.join(CKPT_DIR, fname)
    # File-system-friendly run name: extension, spaces and dashes removed.
    stem, _ext = os.path.splitext(fname)
    ckpt_name = stem.replace(" ", "").replace("-", "").replace("__", "_")
    for lam in lambda_vals:
        row = dann_train_eval(ckpt_path, ckpt_name, lam, epochs=10, bs=32, lr=1e-4)
        results.append(row)

# ─── Save Excel ─────────────────
# Identification columns come first; metric columns keep their original order.
df = pd.DataFrame(results)
lead_cols = ["Checkpoint","GAN Type","DA Hyperparameter","DA Hyperparameter Value"]
ordered = lead_cols + [c for c in df.columns if c not in lead_cols]
df = df[ordered]

save_xlsx = os.path.join(METRICS_DIR, "MNV2_Main(DANN).xlsx")
df.to_excel(save_xlsx, index=False)
print(f"✅ Saved metrics to: {save_xlsx}")
print(f"✅ Confusion matrices in: {CONF_DIR}")

Checkpoints x lambdas (MobileNetV2-DANN):   0%|          | 0/5 [00:00<?, ?it/s]

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-DANN):  20%|██        | 1/5 [29:05<1:56:20, 1745.14s/it]

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-DANN):  40%|████      | 2/5 [39:53<54:59, 1099.82s/it]  

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-DANN):  60%|██████    | 3/5 [50:45<29:50, 895.27s/it] 

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-DANN):  80%|████████  | 4/5 [1:01:33<13:17, 797.83s/it]

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-DANN): 100%|██████████| 5/5 [1:12:25<00:00, 869.13s/it]


✅ Saved metrics to: /content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/DANN/Performance Metrics/MNV2_Main(DANN).xlsx
✅ Confusion matrices in: /content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/DANN/Confusion Matrices


11) Let us do Domain Adaptation with CDAN+E (according to https://arxiv.org/abs/1705.10667) using MobileNetV2's feature extractor.

In [None]:
# =========================
# CDAN+E with MobileNetV2 feature extractor
# Multilinear conditioning + Entropy conditioning per CDAN (NeurIPS'18)
# Assumes `class MobileNetV2(nn.Module)` with .forward_features(x)->(B,1280) is already defined above.
# =========================
import os, random, numpy as np, pandas as pd, math
from PIL import Image
from tqdm import tqdm

import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets

import matplotlib.pyplot as plt, seaborn as sns
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, roc_auc_score

# ─── Reproducibility ──────────────────────────────────────
# One fixed seed is pushed into every RNG this cell relies on.
SEED = 1906525
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
# assignment presumably only influences processes spawned later — confirm.
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Request deterministic cuDNN kernels and switch off its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ─── Paths ────────────────────────────────────
# Inputs: labelled source images, source-trained checkpoints, and the target
# images with their label CSV.
SRC_DIR = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Source/Step 1 - Moved, Final Source Data"
CKPT_DIR = "/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/Source Checkpoints"
TGT_IMG_ROOT = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Images"
TGT_CSV = "/content/drive/MyDrive/Research Project 2025/Final Data/Preprocessed/Third Set/Target/Step 2 - Target Unlabelling (Main)/Target Labels.csv"

# Save dirs: confusion-matrix images and the metrics workbook for the CDAN+E runs.
CONF_DIR = "/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/CDAN + E/Confusion Matrices"
METRICS_DIR = "/content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/CDAN + E/Performance Metrics"
for out_dir in (CONF_DIR, METRICS_DIR):
    os.makedirs(out_dir, exist_ok=True)

# ─── Normalization ────────────────────────────
# Source and target images are standardised with separate statistics.
# The target values equal the "Pre-normalization" mean/std printed earlier
# in this notebook.
mean_src, std_src = [0.2800] * 3, [0.3013] * 3
mean_tgt, std_tgt = [0.3018] * 3, [0.3210] * 3

tx_src = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_src, std_src),
])
tx_tgt = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_tgt, std_tgt),
])

# ─── MobileNetV2 feature extractor loader─────
class MobileNetV2_FE(nn.Module):
    """Adapter that makes `net.forward_features` the module's forward pass.

    Used as the feature extractor for domain adaptation; per the file's own
    note the output is expected to be a (B, 1280) pooled feature tensor.

    Args:
        net: Network object exposing a `forward_features(x)` method.
    """

    def __init__(self, net):
        super().__init__()
        self.net = net

    def forward(self, x):
        pooled = self.net.forward_features(x)
        return pooled

def load_mobilenetv2_features(ckpt_path, num_classes):
    """Build a MobileNetV2 and load a checkpoint into it without the classifier head.

    Args:
        ckpt_path: Path to a file that `torch.load` turns into a state dict.
        num_classes: Forwarded to the `MobileNetV2` constructor.

    Returns:
        MobileNetV2_FE: Wrapper around the loaded network that returns its
        `forward_features` output.
    """
    backbone = MobileNetV2(num_classes=num_classes)
    state = torch.load(ckpt_path, map_location='cpu')

    # Drop every `classifier.*` entry so only the remaining weights are restored.
    head_keys = [key for key in state if key.startswith('classifier.')]
    for key in head_keys:
        del state[key]

    # strict=False tolerates the keys that were just removed.
    missing_keys, unexpected_keys = backbone.load_state_dict(state, strict=False)
    print(f"[load_mobilenetv2_features] missing={len(missing_keys)} unexpected={len(unexpected_keys)}")
    return MobileNetV2_FE(backbone)

# ─── GRL (λ scales reversed gradients) ─────────────────────────────────────
from torch.autograd import Function
class GradReverse(Function):
    """Gradient reversal layer: identity forward, negated and scaled gradient backward."""

    @staticmethod
    def forward(ctx, x, lambd):
        # Remember the coefficient for the backward pass; the data passes through unchanged.
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        flipped = grad_output.neg()
        # The second return value is the (non-existent) gradient for `lambd`.
        return flipped * ctx.lambd, None


def grad_reverse(x, lambd=1.0):
    """Apply `GradReverse` to `x` with reversal coefficient `lambd`."""
    return GradReverse.apply(x, lambd)

# ─── CDAN+E head (MobileNetV2 feats → bottleneck → task; domain on f⊗g) ──
class CDAN_MobileNetV2(nn.Module):
    """CDAN model: the domain head sees the outer product of features and class probabilities.

    The bottleneck output z and the softmax prediction g are combined as
    z ⊗ g and flattened to `bottleneck_dim * num_classes` values per sample
    before the gradient-reversal layer and the domain head. Entropy
    conditioning is not done here; it is left to the training loss.

    Args:
        features: Module whose output feeds a `Linear(1280, ...)` layer, so it
            must produce 1280 values per sample.
        num_classes: Output width of the class head.
        bottleneck_dim: Width of the shared bottleneck representation.
        p_drop: Dropout probability applied before the class head's linear layer.
    """

    def __init__(self, features, num_classes, bottleneck_dim=256, p_drop=0.2):
        super().__init__()
        self.features = features
        # Layers are created in the order bottleneck -> class head -> domain
        # head so that seeded weight initialisation is unchanged.
        self.bottleneck = nn.Sequential(
            nn.Linear(in_features=1280, out_features=bottleneck_dim),
            nn.BatchNorm1d(num_features=bottleneck_dim),
            nn.ReLU(inplace=True),
        )
        self.cls_head = nn.Sequential(
            nn.Dropout(p=p_drop),
            nn.Linear(in_features=bottleneck_dim, out_features=num_classes),
        )
        dom_in = bottleneck_dim * num_classes
        self.dom_head = nn.Sequential(
            nn.Linear(in_features=dom_in, out_features=1024),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=2),
        )

    def forward(self, x, lambda_=0.0):
        """Return `(class_logits, domain_logits, class_probabilities)` for the batch `x`.

        `lambda_` is the gradient-reversal coefficient applied to the
        flattened outer product before the domain head.
        """
        z = self.bottleneck(self.features(x))
        y_logits = self.cls_head(z)
        g = torch.softmax(y_logits, dim=1)

        # (B, D, 1) @ (B, 1, C) -> (B, D, C), then flattened to (B, D*C).
        batch = z.size(0)
        joint = torch.bmm(z.unsqueeze(2), g.unsqueeze(1))
        joint_flat = joint.view(batch, -1)

        d_logits = self.dom_head(grad_reverse(joint_flat, lambda_))
        return y_logits, d_logits, g

# ─── Datasets & loaders ───────────────────────────────────────
# Source labeled
# Labelled source images; class ids follow the ordering of `src_ds.classes`.
src_ds = datasets.ImageFolder(SRC_DIR, transform=tx_src)
CLASS_NAMES = src_ds.classes
CLASS_TO_IDX = dict(zip(CLASS_NAMES, range(len(CLASS_NAMES))))
N_CLASSES = len(CLASS_NAMES)

def src_loader(bs=32, shuffle=True, num_workers=2):
    """Return a new DataLoader over the source dataset `src_ds`.

    Args:
        bs: Batch size.
        shuffle: Whether to reshuffle every epoch.
        num_workers: Number of loader worker processes.
    """
    loader_kwargs = dict(batch_size=bs, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
    return DataLoader(src_ds, **loader_kwargs)

# Target flat folder + CSV with true labels (for evaluation only)
class TargetFlatCSV(Dataset):
    """Target images stored in one flat folder, labelled through a CSV file.

    The CSV's first column is read as the image file name (joined onto `root`)
    and its second column as the class name. Class names are converted to
    integer ids with the module-level `CLASS_TO_IDX`.

    Args:
        root: Folder containing the image files.
        csv_path: Path of the CSV file.
        transform: Callable applied to each RGB PIL image.

    Raises:
        KeyError: If the CSV contains a class name missing from `CLASS_TO_IDX`.
    """
    def __init__(self, root, csv_path, transform):
        self.root = root; self.transform = transform
        df = pd.read_csv(csv_path)
        self.names = df.iloc[:,0].astype(str).tolist()
        self.cls_names = df.iloc[:,1].astype(str).tolist()
        # Fail early with a readable message instead of a bare KeyError on
        # the first unknown label.
        unknown = sorted(set(self.cls_names) - set(CLASS_TO_IDX))
        if unknown:
            raise KeyError(
                f"{csv_path}: class names not present in the source classes: {unknown}"
            )
        self.cls_idx = [CLASS_TO_IDX[c] for c in self.cls_names]

    def __len__(self): return len(self.names)

    def __getitem__(self, i):
        """Return `(transformed_image, class_index, file_name)` for row `i`."""
        fp = os.path.join(self.root, self.names[i])
        # The context manager closes the file handle deterministically;
        # convert() returns an independent in-memory image.
        with Image.open(fp) as raw:
            img = raw.convert("RGB")
        return self.transform(img), self.cls_idx[i], self.names[i]

# One shared target dataset, used for adversarial training and for evaluation.
tgt_ds = TargetFlatCSV(root=TGT_IMG_ROOT, csv_path=TGT_CSV, transform=tx_tgt)

def tgt_loader(bs=32, shuffle=True, num_workers=2):
    """Return a new DataLoader over the target dataset `tgt_ds`.

    Args:
        bs: Batch size.
        shuffle: Whether to reshuffle every epoch.
        num_workers: Number of loader worker processes.
    """
    loader_kwargs = dict(batch_size=bs, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
    return DataLoader(tgt_ds, **loader_kwargs)

# ─── Metrics helpers ────────────────────────────────────
def save_confusion_matrix(y_true, y_pred, class_names, save_path):
    """Compute the confusion matrix, save it as a heatmap image and return it.

    Args:
        y_true: True class ids.
        y_pred: Predicted class ids.
        class_names: Class names; their positions define the label ids
            0..len(class_names)-1 and the tick labels.
        save_path: Destination image path.

    Returns:
        The matrix returned by `confusion_matrix` (rows labelled "Actual",
        columns labelled "Predicted" in the plot).
    """
    label_ids = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    plt.figure(figsize=(10,8))
    heatmap_kwargs = dict(
        annot=True, fmt='d', cmap='Blues',
        xticklabels=class_names, yticklabels=class_names,
        cbar=False, linewidths=.5,
    )
    ax = sns.heatmap(cm, **heatmap_kwargs)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title('Confusion Matrix (Target)')

    plt.tight_layout()
    plt.savefig(save_path, dpi=220)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()
    return cm

def specificity_from_cm(cm):
    """Per-class specificity TN / (TN + FP) from a square confusion matrix.

    Args:
        cm: 2-D NumPy array of counts with rows as actual classes and
            columns as predicted classes.

    Returns:
        np.ndarray: One specificity value per class.
    """
    tp = np.diag(cm)
    fp = cm.sum(axis=0) - tp   # predicted as k but actually another class
    fn = cm.sum(axis=1) - tp   # actually k but predicted as another class
    tn = cm.sum() - (tp + fp + fn)
    # The tiny epsilon keeps the ratio defined when a class has TN + FP == 0.
    return tn / (tn + fp + 1e-12)

def macro_auc(y_true, y_proba, n_classes):
    """Macro-averaged ROC AUC from integer labels and per-class scores.

    Args:
        y_true: Sequence of integer class ids in [0, n_classes).
        y_proba: Per-sample class scores, passed unchanged to
            `roc_auc_score` — presumably shape (N, n_classes).
        n_classes: Number of classes; sets the width of the one-hot targets.

    Returns:
        The value returned by `sklearn.metrics.roc_auc_score`.
    """
    labels = np.asarray(y_true)
    one_hot = np.eye(n_classes)[labels]   # row i is the one-hot vector of labels[i]
    return roc_auc_score(one_hot, y_proba, average="macro", multi_class="ovr")

# ─── One run: (checkpoint, λ) train + evaluate (CDAN+E) ───────────────────
def cdan_train_eval(ckpt_path, ckpt_name, lambda_val, epochs=10, bs=32, lr=1e-4):
    """Adapt one source checkpoint with CDAN+E at a fixed λ and score it on the target set.

    A fresh `CDAN_MobileNetV2` is built on the backbone loaded from
    `ckpt_path`, trained on labelled source batches plus target batches whose
    labels are discarded, and then evaluated on every target image. The
    domain loss is a weighted mean of per-sample cross-entropies, with
    entropy-based weights w = 1 + exp(-H(g)).

    Args:
        ckpt_path: Path of the source-trained checkpoint.
        ckpt_name: Identifier used in the result row and in the
            confusion-matrix file name.
        lambda_val: Gradient-reversal coefficient for the domain branch; it
            scales the reversed gradient, not the loss.
        epochs: Number of passes over the source loader.
        bs: Batch size of both training loaders.
        lr: Adam learning rate.

    Returns:
        dict: One metrics row for this (checkpoint, λ) pair. The AUC entry is
        NaN when it cannot be computed; the reason is printed.

    Side effects:
        Saves the confusion-matrix image `<ckpt_name>_<lambda_val>.png` in `CONF_DIR`.
    """
    # loaders
    Ls = src_loader(bs=bs, shuffle=True)
    Lt = tgt_loader(bs=bs, shuffle=True)

    # backbone + heads (MobileNetV2 now)
    feats = load_mobilenetv2_features(ckpt_path, N_CLASSES)
    model = CDAN_MobileNetV2(feats, N_CLASSES).to(device)

    opt = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    ce_cls = nn.CrossEntropyLoss()
    ce_dom = nn.CrossEntropyLoss(reduction='none')  # per-sample for entropy weights

    model.train()
    # The target iterator lives across epochs and is restarted whenever it runs dry,
    # so one "epoch" is defined by the source loader only.
    it_tgt = iter(Lt)
    for _ in range(epochs):
        for x_s, y_s in Ls:
            x_s, y_s = x_s.to(device), y_s.to(device)
            try:
                x_t, _, _ = next(it_tgt)      # target labels are never used for training
            except StopIteration:
                it_tgt = iter(Lt)
                x_t, _, _ = next(it_tgt)
            x_t = x_t.to(device)

            # 1) Task loss on labeled source (no GRL)
            y_logits_s, _, _ = model(x_s, lambda_=0.0)
            L_cls = ce_cls(y_logits_s, y_s)

            # 2) Domain loss on concat(source,target) with GRL(λ) + Entropy conditioning
            # NOTE(review): the source batch is forwarded a second time here, so
            # BatchNorm statistics are updated twice per step for source images.
            x_dom = torch.cat([x_s, x_t], dim=0)
            _, d_logits, g_all = model(x_dom, lambda_=lambda_val)

            # Domain labels: 0 for source, 1 for target
            d_labels = torch.cat([
                torch.zeros(x_s.size(0), dtype=torch.long),
                torch.ones(x_t.size(0), dtype=torch.long)
            ], dim=0).to(device)

            # Entropy H(g) = -∑ g_c log g_c ; weights w = 1 + exp(-H)
            g_clamped = torch.clamp(g_all, 1e-6, 1.0)
            H = -(g_clamped * torch.log(g_clamped)).sum(dim=1)          # (B,)
            # The weights are constants of the loss. Without detach() their
            # gradient would reach the classifier without passing through the
            # GRL, letting the network lower the domain loss by changing its
            # prediction entropy instead of playing the adversarial game.
            w = (1.0 + torch.exp(-H)).detach()                          # (B,)

            L_dom_vec = ce_dom(d_logits, d_labels)                      # (B,)
            L_dom = (w * L_dom_vec).sum() / (w.sum() + 1e-12)

            loss = L_cls + L_dom
            opt.zero_grad(); loss.backward(); opt.step()

    # ── Evaluate on ALL target images (order from dataset)
    model.eval()
    all_true, all_pred, all_prob = [], [], []
    with torch.no_grad():
        for xb, yb, _names in DataLoader(tgt_ds, batch_size=64, shuffle=False, num_workers=2, pin_memory=True):
            xb = xb.to(device)
            y_logits, _, _ = model(xb, lambda_=0.0)
            probs = torch.softmax(y_logits, dim=1)
            pred = probs.argmax(dim=1)
            all_true.extend(yb.tolist())
            all_pred.extend(pred.cpu().tolist())
            all_prob.append(probs.cpu().numpy())
    all_prob = np.concatenate(all_prob, axis=0)

    # ── Metrics
    correct = int(np.sum(np.array(all_true)==np.array(all_pred)))
    total = len(all_true)
    acc = correct / total
    miss = 1.0 - acc

    prec, rec, f1, _ = precision_recall_fscore_support(
        all_true, all_pred, labels=list(range(N_CLASSES)),
        average='macro', zero_division=0
    )
    cm = save_confusion_matrix(
        all_true, all_pred, CLASS_NAMES,
        os.path.join(CONF_DIR, f"{ckpt_name}_{lambda_val}.png")
    )
    spec_macro = specificity_from_cm(cm).mean()

    # AUC is best-effort: keep the run alive, but say why the value is missing
    # instead of silently writing NaN.
    try:
        auc_macro = macro_auc(all_true, all_prob, N_CLASSES)
    except Exception as err:
        print(f"[cdan_train_eval] AUC unavailable for {ckpt_name} (lambda={lambda_val}): {err!r}")
        auc_macro = float('nan')

    row = {
        "Checkpoint": ckpt_name,
        "GAN Type": "CDAN+E",
        "DA Hyperparameter": "lambda",
        "DA Hyperparameter Value": lambda_val,
        "Correctly Identified Images": correct,
        "Incorrectly Identified Images": total - correct,
        "Image Classification Accuracy": acc,
        "Image Miss Rate": miss,
        "Precision (macro)": prec,
        "Sensitivity/Recall (macro)": rec,
        "Specificity (macro)": spec_macro,
        "F1-Score (macro)": f1,
        "AUC-ROC (macro OvR)": auc_macro
    }
    return row

# ─── Grid: checkpoints × λ values ─────────────────────────────
# Every checkpoint found in CKPT_DIR is adapted once per λ value.
lambda_vals = [0.01, 0.05, 0.1, 0.5, 1.0]
ckpt_files = [name for name in os.listdir(CKPT_DIR) if name.endswith(".pth")]
results = []

for fname in tqdm(sorted(ckpt_files), desc="Checkpoints x lambdas (MobileNetV2-CDAN+E)"):
    ckpt_path = os.path.join(CKPT_DIR, fname)
    # File-system-friendly run name: extension, spaces and dashes removed.
    stem, _ext = os.path.splitext(fname)
    ckpt_name = stem.replace(" ", "").replace("-", "").replace("__", "_")
    for lam in lambda_vals:
        row = cdan_train_eval(ckpt_path, ckpt_name, lam, epochs=10, bs=32, lr=1e-4)
        results.append(row)

# ─── Save Excel ───────────────────────────────────────
# Identification columns come first; metric columns keep their original order.
df = pd.DataFrame(results)
lead_cols = ["Checkpoint","GAN Type","DA Hyperparameter","DA Hyperparameter Value"]
ordered = lead_cols + [c for c in df.columns if c not in lead_cols]
df = df[ordered]

save_xlsx = os.path.join(METRICS_DIR, "MobileNetV2_Main(CDAN+E).xlsx")
df.to_excel(save_xlsx, index=False)
print(f"✅ Saved metrics to: {save_xlsx}")
print(f"✅ Confusion matrices in: {CONF_DIR}")

Checkpoints x lambdas (MobileNetV2-CDAN+E):   0%|          | 0/5 [00:00<?, ?it/s]

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-CDAN+E):  20%|██        | 1/5 [29:26<1:57:44, 1766.13s/it]

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-CDAN+E):  40%|████      | 2/5 [40:13<55:23, 1107.99s/it]  

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-CDAN+E):  60%|██████    | 3/5 [51:03<29:57, 898.69s/it] 

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-CDAN+E):  80%|████████  | 4/5 [1:01:51<13:20, 800.09s/it]

[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0
[load_mobilenetv2_features] missing=2 unexpected=0


Checkpoints x lambdas (MobileNetV2-CDAN+E): 100%|██████████| 5/5 [1:12:39<00:00, 871.91s/it]


✅ Saved metrics to: /content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/CDAN + E/Performance Metrics/MobileNetV2_Samples(CDAN).xlsx
✅ Confusion matrices in: /content/drive/MyDrive/Research Project 2025/Results/Main/MobileNet-V2/Domain Adaptation/CDAN + E/Confusion Matrices
