# DeepCORAL Domain Adaptation Pipeline

This notebook pre-trains a CNN on a source dataset (Phase 1) and then adapts it to an unlabeled target domain using CORAL loss (Phase 2). Both pretrained and adapted models are evaluated across multiple validation sets over several random seeds.

## 1. Imports & Utilities

Load libraries, define reproducibility, model, CORAL loss, and training/evaluation functions.

In [1]:
import os, sys, random
sys.path.insert(0, os.path.abspath('..'))
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from pkldataset import PKLDataset
from helpers import train_model, eval_model, set_seed

class CNN(nn.Module):
    def __init__(self, input_length=2800, num_classes=10, input_channels=1):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(input_channels,16,31,padding=15), nn.BatchNorm1d(16), nn.ReLU(True), nn.MaxPool1d(2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(16,32,31,padding=15), nn.BatchNorm1d(32), nn.ReLU(True), nn.MaxPool1d(2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(32,64,31,padding=15), nn.BatchNorm1d(64), nn.ReLU(True), nn.MaxPool1d(2)
        )
        conv_out = input_length//8
        self.feature_extractor = nn.Sequential(nn.Flatten(), nn.Linear(64*conv_out,128), nn.ReLU(True))
        self.classifier_head   = nn.Sequential(nn.Dropout(0.5), nn.Linear(128,num_classes))
    def forward(self,x):
        if x.dim()==2: x = x.unsqueeze(1)
        x = self.conv1(x); x = self.conv2(x); x = self.conv3(x)
        return self.classifier_head(self.feature_extractor(x))
    def extract_features(self,x):
        if x.dim()==2: x = x.unsqueeze(1)
        x = self.conv1(x); x = self.conv2(x); x = self.conv3(x)
        return self.feature_extractor(x)

def coral_loss(fs, ft):
    ns, nt = fs.size(0), ft.size(0)
    fs_c = fs - fs.mean(0,keepdim=True)
    ft_c = ft - ft.mean(0,keepdim=True)
    cov_s = fs_c.t()@fs_c/(ns-1)
    cov_t = ft_c.t()@ft_c/(nt-1)
    d = fs.size(1)
    return torch.sum((cov_s-cov_t)**2)/(4*d*d)

def train_deepcoral(model, source_loader, target_loader, criterion, optimizer, scheduler, num_epochs, lambda_coral, device, max_grad_norm=1.0):
    best_loss, best_state = float('inf'), None
    for epoch in range(1,num_epochs+1):
        model.train(); running=0
        it_s, it_t = iter(source_loader), iter(target_loader)
        steps = min(len(source_loader), len(target_loader))
        for _ in range(steps):
            xs, ys = next(it_s); xt,_=next(it_t)
            xs, ys, xt = xs.to(device), ys.to(device), xt.to(device)
            if ys.dim()>1: ys=ys.argmax(1)
            optimizer.zero_grad(); out_s=model(xs)
            loss_cls=criterion(out_s,ys)
            fs, ft = model.extract_features(xs), model.extract_features(xt)
            loss_c=coral_loss(fs,ft)
            loss=loss_cls+lambda_coral*loss_c
            print(f"  cls={loss_cls.item():.4f}  coral={loss_c.item():.4f}")
            loss.backward(); nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step(); running+=loss.item()*xs.size(0)
        scheduler.step(); epoch_loss=running/(steps*source_loader.batch_size)
        if epoch_loss<best_loss: best_loss, best_state=epoch_loss, model.state_dict().copy()
        print(f"[DeepCORAL] Epoch {epoch}/{num_epochs} — combined loss {epoch_loss:.4f}")
    model.load_state_dict(best_state); return model

## 2. Configuration

Define source & target paths, validation sets, seeds, device, losses, and initialize results storage.

In [2]:
train_path_1 = r"C:\Users\gus07\Desktop\data hiwi\preprocessing\HC\T197\RP"
train_path_dc = "../datasets/RPDC185/train_500"
val_paths = [
    "../datasets/RPDC185/val_1000",
    "../datasets/RPDC188/val_1000",
    "../datasets/RPDC191/val_1000",
    "../datasets/RPDC194/val_1000",
    "../datasets/RPDC197/val_1000",
]
seeds = [101,202,303,404,505,606,707,808,909,1001]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
lambda_coral = 1e-2
num_epochs_pre = 10
num_epochs_coral = 10
results_pre = {vp: [] for vp in val_paths}
results_adapt = {vp: [] for vp in val_paths}


## 3. Pipeline: Pretrain & Adapt

For each seed:
1. Split source and pretrain (Phase 1)
2. Evaluate pretrained model on validation sets
3. Adapt with DeepCORAL (Phase 2)
4. Evaluate adapted model on validation sets

In [3]:
for seed in seeds:
    print(f"\n>>> Pipeline with seed {seed}")
    set_seed(seed)
    train_ds, val_ds = PKLDataset.split_dataset(train_path_1)
    loader_tr = DataLoader(train_ds, batch_size=64, shuffle=True)
    loader_val = DataLoader(val_ds, batch_size=64, shuffle=False)
    model = CNN().to(device)
    opt = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    sch = optim.lr_scheduler.StepLR(opt, step_size=50, gamma=0.1)
    model = train_model(model, loader_tr, criterion, opt, sch, num_epochs_pre, device)
    for vp in val_paths:
        acc = eval_model(model, DataLoader(PKLDataset(vp), batch_size=64, shuffle=False), device)
        results_pre[vp].append(acc)
        print(f"  [Pretrained] {vp}: {acc:.2f}%")
    pretrained_w = model.state_dict()
    model.load_state_dict(pretrained_w)
    opt = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    sch = optim.lr_scheduler.StepLR(opt, step_size=50, gamma=0.1)
    loader_dc = DataLoader(PKLDataset(train_path_dc), batch_size=64, shuffle=True, drop_last=True)
    model = train_deepcoral(model, loader_tr, loader_dc, criterion, opt, sch, num_epochs_coral, lambda_coral, device)
    for vp in val_paths:
        acc = eval_model(model, DataLoader(PKLDataset(vp), batch_size=64, shuffle=False), device)
        results_adapt[vp].append(acc)
        print(f"  [Adapted  ] {vp}: {acc:.2f}%")



>>> Pipeline with seed 101
Epoch 1/10, Loss: 0.7356
Epoch 2/10, Loss: 0.1979
Epoch 3/10, Loss: 0.1356
Epoch 4/10, Loss: 0.1226
Epoch 5/10, Loss: 0.1194
Epoch 6/10, Loss: 0.0860
Epoch 7/10, Loss: 0.0850
Epoch 8/10, Loss: 0.0581
Epoch 9/10, Loss: 0.0738
Epoch 10/10, Loss: 0.0733
  [Pretrained] ../datasets/RPDC185/val_1000: 33.55%
  [Pretrained] ../datasets/RPDC188/val_1000: 43.58%
  [Pretrained] ../datasets/RPDC191/val_1000: 53.64%
  [Pretrained] ../datasets/RPDC194/val_1000: 52.80%
  [Pretrained] ../datasets/RPDC197/val_1000: 50.20%
  cls=0.3134  coral=2042.0173
  cls=0.3550  coral=260.3205
  cls=0.0548  coral=40.7759
  cls=0.1895  coral=21.2862
  cls=0.1805  coral=4.6125
  cls=0.1817  coral=2.1691
  cls=0.1969  coral=2.8057
[DeepCORAL] Epoch 1/10 — combined loss 3.6017
  cls=0.1694  coral=1.8193
  cls=0.2173  coral=2.2610
  cls=0.2239  coral=2.3835
  cls=0.1070  coral=3.4300
  cls=0.0988  coral=6.8810
  cls=0.1390  coral=4.9837
  cls=0.1184  coral=6.1575
[DeepCORAL] Epoch 2/10 — combi

## 4. Summary of Results

Compute mean and standard deviation over seeds for both pretrained and adapted models.

In [4]:
print("\n=== Pretrained (HC only) ===")
for vp, vals in results_pre.items():
    arr = np.array(vals)
    print(f"{vp}: mean = {arr.mean():.2f}%, std = {arr.std(ddof=1):.2f}%")
print("\n=== After DeepCORAL Adaptation ===")
for vp, vals in results_adapt.items():
    arr = np.array(vals)
    print(f"{vp}: mean = {arr.mean():.2f}%, std = {arr.std(ddof=1):.2f}%")



=== Pretrained (HC only) ===
../datasets/RPDC185/val_1000: mean = 42.34%, std = 5.65%
../datasets/RPDC188/val_1000: mean = 51.93%, std = 5.99%
../datasets/RPDC191/val_1000: mean = 58.02%, std = 4.46%
../datasets/RPDC194/val_1000: mean = 54.24%, std = 5.47%
../datasets/RPDC197/val_1000: mean = 50.37%, std = 3.22%

=== After DeepCORAL Adaptation ===
../datasets/RPDC185/val_1000: mean = 56.60%, std = 4.90%
../datasets/RPDC188/val_1000: mean = 57.58%, std = 6.46%
../datasets/RPDC191/val_1000: mean = 62.23%, std = 4.25%
../datasets/RPDC194/val_1000: mean = 61.69%, std = 5.88%
../datasets/RPDC197/val_1000: mean = 52.13%, std = 3.16%
