# 02 Transfer core loop
Hypothesis: initializing the target model from source-pretrained weights improves sample efficiency compared with training from scratch.

## Step 1: Imports and setup
Run the transfer workflow directly: source pretrain -> target adaptation.

In [None]:
from pathlib import Path
import sys
import torch
import pandas as pd
import matplotlib.pyplot as plt

# Locate the project root: the nearest directory, starting at the current
# working directory and walking upward, that contains a `src` folder. If no
# ancestor has one, the search falls back to the top-most directory.
_start = Path.cwd().resolve()
_candidates = [_start, *_start.parents]
ROOT = next((p for p in _candidates if (p / 'src').is_dir()), _candidates[-1])

# Put the project's `src` directory first on the import path so the
# project-local imports below resolve.
sys.path.insert(0, str(ROOT / 'src'))

from utils.seed import set_seed
from data.cifar10_transfer import get_cifar10_transfer
from models.transfer_resnet import TransferResNet18
from methods.transfer_learning import pretrain_source, build_transferred_model, run_target_adaptation

# Output directory for the figures saved by this notebook; it is created
# (including missing parents) if it does not exist yet.
FIGS = ROOT.joinpath('outputs', 'figures')
FIGS.mkdir(parents=True, exist_ok=True)

## Step 2: Build data and pretrain the source model
This creates the source representation used by transfer methods.

In [None]:
# Seed and device shared by the rest of the notebook.
SEED = 0
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

set_seed(SEED)

# Per-class sample budgets for each split requested from the data helper.
_split_sizes = dict(
    source_train_per_class=1000,
    source_test_per_class=300,
    target_train_per_class=80,
    target_test_per_class=300,
    probe_per_class=120,
)
# Batching options forwarded to the data helper.
_loader_opts = dict(batch_size=128, num_workers=2)

# The target classes (3, 5, 7) are a subset of the source classes (2-7).
loaders = get_cifar10_transfer(
    data_dir='./data',
    source_classes=[2, 3, 4, 5, 6, 7],
    target_classes=[3, 5, 7],
    seed=SEED,
    **_split_sizes,
    **_loader_opts,
)

# Pretrain a model on the source task; later cells reuse `source_model`.
source_model = TransferResNet18(num_classes=loaders.source_num_classes)
_source_optim = dict(lr=0.03, weight_decay=5e-4, momentum=0.9)
source_result = pretrain_source(
    model=source_model,
    train_loader=loaders.source_train,
    test_loader=loaders.source_test,
    device=DEVICE,
    epochs=6,
    use_progress=True,
    **_source_optim,
)

# Show the last rows of the pretraining history as the cell output.
source_history = pd.DataFrame(source_result.history)
source_history.tail()

## Step 3: Run target adaptation methods
Every method uses the same data loaders, optimizer hyperparameters, and epoch budget; only the adaptation strategy changes.

In [None]:
# Adaptation strategies to compare. 'scratch' is the no-transfer baseline;
# the other three start from the pretrained source model.
methods = ['scratch', 'feature_extraction', 'gradual_unfreeze', 'naive_finetune']
frames = {}  # method name -> per-epoch history as a DataFrame

for method in methods:
    # Re-seed before every run. Without this, each run inherits whatever RNG
    # state the previous run left behind, so weight initialisation and other
    # random draws would depend on the order of `methods` rather than only on
    # the strategy being evaluated.
    # NOTE(review): presumably set_seed seeds the global RNGs; loaders that
    # carry their own generator may still advance between runs - confirm.
    set_seed(SEED)

    if method == 'scratch':
        # Baseline: a fresh model, with no source head and no source test set.
        model = TransferResNet18(num_classes=loaders.target_num_classes)
        source_head = None
        source_test = None
    else:
        # NOTE(review): every transfer run is built from the same
        # `source_model` object. Confirm build_transferred_model copies the
        # weights; otherwise later methods would start from weights already
        # adapted by earlier ones.
        model, source_head = build_transferred_model(source_model, loaders.target_num_classes)
        source_test = loaders.source_test

    result = run_target_adaptation(
        model=model,
        target_train=loaders.target_train,
        target_test=loaders.target_test,
        target_probe=loaders.target_probe,
        source_test=source_test,
        source_head=source_head,
        device=DEVICE,
        strategy=method,
        epochs=10,
        lr=0.01,
        weight_decay=5e-4,
        momentum=0.9,
        # Epoch -> module names. Passed to every strategy and rebuilt on each
        # iteration so no run can see a schedule mutated by another run.
        # NOTE(review): presumably only 'gradual_unfreeze' consumes it - confirm.
        gradual_schedule={
            2: ['backbone.layer4'],
            5: ['backbone.layer3', 'backbone.layer2'],
            7: ['backbone.layer1', 'backbone.bn1', 'backbone.conv1'],
        },
        use_progress=True,
    )
    frames[method] = pd.DataFrame(result.history)

# One row per method: best and last-epoch target accuracy plus last-epoch
# feature drift, sorted with the best-performing method first.
summary = pd.DataFrame([
    {
        'method': m,
        'best_target_acc': float(df['target_test_acc'].max()),
        'final_target_acc': float(df['target_test_acc'].iloc[-1]),
        'final_feature_drift': float(df['feature_drift'].iloc[-1]),
    }
    for m, df in frames.items()
]).sort_values('best_target_acc', ascending=False)
summary

## Step 4: Visualize learning curves
Compare both target quality (test accuracy per epoch) and representation stability (feature drift per epoch).

In [None]:
# One figure per tracked metric: (history column, plot title, output file name).
# The column name is also used as the y-axis label.
_plots = (
    ('target_test_acc', 'Target accuracy by method', '02_target_acc_by_method.png'),
    ('feature_drift', 'Feature drift by method', '02_feature_drift_by_method.png'),
)

for _column, _title, _filename in _plots:
    fig, ax = plt.subplots(figsize=(6.6, 3.7))
    # Overlay every method's per-epoch curve on the same axes.
    for method, df in frames.items():
        ax.plot(df['epoch'], df[_column], marker='o', label=method)
    ax.set_title(_title)
    ax.set_xlabel('epoch')
    ax.set_ylabel(_column)
    ax.grid(alpha=0.25)
    ax.legend(frameon=False)
    fig.savefig(FIGS / _filename, dpi=150, bbox_inches='tight')

### Expected Outcome
Transfer methods should outperform scratch in early and final target accuracy.

## Loop Mapping
Source pretraining is fixed; adaptation policy determines final behavior.