# 08 Design pattern and final checklist

Sections covered: 10-12 (design pattern, checklist, 3 key points).

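This notebook turns the measured transfer metrics (final target accuracy, feature drift, source retention, gradient norm) into a summary table and a simple rule-based method recommendation.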

In [None]:
from pathlib import Path
from copy import deepcopy
import subprocess
import yaml
import pandas as pd
import matplotlib.pyplot as plt

# Locate the project root. `__file__` is not defined in every execution
# context; when the lookup raises NameError, fall back to the current
# working directory.
# NOTE(review): the two branches are not symmetric - `parents[1]` is the
# parent of the directory containing this file, while the fallback is the
# working directory itself. Confirm the notebook is launched from the
# project root so that `scripts/` and `configs/` resolve.
try:
    ROOT = Path(__file__).resolve().parents[1]
except NameError:
    ROOT = Path.cwd().resolve()
# Directories for logs and figures; created up front (including parents)
# and left untouched when they already exist.
LOGS = ROOT / 'outputs' / 'logs'
FIGS = ROOT / 'outputs' / 'figures'
LOGS.mkdir(parents=True, exist_ok=True)
FIGS.mkdir(parents=True, exist_ok=True)

def run_config(config_path: Path, use_progress: bool = True):
    """Run ``scripts/run_transfer.py`` on a config file in a child process.

    The script is launched with ``ROOT`` as its working directory.

    Args:
        config_path: Path to the YAML config, passed via ``--config``.
        use_progress: When true, append the ``--use-progress`` flag.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``).
    """
    import sys

    # Use the interpreter that is running this notebook so the child process
    # sees the same environment and installed packages. A bare 'python' is
    # resolved through PATH and may point at a different install (or at
    # nothing). Fall back to 'python' only if the interpreter path is unknown.
    interpreter = sys.executable or 'python'
    cmd = [interpreter, str(ROOT / 'scripts' / 'run_transfer.py'), '--config', str(config_path)]
    if use_progress:
        cmd.append('--use-progress')
    subprocess.run(cmd, cwd=ROOT, check=True)

def read_method(name: str) -> pd.DataFrame:
    """Load the log ``transfer_<name>.csv`` from the ``LOGS`` directory.

    Args:
        name: Method identifier used in the CSV file name.

    Returns:
        The CSV contents as a DataFrame.
    """
    csv_path = LOGS / f'transfer_{name}.csv'
    return pd.read_csv(csv_path)

def deep_update(base: dict, patch: dict) -> dict:
    """Recursively merge ``patch`` into ``base`` in place.

    When both sides hold a dict under the same key, the two are merged
    recursively; in every other case the value from ``patch`` replaces
    (or adds) the entry in ``base``.

    Args:
        base: Mapping to be modified.
        patch: Mapping whose entries take precedence.

    Returns:
        The same ``base`` object, after mutation.
    """
    for key, incoming in patch.items():
        current = base.get(key)
        mergeable = isinstance(incoming, dict) and isinstance(current, dict)
        if not mergeable:
            # Scalars, lists, new keys and type mismatches are overwritten.
            base[key] = incoming
            continue
        deep_update(current, incoming)
    return base

def apply_fast_dev_profile(cfg: dict) -> dict:
    """Return a copy of ``cfg`` shrunk for a quick development run.

    The input is deep-copied and never modified. In the copy:

    * per-class sample counts under ``data`` are capped (only keys that are
      already present are touched);
    * ``data.batch_size`` is capped at 64 (a missing value is treated as
      128 before capping) and ``data.num_workers`` is set to 0;
    * ``train.source_epochs`` / ``train.target_epochs`` are capped at 2 / 3
      when present;
    * ``train.gradual_schedule`` is replaced by a fixed two-step schedule.

    Args:
        cfg: Full configuration mapping.

    Returns:
        The reduced configuration.
    """
    profiled = deepcopy(cfg)
    data_cfg = profiled.setdefault('data', {})
    train_cfg = profiled.setdefault('train', {})

    # Upper bounds for dataset sizes; applied only to keys already present.
    data_caps = (
        ('source_train_per_class', 220),
        ('source_test_per_class', 80),
        ('target_train_per_class', 30),
        ('target_test_per_class', 80),
        ('probe_per_class', 40),
    )
    for key, cap in data_caps:
        if key in data_cfg:
            data_cfg[key] = min(int(data_cfg[key]), cap)

    # These two loader settings are always written, present or not.
    data_cfg['batch_size'] = min(int(data_cfg.get('batch_size', 128)), 64)
    data_cfg['num_workers'] = 0

    # Upper bounds for epoch counts; again only for existing keys.
    epoch_caps = (
        ('source_epochs', 2),
        ('target_epochs', 3),
    )
    for key, cap in epoch_caps:
        if key in train_cfg:
            train_cfg[key] = min(int(train_cfg[key]), cap)

    # The schedule is overwritten unconditionally.
    train_cfg['gradual_schedule'] = {
        '1': ['backbone.layer4'],
        '2': ['backbone.layer3'],
    }
    return profiled

def make_profiled_config(base_name: str, notebook_tag: str, fast_dev_run: bool, overrides: dict | None = None) -> Path:
    """Derive a temporary config from a base YAML file and write it to disk.

    The base file is read from ``ROOT / 'configs'``. The fast-dev profile is
    applied first (when requested) and ``overrides`` are merged afterwards,
    so explicit overrides take precedence over the profile.

    Args:
        base_name: File name of the base config inside ``configs``.
        notebook_tag: Tag embedded in the output file name.
        fast_dev_run: Whether to apply the fast-dev profile; also selects
            the ``fast`` / ``full`` file-name suffix.
        overrides: Optional nested mapping merged into the config. It is
            deep-copied before merging, so the caller's object is not shared
            with the result.

    Returns:
        Path of the written ``tmp_<notebook_tag>_<suffix>.yaml`` file.
    """
    config_dir = ROOT / 'configs'
    base_text = (config_dir / base_name).read_text()
    settings = yaml.safe_load(base_text)

    if fast_dev_run:
        settings = apply_fast_dev_profile(settings)
    if overrides:
        settings = deep_update(settings, deepcopy(overrides))

    if fast_dev_run:
        profile = 'fast'
    else:
        profile = 'full'
    target = config_dir / f'tmp_{notebook_tag}_{profile}.yaml'
    # sort_keys=False keeps the key order of the loaded config in the output.
    dumped = yaml.safe_dump(settings, sort_keys=False)
    target.write_text(dumped)
    return target


In [None]:
# Switch between a quick smoke run (True) and the full experiment (False).
# The flag is forwarded to make_profiled_config, which applies the fast-dev
# profile and picks the 'fast'/'full' suffix of the generated config file.
FAST_DEV_RUN = False
cfg_path = make_profiled_config(
    base_name='transfer_core_related.yaml',
    notebook_tag='08_checklist',
    fast_dev_run=FAST_DEV_RUN,
)
# The progress flag is passed to the script only for full runs.
run_config(cfg_path, use_progress=not FAST_DEV_RUN)
# One log DataFrame per method, keyed by method name.
# Assumes the run above wrote transfer_<method>.csv for each of these four
# methods into LOGS - TODO confirm against scripts/run_transfer.py.
method_frames = {m: read_method(m) for m in ['scratch', 'feature_extraction', 'gradual_unfreeze', 'naive_finetune']}


In [None]:
# Build one summary row per method from the last row of its log.
# Presumably the last row is the final epoch - verify against the log writer.
table = []
for method, df in method_frames.items():
    # NOTE(review): iloc[-1] raises IndexError if a log has no rows.
    final = df.iloc[-1]
    table.append({
        'method': method,
        'final_target_acc': float(final['target_test_acc']),
        'final_feature_drift': float(final['feature_drift']),
        # `in df` tests column labels; a log without this column yields NaN.
        'final_retention': float(final['source_retention_acc']) if 'source_retention_acc' in df else float('nan'),
        'final_grad_norm': float(final['grad_norm']),
    })
# Highest final target accuracy first.
summary = pd.DataFrame(table).sort_values('final_target_acc', ascending=False)
summary


In [None]:
def recommend(df: pd.DataFrame, *, drift_threshold: float = 0.25) -> str:
    """Pick a method from the summary table and phrase it as a recommendation.

    Methods whose final feature drift is strictly below ``drift_threshold``
    count as stable. If any stable method exists, the one with the highest
    final target accuracy is recommended for production. Otherwise the most
    accurate method overall is named, with a caveat about stabilizers.

    Args:
        df: Summary table with the columns ``method``, ``final_target_acc``
            and ``final_feature_drift``.
        drift_threshold: Exclusive upper bound on ``final_feature_drift`` for
            a method to count as stable. Defaults to 0.25, the value that was
            previously hard-coded.

    Returns:
        A one-sentence recommendation string.

    Raises:
        IndexError: If ``df`` has no rows.
    """
    # Sort once; boolean filtering keeps the order, so the first row of each
    # view is its most accurate method.
    ranked = df.sort_values('final_target_acc', ascending=False)
    # Rows with NaN drift compare False and are therefore never "stable".
    stable = ranked[ranked['final_feature_drift'] < drift_threshold]
    if not stable.empty:
        return f"Prefer {stable.iloc[0]['method']} for production (best stable accuracy)."
    return f"Prefer {ranked.iloc[0]['method']} for raw accuracy; add stabilizers before production."

# Apply the rule to the summary table; the bare name on the last line
# shows the resulting sentence as the cell output.
recommendation = recommend(summary)
recommendation


## Final checklist mapping

- Are the tasks related? Compare the related vs. unrelated curves.
- Is the transfer stable? Inspect feature drift, source retention, and gradient norm.
- Is it worth it? Verify improvements over training from scratch on both final score and convergence speed.