# 07 Modern method comparison in a transfer setting

Section covered: 9 (feature extraction vs fine-tuning families).

Experiment matrix: label budget x method.

In [None]:
from pathlib import Path
from copy import deepcopy
import subprocess
import yaml
import pandas as pd
import matplotlib.pyplot as plt

# Path('..') is relative to the current working directory, so the notebook is
# expected to be run from a directory one level below the project root.
ROOT = Path('..').resolve()
LOGS, FIGS = (ROOT / 'outputs' / sub for sub in ('logs', 'figures'))

# Make sure both output directories exist before anything writes to them.
for out_dir in (LOGS, FIGS):
    out_dir.mkdir(parents=True, exist_ok=True)

def run_config(config_path: Path, use_progress: bool = True):
    """Run ``scripts/run_transfer.py`` on ``config_path`` in a subprocess.

    The script is executed with ``ROOT`` as its working directory.

    Args:
        config_path: Config file passed to the script via ``--config``.
        use_progress: When true, append the ``--use-progress`` flag.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``).
    """
    import sys

    # Launch with the interpreter running this notebook so the script sees the
    # same environment; a bare 'python' is resolved via PATH and may point at
    # a different interpreter or not exist at all. sys.executable can be empty
    # when Python cannot determine it, hence the fallback.
    interpreter = sys.executable or 'python'
    cmd = [interpreter, str(ROOT / 'scripts' / 'run_transfer.py'), '--config', str(config_path)]
    if use_progress:
        cmd.append('--use-progress')
    subprocess.run(cmd, cwd=ROOT, check=True)

def read_method(name: str) -> pd.DataFrame:
    """Load ``transfer_<name>.csv`` from the logs directory as a DataFrame."""
    log_file = LOGS / f'transfer_{name}.csv'
    return pd.read_csv(log_file)

def deep_update(base: dict, patch: dict) -> dict:
    """Recursively merge ``patch`` into ``base`` in place and return ``base``.

    When a key holds a dict on both sides the two dicts are merged
    recursively; in every other case the value from ``patch`` replaces
    (or adds) the entry in ``base``.
    """
    for key in patch:
        incoming = patch[key]
        current = base.get(key)
        both_dicts = isinstance(incoming, dict) and isinstance(current, dict)
        if not both_dicts:
            # Scalars, lists, new keys and type mismatches: patch wins outright.
            base[key] = incoming
            continue
        deep_update(current, incoming)
    return base

def apply_fast_dev_profile(cfg: dict) -> dict:
    """Return a deep copy of ``cfg`` shrunk for a quick development run.

    The input is never modified. In the copy:

    * per-class sample counts under ``data`` are capped, but only for keys
      that are already present;
    * ``data.batch_size`` is capped at 64 (a missing value is treated as 128)
      and ``data.num_workers`` is forced to 0;
    * ``train.source_epochs`` / ``train.target_epochs`` are capped when present;
    * ``train.gradual_schedule`` is always replaced with a two-entry schedule.
    """
    profiled = deepcopy(cfg)
    data_cfg = profiled.setdefault('data', {})
    train_cfg = profiled.setdefault('train', {})

    # Upper bounds for the per-class dataset sizes.
    data_caps = {
        'source_train_per_class': 220,
        'source_test_per_class': 80,
        'target_train_per_class': 30,
        'target_test_per_class': 80,
        'probe_per_class': 40,
    }
    for key, cap in data_caps.items():
        if key in data_cfg:
            data_cfg[key] = min(int(data_cfg[key]), cap)

    data_cfg['batch_size'] = min(int(data_cfg.get('batch_size', 128)), 64)
    data_cfg['num_workers'] = 0

    # Upper bounds for the epoch counts.
    for key, cap in (('source_epochs', 2), ('target_epochs', 3)):
        if key in train_cfg:
            train_cfg[key] = min(int(train_cfg[key]), cap)

    # Presumably maps an epoch index to the layers unfrozen at that epoch;
    # confirm against the training script.
    train_cfg['gradual_schedule'] = {
        '1': ['backbone.layer4'],
        '2': ['backbone.layer3'],
    }
    return profiled

def make_profiled_config(base_name: str, notebook_tag: str, fast_dev_run: bool, overrides: dict | None = None) -> Path:
    """Derive a temporary config from ``configs/<base_name>`` and return its path.

    Args:
        base_name: File name of the base YAML config inside ``ROOT / 'configs'``.
        notebook_tag: Tag embedded in the generated file name.
        fast_dev_run: When true, apply the fast-dev profile before overrides.
        overrides: Optional nested dict merged on top of the config; it is
            deep-copied first, so the caller's dict is left untouched.

    Returns:
        Path of the written ``tmp_<notebook_tag>_<fast|full>.yaml`` file.
    """
    config_dir = ROOT / 'configs'
    loaded = yaml.safe_load((config_dir / base_name).read_text())

    profiled = apply_fast_dev_profile(loaded) if fast_dev_run else loaded
    # Overrides are merged last, so they take precedence over the fast-dev caps.
    if overrides:
        profiled = deep_update(profiled, deepcopy(overrides))

    mode = 'fast' if fast_dev_run else 'full'
    target = config_dir / f'tmp_{notebook_tag}_{mode}.yaml'
    target.write_text(yaml.safe_dump(profiled, sort_keys=False))
    return target


In [None]:
# Set to True for a quick run with the fast-dev profile and smaller budgets.
FAST_DEV_RUN = False
base_name = 'transfer_core_related.yaml'
if FAST_DEV_RUN:
    label_budgets = [10, 20]
else:
    label_budgets = [20, 40, 80, 120]
rows = []

for budget in label_budgets:
    # One generated config per label budget; every run covers all three methods.
    budget_overrides = {
        'data': {'target_train_per_class': budget},
        'methods': ['scratch', 'feature_extraction', 'gradual_unfreeze'],
    }
    cfg_path = make_profiled_config(
        base_name=base_name,
        notebook_tag=f'07_budget_{budget}',
        fast_dev_run=FAST_DEV_RUN,
        overrides=budget_overrides,
    )
    run_config(cfg_path, use_progress=not FAST_DEV_RUN)

    # The per-method log file name does not include the budget, so the logs
    # are summarised right after each run (presumably the next run rewrites
    # them; verify against the training script).
    for method in ('scratch', 'feature_extraction', 'gradual_unfreeze'):
        df = read_method(method)
        target_acc = df['target_test_acc']
        rows.append(dict(
            budget=budget,
            method=method,
            best_target_acc=float(target_acc.max()),
            final_target_acc=float(target_acc.iloc[-1]),
            final_drift=float(df['feature_drift'].iloc[-1]),
        ))

# One row per (budget, method) pair.
results = pd.DataFrame(rows)
results.head()


In [None]:
# Reshape to one row per budget and one column per method, then draw one line
# per method.
pivot = results.pivot(index='budget', columns='method', values='best_target_acc')
ax = pivot.plot(marker='o', figsize=(6.8, 3.8), grid=True)
ax.set_ylabel('best_target_acc')
ax.set_title('Method ranking by target label budget')


In [None]:
# Rank methods within each budget: budgets ascending, best accuracy first.
results.sort_values(by=['budget', 'best_target_acc'], ascending=[True, False])


## Interpretation

Feature extraction usually wins in very low-label regimes; gradual unfreezing catches up as the label budget grows.