# EMPCA amplitude evaluation for 6 wk4 models (sum-channel, qp template)

This notebook evaluates all 6 `wk4/models` artifacts:
- 3 noise types: `mmc`, `white`, `pink`
- 2 weight types: `1/PSD`, `SNR2`

Method:
1. Load sum-channel traces and clean references.
2. Project noisy traces to EMPCA coefficients with each model's own metric.
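3. Estimate amplitude using:
   - linear calibration on the norm of the first `k` coefficients, `||b_{1:k}||`
   - REIM ridge regression on the stacked real/imaginary features `[Re(b), Im(b)]`.
   For both estimators the reported result is the setting (`k`, ridge `alpha`) with the smallest sigma on the holdout split.
4. Optionally compute an optimum-filter (OF) baseline using the qp template.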

A preflight cell reports missing models / PSD / template / data files.


In [1]:
import os
import re
import sys
import gc
from pathlib import Path

import numpy as np
import h5py
import pickle

try:
    import pandas as pd
except Exception:
    pd = None

# Repo root for PCA_dev

def _repo_root():
    """Locate the PCA_dev repository root relative to the working directory.

    The current directory, its parent and its grandparent are examined in that
    order.  For each of them the directory itself is tried first, then its
    ``PCA_dev`` child; the first one holding both a ``wk4`` and a ``reusable``
    entry is returned.

    Raises:
        RuntimeError: if none of the candidates looks like the repo root.
    """
    cwd = Path.cwd().resolve()
    for base in (cwd, cwd.parent, cwd.parent.parent):
        for root in (base, base / 'PCA_dev'):
            if (root / 'wk4').exists() and (root / 'reusable').exists():
                return root
    raise RuntimeError('Run this notebook from DELight_mtr, PCA_dev, or PCA_dev/wk4 directory.')

# Resolve the repository root once; every repo-relative path below hangs off it.
repo = _repo_root()
print('repo =', repo)

# Make the helper modules under <repo>/reusable importable before importing them.
sys.path.insert(0, str(repo / 'reusable'))
from empca_TCY_optimized import ti_rfft

# The optimum filter is optional: when its import fails, HAVE_OF is False and
# the OF baseline sections are skipped instead of aborting the notebook.
try:
    from OptimumFilter import OptimumFilter
    HAVE_OF = True
except Exception as exc:
    HAVE_OF = False
    print('OptimumFilter import failed:', exc)

# Directory holding the pickled model artifacts listed in MODEL_FILES.
MODELS_DIR = repo / 'wk4' / 'models'
# NOTE(review): OUTDIR is not referenced by any of the visible cells.
OUTDIR = Path('/ceph/dwong/trigger_samples/PCA_QP/main')
# Passed as the third argument to OptimumFilter; presumably the sampling
# frequency in Hz -- confirm against the OptimumFilter signature.
FS = 3906250.0
# Fraction of traces held out for validation, and the seed of the split shuffle.
VAL_FRAC = 0.30
SEED = 0

# The six artifacts under evaluation: {PSD, SNR2} weighting x {mmc, white, pink} noise.
MODEL_FILES = [
    'PSD_run1_sum_mmc.pkl',
    'SNR2_run1_sum_mmc.pkl',
    'PSD_run1_sum_white.pkl',
    'SNR2_run1_sum_white.pkl',
    'PSD_run1_sum_pink.pkl',
    'SNR2_run1_sum_pink.pkl',
]

# Preferred qp-template path first, then known fallbacks
QP_TEMPLATE_CANDIDATES = [
    repo / 'reusable' / 'template' / 'qp_template.npy',
    repo / 'reusable' / 'template' / 'QP_template.npy',
    repo.parent / 'trigger_study' / 'archive' / 'wk9' / 'preliminary' / 'sum_qp_template_1800.npy',
]

# Noise PSD file per noise type, used only by the OF baseline cells.
NOISE_PSD_PATHS = {
    'mmc': Path('/ceph/dwong/delight/noise_psd_xray.npy'),
    'white': repo / 'reusable' / 'weight' / 'noise_psd_white.npy',
    'pink': repo / 'reusable' / 'weight' / 'noise_psd_pink.npy',
}


repo = /home/dwong/DELight_mtr/PCA_dev


In [2]:
def resolve_local_path(path_like):
    """Map a path stored in an artifact onto the local checkout when possible.

    Args:
        path_like: a path (str or Path) or ``None``.

    Returns:
        ``None`` for ``None`` input.  Otherwise a ``Path``: the input itself if
        it exists; else, when the input contains ``'PCA_dev/'``, the part after
        the first such marker re-rooted under the module-level ``repo`` if that
        file exists; else the input unchanged (which may not exist).
    """
    if path_like is None:
        return None

    original = Path(path_like)
    if original.exists():
        return original

    # Absolute paths baked on another machine: keep only what follows the
    # first 'PCA_dev/' and look for it under the current repo root.
    _, marker_found, tail = str(original).partition('PCA_dev/')
    if marker_found:
        relocated = repo / tail
        if relocated.exists():
            return relocated

    return original


def choose_qp_template_path():
    """Return the first existing entry of QP_TEMPLATE_CANDIDATES, or None."""
    return next((cand for cand in QP_TEMPLATE_CANDIDATES if cand.exists()), None)


def preflight_checks():
    """Check that the models and the assets they reference are available.

    Looks at, in order: every file in MODEL_FILES, the PSD / SNR2 weight path
    stored in each loaded artifact (after ``resolve_local_path``), the first
    entry of each artifact's ``dataset_paths``, and the qp template.  A
    ``[OK]`` / ``[MISSING]`` line is printed per item, followed by a summary
    of the missing assets.

    Returns:
        (artifacts, missing, qp_path) where ``artifacts`` maps model file name
        to its unpickled content, ``missing`` is the sorted de-duplicated list
        of missing assets and ``qp_path`` is the chosen template path or None.
    """
    report = []   # (kind, label, ok) in the order the checks were made
    absent = []   # one entry per failed check; may contain duplicates

    def note(kind, label, ok, absent_entry):
        report.append((kind, label, ok))
        if not ok:
            absent.append(absent_entry)

    # Model files; the ones that exist are unpickled to inspect embedded paths.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted artifacts.
    loaded = {}
    for fname in MODEL_FILES:
        model_path = MODELS_DIR / fname
        found = model_path.exists()
        note('model', str(model_path), found, str(model_path))
        if found:
            with open(model_path, 'rb') as fh:
                loaded[fname] = pickle.load(fh)

    # Weight files referenced by the artifacts, plus one data file per artifact.
    representative = set()
    for fname, art in loaded.items():
        for kind, key in (('psd', 'psd_path'), ('snr2', 'snr2_weight_path')):
            resolved = resolve_local_path(art.get(key))
            if resolved is None:
                continue
            note(kind, f'{fname}: {resolved}', resolved.exists(), str(resolved))
        representative.update(str(dp) for dp in art.get('dataset_paths', [])[:1])

    # Check at least one representative data path per model family
    for dp in sorted(representative):
        data_path = Path(dp)
        note('data', str(data_path), data_path.exists(), str(data_path))

    # QP template availability
    qp_path = choose_qp_template_path()
    if qp_path is not None:
        note('template', str(qp_path), True, str(qp_path))
    else:
        note('template', 'qp template (all candidates)', False,
             'qp template: none of candidates found')

    print('--- preflight checks ---')
    for kind, label, ok in report:
        status = 'OK' if ok else 'MISSING'
        print(f'[{status}] {kind:<8} {label}')

    unique_absent = sorted(set(absent))
    # The count is taken before de-duplication.
    print('\nmissing_count =', len(absent))
    if absent:
        print('missing assets:')
        for entry in unique_absent:
            print(' -', entry)

    return loaded, unique_absent, qp_path


# Run the preflight once; `artifacts` and `qp_template_path` are reused by the
# evaluation cells below.
artifacts, missing_assets, qp_template_path = preflight_checks()
print('\nloaded_artifacts =', len(artifacts))
print('qp_template_path =', qp_template_path)


--- preflight checks ---
[OK] model    /home/dwong/DELight_mtr/PCA_dev/wk4/models/PSD_run1_sum_mmc.pkl
[OK] model    /home/dwong/DELight_mtr/PCA_dev/wk4/models/SNR2_run1_sum_mmc.pkl
[OK] model    /home/dwong/DELight_mtr/PCA_dev/wk4/models/PSD_run1_sum_white.pkl
[OK] model    /home/dwong/DELight_mtr/PCA_dev/wk4/models/SNR2_run1_sum_white.pkl
[OK] model    /home/dwong/DELight_mtr/PCA_dev/wk4/models/PSD_run1_sum_pink.pkl
[OK] model    /home/dwong/DELight_mtr/PCA_dev/wk4/models/SNR2_run1_sum_pink.pkl
[OK] psd      PSD_run1_sum_mmc.pkl: /ceph/dwong/delight/noise_psd_xray.npy
[OK] snr2     SNR2_run1_sum_mmc.pkl: /home/dwong/DELight_mtr/PCA_dev/reusable/weight/qp_snr2_weight_MMC.npy
[OK] psd      PSD_run1_sum_white.pkl: /home/dwong/DELight_mtr/PCA_dev/reusable/weight/noise_psd_white.npy
[OK] snr2     SNR2_run1_sum_white.pkl: /home/dwong/DELight_mtr/PCA_dev/reusable/weight/qp_snr2_weight_white.npy
[OK] psd      PSD_run1_sum_pink.pkl: /home/dwong/DELight_mtr/PCA_dev/reusable/weight/noise_psd_pi

In [3]:
# Utilities reused from wk4 amplitude notebook

def baseline_correct_per_trace(X_time, pretrigger=4000, method='mean'):
    """Subtract a per-trace baseline estimated from the leading samples.

    Args:
        X_time: 2D array-like, one trace per row; converted to float64.
        pretrigger: number of leading samples used for the baseline,
            1 <= pretrigger <= n_time.
        method: 'mean' or 'median' of the pretrigger window.

    Returns:
        (corrected, baseline): the baseline-subtracted traces (same shape as
        the input) and the 1D array of per-trace baselines.

    Raises:
        ValueError: non-2D input, pretrigger out of range, or unknown method.
    """
    traces = np.asarray(X_time, dtype=np.float64)
    if traces.ndim != 2:
        raise ValueError(f'X_time must be 2D; got {traces.shape}')
    n_time = traces.shape[1]
    if not (1 <= pretrigger <= n_time):
        raise ValueError('pretrigger must be within [1, n_time]')

    if method == 'mean':
        reducer = np.mean
    elif method == 'median':
        reducer = np.median
    else:
        raise ValueError("method must be 'mean' or 'median'")

    offsets = reducer(traces[:, :pretrigger], axis=1)
    corrected = traces - offsets[:, None]
    return corrected, offsets


def to_shift_invariant_spectrum(X_time):
    """Run ``ti_rfft`` on the traces and require a 2D result.

    Raises:
        RuntimeError: if ``ti_rfft`` returns anything other than a 2D array.
    """
    spectrum = ti_rfft(X_time)
    if spectrum.ndim == 2:
        return spectrum
    raise RuntimeError('Unexpected output shape from ti_rfft')


def project_coefficients(X_feat, eigvec, metric_weight):
    """Weighted least-squares coefficients of each row of X_feat on the basis.

    With ``Phi = eigvec.T`` (n_freq, n_comp) and ``W = diag(metric_weight)``,
    solves the normal equations ``(Phi^H W Phi) c = Phi^H W x`` for every row
    ``x`` of ``X_feat``.

    Args:
        X_feat: (n_traces, n_freq) feature array.
        eigvec: (n_comp, n_freq) basis, one component per row.
        metric_weight: 1D weights of length n_freq.

    Returns:
        (n_traces, n_comp) coefficient array.

    Raises:
        ValueError: if the weight is not 1D or its length is not n_freq.
    """
    basis = eigvec.T  # (n_freq, n_comp)
    weights = np.asarray(metric_weight, dtype=np.float64)
    if weights.ndim != 1:
        raise ValueError('metric_weight must be 1D')
    n_freq = basis.shape[0]
    if len(weights) != n_freq:
        raise ValueError(f'Weight length mismatch: {len(weights)} vs {n_freq}')

    gram = basis.conj().T @ (basis * weights[:, None])
    projected = (X_feat * weights[None, :]) @ basis.conj()
    return np.linalg.solve(gram, projected.T).T


def resolution_from_truth(y_est, y_true):
    """Return (sigma, fwhm) of the residuals ``y_true - y_est``.

    ``sigma`` is the sample standard deviation (ddof=1) and ``fwhm`` is
    ``2.355 * sigma``.
    """
    residual = np.asarray(y_true) - np.asarray(y_est)
    spread = np.std(residual, ddof=1)
    fwhm = 2.355 * spread
    return spread, fwhm


def get_metric_weight(model_artifact):
    """Return the per-frequency metric weight for a model artifact.

    Sources are tried in this order and the first usable one wins:
      1. ``model_artifact['psd_eff']``            -> 1 / psd_eff
      2. the file at ``psd_path`` (row 1 is used when the array has shape
         (2, n), otherwise the whole array)       -> 1 / psd
      3. the file at ``snr2_weight_path``         -> used as-is

    Raises:
        ValueError: if none of the three sources is available.
    """
    def _inverse(psd_like):
        # The `+ 0.0` is kept from the original expression; it adds no floor,
        # so zero PSD bins produce infinite weights.
        return 1.0 / (np.asarray(psd_like, dtype=np.float64) + 0.0)

    embedded = model_artifact.get('psd_eff')
    if embedded is not None:
        return _inverse(embedded)

    psd_file = resolve_local_path(model_artifact.get('psd_path'))
    if psd_file is not None and psd_file.exists():
        stored = np.load(psd_file)
        two_rows = stored.ndim == 2 and stored.shape[0] == 2
        return _inverse(stored[1] if two_rows else stored)

    snr_file = resolve_local_path(model_artifact.get('snr2_weight_path'))
    if snr_file is not None and snr_file.exists():
        return np.asarray(np.load(snr_file), dtype=np.float64)

    raise ValueError('No usable metric in model artifact')


def compute_coeff_for_model(model_artifact, X_time):
    """Project time-domain traces onto a model's components.

    Steps: baseline-correct with the artifact's ``cfg`` (``pretrigger``
    defaults to 4000, ``baseline_method`` to 'mean'), transform with
    ``to_shift_invariant_spectrum``, then solve for the coefficients using the
    artifact's own metric weight.

    Raises:
        ValueError: if the spectrum width differs from ``pca.eigvec.shape[1]``.
    """
    model = model_artifact['pca']
    settings = model_artifact.get('cfg', {})

    detrended, _ = baseline_correct_per_trace(
        X_time,
        pretrigger=int(settings.get('pretrigger', 4000)),
        method=settings.get('baseline_method', 'mean'),
    )
    spectra = to_shift_invariant_spectrum(detrended)

    components = model.eigvec
    n_have, n_want = spectra.shape[1], components.shape[1]
    if n_have != n_want:
        raise ValueError(f'freq mismatch: {n_have} vs {n_want}')

    weight = get_metric_weight(model_artifact)
    return project_coefficients(spectra, components, weight)


def make_train_val_idx(n, val_frac=0.3, seed=0):
    """Split ``range(n)`` into shuffled train / validation index arrays.

    The indices are shuffled with ``np.random.default_rng(seed)``; the first
    ``int(val_frac * n)`` go to validation and the remainder to training.

    Returns:
        (train_idx, val_idx)
    """
    order = np.arange(n)
    np.random.default_rng(seed).shuffle(order)
    cut = int(val_frac * n)
    return order[cut:], order[:cut]


def lin_calibrate_train(x_train, y_train, x_eval):
    """Fit ``y = a * x + b`` by least squares and apply it to ``x_eval``.

    Returns:
        (a * x_eval + b, a, b)
    """
    design = np.column_stack([x_train, np.ones_like(x_train)])
    solution = np.linalg.lstsq(design, y_train, rcond=None)[0]
    slope, intercept = solution
    return slope * x_eval + intercept, slope, intercept


def best_knorm_linear(coeff, y_true, train_idx, val_idx, k_list=(1,2,3,4,5,6,7,8)):
    """Scan ``k`` for the norm-of-first-k-coefficients amplitude estimator.

    For each ``k`` the amplitude ``||coeff[:, :k]||`` is linearly calibrated
    on the train indices and scored by the residual sigma on the validation
    indices.

    Returns:
        The result row (dict) with the smallest validation sigma; the earliest
        ``k`` wins ties.  ``None`` when ``k_list`` is empty.

    NOTE(review): ``k`` is selected on the same validation set whose sigma is
    reported, so the returned sigma is an optimistic estimate.
    """
    def _rows():
        for k in k_list:
            norm_k = np.sqrt((np.abs(coeff[:, :k]) ** 2).sum(axis=1))
            y_val_est, slope, offset = lin_calibrate_train(
                norm_k[train_idx], y_true[train_idx], norm_k[val_idx]
            )
            sigma, fwhm = resolution_from_truth(y_val_est, y_true[val_idx])
            yield {
                'method': 'k-norm+linear',
                'k': int(k),
                'alpha': np.nan,
                'sigma': float(sigma),
                'fwhm': float(fwhm),
                'a': float(slope),
                'b': float(offset),
            }

    # min() keeps the first of equal candidates, like a strict '<' scan.
    return min(_rows(), key=lambda row: row['sigma'], default=None)


def make_reim_features(coeff, k):
    """Stack real then imaginary parts of the first ``k`` coefficient columns.

    Returns an (n_traces, 2 * k') real array, where k' = min(k, n_columns).
    """
    leading = coeff[:, :k]
    return np.concatenate((leading.real, leading.imag), axis=1)


def standardize(train, val):
    """Z-score both sets with the column mean / std of the training set.

    Columns whose training std is exactly zero are divided by 1 instead.

    Returns:
        (train_standardized, val_standardized)
    """
    center = np.mean(train, axis=0)
    scale = np.std(train, axis=0)
    scale[scale == 0] = 1.0  # constant columns: avoid dividing by zero

    def _zscore(block):
        return (block - center) / scale

    return _zscore(train), _zscore(val)


def ridge_predict(X_train, y_train, X_val, alpha):
    """Fit ridge regression with an unpenalized bias and predict on X_val.

    A column of ones is appended to the features as the bias term; the
    ``alpha`` penalty is applied to every weight except that bias.
    """
    def _with_bias(features):
        return np.column_stack([features, np.ones(features.shape[0])])

    design = _with_bias(X_train)
    penalty = np.eye(design.shape[1])
    penalty[-1, -1] = 0.0  # do not regularize bias
    weights = np.linalg.solve(design.T @ design + alpha * penalty, design.T @ y_train)
    return _with_bias(X_val) @ weights


def best_reim_ridge(coeff, y_true, train_idx, val_idx, k_list=(1,2,3,4,5,6,7,8), alphas=None):
    """Grid-search ridge regression on [Re, Im] coefficient features.

    For every ``k`` in ``k_list`` the real/imaginary parts of the first ``k``
    coefficients are standardized with training statistics, and for every
    ``alpha`` (default ``np.logspace(-8, 2, 11)``) a ridge model is fitted on
    the train indices and scored by the residual sigma on the validation
    indices.

    Returns:
        The result row (dict) with the smallest validation sigma; the first
        candidate in (k, alpha) scan order wins ties.  ``None`` when the grid
        is empty.

    NOTE(review): the hyperparameters are selected on the same validation set
    whose sigma is reported, so the returned sigma is an optimistic estimate.
    """
    if alphas is None:
        alphas = np.logspace(-8, 2, 11)

    def _rows():
        for k in k_list:
            features = make_reim_features(coeff, k)
            f_train, f_val = standardize(features[train_idx], features[val_idx])
            for alpha in alphas:
                y_val_est = ridge_predict(f_train, y_true[train_idx], f_val, float(alpha))
                sigma, fwhm = resolution_from_truth(y_val_est, y_true[val_idx])
                yield {
                    'method': 'reim-ridge',
                    'k': int(k),
                    'alpha': float(alpha),
                    'sigma': float(sigma),
                    'fwhm': float(fwhm),
                    'a': np.nan,
                    'b': np.nan,
                }

    # min() keeps the first of equal candidates, like a strict '<' scan.
    return min(_rows(), key=lambda row: row['sigma'], default=None)


def load_noise_dataset(dataset_paths, trace_key, n_batches=None):
    """Load channel 0 of the noisy and clean traces from a set of HDF5 batches.

    Files are ordered by the integer following ``_batch_`` in their name.
    Only index 0 of the second axis is read from each file, so the other
    channels are never materialized in memory.

    Args:
        dataset_paths: iterable of HDF5 file paths; each name must contain
            ``_batch_<N>``.
        trace_key: name of the noisy-trace dataset inside each file; the clean
            traces are always read from ``'traces_clean'``.
        n_batches: if given, only the first ``n_batches`` files (in batch
            order) are loaded.

    Returns:
        (X_noisy, X_clean, files): two float64 arrays with the traces of all
        loaded files concatenated along axis 0, and the list of files used.

    Raises:
        ValueError: if a file name carries no ``_batch_<N>`` index, or if no
            files remain to load.
    """
    def _batch_number(path):
        match = re.search(r'_batch_(\d+)', path.name)
        if match is None:
            raise ValueError(
                f"cannot determine batch index of {path.name!r}: "
                "expected '_batch_<N>' in the file name"
            )
        return int(match.group(1))

    files = sorted((Path(p) for p in dataset_paths), key=_batch_number)
    if n_batches is not None:
        files = files[:n_batches]
    if not files:
        raise ValueError('no dataset files to load')

    noisy_list = []
    clean_list = []
    for p in files:
        with h5py.File(p, 'r') as f:
            # Slice channel 0 at read time instead of after concatenation.
            noisy_list.append(f[trace_key][:, 0, :].astype(np.float64))
            clean_list.append(f['traces_clean'][:, 0, :].astype(np.float64))

    X_noisy = np.concatenate(noisy_list, axis=0)
    X_clean = np.concatenate(clean_list, axis=0)
    return X_noisy, X_clean, files


In [4]:
# Evaluate all 6 models (3 noises x 2 weighting strategies)
#
# For each noise type: load the traces once, score both weightings (PSD / SNR2)
# with the k-norm and REIM-ridge estimators, then optionally add an OF baseline.
# Results accumulate in `rows` (EMPCA) and `rows_of` (OF) and are shown as a table.

# Organize artifacts by noise and weight type
models = {}
for name, art in artifacts.items():
    noise = art.get('noise_type', 'unknown')
    wt = art.get('weight_type', 'unknown')
    # Anything not marked SNR2 (in the artifact or in its file name) counts as PSD.
    if 'SNR2' in wt or 'SNR2' in name:
        wlabel = 'SNR2'
    else:
        wlabel = 'PSD'
    models.setdefault(noise, {})[wlabel] = {'name': name, 'artifact': art}

print('noise groups:', sorted(models.keys()))
for n in sorted(models):
    print(n, '->', sorted(models[n].keys()))

rows = []
rows_of = []

for noise in sorted(models):
    # Pick one artifact as the source of dataset_paths/trace key for this noise
    first = next(iter(models[noise].values()))['artifact']
    dataset_paths = first.get('dataset_paths', [])
    trace_key = first.get('trace_key', f'traces_{noise}')

    if not dataset_paths:
        print(f'[{noise}] no dataset paths in artifacts; skipping noise type')
        continue

    X_noisy, X_clean, used_files = load_noise_dataset(dataset_paths, trace_key=trace_key)
    # Regression target: the peak value of each clean trace.
    A_true = np.max(X_clean, axis=1)

    print(f'[{noise}] files={len(used_files)} traces={len(A_true)} trace_key={trace_key}')

    # shared split per noise so PSD/SNR2 are directly comparable
    train_idx, val_idx = make_train_val_idx(len(A_true), val_frac=VAL_FRAC, seed=SEED)

    for wlabel in sorted(models[noise]):
        item = models[noise][wlabel]
        name = item['name']
        art = item['artifact']

        # A keep_mask is applied only when its length matches the loaded traces;
        # the masked subset then gets its own split, so it is no longer the
        # shared split used by the unmasked models.
        keep_mask = art.get('keep_mask', None)
        if keep_mask is not None and len(keep_mask) == len(A_true):
            X_eval = X_noisy[keep_mask]
            y_eval = A_true[keep_mask]
            tr, va = make_train_val_idx(len(y_eval), val_frac=VAL_FRAC, seed=SEED)
        else:
            X_eval = X_noisy
            y_eval = A_true
            tr, va = train_idx, val_idx

        coeff = compute_coeff_for_model(art, X_eval)

        # Each call returns the row with the smallest holdout sigma over its grid.
        best_k = best_knorm_linear(coeff, y_eval, tr, va)
        best_rr = best_reim_ridge(coeff, y_eval, tr, va)

        rows.append({
            'model_file': name,
            'noise_type': noise,
            'weight_type': wlabel,
            **best_k,
        })
        rows.append({
            'model_file': name,
            'noise_type': noise,
            'weight_type': wlabel,
            **best_rr,
        })

    # Optional OF baseline using qp template
    if HAVE_OF and qp_template_path is not None and NOISE_PSD_PATHS.get(noise, None) is not None:
        psd_path = NOISE_PSD_PATHS[noise]
        if psd_path.exists():
            template = np.asarray(np.load(qp_template_path), dtype=np.float64).squeeze()
            psd_arr = np.load(psd_path)
            # A (2, n) PSD file keeps the PSD values in row 1 (row 0 is
            # presumably the frequency axis -- confirm).
            psd = psd_arr[1] if psd_arr.ndim == 2 and psd_arr.shape[0] == 2 else psd_arr
            psd = np.asarray(psd, dtype=np.float64)

            # The OF path always uses the unmasked traces, a 'mean' baseline and
            # the shared train/val split.
            pretrigger = int(first.get('cfg', {}).get('pretrigger', 4000))
            X0, _ = baseline_correct_per_trace(X_noisy, pretrigger=pretrigger, method='mean')
            if template.shape[0] != X0.shape[1]:
                print(f'[{noise}] OF skipped: template length {template.shape[0]} != trace length {X0.shape[1]}')
            elif psd.shape[0] != (X0.shape[1] // 2 + 1):
                print(f'[{noise}] OF skipped: PSD length {psd.shape[0]} mismatch')
            else:
                # Baseline-subtract the template with its own pretrigger mean.
                t0 = template - np.mean(template[:pretrigger])
                of = OptimumFilter(t0, psd, FS)
                amp_of = np.empty(X0.shape[0], dtype=np.float64)
                for i in range(X0.shape[0]):
                    a, _ = of.fit(X0[i])
                    amp_of[i] = a
                y_est, a_lin, b_lin = lin_calibrate_train(amp_of[train_idx], A_true[train_idx], amp_of[val_idx])
                sigma, fwhm = resolution_from_truth(y_est, A_true[val_idx])
                rows_of.append({
                    'model_file': 'OF',
                    'noise_type': noise,
                    'weight_type': 'OF',
                    'method': 'OF+linear',
                    'k': np.nan,
                    'alpha': np.nan,
                    'sigma': float(sigma),
                    'fwhm': float(fwhm),
                    'a': float(a_lin),
                    'b': float(b_lin),
                })

    # Release the per-noise arrays before loading the next noise type.
    del X_noisy, X_clean, A_true
    gc.collect()


all_rows = rows + rows_of
print('total result rows =', len(all_rows))

# `display` is the notebook built-in; without pandas the rows are printed raw.
if pd is not None:
    df = pd.DataFrame(all_rows)
    display(df.sort_values(['noise_type', 'weight_type', 'method']))
else:
    for r in sorted(all_rows, key=lambda d: (d['noise_type'], d['weight_type'], d['method'])):
        print(r)


noise groups: ['mmc', 'pink', 'white']
mmc -> ['PSD', 'SNR2']
pink -> ['PSD', 'SNR2']
white -> ['PSD', 'SNR2']
[mmc] files=10 traces=1000 trace_key=traces_MMC
[pink] files=10 traces=1000 trace_key=traces_pink
[white] files=10 traces=1000 trace_key=traces_white
total result rows = 15


Unnamed: 0,model_file,noise_type,weight_type,method,k,alpha,sigma,fwhm,a,b
12,OF,mmc,OF,OF+linear,,,7.816658,18.40823,1.015569,-4.676299
0,PSD_run1_sum_mmc.pkl,mmc,PSD,k-norm+linear,6.0,,12.336683,29.05289,6e-05,17.124258
1,PSD_run1_sum_mmc.pkl,mmc,PSD,reim-ridge,2.0,1e-08,7.393366,17.411377,,
2,SNR2_run1_sum_mmc.pkl,mmc,SNR2,k-norm+linear,1.0,,18.848461,44.388126,2.3e-05,34.751282
3,SNR2_run1_sum_mmc.pkl,mmc,SNR2,reim-ridge,8.0,1e-08,5.677784,13.371182,,
13,OF,pink,OF,OF+linear,,,21.780283,51.292567,0.949585,20.424425
4,PSD_run1_sum_pink.pkl,pink,PSD,k-norm+linear,7.0,,27.147569,63.932526,5.5e-05,77.202049
5,PSD_run1_sum_pink.pkl,pink,PSD,reim-ridge,7.0,10.0,21.849867,51.456436,,
6,SNR2_run1_sum_pink.pkl,pink,SNR2,k-norm+linear,1.0,,31.067384,73.163689,5.4e-05,92.086522
7,SNR2_run1_sum_pink.pkl,pink,SNR2,reim-ridge,8.0,1e-08,20.225714,47.631556,,


In [5]:
# OF baseline with fit_with_shift (sigma on holdout)
#
# For every noise type: fit each baseline-subtracted trace with
# OptimumFilter.fit_with_shift, linearly calibrate the fitted amplitude on the
# train split and record the holdout sigma / FWHM plus the mean |shift|.

rows_of_shift = []

# The OF dependency and the qp template do not depend on the noise type, so
# they are resolved once instead of on every loop iteration.
of_available = HAVE_OF and qp_template_path is not None
template = None
if of_available:
    template = np.asarray(np.load(qp_template_path), dtype=np.float64).squeeze()

for noise in sorted(models):
    first = next(iter(models[noise].values()))['artifact']
    dataset_paths = first.get('dataset_paths', [])
    trace_key = first.get('trace_key', f'traces_{noise}')

    if not dataset_paths:
        print(f'[{noise}] no dataset paths in artifacts; skipping noise type')
        continue

    # Cheap availability checks come before the expensive HDF5 load, so nothing
    # is read from disk when the OF baseline cannot run for this noise type.
    psd_path = NOISE_PSD_PATHS.get(noise, None)
    if not of_available or psd_path is None or not psd_path.exists():
        continue

    psd_arr = np.load(psd_path)
    # A (2, n) PSD file keeps the PSD values in row 1.
    psd = psd_arr[1] if psd_arr.ndim == 2 and psd_arr.shape[0] == 2 else psd_arr
    psd = np.asarray(psd, dtype=np.float64)

    X_noisy, X_clean, _ = load_noise_dataset(dataset_paths, trace_key=trace_key)
    # Regression target: the peak value of each clean trace.
    A_true = np.max(X_clean, axis=1)
    train_idx, val_idx = make_train_val_idx(len(A_true), val_frac=VAL_FRAC, seed=SEED)

    pretrigger = int(first.get('cfg', {}).get('pretrigger', 4000))
    X0, _ = baseline_correct_per_trace(X_noisy, pretrigger=pretrigger, method='mean')
    # X0 is a separate array; the raw traces are no longer needed.
    del X_noisy, X_clean

    if template.shape[0] != X0.shape[1]:
        print(f'[{noise}] OF+shift skipped: template length {template.shape[0]} != trace length {X0.shape[1]}')
    elif psd.shape[0] != (X0.shape[1] // 2 + 1):
        print(f'[{noise}] OF+shift skipped: PSD length {psd.shape[0]} mismatch')
    else:
        # Baseline-subtract the template with its own pretrigger mean.
        t0 = template - np.mean(template[:pretrigger])
        of = OptimumFilter(t0, psd, FS)

        amp_shift = np.empty(X0.shape[0], dtype=np.float64)
        best_shift = np.empty(X0.shape[0], dtype=np.int32)
        for i in range(X0.shape[0]):
            a, _, shift = of.fit_with_shift(X0[i])
            amp_shift[i] = a
            best_shift[i] = shift

        y_est, a_lin, b_lin = lin_calibrate_train(amp_shift[train_idx], A_true[train_idx], amp_shift[val_idx])
        sigma, fwhm = resolution_from_truth(y_est, A_true[val_idx])

        rows_of_shift.append({
            'model_file': 'OF',
            'noise_type': noise,
            'weight_type': 'OF',
            'method': 'OF+shift+linear',
            'k': np.nan,
            'alpha': np.nan,
            'sigma': float(sigma),
            'fwhm': float(fwhm),
            'a': float(a_lin),
            'b': float(b_lin),
            'mean_abs_shift': float(np.mean(np.abs(best_shift))),
        })

    # Release the per-noise arrays on every path, including the skipped ones.
    del X0, A_true
    gc.collect()

if pd is not None:
    df_of_shift = pd.DataFrame(rows_of_shift)
    if len(df_of_shift) > 0:
        display(df_of_shift.sort_values(['noise_type'])[
            ['noise_type', 'sigma', 'fwhm', 'mean_abs_shift']
        ])
    else:
        print('No OF+shift results generated.')
else:
    for r in rows_of_shift:
        print(r)


Unnamed: 0,noise_type,sigma,fwhm,mean_abs_shift
0,mmc,6.493218,15.291529,96.341
1,pink,20.994062,49.441017,52.017
2,white,3.686814,8.682446,122.561


In [6]:
# Compact summary tables: one table per method, then a noise x weight pivot.
if pd is not None:
    df = pd.DataFrame(all_rows)
    detail_cols = ['noise_type', 'weight_type', 'model_file', 'k', 'alpha', 'sigma', 'fwhm']
    by_noise_then_sigma = ['noise_type', 'sigma']

    # Main target table: REIM ridge across all 6 models
    df_rr = df[df['method'] == 'reim-ridge'].copy()
    print('=== REIM ridge (best holdout sigma) ===')
    display(df_rr.sort_values(by_noise_then_sigma)[detail_cols])

    # Same layout without the alpha column, which is NaN for this method.
    print('=== k-norm + linear (best holdout sigma) ===')
    df_kn = df[df['method'] == 'k-norm+linear'].copy()
    knorm_cols = [col for col in detail_cols if col != 'alpha']
    display(df_kn.sort_values(by_noise_then_sigma)[knorm_cols])

    # OF rows exist only when the optional baseline actually ran.
    if rows_of:
        print('=== OF baseline (qp template) ===')
        df_of = df[df['method'] == 'OF+linear'].copy()
        display(df_of.sort_values(['noise_type'])[['noise_type', 'sigma', 'fwhm']])

    # Easy cross-check pivot
    print('=== Pivot: sigma by noise / weight (REIM ridge) ===')
    piv = df_rr.pivot_table(
        index='noise_type',
        columns='weight_type',
        values='sigma',
        aggfunc='min',
    )
    display(piv)
else:
    print('pandas not installed; skip table formatting')


=== REIM ridge (best holdout sigma) ===


Unnamed: 0,noise_type,weight_type,model_file,k,alpha,sigma,fwhm
3,mmc,SNR2,SNR2_run1_sum_mmc.pkl,8.0,1e-08,5.677784,13.371182
1,mmc,PSD,PSD_run1_sum_mmc.pkl,2.0,1e-08,7.393366,17.411377
7,pink,SNR2,SNR2_run1_sum_pink.pkl,8.0,1e-08,20.225714,47.631556
5,pink,PSD,PSD_run1_sum_pink.pkl,7.0,10.0,21.849867,51.456436
11,white,SNR2,SNR2_run1_sum_white.pkl,8.0,1e-08,1.002644,2.361226
9,white,PSD,PSD_run1_sum_white.pkl,3.0,1e-08,1.144652,2.695656


=== k-norm + linear (best holdout sigma) ===


Unnamed: 0,noise_type,weight_type,model_file,k,sigma,fwhm
0,mmc,PSD,PSD_run1_sum_mmc.pkl,6.0,12.336683,29.05289
2,mmc,SNR2,SNR2_run1_sum_mmc.pkl,1.0,18.848461,44.388126
4,pink,PSD,PSD_run1_sum_pink.pkl,7.0,27.147569,63.932526
6,pink,SNR2,SNR2_run1_sum_pink.pkl,1.0,31.067384,73.163689
8,white,PSD,PSD_run1_sum_white.pkl,1.0,9.097989,21.425765
10,white,SNR2,SNR2_run1_sum_white.pkl,1.0,10.128687,23.853059


=== OF baseline (qp template) ===


Unnamed: 0,noise_type,sigma,fwhm
12,mmc,7.816658,18.40823
13,pink,21.780283,51.292567
14,white,4.70732,11.08574


=== Pivot: sigma by noise / weight (REIM ridge) ===


weight_type,PSD,SNR2
noise_type,Unnamed: 1_level_1,Unnamed: 2_level_1
mmc,7.393366,5.677784
pink,21.849867,20.225714
white,1.144652,1.002644


## Notes

- This notebook uses **sum-channel traces** (`traces_MMC`, `traces_white`, `traces_pink`), matching the wk4 training setup.
- QP template is searched in this order:
  1. `PCA_dev/reusable/template/qp_template.npy`
  2. `PCA_dev/reusable/template/QP_template.npy`
  3. `trigger_study/archive/wk9/preliminary/sum_qp_template_1800.npy`
- If template or OF dependency is missing, EMPCA evaluation still runs and OF rows are skipped.
