In [1]:
# CNT One-Cell Groundbreaker: blind human→mouse transfer with prereg + permutation test
# Files (optional): /mnt/data/HUMAN_EEG.csv and /mnt/data/MOUSE_EEG.csv
# CSV shape: rows=time samples, cols=channels, plus 'label' (0/1). Assumes 4 s epochs @ 128 Hz.
# Artifacts: /mnt/data/CNT_OneCell_Groundbreaker/{results.json,summary.txt,transfer_roc.png,perm_null.png}

import os, json, textwrap
from datetime import datetime
from pathlib import Path
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

OUTDIR = Path("/mnt/data/CNT_OneCell_Groundbreaker"); OUTDIR.mkdir(parents=True, exist_ok=True)
rng = np.random.default_rng(42)

# ---------- helpers ----------
def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    mask = (freqs >= f_lo) & (freqs <= f_hi)
    return S[mask].sum(axis=0), freqs, S

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    mask = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[mask] = F[mask]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C, xb

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if ddX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k=8):
    rng_local = np.random.default_rng(12345)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def glyph_invariant_feature(X, fs, band=(8,12), k=8):
    C, _ = _cov_band(X, fs, band)
    A = _orthobasis(C.shape[0], k=k)
    ACAt = A.T @ C @ A
    return float(np.linalg.norm(ACAt, 'fro') / (np.linalg.norm(C, 'fro') + 1e-9))

def baseline_features(X, fs, bands=[(1,4),(4,8),(8,12),(12,30)]):
    feats = []
    for lo,hi in bands:
        bp,_,_ = _fft_band_energy(X, fs, lo, hi)
        feats.append(bp.mean())
    hj = _hjorth_params(X)
    feats += list(hj.mean(axis=0))
    bp_full,_,_ = _fft_band_energy(X, fs, 0.5, 40)
    feats += [bp_full.mean(), np.sqrt((bp_full**2).mean())]
    return np.array(feats, float)

def synth_domain(n_epochs=240, n_channels=32, fs=128.0, epoch_len=4.0, eo_shift=0.8, domain_noise=0.3, seed=0):
    rngs = np.random.default_rng(seed)
    t = np.arange(int(fs*epoch_len))/fs
    freqs = [2,4,8,10,12,20]
    X_list, y = [], []
    for i in range(n_epochs):
        label = int(i%2)
        chs = []
        for _ in range(n_channels):
            sig = np.zeros_like(t)
            for f in freqs:
                amp = rngs.normal(0.8, 0.2)
                if label==1 and 8<=f<=12: amp += eo_shift
                sig += amp*np.sin(2*np.pi*f*t + rngs.uniform(0,2*np.pi))
            sig += rngs.normal(0, domain_noise, size=t.shape)
            chs.append(sig)
        X_list.append(np.stack(chs, axis=1)); y.append(label)
    return X_list, np.array(y), fs

def load_or_synthesize():
    fs, epoch_len = 128.0, 4.0
    hp, mp = Path("/mnt/data/HUMAN_EEG.csv"), Path("/mnt/data/MOUSE_EEG.csv")
    if hp.exists() and mp.exists():
        def load_csv(p):
            df = pd.read_csv(p); assert 'label' in df.columns
            y = df['label'].astype(int).values
            X = df.drop(columns=['label']).values
            L = int(fs*epoch_len); n = (X.shape[0]//L)*L
            X = X[:n].reshape(-1, L, X.shape[1]); y = y[:X.shape[0]]
            return [X[i] for i in range(X.shape[0])], y, fs
        Xh,yh,fsh = load_csv(hp); Xm,ym,fsm = load_csv(mp)
        return Xh,yh,fsh, Xm,ym,fsm, True
    # synthetic fallback
    Xh,yh,fsh = synth_domain(domain_noise=0.25, seed=1)
    Xm,ym,fsm = synth_domain(domain_noise=0.45, seed=2)
    return Xh,yh,fsh, Xm,ym,fsm, False

def featurize_block(X_list, fs):
    G,B = [],[]
    for X in X_list:
        try: g = glyph_invariant_feature(X, fs, band=(8,12), k=8)
        except: g = np.nan
        b = baseline_features(X, fs)
        G.append(g); B.append(b)
    G = np.array(G).reshape(-1,1); B = np.array(B)
    m = ~np.isnan(G).ravel()
    return G[m], B[m], m

def fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba>=0.5).astype(int)
    return {"auroc": roc_auc_score(test_y, proba), "acc": accuracy_score(test_y, preds), "proba": proba}

def nested_cv(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=7)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test(train_X, train_y, test_X, test_y, baseline_auc, n_perm=1000, seed=77):
    rngp = np.random.default_rng(seed)
    aucs, deltas = [], []
    for _ in range(n_perm):
        y_perm = train_y.copy(); rngp.shuffle(y_perm)
        out = fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"]); deltas.append(out["auroc"] - baseline_auc)
    return np.array(aucs), np.array(deltas)

# ---------- run ----------
Xh, yh, fs_h, Xm, ym, fs_m, used_real = load_or_synthesize()
Gh, Bh, mh = featurize_block(Xh, fs_h); yh = yh[mh]
Gm, Bm, mm = featurize_block(Xm, fs_m); ym = ym[mm]

res_g = fit_eval(Gh, yh, Gm, ym)
res_b = fit_eval(Bh, yh, Bm, ym)
cv_g = nested_cv(Gh, yh); cv_b = nested_cv(Bh, yh)

n_perm = 1000
perm_aucs_g, perm_deltas_g = perm_test(Gh, yh, Gm, ym, res_b["auroc"], n_perm=n_perm, seed=77)
p_g = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (n_perm + 1)
p_delta = (np.sum(perm_deltas_g >= (res_g["auroc"] - res_b["auroc"])) + 1) / (n_perm + 1)

# ---------- plots ----------
fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
fpr_b, tpr_b, _ = roc_curve(ym, fit_eval(Bh, yh, Bm, ym)["proba"])

plt.figure(figsize=(6,5))
plt.plot(fpr_g, tpr_g, label=f"Glyph-Invariant (AUROC={res_g['auroc']:.3f})")
plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
plt.tight_layout()
roc_path = OUTDIR/"transfer_roc.png"; plt.savefig(roc_path, dpi=200); plt.close()

plt.figure(figsize=(6,5))
plt.hist(perm_aucs_g, bins=30, alpha=0.7)
plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
plt.xlabel("AUROC under label permutation (Glyph-Invariant)")
plt.ylabel("Count"); plt.title(f"Permutation Test (n={n_perm}) | p={p_g:.4f}")
plt.tight_layout()
perm_path = OUTDIR/"perm_null.png"; plt.savefig(perm_path, dpi=200); plt.close()

# ---------- reports ----------
results = {
    "timestamp": datetime.utcnow().isoformat()+"Z",
    "used_real_data": bool(used_real),
    "fs_hz_human": float(fs_h),
    "fs_hz_mouse": float(fs_m),
    "n_human_epochs": int(len(Gh)),
    "n_mouse_epochs": int(len(Gm)),
    "transfer": {
        "glyph_invariant": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
        "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
        "delta_auroc": float(res_g["auroc"] - res_b["auroc"]),
        "perm_test": {
            "n_perm": n_perm, "p_auroc": float(p_g), "p_delta": float(p_delta),
            "perm_mean_auroc": float(np.mean(perm_aucs_g)), "perm_std_auroc": float(np.std(perm_aucs_g)),
        }
    },
    "nested_cv_human": {
        "glyph_invariant": {"auroc_mean": cv_g[0], "auroc_std": cv_g[1], "acc_mean": cv_g[2], "acc_std": cv_g[3]},
        "baseline": {"auroc_mean": cv_b[0], "auroc_std": cv_b[1], "acc_mean": cv_b[2], "acc_std": cv_b[3]}
    },
    "prereg": {
        "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph-invariant > baseline",
        "null_hypothesis": "Glyph-invariant AUROC equals baseline under label permutation in training.",
        "alpha": 0.05,
        "test": "One-sided permutation test on training labels; n=1000",
        "blinding": "Baseline features fixed; glyph basis fixed; test labels unseen during training.",
        "decision_rule": "Reject H0 if p_delta < 0.05 and glyph AUROC > 0.70."
    },
    "notes": "Synthetic mode mimics alpha-band modulation; place real CSVs at /mnt/data to run empirically."
}
with open(OUTDIR/"results.json","w") as f: json.dump(results, f, indent=2)

summary = f"""
CNT One-Cell Groundbreaker — Blind Cross-Domain Test
====================================================
UTC: {results['timestamp']}
Data mode: {"REAL" if results['used_real_data'] else "SYNTHETIC"}
Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}

TRANSFER (Train Human → Test Mouse)
- Glyph-Invariant: AUROC={results['transfer']['glyph_invariant']['auroc']:.3f}, ACC={results['transfer']['glyph_invariant']['acc']:.3f}
- Baseline      : AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
- Δ AUROC       : {results['transfer']['delta_auroc']:.3f}

Permutation Test (n={n_perm})
- p(AUROC ≥ observed)   : p={results['transfer']['perm_test']['p_auroc']:.4f}
- p(Δ AUROC ≥ observed) : p={results['transfer']['perm_test']['p_delta']:.4f}

Nested CV on Human
- Glyph-Invariant: AUROC={results['nested_cv_human']['glyph_invariant']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph_invariant']['auroc_std']:.3f}
- Baseline      : AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

PREREG DECISION RULE
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.
Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png
"""
with open(OUTDIR/"summary.txt","w") as f: f.write(textwrap.dedent(summary))

print(summary.strip())
print("\nArtifacts saved to:", OUTDIR)


  "timestamp": datetime.utcnow().isoformat()+"Z",


UnicodeEncodeError: 'charmap' codec can't encode character '\u2192' in position 229: character maps to <undefined>

In [2]:
# CNT One-Cell Groundbreaker — Windows-safe (UTF-8 + timezone-aware) edition
# - Blinded human→mouse transfer, CNT glyph-invariant vs baselines, 1000-permutation test
# - Accepts optional /mnt/data/HUMAN_EEG.csv and /mnt/data/MOUSE_EEG.csv (cols = channels, plus 'label' 0/1)
# - Writes artifacts with UTF-8 encoding to an existing folder (prefers /mnt/data if present)

import os, json, textwrap, platform
from pathlib import Path
from datetime import datetime, timezone

import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

# ---------- Output directory (portable) ----------
def pick_outdir():
    cand = [Path("/mnt/data"), Path.cwd(), Path.home() / "Documents"]
    for p in cand:
        try:
            p.mkdir(parents=True, exist_ok=True)
            test = p / ".touch_ok"
            with open(test, "w", encoding="utf-8") as f: f.write("ok")
            test.unlink(missing_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()

BASE = pick_outdir()
OUTDIR = BASE / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

rng = np.random.default_rng(42)

# ---------- helpers ----------
def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    mask = (freqs >= f_lo) & (freqs <= f_hi)
    return S[mask].sum(axis=0), freqs, S

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    mask = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[mask] = F[mask]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C, xb

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if ddX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k=8):
    rng_local = np.random.default_rng(12345)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def glyph_invariant_feature(X, fs, band=(8,12), k=8):
    C, _ = _cov_band(X, fs, band)
    A = _orthobasis(C.shape[0], k=k)
    ACAt = A.T @ C @ A
    return float(np.linalg.norm(ACAt, 'fro') / (np.linalg.norm(C, 'fro') + 1e-9))

def baseline_features(X, fs, bands=[(1,4),(4,8),(8,12),(12,30)]):
    feats = []
    for lo,hi in bands:
        bp,_,_ = _fft_band_energy(X, fs, lo, hi)
        feats.append(bp.mean())
    hj = _hjorth_params(X)  # mean over channels
    feats += list(hj.mean(axis=0))
    bp_full,_,_ = _fft_band_energy(X, fs, 0.5, 40)
    feats += [bp_full.mean(), np.sqrt((bp_full**2).mean())]
    return np.array(feats, float)

def synth_domain(n_epochs=240, n_channels=32, fs=128.0, epoch_len=4.0, eo_shift=0.8, domain_noise=0.3, seed=0):
    rngs = np.random.default_rng(seed)
    t = np.arange(int(fs*epoch_len))/fs
    freqs = [2,4,8,10,12,20]
    X_list, y = [], []
    for i in range(n_epochs):
        label = int(i%2)
        chs = []
        for _ in range(n_channels):
            sig = np.zeros_like(t)
            for f in freqs:
                amp = rngs.normal(0.8, 0.2)
                if label==1 and 8<=f<=12: amp += eo_shift
                sig += amp*np.sin(2*np.pi*f*t + rngs.uniform(0,2*np.pi))
            sig += rngs.normal(0, domain_noise, size=t.shape)
            chs.append(sig)
        X_list.append(np.stack(chs, axis=1)); y.append(label)
    return X_list, np.array(y), fs

def load_or_synthesize():
    fs, epoch_len = 128.0, 4.0
    hp, mp = Path("/mnt/data/HUMAN_EEG.csv"), Path("/mnt/data/MOUSE_EEG.csv")
    if hp.exists() and mp.exists():
        def load_csv(p):
            df = pd.read_csv(p)
            assert 'label' in df.columns, "CSV must include a 'label' column."
            y = df['label'].astype(int).values
            X = df.drop(columns=['label']).values
            L = int(fs*epoch_len)
            n = (X.shape[0]//L)*L
            X = X[:n].reshape(-1, L, X.shape[1])
            y = y[:X.shape[0]]
            return [X[i] for i in range(X.shape[0])], y, fs
        Xh,yh,fsh = load_csv(hp); Xm,ym,fsm = load_csv(mp)
        return Xh,yh,fsh, Xm,ym,fsm, True
    # synthetic fallback
    Xh,yh,fsh = synth_domain(domain_noise=0.25, seed=1)
    Xm,ym,fsm = synth_domain(domain_noise=0.45, seed=2)
    return Xh,yh,fsh, Xm,ym,fsm, False

def featurize_block(X_list, fs):
    G,B = [],[]
    for X in X_list:
        try: g = glyph_invariant_feature(X, fs, band=(8,12), k=8)
        except: g = np.nan
        b = baseline_features(X, fs)
        G.append(g); B.append(b)
    G = np.array(G).reshape(-1,1); B = np.array(B)
    m = ~np.isnan(G).ravel()
    return G[m], B[m], m

def fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba>=0.5).astype(int)
    return {"auroc": roc_auc_score(test_y, proba), "acc": accuracy_score(test_y, preds), "proba": proba}

def nested_cv(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=7)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test(train_X, train_y, test_X, test_y, baseline_auc, n_perm=1000, seed=77):
    rngp = np.random.default_rng(seed)
    aucs, deltas = [], []
    for _ in range(n_perm):
        y_perm = train_y.copy(); rngp.shuffle(y_perm)
        out = fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"]); deltas.append(out["auroc"] - baseline_auc)
    return np.array(aucs), np.array(deltas)

# ---------- run ----------
Xh, yh, fs_h, Xm, ym, fs_m, used_real = load_or_synthesize()
Gh, Bh, mh = featurize_block(Xh, fs_h); yh = yh[mh]
Gm, Bm, mm = featurize_block(Xm, fs_m); ym = ym[mm]

res_g = fit_eval(Gh, yh, Gm, ym)
res_b = fit_eval(Bh, yh, Bm, ym)
cv_g = nested_cv(Gh, yh); cv_b = nested_cv(Bh, yh)

n_perm = 1000
perm_aucs_g, perm_deltas_g = perm_test(Gh, yh, Gm, ym, res_b["auroc"], n_perm=n_perm, seed=77)
p_g = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (n_perm + 1)
p_delta = (np.sum(perm_deltas_g >= (res_g["auroc"] - res_b["auroc"])) + 1) / (n_perm + 1)

# ---------- plots ----------
fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
fpr_b, tpr_b, _ = roc_curve(ym, fit_eval(Bh, yh, Bm, ym)["proba"])

plt.figure(figsize=(6,5))
plt.plot(fpr_g, tpr_g, label=f"Glyph-Invariant (AUROC={res_g['auroc']:.3f})")
plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
plt.tight_layout()
roc_path = OUTDIR/"transfer_roc.png"; plt.savefig(roc_path, dpi=200); plt.close()

plt.figure(figsize=(6,5))
plt.hist(perm_aucs_g, bins=30, alpha=0.7)
plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
plt.xlabel("AUROC under label permutation (Glyph-Invariant)")
plt.ylabel("Count"); plt.title(f"Permutation Test (n={n_perm}) | p={p_g:.4f}")
plt.tight_layout()
perm_path = OUTDIR/"perm_null.png"; plt.savefig(perm_path, dpi=200); plt.close()

# ---------- reports (UTF-8, timezone-aware) ----------
results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),  # timezone-aware UTC
    "used_real_data": bool(used_real),
    "fs_hz_human": float(fs_h),
    "fs_hz_mouse": float(fs_m),
    "n_human_epochs": int(len(Gh)),
    "n_mouse_epochs": int(len(Gm)),
    "transfer": {
        "glyph_invariant": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
        "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
        "delta_auroc": float(res_g["auroc"] - res_b["auroc"]),
        "perm_test": {
            "n_perm": n_perm, "p_auroc": float(p_g), "p_delta": float(p_delta),
            "perm_mean_auroc": float(np.mean(perm_aucs_g)), "perm_std_auroc": float(np.std(perm_aucs_g)),
        }
    },
    "nested_cv_human": {
        "glyph_invariant": {"auroc_mean": cv_g[0], "auroc_std": cv_g[1], "acc_mean": cv_g[2], "acc_std": cv_g[3]},
        "baseline": {"auroc_mean": cv_b[0], "auroc_std": cv_b[1], "acc_mean": cv_b[2], "acc_std": cv_b[3]}
    },
    "prereg": {
        "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph-invariant > baseline",
        "null_hypothesis": "Glyph-invariant AUROC equals baseline under label permutation in training.",
        "alpha": 0.05,
        "test": "One-sided permutation test on training labels; n=1000",
        "blinding": "Baseline features fixed; glyph basis fixed; test labels unseen during training.",
        "decision_rule": "Reject H0 if p_delta < 0.05 and glyph AUROC > 0.70."
    },
    "notes": "Place real CSVs in /mnt/data if available; otherwise synthetic mode runs."
}

# Write JSON with UTF-8 (allow Unicode) and TXT with UTF-8
with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

summary = f"""
CNT One-Cell Groundbreaker — Blind Cross-Domain Test
====================================================
UTC: {results['timestamp']}
Data mode: {"REAL" if results['used_real_data'] else "SYNTHETIC"}
Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}

TRANSFER (Train Human -> Test Mouse)
- Glyph-Invariant: AUROC={results['transfer']['glyph_invariant']['auroc']:.3f}, ACC={results['transfer']['glyph_invariant']['acc']:.3f}
- Baseline      : AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
- Delta AUROC   : {results['transfer']['delta_auroc']:.3f}

Permutation Test (n={n_perm})
- p(AUROC >= observed)   : p={results['transfer']['perm_test']['p_auroc']:.4f}
- p(Delta AUROC >= obs.) : p={results['transfer']['perm_test']['p_delta']:.4f}

Nested CV on Human
- Glyph-Invariant: AUROC={results['nested_cv_human']['glyph_invariant']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph_invariant']['auroc_std']:.3f}
- Baseline      : AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

PREREG DECISION RULE
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.
Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png
"""

try:
    with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
        f.write(textwrap.dedent(summary))
except UnicodeEncodeError:
    # Fallback: strip non-ASCII if user's environment forces a legacy encoding somewhere else
    with open(OUTDIR/"summary.txt", "w", encoding="utf-8", errors="ignore") as f:
        f.write(textwrap.dedent(summary))

print(textwrap.dedent(summary).strip())
print("\nArtifacts saved to:", OUTDIR)


CNT One-Cell Groundbreaker — Blind Cross-Domain Test
UTC: 2025-09-29T03:38:46.173736+00:00
Data mode: SYNTHETIC
Human epochs: 240 | Mouse epochs: 240

TRANSFER (Train Human -> Test Mouse)
- Glyph-Invariant: AUROC=0.453, ACC=0.471
- Baseline      : AUROC=1.000, ACC=1.000
- Delta AUROC   : -0.547

Permutation Test (n=1000)
- p(AUROC >= observed)   : p=1.0000
- p(Delta AUROC >= obs.) : p=1.0000

Nested CV on Human
- Glyph-Invariant: AUROC=0.475±0.060
- Baseline      : AUROC=1.000±0.000

PREREG DECISION RULE
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.
Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png

Artifacts saved to: \mnt\data\CNT_OneCell_Groundbreaker


In [7]:
# ========================= CNT One-Cell Groundbreaker (Prereg-Final, LOCKED) =========================
# Blind Human→Mouse transfer. Compares CNT multi-glyph invariant vs. strong baselines.
# Prereg DECISION RULE (FROZEN): Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.
#
# Inputs (optional):
#   /mnt/data/HUMAN_EEG.csv
#   /mnt/data/MOUSE_EEG.csv
#   - CSV rows = contiguous samples, columns = channels + 'label' (0/1). Assumes 4 s epochs @ 128 Hz.
#
# Outputs (UTF-8):
#   CNT_OneCell_Groundbreaker/results.json
#   CNT_OneCell_Groundbreaker/summary.txt
#   CNT_OneCell_Groundbreaker/transfer_roc.png
#   CNT_OneCell_Groundbreaker/perm_null.png
#
# LOCKS (do not change to preserve prereg integrity):
#   FS=128.0 Hz, EPOCH=4.0 s
#   TARGET_CHANNEL_DIM=32 via fixed random projection (seeded)
#   GLYPH_BANDS=[(6,9),(8,12),(10,14)], RANKS=[4,8,12]
#   BASELINES = mean bandpowers (1–4, 4–8, 8–12, 12–30), Hjorth mean (activity, mobility, complexity),
#               full-band PSD mean and RMS
#   MODEL = LogisticRegression(lbfgs, max_iter=200)
#   PERMUTATIONS = 1000, one-sided on ΔAUROC (glyph − baseline), labels permuted in TRAIN only
#
# =====================================================================================================

import os, json, textwrap
from pathlib import Path
from datetime import datetime, timezone

import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

# ---------- PREREG: FROZEN CONSTANTS ----------
FS = 128.0
EPOCH_S = 4.0
TARGET_CHANNEL_DIM = 32
GLYPH_BANDS = [(6,9),(8,12),(10,14)]
GLYPH_RANKS = [4,8,12]
BASELINE_BANDS = [(1,4),(4,8),(8,12),(12,30)]
PERM_N = 1000
SEED_MASTER = 20250928  # fixed seed for reproducibility

# ---------- Output dir (portable & UTF-8 safe) ----------
def pick_outdir():
    for p in [Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            (p/".touch_ok").write_text("ok", encoding="utf-8")
            (p/".touch_ok").unlink(missing_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()

BASE = pick_outdir()
OUTDIR = BASE / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

rng_global = np.random.default_rng(SEED_MASTER)

# ---------- Helpers (locked) ----------
def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    m = (freqs >= f_lo) & (freqs <= f_hi)
    return S[m].sum(axis=0)

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    m = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[m] = F[m]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if ddX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k, seed=777):
    rng_local = np.random.default_rng(seed + channels + k)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def fixed_projection_matrix(ch_in, ch_out, seed=SEED_MASTER):
    """Deterministic channel projector to TARGET_CHANNEL_DIM using QR; keeps prereg blind & reproducible."""
    rng = np.random.default_rng(seed + 31*ch_in + 7*ch_out)
    A = rng.normal(size=(ch_in, ch_out))
    Q, _ = np.linalg.qr(A)
    return Q[:, :ch_out]  # ch_in x ch_out (orthonormal columns)

def map_channels_epoch(X_epoch, target_dim=TARGET_CHANNEL_DIM):
    """Map [T, ch] → [T, target_dim] with a fixed projection matrix determined by original ch."""
    ch_in = X_epoch.shape[1]
    if ch_in == target_dim:
        return X_epoch
    P = fixed_projection_matrix(ch_in, target_dim)
    return X_epoch @ P  # [T, ch_in] @ [ch_in, target_dim] → [T, target_dim]

def baseline_features(X, fs):
    feats = []
    for lo,hi in BASELINE_BANDS:
        feats.append(_fft_band_energy(X, fs, lo, hi).mean())
    hj = _hjorth_params(X).mean(axis=0)
    feats += list(hj)
    bp_full = _fft_band_energy(X, fs, 0.5, 40.0)
    feats += [bp_full.mean(), float(np.sqrt((bp_full**2).mean()))]
    return np.array(feats, float)

def glyph_stack_features(X, fs):
    # Multi-glyph invariant vector over bands × ranks (FROZEN)
    C_feats = []
    for band in GLYPH_BANDS:
        C = _cov_band(X, fs, band)
        Cn = np.linalg.norm(C, 'fro') + 1e-9
        for k in GLYPH_RANKS:
            A = _orthobasis(C.shape[0], k, seed=13579)
            val = np.linalg.norm(A.T @ C @ A, 'fro') / Cn
            C_feats.append(val)
    return np.array(C_feats, float)

def model_fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba >= 0.5).astype(int)
    return dict(
        auroc=float(roc_auc_score(test_y, proba)),
        acc=float(accuracy_score(test_y, preds)),
        proba=proba
    )

def nested_cv_scores(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4242)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = model_fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test_delta(train_X, train_y, test_X, test_y, baseline_auc, n_perm=PERM_N):
    rngp = np.random.default_rng(9090)
    aucs, deltas = [], []
    for _ in range(n_perm):
        y_perm = train_y.copy()
        rngp.shuffle(y_perm)
        out = model_fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"])
        deltas.append(out["auroc"] - baseline_auc)
    aucs = np.array(aucs); deltas = np.array(deltas)
    return aucs, deltas

# ---------- Data load or synthetic fallback (harder) ----------
def synth_domain(n_epochs=240, n_channels=TARGET_CHANNEL_DIM, fs=FS, epoch_len=EPOCH_S,
                 eo_shift=0.6, domain_noise=0.5, seed=0, alpha_center=10.0, alpha_jitter=1.0):
    rngs = np.random.default_rng(seed)
    t = np.arange(int(fs*epoch_len))/fs
    base_freqs = [2,4,8,12,20]
    X_list, y = [], []
    # domain-specific mixing matrix (but we map both domains to TARGET_CHANNEL_DIM later anyway)
    for i in range(n_epochs):
        label = int(i%2)  # 0/1 alternating
        chs = []
        for _ in range(n_channels):
            sig = np.zeros_like(t)
            for f in base_freqs:
                amp = rngs.normal(0.8, 0.25)
                if label==1 and 8<=f<=12:
                    boost = eo_shift * np.exp(-0.5*((f - (alpha_center + rngs.normal(0,alpha_jitter)))/1.5)**2)
                    amp += boost
                sig += amp*np.sin(2*np.pi*f*t + rngs.uniform(0,2*np.pi))
            sig += rngs.normal(0, domain_noise*(0.6+0.4*np.sin(2*np.pi*0.2*t)), size=t.shape)
            chs.append(sig)
        X = np.stack(chs, axis=1)
        X_list.append(X); y.append(label)
    return X_list, np.array(y, int), fs

def load_or_synthesize():
    hp, mp = Path("/mnt/data/HUMAN_EEG.csv"), Path("/mnt/data/MOUSE_EEG.csv")
    if hp.exists() and mp.exists():
        dfh = pd.read_csv(hp); dfm = pd.read_csv(mp)
        assert 'label' in dfh.columns and 'label' in dfm.columns, "CSVs must include a 'label' column."
        y_h = dfh['label'].astype(int).values
        y_m = dfm['label'].astype(int).values
        X_h = dfh.drop(columns=['label']).values
        X_m = dfm.drop(columns=['label']).values
        L = int(FS*EPOCH_S)
        n_h = (X_h.shape[0]//L)*L; n_m = (X_m.shape[0]//L)*L
        X_h = X_h[:n_h].reshape(-1, L, X_h.shape[1]); y_h = y_h[:X_h.shape[0]]
        X_m = X_m[:n_m].reshape(-1, L, X_m.shape[1]); y_m = y_m[:X_m.shape[0]]
        used_real = True
        return [X_h[i] for i in range(X_h.shape[0])], y_h, [X_m[i] for i in range(X_m.shape[0])], y_m, used_real
    # Harder synthetic (two domains with different seeds/noise)
    Xh, yh, _ = synth_domain(seed=111, domain_noise=0.40)
    Xm, ym, _ = synth_domain(seed=222, domain_noise=0.55)
    return Xh, yh, Xm, ym, False

# ---------- Featurization (channel mapping + glyph stack + baselines) ----------
def featurize_domain(X_list, y, fs=FS):
    G_list, B_list = [], []
    for X in X_list:
        Xp = map_channels_epoch(X, TARGET_CHANNEL_DIM)
        G_list.append(glyph_stack_features(Xp, fs))
        B_list.append(baseline_features(Xp, fs))
    G = np.asarray(G_list, float)
    B = np.asarray(B_list, float)
    # drop any NaN rows (defensive)
    mask = ~np.isnan(G).any(axis=1)
    return G[mask], B[mask], y[mask]

# ---------- RUN (LOCKED FLOW) ----------
Xh, yh, Xm, ym, used_real = load_or_synthesize()
Gh, Bh, yh = featurize_domain(Xh, yh)
Gm, Bm, ym = featurize_domain(Xm, ym)

# Train on HUMAN, blind test on MOUSE
res_g = model_fit_eval(Gh, yh, Gm, ym)
res_b = model_fit_eval(Bh, yh, Bm, ym)

# Sanity CV on HUMAN
cv_g = nested_cv_scores(Gh, yh)
cv_b = nested_cv_scores(Bh, yh)

# Permutation on TRAIN labels (glyph), measuring ΔAUROC vs baseline
perm_aucs_g, perm_deltas = perm_test_delta(Gh, yh, Gm, ym, baseline_auc=res_b["auroc"], n_perm=PERM_N)
p_auroc = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (PERM_N + 1)
obs_delta = res_g["auroc"] - res_b["auroc"]
p_delta = (np.sum(perm_deltas >= obs_delta) + 1) / (PERM_N + 1)

# ---------- Plots ----------
fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
fpr_b, tpr_b, _ = roc_curve(ym, model_fit_eval(Bh, yh, Bm, ym)["proba"])

plt.figure(figsize=(6,5))
plt.plot(fpr_g, tpr_g, label=f"Glyph (AUROC={res_g['auroc']:.3f})")
plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
plt.tight_layout()
plt.savefig(OUTDIR/"transfer_roc.png", dpi=200); plt.close()

plt.figure(figsize=(6,5))
plt.hist(perm_aucs_g, bins=30, alpha=0.7)
plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
plt.xlabel("AUROC under label permutation (Glyph)")
plt.ylabel("Count"); plt.title(f"Permutation Test (n={PERM_N}) | p={p_auroc:.4f}")
plt.tight_layout()
plt.savefig(OUTDIR/"perm_null.png", dpi=200); plt.close()

# ---------- Reports (UTF-8, timezone-aware) ----------
results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "used_real_data": bool(used_real),
    "fs_hz": FS,
    "epoch_s": EPOCH_S,
    "target_channel_dim": TARGET_CHANNEL_DIM,
    "n_human_epochs": int(Gh.shape[0]),
    "n_mouse_epochs": int(Gm.shape[0]),
    "transfer": {
        "glyph": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
        "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
        "delta_auroc": float(obs_delta),
        "perm_test": {
            "n_perm": PERM_N,
            "p_auroc": float(p_auroc),
            "p_delta": float(p_delta),
            "perm_mean_auroc": float(np.mean(perm_aucs_g)),
            "perm_std_auroc": float(np.std(perm_aucs_g)),
        }
    },
    "nested_cv_human": {
        "glyph": {"auroc_mean": cv_g[0], "auroc_std": cv_g[1], "acc_mean": cv_g[2], "acc_std": cv_g[3]},
        "baseline": {"auroc_mean": cv_b[0], "auroc_std": cv_b[1], "acc_mean": cv_b[2], "acc_std": cv_b[3]}
    },
    "prereg": {
        "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph > baseline",
        "null_hypothesis": "Glyph AUROC equals baseline under label permutation in training.",
        "alpha": 0.05,
        "test": "One-sided permutation on training labels; n=1000",
        "blinding": "Hyperparameters & projections fixed; test labels unseen; baseline fixed.",
        "decision_rule": "Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70."
    }
}

with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

summary = f"""
CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
==================================================
UTC: {results['timestamp']}
Data mode: {"REAL" if results['used_real_data'] else "SYNTHETIC"}
Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}
Target channel dim (fixed): {results['target_channel_dim']}

TRANSFER (Train Human -> Test Mouse)
- Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}
- Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
- Δ AUROC:  {results['transfer']['delta_auroc']:.3f}

Permutation Test (n={PERM_N})
- p(AUROC >= observed)         : p={results['transfer']['perm_test']['p_auroc']:.4f}
- p(Δ AUROC >= observed (glyph-baseline)) : p={results['transfer']['perm_test']['p_delta']:.4f}

Nested CV on Human
- Glyph:    AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}
- Baseline: AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

PREREG DECISION RULE (LOCKED)
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.

Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png
"""
with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
    f.write(textwrap.dedent(summary))

print(textwrap.dedent(summary).strip())
print("\nArtifacts saved to:", OUTDIR)
# ====================================== END LOCKED CELL ==============================================


CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
UTC: 2025-09-29T03:47:07.821606+00:00
Data mode: SYNTHETIC
Human epochs: 240 | Mouse epochs: 240
Target channel dim (fixed): 32

TRANSFER (Train Human -> Test Mouse)
- Glyph:    AUROC=0.479, ACC=0.463
- Baseline: AUROC=1.000, ACC=1.000
- Δ AUROC:  -0.521

Permutation Test (n=1000)
- p(AUROC >= observed)         : p=0.6733
- p(Δ AUROC >= observed (glyph-baseline)) : p=0.6733

Nested CV on Human
- Glyph:    AUROC=0.521±0.047
- Baseline: AUROC=1.000±0.000

PREREG DECISION RULE (LOCKED)
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.

Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png

Artifacts saved to: \mnt\data\CNT_OneCell_Groundbreaker


In [8]:
# Convert MNE Epochs to the CSV the one-cell expects (rows = samples, cols = channels + 'label')
def export_epochs(epochs, label_map, out_csv, fs_target=128.0, epoch_s=4.0):
    import numpy as np, pandas as pd, mne
    # Resample if needed
    if epochs.info['sfreq'] != fs_target:
        epochs = epochs.copy().resample(fs_target)
    # Ensure fixed epoch length
    L = int(fs_target * epoch_s)
    X_list, y_list = [], []
    for i, e in enumerate(epochs):
        x = e[0]  # shape (n_channels, n_times)
        if x.shape[1] < L:
            continue
        x = x[:, :L]  # trim/pad policy: trim
        X_list.append(x.T)  # (L, n_channels)
        # Map event id → 0/1 label
        event_id = epochs.events[i, 2]
        y_list.append(int(label_map.get(event_id, 0)))
    if not X_list:
        raise ValueError("No epochs exported. Check lengths and label_map.")
    X = np.vstack(X_list)  # (n_epochs*L, n_channels)
    y = np.repeat(np.array(y_list, int), L)
    df = pd.DataFrame(X)
    df["label"] = y
    df.to_csv(out_csv, index=False)
    print(f"Wrote {out_csv} | rows={df.shape[0]} cols={df.shape[1]}")


In [9]:
# === CNT One-Cell Groundbreaker (DIAGNOSTIC / VERBOSE) ===
# Purpose: make sure you SEE output. Progress prints + quick permutation run.
# If this works, set PERM_N=1000 below and re-run the same cell.

import os, sys, json, textwrap, traceback
from pathlib import Path
from datetime import datetime, timezone

import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

def say(msg):
    print(msg); sys.stdout.flush()

try:
    say("[1/12] Config…")
    FS = 128.0
    EPOCH_S = 4.0
    TARGET_CHANNEL_DIM = 32
    GLYPH_BANDS = [(6,9),(8,12),(10,14)]
    GLYPH_RANKS = [4,8,12]
    BASELINE_BANDS = [(1,4),(4,8),(8,12),(12,30)]
    PERM_N = 50   # <<< smoke test; change to 1000 after this works
    SEED_MASTER = 20250928

    def pick_outdir():
        for p in [Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
            try:
                p.mkdir(parents=True, exist_ok=True)
                (p/".touch_ok").write_text("ok", encoding="utf-8")
                (p/".touch_ok").unlink(missing_ok=True)
                return p
            except Exception:
                continue
        return Path.cwd()

    BASE = pick_outdir()
    OUTDIR = BASE / "CNT_OneCell_Groundbreaker"
    OUTDIR.mkdir(parents=True, exist_ok=True)
    say(f"[2/12] OUTDIR = {OUTDIR}")

    rng_global = np.random.default_rng(SEED_MASTER)

    # ----- helpers -----
    def _fft_band_energy(X, fs, f_lo, f_hi):
        n = X.shape[0]
        freqs = np.fft.rfftfreq(n, d=1.0/fs)
        S = np.abs(np.fft.rfft(X, axis=0))**2 / n
        m = (freqs >= f_lo) & (freqs <= f_hi)
        return S[m].sum(axis=0)

    def _cov_band(X, fs, band):
        n = X.shape[0]
        F = np.fft.rfft(X, axis=0)
        freqs = np.fft.rfftfreq(n, d=1.0/fs)
        m = (freqs >= band[0]) & (freqs <= band[1])
        Fm = np.zeros_like(F); Fm[m] = F[m]
        xb = np.fft.irfft(Fm, axis=0, n=n)
        Xc = xb - xb.mean(axis=0, keepdims=True)
        C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
        return C

    def _hjorth_params(X):
        dX = np.diff(X, axis=0)
        var_x = X.var(axis=0)
        var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
        mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
        ddX = np.diff(dX, axis=0)
        var_ddx = ddX.var(axis=0) if ddX.size else np.zeros(X.shape[1])
        mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
        comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
        return np.vstack([var_x, mob, comp]).T

    def _orthobasis(channels, k, seed=13579):
        rng_local = np.random.default_rng(seed + channels + k)
        Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
        return Q[:, :k]

    def fixed_projection_matrix(ch_in, ch_out, seed=SEED_MASTER):
        rng = np.random.default_rng(seed + 31*ch_in + 7*ch_out)
        A = rng.normal(size=(ch_in, ch_out))
        Q, _ = np.linalg.qr(A)
        return Q[:, :ch_out]

    def map_channels_epoch(X_epoch, target_dim=TARGET_CHANNEL_DIM):
        ch_in = X_epoch.shape[1]
        if ch_in == target_dim:
            return X_epoch
        P = fixed_projection_matrix(ch_in, target_dim)
        return X_epoch @ P

    def baseline_features(X, fs):
        feats = []
        for lo,hi in BASELINE_BANDS:
            feats.append(_fft_band_energy(X, fs, lo, hi).mean())
        hj = _hjorth_params(X).mean(axis=0)
        feats += list(hj)
        bp_full = _fft_band_energy(X, fs, 0.5, 40.0)
        feats += [bp_full.mean(), float(np.sqrt((bp_full**2).mean()))]
        return np.array(feats, float)

    def glyph_stack_features(X, fs):
        C_feats = []
        for band in GLYPH_BANDS:
            C = _cov_band(X, fs, band)
            Cn = np.linalg.norm(C, 'fro') + 1e-9
            for k in GLYPH_RANKS:
                A = _orthobasis(C.shape[0], k)
                C_feats.append(np.linalg.norm(A.T @ C @ A, 'fro') / Cn)
        return np.array(C_feats, float)

    def model_fit_eval(train_X, train_y, test_X, test_y):
        clf = LogisticRegression(max_iter=200, solver="lbfgs")
        clf.fit(train_X, train_y)
        proba = clf.predict_proba(test_X)[:,1]
        preds = (proba >= 0.5).astype(int)
        return dict(
            auroc=float(roc_auc_score(test_y, proba)),
            acc=float(accuracy_score(test_y, preds)),
            proba=proba
        )

    def nested_cv_scores(X, y, n_splits=5):
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4242)
        aucs, accs = [], []
        for tr, te in skf.split(X, y):
            out = model_fit_eval(X[tr], y[tr], X[te], y[te])
            aucs.append(out["auroc"]); accs.append(out["acc"])
        return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

    def perm_test_delta(train_X, train_y, test_X, test_y, baseline_auc, n_perm=PERM_N):
        rngp = np.random.default_rng(9090)
        aucs, deltas = [], []
        for i in range(n_perm):
            y_perm = train_y.copy()
            rngp.shuffle(y_perm)
            out = model_fit_eval(train_X, y_perm, test_X, test_y)
            aucs.append(out["auroc"])
            deltas.append(out["auroc"] - baseline_auc)
            if (i+1) % max(1, n_perm//5) == 0:
                say(f"    … permutations {i+1}/{n_perm}")
        return np.array(aucs), np.array(deltas)

    # ----- data -----
    say("[3/12] Loading data or building synthetic…")
    hp, mp = Path("/mnt/data/HUMAN_EEG.csv"), Path("/mnt/data/MOUSE_EEG.csv")
    used_real = False
    if hp.exists() and mp.exists():
        say("    Found CSVs in /mnt/data (REAL mode).")
        dfh = pd.read_csv(hp); dfm = pd.read_csv(mp)
        assert 'label' in dfh.columns and 'label' in dfm.columns, "CSVs must include a 'label' column."
        y_h = dfh['label'].astype(int).values
        y_m = dfm['label'].astype(int).values
        X_h = dfh.drop(columns=['label']).values
        X_m = dfm.drop(columns=['label']).values
        L = int(FS*EPOCH_S)
        n_h = (X_h.shape[0]//L)*L; n_m = (X_m.shape[0]//L)*L
        X_h = X_h[:n_h].reshape(-1, L, X_h.shape[1]); y_h = y_h[:X_h.shape[0]]
        X_m = X_m[:n_m].reshape(-1, L, X_m.shape[1]); y_m = y_m[:X_m.shape[0]]
        Xh = [X_h[i] for i in range(X_h.shape[0])]
        Xm = [X_m[i] for i in range(X_m.shape[0])]
        yh, ym = y_h, y_m
        used_real = True
    else:
        say("    No CSVs—running harder synthetic fallback (SYNTHETIC mode).")
        def synth_domain(n_epochs=240, n_channels=TARGET_CHANNEL_DIM, fs=FS, epoch_len=EPOCH_S,
                         eo_shift=0.6, domain_noise=0.5, seed=0, alpha_center=10.0, alpha_jitter=1.0):
            rngs = np.random.default_rng(seed)
            t = np.arange(int(fs*epoch_len))/fs
            base_freqs = [2,4,8,12,20]
            X_list, y = [], []
            for i in range(n_epochs):
                label = int(i%2)
                chs = []
                for _ in range(n_channels):
                    sig = np.zeros_like(t)
                    for f in base_freqs:
                        amp = rngs.normal(0.8, 0.25)
                        if label==1 and 8<=f<=12:
                            boost = eo_shift * np.exp(-0.5*((f - (alpha_center + rngs.normal(0,alpha_jitter)))/1.5)**2)
                            amp += boost
                        sig += amp*np.sin(2*np.pi*f*t + rngs.uniform(0,2*np.pi))
                    sig += rngs.normal(0, domain_noise*(0.6+0.4*np.sin(2*np.pi*0.2*t)), size=t.shape)
                    chs.append(sig)
                X = np.stack(chs, axis=1)
                X_list.append(X); y.append(label)
            return X_list, np.array(y, int)
        Xh, yh = synth_domain(seed=111, domain_noise=0.40)
        Xm, ym = synth_domain(seed=222, domain_noise=0.55)

    say(f"[4/12] Shapes → HUMAN: {len(Xh)} epochs | MOUSE: {len(Xm)} epochs")

    # ----- featurize -----
    def featurize_domain(X_list, y, fs=FS):
        G_list, B_list = [], []
        for idx, X in enumerate(X_list):
            Xp = map_channels_epoch(X, TARGET_CHANNEL_DIM)
            G_list.append(glyph_stack_features(Xp, fs))
            B_list.append(baseline_features(Xp, fs))
            if (idx+1) % max(1, len(X_list)//4) == 0:
                say(f"    … featurized {idx+1}/{len(X_list)}")
        G = np.asarray(G_list, float)
        B = np.asarray(B_list, float)
        mask = ~np.isnan(G).any(axis=1)
        return G[mask], B[mask], y[mask]

    say("[5/12] Featurizing HUMAN…"); Gh, Bh, yh = featurize_domain(Xh, yh)
    say("[6/12] Featurizing MOUSE…"); Gm, Bm, ym = featurize_domain(Xm, ym)

    say(f"[7/12] Train on HUMAN, test on MOUSE…")
    res_g = model_fit_eval(Gh, yh, Gm, ym)
    res_b = model_fit_eval(Bh, yh, Bm, ym)

    say("[8/12] Nested CV (HUMAN) for sanity…")
    cv_g = nested_cv_scores(Gh, yh); cv_b = nested_cv_scores(Bh, yh)

    say("[9/12] Permutation test (this prints progress)…")
    perm_aucs_g, perm_deltas = perm_test_delta(Gh, yh, Gm, ym, baseline_auc=res_b["auroc"], n_perm=PERM_N)
    p_auroc = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (PERM_N + 1)
    obs_delta = res_g["auroc"] - res_b["auroc"]
    p_delta = (np.sum(perm_deltas >= obs_delta) + 1) / (PERM_N + 1)

    say("[10/12] Plotting…")
    fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
    fpr_b, tpr_b, _ = roc_curve(ym, model_fit_eval(Bh, yh, Bm, ym)["proba"])

    plt.figure(figsize=(6,5))
    plt.plot(fpr_g, tpr_g, label=f"Glyph (AUROC={res_g['auroc']:.3f})")
    plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
    plt.plot([0,1],[0,1],'--')
    plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
    plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
    plt.tight_layout()
    plt.savefig(OUTDIR/"transfer_roc.png", dpi=200); plt.close()

    plt.figure(figsize=(6,5))
    plt.hist(perm_aucs_g, bins=30, alpha=0.7)
    plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
    plt.xlabel("AUROC under label permutation (Glyph)")
    plt.ylabel("Count"); plt.title(f"Permutation Test (n={PERM_N}) | p={p_auroc:.4f}")
    plt.tight_layout()
    plt.savefig(OUTDIR/"perm_null.png", dpi=200); plt.close()

    say("[11/12] Writing reports…")
    results = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "used_real_data": bool(used_real),
        "fs_hz": FS,
        "epoch_s": EPOCH_S,
        "target_channel_dim": TARGET_CHANNEL_DIM,
        "n_human_epochs": int(Gh.shape[0]),
        "n_mouse_epochs": int(Gm.shape[0]),
        "transfer": {
            "glyph": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
            "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
            "delta_auroc": float(obs_delta),
            "perm_test": {
                "n_perm": PERM_N, "p_auroc": float(p_auroc), "p_delta": float(p_delta),
                "perm_mean_auroc": float(np.mean(perm_aucs_g)), "perm_std_auroc": float(np.std(perm_aucs_g)),
            }
        },
        "nested_cv_human": {
            "glyph": {"auroc_mean": float(cv_g[0]), "auroc_std": float(cv_g[1]), "acc_mean": float(cv_g[2]), "acc_std": float(cv_g[3])},
            "baseline": {"auroc_mean": float(cv_b[0]), "auroc_std": float(cv_b[1]), "acc_mean": float(cv_b[2]), "acc_std": float(cv_b[3])}
        },
        "prereg": {
            "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph > baseline",
            "null_hypothesis": "Glyph AUROC equals baseline under label permutation in training.",
            "alpha": 0.05,
            "test": "One-sided permutation on training labels",
            "n_permutations": PERM_N,
            "blinding": "Hyperparameters & projections fixed; test labels unseen; baseline fixed.",
            "decision_rule": "Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70."
        }
    }
    with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    summary = f"""
    CNT One-Cell Groundbreaker — VERBOSE SMOKE TEST
    ===============================================
    UTC: {results['timestamp']}
    Data mode: {"REAL" if results['used_real_data'] else "SYNTHETIC"}
    Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}
    Target channel dim: {results['target_channel_dim']}

    TRANSFER (Train Human -> Test Mouse)
    - Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}
    - Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
    - Δ AUROC:  {results['transfer']['delta_auroc']:.3f}

    Permutation Test (n={PERM_N})
    - p(AUROC >= observed)   : p={results['transfer']['perm_test']['p_auroc']:.4f}
    - p(Δ AUROC >= observed) : p={results['transfer']['perm_test']['p_delta']:.4f}

    Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png
    Folder: {OUTDIR}
    """
    with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
        f.write(textwrap.dedent(summary))

    say("[12/12] DONE.")
    print(textwrap.dedent(summary).strip())

except Exception as e:
    say("!!! ERROR — printing traceback:")
    traceback.print_exc()



[1/12] Config…
[2/12] OUTDIR = \mnt\data\CNT_OneCell_Groundbreaker
[3/12] Loading data or building synthetic…
    No CSVs—running harder synthetic fallback (SYNTHETIC mode).
[4/12] Shapes → HUMAN: 240 epochs | MOUSE: 240 epochs
[5/12] Featurizing HUMAN…
    … featurized 60/240
    … featurized 120/240
    … featurized 180/240
    … featurized 240/240
[6/12] Featurizing MOUSE…
    … featurized 60/240
    … featurized 120/240
    … featurized 180/240
    … featurized 240/240
[7/12] Train on HUMAN, test on MOUSE…
[8/12] Nested CV (HUMAN) for sanity…
[9/12] Permutation test (this prints progress)…
    … permutations 10/50
    … permutations 20/50
    … permutations 30/50
    … permutations 40/50
    … permutations 50/50
[10/12] Plotting…
[11/12] Writing reports…
[12/12] DONE.
CNT One-Cell Groundbreaker — VERBOSE SMOKE TEST
UTC: 2025-09-29T03:53:51.484559+00:00
Data mode: SYNTHETIC
Human epochs: 240 | Mouse epochs: 240
Target channel dim: 32

TRANSFER (Train Human -> Test Mouse)
- Glyph:   

In [10]:
# ========================= CNT One-Cell Groundbreaker (ALL-IN-ONE, LOCKED) =========================
# - Blind Human→Mouse transfer, CNT multi-glyph invariant vs strong baselines
# - 1000-permutation significance (training-label permutations), verbose progress
# - Uses real CSVs if found at /mnt/data/{HUMAN_EEG.csv,MOUSE_EEG.csv}, else harder synthetic fallback
# - Windows-safe UTF-8 outputs, fixed channel projection, plots, and a 1-page PDF certificate
#
# Expected CSVs (optional):
#   /mnt/data/HUMAN_EEG.csv
#   /mnt/data/MOUSE_EEG.csv
# Each: rows=time samples (contiguous), columns = EEG channels + 'label' (0/1). Assumes 4 s epochs @ 128 Hz.
#
# Outputs:
#   CNT_OneCell_Groundbreaker/results.json
#   CNT_OneCell_Groundbreaker/summary.txt
#   CNT_OneCell_Groundbreaker/transfer_roc.png
#   CNT_OneCell_Groundbreaker/perm_null.png
#   CNT_OneCell_Groundbreaker/certificate_onecell.pdf
#
# PREREG DECISION RULE (LOCKED):
#   Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70 on transfer.
# ===================================================================================================

import os, sys, json, textwrap, traceback
from pathlib import Path
from datetime import datetime, timezone

import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

# ----------------------- Helpers: printing -----------------------
def say(msg):
    print(msg); sys.stdout.flush()

# ----------------------- PREREG: FROZEN CONSTANTS ----------------
FS = 128.0
EPOCH_S = 4.0
TARGET_CHANNEL_DIM = 32
GLYPH_BANDS = [(6,9),(8,12),(10,14)]
GLYPH_RANKS = [4,8,12]
BASELINE_BANDS = [(1,4),(4,8),(8,12),(12,30)]
PERM_N = 1000
SEED_MASTER = 20250928  # deterministic

# ----------------------- Output dir (portable, UTF-8) ------------
def pick_outdir():
    for p in [Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            (p/".touch_ok").write_text("ok", encoding="utf-8")
            (p/".touch_ok").unlink(missing_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()

BASE = pick_outdir()
OUTDIR = BASE / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

# ----------------------- Core math helpers (locked) --------------
def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    m = (freqs >= f_lo) & (freqs <= f_hi)
    return S[m].sum(axis=0)

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    m = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[m] = F[m]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if ddX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k, seed=13579):
    rng_local = np.random.default_rng(seed + channels + k)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def fixed_projection_matrix(ch_in, ch_out, seed=SEED_MASTER):
    rng = np.random.default_rng(seed + 31*ch_in + 7*ch_out)
    A = rng.normal(size=(ch_in, ch_out))
    Q, _ = np.linalg.qr(A)
    return Q[:, :ch_out]

def map_channels_epoch(X_epoch, target_dim=TARGET_CHANNEL_DIM):
    ch_in = X_epoch.shape[1]
    if ch_in == target_dim: return X_epoch
    P = fixed_projection_matrix(ch_in, target_dim)
    return X_epoch @ P

def baseline_features(X, fs):
    feats = []
    for lo,hi in BASELINE_BANDS:
        feats.append(_fft_band_energy(X, fs, lo, hi).mean())
    hj = _hjorth_params(X).mean(axis=0)
    feats += list(hj)
    bp_full = _fft_band_energy(X, fs, 0.5, 40.0)
    feats += [bp_full.mean(), float(np.sqrt((bp_full**2).mean()))]
    return np.array(feats, float)

def glyph_stack_features(X, fs):
    C_feats = []
    for band in GLYPH_BANDS:
        C = _cov_band(X, fs, band)
        Cn = np.linalg.norm(C, 'fro') + 1e-9
        for k in GLYPH_RANKS:
            A = _orthobasis(C.shape[0], k)
            C_feats.append(np.linalg.norm(A.T @ C @ A, 'fro') / Cn)
    return np.array(C_feats, float)

def model_fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba >= 0.5).astype(int)
    return dict(
        auroc=float(roc_auc_score(test_y, proba)),
        acc=float(accuracy_score(test_y, preds)),
        proba=proba
    )

def nested_cv_scores(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4242)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = model_fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test_delta(train_X, train_y, test_X, test_y, baseline_auc, n_perm=PERM_N):
    rngp = np.random.default_rng(9090)
    aucs, deltas = [], []
    for i in range(n_perm):
        y_perm = train_y.copy()
        rngp.shuffle(y_perm)
        out = model_fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"])
        deltas.append(out["auroc"] - baseline_auc)
        if (i+1) % max(1, n_perm//10) == 0:
            say(f"    … permutations {i+1}/{n_perm}")
    return np.array(aucs), np.array(deltas)

# ----------------------- Data load or synthetic -------------------
try:
    say("[1/9] Checking for real CSVs...")
    hp, mp = Path("/mnt/data/HUMAN_EEG.csv"), Path("/mnt/data/MOUSE_EEG.csv")
    used_real = False
    if hp.exists() and mp.exists():
        say("    Found CSVs in /mnt/data (REAL mode).")
        dfh = pd.read_csv(hp); dfm = pd.read_csv(mp)
        assert 'label' in dfh.columns and 'label' in dfm.columns, "CSVs must include a 'label' column."
        y_h = dfh['label'].astype(int).values
        y_m = dfm['label'].astype(int).values
        X_h = dfh.drop(columns=['label']).values
        X_m = dfm.drop(columns=['label']).values
        L = int(FS*EPOCH_S)
        n_h = (X_h.shape[0]//L)*L; n_m = (X_m.shape[0]//L)*L
        X_h = X_h[:n_h].reshape(-1, L, X_h.shape[1]); y_h = y_h[:X_h.shape[0]]
        X_m = X_m[:n_m].reshape(-1, L, X_m.shape[1]); y_m = y_m[:X_m.shape[0]]
        Xh = [X_h[i] for i in range(X_h.shape[0])]
        Xm = [X_m[i] for i in range(X_m.shape[0])]
        yh, ym = y_h, y_m
        used_real = True
    else:
        say("    No CSVs found — using harder synthetic fallback (SYNTHETIC mode).")
        def synth_domain(n_epochs=240, n_channels=TARGET_CHANNEL_DIM, fs=FS, epoch_len=EPOCH_S,
                         eo_shift=0.6, domain_noise=0.5, seed=0, alpha_center=10.0, alpha_jitter=1.0):
            rngs = np.random.default_rng(seed)
            t = np.arange(int(fs*epoch_len))/fs
            base_freqs = [2,4,8,12,20]
            X_list, y = [], []
            for i in range(n_epochs):
                label = int(i%2)
                chs = []
                for _ in range(n_channels):
                    sig = np.zeros_like(t)
                    for f in base_freqs:
                        amp = rngs.normal(0.8, 0.25)
                        if label==1 and 8<=f<=12:
                            boost = eo_shift * np.exp(-0.5*((f - (alpha_center + rngs.normal(0,alpha_jitter)))/1.5)**2)
                            amp += boost
                        sig += amp*np.sin(2*np.pi*f*t + rngs.uniform(0,2*np.pi))
                    sig += rngs.normal(0, domain_noise*(0.6+0.4*np.sin(2*np.pi*0.2*t)), size=t.shape)
                    chs.append(sig)
                X = np.stack(chs, axis=1)
                X_list.append(X); y.append(label)
            return X_list, np.array(y, int)
        Xh, yh = synth_domain(seed=111, domain_noise=0.40)
        Xm, ym = synth_domain(seed=222, domain_noise=0.55)
    say(f"[2/9] Shapes → HUMAN: {len(Xh)} epochs | MOUSE: {len(Xm)} epochs")

    # ------------------- Featurize both domains -------------------
    def featurize_domain(X_list, y, fs=FS):
        G_list, B_list = [], []
        for idx, X in enumerate(X_list):
            Xp = map_channels_epoch(X, TARGET_CHANNEL_DIM)
            G_list.append(glyph_stack_features(Xp, fs))
            B_list.append(baseline_features(Xp, fs))
            if (idx+1) % max(1, len(X_list)//4) == 0:
                say(f"    … featurized {idx+1}/{len(X_list)}")
        G = np.asarray(G_list, float)
        B = np.asarray(B_list, float)
        mask = ~np.isnan(G).any(axis=1)
        return G[mask], B[mask], y[mask]

    say("[3/9] Featurizing HUMAN…"); Gh, Bh, yh = featurize_domain(Xh, yh)
    say("[4/9] Featurizing MOUSE…"); Gm, Bm, ym = featurize_domain(Xm, ym)

    # ------------------- Train/Test & CV --------------------------
    say("[5/9] Train on HUMAN, blind test on MOUSE…")
    res_g = model_fit_eval(Gh, yh, Gm, ym)
    res_b = model_fit_eval(Bh, yh, Bm, ym)

    say("[6/9] Nested CV on HUMAN…")
    cv_g = nested_cv_scores(Gh, yh); cv_b = nested_cv_scores(Bh, yh)

    # ------------------- Permutation test -------------------------
    say("[7/9] Permutation test (1000 perms)…")
    perm_aucs_g, perm_deltas = perm_test_delta(Gh, yh, Gm, ym, baseline_auc=res_b["auroc"], n_perm=PERM_N)
    p_auroc = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (PERM_N + 1)
    obs_delta = res_g["auroc"] - res_b["auroc"]
    p_delta = (np.sum(perm_deltas >= obs_delta) + 1) / (PERM_N + 1)

    # ------------------- Plots ------------------------------------
    say("[8/9] Plotting ROC and permutation null…")
    fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
    fpr_b, tpr_b, _ = roc_curve(ym, model_fit_eval(Bh, yh, Bm, ym)["proba"])

    plt.figure(figsize=(6,5))
    plt.plot(fpr_g, tpr_g, label=f"Glyph (AUROC={res_g['auroc']:.3f})")
    plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
    plt.plot([0,1],[0,1],'--')
    plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
    plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
    plt.tight_layout()
    roc_path = OUTDIR/"transfer_roc.png"; plt.savefig(roc_path, dpi=200); plt.close()

    plt.figure(figsize=(6,5))
    plt.hist(perm_aucs_g, bins=30, alpha=0.7)
    plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
    plt.xlabel("AUROC under label permutation (Glyph)")
    plt.ylabel("Count"); plt.title(f"Permutation Test (n={PERM_N}) | p={p_auroc:.4f}")
    plt.tight_layout()
    perm_path = OUTDIR/"perm_null.png"; plt.savefig(perm_path, dpi=200); plt.close()

    # ------------------- Reports (UTF-8) --------------------------
    say("[9/9] Writing reports + certificate…")
    results = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "used_real_data": bool(used_real),
        "fs_hz": FS,
        "epoch_s": EPOCH_S,
        "target_channel_dim": TARGET_CHANNEL_DIM,
        "n_human_epochs": int(Gh.shape[0]),
        "n_mouse_epochs": int(Gm.shape[0]),
        "transfer": {
            "glyph": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
            "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
            "delta_auroc": float(obs_delta),
            "perm_test": {
                "n_perm": PERM_N,
                "p_auroc": float(p_auroc),
                "p_delta": float(p_delta),
                "perm_mean_auroc": float(np.mean(perm_aucs_g)),
                "perm_std_auroc": float(np.std(perm_aucs_g)),
            }
        },
        "nested_cv_human": {
            "glyph": {"auroc_mean": float(cv_g[0]), "auroc_std": float(cv_g[1]), "acc_mean": float(cv_g[2]), "acc_std": float(cv_g[3])},
            "baseline": {"auroc_mean": float(cv_b[0]), "auroc_std": float(cv_b[1]), "acc_mean": float(cv_b[2]), "acc_std": float(cv_b[3])}
        },
        "prereg": {
            "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph > baseline",
            "null_hypothesis": "Glyph AUROC equals baseline under label permutation in training.",
            "alpha": 0.05,
            "test": "One-sided permutation on training labels; n=1000",
            "blinding": "Hyperparameters & projections fixed; test labels unseen; baseline fixed.",
            "decision_rule": "Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70."
        }
    }
    with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    summary = f"""
    CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
    ==================================================
    UTC: {results['timestamp']}
    Data mode: {"REAL" if results['used_real_data'] else "SYNTHETIC"}
    Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}
    Target channel dim (fixed): {results['target_channel_dim']}

    TRANSFER (Train Human -> Test Mouse)
    - Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}
    - Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
    - Delta AUROC: {results['transfer']['delta_auroc']:.3f}

    Permutation Test (n={PERM_N})
    - p(AUROC >= observed)         : p={results['transfer']['perm_test']['p_auroc']:.4f}
    - p(Delta AUROC >= observed)   : p={results['transfer']['perm_test']['p_delta']:.4f}

    Nested CV on Human
    - Glyph:    AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}
    - Baseline: AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

    PREREG DECISION RULE (LOCKED)
    Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.

    Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png, certificate_onecell.pdf
    """
    with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
        f.write(textwrap.dedent(summary))

    # ------------------- Certificate PDF (1 page) -----------------
    cert_path = OUTDIR/"certificate_onecell.pdf"
    with PdfPages(cert_path) as pdf:
        fig = plt.figure(figsize=(8.5, 11))  # US Letter portrait
        ax = fig.add_axes([0.07, 0.07, 0.86, 0.86]); ax.axis("off")

        title = "CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)"
        hdr = f"UTC: {results['timestamp']}   |   Data: {'REAL' if results['used_real_data'] else 'SYNTHETIC'}\n" \
              f"Human epochs: {results['n_human_epochs']}   |   Mouse epochs: {results['n_mouse_epochs']}   |   Target ch: {results['target_channel_dim']}"
        body = (
            f"TRANSFER (Train Human -> Test Mouse)\n"
            f"  Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}\n"
            f"  Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}\n"
            f"  Delta AUROC: {results['transfer']['delta_auroc']:.3f}\n\n"
            f"Permutation Test (n={PERM_N})\n"
            f"  p(AUROC >= observed): {results['transfer']['perm_test']['p_auroc']:.4f}\n"
            f"  p(Delta AUROC >= observed): {results['transfer']['perm_test']['p_delta']:.4f}\n\n"
            f"Nested CV (Human)\n"
            f"  Glyph AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}\n"
            f"  Baseline AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}\n\n"
            "PREREG DECISION RULE (LOCKED)\n"
            "  Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer."
        )

        ax.text(0.5, 0.96, title, ha="center", va="top", fontsize=16, weight="bold")
        ax.text(0.5, 0.92, hdr, ha="center", va="top", fontsize=10)
        ax.text(0.05, 0.83, body, ha="left", va="top", fontsize=10, family="monospace")

        # Embed the two plots if present
        y0 = 0.10
        img_w, img_h = 0.42, 0.28
        try:
            import matplotlib.image as mpimg
            roc_img = mpimg.imread(roc_path)
            perm_img = mpimg.imread(perm_path)
            fig.add_axes([0.06, y0+0.18, img_w, img_h]).imshow(roc_img); plt.axis('off')
            fig.add_axes([0.52, y0+0.18, img_w, img_h]).imshow(perm_img); plt.axis('off')
            ax.text(0.06, y0+0.47, "ROC (Human→Mouse)", fontsize=9)
            ax.text(0.52, y0+0.47, "Permutation Null", fontsize=9)
        except Exception:
            ax.text(0.05, 0.45, "[Plots unavailable to embed]", fontsize=9)

        pdf.savefig(fig); plt.close(fig)

    # ------------------- Final console summary --------------------
    print(textwrap.dedent(summary).strip())
    print("\nArtifacts saved to:", OUTDIR)

except Exception as e:
    say("!!! ERROR — printing traceback:")
    traceback.print_exc()


[1/9] Checking for real CSVs...
    No CSVs found — using harder synthetic fallback (SYNTHETIC mode).
[2/9] Shapes → HUMAN: 240 epochs | MOUSE: 240 epochs
[3/9] Featurizing HUMAN…
    … featurized 60/240
    … featurized 120/240
    … featurized 180/240
    … featurized 240/240
[4/9] Featurizing MOUSE…
    … featurized 60/240
    … featurized 120/240
    … featurized 180/240
    … featurized 240/240
[5/9] Train on HUMAN, blind test on MOUSE…
[6/9] Nested CV on HUMAN…
[7/9] Permutation test (1000 perms)…
    … permutations 100/1000
    … permutations 200/1000
    … permutations 300/1000
    … permutations 400/1000
    … permutations 500/1000
    … permutations 600/1000
    … permutations 700/1000
    … permutations 800/1000
    … permutations 900/1000
    … permutations 1000/1000
[8/9] Plotting ROC and permutation null…
[9/9] Writing reports + certificate…
CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
UTC: 2025-09-29T03:59:22.121250+00:00
Data mode: SYNTHETIC
Human epochs: 240 | Mo

In [11]:
def export_epochs(epochs, label_map, out_csv, fs_target=128.0, epoch_s=4.0):
    import numpy as np, pandas as pd
    if epochs.info['sfreq'] != fs_target:
        epochs = epochs.copy().resample(fs_target)
    L = int(fs_target*epoch_s)
    Xs, ys = [], []
    for i, e in enumerate(epochs):
        x = e[0]
        if x.shape[1] < L: continue
        Xs.append(x[:, :L].T)                  # (L, n_channels)
        ys.append(int(label_map.get(epochs.events[i,2], 0)))
    X = np.vstack(Xs); y = np.repeat(np.array(ys,int), L)
    df = pd.DataFrame(X); df["label"] = y
    df.to_csv(out_csv, index=False)


In [12]:
# === CNT CSV Sanity + Prereg ALL-IN-ONE Runner (single cell) ===
# - Verifies HUMAN_EEG.csv and MOUSE_EEG.csv
# - Checks shape, NaNs, label balance per-epoch (L=512 = 4s @ 128Hz)
# - If all good, runs the locked ALL-IN-ONE prereg (1000 perms) and writes artifacts.

import os, sys, json, textwrap, traceback
from pathlib import Path
from datetime import datetime, timezone
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

def say(m): 
    print(m); sys.stdout.flush()

# ---------- Locate CSVs (Windows + Unix friendly) ----------
candidates = [Path(r"C:\mnt\data"), Path("/mnt/data"), Path.cwd()]
base = None
for p in candidates:
    if (p/"HUMAN_EEG.csv").exists() and (p/"MOUSE_EEG.csv").exists():
        base = p; break
if base is None:
    raise FileNotFoundError("Could not find HUMAN_EEG.csv and MOUSE_EEG.csv in C:\\mnt\\data or /mnt/data. Place both files there.")

hp, mp = base/"HUMAN_EEG.csv", base/"MOUSE_EEG.csv"
say(f"[SANITY] Found:\n  HUMAN: {hp}\n  MOUSE: {mp}")

# ---------- Quick sanity: shape, NaNs, labels, epoch partition ----------
L = 512  # 4 s @ 128 Hz (locked assumption downstream)
def sanity_check(path, name):
    df = pd.read_csv(path)
    if 'label' not in df.columns:
        raise ValueError(f"{name} is missing a 'label' column.")
    X = df.drop(columns=['label'])
    y = df['label'].astype(int).values
    n_samples, n_ch = X.shape[0], X.shape[1]
    nan_any = X.isna().to_numpy().any()
    rem = n_samples % L
    n_epochs = n_samples // L
    # Epoch-wise majority label (assumes label was repeated per-sample)
    lab_major = []
    for i in range(n_epochs):
        chunk = y[i*L:(i+1)*L]
        if chunk.size < L: break
        # majority vote
        ones = int(chunk.sum())
        lab_major.append(1 if ones >= (L-ones) else 0)
    p1 = np.mean(lab_major) if lab_major else float('nan')
    say(f"[SANITY] {name}: samples={n_samples}, channels={n_ch}, epochs={n_epochs}, remainder={rem}, NaNs={nan_any}, epoch-mean(label)≈{p1:.3f}")
    return n_ch, n_epochs, rem, nan_any

nch_h, ne_h, rem_h, nan_h = sanity_check(hp, "HUMAN")
nch_m, ne_m, rem_m, nan_m = sanity_check(mp, "MOUSE")

if rem_h or rem_m:
    say("[WARN] Samples not divisible by 512 (4s epochs). The runner will trim trailing samples.")
if nan_h or nan_m:
    say("[WARN] NaNs detected; consider cleaning or interpolating.")

# =================== RUN LOCKED ALL-IN-ONE PREREG (same as before) ===================
FS = 128.0
EPOCH_S = 4.0
TARGET_CHANNEL_DIM = 32
GLYPH_BANDS = [(6,9),(8,12),(10,14)]
GLYPH_RANKS = [4,8,12]
BASELINE_BANDS = [(1,4),(4,8),(8,12),(12,30)]
PERM_N = 1000
SEED_MASTER = 20250928

def pick_outdir():
    for p in [Path("/mnt/data"), Path(r"C:\mnt\data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            (p/".touch_ok").write_text("ok", encoding="utf-8")
            (p/".touch_ok").unlink(missing_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()

OUTROOT = pick_outdir()
OUTDIR = OUTROOT / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    m = (freqs >= f_lo) & (freqs <= f_hi)
    return S[m].sum(axis=0)

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    m = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[m] = F[m]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if ddX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k, seed=13579):
    rng_local = np.random.default_rng(seed + channels + k)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def fixed_projection_matrix(ch_in, ch_out, seed=SEED_MASTER):
    rng = np.random.default_rng(seed + 31*ch_in + 7*ch_out)
    A = rng.normal(size=(ch_in, ch_out))
    Q, _ = np.linalg.qr(A)
    return Q[:, :ch_out]

def map_channels_epoch(X_epoch, target_dim=TARGET_CHANNEL_DIM):
    ch_in = X_epoch.shape[1]
    if ch_in == target_dim: return X_epoch
    P = fixed_projection_matrix(ch_in, target_dim)
    return X_epoch @ P

def baseline_features(X, fs):
    feats = []
    for lo,hi in BASELINE_BANDS:
        feats.append(_fft_band_energy(X, fs, lo, hi).mean())
    hj = _hjorth_params(X).mean(axis=0)
    feats += list(hj)
    bp_full = _fft_band_energy(X, fs, 0.5, 40.0)
    feats += [bp_full.mean(), float(np.sqrt((bp_full**2).mean()))]
    return np.array(feats, float)

def glyph_stack_features(X, fs):
    C_feats = []
    for band in GLYPH_BANDS:
        C = _cov_band(X, fs, band)
        Cn = np.linalg.norm(C, 'fro') + 1e-9
        for k in GLYPH_RANKS:
            A = _orthobasis(C.shape[0], k)
            C_feats.append(np.linalg.norm(A.T @ C @ A, 'fro') / Cn)
    return np.array(C_feats, float)

def model_fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba >= 0.5).astype(int)
    return dict(
        auroc=float(roc_auc_score(test_y, proba)),
        acc=float(accuracy_score(test_y, preds)),
        proba=proba
    )

def nested_cv_scores(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4242)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = model_fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test_delta(train_X, train_y, test_X, test_y, baseline_auc, n_perm=1000):
    rngp = np.random.default_rng(9090)
    aucs, deltas = [], []
    for i in range(n_perm):
        y_perm = train_y.copy()
        rngp.shuffle(y_perm)
        out = model_fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"])
        deltas.append(out["auroc"] - baseline_auc)
        if (i+1) % max(1, n_perm//10) == 0:
            say(f"    … permutations {i+1}/{n_perm}")
    return np.array(aucs), np.array(deltas)

# ---------- Load, epoch, and run ----------
say("[RUN] Loading CSVs…")
dfh = pd.read_csv(hp); dfm = pd.read_csv(mp)
Xh_all = dfh.drop(columns=['label']).values; yh_all = dfh['label'].astype(int).values
Xm_all = dfm.drop(columns=['label']).values; ym_all = dfm['label'].astype(int).values

# Trim to whole epochs
n_h = (Xh_all.shape[0]//L)*L; n_m = (Xm_all.shape[0]//L)*L
Xh_all, yh_all = Xh_all[:n_h], yh_all[:n_h]
Xm_all, ym_all = Xm_all[:n_m], ym_all[:n_m]

# Reshape to epochs
Xh = Xh_all.reshape(-1, L, Xh_all.shape[1]); yh = yh_all[:Xh.shape[0]]
Xm = Xm_all.reshape(-1, L, Xm_all.shape[1]); ym = ym_all[:Xm.shape[0]]

say(f"[RUN] Epochs → HUMAN={Xh.shape[0]} | MOUSE={Xm.shape[0]} | CH={Xh.shape[2]} vs {Xm.shape[2]}")

def featurize_domain(X_ep, y):
    G_list, B_list = [], []
    for i in range(X_ep.shape[0]):
        Xp = map_channels_epoch(X_ep[i], TARGET_CHANNEL_DIM)
        G_list.append(glyph_stack_features(Xp, FS))
        B_list.append(baseline_features(Xp, FS))
        if (i+1) % max(1, X_ep.shape[0]//4) == 0:
            say(f"    … featurized {i+1}/{X_ep.shape[0]}")
    G = np.asarray(G_list, float); B = np.asarray(B_list, float)
    mask = ~np.isnan(G).any(axis=1)
    return G[mask], B[mask], y[mask]

say("[RUN] Featurizing HUMAN…"); Gh, Bh, yh = featurize_domain(Xh, yh)
say("[RUN] Featurizing MOUSE…"); Gm, Bm, ym = featurize_domain(Xm, ym)

say("[RUN] Train on HUMAN, blind test on MOUSE…")
res_g = model_fit_eval(Gh, yh, Gm, ym)
res_b = model_fit_eval(Bh, yh, Bm, ym)

say("[RUN] Nested CV on HUMAN…")
cv_g = nested_cv_scores(Gh, yh); cv_b = nested_cv_scores(Bh, yh)

say("[RUN] Permutation test (n=1000)…")
perm_aucs_g, perm_deltas = perm_test_delta(Gh, yh, Gm, ym, baseline_auc=res_b["auroc"], n_perm=1000)
p_auroc = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (1000 + 1)
obs_delta = res_g["auroc"] - res_b["auroc"]
p_delta = (np.sum(perm_deltas >= obs_delta) + 1) / (1000 + 1)

# Plots
plt.figure(figsize=(6,5))
fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
fpr_b, tpr_b, _ = roc_curve(ym, model_fit_eval(Bh, yh, Bm, ym)["proba"])
plt.plot(fpr_g, tpr_g, label=f"Glyph (AUROC={res_g['auroc']:.3f})")
plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
plt.tight_layout()
roc_path = OUTDIR/"transfer_roc.png"; plt.savefig(roc_path, dpi=200); plt.close()

plt.figure(figsize=(6,5))
plt.hist(perm_aucs_g, bins=30, alpha=0.7)
plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
plt.xlabel("AUROC under label permutation (Glyph)")
plt.ylabel("Count"); plt.title(f"Permutation Test (n=1000) | p={p_auroc:.4f}")
plt.tight_layout()
perm_path = OUTDIR/"perm_null.png"; plt.savefig(perm_path, dpi=200); plt.close()

# Reports + certificate
results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "used_real_data": True,
    "fs_hz": FS, "epoch_s": 4.0, "target_channel_dim": TARGET_CHANNEL_DIM,
    "n_human_epochs": int(Gh.shape[0]), "n_mouse_epochs": int(Gm.shape[0]),
    "transfer": {
        "glyph": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
        "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
        "delta_auroc": float(obs_delta),
        "perm_test": {"n_perm": 1000, "p_auroc": float(p_auroc), "p_delta": float(p_delta),
                      "perm_mean_auroc": float(np.mean(perm_aucs_g)), "perm_std_auroc": float(np.std(perm_aucs_g))},
    },
    "nested_cv_human": {
        "glyph": {"auroc_mean": float(cv_g[0]), "auroc_std": float(cv_g[1]), "acc_mean": float(cv_g[2]), "acc_std": float(cv_g[3])},
        "baseline": {"auroc_mean": float(cv_b[0]), "auroc_std": float(cv_b[1]), "acc_mean": float(cv_b[2]), "acc_std": float(cv_b[3])}
    },
    "prereg": {
        "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph > baseline",
        "null_hypothesis": "Glyph AUROC equals baseline under label permutation in training.",
        "alpha": 0.05, "test": "One-sided permutation on training labels; n=1000",
        "blinding": "Hyperparameters & projections fixed; test labels unseen; baseline fixed.",
        "decision_rule": "Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70."
    }
}
with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

summary = f"""
CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
==================================================
UTC: {results['timestamp']}
Data mode: REAL
Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}
Target channel dim (fixed): {results['target_channel_dim']}

TRANSFER (Train Human -> Test Mouse)
- Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}
- Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
- Delta AUROC: {results['transfer']['delta_auroc']:.3f}

Permutation Test (n=1000)
- p(AUROC >= observed)         : p={results['transfer']['perm_test']['p_auroc']:.4f}
- p(Delta AUROC >= observed)   : p={results['transfer']['perm_test']['p_delta']:.4f}

Nested CV on Human
- Glyph:    AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}
- Baseline: AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

PREREG DECISION RULE (LOCKED)
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.

Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png, certificate_onecell.pdf
"""

with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
    f.write(textwrap.dedent(summary))

cert_path = OUTDIR/"certificate_onecell.pdf"
with PdfPages(cert_path) as pdf:
    fig = plt.figure(figsize=(8.5, 11)); ax = fig.add_axes([0.07, 0.07, 0.86, 0.86]); ax.axis("off")
    title = "CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)"
    hdr = f"UTC: {results['timestamp']} | Data: REAL | Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']} | Target ch: {results['target_channel_dim']}"
    body = (
        f"TRANSFER (Train Human -> Test Mouse)\n"
        f"  Glyph AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}\n"
        f"  Baseline AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}\n"
        f"  Δ AUROC={results['transfer']['delta_auroc']:.3f}\n\n"
        f"Permutation Test (n=1000)\n"
        f"  p(AUROC ≥ observed)={results['transfer']['perm_test']['p_auroc']:.4f}\n"
        f"  p(Δ AUROC ≥ observed)={results['transfer']['perm_test']['p_delta']:.4f}\n\n"
        f"Nested CV (Human)\n"
        f"  Glyph AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}\n"
        f"  Baseline AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}\n\n"
        "PREREG DECISION RULE (LOCKED): Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer."
    )
    ax.text(0.5, 0.96, title, ha="center", va="top", fontsize=16, weight="bold")
    ax.text(0.5, 0.92, hdr, ha="center", va="top", fontsize=10)
    ax.text(0.05, 0.82, body, ha="left", va="top", fontsize=10, family="monospace")

    # embed plots if present
    try:
        import matplotlib.image as mpimg
        roc_img = mpimg.imread(OUTDIR/"transfer_roc.png")
        perm_img = mpimg.imread(OUTDIR/"perm_null.png")
        fig.add_axes([0.06, 0.48, 0.40, 0.26]).imshow(roc_img); plt.axis('off')
        fig.add_axes([0.54, 0.48, 0.40, 0.26]).imshow(perm_img); plt.axis('off')
        ax.text(0.06, 0.75, "ROC (Human→Mouse)", fontsize=9)
        ax.text(0.54, 0.75, "Permutation Null", fontsize=9)
    except Exception:
        ax.text(0.05, 0.50, "[Plots unavailable to embed]", fontsize=9)

    pdf.savefig(fig); plt.close(fig)

print(textwrap.dedent(summary).strip())
print("\nArtifacts saved to:", OUTDIR)


FileNotFoundError: Could not find HUMAN_EEG.csv and MOUSE_EEG.csv in C:\mnt\data or /mnt/data. Place both files there.

In [13]:
# === CNT One-Cell Groundbreaker — Path-Smart ALL-IN-ONE (locked) ===
# 1) Try user-provided paths; else auto-search common folders for HUMAN/MOUSE CSVs
# 2) Sanity check (shape/NaNs/epoching) + Locked prereg run (1000 perms) + plots + PDF certificate

import os, sys, json, textwrap, traceback, re
from pathlib import Path
from datetime import datetime, timezone
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

def say(m): print(m); sys.stdout.flush()

# --------- EDIT THESE IF YOU KNOW THE EXACT PATHS ---------
USER_HUMAN_CSV = r""  # e.g., r"C:\Users\caleb\Documents\EEG\HUMAN_EEG.csv"
USER_MOUSE_CSV = r""  # e.g., r"C:\Users\caleb\Documents\EEG\MOUSE_EEG.csv"
# ----------------------------------------------------------

# --------- Discover CSVs ----------
def discover_csvs():
    # 1) Use user-provided if present
    if USER_HUMAN_CSV and USER_MOUSE_CSV:
        hp = Path(USER_HUMAN_CSV); mp = Path(USER_MOUSE_CSV)
        if hp.exists() and mp.exists(): return hp, mp

    # 2) Try common roots
    candidates = [
        Path(r"C:\mnt\data"), Path("/mnt/data"),
        Path.cwd(),
        Path.home() / "Documents",
        Path.home() / "Downloads",
        Path.home() / "Desktop",
    ]
    found = []
    for root in candidates:
        if not root.exists(): continue
        # shallow + one level deep to stay fast
        for p in list(root.glob("*.csv")) + list(root.glob("**/*.csv")):
            name_low = p.name.lower()
            score = 0
            if "eeg" in name_low: score += 2
            if "human" in name_low: score += 3
            if "mouse" in name_low or "mice" in name_low or "rat" in name_low: score += 3
            if "label" in name_low: score += 1
            size_mb = p.stat().st_size / (1024*1024)
            found.append((score, size_mb, p))

    if not found:
        return None, None

    # Rank by heuristic score, then size
    found.sort(key=lambda t: (t[0], t[1]), reverse=True)

    # Heuristic pairing: pick best "human-like" and "mouse-like"
    human_like = [p for s,sz,p in found if re.search(r"human|subj|participant", p.name.lower())]
    mouse_like = [p for s,sz,p in found if re.search(r"mouse|mice|rat", p.name.lower())]

    if human_like and mouse_like:
        return human_like[0], mouse_like[0]

    # Fallback: show top candidates and bail so user can set the paths
    say("[DISCOVER] Could not confidently match human vs mouse. Here are top CSV candidates:")
    for s, sz, p in found[:10]:
        say(f"  score={s:>2} | {sz:5.1f} MB | {p}")
    return None, None

hp, mp = discover_csvs()
if hp is None or mp is None:
    raise FileNotFoundError(
        "Could not automatically locate HUMAN_EEG.csv and MOUSE_EEG.csv.\n"
        "→ Fix: set USER_HUMAN_CSV and USER_MOUSE_CSV at the top of this cell to the exact file paths\n"
        "   (or place them in C:\\mnt\\data or /mnt/data and re-run)."
    )

say(f"[FOUND] HUMAN: {hp}")
say(f"[FOUND] MOUSE: {mp}")

# --------- Sanity checks ----------
FS = 128.0
EPOCH_S = 4.0
L = int(FS*EPOCH_S)  # 512 samples/epoch
def sanity_check(path, name):
    df = pd.read_csv(path)
    if 'label' not in df.columns:
        raise ValueError(f"{name} missing 'label' column.")
    X = df.drop(columns=['label'])
    y = df['label'].astype(int).values
    n_samples, n_ch = X.shape
    rem = n_samples % L
    n_epochs = n_samples // L
    nan_any = X.isna().to_numpy().any()
    # epoch-majority label (expects label repeated per sample)
    maj = []
    for i in range(n_epochs):
        seg = y[i*L:(i+1)*L]
        if len(seg) < L: break
        maj.append(int(seg.mean() >= 0.5))
    p1 = float(np.mean(maj)) if maj else float('nan')
    say(f"[SANITY] {name}: samples={n_samples}, ch={n_ch}, epochs={n_epochs}, remainder={rem}, NaNs={nan_any}, epoch-mean(label)≈{p1:.3f}")
    return X.values, y, n_ch

Xh_all, yh_all, ch_h = sanity_check(hp, "HUMAN")
Xm_all, ym_all, ch_m = sanity_check(mp, "MOUSE")

# Trim to whole epochs and reshape
n_h = (Xh_all.shape[0]//L)*L; n_m = (Xm_all.shape[0]//L)*L
Xh_all, yh_all = Xh_all[:n_h], yh_all[:n_h]
Xm_all, ym_all = Xm_all[:n_m], ym_all[:n_m]
Xh = Xh_all.reshape(-1, L, ch_h); yh = yh_all[:Xh.shape[0]]
Xm = Xm_all.reshape(-1, L, ch_m); ym = ym_all[:Xm.shape[0]]

say(f"[EPOCHS] HUMAN={Xh.shape[0]} | MOUSE={Xm.shape[0]} | CH={ch_h} vs {ch_m}")

# --------- Locked prereg constants ----------
TARGET_CHANNEL_DIM = 32
GLYPH_BANDS = [(6,9),(8,12),(10,14)]
GLYPH_RANKS = [4,8,12]
BASELINE_BANDS = [(1,4),(4,8),(8,12),(12,30)]
PERM_N = 1000
SEED_MASTER = 20250928

def pick_outdir():
    for p in [Path(r"C:\mnt\data"), Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            (p/".touch_ok").write_text("ok", encoding="utf-8")
            (p/".touch_ok").unlink(missing_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()
OUTROOT = pick_outdir()
OUTDIR = OUTROOT / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

# --------- Feature helpers (locked) ----------
def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    m = (freqs >= f_lo) & (freqs <= f_hi)
    return S[m].sum(axis=0)

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    m = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[m] = F[m]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if ddX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k, seed=13579):
    rng_local = np.random.default_rng(seed + channels + k)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def fixed_projection_matrix(ch_in, ch_out, seed=SEED_MASTER):
    rng = np.random.default_rng(seed + 31*ch_in + 7*ch_out)
    A = rng.normal(size=(ch_in, ch_out))
    Q, _ = np.linalg.qr(A)
    return Q[:, :ch_out]

def map_channels_epoch(X_epoch, target_dim=TARGET_CHANNEL_DIM):
    ch_in = X_epoch.shape[1]
    if ch_in == target_dim: return X_epoch
    P = fixed_projection_matrix(ch_in, target_dim)
    return X_epoch @ P

def baseline_features(X, fs):
    feats = []
    for lo,hi in BASELINE_BANDS:
        feats.append(_fft_band_energy(X, fs, lo, hi).mean())
    hj = _hjorth_params(X).mean(axis=0)
    feats += list(hj)
    bp_full = _fft_band_energy(X, FS, 0.5, 40.0)
    feats += [bp_full.mean(), float(np.sqrt((bp_full**2).mean()))]
    return np.array(feats, float)

def glyph_stack_features(X, fs):
    C_feats = []
    for band in GLYPH_BANDS:
        C = _cov_band(X, fs, band)
        Cn = np.linalg.norm(C, 'fro') + 1e-9
        for k in GLYPH_RANKS:
            A = _orthobasis(C.shape[0], k)
            C_feats.append(np.linalg.norm(A.T @ C @ A, 'fro') / Cn)
    return np.array(C_feats, float)

def model_fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba >= 0.5).astype(int)
    return dict(auroc=float(roc_auc_score(test_y, proba)),
                acc=float(accuracy_score(test_y, preds)),
                proba=proba)

def nested_cv_scores(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4242)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = model_fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test_delta(train_X, train_y, test_X, test_y, baseline_auc, n_perm=1000):
    rngp = np.random.default_rng(9090)
    aucs, deltas = [], []
    for i in range(n_perm):
        y_perm = train_y.copy(); rngp.shuffle(y_perm)
        out = model_fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"]); deltas.append(out["auroc"] - baseline_auc)
        if (i+1) % max(1, n_perm//10) == 0:
            say(f"    … permutations {i+1}/{n_perm}")
    return np.array(aucs), np.array(deltas)

# --------- Featurize & run ---------
def featurize_domain(X_ep, y):
    G_list, B_list = [], []
    for i in range(X_ep.shape[0]):
        Xp = map_channels_epoch(X_ep[i], TARGET_CHANNEL_DIM)
        G_list.append(glyph_stack_features(Xp, FS))
        B_list.append(baseline_features(Xp, FS))
        if (i+1) % max(1, X_ep.shape[0]//4) == 0:
            say(f"    … featurized {i+1}/{X_ep.shape[0]}")
    G = np.asarray(G_list, float); B = np.asarray(B_list, float)
    mask = ~np.isnan(G).any(axis=1)
    return G[mask], B[mask], y[mask]

say("[RUN] Featurizing HUMAN…"); Gh, Bh, yh = featurize_domain(Xh, yh)
say("[RUN] Featurizing MOUSE…"); Gm, Bm, ym = featurize_domain(Xm, ym)

say("[RUN] Train on HUMAN, blind test on MOUSE…")
res_g = model_fit_eval(Gh, yh, Gm, ym)
res_b = model_fit_eval(Bh, yh, Bm, ym)

say("[RUN] Nested CV on HUMAN…")
cv_g = nested_cv_scores(Gh, yh); cv_b = nested_cv_scores(Bh, yh)

say("[RUN] Permutation test (n=1000)…")
perm_aucs_g, perm_deltas = perm_test_delta(Gh, yh, Gm, ym, baseline_auc=res_b["auroc"], n_perm=1000)
p_auroc = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (1000 + 1)
obs_delta = res_g["auroc"] - res_b["auroc"]
p_delta = (np.sum(perm_deltas >= obs_delta) + 1) / (1000 + 1)

# --------- Plots ---------
def pick_outdir_for_write():
    for p in [Path(r"C:\mnt\data"), Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()

OUTROOT = pick_outdir_for_write()
OUTDIR = OUTROOT / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(6,5))
fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
fpr_b, tpr_b, _ = roc_curve(ym, model_fit_eval(Bh, yh, Bm, ym)["proba"])
plt.plot(fpr_g, tpr_g, label=f"Glyph (AUROC={res_g['auroc']:.3f})")
plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
plt.tight_layout()
roc_path = OUTDIR/"transfer_roc.png"; plt.savefig(roc_path, dpi=200); plt.close()

plt.figure(figsize=(6,5))
plt.hist(perm_aucs_g, bins=30, alpha=0.7)
plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
plt.xlabel("AUROC under label permutation (Glyph)")
plt.ylabel("Count"); plt.title(f"Permutation Test (n=1000) | p={p_auroc:.4f}")
plt.tight_layout()
perm_path = OUTDIR/"perm_null.png"; plt.savefig(perm_path, dpi=200); plt.close()

# --------- Reports + Certificate ---------
results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "used_real_data": True,
    "fs_hz": FS, "epoch_s": EPOCH_S, "target_channel_dim": TARGET_CHANNEL_DIM,
    "n_human_epochs": int(Gh.shape[0]), "n_mouse_epochs": int(Gm.shape[0]),
    "transfer": {
        "glyph": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
        "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
        "delta_auroc": float(obs_delta),
        "perm_test": {"n_perm": 1000, "p_auroc": float(p_auroc), "p_delta": float(p_delta),
                      "perm_mean_auroc": float(np.mean(perm_aucs_g)), "perm_std_auroc": float(np.std(perm_aucs_g))},
    },
    "nested_cv_human": {
        "glyph": {"auroc_mean": float(cv_g[0]), "auroc_std": float(cv_g[1]), "acc_mean": float(cv_g[2]), "acc_std": float(cv_g[3])},
        "baseline": {"auroc_mean": float(cv_b[0]), "auroc_std": float(cv_b[1]), "acc_mean": float(cv_b[2]), "acc_std": float(cv_b[3])}
    },
    "prereg": {
        "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph > baseline",
        "null_hypothesis": "Glyph AUROC equals baseline under label permutation in training.",
        "alpha": 0.05, "test": "One-sided permutation on training labels; n=1000",
        "blinding": "Hyperparameters & projections fixed; test labels unseen; baseline fixed.",
        "decision_rule": "Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70."
    }
}
with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

summary = f"""
CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
==================================================
UTC: {results['timestamp']}
Data mode: REAL
Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}
Target channel dim (fixed): {results['target_channel_dim']}

TRANSFER (Train Human -> Test Mouse)
- Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}
- Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
- Delta AUROC: {results['transfer']['delta_auroc']:.3f}

Permutation Test (n=1000)
- p(AUROC >= observed)         : p={results['transfer']['perm_test']['p_auroc']:.4f}
- p(Delta AUROC >= observed)   : p={results['transfer']['perm_test']['p_delta']:.4f}

Nested CV on Human
- Glyph:    AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}
- Baseline: AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

PREREG DECISION RULE (LOCKED)
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.

Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png, certificate_onecell.pdf
"""
with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
    f.write(textwrap.dedent(summary))

cert_path = OUTDIR/"certificate_onecell.pdf"
with PdfPages(cert_path) as pdf:
    fig = plt.figure(figsize=(8.5, 11)); ax = fig.add_axes([0.07, 0.07, 0.86, 0.86]); ax.axis("off")
    title = "CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)"
    hdr = f"UTC: {results['timestamp']} | Data: REAL | Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']} | Target ch: {results['target_channel_dim']}"
    body = (
        f"TRANSFER (Train Human -> Test Mouse)\n"
        f"  Glyph AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}\n"
        f"  Baseline AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}\n"
        f"  Δ AUROC={results['transfer']['delta_auroc']:.3f}\n\n"
        f"Permutation Test (n=1000)\n"
        f"  p(AUROC ≥ observed)={results['transfer']['perm_test']['p_auroc']:.4f}\n"
        f"  p(Δ AUROC ≥ observed)={results['transfer']['perm_test']['p_delta']:.4f}\n\n"
        f"Nested CV (Human)\n"
        f"  Glyph AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}\n"
        f"  Baseline AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}\n\n"
        "PREREG DECISION RULE (LOCKED): Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer."
    )
    ax.text(0.5, 0.96, title, ha="center", va="top", fontsize=16, weight="bold")
    ax.text(0.5, 0.92, hdr, ha="center", va="top", fontsize=10)
    ax.text(0.05, 0.82, body, ha="left", va="top", fontsize=10, family="monospace")
    # embed plots if present
    try:
        import matplotlib.image as mpimg
        roc_img = mpimg.imread(OUTDIR/"transfer_roc.png")
        perm_img = mpimg.imread(OUTDIR/"perm_null.png")
        fig.add_axes([0.06, 0.48, 0.40, 0.26]).imshow(roc_img); plt.axis('off')
        fig.add_axes([0.54, 0.48, 0.40, 0.26]).imshow(perm_img); plt.axis('off')
        ax.text(0.06, 0.75, "ROC (Human→Mouse)", fontsize=9)
        ax.text(0.54, 0.75, "Permutation Null", fontsize=9)
    except Exception:
        ax.text(0.05, 0.50, "[Plots unavailable to embed]", fontsize=9)
    pdf.savefig(fig); plt.close(fig)

print(textwrap.dedent(summary).strip())
print("\nArtifacts saved to:", OUTDIR)


[DISCOVER] Could not confidently match human vs mouse. Here are top CSV candidates:
  score= 1 |   0.0 MB | C:\Users\caleb\Downloads\CNT-20250827T002504Z-1-001\CNT\releases\null_label_permute_pvals.csv
  score= 1 |   0.0 MB | C:\Users\caleb\Downloads\CNT-20250827T002504Z-1-001\CNT\releases\label_noise_power.csv
  score= 0 |  20.0 MB | C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored_v2.csv
  score= 0 |  17.3 MB | C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored.csv
  score= 0 |  12.8 MB | C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_map.csv
  score= 0 |   1.8 MB | C:\Users\caleb\cnt_genome\out\mini_atlas_lipids\table.csv
  score= 0 |   1.8 MB | C:\Users\caleb\cnt_genome\brainwaves\sim_theta.csv
  score= 0 |   1.8 MB | C:\Users\caleb\cnt_genome\brainwaves\sim_alpha.csv
  score= 0 |   0.8 MB | C:\Users\caleb\Downloads\CNT_Replication_Pack_v1\CNT_Replication_Pack_v1\sample_data\sample_signals.csv
  score= 0 |   0.4 MB | C:\Users\caleb\cnt_genome\cog_alphabet_phy

FileNotFoundError: Could not automatically locate HUMAN_EEG.csv and MOUSE_EEG.csv.
→ Fix: set USER_HUMAN_CSV and USER_MOUSE_CSV at the top of this cell to the exact file paths
   (or place them in C:\mnt\data or /mnt/data and re-run).

In [14]:
# === CNT One-Cell Groundbreaker — Interactive Picker + Locked Prereg Runner ===
# 1) Scans common dirs for CSVs; prints a numbered list with quick diagnostics
# 2) You enter the index for HUMAN and MOUSE
# 3) Runs the locked prereg (Human→Mouse, 1000 perms), writes artifacts & PDF certificate

import os, sys, json, textwrap, traceback, re
from pathlib import Path
from datetime import datetime, timezone
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

def say(m): print(m); sys.stdout.flush()

# --------- 1) Discover CSVs (shallow + one level deep) ----------
roots = [
    Path(r"C:\mnt\data"), Path("/mnt/data"),
    Path.cwd(),
    Path.home()/"Desktop",
    Path.home()/"Documents",
    Path.home()/"Downloads",
]
seen = set()
candidates = []

def list_csvs(root):
    try:
        items = list(root.glob("*.csv")) + list(root.glob("*/*.csv"))
    except Exception:
        items = []
    return items

say("[DISCOVER] Scanning for CSVs…")
for root in roots:
    if not root.exists(): continue
    for p in list_csvs(root):
        try:
            if p.resolve() in seen: continue
            seen.add(p.resolve())
            size_mb = p.stat().st_size/(1024*1024)
            # Peek columns/snippet quickly (robust to large files)
            try:
                head = pd.read_csv(p, nrows=5)
                cols = list(head.columns[:6])
                has_label = ("label" in head.columns)
            except Exception:
                cols = ["<unreadable>"]; has_label = False
            name = p.name.lower()
            score = 0
            if "eeg" in name: score += 2
            if any(k in name for k in ["human","subj","participant","s01","s1"]): score += 2
            if any(k in name for k in ["mouse","mice","rat"]): score += 2
            if has_label: score += 1
            candidates.append({"path": p, "size_mb": size_mb, "cols": cols, "has_label": has_label, "score": score})
        except Exception:
            pass

if not candidates:
    raise FileNotFoundError("No CSVs found in common folders. If you know the file paths, move/copy them to C:\\mnt\\data and name them HUMAN_EEG.csv / MOUSE_EEG.csv.")

# Sort by score then size
candidates.sort(key=lambda d: (d["score"], d["size_mb"]), reverse=True)

say("\n[DISCOVER] Top CSV candidates:")
for i, c in enumerate(candidates[:40]):
    say(f"{i:>2}: score={c['score']} | {c['size_mb']:5.1f} MB | label={c['has_label']} | cols={c['cols']} | {c['path']}")

# --------- 2) Pick HUMAN & MOUSE by index ----------
def pick_idx(prompt):
    while True:
        s = input(prompt).strip()
        if not s:
            print("Please enter a number shown in the list above.")
            continue
        try:
            idx = int(s)
            if 0 <= idx < len(candidates):
                return idx
            else:
                print(f"Out of range. Enter 0–{len(candidates)-1}.")
        except ValueError:
            print("Not a number. Try again.")

print("\nChoose one index for HUMAN and one for MOUSE from the list above.")
h_idx = pick_idx("HUMAN index: ")
m_idx = pick_idx("MOUSE index: ")

hp = candidates[h_idx]["path"]
mp = candidates[m_idx]["path"]
say(f"\n[CHOICE] HUMAN → {hp}")
say(f"[CHOICE] MOUSE → {mp}")

# --------- 3) Sanity + Locked Prereg Runner ----------
FS = 128.0
EPOCH_S = 4.0
L = int(FS*EPOCH_S)  # 512 samples/epoch
TARGET_CHANNEL_DIM = 32
GLYPH_BANDS = [(6,9),(8,12),(10,14)]
GLYPH_RANKS = [4,8,12]
BASELINE_BANDS = [(1,4),(4,8),(8,12),(12,30)]
PERM_N = 1000
SEED_MASTER = 20250928

def pick_outdir():
    for p in [Path(r"C:\mnt\data"), Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            (p/".touch_ok").write_text("ok", encoding="utf-8")
            (p/".touch_ok").unlink(missing_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()

OUTROOT = pick_outdir()
OUTDIR = OUTROOT / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    m = (freqs >= f_lo) & (freqs <= f_hi)
    return S[m].sum(axis=0)

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    m = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[m] = F[m]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k, seed=13579):
    rng_local = np.random.default_rng(seed + channels + k)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def fixed_projection_matrix(ch_in, ch_out, seed=SEED_MASTER):
    rng = np.random.default_rng(seed + 31*ch_in + 7*ch_out)
    A = rng.normal(size=(ch_in, ch_out))
    Q, _ = np.linalg.qr(A)
    return Q[:, :ch_out]

def map_channels_epoch(X_epoch, target_dim=TARGET_CHANNEL_DIM):
    ch_in = X_epoch.shape[1]
    if ch_in == target_dim: return X_epoch
    P = fixed_projection_matrix(ch_in, target_dim)
    return X_epoch @ P

def baseline_features(X, fs):
    feats = []
    for lo,hi in BASELINE_BANDS:
        feats.append(_fft_band_energy(X, fs, lo, hi).mean())
    hj = _hjorth_params(X).mean(axis=0)
    feats += list(hj)
    bp_full = _fft_band_energy(X, FS, 0.5, 40.0)
    feats += [bp_full.mean(), float(np.sqrt((bp_full**2).mean()))]
    return np.array(feats, float)

def glyph_stack_features(X, fs):
    C_feats = []
    for band in GLYPH_BANDS:
        C = _cov_band(X, fs, band)
        Cn = np.linalg.norm(C, 'fro') + 1e-9
        for k in GLYPH_RANKS:
            A = _orthobasis(C.shape[0], k)
            C_feats.append(np.linalg.norm(A.T @ C @ A, 'fro') / Cn)
    return np.array(C_feats, float)

def model_fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba >= 0.5).astype(int)
    return dict(auroc=float(roc_auc_score(test_y, proba)),
                acc=float(accuracy_score(test_y, preds)),
                proba=proba)

def nested_cv_scores(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4242)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = model_fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test_delta(train_X, train_y, test_X, test_y, baseline_auc, n_perm=1000):
    rngp = np.random.default_rng(9090)
    aucs, deltas = [], []
    for i in range(n_perm):
        y_perm = train_y.copy(); rngp.shuffle(y_perm)
        out = model_fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"]); deltas.append(out["auroc"] - baseline_auc)
        if (i+1) % max(1, n_perm//10) == 0:
            say(f"    … permutations {i+1}/{n_perm}")
    return np.array(aucs), np.array(deltas)

# Epoch trim/reshape
def reshape_epochs(csv_path, name):
    df = pd.read_csv(csv_path)
    if 'label' not in df.columns:
        raise ValueError(f"{name} is missing a 'label' column.")
    X = df.drop(columns=['label']).values
    y = df['label'].astype(int).values
    n = (X.shape[0]//L)*L
    X = X[:n]; y = y[:n]
    X = X.reshape(-1, L, X.shape[1])
    y = y[:X.shape[0]]
    say(f"[EPOCHS] {name}: epochs={X.shape[0]} | channels={X.shape[2]}")
    return X, y

Xh, yh = reshape_epochs(hp, "HUMAN")
Xm, ym = reshape_epochs(mp, "MOUSE")

def featurize_domain(X_ep, y):
    G_list, B_list = [], []
    for i in range(X_ep.shape[0]):
        Xp = map_channels_epoch(X_ep[i], TARGET_CHANNEL_DIM)
        G_list.append(glyph_stack_features(Xp, FS))
        B_list.append(baseline_features(Xp, FS))
        if (i+1) % max(1, X_ep.shape[0]//4) == 0:
            say(f"    … featurized {i+1}/{X_ep.shape[0]}")
    G = np.asarray(G_list, float); B = np.asarray(B_list, float)
    mask = ~np.isnan(G).any(axis=1)
    return G[mask], B[mask], y[mask]

say("[RUN] Featurizing HUMAN…"); Gh, Bh, yh = featurize_domain(Xh, yh)
say("[RUN] Featurizing MOUSE…"); Gm, Bm, ym = featurize_domain(Xm, ym)

say("[RUN] Train on HUMAN, blind test on MOUSE…")
res_g = model_fit_eval(Gh, yh, Gm, ym)
res_b = model_fit_eval(Bh, yh, Bm, ym)

say("[RUN] Nested CV on HUMAN…")
cv_g = nested_cv_scores(Gh, yh); cv_b = nested_cv_scores(Bh, yh)

say("[RUN] Permutation test (n=1000)…")
perm_aucs_g, perm_deltas = perm_test_delta(Gh, yh, Gm, ym, baseline_auc=res_b["auroc"], n_perm=1000)
p_auroc = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (1000 + 1)
obs_delta = res_g["auroc"] - res_b["auroc"]
p_delta = (np.sum(perm_deltas >= obs_delta) + 1) / (1000 + 1)

# Plots & outputs
def outdir():
    for p in [Path(r"C:\mnt\data"), Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()
OUTROOT = outdir()
OUTDIR = OUTROOT / "CNT_OneCell_Groundbreaker"; OUTDIR.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(6,5))
fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
fpr_b, tpr_b, _ = roc_curve(ym, model_fit_eval(Bh, yh, Bm, ym)["proba"])
plt.plot(fpr_g, tpr_g, label=f"Glyph (AUROC={res_g['auroc']:.3f})")
plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
plt.tight_layout()
roc_path = OUTDIR/"transfer_roc.png"; plt.savefig(roc_path, dpi=200); plt.close()

plt.figure(figsize=(6,5))
plt.hist(perm_aucs_g, bins=30, alpha=0.7)
plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
plt.xlabel("AUROC under label permutation (Glyph)")
plt.ylabel("Count"); plt.title(f"Permutation Test (n=1000) | p={p_auroc:.4f}")
plt.tight_layout()
perm_path = OUTDIR/"perm_null.png"; plt.savefig(perm_path, dpi=200); plt.close()

results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "used_real_data": True,
    "fs_hz": 128.0, "epoch_s": 4.0, "target_channel_dim": 32,
    "n_human_epochs": int(Gh.shape[0]), "n_mouse_epochs": int(Gm.shape[0]),
    "transfer": {
        "glyph": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
        "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
        "delta_auroc": float(obs_delta),
        "perm_test": {"n_perm": 1000, "p_auroc": float(p_auroc), "p_delta": float(p_delta),
                      "perm_mean_auroc": float(np.mean(perm_aucs_g)), "perm_std_auroc": float(np.std(perm_aucs_g))},
    },
    "nested_cv_human": {
        "glyph": {"auroc_mean": float(cv_g[0]), "auroc_std": float(cv_g[1]), "acc_mean": float(cv_g[2]), "acc_std": float(cv_g[3])},
        "baseline": {"auroc_mean": float(cv_b[0]), "auroc_std": float(cv_b[1]), "acc_mean": float(cv_b[2]), "acc_std": float(cv_b[3])}
    },
    "prereg": {
        "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph > baseline",
        "null_hypothesis": "Glyph AUROC equals baseline under label permutation in training.",
        "alpha": 0.05, "test": "One-sided permutation on training labels; n=1000",
        "blinding": "Hyperparameters & projections fixed; test labels unseen; baseline fixed.",
        "decision_rule": "Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70."
    }
}
with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

summary = f"""
CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
==================================================
UTC: {results['timestamp']}
Data mode: REAL
Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}
Target channel dim (fixed): {results['target_channel_dim']}

TRANSFER (Train Human -> Test Mouse)
- Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}
- Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
- Delta AUROC: {results['transfer']['delta_auroc']:.3f}

Permutation Test (n=1000)
- p(AUROC >= observed)         : p={results['transfer']['perm_test']['p_auroc']:.4f}
- p(Delta AUROC >= observed)   : p={results['transfer']['perm_test']['p_delta']:.4f}

Nested CV on Human
- Glyph:    AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}
- Baseline: AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

PREREG DECISION RULE (LOCKED)
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.

Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png, certificate_onecell.pdf
"""
with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
    f.write(textwrap.dedent(summary))

cert_path = OUTDIR/"certificate_onecell.pdf"
with PdfPages(cert_path) as pdf:
    fig = plt.figure(figsize=(8.5, 11)); ax = fig.add_axes([0.07, 0.07, 0.86, 0.86]); ax.axis("off")
    title = "CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)"
    hdr = f"UTC: {results['timestamp']} | Data: REAL | Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']} | Target ch: {results['target_channel_dim']}"
    body = (
        f"TRANSFER (Train Human -> Test Mouse)\n"
        f"  Glyph AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}\n"
        f"  Baseline AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}\n"
        f"  Δ AUROC={results['transfer']['delta_auroc']:.3f}\n\n"
        f"Permutation Test (n=1000)\n"
        f"  p(AUROC ≥ observed)={results['transfer']['perm_test']['p_auroc']:.4f}\n"
        f"  p(Δ AUROC ≥ observed)={results['transfer']['perm_test']['p_delta']:.4f}\n\n"
        f"Nested CV (Human)\n"
        f"  Glyph AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}\n"
        f"  Baseline AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}\n\n"
        "PREREG DECISION RULE (LOCKED): Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer."
    )
    ax.text(0.5, 0.96, title, ha="center", va="top", fontsize=16, weight="bold")
    ax.text(0.5, 0.92, hdr, ha="center", va="top", fontsize=10)
    ax.text(0.05, 0.82, body, ha="left", va="top", fontsize=10, family="monospace")
    # embed plots
    try:
        import matplotlib.image as mpimg
        roc_img = mpimg.imread(OUTDIR/"transfer_roc.png")
        perm_img = mpimg.imread(OUTDIR/"perm_null.png")
        fig.add_axes([0.06, 0.48, 0.40, 0.26]).imshow(roc_img); plt.axis('off')
        fig.add_axes([0.54, 0.48, 0.40, 0.26]).imshow(perm_img); plt.axis('off')
        ax.text(0.06, 0.75, "ROC (Human→Mouse)", fontsize=9)
        ax.text(0.54, 0.75, "Permutation Null", fontsize=9)
    except Exception:
        ax.text(0.05, 0.50, "[Plots unavailable to embed]", fontsize=9)
    pdf.savefig(fig); plt.close(fig)

print(textwrap.dedent(summary).strip())
print("\nArtifacts saved to:", OUTDIR)


[DISCOVER] Scanning for CSVs…

[DISCOVER] Top CSV candidates:
 0: score=2 |   0.0 MB | label=False | cols=['dataset', 'alpha_band', 'tuned_wpli', 'tuned_residual', 'tuned_lag95', 'PASS_resid'] | C:\Users\caleb\Downloads\S010R03_microPASS_20250821_073457.csv
 1: score=2 |   0.0 MB | label=False | cols=['dataset', 'alpha_band', 'tuned_wpli', 'tuned_residual', 'tuned_lag95', 'PASS_resid'] | C:\Users\caleb\Downloads\S010R03_lastmile_20250821_074258.csv
 2: score=1 |   0.0 MB | label=True | cols=['t_start_s', 't_end_s', 'mu_ERD_med', 'mu_ERD_iqr', 'beta_ERD_med', 'beta_ERD_iqr'] | C:\Users\caleb\cnt_genome\cog_alphabet_eval_groundtruth\ME_ROI_ERD_epochs_S001R03.csv
 3: score=0 |  20.0 MB | label=False | cols=['rsid', 'Chromosome', 'pos', 'trait', 'ccre_id', 'gene_name'] | C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored_v2.csv
 4: score=0 |  17.3 MB | label=False | cols=['rsid', 'Chromosome', 'pos', 'trait', 'ccre_id', 'gene_id'] | C:\Users\caleb\cnt_genome\out\CNT_genomic_resonan

HUMAN index:  


Please enter a number shown in the list above.


HUMAN index:  


Please enter a number shown in the list above.


HUMAN index:  2
MOUSE index:  38



[CHOICE] HUMAN → C:\Users\caleb\cnt_genome\cog_alphabet_eval_groundtruth\ME_ROI_ERD_epochs_S001R03.csv
[CHOICE] MOUSE → C:\Users\caleb\Downloads\CNT_hard_datasets_autocal_v2_20250821_054331.csv


ValueError: invalid literal for int() with base 10: 'rest'

In [None]:
# === CNT One-Cell Groundbreaker — Interactive Picker + Locked Prereg Runner ===
# 1) Scans common dirs for CSVs; prints a numbered list with quick diagnostics
# 2) You enter the index for HUMAN and MOUSE
# 3) Runs the locked prereg (Human→Mouse, 1000 perms), writes artifacts & PDF certificate

import os, sys, json, textwrap, traceback, re
from pathlib import Path
from datetime import datetime, timezone
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.model_selection import StratifiedKFold

def say(m): print(m); sys.stdout.flush()

# --------- 1) Discover CSVs (shallow + one level deep) ----------
roots = [
    Path(r"C:\mnt\data"), Path("/mnt/data"),
    Path.cwd(),
    Path.home()/"Desktop",
    Path.home()/"Documents",
    Path.home()/"Downloads",
]
seen = set()
candidates = []

def list_csvs(root):
    try:
        items = list(root.glob("*.csv")) + list(root.glob("*/*.csv"))
    except Exception:
        items = []
    return items

say("[DISCOVER] Scanning for CSVs…")
for root in roots:
    if not root.exists(): continue
    for p in list_csvs(root):
        try:
            if p.resolve() in seen: continue
            seen.add(p.resolve())
            size_mb = p.stat().st_size/(1024*1024)
            # Peek columns/snippet quickly (robust to large files)
            try:
                head = pd.read_csv(p, nrows=5)
                cols = list(head.columns[:6])
                has_label = ("label" in head.columns)
            except Exception:
                cols = ["<unreadable>"]; has_label = False
            name = p.name.lower()
            score = 0
            if "eeg" in name: score += 2
            if any(k in name for k in ["human","subj","participant","s01","s1"]): score += 2
            if any(k in name for k in ["mouse","mice","rat"]): score += 2
            if has_label: score += 1
            candidates.append({"path": p, "size_mb": size_mb, "cols": cols, "has_label": has_label, "score": score})
        except Exception:
            pass

if not candidates:
    raise FileNotFoundError("No CSVs found in common folders. If you know the file paths, move/copy them to C:\\mnt\\data and name them HUMAN_EEG.csv / MOUSE_EEG.csv.")

# Sort by score then size
candidates.sort(key=lambda d: (d["score"], d["size_mb"]), reverse=True)

say("\n[DISCOVER] Top CSV candidates:")
for i, c in enumerate(candidates[:40]):
    say(f"{i:>2}: score={c['score']} | {c['size_mb']:5.1f} MB | label={c['has_label']} | cols={c['cols']} | {c['path']}")

# --------- 2) Pick HUMAN & MOUSE by index ----------
def pick_idx(prompt):
    while True:
        s = input(prompt).strip()
        if not s:
            print("Please enter a number shown in the list above.")
            continue
        try:
            idx = int(s)
            if 0 <= idx < len(candidates):
                return idx
            else:
                print(f"Out of range. Enter 0–{len(candidates)-1}.")
        except ValueError:
            print("Not a number. Try again.")

print("\nChoose one index for HUMAN and one for MOUSE from the list above.")
h_idx = pick_idx("HUMAN index: ")
m_idx = pick_idx("MOUSE index: ")

hp = candidates[h_idx]["path"]
mp = candidates[m_idx]["path"]
say(f"\n[CHOICE] HUMAN → {hp}")
say(f"[CHOICE] MOUSE → {mp}")

# --------- 3) Sanity + Locked Prereg Runner ----------
FS = 128.0
EPOCH_S = 4.0
L = int(FS*EPOCH_S)  # 512 samples/epoch
TARGET_CHANNEL_DIM = 32
GLYPH_BANDS = [(6,9),(8,12),(10,14)]
GLYPH_RANKS = [4,8,12]
BASELINE_BANDS = [(1,4),(4,8),(8,12),(12,30)]
PERM_N = 1000
SEED_MASTER = 20250928

def pick_outdir():
    for p in [Path(r"C:\mnt\data"), Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            (p/".touch_ok").write_text("ok", encoding="utf-8")
            (p/".touch_ok").unlink(missing_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()

OUTROOT = pick_outdir()
OUTDIR = OUTROOT / "CNT_OneCell_Groundbreaker"
OUTDIR.mkdir(parents=True, exist_ok=True)

def _fft_band_energy(X, fs, f_lo, f_hi):
    n = X.shape[0]
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    S = np.abs(np.fft.rfft(X, axis=0))**2 / n
    m = (freqs >= f_lo) & (freqs <= f_hi)
    return S[m].sum(axis=0)

def _cov_band(X, fs, band):
    n = X.shape[0]
    F = np.fft.rfft(X, axis=0)
    freqs = np.fft.rfftfreq(n, d=1.0/fs)
    m = (freqs >= band[0]) & (freqs <= band[1])
    Fm = np.zeros_like(F); Fm[m] = F[m]
    xb = np.fft.irfft(Fm, axis=0, n=n)
    Xc = xb - xb.mean(axis=0, keepdims=True)
    C = (Xc.T @ Xc) / max(1, (Xc.shape[0]-1))
    return C

def _hjorth_params(X):
    dX = np.diff(X, axis=0)
    var_x = X.var(axis=0)
    var_dx = dX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob = np.sqrt(np.divide(var_dx, var_x, out=np.zeros_like(var_x), where=var_x>0))
    ddX = np.diff(dX, axis=0)
    var_ddx = ddX.var(axis=0) if dX.size else np.zeros(X.shape[1])
    mob_dx = np.sqrt(np.divide(var_ddx, var_dx, out=np.zeros_like(var_dx), where=var_dx>0))
    comp = np.divide(mob_dx, mob, out=np.zeros_like(mob), where=mob>0)
    return np.vstack([var_x, mob, comp]).T

def _orthobasis(channels, k, seed=13579):
    rng_local = np.random.default_rng(seed + channels + k)
    Q, _ = np.linalg.qr(rng_local.normal(size=(channels, channels)))
    return Q[:, :k]

def fixed_projection_matrix(ch_in, ch_out, seed=SEED_MASTER):
    rng = np.random.default_rng(seed + 31*ch_in + 7*ch_out)
    A = rng.normal(size=(ch_in, ch_out))
    Q, _ = np.linalg.qr(A)
    return Q[:, :ch_out]

def map_channels_epoch(X_epoch, target_dim=TARGET_CHANNEL_DIM):
    ch_in = X_epoch.shape[1]
    if ch_in == target_dim: return X_epoch
    P = fixed_projection_matrix(ch_in, target_dim)
    return X_epoch @ P

def baseline_features(X, fs):
    feats = []
    for lo,hi in BASELINE_BANDS:
        feats.append(_fft_band_energy(X, fs, lo, hi).mean())
    hj = _hjorth_params(X).mean(axis=0)
    feats += list(hj)
    bp_full = _fft_band_energy(X, FS, 0.5, 40.0)
    feats += [bp_full.mean(), float(np.sqrt((bp_full**2).mean()))]
    return np.array(feats, float)

def glyph_stack_features(X, fs):
    C_feats = []
    for band in GLYPH_BANDS:
        C = _cov_band(X, fs, band)
        Cn = np.linalg.norm(C, 'fro') + 1e-9
        for k in GLYPH_RANKS:
            A = _orthobasis(C.shape[0], k)
            C_feats.append(np.linalg.norm(A.T @ C @ A, 'fro') / Cn)
    return np.array(C_feats, float)

def model_fit_eval(train_X, train_y, test_X, test_y):
    clf = LogisticRegression(max_iter=200, solver="lbfgs")
    clf.fit(train_X, train_y)
    proba = clf.predict_proba(test_X)[:,1]
    preds = (proba >= 0.5).astype(int)
    return dict(auroc=float(roc_auc_score(test_y, proba)),
                acc=float(accuracy_score(test_y, preds)),
                proba=proba)

def nested_cv_scores(X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4242)
    aucs, accs = [], []
    for tr, te in skf.split(X, y):
        out = model_fit_eval(X[tr], y[tr], X[te], y[te])
        aucs.append(out["auroc"]); accs.append(out["acc"])
    return float(np.mean(aucs)), float(np.std(aucs)), float(np.mean(accs)), float(np.std(accs))

def perm_test_delta(train_X, train_y, test_X, test_y, baseline_auc, n_perm=1000):
    rngp = np.random.default_rng(9090)
    aucs, deltas = [], []
    for i in range(n_perm):
        y_perm = train_y.copy(); rngp.shuffle(y_perm)
        out = model_fit_eval(train_X, y_perm, test_X, test_y)
        aucs.append(out["auroc"]); deltas.append(out["auroc"] - baseline_auc)
        if (i+1) % max(1, n_perm//10) == 0:
            say(f"    … permutations {i+1}/{n_perm}")
    return np.array(aucs), np.array(deltas)

# Epoch trim/reshape
def reshape_epochs(csv_path, name):
    df = pd.read_csv(csv_path)
    if 'label' not in df.columns:
        raise ValueError(f"{name} is missing a 'label' column.")
    X = df.drop(columns=['label']).values
    y = df['label'].astype(int).values
    n = (X.shape[0]//L)*L
    X = X[:n]; y = y[:n]
    X = X.reshape(-1, L, X.shape[1])
    y = y[:X.shape[0]]
    say(f"[EPOCHS] {name}: epochs={X.shape[0]} | channels={X.shape[2]}")
    return X, y

Xh, yh = reshape_epochs(hp, "HUMAN")
Xm, ym = reshape_epochs(mp, "MOUSE")

def featurize_domain(X_ep, y):
    G_list, B_list = [], []
    for i in range(X_ep.shape[0]):
        Xp = map_channels_epoch(X_ep[i], TARGET_CHANNEL_DIM)
        G_list.append(glyph_stack_features(Xp, FS))
        B_list.append(baseline_features(Xp, FS))
        if (i+1) % max(1, X_ep.shape[0]//4) == 0:
            say(f"    … featurized {i+1}/{X_ep.shape[0]}")
    G = np.asarray(G_list, float); B = np.asarray(B_list, float)
    mask = ~np.isnan(G).any(axis=1)
    return G[mask], B[mask], y[mask]

say("[RUN] Featurizing HUMAN…"); Gh, Bh, yh = featurize_domain(Xh, yh)
say("[RUN] Featurizing MOUSE…"); Gm, Bm, ym = featurize_domain(Xm, ym)

say("[RUN] Train on HUMAN, blind test on MOUSE…")
res_g = model_fit_eval(Gh, yh, Gm, ym)
res_b = model_fit_eval(Bh, yh, Bm, ym)

say("[RUN] Nested CV on HUMAN…")
cv_g = nested_cv_scores(Gh, yh); cv_b = nested_cv_scores(Bh, yh)

say("[RUN] Permutation test (n=1000)…")
perm_aucs_g, perm_deltas = perm_test_delta(Gh, yh, Gm, ym, baseline_auc=res_b["auroc"], n_perm=1000)
p_auroc = (np.sum(perm_aucs_g >= res_g["auroc"]) + 1) / (1000 + 1)
obs_delta = res_g["auroc"] - res_b["auroc"]
p_delta = (np.sum(perm_deltas >= obs_delta) + 1) / (1000 + 1)

# Plots & outputs
def outdir():
    for p in [Path(r"C:\mnt\data"), Path("/mnt/data"), Path.cwd(), Path.home()/"Documents"]:
        try:
            p.mkdir(parents=True, exist_ok=True)
            return p
        except Exception:
            continue
    return Path.cwd()
OUTROOT = outdir()
OUTDIR = OUTROOT / "CNT_OneCell_Groundbreaker"; OUTDIR.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(6,5))
fpr_g, tpr_g, _ = roc_curve(ym, res_g["proba"])
fpr_b, tpr_b, _ = roc_curve(ym, model_fit_eval(Bh, yh, Bm, ym)["proba"])
plt.plot(fpr_g, tpr_g, label=f"Glyph (AUROC={res_g['auroc']:.3f})")
plt.plot(fpr_b, tpr_b, label=f"Baseline (AUROC={res_b['auroc']:.3f})")
plt.plot([0,1],[0,1],'--')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.legend(loc="lower right")
plt.title("Cross-Domain ROC (Train: HUMAN, Test: MOUSE)")
plt.tight_layout()
roc_path = OUTDIR/"transfer_roc.png"; plt.savefig(roc_path, dpi=200); plt.close()

plt.figure(figsize=(6,5))
plt.hist(perm_aucs_g, bins=30, alpha=0.7)
plt.axvline(res_g["auroc"], linestyle='--', linewidth=2)
plt.xlabel("AUROC under label permutation (Glyph)")
plt.ylabel("Count"); plt.title(f"Permutation Test (n=1000) | p={p_auroc:.4f}")
plt.tight_layout()
perm_path = OUTDIR/"perm_null.png"; plt.savefig(perm_path, dpi=200); plt.close()

results = {
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "used_real_data": True,
    "fs_hz": 128.0, "epoch_s": 4.0, "target_channel_dim": 32,
    "n_human_epochs": int(Gh.shape[0]), "n_mouse_epochs": int(Gm.shape[0]),
    "transfer": {
        "glyph": {"auroc": float(res_g["auroc"]), "acc": float(res_g["acc"])},
        "baseline": {"auroc": float(res_b["auroc"]), "acc": float(res_b["acc"])},
        "delta_auroc": float(obs_delta),
        "perm_test": {"n_perm": 1000, "p_auroc": float(p_auroc), "p_delta": float(p_delta),
                      "perm_mean_auroc": float(np.mean(perm_aucs_g)), "perm_std_auroc": float(np.std(perm_aucs_g))},
    },
    "nested_cv_human": {
        "glyph": {"auroc_mean": float(cv_g[0]), "auroc_std": float(cv_g[1]), "acc_mean": float(cv_g[2]), "acc_std": float(cv_g[3])},
        "baseline": {"auroc_mean": float(cv_b[0]), "auroc_std": float(cv_b[1]), "acc_mean": float(cv_b[2]), "acc_std": float(cv_b[3])}
    },
    "prereg": {
        "primary_endpoint": "Cross-domain AUROC (Train: HUMAN, Test: MOUSE) for glyph > baseline",
        "null_hypothesis": "Glyph AUROC equals baseline under label permutation in training.",
        "alpha": 0.05, "test": "One-sided permutation on training labels; n=1000",
        "blinding": "Hyperparameters & projections fixed; test labels unseen; baseline fixed.",
        "decision_rule": "Reject H0 if p_delta < 0.05 AND glyph AUROC > 0.70."
    }
}
with open(OUTDIR/"results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

summary = f"""
CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)
==================================================
UTC: {results['timestamp']}
Data mode: REAL
Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']}
Target channel dim (fixed): {results['target_channel_dim']}

TRANSFER (Train Human -> Test Mouse)
- Glyph:    AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}
- Baseline: AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}
- Delta AUROC: {results['transfer']['delta_auroc']:.3f}

Permutation Test (n=1000)
- p(AUROC >= observed)         : p={results['transfer']['perm_test']['p_auroc']:.4f}
- p(Delta AUROC >= observed)   : p={results['transfer']['perm_test']['p_delta']:.4f}

Nested CV on Human
- Glyph:    AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}
- Baseline: AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}

PREREG DECISION RULE (LOCKED)
Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer.

Artifacts: results.json, summary.txt, transfer_roc.png, perm_null.png, certificate_onecell.pdf
"""
with open(OUTDIR/"summary.txt", "w", encoding="utf-8") as f:
    f.write(textwrap.dedent(summary))

cert_path = OUTDIR/"certificate_onecell.pdf"
with PdfPages(cert_path) as pdf:
    fig = plt.figure(figsize=(8.5, 11)); ax = fig.add_axes([0.07, 0.07, 0.86, 0.86]); ax.axis("off")
    title = "CNT One-Cell Groundbreaker — Prereg-Final (LOCKED)"
    hdr = f"UTC: {results['timestamp']} | Data: REAL | Human epochs: {results['n_human_epochs']} | Mouse epochs: {results['n_mouse_epochs']} | Target ch: {results['target_channel_dim']}"
    body = (
        f"TRANSFER (Train Human -> Test Mouse)\n"
        f"  Glyph AUROC={results['transfer']['glyph']['auroc']:.3f}, ACC={results['transfer']['glyph']['acc']:.3f}\n"
        f"  Baseline AUROC={results['transfer']['baseline']['auroc']:.3f}, ACC={results['transfer']['baseline']['acc']:.3f}\n"
        f"  Δ AUROC={results['transfer']['delta_auroc']:.3f}\n\n"
        f"Permutation Test (n=1000)\n"
        f"  p(AUROC ≥ observed)={results['transfer']['perm_test']['p_auroc']:.4f}\n"
        f"  p(Δ AUROC ≥ observed)={results['transfer']['perm_test']['p_delta']:.4f}\n\n"
        f"Nested CV (Human)\n"
        f"  Glyph AUROC={results['nested_cv_human']['glyph']['auroc_mean']:.3f}±{results['nested_cv_human']['glyph']['auroc_std']:.3f}\n"
        f"  Baseline AUROC={results['nested_cv_human']['baseline']['auroc_mean']:.3f}±{results['nested_cv_human']['baseline']['auroc_std']:.3f}\n\n"
        "PREREG DECISION RULE (LOCKED): Reject H0 if p_delta < 0.05 AND Glyph AUROC > 0.70 on transfer."
    )
    ax.text(0.5, 0.96, title, ha="center", va="top", fontsize=16, weight="bold")
    ax.text(0.5, 0.92, hdr, ha="center", va="top", fontsize=10)
    ax.text(0.05, 0.82, body, ha="left", va="top", fontsize=10, family="monospace")
    # embed plots
    try:
        import matplotlib.image as mpimg
        roc_img = mpimg.imread(OUTDIR/"transfer_roc.png")
        perm_img = mpimg.imread(OUTDIR/"perm_null.png")
        fig.add_axes([0.06, 0.48, 0.40, 0.26]).imshow(roc_img); plt.axis('off')
        fig.add_axes([0.54, 0.48, 0.40, 0.26]).imshow(perm_img); plt.axis('off')
        ax.text(0.06, 0.75, "ROC (Human→Mouse)", fontsize=9)
        ax.text(0.54, 0.75, "Permutation Null", fontsize=9)
    except Exception:
        ax.text(0.05, 0.50, "[Plots unavailable to embed]", fontsize=9)
    pdf.savefig(fig); plt.close(fig)

print(textwrap.dedent(summary).strip())
print("\nArtifacts saved to:", OUTDIR)


[DISCOVER] Scanning for CSVs…

[DISCOVER] Top CSV candidates:
 0: score=2 |   0.0 MB | label=False | cols=['dataset', 'alpha_band', 'tuned_wpli', 'tuned_residual', 'tuned_lag95', 'PASS_resid'] | C:\Users\caleb\Downloads\S010R03_microPASS_20250821_073457.csv
 1: score=2 |   0.0 MB | label=False | cols=['dataset', 'alpha_band', 'tuned_wpli', 'tuned_residual', 'tuned_lag95', 'PASS_resid'] | C:\Users\caleb\Downloads\S010R03_lastmile_20250821_074258.csv
 2: score=1 |   0.0 MB | label=True | cols=['t_start_s', 't_end_s', 'mu_ERD_med', 'mu_ERD_iqr', 'beta_ERD_med', 'beta_ERD_iqr'] | C:\Users\caleb\cnt_genome\cog_alphabet_eval_groundtruth\ME_ROI_ERD_epochs_S001R03.csv
 3: score=0 |  20.0 MB | label=False | cols=['rsid', 'Chromosome', 'pos', 'trait', 'ccre_id', 'gene_name'] | C:\Users\caleb\cnt_genome\out\CNT_genomic_resonance_scored_v2.csv
 4: score=0 |  17.3 MB | label=False | cols=['rsid', 'Chromosome', 'pos', 'trait', 'ccre_id', 'gene_id'] | C:\Users\caleb\cnt_genome\out\CNT_genomic_resonan