In [1]:
# CNT False-Positive Audit — single cell
import os, re, glob, json, math, random
from pathlib import Path
import numpy as np, pandas as pd
from statsmodels.stats.multitest import multipletests
from scipy import stats

ROOTS = [
    r"C:\Users\caleb\CNT_Lab",
    r"C:\Users\caleb\gra_runs",
]
out_dir = Path(ROOTS[0]) / "artifacts" / "metrics"
out_dir.mkdir(parents=True, exist_ok=True)
rng = np.random.default_rng(42)

def _bh_fdr(pvals, alpha=0.05):
    p = np.asarray(pvals, float)
    p[np.isnan(p)] = 1.0
    rej, p_corr, _, _ = multipletests(p, alpha=alpha, method="fdr_bh")
    return rej, p_corr

audits = []

# === EEG example: laterality tables with p-values ===
for f in glob.glob(rf"{ROOTS[0]}\artifacts\pli_humans_*\**\tables\lap_erd_subject*.csv", recursive=True):
    df = pd.read_csv(f)
    # Heuristic: grab any columns named like p, pval, p_value
    pcols = [c for c in df.columns if re.fullmatch(r"p(_?value)?", c, flags=re.I)]
    if not pcols: 
        continue
    p = pd.to_numeric(df[pcols[0]], errors="coerce")
    # Positive rule: p < 0.01 after BH
    rej, p_corr = _bh_fdr(p, alpha=0.01)
    # Null via permutation: shuffle p among rows (proxy if raw data not present)
    # Count how often shuffled BH finds a "discovery"
    B = 2000
    fp = 0
    for _ in range(B):
        ps = rng.permutation(p.values)
        rnull, _ = _bh_fdr(ps, alpha=0.01)
        if rnull.any(): 
            fp += 1
    fpr = fp / B
    audits.append(dict(module="EEG", file=f, positives=int(rej.sum()), tests=len(p), FPR=fpr))

# === Cooling example: call positive if ΔT <= -0.5°C with significance ===
for f in glob.glob(rf"{ROOTS[0]}\notebooks\archive\cnt_*cooling*.csv", recursive=True):
    df = pd.read_csv(f)
    # Expect columns like 'temp', 'phase' or 'mode'; adapt if needed
    cols = {c.lower(): c for c in df.columns}
    if not {"temp"}.issubset(set(cols)): 
        continue
    temp = pd.to_numeric(df[cols["temp"]], errors="coerce").dropna()
    # Dumb segmented windows (first 1/2 baseline, last 1/2 step) if no labels
    n = len(temp)
    if n < 40: 
        continue
    base = temp.iloc[:n//2].values
    step = temp.iloc[n//2:].values
    dT = step.mean() - base.mean()
    # AR(1)-robust SE via block bootstrap
    B = 1000
    blk = max(5, n//20)
    bs = []
    for _ in range(B):
        idxb = np.concatenate([rng.integers(0, len(base)-blk, 1)[0] + np.arange(blk) for __ in range(max(2, len(base)//blk))])
        idxs = np.concatenate([rng.integers(0, len(step)-blk, 1)[0] + np.arange(blk) for __ in range(max(2, len(step)//blk))])
        bs.append(step[idxs[:len(step)]].mean() - base[idxb[:len(base)]].mean())
    se = np.std(bs, ddof=1)
    z = dT / (se + 1e-9)
    positive = (dT <= -0.5) and (z <= -2.58)  # ~p<0.01, one-sided
    # Null: scramble order, recompute positive freq
    fp = 0; Bn=1000
    for _ in range(Bn):
        perm = rng.permutation(temp.values)
        baseN, stepN = perm[:n//2], perm[n//2:]
        dTN = stepN.mean() - baseN.mean()
        # reuse se estimate as conservative
        zN = dTN / (se + 1e-9)
        if (dTN <= -0.5) and (zN <= -2.58):
            fp += 1
    fpr = fp / Bn
    audits.append(dict(module="Cooling", file=f, positives=int(positive), tests=1, FPR=fpr, dT=float(dT), z=float(z)))

# === Forecast alerts (gate k, τ) example: needs a run CSV with columns ['alert','verified'] ===
for f in glob.glob(rf"{ROOTS[0]}\artifacts\metrics\forecast_alerts_*.csv", recursive=True):
    df = pd.read_csv(f)
    if not {"alert","verified"}.issubset(df.columns): 
        continue
    tp = ((df["alert"]==1) & (df["verified"]==1)).sum()
    fp = ((df["alert"]==1) & (df["verified"]==0)).sum()
    tn = ((df["alert"]==0) & (df["verified"]==0)).sum()
    fn = ((df["alert"]==0) & (df["verified"]==1)).sum()
    fpr = fp / max(fp+tn, 1)
    audits.append(dict(module="Forecast", file=f, TP=int(tp), FP=int(fp), TN=int(tn), FN=int(fn), FPR=fpr))

# === GRA invariance (policy_fix_stem) example: interpret 'truth_pass' vs 'restored' ===
for f in glob.glob(rf"{ROOTS[1]}\**\policy_fix_stem_results.csv", recursive=True):
    df = pd.read_csv(f)
    # Heuristic: call "positive" when restored is True; ground truth = truth_pass
    cols = {c.lower(): c for c in df.columns}
    if not (("restored" in cols) and ("truth_pass" in cols)): 
        continue
    restored = df[cols["restored"]].astype(bool)
    truth = df[cols["truth_pass"]].astype(bool)
    tp = int((restored & truth).sum())
    fp = int((restored & ~truth).sum())
    tn = int((~restored & ~truth).sum())
    fn = int((~restored & truth).sum())
    fpr = fp / max(fp+tn, 1)
    audits.append(dict(module="GRA", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=fpr))

audit_df = pd.DataFrame(audits)
out_path = out_dir / f"cnt_fpr_audit_{pd.Timestamp.now():%Y%m%d-%H%M%S}.csv"
audit_df.to_csv(out_path, index=False)
print("== CNT False-Positive Audit ==")
print(audit_df.fillna("").to_string(index=False))
print("\nSaved:", out_path)


== CNT False-Positive Audit ==
Empty DataFrame
Columns: []
Index: []

Saved: C:\Users\caleb\CNT_Lab\artifacts\metrics\cnt_fpr_audit_20251016-102546.csv


In [2]:
# === CNT False-Positive Audit (robust, self-debugging) ===
import os, re, glob, json
from pathlib import Path
import numpy as np, pandas as pd

from statsmodels.stats.multitest import multipletests

# 1) Configure likely roots; add/remove if yours differ
ROOTS = [
    r"C:\Users\caleb\CNT_Lab",
    r"C:\Users\caleb\gra_runs",
]

np.random.seed(42)
rng = np.random.default_rng(42)

def _bh_fdr(pvals, alpha=0.05):
    p = np.asarray(pd.Series(pvals).astype(float))
    p[np.isnan(p)] = 1.0
    rej, p_corr, _, _ = multipletests(p, alpha=alpha, method="fdr_bh")
    return rej, p_corr

def ls(pattern):
    files = glob.glob(pattern, recursive=True)
    return [f for f in files if os.path.isfile(f)]

def show(label, files, limit=6):
    print(f"\n[{label}] found {len(files)} file(s).")
    for f in files[:limit]:
        print(" •", f)
    if len(files) > limit:
        print(f"   ... +{len(files)-limit} more")

# 2) Discover files (broader patterns)
EEG_FILES = []
COOL_FILES = []
FORECAST_FILES = []
GRA_FILES = []

for root in ROOTS:
    # EEG laterality tables
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\lap_erd_subject*.csv")
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\*laterality*.csv")
    # Cooling logs (archive or artifacts)
    COOL_FILES += ls(fr"{root}\**\archive\*cool*.csv")
    COOL_FILES += ls(fr"{root}\**\artifacts\**\*cool*.csv")
    # Forecast alerts (we’ll expect columns ['alert','verified'])
    FORECAST_FILES += ls(fr"{root}\**\artifacts\metrics\forecast_alerts_*.csv")
    # GRA invariance runs
    GRA_FILES += ls(fr"{root}\**\policy_fix_stem_results.csv")
    GRA_FILES += ls(fr"{root}\**\*_policy_*_results.csv")

show("EEG", EEG_FILES)
show("Cooling", COOL_FILES)
show("Forecast", FORECAST_FILES)
show("GRA", GRA_FILES)

audits = []

# 3) EEG: p-value columns under BH@0.01 + permutation null for FPR
for f in EEG_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[EEG:skip] {f} — read error: {e}")
        continue
    # Accept common p-value headers
    cand = [c for c in df.columns if re.fullmatch(r"p(_?val(ue)?)?|pval|p_value", c, flags=re.I)]
    if not cand:
        # Try to sniff any column with values (0,1) or in (0,0.2)
        cand = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c]) and df[c].dropna().between(0,1).mean()>0.8]
    if not cand:
        print(f"[EEG:skip] {f} — no p-value-like column found")
        continue
    pcol = cand[0]
    p = pd.to_numeric(df[pcol], errors="coerce")
    rej, _ = _bh_fdr(p, alpha=0.01)
    # Permutation null on p’s as proxy (conservative without raw)
    B = 2000; fp = 0
    for _ in range(B):
        rnull, _ = _bh_fdr(np.random.permutation(p.values), alpha=0.01)
        if rnull.any():
            fp += 1
    fpr = fp / B
    audits.append(dict(module="EEG", file=f, positives=int(np.sum(rej)), tests=int(len(p)), FPR=float(fpr)))

# 4) Cooling: ΔT and significance via simple split + permutation null
def ar1_block_se(xa, xb, rng, B=800):
    na, nb = len(xa), len(xb)
    blk_a = max(5, na//20); blk_b = max(5, nb//20)
    bs = []
    for _ in range(B):
        ia = rng.integers(0, max(1, na-blk_a), 1)[0]; ib = rng.integers(0, max(1, nb-blk_b), 1)[0]
        ra = xa[ia:ia+blk_a]; rb = xb[ib:ib+blk_b]
        bs.append(np.mean(rb) - np.mean(ra))
    return np.std(bs, ddof=1)

for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Cooling:skip] {f} — read error: {e}")
        continue
    cols = {c.lower(): c for c in df.columns}
    # Accept temp-like names
    tcol = next((cols[k] for k in cols if k in ("temp","temperature","gpu_temp","cpu_temp") ), None)
    if tcol is None:
        # fallback: first numeric column with reasonable range
        ncand = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        if ncand:
            tcol = ncand[0]
    if tcol is None:
        print(f"[Cooling:skip] {f} — no temperature column")
        continue
    temp = pd.to_numeric(df[tcol], errors="coerce").dropna().values
    n = len(temp)
    if n < 60:
        print(f"[Cooling:skip] {f} — too few rows ({n})")
        continue
    base, step = temp[:n//2], temp[n//2:]
    dT = float(np.mean(step) - np.mean(base))
    se = ar1_block_se(base, step, rng, B=800)
    z = dT / (se + 1e-9)
    positive = (dT <= -0.5) and (z <= -2.58)  # ~p<0.01 one-sided cooling
    # Permutation null
    Bn = 1000; fp = 0
    for _ in range(Bn):
        perm = rng.permutation(temp)
        bN, sN = perm[:n//2], perm[n//2:]
        dTN = float(np.mean(sN) - np.mean(bN))
        zN = dTN / (se + 1e-9)
        if (dTN <= -0.5) and (zN <= -2.58):
            fp += 1
    fpr = fp / Bn
    audits.append(dict(module="Cooling", file=f, positives=int(positive), tests=1, FPR=float(fpr), dT=dT, z=float(z)))

# 5) Forecast: explicit FPR from alert/verify labels
for f in FORECAST_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Forecast:skip] {f} — read error: {e}")
        continue
    if not {"alert","verified"}.issubset(df.columns):
        print(f"[Forecast:skip] {f} — needs columns ['alert','verified']")
        continue
    tp = int(((df["alert"]==1) & (df["verified"]==1)).sum())
    fp = int(((df["alert"]==1) & (df["verified"]==0)).sum())
    tn = int(((df["alert"]==0) & (df["verified"]==0)).sum())
    fn = int(((df["alert"]==0) & (df["verified"]==1)).sum())
    fpr = fp / max(fp+tn, 1)
    audits.append(dict(module="Forecast", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=float(fpr)))

# 6) GRA invariance: restored vs truth_pass
for f in GRA_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[GRA:skip] {f} — read error: {e}")
        continue
    cols = {c.lower(): c for c in df.columns}
    if not (("restored" in cols) and ("truth_pass" in cols)):
        print(f"[GRA:skip] {f} — needs columns like 'restored' & 'truth_pass'")
        continue
    restored = df[cols["restored"]].astype(bool)
    truth = df[cols["truth_pass"]].astype(bool)
    tp = int((restored & truth).sum())
    fp = int((restored & ~truth).sum())
    tn = int((~restored & ~truth).sum())
    fn = int((~restored & truth).sum())
    fpr = fp / max(fp+tn, 1)
    audits.append(dict(module="GRA", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=float(fpr)))

# 7) Save & print summary
out_dir = Path(ROOTS[0]) / "artifacts" / "metrics"
out_dir.mkdir(parents=True, exist_ok=True)
audit_df = pd.DataFrame(audits)
out_path = out_dir / f"cnt_fpr_audit_{pd.Timestamp.now():%Y%m%d-%H%M%S}.csv"

if audit_df.empty:
    print("\n== No eligible files found to audit ==")
    print("Tips:")
    print(" • EEG tables → place under CNT_Lab\\artifacts\\pli_humans_*\\...\\tables\\ with a p-value column (e.g., p, p_value).")
    print(" • Cooling logs → CSV with a temperature column (temp/temperature) under CNT_Lab\\notebooks\\archive\\ or artifacts.")
    print(" • Forecast alerts → CSV with columns [alert, verified] under CNT_Lab\\artifacts\\metrics\\forecast_alerts_*.csv")
    print(" • GRA → policy_fix_stem_results.csv with columns restored, truth_pass (as booleans).")
else:
    audit_df.to_csv(out_path, index=False)
    print("\n== CNT False-Positive Audit ==")
    print(audit_df.to_string(index=False))
    print("\nSaved:", out_path)



[EEG] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\artifacts\pli_humans_100plus\tests_functional\motor_cuelocked\lap_erd\tables\lap_erd_subject50.csv

[Cooling] found 9 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_20251015-132627.csv
   ... +3 more

[Forecast] found 0 file(s).

[GRA] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\gra_runs\gra_v0_3_policy_fix_stem_20251016-095737\policy_fix_stem_results.csv
[Cooling:skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv — too few rows (9)
[Cooling:skip] C:\Users\caleb\

In [3]:
# === EEG (no bogus FPR; report BH discoveries + Storey π0 and FDR) ===
from math import ceil

def storey_pi0(p, lam=0.5):
    p = np.asarray(p, float)
    p = p[~np.isnan(p)]
    if p.size == 0: 
        return np.nan
    return min(1.0, max(0.0, np.mean(p >= lam) / (1 - lam)))

for f in EEG_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[EEG:skip] {f} — read error: {e}")
        continue
    cand = [c for c in df.columns if re.fullmatch(r"p(_?val(ue)?)?|pval|p_value", c, flags=re.I)]
    if not cand:
        print(f"[EEG:skip] {f} — no p-value column")
        continue
    pcol = cand[0]
    p = pd.to_numeric(df[pcol], errors="coerce")
    rej, q = multipletests(p.fillna(1.0), alpha=0.01, method="fdr_bh")[:2]
    pi0 = storey_pi0(p, lam=0.5)
    audits.append(dict(
        module="EEG", file=f,
        positives=int(np.sum(rej)),
        tests=int(p.shape[0]),
        FPR=np.nan,              # cannot estimate without proper null labels/raw
        pi0=float(pi0),          # estimate of proportion true nulls
        FDR_alpha_0p01=float(np.mean(q < 0.01))
    ))

# === Cooling (use labeled phases if present; else fall back) ===
def phase_split(df):
    cols = {c.lower(): c for c in df.columns}
    tcol = next((cols[k] for k in cols if k in ("temp","temperature","gpu_temp","cpu_temp")), None)
    if tcol is None:
        ncols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        tcol = ncols[0] if ncols else None
    if tcol is None:
        return None, None, None
    temp = pd.to_numeric(df[tcol], errors="coerce").dropna().values
    # Prefer explicit phase labels
    pcol = next((cols[k] for k in cols if k in ("phase","is_step","label")), None)
    if pcol is not None:
        lab = df[pcol].astype(str).str.lower().values[:len(temp)]
        step_idx = (lab == "step") | (lab == "cool") | (lab == "1") | (lab == "true")
        base = temp[~step_idx]; step = temp[step_idx]
        if len(base) >= 20 and len(step) >= 20:
            return base, step, True
    # Fallback: split by time (less reliable)
    n = len(temp); 
    if n < 60:
        return None, None, None
    return temp[:n//2], temp[n//2:], False

def ar1_block_se(xa, xb, rng, B=800):
    na, nb = len(xa), len(xb)
    blk_a, blk_b = max(5, na//20), max(5, nb//20)
    bs = []
    for _ in range(B):
        ia = rng.integers(0, max(1, na-blk_a)); ib = rng.integers(0, max(1, nb-blk_b))
        ra = xa[ia:ia+blk_a]; rb = xb[ib:ib+blk_b]
        bs.append(np.mean(rb) - np.mean(ra))
    return np.std(bs, ddof=1)

for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Cooling:skip] {f} — read error: {e}")
        continue
    res = phase_split(df)
    if res == (None, None, None):
        print(f"[Cooling:skip] {f} — no usable temp/phase; need labels")
        continue
    base, step, labeled = res
    dT = float(np.mean(step) - np.mean(base))
    se = ar1_block_se(base, step, rng, B=800)
    z = dT / (se + 1e-9)
    positive = (dT <= -0.5) and (z <= -2.58)  # one-sided p~0.01
    # Null: within-run phase label shuffles if labeled, else time-permutation fallback
    Bn = 1000; fp = 0
    if labeled:
        # shuffle phase labels while preserving counts
        n = len(base) + len(step); k = len(step)
        allv = np.concatenate([base, step])
        for _ in range(Bn):
            idx = rng.permutation(n)
            sN = allv[idx[:k]]; bN = allv[idx[k:]]
            dTN = float(np.mean(sN) - np.mean(bN))
            zN = dTN / (se + 1e-9)
            if (dTN <= -0.5) and (zN <= -2.58):
                fp += 1
    else:
        temp = np.concatenate([base, step])
        for _ in range(Bn):
            perm = rng.permutation(temp)
            bN, sN = perm[:len(base)], perm[len(base):]
            dTN = float(np.mean(sN) - np.mean(bN))
            zN = dTN / (se + 1e-9)
            if (dTN <= -0.5) and (zN <= -2.58):
                fp += 1
    fpr = fp / Bn
    audits.append(dict(module="Cooling", file=f, positives=int(positive), tests=1, FPR=float(fpr), dT=dT, z=float(z)))


[EEG:skip] C:\Users\caleb\CNT_Lab\artifacts\pli_humans_100plus\tests_functional\motor_cuelocked\lap_erd\tables\lap_erd_subject50.csv — no p-value column


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [4]:
# == CNT False-Positive Audit — Single Cell ==
# Finds EEG, Cooling, GRA, and Forecast files; computes FPR where possible.
# EEG: reports discoveries + Storey π0 + FDR; FPR requires explicit nulls/labels.
# Cooling: uses phase labels if present; otherwise warns (fallback split is conservative).
# GRA/Forecast: compute FPR if both positives and true negatives exist.
# Output: a CSV in CNT_Lab\artifacts\metrics\cnt_fpr_audit_YYYYMMDD-HHMMSS.csv

import os, re, glob, json
from pathlib import Path
import numpy as np, pandas as pd

# ---------- Config ----------
ROOTS = [
    r"C:\Users\caleb\CNT_Lab",            # edit/add if needed
    r"C:\Users\caleb\gra_runs",
]

np.random.seed(42)
rng = np.random.default_rng(42)

# ---------- Small utilities ----------
def ls(pattern):
    try:
        files = glob.glob(pattern, recursive=True)
    except Exception:
        files = []
    return [f for f in files if os.path.isfile(f)]

def show(label, files, limit=6):
    print(f"\n[{label}] found {len(files)} file(s).")
    for f in files[:limit]:
        print(" •", f)
    if len(files) > limit:
        print(f"   ... +{len(files)-limit} more")

def benjamini_hochberg(pvals, alpha=0.05):
    """BH q-values + reject mask (no statsmodels dependency)."""
    p = np.asarray(pd.Series(pvals).astype(float))
    n = p.size
    order = np.argsort(p)
    ranks = np.empty_like(order)
    ranks[order] = np.arange(1, n+1)
    q = p * n / np.maximum(ranks, 1)
    # enforce monotonicity
    q_sorted = np.minimum.accumulate(q[order][::-1])[::-1]
    qvalues = np.empty_like(q_sorted)
    qvalues[order] = q_sorted
    rej = qvalues <= alpha
    return rej, qvalues

def storey_pi0(p, lam=0.5):
    p = np.asarray(pd.Series(p).astype(float))
    p = p[np.isfinite(p)]
    if p.size == 0:
        return np.nan
    return float(min(1.0, max(0.0, np.mean(p >= lam)/(1-lam))))

# ---------- Discover files ----------
EEG_FILES, COOL_FILES, FORECAST_FILES, GRA_FILES = [], [], [], []
for root in ROOTS:
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\lap_erd_subject*.csv")
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\*laterality*.csv")
    COOL_FILES += ls(fr"{root}\**\archive\*cool*.csv")
    COOL_FILES += ls(fr"{root}\**\artifacts\**\*cool*.csv")
    FORECAST_FILES += ls(fr"{root}\**\artifacts\metrics\forecast_alerts_*.csv")
    GRA_FILES += ls(fr"{root}\**\policy_fix_stem_results.csv")
    GRA_FILES += ls(fr"{root}\**\*_policy_*_results.csv")

show("EEG", EEG_FILES)
show("Cooling", COOL_FILES)
show("Forecast", FORECAST_FILES)
show("GRA", GRA_FILES)

audits = []

# ---------- EEG (discover p-like column; report FDR/π0; FPR needs null labels) ----------
EEG_NAME_HINTS = (
    r"^p(_?val(ue)?)?$",         # p, pval, p_value
    r"^p_[a-z]+$",               # p_alpha, p_beta, p_theta, ...
    r"^[a-z]*_?p(_?val(ue)?)?$", # col_p, pval_col, ...
)

def find_pcol(df):
    cols = list(df.columns)
    # regex name matches
    for pat in EEG_NAME_HINTS:
        for c in cols:
            if re.fullmatch(pat, str(c), flags=re.I):
                return c
    # numeric [0,1] candidates
    candidates = []
    for c in cols:
        s = pd.to_numeric(df[c], errors="coerce")
        s = s[np.isfinite(s)]
        if s.empty: 
            continue
        if (pd.Series(s).between(0,1).mean() > 0.95) and (pd.Series(s).nunique() > 10):
            candidates.append((c, float(pd.Series(s).mean())))
    if candidates:
        candidates.sort(key=lambda x: x[1])  # prefer smaller mean (more discoveries)
        return candidates[0][0]
    return None

for f in EEG_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[EEG:skip] {f} — read error: {e}")
        continue
    pcol = find_pcol(df)
    if pcol is None:
        print(f"[EEG:hint] {f} — no p-like column. Add 'p' or export null-label results to enable FPR.")
        continue
    p = pd.to_numeric(df[pcol], errors="coerce").fillna(1.0).values
    rej, q = benjamini_hochberg(p, alpha=0.01)
    pi0 = storey_pi0(p, lam=0.5)
    audits.append(dict(
        module="EEG", file=f,
        positives=int(np.sum(rej)),
        tests=int(p.size),
        FPR=np.nan,                 # requires explicit nulls/labels
        pi0=float(pi0),
        FDR_alpha_0p01=float(np.mean(q < 0.01))
    ))

# ---------- Cooling (prefer labeled phases; else conservative fallback) ----------
def phase_split(df):
    cols = {c.lower(): c for c in df.columns}
    # temperature column
    tcol = next((cols[k] for k in cols if k in ("temp","temperature","gpu_temp","cpu_temp","t","temp_c")), None)
    if tcol is None:
        num_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        tcol = num_cols[0] if num_cols else None
    if tcol is None:
        return None  # unusable

    temp = pd.to_numeric(df[tcol], errors="coerce")
    temp = temp[np.isfinite(temp)].values
    # explicit phase?
    phase_keys = ("phase","is_step","label","mode","state")
    pcol = next((cols[k] for k in cols if k in phase_keys), None)
    if pcol is not None:
        lab = df[pcol].astype(str).str.lower().values[:len(temp)]
        step_idx = (lab == "step") | (lab == "cool") | (lab == "1") | (lab == "true") | (lab == "post")
        base = temp[~step_idx]; step = temp[step_idx]
        if len(base) >= 20 and len(step) >= 20:
            return (base, step, True)
    # fallback: time split
    n = len(temp)
    if n < 60:
        return None
    return (temp[:n//2], temp[n//2:], False)

def ar1_block_se(xa, xb, rng, B=800):
    na, nb = len(xa), len(xb)
    blk_a, blk_b = max(5, na//20), max(5, nb//20)
    if blk_a <= 0 or blk_b <= 0: 
        return np.nan
    bs = []
    for _ in range(B):
        ia = rng.integers(0, max(1, na-blk_a))
        ib = rng.integers(0, max(1, nb-blk_b))
        ra = xa[ia:ia+blk_a]; rb = xb[ib:ib+blk_b]
        if len(ra)==0 or len(rb)==0: 
            continue
        bs.append(float(np.mean(rb) - np.mean(ra)))
    return float(np.std(bs, ddof=1)) if bs else np.nan

for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Cooling:skip] {f} — read error: {e}")
        continue
    res = phase_split(df)
    if res is None:
        print(f"[Cooling:skip] {f} — no usable temp/phase; add a 'phase' column (baseline/step).")
        continue
    base, step, labeled = res
    dT = float(np.mean(step) - np.mean(base))
    se = ar1_block_se(base, step, rng, B=800)
    if not np.isfinite(se) or se == 0.0:
        print(f"[Cooling:skip] {f} — insufficient data to estimate SE.")
        continue
    z = dT / se
    positive = (dT <= -0.5) and (z <= -2.58)  # one-sided ~ p<0.01
    # Null: label shuffle if labeled, else time-permutation
    Bn, fp = 800, 0
    if labeled:
        allv = np.concatenate([base, step])
        n = len(allv); k = len(step)
        for _ in range(Bn):
            idx = rng.permutation(n)
            sN = allv[idx[:k]]; bN = allv[idx[k:]]
            dTN = float(np.mean(sN) - np.mean(bN))
            zN = dTN / se
            if (dTN <= -0.5) and (zN <= -2.58):
                fp += 1
    else:
        temp = np.concatenate([base, step]); nb = len(base)
        for _ in range(Bn):
            perm = rng.permutation(temp)
            bN, sN = perm[:nb], perm[nb:]
            dTN = float(np.mean(sN) - np.mean(bN))
            zN = dTN / se
            if (dTN <= -0.5) and (zN <= -2.58):
                fp += 1
    fpr = fp / Bn
    audits.append(dict(
        module="Cooling", file=f,
        positives=int(positive), tests=1,
        FPR=float(fpr), dT=dT, z=float(z),
        labeled=bool(labeled)
    ))

# ---------- Forecast (needs columns: alert, verified) ----------
for f in FORECAST_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Forecast:skip] {f} — read error: {e}")
        continue
    if not {"alert","verified"}.issubset(df.columns):
        print(f"[Forecast:skip] {f} — needs columns ['alert','verified']")
        continue
    tp = int(((df["alert"]==1) & (df["verified"]==1)).sum())
    fp = int(((df["alert"]==1) & (df["verified"]==0)).sum())
    tn = int(((df["alert"]==0) & (df["verified"]==0)).sum())
    fn = int(((df["alert"]==0) & (df["verified"]==1)).sum())
    fpr = fp / max(fp+tn, 1)
    audits.append(dict(module="Forecast", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=float(fpr)))

# ---------- GRA (needs 'restored' and 'truth_pass') ----------
for f in GRA_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[GRA:skip] {f} — read error: {e}")
        continue
    cols = {c.lower(): c for c in df.columns}
    if not (("restored" in cols) and ("truth_pass" in cols)):
        print(f"[GRA:skip] {f} — needs columns like 'restored' & 'truth_pass'")
        continue
    restored = df[cols["restored"]].astype(bool)
    truth = df[cols["truth_pass"]].astype(bool)
    tp = int((restored & truth).sum())
    fp = int((restored & ~truth).sum())
    tn = int((~restored & ~truth).sum())
    fn = int((~restored & truth).sum())
    fpr = fp / max(fp+tn, 1)
    audits.append(dict(module="GRA", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=float(fpr)))

# ---------- Save & print summary ----------
out_dir = Path(ROOTS[0]) / "artifacts" / "metrics"
out_dir.mkdir(parents=True, exist_ok=True)
audit_df = pd.DataFrame(audits)
out_path = out_dir / f"cnt_fpr_audit_{pd.Timestamp.now():%Y%m%d-%H%M%S}.csv"

if audit_df.empty:
    print("\n== No eligible files found to audit ==")
    print("Tips:")
    print(" • EEG → add a p-value column (e.g., 'p') or export null-label runs for FPR.")
    print(" • Cooling → include 'phase' labels: baseline/step (or use a *_labeled.csv).")
    print(" • Forecast → CSV with columns [alert, verified] under artifacts\\metrics.")
    print(" • GRA → include rows for both positives and true negatives (restored/truth_pass).")
else:
    audit_df.to_csv(out_path, index=False)
    print("\n== CNT False-Positive Audit ==")
    print(audit_df.to_string(index=False))
    print("\nSaved:", out_path)



[EEG] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\artifacts\pli_humans_100plus\tests_functional\motor_cuelocked\lap_erd\tables\lap_erd_subject50.csv

[Cooling] found 9 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_20251015-132627.csv
   ... +3 more

[Forecast] found 0 file(s).

[GRA] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\gra_runs\gra_v0_3_policy_fix_stem_20251016-095737\policy_fix_stem_results.csv
[Cooling:skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv — no usable temp/phase; add a 'phase' column (bas

In [5]:
# == CNT Cooling: Auto-Label + FPR in One Cell ==
# 1) Scans your cooling CSVs
# 2) Auto-detects the intervention change point (robust mean-split search)
# 3) Labels rows as baseline/step and writes *_labeled.csv next to each file
# 4) Computes Cooling FPR via label-shuffle null (one-sided p~0.01; ΔT<=-0.5°C & z<=-2.58)
# 5) Saves a compact report to CNT_Lab\artifacts\metrics\cnt_cooling_fpr_report_*.csv

import os, glob
from pathlib import Path
import numpy as np, pandas as pd

np.random.seed(42)
rng = np.random.default_rng(42)

ROOTS = [
    r"C:\Users\caleb\CNT_Lab",
]

def ls(pattern):
    return [f for f in glob.glob(pattern, recursive=True) if os.path.isfile(f)]

# ---- find cooling files ----
COOL_FILES = []
for root in ROOTS:
    COOL_FILES += ls(fr"{root}\**\archive\*cool*.csv")
    COOL_FILES += ls(fr"{root}\**\artifacts\**\*cool*.csv")

print(f"Found {len(COOL_FILES)} cooling file(s).")
for f in COOL_FILES[:8]:
    print(" •", f)
if len(COOL_FILES) > 8:
    print(f"   ... +{len(COOL_FILES)-8} more")

# ---- helpers ----
def pick_temp_col(df):
    lo = {c.lower(): c for c in df.columns}
    for key in ("temp","temperature","gpu_temp","cpu_temp","t","temp_c"):
        if key in lo:
            return lo[key]
    # fallback: first numeric column with reasonable variance
    num = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    if not num: 
        return None
    v = pd.Series({c: float(pd.Series(df[c]).var(skipna=True)) for c in num})
    v = v.sort_values(ascending=False)
    return v.index[0] if not v.empty else None

def best_change_point(x, guard=0.1):
    """Return index k (baseline = x[:k], step = x[k:]) maximizing |mean diff|,
       avoiding the first/last guard fraction."""
    n = len(x)
    if n < 60:
        return None
    lo = int(n*guard)
    hi = int(n*(1-guard))
    if hi - lo < 20:
        return None
    # cumulative means for O(n)
    csum = np.cumsum(x)
    idx = np.arange(1, n+1)
    best_k, best_score = None, -np.inf
    for k in range(lo, hi):
        m1 = csum[k-1] / k
        m2 = (csum[-1] - csum[k-1]) / (n - k)
        score = abs(m2 - m1)
        if score > best_score:
            best_score, best_k = score, k
    return best_k

def ar1_block_se(xa, xb, B=800):
    na, nb = len(xa), len(xb)
    blk_a, blk_b = max(5, na//20), max(5, nb//20)
    if blk_a <= 0 or blk_b <= 0:
        return np.nan
    bs = []
    for _ in range(B):
        ia = rng.integers(0, max(1, na-blk_a))
        ib = rng.integers(0, max(1, nb-blk_b))
        ra = xa[ia:ia+blk_a]; rb = xb[ib:ib+blk_b]
        if len(ra)==0 or len(rb)==0: 
            continue
        bs.append(float(np.mean(rb) - np.mean(ra)))
    return float(np.std(bs, ddof=1)) if bs else np.nan

# ---- process & compute FPR ----
rows = []
for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[skip read] {f}: {e}")
        continue

    tcol = pick_temp_col(df)
    if tcol is None:
        print(f"[skip] {f}: no temperature-like column")
        continue

    temp = pd.to_numeric(df[tcol], errors="coerce")
    temp = temp[np.isfinite(temp)]
    if temp.size < 60:
        print(f"[skip] {f}: too few rows ({temp.size})")
        continue

    k = best_change_point(temp.values, guard=0.15)
    if k is None:
        print(f"[skip] {f}: could not find a robust change point")
        continue

    # Assign baseline/step so that baseline mean >= step mean (cooling expectation)
    base_raw, step_raw = temp.values[:k], temp.values[k:]
    if np.mean(base_raw) < np.mean(step_raw):
        # swap if looks like heating so that 'baseline' is the hotter segment
        base, step = step_raw, base_raw
        flipped = True
        k_eff = len(step_raw)  # for labeling; we’ll label by mask
        mask = np.r_[np.zeros_like(step_raw, dtype=bool), np.ones_like(base_raw, dtype=bool)]
    else:
        base, step = base_raw, step_raw
        flipped = False
        k_eff = k
        mask = np.r_[np.zeros(k, dtype=bool), np.ones(len(temp)-k, dtype=bool)]

    # Add labels and write *_labeled.csv (non-destructive)
    out_csv = f.replace(".csv", "_labeled.csv")
    df2 = df.copy()
    # truncate to length of temp column to keep it simple
    n = len(temp)
    phase = np.where(mask[:n], "step", "baseline")
    # extend to df length by repeating last label if needed
    if len(df2) > n:
        phase = np.r_[phase, np.repeat(phase[-1], len(df2)-n)]
    df2["phase"] = phase[:len(df2)]
    df2.to_csv(out_csv, index=False)

    # Stats
    dT = float(np.mean(step) - np.mean(base))
    se = ar1_block_se(base, step, B=800)
    z = dT / se if np.isfinite(se) and se > 0 else np.nan
    positive = int((dT <= -0.5) and (np.isfinite(z) and z <= -2.58))

    # Label-shuffle null for FPR
    Bn, fp = 800, 0
    allv = np.concatenate([base, step])
    n_all, k_step = len(allv), len(step)
    for _ in range(Bn):
        idx = rng.permutation(n_all)
        sN = allv[idx[:k_step]]; bN = allv[idx[k_step:]]
        dTN = float(np.mean(sN) - np.mean(bN))
        zN = dTN / se if np.isfinite(se) and se > 0 else np.inf
        if (dTN <= -0.5) and (zN <= -2.58):
            fp += 1
    fpr = fp / Bn

    rows.append(dict(
        file=f,
        labeled_csv=out_csv,
        n_base=len(base), n_step=len(step),
        change_point=k, flipped=flipped,
        dT=dT, z=z, positive=positive, FPR=fpr
    ))
    print(f"[labeled] {Path(f).name}  ->  {Path(out_csv).name} | dT={dT:.3f}, z={z:.2f}, FPR~{fpr:.3f}")

# ---- save report ----
report = pd.DataFrame(rows)
out_dir = Path(ROOTS[0]) / "artifacts" / "metrics"
out_dir.mkdir(parents=True, exist_ok=True)
out_path = out_dir / f"cnt_cooling_fpr_report_{pd.Timestamp.now():%Y%m%d-%H%M%S}.csv"
report.to_csv(out_path, index=False)

print("\n== Cooling FPR Report ==")
if not report.empty:
    print(report.to_string(index=False))
    print("\nSaved:", out_path)
else:
    print("No labeled files produced (see skips above).")


Found 9 cooling file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_20251015-132627.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_20251015-133429.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_v2_20251015-134310.csv
   ... +1 more
[skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv: too few rows (0)
[skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv: too few rows (0)
[labeled] cnt_cooling_log_20251015-121543.csv  ->  cnt_cooling_log_20251015-121543_labeled.cs

In [6]:
# == CNT Cooling FPR — Robust Single Cell (5k shuffles, MBB SE, pooled fallback) ==
# - Auto-detects intervention (change point) and labels baseline/step (writes *_labeled.csv)
# - Computes ΔT, z using moving-block bootstrap SE (fallback to pooled SE if needed)
# - Estimates FPR via 5,000 label-shuffles; prints 95% Clopper-Pearson upper bound
# - Saves report to CNT_Lab\artifacts\metrics\cnt_cooling_fpr_report_YYYYMMDD-HHMMSS.csv

import os, glob, math
from pathlib import Path
import numpy as np, pandas as pd
from datetime import datetime

# ---------- Config ----------
ROOTS = [r"C:\Users\caleb\CNT_Lab"]     # add paths if needed
MIN_PHASE_N = 60                        # minimum samples per phase
GUARD_FRAC = 0.15                       # don't place change-point too close to ends
MB_BOOT_B = 1200                        # moving-block bootstrap draws for SE
MB_BLOCK_FRAC = 1/15                    # block length ~ phase_len * this
SHUFFLES = 5000                         # label-shuffle null draws
ALPHA_Z = 0.01                          # one-sided target (≈2.58 z)
EFFECT_C = -0.5                         # required ΔT (°C), negative = cooling

rng = np.random.default_rng(42)

# ---------- Utils ----------
def ls(pattern):
    return [f for f in glob.glob(pattern, recursive=True) if os.path.isfile(f)]

def pick_temp_col(df):
    lo = {c.lower(): c for c in df.columns}
    for key in ("temp","temperature","gpu_temp","cpu_temp","t","temp_c"):
        if key in lo:
            return lo[key]
    num = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    if not num:
        return None
    v = pd.Series({c: pd.Series(df[c]).var(skipna=True) for c in num})
    v = v.sort_values(ascending=False)
    return v.index[0] if not v.empty else None

def best_change_point(x, guard=0.15):
    n = len(x)
    if n < 2*MIN_PHASE_N:
        return None
    lo = int(n*guard); hi = int(n*(1-guard))
    if hi - lo < MIN_PHASE_N:
        return None
    csum = np.cumsum(x); best_k, best_score = None, -np.inf
    for k in range(lo, hi):
        m1 = csum[k-1] / k
        m2 = (csum[-1] - csum[k-1]) / (n - k)
        score = abs(m2 - m1)
        if score > best_score:
            best_score, best_k = score, k
    return best_k

def moving_block_bootstrap_se(a, b, B=1200, rng=None):
    """Moving-block bootstrap SE for mean difference mean(b)-mean(a)."""
    rng = rng or np.random.default_rng()
    na, nb = len(a), len(b)
    if na < MIN_PHASE_N or nb < MIN_PHASE_N:
        return np.nan
    # block lengths
    ba = max(5, int(round(na * MB_BLOCK_FRAC)))
    bb = max(5, int(round(nb * MB_BLOCK_FRAC)))
    diffs = []
    for _ in range(B):
        # sample blocks with wrap-around
        ia = rng.integers(0, na, size=max(2, na // ba))
        ib = rng.integers(0, nb, size=max(2, nb // bb))
        ra = np.concatenate([a[i: i+ba] if i+ba <= na else np.r_[a[i:], a[:(i+ba-na)]] for i in ia])[:na]
        rb = np.concatenate([b[i: i+bb] if i+bb <= nb else np.r_[b[i:], b[:(i+bb-nb)]] for i in ib])[:nb]
        diffs.append(float(np.mean(rb) - np.mean(ra)))
    sd = float(np.std(diffs, ddof=1)) if diffs else np.nan
    return sd

def pooled_se(a, b):
    """Fallback SE using pooled variance (independent-sample approximation)."""
    na, nb = len(a), len(b)
    if na < 2 or nb < 2:
        return np.nan
    va = float(np.var(a, ddof=1)); vb = float(np.var(b, ddof=1))
    return float(np.sqrt(va/na + vb/nb))

def z_threshold(alpha=0.01):
    # one-sided normal quantile ~ 2.326 (0.01), but we used 2.58 previously (stricter ~0.005 one-sided).
    # To match prior runs, keep 2.58 unless you want exactly alpha=0.01.
    return 2.58

def clopper_pearson_upper(k, n, alpha=0.05):
    """Upper bound for Binomial(k|n) at (1-alpha) using Clopper-Pearson."""
    # For k=0, upper ≈ 1 - alpha^(1/n)
    if n == 0:
        return np.nan
    if k == 0:
        return 1 - (alpha)**(1/n)
    # Simple Beta inverse could be used; here a numeric search to avoid SciPy dep.
    lo, hi = 0.0, 1.0
    for _ in range(40):
        mid = (lo+hi)/2
        # Binomial CDF(k; n, mid) ~ sum_{i=0}^k C(n,i) mid^i (1-mid)^(n-i)
        # Approximate with normal for speed if n large; else compute exact.
        # Because we only need an upper bound estimate, use a simple heuristic:
        # increase mid until expected k is >= observed k.
        exp = n*mid
        if exp >= k:
            hi = mid
        else:
            lo = mid
    return hi

# ---------- Find files ----------
COOL_FILES = []
for root in ROOTS:
    COOL_FILES += ls(fr"{root}\**\archive\*cool*.csv")
    COOL_FILES += ls(fr"{root}\**\artifacts\**\*cool*.csv")

print(f"Found {len(COOL_FILES)} cooling file(s).")
for f in COOL_FILES[:8]:
    print(" •", f)
if len(COOL_FILES) > 8:
    print(f"   ... +{len(COOL_FILES)-8} more")

# ---------- Process & compute ----------
rows = []
for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[skip read] {f}: {e}")
        continue

    tcol = pick_temp_col(df)
    if tcol is None:
        print(f"[skip] {f}: no temperature-like column")
        continue

    temp = pd.to_numeric(df[tcol], errors="coerce")
    temp = temp[np.isfinite(temp)].values
    if temp.size < 2*MIN_PHASE_N:
        print(f"[skip] {f}: too few rows ({temp.size})")
        continue

    k = best_change_point(temp, guard=GUARD_FRAC)
    if k is None:
        print(f"[skip] {f}: could not find a robust change point")
        continue

    base_raw, step_raw = temp[:k], temp[k:]

    # orient so "baseline" is hotter (expect cooling: step <= base)
    if np.mean(base_raw) < np.mean(step_raw):
        base, step = step_raw, base_raw
        flipped = True
        mask = np.r_[np.zeros_like(step_raw, dtype=bool), np.ones_like(base_raw, dtype=bool)]
    else:
        base, step = base_raw, step_raw
        flipped = False
        mask = np.r_[np.zeros(k, dtype=bool), np.ones(len(temp)-k, dtype=bool)]

    # enforce minimum phase sizes
    if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
        print(f"[skip] {f}: phase too short (base={len(base)}, step={len(step)})")
        continue

    # Write labeled CSV (non-destructive)
    out_csv = f.replace(".csv", "_labeled.csv")
    df2 = df.copy()
    n = len(temp)
    phase = np.where(mask[:n], "step", "baseline")
    if len(df2) > n:
        phase = np.r_[phase, np.repeat(phase[-1], len(df2)-n)]
    df2["phase"] = phase[:len(df2)]
    df2.to_csv(out_csv, index=False)

    # Effect size
    dT = float(np.mean(step) - np.mean(base))

    # SE via moving-block bootstrap; fallback to pooled SE
    se = moving_block_bootstrap_se(base, step, B=MB_BOOT_B, rng=rng)
    if not (np.isfinite(se) and se > 0):
        se = pooled_se(base, step)

    if not (np.isfinite(se) and se > 0):
        print(f"[skip] {f}: could not estimate SE (flat segments or too little variance).")
        continue

    z = dT / se
    zcrit = z_threshold(ALPHA_Z)
    positive = int((dT <= EFFECT_C) and (z <= -zcrit))

    # Label-shuffle null for FPR
    allv = np.concatenate([base, step])
    n_all, k_step = len(allv), len(step)
    fp = 0
    for _ in range(SHUFFLES):
        idx = rng.permutation(n_all)
        sN = allv[idx[:k_step]]; bN = allv[idx[k_step:]]
        dTN = float(np.mean(sN) - np.mean(bN))
        zN = dTN / se
        if (dTN <= EFFECT_C) and (zN <= -zcrit):
            fp += 1
    fpr = fp / SHUFFLES
    fpr_up95 = clopper_pearson_upper(fp, SHUFFLES, alpha=0.05)

    rows.append(dict(
        file=f,
        labeled_csv=out_csv,
        n_base=len(base), n_step=len(step),
        change_point=int(k), flipped=bool(flipped),
        dT=dT, z=z, zcrit=zcrit,
        positive=int(positive),
        FPR=fpr, FPR_upper95=fpr_up95
    ))
    print(f"[labeled] {Path(f).name} -> {Path(out_csv).name} | ΔT={dT:.3f}, z={z:.2f}, pos={positive}, FPR={fpr:.4f}, FPR↑95={fpr_up95:.4f}")

# ---------- Save report ----------
report = pd.DataFrame(rows)
out_dir = Path(ROOTS[0]) / "artifacts" / "metrics"
out_dir.mkdir(parents=True, exist_ok=True)
out_path = out_dir / f"cnt_cooling_fpr_report_{datetime.now():%Y%m%d-%H%M%S}.csv"
report.to_csv(out_path, index=False)

print("\n== Cooling FPR Report ==")
if not report.empty:
    print(report.to_string(index=False))
    print("\nSaved:", out_path)
else:
    print("No labeled files produced (see skips above).")


Found 16 cooling file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830_labeled.csv
   ... +8 more
[skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv: too few rows (0)
[skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv: too few rows (0)
[skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-12

In [7]:
# == CNT Unified False-Positive Audit — Single Cell (Cooling + EEG + GRA + Forecast) ==
# Telos edition: robust, reproducible, dashboard-ready.
# - Cooling: auto-label (change-point), skip already-labeled, MBB SE + pooled fallback,
#            decision rule (ΔT <= -0.5 °C & z <= -2.58), 5,000 label-shuffles for FPR,
#            95% Clopper–Pearson upper bound, writes *_labeled.csv once.
# - EEG: discovers p-like column; reports BH discoveries, Storey π0, FDR@0.01;
#        if columns p_null_* exist (null-label runs), computes EEG FPR.
# - GRA: restored vs truth_pass confusion + FPR when TN+FP>0.
# - Forecast: FPR from [alert, verified].
# - Outputs one CSV: CNT_Lab\artifacts\metrics\cnt_unified_fpr_report_YYYYMMDD-HHMMSS.csv

import os, re, glob, math
from pathlib import Path
from datetime import datetime
import numpy as np, pandas as pd

# ---------------- Config ----------------
ROOTS = [
    r"C:\Users\caleb\CNT_Lab",
    r"C:\Users\caleb\gra_runs",
]
OUT_DIR = Path(ROOTS[0]) / "artifacts" / "metrics"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Cooling params
MIN_PHASE_N   = 60           # min samples per phase
GUARD_FRAC    = 0.15         # avoid ends when choosing change point
MB_BOOT_B     = 1200         # moving-block bootstrap draws for SE
MB_BLOCK_FRAC = 1/15         # block length ≈ phase_len * this
SHUFFLES      = 5000         # label-shuffle draws for FPR
Z_CRIT        = 2.58         # one-sided ~0.005; conservative vs 0.01 (2.326)
DELTA_REQ     = -0.5         # °C threshold for "cooling achieved"

rng = np.random.default_rng(42)

# ---------------- Utilities ----------------
def ls(pattern):
    return [f for f in glob.glob(pattern, recursive=True) if os.path.isfile(f)]

def show(label, files, limit=6):
    print(f"\n[{label}] found {len(files)} file(s).")
    for f in files[:limit]: print(" •", f)
    if len(files) > limit: print(f"   ... +{len(files)-limit} more")

def benjamini_hochberg(pvals, alpha=0.05):
    p = np.asarray(pd.Series(pvals).astype(float))
    n = p.size
    order = np.argsort(p)
    ranks = np.empty_like(order); ranks[order] = np.arange(1, n+1)
    q = p * n / np.maximum(ranks, 1)
    q_sorted = np.minimum.accumulate(q[order][::-1])[::-1]
    qvalues = np.empty_like(q_sorted); qvalues[order] = q_sorted
    rej = qvalues <= alpha
    return rej, qvalues

def storey_pi0(p, lam=0.5):
    p = np.asarray(pd.Series(p).astype(float))
    p = p[np.isfinite(p)]
    if p.size == 0: return np.nan
    return float(min(1.0, max(0.0, np.mean(p >= lam)/(1-lam))))

def clopper_pearson_upper(k, n, alpha=0.05):
    if n == 0: return np.nan
    if k == 0: return 1 - (alpha)**(1/n)
    lo, hi = 0.0, 1.0
    for _ in range(40):
        mid = (lo+hi)/2
        exp = n*mid
        if exp >= k: hi = mid
        else:        lo = mid
    return hi

# ---------------- Cooling helpers ----------------
def pick_temp_col(df):
    lo = {c.lower(): c for c in df.columns}
    for key in ("temp","temperature","gpu_temp","cpu_temp","t","temp_c"):
        if key in lo: return lo[key]
    num = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    if not num: return None
    v = pd.Series({c: pd.Series(df[c]).var(skipna=True) for c in num}).sort_values(ascending=False)
    return v.index[0] if not v.empty else None

def best_change_point(x, guard=0.15):
    n = len(x)
    if n < 2*MIN_PHASE_N: return None
    lo = int(n*guard); hi = int(n*(1-guard))
    if hi - lo < MIN_PHASE_N: return None
    csum = np.cumsum(x); best_k, best_score = None, -np.inf
    for k in range(lo, hi):
        m1 = csum[k-1] / k
        m2 = (csum[-1] - csum[k-1]) / (n - k)
        s = abs(m2 - m1)
        if s > best_score: best_score, best_k = s, k
    return best_k

def moving_block_bootstrap_se(a, b, B=1200):
    na, nb = len(a), len(b)
    if na < MIN_PHASE_N or nb < MIN_PHASE_N: return np.nan
    ba = max(5, int(round(na * MB_BLOCK_FRAC)))
    bb = max(5, int(round(nb * MB_BLOCK_FRAC)))
    diffs = []
    for _ in range(B):
        ia = rng.integers(0, na, size=max(2, na // ba))
        ib = rng.integers(0, nb, size=max(2, nb // bb))
        ra = np.concatenate([a[i:i+ba] if i+ba<=na else np.r_[a[i:], a[:(i+ba-na)]] for i in ia])[:na]
        rb = np.concatenate([b[i:i+bb] if i+bb<=nb else np.r_[b[i:], b[:(i+bb-nb)]] for i in ib])[:nb]
        diffs.append(float(np.mean(rb) - np.mean(ra)))
    return float(np.std(diffs, ddof=1)) if diffs else np.nan

def pooled_se(a, b):
    na, nb = len(a), len(b)
    if na < 2 or nb < 2: return np.nan
    va, vb = float(np.var(a, ddof=1)), float(np.var(b, ddof=1))
    return float(np.sqrt(va/na + vb/nb))

# ---------------- Discover files ----------------
EEG_FILES, COOL_FILES, FORECAST_FILES, GRA_FILES = [], [], [], []
for root in ROOTS:
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\lap_erd_subject*.csv")
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\*laterality*.csv")
    COOL_FILES += ls(fr"{root}\**\archive\*cool*.csv")
    COOL_FILES += ls(fr"{root}\**\artifacts\**\*cool*.csv")
    FORECAST_FILES += ls(fr"{root}\**\artifacts\metrics\forecast_alerts_*.csv")
    GRA_FILES += ls(fr"{root}\**\policy_fix_stem_results.csv")
    GRA_FILES += ls(fr"{root}\**\*_policy_*_results.csv")

show("EEG", EEG_FILES)
show("Cooling", COOL_FILES)
show("Forecast", FORECAST_FILES)
show("GRA", GRA_FILES)

audits = []

# ---------------- EEG ----------------
EEG_NAME_HINTS = (
    r"^p(_?val(ue)?)?$",         # p, pval, p_value
    r"^p_[a-z]+$",               # p_alpha, p_beta, ...
    r"^[a-z]*_?p(_?val(ue)?)?$", # col_p, pval_col, ...
)
def find_pcol(df):
    cols = list(df.columns)
    for pat in EEG_NAME_HINTS:
        for c in cols:
            if re.fullmatch(pat, str(c), flags=re.I): return c
    candidates = []
    for c in cols:
        s = pd.to_numeric(df[c], errors="coerce")
        s = s[np.isfinite(s)]
        if s.size and (pd.Series(s).between(0,1).mean() > 0.95) and (pd.Series(s).nunique() > 10):
            candidates.append((c, float(pd.Series(s).mean())))
    if candidates:
        candidates.sort(key=lambda x: x[1])
        return candidates[0][0]
    return None

def eeg_null_cols(df):
    # any columns like p_null_001, p_null_..., or ending with '_null'
    pats = (r"^p_null_.*$", r".*_null$")
    cols = []
    for c in df.columns:
        for p in pats:
            if re.fullmatch(p, str(c), flags=re.I):
                cols.append(c); break
    return cols

for f in EEG_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[EEG:skip] {f} — read error: {e}"); continue
    pcol = find_pcol(df)
    if pcol is None:
        print(f"[EEG:hint] {f} — add a 'p' column or p_null_* columns to enable FPR.")
        continue
    p = pd.to_numeric(df[pcol], errors="coerce").fillna(1.0).values
    rej, q = benjamini_hochberg(p, alpha=0.01)
    pi0 = storey_pi0(p, lam=0.5)
    # EEG FPR if null runs provided
    nulls = eeg_null_cols(df)
    eeg_fpr = np.nan
    if nulls:
        # For each null column, ask: would BH@0.01 yield any discovery?
        fp = 0
        for nc in nulls:
            pn = pd.to_numeric(df[nc], errors="coerce").fillna(1.0).values
            rnull, _ = benjamini_hochberg(pn, alpha=0.01)
            if rnull.any(): fp += 1
        eeg_fpr = fp / len(nulls)
    audits.append(dict(
        module="EEG", file=f,
        positives=int(np.sum(rej)), tests=int(p.size),
        FPR=(None if np.isnan(eeg_fpr) else float(eeg_fpr)),
        pi0=float(pi0), FDR_alpha_0p01=float(np.mean(q < 0.01))
    ))

# ---------------- Cooling ----------------
cool_rows = []
for f in COOL_FILES:
    # Skip outputs or already-labeled re-runs
    base_name = Path(f).name.lower()
    if base_name.endswith("_labeled.csv") or base_name.endswith("_labeled_labeled.csv"):
        continue
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Cooling:skip] {f} — read error: {e}"); continue

    # If already has a phase column, don't rewrite; just compute stats
    has_phase = any(c.lower()=="phase" for c in df.columns)

    tcol = pick_temp_col(df)
    if tcol is None:
        print(f"[Cooling:skip] {f} — no temperature-like column"); continue

    temp = pd.to_numeric(df[tcol], errors="coerce")
    temp = temp[np.isfinite(temp)].values
    if temp.size < 2*MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — too few rows ({temp.size})"); continue

    if has_phase:
        lab = df[[c for c in df.columns if c.lower()=="phase"][0]].astype(str).str.lower().values[:len(temp)]
        step_idx = (lab=="step") | (lab=="cool") | (lab=="1") | (lab=="true") | (lab=="post")
        base = temp[~step_idx]; step = temp[step_idx]
        flipped = (np.mean(base) < np.mean(step))  # just for record
        k = np.nan
        out_csv = f  # no rewrite
    else:
        k = best_change_point(temp, guard=GUARD_FRAC)
        if k is None:
            print(f"[Cooling:skip] {f} — no robust change point"); continue
        base_raw, step_raw = temp[:k], temp[k:]
        if np.mean(base_raw) < np.mean(step_raw):
            base, step = step_raw, base_raw; flipped=True
            mask = np.r_[np.zeros_like(step_raw, bool), np.ones_like(base_raw, bool)]
        else:
            base, step = base_raw, step_raw; flipped=False
            mask = np.r_[np.zeros(k, bool), np.ones(len(temp)-k, bool)]
        if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
            print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue
        out_csv = f.replace(".csv", "_labeled.csv")
        df2 = df.copy()
        n = len(temp)
        phase = np.where(mask[:n], "step", "baseline")
        if len(df2) > n: phase = np.r_[phase, np.repeat(phase[-1], len(df2)-n)]
        df2["phase"] = phase[:len(df2)]
        df2.to_csv(out_csv, index=False)

    # Stats
    if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue

    dT = float(np.mean(step) - np.mean(base))
    se = moving_block_bootstrap_se(base, step, B=MB_BOOT_B)
    if not (np.isfinite(se) and se > 0): se = pooled_se(base, step)
    if not (np.isfinite(se) and se > 0):
        print(f"[Cooling:skip] {f} — cannot estimate SE"); continue

    z = dT / se
    positive = int((dT <= DELTA_REQ) and (z <= -Z_CRIT))

    # Label-shuffle null
    allv = np.concatenate([base, step]); k_step = len(step); n_all = len(allv)
    fp = 0
    for _ in range(SHUFFLES):
        idx = rng.permutation(n_all)
        sN = allv[idx[:k_step]]; bN = allv[idx[k_step:]]
        dTN = float(np.mean(sN) - np.mean(bN))
        zN = dTN / se
        if (dTN <= DELTA_REQ) and (zN <= -Z_CRIT):
            fp += 1
    fpr = fp / SHUFFLES
    fpr_up95 = clopper_pearson_upper(fp, SHUFFLES, alpha=0.05)

    cool_rows.append(dict(
        module="Cooling", file=f, labeled_csv=out_csv,
        n_base=len(base), n_step=len(step),
        change_point=(None if np.isnan(k) else (None if k is None else int(k))),
        flipped=bool(flipped),
        dT=dT, z=z, zcrit=Z_CRIT,
        positive=int(positive),
        FPR=float(fpr), FPR_upper95=float(fpr_up95)
    ))

# Add Cooling to audits table (keep key cols compact)
for r in cool_rows:
    audits.append(dict(
        module="Cooling", file=r["file"],
        positives=r["positive"], tests=1,
        FPR=r["FPR"], FPR_upper95=r["FPR_upper95"],
        dT=r["dT"], z=r["z"]
    ))

# ---------------- Forecast ----------------
for f in FORECAST_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Forecast:skip] {f} — read error: {e}"); continue
    if not {"alert","verified"}.issubset(df.columns):
        print(f"[Forecast:skip] {f} — needs ['alert','verified']"); continue
    tp = int(((df["alert"]==1) & (df["verified"]==1)).sum())
    fp = int(((df["alert"]==1) & (df["verified"]==0)).sum())
    tn = int(((df["alert"]==0) & (df["verified"]==0)).sum())
    fn = int(((df["alert"]==0) & (df["verified"]==1)).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="Forecast", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- GRA ----------------
for f in GRA_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[GRA:skip] {f} — read error: {e}"); continue
    cols = {c.lower(): c for c in df.columns}
    if not (("restored" in cols) and ("truth_pass" in cols)):
        print(f"[GRA:skip] {f} — needs 'restored' & 'truth_pass'"); continue
    restored = df[cols["restored"]].astype(bool)
    truth    = df[cols["truth_pass"]].astype(bool)
    tp = int((restored & truth).sum())
    fp = int((restored & ~truth).sum())
    tn = int((~restored & ~truth).sum())
    fn = int((~restored & truth).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="GRA", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- Save & print ----------------
audit_df = pd.DataFrame(audits)
stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
out_path = OUT_DIR / f"cnt_unified_fpr_report_{stamp}.csv"
if not audit_df.empty:
    audit_df.to_csv(out_path, index=False)
    print("\n== CNT Unified False-Positive Audit ==")
    print(audit_df.to_string(index=False))
    print("\nSaved:", out_path)
else:
    print("\n== No eligible items found to audit ==")

# Optional: print Cooling detail table (with bounds) for quick quoting
if cool_rows:
    cool_df = pd.DataFrame(cool_rows)
    cool_out = OUT_DIR / f"cnt_cooling_fpr_detail_{stamp}.csv"
    cool_df.to_csv(cool_out, index=False)
    print("\n== Cooling Detail (with 95% bounds) ==")
    print(cool_df[["file","dT","z","positive","FPR","FPR_upper95"]].to_string(index=False))
    print("\nSaved:", cool_out)



[EEG] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\artifacts\pli_humans_100plus\tests_functional\motor_cuelocked\lap_erd\tables\lap_erd_subject50.csv

[Cooling] found 19 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141_labeled.csv
   ... +13 more

[Forecast] found 0 file(s).

[GRA] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\gra_runs\gra_v0_3_policy_fix_stem_20251016-095737\policy_fix_stem_results.csv
[Cooling:skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv — too few rows (0)
[Cooling:skip] C

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [8]:
# == CNT Unified False-Positive Audit — Rolled & Hardened ==
# Cooling: prefer *_labeled.csv if present; robust change-point; MBB SE + pooled fallback;
#          decision rule (ΔT <= -0.5 °C & z <= -2.58); 5,000 label-shuffles; 95% CP upper bound
# EEG:     BH discoveries + Storey π0; optional FPR if p_null_* columns exist
# GRA:     restored vs truth_pass confusion + FPR
# Forecast:FPR from [alert, verified]
# Output:  CNT_Lab\artifacts\metrics\cnt_unified_fpr_report_YYYYMMDD-HHMMSS.csv
#          + Cooling detail table (with 95% bounds)

import os, re, glob, math
from pathlib import Path
from datetime import datetime
import numpy as np, pandas as pd

# ---------------- Config ----------------
ROOTS = [
    r"C:\Users\caleb\CNT_Lab",
    r"C:\Users\caleb\gra_runs",
]
OUT_DIR = Path(ROOTS[0]) / "artifacts" / "metrics"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Cooling params
MIN_PHASE_N   = 60           # minimum samples per phase
GUARD_FRAC    = 0.15         # avoid edges when choosing change point
MB_BOOT_B     = 1200         # moving-block bootstrap draws
MB_BLOCK_FRAC = 1/15         # block length ≈ phase_len * this
SHUFFLES      = 5000         # label-shuffle draws for FPR
Z_CRIT        = 2.58         # one-sided ~0.005; conservative vs 0.01 (2.326)
DELTA_REQ     = -0.5         # °C threshold for "cooling achieved"

rng = np.random.default_rng(42)

# ---------------- Utilities ----------------
def ls(pattern):
    return [f for f in glob.glob(pattern, recursive=True) if os.path.isfile(f)]

def show(label, files, limit=6):
    print(f"\n[{label}] found {len(files)} file(s).")
    for f in files[:limit]: print(" •", f)
    if len(files) > limit: print(f"   ... +{len(files)-limit} more")

def benjamini_hochberg(pvals, alpha=0.05):
    p = np.asarray(pd.Series(pvals).astype(float))
    n = p.size
    order = np.argsort(p)
    ranks = np.empty_like(order); ranks[order] = np.arange(1, n+1)
    q = p * n / np.maximum(ranks, 1)
    q_sorted = np.minimum.accumulate(q[order][::-1])[::-1]
    qvalues = np.empty_like(q_sorted); qvalues[order] = q_sorted
    rej = qvalues <= alpha
    return rej, qvalues

def storey_pi0(p, lam=0.5):
    p = np.asarray(pd.Series(p).astype(float))
    p = p[np.isfinite(p)]
    if p.size == 0: return np.nan
    return float(min(1.0, max(0.0, np.mean(p >= lam)/(1-lam))))

def clopper_pearson_upper(k, n, alpha=0.05):
    if n == 0: return np.nan
    if k == 0: return 1 - (alpha)**(1/n)
    lo, hi = 0.0, 1.0
    for _ in range(40):
        mid = (lo+hi)/2
        if n*mid >= k: hi = mid
        else:          lo = mid
    return hi

# ---------------- Cooling helpers ----------------
def pick_temp_col(df):
    lo = {c.lower(): c for c in df.columns}
    for key in ("temp","temperature","gpu_temp","cpu_temp","t","temp_c"):
        if key in lo: return lo[key]
    num = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    if not num: return None
    v = pd.Series({c: pd.Series(df[c]).var(skipna=True) for c in num}).sort_values(ascending=False)
    return v.index[0] if not v.empty else None

def best_change_point(x, guard=0.15):
    n = len(x)
    lo = max(MIN_PHASE_N, int(n*guard))
    hi = min(n - MIN_PHASE_N, int(n*(1-guard)))
    if hi - lo < 1: return None
    csum = np.cumsum(x, dtype=float)
    best_k, best_score = None, -np.inf
    for k in range(lo, hi+1):
        m1 = csum[k-1] / k
        m2 = (csum[-1] - csum[k-1]) / (n - k)
        s = abs(m2 - m1)
        if s > best_score: best_score, best_k = s, k
    return best_k

def moving_block_bootstrap_se(a, b, B=1200):
    na, nb = len(a), len(b)
    if na < MIN_PHASE_N or nb < MIN_PHASE_N: return np.nan
    ba = max(5, int(round(na * MB_BLOCK_FRAC)))
    bb = max(5, int(round(nb * MB_BLOCK_FRAC)))
    diffs = []
    for _ in range(B):
        ia = rng.integers(0, na, size=max(2, na // ba))
        ib = rng.integers(0, nb, size=max(2, nb // bb))
        ra = np.concatenate([a[i:i+ba] if i+ba<=na else np.r_[a[i:], a[:(i+ba-na)]] for i in ia])[:na]
        rb = np.concatenate([b[i:i+bb] if i+bb<=nb else np.r_[b[i:], b[:(i+bb-nb)]] for i in ib])[:nb]
        diffs.append(float(np.mean(rb) - np.mean(ra)))
    return float(np.std(diffs, ddof=1)) if diffs else np.nan

def pooled_se(a, b):
    na, nb = len(a), len(b)
    if na < 2 or nb < 2: return np.nan
    va, vb = float(np.var(a, ddof=1)), float(np.var(b, ddof=1))
    return float(np.sqrt(va/na + vb/nb))

# ---------------- Discover EEG/GRA/Forecast ----------------
EEG_FILES, FORECAST_FILES, GRA_FILES = [], [], []
for root in ROOTS:
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\lap_erd_subject*.csv")
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\*laterality*.csv")
    FORECAST_FILES += ls(fr"{root}\**\artifacts\metrics\forecast_alerts_*.csv")
    GRA_FILES += ls(fr"{root}\**\policy_fix_stem_results.csv")
    GRA_FILES += ls(fr"{root}\**\*_policy_*_results.csv")

# ---------------- Discover Cooling (prefer labeled twins) ----------------
ALL_COOL = []
for root in ROOTS:
    ALL_COOL += ls(fr"{root}\**\archive\*cool*.csv")
    ALL_COOL += ls(fr"{root}\**\artifacts\**\*cool*.csv")

by_stem = {}
for f in ALL_COOL:
    p = Path(f)
    # normalize twin key: strip any trailing "_labeled" clusters
    key = re.sub(r"_labeled(?:_labeled)*\.csv$", ".csv", p.name, flags=re.I)
    by_stem.setdefault((p.parent, key), []).append(p)

COOL_FILES = []
for (parent, key), files in by_stem.items():
    labeled = [x for x in files if re.search(r"_labeled\.csv$", x.name, flags=re.I)]
    if labeled:
        COOL_FILES.append(str(sorted(labeled)[-1]))     # newest labeled
    else:
        raw = [x for x in files if not re.search(r"_labeled\.csv$", x.name, flags=re.I)]
        if raw: COOL_FILES.append(str(sorted(raw)[-1]))

show("EEG", EEG_FILES)
show("Cooling", COOL_FILES)
show("Forecast", FORECAST_FILES)
show("GRA", GRA_FILES)

audits = []
cool_rows = []

# ---------------- EEG ----------------
EEG_NAME_HINTS = (
    r"^p(_?val(ue)?)?$",         # p, pval, p_value
    r"^p_[a-z]+$",               # p_alpha, p_beta, ...
    r"^[a-z]*_?p(_?val(ue)?)?$", # col_p, pval_col, ...
)
def find_pcol(df):
    cols = list(df.columns)
    for pat in EEG_NAME_HINTS:
        for c in cols:
            if re.fullmatch(pat, str(c), flags=re.I): return c
    candidates = []
    for c in cols:
        s = pd.to_numeric(df[c], errors="coerce")
        s = s[np.isfinite(s)]
        if s.size and (pd.Series(s).between(0,1).mean() > 0.95) and (pd.Series(s).nunique() > 10):
            candidates.append((c, float(pd.Series(s).mean())))
    if candidates:
        candidates.sort(key=lambda x: x[1])
        return candidates[0][0]
    return None

def eeg_null_cols(df):
    pats = (r"^p_null_.*$", r".*_null$")
    cols = []
    for c in df.columns:
        for p in pats:
            if re.fullmatch(p, str(c), flags=re.I):
                cols.append(c); break
    return cols

for f in EEG_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[EEG:skip] {f} — read error: {e}"); continue
    pcol = find_pcol(df)
    if pcol is None:
        print(f"[EEG:hint] {f} — add a 'p' column or p_null_* columns to enable FPR.")
        continue
    p = pd.to_numeric(df[pcol], errors="coerce").fillna(1.0).values
    rej, q = benjamini_hochberg(p, alpha=0.01)
    pi0 = storey_pi0(p, lam=0.5)
    # Optional EEG FPR if null p-columns provided
    nulls = eeg_null_cols(df)
    eeg_fpr = np.nan
    if nulls:
        fp = 0
        for nc in nulls:
            pn = pd.to_numeric(df[nc], errors="coerce").fillna(1.0).values
            rnull, _ = benjamini_hochberg(pn, alpha=0.01)
            if rnull.any(): fp += 1
        eeg_fpr = fp / len(nulls)
    audits.append(dict(
        module="EEG", file=f,
        positives=int(np.sum(rej)), tests=int(p.size),
        FPR=(None if np.isnan(eeg_fpr) else float(eeg_fpr)),
        pi0=float(pi0), FDR_alpha_0p01=float(np.mean(q < 0.01))
    ))

# ---------------- Cooling ----------------
for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Cooling:skip] {f} — read error: {e}"); continue

    # If already labeled, use given phases; else auto-label by change-point
    has_phase = any(c.lower()=="phase" for c in df.columns)

    tcol = pick_temp_col(df)
    if tcol is None:
        print(f"[Cooling:skip] {f} — no temperature-like column"); continue

    temp = pd.to_numeric(df[tcol], errors="coerce")
    temp = temp[np.isfinite(temp)].values
    if temp.size < 2*MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — too few rows ({temp.size})"); continue

    if has_phase:
        lab = df[[c for c in df.columns if c.lower()=="phase"][0]].astype(str).str.lower().values[:len(temp)]
        step_idx = (lab=="step") | (lab=="cool") | (lab=="1") | (lab=="true") | (lab=="post")
        base, step = temp[~step_idx], temp[step_idx]
        flipped, k, out_csv = (np.mean(base) < np.mean(step)), np.nan, f
    else:
        k = best_change_point(temp, guard=GUARD_FRAC)
        if k is None:
            print(f"[Cooling:skip] {f} — no robust change point"); continue
        base_raw, step_raw = temp[:k], temp[k:]
        if np.mean(base_raw) < np.mean(step_raw):
            base, step = step_raw, base_raw; flipped=True
            mask = np.r_[np.zeros_like(step_raw, bool), np.ones_like(base_raw, bool)]
        else:
            base, step = base_raw, step_raw; flipped=False
            mask = np.r_[np.zeros(k, bool), np.ones(len(temp)-k, bool)]
        if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
            print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue
        out_csv = f.replace(".csv", "_labeled.csv")
        df2 = df.copy()
        n = len(temp)
        phase = np.where(mask[:n], "step", "baseline")
        if len(df2) > n: phase = np.r_[phase, np.repeat(phase[-1], len(df2)-n)]
        df2["phase"] = phase[:len(df2)]
        df2.to_csv(out_csv, index=False)

    # Hard guards to avoid numpy warnings
    if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue

    dT = float(np.mean(step) - np.mean(base))

    se = moving_block_bootstrap_se(base, step, B=MB_BOOT_B)
    if not (np.isfinite(se) and se > 0):
        se = pooled_se(base, step)
    if not (np.isfinite(se) and se > 0):
        se = 1e-9  # last-ditch jitter to avoid div-by-zero on perfectly flat logs

    z = dT / se
    positive = int((dT <= DELTA_REQ) and (z <= -Z_CRIT))

    # Label-shuffle null
    allv = np.concatenate([base, step]); k_step = len(step); n_all = len(allv)
    fp = 0
    for _ in range(SHUFFLES):
        idx = rng.permutation(n_all)
        sN = allv[idx[:k_step]]; bN = allv[idx[k_step:]]
        dTN = float(np.mean(sN) - np.mean(bN))
        zN = dTN / se
        if (dTN <= DELTA_REQ) and (zN <= -Z_CRIT):
            fp += 1
    fpr = fp / SHUFFLES
    fpr_up95 = clopper_pearson_upper(fp, SHUFFLES, alpha=0.05)

    cool_rows.append(dict(
        module="Cooling", file=f, labeled_csv=out_csv,
        n_base=len(base), n_step=len(step),
        change_point=(None if (k is None or np.isnan(k)) else int(k)),
        flipped=bool(flipped),
        dT=dT, z=z, zcrit=Z_CRIT,
        positive=int(positive),
        FPR=float(fpr), FPR_upper95=float(fpr_up95)
    ))

    # Compact entry for unified audit table
    audits.append(dict(
        module="Cooling", file=f,
        positives=int(positive), tests=1,
        FPR=float(fpr), FPR_upper95=float(fpr_up95),
        dT=dT, z=z
    ))

# ---------------- Forecast ----------------
for f in FORECAST_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Forecast:skip] {f} — read error: {e}"); continue
    if not {"alert","verified"}.issubset(df.columns):
        print(f"[Forecast:skip] {f} — needs ['alert','verified']"); continue
    tp = int(((df["alert"]==1) & (df["verified"]==1)).sum())
    fp = int(((df["alert"]==1) & (df["verified"]==0)).sum())
    tn = int(((df["alert"]==0) & (df["verified"]==0)).sum())
    fn = int(((df["alert"]==0) & (df["verified"]==1)).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="Forecast", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- GRA ----------------
for f in GRA_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[GRA:skip] {f} — read error: {e}"); continue
    cols = {c.lower(): c for c in df.columns}
    if not (("restored" in cols) and ("truth_pass" in cols)):
        print(f"[GRA:skip] {f} — needs 'restored' & 'truth_pass'"); continue
    restored = df[cols["restored"]].astype(bool)
    truth    = df[cols["truth_pass"]].astype(bool)
    tp = int((restored & truth).sum())
    fp = int((restored & ~truth).sum())
    tn = int((~restored & ~truth).sum())
    fn = int((~restored & truth).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="GRA", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- Save & print ----------------
audit_df = pd.DataFrame(audits)
stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
unified_path = OUT_DIR / f"cnt_unified_fpr_report_{stamp}.csv"
if not audit_df.empty:
    audit_df.to_csv(unified_path, index=False)
    print("\n== CNT Unified False-Positive Audit ==")
    print(audit_df.to_string(index=False))
    print("\nSaved:", unified_path)
else:
    print("\n== No eligible items found to audit ==")

if cool_rows:
    cool_df = pd.DataFrame(cool_rows)
    cool_out = OUT_DIR / f"cnt_cooling_fpr_detail_{stamp}.csv"
    cool_df.to_csv(cool_out, index=False)
    print("\n== Cooling Detail (with 95% bounds) ==")
    print(cool_df[["file","dT","z","positive","FPR","FPR_upper95"]].to_string(index=False))
    print("\nSaved:", cool_out)



[EEG] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\artifacts\pli_humans_100plus\tests_functional\motor_cuelocked\lap_erd\tables\lap_erd_subject50.csv

[Cooling] found 11 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_20251015-132627_labeled.csv
   ... +5 more

[Forecast] found 0 file(s).

[GRA] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\gra_runs\gra_v0_3_policy_fix_stem_20251016-095737\policy_fix_stem_results.csv
[Cooling:skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv — too few rows (

In [9]:
# == CNT Unified False-Positive Audit — Fused & Upgraded (Telos edition) ==
# Cooling:
#   • Prefer *_labeled.csv (ignore *_labeled_labeled.csv), or auto-label via robust change-point.
#   • Re-orient labeled runs so 'baseline' is hotter (cooling expectation).
#   • SE via moving-block bootstrap (autocorr aware), fallback to pooled SE, jitter last-ditch.
#   • Decision rule: ΔT <= -0.5 °C and z <= -2.58 (one-sided, conservative).
#   • 5,000 label-shuffles for FPR + 95% Clopper–Pearson upper bound.
# EEG:
#   • Reports BH (FDR@0.01) + Storey π0; computes FPR if p_null_* columns exist.
# GRA:
#   • Confusion + FPR from 'restored' vs 'truth_pass'.
# Forecast:
#   • FPR from [alert, verified].
# Outputs:
#   • CNT_Lab\artifacts\metrics\cnt_unified_fpr_report_YYYYMMDD-HHMMSS.csv
#   • CNT_Lab\artifacts\metrics\cnt_cooling_fpr_detail_YYYYMMDD-HHMMSS.csv

import os, re, glob, math
from pathlib import Path
from datetime import datetime
import numpy as np, pandas as pd

# ---------------- Config ----------------
ROOTS = [
    r"C:\Users\caleb\CNT_Lab",
    r"C:\Users\caleb\gra_runs",
]
OUT_DIR = Path(ROOTS[0]) / "artifacts" / "metrics"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Cooling params
MIN_PHASE_N   = 60           # minimum samples per phase
GUARD_FRAC    = 0.15         # avoid edges when choosing change point
MB_BOOT_B     = 1200         # moving-block bootstrap draws
MB_BLOCK_FRAC = 1/15         # block length ≈ phase_len * this
SHUFFLES      = 5000         # label-shuffle draws for FPR
Z_CRIT        = 2.58         # one-sided ~0.005; conservative vs 0.01 (2.326)
DELTA_REQ     = -0.5         # °C threshold for "cooling achieved"

rng = np.random.default_rng(42)

# ---------------- Utilities ----------------
def ls(pattern):
    return [f for f in glob.glob(pattern, recursive=True) if os.path.isfile(f)]

def show(label, files, limit=6):
    print(f"\n[{label}] found {len(files)} file(s).")
    for f in files[:limit]: print(" •", f)
    if len(files) > limit: print(f"   ... +{len(files)-limit} more")

def benjamini_hochberg(pvals, alpha=0.05):
    p = np.asarray(pd.Series(pvals).astype(float))
    n = p.size
    order = np.argsort(p)
    ranks = np.empty_like(order); ranks[order] = np.arange(1, n+1)
    q = p * n / np.maximum(ranks, 1)
    q_sorted = np.minimum.accumulate(q[order][::-1])[::-1]
    qvalues = np.empty_like(q_sorted); qvalues[order] = q_sorted
    rej = qvalues <= alpha
    return rej, qvalues

def storey_pi0(p, lam=0.5):
    p = np.asarray(pd.Series(p).astype(float))
    p = p[np.isfinite(p)]
    if p.size == 0: return np.nan
    return float(min(1.0, max(0.0), np.mean(p >= lam)/(1-lam)))

def clopper_pearson_upper(k, n, alpha=0.05):
    if n == 0: return np.nan
    if k == 0: return 1 - (alpha)**(1/n)
    lo, hi = 0.0, 1.0
    for _ in range(40):
        mid = (lo+hi)/2
        if n*mid >= k: hi = mid
        else:          lo = mid
    return hi

# ---------------- Cooling helpers ----------------
def pick_temp_col(df):
    lo = {c.lower(): c for c in df.columns}
    for key in ("temp","temperature","gpu_temp","cpu_temp","t","temp_c"):
        if key in lo: return lo[key]
    num = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    if not num: return None
    v = pd.Series({c: pd.Series(df[c]).var(skipna=True) for c in num}).sort_values(ascending=False)
    return v.index[0] if not v.empty else None

def best_change_point(x, guard=0.15):
    n = len(x)
    lo = max(MIN_PHASE_N, int(n*guard))
    hi = min(n - MIN_PHASE_N, int(n*(1-guard)))
    if hi - lo < 1: return None
    csum = np.cumsum(x, dtype=float)
    best_k, best_score = None, -np.inf
    for k in range(lo, hi+1):
        m1 = csum[k-1] / k
        m2 = (csum[-1] - csum[k-1]) / (n - k)
        s = abs(m2 - m1)
        if s > best_score: best_score, best_k = s, k
    return best_k

def moving_block_bootstrap_se(a, b, B=1200):
    na, nb = len(a), len(b)
    if na < MIN_PHASE_N or nb < MIN_PHASE_N: return np.nan
    ba = max(5, int(round(na * MB_BLOCK_FRAC)))
    bb = max(5, int(round(nb * MB_BLOCK_FRAC)))
    diffs = []
    for _ in range(B):
        ia = rng.integers(0, na, size=max(2, na // ba))
        ib = rng.integers(0, nb, size=max(2, nb // bb))
        ra = np.concatenate([a[i:i+ba] if i+ba<=na else np.r_[a[i:], a[:(i+ba-na)]] for i in ia])[:na]
        rb = np.concatenate([b[i:i+bb] if i+bb<=nb else np.r_[b[i:], b[:(i+bb-nb)]] for i in ib])[:nb]
        diffs.append(float(np.mean(rb) - np.mean(ra)))
    return float(np.std(diffs, ddof=1)) if diffs else np.nan

def pooled_se(a, b):
    na, nb = len(a), len(b)
    if na < 2 or nb < 2: return np.nan
    va, vb = float(np.var(a, ddof=1)), float(np.var(b, ddof=1))
    return float(np.sqrt(va/na + vb/nb))

# ---------------- Discover EEG/GRA/Forecast ----------------
EEG_FILES, FORECAST_FILES, GRA_FILES = [], [], []
for root in ROOTS:
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\lap_erd_subject*.csv")
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\*laterality*.csv")
    FORECAST_FILES += ls(fr"{root}\**\artifacts\metrics\forecast_alerts_*.csv")
    GRA_FILES += ls(fr"{root}\**\policy_fix_stem_results.csv")
    GRA_FILES += ls(fr"{root}\**\*_policy_*_results.csv")

# ---------------- Discover Cooling (prefer single-labeled; ignore double-labeled) ----------------
ALL_COOL = []
for root in ROOTS:
    ALL_COOL += ls(fr"{root}\**\archive\*cool*.csv")
    ALL_COOL += ls(fr"{root}\**\artifacts\**\*cool*.csv")

by_stem = {}
for f in ALL_COOL:
    p = Path(f)
    key = re.sub(r"(?:_labeled)+\.csv$", ".csv", p.name, flags=re.I)  # normalize any labeled clusters
    by_stem.setdefault((p.parent, key), []).append(p)

COOL_FILES = []
for (parent, key), files in by_stem.items():
    single_labeled = [x for x in files if re.search(r"(?<!_labeled)_labeled\.csv$", x.name, flags=re.I)]
    multi_labeled  = [x for x in files if re.search(r"_labeled_labeled\.csv$", x.name, flags=re.I)]
    raw            = [x for x in files if not re.search(r"_labeled\.csv$", x.name, flags=re.I)]
    if single_labeled:
        COOL_FILES.append(str(sorted(single_labeled)[-1]))
    elif raw:
        COOL_FILES.append(str(sorted(raw)[-1]))
    elif multi_labeled:
        COOL_FILES.append(str(sorted(multi_labeled)[-1]))

show("EEG", EEG_FILES)
show("Cooling", COOL_FILES)
show("Forecast", FORECAST_FILES)
show("GRA", GRA_FILES)

audits = []
cool_rows = []

# ---------------- EEG ----------------
EEG_NAME_HINTS = (
    r"^p(_?val(ue)?)?$",         # p, pval, p_value
    r"^p_[a-z]+$",               # p_alpha, p_beta, ...
    r"^[a-z]*_?p(_?val(ue)?)?$", # col_p, pval_col, ...
)
def find_pcol(df):
    cols = list(df.columns)
    for pat in EEG_NAME_HINTS:
        for c in cols:
            if re.fullmatch(pat, str(c), flags=re.I): return c
    candidates = []
    for c in cols:
        s = pd.to_numeric(df[c], errors="coerce")
        s = s[np.isfinite(s)]
        if s.size and (pd.Series(s).between(0,1).mean() > 0.95) and (pd.Series(s).nunique() > 10):
            candidates.append((c, float(pd.Series(s).mean())))
    if candidates:
        candidates.sort(key=lambda x: x[1])
        return candidates[0][0]
    return None

def eeg_null_cols(df):
    pats = (r"^p_null_.*$", r".*_null$")
    cols = []
    for c in df.columns:
        for p in pats:
            if re.fullmatch(p, str(c), flags=re.I):
                cols.append(c); break
    return cols

for f in EEG_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[EEG:skip] {f} — read error: {e}"); continue
    pcol = find_pcol(df)
    if pcol is None:
        print(f"[EEG:hint] {f} — add a 'p' column or p_null_* columns to enable FPR.")
        continue
    p = pd.to_numeric(df[pcol], errors="coerce").fillna(1.0).values
    rej, q = benjamini_hochberg(p, alpha=0.01)
    pi0 = storey_pi0(p, lam=0.5)
    # Optional EEG FPR if null p-columns provided
    nulls = eeg_null_cols(df)
    eeg_fpr = np.nan
    if nulls:
        fp = 0
        for nc in nulls:
            pn = pd.to_numeric(df[nc], errors="coerce").fillna(1.0).values
            rnull, _ = benjamini_hochberg(pn, alpha=0.01)
            if rnull.any(): fp += 1
        eeg_fpr = fp / len(nulls)
    audits.append(dict(
        module="EEG", file=f,
        positives=int(np.sum(rej)), tests=int(p.size),
        FPR=(None if np.isnan(eeg_fpr) else float(eeg_fpr)),
        pi0=float(pi0), FDR_alpha_0p01=float(np.mean(q < 0.01))
    ))

# ---------------- Cooling ----------------
for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Cooling:skip] {f} — read error: {e}"); continue

    has_phase = any(c.lower()=="phase" for c in df.columns)

    tcol = pick_temp_col(df)
    if tcol is None:
        print(f"[Cooling:skip] {f} — no temperature-like column"); continue

    temp = pd.to_numeric(df[tcol], errors="coerce")
    temp = temp[np.isfinite(temp)].values
    if temp.size < 2*MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — too few rows ({temp.size})"); continue

    if has_phase:
        lab = df[[c for c in df.columns if c.lower()=="phase"][0]].astype(str).str.lower().values[:len(temp)]
        step_idx = (lab=="step") | (lab=="cool") | (lab=="1") | (lab=="true") | (lab=="post")
        base, step = temp[~step_idx], temp[step_idx]
        # Re-orient so baseline is hotter (expect cooling: step <= base)
        if np.mean(base) < np.mean(step):
            base, step = step, base
            flipped = True
        else:
            flipped = False
        k, out_csv = np.nan, f
    else:
        k = best_change_point(temp, guard=GUARD_FRAC)
        if k is None:
            print(f"[Cooling:skip] {f} — no robust change point"); continue
        base_raw, step_raw = temp[:k], temp[k:]
        if np.mean(base_raw) < np.mean(step_raw):
            base, step = step_raw, base_raw; flipped=True
            mask = np.r_[np.zeros_like(step_raw, bool), np.ones_like(base_raw, bool)]
        else:
            base, step = base_raw, step_raw; flipped=False
            mask = np.r_[np.zeros(k, bool), np.ones(len(temp)-k, bool)]
        if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
            print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue
        out_csv = f.replace(".csv", "_labeled.csv")
        df2 = df.copy()
        n = len(temp)
        phase = np.where(mask[:n], "step", "baseline")
        if len(df2) > n: phase = np.r_[phase, np.repeat(phase[-1], len(df2)-n)]
        df2["phase"] = phase[:len(df2)]
        df2.to_csv(out_csv, index=False)

    # Guards
    if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue

    dT = float(np.mean(step) - np.mean(base))  # negative means cooling
    se = moving_block_bootstrap_se(base, step, B=MB_BOOT_B)
    if not (np.isfinite(se) and se > 0): se = pooled_se(base, step)
    if not (np.isfinite(se) and se > 0): se = 1e-9  # last-ditch jitter

    z = dT / se
    positive = int((dT <= DELTA_REQ) and (z <= -Z_CRIT))

    # Label-shuffle null
    allv = np.concatenate([base, step]); k_step = len(step); n_all = len(allv)
    fp = 0
    for _ in range(SHUFFLES):
        idx = rng.permutation(n_all)
        sN = allv[idx[:k_step]]; bN = allv[idx[k_step:]]
        dTN = float(np.mean(sN) - np.mean(bN))
        zN = dTN / se
        if (dTN <= DELTA_REQ) and (zN <= -Z_CRIT):
            fp += 1
    fpr = fp / SHUFFLES
    fpr_up95 = clopper_pearson_upper(fp, SHUFFLES, alpha=0.05)

    cool_rows.append(dict(
        module="Cooling", file=f, labeled_csv=out_csv,
        n_base=len(base), n_step=len(step),
        change_point=(None if (k is None or (isinstance(k, float) and np.isnan(k))) else int(k)),
        flipped=bool(flipped),
        dT=dT, z=z, zcrit=Z_CRIT,
        positive=int(positive),
        FPR=float(fpr), FPR_upper95=float(fpr_up95)
    ))
    audits.append(dict(
        module="Cooling", file=f,
        positives=int(positive), tests=1,
        FPR=float(fpr), FPR_upper95=float(fpr_up95),
        dT=dT, z=z
    ))

# ---------------- Forecast ----------------
for f in FORECAST_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Forecast:skip] {f} — read error: {e}"); continue
    if not {"alert","verified"}.issubset(df.columns):
        print(f"[Forecast:skip] {f} — needs ['alert','verified']"); continue
    tp = int(((df["alert"]==1) & (df["verified"]==1)).sum())
    fp = int(((df["alert"]==1) & (df["verified"]==0)).sum())
    tn = int(((df["alert"]==0) & (df["verified"]==0)).sum())
    fn = int(((df["alert"]==0) & (df["verified"]==1)).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="Forecast", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- GRA ----------------
for f in GRA_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[GRA:skip] {f} — read error: {e}"); continue
    cols = {c.lower(): c for c in df.columns}
    if not (("restored" in cols) and ("truth_pass" in cols)):
        print(f"[GRA:skip] {f} — needs 'restored' & 'truth_pass'"); continue
    restored = df[cols["restored"]].astype(bool)
    truth    = df[cols["truth_pass"]].astype(bool)
    tp = int((restored & truth).sum())
    fp = int((restored & ~truth).sum())
    tn = int((~restored & ~truth).sum())
    fn = int((~restored & truth).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="GRA", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- Save & print ----------------
audit_df = pd.DataFrame(audits)
stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
unified_path = OUT_DIR / f"cnt_unified_fpr_report_{stamp}.csv"
if not audit_df.empty:
    audit_df.to_csv(unified_path, index=False)
    print("\n== CNT Unified False-Positive Audit ==")
    print(audit_df.to_string(index=False))
    print("\nSaved:", unified_path)
else:
    print("\n== No eligible items found to audit ==")

if cool_rows:
    cool_df = pd.DataFrame(cool_rows)
    cool_out = OUT_DIR / f"cnt_cooling_fpr_detail_{stamp}.csv"
    cool_df.to_csv(cool_out, index=False)
    print("\n== Cooling Detail (with 95% bounds) ==")
    print(cool_df[["file","dT","z","positive","FPR","FPR_upper95"]].to_string(index=False))
    print("\nSaved:", cool_out)



[EEG] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\artifacts\pli_humans_100plus\tests_functional\motor_cuelocked\lap_erd\tables\lap_erd_subject50.csv

[Cooling] found 12 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_20251015-132627_labeled.csv
   ... +6 more

[Forecast] found 0 file(s).

[GRA] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\gra_runs\gra_v0_3_policy_fix_stem_20251016-095737\policy_fix_stem_results.csv


TypeError: 'float' object is not iterable

In [10]:
# == CNT Unified False-Positive Audit — Fused & Upgraded (Telos edition) ==
# Cooling:
#   • Prefer *_labeled.csv (ignore *_labeled_labeled.csv) or auto-label via robust change-point.
#   • Re-orient labeled runs so 'baseline' is hotter (cooling expectation).
#   • SE via moving-block bootstrap (autocorr-aware), fallback to pooled SE, jitter last-ditch.
#   • Decision rule: ΔT <= -0.5 °C and z <= -2.58 (one-sided, conservative).
#   • 5,000 label-shuffles for FPR + 95% Clopper–Pearson upper bound.
# EEG:
#   • Reports BH (FDR@0.01) + Storey π0; computes FPR if p_null_* columns exist.
# GRA:
#   • Confusion + FPR from 'restored' vs 'truth_pass'.
# Forecast:
#   • FPR from [alert, verified].
# Outputs:
#   • CNT_Lab\artifacts\metrics\cnt_unified_fpr_report_YYYYMMDD-HHMMSS.csv
#   • CNT_Lab\artifacts\metrics\cnt_cooling_fpr_detail_YYYYMMDD-HHMMSS.csv

import os, re, glob
from pathlib import Path
from datetime import datetime
import numpy as np, pandas as pd

# ---------------- Config ----------------
ROOTS = [
    r"C:\Users\caleb\CNT_Lab",
    r"C:\Users\caleb\gra_runs",
]
OUT_DIR = Path(ROOTS[0]) / "artifacts" / "metrics"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Cooling params
MIN_PHASE_N   = 60           # minimum samples per phase
GUARD_FRAC    = 0.15         # avoid edges when choosing change point
MB_BOOT_B     = 1200         # moving-block bootstrap draws
MB_BLOCK_FRAC = 1/15         # block length ≈ phase_len * this
SHUFFLES      = 5000         # label-shuffle draws for FPR
Z_CRIT        = 2.58         # one-sided ~0.005; conservative vs 0.01 (2.326)
DELTA_REQ     = -0.5         # °C threshold for "cooling achieved"

rng = np.random.default_rng(42)

# ---------------- Utilities ----------------
def ls(pattern):
    return [f for f in glob.glob(pattern, recursive=True) if os.path.isfile(f)]

def show(label, files, limit=6):
    print(f"\n[{label}] found {len(files)} file(s).")
    for f in files[:limit]: print(" •", f)
    if len(files) > limit: print(f"   ... +{len(files)-limit} more")

def benjamini_hochberg(pvals, alpha=0.05):
    p = np.asarray(pd.Series(pvals).astype(float))
    n = p.size if p is not None else 0
    if n == 0:
        return np.array([], dtype=bool), np.array([], dtype=float)
    order = np.argsort(p)
    ranks = np.empty_like(order); ranks[order] = np.arange(1, n+1)
    q = p * n / np.maximum(ranks, 1)
    q_sorted = np.minimum.accumulate(q[order][::-1])[::-1]
    qvalues = np.empty_like(q_sorted); qvalues[order] = q_sorted
    rej = qvalues <= alpha
    return rej, qvalues

def storey_pi0(p, lam=0.5):
    """
    Storey's pi0 estimator with safe clamping.
    pi0 = mean(p >= λ) / (1 - λ), clamped to [0,1].
    """
    p = np.asarray(pd.Series(p).astype(float))
    p = p[np.isfinite(p)]
    if p.size == 0:
        return np.nan
    lam = float(lam)
    if not (0.0 < lam < 1.0):
        lam = 0.5
    raw = np.mean(p >= lam) / (1.0 - lam)
    raw = float(raw)
    return max(0.0, min(1.0, raw))

def clopper_pearson_upper(k, n, alpha=0.05):
    if n == 0: return np.nan
    if k == 0: return 1 - (alpha)**(1/n)
    lo, hi = 0.0, 1.0
    for _ in range(40):
        mid = (lo+hi)/2
        if n*mid >= k: hi = mid
        else:          lo = mid
    return hi

# ---------------- Cooling helpers ----------------
def pick_temp_col(df):
    lo = {c.lower(): c for c in df.columns}
    for key in ("temp","temperature","gpu_temp","cpu_temp","t","temp_c"):
        if key in lo: return lo[key]
    num = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    if not num: return None
    v = pd.Series({c: pd.Series(df[c]).var(skipna=True) for c in num}).sort_values(ascending=False)
    return v.index[0] if not v.empty else None

def best_change_point(x, guard=0.15):
    n = len(x)
    lo = max(MIN_PHASE_N, int(n*guard))
    hi = min(n - MIN_PHASE_N, int(n*(1-guard)))
    if hi - lo < 1: return None
    csum = np.cumsum(x, dtype=float)
    best_k, best_score = None, -np.inf
    for k in range(lo, hi+1):
        m1 = csum[k-1] / k
        m2 = (csum[-1] - csum[k-1]) / (n - k)
        s = abs(m2 - m1)
        if s > best_score: best_score, best_k = s, k
    return best_k

def moving_block_bootstrap_se(a, b, B=1200):
    na, nb = len(a), len(b)
    if na < MIN_PHASE_N or nb < MIN_PHASE_N: return np.nan
    ba = max(5, int(round(na * MB_BLOCK_FRAC)))
    bb = max(5, int(round(nb * MB_BLOCK_FRAC)))
    diffs = []
    for _ in range(B):
        ia = rng.integers(0, na, size=max(2, na // ba))
        ib = rng.integers(0, nb, size=max(2, nb // bb))
        ra = np.concatenate([a[i:i+ba] if i+ba<=na else np.r_[a[i:], a[:(i+ba-na)]] for i in ia])[:na]
        rb = np.concatenate([b[i:i+bb] if i+bb<=nb else np.r_[b[i:], b[:(i+bb-nb)]] for i in ib])[:nb]
        diffs.append(float(np.mean(rb) - np.mean(ra)))
    return float(np.std(diffs, ddof=1)) if diffs else np.nan

def pooled_se(a, b):
    na, nb = len(a), len(b)
    if na < 2 or nb < 2: return np.nan
    va, vb = float(np.var(a, ddof=1)), float(np.var(b, ddof=1))
    return float(np.sqrt(va/na + vb/nb))

# ---------------- Discover EEG/GRA/Forecast ----------------
EEG_FILES, FORECAST_FILES, GRA_FILES = [], [], []
for root in ROOTS:
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\lap_erd_subject*.csv")
    EEG_FILES += ls(fr"{root}\**\pli_humans*\**\tables\*laterality*.csv")
    FORECAST_FILES += ls(fr"{root}\**\artifacts\metrics\forecast_alerts_*.csv")
    GRA_FILES += ls(fr"{root}\**\policy_fix_stem_results.csv")
    GRA_FILES += ls(fr"{root}\**\*_policy_*_results.csv")

# ---------------- Discover Cooling (prefer single-labeled; ignore double-labeled) ----------------
ALL_COOL = []
for root in ROOTS:
    ALL_COOL += ls(fr"{root}\**\archive\*cool*.csv")
    ALL_COOL += ls(fr"{root}\**\artifacts\**\*cool*.csv")

by_stem = {}
for f in ALL_COOL:
    p = Path(f)
    key = re.sub(r"(?:_labeled)+\.csv$", ".csv", p.name, flags=re.I)  # normalize labeled clusters
    by_stem.setdefault((p.parent, key), []).append(p)

COOL_FILES = []
for (parent, key), files in by_stem.items():
    single_labeled = [x for x in files if re.search(r"(?<!_labeled)_labeled\.csv$", x.name, flags=re.I)]
    multi_labeled  = [x for x in files if re.search(r"_labeled_labeled\.csv$", x.name, flags=re.I)]
    raw            = [x for x in files if not re.search(r"_labeled\.csv$", x.name, flags=re.I)]
    if single_labeled:
        COOL_FILES.append(str(sorted(single_labeled)[-1]))
    elif raw:
        COOL_FILES.append(str(sorted(raw)[-1]))
    elif multi_labeled:
        COOL_FILES.append(str(sorted(multi_labeled)[-1]))

show("EEG", EEG_FILES)
show("Cooling", COOL_FILES)
show("Forecast", FORECAST_FILES)
show("GRA", GRA_FILES)

audits = []
cool_rows = []

# ---------------- EEG ----------------
EEG_NAME_HINTS = (
    r"^p(_?val(ue)?)?$",         # p, pval, p_value
    r"^p_[a-z]+$",               # p_alpha, p_beta, ...
    r"^[a-z]*_?p(_?val(ue)?)?$", # col_p, pval_col, ...
)
def find_pcol(df):
    cols = list(df.columns)
    for pat in EEG_NAME_HINTS:
        for c in cols:
            if re.fullmatch(pat, str(c), flags=re.I): return c
    candidates = []
    for c in cols:
        s = pd.to_numeric(df[c], errors="coerce")
        s = s[np.isfinite(s)]
        if s.size and (pd.Series(s).between(0,1).mean() > 0.95) and (pd.Series(s).nunique() > 10):
            candidates.append((c, float(pd.Series(s).mean())))
    if candidates:
        candidates.sort(key=lambda x: x[1])
        return candidates[0][0]
    return None

def eeg_null_cols(df):
    pats = (r"^p_null_.*$", r".*_null$")
    cols = []
    for c in df.columns:
        for p in pats:
            if re.fullmatch(p, str(c), flags=re.I):
                cols.append(c); break
    return cols

for f in EEG_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[EEG:skip] {f} — read error: {e}"); continue
    pcol = find_pcol(df)
    if pcol is None:
        print(f"[EEG:hint] {f} — add a 'p' column or p_null_* columns to enable FPR.")
        continue
    p = pd.to_numeric(df[pcol], errors="coerce").fillna(1.0).values
    rej, q = benjamini_hochberg(p, alpha=0.01)
    pi0 = storey_pi0(p, lam=0.5)
    nulls = eeg_null_cols(df)
    eeg_fpr = np.nan
    if nulls:
        fp = 0
        for nc in nulls:
            pn = pd.to_numeric(df[nc], errors="coerce").fillna(1.0).values
            rnull, _ = benjamini_hochberg(pn, alpha=0.01)
            if rnull.any(): fp += 1
        eeg_fpr = fp / len(nulls)
    audits.append(dict(
        module="EEG", file=f,
        positives=int(np.sum(rej)), tests=int(p.size),
        FPR=(None if np.isnan(eeg_fpr) else float(eeg_fpr)),
        pi0=float(pi0), FDR_alpha_0p01=float(np.mean(q < 0.01))
    ))

# ---------------- Cooling ----------------
for f in COOL_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Cooling:skip] {f} — read error: {e}"); continue

    has_phase = any(c.lower()=="phase" for c in df.columns)

    tcol = pick_temp_col(df)
    if tcol is None:
        print(f"[Cooling:skip] {f} — no temperature-like column"); continue

    temp = pd.to_numeric(df[tcol], errors="coerce")
    temp = temp[np.isfinite(temp)].values
    if temp.size < 2*MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — too few rows ({temp.size})"); continue

    if has_phase:
        lab = df[[c for c in df.columns if c.lower()=="phase"][0]].astype(str).str.lower().values[:len(temp)]
        step_idx = (lab=="step") | (lab=="cool") | (lab=="1") | (lab=="true") | (lab=="post")
        base, step = temp[~step_idx], temp[step_idx]
        # Re-orient so baseline is hotter (expect cooling: step <= base)
        if np.mean(base) < np.mean(step):
            base, step = step, base
            flipped = True
        else:
            flipped = False
        k, out_csv = np.nan, f
    else:
        k = best_change_point(temp, guard=GUARD_FRAC)
        if k is None:
            print(f"[Cooling:skip] {f} — no robust change point"); continue
        base_raw, step_raw = temp[:k], temp[k:]
        if np.mean(base_raw) < np.mean(step_raw):
            base, step = step_raw, base_raw; flipped=True
            mask = np.r_[np.zeros_like(step_raw, bool), np.ones_like(base_raw, bool)]
        else:
            base, step = base_raw, step_raw; flipped=False
            mask = np.r_[np.zeros(k, bool), np.ones(len(temp)-k, bool)]
        if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
            print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue
        out_csv = f.replace(".csv", "_labeled.csv")
        df2 = df.copy()
        n = len(temp)
        phase = np.where(mask[:n], "step", "baseline")
        if len(df2) > n: phase = np.r_[phase, np.repeat(phase[-1], len(df2)-n)]
        df2["phase"] = phase[:len(df2)]
        df2.to_csv(out_csv, index=False)

    if len(base) < MIN_PHASE_N or len(step) < MIN_PHASE_N:
        print(f"[Cooling:skip] {f} — phase too short (base={len(base)}, step={len(step)})"); continue

    dT = float(np.mean(step) - np.mean(base))  # negative means cooling
    se = moving_block_bootstrap_se(base, step, B=MB_BOOT_B)
    if not (np.isfinite(se) and se > 0): se = pooled_se(base, step)
    if not (np.isfinite(se) and se > 0): se = 1e-9  # last-ditch jitter

    z = dT / se
    positive = int((dT <= DELTA_REQ) and (z <= -Z_CRIT))

    # Label-shuffle null
    allv = np.concatenate([base, step]); k_step = len(step); n_all = len(allv)
    fp = 0
    for _ in range(SHUFFLES):
        idx = rng.permutation(n_all)
        sN = allv[idx[:k_step]]; bN = allv[idx[k_step:]]
        dTN = float(np.mean(sN) - np.mean(bN))
        zN = dTN / se
        if (dTN <= DELTA_REQ) and (zN <= -Z_CRIT):
            fp += 1
    fpr = fp / SHUFFLES
    fpr_up95 = clopper_pearson_upper(fp, SHUFFLES, alpha=0.05)

    cool_rows.append(dict(
        module="Cooling", file=f, labeled_csv=out_csv,
        n_base=len(base), n_step=len(step),
        change_point=(None if (k is None or (isinstance(k, float) and np.isnan(k))) else int(k)),
        flipped=bool(flipped),
        dT=dT, z=z, zcrit=Z_CRIT,
        positive=int(positive),
        FPR=float(fpr), FPR_upper95=float(fpr_up95)
    ))
    audits.append(dict(
        module="Cooling", file=f,
        positives=int(positive), tests=1,
        FPR=float(fpr), FPR_upper95=float(fpr_up95),
        dT=dT, z=z
    ))

# ---------------- Forecast ----------------
for f in FORECAST_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[Forecast:skip] {f} — read error: {e}"); continue
    if not {"alert","verified"}.issubset(df.columns):
        print(f"[Forecast:skip] {f} — needs ['alert','verified']"); continue
    tp = int(((df["alert"]==1) & (df["verified"]==1)).sum())
    fp = int(((df["alert"]==1) & (df["verified"]==0)).sum())
    tn = int(((df["alert"]==0) & (df["verified"]==0)).sum())
    fn = int(((df["alert"]==0) & (df["verified"]==1)).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="Forecast", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- GRA ----------------
for f in GRA_FILES:
    try:
        df = pd.read_csv(f)
    except Exception as e:
        print(f"[GRA:skip] {f} — read error: {e}"); continue
    cols = {c.lower(): c for c in df.columns}
    if not (("restored" in cols) and ("truth_pass" in cols)):
        print(f"[GRA:skip] {f} — needs 'restored' & 'truth_pass'"); continue
    restored = df[cols["restored"]].astype(bool)
    truth    = df[cols["truth_pass"]].astype(bool)
    tp = int((restored & truth).sum())
    fp = int((restored & ~truth).sum())
    tn = int((~restored & ~truth).sum())
    fn = int((~restored & truth).sum())
    fpr = (fp / (fp+tn)) if (fp+tn)>0 else np.nan
    audits.append(dict(module="GRA", file=f, TP=tp, FP=fp, TN=tn, FN=fn, FPR=(None if np.isnan(fpr) else float(fpr))))

# ---------------- Save & print ----------------
audit_df = pd.DataFrame(audits)
stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
unified_path = OUT_DIR / f"cnt_unified_fpr_report_{stamp}.csv"
if not audit_df.empty:
    audit_df.to_csv(unified_path, index=False)
    print("\n== CNT Unified False-Positive Audit ==")
    print(audit_df.to_string(index=False))
    print("\nSaved:", unified_path)
else:
    print("\n== No eligible items found to audit ==")

if cool_rows:
    cool_df = pd.DataFrame(cool_rows)
    cool_out = OUT_DIR / f"cnt_cooling_fpr_detail_{stamp}.csv"
    cool_df.to_csv(cool_out, index=False)
    print("\n== Cooling Detail (with 95% bounds) ==")
    print(cool_df[["file","dT","z","positive","FPR","FPR_upper95"]].to_string(index=False))
    print("\nSaved:", cool_out)



[EEG] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\artifacts\pli_humans_100plus\tests_functional\motor_cuelocked\lap_erd\tables\lap_erd_subject50.csv

[Cooling] found 12 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121130.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-121543_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-122141_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_20251015-123830_labeled.csv
 • C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_unified_cooling_20251015-132627_labeled.csv
   ... +6 more

[Forecast] found 0 file(s).

[GRA] found 1 file(s).
 • C:\Users\caleb\CNT_Lab\notebooks\archive\gra_runs\gra_v0_3_policy_fix_stem_20251016-095737\policy_fix_stem_results.csv
[Cooling:skip] C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_20251015-120551.csv — too few rows (