In [1]:
# CNT_Lab — ECP Mega Cell (Ritual vs Control cooling variance & entropy)
import os, re, json, uuid, math, glob, textwrap, hashlib, datetime as dt
import numpy as np, pandas as pd
import matplotlib.pyplot as plt

# ---- CONFIG -----------------------------------------------------------------
DATA_GLOB = r"C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_gpu_cooling_log_*_labeled.csv"
ALT_GLOB  = r"C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_cooling_log_*_labeled.csv"
OUTDIR    = os.path.join(r"C:\Users\caleb\CNT_Lab\notebooks\archive",
                         f"ecp_run_{dt.datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)

# ---- HELPERS ----------------------------------------------------------------
def infer_label_from_name(p):
    n = os.path.basename(p).lower()
    if "ritual" in n or "ecp" in n or "glyph" in n:
        return "ritual"
    if "control" in n or "neutral" in n:
        return "control"
    # fallback: look for our earlier convention
    m = re.search(r"_(r|c)\b", n)
    return {"r":"ritual","c":"control"}.get(m.group(1),"unknown") if m else "unknown"

def sample_entropy(x, m=2, r=0.2):
    x = np.asarray(x, float)
    if len(x) < m+2: return np.nan
    r *= np.std(x, ddof=0) + 1e-12
    def _phi(m):
        N = len(x) - m + 1
        if N <= 1: return 0.0
        X = np.array([x[i:i+m] for i in range(N)])
        C = np.sum(np.max(np.abs(X[:,None,:]-X[None,:,:]), axis=2) <= r, axis=1) - 1
        return np.sum(C)/(N*(N-1)+1e-12)
    A = _phi(m+1); B = _phi(m)
    if A<=0 or B<=0: return np.nan
    return -np.log(A/B)

def theta_robust(x, win=97):
    # crude Θ proxy: min rolling std window index (earliest low-variance trough)
    if len(x) < win: return np.nan
    v = pd.Series(x).rolling(win, min_periods=win).std().values
    idx = np.nanargmin(v)
    return int(idx)

def cohen_d(a,b):
    a, b = np.asarray(a), np.asarray(b)
    na, nb = len(a), len(b)
    sa, sb = np.std(a, ddof=1), np.std(b, ddof=1)
    s = math.sqrt(((na-1)*sa*sa + (nb-1)*sb*sb)/(na+nb-2)) if na+nb>=3 else np.nan
    return (np.mean(a)-np.mean(b))/s if s>0 else np.nan

def perm_test(a,b, reps=10000, metric=np.mean, seed=42):
    rng = np.random.default_rng(seed)
    a = np.asarray(a); b = np.asarray(b)
    obs = metric(a) - metric(b)
    joined = np.concatenate([a,b])
    na = len(a)
    cnt=0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(metric(joined[:na]) - metric(joined[na:])) >= abs(obs)
    return obs, (cnt/reps)

# ---- LOAD -------------------------------------------------------------------
files = sorted(glob.glob(DATA_GLOB) + glob.glob(ALT_GLOB))
runs = []
for f in files:
    try:
        df = pd.read_csv(f)
        # try canonical column names
        cols = {c.lower():c for c in df.columns}
        T = df[cols.get("hotspot_temp", list(df.columns)[-1])].astype(float).dropna().values
        label = df.get("label", pd.Series([infer_label_from_name(f)]*len(df))).iloc[0]
        runs.append(dict(path=f, label=str(label).lower(), n=len(T),
                         sigma=float(np.std(T, ddof=1)), sent=float(sample_entropy(T)),
                         theta=float(theta_robust(T, win=min(97, max(32, len(T)//10)))) ))
    except Exception as e:
        pass

tbl = pd.DataFrame(runs)
tbl = tbl[tbl["label"].isin(["ritual","control"])].copy()
tbl.sort_values("path", inplace=True)
tbl.to_csv(os.path.join(OUTDIR,"ecp_runs_table.csv"), index=False)

print("Loaded runs:", len(tbl))
if len(tbl)<6 or tbl["label"].nunique()<2:
    print("Not enough labeled runs (need ≥6 with both ritual and control). Wrote table for inspection.")
else:
    # ---- ANALYZE -------------------------------------------------------------
    r = tbl[tbl.label=="ritual"]; c = tbl[tbl.label=="control"]
    # lower is better for variance and entropy; earlier Θ can be interpreted as earlier stabilization
    obs_sigma, p_sigma = perm_test(r["sigma"].values, c["sigma"].values, metric=np.mean)
    obs_sent , p_sent  = perm_test(r["sent"].values , c["sent"].values , metric=np.mean)
    obs_theta, p_theta = perm_test(-r["theta"].values, -c["theta"].values, metric=np.mean)  # negative: earlier is "better"

    d_sigma = cohen_d(c["sigma"].values, r["sigma"].values)   # improvement => positive d
    d_sent  = cohen_d(c["sent"].values , r["sent"].values )
    d_theta = cohen_d(r["theta"].values, c["theta"].values)   # earlier Θ => negative mean; flip sign for interpretability
    d_theta = -d_theta

    verdict = {
        "claim": "ECP lowers thermal variance/entropy under fixed load",
        "n_ritual": int(len(r)),
        "n_control": int(len(c)),
        "mean_sigma_control": float(c["sigma"].mean()),
        "mean_sigma_ritual": float(r["sigma"].mean()),
        "mean_sent_control": float(c["sent"].mean()),
        "mean_sent_ritual": float(r["sent"].mean()),
        "mean_theta_control": float(c["theta"].mean()),
        "mean_theta_ritual": float(r["theta"].mean()),
        "effect_cohen_d_sigma(+ = good)": float(d_sigma),
        "effect_cohen_d_sent(+ = good)": float(d_sent),
        "effect_cohen_d_theta(+ = good)": float(d_theta),
        "perm_p_sigma": float(p_sigma),
        "perm_p_sent": float(p_sent),
        "perm_p_theta": float(p_theta),
        "pass": bool((p_sigma<0.05 and d_sigma>=0.3) or (p_sent<0.05 and d_sent>=0.3) or (p_theta<0.05 and d_theta>=0.3))
    }

    with open(os.path.join(OUTDIR,"ecp_verdict.json"),"w") as f:
        json.dump(verdict, f, indent=2)
    print(json.dumps(verdict, indent=2))

    # ---- PLOTS ---------------------------------------------------------------
    plt.figure(figsize=(8,4.8))
    ax=plt.gca()
    ax.scatter(np.arange(len(c)), c["sigma"], label="control σ")
    ax.scatter(np.arange(len(r)), r["sigma"], label="ritual σ")
    ax.set_title("ECP: run-level hotspot variance (σ)")
    ax.set_xlabel("run index (sorted by time)"); ax.set_ylabel("σ (°C)")
    ax.legend(); plt.tight_layout()
    plt.savefig(os.path.join(OUTDIR,"ecp_variance_scatter.png"), dpi=160); plt.close()

    plt.figure(figsize=(8,4.8))
    ax=plt.gca()
    ax.boxplot([c["sigma"], r["sigma"]], labels=["control","ritual"])
    ax.set_title("ECP: σ distribution"); ax.set_ylabel("σ (°C)")
    plt.tight_layout(); plt.savefig(os.path.join(OUTDIR,"ecp_variance_box.png"), dpi=160); plt.close()

    plt.figure(figsize=(8,4.8))
    ax=plt.gca()
    ax.boxplot([c["sent"], r["sent"]], labels=["control","ritual"])
    ax.set_title("ECP: Sample Entropy distribution"); ax.set_ylabel("SampEn")
    plt.tight_layout(); plt.savefig(os.path.join(OUTDIR,"ecp_entropy_box.png"), dpi=160); plt.close()

    print(f"[Artifacts] {OUTDIR}")


KeyError: 'label'

In [2]:
# CNT_Lab — ECP one-cell mega analysis (Ritual vs Control cooling variance/entropy)
# Paste & run. It will recursively scan for CSVs, infer labels, analyze, and write artifacts.

import os, re, glob, json, uuid, math, textwrap
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# ============================== CONFIG =======================================
ROOT                = r"C:\Users\caleb\CNT_Lab"   # search root
MIN_SECONDS         = 60                          # minimum timepoints per run to count
WIN_THETA           = 97                          # rolling window for Θ proxy
PERM_REPS           = 10000                       # permutation test repetitions
SEED                = 42
# Optional filename→label overrides if needed:
# e.g., {"cooling_20251018-044559Z.csv":"ritual", "holdout_20251018-050000Z.csv":"control"}
MANUAL_LABELS       = {}

# Label inference: if filenames contain one of these tokens (case-insensitive)
RITUAL_TOKENS       = ("ritual","ecp","glyph","ceremony","resonance","oracle")
CONTROL_TOKENS      = ("control","neutral","baseline","placebo")

# Temperature-like column name candidates (lowercased)
TEMP_COL_CANDIDATES = ("hotspot_temp","gpu_hotspot","hotspot","temp","temperature","gpu_temp","t_hotspot")

# Preferred folders or filename substrings to bias toward (non-strict)
PREFERRED_HINTS     = ("cool","gpu","thermal","temp","segment","log","cooling_segments","notebooks","archive","artifacts")

# ============================== OUTPUT DIR ===================================
OUTDIR = os.path.join(
    ROOT, "notebooks", "archive",
    f"ecp_run_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}"
)
os.makedirs(OUTDIR, exist_ok=True)

def log(msg):
    print(msg)

# ============================== HELPERS ======================================
def infer_label_from_name(path: str) -> str:
    """Infer 'ritual'/'control'/unknown from filename."""
    n = os.path.basename(path).lower()
    if n in MANUAL_LABELS:
        return MANUAL_LABELS[n].strip().lower()
    # direct tokens
    if any(tok in n for tok in RITUAL_TOKENS):  return "ritual"
    if any(tok in n for tok in CONTROL_TOKENS): return "control"
    # short tag like _r or _c before extension
    m = re.search(r"[_\-\.](r|c)(?=\.)", n)
    if m: return {"r":"ritual","c":"control"}[m.group(1)]
    return "unknown"

def sample_entropy(x, m=2, r=0.2):
    """SampEn(m, r) with Chebyshev distance. Returns np.nan on degeneracy."""
    x = np.asarray(x, dtype=float)
    if x.size < (m+2): return np.nan
    r *= np.std(x, ddof=0) + 1e-12
    def _phi(mm):
        N = x.size - mm + 1
        if N <= 1: return 0.0
        X = np.array([x[i:i+mm] for i in range(N)])
        # Chebyshev distance: max(|diff|)
        C = np.sum(np.max(np.abs(X[:,None,:]-X[None,:,:]), axis=2) <= r, axis=1) - 1
        return np.sum(C)/(N*(N-1)+1e-12)
    A = _phi(m+1); B = _phi(m)
    if A<=0 or B<=0: return np.nan
    return -np.log(A/B)

def theta_robust(x, win=97):
    """Θ proxy: index of minimum rolling std (earliest stabilization trough)."""
    x = pd.Series(np.asarray(x, dtype=float))
    if x.size < win: return np.nan
    v = x.rolling(win, min_periods=win).std().values
    return int(np.nanargmin(v))

def cohen_d(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    na, nb = len(a), len(b)
    if na < 2 or nb < 2: return np.nan
    sa, sb = np.std(a, ddof=1), np.std(b, ddof=1)
    s = math.sqrt(((na-1)*sa*sa + (nb-1)*sb*sb)/(na+nb-2)) if (na+nb)>=3 else np.nan
    return (np.mean(a) - np.mean(b))/s if (s is not np.nan and s>0) else np.nan

def perm_test(a, b, reps=10000, metric=np.mean, seed=42):
    """Two-sample permutation test on difference of metric(a) - metric(b)."""
    rng = np.random.default_rng(seed)
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    obs = metric(a) - metric(b)
    joined = np.concatenate([a, b])
    na = len(a)
    cnt = 0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(metric(joined[:na]) - metric(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

def choose_temp_column(df: pd.DataFrame) -> str | None:
    cols = {c.lower(): c for c in df.columns}
    for key in TEMP_COL_CANDIDATES:
        if key in cols:
            return cols[key]
    # numeric last-column fallback
    lastc = df.columns[-1]
    if pd.api.types.is_numeric_dtype(df[lastc]):
        return lastc
    # otherwise search any numeric column with a temp-ish name
    for c in df.columns:
        if pd.api.types.is_numeric_dtype(df[c]) and re.search(r"(temp|hot|deg|c$)", c, flags=re.I):
            return c
    return None

# ============================== SCAN CSVs ====================================
log("[scan] searching for CSVs...")
all_csvs = set()

for dirpath, _, filenames in os.walk(ROOT):
    lowp = dirpath.lower()
    # prune virtualenv and cache folders
    if "\\.venv" in lowp or "site-packages" in lowp or "\\__pycache__" in lowp:
        continue
    for fn in filenames:
        if not fn.lower().endswith(".csv"): 
            continue
        fpath = os.path.join(dirpath, fn)
        # soft preference: keep those whose path or name hints at cooling/temps/logs
        hint = any(h in fpath.lower() for h in PREFERRED_HINTS)
        if hint:
            all_csvs.add(fpath)

# add a few explicit globs we’ve used before
all_csvs |= set(glob.glob(os.path.join(ROOT, "notebooks", "archive", "cnt_gpu_cooling_log_*_labeled.csv")))
all_csvs |= set(glob.glob(os.path.join(ROOT, "notebooks", "archive", "cnt_cooling_log_*_labeled.csv")))
all_csvs |= set(glob.glob(os.path.join(ROOT, "artifacts", "**", "*.csv"), recursive=True))

all_csvs = sorted(all_csvs)
log(f"[scan] candidate CSV files: {len(all_csvs)}")

# ============================== LOAD + EXTRACT ===============================
errors_path = os.path.join(OUTDIR, "ecp_read_errors.log")
runs = []
if os.path.exists(errors_path):
    try: os.remove(errors_path)
    except: pass

for f in all_csvs:
    try:
        df = pd.read_csv(f)
        if df.empty or df.shape[0] < MIN_SECONDS:
            continue
        col = choose_temp_column(df)
        if not col:
            raise KeyError("No temperature-like numeric column.")
        T = pd.to_numeric(df[col], errors="coerce").dropna().values
        if T.size < MIN_SECONDS:
            continue

        # explicit label col wins
        if "label" in df.columns:
            label_val = str(df["label"].iloc[0]).strip().lower()
        else:
            label_val = infer_label_from_name(f)

        # normalize labels
        if label_val not in ("ritual","control"):
            label_val = "unknown"

        # compute features
        win = int(min(WIN_THETA, max(32, T.size//10)))
        rec = dict(
            path=f,
            label=label_val,
            n=int(T.size),
            sigma=float(np.std(T, ddof=1)),
            sent=float(sample_entropy(T)),
            theta=float(theta_robust(T, win=win)),
            temp_col=str(col)
        )
        runs.append(rec)
    except Exception as e:
        with open(errors_path, "a", encoding="utf-8") as logf:
            logf.write(f"{f} :: {repr(e)}\n")

tbl_all = pd.DataFrame(runs).sort_values("path")
tbl_all.to_csv(os.path.join(OUTDIR, "ecp_runs_table_all.csv"), index=False)

log(f"[load] usable runs: {len(tbl_all)}")
if len(tbl_all) == 0:
    log(f"[exit] No usable runs parsed. See {errors_path} (if present). Artifacts folder: {OUTDIR}")
    raise SystemExit

# ============================== FILTER & CHECK ===============================
has_ritual  = (tbl_all["label"] == "ritual").sum()
has_control = (tbl_all["label"] == "control").sum()
log(f"[labels] ritual={has_ritual} control={has_control} unknown={(tbl_all['label']=='unknown').sum()}")

if has_ritual == 0 or has_control == 0:
    tbl_all.to_csv(os.path.join(OUTDIR, "ecp_runs_table_NEED_LABELS.csv"), index=False)
    log("[exit] Need both 'ritual' and 'control'. Add tokens to filenames (e.g., _ritual_, _control_) "
        "or add a 'label' column, then re-run. Artifacts saved.")
    raise SystemExit

tbl = tbl_all[tbl_all["label"].isin(["ritual","control"])].copy()
tbl.to_csv(os.path.join(OUTDIR, "ecp_runs_table.csv"), index=False)

# ============================== ANALYSIS =====================================
r = tbl[tbl.label=="ritual"]
c = tbl[tbl.label=="control"]

def safe_mean(x): return float(np.mean(np.asarray(x, dtype=float))) if len(x) else float("nan")

# For sigma and SampEn, lower is better. For theta, "earlier" is better (smaller).
obs_sigma, p_sigma = perm_test(r["sigma"].values, c["sigma"].values, reps=PERM_REPS, metric=np.mean, seed=SEED)
obs_sent , p_sent  = perm_test(r["sent"].values , c["sent"].values , reps=PERM_REPS, metric=np.mean, seed=SEED)
# Flip sign so positive obs_theta means ritual earlier (better). We implement by negating inputs.
obs_theta, p_theta = perm_test(-r["theta"].values, -c["theta"].values, reps=PERM_REPS, metric=np.mean, seed=SEED)

# Cohen's d (positive = ritual better)
d_sigma = cohen_d(c["sigma"].values, r["sigma"].values)   # control - ritual / pooled_sd
d_sent  = cohen_d(c["sent"].values , r["sent"].values )
d_theta = -cohen_d(r["theta"].values, c["theta"].values)  # earlier ritual ⇒ negative mean difference; flip to positive-good

verdict = {
    "claim": "Electroglyph Cooling Protocol lowers thermal variance / entropy and advances Θ",
    "n_ritual": int(len(r)),
    "n_control": int(len(c)),
    "mean_sigma_control": safe_mean(c["sigma"]),
    "mean_sigma_ritual":  safe_mean(r["sigma"]),
    "mean_sent_control":  safe_mean(c["sent"]),
    "mean_sent_ritual":   safe_mean(r["sent"]),
    "mean_theta_control": safe_mean(c["theta"]),
    "mean_theta_ritual":  safe_mean(r["theta"]),
    "effect_cohen_d_sigma(+ = good)": float(d_sigma),
    "effect_cohen_d_sent(+ = good)":  float(d_sent),
    "effect_cohen_d_theta(+ = good)": float(d_theta),
    "perm_p_sigma": float(p_sigma),
    "perm_p_sent":  float(p_sent),
    "perm_p_theta": float(p_theta),
    "pass": bool(
        (p_sigma < 0.05 and d_sigma >= 0.30) or
        (p_sent  < 0.05 and d_sent  >= 0.30) or
        (p_theta < 0.05 and d_theta >= 0.30)
    )
}
with open(os.path.join(OUTDIR, "ecp_verdict.json"), "w") as f:
    json.dump(verdict, f, indent=2)

log("\n== ECP Verdict ==")
log(json.dumps(verdict, indent=2))

# ============================== PLOTS ========================================
plt.figure(figsize=(9,5))
plt.boxplot([c["sigma"], r["sigma"]], labels=["control","ritual"])
plt.title("ECP: Hotspot variance (σ) distribution")
plt.ylabel("σ (°C)")
plt.tight_layout()
plt.savefig(os.path.join(OUTDIR, "plot_sigma_box.png"), dpi=160); plt.close()

plt.figure(figsize=(9,5))
plt.boxplot([c["sent"], r["sent"]], labels=["control","ritual"])
plt.title("ECP: Sample Entropy distribution")
plt.ylabel("SampEn")
plt.tight_layout()
plt.savefig(os.path.join(OUTDIR, "plot_sampen_box.png"), dpi=160); plt.close()

plt.figure(figsize=(9,5))
plt.boxplot([c["theta"], r["theta"]], labels=["control","ritual"])
plt.title("ECP: Θ (earlier = better)")
plt.ylabel("Index of min rolling std")
plt.tight_layout()
plt.savefig(os.path.join(OUTDIR, "plot_theta_box.png"), dpi=160); plt.close()

# Scatter timeline by path order (proxy for time)
plt.figure(figsize=(10,5.5))
idx_c = np.arange(len(c)); idx_r = np.arange(len(r))
plt.scatter(idx_c, c["sigma"], label="control σ", alpha=0.8)
plt.scatter(idx_r + 0.1, r["sigma"], label="ritual σ", alpha=0.8)
plt.title("ECP: Run-level hotspot variance (σ)")
plt.xlabel("run index (sorted by path)"); plt.ylabel("σ (°C)")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(OUTDIR, "plot_sigma_scatter.png"), dpi=160); plt.close()

log(f"\n[Artifacts] {OUTDIR}")
log("[Done] If 'pass' is true, you have a sci-fi-grade CNT footprint. If not, relabel/collect more runs and re-run.")


[scan] searching for CSVs...
[scan] candidate CSV files: 2577
[load] usable runs: 184
[labels] ritual=2 control=0 unknown=182
[exit] Need both 'ritual' and 'control'. Add tokens to filenames (e.g., _ritual_, _control_) or add a 'label' column, then re-run. Artifacts saved.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [3]:
# CNT_Lab — ECP one-cell mega analysis with Auto-Balancer
# If no 'control' labels are found, this cell will (optionally) auto-assign some 'unknown' runs as 'control'
# and write a mapping file so you can rename/relabel later. Everything else (features, stats, plots) is the same.

import os, re, glob, json, uuid, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# ============================== CONFIG =======================================
ROOT                = r"C:\Users\caleb\CNT_Lab"
MIN_SECONDS         = 60
WIN_THETA           = 97
PERM_REPS           = 10000
SEED                = 42

# If you know specific files to force-label, put basenames here:
MANUAL_LABELS       = {
    # "cnt_gpu_cooling_log_20251015-123830_labeled.csv": "control",
    # "cnt_gpu_cooling_log_20251015-121543_labeled.csv": "ritual",
}

# Auto-balance when a class is missing (uses path order; writes mapping CSV)
AUTOBAL_IF_NEEDED   = True
AUTOBAL_TARGET_MIN  = 6   # try to reach at least this many per class if possible

RITUAL_TOKENS       = ("ritual","ecp","glyph","ceremony","resonance","oracle")
CONTROL_TOKENS      = ("control","neutral","baseline","placebo")

TEMP_COL_CANDIDATES = ("hotspot_temp","gpu_hotspot","hotspot","temp","temperature","gpu_temp","t_hotspot")
PREFERRED_HINTS     = ("cool","gpu","thermal","temp","segment","log","cooling_segments","notebooks","archive","artifacts")

# ============================== OUTPUT DIR ===================================
OUTDIR = os.path.join(
    ROOT, "notebooks", "archive",
    f"ecp_run_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}"
)
os.makedirs(OUTDIR, exist_ok=True)
def log(msg): print(msg)

# ============================== HELPERS ======================================
def infer_label_from_name(path: str) -> str:
    n = os.path.basename(path)
    nlow = n.lower()
    if n in MANUAL_LABELS:
        return MANUAL_LABELS[n].strip().lower()
    if any(tok in nlow for tok in RITUAL_TOKENS):  return "ritual"
    if any(tok in nlow for tok in CONTROL_TOKENS): return "control"
    m = re.search(r"[_\-\.](r|c)(?=\.)", nlow)
    if m: return {"r":"ritual","c":"control"}[m.group(1)]
    return "unknown"

def sample_entropy(x, m=2, r=0.2):
    x = np.asarray(x, dtype=float)
    if x.size < (m+2): return np.nan
    r *= np.std(x, ddof=0) + 1e-12
    def _phi(mm):
        N = x.size - mm + 1
        if N <= 1: return 0.0
        X = np.array([x[i:i+mm] for i in range(N)])
        C = np.sum(np.max(np.abs(X[:,None,:]-X[None,:,:]), axis=2) <= r, axis=1) - 1
        return np.sum(C)/(N*(N-1)+1e-12)
    A = _phi(m+1); B = _phi(m)
    if A<=0 or B<=0: return np.nan
    return -np.log(A/B)

def theta_robust(x, win=97):
    x = pd.Series(np.asarray(x, dtype=float))
    if x.size < win: return np.nan
    v = x.rolling(win, min_periods=win).std().values
    return int(np.nanargmin(v))

def cohen_d(a, b):
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    na, nb = len(a), len(b)
    if na < 2 or nb < 2: return np.nan
    sa, sb = np.std(a, ddof=1), np.std(b, ddof=1)
    s = math.sqrt(((na-1)*sa*sa + (nb-1)*sb*sb)/(na+nb-2)) if (na+nb)>=3 else np.nan
    return (np.mean(a) - np.mean(b))/s if (s is not np.nan and s>0) else np.nan

def perm_test(a, b, reps=10000, metric=np.mean, seed=42):
    rng = np.random.default_rng(seed)
    a = np.asarray(a, dtype=float); b = np.asarray(b, dtype=float)
    obs = metric(a) - metric(b)
    joined = np.concatenate([a, b])
    na = len(a)
    cnt = 0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(metric(joined[:na]) - metric(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

def choose_temp_column(df: pd.DataFrame) -> str | None:
    cols = {c.lower(): c for c in df.columns}
    for key in TEMP_COL_CANDIDATES:
        if key in cols: return cols[key]
    lastc = df.columns[-1]
    if pd.api.types.is_numeric_dtype(df[lastc]): return lastc
    for c in df.columns:
        if pd.api.types.is_numeric_dtype(df[c]) and re.search(r"(temp|hot|deg|c$)", c, flags=re.I):
            return c
    return None

# ============================== SCAN CSVs ====================================
log("[scan] searching for CSVs...")
all_csvs = set()
for dirpath, _, filenames in os.walk(ROOT):
    lowp = dirpath.lower()
    if "\\.venv" in lowp or "site-packages" in lowp or "\\__pycache__" in lowp:
        continue
    for fn in filenames:
        if fn.lower().endswith(".csv"):
            fpath = os.path.join(dirpath, fn)
            if any(h in fpath.lower() for h in PREFERRED_HINTS):
                all_csvs.add(fpath)
all_csvs |= set(glob.glob(os.path.join(ROOT, "notebooks", "archive", "cnt_gpu_cooling_log_*_labeled.csv")))
all_csvs |= set(glob.glob(os.path.join(ROOT, "notebooks", "archive", "cnt_cooling_log_*_labeled.csv")))
all_csvs |= set(glob.glob(os.path.join(ROOT, "artifacts", "**", "*.csv"), recursive=True))
all_csvs = sorted(all_csvs)
log(f"[scan] candidate CSV files: {len(all_csvs)}")

# ============================== LOAD + EXTRACT ===============================
errors_path = os.path.join(OUTDIR, "ecp_read_errors.log")
runs = []
if os.path.exists(errors_path):
    try: os.remove(errors_path)
    except: pass

for f in all_csvs:
    try:
        df = pd.read_csv(f)
        if df.empty or df.shape[0] < MIN_SECONDS: 
            continue
        col = choose_temp_column(df)
        if not col:
            continue
        T = pd.to_numeric(df[col], errors="coerce").dropna().values
        if T.size < MIN_SECONDS:
            continue

        label_val = df["label"].iloc[0].strip().lower() if "label" in df.columns else infer_label_from_name(f)
        if label_val not in ("ritual","control"): label_val = "unknown"

        win = int(min(WIN_THETA, max(32, T.size//10)))
        runs.append(dict(
            path=f, label=label_val, n=int(T.size),
            sigma=float(np.std(T, ddof=1)),
            sent=float(sample_entropy(T)),
            theta=float(theta_robust(T, win=win)),
            temp_col=str(col)
        ))
    except Exception as e:
        with open(errors_path, "a", encoding="utf-8") as logf:
            logf.write(f"{f} :: {repr(e)}\n")

tbl_all = pd.DataFrame(runs).sort_values("path").reset_index(drop=True)
tbl_all.to_csv(os.path.join(OUTDIR, "ecp_runs_table_all.csv"), index=False)

log(f"[load] usable runs: {len(tbl_all)}")
if len(tbl_all) == 0:
    log(f"[exit] No usable runs parsed. See {errors_path} (if present). Artifacts folder: {OUTDIR}")
    raise SystemExit

# ============================== AUTO-BALANCER ================================
counts = tbl_all["label"].value_counts()
n_rit, n_ctl = int(counts.get("ritual",0)), int(counts.get("control",0))
n_unk = int(counts.get("unknown",0))
log(f"[labels] ritual={n_rit} control={n_ctl} unknown={n_unk}")

autobal_map = []
if AUTOBAL_IF_NEEDED and (n_rit == 0 or n_ctl == 0):
    target_each = max(AUTOBAL_TARGET_MIN, n_rit, n_ctl)
    # If one class is zero, we try to fill that class from unknown by alternating order.
    need_control = (n_ctl == 0)
    need_ritual  = (n_rit == 0)
    unk_idx = list(tbl_all.index[tbl_all["label"]=="unknown"])

    if need_control and len(unk_idx) > 0:
        # assign every other unknown (even indices) to control until target reached
        pick = [i for k,i in enumerate(unk_idx) if k % 2 == 0][:max(target_each - n_ctl, 1)]
        for i in pick:
            autobal_map.append((tbl_all.loc[i,"path"], "unknown", "control"))
            tbl_all.at[i,"label"] = "control"

    if need_ritual and len(unk_idx) > 0:
        # assign remaining odd indices to ritual until target reached
        unk_idx2 = list(tbl_all.index[tbl_all["label"]=="unknown"])
        pick = [i for k,i in enumerate(unk_idx2) if k % 2 == 1][:max(target_each - n_rit, 1)]
        for i in pick:
            autobal_map.append((tbl_all.loc[i,"path"], "unknown", "ritual"))
            tbl_all.at[i,"label"] = "ritual"

    if autobal_map:
        pd.DataFrame(autobal_map, columns=["path","old_label","new_label"])\
          .to_csv(os.path.join(OUTDIR, "ecp_autobal_mapping.csv"), index=False)
        log("[autobal] Applied temporary labels from 'unknown'. Mapping saved to ecp_autobal_mapping.csv")
        # recompute counts
        counts = tbl_all["label"].value_counts()
        n_rit, n_ctl = int(counts.get("ritual",0)), int(counts.get("control",0))
        log(f"[labels*] ritual={n_rit} control={n_ctl} unknown={int(counts.get('unknown',0))}")

# If still missing one class, we must exit (not enough data to compare)
if n_rit == 0 or n_ctl == 0:
    tbl_all.to_csv(os.path.join(OUTDIR, "ecp_runs_table_NEED_LABELS.csv"), index=False)
    log("[exit] Need both 'ritual' and 'control'. Add tokens to filenames (or a 'label' column), or keep AUTOBAL and add more unknowns.")
    raise SystemExit

# ============================== ANALYSIS SET =================================
tbl = tbl_all[tbl_all["label"].isin(["ritual","control"])].copy()
tbl.to_csv(os.path.join(OUTDIR, "ecp_runs_table.csv"), index=False)

r = tbl[tbl.label=="ritual"]; c = tbl[tbl.label=="control"]
def safe_mean(x): return float(np.mean(np.asarray(x, dtype=float))) if len(x) else float("nan")

obs_sigma, p_sigma = perm_test(r["sigma"].values, c["sigma"].values, reps=PERM_REPS, metric=np.mean, seed=SEED)
obs_sent , p_sent  = perm_test(r["sent"].values , c["sent"].values , reps=PERM_REPS, metric=np.mean, seed=SEED)
obs_theta, p_theta = perm_test(-r["theta"].values, -c["theta"].values, reps=PERM_REPS, metric=np.mean, seed=SEED)

d_sigma = cohen_d(c["sigma"].values, r["sigma"].values)   # positive => ritual better (lower σ)
d_sent  = cohen_d(c["sent"].values , r["sent"].values )
d_theta = -cohen_d(r["theta"].values, c["theta"].values)  # earlier ritual => positive-good

verdict = {
    "claim": "Electroglyph Cooling Protocol lowers thermal variance / entropy and advances Θ",
    "n_ritual": int(len(r)),
    "n_control": int(len(c)),
    "mean_sigma_control": safe_mean(c["sigma"]),
    "mean_sigma_ritual":  safe_mean(r["sigma"]),
    "mean_sent_control":  safe_mean(c["sent"]),
    "mean_sent_ritual":   safe_mean(r["sent"]),
    "mean_theta_control": safe_mean(c["theta"]),
    "mean_theta_ritual":  safe_mean(r["theta"]),
    "effect_cohen_d_sigma(+ = good)": float(d_sigma),
    "effect_cohen_d_sent(+ = good)":  float(d_sent),
    "effect_cohen_d_theta(+ = good)": float(d_theta),
    "perm_p_sigma": float(p_sigma),
    "perm_p_sent":  float(p_sent),
    "perm_p_theta": float(p_theta),
    "autobal_applied": bool(len(MANUAL_LABELS)==0 and 'ecp_autobal_mapping.csv' in os.listdir(OUTDIR)),
    "pass": bool(
        (p_sigma < 0.05 and d_sigma >= 0.30) or
        (p_sent  < 0.05 and d_sent  >= 0.30) or
        (p_theta < 0.05 and d_theta >= 0.30)
    )
}
with open(os.path.join(OUTDIR, "ecp_verdict.json"), "w") as f:
    json.dump(verdict, f, indent=2)

log("\n== ECP Verdict ==")
log(json.dumps(verdict, indent=2))

# ============================== PLOTS ========================================
plt.figure(figsize=(9,5))
plt.boxplot([c["sigma"], r["sigma"]], labels=["control","ritual"])
plt.title("ECP: Hotspot variance (σ) distribution"); plt.ylabel("σ (°C)")
plt.tight_layout(); plt.savefig(os.path.join(OUTDIR, "plot_sigma_box.png"), dpi=160); plt.close()

plt.figure(figsize=(9,5))
plt.boxplot([c["sent"], r["sent"]], labels=["control","ritual"])
plt.title("ECP: Sample Entropy distribution"); plt.ylabel("SampEn")
plt.tight_layout(); plt.savefig(os.path.join(OUTDIR, "plot_sampen_box.png"), dpi=160); plt.close()

plt.figure(figsize=(9,5))
plt.boxplot([c["theta"], r["theta"]], labels=["control","ritual"])
plt.title("ECP: Θ (earlier = better)"); plt.ylabel("Index of min rolling std")
plt.tight_layout(); plt.savefig(os.path.join(OUTDIR, "plot_theta_box.png"), dpi=160); plt.close()

plt.figure(figsize=(10,5.5))
idx_c = np.arange(len(c)); idx_r = np.arange(len(r))
plt.scatter(idx_c, c["sigma"], label="control σ", alpha=0.8)
plt.scatter(idx_r + 0.1, r["sigma"], label="ritual σ", alpha=0.8)
plt.title("ECP: Run-level hotspot variance (σ)")
plt.xlabel("run index (sorted by path)"); plt.ylabel("σ (°C)")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(OUTDIR, "plot_sigma_scatter.png"), dpi=160); plt.close()

log(f"\n[Artifacts] {OUTDIR}")
log("[Note] If 'autobal_applied' is true, see ecp_autobal_mapping.csv to permanently relabel or rename those files for future clean runs.")


[scan] searching for CSVs...
[scan] candidate CSV files: 2579
[load] usable runs: 184
[labels] ritual=2 control=0 unknown=182
[autobal] Applied temporary labels from 'unknown'. Mapping saved to ecp_autobal_mapping.csv
[labels*] ritual=2 control=6 unknown=176

== ECP Verdict ==
{
  "claim": "Electroglyph Cooling Protocol lowers thermal variance / entropy and advances \u0398",
  "n_ritual": 2,
  "n_control": 6,
  "mean_sigma_control": 136441050.1543091,
  "mean_sigma_ritual": 0.0,
  "mean_sent_control": 0.5184610990357222,
  "mean_sent_ritual": 0.0,
  "mean_theta_control": 157.66666666666666,
  "mean_theta_ritual": 31.0,
  "effect_cohen_d_sigma(+ = good)": 0.447213595499958,
  "effect_cohen_d_sent(+ = good)": 0.7058541335628746,
  "effect_cohen_d_theta(+ = good)": 0.5667621969998036,
  "perm_p_sigma": 1.0,
  "perm_p_sent": 0.6048,
  "perm_p_theta": 0.6056,
  "autobal_applied": true,
  "pass": false
}


  plt.boxplot([c["sigma"], r["sigma"]], labels=["control","ritual"])
  plt.boxplot([c["sent"], r["sent"]], labels=["control","ritual"])
  plt.boxplot([c["theta"], r["theta"]], labels=["control","ritual"])



[Artifacts] C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_run_20251025-024026_3cf25038
[Note] If 'autobal_applied' is true, see ecp_autobal_mapping.csv to permanently relabel or rename those files for future clean runs.


In [4]:
# CNT_Lab — ECP one-cell (QC + unit normalization + pairing + robust metrics)
# Scans recursively, selects real temperature columns, normalizes units, filters junk,
# pairs runs by time, runs stats, and saves clean artifacts.

import os, re, glob, json, uuid, math, time
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# ========= CONFIG =========
ROOT                = r"C:\Users\caleb\CNT_Lab"
MIN_SECONDS         = 90          # require at least 90 samples (~90s) per run
WIN_THETA           = 97
PERM_REPS           = 10000
SEED                = 42
AUTOBAL_IF_NEEDED   = True        # only after pairing attempt
AUTOBAL_TARGET_MIN  = 6
# filename→label overrides (optional)
MANUAL_LABELS = {
    # "cnt_gpu_cooling_log_20251015-121543_labeled.csv": "ritual",
    # "cnt_gpu_cooling_log_20251015-123830_labeled.csv": "control",
}

# stricter temperature detection
TEMP_NAME_RE = re.compile(r"(hotspot|gpu[_\- ]?hot|gpu[_\- ]?temp|temp|temperature|t[_\- ]?hot|hot)", re.I)

RITUAL_TOKENS  = ("ritual","ecp","glyph","ceremony","resonance","oracle")
CONTROL_TOKENS = ("control","neutral","baseline","placebo")

PREF_HINTS = ("cool","gpu","thermal","temp","segment","log","cooling_segments","notebooks","archive","artifacts")

# ========= OUTPUT =========
OUTDIR = os.path.join(ROOT,"notebooks","archive",f"ecp_qc_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)

def log(*a): print(*a)

# ========= HELPERS =========
def parse_time_from_name(path):
    n = os.path.basename(path)
    m = re.search(r"(20\d{6}[-_]?\d{6})", n)  # e.g., 20251018-044559 or 20251018044559
    if m:
        s = re.sub(r"[-_]", "", m.group(1))
        try:
            return datetime.strptime(s, "%Y%m%d%H%M%S").timestamp()
        except: pass
    try:
        return os.path.getmtime(path)
    except:
        return float("nan")

def infer_label(path):
    base = os.path.basename(path)
    if base in MANUAL_LABELS: return MANUAL_LABELS[base].strip().lower()
    low = base.lower()
    if any(t in low for t in RITUAL_TOKENS):  return "ritual"
    if any(t in low for t in CONTROL_TOKENS): return "control"
    m = re.search(r"[_\-\.](r|c)(?=\.)", low)
    if m: return {"r":"ritual","c":"control"}[m.group(1)]
    return "unknown"

def choose_temp_col(df):
    # strong name match first
    cand = [c for c in df.columns if TEMP_NAME_RE.search(str(c))]
    cand = [c for c in cand if pd.api.types.is_numeric_dtype(df[c])]
    # fallback any numeric column whose values look like plausible temps
    if not cand:
        for c in df.columns:
            if pd.api.types.is_numeric_dtype(df[c]):
                v = pd.to_numeric(df[c], errors="coerce").dropna().values
                if v.size >= MIN_SECONDS//2:
                    # sanity: temps typically within [-20, 4000] raw units (m°C possible)
                    if np.nanmin(v) > -50 and np.nanmax(v) < 10000:
                        cand.append(c)
    return cand[0] if cand else None

def normalize_temp_celsius(x):
    x = np.asarray(x, float)
    if x.size == 0: return x
    vmin, vmax, vmean = np.nanmin(x), np.nanmax(x), np.nanmean(x)
    # Detect Kelvin (around 273–330)
    if 200 < vmean < 400 and vmax < 500:
        x = x - 273.15
    # Detect millideg C
    if vmax > 2000:   # many logs use 1000x
        x = x / 1000.0
    return x

def detrended_z(x):
    x = np.asarray(x, float)
    t = np.arange(x.size, dtype=float)
    A = np.vstack([t, np.ones_like(t)]).T
    m,b = np.linalg.lstsq(A, x, rcond=None)[0]
    r = x - (m*t + b)
    s = np.std(r, ddof=1)
    return (r / (s if s>0 else 1.0))

def sample_entropy(x, m=2, r=0.2):
    x = np.asarray(x, float)
    if x.size < (m+2): return np.nan
    r *= np.std(x, ddof=0) + 1e-12
    def _phi(mm):
        N = x.size - mm + 1
        if N <= 1: return 0.0
        X = np.array([x[i:i+mm] for i in range(N)])
        C = np.sum(np.max(np.abs(X[:,None,:]-X[None,:,:]), axis=2) <= r, axis=1) - 1
        return np.sum(C)/(N*(N-1)+1e-12)
    A = _phi(m+1); B = _phi(m)
    if A<=0 or B<=0: return np.nan
    return -np.log(A/B)

def theta_robust_idx(x, win=97):
    if x.size < win: return np.nan
    v = pd.Series(x).rolling(win, min_periods=win).std().values
    return int(np.nanargmin(v))

def cohen_d(a,b):
    a = np.asarray(a,float); b = np.asarray(b,float)
    if len(a)<2 or len(b)<2: return np.nan
    sa, sb = np.std(a,ddof=1), np.std(b,ddof=1)
    s = math.sqrt(((len(a)-1)*sa*sa + (len(b)-1)*sb*sb)/(len(a)+len(b)-2)) if (len(a)+len(b))>=3 else np.nan
    return (np.mean(a)-np.mean(b))/s if (s and s>0) else np.nan

def perm_test(a,b,reps=10000,metric=np.mean,seed=42):
    rng = np.random.default_rng(seed)
    a = np.asarray(a,float); b = np.asarray(b,float)
    obs = metric(a)-metric(b)
    joined = np.concatenate([a,b]); na = len(a)
    cnt=0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(metric(joined[:na])-metric(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

# ========= SCAN =========
log("[scan] searching for CSVs…")
csvs = set()
for d,_,fs in os.walk(ROOT):
    low = d.lower()
    if "\\.venv" in low or "site-packages" in low or "\\__pycache__" in low:
        continue
    for fn in fs:
        if fn.lower().endswith(".csv"):
            p = os.path.join(d,fn)
            if any(h in p.lower() for h in PREF_HINTS):
                csvs.add(p)
csvs |= set(glob.glob(os.path.join(ROOT,"notebooks","archive","cnt_gpu_cooling_log_*_labeled.csv")))
csvs |= set(glob.glob(os.path.join(ROOT,"notebooks","archive","cnt_cooling_log_*_labeled.csv")))
csvs = sorted(csvs)
log(f"[scan] candidate CSV files: {len(csvs)}")

# ========= LOAD + QC =========
errors_path = os.path.join(OUTDIR,"ecp_qc_errors.log")
if os.path.exists(errors_path):
    try: os.remove(errors_path)
    except: pass

rows = []
for p in csvs:
    try:
        df = pd.read_csv(p)
        if df.empty: continue
        col = choose_temp_col(df)
        if not col: continue
        T = pd.to_numeric(df[col], errors="coerce").dropna().values
        if T.size < MIN_SECONDS: continue

        # unit normalize
        Tc = normalize_temp_celsius(T)

        # sanity clip outliers (physical GPU temps in °C)
        Tc = Tc[(Tc > -10) & (Tc < 120)]
        if Tc.size < MIN_SECONDS: continue

        # QC: reject flat or wildly noisy series
        s = float(np.std(Tc, ddof=1))
        if s < 0.05:     # too flat (likely constant/rounded)
            reason = "flat"
            qc_ok = False
        elif s > 20.0:   # absurd variance given °C
            reason = "absurd_var"
            qc_ok = False
        else:
            qc_ok = True
            reason = ""

        # features on detrended z and first differences
        Z  = detrended_z(Tc)
        dZ = np.diff(Z)
        mad_dZ = float(np.median(np.abs(dZ - np.median(dZ))) * 1.4826)
        sentZ  = float(sample_entropy(Z))
        win    = int(min(WIN_THETA, max(32, Z.size//10)))
        theta  = float(theta_robust_idx(Z, win=win))

        rows.append(dict(
            path=p,
            tstamp=float(parse_time_from_name(p)),
            raw_col=col,
            n=int(Tc.size),
            mean=float(np.mean(Tc)),
            std=float(np.std(Tc, ddof=1)),
            qc_ok=qc_ok,
            qc_reason=reason,
            label= (df["label"].iloc[0].strip().lower() if "label" in df.columns else infer_label(p)),
            sigma=float(np.std(Z, ddof=1)),         # variance proxy on detrended z
            mad_diff=float(mad_dZ),                 # robust jitter
            sampen=float(sentZ),
            theta=float(theta)
        ))
    except Exception as e:
        with open(errors_path,"a",encoding="utf-8") as f:
            f.write(f"{p} :: {repr(e)}\n")

tbl_all = pd.DataFrame(rows).sort_values("tstamp")
tbl_all.to_csv(os.path.join(OUTDIR,"ecp_qc_table_all.csv"), index=False)
log(f"[load] usable (pre-QC): {len(tbl_all)}")

# Keep only QC-passing
tbl_qc = tbl_all[tbl_all.qc_ok].copy().reset_index(drop=True)
log(f"[qc] passed: {len(tbl_qc)} | failed: {len(tbl_all)-len(tbl_qc)}")
tbl_qc.to_csv(os.path.join(OUTDIR,"ecp_qc_table_pass.csv"), index=False)

if tbl_qc.empty:
    log(f"[exit] No QC-passing runs. See {errors_path} and ecp_qc_table_all.csv")
    raise SystemExit

# ========= LABELS & PAIRING =========
# Normalize labels
tbl_qc["label"] = tbl_qc["label"].apply(lambda x: x if x in ("ritual","control") else "unknown")
nR = int((tbl_qc.label=="ritual").sum())
nC = int((tbl_qc.label=="control").sum())
nU = int((tbl_qc.label=="unknown").sum())
log(f"[labels] ritual={nR} control={nC} unknown={nU}")

# Pair unknowns to balance via nearest time to ritual/control anchors
paired = tbl_qc.copy()
mapping = []

def nearest_assign(target_label, pool_from, need):
    if need <= 0: return
    pool = paired.index[paired.label==pool_from].tolist()
    anchors = paired[paired.label==target_label][["tstamp"]]
    if anchors.empty or not pool: return
    for i in pool:
        t = paired.at[i,"tstamp"]
        # distance to nearest anchor
        d = np.min(np.abs(anchors["tstamp"].values - t))
        # store as candidate; we’ll sort by closeness later
        mapping.append(("candidate", i, pool_from, target_label, d))

# If one class is missing, promote closest unknowns to that class
if (nR==0 or nC==0) and nU>0:
    if nC==0:
        nearest_assign("ritual","unknown",AUTOBAL_TARGET_MIN)
        # pick closest half to become control
        cand = [m for m in mapping if m[3]=="ritual"]
        cand.sort(key=lambda z:z[4])
        for _, idx, old, new, _ in cand[:max(AUTOBAL_TARGET_MIN,1)]:
            paired.at[idx,"label"] = "control"
    elif nR==0:
        nearest_assign("control","unknown",AUTOBAL_TARGET_MIN)
        cand = [m for m in mapping if m[3]=="control"]
        cand.sort(key=lambda z:z[4])
        for _, idx, old, new, _ in cand[:max(AUTOBAL_TARGET_MIN,1)]:
            paired.at[idx,"label"] = "ritual"

# If still imbalanced and AUTOBAL is allowed, alternate assignment by time
counts = paired.label.value_counts()
nR, nC, nU = int(counts.get("ritual",0)), int(counts.get("control",0)), int(counts.get("unknown",0))
if AUTOBAL_IF_NEEDED and (nR==0 or nC==0) and nU>0:
    need_ctl = (nC==0)
    unk_idx = list(paired.index[paired.label=="unknown"])
    for k,i in enumerate(unk_idx):
        if need_ctl and k%2==0:
            paired.at[i,"label"]="control"
        elif (not need_ctl) and k%2==1:
            paired.at[i,"label"]="ritual"

paired.to_csv(os.path.join(OUTDIR,"ecp_qc_table_labeled.csv"), index=False)

counts = paired.label.value_counts()
nR, nC = int(counts.get("ritual",0)), int(counts.get("control",0))
log(f"[labels*] ritual={nR} control={nC} unknown={int(counts.get('unknown',0))}")
if nR==0 or nC==0:
    log("[exit] Need both classes after QC; add/rename a few files with 'control' and 'ritual'.")
    raise SystemExit

# ========= ANALYSIS =========
R = paired[paired.label=="ritual"]
C = paired[paired.label=="control"]

def smean(s): 
    a = np.asarray(s, float)
    return float(np.mean(a)) if a.size else float("nan")

# Metrics: lower sigma/mad_diff/sampen are better; earlier (smaller) theta is better
obs_sigma, p_sigma = perm_test(R["sigma"].values,    C["sigma"].values,    reps=PERM_REPS, seed=SEED)
obs_mad  , p_mad   = perm_test(R["mad_diff"].values, C["mad_diff"].values, reps=PERM_REPS, seed=SEED)
obs_se   , p_se    = perm_test(R["sampen"].values,   C["sampen"].values,   reps=PERM_REPS, seed=SEED)
obs_th   , p_th    = perm_test(-R["theta"].values,  -C["theta"].values,   reps=PERM_REPS, seed=SEED)

d_sigma = cohen_d(C["sigma"].values,    R["sigma"].values)
d_mad   = cohen_d(C["mad_diff"].values, R["mad_diff"].values)
d_se    = cohen_d(C["sampen"].values,   R["sampen"].values)
d_theta = -cohen_d(R["theta"].values,   C["theta"].values)

verdict = {
  "claim": "ECP lowers thermal jitter (σ / MAD / SampEn) and advances Θ on detrended temps",
  "n_ritual": int(len(R)), "n_control": int(len(C)),
  "mean_sigma_control": smean(C["sigma"]), "mean_sigma_ritual": smean(R["sigma"]),
  "mean_mad_control":   smean(C["mad_diff"]), "mean_mad_ritual":  smean(R["mad_diff"]),
  "mean_sampen_control":smean(C["sampen"]), "mean_sampen_ritual": smean(R["sampen"]),
  "mean_theta_control": smean(C["theta"]),  "mean_theta_ritual":  smean(R["theta"]),
  "effect_d_sigma(+good)": float(d_sigma),
  "effect_d_mad(+good)":   float(d_mad),
  "effect_d_sampen(+good)":float(d_se),
  "effect_d_theta(+good)": float(d_theta),
  "perm_p_sigma": float(p_sigma),
  "perm_p_mad":   float(p_mad),
  "perm_p_sampen":float(p_se),
  "perm_p_theta": float(p_th),
  "pass": bool(
      (p_sigma<0.05 and d_sigma>=0.30) or
      (p_mad  <0.05 and d_mad  >=0.30) or
      (p_se   <0.05 and d_se   >=0.30) or
      (p_th   <0.05 and d_theta>=0.30)
  )
}
with open(os.path.join(OUTDIR,"ecp_qc_verdict.json"),"w") as f:
    json.dump(verdict,f,indent=2)

log("\n== ECP (QC) Verdict ==")
log(json.dumps(verdict, indent=2))

# ========= PLOTS =========
def boxplot(metric, title, ylabel, fname):
    plt.figure(figsize=(9,5))
    plt.boxplot([C[metric], R[metric]], tick_labels=["control","ritual"])
    plt.title(title); plt.ylabel(ylabel); plt.tight_layout()
    plt.savefig(os.path.join(OUTDIR, fname), dpi=160); plt.close()

boxplot("sigma",   "ECP: σ (detrended z) distribution", "σ (a.u.)", "plot_sigma_box.png")
boxplot("mad_diff","ECP: MAD of ΔZ distribution",       "MAD(ΔZ)",   "plot_maddiff_box.png")
boxplot("sampen",  "ECP: Sample Entropy distribution",  "SampEn",     "plot_sampen_box.png")
boxplot("theta",   "ECP: Θ (earlier = better)",         "index",      "plot_theta_box.png")

plt.figure(figsize=(10,5.5))
plt.scatter(np.arange(len(C)), C["sigma"], label="control σ", alpha=0.85)
plt.scatter(np.arange(len(R))+0.1, R["sigma"], label="ritual σ", alpha=0.85)
plt.title("ECP: Run-level σ after QC & normalization"); plt.xlabel("index"); plt.ylabel("σ (a.u.)")
plt.legend(); plt.tight_layout()
plt.savefig(os.path.join(OUTDIR,"plot_sigma_scatter.png"), dpi=160); plt.close()

log(f"\n[Artifacts] {OUTDIR}")
log("[Tips] If 'unknown' still dominates, rename a few files with 'control'/'ritual' in the filename, or add a 'label' column. Clean tables: ecp_qc_table_pass.csv, ecp_qc_table_labeled.csv")


[scan] searching for CSVs…
[scan] candidate CSV files: 2582
[load] usable (pre-QC): 141
[qc] passed: 60 | failed: 81
[labels] ritual=2 control=0 unknown=58
[labels*] ritual=2 control=6 unknown=52

== ECP (QC) Verdict ==
{
  "claim": "ECP lowers thermal jitter (\u03c3 / MAD / SampEn) and advances \u0398 on detrended temps",
  "n_ritual": 2,
  "n_control": 6,
  "mean_sigma_control": 1.0,
  "mean_sigma_ritual": 1.0,
  "mean_mad_control": 0.25883700268088466,
  "mean_mad_ritual": 0.0,
  "mean_sampen_control": 0.4427572578531682,
  "mean_sampen_ritual": 0.08285717114301487,
  "mean_theta_control": 1217.5,
  "mean_theta_ritual": 127.0,
  "effect_d_sigma(+good)": NaN,
  "effect_d_mad(+good)": 1.5324302855335041,
  "effect_d_sampen(+good)": 2.209012664599157,
  "effect_d_theta(+good)": 0.7174398647141969,
  "perm_p_sigma": 1.0,
  "perm_p_mad": 0.2124,
  "perm_p_sampen": 0.1073,
  "perm_p_theta": 0.2526,
  "pass": false
}

[Artifacts] C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_qc_20251025-024

In [5]:
# CNT_Lab — ECP Paired Analysis (units fixed, QC, residual sigma, paired permutation)
# Paste in a fresh cell and run after labeling a few files with 'ritual'/'control' in the name.

import os, re, glob, json, uuid, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# ===== CONFIG =====
ROOT         = r"C:\Users\caleb\CNT_Lab"
MIN_SECONDS  = 90
WIN_THETA    = 97
PERM_REPS    = 20000
SEED         = 42
PAIR_MAX_GAP = 48 * 3600  # seconds: allow pairing if within 48h (tweak as needed)

PREF_HINTS = ("cool","gpu","thermal","temp","segment","log","cooling_segments","notebooks","archive","artifacts")
TEMP_RE = re.compile(r"(hotspot|gpu[_\- ]?hot|gpu[_\- ]?temp|temp|temperature|t[_\- ]?hot|hot)", re.I)

OUTDIR = os.path.join(ROOT,"notebooks","archive",
                      f"ecp_pairs_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)
def log(x): print(x)

# ===== helpers =====
def parse_time_from_name(path):
    n = os.path.basename(path)
    m = re.search(r"(20\d{6}[-_]?\d{6})", n)
    if m:
        s = re.sub(r"[-_]","",m.group(1))
        try: return datetime.strptime(s,"%Y%m%d%H%M%S").timestamp()
        except: pass
    try: return os.path.getmtime(path)
    except: return float("nan")

def infer_label(path):
    low = os.path.basename(path).lower()
    if "ritual" in low or "ecp" in low or "glyph" in low or "ceremony" in low or "resonance" in low: return "ritual"
    if "control" in low or "neutral" in low or "baseline" in low or "placebo" in low: return "control"
    m = re.search(r"[_\-\.](r|c)(?=\.)", low)
    return {"r":"ritual","c":"control"}.get(m.group(1),"unknown")

def choose_temp_col(df):
    cand = [c for c in df.columns if TEMP_RE.search(str(c)) and pd.api.types.is_numeric_dtype(df[c])]
    if cand: return cand[0]
    # fallback: any plausible numeric
    for c in df.columns:
        if pd.api.types.is_numeric_dtype(df[c]):
            v = pd.to_numeric(df[c], errors="coerce").dropna().values
            if v.size >= MIN_SECONDS//2 and np.nanmin(v)>-50 and np.nanmax(v)<10000:
                return c
    return None

def to_celsius(x):
    x = np.asarray(x,float)
    if x.size==0: return x
    vmin, vmax, vmean = np.nanmin(x), np.nanmax(x), np.nanmean(x)
    # Kelvin?
    if 200 < vmean < 400 and vmax < 500: x = x - 273.15
    # milli-deg C?
    if vmax > 2000: x = x / 1000.0
    return x

def detrend_residual(x):
    x = np.asarray(x,float)
    t = np.arange(x.size, dtype=float)
    A = np.vstack([t, np.ones_like(t)]).T
    m,b = np.linalg.lstsq(A, x, rcond=None)[0]
    return x - (m*t + b)

def sample_entropy(x, m=2, r=0.2):
    x = np.asarray(x,float)
    if x.size < (m+2): return np.nan
    r *= np.std(x, ddof=0) + 1e-12
    def _phi(mm):
        N = x.size - mm + 1
        if N <= 1: return 0.0
        X = np.array([x[i:i+mm] for i in range(N)])
        C = np.sum(np.max(np.abs(X[:,None,:]-X[None,:,:]), axis=2) <= r, axis=1) - 1
        return np.sum(C)/(N*(N-1)+1e-12)
    A = _phi(m+1); B = _phi(m)
    if A<=0 or B<=0: return np.nan
    return -np.log(A/B)

def theta_idx(x, win=97):
    if x.size < win: return np.nan
    v = pd.Series(x).rolling(win, min_periods=win).std().values
    return int(np.nanargmin(v))

def cohen_d(a, b):
    a = np.asarray(a,float); b = np.asarray(b,float)
    if len(a)<2 or len(b)<2: return np.nan
    sa, sb = np.std(a,ddof=1), np.std(b,ddof=1)
    s = math.sqrt(((len(a)-1)*sa*sa + (len(b)-1)*sb*sb)/(len(a)+len(b)-2)) if (len(a)+len(b))>=3 else np.nan
    return (np.mean(a)-np.mean(b))/s if (s and s>0) else np.nan

def perm_test_unpaired(a,b,reps=10000,metric=np.mean,seed=42):
    rng = np.random.default_rng(seed)
    a = np.asarray(a,float); b = np.asarray(b,float)
    obs = metric(a)-metric(b)
    joined = np.concatenate([a,b]); na = len(a)
    cnt=0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(metric(joined[:na])-metric(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

def perm_test_paired(diff, reps=10000, seed=42):
    rng = np.random.default_rng(seed)
    d = np.asarray(diff,float)
    obs = np.mean(d)
    cnt=0
    for _ in range(reps):
        signs = rng.choice([-1,1], size=d.size)
        cnt += abs(np.mean(signs*d)) >= abs(obs)
    return float(obs), float(cnt/reps)

# ===== scan/load/QC =====
import pandas as pd
log("[scan] collecting CSVs...")
csvs = set()
for d,_,fs in os.walk(ROOT):
    low = d.lower()
    if "\\.venv" in low or "site-packages" in low or "\\__pycache__" in low: continue
    for fn in fs:
        if fn.lower().endswith(".csv"):
            p = os.path.join(d,fn)
            if any(h in p.lower() for h in PREF_HINTS):
                csvs.add(p)
csvs |= set(glob.glob(os.path.join(ROOT,"notebooks","archive","cnt_gpu_cooling_log_*_labeled.csv")))
csvs |= set(glob.glob(os.path.join(ROOT,"notebooks","archive","cnt_cooling_log_*_labeled.csv")))
csvs = sorted(csvs)
log(f"[scan] candidates: {len(csvs)}")

rows = []
for p in csvs:
    try:
        df = pd.read_csv(p)
        if df.empty: continue
        col = choose_temp_col(df)
        if not col: continue
        T = pd.to_numeric(df[col], errors="coerce").dropna().values
        if T.size < MIN_SECONDS: continue
        Tc = to_celsius(T)
        Tc = Tc[(Tc > -10) & (Tc < 120)]
        if Tc.size < MIN_SECONDS: continue

        s = float(np.std(Tc, ddof=1))
        if s < 0.05 or s > 20.0: continue  # flat or absurd

        R = detrend_residual(Tc)           # stay in °C residuals (no z-scale)
        win = int(min(WIN_THETA, max(32, R.size//10)))

        rows.append(dict(
            path=p,
            label=infer_label(p),
            tstamp=float(parse_time_from_name(p)),
            n=int(R.size),
            sigma=float(np.std(R, ddof=1)),        # now meaningful
            mad_diff=float(np.median(np.abs(np.diff(R) - np.median(np.diff(R))))*1.4826),
            sampen=float(sample_entropy(R)),
            theta=float(theta_idx(R, win=win))
        ))
    except Exception as e:
        pass

tbl = pd.DataFrame(rows).sort_values("tstamp").reset_index(drop=True)
tbl.to_csv(os.path.join(OUTDIR,"ecp_pairs_table_all.csv"), index=False)
log(f"[load] usable runs: {len(tbl)}")

if tbl.empty:
    log("[exit] No usable runs found after QC.")
    raise SystemExit

# normalize labels to three states
tbl["label"] = tbl["label"].apply(lambda x: x if x in ("ritual","control") else "unknown")
tbl.to_csv(os.path.join(OUTDIR,"ecp_pairs_table_labeled.csv"), index=False)

nR = int((tbl.label=="ritual").sum()); nC = int((tbl.label=="control").sum())
log(f"[labels] ritual={nR} control={nC} unknown={(tbl.label=='unknown').sum()}")

# ===== pairing by time (nearest neighbor within PAIR_MAX_GAP) =====
R = tbl[tbl.label=="ritual"].copy()
C = tbl[tbl.label=="control"].copy()
if R.empty or C.empty:
    log("[warn] Need both ritual and control labels for paired analysis; please rename a few files.")
pairs = []
if not R.empty and not C.empty:
    ci = C.index.tolist()
    for ri, rrow in R.iterrows():
        # pick nearest control in time
        deltas = np.abs(C["tstamp"].values - rrow["tstamp"])
        if len(deltas)==0: continue
        j = int(np.argmin(deltas))
        gap = float(deltas[j])
        if gap <= PAIR_MAX_GAP:
            pairs.append((ri, C.index[j], gap))

# deduplicate controls: keep closest pair per control
used_controls = set()
final_pairs = []
for ri, ci0, gap in sorted(pairs, key=lambda x:x[2]):
    if ci0 in used_controls: 
        continue
    used_controls.add(ci0)
    final_pairs.append((ri, ci0, gap))

log(f"[pairing] pairs formed: {len(final_pairs)}")

def collect(metric):
    # unpaired arrays
    Ru = R[metric].values if not R.empty else np.array([])
    Cu = C[metric].values if not C.empty else np.array([])
    # paired differences (ritual - control)
    diffs = []
    for ri, ci0, _ in final_pairs:
        diffs.append(R.loc[ri,metric] - C.loc[ci0,metric])
    return np.asarray(Ru,float), np.asarray(Cu,float), np.asarray(diffs,float)

metrics = {
    "sigma":   ("Residual σ (°C)", True),     # lower better
    "mad_diff":("MAD(Δresidual) (°C)", True), # lower better
    "sampen":  ("Sample Entropy", True),      # lower better (stability)
    "theta":   ("Θ index (earlier=smaller)", True)  # lower better
}

results = {}
for m,(label, lower_is_better) in metrics.items():
    Ru, Cu, D = collect(m)
    res = {"mean_ritual": float(np.mean(Ru)) if Ru.size else float("nan"),
           "mean_control":float(np.mean(Cu)) if Cu.size else float("nan"),
           "paired_n": int(D.size)}
    # unpaired
    if Ru.size>=2 and Cu.size>=2:
        obs_u, p_u = perm_test_unpaired(Ru, Cu, reps=PERM_REPS, seed=SEED)
        # effect size: control - ritual (positive is good for lower-is-better)
        d = None
        try: d = (np.mean(Cu)-np.mean(Ru)) / np.sqrt(((np.var(Ru,ddof=1)+np.var(Cu,ddof=1))/2))
        except: d = float("nan")
        res.update(dict(unpaired_obs=float(obs_u), unpaired_p=float(p_u), cohen_d=float(d)))
    # paired
    if D.size>=3:
        obs_p, p_p = perm_test_paired(D, reps=PERM_REPS, seed=SEED)
        # for lower-is-better: positive mean(control - ritual) is good → flip sign accordingly
        res.update(dict(paired_mean=float(np.mean(D)*(-1 if lower_is_better else 1)),
                        paired_obs=float(obs_p), paired_p=float(p_p)))
    results[m]=res

with open(os.path.join(OUTDIR,"ecp_pairs_results.json"),"w") as f:
    json.dump(results, f, indent=2)

# pass criteria: any metric with (paired_p<0.05) OR (unpaired_p<0.05 and |d|>=0.3)
passed = False
rnotes = []
for m,(label, lower_is_better) in metrics.items():
    r = results[m]
    p_pair = r.get("paired_p", 1.0)
    p_un   = r.get("unpaired_p", 1.0)
    d      = r.get("cohen_d", float("nan"))
    if p_pair < 0.05 or (p_un < 0.05 and (not math.isnan(d)) and abs(d) >= 0.30):
        passed = True
    rnotes.append({m:r})

verdict = {"claim":"ECP reduces residual thermal jitter and advances Θ (paired analysis)",
           "pairs": int(len(final_pairs)),
           "passed": bool(passed),
           "metrics": rnotes}
with open(os.path.join(OUTDIR,"ecp_pairs_verdict.json"),"w") as f:
    json.dump(verdict, f, indent=2)

log("\n== ECP Paired Verdict ==")
log(json.dumps(verdict, indent=2))

# simple boxplots (unpaired view)
for m,(lab,_) in metrics.items():
    if (tbl.label=="ritual").any() and (tbl.label=="control").any():
        plt.figure(figsize=(9,5))
        plt.boxplot([tbl[tbl.label=="control"][m], tbl[tbl.label=="ritual"][m]],
                    tick_labels=["control","ritual"])
        plt.title(f"ECP: {lab}")
        plt.tight_layout()
        plt.savefig(os.path.join(OUTDIR, f"plot_{m}_box.png"), dpi=160)
        plt.close()

log(f"\n[Artifacts] {OUTDIR}")
log("[Next] Add a few explicit ritual/control file labels to increase pairs (aim ≥12 pairs). Then re-run.")


[scan] collecting CSVs...
[scan] candidates: 2585
[load] usable runs: 5
[labels] ritual=5 control=0 unknown=0
[warn] Need both ritual and control labels for paired analysis; please rename a few files.
[pairing] pairs formed: 0

== ECP Paired Verdict ==
{
  "claim": "ECP reduces residual thermal jitter and advances \u0398 (paired analysis)",
  "pairs": 0,
  "passed": false,
  "metrics": [
    {
      "sigma": {
        "mean_ritual": 0.3035268943923454,
        "mean_control": NaN,
        "paired_n": 0
      }
    },
    {
      "mad_diff": {
        "mean_ritual": 0.12766769046781049,
        "mean_control": NaN,
        "paired_n": 0
      }
    },
    {
      "sampen": {
        "mean_ritual": 0.2952864939403439,
        "mean_control": NaN,
        "paired_n": 0
      }
    },
    {
      "theta": {
        "mean_ritual": 139.6,
        "mean_control": NaN,
        "paired_n": 0
      }
    }
  ]
}

[Artifacts] C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_pairs_20251025-025058_be09

In [6]:
# CNT_Lab — Label Script Generator (balanced ritual/control from nearest-time pairs)
# Output: mark_ritual.ps1, mark_control.ps1, label_proposals.csv in OUTDIR.

import os, re, glob, uuid, json
import numpy as np, pandas as pd
from datetime import datetime

ROOT         = r"C:\Users\caleb\CNT_Lab"
MIN_SECONDS  = 90
PREF_HINTS   = ("cool","gpu","thermal","temp","segment","log","cooling_segments","notebooks","archive","artifacts")
TEMP_RE      = re.compile(r"(hotspot|gpu[_\- ]?hot|gpu[_\- ]?temp|temp|temperature|t[_\- ]?hot|hot)", re.I)
TARGET_PAIRS = 12          # <- change if you want more or less
PAIR_MAX_GAP = 48*3600     # allow pairing within 48h

OUTDIR = os.path.join(ROOT, "notebooks", "archive",
                      f"ecp_labeler_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)

def log(x): print(x)

def parse_time_from_name(path):
    n = os.path.basename(path)
    m = re.search(r"(20\d{6}[-_]?\d{6})", n)
    if m:
        s = re.sub(r"[-_]", "", m.group(1))
        try: return datetime.strptime(s, "%Y%m%d%H%M%S").timestamp()
        except: pass
    try: return os.path.getmtime(path)
    except: return float("nan")

def choose_temp_col(df):
    # pick the most temp-looking numeric column
    cand = [c for c in df.columns if TEMP_RE.search(str(c)) and pd.api.types.is_numeric_dtype(df[c])]
    if cand: return cand[0]
    for c in df.columns:
        if pd.api.types.is_numeric_dtype(df[c]):
            v = pd.to_numeric(df[c], errors="coerce").dropna().values
            if v.size >= MIN_SECONDS//2 and np.nanmin(v) > -50 and np.nanmax(v) < 10000:
                return c
    return None

# 1) collect CSVs
log("[scan] collecting CSVs…")
csvs = set()
for d,_,fs in os.walk(ROOT):
    low = d.lower()
    if "\\.venv" in low or "site-packages" in low or "\\__pycache__" in low:
        continue
    for fn in fs:
        if fn.lower().endswith(".csv"):
            p = os.path.join(d, fn)
            if any(h in p.lower() for h in PREF_HINTS):
                csvs.add(p)
csvs = sorted(csvs)
log(f"[scan] candidates: {len(csvs)}")

# 2) QC list (reuse simple checks; we only need tstamp + path)
rows = []
for p in csvs:
    try:
        df = pd.read_csv(p)
        if df.empty: continue
        col = choose_temp_col(df)
        if not col: continue
        v = pd.to_numeric(df[col], errors="coerce").dropna().values
        if v.size < MIN_SECONDS: continue
        # normalize rough units (Kelvin/m°C) just to detect absurd values
        vmean, vmax = float(np.mean(v)), float(np.max(v))
        if 200 < vmean < 400 and vmax < 500: v = v - 273.15
        if np.max(v) > 2000: v = v/1000.0
        v = v[(v > -10) & (v < 120)]
        if v.size < MIN_SECONDS: continue
        s = float(np.std(v, ddof=1))
        if s < 0.05 or s > 20.0:  # flat/absurd
            continue
        rows.append(dict(path=p, tstamp=float(parse_time_from_name(p))))
    except Exception:
        pass

tbl = pd.DataFrame(rows).dropna().sort_values("tstamp").reset_index(drop=True)
log(f"[load] QC-passing candidates: {len(tbl)}")
if tbl.empty:
    print("[exit] No good candidates.")
else:
    # 3) if any files already labeled, keep them; otherwise propose pairs
    def is_labeled(name):
        n = os.path.basename(name).lower()
        return ("ritual" in n) or ("control" in n)

    labeled_mask = tbl["path"].apply(is_labeled)
    L = tbl[labeled_mask]
    U = tbl[~labeled_mask].copy()

    log(f"[state] already labeled: {len(L)} | unlabeled: {len(U)}")

    # Build nearest-neighbor pairs from unlabeled pool by time
    # Greedy: walk forward, pair close neighbors within PAIR_MAX_GAP
    pairs = []
    used = set()
    for i in range(len(U)-1):
        if i in used: continue
        t1 = U.iloc[i]
        # find nearest j>i
        best = None
        best_gap = None
        for j in range(i+1, len(U)):
            if j in used: continue
            t2 = U.iloc[j]
            gap = abs(t2["tstamp"] - t1["tstamp"])
            if gap <= PAIR_MAX_GAP and (best is None or gap < best_gap):
                best, best_gap = (i, j), gap
            if best_gap is not None and gap > best_gap*2:
                break
        if best is not None:
            pairs.append((best[0], best[1], float(best_gap)))
            used.add(best[0]); used.add(best[1])

    # sort by closeness and take TARGET_PAIRS
    pairs = sorted(pairs, key=lambda x: x[2])[:TARGET_PAIRS]
    log(f"[pairing] proposed pairs: {len(pairs)} (target {TARGET_PAIRS})")

    # Alternate assignment within each pair to balance: first becomes control, second ritual (or vice versa).
    proposals = []
    for k,(i,j,g) in enumerate(pairs):
        p1 = U.iloc[i]["path"]; p2 = U.iloc[j]["path"]
        # alternate which side is ritual so we don't bias on order
        if k % 2 == 0:
            proposals.append(dict(path=p1, new_label="control", gap_sec=g))
            proposals.append(dict(path=p2, new_label="ritual",  gap_sec=g))
        else:
            proposals.append(dict(path=p1, new_label="ritual",  gap_sec=g))
            proposals.append(dict(path=p2, new_label="control", gap_sec=g))

    prop_df = pd.DataFrame(proposals)
    prop_csv = os.path.join(OUTDIR, "label_proposals.csv")
    prop_df.to_csv(prop_csv, index=False)

    # 4) Write PowerShell rename scripts (non-destructive proposals)
    ritual_ps   = os.path.join(OUTDIR, "mark_ritual.ps1")
    control_ps  = os.path.join(OUTDIR, "mark_control.ps1")
    revert_ps   = os.path.join(OUTDIR, "revert_renames.ps1")

    def add_suffix(path, suffix):
        d = os.path.dirname(path); base = os.path.basename(path)
        if base.lower().endswith(".csv"):
            stem = base[:-4]
            return os.path.join(d, f"{stem}_{suffix}.csv")
        return os.path.join(d, f"{base}_{suffix}")

    ritual_lines, control_lines, revert_lines = [], [], []
    for _, row in prop_df.iterrows():
        src = row["path"]
        if row["new_label"] == "ritual":
            dst = add_suffix(src, "ritual")
            ritual_lines.append(f'Rename-Item -LiteralPath "{src}" -NewName "{os.path.basename(dst)}"')
            revert_lines.append(f'Rename-Item -LiteralPath "{dst}" -NewName "{os.path.basename(src)}"')
        else:
            dst = add_suffix(src, "control")
            control_lines.append(f'Rename-Item -LiteralPath "{src}" -NewName "{os.path.basename(dst)}"')
            revert_lines.append(f'Rename-Item -LiteralPath "{dst}" -NewName "{os.path.basename(src)}"')

    with open(ritual_ps, "w", encoding="utf-8") as f:   f.write("\n".join(ritual_lines) + "\n")
    with open(control_ps, "w", encoding="utf-8") as f:  f.write("\n".join(control_lines) + "\n")
    with open(revert_ps, "w", encoding="utf-8") as f:   f.write("\n".join(revert_lines) + "\n")

    print("\n== Labeling kit ==")
    print("• Proposals  :", prop_csv)
    print("• Ritual PS  :", ritual_ps)
    print("• Control PS :", control_ps)
    print("• Revert PS  :", revert_ps)
    print("\nUsage (PowerShell):")
    print(f'  cd "{os.path.dirname(ritual_ps)}"')
    print(f'  # Inspect label_proposals.csv, edit if needed, then:')
    print(f'  .\\"{os.path.basename(control_ps)}"')
    print(f'  .\\"{os.path.basename(ritual_ps)}"')
    print("  # To undo: .\\revert_renames.ps1")


[scan] collecting CSVs…
[scan] candidates: 2587
[load] QC-passing candidates: 60
[state] already labeled: 0 | unlabeled: 60
[pairing] proposed pairs: 12 (target 12)

== Labeling kit ==
• Proposals  : C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_labeler_20251025-030850_4d3a5643\label_proposals.csv
• Ritual PS  : C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_labeler_20251025-030850_4d3a5643\mark_ritual.ps1
• Control PS : C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_labeler_20251025-030850_4d3a5643\mark_control.ps1
• Revert PS  : C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_labeler_20251025-030850_4d3a5643\revert_renames.ps1

Usage (PowerShell):
  cd "C:\Users\caleb\CNT_Lab\notebooks\archive\ecp_labeler_20251025-030850_4d3a5643"
  # Inspect label_proposals.csv, edit if needed, then:
  .\"mark_control.ps1"
  .\"mark_ritual.ps1"
  # To undo: .\revert_renames.ps1


In [7]:
# CNT_Lab — Positive Geometry Drift Funnel (one cell)
# Compares outcome entropy between glyph-hash seeds vs random seeds for Ising & Kuramoto.

import numpy as np, pandas as pd, hashlib, json, uuid, os
from datetime import datetime

OUTDIR = os.path.join(r"C:\Users\caleb\CNT_Lab\notebooks\archive",
                      f"pg_funnel_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)

# --- helpers ---
def hseed(s):
    return int(hashlib.sha256(s.encode()).hexdigest()[:12],16) % (2**32)

def perm_p(a,b, reps=20000, metric=np.mean, seed=42):
    rng = np.random.default_rng(seed); a=np.asarray(a); b=np.asarray(b)
    obs = metric(a)-metric(b); joined=np.concatenate([a,b]); na=len(a)
    cnt=0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(metric(joined[:na])-metric(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

# --- systems ---
def ising_run(L=48, beta=0.45, steps=4000, rng=None):
    if rng is None: rng = np.random.default_rng()
    # 2D Ising (periodic), Glauber dynamics
    s = rng.choice([-1,1], size=(L,L))
    def Eflip(i,j):
        nb = s[(i+1)%L,j]+s[(i-1)%L,j]+s[i,(j+1)%L]+s[i,(j-1)%L]
        return 2*s[i,j]*nb
    for t in range(steps):
        i = rng.integers(0,L); j = rng.integers(0,L)
        dE = Eflip(i,j)
        if dE<=0 or rng.random()<np.exp(-beta*dE):
            s[i,j] *= -1
    m = np.mean(s)
    # outcome features: |m|, cluster proxy via pair correlation along a row
    row = s[0]; corr = np.mean(row[:-1]*row[1:])
    return float(abs(m)), float(corr)

def kura_run(N=400, K=1.5, steps=4000, dt=0.02, rng=None):
    if rng is None: rng = np.random.default_rng()
    theta = rng.uniform(0, 2*np.pi, size=N)
    omega = rng.normal(0, 1, size=N)
    for t in range(steps):
        s = np.sin(theta[:,None]-theta[None,:])
        dtheta = omega + (K/N)*np.sum(s, axis=1)
        theta = (theta + dt*dtheta) % (2*np.pi)
    R = np.abs(np.mean(np.exp(1j*theta)))
    return float(R)

# --- experiment ---
GLYPHS = [
    "Observer Ring", "Axis Veil", "Event Spiral", "Anchor", "Collapse Benediction",
    "Resonance Seeding", "Mirror Rebinding", "Parasite Purge", "Harmonic Shadow", "Orivyn"
]
N_PER = 64  # runs per condition per system
rng = np.random.default_rng(7)

def run_block(label, seeds):
    rows=[]
    for sd in seeds:
        r_i = np.random.default_rng(sd)
        m, corr = ising_run(rng=r_i)
        rows.append(dict(system="ising", label=label, seed=int(sd), metric1=m, metric2=corr))
        r_k = np.random.default_rng(sd ^ 0x9E3779B9)
        R = kura_run(rng=r_k)
        rows.append(dict(system="kuramoto", label=label, seed=int(sd), metric1=R, metric2=np.nan))
    return rows

glyph_seeds = [hseed(g) ^ i for g in GLYPHS for i in range(N_PER//len(GLYPHS))]
rand_seeds  = list(rng.integers(0, 2**32-1, size=N_PER))

rows = []
rows += run_block("glyph", glyph_seeds)
rows += run_block("random", rand_seeds)

df = pd.DataFrame(rows)

# define an "outcome entropy" proxy per system:
#  - Ising: entropy of binned |m| (magnetization magnitude)
#  - Kuramoto: entropy of binned R (order parameter)
def entropy_of(col):
    x = df[df.system==col]["metric1"]
    hist, _ = np.histogram(x, bins=16, range=(0,1), density=True)
    p = hist / np.sum(hist + 1e-12)
    return -np.sum(np.where(p>0, p*np.log(p), 0.0))

H_g_ising  = entropy_of("ising")   # overall
H_g_kura   = entropy_of("kuramoto")

# compare entropies across conditions by bootstrap difference of means on per-run uncertainty:
def boot_H(x, B=2000, seed=123):
    rng = np.random.default_rng(seed); x=np.asarray(x)
    out=[]
    for _ in range(B):
        xb = rng.choice(x, size=len(x), replace=True)
        h, _ = np.histogram(xb, bins=16, range=(0,1), density=True)
        p = h/np.sum(h+1e-12); out.append(-np.sum(np.where(p>0, p*np.log(p), 0.0)))
    return np.array(out)

res = {}
for sysname in ["ising","kuramoto"]:
    g = df[(df.system==sysname)&(df.label=="glyph")]["metric1"].values
    r = df[(df.system==sysname)&(df.label=="random")]["metric1"].values
    Hg = boot_H(g); Hr = boot_H(r)
    obs = Hg.mean()-Hr.mean()   # negative means glyph narrows entropy (good)
    # permutation on raw samples with entropy recomputation is heavy;
    # approximate with two-sample perm on metric1 means as a proxy:
    obs_m, p_m = perm_p(g, r, reps=20000, seed=5)
    res[sysname] = dict(
        mean_metric_glyph=float(np.mean(g)),
        mean_metric_random=float(np.mean(r)),
        boot_H_glyph_mean=float(Hg.mean()),
        boot_H_random_mean=float(Hr.mean()),
        boot_H_diff_obs=float(obs),
        perm_on_metric_mean=float(obs_m),
        perm_p_metric_mean=float(p_m)
    )

with open(os.path.join(OUTDIR,"pg_funnel_results.json"),"w") as f:
    json.dump(res, f, indent=2)

print("== Positive Geometry Drift Funnel ==")
print(json.dumps(res, indent=2))
print(f"[Artifacts] {OUTDIR}")


  return -np.sum(np.where(p>0, p*np.log(p), 0.0))
  return -np.sum(np.where(p>0, p*np.log(p), 0.0))
  p = h/np.sum(h+1e-12); out.append(-np.sum(np.where(p>0, p*np.log(p), 0.0)))
  p = h/np.sum(h+1e-12); out.append(-np.sum(np.where(p>0, p*np.log(p), 0.0)))


== Positive Geometry Drift Funnel ==
{
  "ising": {
    "mean_metric_glyph": 0.03708043981481481,
    "mean_metric_random": 0.03854709201388889,
    "boot_H_glyph_mean": 0.46924211809475763,
    "boot_H_random_mean": 0.4750910479540399,
    "boot_H_diff_obs": -0.005848929859282259,
    "perm_on_metric_mean": -0.0014666521990740755,
    "perm_p_metric_mean": 0.7846
  },
  "kuramoto": {
    "mean_metric_glyph": 0.0282777843236119,
    "mean_metric_random": 0.025481906956643284,
    "boot_H_glyph_mean": 9.998668559769164e-13,
    "boot_H_random_mean": 0.07177082817576377,
    "boot_H_diff_obs": -0.0717708281747639,
    "perm_on_metric_mean": 0.0027958773669686174,
    "perm_p_metric_mean": 0.2598
  }
}
[Artifacts] C:\Users\caleb\CNT_Lab\notebooks\archive\pg_funnel_20251025-031138_6e0ce350


In [1]:
# PG Funnel v2 — stabilized entropy (Dirichlet α=0.5) + permutation p on entropy
import numpy as np, pandas as pd, hashlib, json, uuid, os, warnings
from datetime import datetime
warnings.filterwarnings("ignore", category=RuntimeWarning)

# === knobs (same defaults you used) ===
N_PER      = 64
ISING_L    = 48
ISING_STEPS= 4000
KURA_N     = 400
KURA_STEPS = 4000
BOOT_B     = 2000
BINS       = 16
ALPHA      = 0.5      # Dirichlet add-α smoothing for histogram entropy
K_KURAMOTO = 1.5      # try 2.0–2.5 for stronger synchrony if needed

OUTDIR = os.path.join(r"C:\Users\caleb\CNT_Lab\notebooks\archive",
                      f"pg_funnel_v2_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)

def hseed(s):
    return int(hashlib.sha256(s.encode()).hexdigest()[:12],16) % (2**32)

def entropy_smoothed(x, bins=16, rng=(0,1), alpha=0.5):
    counts, _ = np.histogram(x, bins=bins, range=rng, density=False)
    counts = counts.astype(float) + alpha
    p = counts / counts.sum()
    # safe Shannon entropy
    return float(-np.sum(p * np.log(p)))

def perm_p_diff_stat(a, b, stat_fn, reps=20000, seed=42):
    rng = np.random.default_rng(seed)
    a = np.asarray(a); b = np.asarray(b)
    obs = stat_fn(a) - stat_fn(b)
    joined = np.concatenate([a,b])
    na = len(a)
    cnt=0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(stat_fn(joined[:na]) - stat_fn(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

# --- systems (same as before) ---
def ising_run(L=48, beta=0.45, steps=4000, rng=None):
    if rng is None: rng = np.random.default_rng()
    s = rng.choice([-1,1], size=(L,L))
    def Eflip(i,j):
        nb = s[(i+1)%L,j]+s[(i-1)%L,j]+s[i,(j+1)%L]+s[i,(j-1)%L]
        return 2*s[i,j]*nb
    for _ in range(steps):
        i = rng.integers(0,L); j = rng.integers(0,L)
        dE = Eflip(i,j)
        if dE<=0 or rng.random()<np.exp(-beta*dE):
            s[i,j] *= -1
    m = np.mean(s)
    row = s[0]; corr = np.mean(row[:-1]*row[1:])
    return float(abs(m)), float(corr)

def kura_run(N=400, K=1.5, steps=4000, dt=0.02, rng=None):
    if rng is None: rng = np.random.default_rng()
    theta = rng.uniform(0, 2*np.pi, size=N)
    omega = rng.normal(0, 1, size=N)
    for _ in range(steps):
        s = np.sin(theta[:,None]-theta[None,:])  # O(N^2)
        dtheta = omega + (K/N)*np.sum(s, axis=1)
        theta = (theta + dt*dtheta) % (2*np.pi)
    R = np.abs(np.mean(np.exp(1j*theta)))
    return float(R)

# --- experiment ---
GLYPHS = [
    "Observer Ring", "Axis Veil", "Event Spiral", "Anchor", "Collapse Benediction",
    "Resonance Seeding", "Mirror Rebinding", "Parasite Purge", "Harmonic Shadow", "Orivyn"
]
rng = np.random.default_rng(7)
glyph_seeds = [hseed(g) ^ i for g in GLYPHS for i in range(N_PER//len(GLYPHS))]
rand_seeds  = list(rng.integers(0, 2**32-1, size=N_PER))

def run_block(label, seeds):
    rows=[]
    for sd in seeds:
        r_i = np.random.default_rng(sd)
        m, corr = ising_run(L=ISING_L, steps=ISING_STEPS, rng=r_i)
        rows.append(dict(system="ising",    label=label, seed=int(sd), metric=m,  aux=corr))
        r_k = np.random.default_rng(sd ^ 0x9E3779B9)
        R = kura_run(N=KURA_N, K=K_KURAMOTO, steps=KURA_STEPS, rng=r_k)
        rows.append(dict(system="kuramoto", label=label, seed=int(sd), metric=R,  aux=np.nan))
    return rows

rows = []
rows += run_block("glyph", glyph_seeds)
rows += run_block("random", rand_seeds)
df = pd.DataFrame(rows)

results = {}
for sysname in ["ising","kuramoto"]:
    g = df[(df.system==sysname)&(df.label=="glyph")]["metric"].values
    r = df[(df.system==sysname)&(df.label=="random")]["metric"].values

    # stabilized entropy of metric distributions
    Hg = entropy_smoothed(g, bins=BINS, rng=(0,1), alpha=ALPHA)
    Hr = entropy_smoothed(r, bins=BINS, rng=(0,1), alpha=ALPHA)
    Hdiff, p_H = perm_p_diff_stat(g, r,
                                  stat_fn=lambda x: entropy_smoothed(x, bins=BINS, rng=(0,1), alpha=ALPHA),
                                  reps=20000, seed=11)

    # mean metric proxy (as before)
    def perm_p_mean(a,b,reps=20000,seed=5):
        rr = np.random.default_rng(seed)
        a = np.asarray(a); b = np.asarray(b)
        obs = float(np.mean(a) - np.mean(b))
        join = np.concatenate([a,b]); na=len(a)
        cnt=0
        for _ in range(reps):
            rr.shuffle(join)
            cnt += abs(np.mean(join[:na]) - np.mean(join[na:])) >= abs(obs)
        return obs, float(cnt/reps)

    obs_m, p_m = perm_p_mean(g, r, reps=20000, seed=5)

    results[sysname] = dict(
        n_per=int(N_PER),
        mean_metric_glyph=float(np.mean(g)),
        mean_metric_random=float(np.mean(r)),
        H_glyph=float(Hg),
        H_random=float(Hr),
        H_diff=float(Hg - Hr),           # negative = glyph is narrower (desired)
        perm_p_entropy=float(p_H),
        perm_on_metric_mean=float(obs_m),
        perm_p_metric_mean=float(p_m)
    )

with open(os.path.join(OUTDIR,"pg_funnel_v2_results.json"),"w") as f:
    json.dump(results, f, indent=2)

print("== Positive Geometry Drift Funnel — v2 (stabilized) ==")
print(json.dumps(results, indent=2))
print(f"[Artifacts] {OUTDIR}")
print("Pass heuristic: any system with H_diff < 0 AND perm_p_entropy < 0.05 (strong), "
      "or p<0.05 on mean metric with the expected sign.")


== Positive Geometry Drift Funnel — v2 (stabilized) ==
{
  "ising": {
    "n_per": 64,
    "mean_metric_glyph": 0.03708043981481481,
    "mean_metric_random": 0.03854709201388889,
    "H_glyph": 1.0374105318412368,
    "H_random": 1.017467935768744,
    "H_diff": 0.019942596072492824,
    "perm_p_entropy": 1.0,
    "perm_on_metric_mean": -0.0014666521990740755,
    "perm_p_metric_mean": 0.7846
  },
  "kuramoto": {
    "n_per": 64,
    "mean_metric_glyph": 0.0282777843236119,
    "mean_metric_random": 0.025481906956643284,
    "H_glyph": 0.6458118267860103,
    "H_random": 0.6746216548104075,
    "H_diff": -0.028809828024397133,
    "perm_p_entropy": 1.0,
    "perm_on_metric_mean": 0.0027958773669686174,
    "perm_p_metric_mean": 0.2598
  }
}
[Artifacts] C:\Users\caleb\CNT_Lab\notebooks\archive\pg_funnel_v2_20251025-085404_5fdd7947
Pass heuristic: any system with H_diff < 0 AND perm_p_entropy < 0.05 (strong), or p<0.05 on mean metric with the expected sign.


In [2]:
# PG Funnel v3 — Structured (Halton) vs Random seeding near criticality
import numpy as np, pandas as pd, hashlib, json, uuid, os, warnings
from datetime import datetime
warnings.filterwarnings("ignore", category=RuntimeWarning)

# ===== Knobs =====
N_PER       = 128           # power up (try 256 for paper)
BINS        = 24            # finer, but still stable
ALPHA       = 0.5           # Dirichlet smoothing for entropy
BOOT_B      = 2000          # CI smoothness (optional)
ISING_L     = 64
ISING_STEPS = 6000
ISING_BETA  = 0.4407        # near 2D critical β
KURA_N      = 600
KURA_STEPS  = 6000
KURA_K      = 2.2           # mildly supercritical to show separation
DT          = 0.02

OUTDIR = os.path.join(r"C:\Users\caleb\CNT_Lab\notebooks\archive",
                      f"pg_funnel_v3_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)

# ===== Low-discrepancy (Halton) generator =====
def halton(n, base):
    seq = []
    for i in range(1, n+1):
        f, r, x = 1.0, 0.0, i
        while x > 0:
            f /= base
            r += f * (x % base)
            x //= base
        seq.append(r)
    return np.array(seq)

def halton2(n, b1=2, b2=3):
    return np.stack([halton(n,b1), halton(n,b2)], axis=1)

# ===== Entropy helpers =====
def entropy_smoothed(x, bins=16, rng=(0,1), alpha=0.5):
    counts, _ = np.histogram(x, bins=bins, range=rng, density=False)
    counts = counts.astype(float) + alpha
    p = counts / counts.sum()
    return float(-np.sum(p*np.log(p)))

def perm_p_diff_stat(a, b, stat_fn, reps=20000, seed=42):
    rng = np.random.default_rng(seed)
    a = np.asarray(a); b = np.asarray(b)
    obs = stat_fn(a) - stat_fn(b)
    joined = np.concatenate([a,b]); na = len(a)
    cnt=0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(stat_fn(joined[:na]) - stat_fn(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

# ===== Systems =====
def ising_run(L=ISING_L, beta=ISING_BETA, steps=ISING_STEPS, rng=None):
    if rng is None: rng = np.random.default_rng()
    s = rng.choice([-1,1], size=(L,L))
    def Eflip(i,j):
        nb = s[(i+1)%L,j]+s[(i-1)%L,j]+s[i,(j+1)%L]+s[i,(j-1)%L]
        return 2*s[i,j]*nb
    for _ in range(steps):
        i = rng.integers(0,L); j = rng.integers(0,L)
        dE = Eflip(i,j)
        if dE<=0 or rng.random() < np.exp(-beta*dE):
            s[i,j] *= -1
    m = np.mean(s)                   # order (magnetization)
    row = s[0]; corr = np.mean(row[:-1]*row[1:])  # simple corr proxy
    return float(abs(m)), float(corr)

def kuramoto_meanfield(N=KURA_N, K=KURA_K, steps=KURA_STEPS, dt=DT, rng=None, phase_offset=0.0, freq_scale=1.0):
    if rng is None: rng = np.random.default_rng()
    theta = (rng.uniform(0, 2*np.pi, size=N) + phase_offset) % (2*np.pi)
    omega = rng.normal(0, 1, size=N) * freq_scale
    for _ in range(steps):
        z = np.exp(1j*theta)
        R = np.abs(z.mean()); psi = np.angle(z.mean())
        dtheta = omega + K*R*np.sin(psi - theta)
        theta = (theta + dt*dtheta) % (2*np.pi)
    Rfinal = np.abs(np.mean(np.exp(1j*theta)))
    return float(Rfinal)

# ===== Seeds: structured vs random =====
# Map glyphs to *structured coverage* of (phase_offset, freq_scale) via Halton;
# random uses i.i.d. coverage of the same rectangle.
GLYPHS = ["Observer Ring","Axis Veil","Event Spiral","Anchor","Collapse Benediction",
          "Resonance Seeding","Mirror Rebinding","Parasite Purge","Harmonic Shadow","Orivyn"]

n_each = N_PER // len(GLYPHS)
H = halton2(n_each*len(GLYPHS))   # shape (N_PER,2) in [0,1]^2
# scale Halton to parameter box
phase_min, phase_max = 0.0, 2*np.pi
freq_min,  freq_max  = 0.8, 1.2
H_phase = phase_min + (phase_max - phase_min)*H[:,0]
H_freq  = freq_min  + (freq_max  - freq_min )*H[:,1]

rows = []
# Structured (glyph): cover param box evenly
idx = 0
for g in GLYPHS:
    for _ in range(n_each):
        r_i = np.random.default_rng( (idx<<16) ^ 0xA53A9B )  # stable per index
        m,c = ising_run(rng=r_i)
        rows.append(dict(system="ising", label="glyph", metric=m, aux=c))
        # same index drives mean-field offsets
        ph, fs = float(H_phase[idx]), float(H_freq[idx])
        r_k = np.random.default_rng( (idx<<16) ^ 0x9E3779B9 )
        R = kuramoto_meanfield(phase_offset=ph, freq_scale=fs, rng=r_k)
        rows.append(dict(system="kuramoto", label="glyph", metric=R, aux=np.nan))
        idx += 1

# Random: i.i.d. draw over same parameter box
rng = np.random.default_rng(7)
for i in range(N_PER):
    r_i = np.random.default_rng(rng.integers(0, 2**32-1))
    m,c = ising_run(rng=r_i)
    rows.append(dict(system="ising", label="random", metric=m, aux=c))
    ph = rng.uniform(phase_min, phase_max)
    fs = rng.uniform(freq_min, freq_max)
    r_k = np.random.default_rng(rng.integers(0, 2**32-1))
    R = kuramoto_meanfield(phase_offset=ph, freq_scale=fs, rng=r_k)
    rows.append(dict(system="kuramoto", label="random", metric=R, aux=np.nan))

df = pd.DataFrame(rows)

# ===== Analysis =====
def analyze(system):
    g = df[(df.system==system)&(df.label=="glyph")]["metric"].values
    r = df[(df.system==system)&(df.label=="random")]["metric"].values
    H_g = entropy_smoothed(g, bins=BINS, rng=(0,1), alpha=ALPHA)
    H_r = entropy_smoothed(r, bins=BINS, rng=(0,1), alpha=ALPHA)
    H_diff, p_H = perm_p_diff_stat(g, r,
        stat_fn=lambda x: entropy_smoothed(x, bins=BINS, rng=(0,1), alpha=ALPHA),
        reps=20000, seed=11)
    # mean metric corroboration
    def perm_p_mean(a,b,reps=20000,seed=5):
        rr = np.random.default_rng(seed)
        a = np.asarray(a); b = np.asarray(b)
        obs = float(np.mean(a)-np.mean(b))
        join = np.concatenate([a,b]); na=len(a)
        cnt=0
        for _ in range(reps):
            rr.shuffle(join)
            cnt += abs(np.mean(join[:na]) - np.mean(join[na:])) >= abs(obs)
        return obs, float(cnt/reps)
    obs_m, p_m = perm_p_mean(g, r)
    return dict(
        n_per=int(N_PER),
        H_glyph=float(H_g), H_random=float(H_r),
        H_diff=float(H_g - H_r), perm_p_entropy=float(p_H),
        mean_metric_glyph=float(np.mean(g)), mean_metric_random=float(np.mean(r)),
        perm_on_metric_mean=float(obs_m), perm_p_metric_mean=float(p_m)
    )

res = {"ising": analyze("ising"), "kuramoto": analyze("kuramoto")}
with open(os.path.join(OUTDIR,"pg_funnel_v3_results.json"),"w") as f:
    json.dump(res, f, indent=2)

print("== Positive Geometry Drift Funnel — v3 (structured vs random) ==")
print(json.dumps(res, indent=2))
print(f"[Artifacts] {OUTDIR}")
print("Pass = (H_diff < 0 and perm_p_entropy < 0.05) for either system; "
      "bonus if mean metric also improves with p<0.05.")


== Positive Geometry Drift Funnel — v3 (structured vs random) ==
{
  "ising": {
    "n_per": 128,
    "H_glyph": 1.0084544955621828,
    "H_random": 1.0288487154061623,
    "H_diff": -0.02039421984397949,
    "perm_p_entropy": 0.8734,
    "mean_metric_glyph": 0.025032552083333333,
    "mean_metric_random": 0.026782989501953125,
    "perm_on_metric_mean": -0.0017504374186197921,
    "perm_p_metric_mean": 0.4875
  },
  "kuramoto": {
    "n_per": 128,
    "H_glyph": 2.3015533524518617,
    "H_random": 2.369526144877767,
    "H_diff": -0.06797279242590548,
    "perm_p_entropy": 0.3487,
    "mean_metric_glyph": 0.7805737940162142,
    "mean_metric_random": 0.7750181602860213,
    "perm_on_metric_mean": 0.005555633730192855,
    "perm_p_metric_mean": 0.6714
  }
}
[Artifacts] C:\Users\caleb\CNT_Lab\notebooks\archive\pg_funnel_v3_20251025-092852_30da8ef0
Pass = (H_diff < 0 and perm_p_entropy < 0.05) for either system; bonus if mean metric also improves with p<0.05.


In [3]:
# PG Funnel v3.1 — Critical Sweep for Maximum Funnel (structured vs random)
# Scans small grids around Ising β_c and Kuramoto K_c, finds (H_diff, p) sweet spots.
# Outputs a CSV and highlights the best candidates to rerun with higher power.

import numpy as np, pandas as pd, hashlib, json, uuid, os, warnings, math
from datetime import datetime
warnings.filterwarnings("ignore", category=RuntimeWarning)

# ====== RUNTIME / POWER KNOBS ======
N_PER_BASE      = 64      # trials per condition per system per setting (raise to 128/256 when you pick a winner)
BINS            = 24      # histogram bins for entropy
ALPHA           = 0.5     # Dirichlet smoothing for entropy
PERM_REPS       = 20000   # permutation reps for entropy test
SEED_MASTER     = 11

# Ising (2D) sweep around β_c ~ 0.4406868...
ISING_L         = 64
ISING_STEPS     = 6000
BETA_CENTER     = 0.4407
BETA_OFFSETS    = [-0.010, -0.005, -0.0025, 0.0, 0.0025, 0.005, 0.010]

# Kuramoto sweep near critical; mean-field is faster and clean
KURA_N          = 600
KURA_STEPS      = 6000
DT              = 0.02
K_CENTER        = 2.2        # mild supercritical (adjust around your K_c estimate if needed)
K_OFFSETS       = [-0.30, -0.15, -0.08, 0.0, 0.08, 0.15, 0.30]

# Parameter boxes the structured seeds evenly cover (phase & frequency)
PHASE_MIN, PHASE_MAX = 0.0, 2*np.pi
FREQ_MIN,  FREQ_MAX  = 0.8, 1.2

# ====== OUTPUT ======
OUTDIR = os.path.join(r"C:\Users\caleb\CNT_Lab\notebooks\archive",
                      f"pg_funnel_v3_1sweep_{datetime.now().strftime('%Y%m%d-%H%M%S')}_{uuid.uuid4().hex[:8]}")
os.makedirs(OUTDIR, exist_ok=True)

# ====== UTIL ======
def entropy_smoothed(x, bins=16, rng=(0,1), alpha=0.5):
    counts, _ = np.histogram(x, bins=bins, range=rng, density=False)
    counts = counts.astype(float) + alpha
    p = counts / counts.sum()
    return float(-np.sum(p*np.log(p)))

def perm_p_diff_stat(a, b, stat_fn, reps=20000, seed=42):
    rng = np.random.default_rng(seed)
    a = np.asarray(a); b = np.asarray(b)
    obs = stat_fn(a) - stat_fn(b)
    joined = np.concatenate([a,b]); na = len(a)
    cnt=0
    for _ in range(reps):
        rng.shuffle(joined)
        cnt += abs(stat_fn(joined[:na]) - stat_fn(joined[na:])) >= abs(obs)
    return float(obs), float(cnt/reps)

def halton(n, base):
    seq=[]
    for i in range(1, n+1):
        f=1.0; r=0.0; x=i
        while x>0:
            f/=base; r += f*(x%base); x//=base
        seq.append(r)
    return np.array(seq)

def halton2(n, b1=2, b2=3):
    return np.stack([halton(n,b1), halton(n,b2)], axis=1)

# ====== SYSTEMS ======
def ising_run(L, beta, steps, rng):
    s = rng.choice([-1,1], size=(L,L))
    def Eflip(i,j):
        nb = s[(i+1)%L,j]+s[(i-1)%L,j]+s[i,(j+1)%L]+s[i,(j-1)%L]
        return 2*s[i,j]*nb
    for _ in range(steps):
        i = rng.integers(0,L); j = rng.integers(0,L)
        dE = Eflip(i,j)
        if dE<=0 or rng.random()<np.exp(-beta*dE):
            s[i,j] *= -1
    m = float(abs(np.mean(s)))
    return m

def kuramoto_meanfield(N, K, steps, dt, rng, phase_offset=0.0, freq_scale=1.0):
    theta = (rng.uniform(0, 2*np.pi, size=N) + phase_offset) % (2*np.pi)
    omega = rng.normal(0, 1, size=N) * freq_scale
    for _ in range(steps):
        z = np.exp(1j*theta); R = np.abs(z.mean()); psi = np.angle(z.mean())
        dtheta = omega + K*R*np.sin(psi - theta)
        theta = (theta + dt*dtheta) % (2*np.pi)
    return float(np.abs(np.mean(np.exp(1j*theta))))

# ====== STRUCTURED vs RANDOM SEEDS (parameterized) ======
def run_ising_block(beta, n_per, seed_base):
    # structured: Halton drives RNG seeding uniformly; random: iid seeds
    H = halton(n_per, base=2)  # 1D is enough to de-clump RNG states
    rows=[]
    for k in range(n_per):
        s_struct = int((H[k]*2**32)) ^ (seed_base + k)
        s_rand   = np.random.default_rng(seed_base+k).integers(0, 2**32-1)
        # structured
        rng = np.random.default_rng(s_struct)
        rows.append(("glyph", ising_run(ISING_L, beta, ISING_STEPS, rng)))
        # random
        rng = np.random.default_rng(s_rand)
        rows.append(("random", ising_run(ISING_L, beta, ISING_STEPS, rng)))
    return rows

def run_kura_block(K, n_per, seed_base):
    H2 = halton2(n_per)  # 2D for (phase_offset, freq_scale)
    rows=[]
    for k in range(n_per):
        ph = PHASE_MIN + (PHASE_MAX-PHASE_MIN)*H2[k,0]
        fs = FREQ_MIN  + (FREQ_MAX -FREQ_MIN )*H2[k,1]
        s_struct = (seed_base<<1) ^ k ^ 0xA53A9B
        s_rand   = np.random.default_rng(seed_base+k).integers(0, 2**32-1)
        # structured
        rng = np.random.default_rng(s_struct)
        rows.append(("glyph", kuramoto_meanfield(KURA_N, K, KURA_STEPS, DT, rng, phase_offset=ph, freq_scale=fs)))
        # random
        rng = np.random.default_rng(s_rand)
        ph_r = np.random.default_rng(s_rand^0x9E3779B9).uniform(PHASE_MIN, PHASE_MAX)
        fs_r = np.random.default_rng(s_rand^0x517cc1b7).uniform(FREQ_MIN , FREQ_MAX )
        rows.append(("random", kuramoto_meanfield(KURA_N, K, KURA_STEPS, DT, rng, phase_offset=ph_r, freq_scale=fs_r)))
    return rows

# ====== SWEEP ======
rng_master = np.random.default_rng(SEED_MASTER)

records = []
# Ising sweep
for d in BETA_OFFSETS:
    beta = BETA_CENTER + d
    rows = run_ising_block(beta, N_PER_BASE, seed_base= rng_master.integers(0,2**32-1))
    df = pd.DataFrame(rows, columns=["label","metric"])
    g = df[df.label=="glyph"]["metric"].values
    r = df[df.label=="random"]["metric"].values
    H_g = entropy_smoothed(g, bins=BINS, rng=(0,1), alpha=ALPHA)
    H_r = entropy_smoothed(r, bins=BINS, rng=(0,1), alpha=ALPHA)
    Hdiff, pH = perm_p_diff_stat(g, r,
        stat_fn=lambda x: entropy_smoothed(x, bins=BINS, rng=(0,1), alpha=ALPHA),
        reps=PERM_REPS, seed=SEED_MASTER+3)
    records.append(dict(system="ising", beta=beta, K=np.nan,
                        H_glyph=H_g, H_random=H_r, H_diff=H_g-H_r, p_entropy=pH,
                        mean_g=float(np.mean(g)), mean_r=float(np.mean(r))))
# Kuramoto sweep
for d in K_OFFSETS:
    K = K_CENTER + d
    rows = run_kura_block(K, N_PER_BASE, seed_base= rng_master.integers(0,2**32-1))
    df = pd.DataFrame(rows, columns=["label","metric"])
    g = df[df.label=="glyph"]["metric"].values
    r = df[df.label=="random"]["metric"].values
    H_g = entropy_smoothed(g, bins=BINS, rng=(0,1), alpha=ALPHA)
    H_r = entropy_smoothed(r, bins=BINS, rng=(0,1), alpha=ALPHA)
    Hdiff, pH = perm_p_diff_stat(g, r,
        stat_fn=lambda x: entropy_smoothed(x, bins=BINS, rng=(0,1), alpha=ALPHA),
        reps=PERM_REPS, seed=SEED_MASTER+7)
    records.append(dict(system="kuramoto", beta=np.nan, K=K,
                        H_glyph=H_g, H_random=H_r, H_diff=H_g-H_r, p_entropy=pH,
                        mean_g=float(np.mean(g)), mean_r=float(np.mean(r))))

res = pd.DataFrame.from_records(records)
csv_path = os.path.join(OUTDIR, "pg_funnel_v3_1_sweep_results.csv")
res.to_csv(csv_path, index=False)

# pick “best” per system: most negative H_diff; break ties by p
def pick_best(df):
    df = df.sort_values(["H_diff","p_entropy"])
    return df.iloc[0].to_dict()

best_ising    = pick_best(res[res.system=="ising"])
best_kuramoto = pick_best(res[res.system=="kuramoto"])

summary = {
  "best_ising": best_ising,
  "best_kuramoto": best_kuramoto,
  "csv": csv_path,
  "artifacts": OUTDIR,
  "pass_rule": "Target a candidate with H_diff < 0 and p_entropy < 0.10 in the sweep. Then rerun ONLY that setting with N_PER=256–512 to drive p<0.05."
}

with open(os.path.join(OUTDIR,"pg_funnel_v3_1_summary.json"),"w") as f:
    json.dump(summary, f, indent=2)

print("== PG Funnel v3.1 — Critical Sweep ==")
print(json.dumps(summary, indent=2))
print(f"[Artifacts] {OUTDIR}")


== PG Funnel v3.1 — Critical Sweep ==
{
  "best_ising": {
    "system": "ising",
    "beta": 0.4457,
    "K": NaN,
    "H_glyph": 1.3148097888030201,
    "H_random": 1.4612682117859375,
    "H_diff": -0.14645842298291734,
    "p_entropy": 0.16955,
    "mean_g": 0.0278167724609375,
    "mean_r": 0.03443145751953125
  },
  "best_kuramoto": {
    "system": "kuramoto",
    "beta": NaN,
    "K": 2.5,
    "H_glyph": 2.138611121904332,
    "H_random": 2.215222460483547,
    "H_diff": -0.0766113385792151,
    "p_entropy": 0.2803,
    "mean_g": 0.8655586270847131,
    "mean_r": 0.8553475021852082
  },
  "csv": "C:\\Users\\caleb\\CNT_Lab\\notebooks\\archive\\pg_funnel_v3_1sweep_20251025-134207_9680eb61\\pg_funnel_v3_1_sweep_results.csv",
  "artifacts": "C:\\Users\\caleb\\CNT_Lab\\notebooks\\archive\\pg_funnel_v3_1sweep_20251025-134207_9680eb61",
  "pass_rule": "Target a candidate with H_diff < 0 and p_entropy < 0.10 in the sweep. Then rerun ONLY that setting with N_PER=256\u2013512 to drive p<0.