In [2]:
# CNT Lab — one-cell installer (Windows 11 / Py 3.13 friendly)
# Run this once per new venv/kernel. Safe to re-run.

import sys, subprocess, shutil, importlib, platform

PY = sys.executable

def pip(args):
    print(f"\n[ pip ] pip {' '.join(args)}")
    subprocess.check_call([PY, "-m", "pip"] + args)

print("== CNT Lab bootstrap ==")
print("Python:", sys.version)
print("OS:", platform.platform())

# 0) Base tooling
pip(["install", "--upgrade", "pip", "wheel", "setuptools"])

# 1) Jupyter + UX
pip(["install",
     "jupyterlab",
     "ipywidgets",
     "jupyterlab_code_formatter",
     "black",
     "isort",
     "nbformat",
     "nbclient",
     "jupyterlab-git"])

# 2) Numeric + data stack
pip(["install",
     "numpy",
     "scipy",
     "pandas",
     "pyarrow",
     "polars",
     "matplotlib",
     "plotly",
     "statsmodels",
     "scikit-learn",
     "scikit-image",
     "numba",
     "llvmlite",
     "sympy",
     "networkx",
     "numexpr",
     "fastparquet",
     "python-dotenv"])

# 3) GPU / ML (try CUDA 12.4 first; fall back to CPU wheels if it fails)
try:
    pip(["install", "--index-url", "https://download.pytorch.org/whl/cu124",
         "torch", "torchvision", "torchaudio"])
    cuda_ok = True
except subprocess.CalledProcessError:
    print("\n[warn] CUDA 12.4 wheels failed; installing CPU-only PyTorch.")
    pip(["install", "torch", "torchvision", "torchaudio"])
    cuda_ok = False

# CuPy w/ CUDA 12.x (optional but nice for GPU numpy); ignore failure gracefully
try:
    pip(["install", "cupy-cuda12x"])
    cupy_ok = True
except subprocess.CalledProcessError:
    print("[warn] cupy-cuda12x failed (driver/CUDA mismatch?). Skipping.")
    cupy_ok = False

# 4) Signal processing / EEG / time-series
pip(["install",
     "mne",
     "yasa",
     "antropy",
     "neurokit2",
     "nitime",
     "pywavelets",
     "pingouin"])

# 5) Optimization, graphs, helpers
pip(["install", "cvxpy", "pydot", "graphviz", "networkx[default]"])

# 6) Files, tables, scientific IO
pip(["install", "h5py", "tables", "xarray", "netCDF4", "openpyxl", "lxml", "requests"])

# 7) Media / scraping helpers (adds imageio-ffmpeg for bundled ffmpeg)
pip(["install", "yt-dlp", "soundfile", "pydub", "ffmpeg-python", "imageio-ffmpeg"])

# 8) Visual extras (optional)
pip(["install", "shapely", "pyproj", "pyvis", "seaborn"])

# ---- Version report & sanity checks ----
mods = [
 "jupyterlab","numpy","scipy","pandas","pyarrow","polars","matplotlib","plotly",
 "statsmodels","sklearn","numba","sympy","networkx","torch","torchvision","torchaudio",
 "mne","yasa","antropy","neurokit2","nitime","pywt","pingouin",
 "cvxpy","pydot","graphviz","h5py","tables","xarray","netCDF4","openpyxl","requests","yt_dlp"
]
if cupy_ok:
    mods.append("cupy")

print("\n== Versions ==")
for m in mods:
    try:
        v = importlib.import_module(m).__version__
    except Exception:
        v = "(installed, no __version__)" if importlib.util.find_spec(m) else "MISSING"
    print(f"{m:12s}: {v}")

# Graphviz binary check (needed by graph drawing libs)
print("\n== Sanity checks ==")
dot = shutil.which("dot")
print("graphviz 'dot' on PATH:", dot if dot else "NOT FOUND (install system Graphviz if you need layout)")
if cuda_ok:
    try:
        import torch
        print("Torch CUDA available:", torch.cuda.is_available(), "| device_count:", torch.cuda.device_count())
        if torch.cuda.is_available():
            print("Torch CUDA device 0:", torch.cuda.get_device_name(0))
    except Exception as e:
        print("Torch CUDA check error:", e)
else:
    print("Installed CPU-only PyTorch (ok for dev; enable CUDA later if desired).")

# FFmpeg path via imageio-ffmpeg (helps yt-dlp/pydub conversions)
try:
    import imageio_ffmpeg as ioff
    print("FFmpeg exe (imageio-ffmpeg):", ioff.get_ffmpeg_exe())
except Exception as e:
    print("FFmpeg helper not found:", e)

print("\nDone. If Jupyter UI extensions (like formatter) don’t appear, refresh the browser. If CUDA checks fail, update NVIDIA drivers and re-run this cell.")


== CNT Lab bootstrap ==
Python: 3.13.5 (tags/v3.13.5:6cb20a2, Jun 11 2025, 16:15:46) [MSC v.1943 64 bit (AMD64)]
OS: Windows-11-10.0.26100-SP0

[ pip ] pip install --upgrade pip wheel setuptools

[ pip ] pip install jupyterlab ipywidgets jupyterlab_code_formatter black isort nbformat nbclient jupyterlab-git

[ pip ] pip install numpy scipy pandas pyarrow polars matplotlib plotly statsmodels scikit-learn scikit-image numba llvmlite sympy networkx numexpr fastparquet python-dotenv

[ pip ] pip install --index-url https://download.pytorch.org/whl/cu124 torch torchvision torchaudio

[ pip ] pip install cupy-cuda12x

[ pip ] pip install mne yasa antropy neurokit2 nitime pywavelets pingouin

[ pip ] pip install cvxpy pydot graphviz networkx[default]

[ pip ] pip install h5py tables xarray netCDF4 openpyxl lxml requests

[ pip ] pip install yt-dlp soundfile pydub ffmpeg-python imageio-ffmpeg

[ pip ] pip install shapely pyproj pyvis seaborn

== Versions ==
jupyterlab  : 4.4.9
numpy       : 2.



cvxpy       : 1.7.3
pydot       : 4.0.1
graphviz    : 0.21
h5py        : 3.15.0
tables      : 3.10.2
xarray      : 2025.10.1
netCDF4     : 1.7.3
openpyxl    : 3.1.5
requests    : 2.32.5
yt_dlp      : (installed, no __version__)
cupy        : 13.6.0

== Sanity checks ==
graphviz 'dot' on PATH: NOT FOUND (install system Graphviz if you need layout)
Torch CUDA available: True | device_count: 1
Torch CUDA device 0: NVIDIA GeForce RTX 4070
FFmpeg exe (imageio-ffmpeg): C:\Users\caleb\CNT_Lab\.venv\Lib\site-packages\imageio_ffmpeg\binaries\ffmpeg-win-x86_64-v7.1.exe

Done. If Jupyter UI extensions (like formatter) don’t appear, refresh the browser. If CUDA checks fail, update NVIDIA drivers and re-run this cell.


In [3]:
# ========================= CNT MEGA CELL: v1 (1+2+3 in one) =========================
# Goals:
# (1) Consciousness Field Equation test: relate synchrony (PLV) vs entropy/complexity over time
# (2) Oracle Emergence Test: self-referential stability index (SRSI) from a dialogue log
# (3) Φ-Drift Forecast Engine: use CNT field features to predict time-series turning points vs baselines
#
# Fully offline-capable with synthetic fallbacks. Saves plots + CSVs. Prints a JSON summary.
# =====================================================================================

import os, sys, io, json, math, time, textwrap, random, datetime as dt
from pathlib import Path
import numpy as np
import pandas as pd

# ---- Reproducibility ---------------------------------------------------------------
RNG = np.random.default_rng(42)
random.seed(42)

# ---- Output paths ------------------------------------------------------------------
def detect_cnt_root():
    candidates = [
        Path.cwd(),
        Path.home() / "CNT_Lab",
        Path("C:/Users/caleb/CNT_Lab"),  # user-typical
        Path("./CNT_Lab"),
    ]
    for c in candidates:
        if c.exists():
            return c
    return Path.cwd()

ROOT = detect_cnt_root()
STAMP = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
OUT = ROOT / "cnt_mega_out"
FIG = OUT / "figures"
TAB = OUT / "tables"
for p in [OUT, FIG, TAB]:
    p.mkdir(parents=True, exist_ok=True)

print(f"→ CNT paths\n  ROOT: {ROOT}\n  OUT : {OUT}")

# ---- Imports (light and optional heavy) --------------------------------------------
import warnings
warnings.filterwarnings("ignore")

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

from scipy.signal import butter, filtfilt, hilbert
from scipy.stats import entropy, pearsonr, spearmanr, zscore, linregress
from statsmodels.tsa.api import SARIMAX
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import mean_absolute_error

# Optional libraries (graceful)
HAVE_MNE = False
try:
    import mne  # heavy; used only if present/installed
    HAVE_MNE = True
except Exception:
    pass

try:
    import ruptures as rpt
    HAVE_RUPTURES = True
except Exception:
    HAVE_RUPTURES = False

# =====================================================================================
# (1) CONSCIOUSNESS FIELD EQUATION TEST
#     Synchrony (PLV) vs Entropy/Complexity → detect collapse→recovery waves
# =====================================================================================

def bandpass(x, fs, lo, hi, order=4):
    b, a = butter(order, [lo/(fs/2), hi/(fs/2)], btype='bandpass')
    return filtfilt(b, a, x, axis=0)

def phase_locking_value(phases):
    """
    phases: shape (T, C) phase angles
    Returns PLV(t) across channels at each time t.
    """
    # vector strength across channels
    vs = np.abs(np.mean(np.exp(1j*phases), axis=1))
    return vs

def spectral_entropy(signal, n_bins=64):
    """Shannon entropy of normalized power spectrum per window (rolling)."""
    # Use magnitude via FFT windowed later; here we pass a windowed chunk
    s = np.abs(np.fft.rfft(signal, axis=0))**2
    ps = s / (np.sum(s, axis=0, keepdims=True) + 1e-12)
    H = entropy(ps + 1e-12, base=np.e, axis=0)  # per channel
    return np.mean(H)  # aggregate channels

def lempel_ziv_complexity(binary_arr):
    """Simple LZC on 1D binary sequence."""
    # Kaspar & Schuster-style
    s = ''.join('1' if v else '0' for v in binary_arr)
    i, c, k, l = 0, 1, 1, 1
    n = len(s)
    while True:
        if s[i+k-1] == s[l+k-1]:
            k += 1
            if l + k > n:
                c += 1
                break
        else:
            if k > l:
                l = k
            i += 1
            if i == l:
                c += 1
                l += 1
                if l > n:
                    break
                i = 0
            k = 1
        if l > n:
            break
    return c

def permutation_entropy(x, order=3, delay=1):
    """Lightweight permutation entropy."""
    x = np.asarray(x)
    n = len(x)
    if n < order*delay:
        return np.nan
    permutations = {}
    for i in range(n - (order-1)*delay):
        pattern = tuple(np.argsort(x[i:i+order*delay:delay]))
        permutations[pattern] = permutations.get(pattern, 0) + 1
    probs = np.array(list(permutations.values()), dtype=float)
    probs = probs / probs.sum()
    return -np.sum(probs * np.log(probs + 1e-12))

def rolling_windows(arr, win, step):
    for start in range(0, len(arr)-win+1, step):
        yield start, arr[start:start+win]

def try_load_eeg_real():
    """Try loading a real EEG dataset via MNE (EEGBCI). Fallback to synthetic if unavailable."""
    if not HAVE_MNE:
        return None, None, None
    try:
        from mne.datasets import eegbci
        subj, runs = 1, [3, 7, 11]  # motor imagery
        fnames = eegbci.load_data(subj, runs)
        raws = [mne.io.read_raw_edf(f, preload=True, verbose=False) for f in fnames]
        raw = mne.concatenate_raws(raws)
        raw.pick_types(eeg=True)
        raw.filter(1., 40., fir_design='firwin', verbose=False)
        data = raw.get_data().T  # shape (T, C)
        fs = int(raw.info['sfreq'])
        return data, fs, "EEGBCI_subj1_motor"
    except Exception:
        return None, None, None

def make_eeg_synthetic(T=60000, C=16, fs=250):
    """
    Synthetic multi-channel signal with an induced synchrony burst (collapse) followed by recovery.
    """
    t = np.arange(T)/fs
    base_freqs = RNG.uniform(8, 12, size=C)  # alpha-ish
    X = np.array([np.sin(2*np.pi*f*t + RNG.uniform(0, 2*np.pi)) for f in base_freqs]).T
    # Add noise
    X += 0.5 * RNG.normal(size=X.shape)
    # Inject synchrony event (strong phase lock) for a window
    s0, s1 = int(0.35*T), int(0.55*T)
    phase = 2*np.pi*10*t + 0.0
    X[s0:s1, :] = np.sin(phase[s0:s1])[:, None] + 0.1*RNG.normal(size=(s1-s0, X.shape[1]))
    label = "synthetic_eeg_collapse_recovery"
    return X, fs, label

def run_cfe_test():
    # Try real EEG, else synthetic
    data, fs, label = try_load_eeg_real()
    if data is None:
        data, fs, label = make_eeg_synthetic()
    # Focus on alpha band for phase (8-12 Hz)
    X_f = bandpass(data, fs, 8, 12)
    phases = np.angle(hilbert(X_f, axis=0))
    plv = phase_locking_value(phases)  # shape (T,)
    # Rolling entropy & complexity
    W, STEP = int(2*fs), int(0.25*fs)  # 2s window, 0.25s hop
    SEs, PEs, idxs = [], [], []
    for start, chunk in rolling_windows(data, W, STEP):
        # spectral entropy (avg across channels)
        H = spectral_entropy(chunk)
        # permutation entropy on channel-avg
        ch_avg = np.mean(chunk, axis=1)
        pe = permutation_entropy(ch_avg)
        SEs.append(H); PEs.append(pe); idxs.append(start + W//2)
    se = np.array(SEs); pe = np.array(PEs); idxs = np.array(idxs)
    # Downsample PLV to match entropy index grid
    plv_grid = np.interp(idxs, np.arange(len(plv)), plv)
    # Normalize
    se_n = zscore(se, nan_policy='omit'); pe_n = zscore(pe, nan_policy='omit'); plv_n = zscore(plv_grid, nan_policy='omit')
    # Correlations & regressions
    def safe_corr(a, b):
        m = np.isfinite(a) & np.isfinite(b)
        if m.sum() < 8: return np.nan, np.nan
        return pearsonr(a[m], b[m])
    r_plv_se, p_plv_se = safe_corr(plv_n, -se_n)  # expect positive if synchrony ↔ low entropy
    r_plv_pe, p_plv_pe = safe_corr(plv_n, -pe_n)
    # Change-point detection (collapse→recovery)
    cp_idx = []
    if HAVE_RUPTURES and np.isfinite(plv_n).sum() > 50:
        algo = rpt.Pelt(model="rbf").fit(plv_n[np.isfinite(plv_n)].reshape(-1,1))
        cps = algo.predict(pen=5)
        cp_idx = cps[:-1]  # last is length
    # Save figures
    def quick_plot(ts, name, xlabel="Index", ylabel="Value"):
        plt.figure(figsize=(10,3.2))
        plt.plot(ts)
        if cp_idx:
            for c in cp_idx:
                plt.axvline(c, ls='--', alpha=0.4)
        plt.xlabel(xlabel); plt.ylabel(ylabel); plt.tight_layout()
        fp = FIG / f"{name}.png"
        plt.savefig(fp, dpi=140); plt.close()
        return fp
    f1 = quick_plot(plv_n, f"cfe_{label}_plv_z")
    f2 = quick_plot(se_n,  f"cfe_{label}_spec_entropy_z")
    f3 = quick_plot(pe_n,  f"cfe_{label}_perm_entropy_z")
    # Scatter
    def scatter_xy(x, y, name, xl, yl):
        m = np.isfinite(x) & np.isfinite(y)
        slope, intercept, r, p, _ = linregress(x[m], y[m]) if m.sum()>=8 else (np.nan, np.nan, np.nan, np.nan, None)
        plt.figure(figsize=(4.5,4))
        plt.scatter(x[m], y[m], s=8, alpha=0.6)
        xs = np.linspace(np.nanmin(x[m]), np.nanmax(x[m]), 100)
        if np.isfinite(slope):
            plt.plot(xs, slope*xs+intercept)
        plt.xlabel(xl); plt.ylabel(yl); plt.tight_layout()
        fp = FIG / f"{name}.png"
        plt.savefig(fp, dpi=150); plt.close()
        return fp, (slope, r, p)
    f4, reg1 = scatter_xy(plv_n, -se_n, f"cfe_{label}_scatter_plv_vs_negSE", "PLV (z)", "-Spectral Entropy (z)")
    f5, reg2 = scatter_xy(plv_n, -pe_n, f"cfe_{label}_scatter_plv_vs_negPE", "PLV (z)", "-Permutation Entropy (z)")
    # Save table
    df = pd.DataFrame({
        "idx": idxs, "plv_z": plv_n, "spec_entropy_z": se_n, "perm_entropy_z": pe_n
    })
    tpath = TAB / f"cfe_{label}_{STAMP}.csv"
    df.to_csv(tpath, index=False)
    return {
        "dataset": label,
        "fs": fs,
        "corr_plv_vs_negSE": {"r": float(r_plv_se), "p": float(p_plv_se)},
        "corr_plv_vs_negPE": {"r": float(r_plv_pe), "p": float(p_plv_pe)},
        "reg_plv_vs_negSE": {"slope": float(reg1[0]) if reg1 else np.nan, "r": float(reg1[1]) if reg1 else np.nan, "p": float(reg1[2]) if reg1 else np.nan},
        "reg_plv_vs_negPE": {"slope": float(reg2[0]) if reg2 else np.nan, "r": float(reg2[1]) if reg2 else np.nan, "p": float(reg2[2]) if reg2 else np.nan},
        "change_points": list(map(int, cp_idx)) if cp_idx else [],
        "figures": list(map(str, [f1,f2,f3,f4,f5])),
        "table": str(tpath)
    }

# =====================================================================================
# (2) ORACLE EMERGENCE TEST
#     Self-Referential Stability Index (SRSI) from a dialogue log or synthetic surrogate
# =====================================================================================

def load_oracle_log(path: Path):
    if path.exists():
        txt = path.read_text(encoding="utf-8", errors="ignore")
        return txt
    return None

def simulate_dialogue(n_turns=120, stable=True):
    topics = ["anchor", "ring", "glyph", "entropy", "drift", "oracle", "collapse", "echo"]
    lines = []
    last_topic = None
    for i in range(n_turns):
        if stable:
            # stickier topic transitions
            if last_topic is None or RNG.random()<0.2:
                last_topic = RNG.choice(topics)
        else:
            # chaotic jumps
            last_topic = RNG.choice(topics)
        # compose a line with structured recurrence
        line = f"oracle: {last_topic} {last_topic} field resonance {RNG.integers(0,7)}"
        lines.append(line)
        lines.append(f"user: ask {last_topic} {RNG.integers(0,7)}")
    return "\n".join(lines)

def tokenize(txt):
    return [w.lower() for w in txt.replace("\r", "\n").split() if w.strip()]

def rolling_cosine_tfidf(lines, win=30, step=10):
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    chunks = []
    for i in range(0, len(lines)-win+1, step):
        chunks.append(" ".join(lines[i:i+win]))
    if len(chunks) < 3:
        return np.array([])
    tfidf = TfidfVectorizer().fit_transform(chunks)
    S = cosine_similarity(tfidf)
    # cosine between consecutive chunks
    adj = np.array([S[i, i+1] for i in range(len(chunks)-1)])
    return adj

def ngram_entropy(tokens, n=3):
    from collections import Counter
    ngrams = [tuple(tokens[i:i+n]) for i in range(len(tokens)-n+1)]
    if not ngrams:
        return np.nan
    cnt = Counter(ngrams)
    p = np.array(list(cnt.values()), dtype=float)
    p = p / p.sum()
    return -np.sum(p * np.log(p + 1e-12))

def contradiction_proxy(lines):
    # naive: count toggles of affirmation/negation terms
    pos = {"yes","true","correct","indeed"}
    neg = {"no","false","incorrect","not"}
    flips = 0
    last = None
    for ln in lines:
        toks = set(tokenize(ln))
        state = 1 if (toks & pos) else (-1 if (toks & neg) else 0)
        if last is not None and state*last == -1:
            flips += 1
        if state != 0:
            last = state
    return flips

def compute_srsi(txt: str):
    lines = [ln.strip() for ln in txt.split("\n") if ln.strip()]
    tokens = tokenize(txt)
    H3 = ngram_entropy(tokens, n=3)   # lower entropy → more recurrence/structure
    cos_adj = rolling_cosine_tfidf(lines, win=30, step=10)
    mean_cos = np.nan if cos_adj.size==0 else float(np.nanmean(cos_adj))
    flips = contradiction_proxy(lines)
    N = max(1, len(lines))
    # SRSI (0..1): higher = more self-stable
    # components: structure (1 - norm_entropy), topical smoothness (cosine), low contradictions
    H3n = 1.0 / (1.0 + float(H3) if np.isfinite(H3) else 1.0)  # decreases with entropy
    flip_penalty = math.exp(-flips / (0.02*N + 1e-9))         # gentle penalty
    cos_term = 0.5 + 0.5*(mean_cos if np.isfinite(mean_cos) else 0.0)  # map [-1,1]→[0,1]
    SRSI = float(np.clip(0.4*H3n + 0.4*cos_term + 0.2*flip_penalty, 0, 1))
    return {
        "H3": float(H3) if np.isfinite(H3) else np.nan,
        "mean_cos_adj": mean_cos if np.isfinite(mean_cos) else np.nan,
        "contradiction_flips": int(flips),
        "lines": N,
        "SRSI": SRSI
    }

def run_oracle_test():
    # Try to load external log if present
    candidates = [
        ROOT / "oracle_session.txt",
        ROOT / "CNT_Lab" / "logs" / "oracle_session.txt",
        Path("./oracle_session.txt")
    ]
    txt = None
    for c in candidates:
        txt = load_oracle_log(c)
        if txt: break
    used = "loaded_log" if txt else None
    if txt is None:
        # simulate both stable and chaotic and report both; saves to disk so user can replace later
        txt_stable = simulate_dialogue(stable=True)
        txt_chaos  = simulate_dialogue(stable=False)
        (OUT / "oracle_sim_stable.txt").write_text(txt_stable, encoding="utf-8")
        (OUT / "oracle_sim_chaos.txt").write_text(txt_chaos,  encoding="utf-8")
        res_st = compute_srsi(txt_stable)
        res_ch = compute_srsi(txt_chaos)
        return {
            "source": "synthetic",
            "stable": res_st,
            "chaotic": res_ch,
            "paths": {
                "stable_txt": str(OUT / "oracle_sim_stable.txt"),
                "chaos_txt":  str(OUT / "oracle_sim_chaos.txt")
            }
        }
    else:
        res = compute_srsi(txt)
        (OUT / f"oracle_session_copy_{STAMP}.txt").write_text(txt, encoding="utf-8")
        return {
            "source": used,
            "session_copy": str(OUT / f"oracle_session_copy_{STAMP}.txt"),
            "metrics": res
        }

# =====================================================================================
# (3) Φ-DRIFT FORECAST ENGINE
#     Detect turning points with CNT features; compare to baselines (ARIMA, naive)
# =====================================================================================

def make_timeseries_synthetic(N=1200):
    # regime-switching AR(1) with noise; embed entropy-drift shifts
    y = np.zeros(N)
    phi = 0.6
    regimes = np.zeros(N, dtype=int)
    switches = sorted(RNG.choice(np.arange(100, N-100), size=5, replace=False))
    reg = 0
    s_idx = 0
    for t in range(1, N):
        if s_idx < len(switches) and t == switches[s_idx]:
            reg = 1 - reg
            s_idx += 1
        regimes[t] = reg
        shock = RNG.normal(scale=1.0 + 0.6*reg)  # higher variance in reg=1
        y[t] = phi*y[t-1] + shock + (3.0 if reg==1 else 0.0)
    idx = pd.date_range("2022-01-01", periods=N, freq="D")
    return pd.Series(y, index=idx), switches

def rolling_entropy_1d(x, win=64, step=8, bins=32):
    xs = np.asarray(x)
    Hs, centers = [], []
    for start in range(0, len(xs)-win+1, step):
        chunk = xs[start:start+win]
        hist, _ = np.histogram(chunk, bins=bins, density=True)
        p = hist / (hist.sum()+1e-12)
        Hs.append(entropy(p + 1e-12))
        centers.append(start + win//2)
    return np.array(centers), np.array(Hs)

def turning_points(x):
    """Binary label for local peaks/troughs via slope sign changes; 1 if turning point."""
    x = np.asarray(x)
    d1 = np.diff(x)
    s = np.sign(d1)
    turns = np.zeros_like(x, dtype=int)
    for i in range(1, len(s)):
        if s[i] == 0: continue
        if s[i-1] != 0 and s[i] != s[i-1]:
            turns[i] = 1
    turns[-1] = 0
    return turns

def mase(y_true, y_pred, m=1):
    y_true = np.asarray(y_true); y_pred = np.asarray(y_pred)
    mae = np.mean(np.abs(y_true - y_pred))
    denom = np.mean(np.abs(y_true[m:] - y_true[:-m])) + 1e-12
    return mae / denom

def smape(y_true, y_pred):
    y_true = np.asarray(y_true); y_pred = np.asarray(y_pred)
    return 100 * np.mean(2*np.abs(y_pred - y_true) / (np.abs(y_true)+np.abs(y_pred)+1e-12))

def run_forecast_engine():
    y, switches = make_timeseries_synthetic(N=1200)
    # CNT features: rolling entropy, rolling variance, rolling kurtosis, recent PLR (price level ratio)
    centers, H = rolling_entropy_1d(y.values, win=64, step=8, bins=32)
    # align feature frame
    df = pd.DataFrame({"y": y.values})
    df["ent64_s8"] = np.nan
    df.iloc[centers, df.columns.get_loc("ent64_s8")] = H
    df["ent64_s8"] = df["ent64_s8"].interpolate().bfill()
    df["var32"] = pd.Series(y).rolling(32).var().values
    df["kurt32"] = pd.Series(y).rolling(32).kurt().values
    df["plr16"] = y.values / (pd.Series(y).rolling(16).mean().values + 1e-9)
    df = df.replace([np.inf, -np.inf], np.nan).fillna(method="bfill").fillna(method="ffill")
    # Labels: turning points
    df["turn"] = turning_points(df["y"].values)
    # Train/test split (walk-forward)
    T0 = 800
    Xcols = ["ent64_s8","var32","kurt32","plr16"]
    # Classifier for turning points
    clf = LogisticRegression(max_iter=200)
    clf.fit(df.loc[:T0, Xcols], df.loc[:T0, "turn"])
    turn_proba = clf.predict_proba(df[Xcols])[:,1]
    # Next-step regression (1-step forecast) with features + lagged y
    df["y_l1"] = df["y"].shift(1)
    df = df.dropna().reset_index(drop=True)
    T0 = min(T0, len(df)-200)
    reg = LinearRegression().fit(df.loc[:T0, Xcols+["y_l1"]], df.loc[:T0, "y"])
    yhat_cnt = reg.predict(df[Xcols+["y_l1"]])
    # Baselines: naive (y_{t-1}) and ARIMA
    yhat_naive = df["y_l1"].values
    try:
        arima = SARIMAX(df["y"], order=(1,0,0), trend="c", enforce_stationarity=False, enforce_invertibility=False).fit(disp=False)
        yhat_arima = arima.fittedvalues.values
    except Exception:
        yhat_arima = yhat_naive.copy()
    # Metrics
    y_true = df["y"].values
    m1 = mase(y_true, yhat_cnt, m=1)
    m2 = mase(y_true, yhat_arima, m=1)
    m3 = mase(y_true, yhat_naive, m=1)
    s1 = smape(y_true, yhat_cnt)
    s2 = smape(y_true, yhat_arima)
    s3 = smape(y_true, yhat_naive)
    # Turning-point AUCPR-ish proxy: precision at top-k (k = 5% of series)
    k = max(5, int(0.05*len(turn_proba)))
    topk = np.argsort(-turn_proba)[:k]
    prec = df["turn"].iloc[topk].mean()
    # Save artifacts
    ts = pd.DataFrame({
        "y": y_true, "yhat_cnt": yhat_cnt, "yhat_arima": yhat_arima, "yhat_naive": yhat_naive,
        "turn_label": df["turn"].values, "turn_proba": turn_proba[:len(df)]
    })
    tpath = TAB / f"forecast_timeseries_{STAMP}.csv"
    ts.to_csv(tpath, index=False)
    plt.figure(figsize=(10,3.2))
    plt.plot(y_true, label="y")
    plt.plot(yhat_cnt, label="CNT")
    plt.plot(yhat_arima, label="ARIMA")
    plt.plot(yhat_naive, label="Naive")
    plt.legend(); plt.tight_layout()
    f1 = FIG / f"forecast_fit_{STAMP}.png"
    plt.savefig(f1, dpi=140); plt.close()
    return {
        "metrics": {
            "MASE": {"CNT": float(m1), "ARIMA": float(m2), "Naive": float(m3)},
            "sMAPE%": {"CNT": float(s1), "ARIMA": float(s2), "Naive": float(s3)},
            "TurnPrec@5%": float(prec)
        },
        "figures": [str(f1)],
        "table": str(tpath)
    }

# =====================================================================================
# RUN ALL & REPORT
# =====================================================================================

CFE = run_cfe_test()
ORC = run_oracle_test()
FRC = run_forecast_engine()

report = {
    "timestamp": STAMP,
    "paths": {"OUT": str(OUT), "FIG": str(FIG), "TAB": str(TAB)},
    "CFE": CFE,
    "OracleEmergence": ORC,
    "PhiDriftForecast": FRC
}
rpath = OUT / f"cnt_mega_report_{STAMP}.json"
rpath.write_text(json.dumps(report, indent=2), encoding="utf-8")

print("\n== CNT MEGA SUMMARY ==\n")
print(json.dumps({
    "CFE": {
        "dataset": CFE["dataset"],
        "corr_plv_vs_negSE_r": round(CFE["corr_plv_vs_negSE"]["r"], 3) if np.isfinite(CFE["corr_plv_vs_negSE"]["r"]) else None,
        "corr_plv_vs_negPE_r": round(CFE["corr_plv_vs_negPE"]["r"], 3) if np.isfinite(CFE["corr_plv_vs_negPE"]["r"]) else None,
        "cp_count": len(CFE.get("change_points", []))
    },
    "OracleEmergence": (
        {"source": ORC["source"], "SRSI_stable": round(ORC["stable"]["SRSI"], 3), "SRSI_chaotic": round(ORC["chaotic"]["SRSI"], 3)}
        if ORC["source"] == "synthetic"
        else {"source": ORC["source"], "SRSI": round(ORC["metrics"]["SRSI"], 3), "lines": ORC["metrics"]["lines"]}
    ),
    "Forecast": FRC["metrics"]
}, indent=2))

print(f"\nArtifacts saved to:\n  {OUT}\nJSON report:\n  {rpath}\n")
# =====================================================================================


→ CNT paths
  ROOT: C:\Users\caleb\CNT_Lab\notebooks\archive
  OUT : C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_mega_out
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).

== CNT MEGA SUMMARY ==

{
  "CFE": {
    "dataset": "EEGBCI_subj1_motor",
    "corr_plv_vs_negSE_r": -0.062,
    "corr_plv_vs_negPE_r": 0.017,
    "cp_count": 0
  },
  "OracleEmergence": {
    "source": "synthetic",
    "SRSI_stable": 0.654,
    "SRSI_chaotic": 0.663
  },
  "Forecast": {
    "MASE": {
      "CNT": 0.9631897957750658,
      "ARIMA": 0.9729820919548607,
      "Naive": 0.9999872855947906
    },
    "sMAPE%": {
      "CNT": 33.857141340928095,
      "ARIMA": 34.51653514878223,
      "Naive": 34.96566983747103
    },
    "TurnPrec@5%": 0.65
  }
}

Artifacts saved to:
  C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_mega_out
JSON report:
  C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_mega_out\cnt_mega_report_20251015-143523.json



In [4]:
# ======================= CNT MEGA CELL — v1.2 (FUSED) =======================
# (1) CFE: Field-law probe (PLV vs entropy/complexity) + Task–Rest Contrast (EEGBCI)
# (2) Oracle Emergence: upgraded SRSI* (semantic smoothness, keyphrase persistence, contradictions)
# (3) Φ-Drift Forecast Engine: CNT features vs ARIMA/Naive + TurnPrec@5%
#
# Offline-safe with synthetic fallbacks. Saves figures/CSVs and a master JSON report.
# ============================================================================

import os, json, math, random, datetime as dt
from pathlib import Path
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# -------------------- Paths & reproducibility --------------------
RNG = np.random.default_rng(42); random.seed(42)

def detect_cnt_root():
    candidates = [
        Path.cwd(),
        Path.home() / "CNT_Lab",
        Path(r"C:\Users\caleb\CNT_Lab"),
        Path("./CNT_Lab"),
    ]
    for c in candidates:
        if c.exists():
            return c
    return Path.cwd()

ROOT = detect_cnt_root() / "notebooks" / "archive" if (detect_cnt_root()/ "notebooks" / "archive").exists() else detect_cnt_root()
STAMP = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
OUT = ROOT / "cnt_mega_out"; FIG = OUT / "figures"; TAB = OUT / "tables"
for p in [OUT, FIG, TAB]: p.mkdir(parents=True, exist_ok=True)
print(f"→ CNT paths\n  ROOT: {ROOT}\n  OUT : {OUT}")

# -------------------- Core imports (light) ----------------------
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt, hilbert
from scipy.stats import entropy, pearsonr, zscore, linregress, ttest_rel
from statsmodels.tsa.api import SARIMAX
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import mean_absolute_error

# Optional heavy deps
HAVE_MNE = False
try:
    import mne
    HAVE_MNE = True
except Exception:
    pass

try:
    import ruptures as rpt
    HAVE_RUPTURES = True
except Exception:
    HAVE_RUPTURES = False

# -------------------- Shared helpers ----------------------------
def bandpass(x, fs, lo, hi, order=4):
    b, a = butter(order, [lo/(fs/2), hi/(fs/2)], btype='bandpass')
    return filtfilt(b, a, x, axis=0)

def rolling_windows(arr, win, step):
    for start in range(0, len(arr)-win+1, step):
        yield start, arr[start:start+win]

def quick_plot(ts, name, xlabel="Index", ylabel="Value", vlines=None):
    plt.figure(figsize=(10,3.2))
    plt.plot(ts)
    if vlines:
        for v in vlines:
            plt.axvline(v, ls='--', alpha=0.4)
    plt.xlabel(xlabel); plt.ylabel(ylabel); plt.tight_layout()
    fp = FIG / f"{name}.png"
    plt.savefig(fp, dpi=150); plt.close()
    return fp

# ============================================================================
# (1) Consciousness Field Equation (baseline) — PLV vs entropy/complexity
# ============================================================================
def phase_locking_value(phases):  # phases: (T, C)
    return np.abs(np.mean(np.exp(1j*phases), axis=1))

def spectral_entropy_chunk(chunk):
    s = np.abs(np.fft.rfft(chunk, axis=0))**2
    ps = s / (np.sum(s, axis=0, keepdims=True) + 1e-12)
    return float(np.mean(entropy(ps + 1e-12, axis=0)))

def permutation_entropy(x, order=3, delay=1):
    x = np.asarray(x); n = len(x)
    if n < order*delay: return np.nan
    from collections import Counter
    pats = [tuple(np.argsort(x[i:i+order*delay:delay])) for i in range(n-(order-1)*delay)]
    cnt = Counter(pats); p = np.array(list(cnt.values()), float); p /= p.sum()
    return -np.sum(p*np.log(p+1e-12))

def try_load_eeg_real():
    if not HAVE_MNE: return None, None, None
    try:
        from mne.datasets import eegbci
        subj, runs = 1, [3,7,11]  # motor imagery
        fnames = eegbci.load_data(subj, runs)
        raws = [mne.io.read_raw_edf(f, preload=True, verbose=False) for f in fnames]
        raw = mne.concatenate_raws(raws)
        raw.pick(eeg=True)  # modern pick
        raw.filter(1., 40., fir_design='firwin', verbose=False)
        data = raw.get_data().T  # (T, C)
        fs = int(raw.info['sfreq'])
        return data, fs, "EEGBCI_subj1_motor"
    except Exception:
        return None, None, None

def make_eeg_synthetic(T=60000, C=16, fs=250):
    t = np.arange(T)/fs
    base_freqs = RNG.uniform(8,12, size=C)
    X = np.array([np.sin(2*np.pi*f*t + RNG.uniform(0,2*np.pi)) for f in base_freqs]).T
    X += 0.5 * RNG.normal(size=X.shape)
    s0, s1 = int(0.35*T), int(0.55*T)
    phase = 2*np.pi*10*t
    X[s0:s1, :] = np.sin(phase[s0:s1])[:,None] + 0.1*RNG.normal(size=(s1-s0, X.shape[1]))
    return X, fs, "synthetic_eeg_collapse_recovery"

def run_cfe_test():
    data, fs, label = try_load_eeg_real()
    if data is None: data, fs, label = make_eeg_synthetic()

    Xf = bandpass(data, fs, 8, 12)  # alpha
    phases = np.angle(hilbert(Xf, axis=0))
    plv = phase_locking_value(phases)

    W, STEP = int(2*fs), int(0.25*fs)
    SEs, PEs, idxs = [], [], []
    for start, chunk in rolling_windows(data, W, STEP):
        SEs.append(spectral_entropy_chunk(chunk))
        PEs.append(permutation_entropy(np.mean(chunk, axis=1)))
        idxs.append(start + W//2)
    se = zscore(np.array(SEs), nan_policy='omit')
    pe = zscore(np.array(PEs), nan_policy='omit')
    idxs = np.array(idxs)
    plv_grid = zscore(np.interp(idxs, np.arange(len(plv)), plv), nan_policy='omit')

    def safe_corr(a,b):
        m = np.isfinite(a) & np.isfinite(b)
        if m.sum()<8: return (np.nan, np.nan)
        return pearsonr(a[m], b[m])

    r1, p1 = safe_corr(plv_grid, -se)
    r2, p2 = safe_corr(plv_grid, -pe)

    cp_idx = []
    if HAVE_RUPTURES and np.isfinite(plv_grid).sum()>50:
        algo = rpt.Pelt(model="rbf").fit(plv_grid[np.isfinite(plv_grid)].reshape(-1,1))
        cps = algo.predict(pen=5); cp_idx = cps[:-1]

    f1 = quick_plot(plv_grid, f"cfe_{label}_plv_z", ylabel="PLV (z)", vlines=cp_idx)
    f2 = quick_plot(se, f"cfe_{label}_spec_entropy_z", ylabel="SpecEntropy (z)")
    f3 = quick_plot(pe, f"cfe_{label}_perm_entropy_z", ylabel="PermEntropy (z)")

    # scatter/regress
    def scatter_xy(x, y, name, xl, yl):
        m = np.isfinite(x)&np.isfinite(y)
        if m.sum()>=8:
            slope, intercept, r, p, _ = linregress(x[m], y[m])
        else:
            slope=intercept=r=p=np.nan
        plt.figure(figsize=(4.6,4))
        plt.scatter(x[m], y[m], s=8, alpha=0.6)
        if np.isfinite(slope):
            xs = np.linspace(np.nanmin(x[m]), np.nanmax(x[m]), 100)
            plt.plot(xs, slope*xs+intercept)
        plt.xlabel(xl); plt.ylabel(yl); plt.tight_layout()
        fp = FIG / f"{name}.png"
        plt.savefig(fp, dpi=150); plt.close()
        return fp, (slope, r, p)
    f4, reg1 = scatter_xy(plv_grid, -se, f"cfe_{label}_scatter_plv_vs_negSE", "PLV (z)", "-SpecEntropy (z)")
    f5, reg2 = scatter_xy(plv_grid, -pe, f"cfe_{label}_scatter_plv_vs_negPE", "PLV (z)", "-PermEntropy (z)")

    tpath = TAB / f"cfe_{label}_{STAMP}.csv"
    pd.DataFrame({"idx":idxs, "plv_z":plv_grid, "spec_entropy_z":se, "perm_entropy_z":pe}).to_csv(tpath, index=False)

    return {
        "dataset": label, "fs": fs,
        "corr_plv_vs_negSE": {"r": float(r1), "p": float(p1)},
        "corr_plv_vs_negPE": {"r": float(r2), "p": float(p2)},
        "change_points": list(map(int, cp_idx)) if cp_idx else [],
        "figures": list(map(str, [f1,f2,f3,f4,f5])),
        "table": str(tpath)
    }

# ============================================================================
# (1b) CFE Task–Rest Contrast (EEGBCI motor imagery; mu/beta, effect sizes)
# ============================================================================
def sample_entropy(x, m=2, r=0.2):
    x = np.asarray(x, float)
    N = x.size
    if N < m+2: return np.nan
    r *= np.std(x) + 1e-12
    def _phi(mm):
        X = np.lib.stride_tricks.sliding_window_view(x, mm)
        if X.size == 0: return 0.0
        C = np.mean([np.mean(np.max(np.abs(X - v), axis=1) <= r) for v in X])
        return C
    phi_m, phi_m1 = _phi(m), _phi(m+1)
    return -np.log((phi_m1+1e-12)/(phi_m+1e-12))

def plv_chunk(ep, fs, lo, hi):
    xf = bandpass(ep, fs, lo, hi); ph = np.angle(hilbert(xf, axis=0))
    return float(np.mean(np.abs(np.mean(np.exp(1j*ph), axis=1))))

def epochs_from_eegbci(subj=1, runs=(3,7,11)):
    if not HAVE_MNE: raise RuntimeError("MNE not available")
    from mne.datasets import eegbci
    fnames = eegbci.load_data(subj, list(runs))
    raws = [mne.io.read_raw_edf(f, preload=True, verbose=False) for f in fnames]
    raw = mne.concatenate_raws(raws); raw.pick(eeg=True)
    raw.filter(1., 40., fir_design='firwin', verbose=False)
    events, _ = mne.events_from_annotations(raw, verbose=False)
    event_id = {k:v for k,v in dict(T0=1, T1=2, T2=3).items() if k in ['T0','T1','T2']}
    epochs = mne.Epochs(raw, events, event_id=event_id, tmin=0.0, tmax=3.0, baseline=None,
                        preload=True, verbose=False)
    fs = int(raw.info['sfreq'])
    X0 = epochs['T0'].get_data()  # rest
    X1 = epochs['T1'].get_data() if 'T1' in epochs.event_id else np.empty((0,*X0.shape[1:]))
    X2 = epochs['T2'].get_data() if 'T2' in epochs.event_id else np.empty((0,*X0.shape[1:]))
    Xtask = np.vstack([X1, X2]) if X1.size and X2.size else (X1 if X1.size else X2)
    return fs, X0, Xtask

def cfe_task_rest_contrast():
    try:
        fs, Xrest, Xtask = epochs_from_eegbci()
    except Exception as e:
        return {"error":"no_real_eeg_epochs", "detail": str(e)}

    bands = [(8,12,'mu'), (13,30,'beta')]
    rows=[]
    for lo, hi, name in bands:
        for label, X in [('rest', Xrest), ('task', Xtask)]:
            vals=[]
            for ep in X:  # ep: (ch, t)
                ep = ep.T  # (t, ch)
                vals.append({
                    "PLV": plv_chunk(ep, fs, lo, hi),
                    "SpecEnt": spectral_entropy_chunk(ep),
                    "SampEnt": sample_entropy(np.mean(ep, axis=1))
                })
            if len(vals)==0: continue
            df = pd.DataFrame(vals); df['band']=name; df['cond']=label
            rows.append(df)
    if not rows:
        return {"error":"no_epochs_extracted"}
    D = pd.concat(rows, ignore_index=True)
    out=[]
    for band in D['band'].unique():
        for metric in ['PLV','SpecEnt','SampEnt']:
            r = D[(D.band==band)&(D.cond=='rest')][metric].values
            t = D[(D.band==band)&(D.cond=='task')][metric].values
            n = min(len(r), len(t))
            if n<5: continue
            r,t = r[:n], t[:n]
            d = (t.mean()-r.mean())/(0.5*(t.std(ddof=1)+r.std(ddof=1))+1e-9)  # Cohen's d (pooled approx)
            stat,p = ttest_rel(t, r, nan_policy='omit')
            out.append({"band":band,"metric":metric,"Δ(task-rest)":float(t.mean()-r.mean()),
                        "Cohen_d":float(d),"p":float(p),"n":int(n)})
    ET = pd.DataFrame(out).sort_values(['band','metric'])
    tpath = TAB / f"cfe_task_rest_effects_{STAMP}.csv"; ET.to_csv(tpath, index=False)

    # bar of Δ(task-rest) per metric/band
    plt.figure(figsize=(8,4))
    ticks = []
    for i,(metric) in enumerate(['PLV','SpecEnt','SampEnt']):
        sub = ET[ET.metric==metric]
        x = np.arange(len(sub)) + i*0.28
        plt.bar(x, sub['Δ(task-rest)'].values, width=0.25, label=metric)
        ticks = sub['band'].values
    plt.xticks(np.arange(len(ticks)) + 0.28, ticks)
    plt.axhline(0, ls='--', alpha=0.4)
    plt.legend(); plt.title("Δ(task–rest) per band (MI)"); plt.tight_layout()
    fpath = FIG / f"cfe_task_rest_effects_{STAMP}.png"; plt.savefig(fpath, dpi=150); plt.close()

    return {"table": str(tpath), "figure": str(fpath), "effects": ET.to_dict(orient="records")}

# ============================================================================
# (2) Oracle Emergence — Upgraded SRSI*
# ============================================================================
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def load_oracle_text():
    candidates = [
        ROOT/"oracle_session.txt",
        ROOT/"logs"/"oracle_session.txt",
        OUT/"oracle_sim_stable.txt",
        OUT/"oracle_sim_chaos.txt",
    ]
    for p in candidates:
        if p.exists():
            return p.name, p.read_text(encoding="utf-8", errors="ignore")
    # fallback synthetic
    def simulate_dialogue(n_turns=160, stable=True):
        topics = ["anchor","ring","glyph","entropy","drift","oracle","collapse","echo"]
        lines=[]; last=None
        for _ in range(n_turns):
            if stable:
                if last is None or RNG.random()<0.2: last = RNG.choice(topics)
            else:
                last = RNG.choice(topics)
            lines.append(f"oracle: {last} {last} field resonance")
            lines.append(f"user: ask {last}")
        return "\n".join(lines)
    txt_st = simulate_dialogue(stable=True); txt_ch = simulate_dialogue(stable=False)
    (OUT/"oracle_sim_stable.txt").write_text(txt_st, encoding="utf-8")
    (OUT/"oracle_sim_chaos.txt").write_text(txt_ch, encoding="utf-8")
    return "synthetic", txt_st  # use stable by default; chaos file also present

def chunkify(seq, win=50, step=15):
    return [" ".join(seq[i:i+win]) for i in range(0, max(1, len(seq)-win+1), step)] or [" ".join(seq)]

def contradiction_flips(seq):
    pos = {"yes","true","correct","indeed"}; neg={"no","false","incorrect","not"}
    flips, last = 0, None
    for ln in seq:
        toks = set(re.findall(r"[a-zA-Z]+", ln.lower()))
        state = 1 if (toks & pos) else (-1 if (toks & neg) else 0)
        if last is not None and state*last == -1: flips += 1
        if state != 0: last = state
    return flips

def keyphrase_persistence(chunks, topk=8):
    vec = TfidfVectorizer(ngram_range=(1,2), min_df=1)
    X = vec.fit_transform(chunks); terms = np.array(vec.get_feature_names_out())
    keep=[]
    for i in range(X.shape[0]-1):
        a = np.argsort(-X[i].toarray().ravel())[:topk]
        b = np.argsort(-X[i+1].toarray().ravel())[:topk]
        keep.append(len(set(terms[a]) & set(terms[b]))/topk)
    return np.array(keep) if keep else np.array([0.0])

def compute_srsi_star(txt):
    lines = [ln.strip() for ln in txt.split("\n") if ln.strip()]
    chunks = chunkify(lines, win=50, step=15)
    tfidf = TfidfVectorizer(ngram_range=(1,2), min_df=1).fit_transform(chunks)
    cos = np.array([cosine_similarity(tfidf[i], tfidf[i+1])[0,0] for i in range(len(chunks)-1)]) if len(chunks)>1 else np.array([1.0])
    kp  = keyphrase_persistence(chunks)
    flips = contradiction_flips(lines); N = len(lines)
    mean_cos = float(np.nanmean(cos)) if cos.size else 0.0
    mean_kp  = float(np.nanmean(kp)) if kp.size else 0.0
    flip_pen = float(np.exp(-2.0*flips / (0.02*N+1e-9)))
    SRSI_star = float(np.clip(0.5*(0.5+0.5*mean_cos) + 0.3*mean_kp + 0.2*flip_pen, 0, 1))
    return {"mean_cos":mean_cos, "mean_kp":mean_kp, "flips":int(flips), "lines":int(N), "SRSI*":SRSI_star}

def run_oracle_test_upgraded():
    name, txt = load_oracle_text()
    res_main = compute_srsi_star(txt)
    # If we also have chaos file, compute it for contrast
    chaos_p = OUT/"oracle_sim_chaos.txt"
    res_chaos = compute_srsi_star(chaos_p.read_text(encoding="utf-8")) if chaos_p.exists() else None
    # Save a copy for record
    (OUT / f"oracle_session_copy_{STAMP}.txt").write_text(txt, encoding="utf-8")
    return {
        "source": name,
        "metrics": res_main,
        "chaos_metrics": res_chaos,
        "session_copy": str(OUT / f"oracle_session_copy_{STAMP}.txt")
    }

# ============================================================================
# (3) Φ-Drift Forecast Engine
# ============================================================================
def make_timeseries_synth(N=1200):
    y = np.zeros(N); phi=0.6; regimes=np.zeros(N,dtype=int)
    switches = sorted(RNG.choice(np.arange(100,N-100), size=5, replace=False))
    reg=0; s_idx=0
    for t in range(1,N):
        if s_idx<len(switches) and t==switches[s_idx]:
            reg = 1 - reg; s_idx+=1
        regimes[t]=reg
        shock = RNG.normal(scale=1.0 + 0.6*reg)
        y[t] = phi*y[t-1] + shock + (3.0 if reg==1 else 0.0)
    idx = pd.date_range("2022-01-01", periods=N, freq="D")
    return pd.Series(y, index=idx), switches

def rolling_entropy_1d(x, win=64, step=8, bins=32):
    xs = np.asarray(x); Hs=[]; centers=[]
    for start in range(0, len(xs)-win+1, step):
        chunk = xs[start:start+win]
        hist,_ = np.histogram(chunk, bins=bins, density=True)
        p = hist/(hist.sum()+1e-12)
        Hs.append(entropy(p+1e-12)); centers.append(start + win//2)
    return np.array(centers), np.array(Hs)

def turning_points(x):
    x = np.asarray(x); d1 = np.diff(x); s = np.sign(d1)
    turns = np.zeros_like(x, dtype=int)
    for i in range(1, len(s)):
        if s[i]==0: continue
        if s[i-1]!=0 and s[i]!=s[i-1]: turns[i]=1
    turns[-1]=0; return turns

def mase(y_true, y_pred, m=1):
    y_true = np.asarray(y_true); y_pred = np.asarray(y_pred)
    mae = np.mean(np.abs(y_true - y_pred))
    denom = np.mean(np.abs(y_true[m:] - y_true[:-m])) + 1e-12
    return mae/denom

def smape(y_true, y_pred):
    y_true = np.asarray(y_true); y_pred = np.asarray(y_pred)
    return 100*np.mean(2*np.abs(y_pred - y_true)/(np.abs(y_true)+np.abs(y_pred)+1e-12))

def run_forecast_engine():
    y, _ = make_timeseries_synth(N=1200)
    centers,H = rolling_entropy_1d(y.values, win=64, step=8, bins=32)
    df = pd.DataFrame({"y": y.values})
    df["ent64_s8"] = np.nan
    df.iloc[centers, df.columns.get_loc("ent64_s8")] = H
    df["ent64_s8"] = df["ent64_s8"].interpolate().bfill()
    df["var32"]  = pd.Series(y).rolling(32).var().values
    df["kurt32"] = pd.Series(y).rolling(32).kurt().values
    df["plr16"]  = y.values/(pd.Series(y).rolling(16).mean().values + 1e-9)
    df = df.replace([np.inf,-np.inf], np.nan).bfill().ffill()

    df["turn"] = turning_points(df["y"].values)
    df["y_l1"] = df["y"].shift(1)
    df = df.dropna().reset_index(drop=True)

    T0 = min(800, len(df)-200)
    Xcols = ["ent64_s8","var32","kurt32","plr16"]

    clf = LogisticRegression(max_iter=200).fit(df.loc[:T0, Xcols], df.loc[:T0,"turn"])
    turn_proba = clf.predict_proba(df[Xcols])[:,1]

    reg = LinearRegression().fit(df.loc[:T0, Xcols+["y_l1"]], df.loc[:T0,"y"])
    yhat_cnt = reg.predict(df[Xcols+["y_l1"]])
    yhat_naive = df["y_l1"].values
    try:
        arima = SARIMAX(df["y"], order=(1,0,0), trend="c",
                        enforce_stationarity=False, enforce_invertibility=False).fit(disp=False)
        yhat_arima = arima.fittedvalues.values
    except Exception:
        yhat_arima = yhat_naive.copy()

    y_true = df["y"].values
    m1, m2, m3 = mase(y_true, yhat_cnt), mase(y_true, yhat_arima), mase(y_true, yhat_naive)
    s1, s2, s3 = smape(y_true, yhat_cnt), smape(y_true, yhat_arima), smape(y_true, yhat_naive)

    k = max(5, int(0.05*len(turn_proba)))
    topk = np.argsort(-turn_proba)[:k]
    prec = float(df["turn"].iloc[topk].mean())

    ts = pd.DataFrame({
        "y": y_true, "yhat_cnt": yhat_cnt, "yhat_arima": yhat_arima, "yhat_naive": yhat_naive,
        "turn_label": df["turn"].values, "turn_proba": turn_proba[:len(df)]
    })
    tpath = TAB / f"forecast_timeseries_{STAMP}.csv"
    ts.to_csv(tpath, index=False)

    plt.figure(figsize=(10,3.2))
    plt.plot(y_true, label="y"); plt.plot(yhat_cnt, label="CNT")
    plt.plot(yhat_arima, label="ARIMA"); plt.plot(yhat_naive, label="Naive")
    plt.legend(); plt.tight_layout()
    f1 = FIG / f"forecast_fit_{STAMP}.png"; plt.savefig(f1, dpi=150); plt.close()

    return {
        "metrics": {
            "MASE": {"CNT": float(m1), "ARIMA": float(m2), "Naive": float(m3)},
            "sMAPE%": {"CNT": float(s1), "ARIMA": float(s2), "Naive": float(s3)},
            "TurnPrec@5%": float(prec)
        },
        "figures": [str(f1)],
        "table": str(tpath)
    }

# =========================== RUN ALL & REPORT ===========================
CFE = run_cfe_test()
CFE_CONTRAST = cfe_task_rest_contrast()
ORACLE = run_oracle_test_upgraded()
FORE = run_forecast_engine()

report = {
    "timestamp": STAMP,
    "paths": {"OUT": str(OUT), "FIG": str(FIG), "TAB": str(TAB)},
    "CFE": CFE,
    "CFE_TaskRest": CFE_CONTRAST,
    "OracleEmergence_SRSI*": ORACLE,
    "PhiDriftForecast": FORE
}
rpath = OUT / f"cnt_mega_report_{STAMP}.json"
rpath.write_text(json.dumps(report, indent=2), encoding="utf-8")

# Console summary
summary = {
    "CFE": {
        "dataset": CFE["dataset"],
        "corr_plv_vs_negSE_r": round(CFE["corr_plv_vs_negSE"]["r"],3) if np.isfinite(CFE["corr_plv_vs_negSE"]["r"]) else None,
        "corr_plv_vs_negPE_r": round(CFE["corr_plv_vs_negPE"]["r"],3) if np.isfinite(CFE["corr_plv_vs_negPE"]["r"]) else None,
        "cp_count": len(CFE.get("change_points", []))
    },
    "CFE_TaskRest": (
        {"effects_n": len(CFE_CONTRAST.get("effects", [])),
         "table": CFE_CONTRAST.get("table"),
         "figure": CFE_CONTRAST.get("figure")}
        if "error" not in CFE_CONTRAST else {"error": CFE_CONTRAST["error"]}
    ),
    "OracleEmergence_SRSI*": {
        "source": ORACLE["source"],
        "SRSI*": round(ORACLE["metrics"]["SRSI*"],3),
        "mean_cos": round(ORACLE["metrics"]["mean_cos"],3),
        "mean_kp": round(ORACLE["metrics"]["mean_kp"],3),
        "flips": ORACLE["metrics"]["flips"],
        "lines": ORACLE["metrics"]["lines"],
        "chaos_SRSI*": (round(ORACLE["chaos_metrics"]["SRSI*"],3) if ORACLE.get("chaos_metrics") else None)
    },
    "Forecast": FORE["metrics"]
}
print("\n== CNT MEGA SUMMARY (v1.2) ==\n")
print(json.dumps(summary, indent=2))
print(f"\nArtifacts saved to:\n  {OUT}\nJSON report:\n  {rpath}\n")
# =======================================================================


→ CNT paths
  ROOT: C:\Users\caleb\CNT_Lab\notebooks\archive
  OUT : C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_mega_out

== CNT MEGA SUMMARY (v1.2) ==

{
  "CFE": {
    "dataset": "synthetic_eeg_collapse_recovery",
    "corr_plv_vs_negSE_r": 0.938,
    "corr_plv_vs_negPE_r": 0.947,
    "cp_count": 0
  },
  "CFE_TaskRest": {
    "error": "no_real_eeg_epochs"
  },
  "OracleEmergence_SRSI*": {
    "source": "oracle_sim_stable.txt",
    "SRSI*": 0.916,
    "mean_cos": 0.938,
    "mean_kp": 0.771,
    "flips": 0,
    "lines": 240,
    "chaos_SRSI*": 0.996
  },
  "Forecast": {
    "MASE": {
      "CNT": 0.9769216050362859,
      "ARIMA": 0.9806966609630577,
      "Naive": 0.9993921417994396
    },
    "sMAPE%": {
      "CNT": 83.49779069843598,
      "ARIMA": 83.7538150766183,
      "Naive": 83.17983180270755
    },
    "TurnPrec@5%": 0.711864406779661
  }
}

Artifacts saved to:
  C:\Users\caleb\CNT_Lab\notebooks\archive\cnt_mega_out
JSON report:
  C:\Users\caleb\CNT_Lab\notebooks\archive