In [1]:
# === G-DREN v0.1: Milestone-0 bootstrap ===
import os, sys, json, math, time, hashlib
from pathlib import Path
import numpy as np, pandas as pd
from scipy.signal import welch, coherence
from sklearn.isotonic import IsotonicRegression

# --- Paths ---
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
for p in [ROOT, RUN]: p.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

# --- Utilities ---
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x)
    out = np.full_like(x, np.nan, dtype=float)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]
        out[i] = -np.sum(p*np.log(p))
    return out

def phase_echo(x, max_lag=60):
    x = pd.Series(x).astype(float)
    x = (x - x.mean())/(x.std()+1e-9)
    vals = []
    for tau in range(1, max_lag+1):
        a = x[tau:].values
        b = x.shift(tau).dropna().values
        if len(a)!=len(b) or len(a)<16: vals.append(np.nan); continue
        r = np.corrcoef(a, b)[0,1]
        vals.append(r)
    vals = np.array(vals, float)
    # echo power = max positive lag-corr over window
    return np.nanmax(vals)

def theta_breaches(sig, q=0.98, win=256):
    sig = pd.Series(sig).astype(float)
    thr = sig.rolling(win, min_periods=win//2).quantile(q)
    return (sig > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 256: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    if not np.any(band): return np.nan
    return float(np.nanmean(Cxy[band]))

def zscore(x):
    x = pd.Series(x, dtype=float)
    return ((x - x.rolling(512, min_periods=64).mean())
            /(x.rolling(512, min_periods=64).std()+1e-9)).values

def fuse_to_nexus(features: pd.DataFrame):
    # Simple monotone fusion for v0
    F = features.copy()
    F = F.fillna(method="ffill").fillna(method="bfill").fillna(0.0)
    # Normalize each feature
    for c in F.columns:
        F[c] = (F[c]-F[c].quantile(0.1))/(F[c].quantile(0.9)-F[c].quantile(0.1)+1e-9)
        F[c] = np.clip(F[c], 0, 1)
    # Weighted sum + isotonic calibration against a synthetic target (peaks)
    raw = 0.35*F['S_d'] + 0.25*F['Echo'] + 0.2*F['Theta'] + 0.2*F['Gamma']
    # pseudo-target: rolling future volatility as proxy (for bootstrap only)
    y = F['S_d'].shift(-12).fillna(F['S_d'].median()).values
    ir = IsotonicRegression(out_of_bounds='clip')
    xfit = raw.values
    nexus = ir.fit_transform(xfit, y)
    nex = pd.Series(nexus, index=F.index).astype(float)
    return (nex - nex.rolling(256, min_periods=64).min()) / (
            nex.rolling(256, min_periods=64).max() - nex.rolling(256, min_periods=64).min() + 1e-9)

# --- Synthetic criticality test (Kuramoto-lite order parameter proxy) ---
def synthetic_criticality(T=4000, seed=7):
    rng = np.random.default_rng(seed)
    # Piecewise parameter drifting toward critical region
    base = np.cumsum(rng.normal(0, 0.02, T))
    trend = np.linspace(0, 1.6, T)   # rising “K” proxy
    shock = (np.arange(T) > int(0.85*T)).astype(float) * rng.normal(0, 2.0, T)
    x = base + 0.6*np.tanh(1.5*(trend-1.0)) + 0.1*rng.normal(0,1,T) + shock
    return pd.Series(x)

def run_m0():
    x = synthetic_criticality()
    df = pd.DataFrame(index=pd.RangeIndex(len(x)))
    df['signal'] = x.values
    df['S_d']   = rolling_entropy(df['signal'], win=256, bins=48)
    df['Echo']  = pd.Series([np.nan]*len(df))
    for i in range(len(df)):
        if i<300: continue
        df.loc[i,'Echo'] = phase_echo(df['signal'].iloc[:i].values, max_lag=48)
    df['Theta'] = theta_breaches(zscore(df['signal']), q=0.985, win=256)
    # For Gamma, compare signal to a delayed copy as a stand-in second channel
    df['Gamma'] = pd.Series([np.nan]*len(df))
    delayed = df['signal'].shift(3).fillna(method='bfill').values
    for i in range(300, len(df)):
        a = df['signal'].iloc[max(0,i-512):i].values
        b = delayed[max(0,i-512):i]
        df.loc[i,'Gamma'] = glyph_coupling(a, b, fs=1.0)
    feats = df[['S_d','Echo','Theta','Gamma']]
    df['NEXUS'] = fuse_to_nexus(feats)
    paths = {
        "features_csv": save_df(df, "gdren_m0_features"),
        "run_meta_json": str((RUN / "meta.json").write_text(json.dumps({
            "run_id": RUN.name,
            "root": str(ROOT), "cnt_lab_dir": str(CNT),
            "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "notes": "Milestone-0 synthetic criticality + NEXUS v0.1"
        }, indent=2)))
    }
    print("Artifacts:", json.dumps(paths, indent=2))
    # Simple alerting demo
    thr_watch, thr_warn = 0.65, 0.80
    df['alert'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                    np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))
    save_df(df[['NEXUS','alert']], "gdren_m0_nexus_alerts")
    return df

if __name__ == "__main__":
    out = run_m0()
    print(out[['NEXUS','alert']].tail(10))


  delayed = df['signal'].shift(3).fillna(method='bfill').values


Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-210414Z\\gdren_m0_features.csv",
  "run_meta_json": "204"
}
         NEXUS alert
3990  0.341865    OK
3991  0.341865    OK
3992  0.341865    OK
3993  0.341865    OK
3994  0.341865    OK
3995  0.341865    OK
3996  0.341865    OK
3997  0.341865    OK
3998  0.341865    OK
3999  0.341865    OK


  F = F.fillna(method="ffill").fillna(method="bfill").fillna(0.0)


In [2]:
# === G-DREN v0.1.1: Milestone-0 bootstrap (stabilized & non-flat) ===
import os, json, math, time
from pathlib import Path
import numpy as np, pandas as pd
from scipy.signal import welch, coherence
from sklearn.isotonic import IsotonicRegression

# --- Paths ---
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
for p in [ROOT, RUN]: p.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

# --- Utilities ---
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float)
    out = np.full_like(x, np.nan, dtype=float)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]
        out[i] = -np.sum(p*np.log(p))
    return out

def phase_echo(x, max_lag=60):
    x = pd.Series(x, dtype=float)
    x = (x - x.mean())/(x.std()+1e-9)
    vals = []
    for tau in range(1, max_lag+1):
        a = x[tau:].values
        b = x.shift(tau).dropna().values
        if len(a)!=len(b) or len(a)<16: vals.append(np.nan); continue
        r = np.corrcoef(a, b)[0,1]
        vals.append(r)
    vals = np.array(vals, float)
    return np.nanmax(vals)

def theta_breaches(sig, q=0.98, win=256):
    sig = pd.Series(sig, dtype=float)
    thr = sig.rolling(win, min_periods=win//2).quantile(q)
    return (sig > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 256: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    if not np.any(band): return np.nan
    return float(np.nanmean(Cxy[band]))

def zscore_series(x, win=512):
    x = pd.Series(x, dtype=float)
    return ((x - x.rolling(win, min_periods=64).mean())
            /(x.rolling(win, min_periods=64).std()+1e-9)).values

def normalize01(s):
    q10, q90 = np.nanquantile(s, 0.10), np.nanquantile(s, 0.90)
    return np.clip((s - q10) / (q90 - q10 + 1e-9), 0, 1)

def fuse_to_nexus(features: pd.DataFrame, target: pd.Series):
    F = features.copy()
    F = F.ffill().bfill().fillna(0.0)   # ← no deprecation
    for c in F.columns:
        F[c] = normalize01(F[c].values)
    raw = 0.35*F['S_d'] + 0.25*F['Echo'] + 0.2*F['Theta'] + 0.2*F['Gamma']
    # Stronger target: future realized volatility (proxy for instability)
    y = pd.Series(target, index=F.index).astype(float).values
    ir = IsotonicRegression(out_of_bounds='clip')
    nx = ir.fit_transform(raw.values, y)
    nx = pd.Series(nx, index=F.index, dtype=float)
    # rolling min-max to smooth scale
    nxn = (nx - nx.rolling(256, min_periods=64).min()) / (
           nx.rolling(256, min_periods=64).max() - nx.rolling(256, min_periods=64).min() + 1e-9)
    return nxn.clip(0,1)

# --- Synthetic criticality (with rising variance near criticality) ---
def synthetic_criticality(T=4000, seed=7):
    rng = np.random.default_rng(seed)
    t = np.linspace(0, 1, T)
    base = np.cumsum(rng.normal(0, 0.02, T))
    drift = 0.6*np.tanh(3*(t-0.65))                  # slow regime drift
    sigma = 0.12 + 0.35/(1+np.exp(-12*(t-0.72)))     # variance ramps up
    shock = (t > 0.86)*rng.normal(0, 2.0, T)         # terminal rupture
    x = base + drift + sigma*rng.normal(0,1,T) + shock
    return pd.Series(x)

def future_volatility(sig: pd.Series, horizon=24):
    r = sig.diff()
    vol = r.rolling(horizon).std().shift(-horizon//2)
    return vol.ffill().bfill()

def run_m0():
    sig = synthetic_criticality()
    df = pd.DataFrame(index=pd.RangeIndex(len(sig)))
    df['signal'] = sig.values
    df['S_d']   = rolling_entropy(df['signal'], win=256, bins=48)
    df['Echo']  = pd.Series(np.nan, index=df.index, dtype=float)
    for i in range(len(df)):
        if i<300: continue
        df.loc[i,'Echo'] = phase_echo(df['signal'].iloc[:i].values, max_lag=48)
    df['Theta'] = theta_breaches(zscore_series(df['signal']), q=0.985, win=256)
    df['Gamma'] = pd.Series(np.nan, index=df.index, dtype=float)
    delayed = df['signal'].shift(3).bfill().values   # ← no deprecation
    for i in range(300, len(df)):
        a = df['signal'].iloc[max(0,i-512):i].values
        b = delayed[max(0,i-512):i]
        df.loc[i,'Gamma'] = glyph_coupling(a, b, fs=1.0)

    # target for isotonic: future realized volatility
    df['y_future_vol'] = future_volatility(pd.Series(df['signal']), horizon=24)

    feats = df[['S_d','Echo','Theta','Gamma']]
    df['NEXUS'] = fuse_to_nexus(feats, df['y_future_vol'])

    # Data-driven thresholds
    nx_train = df['NEXUS'].iloc[300:-300].dropna()
    thr_watch = float(nx_train.quantile(0.75))
    thr_warn  = float(nx_train.quantile(0.90))
    df['alert'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                    np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))

    paths = {
        "features_csv": save_df(df, "gdren_m0_features"),
        "alerts_csv": save_df(df[['NEXUS','alert']], "gdren_m0_nexus_alerts"),
        "meta_json": str(RUN / "meta.json")
    }
    (RUN / "meta.json").write_text(json.dumps({
        "run_id": RUN.name,
        "root": str(ROOT), "cnt_lab_dir": str(CNT),
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "thresholds": {"watch": thr_watch, "warning": thr_warn},
        "notes": "Milestone-0 synthetic + sturdy calibration (future volatility)."
    }, indent=2))
    print("Artifacts:", json.dumps(paths, indent=2))
    print("Thresholds → WATCH>=%.3f | WARNING>=%.3f" % (thr_watch, thr_warn))
    return df

if __name__ == "__main__":
    out = run_m0()
    print(out[['NEXUS','alert']].tail(12))


Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-210655Z\\gdren_m0_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-210655Z\\gdren_m0_nexus_alerts.csv",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-210655Z\\meta.json"
}
      NEXUS  alert
3988    0.0  WATCH
3989    0.0  WATCH
3990    0.0  WATCH
3991    0.0  WATCH
3992    0.0  WATCH
3993    0.0  WATCH
3994    0.0  WATCH
3995    0.0  WATCH
3996    0.0  WATCH
3997    0.0  WATCH
3998    0.0  WATCH
3999    0.0  WATCH


In [3]:
# === G-DREN v0.1.2: robust fusion (no isotonic), early-warning features on ===
import os, json, time
from pathlib import Path
import numpy as np, pandas as pd
from scipy.signal import coherence, welch
from scipy.stats import percentileofscore

# --- Paths ---
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
for p in [ROOT, RUN]: p.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

# --- Utilities ---
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float)
    out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]
        out[i] = -np.sum(p*np.log(p))
    return out

def phase_echo(x, max_lag=60):
    x = pd.Series(x, dtype=float)
    x = (x - x.mean())/(x.std()+1e-9)
    vals = []
    for tau in range(1, max_lag+1):
        a = x[tau:].values
        b = x.shift(tau).dropna().values
        if len(a)!=len(b) or len(a)<16:
            vals.append(np.nan); continue
        vals.append(np.corrcoef(a, b)[0,1])
    return np.nanmax(vals)

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 256: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    return float(np.nanmean(Cxy[band])) if np.any(band) else np.nan

def rolling_ar1(x, win=256):
    x = pd.Series(x, dtype=float)
    out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x.iloc[i-win+1:i+1].values
        sw = w.std()
        out[i] = 0.0 if sw<1e-9 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def rolling_var(x, win=256):
    return pd.Series(x, dtype=float).rolling(win, min_periods=win//2).var(ddof=1).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def zscore_series(x, win=512):
    s = pd.Series(x, dtype=float)
    mu = s.rolling(win, min_periods=64).median()
    mad = (s-mu).abs().rolling(win, min_periods=64).median()
    return ((s - mu)/(1.4826*mad + 1e-9)).values

def rolling_percent_rank(s, win=512):
    s = pd.Series(s, dtype=float)
    def pr(w):
        v = w.iat[-1]
        return percentileofscore(w, v, kind='mean')/100.0
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

def fuse_to_nexus(features: pd.DataFrame):
    F = features.copy().ffill().bfill().fillna(0.0)
    # robust z per feature (median/MAD over a rolling window)
    for c in F.columns:
        mu  = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-mu).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-mu)/(1.4826*mad + 1e-9)

    # Weighted early-warning score (critical slowing down + dispersion + echoes)
    score = (0.26*F['S_d'] + 0.18*F['Echo'] + 0.16*F['Theta'] +
             0.28*F['AR1'] + 0.12*F['Gamma'])

    # Turn score into a stable 0..1 index via rolling percentile rank, then smooth
    nx = pd.Series(rolling_percent_rank(score, win=512))
    nx = nx.ewm(span=64, min_periods=16).mean()
    return nx.clip(0,1)

# --- Synthetic criticality with variance ramp + late rupture ---
def synthetic_criticality(T=4000, seed=7):
    rng = np.random.default_rng(seed)
    t = np.linspace(0, 1, T)
    base = np.cumsum(rng.normal(0, 0.02, T))
    drift = 0.6*np.tanh(3*(t-0.65))
    sigma = 0.12 + 0.35/(1+np.exp(-12*(t-0.72)))
    shock = (t > 0.86)*rng.normal(0, 2.0, T)
    x = base + drift + sigma*rng.normal(0,1,T) + shock
    return pd.Series(x)

def run_m0():
    sig = synthetic_criticality()
    df = pd.DataFrame(index=pd.RangeIndex(len(sig)))
    df['signal'] = sig.values

    # CNT features
    df['S_d']   = rolling_entropy(df['signal'], win=256, bins=48)
    df['Echo']  = np.nan
    for i in range(len(df)):
        if i<300: continue
        df.loc[i,'Echo'] = phase_echo(df['signal'].iloc[:i].values, max_lag=48)
    df['AR1']   = rolling_ar1(df['signal'], win=256)
    df['VAR']   = rolling_var(df['signal'], win=256)
    df['Theta'] = theta_breaches(zscore_series(df['signal']), q=0.985, win=256)

    # Gamma via delayed coherence as a placeholder hetero-channel coupling
    df['Gamma'] = np.nan
    delayed = df['signal'].shift(3).bfill().values
    for i in range(300, len(df)):
        a = df['signal'].iloc[max(0,i-512):i].values
        b = delayed[max(0,i-512):i]
        df.loc[i,'Gamma'] = glyph_coupling(a, b, fs=1.0)

    # Fuse → NEXUS
    feats = df[['S_d','Echo','Theta','AR1','Gamma']]
    df['NEXUS'] = fuse_to_nexus(feats)

    # Thresholds from central training slice (no edges)
    nx_train = df['NEXUS'].iloc[300:-300].dropna()
    thr_watch = float(nx_train.quantile(0.75))
    thr_warn  = float(nx_train.quantile(0.90))
    df['alert'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                    np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))

    paths = {
        "features_csv": save_df(df, "gdren_m0_features"),
        "alerts_csv": save_df(df[['NEXUS','alert']], "gdren_m0_nexus_alerts"),
        "meta_json": str(RUN / "meta.json")
    }
    (RUN / "meta.json").write_text(json.dumps({
        "run_id": RUN.name,
        "root": str(ROOT), "cnt_lab_dir": str(CNT),
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "thresholds": {"watch": thr_watch, "warning": thr_warn},
        "notes": "v0.1.2 — robust percentile fusion; AR1+VAR+S_d+Echo+Theta"
    }, indent=2))
    print("Artifacts:", json.dumps(paths, indent=2))
    print("Thresholds → WATCH>=%.3f | WARNING>=%.3f" % (thr_watch, thr_warn))
    print(df['NEXUS'].describe()[['min','mean','50%','max']])
    return df

if __name__ == "__main__":
    out = run_m0()
    print(out[['NEXUS','alert']].tail(12))


Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-211340Z\\gdren_m0_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-211340Z\\gdren_m0_nexus_alerts.csv",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-211340Z\\meta.json"
}
min     0.032656
mean    0.488264
50%     0.471149
max     0.961931
Name: NEXUS, dtype: float64
         NEXUS  alert
3988  0.862075  WATCH
3989  0.859618  WATCH
3990  0.856996  WATCH
3991  0.853494  WATCH
3992  0.849678  WATCH
3993  0.845620  WATCH
3994  0.841385  WATCH
3995  0.839024  WATCH
3996  0.837697  WATCH
3997  0.836471  WATCH
3998  0.835043  WATCH
3999  0.834079  WATCH


In [4]:
# === G-DREN v0.2: Real feeds + skill metrics (SPY + local temperature) ===
import os, json, time, math, sys, subprocess
from pathlib import Path
import numpy as np, pandas as pd

def need(pkg):
    try:
        __import__(pkg)
        return False
    except Exception:
        return True

def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score, precision_recall_curve

# --- Config ---
LAT, LON = 40.7128, -74.0060     # <— change this to your location if you like
START, END = "2005-01-01", None  # None → today
FREQ = "D"
LOOKAHEAD_D = 10                  # window within which an "event" must occur after an alert
EVENT_KSIGMA = 2.0               # event = |Δprice| > K * rolling σ (21d)

# --- Paths ---
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime()) + "_v02"
RUN.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

def save_json(obj, name):
    fp = RUN / f"{name}.json"
    fp.write_text(json.dumps(obj, indent=2))
    return str(fp)

# --- Util blocks (same spirit as v0.1.2) ---
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p))
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = w.std()
        out[i] = 0.0 if sw<1e-9 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def rolling_var(x, win=256):
    return pd.Series(x, dtype=float).rolling(win, min_periods=win//2).var(ddof=1).values

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    mu  = s.rolling(win, min_periods=64).median()
    mad = (s-mu).abs().rolling(win, min_periods=64).median()
    return ((s - mu)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 256: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    return float(np.nanmean(Cxy[band])) if np.any(band) else np.nan

def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w):
        v = w.iat[-1]
        return (np.searchsorted(np.sort(w), v, side="right")/len(w))
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    # robust per-feature normalize (median/MAD)
    for c in F.columns:
        mu  = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-mu).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-mu)/(1.4826*mad + 1e-9)
    score = (0.24*F['S_d_mkt'] + 0.20*F['AR1_mkt'] + 0.10*F['Theta_mkt'] +
             0.18*F['S_d_tmp'] + 0.14*F['AR1_tmp'] + 0.06*F['Theta_tmp'] +
             0.08*F['Gamma'])
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# --- Data: SPY & local temperature anomaly ---
def get_market():
    try:
        s = yf.download("SPY", start=START, end=END, interval=FREQ, progress=False)
        px = s['Adj Close'].rename("px").dropna()
        return px
    except Exception as e:
        print("[warn] market download failed:", e)
        return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        loc = Point(lat, lon)
        df = Daily(loc, START, END).fetch().rename(columns={"tavg":"t"})
        # anomaly vs 31d rolling median
        t = df['t'].astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return anom
    except Exception as e:
        print("[warn] temperature fetch failed:", e)
        return pd.Series(dtype=float)

px = get_market()
ta = get_temp_anomaly(LAT, LON)

# align to common daily index
idx = px.index if len(px)>0 else ta.index
if len(px)>0 and len(ta)>0:
    idx = px.index.union(ta.index)
df = pd.DataFrame(index=pd.DatetimeIndex(idx)).sort_index()
if len(px)>0: df['px'] = px.reindex(df.index).ffill()
if len(ta)>0: df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

# basic returns for events
if 'px' in df:
    df['ret'] = df['px'].pct_change()

# --- Features per stream ---
# market
if 'px' in df:
    sig_m = df['ret'].fillna(0.0).values
    z_m   = zscore_mad(sig_m, win=512)
    df['S_d_mkt']   = rolling_entropy(sig_m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(sig_m, win=256)
    df['Theta_mkt'] = theta_breaches(z_m, q=0.985, win=256)
else:
    for c in ['S_d_mkt','AR1_mkt','Theta_mkt']:
        df[c] = np.nan

# temperature
if 'temp_anom' in df:
    sig_t = df['temp_anom'].fillna(0.0).values
    z_t   = zscore_mad(sig_t, win=512)
    df['S_d_tmp']   = rolling_entropy(sig_t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(sig_t, win=256)
    df['Theta_tmp'] = theta_breaches(z_t, q=0.985, win=256)
else:
    for c in ['S_d_tmp','AR1_tmp','Theta_tmp']:
        df[c] = np.nan

# hetero-coupling (coherence of mkt returns vs temp anomaly)
df['Gamma'] = np.nan
if 'ret' in df and 'temp_anom' in df:
    for i in range(400, len(df)):
        a = df['ret'].iloc[i-400:i].values
        b = df['temp_anom'].iloc[i-400:i].values
        df.iloc[i, df.columns.get_loc('Gamma')] = glyph_coupling(a, b, fs=1.0)

# --- Fuse → NEXUS + thresholds ---
feats = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_tmp','AR1_tmp','Theta_tmp','Gamma']
df['NEXUS'] = fuse_to_nexus(df[feats])

core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.90)) if len(core) else 0.90
df['alert'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))

# --- Event labeling (market) & skill metrics ---
metrics = {}
if 'ret' in df:
    # Event definition: big absolute move relative to recent volatility
    vol = df['ret'].rolling(21).std()
    events = (df['ret'].abs() > EVENT_KSIGMA*vol).astype(int)
    events = events.shift(-1).fillna(0).astype(int)  # event realized next day
    y = events.reindex(df.index).fillna(0).astype(int)

    # Binary scores from NEXUS (direct), PR-AUC
    nx = df['NEXUS'].fillna(0.0)
    if y.sum() > 0:
        auc_pr = float(average_precision_score(y.values, nx.values))
    else:
        auc_pr = float('nan')

    # Lead-time & false alarms from thresholded WARNING
    warn = (df['alert']=="WARNING").astype(int)
    # identify warning starts
    starts = (warn.diff().fillna(0) == 1)
    event_ix = np.where(y.values==1)[0]
    start_ix = np.where(starts.values)[0]

    lead_days = []
    hits = 0
    for s in start_ix:
        # did an event occur within LOOKAHEAD_D?
        window = range(s, min(s+LOOKAHEAD_D+1, len(df)))
        if any(y.values[w]==1 for w in window):
            hits += 1
            # lead time = distance from start to next event
            next_event = next((w for w in window if y.values[w]==1), None)
            if next_event is not None:
                lead_days.append(int(next_event - s))

    # false alarms = warnings with no event within horizon; per year
    false_alarms = int(len(start_ix) - hits)
    years = max(1, (df.index[-1]-df.index[0]).days/365.25)
    fa_per_year = float(false_alarms / years)

    metrics = {
        "samples": int(len(df)),
        "years": years,
        "events_total": int(y.sum()),
        "auc_pr": auc_pr,
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "warnings": int(warn.sum()),
        "warning_starts": int(len(start_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": fa_per_year,
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],  # truncate for meta
    }

# --- Persist artifacts ---
paths = {
    "features_csv": save_df(df, "gdren_v02_features"),
    "alerts_csv": save_df(df[['NEXUS','alert']], "gdren_v02_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {
            "lat": LAT, "lon": LON, "start": START, "end": END,
            "lookahead_days": LOOKAHEAD_D, "event_ksigma": EVENT_KSIGMA
        },
        "thresholds": {"watch": thr_watch, "warning": thr_warn},
        "notes": "Real-data v0.2 (SPY + local temperature anomaly)."
    }, "meta")
}

print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2 SKILL (market events) ===")
    for k,v in metrics.items(): print(f"{k:>24}: {v}")
else:
    print("\n[info] No market series available; metrics skipped.")


[setup] installing yfinance …
[setup] installing meteostat …
[setup] installing scikit-learn …


TypeError: unsupported operand type(s) for +: 'WindowsPath' and 'str'

In [5]:
# === G-DREN v0.2 (single cell): SPY + local temperature, thresholds + skill metrics ===
import os, json, time, sys, subprocess, math
from pathlib import Path
import numpy as np, pandas as pd

# ----- lightweight bootstrap for needed libs -----
def need(pkg):
    try:
        __import__(pkg)
        return False
    except Exception:
        return True

def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score

# ----- config -----
LAT, LON = 40.7128, -74.0060            # set your city if you wish
START, END = "2005-01-01", None          # None → today
LOOKAHEAD_D = 10                         # days after WARNING to count a hit
EVENT_KSIGMA = 2.0                       # event = |Δ| > K * rolling σ

# ----- paths -----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02"
RUN.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

def save_json(obj, name):
    fp = RUN / f"{name}.json"
    fp.write_text(json.dumps(obj, indent=2))
    return str(fp)

# ----- CNT feature utils -----
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p))
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = w.std()
        out[i] = 0.0 if sw<1e-9 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def rolling_var(x, win=256):
    return pd.Series(x, dtype=float).rolling(win, min_periods=win//2).var(ddof=1).values

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 256: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    return float(np.nanmean(Cxy[band])) if np.any(band) else np.nan

def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w):
        v = w.iat[-1]
        return (np.searchsorted(np.sort(w), v, side="right")/len(w))
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    score = (0.24*F.get('S_d_mkt',0) + 0.20*F.get('AR1_mkt',0) + 0.10*F.get('Theta_mkt',0) +
             0.18*F.get('S_d_tmp',0) + 0.14*F.get('AR1_tmp',0) + 0.06*F.get('Theta_tmp',0) +
             0.08*F.get('Gamma',0))
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# ----- data loaders -----
def get_market():
    try:
        s = yf.download("SPY", start=START, end=None, interval="1d", progress=False, auto_adjust=True)
        px = s['Close'].rename("px").dropna()
        return px
    except Exception as e:
        print("[warn] market download failed:", e)
        return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon)
        d = Daily(loc, start_dt, end_dt).fetch()
        if 'tavg' in d.columns and d['tavg'].notna().any():
            t = d['tavg'].astype(float)
        else:
            t = ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return anom
    except Exception as e:
        print("[warn] temperature fetch failed:", e)
        return pd.Series(dtype=float)

px = get_market()
ta = get_temp_anomaly(LAT, LON)

# ----- align & build frame -----
idx = pd.DatetimeIndex([])
if len(px)>0: idx = px.index
if len(ta)>0: idx = ta.index if idx.empty else idx.union(ta.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0: df['px'] = px.reindex(df.index).ffill()
if len(ta)>0: df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

if 'px' in df.columns:
    df['ret'] = df['px'].pct_change()

# ----- features (market) -----
if 'px' in df.columns:
    sig_m = df['ret'].fillna(0.0).values
    z_m   = zscore_mad(sig_m, win=512)
    df['S_d_mkt']   = rolling_entropy(sig_m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(sig_m, win=256)
    df['Theta_mkt'] = theta_breaches(z_m, q=0.985, win=256)
else:
    df['S_d_mkt'] = df['AR1_mkt'] = df['Theta_mkt'] = np.nan

# ----- features (temperature) -----
if 'temp_anom' in df.columns:
    sig_t = df['temp_anom'].fillna(0.0).values
    z_t   = zscore_mad(sig_t, win=512)
    df['S_d_tmp']   = rolling_entropy(sig_t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(sig_t, win=256)
    df['Theta_tmp'] = theta_breaches(z_t, q=0.985, win=256)
else:
    df['S_d_tmp'] = df['AR1_tmp'] = df['Theta_tmp'] = np.nan

# ----- hetero-coupling -----
df['Gamma'] = np.nan
if 'ret' in df.columns and 'temp_anom' in df.columns:
    for i in range(400, len(df)):
        a = df['ret'].iloc[i-400:i].values
        b = df['temp_anom'].iloc[i-400:i].values
        df.iloc[i, df.columns.get_loc('Gamma')] = glyph_coupling(a, b, fs=1.0)

# ----- fuse → NEXUS + thresholds -----
feats = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_tmp','AR1_tmp','Theta_tmp','Gamma']
df['NEXUS'] = fuse_to_nexus(df[feats])
core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.90)) if len(core) else 0.90
df['alert'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))

# ----- label events & compute skill (market only) -----
metrics = {}
if 'ret' in df.columns:
    vol = df['ret'].rolling(21).std()
    events = (df['ret'].abs() > EVENT_KSIGMA*vol).astype(int).shift(-1).fillna(0).astype(int)
    y = events.reindex(df.index).fillna(0).astype(int)
    nx = df['NEXUS'].fillna(0.0)
    auc_pr = float(average_precision_score(y.values, nx.values)) if y.sum() > 0 else float('nan')
    warn_mask = (df['alert']=="WARNING").astype(int)
    starts = (warn_mask.diff().fillna(0) == 1)
    event_ix = np.where(y.values==1)[0]
    start_ix = np.where(starts.values)[0]
    hits, lead_days = 0, []
    for s in start_ix:
        window = range(s, min(s+LOOKAHEAD_D+1, len(df)))
        if any(y.values[w]==1 for w in window):
            hits += 1
            ne = next((w for w in window if y.values[w]==1), None)
            if ne is not None: lead_days.append(int(ne - s))
    false_alarms = int(len(start_ix) - hits)
    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "events_total": int(y.sum()),
        "auc_pr": auc_pr,
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "warning_days": int(warn_mask.sum()),
        "warning_starts": int(len(start_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms / years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
    }

# ----- persist -----
paths = {
    "features_csv": save_df(df, "gdren_v02_features"),
    "alerts_csv": save_df(df[['NEXUS','alert']], "gdren_v02_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END,
                "lookahead_days": LOOKAHEAD_D, "event_ksigma": EVENT_KSIGMA},
        "thresholds": {"watch": thr_watch, "warning": thr_warn},
        "notes": "Real-data v0.2 (SPY + local temperature anomaly)."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2 SKILL (market events) ===")
    for k,v in metrics.items(): print(f"{k:>24}: {v}")
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …
[warn] market download failed: 'str' object is not callable




Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-213542Z_v02\\gdren_v02_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-213542Z_v02\\gdren_v02_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-213542Z_v02\\gdren_v02_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-213542Z_v02\\meta.json"
}

[info] Metrics skipped (no market series).


In [6]:
# === G-DREN v0.2a (single cell) — robust SPY fetch + temp fallback + thresholds + skill ===
import os, json, time, sys, subprocess
from pathlib import Path
import numpy as np, pandas as pd

# ----- lightweight bootstrap -----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True

def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score

# ----- config -----
LAT, LON = 40.7128, -74.0060          # set your lat/lon if you want
START, END = "2005-01-01", None       # None → today
LOOKAHEAD_D = 10                      # days after WARNING counted as a hit
EVENT_KSIGMA = 2.0                    # event = |Δ| > K * rolling σ

# ----- paths -----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02a"
RUN.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

def save_json(obj, name):
    fp = RUN / f"{name}.json"
    fp.write_text(json.dumps(obj, indent=2))
    return str(fp)

# ----- CNT feature utils -----
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p))
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = w.std()
        out[i] = 0.0 if sw<1e-9 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 256: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    return float(np.nanmean(Cxy[band])) if np.any(band) else np.nan

def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w):
        v = w.iat[-1]
        return (np.searchsorted(np.sort(w), v, side="right")/len(w))
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    score = (0.24*F.get('S_d_mkt',0) + 0.20*F.get('AR1_mkt',0) + 0.10*F.get('Theta_mkt',0) +
             0.18*F.get('S_d_tmp',0) + 0.14*F.get('AR1_tmp',0) + 0.06*F.get('Theta_tmp',0) +
             0.08*F.get('Gamma',0))
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# ----- data loaders (robust) -----
def get_market():
    # Try 1: Ticker.history (most resilient)
    try:
        h = yf.Ticker("SPY").history(period="max", auto_adjust=True)
        if isinstance(h, pd.DataFrame) and 'Close' in h.columns and len(h)>0:
            px = h['Close'].rename("px").dropna()
            if len(px): return px
    except Exception as e:
        print("[warn] yfinance Ticker.history failed:", e)
    # Try 2: download(period="max")
    try:
        s = yf.download("SPY", period="max", interval="1d", auto_adjust=True, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s.columns and len(s)>0:
            px = s['Close'].rename("px").dropna()
            if len(px): return px
    except Exception as e:
        print("[warn] yfinance download failed:", e)
    # Try 3: Stooq fallback (no API key, plain CSV)
    try:
        url = "https://stooq.com/q/d/l/?s=spy&i=d"
        stq = pd.read_csv(url)
        stq.columns = [c.lower() for c in stq.columns]
        stq['date'] = pd.to_datetime(stq['date'])
        stq = stq.set_index('date').sort_index()
        px = stq['close'].rename('px').astype(float)
        print("[info] Using Stooq fallback for SPY.")
        return px
    except Exception as e:
        print("[error] stooq fallback failed:", e)
        return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon)
        d = Daily(loc, start_dt, end_dt).fetch()
        if 'tavg' in d.columns and d['tavg'].notna().any():
            t = d['tavg'].astype(float)
        else:
            t = ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return anom
    except Exception as e:
        print("[warn] temperature fetch failed:", e)
        return pd.Series(dtype=float)

px = get_market()
ta = get_temp_anomaly(LAT, LON)

# ----- align & frame -----
idx = pd.DatetimeIndex([])
if len(px)>0: idx = px.index
if len(ta)>0: idx = ta.index if idx.empty else idx.union(ta.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0: df['px'] = px.reindex(df.index).ffill()
if len(ta)>0: df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()
if 'px' in df.columns: df['ret'] = df['px'].pct_change()

# ----- features -----
if 'px' in df.columns:
    sig_m = df['ret'].fillna(0.0).values
    z_m   = zscore_mad(sig_m, win=512)
    df['S_d_mkt']   = rolling_entropy(sig_m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(sig_m, win=256)
    df['Theta_mkt'] = theta_breaches(z_m, q=0.985, win=256)
else:
    df['S_d_mkt'] = df['AR1_mkt'] = df['Theta_mkt'] = np.nan

if 'temp_anom' in df.columns:
    sig_t = df['temp_anom'].fillna(0.0).values
    z_t   = zscore_mad(sig_t, win=512)
    df['S_d_tmp']   = rolling_entropy(sig_t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(sig_t, win=256)
    df['Theta_tmp'] = theta_breaches(z_t, q=0.985, win=256)
else:
    df['S_d_tmp'] = df['AR1_tmp'] = df['Theta_tmp'] = np.nan

# hetero-coupling
df['Gamma'] = np.nan
if 'ret' in df.columns and 'temp_anom' in df.columns:
    for i in range(400, len(df)):
        a = df['ret'].iloc[i-400:i].values
        b = df['temp_anom'].iloc[i-400:i].values
        df.iloc[i, df.columns.get_loc('Gamma')] = glyph_coupling(a, b, fs=1.0)

# ----- fuse → NEXUS + thresholds -----
feats = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_tmp','AR1_tmp','Theta_tmp','Gamma']
df['NEXUS'] = fuse_to_nexus(df[feats])
core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.90)) if len(core) else 0.90
df['alert'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))

# ----- events & skill (market only) -----
metrics = {}
if 'ret' in df.columns and df['ret'].notna().any():
    vol = df['ret'].rolling(21).std()
    events = (df['ret'].abs() > EVENT_KSIGMA*vol).astype(int).shift(-1).fillna(0).astype(int)
    y = events.reindex(df.index).fillna(0).astype(int)
    nx = df['NEXUS'].fillna(0.0)
    auc_pr = float(average_precision_score(y.values, nx.values)) if y.sum() > 0 else float('nan')
    warn_mask = (df['alert']=="WARNING").astype(int)
    starts = (warn_mask.diff().fillna(0) == 1)
    start_ix = np.where(starts.values)[0]
    hits, lead_days = 0, []
    for s in start_ix:
        window = range(s, min(s+LOOKAHEAD_D+1, len(df)))
        if any(y.values[w]==1 for w in window):
            hits += 1
            ne = next((w for w in window if y.values[w]==1), None)
            if ne is not None: lead_days.append(int(ne - s))
    false_alarms = int(len(start_ix) - hits)
    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "events_total": int(y.sum()),
        "auc_pr": auc_pr,
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "warning_days": int(warn_mask.sum()),
        "warning_starts": int(len(start_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms / years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
    }

# ----- persist -----
paths = {
    "features_csv": save_df(df, "gdren_v02a_features"),
    "alerts_csv": save_df(df[['NEXUS','alert']], "gdren_v02a_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02a_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END,
                "lookahead_days": LOOKAHEAD_D, "event_ksigma": EVENT_KSIGMA},
        "thresholds": {"watch": thr_watch, "warning": thr_warn},
        "notes": "Real-data v0.2a (robust SPY fetch + Meteostat fallback)."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2a SKILL (market events) ===")
    for k,v in metrics.items(): print(f"{k:>24}: {v}")
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …


TypeError: Cannot compare tz-naive and tz-aware timestamps

In [7]:
# === G-DREN v0.2b (single cell) — tz-safe index + robust SPY fetch + temp fallback + skill ===
import os, json, time, sys, subprocess
from pathlib import Path
import numpy as np, pandas as pd

# ----- lightweight bootstrap -----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True

def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score

# ----- config -----
LAT, LON = 40.7128, -74.0060          # change if you like
START, END = "2005-01-01", None       # None → today
LOOKAHEAD_D = 10                      # days after WARNING counted as a hit
EVENT_KSIGMA = 2.0                    # event = |Δ| > K * rolling σ

# ----- paths -----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02b"
RUN.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

def save_json(obj, name):
    fp = RUN / f"{name}.json"
    fp.write_text(json.dumps(obj, indent=2))
    return str(fp)

# ----- helpers -----
def normalize_daily_index(idx):
    """Return tz-naive, normalized-to-midnight daily DatetimeIndex."""
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None:
        idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()

def normalize_series_daily(s):
    """Apply normalize_daily_index to a Series index; return Series."""
    if s is None or len(s) == 0: return pd.Series(dtype=float)
    s = pd.Series(s.copy())
    s.index = normalize_daily_index(s.index)
    # drop duplicates after normalization by keeping last
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

# ----- CNT feature utils -----
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p))
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = w.std()
        out[i] = 0.0 if sw<1e-9 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 256: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    return float(np.nanmean(Cxy[band])) if np.any(band) else np.nan

def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w):
        v = w.iat[-1]
        return (np.searchsorted(np.sort(w), v, side="right")/len(w))
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    score = (0.24*F.get('S_d_mkt',0) + 0.20*F.get('AR1_mkt',0) + 0.10*F.get('Theta_mkt',0) +
             0.18*F.get('S_d_tmp',0) + 0.14*F.get('AR1_tmp',0) + 0.06*F.get('Theta_tmp',0) +
             0.08*F.get('Gamma',0))
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# ----- data loaders (robust) -----
def get_market():
    # Try 1: Ticker.history
    try:
        h = yf.Ticker("SPY").history(period="max", auto_adjust=True)
        if isinstance(h, pd.DataFrame) and 'Close' in h.columns and len(h)>0:
            px = h['Close'].rename("px").dropna()
            return normalize_series_daily(px)
    except Exception as e:
        print("[warn] yfinance Ticker.history failed:", e)
    # Try 2: download(period="max")
    try:
        s = yf.download("SPY", period="max", interval="1d", auto_adjust=True, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s.columns and len(s)>0:
            px = s['Close'].rename("px").dropna()
            return normalize_series_daily(px)
    except Exception as e:
        print("[warn] yfinance download failed:", e)
    # Try 3: Stooq fallback
    try:
        url = "https://stooq.com/q/d/l/?s=spy&i=d"
        stq = pd.read_csv(url)
        stq.columns = [c.lower() for c in stq.columns]
        stq['date'] = pd.to_datetime(stq['date'])
        stq = stq.set_index('date').sort_index()
        px = stq['close'].rename('px').astype(float)
        print("[info] Using Stooq fallback for SPY.")
        return normalize_series_daily(px)
    except Exception as e:
        print("[error] stooq fallback failed:", e)
        return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon)
        d = Daily(loc, start_dt, end_dt).fetch()
        if 'tavg' in d.columns and d['tavg'].notna().any():
            t = d['tavg'].astype(float)
        else:
            t = ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e:
        print("[warn] temperature fetch failed:", e)
        return pd.Series(dtype=float)

px = get_market()
ta = get_temp_anomaly(LAT, LON)

# ----- align & frame (all indices now tz-naive daily) -----
idx = pd.DatetimeIndex([])
if len(px)>0: idx = px.index
if len(ta)>0: idx = ta.index if idx.empty else idx.union(ta.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0: df['px'] = px.reindex(df.index).ffill()
if len(ta)>0: df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

if 'px' in df.columns:
    df['ret'] = df['px'].pct_change()

# ----- features -----
if 'px' in df.columns:
    sig_m = df['ret'].fillna(0.0).values
    z_m   = zscore_mad(sig_m, win=512)
    df['S_d_mkt']   = rolling_entropy(sig_m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(sig_m, win=256)
    df['Theta_mkt'] = theta_breaches(z_m, q=0.985, win=256)
else:
    df['S_d_mkt'] = df['AR1_mkt'] = df['Theta_mkt'] = np.nan

if 'temp_anom' in df.columns:
    sig_t = df['temp_anom'].fillna(0.0).values
    z_t   = zscore_mad(sig_t, win=512)
    df['S_d_tmp']   = rolling_entropy(sig_t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(sig_t, win=256)
    df['Theta_tmp'] = theta_breaches(z_t, q=0.985, win=256)
else:
    df['S_d_tmp'] = df['AR1_tmp'] = df['Theta_tmp'] = np.nan

# hetero-coupling
df['Gamma'] = np.nan
if 'ret' in df.columns and 'temp_anom' in df.columns:
    for i in range(400, len(df)):
        a = df['ret'].iloc[i-400:i].values
        b = df['temp_anom'].iloc[i-400:i].values
        df.iloc[i, df.columns.get_loc('Gamma')] = glyph_coupling(a, b, fs=1.0)

# ----- fuse → NEXUS + thresholds -----
feats = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_tmp','AR1_tmp','Theta_tmp','Gamma']
df['NEXUS'] = fuse_to_nexus(df[feats])
core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.90)) if len(core) else 0.90
df['alert'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))

# ----- events & skill (market only) -----
metrics = {}
if 'ret' in df.columns and df['ret'].notna().any():
    vol = df['ret'].rolling(21).std()
    events = (df['ret'].abs() > EVENT_KSIGMA*vol).astype(int).shift(-1).fillna(0).astype(int)
    y = events.reindex(df.index).fillna(0).astype(int)
    nx = df['NEXUS'].fillna(0.0)
    auc_pr = float(average_precision_score(y.values, nx.values)) if y.sum() > 0 else float('nan')
    warn_mask = (df['alert']=="WARNING").astype(int)
    starts = (warn_mask.diff().fillna(0) == 1)
    start_ix = np.where(starts.values)[0]
    hits, lead_days = 0, []
    for s in start_ix:
        window = range(s, min(s+LOOKAHEAD_D+1, len(df)))
        if any(y.values[w]==1 for w in window):
            hits += 1
            ne = next((w for w in window if y.values[w]==1), None)
            if ne is not None: lead_days.append(int(ne - s))
    false_alarms = int(len(start_ix) - hits)
    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "events_total": int(y.sum()),
        "auc_pr": auc_pr,
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "warning_days": int(warn_mask.sum()),
        "warning_starts": int(len(start_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms / years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
    }

# ----- persist -----
paths = {
    "features_csv": save_df(df, "gdren_v02b_features"),
    "alerts_csv": save_df(df[['NEXUS','alert']], "gdren_v02b_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02b_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END,
                "lookahead_days": LOOKAHEAD_D, "event_ksigma": EVENT_KSIGMA},
        "thresholds": {"watch": thr_watch, "warning": thr_warn},
        "notes": "v0.2b — timezone-normalized daily index; robust SPY + Meteostat."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2b SKILL (market events) ===")
    for k,v in metrics.items(): print(f"{k:>24}: {v}")
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …




Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-214614Z_v02b\\gdren_v02b_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-214614Z_v02b\\gdren_v02b_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-214614Z_v02b\\gdren_v02b_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-214614Z_v02b\\meta.json"
}

=== v0.2b SKILL (market events) ===
                 samples: 10615
                   years: 32.766598220396986
            events_total: 712
                  auc_pr: 0.0663486089959487
         watch_threshold: 0.7535573991041762
        hits_within_days: 10
                    hits: 11
            false_alarms: 3
   false_alarms_per_year: 0.09155665106951873
   lead_time_days_median: 7.0
     lead_time_days_mean: 5.909090909090909
      lead_time_days_all: [3, 5, 6, 9, 1, 8, 10, 8, 0, 7, 8]


In [8]:
# === G-DREN v0.2c (single cell) — robust features, windowed events, clearer skill ===
import os, json, time, sys, subprocess, contextlib, warnings
from pathlib import Path
import numpy as np, pandas as pd

# --- quiet noisy numeric warnings from lower libs (we guard ourselves) ---
warnings.filterwarnings("ignore", category=RuntimeWarning)

# ----- lightweight bootstrap -----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True

def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score, precision_recall_curve, auc

# ----- config -----
LAT, LON = 40.7128, -74.0060           # set to your location if you like
START, END = "2005-01-01", None        # None → today
LOOKAHEAD_D = 10                       # event horizon for early warning
EVENT_KSIGMA = 2.0                     # base threshold scale (you can try 2.5–3.0)
COUPLING_WIN = 400                     # samples for hetero-coupling window
COOLDOWN_D = 15                        # minimal days between WARNING starts (reporting only)

# ----- paths -----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02c"
RUN.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"
    df.to_csv(fp, index=True)
    return str(fp)

def save_json(obj, name):
    fp = RUN / f"{name}.json"
    fp.write_text(json.dumps(obj, indent=2))
    return str(fp)

# ----- time index helpers -----
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None:
        idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()

def normalize_series_daily(s):
    if s is None or len(s) == 0: return pd.Series(dtype=float)
    s = pd.Series(s.copy())
    s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

# ----- CNT feature utils -----
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]
        out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w)
        out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    # robust coherence: guard constants/NaNs/empty bands
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 128: return np.nan
    mask = np.isfinite(a) & np.isfinite(b)
    a, b = a[mask], b[mask]
    if len(a) < 128: return np.nan
    a = a - np.mean(a); b = b - np.mean(b)
    if np.std(a) < 1e-12 or np.std(b) < 1e-12: return np.nan
    with np.errstate(all='ignore'):
        f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25)
    if not np.any(band): return np.nan
    v = Cxy[band]
    v = v[np.isfinite(v)]
    return float(v.mean()) if v.size else np.nan

def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w):
        v = w.iat[-1]
        arr = np.sort(w)
        return (np.searchsorted(arr, v, side="right")/len(w))
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    score = (0.24*F.get('S_d_mkt',0) + 0.20*F.get('AR1_mkt',0) + 0.10*F.get('Theta_mkt',0) +
             0.18*F.get('S_d_tmp',0) + 0.14*F.get('AR1_tmp',0) + 0.06*F.get('Theta_tmp',0) +
             0.08*F.get('Gamma',0))
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# ----- data loaders (robust) -----
def get_market():
    try:
        h = yf.Ticker("SPY").history(period="max", auto_adjust=True)
        if isinstance(h, pd.DataFrame) and 'Close' in h.columns and len(h)>0:
            return normalize_series_daily(h['Close'].rename("px").dropna())
    except Exception as e:
        print("[warn] yfinance Ticker.history failed:", e)
    try:
        s = yf.download("SPY", period="max", interval="1d", auto_adjust=True, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s.columns and len(s)>0:
            return normalize_series_daily(s['Close'].rename("px").dropna())
    except Exception as e:
        print("[warn] yfinance download failed:", e)
    try:
        url = "https://stooq.com/q/d/l/?s=spy&i=d"
        stq = pd.read_csv(url)
        stq.columns = [c.lower() for c in stq.columns]
        stq['date'] = pd.to_datetime(stq['date'])
        stq = stq.set_index('date').sort_index()
        print("[info] Using Stooq fallback for SPY.")
        return normalize_series_daily(stq['close'].rename('px').astype(float))
    except Exception as e:
        print("[error] stooq fallback failed:", e)
        return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon)
        d = Daily(loc, start_dt, end_dt).fetch()
        if 'tavg' in d.columns and d['tavg'].notna().any():
            t = d['tavg'].astype(float)
        else:
            t = ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e:
        print("[warn] temperature fetch failed:", e)
        return pd.Series(dtype=float)

px = get_market()
ta = get_temp_anomaly(LAT, LON)

# ----- align & frame -----
idx = pd.DatetimeIndex([])
if len(px)>0: idx = px.index
if len(ta)>0: idx = ta.index if idx.empty else idx.union(ta.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0: df['px'] = px.reindex(df.index).ffill()
if len(ta)>0: df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()
if 'px' in df.columns: df['ret'] = df['px'].pct_change()

# ----- features -----
if 'px' in df.columns:
    sig_m = df['ret'].fillna(0.0).values
    z_m   = zscore_mad(sig_m, win=512)
    df['S_d_mkt']   = rolling_entropy(sig_m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(sig_m, win=256)
    df['Theta_mkt'] = theta_breaches(z_m, q=0.985, win=256)
else:
    df['S_d_mkt'] = df['AR1_mkt'] = df['Theta_mkt'] = np.nan

if 'temp_anom' in df.columns:
    sig_t = df['temp_anom'].fillna(0.0).values
    z_t   = zscore_mad(sig_t, win=512)
    df['S_d_tmp']   = rolling_entropy(sig_t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(sig_t, win=256)
    df['Theta_tmp'] = theta_breaches(z_t, q=0.985, win=256)
else:
    df['S_d_tmp'] = df['AR1_tmp'] = df['Theta_tmp'] = np.nan

# hetero-coupling (robust)
df['Gamma'] = np.nan
if 'ret' in df.columns and 'temp_anom' in df.columns:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['temp_anom'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma')] = glyph_coupling(a, b, fs=1.0)

# ----- fuse → NEXUS + thresholds -----
feats = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_tmp','AR1_tmp','Theta_tmp','Gamma']
df['NEXUS'] = fuse_to_nexus(df[feats])
core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.90)) if len(core) else 0.90

# base alert
df['alert_raw'] = np.where(df['NEXUS']>=thr_warn, 'WARNING',
                    np.where(df['NEXUS']>=thr_watch, 'WATCH','OK'))

# optional: cooldown-summarized starts (reporting; does not rewrite NEXUS)
starts_raw = (df['alert_raw'].eq("WARNING") & ~df['alert_raw'].shift(1).eq("WARNING"))
start_ix = list(np.where(starts_raw.fillna(False).values)[0])
kept = []
last = -10**9
for s in start_ix:
    if s - last >= COOLDOWN_D:
        kept.append(s)
        last = s
kept = np.array(kept, dtype=int)
df['alert'] = df['alert_raw']  # for now keep raw state; we report cooldowned starts separately

# ----- events & skill (two views) -----
metrics = {}
if 'ret' in df.columns and df['ret'].notna().any():
    # (A) classic next-day spikes (for reference)
    vol = df['ret'].rolling(21).std()
    events_nextday = (df['ret'].abs() > EVENT_KSIGMA*vol).astype(int).shift(-1).fillna(0).astype(int)
    yA = events_nextday.reindex(df.index).fillna(0).astype(int)
    nx = df['NEXUS'].fillna(0.0)
    auc_pr_A = float(average_precision_score(yA.values, nx.values)) if yA.sum() > 0 else float('nan')

    # (B) windowed events: is there a K-sigma spike in the NEXT LOOKAHEAD_D days?
    future_max = (df['ret'].abs()
                  .rolling(LOOKAHEAD_D, min_periods=1).max()
                  .shift(-(LOOKAHEAD_D-1)))
    yB = (future_max > EVENT_KSIGMA*vol).astype(int).reindex(df.index).fillna(0).astype(int)
    auc_pr_B = float(average_precision_score(yB.values, nx.values)) if yB.sum() > 0 else float('nan')

    # WARNING starts & hits within horizon on yB
    warn_mask = (df['alert']=="WARNING").astype(int)
    starts = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING"))
    start_ix_all = np.where(starts.fillna(False).values)[0]
    hits, lead_days = 0, []
    for s in start_ix_all:
        window = range(s, min(s+LOOKAHEAD_D+1, len(df)))
        if any(yB.values[w]==1 for w in window):
            hits += 1
            ne = next((w for w in window if yB.values[w]==1), None)
            if ne is not None: lead_days.append(int(ne - s))
    false_alarms = int(len(start_ix_all) - hits)

    # Coverage: what fraction of all windowed events had a WARNING start in the prior horizon?
    ev_ix = np.where(yB.values==1)[0]
    covered = 0
    start_set = set(start_ix_all)
    for e in ev_ix:
        left = max(0, e-LOOKAHEAD_D)
        if any((i in start_set) for i in range(left, e+1)): covered += 1
    coverage = float(covered/len(ev_ix)) if len(ev_ix) else float('nan')

    # segment stats
    segs, in_warn, st = [], False, None
    for i, a in enumerate(df['alert'].values):
        if a=="WARNING" and not in_warn:
            st, in_warn = i, True
        elif a!="WARNING" and in_warn:
            segs.append((st, i-1)); in_warn=False
    if in_warn: segs.append((st, len(df)-1))
    seg_lens = [e-s+1 for s,e in segs]

    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "events_nextday_total": int(yA.sum()),
        "events_window_total": int(yB.sum()),
        "auc_pr_nextday": auc_pr_A,
        "auc_pr_windowed": auc_pr_B,
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "warning_days": int(warn_mask.sum()),
        "warning_starts": int(len(start_ix_all)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms / years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "coverage_windowed": coverage,
        "warning_segments": int(len(segs)),
        "warning_segment_len_median": (float(np.median(seg_lens)) if seg_lens else None),
        "cooldown_applied_min_days_between_starts": COOLDOWN_D,
        "cooldown_starts_kept": int(len(kept))
    }

# ----- persist -----
paths = {
    "features_csv": save_df(df, "gdren_v02c_features"),
    "alerts_csv": save_df(df[['NEXUS','alert','alert_raw']], "gdren_v02c_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02c_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END,
                "lookahead_days": LOOKAHEAD_D, "event_ksigma": EVENT_KSIGMA,
                "coupling_win": COUPLING_WIN},
        "thresholds": {"watch": thr_watch, "warning": thr_warn},
        "notes": "v0.2c — robust coupling; windowed-event metrics; clearer coverage."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2c SKILL ===")
    for k,v in metrics.items(): print(f"{k:>28}: {v}")
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …
Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-215632Z_v02c\\gdren_v02c_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-215632Z_v02c\\gdren_v02c_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-215632Z_v02c\\gdren_v02c_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-215632Z_v02c\\meta.json"
}

=== v0.2c SKILL ===
                     samples: 10615
                       years: 32.766598220396986
        events_nextday_total: 712
         events_window_total: 5252
              auc_pr_nextday: 0.0662339310716148
             auc_pr_windowed: 0.4961180029348552
             watch_threshold: 0.7447845042357616
            hits_within_days: 10
                        hits: 12
                false_alarms: 2
       false_alarms_per_year: 0.06103776737967915
       lead_time_days_median: 0.0
         lead_time_days_mean: 0.5833333333333334
          lead_time_days_all: [0, 0,

In [9]:
# === G-DREN v0.2d (single cell) — horizon-aware skill, true lead time, hysteresis exits ===
import os, json, time, sys, subprocess, warnings
from pathlib import Path
import numpy as np, pandas as pd

warnings.filterwarnings("ignore", category=RuntimeWarning)

# ----- lightweight bootstrap -----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True

def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score

# ----- config (tune freely) -----
LAT, LON = 40.7128, -74.0060    # your location
START, END = "2005-01-01", None
LOOKAHEAD_D = 7                 # early-warning horizon
EVENT_KSIGMA = 2.5              # event = |Δ| > K * rolling σ (try 2.5–3.0)
COUPLING_WIN = 400              # samples for hetero-coupling
HYST_EXIT_MARGIN = 0.05         # WARNING exits when NEXUS < (watch - margin) for N days
HYST_EXIT_DAYS = 5
COOLDOWN_BETWEEN_STARTS = 10    # min days between WARNING starts (reporting)

# ----- paths -----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02d"
RUN.mkdir(parents=True, exist_ok=True)

def save_df(df, name):
    fp = RUN / f"{name}.csv"; df.to_csv(fp, index=True); return str(fp)

def save_json(obj, name):
    fp = RUN / f"{name}.json"; fp.write_text(json.dumps(obj, indent=2)); return str(fp)

# ----- time helpers -----
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None:
        idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()

def normalize_series_daily(s):
    if s is None or len(s) == 0: return pd.Series(dtype=float)
    s = pd.Series(s.copy())
    s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

# ----- CNT features -----
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h, _ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w)
        out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a) != len(b) or len(a) < 128: return np.nan
    mask = np.isfinite(a) & np.isfinite(b); a, b = a[mask], b[mask]
    if len(a) < 128: return np.nan
    a -= a.mean(); b -= b.mean()
    if a.std()<1e-12 or b.std()<1e-12: return np.nan
    f, Cxy = coherence(a, b, fs=fs, nperseg=min(256, len(a)))
    band = (f>0.02) & (f<0.25); v = Cxy[band]
    v = v[np.isfinite(v)]; return float(v.mean()) if v.size else np.nan

def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w):
        v = w.iat[-1]; arr = np.sort(w)
        return (np.searchsorted(arr, v, side="right")/len(w))
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    score = (0.24*F.get('S_d_mkt',0) + 0.20*F.get('AR1_mkt',0) + 0.10*F.get('Theta_mkt',0) +
             0.18*F.get('S_d_tmp',0) + 0.14*F.get('AR1_tmp',0) + 0.06*F.get('Theta_tmp',0) +
             0.08*F.get('Gamma',0))
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# ----- data loaders (robust) -----
def get_market():
    try:
        h = yf.Ticker("SPY").history(period="max", auto_adjust=True)
        if isinstance(h, pd.DataFrame) and 'Close' in h.columns and len(h)>0:
            return normalize_series_daily(h['Close'].rename("px").dropna())
    except Exception as e:
        print("[warn] yfinance Ticker.history failed:", e)
    try:
        s = yf.download("SPY", period="max", interval="1d", auto_adjust=True, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s.columns and len(s)>0:
            return normalize_series_daily(s['Close'].rename("px").dropna())
    except Exception as e:
        print("[warn] yfinance download failed:", e)
    try:
        url = "https://stooq.com/q/d/l/?s=spy&i=d"
        stq = pd.read_csv(url)
        stq.columns = [c.lower() for c in stq.columns]
        stq['date'] = pd.to_datetime(stq['date'])
        stq = stq.set_index('date').sort_index()
        print("[info] Using Stooq fallback for SPY.")
        return normalize_series_daily(stq['close'].rename('px').astype(float))
    except Exception as e:
        print("[error] stooq fallback failed:", e)
        return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon)
        d = Daily(loc, start_dt, end_dt).fetch()
        if 'tavg' in d.columns and d['tavg'].notna().any():
            t = d['tavg'].astype(float)
        else:
            t = ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e:
        print("[warn] temperature fetch failed:", e)
        return pd.Series(dtype=float)

px = get_market()
ta = get_temp_anomaly(LAT, LON)

# ----- align & frame -----
idx = pd.DatetimeIndex([])
if len(px)>0: idx = px.index
if len(ta)>0: idx = ta.index if idx.empty else idx.union(ta.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0: df['px'] = px.reindex(df.index).ffill()
if len(ta)>0: df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()
if 'px' in df.columns: df['ret'] = df['px'].pct_change()

# ----- features -----
if 'px' in df.columns:
    sig_m = df['ret'].fillna(0.0).values
    z_m   = zscore_mad(sig_m, win=512)
    df['S_d_mkt']   = rolling_entropy(sig_m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(sig_m, win=256)
    df['Theta_mkt'] = theta_breaches(z_m, q=0.985, win=256)
else:
    df['S_d_mkt'] = df['AR1_mkt'] = df['Theta_mkt'] = np.nan

if 'temp_anom' in df.columns:
    sig_t = df['temp_anom'].fillna(0.0).values
    z_t   = zscore_mad(sig_t, win=512)
    df['S_d_tmp']   = rolling_entropy(sig_t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(sig_t, win=256)
    df['Theta_tmp'] = theta_breaches(z_t, q=0.985, win=256)
else:
    df['S_d_tmp'] = df['AR1_tmp'] = df['Theta_tmp'] = np.nan

# hetero-coupling
df['Gamma'] = np.nan
if 'ret' in df.columns and 'temp_anom' in df.columns:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['temp_anom'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma')] = glyph_coupling(a, b, fs=1.0)

# ----- fuse → NEXUS + thresholds -----
feats = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_tmp','AR1_tmp','Theta_tmp','Gamma']
df['NEXUS'] = fuse_to_nexus(df[feats])

core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.90)) if len(core) else 0.90
thr_exit  = thr_watch - HYST_EXIT_MARGIN

# ----- state with hysteresis exits -----
state = []
in_warn = False; below_cnt = 0
nx = df['NEXUS'].values
for i in range(len(df)):
    val = nx[i]
    slope3 = (val - nx[i-3]) if i>=3 and np.isfinite(nx[i-3]) else 0.0
    if in_warn:
        if val < thr_exit: below_cnt += 1
        else: below_cnt = 0
        if below_cnt >= HYST_EXIT_DAYS:
            in_warn = False
            state.append('WATCH' if val >= thr_watch else 'OK')
        else:
            state.append('WARNING')
    else:
        if (val >= thr_warn) and (slope3 > 0):
            in_warn = True; below_cnt = 0
            state.append('WARNING')
        elif val >= thr_watch:
            state.append('WATCH')
        else:
            state.append('OK')
df['alert'] = state

# ----- event series (actual spike days) -----
metrics = {}
if 'ret' in df.columns and df['ret'].notna().any():
    vol = df['ret'].rolling(21).std()
    E = (df['ret'].abs() > EVENT_KSIGMA*vol).astype(int)     # event days
    df['EVENT'] = E

    # Lead-curve PR-AUC: use NEXUS from k days earlier to rank today's events
    auc_lead = {}
    nx_series = df['NEXUS'].fillna(0.0)
    for k in range(1, LOOKAHEAD_D+1):
        y = E.iloc[k:].values
        x = nx_series.shift(k).iloc[k:].values
        if y.sum() > 0:
            auc_lead[k] = float(average_precision_score(y, x))
        else:
            auc_lead[k] = float('nan')

    # WARNING starts (after hysteresis)
    starts_mask = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING")).fillna(False)
    start_ix = np.where(starts_mask.values)[0].tolist()
    # apply reporting cooldown
    kept = []
    last = -10**9
    for s in start_ix:
        if s - last >= COOLDOWN_BETWEEN_STARTS:
            kept.append(s); last = s
    start_ix = kept

    # Hits/lead using TRUE next event day within horizon
    hits, lead_days = 0, []
    e_ix = np.where(E.values==1)[0]
    e_set = set(e_ix)
    for s in start_ix:
        hit = None
        for j in range(1, LOOKAHEAD_D+1):
            if (s+j) in e_set:
                hit = s+j; break
        if hit is not None:
            hits += 1; lead_days.append(int(hit - s))
    false_alarms = int(len(start_ix) - hits)

    # Coverage_by_event: proportion of events with a WARNING start in previous K days
    coverage_by_k = {}
    start_set = set(start_ix)
    for K in range(1, LOOKAHEAD_D+1):
        covered = 0
        for e in e_ix:
            left = max(0, e-K)
            if any((t in start_set) for t in range(left, e)):
                covered += 1
        coverage_by_k[K] = float(covered/len(e_ix)) if len(e_ix) else float('nan')

    # segment stats
    segs, inw, st = [], False, None
    for i, a in enumerate(df['alert'].values):
        if a=="WARNING" and not inw: st, inw = i, True
        elif a!="WARNING" and inw: segs.append((st, i-1)); inw=False
    if inw: segs.append((st, len(df)-1))
    seg_lens = [e-s+1 for s,e in segs]

    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    warn_days = int((df['alert']=="WARNING").sum())

    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "events_total": int(E.sum()),
        "auc_pr_lead_curve": auc_lead,                    # PR-AUC at lead k days
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "exit_threshold": thr_exit,
        "warning_days": warn_days,
        "warning_starts": int(len(start_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms/years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "coverage_by_event": coverage_by_k,
        "warning_segments": int(len(segs)),
        "warning_segment_len_median": (float(np.median(seg_lens)) if seg_lens else None),
        "cooldown_between_starts_days": COOLDOWN_BETWEEN_STARTS,
        "hysteresis_exit_days": HYST_EXIT_DAYS,
        "hysteresis_exit_margin": HYST_EXIT_MARGIN,
        "event_ksigma": EVENT_KSIGMA,
        "lookahead_days": LOOKAHEAD_D,
    }

# ----- persist -----
paths = {
    "features_csv": save_df(df, "gdren_v02d_features"),
    "alerts_csv": save_df(df[['NEXUS','alert','EVENT']], "gdren_v02d_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02d_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END},
        "thresholds": {"watch": thr_watch, "warning": thr_warn, "exit": thr_exit},
        "notes": "v0.2d — event=|ret|>Kσ; lead-curve PR-AUC; true lead-to-event; hysteresis exits."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2d SKILL (horizon-aware) ===")
    key = ['samples','years','events_total','watch_threshold','warning_threshold','exit_threshold',
           'warning_days','warning_starts','hits_within_days','hits','false_alarms','false_alarms_per_year',
           'lead_time_days_median','lead_time_days_mean']
    for k in key:
        print(f"{k:>26}: {metrics.get(k)}")
    print(" auc_pr_lead_curve (k→AUC):", metrics['auc_pr_lead_curve'])
    print(" coverage_by_event (K→cov):", metrics['coverage_by_event'])
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …
Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-220142Z_v02d\\gdren_v02d_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-220142Z_v02d\\gdren_v02d_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-220142Z_v02d\\gdren_v02d_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-220142Z_v02d\\meta.json"
}

=== v0.2d SKILL (horizon-aware) ===
                   samples: 10615
                     years: 32.766598220396986
              events_total: 265
           watch_threshold: 0.7447568136044356
            exit_threshold: 0.6947568136044355
          hits_within_days: 7
                      hits: 4
              false_alarms: 7
     false_alarms_per_year: 0.21363218582887702
     lead_time_days_median: 3.5
       lead_time_days_mean: 3.5
 auc_pr_lead_curve (k→AUC): {1: 0.02326670043046703, 2: 0.02327318286306805, 3: 0.023247071628575196, 4: 0.023240759619402286, 5: 0.02325057751

In [10]:
# === G-DREN v0.2e — regime events (spike ∪ vol-breakout ∪ drawdown), VIX channel, tidy warnings ===
import os, json, time, sys, subprocess, warnings
from pathlib import Path
import numpy as np, pandas as pd

warnings.filterwarnings("ignore", category=RuntimeWarning)

# ---- bootstrap ----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True
def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)
pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score

# ---- config (tune as needed) ----
LAT, LON = 40.7128, -74.0060      # set to your city if desired
START, END = "2005-01-01", None
LOOKAHEAD_D = 7                   # early-warning horizon (days)
EVENT_KSIGMA = 2.5                # spike: |ret| > K * 21d σ
VOL_BREAK_Q = 0.98                # vol-breakout: 10d σ above rolling q
VOL_BREAK_WIN = 252*3             # quantile lookback (≈3y)
DD_WIN = 60                       # drawdown window (days)
DD_FRAC = 0.08                    # drawdown threshold (>=8%)
COUPLING_WIN = 400                # window for hetero-coupling
HYST_EXIT_MARGIN = 0.08           # WARNING exits when NEXUS < (watch - margin) for N days
HYST_EXIT_DAYS = 3
MAX_WARN_DAYS = 20                # hard cap for WARNING segment length
COOLDOWN_BETWEEN_STARTS = 12      # min days between WARNING starts (reporting)

# ---- paths ----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02e"
RUN.mkdir(parents=True, exist_ok=True)
def save_df(df, name):
    fp = RUN / f"{name}.csv"; df.to_csv(fp, index=True); return str(fp)
def save_json(obj, name):
    fp = RUN / f"{name}.json"; fp.write_text(json.dumps(obj, indent=2)); return str(fp)

# ---- time helpers ----
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None: idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()
def normalize_series_daily(s):
    if s is None or len(s)==0: return pd.Series(dtype=float)
    s = pd.Series(s.copy()); s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

# ---- CNT utilities ----
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]; h,_ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out
def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w); out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out
def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values
def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values
def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a)!=len(b) or len(a)<128: return np.nan
    mask = np.isfinite(a)&np.isfinite(b); a,b = a[mask], b[mask]
    if len(a)<128: return np.nan
    a -= a.mean(); b -= b.mean()
    if a.std()<1e-12 or b.std()<1e-12: return np.nan
    f,Cxy = coherence(a,b,fs=fs, nperseg=min(256,len(a)))
    band = (f>0.02)&(f<0.25); v = Cxy[band]; v = v[np.isfinite(v)]
    return float(v.mean()) if v.size else np.nan
def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w): v = w.iat[-1]; arr = np.sort(w); return np.searchsorted(arr, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values
def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    score = (0.20*F.get('S_d_mkt',0) + 0.15*F.get('AR1_mkt',0) + 0.08*F.get('Theta_mkt',0) +
             0.18*F.get('S_d_vix',0) + 0.10*F.get('AR1_vix',0) + 0.06*F.get('Theta_vix',0) +
             0.12*F.get('S_d_tmp',0) + 0.08*F.get('AR1_tmp',0) + 0.03*F.get('Theta_tmp',0) +
             0.10*F.get('Gamma_mkt_vix',0))
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# ---- data loaders ----
def get_market():
    try:
        h = yf.Ticker("SPY").history(period="max", auto_adjust=True)
        if isinstance(h,pd.DataFrame) and 'Close' in h and len(h)>0:
            return normalize_series_daily(h['Close'].rename('px').dropna())
    except Exception as e: print("[warn] history SPY:", e)
    try:
        s = yf.download("SPY", period="max", interval="1d", auto_adjust=True, progress=False)
        if isinstance(s,pd.DataFrame) and 'Close' in s and len(s)>0:
            return normalize_series_daily(s['Close'].rename('px').dropna())
    except Exception as e: print("[warn] download SPY:", e)
    try:
        url = "https://stooq.com/q/d/l/?s=spy&i=d"
        stq = pd.read_csv(url); stq.columns=[c.lower() for c in stq.columns]
        stq['date']=pd.to_datetime(stq['date']); stq=stq.set_index('date').sort_index()
        return normalize_series_daily(stq['close'].rename('px').astype(float))
    except Exception as e: print("[error] stooq SPY:", e); return pd.Series(dtype=float)

def get_vix():
    try:
        h = yf.Ticker("^VIX").history(period="max", auto_adjust=False)
        if isinstance(h,pd.DataFrame) and 'Close' in h and len(h)>0:
            return normalize_series_daily(h['Close'].rename('vix').dropna())
    except Exception as e: print("[warn] history VIX:", e)
    try:
        s = yf.download("^VIX", period="max", interval="1d", auto_adjust=False, progress=False)
        if isinstance(s,pd.DataFrame) and 'Close' in s and len(s)>0:
            return normalize_series_daily(s['Close'].rename('vix').dropna())
    except Exception as e: print("[warn] download VIX:", e)
    return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon); d = Daily(loc, start_dt, end_dt).fetch()
        t = d['tavg'].astype(float) if 'tavg' in d and d['tavg'].notna().any() else ((d.get('tmin')+d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e:
        print("[warn] temp fetch:", e); return pd.Series(dtype=float)

px = get_market()
vix = get_vix()
ta  = get_temp_anomaly(LAT, LON)

# ---- align & frame ----
idx = pd.DatetimeIndex([])
for ser in [px, vix, ta]:
    if len(ser)>0: idx = ser.index if idx.empty else idx.union(ser.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0:  df['px'] = px.reindex(df.index).ffill(); df['ret'] = df['px'].pct_change()
if len(vix)>0: df['vix'] = vix.reindex(df.index).ffill(); df['dvix'] = df['vix'].pct_change()
if len(ta)>0:  df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

# ---- features: SPY / VIX / temp ----
if 'ret' in df:
    sig_m = df['ret'].fillna(0.0).values
    z_m = zscore_mad(sig_m, win=512)
    df['S_d_mkt']   = rolling_entropy(sig_m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(sig_m, win=256)
    df['Theta_mkt'] = theta_breaches(z_m, q=0.985, win=256)
else:
    df['S_d_mkt']=df['AR1_mkt']=df['Theta_mkt']=np.nan

if 'dvix' in df:
    sig_v = df['dvix'].fillna(0.0).values
    z_v = zscore_mad(sig_v, win=512)
    df['S_d_vix']   = rolling_entropy(sig_v, win=256, bins=48)
    df['AR1_vix']   = rolling_ar1(sig_v, win=256)
    df['Theta_vix'] = theta_breaches(z_v, q=0.985, win=256)
else:
    df['S_d_vix']=df['AR1_vix']=df['Theta_vix']=np.nan

if 'temp_anom' in df:
    sig_t = df['temp_anom'].fillna(0.0).values
    z_t   = zscore_mad(sig_t, win=512)
    df['S_d_tmp']   = rolling_entropy(sig_t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(sig_t, win=256)
    df['Theta_tmp'] = theta_breaches(z_t, q=0.985, win=256)
else:
    df['S_d_tmp']=df['AR1_tmp']=df['Theta_tmp']=np.nan

# hetero-coupling (SPY vs VIX delta)
df['Gamma_mkt_vix'] = np.nan
if 'ret' in df and 'dvix' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['dvix'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_vix')] = glyph_coupling(a, b, fs=1.0)

# ---- fuse → NEXUS + thresholds ----
feats = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_vix','AR1_vix','Theta_vix','S_d_tmp','AR1_tmp','Theta_tmp','Gamma_mkt_vix']
df['NEXUS'] = fuse_to_nexus(df[feats])

core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.90)) if len(core) else 0.90
thr_exit  = thr_watch - HYST_EXIT_MARGIN

# ---- WARNING state with hysteresis + max-length + slope gate ----
state=[]; in_warn=False; below_cnt=0; warn_len=0
nx = df['NEXUS'].values
for i in range(len(df)):
    val = nx[i]
    s3  = (val - nx[i-3]) if i>=3 and np.isfinite(nx[i-3]) else 0.0
    s7  = (val - nx[i-7]) if i>=7 and np.isfinite(nx[i-7]) else 0.0
    if in_warn:
        warn_len += 1
        if val < thr_exit: below_cnt += 1
        else: below_cnt = 0
        if below_cnt >= HYST_EXIT_DAYS or warn_len >= MAX_WARN_DAYS:
            in_warn=False; warn_len=0
            state.append('WATCH' if val>=thr_watch else 'OK')
        else:
            state.append('WARNING')
    else:
        if (val>=thr_warn) and (s3>0) and (s7>0):
            in_warn=True; below_cnt=0; warn_len=1
            state.append('WARNING')
        elif val>=thr_watch:
            state.append('WATCH')
        else:
            state.append('OK')
df['alert']=state

# ---- regime EVENTS (union): spike ∪ vol-breakout ∪ drawdown ----
metrics={}
if 'ret' in df:
    vol21 = df['ret'].rolling(21).std()
    E_spike = (df['ret'].abs() > EVENT_KSIGMA*vol21).astype(int)

    vol10 = df['ret'].rolling(10).std()
    q_roll = vol10.rolling(VOL_BREAK_WIN, min_periods=252).quantile(VOL_BREAK_Q)
    E_vol = (vol10 > q_roll).astype(int).fillna(0).astype(int)

    if 'px' in df:
        roll_max = df['px'].rolling(DD_WIN, min_periods=DD_WIN//2).max()
        dd = df['px']/roll_max - 1.0
        E_dd = (dd <= -DD_FRAC).astype(int).fillna(0).astype(int)
    else:
        E_dd = pd.Series(0, index=df.index)

    E = ((E_spike==1) | (E_vol==1) | (E_dd==1)).astype(int)
    df['EVENT'] = E

    # ---- lead AUC over 1..LOOKAHEAD_D (NEXUS leading EVENTS) ----
    nx_s = df['NEXUS'].fillna(0.0)
    auc_lead={}
    for k in range(1, LOOKAHEAD_D+1):
        y = E.iloc[k:].values
        x = nx_s.shift(k).iloc[k:].values
        auc_lead[k] = float(average_precision_score(y, x)) if y.sum()>0 else float('nan')

    # ---- starts / hits / lead / false alarms ----
    starts = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING")).fillna(False)
    s_ix = np.where(starts.values)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
    s_ix = kept

    e_ix = np.where(E.values==1)[0]; e_set=set(e_ix)
    hits=0; lead_days=[]
    for s in s_ix:
        hit=None
        for j in range(1, LOOKAHEAD_D+1):
            if (s+j) in e_set: hit=s+j; break
        if hit is not None:
            hits += 1; lead_days.append(int(hit-s))
    false_alarms = int(len(s_ix) - hits)

    # ---- event coverage: event had WARNING start in prior K days ----
    coverage_by_k={}
    s_set=set(s_ix)
    for K in range(1, LOOKAHEAD_D+1):
        covered=0
        for e in e_ix:
            left=max(0, e-K)
            if any((t in s_set) for t in range(left, e)): covered+=1
        coverage_by_k[K] = float(covered/len(e_ix)) if len(e_ix) else float('nan')

    # ---- warning segment stats ----
    segs=[]; inw=False; st=None
    for i,a in enumerate(df['alert'].values):
        if a=="WARNING" and not inw: st,i0 = i, i; inw=True
        elif a!="WARNING" and inw: segs.append((st, i-1)); inw=False
    if inw: segs.append((st, len(df)-1))
    seg_lens=[e-s+1 for s,e in segs]

    years = max(1e-9,(df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    warn_days = int((df['alert']=="WARNING").sum())

    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "events_total": int(E.sum()),
        "auc_pr_lead_curve": auc_lead,
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "exit_threshold": thr_exit,
        "warning_days": warn_days,
        "warning_starts": int(len(s_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms/years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "coverage_by_event": coverage_by_k,
        "warning_segments": int(len(segs)),
        "warning_segment_len_median": (float(np.median(seg_lens)) if seg_lens else None),
        "event_components": {
            "spike_k": EVENT_KSIGMA, "vol_q": VOL_BREAK_Q, "vol_win": VOL_BREAK_WIN,
            "dd_win": DD_WIN, "dd_frac": DD_FRAC
        }
    }

# ---- persist ----
paths = {
    "features_csv": save_df(df, "gdren_v02e_features"),
    "alerts_csv": save_df(df[['NEXUS','alert','EVENT']], "gdren_v02e_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02e_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END,
                "lookahead_days": LOOKAHEAD_D},
        "thresholds": {"watch": thr_watch, "warning": thr_warn, "exit": thr_exit},
        "notes": "v0.2e — VIX channel + regime events (spike ∪ vol-breakout ∪ drawdown) + hysteresis cap."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2e SKILL (regime-aware) ===")
    for k in ["samples","years","events_total","watch_threshold","warning_threshold","exit_threshold",
              "warning_days","warning_starts","hits_within_days","hits","false_alarms","false_alarms_per_year",
              "lead_time_days_median","lead_time_days_mean"]:
        print(f"{k:>26}: {metrics.get(k)}")
    print(" auc_pr_lead_curve (k→AUC):", metrics["auc_pr_lead_curve"])
    print(" coverage_by_event (K→cov):", metrics["coverage_by_event"])
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …
Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-220622Z_v02e\\gdren_v02e_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-220622Z_v02e\\gdren_v02e_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-220622Z_v02e\\gdren_v02e_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-220622Z_v02e\\meta.json"
}

=== v0.2e SKILL (regime-aware) ===
                   samples: 11394
                     years: 35.841204654346335
              events_total: 1537
           watch_threshold: 0.7173172519040436
            exit_threshold: 0.6373172519040436
          hits_within_days: 7
                      hits: 10
              false_alarms: 30
     false_alarms_per_year: 0.837025437323352
     lead_time_days_median: 2.0
       lead_time_days_mean: 2.4
 auc_pr_lead_curve (k→AUC): {1: 0.13588386034591796, 2: 0.1353583940825056, 3: 0.13483436367389043, 4: 0.13432494172297105, 5: 0.13383345562749

In [12]:
# === G-DREN v0.2f — onset-only regime events, VIX + credit channel, gated warnings, horizon metrics ===
import os, json, time, sys, subprocess, warnings
from pathlib import Path
import numpy as np, pandas as pd

warnings.filterwarnings("ignore", category=RuntimeWarning)

# ----- bootstrap -----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True
def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)
pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score

# ----- config (tune freely) -----
LAT, LON = 40.7128, -74.0060    # your location
START, END = "2005-01-01", None
LOOKAHEAD_D = 7                 # early-warning horizon (days)
EVENT_KSIGMA = 3.0              # spike: |ret| > K * 21d σ  (try 2.5–3.0)
VOL_BREAK_Q = 0.995             # vol-breakout: 10d σ above rolling q
VOL_BREAK_WIN = 252*3           # quantile lookback (≈3y)
DD_WIN = 40                     # drawdown window (days)
DD_FRAC = 0.10                  # drawdown threshold (>=10%)
COUPLING_WIN = 400              # window for hetero-coupling
HYST_EXIT_MARGIN = 0.08         # WARNING exits when NEXUS < (watch - margin) for N days
HYST_EXIT_DAYS = 3
MAX_WARN_DAYS = 14              # cap individual WARNING segments
COOLDOWN_BETWEEN_STARTS = 12    # min days between WARNING starts (reporting)
GATE_THETA_MIN = 0.20           # gate requires any channel’s Theta >= this
GAMMA_VIX_MIN = 0.05            # or Γ(mkt↔VIX) above this

# ----- paths -----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02f"
RUN.mkdir(parents=True, exist_ok=True)
def save_df(df, name): fp = RUN / f"{name}.csv"; df.to_csv(fp, index=True); return str(fp)
def save_json(obj, name): fp = RUN / f"{name}.json"; fp.write_text(json.dumps(obj, indent=2)); return str(fp)

# ----- time helpers -----
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None: idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()
def normalize_series_daily(s):
    if s is None or len(s)==0: return pd.Series(dtype=float)
    s = pd.Series(s.copy()); s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

# ----- CNT utilities -----
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]; h,_ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out
def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w); out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out
def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values
def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values
def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a)!=len(b) or len(a)<128: return np.nan
    mask = np.isfinite(a)&np.isfinite(b); a,b = a[mask], b[mask]
    if len(a)<128: return np.nan
    a -= a.mean(); b -= b.mean()
    if a.std()<1e-12 or b.std()<1e-12: return np.nan
    f,Cxy = coherence(a,b,fs=fs, nperseg=min(256,len(a)))
    band = (f>0.02)&(f<0.25); v = Cxy[band]; v = v[np.isfinite(v)]
    return float(v.mean()) if v.size else np.nan
def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w): v=w.iat[-1]; arr=np.sort(w); return np.searchsorted(arr, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values
def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        F[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    score = (
        0.18*F.get('S_d_mkt',0) + 0.14*F.get('AR1_mkt',0) + 0.08*F.get('Theta_mkt',0) +
        0.18*F.get('S_d_vix',0) + 0.14*F.get('AR1_vix',0) + 0.08*F.get('Theta_vix',0) +
        0.12*F.get('S_d_cred',0)+ 0.08*F.get('AR1_cred',0)+ 0.04*F.get('Theta_cred',0) +
        0.06*F.get('S_d_tmp',0) + 0.02*F.get('AR1_tmp',0) + 0.02*F.get('Theta_tmp',0) +
        0.10*F.get('Gamma_mkt_vix',0) + 0.08*F.get('Gamma_mkt_cred',0)
    )
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index)
    return nx.ewm(span=64, min_periods=16).mean().clip(0,1)

# ----- data loaders -----
def get_px(tkr, auto_adjust=True):
    try:
        h = yf.Ticker(tkr).history(period="max", auto_adjust=auto_adjust)
        if isinstance(h, pd.DataFrame) and 'Close' in h and len(h)>0:
            return normalize_series_daily(h['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] history {tkr}:", e)
    try:
        s = yf.download(tkr, period="max", interval="1d", auto_adjust=auto_adjust, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s and len(s)>0:
            return normalize_series_daily(s['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] download {tkr}:", e)
    return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon); d = Daily(loc, start_dt, end_dt).fetch()
        t = d['tavg'].astype(float) if 'tavg' in d and d['tavg'].notna().any() else ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e:
        print("[warn] temp fetch:", e); return pd.Series(dtype=float)

px  = get_px("SPY", auto_adjust=True)
vix = get_px("^VIX", auto_adjust=False)
hyg = get_px("HYG", auto_adjust=True)
ief = get_px("IEF", auto_adjust=True)
ta  = get_temp_anomaly(LAT, LON)

# ----- align & frame -----
idx = pd.DatetimeIndex([])
for ser in [px, vix, hyg, ief, ta]:
    if len(ser)>0: idx = ser.index if idx.empty else idx.union(ser.index)
df = pd.DataFrame(index=idx).sort_index()

if len(px)>0:   df['px'] = px.reindex(df.index).ffill(); df['ret'] = df['px'].pct_change()
if len(vix)>0:  df['vix'] = vix.reindex(df.index).ffill(); df['dvix'] = df['vix'].pct_change()
if len(hyg)>0 and len(ief)>0:
    ratio = (hyg / ief).reindex(df.index).ffill()
    df['cred_ret'] = ratio.pct_change()
if len(ta)>0:   df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

# ----- features: market / VIX / credit / temp -----
# market
if 'ret' in df:
    m = df['ret'].fillna(0.0).values; mz = zscore_mad(m, win=512)
    df['S_d_mkt']   = rolling_entropy(m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(m, win=256)
    df['Theta_mkt'] = theta_breaches(mz, q=0.985, win=256)
else:
    df['S_d_mkt']=df['AR1_mkt']=df['Theta_mkt']=np.nan

# vix
if 'dvix' in df:
    v = df['dvix'].fillna(0.0).values; vz = zscore_mad(v, win=512)
    df['S_d_vix']   = rolling_entropy(v, win=256, bins=48)
    df['AR1_vix']   = rolling_ar1(v, win=256)
    df['Theta_vix'] = theta_breaches(vz, q=0.985, win=256)
else:
    df['S_d_vix']=df['AR1_vix']=df['Theta_vix']=np.nan

# credit (HYG/IEF ratio)
if 'cred_ret' in df:
    c = df['cred_ret'].fillna(0.0).values; cz = zscore_mad(c, win=512)
    df['S_d_cred']   = rolling_entropy(c, win=256, bins=48)
    df['AR1_cred']   = rolling_ar1(c, win=256)
    df['Theta_cred'] = theta_breaches(cz, q=0.985, win=256)
else:
    df['S_d_cred']=df['AR1_cred']=df['Theta_cred']=np.nan

# temp
if 'temp_anom' in df:
    t = df['temp_anom'].fillna(0.0).values; tz = zscore_mad(t, win=512)
    df['S_d_tmp']   = rolling_entropy(t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(t, win=256)
    df['Theta_tmp'] = theta_breaches(tz, q=0.985, win=256)
else:
    df['S_d_tmp']=df['AR1_tmp']=df['Theta_tmp']=np.nan

# hetero-couplings
df['Gamma_mkt_vix']  = np.nan
df['Gamma_mkt_cred'] = np.nan
if 'ret' in df and 'dvix' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['dvix'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_vix')] = glyph_coupling(a,b,fs=1.0)
if 'ret' in df and 'cred_ret' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['cred_ret'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_cred')] = glyph_coupling(a,b,fs=1.0)

# ----- fuse → NEXUS + thresholds -----
feats = ['S_d_mkt','AR1_mkt','Theta_mkt',
         'S_d_vix','AR1_vix','Theta_vix',
         'S_d_cred','AR1_cred','Theta_cred',
         'S_d_tmp','AR1_tmp','Theta_tmp',
         'Gamma_mkt_vix','Gamma_mkt_cred']
df['NEXUS'] = fuse_to_nexus(df[feats])

core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.92)) if len(core) else 0.92   # slightly stricter
thr_exit  = thr_watch - HYST_EXIT_MARGIN

# ----- gated WARNING state (hysteresis + max len + multi-channel gate) -----
state=[]; in_warn=False; below_cnt=0; warn_len=0
nx = df['NEXUS'].values
for i in range(len(df)):
    val = nx[i]
    s3  = (val - nx[i-3]) if i>=3 and np.isfinite(nx[i-3]) else 0.0
    s7  = (val - nx[i-7]) if i>=7 and np.isfinite(nx[i-7]) else 0.0
    # gate: any strong Theta or strong Γ(mkt↔VIX)
    th_v = float(df['Theta_vix'].iloc[i])  if 'Theta_vix'  in df else np.nan
    th_m = float(df['Theta_mkt'].iloc[i])  if 'Theta_mkt'  in df else np.nan
    th_c = float(df['Theta_cred'].iloc[i]) if 'Theta_cred' in df else np.nan
    gm_v = float(df['Gamma_mkt_vix'].iloc[i])  if 'Gamma_mkt_vix'  in df else np.nan
    gate_ok = ((np.nan_to_num(th_v)>=GATE_THETA_MIN) or
               (np.nan_to_num(th_m)>=GATE_THETA_MIN) or
               (np.nan_to_num(th_c)>=GATE_THETA_MIN) or
               (np.nan_to_num(gm_v)>=GAMMA_VIX_MIN))
    if in_warn:
        warn_len += 1
        if val < thr_exit: below_cnt += 1
        else: below_cnt = 0
        if below_cnt >= HYST_EXIT_DAYS or warn_len >= MAX_WARN_DAYS:
            in_warn=False; warn_len=0
            state.append('WATCH' if val>=thr_watch else 'OK')
        else:
            state.append('WARNING')
    else:
        if (val>=thr_warn) and (s3>0) and (s7>0) and gate_ok:
            in_warn=True; below_cnt=0; warn_len=1
            state.append('WARNING')
        elif val>=thr_watch:
            state.append('WATCH')
        else:
            state.append('OK')
df['alert']=state

# ----- regime EVENTS (union) then ONSETS-only -----
metrics={}
if 'ret' in df:
    vol21 = df['ret'].rolling(21).std()
    E_spike = (df['ret'].abs() > EVENT_KSIGMA*vol21).astype(int)

    vol10 = df['ret'].rolling(10).std()
    q_roll = vol10.rolling(VOL_BREAK_WIN, min_periods=252).quantile(VOL_BREAK_Q)
    E_vol  = (vol10 > q_roll).astype(int).fillna(0).astype(int)

    if 'px' in df:
        roll_max = df['px'].rolling(DD_WIN, min_periods=DD_WIN//2).max()
        dd = df['px']/roll_max - 1.0
        E_dd = (dd <= -DD_FRAC).astype(int).fillna(0).astype(int)
    else:
        E_dd = pd.Series(0, index=df.index)

    E = ((E_spike==1) | (E_vol==1) | (E_dd==1)).astype(int)

    # Onset days only
    E_onset = ((E==1) & (E.shift(1).fillna(0)==0)).astype(int)
    df['EVENT_ONSET'] = E_onset

    # Lead PR-AUC: does NEXUS k days earlier rank upcoming ONSETS?
    nx_s = df['NEXUS'].fillna(0.0)
    auc_lead={}
    for k in range(1, LOOKAHEAD_D+1):
        y = E_onset.iloc[k:].values
        x = nx_s.shift(k).iloc[k:].values
        auc_lead[k] = float(average_precision_score(y, x)) if y.sum()>0 else float('nan')

    # Starts & hits (relative to ONSETS)
    starts = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING")).fillna(False)
    s_ix = np.where(starts.values)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
    s_ix = kept

    e_ix = np.where(E_onset.values==1)[0]; e_set=set(e_ix)
    hits=0; lead_days=[]
    for s in s_ix:
        hit=None
        for j in range(1, LOOKAHEAD_D+1):
            if (s+j) in e_set: hit=s+j; break
        if hit is not None:
            hits += 1; lead_days.append(int(hit-s))
    false_alarms = int(len(s_ix) - hits)

    # Coverage_by_event_onset: onset had a WARNING start in prior K days
    coverage_by_k={}
    s_set=set(s_ix)
    for K in range(1, LOOKAHEAD_D+1):
        covered=0
        for e in e_ix:
            left=max(0, e-K)
            if any((t in s_set) for t in range(left, e)): covered+=1
        coverage_by_k[K] = float(covered/len(e_ix)) if len(e_ix) else float('nan')

    # WARNING segment stats
    segs=[]; inw=False; st=None
    for i,a in enumerate(df['alert'].values):
        if a=="WARNING" and not inw: st=i; inw=True
        elif a!="WARNING" and inw: segs.append((st, i-1)); inw=False
    if inw: segs.append((st, len(df)-1))
    seg_lens=[e-s+1 for s,e in segs]

    years = max(1e-9,(df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    warn_days = int((df['alert']=="WARNING").sum())

    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "event_onsets_total": int(E_onset.sum()),
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "exit_threshold": thr_exit,
        "warning_days": warn_days,
        "warning_starts": int(len(s_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms/years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "auc_pr_lead_curve": auc_lead,
        "coverage_onsets": coverage_by_k,
        "warning_segments": int(len(segs)),
        "warning_segment_len_median": (float(np.median(seg_lens)) if seg_lens else None),
        "event_components": {
            "spike_k": EVENT_KSIGMA, "vol_q": VOL_BREAK_Q, "vol_win": VOL_BREAK_WIN,
            "dd_win": DD_WIN, "dd_frac": DD_FRAC
        }
    }

# ----- persist -----
paths = {
    "features_csv": save_df(df, "gdren_v02f_features"),
    "alerts_csv": save_df(df[['NEXUS','alert','EVENT_ONSET']], "gdren_v02f_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02f_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END,
                "lookahead_days": LOOKAHEAD_D},
        "thresholds": {"watch": thr_watch, "warning": thr_warn, "exit": thr_exit},
        "notes": "v0.2f — regime ONSETS only; VIX + credit; gated WARNING; horizon-aware metrics."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2f SKILL (onset-aware) ===")
    keys = ["samples","years","event_onsets_total","watch_threshold","warning_threshold","exit_threshold",
            "warning_days","warning_starts","hits_within_days","hits","false_alarms","false_alarms_per_year",
            "lead_time_days_median","lead_time_days_mean"]
    for k in keys: print(f"{k:>26}: {metrics.get(k)}")
    print(" auc_pr_lead_curve (k→AUC):", metrics["auc_pr_lead_curve"])
    print(" coverage_onsets   (K→cov):", metrics["coverage_onsets"])
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …
Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-221211Z_v02f\\gdren_v02f_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-221211Z_v02f\\gdren_v02f_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-221211Z_v02f\\gdren_v02f_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-221211Z_v02f\\meta.json"
}

=== v0.2f SKILL (onset-aware) ===
                   samples: 11394
                     years: 35.841204654346335
        event_onsets_total: 159
           watch_threshold: 0.711124776785366
            exit_threshold: 0.631124776785366
          hits_within_days: 7
                      hits: 4
              false_alarms: 45
     false_alarms_per_year: 1.255538155985028
     lead_time_days_median: 3.5
       lead_time_days_mean: 3.25
 auc_pr_lead_curve (k→AUC): {1: 0.015039936715597802, 2: 0.015026154574159484, 3: 0.015012223731281422, 4: 0.014994303239131349, 5: 0.0149958133854

In [13]:
# === G-DREN v0.2g — onset clusters, ramp-aware NEXUS, multi-signal gates, disciplined warnings ===
import os, json, time, sys, subprocess, warnings
from pathlib import Path
import numpy as np, pandas as pd

warnings.filterwarnings("ignore", category=RuntimeWarning)

# ----- bootstrap -----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True
def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.metrics import average_precision_score

# ====== CONFIG (tune as you like) ======
LAT, LON = 40.7128, -74.0060   # your city
START, END = "2005-01-01", None
LOOKAHEAD_D = 7                 # warning horizon
EVENT_KSIGMA = 3.0              # spike = |ret| > K * 21d σ
VOL_BREAK_Q = 0.995             # vol-breakout quantile (10d σ over ~3y)
VOL_BREAK_WIN = 252*3
DD_WIN = 40                     # drawdown window
DD_FRAC = 0.10                  # ≥10% drawdown
MIN_CLUSTER_LEN = 3             # sustained regime length (days) to count onset
COUPLING_WIN = 400
HYST_EXIT_MARGIN = 0.08         # exit when NEXUS < (watch - margin) for N days
HYST_EXIT_DAYS = 3
MAX_WARN_DAYS = 14              # cap WARNING segment length
COOLDOWN_BETWEEN_STARTS = 12    # min days between WARNING starts (reporting)
GATE_THETA_MIN = 0.25           # gate requires strength on Θ or Γ
GAMMA_VIX_MIN  = 0.08
GATE_MIN_VOTES = 2              # require >= this many gate signals

# ====== paths ======
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v02g"
RUN.mkdir(parents=True, exist_ok=True)
def save_df(df, name): fp = RUN / f"{name}.csv"; df.to_csv(fp, index=True); return str(fp)
def save_json(obj, name): fp = RUN / f"{name}.json"; fp.write_text(json.dumps(obj, indent=2)); return str(fp)

# ====== time helpers ======
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None: idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()
def normalize_series_daily(s):
    if s is None or len(s)==0: return pd.Series(dtype=float)
    s = pd.Series(s.copy()); s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

# ====== CNT utilities ======
def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]
        h,_ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out
def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w); out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out
def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values
def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values
def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a)!=len(b) or len(a)<128: return np.nan
    mask = np.isfinite(a)&np.isfinite(b); a,b = a[mask], b[mask]
    if len(a)<128: return np.nan
    a -= a.mean(); b -= b.mean()
    if a.std()<1e-12 or b.std()<1e-12: return np.nan
    f,Cxy = coherence(a,b,fs=fs, nperseg=min(256,len(a)))
    band = (f>0.02)&(f<0.25); v = Cxy[band]; v = v[np.isfinite(v)]
    return float(v.mean()) if v.size else np.nan
def rolling_percent_rank(series, win=512):
    s = pd.Series(series, dtype=float)
    def pr(w): v=w.iat[-1]; arr=np.sort(w); return np.searchsorted(arr, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=False).values

# ====== data loaders ======
def get_px(tkr, auto_adjust=True):
    try:
        h = yf.Ticker(tkr).history(period="max", auto_adjust=auto_adjust)
        if isinstance(h, pd.DataFrame) and 'Close' in h and len(h)>0:
            return normalize_series_daily(h['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] history {tkr}:", e)
    try:
        s = yf.download(tkr, period="max", interval="1d", auto_adjust=auto_adjust, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s and len(s)>0:
            return normalize_series_daily(s['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] download {tkr}:", e)
    return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon); d = Daily(loc, start_dt, end_dt).fetch()
        t = d['tavg'].astype(float) if 'tavg' in d and d['tavg'].notna().any() else ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e: print("[warn] temp fetch:", e); return pd.Series(dtype=float)

px  = get_px("SPY",  auto_adjust=True)
vix = get_px("^VIX", auto_adjust=False)
hyg = get_px("HYG",  auto_adjust=True)
ief = get_px("IEF",  auto_adjust=True)
ta  = get_temp_anomaly(LAT, LON)

# ====== align & frame ======
idx = pd.DatetimeIndex([])
for ser in [px, vix, hyg, ief, ta]:
    if len(ser)>0: idx = ser.index if idx.empty else idx.union(ser.index)
df = pd.DataFrame(index=idx).sort_index()

if len(px)>0:  df['px']  = px.reindex(df.index).ffill(); df['ret'] = df['px'].pct_change()
if len(vix)>0: df['vix'] = vix.reindex(df.index).ffill(); df['dvix'] = df['vix'].pct_change()
if len(hyg)>0 and len(ief)>0:
    ratio = (hyg / ief).reindex(df.index).ffill()
    df['cred_ret'] = ratio.pct_change()
if len(ta)>0:  df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

# ====== base features ======
if 'ret' in df:
    m  = df['ret'].fillna(0.0).values; mz = zscore_mad(m, win=512)
    df['S_d_mkt']   = rolling_entropy(m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(m, win=256)
    df['Theta_mkt'] = theta_breaches(mz, q=0.985, win=256)
else:
    df['S_d_mkt']=df['AR1_mkt']=df['Theta_mkt']=np.nan

if 'dvix' in df:
    v  = df['dvix'].fillna(0.0).values; vz = zscore_mad(v, win=512)
    df['S_d_vix']   = rolling_entropy(v, win=256, bins=48)
    df['AR1_vix']   = rolling_ar1(v, win=256)
    df['Theta_vix'] = theta_breaches(vz, q=0.985, win=256)
else:
    df['S_d_vix']=df['AR1_vix']=df['Theta_vix']=np.nan

if 'cred_ret' in df:
    c  = df['cred_ret'].fillna(0.0).values; cz = zscore_mad(c, win=512)
    df['S_d_cred']   = rolling_entropy(c, win=256, bins=48)
    df['AR1_cred']   = rolling_ar1(c, win=256)
    df['Theta_cred'] = theta_breaches(cz, q=0.985, win=256)
else:
    df['S_d_cred']=df['AR1_cred']=df['Theta_cred']=np.nan

if 'temp_anom' in df:
    t  = df['temp_anom'].fillna(0.0).values; tz = zscore_mad(t, win=512)
    df['S_d_tmp']   = rolling_entropy(t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(t, win=256)
    df['Theta_tmp'] = theta_breaches(tz, q=0.985, win=256)
else:
    df['S_d_tmp']=df['AR1_tmp']=df['Theta_tmp']=np.nan

# hetero-couplings
df['Gamma_mkt_vix']  = np.nan
df['Gamma_mkt_cred'] = np.nan
if 'ret' in df and 'dvix' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['dvix'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_vix')] = glyph_coupling(a,b,fs=1.0)
if 'ret' in df and 'cred_ret' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['cred_ret'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_cred')] = glyph_coupling(a,b,fs=1.0)

# ====== ramp-aware NEXUS fuse (adds 7d slopes of z-features) ======
def fuse_to_nexus(F: pd.DataFrame):
    F = F.copy().ffill().bfill().fillna(0.0)
    Z = pd.DataFrame(index=F.index)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        Z[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    # 7-day ramps on normalized features
    DZ = Z.diff(7)

    # base + ramp weights (favor approach dynamics)
    score = (
        0.16*Z.get('S_d_mkt',0)+ 0.14*Z.get('AR1_mkt',0)+ 0.08*Z.get('Theta_mkt',0) +
        0.16*Z.get('S_d_vix',0)+ 0.14*Z.get('AR1_vix',0)+ 0.08*Z.get('Theta_vix',0) +
        0.10*Z.get('S_d_cred',0)+ 0.08*Z.get('AR1_cred',0)+ 0.04*Z.get('Theta_cred',0) +
        0.05*Z.get('Gamma_mkt_vix',0)+ 0.04*Z.get('Gamma_mkt_cred',0) +
        # ramps
        0.06*DZ.get('AR1_mkt',0)+ 0.06*DZ.get('S_d_mkt',0)+ 0.04*DZ.get('Theta_mkt',0) +
        0.05*DZ.get('AR1_vix',0)+ 0.05*DZ.get('S_d_vix',0)+ 0.03*DZ.get('Theta_vix',0) +
        0.04*DZ.get('Gamma_mkt_vix',0)+ 0.03*DZ.get('Gamma_mkt_cred',0)
    )
    nx = pd.Series(rolling_percent_rank(score, win=512), index=F.index).ewm(span=64, min_periods=16).mean()
    return nx.clip(0,1)

feat_cols = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_vix','AR1_vix','Theta_vix',
             'S_d_cred','AR1_cred','Theta_cred','S_d_tmp','AR1_tmp','Theta_tmp',
             'Gamma_mkt_vix','Gamma_mkt_cred']
df['NEXUS'] = fuse_to_nexus(df[feat_cols])

# thresholds from core slice
core = df['NEXUS'].iloc[365:-365].dropna()
thr_watch = float(core.quantile(0.75)) if len(core) else 0.75
thr_warn  = float(core.quantile(0.94)) if len(core) else 0.94   # a bit stricter
thr_exit  = thr_watch - HYST_EXIT_MARGIN

# ====== gated WARNING state ======
state=[]; in_warn=False; below_cnt=0; warn_len=0
nx = df['NEXUS'].values
nx_slope7 = (df['NEXUS'] - df['NEXUS'].shift(7)).fillna(0.0).values
for i in range(len(df)):
    val = nx[i]
    s3  = (val - nx[i-3]) if i>=3 and np.isfinite(nx[i-3]) else 0.0
    s7  = (val - nx[i-7]) if i>=7 and np.isfinite(nx[i-7]) else 0.0
    # votes: Θ_vix / Θ_cred / Θ_mkt / Γ(mkt↔VIX) / rising NEXUS slope
    th_v = float(df['Theta_vix'].iloc[i])   if 'Theta_vix'   in df else np.nan
    th_c = float(df['Theta_cred'].iloc[i])  if 'Theta_cred'  in df else np.nan
    th_m = float(df['Theta_mkt'].iloc[i])   if 'Theta_mkt'   in df else np.nan
    gm_v = float(df['Gamma_mkt_vix'].iloc[i]) if 'Gamma_mkt_vix' in df else np.nan
    votes = 0
    votes += int(np.nan_to_num(th_v) >= GATE_THETA_MIN)
    votes += int(np.nan_to_num(th_c) >= GATE_THETA_MIN)
    votes += int(np.nan_to_num(th_m) >= GATE_THETA_MIN)
    votes += int(np.nan_to_num(gm_v) >= GAMMA_VIX_MIN)
    votes += int(nx_slope7[i] > 0)
    gate_ok = (votes >= GATE_MIN_VOTES)

    if in_warn:
        warn_len += 1
        if val < thr_exit: below_cnt += 1
        else: below_cnt = 0
        if below_cnt >= HYST_EXIT_DAYS or warn_len >= MAX_WARN_DAYS:
            in_warn=False; warn_len=0
            state.append('WATCH' if val>=thr_watch else 'OK')
        else:
            state.append('WARNING')
    else:
        if (val>=thr_warn) and (s3>0) and (s7>0) and gate_ok:
            in_warn=True; below_cnt=0; warn_len=1
            state.append('WARNING')
        elif val>=thr_watch:
            state.append('WATCH')
        else:
            state.append('OK')
df['alert']=state

# ====== regime events -> cluster onsets only ======
metrics={}
if 'ret' in df:
    vol21 = df['ret'].rolling(21).std()
    E_spike = (df['ret'].abs() > EVENT_KSIGMA*vol21).astype(int)

    vol10  = df['ret'].rolling(10).std()
    q_roll = vol10.rolling(VOL_BREAK_WIN, min_periods=252).quantile(VOL_BREAK_Q)
    E_vol  = (vol10 > q_roll).astype(int).fillna(0).astype(int)

    if 'px' in df:
        roll_max = df['px'].rolling(DD_WIN, min_periods=DD_WIN//2).max()
        dd = df['px']/roll_max - 1.0
        E_dd = (dd <= -DD_FRAC).astype(int).fillna(0).astype(int)
    else:
        E_dd = pd.Series(0, index=df.index)

    E = ((E_spike==1) | (E_vol==1) | (E_dd==1)).astype(int)

    # cluster sustained regimes (length >= MIN_CLUSTER_LEN) and mark onsets
    onset = np.zeros(len(df), dtype=int)
    i=0
    arr = E.values
    while i < len(arr):
        if arr[i]==1:
            j=i
            while j+1<len(arr) and arr[j+1]==1: j+=1
            seg_len = j - i + 1
            if seg_len >= MIN_CLUSTER_LEN: onset[i] = 1
            i = j + 1
        else:
            i += 1
    df['EVENT_ONSET'] = onset

    # ====== metrics: lead AUC (1..K), starts/hits/FA, coverage ======
    nx_s = df['NEXUS'].fillna(0.0)
    auc_lead={}
    y_on = df['EVENT_ONSET'].values
    for k in range(1, LOOKAHEAD_D+1):
        y = y_on[k:]
        x = nx_s.shift(k).iloc[k:].values
        auc_lead[k] = float(average_precision_score(y, x)) if y.sum()>0 else float('nan')

    # starts with cooldown
    starts = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING")).fillna(False)
    s_ix = np.where(starts.values)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
    s_ix = kept

    # hits to next onset within horizon
    e_ix = np.where(y_on==1)[0]; e_set=set(e_ix)
    hits=0; lead_days=[]
    for s in s_ix:
        hit=None
        for j in range(1, LOOKAHEAD_D+1):
            if (s+j) in e_set: hit=s+j; break
        if hit is not None:
            hits += 1; lead_days.append(int(hit - s))
    false_alarms = int(len(s_ix) - hits)

    # coverage of onsets: had a WARNING start in prior K days
    coverage_by_k={}
    s_set=set(s_ix)
    for K in range(1, LOOKAHEAD_D+1):
        covered=0
        for e in e_ix:
            left=max(0, e-K)
            if any((t in s_set) for t in range(left, e)): covered+=1
        coverage_by_k[K] = float(covered/len(e_ix)) if len(e_ix) else float('nan')

    # segment stats
    segs=[]; inw=False; st=None
    for i,a in enumerate(df['alert'].values):
        if a=="WARNING" and not inw: st=i; inw=True
        elif a!="WARNING" and inw: segs.append((st, i-1)); inw=False
    if inw: segs.append((st, len(df)-1))
    seg_lens=[e-s+1 for s,e in segs]

    years = max(1e-9,(df.index[-1]-df.index[0]).days/365.25) if len(df) else 1.0
    warn_days = int((df['alert']=="WARNING").sum())

    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "event_onsets_total": int(y_on.sum()),
        "watch_threshold": thr_watch,
        "warning_threshold": thr_warn,
        "exit_threshold": thr_exit,
        "warning_days": warn_days,
        "warning_starts": int(len(s_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(false_alarms/years),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "auc_pr_lead_curve": auc_lead,
        "coverage_onsets": coverage_by_k,
        "warning_segments": int(len(segs)),
        "warning_segment_len_median": (float(np.median(seg_lens)) if seg_lens else None),
        "params": {
            "lookahead": LOOKAHEAD_D, "ksigma": EVENT_KSIGMA, "vol_q": VOL_BREAK_Q,
            "vol_win": VOL_BREAK_WIN, "dd_win": DD_WIN, "dd_frac": DD_FRAC,
            "min_cluster_len": MIN_CLUSTER_LEN, "gate_theta": GATE_THETA_MIN,
            "gate_gamma_vix": GAMMA_VIX_MIN, "gate_min_votes": GATE_MIN_VOTES
        }
    }

# ====== persist ======
paths = {
    "features_csv": save_df(df, "gdren_v02g_features"),
    "alerts_csv": save_df(df[['NEXUS','alert','EVENT_ONSET']], "gdren_v02g_alerts"),
    "metrics_json": save_json(metrics, "gdren_v02g_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "thresholds": {"watch": thr_watch, "warning": thr_warn, "exit": thr_exit},
        "notes": "v0.2g — onset clusters ≥3d; ramp-aware fusion; multi-signal gate; disciplined WARNINGs."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.2g SKILL (onset clusters) ===")
    keys = ["samples","years","event_onsets_total","watch_threshold","warning_threshold","exit_threshold",
            "warning_days","warning_starts","hits_within_days","hits","false_alarms","false_alarms_per_year",
            "lead_time_days_median","lead_time_days_mean"]
    for k in keys: print(f"{k:>26}: {metrics.get(k)}")
    print(" auc_pr_lead_curve (k→AUC):", metrics["auc_pr_lead_curve"])
    print(" coverage_onsets   (K→cov):", metrics["coverage_onsets"])
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …
Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-221858Z_v02g\\gdren_v02g_features.csv",
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-221858Z_v02g\\gdren_v02g_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-221858Z_v02g\\gdren_v02g_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-221858Z_v02g\\meta.json"
}

=== v0.2g SKILL (onset clusters) ===
                   samples: 11394
                     years: 35.841204654346335
        event_onsets_total: 45
           watch_threshold: 0.7267524477222704
            exit_threshold: 0.6467524477222705
          hits_within_days: 7
                      hits: 1
              false_alarms: 36
     false_alarms_per_year: 1.0044305247880223
     lead_time_days_median: 4.0
       lead_time_days_mean: 4.0
 auc_pr_lead_curve (k→AUC): {1: 0.006777600843701023, 2: 0.0067560807716162356, 3: 0.006945569604921594, 4: 0.006966079429073362, 5: 0.00688556

In [14]:
# === G-DREN v0.3a — supervised calibration (onset-in-next-K), hard-gated warnings, FA/yr control ===
import os, json, time, sys, subprocess, warnings
from pathlib import Path
import numpy as np, pandas as pd

warnings.filterwarnings("ignore", category=RuntimeWarning)

# ---- bootstrap ----
def need(pkg):
    try: __import__(pkg); return False
    except Exception: return True
def pip_install(pkgs):
    for p in pkgs:
        if need(p):
            print(f"[setup] installing {p} …")
            subprocess.run([sys.executable, "-m", "pip", "install", p], check=False)

pip_install(["yfinance","meteostat","scikit-learn","scipy"])

import yfinance as yf
from meteostat import Point, Daily
from scipy.signal import coherence
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import average_precision_score

# ===== CONFIG =====
LAT, LON = 40.7128, -74.0060
START, END = "2005-01-01", None
LOOKAHEAD_D = 7                 # label = onset in next K days
EVENT_KSIGMA = 3.0              # spike
VOL_BREAK_Q = 0.995             # vol breakout quantile
VOL_BREAK_WIN = 252*3
DD_WIN = 40
DD_FRAC = 0.10
MIN_CLUSTER_LEN = 3
COUPLING_WIN = 400

# Warning state controls
GATE_THETA_MIN = 0.25           # channel intensity gate
GAMMA_VIX_MIN  = 0.08
GATE_MIN_VOTES = 3              # require this many votes to start
COOLDOWN_BETWEEN_STARTS = 18
MAX_WARN_DAYS = 12
HYST_EXIT_DAYS = 3
FA_PER_YEAR_TARGET = 0.50       # calibrate threshold on TRAIN to not exceed this

# Train/Test split (calibrate on train, evaluate on all)
TRAIN_END = "2015-12-31"

# ===== PATHS =====
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v03a"
RUN.mkdir(parents=True, exist_ok=True)
def save_df(df, name): fp = RUN / f"{name}.csv"; df.to_csv(fp, index=True); return str(fp)
def save_json(obj, name): fp = RUN / f"{name}.json"; fp.write_text(json.dumps(obj, indent=2)); return str(fp)

# ===== helpers =====
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None: idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()
def normalize_series_daily(s):
    if s is None or len(s)==0: return pd.Series(dtype=float)
    s = pd.Series(s.copy()); s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]; h,_ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out
def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w); out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out
def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values
def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values
def glyph_coupling(a, b, fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a)!=len(b) or len(a)<128: return np.nan
    mask = np.isfinite(a)&np.isfinite(b); a,b = a[mask], b[mask]
    if len(a)<128: return np.nan
    a -= a.mean(); b -= b.mean()
    if a.std()<1e-12 or b.std()<1e-12: return np.nan
    f,Cxy = coherence(a,b,fs=fs, nperseg=min(256,len(a)))
    band = (f>0.02)&(f<0.25); v = Cxy[band]; v = v[np.isfinite(v)]
    return float(v.mean()) if v.size else np.nan

# ===== data =====
def get_px(tkr, auto_adjust=True):
    try:
        h = yf.Ticker(tkr).history(period="max", auto_adjust=auto_adjust)
        if isinstance(h, pd.DataFrame) and 'Close' in h and len(h)>0:
            return normalize_series_daily(h['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] history {tkr}:", e)
    try:
        s = yf.download(tkr, period="max", interval="1d", auto_adjust=auto_adjust, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s and len(s)>0:
            return normalize_series_daily(s['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] download {tkr}:", e)
    return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon); d = Daily(loc, start_dt, end_dt).fetch()
        t = d['tavg'].astype(float) if 'tavg' in d and d['tavg'].notna().any() else ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e: print("[warn] temp fetch:", e); return pd.Series(dtype=float)

px  = get_px("SPY",  auto_adjust=True)
vix = get_px("^VIX", auto_adjust=False)
hyg = get_px("HYG",  auto_adjust=True)
ief = get_px("IEF",  auto_adjust=True)
ta  = get_temp_anomaly(LAT, LON)

idx = pd.DatetimeIndex([])
for ser in [px, vix, hyg, ief, ta]:
    if len(ser)>0: idx = ser.index if idx.empty else idx.union(ser.index)
df = pd.DataFrame(index=idx).sort_index()

if len(px)>0:  df['px']  = px.reindex(df.index).ffill(); df['ret'] = df['px'].pct_change()
if len(vix)>0: df['vix'] = vix.reindex(df.index).ffill(); df['dvix'] = df['vix'].pct_change()
if len(hyg)>0 and len(ief)>0:
    ratio = (hyg / ief).reindex(df.index).ffill()
    df['cred_ret'] = ratio.pct_change()
if len(ta)>0:  df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

# ===== features =====
# market
if 'ret' in df:
    m = df['ret'].fillna(0.0).values; mz = zscore_mad(m, win=512)
    df['S_d_mkt']   = rolling_entropy(m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(m, win=256)
    df['Theta_mkt'] = theta_breaches(mz, q=0.985, win=256)
else:
    df['S_d_mkt']=df['AR1_mkt']=df['Theta_mkt']=np.nan
# vix
if 'dvix' in df:
    v = df['dvix'].fillna(0.0).values; vz = zscore_mad(v, win=512)
    df['S_d_vix']   = rolling_entropy(v, win=256, bins=48)
    df['AR1_vix']   = rolling_ar1(v, win=256)
    df['Theta_vix'] = theta_breaches(vz, q=0.985, win=256)
else:
    df['S_d_vix']=df['AR1_vix']=df['Theta_vix']=np.nan
# credit
if 'cred_ret' in df:
    c = df['cred_ret'].fillna(0.0).values; cz = zscore_mad(c, win=512)
    df['S_d_cred']   = rolling_entropy(c, win=256, bins=48)
    df['AR1_cred']   = rolling_ar1(c, win=256)
    df['Theta_cred'] = theta_breaches(cz, q=0.985, win=256)
else:
    df['S_d_cred']=df['AR1_cred']=df['Theta_cred']=np.nan
# temp (weak weight, but keep as context)
if 'temp_anom' in df:
    t = df['temp_anom'].fillna(0.0).values; tz = zscore_mad(t, win=512)
    df['S_d_tmp']   = rolling_entropy(t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(t, win=256)
    df['Theta_tmp'] = theta_breaches(tz, q=0.985, win=256)
else:
    df['S_d_tmp']=df['AR1_tmp']=df['Theta_tmp']=np.nan

# hetero-couplings
df['Gamma_mkt_vix']=np.nan; df['Gamma_mkt_cred']=np.nan
if 'ret' in df and 'dvix' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['dvix'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_vix')] = glyph_coupling(a,b,fs=1.0)
if 'ret' in df and 'cred_ret' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['cred_ret'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_cred')] = glyph_coupling(a,b,fs=1.0)

# ===== regime events -> sustained clusters -> ONSETS =====
metrics={}
if 'ret' in df:
    vol21 = df['ret'].rolling(21).std()
    E_spike = (df['ret'].abs() > EVENT_KSIGMA*vol21).astype(int)

    vol10  = df['ret'].rolling(10).std()
    q_roll = vol10.rolling(VOL_BREAK_WIN, min_periods=252).quantile(VOL_BREAK_Q)
    E_vol  = (vol10 > q_roll).astype(int).fillna(0).astype(int)

    if 'px' in df:
        roll_max = df['px'].rolling(DD_WIN, min_periods=DD_WIN//2).max()
        dd = df['px']/roll_max - 1.0
        E_dd = (dd <= -DD_FRAC).astype(int).fillna(0).astype(int)
    else:
        E_dd = pd.Series(0, index=df.index)

    E = ((E_spike==1) | (E_vol==1) | (E_dd==1)).astype(int).reindex(df.index).fillna(0).astype(int)

    # cluster sustained regimes, mark onsets
    onset = np.zeros(len(df), dtype=int)
    arr = E.values; i=0
    while i < len(arr):
        if arr[i]==1:
            j=i
            while j+1<len(arr) and arr[j+1]==1: j+=1
            if (j-i+1) >= MIN_CLUSTER_LEN: onset[i]=1
            i = j+1
        else:
            i+=1
    df['EVENT_ONSET'] = onset

    # ===== supervised calibration: label = onset within next K days =====
    y_future = pd.Series(df['EVENT_ONSET']).rolling(LOOKAHEAD_D, min_periods=1).max().shift(-(LOOKAHEAD_D-1))
    y_future = y_future.reindex(df.index).fillna(0).astype(int)

    # Feature matrix (robust z + 7d ramps)
    feat_cols = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_vix','AR1_vix','Theta_vix',
                 'S_d_cred','AR1_cred','Theta_cred','S_d_tmp','AR1_tmp','Theta_tmp',
                 'Gamma_mkt_vix','Gamma_mkt_cred']
    F = df[feat_cols].copy().ffill().bfill().fillna(0.0)
    # robust z
    Z = pd.DataFrame(index=F.index)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        Z[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    DZ7 = Z.diff(7).add_prefix("d7_")
    X = pd.concat([Z, DZ7[['d7_AR1_mkt','d7_S_d_mkt','d7_AR1_vix','d7_S_d_vix','d7_Gamma_mkt_vix','d7_Gamma_mkt_cred']].fillna(0.0)], axis=1).fillna(0.0)

    # Train / All indices
    idx_train = X.index[X.index <= pd.Timestamp(TRAIN_END)]
    X_train, y_train = X.loc[idx_train], y_future.loc[idx_train]
    if y_train.sum() == 0:
        # fallback: relax to any event (not just onset) if train lacks positives
        y_train = pd.Series(E, index=df.index).loc[idx_train]

    # Fit logistic (balanced)
    clf = make_pipeline(StandardScaler(with_mean=False),  # sparse-friendly
                        LogisticRegression(max_iter=2000, class_weight="balanced", solver="lbfgs"))
    clf.fit(X_train.values, y_train.values)

    # Risk probability
    risk_all = pd.Series(clf.predict_proba(X.values)[:,1], index=X.index, name="RISK")
    df['RISK'] = risk_all

    # ===== threshold calibration on TRAIN to meet FA/yr target =====
    def make_alert_from_risk(risk, thetas, gammas, fa_target=None, thr=None):
        # Build gated WARNING state from risk
        Theta_m, Theta_v, Theta_c = thetas
        Gamma_v = gammas[0]
        risk = pd.Series(risk).fillna(0.0)
        votes_series = (
            (Theta_v >= GATE_THETA_MIN).astype(int) +
            (Theta_c >= GATE_THETA_MIN).astype(int) +
            (Theta_m >= GATE_THETA_MIN).astype(int) +
            (Gamma_v >= GAMMA_VIX_MIN).astype(int) +
            ((risk - risk.shift(7)) > 0).astype(int)
        ).fillna(0).astype(int)

        if thr is None:
            qgrid = np.linspace(0.990, 0.9995, 20)  # strict thresholds
        else:
            qgrid = [thr]

        best = None
        for q in qgrid:
            t = risk.quantile(q)
            in_warn=False; below=0; warn_len=0
            state=[]
            for i,(r,v) in enumerate(zip(risk.values, votes_series.values)):
                if in_warn:
                    warn_len += 1
                    if r < 0.9*t: below += 1
                    else: below = 0
                    if below >= HYST_EXIT_DAYS or warn_len >= MAX_WARN_DAYS:
                        in_warn=False; warn_len=0
                        state.append('OK')
                    else:
                        state.append('WARNING')
                else:
                    # persistence: at least 2 of last 3 days above t
                    persist = (pd.Series(risk.values).rolling(3).apply(lambda w: (w>=t).sum(), raw=True).fillna(0).iloc[i] >= 2)
                    if (r>=t) and persist and (v >= GATE_MIN_VOTES):
                        in_warn=True; below=0; warn_len=1
                        state.append('WARNING')
                    else:
                        state.append('OK')
            alert = pd.Series(state, index=risk.index)

            # starts / hits / FA against ONSET label y_future (on TRAIN or eval slice where provided)
            starts = (alert.eq("WARNING") & ~alert.shift(1).eq("WARNING")).fillna(False)
            s_ix = list(np.where(starts.values)[0])
            # cooldown
            kept=[]; last=-10**9
            for s in s_ix:
                if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
            s_ix = kept

            e_ix = list(np.where(df.loc[risk.index, 'EVENT_ONSET'].values==1)[0]); e_set=set(e_ix)
            hits=0
            for s in s_ix:
                for j in range(1, LOOKAHEAD_D+1):
                    if (s+j) in e_set: hits += 1; break
            false_alarms = int(len(s_ix) - hits)
            years = max(1e-9, (risk.index[-1]-risk.index[0]).days/365.25)
            fa_per_year = false_alarms / years

            cand = {"thr": float(t), "q": float(q), "starts": len(s_ix), "hits": hits,
                    "fa_per_year": fa_per_year, "alert": alert}
            if fa_target is None:
                best = cand; break
            else:
                # pick highest hits under FA target; tie-break by lowest fa_per_year
                if (best is None) or \
                   ((cand["fa_per_year"] <= fa_target) and
                    (cand["hits"] > best.get("hits_ok", -1))) or \
                   ((cand["fa_per_year"] <= fa_target) and
                    (cand["hits"] == best.get("hits_ok", -1)) and (cand["fa_per_year"] < best["fa_per_year"])):
                    best = cand.copy()
                    best["hits_ok"] = hits
        return best

    thetas_train = (df['Theta_mkt'].loc[idx_train].fillna(0.0),
                    df['Theta_vix'].loc[idx_train].fillna(0.0),
                    df['Theta_cred'].loc[idx_train].fillna(0.0))
    gammas_train = (df['Gamma_mkt_vix'].loc[idx_train].fillna(0.0),)

    calib = make_alert_from_risk(risk_all.loc[idx_train],
                                 thetas_train, gammas_train,
                                 fa_target=FA_PER_YEAR_TARGET)

    thr_prob = calib["thr"]
    thr_q    = calib["q"]

    # ===== apply calibrated threshold to FULL series =====
    thetas_all = (df['Theta_mkt'].fillna(0.0), df['Theta_vix'].fillna(0.0), df['Theta_cred'].fillna(0.0))
    gammas_all = (df['Gamma_mkt_vix'].fillna(0.0),)
    final = make_alert_from_risk(risk_all, thetas_all, gammas_all, thr=thr_q)
    df['alert'] = final["alert"]

    # Metrics (full)
    starts = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING")).fillna(False)
    s_ix = list(np.where(starts.values)[0])
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
    s_ix = kept

    y_on = df['EVENT_ONSET'].values
    e_ix = list(np.where(y_on==1)[0]); e_set=set(e_ix)
    hits=0; lead_days=[]
    for s in s_ix:
        hit=None
        for j in range(1, LOOKAHEAD_D+1):
            if (s+j) in e_set: hit=s+j; break
        if hit is not None:
            hits += 1; lead_days.append(int(hit - s))
    false_alarms = int(len(s_ix) - hits)
    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25)
    fa_per_year = false_alarms/years

    # Lead PR-AUC (ranking by risk K days ahead)
    auc_lead={}
    for k in range(1, LOOKAHEAD_D+1):
        yk = y_on[k:]
        xk = risk_all.shift(k).iloc[k:].values
        auc_lead[k] = float(average_precision_score(yk, xk)) if yk.sum()>0 else float('nan')

    # Coverage of onsets by starts within prior K
    coverage_by_k={}
    s_set=set(s_ix)
    for K in range(1, LOOKAHEAD_D+1):
        covered=0
        for e in e_ix:
            left=max(0, e-K)
            if any((t in s_set) for t in range(left, e)): covered+=1
        coverage_by_k[K] = float(covered/len(e_ix)) if len(e_ix) else float('nan')

    warn_days = int((df['alert']=="WARNING").sum())
    metrics = {
        "samples": int(len(df)),
        "years": float(years),
        "event_onsets_total": int(df['EVENT_ONSET'].sum()),
        "calibrated_thr_quantile_train": float(thr_q),
        "calibrated_thr_prob": float(thr_prob),
        "fa_per_year_target_train": FA_PER_YEAR_TARGET,
        "warning_starts": int(len(s_ix)),
        "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits),
        "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(fa_per_year),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "auc_pr_lead_curve": auc_lead,
        "coverage_onsets": coverage_by_k,
        "warning_days": warn_days,
        "gate": {"theta_min": GATE_THETA_MIN, "gamma_vix_min": GAMMA_VIX_MIN, "min_votes": GATE_MIN_VOTES},
        "cooldown_days": COOLDOWN_BETWEEN_STARTS,
        "max_warn_days": MAX_WARN_DAYS
    }

# ===== persist =====
paths = {
    "features_csv": save_df(df[['RISK','alert','EVENT_ONSET'] + [c for c in df.columns if c.startswith('S_d_') or c.startswith('AR1_') or c.startswith('Theta_') or c.startswith('Gamma_')]], "gdren_v03a_features"),
    "metrics_json": save_json(metrics, "gdren_v03a_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END, "lookahead": LOOKAHEAD_D},
        "fa_per_year_target_train": FA_PER_YEAR_TARGET,
        "notes": "v0.3a — logistic risk for onset-in-next-K, threshold calibrated on TRAIN to cap FA/yr; gated WARNING."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.3a SKILL (calibrated) ===")
    for k in ["samples","years","event_onsets_total","calibrated_thr_quantile_train",
              "warning_starts","hits","false_alarms","false_alarms_per_year",
              "lead_time_days_median","lead_time_days_mean"]:
        print(f"{k:>28}: {metrics.get(k)}")
    print(" auc_pr_lead_curve (k→AUC):", metrics["auc_pr_lead_curve"])
    print(" coverage_onsets   (K→cov):", metrics["coverage_onsets"])
else:
    print("\n[info] Metrics skipped (no market series).")


[setup] installing scikit-learn …


KeyboardInterrupt: 

In [15]:
# === G-DREN v0.3a-lite — no sklearn; NumPy IRLS logistic + pure-NumPy AP; gated WARNINGs ===
import os, json, time, warnings
from pathlib import Path
import numpy as np, pandas as pd

warnings.filterwarnings("ignore", category=RuntimeWarning)

# -------- config --------
LAT, LON = 40.7128, -74.0060
START, END = "2005-01-01", None
LOOKAHEAD_D = 7                 # label = onset in next K days
EVENT_KSIGMA = 3.0              # spike threshold (try 2.5–3.0)
VOL_BREAK_Q = 0.995             # vol breakout quantile
VOL_BREAK_WIN = 252*3
DD_WIN = 40
DD_FRAC = 0.10
MIN_CLUSTER_LEN = 3
COUPLING_WIN = 400

# Warning/gating & calibration
GATE_THETA_MIN = 0.25
GAMMA_VIX_MIN  = 0.08
GATE_MIN_VOTES = 3
COOLDOWN_BETWEEN_STARTS = 18
MAX_WARN_DAYS = 12
HYST_EXIT_DAYS = 3
FA_PER_YEAR_TARGET = 0.50
TRAIN_END = "2015-12-31"

# -------- paths --------
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v03a_lite"
RUN.mkdir(parents=True, exist_ok=True)
def save_df(df, name): fp = RUN / f"{name}.csv"; df.to_csv(fp, index=True); return str(fp)
def save_json(obj, name): fp = RUN / f"{name}.json"; fp.write_text(json.dumps(obj, indent=2)); return str(fp)

# -------- small utils (tz, robust z, coherence fallback, AP, IRLS) --------
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None: idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()

def normalize_series_daily(s):
    if s is None or len(s)==0: return pd.Series(dtype=float)
    s = pd.Series(s.copy()); s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]; h,_ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w); out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

# coherence fallback if SciPy absent
try:
    from scipy.signal import coherence as _coh
    def coherence_band(a,b,fs=1.0):
        f,Cxy = _coh(a,b,fs=fs, nperseg=min(256,len(a)))
        band = (f>0.02)&(f<0.25)
        v = Cxy[band]
        v = v[np.isfinite(v)]
        return float(v.mean()) if v.size else np.nan
except Exception:
    def coherence_band(a,b,fs=1.0):
        return np.nan

def glyph_coupling(a,b,fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a)!=len(b) or len(a)<128: return np.nan
    m = np.isfinite(a)&np.isfinite(b); a,b = a[m], b[m]
    if len(a)<128: return np.nan
    a -= a.mean(); b -= b.mean()
    if a.std()<1e-12 or b.std()<1e-12: return np.nan
    return coherence_band(a,b,fs)

def average_precision_np(y_true, y_score):
    y = np.asarray(y_true).astype(int)
    s = np.asarray(y_score, float)
    m = np.isfinite(s)
    y, s = y[m], s[m]
    P = y.sum()
    if P == 0: return float('nan')
    order = np.argsort(-s)
    y_sorted = y[order]
    tp = np.cumsum(y_sorted)
    prec = tp / (np.arange(len(y_sorted))+1)
    ap = prec[y_sorted==1].sum() / P
    return float(ap)

def sigmoid(z):
    z = np.clip(z, -30, 30)
    return 1.0/(1.0+np.exp(-z))

def logistic_ridge_fit(X, y, l2=1e-2, max_iter=200, tol=1e-6):
    # X: (n,p) already scaled; add intercept inside
    n,p = X.shape
    X_ = np.hstack([np.ones((n,1)), X])
    beta = np.zeros(p+1)
    eps = 1e-9
    for _ in range(max_iter):
        z = X_ @ beta
        mu = sigmoid(z)
        w = mu*(1-mu) + eps
        # IRLS normal equations: (X' W X + λI) β = X' W (z + (y-μ)/w)
        z_tilde = z + (y - mu)/w
        # compute A = X' W X and b = X' W z_tilde
        WX = X_ * w[:,None]
        A = X_.T @ WX
        # ridge (do not penalize intercept)
        A[1:,1:] += l2*np.eye(p)
        b = X_.T @ (w * z_tilde)
        beta_new = np.linalg.solve(A, b)
        if np.max(np.abs(beta_new - beta)) < tol:
            beta = beta_new; break
        beta = beta_new
    return beta

def logistic_predict_proba(X, beta):
    X_ = np.hstack([np.ones((X.shape[0],1)), X])
    return sigmoid(X_ @ beta)

# -------- data loaders (no installs) --------
def get_px(tkr, auto_adjust=True):
    try:
        import yfinance as yf
        h = yf.Ticker(tkr).history(period="max", auto_adjust=auto_adjust)
        if isinstance(h, pd.DataFrame) and 'Close' in h and len(h)>0:
            return normalize_series_daily(h['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] history {tkr}:", e)
    try:
        import yfinance as yf
        s = yf.download(tkr, period="max", interval="1d", auto_adjust=auto_adjust, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s and len(s)>0:
            return normalize_series_daily(s['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] download {tkr}:", e)
    # last resort: SPY from Stooq
    if tkr.upper()=="SPY":
        try:
            url = "https://stooq.com/q/d/l/?s=spy&i=d"
            stq = pd.read_csv(url); stq.columns=[c.lower() for c in stq.columns]
            stq['date']=pd.to_datetime(stq['date'])
            stq = stq.set_index('date').sort_index()
            return normalize_series_daily(stq['close'].rename('SPY').astype(float))
        except Exception as e: print("[error] stooq SPY:", e)
    return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        from meteostat import Point, Daily
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon); d = Daily(loc, start_dt, end_dt).fetch()
        t = d['tavg'].astype(float) if 'tavg' in d and d['tavg'].notna().any() else ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e: print("[warn] temp fetch:", e); return pd.Series(dtype=float)

# -------- ingest --------
px  = get_px("SPY",  auto_adjust=True)
vix = get_px("^VIX", auto_adjust=False)
hyg = get_px("HYG",  auto_adjust=True)
ief = get_px("IEF",  auto_adjust=True)
ta  = get_temp_anomaly(LAT, LON)

idx = pd.DatetimeIndex([])
for ser in [px, vix, hyg, ief, ta]:
    if len(ser)>0: idx = ser.index if idx.empty else idx.union(ser.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0:  df['px']  = px.reindex(df.index).ffill(); df['ret'] = df['px'].pct_change()
if len(vix)>0: df['vix'] = vix.reindex(df.index).ffill(); df['dvix'] = df['vix'].pct_change()
if len(hyg)>0 and len(ief)>0:
    ratio = (hyg / ief).reindex(df.index).ffill()
    df['cred_ret'] = ratio.pct_change()
if len(ta)>0:  df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

# -------- features --------
# market
if 'ret' in df:
    m = df['ret'].fillna(0.0).values; mz = zscore_mad(m, win=512)
    df['S_d_mkt']   = rolling_entropy(m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(m, win=256)
    df['Theta_mkt'] = theta_breaches(mz, q=0.985, win=256)
else:
    df['S_d_mkt']=df['AR1_mkt']=df['Theta_mkt']=np.nan
# vix
if 'dvix' in df:
    v = df['dvix'].fillna(0.0).values; vz = zscore_mad(v, win=512)
    df['S_d_vix']   = rolling_entropy(v, win=256, bins=48)
    df['AR1_vix']   = rolling_ar1(v, win=256)
    df['Theta_vix'] = theta_breaches(vz, q=0.985, win=256)
else:
    df['S_d_vix']=df['AR1_vix']=df['Theta_vix']=np.nan
# credit
if 'cred_ret' in df:
    c = df['cred_ret'].fillna(0.0).values; cz = zscore_mad(c, win=512)
    df['S_d_cred']   = rolling_entropy(c, win=256, bins=48)
    df['AR1_cred']   = rolling_ar1(c, win=256)
    df['Theta_cred'] = theta_breaches(cz, q=0.985, win=256)
else:
    df['S_d_cred']=df['AR1_cred']=df['Theta_cred']=np.nan
# temp
if 'temp_anom' in df:
    t = df['temp_anom'].fillna(0.0).values; tz = zscore_mad(t, win=512)
    df['S_d_tmp']   = rolling_entropy(t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(t, win=256)
    df['Theta_tmp'] = theta_breaches(tz, q=0.985, win=256)
else:
    df['S_d_tmp']=df['AR1_tmp']=df['Theta_tmp']=np.nan

# couplings
df['Gamma_mkt_vix']=np.nan; df['Gamma_mkt_cred']=np.nan
if 'ret' in df and 'dvix' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['dvix'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_vix')] = glyph_coupling(a,b,fs=1.0)
if 'ret' in df and 'cred_ret' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['cred_ret'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_cred')] = glyph_coupling(a,b,fs=1.0)

# -------- regime events → sustained clusters → ONSETS --------
metrics = {}
if 'ret' in df:
    vol21 = df['ret'].rolling(21).std()
    E_spike = (df['ret'].abs() > EVENT_KSIGMA*vol21).astype(int)

    vol10  = df['ret'].rolling(10).std()
    q_roll = vol10.rolling(VOL_BREAK_WIN, min_periods=252).quantile(VOL_BREAK_Q)
    E_vol  = (vol10 > q_roll).astype(int).fillna(0).astype(int)

    if 'px' in df:
        roll_max = df['px'].rolling(DD_WIN, min_periods=DD_WIN//2).max()
        dd = df['px']/roll_max - 1.0
        E_dd = (dd <= -DD_FRAC).astype(int).fillna(0).astype(int)
    else:
        E_dd = pd.Series(0, index=df.index)

    E = ((E_spike==1) | (E_vol==1) | (E_dd==1)).astype(int).reindex(df.index).fillna(0).astype(int)

    # sustained cluster onsets
    onset = np.zeros(len(df), dtype=int)
    i=0; arr=E.values
    while i<len(arr):
        if arr[i]==1:
            j=i
            while j+1<len(arr) and arr[j+1]==1: j+=1
            if (j-i+1) >= MIN_CLUSTER_LEN: onset[i]=1
            i = j+1
        else:
            i+=1
    df['EVENT_ONSET'] = onset

    # ----- supervised label = onset in next K days -----
    y_future = pd.Series(df['EVENT_ONSET']).rolling(LOOKAHEAD_D, min_periods=1).max().shift(-(LOOKAHEAD_D-1))
    y_future = y_future.reindex(df.index).fillna(0).astype(int)

    # feature matrix (robust Z + selected ramps)
    feat_cols = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_vix','AR1_vix','Theta_vix',
                 'S_d_cred','AR1_cred','Theta_cred','S_d_tmp','AR1_tmp','Theta_tmp',
                 'Gamma_mkt_vix','Gamma_mkt_cred']
    F = df[feat_cols].copy().ffill().bfill().fillna(0.0)
    Z = pd.DataFrame(index=F.index)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        Z[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    DZ7 = Z.diff(7).add_prefix("d7_")
    X = pd.concat([Z, DZ7[['d7_AR1_mkt','d7_S_d_mkt','d7_AR1_vix','d7_S_d_vix','d7_Gamma_mkt_vix','d7_Gamma_mkt_cred']].fillna(0.0)], axis=1).fillna(0.0)

    # train split
    idx_train = X.index[X.index <= pd.Timestamp(TRAIN_END)]
    X_train, y_train = X.loc[idx_train].values, y_future.loc[idx_train].values.astype(int)
    if y_train.sum()==0:
        y_train = E.loc[idx_train].values.astype(int)

    # IRLS logistic on robust Z (already roughly standardized)
    beta = logistic_ridge_fit(X_train, y_train, l2=1e-2, max_iter=300, tol=1e-6)
    risk_all = pd.Series(logistic_predict_proba(X.values, beta), index=X.index, name="RISK")
    df['RISK'] = risk_all

    # ----- threshold calibration on TRAIN to meet FA budget -----
    Theta_m_train = df['Theta_mkt'].loc[idx_train].fillna(0.0).values
    Theta_v_train = df['Theta_vix'].loc[idx_train].fillna(0.0).values
    Theta_c_train = df['Theta_cred'].loc[idx_train].fillna(0.0).values
    Gamma_v_train = df['Gamma_mkt_vix'].loc[idx_train].fillna(0.0).values
    onset_train   = df['EVENT_ONSET'].loc[idx_train].values.astype(int)
    rtrain        = risk_all.loc[idx_train].values

    qgrid = np.linspace(0.990, 0.9995, 20)
    best = None
    for q in qgrid:
        t = np.quantile(rtrain[np.isfinite(rtrain)], q)
        # votes & persistence
        votes = ((Theta_v_train>=GATE_THETA_MIN).astype(int) +
                 (Theta_c_train>=GATE_THETA_MIN).astype(int) +
                 (Theta_m_train>=GATE_THETA_MIN).astype(int) +
                 (Gamma_v_train>=GAMMA_VIX_MIN).astype(int) +
                 ((pd.Series(rtrain).diff(7).fillna(0.0).values)>0).astype(int))
        persist = (pd.Series(rtrain).rolling(3).apply(lambda w: (w>=t).sum(), raw=True).fillna(0).values >= 2)

        # build state
        state=[]; in_warn=False; below=0; wlen=0
        for i,(r,v,pers) in enumerate(zip(rtrain, votes, persist)):
            if in_warn:
                wlen += 1
                below = below+1 if r < 0.9*t else 0
                if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS: in_warn=False; wlen=0; state.append('OK')
                else: state.append('WARNING')
            else:
                if (r>=t) and pers and (v>=GATE_MIN_VOTES): in_warn=True; below=0; wlen=1; state.append('WARNING')
                else: state.append('OK')
        state = np.array(state, dtype=object)

        # starts → hits/FA vs ONSET on TRAIN
        starts = (state=='WARNING') & np.roll(state!='WARNING', 1)
        starts[0] = (state[0]=='WARNING')
        s_ix = np.where(starts)[0].tolist()
        kept=[]; last=-10**9
        for s in s_ix:
            if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
        s_ix = kept

        e_ix = np.where(onset_train==1)[0]; e_set=set(e_ix)
        hits=0
        for s in s_ix:
            if any(((s+j) in e_set) for j in range(1, LOOKAHEAD_D+1)): hits+=1
        fa = int(len(s_ix)-hits)
        years = max(1e-9, (idx_train[-1]-idx_train[0]).days/365.25)
        fa_per_year = fa/years

        cand = {"q": float(q), "thr": float(t), "hits": hits, "fa_py": fa_per_year}
        if (best is None) or ((fa_per_year <= FA_PER_YEAR_TARGET) and (hits > best["hits"])) or \
           ((fa_per_year <= FA_PER_YEAR_TARGET) and (hits == best["hits"]) and (fa_per_year < best["fa_py"])):
            best = cand

    thr_q, thr_prob = best["q"], best["thr"]

    # ----- apply to FULL series -----
    Theta_m = df['Theta_mkt'].fillna(0.0).values
    Theta_v = df['Theta_vix'].fillna(0.0).values
    Theta_c = df['Theta_cred'].fillna(0.0).values
    Gamma_v = df['Gamma_mkt_vix'].fillna(0.0).values
    risk    = df['RISK'].fillna(0.0).values
    t = np.quantile(risk[np.isfinite(risk)], thr_q)
    votes = ((Theta_v>=GATE_THETA_MIN).astype(int) +
             (Theta_c>=GATE_THETA_MIN).astype(int) +
             (Theta_m>=GATE_THETA_MIN).astype(int) +
             (Gamma_v>=GAMMA_VIX_MIN).astype(int) +
             ((pd.Series(risk).diff(7).fillna(0.0).values)>0).astype(int))
    persist = (pd.Series(risk).rolling(3).apply(lambda w: (w>=t).count(), raw=True).fillna(0).values >= 2)

    state=[]; in_warn=False; below=0; wlen=0
    for i,(r,v,pers) in enumerate(zip(risk, votes, persist)):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS: in_warn=False; wlen=0; state.append('OK')
            else: state.append('WARNING')
        else:
            if (r>=t) and pers and (v>=GATE_MIN_VOTES): in_warn=True; below=0; wlen=1; state.append('WARNING')
            else: state.append('OK')
    df['alert'] = state

    # ----- full metrics -----
    starts = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING")).fillna(False)
    s_ix = np.where(starts.values)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
    s_ix = kept

    y_on = df['EVENT_ONSET'].values
    e_ix = np.where(y_on==1)[0]; e_set=set(e_ix)
    hits=0; lead_days=[]
    for s in s_ix:
        hit=None
        for j in range(1, LOOKAHEAD_D+1):
            if (s+j) in e_set: hit=s+j; break
        if hit is not None:
            hits += 1; lead_days.append(int(hit-s))
    false_alarms = int(len(s_ix) - hits)
    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25)
    fa_per_year = false_alarms/years

    # lead PR-AUC with our AP
    auc_lead={}
    for k in range(1, LOOKAHEAD_D+1):
        yk = y_on[k:]
        xk = pd.Series(risk).shift(k).iloc[k:].values
        auc_lead[k] = average_precision_np(yk, xk) if yk.sum()>0 else float('nan')

    # onset coverage by starts in prior K days
    coverage_by_k={}
    s_set=set(s_ix)
    for K in range(1, LOOKAHEAD_D+1):
        covered=0
        for e in e_ix:
            left=max(0, e-K)
            if any((t0 in s_set) for t0 in range(left, e)): covered+=1
        coverage_by_k[K] = float(covered/len(e_ix)) if len(e_ix) else float('nan')

    metrics = {
        "samples": int(len(df)), "years": float(years),
        "event_onsets_total": int(df['EVENT_ONSET'].sum()),
        "calibrated_thr_quantile_train": float(thr_q),
        "calibrated_thr_prob": float(thr_prob),
        "fa_per_year_target_train": FA_PER_YEAR_TARGET,
        "warning_starts": int(len(s_ix)), "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits), "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(fa_per_year),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "auc_pr_lead_curve": auc_lead, "coverage_onsets": coverage_by_k,
        "warning_days": int((df['alert']=="WARNING").sum()),
        "gate": {"theta_min": GATE_THETA_MIN, "gamma_vix_min": GAMMA_VIX_MIN, "min_votes": GATE_MIN_VOTES},
        "cooldown_days": COOLDOWN_BETWEEN_STARTS, "max_warn_days": MAX_WARN_DAYS
    }

# -------- persist --------
paths = {
    "features_csv": save_df(df[['RISK','alert','EVENT_ONSET'] + [c for c in df.columns
                     if c.startswith(('S_d_','AR1_','Theta_','Gamma_'))]], "gdren_v03a_lite_features"),
    "metrics_json": save_json(metrics, "gdren_v03a_lite_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END, "lookahead": LOOKAHEAD_D},
        "fa_per_year_target_train": FA_PER_YEAR_TARGET,
        "notes": "v0.3a-lite — removed sklearn; IRLS logistic + NumPy AP; gated WARNING; calibrated threshold."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.3a-lite SKILL (calibrated) ===")
    for k in ["samples","years","event_onsets_total","calibrated_thr_quantile_train",
              "warning_starts","hits","false_alarms","false_alarms_per_year",
              "lead_time_days_median","lead_time_days_mean"]:
        print(f"{k:>28}: {metrics.get(k)}")
    print(" auc_pr_lead_curve (k→AUC):", metrics["auc_pr_lead_curve"])
    print(" coverage_onsets   (K→cov):", metrics["coverage_onsets"])
else:
    print("\n[info] Metrics skipped (no market series).")


AttributeError: 'numpy.ndarray' object has no attribute 'count'

In [16]:
# === G-DREN v0.3a-lite (FIXED) — no sklearn; IRLS logistic + NumPy AP; gated WARNING; vectorized persistence ===
import os, json, time, warnings
from pathlib import Path
import numpy as np, pandas as pd

warnings.filterwarnings("ignore", category=RuntimeWarning)

# -------- config --------
LAT, LON = 40.7128, -74.0060
START, END = "2005-01-01", None
LOOKAHEAD_D = 7                 # label = onset in next K days
EVENT_KSIGMA = 3.0              # spike threshold (try 2.5–3.0)
VOL_BREAK_Q = 0.995             # vol breakout quantile
VOL_BREAK_WIN = 252*3
DD_WIN = 40
DD_FRAC = 0.10
MIN_CLUSTER_LEN = 3
COUPLING_WIN = 400

# Warning/gating & calibration
GATE_THETA_MIN = 0.25
GAMMA_VIX_MIN  = 0.08
GATE_MIN_VOTES = 3
COOLDOWN_BETWEEN_STARTS = 18
MAX_WARN_DAYS = 12
HYST_EXIT_DAYS = 3
FA_PER_YEAR_TARGET = 0.50
TRAIN_END = "2015-12-31"

# -------- paths --------
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
ts = time.strftime("%Y%m%d-%H%M%SZ", time.gmtime())
RUN = ROOT / f"{ts}_v03a_lite_fix"
RUN.mkdir(parents=True, exist_ok=True)
def save_df(df, name): fp = RUN / f"{name}.csv"; df.to_csv(fp, index=True); return str(fp)
def save_json(obj, name): fp = RUN / f"{name}.json"; fp.write_text(json.dumps(obj, indent=2)); return str(fp)

# -------- small utils (tz, robust z, coherence fallback, AP, IRLS) --------
def normalize_daily_index(idx):
    idx = pd.DatetimeIndex(idx)
    if idx.tz is not None: idx = idx.tz_convert("UTC").tz_localize(None)
    return idx.normalize()

def normalize_series_daily(s):
    if s is None or len(s)==0: return pd.Series(dtype=float)
    s = pd.Series(s.copy()); s.index = normalize_daily_index(s.index)
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()

def rolling_entropy(x, win=128, bins=32):
    x = np.asarray(x, float); out = np.full(len(x), np.nan)
    for i in range(win-1, len(x)):
        w = x[i-win+1:i+1]; h,_ = np.histogram(w, bins=bins, density=True)
        p = h[h>0]; out[i] = -np.sum(p*np.log(p)) if len(p) else np.nan
    return out

def rolling_ar1(x, win=256):
    s = pd.Series(x, dtype=float); out = np.full(len(s), np.nan)
    for i in range(win-1, len(s)):
        w = s.iloc[i-win+1:i+1].values
        sw = np.nanstd(w); out[i] = 0.0 if sw<1e-12 else np.corrcoef(w[1:], w[:-1])[0,1]
    return out

def zscore_mad(x, win=512):
    s = pd.Series(x, dtype=float)
    med = s.rolling(win, min_periods=64).median()
    mad = (s-med).abs().rolling(win, min_periods=64).median()
    return ((s - med)/(1.4826*mad + 1e-9)).values

def theta_breaches(zsig, q=0.985, win=256):
    s = pd.Series(zsig, dtype=float)
    thr = s.rolling(win, min_periods=win//2).quantile(q)
    return (s > thr).astype(int).rolling(win, min_periods=win//2).mean().values

# coherence fallback if SciPy absent
try:
    from scipy.signal import coherence as _coh
    def coherence_band(a,b,fs=1.0):
        f,Cxy = _coh(a,b,fs=fs, nperseg=min(256,len(a)))
        band = (f>0.02)&(f<0.25)
        v = Cxy[band]; v = v[np.isfinite(v)]
        return float(v.mean()) if v.size else np.nan
except Exception:
    def coherence_band(a,b,fs=1.0):
        return np.nan

def glyph_coupling(a,b,fs=1.0):
    a = np.asarray(a, float); b = np.asarray(b, float)
    if len(a)!=len(b) or len(a)<128: return np.nan
    m = np.isfinite(a)&np.isfinite(b); a,b = a[m], b[m]
    if len(a)<128: return np.nan
    a -= a.mean(); b -= b.mean()
    if a.std()<1e-12 or b.std()<1e-12: return np.nan
    return coherence_band(a,b,fs)

def average_precision_np(y_true, y_score):
    y = np.asarray(y_true).astype(int)
    s = np.asarray(y_score, float)
    m = np.isfinite(s)
    y, s = y[m], s[m]
    P = y.sum()
    if P == 0: return float('nan')
    order = np.argsort(-s)
    y_sorted = y[order]
    tp = np.cumsum(y_sorted)
    prec = tp / (np.arange(len(y_sorted))+1)
    ap = prec[y_sorted==1].sum() / P
    return float(ap)

def sigmoid(z):
    z = np.clip(z, -30, 30)
    return 1.0/(1.0+np.exp(-z))

def logistic_ridge_fit(X, y, l2=1e-2, max_iter=200, tol=1e-6):
    n,p = X.shape
    X_ = np.hstack([np.ones((n,1)), X])
    beta = np.zeros(p+1)
    eps = 1e-9
    for _ in range(max_iter):
        z = X_ @ beta
        mu = sigmoid(z)
        w = mu*(1-mu) + eps
        z_tilde = z + (y - mu)/w
        WX = X_ * w[:,None]
        A = X_.T @ WX
        A[1:,1:] += l2*np.eye(p)   # ridge (not penalizing intercept)
        b = X_.T @ (w * z_tilde)
        beta_new = np.linalg.solve(A, b)
        if np.max(np.abs(beta_new - beta)) < tol:
            beta = beta_new; break
        beta = beta_new
    return beta

def logistic_predict_proba(X, beta):
    X_ = np.hstack([np.ones((X.shape[0],1)), X])
    return sigmoid(X_ @ beta)

# -------- data loaders (no heavy installs) --------
def get_px(tkr, auto_adjust=True):
    try:
        import yfinance as yf
        h = yf.Ticker(tkr).history(period="max", auto_adjust=auto_adjust)
        if isinstance(h, pd.DataFrame) and 'Close' in h and len(h)>0:
            return normalize_series_daily(h['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] history {tkr}:", e)
    try:
        import yfinance as yf
        s = yf.download(tkr, period="max", interval="1d", auto_adjust=auto_adjust, progress=False)
        if isinstance(s, pd.DataFrame) and 'Close' in s and len(s)>0:
            return normalize_series_daily(s['Close'].rename(tkr).dropna())
    except Exception as e: print(f"[warn] download {tkr}:", e)
    if tkr.upper()=="SPY":
        try:
            url = "https://stooq.com/q/d/l/?s=spy&i=d"
            stq = pd.read_csv(url); stq.columns=[c.lower() for c in stq.columns]
            stq['date']=pd.to_datetime(stq['date'])
            stq = stq.set_index('date').sort_index()
            return normalize_series_daily(stq['close'].rename('SPY').astype(float))
        except Exception as e: print("[error] stooq SPY:", e)
    return pd.Series(dtype=float)

def get_temp_anomaly(lat, lon):
    try:
        from meteostat import Point, Daily
        start_dt = pd.Timestamp(START)
        end_dt = pd.Timestamp.today().normalize() if END is None else pd.Timestamp(END)
        loc = Point(lat, lon); d = Daily(loc, start_dt, end_dt).fetch()
        t = d['tavg'].astype(float) if 'tavg' in d and d['tavg'].notna().any() else ((d.get('tmin') + d.get('tmax'))/2).astype(float)
        anom = (t - t.rolling(31, min_periods=10).median()).rename("temp_anom").dropna()
        return normalize_series_daily(anom)
    except Exception as e: print("[warn] temp fetch:", e); return pd.Series(dtype=float)

# -------- ingest --------
px  = get_px("SPY",  auto_adjust=True)
vix = get_px("^VIX", auto_adjust=False)
hyg = get_px("HYG",  auto_adjust=True)
ief = get_px("IEF",  auto_adjust=True)
ta  = get_temp_anomaly(LAT, LON)

idx = pd.DatetimeIndex([])
for ser in [px, vix, hyg, ief, ta]:
    if len(ser)>0: idx = ser.index if idx.empty else idx.union(ser.index)
df = pd.DataFrame(index=idx).sort_index()
if len(px)>0:  df['px']  = px.reindex(df.index).ffill(); df['ret'] = df['px'].pct_change()
if len(vix)>0: df['vix'] = vix.reindex(df.index).ffill(); df['dvix'] = df['vix'].pct_change()
if len(hyg)>0 and len(ief)>0:
    ratio = (hyg / ief).reindex(df.index).ffill()
    df['cred_ret'] = ratio.pct_change()
if len(ta)>0:  df['temp_anom'] = ta.reindex(df.index).interpolate(limit=7).ffill()

# -------- features --------
# market
if 'ret' in df:
    m = df['ret'].fillna(0.0).values; mz = zscore_mad(m, win=512)
    df['S_d_mkt']   = rolling_entropy(m, win=256, bins=48)
    df['AR1_mkt']   = rolling_ar1(m, win=256)
    df['Theta_mkt'] = theta_breaches(mz, q=0.985, win=256)
else:
    df['S_d_mkt']=df['AR1_mkt']=df['Theta_mkt']=np.nan
# vix
if 'dvix' in df:
    v = df['dvix'].fillna(0.0).values; vz = zscore_mad(v, win=512)
    df['S_d_vix']   = rolling_entropy(v, win=256, bins=48)
    df['AR1_vix']   = rolling_ar1(v, win=256)
    df['Theta_vix'] = theta_breaches(vz, q=0.985, win=256)
else:
    df['S_d_vix']=df['AR1_vix']=df['Theta_vix']=np.nan
# credit
if 'cred_ret' in df:
    c = df['cred_ret'].fillna(0.0).values; cz = zscore_mad(c, win=512)
    df['S_d_cred']   = rolling_entropy(c, win=256, bins=48)
    df['AR1_cred']   = rolling_ar1(c, win=256)
    df['Theta_cred'] = theta_breaches(cz, q=0.985, win=256)
else:
    df['S_d_cred']=df['AR1_cred']=df['Theta_cred']=np.nan
# temp
if 'temp_anom' in df:
    t = df['temp_anom'].fillna(0.0).values; tz = zscore_mad(t, win=512)
    df['S_d_tmp']   = rolling_entropy(t, win=256, bins=48)
    df['AR1_tmp']   = rolling_ar1(t, win=256)
    df['Theta_tmp'] = theta_breaches(tz, q=0.985, win=256)
else:
    df['S_d_tmp']=df['AR1_tmp']=df['Theta_tmp']=np.nan

# couplings
df['Gamma_mkt_vix']=np.nan; df['Gamma_mkt_cred']=np.nan
if 'ret' in df and 'dvix' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['dvix'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_vix')] = glyph_coupling(a,b,fs=1.0)
if 'ret' in df and 'cred_ret' in df:
    for i in range(max(128, COUPLING_WIN), len(df)):
        a = df['ret'].iloc[i-COUPLING_WIN:i].values
        b = df['cred_ret'].iloc[i-COUPLING_WIN:i].values
        df.iloc[i, df.columns.get_loc('Gamma_mkt_cred')] = glyph_coupling(a,b,fs=1.0)

# -------- regime events → sustained clusters → ONSETS --------
metrics = {}
if 'ret' in df:
    vol21 = df['ret'].rolling(21).std()
    E_spike = (df['ret'].abs() > EVENT_KSIGMA*vol21).astype(int)

    vol10  = df['ret'].rolling(10).std()
    q_roll = vol10.rolling(VOL_BREAK_WIN, min_periods=252).quantile(VOL_BREAK_Q)
    E_vol  = (vol10 > q_roll).astype(int).fillna(0).astype(int)

    if 'px' in df:
        roll_max = df['px'].rolling(DD_WIN, min_periods=DD_WIN//2).max()
        dd = df['px']/roll_max - 1.0
        E_dd = (dd <= -DD_FRAC).astype(int).fillna(0).astype(int)
    else:
        E_dd = pd.Series(0, index=df.index)

    E = ((E_spike==1) | (E_vol==1) | (E_dd==1)).astype(int).reindex(df.index).fillna(0).astype(int)

    # sustained cluster onsets
    onset = np.zeros(len(df), dtype=int)
    i=0; arr=E.values
    while i<len(arr):
        if arr[i]==1:
            j=i
            while j+1<len(arr) and arr[j+1]==1: j+=1
            if (j-i+1) >= MIN_CLUSTER_LEN: onset[i]=1
            i = j+1
        else:
            i+=1
    df['EVENT_ONSET'] = onset

    # ----- supervised label = onset in next K days -----
    y_future = pd.Series(df['EVENT_ONSET']).rolling(LOOKAHEAD_D, min_periods=1).max().shift(-(LOOKAHEAD_D-1))
    y_future = y_future.reindex(df.index).fillna(0).astype(int)

    # feature matrix (robust Z + selected ramps)
    feat_cols = ['S_d_mkt','AR1_mkt','Theta_mkt','S_d_vix','AR1_vix','Theta_vix',
                 'S_d_cred','AR1_cred','Theta_cred','S_d_tmp','AR1_tmp','Theta_tmp',
                 'Gamma_mkt_vix','Gamma_mkt_cred']
    F = df[feat_cols].copy().ffill().bfill().fillna(0.0)
    Z = pd.DataFrame(index=F.index)
    for c in F.columns:
        med = F[c].rolling(512, min_periods=64).median()
        mad = (F[c]-med).abs().rolling(512, min_periods=64).median()
        Z[c] = (F[c]-med)/(1.4826*mad + 1e-9)
    DZ7 = Z.diff(7).add_prefix("d7_")
    X = pd.concat([Z, DZ7[['d7_AR1_mkt','d7_S_d_mkt','d7_AR1_vix','d7_S_d_vix','d7_Gamma_mkt_vix','d7_Gamma_mkt_cred']].fillna(0.0)], axis=1).fillna(0.0)

    # train split
    idx_train = X.index[X.index <= pd.Timestamp(TRAIN_END)]
    X_train, y_train = X.loc[idx_train].values, y_future.loc[idx_train].values.astype(int)
    if y_train.sum()==0:
        y_train = E.loc[idx_train].values.astype(int)

    # IRLS logistic on robust Z (already roughly standardized)
    beta = logistic_ridge_fit(X_train, y_train, l2=1e-2, max_iter=300, tol=1e-6)
    risk_all = pd.Series(logistic_predict_proba(X.values, beta), index=X.index, name="RISK")
    df['RISK'] = risk_all

    # ----- threshold calibration on TRAIN to meet FA budget -----
    Theta_m_train = df['Theta_mkt'].loc[idx_train].fillna(0.0).values
    Theta_v_train = df['Theta_vix'].loc[idx_train].fillna(0.0).values
    Theta_c_train = df['Theta_cred'].loc[idx_train].fillna(0.0).values
    Gamma_v_train = df['Gamma_mkt_vix'].loc[idx_train].fillna(0.0).values
    onset_train   = df['EVENT_ONSET'].loc[idx_train].values.astype(int)
    rtrain        = risk_all.loc[idx_train].values

    qgrid = np.linspace(0.990, 0.9995, 20)
    best = None
    for q in qgrid:
        t = np.quantile(rtrain[np.isfinite(rtrain)], q)
        votes = ((Theta_v_train>=GATE_THETA_MIN).astype(int) +
                 (Theta_c_train>=GATE_THETA_MIN).astype(int) +
                 (Theta_m_train>=GATE_THETA_MIN).astype(int) +
                 (Gamma_v_train>=GAMMA_VIX_MIN).astype(int) +
                 ((pd.Series(rtrain).diff(7).fillna(0.0).values)>0).astype(int))
        # === FIX: vectorized persistence over last 3 days ===
        persist = ((pd.Series(rtrain) >= t).astype(int).rolling(3).sum().fillna(0).values >= 2)

        # build state
        state=[]; in_warn=False; below=0; wlen=0
        for i,(r,v,pers) in enumerate(zip(rtrain, votes, persist)):
            if in_warn:
                wlen += 1
                below = below+1 if r < 0.9*t else 0
                if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS: in_warn=False; wlen=0; state.append('OK')
                else: state.append('WARNING')
            else:
                if (r>=t) and pers and (v>=GATE_MIN_VOTES): in_warn=True; below=0; wlen=1; state.append('WARNING')
                else: state.append('OK')
        state = np.array(state, dtype=object)

        # starts → hits/FA vs ONSET on TRAIN
        starts = (state=='WARNING') & np.roll(state!='WARNING', 1)
        starts[0] = (state[0]=='WARNING')
        s_ix = np.where(starts)[0].tolist()
        kept=[]; last=-10**9
        for s in s_ix:
            if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
        s_ix = kept

        e_ix = np.where(onset_train==1)[0]; e_set=set(e_ix)
        hits=0
        for s in s_ix:
            if any(((s+j) in e_set) for j in range(1, LOOKAHEAD_D+1)): hits+=1
        fa = int(len(s_ix)-hits)
        years = max(1e-9, (idx_train[-1]-idx_train[0]).days/365.25)
        fa_per_year = fa/years

        cand = {"q": float(q), "thr": float(t), "hits": hits, "fa_py": fa_per_year}
        if (best is None) or ((fa_per_year <= FA_PER_YEAR_TARGET) and (hits > best["hits"])) or \
           ((fa_per_year <= FA_PER_YEAR_TARGET) and (hits == best["hits"]) and (fa_per_year < best["fa_py"])):
            best = cand

    thr_q, thr_prob = best["q"], best["thr"]

    # ----- apply to FULL series -----
    Theta_m = df['Theta_mkt'].fillna(0.0).values
    Theta_v = df['Theta_vix'].fillna(0.0).values
    Theta_c = df['Theta_cred'].fillna(0.0).values
    Gamma_v = df['Gamma_mkt_vix'].fillna(0.0).values
    risk    = df['RISK'].fillna(0.0).values
    t = np.quantile(risk[np.isfinite(risk)], thr_q)
    votes = ((Theta_v>=GATE_THETA_MIN).astype(int) +
             (Theta_c>=GATE_THETA_MIN).astype(int) +
             (Theta_m>=GATE_THETA_MIN).astype(int) +
             (Gamma_v>=GAMMA_VIX_MIN).astype(int) +
             ((pd.Series(risk).diff(7).fillna(0.0).values)>0).astype(int))
    # === FIX: vectorized persistence (full series) ===
    persist = ((pd.Series(risk) >= t).astype(int).rolling(3).sum().fillna(0).values >= 2)

    state=[]; in_warn=False; below=0; wlen=0
    for i,(r,v,pers) in enumerate(zip(risk, votes, persist)):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS: in_warn=False; wlen=0; state.append('OK')
            else: state.append('WARNING')
        else:
            if (r>=t) and pers and (v>=GATE_MIN_VOTES): in_warn=True; below=0; wlen=1; state.append('WARNING')
            else: state.append('OK')
    df['alert'] = state

    # ----- full metrics -----
    starts = (df['alert'].eq("WARNING") & ~df['alert'].shift(1).eq("WARNING")).fillna(False)
    s_ix = np.where(starts.values)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_BETWEEN_STARTS: kept.append(s); last=s
    s_ix = kept

    y_on = df['EVENT_ONSET'].values
    e_ix = np.where(y_on==1)[0]; e_set=set(e_ix)
    hits=0; lead_days=[]
    for s in s_ix:
        hit=None
        for j in range(1, LOOKAHEAD_D+1):
            if (s+j) in e_set: hit=s+j; break
        if hit is not None:
            hits += 1; lead_days.append(int(hit-s))
    false_alarms = int(len(s_ix) - hits)
    years = max(1e-9, (df.index[-1]-df.index[0]).days/365.25)
    fa_per_year = false_alarms/years

    # lead PR-AUC with our AP
    auc_lead={}
    for k in range(1, LOOKAHEAD_D+1):
        yk = y_on[k:]
        xk = pd.Series(risk).shift(k).iloc[k:].values
        auc_lead[k] = average_precision_np(yk, xk) if yk.sum()>0 else float('nan')

    # onset coverage by starts in prior K days
    coverage_by_k={}
    s_set=set(s_ix)
    for K in range(1, LOOKAHEAD_D+1):
        covered=0
        for e in e_ix:
            left=max(0, e-K)
            if any((t0 in s_set) for t0 in range(left, e)): covered+=1
        coverage_by_k[K] = float(covered/len(e_ix)) if len(e_ix) else float('nan')

    metrics = {
        "samples": int(len(df)), "years": float(years),
        "event_onsets_total": int(df['EVENT_ONSET'].sum()),
        "calibrated_thr_quantile_train": float(thr_q),
        "calibrated_thr_prob": float(thr_prob),
        "fa_per_year_target_train": FA_PER_YEAR_TARGET,
        "warning_starts": int(len(s_ix)), "hits_within_days": LOOKAHEAD_D,
        "hits": int(hits), "false_alarms": int(false_alarms),
        "false_alarms_per_year": float(fa_per_year),
        "lead_time_days_median": (float(np.median(lead_days)) if lead_days else None),
        "lead_time_days_mean": (float(np.mean(lead_days)) if lead_days else None),
        "lead_time_days_all": lead_days[:64],
        "auc_pr_lead_curve": auc_lead, "coverage_onsets": coverage_by_k,
        "warning_days": int((df['alert']=="WARNING").sum()),
        "gate": {"theta_min": GATE_THETA_MIN, "gamma_vix_min": GAMMA_VIX_MIN, "min_votes": GATE_MIN_VOTES},
        "cooldown_days": COOLDOWN_BETWEEN_STARTS, "max_warn_days": MAX_WARN_DAYS
    }

# -------- persist --------
paths = {
    "features_csv": save_df(df[['RISK','alert','EVENT_ONSET'] + [c for c in df.columns
                     if c.startswith(('S_d_','AR1_','Theta_','Gamma_'))]], "gdren_v03a_lite_fix_features"),
    "metrics_json": save_json(metrics, "gdren_v03a_lite_fix_metrics"),
    "meta_json": save_json({
        "run_id": RUN.name,
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "cfg": {"lat": LAT, "lon": LON, "start": START, "end": END, "lookahead": LOOKAHEAD_D},
        "fa_per_year_target_train": FA_PER_YEAR_TARGET,
        "notes": "v0.3a-lite (fix) — vectorized persistence; IRLS logistic + NumPy AP; gated WARNING; calibrated threshold."
    }, "meta")
}
print("Artifacts:", json.dumps(paths, indent=2))
if metrics:
    print("\n=== v0.3a-lite SKILL (calibrated) ===")
    for k in ["samples","years","event_onsets_total","calibrated_thr_quantile_train",
              "warning_starts","hits","false_alarms","false_alarms_per_year",
              "lead_time_days_median","lead_time_days_mean"]:
        print(f"{k:>28}: {metrics.get(k)}")
    print(" auc_pr_lead_curve (k→AUC):", metrics["auc_pr_lead_curve"])
    print(" coverage_onsets   (K→cov):", metrics["coverage_onsets"])
else:
    print("\n[info] Metrics skipped (no market series).")


Artifacts: {
  "features_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-230753Z_v03a_lite_fix\\gdren_v03a_lite_fix_features.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-230753Z_v03a_lite_fix\\gdren_v03a_lite_fix_metrics.json",
  "meta_json": "E:\\CNT\\artifacts\\g_dren\\20251105-230753Z_v03a_lite_fix\\meta.json"
}

=== v0.3a-lite SKILL (calibrated) ===
                     samples: 11394
                       years: 35.841204654346335
          event_onsets_total: 45
calibrated_thr_quantile_train: 0.99
                        hits: 0
                false_alarms: 0
       false_alarms_per_year: 0.0
       lead_time_days_median: None
         lead_time_days_mean: None
 auc_pr_lead_curve (k→AUC): {1: 0.0038998667934615114, 2: 0.0039006333315377045, 3: 0.0039004217787224067, 4: 0.0039008745657014665, 5: 0.0039000362672410877, 6: 0.0038998139332011117, 7: 0.003901709916340501}
 coverage_onsets   (K→cov): {1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0, 6: 0.0, 7: 0.0}


In [17]:
# === G-DREN v0.3a RETUNE — threshold & gating tuner (no recompute) ===
import json, time
from pathlib import Path
import numpy as np, pandas as pd

# --- knobs you can tweak quickly ---
FA_PER_YEAR_TARGET = 0.60   # raise to allow more starts, lower to be stricter
GATE_MIN_VOTES     = 2      # 2 is looser, 3+ stricter
COOLDOWN_DAYS      = 14
MAX_WARN_DAYS      = 12
HYST_EXIT_DAYS     = 3
LOOKAHEAD_D        = 7
PERSIST_WIN        = 5      # persistence window
PERSIST_REQ        = 2      # require >= this many days above threshold in the window
QGRID              = np.linspace(0.985, 0.999, 60)  # widen search (we were at ~0.99+)

# --- load latest v03a_lite_fix features ---
CNT = Path((Path("E:/CNT") if Path("E:/CNT").exists() else Path.cwd()))
ROOT = CNT / "artifacts" / "g_dren"
cands = sorted(ROOT.glob("*_v03a_lite_fix/gdren_v03a_lite_fix_features.csv"),
               key=lambda p: p.stat().st_mtime, reverse=True)
assert cands, "No gdren_v03a_lite_fix_features.csv files found"
FEAT = cands[0]
df = pd.read_csv(FEAT, index_col=0, parse_dates=True)

# sanity columns
need_cols = ["RISK","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix"]
missing = [c for c in need_cols if c not in df.columns]
assert not missing, f"Missing columns in features CSV: {missing}"

risk = pd.Series(df["RISK"].astype(float), index=df.index).fillna(0.0)
Theta_m = df["Theta_mkt"].astype(float).fillna(0.0).values
Theta_v = df["Theta_vix"].astype(float).fillna(0.0).values
Theta_c = df["Theta_cred"].astype(float).fillna(0.0).values
Gamma_v = df["Gamma_mkt_vix"].astype(float).fillna(0.0).values
onset   = df["EVENT_ONSET"].astype(int).values
idx     = df.index

# --- helper: build alert state from risk given a quantile threshold ---
def make_alert(risk_s: pd.Series, q: float):
    t = float(np.quantile(risk_s.values[np.isfinite(risk_s.values)], q))
    # votes = Θ_vix + Θ_cred + Θ_mkt + Γ(mkt↔VIX) + rising risk slope(7d)
    votes = ((Theta_v>=0.25).astype(int) +
             (Theta_c>=0.25).astype(int) +
             (Theta_m>=0.25).astype(int) +
             (Gamma_v>=0.08).astype(int) +
             ((risk_s.diff(7).fillna(0.0).values)>0).astype(int))
    # persistence: >= PERSIST_REQ of last PERSIST_WIN days above t
    persist = ( (risk_s >= t).astype(int)
               .rolling(PERSIST_WIN).sum()
               .fillna(0).values >= PERSIST_REQ )
    # state machine
    state=[]; in_warn=False; below=0; wlen=0
    rv = risk_s.values
    for i,(r,v,ps) in enumerate(zip(rv, votes, persist)):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS:
                in_warn=False; wlen=0; state.append('OK')
            else:
                state.append('WARNING')
        else:
            if (r>=t) and ps and (v>=GATE_MIN_VOTES):
                in_warn=True; below=0; wlen=1; state.append('WARNING')
            else:
                state.append('OK')
    return pd.Series(state, index=risk_s.index, name="alert"), t

# --- score starts/hits/FA vs onset (on a slice) ---
def score(alert_s: pd.Series, onset_arr: np.ndarray, ix):
    a = alert_s.loc[ix]
    starts = (a.eq("WARNING") & ~a.shift(1).eq("WARNING")).fillna(False).values
    s_ix = list(np.where(starts)[0])
    # cooldown
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_DAYS: kept.append(s); last=s
    s_ix = kept
    # hits within horizon
    e_ix = list(np.where(onset_arr[a.index.get_indexer(ix)]==1)[0])
    e_set = set(e_ix)
    hits=0
    for s in s_ix:
        if any(((s+j) in e_set) for j in range(1, LOOKAHEAD_D+1)): hits += 1
    fa = int(len(s_ix)-hits)
    years = max(1e-9, (ix[-1]-ix[0]).days/365.25)
    return {"starts": len(s_ix), "hits": hits, "fa_py": fa/years, "s_ix": s_ix}

# --- choose TRAIN slice for calibration (<= 2015-12-31) ---
TRAIN_END = pd.Timestamp("2015-12-31")
ix_train = idx[idx <= TRAIN_END]
assert len(ix_train)>2000, "Too little train history; adjust TRAIN_END."

# --- search thresholds to meet FA budget while maximizing hits ---
best = None
for q in QGRID:
    alert_q, thr = make_alert(risk.loc[idx], q)
    res = score(alert_q, onset, ix_train)
    cand = {"q": q, "thr": float(thr), **res}
    if (best is None) or \
       ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] > best.get("hits_ok", -1))) or \
       ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] == best.get("hits_ok", -1)) and (res["fa_py"] < best["fa_py"])) or \
       ((best["fa_py"] > FA_PER_YEAR_TARGET) and (res["fa_py"] < best["fa_py"])):
        best = cand.copy()
        best["hits_ok"] = res["hits"]

# Fallback: if still zero starts on train, force a looser threshold
if best["starts"] == 0:
    q_fallback = 0.985
    alert_q, thr = make_alert(risk.loc[idx], q_fallback)
    res = score(alert_q, onset, ix_train)
    best = {"q": q_fallback, "thr": float(thr), **res, "hits_ok": res["hits"]}

# --- apply best threshold to FULL series ---
alert_final, thr_final = make_alert(risk.loc[idx], best["q"])
full = score(alert_final, onset, idx)

# --- write artifacts ---
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime()) / "v03a_retune"
RUN.mkdir(parents=True, exist_ok=True)
(df.assign(alert=alert_final)
   [["RISK","alert","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix"]]
   .to_csv(RUN / "gdren_v03a_retune_alerts.csv"))

metrics = {
    "features_used": str(FEAT),
    "calibrated_q_train": float(best["q"]),
    "calibrated_thr_prob": float(best["thr"]),
    "train_starts": int(best["starts"]), "train_hits": int(best["hits"]),
    "train_fa_per_year": float(best["fa_py"]),
    "full_starts": int(full["starts"]), "full_hits": int(full["hits"]),
    "full_false_alarms_per_year": float(full["fa_py"]),
    "cooldown_days": COOLDOWN_DAYS, "max_warn_days": MAX_WARN_DAYS,
    "gate_min_votes": GATE_MIN_VOTES, "persist_win": PERSIST_WIN, "persist_req": PERSIST_REQ,
    "fa_per_year_target": FA_PER_YEAR_TARGET, "lookahead_days": LOOKAHEAD_D
}
(RUN / "gdren_v03a_retune_metrics.json").write_text(json.dumps(metrics, indent=2))
print("Artifacts:", json.dumps({
    "alerts_csv": str(RUN / "gdren_v03a_retune_alerts.csv"),
    "metrics_json": str(RUN / "gdren_v03a_retune_metrics.json")
}, indent=2))
print("\n=== RETUNE SUMMARY ===")
for k,v in metrics.items():
    print(f"{k:>24}: {v}")


Artifacts: {
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-231133Z\\v03a_retune\\gdren_v03a_retune_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-231133Z\\v03a_retune\\gdren_v03a_retune_metrics.json"
}

=== RETUNE SUMMARY ===
           features_used: E:\CNT\artifacts\g_dren\20251105-230753Z_v03a_lite_fix\gdren_v03a_lite_fix_features.csv
      calibrated_q_train: 0.985
     calibrated_thr_prob: 9.357622968839301e-14
            train_starts: 2
              train_hits: 0
       train_fa_per_year: 0.07694333263113545
             full_starts: 2
               full_hits: 0
full_false_alarms_per_year: 0.0558016958215568
           cooldown_days: 14
           max_warn_days: 12
          gate_min_votes: 2
             persist_win: 5
             persist_req: 2
      fa_per_year_target: 0.6
          lookahead_days: 7


In [18]:
# === G-DREN v0.3a RETUNE++ — hybrid driver (risk_pct ∨ consensus), wider q-grid, low-FA calibration ===
import json, time
from pathlib import Path
import numpy as np, pandas as pd

# ---- quick knobs ----
FA_PER_YEAR_TARGET = 0.60   # raise to allow more recall, lower to be stricter
GATE_MIN_VOTES     = 2      # 2=looser gate; try 3 later if FA rises
COOLDOWN_DAYS      = 14
MAX_WARN_DAYS      = 12
HYST_EXIT_DAYS     = 3
LOOKAHEAD_D        = 7
PERSIST_WIN        = 3      # require persistence in recent window
PERSIST_REQ        = 1
QGRID              = np.linspace(0.970, 0.995, 51)
WIN_RANK           = 512    # rolling window for percentile ranks

# ---- load latest features ----
ROOT = Path("E:/CNT/artifacts/g_dren")
cands = sorted(ROOT.glob("*_v03a_lite_fix/gdren_v03a_lite_fix_features.csv"),
               key=lambda p: p.stat().st_mtime, reverse=True)
assert cands, "No gdren_v03a_lite_fix_features.csv found."
FEAT = cands[0]
df = pd.read_csv(FEAT, index_col=0, parse_dates=True)

need = ["RISK","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
missing = [c for c in need if c not in df.columns]
assert not missing, f"Missing columns: {missing}"

risk = pd.Series(df["RISK"].astype(float), index=df.index).fillna(0.0)
Theta_m = df["Theta_mkt"].astype(float).fillna(0.0).values
Theta_v = df["Theta_vix"].astype(float).fillna(0.0).values
Theta_c = df["Theta_cred"].astype(float).fillna(0.0).values
Gamma_v = df["Gamma_mkt_vix"].astype(float).fillna(0.0).values
AR1_m   = df["AR1_mkt"].astype(float).fillna(0.0).values
onset   = df["EVENT_ONSET"].astype(int).values
idx     = df.index

# ---- helpers ----
def roll_pct_rank(s: pd.Series, win=512):
    s = s.astype(float)
    def pr(w):
        v = w[-1]
        arr = np.sort(w)
        return np.searchsorted(arr, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=True)

def make_alert(driver: pd.Series, q: float):
    t = float(np.quantile(driver.values[np.isfinite(driver.values)], q))
    votes = ((Theta_v>=0.25).astype(int) +
             (Theta_c>=0.25).astype(int) +
             (Theta_m>=0.25).astype(int) +
             (Gamma_v>=0.08).astype(int) +
             ((driver.diff(7).fillna(0.0).values)>0).astype(int))
    persist = ((driver>=t).astype(int).rolling(PERSIST_WIN).sum().fillna(0).values >= PERSIST_REQ)
    state=[]; in_warn=False; below=0; wlen=0
    for r,v,ps in zip(driver.values, votes, persist):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS:
                in_warn=False; wlen=0; state.append('OK')
            else:
                state.append('WARNING')
        else:
            if (r>=t) and ps and (v>=GATE_MIN_VOTES):
                in_warn=True; below=0; wlen=1; state.append('WARNING')
            else:
                state.append('OK')
    return pd.Series(state, index=driver.index), t

def score(alert_s: pd.Series, onset_arr: np.ndarray, ix):
    a = alert_s.loc[ix]
    starts = (a.eq("WARNING") & ~a.shift(1).eq("WARNING")).fillna(False).values
    s_ix = np.where(starts)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_DAYS: kept.append(s); last=s
    s_ix = kept
    # hits within horizon
    e_slice = onset_arr[a.index.get_indexer(ix)]
    hits=0
    for s in s_ix:
        if e_slice[s+1:min(s+1+LOOKAHEAD_D, len(e_slice))].any():
            hits += 1
    fa = int(len(s_ix)-hits)
    years = max(1e-9, (ix[-1]-ix[0]).days/365.25)
    return {"starts": len(s_ix), "hits": hits, "fa_py": fa/years, "s_ix": s_ix}

# ---- build HYBRID driver: max(rolling risk-percentile, consensus stress) ----
risk_pct = roll_pct_rank(risk, win=WIN_RANK).fillna(0.0)

# consensus = weighted mix of stress channels, then ranked
cons_raw = (0.45*pd.Series(Theta_v, index=idx) +
            0.35*pd.Series(Theta_c, index=idx) +
            0.15*pd.Series(np.maximum(0, AR1_m), index=idx) +
            0.05*pd.Series(np.nan_to_num(Gamma_v), index=idx))
cons_pct = roll_pct_rank(cons_raw, win=WIN_RANK).fillna(0.0)

driver = pd.Series(np.maximum(risk_pct.values, cons_pct.values), index=idx, name="DRIVER")

# ---- calibrate on TRAIN (<=2015-12-31) ----
TRAIN_END = pd.Timestamp("2015-12-31")
ix_train = idx[idx <= TRAIN_END]
assert len(ix_train)>2000, "Too little train history; adjust TRAIN_END."

best = None
for q in QGRID:
    alert_q, thr = make_alert(driver, q)
    res = score(alert_q, onset, ix_train)
    cand = {"q": float(q), "thr": float(thr), **res}
    if (best is None) or \
       ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] > best.get("hits_ok", -1))) or \
       ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] == best.get("hits_ok", -1)) and (res["fa_py"] < best["fa_py"])) or \
       ((best["fa_py"] > FA_PER_YEAR_TARGET) and (res["fa_py"] < best["fa_py"])):
        best = cand.copy(); best["hits_ok"] = res["hits"]

# fallback if zero starts
if best["starts"] == 0:
    q_fallback = float(QGRID[0])
    alert_q, thr = make_alert(driver, q_fallback)
    res = score(alert_q, onset, ix_train)
    best = {"q": q_fallback, "thr": float(thr), **res, "hits_ok": res["hits"]}

# ---- apply to FULL series ----
alert_final, thr_final = make_alert(driver, best["q"])
full = score(alert_final, onset, idx)

# ---- write artifacts ----
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime()) / "v03a_retune_pp"
RUN.mkdir(parents=True, exist_ok=True)
out_df = df.assign(DRIVER=driver, risk_pct=risk_pct, cons_pct=cons_pct, alert=alert_final)
(out_df[["RISK","risk_pct","cons_pct","DRIVER","alert","EVENT_ONSET",
         "Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix"]]
 .to_csv(RUN / "gdren_v03a_retune_pp_alerts.csv"))

metrics = {
    "features_used": str(FEAT),
    "calibrated_q_train": float(best["q"]),
    "calibrated_thr_driver": float(best["thr"]),
    "train_starts": int(best["starts"]), "train_hits": int(best["hits"]),
    "train_fa_per_year": float(best["fa_py"]),
    "full_starts": int(full["starts"]), "full_hits": int(full["hits"]),
    "full_false_alarms_per_year": float(full["fa_py"]),
    "cooldown_days": COOLDOWN_DAYS, "max_warn_days": MAX_WARN_DAYS,
    "gate_min_votes": GATE_MIN_VOTES, "persist_win": PERSIST_WIN, "persist_req": PERSIST_REQ,
    "fa_per_year_target": FA_PER_YEAR_TARGET, "lookahead_days": LOOKAHEAD_D
}
(RUN / "gdren_v03a_retune_pp_metrics.json").write_text(json.dumps(metrics, indent=2))
print("Artifacts:", json.dumps({
    "alerts_csv": str(RUN / "gdren_v03a_retune_pp_alerts.csv"),
    "metrics_json": str(RUN / "gdren_v03a_retune_pp_metrics.json")
}, indent=2))
print("\n=== RETUNE++ SUMMARY ===")
for k,v in metrics.items(): print(f"{k:>24}: {v}")


Artifacts: {
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-231403Z\\v03a_retune_pp\\gdren_v03a_retune_pp_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-231403Z\\v03a_retune_pp\\gdren_v03a_retune_pp_metrics.json"
}

=== RETUNE++ SUMMARY ===
           features_used: E:\CNT\artifacts\g_dren\20251105-230753Z_v03a_lite_fix\gdren_v03a_lite_fix_features.csv
      calibrated_q_train: 0.97
   calibrated_thr_driver: 1.0
            train_starts: 3
              train_hits: 0
       train_fa_per_year: 0.11541499894670318
             full_starts: 3
               full_hits: 0
full_false_alarms_per_year: 0.0837025437323352
           cooldown_days: 14
           max_warn_days: 12
          gate_min_votes: 2
             persist_win: 3
             persist_req: 1
      fa_per_year_target: 0.6
          lookahead_days: 7


In [19]:
# === G-DREN v0.3a RETUNE+++ — hybrid driver + grid sweep (no feature recompute) ===
import json, time
from pathlib import Path
import numpy as np, pandas as pd

# ---- knobs ----
FA_PER_YEAR_TARGET = 0.60
COOLDOWN_DAYS      = 14
MAX_WARN_DAYS      = 12
HYST_EXIT_DAYS     = 3
QGRID              = np.linspace(0.965, 0.990, 51)   # wider than before
WIN_RANK           = 512                             # rolling rank window
VOTE_THRESH_THETA  = 0.25
VOTE_THRESH_GAMMA  = 0.08

# ---- load latest features ----
ROOT = Path("E:/CNT/artifacts/g_dren")
cands = sorted(ROOT.glob("*_v03a_lite_fix/gdren_v03a_lite_fix_features.csv"),
               key=lambda p: p.stat().st_mtime, reverse=True)
assert cands, "No gdren_v03a_lite_fix_features.csv found."
FEAT = cands[0]
df = pd.read_csv(FEAT, index_col=0, parse_dates=True)

need = ["RISK","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
missing = [c for c in need if c not in df.columns]
assert not missing, f"Missing columns: {missing}"

risk   = pd.Series(df["RISK"].astype(float), index=df.index).fillna(0.0)
Theta_m= df["Theta_mkt"].astype(float).fillna(0.0).values
Theta_v= df["Theta_vix"].astype(float).fillna(0.0).values
Theta_c= df["Theta_cred"].astype(float).fillna(0.0).values
Gamma_v= df["Gamma_mkt_vix"].astype(float).fillna(0.0).values
AR1_m  = df["AR1_mkt"].astype(float).fillna(0.0).values
onset  = df["EVENT_ONSET"].astype(int).values
idx    = df.index

# ---- rolling percentile helper ----
def roll_pct_rank(s: pd.Series, win=512):
    def pr(w):
        v = w[-1]; a = np.sort(w)
        return np.searchsorted(a, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=True)

# ---- hybrid driver: max(risk_pct, consensus_pct) ----
risk_pct = roll_pct_rank(risk, win=WIN_RANK).fillna(0.0)
cons_raw = (0.45*pd.Series(Theta_v, index=idx) +
            0.35*pd.Series(Theta_c, index=idx) +
            0.15*pd.Series(np.maximum(0, AR1_m), index=idx) +
            0.05*pd.Series(np.nan_to_num(Gamma_v), index=idx))
cons_pct = roll_pct_rank(cons_raw, win=WIN_RANK).fillna(0.0)
driver   = pd.Series(np.maximum(risk_pct.values, cons_pct.values), index=idx, name="DRIVER")

# ---- optional VIX term-structure gate (non-fatal if fetch fails) ----
term_vote = np.zeros(len(idx), dtype=int)
try:
    import yfinance as yf
    def get_close(t):
        d = yf.Ticker(t).history(period="max", auto_adjust=False)
        return pd.Series(d["Close"]).rename(t)
    vix   = get_close("^VIX")
    vix9d = get_close("^VIX9D")
    vix3m = get_close("^VIX3M")
    # align
    D = pd.concat([vix, vix9d, vix3m], axis=1).reindex(idx).ffill()
    ts1 = (D["^VIX9D"] - D["^VIX"]  > 0).astype(int)   # front stress
    ts2 = (D["^VIX"]   - D["^VIX3M"]> 0).astype(int)   # contango inversion
    term_vote = np.where((ts1.add(ts2, fill_value=0) > 0), 1, 0)
except Exception as e:
    # fine; no extra vote
    term_vote = np.zeros(len(idx), dtype=int)

# ---- core gating pieces (vectors) ----
votes_base = ((Theta_v>=VOTE_THRESH_THETA).astype(int) +
              (Theta_c>=VOTE_THRESH_THETA).astype(int) +
              (Theta_m>=VOTE_THRESH_THETA).astype(int) +
              (Gamma_v>=VOTE_THRESH_GAMMA).astype(int) +
              ((driver.diff(7).fillna(0.0).values)>0).astype(int) +
              term_vote)

def make_alert(driver_s: pd.Series, q: float, gate_min_votes: int, persist_win: int, persist_req: int):
    t = float(np.quantile(driver_s.values[np.isfinite(driver_s.values)], q))
    persist = ((driver_s>=t).astype(int).rolling(persist_win).sum().fillna(0).values >= persist_req)
    state=[]; in_warn=False; below=0; wlen=0
    dv = driver_s.values
    for i,(r,v,p) in enumerate(zip(dv, votes_base, persist)):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS:
                in_warn=False; wlen=0; state.append('OK')
            else:
                state.append('WARNING')
        else:
            if (r>=t) and p and (v>=gate_min_votes):
                in_warn=True; below=0; wlen=1; state.append('WARNING')
            else:
                state.append('OK')
    return pd.Series(state, index=driver_s.index), t

def score(alert_s: pd.Series, onset_arr: np.ndarray, horizon: int, ix):
    a = alert_s.loc[ix]
    starts = (a.eq("WARNING") & ~a.shift(1).eq("WARNING")).fillna(False).values
    s_ix = np.where(starts)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_DAYS: kept.append(s); last=s
    s_ix = kept
    e_slice = onset_arr[a.index.get_indexer(ix)]
    hits=0
    for s in s_ix:
        if e_slice[s+1:min(s+1+horizon, len(e_slice))].any():
            hits += 1
    fa = int(len(s_ix)-hits)
    years = max(1e-9, (ix[-1]-ix[0]).days/365.25)
    return {"starts": len(s_ix), "hits": hits, "fa_py": fa/years, "s_ix": s_ix}

# ---- train slice (<=2015-12-31) ----
TRAIN_END = pd.Timestamp("2015-12-31")
ix_train = idx[idx <= TRAIN_END]
assert len(ix_train)>2000

# ---- sweep grid ----
best=None
for horizon in [7, 14]:
    for gate_min in [1, 2, 3]:
        for pwin in [3, 5]:
            for preq in [1, 2]:
                for q in QGRID:
                    alert_q, thr = make_alert(driver, q, gate_min, pwin, preq)
                    res = score(alert_q, onset, horizon, ix_train)
                    cand = {"q": float(q), "thr": float(thr), "horizon": horizon,
                            "gate_min_votes": gate_min, "persist_win": pwin, "persist_req": preq, **res}
                    # pick best: maximize hits under FA budget; tie-break lowest FA, then fewer starts
                    if (best is None) or \
                       ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] > best.get("hits_ok", -1))) or \
                       ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] == best.get("hits_ok", -1)) and (res["fa_py"] < best["fa_py"])) or \
                       ((best["fa_py"] > FA_PER_YEAR_TARGET) and (res["fa_py"] < best["fa_py"])):
                        best = cand.copy(); best["hits_ok"] = res["hits"]

# fallback if zero starts on train
if best["starts"] == 0:
    q_fallback = float(QGRID[0])
    alert_q, thr = make_alert(driver, q_fallback, 1, 3, 1)
    res = score(alert_q, onset, 14, ix_train)
    best = {"q": q_fallback, "thr": float(thr), "horizon": 14,
            "gate_min_votes": 1, "persist_win": 3, "persist_req": 1, **res, "hits_ok": res["hits"]}

# ---- apply best to FULL series ----
alert_final, thr_final = make_alert(driver, best["q"], best["gate_min_votes"], best["persist_win"], best["persist_req"])
full = score(alert_final, onset, best["horizon"], idx)

# ---- save ----
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime()) / "v03a_retune_sweep"
RUN.mkdir(parents=True, exist_ok=True)
out = df.assign(risk_pct=risk_pct, cons_pct=cons_pct, DRIVER=driver, alert=alert_final)
out_cols = ["RISK","risk_pct","cons_pct","DRIVER","alert","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
out[out_cols].to_csv(RUN / "gdren_v03a_retune_sweep_alerts.csv")

metrics = {
    "features_used": str(FEAT),
    "calibrated_q_train": best["q"],
    "calibrated_thr_driver": best["thr"],
    "train_horizon_days": best["horizon"],
    "train_starts": int(best["starts"]),
    "train_hits": int(best["hits"]),
    "train_fa_per_year": float(best["fa_py"]),
    "gate_min_votes": int(best["gate_min_votes"]),
    "persist_win": int(best["persist_win"]),
    "persist_req": int(best["persist_req"]),
    "full_starts": int(full["starts"]),
    "full_hits": int(full["hits"]),
    "full_false_alarms_per_year": float(full["fa_py"]),
    "fa_per_year_target": FA_PER_YEAR_TARGET
}
(Path(RUN / "gdren_v03a_retune_sweep_metrics.json")).write_text(json.dumps(metrics, indent=2))
print("Artifacts:", json.dumps({
    "alerts_csv": str(RUN / "gdren_v03a_retune_sweep_alerts.csv"),
    "metrics_json": str(RUN / "gdren_v03a_retune_sweep_metrics.json")
}, indent=2))
print("\n=== RETUNE+++ SUMMARY ===")
for k,v in metrics.items(): print(f"{k:>26}: {v}")


Artifacts: {
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-231708Z\\v03a_retune_sweep\\gdren_v03a_retune_sweep_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-231708Z\\v03a_retune_sweep\\gdren_v03a_retune_sweep_metrics.json"
}

=== RETUNE+++ SUMMARY ===
             features_used: E:\CNT\artifacts\g_dren\20251105-230753Z_v03a_lite_fix\gdren_v03a_lite_fix_features.csv
        calibrated_q_train: 0.965
     calibrated_thr_driver: 1.0
        train_horizon_days: 14
              train_starts: 3
                train_hits: 1
         train_fa_per_year: 0.07694333263113545
            gate_min_votes: 2
               persist_win: 3
               persist_req: 1
               full_starts: 3
                 full_hits: 1
full_false_alarms_per_year: 0.0558016958215568
        fa_per_year_target: 0.6


In [20]:
# === G-DREN Ops Shim v0.1 — “What’s the bell say today?” ===
import os, json, time
from pathlib import Path
import pandas as pd
import numpy as np

# ---- roots ----
CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
assert ROOT.exists(), f"Missing: {ROOT}"

TODAY = pd.Timestamp.today().normalize()

def _latest(path_globs):
    """Return newest file by mtime from a list of glob patterns (recursive)."""
    cands = []
    for pat in path_globs:
        cands += list(ROOT.glob(pat))
    if not cands:
        return None
    return sorted(cands, key=lambda p: p.stat().st_mtime, reverse=True)[0]

def _read_alerts(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True)
    # normalize to tz-naive daily
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    df = df[~df.index.duplicated(keep="last")].sort_index()
    return df

def _status_line(name, df, key_cols=()):
    # pick the most recent available day ≤ today
    ix = df.index[df.index <= TODAY]
    if len(ix) == 0:
        ix = df.index
    last = ix[-1]
    row = df.loc[last]
    alert = str(row.get("alert", "OK"))
    bits = []
    for k in key_cols:
        if k in row and pd.notna(row[k]):
            val = float(row[k])
            bits.append(f"{k}={val:.3f}")
    # since-warning info (if currently WARNING)
    since = ""
    try:
        starts = (df["alert"].eq("WARNING") & ~df["alert"].shift(1).eq("WARNING")).fillna(False)
        last_start = df.index[(starts) & (df.index <= last)]
        if alert == "WARNING" and len(last_start) > 0:
            days = int((last - last_start[-1]).days)
            since = f" (day {days+1} of WARNING)"
    except Exception:
        pass
    cols = (" | " + " ".join(bits)) if bits else ""
    return f"{name:10s} — {alert:8s} as of {last.date()}{since}{cols}", last, alert

def g_dren_status():
    # ---- locate latest artifacts ----
    # Spike Bell (v0.2b)
    spike_fp = _latest([
        "*_v02b/gdren_v02b_alerts.csv",      # canonical
        "*_v02b/*alerts*.csv",               # fallback
    ])
    # Regime Bell (v0.3 sweeps/retunes)
    regime_fp = _latest([
        "*/v03a_retune_sweep/gdren_v03a_retune_sweep_alerts.csv",  # preferred
        "*/v03a_retune_pp/gdren_v03a_retune_pp_alerts.csv",
        "*/v03a_retune/gdren_v03a_retune_alerts.csv",
        "*_v02e/*alerts*.csv", "*_v02f/*alerts*.csv", "*_v02g/*alerts*.csv"  # earlier betas as fallback
    ])

    lines = []
    payload = {"generated_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())}

    # ---- Spike Bell ----
    if spike_fp and spike_fp.exists():
        df_spike = _read_alerts(spike_fp)
        line, when, alert = _status_line("SPIKE", df_spike, key_cols=("NEXUS",))
        lines.append(line)
        payload["spike"] = {
            "file": str(spike_fp),
            "asof": str(when.date()),
            "alert": alert,
            "nexus": (float(df_spike.loc[when, "NEXUS"]) if "NEXUS" in df_spike.columns and pd.notna(df_spike.loc[when, "NEXUS"]) else None)
        }
    else:
        lines.append("SPIKE      — (no recent v0.2b alerts found)")
        payload["spike"] = {"file": None, "alert": None}

    # ---- Regime Bell ----
    if regime_fp and regime_fp.exists():
        df_reg = _read_alerts(regime_fp)
        key_cols = tuple(k for k in ("RISK","DRIVER") if k in df_reg.columns)
        line, when, alert = _status_line("REGIME", df_reg, key_cols=key_cols)
        lines.append(line)
        payload["regime"] = {
            "file": str(regime_fp),
            "asof": str(when.date()),
            "alert": alert,
            "risk": (float(df_reg.loc[when, "RISK"]) if "RISK" in df_reg.columns and pd.notna(df_reg.loc[when, "RISK"]) else None),
            "driver": (float(df_reg.loc[when, "DRIVER"]) if "DRIVER" in df_reg.columns and pd.notna(df_reg.loc[when, "DRIVER"]) else None)
        }
    else:
        lines.append("REGIME     — (no recent v0.3 alerts found)")
        payload["regime"] = {"file": None, "alert": None}

    # ---- write dashboard JSON ----
    dash_dir = ROOT / "dashboard"
    dash_dir.mkdir(parents=True, exist_ok=True)
    dash_path = dash_dir / "g_dren_dashboard_status.json"
    dash_path.write_text(json.dumps(payload, indent=2))

    print("\n".join(lines))
    print(f"\n[dashboard] {dash_path}")

# ---- run once to print today’s status ----
g_dren_status()


SPIKE      — OK       as of 2025-11-05 | NEXUS=0.286
REGIME     — OK       as of 2025-11-05 | RISK=0.000 DRIVER=1.000

[dashboard] E:\CNT\artifacts\g_dren\dashboard\g_dren_dashboard_status.json


In [21]:
# === G-DREN Ops Diagnostics v0.2 — explain today's REGIME decision + what-ifs ===
import json, time
from pathlib import Path
import pandas as pd
import numpy as np

CNT = Path("E:/CNT")
ROOT = CNT / "artifacts" / "g_dren"
TODAY = pd.Timestamp.today().normalize()

def latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

# Load latest REGIME alerts + metrics (retune_sweep preferred)
alerts_fp = latest(["*/v03a_retune_sweep/gdren_v03a_retune_sweep_alerts.csv",
                    "*/v03a_retune_pp/gdren_v03a_retune_pp_alerts.csv",
                    "*/v03a_retune/gdren_v03a_retune_alerts.csv"])
metrics_fp = latest(["*/v03a_retune_sweep/gdren_v03a_retune_sweep_metrics.json",
                     "*/v03a_retune_pp/gdren_v03a_retune_pp_metrics.json",
                     "*/v03a_retune/gdren_v03a_retune_metrics.json"])

assert alerts_fp and metrics_fp, "No regime artifacts found"

DF = pd.read_csv(alerts_fp, index_col=0, parse_dates=True).sort_index()
DF.index = DF.index.tz_localize(None).normalize()
ix = DF.index[DF.index<=TODAY]
last = ix[-1] if len(ix) else DF.index[-1]
row = DF.loc[last]

M = json.loads(metrics_fp.read_text())
q    = float(M.get("calibrated_q_train", 0.97))
thr  = float(M.get("calibrated_thr_driver", 1.0))
gate = int(M.get("gate_min_votes", 2))
pwin = int(M.get("persist_win", 3))
preq = int(M.get("persist_req", 1))

# Compute votes & persistence for today
Theta_m = float(row.get("Theta_mkt", 0))
Theta_v = float(row.get("Theta_vix", 0))
Theta_c = float(row.get("Theta_cred",0))
Gamma_v = float(row.get("Gamma_mkt_vix",0))
driver  = float(row.get("DRIVER", np.nan)) if "DRIVER" in DF.columns else np.nan

def votes_at(ts):
    i = DF.index.get_loc(ts)
    slope7 = 0.0
    if i>=7 and "DRIVER" in DF.columns:
        slope7 = float(DF["DRIVER"].iloc[i] - DF["DRIVER"].iloc[i-7])
    v = int(Theta_v>=0.25) + int(Theta_c>=0.25) + int(Theta_m>=0.25) + int(Gamma_v>=0.08) + int(slope7>0)
    return v, slope7

def persisted(ts, threshold, win, req):
    i = DF.index.get_loc(ts)
    L = DF["DRIVER"].iloc[max(0,i-win+1):i+1]
    return int((L>=threshold).sum() >= req), int((L>=threshold).sum()), len(L)

votes, slope7 = votes_at(last)
persist_ok, persist_hits, persist_len = persisted(last, thr, pwin, preq)

print(f"== REGIME diagnostics @ {last.date()} ==")
print(f"alert         : {row.get('alert')}")
print(f"DRIVER / thr  : {driver:.3f} / {thr:.3f} (q={q})")
print(f"votes (need≥{gate}): {votes}  [Θ_vix={Theta_v:.3f} Θ_cred={Theta_c:.3f} Θ_mkt={Theta_m:.3f} Γ_mv={Gamma_v:.3f} slope7={slope7:.3f}]")
print(f"persistence   : {'OK' if persist_ok else 'NO'}  (≥{preq} of last {pwin} days ≥ thr; had {persist_hits})")

reason = []
if driver < thr: reason.append("driver<threshold")
if votes < gate: reason.append("insufficient votes")
if not persist_ok: reason.append("failed persistence")
print("decision path : " + (", ".join(reason) if reason else "would qualify"))

# --- What-if scenarios ---
def simulate(q_alt=None, gate_alt=None, preq_alt=None, pwin_alt=None):
    q_a   = q if q_alt is None else q_alt
    thr_a = float(DF["DRIVER"].quantile(q_a))
    gate_a= gate if gate_alt is None else gate_alt
    preq_a= preq if preq_alt is None else preq_alt
    pwin_a= pwin if pwin_alt is None else pwin_alt
    v,_   = votes_at(last)
    p_ok, p_hits, p_len = persisted(last, thr_a, pwin_a, preq_a)
    qualifies = (driver>=thr_a) and (v>=gate_a) and p_ok
    return {
        "q": q_a, "thr": thr_a, "gate": gate_a, "persist_req": preq_a, "persist_win": pwin_a,
        "qualifies": qualifies, "driver_ge_thr": (driver>=thr_a),
        "votes": v, "persist_hits": p_hits, "persist_len": p_len
    }

print("\n-- what-if: gate=1, q=0.975, persist_req=1/3 --")
print(simulate(q_alt=0.975, gate_alt=1, preq_alt=1, pwin_alt=3))
print("\n-- what-if: gate=2, q=0.970, persist_req=1/3 --")
print(simulate(q_alt=0.970, gate_alt=2, preq_alt=1, pwin_alt=3))


== REGIME diagnostics @ 2025-11-05 ==
alert         : OK
DRIVER / thr  : 1.000 / 1.000 (q=0.965)
votes (need≥2): 1  [Θ_vix=0.016 Θ_cred=0.023 Θ_mkt=0.016 Γ_mv=0.776 slope7=0.000]
persistence   : OK  (≥1 of last 3 days ≥ thr; had 3)
decision path : insufficient votes

-- what-if: gate=1, q=0.975, persist_req=1/3 --
{'q': 0.975, 'thr': 1.0, 'gate': 1, 'persist_req': 1, 'persist_win': 3, 'qualifies': 1, 'driver_ge_thr': True, 'votes': 1, 'persist_hits': 3, 'persist_len': 3}

-- what-if: gate=2, q=0.970, persist_req=1/3 --
{'q': 0.97, 'thr': 1.0, 'gate': 2, 'persist_req': 1, 'persist_win': 3, 'qualifies': False, 'driver_ge_thr': True, 'votes': 1, 'persist_hits': 3, 'persist_len': 3}


In [22]:
# === G-DREN Ops Shim v0.1.1 — reason-coded status + PREWARN policy overlay ===
import os, json, time
from pathlib import Path
import pandas as pd
import numpy as np

CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
TODAY = pd.Timestamp.today().normalize()

def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

def _read_alerts(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True)
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    return df[~df.index.duplicated(keep="last")].sort_index()

def _status_line(name, date, base_alert, extras):
    parts = [f"{name:10s} — {base_alert:8s} as of {date}"]
    if extras:
        parts.append(" | " + " ".join(extras))
    return "".join(parts)

# ---- locate latest SPIKE + REGIME artifacts ----
spike_fp  = _latest(["*_v02b/gdren_v02b_alerts.csv", "*_v02b/*alerts*.csv"])
regime_fp = _latest([
    "*/v03a_retune_sweep/gdren_v03a_retune_sweep_alerts.csv",
    "*/v03a_retune_pp/gdren_v03a_retune_pp_alerts.csv",
    "*/v03a_retune/gdren_v03a_retune_alerts.csv"
])
regime_metrics = _latest([
    "*/v03a_retune_sweep/gdren_v03a_retune_sweep_metrics.json",
    "*/v03a_retune_pp/gdren_v03a_retune_pp_metrics.json",
    "*/v03a_retune/gdren_v03a_retune_metrics.json"
])

payload = {"generated_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())}
lines = []

# ---- SPIKE status (unchanged) ----
if spike_fp and spike_fp.exists():
    S = _read_alerts(spike_fp)
    ix = S.index[S.index<=TODAY]
    last = (ix[-1] if len(ix) else S.index[-1]).date()
    s_alert = str(S.loc[pd.Timestamp(last), "alert"])
    s_nexus = float(S.loc[pd.Timestamp(last), "NEXUS"]) if "NEXUS" in S.columns else None
    lines.append(_status_line("SPIKE", last, s_alert, [f"NEXUS={s_nexus:.3f}"] if s_nexus is not None else []))
    payload["spike"] = {"file": str(spike_fp), "asof": str(last), "alert": s_alert, "nexus": s_nexus}
else:
    lines.append("SPIKE      — (no recent v0.2b alerts found)")
    payload["spike"] = {"file": None, "alert": None}

# ---- REGIME status with reason + PREWARN policy ----
if regime_fp and regime_fp.exists() and regime_metrics and regime_metrics.exists():
    R = _read_alerts(regime_fp)
    with open(regime_metrics, "r") as f:
        M = json.load(f)

    ix = R.index[R.index<=TODAY]
    last_ts = ix[-1] if len(ix) else R.index[-1]
    last = last_ts.date()
    base_alert = str(R.loc[last_ts, "alert"])

    # pull calibrated threshold if present; else fallback to q-quantile on DRIVER
    q = float(M.get("calibrated_q_train", 0.97))
    if "calibrated_thr_driver" in M:
        thr = float(M["calibrated_thr_driver"])
    else:
        if "DRIVER" in R.columns:
            thr = float(pd.Series(R["DRIVER"]).quantile(q))
        else:
            thr = 1.0  # neutral fallback

    driver  = float(R.loc[last_ts, "DRIVER"]) if "DRIVER" in R.columns else np.nan
    risk    = float(R.loc[last_ts, "RISK"])   if "RISK"   in R.columns else np.nan
    Theta_m = float(R.loc[last_ts, "Theta_mkt"])     if "Theta_mkt"     in R.columns else 0.0
    Theta_v = float(R.loc[last_ts, "Theta_vix"])     if "Theta_vix"     in R.columns else 0.0
    Theta_c = float(R.loc[last_ts, "Theta_cred"])    if "Theta_cred"    in R.columns else 0.0
    Gamma_v = float(R.loc[last_ts, "Gamma_mkt_vix"]) if "Gamma_mkt_vix" in R.columns else 0.0

    # 7-day slope on DRIVER
    i = R.index.get_loc(last_ts)
    slope7 = 0.0
    if "DRIVER" in R.columns and i>=7:
        slope7 = float(R["DRIVER"].iloc[i] - R["DRIVER"].iloc[i-7])

    # persistence over last 3 days ≥ thr (matches our tuner defaults)
    persist_hits = int((R["DRIVER"].iloc[max(0, i-2):i+1] >= thr).sum()) if "DRIVER" in R.columns else 0
    persist_ok = (persist_hits >= 1)

    # votes (same definition as tuner)
    votes = int(Theta_v>=0.25) + int(Theta_c>=0.25) + int(Theta_m>=0.25) + int(Gamma_v>=0.08) + int(slope7>0)
    gate_needed = int(M.get("gate_min_votes", 2))

    # reason for OK
    reasons = []
    if not np.isfinite(driver) or driver < thr: reasons.append("threshold")
    if votes < gate_needed:                      reasons.append("votes")
    if not persist_ok:                           reasons.append("persistence")

    # PREWARN policy: if driver≥thr & persistence OK & Γ strong (≥0.75), tag PREWARN (no change to WARNING).
    policy_alert = base_alert
    if base_alert != "WARNING":
        if np.isfinite(driver) and driver >= thr and persist_ok and (Gamma_v >= 0.75):
            policy_alert = "PREWARN"

    # print line
    extras = []
    if np.isfinite(risk):   extras.append(f"RISK={risk:.3f}")
    if np.isfinite(driver): extras.append(f"DRIVER={driver:.3f}")
    if policy_alert == base_alert and base_alert == "OK" and reasons:
        extras.append(f"held_by={'+'.join(reasons)}")
    elif policy_alert == "PREWARN":
        extras.append("policy=PREWARN(Γ strong)")

    lines.append(_status_line("REGIME", last, policy_alert, extras))

    payload["regime"] = {
        "file": str(regime_fp), "metrics": str(regime_metrics),
        "asof": str(last), "alert": base_alert, "policy_alert": policy_alert,
        "driver": driver, "risk": risk, "threshold": thr, "q": q,
        "votes": votes, "gate_needed": gate_needed,
        "Theta": {"mkt": Theta_m, "vix": Theta_v, "cred": Theta_c},
        "Gamma_mkt_vix": Gamma_v, "slope7": slope7,
        "persistence": {"ok": bool(persist_ok), "hits": persist_hits, "win": 3},
        "held_by": reasons
    }
else:
    lines.append("REGIME     — (no recent v0.3 alerts found)")
    payload["regime"] = {"file": None, "alert": None}

# ---- write dashboard JSON ----
dash_dir = ROOT / "dashboard"
dash_dir.mkdir(parents=True, exist_ok=True)
dash_path = dash_dir / "g_dren_dashboard_status.json"
dash_path.write_text(json.dumps(payload, indent=2))

print("\n".join(lines))
print(f"\n[dashboard] {dash_path}")


SPIKE      — OK       as of 2025-11-05 | NEXUS=0.286
REGIME     — PREWARN  as of 2025-11-05 | RISK=0.000 DRIVER=1.000 policy=PREWARN(Γ strong)

[dashboard] E:\CNT\artifacts\g_dren\dashboard\g_dren_dashboard_status.json


In [23]:
# === G-DREN Ops Shim v0.1.2 — history (last 5 transitions) + reason + policy overlay ===
import os, json, time
from pathlib import Path
import pandas as pd
import numpy as np

CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
TODAY = pd.Timestamp.today().normalize()

def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

def _load_csv(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True)
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    return df[~df.index.duplicated(keep="last")].sort_index()

def _last_transitions(df, col="alert", n=5):
    a = df[col].astype(str)
    trans = (a != a.shift(1)).fillna(True)
    pts = df.index[trans]
    out=[]
    for i in range(len(pts)-1, -1, -1):
        t = pts[i]
        state = a.loc[t]
        if i+1 < len(pts):
            next_t = pts[i+1]
            days = int((next_t - t).days)
        else:
            days = int((df.index[-1] - t).days) + 1
        out.append((t.date(), state, days))
        if len(out) >= n: break
    return out

def _status_line(name, date, base_alert, extras):
    parts = [f"{name:10s} — {base_alert:8s} as of {date}"]
    if extras: parts.append(" | " + " ".join(extras))
    return "".join(parts)

# ---- locate artifacts ----
spike_fp  = _latest(["*_v02b/gdren_v02b_alerts.csv", "*_v02b/*alerts*.csv"])
regime_fp = _latest([
    "*/v03a_retune_sweep/gdren_v03a_retune_sweep_alerts.csv",
    "*/v03a_retune_pp/gdren_v03a_retune_pp_alerts.csv",
    "*/v03a_retune/gdren_v03a_retune_alerts.csv"
])
regime_metrics = _latest([
    "*/v03a_retune_sweep/gdren_v03a_retune_sweep_metrics.json",
    "*/v03a_retune_pp/gdren_v03a_retune_pp_metrics.json",
    "*/v03a_retune/gdren_v03a_retune_metrics.json"
])

payload = {"generated_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())}
lines   = []
overlay = {"recommendation": None, "why": None}

# ---- SPIKE block ----
if spike_fp and spike_fp.exists():
    S = _load_csv(spike_fp)
    ix = S.index[S.index<=TODAY]
    last_ts = ix[-1] if len(ix) else S.index[-1]
    s_alert = str(S.loc[last_ts, "alert"])
    s_nexus = float(S.loc[last_ts, "NEXUS"]) if "NEXUS" in S.columns and pd.notna(S.loc[last_ts, "NEXUS"]) else None
    lines.append(_status_line("SPIKE", last_ts.date(), s_alert, [f"NEXUS={s_nexus:.3f}"] if s_nexus is not None else []))
    hist = _last_transitions(S, "alert", n=5)
    lines.append("  recent: " + " · ".join([f"{d}:{st}({days}d)" for d,st,days in hist]))
    payload["spike"] = {"file": str(spike_fp), "asof": str(last_ts.date()), "alert": s_alert, "nexus": s_nexus, "recent": hist}
else:
    lines.append("SPIKE      — (no recent v0.2b alerts found)")
    payload["spike"] = {"file": None, "alert": None, "recent": []}

# ---- REGIME block (reason + policy PREWARN echo) ----
if regime_fp and regime_fp.exists():
    R = _load_csv(regime_fp)
    ix = R.index[R.index<=TODAY]
    last_ts = ix[-1] if len(ix) else R.index[-1]
    base_alert = str(R.loc[last_ts, "alert"]) if "alert" in R.columns else "OK"

    # metrics (for q / threshold)
    q = 0.97
    thr = 1.0
    if regime_metrics and regime_metrics.exists():
        M = json.loads(regime_metrics.read_text())
        q   = float(M.get("calibrated_q_train", q))
        thr = float(M.get("calibrated_thr_driver", thr)) if "calibrated_thr_driver" in M else thr
        gate_needed = int(M.get("gate_min_votes", 2))
    else:
        gate_needed = 2

    vals = lambda k, default=0.0: float(R.loc[last_ts, k]) if k in R.columns and pd.notna(R.loc[last_ts, k]) else default
    driver  = vals("DRIVER", np.nan) if "DRIVER" in R.columns else np.nan
    risk    = vals("RISK",   np.nan) if "RISK"   in R.columns else np.nan
    Theta_m = vals("Theta_mkt")
    Theta_v = vals("Theta_vix")
    Theta_c = vals("Theta_cred")
    Gamma_v = vals("Gamma_mkt_vix")
    # slope7
    slope7 = 0.0
    if "DRIVER" in R.columns:
        i = R.index.get_loc(last_ts)
        if i>=7: slope7 = float(R["DRIVER"].iloc[i] - R["DRIVER"].iloc[i-7])

    # persistence 1/3 over threshold
    persist_hits = int((R["DRIVER"].iloc[max(0, R.index.get_loc(last_ts)-2):R.index.get_loc(last_ts)+1] >= thr).sum()) if "DRIVER" in R.columns else 0
    persist_ok   = (persist_hits >= 1)

    # votes
    votes = int(Theta_v>=0.25) + int(Theta_c>=0.25) + int(Theta_m>=0.25) + int(Gamma_v>=0.08) + int(slope7>0)

    # PREWARN overlay (policy): driver≥thr & persist OK & Γ strong
    policy_alert = base_alert
    if base_alert != "WARNING" and np.isfinite(driver) and driver>=thr and persist_ok and (Gamma_v >= 0.75):
        policy_alert = "PREWARN"

    # reason
    reasons=[]
    if not np.isfinite(driver) or driver < thr: reasons.append("threshold")
    if votes < gate_needed:                      reasons.append("votes")
    if not persist_ok:                           reasons.append("persistence")

    extras = []
    if np.isfinite(risk):   extras.append(f"RISK={risk:.3f}")
    if np.isfinite(driver): extras.append(f"DRIVER={driver:.3f}")
    if policy_alert == "PREWARN" and base_alert != "WARNING":
        extras.append("policy=PREWARN(Γ strong)")
    elif base_alert == "OK" and reasons:
        extras.append(f"held_by={'+'.join(reasons)}")

    lines.append(_status_line("REGIME", last_ts.date(), policy_alert, extras))
    hist = _last_transitions(R, "alert", n=5)
    lines.append("  recent: " + " · ".join([f"{d}:{st}({days}d)" for d,st,days in hist]))

    payload["regime"] = {
        "file": str(regime_fp),
        "metrics": (str(regime_metrics) if regime_metrics else None),
        "asof": str(last_ts.date()),
        "alert": base_alert,
        "policy_alert": policy_alert,
        "q": q, "threshold": thr,
        "driver": driver, "risk": risk,
        "Theta": {"mkt": Theta_m, "vix": Theta_v, "cred": Theta_c},
        "Gamma_mkt_vix": Gamma_v, "slope7": slope7,
        "votes": votes, "gate_needed": gate_needed,
        "persistence": {"ok": bool(persist_ok), "hits": persist_hits, "win": 3},
        "recent": hist,
        "held_by": reasons
    }

    # policy overlay recommendation if PREWARN pattern (driver≥thr & Γ strong) is present but votes < gate
    overlay_path = ROOT / "dashboard" / "g_dren_policy_overlay.json"
    overlay_path.parent.mkdir(parents=True, exist_ok=True)
    if np.isfinite(driver) and driver>=thr and persist_ok and (Gamma_v >= 0.75) and (votes < gate_needed):
        overlay = {
            "recommendation": "consider temporary gate=1 with q≈0.975 (persist 1/3) to catch potential onset; keep FA budget ≤0.8/yr",
            "why": {
                "driver_ge_threshold": True,
                "gamma_strong": True,
                "persistence_ok": True,
                "votes_vs_gate": f"{votes} < {gate_needed}"
            },
            "asof": str(last_ts.date())
        }
    else:
        overlay = {"recommendation": None, "why": None, "asof": str(last_ts.date())}
    overlay_path.write_text(json.dumps(overlay, indent=2))

else:
    lines.append("REGIME     — (no recent v0.3 alerts found)")
    payload["regime"] = {"file": None, "alert": None, "recent": []}

# ---- write dashboard JSON ----
dash_dir = ROOT / "dashboard"
dash_dir.mkdir(parents=True, exist_ok=True)
dash_path = dash_dir / "g_dren_dashboard_status.json"
dash_path.write_text(json.dumps(payload, indent=2))

print("\n".join(lines))
print(f"\n[dashboard] {dash_path}")
if overlay["recommendation"]:
    print(f"[overlay ] {ROOT/'dashboard'/'g_dren_policy_overlay.json'}  ← suggestion saved")


TypeError: Object of type date is not JSON serializable

In [24]:
# === G-DREN Ops Shim v0.1.2-fix — JSON-safe dates + history + policy overlay ===
import os, json, time
from pathlib import Path
import pandas as pd
import numpy as np

CNT = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
TODAY = pd.Timestamp.today().normalize()

def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

def _load_csv(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True)
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    return df[~df.index.duplicated(keep="last")].sort_index()

def _last_transitions(df, col="alert", n=5):
    a = df[col].astype(str)
    trans = (a != a.shift(1)).fillna(True)
    pts = df.index[trans]
    out=[]
    for i in range(len(pts)-1, -1, -1):
        t = pts[i]
        state = a.loc[t]
        if i+1 < len(pts):
            next_t = pts[i+1]
            days = int((next_t - t).days)
        else:
            days = int((df.index[-1] - t).days) + 1
        out.append((t.date(), state, days))
        if len(out) >= n: break
    return out

def _stringify_recent(hist):
    # convert [(date, state, days), ...] -> [{"date":"YYYY-MM-DD","state":..., "days":...}, ...]
    return [{"date": str(d), "state": st, "days": int(days)} for (d, st, days) in hist]

def _status_line(name, date, base_alert, extras):
    parts = [f"{name:10s} — {base_alert:8s} as of {date}"]
    if extras: parts.append(" | " + " ".join(extras))
    return "".join(parts)

# ---- locate artifacts ----
spike_fp  = _latest(["*_v02b/gdren_v02b_alerts.csv", "*_v02b/*alerts*.csv"])
regime_fp = _latest([
    "*/v03a_retune_sweep/gdren_v03a_retune_sweep_alerts.csv",
    "*/v03a_retune_pp/gdren_v03a_retune_pp_alerts.csv",
    "*/v03a_retune/gdren_v03a_retune_alerts.csv"
])
regime_metrics = _latest([
    "*/v03a_retune_sweep/gdren_v03a_retune_sweep_metrics.json",
    "*/v03a_retune_pp/gdren_v03a_retune_pp_metrics.json",
    "*/v03a_retune/gdren_v03a_retune_metrics.json"
])

payload = {"generated_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())}
lines   = []
overlay = {"recommendation": None, "why": None, "asof": str(TODAY.date())}

# ---- SPIKE block ----
if spike_fp and spike_fp.exists():
    S = _load_csv(spike_fp)
    ix = S.index[S.index<=TODAY]
    last_ts = ix[-1] if len(ix) else S.index[-1]
    s_alert = str(S.loc[last_ts, "alert"])
    s_nexus = float(S.loc[last_ts, "NEXUS"]) if "NEXUS" in S.columns and pd.notna(S.loc[last_ts, "NEXUS"]) else None
    lines.append(_status_line("SPIKE", str(last_ts.date()), s_alert, [f"NEXUS={s_nexus:.3f}"] if s_nexus is not None else []))
    hist = _last_transitions(S, "alert", n=5)
    lines.append("  recent: " + " · ".join([f"{d}:{st}({days}d)" for d,st,days in hist]))
    payload["spike"] = {
        "file": str(spike_fp),
        "asof": str(last_ts.date()),
        "alert": s_alert,
        "nexus": (float(s_nexus) if s_nexus is not None else None),
        "recent": _stringify_recent(hist)
    }
else:
    lines.append("SPIKE      — (no recent v0.2b alerts found)")
    payload["spike"] = {"file": None, "alert": None, "recent": []}

# ---- REGIME block ----
if regime_fp and regime_fp.exists():
    R = _load_csv(regime_fp)
    ix = R.index[R.index<=TODAY]
    last_ts = ix[-1] if len(ix) else R.index[-1]
    base_alert = str(R.loc[last_ts, "alert"]) if "alert" in R.columns else "OK"

    # metrics (for q / threshold)
    q = 0.97; thr = 1.0; gate_needed = 2
    if regime_metrics and regime_metrics.exists():
        M = json.loads(regime_metrics.read_text())
        q   = float(M.get("calibrated_q_train", q))
        thr = float(M.get("calibrated_thr_driver", thr)) if "calibrated_thr_driver" in M else thr
        gate_needed = int(M.get("gate_min_votes", gate_needed))

    # pull values (coerce to plain floats)
    def val(col, default=np.nan):
        return float(R.loc[last_ts, col]) if col in R.columns and pd.notna(R.loc[last_ts, col]) else float(default)

    driver  = val("DRIVER", np.nan) if "DRIVER" in R.columns else np.nan
    risk    = val("RISK",   np.nan) if "RISK"   in R.columns else np.nan
    Theta_m = val("Theta_mkt", 0.0); Theta_v = val("Theta_vix", 0.0); Theta_c = val("Theta_cred", 0.0)
    Gamma_v = val("Gamma_mkt_vix", 0.0)

    i = R.index.get_loc(last_ts)
    slope7 = 0.0
    if "DRIVER" in R.columns and i>=7:
        slope7 = float(R["DRIVER"].iloc[i] - R["DRIVER"].iloc[i-7])

    persist_hits = int((R["DRIVER"].iloc[max(0, i-2):i+1] >= thr).sum()) if "DRIVER" in R.columns else 0
    persist_ok   = bool(persist_hits >= 1)

    votes = int(Theta_v>=0.25) + int(Theta_c>=0.25) + int(Theta_m>=0.25) + int(Gamma_v>=0.08) + int(slope7>0)

    # PREWARN overlay (policy): driver≥thr & persist OK & Γ strong (no change to WARNING logic)
    policy_alert = base_alert
    if base_alert != "WARNING" and np.isfinite(driver) and driver>=thr and persist_ok and (Gamma_v >= 0.75):
        policy_alert = "PREWARN"
        overlay = {
            "recommendation": "consider temporary gate=1 with q≈0.975 (persist 1/3) to catch potential onset; keep FA budget ≤0.8/yr",
            "why": {
                "driver_ge_threshold": True,
                "gamma_strong": True,
                "persistence_ok": True,
                "votes_vs_gate": f"{votes} < {gate_needed}"
            },
            "asof": str(last_ts.date())
        }

    # reasons for holding
    reasons = []
    if not np.isfinite(driver) or driver < thr: reasons.append("threshold")
    if votes < gate_needed:                      reasons.append("votes")
    if not persist_ok:                           reasons.append("persistence")

    extras = []
    if np.isfinite(risk):   extras.append(f"RISK={risk:.3f}")
    if np.isfinite(driver): extras.append(f"DRIVER={driver:.3f}")
    if policy_alert == "PREWARN" and base_alert != "WARNING":
        extras.append("policy=PREWARN(Γ strong)")
    elif base_alert == "OK" and reasons:
        extras.append(f"held_by={'+'.join(reasons)}")

    lines.append(_status_line("REGIME", str(last_ts.date()), policy_alert, extras))
    hist = _last_transitions(R, "alert", n=5)
    lines.append("  recent: " + " · ".join([f"{d}:{st}({days}d)" for d,st,days in hist]))

    payload["regime"] = {
        "file": str(regime_fp),
        "metrics": (str(regime_metrics) if regime_metrics else None),
        "asof": str(last_ts.date()),
        "alert": base_alert,
        "policy_alert": policy_alert,
        "q": float(q), "threshold": float(thr),
        "driver": (float(driver) if np.isfinite(driver) else None),
        "risk":   (float(risk)   if np.isfinite(risk)   else None),
        "Theta": {"mkt": float(Theta_m), "vix": float(Theta_v), "cred": float(Theta_c)},
        "Gamma_mkt_vix": float(Gamma_v),
        "slope7": float(slope7),
        "votes": int(votes), "gate_needed": int(gate_needed),
        "persistence": {"ok": bool(persist_ok), "hits": int(persist_hits), "win": 3},
        "recent": _stringify_recent(hist),
        "held_by": reasons
    }
else:
    lines.append("REGIME     — (no recent v0.3 alerts found)")
    payload["regime"] = {"file": None, "alert": None, "recent": []}

# ---- write dashboard JSONs (JSON-safe) ----
dash_dir = ROOT / "dashboard"
dash_dir.mkdir(parents=True, exist_ok=True)

dash_path = dash_dir / "g_dren_dashboard_status.json"
dash_path.write_text(json.dumps(payload, indent=2))

overlay_path = dash_dir / "g_dren_policy_overlay.json"
overlay_path.write_text(json.dumps(overlay, indent=2))

print("\n".join(lines))
print(f"\n[dashboard] {dash_path}")
if overlay.get("recommendation"):
    print(f"[overlay ] {overlay_path}  ← suggestion saved")


SPIKE      — OK       as of 2025-11-05 | NEXUS=0.286
REGIME     — PREWARN  as of 2025-11-05 | RISK=0.000 DRIVER=1.000 policy=PREWARN(Γ strong)

[dashboard] E:\CNT\artifacts\g_dren\dashboard\g_dren_dashboard_status.json
[overlay ] E:\CNT\artifacts\g_dren\dashboard\g_dren_policy_overlay.json  ← suggestion saved


In [25]:
# === G-DREN Daily Runner v0.1 — retune sweep → dashboard → history CSVs ===
import os, json, time
from pathlib import Path
import numpy as np, pandas as pd

# ---- knobs (tweak as you like) ----
FA_PER_YEAR_TARGET = 0.60       # false-alarm budget for regime tuner
COOLDOWN_DAYS      = 14
MAX_WARN_DAYS      = 12
HYST_EXIT_DAYS     = 3
QGRID              = np.linspace(0.965, 0.990, 51)
WIN_RANK           = 512
VOTE_THRESH_THETA  = 0.25
VOTE_THRESH_GAMMA  = 0.08
LOOKAHEAD_DAYS     = 14         # regime horizon used in sweep scoring

# If features look stale (> 3 days old), we *warn* but do not recompute here.
STALE_FEATURE_DAYS = 3

# ---- paths ----
CNT  = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
DASH = ROOT / "dashboard"
ROOT.mkdir(parents=True, exist_ok=True); DASH.mkdir(parents=True, exist_ok=True)
TODAY = pd.Timestamp.today().normalize()

def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

def _load_csv(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True)
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    return df[~df.index.duplicated(keep="last")].sort_index()

def _pct_rank(s: pd.Series, win=512):
    def pr(w):
        v = w[-1]; a = np.sort(w)
        return np.searchsorted(a, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=True)

def _transitions(df, col="alert", n=5):
    a = df[col].astype(str)
    trans = (a != a.shift(1)).fillna(True)
    pts = df.index[trans]
    out=[]
    for i in range(len(pts)-1, -1, -1):
        t = pts[i]; state = a.loc[t]
        days = (pts[i+1]-t).days if i+1 < len(pts) else (df.index[-1]-t).days + 1
        out.append((str(t.date()), state, int(days)))
        if len(out)>=n: break
    return out

# ---------- 1) Locate latest Spike + Regime feature/alert files ----------
spike_alerts = _latest(["*_v02b/gdren_v02b_alerts.csv", "*_v02b/*alerts*.csv"])
reg_features = _latest(["*_v03a_lite_fix/gdren_v03a_lite_fix_features.csv"])
if reg_features is None:
    raise FileNotFoundError("No v03a_lite_fix_features.csv found — run the v0.3a-lite (fix) cell once to seed features.")

age_days = (pd.Timestamp.fromtimestamp(reg_features.stat().st_mtime).normalize() - TODAY).days
if age_days < -STALE_FEATURE_DAYS:
    print(f"[warn] Regime features are {abs(age_days)} days old → consider re-running v0.3a-lite fix to refresh.")

DF = _load_csv(reg_features)

# ---------- 2) Build Regime hybrid DRIVER & run retune sweep ----------
required = ["RISK","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
miss = [c for c in required if c not in DF.columns]
if miss: raise ValueError(f"Missing columns in features CSV: {miss}")

risk   = pd.Series(DF["RISK"].astype(float), index=DF.index).fillna(0.0)
Theta_v= DF["Theta_vix"].astype(float).values
Theta_c= DF["Theta_cred"].astype(float).values
Theta_m= DF["Theta_mkt"].astype(float).values
Gamma_v= DF["Gamma_mkt_vix"].astype(float).values
AR1_m  = DF["AR1_mkt"].astype(float).values
onset  = DF["EVENT_ONSET"].astype(int).values
idx    = DF.index

risk_pct = _pct_rank(risk, win=WIN_RANK).fillna(0.0)
cons_raw = (0.45*pd.Series(Theta_v, index=idx) +
            0.35*pd.Series(Theta_c, index=idx) +
            0.15*pd.Series(np.maximum(0, AR1_m), index=idx) +
            0.05*pd.Series(np.nan_to_num(Gamma_v), index=idx))
cons_pct = _pct_rank(cons_raw, win=WIN_RANK).fillna(0.0)
driver   = pd.Series(np.maximum(risk_pct.values, cons_pct.values), index=idx, name="DRIVER")

votes_base = ((Theta_v>=VOTE_THRESH_THETA).astype(int) +
              (Theta_c>=VOTE_THRESH_THETA).astype(int) +
              (Theta_m>=VOTE_THRESH_THETA).astype(int) +
              (Gamma_v>=VOTE_THRESH_GAMMA).astype(int) +
              ((driver.diff(7).fillna(0.0).values)>0).astype(int))

def make_alert(driver_s, q, gate_min=2, persist_win=3, persist_req=1):
    t = float(np.quantile(driver_s.values[np.isfinite(driver_s.values)], q))
    persist = ((driver_s>=t).astype(int).rolling(persist_win).sum().fillna(0).values >= persist_req)
    state=[]; in_warn=False; below=0; wlen=0
    dv = driver_s.values
    for r,v,p in zip(dv, votes_base, persist):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS:
                in_warn=False; wlen=0; state.append('OK')
            else: state.append('WARNING')
        else:
            if (r>=t) and p and (v>=gate_min):
                in_warn=True; below=0; wlen=1; state.append('WARNING')
            else: state.append('OK')
    return pd.Series(state, index=driver_s.index), t

def score(alert_s, horizon, ix):
    a = alert_s.loc[ix]
    starts = (a.eq("WARNING") & ~a.shift(1).eq("WARNING")).fillna(False).values
    s_ix = np.where(starts)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_DAYS: kept.append(s); last=s
    s_ix = kept
    e_slice = onset[a.index.get_indexer(ix)]
    hits=0
    for s in s_ix:
        if e_slice[s+1:min(s+1+horizon, len(e_slice))].any(): hits += 1
    fa = int(len(s_ix)-hits)
    years = max(1e-9, (ix[-1]-ix[0]).days/365.25)
    return {"starts": len(s_ix), "hits": hits, "fa_py": fa/years}, pd.Index(a.index[s_ix])

# Train slice for calibration (≤2015-12-31)
ix_train = idx[idx <= pd.Timestamp("2015-12-31")]
if len(ix_train) < 2000:
    ix_train = idx[: int(len(idx)*0.6)]  # fallback split

best=None
for gate_min in [1,2,3]:
    for pwin in [3,5]:
        for preq in [1,2]:
            for q in QGRID:
                alert_q, thr = make_alert(driver, q, gate_min, pwin, preq)
                res,_ = score(alert_q, LOOKAHEAD_DAYS, ix_train)
                cand = {"q": float(q), "thr": float(thr), "gate": gate_min, "pwin": pwin, "preq": preq, **res}
                if (best is None) or \
                   ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] > best.get("hits_ok",-1))) or \
                   ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] == best.get("hits_ok",-1)) and (res["fa_py"] < best["fa_py"])) or \
                   ((best["fa_py"] > FA_PER_YEAR_TARGET) and (res["fa_py"] < best["fa_py"])):
                    best = cand.copy(); best["hits_ok"] = res["hits"]

# apply to full series
alert_final, thr_final = make_alert(driver, best["q"], best["gate"], best["pwin"], best["preq"])
full_res, starts_idx   = score(alert_final, LOOKAHEAD_DAYS, idx)

# Save regime sweep outputs
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime()) / "v03a_daily"
RUN.mkdir(parents=True, exist_ok=True)
out_df = DF.assign(risk_pct=risk_pct, cons_pct=cons_pct, DRIVER=driver, alert=alert_final)
cols = ["RISK","risk_pct","cons_pct","DRIVER","alert","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
out_df[cols].to_csv(RUN / "gdren_v03a_daily_alerts.csv")
(Path(RUN / "gdren_v03a_daily_metrics.json")).write_text(json.dumps({
    "q": best["q"], "thr": best["thr"], "gate_min_votes": best["gate"],
    "persist_win": best["pwin"], "persist_req": best["preq"],
    "train_hits": int(best["hits"]), "train_fa_per_year": float(best["fa_py"]),
    "full_starts": int(full_res["starts"]), "full_hits": int(full_res["hits"]),
    "full_fa_per_year": float(full_res["fa_py"]), "horizon_days": LOOKAHEAD_DAYS
}, indent=2))

# ---------- 3) Update dashboard + overlay ----------
def _status_line(name, date, base_alert, extras):
    parts = [f"{name:10s} — {base_alert:8s} as of {date}"]
    if extras: parts.append(" | " + " ".join(extras))
    return "".join(parts)

lines=[]; payload={"generated_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())}

# SPIKE status
if spike_alerts and spike_alerts.exists():
    S = _load_csv(spike_alerts)
    s_last = (S.index[S.index<=TODAY][-1] if any(S.index<=TODAY) else S.index[-1])
    s_alert = str(S.loc[s_last, "alert"])
    s_nexus = float(S.loc[s_last, "NEXUS"]) if "NEXUS" in S.columns and pd.notna(S.loc[s_last, "NEXUS"]) else None
    lines.append(_status_line("SPIKE", str(s_last.date()), s_alert, [f"NEXUS={s_nexus:.3f}"] if s_nexus is not None else []))
    s_hist = _transitions(S, "alert", n=5)
    payload["spike"] = {"file": str(spike_alerts), "asof": str(s_last.date()), "alert": s_alert, "nexus": s_nexus, "recent": s_hist}
else:
    lines.append("SPIKE      — (no recent v0.2b alerts found)")
    payload["spike"] = {"file": None, "alert": None, "recent": []}

# REGIME status (from today’s RUN)
R = _load_csv(RUN / "gdren_v03a_daily_alerts.csv")
r_last = (R.index[R.index<=TODAY][-1] if any(R.index<=TODAY) else R.index[-1])
base_alert = str(R.loc[r_last, "alert"])
driver_last= float(R.loc[r_last, "DRIVER"]) if "DRIVER" in R.columns else None
risk_last  = float(R.loc[r_last, "RISK"])   if "RISK"   in R.columns else None

# policy PREWARN echo: driver≥thr & Γ strong
Gamma_v_last = float(R.loc[r_last, "Gamma_mkt_vix"]) if "Gamma_mkt_vix" in R.columns else 0.0
persist_hits = int((R["DRIVER"].iloc[max(0, R.index.get_loc(r_last)-2):R.index.get_loc(r_last)+1] >= best["thr"]).sum()) if "DRIVER" in R.columns else 0
persist_ok   = (persist_hits >= 1)
policy_alert = base_alert
if base_alert != "WARNING" and np.isfinite(driver_last) and driver_last>=best["thr"] and persist_ok and (Gamma_v_last >= 0.75):
    policy_alert = "PREWARN"

extras = []
if risk_last is not None:   extras.append(f"RISK={risk_last:.3f}")
if driver_last is not None: extras.append(f"DRIVER={driver_last:.3f}")
if policy_alert == "PREWARN" and base_alert != "WARNING": extras.append("policy=PREWARN(Γ strong)")

lines.append(_status_line("REGIME", str(r_last.date()), policy_alert, extras))
r_hist = _transitions(R, "alert", n=5)

payload["regime"] = {
    "file": str(RUN / "gdren_v03a_daily_alerts.csv"),
    "asof": str(r_last.date()),
    "alert": base_alert, "policy_alert": policy_alert,
    "driver": driver_last, "risk": risk_last,
    "threshold": float(best["thr"]), "q": float(best["q"]),
    "gate_min_votes": int(best["gate"]), "persist_win": int(best["pwin"]), "persist_req": int(best["preq"]),
    "recent": r_hist
}

# write JSON
(DASH / "g_dren_dashboard_status.json").write_text(json.dumps(payload, indent=2))

# ---------- 4) Export transitions CSVs ----------
def export_transitions_csv(fp_in, fp_out):
    D = _load_csv(fp_in)
    hist = _transitions(D, "alert", n=9999)  # full history
    pd.DataFrame(hist, columns=["date","state","days"]).to_csv(fp_out, index=False)

if spike_alerts and spike_alerts.exists():
    export_transitions_csv(spike_alerts, DASH / "spike_transitions.csv")
export_transitions_csv(RUN / "gdren_v03a_daily_alerts.csv", DASH / "regime_transitions.csv")

# ---------- 5) Print status ----------
print("\n".join(lines))
print(f"\n[dashboard] {DASH/'g_dren_dashboard_status.json'}")
print(f"[history  ] {DASH/'spike_transitions.csv'} (if SPIKE present)")
print(f"[history  ] {DASH/'regime_transitions.csv'}")


SPIKE      — OK       as of 2025-11-05 | NEXUS=0.286
REGIME     — PREWARN  as of 2025-11-05 | RISK=0.000 DRIVER=1.000 policy=PREWARN(Γ strong)

[dashboard] E:\CNT\artifacts\g_dren\dashboard\g_dren_dashboard_status.json
[history  ] E:\CNT\artifacts\g_dren\dashboard\spike_transitions.csv (if SPIKE present)
[history  ] E:\CNT\artifacts\g_dren\dashboard\regime_transitions.csv


In [26]:
# === Regime Gate Patch v0.3 — add VIX term-structure + LQD/IEF slope votes into retune sweep ===
import json, time
from pathlib import Path
import numpy as np, pandas as pd

# ----- knobs -----
FA_PER_YEAR_TARGET = 0.60
COOLDOWN_DAYS      = 14
MAX_WARN_DAYS      = 12
HYST_EXIT_DAYS     = 3
LOOKAHEAD_DAYS     = 14
QGRID              = np.linspace(0.965, 0.990, 51)
WIN_RANK           = 512
VOTE_THRESH_THETA  = 0.25
VOTE_THRESH_GAMMA  = 0.08

CNT  = Path("E:/CNT")
ROOT = CNT / "artifacts" / "g_dren"
ROOT.mkdir(parents=True, exist_ok=True)

def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

def _load_csv(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True).sort_index()
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    return df[~df.index.duplicated(keep="last")]

def _pct_rank(s: pd.Series, win=512):
    def pr(w):
        v = w[-1]; a = np.sort(w)
        return np.searchsorted(a, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=True)

# 1) Load latest v03a_lite_fix features
feat_fp = _latest(["*_v03a_lite_fix/gdren_v03a_lite_fix_features.csv"])
assert feat_fp, "Run v0.3a-lite (fix) once to seed features."
DF = _load_csv(feat_fp)

need = ["RISK","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
missing = [c for c in need if c not in DF.columns]
assert not missing, f"Missing columns: {missing}"

risk   = DF["RISK"].astype(float).fillna(0.0)
Theta_v= DF["Theta_vix"].astype(float).fillna(0.0).values
Theta_c= DF["Theta_cred"].astype(float).fillna(0.0).values
Theta_m= DF["Theta_mkt"].astype(float).fillna(0.0).values
Gamma_v= DF["Gamma_mkt_vix"].astype(float).fillna(0.0).values
AR1_m  = DF["AR1_mkt"].astype(float).fillna(0.0).values
onset  = DF["EVENT_ONSET"].astype(int).values
idx    = DF.index

# 2) Build DRIVER (risk percentile ∨ consensus percentile)
risk_pct = _pct_rank(risk, win=WIN_RANK).fillna(0.0)
cons_raw = (0.45*pd.Series(Theta_v, index=idx) +
            0.35*pd.Series(Theta_c, index=idx) +
            0.15*pd.Series(np.maximum(0, AR1_m), index=idx) +
            0.05*pd.Series(np.nan_to_num(Gamma_v), index=idx))
cons_pct = _pct_rank(cons_raw, win=WIN_RANK).fillna(0.0)
driver   = pd.Series(np.maximum(risk_pct.values, cons_pct.values), index=idx, name="DRIVER")

# 3) Extra votes (new): VIX term-structure + IG credit slope
def fetch_series(ticker):
    try:
        import yfinance as yf
        d = yf.Ticker(ticker).history(period="max", auto_adjust=False)
        if isinstance(d, pd.DataFrame) and "Close" in d and len(d)>0:
            s = pd.Series(d["Close"]).rename(ticker)
            s.index = pd.DatetimeIndex(s.index).tz_localize(None).normalize()
            return s
    except Exception:
        pass
    return pd.Series(dtype=float)

vix   = fetch_series("^VIX")
vix9d = fetch_series("^VIX9D")
vix3m = fetch_series("^VIX3M")
D_vix = pd.concat([vix9d, vix, vix3m], axis=1).reindex(idx).ffill()

term_vote = ((D_vix.get("^VIX9D",0) - D_vix.get("^VIX",0) > 0).astype(int) +
             (D_vix.get("^VIX",0)   - D_vix.get("^VIX3M",0) > 0).astype(int))
term_vote = term_vote.fillna(0).clip(0,1).astype(int).values  # 1 if any inversion

lqd = fetch_series("LQD")
ief = fetch_series("IEF")
D_ig = pd.concat([lqd, ief], axis=1).reindex(idx).ffill()
ig_ratio = (D_ig["LQD"] / D_ig["IEF"]) if ("LQD" in D_ig and "IEF" in D_ig) else pd.Series(0.0, index=idx)
ig_slope = ig_ratio.pct_change().rolling(10).mean()   # 10d slope proxy
ig_vote  = (ig_slope < 0).astype(int).values          # deterioration vote

# 4) Base votes + new votes
votes_base = ((Theta_v>=VOTE_THRESH_THETA).astype(int) +
              (Theta_c>=VOTE_THRESH_THETA).astype(int) +
              (Theta_m>=VOTE_THRESH_THETA).astype(int) +
              (Gamma_v>=VOTE_THRESH_GAMMA).astype(int) +
              ((driver.diff(7).fillna(0.0).values)>0).astype(int))
votes_ext  = votes_base + term_vote + ig_vote  # << extended votes

def make_alert(driver_s, q, gate_min=2, persist_win=3, persist_req=1):
    t = float(np.quantile(driver_s.values[np.isfinite(driver_s.values)], q))
    persist = ((driver_s>=t).astype(int).rolling(persist_win).sum().fillna(0).values >= persist_req)
    state=[]; in_warn=False; below=0; wlen=0
    dv = driver_s.values
    for r,v,p in zip(dv, votes_ext, persist):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS:
                in_warn=False; wlen=0; state.append('OK')
            else: state.append('WARNING')
        else:
            if (r>=t) and p and (v>=gate_min):
                in_warn=True; below=0; wlen=1; state.append('WARNING')
            else: state.append('OK')
    return pd.Series(state, index=driver_s.index), t

def score(alert_s, horizon, ix):
    a = alert_s.loc[ix]
    starts = (a.eq("WARNING") & ~a.shift(1).eq("WARNING")).fillna(False).values
    s_ix = np.where(starts)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_DAYS: kept.append(s); last=s
    s_ix = kept
    e_slice = onset[a.index.get_indexer(ix)]
    hits=0
    for s in s_ix:
        if e_slice[s+1:min(s+1+horizon, len(e_slice))].any(): hits += 1
    fa = int(len(s_ix)-hits)
    years = max(1e-9, (ix[-1]-ix[0]).days/365.25)
    return {"starts": len(s_ix), "hits": hits, "fa_py": fa/years}

# 5) Calibrate on train (≤2015-12-31), sweep grid with extended votes
ix_train = idx[idx <= pd.Timestamp("2015-12-31")]
if len(ix_train) < 2000:
    ix_train = idx[: int(len(idx)*0.6)]

best=None
for gate_min in [1,2,3]:
    for pwin in [3,5]:
        for preq in [1,2]:
            for q in QGRID:
                alert_q, thr = make_alert(driver, q, gate_min, pwin, preq)
                res = score(alert_q, LOOKAHEAD_DAYS, ix_train)
                cand = {"q": float(q), "thr": float(thr), "gate": gate_min, "pwin": pwin, "preq": preq, **res}
                if (best is None) or \
                   ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] > best.get("hits_ok",-1))) or \
                   ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] == best.get("hits_ok",-1)) and (res["fa_py"] < best["fa_py"])) or \
                   ((best["fa_py"] > FA_PER_YEAR_TARGET) and (res["fa_py"] < best["fa_py"])):
                    best = cand.copy(); best["hits_ok"] = res["hits"]

# 6) Apply to full series and save
alert_final, thr_final = make_alert(driver, best["q"], best["gate"], best["pwin"], best["preq"])
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime()) / "v03a_gate_patch"
RUN.mkdir(parents=True, exist_ok=True)
out_df = DF.assign(risk_pct=risk_pct, cons_pct=cons_pct, DRIVER=driver, alert=alert_final)
cols = ["RISK","risk_pct","cons_pct","DRIVER","alert","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
out_df[cols].to_csv(RUN / "gdren_v03a_gate_patch_alerts.csv")
(Path(RUN / "gdren_v03a_gate_patch_metrics.json")).write_text(json.dumps({
    "q": best["q"], "thr": best["thr"], "gate_min_votes": best["gate"],
    "persist_win": best["pwin"], "persist_req": best["preq"],
    "train_hits": int(best["hits"]), "train_fa_per_year": float(best["fa_py"]),
}, indent=2))

print("Artifacts:", json.dumps({
    "alerts_csv": str(RUN / "gdren_v03a_gate_patch_alerts.csv"),
    "metrics_json": str(RUN / "gdren_v03a_gate_patch_metrics.json")
}, indent=2))


Artifacts: {
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251105-235917Z\\v03a_gate_patch\\gdren_v03a_gate_patch_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251105-235917Z\\v03a_gate_patch\\gdren_v03a_gate_patch_metrics.json"
}


In [27]:
# === Runner Hook v0.1 — webhooks + 48h gate override on PREWARN streak≥2 ===
import os, json, time
from pathlib import Path
import pandas as pd
import numpy as np
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

CNT  = Path("E:/CNT")
ROOT = CNT / "artifacts" / "g_dren"
DASH = ROOT / "dashboard"
DASH.mkdir(parents=True, exist_ok=True)
TODAY = pd.Timestamp.today().normalize()

def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

def _load_csv(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True).sort_index()
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    return df[~df.index.duplicated(keep="last")]

def _post_webhook(text):
    urls = os.environ.get("G_DREN_WEBHOOKS","").strip()
    if not urls: return
    for u in [s.strip() for s in urls.split(",") if s.strip()]:
        try:
            body = json.dumps({"text": text, "content": text}).encode("utf-8")
            req = Request(u, data=body, headers={"Content-Type":"application/json"})
            urlopen(req, timeout=10).read()
        except Exception as e:
            print(f"[webhook warn] {u} -> {e}")

# read today’s daily alerts (from your Daily Runner v0.1/v0.2)
daily = _latest(["*/v03a_daily/gdren_v03a_daily_alerts.csv",
                 "*/v03a_retune_sweep/gdren_v03a_retune_sweep_alerts.csv",
                 "*/v03a_retune_pp/gdren_v03a_retune_pp_alerts.csv"])
assert daily, "No daily/retune alerts found. Run your daily runner first."
R = _load_csv(daily)

# derive policy PREWARN (driver≥thr & Γ strong) using latest metrics (thr)
metrics = _latest(["*/v03a_daily/gdren_v03a_daily_metrics.json",
                   "*/v03a_retune_sweep/gdren_v03a_retune_sweep_metrics.json",
                   "*/v03a_retune_pp/gdren_v03a_retune_pp_metrics.json"])
thr = 1.0
if metrics and metrics.exists():
    try:
        M = json.loads(metrics.read_text())
        thr = float(M.get("thr", M.get("calibrated_thr_driver", 1.0)))
    except Exception: pass

driver = R["DRIVER"]
gamma  = R["Gamma_mkt_vix"] if "Gamma_mkt_vix" in R.columns else pd.Series(0.0, index=R.index)
policy_prewarn = (driver >= thr) & (gamma >= 0.75)

# compute streak ending today
streak = 0
for ts in reversed(R.index[R.index <= TODAY]):
    if policy_prewarn.loc[ts]: streak += 1
    else: break

# 48h gate override file
override_fp = DASH / "g_dren_gate_override.json"
now_utc = pd.Timestamp.utcnow()
override = {}
if override_fp.exists():
    try: override = json.loads(override_fp.read_text())
    except Exception: override = {}
# prune expired
if override.get("expires_utc"):
    if pd.Timestamp(override["expires_utc"]) <= now_utc:
        override = {}
        try: override_fp.unlink()
        except Exception: pass

# if streak >= 2 and no override, create a 48h gate=1 override + webhook
if streak >= 2 and not override:
    override = {
        "gate_min_votes_override": 1,
        "created_utc": now_utc.isoformat(),
        "expires_utc": (now_utc + pd.Timedelta(hours=48)).isoformat(),
        "reason": "PREWARN_streak>=2"
    }
    override_fp.write_text(json.dumps(override, indent=2))
    msg = (f"[G-DREN] PREWARN streak {streak}d — enabling temporary gate=1 for 48h "
           f"(driver≥thr & Γ strong). Override ends {override['expires_utc']}.")
    print(msg); _post_webhook(msg)
elif override:
    print(f"[G-DREN] gate override active → gate=1 until {override['expires_utc']} (reason={override.get('reason')}).")
else:
    print("[G-DREN] No override: PREWARN streak <", streak, "or conditions not met.")

# also notify on new WARNING starts today
if "alert" in R.columns:
    last_idx = R.index[R.index<=TODAY]
    if len(last_idx):
        today_alert = str(R.loc[last_idx[-1],"alert"])
        if len(last_idx) >= 2:
            prev_alert  = str(R.loc[last_idx[-2],"alert"])
        else:
            prev_alert = "OK"
        if today_alert=="WARNING" and prev_alert!="WARNING":
            msg = f"[G-DREN] WARNING start {str(last_idx[-1].date())} — driver={driver.loc[last_idx[-1]]:.3f}, thr={thr:.3f}"
            print(msg); _post_webhook(msg)


[G-DREN] PREWARN streak 15d — enabling temporary gate=1 for 48h (driver≥thr & Γ strong). Override ends 2025-11-07T23:59:32.594493+00:00.


In [28]:
# === G-DREN Daily Runner v0.2 — extended votes + override + dashboard + webhooks ===
import os, json, time
from pathlib import Path
import numpy as np, pandas as pd
from urllib.request import Request, urlopen

# ------------------------ knobs (tune as you like) ------------------------
FA_PER_YEAR_TARGET = 0.60           # false-alarm budget for calibration
COOLDOWN_DAYS      = 14             # min days between WARNING starts
MAX_WARN_DAYS      = 12             # hard cap on WARNING segment length
HYST_EXIT_DAYS     = 3              # exit WARNING if under threshold long enough
LOOKAHEAD_DAYS     = 14             # regime horizon for scoring (7 or 14 work well)
QGRID              = np.linspace(0.965, 0.990, 51)  # threshold quantile sweep
WIN_RANK           = 512            # rolling window for percentile ranks
VOTE_THRESH_THETA  = 0.25           # Θ vote threshold
VOTE_THRESH_GAMMA  = 0.08           # Γ(mkt↔VIX) vote threshold
PREWARN_GAMMA_MIN  = 0.75           # PREWARN requires strong Γ
STALE_FEATURE_DAYS = 3              # warn if features older than N days
CNT  = Path(os.environ.get("CNT_LAB_DIR", "E:/CNT")).resolve()
ROOT = CNT / "artifacts" / "g_dren"
DASH = ROOT / "dashboard"
TODAY = pd.Timestamp.today().normalize()
ROOT.mkdir(parents=True, exist_ok=True); DASH.mkdir(parents=True, exist_ok=True)

# ------------------------ helpers ------------------------
def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

def _load_csv(fp):
    df = pd.read_csv(fp, index_col=0, parse_dates=True).sort_index()
    df.index = pd.DatetimeIndex(df.index).tz_localize(None).normalize()
    return df[~df.index.duplicated(keep="last")]

def _pct_rank(s: pd.Series, win=512):
    def pr(w):
        v = w[-1]; a = np.sort(w)
        return np.searchsorted(a, v, side="right")/len(w)
    return s.rolling(win, min_periods=win//2).apply(pr, raw=True)

def _transitions(df, col="alert", n=5):
    a = df[col].astype(str)
    trans = (a != a.shift(1)).fillna(True)
    pts = df.index[trans]
    out=[]
    for i in range(len(pts)-1, -1, -1):
        t = pts[i]; state = a.loc[t]
        days = (pts[i+1]-t).days if i+1 < len(pts) else (df.index[-1]-t).days + 1
        out.append((str(t.date()), state, int(days)))
        if len(out)>=n: break
    return out

def _post_webhook(text):
    urls = os.environ.get("G_DREN_WEBHOOKS","").strip()
    if not urls: return
    for u in [s.strip() for s in urls.split(",") if s.strip()]:
        try:
            body = json.dumps({"text": text, "content": text}).encode("utf-8")
            req  = Request(u, data=body, headers={"Content-Type":"application/json"})
            urlopen(req, timeout=10).read()
        except Exception as e:
            print(f"[webhook warn] {u} -> {e}")

def _fetch_series(ticker):
    """Lightweight yfinance fetch; returns tz-naive daily Close series or empty."""
    try:
        import yfinance as yf
        d = yf.Ticker(ticker).history(period="max", auto_adjust=False)
        if isinstance(d, pd.DataFrame) and "Close" in d and len(d)>0:
            s = pd.Series(d["Close"], name=ticker)
            s.index = pd.DatetimeIndex(s.index).tz_localize(None).normalize()
            return s
    except Exception as e:
        print(f"[yf warn] {ticker}: {e}")
    return pd.Series(dtype=float)

# ------------------------ 1) Load latest features (v03a_lite_fix) ------------------------
feat_fp = _latest(["*_v03a_lite_fix/gdren_v03a_lite_fix_features.csv"])
if not feat_fp:
    raise FileNotFoundError("No v03a_lite_fix features found. Run the v0.3a-lite (fix) cell once to seed features.")
mtime_days = (TODAY - pd.Timestamp.fromtimestamp(feat_fp.stat().st_mtime).normalize()).days
if mtime_days > STALE_FEATURE_DAYS:
    print(f"[warn] Features are {mtime_days} days old → consider refreshing v0.3a-lite fix.")

DF = _load_csv(feat_fp)
required = ["RISK","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
miss = [c for c in required if c not in DF.columns]
if miss: raise ValueError(f"Missing columns in features CSV: {miss}")

risk   = DF["RISK"].astype(float).fillna(0.0)
Theta_v= DF["Theta_vix"].astype(float).fillna(0.0).values
Theta_c= DF["Theta_cred"].astype(float).fillna(0.0).values
Theta_m= DF["Theta_mkt"].astype(float).fillna(0.0).values
Gamma_v= DF["Gamma_mkt_vix"].astype(float).fillna(0.0).values
AR1_m  = DF["AR1_mkt"].astype(float).fillna(0.0).values
onset  = DF["EVENT_ONSET"].astype(int).values
idx    = DF.index

# ------------------------ 2) Build DRIVER (risk_pct ∨ consensus_pct) ------------------------
risk_pct = _pct_rank(risk, win=WIN_RANK).fillna(0.0)
cons_raw = (0.45*pd.Series(Theta_v, index=idx) +
            0.35*pd.Series(Theta_c, index=idx) +
            0.15*pd.Series(np.maximum(0, AR1_m), index=idx) +
            0.05*pd.Series(np.nan_to_num(Gamma_v), index=idx))
cons_pct = _pct_rank(cons_raw, win=WIN_RANK).fillna(0.0)
driver   = pd.Series(np.maximum(risk_pct.values, cons_pct.values), index=idx, name="DRIVER")

# ------------------------ 3) Extended votes: VIX TS + IG credit slope ------------------------
# VIX term-structure: ^VIX9D vs ^VIX vs ^VIX3M (any inversion → vote=1)
vix9d = _fetch_series("^VIX9D")
vix   = _fetch_series("^VIX")
vix3m = _fetch_series("^VIX3M")
D_vix = pd.concat([vix9d.rename("V9D"), vix.rename("VIX"), vix3m.rename("V3M")], axis=1).reindex(idx).ffill()
term_vote = (((D_vix["V9D"] - D_vix["VIX"]) > 0).astype(int) + ((D_vix["VIX"] - D_vix["V3M"]) > 0).astype(int))
term_vote = term_vote.fillna(0).clip(0,1).astype(int).values

# IG credit slope: LQD/IEF 10d mean return < 0 → deterioration vote
lqd = _fetch_series("LQD")
ief = _fetch_series("IEF")
D_ig = pd.concat([lqd.rename("LQD"), ief.rename("IEF")], axis=1).reindex(idx).ffill()
ig_ratio = (D_ig["LQD"] / D_ig["IEF"]) if all(c in D_ig for c in ["LQD","IEF"]) else pd.Series(0.0, index=idx)
ig_slope = ig_ratio.pct_change().rolling(10).mean()
ig_vote  = (ig_slope < 0).astype(int).reindex(idx).fillna(0).astype(int).values

# Base votes + extended votes
votes_base = ((Theta_v>=VOTE_THRESH_THETA).astype(int) +
              (Theta_c>=VOTE_THRESH_THETA).astype(int) +
              (Theta_m>=VOTE_THRESH_THETA).astype(int) +
              (Gamma_v>=VOTE_THRESH_GAMMA).astype(int) +
              ((driver.diff(7).fillna(0.0).values)>0).astype(int))
votes_ext  = votes_base + term_vote + ig_vote

# ------------------------ 4) Honor 48h gate override (from policy file) ------------------------
override_fp = DASH / "g_dren_gate_override.json"
override_gate = None
if override_fp.exists():
    try:
        _ov = json.loads(override_fp.read_text())
        exp = pd.Timestamp(_ov.get("expires_utc", "1970-01-01T00:00:00Z"))
        if pd.Timestamp.utcnow() < exp:
            override_gate = int(_ov.get("gate_min_votes_override", 1))
            print(f"[override] gate_min_votes={override_gate} active until {exp.tz_localize('UTC')}")
    except Exception as e:
        print(f"[override warn] {e}")

# ------------------------ 5) Calibration (sweep q, persistence) ------------------------
def make_alert(driver_s, q, gate_min=2, persist_win=3, persist_req=1):
    t = float(np.quantile(driver_s.values[np.isfinite(driver_s.values)], q))
    persist = ((driver_s>=t).astype(int).rolling(persist_win).sum().fillna(0).values >= persist_req)
    state=[]; in_warn=False; below=0; wlen=0
    dv = driver_s.values
    for r,v,p in zip(dv, votes_ext, persist):
        if in_warn:
            wlen += 1
            below = below+1 if r < 0.9*t else 0
            if below>=HYST_EXIT_DAYS or wlen>=MAX_WARN_DAYS:
                in_warn=False; wlen=0; state.append('OK')
            else: state.append('WARNING')
        else:
            if (r>=t) and p and (v>=gate_min):
                in_warn=True; below=0; wlen=1; state.append('WARNING')
            else: state.append('OK')
    return pd.Series(state, index=driver_s.index), t

def score(alert_s, horizon, ix):
    a = alert_s.loc[ix]
    starts = (a.eq("WARNING") & ~a.shift(1).eq("WARNING")).fillna(False).values
    s_ix = np.where(starts)[0].tolist()
    kept=[]; last=-10**9
    for s in s_ix:
        if s-last >= COOLDOWN_DAYS: kept.append(s); last=s
    s_ix = kept
    e_slice = DF.loc[ix, "EVENT_ONSET"].astype(int).values
    hits=0
    for s in s_ix:
        if e_slice[s+1:min(s+1+horizon, len(e_slice))].any(): hits += 1
    fa = int(len(s_ix)-hits)
    years = max(1e-9, (ix[-1]-ix[0]).days/365.25)
    return {"starts": len(s_ix), "hits": hits, "fa_py": fa/years}

# Train slice for calibration
ix_train = idx[idx <= pd.Timestamp("2015-12-31")]
if len(ix_train) < 2000:
    ix_train = idx[: int(len(idx)*0.6)]

best=None
gate_grid = [override_gate] if override_gate is not None else [1,2,3]
for gate_min in gate_grid:
    for pwin in [3,5]:
        for preq in [1,2]:
            for q in QGRID:
                alert_q, thr = make_alert(driver, q, gate_min, pwin, preq)
                res = score(alert_q, LOOKAHEAD_DAYS, ix_train)
                cand = {"q": float(q), "thr": float(thr), "gate": gate_min, "pwin": pwin, "preq": preq, **res}
                if (best is None) or \
                   ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] > best.get("hits_ok",-1))) or \
                   ((res["fa_py"] <= FA_PER_YEAR_TARGET) and (res["hits"] == best.get("hits_ok",-1)) and (res["fa_py"] < best["fa_py"])) or \
                   ((best["fa_py"] > FA_PER_YEAR_TARGET) and (res["fa_py"] < best["fa_py"])):
                    best = cand.copy(); best["hits_ok"] = res["hits"]

# Apply to full series
alert_final, thr_final = make_alert(driver, best["q"], best["gate"], best["pwin"], best["preq"])
full_starts = (alert_final.eq("WARNING") & ~alert_final.shift(1).eq("WARNING")).fillna(False)
# ------------------------ 6) PREWARN policy + (optional) auto-create override ------------------------
# Policy PREWARN pattern (no state change): driver≥thr & Γ strong & persistence OK (1/3)
persist_hits_today = int((driver.iloc[max(0, len(driver)-3):] >= thr_final).sum())
policy_prewarn_today = (driver.iloc[-1] >= thr_final) and (Gamma_v[-1] >= PREWARN_GAMMA_MIN) and (persist_hits_today >= 1)

# Compute PREWARN streak ending today (from this run's series)
persist_series = (driver >= thr_final).rolling(3).sum().fillna(0) >= 1
policy_series  = persist_series & (pd.Series(Gamma_v, index=idx) >= PREWARN_GAMMA_MIN)
streak = 0
for ts, val in reversed(list(zip(idx, policy_series.values))):
    if ts > TODAY: continue
    if val: streak += 1
    else: break

# If streak>=2 and no active override file, create one (gate=1 for 48h) + webhook
now_utc = pd.Timestamp.utcnow()
if streak >= 2 and override_gate is None:
    override = {
        "gate_min_votes_override": 1,
        "created_utc": now_utc.isoformat(),
        "expires_utc": (now_utc + pd.Timedelta(hours=48)).isoformat(),
        "reason": "PREWARN_streak>=2"
    }
    (DASH / "g_dren_gate_override.json").write_text(json.dumps(override, indent=2))
    _post_webhook(f"[G-DREN] PREWARN streak {streak}d — enabling temporary gate=1 for 48h "
                  f"(driver≥thr & Γ strong). Override ends {override['expires_utc']}.")

# Notify on new WARNING start today
if any(idx <= TODAY):
    last2 = idx[idx <= TODAY][-2:] if len(idx[idx <= TODAY])>=2 else idx[-1:]
    if len(last2)==2:
        if alert_final.loc[last2[-1]]=="WARNING" and alert_final.loc[last2[-2]]!="WARNING":
            _post_webhook(f"[G-DREN] WARNING start {str(last2[-1].date())} — driver={driver.loc[last2[-1]]:.3f}, thr={thr_final:.3f}")

# ------------------------ 7) Save artifacts ------------------------
RUN = ROOT / time.strftime("%Y%m%d-%H%M%SZ", time.gmtime()) / "v03a_daily_v02"
RUN.mkdir(parents=True, exist_ok=True)

out_df = DF.assign(risk_pct=risk_pct, cons_pct=cons_pct, DRIVER=driver, alert=alert_final)
cols = ["RISK","risk_pct","cons_pct","DRIVER","alert","EVENT_ONSET","Theta_mkt","Theta_vix","Theta_cred","Gamma_mkt_vix","AR1_mkt"]
out_df[cols].to_csv(RUN / "gdren_v03a_daily_v02_alerts.csv")

metrics = {
    "q": float(best["q"]), "thr": float(best["thr"]),
    "gate_min_votes": int(best["gate"]), "persist_win": int(best["pwin"]), "persist_req": int(best["preq"]),
    "fa_per_year_target": float(FA_PER_YEAR_TARGET),
    "override_active": bool(override_gate is not None),
    "override_gate_min_votes": (int(override_gate) if override_gate is not None else None),
    "train_hits": int(best["hits"]), "train_fa_per_year": float(best["fa_py"]),
    "full_starts": int(full_starts.sum()),
    "horizon_days": int(LOOKAHEAD_DAYS),
    "prewarn_today": bool(policy_prewarn_today),
    "prewarn_streak_days": int(streak)
}
(RUN / "gdren_v03a_daily_v02_metrics.json").write_text(json.dumps(metrics, indent=2))

# ------------------------ 8) Update dashboard + transitions ------------------------
def _status_line(name, date, base_alert, extras=None):
    parts = [f"{name:10s} — {base_alert:8s} as of {date}"]
    if extras: parts.append(" | " + " ".join(extras))
    return "".join(parts)

# SPIKE (v0.2b) status (if present)
spike_fp = _latest(["*_v02b/gdren_v02b_alerts.csv", "*_v02b/*alerts*.csv"])
payload = {"generated_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())}
lines = []
if spike_fp and spike_fp.exists():
    S = _load_csv(spike_fp)
    s_last = (S.index[S.index<=TODAY][-1] if any(S.index<=TODAY) else S.index[-1])
    s_alert = str(S.loc[s_last, "alert"])
    s_nexus = float(S.loc[s_last, "NEXUS"]) if "NEXUS" in S.columns and pd.notna(S.loc[s_last, "NEXUS"]) else None
    lines.append(_status_line("SPIKE", str(s_last.date()), s_alert, [f"NEXUS={s_nexus:.3f}"] if s_nexus is not None else []))
    s_hist = _transitions(S, "alert", n=5)
    payload["spike"] = {"file": str(spike_fp), "asof": str(s_last.date()), "alert": s_alert, "nexus": s_nexus, "recent": s_hist}
else:
    lines.append("SPIKE      — (no recent v0.2b alerts found)")
    payload["spike"] = {"file": None, "alert": None, "recent": []}

# REGIME status (from this run)
R = _load_csv(RUN / "gdren_v03a_daily_v02_alerts.csv")
r_last = (R.index[R.index<=TODAY][-1] if any(R.index<=TODAY) else R.index[-1])
base_alert = str(R.loc[r_last, "alert"])
driver_last= float(R.loc[r_last, "DRIVER"]) if "DRIVER" in R.columns else None
risk_last  = float(R.loc[r_last, "RISK"])   if "RISK"   in R.columns else None
Gamma_last = float(R.loc[r_last, "Gamma_mkt_vix"]) if "Gamma_mkt_vix" in R.columns else 0.0
# show PREWARN overlay text if policy pattern holds
policy_alert = base_alert
persist_hits = int((R["DRIVER"].iloc[max(0, R.index.get_loc(r_last)-2):R.index.get_loc(r_last)+1] >= best["thr"]).sum()) if "DRIVER" in R.columns else 0
persist_ok   = (persist_hits >= 1)
if base_alert!="WARNING" and driver_last is not None and driver_last>=best["thr"] and persist_ok and (Gamma_last >= PREWARN_GAMMA_MIN):
    policy_alert = "PREWARN"
extras = []
if risk_last is not None:   extras.append(f"RISK={risk_last:.3f}")
if driver_last is not None: extras.append(f"DRIVER={driver_last:.3f}")
if policy_alert == "PREWARN" and base_alert != "WARNING": extras.append("policy=PREWARN(Γ strong)")
lines.append(_status_line("REGIME", str(r_last.date()), policy_alert, extras))
r_hist = _transitions(R, "alert", n=5)
payload["regime"] = {
    "file": str(RUN / "gdren_v03a_daily_v02_alerts.csv"),
    "asof": str(r_last.date()),
    "alert": base_alert, "policy_alert": policy_alert,
    "driver": driver_last, "risk": risk_last,
    "threshold": float(best["thr"]), "q": float(best["q"]),
    "gate_min_votes": int(best["gate"]), "persist_win": int(best["pwin"]), "persist_req": int(best["preq"]),
    "recent": r_hist
}

# write dashboard JSON
dash_path = DASH / "g_dren_dashboard_status.json"
dash_path.write_text(json.dumps(payload, indent=2))

# transitions CSVs
def export_transitions_csv(fp_in, fp_out):
    D = _load_csv(fp_in)
    hist = _transitions(D, "alert", n=9999)
    pd.DataFrame(hist, columns=["date","state","days"]).to_csv(fp_out, index=False)

if spike_fp and spike_fp.exists():
    export_transitions_csv(spike_fp, DASH / "spike_transitions.csv")
export_transitions_csv(RUN / "gdren_v03a_daily_v02_alerts.csv", DASH / "regime_transitions.csv")

# ------------------------ 9) Print status + artifact locations ------------------------
print("\n".join(lines))
print(f"\n[dashboard] {dash_path}")
print(f"[history  ] {DASH/'spike_transitions.csv'} (if SPIKE present)")
print(f"[history  ] {DASH/'regime_transitions.csv'}")
print("Artifacts:", json.dumps({
    "alerts_csv": str(RUN / "gdren_v03a_daily_v02_alerts.csv"),
    "metrics_json": str(RUN / "gdren_v03a_daily_v02_metrics.json")
}, indent=2))


[override warn] Cannot localize tz-aware Timestamp, use tz_convert for conversions
SPIKE      — OK       as of 2025-11-05 | NEXUS=0.286

[dashboard] E:\CNT\artifacts\g_dren\dashboard\g_dren_dashboard_status.json
[history  ] E:\CNT\artifacts\g_dren\dashboard\spike_transitions.csv (if SPIKE present)
[history  ] E:\CNT\artifacts\g_dren\dashboard\regime_transitions.csv
Artifacts: {
  "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251106-033119Z\\v03a_daily_v02\\gdren_v03a_daily_v02_alerts.csv",
  "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251106-033119Z\\v03a_daily_v02\\gdren_v03a_daily_v02_metrics.json"
}


In [29]:
# BEFORE
# exp = pd.Timestamp(_ov.get("expires_utc", "1970-01-01T00:00:00Z"))
# print(f"[override] gate_min_votes={override_gate} active until {exp.tz_localize('UTC')}")

# AFTER
now_utc = pd.Timestamp.now(tz="UTC")
exp = pd.to_datetime(_ov.get("expires_utc", "1970-01-01T00:00:00Z"), utc=True)
if now_utc < exp:
    override_gate = int(_ov.get("gate_min_votes_override", 1))
    print(f"[override] gate_min_votes={override_gate} active until {exp.strftime('%Y-%m-%d %H:%MZ')}")


[override] gate_min_votes=1 active until 2025-11-07 23:59Z


In [30]:
# === G-DREN Warning Run Card v0.1 — snapshot the latest WARNING start ===
import json
from pathlib import Path
import pandas as pd
import numpy as np

CNT  = Path("E:/CNT")
ROOT = CNT / "artifacts" / "g_dren"
TODAY = pd.Timestamp.today().normalize()

def _latest(globs):
    c=[]
    for g in globs: c += list(ROOT.glob(g))
    return sorted(c, key=lambda p: p.stat().st_mtime, reverse=True)[0] if c else None

# Use the latest Daily v0.2 outputs you just created
alerts_fp  = _latest(["*/v03a_daily_v02/gdren_v03a_daily_v02_alerts.csv"])
metrics_fp = _latest(["*/v03a_daily_v02/gdren_v03a_daily_v02_metrics.json"])
assert alerts_fp and metrics_fp, "No v03a_daily_v02 artifacts found."

A = pd.read_csv(alerts_fp, index_col=0, parse_dates=True).sort_index()
A.index = A.index.tz_localize(None).normalize()
M = json.loads(metrics_fp.read_text())

# Find the most recent WARNING start
starts = (A["alert"].eq("WARNING") & ~A["alert"].shift(1).eq("WARNING")).fillna(False)
if not starts.any():
    raise SystemExit("No WARNING start found in this file.")
t_start = A.index[starts].max()

# Pull context around the start
ctx = A.loc[:t_start].tail(8).copy()
thr = float(M.get("thr", M.get("threshold", 1.0)))
q   = float(M.get("q", M.get("calibrated_q_train", 0.97)))
gate= int(M.get("gate_min_votes", 2))
pwin= int(M.get("persist_win", 3)); preq = int(M.get("persist_req", 1))

# Compute votes & persistence at start
def _get(row, k, default=0.0): 
    return float(row[k]) if k in row and pd.notna(row[k]) else default

row = A.loc[t_start]
driver  = _get(row, "DRIVER", np.nan)
risk    = _get(row, "RISK",   np.nan)
Theta_m = _get(row, "Theta_mkt")
Theta_v = _get(row, "Theta_vix")
Theta_c = _get(row, "Theta_cred")
Gamma_v = _get(row, "Gamma_mkt_vix")
# 7d slope on DRIVER (if available)
i = A.index.get_loc(t_start)
slope7 = float(A["DRIVER"].iloc[i] - A["DRIVER"].iloc[i-7]) if ("DRIVER" in A.columns and i>=7) else 0.0
# persistence over last 3 days ≥ thr
persist_hits = int((A["DRIVER"].iloc[max(0, i-2):i+1] >= thr).sum()) if "DRIVER" in A.columns else 0
persist_ok   = bool(persist_hits >= preq)

votes = int(Theta_v>=0.25) + int(Theta_c>=0.25) + int(Theta_m>=0.25) + int(Gamma_v>=0.08) + int(slope7>0)

# Write run card
RUNDIR = ROOT / "warning_runs" / t_start.strftime("%Y-%m-%d")
RUNDIR.mkdir(parents=True, exist_ok=True)
card = {
  "start_ts": str(t_start.date()),
  "driver": driver, "risk": risk,
  "threshold": thr, "q": q,
  "gate_min_votes": gate, "persist_win": pwin, "persist_req": preq,
  "votes": {"Theta_vix": Theta_v, "Theta_cred": Theta_c, "Theta_mkt": Theta_m, "Gamma_mkt_vix": Gamma_v, "slope7": slope7, "total": int(votes)},
  "persistence_hits_last3": int(persist_hits), "persistence_ok": bool(persist_ok),
  "artifacts": {"alerts_csv": str(alerts_fp), "metrics_json": str(metrics_fp)}
}
(RUNDIR / "run_card.json").write_text(json.dumps(card, indent=2))
ctx.to_csv(RUNDIR / "context_last8_days.csv")

print(f"[run-card] saved → {RUNDIR}")
print(json.dumps(card, indent=2))


{
  "start_ts": "2025-10-28",
  "driver": 1.0,
  "risk": 9.357622968839301e-14,
  "threshold": 1.0,
  "q": 0.965,
  "gate_min_votes": 1,
  "persist_win": 3,
  "persist_req": 1,
  "votes": {
    "Theta_vix": 0.015625,
    "Theta_cred": 0.0234375,
    "Theta_mkt": 0.015625,
    "Gamma_mkt_vix": 0.7633608406051142,
    "slope7": 0.0,
    "total": 1
  },
  "persistence_hits_last3": 3,
  "persistence_ok": true,
  "artifacts": {
    "alerts_csv": "E:\\CNT\\artifacts\\g_dren\\20251106-033119Z\\v03a_daily_v02\\gdren_v03a_daily_v02_alerts.csv",
    "metrics_json": "E:\\CNT\\artifacts\\g_dren\\20251106-033119Z\\v03a_daily_v02\\gdren_v03a_daily_v02_metrics.json"
  }
}
