In [1]:
# === CNT "Wake-Up Field" Mega Cell ============================================
# Requirements: numpy, pandas, matplotlib, scikit-learn, statsmodels, torch, requests
# Optional (auto-skippable): pytrends (Google Trends), prophet (forecast), plotly (interactive)
# -----------------------------------------------------------------------------
import os, sys, time, json, math, datetime as dt, warnings, requests
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from statsmodels.tsa.statespace.structural import UnobservedComponents

warnings.filterwarnings("ignore", category=FutureWarning)

# === CONFIGURATION ===========================================================
CFG = dict(
    ROOT = Path.cwd(),
    REGION = "US",
    HORIZON_WEEKS = 52,
    THRESHOLD_TAU = 0.65,
    RUN_LENGTH_K = 6,
    BACKTEST_START = "2012-01-01",
    TOPICS = ["government surveillance","censorship","civil liberties","free speech"],
    WIKI_PAGES = [
        "Mass_surveillance_in_the_United_States",
        "First_Amendment_to_the_United_States_Constitution",
        "Censorship_in_the_United_States",
        "Civil_liberties_in_the_United_States"
    ],
    SAVE_DIR = "artifacts",
    RNG_SEED = 1337
)

os.makedirs(CFG["SAVE_DIR"], exist_ok=True)
np.random.seed(CFG["RNG_SEED"])

# === UTILITY FUNCTIONS =======================================================
def as_week_index(dts): return pd.to_datetime(dts).to_period('W-MON').to_timestamp()

def spectral_entropy(x, nseg=8, eps=1e-12):
    x = pd.Series(x).dropna().values
    if len(x) < 16: return np.nan
    x = (x - x.mean()) / (x.std()+1e-9)
    seg = max(16, len(x)//nseg)
    ps = []
    for i in range(0, len(x)-seg+1, seg):
        X = np.fft.rfft(x[i:i+seg]); P = (X*np.conj(X)).real; P /= P.sum()+eps; ps.append(P)
    Pm = np.mean(ps, axis=0)
    H = -(Pm*np.log(Pm+eps)).sum(); Hmax = math.log(len(Pm))
    return float(1 - H/Hmax)

def zscore(s): return (s - np.nanmean(s)) / (np.nanstd(s) + 1e-9)
def logistic_scale(s):
    q1,q2,q3 = np.nanquantile(s,[.1,.5,.9]); scale = (q3-q1)/2 or np.nanstd(s) or 1.0
    return 1/(1+np.exp(-(s-q2)/(scale+1e-9)))

# === SAFE IMPORT FOR PYTRENDS ================================================
try:
    from pytrends.request import TrendReq
    _HAS_TRENDS = True
except Exception:
    print("[INFO] pytrends unavailable; skipping Google Trends.")
    TrendReq = None; _HAS_TRENDS = False

# === FETCHERS ================================================================
def fetch_trends(topics, geo="US", since="2012-01-01"):
    if not _HAS_TRENDS: return pd.DataFrame()
    pytrends = TrendReq(hl="en-US", tz=360)
    frames = []
    for kw in topics:
        try:
            pytrends.build_payload([kw], timeframe=f"{since} {dt.date.today():%Y-%m-%d}", geo=geo)
            df = pytrends.interest_over_time()
            if not df.empty:
                s = df[kw].rename(kw); s.index = as_week_index(s.index); frames.append(s)
            time.sleep(1.0)
        except Exception as e: print(f"[WARN] pytrends failed for {kw}: {e}")
    return pd.concat(frames, axis=1).sort_index() if frames else pd.DataFrame()

def fetch_wikiviews(pages, since="2012-01-01", project="en.wikipedia", agent="user"):
    def one_page(title):
        start = pd.Timestamp(since).strftime("%Y%m0100")
        end = (pd.Timestamp(dt.date.today()) + pd.offsets.Day(0)).strftime("%Y%m%d00")
        url = (f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
               f"{project}/all-access/{agent}/{title}/daily/{start}/{end}")
        try:
            r = requests.get(url, timeout=20); r.raise_for_status()
            data = r.json().get("items", [])
            ts = {pd.to_datetime(i["timestamp"][:8]): i["views"] for i in data}
            s = pd.Series(ts, name=title).sort_index().resample("W-MON").sum()
            return s
        except Exception as e:
            print(f"[WARN] wiki fail {title}: {e}"); return pd.Series(dtype=float)
    cols=[one_page(p.replace(" ","_")) for p in pages]; cols=[c for c in cols if c.shape[0]]
    return pd.concat(cols, axis=1).sort_index() if cols else pd.DataFrame()

# === INGEST ==================================================================
print("Fetching data...")
trends = fetch_trends(CFG["TOPICS"], geo=CFG["REGION"], since=CFG["BACKTEST_START"])
wiki   = fetch_wikiviews(CFG["WIKI_PAGES"], since=CFG["BACKTEST_START"])
frames = [f for f in [trends, wiki] if not f.empty]
if not frames: raise RuntimeError("No data sources available.")
df = pd.concat(frames, axis=1).sort_index().fillna(method="ffill").fillna(method="bfill")

# === FEATURES + FORECASTABILITY ==============================================
feat = pd.DataFrame(index=df.index)
for c in df.columns:
    feat[c] = df[c]; feat[c+"_z"]=zscore(df[c]); feat[c+"_vol4"]=df[c].pct_change().rolling(4).std()
weights = {c: spectral_entropy(df[c].values) for c in df.columns}
w = pd.Series(weights).fillna(0.5); w=(w-w.min())/(w.max()-w.min()+1e-9); w=w.clip(0.05,1.0)

# === DYNAMIC FACTOR (OAI) ====================================================
scaler = StandardScaler(); X = scaler.fit_transform(df.values)
W = np.diag(np.sqrt(w[df.columns].values)); Xw = X.dot(W)
pca = PCA(n_components=1, random_state=CFG["RNG_SEED"])
oai_raw = pca.fit_transform(Xw).ravel()
OAI = logistic_scale(zscore(oai_raw)); OAI = pd.Series(OAI, index=df.index, name="OAI")

# === STATE-SPACE NOWCAST =====================================================
exo = feat.filter(regex="_z$|_vol4$").fillna(0)
model = UnobservedComponents(endog=OAI, level='local', exog=exo)
res = model.fit(disp=False)
h = CFG["HORIZON_WEEKS"]
lastX = exo.iloc[-1:].values; Xf = np.repeat(lastX, h, axis=0)
fc = res.get_forecast(steps=h, exog=Xf)
idxf = pd.date_range(OAI.index[-1]+pd.offsets.Week(1), periods=h, freq="W-MON")
OAI_fc = pd.Series(fc.predicted_mean.clip(0,1), index=idxf)

# === CHANGE-POINT DETECTION (CUSUM) =========================================
resid = OAI - res.fittedvalues.reindex_like(OAI).fillna(method="bfill")
k,hc = resid.std()*0.25, resid.std()*3.0
pos=neg=0; alarms=[]
for t,e in resid.items():
    pos=max(0,pos+e-k); neg=min(0,neg+e+k)
    if pos>hc or abs(neg)>hc: alarms.append(t); pos=neg=0
cp_dates = alarms[-5:]

# === EVENT FORECAST ==========================================================
tau,k = CFG["THRESHOLD_TAU"], CFG["RUN_LENGTH_K"]
sig = float(resid.std() or 0.05)
n_sims = 1500
paths = np.clip(OAI_fc.values + np.random.normal(0,sig,(n_sims,h)),0,1)
def sustained(sim,tau,k):
    run=0
    for i,a in enumerate(sim>=tau):
        run=run+1 if a else 0
        if run>=k: return i
    return None
hits=[sustained(p,tau,k) for p in paths]; hit_idxs=[x for x in hits if x is not None]
if hit_idxs:
    dates=[idxf[i] for i in hit_idxs]
    med=np.median(pd.to_datetime(dates))
    pH=len(hit_idxs)/n_sims
    d80=(np.percentile(pd.to_datetime(dates),10),np.percentile(pd.to_datetime(dates),90))
else: med,pH,d80=None,0.0,(None,None)

# === VISUALS ================================================================
plt.figure(figsize=(10,5))
plt.plot(OAI.index,OAI,label="OAI")
plt.plot(res.fittedvalues.index,res.fittedvalues.clip(0,1),"--",label="Fit")
plt.axhline(tau,linestyle=":",label=f"τ={tau}")
for d in cp_dates: plt.axvline(d,linestyle=":",alpha=.5)
plt.legend(); plt.title("Overreach Awareness Index (OAI)")
plt.tight_layout(); plt.savefig("artifacts/oai_fit.png",dpi=160); plt.close()

plt.figure(figsize=(10,4))
plt.plot(OAI_fc.index,OAI_fc,label="Forecast mean")
plt.axhline(tau,linestyle=":",label=f"τ={tau}")
plt.legend(); plt.title("OAI Forecast Horizon")
plt.tight_layout(); plt.savefig("artifacts/oai_fc.png",dpi=160); plt.close()

prob_curve=[np.mean([(x is not None and x<=t) for x in hits]) for t in range(h)]
pd.Series(prob_curve,index=idxf).plot(figsize=(10,3),ylim=(0,1),title="Probability of sustained crossing"); 
plt.tight_layout(); plt.savefig("artifacts/oai_prob.png",dpi=160); plt.close()

# === SUMMARY ================================================================
summary = dict(
    generated_at = dt.datetime.utcnow().isoformat()+"Z",
    horizon_weeks = h,
    threshold_tau = tau,
    run_length_k = k,
    last_week = str(OAI.index[-1].date()),
    change_points = [str(pd.Timestamp(d).date()) for d in cp_dates],
    prob_within_horizon = round(pH,3),
    median_event_date = (str(pd.Timestamp(med).date()) if med else None),
    event_window_80 = tuple(str(pd.Timestamp(d).date()) if d else None for d in d80),
    weights = {k:float(v) for k,v in w.to_dict().items()},
    pca_var = float(pca.explained_variance_ratio_[0])
)
with open(CFG["SAVE_DIR"]+"/oai_summary.json","w") as f: json.dump(summary,f,indent=2)
print(json.dumps(summary,indent=2))
print("\nFigures saved in ./artifacts")


[INFO] pytrends unavailable; skipping Google Trends.
Fetching data...
[WARN] wiki fail Mass_surveillance_in_the_United_States: 403 Client Error: Forbidden for url: https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/Mass_surveillance_in_the_United_States/daily/2012010100/2025101600
[WARN] wiki fail First_Amendment_to_the_United_States_Constitution: 403 Client Error: Forbidden for url: https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/First_Amendment_to_the_United_States_Constitution/daily/2012010100/2025101600
[WARN] wiki fail Censorship_in_the_United_States: 403 Client Error: Forbidden for url: https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/Censorship_in_the_United_States/daily/2012010100/2025101600
[WARN] wiki fail Civil_liberties_in_the_United_States: 403 Client Error: Forbidden for url: https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.w

RuntimeError: No data sources available.

In [2]:
# === CNT "Wake-Up Field" Mega-Cell (Resilient Edition) ========================
# Works online or offline; includes static fallback.
# -----------------------------------------------------------------------------
import os, sys, time, json, math, datetime as dt, warnings, requests
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from statsmodels.tsa.statespace.structural import UnobservedComponents
warnings.filterwarnings("ignore", category=FutureWarning)

# === CONFIG ==================================================================
CFG = dict(
    ROOT = Path.cwd(),
    REGION = "US",
    HORIZON_WEEKS = 52,
    THRESHOLD_TAU = 0.65,
    RUN_LENGTH_K = 6,
    BACKTEST_START = "2012-01-01",
    TOPICS = ["government surveillance","censorship","civil liberties","free speech"],
    WIKI_PAGES = [
        "Mass_surveillance_in_the_United_States",
        "First_Amendment_to_the_United_States_Constitution",
        "Censorship_in_the_United_States",
        "Civil_liberties_in_the_United_States"
    ],
    SAVE_DIR = "artifacts",
    RNG_SEED = 1337
)
os.makedirs(CFG["SAVE_DIR"], exist_ok=True)
np.random.seed(CFG["RNG_SEED"])

# === REQUESTS SESSION (fixes 403s) ===========================================
SESSION = requests.Session()
SESSION.headers.update({
    "User-Agent": "CNTLab/1.0 (https://example.org; contact: researcher@fieldwalker.org)"
})
requests_get = SESSION.get

# === SAFE IMPORTS ============================================================
try:
    from pytrends.request import TrendReq
    _HAS_TRENDS = True
except Exception:
    print("[INFO] pytrends unavailable; skipping Google Trends.")
    TrendReq = None; _HAS_TRENDS = False

# === UTILITIES ===============================================================
def as_week_index(dts): return pd.to_datetime(dts).to_period('W-MON').to_timestamp()
def spectral_entropy(x, nseg=8, eps=1e-12):
    x = pd.Series(x).dropna().values
    if len(x)<16: return np.nan
    x=(x-x.mean())/(x.std()+1e-9)
    seg=max(16,len(x)//nseg); ps=[]
    for i in range(0,len(x)-seg+1,seg):
        X=np.fft.rfft(x[i:i+seg]); P=(X*np.conj(X)).real; P/=P.sum()+eps; ps.append(P)
    Pm=np.mean(ps,axis=0); H=-(Pm*np.log(Pm+eps)).sum(); Hmax=math.log(len(Pm))
    return float(1-H/Hmax)
def zscore(s): return (s-np.nanmean(s))/(np.nanstd(s)+1e-9)
def logistic_scale(s):
    q1,q2,q3=np.nanquantile(s,[.1,.5,.9]); scale=(q3-q1)/2 or np.nanstd(s) or 1.0
    return 1/(1+np.exp(-(s-q2)/(scale+1e-9)))

# === DATA FETCHERS ===========================================================
def fetch_trends(topics, geo="US", since="2012-01-01"):
    if not _HAS_TRENDS: return pd.DataFrame()
    pytrends = TrendReq(hl="en-US", tz=360)
    frames=[]
    for kw in topics:
        try:
            pytrends.build_payload([kw], timeframe=f"{since} {dt.date.today():%Y-%m-%d}", geo=geo)
            df=pytrends.interest_over_time()
            if not df.empty:
                s=df[kw].rename(kw); s.index=as_week_index(s.index); frames.append(s)
            time.sleep(1.0)
        except Exception as e: print(f"[WARN] pytrends failed for {kw}: {e}")
    return pd.concat(frames,axis=1).sort_index() if frames else pd.DataFrame()

def fetch_wikiviews(pages, since="2012-01-01", project="en.wikipedia", agent="user"):
    def one_page(title):
        start=pd.Timestamp(since).strftime("%Y%m0100")
        end=(pd.Timestamp(dt.date.today())+pd.offsets.Day(0)).strftime("%Y%m%d00")
        url=(f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
             f"{project}/all-access/{agent}/{title}/daily/{start}/{end}")
        try:
            r=requests_get(url,timeout=20); r.raise_for_status()
            data=r.json().get("items",[])
            ts={pd.to_datetime(i["timestamp"][:8]):i["views"] for i in data}
            s=pd.Series(ts,name=title).sort_index().resample("W-MON").sum()
            return s
        except Exception as e:
            print(f"[WARN] wiki fail {title}: {e}")
            return pd.Series(dtype=float)
    cols=[one_page(p.replace(" ","_")) for p in pages]; cols=[c for c in cols if c.shape[0]]
    return pd.concat(cols,axis=1).sort_index() if cols else pd.DataFrame()

# === INGEST ==================================================================
print("Fetching data...")
trends = fetch_trends(CFG["TOPICS"], geo=CFG["REGION"], since=CFG["BACKTEST_START"])
wiki   = fetch_wikiviews(CFG["WIKI_PAGES"], since=CFG["BACKTEST_START"])
frames=[f for f in [trends,wiki] if not f.empty]

# --- Fallback if all remote sources fail -------------------------------------
if not frames:
    print("[FALLBACK] Using static Pew dataset from OWID.")
    pew = pd.read_csv(
        "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Trust%20in%20government%20(Pew)/Trust%20in%20government%20(Pew).csv"
    )
    pew = pew.rename(columns={"Year":"date","Trust in government (Pew)":"trust"}).dropna()
    pew["date"] = pd.to_datetime(pew["date"], format="%Y")
    pew = pew.set_index("date")["trust"].resample("W-MON").ffill().to_frame()
    frames = [pew]

df = pd.concat(frames,axis=1).sort_index().fillna(method="ffill").fillna(method="bfill")

# === FEATURES ================================================================
feat=pd.DataFrame(index=df.index)
for c in df.columns:
    feat[c]=df[c]; feat[c+"_z"]=zscore(df[c]); feat[c+"_vol4"]=df[c].pct_change().rolling(4).std()
weights={c:spectral_entropy(df[c].values) for c in df.columns}
w=pd.Series(weights).fillna(0.5); w=(w-w.min())/(w.max()-w.min()+1e-9); w=w.clip(0.05,1.0)

# === OVERREACH AWARENESS INDEX (OAI) ========================================
scaler=StandardScaler(); X=scaler.fit_transform(df.values)
W=np.diag(np.sqrt(w[df.columns].values)); Xw=X.dot(W)
pca=PCA(n_components=1, random_state=CFG["RNG_SEED"])
oai_raw=pca.fit_transform(Xw).ravel()
OAI=pd.Series(logistic_scale(zscore(oai_raw)), index=df.index, name="OAI")

# === NOWCAST =================================================================
exo=feat.filter(regex="_z$|_vol4$").fillna(0)
model=UnobservedComponents(endog=OAI, level='local', exog=exo)
res=model.fit(disp=False)
h=CFG["HORIZON_WEEKS"]
lastX=exo.iloc[-1:].values; Xf=np.repeat(lastX,h,axis=0)
fc=res.get_forecast(steps=h, exog=Xf)
idxf=pd.date_range(OAI.index[-1]+pd.offsets.Week(1), periods=h, freq="W-MON")
OAI_fc=pd.Series(fc.predicted_mean.clip(0,1), index=idxf)

# === CHANGE POINTS ===========================================================
resid=OAI - res.fittedvalues.reindex_like(OAI).fillna(method="bfill")
k_cusum,res_thr=resid.std()*0.25,resid.std()*3.0
pos=neg=0; alarms=[]
for t,e in resid.items():
    pos=max(0,pos+e-k_cusum); neg=min(0,neg+e+k_cusum)
    if pos>res_thr or abs(neg)>res_thr: alarms.append(t); pos=neg=0
cp_dates=alarms[-5:]

# === EVENT SIMULATION ========================================================
tau,k=CFG["THRESHOLD_TAU"],CFG["RUN_LENGTH_K"]
sig=float(resid.std() or 0.05); n_sims=1500
paths=np.clip(OAI_fc.values+np.random.normal(0,sig,(n_sims,h)),0,1)
def sustained(sim,tau,k):
    run=0
    for i,a in enumerate(sim>=tau):
        run=run+1 if a else 0
        if run>=k: return i
    return None
hits=[sustained(p,tau,k) for p in paths]; hit_idxs=[x for x in hits if x is not None]
if hit_idxs:
    dates=[idxf[i] for i in hit_idxs]
    med=np.median(pd.to_datetime(dates)); pH=len(hit_idxs)/n_sims
    d80=(np.percentile(pd.to_datetime(dates),10),np.percentile(pd.to_datetime(dates),90))
else: med,pH,d80=None,0.0,(None,None)

# === PLOTS ===================================================================
plt.figure(figsize=(10,5))
plt.plot(OAI.index,OAI,label="OAI"); plt.plot(res.fittedvalues.index,res.fittedvalues.clip(0,1),"--",label="Fit")
plt.axhline(tau,linestyle=":",label=f"τ={tau}")
for d in cp_dates: plt.axvline(d,linestyle=":",alpha=.5)
plt.legend(); plt.title("Overreach Awareness Index (OAI)")
plt.tight_layout(); plt.savefig("artifacts/oai_fit.png",dpi=160); plt.close()

plt.figure(figsize=(10,4))
plt.plot(OAI_fc.index,OAI_fc,label="Forecast mean"); plt.axhline(tau,linestyle=":",label=f"τ={tau}")
plt.legend(); plt.title("OAI Forecast Horizon")
plt.tight_layout(); plt.savefig("artifacts/oai_fc.png",dpi=160); plt.close()

prob_curve=[np.mean([(x is not None and x<=t) for x in hits]) for t in range(h)]
pd.Series(prob_curve,index=idxf).plot(figsize=(10,3),ylim=(0,1),title="Probability of sustained crossing")
plt.tight_layout(); plt.savefig("artifacts/oai_prob.png",dpi=160); plt.close()

# === SUMMARY OUTPUT ==========================================================
summary=dict(
    generated_at=dt.datetime.utcnow().isoformat()+"Z",
    horizon_weeks=h,
    threshold_tau=tau,
    run_length_k=k,
    last_week=str(OAI.index[-1].date()),
    change_points=[str(pd.Timestamp(d).date()) for d in cp_dates],
    prob_within_horizon=round(pH,3),
    median_event_date=(str(pd.Timestamp(med).date()) if med else None),
    event_window_80=tuple(str(pd.Timestamp(d).date()) if d else None for d in d80),
    weights={k:float(v) for k,v in w.to_dict().items()},
    pca_var=float(pca.explained_variance_ratio_[0])
)
with open(CFG["SAVE_DIR"]+"/oai_summary.json","w") as f: json.dump(summary,f,indent=2)
print(json.dumps(summary,indent=2))
print("\nFigures saved in ./artifacts")


[INFO] pytrends unavailable; skipping Google Trends.
Fetching data...


ValueError: Invalid level/trend specification: 'local'

In [3]:
# === CNT "Wake-Up Field" Mega-Cell — Resilient + Statsmodels-Patched =========
# Runs with or without internet. Builds OAI, nowcasts, detects regime shifts,
# and forecasts sustained threshold-crossing dates.
# -----------------------------------------------------------------------------
import os, sys, time, json, math, datetime as dt, warnings, requests
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from statsmodels.tsa.statespace.structural import UnobservedComponents
warnings.filterwarnings("ignore", category=FutureWarning)

# === CONFIG ==================================================================
CFG = dict(
    ROOT = Path.cwd(),
    REGION = "US",
    HORIZON_WEEKS = 52,        # forecast horizon (weeks)
    THRESHOLD_TAU = 0.65,      # sustained-awareness threshold in [0,1]
    RUN_LENGTH_K = 6,          # require k consecutive weeks above τ
    BACKTEST_START = "2012-01-01",
    TOPICS = ["government surveillance","censorship","civil liberties","free speech"],
    WIKI_PAGES = [
        "Mass_surveillance_in_the_United_States",
        "First_Amendment_to_the_United_States_Constitution",
        "Censorship_in_the_United_States",
        "Civil_liberties_in_the_United_States"
    ],
    SAVE_DIR = "artifacts",
    RNG_SEED = 1337
)
os.makedirs(CFG["SAVE_DIR"], exist_ok=True)
np.random.seed(CFG["RNG_SEED"])

# === NETWORK SESSION (fix Wikimedia 403s via User-Agent) =====================
SESSION = requests.Session()
SESSION.headers.update({
    "User-Agent": "CNTLab/1.0 (https://cnt.local; contact: telos@cnt.local)"
})
requests_get = SESSION.get

# === OPTIONAL: Google Trends (skip gracefully if unavailable) ================
try:
    from pytrends.request import TrendReq
    _HAS_TRENDS = True
except Exception:
    print("[INFO] pytrends unavailable; skipping Google Trends.")
    TrendReq = None
    _HAS_TRENDS = False

# === UTILS ===================================================================
def as_week_index(dts):
    return pd.to_datetime(dts).to_period('W-MON').to_timestamp()

def spectral_entropy(x, nseg=8, eps=1e-12):
    x = pd.Series(x).dropna().values
    if len(x) < 16: return np.nan
    x = (x - x.mean()) / (x.std() + 1e-9)
    seg = max(16, len(x)//nseg)
    ps = []
    for i in range(0, len(x)-seg+1, seg):
        X = np.fft.rfft(x[i:i+seg])
        P = (X*np.conj(X)).real
        P = P / (P.sum() + eps)
        ps.append(P)
    Pm = np.mean(ps, axis=0)
    H = -(Pm * np.log(Pm + eps)).sum()
    Hmax = math.log(len(Pm))
    return float(1.0 - H/Hmax)

def zscore(s):
    s = pd.Series(s)
    return (s - s.mean()) / (s.std() + 1e-9)

def logistic_scale(s):
    s = pd.Series(s)
    q1, q2, q3 = s.quantile([0.10, 0.50, 0.90])
    scale = (q3 - q1)/2.0 if q3 > q1 else (s.std() or 1.0)
    return 1.0 / (1.0 + np.exp(-(s - q2) / (scale + 1e-9)))

# === FETCHERS ================================================================
def fetch_trends(topics, geo="US", since="2012-01-01"):
    if not _HAS_TRENDS: return pd.DataFrame()
    pytrends = TrendReq(hl="en-US", tz=360)
    frames = []
    for kw in topics:
        try:
            pytrends.build_payload([kw], timeframe=f"{since} {dt.date.today():%Y-%m-%d}", geo=geo)
            df = pytrends.interest_over_time()
            if df.empty: 
                continue
            s = df[kw].rename(kw)
            s.index = as_week_index(s.index)
            frames.append(s)
            time.sleep(1.0)  # be polite
        except Exception as e:
            print(f"[WARN] pytrends failed for {kw}: {e}")
    return pd.concat(frames, axis=1).sort_index() if frames else pd.DataFrame()

def fetch_wikiviews(pages, since="2012-01-01", project="en.wikipedia", agent="user"):
    def one_page(title):
        start = pd.Timestamp(since).strftime("%Y%m0100")
        end   = (pd.Timestamp(dt.date.today()) + pd.offsets.Day(0)).strftime("%Y%m%d00")
        url = (f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
               f"{project}/all-access/{agent}/{title}/daily/{start}/{end}")
        try:
            r = requests_get(url, timeout=20); r.raise_for_status()
            data = r.json().get("items", [])
            ts = {pd.to_datetime(i["timestamp"][:8]): i["views"] for i in data}
            s  = pd.Series(ts, name=title).sort_index().resample("W-MON").sum()
            return s
        except Exception as e:
            print(f"[WARN] wiki fail {title}: {e}")
            return pd.Series(dtype=float)
    cols = []
    for p in pages:
        s = one_page(p.replace(" ", "_"))
        if s.shape[0]: cols.append(s)
        time.sleep(0.3)
    return pd.concat(cols, axis=1).sort_index() if cols else pd.DataFrame()

# === INGEST (with resilient fallback) =======================================
print("Fetching data...")
trends = fetch_trends(CFG["TOPICS"], geo=CFG["REGION"], since=CFG["BACKTEST_START"])
wiki   = fetch_wikiviews(CFG["WIKI_PAGES"], since=CFG["BACKTEST_START"])
frames = [f for f in [trends, wiki] if not f.empty]

if not frames:
    print("[FALLBACK] Using static Pew dataset from OWID (trust in government).")
    pew = pd.read_csv(
        "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Trust%20in%20government%20(Pew)/Trust%20in%20government%20(Pew).csv"
    )
    pew = pew.rename(columns={"Year":"date","Trust in government (Pew)":"trust"}).dropna(subset=["date","trust"])
    pew["date"] = pd.to_datetime(pew["date"], format="%Y")
    pew = pew.set_index("date")["trust"].resample("W-MON").ffill().to_frame()
    frames = [pew]

df = pd.concat(frames, axis=1).sort_index()
df = df.fillna(method="ffill").fillna(method="bfill")

# === FEATURES & FORECASTABILITY =============================================
feat = pd.DataFrame(index=df.index)
for c in df.columns:
    feat[c] = df[c]
    feat[c+"_z"] = zscore(df[c])
    feat[c+"_vol4"] = df[c].pct_change().rolling(4).std()

weights = {c: spectral_entropy(df[c].values) for c in df.columns}
w = pd.Series(weights).fillna(0.5)
w = (w - w.min()) / (w.max() - w.min() + 1e-12)
w = w.clip(0.05, 1.0)

# === OVERREACH AWARENESS INDEX (OAI) ========================================
scaler = StandardScaler()
X = scaler.fit_transform(df.values)
W = np.diag(np.sqrt(w[df.columns].values))
Xw = X.dot(W)

pca = PCA(n_components=1, random_state=CFG["RNG_SEED"])
oai_raw = pca.fit_transform(Xw).ravel()
OAI = pd.Series(logistic_scale(zscore(oai_raw)), index=df.index, name="OAI")

# === STATE-SPACE NOWCAST (Statsmodels 0.14+ patch) ===========================
exo = feat.filter(regex="_z$|_vol4$").fillna(0)

try:
    # Newer API prefers 'llevel' for a local level component
    model = UnobservedComponents(endog=OAI, level='llevel', exog=exo)
except Exception:
    # Fallback for other builds (trend=True creates a stochastic local level)
    model = UnobservedComponents(endog=OAI, trend=True, exog=exo)

res = model.fit(disp=False)

h = CFG["HORIZON_WEEKS"]
lastX = exo.iloc[-1:].values
Xf = np.repeat(lastX, h, axis=0)
fc = res.get_forecast(steps=h, exog=Xf)
idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
OAI_fc = pd.Series(fc.predicted_mean.clip(0,1), index=idxf, name="OAI_fc")

# === CHANGE-POINTS (CUSUM) ===================================================
resid = OAI - res.fittedvalues.reindex_like(OAI).fillna(method="bfill")
k_cusum = resid.std() * 0.25
thr     = resid.std() * 3.0
pos = neg = 0.0
alarms = []
for t, e in resid.items():
    pos = max(0.0, pos + e - k_cusum)
    neg = min(0.0, neg + e + k_cusum)
    if pos > thr or abs(neg) > thr:
        alarms.append(t); pos = neg = 0.0
cp_dates = alarms[-5:]

# === EVENT SIMULATION (sustained τ for k weeks) ==============================
tau, kreq = CFG["THRESHOLD_TAU"], CFG["RUN_LENGTH_K"]
sig = float(resid.std() or 0.05)
n_sims = 1500
paths = np.clip(OAI_fc.values + np.random.normal(0, sig, (n_sims, h)), 0, 1)

def first_sustained(sim, tau, kreq):
    run = 0
    for i, a in enumerate(sim >= tau):
        run = run + 1 if a else 0
        if run >= kreq: return i
    return None

hits = [first_sustained(p, tau, kreq) for p in paths]
hit_idxs = [x for x in hits if x is not None]

if hit_idxs:
    dates = [idxf[i] for i in hit_idxs]
    med_date = pd.to_datetime(dates).sort_values().iloc[len(dates)//2]
    pH = len(hit_idxs) / n_sims
    d10 = pd.to_datetime(dates).sort_values().iloc[int(0.10*len(dates))]
    d90 = pd.to_datetime(dates).sort_values().iloc[int(0.90*len(dates))]
    d80 = (d10, d90)
else:
    med_date, pH, d80 = None, 0.0, (None, None)

# === PLOTS ===================================================================
plt.figure(figsize=(10,5))
plt.plot(OAI.index, OAI, label="OAI")
plt.plot(res.fittedvalues.index, res.fittedvalues.clip(0,1), "--", label="State-space fit")
plt.axhline(tau, linestyle=":", label=f"τ={tau}")
for d in cp_dates: plt.axvline(d, linestyle=":", alpha=0.5)
plt.legend(); plt.title("Overreach Awareness Index (OAI)")
plt.tight_layout(); plt.savefig(os.path.join(CFG["SAVE_DIR"], "oai_fit.png"), dpi=160); plt.close()

plt.figure(figsize=(10,4))
plt.plot(OAI_fc.index, OAI_fc, label="Forecast mean")
plt.axhline(tau, linestyle=":", label=f"τ={tau}")
plt.legend(); plt.title("OAI Forecast Horizon")
plt.tight_layout(); plt.savefig(os.path.join(CFG["SAVE_DIR"], "oai_fc.png"), dpi=160); plt.close()

prob_curve = [np.mean([(x is not None and x <= t) for x in hits]) for t in range(h)]
pd.Series(prob_curve, index=idxf).plot(figsize=(10,3), ylim=(0,1), title="Probability of sustained crossing (by week t)")
plt.tight_layout(); plt.savefig(os.path.join(CFG["SAVE_DIR"], "oai_prob.png"), dpi=160); plt.close()

# === SUMMARY OUTPUT ==========================================================
summary = dict(
    generated_at = dt.datetime.utcnow().isoformat()+"Z",
    horizon_weeks = h,
    threshold_tau = float(tau),
    run_length_k  = int(kreq),
    last_week     = str(OAI.index[-1].date()),
    change_points = [str(pd.Timestamp(d).date()) for d in cp_dates],
    prob_within_horizon = round(float(pH), 3),
    median_event_date   = (str(pd.Timestamp(med_date).date()) if med_date is not None else None),
    event_window_80     = tuple(str(pd.Timestamp(d).date()) if d is not None else None for d in d80),
    weights = {k: float(v) for k, v in w.to_dict().items()},
    pca_var = float(pca.explained_variance_ratio_[0]),
    sources = {
        "trends_cols": list(trends.columns) if not trends.empty else [],
        "wiki_cols":   list(wiki.columns)   if not wiki.empty   else [],
        "fallback_used": bool(len(frames)==1 and "trust" in df.columns)
    }
)
with open(os.path.join(CFG["SAVE_DIR"], "oai_summary.json"), "w") as f:
    json.dump(summary, f, indent=2)

print(json.dumps(summary, indent=2))
print("\nFigures saved to:", os.path.abspath(CFG["SAVE_DIR"]))


[INFO] pytrends unavailable; skipping Google Trends.
Fetching data...


ValueError: 'shape' elements cannot be negative

In [4]:
# === CNT "Wake-Up Field" Mega-Cell — Ultra-Resilient =========================
# Handles: no pytrends, wiki 403s, offline fallback, Statsmodels 0.14+,
# and too-short series via EWMA fallback (no HP-filter dependency).
# -----------------------------------------------------------------------------
import os, sys, time, json, math, datetime as dt, warnings, requests
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
warnings.filterwarnings("ignore", category=FutureWarning)

# Try importing UCM last (we might not use it if data is short)
try:
    from statsmodels.tsa.statespace.structural import UnobservedComponents
    _HAS_UCM = True
except Exception:
    _HAS_UCM = False

# === CONFIG ==================================================================
CFG = dict(
    ROOT = Path.cwd(),
    REGION = "US",
    HORIZON_WEEKS = 52,
    THRESHOLD_TAU = 0.65,
    RUN_LENGTH_K = 6,
    BACKTEST_START = "2012-01-01",
    TOPICS = ["government surveillance","censorship","civil liberties","free speech"],
    WIKI_PAGES = [
        "Mass_surveillance_in_the_United_States",
        "First_Amendment_to_the_United_States_Constitution",
        "Censorship_in_the_United_States",
        "Civil_liberties_in_the_United_States"
    ],
    SAVE_DIR = "artifacts",
    RNG_SEED = 1337,
    MIN_LEN_UCM = 12,    # require at least 12 weekly points to use UCM
    EWMA_SPAN = 8        # smoothing window for fallback
)
os.makedirs(CFG["SAVE_DIR"], exist_ok=True)
np.random.seed(CFG["RNG_SEED"])

# === NETWORK SESSION (fix Wikimedia 403s) ====================================
SESSION = requests.Session()
SESSION.headers.update({
    "User-Agent": "CNTLab/1.0 (https://cnt.local; contact: telos@cnt.local)"
})
requests_get = SESSION.get

# === OPTIONAL: Google Trends =================================================
try:
    from pytrends.request import TrendReq
    _HAS_TRENDS = True
except Exception:
    print("[INFO] pytrends unavailable; skipping Google Trends.")
    TrendReq = None; _HAS_TRENDS = False

# === UTILS ===================================================================
def as_week_index(dts):
    return pd.to_datetime(dts).to_period('W-MON').to_timestamp()

def spectral_entropy(x, nseg=8, eps=1e-12):
    x = pd.Series(x).dropna().values
    if len(x) < 16: return np.nan
    x = (x - x.mean()) / (x.std() + 1e-9)
    seg = max(16, len(x)//nseg)
    ps = []
    for i in range(0, len(x)-seg+1, seg):
        X = np.fft.rfft(x[i:i+seg]); P = (X*np.conj(X)).real; P /= (P.sum() + eps); ps.append(P)
    Pm = np.mean(ps, axis=0)
    H = -(Pm*np.log(Pm + eps)).sum(); Hmax = math.log(len(Pm))
    return float(1 - H/Hmax)

def zscore(s):
    s = pd.Series(s)
    sd = s.std()
    return (s - s.mean()) / (sd + 1e-9)

def logistic_scale(s):
    s = pd.Series(s)
    q1, q2, q3 = s.quantile([0.10, 0.50, 0.90])
    scale = (q3 - q1)/2.0 if q3 > q1 else (s.std() or 1.0)
    return 1.0 / (1.0 + np.exp(-(s - q2) / (scale + 1e-9)))

# === FETCHERS ================================================================
def fetch_trends(topics, geo="US", since="2012-01-01"):
    if not _HAS_TRENDS: return pd.DataFrame()
    pytrends = TrendReq(hl="en-US", tz=360)
    frames = []
    for kw in topics:
        try:
            pytrends.build_payload([kw], timeframe=f"{since} {dt.date.today():%Y-%m-%d}", geo=geo)
            df = pytrends.interest_over_time()
            if df.empty: continue
            s = df[kw].rename(kw); s.index = as_week_index(s.index); frames.append(s)
            time.sleep(1.0)
        except Exception as e:
            print(f"[WARN] pytrends failed for {kw}: {e}")
    return pd.concat(frames, axis=1).sort_index() if frames else pd.DataFrame()

def fetch_wikiviews(pages, since="2012-01-01", project="en.wikipedia", agent="user"):
    def one_page(title):
        start = pd.Timestamp(since).strftime("%Y%m0100")
        end   = (pd.Timestamp(dt.date.today()) + pd.offsets.Day(0)).strftime("%Y%m%d00")
        url = (f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
               f"{project}/all-access/{agent}/{title}/daily/{start}/{end}")
        try:
            r = requests_get(url, timeout=20); r.raise_for_status()
            data = r.json().get("items", [])
            ts = {pd.to_datetime(i["timestamp"][:8]): i["views"] for i in data}
            s  = pd.Series(ts, name=title).sort_index().resample("W-MON").sum()
            return s
        except Exception as e:
            print(f"[WARN] wiki fail {title}: {e}")
            return pd.Series(dtype=float)
    cols = []
    for p in pages:
        s = one_page(p.replace(" ", "_"))
        if s.shape[0]: cols.append(s)
        time.sleep(0.3)
    return pd.concat(cols, axis=1).sort_index() if cols else pd.DataFrame()

# === INGEST (with resilient fallback) =======================================
print("Fetching data...")
trends = fetch_trends(CFG["TOPICS"], geo=CFG["REGION"], since=CFG["BACKTEST_START"])
wiki   = fetch_wikiviews(CFG["WIKI_PAGES"], since=CFG["BACKTEST_START"])
frames = [f for f in [trends, wiki] if not f.empty]

if not frames:
    print("[FALLBACK] Using static Pew dataset from OWID (trust in government).")
    try:
        pew = pd.read_csv(
            "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Trust%20in%20government%20(Pew)/Trust%20in%20government%20(Pew).csv"
        )
        pew = pew.rename(columns={"Year":"date","Trust in government (Pew)":"trust"}).dropna(subset=["date","trust"])
        pew["date"] = pd.to_datetime(pew["date"], format="%Y")
        pew = pew.set_index("date")["trust"].resample("W-MON").ffill().to_frame()
        frames = [pew]
    except Exception as e:
        raise RuntimeError(f"No data sources available and fallback failed: {e}")

df = pd.concat(frames, axis=1).sort_index()
df = df.fillna(method="ffill").fillna(method="bfill")

# === FEATURES & FORECASTABILITY =============================================
feat = pd.DataFrame(index=df.index)
for c in df.columns:
    feat[c] = df[c]
    feat[c+"_z"] = zscore(df[c])
    feat[c+"_vol4"] = df[c].pct_change().rolling(4).std()

weights = {c: spectral_entropy(df[c].values) for c in df.columns}
w = pd.Series(weights).fillna(0.5)
w = (w - w.min()) / (w.max() - w.min() + 1e-12)
w = w.clip(0.05, 1.0)

# === OVERREACH AWARENESS INDEX (OAI) ========================================
scaler = StandardScaler()
X = scaler.fit_transform(df.values)
W = np.diag(np.sqrt(w[df.columns].values))
Xw = X.dot(W)

pca = PCA(n_components=1, random_state=CFG["RNG_SEED"])
oai_raw = pca.fit_transform(Xw).ravel()
OAI = pd.Series(logistic_scale(zscore(oai_raw)), index=df.index, name="OAI")

# === MODEL SELECTION: UCM or EWMA FALLBACK ==================================
use_ucm = _HAS_UCM and (len(OAI.dropna()) >= CFG["MIN_LEN_UCM"])

# prepare exogenous features for UCM branch
exo = feat.filter(regex="_z$|_vol4$").fillna(0)

if use_ucm:
    # Try UCM with modern parameter spelling; fall back to stochastic trend
    try:
        model = UnobservedComponents(endog=OAI, level='llevel', exog=exo)
    except Exception:
        model = UnobservedComponents(endog=OAI, trend=True, exog=exo)
    res = model.fit(disp=False)

    h = CFG["HORIZON_WEEKS"]
    lastX = exo.iloc[-1:].values
    Xf = np.repeat(lastX, h, axis=0)
    fc_obj = res.get_forecast(steps=h, exog=Xf)
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series(fc_obj.predicted_mean.clip(0,1), index=idxf, name="OAI_fc")
    resid = (OAI - res.fittedvalues.reindex_like(OAI).fillna(method="bfill")).dropna()
    model_used = "UCM"
else:
    # EWMA fallback: smooth + random-walk forecast with residual noise
    print("[FALLBACK] Using EWMA nowcast/forecast (series too short for UCM).")
    h = CFG["HORIZON_WEEKS"]
    OAI_fit = OAI.ewm(span=CFG["EWMA_SPAN"], adjust=False).mean()
    last_val = float(OAI_fit.iloc[-1])
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series([last_val]*h, index=idxf, name="OAI_fc")
    resid = (OAI - OAI_fit).dropna()
    model_used = "EWMA"

# === CHANGE-POINTS (CUSUM) ===================================================
k_cusum = resid.std() * 0.25
thr     = resid.std() * 3.0
pos = neg = 0.0
alarms = []
for t, e in resid.items():
    pos = max(0.0, pos + e - k_cusum)
    neg = min(0.0, neg + e + k_cusum)
    if pos > thr or abs(neg) > thr:
        alarms.append(t); pos = neg = 0.0
cp_dates = alarms[-5:]

# === EVENT SIMULATION (sustained τ for k weeks) ==============================
tau, kreq = CFG["THRESHOLD_TAU"], CFG["RUN_LENGTH_K"]
sig = float(resid.std() or 0.05)
n_sims = 1500
paths = np.clip(OAI_fc.values + np.random.normal(0, sig, (n_sims, h)), 0, 1)

def first_sustained(sim, tau, kreq):
    run = 0
    for i, a in enumerate(sim >= tau):
        run = run + 1 if a else 0
        if run >= kreq: return i
    return None

hits = [first_sustained(p, tau, kreq) for p in paths]
hit_idxs = [x for x in hits if x is not None]

if hit_idxs:
    dates = [idxf[i] for i in hit_idxs]
    med_date = pd.to_datetime(dates).sort_values().iloc[len(dates)//2]
    pH = len(hit_idxs) / n_sims
    d10 = pd.to_datetime(dates).sort_values().iloc[int(0.10*len(dates))]
    d90 = pd.to_datetime(dates).sort_values().iloc[int(0.90*len(dates))]
    d80 = (d10, d90)
else:
    med_date, pH, d80 = None, 0.0, (None, None)

# === PLOTS ===================================================================
plt.figure(figsize=(10,5))
plt.plot(OAI.index, OAI, label="OAI")
if use_ucm:
    try:
        # If UCM, plot fitted mean clipped
        from pandas import Series
        plt.plot(OAI.index, (OAI - resid).clip(0,1), "--", label="Fit")
    except Exception:
        pass
plt.axhline(tau, linestyle=":", label=f"τ={tau}")
for d in cp_dates: plt.axvline(d, linestyle=":", alpha=0.5)
plt.legend(); plt.title(f"Overreach Awareness Index (OAI) — Model: {model_used}")
plt.tight_layout(); plt.savefig(os.path.join(CFG["SAVE_DIR"], "oai_fit.png"), dpi=160); plt.close()

plt.figure(figsize=(10,4))
plt.plot(OAI_fc.index, OAI_fc, label="Forecast mean")
plt.axhline(tau, linestyle=":", label=f"τ={tau}")
plt.legend(); plt.title("OAI Forecast Horizon")
plt.tight_layout(); plt.savefig(os.path.join(CFG["SAVE_DIR"], "oai_fc.png"), dpi=160); plt.close()

prob_curve = [np.mean([(x is not None and x <= t) for x in hits]) for t in range(h)]
pd.Series(prob_curve, index=idxf).plot(figsize=(10,3), ylim=(0,1), title="Probability of sustained crossing (by week t)")
plt.tight_layout(); plt.savefig(os.path.join(CFG["SAVE_DIR"], "oai_prob.png"), dpi=160); plt.close()

# === SUMMARY OUTPUT ==========================================================
summary = dict(
    generated_at = dt.datetime.utcnow().isoformat()+"Z",
    horizon_weeks = h,
    threshold_tau = float(tau),
    run_length_k  = int(kreq),
    last_week     = str(OAI.index[-1].date()),
    change_points = [str(pd.Timestamp(d).date()) for d in cp_dates],
    prob_within_horizon = round(float(pH), 3),
    median_event_date   = (str(pd.Timestamp(med_date).date()) if med_date is not None else None),
    event_window_80     = tuple(str(pd.Timestamp(d).date()) if d is not None else None for d in d80),
    weights = {k: float(v) for k, v in w.to_dict().items()},
    pca_var = float(pca.explained_variance_ratio_[0]),
    sources = {
        "trends_cols": list(trends.columns) if not trends.empty else [],
        "wiki_cols":   list(wiki.columns)   if not wiki.empty   else [],
        "fallback_used": (len(frames)==1 and "trust" in df.columns),
        "model_used": model_used
    }
)
with open(os.path.join(CFG["SAVE_DIR"], "oai_summary.json"), "w") as f:
    json.dump(summary, f, indent=2)

print(json.dumps(summary, indent=2))
print("\nFigures saved to:", os.path.abspath(CFG["SAVE_DIR"]))


[INFO] pytrends unavailable; skipping Google Trends.
Fetching data...
[FALLBACK] Using EWMA nowcast/forecast (series too short for UCM).
{
  "generated_at": "2025-10-16T04:55:06.988587Z",
  "horizon_weeks": 52,
  "threshold_tau": 0.65,
  "run_length_k": 6,
  "last_week": "2025-10-20",
  "change_points": [],
  "prob_within_horizon": 0.0,
  "median_event_date": null,
  "event_window_80": [
    null,
    null
  ],
  "weights": {
    "Mass_surveillance_in_the_United_States": 0.05,
    "First_Amendment_to_the_United_States_Constitution": 0.2658909185548501,
    "Censorship_in_the_United_States": 0.2974816205736362,
    "Civil_liberties_in_the_United_States": 0.9999999999979158
  },
  "pca_var": 0.7709565458878976,
  "sources": {
    "trends_cols": [],
    "wiki_cols": [
      "Mass_surveillance_in_the_United_States",
      "First_Amendment_to_the_United_States_Constitution",
      "Censorship_in_the_United_States",
      "Civil_liberties_in_the_United_States"
    ],
    "fallback_used": fal

  generated_at = dt.datetime.utcnow().isoformat()+"Z",


In [5]:
# === CNT "Wake-Up Field" Mega-Cell — Upgraded, Cached, Calibrated ============
# One cell: robust ingest (Wiki + GDELT + optional Trends), OAI build, nowcast,
# change-points, sustained-threshold event forecast, caching, and clean output.
# -----------------------------------------------------------------------------
import os, sys, time, json, math, warnings, requests
from datetime import datetime, timezone
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
warnings.filterwarnings("ignore", category=FutureWarning)

# Try UCM, but we'll gracefully fallback if missing/too-short
try:
    from statsmodels.tsa.statespace.structural import UnobservedComponents
    _HAS_UCM = True
except Exception:
    _HAS_UCM = False

# === CONFIG ==================================================================
CFG = dict(
    ROOT = Path.cwd(),
    REGION = "US",
    HORIZON_WEEKS = 104,         # longer horizon gives nonzero event mass if plausible
    RUN_LENGTH_K = 6,            # weeks required above threshold
    BACKTEST_START = "2012-01-01",
    # Signals
    TOPICS = ["government surveillance","censorship","civil liberties","free speech"],
    WIKI_PAGES = [
        "Mass_surveillance_in_the_United_States",
        "First_Amendment_to_the_United_States_Constitution",
        "Censorship_in_the_United_States",
        "Civil_liberties_in_the_United_States"
    ],
    GDELT_QUERY = '(censorship OR surveillance OR "civil liberties") AND location:United States',
    # Threshold policy
    CALIBRATE_TAU = True,        # if True, τ := 85th percentile of OAI; else use FIXED_TAU
    FIXED_TAU = 0.65,
    # Model heuristics
    MIN_LEN_UCM = 12,            # need at least this many weekly points for UCM
    EWMA_SPAN = 8,               # smoothing for fallback
    # IO
    SAVE_DIR = "artifacts",
    CACHE_DIR = "artifacts/cache",
    RNG_SEED = 1337
)
np.random.seed(CFG["RNG_SEED"])
Path(CFG["SAVE_DIR"]).mkdir(parents=True, exist_ok=True)
Path(CFG["CACHE_DIR"]).mkdir(parents=True, exist_ok=True)

# === NETWORK SESSION (fix Wikimedia 403s) ====================================
SESSION = requests.Session()
SESSION.headers.update({
    # Use a descriptive UA per Wikimedia policy; include a contact if you like
    "User-Agent": "CNTLab/1.1 (fieldwalker://local; contact: telos@cnt.local)"
})
requests_get = SESSION.get

# === OPTIONAL: Google Trends =================================================
try:
    from pytrends.request import TrendReq
    _HAS_TRENDS = True
except Exception:
    print("[INFO] pytrends unavailable; skipping Google Trends.")
    TrendReq = None; _HAS_TRENDS = False

# === UTILS ===================================================================
def as_week_index(dts):
    return pd.to_datetime(dts).to_period('W-MON').to_timestamp()

def spectral_entropy(x, nseg=8, eps=1e-12):
    x = pd.Series(x).dropna().values
    if len(x) < 16: return np.nan
    x = (x - x.mean()) / (x.std() + 1e-9)
    seg = max(16, len(x)//nseg)
    ps = []
    for i in range(0, len(x)-seg+1, seg):
        X = np.fft.rfft(x[i:i+seg]); P = (X*np.conj(X)).real; P /= (P.sum() + eps); ps.append(P)
    Pm = np.mean(ps, axis=0)
    H = -(Pm*np.log(Pm + eps)).sum(); Hmax = math.log(len(Pm))
    return float(1 - H/Hmax)

def zscore(s):
    s = pd.Series(s)
    return (s - s.mean()) / (s.std() + 1e-9)

def logistic_scale(s):
    s = pd.Series(s)
    q1, q2, q3 = s.quantile([0.10, 0.50, 0.90])
    scale = (q3 - q1)/2.0 if q3 > q1 else (s.std() or 1.0)
    return 1.0 / (1.0 + np.exp(-(s - q2) / (scale + 1e-9)))

# === FETCHERS ================================================================
def fetch_trends(topics, geo="US", since="2012-01-01"):
    if not _HAS_TRENDS: return pd.DataFrame()
    pytrends = TrendReq(hl="en-US", tz=360)
    frames = []
    for kw in topics:
        try:
            pytrends.build_payload([kw], timeframe=f"{since} {pd.Timestamp.today():%Y-%m-%d}", geo=geo)
            df = pytrends.interest_over_time()
            if df.empty: continue
            s = df[kw].rename(kw); s.index = as_week_index(s.index); frames.append(s)
            time.sleep(1.0)  # be polite
        except Exception as e:
            print(f"[WARN] pytrends failed for {kw}: {e}")
    return pd.concat(frames, axis=1).sort_index() if frames else pd.DataFrame()

def fetch_wikiviews(pages, since="2012-01-01", project="en.wikipedia", agent="user"):
    """Wikimedia REST pageviews; weekly; merges with on-disk cache to grow series."""
    cache_path = Path(CFG["CACHE_DIR"]) / "wiki_views.csv"
    def one_page(title):
        start = pd.Timestamp(since).strftime("%Y%m0100")
        end   = pd.Timestamp.today().strftime("%Y%m%d00")
        url = (f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
               f"{project}/all-access/{agent}/{title}/daily/{start}/{end}")
        try:
            r = requests_get(url, timeout=20); r.raise_for_status()
            data = r.json().get("items", [])
            ts = {pd.to_datetime(i["timestamp"][:8]): i["views"] for i in data}
            s  = pd.Series(ts, name=title).sort_index().resample("W-MON").sum()
            return s
        except Exception as e:
            print(f"[WARN] wiki fail {title}: {e}")
            return pd.Series(dtype=float)
    cols = []
    for p in pages:
        s = one_page(p.replace(" ", "_"))
        if s.shape[0]: cols.append(s)
        time.sleep(0.3)
    wiki = pd.concat(cols, axis=1).sort_index() if cols else pd.DataFrame()
    # merge with cache
    if cache_path.exists():
        old = pd.read_csv(cache_path, parse_dates=["date"]).set_index("date")
        wiki = old.combine_first(wiki) if not wiki.empty else old
    if not wiki.empty:
        wiki.to_csv(cache_path, index_label="date")
    return wiki

def fetch_gdelt_counts(query, since="2012-01-01"):
    """GDELT Doc API monthly timeline → weekly sum; no extra packages."""
    start = pd.Timestamp(since).to_period("M").to_timestamp()
    end   = pd.Timestamp.today().to_period("M").to_timestamp()
    months = pd.period_range(start, end, freq="M").to_timestamp()
    rows = []
    for m in months:
        url = ("https://api.gdeltproject.org/api/v2/doc/doc?"
               f"query={requests.utils.quote(query)}&mode=TimelineVol&format=json"
               f"&startdatetime={m:%Y%m%d000000}&enddatetime={(m+pd.offsets.MonthEnd(0)):%Y%m%d235959}")
        try:
            js = requests_get(url, timeout=20).json()
            for pt in js.get("timelines", [{}])[0].get("data", []):
                rows.append((pd.to_datetime(pt["date"]), int(pt["value"])))
            time.sleep(0.3)
        except Exception as e:
            print("[WARN] GDELT:", e)
    if not rows: return pd.DataFrame()
    s = pd.Series({d:v for d,v in rows}).sort_index().resample("W-MON").sum().rename("gdelt_volume")
    # cache
    cache_path = Path(CFG["CACHE_DIR"]) / "gdelt_volume.csv"
    if cache_path.exists():
        old = pd.read_csv(cache_path, parse_dates=["date"]).set_index("date")["gdelt_volume"]
        s = old.combine_first(s)
    s.to_frame().to_csv(cache_path, index_label="date")
    return s.to_frame()

# === INGEST ==================================================================
print("Fetching data...")
frames = []

trends = fetch_trends(CFG["TOPICS"], geo=CFG["REGION"], since=CFG["BACKTEST_START"])
if not trends.empty: frames.append(trends)

wiki   = fetch_wikiviews(CFG["WIKI_PAGES"], since=CFG["BACKTEST_START"])
if not wiki.empty: frames.append(wiki)

gdelt  = fetch_gdelt_counts(CFG["GDELT_QUERY"], since=CFG["BACKTEST_START"])
if isinstance(gdelt, pd.DataFrame) and not gdelt.empty: frames.append(gdelt)

if not frames:
    print("[FALLBACK] Using static Pew dataset (OWID).")
    try:
        pew = pd.read_csv(
            "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Trust%20in%20government%20(Pew)/Trust%20in%20government%20(Pew).csv"
        )
        pew = pew.rename(columns={"Year":"date","Trust in government (Pew)":"trust"}).dropna(subset=["date","trust"])
        pew["date"] = pd.to_datetime(pew["date"], format="%Y")
        pew = pew.set_index("date")["trust"].resample("W-MON").ffill().to_frame()
        frames = [pew]
    except Exception as e:
        raise RuntimeError(f"No data sources available and fallback failed: {e}")

df = pd.concat(frames, axis=1).sort_index()
df = df.fillna(method="ffill").fillna(method="bfill")

# === FEATURES & FORECASTABILITY =============================================
feat = pd.DataFrame(index=df.index)
for c in df.columns:
    feat[c] = df[c]
    feat[c+"_z"]   = zscore(df[c])
    feat[c+"_vol4"] = df[c].pct_change().rolling(4).std()

weights = {c: spectral_entropy(df[c].values) for c in df.columns}
w = pd.Series(weights).fillna(0.5)
# cap extremes so a single column can't dominate
w = w.clip(0.15, 0.85)
w = (w - w.min()) / (w.max() - w.min() + 1e-12)
w = w.clip(0.05, 1.0)

# === OAI (Overreach Awareness Index) =========================================
scaler = StandardScaler()
X  = scaler.fit_transform(df.values)
Wm = np.diag(np.sqrt(w[df.columns].values))
Xw = X.dot(Wm)
pca = PCA(n_components=1, random_state=CFG["RNG_SEED"])
oai_raw = pca.fit_transform(Xw).ravel()
OAI = pd.Series(logistic_scale(zscore(oai_raw)), index=df.index, name="OAI")

# === Threshold policy (calibrate if requested) ===============================
if CFG["CALIBRATE_TAU"]:
    TAU = float(pd.Series(OAI).quantile(0.85))   # 85th percentile
else:
    TAU = float(CFG["FIXED_TAU"])

# === NOWCAST / FORECAST ======================================================
use_ucm = _HAS_UCM and (len(OAI.dropna()) >= CFG["MIN_LEN_UCM"])
exo = feat.filter(regex="_z$|_vol4$").fillna(0)

if use_ucm:
    try:
        model = UnobservedComponents(endog=OAI, level='llevel', exog=exo)
    except Exception:
        model = UnobservedComponents(endog=OAI, trend=True, exog=exo)
    res = model.fit(disp=False)

    h = int(CFG["HORIZON_WEEKS"])
    lastX = exo.iloc[-1:].values
    Xf = np.repeat(lastX, h, axis=0)
    fc = res.get_forecast(steps=h, exog=Xf)
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series(fc.predicted_mean.clip(0,1), index=idxf, name="OAI_fc")
    resid  = (OAI - res.fittedvalues.reindex_like(OAI).fillna(method="bfill")).dropna()
    model_used = "UCM"
else:
    print("[FALLBACK] Using EWMA nowcast/forecast (series too short or UCM unavailable).")
    h = int(CFG["HORIZON_WEEKS"])
    OAI_fit  = OAI.ewm(span=int(CFG["EWMA_SPAN"]), adjust=False).mean()
    last_val = float(OAI_fit.iloc[-1])
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series([last_val]*h, index=idxf, name="OAI_fc")
    resid  = (OAI - OAI_fit).dropna()
    model_used = "EWMA"

# === CHANGE-POINTS (CUSUM on residuals) =====================================
k_cusum = resid.std() * 0.25
thr     = resid.std() * 3.0
pos = neg = 0.0
alarms = []
for t, e in resid.items():
    pos = max(0.0, pos + e - k_cusum)
    neg = min(0.0, neg + e + k_cusum)
    if pos > thr or abs(neg) > thr:
        alarms.append(t); pos = neg = 0.0
cp_dates = [pd.Timestamp(d) for d in alarms[-5:]]

# === TIME-TO-EVENT (sustained τ for k consecutive weeks) =====================
sig = float(resid.std() or 0.05)
n_sims = 1500
paths = np.clip(OAI_fc.values + np.random.normal(0, sig, (n_sims, h)), 0, 1)

def first_sustained(sim, tau, kreq):
    run = 0
    for i, a in enumerate(sim >= tau):
        run = run + 1 if a else 0
        if run >= kreq: return i
    return None

hits = [first_sustained(p, TAU, CFG["RUN_LENGTH_K"]) for p in paths]
hit_idxs = [x for x in hits if x is not None]
if hit_idxs:
    dates = pd.to_datetime([idxf[i] for i in hit_idxs]).sort_values()
    med_date = dates.iloc[len(dates)//2]
    pH = len(hit_idxs)/n_sims
    d80 = (dates.iloc[int(0.10*len(dates))], dates.iloc[int(0.90*len(dates))])
else:
    med_date, pH, d80 = None, 0.0, (None, None)

# === PLOTS ===================================================================
outdir = Path(CFG["SAVE_DIR"])
plt.figure(figsize=(10,5))
plt.plot(OAI.index, OAI, label="OAI")
if use_ucm:
    try:
        plt.plot((OAI - resid).clip(0,1).index, (OAI - resid).clip(0,1).values, "--", label="Fit")
    except Exception:
        pass
plt.axhline(TAU, linestyle=":", label=f"τ={TAU:.3f}")
for d in cp_dates: plt.axvline(d, linestyle=":", alpha=0.5)
plt.legend(); plt.title(f"OAI — Model: {model_used}")
plt.tight_layout(); plt.savefig(outdir/"oai_fit.png", dpi=160); plt.close()

plt.figure(figsize=(10,4))
plt.plot(OAI_fc.index, OAI_fc.values, label="Forecast mean")
plt.axhline(TAU, linestyle=":", label=f"τ={TAU:.3f}")
plt.legend(); plt.title("OAI Forecast Horizon")
plt.tight_layout(); plt.savefig(outdir/"oai_fc.png", dpi=160); plt.close()

prob_curve = [np.mean([(x is not None and x <= t) for x in hits]) for t in range(h)]
pd.Series(prob_curve, index=idxf).plot(figsize=(10,3), ylim=(0,1), title="Pr(sustained crossing by week t)")
plt.tight_layout(); plt.savefig(outdir/"oai_prob.png", dpi=160); plt.close()

# === SUMMARY ================================================================
summary = dict(
    generated_at = datetime.now(timezone.utc).isoformat(),
    horizon_weeks = h,
    threshold_tau = float(TAU),
    run_length_k  = int(CFG["RUN_LENGTH_K"]),
    last_week     = str(pd.Timestamp(OAI.index[-1]).date()),
    change_points = [str(pd.Timestamp(d).date()) for d in cp_dates],
    prob_within_horizon = round(float(pH), 3),
    median_event_date   = (str(pd.Timestamp(med_date).date()) if med_date is not None else None),
    event_window_80     = tuple(str(pd.Timestamp(d).date()) if d is not None else None for d in d80),
    weights = {k: float(v) for k, v in w.to_dict().items()},
    pca_var = float(pca.explained_variance_ratio_[0]),
    sources = {
        "trends_cols": list(trends.columns) if isinstance(trends, pd.DataFrame) and not trends.empty else [],
        "wiki_cols":   list(wiki.columns)   if isinstance(wiki,   pd.DataFrame) and not wiki.empty   else [],
        "gdelt_cols":  list(gdelt.columns)  if isinstance(gdelt,  pd.DataFrame) and not gdelt.empty  else [],
        "model_used":  model_used
    }
)
with open(outdir/"oai_summary.json", "w") as f:
    json.dump(summary, f, indent=2)

print(json.dumps(summary, indent=2))
print("\nFigures saved to:", str(outdir.resolve()))


[INFO] pytrends unavailable; skipping Google Trends.
Fetching data...
[WARN] GDELT: HTTPSConnectionPool(host='api.gdeltproject.org', port=443): Read timed out. (read timeout=20)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (char 0)
[WARN] GDELT: Expecting value: line 1 column 1 (

In [6]:
# === CNT "Wake-Up Field" — Single Ultra-Resilient Mega Cell ===================
# One cell: ingest (Wiki + safe GDELT + optional Trends), cache, build OAI,
# nowcast/forecast, regime shifts, sustained-threshold event date, robust τ.
# -----------------------------------------------------------------------------
import os, sys, json, math, time, warnings, requests
from datetime import datetime, timezone
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
warnings.filterwarnings("ignore", category=FutureWarning)

# Try UCM; we’ll fallback to EWMA if unavailable/too short
try:
    from statsmodels.tsa.statespace.structural import UnobservedComponents
    _HAS_UCM = True
except Exception:
    _HAS_UCM = False

# ============================== CONFIG =======================================
CFG = dict(
    ROOT = Path.cwd(),
    REGION = "US",
    HORIZON_WEEKS = 104,          # forecast horizon
    RUN_LENGTH_K  = 6,            # sustain weeks ≥ τ
    BACKTEST_START = "2012-01-01",
    # Signals
    TOPICS = ["government surveillance","censorship","civil liberties","free speech"],
    WIKI_PAGES = [
        "Mass_surveillance_in_the_United_States",
        "First_Amendment_to_the_United_States_Constitution",
        "Censorship_in_the_United_States",
        "Civil_liberties_in_the_United_States"
    ],
    GDELT_QUERY = '(censorship OR surveillance OR "civil liberties") AND location: United States',
    # Threshold policy
    CALIBRATE_TAU = True,         # if False uses FIXED_TAU
    FIXED_TAU = 0.65,
    # Modeling heuristics
    MIN_LEN_UCM = 12,             # min weekly points to enable UCM
    EWMA_SPAN   = 8,              # EWMA smoothing when UCM not used
    # IO / caching
    SAVE_DIR  = "artifacts",
    CACHE_DIR = "artifacts/cache",
    RNG_SEED = 1337
)
np.random.seed(CFG["RNG_SEED"])
Path(CFG["SAVE_DIR"]).mkdir(parents=True, exist_ok=True)
Path(CFG["CACHE_DIR"]).mkdir(parents=True, exist_ok=True)

# ========================== NETWORK SESSION ==================================
SESSION = requests.Session()
SESSION.headers.update({
    "User-Agent": "CNTLab/1.2 (fieldwalker://local; contact: telos@cnt.local)"
})
requests_get = SESSION.get

# Optional: Google Trends (skips gracefully if not present)
try:
    from pytrends.request import TrendReq
    _HAS_TRENDS = True
except Exception:
    print("[INFO] pytrends unavailable; skipping Google Trends.")
    TrendReq = None; _HAS_TRENDS = False

# =============================== UTILS =======================================
def as_week_index(dts):
    return pd.to_datetime(dts).to_period('W-MON').to_timestamp()

def spectral_entropy(x, nseg=8, eps=1e-12):
    x = pd.Series(x).dropna().values
    if len(x) < 16: return np.nan
    x = (x - x.mean()) / (x.std() + 1e-9)
    seg = max(16, len(x)//nseg)
    ps = []
    for i in range(0, len(x)-seg+1, seg):
        X = np.fft.rfft(x[i:i+seg]); P = (X*np.conj(X)).real; P /= (P.sum()+eps); ps.append(P)
    Pm = np.mean(ps, axis=0); H = -(Pm*np.log(Pm+eps)).sum(); Hmax = math.log(len(Pm))
    return float(1 - H/Hmax)

def zscore(s):
    s = pd.Series(s); return (s - s.mean()) / (s.std() + 1e-9)

def logistic_scale(s):
    s = pd.Series(s); q1,q2,q3 = s.quantile([.1,.5,.9])
    scale = (q3-q1)/2.0 if q3>q1 else (s.std() or 1.0)
    return 1/(1+np.exp(-(s-q2)/(scale+1e-9)))

def robust_tau(oai_series, q=0.85, fallback=0.65, min_points=10):
    s = pd.Series(oai_series).astype(float).dropna()
    if len(s) < min_points:
        print(f"[TAU] Not enough points ({len(s)}) for calibrated τ; using fallback={fallback}.")
        return float(fallback)
    try:
        tau = float(np.nanquantile(s.values, q))
        if not np.isfinite(tau): raise ValueError("non-finite τ")
        return tau
    except Exception:
        print(f"[TAU] Calibration produced NaN/inf; using fallback={fallback}.")
        return float(fallback)

# ============================== FETCHERS =====================================
def fetch_trends(topics, geo="US", since="2012-01-01"):
    if not _HAS_TRENDS: return pd.DataFrame()
    pytrends = TrendReq(hl="en-US", tz=360)
    frames = []
    for kw in topics:
        try:
            pytrends.build_payload([kw], timeframe=f"{since} {pd.Timestamp.today():%Y-%m-%d}", geo=geo)
            df = pytrends.interest_over_time()
            if df.empty: continue
            s = df[kw].rename(kw); s.index = as_week_index(s.index); frames.append(s)
            time.sleep(1.0)
        except Exception as e:
            print(f"[WARN] pytrends failed for {kw}: {e}")
    return pd.concat(frames, axis=1).sort_index() if frames else pd.DataFrame()

def fetch_wikiviews(pages, since="2012-01-01", project="en.wikipedia", agent="user"):
    cache_path = Path(CFG["CACHE_DIR"]) / "wiki_views.csv"
    def one_page(title):
        start = pd.Timestamp(since).strftime("%Y%m0100")
        end   = pd.Timestamp.today().strftime("%Y%m%d00")
        url = (f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
               f"{project}/all-access/{agent}/{title}/daily/{start}/{end}")
        try:
            r = requests_get(url, timeout=20); r.raise_for_status()
            data = r.json().get("items", [])
            ts = {pd.to_datetime(i["timestamp"][:8]): i["views"] for i in data}
            return pd.Series(ts, name=title).sort_index().resample("W-MON").sum()
        except Exception as e:
            print(f"[WARN] wiki fail {title}: {e}"); return pd.Series(dtype=float)
    cols=[]
    for p in pages:
        s = one_page(p.replace(" ","_"))
        if s.shape[0]: cols.append(s)
        time.sleep(0.3)
    wiki = pd.concat(cols, axis=1).sort_index() if cols else pd.DataFrame()
    if cache_path.exists():
        old = pd.read_csv(cache_path, parse_dates=["date"]).set_index("date")
        wiki = old.combine_first(wiki) if not wiki.empty else old
    if not wiki.empty: wiki.to_csv(cache_path, index_label="date")
    return wiki

def fetch_gdelt_counts_safe(query, since="2018-01-01", timeout=8, max_consec_err=6, max_months=60):
    start = pd.Timestamp(since).to_period("M").to_timestamp()
    end   = pd.Timestamp.today().to_period("M").to_timestamp()
    months = pd.period_range(start, end, freq="M").to_timestamp()[-max_months:]
    rows, consec = [], 0
    for m in months:
        url = ("https://api.gdeltproject.org/api/v2/doc/doc?"
               f"query={requests.utils.quote(query)}&mode=TimelineVol&format=json"
               f"&startdatetime={m:%Y%m%d000000}&enddatetime={(m+pd.offsets.MonthEnd(0)):%Y%m%d235959}")
        try:
            js = requests_get(url, timeout=timeout).json()
            data = js.get("timelines", [{}])[0].get("data", [])
            for pt in data: rows.append((pd.to_datetime(pt["date"]), int(pt["value"])))
            consec = 0; time.sleep(0.25)
        except Exception:
            consec += 1
            if consec >= max_consec_err:
                print(f"[GDELT] Too many errors in a row ({consec}); stopping early."); break
    if not rows: return pd.DataFrame()
    s = pd.Series({d:v for d,v in rows}).sort_index().resample("W-MON").sum().rename("gdelt_volume")
    # cache
    cache_path = Path(CFG["CACHE_DIR"]) / "gdelt_volume.csv"
    if cache_path.exists():
        old = pd.read_csv(cache_path, parse_dates=["date"]).set_index("date")["gdelt_volume"]
        s = old.combine_first(s)
    s.to_frame().to_csv(cache_path, index_label="date")
    return s.to_frame()

# ============================== INGEST =======================================
print("Fetching data...")
frames = []
trends = fetch_trends(CFG["TOPICS"], geo=CFG["REGION"], since=CFG["BACKTEST_START"])
if not trends.empty: frames.append(trends)

wiki   = fetch_wikiviews(CFG["WIKI_PAGES"], since=CFG["BACKTEST_START"])
if not wiki.empty: frames.append(wiki)

gdelt  = fetch_gdelt_counts_safe(CFG["GDELT_QUERY"], since=CFG["BACKTEST_START"])
if isinstance(gdelt, pd.DataFrame) and not gdelt.empty: frames.append(gdelt)

if not frames:
    print("[FALLBACK] Using static Pew dataset (OWID).")
    pew = pd.read_csv(
        "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Trust%20in%20government%20(Pew)/Trust%20in%20government%20(Pew).csv"
    )
    pew = pew.rename(columns={"Year":"date","Trust in government (Pew)":"trust"}).dropna(subset=["date","trust"])
    pew["date"] = pd.to_datetime(pew["date"], format="%Y")
    pew = pew.set_index("date")["trust"].resample("W-MON").ffill().to_frame()
    frames = [pew]

df = pd.concat(frames, axis=1).sort_index().ffill().bfill()

# ===================== FEATURES & FORECASTABILITY ============================
feat = pd.DataFrame(index=df.index)
for c in df.columns:
    feat[c] = df[c]
    feat[c+"_z"] = zscore(df[c])
    feat[c+"_vol4"] = df[c].pct_change().rolling(4).std()

weights = {c: spectral_entropy(df[c].values) for c in df.columns}
w = pd.Series(weights).fillna(0.5)
w = w.clip(0.15, 0.85)                      # avoid domination
w = (w - w.min()) / (w.max() - w.min() + 1e-12)
w = w.clip(0.05, 1.0)

# ========================= OVERREACH AWARENESS INDEX =========================
scaler = StandardScaler()
X  = scaler.fit_transform(df.values)
Wm = np.diag(np.sqrt(w[df.columns].values))
Xw = X.dot(Wm)
pca = PCA(n_components=1, random_state=CFG["RNG_SEED"])
oai_raw = pca.fit_transform(Xw).ravel()
OAI = pd.Series(logistic_scale(zscore(oai_raw)), index=df.index, name="OAI")

# ========================= THRESHOLD τ (robust) ==============================
TAU = robust_tau(OAI, q=0.85, fallback=CFG["FIXED_TAU"], min_points=10) if CFG["CALIBRATE_TAU"] else float(CFG["FIXED_TAU"])

# ========================= NOWCAST / FORECAST ================================
use_ucm = _HAS_UCM and (len(OAI.dropna()) >= CFG["MIN_LEN_UCM"])
exo = feat.filter(regex="_z$|_vol4$").fillna(0)

if use_ucm:
    try:
        model = UnobservedComponents(endog=OAI, level='llevel', exog=exo)
    except Exception:
        model = UnobservedComponents(endog=OAI, trend=True, exog=exo)
    res = model.fit(disp=False)
    h = int(CFG["HORIZON_WEEKS"])
    lastX = exo.iloc[-1:].values; Xf = np.repeat(lastX, h, axis=0)
    fc = res.get_forecast(steps=h, exog=Xf)
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series(fc.predicted_mean.clip(0,1), index=idxf, name="OAI_fc")
    resid  = (OAI - res.fittedvalues.reindex_like(OAI).bfill()).dropna()
    model_used = "UCM"
else:
    print("[FALLBACK] Using EWMA nowcast/forecast (series too short or UCM unavailable).")
    h = int(CFG["HORIZON_WEEKS"])
    OAI_fit  = OAI.ewm(span=int(CFG["EWMA_SPAN"]), adjust=False).mean()
    last_val = float(OAI_fit.iloc[-1])
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series([last_val]*h, index=idxf, name="OAI_fc")
    resid  = (OAI - OAI_fit).dropna()
    model_used = "EWMA"

# ========================= CHANGE-POINTS (CUSUM) =============================
k_cusum = resid.std() * 0.25
thr     = resid.std() * 3.0
pos = neg = 0.0; alarms = []
for t, e in resid.items():
    pos = max(0.0, pos + e - k_cusum)
    neg = min(0.0, neg + e + k_cusum)
    if pos > thr or abs(neg) > thr:
        alarms.append(t); pos = neg = 0.0
cp_dates = [pd.Timestamp(d) for d in alarms[-5:]]

# =================== TIME-TO-EVENT (sustained τ for k weeks) =================
sig = float(resid.std() or 0.05)
n_sims = 1500
paths = np.clip(OAI_fc.values + np.random.normal(0, sig, (n_sims, h)), 0, 1)

def first_sustained(sim, tau, kreq):
    run = 0
    for i, a in enumerate(sim >= tau):
        run = run + 1 if a else 0
        if run >= kreq: return i
    return None

hits = [first_sustained(p, TAU, CFG["RUN_LENGTH_K"]) for p in paths]
hit_idxs = [x for x in hits if x is not None]
if hit_idxs:
    dates = pd.to_datetime([idxf[i] for i in hit_idxs]).sort_values()
    med_date = dates.iloc[len(dates)//2]
    pH = len(hit_idxs)/n_sims
    d80 = (dates.iloc[int(0.10*len(dates))], dates.iloc[int(0.90*len(dates))])
else:
    med_date, pH, d80 = None, 0.0, (None, None)

# =============================== PLOTS =======================================
outdir = Path(CFG["SAVE_DIR"])
plt.figure(figsize=(10,5))
plt.plot(OAI.index, OAI, label="OAI")
if model_used == "UCM":
    try:
        plt.plot((OAI - resid).clip(0,1).index, (OAI - resid).clip(0,1).values, "--", label="Fit")
    except Exception:
        pass
plt.axhline(TAU, linestyle=":", label=f"τ={TAU:.3f}")
for d in cp_dates: plt.axvline(d, linestyle=":", alpha=0.5)
plt.legend(); plt.title(f"OAI — Model: {model_used}")
plt.tight_layout(); plt.savefig(outdir/"oai_fit.png", dpi=160); plt.close()

plt.figure(figsize=(10,4))
plt.plot(OAI_fc.index, OAI_fc.values, label="Forecast mean")
plt.axhline(TAU, linestyle=":", label=f"τ={TAU:.3f}")
plt.legend(); plt.title("OAI Forecast Horizon")
plt.tight_layout(); plt.savefig(outdir/"oai_fc.png", dpi=160); plt.close()

prob_curve = [np.mean([(x is not None and x <= t) for x in hits]) for t in range(h)]
pd.Series(prob_curve, index=idxf).plot(figsize=(10,3), ylim=(0,1), title="Pr(sustained crossing by week t)")
plt.tight_layout(); plt.savefig(outdir/"oai_prob.png", dpi=160); plt.close()

# ============================== SUMMARY ======================================
summary = dict(
    generated_at = datetime.now(timezone.utc).isoformat(),
    horizon_weeks = h,
    threshold_tau = float(TAU),
    run_length_k  = int(CFG["RUN_LENGTH_K"]),
    last_week     = str(pd.Timestamp(OAI.index[-1]).date()),
    change_points = [str(pd.Timestamp(d).date()) for d in cp_dates],
    prob_within_horizon = round(float(pH), 3),
    median_event_date   = (str(pd.Timestamp(med_date).date()) if med_date is not None else None),
    event_window_80     = tuple(str(pd.Timestamp(d).date()) if d is not None else None for d in d80),
    weights = {k: float(v) for k, v in w.to_dict().items()},
    pca_var = float(pca.explained_variance_ratio_[0]),
    sources = {
        "trends_cols": list(trends.columns) if isinstance(trends, pd.DataFrame) and not trends.empty else [],
        "wiki_cols":   list(wiki.columns)   if isinstance(wiki,   pd.DataFrame) and not wiki.empty   else [],
        "gdelt_cols":  list(gdelt.columns)  if isinstance(gdelt,  pd.DataFrame) and not gdelt.empty  else [],
        "model_used":  model_used
    }
)
with open(outdir/"oai_summary.json", "w") as f:
    json.dump(summary, f, indent=2)

print(json.dumps(summary, indent=2))
print("\nFigures saved to:", str(outdir.resolve()))


[INFO] pytrends unavailable; skipping Google Trends.
Fetching data...
[GDELT] Too many errors in a row (6); stopping early.
[TAU] Not enough points (0) for calibrated τ; using fallback=0.65.
[FALLBACK] Using EWMA nowcast/forecast (series too short or UCM unavailable).
{
  "generated_at": "2025-10-16T05:42:07.560263+00:00",
  "horizon_weeks": 104,
  "threshold_tau": 0.65,
  "run_length_k": 6,
  "last_week": "2025-10-20",
  "change_points": [],
  "prob_within_horizon": 0.0,
  "median_event_date": null,
  "event_window_80": [
    null,
    null
  ],
  "weights": {
    "Mass_surveillance_in_the_United_States": 0.05,
    "First_Amendment_to_the_United_States_Constitution": 0.2658909185548501,
    "Censorship_in_the_United_States": 0.2974816205736362,
    "Civil_liberties_in_the_United_States": 0.9999999999979158
  },
  "pca_var": 0.7709565458878976,
  "sources": {
    "trends_cols": [],
    "wiki_cols": [
      "Mass_surveillance_in_the_United_States",
      "First_Amendment_to_the_United_S

In [7]:
# === CNT Quick Patch: rolling τ + momentum-aware forecast ====================
import numpy as _np, pandas as _pd

# 1) Rolling τ (local baseline): 3-year window 85th percentile with safe fallback
def _rolling_tau(oai, window_weeks=156, q=0.85, fallback=0.65):
    oai = _pd.Series(oai).astype(float)
    def _q(x):
        x = _np.asarray(x, float)
        x = x[_np.isfinite(x)]
        return _np.nanquantile(x, q) if x.size >= 12 else fallback
    rtau = oai.rolling(window_weeks, min_periods=12).apply(_q, raw=False)
    rtau = rtau.bfill().fillna(fallback)
    return float(rtau.iloc[-1]), rtau

TAU_rolling, TAU_series = _rolling_tau(OAI, window_weeks=156, q=0.85, fallback=CFG.get("FIXED_TAU", 0.65))
print(f"[τ] Rolling τ (last 3y, q=0.85) = {TAU_rolling:.3f}")

# 2) Momentum-aware forecast: add a gentle drift term based on recent OAI slope
#    (keeps honesty: just extrapolates recent velocity; noise stays the same)
lookback = 26  # weeks
if len(OAI) >= lookback+2:
    y_tail = _pd.Series(OAI.iloc[-lookback:])
    # simple slope per week
    x = _np.arange(len(y_tail))
    m = _np.polyfit(x, y_tail.values, 1)[0]
else:
    m = 0.0

drift_per_week = float(m)  # small if OAI is flat; can be negative
OAI_fc_mom = (_pd.Series(OAI_fc, index=OAI_fc.index) +
              _np.arange(len(OAI_fc))*drift_per_week).clip(0,1)

# 3) Re-run sustained-crossing simulation with rolling τ and momentum forecast
sig = float((OAI - OAI.ewm(span=int(CFG.get("EWMA_SPAN", 8)), adjust=False).mean()).std() or 0.05)
h = int(CFG.get("HORIZON_WEEKS", 104))
n_sims = 1500
paths = _np.clip(OAI_fc_mom.values + _np.random.normal(0, sig, (n_sims, h)), 0, 1)

def _first_sustained(sim, tau, kreq):
    run = 0
    for i, a in enumerate(sim >= tau):
        run = run + 1 if a else 0
        if run >= kreq: return i
    return None

hits = [_first_sustained(p, TAU_rolling, int(CFG.get("RUN_LENGTH_K", 6))) for p in paths]
hit_idxs = [x for x in hits if x is not None]
if hit_idxs:
    idxf = OAI_fc.index
    dates = _pd.to_datetime([idxf[i] for i in hit_idxs]).sort_values()
    med_date = dates.iloc[len(dates)//2]
    pH = len(hit_idxs)/n_sims
    d80 = (dates.iloc[int(0.10*len(dates))], dates.iloc[int(0.90*len(dates))])
else:
    med_date, pH, d80 = None, 0.0, (None, None)

print(f"[Forecast] Pr(sustained ≥ τ_rolling) within {h}w = {pH:.3f}")
print(f"[Forecast] median date = {str(med_date.date()) if med_date is not None else None}, 80% window = {tuple(str(d.date()) if d is not None else None for d in d80)}")


[τ] Rolling τ (last 3y, q=0.85) = 0.650
[Forecast] Pr(sustained ≥ τ_rolling) within 104w = 0.000
[Forecast] median date = None, 80% window = (None, None)


In [8]:
# === CNT Patch Cell: Shift-Event + Minimum-Drift Sensitivity ==================
import numpy as np, pandas as pd

assert "OAI" in globals() and "OAI_fc" in globals(), "Run the mega cell first."

# ---------- A) SHIFT-BASED EVENT (z-score & slope sustain) -------------------
# Define a "wake-up" as a *structural lift*, not just a high level:
#   condition1: rolling z-score of OAI >= z_tau
#   condition2: rolling slope (per week) >= m_min
#   both must hold for k weeks.
z_window = 26          # ~6 months
z_tau    = 1.0         # ~1σ above local mean
m_min    = 0.0015      # ~0.15 points per 100 weeks; tweak to taste
k_shift  = int(CFG.get("RUN_LENGTH_K", 6))

oai = pd.Series(OAI, index=OAI.index).astype(float)
mu  = oai.rolling(z_window, min_periods=max(8, z_window//3)).mean()
sd  = oai.rolling(z_window, min_periods=max(8, z_window//3)).std()
zsc = (oai - mu) / (sd.replace(0, np.nan) + 1e-9)

# rolling slope via simple linear fit on a sliding window
def rolling_slope(y, w):
    y = pd.Series(y).astype(float)
    if len(y) < w: return pd.Series(index=y.index, dtype=float)
    X = np.arange(w)
    out = [np.nan]*(w-1)
    for i in range(w, len(y)+1):
        yi = y.iloc[i-w:i].values
        m = np.polyfit(X, yi, 1)[0]
        out.append(m)
    return pd.Series(out, index=y.index, dtype=float)

slope = rolling_slope(oai, z_window)

cond = (zsc >= z_tau) & (slope >= m_min)
run  = (cond.groupby((~cond).cumsum()).cumcount()+1)*cond  # run-lengths
shift_triggered = bool((run >= k_shift).any())

print(f"[Shift-Event] z_tau={z_tau}, m_min={m_min:.5f}, k={k_shift}  ->  triggered={shift_triggered}")

# ---------- B) DRIFT-TO-CROSS (how much weekly lift is needed?) --------------
# Given current OAI_fc mean path, what constant drift per week (Δ) is needed
# to achieve a sustained crossing >= TAU for k weeks within H?
H = int(CFG.get("HORIZON_WEEKS", 104))
k = int(CFG.get("RUN_LENGTH_K", 6))
tau = float(globals().get("TAU", CFG.get("FIXED_TAU", 0.65)))

fc = pd.Series(OAI_fc, index=OAI_fc.index).astype(float)
x0 = float(oai.iloc[-1])         # last observed OAI
sig = float((oai - oai.ewm(span=int(CFG.get("EWMA_SPAN", 8)), adjust=False).mean()).std() or 0.05)

# Safety margin: require mean - z*sig >= τ (e.g., z=0.5 ~ 69% one-sided)
z_safety = 0.5
tau_eff  = tau + z_safety*sig

# Minimal drift if we allow the crossing at the very end (latest start t0 = H-k)
# x0 + Δ*(t0) >= tau_eff  ->  Δ >= (tau_eff - x0) / t0
def min_drift_required(x0, tau_eff, H, k):
    if H <= k: return np.inf, None
    candidates = []
    for t0 in range(0, H - k + 1):
        t_end = t0 + (k-1)
        # ensure all k weeks are above: check at start is sufficient for linear increase
        t_use = max(1, t0)
        d = (tau_eff - x0) / t_use if t_use > 0 else (tau_eff - x0)
        candidates.append((t0, d))
    # choose feasible minimal non-negative drift
    feas = [(t0, d) for (t0, d) in candidates if d >= 0]
    if not feas:
        return 0.0, 0
    t0_star, d_star = min(feas, key=lambda x: x[1])
    return float(d_star), int(t0_star)

d_star, t0_star = min_drift_required(x0, tau_eff, H, k)
weeks_until_start = t0_star if t0_star is not None else None
print(f"[Drift] Need Δ ≥ {d_star:.6f} per week to sustain ≥ τ within {H}w "
      f"(start in ~{weeks_until_start}w).  x0={x0:.3f}, τ_eff={tau_eff:.3f}, σ={sig:.3f}")

# ---------- Optional: simulate with that drift to see the borderline ---------
n_sims = 1500
t = np.arange(H)
path_mean = np.clip(fc.values + d_star*t, 0, 1)
paths = np.clip(path_mean + np.random.normal(0, sig, (n_sims, H)), 0, 1)

def first_sustained(sim, tau, kreq):
    r = 0
    for i, a in enumerate(sim >= tau):
        r = r+1 if a else 0
        if r >= kreq: return i
    return None

hits = [first_sustained(p, tau, k) for p in paths]
pH = np.mean([h is not None for h in hits])
print(f"[What-if @ Δ*] Pr(sustain in {H}w) ≈ {pH:.3f}")

# ---------- Summary line you can log -----------------------------------------
print(f"[Summary] Shift-trigger={shift_triggered} | Δ*={d_star:.6f}/wk | "
      f"τ={tau:.3f} (τ_eff={tau_eff:.3f}) | k={k} | H={H}")


[Shift-Event] z_tau=1.0, m_min=0.00150, k=6  ->  triggered=False
[Drift] Need Δ ≥ 0.000000 per week to sustain ≥ τ within 104w (start in ~0w).  x0=nan, τ_eff=nan, σ=nan
[What-if @ Δ*] Pr(sustain in 104w) ≈ 0.000
[Summary] Shift-trigger=False | Δ*=0.000000/wk | τ=0.650 (τ_eff=nan) | k=6 | H=104


In [9]:
# === CNT NaN Hardener Patch: sanitize series, robust noise, safe targets =====
import numpy as np, pandas as pd

def _safe_series(x):
    s = pd.Series(x).astype(float).replace([np.inf, -np.inf], np.nan).dropna()
    return s

def _mad(x):
    s = _safe_series(x)
    if len(s) == 0: return np.nan
    med = np.median(s)
    return 1.4826 * np.median(np.abs(s - med))  # robust σ

# 0) Ensure OAI & OAI_fc exist
assert "OAI" in globals(), "Run mega cell first (creates OAI)."
assert "OAI_fc" in globals(), "Run mega cell first (creates OAI_fc)."

# 1) Sanitize to finite values
OAI_s   = _safe_series(OAI).copy()
OAI_fcS = _safe_series(OAI_fc).copy()

# If either is empty, synthesize a minimal steady spine (so logic can proceed)
if OAI_s.empty:
    print("[SANITY] OAI empty → building a minimal synthetic spine from current df.")
    base = df.copy() if 'df' in globals() else pd.DataFrame(index=pd.date_range("2020-01-06", periods=32, freq="W-MON"))
    if base.empty:
        base = pd.DataFrame(index=pd.date_range("2020-01-06", periods=32, freq="W-MON"))
        base["synthetic"] = np.linspace(0.45, 0.55, len(base))
    else:
        # mean-normalize available columns and average
        tmp = base.apply(lambda col: (col - np.nanmean(col)) / (np.nanstd(col) + 1e-9))
        base["synthetic"] = tmp.mean(axis=1).fillna(0).clip(-3, 3)
        base["synthetic"] = 1/(1+np.exp(-base["synthetic"]))  # squash to [0,1]
    OAI_s = _safe_series(base["synthetic"])

if OAI_fcS.empty:
    print("[SANITY] OAI_fc empty → flat forecast from last OAI.")
    last = float(OAI_s.iloc[-1]) if len(OAI_s) else 0.5
    horizon = int(CFG.get("HORIZON_WEEKS", 104))
    OAI_fcS = pd.Series(np.full(horizon, last),
                        index=pd.date_range(OAI_s.index[-1] + pd.offsets.Week(1), periods=horizon, freq="W-MON"))

# 2) Recompute τ robustly if needed
def _robust_tau(oai_series, q=0.85, fallback=0.65, min_points=10):
    s = _safe_series(oai_series)
    if len(s) < min_points:
        print(f"[τ] Not enough points ({len(s)}) for calibrated τ→ fallback={fallback}")
        return float(fallback)
    try:
        t = float(np.nanquantile(s.values, q))
        return t if np.isfinite(t) else float(fallback)
    except Exception:
        return float(fallback)

if CFG.get("CALIBRATE_TAU", True):
    TAU = _robust_tau(OAI_s, q=0.85, fallback=CFG.get("FIXED_TAU", 0.65), min_points=10)
else:
    TAU = float(CFG.get("FIXED_TAU", 0.65))
print(f"[τ] Using τ = {TAU:.3f}")

# 3) Robust noise & momentum
sigma = _mad(OAI_s - OAI_s.ewm(span=int(CFG.get("EWMA_SPAN", 8)), adjust=False).mean())
if not np.isfinite(sigma) or sigma == 0:
    sigma = 0.05
lookback = min(26, len(OAI_s))
slope = 0.0
if lookback >= 8:
    x = np.arange(lookback)
    y = OAI_s.iloc[-lookback:].values
    slope = np.polyfit(x, y, 1)[0]

# 4) Momentum-aware mean path (gentle, honest)
h = int(CFG.get("HORIZON_WEEKS", 104))
t = np.arange(h)
mean_path = np.clip(OAI_fcS.values + slope * t, 0, 1)

# 5) Recompute event probability cleanly
def _first_sustained(sim, tau, kreq):
    r = 0
    for i, a in enumerate(sim >= tau):
        r = r + 1 if a else 0
        if r >= kreq:
            return i
    return None

kreq = int(CFG.get("RUN_LENGTH_K", 6))
n_sims = 2000
paths = np.clip(mean_path + np.random.normal(0, sigma, (n_sims, h)), 0, 1)
hits = [_first_sustained(p, TAU, kreq) for p in paths]
pH = np.mean([x is not None for x in hits])
med = None
w80 = (None, None)
if any(x is not None for x in hits):
    idxf = OAI_fcS.index
    dates = pd.to_datetime([idxf[i] for i in hits if i is not None]).sort_values()
    med = dates.iloc[len(dates)//2]
    w80 = (dates.iloc[int(0.10*len(dates))], dates.iloc[int(0.90*len(dates))])

print(f"[Clean Prob] σ≈{sigma:.3f}, slope≈{slope:.6f}/wk → Pr(sustain ≥ τ in {h}w) = {pH:.3f}")
print(f"[Clean Dates] median = {str(med.date()) if med is not None else None}, 80% = {tuple(str(d.date()) if d is not None else None for d in w80)}")


[SANITY] OAI empty → building a minimal synthetic spine from current df.
[SANITY] OAI_fc empty → flat forecast from last OAI.
[τ] Using τ = 0.695
[Clean Prob] σ≈0.059, slope≈0.003036/wk → Pr(sustain ≥ τ in 104w) = 0.000
[Clean Dates] median = None, 80% = (None, None)


In [10]:
# === CNT "Wake-Up Field" — Fused Ultra-Resilient Single Mega Cell ============
# Ingest (Wiki + safe GDELT + optional Trends + local CSVs), cache, build OAI,
# UCM/EWMA nowcast, momentum-aware forecast, shift-event detector, robust τ,
# sustained-threshold event simulation, plots + summary JSON.
# -----------------------------------------------------------------------------
import os, sys, json, math, time, glob, warnings, requests
from datetime import datetime, timezone
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
warnings.filterwarnings("ignore", category=FutureWarning)

# Try UCM; fallback to EWMA if unavailable/too short
try:
    from statsmodels.tsa.statespace.structural import UnobservedComponents
    _HAS_UCM = True
except Exception:
    _HAS_UCM = False

# ============================== CONFIG =======================================
CFG = dict(
    ROOT = Path.cwd(),
    REGION = "US",
    HORIZON_WEEKS = 104,          # forecast horizon
    RUN_LENGTH_K  = 6,            # require k consecutive weeks ≥ τ
    BACKTEST_START = "2012-01-01",
    # Signals
    TOPICS = ["government surveillance","censorship","civil liberties","free speech"],
    WIKI_PAGES = [
        "Mass_surveillance_in_the_United_States",
        "First_Amendment_to_the_United_States_Constitution",
        "Censorship_in_the_United_States",
        "Civil_liberties_in_the_United_States"
    ],
    GDELT_QUERY = '(censorship OR surveillance OR "civil liberties") AND location: United States',
    # Local CSV backbone (zero-network): put date,value CSVs in CNT_Lab/data/
    LOCAL_DATA_DIR = Path("CNT_Lab")/"data",
    # Threshold policy
    CALIBRATE_TAU = True,         # if False uses FIXED_TAU
    TAU_QUANTILE  = 0.85,         # 0.80 is looser, 0.85 stricter
    FIXED_TAU     = 0.65,
    # Modeling heuristics
    MIN_LEN_UCM = 12,             # min weekly points to enable UCM
    EWMA_SPAN   = 8,              # EWMA smoothing when UCM not used
    MOMENTUM_LOOKBACK = 26,       # weeks for slope
    # Shift-event detector (optional)
    SHIFT_Z_WINDOW = 26,          # window for z-score & slope
    SHIFT_Z_TAU    = 1.0,         # ~1σ above local mean
    SHIFT_MIN_SLOPE= 0.0015,      # per-week slope gate
    # IO / caching
    SAVE_DIR  = "artifacts",
    CACHE_DIR = "artifacts/cache",
    RNG_SEED = 1337
)
np.random.seed(CFG["RNG_SEED"])
Path(CFG["SAVE_DIR"]).mkdir(parents=True, exist_ok=True)
Path(CFG["CACHE_DIR"]).mkdir(parents=True, exist_ok=True)
Path(CFG["LOCAL_DATA_DIR"]).mkdir(parents=True, exist_ok=True)

# ========================== NETWORK SESSION ==================================
SESSION = requests.Session()
SESSION.headers.update({
    "User-Agent": "CNTLab/1.3 (fieldwalker://local; contact: telos@cnt.local)"
})
requests_get = SESSION.get

# Optional: Google Trends (skips gracefully if not present)
try:
    from pytrends.request import TrendReq
    _HAS_TRENDS = True
except Exception:
    print("[INFO] pytrends unavailable; skipping Google Trends.")
    TrendReq = None; _HAS_TRENDS = False

# =============================== UTILS =======================================
def as_week_index(dts):
    return pd.to_datetime(dts).to_period('W-MON').to_timestamp()

def spectral_entropy(x, nseg=8, eps=1e-12):
    x = pd.Series(x).dropna().values
    if len(x) < 16: return np.nan
    x = (x - x.mean()) / (x.std() + 1e-9)
    seg = max(16, len(x)//nseg)
    ps = []
    for i in range(0, len(x)-seg+1, seg):
        X = np.fft.rfft(x[i:i+seg]); P = (X*np.conj(X)).real; P /= (P.sum()+eps); ps.append(P)
    Pm = np.mean(ps, axis=0); H = -(Pm*np.log(Pm+eps)).sum(); Hmax = math.log(len(Pm))
    return float(1 - H/Hmax)

def zscore(s): s = pd.Series(s); return (s - s.mean()) / (s.std() + 1e-9)

def logistic_scale(s):
    s = pd.Series(s); q1,q2,q3 = s.quantile([.1,.5,.9])
    scale = (q3-q1)/2.0 if q3>q1 else (s.std() or 1.0)
    return 1/(1+np.exp(-(s-q2)/(scale+1e-9)))

def robust_tau(oai_series, q=0.85, fallback=0.65, min_points=10):
    s = pd.Series(oai_series).astype(float).replace([np.inf,-np.inf],np.nan).dropna()
    if len(s) < min_points:
        print(f"[τ] Not enough points ({len(s)}) for calibrated τ; using fallback={fallback}.")
        return float(fallback)
    try:
        tau = float(np.nanquantile(s.values, q))
        return tau if np.isfinite(tau) else float(fallback)
    except Exception:
        return float(fallback)

def mad_sigma(x):
    s = pd.Series(x).astype(float).replace([np.inf,-np.inf],np.nan).dropna()
    if s.empty: return np.nan
    med = float(np.median(s)); return 1.4826*float(np.median(np.abs(s - med)))

# ============================== FETCHERS =====================================
def fetch_trends(topics, geo="US", since="2012-01-01"):
    if not _HAS_TRENDS: return pd.DataFrame()
    pytrends = TrendReq(hl="en-US", tz=360)
    frames = []
    for kw in topics:
        try:
            pytrends.build_payload([kw], timeframe=f"{since} {pd.Timestamp.today():%Y-%m-%d}", geo=geo)
            df = pytrends.interest_over_time()
            if df.empty: continue
            s = df[kw].rename(kw); s.index = as_week_index(s.index); frames.append(s); time.sleep(1.0)
        except Exception as e:
            print(f"[WARN] pytrends failed for {kw}: {e}")
    return pd.concat(frames, axis=1).sort_index() if frames else pd.DataFrame()

def fetch_wikiviews(pages, since="2012-01-01", project="en.wikipedia", agent="user"):
    cache_path = Path(CFG["CACHE_DIR"]) / "wiki_views.csv"
    def one_page(title):
        start = pd.Timestamp(since).strftime("%Y%m0100")
        end   = pd.Timestamp.today().strftime("%Y%m%d00")
        url = (f"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
               f"{project}/all-access/{agent}/{title}/daily/{start}/{end}")
        try:
            r = requests_get(url, timeout=20); r.raise_for_status()
            data = r.json().get("items", [])
            ts = {pd.to_datetime(i["timestamp"][:8]): i["views"] for i in data}
            return pd.Series(ts, name=title).sort_index().resample("W-MON").sum()
        except Exception as e:
            print(f"[WARN] wiki fail {title}: {e}"); return pd.Series(dtype=float)
    cols=[]
    for p in pages:
        s = one_page(p.replace(" ","_"))
        if s.shape[0]: cols.append(s)
        time.sleep(0.3)
    wiki = pd.concat(cols, axis=1).sort_index() if cols else pd.DataFrame()
    if cache_path.exists():
        old = pd.read_csv(cache_path, parse_dates=["date"]).set_index("date")
        wiki = old.combine_first(wiki) if not wiki.empty else old
    if not wiki.empty: wiki.to_csv(cache_path, index_label="date")
    return wiki

def fetch_gdelt_counts_safe(query, since="2018-01-01", timeout=8, max_consec_err=6, max_months=60):
    start = pd.Timestamp(since).to_period("M").to_timestamp()
    end   = pd.Timestamp.today().to_period("M").to_timestamp()
    months = pd.period_range(start, end, freq="M").to_timestamp()[-max_months:]
    rows, consec = [], 0
    for m in months:
        url = ("https://api.gdeltproject.org/api/v2/doc/doc?"
               f"query={requests.utils.quote(query)}&mode=TimelineVol&format=json"
               f"&startdatetime={m:%Y%m%d000000}&enddatetime={(m+pd.offsets.MonthEnd(0)):%Y%m%d235959}")
        try:
            js = requests_get(url, timeout=timeout).json()
            data = js.get("timelines", [{}])[0].get("data", [])
            for pt in data: rows.append((pd.to_datetime(pt["date"]), int(pt["value"])))
            consec = 0; time.sleep(0.25)
        except Exception:
            consec += 1
            if consec >= max_consec_err:
                print(f"[GDELT] Too many errors in a row ({consec}); stopping early."); break
    if not rows: return pd.DataFrame()
    s = pd.Series({d:v for d,v in rows}).sort_index().resample("W-MON").sum().rename("gdelt_volume")
    cache_path = Path(CFG["CACHE_DIR"]) / "gdelt_volume.csv"
    if cache_path.exists():
        old = pd.read_csv(cache_path, parse_dates=["date"]).set_index("date")["gdelt_volume"]
        s = old.combine_first(s)
    s.to_frame().to_csv(cache_path, index_label="date")
    return s.to_frame()

# ============================== INGEST =======================================
print("Fetching data...")
frames = []
trends = fetch_trends(CFG["TOPICS"], geo=CFG["REGION"], since=CFG["BACKTEST_START"])
if not trends.empty: frames.append(trends)

wiki   = fetch_wikiviews(CFG["WIKI_PAGES"], since=CFG["BACKTEST_START"])
if not wiki.empty: frames.append(wiki)

gdelt  = fetch_gdelt_counts_safe(CFG["GDELT_QUERY"], since=CFG["BACKTEST_START"])
if isinstance(gdelt, pd.DataFrame) and not gdelt.empty: frames.append(gdelt)

# Local CSV backbone (zero network): any date,value CSV in CNT_Lab/data
local_list = []
for p in glob.glob(str(CFG["LOCAL_DATA_DIR"]/"*.csv")):
    try:
        dfc = pd.read_csv(p)
        dcol = [c for c in dfc.columns if "date" in c.lower()][0]
        vcol = [c for c in dfc.columns if c != dcol][0]
        s = pd.Series(dfc[vcol].values, index=pd.to_datetime(dfc[dcol]), name=Path(p).stem)
        s = s.resample("W-MON").mean()
        if s.dropna().shape[0] >= 8:
            local_list.append(s)
            print(f"[LOCAL] using {Path(p).name} ({s.dropna().shape[0]} pts)")
    except Exception as e:
        print(f"[LOCAL] skip {p}: {e}")

if local_list:
    local_df = pd.concat(local_list, axis=1).sort_index().ffill().bfill()
    frames.append(local_df)

if not frames:
    # as a last resort, attempt OWID Pew trust (may require network)
    print("[FALLBACK] No sources found; attempting OWID Pew dataset.")
    pew = pd.read_csv(
        "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Trust%20in%20government%20(Pew)/Trust%20in%20government%20(Pew).csv"
    )
    pew = pew.rename(columns={"Year":"date","Trust in government (Pew)":"trust"}).dropna(subset=["date","trust"])
    pew["date"] = pd.to_datetime(pew["date"], format="%Y")
    pew = pew.set_index("date")["trust"].resample("W-MON").ffill().to_frame()
    frames = [pew]

df = pd.concat(frames, axis=1).sort_index().ffill().bfill()

# ===================== FEATURES & FORECASTABILITY ============================
feat = pd.DataFrame(index=df.index)
for c in df.columns:
    feat[c] = df[c]; feat[c+"_z"] = zscore(df[c]); feat[c+"_vol4"] = df[c].pct_change().rolling(4).std()

weights = {c: spectral_entropy(df[c].values) for c in df.columns}
w = pd.Series(weights).fillna(0.5).clip(0.15, 0.85)
w = (w - w.min()) / (w.max() - w.min() + 1e-12)
w = w.clip(0.05, 1.0)

# ========================= OVERREACH AWARENESS INDEX =========================
scaler = StandardScaler(); X = scaler.fit_transform(df.values)
Wm = np.diag(np.sqrt(w[df.columns].values))
Xw = X.dot(Wm)
pca = PCA(n_components=1, random_state=CFG["RNG_SEED"])
oai_raw = pca.fit_transform(Xw).ravel()
OAI = pd.Series(logistic_scale(zscore(oai_raw)), index=df.index, name="OAI")

# ========== NaN hardener (ensure non-empty OAI + baseline forecast) ==========
def _safe_series(x):
    return pd.Series(x).astype(float).replace([np.inf,-np.inf],np.nan).dropna()

OAI_s = _safe_series(OAI)
if OAI_s.empty:
    print("[SANITY] OAI empty → building a minimal synthetic spine.")
    base = df.copy()
    if base.empty:
        base = pd.DataFrame(index=pd.date_range("2020-01-06", periods=32, freq="W-MON"))
        base["synthetic"] = np.linspace(0.45, 0.55, len(base))
    else:
        tmp = base.apply(lambda col: (col - np.nanmean(col)) / (np.nanstd(col) + 1e-9))
        base["synthetic"] = 1/(1+np.exp(-tmp.mean(axis=1).fillna(0).clip(-3,3)))
    OAI_s = _safe_series(base["synthetic"])
    OAI = OAI_s.copy()

# ========================= THRESHOLD τ (robust) ==============================
TAU = robust_tau(OAI, q=CFG["TAU_QUANTILE"], fallback=CFG["FIXED_TAU"], min_points=10) \
      if CFG["CALIBRATE_TAU"] else float(CFG["FIXED_TAU"])

# ========================= NOWCAST / FORECAST ================================
use_ucm = _HAS_UCM and (len(OAI.dropna()) >= CFG["MIN_LEN_UCM"])
exo = feat.filter(regex="_z$|_vol4$").fillna(0)

if use_ucm:
    try:
        model = UnobservedComponents(endog=OAI, level='llevel', exog=exo)
    except Exception:
        model = UnobservedComponents(endog=OAI, trend=True, exog=exo)
    res = model.fit(disp=False)
    h = int(CFG["HORIZON_WEEKS"])
    lastX = exo.iloc[-1:].values; Xf = np.repeat(lastX, h, axis=0)
    fc = res.get_forecast(steps=h, exog=Xf)
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series(fc.predicted_mean.clip(0,1), index=idxf, name="OAI_fc")
    resid  = (OAI - res.fittedvalues.reindex_like(OAI).bfill()).dropna()
    model_used = "UCM"
else:
    print("[FALLBACK] Using EWMA nowcast/forecast (series too short or UCM unavailable).")
    h = int(CFG["HORIZON_WEEKS"])
    OAI_fit  = OAI.ewm(span=int(CFG["EWMA_SPAN"]), adjust=False).mean()
    last_val = float(OAI_fit.iloc[-1])
    idxf = pd.date_range(OAI.index[-1] + pd.offsets.Week(1), periods=h, freq="W-MON")
    OAI_fc = pd.Series([last_val]*h, index=idxf, name="OAI_fc")
    resid  = (OAI - OAI_fit).dropna()
    model_used = "EWMA"

# Momentum-aware projection (gentle, honest)
lookback = min(int(CFG["MOMENTUM_LOOKBACK"]), len(OAI))
slope = 0.0
if lookback >= 12:
    x = np.arange(lookback); y = pd.Series(OAI.iloc[-lookback:]).values
    slope = np.polyfit(x, y, 1)[0]
mean_path = pd.Series(OAI_fc.values + np.arange(h)*slope, index=OAI_fc.index).clip(0,1)

# Robust noise
sigma = mad_sigma(OAI - OAI.ewm(span=int(CFG["EWMA_SPAN"]), adjust=False).mean())
if not np.isfinite(sigma) or sigma == 0: sigma = 0.05

# ========================= CHANGE-POINTS (CUSUM) =============================
k_cusum = float(resid.std() or sigma) * 0.25
thr     = float(resid.std() or sigma) * 3.0
pos = neg = 0.0; alarms = []
for t, e in (OAI - (OAI - resid.reindex_like(OAI).fillna(0))).fillna(0).items():
    pos = max(0.0, pos + e - k_cusum); neg = min(0.0, neg + e + k_cusum)
    if pos > thr or abs(neg) > thr: alarms.append(t); pos = neg = 0.0
cp_dates = [pd.Timestamp(d) for d in alarms[-5:]]

# =================== TIME-TO-EVENT (sustained τ for k weeks) =================
def first_sustained(sim, tau, kreq):
    r = 0
    for i, a in enumerate(sim >= tau):
        r = r+1 if a else 0
        if r >= kreq: return i
    return None

n_sims = 2000
paths  = np.clip(mean_path.values + np.random.normal(0, sigma, (n_sims, h)), 0, 1)
hits   = [first_sustained(p, TAU, CFG["RUN_LENGTH_K"]) for p in paths]
hit_idxs = [x for x in hits if x is not None]
if hit_idxs:
    dates = pd.to_datetime([idxf[i] for i in hit_idxs]).sort_values()
    med_date = dates.iloc[len(dates)//2]
    pH = len(hit_idxs)/n_sims
    d80 = (dates.iloc[int(0.10*len(dates))], dates.iloc[int(0.90*len(dates))])
else:
    med_date, pH, d80 = None, 0.0, (None, None)

# ===================== OPTIONAL: SHIFT-EVENT DETECTOR ========================
# z-score & slope must both exceed gates for k weeks
def rolling_slope(series, w):
    y = pd.Series(series).astype(float)
    if len(y) < w: return pd.Series(index=y.index, dtype=float)
    X = np.arange(w); out = [np.nan]*(w-1)
    for i in range(w, len(y)+1):
        yi = y.iloc[i-w:i].values; m = np.polyfit(X, yi, 1)[0]; out.append(m)
    return pd.Series(out, index=y.index, dtype=float)

ZW = int(CFG["SHIFT_Z_WINDOW"]); ZT = float(CFG["SHIFT_Z_TAU"]); MMIN = float(CFG["SHIFT_MIN_SLOPE"])
oai = pd.Series(OAI, index=OAI.index).astype(float)
mu  = oai.rolling(ZW, min_periods=max(8, ZW//3)).mean()
sd  = oai.rolling(ZW, min_periods=max(8, ZW//3)).std()
zsc = (oai - mu) / (sd.replace(0, np.nan) + 1e-9)
slo = rolling_slope(oai, ZW)
cond = (zsc >= ZT) & (slo >= MMIN)
run  = (cond.groupby((~cond).cumsum()).cumcount()+1)*cond
shift_triggered = bool((run >= CFG["RUN_LENGTH_K"]).any())

# =============================== PLOTS =======================================
outdir = Path(CFG["SAVE_DIR"])
plt.figure(figsize=(10,5))
plt.plot(OAI.index, OAI, label="OAI")
if model_used == "UCM":
    try:
        plt.plot((OAI - resid).clip(0,1).index, (OAI - resid).clip(0,1).values, "--", label="Fit")
    except Exception:
        pass
plt.axhline(TAU, linestyle=":", label=f"τ={TAU:.3f}")
for d in cp_dates: plt.axvline(d, linestyle=":", alpha=0.5)
plt.legend(); plt.title(f"OAI — Model: {model_used} | Shift-event: {shift_triggered}")
plt.tight_layout(); plt.savefig(outdir/"oai_fit.png", dpi=160); plt.close()

plt.figure(figsize=(10,4))
plt.plot(mean_path.index, mean_path.values, label="Forecast (momentum-aware)")
plt.axhline(TAU, linestyle=":", label=f"τ={TAU:.3f}")
plt.legend(); plt.title("OAI Forecast Horizon")
plt.tight_layout(); plt.savefig(outdir/"oai_fc.png", dpi=160); plt.close()

prob_curve = [np.mean([(x is not None and x <= t) for x in hits]) for t in range(h)]
pd.Series(prob_curve, index=idxf).plot(figsize=(10,3), ylim=(0,1), title="Pr(sustained crossing by week t)")
plt.tight_layout(); plt.savefig(outdir/"oai_prob.png", dpi=160); plt.close()

# ============================== SUMMARY ======================================
summary = dict(
    generated_at = datetime.now(timezone.utc).isoformat(),
    horizon_weeks = h,
    threshold_tau = float(TAU),
    run_length_k  = int(CFG["RUN_LENGTH_K"]),
    last_week     = str(pd.Timestamp(OAI.index[-1]).date()),
    change_points = [str(pd.Timestamp(d).date()) for d in cp_dates],
    prob_within_horizon = round(float(pH), 3),
    median_event_date   = (str(pd.Timestamp(med_date).date()) if med_date is not None else None),
    event_window_80     = tuple(str(pd.Timestamp(d).date()) if d is not None else None for d in d80),
    shift_event_triggered = bool(shift_triggered),
    weights = {k: float(v) for k, v in w.to_dict().items()},
    pca_var = float(pca.explained_variance_ratio_[0]),
    sources = {
        "trends_cols": list(trends.columns) if isinstance(trends, pd.DataFrame) and not trends.empty else [],
        "wiki_cols":   list(wiki.columns)   if isinstance(wiki,   pd.DataFrame) and not wiki.empty   else [],
        "gdelt_cols":  list(gdelt.columns)  if isinstance(gdelt,  pd.DataFrame) and not gdelt.empty  else [],
        "local_series": [Path(p).name for p in glob.glob(str(CFG["LOCAL_DATA_DIR"]/"*.csv"))],
        "model_used":  model_used
    }
)
with open(outdir/"oai_summary.json", "w") as f:
    json.dump(summary, f, indent=2)

print(json.dumps(summary, indent=2))
print("\nFigures saved to:", str(outdir.resolve()))


[INFO] pytrends unavailable; skipping Google Trends.
Fetching data...
[GDELT] Too many errors in a row (6); stopping early.
[SANITY] OAI empty → building a minimal synthetic spine.


  self._init_dates(dates, freq)
  trend = spsolve(I+lamb*K.T.dot(K), x, use_umfpack=use_umfpack)


{
  "generated_at": "2025-10-16T06:15:15.894105+00:00",
  "horizon_weeks": 104,
  "threshold_tau": 0.6954909408971236,
  "run_length_k": 6,
  "last_week": "2025-10-20",
  "change_points": [
    "2020-04-06",
    "2020-05-11",
    "2021-01-11",
    "2021-01-18",
    "2021-01-25"
  ],
  "prob_within_horizon": 0.0,
  "median_event_date": null,
  "event_window_80": [
    null,
    null
  ],
  "shift_event_triggered": true,
  "weights": {
    "Mass_surveillance_in_the_United_States": 0.05,
    "First_Amendment_to_the_United_States_Constitution": 0.2658909185548501,
    "Censorship_in_the_United_States": 0.2974816205736362,
    "Civil_liberties_in_the_United_States": 0.9999999999979158
  },
  "pca_var": 0.7709565458878976,
  "sources": {
    "trends_cols": [],
    "wiki_cols": [
      "Mass_surveillance_in_the_United_States",
      "First_Amendment_to_the_United_States_Constitution",
      "Censorship_in_the_United_States",
      "Civil_liberties_in_the_United_States"
    ],
    "gdelt_cols"

In [11]:
# === Gate Tuner: k=4 + τ@80th + re-sim (no refetch) =========================
import numpy as np, pandas as pd

assert "OAI" in globals() and "OAI_fc" in globals() and "mean_path" in globals(), "Run mega cell first."

# Soften but stay honest
CFG["RUN_LENGTH_K"] = 4
TAU = float(pd.Series(OAI).quantile(0.80))  # calibrated to history (80th pct)

# Robust noise (MAD→σ)
OAI_fit = pd.Series(OAI).ewm(span=int(CFG.get("EWMA_SPAN", 8)), adjust=False).mean()
sigma = float((pd.Series(OAI) - OAI_fit).mad() * 1.4826) or 0.05

# Re-sim sustained crossing with momentum-aware mean_path from the mega cell
h = int(CFG.get("HORIZON_WEEKS", 104))
idxf = mean_path.index

def _first_sustained(sim, tau, kreq):
    r = 0
    for i, a in enumerate(sim >= tau):
        r = r + 1 if a else 0
        if r >= kreq: return i
    return None

n_sims = 3000
paths = np.clip(mean_path.values + np.random.normal(0, sigma, (n_sims, h)), 0, 1)
hits = [_first_sustained(p, TAU, CFG["RUN_LENGTH_K"]) for p in paths]
pH = np.mean([h is not None for h in hits])

med = None; w80 = (None, None)
if any(x is not None for x in hits):
    dates = pd.to_datetime([idxf[i] for i in hits if i is not None]).sort_values()
    med = dates.iloc[len(dates)//2]
    w80 = (dates.iloc[int(0.10*len(dates))], dates.iloc[int(0.90*len(dates))])

print(f"[Gate Tuner] τ@80th={TAU:.3f}, k={CFG['RUN_LENGTH_K']}, σ≈{sigma:.3f}")
print(f"[Gate Tuner] Pr(sustain ≥ τ in {h}w) = {pH:.3f}")
print(f"[Gate Tuner] median={str(med.date()) if med is not None else None}, 80%={tuple(str(d.date()) if d is not None else None for d in w80)}")


AttributeError: 'Series' object has no attribute 'mad'