In [2]:
# run_ewi_and_minimal_push.py
# ------------------------------------------------------------
# A) Early-warning indicators (variance, lag-1 AC) per subject
#    computed on the observed H proxy series from your CSV.
# B) Minimal intervention search to flip the hysteretic model
#    back to the healthy branch using fitted global parameters.
#
# Inputs (actual paths are set by DATA_CSV / PARAMS_CSV in the Config section below):
#   - timeseries/combined_scfas_table_scored.csv  (must have subject_id, sample_id, and H_proxy_meta_smooth or H_proxy_meta)
#   - mw_fit_out/fitted_global_params.csv         (from your previous fit)
#
# Outputs (in ./mw_actions_out):
#   - ewi_summary.csv
#   - ewi_<subject>.png (per-subject EWI plots)
#   - minimal_push_summary.csv
#   - minimal_push_<mode>.png (time-course plots for each successful mode)
#
# Reqs: numpy, pandas, scipy, matplotlib
# ------------------------------------------------------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau
from scipy.integrate import solve_ivp

# ----------------- Config -----------------
DATA_CSV = "timeseries/combined_scfas_table_scored.csv"
PARAMS_CSV = "mw_fit_out/fitted_global_params.csv"
OUTDIR = "mw_actions_out"
os.makedirs(OUTDIR, exist_ok=True)

H_COL_CANDIDATES = ["H_proxy_meta_smooth", "H_proxy_meta"]  # tried in this order
TIME_COL = None    # name of a real time column if the CSV has one; otherwise a within-subject index is used
MIN_SERIES = 8     # minimum number of finite points for a subject to get EWIs
ROLL_FRAC = 0.25   # rolling window as a fraction of series length (floored at 6 samples)
EPS = 1e-6

# ----------------- Load data -----------------
df = pd.read_csv(DATA_CSV)

# First candidate column that the CSV actually contains, or None.
h_col = next((name for name in H_COL_CANDIDATES if name in df.columns), None)
if h_col is None:
    raise ValueError("No H proxy column found (looked for H_proxy_meta_smooth or H_proxy_meta).")

if not (TIME_COL and TIME_COL in df.columns):
    # No usable time column: number the rows of each subject 0, 1, 2, ...
    # in the order they appear in the file.
    df = df.dropna(subset=["subject_id", "sample_id"]).copy()
    df["t_idx"] = df.groupby("subject_id").cumcount().astype(float)
    TIME_COL = "t_idx"
else:
    df = df.dropna(subset=["subject_id", "sample_id", TIME_COL]).copy()

# Clip H into [0, 1], then order rows by subject and time.
df = df.assign(H_obs=df[h_col].clip(0, 1)).sort_values(["subject_id", TIME_COL])

# ----------------- A) Early-warning indicators -----------------
def rolling_ac1(x):
    """Lag-1 autocorrelation of one window as a Pearson correlation.

    Correlates x[:-1] with x[1:], using only the pairs in which both values
    are finite. Returns NaN when the window has fewer than 2 entries, holds
    no finite value at all, or offers fewer than 3 usable pairs.
    """
    values = np.asarray(x, float)
    finite = np.isfinite(values)
    if len(values) < 2 or not finite.any():
        return np.nan
    # A pair (x[i], x[i+1]) is usable only when both members are finite.
    paired = finite[:-1] & finite[1:]
    if np.count_nonzero(paired) < 3:
        return np.nan
    lead = values[:-1][paired]
    lag = values[1:][paired]
    return float(np.corrcoef(lead, lag)[0, 1])

# Column layout of ewi_summary.csv. Passing it to the DataFrame constructor
# keeps the final sort valid even when no subject passes the MIN_SERIES filter
# (an empty, column-less frame would make sort_values raise KeyError).
EWI_COLUMNS = ["subject_id", "n_points", "window",
               "tau_var", "p_var", "tau_ac1", "p_ac1"]

ewi_rows = []
for sid, sub in df.groupby("subject_id"):
    t = sub[TIME_COL].values.astype(float)
    H = sub["H_obs"].values.astype(float)
    # require enough finite points
    mask_fin = np.isfinite(H)
    if mask_fin.sum() < MIN_SERIES:
        continue

    # rolling window: ROLL_FRAC of the series length, never fewer than 6 samples
    w = max(6, int(np.ceil(len(H) * ROLL_FRAC)))
    roller = pd.Series(H).rolling(window=w, min_periods=max(4, w // 2))
    # variance
    H_var = roller.var().values
    # lag-1 autocorrelation (computed per window)
    H_ac1 = roller.apply(rolling_ac1, raw=False).values

    # trend tests (Kendall's tau) on available points
    def tau_trend(series):
        """Return (tau, p) of the indicator against time, or (nan, nan) if < 6 usable points."""
        m = np.isfinite(series) & np.isfinite(t)
        if m.sum() < 6:  # need enough for a meaningful rank test
            return np.nan, np.nan
        tau, p = kendalltau(t[m], series[m])
        return float(tau), float(p)

    tau_var, p_var = tau_trend(H_var)
    tau_ac1, p_ac1 = tau_trend(H_ac1)

    # save per-subject plot (series, rolling variance, rolling AC1)
    fig, ax = plt.subplots(3, 1, figsize=(8, 9), sharex=True)
    ax[0].plot(t, H, lw=1.8); ax[0].set_ylabel("H proxy")
    ax[0].grid(True, ls=":", alpha=0.6)
    ax[1].plot(t, H_var, lw=1.8); ax[1].set_ylabel("Var(H)")
    ax[1].set_title(f"{sid} | τ_var={tau_var:.2f} (p={p_var:.3g})")
    ax[1].grid(True, ls=":", alpha=0.6)
    ax[2].plot(t, H_ac1, lw=1.8); ax[2].set_ylabel("AC1(H)"); ax[2].set_xlabel("time")
    ax[2].set_title(f"τ_ac1={tau_ac1:.2f} (p={p_ac1:.3g})")
    ax[2].grid(True, ls=":", alpha=0.6)
    fig.tight_layout()
    fig.savefig(os.path.join(OUTDIR, f"ewi_{sid}.png"), dpi=180)
    # Close this specific figure so one figure per subject does not accumulate.
    plt.close(fig)

    ewi_rows.append({
        "subject_id": sid, "n_points": int(mask_fin.sum()),
        "window": int(w),
        "tau_var": tau_var, "p_var": p_var,
        "tau_ac1": tau_ac1, "p_ac1": p_ac1
    })

ewi_df = pd.DataFrame(ewi_rows, columns=EWI_COLUMNS).sort_values(["p_var", "p_ac1"])
ewi_df.to_csv(os.path.join(OUTDIR, "ewi_summary.csv"), index=False)

# ----------------- B) Minimal intervention search -----------------
# Load fitted global parameters
g = pd.read_csv(PARAMS_CSV, index_col=0).squeeze("columns")
# (name, fallback) pairs in the order rhs_mem unpacks them:
# [r_max,K_M,c,d,g,u,p_low,p_high,H_on,H_off,tau_q]
# The fallback is used only when the fitted table has no entry of that name.
_PAR_FALLBACKS = (
    ("r_max", 0.32),
    ("K_M", 1.0),
    ("c", 0.10),
    ("d", 0.12),     # baseline d for intervention tests
    ("g", 0.5),
    ("u", 0.6),
    ("p_low", 0.1),
    ("p_high", 2.5),
    ("H_on", 0.55),
    ("H_off", 0.70),
    ("tau_q", 4.0),
)
pars = [float(g.get(name, fallback)) for name, fallback in _PAR_FALLBACKS]

# Model with intervention
def rhs_mem(t, y, p, U=0.0, T=0.0, mode="butyrate"):
    """Right-hand side of the 4-state model (M, H, B, q) with an optional intervention.

    Parameters
    ----------
    t : current time.
    y : state, unpacked as (M, H, B, q).
    p : parameters, unpacked as
        (r_max, K_M, c, d, g, u, p_low, p_high, H_on, H_off, tau_q).
    U, T : intervention magnitude and duration; the intervention is applied
        only while ``t <= T`` and ``U > 0``.
    mode : "butyrate" adds U as a direct input to dB, "prebiotic" adds U to
        the production rate, "engineered" adds U to the growth rate.
        Any other value leaves the dynamics unforced.

    Returns
    -------
    list ``[dM, dH, dB, dq]``.
    """
    M, H, B, q = y
    r_max, K_M, c, d, gH, u, pL, pH, H_on, H_off, tau = p
    # Production rate interpolates between p_low and p_high with q clipped to [0, 1].
    pB = pL + (pH - pL) * np.clip(q, 0, 1)

    dosing = t <= T and U > 0
    inp_B = U if (dosing and mode == "butyrate") else 0.0
    pB_aug = U if (dosing and mode == "prebiotic") else 0.0
    r_eff = r_max + U if (dosing and mode == "engineered") else r_max

    # Memory target: on below H_on, off above H_off, held in between.
    if H < H_on:
        q_inf = 1.0
    elif H > H_off:
        q_inf = 0.0
    else:
        q_inf = q

    return [
        (r_eff - c * pB) * M * (1 - M / K_M),
        gH * B * (1 - H) - d * H,
        (pB + pB_aug) * M - u * H * B + inp_B,
        (q_inf - q) / tau,
    ]

def integrate(p, y0, U=0.0, T=0.0, mode="butyrate", T_end=220.0):
    """Integrate rhs_mem from t=0 to T_end and return the solve_ivp result.

    The solution is sampled on 900 evenly spaced times; max_step=0.5 bounds
    the solver's step size.
    """
    def field(t, y):
        return rhs_mem(t, y, p, U=U, T=T, mode=mode)

    sample_times = np.linspace(0, T_end, 900)
    return solve_ivp(field, (0, T_end), y0, t_eval=sample_times,
                     rtol=1e-6, atol=1e-8, max_step=0.5)

# Get a "bad-branch" steady state y_bad at baseline d by relaxing from low-H
def relax_to_branch(p, H_init=0.55, q_init=1.0, T_relax=200.0):
    """Run the unforced model for T_relax from (M=0.2, H_init, B=0.1, q_init).

    Returns ``(final_state, sol)``, where final_state is the last sampled
    column of the solution and sol is the full solve_ivp result.
    """
    start = np.array([0.2, H_init, 0.1, q_init], float)

    def unforced(t, y):
        return rhs_mem(t, y, p)

    sol = solve_ivp(unforced, (0, T_relax), start,
                    t_eval=np.linspace(0, T_relax, 600),
                    rtol=1e-6, atol=1e-8, max_step=0.5)
    return sol.y[:, -1], sol

# Start just below H_off (pars[9]), capped at 0.6, with the memory variable on.
y_bad, _ = relax_to_branch(pars, H_init=min(0.6, pars[9]-0.05), q_init=1.0, T_relax=220.0)

def success(sol, H_off, eps=0.02):
    """Return True when the run ends above H_off with q close to off.

    Both criteria use the mean of the last 40 samples: mean H must exceed
    ``H_off + eps`` and mean q must be below 0.2.
    """
    tail = sol.y[:, -40:]
    H_end = float(np.mean(tail[1]))
    q_end = float(np.mean(tail[3]))
    above_threshold = H_end > H_off + eps
    memory_off = q_end < 0.2
    return above_threshold and memory_off

modes = ["butyrate", "prebiotic", "engineered"]
U_grid = np.linspace(0.02, 1.2, 25)     # magnitude grid
T_grid = np.linspace(2.0, 60.0, 20)     # duration grid

summary = []
for mode in modes:
    # Scan magnitudes (outer, ascending) and durations (inner, ascending) and
    # keep the first pair that flips the system; hence "minimal-ish", not optimal.
    found = None
    for U in U_grid:
        for T in T_grid:
            sol = integrate(pars, y_bad, U=U, T=T, mode=mode, T_end=240.0)
            if success(sol, pars[9]):
                found = (U, T, sol)
                break
        if found:
            break
    if found:
        U_found, T_found, sol_found = found
        summary.append({"mode": mode, "U": U_found, "T": T_found, "success": True})
        # plot time course
        plt.figure(figsize=(8, 6))
        plt.plot(sol_found.t, sol_found.y[1], lw=2, label="H")
        plt.plot(sol_found.t, sol_found.y[3], lw=1.5, label="q")
        plt.axhline(pars[8], ls=":", c="gray", label="H_on")
        plt.axhline(pars[9], ls="--", c="gray", label="H_off")
        plt.axvline(T_found, ls="-.", c="k", alpha=0.6, label="end of intervention")
        # The original title used `{U*:.2f}` / `{T*:.1f}`, which is not valid
        # Python; format the values that were actually found.
        plt.title(f"{mode}: minimal-ish U={U_found:.2f}, T={T_found:.1f} h")
        plt.xlabel("time (h)"); plt.ylabel("H, q")
        plt.legend(); plt.grid(True, ls=":", alpha=0.6)
        plt.tight_layout(); plt.savefig(os.path.join(OUTDIR, f"minimal_push_{mode}.png"), dpi=180)
        plt.close()
    else:
        summary.append({"mode": mode, "U": np.nan, "T": np.nan, "success": False})

pd.DataFrame(summary).to_csv(os.path.join(OUTDIR, "minimal_push_summary.csv"), index=False)

print("✅ Done.")
print(f"  EWI summary: {os.path.join(OUTDIR,'ewi_summary.csv')}")
print(f"  Minimal push summary: {os.path.join(OUTDIR,'minimal_push_summary.csv')}")


✅ Done.
  EWI summary: mw_actions_out/ewi_summary.csv
  Minimal push summary: mw_actions_out/minimal_push_summary.csv


In [20]:
# analyze_ewi.py
import pandas as pd
import numpy as np

# Per-subject EWI summary to rank (one row per subject).
EWI_CSV = "mw_actions_out_2/ewi_summary.csv"
df = pd.read_csv(EWI_CSV)

# prefer subjects with positive tau and low p for BOTH variance and AC1
def score_row(r):
    """Score one summary row; smaller is better.

    The score is the mean of p_var and p_ac1, plus a flat 0.25 penalty unless
    both tau_var and tau_ac1 are >= 0. Missing p-values default to NaN (so the
    score is NaN); missing taus default to 0.0.
    """
    mean_p = (r.get("p_var", np.nan) + r.get("p_ac1", np.nan)) / 2.0
    # encourage positive tau (increasing trends), penalize negative
    both_increasing = r.get("tau_var", 0.0) >= 0 and r.get("tau_ac1", 0.0) >= 0
    return mean_p + (0.0 if both_increasing else 0.25)

# Score every subject, then rank ascending (lowest score first); ties are
# broken by p_var and then p_ac1.
df["ewi_score"] = df.apply(score_row, axis=1)
df = df.sort_values(["ewi_score", "p_var", "p_ac1"]).reset_index(drop=True)

print("\nTop 10 subjects with strongest EWI signals (lower score = stronger):")
cols = ["subject_id","n_points","tau_var","p_var","tau_ac1","p_ac1","ewi_score"]
print(df[cols].head(10).to_string(index=False))

# Save ranked list
# NOTE(review): the summary is read from mw_actions_out_2/ but the ranking is
# written to mw_actions_out/ — confirm that this directory is the intended target.
df.to_csv("mw_actions_out/ewi_ranked.csv", index=False)
print("\nSaved ranked EWI list -> mw_actions_out/ewi_ranked.csv")



Top 10 subjects with strongest EWI signals (lower score = stronger):
subject_id  n_points   tau_var    p_var   tau_ac1    p_ac1  ewi_score
     H4004         8  1.000000 0.002778  0.333333 0.469444   0.236111
     M2085         7 -0.800000 0.083333  0.800000 0.083333   0.333333
     H4010         7  0.527046 0.206507 -0.737865 0.076974   0.391741
     C3023         7 -0.800000 0.083333  0.600000 0.233333   0.408333
     H4014         6 -1.000000 0.083333 -0.666667 0.333333   0.458333
     P6010         6 -1.000000 0.083333 -0.666667 0.333333   0.458333
     C3002         6 -0.666667 0.333333 -0.666667 0.333333   0.583333
     H4006         6 -0.666667 0.333333  0.666667 0.333333   0.583333
     P6016         6 -0.666667 0.333333  0.666667 0.333333   0.583333
     M2008         6  0.666667 0.333333  0.000000 1.000000   0.666667

Saved ranked EWI list -> mw_actions_out/ewi_ranked.csv


In [22]:
# refine_minimal_push.py
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from scipy.integrate import solve_ivp

PARAMS = pd.read_csv("mw_fit_out/fitted_global_params.csv", index_col=0).squeeze("columns")

# Fallback values, keyed in the order rhs_mem unpacks the parameter vector.
# A fallback is used only when PARAMS has no entry of that name.
_PAR_FALLBACKS = {
    "r_max": 0.32,
    "K_M": 1.0,
    "c": 0.10,
    "d": 0.12,
    "g": 0.5,
    "u": 0.6,
    "p_low": 0.1,
    "p_high": 2.5,
    "H_on": 0.55,
    "H_off": 0.70,
    "tau_q": 4.0,
}
pars = [float(PARAMS.get(name, fallback)) for name, fallback in _PAR_FALLBACKS.items()]

def rhs_mem(t, y, p, U=0.0, T=0.0, mode="butyrate"):
    """Derivatives [dM, dH, dB, dq] of the model, with an optional intervention.

    y unpacks as (M, H, B, q) and p as
    (r_max, K_M, c, d, g, u, p_low, p_high, H_on, H_off, tau_q).
    While ``t <= T`` and ``U > 0`` the intervention adds U to the B input
    ("butyrate"), to the production rate ("prebiotic") or to the growth rate
    ("engineered"); any other mode has no effect.
    """
    M, H, B, q = y
    (r_max, K_M, c, d, gH, u, pL, pH, H_on, H_off, tau) = p
    q_clipped = np.clip(q, 0, 1)
    pB = pL + (pH - pL) * q_clipped

    r_eff, inp_B, pB_aug = r_max, 0.0, 0.0
    if t <= T and U > 0:
        if mode == "butyrate":
            inp_B = U
        elif mode == "prebiotic":
            pB_aug = U
        elif mode == "engineered":
            r_eff = r_max + U

    dM = (r_eff - c * pB) * M * (1 - M / K_M)
    dH = gH * B * (1 - H) - d * H
    dB = (pB + pB_aug) * M - u * H * B + inp_B

    # q relaxes toward 1 below H_on, toward 0 above H_off, and holds in between.
    q_inf = 1.0 if H < H_on else (0.0 if H > H_off else q)
    dq = (q_inf - q) / tau
    return [dM, dH, dB, dq]

def integrate(p, y0, U=0.0, T=0.0, mode="butyrate", T_end=240):
    """Solve the model on [0, T_end] from y0; returns the solve_ivp result (900 samples)."""
    def field(t, y):
        return rhs_mem(t, y, p, U=U, T=T, mode=mode)

    return solve_ivp(field, (0, T_end), y0,
                     t_eval=np.linspace(0, T_end, 900),
                     rtol=1e-6, atol=1e-8, max_step=0.5)

def relax_bad(p):
    """Return the state reached after 220 time units of unforced dynamics.

    The run starts at M=0.2, B=0.1, q=1.0 and H = min(0.6, H_off - 0.05),
    where H_off is p[9].
    """
    H_start = min(0.6, p[9] - 0.05)
    start = np.array([0.2, H_start, 0.1, 1.0], float)
    return integrate(p, start, T_end=220).y[:, -1]

def success(sol, H_off, eps=0.02):
    """True when mean H over the last 40 samples exceeds H_off + eps and mean q is below 0.2."""
    H_tail = sol.y[1, -40:]
    q_tail = sol.y[3, -40:]
    H_end = float(np.mean(H_tail))
    q_end = float(np.mean(q_tail))
    return (H_end > H_off + eps) and (q_end < 0.2)

modes = ["butyrate","prebiotic","engineered"]
# Coarse grid-search result (columns used below: mode, U, T, success).
coarse = pd.read_csv("mw_actions_out/minimal_push_summary.csv")

# Common starting state for every refinement trial.
y_bad = relax_bad(pars)

def refine_one(mode, U0, T0):
    """Locally shrink a successful (U, T) intervention by pattern search.

    Starting from (U0, T0), the eight axis and diagonal neighbours at the
    current step sizes are tried; a neighbour replaces the incumbent when it
    still satisfies ``success`` and has a strictly smaller U*T. When a full
    pass brings no improvement both step sizes are halved, and the search
    stops once stepU <= 0.01 and stepT <= 0.5.

    The original loop condition was ``while improved and (...)``, which ended
    the search right after the first non-improving pass, so the halved steps
    were never used. The loop is now driven by the step sizes alone.

    Parameters
    ----------
    mode : intervention mode forwarded to ``integrate``.
    U0, T0 : starting magnitude and duration.

    Returns
    -------
    ``(U, T)`` of the best point found, or None when U0 or T0 is not finite.
    """
    if not np.isfinite(U0) or not np.isfinite(T0):
        return None
    best = (U0, T0)
    stepU, stepT = max(0.05, 0.2*U0), max(1.0, 0.2*T0)
    directions = [(+1, 0), (-1, 0), (0, +1), (0, -1),
                  (+1, +1), (+1, -1), (-1, +1), (-1, -1)]
    while stepU > 0.01 or stepT > 0.5:
        improved = False
        for dU, dT in directions:
            # Keep U non-negative and T at least 0.5.
            U_try = max(0.0, best[0] + dU*stepU)
            T_try = max(0.5, best[1] + dT*stepT)
            sol = integrate(pars, y_bad, U=U_try, T=T_try, mode=mode, T_end=240)
            # The 1e-6 margin guarantees strict progress, so the loop terminates.
            if success(sol, pars[9]) and (U_try*T_try < best[0]*best[1] - 1e-6):
                best = (U_try, T_try)
                improved = True
        if not improved:
            # Nothing better at this resolution: look closer.
            stepU *= 0.5
            stepT *= 0.5
    return best

# Refine each mode that the coarse search flipped; every other mode gets an all-NaN row.
rows = []
for _, rec in coarse.iterrows():
    refined = refine_one(rec["mode"], rec["U"], rec["T"]) if bool(rec["success"]) else None
    if refined:
        U_ref, T_ref = refined[0], refined[1]
        area = refined[0] * refined[1]
    else:
        U_ref = T_ref = area = np.nan
    rows.append({"mode": rec["mode"], "U_refined": U_ref, "T_refined": T_ref,
                 "area_UxT": area})

ref = pd.DataFrame(rows)
ref.to_csv("mw_actions_out/minimal_push_refined.csv", index=False)
print(ref)


         mode  U_refined  T_refined  area_UxT
0    butyrate        NaN        NaN       NaN
1   prebiotic        NaN        NaN       NaN
2  engineered        NaN        NaN       NaN


In [6]:
# push_vs_d_sensitivity.py
import numpy as np, pandas as pd
from scipy.integrate import solve_ivp

PARAMS = pd.read_csv("mw_fit_out/fitted_global_params.csv", index_col=0).squeeze("columns")
# Parameter names in the order rhs_mem unpacks them, with the fallback used
# when the fitted table lacks that entry.
_PAR_NAMES = ("r_max", "K_M", "c", "d", "g", "u", "p_low", "p_high", "H_on", "H_off", "tau_q")
_PAR_FALLBACKS = (0.32, 1.0, 0.10, 0.12, 0.5, 0.6, 0.1, 2.5, 0.55, 0.70, 4.0)  # index 3 is the baseline d
pars_base = [float(PARAMS.get(name, fallback))
             for name, fallback in zip(_PAR_NAMES, _PAR_FALLBACKS)]

def rhs_mem(t, y, p, U=0.0, T=0.0, mode="butyrate"):
    """Model right-hand side; returns the list [dM, dH, dB, dq].

    y is (M, H, B, q); p is
    (r_max, K_M, c, d, g, u, p_low, p_high, H_on, H_off, tau_q).
    An intervention of size U acts only while ``t <= T`` and ``U > 0``, on the
    term selected by mode ("butyrate", "prebiotic" or "engineered").
    """
    M, H, B, q = y
    r_max, K_M, c, d, gH, u, pL, pH, H_on, H_off, tau = p
    pB = pL + (pH - pL) * np.clip(q, 0, 1)

    # Defaults describe the unforced system.
    r_eff = r_max
    inp_B = 0.0
    pB_aug = 0.0
    if U > 0 and t <= T:
        if mode == "engineered":
            r_eff = r_max + U
        elif mode == "prebiotic":
            pB_aug = U
        elif mode == "butyrate":
            inp_B = U

    # Hysteretic target for q: switch on below H_on, off above H_off, hold otherwise.
    if H > H_off:
        q_inf = 0.0
    elif H < H_on:
        q_inf = 1.0
    else:
        q_inf = q

    dM = (r_eff - c * pB) * M * (1 - M / K_M)
    dH = gH * B * (1 - H) - d * H
    dB = (pB + pB_aug) * M - u * H * B + inp_B
    dq = (q_inf - q) / tau
    return [dM, dH, dB, dq]

def integrate(p, y0, U=0.0, T=0.0, mode="butyrate", T_end=240):
    """Integrate the model from y0 over [0, T_end]; returns the solve_ivp result."""
    def field(t, y):
        return rhs_mem(t, y, p, U=U, T=T, mode=mode)

    # 900 evenly spaced output times; max_step bounds the solver's step size.
    out_times = np.linspace(0, T_end, 900)
    return solve_ivp(field, (0, T_end), y0, t_eval=out_times,
                     rtol=1e-6, atol=1e-8, max_step=0.5)

def relax_bad(p):
    """Final state of a 220-unit unforced run started at M=0.2, H=min(0.6, p[9]-0.05), B=0.1, q=1."""
    start = np.array([0.2, min(0.6, p[9] - 0.05), 0.1, 1.0], float)
    relaxed = integrate(p, start, T_end=220)
    return relaxed.y[:, -1]

def success(sol, H_off, eps=0.02):
    """Check the last 40 samples: mean H above H_off + eps and mean q below 0.2."""
    H_end, q_end = (float(np.mean(sol.y[row, -40:])) for row in (1, 3))
    return (H_end > H_off + eps) and (q_end < 0.2)

def find_minimal(p, mode, U_grid, T_grid):
    """Return the first (U, T) on the grid that satisfies ``success``.

    The grid is scanned with U as the outer loop and T as the inner loop,
    each in the order given. Every trial starts from ``relax_bad(p)``.
    Returns ``(nan, nan)`` when no grid point succeeds.
    """
    start = relax_bad(p)
    # Lazy generator: integration stops at the first successful pair.
    hits = (
        (U, T)
        for U in U_grid
        for T in T_grid
        if success(integrate(p, start, U=U, T=T, mode=mode), p[9])
    )
    return next(hits, (np.nan, np.nan))

# Sweep d over +/-30 % of its baseline (9 values) and redo the grid search per mode.
d_vals = np.linspace(pars_base[3]*0.7, pars_base[3]*1.3, 9)
Ugrid = np.linspace(0.02, 1.2, 25)
Tgrid = np.linspace(2.0, 60.0, 20)
modes = ["butyrate", "prebiotic", "engineered"]

rows = []
for d in d_vals:
    p = list(pars_base)
    p[3] = float(d)
    for mode in modes:
        U, T = find_minimal(p, mode, Ugrid, Tgrid)
        feasible = np.isfinite(U) and np.isfinite(T)
        rows.append({
            "d": d,
            "mode": mode,
            "U_min": U,
            "T_min": T,
            "area_UxT": (U*T if feasible else np.nan),
        })

pd.DataFrame(rows).to_csv("mw_actions_out/minimal_push_vs_d.csv", index=False)
print("saved: mw_actions_out/minimal_push_vs_d.csv")


KeyboardInterrupt: 

In [18]:
# run_ewi_and_minimal_push_shortseries_fix.py
# Early-warning indicators adapted for short/flat series + minimal-push search

import os, numpy as np, pandas as pd, matplotlib.pyplot as plt
from scipy.stats import kendalltau
from scipy.integrate import solve_ivp

# ------------ Config ------------
DATA_CSV   = "timeseries/combined_scfas_table_scored.csv"
PARAMS_CSV = "mw_fit_out/fitted_global_params.csv"
OUTDIR     = "mw_actions_out_2"
os.makedirs(OUTDIR, exist_ok=True)

H_COLS = ["H_proxy_meta_smooth", "H_proxy_meta"]   # tried in this order
TIME_COL = None            # or set to a real timestamp col if you have it
MIN_SERIES = 6             # allow shorter series than before
EPS = 1e-9

# ------------ Load ------------
df = pd.read_csv(DATA_CSV)

# Pick the first H column that is present in the file.
h_col = None
for candidate in H_COLS:
    if candidate in df.columns:
        h_col = candidate
        break
if not h_col:
    raise ValueError("Need H_proxy_meta_smooth or H_proxy_meta in the CSV.")

if not (TIME_COL and TIME_COL in df.columns):
    # No time column available: use the within-subject row order as time.
    df = df.dropna(subset=["subject_id","sample_id"]).copy()
    df["t_idx"] = df.groupby("subject_id").cumcount().astype(float)
    TIME_COL = "t_idx"
else:
    df = df.dropna(subset=["subject_id","sample_id",TIME_COL]).copy()

# Clip H into [0, 1], then order rows by subject and time.
df = df.assign(H_obs=df[h_col].clip(0,1)).sort_values(["subject_id", TIME_COL])

# ------------ helper funcs ------------
def lag1_ac(x):
    """Lag-1 autocorrelation of the finite entries of x.

    Non-finite entries are dropped first, so the remaining values are treated
    as consecutive. The estimate is sum(x[i]*x[i+1]) / sum(x[i]**2) on the
    mean-centred data. Returns NaN when fewer than 3 finite values remain and
    0.0 when the centred sum of squares is below EPS (an effectively constant
    series).
    """
    arr = np.asarray(x, float)
    arr = arr[np.isfinite(arr)]
    if len(arr) < 3:
        return np.nan
    centred = arr - np.mean(arr)
    sum_sq = np.dot(centred, centred)
    if sum_sq < EPS:
        return 0.0
    lagged = np.dot(centred[:-1], centred[1:])
    return float(lagged / sum_sq)

def rolling_ac1_vec(x, w, minp):
    """Trailing-window lag-1 autocorrelation.

    Entry i is ``lag1_ac`` of the up-to-w samples ending at index i; it stays
    NaN when that window holds fewer than minp finite values.
    """
    n = len(x)
    out = np.full(n, np.nan, float)
    for stop in range(1, n + 1):
        window = x[max(0, stop - w):stop]
        if np.isfinite(window).sum() >= minp:
            out[stop - 1] = lag1_ac(window)
    return out

# ------------ A) EWIs with short-series logic ------------
# For every subject with at least MIN_SERIES finite H values: compute rolling
# variance and rolling lag-1 AC, test each for a monotone trend against time
# (Kendall's tau), save a 3-panel figure, and collect one summary row.
rows = []
for sid, sub in df.groupby("subject_id"):
    t = sub[TIME_COL].values.astype(float)
    H = sub["H_obs"].values.astype(float)
    mfin = np.isfinite(H)
    if mfin.sum() < MIN_SERIES:
        continue

    # n counts all samples, including non-finite ones (n_points below counts finite only)
    n = len(H)
    # adaptive window for short series
    w = max(3, int(np.floor(n/3)))
    minp = max(3, int(np.ceil(w*0.6)))

    # rolling variance
    H_var = pd.Series(H).rolling(window=w, min_periods=minp).var().values
    # rolling AC1
    H_ac1 = rolling_ac1_vec(H, w, minp)

    # trend tests (need at least 3 finite rolling points)
    def tau_trend(y):
        """Kendall's tau and p-value of y against t on the finite entries of y; (nan, nan) if fewer than 3."""
        m = np.isfinite(y)
        if m.sum() < 3:
            return np.nan, np.nan
        tau, p = kendalltau(t[m], y[m])
        return float(tau), float(p)

    tau_var, p_var = tau_trend(H_var)
    tau_ac1, p_ac1 = tau_trend(H_ac1)

    # fallbacks if too short/flat: compute global variance & AC1
    global_var = float(np.nanvar(H)) if np.isfinite(H).sum() >= 3 else np.nan
    global_ac1 = lag1_ac(H)

    # annotate plot
    fig, ax = plt.subplots(3,1, figsize=(8,9), sharex=True)
    ax[0].plot(t, H, lw=1.8); ax[0].set_ylabel("H proxy"); ax[0].grid(True, ls=":", alpha=0.6)
    ttl = f"{sid}"
    # a series whose variance is within 1e-6 of zero is labelled flat
    if np.allclose(np.nanvar(H), 0.0, atol=1e-6):
        ttl += " | (flat series)"
    ax[0].set_title(ttl)

    ax[1].plot(t, H_var, lw=1.8); ax[1].set_ylabel("Var(H)")
    ax[1].set_title(f"τ_var={np.nan if np.isnan(tau_var) else round(tau_var,2)}  p={np.nan if np.isnan(p_var) else f'{p_var:.3g}'}   [win={w}]")
    ax[1].grid(True, ls=":", alpha=0.6)

    ax[2].plot(t, H_ac1, lw=1.8); ax[2].set_ylabel("AC1(H)"); ax[2].set_xlabel("time")
    ax[2].set_title(f"τ_ac1={np.nan if np.isnan(tau_ac1) else round(tau_ac1,2)}  p={np.nan if np.isnan(p_ac1) else f'{p_ac1:.3g}'}   (global AC1={None if np.isnan(global_ac1) else round(global_ac1,2)})")
    ax[2].grid(True, ls=":", alpha=0.6)

    plt.tight_layout(); plt.savefig(os.path.join(OUTDIR, f"ewi_{sid}.png"), dpi=180); plt.close()

    # one summary row per subject; flat_series repeats the flatness test used for the title
    rows.append({
        "subject_id": sid, "n_points": int(mfin.sum()),
        "window": int(w),
        "tau_var": tau_var, "p_var": p_var,
        "tau_ac1": tau_ac1, "p_ac1": p_ac1,
        "global_var": global_var, "global_ac1": global_ac1,
        "flat_series": bool(np.allclose(np.nanvar(H), 0.0, atol=1e-6)),
    })

ewi_df = pd.DataFrame(rows)
ewi_df.to_csv(os.path.join(OUTDIR, "ewi_summary.csv"), index=False)

# ------------ B) Minimal-push search (same as before) ------------
g = pd.read_csv(PARAMS_CSV, index_col=0).squeeze("columns")
# (name, fallback) pairs in the order rhs_mem unpacks them; a fallback is used
# only when the fitted table has no entry of that name.
_PAR_FALLBACKS = [
    ("r_max", 0.32),
    ("K_M", 1.0),
    ("c", 0.10),
    ("d", 0.14),
    ("g", 0.5),
    ("u", 0.6),
    ("p_low", 0.1),
    ("p_high", 2.5),
    ("H_on", 0.55),
    ("H_off", 0.70),
    ("tau_q", 5.0),
]
pars = [float(g.get(name, fallback)) for name, fallback in _PAR_FALLBACKS]

def rhs_mem(t, y, p, U=0.0, T=0.0, mode="butyrate"):
    """Time derivatives [dM, dH, dB, dq] of the model, with an optional intervention.

    y = (M, H, B, q);
    p = (r_max, K_M, c, d, g, u, p_low, p_high, H_on, H_off, tau_q).
    While ``t <= T`` and ``U > 0``, U is added to the B input ("butyrate"),
    to the production rate ("prebiotic") or to the growth rate ("engineered").
    """
    M, H, B, q = y
    r_max, K_M, c, d, gH, u, pL, pH, H_on, H_off, tau = p
    pB = pL + (pH - pL) * np.clip(q, 0, 1)

    # Name the active mode, or None when no intervention is being applied.
    applied = mode if (t <= T and U > 0) else None
    r_eff = r_max + U if applied == "engineered" else r_max
    inp_B = U if applied == "butyrate" else 0.0
    pB_aug = U if applied == "prebiotic" else 0.0

    dM = (r_eff - c * pB) * M * (1 - M / K_M)
    dH = gH * B * (1 - H) - d * H
    dB = (pB + pB_aug) * M - u * H * B + inp_B

    # q is driven to 1 below H_on, to 0 above H_off, and is frozen in between.
    if H < H_on:
        q_inf = 1.0
    elif H > H_off:
        q_inf = 0.0
    else:
        q_inf = q
    dq = (q_inf - q) / tau
    return [dM, dH, dB, dq]

def integrate(p, y0, U=0.0, T=0.0, mode="butyrate", T_end=240):
    """Integrate the model over [0, T_end] from y0; returns the solve_ivp result (900 samples)."""
    def field(t, y):
        return rhs_mem(t, y, p, U=U, T=T, mode=mode)

    ts = np.linspace(0, T_end, 900)
    sol = solve_ivp(field, (0, T_end), y0, t_eval=ts,
                    rtol=1e-6, atol=1e-8, max_step=0.5)
    return sol

def relax_bad(p):
    """State after 220 time units without intervention, from M=0.2, H=min(0.6, p[9]-0.05), B=0.1, q=1."""
    H0 = min(0.6, p[9] - 0.05)
    return integrate(p, np.array([0.2, H0, 0.1, 1.0], float), T_end=220).y[:, -1]

def success(sol, H_off, eps=0.00):
    """True when, over the last 40 samples, mean H exceeds H_off + eps and mean q is below 0.35."""
    last = sol.y[:, -40:]
    H_end = float(np.mean(last[1]))
    q_end = float(np.mean(last[3]))
    H_ok = H_end > H_off + eps
    q_ok = q_end < 0.35
    return H_ok and q_ok

# Common starting state for every trial.
y_bad = relax_bad(pars)

modes = ["butyrate", "prebiotic", "engineered"]
U_grid = np.linspace(0.02, 2, 40)
T_grid = np.linspace(2.0, 120.0, 24)
out = []
for mode in modes:
    # Lazily integrate grid points (U outer, T inner) and stop at the first success.
    trials = (
        (U, T, integrate(pars, y_bad, U=U, T=T, mode=mode, T_end=240))
        for U in U_grid
        for T in T_grid
    )
    found = next((trial for trial in trials if success(trial[2], pars[9])), None)
    if found is None:
        out.append({"mode": mode, "U": np.nan, "T": np.nan, "success": False})
        continue

    U_found, T_found, sol_found = found
    out.append({"mode": mode, "U": U_found, "T": T_found, "success": True})

    # Time course of H and q for the successful intervention.
    plt.figure(figsize=(8, 6))
    plt.plot(sol_found.t, sol_found.y[1], lw=2, label="H")
    plt.plot(sol_found.t, sol_found.y[3], lw=1.5, label="q")
    plt.axhline(pars[8], ls=":", c="gray", label="H_on")
    plt.axhline(pars[9], ls="--", c="gray", label="H_off")
    plt.axvline(T_found, ls="-.", c="k", alpha=0.6, label="end of intervention")
    plt.title(f"{mode}: minimal-ish U={U_found:.2f}, T={T_found:.1f} h")
    plt.xlabel("time (h)")
    plt.ylabel("H, q")
    plt.legend()
    plt.grid(True, ls=":", alpha=0.6)
    plt.tight_layout()
    plt.savefig(os.path.join(OUTDIR, f"minimal_push_{mode}.png"), dpi=180)
    plt.close()

pd.DataFrame(out).to_csv(os.path.join(OUTDIR, "minimal_push_summary.csv"), index=False)
print("✅ EWI + minimal-push done. See:", OUTDIR)


✅ EWI + minimal-push done. See: mw_actions_out_2


In [19]:
# summarize_results.py
import pandas as pd, numpy as np, matplotlib.pyplot as plt, os

OUT = "mw_actions_out_2"
EWI = "mw_actions_out_2/ewi_summary.csv"
MP  = "mw_actions_out_2/minimal_push_summary.csv"
os.makedirs(OUT, exist_ok=True)

# ---------- Load ----------
ewi = pd.read_csv(EWI)
mp  = pd.read_csv(MP)

# Clean up EWI NaNs -> informative flags
# A trend counts as present only when both its tau and its p-value are non-missing.
for _kind in ("var", "ac1"):
    ewi[f"has_trend_{_kind}"] = ewi[f"tau_{_kind}"].notna() & ewi[f"p_{_kind}"].notna()
# Neither trend available: only the global fallback metrics can be used.
ewi["flat_or_short"] = ~(ewi["has_trend_var"] | ewi["has_trend_ac1"])

# Score: prefer pos. tau & low p; fall back to global metrics if no trends
def ewi_score(r):
    """Score one EWI summary row; a smaller score means a stronger signal.

    For each indicator whose tau and p-value are both available, the score
    gains ``max(0, 0.5 - 0.5*tau) + p``. When the variance trend is missing,
    a fallback built from ``global_var`` and ``global_ac1`` is added instead.

    The original test ``x is not np.nan`` compares object identity and is
    true for every NaN read from a CSV, so the fallback never ran and the
    score came out NaN; a missing key raised KeyError. Values are now
    converted to float and tested with ``np.isnan``.

    Parameters
    ----------
    r : mapping with a ``.get`` method (dict or pandas Series) that may hold
        tau_var, p_var, tau_ac1, p_ac1, global_var, global_ac1.

    Returns
    -------
    float
    """
    def _num(key):
        # Missing, None or non-numeric entries are treated as NaN.
        try:
            return float(r.get(key, np.nan))
        except (TypeError, ValueError):
            return np.nan

    def _trend_term(tau, p):
        # smaller is stronger "EWI": positive tau and low p both reduce the term
        return max(0, 0.5 - 0.5 * tau) + p

    tau_var, p_var = _num("tau_var"), _num("p_var")
    tau_ac1, p_ac1 = _num("tau_ac1"), _num("p_ac1")

    score = 0.0
    if not (np.isnan(tau_var) or np.isnan(p_var)):
        score += _trend_term(tau_var, p_var)
    else:
        # fall back: lower variance → weaker EWI; high AC1 → stronger EWI
        gv = _num("global_var")
        ga = _num("global_ac1")
        score += (0.5 if np.isnan(gv) else 0.5 + 2.0/(1.0 + 10.0*gv))
        score += (0.5 if np.isnan(ga) else 0.5 - 0.5*ga)  # higher AC1 => smaller score
    if not (np.isnan(tau_ac1) or np.isnan(p_ac1)):
        score += _trend_term(tau_ac1, p_ac1)
    return float(score)

# Score every subject and rank ascending (smallest score first).
ewi["ewi_score"] = ewi.apply(ewi_score, axis=1)
ewi_sorted = ewi.sort_values("ewi_score").reset_index(drop=True)
ewi_sorted.to_csv(os.path.join(OUT, "ewi_ranked.csv"), index=False)

# Console overview: how many subjects have each kind of rolling trend.
print("\n=== EWI overview ===")
print(f"subjects in EWI: {len(ewi)}")
print(f"with rolling trend (variance): {int(ewi['has_trend_var'].sum())}")
print(f"with rolling trend (AC1):      {int(ewi['has_trend_ac1'].sum())}")
print(f"flat/short (fallback only):    {int(ewi['flat_or_short'].sum())}")
print("\nTop 10 by EWI score:")
print(ewi_sorted[["subject_id","n_points","window","tau_var","p_var","tau_ac1","p_ac1","global_var","global_ac1","ewi_score"]].head(10).to_string(index=False))

# ---------- Minimal-push ----------
print("\n=== Minimal-push summary ===")
print(mp.to_string(index=False))

# quick bar of successful modes (if any)
if "success" in mp.columns:
    fig, ax = plt.subplots(figsize=(5,3))
    # bar height is 1 for a mode that flipped and 0 otherwise
    ax.bar(mp["mode"], mp["success"].astype(int))
    ax.set_ylabel("success (0/1)")
    ax.set_title("Which modes flipped?")
    ax.grid(True, axis="y", ls=":", alpha=0.6)
    plt.tight_layout(); plt.savefig(os.path.join(OUT,"minimal_push_success_bar.png"), dpi=160); plt.close()

# heatmap-like table (area proxy) if available
if {"U","T","success"}.issubset(mp.columns):
    mp2 = mp.copy()
    # U*T as an effort proxy; NaN for modes that did not succeed
    mp2["area_UxT"] = np.where(mp2["success"], mp2["U"]*mp2["T"], np.nan)
    print("\nEffort proxy (U×T) among successes:")
    print(mp2[["mode","U","T","area_UxT","success"]].to_string(index=False))
    mp2.to_csv(os.path.join(OUT,"minimal_push_summary_with_area.csv"), index=False)



=== EWI overview ===
subjects in EWI: 28
with rolling trend (variance): 27
with rolling trend (AC1):      27
flat/short (fallback only):    1

Top 10 by EWI score:
subject_id  n_points  window   tau_var    p_var   tau_ac1    p_ac1  global_var  global_ac1  ewi_score
     H4004         8       3  1.000000 0.002778  0.333333 0.469444    0.003747    0.204081   0.805556
     M2085         7       3 -0.800000 0.083333  0.800000 0.083333    0.000938   -0.475822   1.166667
     H4010         7       3  0.527046 0.206507 -0.737865 0.076974    0.000576    0.603034   1.388891
     C3023         7       3 -0.800000 0.083333  0.600000 0.233333    0.001353    0.209515   1.416667
     H4006         6       3 -0.666667 0.333333  0.666667 0.333333    0.036828    0.142807   1.666667
     P6016         6       3 -0.666667 0.333333  0.666667 0.333333    0.000532    0.042997   1.666667
     M2008         6       3  0.666667 0.333333  0.000000 1.000000    0.005067    0.579897   2.000000
     C3009         

In [24]:
# bifurcation_and_basins.py
# Checks: (A) equilibrium branches vs d, (B) stability via Jacobian eigenvalues,
# (C) whether baseline d lies in a bistable interval, and (D) basin map at baseline.

import numpy as np, pandas as pd, matplotlib.pyplot as plt
from scipy.integrate import solve_ivp
from scipy.optimize import root

PARAMS_CSV = "mw_fit_out/fitted_global_params.csv"
OUTDIR = "mw_bifurcation"
import os
os.makedirs(OUTDIR, exist_ok=True)

g = pd.read_csv(PARAMS_CSV, index_col=0).squeeze("columns")
# parameter vector: [r_max,K_M,c,d,g,u,p_low,p_high,H_on,H_off,tau_q]
# Fallbacks (used only when the fitted table lacks the entry), in vector order.
_PAR_FALLBACKS = {
    "r_max": 0.32,
    "K_M": 1.0,
    "c": 0.10,
    "d": 0.12,   # baseline d
    "g": 0.5,
    "u": 0.6,
    "p_low": 0.1,
    "p_high": 2.5,
    "H_on": 0.55,
    "H_off": 0.70,
    "tau_q": 4.0,
}
pars = np.array(
    [float(g.get(name, fallback)) for name, fallback in _PAR_FALLBACKS.items()],
    float,
)

def rhs(y, p, d_override=None):
    """Unforced model right-hand side as a float array [dM, dH, dB, dq].

    y unpacks as (M, H, B, q) and p as
    (r_max, K_M, c, d, g, u, p_low, p_high, H_on, H_off, tau_q).
    When d_override is not None it replaces the d stored in p.
    """
    M, H, B, q = y
    r_max, K_M, c, d, gH, u, pL, pH, H_on, H_off, tau = p.copy()
    d = d if d_override is None else d_override
    pB = pL + (pH - pL) * np.clip(q, 0, 1)

    # memory target: 1 below H_on, 0 above H_off, unchanged in between
    q_inf = 1.0 if H < H_on else (0.0 if H > H_off else q)

    return np.array(
        [
            (r_max - c * pB) * M * (1 - M / K_M),
            gH * B * (1 - H) - d * H,
            pB * M - u * H * B,
            (q_inf - q) / tau,
        ],
        float,
    )

def jacobian(y, p, d_override=None, eps=1e-7):
    """Forward-difference 4x4 Jacobian of ``rhs`` at y.

    Column i is ``(rhs(y + eps*e_i) - rhs(y)) / eps``. y must offer ``.copy()``
    and item assignment (e.g. a NumPy array); it is not modified.
    """
    base = rhs(y, p, d_override)
    columns = []
    for i in range(4):
        bumped = y.copy()
        bumped[i] += eps
        columns.append((rhs(bumped, p, d_override) - base) / eps)
    return np.column_stack(columns)

def find_eq(p, d_val, guess):
    """Solve ``rhs(y) = 0`` at d = d_val, starting from guess.

    Returns ``(y_eq, True)`` when the solver converges to a finite state with
    M, H, B >= -1e-6 and q within [-1e-3, 1 + 1e-3]; y_eq is clipped to
    M, B >= 0 and 0 <= H, q <= 1.2. Otherwise returns ``(guess, False)``.
    """
    sol = root(lambda y: rhs(y, p, d_override=d_val), guess, method="hybr")
    if not sol.success:
        return guess, False

    y = sol.x
    # must be physical
    nonnegative = y[0] >= -1e-6 and y[1] >= -1e-6 and y[2] >= -1e-6
    q_in_range = 0-1e-3 <= y[3] <= 1+1e-3
    if np.all(np.isfinite(y)) and nonnegative and q_in_range:
        return np.clip(y, [0, 0, 0, 0], [np.inf, 1.2, np.inf, 1.2]), True
    return guess, False

# Continuation in d
d_vals = np.linspace(pars[3]*0.6, pars[3]*1.5, 70)  # explore around baseline
branches = {"d": [], "H": [], "q": [], "stable": [], "which": []}

# Try multiple seeds to capture multiple branches
seeds = [
    np.array([0.2, 0.2, 0.05, 1.0]),   # low-H memory on
    np.array([0.2, 0.9, 0.10, 0.0]),   # high-H memory off
    np.array([0.8, 0.5, 0.2, 0.5]),    # mid
]

for d in d_vals:
    for wi, seed in enumerate(seeds):
        # Every solve restarts from the fixed seed (the previous solution is not reused).
        y_eq, ok = find_eq(pars, d, seed.copy())
        if not ok:
            continue
        eigs = np.linalg.eigvals(jacobian(y_eq, pars, d_override=d))
        record = {
            "d": d,
            "H": float(y_eq[1]),
            "q": float(y_eq[3]),
            # stable when every eigenvalue has a negative real part
            "stable": bool(np.max(np.real(eigs)) < 0),
            "which": wi,
        }
        for key, value in record.items():
            branches[key].append(value)

df = pd.DataFrame(branches)
df.to_csv(f"{OUTDIR}/branches.csv", index=False)

# Plot branches
# First layer: equilibrium H against d, one series of small dots per seed.
plt.figure(figsize=(7,5))
for wi in sorted(df["which"].unique()):
    sub = df[df["which"]==wi]
    plt.plot(sub["d"], sub["H"], ".", ms=3, alpha=0.8, label=f"seed{wi}")
# mark stability
# Second layer: the same points again, 'o' where stable and 'x' where not.
for st, m in [(True, "o"), (False, "x")]:
    sub = df[df["stable"]==st]
    plt.scatter(sub["d"], sub["H"], marker=m, s=18, label=f"{'stable' if st else 'unstable'}", alpha=0.6)
plt.axvline(pars[3], ls="--", c="gray", label="baseline d")
plt.xlabel("d (1/h)"); plt.ylabel("H* at equilibrium")
plt.legend(); plt.grid(True, ls=":", alpha=0.6)
plt.tight_layout(); plt.savefig(f"{OUTDIR}/bifurcation_H_vs_d.png", dpi=180); plt.close()

# Determine if baseline d is inside a bistable region.
# Bistability needs two *distinct* stable equilibria at the same d. The
# original check counted stable rows across seeds (and across every grid d
# within 1e-3 of baseline), so one equilibrium reached from two seeds was
# reported as bistable, and nothing was found when the grid spacing exceeded
# the tolerance. Here the single grid value closest to baseline is used and
# stable equilibria whose H differs by no more than 1e-3 are counted once.
d_near = np.nan
near = df.iloc[0:0]
n_stable = 0
if len(df) > 0:
    d_grid = df["d"].to_numpy(float)
    d_near = float(d_grid[np.argmin(np.abs(d_grid - pars[3]))])
    near = df[df["d"] == d_near]
    stable_H = np.sort(near.loc[near["stable"].astype(bool), "H"].to_numpy(float))
    if stable_H.size:
        # Sorted values: each gap larger than the tolerance starts a new equilibrium.
        n_stable = 1 + int(np.count_nonzero(np.diff(stable_H) > 1e-3))
coexisting = n_stable >= 2
with open(f"{OUTDIR}/diagnosis.txt","w") as f:
    f.write(f"Baseline d = {pars[3]:.4f}\n")
    f.write(f"Distinct stable equilibria at nearest grid d = {d_near:.4f}: {n_stable}\n")
    f.write(f"Bistable at baseline? {'YES' if coexisting else 'NO'}\n")

print("Saved:", OUTDIR, "| Bistable at baseline? ", "YES" if coexisting else "NO")

# Basin map at baseline (scan initial H and q)
def simulate(y0, T=300):
    """Integrate the baseline model from y0 for T time units and return the final state."""
    sol = solve_ivp(lambda t, y: rhs(y, pars), (0, T), y0,
                    t_eval=np.linspace(0, T, 800),
                    rtol=1e-6, atol=1e-8, max_step=0.5)
    return sol.y[:, -1]

Hs = np.linspace(0.2, 0.95, 16)
qs = np.linspace(0.0, 1.0, 16)
# Z[i, j] = final H when starting from H = Hs[i], q = qs[j] (with M = 0.2, B = 0.1).
Z = np.array([
    [simulate(np.array([0.2, H0, 0.1, q0]))[1] for q0 in qs]
    for H0 in Hs
])

plt.figure(figsize=(6, 5))
plt.imshow(Z, origin="lower", extent=[qs[0], qs[-1], Hs[0], Hs[-1]], aspect="auto")
plt.colorbar(label="Final H (steady)")
plt.xlabel("initial q")
plt.ylabel("initial H")
plt.title(f"Basins at baseline d={pars[3]:.3f}")
plt.tight_layout()
plt.savefig(f"{OUTDIR}/basins_heatmap.png", dpi=180)
plt.close()


Saved: mw_bifurcation | Bistable at baseline?  NO
