## Heatmaps for varying substrate concentration (Cs) and temperature (T)

In [5]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import sys, re

# =========================
# CONFIG — tweak here
# =========================
# Settings read by main() in this cell: where the runs live, where outputs go,
# which time points to sample, and how the heatmaps are rendered.
CONFIG = {
    # Root directory containing run_* folders
    # (searched recursively for simulation_results.csv by find_runs)
    "root_runs_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseFixed/batch_runs",

    # Where to save the aggregated CSVs and heatmaps
    "output_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseFixed/analysis_outputs_pH_150s_C_36000s",

    # Time points (seconds) at which to sample pH/product
    # e.g. 600 (10 min), 10000, 500, etc.
    # main() writes one pH output set and one product output set per entry.
    "time_points_s": [150, 36000],

    # Which product column to map; default = total ammonia (NH3 + NH4+)
    # You can also choose "NH3 [M]" if you specifically want free ammonia.
    # If the column is absent, main() falls back to "NH3 [M]" + "NH4+ [M]" when both exist.
    "product_column": "total ammonia [M]",

    # Behavior if the requested time is outside the simulation time range:
    # "clip" → use first/last value; "nan" → record NaN
    "out_of_range": "clip",

    # Matplotlib colormap name
    "cmap": "viridis",

    # DPI for saved figures
    "dpi": 180,

    "bar_width": 40,   # width of ASCII bar
}

# =========================
# utilities
# =========================
def ensure_dir(p: Path):
    """Create directory ``p`` together with any missing parents.

    Nothing happens (and no error is raised) when the directory already exists.
    """
    mkdir_options = {"parents": True, "exist_ok": True}
    p.mkdir(**mkdir_options)

def print_bar(done, total, prefix="processing runs ", width=40):
    """Redraw a one-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite each
    other; a newline is emitted once ``done`` equals ``total``.
    """
    if total == 0:
        frac = 0.0
    else:
        # clamp to [0, 1] so an over/under-count never breaks the bar length
        frac = min(max(done / total, 0.0), 1.0)

    n_filled = int(round(width * frac))
    n_blank = width - n_filled
    bar = "█" * n_filled + "░" * n_blank

    stream = sys.stdout
    stream.write(f"\r{prefix}[{bar}] {100*frac:6.1f}% | {done}/{total}")
    stream.flush()
    if done == total:
        stream.write("\n")

def read_manifest(path: Path) -> dict:
    """Parse a ``key: value`` manifest file into a dict.

    Every line containing a colon is split at the first colon; key and value
    are whitespace-stripped. Lines without a colon are ignored.

    Coercions applied afterwards:
      * ``initial_pH`` is always present in the result: a float when the
        field parses as a number, otherwise ``None`` (missing, the literal
        ``"None"``, or unparsable text).
      * ``urea_S0_mM``, ``temperature_C`` and ``grams_urease_powder`` are
        converted to float when present.

    Raises:
        ValueError: if one of the three numeric fields is present but is not
            a number. The message names the file and the field.
    """
    d = {}
    txt = path.read_text(encoding="utf-8", errors="ignore")
    for line in txt.splitlines():
        key, sep, value = line.partition(":")
        if sep:
            d[key.strip()] = value.strip()

    # initial_pH is optional: anything that is not a number becomes None.
    raw_ph = d.get("initial_pH")
    if raw_ph is None or raw_ph == "None":
        d["initial_pH"] = None
    else:
        try:
            d["initial_pH"] = float(raw_ph)
        except ValueError:  # only the parse failure, not KeyboardInterrupt etc.
            d["initial_pH"] = None

    for k in ("urea_S0_mM", "temperature_C", "grams_urease_powder"):
        if k in d:
            try:
                d[k] = float(d[k])
            except ValueError as err:
                # same exception type as before, but now traceable to a file/field
                raise ValueError(
                    f"{path}: manifest field {k!r} is not numeric: {d[k]!r}"
                ) from err
    return d

def find_runs(root: Path):
    """Return, sorted, the parent directory of every ``simulation_results.csv`` found under ``root``."""
    run_dirs = [csv_file.parent for csv_file in root.glob("**/simulation_results.csv")]
    run_dirs.sort()
    return run_dirs

def interp_at_time(df: pd.DataFrame, col: str, t_query: float, out_of_range="clip"):
    """Linearly interpolate column ``col`` of ``df`` at time ``t_query``.

    Time is read from the ``"time [s]"`` column, which is assumed to be
    sorted in increasing order (a requirement of ``np.interp``).

    Args:
        df: frame with a ``"time [s]"`` column and the column ``col``.
        col: name of the column to sample.
        t_query: time at which to sample.
        out_of_range: ``"clip"`` returns the first/last value when
            ``t_query`` lies outside the sampled time span; any other value
            returns NaN in that case.

    Returns:
        A Python float (NaN for an empty frame or an out-of-range query in
        non-clip mode).
    """
    t = np.asarray(df["time [s]"].values, dtype=float)
    y = np.asarray(df[col].values, dtype=float)
    if t.size == 0:
        return float("nan")
    clip = out_of_range == "clip"
    # Strict comparisons: a query exactly on the first/last sample is in
    # range and must yield that sample even in "nan" mode.
    if t_query < t[0]:
        return float(y[0]) if clip else float("nan")
    if t_query > t[-1]:
        return float(y[-1]) if clip else float("nan")
    return float(np.interp(t_query, t, y))

def plot_heatmap(pivot: pd.DataFrame, title: str, cmap: str, out_path: Path, dpi=180,
                 cbar_label: str = "value"):
    """Render a pivot table as a heatmap and save it to ``out_path``.

    The pivot's columns are used as x coordinates and its index as y
    coordinates. Cells are drawn with ``pcolormesh(shading="nearest")`` so
    each cell is centred on its actual coordinate value; this stays correct
    for non-uniformly spaced grids and for a single row or column, which an
    ``imshow`` + ``extent=[min, max, ...]`` rendering does not.

    Args:
        pivot: 2-D table of values (rows → y axis, columns → x axis).
        title: figure title.
        cmap: Matplotlib colormap name.
        out_path: destination image file.
        dpi: resolution passed to ``savefig``.
        cbar_label: colorbar label (defaults to the previous fixed text).

    Raises:
        ValueError: if ``pivot`` has no cells.
    """
    # sort both axes so coordinates are monotonic
    pivot = pivot.sort_index()
    pivot = pivot.reindex(sorted(pivot.columns), axis=1)
    if pivot.size == 0:
        raise ValueError("plot_heatmap: pivot table is empty")

    X = np.asarray(pivot.columns.values, dtype=float)
    Y = np.asarray(pivot.index.values, dtype=float)
    # mask NaN cells (combinations with no data) so they are left blank
    Z = np.ma.masked_invalid(pivot.values.astype(float))

    fig, ax = plt.subplots(figsize=(10, 4.8))
    try:
        mesh = ax.pcolormesh(X, Y, Z, shading="nearest", cmap=cmap)
        cbar = fig.colorbar(mesh, ax=ax)
        cbar.ax.set_ylabel(cbar_label, rotation=90)
        ax.set_xlabel("Substrate concentration S₀ (mM)")
        ax.set_ylabel("Temperature (°C)")
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(out_path, dpi=dpi, bbox_inches="tight")
    finally:
        # always release the figure, even if rendering/saving fails
        plt.close(fig)

def save_long_and_wide(df_long: pd.DataFrame, out_dir: Path, stem: str):
    """Write ``df_long`` as ``<stem>_long.csv`` and its T_C × S0_mM pivot as ``<stem>_wide.csv``.

    The pivot has ``T_C`` as rows and ``S0_mM`` as columns; duplicate
    (T_C, S0_mM) pairs are averaged.

    Returns:
        ``(long_path, wide_path, pivot)``.
    """
    ensure_dir(out_dir)
    long_path = out_dir / f"{stem}_long.csv"
    wide_path = out_dir / f"{stem}_wide.csv"

    df_long.to_csv(long_path, index=False)

    pivot = pd.pivot_table(
        df_long,
        index="T_C",
        columns="S0_mM",
        values="value",
        aggfunc="mean",
    )
    pivot.to_csv(wide_path)
    return long_path, wide_path, pivot

# =========================
# main
# =========================
def main():
    """Scan all runs once and write per-timepoint long/wide CSVs and heatmaps.

    For every run directory found under ``CONFIG["root_runs_path"]`` the
    manifest supplies S0 and temperature, and ``simulation_results.csv`` is
    sampled at each configured time point for pH and the product column.
    Runs that cannot be used (missing manifest, missing manifest fields,
    missing ``time [s]``/``pH`` columns, empty CSV) are skipped and reported
    instead of contributing uninitialised values to the outputs.
    """
    cfg = CONFIG
    root = Path(cfg["root_runs_path"])
    out_root = Path(cfg["output_path"])
    ensure_dir(out_root)

    runs = find_runs(root)
    n = len(runs)
    if n == 0:
        print(f"No runs found under: {root}")
        return

    print(f"Found {n} runs.")
    TPs = list(cfg["time_points_s"])
    mP = len(TPs)
    out_of_range = cfg["out_of_range"]
    barw = cfg["bar_width"]
    prod_col = cfg["product_column"]

    # NaN-initialise (not np.empty): a skipped run must stay recognisable
    # so it can be dropped before building the tables.
    S0 = np.full(n, np.nan, dtype=float)
    TC = np.full(n, np.nan, dtype=float)
    run_paths = np.empty(n, dtype=object)
    pH_vals = np.full((n, mP), np.nan, dtype=float)
    prod_vals = np.full((n, mP), np.nan, dtype=float)
    skipped = []  # (run directory, reason)

    # === single pass over runs, with progress bar ===
    print_bar(0, n, width=barw)
    for i, rd in enumerate(runs):
        try:
            man_path = rd / "MANIFEST.txt"
            sim_path = rd / "simulation_results.csv"
            if not man_path.exists() or not sim_path.exists():
                skipped.append((rd, "MANIFEST.txt or simulation_results.csv missing"))
                continue

            man = read_manifest(man_path)
            if "urea_S0_mM" not in man or "temperature_C" not in man:
                skipped.append((rd, "manifest lacks urea_S0_mM or temperature_C"))
                continue

            df = pd.read_csv(sim_path)
            if df.empty or "time [s]" not in df.columns or "pH" not in df.columns:
                skipped.append((rd, "CSV is empty or lacks 'time [s]'/'pH'"))
                continue

            # choose product column (fallback if needed)
            prod_col_use = prod_col
            if prod_col_use not in df.columns:
                if ("NH3 [M]" in df.columns) and ("NH4+ [M]" in df.columns):
                    df["__total_ammonia_fallback__"] = df["NH3 [M]"] + df["NH4+ [M]"]
                    prod_col_use = "__total_ammonia_fallback__"
                else:
                    prod_col_use = None  # no product available in this run

            # interpolate at all requested timepoints (product stays NaN if unavailable)
            for j, t in enumerate(TPs):
                pH_vals[i, j] = interp_at_time(df, "pH", t, out_of_range=out_of_range)
                if prod_col_use is not None:
                    prod_vals[i, j] = interp_at_time(df, prod_col_use, t, out_of_range=out_of_range)

            # coordinates are written last so only fully processed runs count as valid
            S0[i] = float(man["urea_S0_mM"])
            TC[i] = float(man["temperature_C"])
            run_paths[i] = str(rd)
        finally:
            # runs on normal completion and on every `continue`
            print_bar(i + 1, n, width=barw)

    if skipped:
        print(f"[WARN] skipped {len(skipped)} of {n} runs (showing up to 10):")
        for rd, reason in skipped[:10]:
            print(f"       {rd}: {reason}")

    keep = np.isfinite(S0) & np.isfinite(TC)
    if not keep.any():
        print("No valid runs after scanning. Check directory structure.")
        return
    S0, TC, run_paths = S0[keep], TC[keep], run_paths[keep]
    pH_vals, prod_vals = pH_vals[keep, :], prod_vals[keep, :]

    # === build and save outputs per timepoint ===
    cmap = cfg["cmap"]; dpi = cfg["dpi"]
    safe_name = re.sub(r'[^A-Za-z0-9]+', '_', prod_col).strip('_')

    for j, t in enumerate(TPs):
        # pH
        df_pH = pd.DataFrame({
            "S0_mM": S0, "T_C": TC, "value": pH_vals[:, j], "run_path": run_paths
        }).sort_values(["T_C", "S0_mM"]).reset_index(drop=True)
        stem_pH = f"heatmap_pH_t{int(t)}s"
        lp, wp, piv = save_long_and_wide(df_pH, out_root, stem_pH)
        plot_heatmap(piv, title=f"pH at t = {t} s", cmap=cmap,
                     out_path=out_root / f"{stem_pH}.png", dpi=dpi)
        print(f"[pH @ {t}s] saved → {Path(lp).name}, {Path(wp).name}, {stem_pH}.png")

        # product
        df_prod = pd.DataFrame({
            "S0_mM": S0, "T_C": TC, "value": prod_vals[:, j], "run_path": run_paths
        }).sort_values(["T_C", "S0_mM"]).reset_index(drop=True)
        if df_prod["value"].notna().any():
            stem_prod = f"heatmap_{safe_name}_t{int(t)}s"
            lp2, wp2, piv2 = save_long_and_wide(df_prod, out_root, stem_prod)
            plot_heatmap(piv2, title=f"{prod_col} at t = {t} s", cmap=cmap,
                         out_path=out_root / f"{stem_prod}.png", dpi=dpi)
            print(f"[{prod_col} @ {t}s] saved → {Path(lp2).name}, {Path(wp2).name}, {stem_prod}.png")
        else:
            print(f"[WARN] Product column '{prod_col}' missing in all runs for t={t}s; skipped plotting.")

if __name__ == "__main__":
    main()


Found 16000 runs.
processing runs [████████████████████████████████████████]  100.0% | 16000/16000
[pH @ 150s] saved → heatmap_pH_t150s_long.csv, heatmap_pH_t150s_wide.csv, heatmap_pH_t150s.png
[total ammonia [M] @ 150s] saved → heatmap_total_ammonia_M_t150s_long.csv, heatmap_total_ammonia_M_t150s_wide.csv, heatmap_total_ammonia_M_t150s.png
[pH @ 36000s] saved → heatmap_pH_t36000s_long.csv, heatmap_pH_t36000s_wide.csv, heatmap_pH_t36000s.png
[total ammonia [M] @ 36000s] saved → heatmap_total_ammonia_M_t36000s_long.csv, heatmap_total_ammonia_M_t36000s_wide.csv, heatmap_total_ammonia_M_t36000s.png


## Heatmaps for varying substrate concentration (Cs, urea S₀) and urease mass (grams of powder)

In [7]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import sys, re, math

# =========================
# CONFIG — tweak here
# =========================
# Settings read by main() in this cell. Outputs here are laid out with
# S0_mM on the x axis and grams_urease_powder on the y axis.
CONFIG = {
    # Root directory containing run_* folders
    # (searched recursively for simulation_results.csv by find_runs)
    "root_runs_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseNOTfixed/batch_runs",

    # Where to save the aggregated CSVs and heatmaps
    "output_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseNOTfixed/analysis_outputs_pH_150s_C_36000s_axes_S0_vs_grams",

    # Time points (seconds) at which to sample pH/product
    # main() writes one pH output set and one product output set per entry.
    "time_points_s": [150, 36000],

    # Which product column to map; examples: "total ammonia [M]" or "NH3 [M]"
    # If the column is absent, main() falls back to "NH3 [M]" + "NH4+ [M]" when both exist.
    "product_column": "total ammonia [M]",

    # If your runs include multiple temperatures but you want a single slice, set this.
    # If None, we average across any temperatures present for identical (S0, grams) pairs.
    # A run matches when its manifest temperature is within 1e-6 of this value.
    "filter_temperature_C": None,   # e.g., 40.0 or None

    # Behavior if the requested time is outside the simulation time range:
    # "clip" → use first/last value; "nan" → record NaN
    "out_of_range": "clip",

    # Matplotlib colormap name
    "cmap": "viridis",

    # DPI for saved figures
    "dpi": 180,

    # width of ASCII bar
    "bar_width": 40,
}

# =========================
# utilities
# =========================
def ensure_dir(p: Path):
    """Make sure directory ``p`` exists, creating parent directories as needed.

    An already existing directory is left untouched.
    """
    mkdir_options = {"parents": True, "exist_ok": True}
    p.mkdir(**mkdir_options)

def print_bar(done, total, prefix="processing runs ", width=40):
    """Redraw a one-line text progress bar on stdout.

    The line begins with a carriage return so repeated calls overwrite the
    previous bar; once ``done`` equals ``total`` a newline finishes the line.
    """
    if total == 0:
        frac = 0.0
    else:
        # clamp to [0, 1] so the bar never over- or under-flows
        frac = min(max(done / total, 0.0), 1.0)

    n_filled = int(round(width * frac))
    n_blank = width - n_filled
    bar = "█" * n_filled + "░" * n_blank

    stream = sys.stdout
    stream.write(f"\r{prefix}[{bar}] {100*frac:6.1f}% | {done}/{total}")
    stream.flush()
    if done == total:
        stream.write("\n")

def read_manifest(path: Path) -> dict:
    """Parse a ``key: value`` manifest file into a dict.

    Every line containing a colon is split at the first colon; key and value
    are whitespace-stripped. Lines without a colon are ignored.

    Coercions applied afterwards:
      * ``initial_pH`` is always present in the result: a float when the
        field parses as a number, otherwise ``None`` (missing, the literal
        ``"None"``, or unparsable text).
      * ``urea_S0_mM``, ``temperature_C`` and ``grams_urease_powder`` are
        converted to float when present.

    Raises:
        ValueError: if one of the three numeric fields is present but is not
            a number. The message names the file and the field.
    """
    d = {}
    txt = path.read_text(encoding="utf-8", errors="ignore")
    for line in txt.splitlines():
        key, sep, value = line.partition(":")
        if sep:
            d[key.strip()] = value.strip()

    # initial_pH is optional: anything that is not a number becomes None.
    raw_ph = d.get("initial_pH")
    if raw_ph is None or raw_ph == "None":
        d["initial_pH"] = None
    else:
        try:
            d["initial_pH"] = float(raw_ph)
        except ValueError:  # only the parse failure, not KeyboardInterrupt etc.
            d["initial_pH"] = None

    for k in ("urea_S0_mM", "temperature_C", "grams_urease_powder"):
        if k in d:
            try:
                d[k] = float(d[k])
            except ValueError as err:
                # same exception type as before, but now traceable to a file/field
                raise ValueError(
                    f"{path}: manifest field {k!r} is not numeric: {d[k]!r}"
                ) from err
    return d

def find_runs(root: Path):
    """Return, sorted, the parent directory of every ``simulation_results.csv`` found under ``root``."""
    run_dirs = [csv_file.parent for csv_file in root.glob("**/simulation_results.csv")]
    run_dirs.sort()
    return run_dirs

def interp_at_time(df: pd.DataFrame, col: str, t_query: float, out_of_range="clip"):
    """Linearly interpolate column ``col`` of ``df`` at time ``t_query``.

    Time is read from the ``"time [s]"`` column, which is assumed to be
    sorted in increasing order (a requirement of ``np.interp``).

    Args:
        df: frame with a ``"time [s]"`` column and the column ``col``.
        col: name of the column to sample.
        t_query: time at which to sample.
        out_of_range: ``"clip"`` returns the first/last value when
            ``t_query`` lies outside the sampled time span; any other value
            returns NaN in that case.

    Returns:
        A Python float (NaN for an empty frame or an out-of-range query in
        non-clip mode).
    """
    t = np.asarray(df["time [s]"].values, dtype=float)
    y = np.asarray(df[col].values, dtype=float)
    if t.size == 0:
        return float("nan")
    clip = out_of_range == "clip"
    # Strict comparisons: a query exactly on the first/last sample is in
    # range and must yield that sample even in "nan" mode.
    if t_query < t[0]:
        return float(y[0]) if clip else float("nan")
    if t_query > t[-1]:
        return float(y[-1]) if clip else float("nan")
    return float(np.interp(t_query, t, y))

def plot_heatmap(pivot: pd.DataFrame, title: str, cmap: str, out_path: Path, dpi=180,
                 cbar_label: str = "value"):
    """Render a pivot table as a heatmap and save it to ``out_path``.

    y-axis: grams_urease_powder (pivot index), x-axis: S0_mM (pivot columns).
    Cells are drawn with ``pcolormesh(shading="nearest")`` so each cell is
    centred on its actual coordinate value; this stays correct for
    non-uniformly spaced grids and for a single row or column, which an
    ``imshow`` + ``extent=[min, max, ...]`` rendering does not.

    Args:
        pivot: 2-D table of values (rows → y axis, columns → x axis).
        title: figure title.
        cmap: Matplotlib colormap name.
        out_path: destination image file.
        dpi: resolution passed to ``savefig``.
        cbar_label: colorbar label (defaults to the previous fixed text).

    Raises:
        ValueError: if ``pivot`` has no cells.
    """
    # sort both axes so coordinates are monotonic
    pivot = pivot.sort_index()
    pivot = pivot.reindex(sorted(pivot.columns), axis=1)
    if pivot.size == 0:
        raise ValueError("plot_heatmap: pivot table is empty")

    X = np.asarray(pivot.columns.values, dtype=float)   # S0_mM
    Y = np.asarray(pivot.index.values, dtype=float)     # grams_urease_powder
    # mask NaN cells (combinations with no data) so they are left blank
    Z = np.ma.masked_invalid(pivot.values.astype(float))

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        mesh = ax.pcolormesh(X, Y, Z, shading="nearest", cmap=cmap)
        cbar = fig.colorbar(mesh, ax=ax)
        cbar.ax.set_ylabel(cbar_label, rotation=90)
        ax.set_xlabel("Substrate concentration S₀ (mM)")
        ax.set_ylabel("Urease mass (g)")
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(out_path, dpi=dpi, bbox_inches="tight")
    finally:
        # always release the figure, even if rendering/saving fails
        plt.close(fig)

def save_long_and_wide(df_long: pd.DataFrame, out_dir: Path, stem: str):
    """Write the long table and its grams × S0 pivot to CSV.

    ``<stem>_long.csv`` receives ``df_long`` unchanged (without the index);
    ``<stem>_wide.csv`` receives a pivot with rows = ``grams_urease_powder``
    and columns = ``S0_mM``, where duplicate (grams, S0) pairs — e.g. several
    temperatures — are averaged.

    Returns:
        ``(long_path, wide_path, pivot)``.
    """
    ensure_dir(out_dir)
    long_path = out_dir / f"{stem}_long.csv"
    wide_path = out_dir / f"{stem}_wide.csv"

    df_long.to_csv(long_path, index=False)

    pivot = pd.pivot_table(
        df_long,
        index="grams_urease_powder",
        columns="S0_mM",
        values="value",
        aggfunc="mean",
    )
    pivot.to_csv(wide_path)
    return long_path, wide_path, pivot

# =========================
# main
# =========================
def main():
    """Scan all runs once and write per-timepoint CSVs/heatmaps on (S0, grams) axes.

    For every run directory found under ``CONFIG["root_runs_path"]`` the
    manifest supplies S0, urease grams and temperature, and
    ``simulation_results.csv`` is sampled at each configured time point for
    pH and the product column. When ``filter_temperature_C`` is set, only
    runs at that temperature (within 1e-6) are kept.

    Runs that cannot be used (missing files, missing manifest fields,
    missing ``time [s]``/``pH`` columns, empty CSV) are skipped and
    reported. Coordinate arrays are NaN-initialised so that skipped or
    filtered runs are reliably removed by the validity mask.
    """
    cfg = CONFIG
    root = Path(cfg["root_runs_path"])
    out_root = Path(cfg["output_path"])
    ensure_dir(out_root)

    runs = find_runs(root)
    n = len(runs)
    if n == 0:
        print(f"No runs found under: {root}")
        return

    print(f"Found {n} runs.")
    TPs = list(cfg["time_points_s"])
    mP = len(TPs)
    out_of_range = cfg["out_of_range"]
    barw = cfg["bar_width"]
    filt_T = cfg.get("filter_temperature_C", None)
    prod_col = cfg["product_column"]

    # NaN-initialise (not np.empty): uninitialised memory can look finite
    # and would slip through the validity mask below.
    S0 = np.full(n, np.nan, dtype=float)
    GR = np.full(n, np.nan, dtype=float)  # grams_urease_powder
    TC = np.full(n, np.nan, dtype=float)
    run_paths = np.empty(n, dtype=object)
    pH_vals = np.full((n, mP), np.nan, dtype=float)
    prod_vals = np.full((n, mP), np.nan, dtype=float)
    skipped = []  # (run directory, reason) — excludes temperature-filtered runs

    required_keys = ("urea_S0_mM", "grams_urease_powder", "temperature_C")

    # Single pass with progress bar
    print_bar(0, n, width=barw)
    for i, rd in enumerate(runs):
        try:
            man_path = rd / "MANIFEST.txt"
            sim_path = rd / "simulation_results.csv"
            if not man_path.exists() or not sim_path.exists():
                skipped.append((rd, "MANIFEST.txt or simulation_results.csv missing"))
                continue

            man = read_manifest(man_path)
            absent = [k for k in required_keys if k not in man]
            if absent:
                skipped.append((rd, f"manifest lacks {', '.join(absent)}"))
                continue

            # Apply temperature filter if requested
            T_run = float(man["temperature_C"])
            if filt_T is not None:
                if not math.isfinite(T_run) or abs(T_run - float(filt_T)) > 1e-6:
                    continue  # not the requested temperature slice

            df = pd.read_csv(sim_path)
            if df.empty or "time [s]" not in df.columns or "pH" not in df.columns:
                skipped.append((rd, "CSV is empty or lacks 'time [s]'/'pH'"))
                continue

            # choose product column (fallback if needed)
            prod_col_use = prod_col
            if prod_col_use not in df.columns:
                if ("NH3 [M]" in df.columns) and ("NH4+ [M]" in df.columns):
                    df["__total_ammonia_fallback__"] = df["NH3 [M]"] + df["NH4+ [M]"]
                    prod_col_use = "__total_ammonia_fallback__"
                else:
                    prod_col_use = None  # no product available in this run

            # interpolate at all requested timepoints (product stays NaN if unavailable)
            for j, t in enumerate(TPs):
                pH_vals[i, j] = interp_at_time(df, "pH", t, out_of_range=out_of_range)
                if prod_col_use is not None:
                    prod_vals[i, j] = interp_at_time(df, prod_col_use, t, out_of_range=out_of_range)

            # coordinates are written last so only fully processed runs count as valid
            S0[i] = float(man["urea_S0_mM"])
            GR[i] = float(man["grams_urease_powder"])
            TC[i] = T_run
            run_paths[i] = str(rd)
        finally:
            # runs on normal completion and on every `continue`
            print_bar(i + 1, n, width=barw)

    if skipped:
        print(f"[WARN] skipped {len(skipped)} of {n} runs (showing up to 10):")
        for rd, reason in skipped[:10]:
            print(f"       {rd}: {reason}")

    # Keep only rows that were actually filled
    mask_valid = np.isfinite(S0) & np.isfinite(GR)
    if not mask_valid.any():
        if filt_T is not None:
            print(f"No runs matched filter_temperature_C = {filt_T}.")
        else:
            print("No valid runs after scanning. Check directory structure.")
        return

    S0 = S0[mask_valid]
    GR = GR[mask_valid]
    TC = TC[mask_valid]
    run_paths = run_paths[mask_valid]
    pH_vals = pH_vals[mask_valid, :]
    prod_vals = prod_vals[mask_valid, :]

    cmap = cfg["cmap"]; dpi = cfg["dpi"]
    safe_name = re.sub(r'[^A-Za-z0-9]+', '_', prod_col).strip('_')

    # Save per timepoint
    for j, t in enumerate(TPs):
        # pH
        df_pH = pd.DataFrame({
            "S0_mM": S0,
            "grams_urease_powder": GR,
            "T_C": TC,
            "value": pH_vals[:, j],
            "run_path": run_paths
        }).sort_values(["grams_urease_powder", "S0_mM"]).reset_index(drop=True)

        # If we filtered temperature, it's constant; otherwise it may vary and
        # the pivot uses the mean across duplicates.
        stem_pH = f"heatmap_axes_S0_vs_grams_pH_t{int(t)}s"
        lp, wp, piv = save_long_and_wide(df_pH, out_root, stem_pH)
        plot_heatmap(
            piv,
            title=f"pH at t = {t} s (axes: S₀ vs grams urease)",
            cmap=cmap,
            out_path=out_root / f"{stem_pH}.png",
            dpi=dpi
        )
        print(f"[pH @ {t}s] saved → {Path(lp).name}, {Path(wp).name}, {stem_pH}.png")

        # product
        df_prod = pd.DataFrame({
            "S0_mM": S0,
            "grams_urease_powder": GR,
            "T_C": TC,
            "value": prod_vals[:, j],
            "run_path": run_paths
        }).sort_values(["grams_urease_powder", "S0_mM"]).reset_index(drop=True)

        if df_prod["value"].notna().any():
            stem_prod = f"heatmap_axes_S0_vs_grams_{safe_name}_t{int(t)}s"
            lp2, wp2, piv2 = save_long_and_wide(df_prod, out_root, stem_prod)
            plot_heatmap(
                piv2,
                title=f"{prod_col} at t = {t} s (axes: S₀ vs grams urease)",
                cmap=cmap,
                out_path=out_root / f"{stem_prod}.png",
                dpi=dpi
            )
            print(f"[{prod_col} @ {t}s] saved → {Path(lp2).name}, {Path(wp2).name}, {stem_prod}.png")
        else:
            print(f"[WARN] Product column '{prod_col}' missing in all runs for t={t}s; skipped plotting.")

if __name__ == "__main__":
    main()


Found 1111 runs.
processing runs [████████████████████████████████████████]  100.0% | 1111/1111
[pH @ 150s] saved → heatmap_axes_S0_vs_grams_pH_t150s_long.csv, heatmap_axes_S0_vs_grams_pH_t150s_wide.csv, heatmap_axes_S0_vs_grams_pH_t150s.png
[total ammonia [M] @ 150s] saved → heatmap_axes_S0_vs_grams_total_ammonia_M_t150s_long.csv, heatmap_axes_S0_vs_grams_total_ammonia_M_t150s_wide.csv, heatmap_axes_S0_vs_grams_total_ammonia_M_t150s.png
[pH @ 36000s] saved → heatmap_axes_S0_vs_grams_pH_t36000s_long.csv, heatmap_axes_S0_vs_grams_pH_t36000s_wide.csv, heatmap_axes_S0_vs_grams_pH_t36000s.png
[total ammonia [M] @ 36000s] saved → heatmap_axes_S0_vs_grams_total_ammonia_M_t36000s_long.csv, heatmap_axes_S0_vs_grams_total_ammonia_M_t36000s_wide.csv, heatmap_axes_S0_vs_grams_total_ammonia_M_t36000s.png


## 1D array plotting

In [22]:
# plot_1d_single.py
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import sys, re, contextlib

# =========================
# CONFIG — tweak here
# =========================
# Settings read by main() in this cell: which long-form CSV to slice, which
# column goes on the x axis, which values of `filter_dim` become separate
# curves, and the Matplotlib styling.
CONFIG = {
    # where to save outputs
    "output_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseFixed/analysis_outputs_1800s/analysis_outputs_1d/pH/S0_5-1000mM",

    # source CSV (LONG form) for the metric on Y (e.g., pH or total ammonia at a given time)
    # examples:
    #   "heatmap_pH_t36000s_long.csv"
    #   "heatmap_total_ammonia_M_t36000s_long.csv"
    # NOTE(review): the file name says t18000s while the title below says
    # "after 30mins" (= 1800 s) — confirm which one is intended.
    "source_csv": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseFixed/analysis_outputs_1800s/heatmap_pH_t18000s_long.csv",

    # ---- plot semantics ----
    # X axis variable: one of {"S0_mM", "grams_urease_powder", "T_C"}
    "x_var": "T_C",

    # label for Y axis (just a label; data comes from CSV column 'value')
    "y_label": "pH",

    # plot title (also slugified into the output file names)
    "title": "pH vs Temperature at fixed $S_0$ after 30mins",

    # choose the dimension used for filtering / overlay series (e.g., temperature)
    # one of the columns present in CSV (e.g., "T_C", "grams_urease_powder", "S0_mM")
    "filter_dim": "S0_mM",

    # either a single number (e.g., 40.0) or a list (e.g., [20.0, 30.0, 40.0])
    # each value becomes one curve in the plot
    "filter_values": [5, 10, 50, 100, 500, 1000],

    # fix other dimensions (optional): dict of {column: value}
    # values can be scalar; matching uses 'match_mode' below
    # A column that is not present in the CSV is skipped by _apply_fixed_filters
    # (the filter is then not applied).
    # NOTE(review): verify that the source CSV actually has a
    # grams_urease_powder column; otherwise this entry has no effect.
    "fixed_filters": {
        "grams_urease_powder": 0.1  # e.g., fix enzyme mass while sweeping S0 vs T
    },

    # unit handling for Y: "raw" (no change) or "mM" (convert M→mM)
    # NOTE(review): "mM" multiplies the 'value' column by 1000 (_convert_y).
    # With a pH source this scales the pH values ×1000 — looks like it
    # should be "raw" for pH; confirm.
    "y_units": "mM",

    # numeric matching behavior when selecting scalar filters: "nearest" or "exact"
    # In "nearest" mode the closest value is still rejected when it is
    # farther than `tol` from the request (_nearest_select).
    "match_mode": "nearest",
    "tol": 1e-9,

    # optional axis limits
    "xlim": None,  # e.g., [0, 200]
    "ylim": None,

    # =========================
    # Matplotlib rcParams (all visuals here)
    # =========================
    "rc": {
        # figure
        "figure.figsize": (7.5, 4.2),
        "figure.dpi": 150,
        "savefig.dpi": 220,

        # fonts & layout
        "font.size": 12,
        "axes.titlesize": 14,
        "axes.labelsize": 12,
        "xtick.labelsize": 11,
        "ytick.labelsize": 11,
        "legend.fontsize": 8,

        # line/marker style (applies to all series)
        "lines.linewidth": 2.2,
        "lines.linestyle": "-",
        "lines.marker": "o",
        "lines.markersize": 5.5,

        # grid and spines
        "axes.grid": True,
        "grid.alpha": 0.25,
        "grid.linestyle": "--",
        "axes.spines.top": False,
        "axes.spines.right": False,

        # ticks
        "xtick.direction": "out",
        "ytick.direction": "out",
        "xtick.minor.visible": False,
        "ytick.minor.visible": False,
    },
}

# =========================
# helpers
# =========================
def ensure_dir(p: Path):
    """Create ``p`` (and missing parents) as a directory; a no-op when it already exists."""
    mkdir_options = {"parents": True, "exist_ok": True}
    p.mkdir(**mkdir_options)

def _slugify(s: str) -> str:
    return re.sub(r"[^\w\-.]+", "_", str(s)).strip("_")

def _is_list_of_scalars(v):
    return isinstance(v, (list, tuple)) and len(v) > 0 and all(isinstance(x, (int, float)) for x in v)

def _nearest_select(values: np.ndarray, target: float, tol: float):
    idx = int(np.argmin(np.abs(values - target)))
    if abs(values[idx] - target) <= tol:
        return values[idx]
    return None

def _apply_fixed_filters(df: pd.DataFrame, fixed: dict, match_mode: str, tol: float) -> pd.DataFrame:
    if not fixed:
        return df
    out = df.copy()
    for col, val in fixed.items():
        if col not in out.columns:
            continue
        arr = out[col].values
        if isinstance(val, (int, float)):
            if match_mode == "nearest":
                picked = _nearest_select(arr, float(val), tol)
                if picked is None:
                    out = out.iloc[0:0]
                else:
                    out = out[np.isclose(out[col].values, picked, atol=tol, rtol=0.0)]
            else:
                out = out[np.isclose(arr, float(val), atol=tol, rtol=0.0)]
        else:
            # unsupported type → ignore silently
            pass
        if out.empty:
            return out
    return out

def _convert_y(series: pd.Series, mode: str) -> pd.Series:
    if (mode or "raw").lower() == "mm":
        return series * 1000.0
    return series

@contextlib.contextmanager
def rc_context(rc_dict: dict):
    """Context manager that applies ``rc_dict`` as Matplotlib rc overrides for the enclosed block.

    A falsy ``rc_dict`` (``None`` or empty) means no overrides.
    """
    overrides = rc_dict if rc_dict else {}
    scoped_rc = matplotlib.rc_context(rc=overrides)
    with scoped_rc:
        yield

# =========================
# main
# =========================
def main():
    """Plot ``value`` against ``x_var`` with one curve per requested ``filter_dim`` value.

    Steps: load the long-form CSV, apply ``fixed_filters``, select the rows
    for each entry of ``filter_values`` (nearest or exact match on
    ``filter_dim``), average duplicates per x, apply the ``y_units``
    conversion, then write the stacked slice as CSV and the overlay plot as
    PNG (same file stem).

    The ``filter_dim`` column of the output CSV holds the numeric value that
    was actually matched, not a string recovered from the legend label.
    Exits with status 1 when the input is unusable or nothing is left to plot.
    """
    cfg = CONFIG
    out_root = Path(cfg["output_path"])
    ensure_dir(out_root)

    src = Path(cfg["source_csv"])
    if not src.exists():
        print(f"[ERR] source CSV not found: {src}")
        sys.exit(1)

    x = cfg["x_var"]
    fd = cfg["filter_dim"]
    tol = float(cfg["tol"])
    nearest = cfg["match_mode"] == "nearest"

    df = pd.read_csv(src)
    # dict.fromkeys keeps a stable order and drops duplicates (x may equal fd)
    missing = [c for c in dict.fromkeys(("value", x, fd)) if c not in df.columns]
    if missing:
        print(f"[ERR] CSV missing columns: {missing}")
        sys.exit(1)

    # apply fixed filters (e.g., lock grams while sweeping over T_C list)
    df = _apply_fixed_filters(df, cfg.get("fixed_filters", {}), cfg["match_mode"], tol)
    if df.empty:
        print("[ERR] no rows after applying fixed_filters.")
        sys.exit(1)

    fvals = cfg["filter_values"]
    if not _is_list_of_scalars(fvals):
        fvals = [fvals]

    # Loop-invariant: numeric view of the filter column and its distinct
    # non-NaN values (NaN would otherwise defeat the nearest-value search).
    fd_values = df[fd].values.astype(float)
    uniq = np.unique(fd_values)
    uniq = uniq[~np.isnan(uniq)]

    # build series: (legend label, matched numeric value, aggregated frame)
    series = []
    for fv in fvals:
        try:
            target = float(fv)
        except (TypeError, ValueError):
            print(f"[WARN] filter value {fv!r} is not a number. skipping.")
            continue

        if nearest:
            picked = _nearest_select(uniq, target, tol) if uniq.size else None
            if picked is None:
                print(f"[WARN] no {fd} near {fv} (within tol). skipping.")
                continue
            target = float(picked)

        sub = df[np.isclose(fd_values, target, atol=tol, rtol=0.0)]
        if sub.empty:
            print(f"[WARN] no rows for {fd} == {fv}. skipping.")
            continue
        legend_lbl = f"{fd} = {target:g}"

        # aggregate duplicates by mean over identical x
        g = sub.groupby(x, as_index=False)["value"].mean().sort_values(x)
        if g.empty:
            print(f"[WARN] empty slice for {legend_lbl}.")
            continue
        g["value"] = _convert_y(g["value"], cfg.get("y_units", "raw"))
        series.append((legend_lbl, target, g))

    if not series:
        print("[ERR] nothing to plot after filtering/aggregation.")
        sys.exit(1)

    # save sliced data (stacked), tagging each slice with its matched value
    stack = []
    for _lbl, matched, g in series:
        gg = g.copy()
        gg[fd] = matched
        stack.append(gg[[x, fd, "value"]])
    df_out = pd.concat(stack, axis=0, ignore_index=True)

    stem = "__".join([
        _slugify(cfg["title"]),
        f"x-{cfg['x_var']}",
        f"by-{cfg['filter_dim']}",
        _slugify(",".join(str(v) for v in fvals))
    ])
    out_csv = out_root / f"{stem}.csv"
    out_png = out_root / f"{stem}.png"
    df_out.to_csv(out_csv, index=False)

    # prettier axis label for common names
    pretty = {
        "S0_mM": "Substrate concentration S₀ (mM)",
        "grams_urease_powder": "Urease mass (g)",
        "T_C": "Temperature (°C)",
    }

    # plot (all visuals via rc)
    with rc_context(cfg["rc"]):
        plt.figure()
        for lbl, _matched, g in series:
            plt.plot(g[x], g["value"], label=lbl)
        plt.xlabel(pretty.get(x, x))
        plt.ylabel(cfg["y_label"])
        plt.title(cfg["title"])
        if cfg.get("xlim"): plt.xlim(cfg["xlim"])
        if cfg.get("ylim"): plt.ylim(cfg["ylim"])
        plt.legend(title=None)
        plt.tight_layout()
        plt.savefig(out_png, bbox_inches="tight")
        plt.close()

    print(f"[ok] figure: {out_png.name}\n[ok] data:   {out_csv.name}")

if __name__ == "__main__":
    main()


[ok] figure: pH_vs_Temperature_at_fixed_S_0_after_30mins__x-T_C__by-S0_mM__5_10_50_100_500_1000.png
[ok] data:   pH_vs_Temperature_at_fixed_S_0_after_30mins__x-T_C__by-S0_mM__5_10_50_100_500_1000.csv


# Metrics + Gradient

scans your batch_runs/ (one pass, with a global progress bar),

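computes early-time metrics over an initial window τ for each run (τ is set by `tau_s` in CONFIG, currently 120 s; the list below is written for τ = 300 s, i.e. the first 5 minutes):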

ΔpH(0→300 s),

initial pH slope via linear fit on 0–300 s,

Δ[total ammonia](0→300 s) (reported in mM, taken from total ammonia [M] or NH3+NH4+),

aggregates to a summary CSV,

takes a slice at chosen Temperature and Urease grams (nearest match),

builds an adaptive S₀ plan (≤ your budget) by gradient-weighting |d(metric)/dS0|,

saves (i) the recommended S₀ points, (ii) the sliced metric & gradient table, and

draws a figure: metric vs S₀ with a background “density” (normalized |gradient|) so you can see where resolution should be highest.

In [26]:
# design_2d_windows_with_eta.py
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys, time

# =========================
# CONFIG — tweak here
# =========================
# Settings for the early-time metrics / design cell.
CONFIG = {
    # I/O
    "root_runs_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseFixed/batch_runs",                  # folder containing run_* subfolders
    "output_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseFixed/analysis_design",                # where results/figures go

    # Early-time window (seconds)
    # NOTE(review): the notebook text above this cell describes a 300 s
    # window, while this value is 120 s — confirm which is intended.
    "tau_s": 120.0,

    # Metric to use for planning: "delta_pH_tau" | "slope_pH_tau" | "delta_total_ammonia_tau_mM"
    # (these are the keys returned by compute_metrics_for_run)
    "metric_for_planning": "slope_pH_tau",

    # Fix urease mass for the 2D (S0,T) design (nearest-match)
    "grams_target": 0.1,
    "match_mode": "nearest",   # "nearest" or "exact"
    "tol": 1e-6,

    # Selection budget and minimal spacings
    "budget_points": 100,      # max (S0,T) points to select
    "min_step_S0_mM": 5.0,     # minimal separation in S0 (mM)
    "min_step_T_C": 5.0,       # minimal separation in temperature (°C)

    # Optional S0/T ranges to restrict the design (None = use all present)
    "S0_range_mM": None,       # e.g., [1, 500]
    "T_range_C": None,         # e.g., [20, 40]

    # Gradient anisotropy weights (to balance units)
    "grad_weight_S0": 1.0,
    "grad_weight_T": 1.0,

    # product column name (fallback to NH3+NH4+ if missing)
    "product_column": "total ammonia [M]",

    # if tau exceeds run horizon: "clip" or "nan"
    "out_of_range": "clip",

    # Plot rcParams
    "rc": {
        "figure.figsize": (9.5, 4.4),
        "figure.dpi": 200,
        "savefig.dpi": 200,
        "font.size": 11,
        "axes.titlesize": 13,
        "axes.labelsize": 11,
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
        "legend.fontsize": 10,
        "axes.grid": False,
        "axes.spines.top": False,
        "axes.spines.right": False,
    },

    # Progress bar
    "bar_width": 40,
}

# =========================
# utilities
# =========================
def ensure_dir(p: Path):
    """Create directory ``p`` including missing parents; an existing directory is accepted as-is."""
    mkdir_options = {"parents": True, "exist_ok": True}
    p.mkdir(**mkdir_options)

def _fmt_hms(seconds: float) -> str:
    if not np.isfinite(seconds) or seconds < 0:
        return "--:--:--"
    s = int(round(seconds))
    h, r = divmod(s, 3600)
    m, s = divmod(r, 60)
    return f"{h:02d}:{m:02d}:{s:02d}"

def print_bar(done, total, start_time, prefix="processing runs ", width=40):
    """Redraw a one-line progress bar with elapsed time and ETA on stdout.

    ``start_time`` is a ``time.time()`` timestamp taken when processing
    began. The ETA is extrapolated from the average rate so far and shown as
    a placeholder until at least one item is done. A newline is written once
    ``done`` equals ``total``.
    """
    elapsed = time.time() - start_time

    if total == 0:
        frac = 0.0
    else:
        frac = min(max(done / total, 0.0), 1.0)

    # average throughput so far; undefined before the first item completes
    if elapsed > 0 and done > 0:
        rate = done / elapsed
    else:
        rate = np.nan

    if rate and np.isfinite(rate) and rate > 0:
        remaining = (total - done) / rate
    else:
        remaining = np.nan

    n_filled = int(round(width * frac))
    bar = "█" * n_filled + "░" * (width - n_filled)
    msg = (f"\r{prefix}[{bar}] {100*frac:6.1f}% | {done}/{total} "
           f"| elapsed { _fmt_hms(elapsed) } | ETA { _fmt_hms(remaining) }")

    stream = sys.stdout
    stream.write(msg)
    stream.flush()
    if done == total:
        stream.write("\n")

def find_runs(root: Path):
    """Return, sorted, the parent directory of every ``simulation_results.csv`` found under ``root``."""
    run_dirs = [csv_file.parent for csv_file in root.glob("**/simulation_results.csv")]
    run_dirs.sort()
    return run_dirs

def read_manifest(path: Path) -> dict:
    """Parse a ``key: value`` manifest file into a dict.

    Every line containing a colon is split at the first colon; key and value
    are whitespace-stripped. Lines without a colon are ignored.

    Coercions applied afterwards:
      * ``initial_pH`` is always present in the result: a float when the
        field parses as a number, otherwise ``None`` (missing, the literal
        ``"None"``, or unparsable text).
      * ``urea_S0_mM``, ``temperature_C`` and ``grams_urease_powder`` are
        converted to float when present.

    Raises:
        ValueError: if one of the three numeric fields is present but is not
            a number. The message names the file and the field.
    """
    d = {}
    txt = path.read_text(encoding="utf-8", errors="ignore")
    for line in txt.splitlines():
        key, sep, value = line.partition(":")
        if sep:
            d[key.strip()] = value.strip()

    # initial_pH is optional: anything that is not a number becomes None.
    raw_ph = d.get("initial_pH")
    if raw_ph is None or raw_ph == "None":
        d["initial_pH"] = None
    else:
        try:
            d["initial_pH"] = float(raw_ph)
        except ValueError:  # only the parse failure, not KeyboardInterrupt etc.
            d["initial_pH"] = None

    for k in ("urea_S0_mM", "temperature_C", "grams_urease_powder"):
        if k in d:
            try:
                d[k] = float(d[k])
            except ValueError as err:
                # same exception type as before, but now traceable to a file/field
                raise ValueError(
                    f"{path}: manifest field {k!r} is not numeric: {d[k]!r}"
                ) from err
    return d

def interp_at_time(df: pd.DataFrame, col: str, t_query: float, out_of_range="clip"):
    """Linearly interpolate column ``col`` of ``df`` at time ``t_query``.

    Parameters
    ----------
    df : frame with a ``"time [s]"`` column (assumed increasing, as required
        by ``np.interp``) and the column ``col``.
    col : name of the column to sample.
    t_query : time at which to sample, in the units of ``"time [s]"``.
    out_of_range : ``"clip"`` returns the first/last value when ``t_query``
        lies outside the sampled span; any other value returns NaN there.

    Returns
    -------
    float
        The interpolated value, or NaN for an out-of-range query in non-clip
        mode or for a frame without rows.

    A query that falls exactly on the first or last sample is inside the span
    and therefore returns that sample's value in every mode.
    """
    t = df["time [s]"].values
    y = df[col].values
    if len(t) == 0:
        return np.nan

    clip = out_of_range == "clip"
    # Strict comparisons: the end points themselves belong to the sampled span.
    if t_query < t[0]:
        return float(y[0]) if clip else np.nan
    if t_query > t[-1]:
        return float(y[-1]) if clip else np.nan
    return float(np.interp(t_query, t, y))

def initial_slope_linear(t, y, t0, t1):
    """Estimate dy/dt over the window ``[t0, t1]``.

    With at least three samples inside the window the slope is the
    least-squares line through them. With fewer samples it falls back to the
    secant between the values linearly interpolated at the window edges, so
    the value difference and the time difference always refer to the same two
    instants. The window is first limited to the sampled span of ``t``.

    Parameters
    ----------
    t, y : sample times (increasing) and values; array-likes of equal length.
    t0, t1 : window start and end.

    Returns
    -------
    float
        The slope, or NaN when there are no samples or the usable window has
        no positive length.
    """
    t = np.asarray(t, dtype=float)
    y = np.asarray(y, dtype=float)
    if t.size == 0:
        return float("nan")

    mask = (t >= t0) & (t <= t1)
    if mask.sum() < 3:
        # Too few points for a fit: secant across the part of the window that
        # the data actually covers.
        lo = max(t0, t[0])
        hi = min(t1, t[-1])
        if not hi > lo:
            return float("nan")
        y_lo, y_hi = np.interp([lo, hi], t, y)
        return float((y_hi - y_lo) / (hi - lo))

    tt = t[mask]
    yy = y[mask]
    A = np.vstack([tt, np.ones_like(tt)]).T
    slope, _intercept = np.linalg.lstsq(A, yy, rcond=None)[0]
    return float(slope)

def compute_metrics_for_run(df: pd.DataFrame, tau_s: float, out_of_range="clip", prod_col="total ammonia [M]"):
    """Compute early-time metrics of one simulation run.

    Parameters
    ----------
    df : simulation results with columns ``"time [s]"`` and ``"pH"`` and
        either ``prod_col`` or both ``"NH3 [M]"`` and ``"NH4+ [M]"``.
    tau_s : length of the evaluation window, measured from the first sample.
    out_of_range : forwarded to ``interp_at_time``.
    prod_col : product concentration column (in M); when it is absent the sum
        ``NH3 [M] + NH4+ [M]`` is used instead.

    Returns
    -------
    dict with
        ``delta_pH_tau``               pH(t0 + tau) - pH(t0)
        ``slope_pH_tau``               slope of pH over the window
        ``delta_total_ammonia_tau_mM`` product change over the window in mM,
                                       NaN when no product column is available
    """
    t = df["time [s]"].values
    pH = df["pH"].values
    t0 = float(t[0])
    t_tau = t0 + tau_s
    pH0 = float(pH[0])
    pH_tau = interp_at_time(df, "pH", t_tau, out_of_range=out_of_range)
    delta_pH_tau = pH_tau - pH0
    slope_pH_tau = initial_slope_linear(t, pH, t0, min(t_tau, t[-1]))

    # total ammonia (M) → mM
    if prod_col in df.columns:
        prod_series = df[prod_col]
    elif "NH3 [M]" in df.columns and "NH4+ [M]" in df.columns:
        prod_series = df["NH3 [M]"] + df["NH4+ [M]"]
    else:
        prod_series = None

    delta_total_ammonia_tau_mM = np.nan
    if prod_series is not None:
        prod0 = float(prod_series.iloc[0])
        # Build the lookup frame explicitly: the summed fallback series has no
        # name and is not a column of df, so renaming df cannot expose it.
        prod_frame = pd.DataFrame({"time [s]": t, "val": prod_series.values})
        prod_tau = interp_at_time(prod_frame, "val", t_tau, out_of_range=out_of_range)
        delta_total_ammonia_tau_mM = (prod_tau - prod0) * 1000.0

    return {
        "delta_pH_tau": float(delta_pH_tau),
        "slope_pH_tau": float(slope_pH_tau),
        "delta_total_ammonia_tau_mM": float(delta_total_ammonia_tau_mM),
    }

def nearest_pick(vals: np.ndarray, target: float, tol: float):
    """Return the entry of ``vals`` closest to ``target`` if it lies within ``tol``.

    The result is a Python float, or ``None`` when even the closest entry is
    farther than ``tol`` from ``target``.
    """
    distances = np.abs(vals - target)
    closest = float(vals[int(np.argmin(distances))])
    if abs(closest - target) <= tol:
        return closest
    return None

# Greedy non-maximum suppression on a 2D grid (indices), spaced by at least (dS_idx, dT_idx)
def nms_select_2d(G, xs, ys, budget, min_dS, min_dT):
    """
    Pick up to ``budget`` grid cells with the largest score, keeping them apart.

    G: 2D array (T rows, S columns) of gradient magnitude
    xs: sorted unique S0 values (columns, mM)
    ys: sorted unique T values (rows, °C)
    budget: maximum number of points to select
    min_dS: minimal spacing in S0 (mM)
    min_dT: minimal spacing in T (°C)

    The physical spacings are converted to index radii using the median grid
    step along each axis (at least one cell). After each pick the surrounding
    ``(2*rad_y+1) x (2*rad_x+1)`` index window is removed from further
    consideration. Selection stops early when no candidate is left or when
    the best remaining score is non-finite or not positive.

    Returns a list of ``{"T_C": ..., "S0_mM": ...}`` dicts in pick order.
    ``G`` itself is not modified.
    """
    # precompute index spacings from physical spacings
    # map physical step to index radius by nearest spacing
    dx = np.diff(xs) if len(xs) > 1 else np.array([np.inf])
    dy = np.diff(ys) if len(ys) > 1 else np.array([np.inf])
    step_x = float(np.median(dx)) if np.isfinite(dx).any() else 1.0
    step_y = float(np.median(dy)) if np.isfinite(dy).any() else 1.0
    rad_x = max(1, int(round(min_dS / max(step_x, 1e-12))))
    rad_y = max(1, int(round(min_dT / max(step_y, 1e-12))))

    # Float working copy: suppressed cells are marked with NaN, which an
    # integer array could not hold.
    Gwork = np.array(G, dtype=float)
    picks = []
    for _ in range(budget):
        # np.nanargmax raises ValueError on an all-NaN (or empty) array, which
        # is exactly the state reached once every cell has been suppressed.
        if np.isnan(Gwork).all():
            break
        i, j = np.unravel_index(np.nanargmax(Gwork), Gwork.shape)
        if not np.isfinite(Gwork[i, j]) or Gwork[i, j] <= 0:
            break
        picks.append((i, j))
        # suppress neighborhood
        i0 = max(0, i - rad_y); i1 = min(Gwork.shape[0], i + rad_y + 1)
        j0 = max(0, j - rad_x); j1 = min(Gwork.shape[1], j + rad_x + 1)
        Gwork[i0:i1, j0:j1] = np.nan  # mark as removed
    # convert to physical coords
    return [{"T_C": float(ys[i]), "S0_mM": float(xs[j])} for (i, j) in picks]

# =========================
# main
# =========================
def main():
    """Scan all runs, tabulate early-time metrics and plan (S0, T) design points.

    Steps, all driven by the module-level ``CONFIG``:

    1. Find every run folder under ``root_runs_path``, compute the metrics of
       ``compute_metrics_for_run`` for the window ``tau_s`` and write them to
       ``metrics_tau<tau>s_summary.csv``. Runs whose manifest or results file
       is missing, unreadable or incomplete are skipped and counted.
    2. Keep the runs of one enzyme amount (``grams_target``, ``match_mode``,
       ``tol``) and, optionally, of the given S0 / T ranges.
    3. Pivot ``metric_for_planning`` onto a T x S0 grid, compute the weighted
       gradient magnitude and select up to ``budget_points`` well-separated
       grid points with ``nms_select_2d``.
    4. Write the selected points, both surfaces (CSV) and two annotated
       heat maps (PNG) into ``output_path``.

    Every early exit prints a message and returns ``None``.
    """
    cfg = CONFIG
    root = Path(cfg["root_runs_path"])
    out_root = Path(cfg["output_path"])
    ensure_dir(out_root)
    plt.rcParams.update(cfg["rc"])

    # ---- scan runs (one pass) with ETA bar
    runs = find_runs(root)
    n = len(runs)
    if n == 0:
        print(f"No runs found under: {root}")
        return
    print(f"Found {n} runs.")
    start = time.time()
    print_bar(0, n, start, width=cfg["bar_width"])

    tau = float(cfg["tau_s"])
    # Read the settings once, outside the per-run error handling, so that a
    # configuration mistake is not mistaken for a broken run.
    out_of_range = cfg["out_of_range"]
    prod_col = cfg["product_column"]

    def load_row(rd):
        """Return the summary row of run folder ``rd`` or None if unusable."""
        man_path = rd / "MANIFEST.txt"
        sim_path = rd / "simulation_results.csv"
        if not man_path.exists() or not sim_path.exists():
            return None
        try:
            df = pd.read_csv(sim_path)
        except Exception:
            # Best-effort scan: an unreadable results file only drops this run.
            return None
        try:
            man = read_manifest(man_path)
            mets = compute_metrics_for_run(df, tau_s=tau, out_of_range=out_of_range, prod_col=prod_col)
            return {
                "run_path": str(rd),
                "S0_mM": float(man["urea_S0_mM"]),
                "T_C": float(man["temperature_C"]),
                "grams_urease_powder": float(man["grams_urease_powder"]),
                **mets,
            }
        except (OSError, KeyError, IndexError, TypeError, ValueError):
            # Missing manifest key, malformed number, missing column or empty
            # table: skip the run instead of aborting the whole scan.
            return None

    rows = []
    n_skipped = 0
    for done, rd in enumerate(runs, start=1):
        row = load_row(rd)
        if row is None:
            n_skipped += 1
        else:
            rows.append(row)
        print_bar(done, n, start, width=cfg["bar_width"])
    if n_skipped:
        print(f"[WARN] skipped {n_skipped} of {n} runs (missing, unreadable or incomplete manifest/results).")

    if not rows:
        print("No valid runs processed.")
        return

    summary = pd.DataFrame(rows).sort_values(["grams_urease_powder", "T_C", "S0_mM"]).reset_index(drop=True)
    sum_path = out_root / f"metrics_tau{int(tau)}s_summary.csv"
    summary.to_csv(sum_path, index=False)
    print(f"[ok] wrote {sum_path.name}")

    # ---- slice by grams (nearest/exact)
    grams_tgt = float(cfg["grams_target"])
    if cfg["match_mode"] == "nearest":
        g_pick = nearest_pick(summary["grams_urease_powder"].values.astype(float), grams_tgt, float(cfg["tol"]))
        if g_pick is None:
            print("[WARN] grams_target not found within tol.")
            return
        sl = summary[np.isclose(summary["grams_urease_powder"], g_pick, atol=cfg["tol"], rtol=0.0)]
        slice_label = f"grams={g_pick:g} g"
    else:
        sl = summary[np.isclose(summary["grams_urease_powder"], grams_tgt, atol=cfg["tol"], rtol=0.0)]
        slice_label = f"grams={grams_tgt:g} g"
    if sl.empty:
        print("[WARN] grams slice empty.")
        return

    # optional ranges
    if cfg["S0_range_mM"] is not None:
        lo, hi = cfg["S0_range_mM"]
        sl = sl[(sl["S0_mM"] >= lo) & (sl["S0_mM"] <= hi)]
    if cfg["T_range_C"] is not None:
        lo, hi = cfg["T_range_C"]
        sl = sl[(sl["T_C"] >= lo) & (sl["T_C"] <= hi)]
    if sl.empty:
        print("[WARN] slice empty after applying ranges.")
        return

    metric = cfg["metric_for_planning"]
    if metric not in sl.columns:
        print(f"[ERR] metric '{metric}' not found. Available: {list(sl.columns)}")
        return

    # ---- build 2D grid: rows = T, cols = S0
    T_vals = np.sort(sl["T_C"].unique().astype(float))
    S_vals = np.sort(sl["S0_mM"].unique().astype(float))
    grid = sl.pivot_table(index="T_C", columns="S0_mM", values=metric, aggfunc="mean").reindex(index=T_vals, columns=S_vals)
    # drop rows/cols all-NaN
    grid = grid.dropna(how="all", axis=0).dropna(how="all", axis=1)
    T_vals = grid.index.values.astype(float)
    S_vals = grid.columns.values.astype(float)
    Z = grid.values.astype(float)
    if Z.size == 0 or len(T_vals) < 2 or len(S_vals) < 2:
        print("[WARN] need at least 2x2 grid for 2D design.")
        return

    # ---- 2D gradients
    # Pass the coordinate arrays themselves: np.gradient then uses the actual
    # (possibly uneven) spacing between grid values instead of one
    # representative step per axis.
    dZdT, dZdS = np.gradient(Z, T_vals, S_vals)  # rows ~ T, cols ~ S
    wS = float(cfg["grad_weight_S0"])
    wT = float(cfg["grad_weight_T"])
    G = np.sqrt(wS * (dZdS**2) + wT * (dZdT**2))

    # ---- selection via greedy non-maximum suppression
    picks = nms_select_2d(
        G=G,
        xs=S_vals,
        ys=T_vals,
        budget=int(cfg["budget_points"]),
        min_dS=float(cfg["min_step_S0_mM"]),
        min_dT=float(cfg["min_step_T_C"]),
    )
    # Explicit columns keep the CSV header even when nothing was selected.
    picks_df = pd.DataFrame(picks, columns=["T_C", "S0_mM"])
    picks_df.to_csv(out_root / "design_S0_T_points.csv", index=False)
    print(f"[ok] wrote design_S0_T_points.csv (n={len(picks_df)})")

    # ---- save surfaces
    surf_metric = out_root / f"surface_{metric}.csv"
    grid.to_csv(surf_metric)
    G_df = pd.DataFrame(G, index=T_vals, columns=S_vals)
    G_df.to_csv(out_root / f"surface_grad_{metric}.csv")
    print(f"[ok] wrote {surf_metric.name} and surface_grad_{metric}.csv")

    # ---- plots with points
    def plot_surface(Xs, Ys, Zmat, title, fname, cbar_label="value"):
        """Save ``Zmat`` as a heat map over (Xs, Ys) with the picks overlaid."""
        plt.figure(figsize=cfg["rc"]["figure.figsize"])
        # NOTE(review): imshow stretches equally sized cells between the
        # extent limits, so on an uneven grid (and by half a cell at the
        # borders) the overlaid points are only approximately centred.
        extent = [Xs.min(), Xs.max(), Ys.min(), Ys.max()]
        im = plt.imshow(Zmat, aspect="auto", origin="lower", extent=extent, cmap="viridis")
        cbar = plt.colorbar(im)
        cbar.set_label(cbar_label)
        # overlay picks
        if len(picks_df) > 0:
            plt.scatter(picks_df["S0_mM"], picks_df["T_C"], s=24, facecolors="none", edgecolors="w", linewidths=1.4, label="selected")
            plt.legend(loc="best")
        plt.xlabel("Substrate concentration S₀ (mM)")
        plt.ylabel("Temperature (°C)")
        plt.title(title + f"   ({slice_label})")
        plt.tight_layout()
        plt.savefig(out_root / fname, bbox_inches="tight")
        plt.close()

    plot_surface(S_vals, T_vals, Z, f"{metric} surface", f"surface_{metric}.png", cbar_label=metric)
    plot_surface(S_vals, T_vals, G, f"Gradient magnitude of {metric}", f"surface_grad_{metric}.png", cbar_label="|∇ metric|")

    print("[done] 2D design complete.")

# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


Found 16000 runs.
processing runs [████████████████████████████████████████]  100.0% | 16000/16000 | elapsed 00:10:17 | ETA 00:00:00
[ok] wrote metrics_tau120s_summary.csv
[ok] wrote design_S0_T_points.csv (n=100)
[ok] wrote surface_slope_pH_tau.csv and surface_grad_slope_pH_tau.csv
[done] 2D design complete.


## pH, Concentration time-series

In [52]:
# plot_timeseries_overlays_timeunit.py
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import sys, time, itertools

# =========================
# CONFIG — tweak here
# =========================
CONFIG = {
    # Root folder searched recursively for run folders, i.e. folders that
    # contain simulation_results.csv (a MANIFEST.txt is required next to it).
   "root_runs_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseNOTfixed/batch_runs",

    # Folder that receives the stacked CSV and the overlay figure
    # (created if it does not exist).
    "output_path": "/Volumes/01785304894/Simulations/Urease/Enyzme_without_deactivation_UreaseNOTfixed/time_series/ammonia/40C",

    # ---- WHAT TO PLOT ----
    # One of:
    #   "pH"                       ( absolute pH )
    #   "delta_total_ammonia_mM"   ( [NH3]+[NH4+] in mM, relative to the first sample inside the plotted window )
    #   "delta_urea_mM"            ( urea consumed in mM, relative to the first sample inside the plotted window )
    #   "urea_M"                   ( absolute urea [M] )
    #   "urea_mM"                  ( absolute urea [mM] = 1000 * [M] )
    "metric": "delta_total_ammonia_mM",

    # Time window [start, end] in SECONDS, matching the "time [s]" column of
    # the simulator CSV. Only the displayed/exported axis is converted to
    # time_unit.
    "time_window_s": [0.0, 600.0],

    # Display / export time units: "s", "min", or "h"
    "time_unit": "s",

    # If True, every curve is linearly interpolated onto one shared, evenly
    # spaced time grid spanning the time window; if False, each curve keeps
    # its own sample times.
    "interpolate_to_common_grid": False,
    "n_time_points": 600,     # number of points on the shared grid (only used when interpolating)

    # ---- PARAMETER COMBINATIONS (lists; cross-product is plotted) ----
    # For each (S0, T, grams) triple, every requested value is snapped
    # independently to the nearest value present among the scanned runs; the
    # run having all three snapped values is plotted. Duplicate hits are
    # plotted once.
    "S0_mM_list":  [10],
    "T_C_list":    [40],
    "grams_list":  [0.01, 0.10, 0.5, 1.0],

    # Absolute tolerance used when comparing a run's parameters with the
    # snapped values. It does NOT limit how far a snapped value may be from
    # the requested one.
    "nearest_tol": 1e-6,

    # Column name for total ammonia (M) in simulation_results.csv; fallback NH3+NH4 if missing
    "product_column": "total ammonia [M]",

    # Passed to extract_curve as out_of_range ("clip" or "nan").
    # NOTE(review): as written, extract_curve drops a series that has fewer
    # than two samples inside the time window for either setting.
    "out_of_range": "clip",

    # ---- Matplotlib rcParams (all visuals here) ----
    "rc": {
        "figure.figsize": (8.8, 5.2),
        "figure.dpi": 150,
        "savefig.dpi": 220,

        "font.size": 12,
        "axes.titlesize": 14,
        "axes.labelsize": 12,
        "xtick.labelsize": 11,
        "ytick.labelsize": 11,
        "legend.fontsize": 10,

        "axes.grid": True,
        "grid.alpha": 0.25,
        "grid.linestyle": "--",
        "axes.spines.top": False,
        "axes.spines.right": False,

        "lines.linewidth": 2.0,
        "lines.linestyle": "-",
        "lines.marker": "",
        "lines.markersize": 5.0,
    },

    # Width (in characters) of the text progress bar
    "bar_width": 40,
}

# =========================
# utilities
# =========================
def ensure_dir(p: Path):
    """Make sure directory ``p`` exists, creating missing parents as needed."""
    create_parents, tolerate_existing = True, True
    p.mkdir(parents=create_parents, exist_ok=tolerate_existing)

def _fmt_hms(seconds: float) -> str:
    """Render ``seconds`` as ``HH:MM:SS`` (rounded to whole seconds).

    Returns ``--:--:--`` for negative or non-finite input.
    """
    if not (np.isfinite(seconds) and seconds >= 0):
        return "--:--:--"
    total = int(round(seconds))
    hh, rest = divmod(total, 3600)
    mm, ss = divmod(rest, 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d}"

def print_bar(done, total, start_time, prefix="scanning runs ", width=40):
    """Write an in-place text progress bar with elapsed time and ETA.

    ``done`` of ``total`` items are finished; ``start_time`` is the
    ``time.time()`` stamp taken when the work started. The line begins with a
    carriage return so that repeated calls overwrite it, and a newline is
    emitted once ``done == total``.
    """
    elapsed = time.time() - start_time

    frac = 0.0
    if total != 0:
        frac = min(max(done / total, 0.0), 1.0)

    # Items per second; undefined until at least one item has completed.
    rate = np.nan
    if elapsed > 0 and done > 0:
        rate = done / elapsed

    remaining = np.nan
    if rate and np.isfinite(rate) and rate > 0:
        remaining = (total - done) / rate

    solid = int(round(width * frac))
    bar = "█" * solid + "░" * (width - solid)
    line = (f"\r{prefix}[{bar}] {100*frac:6.1f}% | {done}/{total} "
            f"| elapsed { _fmt_hms(elapsed) } | ETA { _fmt_hms(remaining) }")
    sys.stdout.write(line)
    sys.stdout.flush()
    if done == total:
        sys.stdout.write("\n")

def find_runs(root: Path):
    """List, in sorted order, every folder below ``root`` that directly
    contains a ``simulation_results.csv`` file (recursive search)."""
    found = []
    for results_file in root.glob("**/simulation_results.csv"):
        found.append(results_file.parent)
    return sorted(found)

def read_manifest(path: Path) -> dict:
    """Read a ``key: value`` manifest file and return it as a dictionary.

    Lines are split at their first colon, with key and value stripped; lines
    without a colon are skipped and a repeated key keeps its last value.

    Coercions applied to the result:

    * ``initial_pH`` always exists in the result: a float when the text
      parses, otherwise ``None`` (missing key, literal ``"None"``, bad text).
    * ``urea_S0_mM``, ``temperature_C`` and ``grams_urease_powder`` are
      converted with ``float()`` when present.

    Raises
    ------
    ValueError
        If one of the three numeric keys is present but not a valid number.
    """
    d = {}
    txt = path.read_text(encoding="utf-8", errors="ignore")
    for line in txt.splitlines():
        key, sep, value = line.partition(":")
        if sep:
            d[key.strip()] = value.strip()

    # coerce
    ph_text = d.get("initial_pH")
    if ph_text is None or ph_text == "None":
        d["initial_pH"] = None
    else:
        try:
            d["initial_pH"] = float(ph_text)
        except ValueError:
            # Tolerate only a failed number parse; a bare ``except`` would
            # also hide KeyboardInterrupt/SystemExit.
            d["initial_pH"] = None

    for k in ("urea_S0_mM", "temperature_C", "grams_urease_powder"):
        if k in d:
            d[k] = float(d[k])
    return d

def build_catalog(root: Path):
    """Scan once; return a DataFrame with path + parameters.

    Every run folder found by ``find_runs`` contributes one row with the
    columns ``run_path``, ``S0_mM``, ``T_C``, ``grams_urease_powder`` and
    ``csv_path``. A run is skipped when its ``MANIFEST.txt`` is missing,
    unreadable, lacks one of the three parameters or holds a non-numeric
    value; the number of such manifests is reported after the scan. Progress
    is shown with ``print_bar``.

    The returned frame always has the five columns, even when it is empty.
    """
    columns = ["run_path", "S0_mM", "T_C", "grams_urease_powder", "csv_path"]
    runs = find_runs(root)
    total = len(runs)
    start = time.time()
    print_bar(0, total, start, prefix="scanning runs ", width=CONFIG["bar_width"])

    rows = []
    n_bad_manifest = 0
    for done, rd in enumerate(runs, start=1):
        man_path = rd / "MANIFEST.txt"
        sim_path = rd / "simulation_results.csv"
        if man_path.exists() and sim_path.exists():
            try:
                man = read_manifest(man_path)
                row = {
                    "run_path": str(rd),
                    "S0_mM": float(man["urea_S0_mM"]),
                    "T_C": float(man["temperature_C"]),
                    "grams_urease_powder": float(man["grams_urease_powder"]),
                    "csv_path": str(sim_path),
                }
            except (OSError, KeyError, TypeError, ValueError):
                # One incomplete or malformed manifest must not abort the scan.
                n_bad_manifest += 1
            else:
                rows.append(row)
        print_bar(done, total, start, prefix="scanning runs ", width=CONFIG["bar_width"])

    if n_bad_manifest:
        print(f"[WARN] skipped {n_bad_manifest} run(s) with incomplete or malformed MANIFEST.txt")
    return pd.DataFrame(rows, columns=columns)

def nearest_row(df: pd.DataFrame, target, tol=1e-6):
    """Return the catalog row best matching ``target = (S0_mM, T_C, grams)``.

    Each requested value is snapped, independently of the others, to the
    closest value found in the corresponding column of ``df``. The first row
    whose three parameters all equal the snapped values (absolute tolerance
    ``tol``) is returned as a Series; ``None`` is returned when no row
    combines all three snapped values.

    NOTE(review): ``tol`` is applied between rows and the snapped values only;
    the distance between a requested value and its snapped value is not
    checked here.
    """
    s0_req, temp_req, grams_req = (float(v) for v in target)
    requested = (
        ("S0_mM", s0_req),
        ("T_C", temp_req),
        ("grams_urease_powder", grams_req),
    )

    # Snap every requested value to the nearest value available in its column.
    snapped = {}
    for col, wanted in requested:
        available = df[col].values.astype(float)
        closest_idx = int(np.argmin(np.abs(available - wanted)))
        snapped[col] = float(available[closest_idx])

    matches = df[
        np.isclose(df["S0_mM"], snapped["S0_mM"], atol=tol, rtol=0.0)
        & np.isclose(df["T_C"], snapped["T_C"], atol=tol, rtol=0.0)
        & np.isclose(df["grams_urease_powder"], snapped["grams_urease_powder"], atol=tol, rtol=0.0)
    ]
    return None if matches.empty else matches.iloc[0]

def interp_series(t, y, t_grid):
    """Resample the curve ``(t, y)`` onto ``t_grid`` by linear interpolation.

    This is a thin wrapper around ``np.interp``.
    """
    resampled = np.interp(t_grid, t, y)
    return resampled

# -------------------------
# Metric extraction helpers
# -------------------------
def extract_curve(sim_csv: Path, metric: str, tmin_s: float, tmax_s: float, prod_col: str, out_of_range="clip"):
    """
    Return (t_out_seconds, y_out). For delta metrics, subtract initial value at the window start.
    The returned time axis is ALWAYS in seconds; we'll convert to requested unit later.

    Parameters
    ----------
    sim_csv : path of a simulation_results.csv file.
    metric : "pH", "urea_M", "urea_mM", "delta_total_ammonia_mM" or
        "delta_urea_mM".
    tmin_s, tmax_s : inclusive time window in seconds.
    prod_col : total-ammonia column (M); when it is absent the sum of
        "NH3 [M]" and "NH4+ [M]" is used.
    out_of_range : kept for interface compatibility. Restricting the window
        to the sampled span cannot add samples, so both settings select the
        same points.

    Returns
    -------
    (t, y) as float arrays restricted to the window, or ``(None, None)`` when
    the time column or a column needed by ``metric`` is missing, when fewer
    than two samples fall inside the window, or when ``metric`` is unknown.
    "Window start" for the delta metrics means the first sample inside the
    window.
    """
    df = pd.read_csv(sim_csv)
    if "time [s]" not in df.columns:
        return None, None
    t = df["time [s]"].values.astype(float)
    mask = (t >= tmin_s) & (t <= tmax_s)
    if mask.sum() < 2:
        # Also covers a table without rows.
        return None, None
    t_sel = t[mask]

    def windowed(name):
        """Values of column ``name`` inside the window, or None if absent."""
        if name not in df.columns:
            return None
        return df[name].values.astype(float)[mask]

    # ---- pH (absolute) ----
    if metric == "pH":
        y = windowed("pH")
        return (None, None) if y is None else (t_sel, y)

    # ---- absolute urea ----
    if metric in ("urea_M", "urea_mM"):
        S = windowed("urea [M]")  # M
        if S is None:
            return None, None
        if metric == "urea_mM":
            S = S * 1000.0  # mM
        return t_sel, S

    # ---- delta total ammonia (relative to window start) ----
    if metric == "delta_total_ammonia_mM":
        P = windowed(prod_col)  # M
        if P is None:
            nh3 = windowed("NH3 [M]")
            nh4 = windowed("NH4+ [M]")
            if nh3 is None or nh4 is None:
                return None, None
            P = nh3 + nh4
        return t_sel, (P - P[0]) * 1000.0  # mM

    # ---- delta urea consumed (relative to window start) ----
    if metric == "delta_urea_mM":
        S = windowed("urea [M]")  # M
        if S is None:
            return None, None
        return t_sel, (S[0] - S) * 1000.0  # mM consumed

    return None, None

# =========================
# main
# =========================
def main():
    """Overlay the configured metric versus time for selected runs.

    Driven by the module-level ``CONFIG``: the run catalog is built once,
    every requested (S0, T, grams) combination is mapped to a run with
    ``nearest_row``, the metric curve of each distinct run is extracted for
    the time window (optionally resampled onto a shared grid), and the result
    is written as one stacked CSV plus one overlay figure into
    ``output_path``. Each early exit prints a message and returns ``None``.
    """
    cfg = CONFIG
    runs_root = Path(cfg["root_runs_path"])
    out_root = Path(cfg["output_path"])
    ensure_dir(out_root)
    plt.rcParams.update(cfg["rc"])

    # ---- time unit handling ----
    # seconds → desired unit scale
    seconds_to_unit = {"s": 1.0, "min": 1.0/60.0, "h": 1.0/3600.0}
    unit = str(cfg["time_unit"]).lower()
    if unit not in seconds_to_unit:
        print(f"[ERR] invalid time_unit='{cfg['time_unit']}'. Choose 's', 'min', or 'h'.")
        return
    to_unit_scale = seconds_to_unit[unit]
    unit_label = unit
    time_colname = f"time_{unit_label}"

    # Build catalog of available runs
    catalog = build_catalog(runs_root)
    if catalog.empty:
        print(f"[ERR] no runs found under {runs_root}")
        return

    # Build desired combinations (cross-product)
    combos = list(itertools.product(cfg["S0_mM_list"], cfg["T_C_list"], cfg["grams_list"]))
    print(f"Requested combinations: {len(combos)}")

    # Map each combo → nearest row (dedupe identical paths)
    picked = []
    for S0, T, G in combos:
        match = nearest_row(catalog, (S0, T, G), tol=float(cfg["nearest_tol"]))
        if match is not None:
            picked.append(match)
        else:
            print(f"[WARN] no nearest run for (S0={S0} mM, T={T} °C, g={G} g)")
    if not picked:
        print("[ERR] no matching runs found for requested combinations.")
        return
    picked_df = pd.DataFrame(picked).drop_duplicates(subset=["run_path"]).reset_index(drop=True)
    n_picked = len(picked_df)
    print(f"Unique runs to load: {n_picked}")

    # Time window in seconds
    tmin_s, tmax_s = (float(v) for v in cfg["time_window_s"])

    # Common grid (build in the *display* unit, then convert to seconds for interpolation)
    use_grid = bool(cfg["interpolate_to_common_grid"])
    t_grid_unit = None
    t_grid_s = None
    if use_grid:
        t_grid_unit = np.linspace(tmin_s*to_unit_scale, tmax_s*to_unit_scale, int(cfg["n_time_points"]))
        t_grid_s = t_grid_unit / to_unit_scale  # back to seconds for interpolation

    # Extract curves with progress bar
    metric = cfg["metric"]
    prod_col = cfg["product_column"]
    start = time.time()
    print_bar(0, n_picked, start, prefix="loading curves  ", width=cfg["bar_width"])

    curves = []  # list of dicts with metadata + (t_unit,y)
    for done, (_, run) in enumerate(picked_df.iterrows(), start=1):
        t_s, y = extract_curve(Path(run["csv_path"]), metric, tmin_s, tmax_s, prod_col, out_of_range=cfg["out_of_range"])
        if t_s is not None:
            if use_grid:
                t_unit, y_use = t_grid_unit, interp_series(t_s, y, t_grid_s)
            else:
                t_unit, y_use = t_s * to_unit_scale, y
            curves.append({
                "S0_mM": float(run["S0_mM"]),
                "T_C": float(run["T_C"]),
                "grams_urease_powder": float(run["grams_urease_powder"]),
                "t_unit": t_unit,
                "y": y_use
            })
        print_bar(done, n_picked, start, prefix="loading curves  ", width=cfg["bar_width"])

    if not curves:
        print("[ERR] no curves extracted.")
        return

    # Save tidy CSV of curves (stacked; includes parameters)
    def as_frame(curve):
        """One curve as a long-format frame with its parameters repeated."""
        n_samples = len(curve["t_unit"])
        return pd.DataFrame({
            time_colname: curve["t_unit"],
            "value": curve["y"],
            "S0_mM": np.full(n_samples, curve["S0_mM"]),
            "T_C": np.full(n_samples, curve["T_C"]),
            "grams_urease_powder": np.full(n_samples, curve["grams_urease_powder"]),
        })

    curves_df = pd.concat([as_frame(c) for c in curves], ignore_index=True)

    stem = f"{metric}_over_time_{unit_label}"
    ensure_dir(out_root)
    csv_out = out_root / f"{stem}.csv"
    curves_df.to_csv(csv_out, index=False)
    print(f"[ok] wrote {csv_out.name}")

    # Pretty y-labels
    axis_labels = {
        "pH": "pH",
        "delta_total_ammonia_mM": "Δ total ammonia (mM)",
        "delta_urea_mM": "Δ urea consumed (mM)",
        "urea_M": "Urea (M)",
        "urea_mM": "Urea (mM)",
    }
    pretty_y = axis_labels.get(metric, metric)

    # Plot overlay
    plt.figure()
    for curve in curves:
        legend_text = f"S₀={curve['S0_mM']:g} mM, T={curve['T_C']:g} °C, g={curve['grams_urease_powder']:g}"
        plt.plot(curve["t_unit"], curve["y"], label=legend_text)
    plt.xlabel(f"Time ({unit_label})")
    plt.ylabel(pretty_y)
    plt.title(f"{pretty_y} vs time")
    plt.legend(ncol=1, loc="best")
    plt.tight_layout()
    fig_out = out_root / f"{stem}.png"
    plt.savefig(fig_out, bbox_inches="tight")
    plt.close()
    print(f"[ok] wrote {fig_out.name}")

# Run the plotting pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


scanning runs [████████████████████████████████████████]  100.0% | 1111/1111 | elapsed 00:00:01 | ETA 00:00:00
Requested combinations: 4
Unique runs to load: 4
loading curves  [████████████████████████████████████████]  100.0% | 4/4 | elapsed 00:00:00 | ETA 00:00:00
[ok] wrote delta_total_ammonia_mM_over_time_s.csv
[ok] wrote delta_total_ammonia_mM_over_time_s.png
