# dsfb-add Colab Sweep Notebook

This notebook loads Rust-generated CSVs from a timestamped run directory (`output-dsfb-add/<timestamp>/`) and generates Plotly PNG figures for AET, IWLT, TCP, and RLT.

It is designed to fail closed if either the notebook copy or the Colab package environment is stale relative to the repository `main` branch.
For Colab reliability it uses the self-contained `kaleido==0.2.1` export path instead of Chrome-managed Kaleido v1.

Recommended workflow:

1. Open the notebook from the repository Colab link on the `main` branch.
2. Use `Runtime -> Restart session and run all` when Colab prompts after the install cell.
3. By default the notebook clones the repository, runs `cargo run -p dsfb-add --bin dsfb_add_sweep`, and uses the fresh timestamped output.
4. Only set `OUTPUT_DIR` explicitly if you intentionally want to analyze a specific existing run directory.
5. The generated PNGs plus `tcp_ph_summary.csv` and `aet_iwlt_law_summary.csv` are written back into the same timestamped directory as the CSVs.


In [None]:
%pip install -q --upgrade "plotly==6.1.1" "kaleido==0.2.1" "ripser==0.6.12"


In [None]:
import json
import sys
from importlib.metadata import version
from urllib.request import urlopen

# Version stamp of this notebook copy. It is compared against the
# `dsfb_add_notebook_version` metadata field of the copy published on `main`.
NOTEBOOK_VERSION = "2026-03-01-11"
NOTEBOOK_RAW_URL = "https://raw.githubusercontent.com/infinityabundance/dsfb/main/crates/dsfb-add/dsfb_add_sweep.ipynb"
# Upper bound, in seconds, on the freshness request. Without a timeout a
# stalled connection would hang this cell indefinitely.
NOTEBOOK_FETCH_TIMEOUT_SECONDS = 30
# Must mirror the pins requested by the `%pip install` cell; every entry is
# verified against the installed distribution metadata below.
EXPECTED_PACKAGE_VERSIONS = {
    "plotly": "6.1.1",
    "kaleido": "0.2.1",
    "ripser": "0.6.12",
}

# Evict already-imported copies of the pinned packages (and their submodules)
# so that the next `import` re-executes them instead of reusing cached modules.
for prefix in ("plotly", "kaleido", "ripser"):
    loaded = [name for name in list(sys.modules) if name == prefix or name.startswith(prefix + ".")]
    for name in loaded:
        del sys.modules[name]

# The context manager closes the HTTP response deterministically, even when
# the JSON payload cannot be parsed.
with urlopen(NOTEBOOK_RAW_URL, timeout=NOTEBOOK_FETCH_TIMEOUT_SECONDS) as response:
    remote_nb = json.load(response)
remote_version = remote_nb.get("metadata", {}).get("dsfb_add_notebook_version")
if remote_version != NOTEBOOK_VERSION:
    raise RuntimeError(
        f"Stale notebook copy detected. This notebook is {NOTEBOOK_VERSION}, but main has {remote_version}. "
        "Reopen the notebook from the repository Colab link."
    )

# Compare what is installed on disk against the pins; collect every mismatch
# so the error lists all of them at once.
installed_versions = {name: version(name) for name in EXPECTED_PACKAGE_VERSIONS}
mismatches = {
    name: (installed_versions[name], expected)
    for name, expected in EXPECTED_PACKAGE_VERSIONS.items()
    if installed_versions[name] != expected
}
if mismatches:
    mismatch_text = "\n".join(
        f" - {name}: installed {installed}, expected {expected}"
        for name, (installed, expected) in mismatches.items()
    )
    raise RuntimeError(
        "Notebook environment is stale. Re-run the install cell, then restart the Colab runtime. "
        "Version mismatches:\n" + mismatch_text
    )

print("Notebook freshness check passed:", NOTEBOOK_VERSION)
print("Pinned packages:", installed_versions)


In [None]:
from pathlib import Path

# --- Run selection ---------------------------------------------------------
# With OUTPUT_DIR left as None the notebook analyzes a run generated in Colab.
# To analyze an existing run instead, point it at that directory, e.g.:
# OUTPUT_DIR = Path("/content/output-dsfb-add/2026-03-01T12-00-00Z")
OUTPUT_DIR = None
RUN_RUST_SWEEP_IN_COLAB = True

# --- Repository / toolchain locations --------------------------------------
REPO_URL = "https://github.com/infinityabundance/dsfb.git"
REPO_DIR = Path("/content/dsfb")
CARGO_BIN_DIR = Path("/root/.cargo/bin")

# --- TCP persistent-homology settings --------------------------------------
# Minimum H1 lifetime that counts as persistent, and the rolling-window width
# used to smooth the Betti-1 statistics.
TCP_PERSISTENCE_THRESHOLD = 0.05
TCP_SMOOTHING_WINDOW = 5

# --- Search roots, highest priority first ----------------------------------
# Duplicates are tolerated here; they are removed before the search runs.
OUTPUT_ROOT_CANDIDATES = [
    REPO_DIR / "output-dsfb-add",
    *(
        Path(root)
        for root in (
            "/content/output-dsfb-add",
            "/content/dsfb/output-dsfb-add",
            "/content/drive/MyDrive/output-dsfb-add",
            "output-dsfb-add",
        )
    ),
]

OUTPUT_DIR


In [None]:
import re
from collections import defaultdict

import pandas as pd
import numpy as np
import plotly
import plotly.graph_objects as go
import plotly.io as pio
from IPython.display import Image, display
from plotly.subplots import make_subplots
from ripser import ripser

# Guard against a plotly module that was imported before the pinned version
# was installed: the on-disk metadata can be correct while the in-memory
# module is still the old one.
if plotly.__version__ != EXPECTED_PACKAGE_VERSIONS["plotly"]:
    raise RuntimeError(
        f"Imported stale plotly module {plotly.__version__}; expected {EXPECTED_PACKAGE_VERSIONS['plotly']}. "
        "Restart the Colab runtime and run all cells again."
    )

# Notebook-wide plotly defaults: inline notebook renderer and the bare "none"
# template, so each figure's styling comes only from its own layout calls.
pio.renderers.default = "notebook"
pio.templates.default = "none"


In [None]:
def require_file(path: Path) -> Path:
    """Return *path* unchanged when it exists on disk.

    Raises:
        FileNotFoundError: carrying *path* when nothing exists at that location.
    """
    if path.exists():
        return path
    raise FileNotFoundError(path)

def latest_timestamped_dir(root: Path):
    """Return the sub-directory of *root* that sorts last, or ``None``.

    ``None`` is returned when *root* is missing, is not a directory, or holds
    no sub-directories. Ordering is plain path ordering, so run folders need
    names that sort chronologically for "last" to mean "latest".
    """
    if not root.is_dir():
        return None
    subdirs = [entry for entry in root.iterdir() if entry.is_dir()]
    return max(subdirs) if subdirs else None

def resolve_output_dir(explicit_dir, candidate_roots):
    if explicit_dir is not None:
        explicit_dir = Path(explicit_dir)
        if explicit_dir.exists() and explicit_dir.is_dir():
            return explicit_dir
        raise FileNotFoundError(
            f"Configured OUTPUT_DIR does not exist: {explicit_dir}. Upload or mount your run folder first."
        )

    for root in candidate_roots:
        candidate = latest_timestamped_dir(root)
        if candidate is not None:
            return candidate

    searched = "\n".join(f" - {root}" for root in candidate_roots)
    raise FileNotFoundError(
        "No output-dsfb-add run directory was found. Upload or mount the Rust-generated "
        "output folder, or set OUTPUT_DIR explicitly. Searched:\n" + searched
    )

def save_png(fig, filename: str, width: int = 1400, height: int = 900, scale: int = 2) -> Path:
    """Export *fig* as ``OUTPUT_DIR / filename`` and return that path.

    ``width``, ``height`` and ``scale`` are forwarded unchanged to the
    figure's ``write_image`` call.
    """
    export_options = {"width": width, "height": height, "scale": scale}
    destination = OUTPUT_DIR / filename
    fig.write_image(destination, **export_options)
    return destination

def display_saved_png(path: Path):
    """Show the image file at *path* inline and hand *path* back to the caller."""
    rendered = Image(filename=str(path))
    display(rendered)
    return path

def hex_to_rgba(hex_color: str, alpha: float) -> str:
    """Convert a ``#rrggbb`` color plus an alpha value to a CSS ``rgba(...)`` string.

    Leading ``#`` characters are optional. Exactly six hex digits are required.

    Raises:
        ValueError: if the stripped string is not six characters long, or if
            a channel is not valid hexadecimal.
    """
    hex_color = hex_color.lstrip("#")
    if len(hex_color) != 6:
        raise ValueError(f"Expected 6-digit hex color, got {hex_color}")
    # Channels are decoded left to right: red, green, blue.
    red, green, blue = (int(hex_color[start:start + 2], 16) for start in (0, 2, 4))
    return f"rgba({red}, {green}, {blue}, {alpha})"

def make_line_figure(
    df,
    x_col: str,
    y_col: str,
    title: str,
    y_title: str,
    color: str = "#1f77b4",
    showlegend: bool = False,
    name: str | None = None,
):
    """Build a single-series line-and-marker figure on a white canvas.

    ``df[x_col]`` and ``df[y_col]`` supply the data. The trace is labelled
    ``name`` when given, otherwise ``y_col``. ``showlegend`` toggles the
    legend on both the trace and the layout. The x-axis title is always
    "lambda", regardless of ``x_col``.
    """
    series = go.Scatter(
        x=df[x_col],
        y=df[y_col],
        mode="lines+markers",
        line=dict(width=3, color=color),
        marker=dict(size=6, color=color),
        name=name or y_col,
        showlegend=showlegend,
    )
    # Grid styling shared by both axes.
    grid = dict(showgrid=True, gridcolor="#d9d9d9", zeroline=False)

    fig = go.Figure()
    fig.add_trace(series)
    fig.update_layout(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font=dict(size=16, color="#222222"),
        margin=dict(l=80, r=40, t=90, b=70),
        showlegend=showlegend,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1.0),
    )
    fig.update_xaxes(title="lambda", **grid)
    fig.update_yaxes(title=y_title, **grid)
    return fig

def make_band_figure(
    df,
    x_col: str,
    mean_col: str,
    std_col: str,
    title: str,
    y_title: str,
    color: str = "#e7298a",
    line_name: str | None = None,
):
    """Build a mean line with a shaded ``mean ± std`` band.

    Three traces are added in order: an invisible upper edge, an invisible
    lower edge filled up to the upper edge (legend entry "±1 std"), and the
    mean line itself (labelled ``line_name`` or ``mean_col``). The x-axis
    title is always "lambda".
    """
    x_values = df[x_col]
    band_top = df[mean_col] + df[std_col]
    band_bottom = df[mean_col] - df[std_col]
    hidden_edge = dict(width=0)
    grid = dict(showgrid=True, gridcolor="#d9d9d9", zeroline=False)

    traces = [
        # Upper edge: drawn first so the lower edge can fill towards it.
        go.Scatter(
            x=x_values,
            y=band_top,
            mode="lines",
            line=hidden_edge,
            hoverinfo="skip",
            showlegend=False,
        ),
        # Lower edge: "tonexty" shades the area back up to the previous trace.
        go.Scatter(
            x=x_values,
            y=band_bottom,
            mode="lines",
            line=hidden_edge,
            fill="tonexty",
            fillcolor=hex_to_rgba(color, 0.18),
            hoverinfo="skip",
            name="±1 std",
        ),
        # Mean curve on top of the band.
        go.Scatter(
            x=x_values,
            y=df[mean_col],
            mode="lines+markers",
            line=dict(width=3, color=color),
            marker=dict(size=6, color=color),
            name=line_name or mean_col,
        ),
    ]

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)
    fig.update_layout(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font=dict(size=16, color="#222222"),
        margin=dict(l=80, r=40, t=90, b=70),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1.0),
    )
    fig.update_xaxes(title="lambda", **grid)
    fig.update_yaxes(title=y_title, **grid)
    return fig

def make_multiline_figure(df, x_col: str, series_specs, title: str, y_title: str):
    """Overlay several line series from *df* on one figure.

    Each item of ``series_specs`` is a mapping with the keys ``"y_col"``
    (column to plot), ``"color"`` and ``"name"``. The y-axis range is pinned
    to ``[-0.02, 1.02]`` and the x-axis title is always "lambda".
    """
    grid = dict(showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    x_values = df[x_col]

    fig = go.Figure()
    for series in series_specs:
        line_trace = go.Scatter(
            x=x_values,
            y=df[series["y_col"]],
            mode="lines",
            line=dict(width=3, color=series["color"]),
            name=series["name"],
        )
        fig.add_trace(line_trace)
    fig.update_layout(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font=dict(size=16, color="#222222"),
        margin=dict(l=80, r=40, t=90, b=70),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1.0),
    )
    fig.update_xaxes(title="lambda", **grid)
    fig.update_yaxes(title=y_title, range=[-0.02, 1.02], **grid)
    return fig

def minmax_norm(values):
    """Rescale *values* linearly onto ``[0, 1]`` and return a pandas Series.

    When the spread (max minus min) is at most ``1e-12`` the input is treated
    as constant and a Series of zeros with the same index is returned instead
    of dividing by a near-zero span.
    """
    values = pd.Series(values, copy=False)
    lowest = values.min()
    span = values.max() - lowest
    if span <= 1e-12:
        flat = np.zeros(len(values))
        return pd.Series(flat, index=values.index)
    shifted = values - lowest
    return shifted / span

def linear_regression_summary(x_values, y_values):
    """Fit ``y = slope * x + intercept`` by least squares and summarize the fit.

    Returns a dict with the keys ``"slope"``, ``"intercept"``,
    ``"pearson_r"``, ``"r_squared"`` (plain floats) and ``"y_fit"`` (the
    fitted values at ``x_values`` as a NumPy array).

    Two fallbacks report a perfect score of ``1.0``: ``pearson_r`` when at
    most one sample is given, and ``r_squared`` when ``y_values`` has
    essentially no variance (total sum of squares at most ``1e-12``).
    """
    xs = np.asarray(x_values, dtype=float)
    ys = np.asarray(y_values, dtype=float)

    slope, intercept = np.polyfit(xs, ys, 1)
    fitted = slope * xs + intercept

    if len(xs) > 1:
        pearson_r = float(np.corrcoef(xs, ys)[0, 1])
    else:
        pearson_r = 1.0

    residuals = ys - fitted
    deviations = ys - np.mean(ys)
    ss_res = float(np.sum(residuals ** 2))
    ss_tot = float(np.sum(deviations ** 2))
    if ss_tot > 1e-12:
        r_squared = 1.0 - ss_res / ss_tot
    else:
        r_squared = 1.0

    return {
        "slope": float(slope),
        "intercept": float(intercept),
        "pearson_r": pearson_r,
        "r_squared": float(r_squared),
        "y_fit": fitted,
    }


In [None]:
import os
import shutil
import subprocess

def unique_paths(paths):
    """Return *paths* with duplicates removed, keeping first occurrences in order.

    Two entries count as duplicates when their ``str()`` forms are equal.
    """
    first_by_text = {}
    for candidate in paths:
        # setdefault keeps the earliest entry for each textual form.
        first_by_text.setdefault(str(candidate), candidate)
    return list(first_by_text.values())

def cargo_env():
    """Return a copy of the process environment with cargo's bin directory first on PATH.

    ``os.environ`` itself is left untouched. A missing ``PATH`` variable
    surfaces as ``KeyError``.
    """
    return {**os.environ, "PATH": f"{CARGO_BIN_DIR}:{os.environ['PATH']}"}

def run_cmd(args, cwd=None, env=None):
    """Echo a command line, then run it and fail loudly on a non-zero exit.

    Every element of *args* is converted with ``str`` first, so ``Path``
    objects can be passed directly. A falsy *cwd* means "inherit the current
    directory".

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
    """
    argv = list(map(str, args))
    workdir = str(cwd) if cwd else None
    print("+", " ".join(argv))
    subprocess.run(argv, cwd=workdir, env=env, check=True)

def ensure_cargo_installed():
    """Install a minimal Rust toolchain via rustup unless ``cargo`` is already available.

    ``cargo`` is looked up on the PATH produced by ``cargo_env()``, so a
    previous rustup install under ``CARGO_BIN_DIR`` is detected as well.

    Raises:
        subprocess.CalledProcessError: if downloading or running the rustup
            installer fails.
        RuntimeError: if the installer succeeds but ``cargo`` still cannot be
            found.
    """
    search_path = cargo_env()["PATH"]
    if shutil.which("cargo", path=search_path):
        return
    run_cmd([
        "bash",
        "-lc",
        # pipefail: without it a failed download is masked because `sh` exits
        # 0 on empty input. --proto/--tlsv1.2 match the official rustup
        # command and forbid any fallback away from HTTPS.
        "set -o pipefail; "
        "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal",
    ])
    if not shutil.which("cargo", path=search_path):
        raise RuntimeError("cargo is unavailable after rustup installation")

def ensure_repo_checkout():
    """Make sure ``REPO_DIR`` holds a git checkout, cloning ``REPO_URL`` if needed.

    An existing checkout (detected by its ``.git`` entry) is left as is. A
    non-empty ``REPO_DIR`` that is not a checkout is never overwritten.

    Raises:
        RuntimeError: if ``REPO_DIR`` exists, is non-empty and is not a git
            checkout.
    """
    already_cloned = (REPO_DIR / ".git").exists()
    if already_cloned:
        return

    REPO_DIR.parent.mkdir(parents=True, exist_ok=True)
    occupied = REPO_DIR.exists() and any(REPO_DIR.iterdir())
    if occupied:
        raise RuntimeError(
            f"Repo directory exists but is not a git checkout: {REPO_DIR}. Remove it or set OUTPUT_DIR explicitly."
        )

    # Shallow clone: only the latest commit is fetched.
    clone_command = ["git", "clone", "--depth", "1", REPO_URL, REPO_DIR]
    run_cmd(clone_command)

def generate_fresh_output_if_requested():
    """Run the Rust sweep inside Colab when no run directory was configured.

    Nothing happens if ``OUTPUT_DIR`` is already set or
    ``RUN_RUST_SWEEP_IN_COLAB`` is false. Otherwise cargo and the repository
    checkout are prepared and the ``dsfb_add_sweep`` binary is executed from
    ``REPO_DIR``.
    """
    wants_fresh_run = OUTPUT_DIR is None and RUN_RUST_SWEEP_IN_COLAB
    if not wants_fresh_run:
        return

    ensure_cargo_installed()
    ensure_repo_checkout()
    sweep_command = ["cargo", "run", "-p", "dsfb-add", "--bin", "dsfb_add_sweep"]
    run_cmd(sweep_command, cwd=REPO_DIR, env=cargo_env())

# Optionally run the Rust sweep (only when OUTPUT_DIR is unset and
# RUN_RUST_SWEEP_IN_COLAB is true), then settle on the run directory to analyze.
generate_fresh_output_if_requested()
# Drop duplicate search roots while keeping their priority order.
OUTPUT_ROOT_CANDIDATES = unique_paths(OUTPUT_ROOT_CANDIDATES)
# After this line OUTPUT_DIR is an existing directory; otherwise
# resolve_output_dir has raised FileNotFoundError.
OUTPUT_DIR = resolve_output_dir(OUTPUT_DIR, OUTPUT_ROOT_CANDIDATES)
# Bare expression so the notebook echoes the resolved path.
OUTPUT_DIR


In [None]:
# Load the AET sweep table; require_file raises FileNotFoundError if the CSV
# is missing. `aet` is reused by later cells.
aet = pd.read_csv(require_file(OUTPUT_DIR / "aet_sweep.csv"))

# Plot echo_slope against lambda, export the PNG into the run directory, then
# show the saved file inline.
fig_aet = make_line_figure(aet, "lambda", "echo_slope", "AET Echo Slope vs Lambda", "echo_slope")
aet_png = save_png(fig_aet, "fig_aet_echo_slope_vs_lambda.png")
display_saved_png(aet_png)


In [None]:
# Load the IWLT sweep table; require_file raises FileNotFoundError if the CSV
# is missing. `iwlt` is reused by later cells.
iwlt = pd.read_csv(require_file(OUTPUT_DIR / "iwlt_sweep.csv"))

# Plot entropy_density against lambda, export the PNG into the run directory,
# then show the saved file inline.
fig_iwlt = make_line_figure(iwlt, "lambda", "entropy_density", "IWLT Entropy Density vs Lambda", "entropy_density", color="#d95f02")
iwlt_png = save_png(fig_iwlt, "fig_iwlt_entropy_density_vs_lambda.png")
display_saved_png(iwlt_png)


In [None]:
# Load the RLT sweep table; `rlt` is reused by later cells.
rlt = pd.read_csv(require_file(OUTPUT_DIR / "rlt_sweep.csv"))

# Full-range figures for the two RLT measures.
fig_rlt_escape = make_line_figure(rlt, "lambda", "escape_rate", "RLT Escape Rate vs Lambda", "escape_rate", color="#7570b3")
rlt_escape_png = save_png(fig_rlt_escape, "fig_rlt_escape_rate_vs_lambda.png")

fig_rlt_expansion = make_line_figure(rlt, "lambda", "expansion_ratio", "RLT Expansion Ratio vs Lambda", "expansion_ratio", color="#1b9e77")
rlt_expansion_png = save_png(fig_rlt_expansion, "fig_rlt_expansion_ratio_vs_lambda.png")

# Locate the zoom window. The first row whose escape_rate reaches 0.8 is
# taken as the transition; the window half-width is ten mean lambda steps but
# never less than 0.08. Without such a row, fall back to the middle row of
# the table and a fixed half-width of 0.15.
transition_candidates = rlt.index[rlt["escape_rate"] >= 0.8].tolist()
if transition_candidates:
    transition_idx = int(transition_candidates[0])
    center_lambda = float(rlt.loc[transition_idx, "lambda"])
    zoom_half_width = max(0.08, 10.0 * float(np.diff(rlt["lambda"]).mean()))
else:
    center_lambda = float(rlt["lambda"].iloc[len(rlt) // 2])
    zoom_half_width = 0.15

# Clamp the window to the sampled lambda range.
zoom_min = max(float(rlt["lambda"].min()), center_lambda - zoom_half_width)
zoom_max = min(float(rlt["lambda"].max()), center_lambda + zoom_half_width)
rlt_zoom = rlt[(rlt["lambda"] >= zoom_min) & (rlt["lambda"] <= zoom_max)].copy()
# Fewer than three points do not make a useful zoom; show the full sweep instead.
if len(rlt_zoom) < 3:
    rlt_zoom = rlt.copy()

# Zoomed figure: expansion ratio and escape rate share one y axis.
fig_rlt_zoom = go.Figure()
fig_rlt_zoom.add_trace(
    go.Scatter(
        x=rlt_zoom["lambda"],
        y=rlt_zoom["expansion_ratio"],
        mode="lines+markers",
        line={"width": 3, "color": "#1b9e77"},
        marker={"size": 6, "color": "#1b9e77"},
        name="Expansion ratio",
    )
)
fig_rlt_zoom.add_trace(
    go.Scatter(
        x=rlt_zoom["lambda"],
        y=rlt_zoom["escape_rate"],
        mode="lines+markers",
        line={"width": 3, "color": "#7570b3"},
        marker={"size": 6, "color": "#7570b3"},
        name="Escape rate",
    )
)
fig_rlt_zoom.update_layout(
    title="RLT Expansion Ratio vs Lambda (zoomed transition)",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 40, "t": 90, "b": 70},
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
fig_rlt_zoom.update_xaxes(title="lambda", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
fig_rlt_zoom.update_yaxes(title="normalized transport", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
rlt_zoom_png = save_png(fig_rlt_zoom, "fig_rlt_expansion_ratio_vs_lambda_zoom.png")

# Show all three saved PNGs inline, in the order they were produced.
display_saved_png(rlt_escape_png)
display_saved_png(rlt_expansion_png)
display_saved_png(rlt_zoom_png)


## RLT Example Trajectories

The Rust sweep exports representative bounded and expanding resonance walks in `rlt_examples/` so the transition in the RLT summary curves can be inspected directly.


In [None]:
def _first_trajectory_csv(directory: Path, pattern: str) -> Path:
    """Return the first file (in sorted order) in *directory* that matches *pattern*.

    Raises:
        FileNotFoundError: naming the pattern and directory when nothing
            matches, instead of an opaque IndexError from indexing an empty list.
    """
    matches = sorted(directory.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            f"No file matching {pattern} was found in {directory}. "
            "Re-run the Rust sweep so the example trajectories are exported."
        )
    return matches[0]


def _make_trajectory_figure(trajectory, title: str, line_color: str, colorscale: str, trace_name: str):
    """Plot vertex_id against step for one trajectory table.

    Markers are colored by ``distance_from_start`` using *colorscale*, and the
    same column is shown in the hover text.
    """
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=trajectory["step"],
            y=trajectory["vertex_id"],
            mode="lines+markers",
            line={"width": 2.5, "color": line_color},
            marker={"size": 5, "color": trajectory["distance_from_start"], "colorscale": colorscale, "showscale": False},
            name=trace_name,
            text=trajectory["distance_from_start"],
            hovertemplate="step=%{x}<br>vertex_id=%{y}<br>distance=%{text}<extra></extra>",
        )
    )
    fig.update_layout(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font={"size": 16, "color": "#222222"},
        margin={"l": 80, "r": 40, "t": 90, "b": 70},
        showlegend=False,
    )
    fig.update_xaxes(title="step", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    fig.update_yaxes(title="vertex_id", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    return fig


# Locate one representative trajectory per regime; both lookups fail closed
# with a descriptive error if the export is missing.
rlt_examples_dir = require_file(OUTPUT_DIR / "rlt_examples")
bounded_path = _first_trajectory_csv(rlt_examples_dir, "trajectory_bounded_lambda_*.csv")
expanding_path = _first_trajectory_csv(rlt_examples_dir, "trajectory_expanding_lambda_*.csv")

rlt_bounded = pd.read_csv(bounded_path)
rlt_expanding = pd.read_csv(expanding_path)

# The lambda shown in each title is read from the first row of the table.
bounded_lambda = float(rlt_bounded["lambda"].iloc[0])
expanding_lambda = float(rlt_expanding["lambda"].iloc[0])

fig_rlt_bounded = _make_trajectory_figure(
    rlt_bounded,
    f"RLT Trajectory in Bounded Regime (lambda = {bounded_lambda:.3f})",
    line_color="#386cb0",
    colorscale="Blues",
    trace_name="Bounded trajectory",
)
rlt_bounded_png = save_png(fig_rlt_bounded, "fig_rlt_trajectory_bounded.png")

fig_rlt_expanding = _make_trajectory_figure(
    rlt_expanding,
    f"RLT Trajectory in Expanding Regime (lambda = {expanding_lambda:.3f})",
    line_color="#ef3b2c",
    colorscale="Reds",
    trace_name="Expanding trajectory",
)
rlt_expanding_png = save_png(fig_rlt_expanding, "fig_rlt_trajectory_expanding.png")

display_saved_png(rlt_bounded_png)
display_saved_png(rlt_expanding_png)


## TCP persistent-homology summary

The Rust sweep exports multiple deterministic point-cloud runs per lambda in `tcp_points/`. The cell below computes H1 count and total-persistence statistics across those runs, treats total persistence as the primary thermodynamic TCP observable, and saves a `tcp_ph_summary.csv` for downstream comparisons.


In [None]:
tcp = pd.read_csv(require_file(OUTPUT_DIR / "tcp_sweep.csv"))
# Fail closed with the missing path instead of silently globbing nothing.
tcp_points_dir = require_file(OUTPUT_DIR / "tcp_points")
point_pattern = re.compile(r"lambda_(\d+)_run_(\d+)\.csv$")
point_groups = defaultdict(list)

# Group the exported point clouds by the lambda index embedded in the file name.
for point_file in sorted(tcp_points_dir.glob("lambda_*_run_*.csv")):
    match = point_pattern.search(point_file.name)
    if not match:
        continue
    lambda_idx = int(match.group(1))
    point_groups[lambda_idx].append(point_file)

# With no point clouds the summary table would be empty and the sort below
# would die with an opaque KeyError('lambda'); report the real cause instead.
if not point_groups:
    raise FileNotFoundError(
        f"No lambda_*_run_*.csv point clouds were found in {tcp_points_dir}."
    )

ph_rows = []
for lambda_idx in sorted(point_groups):
    betti_counts = []
    total_persistences = []
    for point_file in sorted(point_groups[lambda_idx]):
        points = pd.read_csv(point_file)[["x", "y"]].to_numpy()
        diagrams = ripser(points, maxdim=1)["dgms"]
        h1 = diagrams[1] if len(diagrams) > 1 else np.empty((0, 2))
        if len(h1):
            # Keep H1 features with a finite death time whose lifetime
            # (death minus birth) exceeds the configured threshold.
            lifetimes = h1[:, 1] - h1[:, 0]
            persistent = h1[np.isfinite(h1[:, 1]) & (lifetimes > TCP_PERSISTENCE_THRESHOLD)]
            persistent_lifetimes = persistent[:, 1] - persistent[:, 0] if len(persistent) else np.empty((0,))
        else:
            persistent = np.empty((0, 2))
            persistent_lifetimes = np.empty((0,))

        betti_counts.append(float(len(persistent)))
        total_persistences.append(float(persistent_lifetimes.sum()))

    ph_rows.append(
        {
            # NOTE(review): assumes the index in the file name is the row
            # label of that lambda in tcp_sweep.csv — confirm against the
            # Rust exporter.
            "lambda": float(tcp.loc[lambda_idx, "lambda"]),
            "betti1_mean": float(np.mean(betti_counts)),
            "betti1_std": float(np.std(betti_counts)),
            "total_persistence_mean": float(np.mean(total_persistences)),
            "total_persistence_std": float(np.std(total_persistences)),
            "num_runs": int(len(betti_counts)),
        }
    )

tcp_ph_summary_df = pd.DataFrame(ph_rows).sort_values("lambda").reset_index(drop=True)
# Centered rolling means; min_periods=1 keeps the edge rows instead of
# turning them into NaN.
tcp_ph_summary_df["betti1_mean_smooth"] = (
    tcp_ph_summary_df["betti1_mean"].rolling(TCP_SMOOTHING_WINDOW, center=True, min_periods=1).mean()
)
tcp_ph_summary_df["betti1_std_smooth"] = (
    tcp_ph_summary_df["betti1_std"].rolling(TCP_SMOOTHING_WINDOW, center=True, min_periods=1).mean()
)
tcp_ph_summary_path = OUTPUT_DIR / "tcp_ph_summary.csv"
tcp_ph_summary_df.to_csv(tcp_ph_summary_path, index=False)

# Total persistence is plotted unsmoothed; the Betti-1 figure uses the
# smoothed columns.
fig_tcp_persistence = make_band_figure(
    tcp_ph_summary_df,
    "lambda",
    "total_persistence_mean",
    "total_persistence_std",
    "TCP Total Persistence vs Lambda",
    "total_persistence_mean",
    color="#66a61e",
    line_name="Total persistence mean",
)
tcp_persistence_png = save_png(fig_tcp_persistence, "fig_tcp_total_persistence_vs_lambda.png")

fig_tcp_betti = make_band_figure(
    tcp_ph_summary_df,
    "lambda",
    "betti1_mean_smooth",
    "betti1_std_smooth",
    "TCP Betti-1 Mean vs Lambda",
    "betti1_mean (smoothed)",
    color="#e7298a",
    line_name="Smoothed Betti-1 mean",
)
tcp_betti_png = save_png(fig_tcp_betti, "fig_tcp_betti1_mean_vs_lambda.png")

display_saved_png(tcp_persistence_png)
display_saved_png(tcp_betti_png)


## Cross-Layer Structural Summary

This overlay compares min-max normalized AET echo slope, IWLT entropy density, and RLT expansion ratio on the common lambda grid.


In [None]:
# Perturbed-sweep tables are loaded here and reused by the later robustness
# and structural-law cells.
aet_perturbed = pd.read_csv(require_file(OUTPUT_DIR / "aet_sweep_perturbed.csv"))
iwlt_perturbed = pd.read_csv(require_file(OUTPUT_DIR / "iwlt_sweep_perturbed.csv"))
rlt_perturbed = pd.read_csv(require_file(OUTPUT_DIR / "rlt_sweep_perturbed.csv"))

# Inner-join the three baseline measures on lambda; only lambda values that
# compare exactly equal across all three tables survive the join.
cross_layer_df = (
    aet[["lambda", "echo_slope"]]
    .merge(iwlt[["lambda", "entropy_density"]], on="lambda", how="inner")
    .merge(rlt[["lambda", "expansion_ratio"]], on="lambda", how="inner")
    .sort_values("lambda")
    .reset_index(drop=True)
)

# Rescale each measure to [0, 1] so the three curves share one axis.
cross_layer_df["echo_slope_norm"] = minmax_norm(cross_layer_df["echo_slope"])
cross_layer_df["entropy_density_norm"] = minmax_norm(cross_layer_df["entropy_density"])
cross_layer_df["expansion_ratio_norm"] = minmax_norm(cross_layer_df["expansion_ratio"])

fig_cross_layer = make_multiline_figure(
    cross_layer_df,
    "lambda",
    [
        {"y_col": "echo_slope_norm", "name": "AET echo slope", "color": "#1f77b4"},
        {"y_col": "entropy_density_norm", "name": "IWLT entropy density", "color": "#d95f02"},
        {"y_col": "expansion_ratio_norm", "name": "RLT expansion ratio", "color": "#1b9e77"},
    ],
    "Cross-Layer Normalized Structural Measures vs Lambda",
    "normalized value",
)
cross_layer_png = save_png(fig_cross_layer, "fig_cross_layer_summary_vs_lambda.png")
display_saved_png(cross_layer_png)


## Deterministic Robustness Sweeps

These figures compare the baseline curves against small deterministic perturbations in the AET, IWLT, and RLT update laws.


In [None]:
# AET: baseline vs perturbed echo_slope.
# NOTE(review): the perturbed column is aligned to the baseline by row index,
# not by lambda — this assumes both CSVs share the same lambda grid in the
# same order; confirm against the Rust exporter.
aet_robustness_df = pd.DataFrame(
    {
        "lambda": aet["lambda"],
        "Baseline": aet["echo_slope"],
        "Perturbed": aet_perturbed["echo_slope"],
    }
)
fig_aet_robustness = make_multiline_figure(
    aet_robustness_df,
    "lambda",
    [
        {"y_col": "Baseline", "name": "Baseline", "color": "#1f77b4"},
        {"y_col": "Perturbed", "name": "Perturbed", "color": "#6baed6"},
    ],
    "AET Robustness Under Deterministic Perturbations",
    "echo_slope",
)
# make_multiline_figure pins the y range to [-0.02, 1.02]; these values are
# not normalized, so the pin is meant to be lifted here.
# NOTE(review): confirm plotly treats range=None as "unset".
fig_aet_robustness.update_yaxes(range=None)
aet_robustness_png = save_png(fig_aet_robustness, "fig_aet_robustness.png")

# IWLT: baseline vs perturbed entropy_density (same index-alignment
# assumption as the AET frame above).
iwlt_robustness_df = pd.DataFrame(
    {
        "lambda": iwlt["lambda"],
        "Baseline": iwlt["entropy_density"],
        "Perturbed": iwlt_perturbed["entropy_density"],
    }
)
fig_iwlt_robustness = make_multiline_figure(
    iwlt_robustness_df,
    "lambda",
    [
        {"y_col": "Baseline", "name": "Baseline", "color": "#d95f02"},
        {"y_col": "Perturbed", "name": "Perturbed", "color": "#fdae6b"},
    ],
    "IWLT Robustness Under Deterministic Perturbations",
    "entropy_density",
)
# Same y-range reset as for the AET figure.
fig_iwlt_robustness.update_yaxes(range=None)
iwlt_robustness_png = save_png(fig_iwlt_robustness, "fig_iwlt_robustness.png")

# RLT: four traces on one axis. Expansion ratio is drawn solid, escape rate
# dashed; each perturbed trace uses its own lambda column for x.
fig_rlt_robustness = go.Figure()
fig_rlt_robustness.add_trace(
    go.Scatter(
        x=rlt["lambda"],
        y=rlt["expansion_ratio"],
        mode="lines",
        line={"width": 3, "color": "#1b9e77"},
        name="Expansion ratio (baseline)",
    )
)
fig_rlt_robustness.add_trace(
    go.Scatter(
        x=rlt_perturbed["lambda"],
        y=rlt_perturbed["expansion_ratio"],
        mode="lines",
        line={"width": 3, "color": "#74c476"},
        name="Expansion ratio (perturbed)",
    )
)
fig_rlt_robustness.add_trace(
    go.Scatter(
        x=rlt["lambda"],
        y=rlt["escape_rate"],
        mode="lines",
        line={"width": 2, "color": "#7570b3", "dash": "dash"},
        name="Escape rate (baseline)",
    )
)
fig_rlt_robustness.add_trace(
    go.Scatter(
        x=rlt_perturbed["lambda"],
        y=rlt_perturbed["escape_rate"],
        mode="lines",
        line={"width": 2, "color": "#9e9ac8", "dash": "dash"},
        name="Escape rate (perturbed)",
    )
)
fig_rlt_robustness.update_layout(
    title="RLT Robustness Under Deterministic Perturbations",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 40, "t": 90, "b": 70},
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
fig_rlt_robustness.update_xaxes(title="lambda", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
fig_rlt_robustness.update_yaxes(title="transport measure", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
rlt_robustness_png = save_png(fig_rlt_robustness, "fig_rlt_robustness.png")

# Show the three saved robustness PNGs inline.
display_saved_png(aet_robustness_png)
display_saved_png(iwlt_robustness_png)
display_saved_png(rlt_robustness_png)


## AET–IWLT Structural Law

The ADD paper’s numerical section uses the near-linear relation between AET echo slope and IWLT entropy density as a compact structural law across the lambda sweep.


In [None]:
# Least-squares fits of entropy_density (y) against echo_slope (x).
# NOTE(review): the two columns are paired by row position, not joined on
# lambda — this assumes the AET and IWLT tables share the same lambda grid in
# the same order; confirm against the Rust exporter.
law_baseline = linear_regression_summary(aet["echo_slope"], iwlt["entropy_density"])
law_perturbed = linear_regression_summary(aet_perturbed["echo_slope"], iwlt_perturbed["entropy_density"])

# One summary row per variant; the fitted values ("y_fit") are deliberately
# left out of the CSV.
aet_iwlt_law_summary = pd.DataFrame(
    [
        {
            "variant": "baseline",
            "slope": law_baseline["slope"],
            "intercept": law_baseline["intercept"],
            "pearson_r": law_baseline["pearson_r"],
            "r_squared": law_baseline["r_squared"],
        },
        {
            "variant": "perturbed",
            "slope": law_perturbed["slope"],
            "intercept": law_perturbed["intercept"],
            "pearson_r": law_perturbed["pearson_r"],
            "r_squared": law_perturbed["r_squared"],
        },
    ]
)
aet_iwlt_law_summary_path = OUTPUT_DIR / "aet_iwlt_law_summary.csv"
aet_iwlt_law_summary.to_csv(aet_iwlt_law_summary_path, index=False)

# Evaluate each fitted line on 200 evenly spaced points across the observed
# echo_slope range of its own variant.
x_baseline = np.linspace(float(aet["echo_slope"].min()), float(aet["echo_slope"].max()), 200)
y_baseline = law_baseline["slope"] * x_baseline + law_baseline["intercept"]
x_perturbed = np.linspace(float(aet_perturbed["echo_slope"].min()), float(aet_perturbed["echo_slope"].max()), 200)
y_perturbed = law_perturbed["slope"] * x_perturbed + law_perturbed["intercept"]

# Scatter of the samples plus the fitted line, for baseline and perturbed runs.
fig_structural_law = go.Figure()
fig_structural_law.add_trace(
    go.Scatter(
        x=aet["echo_slope"],
        y=iwlt["entropy_density"],
        mode="markers",
        marker={"size": 7, "color": "#1f77b4", "opacity": 0.7},
        name="Baseline samples",
    )
)
fig_structural_law.add_trace(
    go.Scatter(
        x=x_baseline,
        y=y_baseline,
        mode="lines",
        line={"width": 3, "color": "#1f77b4"},
        name=f"Baseline fit (R²={law_baseline['r_squared']:.4f})",
    )
)
fig_structural_law.add_trace(
    go.Scatter(
        x=aet_perturbed["echo_slope"],
        y=iwlt_perturbed["entropy_density"],
        mode="markers",
        marker={"size": 6, "color": "#d95f02", "opacity": 0.45},
        name="Perturbed samples",
    )
)
fig_structural_law.add_trace(
    go.Scatter(
        x=x_perturbed,
        y=y_perturbed,
        mode="lines",
        line={"width": 3, "color": "#d95f02", "dash": "dash"},
        name=f"Perturbed fit (R²={law_perturbed['r_squared']:.4f})",
    )
)
fig_structural_law.update_layout(
    title="AET–IWLT Structural Law",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 40, "t": 90, "b": 70},
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
fig_structural_law.update_xaxes(title="echo_slope", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
fig_structural_law.update_yaxes(title="entropy_density", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
structural_law_png = save_png(fig_structural_law, "fig_aet_iwlt_structural_law.png")
display_saved_png(structural_law_png)


In [None]:
# Two stacked panels with independent x axes: the normalized cross-layer
# curves on top, the AET–IWLT scatter with its regression line below.
hero_fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=False,
    vertical_spacing=0.14,
    subplot_titles=(
        "Normalized Structural Measures vs Lambda",
        "Entropy Density vs Echo Slope (AET–IWLT Law)",
    ),
)

# Row 1: the three min-max normalized measures from cross_layer_df.
hero_fig.add_trace(
    go.Scatter(
        x=cross_layer_df["lambda"],
        y=cross_layer_df["echo_slope_norm"],
        mode="lines",
        line={"width": 3, "color": "#1f77b4"},
        name="AET echo slope",
    ),
    row=1,
    col=1,
)
hero_fig.add_trace(
    go.Scatter(
        x=cross_layer_df["lambda"],
        y=cross_layer_df["entropy_density_norm"],
        mode="lines",
        line={"width": 3, "color": "#d95f02"},
        name="IWLT entropy density",
    ),
    row=1,
    col=1,
)
hero_fig.add_trace(
    go.Scatter(
        x=cross_layer_df["lambda"],
        y=cross_layer_df["expansion_ratio_norm"],
        mode="lines",
        line={"width": 3, "color": "#1b9e77"},
        name="RLT expansion ratio",
    ),
    row=1,
    col=1,
)

# Row 2: baseline samples (paired by row position, as in the structural-law
# fit) and the baseline regression line.
hero_fig.add_trace(
    go.Scatter(
        x=aet["echo_slope"],
        y=iwlt["entropy_density"],
        mode="markers",
        marker={"size": 7, "color": "#444444", "opacity": 0.65},
        name="Baseline samples",
    ),
    row=2,
    col=1,
)

# Evaluate the baseline fit on 200 evenly spaced points across the observed
# echo_slope range.
x_hero = np.linspace(float(aet["echo_slope"].min()), float(aet["echo_slope"].max()), 200)
y_hero = law_baseline["slope"] * x_hero + law_baseline["intercept"]
hero_fig.add_trace(
    go.Scatter(
        x=x_hero,
        y=y_hero,
        mode="lines",
        line={"width": 3, "color": "#111111"},
        name=f"Regression line (R²={law_baseline['r_squared']:.4f})",
    ),
    row=2,
    col=1,
)

hero_fig.update_layout(
    title="ADD Structural Stack Hero Figure",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 50, "t": 110, "b": 70},
    height=1200,
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
# Per-panel axis styling; only the top panel pins its y range, because its
# values are normalized to [0, 1].
hero_fig.update_xaxes(title_text="lambda", showgrid=True, gridcolor="#d9d9d9", zeroline=False, row=1, col=1)
hero_fig.update_yaxes(title_text="normalized value", showgrid=True, gridcolor="#d9d9d9", zeroline=False, range=[-0.02, 1.02], row=1, col=1)
hero_fig.update_xaxes(title_text="echo_slope", showgrid=True, gridcolor="#d9d9d9", zeroline=False, row=2, col=1)
hero_fig.update_yaxes(title_text="entropy_density", showgrid=True, gridcolor="#d9d9d9", zeroline=False, row=2, col=1)
# Export at the same 1200 px height as the layout (save_png defaults to 900).
hero_png = save_png(hero_fig, "fig_hero_add_stack.png", height=1200)
display_saved_png(hero_png)


In [None]:
# Final inventory: list every PNG and every summary CSV now present in the
# run directory, sorted by file name.
pngs = sorted(entry.name for entry in OUTPUT_DIR.glob("*.png"))
summary_csvs = sorted(entry.name for entry in OUTPUT_DIR.glob("*summary*.csv"))
for heading, names in (("Saved PNGs:", pngs), ("Saved summary CSVs:", summary_csvs)):
    print(heading)
    for name in names:
        print(" -", name)
