# dsfb-add Colab Sweep Notebook

`dsfb-add` is the empirical sweep crate for the Algebraic Deterministic Dynamics (ADD) stack inside the DSFB workspace. The Rust crate runs deterministic lambda sweeps for AET, IWLT, TCP, and RLT, writes reproducible CSV diagnostics into a timestamped output directory, and exports the raw structural data needed for paper figures.

This notebook is the figure-generation and post-processing layer. It reads the Rust sweep outputs, recomputes the structural-law regressions and persistent-homology summaries, and writes publication-ready PNG figures plus summary CSVs back into the same run directory. That includes: baseline and perturbed robustness comparisons, finite-size scaling of the AET-IWLT law, RLT phase-boundary extraction, residual and ratio diagnostics, and the annotated hero figure for the ADD paper.

The notebook is designed to fail closed if either the notebook copy or the Colab package environment is stale relative to the repository `main` branch. For Colab reliability it uses the self-contained `kaleido==0.2.1` export path instead of Chrome-managed Kaleido v1.

Recommended workflow:

1. Open the notebook from the repository Colab link on the `main` branch.
2. Use `Runtime -> Restart session and run all` when Colab prompts after the install cell.
3. Either point `OUTPUT_DIR` at an existing Rust-generated run directory, or leave it as `None` so Colab can clone the repo and generate a fresh run automatically.
4. The notebook writes all PNGs and summary CSVs into that same timestamped run directory, so each run folder is a self-contained empirical artifact for the numerical ADD paper section.


In [None]:
# Pin exact versions of the plotting (plotly), static image export (kaleido),
# and persistent-homology (ripser) packages used by the cells below.
%pip install -q --upgrade "plotly==6.1.1" "kaleido==0.2.1" "ripser==0.6.12"


In [None]:
import json
import sys
from importlib.metadata import version
from urllib.request import urlopen

NOTEBOOK_VERSION = "2026-03-01-15"
NOTEBOOK_RAW_URL = "https://raw.githubusercontent.com/infinityabundance/dsfb/main/crates/dsfb-add/dsfb_add_sweep.ipynb"
# Exact versions pinned by the install cell. Every pinned package is listed so
# the staleness check below verifies all of them, including ripser.
EXPECTED_PACKAGE_VERSIONS = {
    "plotly": "6.1.1",
    "kaleido": "0.2.1",
    "ripser": "0.6.12",
}
# Upper bound on the remote version lookup so a stalled connection cannot hang
# the notebook indefinitely.
REMOTE_FETCH_TIMEOUT_SECONDS = 30

# Forget any already-imported copies of the pinned packages so that later
# imports pick up the versions installed by the install cell.
for prefix in ("plotly", "kaleido", "ripser"):
    loaded = [name for name in list(sys.modules) if name == prefix or name.startswith(prefix + ".")]
    for name in loaded:
        del sys.modules[name]

# Compare this copy's version stamp with the one published on `main`.
# The response is closed deterministically via the context manager.
with urlopen(NOTEBOOK_RAW_URL, timeout=REMOTE_FETCH_TIMEOUT_SECONDS) as response:
    remote_nb = json.load(response)
remote_version = remote_nb.get("metadata", {}).get("dsfb_add_notebook_version")
if remote_version != NOTEBOOK_VERSION:
    raise RuntimeError(
        f"Stale notebook copy detected. This notebook is {NOTEBOOK_VERSION}, but main has {remote_version}. "
        "Reopen the notebook from the repository Colab link."
    )

# Fail closed when any installed distribution differs from its pin.
installed_versions = {name: version(name) for name in EXPECTED_PACKAGE_VERSIONS}
mismatches = {
    name: (installed_versions[name], expected)
    for name, expected in EXPECTED_PACKAGE_VERSIONS.items()
    if installed_versions[name] != expected
}
if mismatches:
    mismatch_text = "\n".join(
        f" - {name}: installed {installed}, expected {expected}"
        for name, (installed, expected) in mismatches.items()
    )
    raise RuntimeError(
        "Notebook environment is stale. Re-run the install cell, then restart the Colab runtime. "
        "Version mismatches:\n" + mismatch_text
    )

print("Notebook freshness check passed:", NOTEBOOK_VERSION)
print("Pinned packages:", installed_versions)


In [None]:
from pathlib import Path

# Leave OUTPUT_DIR as None to use a fresh Colab-generated run by default.
# Or set it explicitly, for example:
# OUTPUT_DIR = Path("/content/output-dsfb-add/2026-03-01T12-00-00Z")
OUTPUT_DIR = None
# When True (and OUTPUT_DIR is None), the notebook installs cargo if needed,
# clones REPO_URL into REPO_DIR, and runs the Rust sweep binary.
RUN_RUST_SWEEP_IN_COLAB = True
REPO_URL = "https://github.com/infinityabundance/dsfb.git"
REPO_DIR = Path("/content/dsfb")
# Prepended to PATH when looking up and invoking cargo.
CARGO_BIN_DIR = Path("/root/.cargo/bin")
# Passed to the sweep binary as `--multi-steps a,b,c` when non-empty.
RUST_MULTI_STEPS = []  # Example: [5000, 10000, 20000] for finite-size scaling runs.
# Minimum H1 lifetime for a class to count in the TCP Betti-1 / total-persistence summaries.
TCP_PERSISTENCE_THRESHOLD = 0.05
# Centered rolling-window length used to smooth the TCP Betti-1 mean and std curves.
TCP_SMOOTHING_WINDOW = 5

# Roots searched, in order, for the newest run directory when OUTPUT_DIR is None.
# Duplicates are removed later with unique_paths().
OUTPUT_ROOT_CANDIDATES = [
    REPO_DIR / "output-dsfb-add",
    Path("/content/output-dsfb-add"),
    Path("/content/dsfb/output-dsfb-add"),
    Path("/content/drive/MyDrive/output-dsfb-add"),
    Path("output-dsfb-add"),
]

# Echo the configured value as the cell output.
OUTPUT_DIR


In [None]:
import os
import re
import shutil
import subprocess
from collections import defaultdict

import numpy as np
import pandas as pd
import plotly
import plotly.graph_objects as go
import plotly.io as pio
from IPython.display import Image, display
from plotly.subplots import make_subplots
from ripser import ripser

# The module actually imported must match the pinned plotly version; a mismatch
# means a stale module is loaded in this runtime.
expected_plotly_version = EXPECTED_PACKAGE_VERSIONS["plotly"]
if plotly.__version__ != expected_plotly_version:
    raise RuntimeError(
        f"Imported stale plotly module {plotly.__version__}; expected {expected_plotly_version}. "
        "Restart the Colab runtime and run all cells again."
    )

# Notebook-wide plotly defaults: inline renderer and no base template.
pio.renderers.default = "notebook"
pio.templates.default = "none"


In [None]:
def require_file(path: Path) -> Path:
    """Return ``path`` unchanged if it exists on disk.

    Raises:
        FileNotFoundError: if ``path`` does not exist (the path is the message).
    """
    if path.exists():
        return path
    raise FileNotFoundError(path)


def latest_timestamped_dir(root: Path):
    """Return the sub-directory of ``root`` that sorts last, or ``None``.

    ``None`` is returned when ``root`` is missing, is not a directory, or
    contains no sub-directories. Ordering is plain path ordering.
    """
    if not root.is_dir():
        return None
    subdirs = [entry for entry in root.iterdir() if entry.is_dir()]
    if not subdirs:
        return None
    return max(subdirs)


def resolve_output_dir(explicit_dir, candidate_roots):
    """Pick the run directory the notebook should read from and write to.

    Args:
        explicit_dir: a user-configured directory, or ``None`` to auto-detect.
        candidate_roots: roots searched in order; the first root that has a
            sub-directory wins, and its last-sorting sub-directory is returned.

    Raises:
        FileNotFoundError: if ``explicit_dir`` is set but is not an existing
            directory, or if no candidate root yields a run directory.
    """
    if explicit_dir is None:
        # Lazy generator: stop probing roots as soon as one yields a run directory.
        probes = (latest_timestamped_dir(root) for root in candidate_roots)
        newest = next((found for found in probes if found is not None), None)
        if newest is not None:
            return newest

        searched = "\n".join(f" - {root}" for root in candidate_roots)
        raise FileNotFoundError(
            "No output-dsfb-add run directory was found. Upload or mount the Rust-generated "
            "output folder, or set OUTPUT_DIR explicitly. Searched:\n" + searched
        )

    configured = Path(explicit_dir)
    if configured.is_dir():
        return configured
    raise FileNotFoundError(
        f"Configured OUTPUT_DIR does not exist: {configured}. Upload or mount your run folder first."
    )


def save_png(fig, filename: str, width: int = 1400, height: int = 900, scale: int = 2) -> Path:
    """Write ``fig`` to ``OUTPUT_DIR / filename`` and return that path.

    ``width``, ``height``, and ``scale`` are forwarded to ``fig.write_image``.
    """
    destination = OUTPUT_DIR / filename
    export_options = {"width": width, "height": height, "scale": scale}
    fig.write_image(destination, **export_options)
    return destination


def display_saved_png(path: Path):
    """Show the image file at ``path`` inline and return ``path``."""
    rendered = Image(filename=str(path))
    display(rendered)
    return path


def hex_to_rgba(hex_color: str, alpha: float) -> str:
    """Convert a 6-digit hex color (optional leading ``#``) to an ``rgba(...)`` string.

    Raises:
        ValueError: if the color does not have exactly six characters after
            leading ``#`` characters are stripped.
    """
    digits = hex_color.lstrip("#")
    if len(digits) != 6:
        raise ValueError(f"Expected 6-digit hex color, got {digits}")
    # Decode the three two-character channels in R, G, B order.
    red, green, blue = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return f"rgba({red}, {green}, {blue}, {alpha})"


def unique_paths(paths):
    """Return ``paths`` with duplicates removed, keeping first occurrences in order.

    Two entries are duplicates when their ``str()`` forms are equal.
    """
    first_by_key = {}
    for path in paths:
        # setdefault keeps the earliest entry; dicts preserve insertion order.
        first_by_key.setdefault(str(path), path)
    return list(first_by_key.values())


def cargo_env():
    """Return a copy of the process environment with ``CARGO_BIN_DIR`` first on ``PATH``."""
    env = dict(os.environ)
    env["PATH"] = ":".join([str(CARGO_BIN_DIR), env["PATH"]])
    return env


def run_cmd(args, cwd=None, env=None):
    """Echo a command, then run it and raise if it exits non-zero.

    Args:
        args: command and arguments; each element is converted with ``str``.
        cwd: optional working directory (converted with ``str`` when truthy).
        env: optional environment mapping passed to ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: on a non-zero exit status (``check=True``).
    """
    argv = list(map(str, args))
    print("+", " ".join(argv))
    workdir = str(cwd) if cwd else None
    subprocess.run(argv, cwd=workdir, env=env, check=True)


def ensure_cargo_installed():
    """Install a minimal Rust toolchain via rustup unless ``cargo`` is already found.

    Raises:
        RuntimeError: if ``cargo`` still cannot be found after the installer ran.
    """

    def cargo_found():
        # Look cargo up on the PATH that includes CARGO_BIN_DIR.
        return shutil.which("cargo", path=cargo_env()["PATH"])

    if cargo_found():
        return

    installer = "curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal"
    run_cmd(["bash", "-lc", installer])

    if not cargo_found():
        raise RuntimeError("cargo is unavailable after rustup installation")


def ensure_repo_checkout():
    """Shallow-clone ``REPO_URL`` into ``REPO_DIR`` unless a checkout already exists.

    Raises:
        RuntimeError: if ``REPO_DIR`` exists, is non-empty, and has no ``.git`` entry.
    """
    git_marker = REPO_DIR / ".git"
    if git_marker.exists():
        return

    REPO_DIR.parent.mkdir(parents=True, exist_ok=True)

    # Refuse to clone over unrelated content.
    holds_other_content = REPO_DIR.exists() and next(REPO_DIR.iterdir(), None) is not None
    if holds_other_content:
        raise RuntimeError(
            f"Repo directory exists but is not a git checkout: {REPO_DIR}. Remove it or set OUTPUT_DIR explicitly."
        )

    run_cmd(["git", "clone", "--depth", "1", REPO_URL, REPO_DIR])


def generate_fresh_output_if_requested():
    """Run the Rust sweep when no ``OUTPUT_DIR`` is configured and sweeps are enabled.

    Does nothing when ``OUTPUT_DIR`` is set or ``RUN_RUST_SWEEP_IN_COLAB`` is
    falsy. Otherwise ensures cargo and the repo checkout exist, then runs the
    ``dsfb_add_sweep`` binary from ``REPO_DIR``.
    """
    wants_fresh_run = OUTPUT_DIR is None and RUN_RUST_SWEEP_IN_COLAB
    if not wants_fresh_run:
        return

    ensure_cargo_installed()
    ensure_repo_checkout()

    sweep_cmd = ["cargo", "run", "-p", "dsfb-add", "--bin", "dsfb_add_sweep"]
    if RUST_MULTI_STEPS:
        steps_arg = ",".join(map(str, RUST_MULTI_STEPS))
        sweep_cmd += ["--", "--multi-steps", steps_arg]
    run_cmd(sweep_cmd, cwd=REPO_DIR, env=cargo_env())


def normalize_bool_series(values):
    """Coerce a pandas Series of boolean-like values to a plain ``bool`` Series.

    A Series that already has ``bool`` dtype is returned as a ``bool`` copy.
    Otherwise each value is stringified, stripped of surrounding whitespace,
    lower-cased, and treated as ``True`` only if it is one of ``"true"``,
    ``"1"``, or ``"1.0"``. Every other token (including missing values)
    becomes ``False``.

    Args:
        values: pandas Series of flags as read from CSV.

    Returns:
        A ``bool``-dtype Series with the same index as ``values``.
    """
    if getattr(values, "dtype", None) == bool:
        return values.astype(bool)
    # "1.0" covers integer flags that pandas parsed as floats.
    truthy_tokens = ("true", "1", "1.0")
    tokens = values.astype(str).str.strip().str.lower()
    # isin() yields a bool Series directly, so no NaN fill/downcast step is needed.
    return tokens.isin(truthy_tokens)


def infer_steps_from_name(path: Path):
    """Parse the integer from a trailing ``_N<digits>`` in ``path.stem``.

    Returns ``None`` when the stem does not end with that suffix.
    """
    suffix_match = re.search(r"_N(\d+)$", path.stem)
    if suffix_match is None:
        return None
    return int(suffix_match.group(1))


def load_sweep_family(prefix: str) -> pd.DataFrame:
    """Load and stack every ``{prefix}_sweep*.csv`` found in ``OUTPUT_DIR``.

    Missing ``steps_per_run`` values are inferred from a ``_N<digits>`` file
    name suffix. A missing ``is_perturbed`` column is set from whether the file
    stem contains ``"perturbed"``. Each row records its ``source_file``.

    Returns:
        One frame sorted by ``steps_per_run``, ``is_perturbed``, ``lambda``
        with a fresh integer index.

    Raises:
        ValueError: if ``steps_per_run`` is absent and cannot be inferred.
        FileNotFoundError: if no matching CSV exists.
    """
    loaded = []
    csv_paths = sorted(OUTPUT_DIR.glob(f"{prefix}_sweep*.csv"))
    for csv_path in csv_paths:
        frame = pd.read_csv(csv_path)

        if "steps_per_run" not in frame.columns:
            steps_from_name = infer_steps_from_name(csv_path)
            if steps_from_name is None:
                raise ValueError(f"steps_per_run missing and cannot be inferred from {csv_path.name}")
            frame["steps_per_run"] = steps_from_name

        if "is_perturbed" not in frame.columns:
            frame["is_perturbed"] = "perturbed" in csv_path.stem

        # Normalize dtypes so frames from different files stack and filter consistently.
        frame["steps_per_run"] = frame["steps_per_run"].astype(int)
        frame["is_perturbed"] = normalize_bool_series(frame["is_perturbed"])
        frame["source_file"] = csv_path.name
        loaded.append(frame)

    if not loaded:
        raise FileNotFoundError(f"No sweep CSVs found for {prefix} in {OUTPUT_DIR}")

    stacked = pd.concat(loaded, ignore_index=True)
    ordered = stacked.sort_values(["steps_per_run", "is_perturbed", "lambda"])
    return ordered.reset_index(drop=True)


def available_steps(df: pd.DataFrame, is_perturbed=None):
    """Return the sorted distinct ``steps_per_run`` values in ``df`` as ints.

    When ``is_perturbed`` is not ``None``, only rows whose ``is_perturbed``
    column equals it are considered.
    """
    if is_perturbed is None:
        rows = df
    else:
        rows = df[df["is_perturbed"] == is_perturbed]
    distinct_steps = rows["steps_per_run"].dropna().unique()
    return sorted(map(int, distinct_steps))


def select_run(df: pd.DataFrame, steps_per_run: int, is_perturbed: bool = False) -> pd.DataFrame:
    """Return the rows of one run, sorted by ``lambda`` with a fresh index.

    A run is identified by ``steps_per_run`` (compared as ``int``) and
    ``is_perturbed`` (compared as ``bool``).

    Raises:
        ValueError: if no row matches.
    """
    steps_mask = df["steps_per_run"] == int(steps_per_run)
    regime_mask = df["is_perturbed"] == bool(is_perturbed)
    rows = df[steps_mask & regime_mask]
    if rows.empty:
        raise ValueError(
            f"No rows found for steps_per_run={steps_per_run}, is_perturbed={is_perturbed}"
        )
    return rows.sort_values("lambda").reset_index(drop=True)


def largest_common_steps(*dfs, is_perturbed=False):
    """Return the largest ``steps_per_run`` value present in every frame of ``dfs``.

    Only rows matching ``is_perturbed`` are considered in each frame.

    Raises:
        ValueError: if the frames share no value (or no frames are given).
    """
    step_sets = [set(available_steps(df, is_perturbed=is_perturbed)) for df in dfs]
    shared = set.intersection(*step_sets) if step_sets else set()
    if not shared:
        raise ValueError(f"No common steps_per_run values for is_perturbed={is_perturbed}")
    return max(shared)


def compute_phase_boundary_metrics(frame: pd.DataFrame) -> dict:
    """Locate where ``expansion_ratio`` first reaches 0.1, 0.5, and 0.9 along ``lambda``.

    Rows are scanned in increasing ``lambda``. Each crossing is the first
    ``lambda`` whose ``expansion_ratio`` is at or above the threshold, or NaN
    if the threshold is never reached.

    Returns:
        Dict with ``lambda_star`` (0.5 crossing), ``lambda_0_1``,
        ``lambda_0_9``, and ``transition_width`` (``lambda_0_9 - lambda_0_1``,
        NaN when either crossing is missing).
    """
    ordered = frame.sort_values("lambda").reset_index(drop=True)
    lambdas = ordered["lambda"]
    ratios = ordered["expansion_ratio"]

    def first_crossing(threshold: float):
        reached = lambdas[ratios >= threshold]
        if reached.empty:
            return np.nan
        return float(reached.iloc[0])

    metrics = {
        "lambda_star": first_crossing(0.5),
        "lambda_0_1": first_crossing(0.1),
        "lambda_0_9": first_crossing(0.9),
    }
    lower, upper = metrics["lambda_0_1"], metrics["lambda_0_9"]
    width_defined = not (np.isnan(lower) or np.isnan(upper))
    metrics["transition_width"] = upper - lower if width_defined else np.nan
    return metrics


def load_phase_boundary_summary(rlt_df: pd.DataFrame) -> pd.DataFrame:
    """Return per-run RLT phase-boundary metrics.

    If ``OUTPUT_DIR / "rlt_phase_boundary.csv"`` exists it is read and its
    ``steps_per_run`` / ``is_perturbed`` columns are normalized. Otherwise the
    metrics are recomputed from ``rlt_df`` for every baseline and perturbed run.
    The result is sorted by ``steps_per_run`` then ``is_perturbed``.
    """
    sort_keys = ["steps_per_run", "is_perturbed"]
    summary_csv = OUTPUT_DIR / "rlt_phase_boundary.csv"

    if summary_csv.exists():
        summary = pd.read_csv(summary_csv)
        summary["steps_per_run"] = summary["steps_per_run"].astype(int)
        summary["is_perturbed"] = normalize_bool_series(summary["is_perturbed"])
        return summary.sort_values(sort_keys).reset_index(drop=True)

    # No exported summary: derive the metrics from the sweep rows.
    records = [
        {
            "steps_per_run": steps,
            "is_perturbed": perturbed,
            **compute_phase_boundary_metrics(select_run(rlt_df, steps, is_perturbed=perturbed)),
        }
        for perturbed in (False, True)
        for steps in available_steps(rlt_df, is_perturbed=perturbed)
    ]
    return pd.DataFrame(records).sort_values(sort_keys).reset_index(drop=True)


def load_robustness_metrics():
    """Read ``OUTPUT_DIR / "robustness_metrics.csv"`` if present, else return ``None``.

    ``steps_per_run`` is cast to ``int`` when the column exists.
    """
    metrics_csv = OUTPUT_DIR / "robustness_metrics.csv"
    if metrics_csv.exists():
        metrics = pd.read_csv(metrics_csv)
        if "steps_per_run" in metrics.columns:
            metrics["steps_per_run"] = metrics["steps_per_run"].astype(int)
        return metrics
    return None


def load_tcp_points_dir(steps_per_run: int) -> Path:
    """Return the TCP point-cloud directory for ``steps_per_run``.

    Prefers ``tcp_points_N<steps>`` under ``OUTPUT_DIR`` and falls back to
    ``tcp_points``; raises ``FileNotFoundError`` if the fallback is missing.
    """
    per_step_dir = OUTPUT_DIR / f"tcp_points_N{int(steps_per_run)}"
    if not per_step_dir.exists():
        return require_file(OUTPUT_DIR / "tcp_points")
    return per_step_dir


def load_rlt_examples_dir(steps_per_run: int) -> Path:
    """Return the RLT example-trajectory directory for ``steps_per_run``.

    Prefers ``rlt_examples_N<steps>`` under ``OUTPUT_DIR`` and falls back to
    ``rlt_examples``; raises ``FileNotFoundError`` if the fallback is missing.
    """
    per_step_dir = OUTPUT_DIR / f"rlt_examples_N{int(steps_per_run)}"
    if not per_step_dir.exists():
        return require_file(OUTPUT_DIR / "rlt_examples")
    return per_step_dir


def make_line_figure(df, x_col: str, y_col: str, title: str, y_title: str, color: str = "#1f77b4"):
    """Build a single-series line-and-marker figure on a white background.

    Args:
        df: frame providing the ``x_col`` and ``y_col`` columns.
        title: figure title.
        y_title: y-axis title (the x-axis is always titled ``"lambda"``).
        color: line and marker color.

    Returns:
        The configured ``go.Figure`` (legend entry hidden for the trace).
    """
    stroke = dict(width=3, color=color)
    dots = dict(size=6, color=color)
    curve = go.Scatter(
        x=df[x_col],
        y=df[y_col],
        mode="lines+markers",
        line=stroke,
        marker=dots,
        name=y_col,
        showlegend=False,
    )

    fig = go.Figure()
    fig.add_trace(curve)

    layout_options = dict(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font=dict(size=16, color="#222222"),
        margin=dict(l=80, r=40, t=90, b=70),
    )
    fig.update_layout(**layout_options)

    # Both axes share the same light-grey grid styling.
    axis_style = dict(showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    fig.update_xaxes(title="lambda", **axis_style)
    fig.update_yaxes(title=y_title, **axis_style)
    return fig


def make_band_figure(df, x_col: str, mean_col: str, std_col: str, title: str, y_title: str, color: str, line_name: str):
    """Build a mean curve with a shaded mean ± std band.

    Args:
        df: frame providing ``x_col``, ``mean_col``, and ``std_col``.
        title: figure title.
        y_title: y-axis title (the x-axis is always titled ``"lambda"``).
        color: 6-digit hex color for the mean line; the band uses it at 0.18 alpha.
        line_name: legend label of the mean curve.

    Returns:
        The configured ``go.Figure`` with a horizontal legend above the plot.
    """
    x_values = df[x_col]
    mean_values = df[mean_col]
    band_top = mean_values + df[std_col]
    band_bottom = mean_values - df[std_col]

    # Invisible upper edge first; the lower edge then fills up to it ("tonexty").
    top_edge = go.Scatter(x=x_values, y=band_top, mode="lines", line={"width": 0}, hoverinfo="skip", showlegend=False)
    bottom_edge = go.Scatter(
        x=x_values,
        y=band_bottom,
        mode="lines",
        line={"width": 0},
        fill="tonexty",
        fillcolor=hex_to_rgba(color, 0.18),
        hoverinfo="skip",
        name="±1 std",
    )
    mean_curve = go.Scatter(
        x=x_values,
        y=mean_values,
        mode="lines+markers",
        line={"width": 3, "color": color},
        marker={"size": 6, "color": color},
        name=line_name,
    )

    fig = go.Figure()
    for trace in (top_edge, bottom_edge, mean_curve):
        fig.add_trace(trace)

    layout_options = dict(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font=dict(size=16, color="#222222"),
        margin=dict(l=80, r=40, t=90, b=70),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1.0),
    )
    fig.update_layout(**layout_options)

    axis_style = dict(showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    fig.update_xaxes(title="lambda", **axis_style)
    fig.update_yaxes(title=y_title, **axis_style)
    return fig


def make_multiline_figure(df, x_col: str, series_specs, title: str, y_title: str, y_range=None):
    """Build a figure with one trace per entry of ``series_specs``.

    Each spec is a dict with required keys ``y_col``, ``color``, ``name`` and
    optional keys ``mode`` (default ``"lines"``), ``dash`` (default
    ``"solid"``), and ``marker_size`` (default 6).

    Args:
        df: frame providing ``x_col`` and every spec's ``y_col``.
        title: figure title.
        y_title: y-axis title (the x-axis is always titled ``"lambda"``).
        y_range: optional fixed y-axis range.

    Returns:
        The configured ``go.Figure`` with a horizontal legend above the plot.
    """

    def trace_for(spec):
        series_color = spec["color"]
        return go.Scatter(
            x=df[x_col],
            y=df[spec["y_col"]],
            mode=spec.get("mode", "lines"),
            line={"width": 3, "color": series_color, "dash": spec.get("dash", "solid")},
            marker={"size": spec.get("marker_size", 6), "color": series_color},
            name=spec["name"],
        )

    fig = go.Figure()
    for spec in series_specs:
        fig.add_trace(trace_for(spec))

    layout_options = dict(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font=dict(size=16, color="#222222"),
        margin=dict(l=80, r=40, t=90, b=70),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1.0),
    )
    fig.update_layout(**layout_options)

    axis_style = dict(showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    fig.update_xaxes(title="lambda", **axis_style)
    fig.update_yaxes(title=y_title, **axis_style)
    if y_range is not None:
        fig.update_yaxes(range=y_range)
    return fig


def minmax_norm(values):
    """Rescale ``values`` linearly so the minimum maps to 0 and the maximum to 1.

    When the spread is at most 1e-12, a zero Series with the same index is
    returned instead of dividing by the spread.
    """
    series = pd.Series(values, copy=False)
    lowest = series.min()
    spread = series.max() - lowest
    if spread <= 1e-12:
        return pd.Series(np.zeros(len(series)), index=series.index)
    return (series - lowest) / spread


def linear_regression_summary(x_values, y_values):
    """Fit ``y = slope * x + intercept`` and report fit-quality statistics.

    A single sample yields a flat fit through that point with all correlation
    and R² values set to 1.0.

    Returns:
        Dict with ``slope``, ``intercept``, ``pearson_r``, ``spearman_rho``,
        ``r_squared``, ``mse_resid``, ``residual_variance``, and the arrays
        ``y_fit`` and ``residuals``. ``r_squared`` is 1.0 when the total sum
        of squares of ``y`` is at most 1e-12.

    Raises:
        ValueError: if the inputs differ in length or are empty.
    """
    xs = np.asarray(x_values, dtype=float)
    ys = np.asarray(y_values, dtype=float)
    sample_count = len(xs)
    if sample_count != len(ys):
        raise ValueError("x_values and y_values must have the same length")
    if sample_count == 0:
        raise ValueError("linear_regression_summary requires at least one sample")

    if sample_count > 1:
        slope, intercept = np.polyfit(xs, ys, 1)
        fitted = slope * xs + intercept
        pearson = float(np.corrcoef(xs, ys)[0, 1])
        # Spearman's rho is computed as the Pearson correlation of the ranks.
        x_ranks = pd.Series(xs).rank()
        y_ranks = pd.Series(ys).rank()
        spearman = float(x_ranks.corr(y_ranks))
        ss_res = float(np.sum((ys - fitted) ** 2))
        ss_tot = float(np.sum((ys - np.mean(ys)) ** 2))
        if ss_tot > 1e-12:
            r_squared = 1.0 - ss_res / ss_tot
        else:
            r_squared = 1.0
    else:
        # One point: flat line through it, reported as a perfect fit.
        slope = 0.0
        intercept = float(ys[0])
        fitted = np.full_like(ys, intercept, dtype=float)
        r_squared = pearson = spearman = 1.0

    residuals = ys - fitted
    return {
        "slope": float(slope),
        "intercept": float(intercept),
        "pearson_r": pearson,
        "spearman_rho": spearman,
        "r_squared": float(r_squared),
        "mse_resid": float(np.mean(residuals ** 2)),
        "residual_variance": float(np.var(residuals)),
        "y_fit": fitted,
        "residuals": residuals,
    }


def build_aet_iwlt_law_frame(aet_df: pd.DataFrame, iwlt_df: pd.DataFrame, steps_per_run: int, is_perturbed: bool):
    """Join AET ``echo_slope`` and IWLT ``entropy_density`` for one run on ``lambda``.

    Both inputs are filtered to the given ``steps_per_run`` / ``is_perturbed``
    run and inner-joined on ``lambda``. The result is sorted by ``lambda``
    with a fresh index.
    """
    aet_run = select_run(aet_df, steps_per_run, is_perturbed=is_perturbed)
    aet_columns = aet_run[["lambda", "echo_slope", "steps_per_run", "is_perturbed"]]
    iwlt_run = select_run(iwlt_df, steps_per_run, is_perturbed=is_perturbed)
    iwlt_columns = iwlt_run[["lambda", "entropy_density"]]

    joined = aet_columns.merge(iwlt_columns, on="lambda", how="inner")
    return joined.sort_values("lambda").reset_index(drop=True)


In [None]:
# Optionally run the Rust sweep first; this is a no-op when OUTPUT_DIR is
# already set or RUN_RUST_SWEEP_IN_COLAB is False.
generate_fresh_output_if_requested()
# De-duplicate the search roots, then bind OUTPUT_DIR to either the explicit
# directory or the last-sorting run directory found under the first usable root.
OUTPUT_ROOT_CANDIDATES = unique_paths(OUTPUT_ROOT_CANDIDATES)
OUTPUT_DIR = resolve_output_dir(OUTPUT_DIR, OUTPUT_ROOT_CANDIDATES)
# Echo the resolved run directory as the cell output.
OUTPUT_DIR


In [None]:
# Load every sweep CSV per layer (all trajectory lengths, baseline and perturbed).
aet_all = load_sweep_family("aet")
iwlt_all = load_sweep_family("iwlt")
rlt_all = load_sweep_family("rlt")
tcp_all = load_sweep_family("tcp")
rlt_phase_summary = load_phase_boundary_summary(rlt_all)
robustness_metrics = load_robustness_metrics()

# Largest N shared by AET, IWLT, and RLT for each regime; the joint N is the
# smaller of the two. Note that TCP is not part of this intersection.
largest_baseline_steps = largest_common_steps(aet_all, iwlt_all, rlt_all, is_perturbed=False)
largest_perturbed_steps = largest_common_steps(aet_all, iwlt_all, rlt_all, is_perturbed=True)
largest_joint_steps = min(largest_baseline_steps, largest_perturbed_steps)

# Baseline runs at the largest baseline N.
aet = select_run(aet_all, largest_baseline_steps, is_perturbed=False)
iwlt = select_run(iwlt_all, largest_baseline_steps, is_perturbed=False)
rlt = select_run(rlt_all, largest_baseline_steps, is_perturbed=False)
tcp = select_run(tcp_all, largest_baseline_steps, is_perturbed=False)

# Perturbed runs at the joint N.
aet_perturbed = select_run(aet_all, largest_joint_steps, is_perturbed=True)
iwlt_perturbed = select_run(iwlt_all, largest_joint_steps, is_perturbed=True)
rlt_perturbed = select_run(rlt_all, largest_joint_steps, is_perturbed=True)

# Baseline phase-boundary row for the largest N; recomputed from the RLT run
# when the summary has no matching row. lambda_star is None when it is NaN.
baseline_phase_row = rlt_phase_summary[(rlt_phase_summary["steps_per_run"] == largest_baseline_steps) & (~rlt_phase_summary["is_perturbed"])]
baseline_phase_row = baseline_phase_row.iloc[0] if not baseline_phase_row.empty else pd.Series(compute_phase_boundary_metrics(rlt))
lambda_star = float(baseline_phase_row["lambda_star"]) if pd.notna(baseline_phase_row["lambda_star"]) else None

print(f"Largest baseline N: {largest_baseline_steps}")
print(f"Largest joint baseline/perturbed N: {largest_joint_steps}")
if lambda_star is not None:
    print(f"Baseline RLT phase transition lambda*: {lambda_star:.6f}")


In [None]:
# Baseline AET echo slope versus lambda at the largest baseline N; the PNG is
# written into OUTPUT_DIR and then displayed inline.
fig_aet = make_line_figure(
    aet,
    "lambda",
    "echo_slope",
    f"AET Echo Slope vs Lambda (N = {largest_baseline_steps})",
    "echo_slope",
)
aet_png = save_png(fig_aet, "fig_aet_echo_slope_vs_lambda.png")
display_saved_png(aet_png)


In [None]:
# Baseline IWLT entropy density versus lambda at the largest baseline N; the
# PNG is written into OUTPUT_DIR and then displayed inline.
fig_iwlt = make_line_figure(
    iwlt,
    "lambda",
    "entropy_density",
    f"IWLT Entropy Density vs Lambda (N = {largest_baseline_steps})",
    "entropy_density",
    color="#d95f02",
)
iwlt_png = save_png(fig_iwlt, "fig_iwlt_entropy_density_vs_lambda.png")
display_saved_png(iwlt_png)


In [None]:
# Baseline RLT escape rate versus lambda.
fig_rlt_escape = make_line_figure(
    rlt,
    "lambda",
    "escape_rate",
    f"RLT Escape Rate vs Lambda (N = {largest_baseline_steps})",
    "escape_rate",
    color="#7570b3",
)
rlt_escape_png = save_png(fig_rlt_escape, "fig_rlt_escape_rate_vs_lambda.png")

# Baseline RLT expansion ratio versus lambda, with lambda* marked when known.
fig_rlt_expansion = make_line_figure(
    rlt,
    "lambda",
    "expansion_ratio",
    f"RLT Expansion Ratio vs Lambda (N = {largest_baseline_steps})",
    "expansion_ratio",
    color="#1b9e77",
)
if lambda_star is not None:
    fig_rlt_expansion.add_vline(x=lambda_star, line_dash="dash", line_color="#444444", opacity=0.7)
rlt_expansion_png = save_png(fig_rlt_expansion, "fig_rlt_expansion_ratio_vs_lambda.png")

# Center the zoom window on the first lambda whose escape_rate reaches 0.8.
# The half-width is ten mean lambda steps, but never less than 0.08. Without
# such a point, fall back to the middle lambda and a 0.15 half-width.
transition_candidates = rlt.index[rlt["escape_rate"] >= 0.8].tolist()
if transition_candidates:
    transition_idx = int(transition_candidates[0])
    center_lambda = float(rlt.loc[transition_idx, "lambda"])
    zoom_half_width = max(0.08, 10.0 * float(np.diff(rlt["lambda"]).mean()))
else:
    center_lambda = float(rlt["lambda"].iloc[len(rlt) // 2])
    zoom_half_width = 0.15

# Clamp the window to the sampled lambda range; if fewer than three points
# remain, plot the full run instead.
zoom_min = max(float(rlt["lambda"].min()), center_lambda - zoom_half_width)
zoom_max = min(float(rlt["lambda"].max()), center_lambda + zoom_half_width)
rlt_zoom = rlt[(rlt["lambda"] >= zoom_min) & (rlt["lambda"] <= zoom_max)].copy()
if len(rlt_zoom) < 3:
    rlt_zoom = rlt.copy()

# Zoomed view overlaying expansion ratio and escape rate on one axis.
fig_rlt_zoom = go.Figure()
fig_rlt_zoom.add_trace(
    go.Scatter(
        x=rlt_zoom["lambda"],
        y=rlt_zoom["expansion_ratio"],
        mode="lines+markers",
        line={"width": 3, "color": "#1b9e77"},
        marker={"size": 6, "color": "#1b9e77"},
        name="Expansion ratio",
    )
)
fig_rlt_zoom.add_trace(
    go.Scatter(
        x=rlt_zoom["lambda"],
        y=rlt_zoom["escape_rate"],
        mode="lines+markers",
        line={"width": 3, "color": "#7570b3"},
        marker={"size": 6, "color": "#7570b3"},
        name="Escape rate",
    )
)
fig_rlt_zoom.update_layout(
    title="RLT Expansion Ratio vs Lambda (zoomed transition)",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 40, "t": 90, "b": 70},
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
fig_rlt_zoom.update_xaxes(title="lambda", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
fig_rlt_zoom.update_yaxes(title="transport measure", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
rlt_zoom_png = save_png(fig_rlt_zoom, "fig_rlt_expansion_ratio_vs_lambda_zoom.png")

# Show the three saved figures inline.
display_saved_png(rlt_escape_png)
display_saved_png(rlt_expansion_png)
display_saved_png(rlt_zoom_png)


## RLT Example Trajectories

The Rust sweep exports representative bounded and expanding resonance walks so the transport transition is visible at the raw trajectory level, not just in the summary curves.


In [None]:
def _first_example_csv(directory, pattern):
    """Return the first (sorted) file in ``directory`` matching ``pattern``.

    Raises:
        FileNotFoundError: if nothing matches, naming the pattern and directory
            instead of surfacing a bare ``IndexError``.
    """
    matches = sorted(directory.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            f"No RLT example trajectory matching {pattern} was found in {directory}"
        )
    return matches[0]


def _make_trajectory_figure(frame, title, line_color, colorscale, trace_name):
    """Plot ``vertex_id`` against ``step``, coloring markers by ``distance_from_start``."""
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=frame["step"],
            y=frame["vertex_id"],
            mode="lines+markers",
            line={"width": 2.5, "color": line_color},
            marker={"size": 5, "color": frame["distance_from_start"], "colorscale": colorscale, "showscale": False},
            text=frame["distance_from_start"],
            hovertemplate="step=%{x}<br>vertex_id=%{y}<br>distance=%{text}<extra></extra>",
            name=trace_name,
        )
    )
    fig.update_layout(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font={"size": 16, "color": "#222222"},
        margin={"l": 80, "r": 40, "t": 90, "b": 70},
        showlegend=False,
    )
    fig.update_xaxes(title="step", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    fig.update_yaxes(title="vertex_id", showgrid=True, gridcolor="#d9d9d9", zeroline=False)
    return fig


# Pick the first exported bounded and expanding example for the largest baseline N.
rlt_examples_dir = load_rlt_examples_dir(largest_baseline_steps)
bounded_path = _first_example_csv(rlt_examples_dir, "trajectory_bounded_lambda_*.csv")
expanding_path = _first_example_csv(rlt_examples_dir, "trajectory_expanding_lambda_*.csv")

rlt_bounded = pd.read_csv(bounded_path)
rlt_expanding = pd.read_csv(expanding_path)

# The lambda of each example is read from the first row of its CSV.
bounded_lambda = float(rlt_bounded["lambda"].iloc[0])
expanding_lambda = float(rlt_expanding["lambda"].iloc[0])

fig_rlt_bounded = _make_trajectory_figure(
    rlt_bounded,
    f"RLT Trajectory in Bounded Regime (lambda = {bounded_lambda:.3f}, N = {largest_baseline_steps})",
    line_color="#386cb0",
    colorscale="Blues",
    trace_name="Bounded trajectory",
)
rlt_bounded_png = save_png(fig_rlt_bounded, "fig_rlt_trajectory_bounded.png")

fig_rlt_expanding = _make_trajectory_figure(
    rlt_expanding,
    f"RLT Trajectory in Expanding Regime (lambda = {expanding_lambda:.3f}, N = {largest_baseline_steps})",
    line_color="#ef3b2c",
    colorscale="Reds",
    trace_name="Expanding trajectory",
)
rlt_expanding_png = save_png(fig_rlt_expanding, "fig_rlt_trajectory_expanding.png")

display_saved_png(rlt_bounded_png)
display_saved_png(rlt_expanding_png)


## TCP Persistent-Homology Summary

The Rust sweep exports multiple deterministic point-cloud runs per lambda. This notebook computes Betti-1 count and total-persistence summaries across those runs, with total persistence treated as the primary thermodynamic TCP observable.


In [None]:
# Compiled once: point-cloud files are named lambda_<index>_run_<index>.csv.
TCP_POINT_FILE_PATTERN = re.compile(r"lambda_(\d+)_run_(\d+)\.csv$")


def _persistent_h1_lifetimes(points):
    """Return lifetimes of the H1 classes of ``points`` that count as persistent.

    A class is kept when its death is finite and its lifetime (death - birth)
    exceeds ``TCP_PERSISTENCE_THRESHOLD``.
    """
    diagrams = ripser(points, maxdim=1)["dgms"]
    h1 = diagrams[1] if len(diagrams) > 1 else np.empty((0, 2))
    if not len(h1):
        return np.empty((0,))
    lifetimes = h1[:, 1] - h1[:, 0]
    keep = np.isfinite(h1[:, 1]) & (lifetimes > TCP_PERSISTENCE_THRESHOLD)
    return lifetimes[keep]


tcp_ph_rows = []
for steps_per_run in available_steps(tcp_all, is_perturbed=False):
    tcp_step_df = select_run(tcp_all, steps_per_run, is_perturbed=False)
    point_dir = load_tcp_points_dir(steps_per_run)

    # Group the point-cloud files of this N by their lambda index.
    point_groups = defaultdict(list)
    for point_file in sorted(point_dir.glob("lambda_*_run_*.csv")):
        match = TCP_POINT_FILE_PATTERN.search(point_file.name)
        if not match:
            continue
        point_groups[int(match.group(1))].append(point_file)

    for lambda_idx in sorted(point_groups):
        # The lambda index is a row position in the lambda-sorted sweep; fail
        # with a readable message if the files and the sweep disagree.
        if lambda_idx >= len(tcp_step_df):
            raise IndexError(
                f"TCP point file lambda index {lambda_idx} in {point_dir} exceeds the "
                f"{len(tcp_step_df)} lambda rows available for steps_per_run={steps_per_run}"
            )

        lifetimes_per_run = [
            _persistent_h1_lifetimes(pd.read_csv(point_file)[["x", "y"]].to_numpy())
            for point_file in sorted(point_groups[lambda_idx])
        ]
        # Betti-1 is the number of persistent classes; total persistence sums their lifetimes.
        betti_counts = [float(len(lifetimes)) for lifetimes in lifetimes_per_run]
        total_persistences = [float(lifetimes.sum()) for lifetimes in lifetimes_per_run]

        tcp_ph_rows.append({
            "steps_per_run": int(steps_per_run),
            "lambda": float(tcp_step_df.iloc[lambda_idx]["lambda"]),
            "betti1_mean": float(np.mean(betti_counts)),
            "betti1_std": float(np.std(betti_counts)),
            "total_persistence_mean": float(np.mean(total_persistences)),
            "total_persistence_std": float(np.std(total_persistences)),
            "num_runs": int(len(betti_counts)),
        })

# Without any point-cloud file the summary frame would have no columns and the
# sort below would fail with an opaque KeyError; report the real cause instead.
if not tcp_ph_rows:
    raise FileNotFoundError(
        f"No TCP point-cloud CSVs (lambda_*_run_*.csv) were found for the run in {OUTPUT_DIR}"
    )

tcp_ph_summary_df = pd.DataFrame(tcp_ph_rows).sort_values(["steps_per_run", "lambda"]).reset_index(drop=True)
# Smooth the Betti-1 mean and std per N with a centered rolling mean.
tcp_ph_summary_df["betti1_mean_smooth"] = tcp_ph_summary_df.groupby("steps_per_run")["betti1_mean"].transform(lambda series: series.rolling(TCP_SMOOTHING_WINDOW, center=True, min_periods=1).mean())
tcp_ph_summary_df["betti1_std_smooth"] = tcp_ph_summary_df.groupby("steps_per_run")["betti1_std"].transform(lambda series: series.rolling(TCP_SMOOTHING_WINDOW, center=True, min_periods=1).mean())
tcp_ph_summary_path = OUTPUT_DIR / "tcp_ph_summary.csv"
tcp_ph_summary_df.to_csv(tcp_ph_summary_path, index=False)

# Figures use only the rows at the largest baseline N.
tcp_ph_largest = tcp_ph_summary_df[tcp_ph_summary_df["steps_per_run"] == largest_baseline_steps].copy()
fig_tcp_persistence = make_band_figure(
    tcp_ph_largest,
    "lambda",
    "total_persistence_mean",
    "total_persistence_std",
    f"TCP Total Persistence vs Lambda (N = {largest_baseline_steps})",
    "total_persistence_mean",
    color="#66a61e",
    line_name="Total persistence mean",
)
tcp_persistence_png = save_png(fig_tcp_persistence, "fig_tcp_total_persistence_vs_lambda.png")

fig_tcp_betti = make_band_figure(
    tcp_ph_largest,
    "lambda",
    "betti1_mean_smooth",
    "betti1_std_smooth",
    f"TCP Betti-1 Mean vs Lambda (N = {largest_baseline_steps})",
    "betti1_mean (smoothed)",
    color="#e7298a",
    line_name="Smoothed Betti-1 mean",
)
tcp_betti_png = save_png(fig_tcp_betti, "fig_tcp_betti1_mean_vs_lambda.png")

display_saved_png(tcp_persistence_png)
display_saved_png(tcp_betti_png)


## Cross-Layer Structural Summary

This plot overlays the normalized AET, IWLT, and RLT observables on the common lambda grid at the largest available baseline trajectory length, and marks the RLT transport transition lambda*.


In [None]:
# Inner-join the three baseline observables on lambda so only lambdas present
# in all of AET, IWLT, and RLT are plotted.
cross_layer_df = (
    aet[["lambda", "echo_slope"]]
    .merge(iwlt[["lambda", "entropy_density"]], on="lambda", how="inner")
    .merge(rlt[["lambda", "expansion_ratio"]], on="lambda", how="inner")
    .sort_values("lambda")
    .reset_index(drop=True)
)

# Min-max normalize each observable so the curves share one y-axis.
cross_layer_df["echo_slope_norm"] = minmax_norm(cross_layer_df["echo_slope"])
cross_layer_df["entropy_density_norm"] = minmax_norm(cross_layer_df["entropy_density"])
cross_layer_df["expansion_ratio_norm"] = minmax_norm(cross_layer_df["expansion_ratio"])

fig_cross_layer = make_multiline_figure(
    cross_layer_df,
    "lambda",
    [
        {"y_col": "echo_slope_norm", "name": "AET echo slope", "color": "#1f77b4"},
        {"y_col": "entropy_density_norm", "name": "IWLT entropy density", "color": "#d95f02"},
        {"y_col": "expansion_ratio_norm", "name": "RLT expansion ratio", "color": "#1b9e77"},
    ],
    f"Cross-Layer Normalized Structural Measures vs Lambda (N = {largest_baseline_steps})",
    "normalized value",
    y_range=[-0.02, 1.02],
)
# Mark lambda* with a dashed vertical line and a label near the top of the plot area.
if lambda_star is not None:
    fig_cross_layer.add_vline(x=lambda_star, line_dash="dash", line_color="#444444", opacity=0.8)
    fig_cross_layer.add_annotation(x=lambda_star, y=0.98, xref="x", yref="y domain", text="λ*", showarrow=False, bgcolor="rgba(255,255,255,0.8)")
cross_layer_png = save_png(fig_cross_layer, "fig_cross_layer_summary_vs_lambda.png")
display_saved_png(cross_layer_png)


## Deterministic Robustness Sweeps

These curves compare the baseline and perturbed deterministic rules. The line plots give a visual check of stability, while `robustness_metrics.csv` records compact norms and phase-shift metrics for the paper.


In [None]:
# Baseline (unperturbed) runs of each subsystem at N = largest_joint_steps.
aet_baseline_joint, iwlt_baseline_joint, rlt_baseline_joint = (
    select_run(_table, largest_joint_steps, is_perturbed=False)
    for _table in (aet_all, iwlt_all, rlt_all)
)


def _baseline_vs_perturbed(baseline, perturbed, column):
    """Put one column of the baseline and perturbed runs side by side.

    The frame is built from three Series (baseline lambda, baseline column,
    perturbed column); pandas aligns them on their index.
    """
    return pd.DataFrame(
        {
            "lambda": baseline["lambda"],
            "Baseline": baseline[column],
            "Perturbed": perturbed[column],
        }
    )


# --- AET: echo slope, baseline vs perturbed ---
aet_joint = _baseline_vs_perturbed(aet_baseline_joint, aet_perturbed, "echo_slope")
_aet_series = [
    {"y_col": "Baseline", "name": "Baseline", "color": "#1f77b4"},
    {"y_col": "Perturbed", "name": "Perturbed", "color": "#6baed6"},
]
fig_aet_robustness = make_multiline_figure(
    aet_joint,
    "lambda",
    _aet_series,
    f"AET Robustness Under Deterministic Perturbations (N = {largest_joint_steps})",
    "echo_slope",
)
# Clear any y range so the axis is autoscaled to the data.
fig_aet_robustness.update_yaxes(range=None)
aet_robustness_png = save_png(fig_aet_robustness, "fig_aet_robustness.png")

# --- IWLT: entropy density, baseline vs perturbed ---
iwlt_joint = _baseline_vs_perturbed(iwlt_baseline_joint, iwlt_perturbed, "entropy_density")
_iwlt_series = [
    {"y_col": "Baseline", "name": "Baseline", "color": "#d95f02"},
    {"y_col": "Perturbed", "name": "Perturbed", "color": "#fdae6b"},
]
fig_iwlt_robustness = make_multiline_figure(
    iwlt_joint,
    "lambda",
    _iwlt_series,
    f"IWLT Robustness Under Deterministic Perturbations (N = {largest_joint_steps})",
    "entropy_density",
)
fig_iwlt_robustness.update_yaxes(range=None)
iwlt_robustness_png = save_png(fig_iwlt_robustness, "fig_iwlt_robustness.png")

# --- RLT: expansion ratio (solid) and escape rate (dashed) ---
fig_rlt_robustness = go.Figure()
# (frame, column, line style, legend label), in drawing order.
_rlt_trace_specs = [
    (rlt_baseline_joint, "expansion_ratio", {"width": 3, "color": "#1b9e77"}, "Expansion ratio (baseline)"),
    (rlt_perturbed, "expansion_ratio", {"width": 3, "color": "#74c476"}, "Expansion ratio (perturbed)"),
    (rlt_baseline_joint, "escape_rate", {"width": 2, "color": "#7570b3", "dash": "dash"}, "Escape rate (baseline)"),
    (rlt_perturbed, "escape_rate", {"width": 2, "color": "#9e9ac8", "dash": "dash"}, "Escape rate (perturbed)"),
]
for _rlt_frame, _rlt_column, _rlt_line, _rlt_label in _rlt_trace_specs:
    fig_rlt_robustness.add_trace(
        go.Scatter(
            x=_rlt_frame["lambda"],
            y=_rlt_frame[_rlt_column],
            mode="lines",
            line=_rlt_line,
            name=_rlt_label,
        )
    )
fig_rlt_robustness.update_layout(
    title=f"RLT Robustness Under Deterministic Perturbations (N = {largest_joint_steps})",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 40, "t": 90, "b": 70},
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
_rlt_axis_style = {"showgrid": True, "gridcolor": "#d9d9d9", "zeroline": False}
fig_rlt_robustness.update_xaxes(title="lambda", **_rlt_axis_style)
fig_rlt_robustness.update_yaxes(title="transport measure", **_rlt_axis_style)
rlt_robustness_png = save_png(fig_rlt_robustness, "fig_rlt_robustness.png")

# Show the three exported figures, then the metrics table when one was loaded.
for _robustness_png in (aet_robustness_png, iwlt_robustness_png, rlt_robustness_png):
    display_saved_png(_robustness_png)

if robustness_metrics is not None:
    _metric_sort_keys = ["steps_per_run", "subsystem", "metric_name"]
    display(robustness_metrics.sort_values(_metric_sort_keys).reset_index(drop=True))


## AET–IWLT Structural Law

The numerical ADD claim is that AET echo slope and IWLT entropy density lock into a stable near-linear law across lambda. The summaries below standardise that law across finite-size runs and perturbed rules.


In [None]:
# One summary row per (trajectory length, rule variant): regression statistics
# of entropy_density against echo_slope plus the entropy/echo ratio moments.
law_rows = []
for is_perturbed in (False, True):
    # Only trajectory lengths present in both the AET and IWLT tables can be paired.
    _aet_steps = set(available_steps(aet_all, is_perturbed=is_perturbed))
    _iwlt_steps = set(available_steps(iwlt_all, is_perturbed=is_perturbed))
    for steps_per_run in sorted(_aet_steps.intersection(_iwlt_steps)):
        pair_frame = build_aet_iwlt_law_frame(aet_all, iwlt_all, steps_per_run, is_perturbed)
        fit = linear_regression_summary(pair_frame["echo_slope"], pair_frame["entropy_density"])

        # Division by a zero echo slope yields +/-inf; drop those (and NaN)
        # before taking moments.
        raw_ratio = pair_frame["entropy_density"] / pair_frame["echo_slope"]
        finite_ratio = raw_ratio.replace([np.inf, -np.inf], np.nan).dropna()
        if len(finite_ratio):
            ratio_mean = float(finite_ratio.mean())
            ratio_std = float(finite_ratio.std(ddof=0))
        else:
            ratio_mean = np.nan
            ratio_std = np.nan

        law_rows.append(
            {
                "steps_per_run": int(steps_per_run),
                "is_perturbed": bool(is_perturbed),
                "pearson_r": fit["pearson_r"],
                "spearman_rho": fit["spearman_rho"],
                "slope": fit["slope"],
                "intercept": fit["intercept"],
                "r2": fit["r_squared"],
                "mse_resid": fit["mse_resid"],
                "residual_variance": fit["residual_variance"],
                "ratio_mean": ratio_mean,
                "ratio_std": ratio_std,
            }
        )

aet_iwlt_law_summary = pd.DataFrame(law_rows)
aet_iwlt_law_summary = aet_iwlt_law_summary.sort_values(["steps_per_run", "is_perturbed"]).reset_index(drop=True)
aet_iwlt_law_summary_path = OUTPUT_DIR / "aet_iwlt_law_summary.csv"
aet_iwlt_law_summary.to_csv(aet_iwlt_law_summary_path, index=False)

# Baseline and perturbed sample frames and fits at N = largest_joint_steps.
law_baseline_frame = build_aet_iwlt_law_frame(aet_all, iwlt_all, largest_joint_steps, False)
law_perturbed_frame = build_aet_iwlt_law_frame(aet_all, iwlt_all, largest_joint_steps, True)
law_baseline = linear_regression_summary(law_baseline_frame["echo_slope"], law_baseline_frame["entropy_density"])
law_perturbed = linear_regression_summary(law_perturbed_frame["echo_slope"], law_perturbed_frame["entropy_density"])

# Shared x extent over both variants, and the two fitted lines sampled on it.
x_min = min(float(_frame["echo_slope"].min()) for _frame in (law_baseline_frame, law_perturbed_frame))
x_max = max(float(_frame["echo_slope"].max()) for _frame in (law_baseline_frame, law_perturbed_frame))
x_line = np.linspace(x_min, x_max, 400)
y_baseline = law_baseline["intercept"] + law_baseline["slope"] * x_line
y_perturbed = law_perturbed["intercept"] + law_perturbed["slope"] * x_line

fig_structural_law = go.Figure()
fig_structural_law.add_trace(
    go.Scatter(
        x=law_baseline_frame["echo_slope"],
        y=law_baseline_frame["entropy_density"],
        mode="markers",
        marker={"size": 7, "color": "#1f77b4", "opacity": 0.7},
        name="Baseline samples",
    )
)
fig_structural_law.add_trace(
    go.Scatter(
        x=x_line,
        y=y_baseline,
        mode="lines",
        line={"width": 3, "color": "#1f77b4"},
        name=f"Baseline fit (R²={law_baseline['r_squared']:.4f})",
    )
)
fig_structural_law.add_trace(
    go.Scatter(
        x=law_perturbed_frame["echo_slope"],
        y=law_perturbed_frame["entropy_density"],
        mode="markers",
        marker={"size": 6, "color": "#d95f02", "opacity": 0.45},
        name="Perturbed samples",
    )
)
fig_structural_law.add_trace(
    go.Scatter(
        x=x_line,
        y=y_perturbed,
        mode="lines",
        line={"width": 3, "color": "#d95f02", "dash": "dash"},
        name=f"Perturbed fit (R²={law_perturbed['r_squared']:.4f})",
    )
)
fig_structural_law.update_layout(
    title=f"AET–IWLT Structural Law (N = {largest_joint_steps})",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 40, "t": 90, "b": 70},
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
_law_axis_style = {"showgrid": True, "gridcolor": "#d9d9d9", "zeroline": False}
fig_structural_law.update_xaxes(title="echo_slope", range=[x_min, x_max], **_law_axis_style)
fig_structural_law.update_yaxes(title="entropy_density", **_law_axis_style)
structural_law_png = save_png(fig_structural_law, "fig_aet_iwlt_structural_law.png")
display_saved_png(structural_law_png)


## Finite-size scaling of the AET–IWLT structural law

These plots track how the structural-law fit stabilises as the trajectory length N increases. The goal is to show convergence of the slope, R², and residual variance.


In [None]:
# Baseline-only rows of the law summary, ordered by trajectory length.
_is_baseline_row = ~aet_iwlt_law_summary["is_perturbed"]
aet_iwlt_scaling_summary = (
    aet_iwlt_law_summary.loc[_is_baseline_row]
    .copy()
    .sort_values("steps_per_run")
    .reset_index(drop=True)
)
aet_iwlt_scaling_summary_path = OUTPUT_DIR / "aet_iwlt_scaling_summary.csv"
aet_iwlt_scaling_summary.to_csv(aet_iwlt_scaling_summary_path, index=False)


def _scaling_figure(column, color, trace_name, title, y_title):
    """Plot one scaling-summary column against steps_per_run.

    The x axis is logarithmic when the summary has more than one row and
    linear otherwise.
    """
    axis_style = {"showgrid": True, "gridcolor": "#d9d9d9", "zeroline": False}
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=aet_iwlt_scaling_summary["steps_per_run"],
            y=aet_iwlt_scaling_summary[column],
            mode="lines+markers",
            line={"width": 3, "color": color},
            marker={"size": 8, "color": color},
            name=trace_name,
        )
    )
    fig.update_layout(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font={"size": 16, "color": "#222222"},
        margin={"l": 80, "r": 40, "t": 90, "b": 70},
        showlegend=False,
    )
    fig.update_xaxes(
        title="steps_per_run (N)",
        type="log" if len(aet_iwlt_scaling_summary) > 1 else "linear",
        **axis_style,
    )
    fig.update_yaxes(title=y_title, **axis_style)
    return fig


fig_scaling_slope = _scaling_figure(
    "slope", "#1f77b4", "Slope", "AET–IWLT Scaling: Regression Slope vs N", "slope"
)
fig_scaling_slope_png = save_png(fig_scaling_slope, "fig_aet_iwlt_scaling_slope_vs_N.png")

fig_scaling_r2 = _scaling_figure(
    "r2", "#d95f02", "R²", "AET–IWLT Scaling: R² vs N", "R²"
)
fig_scaling_r2_png = save_png(fig_scaling_r2, "fig_aet_iwlt_scaling_r2_vs_N.png")

fig_scaling_resid = _scaling_figure(
    "residual_variance",
    "#1b9e77",
    "Residual variance",
    "AET–IWLT Scaling: Residual Variance vs N",
    "residual variance",
)
fig_scaling_resid_png = save_png(fig_scaling_resid, "fig_aet_iwlt_scaling_resid_vs_N.png")

# Show the exported figures in the order they were built.
for _scaling_png in (fig_scaling_slope_png, fig_scaling_r2_png, fig_scaling_resid_png):
    display_saved_png(_scaling_png)


## Structural law diagnostics (residuals, ratios, log–log)

These diagnostics use the largest baseline trajectory length to stress-test the law beyond a single regression number: residual structure, the dimensionless entropy-to-echo ratio, and a log–log consistency check.


In [None]:
# Baseline law frame at N = largest_baseline_steps, extended with the linear
# fit residuals and the entropy/echo ratio (+/-inf from a zero echo slope
# becomes NaN).
diagnostics_frame = build_aet_iwlt_law_frame(aet_all, iwlt_all, largest_baseline_steps, False).copy()
diagnostics_law = linear_regression_summary(diagnostics_frame["echo_slope"], diagnostics_frame["entropy_density"])
diagnostics_frame["resid"] = diagnostics_law["residuals"]
_raw_diag_ratio = diagnostics_frame["entropy_density"] / diagnostics_frame["echo_slope"]
diagnostics_frame["ratio"] = _raw_diag_ratio.replace([np.inf, -np.inf], np.nan)

# Styling shared by every figure in this cell.
_diag_layout = {
    "paper_bgcolor": "white",
    "plot_bgcolor": "white",
    "font": {"size": 16, "color": "#222222"},
    "margin": {"l": 80, "r": 40, "t": 90, "b": 70},
}
_diag_axis = {"showgrid": True, "gridcolor": "#d9d9d9", "zeroline": False}

# Residuals against echo slope, markers coloured by lambda.
fig_resid = go.Figure()
fig_resid.add_trace(
    go.Scatter(
        x=diagnostics_frame["echo_slope"],
        y=diagnostics_frame["resid"],
        mode="markers",
        marker={"size": 7, "color": diagnostics_frame["lambda"], "colorscale": "Viridis", "showscale": False},
        name="Residuals",
    )
)
fig_resid.update_layout(
    title=f"AET–IWLT Residuals vs Echo Slope (N = {largest_baseline_steps})",
    showlegend=False,
    **_diag_layout,
)
fig_resid.update_xaxes(title="echo_slope", **_diag_axis)
fig_resid.update_yaxes(title="residual", **_diag_axis)
fig_resid_png = save_png(fig_resid, "fig_aet_iwlt_residuals_vs_echo.png")

# Residual distribution.
fig_resid_hist = go.Figure()
fig_resid_hist.add_trace(
    go.Histogram(
        x=diagnostics_frame["resid"],
        marker={"color": "#1f77b4"},
        nbinsx=30,
        name="Residuals",
    )
)
fig_resid_hist.update_layout(title="AET–IWLT Residual Histogram", showlegend=False, **_diag_layout)
fig_resid_hist.update_xaxes(title="residual", **_diag_axis)
fig_resid_hist.update_yaxes(title="count", **_diag_axis)
fig_resid_hist_png = save_png(fig_resid_hist, "fig_aet_iwlt_residual_hist.png")

# Ratio along the lambda sweep.
fig_ratio = go.Figure()
fig_ratio.add_trace(
    go.Scatter(
        x=diagnostics_frame["lambda"],
        y=diagnostics_frame["ratio"],
        mode="lines+markers",
        line={"width": 3, "color": "#d95f02"},
        marker={"size": 6, "color": "#d95f02"},
        name="ratio",
    )
)
fig_ratio.update_layout(
    title=f"Entropy Density / Echo Slope vs Lambda (N = {largest_baseline_steps})",
    showlegend=False,
    **_diag_layout,
)
fig_ratio.update_xaxes(title="lambda", **_diag_axis)
fig_ratio.update_yaxes(title="entropy_density / echo_slope", **_diag_axis)
fig_ratio_png = save_png(fig_ratio, "fig_aet_iwlt_ratio_vs_lambda.png")

# Ratio distribution (NaN entries removed first).
fig_ratio_hist = go.Figure()
fig_ratio_hist.add_trace(
    go.Histogram(
        x=diagnostics_frame["ratio"].dropna(),
        marker={"color": "#d95f02"},
        nbinsx=30,
        name="ratio",
    )
)
fig_ratio_hist.update_layout(title="Entropy Density / Echo Slope Histogram", showlegend=False, **_diag_layout)
fig_ratio_hist.update_xaxes(title="entropy_density / echo_slope", **_diag_axis)
fig_ratio_hist.update_yaxes(title="count", **_diag_axis)
fig_ratio_hist_png = save_png(fig_ratio_hist, "fig_aet_iwlt_ratio_hist.png")

# Log-log check: logarithms need strictly positive values in both columns.
positive_mask = (diagnostics_frame["echo_slope"] > 0.0) & (diagnostics_frame["entropy_density"] > 0.0)
loglog_frame = diagnostics_frame.loc[positive_mask].copy()
loglog_frame["log_echo_slope"] = np.log(loglog_frame["echo_slope"])
loglog_frame["log_entropy_density"] = np.log(loglog_frame["entropy_density"])
loglog_law = linear_regression_summary(loglog_frame["log_echo_slope"], loglog_frame["log_entropy_density"])
_log_x_low = float(loglog_frame["log_echo_slope"].min())
_log_x_high = float(loglog_frame["log_echo_slope"].max())
x_log_line = np.linspace(_log_x_low, _log_x_high, 200)
y_log_line = loglog_law["intercept"] + loglog_law["slope"] * x_log_line

fig_loglog = go.Figure()
fig_loglog.add_trace(
    go.Scatter(
        x=loglog_frame["log_echo_slope"],
        y=loglog_frame["log_entropy_density"],
        mode="markers",
        marker={"size": 7, "color": "#1b9e77", "opacity": 0.7},
        name="log samples",
    )
)
fig_loglog.add_trace(
    go.Scatter(
        x=x_log_line,
        y=y_log_line,
        mode="lines",
        line={"width": 3, "color": "#111111"},
        name=f"log–log fit (slope={loglog_law['slope']:.3f}, R²={loglog_law['r_squared']:.3f})",
    )
)
fig_loglog.update_layout(
    title=f"AET–IWLT Log–Log Check (N = {largest_baseline_steps})",
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
    **_diag_layout,
)
fig_loglog.update_xaxes(title="log(echo_slope)", **_diag_axis)
fig_loglog.update_yaxes(title="log(entropy_density)", **_diag_axis)
fig_loglog_png = save_png(fig_loglog, "fig_aet_iwlt_loglog.png")

# Single-row summary of the linear fit, the log-log fit and the ratio moments.
_diagnostics_row = {
    "steps_per_run": int(largest_baseline_steps),
    "linear_slope": diagnostics_law["slope"],
    "linear_intercept": diagnostics_law["intercept"],
    "linear_r2": diagnostics_law["r_squared"],
    "loglog_slope": loglog_law["slope"],
    "loglog_intercept": loglog_law["intercept"],
    "loglog_r2": loglog_law["r_squared"],
    "ratio_mean": float(diagnostics_frame["ratio"].mean()),
    "ratio_std": float(diagnostics_frame["ratio"].std(ddof=0)),
}
aet_iwlt_diagnostics_summary = pd.DataFrame([_diagnostics_row])
aet_iwlt_diagnostics_summary.to_csv(OUTPUT_DIR / "aet_iwlt_diagnostics_summary.csv", index=False)

# Show the exported figures in the order they were built.
for _diag_png in (fig_resid_png, fig_resid_hist_png, fig_ratio_png, fig_ratio_hist_png, fig_loglog_png):
    display_saved_png(_diag_png)


## RLT phase boundary scaling

The RLT transport transition is summarised by the first 0.5 crossing λ* and the 0.1–0.9 transition width. These plots show how those phase-boundary estimates move with trajectory length and under the perturbed deterministic rule.


In [None]:
# Split the phase summary by rule variant, each ordered by trajectory length.
_phase_is_perturbed = rlt_phase_summary["is_perturbed"]
phase_baseline = rlt_phase_summary[~_phase_is_perturbed].sort_values("steps_per_run")
phase_perturbed = rlt_phase_summary[_phase_is_perturbed].sort_values("steps_per_run")


def _phase_boundary_figure(column, baseline_color, perturbed_color, title):
    """Plot one phase-summary column against steps_per_run.

    The baseline curve is always drawn; the dashed perturbed curve is added
    only when perturbed rows exist. The y axis is titled with the column name.
    The x axis is logarithmic when there is more than one baseline row.
    """
    axis_style = {"showgrid": True, "gridcolor": "#d9d9d9", "zeroline": False}
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=phase_baseline["steps_per_run"],
            y=phase_baseline[column],
            mode="lines+markers",
            line={"width": 3, "color": baseline_color},
            marker={"size": 8, "color": baseline_color},
            name="Baseline",
        )
    )
    if not phase_perturbed.empty:
        fig.add_trace(
            go.Scatter(
                x=phase_perturbed["steps_per_run"],
                y=phase_perturbed[column],
                mode="lines+markers",
                line={"width": 3, "color": perturbed_color, "dash": "dash"},
                marker={"size": 8, "color": perturbed_color},
                name="Perturbed",
            )
        )
    fig.update_layout(
        title=title,
        paper_bgcolor="white",
        plot_bgcolor="white",
        font={"size": 16, "color": "#222222"},
        margin={"l": 80, "r": 40, "t": 90, "b": 70},
        showlegend=True,
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
    )
    fig.update_xaxes(
        title="steps_per_run (N)",
        type="log" if len(phase_baseline) > 1 else "linear",
        **axis_style,
    )
    fig.update_yaxes(title=column, **axis_style)
    return fig


fig_phase_lambda_star = _phase_boundary_figure(
    "lambda_star", "#1b9e77", "#d95f02", "RLT Phase Boundary: λ* vs N"
)
fig_phase_lambda_star_png = save_png(fig_phase_lambda_star, "fig_rlt_phase_lambda_star_vs_N.png")

fig_phase_width = _phase_boundary_figure(
    "transition_width", "#7570b3", "#e7298a", "RLT Phase Boundary: Transition Width vs N"
)
fig_phase_width_png = save_png(fig_phase_width, "fig_rlt_phase_width_vs_N.png")

for _phase_png in (fig_phase_lambda_star_png, fig_phase_width_png):
    display_saved_png(_phase_png)


## Universality test: perturbed rule

This figure overlays the baseline and perturbed deterministic rules on the same entropy-vs-echo plane, using the same axis limits and explicit R² labels so the structural law’s stability is easy to audit.


In [None]:
# y extent covering both rule variants; the x extent (x_min, x_max) and the
# fitted lines are reused from the structural-law cell.
y_min = min(float(_frame["entropy_density"].min()) for _frame in (law_baseline_frame, law_perturbed_frame))
y_max = max(float(_frame["entropy_density"].max()) for _frame in (law_baseline_frame, law_perturbed_frame))

fig_universality = go.Figure()

# Baseline: sample cloud (legend carries the R²) and solid regression line.
fig_universality.add_trace(
    go.Scatter(
        x=law_baseline_frame["echo_slope"],
        y=law_baseline_frame["entropy_density"],
        mode="markers",
        marker={"size": 7, "color": "#1f77b4", "opacity": 0.7},
        name=f"Baseline samples (R²={law_baseline['r_squared']:.4f})",
    )
)
fig_universality.add_trace(
    go.Scatter(
        x=x_line,
        y=y_baseline,
        mode="lines",
        line={"width": 3, "color": "#1f77b4"},
        name="Baseline regression",
    )
)

# Perturbed: fainter sample cloud and dashed regression line.
fig_universality.add_trace(
    go.Scatter(
        x=law_perturbed_frame["echo_slope"],
        y=law_perturbed_frame["entropy_density"],
        mode="markers",
        marker={"size": 7, "color": "#d95f02", "opacity": 0.5},
        name=f"Perturbed samples (R²={law_perturbed['r_squared']:.4f})",
    )
)
fig_universality.add_trace(
    go.Scatter(
        x=x_line,
        y=y_perturbed,
        mode="lines",
        line={"width": 3, "color": "#d95f02", "dash": "dash"},
        name="Perturbed regression",
    )
)

fig_universality.update_layout(
    title=f"AET–IWLT Universality Under Deterministic Perturbations (N = {largest_joint_steps})",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font={"size": 16, "color": "#222222"},
    margin={"l": 80, "r": 40, "t": 90, "b": 70},
    showlegend=True,
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0},
)
_universality_axis = {"showgrid": True, "gridcolor": "#d9d9d9", "zeroline": False}
fig_universality.update_xaxes(title="echo_slope", range=[x_min, x_max], **_universality_axis)
fig_universality.update_yaxes(title="entropy_density", range=[y_min, y_max], **_universality_axis)
universality_png = save_png(fig_universality, "fig_aet_iwlt_universality.png")
display_saved_png(universality_png)


In [None]:
# Two-panel hero figure.
#   Row 1: normalized AET / IWLT / RLT observables vs lambda (cross_layer_df).
#   Row 2: entropy density vs echo slope with the baseline linear fit.
#
# The figure title states N = largest_baseline_steps, so the law panel is
# computed here at that same N. Reusing the frame, fit and regression line
# from the structural-law cell would show data at largest_joint_steps, with a
# line spanning the perturbed samples' x range, under the wrong N label
# whenever the two lengths differ.
hero_law_frame = build_aet_iwlt_law_frame(aet_all, iwlt_all, largest_baseline_steps, False)
hero_law = linear_regression_summary(hero_law_frame["echo_slope"], hero_law_frame["entropy_density"])
hero_x_line = np.linspace(
    float(hero_law_frame["echo_slope"].min()),
    float(hero_law_frame["echo_slope"].max()),
    400,
)
hero_y_line = hero_law["slope"] * hero_x_line + hero_law["intercept"]

hero_fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=False,
    vertical_spacing=0.14,
    subplot_titles=(
        "Normalized Structural Measures vs Lambda",
        "Entropy Density vs Echo Slope (AET–IWLT Law)",
    ),
)

# Row 1: the three normalized observables on the shared lambda grid.
hero_fig.add_trace(go.Scatter(x=cross_layer_df["lambda"], y=cross_layer_df["echo_slope_norm"], mode="lines", line={"width": 3, "color": "#1f77b4"}, name="AET echo slope"), row=1, col=1)
hero_fig.add_trace(go.Scatter(x=cross_layer_df["lambda"], y=cross_layer_df["entropy_density_norm"], mode="lines", line={"width": 3, "color": "#d95f02"}, name="IWLT entropy density"), row=1, col=1)
hero_fig.add_trace(go.Scatter(x=cross_layer_df["lambda"], y=cross_layer_df["expansion_ratio_norm"], mode="lines", line={"width": 3, "color": "#1b9e77"}, name="RLT expansion ratio"), row=1, col=1)
if lambda_star is not None:
    # Dashed marker and label for the transition, in the top panel only.
    hero_fig.add_vline(x=lambda_star, line_dash="dash", line_color="#444444", opacity=0.8, row=1, col=1)
    hero_fig.add_annotation(x=lambda_star, y=0.96, xref="x", yref="y domain", text="λ* (transport transition)", showarrow=False, bgcolor="rgba(255,255,255,0.85)")

# Row 2: baseline samples, their regression line, and the R² label placed in
# the panel's own domain coordinates (x2/y2 are the second subplot's axes).
hero_fig.add_trace(go.Scatter(x=hero_law_frame["echo_slope"], y=hero_law_frame["entropy_density"], mode="markers", marker={"size": 7, "color": "#444444", "opacity": 0.65}, name="Baseline samples"), row=2, col=1)
hero_fig.add_trace(go.Scatter(x=hero_x_line, y=hero_y_line, mode="lines", line={"width": 3, "color": "#111111"}, name="Regression line"), row=2, col=1)
hero_fig.add_annotation(x=0.05, y=0.95, xref="x2 domain", yref="y2 domain", text=f"R² = {hero_law['r_squared']:.3f}", showarrow=False, bgcolor="rgba(255,255,255,0.85)", align="left")

hero_fig.update_layout(title=f"ADD Structural Stack Hero Figure (N = {largest_baseline_steps})", paper_bgcolor="white", plot_bgcolor="white", font={"size": 16, "color": "#222222"}, margin={"l": 80, "r": 50, "t": 110, "b": 70}, height=1200, showlegend=True, legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1.0})
hero_fig.update_xaxes(title_text="lambda", showgrid=True, gridcolor="#d9d9d9", zeroline=False, row=1, col=1)
hero_fig.update_yaxes(title_text="normalized value", showgrid=True, gridcolor="#d9d9d9", zeroline=False, range=[-0.02, 1.02], row=1, col=1)
hero_fig.update_xaxes(title_text="echo_slope", showgrid=True, gridcolor="#d9d9d9", zeroline=False, row=2, col=1)
hero_fig.update_yaxes(title_text="entropy_density", showgrid=True, gridcolor="#d9d9d9", zeroline=False, row=2, col=1)
hero_png = save_png(hero_fig, "fig_hero_add_stack.png", height=1200)
display_saved_png(hero_png)


In [None]:
# Collect the PNG and CSV file names in the run directory (both listings are
# taken before anything is printed), then print one bulleted section per kind.
pngs = sorted(entry.name for entry in OUTPUT_DIR.glob("*.png"))
csvs = sorted(entry.name for entry in OUTPUT_DIR.glob("*.csv"))
for heading, names in (("Saved PNGs:", pngs), ("Saved CSVs:", csvs)):
    print(heading)
    for name in names:
        print(" -", name)
