# dsfb-add Colab Sweep Notebook

This notebook loads the Rust-generated CSVs from an `output-dsfb-add/<timestamp>/` run directory and generates Plotly PNG figures for AET, IWLT, TCP, and RLT.

Recommended workflow:

1. Run `cargo run -p dsfb-add --bin dsfb_add_sweep` locally.
2. Upload or mount the repo so the matching `output-dsfb-add/<timestamp>/` folder is visible in Colab.
3. Leave `OUTPUT_DIR = None` to auto-detect the latest available run, or set it explicitly below.
4. The generated PNGs are written back into the same timestamped directory as the CSVs.


In [None]:
%pip install -q "plotly>=6.1.1" "kaleido>=1.0.0" ripser


In [None]:
from pathlib import Path

# Set OUTPUT_DIR to a specific run folder to pin the notebook to it, for example:
# OUTPUT_DIR = Path("/content/output-dsfb-add/2026-03-01T12-00-00Z")
# Keeping it as None lets the notebook auto-detect the newest timestamped run directory.
OUTPUT_DIR = None

# Root folders probed, in this order, when OUTPUT_DIR is left as None.
OUTPUT_ROOT_CANDIDATES = [
    Path(root)
    for root in (
        "/content/output-dsfb-add",
        "/content/dsfb/output-dsfb-add",
        "/content/drive/MyDrive/output-dsfb-add",
        "output-dsfb-add",
    )
]

# Bare expression so the notebook echoes the configured value.
OUTPUT_DIR


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from ripser import ripser

# Set Plotly's default renderer for figures displayed in this notebook.
# NOTE(review): Plotly also ships a dedicated "colab" renderer — confirm that
# "notebook" displays figures correctly in the Colab runtime this targets.
pio.renderers.default = "notebook"


In [None]:
def require_file(path: Path) -> Path:
    """Return *path* unchanged after checking that it is an existing regular file.

    Args:
        path: Location of a file the notebook is about to read.

    Returns:
        The same ``path`` object, so the call can be nested inside a reader.

    Raises:
        FileNotFoundError: If *path* is missing or is not a regular file
            (for example a directory that happens to carry the expected name).
    """
    # is_file() is False for missing paths and for directories alike, so a
    # misplaced folder is reported here rather than failing inside the CSV reader.
    if not path.is_file():
        raise FileNotFoundError(f"Required file not found: {path}")
    return path

def latest_timestamped_dir(root: Path) -> "Path | None":
    """Return the lexicographically greatest visible sub-directory of *root*.

    The notebook treats that entry as the most recent run, which holds as long
    as run folders use a sortable timestamp name.

    Args:
        root: Folder expected to contain one sub-directory per run.

    Returns:
        The selected sub-directory, or ``None`` when *root* is not a directory
        or contains no visible sub-directories.
    """
    # is_dir() is already False for a missing path, so no separate exists() check.
    if not root.is_dir():
        return None
    run_dirs = [
        entry
        for entry in root.iterdir()
        # Hidden folders (e.g. Jupyter's .ipynb_checkpoints) are never run
        # directories and must not be picked up as the "latest" run.
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    return max(run_dirs, default=None)

def resolve_output_dir(explicit_dir, candidate_roots):
    """Decide which run directory the notebook should read from and write to.

    Args:
        explicit_dir: A user-configured directory (anything ``Path`` accepts),
            or ``None`` to request auto-detection.
        candidate_roots: Root folders searched in order during auto-detection.

    Returns:
        ``Path(explicit_dir)`` when it is an existing directory; otherwise the
        first non-``None`` result of ``latest_timestamped_dir`` over the roots.

    Raises:
        FileNotFoundError: If the configured directory does not exist, or if
            auto-detection finds nothing under any candidate root.
    """
    if explicit_dir is None:
        # Lazy generator: roots after the first hit are never inspected.
        hits = (latest_timestamped_dir(root) for root in candidate_roots)
        detected = next((hit for hit in hits if hit is not None), None)
        if detected is not None:
            return detected

        root_listing = "\n".join(f" - {root}" for root in candidate_roots)
        raise FileNotFoundError(
            "No output-dsfb-add run directory was found. Upload or mount the Rust-generated "
            "output folder, or set OUTPUT_DIR explicitly. Searched:\n" + root_listing
        )

    configured = Path(explicit_dir)
    if not (configured.exists() and configured.is_dir()):
        raise FileNotFoundError(
            f"Configured OUTPUT_DIR does not exist: {configured}. Upload or mount your run folder first."
        )
    return configured

def save_png(fig, filename: str, width: int = 1400, height: int = 900, scale: int = 2) -> Path:
    """Export *fig* to ``OUTPUT_DIR / filename`` and return that path.

    ``width``, ``height`` and ``scale`` are forwarded unchanged to
    ``fig.write_image``.
    """
    export_options = {"width": width, "height": height, "scale": scale}
    destination = OUTPUT_DIR / filename
    fig.write_image(destination, **export_options)
    return destination

# Replace the configured value (possibly None) with a concrete run directory.
# resolve_output_dir raises FileNotFoundError when no directory can be located.
OUTPUT_DIR = resolve_output_dir(OUTPUT_DIR, OUTPUT_ROOT_CANDIDATES)
# Bare expression so the notebook echoes the directory that was selected.
OUTPUT_DIR


In [None]:
# Load the AET sweep table, failing early if the CSV is missing from the run folder.
aet_csv = require_file(OUTPUT_DIR / "aet_sweep.csv")
aet = pd.read_csv(aet_csv)

fig_aet = px.line(aet, x="lambda", y="echo_slope", title="AET Echo Slope vs Lambda", markers=True)
fig_aet.update_layout(
    template="plotly_white",
    xaxis_title="lambda",
    yaxis_title="echo_slope",
)

# Write the PNG into the run folder, then leave the figure as the cell's display value.
save_png(fig_aet, "fig_aet_echo_slope_vs_lambda.png")
fig_aet


In [None]:
# Load the IWLT sweep table, failing early if the CSV is missing from the run folder.
iwlt_csv = require_file(OUTPUT_DIR / "iwlt_sweep.csv")
iwlt = pd.read_csv(iwlt_csv)

fig_iwlt = px.line(iwlt, x="lambda", y="entropy_density", title="IWLT Entropy Density vs Lambda", markers=True)
fig_iwlt.update_layout(
    template="plotly_white",
    xaxis_title="lambda",
    yaxis_title="entropy_density",
)

# Write the PNG into the run folder, then leave the figure as the cell's display value.
save_png(fig_iwlt, "fig_iwlt_entropy_density_vs_lambda.png")
fig_iwlt


In [None]:
# Load the RLT sweep table, failing early if the CSV is missing from the run folder.
rlt_csv = require_file(OUTPUT_DIR / "rlt_sweep.csv")
rlt = pd.read_csv(rlt_csv)

# Escape-rate curve.
fig_rlt_escape = px.line(rlt, x="lambda", y="escape_rate", title="RLT Escape Rate vs Lambda", markers=True)
fig_rlt_escape.update_layout(template="plotly_white")
save_png(fig_rlt_escape, "fig_rlt_escape_rate_vs_lambda.png")

# Expansion-ratio curve.
fig_rlt_expansion = px.line(rlt, x="lambda", y="expansion_ratio", title="RLT Expansion Ratio vs Lambda", markers=True)
fig_rlt_expansion.update_layout(template="plotly_white")
save_png(fig_rlt_expansion, "fig_rlt_expansion_ratio_vs_lambda.png")

# Only the escape-rate figure is echoed as the cell output; the expansion-ratio
# figure is written to disk but not displayed here.
fig_rlt_escape


## TCP persistent-homology figure

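The Rust summary already contains coarse `betti0`/`betti1` proxies. The cell below recomputes an H1 count with `ripser` from a subsample of the point clouds exported to `tcp_points/`, counting only features with a finite death value and a persistence (death − birth) above 0.05. It requires the `tcp_points/` directory to be present in the run folder.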


In [None]:
# Load the TCP sweep summary and locate the exported per-lambda point clouds.
tcp = pd.read_csv(require_file(OUTPUT_DIR / "tcp_sweep.csv"))
tcp_points_dir = OUTPUT_DIR / "tcp_points"
point_files = sorted(tcp_points_dir.glob("points_lambda_*.csv"))
if not point_files:
    # Without this guard the empty result only surfaces later as a KeyError
    # when the empty frame is sorted by its missing "lambda" column.
    raise FileNotFoundError(
        f"No points_lambda_*.csv files found in {tcp_points_dir}. "
        "Upload or mount the tcp_points folder of this run before running this cell."
    )

# Upper bound on how many point clouds are passed through ripser.
TCP_PH_MAX_FILES = 24
# Minimum (death - birth) for an H1 feature to be counted.
TCP_PH_MIN_PERSISTENCE = 0.05

if len(point_files) > TCP_PH_MAX_FILES:
    # Evenly spaced positions from the first to the last file. An integer stride
    # of len // cap does not enforce the cap (e.g. 47 files -> stride 1 -> 47 kept).
    keep = np.unique(
        np.linspace(0, len(point_files) - 1, max(1, TCP_PH_MAX_FILES)).round().astype(int)
    )
    point_files = [point_files[i] for i in keep]

ph_rows = []
for point_file in point_files:
    # The trailing number of the file name is used as the row label into tcp_sweep.csv.
    idx = int(point_file.stem.split("_")[-1])
    points = pd.read_csv(point_file)[["x", "y"]].to_numpy()
    diagrams = ripser(points, maxdim=1)["dgms"]
    h1 = diagrams[1] if len(diagrams) > 1 else np.empty((0, 2))
    # Column 0 is birth, column 1 is death; keep finite features that live long enough.
    lifetimes = h1[:, 1] - h1[:, 0]
    persistent = h1[np.isfinite(h1[:, 1]) & (lifetimes > TCP_PH_MIN_PERSISTENCE)]
    ph_rows.append({"lambda": float(tcp.loc[idx, "lambda"]), "betti1_count": int(len(persistent))})

# Files are ordered by name above, so order the rows by lambda before plotting.
tcp_ph = pd.DataFrame(ph_rows).sort_values("lambda")
fig_tcp = px.line(
    tcp_ph,
    x="lambda",
    y="betti1_count",
    title="TCP Betti-1 Count vs Lambda (ripser subset)",
    markers=True,
)
fig_tcp.update_layout(template="plotly_white")
save_png(fig_tcp, "fig_tcp_betti1_vs_lambda.png")
fig_tcp


In [None]:
# Collect the file names of every PNG in the run folder and list them alphabetically.
pngs = [png_path.name for png_path in OUTPUT_DIR.glob("*.png")]
pngs.sort()

print("Saved PNGs:")
for png_name in pngs:
    print(" -", png_name)
