In [None]:
#@title Generate figures (raw & clean), per-phase analysis, ECDF, and downloads
from datetime import datetime, timezone

# Every figure, table, and the download bundle produced by this cell is
# written under this folder.
out_dir = "colab_outputs"
os.makedirs(name=out_dir, exist_ok=True)  # tolerate an already existing folder

def savefig(path):
    """Finalize the current pyplot figure and write it to ``path``.

    Applies a tight layout, saves the figure at 200 dpi, closes it, and
    prints the destination path.
    """
    current = plt.gcf()
    current.tight_layout()
    current.savefig(path, dpi=200)
    plt.close(current)
    print("  →", path)

# ---------- 1) Distribution & time series ----------
# Histogram of the metric over every row of the raw frame.
ax = plt.figure().gca()
ok[metric_col].plot.hist(ax=ax, bins=40, edgecolor="black")
ax.set_xlabel("Rollout total (s)")
ax.set_ylabel("Frequency")
ax.set_title(f"Histogram (raw) — {metric_col} (n={len(ok)})")
savefig(os.path.join(out_dir, "hist_raw.png"))

# Metric plotted against the combined-run iteration index (raw frame).
ax = plt.figure().gca()
ax.plot(ok["iteration_global"], ok[metric_col])
ax.set_xlabel("Successful iteration (combined runs)")
ax.set_ylabel("Rollout total (s)")
ax.set_title(f"Time series (raw) — {metric_col} (n={len(ok)})")
savefig(os.path.join(out_dir, "series_raw.png"))

# Same histogram for the cleaned frame, with coarser bins.
ax = plt.figure().gca()
ok_clean[metric_col].plot.hist(ax=ax, bins=30, edgecolor="black")
ax.set_xlabel("Rollout total (s)")
ax.set_ylabel("Frequency")
ax.set_title(f"Histogram (clean) — {metric_col} (n={len(ok_clean)})")
savefig(os.path.join(out_dir, "hist_clean.png"))

# ECDF (clean): sorted values against their cumulative fraction k/n.
ax = plt.figure().gca()
vals = np.sort(ok_clean[metric_col].values)
y = np.arange(1, vals.size + 1) / vals.size
ax.plot(vals, y)
ax.set_xlabel("Rollout total (s)")
ax.set_ylabel("ECDF")
ax.set_title(f"ECDF (clean) — {metric_col} (n={len(ok_clean)})")
savefig(os.path.join(out_dir, "ecdf_clean.png"))

# ---------- 2) Phase variability & decomposition ----------
# Restrict the analysis to the phase columns that exist in the raw frame.
have_phases = [c for c in phase_cols if c in ok.columns]
if have_phases:
    # Boxplot of phases (raw), with the overall metric alongside for scale.
    plt.figure(figsize=(6.5, 4.5))
    ok[have_phases + [metric_col]].boxplot(rot=45)
    plt.ylabel("Seconds")
    plt.title("Phase duration variability (raw)")
    savefig(os.path.join(out_dir, "box_phases_raw.png"))

    # Mean stacked bar: use the cleaned frame whenever it has rows and fall
    # back to the raw frame only when cleaning left nothing.
    use_df = ok_clean if len(ok_clean) > 0 else ok
    means = use_df[have_phases].mean()
    plt.figure(figsize=(6, 3.6))
    bottom = 0
    for ph in have_phases:
        # Each segment starts where the previous phase ended, so the
        # segments add up to a single stacked horizontal bar.
        plt.barh(["Total"], [means[ph]], left=bottom, label=ph)
        bottom += means[ph]
    plt.xlabel("Mean duration (s)")
    plt.title("Average rollout phase decomposition")
    plt.legend()
    savefig(os.path.join(out_dir, "stack_mean_phases.png"))

    # Correlation: deployment_total_s vs rollout_total_s (if present)
    # NOTE(review): this scatter only reads `deployment_total_s` and the
    # metric column, yet it is skipped whenever no phase columns are found
    # because it sits inside `if have_phases` — confirm that is intended.
    if "deployment_total_s" in ok.columns:
        plt.figure()
        plt.scatter(ok["deployment_total_s"], ok[metric_col], alpha=0.6)
        plt.xlabel("Deployment total (s)")
        plt.ylabel("Rollout total (s)")
        plt.title("Deployment vs Rollout (raw)")
        plt.grid(True, alpha=0.3)
        savefig(os.path.join(out_dir, "scatter_deploy_vs_rollout.png"))

# ---------- 3) Summary tables ----------
def summarize_frame(frame, label):
    """Build a one-row summary table for ``metric_col`` in ``frame``.

    Args:
        frame: DataFrame that contains the ``metric_col`` column.
        label: Value stored in the ``label`` column of the result.

    Returns:
        A one-row DataFrame with the columns ``metric``, ``n``, ``mean_s``,
        ``std_s``, ``ci95_lo_s``, ``ci95_hi_s`` and ``label``. The interval
        bounds are ``mean ± 1.96 * std / sqrt(n)`` (sample std, ddof=1) and
        are NaN unless the frame has more than one row.
    """
    column = frame[metric_col]
    n_rows = frame.shape[0]
    mean = column.mean()
    std = column.std(ddof=1)

    if n_rows > 1:
        half_width = 1.96 * std / np.sqrt(n_rows)
        ci_lo = mean - half_width
        ci_hi = mean + half_width
    else:
        # A single observation (or none) has no usable spread estimate.
        ci_lo = ci_hi = np.nan

    return pd.DataFrame({
        "metric": [metric_col],
        "n": [n_rows],
        "mean_s": [mean],
        "std_s": [std],
        "ci95_lo_s": [ci_lo],
        "ci95_hi_s": [ci_hi],
        "label": [label],
    })

# One summary row per data set; the second label records whether outlier
# removal was actually applied.
sum_raw = summarize_frame(ok, "raw")
sum_clean = summarize_frame(ok_clean, "clean" if remove_outliers else "clean=raw")
summary = pd.concat([sum_raw, sum_clean], ignore_index=True)

summary_csv = os.path.join(out_dir, "summary_rollout.csv")
summary.to_csv(summary_csv, index=False)

# LaTeX table (booktabs): assemble header, one line per summary row, and
# footer first, then write the file in a single call.
tex_path = os.path.join(out_dir, "summary_rollout.tex")
tex_lines = [
    "\\begin{tabular}{lrrrrr}\n\\toprule\n",
    "set & n & mean (s) & std (s) & 95\\% CI lo & 95\\% CI hi \\\\\n\\midrule\n",
]
tex_lines.extend(
    f"{row['label']} & {int(row['n'])} & {row['mean_s']:.2f} & {row['std_s']:.2f} & {row['ci95_lo_s']:.2f} & {row['ci95_hi_s']:.2f} \\\\\n"
    for _, row in summary.iterrows()
)
tex_lines.append("\\bottomrule\n\\end{tabular}\n")
with open(tex_path, "w", encoding="utf-8") as f:
    f.writelines(tex_lines)

# ---------- 4) Show inline & prepare downloads ----------
print("\n=== Figures generated ===")
for fig in ["hist_raw.png","series_raw.png","hist_clean.png","ecdf_clean.png","box_phases_raw.png","stack_mean_phases.png","scatter_deploy_vs_rollout.png"]:
    p = os.path.join(out_dir, fig)
    # Some figures are optional, so only show the files that exist on disk.
    if os.path.exists(p):
        display(Image(filename=p))

# Bundle for download
bundle_prefix = "rollout_outputs_"
# Timezone-aware UTC timestamp; datetime.utcnow() is deprecated since 3.12.
zip_stamp = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
zip_name = f"{bundle_prefix}{zip_stamp}.zip"
zip_path = os.path.join(out_dir, zip_name)
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    for root, _, files_in in os.walk(out_dir):
        for fn in files_in:
            # The archive is created inside out_dir, so the walk also finds
            # the archive being written and bundles left by earlier runs.
            # Skip them so the zip never tries to include itself.
            if fn.startswith(bundle_prefix) and fn.endswith(".zip"):
                continue
            src = os.path.join(root, fn)
            # Paths relative to out_dir equal the bare file name for a flat
            # folder and keep same-named files in subfolders distinct.
            zf.write(src, arcname=os.path.relpath(src, out_dir))

print("\nFiles written to:", out_dir)
files.download(zip_path)
