In [None]:
from pathlib import Path
import csv
import statistics
from typing import List, Dict, Tuple
import re

import matplotlib.pyplot as plt

DATA_DIR = '../../downloaded_files/run-2025-10-22-074700'

def read_csv_folder(folder: Path) -> List[Dict]:
    """Load the rows of the ``out.csv`` file located directly inside *folder*.

    Each row is returned as a dict keyed by the CSV header, with two
    normalisations applied:

    * ``ok`` becomes a bool. It is True when the cell equals ``1`` or
      ``true`` (ignoring case and surrounding whitespace) and False
      otherwise, including when the column is absent from the file.
    * ``e2e_ms``, ``server_latency_ms``, ``timeout_ms`` and ``fail`` are
      converted to ``float`` when the cell is non-empty and parses as a
      number. Empty or unparsable cells keep their original text.

    Args:
        folder: Directory expected to contain ``out.csv``.

    Returns:
        The list of row dicts, in file order. Empty when *folder* holds no
        ``out.csv``.
    """
    numeric_fields = ("e2e_ms", "server_latency_ms", "timeout_ms", "fail")
    rows = []
    for p in sorted(folder.glob("out.csv")):
        with p.open("r", newline="") as f:
            for row in csv.DictReader(f):
                # .get() keeps a file without an "ok" column from raising
                # KeyError; such rows simply count as not-ok.
                ok_cell = row.get("ok") or ""
                row["ok"] = ok_cell.strip().lower() in ("1", "true")
                for k in numeric_fields:
                    cell = row.get(k)
                    if cell in (None, ""):
                        continue
                    try:
                        row[k] = float(cell)
                    except (TypeError, ValueError):
                        # Best-effort conversion: leave the raw text in place
                        # so downstream code can decide how to treat it.
                        pass
                rows.append(row)
    return rows

def summarize(results: List[Dict]) -> Dict[str, float]:
    """Compute success and end-to-end latency statistics for one run.

    Args:
        results: Row dicts. ``ok`` is read for truthiness and ``e2e_ms`` is
            used only when it is an ``int`` or ``float``; rows whose
            ``e2e_ms`` is missing or still a string are left out of the
            latency statistics but still count towards ``n``.

    Returns:
        A dict with:

        * ``n`` - number of rows.
        * ``ok`` - number of rows with a truthy ``ok``.
        * ``success_rate`` - ``ok / n``, or NaN when there are no rows.
        * ``mean_e2e_ms`` - mean of the numeric ``e2e_ms`` values, or NaN
          when there are none.
        * ``p95_e2e_ms`` - nearest-rank 95th percentile of the numeric
          ``e2e_ms`` values, or NaN when there are none.
    """
    n = len(results)
    ok = sum(1 for r in results if r.get("ok"))
    e2e_vals = sorted(
        r["e2e_ms"] for r in results if isinstance(r.get("e2e_ms"), (int, float))
    )
    if e2e_vals:
        mean_e2e = statistics.mean(e2e_vals)
        # Nearest-rank percentile: rank = ceil(0.95 * count). Integer
        # arithmetic avoids float rounding, and taking the ceiling (not the
        # floor) keeps small samples from under-reporting the tail, e.g.
        # three samples must yield the maximum, not the median.
        rank = (95 * len(e2e_vals) + 99) // 100
        p95 = e2e_vals[rank - 1]
    else:
        mean_e2e = p95 = float("nan")
    return {
        "n": n,
        "ok": ok,
        "success_rate": (ok / n) if n else float("nan"),
        "mean_e2e_ms": mean_e2e,
        "p95_e2e_ms": p95,
    }

# Aggregate per parameter-set.
#
# The loops below walk DATA_DIR/<parameter_set>/<single_run>/ and summarise
# the out.csv of each single run, then average those per-run summaries into
# one entry per parameter set.
def _mean_ignoring_nan(values):
    """Return the mean of the non-NaN *values*, or NaN when none remain."""
    # NaN is the only float that is not equal to itself.
    valid = [v for v in values if v == v]
    # statistics.mean raises StatisticsError on an empty sequence, which
    # would abort the whole sweep if every repeat of one parameter set had
    # no usable data.
    return statistics.mean(valid) if valid else float("nan")


variants = sorted([d for d in Path(DATA_DIR).iterdir() if d.is_dir()])
aggregate = []
for vs in variants:
    single_runs = sorted([d for d in vs.iterdir() if d.is_dir()])
    per_run_summaries = []
    for sr in single_runs:
        rows = read_csv_folder(sr)
        s = summarize(rows)
        s["single_run_path"] = str(sr)
        per_run_summaries.append(s)
    # Average across repeats; a parameter set without any run directory is
    # left out of the aggregate entirely.
    if per_run_summaries:
        mean_success = _mean_ignoring_nan([s["success_rate"] for s in per_run_summaries])
        mean_p95 = _mean_ignoring_nan([s["p95_e2e_ms"] for s in per_run_summaries])
        mean_mean = _mean_ignoring_nan([s["mean_e2e_ms"] for s in per_run_summaries])
        aggregate.append({
            "parameter_set": vs.name,
            "mean_success_rate": mean_success,
            "mean_p95_e2e_ms": mean_p95,
            "mean_mean_e2e_ms": mean_mean,
            "num_runs": len(per_run_summaries),
        })

# aggregate

NameError: name 'ps' is not defined

In [None]:
# Success-rate overview: one bar per parameter set, whose height is the
# success rate averaged over that set's repeated runs.
labels = [entry["parameter_set"] for entry in aggregate]
vals = [entry["mean_success_rate"] for entry in aggregate]

plt.figure()
plt.bar(x=labels, height=vals)
plt.title("Sweep: Success Rate by Parameter Set")
plt.ylabel("Mean success rate")
# A success rate is a fraction, so pin the axis to the full [0, 1] range.
plt.ylim(0, 1)
# Slant the tick labels and right-align them under their bars.
plt.xticks(ha="right", rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Tail-latency overview: one bar per parameter set, whose height is the p95
# end-to-end latency averaged over that set's repeated runs.
labels = [entry["parameter_set"] for entry in aggregate]
vals = [entry["mean_p95_e2e_ms"] for entry in aggregate]

plt.figure()
plt.bar(x=labels, height=vals)
plt.title("Sweep: p95 E2E Latency by Parameter Set")
plt.ylabel("Mean p95 E2E latency (ms)")
# Slant the tick labels and right-align them under their bars.
plt.xticks(ha="right", rotation=45)
plt.tight_layout()
plt.show()