In [None]:
import json
import re
import pandas as pd

# Location of the JSON results file that load_json_lenient() is called with below.
JSON_PATH = "rr_results_full_json.json"  # <-- change to your file name/path

def load_json_lenient(path: str):
    """
    Load JSON from ``path``, tolerating objects that were concatenated without commas.

    The text is first parsed as-is. Only if that fails is a minimal textual
    repair attempted:

    1. a comma is inserted between every ``}`` that is followed (after optional
       whitespace) by ``{``;
    2. if the repaired text does not already start with ``[``, it is wrapped in
       ``[...]`` so that a bare stream of objects (for example one object per
       line) parses as a list.

    The repair is purely textual, so a ``}{`` sequence inside a string value
    would also be rewritten; it is meant as a notebook convenience, not a
    general JSON fixer.

    Parameters
    ----------
    path : str
        Path of a UTF-8 encoded JSON file.

    Returns
    -------
    The decoded JSON value: whatever ``json.loads`` yields for valid input, or
    a list of objects when the repair path was taken.

    Raises
    ------
    json.JSONDecodeError
        If the text is still invalid after the repair (including an empty file).
        The error from the first, unrepaired parse is chained as its cause.
    OSError
        If the file cannot be opened or read.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(path, "r", encoding="utf-8") as fh:
        txt = fh.read().strip()

    try:
        return json.loads(txt)
    except json.JSONDecodeError as first_error:
        # Minimal repair: insert commas between }{
        repaired = re.sub(r"}\s*{", "},\n{", txt)

        # A stream of top-level objects needs enclosing brackets to be a valid
        # list. Empty text is left alone so it still raises instead of silently
        # turning into an empty list.
        if repaired and not repaired.startswith("["):
            repaired = "[" + repaired + "]"

        try:
            return json.loads(repaired)
        except json.JSONDecodeError as repair_error:
            # Keep the original failure visible in the traceback.
            raise repair_error from first_error

records = load_json_lenient(JSON_PATH)

# Build the frame and attach every derived column in one chained expression.
# slots_computed is cores * machines; each *_wall_s column divides the matching
# *_ms metric by the row's speedup and converts milliseconds to seconds, so
# rows with different speedups can be compared on one scale.
df = pd.DataFrame(records).assign(
    slots_computed=lambda d: d["cores"] * d["machines"],
    mean_response_wall_s=lambda d: (d["mean_response_ms"] / d["speedup"]) / 1000.0,
    p95_response_wall_s=lambda d: (d["p95_response_ms"] / d["speedup"]) / 1000.0,
    mean_wait_wall_s=lambda d: (d["mean_wait_ms"] / d["speedup"]) / 1000.0,
)

# Sanity check: list the rows whose computed slot count disagrees with the
# total_slots_at_end value recorded in the data.
slot_mismatch = df["slots_computed"] != df["total_slots_at_end"]
bad_slots = df.loc[
    slot_mismatch,
    ["run_id", "dataset", "cores", "machines", "slots_computed", "total_slots_at_end"],
]

print("Rows where cores*machines != total_slots_at_end:", len(bad_slots))
display(bad_slots.head(20))

# Order rows by dataset, then slot count, then quantum, with a fresh 0..n-1 index.
sort_keys = ["dataset", "total_slots_at_end", "quantum_ms"]
df = df.sort_values(sort_keys).reset_index(drop=True)
display(df.head(10))


In [None]:
import matplotlib.pyplot as plt

def plot_vs_quantum(df_sub: pd.DataFrame, y: str, title: str, ylog: bool = True):
    """
    Draw one figure of column ``y`` against ``quantum_ms``, one line per slot count.

    Rows of ``df_sub`` are grouped by ``slots_computed``; each group is ordered
    by ``quantum_ms`` and drawn as a marked line labelled ``"<slots> slots"``.

    Parameters
    ----------
    df_sub : pd.DataFrame
        Frame containing ``quantum_ms``, ``slots_computed`` and the ``y`` column.
    y : str
        Name of the column plotted on the vertical axis; also used as the
        y-axis label.
    title : str
        Figure title.
    ylog : bool, default True
        When true the y axis uses a log scale. The x axis is always log-scaled.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # New figure with a single Axes; everything below draws on it explicitly.
    ax = plt.figure().add_subplot(111)

    for slots, rows in df_sub.groupby("slots_computed"):
        ordered = rows.sort_values("quantum_ms")
        ax.plot(ordered["quantum_ms"], ordered[y], marker="o", label=f"{slots} slots")

    # Log x axis: quantum values such as 1, 10, 20, ... spread out better this way.
    ax.set_xscale("log")
    if ylog:
        ax.set_yscale("log")

    ax.set_xlabel("Quantum (ms, log scale)")
    ax.set_ylabel(y)
    ax.set_title(title)
    ax.legend()
    plt.show()


In [None]:
# Mean response time (sim ms) against quantum: one log-log figure per dataset.
for dataset, per_dataset in df.groupby("dataset"):
    fig_title = f"RR: Mean response time vs quantum (sim ms) — dataset={dataset}"
    plot_vs_quantum(per_dataset, y="mean_response_ms", title=fig_title, ylog=True)

In [None]:
# p95 response time (sim ms) against quantum: one log-log figure per dataset.
for dataset, per_dataset in df.groupby("dataset"):
    fig_title = f"RR: p95 response time vs quantum (sim ms) — dataset={dataset}"
    plot_vs_quantum(per_dataset, y="p95_response_ms", title=fig_title, ylog=True)


In [None]:
# Average slices per job against quantum: one log-log figure per dataset.
for dataset, per_dataset in df.groupby("dataset"):
    fig_title = f"RR: Avg slices/job vs quantum — dataset={dataset}"
    plot_vs_quantum(per_dataset, y="avg_slices_per_job", title=fig_title, ylog=True)


In [None]:
# Wall-normalized mean response (seconds) against quantum: one log-log figure per dataset.
for dataset, per_dataset in df.groupby("dataset"):
    fig_title = f"RR: Mean response vs quantum (wall-normalized seconds) — dataset={dataset}"
    plot_vs_quantum(per_dataset, y="mean_response_wall_s", title=fig_title, ylog=True)


In [None]:
# Scatter of average slices per job against mean response time for every row
# of df, drawn on log-log axes.
ax = plt.figure().add_subplot(111)
ax.scatter(df["avg_slices_per_job"], df["mean_response_ms"])
ax.set(
    xscale="log",
    yscale="log",
    xlabel="Average slices per job (log)",
    ylabel="Mean response time (sim ms, log)",
    title="RR: More slices per job correlates with higher response time",
)
plt.show()
