# bafiq plots for publishing

# In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# ---------------------------------------------------------------------------
# 1.  Load data
# ---------------------------------------------------------------------------
# Input CSVs; both paths are relative, so they resolve against the current
# working directory.
MEM_FILE = Path("benchmark_results/memory_samples.csv")
THREAD_FILE = Path("benchmark_results/thread_scaling.csv")

mem_df = pd.read_csv(MEM_FILE)
thread_df = pd.read_csv(THREAD_FILE)

# Add the sample time in seconds (timestamp_ms / 1000) as its own column up
# front, so the plotting functions can read "time_s" directly.
mem_df["time_s"] = mem_df["timestamp_ms"] / 1_000.0

# ---------------------------------------------------------------------------
# 2.  Functions that generate each chart
# ---------------------------------------------------------------------------
def plot_memory_over_time(df: pd.DataFrame, threads: int | None = None) -> None:
    """
    Plot the memory footprint of each strategy over time.

    The x-axis uses the "time_s" column and the y-axis the "memory_mb"
    column of *df*; one line is drawn per value of the "strategy" column.

    If *threads* is given, only rows whose "threads" value equals it are
    shown.  If *threads* is None and *df* contains more than one distinct
    thread count, every (strategy, thread count) pair gets its own line so
    that samples taken with different thread counts are not joined into a
    single trace.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    use = df if threads is None else df[df["threads"] == threads]

    # Grouping by strategy alone would chain samples from different thread
    # counts into one line, so also split on "threads" when the selection
    # mixes several of them.
    per_thread = (
        threads is None
        and "threads" in use.columns
        and use["threads"].nunique() > 1
    )
    keys = ["strategy", "threads"] if per_thread else "strategy"

    plt.figure()
    drew_line = False
    for key, grp in use.groupby(keys, sort=False):
        # With two grouping keys pandas yields (strategy, threads) tuples.
        label = f"{key[0]}, threads={key[1]}" if per_thread else key
        # Draw samples in time order (plot_runtime_vs_threads likewise sorts
        # its x values) so out-of-order rows cannot make the line double back.
        grp = grp.sort_values("time_s", kind="stable")
        plt.plot(grp["time_s"], grp["memory_mb"], label=label)
        drew_line = True

    plt.title("Memory footprint over time")
    plt.xlabel("Time (s)")
    plt.ylabel("Memory (MB)")
    if drew_line:
        # legend() warns when there is no labelled line to list, e.g. when
        # no row matches the requested thread count.
        plt.legend()
    plt.tight_layout()
    plt.show()


def plot_cpu_over_time(df: pd.DataFrame, threads: int | None = None) -> None:
    """
    Plot the CPU utilisation of each strategy over time.

    The x-axis uses the "time_s" column and the y-axis the "cpu_percent"
    column of *df*; one line is drawn per value of the "strategy" column.
    Values above 100 % mean multiple cores are in use.

    If *threads* is given, only rows whose "threads" value equals it are
    shown.  If *threads* is None and *df* contains more than one distinct
    thread count, every (strategy, thread count) pair gets its own line so
    that samples taken with different thread counts are not joined into a
    single trace.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    use = df if threads is None else df[df["threads"] == threads]

    # Grouping by strategy alone would chain samples from different thread
    # counts into one line, so also split on "threads" when the selection
    # mixes several of them.
    per_thread = (
        threads is None
        and "threads" in use.columns
        and use["threads"].nunique() > 1
    )
    keys = ["strategy", "threads"] if per_thread else "strategy"

    plt.figure()
    drew_line = False
    for key, grp in use.groupby(keys, sort=False):
        # With two grouping keys pandas yields (strategy, threads) tuples.
        label = f"{key[0]}, threads={key[1]}" if per_thread else key
        # Draw samples in time order (plot_runtime_vs_threads likewise sorts
        # its x values) so out-of-order rows cannot make the line double back.
        grp = grp.sort_values("time_s", kind="stable")
        plt.plot(grp["time_s"], grp["cpu_percent"], label=label)
        drew_line = True

    plt.title("CPU utilisation over time")
    plt.xlabel("Time (s)")
    plt.ylabel("CPU (%)")
    if drew_line:
        # legend() warns when there is no labelled line to list, e.g. when
        # no row matches the requested thread count.
        plt.legend()
    plt.tight_layout()
    plt.show()


def plot_runtime_vs_threads(df: pd.DataFrame, seconds: bool = True) -> None:
    """
    Draw one runtime curve per strategy against the number of threads.

    Each strategy's rows are ordered by "threads" before plotting.  By
    default the "time_ms" values are divided by 1000 and the y-axis is
    labelled in seconds; pass *seconds=False* to plot the millisecond
    values unchanged.
    """
    y_label = "Time (s)" if seconds else "Time (ms)"

    plt.figure()
    for name, rows in df.groupby("strategy", sort=False):
        ordered = rows.sort_values("threads")
        runtime = ordered["time_ms"]
        if seconds:
            runtime = runtime / 1_000.0
        plt.plot(ordered["threads"], runtime, marker="o", label=name)

    plt.ylabel(y_label)
    plt.xlabel("Threads")
    plt.title("Runtime vs. thread count")
    plt.legend()
    plt.tight_layout()
    plt.show()

# ---------------------------------------------------------------------------
# 3.  Example usage – feel free to tweak
# ---------------------------------------------------------------------------

# (a) Memory footprint, restricted to the samples taken with one thread so
#     the figure stays readable
plot_memory_over_time(df=mem_df, threads=1)

# (b) CPU utilisation for the same one-thread samples
plot_cpu_over_time(df=mem_df, threads=1)

# (c) Thread scaling, using every row of the thread-scaling table
plot_runtime_vs_threads(df=thread_df)

# Captured cell output (the CSV could not be found when this was run):
# FileNotFoundError: [Errno 2] No such file or directory: 'memory_samples.csv'