In [None]:
# Analysis Notebook for MCP-Attack Experiments
#
# This notebook loads Locust CSV outputs and MCP server metrics and
# generates publication-ready plots (PNG) into notebooks/outputs/.

import os
import glob
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

# Plot style: applied once here so every matplotlib figure created by the
# cells below shares the same look.
# NOTE(review): the "seaborn-v0_8" style name presumably needs a reasonably
# recent matplotlib release — confirm against the pinned version.
plt.style.use("seaborn-v0_8")

# Paths: use /app as the project root when that directory exists, otherwise
# fall back to the current working directory.
PROJECT_ROOT = Path("/app") if Path("/app").exists() else Path(".")
# Directory scanned for experiment CSV files.
RESULTS_DIR = PROJECT_ROOT / "results"
# Directory that receives the generated PNG files.
OUTPUT_DIR = PROJECT_ROOT / "notebooks" / "outputs"
# Create the output directory (and parents) up front; a no-op if it exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so the run log shows which root was picked.
print(f"RESULTS_DIR={RESULTS_DIR}")
print(f"OUTPUT_DIR={OUTPUT_DIR}")


In [None]:
def find_experiments(
    base_dir: Path,
    marker_files: tuple[str, ...] = ("locust_stats_history.csv", "mcp_metrics.csv"),
) -> list[Path]:
    """Return every directory under ``base_dir`` that holds experiment output.

    A directory counts as an experiment when it directly contains at least one
    of ``marker_files``. The search is recursive, so nested layouts such as
    ``results/<group>/<run>/`` are found as well.

    Args:
        base_dir: Root directory to search.
        marker_files: File names whose presence marks an experiment directory.
            The default accepts directories with a Locust stats history and
            sample directories that only have MCP metrics.

    Returns:
        The matching directories, de-duplicated and sorted. An empty list when
        ``base_dir`` does not exist or nothing matches.
    """
    if not base_dir.exists():
        return []
    # A set de-duplicates directories that contain several marker files
    # without the quadratic ``x not in list`` scans.
    found: set[Path] = set()
    for marker in marker_files:
        found.update(hit.parent for hit in base_dir.glob(f"**/{marker}"))
    return sorted(found)

# Discover the experiment directories once; later cells iterate over this
# list. Each entry is printed relative to RESULTS_DIR to keep the log short.
experiments = find_experiments(RESULTS_DIR)
print("Found experiments:")
for p in experiments:
    print(" -", p.relative_to(RESULTS_DIR))


In [None]:
def read_locust_history(exp_dir: Path) -> pd.DataFrame | None:
    path = exp_dir / "locust_stats_history.csv"
    if not path.exists():
        return None
    df = pd.read_csv(path)
    # Normalize columns
    # Common columns: Timestamp, Requests/s, Failures/s, 50%, 95%, 99%
    rename_map = {}
    for col in df.columns:
        if col.lower().startswith("timestamp"):
            rename_map[col] = "Timestamp"
        if col.lower().startswith("requests/s"):
            rename_map[col] = "Requests/s"
        if col.lower().startswith("failures/s"):
            rename_map[col] = "Failures/s"
        if col.strip() == "50%":
            rename_map[col] = "p50"
        if col.strip() == "95%":
            rename_map[col] = "p95"
        if col.strip() == "99%":
            rename_map[col] = "p99"
        if col.lower().startswith("average") or col.strip() == "Average":
            rename_map[col] = "avg"
    df = df.rename(columns=rename_map)
    # Keep only global rows if Type/Name columns exist
    if "Type" in df.columns and "Name" in df.columns:
        mask = (df["Type"] == "Aggregated") & (df["Name"] == "All")
        if mask.any():
            df = df[mask]
    # Coerce types
    for c in ["Requests/s", "Failures/s", "p50", "p95", "p99", "avg"]:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")
    # Add time index
    if "Timestamp" in df.columns:
        df["Timestamp"] = pd.to_datetime(df["Timestamp"], errors="coerce")
        df = df.sort_values("Timestamp")
    return df


def read_cpu_mem(exp_dir: Path) -> pd.DataFrame | None:
    path = exp_dir / "cpu_mem.csv"
    if not path.exists():
        return None
    df = pd.read_csv(path)
    if "timestamp" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
        df = df.sort_values("timestamp")
    return df


def read_mcp_metrics(exp_dir: Path) -> pd.DataFrame | None:
    path = exp_dir / "mcp_metrics.csv"
    if not path.exists():
        return None
    df = pd.read_csv(path)
    if "timestamp" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
        df = df.sort_values("timestamp")
    return df


In [None]:
def plot_rps_vs_latency(exp_dir: Path, df_hist: pd.DataFrame):
    """Plot request rate against latency percentiles for one experiment.

    RPS is drawn on the left axis and whichever of ``p50``/``p95``/``p99``
    are present on the right axis. The figure is saved as
    ``rps_vs_latency__<relative experiment path>.png`` under ``OUTPUT_DIR``.

    Args:
        exp_dir: Experiment directory; must be located under ``RESULTS_DIR``.
        df_hist: Normalised Locust history frame. ``None`` or an empty frame
            prints a notice and skips the plot.
    """
    if df_hist is None or df_hist.empty:
        print(f"No stats_history for {exp_dir}")
        return
    # Resolve the label before creating the figure so a bad path cannot leave
    # an open figure behind.
    rel = exp_dir.relative_to(RESULTS_DIR)
    # Fall back to the row index when the CSV carried no timestamp column
    # instead of failing with a KeyError.
    has_time = "Timestamp" in df_hist.columns
    x_values = df_hist["Timestamp"] if has_time else df_hist.index

    fig, ax1 = plt.subplots(figsize=(8, 4.5))
    try:
        if "Requests/s" in df_hist.columns:
            ax1.plot(x_values, df_hist["Requests/s"], label="RPS", color="tab:blue")
            ax1.set_ylabel("RPS", color="tab:blue")
            ax1.tick_params(axis='y', labelcolor='tab:blue')
        ax2 = ax1.twinx()
        for col, label, color in [("p50", "p50", "tab:green"), ("p95", "p95", "tab:orange"), ("p99", "p99", "tab:red")]:
            if col in df_hist.columns:
                ax2.plot(x_values, df_hist[col], label=label, color=color, alpha=0.8)
        ax2.set_ylabel("Latency (ms)")
        ax1.set_xlabel("Time" if has_time else "Sample")
        # Merge both axes' handles so a single legend covers every line.
        lines_1, labels_1 = ax1.get_legend_handles_labels()
        lines_2, labels_2 = ax2.get_legend_handles_labels()
        ax2.legend(lines_1 + lines_2, labels_1 + labels_2, loc="upper right")
        ax2.set_title(f"RPS vs Latency — {rel}")
        out = OUTPUT_DIR / f"rps_vs_latency__{rel}.png"
        # Nested experiment paths contain separators, so the target may sit in
        # a sub-directory that does not exist yet.
        out.parent.mkdir(parents=True, exist_ok=True)
        fig.tight_layout()
        fig.savefig(out, dpi=150)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    print("Saved", out)


def plot_latency_histogram(exp_dir: Path, df_mcp: pd.DataFrame):
    """Save a log-scaled histogram of ``latency_ms`` for one experiment.

    Args:
        exp_dir: Experiment directory; must be located under ``RESULTS_DIR``.
        df_mcp: MCP metrics frame. When it is ``None``, empty, or lacks a
            ``latency_ms`` column a notice is printed and nothing is plotted.
    """
    usable = (
        df_mcp is not None
        and not df_mcp.empty
        and "latency_ms" in df_mcp.columns
    )
    if not usable:
        print(f"No mcp_metrics for {exp_dir}")
        return

    fig, axis = plt.subplots(figsize=(6, 4))
    latencies = df_mcp["latency_ms"].dropna()
    # log=True puts the bin counts on a logarithmic scale.
    axis.hist(latencies, bins=40, log=True, color="tab:purple", alpha=0.8)
    axis.set_xlabel("Latency (ms)")
    axis.set_ylabel("Frequency (log)")
    axis.set_title(f"Latency Histogram — {exp_dir.relative_to(RESULTS_DIR)}")

    target = OUTPUT_DIR / f"latency_hist__{exp_dir.relative_to(RESULTS_DIR)}.png"
    target.parent.mkdir(parents=True, exist_ok=True)
    fig.tight_layout()
    fig.savefig(target, dpi=150)
    plt.close(fig)
    print("Saved", target)


def plot_cpu_vs_failures(exp_dir: Path, df_cpu: pd.DataFrame, df_hist: pd.DataFrame):
    if df_cpu is None or df_cpu.empty or df_hist is None or df_hist.empty:
        print(f"No cpu/failures data for {exp_dir}")
        return
    # Resample to 1s and merge
    c = df_cpu.copy()
    c = c.set_index(pd.to_datetime(c["timestamp"]))
    c = c.resample("1s").mean(numeric_only=True)
    h = df_hist.copy()
    h = h.set_index(pd.to_datetime(h["Timestamp"]))
    h = h.resample("1s").mean(numeric_only=True)
    dfm = c.join(h, how="inner", lsuffix="_cpu", rsuffix="_locust")
    fig, ax1 = plt.subplots(figsize=(8, 4.5))
    if "cpu_percent" in dfm.columns:
        ax1.plot(dfm.index, dfm["cpu_percent"], color="tab:blue", label="CPU %")
        ax1.set_ylabel("CPU %", color="tab:blue")
        ax1.tick_params(axis='y', labelcolor='tab:blue')
    ax2 = ax1.twinx()
    if "Failures/s" in dfm.columns:
        ax2.plot(dfm.index, dfm["Failures/s"], color="tab:red", label="Failures/s", alpha=0.7)
    ax1.set_xlabel("Time")
    lines_1, labels_1 = ax1.get_legend_handles_labels()
    lines_2, labels_2 = ax2.get_legend_handles_labels()
    ax2.legend(lines_1 + lines_2, labels_1 + labels_2, loc="upper right")
    plt.title(f"CPU% vs Failures/s — {exp_dir.relative_to(RESULTS_DIR)}")
    out = OUTPUT_DIR / f"cpu_vs_failures__{exp_dir.relative_to(RESULTS_DIR)}.png"
    out.parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out, dpi=150)
    plt.close()
    print("Saved", out)


def plot_cachehit_vs_latency(exp_dir: Path, df_mcp: pd.DataFrame):
    """Save a box plot of ``latency_ms`` grouped by ``cache_hit``.

    The figure is saved as ``cachehit_vs_latency__<relative experiment
    path>.png`` under ``OUTPUT_DIR``.

    Args:
        exp_dir: Experiment directory; must be located under ``RESULTS_DIR``.
        df_mcp: MCP metrics frame with ``cache_hit`` and ``latency_ms``
            columns. A notice is printed and nothing is plotted when the
            frame is ``None``/empty, a column is missing, or no row has both
            values.
    """
    if df_mcp is None or df_mcp.empty or "cache_hit" not in df_mcp.columns:
        print(f"No cache_hit metric for {exp_dir}")
        return
    if "latency_ms" not in df_mcp.columns:
        # boxplot(column="latency_ms") would raise KeyError without it.
        print(f"No latency_ms metric for {exp_dir}")
        return
    # Only rows carrying both values can contribute to a box.
    samples = df_mcp[["cache_hit", "latency_ms"]].dropna()
    if samples.empty:
        print(f"No latency_ms metric for {exp_dir}")
        return

    rel = exp_dir.relative_to(RESULTS_DIR)
    # group by cache_hit and plot boxplot of latency
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        samples.boxplot(column="latency_ms", by="cache_hit", ax=ax)
        ax.set_xlabel("cache_hit (0/1)")
        ax.set_ylabel("Latency (ms)")
        # pandas adds its own "Boxplot grouped by ..." super-title; blank it
        # so only the experiment title is shown.
        fig.suptitle("")
        ax.set_title(f"Cache-hit vs Latency — {rel}")
        out = OUTPUT_DIR / f"cachehit_vs_latency__{rel}.png"
        out.parent.mkdir(parents=True, exist_ok=True)
        fig.tight_layout()
        fig.savefig(out, dpi=150)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    print("Saved", out)


In [None]:
# Run plots for all discovered experiments.
# Each reader returns None when its CSV is absent, and each plot function
# prints a "No ..." notice and returns early for None/empty input, so an
# experiment that has only some of the files still yields the other plots.
for exp in experiments:
    df_hist = read_locust_history(exp)
    df_cpu = read_cpu_mem(exp)
    df_mcp = read_mcp_metrics(exp)
    plot_rps_vs_latency(exp, df_hist)
    plot_latency_histogram(exp, df_mcp)
    plot_cpu_vs_failures(exp, df_cpu, df_hist)
    plot_cachehit_vs_latency(exp, df_mcp)

print("Done.")


In [None]:
import re

def read_locust_stats(exp_dir: Path) -> pd.DataFrame | None:
    path = exp_dir / "locust_stats.csv"
    if not path.exists():
        return None
    df = pd.read_csv(path)
    # Normalize and filter request rows
    rename_map = {}
    if "95%" in df.columns:
        rename_map["95%"] = "p95"
    if "50%" in df.columns:
        rename_map["50%"] = "p50"
    if "Average" in df.columns:
        rename_map["Average"] = "avg"
    df = df.rename(columns=rename_map)
    if "Type" in df.columns:
        df = df[df["Type"] == "Request"]
    return df


def summarize_validate_p95(exp_dirs: list[Path]) -> pd.DataFrame:
    """Collect p95 latency per payload size and depth across experiments.

    Request names are matched against ``validate_<size>B_d<depth>``; rows
    whose name does not match, or whose p95 value cannot be converted to a
    float, are skipped.

    Args:
        exp_dirs: Experiment directories located under ``RESULTS_DIR``.

    Returns:
        A frame with the columns ``experiment``, ``size_bytes``, ``depth`` and
        ``p95``. The columns are present even when no row matched, so callers
        can reference them on an empty result.
    """
    columns = ["experiment", "size_bytes", "depth", "p95"]
    rows: list[dict] = []
    pattern = re.compile(r"validate_(\d+)B_d(\d+)")
    for exp_dir in exp_dirs:
        df = read_locust_stats(exp_dir)
        if df is None or df.empty:
            continue
        for _, r in df.iterrows():
            m = pattern.search(str(r.get("Name", "")))
            if not m:
                continue
            # Prefer the normalised column, fall back to the raw header.
            p95_val = r.get("p95", r.get("95%", None))
            try:
                p95 = float(p95_val)
            except (TypeError, ValueError):
                # Missing (None) or non-numeric value, e.g. "N/A".
                continue
            rows.append({
                "experiment": exp_dir.relative_to(RESULTS_DIR).as_posix(),
                "size_bytes": int(m.group(1)),
                "depth": int(m.group(2)),
                "p95": p95,
            })
    return pd.DataFrame(rows, columns=columns)


def plot_payload_p95_bars(df: pd.DataFrame):
    """Save a grouped bar chart of mean p95 latency by payload size and depth.

    Args:
        df: Summary frame with ``size_bytes``, ``depth`` and ``p95`` columns.
            ``None`` or an empty frame prints a notice and skips the plot.
    """
    nothing_to_plot = df is None or df.empty
    if nothing_to_plot:
        print("No payload p95 data to plot")
        return

    # One row per payload size, one column (bar series) per depth.
    by_size = df.pivot_table(
        index="size_bytes",
        columns="depth",
        values="p95",
        aggfunc="mean",
    )
    by_size = by_size.sort_index()

    axes = by_size.plot(kind="bar", figsize=(8, 4.5))
    axes.set_xlabel("Payload size (bytes)")
    axes.set_ylabel("p95 latency (ms)")
    axes.set_title("Payload size/depth vs p95 (bar)")

    target = OUTPUT_DIR / "payload_p95__combined.png"
    figure = axes.get_figure()
    figure.tight_layout()
    figure.savefig(target, dpi=150)
    plt.close(figure)
    print("Saved", target)


In [None]:
# Build payload p95 bar chart across experiments
# Keep only experiment directories that contain at least one entry whose
# file name includes "validate".
# NOTE(review): the "validate_<size>B_d<depth>" pattern parsed by
# summarize_validate_p95 is matched against the Name column of
# locust_stats.csv, not against file names — confirm that filtering on file
# names here is intended.
val_dirs = [p for p in experiments if any("validate" in (f.name) for f in p.iterdir())]
summary_df = summarize_validate_p95(val_dirs)
plot_payload_p95_bars(summary_df)
