In [7]:
import pandas as pd
from pathlib import Path
import re

# Configuration
# Root directory of the benchmark results loaded by this cell. The collection
# loop further down treats every sub-directory as a scenario and every
# directory inside a scenario as one benchmark run.
RESULTS_DIR = "archived_results/send_interval_payload_size/"


# Helper functions for parsing files
# Duration unit suffixes accepted in a report, expressed in nanoseconds.
# "s" must stay last: every other suffix also ends in "s".
_DURATION_UNITS_NS = (
    ("ns", 1),
    ("\u00b5s", 1_000),  # micro sign
    ("\u03bcs", 1_000),  # Greek small letter mu
    ("us", 1_000),
    ("ms", 1_000_000),
    ("s", 1_000_000_000),
)


def _parse_duration(text, target_unit):
    """Convert a duration string such as "1.20 s" into a float in target_unit.

    A bare number (no unit suffix) is taken to already be in target_unit.
    Raises ValueError when the text is neither a number nor a number
    followed by one of the known unit suffixes.
    """
    text = text.strip()
    target_ns = dict(_DURATION_UNITS_NS)[target_unit]
    for unit, unit_ns in _DURATION_UNITS_NS:
        if text.endswith(unit):
            value = float(text[: -len(unit)])
            if unit == target_unit:
                # No arithmetic on same-unit values, so they come back exactly
                # as written in the report.
                return value
            return value * (unit_ns / target_ns)
    return float(text)


def parse_report(file_path):
    """Extract summary metrics and latency percentiles from a benchmark report.

    The report is read as text. "Key: value" lines whose key is one of Count,
    Total, Slowest, Fastest, Average or Requests/sec are converted, and every
    "<N> % in <value> <unit>" occurrence becomes a "p<N>" entry.
    NOTE(review): the layout resembles the summary printed by the ghz gRPC
    benchmarking tool - confirm against the tool that produced these files.

    Durations may carry any of the units ns, us (also written with a micro
    sign), ms or s; they are normalised so that callers always see the same
    unit regardless of how the report printed the number.

    Args:
        file_path: Object exposing ``read_text()`` (e.g. ``pathlib.Path``).

    Returns:
        dict with the keys found in the report: ``count`` (int),
        ``total_time`` (seconds), ``slowest_ms``, ``fastest_ms``, ``avg_ms``
        (milliseconds), ``rps`` (float) and ``p<N>`` (milliseconds).
        Keys whose line is absent from the report are simply missing.

    Raises:
        ValueError: If a recognised metric has a value that cannot be parsed.
    """
    content = file_path.read_text()
    data = {}

    # Report label -> (output key, converter applied to the stripped value)
    metrics = {
        "Count": ("count", int),
        "Total": ("total_time", lambda x: _parse_duration(x, "s")),
        "Slowest": ("slowest_ms", lambda x: _parse_duration(x, "ms")),
        "Fastest": ("fastest_ms", lambda x: _parse_duration(x, "ms")),
        "Average": ("avg_ms", lambda x: _parse_duration(x, "ms")),
        "Requests/sec": ("rps", float),
    }

    for line in content.split("\n"):
        if ":" not in line:
            continue
        # Split on the first colon only so values containing ":" stay intact.
        key, val = line.split(":", 1)
        key = key.strip()
        if key in metrics:
            name, conv = metrics[key]
            data[name] = conv(val.strip())

    # Latency percentiles. The unit is captured instead of hard-coding "ms":
    # otherwise a percentile printed in another unit would be silently dropped
    # and the matching p<N> key would be missing from the result.
    latency_perc = re.findall(
        r"(\d+) % in ([\d.]+) (ns|[u\u00b5\u03bc]s|ms|s)\b", content
    )
    for perc, val, unit in latency_perc:
        data[f"p{perc}"] = _parse_duration(f"{val} {unit}", "ms")

    return data


# Memory unit suffixes accepted in a stats line, as multipliers to MiB.
# "B" must stay last: every other suffix also ends in "B".
_MIB_PER_UNIT = (
    ("KiB", 1 / 1024),
    ("MiB", 1.0),
    ("GiB", 1024.0),
    ("TiB", 1024.0 * 1024.0),
    ("B", 1 / (1024 * 1024)),
)


def _mem_to_mib(token):
    """Convert a memory reading such as "1.2GiB" into a float number of MiB.

    A bare number (no unit suffix) is taken to already be in MiB.
    Raises ValueError when the token is not a number with a known suffix.
    """
    for unit, factor in _MIB_PER_UNIT:
        if token.endswith(unit):
            value = float(token[: -len(unit)])
            # MiB readings are returned untouched so they match the file exactly.
            return value if unit == "MiB" else value * factor
    return float(token)


def parse_stats(file_path):
    """Average the CPU and memory samples recorded in a stats file.

    Every non-blank line is split on whitespace; the first token is a CPU
    percentage (optionally suffixed with "%") and the second a memory reading
    with an optional B/KiB/MiB/GiB/TiB suffix. Further tokens are ignored.
    NOTE(review): the line layout looks like ``docker stats`` output with a
    custom format - confirm against the script that writes these files.

    Args:
        file_path: Object exposing ``read_text()`` (e.g. ``pathlib.Path``).

    Returns:
        dict with the mean of each column: ``{"cpu": ..., "mem_mib": ...}``.

    Raises:
        IndexError: If a non-blank line has fewer than two tokens.
        ValueError: If a token cannot be parsed as a number.
    """
    stats = []
    for line in file_path.read_text().split("\n"):
        parts = line.split()
        if not parts:
            # Blank or whitespace-only line.
            continue
        stats.append(
            {
                "cpu": float(parts[0].strip("%")),
                # Normalise to MiB: readings are not guaranteed to stay in MiB
                # once usage crosses a unit boundary.
                "mem_mib": _mem_to_mib(parts[1]),
            }
        )
    return pd.DataFrame(stats).mean().to_dict()


def parse_params(file_path):
    """Read "- KEY=VALUE" bullet lines from a params file into a dict.

    Only lines that begin with "- " are considered. The remainder is split on
    its first "=", and both sides are stripped of surrounding whitespace.
    Bullet lines without "=" are ignored. Values are returned as strings.

    Args:
        file_path: Object exposing ``read_text()`` (e.g. ``pathlib.Path``).

    Returns:
        dict mapping parameter names to their string values.
    """
    parsed = {}
    for raw_line in file_path.read_text().split("\n"):
        if not raw_line.startswith("- "):
            continue
        # partition() yields an empty separator when the entry has no "=".
        name, separator, value = raw_line[2:].strip().partition("=")
        if separator:
            parsed[name.strip()] = value.strip()
    return parsed


# Main data collection
# Walks RESULTS_DIR/<scenario>/<run>/ and builds one table row per run from
# the run's report, stats and params files.
# NOTE(review): the next cell repeats this loop for another results directory;
# consider extracting a shared helper so the two cannot drift apart.
data = []
skipped_runs = []
# Both directory levels are sorted: iterdir() yields entries in arbitrary
# order, which would make the row index differ between identical re-runs.
for scenario_dir in sorted(Path(RESULTS_DIR).iterdir()):
    if not scenario_dir.is_dir():
        continue

    scenario = scenario_dir.name
    for run_dir in sorted(scenario_dir.iterdir()):
        if not run_dir.is_dir():
            continue

        # Parse files
        try:
            report = parse_report(run_dir / "arflow_server_bench.report")
            stats = parse_stats(run_dir / "arflow_server_bench.stats")
            params = parse_params(run_dir / "bench.params")
        except FileNotFoundError as exc:
            # Incomplete runs are still skipped, but remembered so the gap in
            # the table is visible instead of silent.
            skipped_runs.append((run_dir, exc))
            continue

        # Combine data
        row = {
            "scenario": scenario,
            "server_cpus": int(params.get("GRPC_SERVER_CPUS", 1)),
            "client_rps": int(params.get("GRPC_CLIENT_RPS", 0)),
            "frames_per_req": int(params.get("GRPC_CLIENT_FRAMES_PER_REQUEST", 0)),
            **report,
        }
        # The stats columns keep their existing doubled-prefix names
        # ("cpu_cpu", "mem_mem_mib") because the display below selects them.
        if "cpu" in stats:
            row["cpu_cpu"] = stats["cpu"]
        if "mem_mib" in stats:
            row["mem_mem_mib"] = stats["mem_mib"]
        data.append(row)

if skipped_runs:
    print(f"Skipped {len(skipped_runs)} incomplete run(s):")
    for skipped_dir, reason in skipped_runs:
        print(f"  {skipped_dir}: {reason}")

if not data:
    # Without this guard the failure surfaces as an opaque KeyError on "rps".
    raise FileNotFoundError(f"No complete benchmark runs found under {RESULTS_DIR!r}")

df = (
    pd.DataFrame(data)
    .assign(throughput=lambda frame: frame["rps"])
    .sort_values(by=["scenario", "server_cpus", "client_rps"])
)

# Display all configurations
print(RESULTS_DIR)
display(
    df[
        [
            "scenario",
            "server_cpus",
            "client_rps",
            "frames_per_req",
            "throughput",
            "p95",
            "cpu_cpu",
            "mem_mem_mib",
        ]
    ]
)

archived_results/send_interval_payload_size/


Unnamed: 0,scenario,server_cpus,client_rps,frames_per_req,throughput,p95,cpu_cpu,mem_mem_mib
4,heavy,1,5,60,4.98,120.43,64.074,623.38
3,heavy,1,10,30,9.98,60.41,63.069,320.32
0,heavy,1,20,15,19.98,31.89,70.267778,254.277778
1,heavy,2,5,60,4.98,112.63,62.747,600.37
2,heavy,2,10,30,9.98,61.3,66.564,240.99
5,heavy,2,20,15,19.98,33.04,74.805556,217.922222
10,light,1,5,60,4.98,2.7,2.768,90.189
7,light,1,10,30,9.98,2.34,3.622222,90.262222
11,light,1,20,15,19.98,2.09,5.432222,90.733333
9,light,2,5,60,4.98,2.75,2.739,90.259


In [8]:
# Batching results: same per-run collection as the previous cell, pointed at a
# different results directory and sorted by client rate before server CPUs.
# NOTE(review): this loop is a copy of the one in the previous cell; consider
# extracting a shared helper so the two cannot drift apart.
RESULTS_DIR = "archived_results/batching/"
data = []
skipped_runs = []
# Both directory levels are sorted: iterdir() yields entries in arbitrary
# order, which would make the row index differ between identical re-runs.
for scenario_dir in sorted(Path(RESULTS_DIR).iterdir()):
    if not scenario_dir.is_dir():
        continue

    scenario = scenario_dir.name
    for run_dir in sorted(scenario_dir.iterdir()):
        if not run_dir.is_dir():
            continue

        # Parse files
        try:
            report = parse_report(run_dir / "arflow_server_bench.report")
            stats = parse_stats(run_dir / "arflow_server_bench.stats")
            params = parse_params(run_dir / "bench.params")
        except FileNotFoundError as exc:
            # Incomplete runs are still skipped, but remembered so the gap in
            # the table is visible instead of silent.
            skipped_runs.append((run_dir, exc))
            continue

        # Combine data
        row = {
            "scenario": scenario,
            "server_cpus": int(params.get("GRPC_SERVER_CPUS", 1)),
            "client_rps": int(params.get("GRPC_CLIENT_RPS", 0)),
            "frames_per_req": int(params.get("GRPC_CLIENT_FRAMES_PER_REQUEST", 0)),
            **report,
        }
        # The stats columns keep their existing doubled-prefix names
        # ("cpu_cpu", "mem_mem_mib") because the display below selects them.
        if "cpu" in stats:
            row["cpu_cpu"] = stats["cpu"]
        if "mem_mib" in stats:
            row["mem_mem_mib"] = stats["mem_mib"]
        data.append(row)

if skipped_runs:
    print(f"Skipped {len(skipped_runs)} incomplete run(s):")
    for skipped_dir, reason in skipped_runs:
        print(f"  {skipped_dir}: {reason}")

if not data:
    # Without this guard the failure surfaces as an opaque KeyError on "rps".
    raise FileNotFoundError(f"No complete benchmark runs found under {RESULTS_DIR!r}")

df = (
    pd.DataFrame(data)
    .assign(throughput=lambda frame: frame["rps"])
    .sort_values(by=["scenario", "client_rps", "server_cpus"])
)

# Display all configurations
print(RESULTS_DIR)
display(
    df[
        [
            "scenario",
            "server_cpus",
            "client_rps",
            "frames_per_req",
            "throughput",
            "p95",
            "cpu_cpu",
            "mem_mem_mib",
        ]
    ]
)

archived_results/batching/


Unnamed: 0,scenario,server_cpus,client_rps,frames_per_req,throughput,p95,cpu_cpu,mem_mem_mib
0,heavy,1,20,15,19.98,37.62,81.658889,324.966667
2,heavy,2,20,15,19.98,39.38,80.79,202.99
3,heavy,1,500,1,176.7,67.43,102.92,103.888889
1,heavy,2,500,1,342.12,19.11,195.372,95.618
