# Benchmark Analysis Notebook

This notebook loads the summary and raw outputs created by the bench-harness CLI and produces tables and plots for comparison across profiles (e.g., `default` vs `netem40`).

**Assumptions**

- Your repo has this structure:

  - `analysis/results/<profile>/bench.csv`

  - `analysis/results/<profile>/raw/*.csv`

- You ran the harness with CSV exports enabled.

**What you'll get**

- Combined summary dataframe

- Side-by-side comparisons (means, p95, p99) by size

- Latency CDFs from raw samples for selected sizes

- PNG plots saved into `analysis/plots`



In [10]:
# Imports
import os, glob, re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Paths (relative to repo root); adjust if needed
RESULTS_DIR = '../results'
PLOTS_DIR = '../plots'

os.makedirs(PLOTS_DIR, exist_ok=True)

print('Results dir:', os.path.abspath(RESULTS_DIR))
print('Plots dir  :', os.path.abspath(PLOTS_DIR))

Results dir: /workspaces/mcp-vs-dta/analysis/results
Plots dir  : /workspaces/mcp-vs-dta/analysis/plots


## Load summary CSVs

In [11]:
def load_summaries(results_dir=RESULTS_DIR):
    import os, glob, pandas as pd
    paths = glob.glob(os.path.join(results_dir, '*', 'bench.csv'))
    frames = []
    for p in paths:
        profile = os.path.basename(os.path.dirname(p))
        df = pd.read_csv(p)
        df['profile'] = profile
        frames.append(df)
    if not frames:
        raise FileNotFoundError(f'No bench.csv files under {results_dir}/<profile>/')
    out = pd.concat(frames, ignore_index=True)
    # Ensure expected dtypes
    for col in ['sizeKB','meanMs','p50Ms','p95Ms','p99Ms','hz','iterations']:
        if col in out.columns:
            out[col] = pd.to_numeric(out[col], errors='coerce')
    return out

summary = load_summaries()
summary.head()

Unnamed: 0,timestamp,profile,section,implementation,operation,sizeKB,meanMs,p50Ms,p95Ms,p99Ms,hz,iterations,url
0,2025-10-07T14:03:02.387Z,delay40,HTTP,dta,,1,82.059189,82.291862,94.753367,98.02334,12.186326,122,http://localhost:8080/blob?size=512
1,2025-10-07T14:03:02.387Z,delay40,HTTP,dta,,1,80.854407,81.157213,94.545917,97.805563,12.36791,124,http://localhost:8080/blob?size=1024
2,2025-10-07T14:03:02.387Z,delay40,HTTP,dta,,2,82.005159,82.004658,95.527942,97.286851,12.194355,123,http://localhost:8080/blob?size=2048
3,2025-10-07T14:03:02.387Z,delay40,HTTP,dta,,4,80.99288,81.540715,94.998968,97.221742,12.346764,124,http://localhost:8080/blob?size=4096
4,2025-10-07T14:03:02.387Z,delay40,HTTP,dta,,8,82.329188,82.594712,94.603948,97.122271,12.146361,122,http://localhost:8080/blob?size=8192


## Basic sanity checks

In [12]:
print('Profiles:', sorted(summary['profile'].unique()))
print('Sections:', summary['section'].unique())
print('Implementations:', summary['implementation'].unique())
print('Rows:', len(summary))

# Quick pivot to inspect HTTP p95 by size/profile/impl
http = summary[summary['section']=='HTTP'].copy()
fs   = summary[summary['section']=='FS'].copy()

pivot_http_p95 = http.pivot_table(index=['sizeKB'], columns=['profile','implementation'], values='p95Ms', aggfunc='mean')
pivot_http_p95

Profiles: ['bursty', 'default', 'delay40', 'loss1', 'netem40', 'slowlink']
Sections: ['HTTP' 'FS']
Implementations: ['dta' 'mcp']
Rows: 468


profile,bursty,bursty,default,default,delay40,delay40,loss1,loss1,netem40,netem40,slowlink,slowlink
implementation,dta,mcp,dta,mcp,dta,mcp,dta,mcp,dta,mcp,dta,mcp
sizeKB,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1,24.418594,24.913792,0.217023,0.327968,94.649642,97.941158,1.097315,1.190522,96.328544,96.324954,150.757846,149.78766
2,24.196658,25.174122,0.212678,0.322106,95.527942,94.710858,0.936468,1.113848,96.08527,97.858765,154.413669,150.011543
4,24.394477,24.780767,0.218072,0.330158,94.998968,93.440774,0.911261,1.562109,95.80307,95.833667,154.434817,146.915249
8,23.963279,24.667605,0.224571,0.334972,94.603948,95.941186,0.755902,1.070476,97.576961,97.741807,157.945026,157.869171
16,24.595514,24.925238,0.245251,0.34807,96.389019,95.98317,0.897534,1.467351,98.740387,217.985343,168.098732,163.024782
32,24.110637,24.895709,0.279709,0.412053,95.733441,95.031819,1.20573,1.337088,97.089811,97.265967,184.853348,176.633395
64,24.403502,24.758775,0.290801,0.614325,94.761984,97.976936,1.444778,1.298903,97.245696,97.405217,202.93809,206.225457
128,24.59674,25.146354,0.346035,0.988962,96.846746,99.358791,1.761363,2.314937,167.833936,100.216598,256.777866,259.211621
256,24.939923,25.59025,0.576994,1.242139,98.951004,98.291613,2.107714,1.927613,162.643954,100.446501,361.34866,364.092271
512,25.02967,25.625437,1.361181,1.624367,100.164217,99.839194,1.93871,2.382171,222.525392,222.239532,573.48312,565.587664


## Compute deltas and ratios between profiles

In [13]:
def compare_profiles(df, metric='p95Ms', base='default', other='netem40'):
    key_cols = ['implementation','sizeKB']
    if 'operation' in df.columns and df['operation'].notna().any():
        key_cols = ['implementation','operation','sizeKB']
    base_df  = df[df['profile']==base][key_cols + [metric]].rename(columns={metric:f'{metric}_{base}'})
    other_df = df[df['profile']==other][key_cols + [metric]].rename(columns={metric:f'{metric}_{other}'})
    merged = pd.merge(base_df, other_df, on=key_cols, how='inner')
    merged[f'delta_{metric}'] = merged[f'{metric}_{other}'] - merged[f'{metric}_{base}']
    merged[f'ratio_{metric}'] = merged[f'{metric}_{other}'] / merged[f'{metric}_{base}']
    return merged.sort_values(key_cols)

http_cmp_p95 = compare_profiles(http, metric='p95Ms', base='default', other='netem40')
fs_cmp_p95   = compare_profiles(fs,   metric='p95Ms', base='default', other='netem40')

http_cmp_p95

Unnamed: 0,implementation,sizeKB,p95Ms_default,p95Ms_netem40,delta_p95Ms,ratio_p95Ms
0,dta,1,0.22242,96.422754,96.200333,433.515684
1,dta,1,0.22242,96.234335,96.011915,432.668558
2,dta,1,0.211626,96.422754,96.211128,455.628106
3,dta,1,0.211626,96.234335,96.022709,454.73777
4,dta,2,0.212678,96.08527,95.872592,451.787537
5,dta,4,0.218072,95.80307,95.584998,439.318526
6,dta,8,0.224571,97.576961,97.35239,434.504125
7,dta,16,0.245251,98.740387,98.495136,402.609192
8,dta,32,0.279709,97.089811,96.810102,347.110319
9,dta,64,0.290801,97.245696,96.954895,334.406906


## Plot helpers

In [14]:
def save_lineplot(x, ys, labels, title, xlabel, ylabel, out_path):
    import matplotlib.pyplot as plt
    plt.figure()
    for y, label in zip(ys, labels):
        plt.plot(x, y, marker='o', label=label)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
    plt.tight_layout()
    plt.savefig(out_path, dpi=150)
    plt.close()

def save_barplot(x, y, title, xlabel, ylabel, out_path, rotate=False):
    import matplotlib.pyplot as plt
    plt.figure()
    plt.bar(x, y)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if rotate:
        plt.xticks(rotation=45, ha='right')
    plt.grid(True, axis='y', linestyle='--', linewidth=0.5, alpha=0.6)
    plt.tight_layout()
    plt.savefig(out_path, dpi=150)
    plt.close()

## HTTP p95 vs size (per implementation, per profile)

In [15]:
for impl in sorted(http['implementation'].unique()):
    subset = http[http['implementation']==impl]
    for profile in sorted(subset['profile'].unique()):
        sub2 = subset[subset['profile']==profile].sort_values('sizeKB')
        x = sub2['sizeKB'].astype(int).tolist()
        y = sub2['p95Ms'].tolist()
        out = os.path.join(PLOTS_DIR, f'http_p95_{impl}_{profile}.png')
        save_lineplot(x, [y], [f'{impl}-{profile}'],
                      title=f'HTTP p95 vs size — {impl} [{profile}]',
                      xlabel='Size (KB)', ylabel='p95 (ms)', out_path=out)
        print('wrote', out)

wrote ../plots/http_p95_dta_bursty.png
wrote ../plots/http_p95_dta_default.png
wrote ../plots/http_p95_dta_delay40.png
wrote ../plots/http_p95_dta_loss1.png
wrote ../plots/http_p95_dta_netem40.png
wrote ../plots/http_p95_dta_slowlink.png
wrote ../plots/http_p95_mcp_bursty.png
wrote ../plots/http_p95_mcp_default.png
wrote ../plots/http_p95_mcp_delay40.png
wrote ../plots/http_p95_mcp_loss1.png
wrote ../plots/http_p95_mcp_netem40.png
wrote ../plots/http_p95_mcp_slowlink.png


## FS p95 vs size (per implementation, per profile)

In [16]:
for impl in sorted(fs['implementation'].unique()):
    subset = fs[fs['implementation']==impl]
    for profile in sorted(subset['profile'].unique()):
        sub2 = subset[subset['profile']==profile].sort_values('sizeKB')
        for op in sorted(sub2['operation'].unique()):
            sub3 = sub2[sub2['operation']==op]
            x = sub3['sizeKB'].astype(int).tolist()
            y = sub3['p95Ms'].tolist()
            out = os.path.join(PLOTS_DIR, f'fs_p95_{impl}_{op}_{profile}.png')
            save_lineplot(x, [y], [f'{impl}-{op}-{profile}'],
                          title=f'FS p95 vs size — {impl}/{op} [{profile}]',
                          xlabel='Size (KB)', ylabel='p95 (ms)', out_path=out)
            print('wrote', out)

wrote ../plots/fs_p95_dta_read_bursty.png
wrote ../plots/fs_p95_dta_remove_bursty.png
wrote ../plots/fs_p95_dta_write_bursty.png
wrote ../plots/fs_p95_dta_read_default.png
wrote ../plots/fs_p95_dta_remove_default.png
wrote ../plots/fs_p95_dta_write_default.png
wrote ../plots/fs_p95_dta_read_delay40.png
wrote ../plots/fs_p95_dta_remove_delay40.png
wrote ../plots/fs_p95_dta_write_delay40.png
wrote ../plots/fs_p95_dta_read_loss1.png
wrote ../plots/fs_p95_dta_remove_loss1.png
wrote ../plots/fs_p95_dta_write_loss1.png
wrote ../plots/fs_p95_dta_read_netem40.png
wrote ../plots/fs_p95_dta_remove_netem40.png
wrote ../plots/fs_p95_dta_write_netem40.png
wrote ../plots/fs_p95_dta_read_slowlink.png
wrote ../plots/fs_p95_dta_remove_slowlink.png
wrote ../plots/fs_p95_dta_write_slowlink.png
wrote ../plots/fs_p95_mcp_read_bursty.png
wrote ../plots/fs_p95_mcp_remove_bursty.png
wrote ../plots/fs_p95_mcp_write_bursty.png
wrote ../plots/fs_p95_mcp_read_default.png
wrote ../plots/fs_p95_mcp_remove_default.p

## Load RAW samples and plot CDFs (HTTP)

In [17]:
def load_raw_http(results_dir=RESULTS_DIR):
    import os, glob, re, pandas as pd
    paths = glob.glob(os.path.join(results_dir, '*', 'raw', 'HTTP_*_*.csv'))
    frames = []
    for p in paths:
        # Try POSIX path first
        m = re.search(r'results/(.*?)/raw/HTTP_(.*?)_(\d+)B_http\.csv', p)
        if not m:
            # Try Windows-like paths
            m = re.search(r'results\\(.*?)\\raw\\HTTP_(.*?)_(\d+)B_http\.csv', p)
        if not m:
            continue
        profile, impl, sizeB = m.group(1), m.group(2), int(m.group(3))
        df = pd.read_csv(p)
        df['profile'] = profile
        df['implementation'] = impl
        df['sizeBytes'] = sizeB
        frames.append(df)
    if not frames:
        raise FileNotFoundError('No HTTP raw CSVs found')
    return pd.concat(frames, ignore_index=True)

def ecdf(values):
    import numpy as np
    v = np.sort(np.asarray(values))
    n = v.size
    y = np.arange(1, n+1) / n
    return v, y

raw_http = load_raw_http()

# choose a couple of sizes for CDF plots (edit as needed)
sizes_to_plot = sorted(raw_http['sizeBytes'].unique())[:2]  # first two sizes
for sz in sizes_to_plot:
    sub = raw_http[raw_http['sizeBytes']==sz]
    for (profile, impl), g in sub.groupby(['profile','implementation']):
        x,y = ecdf(g['latencyMs'].values)
        out = os.path.join(PLOTS_DIR, f'cdf_http_{impl}_{profile}_{sz}B.png')
        import matplotlib.pyplot as plt
        plt.figure()
        plt.plot(x, y)
        plt.title(f'HTTP ECDF — {impl} [{profile}], size={sz} B')
        plt.xlabel('Latency (ms)')
        plt.ylabel('ECDF')
        plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
        plt.tight_layout()
        plt.savefig(out, dpi=150)
        plt.close()
        print('wrote', out)

wrote ../plots/cdf_http_dta_bursty_512B.png
wrote ../plots/cdf_http_mcp_bursty_512B.png
wrote ../plots/cdf_http_dta_default_512B.png
wrote ../plots/cdf_http_mcp_default_512B.png
wrote ../plots/cdf_http_dta_delay40_512B.png
wrote ../plots/cdf_http_mcp_delay40_512B.png
wrote ../plots/cdf_http_dta_loss1_512B.png
wrote ../plots/cdf_http_mcp_loss1_512B.png
wrote ../plots/cdf_http_dta_netem40_512B.png
wrote ../plots/cdf_http_mcp_netem40_512B.png
wrote ../plots/cdf_http_dta_slowlink_512B.png
wrote ../plots/cdf_http_mcp_slowlink_512B.png
wrote ../plots/cdf_http_dta_bursty_1024B.png
wrote ../plots/cdf_http_mcp_bursty_1024B.png
wrote ../plots/cdf_http_dta_default_1024B.png
wrote ../plots/cdf_http_mcp_default_1024B.png
wrote ../plots/cdf_http_dta_delay40_1024B.png
wrote ../plots/cdf_http_mcp_delay40_1024B.png
wrote ../plots/cdf_http_dta_loss1_1024B.png
wrote ../plots/cdf_http_mcp_loss1_1024B.png
wrote ../plots/cdf_http_dta_netem40_1024B.png
wrote ../plots/cdf_http_mcp_netem40_1024B.png
wrote ../p

## Save combined summary to a convenient CSV

In [18]:
combined_out = os.path.join(PLOTS_DIR, 'combined_bench_summary.csv')
summary.to_csv(combined_out, index=False)
print('wrote', combined_out)

wrote ../plots/combined_bench_summary.csv
