# Analyze Benchmark Results
Extract details from the benchmark results such as:
- Mean per project
- Standard deviation (std) per project
- Relative difference of each project's mean to the Slurm (bare-metal) baseline, in %

These details are relevant for the discussion of the benchmark results in the thesis.

In [69]:
import os
import numpy as np
import pandas as pd

In [70]:
def _run_sort_key(filename: str):
    """Return a sort key that orders result files by their numeric run number.

    Files whose stem is a plain number (e.g. ``006.csv``) sort by that number,
    so ``10.csv`` ranks above ``9.csv``. Files with a non-numeric stem rank
    below every numbered file and are ordered by name among themselves.
    """
    stem = os.path.splitext(filename)[0]
    if stem.isdecimal():
        return (1, int(stem), filename)
    return (0, 0, filename)


def read_bench_csv(benchmark: str) -> pd.DataFrame:
    """Load the latest result CSV of every project for one benchmark.

    For each of the projects ``slurm``, ``bridge-operator``, ``hpk`` and
    ``ksi`` the directory ``data/<benchmark>/<project>`` is searched for
    ``*.csv`` files and the one with the highest run number is read
    (``;``-separated). Projects without a directory or without any CSV file
    are skipped.

    Side effect: if the current working directory is named ``analysis``, the
    process changes to the directory two levels up so that the relative
    ``data/`` path resolves.

    Args:
        benchmark: Name of the benchmark sub-directory below ``data/``.

    Returns:
        The concatenation of the selected CSV files, in project order. The
        original per-file row index is kept.

    Raises:
        ValueError: If no result file was found for any project.
    """
    # If the notebook is executed from the analysis directory, change to the
    # content root directory so the relative data path below is valid.
    if os.path.split(os.getcwd())[-1] == "analysis":
        os.chdir("../..")

    data_dir = f"data/{benchmark}"

    benchmark_files = []
    for project in ["slurm", "bridge-operator", "hpk", "ksi"]:
        project_dir = os.path.join(data_dir, project)
        if not os.path.isdir(project_dir):
            continue

        # Only consider CSV files; other entries (notes, editor or OS
        # artifacts) must not win the "latest file" selection.
        csv_files = [f for f in os.listdir(project_dir) if f.lower().endswith(".csv")]
        if not csv_files:
            continue

        # Pick the file with the highest run number (numeric, not lexicographic).
        latest = max(csv_files, key=_run_sort_key)
        benchmark_files.append(os.path.join(project_dir, latest))

    print(f"Detected benchmark files: {benchmark_files}")

    if not benchmark_files:
        raise ValueError(f"No benchmark CSV files found below '{data_dir}'")

    return pd.concat([pd.read_csv(p, sep=";") for p in benchmark_files])

def analyze(df: pd.DataFrame, column: str, baseline: str = "slurm") -> pd.DataFrame:
    """Summarize one metric per project and compare it to a baseline project.

    Args:
        df: Benchmark data containing a ``project`` column and ``column``.
        column: Name of the metric column to aggregate.
        baseline: Project whose mean is the reference for the relative
            difference. Defaults to ``"slurm"``.

    Returns:
        A DataFrame with one row per project (sorted by project name) and the
        columns ``project``, ``<column>`` (mean), ``std`` (sample standard
        deviation) and ``difference`` (relative difference of the mean to the
        baseline mean, in percent; 0 for the baseline itself).

    Raises:
        IndexError: If ``baseline`` does not occur in the ``project`` column.
    """
    grouped = df.groupby("project")[column]

    # Mean and std come from the same grouping, so their rows are in the same
    # (sorted) project order and can be combined positionally.
    result = grouped.mean().reset_index()
    result["std"] = grouped.std().to_numpy()

    # Calculate the relative performance difference to the baseline project.
    baseline_rows = result.loc[result["project"] == baseline, column]
    if baseline_rows.empty:
        raise IndexError(
            f"Baseline project '{baseline}' not found in column 'project'"
        )
    baseline_perf = baseline_rows.iloc[0]

    result["difference"] = (result[column] - baseline_perf) / baseline_perf * 100

    return result

In [71]:
# Startup time: per-project mean/std of the `millis` column and the relative
# difference (in %) of each mean to the slurm baseline.
df = read_bench_csv("startup-time")
analyze(df, "millis")

Detected benchmark files: ['data/startup-time/slurm/006.csv', 'data/startup-time/bridge-operator/001.csv', 'data/startup-time/hpk/003.csv', 'data/startup-time/ksi/001.csv']


Unnamed: 0,project,millis,std,difference
0,bridge-operator,2725.2,273.006227,2364.014467
1,hpk,2497.0,571.270708,2157.685353
2,ksi,53920.6,4988.988368,48652.802893
3,slurm,110.6,9.890512,0.0


In [72]:
# sysbench CPU: per-project mean/std of the `score` column and the relative
# difference (in %) of each mean to the slurm baseline.
df = read_bench_csv("sysbench-cpu")
analyze(df, "score")

Detected benchmark files: ['data/sysbench-cpu/slurm/004.csv', 'data/sysbench-cpu/bridge-operator/003.csv', 'data/sysbench-cpu/hpk/002.csv', 'data/sysbench-cpu/ksi/008.csv']


Unnamed: 0,project,score,std,difference
0,bridge-operator,15100.646,7.896046,0.014246
1,hpk,14680.162,17.281713,-2.770693
2,ksi,14578.438,7.702918,-3.444429
3,slurm,15098.495,5.047207,0.0


In [73]:
# STREAM memory: collapse the four kernel columns into a single score per row
# (row-wise mean of copy, scale, add and triad), then summarize per project.
df = read_bench_csv("stream-memory")
df['score'] = df[['copy', 'scale', 'add', 'triad']].mean(axis=1)
analyze(df, "score")

Detected benchmark files: ['data/stream-memory/slurm/003.csv', 'data/stream-memory/bridge-operator/002.csv', 'data/stream-memory/hpk/000.csv', 'data/stream-memory/ksi/001.csv']


Unnamed: 0,project,score,std,difference
0,bridge-operator,70582.5475,112.527925,-0.099129
1,hpk,70626.8975,224.873759,-0.036357
2,ksi,70277.2875,201.864257,-0.531187
3,slurm,70652.585,199.723832,0.0


In [74]:
# fio sequential disk I/O, read side: convert both throughput columns before
# summarizing `readthroughput` per project.
df = read_bench_csv("fio-diskseq")
df["readthroughput"] = df["readthroughput"] / 1024  # KiB to MiB
df["writethroughput"] = df["writethroughput"] / 1024  # KiB to MiB
analyze(df, "readthroughput")

Detected benchmark files: ['data/fio-diskseq/slurm/002.csv', 'data/fio-diskseq/bridge-operator/001.csv', 'data/fio-diskseq/hpk/003.csv', 'data/fio-diskseq/ksi/000.csv']


Unnamed: 0,project,readthroughput,std,difference
0,bridge-operator,384.722363,32.11495,-0.388654
1,hpk,343.667578,90.624364,-11.018456
2,ksi,319.406152,50.799687,-17.300163
3,slurm,386.223437,59.850493,0.0


In [75]:
# fio sequential disk I/O, write side: the data is reloaded and converted
# again, then `writethroughput` is summarized per project.
df = read_bench_csv("fio-diskseq")
df["readthroughput"] = df["readthroughput"] / 1024  # KiB to MiB
df["writethroughput"] = df["writethroughput"] / 1024  # KiB to MiB
analyze(df, "writethroughput")

Detected benchmark files: ['data/fio-diskseq/slurm/002.csv', 'data/fio-diskseq/bridge-operator/001.csv', 'data/fio-diskseq/hpk/003.csv', 'data/fio-diskseq/ksi/000.csv']


Unnamed: 0,project,writethroughput,std,difference
0,bridge-operator,89.33125,5.207441,4.210802
1,hpk,81.642578,3.866118,-4.758541
2,ksi,74.985449,3.381752,-12.524522
3,slurm,85.72168,8.768964,0.0


In [76]:
# fio random disk I/O, read side: convert both throughput columns before
# summarizing `readthroughput` per project.
df = read_bench_csv("fio-diskrnd")
df["readthroughput"] = df["readthroughput"] / 1024  # KiB to MiB
df["writethroughput"] = df["writethroughput"] / 1024  # KiB to MiB
analyze(df, "readthroughput")

Detected benchmark files: ['data/fio-diskrnd/slurm/001.csv', 'data/fio-diskrnd/bridge-operator/001.csv', 'data/fio-diskrnd/hpk/001.csv', 'data/fio-diskrnd/ksi/000.csv']


Unnamed: 0,project,readthroughput,std,difference
0,bridge-operator,214.809473,29.693126,0.961768
1,hpk,206.056348,36.073874,-3.152254
2,ksi,199.788867,28.875146,-6.098008
3,slurm,212.763184,29.506144,0.0


In [77]:
# fio random disk I/O, write side: the data is reloaded and converted again,
# then `writethroughput` is summarized per project.
df = read_bench_csv("fio-diskrnd")
df["readthroughput"] = df["readthroughput"] / 1024  # KiB to MiB
df["writethroughput"] = df["writethroughput"] / 1024  # KiB to MiB
analyze(df, "writethroughput")

Detected benchmark files: ['data/fio-diskrnd/slurm/001.csv', 'data/fio-diskrnd/bridge-operator/001.csv', 'data/fio-diskrnd/hpk/001.csv', 'data/fio-diskrnd/ksi/000.csv']


Unnamed: 0,project,writethroughput,std,difference
0,bridge-operator,89.053613,4.073581,4.207929
1,hpk,80.437988,8.946701,-5.873823
2,ksi,77.759668,3.78673,-9.007915
3,slurm,85.457617,7.817036,0.0


In [78]:
# netperf TCP latency: per-project mean/std of the `mean_latency` column.
# A positive `difference` means the project's mean is higher than slurm's.
df = read_bench_csv("netperf-latency-tcp")
analyze(df, "mean_latency")

Detected benchmark files: ['data/netperf-latency-tcp/slurm/000.csv', 'data/netperf-latency-tcp/bridge-operator/000.csv', 'data/netperf-latency-tcp/hpk/000.csv', 'data/netperf-latency-tcp/ksi/000.csv']


Unnamed: 0,project,mean_latency,std,difference
0,bridge-operator,58.144,0.312061,0.609081
1,hpk,58.57,0.230266,1.346207
2,ksi,81.851,2.814121,41.630329
3,slurm,57.792,0.106228,0.0


In [79]:
# iperf3 bandwidth: derive a `score` column from `scoreBitsPerSec` by scaling
# it down by 10^6, then summarize it per project.
df = read_bench_csv("iperf3-bandwidth")
df['score'] = df['scoreBitsPerSec'] / 1000000  # to MBit/s
analyze(df, "score")

Detected benchmark files: ['data/iperf3-bandwidth/slurm/001.csv', 'data/iperf3-bandwidth/bridge-operator/000.csv', 'data/iperf3-bandwidth/hpk/001.csv', 'data/iperf3-bandwidth/ksi/000.csv']


Unnamed: 0,project,score,std,difference
0,bridge-operator,9413.718226,0.856264,9.2e-05
1,hpk,7386.499128,53.765454,-21.534661
2,ksi,612.821227,37.151688,-93.49012
3,slurm,9413.709552,1.291106,0.0
