In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib import gridspec

os.makedirs("plots", exist_ok=True)

In [None]:
bench_raw = pd.read_csv("build/benchmark.csv")
bench_raw["Test Case"] = "TC " + bench_raw["Test Case"].astype(str)
bench_raw["Cycles"] = pd.to_numeric(bench_raw["Cycles"], errors="coerce")
bench_raw = bench_raw.dropna(subset=["Cycles"])

orig_raw = pd.read_csv(
    "build/original.csv",
    names=["Function", "Test Case", "Iteration", "Cycles"],
    header=None,
)
orig_raw["Test Case"] = "TC " + orig_raw["Test Case"].astype(str)
orig_raw["Cycles"] = pd.to_numeric(orig_raw["Cycles"], errors="coerce")
orig_raw = orig_raw.dropna(subset=["Cycles"])

flops_df = pd.read_csv("build/flops.csv")
# ensure 'Test Case' is labeled consistently
if flops_df["Test Case"].dtype != object:
    flops_df["Test Case"] = "TC " + flops_df["Test Case"].astype(str)

prof_df = pd.read_csv("build/profiling.csv")
if prof_df["Test Case"].dtype != object:
    prof_df["Test Case"] = "TC " + prof_df["Test Case"].astype(str)

print(f"bench_raw:   {bench_raw.shape[0]} rows")
print(f"orig_raw:    {orig_raw.shape[0]} rows")
print(f"flops_df:    {flops_df.shape[0]} rows")
print(f"prof_df:     {prof_df.shape[0]} rows")

display(bench_raw.head(), orig_raw.head(), flops_df.head(), prof_df.head())

bench_raw:   2100000 rows
orig_raw:    50000 rows
flops_df:    90 rows
prof_df:     12600000 rows


Unnamed: 0,Function,Test Case,Iteration,Cycles
0,scalar Less SQRT + Approx,TC 0,0,66637
1,scalar Less SQRT + Approx,TC 1,0,69671
2,scalar Less SQRT + Approx,TC 2,0,67081
3,scalar Less SQRT + Approx,TC 3,0,70226
4,scalar Less SQRT + Approx,TC 4,0,69745


Unnamed: 0,Function,Test Case,Iteration,Cycles
1,Original,TC 0,0,433899.0
2,Original,TC 0,1,370074.0
3,Original,TC 0,2,360417.0
4,Original,TC 0,3,357827.0
5,Original,TC 0,4,356976.0


Unnamed: 0,Function,Section,Test Case,Flops,Memory,ADDS,MULS,DIVS,SQRT
0,Basic Implementation,collide_balls,TC 0,79218,240,40091,28107,8013,3007
1,Basic Implementation,Initialization,TC 0,106,144,40,57,5,4
2,Basic Implementation,Impulse,TC 0,16016,0,4004,8008,4004,0
3,Basic Implementation,Delta,TC 0,32032,0,22022,6006,4004,0
4,Basic Implementation,Velocity,TC 0,31034,0,14015,14016,0,3003


Unnamed: 0,Function,Section,Test Case,Iteration,Cycles
0,SIMD,collide_balls,TC 0,0,897139
1,SIMD,Initialization,TC 0,0,74
2,SIMD,Impulse,TC 0,0,36858
3,SIMD,Delta,TC 0,0,38073
4,SIMD,Velocity,TC 0,0,36001


In [None]:
def drop_top_outliers(df):
    mask = df.groupby(["Function", "Test Case"])["Cycles"].transform(
        lambda x: x <= x.mean() + 3 * x.std()
    )
    return df[mask].reset_index(drop=True)


bench_clean = drop_top_outliers(bench_raw)
orig_clean = drop_top_outliers(orig_raw)

print(f"bench_raw: {len(bench_raw)} rows -> bench_clean: {len(bench_clean)} rows")
print(f"orig_raw:  {len(orig_raw)} rows ->  orig_clean:  {len(orig_clean)} rows")

removed = (
    (
        bench_raw.groupby(["Function", "Test Case"]).size()
        - bench_clean.groupby(["Function", "Test Case"]).size()
    )
    .rename("n_removed")
    .reset_index()
)
print("\nTop-outliers removed (bench):")
display(removed.head())

bench_raw: 2100000 rows -> bench_clean: 2095777 rows
orig_raw:  50000 rows ->  orig_clean:  48862 rows

Top-outliers removed (bench):


Unnamed: 0,Function,Test Case,n_removed
0,Approx + Symmetry,TC 0,24
1,Approx + Symmetry,TC 1,54
2,Approx + Symmetry,TC 2,59
3,Approx + Symmetry,TC 3,78
4,Approx + Symmetry,TC 4,65


In [None]:
# isolate just the collide_balls rows
cb = prof_df[prof_df["Section"] == "collide_balls"]

# compute per-group threshold = mean + 3 * std
thr = cb.groupby(["Function", "Test Case"])["Cycles"].agg(["mean", "std"]).reset_index()
thr["threshold"] = thr["mean"] + 3 * thr["std"]

# find all (Function,TC,Iteration) where collide_balls exceeds that threshold
cb_thr = cb.merge(thr, on=["Function", "Test Case"])
bad_iters = cb_thr[cb_thr["Cycles"] > cb_thr["threshold"]][
    ["Function", "Test Case", "Iteration"]
].drop_duplicates()

# drop all rows in prof_df belonging to those bad iterations
prof_clean = (
    prof_df.merge(
        bad_iters.assign(to_drop=1),
        on=["Function", "Test Case", "Iteration"],
        how="left",
    )
    .query("to_drop != 1")
    .drop(columns="to_drop")
    .reset_index(drop=True)
)

print(f"prof_df:  {len(prof_df)} rows -> prof_clean: {len(prof_clean)} rows")
print("Example removed iterations:")
display(bad_iters.head())


prof_df:  12600000 rows -> prof_clean: 12439188 rows
Example removed iterations:


Unnamed: 0,Function,Test Case,Iteration
6802,SIMD,TC 2,1360
6874,SIMD,TC 4,1374
6882,SIMD,TC 2,1376
7129,SIMD,TC 4,1425
7340,SIMD,TC 0,1468


In [13]:
# group the cleaned benchmark + original data
bench_grouped = bench_clean.groupby(["Function", "Test Case"], as_index=False)[
    ["Cycles"]
].mean()
orig_grouped = orig_clean.groupby(["Function", "Test Case"], as_index=False)[
    ["Cycles"]
].mean()

# stack them so all plots can include "Original" alongside the bench variants
grouped_all = pd.concat([bench_grouped, orig_grouped], ignore_index=True)

# mean cycles across all test cases per function
mean_cycles_all = (
    grouped_all.groupby("Function", as_index=False)["Cycles"]
    .mean()
    .rename(columns={"Cycles": "MeanCycles_AllTC"})
)
mean_cycles_all = mean_cycles_all.sort_values(
    "MeanCycles_AllTC", ascending=False
).reset_index(drop=True)
mean_cycles_all["MeanCycles_AllTC"] = (
    mean_cycles_all["MeanCycles_AllTC"].round(0).astype(int)
)
print("=== mean_cycles_all ===")
display(mean_cycles_all)


=== mean_cycles_all ===


Unnamed: 0,Function,MeanCycles_AllTC
0,Original,360864
1,SIMD Optimized Impulse,111615
2,SIMD,111371
3,Code Motion,90485
4,Register Relieve,82648
5,SIMD scalar loop,79085
6,SIMD SSA,79032
7,Basic Implementation,77938
8,Scalar Less SQRT,70843
9,scalar Less SQRT + Approx,68842


In [14]:
# Mean cycles per test case (function x Test Case pivot)
mean_cycles_tc = (
    grouped_all.groupby(["Test Case", "Function"], as_index=False)["Cycles"]
    .mean()
    .pivot(index="Test Case", columns="Function", values="Cycles")
)
func_order = mean_cycles_all["Function"].tolist()
mean_cycles_tc = mean_cycles_tc[func_order]
mean_cycles_tc = mean_cycles_tc.round(0).astype(int)

print("=== mean_cycles_tc ===")
display(mean_cycles_tc)

=== mean_cycles_tc ===


Function,Original,SIMD Optimized Impulse,SIMD,Code Motion,Register Relieve,SIMD scalar loop,SIMD SSA,Basic Implementation,Scalar Less SQRT,scalar Less SQRT + Approx,Improved Symmetry,Reciprocal Sqrt Less IF,Reciprocal Sqrt Hoist,Reciprocal Sqrt IF,Approx + Symmetry
Test Case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
TC 0,358130,111268,111342,91838,83786,75245,78984,74628,71409,67403,67097,60997,53871,59195,56681
TC 1,359104,111708,111423,90023,81754,81608,79587,80072,70211,69716,69738,59426,63120,59345,54391
TC 2,357750,111074,111101,91838,83793,75304,78776,74656,71481,67094,67250,60574,53915,59238,56602
TC 3,360247,112025,111852,89273,82107,81722,79020,80255,70398,70177,69935,59522,63127,59491,54500
TC 4,369086,112003,111136,89453,81801,81546,78791,80078,70716,69820,70159,59426,63041,59382,54390


In [15]:
# Cost of operations per (Function, Test Case)
cost_ops = flops_df.groupby(["Function", "Test Case"], as_index=False)[
    ["ADDS", "MULS", "DIVS", "SQRT"]
].mean()
cost_ops[["ADDS", "MULS", "DIVS", "SQRT"]] = (
    cost_ops[["ADDS", "MULS", "DIVS", "SQRT"]].round(0).astype(int)
)
print("=== cost_ops ===")
display(cost_ops)

=== cost_ops ===


Unnamed: 0,Function,Test Case,ADDS,MULS,DIVS,SQRT
0,Approx + Symmetry,TC 0,15029,15702,336,668
1,Approx + Symmetry,TC 1,14361,15702,336,668
2,Approx + Symmetry,TC 2,15028,15698,336,668
3,Approx + Symmetry,TC 3,14404,15745,337,670
4,Approx + Symmetry,TC 4,14375,15714,336,668
5,Basic Implementation,TC 0,13364,9369,2671,1002
6,Basic Implementation,TC 1,12696,9369,2671,1002
7,Basic Implementation,TC 2,13363,9368,2670,1002
8,Basic Implementation,TC 3,12734,9396,2678,1005
9,Basic Implementation,TC 4,12709,9377,2673,1003


In [16]:
# Mean cycles per section (Function x Section x Test Case)
sec_cycles = prof_clean.groupby(["Function", "Section", "Test Case"], as_index=False)[
    "Cycles"
].mean()
sec_cycles["Cycles"] = sec_cycles["Cycles"].round(0).astype(int)
print("=== sec_cycles ===")
display(sec_cycles)

=== sec_cycles ===


Unnamed: 0,Function,Section,Test Case,Cycles
0,Approx + Symmetry,Delta,TC 0,40810
1,Approx + Symmetry,Delta,TC 1,40810
2,Approx + Symmetry,Delta,TC 2,40333
3,Approx + Symmetry,Delta,TC 3,40366
4,Approx + Symmetry,Delta,TC 4,41431
...,...,...,...,...
415,scalar Less SQRT + Approx,collide_balls,TC 0,921688
416,scalar Less SQRT + Approx,collide_balls,TC 1,920480
417,scalar Less SQRT + Approx,collide_balls,TC 2,919659
418,scalar Less SQRT + Approx,collide_balls,TC 3,922148


In [17]:

# Section‐level FlopsPerCycle: merge flops_df + sec_cycles
flops_sec = pd.merge(
    flops_df, sec_cycles, on=["Function", "Section", "Test Case"], how="inner"
)
flops_sec["FlopsPerCycle"] = flops_sec["Flops"] / flops_sec["Cycles"]
print("=== flops_sec (section-level FlopsPerCycle) ===")
display(flops_sec[["Function", "Section", "Test Case", "FlopsPerCycle"]])

=== flops_sec (section-level FlopsPerCycle) ===


Unnamed: 0,Function,Section,Test Case,FlopsPerCycle
0,Basic Implementation,collide_balls,TC 0,0.085159
1,Basic Implementation,Initialization,TC 0,0.841270
2,Basic Implementation,Impulse,TC 0,0.385705
3,Basic Implementation,Delta,TC 0,0.794070
4,Basic Implementation,Velocity,TC 0,0.748511
...,...,...,...,...
85,Code Motion,Initialization,TC 4,1.636364
86,Code Motion,Impulse,TC 4,0.399885
87,Code Motion,Delta,TC 4,0.602858
88,Code Motion,Velocity,TC 4,0.578025


In [18]:
# Overall FlopsPerCycle per Function x Test Case
overall_fp = flops_sec.groupby(["Function", "Test Case"], as_index=False)[
    ["Flops", "Cycles"]
].sum()
overall_fp["FlopsPerCycle"] = overall_fp["Flops"] / overall_fp["Cycles"]
print("=== overall_fp (Function x Test Case) ===")
display(overall_fp)

=== overall_fp (Function x Test Case) ===


Unnamed: 0,Function,Test Case,Flops,Cycles,FlopsPerCycle
0,Approx + Symmetry,TC 0,190408,1064804,0.17882
1,Approx + Symmetry,TC 1,186404,1066506,0.17478
2,Approx + Symmetry,TC 2,190376,1073019,0.177421
3,Approx + Symmetry,TC 3,186934,1068994,0.174869
4,Approx + Symmetry,TC 4,186562,1076629,0.173283
5,Basic Implementation,TC 0,158436,1053721,0.150359
6,Basic Implementation,TC 1,154432,1048929,0.147228
7,Basic Implementation,TC 2,158420,1052807,0.150474
8,Basic Implementation,TC 3,154882,1052302,0.147184
9,Basic Implementation,TC 4,154574,1053673,0.1467
