In [17]:
import pandas as pd
import numpy as np
import os
import json

In [18]:
# Benchmark names whose result files are stored under a different name;
# names not listed here are used unchanged when building the file path.
name_map = {
    "hamming": "hamming_distance",
    "euclidean": "min_euclidean_distance",
    "flat_map": "concat_map",
}


def read_stat(cfgs, cat, dr, name, view):
    """Collect one benchmark's statistic across several configurations.

    For every configuration in ``cfgs`` this reads
    ``output-{cfg}/{cat}/test_{file_name}.{dr}.output.csv``, where ``file_name``
    is ``name`` looked up in ``name_map`` (or ``name`` itself when it has no
    entry), keeps the rows whose ``public`` column equals ``view`` and takes
    the ``stat`` value of the first such row.

    Returns a dict holding ``"name"`` and ``"view"`` plus one entry per
    configuration. A configuration without a matching row is recorded as -1.
    """
    file_name = name_map.get(name, name)
    row = {"name": name, "view": view}
    for cfg in cfgs:
        table = pd.read_csv(f"output-{cfg}/{cat}/test_{file_name}.{dr}.output.csv")
        matches = table.loc[table["public"] == view]
        if len(matches) > 0:
            value = matches.iloc[0]["stat"]
        else:
            # Sentinel for "no measurement available for this view".
            value = -1
        # Convert to milliseconds (only real measurements, never the sentinel).
        if dr == "emp" and value >= 0:
            value = value / 1000
        row[cfg] = value
    return row

def read_stats(tests, cfgs, cat, dr):
    """Run ``read_stat`` for every ``(name, view)`` pair in ``tests``.

    Returns the resulting dicts as a list, in the same order as ``tests``.
    """
    rows = []
    for name, view in tests:
        rows.append(read_stat(cfgs, cat, dr, name, view))
    return rows

In [19]:
# Configuration names; read_stat turns each one into an `output-{cfg}` directory.
all_cfgs = ["old", "best", "no-smart-array", "no-reshape-guard", "no-memo"]
memo_cfgs = ["best", "no-memo"]

# Category name -> list of per-benchmark dicts produced by read_stats.
stats = {}

# List tests, as (benchmark name, view) pairs.
tests = [
    ("elem", 1000), ("hamming", 1000), ("euclidean", 1000),
    ("dot_prod", 1000), ("nth", 1000), ("map", 1000),
    ("filter", 200), ("insert", 200),
    ("insert_list", 100), ("append", 100),
    ("take", 200), ("flat_map", 200), ("span", 200), ("partition", 200),
]
stats["list"] = read_stats(tests, all_cfgs, "list", "emp")
stats["list-memo"] = read_stats(tests, memo_cfgs, "list-memo", "emp")

# Tree tests, as (benchmark name, view) pairs.
tests = [
    ("elem", 16), ("prob", 16), ("map", 16), ("filter", 16),
    ("swap", 16), ("path", 16), ("insert", 16),
    ("bind", 8), ("collect", 8),
]
stats["tree"] = read_stats(tests, all_cfgs, "tree", "emp")
stats["tree-memo"] = read_stats(tests, memo_cfgs, "tree-memo", "emp")

In [20]:
def roundx(x):
    """Format a number as a positional decimal string for the tables.

    Values whose magnitude is at least 1 are shown with at most two fractional
    digits. Smaller non-zero values get enough fractional digits to show their
    leading significant digits: ``ceil(|log10(|x|)|) + 1``. Trailing zeros are
    trimmed, but one digit is always kept after the decimal point.

    Zero, negative and non-finite inputs are accepted. The precision is chosen
    from the magnitude of ``x``, and zero / NaN / infinity fall back to the
    two-digit format, because ``log10`` of such values cannot be converted to
    an integer digit count.

    Parameters:
        x: a Python or NumPy real scalar.

    Returns:
        str: the formatted number.
    """
    magnitude = np.abs(x)
    # log10 is only usable for a finite, non-zero magnitude below 1.
    if magnitude >= 1 or magnitude == 0 or not np.isfinite(magnitude):
        return np.format_float_positional(x, 2, trim="0")
    digits = int(np.ceil(np.abs(np.log10(magnitude)))) + 1
    return np.format_float_positional(x, digits, trim="0")


def to_latex(df, name):
    """Write ``df`` as LaTeX to ``figs/{name}-full.tex``.

    The text comes from ``df.to_latex(index=False)``. Lines that are exactly
    ``\\toprule`` or ``\\bottomrule`` are left out; every other line is written
    followed by a newline. The ``figs`` directory must already exist.
    """
    rendered = df.to_latex(index=False).splitlines()
    kept = [row for row in rendered if row not in ("\\toprule", "\\bottomrule")]
    with open(f"figs/{name}-full.tex", "w") as w:
        w.writelines(f"{row}\n" for row in kept)


def gen_df(cat, gen, name):
    """Build the table for category ``cat`` and save it as LaTeX.

    ``gen`` is applied to every entry of ``stats[cat]``; the resulting dicts
    become the rows of a DataFrame, which is passed to ``to_latex`` together
    with ``name`` and then returned.
    """
    rows = list(map(gen, stats[cat]))
    table = pd.DataFrame(rows)
    to_latex(table, name)
    return table


# Make sure the directory that to_latex writes into exists (no error if it already does).
os.makedirs("figs", exist_ok=True)

In [21]:
def gen(d):
    """Build one table row comparing the "old" and "best" configurations.

    ``d`` is a stats entry with the keys "name", "view", "old" and "best".
    A non-negative "old" value is formatted with ``roundx`` and the ratio
    ``best * 100 / old`` is shown as a percentage. Any other "old" value is
    rendered as a red "failed" marker with an "N/A" ratio (read_stat stores -1
    when no measurement was found).
    """
    baseline, improved = d["old"], d["best"]
    if baseline >= 0:
        ratio_txt = f"{roundx(improved * 100 / baseline)}\\%"
        baseline_txt = roundx(baseline)
    else:
        ratio_txt = "N/A"
        baseline_txt = "\\textcolor{red}{\\bf failed}"
    improved_txt = roundx(improved)
    return {
        "Benchmark": f"\\verb|{d['name']}_{d['view']}|",
        "\\taype (ms)": baseline_txt,
        "\\taypsi (ms)": f"{improved_txt} \\hfill({ratio_txt})",
    }

In [22]:
# Build the table from stats['list'] with whichever `gen` was defined last,
# write it to figs/list-bench-full.tex and display the DataFrame.
gen_df('list', gen, 'list-bench')

Unnamed: 0,Benchmark,\taype (ms),\taypsi (ms)
0,\verb|elem_1000|,8.14,7.96 \hfill(97.79\%)
1,\verb|hamming_1000|,15.36,14.86 \hfill(96.75\%)
2,\verb|euclidean_1000|,68.38,68.29 \hfill(99.86\%)
3,\verb|dot_prod_1000|,67.0,67.05 \hfill(100.07\%)
4,\verb|nth_1000|,11.46,12.33 \hfill(107.62\%)
5,\verb|map_1000|,2174.8,5.2 \hfill(0.24\%)
6,\verb|filter_200|,\textcolor{red}{\bf failed},88.83 \hfill(N/A)
7,\verb|insert_200|,5864.05,89.69 \hfill(1.53\%)
8,\verb|insert_list_100|,\textcolor{red}{\bf failed},4769.67 \hfill(N/A)
9,\verb|append_100|,4331.22,44.79 \hfill(1.03\%)


In [23]:
# Build the table from stats['tree'] with whichever `gen` was defined last,
# write it to figs/tree-bench-full.tex and display the DataFrame.
gen_df('tree', gen, 'tree-bench')

Unnamed: 0,Benchmark,\taype (ms),\taypsi (ms)
0,\verb|elem_16|,451.5,415.45 \hfill(92.02\%)
1,\verb|prob_16|,13256.86,12939.28 \hfill(97.6\%)
2,\verb|map_16|,4473.02,217.69 \hfill(4.87\%)
3,\verb|filter_16|,8732.06,444.03 \hfill(5.09\%)
4,\verb|swap_16|,\textcolor{red}{\bf failed},4339.23 \hfill(N/A)
5,\verb|path_16|,\textcolor{red}{\bf failed},910.87 \hfill(N/A)
6,\verb|insert_16|,84336.74,1471.47 \hfill(1.74\%)
7,\verb|bind_8|,22156.45,545.04 \hfill(2.46\%)
8,\verb|collect_8|,\textcolor{red}{\bf failed},144.49 \hfill(N/A)


In [24]:
def gen(d):
    """Build one table row comparing three other configurations against "best".

    ``d`` is a stats entry with the keys "name", "view", "best",
    "no-smart-array", "no-reshape-guard" and "no-memo". Each cell shows the
    configuration's value formatted with ``roundx`` followed by its ratio to
    the "best" value.
    """
    base = d["best"]
    variants = ("no-smart-array", "no-reshape-guard", "no-memo")
    raw = [d[key] for key in variants]
    # Ratios are taken on the raw numbers, before anything is turned into text.
    ratios = [roundx(value / base) for value in raw]
    # Only the disabled "Base" column below would use this.
    base = roundx(base)
    shown = [roundx(value) for value in raw]
    smart_cell, guard_cell, memo_cell = (
        f"{value} \\hfill({ratio}x)" for value, ratio in zip(shown, ratios)
    )
    return {
        "Benchmark": f"\\verb|{d['name']}_{d['view']}|",
        # "Base": f"{base}",
        "No smart array (ms)": smart_cell,
        "No reshape guard (ms)": guard_cell,
        "No memoization (ms)": memo_cell,
    }

In [25]:
# Build the table from stats["list"] with whichever `gen` was defined last,
# write it to figs/list-opt-full.tex and display the DataFrame.
gen_df("list", gen, "list-opt")

Unnamed: 0,Benchmark,No smart array (ms),No reshape guard (ms),No memoization (ms)
0,\verb|elem_1000|,19.18 \hfill(2.41x),7.97 \hfill(1.0x),17.83 \hfill(2.24x)
1,\verb|hamming_1000|,51.31 \hfill(3.45x),14.62 \hfill(0.98x),35.47 \hfill(2.39x)
2,\verb|euclidean_1000|,78.01 \hfill(1.14x),68.13 \hfill(1.0x),77.0 \hfill(1.13x)
3,\verb|dot_prod_1000|,88.23 \hfill(1.32x),67.03 \hfill(1.0x),77.89 \hfill(1.16x)
4,\verb|nth_1000|,21.18 \hfill(1.72x),12.18 \hfill(0.99x),20.59 \hfill(1.67x)
5,\verb|map_1000|,2131.66 \hfill(410.17x),138.58 \hfill(26.67x),37.93 \hfill(7.3x)
6,\verb|filter_200|,5839.03 \hfill(65.73x),95.66 \hfill(1.08x),116.94 \hfill(1.32x)
7,\verb|insert_200|,258.29 \hfill(2.88x),95.94 \hfill(1.07x),90.3 \hfill(1.01x)
8,\verb|insert_list_100|,23124.69 \hfill(4.85x),5255.67 \hfill(1.1x),4845.8 \hfill(1.02x)
9,\verb|append_100|,4285.69 \hfill(95.69x),49.58 \hfill(1.11x),61.08 \hfill(1.36x)


In [26]:
# Build the table from stats["tree"] with whichever `gen` was defined last,
# write it to figs/tree-opt-full.tex and display the DataFrame.
gen_df("tree", gen, "tree-opt")

Unnamed: 0,Benchmark,No smart array (ms),No reshape guard (ms),No memoization (ms)
0,\verb|elem_16|,430.41 \hfill(1.04x),413.89 \hfill(1.0x),415.25 \hfill(1.0x)
1,\verb|prob_16|,13167.34 \hfill(1.02x),12940.54 \hfill(1.0x),13005.35 \hfill(1.01x)
2,\verb|map_16|,4531.33 \hfill(20.82x),629.61 \hfill(2.89x),217.72 \hfill(1.0x)
3,\verb|filter_16|,8866.67 \hfill(19.97x),1137.33 \hfill(2.56x),441.19 \hfill(0.99x)
4,\verb|swap_16|,8831.12 \hfill(2.04x),5573.29 \hfill(1.28x),4348.46 \hfill(1.0x)
5,\verb|path_16|,9311.84 \hfill(10.22x),1104.23 \hfill(1.21x),911.74 \hfill(1.0x)
6,\verb|insert_16|,19608.74 \hfill(13.33x),2174.14 \hfill(1.48x),1465.79 \hfill(1.0x)
7,\verb|bind_8|,19998.21 \hfill(36.69x),886.23 \hfill(1.63x),543.96 \hfill(1.0x)
8,\verb|collect_8|,11979.05 \hfill(82.91x),152.76 \hfill(1.06x),187.19 \hfill(1.3x)


In [27]:
def gen(d):
    """Build one table row comparing "no-memo" against "best".

    ``d`` is a stats entry with the keys "name", "view", "best" and "no-memo".
    The "best" value is shown on its own; the "no-memo" value is followed by
    its ratio to "best". All numbers are formatted with ``roundx``.
    """
    base_raw, memo_raw = d["best"], d["no-memo"]
    # The ratio uses the raw numbers, so it is computed before formatting them.
    ratio_txt = roundx(memo_raw / base_raw)
    base_txt = roundx(base_raw)
    memo_txt = roundx(memo_raw)
    row = {"Benchmark": f"\\verb|{d['name']}_{d['view']}|"}
    row["Base (ms)"] = f"{base_txt}"
    row["No memoization (ms)"] = f"{memo_txt} \\hfill({ratio_txt}x)"
    return row

In [28]:
# Build the table from stats["list-memo"] with whichever `gen` was defined last,
# write it to figs/list-memo-full.tex and display the DataFrame.
gen_df("list-memo", gen, "list-memo")

Unnamed: 0,Benchmark,Base (ms),No memoization (ms)
0,\verb|elem_1000|,13.67,18.32 (1.34x)
1,\verb|hamming_1000|,26.01,36.36 (1.4x)
2,\verb|euclidean_1000|,73.53,77.7 (1.06x)
3,\verb|dot_prod_1000|,78.42,78.68 (1.0x)
4,\verb|nth_1000|,18.08,21.1 (1.17x)
5,\verb|map_1000|,20.65,42.96 (2.08x)
6,\verb|filter_200|,88.66,119.96 (1.35x)
7,\verb|insert_200|,90.27,90.57 (1.0x)
8,\verb|insert_list_100|,4800.31,4877.3 (1.02x)
9,\verb|append_100|,45.59,63.29 (1.39x)


In [29]:
# Build the table from stats["tree-memo"] with whichever `gen` was defined last,
# write it to figs/tree-memo-full.tex and display the DataFrame.
gen_df("tree-memo", gen, "tree-memo")

Unnamed: 0,Benchmark,Base (ms),No memoization (ms)
0,\verb|elem_16|,445.16,427.57 (0.96x)
1,\verb|prob_16|,12941.54,12943.53 (1.0x)
2,\verb|map_16|,270.86,257.36 (0.95x)
3,\verb|filter_16|,503.21,495.7 (0.99x)
4,\verb|swap_16|,4488.94,4429.0 (0.99x)
5,\verb|path_16|,958.72,967.59 (1.01x)
6,\verb|insert_16|,1628.29,1796.53 (1.1x)
7,\verb|bind_8|,555.98,589.86 (1.06x)
8,\verb|collect_8|,145.08,187.48 (1.29x)


In [30]:
# Statistics of compilation and solver


def gen_solver_stat(d):
    """Summarize one solver-statistics record.

    ``d`` must provide "#atoms" and "statistics". "statistics" is a dict with
    one entry per function; each entry has an "#atoms" count and a "queries"
    list of numbers.

    Returns a dict with:
        "#fun":         number of entries in "statistics"
        "#atoms":       ``d["#atoms"]`` unchanged
        "#atoms_total": sum of the per-function "#atoms" values
        "#queries":     total number of queries over all functions
        "solver":       sum of all query values (summed per function first)

    An empty "statistics" dict, or a function with an empty "queries" list,
    is valid and simply contributes zero. Only the returned values are
    computed, so no ``max()`` is taken over a possibly empty sequence.
    """
    n_atoms = d["#atoms"]
    per_function = d["statistics"].values()
    return {
        "#fun": len(per_function),
        "#atoms": n_atoms,
        "#atoms_total": sum(entry["#atoms"] for entry in per_function),
        "#queries": sum(len(entry["queries"]) for entry in per_function),
        "solver": sum(sum(entry["queries"]) for entry in per_function),
    }


def gen_compile_stat(cat, name):
    """Build the compile-statistics row for one suite.

    Reads ``output-compile/{cat}/{name}.compile.stat`` (one float per line)
    and ``output-compile/{cat}/{name}.solver.stat`` (one JSON record per line,
    each summarized with ``gen_solver_stat``).

    The count columns come from the first solver record only; the two time
    columns are the means over all lines of the respective file, formatted
    with ``roundx``.
    """
    with open(f"output-compile/{cat}/{name}.compile.stat", "r") as r:
        compile_times = list(map(float, r))
    with open(f"output-compile/{cat}/{name}.solver.stat", "r") as r:
        runs = [gen_solver_stat(json.loads(line)) for line in r]

    first_run = runs[0]
    row = {
        "\\#Functions": first_run["#fun"],
        "\\#Types": first_run["#atoms"],
        "\\#Atoms": first_run["#atoms_total"],
        "\\#Queries": first_run["#queries"],
    }
    row["Total (s)"] = roundx(np.mean(compile_times))
    row["Solver (s)"] = roundx(np.mean([run["solver"] for run in runs]))
    return row

In [31]:
# Suite directory under output-compile/ -> label shown in the "Suite" column.
cats = {
    "list": "List",
    "tree": "Tree",
    "dating": "Dating",
    "record": "Medical records",
    "calculator": "Secure calculator",
    "dtree": "Decision tree",
    "kmeans": "K-means",
    "misc": "Miscellaneous",
    "stress-solver": "List (stress)",
}

# Suites whose stat files are not named after their own directory.
cat_map = {"stress-solver": "list"}

data = [
    {"Suite": label, **gen_compile_stat(suite, cat_map.get(suite, suite))}
    for suite, label in cats.items()
]

df = pd.DataFrame(data)
to_latex(df, "compile-stats")
df

Unnamed: 0,Suite,\#Functions,\#Types,\#Atoms,\#Queries,Total (s),Solver (s)
0,List,20,6,70,84,0.48,0.086
1,Tree,14,8,44,31,0.49,0.027
2,Dating,4,12,16,10,0.57,0.02
3,Medical records,20,18,58,55,0.49,0.084
4,Secure calculator,2,8,6,5,1.34,0.014
5,Decision tree,2,12,6,17,0.37,0.019
6,K-means,15,10,66,964,12.18,11.21
7,Miscellaneous,11,6,42,83,0.3,0.11
8,List (stress),20,11,70,391,5.72,5.06


In [32]:
# Done!