In [None]:
import pandas as pd
import numpy as np
import os
import json

In [None]:
# Benchmarks whose short name differs from the test name embedded in the
# output CSV file name (see read_stat); names not listed here are used as-is.
name_map = {
    "hamming": "hamming_distance",
    "euclidean": "min_euclidean_distance",
    "flat_map": "concat_map",
}


def read_stat(cfgs, cat, dr, name, view):
    """Collect one benchmark's statistic under each configuration.

    For every ``cfg`` in ``cfgs`` the file
    ``output-{cfg}/{cat}/test_{test}.{dr}.output.csv`` is read, where ``test``
    is ``name`` translated through ``name_map`` (or ``name`` itself when it has
    no entry). The value recorded is the ``stat`` column of the first row whose
    ``public`` column equals ``view``; ``-1`` is recorded when no row matches.

    Returns a dict with the keys ``"name"`` and ``"view"`` followed by one key
    per configuration.
    """
    test_name = name_map.get(name, name)
    row = {"name": name, "view": view}
    for cfg in cfgs:
        table = pd.read_csv(f"output-{cfg}/{cat}/test_{test_name}.{dr}.output.csv")
        matching = table.loc[table["public"] == view]
        if matching.shape[0] > 0:
            stat = matching.iloc[0]["stat"]
        else:
            # Sentinel for "no measurement available for this view".
            stat = -1
        # Convert to milliseconds (only "emp" results are scaled; the -1
        # sentinel is left untouched).
        if dr == "emp" and stat >= 0:
            stat /= 1000
        row[cfg] = stat
    return row

def read_stats(tests, cfgs, cat, dr):
    """Run read_stat for every ``(name, view)`` pair in ``tests``.

    Returns the resulting dicts as a list, in the same order as ``tests``.
    """
    rows = []
    for name, view in tests:
        rows.append(read_stat(cfgs, cat, dr, name, view))
    return rows

In [None]:
# Rows produced by read_stats, keyed by suite name.
stats = {}

# Configurations read for the full tables, and the pair read for the
# memoization suites.
all_cfgs = ["old", "old-sa", "best", "no-smart-array", "no-reshape-guard", "no-memo"]
memo_cfgs = ["best", "no-memo"]

# List tests: (benchmark name, value matched against the "public" column).
tests = [
    ("elem", 1000), ("hamming", 1000), ("euclidean", 1000),
    ("dot_prod", 1000), ("nth", 1000), ("map", 1000),
    ("filter", 200), ("insert", 200),
    ("insert_list", 100), ("append", 100),
    ("take", 200), ("flat_map", 200), ("span", 200), ("partition", 200),
]
stats["list"] = read_stats(tests, all_cfgs, cat="list", dr="emp")
stats["list-memo"] = read_stats(tests, memo_cfgs, cat="list-memo", dr="emp")

# Tree tests: same layout as the list tests above. `tests` is rebound here,
# so it holds the tree tests once this cell has run.
tests = [
    ("elem", 16), ("prob", 16), ("map", 16), ("filter", 16),
    ("swap", 16), ("path", 16), ("insert", 16),
    ("bind", 8), ("collect", 8),
]
stats["tree"] = read_stats(tests, all_cfgs, cat="tree", dr="emp")
stats["tree-memo"] = read_stats(tests, memo_cfgs, cat="tree-memo", dr="emp")

In [None]:
def roundx(x):
    """Format a number as a positional decimal string for the tables.

    Values with magnitude >= 1 keep at most two digits after the decimal
    point. Smaller non-zero values keep ``ceil(|log10(|x|)|) + 1`` digits after
    the decimal point, so the leading significant digits are not rounded away
    (e.g. ``0.0123`` -> ``"0.012"``).

    Zero and negative inputs are supported: the precision is chosen from the
    magnitude, so neither ``log10(0)`` nor the logarithm of a negative number
    is ever evaluated.

    Returns the string produced by ``np.format_float_positional`` with
    ``trim="0"``.
    """
    magnitude = abs(x)
    if magnitude >= 1:
        precision = 2
    elif magnitude > 0:
        precision = int(np.ceil(np.abs(np.log10(magnitude)))) + 1
    else:
        # Zero (or NaN, for which both comparisons above are false): there is
        # no leading digit to preserve, so use the default precision.
        precision = 2
    return np.format_float_positional(x, precision, trim="0")


def to_latex(df, name):
    """Write ``df`` as LaTeX to ``figs/{name}-full.tex``.

    The text comes from ``df.to_latex(index=False)``; lines that are exactly
    ``\\toprule`` or ``\\bottomrule`` are left out, and every remaining line is
    written followed by a newline. The ``figs`` directory must already exist.
    """
    dropped_rules = ("\\toprule", "\\bottomrule")
    kept = [row for row in df.to_latex(index=False).splitlines() if row not in dropped_rules]
    with open(f"figs/{name}-full.tex", "w") as out:
        out.writelines(row + "\n" for row in kept)


def gen_df(cat, gen, name):
    """Build a table for one suite and export it.

    Applies ``gen`` to every entry of ``stats[cat]``, assembles the results
    into a DataFrame, passes that frame to ``to_latex`` under ``name``, and
    returns the frame.
    """
    rows = list(map(gen, stats[cat]))
    table = pd.DataFrame(rows)
    to_latex(table, name)
    return table


# to_latex writes into "figs/" without creating it, so make sure the directory
# exists before any table is generated (no error if it is already there).
os.makedirs("figs", exist_ok=True)

In [None]:
def anal_stat(old, new):
    """Format a baseline time together with ``new`` as a percentage of it.

    Returns ``(old_text, perc_text)``. When ``old`` is not ``>= 0`` the
    baseline is shown as a red "failed" LaTeX marker and the percentage is
    ``"N/A"``; otherwise both values are formatted with ``roundx`` and the
    percentage gets a LaTeX-escaped ``%`` suffix.
    """
    if not old >= 0:
        return ("\\textcolor{red}{\\bf failed}", "N/A")
    share = roundx(new * 100 / old)
    return (roundx(old), f"{share}\\%")

def gen(d):
    """Build one row of the comparison table from a read_stat dict.

    The ``"old"`` and ``"old-sa"`` timings are each compared against
    ``"best"`` via ``anal_stat``; the last column shows the ``"best"`` time
    followed by the two resulting percentages.

    Note: later cells of this notebook define other functions also named
    ``gen``; this definition is the one in effect only until they run.
    """
    taype_ms, taype_sa_ms, taypsi_ms = d["old"], d["old-sa"], d["best"]
    taype_cell, vs_taype = anal_stat(taype_ms, taypsi_ms)
    taype_sa_cell, vs_taype_sa = anal_stat(taype_sa_ms, taypsi_ms)
    taypsi_cell = roundx(taypsi_ms)
    row = {"Benchmark": f"\\verb|{d['name']}_{d['view']}|"}
    row["\\taype (ms)"] = taype_cell
    row["\\taype-SA (ms)"] = taype_sa_cell
    row["\\taypsi (ms)"] = f"{taypsi_cell} \\hfill({vs_taype}, {vs_taype_sa})"
    return row

In [None]:
# Comparison table for the list suite, built with the `gen` defined in the
# preceding cell; also written to figs/list-bench-full.tex.
gen_df('list', gen, 'list-bench')

In [None]:
# Comparison table for the tree suite, using the same `gen` as the list table
# above; also written to figs/tree-bench-full.tex.
gen_df('tree', gen, 'tree-bench')

In [None]:
def gen(d):
    """Build one row of the optimization-ablation table from a read_stat dict.

    Each of the ``"no-smart-array"``, ``"no-reshape-guard"`` and ``"no-memo"``
    timings is shown next to its ratio to the ``"best"`` timing (rendered as
    ``<ratio>x``). All numbers are formatted with ``roundx``.

    Note: this rebinds the name ``gen`` used by an earlier cell; cells must be
    run in order for each table to use the intended definition.
    """
    best = d["best"]
    variants = (d["no-smart-array"], d["no-reshape-guard"], d["no-memo"])
    ratios = [roundx(elapsed / best) for elapsed in variants]
    # The formatted baseline is not shown in this table (there is no "Base"
    # column), but it is still computed exactly as before.
    best = roundx(best)
    shown = [roundx(elapsed) for elapsed in variants]
    smart_cell, guard_cell, memo_cell = (
        f"{elapsed} \\hfill({ratio}x)" for elapsed, ratio in zip(shown, ratios)
    )
    return {
        "Benchmark": f"\\verb|{d['name']}_{d['view']}|",
        "No smart array (ms)": smart_cell,
        "No reshape guard (ms)": guard_cell,
        "No memoization (ms)": memo_cell,
    }

In [None]:
# Ablation table for the list suite, built with the `gen` defined in the
# preceding cell; also written to figs/list-opt-full.tex.
gen_df("list", gen, "list-opt")

In [None]:
# Ablation table for the tree suite, using the same `gen` as the list ablation
# table above; also written to figs/tree-opt-full.tex.
gen_df("tree", gen, "tree-opt")

In [None]:
def gen(d):
    """Build one row of the memoization table from a read_stat dict.

    Shows the ``"best"`` timing as the base and the ``"no-memo"`` timing next
    to its ratio to the base (rendered as ``<ratio>x``). All numbers are
    formatted with ``roundx``.

    Note: this rebinds the name ``gen`` used by earlier cells; cells must be
    run in order for each table to use the intended definition.
    """
    best_ms = d["best"]
    no_memo_ms = d["no-memo"]
    ratio = roundx(no_memo_ms / best_ms)
    best_cell = roundx(best_ms)
    no_memo_cell = roundx(no_memo_ms)
    row = {"Benchmark": f"\\verb|{d['name']}_{d['view']}|"}
    row["Base (ms)"] = f"{best_cell}"
    row["No memoization (ms)"] = f"{no_memo_cell} \\hfill({ratio}x)"
    return row

In [None]:
# Memoization table for the list-memo suite, built with the `gen` defined in
# the preceding cell; also written to figs/list-memo-full.tex.
gen_df("list-memo", gen, "list-memo")

In [None]:
# Memoization table for the tree-memo suite, using the same `gen` as the
# list-memo table above; also written to figs/tree-memo-full.tex.
gen_df("tree-memo", gen, "tree-memo")

In [None]:
# Statistics of compilation and solver


def gen_solver_stat(d):
    """Summarize one solver-statistics record.

    ``d`` must provide a top-level ``"#atoms"`` value and a ``"statistics"``
    mapping whose values each carry ``"#atoms"`` and a ``"queries"`` sequence
    of numbers.

    Returns a dict with:
      * ``"#fun"``: number of entries in ``"statistics"``
      * ``"#atoms"``: the top-level ``"#atoms"`` value
      * ``"#atoms_total"``: sum of the per-entry ``"#atoms"``
      * ``"#queries"``: total number of query entries
      * ``"solver"``: sum of all query values
    """
    top_level_atoms = d["#atoms"]
    per_function = d["statistics"]
    entries = list(per_function.values())
    return {
        "#fun": len(per_function),
        "#atoms": top_level_atoms,
        "#atoms_total": sum(entry["#atoms"] for entry in entries),
        "#queries": sum(len(entry["queries"]) for entry in entries),
        # Summed per entry first, then across entries.
        "solver": sum(sum(entry["queries"]) for entry in entries),
    }


def gen_compile_stat(cat, name):
    """Summarize the compilation statistics of one suite as a table row.

    Reads two line-oriented files under ``output-compile/{cat}/``:
      * ``{name}.compile.stat``: one number per line
      * ``{name}.solver.stat``: one JSON record per line, each summarized by
        ``gen_solver_stat``

    Blank lines (for example a trailing empty line) are ignored in both files.

    Returns a dict whose count columns come from the first solver record and
    whose ``"Total (s)"`` / ``"Solver (s)"`` columns are the means over all
    lines, formatted with ``roundx``.

    Raises:
        ValueError: if either file contains no data lines.
    """
    compile_path = f"output-compile/{cat}/{name}.compile.stat"
    solver_path = f"output-compile/{cat}/{name}.solver.stat"

    with open(compile_path, "r") as r:
        compile_stats = [float(line) for line in r if line.strip()]
    with open(solver_path, "r") as r:
        data = [gen_solver_stat(json.loads(line)) for line in r if line.strip()]

    # Fail with a readable message instead of an IndexError on data[0] or a
    # NaN mean further down.
    if not compile_stats:
        raise ValueError(f"no compile statistics found in {compile_path}")
    if not data:
        raise ValueError(f"no solver statistics found in {solver_path}")

    first = data[0]
    return {
        "\\#Functions": first["#fun"],
        "\\#Types": first["#atoms"],
        "\\#Atoms": first["#atoms_total"],
        "\\#Queries": first["#queries"],
        "Total (s)": roundx(np.mean(compile_stats)),
        "Solver (s)": roundx(np.mean([d["solver"] for d in data])),
    }

In [None]:
# Suite directory name -> label shown in the "Suite" column.
cats = {
    "list": "List",
    "tree": "Tree",
    "stress-solver": "List (stress)",
    "dating": "Dating",
    "record": "Medical Records",
    "calculator": "Secure Calculator",
    "dtree": "Decision Tree",
    "kmeans": "K-means",
    "misc": "Miscellaneous",
}

# Suites whose stat files are named after a different suite; every other suite
# uses its own name as the file stem.
cat_map = {"stress-solver": "list"}

# One row per suite: the label followed by the columns from gen_compile_stat.
data = [
    {"Suite": name, **gen_compile_stat(cat, cat_map.get(cat, cat))}
    for cat, name in cats.items()
]

df = pd.DataFrame(data)
to_latex(df, "compile-stats")
# Last expression of the cell: display the table in the notebook.
df

In [None]:
# Done!