In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import os

# Global plot styling and experiment metadata shared by the cells below.
sns.set_theme(style='whitegrid')

# Benchmark drivers and the quantity each of them reports (used as axis label).
drivers = ['plaintext', 'emp']
driver_labels = {
    'plaintext': '#mux',
    'emp': 'running time (ms)',
}
# The same labels, annotated for plots drawn with a logarithmic y-axis.
driver_log_labels = {
    name: f'{label} (log scale)' for name, label in driver_labels.items()
}

# Directory holding the CSV results of each configuration.
config_dirs = {
    'default': 'output',
    'no-tupling': 'output-no-tupling',
}

# Colors of the active matplotlib cycle (read after the seaborn theme is set),
# so that lines drawn for different configurations can share a color.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Dash pattern (offset, (on, off)) used for the non-default configuration.
other_ls = (0, (2, 2))

# Helper that reads one result CSV into the form used by the rest of the notebook
def read_stats(cfg, cat, dr, v, n, **kwargs):
    """Load one benchmark result file as a DataFrame with a single column `n`.

    The file is read from
    ``{config_dirs[cfg]}/{cat}/test_{v}.{dr}.output.csv``.

    Parameters
    ----------
    cfg : key of `config_dirs` selecting the results directory.
    cat : sub-directory (benchmark category) inside that directory.
    dr : driver name; part of the file name.
    v : test name; part of the file name.
    n : name given to the (single) data column of the returned frame.
    **kwargs : forwarded unchanged to `pandas.read_csv`; callers use it to
        select the data column (`usecols`) or the index (`index_col`).

    Returns
    -------
    DataFrame whose only column is named `n`. For the 'emp' driver the values
    are divided by 1000 to convert them to milliseconds.
    """
    csv_path = f'{config_dirs[cfg]}/{cat}/test_{v}.{dr}.output.csv'
    frame = pd.read_csv(csv_path, **kwargs)
    # Exactly one data column is expected here; the assignment raises otherwise.
    frame.columns = [n]
    if dr == 'emp':
        # Convert to milliseconds.
        frame = frame / 1000
    return frame

In [None]:
# Configurations to load and plot; every name must be a key of `config_dirs`.
# If the tests were not run with tupling disabled, delete the 'no-tupling'
# entry below so that only the default results are used.
configs = [
    'default',
    'no-tupling',
]

In [None]:
# Output directory for everything this notebook writes: the PDF figures and
# the LaTeX correlation table. `exist_ok=True` makes re-running this cell safe.
os.makedirs('figs', exist_ok=True)

In [None]:
# Load decision tree tests

# Test-name suffix -> column label shown in the figure.
views = {
    'max': 'max height',
    'spine': 'spine',
    'spineF': 'spine w/ feat.',
    'all': 'all',
}
# Row labels for the loaded frames; each CSV must therefore contain exactly
# four rows (presumably one per input tree, in this order -- verify).
dtree_rows = ['small', 'very sparse', 'eighth sparse', 'full']

# stats[driver] is a frame with one row per label above and one column per view.
stats = {}
for dr in drivers:
    per_view = [
        read_stats('default', 'dtree', dr, f'dtree_{view}', label, usecols=['stat'])
        for view, label in views.items()
    ]
    df = pd.concat(per_view, axis=1)
    df.index = dtree_rows
    stats[dr] = df

In [None]:
# Display the decision-tree results of the emp driver (milliseconds, see read_stats).
stats['emp']

In [None]:
# Font sizes used by the figures: tick labels, axis labels and legend.
plt.rcParams.update({
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'axes.labelsize': 18,
    'legend.fontsize': 11,
})

In [None]:
# Bar chart of the decision-tree running times (emp driver) on a log y-axis.
# After transposing, each view is one group on the x-axis and each row label
# ('small', 'very sparse', ...) is one bar within the group.
emp_by_view = stats['emp'].T
emp_by_view.plot.bar(rot=0, logy=True, ylabel=driver_log_labels['emp'])
plt.savefig('figs/dtree-emp.pdf', bbox_inches='tight')

In [None]:
# Correlations between #mux (plaintext driver) and the running time (emp driver).
# Later cells fill this in as corrs[category][test name] = correlation coefficient.
# NOTE: re-running this cell discards everything collected so far.
corrs = {}

In [None]:
# Load tree tests

# Test name (as used in the CSV file name) -> column label.
tests = {
    'lookup': 'lookup',
    'prob': 'prob',
    'map': 'map',
}

# all_stats[config][driver] is a frame indexed by the 'public' column of the
# CSV files, with one column per test.
all_stats = {
    cfg: {
        dr: pd.concat(
            [read_stats(cfg, 'tree', dr, v, n, index_col='public') for v, n in tests.items()],
            axis=1,
        )
        for dr in drivers
    }
    for cfg in configs
}

# Shorthand for the results of the default configuration.
stats = all_stats['default']

In [None]:
# Display the tree microbenchmark results of the emp driver (default configuration).
stats['emp']

In [None]:
# Use a larger legend font for the figures produced from here on.
plt.rcParams['legend.fontsize'] = 18

In [None]:
# Plot the results of tree microbenchmark (of emp driver).
# Solid lines: default configuration. Dashed lines (same colors, no legend
# entries): the 'no-tupling' configuration, drawn only if it was loaded.
ax = stats['emp'].plot.line(logy=True, color=colors)
if 'no-tupling' in all_stats:
    all_stats['no-tupling']['emp'].plot.line(ax=ax, legend=False, ls=other_ls, color=colors)
# Label the axes after all plotting, and independently of the optional
# configuration: otherwise a default-only run has no y-label and shows the
# raw index name on the x-axis.
ax.set(xlabel='max depth', ylabel=driver_log_labels['emp'])
plt.savefig('figs/tree-emp.pdf', bbox_inches='tight')

In [None]:
# Calculate the correlation between the results of the plaintext driver (#mux)
# and the emp driver (running time) for every tree test.
# `DataFrame.corr()` defaults to the Pearson coefficient; entry [0, 1] of the
# 2x2 correlation matrix is the coefficient between the two columns.
tree_corrs = corrs.setdefault('tree', {})
for tst in tests.values():
    df = pd.DataFrame(data={'#mux': stats['plaintext'][tst], tst: stats['emp'][tst]})
    tree_corrs[tst] = df.corr().iat[0, 1]

In [None]:
# Load list tests which return primitive values (e.g., integer or boolean)

# Test name (as used in the CSV file name) -> column label.
tests = {
    'elem': 'elem',
    'hamming_distance': 'hamming',
    'min_euclidean_distance': 'euclidean',
    'dot_prod': 'dot-prod',
}

# all_stats[config][driver] is a frame indexed by the 'public' column of the
# CSV files, with one column per test.
all_stats = {
    cfg: {
        dr: pd.concat(
            [read_stats(cfg, 'list', dr, v, n, index_col='public') for v, n in tests.items()],
            axis=1,
        )
        for dr in drivers
    }
    for cfg in configs
}

# Shorthand for the results of the default configuration.
stats = all_stats['default']

In [None]:
# Display the list microbenchmark results of the emp driver (default configuration).
stats['emp']

In [None]:
# Plot the results of list microbenchmark (of emp driver).
# Solid lines: default configuration. Dashed lines (same colors, no legend
# entries): the 'no-tupling' configuration, drawn only if it was loaded.
ax = stats['emp'].plot.line(color=colors)
if 'no-tupling' in all_stats:
    all_stats['no-tupling']['emp'].plot.line(ax=ax, legend=False, ls=other_ls, color=colors)
# Label the axes after all plotting, and independently of the optional
# configuration: otherwise a default-only run has no y-label and shows the
# raw index name on the x-axis.
ax.set(xlabel='max length', ylabel=driver_labels['emp'])
plt.savefig('figs/list-emp-1.pdf', bbox_inches='tight')

In [None]:
# Calculate the correlation between the results of the plaintext driver (#mux)
# and the emp driver (running time) for every list test loaded above.
# `DataFrame.corr()` defaults to the Pearson coefficient; entry [0, 1] of the
# 2x2 correlation matrix is the coefficient between the two columns.
list_corrs = corrs.setdefault('list', {})
for tst in tests.values():
    df = pd.DataFrame(data={'#mux': stats['plaintext'][tst], tst: stats['emp'][tst]})
    list_corrs[tst] = df.corr().iat[0, 1]

In [None]:
# Load list tests which return oblivious lists
tests = {'insert': 'insert', 'filter': 'filter', 'map': 'map'}

# all_stats[config][driver] is a frame indexed by the 'public' column of the
# CSV files, with one column per test.
all_stats = {}
for cfg in configs:
    all_stats[cfg] = {}
    for dr in drivers:
        merged = pd.concat(
            [read_stats(cfg, 'list', dr, v, n, index_col='public') for v, n in tests.items()],
            axis=1,
        )
        # The tests have different x-values, so after the concat every column
        # has NaN gaps at the x-values that only other tests were run with.
        # Fill those gaps by interpolating along the actual index values
        # (method='index'); the default method='linear' ignores the index and
        # treats the rows as equally spaced, which skews the filled values
        # when the x-values are unevenly spaced. limit_area='inside' only
        # fills gaps surrounded by measurements (no extrapolation).
        # NOTE(review): method='index' assumes the 'public' index is numeric
        # -- confirm against the CSV files.
        all_stats[cfg][dr] = merged.sort_index().interpolate(method='index', limit_area='inside')

# Shorthand for the results of the default configuration.
stats = all_stats['default']

In [None]:
# Plot the results of list microbenchmark (of emp driver).
# Solid lines: default configuration. Dashed lines (same colors, no legend
# entries): the 'no-tupling' configuration, drawn only if it was loaded.
ax = stats['emp'].plot.line(color=colors)
if 'no-tupling' in all_stats:
    all_stats['no-tupling']['emp'].plot.line(ax=ax, legend=False, ls=other_ls, color=colors)
# Label the axes after all plotting, and independently of the optional
# configuration: otherwise a default-only run has no y-label and shows the
# raw index name on the x-axis.
ax.set(xlabel='max length', ylabel=driver_labels['emp'])
plt.savefig('figs/list-emp-2.pdf', bbox_inches='tight')

In [None]:
# Calculate the correlation between the results of the plaintext driver (#mux)
# and the emp driver (running time) for every list test loaded above; the
# results are added to the 'list' entries collected so far.
# `DataFrame.corr()` defaults to the Pearson coefficient; entry [0, 1] of the
# 2x2 correlation matrix is the coefficient between the two columns.
list_corrs = corrs.setdefault('list', {})
for tst in tests.values():
    df = pd.DataFrame(data={'#mux': stats['plaintext'][tst], tst: stats['emp'][tst]})
    list_corrs[tst] = df.corr().iat[0, 1]

In [None]:
# Combine the correlations from list and tree experiments into one table:
# the outer index level is the category, the inner level the test name, and
# the single column 'correlation' holds the coefficient.
cats = ['list', 'tree']
per_category = [
    pd.DataFrame(data=corrs[c], index=['correlation']).T
    for c in cats
]
df = pd.concat(per_category, keys=cats)
df

In [None]:
# Write the correlation table to a LaTeX file (nothing is printed).
# hrules=True adds horizontal rules; clines='skip-last;data' adds a separating
# line after each group of the outer index level, spanning the data columns.
df.style.to_latex('figs/mux-corr.tex', hrules=True, clines='skip-last;data')

In [None]:
# Done!