# Visualizing Benchmarks

## Preparation
We start by loading the necessary modules and the data, which is immediately converted to dataframes.

Then, we process the data if necessary (usually by converting it into long format, which is compatible with Altair).

In [1]:
import os
import yaml
import altair as alt
from altair_saver import save
import pandas as pd

In [2]:
# Directory that the rendered chart images are written to.
chart_dir = 'charts'

# Load the benchmark results. The encoding is given explicitly so the file is
# read the same way regardless of the platform's default locale encoding.
with open("results.yaml", 'r', encoding="utf-8") as stream:
    data = yaml.safe_load(stream)

# makedirs(exist_ok=True) replaces the check-then-create pair: it has no race
# between the existence test and the creation, it also works when chart_dir is
# a nested path, and it raises if chart_dir exists but is not a directory.
os.makedirs(chart_dir, exist_ok=True)

In [3]:
# Split the loaded results into their two top-level sections.
external = data['external']
internal = data['internal']

# Build one dataframe per benchmark listed under each section.
ext_dset_iter, ext_seq_acc = (
    pd.DataFrame(external[benchmark])
    for benchmark in ('dataset_iteration', 'sequential_access')
)
int_compres, int_rand_acc, int_dset_iter = (
    pd.DataFrame(internal[benchmark])
    for benchmark in ('compression', 'random_access', 'dataset_iteration')
)

In [4]:
# Long format for the external dataset-iteration results: index by loader,
# stack the remaining columns into rows, then name the former column labels
# 'Dataset' and the stacked values 'time'.
stacked_ext_dset_iter = (
    ext_dset_iter
    .set_index('loader')
    .stack()
    .reset_index()
    .rename(columns={'level_1': 'Dataset', 0: 'time'})
)

# Long format for the random-access results: 'batch_size' stays as the
# identifier column, every other column becomes a ('Dataset', 'Time') pair.
melt_int_rand_acc = int_rand_acc.melt(
    id_vars='batch_size',
    var_name='Dataset',
    value_name='Time',
)

## Drawing

In [5]:
# Horizontal bar chart of the external dataset-iteration timings:
# one facet row per loader, one bar per dataset, time on a log-scaled x axis.
ext_dset_iter_encoding = dict(
    x=alt.X(
        'time:Q',
        title="Time in seconds (log scale)",
        scale=alt.Scale(type='log'),
        axis=alt.Axis(grid=False),
    ),
    # The y axis is hidden; the colour legend identifies the datasets instead.
    y=alt.Y('Dataset:O', axis=None),
    color='Dataset:N',
    row=alt.Row('loader:N', header=alt.Header(title=None, labelOrient='top')),
)

graph_ext_dset_iter = (
    alt.Chart(stacked_ext_dset_iter)
    .mark_bar(cornerRadiusBottomRight=2, cornerRadiusTopRight=2)
    .encode(**ext_dset_iter_encoding)
    .properties(
        height=alt.Step(25),
        width=1000,
        title="Dataset Iteration (external)",
    )
    .configure_view(stroke='transparent')
)

In [6]:
# Two rect-mark panels over the sequential-access results, placed side by
# side: both use dataset on x and framework on y, one is coloured by the
# 'read' field and the other by the 'write' field.
wr = alt.Chart(ext_seq_acc).mark_rect()
wr_x = alt.X('dataset:O', title=None)
wr_y = alt.Y('framework:O', title=None)
wr_props = dict(height=alt.Step(25), width=100)

# The panels share the same axes and differ only in the colour field.
read, write = (
    wr.encode(y=wr_y, x=wr_x, color=colour_field)
    for colour_field in ('read:Q', 'write:Q')
)

graph_ext_seq_acc = (
    read.properties(title='Read', **wr_props)
    | write.properties(title='Write', **wr_props)
).properties(title="Sequential Access")

In [7]:
# Line chart of random-access time against batch size, one line per dataset.
graph_int_rand_acc = (
    alt.Chart(melt_int_rand_acc)
    .mark_line()
    .encode(x="batch_size", y="Time:Q", color='Dataset:N')
    .properties(title="Random Access")
)

In [8]:
# Circle-mark chart of the internal dataset-iteration timings: time on x,
# 'type' on y, colour by prefetch factor, and circle size by batch size
# on a log scale.
int_dset_iter_encoding = dict(
    y=alt.Y("type", title=None),
    x=alt.X("time:Q", axis=alt.Axis(grid=False), title="Time in seconds"),
    color='prefetch_factor:N',
    size=alt.Size('batch_size:Q', scale=alt.Scale(type='log')),
)

graph_int_dset_iter = (
    alt.Chart(int_dset_iter)
    .mark_circle()
    .encode(**int_dset_iter_encoding)
    .properties(title="Dataset Iteration (internal)", height=170)
    .configure_view(strokeOpacity=0)
)

## Graphs

In [9]:
# Bare expression: as the last statement of a notebook cell it displays the
# external dataset-iteration chart.
graph_ext_dset_iter

In [10]:
# Bare expression: as the last statement of a notebook cell it displays the
# side-by-side read/write sequential-access chart.
graph_ext_seq_acc

In [11]:
# Bare expression: as the last statement of a notebook cell it displays the
# random-access line chart.
graph_int_rand_acc

In [12]:
# Bare expression: as the last statement of a notebook cell it displays the
# internal dataset-iteration chart.
graph_int_dset_iter

## Saving Graphs

In [13]:
# Write every chart into chart_dir as a PNG named after its title, with
# spaces replaced by underscores. The path is built from chart_dir rather
# than a repeated "charts/" literal, so the files always land in the
# directory configured (and created) at the top of the notebook.
for graph in graph_ext_dset_iter, graph_ext_seq_acc, graph_int_rand_acc, graph_int_dset_iter:
    file_name = f"{graph.title.replace(' ', '_')}.png"
    save(graph, os.path.join(chart_dir, file_name))