In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colormaps as cm

In [None]:
# Location of the merged benchmark CSV for every sample, keyed by sample name.
benchmark_path = {
    sample: f'../output/merged-{sample}-8G.csv'
    for sample in ('bacteria', 'HeLa', 'embryo', 'organoid', 'tissue-on', 'tissue-off')
}
# Compressors to plot, each mapped to the numeric settings that are appended
# after '-' to form the 'compression option' label (e.g. 'zlib-5').
compression_options = dict(
    zlib=(1, 5, 9),
    NvcompGDeflate=(1, 3, 5),
    lz4=(1,),
    NvcompLZ4=(0,),
)
# Labels found in the 'filter option' column that the plots can show.
filter_options = [
    'none',
    'Shuffle',
    'BitRound-14',
    'BitRound-14-Shuffle',
]


In [None]:
# Display the compressor names; rows whose 'compression option' prefix is not
# one of these are dropped when the benchmark tables are loaded.
compression_options.keys()

###  Load data

In [None]:
# Read every benchmark CSV and keep only the rows whose compressor name (the
# text of 'compression option' before the first '-') is a key of
# `compression_options`.
benchmark_table = dict()
for name, path in benchmark_path.items():
    df = pd.read_csv(path)
    # Vectorised prefix extraction; unlike a per-row lambda this always yields
    # a boolean mask (missing values simply become False, and an empty table
    # still produces a valid row filter instead of a column selection).
    compressor_name = df['compression option'].str.split('-', n=1).str[0]
    df = df[compressor_name.isin(list(compression_options))]
    benchmark_table[name] = df

### Visualization 1: overall

Show benchmark results for each dataset and benchmark parameter.

In [None]:
# Bar colours per filter option: three shades (dark to light) of one hue,
# written as 0-255 RGB triples and scaled to the 0-1 floats matplotlib expects.
# The plotting functions pick the shade by the position of the compression
# setting within its compressor.
filter_display_colors = {
    filter_name: [tuple(channel / 255 for channel in rgb) for rgb in shades]
    for filter_name, shades in {
        'none': [(31, 120, 180), (99, 163, 204), (166, 206, 227)],  # blue
        'Shuffle': [(51, 160, 44), (115, 192, 91), (178, 223, 138)],  # green
        'BitRound-14': [(227, 26, 28), (239, 90, 91), (251, 154, 153)],  # red
        'BitRound-14-Shuffle': [(255, 127, 0), (254, 159, 56), (253, 191, 111)],  # orange
    }.items()
}

In [None]:
# Axis settings (forwarded to `ax.set`) for every sample and benchmark metric.
# Each sample gets its own fresh dictionaries so the per-sample overrides
# below do not leak into the other samples.
kwargs_list = {}
for sample_name in ('bacteria', 'HeLa', 'embryo', 'organoid', 'tissue-on', 'tissue-off'):
    kwargs_list[sample_name] = {
        'compression ratio': dict(ylim=(1, 8), yscale='linear'),
        'compression speed (bytes/sec)': dict(ylim=(10**7.5, 10**10), yscale='log'),
        'decompression speed (bytes/sec)': dict(ylim=(10**7, 10**9.5), yscale='log'),
    }

# Per-sample y-range overrides for the compression-ratio plot.
kwargs_list['organoid']['compression ratio'].update(ylim=(1, 10))
kwargs_list['tissue-off']['compression ratio'].update(ylim=(1, 2))


In [None]:
def view_compression_ratio(save_path, df, compression_options, filter_options, filter_display_colors, **kwargs):
    """Draw overlaid bars for every compression setting and save the figure.

    Each '<compressor>-<setting>' label gets its own x slot. Within a slot,
    one bar per available filter is drawn at the same x position, so the bars
    overlap; an empty slot separates consecutive compressors.

    Parameters
    ----------
    save_path
        Destination handed to ``fig.savefig``.
    df
        Values to plot. Looked up first by '<compressor>-<setting>' and then
        by filter name; expected to be a pandas Series with those two index
        levels (as built by the plotting loop in this notebook).
    compression_options
        Mapping of compressor name to an iterable of settings.
    filter_options
        Filter names to show; names missing for a setting are skipped.
    filter_display_colors
        Mapping of filter name to a sequence of colours, indexed by the
        position of the setting within its compressor.
    **kwargs
        Forwarded unchanged to ``ax.set``.
    """
    fig, ax = plt.subplots()
    ax.set(**kwargs)

    # Every element of `slot` holds the same x coordinate; slicing it yields
    # one coordinate per bar so all bars of a setting share one position.
    slot = np.zeros(len(filter_options))
    tick_positions, tick_names = [], []

    for codec, settings in compression_options.items():
        for shade_idx, setting in enumerate(settings):
            label = f'{codec}-{setting}'
            per_filter = df.loc[label]
            present = per_filter.index.tolist()
            wanted = [filt for filt in filter_options if filt in present]
            # Tallest first: later (shorter) bars are painted on top, so
            # every filter stays visible in the overlay.
            heights = per_filter.loc[wanted].sort_values(ascending=False)
            colors = []
            for filt in heights.index:
                colors.append(filter_display_colors[filt][shade_idx])
            ax.bar(slot[:len(heights)], heights, color=colors)
            tick_positions.append(slot[0])
            tick_names.append(label)
            slot += 1
        # Skip one slot to leave a gap between compressors.
        slot += 1

    ax.set_xticks(tick_positions, tick_names, rotation=90)
    ax.set_xlim(-1, tick_positions[-1] + 1)
    fig.savefig(save_path)


In [None]:
def view_throughput(save_path, df, compression_options, filter_options, filter_display_colors, **kwargs):
    """Draw one group of overlaid throughput bars per compressor and save it.

    Every compressor occupies one x position. For each of its settings a
    single bar is drawn there, using the first entry of ``filter_options``
    that is present for that setting. The bars of one compressor are painted
    tallest first so that a faster setting never hides a slower one.

    Parameters
    ----------
    save_path
        Destination handed to ``fig.savefig``.
    df
        Values to plot. Looked up first by '<compressor>-<setting>' and then
        by filter name; expected to be a pandas Series with those two index
        levels (as built by the plotting loop in this notebook).
    compression_options
        Mapping of compressor name to an iterable of settings.
    filter_options
        Filter names in order of preference; only the first one available
        for a setting is plotted.
    filter_display_colors
        Mapping of filter name to a sequence of colours, indexed by the
        position of the setting within its compressor.
    **kwargs
        Forwarded unchanged to ``ax.set``.

    Raises
    ------
    KeyError
        If a '<compressor>-<setting>' label is missing from ``df``.
    IndexError
        If none of ``filter_options`` is available for a setting.
    """
    fig, ax = plt.subplots()
    ax.set(**kwargs)
    tick_loc = []
    tick_label = []
    for position, (comp_name, comp_degree) in enumerate(compression_options.items()):
        bars = []
        for d_idx, deg in enumerate(comp_degree):
            sub_df = df.loc[f'{comp_name}-{deg}']
            available = sub_df.index.tolist()
            chosen = [f for f in filter_options if f in available][0]
            heights = sub_df.loc[[chosen]]
            bars.append((heights, filter_display_colors[chosen][d_idx]))
        # All settings of a compressor share one x position. Paint the
        # tallest bar first; otherwise a taller bar drawn later would cover
        # the shorter ones completely.
        bars.sort(key=lambda bar: bar[0].max(), reverse=True)
        for heights, color in bars:
            ax.bar([position] * len(heights), heights, color=color)
        tick_loc.append(position)
        tick_label.append(comp_name)
    ax.set_xticks(tick_loc, tick_label, rotation=90)
    ax.set_xlim(-1, tick_loc[-1] + 1)
    fig.savefig(save_path)


In [None]:
# For every sample: average the benchmark metrics per
# (compression option, filter option) pair, then save one compression-ratio
# figure and one figure per throughput metric.
for name, df in benchmark_table.items():
    grp_df = (df
        .groupby(['compression option', 'filter option'])
        # numeric_only=True: with pandas >= 2.0 a non-numeric column in the
        # CSV would otherwise make mean() raise.
        .mean(numeric_only=True)
    )
    # plot compression ratio
    bench_param = 'compression ratio'
    bench_filter_options = list(filter_options)
    kwargs = kwargs_list[name][bench_param]
    srs = grp_df.loc[:,bench_param]
    save_path = f'../figure/view-bench-1-overall-gpu/{name}-{bench_param.split("(")[0].strip()}.svg'
    view_compression_ratio(save_path, srs, compression_options, bench_filter_options, filter_display_colors, **kwargs)

    # plot throughput
    for bench_param in ['compression speed (bytes/sec)', 'decompression speed (bytes/sec)']:
        # Order matters: view_throughput plots only the first available
        # filter, so 'Shuffle' is preferred and 'none' is the fallback.
        bench_filter_options = ['Shuffle', 'none']
        kwargs = kwargs_list[name][bench_param]
        srs = grp_df.loc[:,bench_param]
        # The file name drops the unit suffix, e.g. 'compression speed'.
        # Double quotes inside the f-string keep this valid before Python 3.12.
        save_path = f'../figure/view-bench-1-overall-gpu/{name}-{bench_param.split("(")[0].strip()}.svg'
        view_throughput(save_path, srs, compression_options, bench_filter_options, filter_display_colors, **kwargs)