In [138]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [74]:
# Directory holding the benchmark results: the per-run CSV files are read from
# here and the figures produced further down are saved back into it.
dst_dir = './output/bench_embryo_8G'
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the per-file position (used as the 'index' tag when loading) reproducible.
src_files = sorted(glob.glob(os.path.join(dst_dir, '*.csv')))

In [75]:
# Load every benchmark CSV and stack them into one frame. Each file's rows are
# tagged with the file's position in src_files via an 'index' column.
if not src_files:
    # pd.concat([]) would fail with an opaque "No objects to concatenate";
    # raise the same exception type with the actual cause instead.
    raise ValueError('no benchmark CSV files found: src_files is empty')
dfs = [pd.read_csv(path).assign(index=run) for run, path in enumerate(src_files)]
whole_df = pd.concat(dfs)

In [76]:
# Group all benchmark rows by their (compression option, filter option) pair.
group_keys = ['compression option', 'filter option']
grouped = whole_df.groupby(group_keys)

In [None]:
# Per-group mean of every column, with the 'index' column removed from the
# summary before it is displayed.
grouped_mean = grouped.mean().drop(columns='index')
grouped_mean

## Results for the various filters

In [128]:
# Compression-option names that split into this many (or more) '-'-separated
# parts are excluded from the per-filter tables built here.
MAX_NAME_PARTS = 4


def _rows_for_filter(filter_name):
    """Return the rows of ``grouped_mean`` that belong to one filter option.

    A row is kept when its filter option (second index level) equals
    ``filter_name`` and its compression-option name (first index level) splits
    into fewer than ``MAX_NAME_PARTS`` parts on '-'.

    The returned frame has the same columns as ``grouped_mean`` and is indexed
    by the compression-option name alone, under the index name 'compression'.
    """
    keep = [
        flt == filter_name and len(comp.split('-')) < MAX_NAME_PARTS
        for comp, flt in grouped_mean.index
    ]
    # Drop the (now constant) filter level instead of writing a temporary
    # column into the selection and re-indexing on it.
    return grouped_mean.loc[keep].droplevel(1).rename_axis('compression')


no_filter_rst = _rows_for_filter('none')
scale_filter_rst = _rows_for_filter('FixedScaleOffset')
scale_shuffle_filter_rst = _rows_for_filter('FixedScaleOffset-Shuffle')

In [None]:
# compression ratio
# Compressor families to plot. barh puts position 0 at the bottom of the axes,
# so the list is reversed to make its first entry end up at the top.
comp_indices = ['blosc-lz4','blosc-zstd','blosc-zlib','lz4','zstd','zlib','gzip','lzma','bzip2']
comp_indices.reverse()
reordered_index = []
tick_label = []
for comp in comp_indices:
    # Entries of this family, ordered by their trailing integer, largest first.
    comp_index = sorted(
        (name for name in no_filter_rst.index if name.startswith(comp)),
        key=lambda name: int(name.split('-')[-1]),
        reverse=True,
    )
    reordered_index += comp_index
    # Double quotes outside, single quotes inside: reusing the same quote
    # character within an f-string is a SyntaxError before Python 3.12.
    tick_label += [f"{comp} ({name.rsplit('-', 1)[-1]})" for name in comp_index]

ratio_column = 'compression ratio'
comp_means = {
    'FixedScaleOffset + Shuffle': scale_shuffle_filter_rst.loc[reordered_index, ratio_column],
    'FixedScaleOffset': scale_filter_rst.loc[reordered_index, ratio_column],
    'No filter': no_filter_rst.loc[reordered_index, ratio_column],
}

fig, ax = plt.subplots()
height = 0.25
x = np.arange(len(tick_label))
# All series share the same y positions (no per-series offset), so each series
# is drawn over the ones plotted before it.
for label, measure in comp_means.items():
    ax.barh(x, measure, height=height, label=label)
# barh centres every bar on its y position, so the tick labels belong on x
# itself; placing them at x + height shifted each label off its bar.
ax.set_yticks(x, tick_label)
# Extra room above the last bar - presumably headroom for the legend.
ax.set_ylim(-1, len(tick_label) + 3)
ax.set_xlim(1, 6)
ax.set_xlabel(ratio_column)
ax.legend(loc='upper left', ncols=3)
fig.savefig(os.path.join(dst_dir, 'compression ratio.svg'))
plt.show()

In [None]:
# compression speed
speed_column = 'compression speed (bytes/sec)'
comp_means = {
    'No filter': no_filter_rst.loc[reordered_index, speed_column],
    'FixedScaleOffset': scale_filter_rst.loc[reordered_index, speed_column],
    'FixedScaleOffset + Shuffle': scale_shuffle_filter_rst.loc[reordered_index, speed_column],
}

fig, ax = plt.subplots()
height = 0.25
# Speed of the 'none' compression row without a filter. It is not drawn in the
# figure; the value is only kept available for reference.
plain_value = no_filter_rst.loc['none', speed_column]
x = np.arange(len(tick_label))
# All series share the same y positions (no per-series offset), so each series
# is drawn over the ones plotted before it.
for label, measure in comp_means.items():
    ax.barh(x, measure, height=height, label=label)
# barh centres every bar on its y position, so the tick labels belong on x
# itself; placing them at x + height shifted each label off its bar.
ax.set_yticks(x, tick_label)
# Extra room above the last bar - presumably headroom for the legend.
ax.set_ylim(-1, len(tick_label) + 3)
ax.set_xlabel(speed_column)
ax.legend(loc='upper left', ncols=3)
fig.savefig(os.path.join(dst_dir, 'compression speed.svg'))
plt.show()

In [None]:
# decompression speed
speed_column = 'decompression speed (bytes/sec)'
comp_means = {
    'No filter': no_filter_rst.loc[reordered_index, speed_column],
    'FixedScaleOffset': scale_filter_rst.loc[reordered_index, speed_column],
    'FixedScaleOffset + Shuffle': scale_shuffle_filter_rst.loc[reordered_index, speed_column],
}

fig, ax = plt.subplots()
height = 0.25
# Speed of the 'none' compression row without a filter. It is not drawn in the
# figure; the value is only kept available for reference.
plain_value = no_filter_rst.loc['none', speed_column]
x = np.arange(len(tick_label))
# All series share the same y positions (no per-series offset), so each series
# is drawn over the ones plotted before it.
for label, measure in comp_means.items():
    ax.barh(x, measure, height=height, label=label)
# barh centres every bar on its y position, so the tick labels belong on x
# itself; placing them at x + height shifted each label off its bar.
ax.set_yticks(x, tick_label)
# Extra room above the last bar - presumably headroom for the legend.
ax.set_ylim(-1, len(tick_label) + 3)
ax.set_xlabel(speed_column)
ax.legend(loc='upper left', ncols=3)
fig.savefig(os.path.join(dst_dir, 'decompression speed.svg'))
plt.show()