# Visualization of the Snappy Compression results
The data belongs to the `Go` implementation, using purely Ethereum CL `signed_blocks`.


In [None]:
# Import the dependencies
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

# Directory that read_files_with() walks recursively to find the result files
RESULTS_FOLDER = './../results'
# Path prefix prepended to every plot file name passed to plt.savefig
PLOTS_FOLDER= './plots/'

def read_files_with(folder: str, filter: str = "") -> list:
    """Recursively collect the files under *folder* whose name contains *filter*.

    Args:
        folder: Root directory to walk (sub-directories are included).
        filter: Substring a file name must contain to be selected. The default
            empty string matches every file, not only CSV files. The name
            shadows the builtin but is kept so keyword callers keep working.

    Returns:
        A list with the path of every matching file, each built by joining the
        walked directory with the file name. Empty when nothing matches or
        when *folder* does not exist.
    """
    matches = []
    for root, _, names in os.walk(folder):
        # os.path.join avoids the doubled separator that plain string
        # concatenation produced when the directory already ended with one.
        matches.extend(os.path.join(root, name) for name in names if filter in name)
    print(f"found {len(matches)} with {filter} files in {folder}")
    return matches


In [None]:

# Load every result file and average the measurements per (folder, file) pair
files = read_files_with(RESULTS_FOLDER)
if not files:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate"
    raise FileNotFoundError(f"no result files found in {RESULTS_FOLDER}")
dataframes = [pd.read_csv(path) for path in files]

dfs = pd.concat(dataframes)
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where
# averaging a non-numeric column raises instead of being silently skipped
dfs = dfs.groupby(['folder','file']).mean(numeric_only=True).reset_index()
# Drop leftover index columns if present; errors="ignore" removes whichever of
# them exist (the previous try/except dropped none unless both were present)
dfs = dfs.drop(columns=["Unnamed: 0", "Unnamed: 0.1"], errors="ignore")
# NOTE(review): the index is written as an unnamed first column, which shows up
# as "Unnamed: 0" if this summary is read back with pd.read_csv
dfs.to_csv('block_snappy_compression_summary.csv')
display(dfs)


In [None]:
# Raw Size: empirical CDF and histogram of the raw block size, split by folder
os.makedirs(PLOTS_FOLDER, exist_ok=True)  # savefig does not create missing directories
dfs = dfs.sort_values('raw-size')
dfs['raw_size_mb'] = dfs['raw-size'] / 1_000_000 # MB (assumes raw-size is in bytes)
sns.set()
g = sns.ecdfplot(data=dfs, x='raw_size_mb', hue='folder')
g.set(title="Raw size CDF", xlabel="Raw size (MB)", ylabel="CDF")
plt.savefig(os.path.join(PLOTS_FOLDER, 'raw_size_cdf.png'))
plt.show()

sns.set()
g = sns.histplot(data=dfs, x='raw_size_mb', bins=8, hue='folder')
# histplot draws per-bin counts by default, so the y axis is a count, not a CDF
g.set(title="Raw size PDF", xlabel="Raw size (MB)", ylabel="Count")
plt.savefig(os.path.join(PLOTS_FOLDER, 'raw_size_pdf.png'))
plt.show()

In [None]:
# Compress Ratio: empirical CDF and histogram of the compression ratio, split by folder
os.makedirs(PLOTS_FOLDER, exist_ok=True)  # savefig does not create missing directories
dfs = dfs.sort_values('compress-ratio')
sns.set()
g = sns.ecdfplot(data=dfs, x='compress-ratio', hue='folder')
g.set(title="Compress ratio CDF", xlabel="Compress ratio", ylabel="CDF")
plt.savefig(os.path.join(PLOTS_FOLDER, 'compression_ratio_cdf.png'))
plt.show()

sns.set()
g = sns.histplot(data=dfs, x='compress-ratio', bins=8, hue='folder')
# histplot draws per-bin counts by default, so the y axis is a count, not a CDF
g.set(title="Compress ratio PDF", xlabel="Compress ratio", ylabel="Count")
plt.savefig(os.path.join(PLOTS_FOLDER, 'compression_ratio_pdf.png'))
plt.show()


In [None]:
# Compress Size: empirical CDF and histogram of the compressed block size, split by folder
os.makedirs(PLOTS_FOLDER, exist_ok=True)  # savefig does not create missing directories
dfs = dfs.sort_values('compress-size')
dfs['compress_size_mb'] = dfs['compress-size'] / 1_000_000 # MB (assumes compress-size is in bytes)
sns.set()
g = sns.ecdfplot(data=dfs, x='compress_size_mb', hue='folder')
g.set(title="Compress size CDF", xlabel="Compress size (MB)", ylabel="CDF")
plt.savefig(os.path.join(PLOTS_FOLDER, 'compressed_size_cdf.png'))
plt.show()

sns.set()
g = sns.histplot(data=dfs, x='compress_size_mb', bins=8, hue='folder')
# histplot draws per-bin counts by default, so the y axis is a count, not a CDF
g.set(title="Compress size PDF", xlabel="Compress size (MB)", ylabel="Count")
plt.savefig(os.path.join(PLOTS_FOLDER, 'compressed_size_pdf.png'))
plt.show()

In [None]:
# Per-folder averages of the unit-converted metrics
dfs['encoding_ms'] = dfs['encoding-time'].div(1_000_000)  # nanoseconds -> milliseconds
dfs['decoding_ms'] = dfs['decoding-time'].div(1_000_000)  # nanoseconds -> milliseconds
dfs['compress_mb_s'] = dfs['compress-speed'].mul(1_000)  # bytes/ns -> MB/s

# Drop the per-file identifier and the unconverted columns, then average
# whatever remains for each folder.
avgs = (
    dfs
    .drop(columns=[
        "file",
        "compress-speed",
        "compress-ratio",
        "compress-size",
        "raw-size",
        "encoding-time",
        "decoding-time",
    ])
    .groupby("folder")
    .mean()
)
display(avgs)