In [None]:
import os
import glob
import tarfile
import string

import numpy as np
import pandas as pd
from dask import dataframe as dd

from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
def add_hatches_to_plot(ax, groups_iterable, legend = True):
    """Give consecutive runs of bars on ``ax`` their own hatch pattern.

    ``ax.patches`` is walked in order and split into runs whose length equals
    the number of distinct values in ``groups_iterable``. Every bar in a run
    gets the same hatch; each new run moves on to the next pattern.

    Parameters
    ----------
    ax
        Object exposing ``patches`` (items with ``set_hatch``) and ``legend``.
    groups_iterable
        Values whose distinct count sets the run length.
    legend
        When true, call ``ax.legend`` after the hatches have been applied.
    """
    hatches = ['/', '//', '+', '-', 'x', '\\', '*', 'o', 'O', '.']
    # An empty iterable would make the run length zero; fall back to one bar
    # per run instead of failing on a division by zero.
    run_length = max(len(np.unique(groups_iterable)), 1)
    for i, bar in enumerate(ax.patches):
        # Wrap around the pattern list so plots with more runs than patterns
        # reuse hatches instead of raising IndexError.
        bar.set_hatch(hatches[(i // run_length) % len(hatches)])
    if legend:
        ax.legend(loc='best', shadow = True)

In [None]:
tests = glob.glob('test_results/*/*/*')
os.makedirs('data',exist_ok=True)
!rm -rf data/*pockets 
os.makedirs('data/3pockets',exist_ok=True)
os.makedirs('data/2pockets',exist_ok=True)
os.makedirs('data/1pockets',exist_ok=True)
for test in tests:
    with tarfile.open(test) as tf:
        if '3pockets' in test:
            tf.extractall('data/3pockets')
        elif '2pockets' in test:
            tf.extractall('data/2pockets')
        elif '1pockets' in test:
            tf.extractall('data/1pockets')

In [None]:
# Collect the txinfo.csv of every extracted run and load each into its own
# DataFrame; tests and tests_data stay index-aligned.
tests = glob.glob('data/*/*/txinfo.csv')
print(f'found {len(tests)} tests')
tests_data = list(map(pd.read_csv, tests))
tests_data[0].info()

In [None]:
def extract_test_configs(path: str) -> tuple[str, int, int]:
    """Parse the run configuration out of the folder that contains ``path``.

    The parent folder name is read as ``<technique>-<rate>tps<assets><suffix>``,
    where ``<suffix>`` is any trailing run of ASCII letters and dots.

    Returns
    -------
    tuple
        ``(technique, send rate, asset count)``.
    """
    run_folder = path.split('/')[-2]
    name_parts = run_folder.split('-')
    technique = name_parts[0]
    rate_and_assets = name_parts[1].split('tps')
    send_rate = int(rate_and_assets[0])
    # Strip the trailing letters/dots (e.g. an "assets" label) before converting.
    trailing_noise = string.ascii_letters + '.'
    asset_count = int(rate_and_assets[1].rstrip(trailing_noise))
    return (technique, send_rate, asset_count)

# Stamp each loaded frame with the configuration parsed from its file path.
for i, test in enumerate(tests):
    tech, tps, assets = extract_test_configs(test)
    frame = tests_data[i]
    for column, value in (('asset_count', assets), ('technique', tech), ('send_rate', tps)):
        frame[column] = value

In [None]:
def extract_test_scenario(test_data: pd.DataFrame) -> str:
    """Name a run's scenario from how many distinct transact functions it invoked.

    Parameters
    ----------
    test_data
        Frame with an ``invoked_func`` column.

    Returns
    -------
    str
        ``'1pockets'`` for one distinct function name containing ``'transact'``,
        ``'2pockets'`` for two, and ``'3pockets'`` for any larger count.

    Raises
    ------
    ValueError
        If no invoked function name contains ``'transact'``.
    """
    # value_counts() yields each distinct, non-null function name once.
    invoked_names = test_data['invoked_func'].value_counts().index
    tx_funcs = sum('transact' in func for func in invoked_names)
    if tx_funcs == 0:
        raise ValueError('No invoked function data')
    if tx_funcs == 1:
        return '1pockets'
    if tx_funcs == 2:
        return '2pockets'
    return '3pockets'

# Label every run's frame with the scenario derived from its invoked functions.
for run_frame in tests_data:
    run_frame['scenario'] = extract_test_scenario(run_frame)

In [None]:
def extract_status(test_data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``test_data`` with a normalised ``status`` column.

    When a ``read_write_set_error`` column is present, rows are labelled
    ``'mismatch'`` (error equals ``'MISMATCH'``), ``'mvcc'`` (status
    ``'failed'``), ``'success'`` (status ``'success'``), or ``'unknown'`` when
    none of these apply. That column is then dropped. Without it, ``'failed'``
    becomes ``'mvcc'`` and every other status becomes ``'success'``.

    Every column whose name contains ``'commit'`` is dropped as well. The
    input frame is not modified.
    """
    df = test_data.copy()
    cols_to_drop = [col for col in df.columns if 'commit' in col]
    if 'read_write_set_error' in df.columns:
        # np.select's implicit default is the integer 0. That cannot share a
        # dtype with string labels on recent NumPy releases, and on older ones
        # silently became the label '0'. Pass an explicit string default.
        df['status'] = np.select(
            [
                df['read_write_set_error'] == 'MISMATCH',
                df['status'] == 'failed',
                df['status'] == 'success',
            ],
            ['mismatch', 'mvcc', 'success'],
            default='unknown',
        )
        cols_to_drop.append('read_write_set_error')
    else:
        df['status'] = np.where(df['status'] == 'failed', 'mvcc', 'success')
    return df.drop(columns=cols_to_drop)
# Swap each run's frame for its status-normalised copy, slot by slot.
for position in range(len(tests_data)):
    tests_data[position] = extract_status(tests_data[position])

In [None]:
# Stack the per-run frames into one table (rows appended, each frame keeping
# its own row labels) and display it.
all_tests_merged = pd.concat(tests_data, axis=0)
all_tests_merged

In [None]:
# MVCC conflict counts per run, plotted against send_rate / asset_count with
# one facet per scenario. The frame is built as one chain so `freq` is added to
# a fresh frame instead of being assigned onto a filtered slice.
plot = (
    all_tests_merged
    .groupby(['scenario', 'asset_count', 'send_rate', 'technique', 'status'])
    .size()
    .reset_index(name='amount')
    .loc[lambda counts: counts['status'] == 'mvcc']
    .assign(freq=lambda counts: counts['send_rate'] / counts['asset_count'])
)
g = sns.relplot(plot, x = 'freq', y = 'amount', hue = 'technique', col = 'scenario', kind = 'line', facet_kws={'sharey': True, 'sharex': False})

In [None]:
# Same MVCC conflict counts as a single line plot: colour encodes the
# technique, line style and markers encode the scenario. The frame is built as
# one chain so `freq` is added to a fresh frame, not a filtered slice.
plot = (
    all_tests_merged
    .groupby(['scenario', 'asset_count', 'send_rate', 'technique', 'status'])
    .size()
    .reset_index(name='amount')
    .loc[lambda counts: counts['status'] == 'mvcc']
    .assign(freq=lambda counts: counts['send_rate'] / counts['asset_count'])
)
g = sns.lineplot(plot, x = 'freq', y = 'amount', hue = 'technique',  style = 'scenario', markers = True)

In [None]:
# Pairwise comparison of per-run MVCC conflict percentages between techniques:
# three scatter panels with y = x, y = 2x and y = 3x reference lines.
plot = all_tests_merged.groupby(['scenario', 'asset_count', 'send_rate', 'technique', 'status']).size().reset_index().rename(columns={0:'amount'})
# Copy the filtered rows so the column added below lands on an independent
# frame rather than on a slice of the grouped table.
plot = plot[plot['status'] == 'mvcc'].copy()
# Padding appended to the affinity column further down.
# NOTE(review): presumably affinity has 12 fewer runs than the other
# techniques (no 3pockets runs) - confirm.
naive_filler = [np.nan for i in range(0,12)]
# NOTE(review): dividing by 10_000 / 100 reads as "percent of 10,000
# transactions per run" - confirm the run size.
plot['pct_mvcc'] = plot['amount'] / (10_000 / 100)
plot = plot.sort_values(by=['scenario', 'asset_count', 'send_rate', 'technique'])
scenarios = plot[plot['technique'] == 'naive']['scenario'].values
# x-values of the y = 3x reference line: 12 points on [0, 6.66] followed by 24
# NaN, i.e. 36 entries. Every column of plot_pairs must have this same length.
line_3 = np.append(np.linspace(0, 6.66, num =12, endpoint = True), [np.nan for i in range(0,24)])

pct_mvcc_total = plot[(plot['technique'] == 'total')]['pct_mvcc'].values
plot_pairs = pd.DataFrame({'pct_mvcc_total': pct_mvcc_total,
                          'pct_mvcc_affinity': np.append(plot[plot['technique'] == 'affinity']['pct_mvcc'].values, naive_filler),
                          'pct_mvcc_naive': plot[plot['technique'] == 'naive']['pct_mvcc'].values,
                          'scenario': scenarios,
                           'line_1': pct_mvcc_total,
                           'line_2': pct_mvcc_total*2,
                           'line_3y': line_3*3,
                           'line_3x': line_3,
                          })
# Blank out the upper end of the y = 2x line so it stops at 22.
plot_pairs.loc[plot_pairs['line_2'] > 22, 'line_2'] = np.nan

fig, axes = plt.subplots(figsize=(15, 5), ncols = 3)
# Panel 1: total vs naive, all scenarios.
p1 = sns.scatterplot(plot_pairs, x='pct_mvcc_total', y='pct_mvcc_naive', style = 'scenario', ax=axes[0])
p1.set_xlabel('mvcc conflicts percentage (total technique)')
p1.set_ylabel('mvcc conflicts percentage (naive technique)')
p2 = sns.lineplot(plot_pairs, y='line_1', x='line_1', color='r', ax=axes[0])
p2 = sns.lineplot(plot_pairs, y='line_2', x='line_1', color='g', linestyle='--', ax=axes[0])
p2 = sns.lineplot(plot_pairs, y='line_3y', x='line_3x', color='b', linestyle=':', ax=axes[0])

# The affinity panels leave out the 3pockets rows.
two_scen_pairs = plot_pairs.loc[plot_pairs['scenario']!='3pockets']

# Panel 2: affinity vs naive.
p1 = sns.scatterplot(two_scen_pairs, x='pct_mvcc_affinity', y='pct_mvcc_naive', style = 'scenario', ax=axes[1])
p1.set_xlabel('mvcc conflicts percentage (affinity technique)')
p1.set_ylabel('mvcc conflicts percentage (naive technique)')
p2 = sns.lineplot(plot_pairs, y='line_1', x='line_1', color='r', ax=axes[1])
p2 = sns.lineplot(plot_pairs, y='line_2', x='line_1', color='g', linestyle='--', ax=axes[1])

# Panel 3: total vs affinity.
p1 = sns.scatterplot(two_scen_pairs, x='pct_mvcc_total', y='pct_mvcc_affinity', style = 'scenario', ax=axes[2])
p1.set_xlabel('mvcc conflicts percentage (total technique)')
p1.set_ylabel('mvcc conflicts percentage (affinity technique)')
p2 = sns.lineplot(plot_pairs, y='line_1', x='line_1', color='r', ax=axes[2])
fig.suptitle('MVCC conflict percentage of total transactions')
# savefig does not create missing folders, so make sure plots/ exists first.
os.makedirs('plots', exist_ok=True)
fig.savefig('plots/mvcc_scenarios.png', bbox_inches = 'tight')

In [None]:
# Heatmap of MVCC conflict share per scenario (rows) and technique (columns),
# counting only rows whose function name contains 'trans'.
plot = all_tests_merged.groupby(['scenario', 'technique', 'status', 'function']).size().reset_index()
is_mvcc_transfer = plot['function'].str.contains('trans') & (plot['status'] == 'mvcc')
plot = (
    plot[is_mvcc_transfer]
    .drop(['function', 'status'], axis=1)
    .rename(columns={0:'mean_pct_mvcc'})
)
# NOTE(review): 120_000/100 presumably turns a count over 120,000 transactions
# into a percentage - confirm the total.
plot['mean_pct_mvcc'] = plot['mean_pct_mvcc'] / (120_000/100)
plot = plot.pivot(index='scenario', columns='technique', values='mean_pct_mvcc')
sns.heatmap(plot, annot = True)
plt.title('Mean mvcc percentages')

In [None]:
# Show each scenario label once (first occurrence kept).
all_tests_merged['scenario'].drop_duplicates()

In [None]:
# End-to-end latency (time_end - time_create) per technique, excluding rows
# whose function name contains 'ping'. The subtraction and filter run on a
# dask frame that is then materialised back into pandas.
latency = dd.from_pandas(all_tests_merged, npartitions=10)
latency['latency'] = latency.time_end - latency.time_create
latency = latency[~latency['function'].astype(str).str.contains('ping')]

latency = latency.compute()
latency = latency.sort_values(by='technique')
ax = sns.boxplot(latency, y = 'latency', x = 'technique', fliersize=2.5)
# A single-value group list gives a run length of one, so every patch gets its
# own hatch.
add_hatches_to_plot(ax, ['0'],  legend = False)
plt.title('End to end latency for each technique')
plt.ylabel('latency (ms)')
# savefig does not create missing folders, so make sure plots/ exists first.
os.makedirs('plots', exist_ok=True)
ax.get_figure().savefig('plots/latency_techniques.png', bbox_inches = 'tight')

In [None]:
from sqlalchemy import create_engine
# Connection settings for the explorer database. Host, database name, user and
# password can each be overridden through an environment variable of the same
# name so real credentials need not live in the notebook. The fallbacks are the
# values this notebook has always used.
DATABASE_HOST = os.environ.get("DATABASE_HOST", "172.21.0.2")
# Kept as a literal: it carries its own ':' and '/' separators because it is
# concatenated verbatim into the URL below.
DATABASE_PORT=":5432/"
DATABASE_DATABASE = os.environ.get("DATABASE_DATABASE", "fabricexplorer")
DATABASE_USERNAME = os.environ.get("DATABASE_USERNAME", "hppoc")
DATABASE_PASSWORD = os.environ.get("DATABASE_PASSWORD", "password")

#initializing resources
engine = create_engine(
    f'postgresql://{DATABASE_USERNAME}:{DATABASE_PASSWORD}@'
    f'{DATABASE_HOST}{DATABASE_PORT}{DATABASE_DATABASE}'
)

# Pull the whole transactions table into memory and summarise its columns.
txQuery = "SELECT * FROM transactions"
txDf = pd.read_sql(txQuery,con=engine)
txDf.info()

In [None]:
# Mean write-set size in bytes (key, value and key+value) per chaincode and
# function, computed from the transactions table.
write_set_sizes = txDf.loc[:, ['chaincodename', 'write_set', 'chaincode_proposal_input']]
# Keep only chaincodes whose name contains 'micro'. The copy makes the column
# assignments below act on an independent frame rather than a filtered slice.
write_set_sizes = write_set_sizes.loc[write_set_sizes['chaincodename'].str.contains('micro')].copy()
# The function name is whatever precedes the first comma of the proposal input.
write_set_sizes['function'] = write_set_sizes['chaincode_proposal_input'].astype(str).apply(lambda x: x.split(',')[0])
# One row per write_set entry; entries whose text mentions 'lscc' are dropped.
write_set_sizes = write_set_sizes.explode('write_set')
write_set_sizes = write_set_sizes.loc[~write_set_sizes['write_set'].astype(str).str.contains('lscc')]
# Each remaining entry is indexed by 'set'; exploding again gives one row per
# written item (each later indexed by 'key' and 'value').
write_set_sizes['write_set'] = write_set_sizes['write_set'].apply(lambda x: x['set'])
write_set_sizes = write_set_sizes.explode('write_set').dropna()
# NUL characters inside keys are replaced by '|' before measuring.
write_set_sizes['key'] = write_set_sizes['write_set'].apply(lambda x: str(x['key']).replace(u'\x00', '|'))
write_set_sizes['value'] = write_set_sizes['write_set'].apply(lambda x: x['value'])
write_set_sizes = write_set_sizes.loc[:,['chaincodename', 'function', 'key', 'value']]
write_set_sizes['key_bytes'] = write_set_sizes['key'].str.encode('utf8').apply(len)
write_set_sizes['value_bytes'] = write_set_sizes['value'].str.encode('utf8').apply(len)
# The row label inherited from txDf identifies the transaction an item came from.
write_set_sizes = write_set_sizes.reset_index().rename(columns={'index':'tx_index'})
write_set_sizes['total_bytes'] = write_set_sizes['value_bytes'] + write_set_sizes['key_bytes']
# Aggregate only the byte counters: first the total per transaction, then the
# mean per chaincode/function. Selecting the columns explicitly keeps the text
# columns ('key', 'value') out of sum()/mean(), which cannot average strings.
byte_columns = ['key_bytes', 'value_bytes', 'total_bytes']
write_set_sizes = write_set_sizes.groupby(['tx_index', 'chaincodename', 'function'])[byte_columns].sum().reset_index().drop('tx_index', axis = 1)
write_set_sizes = write_set_sizes.groupby(['chaincodename', 'function'])[byte_columns].mean().reset_index()
# Replace the encoded function identifiers with readable names.
write_set_sizes.loc[write_set_sizes['function'] == '74726107361637457697468500630657431', 'function'] = 'transferWithPocket1'
write_set_sizes.loc[write_set_sizes['function'] == '7472610736163745769746850063065743233', 'function'] = 'transferWithPocket23'
write_set_sizes.loc[write_set_sizes['function'] == '6372656174654465004173736574', 'function'] = 'createAsset'
write_set_sizes.to_csv('data/total_write_sets.csv', index = False)
write_set_sizes

In [None]:
# Scratch check: length of the hex form of a NUL-separated sample string
# (two hex digits per UTF-8 byte).
sample_key = '\x00he\x00llo\x00'
print(len(sample_key.encode('utf8').hex()))

In [None]:
# Per written item: chaincode, key, value, function and the UTF-8 byte size of
# key + value, derived from the transactions table.
set_sizes = txDf.loc[:,['chaincodename', 'write_set', 'chaincode_proposal_input']]
# The function name is whatever precedes the first comma of the proposal input.
set_sizes['function'] = set_sizes['chaincode_proposal_input'].apply(lambda x: x.split(',')[0])
# Flatten each write_set with json_normalize and keep the last row's values,
# then spread those values over separate rows.
set_sizes['write_set'] = set_sizes['write_set'].apply(pd.json_normalize)
set_sizes['write_set'] = set_sizes['write_set'].apply(lambda x: x.values[-1])
set_sizes = set_sizes.explode('write_set')
# Keep only cells whose text contains '[' and 'key'. The pattern is a raw
# string so the backslash reaches the regex engine without relying on an
# invalid string escape.
set_sizes = set_sizes[set_sizes['write_set'].astype(str).str.contains(r'\[')]
set_sizes = set_sizes[set_sizes['write_set'].astype(str).str.contains('key')]
# One row per written item; each item is indexed by 'value' and 'key' below.
set_sizes = set_sizes.explode('write_set')
set_sizes['value'] = set_sizes['write_set'].apply(lambda x: x['value'])
set_sizes['key'] = set_sizes['write_set'].apply(lambda x: x['key'])
# NUL characters inside keys are replaced by '|'.
set_sizes['key'] = set_sizes['key'].apply(lambda x: x.replace(u'\x00', '|'))
set_sizes = set_sizes[['chaincodename', 'key','value', 'function']]
# Drop items whose key contains 'micro'.
set_sizes = set_sizes[~set_sizes['key'].str.contains('micro')]
set_sizes['total'] = set_sizes['key'].str.cat(set_sizes['value'])
set_sizes['size'] = set_sizes['total'].apply(lambda x: len(x.encode('utf8')))
set_sizes

In [None]:
# Persist the per-item sizes (semicolon-separated, row labels included) so the
# later size cells can pick them up via data/*sizes.csv.
set_sizes.to_csv(path_or_buf='data/affinity_sizes.csv', sep=';')

In [None]:
# Read every data/*sizes.csv file, tag its rows with the file-name prefix
# before the first '_' (stored as 'chaincode'), and stack the results.
set_sizes_csv = glob.glob('data/*sizes.csv')
set_sizes = pd.concat([
    pd.read_csv(csv_path, sep=';').assign(chaincode=os.path.basename(csv_path).split('_')[0])
    for csv_path in set_sizes_csv
])
set_sizes

In [None]:
# Mean write-set size per technique and function: sum the item sizes that share
# a row label ('idx') within a file, then average those sums.
set_sizes = set_sizes.rename(columns={'Unnamed: 0': 'idx'})
# Select 'size' explicitly. The first positional argument of GroupBy.sum/mean
# is `numeric_only`, not a column name, so passing 'size' there only worked by
# being truthy.
set_sizes = set_sizes.groupby(['chaincode', 'function', 'idx'])['size'].sum().reset_index()
set_sizes = set_sizes.groupby(['chaincode', 'function'])['size'].mean().reset_index()
# Replace the encoded function identifiers with readable names.
set_sizes.loc[set_sizes['function'] == '74726107361637457697468500630657431', 'function'] = 'transferWithPocket1'
set_sizes.loc[set_sizes['function'] == '7472610736163745769746850063065743233', 'function'] = 'transferWithPocket23'
set_sizes.loc[set_sizes['function'] == '6372656174654465004173736574', 'function'] = 'createAsset'
set_sizes = set_sizes.rename(columns={'chaincode': 'technique'})
set_sizes = set_sizes[['technique', 'function', 'size']]
set_sizes

In [None]:
# Grouped bar chart of the mean write-set size per function, one bar colour
# per technique.
ax = sns.barplot(set_sizes, x = 'function', y = 'size', hue = 'technique')
set_sizes = set_sizes.sort_values(by='technique')
# The hatch helper changes pattern every N consecutive patches, where N is the
# number of distinct values it is given. Pass the x categories, as the read-set
# plot in this notebook does, so each technique's run of bars shares one hatch.
# NOTE(review): assumes seaborn emits the bars technique by technique - confirm.
add_hatches_to_plot(ax, set_sizes['function'])
plt.title('Mean write-set sizes')
plt.ylabel('size (bytes)')
# savefig does not create missing folders, so make sure plots/ exists first.
os.makedirs('plots', exist_ok=True)
ax.get_figure().savefig('plots/write_set_techniques.png', bbox_inches = 'tight')

In [None]:
# Reload the per-item size files and classify every written item by the number
# of '|' separators in its key.
set_sizes_csv = glob.glob('data/*sizes.csv')
set_sizes = [pd.read_csv(csv, sep=';') for csv in set_sizes_csv]
for i, csv in enumerate(set_sizes_csv):
    # The file-name prefix before the first '_' names the chaincode.
    cc = os.path.basename(csv).split('_')[0]
    set_sizes[i]['chaincode'] = cc
set_sizes = pd.concat(set_sizes)
# Replace the encoded function identifiers with readable names.
set_sizes.loc[set_sizes['function'] == '74726107361637457697468500630657431', 'function'] = 'transferWithPocket1'
set_sizes.loc[set_sizes['function'] == '7472610736163745769746850063065743233', 'function'] = 'transferWithPocket23'
set_sizes.loc[set_sizes['function'] == '6372656174654465004173736574', 'function'] = 'createAsset'
# Count the separators once. The pattern is a raw string so the backslash
# reaches the regex engine without relying on an invalid string escape.
separator_count = set_sizes['key'].str.count(r'\|')
set_sizes['count'] = separator_count
# Conditions are tested in order: no count available -> 'total'; exactly three
# separators -> 'proxy'; more than three with a '{' in the value ->
# 'complex_partition'; everything else -> 'simple_partition'. The fallback is
# passed as a string default because np.select's implicit integer default
# cannot share a dtype with string labels on recent NumPy releases.
set_sizes['object_type'] = np.select(
    [
        pd.isna(separator_count),
        separator_count == 3,
        (separator_count > 3) & (set_sizes['value'].str.contains('{')),
    ],
    ['total', 'proxy', 'complex_partition'],
    default='simple_partition',
)
set_sizes

In [None]:
# Bar chart of read-set sizes per function and technique, assembled from the
# mean size of each object type.
# One row ('size') with one column per object type.
obj_sizes = set_sizes.groupby('object_type').agg({'size':'mean'}).T
# Each list is ordered like the 'technique' entry: affinity, total, naive.
plot = {
    'transferWithPocket1':[
        obj_sizes['proxy'].iloc[0] + obj_sizes['simple_partition'].iloc[0],
        obj_sizes['proxy'].iloc[0] + obj_sizes['simple_partition'].iloc[0],
        obj_sizes['total'].iloc[0],
    ],
    'transferWithPocket23':[
        obj_sizes['proxy'].iloc[0] + obj_sizes['complex_partition'].iloc[0],  
        obj_sizes['proxy'].iloc[0] + obj_sizes['simple_partition'].iloc[0]*2,
        obj_sizes['total'].iloc[0],
    ],
    'technique':['affinity', 'total', 'naive']
}
plot = pd.DataFrame(plot)
# Long format: one row per (technique, function) with its size.
plot = plot.melt(['technique'],['transferWithPocket23', 'transferWithPocket1']).rename(columns={'variable':'function', 'value':'size'})
plot = plot.sort_values(by=['technique'])
ax = sns.barplot(plot, x='function', y='size', hue = 'technique')
add_hatches_to_plot(ax, plot['function'])
plt.title('Mean read-set sizes')
plt.ylabel('size (bytes)')
# savefig does not create missing folders, so make sure plots/ exists first.
os.makedirs('plots', exist_ok=True)
ax.get_figure().savefig('plots/read_set_techniques.png', bbox_inches = 'tight')