In [45]:
import os

# Connection settings for the explorer database.
# Each value can be overridden through an environment variable of the same name,
# so credentials do not have to be edited into (and committed with) the notebook.
# The defaults preserve the values that were previously hard-coded here.
DATABASE_HOST = os.environ.get("DATABASE_HOST", "172.22.0.2")
DATABASE_DATABASE = os.environ.get("DATABASE_DATABASE", "fabricexplorer")
DATABASE_USERNAME = os.environ.get("DATABASE_USERNAME", "hppoc")
DATABASE_PASSWORD = os.environ.get("DATABASE_PASSWORD", "password")

import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import bamboolib as bam
import matplotlib.pyplot as ptlib
import plotly.express as px
import json
import re
from datetime import datetime

# list all user defined tables and schemas
# "SELECT * FROM pg_catalog.pg_tables WHERE schemaname != 'information_schema' AND schemaname != 'pg_catalog';"

# open the database connection
engine = create_engine(
    f'postgresql://{DATABASE_USERNAME}:{DATABASE_PASSWORD}@{DATABASE_HOST}:5432/{DATABASE_DATABASE}'
)

# load the whole transactions table
txQuery = "SELECT * FROM transactions"
txDf = pd.read_sql(txQuery, con=engine)

# keep only transactions that have a read set and are neither
# lifecycle-chaincode nor CONFIG transactions
# (.eq(False) also rejects rows where the string test yields a missing value)
txDf = txDf.loc[
    txDf['read_set'].notna()
    & txDf['chaincodename'].str.contains('_lifecycle').eq(False)
    & txDf['type'].str.contains('CONFIG').eq(False)
]

# columns that none of the three derived frames needs
droppedEverywhere = [
    "txhash", "chaincode_id", "endorser_msp_id", "type",
    "channel_genesis_hash", "envelope_signature", "creator_id_bytes",
    "creator_nonce", "payload_extension", "tx_response",
    "payload_proposal_hash", "endorser_id_bytes", "endorser_signature",
    "network_name",
]
# timeDf: keeps createdt, drops the proposal input and the rw-sets
timeDf = txDf.drop(columns=droppedEverywhere + ["chaincode_proposal_input", "read_set", "write_set"])
# transactions: keeps createdt and the rw-sets, drops the proposal input
transactions = txDf.drop(columns=droppedEverywhere + ["chaincode_proposal_input"])
# txDf: keeps the proposal input and the rw-sets, drops createdt
txDf = txDf.drop(columns=droppedEverywhere + ["createdt"])
txDf

        id  blockid chaincodename  status     creator_msp_id  \
0        6        3          lscc   200.0  CentralBankOrgMSP   
4       10        4          cbdc   200.0  CentralBankOrgMSP   
5       11        5          cbdc   200.0           FIOrgMSP   
6       12        5          cbdc   200.0           FIOrgMSP   
7       13        6          cbdc   200.0           FIOrgMSP   
...    ...      ...           ...     ...                ...   
9057  9063      376          cbdc   200.0           FIOrgMSP   
9058  9064      377          cbdc   200.0           FIOrgMSP   
9059  9065      377          cbdc   200.0           FIOrgMSP   
9060  9066      378          cbdc   200.0           FIOrgMSP   
9061  9067      378          cbdc   200.0           FIOrgMSP   

                                               read_set  \
0     [{'chaincode': 'lscc', 'set': [{'key': 'cbdc'}]}]   
4     [{'chaincode': 'cbdc', 'set': []}, {'chaincode...   
5     [{'chaincode': 'cbdc', 'set': [{'key': 'A0xe38..

In [46]:
# give every entry of the read-set list and of the write-set list its own row
txDf = txDf.explode('read_set').explode('write_set')
# keep only the rows whose read-set entry mentions a version
# (entries without a version number, e.g. no keys read, are dropped)
mentionsVersion = (
    txDf['read_set']
    .astype('string')
    .str.contains('version', case=False, regex=False, na=False)
)
txDf = txDf.loc[mentionsVersion]
txDf

        id  blockid chaincodename  status     creator_msp_id  \
4       10        4          cbdc   200.0  CentralBankOrgMSP   
4       10        4          cbdc   200.0  CentralBankOrgMSP   
5       11        5          cbdc   200.0           FIOrgMSP   
5       11        5          cbdc   200.0           FIOrgMSP   
6       12        5          cbdc   200.0           FIOrgMSP   
...    ...      ...           ...     ...                ...   
9060  9066      378          cbdc   200.0           FIOrgMSP   
9061  9067      378          cbdc   200.0           FIOrgMSP   
9061  9067      378          cbdc   200.0           FIOrgMSP   
9061  9067      378          cbdc   200.0           FIOrgMSP   
9061  9067      378          cbdc   200.0           FIOrgMSP   

                                               read_set  \
4     {'chaincode': 'lscc', 'set': [{'key': 'cbdc', ...   
4     {'chaincode': 'lscc', 'set': [{'key': 'cbdc', ...   
5     {'chaincode': 'lscc', 'set': [{'key': 'cbdc', ..

In [47]:
# split each rw-set entry into its chaincode name and its list of keys,
# then discard the raw rw-set columns and the original chaincodename column
txDf = txDf.assign(
    r_chaincode=txDf['read_set'].map(lambda entry: entry['chaincode']),
    rs=txDf['read_set'].map(lambda entry: entry['set']),
    w_chaincode=txDf['write_set'].map(lambda entry: entry['chaincode']),
    ws=txDf['write_set'].map(lambda entry: entry['set']),
).drop(columns=['chaincodename', 'read_set', 'write_set'])
txDf

        id  blockid  status     creator_msp_id validation_code  \
4       10        4   200.0  CentralBankOrgMSP           VALID   
4       10        4   200.0  CentralBankOrgMSP           VALID   
5       11        5   200.0           FIOrgMSP           VALID   
5       11        5   200.0           FIOrgMSP           VALID   
6       12        5   200.0           FIOrgMSP           VALID   
...    ...      ...     ...                ...             ...   
9060  9066      378   200.0           FIOrgMSP           VALID   
9061  9067      378   200.0           FIOrgMSP           VALID   
9061  9067      378   200.0           FIOrgMSP           VALID   
9061  9067      378   200.0           FIOrgMSP           VALID   
9061  9067      378   200.0           FIOrgMSP           VALID   

                               chaincode_proposal_input r_chaincode  \
4     7365740690746906741000776106365,46490726705350...        lscc   
4     7365740690746906741000776106365,46490726705350...        ls

In [48]:
def _versionPart(entry, part):
    """Return entry['version'][part]['low'] for a read entry, or NaN.

    `part` is 'block_num' or 'tx_num'. NaN is returned when `entry` is not a
    dict (an exploded empty set yields NaN) or when it carries no version
    dict, so such rows no longer raise.
    """
    if not isinstance(entry, dict):
        return np.nan
    version = entry.get('version')
    if not isinstance(version, dict):
        return np.nan
    return version[part]['low']


def _isDelete(entry):
    """Return True when a write entry is a dict with a truthy 'is_delete' flag."""
    return isinstance(entry, dict) and bool(entry.get('is_delete'))


# create new rows from readsets, and make a separate df for reads
txDfr = (
    txDf.drop(columns=['w_chaincode', 'ws'])
    .explode('rs')
    .rename(columns={'r_chaincode': 'chaincode', 'rs': 'key'})
)
# add an access type column to reads
txDfr['access_type'] = 'READ'
# add version columns to the reads (extract the version info from the json)
txDfr['version_block'] = txDfr['key'].apply(lambda entry: _versionPart(entry, 'block_num'))
txDfr['version_tx'] = txDfr['key'].apply(lambda entry: _versionPart(entry, 'tx_num'))

# create new rows from writesets, and make a separate df for writes
txDfw = (
    txDf.drop(columns=['r_chaincode', 'rs'])
    .explode('ws')
    .rename(columns={'w_chaincode': 'chaincode', 'ws': 'key'})
)
# classify each write as WRITE or DELETE.
# The entries are dicts, not strings, so the flag is read from the dict itself:
# a `.str.contains` test on this column never matches and would label every
# delete as a WRITE. Rows without an entry get 'WRITE' here and are removed by
# the dropna on 'key' below.
txDfw['access_type'] = np.where(txDfw['key'].apply(_isDelete), 'DELETE', 'WRITE')

# stack the writes under the reads (DataFrame.append was removed in pandas 2.0)
txDf = pd.concat([txDfr, txDfw]).reset_index(drop=True).rename(columns={'id': 'txid'})
# extract the keys from the jsons from the rwsets
txDf['key'] = txDf['key'].apply(lambda entry: entry['key'] if isinstance(entry, dict) else np.nan)
# drop the ones with no keys
txDf = txDf.dropna(subset=['key'])
# drop rows with keys that were read by lifecycle chaincodes
txDf = txDf.loc[~(txDf['chaincode'].isin(['lscc', '_lifecycle']))]
# drop the duplicates created by creating new rows from the rwsets
txDf = txDf.drop_duplicates(keep='first')
txDf

       txid  blockid  status creator_msp_id validation_code  \
5720   2870      114   200.0       FIOrgMSP           VALID   
5724   2871      114   200.0       FIOrgMSP           VALID   
5728   2872      114   200.0       FIOrgMSP           VALID   
5732   2873      115   200.0       FIOrgMSP           VALID   
5770   2891      115   200.0       FIOrgMSP           VALID   
...     ...      ...     ...            ...             ...   
81543  9065      377   200.0       FIOrgMSP           VALID   
81548  9066      378   200.0       FIOrgMSP           VALID   
81549  9066      378   200.0       FIOrgMSP           VALID   
81554  9067      378   200.0       FIOrgMSP           VALID   
81555  9067      378   200.0       FIOrgMSP           VALID   

                                chaincode_proposal_input chaincode  \
5720   069074550697473,307865333861386563623337303365...      cbdc   
5724   069074550697473,307837303233633966346462623233...      cbdc   
5728   069074550697473,3078396563

In [49]:
# restrict the access frame to READ accesses
txDfr = txDf.loc[txDf['access_type'] == 'READ']
labels = {"x": "Read keys", "y": "Number of reads"}
# count the reads per key (count column is named '0'), most-read keys first
reads = (
    txDfr.groupby(['key'])
    .size()
    .to_frame(name='0')
    .reset_index()
    .sort_values(by=['0'], ascending=[False])
)
# bar chart of the 100 most-read keys
fig = px.bar(reads.head(100), x='key', y='0', labels={'0': 'reads'})
fig

In [50]:
# restrict the access frame to WRITE accesses
txDfw = txDf.loc[txDf['access_type'] == 'WRITE']
labels = {"x": "Written keys", "y": "Number of writes"}
# count the writes per key (count column is named '0'), most-written keys first
writes = (
    txDfw.groupby(['key'])
    .size()
    .to_frame(name='0')
    .reset_index()
    .sort_values(by=['0'], ascending=[False])
)
# bar chart of the 100 most-written keys
fig = px.bar(writes.head(100), x='key', y='0', labels={'0': 'writes'})
fig

In [51]:
# order the transactions by ascending transaction id
transactions = transactions.sort_values(by='id', ascending=True)
def gradeValidation(code):
    """Grade a validation code: +1 for 'VALID', -1 for 'MVCC_READ_CONFLICT', 0 for anything else."""
    if code == 'VALID':
        return 1
    return -1 if code == 'MVCC_READ_CONFLICT' else 0
def findKeys(string):
    """Return every key named in the string form of an rw-set.

    A key is the shortest non-empty run of characters that follows the
    literal text `'key': '` and ends right before the next single quote.
    Returns a list of the matches in order of appearance (empty if none).
    """
    keyPattern = re.compile(r"(?<='key': ').+?(?=')")
    return [found.group(0) for found in keyPattern.finditer(string)]
# grade every transaction by its validation code (-1 / 0 / +1)
transactions['delta'] = transactions['validation_code'].map(gradeValidation)
# list the keys found in the stringified read and write sets
transactions['read_keys'] = transactions['read_set'].map(lambda rwset: findKeys(str(rwset)))
transactions['written_keys'] = transactions['write_set'].map(lambda rwset: findKeys(str(rwset)))
# running total of the grades, in the current row order
transactions['delta_cumsum'] = transactions['delta'].cumsum()
# line plot of the running total against the transaction id
fig = px.line(
    transactions.sort_values(by=['id'], ascending=[True]),
    x='id',
    y='delta_cumsum',
    hover_data=['read_keys', 'written_keys'],
)
fig.update_xaxes(title_text='transaction id')
fig.update_yaxes(title_text='cumsum(valid-mvcc)')
fig

In [52]:
labels = {"x": "Validation code", "y": "Qty"}
# number of transactions per validation code, largest group first
codes = (
    transactions.groupby(['validation_code'])
    .size()
    .reset_index(name='size')
    .sort_values(by=['size'], ascending=[False])
)
# bar chart of the group sizes
fig = px.bar(x=codes['validation_code'], y=codes['size'], labels=labels)
fig

If your dataset contains a large number of MVCC conflicts, please uncomment the cell below and process the exported CSV in the mvcc-finder notebook, which runs on the Kotlin kernel.

In [53]:
# df = txDf.sort_values(by=['txid', 'access_type'], ascending=[True, True])
# df['mvcc_cause']=0
# df.to_csv('data/postfault0.csv')

After processing it with the Kotlin notebook, please uncomment the cell below to see a plot of the results.

In [54]:
# df = pd.read_csv("data/postfault0_mvccs.csv")
# fig = px.bar(df, x='key', y='mvccs_caused')
# fig

**If you processed the dataframe with the Kotlin notebook, please don't run the next 2 cells.**

In [55]:
import swifter
# accesses ordered by transaction id, then access type, on a fresh 0..n-1 index,
# with a zeroed counter column for the conflicts each row caused
df = (
    txDf.sort_values(by=['txid', 'access_type'], ascending=[True, True])
    .reset_index(drop=True)
    .assign(mvcc_cause=0)
)
def findCause(df,blockId,key,version,access,validation,mvcc):
    """Credit one MVCC read conflict to the write that most plausibly caused it.

    Does nothing unless the row is a READ of a transaction whose validation
    code is 'MVCC_READ_CONFLICT'. Otherwise it looks for VALID WRITE rows of
    the same key whose block id is greater than `version` (the block of the
    version that was read) and at most `blockId` (the block of the reading
    transaction), and increments `mvcc_cause` of the last such row in `df`.

    Parameters:
        df: access frame with columns blockid, key, access_type,
            validation_code and mvcc_cause; modified in place.
        blockId: block id of the reading transaction.
        key: the key that was read.
        version: block number of the key version that was read.
        access: access type of the row ('READ', 'WRITE', ...).
        validation: validation code of the row's transaction.
        mvcc: unused; kept so existing callers keep working.

    Returns:
        None.
    """
    if validation != 'MVCC_READ_CONFLICT' or access != 'READ':
        return
    isCandidate = (
        (df['blockid'] <= blockId)
        & (df['blockid'] > version)
        & (df['key'] == key)
        & (df['access_type'] == 'WRITE')
        & (df['validation_code'] == 'VALID')
    )
    candidates = df.index[isCandidate]
    if len(candidates) > 0:
        # Write through df.loc with the index label: the chained
        # df['mvcc_cause'].iloc[...] = ... form assigns to a temporary Series
        # (lost under pandas copy-on-write) and treats a label as a position.
        df.loc[candidates[-1], 'mvcc_cause'] += 1
        
# findCause updates df in place, so it is called from a plain loop rather than
# through swifter: swifter also runs the function on a sample of rows while it
# estimates the runtime (which would count those rows more than once) and may
# hand rows to worker processes, where in-place updates of df would be lost.
# Only reads of MVCC_READ_CONFLICT transactions can be credited, so only those
# rows are visited; the other rows were no-ops for findCause anyway.
conflictingReads = df.loc[
    (df['validation_code'] == 'MVCC_READ_CONFLICT') & (df['access_type'] == 'READ'),
    ['blockid', 'key', 'version_block'],
]
for readBlock, readKey, readVersion in zip(
    conflictingReads['blockid'], conflictingReads['key'], conflictingReads['version_block']
):
    findCause(df, readBlock, readKey, readVersion, 'READ', 'MVCC_READ_CONFLICT', 0)
df

Pandas Apply:   0%|          | 0/27139 [00:00<?, ?it/s]

       txid  blockid  status     creator_msp_id validation_code  \
0        10        4   200.0  CentralBankOrgMSP           VALID   
1        11        5   200.0           FIOrgMSP           VALID   
2        11        5   200.0           FIOrgMSP           VALID   
3        12        5   200.0           FIOrgMSP           VALID   
4        12        5   200.0           FIOrgMSP           VALID   
...     ...      ...     ...                ...             ...   
27134  9066      378   200.0           FIOrgMSP           VALID   
27135  9067      378   200.0           FIOrgMSP           VALID   
27136  9067      378   200.0           FIOrgMSP           VALID   
27137  9067      378   200.0           FIOrgMSP           VALID   
27138  9067      378   200.0           FIOrgMSP           VALID   

                                chaincode_proposal_input chaincode  \
0      7365740690746906741000776106365,46490726705350...      cbdc   
1      63726561746541646472657373,3078653338613865636..

In [56]:
# total the conflicts caused per key, keep only keys that caused at least one,
# and order them from most to fewest conflicts caused
df = (
    df.groupby(['key'])
    .agg(mvccs_caused=('mvcc_cause', 'sum'))
    .reset_index()
    .loc[lambda perKey: perKey['mvccs_caused'] > 0]
    .sort_values(by=['mvccs_caused'], ascending=[False])
)
#df['mvccs_caused'].sum()
# bar chart of conflicts caused per key
fig = px.bar(df, x='key', y='mvccs_caused')
fig

In [57]:
# copy of the reads with an extra column: how many blocks lie between the
# block of the version that was read and the block of the reading transaction
distDf = txDfr.assign(last_write_dist=txDfr['blockid'] - txDfr['version_block'])
distDf

       txid  blockid  status creator_msp_id validation_code  \
5720   2870      114   200.0       FIOrgMSP           VALID   
5724   2871      114   200.0       FIOrgMSP           VALID   
5728   2872      114   200.0       FIOrgMSP           VALID   
5732   2873      115   200.0       FIOrgMSP           VALID   
5770   2891      115   200.0       FIOrgMSP           VALID   
...     ...      ...     ...            ...             ...   
36263  9065      377   200.0       FIOrgMSP           VALID   
36268  9066      378   200.0       FIOrgMSP           VALID   
36269  9066      378   200.0       FIOrgMSP           VALID   
36274  9067      378   200.0       FIOrgMSP           VALID   
36275  9067      378   200.0       FIOrgMSP           VALID   

                                chaincode_proposal_input chaincode  \
5720   069074550697473,307865333861386563623337303365...      cbdc   
5724   069074550697473,307837303233633966346462623233...      cbdc   
5728   069074550697473,3078396563

In [58]:
# mean distance to the last write per key, smallest distances first
dist = (
    distDf.groupby(['key'])['last_write_dist']
    .mean()
    .rename('last_write_dist_mean')
    .reset_index()
    .sort_values(by=['last_write_dist_mean'], ascending=[True])
)
# bar chart of the 100 keys with the smallest mean distance
fig = px.bar(dist.head(100), x='key', y='last_write_dist_mean')
fig

In [59]:
# transactions with their creation timestamps, oldest first
tps = (
    timeDf.drop(columns=["chaincodename", "status", "creator_msp_id"])
    .sort_values(by=['createdt'], ascending=[True])
)
# truncate the timestamps to whole seconds by formatting and re-parsing them
secondFormat = '%Y-%m-%d %H:%M:%S'
tps['createdt'] = pd.to_datetime(tps['createdt'].dt.strftime(secondFormat), format=secondFormat)
# number of transactions created in each second
tps = tps.groupby(['createdt']).size()
# line plot of transactions per second over time
px.line(x=tps.index, y=tps.values, labels={'x': 'Time', 'y': 'Tps'})

In [60]:
# summary statistics of the per-second transaction counts computed above
tps.describe()

count    205.000000
mean      44.190244
std       15.649278
min        1.000000
25%       30.000000
50%       46.000000
75%       60.000000
max       72.000000
dtype: float64

In [61]:
# number of reads for every (key, validation code) combination
keycodedf = txDfr.groupby(['key', 'validation_code']).size().reset_index(name='size')
# grouped bar chart: one bar per validation code for each key
fig = px.bar(
    keycodedf,
    x='key',
    y='size',
    color='validation_code',
    barmode='group',
    labels={'size': "Occurrence in group"},
)
fig

In [62]:
# start from the per-key, per-validation-code read counts in keycodedf,
# leaving out the endorsement policy failure group
ratio = keycodedf.loc[~(keycodedf['validation_code'].isin(['ENDORSEMENT_POLICY_FAILURE']))]
# pivot the table on validation codes: one row per key, one column per code
ratio = ratio.pivot_table(index=['key'],
            columns=['validation_code'], values='size').fillna(0)
# A dataset without any MVCC conflict (or without any valid read) produces no
# such column in the pivot; add the missing column as all zeros so the
# division below does not raise KeyError.
ratio = ratio.reindex(
    columns=ratio.columns.union(['MVCC_READ_CONFLICT', 'VALID'], sort=False),
    fill_value=0,
)
# ratio of the number of occurrences of each key in mvcc transactions/valid transactions
# (a key with no VALID reads yields inf, or NaN when both counts are zero)
ratio['ratio'] = ratio['MVCC_READ_CONFLICT'] / ratio['VALID']
# sort by the ratio descending
ratio = ratio.reset_index()
ratio = ratio.sort_values(by=['ratio'], ascending=[False])
# plot the data
fig = px.bar(ratio, x='key', y='ratio', title='Ratio of MVCC/Valid')
fig

In [63]:
# reads that belong to MVCC_READ_CONFLICT transactions, counted per key;
# the 100 keys with the most such reads are kept
mvcc = (
    txDfr.loc[txDfr['validation_code'] == 'MVCC_READ_CONFLICT']
    .groupby(['key'])
    .size()
    .reset_index(name='size')
    .sort_values(by=['size'], ascending=[False])
    .head(100)
)
# bar chart of the counts
labels = {"x": "Key", "y": "Occurrence in transactions with MVCC"}
fig = px.bar(x=mvcc['key'], y=mvcc['size'], labels=labels)
fig

In [64]:
# group key reads by chaincodes and plot the size of each group
labels={"x":"Chaincode","y":"Key reads"}
# Take names and counts from the same value_counts() result so every bar gets
# the count of its own chaincode: unique() lists chaincodes in order of first
# appearance while value_counts() is ordered by descending count, so pairing
# the two mislabels the bars whenever those orders differ.
readCounts = txDfr['chaincode'].value_counts()
fig = px.bar(x=readCounts.index.to_numpy(), y=readCounts.to_numpy(), labels=labels)
fig

In [65]:
# group key writes by chaincodes and plot the size of each group
labels={"x":"Chaincode","y":"Key writes"}
# Take names and counts from the same value_counts() result so every bar gets
# the count of its own chaincode: unique() lists chaincodes in order of first
# appearance while value_counts() is ordered by descending count, so pairing
# the two mislabels the bars whenever those orders differ.
writeCounts = txDfw['chaincode'].value_counts()
fig = px.bar(x=writeCounts.index.to_numpy(), y=writeCounts.to_numpy(), labels=labels)
fig

In [66]:
# list every table outside the information_schema and pg_catalog schemas
query = (
    "SELECT * FROM pg_catalog.pg_tables "
    "WHERE schemaname != 'information_schema' "
    "AND schemaname != 'pg_catalog';"
)
# query = "select * from peer"
idkdf = pd.read_sql(query, con=engine)
idkdf

  schemaname           tablename tableowner tablespace  hasindexes  hasrules  \
0     public              blocks      hppoc       None        True     False   
1     public          chaincodes      hppoc       None        True     False   
2     public             channel      hppoc       None        True     False   
3     public             orderer      hppoc       None        True     False   
4     public                peer      hppoc       None        True     False   
5     public  peer_ref_chaincode      hppoc       None        True     False   
6     public    peer_ref_channel      hppoc       None        True     False   
7     public        transactions      hppoc       None        True     False   
8     public               users      hppoc       None        True     False   
9     public          write_lock      hppoc       None        True     False   

   hastriggers  rowsecurity  
0        False        False  
1        False        False  
2        False        False  

In [67]:
import binascii
import curses
# scratch notes on hex <-> text conversion
# binascii.hexlify('a'.encode('utf8'))
# "e".encode('utf8').hex()
# bytes.fromhex('69').decode()
# bytes.fromhex('690760065').decode('utf8')
# binascii.unhexlify('6900760065').decode('utf8')
# decode the bytes literal as UTF-8 text and print it
print(b"690760065".decode('utf8'))

690760065
