In [None]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import os
import math
from operator import add
from collections import defaultdict

# Use a wide 14 x 7 default figure size for the plots drawn in this notebook.
plt.rcParams['figure.figsize'] = (14, 7)

# Path to the directory with the raw logs produced by a benchmark run. The folder must contain
# sub-folders such as blocks, transactions, etc.
# Set the BLOCKCHAIN_BENCHMARK_LOGS_DIR environment variable to analyse another run without
# editing the notebook; the previously hard-coded location is kept as the default.
logs_directory = os.environ.get('BLOCKCHAIN_BENCHMARK_LOGS_DIR',
                                'D:/blockchain-benchmarking/ethereum/logs_20180603_14')

In [None]:
# IPs of every instance found in the logs folder (taken from the '<ip>_deploy.log' file names).
all_instances_ips = set()
# IPs of the instances that ran a blockchain node rather than generating load, kept apart so that
# resource consumption of blockchain nodes and of load generator nodes can be plotted separately.
# Filled in later from the logs folder (load generator instances have no 'transactions' log).
blockchain_intances_ips = set()

deploy_log_suffix = "_deploy.log"
all_instances_ips.update(
    entry.partition(deploy_log_suffix)[0]
    for entry in os.listdir(logs_directory)
    if entry.endswith(deploy_log_suffix)
)

In [None]:
class Transaction:
    """A single transaction record parsed from one comma-separated log line.

    The line is expected to look like ``<txId>,<timestamp>,<size>[,<status>]``.

    Attributes:
        successful: True only when the line has exactly four fields and the status is 'OK'.
        timestamp: second field as int (the rest of the notebook divides it by 1000 to get seconds).
        size: third field as int (its unit is not visible from this notebook).
        txId: first field for successful transactions, None otherwise.

    Raises:
        IndexError: when the line has fewer than three fields.
        ValueError: when timestamp or size is not an integer.
    """

    def __init__(self, log):
        items = log.strip().split(',')
        self.successful = (len(items) == 4) and (items[3] == 'OK')
        self.timestamp = int(items[1])
        self.size = int(items[2])
        # Always define txId, so that reading it on a failed transaction yields None
        # instead of raising AttributeError.
        self.txId = items[0] if self.successful else None

    def __str__(self):
        value = 'timestamp=' + str(self.timestamp) + ', size=' + str(self.size) + ', successful='
        if self.successful:
            return '(txId = ' + str(self.txId) + ', ' + value + 'YES)'
        return '(' + value + 'NO)'

    __repr__ = __str__

# First read all transactions (successful and unsuccessful) for each node
transactions_directory = os.path.join(logs_directory, "transactions")
# contains mapping IP -> list of Transactions
transactions_per_instance = defaultdict(list)
# contains all successful transactions: txId -> Transaction
successful_transactions = dict()
# running minimum / maximum over the timestamps of all transactions read below
first_transaction_timestamp = float('inf')
last_transaction_timestamp = float('-inf')

for file in os.listdir(transactions_directory):
    if file.endswith("_load.log"):
        with open(os.path.join(transactions_directory, file)) as f:
            ip = file.split('_load.log')[0]
            # every instance that has a load log is recorded as a blockchain instance
            blockchain_intances_ips.add(ip)
            for line in f:
                # Lines keep their trailing newline, so a blank line is '\n' (truthy). Strip before
                # testing, otherwise Transaction() fails with IndexError on the empty record.
                line = line.strip()
                if line:
                    transaction = Transaction(line)
                    transactions_per_instance[ip].append(transaction)
                    first_transaction_timestamp = min(first_transaction_timestamp, transaction.timestamp)
                    last_transaction_timestamp = max(last_transaction_timestamp, transaction.timestamp)

print(('First transaction timestamp=' + str(first_transaction_timestamp) +
       ', last transaction timestamp=' + str(last_transaction_timestamp)))

In [None]:
# width of one bucket on the X axis of the plots, in seconds
time_quantum_seconds = 20

# bucket start times (X axis): whole seconds from first_transaction_timestamp up to
# last_transaction_timestamp, stepping by time_quantum_seconds
first_bucket_start = int(first_transaction_timestamp / 1000)
times = list(range(first_bucket_start, int(last_transaction_timestamp / 1000) + 1, time_quantum_seconds))
# IP -> list of transaction rates, index-aligned with the times list
counts_per_ip = dict()
counts_successful_per_ip = dict()

# every transaction contributes 1/time_quantum_seconds, so that the buckets hold
# transactions per second rather than transactions per bucket
rate_per_transaction = (1 / time_quantum_seconds)

for ip, instance_transactions in transactions_per_instance.items():
    all_rates = [0] * len(times)
    successful_rates = [0] * len(times)
    counts_per_ip[ip] = all_rates
    counts_successful_per_ip[ip] = successful_rates
    for transaction in instance_transactions:
        # index of the bucket the transaction timestamp falls into
        time_position = int(((transaction.timestamp / 1000) - first_bucket_start) / time_quantum_seconds)
        all_rates[time_position] += rate_per_transaction
        if not transaction.successful:
            continue
        successful_rates[time_position] += rate_per_transaction
        successful_transactions[transaction.txId] = transaction

# turn the X axis from absolute seconds into seconds relative to the first transaction
times = [bucket_start - (first_transaction_timestamp / 1000) for bucket_start in times]

def _sum_over_instances(counts_by_ip):
    """Add the per-bucket series of every instance into one series for the whole network."""
    totals = [0] * len(times)
    for ip in transactions_per_instance.keys():
        totals = [total + count for total, count in zip(totals, counts_by_ip[ip])]
    return totals


def _show_rate_plot(series, title, with_legend=False):
    """Plot each (values, plot keyword arguments) pair of `series` against times and show the figure."""
    for values, line_options in series:
        plt.plot(times, values, **line_options)
    plt.xlabel('Time (s)')
    plt.xlim(0, (last_transaction_timestamp - first_transaction_timestamp) / 1000)
    plt.ylabel('transactions per second')
    plt.title(title)
    if with_legend:
        plt.legend(loc='best')
    plt.show()


# plot all transactions for entire network
counts_for_network = _sum_over_instances(counts_per_ip)
_show_rate_plot([(counts_for_network, {})], 'All transactions per second for entire network')

# plot successful transactions for entire network
counts_successful_for_network = _sum_over_instances(counts_successful_per_ip)
_show_rate_plot([(counts_successful_for_network, {})], 'Successful transactions per second for entire network')

# plot all transactions per node
_show_rate_plot([(counts_per_ip[ip], {'label': ip}) for ip in transactions_per_instance.keys()],
                'All transactions per second for every node', with_legend=True)

# plot successful transactions per node; switch the flag on to also draw the network total (dotted)
plot_with_aggregated_transactions = False
successful_series = [(counts_successful_per_ip[ip], {'label': ip}) for ip in transactions_per_instance.keys()]
if plot_with_aggregated_transactions:
    successful_series.append((counts_successful_for_network, {'label': 'all', 'linestyle': ':'}))
_show_rate_plot(successful_series, 'Successful transactions per second for every node', with_legend=True)

In [None]:
# Number of instances a block has to appear on; the delay computed below is the time until the
# N-th instance saw the block. Defaults to all blockchain instances, but another value may be set.
N = len(blockchain_intances_ips)

# build the map: txId -> blockHash
transaction_in_block = dict()
transactions_per_block_directory = os.path.join(logs_directory, "transactionsPerBlock")

transactions_per_block_header = 'blockID, transactionID, blockHash'
for file in os.listdir(transactions_per_block_directory):
    if file != "transactionsPerBlock":
        continue  # only the file named exactly "transactionsPerBlock" is parsed
    with open(transactions_per_block_directory + '/' + file) as f:
        for raw_line in f.readlines():
            line = raw_line.strip()
            if not line or line == transactions_per_block_header:
                continue
            items = line.split(',')
            if len(items) == 3:
                # second field is the transaction id, third field is the block hash
                transaction_in_block[items[1]] = items[2]
            else:
                print('Unable to parse line=' + line + ' in file=' + str(f))
                        
# build the nested map: blockHash -> {IP -> block appear time}
# for example, {'0x123': {'1.2.3.4': 12, '1.2.3.5': 15, '1.2.3.6': inf}}
# means that block '0x123' appeared on ip '1.2.3.4' at second 12, on '1.2.3.5' at second 15,
# and did not appear on '1.2.3.6' (seconds are counted from the first sent transaction)
block_appear_times_per_node = dict()
blocks_directory = os.path.join(logs_directory, "blocks")

blocks_header = 'id,Appeared time,Current block hash'
for file in os.listdir(blocks_directory):
    if not file.endswith('.csv'):
        continue
    with open(blocks_directory + '/' + file) as f:
        ip = file.split('.csv')[0]
        for raw_line in f.readlines():
            line = raw_line.strip()
            if not line or line == blocks_header:
                continue
            items = line.split(',')
            if len(items) != 3:
                print('Unable to parse line=' + line + ' in file=' + str(f))
                continue
            blockHash = items[2]
            if blockHash not in block_appear_times_per_node:
                # first sighting of this block: start with "never appeared" for every known instance
                block_appear_times_per_node[blockHash] = dict.fromkeys(transactions_per_instance.keys(), float("inf"))
            appeared_at = int(items[1])
            block_appear_times_per_node[blockHash][ip] = (appeared_at - first_transaction_timestamp) / 1000
                        
                        
# stores txId -> delay of appearing on N nodes (may be inf)
transaction_with_delay = dict()
for txId, blockHash in transaction_in_block.items():
    if (blockHash in block_appear_times_per_node) and (txId in successful_transactions):
        block_times = sorted(block_appear_times_per_node[blockHash].values())
        # Time at which the N-th node saw the block. N is configurable and may exceed the number of
        # nodes recorded for this block; in that case the block never reached N nodes, so use inf
        # instead of failing with IndexError.
        reached_n_nodes_at = block_times[N - 1] if N <= len(block_times) else float('inf')
        sent_at = (successful_transactions[txId].timestamp - first_transaction_timestamp) / 1000
        # in rare cases the delay is negative, because some logging operations are scheduled rather
        # than immediate; max(0, block_delay) guards against that
        block_delay = reached_n_nodes_at - sent_at
        transaction_with_delay[txId] = max(0, block_delay)

# 2d array: time_position -> all delays of the transactions sent in that time interval
# for example, 1 -> [60.2, 87.11] means that 2 transactions were sent in the interval with index 1
# (see time_quantum_seconds) and their delays were 60.2 and 87.11 seconds
# (transactions with a +inf delay are left out)
delays_per_time_position = [[] for _ in times]

for txId, delay in transaction_with_delay.items():
    if txId not in successful_transactions:
        print('Unknown transaction=' + str(txId))
        continue
    seconds_since_start = (successful_transactions[txId].timestamp - first_transaction_timestamp) / 1000
    time_position = int(seconds_since_start / time_quantum_seconds)
    if delay < float('inf'):
        delays_per_time_position[time_position].append(delay)

def mean_or_inf(arr):
    """Return np.mean(arr), or +inf when arr is empty (falsy)."""
    if not arr:
        return float('inf')
    return np.mean(arr)
        
# flattened series: time_position -> mean delay of the transactions sent in that time interval
# (inf for an interval without any collected delay)
mean_delay_per_time_position = list(map(mean_or_inf, delays_per_time_position))

delay_plot_title = 'Delay over time for transactions on achieving ' + str(N) + ' nodes'
plt.plot(times, mean_delay_per_time_position)
plt.xlabel('Time (s)')
plt.xlim(0, (last_transaction_timestamp - first_transaction_timestamp) / 1000)
plt.ylabel('Delay (s)')
plt.title(delay_plot_title)
plt.show()

# count of boxes in box plot
box_groups_count = 12
# number of consecutive time intervals merged into one box
box_bucket_size = math.ceil(len(delays_per_time_position) / box_groups_count)
# contains same values as delays_per_time_position, but with another resolution of X scale (box_groups_count)
delays_per_time_position_groupped = [[] for bucket in range(box_groups_count)]

for time_position, delays in enumerate(delays_per_time_position):
    delays_per_time_position_groupped[time_position // box_bucket_size] += delays

# each box is labelled with the start (in seconds) of the time range it covers
plt.boxplot(delays_per_time_position_groupped, notch=True, widths=0.7, showfliers=False,
            labels=[(box_bucket_size * time_quantum_seconds * x) for x in range(box_groups_count)])
# boxplot() draws the boxes at x = 1..box_groups_count, so the right limit has to lie beyond
# box_groups_count; the former limit of box_groups_count - 0.5 cut the last box off.
plt.xlim(0, box_groups_count + 1)
plt.title('Delay over time for transactions on achieving ' + str(N) + ' nodes')
plt.xlabel('Time (s)')
plt.ylabel('Delay (s)')
plt.show()

In [None]:
# Directory holding the per-instance resource usage CSV files ('<ip>_res_usage.csv').
resource_usage_directory = os.path.join(logs_directory, "resource_monitors")

class ResourceConsumption:
    """One resource usage sample parsed from a comma-separated log line.

    The fields are read positionally: timestamp, cpu percent, used memory, used memory percent,
    downloaded, uploaded.
    """

    # (attribute name, converter) for every field, in the order the fields appear in the line
    _FIELDS = (
        ('timestamp', int),
        ('cpuPercent', float),
        ('memory', int),
        ('memoryPercent', float),
        ('downloaded', int),
        ('uploaded', int),
    )

    def __init__(self, log):
        items = log.strip().split(',')
        for position, (attribute, convert) in enumerate(self._FIELDS):
            setattr(self, attribute, convert(items[position]))

    def __str__(self):
        template = '(timestamp={}, cpu={}, memory={}, memoryPercent={}, downloaded={}, uploaded={})'
        return template.format(self.timestamp, self.cpuPercent, self.memory,
                               self.memoryPercent, self.downloaded, self.uploaded)

    __repr__ = __str__


# samples of the blockchain instances (all_resource_consumption) and of the load manager
# instances (load_managers_resource_consumption), each pooled over all instances of that kind
all_resource_consumption = []
load_managers_resource_consumption = []
for file in os.listdir(resource_usage_directory):
    if file.endswith('.csv'):
        with open(resource_usage_directory + '/' + file) as f:
            ip = file.split('_res_usage.csv')[0]
            # an instance that is not a known blockchain instance is treated as a load manager
            append_to_load_managers = ip not in blockchain_intances_ips
            if append_to_load_managers:
                print('Detected load manger resource consumption info for ip=' + ip)
            for line in f:
                line = line.strip()
                if line and line != 'time,cpu,used mem,used mem%,downloaded,uploaded':
                    items = line.split(',')
                    if (len(items) != 6):
                        # str(f) is required here: concatenating the file object itself to a str
                        # raised TypeError instead of reporting the malformed line
                        print('Unable to parse line=' + line + ' in file=' + str(f))
                    elif append_to_load_managers:
                        load_managers_resource_consumption.append(ResourceConsumption(line))
                    else:
                        all_resource_consumption.append(ResourceConsumption(line))
                            
# 2d array: time_position -> all ResourceConsumption samples taken in that time interval
resources_per_time_position = [[] for _ in times]

for sample in all_resource_consumption:
    # samples taken before the first or after the last transaction are ignored
    if sample.timestamp < first_transaction_timestamp or sample.timestamp > last_transaction_timestamp:
        continue
    seconds_since_start = (sample.timestamp - first_transaction_timestamp) / 1000
    time_position = int(seconds_since_start / time_quantum_seconds)
    resources_per_time_position[time_position].append(sample)
    
# per time interval: the raw values of each metric, taken from the samples of that interval
memory_consumption = [[rc.memory for rc in rc_list] for rc_list in resources_per_time_position]
cpu_percent_consumption = [[rc.cpuPercent for rc in rc_list] for rc_list in resources_per_time_position]
network_consumption = [[(rc.downloaded + rc.uploaded) for rc in rc_list] for rc_list in resources_per_time_position]

# one figure per metric, showing the mean of the metric in every time interval
for values_per_interval, y_label, plot_title in (
        (memory_consumption, 'Used memory (bytes)', 'Mean used memory'),
        (cpu_percent_consumption, 'CPU usage (%)', 'Mean used CPU (%)'),
        (network_consumption, 'Network consumption (bytes/s)', 'Mean network consumption'),
):
    plt.plot(times, [mean_or_inf(values) for values in values_per_interval])
    plt.xlabel('Time (s)')
    plt.xlim(0, (last_transaction_timestamp - first_transaction_timestamp) / 1000)
    plt.ylabel(y_label)
    plt.title(plot_title)
    plt.show()

In [None]:
# 2d array: time_position -> ResourceConsumption samples of the load managers in that time interval
load_managers_resources_per_time_position = [[] for _ in times]

for sample in load_managers_resource_consumption:
    # samples taken before the first or after the last transaction are ignored
    if sample.timestamp < first_transaction_timestamp or sample.timestamp > last_transaction_timestamp:
        continue
    seconds_since_start = (sample.timestamp - first_transaction_timestamp) / 1000
    time_position = int(seconds_since_start / time_quantum_seconds)
    load_managers_resources_per_time_position[time_position].append(sample)

# per time interval: memory and cpu values of the load generator samples
load_managers_memory_consumption = [[rc.memory for rc in rc_list] for rc_list in load_managers_resources_per_time_position]
load_managers_cpu_consumption = [[rc.cpuPercent for rc in rc_list] for rc_list in load_managers_resources_per_time_position]

# One twin-axis figure per metric: successful transactions per second on the left axis (blue),
# mean value of the load generator metric on the right axis (red).
for values_per_interval, metric_label, plot_title in (
        (load_managers_memory_consumption, 'Used memory (bytes)',
         'Successful transactions per second for entire network and used memory of load generator(s)'),
        (load_managers_cpu_consumption, 'CPU usage (%)',
         'Successful transactions per second for entire network and CPU usage of load generator(s)'),
):
    fig, ax1 = plt.subplots()

    ax1.set_xlabel('Time (s)')
    ax1.set_ylabel('Transactions per second', color='blue')
    ax1.plot(times, counts_successful_for_network, color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_xlim(0, (last_transaction_timestamp - first_transaction_timestamp) / 1000)

    ax2 = ax1.twinx()
    ax2.set_ylabel(metric_label, color='red')
    ax2.plot(times, [mean_or_inf(values) for values in values_per_interval], color='red')
    ax2.tick_params(axis='y', labelcolor='red')
    ax2.set_xlim(0, (last_transaction_timestamp - first_transaction_timestamp) / 1000)

    plt.title(plot_title)
    plt.show()

In [None]:
# combinations of 2 plots in one


def _plot_against_successful_tps(metric_values, metric_label, title):
    """Show successful transactions per second (left axis, blue) together with
    `metric_values` (right axis, red) over the same time axis."""
    experiment_seconds = (last_transaction_timestamp - first_transaction_timestamp) / 1000
    fig, ax1 = plt.subplots()

    ax1.set_xlabel('Time (s)')
    ax1.set_ylabel('Transactions per second', color='blue')
    ax1.plot(times, counts_successful_for_network, color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_xlim(0, experiment_seconds)

    # second Y axis sharing the X axis of the first one
    ax2 = ax1.twinx()
    ax2.set_ylabel(metric_label, color='red')
    ax2.plot(times, metric_values, color='red')
    ax2.tick_params(axis='y', labelcolor='red')
    ax2.set_xlim(0, experiment_seconds)

    plt.title(title)
    plt.show()


# Transactions and delays
_plot_against_successful_tps(
    mean_delay_per_time_position, 'delay (s)',
    'Successful transactions per second for entire network and delay to achieve ' + str(N) + ' nodes')

# Transactions and memory consumption
_plot_against_successful_tps(
    [mean_or_inf(mem_list) for mem_list in memory_consumption], 'Used memory (bytes)',
    'Successful transactions per second for entire network and used memory')

# Transactions and cpu consumption
_plot_against_successful_tps(
    [mean_or_inf(cpu_pct_list) for cpu_pct_list in cpu_percent_consumption], 'CPU usage (%)',
    'Successful transactions per second for entire network and CPU usage')

# Transactions and network consumption
_plot_against_successful_tps(
    [mean_or_inf(network_list) for network_list in network_consumption], 'Network consumption (bytes/s)',
    'Successful transactions per second for entire network and network consumption')

In [None]:
# 2d array: position in the sorted list of appear times -> delays until that many nodes had the block
# (index 0 holds the delays until the first node saw the block, index 1 until the second one, ...)
# so an entry [12.2, 13.4, 29.1] means that 3 transactions reached that many nodes
# and that their delays were 12.2, 13.4 and 29.1 seconds respectively
transaction_delay_by_nodes_count = [[] for _ in range(len(blockchain_intances_ips))]

for txId, blockHash in transaction_in_block.items():
    if blockHash not in block_appear_times_per_node or txId not in successful_transactions:
        continue
    for n, appeared_at in enumerate(sorted(block_appear_times_per_node[blockHash].values())):
        if appeared_at < float('inf'):
            delay_to_appear = appeared_at - (successful_transactions[txId].timestamp - first_transaction_timestamp) / 1000
            # in rare cases the delay is negative, because some logging operations are scheduled
            # rather than immediate; max(0, delay_to_appear) guards against that
            transaction_delay_by_nodes_count[n].append(max(0, delay_to_appear))
            
# the same box plot twice: first with boxplot's default outlier handling, then with outliers hidden
for extra_boxplot_options, plot_title in (
        ({}, 'Delay to achieve X nodes (with outliers)'),
        ({'showfliers': False}, 'Delay to achieve X nodes'),
):
    plt.boxplot(transaction_delay_by_nodes_count, widths=0.7, **extra_boxplot_options)
    plt.title(plot_title)
    plt.xlabel('Nodes')
    plt.ylabel('Delay (s)')
    plt.show()