In [None]:
import json
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats import binom
from scipy.stats import gamma
import re
import os
from matplotlib import pyplot as plt
from typing import Tuple, List, Callable, Any

In [None]:
# General settings
# NOTE(review): num_runs, starting_seed and seed_multiplier are not read by any analysis
# cell visible in this notebook; presumably they mirror the simulator's run configuration — confirm.
num_runs = 1
starting_seed = 0
seed_multiplier = 100

# Validator settings
# num_nodes, base_time_limit and node_processing_distribution are passed by
# get_topology_data to get_fn_data as filters when selecting result folders.
num_nodes = 31  # also passed as `n` to the IBFT prediction models
num_consensus = 2000  # not referenced in the visible cells
base_time_limit = 10000
node_processing_distribution = "exp"
node_processing_parameters = [3]  # not referenced in the visible cells; the models hard-code mu=3 — presumably the same value, confirm
consensus_protocol = "HS"  # not referenced in the visible cells (protocols are passed explicitly as "ibft"/"hs")

## Fault settings
num_faults = 0  # passed by get_topology_data to get_fn_data as a filter on the result folders
fault_type = "UR"  # not referenced in the visible cells
fault_parameters = []  # not referenced in the visible cells

# Network settings
## Switch settings
# NOTE(review): none of the network settings below are read by the visible cells;
# topology and its parameters are passed explicitly to get_topology_data.
switch_processing_distribution = "exp"
switch_processing_parameters = [0]
message_channel_success_rate = 1

network_type = "df"
network_parameters = []

In [None]:
# Pattern for result-folder names: "json_n<...>_btl<...>" followed by further
# '_'-separated fields, each captured as its own (greedy) group.
RESULTS_FOLDER_REGEX = r'json_n(.+)_btl(.+)_(.+)_(.+)_(.+)_(.+)_(.+)_(.+)_(.+)_(.+)_(.+)_(.+)'

def get_num_nodes(filename: str) -> int:
    """Return capture group 1 of the folder name (the text after ``json_n``) as an int.

    Raises AttributeError when ``filename`` does not match RESULTS_FOLDER_REGEX and
    ValueError when the captured text is not an integer.
    """
    parsed = re.match(RESULTS_FOLDER_REGEX, filename)
    node_field = parsed.group(1)
    return int(node_field)

def get_btl(filename: str) -> float:
    """Return capture group 2 of the folder name (the text after ``_btl``) as a float.

    Raises AttributeError when ``filename`` does not match RESULTS_FOLDER_REGEX.
    """
    parsed = re.match(RESULTS_FOLDER_REGEX, filename)
    btl_field = parsed.group(2)
    return float(btl_field)

def get_node_distribution(filename: str) -> str:
    """Return capture group 3 of the folder name, unmodified, as the node distribution name.

    Raises AttributeError when ``filename`` does not match RESULTS_FOLDER_REGEX.
    """
    parsed = re.match(RESULTS_FOLDER_REGEX, filename)
    return parsed.group(3)

def get_topology(filename: str) -> str:
    """Return capture group 5 of the folder name, unmodified, as the topology name.

    Raises AttributeError when ``filename`` does not match RESULTS_FOLDER_REGEX.
    """
    parsed = re.match(RESULTS_FOLDER_REGEX, filename)
    return parsed.group(5)

def get_topology_params(filename: str) -> List[int]:
    """Parse capture group 6 of the folder name into a list of ints.

    The first and last characters of the group are dropped (presumably the
    surrounding list brackets, e.g. "[4, 1, 0]" — confirm against the folder
    naming scheme) and the remainder is split on commas. An empty remainder
    yields an empty list.
    """
    raw_field = re.match(RESULTS_FOLDER_REGEX, filename).group(6)
    inner = raw_field[1:-1]
    if not inner:
        return []
    return [int(token.strip()) for token in inner.split(",")]

def get_protocol(filename: str) -> str:
    """Return capture group 9 of the folder name, unmodified, as the protocol name.

    Raises AttributeError when ``filename`` does not match RESULTS_FOLDER_REGEX.
    """
    parsed = re.match(RESULTS_FOLDER_REGEX, filename)
    return parsed.group(9)

def get_num_faults(filename: str) -> int:
    """Return capture group 10 of the folder name as an int (the number of faults).

    Raises AttributeError when ``filename`` does not match RESULTS_FOLDER_REGEX and
    ValueError when the captured text is not an integer.
    """
    parsed = re.match(RESULTS_FOLDER_REGEX, filename)
    faults_field = parsed.group(10)
    return int(faults_field)

def get_switch_distribution_params(filename: str) -> List[float]:
    """Parse capture group 8 of the folder name into a list of floats.

    The first and last characters of the group are dropped (presumably the
    surrounding list brackets, e.g. "[0.5]" — confirm against the folder naming
    scheme) and the remainder is split on commas.

    An empty parameter list returns ``[]``, matching get_topology_params.
    Without this guard ``"".split(",")`` yields ``[""]`` and ``float("")``
    raises ValueError.

    Raises AttributeError when ``filename`` does not match RESULTS_FOLDER_REGEX.
    """
    inner = re.match(RESULTS_FOLDER_REGEX, filename).group(8)[1:-1]
    if inner.strip() == "":
        return []
    return [float(token.strip()) for token in inner.split(",")]

In [None]:
# More utility methods for analysis
def get_minima(series: pd.Series):
    """Return the value of the first strict local minimum of ``series``.

    An element counts as a local minimum when it is strictly smaller than both
    its predecessor and its successor, so the first and last elements never
    qualify (their missing neighbour compares as NaN). Raises IndexError when
    no element qualifies.
    """
    previous, following = series.shift(1), series.shift(-1)
    is_local_min = (series < previous) & (series < following)
    return series[is_local_min].iloc[0]

def get_minima_index(series: pd.Series):
    """Return the index label of the first strict local minimum of ``series``.

    An element counts as a local minimum when it is strictly smaller than both
    its predecessor and its successor. Raises IndexError when no element
    qualifies.
    """
    previous, following = series.shift(1), series.shift(-1)
    is_local_min = (series < previous) & (series < following)
    return series[is_local_min].index[0]

In [None]:
# File read from each result folder by get_fn_data, and the results folder name.
RESULTS_VALIDATOR_FILENAME = "validator_results.json"
RESULTS_FOLDER = "results"

# String keys and labels. Of these, only TOTAL_TIME_KEY is read from the loaded
# result JSON by the visible cells (in get_topology_data).
# NOTE(review): the remaining names look like keys of the validator results JSON — confirm.
FASTEST_MESSAGE_MAP = "fastestMessageCountMap"
REMAINDER_MESSAGE_MAP = "remainderMessageCountMap"
FASTEST_TIME_MAP = "fastestStateTimeMap"
REMAINDER_TIME_MAP = "remainderStateTimeMap"
PREPARED = "PREPARED"
PREPREPARED = "PREPREPARED"
# COMMIT was assigned twice in this cell with the same value; it is now defined once.
COMMIT = "COMMIT"
SYNC = "SYNC"
ROUND_CHANGE = "ROUND_CHANGE"
TOTAL_TIME_KEY = "t_total_fastest"
RC_PROB = "RC_PROB"
NEW_ROUND = "NEW_ROUND"
PRE_PREPARED = "PRE_PREPARED"  # distinct from PREPREPARED above (different string value)
LAMBDA_FASTEST = "lambda_fastest"
L_FASTEST = "L_fastest"
L_REMAINDER = "L_remainder"

NEW_VIEW = "NEW_VIEW"
PREPARE = "PREPARE"
PRE_COMMIT = "PRE_COMMIT"
DECIDE = "DECIDE"

# Per-protocol state name lists, keyed by lower-case protocol name.
IBFT_STATES = [NEW_ROUND, PRE_PREPARED, PREPARED, ROUND_CHANGE]
HS_STATES = [PREPARE, PRE_COMMIT, COMMIT, DECIDE]
PROTOCOL_NAME_STATE_MAP = {"hs": HS_STATES, "ibft": IBFT_STATES}

In [None]:
DEFAULT_RESULTS_DIRECTORY = "results/"

def get_fn_data(index_fn: Callable[[str], Any], num_nodes: int, base_time_limit: float, topology: str, protocol: str, num_faults: int, 
                dist: str, fn: Callable[[Any], Any], name: str, topology_params: List[int]=None, 
                dir: str=DEFAULT_RESULTS_DIRECTORY) -> pd.Series:
    """Collect one value per matching result folder into a sorted Series.

    Every entry of ``dir`` whose name matches RESULTS_FOLDER_REGEX is parsed with
    the ``get_*`` helpers and kept only if it agrees with all requested filters.
    For each kept folder the validator results JSON is loaded and ``fn`` is
    applied to it.

    Args:
        index_fn: maps a result-folder name to the index label for that run.
        num_nodes: required node count parsed from the folder name.
        base_time_limit: required base time limit (compared with a 1e-10 tolerance).
        topology: required topology name; lower-cased before comparison.
        protocol: required protocol name; lower-cased before comparison.
        num_faults: required fault count.
        dist: required node distribution name (compared as-is).
        fn: maps the loaded results JSON to the value stored in the Series.
        name: name given to the returned Series.
        topology_params: when not None, the folder's topology parameters must
            equal this list exactly.
        dir: directory to scan. The parameter name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        A Series of ``fn`` results indexed by ``index_fn`` labels, sorted by index.
    """
    # Loop-invariant normalisation of the requested names.
    wanted_protocol = protocol.lower()
    wanted_topology = topology.lower()

    index = []
    values = []
    for result_filename in os.listdir(dir):
        if re.match(RESULTS_FOLDER_REGEX, result_filename) is None:
            continue
        run_num_nodes = get_num_nodes(result_filename)
        run_base_time_limit = get_btl(result_filename)
        run_topology = get_topology(result_filename)
        run_topology_params = get_topology_params(result_filename)
        run_protocol = get_protocol(result_filename)
        run_num_faults = get_num_faults(result_filename)
        run_dist = get_node_distribution(result_filename)

        if run_protocol != wanted_protocol or run_num_nodes != num_nodes or run_num_faults != num_faults or run_dist != dist \
                or run_topology != wanted_topology or abs(base_time_limit - run_base_time_limit) >= 1e-10:
            continue

        # Identity check: `!= None` would be evaluated element-wise for array-like arguments.
        if topology_params is not None and topology_params != run_topology_params:
            continue

        index.append(index_fn(result_filename))
        with open(os.path.join(dir, result_filename, RESULTS_VALIDATOR_FILENAME), "r") as json_result:
            result_json = json.load(json_result)
            values.append(fn(result_json))
    return pd.Series(values, index=index, name=name).sort_index()

In [None]:

def get_topology_data(topo: str, protocol: str, name: str, topo_params: List[int]=None):
    """Return the total-time Series for one topology/protocol combination.

    Delegates to get_fn_data using the notebook-level settings (num_nodes,
    base_time_limit, num_faults, node_processing_distribution) as filters. Each
    run is indexed by the first switch-distribution parameter parsed from its
    folder name, and its value is the TOTAL_TIME_KEY entry of the results JSON.
    """
    def first_switch_param(folder_name):
        return get_switch_distribution_params(folder_name)[0]

    def total_time(result):
        return result[TOTAL_TIME_KEY]

    return get_fn_data(
        first_switch_param,
        num_nodes,
        base_time_limit,
        topo,
        protocol,
        num_faults,
        node_processing_distribution,
        total_time,
        name,
        topology_params=topo_params,
    )

In [None]:
# Folded-clos ("fc") runs with topology parameters [4, 1, 0], one Series per protocol.
ibft_series_4_1_0 = get_topology_data("fc", "ibft", "ibft_fc_4_1_0", topo_params=[4, 1, 0])
hs_series_4_1_0 = get_topology_data("fc", "hs", "hs_fc_4_1_0", topo_params=[4, 1, 0])

# One column per protocol; rows are the index labels produced by get_topology_data.
topo_df = pd.DataFrame([ibft_series_4_1_0, hs_series_4_1_0]).T

# The first six rows are left out of the plot.
topo_df.iloc[6:].plot(
    style=".-",
    grid=True,
    title="Folded clos, n=31, Half-radix = 4",
    xlabel="switch processing rate",
    ylabel="time to consensus",
    figsize=(10, 5),
)

In [None]:
# Dragonfly ("df") runs with topology parameters [4, 3, 1, 1], one Series per protocol.
ibft_series_4_3_1_1 = get_topology_data("df", "ibft", "ibft_df_4_3_1_1", topo_params=[4, 3, 1, 1])
hs_series_4_3_1_1 = get_topology_data("df", "hs", "hs_df_4_3_1_1", topo_params=[4, 3, 1, 1])

# Rebinds topo_df: from this cell on it holds the dragonfly columns.
topo_df = pd.DataFrame([ibft_series_4_3_1_1, hs_series_4_3_1_1]).T

# The first six rows are left out of the plot.
topo_df.iloc[6:].plot(
    style=".-",
    grid=True,
    title="Dragonfly, n=31, 3 switch per group, 4 terminals per switch",
    xlabel="switch processing rate",
    ylabel="average time to consensus",
    figsize=(10, 5),
)


In [None]:
def ibft_fc_model(n, k, mu, _lambda):
    """Predicted IBFT time for the folded-clos topology.

    Returns the larger of a lower bound ``(2n + 1) / mu`` and the estimate
    ``2k(2n - 1) / _lambda``. In this notebook it is called with ``n=num_nodes``,
    ``k=4``, ``mu=3`` and ``_lambda`` taken from the data frame index (plotted
    as "switch processing rate").

    NOTE(review): the lower bound uses ``2n + 1`` here while ibft_df_model uses
    ``2n - 1`` — confirm which is intended.
    """
    lower_bound = (2 * n + 1) / mu
    switch_bound = 2 * k * (2 * n - 1) / _lambda
    return max(lower_bound, switch_bound)

def ibft_df_model(n, g, mu, _lambda):
    """Predicted IBFT time for the dragonfly topology.

    With ``k = ceil(n / (g * (g + 1)))``, returns the larger of a lower bound
    ``(2n - 1) / mu`` and the estimate
    ``2k(2n - 1 + 2k * g(g - 1)) / _lambda``. In this notebook it is called with
    ``n=num_nodes``, ``g=3``, ``mu=3`` and ``_lambda`` taken from the data frame
    index (plotted as "switch processing rate").
    """
    k = np.ceil(n / (g * (g + 1)))
    lower_bound = (2 * n - 1) / mu
    switch_bound = 2 * k * (2 * n - 1 + 2 * k * (g * (g - 1))) / _lambda
    return max(lower_bound, switch_bound)

# IBFT measurements for folded clos and dragonfly side by side.
cross_topo_df = pd.DataFrame([ibft_series_4_1_0, ibft_series_4_3_1_1]).transpose()

# Evaluate the models over cross_topo_df's own index. Mapping over another
# frame's index and assigning the result is positional: it misaligns rows, or
# raises a length error, whenever the two indices differ.
cross_topo_df["fc_prediction"] = cross_topo_df.index.map(lambda x : ibft_fc_model(num_nodes, 4, 3, x))
cross_topo_df["df_prediction"] = cross_topo_df.index.map(lambda x : ibft_df_model(num_nodes, 3, 3, x))
cross_topo_df.iloc[6:].plot(style=".-", grid=True, title="IBFT Pred Test", figsize=(10, 5), xlabel="switch processing rate", ylabel="average time to consensus")

In [None]:
def hs_pred_fn_2(n, mu, _lambda):
    """Return the largest of three analytical time estimates for the "HS Model" plot.

    With ``f = (n - 1) // 3`` the candidates are:
      1. ``8(n - 1) / _lambda``
      2. ``4(n - f) / mu + 3n / _lambda + (n / 2 + 4.65) / _lambda``
      3. ``4(n - f) / mu + 3(f + 1) / mu + (n / 2 + 4.65) / _lambda``

    Original author's notes on candidates 2 and 3 (not verifiable from this
    notebook): the first term is the processing delay per round (n - f at the
    leader and 1 elsewhere); the second term is the remaining time in which the
    leader clears redundant extra messages versus the time the first next-phase
    message takes to enter, over 3 phases; the last term is the average time for
    the final message to reach the desired node, with n / 2 being half the nodes
    and the constant being the expected hop count.

    NOTE(review): the original note quoted an expected hop count of 5.23 while
    the code uses 4.65 — confirm which value is intended.
    """
    f = (n - 1) // 3
    leader_term = (4 * (n - f)) / mu
    tail_term = (n / 2 + 4.65) / _lambda
    candidates = (
        (n - 1) * 8 / _lambda,
        leader_term + 3 * (n) / _lambda + tail_term,
        leader_term + 3 * (f + 1) / mu + tail_term,
    )
    return max(candidates)

# Rebuild the folded-clos frame here rather than relying on whatever `topo_df`
# currently holds. On a top-to-bottom run the dragonfly cell has rebound
# `topo_df`, so it has no "hs_fc_4_1_0" column and the selection below would
# raise KeyError.
topo_df = pd.DataFrame([ibft_series_4_1_0, hs_series_4_1_0]).transpose()

# n comes from the notebook settings (num_nodes = 31); mu = 3 is kept as written
# (presumably the node processing rate — confirm).
topo_df["hs_pred"] = topo_df.index.map(lambda x: hs_pred_fn_2(num_nodes, 3, x))
topo_df.iloc[15:][["hs_pred", "hs_fc_4_1_0"]].plot(style=".-", grid=True, title="HS Model", xlabel="switch processing rate", ylabel="time to consensus", figsize=(10,5))