# PeerDAS Simulation

## TODOs

- [ ] Add attacker nodes
- [ ] Increase simulation runs
- [ ] Improve sample selection
- [ ] Simulate more real peer distribution
- [ ] Simulate gossipsub
- [ ] Simulate more real node capacity
- [ ] Stats of different parameter range
- [ ] Add other simulation targets


In [1]:
# %pip install pandas
# %pip install matplotlib
# %pip install "networkx==3.1"
# %pip install "scipy==1.11.2"

## Configurations

In [2]:
# Constants
BLOB_SIZE = 4096 * 32  # bytes

# Simulation
# The graph only makes sense if there are only few nodes
DRAW_NETWORK_GRAPH = False
SIMULATION_RUNS = 10


class Config:
    #
    # Data
    #
    MAX_BLOBS_PER_BLOCK = 256  # Full danksharding

    @property
    def DATA_PER_SLOT(self):
        return self.MAX_BLOBS_PER_BLOCK * BLOB_SIZE * 4  # including the extension

    NUMBER_OF_ROWS = 32  # Number of rows in the 2D data array
    NUMBER_OF_COLUMNS = 32  # Number of columns in the 2D data array

    @property
    def DATA_PER_ROW(self):
        # In bytes
        return self.DATA_PER_SLOT / self.NUMBER_OF_ROWS

    @property
    def DATA_PER_COLUMN(self):
        # In bytes
        return self.DATA_PER_SLOT / self.NUMBER_OF_COLUMNS

    @property
    def DATA_PER_SAMPLE(self):
        # In bytes
        return self.DATA_PER_SLOT / (self.NUMBER_OF_COLUMNS * self.NUMBER_OF_ROWS)

    #
    # Custody
    #
    SAMPLES_PER_SLOT = 70  # Number of samples per slot
    CUSTODY_REQUIREMENT = 2  # Minimum number of both rows and columns an honest node custodies and serves samples from

    @property
    def CUSTODY_REQUIREMENT_SUPERNODE(self):
        return min(self.NUMBER_OF_ROWS, self.NUMBER_OF_COLUMNS)

    #
    # Capacity
    #
    SAMPLING_DUE_SECOND = 4  # seconds, the time for a node to sample
    BANDWIDTH_UPLINK_NORMAL = 100 * 1000 * 1000 / 8  # Mbps -> bytes/second
    BANDWIDTH_UPLINK_SUPERNODE = BANDWIDTH_UPLINK_NORMAL * 10  # bytes/second, supernode has better bandwidth

    @property
    def NORMAL_CAPACITY(self):
        # The bytes of data a normal node can serve in a sampling period
        return self.BANDWIDTH_UPLINK_NORMAL * self.SAMPLING_DUE_SECOND
    
    @property
    def SUPERNODE_CAPACITY(self):
        # The bytes of data a supernode can serve in a sampling period
        return self.BANDWIDTH_UPLINK_SUPERNODE * self.SAMPLING_DUE_SECOND

    #
    # Networking
    #
    NUMBER_OF_NODES = 5000  # Number of nodes in the network
    PERCENTAGE_OF_SUPERNODE = 1  # Percentage of supernodes in the network
    PERCENTAGE_OF_DISHONEST = 10  # Percentage of dishonest nodes in the network

    @property
    def PERCENTAGE_OF_NORMAL(self):
        # Percentage of normal nodes in the network
        return 100 - self.PERCENTAGE_OF_SUPERNODE - self.PERCENTAGE_OF_DISHONEST

    TARGET_NUMBER_OF_PEERS = 70  # Target number of peers each node has
    TARGET_NUMBER_OF_PEERS_SUPERNODE = TARGET_NUMBER_OF_PEERS * 2  # Target number of peers each supernode has; more peers than normal nodes

## Helpers

In [3]:
import enum
import random
from typing import Sequence, Any, NewType


rng = random.Random(5566)


class LineType(enum.Enum):
    ROW = 0
    COLUMN = 1


class CustodyInfo:
    """
    The custody information of a line (row or column) of the data array.
    """
    custody_nodes: set[int] = set()  # the nodes that custody this line
    dishonest_nodes: set[int] = set()  # the nodes that SHOULD custody this line but don't


CustodyLog = NewType('CustodyLog', dict[tuple[LineType, int], CustodyInfo])


def create_custody_log(config: Config) -> CustodyLog:
    custody_log = CustodyLog({})
    for line_type in LineType:
        for line_index in range(config.NUMBER_OF_ROWS if line_type == LineType.ROW else config.NUMBER_OF_COLUMNS):
            custody_log[line_type, line_index] = CustodyInfo()
    return custody_log


## Peer connections
class Node:
    id: int
    peers: set[int] = set()
    peer_scores: dict[int, float] = {}

    should_custody_rows: set[int] = set()
    should_custody_cols: set[int] = set()
    actual_custody_rows: set[int] = set()
    actual_custody_cols: set[int] = set()

    is_honest: bool = True
    is_supernode: bool = False
    target_peers: int = 0
    capacity: float = 0
    sample_attempt_counter: int = 0

    def __init__(self, config: Config, id: int) -> None:
        self.id = id
        self.target_peers = config.TARGET_NUMBER_OF_PEERS
        self.capacity = config.NORMAL_CAPACITY
        self.is_honest = rng.randint(1, 100) > config.PERCENTAGE_OF_DISHONEST
        if self.is_honest:
            if rng.randint(1, 100) <= config.PERCENTAGE_OF_SUPERNODE:
                self.is_supernode = True
                self.target_peers = config.TARGET_NUMBER_OF_PEERS_SUPERNODE
                self.capacity = config.SUPERNODE_CAPACITY

    def set_custody(self, config: Config, epoch: int, custody_log: CustodyLog) -> None:
        if self.is_supernode:
            custody_size = config.CUSTODY_REQUIREMENT_SUPERNODE
        elif self.is_honest:
            custody_size = config.CUSTODY_REQUIREMENT
        else:
            # TODO
            # Dishoenst nodes custody less than CUSTODY_REQUIREMENT
            # custody_size = rng.randint(0, CUSTODY_REQUIREMENT - 1)
            custody_size = 0

        # The rows and columns that this node SHOULD custody
        self.should_custody_rows, self.should_custody_cols = get_custody_rows_cols(
            config,self.id, epoch, custody_size=config.CUSTODY_REQUIREMENT)
        # The rows and columns that this node ACTUALLY custody
        self.actual_custody_rows, self.actual_custody_cols = get_custody_rows_cols(
            config, self.id, epoch, custody_size=custody_size)

        # Update custody log
        # NOTE: use `union` instead of `add`
        for row in self.should_custody_rows.difference(self.actual_custody_rows):
            custody_log[LineType.ROW, row].dishonest_nodes.union(set([self.id]))
        for col in self.should_custody_cols.difference(self.actual_custody_cols):
            custody_log[LineType.COLUMN, col].dishonest_nodes.union(set([self.id]))

        for row in self.actual_custody_rows:
            custody_log[LineType.ROW, row].custody_nodes = custody_log[LineType.ROW, row].custody_nodes.union(set([self.id]))
        for col in self.actual_custody_cols:
            custody_log[LineType.COLUMN, col].custody_nodes = custody_log[LineType.COLUMN, col].custody_nodes.union(set([self.id]))


#
# Custody helpers
#

def cycle(seq: Sequence[Any], start: int) -> Any:
    while True:
        yield seq[start]
        start = (start + 1) % len(seq)


def get_custody_lines(config: Config, node_id: int, epoch: int, custody_size: int, line_type: LineType) -> list[int]:
    bound = config.NUMBER_OF_ROWS if line_type else config.NUMBER_OF_COLUMNS
    all_items = list(range(bound))
    line_index = (node_id + epoch) % bound
    iterator = cycle(all_items, line_index)
    return [next(iterator) for _ in range(custody_size)]


def get_custody_rows_cols(config: Config, node_id: int, epoch: int, custody_size: int) -> tuple[set[int], set[int]]:
    rows = get_custody_lines(config, node_id, epoch, custody_size, line_type=LineType.ROW)
    cols = get_custody_lines(config, node_id, epoch, custody_size, line_type=LineType.COLUMN)

    assert len(rows) == len(cols) == custody_size
    return set(rows), set(cols)


def get_node_ids_by_line_index(config: Config, epoch: int, line_index: int, line_type: LineType) -> list[int]:
    # NOTE: not in-use in this simulation, but demonstrates how to get node ids by line_index of a row or column
    for node_id in range(config.NUMBER_OF_NODES):
        custody_items = get_custody_lines(config, node_id, epoch, custody_size=config.CUSTODY_REQUIREMENT, line_type=line_type)
        if line_index in custody_items:
            yield node_id


def get_custodian_peers(nodes: Sequence[Node], node_id: int, line_type: LineType, line_index: int,
                        custody_log: CustodyLog) -> Sequence[int]:
    """
    Get the peers that custody the given ``line_index`` line of a row or column.
    """
    do_you_have_msg = custody_log[line_type, line_index].custody_nodes
    dishonest_nodes = custody_log[line_type, line_index].dishonest_nodes
    merged_nodes = do_you_have_msg.union(dishonest_nodes)
    return nodes[node_id].peers.intersection(merged_nodes)


## Simulation assumptions and logic

### Setting

In [4]:
config_demo = Config()

print(f'Data per slot:\t{config_demo.DATA_PER_SLOT / 1000} KB')
print(f'Data per row:\t{config_demo.DATA_PER_ROW / 1000} KB')
print(f'Data per column:\t{config_demo.DATA_PER_COLUMN / 1000} KB')
print(f'Data per sample:\t{config_demo.DATA_PER_SAMPLE / 1000} KB')
required_downlink_bandwidth = (config_demo.DATA_PER_SAMPLE * config_demo.SAMPLES_PER_SLOT) / config_demo.SAMPLING_DUE_SECOND / 1000 / 1000 * 8
print(f'Required downlink bandwidth per slot with {config_demo.SAMPLING_DUE_SECOND} seconds sampling time:\t{required_downlink_bandwidth} Mbps')


Data per slot:	134217.728 KB
Data per row:	4194.304 KB
Data per column:	4194.304 KB
Data per sample:	131.072 KB
Required downlink bandwidth per slot with 4 seconds sampling time:	18.350080000000002 Mbps


### P2P network topology

In [5]:
# Set all nodes with naive topology and custody
def set_peers(nodes: Sequence[Node]) -> None:
    """
    Set peers for each node. Naive implementation.
    """
    for n in nodes:
        # TODO: set different distribution
        if len(n.peers) < n.target_peers:
            selectable_node_ids = set(p.id for p in nodes if len(p.peers) < p.target_peers)
            selectable_peers = selectable_node_ids.difference(n.peers.union(set([n.id])))
            new_peers = set()
            need_peer_count = n.target_peers - len(n.peers)
            if need_peer_count > 0:
                new_peers = rng.sample(list(selectable_peers), k=min(need_peer_count, len(selectable_peers)))
            n.peers = n.peers.union(new_peers)
            for new_peer_id in new_peers:
                nodes[n.id].peer_scores[new_peer_id] = 100
            for peer_id in new_peers:
                # NOTE: DO NOT USE nodes[peer_id].peers.add(n.id)  <-- side effect!
                nodes[peer_id].peers = nodes[peer_id].peers.union(set([n.id]))
                nodes[peer_id].peer_scores[n.id] = 100


def get_nodes(config: Config, epoch: int, node_count: int, custody_log: CustodyLog) -> Sequence[Node]:
    """
    Initialize nodes with peers and their custodies.
    """
    all_nodes = [Node(id=id, config=config) for id in range(node_count)]
    set_peers(all_nodes)
    for node in all_nodes:
        node.set_custody(config, epoch, custody_log)

    return all_nodes

# Temparary log for simulation stats
custody_log_demo = create_custody_log(config_demo)
all_nodes_demo = get_nodes(config=config_demo, epoch=0, node_count=config_demo.NUMBER_OF_NODES, custody_log=custody_log_demo)

In [6]:
import networkx as nx


if DRAW_NETWORK_GRAPH:
    g = nx.Graph()
    for node in all_nodes_demo:
        g.add_node(node.id)
    for node in all_nodes_demo:
        for peer_id in node.peers:
            g.add_edge(node.id, peer_id)

    nx.draw(g, node_size=2, alpha=0.5, width=0.1)

In [7]:
import pandas as pd


stats = []
for node in all_nodes_demo:
    stats.append({
        "node.id": node.id,
        "len(peers)": len(node.peers),
        "len(should_custody_rows)": len(node.should_custody_rows),
        "len(should_custody_cols)": len(node.should_custody_cols),
        "len(actual_custody_rows)": len(node.actual_custody_rows),
        "len(actual_custody_cols)": len(node.actual_custody_cols),
    })

avg_peer_count = sum(x["len(peers)"] for x in stats) / len(stats)
print(f"Average peer count: {avg_peer_count}")
if any(x["len(peers)"] == 0 for x in stats):
    print("WARNING: Some nodes have no peers!")

avg_custody_count = sum((x["len(actual_custody_rows)"] + x["len(actual_custody_cols)"]) for x in stats) / len(stats)
print(f"Average custody lines (rows + columns): {avg_custody_count}")
for index, info in custody_log_demo.items():
    line_type, line_index = index
    if len(info.custody_nodes) == 0:
        print(f"WARNING: No node custody {line_type} {line_type}!")

stats = pd.DataFrame(stats)
pd.set_option('display.max_rows', 20)
stats

Average peer count: 70.64
Average custody lines (rows + columns): 4.1424


Unnamed: 0,node.id,len(peers),len(should_custody_rows),len(should_custody_cols),len(actual_custody_rows),len(actual_custody_cols)
0,0,70,2,2,2,2
1,1,70,2,2,0,0
2,2,70,2,2,2,2
3,3,70,2,2,0,0
4,4,70,2,2,2,2
...,...,...,...,...,...,...
4995,4995,70,2,2,2,2
4996,4996,66,2,2,2,2
4997,4997,70,2,2,2,2
4998,4998,70,2,2,0,0


In [8]:
#### Supernode

In [9]:
import pandas as pd

stats = []

for node in all_nodes_demo:
    if node.is_supernode:
        stats.append({
            "node.id": node.id,
            "len(peers)": len(node.peers),
        })
print(f'Number of supernodes: {len(stats)} ({len(stats) / len(all_nodes_demo) * 100}%)')
stats = pd.DataFrame(stats)
pd.set_option('display.max_rows', 20)
stats

Number of supernodes: 46 (0.9199999999999999%)


Unnamed: 0,node.id,len(peers)
0,102,140
1,121,140
2,188,140
3,194,140
4,196,140
...,...,...
41,4448,140
42,4457,140
43,4586,140
44,4728,140


### Gossip

In [10]:
def gossip(config: Config, nodes: Sequence[Node], custody_row_log: dict[set[int]], custody_col_log: dict[set[int]]) -> None:
    """
    Gossip the custody information to peers.
    """
    # TODO: Consider capacity
    uncovered_rows = [line for line in custody_row_log if not any(line)]
    uncovered_cols = [line for line in custody_col_log if not any(line)]
    return uncovered_rows, uncovered_cols

### Sampling

Each node does:
1. pseudo-randomly choose the sample point.
2. Compute the peer list of peers who should have downloaded the given lines.
3. Request the peer can provide the sample.

In [11]:
# Sampling

def sample_peer(config: Config, nodes: Sequence[Node], peer_id: int, line_type: LineType, line_index: int) -> bool:
    # TODO: now it ignore if the peer may have the sample by another line_type
    # If the peer has no enough capacity, it can not serve the sample
    nodes[peer_id].capacity -= config.DATA_PER_SAMPLE
    if nodes[peer_id].capacity < 0:
        return False

    return (line_index in nodes[peer_id].actual_custody_rows) if line_type else (line_index in nodes[peer_id].actual_custody_cols)


def sample_by_line(config: Config, nodes: Sequence[Node], node_id: int,
                   line_type: LineType, line_index: int, custody_log: CustodyInfo) -> bool:
    """
    Sample a peer that custodies the given ``line_index``.
    """
    custodian_peers = get_custodian_peers(nodes, node_id,
                                          line_type=line_type, line_index=line_index,
                                          custody_log=custody_log)

    if len(custodian_peers) == 0:
        return False

    # shuffle the order
    custodian_peers = list(custodian_peers)
    rng.shuffle(custodian_peers)
    for peer_id in custodian_peers:
        nodes[node_id].sample_attempt_counter += 1
        if sample_peer(config, nodes, peer_id, line_type=line_type, line_index=line_index):
            return True
        else:
            nodes[node_id].peer_scores[peer_id] -= 1

    return False


def try_sample_by_line(config: Config, nodes: Sequence[Node], node_id: int,  line_type: LineType, line_index: int,
                       custody_log: CustodyLog, covered: list[int], index: int) -> None:
    line_success = sample_by_line(config, nodes, node_id,
                                  line_type=line_type, line_index=line_index, custody_log=custody_log)
    if line_success:
        covered[index] = 1
    return line_success


def sample(config: Config, nodes: Sequence[Node], custody_log: CustodyLog):
    sample_results = []  # 1 if all samples are successful, 0 if one of it has failed
    reconstructions = []  # 1 if node is able to recover the data with the sampled rows/cols, 0 if unable
    sample_attempt_counts = []  # number of tries to sample all samples

    # TODO: parallelize it
    for node_id in range(len(nodes)):
        rows_uncovered , cols_uncovered, samples_uncovered = ([] for _ in range(3))
        rows_covered = [0 for _ in range(config.NUMBER_OF_ROWS)]
        cols_covered = [0 for _ in range(config.NUMBER_OF_COLUMNS)]
        success = True

        # Select `SAMPLES_PER_SLOT` points
        sample_points = rng.sample(list(range(config.NUMBER_OF_ROWS * config.NUMBER_OF_COLUMNS)), config.SAMPLES_PER_SLOT)
        for point in sample_points:
            row = point // config.NUMBER_OF_COLUMNS
            col = point % config.NUMBER_OF_COLUMNS

            first_try = rng.choice([0, 1])
            first_try_line_type = LineType.ROW if first_try else LineType.COLUMN
            second_try_line_type = LineType.COLUMN if first_try else LineType.ROW
            first_try_line_index = row if first_try else col
            second_try_line_index = col if first_try else row
            first_try_covered_log = rows_covered if first_try else cols_covered
            second_try_covered_log = cols_covered if first_try else rows_covered
            index = row if first_try else col

            # First try the row or column
            line_success = try_sample_by_line(config, nodes, node_id,
                                              line_type=first_try_line_type,
                                              line_index=first_try_line_index,
                                              custody_log=custody_log,
                                              covered=first_try_covered_log, index=index)
            # Second try: try another LineType
            if not line_success:
                line_success = try_sample_by_line(config, nodes, node_id,
                                                  line_type=second_try_line_type,
                                                  line_index=second_try_line_index,
                                                  custody_log=custody_log,
                                                  covered=second_try_covered_log, index=index)
                if not line_success:
                    success = False

        sample_results.append(1) if success else sample_results.append(0)
        sample_attempt_counts.append(nodes[node_id].sample_attempt_counter)

        # Reconstruct
        rows_uncovered.append(sum(1 if x == 0 else 0 for x in rows_covered))
        cols_uncovered.append(sum(1 if x == 0 else 0 for x in cols_covered))
        samples_uncovered.append(rows_uncovered[-1] * cols_uncovered[-1])

        # If more than 1/4 of the row/col is unavailable, the node can not reconstruct the data
        if samples_uncovered[-1] > config.NUMBER_OF_ROWS * config.NUMBER_OF_COLUMNS / 4:
            reconstructions.append(0)
        else:
            reconstructions.append(1)

    return sample_results, reconstructions, sample_attempt_counts


sample_results, reconstructions, sample_attempt_counts = sample(
    config_demo, all_nodes_demo, custody_log=custody_log_demo,
)

In [12]:

import pandas as pd

stats = [{
    "no_failed_sample": sum(sample_results) / len(sample_results),
    "has_failed_sample": 1 - sum(sample_results) / len(sample_results),
    "reconstructable": sum(reconstructions) / len(reconstructions),
    "not_reconstructable": 1 - sum(reconstructions) / len(reconstructions),
    "nodes_no_enough_bandwidth": len([n for n in all_nodes_demo if n.capacity < 0]),
    "avg_sample_attempt_counts": sum(sample_attempt_counts) / len(sample_attempt_counts),
}]

stats = pd.DataFrame(stats)
stats

Unnamed: 0,no_failed_sample,has_failed_sample,reconstructable,not_reconstructable,nodes_no_enough_bandwidth,avg_sample_attempt_counts
0,0.985,0.015,1.0,0.0,0,69.9816


## Simulation results

### 1. Node count

In [13]:
g_x_values = [5000, 7500, 10000]  # Number of nodes in the network

g_sample_results, g_reconstructions, g_sample_attempt_counts = ({} for _ in range(3))

def to_dict_stats(all_sample_results, all_reconstructions, all_sample_attempt_counts, config_field, x_values):
    for x in x_values:
        config = Config()
        setattr(config, config_field, x)

        all_sample_results[x] = []
        all_reconstructions[x] = []
        all_sample_attempt_counts[x] = []
        for _ in range(SIMULATION_RUNS):
            custody_log = create_custody_log(config)

            nodes = get_nodes(config, epoch=0, node_count=x, custody_log=custody_log)
            sample_results, reconstructions, sample_attempt_counts = sample(config, nodes, custody_log=custody_log)

            all_sample_results[x].append(sample_results)
            all_reconstructions[x].append(reconstructions)
            all_sample_attempt_counts[x].append(sample_attempt_counts)

to_dict_stats(g_sample_results, g_reconstructions, g_sample_attempt_counts, config_field="NUMBER_OF_NODES", x_values=g_x_values)


In [None]:

def get_stats(all_sample_results, all_reconstructions, all_sample_attempt_counts, x_axis_name):
    stats_results = []
    for x, sample_results_of_x in all_sample_results.items():
        flatten_success = [a for b in sample_results_of_x for a in b]
        flatten_reconstructions = [a for b in all_reconstructions[x] for a in b]
        flatten_sample_attempt_counts = [a for b in all_sample_attempt_counts[x] for a in b]
        avg_success_ratio = sum(flatten_success) / len(flatten_success)
        avg_reconstructable_ratio = sum(flatten_reconstructions) / len(flatten_reconstructions)
        avg_sample_attempt_counts = sum(flatten_sample_attempt_counts) / len(flatten_sample_attempt_counts)
        stats_results.append({
            f"{x_axis_name}": x,
            "len(flatten)": len(flatten_success),
            "avg_all_successful_sample_ratio": avg_success_ratio,
            "avg_reconstructable_ratio": avg_reconstructable_ratio,
            "avg_sample_attempt_counts": avg_sample_attempt_counts,
        })
    return stats_results

stats_results = get_stats(g_sample_results, g_reconstructions, g_sample_attempt_counts, x_axis_name="node_count")
node_counts_results = pd.DataFrame(stats_results)
node_counts_results


In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

# draw chart of stats_sample_results
fig, ax = plt.subplots()
x = np.arange(len(g_x_values))
width = 0.2

ax.set_ylabel('Percentage')
ax.set_title('Sampling results of one slot')
rects1 = ax.bar(x - width / 2, node_counts_results['avg_all_successful_sample_ratio'], width, label='The rate of nodes that has no failed sample')
rects2 = ax.bar(x + width / 2, node_counts_results['avg_reconstructable_ratio'], width, label='The rate of nodes that can recover the data')

ax.set_xticks(x)
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
ax.set_xticklabels(g_x_values)
ax.legend(bbox_to_anchor=(0, -0.3, 0, 0.5), loc='lower left')
plt.show()

### 2. `SAMPLES_PER_SLOT`

In [None]:
g_x_values = [25, 50, 75, 100]  # Number of nodes in the network

g_sample_results, g_reconstructions, g_sample_attempt_counts = ({} for _ in range(3))

to_dict_stats(g_sample_results, g_reconstructions, g_sample_attempt_counts, config_field="SAMPLES_PER_SLOT", x_values=g_x_values)

In [None]:
stats_results = get_stats(g_sample_results, g_reconstructions, g_sample_attempt_counts, x_axis_name="SAMPLES_PER_SLOT")
samples_per_slot_results = pd.DataFrame(stats_results)
samples_per_slot_results

### 3. Capacity

Check the capcity of various `MAX_BLOBS_PER_BLOCK`

In [None]:
g_sample_results, g_reconstructions, g_sample_attempt_counts, g_neg_capacities = ({} for _ in range(4))

param_blobs_per_block = [128, 256, 512]

def to_capacity_dict_stats(all_sample_results, all_reconstructions, all_sample_attempt_counts, all_neg_capacities):
    for blobs_per_block in param_blobs_per_block:
        config = Config()
        config.MAX_BLOBS_PER_BLOCK = blobs_per_block

        all_sample_results[blobs_per_block] = []
        all_reconstructions[blobs_per_block] = []
        all_neg_capacities[blobs_per_block] = []
        all_sample_attempt_counts[blobs_per_block] = []
        for _ in range(SIMULATION_RUNS):
            custody_log = create_custody_log(config)

            nodes = get_nodes(config, epoch=0, node_count=config.NUMBER_OF_NODES, custody_log=custody_log)
            sample_results, reconstructions, sample_attempt_counts = sample(config, nodes, custody_log=custody_log)

            all_sample_results[blobs_per_block].append(sample_results)
            all_reconstructions[blobs_per_block].append(reconstructions)
            all_sample_attempt_counts[blobs_per_block].append(sample_attempt_counts)
            all_neg_capacities[blobs_per_block].append([int(n.capacity < 0) for n in nodes])

to_capacity_dict_stats(g_sample_results, g_reconstructions, g_sample_attempt_counts, g_neg_capacities)


In [None]:
stats_results = get_stats(g_sample_results, g_reconstructions, g_sample_attempt_counts, x_axis_name="blobs_per_block")

for index, (blobs_per_block, sample_results) in enumerate(g_sample_results.items()):
    capacities = g_neg_capacities[blobs_per_block]
    flatten_capacities = [x for y in capacities for x in y]
    avg_capacities = sum(1 for c in flatten_capacities if c > 0) / SIMULATION_RUNS
    stats_results[index]["#nodes_no_enough_bandwidth"] = avg_capacities

blobs_per_block_stats_results = pd.DataFrame(stats_results)
blobs_per_block_stats_results