## Setup

In [68]:
from dataclasses import dataclass

import numpy as np
import pandas
from pathlib import Path
import os

from pandas import DataFrame

from experiment import Protocol
from graph import Graph

In [69]:
@dataclass(frozen=True, order=True)
class Experiment:
    """Immutable, hashable and orderable set of parameters identifying one run.

    The field order is significant: it drives the comparison methods generated
    by ``order=True`` and it is the positional order :meth:`from_strings`
    expects its arguments in.
    """

    # NOTE(review): the meaning and units of the individual parameters are not
    # visible in this notebook -- confirm against the simulator before relying
    # on any interpretation of them.
    protocol: Protocol
    graph: Graph
    nodes: int
    view_size: int
    shuffle_length: int
    delta_t: int
    disaster_intensity: float

    @classmethod
    def from_strings(cls, protocol, graph, nodes, view_size, shuffle_length, delta_t, disaster_intensity):
        """Build an experiment from the textual form of every field.

        Each argument is converted to the type of the matching field:
        ``Protocol(...)`` and ``Graph(...)`` for the first two, ``int`` for the
        four counters and ``float`` for ``disaster_intensity``. Whatever those
        conversions raise on malformed input propagates to the caller.
        """
        return cls(
            protocol=Protocol(protocol),
            graph=Graph(graph),
            nodes=int(nodes),
            view_size=int(view_size),
            shuffle_length=int(shuffle_length),
            delta_t=int(delta_t),
            disaster_intensity=float(disaster_intensity),
        )

In [70]:
# Load every saved run whose file name ends in " 0.csv" and key it by the
# experiment parameters encoded in the rest of the name (space separated, in the
# positional order expected by Experiment.from_strings).
# NOTE(review): the trailing "0" is presumably a repetition index -- confirm.
RUN_FILE_SUFFIX = " 0.csv"
runs = {}

# Sorted so the dictionary order, and every listing derived from it, does not
# depend on the order in which the file system enumerates the directory.
for ff in sorted(os.listdir(Path() / "runs")):
    # An exact suffix match keeps out names such as "... 10.csv", which the
    # looser '0.csv' test used to let through and then failed to parse.
    if not ff.endswith(RUN_FILE_SUFFIX):
        continue
    df: DataFrame = pandas.read_csv(Path() / "runs" / ff)
    # The first CSV column is discarded (presumably a saved row index -- TODO confirm).
    df = df.drop(columns=df.columns[0])
    if len(df) > 0:
        # Slice the suffix off rather than calling str.rstrip(): rstrip() removes a
        # *set of characters*, so it would also swallow trailing "0" and "."
        # characters that belong to the last parameter (e.g. "1.0" or "0.50").
        params = tuple(ff[:-len(RUN_FILE_SUFFIX)].split(' '))
        runs[Experiment.from_strings(*params)] = df
print(f"There are {len(runs)} saved runs.")

There are 433 saved runs.


## Partitioning

In [71]:
# Runs showing more than one alive partition somewhere in the first third of
# their rows.
partitioned_runs = {
    experiment: frame
    for experiment, frame in runs.items()
    if max(frame["Alive Partitions"].head(len(frame) // 3)) > 1
}

In [72]:
# Report how many runs were flagged as partitioned, then list them.
print(f"There are {len(partitioned_runs)} partitioned runs:")
for experiment in partitioned_runs:
    print("  -", experiment)

There are 20 partitioned runs:
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=100, shuffle_length=10, delta_t=1, disaster_intensity=0.5)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=100, shuffle_length=10, delta_t=1, disaster_intensity=0.75)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=100, shuffle_length=10, delta_t=1, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=1, disaster_intensity=0.5)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=1, disaster_intensity=0.75)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=15, delta_t=1, disaster_intensity=0.5)
  - Exper

In [73]:
# Partitioned runs that are back to exactly one alive partition at the row
# located one third of the way through the run.
recovered_runs = {
    experiment: frame
    for experiment, frame in partitioned_runs.items()
    if frame["Alive Partitions"][len(frame) // 3] == 1
}

In [74]:
# Report the recovered runs. The listing must iterate recovered_runs, the same
# collection the header counts; it used to iterate partitioned_runs, so the
# printed list did not match the stated total.
print(f"There are {len(recovered_runs)} recovered runs:")
for params in recovered_runs:
    print(f"  - {params}")

There are 18 recovered runs:
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=100, shuffle_length=10, delta_t=1, disaster_intensity=0.5)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=100, shuffle_length=10, delta_t=1, disaster_intensity=0.75)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=100, shuffle_length=10, delta_t=1, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=1, disaster_intensity=0.5)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=1, disaster_intensity=0.75)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=15, delta_t=1, disaster_intensity=0.5)
  - Experim

In [75]:
# Partitioned runs that are absent from the recovered ones, listed in sorted order.
unrecovered_runs = partitioned_runs.keys() - recovered_runs.keys()
print(f"There are {len(unrecovered_runs)} unrecovered runs.")
for experiment in sorted(unrecovered_runs):
    print("  -", experiment)

There are 2 unrecovered runs.
  - Experiment(protocol=<Protocol.NEWSCAST: 'newscast'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=4, disaster_intensity=0.5)
  - Experiment(protocol=<Protocol.NEWSCAST: 'newscast'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=4, disaster_intensity=0.75)


In [76]:
# Runs not already in partitioned_runs that show more than one alive partition
# somewhere in the first two thirds of their rows.
partitioned_after_disaster = {
    experiment: frame
    for experiment, frame in runs.items()
    if experiment not in partitioned_runs
    and max(frame["Alive Partitions"].head(len(frame) // 3 * 2)) > 1
}

# Of those, the runs with exactly one alive partition at the row located two
# thirds of the way through.
recovered_after_disaster = {
    experiment: frame
    for experiment, frame in partitioned_after_disaster.items()
    if frame["Alive Partitions"][len(frame) // 3 * 2] == 1
}

# Everything that partitioned in that window without making it into the
# recovered set.
unrecovered_after_disaster = partitioned_after_disaster.keys() - recovered_after_disaster.keys()

In [77]:
# Report the runs collected in partitioned_after_disaster.
print(f"There are {len(partitioned_after_disaster)} runs which partitioned after the disaster:")
for experiment in partitioned_after_disaster:
    print("  -", experiment)

There are 66 runs which partitioned after the disaster:
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=1, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=10, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=4, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=15, delta_t=10, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=15, delta_t=4, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=6, delta_t=10, disaste

In [78]:
# Report the runs collected in recovered_after_disaster.
print(f"There are {len(recovered_after_disaster)} runs which recovered after disaster:")
for experiment in recovered_after_disaster:
    print("  -", experiment)

There are 15 runs which recovered after disaster:
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=20, shuffle_length=15, delta_t=4, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=50, shuffle_length=15, delta_t=10, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.GEO: 'geo'>, nodes=1000, view_size=50, shuffle_length=6, delta_t=10, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.LATTICE: 'lattice'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=4, disaster_intensity=0.75)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.LATTICE: 'lattice'>, nodes=1000, view_size=20, shuffle_length=6, delta_t=1, disaster_intensity=0.75)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.LATTICE: 'lattice'>, nodes=1000, view_size=50, shuffle_length=10, de

In [79]:
# Report the runs collected in unrecovered_after_disaster.
# The collection is a set, whose iteration order can change from one kernel to
# the next; sorting keeps the listing reproducible and matches how the other
# unrecovered-runs listing in this notebook is printed.
print(f"There are {len(unrecovered_after_disaster)} runs which didn't recover after disaster.")
for run in sorted(unrecovered_after_disaster):
    print(f"  - {run}")

There are 51 runs which didn't recover after disaster.
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.LATTICE: 'lattice'>, nodes=1000, view_size=20, shuffle_length=10, delta_t=10, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.RANDOM: 'random'>, nodes=1000, view_size=20, shuffle_length=6, delta_t=1, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.LATTICE: 'lattice'>, nodes=1000, view_size=20, shuffle_length=15, delta_t=1, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.LATTICE: 'lattice'>, nodes=1000, view_size=20, shuffle_length=15, delta_t=10, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.LATTICE: 'lattice'>, nodes=1000, view_size=100, shuffle_length=6, delta_t=10, disaster_intensity=0.95)
  - Experiment(protocol=<Protocol.CYCLON: 'cyclon'>, graph=<Graph.STAR: 'star'>, nodes=1000, view_size=20

## Plots

In [80]:
# Groups of experiments: one sorted list per (protocol, view size, graph)
# combination, containing every loaded run that matches all three values.
# Combinations without any matching run yield an empty list.
groups = [
    sorted(
        experiment
        for experiment in runs
        if experiment.protocol == proto
        and experiment.view_size == view_size
        and experiment.graph == graph
    )
    for proto in (Protocol.CYCLON, Protocol.NEWSCAST)
    for view_size in (20, 50, 100)
    for graph in (Graph.GEO, Graph.RANDOM, Graph.LATTICE, Graph.STAR)
]

def normalize_experiment(experiment: Experiment, column: str, cycles: int = 301):
    """Resample one column of a run to a single value per cycle.

    Entry ``k`` of the result is the sum of rows ``k * delta_t`` up to (but not
    including) ``(k + 1) * delta_t`` of ``runs[experiment][column]``, divided by
    ``delta_t``. Runs recorded with different ``delta_t`` values therefore end
    up with the same number of entries.

    Args:
        experiment: Key of the run inside the module-level ``runs`` mapping;
            its ``delta_t`` is the number of rows folded into one cycle.
        column: Name of the column to resample.
        cycles: Number of cycles to produce (defaults to the 301 that used to
            be hard-coded).

    Returns:
        A NumPy array of length ``cycles``. A window lying entirely beyond the
        end of the data contributes 0.
    """
    dt = experiment.delta_t
    series = runs[experiment][column]
    normalized = np.zeros(cycles)
    for cycle in range(cycles):
        # The upper bound used to be `cycle + dt`, which equals the intended
        # `(cycle + 1) * dt` only for dt == 1; for larger dt the window was
        # truncated or empty, so most cycles came out as 0.
        window = series.iloc[cycle * dt:(cycle + 1) * dt]
        normalized[cycle] = sum(window) / dt
    return normalized

def compute_metric(column: str, out_column: str, file_stem: str):
    """Average one metric over each experiment group and write it to a file.

    For every non-empty group in the module-level ``groups`` list, the per-cycle
    series of all its experiments (as produced by ``normalize_experiment``) are
    averaged element-wise and written to
    ``report/figures/<file_stem>/<file_stem>_<protocol>_<view_size>_<graph>.dat``
    as a space separated table: a ``step <out_column>`` header followed by one
    ``<index> <value>`` line per cycle. The name of each written file is printed.

    Args:
        column: Source column of the run data to average.
        out_column: Column header used in the output file.
        file_stem: Name of the output sub-directory and prefix of the file names.
    """
    for group in groups:
        if not group:
            # Nothing to average. Without this guard the code below would use
            # the `experiment` left over from the previous group (overwriting
            # that group's file with an empty table) or raise NameError when
            # the very first group is empty.
            continue

        # Plain arithmetic mean over the group. The previous running
        # "add, then halve" update weighted the last experiment by 1/2, the one
        # before by 1/4 and so on, which is only a mean for one or two members.
        group_average = np.mean(
            [normalize_experiment(experiment, column) for experiment in group],
            axis=0,
        )

        # Groups are built from runs sharing protocol, view size and graph, so
        # any member can name the file; the last one mirrors the old behaviour.
        experiment = group[-1]
        path = Path() / "report" / "figures" / f"{file_stem}" / f"{file_stem}_{experiment.protocol.value}_{experiment.view_size}_{experiment.graph.value}.dat"
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'w') as f:
            f.write(f"step {out_column}\n")
            # tolist() yields plain Python floats, so values are formatted
            # exactly as they were when they came out of a DataFrame column.
            f.writelines(
                f"{i} {r}\n"
                for i, r in enumerate(group_average.tolist())
            )
            print(f"Written {f.name}.")

In [81]:
# Every exported metric as (source column, output column header, file stem),
# processed in the order listed.
METRICS = (
    ("Alive Clustering Coefficient", "CC", "clustering_coefficient"),     # Clustering coefficient
    ("Alive Average Path Length", "APL", "average_path_length"),          # Average path length
    ("Alive Degree", "DEG", "degree"),                                    # Degree
    ("Alive Unprocessed Messages", "UNMSG", "unprocessed_messages"),      # Unprocessed messages
    ("Alive Average Message Latency", "LAT", "average_message_latency"),  # Average message latency
    ("Pollution", "POL", "pollution"),                                    # Pollution
)

for source_column, header, stem in METRICS:
    compute_metric(source_column, header, stem)

Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_20_geo.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_20_random.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_20_lattice.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_20_star.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_50_geo.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_50_random.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_50_lattice.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_50_star.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_100_geo.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_100_random.dat.
Written report\figures\clustering_coefficient\clustering_coefficient_cyclon_100_lattice.dat.
Writte