# Experiment Notebook
Notebook to inspect the results of a single experiment

In [None]:
# Imports
import pandas as pd
import pprint
import matplotlib.pyplot as plt

from collections import defaultdict


from lib.metrics import *
from lib.message import *
from lib.analysis import *
from lib.experiment import PubSubExperimentResults


In [None]:
# Path of the experiment results file inspected by this notebook.
# The loading cell also picks the analyzer from this string: a path containing
# "kad" is analyzed with KadPubSubAnalyzer, anything else with PubSubAnalyzer.
EXPERIMENT = "experiments/plumtree-config5.json"


In [None]:
# Set matplotlib's default figure resolution to 200 DPI
plt.rcParams["figure.dpi"] = 200


In [None]:
# Load the experiment results, wrap them in the matching analyzer and show the
# experiment configuration.

experiment_results = PubSubExperimentResults.load_from_file(EXPERIMENT)

# The analyzer is selected from the results path: "kad" anywhere in the path
# means the Kad-specific analyzer, otherwise the generic one is used.
if "kad" in EXPERIMENT:
    exp = KadPubSubAnalyzer.from_experiment_results(experiment_results)
else:
    exp = PubSubAnalyzer.from_experiment_results(experiment_results)

pprint.pprint(exp.experiment)


In [None]:
# Check experiment preconditions
# exp.check_preconditions()

In [None]:
# Plot reliability over time: average the samples into 20 ms bins, then fill
# bins that have no sample from the next bin that has one.
# Series.bfill() is used instead of fillna(method="bfill"); the `method`
# argument of fillna is deprecated since pandas 2.1.
exp.reliability().resample("20ms").mean().bfill().plot(title="Reliability")

In [None]:
# Plot a histogram of the reliability values, with a log-scaled y axis
exp.reliability().plot(kind="hist", logy=True)

In [None]:
# Plot experiment events
#
# Each event type becomes one column holding the number of events recorded at
# each timestamp. The columns are summed into 1-second bins and plotted on a
# log-scaled y axis.
#
# Every series is collapsed with groupby(level=0).sum() before the frame is
# built: pandas cannot align series whose index holds duplicate labels, so two
# events of the same type sharing a timestamp would otherwise make the
# DataFrame construction raise.

# Column label -> metric type; the insertion order is the column order.
event_types = {
    "Message Sends": PubSubMessageSent,
    "Message Receives": PubSubMessageReceived,
    "Node Boot Times": Boot,
    "Node Shutdown Times": Shutdown,
    "Node Subscriptions": PubSubSubscribe,
}

pd.DataFrame(
    {
        label: pd.Series(
            1, index=[m.timestamp for m in exp.metrics(ty=metric_type)]
        )
        .groupby(level=0)
        .sum()
        for label, metric_type in event_types.items()
    }
).resample("1s").sum().plot(logy=True)


In [None]:
# Plot a histogram of the publish latencies returned by the analyzer
exp.publish_latency().plot(kind="hist", title="Publish Latencies")


In [None]:
# Display redundancy and network usefulness (the network usage efficiency fraction)
print(f"Redundancy: {exp.redundancy()}")
print(f"Network usefullness: {exp.network_usage_efficiency_fraction()}")


In [None]:
# Display graph of a message with reliability = 1
def display_graph_of_message_with_reliability_1():
    """Print and graph the first message whose reliability is exactly 1.0.

    Walks ``exp.messages_reliability()`` in iteration order. For the first
    entry whose value equals 1.0, prints its key as the message UUID and
    returns ``exp.message_graph(key)``. When no entry qualifies, prints a
    notice and returns None.
    """
    for message_uuid, reliability in exp.messages_reliability().items():
        if reliability == 1.0:
            print(f"Message UUID: {message_uuid}")
            return exp.message_graph(message_uuid)
    print("No message with reliability = 1")
    return None


display_graph_of_message_with_reliability_1()


In [None]:
# Display graph of a message with reliability < 1
message_with_reliability_less_1 = None


def display_graph_of_message_with_reliability_less_1():
    """Print and graph the first message whose reliability is below 1.0.

    Walks ``exp.messages_reliability()`` in iteration order. For the first
    entry whose value is less than 1.0, prints its key as the message UUID,
    stores that key in the module-level ``message_with_reliability_less_1``
    and returns ``exp.message_graph(key)``. When no entry qualifies, prints a
    notice, leaves the module-level variable untouched and returns None.
    """
    global message_with_reliability_less_1
    for message_uuid, reliability in exp.messages_reliability().items():
        if reliability < 1.0:
            print(f"Message UUID: {message_uuid}")
            message_with_reliability_less_1 = message_uuid
            return exp.message_graph(message_uuid)
    print("No message with reliability < 1")
    return None


display_graph_of_message_with_reliability_less_1()


In [None]:
# Poor man's profiler
#
# Groups the Span metrics by span name and builds, for each name, a series of
# durations indexed by timestamp. Durations that share a timestamp are summed
# so that every series has a unique index.

spans = exp.metrics(ty=Span)
span_timestamps = defaultdict(list)
span_times = defaultdict(list)
for span in spans:
    span_timestamps[span.name].append(span.timestamp)
    span_times[span.name].append(span.duration)

# The distinct span names, taken from the grouping above instead of a second
# pass over the spans.
span_names = set(span_times)

# Build the series in first-seen name order (dict insertion order) rather than
# by iterating the set: set order for strings changes between interpreter
# runs, which would reshuffle the columns, legends and bars of the plots that
# consume span_series.
span_series = {
    name: pd.Series(durations, index=span_timestamps[name], name=name)
    .groupby(level=0)
    .sum()
    for name, durations in span_times.items()
}
spans_available = bool(span_series)

In [None]:
# Span mean times over time

if spans_available:
    # One column per span name, averaged within 1-second bins
    mean_times_per_second = pd.DataFrame(span_series).resample("1s").mean()
    mean_times_axes = mean_times_per_second.plot(title="Mean times", logy=True)
    # Anchor the legend's upper-left corner just past the right edge of the axes
    mean_times_axes.legend(bbox_to_anchor=(1.05, 1), loc="upper left")


In [None]:
# Span mean times histogram

if spans_available:
    # Mean of each per-name series in span_series, keyed by span name
    mean_time_by_span = pd.Series(
        {name: s.mean() for name, s in span_series.items()}
    )
    mean_time_axes = mean_time_by_span.plot(
        kind="bar", title="Mean times", logy=True, ylabel="Time (s)"
    )
    mean_time_axes.legend(bbox_to_anchor=(1.05, 1), loc="upper left")


In [None]:
# Span average total time per node

if spans_available:
    node_count = exp.experiment.number_nodes
    # Sum of each per-name series divided by the number of nodes
    total_time_per_node = pd.Series(
        {name: s.sum() / node_count for name, s in span_series.items()}
    )
    total_time_axes = total_time_per_node.plot(
        kind="bar", title="Average total time per node", logy=True, ylabel="Time (s)"
    )
    total_time_axes.legend(bbox_to_anchor=(1.05, 1), loc="upper left")


In [None]:
# Average span count per node

if spans_available:
    # Each entry of a span series contributes 1/number_nodes, so summing a
    # 1-second bin yields that bin's entry count divided by the node count.
    per_entry_weight = 1 / exp.experiment.number_nodes
    weighted_entries = pd.DataFrame(
        {
            name: pd.Series(per_entry_weight, index=s.index)
            for name, s in span_series.items()
        }
    )
    count_axes = weighted_entries.resample("1s").sum().plot(
        title="Average span count per node"
    )
    count_axes.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
