In [41]:
import json
import random
from bisect import bisect_left

import networkx as nx, numpy as np
import pandas as pd
from dowhy import gcm
from scipy import stats

# plus tqdm, typing_extensions, matplotlib

In [42]:
# Note the thousands: the profiler node uses millisecond timestamps, so aggregation_period is in milliseconds (10_000 = 10 s)
def _tail_on_send_events(trace, tail_events):
    # Return the 'onSend' events among the last `tail_events` entries of one
    # trace record (or among all entries when `tail_events` is None).
    events = trace["trace"] if tail_events is None else trace["trace"][-tail_events:]
    return [event for event in events if event['eventName'] == 'onSend']


def get_throughput_from_trace_data(filename, aggregation_period=10_000, tail_events=18):
    """Aggregate per-node throughput from a trace log.

    The log holds one JSON record per line; each record has a "trace" list of
    events carrying "eventName", "node" (a dict with an "id") and "timestamp".
    Blank lines are ignored.

    Only 'onSend' events among the last ``tail_events`` entries of every
    record are used. Why the tail? We don't want to look at all the loops
    auto-play is doing, only at the profile of the message once it went to
    display; in the recorded traces that turned out to be the last 18 entries.

    Time is cut into consecutive windows of ``aggregation_period`` starting at
    the earliest timestamp seen. A window is kept only if it ends strictly
    before the latest timestamp, so the trailing partial window is dropped.

    Args:
        filename: path of the trace log.
        aggregation_period: window length, in the same unit as the event
            timestamps; must be positive.
        tail_events: how many trailing trace entries to inspect per record;
            ``None`` inspects the whole trace. Must be at least 1 otherwise.

    Returns:
        (df, nodes): ``df`` has one row per window and one column per node id,
        each cell counting the onSend events of that node inside the window;
        ``nodes`` is the list of node dicts taken from the first record.

    Raises:
        ValueError: on a non-positive ``aggregation_period``, a ``tail_events``
            below 1, or when the file yields no onSend events at all.
    """
    if aggregation_period <= 0:
        # a non-positive step would never reach stop_timestamp below
        raise ValueError(f"aggregation_period must be positive, got {aggregation_period!r}")
    if tail_events is not None and tail_events < 1:
        # [-0:] would silently select the whole trace
        raise ValueError(f"tail_events must be None or >= 1, got {tail_events!r}")

    with open(filename, 'r') as trace_file:
        trace_data = [json.loads(line) for line in trace_file if line.strip()]

    node_output_times = {}
    for trace in trace_data:
        for event in _tail_on_send_events(trace, tail_events):
            node_output_times.setdefault(event['node']['id'], []).append(event['timestamp'])

    if not node_output_times:
        raise ValueError(f"no onSend events found in {filename!r}")

    start_timestamp = min(min(times) for times in node_output_times.values())
    stop_timestamp = max(max(times) for times in node_output_times.values())

    periods = []
    window_start = start_timestamp
    while window_start + aggregation_period < stop_timestamp:
        periods.append((window_start, window_start + aggregation_period))
        window_start += aggregation_period

    # the node list (and therefore the column order) comes from the first record
    nodes = [event['node'] for event in _tail_on_send_events(trace_data[0], tail_events)]

    throughputs = {}
    for node in nodes:
        sorted_times = sorted(node_output_times[node['id']])
        # number of timestamps with lo <= x < hi, found by binary search
        throughputs[node['id']] = [
            bisect_left(sorted_times, hi) - bisect_left(sorted_times, lo)
            for lo, hi in periods
        ]

    df = pd.DataFrame(throughputs)

    return df, nodes

In [43]:
# Per-scenario collections of throughput DataFrames, one entry per trace file.
normal_list, buildDelay_list, bothDelay_list = [], [], []

In [44]:
# Aggregation window used for every trace file, in milliseconds
# (the profiler node emits millisecond timestamps).
aggregation_period_ms = 5_000
# Trace files are numbered 1..10 for each scenario.
trace_file_numbers = range(1, 11)


def load_scenario(scenario):
    """Load every trace file of one scenario.

    Returns a list with one (throughput DataFrame, nodes) pair per file, in
    file-number order, for the files ``traces/trace_<scenario>_<number>.log``.
    """
    return [
        get_throughput_from_trace_data(f"traces/trace_{scenario}_{number}.log",
                                       aggregation_period=aggregation_period_ms)
        for number in trace_file_numbers
    ]


normal_runs = load_scenario("normal")

# Node metadata comes from the first normal trace file.
nodes = normal_runs[0][1]

# The lists are rebound rather than appended to, so re-running this cell
# cannot leave duplicated entries behind.
normal_list = [df for df, _ in normal_runs]
buildDelay_list = [df for df, _ in load_scenario("buildDelay")]
bothDelay_list = [df for df, _ in load_scenario("bothDelay")]

## Generate repeat indices

Data for each repeat of the experiment consists of three randomly chosen trace files joined together. Here we generate these random triples and save them to `repeat_indices.json` for repeatability. This only needs to be done once, hence the lines below are commented out.

In [45]:
# Number of experiment repeats; each repeat uses one triple of trace-file
# indices read from repeat_indices.json.
n_repeats = 20

In [46]:
# One-off generator for repeat_indices.json, kept commented out because the
# saved triples only need to be produced once.
# When uncommented it draws n_repeats distinct ordered triples of trace-file
# indices from 0..9 and writes them to repeat_indices.json.
#
# repeat_indices = []
# i = 0
# while i < n_repeats:
#     idxs = tuple(random.sample(range(0, 10), 3))
#     if idxs in repeat_indices:
#         continue

#     repeat_indices.append(idxs)
#     i += 1

# with open('repeat_indices.json', 'w') as f:
#     json.dump(repeat_indices, f)

## Causal graph

In [47]:
# Show the node metadata returned by get_throughput_from_trace_data.
nodes

[{'id': 'c5161aa1.85248', 'name': 'AutoPlay', 'type': 'switch'},
 {'id': 'f6f5bcbd.ad6ca', 'name': 'Next Block', 'type': 'function'},
 {'id': 'c312a0c.eef3ce',
  'name': 'Next block of sensor data',
  'type': 'function'},
 {'id': 'f50cf07b.26f8', 'name': 'Set filename', 'type': 'change'},
 {'id': 'deb9e3943de84f28',
  'name': 'Additional random delay',
  'type': 'delay'},
 {'id': 'da968efe.e1f1f', 'name': 'Get OpenEEW data', 'type': 'http request'},
 {'id': '2f380911.38291e', 'name': 'Build Quake Charts', 'type': 'function'},
 {'id': 'e3acce5d13799473', 'name': 'Detect earthquake', 'type': 'function'},
 {'id': '7db5d25abf49b78e', 'name': 'Retrieve trace data', 'type': 'function'}]

In [48]:
# Node ids in the order they appear in `nodes`.
nodes_ids = [entry['id'] for entry in nodes]
# Link every node to the one that follows it, giving a single directed chain.
causal_graph = nx.DiGraph(zip(nodes_ids, nodes_ids[1:]))

def create_causal_model():
    """Build a fresh probabilistic causal model over ``causal_graph``.

    The first node in ``nodes_ids`` gets an empirical distribution; every
    later node gets an additive noise model around a linear regressor.
    """
    model = gcm.ProbabilisticCausalModel(causal_graph)

    root_id = nodes_ids[0]
    model.set_causal_mechanism(root_id, gcm.EmpiricalDistribution())

    downstream_ids = nodes_ids[1:]
    for downstream_id in downstream_ids:
        regressor = gcm.ml.create_linear_regressor()
        model.set_causal_mechanism(downstream_id, gcm.AdditiveNoiseModel(regressor))

    return model

In [49]:
def compute_attributions(delay_mode):
    """Run the distribution-change attribution once per experiment repeat.

    For every repeat, the normal and the delayed throughput tables of the
    trace files listed in that repeat's entry of ``repeat_indices.json`` are
    concatenated, a fresh causal model is created, and
    ``gcm.distribution_change`` is called with the last node in ``nodes`` as
    the target.

    Args:
        delay_mode: "buildDelay" or "bothDelay"; selects which delayed data
            set is compared against the normal one.

    Returns:
        DataFrame with one row per repeat, built from the per-repeat results
        of ``gcm.distribution_change``.

    Raises:
        ValueError: if ``delay_mode`` is not one of the two known modes
            (previously any other value silently selected bothDelay).
        IndexError: if ``repeat_indices.json`` holds fewer than ``n_repeats``
            entries.
    """
    known_modes = ("buildDelay", "bothDelay")
    if delay_mode not in known_modes:
        raise ValueError(f"unknown delay_mode {delay_mode!r}; expected one of {known_modes}")
    delay_list = buildDelay_list if delay_mode == "buildDelay" else bothDelay_list

    with open('repeat_indices.json', 'r') as f:
        repeat_indices = json.load(f)

    if len(repeat_indices) < n_repeats:
        # fail before any expensive attribution run instead of part-way through
        raise IndexError(
            f"repeat_indices.json has {len(repeat_indices)} entries, "
            f"but n_repeats is {n_repeats}"
        )

    target_node_id = nodes[-1]['id']

    attributions_per_experiment = []
    for trace_idxs in repeat_indices[:n_repeats]:
        normal_df = pd.concat([normal_list[idx] for idx in trace_idxs])
        delay_df = pd.concat([delay_list[idx] for idx in trace_idxs])

        causal_model = create_causal_model()

        attributions = gcm.distribution_change(causal_model, normal_df, delay_df, target_node_id)
        attributions_per_experiment.append(attributions)

    return pd.DataFrame(attributions_per_experiment)

In [50]:
# Attributions for the 'buildDelay' scenario: one row per repeat, one column per node id.
attr_df = compute_attributions('buildDelay')

  joint_data = old_data.append(new_data, ignore_index=True, sort=True)
Estimating Shapley Values. Average change of Shapley values in run 29 (145 evaluated permutations): 0.5375722844761522%: 100%|██████████| 1/1 [00:07<00:00,  7.42s/it]
  joint_data = old_data.append(new_data, ignore_index=True, sort=True)
Estimating Shapley Values. Average change of Shapley values in run 9 (45 evaluated permutations): -5.528359574496419%: 100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
  joint_data = old_data.append(new_data, ignore_index=True, sort=True)
Estimating Shapley Values. Average change of Shapley values in run 9 (45 evaluated permutations): -11.180273965902135%: 100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
  joint_data = old_data.append(new_data, ignore_index=True, sort=True)
Estimating Shapley Values. Average change of Shapley values in run 7 (35 evaluated permutations): -1.3133761582612813%: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
  joint_data = old_data.append(new_data, ignore_inde

In [51]:
# Display the attribution table computed above.
attr_df

Unnamed: 0,2f380911.38291e,7db5d25abf49b78e,c312a0c.eef3ce,c5161aa1.85248,da968efe.e1f1f,deb9e3943de84f28,e3acce5d13799473,f50cf07b.26f8,f6f5bcbd.ad6ca
0,-0.000113,-0.000109,8.9e-05,-2.2e-05,-0.000153,-0.000123,6.1e-05,0.00027,0.0001
1,0.683135,-0.036458,-0.006491,0.06778,0.036355,0.02295,-0.074062,-0.023095,0.031782
2,-0.004008,0.000273,0.006802,-0.00245,0.003459,-0.002684,-0.001212,0.003246,0.004397
3,0.662724,-0.036121,-0.009421,0.014889,-0.038687,0.079176,0.105539,0.047708,0.012574
4,0.249352,-0.003953,0.008231,-0.003262,-0.001548,0.000201,-0.000922,0.000783,1.7e-05
5,-0.015122,-0.061425,-0.037585,0.012297,-0.046694,-0.021788,0.021756,0.157742,-0.00918
6,1.017649,-0.040869,-0.031095,-0.023482,0.059819,0.165165,-0.014197,-0.026638,-0.03088
7,0.591949,0.090166,0.025802,0.079756,0.046374,0.119882,-0.120778,0.022979,0.052623
8,0.098647,0.185326,0.030049,0.15928,0.031327,0.09731,-0.001083,0.123081,0.219064
9,0.053458,-0.108177,0.053789,0.086272,-0.055638,0.027288,-0.029867,-0.052958,0.025834


In [52]:
# Compare the attributions of one reference node against those of every other
# node with an independent two-sample t-test that does not assume equal
# variances (equal_var=False, i.e. Welch's t-test).
# The reference id is 'Build Quake Charts' in the node listing - presumably
# the node that is slowed down in the buildDelay scenario.
reference_node_id = "2f380911.38291e"
reference_attributions = attr_df[reference_node_id].to_list()

for node in nodes:
    if node["id"] == reference_node_id:
        continue

    test_result = stats.ttest_ind(reference_attributions,
                                  attr_df[node["id"]].to_list(),
                                  equal_var=False)

    print(f"{node['name']}: p-value = {test_result.pvalue}")

AutoPlay: p-value = 0.004499967020776364
Next Block: p-value = 0.004005260028680552
Next block of sensor data: p-value = 0.004535786731574009
Set filename: p-value = 0.004083592425729922
Additional random delay: p-value = 0.005771983067909032
Get OpenEEW data: p-value = 0.003499970444773406
Detect earthquake: p-value = 0.0036916938609490198
Retrieve trace data: p-value = 0.0034234824952353838


In [53]:
# Show the node metadata again - presumably to map the names printed above back to node ids.
nodes

[{'id': 'c5161aa1.85248', 'name': 'AutoPlay', 'type': 'switch'},
 {'id': 'f6f5bcbd.ad6ca', 'name': 'Next Block', 'type': 'function'},
 {'id': 'c312a0c.eef3ce',
  'name': 'Next block of sensor data',
  'type': 'function'},
 {'id': 'f50cf07b.26f8', 'name': 'Set filename', 'type': 'change'},
 {'id': 'deb9e3943de84f28',
  'name': 'Additional random delay',
  'type': 'delay'},
 {'id': 'da968efe.e1f1f', 'name': 'Get OpenEEW data', 'type': 'http request'},
 {'id': '2f380911.38291e', 'name': 'Build Quake Charts', 'type': 'function'},
 {'id': 'e3acce5d13799473', 'name': 'Detect earthquake', 'type': 'function'},
 {'id': '7db5d25abf49b78e', 'name': 'Retrieve trace data', 'type': 'function'}]