In [12]:
import json
import pandas as pd
import random

import networkx as nx, numpy as np
from dowhy import gcm

# plus tqdm, typing_extensions, matplotlib

In [2]:
# notice thousands - profiler node uses millisecond timestamps
def get_throughput_from_trace_data(filename, aggregation_period=10_000, trailing_events=18):
    """Aggregate per-node message throughput from a trace log.

    The file is read as one JSON document per line. Each document must have a
    "trace" list whose events carry 'eventName', 'timestamp' and a 'node' dict
    with an 'id'. Only 'onSend' events among the last `trailing_events` entries
    of every trace are counted.

    Why only the tail of each trace? We don't want to look at all the loops
    auto-play is doing, only at the profile of the message once it went to
    display. This turned out to be reflected in the 18 last entries of "trace";
    everything before is just loops, more of them for each message.

    Parameters
    ----------
    filename : str
        Path of the trace log to read.
    aggregation_period : int or float
        Length of one aggregation window, in the same unit as the event
        timestamps. Must be positive.
    trailing_events : int
        How many events at the end of each trace to inspect (default 18).
        Should be positive: a value of 0 would select the whole trace.

    Returns
    -------
    (df, nodes)
        `df` is a DataFrame with one row per aggregation window and one column
        per node id; each cell is the number of 'onSend' events of that node
        inside the window [start, start + aggregation_period).
        `nodes` is the list of node dicts taken from the first trace record,
        in the order they appear there.

    Raises
    ------
    ValueError
        If `aggregation_period` is not positive, or the file holds no trace
        records / no 'onSend' events.
    """
    # A non-positive period would never advance past stop_timestamp below.
    if aggregation_period <= 0:
        raise ValueError(f"aggregation_period must be positive, got {aggregation_period!r}")

    def _on_send_events(trace):
        # 'onSend' events within the inspected tail of a single trace record
        return [event for event in trace["trace"][-trailing_events:]
                if event['eventName'] == 'onSend']

    # One JSON document per line; blank lines (e.g. a trailing newline) are skipped.
    trace_data = []
    with open(filename, 'r') as trace_file:
        for trace_line in trace_file:
            if trace_line.strip():
                trace_data.append(json.loads(trace_line))

    if not trace_data:
        raise ValueError(f"no trace records found in {filename!r}")

    # node id -> timestamps at which that node emitted a message
    node_output_times = {}
    for trace in trace_data:
        for event in _on_send_events(trace):
            node_output_times.setdefault(event['node']['id'], []).append(event['timestamp'])

    if not node_output_times:
        raise ValueError(f"no 'onSend' events found in {filename!r}")

    start_timestamp = min(min(t) for t in node_output_times.values())
    stop_timestamp = max(max(t) for t in node_output_times.values())

    # Consecutive windows; a last window that would reach or pass
    # stop_timestamp is dropped.
    periods = []
    t = start_timestamp
    while t + aggregation_period < stop_timestamp:
        periods.append((t, t + aggregation_period))
        t += aggregation_period

    # The list of nodes (and the column order) comes from the first trace record.
    nodes = [event['node'] for event in _on_send_events(trace_data[0])]

    period_starts = [period[0] for period in periods]
    period_ends = [period[1] for period in periods]

    throughputs = {}
    for node in nodes:
        # With sorted timestamps, the number of x with start <= x < end is
        # (#x < end) - (#x < start); this avoids rescanning every timestamp
        # for every window.
        sorted_times = np.sort(node_output_times[node['id']])
        counts = (np.searchsorted(sorted_times, period_ends, side='left')
                  - np.searchsorted(sorted_times, period_starts, side='left'))
        throughputs[node['id']] = counts.tolist()

    df = pd.DataFrame(throughputs)

    return df, nodes

In [5]:
# Per-scenario throughput frames; each list is filled with one DataFrame per
# trace file by the loading loop.
normal_list, buildDelay_list, bothDelay_list = [], [], []

In [6]:
# Aggregation window in milliseconds (the profiler node uses millisecond
# timestamps) and the number of trace files recorded for every scenario.
AGGREGATION_PERIOD_MS = 5_000
N_TRACE_FILES = 10

# Start from empty lists so that re-running this cell does not append the
# same traces a second time.
normal_list, buildDelay_list, bothDelay_list = [], [], []
nodes = None

for i in range(1, N_TRACE_FILES + 1):
    df_normal, trace_nodes = get_throughput_from_trace_data(
        f"traces/trace_normal_{i}.log", aggregation_period=AGGREGATION_PERIOD_MS)
    df_buildDelay, _ = get_throughput_from_trace_data(
        f"traces/trace_buildDelay_{i}.log", aggregation_period=AGGREGATION_PERIOD_MS)
    df_bothDelay, _ = get_throughput_from_trace_data(
        f"traces/trace_bothDelay_{i}.log", aggregation_period=AGGREGATION_PERIOD_MS)

    if nodes is None:
        # Node metadata is taken from the first "normal" trace file; this
        # avoids parsing that file twice.
        nodes = trace_nodes

    normal_list.append(df_normal)
    buildDelay_list.append(df_buildDelay)
    bothDelay_list.append(df_bothDelay)

## Generate repeat indices

Data for each repeat of the experiment consists of three randomly chosen trace files joined together. Here we generate these random triples and save them for repeatability. This only needs to be done once, hence the lines below are commented out.

In [20]:
# Number of experiment repeats: compute_attributions() runs one attribution
# per repeat and reads one entry of repeat_indices.json for each of them.
n_repeats = 20

In [28]:
# repeat_indices = []
# i = 0
# while i < n_repeats:
#     idxs = tuple(random.sample(range(0, 10), 3))
#     if idxs in repeat_indices:
#         continue

#     repeat_indices.append(idxs)
#     i += 1

# with open('repeat_indices.json', 'w') as f:
#     json.dump(repeat_indices, f)

## Causal graph

In [23]:
nodes

[{'id': 'c5161aa1.85248', 'name': 'AutoPlay', 'type': 'switch'},
 {'id': 'f6f5bcbd.ad6ca', 'name': 'Next Block', 'type': 'function'},
 {'id': 'c312a0c.eef3ce',
  'name': 'Next block of sensor data',
  'type': 'function'},
 {'id': 'f50cf07b.26f8', 'name': 'Set filename', 'type': 'change'},
 {'id': 'deb9e3943de84f28',
  'name': 'Additional random delay',
  'type': 'delay'},
 {'id': 'da968efe.e1f1f', 'name': 'Get OpenEEW data', 'type': 'http request'},
 {'id': '2f380911.38291e', 'name': 'Build Quake Charts', 'type': 'function'},
 {'id': 'e3acce5d13799473', 'name': 'Detect earthquake', 'type': 'function'},
 {'id': '7db5d25abf49b78e', 'name': 'Retrieve trace data', 'type': 'function'}]

In [22]:
# Node ids, in the order in which the nodes are listed in `nodes`.
nodes_ids = [entry['id'] for entry in nodes]

# Chain-shaped causal graph: every node points at the node that follows it.
causal_graph = nx.DiGraph()
causal_graph.add_edges_from(zip(nodes_ids, nodes_ids[1:]))

def create_causal_model():
    """Build a new, unfitted probabilistic causal model over the node chain.

    The first node in `nodes_ids` gets an empirical distribution as its
    mechanism; every other node gets an additive noise model with a linear
    regressor.

    Returns
    -------
    gcm.ProbabilisticCausalModel
        A model with a causal mechanism assigned to every node in `nodes_ids`.
    """
    # Wrap a copy of the module-level graph: DoWhy keeps the assigned
    # mechanisms on the graph object it is given, so models built on the same
    # graph instance would overwrite each other's mechanisms.
    causal_model = gcm.ProbabilisticCausalModel(causal_graph.copy())
    causal_model.set_causal_mechanism(nodes_ids[0], gcm.EmpiricalDistribution())
    for node_id in nodes_ids[1:]:
        causal_model.set_causal_mechanism(node_id, gcm.AdditiveNoiseModel(gcm.ml.create_linear_regressor()))

    return causal_model

In [29]:
def compute_attributions(delay_mode):
    """Attribute the distribution change at the last node for every repeat.

    For each of the first `n_repeats` entries of 'repeat_indices.json', the
    "normal" frames and the delayed frames at the stored indices are
    concatenated, a fresh causal model is created, and
    `gcm.distribution_change` is run with the last node in `nodes` as target.

    Parameters
    ----------
    delay_mode : str
        "buildDelay" selects `buildDelay_list`; any other value selects
        `bothDelay_list`.

    Returns
    -------
    pandas.DataFrame
        One row per repeat, built from the attribution results returned by
        `gcm.distribution_change`.

    Raises
    ------
    IndexError
        If 'repeat_indices.json' holds fewer than `n_repeats` entries.
    """
    with open('repeat_indices.json', 'r') as f:
        repeat_indices = json.load(f)

    # Fail before any expensive attribution run rather than part-way through.
    if len(repeat_indices) < n_repeats:
        raise IndexError(
            f"repeat_indices.json holds {len(repeat_indices)} entries, "
            f"but n_repeats is {n_repeats}")

    # Both are the same for every repeat, so resolve them once.
    delay_list = buildDelay_list if delay_mode == "buildDelay" else bothDelay_list
    target_node = nodes[-1]['id']

    attributions_per_experiment = []
    for trace_idxs in repeat_indices[:n_repeats]:
        # ignore_index gives the joined frames a clean 0..n-1 index instead of
        # repeating the row labels of each source frame.
        normal_df = pd.concat([normal_list[j] for j in trace_idxs], ignore_index=True)
        delay_df = pd.concat([delay_list[j] for j in trace_idxs], ignore_index=True)

        causal_model = create_causal_model()

        attributions = gcm.distribution_change(causal_model, normal_df, delay_df, target_node)
        attributions_per_experiment.append(attributions)

    return pd.DataFrame(attributions_per_experiment)

In [34]:
# Run the attribution experiment for the "buildDelay" scenario; the result
# has one row per repeat.
attr_df = compute_attributions('buildDelay')

  joint_data = old_data.append(new_data, ignore_index=True, sort=True)

[A
[A
[A
[A
[A
[A
[A
Estimating Shapley Values. Average change of Shapley values in run 7 (35 evaluated permutations): -32.615845441164126%: 100%|██████████| 1/1 [00:01<00:00,  1.06s/it]
  joint_data = old_data.append(new_data, ignore_index=True, sort=True)

[A
[A
[A
[A
Estimating Shapley Values. Average change of Shapley values in run 4 (20 evaluated permutations): -1811.2415108835244%: 100%|██████████| 1/1 [00:00<00:00,  1.54it/s]
  joint_data = old_data.append(new_data, ignore_index=True, sort=True)

[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
Estimating Shapley Values. Average change of Shapley values in run 10 (50 evaluated permutations): -13.487244004770474%: 100%|██████████| 1/1 [00:01<00:00,  1.30s/it]
  joint_data = old_data.append(new_data, ignore_index=True, sort=True)

[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
Estimating Shapley Values. Average change

In [37]:
attr_df

Unnamed: 0,2f380911.38291e,7db5d25abf49b78e,c312a0c.eef3ce,c5161aa1.85248,da968efe.e1f1f,deb9e3943de84f28,e3acce5d13799473,f50cf07b.26f8,f6f5bcbd.ad6ca
0,0.055958,0.158457,-0.036405,0.173917,-0.049483,-0.08868,0.111537,-0.062759,-0.262541
1,0.662506,-0.000929,-0.005936,-0.024714,0.025209,0.004735,-0.025341,0.073789,0.01271
2,0.004128,0.00262,0.001951,0.001275,0.002329,-0.001543,0.003496,0.002668,-0.000952
3,0.379905,0.136672,-0.043025,0.051649,0.012192,-0.006698,0.034939,-0.016306,0.075266
4,0.267857,0.000876,0.001182,-0.005292,-0.006335,-0.002794,-0.002532,-0.004463,0.000327
5,-0.007567,-0.033996,0.01498,0.025487,-0.046481,-0.016666,0.021522,0.001523,0.045059
6,0.239404,-0.001201,-0.000614,-0.002187,0.014219,0.007985,0.005875,-0.004972,-0.00294
7,0.510372,0.007056,0.125949,0.105578,0.119637,-0.219126,-0.050908,0.156035,0.220516
8,0.152524,-0.031345,-0.092465,0.117127,-0.054366,0.087156,-0.241555,0.154134,-0.091211
9,-0.002343,-0.034345,0.025928,0.038705,-0.010365,0.085967,-0.043689,-0.033164,-0.011283


In [40]:
from scipy import stats

# Compare the attributions of "Build Quake Charts" against those of every
# other node with a two-sample t-test that does not assume equal variances.
reference_id = "2f380911.38291e"
reference_attributions = attr_df[reference_id].to_list()

for node in nodes:
    if node["id"] != reference_id:
        test_result = stats.ttest_ind(reference_attributions,
                                      attr_df[node["id"]].to_list(),
                                      equal_var=False)

        print(f"{node['name']}: p-value = {test_result.pvalue}")

AutoPlay: p-value = 0.0037559397806609953
Next Block: p-value = 0.0032634492847919876
Next block of sensor data: p-value = 0.0032311070806564117
Set filename: p-value = 0.0035517811385333492
Additional random delay: p-value = 0.0028218782082600953
Get OpenEEW data: p-value = 0.004742538601041566
Detect earthquake: p-value = 0.0029127188486643353
Retrieve trace data: p-value = 0.004022250039270347


In [33]:
nodes

[{'id': 'c5161aa1.85248', 'name': 'AutoPlay', 'type': 'switch'},
 {'id': 'f6f5bcbd.ad6ca', 'name': 'Next Block', 'type': 'function'},
 {'id': 'c312a0c.eef3ce',
  'name': 'Next block of sensor data',
  'type': 'function'},
 {'id': 'f50cf07b.26f8', 'name': 'Set filename', 'type': 'change'},
 {'id': 'deb9e3943de84f28',
  'name': 'Additional random delay',
  'type': 'delay'},
 {'id': 'da968efe.e1f1f', 'name': 'Get OpenEEW data', 'type': 'http request'},
 {'id': '2f380911.38291e', 'name': 'Build Quake Charts', 'type': 'function'},
 {'id': 'e3acce5d13799473', 'name': 'Detect earthquake', 'type': 'function'},
 {'id': '7db5d25abf49b78e', 'name': 'Retrieve trace data', 'type': 'function'}]