# WP1 - Anomaly subgraph extraction: Finding Granger causalities

In [20]:
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import networkx as nx
from statsmodels.tsa.stattools import grangercausalitytests

In [21]:
def read_graph(filename) -> nx.Graph:
    """
    Reads a graph from a file.
    """
    tree = ET.parse(filename)
    print(tree)
    root = tree.getroot()
    nodes = set()
    G = nx.DiGraph()
    for child in root.iter("vertex"):
        node = child.attrib["id"]
        if node not in nodes:
            nodes.add(node)
            G.add_node(node)
        for adj in child.iter("adjacent"):
            adjacent_node = adj.attrib["vertex"]
            if adjacent_node not in nodes:
                nodes.add(adjacent_node)
                G.add_node(adjacent_node)
            G.add_edge(node, adjacent_node)
    return G

In [31]:
graph = read_graph("../data/Sock-shop-xml.txt")
nx.set_edge_attributes(graph, False, name="causal")

<xml.etree.ElementTree.ElementTree object at 0x0000021E2D97E610>


### Calculate granger causality on dataset and update graph

In [23]:
MAXLAG = 2
f = '../data/loadtest-timestamp.csv'
P = pd.read_csv(f, index_col=0)
P[['Date', 'Time']] = P['Time'].str.split(" ", expand=True)
P = P.sort_values(by=['Date','Time'])
P.head()

Unnamed: 0,Time,carts,carts-db,catalogue,catalogue-db,front-end,orders,orders-db,payment,queue-master,rabbitmq,session-db,shipping,user,user-db,Date
674,11:27,0.00245,0.00824,0.00129,0.00115,0.00313,0.0026,0.00901,0.00125,0.00254,0.0145,0.00232,0.00223,0.00178,0.00942,10/14/2021
675,11:28,0.00279,0.0102,0.00128,0.00108,0.00352,0.00273,0.00974,0.00131,0.00249,0.0136,0.00268,0.00235,0.00191,0.0106,10/14/2021
676,11:29,0.00345,0.00929,0.0014,0.00118,0.00379,0.00262,0.00938,0.00137,0.00329,0.0132,0.0026,0.00214,0.00198,0.0102,10/14/2021
677,11:30,0.00233,0.0089,0.00108,0.00112,0.00343,0.00227,0.00783,0.0013,0.00239,0.0113,0.00223,0.00221,0.00173,0.00869,10/14/2021
678,11:31,0.00421,0.00914,0.00148,0.00138,0.00405,0.00297,0.0098,0.00141,0.0025,0.0127,0.00259,0.00234,0.00233,0.0103,10/14/2021


In [24]:
P_cataloguedb = P.iloc[86:]
grangercausalitytests(P_cataloguedb[['catalogue-db', 'catalogue']], MAXLAG, verbose=False)

{1: ({'ssr_ftest': (1.2302535933048793, 0.26772652358882343, 723.0, 1),
   'ssr_chi2test': (1.2353583799990904, 0.26636748598373405, 1),
   'lrtest': (1.234308530605631, 0.2665707648448348, 1),
   'params_ftest': (1.2302535933045233, 0.26772652358888244, 723.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x21e2c93e730>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x21e2c93e460>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (0.6556035696996564, 0.5194382567768923, 720.0, 2),
   'ssr_chi2test': (1.3203127445340301, 0.5167705195955838, 2),
   'lrtest': (1.319111977490138, 0.5170808732564662, 2),
   'params_ftest': (0.6556035696998079, 0.51943825677683, 720.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x21e2c9430d0>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x21e2c940d90>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])])}

In [26]:
def granger_causality_for_edges(graph, df):
    for source, target in graph.edges:
        if source in df.columns and target in df.columns:
            gc_dicts = grangercausalitytests(df[[source, target]], MAXLAG, verbose=False)
            p_values = [lag_result[0]['ssr_chi2test'][1] for lag_result in gc_dicts.values()]
            print(source, target)
            if np.min(p_values) < 0.05:
                print('causal')
                nx.set_edge_attributes(graph, {(source, target): {"causal": True}})
            else:
                print('not causal')

In [32]:
def granger_causality_for_neighbors(neighbors, graph):
    for i in range(len(neighbors)):
        for j in range(i+1, len(neighbors)):
            node1 = neighbors[i]
            node2 = neighbors[j]
            if node1 == node2:
                continue
            if node1 in P.columns and node2 in P.columns:
                gc_dicts_direction_1 = grangercausalitytests(P[[node1, node2]], MAXLAG, verbose=False)
                p_values_direction_1 = [lag_result[0]['ssr_chi2test'][1] for lag_result in gc_dicts_direction_1.values()]

                gc_dicts_direction_2 = grangercausalitytests(P[[node2, node1]], MAXLAG, verbose=False)
                p_values_direction_2 = [lag_result[0]['ssr_chi2test'][1] for lag_result in gc_dicts_direction_2.values()]


                print(node1, node2)
                if np.min(p_values_direction_1) < 0.05 or np.min(p_values_direction_2) < 0.05:
                    print('causal')
                    graph.add_edge(node1, node2, causal=True)
                else:
                    print('not causal')

In [28]:
granger_causality_for_edges(graph, P)

orders front-end
not causal
orders shipping
causal
shipping queue-master
causal
carts front-end
not causal
user front-end
not causal
user user-db
causal
catalogue front-end
not causal
catalogue-db catalogue
not causal
orders-db orders
causal
carts-db carts
causal


In [33]:
worker_1_neighbours = list(nx.all_neighbors(graph, 'worker1'))
granger_causality_for_neighbors(worker_1_neighbours, graph)

orders payment
not causal
orders carts
causal
orders user
causal
orders catalogue
causal
orders catalogue-db
causal
orders shipping
causal
orders queue-master
causal
orders rabbitmq
causal
payment carts
not causal
payment user
causal
payment catalogue
causal
payment catalogue-db
not causal
payment shipping
not causal
payment queue-master
not causal
payment rabbitmq
causal
carts user
causal
carts catalogue
not causal
carts catalogue-db
causal
carts shipping
causal
carts queue-master
causal
carts rabbitmq
causal
user catalogue
causal
user catalogue-db
causal
user shipping
causal
user queue-master
causal
user rabbitmq
causal
catalogue catalogue-db
causal
catalogue shipping
not causal
catalogue queue-master
causal
catalogue rabbitmq
causal
catalogue-db shipping
causal
catalogue-db queue-master
causal
catalogue-db rabbitmq
not causal
shipping queue-master
causal
shipping rabbitmq
causal
queue-master rabbitmq
causal


In [34]:
worker_2_neighbours = list(nx.all_neighbors(graph, 'worker2'))
granger_causality_for_neighbors(worker_2_neighbours, graph)

TypeError: granger_causality_for_neighbors() takes 2 positional arguments but 3 were given