# WP1 - Anomaly subgraph extraction: Finding Granger causalities

In [2]:
import os

import numpy as np
import pandas as pd
import py2neo
from statsmodels.tsa.stattools import grangercausalitytests

In [4]:
# Connect to the Neo4j instance that stores the service graph.
# Connection settings are read from the environment (NEO4J_URI, NEO4J_USER,
# NEO4J_PASSWORD) so that they can be changed without editing the notebook.
# The fallbacks reproduce the original local setup.
# TODO: remove the hard-coded fallback address before upload
graph = py2neo.Graph(
    os.environ.get("NEO4J_URI", "bolt://192.168.1.185:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", ""),
    ),
)

In [5]:
# Fetch the relationships stored in the graph (no match filter) as a list;
# later cells iterate over this snapshot.
graph_rel = graph.relationships.match().all()

In [8]:
# Display the fetched relationships for inspection.
graph_rel

[GC(Node('Node', id='carts'), Node('Node', id='orders'), causal=True),
 CONNECTED(Node('Node', id='carts'), Node('Node', id='worker1')),
 CONNECTED(Node('Node', id='carts'), Node('Node', id='front-end'), causal=False),
 GC(Node('Node', id='user'), Node('Node', id='orders'), causal=True),
 GC(Node('Node', id='user'), Node('Node', id='payment'), causal=True),
 GC(Node('Node', id='user'), Node('Node', id='carts'), causal=True),
 CONNECTED(Node('Node', id='user'), Node('Node', id='user-db'), causal=True),
 CONNECTED(Node('Node', id='user'), Node('Node', id='worker1')),
 CONNECTED(Node('Node', id='user'), Node('Node', id='front-end'), causal=False),
 GC(Node('Node', id='user-db'), Node('Node', id='front-end'), causal=True),
 CONNECTED(Node('Node', id='user-db'), Node('Node', id='worker2')),
 GC(Node('Node', id='catalogue'), Node('Node', id='orders'), causal=True),
 GC(Node('Node', id='catalogue'), Node('Node', id='payment'), causal=True),
 GC(Node('Node', id='catalogue'), Node('Node', id='u

### Calculate Granger causality on the dataset and update the graph

In [7]:
# Maximum number of lags passed to every grangercausalitytests call below.
MAXLAG = 2
f = '../data/loadtest-timestamp.csv'
P = pd.read_csv(f, index_col=0)
# Parse the combined "<date> <time>" stamp before it is split, so that rows can
# be ordered chronologically. Sorting the split string columns is only correct
# while every value is zero-padded and falls within the same year.
timestamps = pd.to_datetime(P['Time'])
P[['Date', 'Time']] = P['Time'].str.split(" ", expand=True)
# Stable sort: rows with equal timestamps keep their order from the file.
P = P.iloc[np.argsort(timestamps.to_numpy(), kind='stable')]
P.head()

Unnamed: 0,Time,carts,carts-db,catalogue,catalogue-db,front-end,orders,orders-db,payment,queue-master,rabbitmq,session-db,shipping,user,user-db,Date
674,11:27,0.00245,0.00824,0.00129,0.00115,0.00313,0.0026,0.00901,0.00125,0.00254,0.0145,0.00232,0.00223,0.00178,0.00942,10/14/2021
675,11:28,0.00279,0.0102,0.00128,0.00108,0.00352,0.00273,0.00974,0.00131,0.00249,0.0136,0.00268,0.00235,0.00191,0.0106,10/14/2021
676,11:29,0.00345,0.00929,0.0014,0.00118,0.00379,0.00262,0.00938,0.00137,0.00329,0.0132,0.0026,0.00214,0.00198,0.0102,10/14/2021
677,11:30,0.00233,0.0089,0.00108,0.00112,0.00343,0.00227,0.00783,0.0013,0.00239,0.0113,0.00223,0.00221,0.00173,0.00869,10/14/2021
678,11:31,0.00421,0.00914,0.00148,0.00138,0.00405,0.00297,0.0098,0.00141,0.0025,0.0127,0.00259,0.00234,0.00233,0.0103,10/14/2021


In [11]:
# Spot check of a single pair on the frame with its first 86 rows dropped.
# NOTE(review): the offset 86 is unexplained here - confirm why these rows are skipped.
P_cataloguedb = P.iloc[86:]
grangercausalitytests(
    P_cataloguedb.loc[:, ['catalogue-db', 'catalogue']],
    maxlag=MAXLAG,
    verbose=False,
)

{1: ({'ssr_ftest': (1.2302535933048793, 0.26772652358882343, 723.0, 1),
   'ssr_chi2test': (1.2353583799990904, 0.26636748598373405, 1),
   'lrtest': (1.234308530605631, 0.2665707648448348, 1),
   'params_ftest': (1.2302535933045222, 0.26772652358888244, 723.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fdfdf2111f0>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fdfdf211460>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (0.6556035696996997, 0.5194382567768923, 720.0, 2),
   'ssr_chi2test': (1.3203127445341172, 0.5167705195955613, 2),
   'lrtest': (1.319111977490138, 0.5170808732564662, 2),
   'params_ftest': (0.6556035696998079, 0.51943825677683, 720.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fdfdf2180a0>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fdfdf218c10>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])])}

In [27]:
# Run the Granger-causality test for every fetched relationship whose two
# endpoints both have a column in P, then store the verdict on the relationship
# as its "causal" property and push it to the database.
# NOTE(review): per the statsmodels docs the test on [source, target] checks
# whether the *second* column (target) Granger-causes the first (source) -
# confirm this is the intended direction.
# NOTE(review): graph_rel is not filtered by relationship type, so existing
# "GC" relationships are re-evaluated here as well - confirm this is intended.
for rel in graph_rel:
    source, target = rel.start_node['id'], rel.end_node['id']
    if source not in P.columns or target not in P.columns:
        # No time series available for at least one endpoint.
        continue
    gc_dicts = grangercausalitytests(P[[source, target]], MAXLAG, verbose=False)
    # One p-value (ssr_chi2test) per tested lag.
    p_values = [lag_result[0]['ssr_chi2test'][1] for lag_result in gc_dicts.values()]
    print(source, target)
    # Significant at the 5% level for at least one lag.
    is_causal = bool(np.min(p_values) < 0.05)
    print('causal' if is_causal else 'not causal')
    rel["causal"] = is_causal
    graph.push(rel)
           

carts front-end
not causal
user user-db
causal
user front-end
not causal
catalogue front-end
not causal
catalogue-db catalogue
not causal
orders-db orders
causal
carts-db carts
causal
orders shipping
causal
orders front-end
not causal
shipping queue-master
causal


In [19]:
# Nodes adjacent to 'worker1' in either direction. DISTINCT makes sure a
# neighbour that is linked by more than one relationship is listed only once,
# so that no pair is tested (and no GC relationship created) twice later on.
neighbor_q1 = '''MATCH (a)-[r]-(b) where a.id='worker1' RETURN DISTINCT b'''
worker1_neighbors = [record['b'] for record in graph.run(neighbor_q1).data()]

In [20]:
# Nodes adjacent to 'worker2' in either direction. DISTINCT makes sure a
# neighbour that is linked by more than one relationship is listed only once,
# so that no pair is tested (and no GC relationship created) twice later on.
neighbor_q2 = '''MATCH (a)-[r]-(b) where a.id='worker2' RETURN DISTINCT b'''
worker2_neighbors = [record['b'] for record in graph.run(neighbor_q2).data()]

In [21]:
def _min_granger_p_value(first, second):
    """Return the smallest ``ssr_chi2test`` p-value over all tested lags.

    Runs ``grangercausalitytests`` on ``P[[first, second]]`` with ``MAXLAG``
    lags. Per the statsmodels documentation this checks whether the second
    column Granger-causes the first.
    """
    results = grangercausalitytests(P[[first, second]], MAXLAG, verbose=False)
    return np.min([lag_result[0]['ssr_chi2test'][1] for lag_result in results.values()])


def granger_causality_for_neighbors(neighbors, graph, alpha=0.05):
    """Test every unordered pair of neighbours for Granger causality.

    For each pair whose ids are both columns of the global frame ``P``, the
    test is run in both directions. If either direction is significant, one
    ``GC`` relationship with ``causal=True`` is created in ``graph``. The
    pair and the verdict are printed.

    Parameters
    ----------
    neighbors : sequence
        Node-like objects that expose their identifier via ``node['id']``.
        Entries with a repeated id are ignored after their first occurrence,
        so no pair is tested and no relationship is created more than once.
    graph : object
        Graph handle; only ``graph.create(relationship)`` is called on it.
    alpha : float, optional
        Significance level the minimum p-value is compared against
        (default 0.05).

    Returns
    -------
    None
    """
    # Keep the first occurrence of every id, preserving the input order.
    unique_neighbors = {}
    for node in neighbors:
        unique_neighbors.setdefault(node['id'], node)
    # Only nodes that have a time series in P can take part in a test.
    candidates = [node for node_id, node in unique_neighbors.items() if node_id in P.columns]

    for i, n1 in enumerate(candidates):
        for n2 in candidates[i + 1:]:
            node1, node2 = n1['id'], n2['id']
            p_value_direction_1 = _min_granger_p_value(node1, node2)
            p_value_direction_2 = _min_granger_p_value(node2, node1)

            print(node1, node2)
            if p_value_direction_1 < alpha or p_value_direction_2 < alpha:
                print('causal')
                # NOTE(review): the relationship always points n1 -> n2, whichever
                # direction was significant - confirm that this is intended.
                graph.create(py2neo.Relationship(n1, "GC", n2, causal=True))
            else:
                print('not causal')

In [22]:
# Test all pairs of worker1's neighbours; a GC relationship is created for each
# pair reported as 'causal'.
granger_causality_for_neighbors(worker1_neighbors, graph)

rabbitmq queue-master
causal
rabbitmq shipping
causal
rabbitmq catalogue-db
not causal
rabbitmq catalogue
causal
rabbitmq user
causal
rabbitmq carts
causal
rabbitmq payment
causal
rabbitmq orders
causal
queue-master shipping
causal
queue-master catalogue-db
causal
queue-master catalogue
causal
queue-master user
causal
queue-master carts
causal
queue-master payment
not causal
queue-master orders
causal
shipping catalogue-db
causal
shipping catalogue
not causal
shipping user
causal
shipping carts
causal
shipping payment
not causal
shipping orders
causal
catalogue-db catalogue
not causal
catalogue-db user
causal
catalogue-db carts
causal
catalogue-db payment
not causal
catalogue-db orders
causal
catalogue user
causal
catalogue carts
not causal
catalogue payment
causal
catalogue orders
causal
user carts
causal
user payment
causal
user orders
causal
carts payment
not causal
carts orders
causal
payment orders
not causal


In [23]:
# Test all pairs of worker2's neighbours; a GC relationship is created for each
# pair reported as 'causal'.
granger_causality_for_neighbors(worker2_neighbors, graph)

session-db carts-db
causal
session-db orders-db
causal
session-db user-db
causal
session-db front-end
not causal
carts-db orders-db
causal
carts-db user-db
causal
carts-db front-end
causal
orders-db user-db
causal
orders-db front-end
causal
user-db front-end
causal
