# Detailed comparison to Granger-causality and CCM

In [None]:
import numpy as np
from synthetic_data import sample_points, beta, hist_data, skfda_basis, spline_multi_sample
from causal import ccm_bivariate, granger, eval_candidate_DAGs, shd
from kernels import K_ID
import pickle
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

#### Data
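Case 2: helper functions that generate the coupled logistic time series and the mean shift used to move from stationary to non-stationary samples.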

In [None]:
# mean function used to move from stationarity to non-stationarity in X and Y

def mean_stationary(s):
    """
    Evaluate a randomly scaled tanh trend at the points s.

    A single steepness value is drawn from a normal distribution with mean 8
    and standard deviation 1 on every call; the trend is then
    tanh(steepness * s - steepness / 2), which is zero at s = 0.5.

    Inputs:
    s: scalar or array of evaluation points

    Returns:
    the trend evaluated at s (same shape as s)
    """
    steepness = np.random.normal(loc=8, scale=1)
    shifted = steepness * s - steepness / 2
    return np.tanh(shifted)

In [None]:
def two_log(X0, Y0, r_x, r_y, B_xy, B_yx, n_samples, n_preds):
    """
    Function to generate data according to a coupled two-species nonlinear logistic difference system with chaotic dynamics

    One long trajectory of 2 * n_samples * n_preds steps is simulated and then
    cut into windows of n_preds consecutive values. Only every other window is
    kept (windows 0, 2, 4, ...), so consecutive samples are separated by a gap
    of n_preds discarded steps.

    Inputs:
    X0: initial value for X
    Y0: initial value for Y
    r_x: system parameter (set between 3 and 4)
    r_y: system parameter (set between 3 and 4)
    B_xy: effect of Y on X
    B_yx: effect of X on Y
    n_samples: number of windows (samples) to return per variable
    n_preds: number of consecutive time steps in each window

    Returns:
    X_fd_list: array of shape (2, n_samples, n_preds); index 0 along the first
               axis holds the X windows, index 1 the Y windows
    """
    n_steps = n_preds * n_samples * 2

    # Simulate the joint trajectory; both updates use the previous step's values.
    x_series = [X0]
    y_series = [Y0]
    for _ in range(n_steps - 1):
        x_prev, y_prev = x_series[-1], y_series[-1]
        x_series.append(x_prev * (r_x - r_x * x_prev - B_xy * y_prev))
        y_series.append(y_prev * (r_y - r_y * y_prev - B_yx * x_prev))

    # Convert once, outside the slicing loop, instead of rebuilding the full
    # arrays from the Python lists for every sample.
    x_arr = np.asarray(x_series)
    y_arr = np.asarray(y_series)

    X_fd_list = np.empty((2, n_samples, n_preds))
    for n_s in range(n_samples):
        start = (2 * n_s) * n_preds
        stop = start + n_preds
        X_fd_list[0, n_s] = x_arr[start:stop]
        X_fd_list[1, n_s] = y_arr[start:stop]

    return X_fd_list

Hyperparameters for data generation and for the causal tests:

In [None]:
n_trials = 200  # independent trials run for every (sample size, r) setting

n_samples = [100]  # sample sizes looped over by the experiments
n_obs = 100  # forwarded to sample_points (presumably observation points per sample - confirm)
n_preds = 100  # number of grid points in pred_points; also the length of each generated series
upper_limit = 1  # right end of the prediction grid [0, upper_limit]
period = 0.1  # forwarded to skfda_basis
n_basis = 3  # forwarded to skfda_basis
sd = 1  # forwarded to skfda_basis; also the std. dev. of the Gaussian noise added to X and Y

# Values of r swept in both experiments: in Case 1 r weights the mix
# (1 - r) * X + r * hist_data(...), in Case 2 it scales the mean_stationary shift.
# NOTE(review): the list is not sorted (0, 1, then 0.2 ... 0.8) - results are keyed by r,
# so only the order of execution and printing is affected.
r_list = [0, 1, 0.2, 0.4, 0.6, 0.8]

pred_points = np.linspace(0, upper_limit, n_preds)  # evenly spaced evaluation grid
alpha = 0.05  # passed to granger, ccm_bivariate and eval_candidate_DAGs (presumably the significance level - confirm)

# The remaining values are forwarded unchanged to eval_candidate_DAGs;
# their exact meaning is defined in the causal module, not in this notebook.
n_intervals = 12
analyse = False
n_neighbours = 5
n_perms = 1000
make_K = K_ID
regressor = 'hist'

## Case 1:
Moving from linearity to non-linearity in the relationship between X and Y

Analysis over 200 independent trials:

In [None]:
# Structural Hamming distances, keyed first by sample size and then by r,
# for the Granger baseline (G) and the regression-based test (R).
SHDs_dict_G = {}
SHDs_dict_R = {}

for n_sample in n_samples:
    print('n:', n_sample)

    shd_G_by_r = SHDs_dict_G[n_sample] = {}
    shd_R_by_r = SHDs_dict_R[n_sample] = {}

    for r in r_list:
        print('r:', r)

        # One SHD value is appended per trial to each of these lists.
        shd_G_trials = shd_G_by_r[r] = []
        shd_R_trials = shd_R_by_r[r] = []

        for trial in tqdm(range(n_trials)):

            # --- data generation -------------------------------------------
            # Ground truth: a single edge from node 0 (X) to node 1 (Y).
            true_DAG = np.array([[0, 1], [0, 0]])
            n_nodes = len(true_DAG)

            obs_points_X = sample_points(n_sample, n_obs, upper_limit=upper_limit)
            X_basis = skfda_basis(n_sample, upper_limit, period, n_basis, sd)
            X_mat = X_basis.evaluate(obs_points_X, aligned=False).squeeze()
            X_spline = spline_multi_sample(X_mat, obs_points_X, pred_points)
            X_noise_free = X_spline.evaluate(pred_points).squeeze()
            X = X_noise_free + np.random.normal(0, sd, size=(n_sample, n_preds))

            # Y mixes X itself (weight 1 - r) with hist_data(X, ...) (weight r), plus noise.
            Y_hist = hist_data(X, upper_limit, r, pred_points)
            Y = (1 - r) * X + r * Y_hist + np.random.normal(0, sd, size=(n_sample, n_preds))
            X_arr = np.asarray([X, Y])

            # --- test Granger ------------------------------------------------
            # NOTE(review): DAG is overwritten on every iteration, so only the
            # result for the last sample reaches the SHD computation below -
            # confirm this is intended.
            n_series = X_arr.shape[1]
            for sample_idx in range(n_series):
                DAG, _, p_value, _ = granger(X_arr[:, sample_idx, :], alpha)

            # Convert the {child: parent(s)} mapping into an adjacency matrix
            # with rows as parents and columns as children, like true_DAG.
            DAG_adj = np.zeros((n_nodes, n_nodes))
            for child, parents in DAG.items():
                DAG_adj[parents, child] = 1
            shd_G_trials.append(shd(true_DAG, DAG_adj))

            # --- test regression ---------------------------------------------
            DAG_R, p_value_R = eval_candidate_DAGs(
                X_arr, pred_points, n_intervals, n_neighbours, n_perms,
                alpha, make_K, analyse, regressor, pd_graph=None,
            )

            DAG_R_adj = np.zeros((n_nodes, n_nodes))
            for child, parents in DAG_R.items():
                DAG_R_adj[parents, child] = 1
            shd_R_trials.append(shd(true_DAG, DAG_R_adj))

        # Mean SHD over all trials for this r.
        print('Granger (X -> Y) SHD for r =', r, ':', np.mean(shd_G_trials))
        print('Regression (X -> Y) SHD for r =', r, ':', np.mean(shd_R_trials))

In [None]:
# save
# Create the output directory if it is missing so that the results of a long
# run are not lost to a FileNotFoundError at the very end.
os.makedirs('results/causal', exist_ok=True)

# Context managers close the files even if pickling raises.
with open('results/causal/granger_linear_01_SHD.pkl', 'wb') as results_SHD_G:
    pickle.dump(SHDs_dict_G, results_SHD_G)

with open('results/causal/regression_linear_01_G_SHD.pkl', 'wb') as results_SHD_R:
    pickle.dump(SHDs_dict_R, results_SHD_R)

## Case 2:
Moving from stationary to non-stationary time-series samples in X and Y

In [None]:
# Structural Hamming distances, keyed first by sample size and then by r,
# for the CCM baseline (C) and the regression-based test (R).
SHDs_dict_C = {}
SHDs_dict_R = {}

for n_sample in n_samples:
    print('n:', n_sample)
    shd_C_by_r = SHDs_dict_C[n_sample] = {}
    shd_R_by_r = SHDs_dict_R[n_sample] = {}

    for r in r_list:
        print('r:', r)
        # One SHD value is appended per trial to each of these lists.
        shd_C_trials = shd_C_by_r[r] = []
        shd_R_trials = shd_R_by_r[r] = []

        for trial in tqdm(range(n_trials)):

            # --- data generation -------------------------------------------
            # Ground truth: a single edge from node 0 (X) to node 1 (Y).
            true_DAG = np.array([[0, 1], [0, 0]])
            n_nodes = len(true_DAG)

            # NOTE(review): obs_points_X is not used further down in this cell;
            # the call is kept because it may draw random numbers, and removing
            # it would then change the random stream - confirm before deleting.
            obs_points_X = sample_points(n_sample, n_obs, upper_limit=upper_limit)

            # Coupled logistic series; r scales a random tanh mean shift that is
            # drawn separately for X and for Y.
            XY = two_log(0.8, 0.2, 3.8, 3.2, 0.02, 0.1, n_sample, n_preds)
            shift_X = mean_stationary(pred_points)
            X = XY[0] + r * shift_X
            shift_Y = mean_stationary(pred_points)
            Y = XY[1] + r * shift_Y
            X_arr = np.asarray([X, Y])

            # --- test CCM ----------------------------------------------------
            # NOTE(review): DAG is overwritten on every iteration, so only the
            # result for the last sample reaches the SHD computation below -
            # confirm this is intended.
            n_series = X_arr.shape[1]
            for sample_idx in range(n_series):
                DAG, _, p_value, _ = ccm_bivariate(X_arr[:, sample_idx, :], alpha)

            # Convert the {child: parent(s)} mapping into an adjacency matrix
            # with rows as parents and columns as children, like true_DAG.
            DAG_adj = np.zeros((n_nodes, n_nodes))
            for child, parents in DAG.items():
                DAG_adj[parents, child] = 1
            shd_C_trials.append(shd(true_DAG, DAG_adj))

            # --- test regression ---------------------------------------------
            DAG_R, p_value_R = eval_candidate_DAGs(
                X_arr, pred_points, n_intervals, n_neighbours, n_perms,
                alpha, make_K, analyse, regressor, pd_graph=None,
            )

            DAG_R_adj = np.zeros((n_nodes, n_nodes))
            for child, parents in DAG_R.items():
                DAG_R_adj[parents, child] = 1
            shd_R_trials.append(shd(true_DAG, DAG_R_adj))

        # Mean SHD over all trials for this r.
        print('CCM (X -> Y) SHD for r =', r, ':', np.mean(shd_C_trials))
        print('Regression (X -> Y) SHD for r =', r, ':', np.mean(shd_R_trials))

In [None]:
# save
# Create the output directory if it is missing so that the results of a long
# run are not lost to a FileNotFoundError at the very end.
os.makedirs('results/causal', exist_ok=True)

# Context managers close the files even if pickling raises.
with open('results/causal/ccm_linear_01_SHD.pkl', 'wb') as results_SHD_C:
    pickle.dump(SHDs_dict_C, results_SHD_C)

with open('results/causal/regression_linear_01_C_SHD.pkl', 'wb') as results_SHD_R:
    pickle.dump(SHDs_dict_R, results_SHD_R)