In [None]:
import numpy as np
import pickle
import pandas as pd
from plots import plot_SHD, plot_SHD_regression, plot_precision_recall, plot_comparison

# Regression with $d = 2$

In [None]:
# Configuration for this section: number of nodes, trial count and compared methods.
n_nodes = 2
n_trials = 200
methods = ['Regression', 'CCM', 'Granger']

# One fresh, empty list per method for the SHD means and the SHD standard deviations.
SHD_avg_list = {meth: [] for meth in methods}
SHD_std_list = {meth: [] for meth in methods}

### Regression

In [None]:
# Load the regression SHD scores and p-values and keep only the entry stored under
# key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/final/regression/n_vars_2/shd_regression_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_regression = pickle.load(shd_file)[0.1]
with open('results/causal/final/regression/n_vars_2/p_values_regression_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_regression = pickle.load(p_values_file)[0.1]

In [None]:
# The keys of the loaded regression results become n_samples, the list used below
# to index every per-method result and passed to the plotting helper.
n_samples = list(shd_regression.keys())

In [None]:
# For every sample size, set the SHD entry to 0 at each position whose p-value is
# exactly 0.0 (only the [1][0] slice of both structures is touched).
for sample_size in n_samples:
    p_vals = list(p_values_regression[sample_size][1][0])
    shd_row = shd_regression[sample_size][1][0]
    for idx in range(len(p_vals)):
        if p_vals[idx] != 0.0:
            continue
        shd_row[idx] = 0

In [None]:
# Collect the regression SHD scores per sample size once, then summarise them
# as mean and standard deviation.
regression_scores = [shd_regression[size][1] for size in n_samples]
SHD_avg_list['Regression'] = [np.mean(scores) for scores in regression_scores]
SHD_std_list['Regression'] = [np.std(scores) for scores in regression_scores]

### Granger

In [None]:
# Load the Granger SHD scores and p-values and keep only the entry stored under
# key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/final/regression/n_vars_2/shd_Granger_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_granger = pickle.load(shd_file)[0.1]
with open('results/causal/final/regression/n_vars_2/p_values_Granger_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_granger = pickle.load(p_values_file)[0.1]

In [None]:
# Collect the Granger SHD scores per sample size once, then summarise them
# as mean and standard deviation.
granger_scores = [shd_granger[size][1] for size in n_samples]
SHD_avg_list['Granger'] = [np.mean(scores) for scores in granger_scores]
SHD_std_list['Granger'] = [np.std(scores) for scores in granger_scores]

### CCM

In [None]:
# Load the CCM SHD scores and p-values and keep only the entry stored under
# key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/final/regression/n_vars_2/shd_CCM_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_ccm = pickle.load(shd_file)[0.1]
with open('results/causal/final/regression/n_vars_2/p_values_CCM_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_ccm = pickle.load(p_values_file)[0.1]

In [None]:
# Collect the CCM SHD scores per sample size once, then summarise them
# as mean and standard deviation.
ccm_scores = [shd_ccm[size][1] for size in n_samples]
SHD_avg_list['CCM'] = [np.mean(scores) for scores in ccm_scores]
SHD_std_list['CCM'] = [np.std(scores) for scores in ccm_scores]

In [None]:
# Show the mean SHD lists collected so far, keyed by method.
SHD_avg_list

In [None]:
# Plot the per-method SHD means and standard deviations over n_samples with the
# project helper from plots.py (std=True presumably enables the std display -- confirm there).
plot_SHD_regression(SHD_avg_list, SHD_std_list, n_samples, n_nodes, std=True)

# Regression with $d = 3$

In [None]:
# Configuration for this section: number of nodes, trial count and compared methods.
n_nodes = 3
n_trials = 200
methods = ['Regression', 'PCMCI']

# One fresh, empty list per method for the SHD means and the SHD standard deviations.
SHD_avg_list = {meth: [] for meth in methods}
SHD_std_list = {meth: [] for meth in methods}

In [None]:
### Regression

In [None]:
# Load the regression SHD scores and p-values and keep only the entry stored under
# key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/final/regression/n_vars_3/shd_regression_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_regression = pickle.load(shd_file)[0.1]
with open('results/causal/final/regression/n_vars_3/p_values_regression_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_regression = pickle.load(p_values_file)[0.1]

In [None]:
# The keys of the loaded regression results become n_samples, the list used below
# to index the regression results.
n_samples = list(shd_regression.keys())

In [None]:
# For every sample size, set the SHD entry to 0 at each position whose p-value is
# exactly 0.0 (only the [1][0] slice of both structures is touched).
for sample_size in n_samples:
    p_vals = list(p_values_regression[sample_size][1][0])
    shd_row = shd_regression[sample_size][1][0]
    for idx in range(len(p_vals)):
        if p_vals[idx] != 0.0:
            continue
        shd_row[idx] = 0

In [None]:
# Collect the regression SHD scores per sample size once, then summarise them
# as mean and standard deviation.
regression_scores = [shd_regression[size][1] for size in n_samples]
SHD_avg_list['Regression'] = [np.mean(scores) for scores in regression_scores]
SHD_std_list['Regression'] = [np.std(scores) for scores in regression_scores]

In [None]:
# Show the mean SHD lists collected so far, keyed by method.
SHD_avg_list

In [None]:
# Show the SHD standard-deviation lists collected so far, keyed by method.
SHD_std_list

### PCMCI

In [None]:
# Hard-coded sample sizes used to index the PCMCI results below.
# NOTE(review): this overrides the n_samples derived from shd_regression.keys(); the
# 'Regression' lists computed earlier were built with that previous value, so if the
# keys differ from these three sizes the lists handed to the plot will have
# different lengths -- verify.
n_samples = [100, 200, 300]

In [None]:
# Load the PCMCI SHD scores and p-values and keep only the entry stored under
# key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/final/regression/n_vars_3/shd_PCMCI_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_pcmci = pickle.load(shd_file)[0.1]
with open('results/causal/final/regression/n_vars_3/p_values_PCMCI_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_pcmci = pickle.load(p_values_file)[0.1]

In [None]:
# Collect the PCMCI SHD scores per sample size once, then summarise them
# as mean and standard deviation.
pcmci_scores = [shd_pcmci[size][1] for size in n_samples]
SHD_avg_list['PCMCI'] = [np.mean(scores) for scores in pcmci_scores]
SHD_std_list['PCMCI'] = [np.std(scores) for scores in pcmci_scores]

In [None]:
# Show the mean SHD lists collected so far, keyed by method.
SHD_avg_list

In [None]:
# Show the SHD standard-deviation lists collected so far, keyed by method.
SHD_std_list

In [None]:
# Plot the per-method SHD means and standard deviations over n_samples with the
# project helper from plots.py (std=True presumably enables the std display -- confirm there).
plot_SHD_regression(SHD_avg_list, SHD_std_list, n_samples, n_nodes, std=True)

In [None]:
# NOTE(review): this call is identical to the one in the preceding cell, with no state
# change in between -- it looks like a leftover duplicate; confirm before removing.
plot_SHD_regression(SHD_avg_list, SHD_std_list, n_samples, n_nodes, std=True)

In [None]:
# Constraint-based

In [None]:
# For every sample size, set the SHD entry to 0 at each position whose p-value is
# exactly 0.0 (only the [1][0] slice of both structures is touched).
for sample_size in n_samples:
    p_vals = list(p_values_regression[sample_size][1][0])
    shd_row = shd_regression[sample_size][1][0]
    for idx in range(len(p_vals)):
        if p_vals[idx] != 0.0:
            continue
        shd_row[idx] = 0

In [None]:
# Collect the regression SHD scores per sample size once, then summarise them
# as mean and standard deviation.
regression_scores = [shd_regression[size][1] for size in n_samples]
SHD_avg_list['Regression'] = [np.mean(scores) for scores in regression_scores]
SHD_std_list['Regression'] = [np.std(scores) for scores in regression_scores]

### Granger

In [None]:
# Load the Granger SHD scores and p-values for the current n_nodes and keep only the
# entry stored under key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/shd_Granger_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_granger = pickle.load(shd_file)[0.1]
with open('results/causal/p_values_Granger_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_granger = pickle.load(p_values_file)[0.1]

In [None]:
# Collect the Granger SHD scores per sample size once, then summarise them
# as mean and standard deviation.
granger_scores = [shd_granger[size][1] for size in n_samples]
SHD_avg_list['Granger'] = [np.mean(scores) for scores in granger_scores]
SHD_std_list['Granger'] = [np.std(scores) for scores in granger_scores]

### CCM

In [None]:
# Load the CCM SHD scores and p-values for the current n_nodes and keep only the
# entry stored under key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/shd_CCM_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_ccm = pickle.load(shd_file)[0.1]
with open('results/causal/p_values_CCM_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_ccm = pickle.load(p_values_file)[0.1]

In [None]:
# Collect the CCM SHD scores per sample size once, then summarise them
# as mean and standard deviation.
ccm_scores = [shd_ccm[size][1] for size in n_samples]
SHD_avg_list['CCM'] = [np.mean(scores) for scores in ccm_scores]
SHD_std_list['CCM'] = [np.std(scores) for scores in ccm_scores]

In [None]:
# Show the mean SHD lists collected so far, keyed by method.
SHD_avg_list

In [None]:
# Plot the per-method SHD means and standard deviations over n_samples with the
# project helper from plots.py.
# NOTE(review): SHD_avg_list / SHD_std_list are not re-initialised in this section, so
# entries added by earlier cells (e.g. 'PCMCI') are still present here -- confirm intended.
plot_SHD_regression(SHD_avg_list, SHD_std_list, n_samples, n_nodes, std=True)

# Regression with $d = 3$

In [None]:
# Configuration for this section: number of nodes, trial count and compared methods.
n_nodes = 3
n_trials = 200
methods = ['Regression', 'PCMCI']

# One fresh, empty list per method for the SHD means and the SHD standard deviations.
SHD_avg_list = {meth: [] for meth in methods}
SHD_std_list = {meth: [] for meth in methods}

In [None]:
### Regression

In [None]:
# Load the regression SHD scores and p-values for the current n_nodes and keep only the
# entry stored under key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/shd_regression_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_regression = pickle.load(shd_file)[0.1]
with open('results/causal/p_values_regression_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_regression = pickle.load(p_values_file)[0.1]

In [None]:
# The keys of the loaded regression results become n_samples, the list used below
# to index every per-method result and passed to the plotting helper.
n_samples = list(shd_regression.keys())

In [None]:
# For every sample size, set the SHD entry to 0 at each position whose p-value is
# exactly 0.0 (only the [1][0] slice of both structures is touched).
for sample_size in n_samples:
    p_vals = list(p_values_regression[sample_size][1][0])
    shd_row = shd_regression[sample_size][1][0]
    for idx in range(len(p_vals)):
        if p_vals[idx] != 0.0:
            continue
        shd_row[idx] = 0

In [None]:
# Collect the regression SHD scores per sample size once, then summarise them
# as mean and standard deviation.
regression_scores = [shd_regression[size][1] for size in n_samples]
SHD_avg_list['Regression'] = [np.mean(scores) for scores in regression_scores]
SHD_std_list['Regression'] = [np.std(scores) for scores in regression_scores]

In [None]:
# Show the mean SHD lists collected so far, keyed by method.
SHD_avg_list

In [None]:
# Show the SHD standard-deviation lists collected so far, keyed by method.
SHD_std_list

### PCMCI

In [None]:
# Load the PCMCI SHD scores and p-values for the current n_nodes and keep only the
# entry stored under key 0.1 (what 0.1 denotes is not visible here -- TODO confirm).
# The files are opened in `with` blocks so the handles are closed right after loading.
# NOTE(review): pickle can execute arbitrary code on load; only use trusted result files.
with open('results/causal/shd_PCMCI_{}.pkl'.format(n_nodes), 'rb') as shd_file:
    shd_pcmci = pickle.load(shd_file)[0.1]
with open('results/causal/p_values_PCMCI_{}.pkl'.format(n_nodes), 'rb') as p_values_file:
    p_values_pcmci = pickle.load(p_values_file)[0.1]

In [None]:
# Collect the PCMCI SHD scores per sample size once, then summarise them
# as mean and standard deviation.
pcmci_scores = [shd_pcmci[size][1] for size in n_samples]
SHD_avg_list['PCMCI'] = [np.mean(scores) for scores in pcmci_scores]
SHD_std_list['PCMCI'] = [np.std(scores) for scores in pcmci_scores]

In [None]:
# Show the mean SHD lists collected so far, keyed by method.
SHD_avg_list

In [None]:
# Show the SHD standard-deviation lists collected so far, keyed by method.
SHD_std_list

In [None]:
# Plot the per-method SHD means and standard deviations over n_samples with the
# project helper from plots.py (std=True presumably enables the std display -- confirm there).
plot_SHD_regression(SHD_avg_list, SHD_std_list, n_samples, n_nodes, std=True)

# Comparison with Granger and CCM

We additionally compare our regression-based approach with Granger and CCM in two cases:
1. Moving from linearity to non-linearity in the relationship between X and Y (compared with Granger)
2. Moving from stationary to non-stationary time-series samples in X and Y (compared with CCM)

In [None]:
# case 1

# Sample sizes and the list of `a` values used in the comparison.
n = [100, 200, 300]
a_list = [0, 2, 4, 6, 8, 10]

# Empty result lists per sample size for Granger and for the regression approach.
results_Granger_1 = {i_n: [] for i_n in n}
results_regression_1 = {i_n: [] for i_n in n}

In [None]:
# copy data from parallel experiments
# Six Granger values per sample size, matching the six entries of a_list.
# Plain assignment (instead of list.extend) keeps this cell idempotent: re-running it
# no longer appends a second copy of the values to the existing lists.
results_Granger_1[100] = [0.6386, 0.5406, 0.4171999999999999, 0.38239999999999996, 0.36920000000000003, 0.3550000000000001]
results_Granger_1[200] = [0.6292, 0.535, 0.3992, 0.3914999999999999, 0.36374999999999996, 0.3545]
results_Granger_1[300] = [0.6345000000000001, 0.541, 0.4068333333333333, 0.38916666666666666, 0.364, 0.34833333333333333]

In [None]:
# Show the Granger results for case 1, keyed by sample size.
results_Granger_1

In [None]:
# copy data from parallel experiments
# Six regression values per sample size, matching the six entries of a_list.
# Plain assignment (instead of list.extend) keeps this cell idempotent: re-running it
# no longer appends a second copy of the values to the existing lists.
results_regression_1[100] = [0.34, 0.34, 0.28, 0.36, 0.34, 0.38]
results_regression_1[200] = [0.38, 0.3, 0.38, 0.34, 0.35, 0.35]
results_regression_1[300] = [0.3, 0.3, 0.4, 0.35, 0.3, 0.35]

In [None]:
# Show the regression results for case 1, keyed by sample size.
results_regression_1

In [None]:
# Sample sizes for the comparison plot (same values as the list `n` that keys the
# result dicts) and the label of the method compared against.
n_samples = [100, 200, 300]
comp_method = 'Granger'

# Plot the Granger results against the regression results over a_list with the project
# helper from plots.py (the exact meaning of std=True is defined there -- confirm).
plot_comparison(results_Granger_1, results_regression_1, n_samples, a_list, comp_method=comp_method, std=True)

In [None]:
# case 2

# Empty result lists per sample size for CCM and for the regression approach;
# the sample sizes are taken from the existing list `n`.
results_ccm_2 = {i_n: [] for i_n in n}
results_regression_2 = {i_n: [] for i_n in n}

In [None]:
# copy data from parallel experiments
# Six CCM values per sample size, matching the six entries of a_list.
# Plain assignment (instead of list.extend) keeps this cell idempotent: re-running it
# no longer appends a second copy of the values to the existing lists.
results_ccm_2[100] = [0.49350000000000005, 0.3495, 0.26999999999999996, 0.251, 0.24100000000000005, 0.28500000000000003]
results_ccm_2[200] = [0.49675, 0.33625, 0.26275000000000004, 0.23500000000000001, 0.2355, 0.27424999999999994]
results_ccm_2[300] = [0.4996666666666666, 0.3388333333333334, 0.24383333333333335, 0.23199999999999998, 0.24300000000000002, 0.2706666666666666]

In [None]:
# Show the CCM results for case 2, keyed by sample size.
results_ccm_2

In [None]:
# copy data from parallel experiments
# Six regression values per sample size, matching the six entries of a_list.
# Plain assignment (instead of list.extend) keeps this cell idempotent: re-running it
# no longer appends a second copy of the values to the existing lists.
results_regression_2[100] = [0.45, 0.45, 0.4, 0.3, 0.3, 0.4]
results_regression_2[200] = [0.3, 0.35, 0.3, 0.3, 0.5, 0.35]
results_regression_2[300] = [0.35, 0.4, 0.3, 0.35, 0.3, 0.35]

In [None]:
# Show the regression results for case 2, keyed by sample size.
results_regression_2

In [None]:
# Label of the method compared against in this plot.
comp_method = 'CCM'

# Plot the CCM results against the regression results over a_list with the project
# helper from plots.py; n_samples and a_list are reused from the earlier cells.
plot_comparison(results_ccm_2, results_regression_2, n_samples, a_list, comp_method=comp_method, std=True)