# Analysis of causal discovery algorithm on synthetic data

In [2]:
import numpy as np
from tqdm.notebook import tqdm
from synthetic_data import generate_data
from graph_generation import generate_DAGs
from independence import marginal_indep_test, cond_indep_test, joint_indep_test
from causal import causal_power

### Parameters

In [3]:
# Monte Carlo settings: trials per configuration, permutations per test
n_trials = 200
n_perms = 1000

# sample sizes to sweep over
n_samples = [100, 150, 200]
# number of points at which each functional sample is (randomly) observed
n_obs = 100
# number of points on which each functional sample is discretised
n_preds = 100

# discretisation grid: n_preds evenly spaced points on [0, upper_limit]
upper_limit = 1
pred_points = np.linspace(0, upper_limit, num=n_preds)

# number of Fourier basis functions
n_basis = 3
# standard deviation of the normal distribution the coefficients are sampled from
sd = 1

# kernels compared in the independence test
K_list = ['K_ID', 'K_dft']

# significance level of the test
alpha = 0.05

Parameters specific to the evaluation on synthetic data

In [4]:
# number of variables in each generated data set
n_vars = 4
# dependence setting, forwarded to generate_data as `dep`
test = 'joint'

# values of a to sweep over; the higher a is, the easier the
# historical dependence is to detect
a_list = [0, 0.2, 0.4, 0.6, 0.8, 1]

In [5]:
# Sweep over every (sample size, kernel, a) combination: generate synthetic
# data, run causal_power on it and collect the returned value.
#
# Layout of the result: type_II_errors[sample_size][kernel] is a list with one
# entry per value in a_list, in the same order.
# NOTE(review): despite its name, this dict stores whatever causal_power
# returns, which is reported below as the test power -- confirm which quantity
# downstream cells expect.
type_II_errors = {}

for n_sample in tqdm(n_samples):
    sample_key = int(n_sample)
    print('Sample size:', sample_key)
    type_II_errors[sample_key] = {}
    for K in K_list:
        print('Kernel:', K)
        # Bind the result list once and append through this reference, so the
        # key used for storing (str(K)) and the one used for appending can
        # never diverge (previously the append looked up the raw K).
        kernel_powers = []
        type_II_errors[sample_key][str(K)] = kernel_powers
        for a in a_list:
            print('a:', a)
            # generate synthetic data
            edges_dict, X_dict = generate_data(
                dep=test,
                n_samples=sample_key,
                n_trials=n_trials,
                n_obs=n_obs,
                n_preds=n_preds,
                n_vars=n_vars,
                a=a,
                upper_limit=upper_limit,
                n_basis=n_basis,
                sd=sd,
            )

            # conduct n trials
            power = causal_power(
                X_dict=X_dict,
                edges_dict=edges_dict,
                n_trials=n_trials,
                n_perms=n_perms,
                alpha=alpha,
                K=K,
            )
            kernel_powers.append(power)
            print('Test power:', power)
            print('----------')
    print('----------')

  0%|          | 0/3 [00:00<?, ?it/s]

Sample size: 100
Kernel: K_ID
a: 0


KeyboardInterrupt: 

In [6]:
# Regenerate one synthetic data set for manual inspection.
# The configuration is pinned explicitly (first sample size, first value of a)
# instead of reusing the loop variables `n_sample` and `a` left over from the
# sweep cell, whose values depend on where that loop happened to stop.
inspect_n_sample = int(n_samples[0])
inspect_a = a_list[0]
edges_dict, X_dict = generate_data(
    dep=test,
    n_samples=inspect_n_sample,
    n_trials=n_trials,
    n_obs=n_obs,
    n_preds=n_preds,
    n_vars=n_vars,
    a=inspect_a,
    upper_limit=upper_limit,
    n_basis=n_basis,
    sd=sd,
)

In [22]:
# Sanity check: shape of the entry stored under key/index 0 of X_dict.
X_dict[0].shape

(4, 100, 100)