# Analysis of causal discovery algorithm on synthetic data

In [None]:
import os
import numpy as np
import pickle
from tqdm.notebook import tqdm
from synthetic_data import generate_data
from causal import causal_eval

### Parameters

In [None]:
# number of trials and number of permutations
n_trials, n_perms = 200, 1000

# sample sizes to evaluate, and the number of points at which the functional
# data samples are (randomly) observed and discretised
n_samples = [100, 150, 200]
n_obs = n_preds = 100

# discretised period: n_preds evenly spaced points on [0, upper_limit]
upper_limit = 1
pred_points = np.linspace(start=0, stop=upper_limit, num=n_preds)

# number of Fourier basis functions, and standard deviation of the normal
# distribution from which the coefficients are sampled
n_basis, sd = 3, 1

# kernels to compare in the independence test
K_list = ['K_ID', 'K_dft1']

# statistical significance level
alpha = 0.05

In [None]:
# create folders to save results; makedirs builds 'results' and
# 'results/causal' in a single call, and exist_ok=True makes it a no-op when
# they are already present (no separate existence check that could race)
os.makedirs('results/causal', exist_ok=True)

Parameters specific to the evaluation on synthetic data

In [None]:
# data-generation settings (`test` is handed to generate_data as `dep`)
test, n_vars, prob = 'joint', 2, 0.5

# causal discovery variant handed to causal_eval
cd_type = 'regression'

# the higher a is, the easier historical dependence is to detect
a = 8

# regression parameters
n_intervals, analyse = 12, False

# constraint parameters
find_lamb, lambs = False, 0.01

# NOTE(review): this replaces the sample sizes defined in the general
# parameters cell with a single sample size of 8 -- confirm this is intended
n_samples = [8]

In [None]:
# Result containers, each indexed as <name>_dict[n_sample][K]:
#   precisions / recalls / f1_scores : per-trial scores returned by causal_eval
#   averages                         : [avg precision, avg recall, avg F1-score]
#   DAGs / p_values                  : the CPDAGs and p-values returned by causal_eval
precisions_dict = {}
recalls_dict = {}
f1_scores_dict = {}
averages_dict = {}
DAGs_dict = {}
p_values_dict = {}

for n_sample in tqdm(n_samples):
    print('Sample size:', n_sample)
    precisions_dict[n_sample] = {}
    recalls_dict[n_sample] = {}
    f1_scores_dict[n_sample] = {}
    averages_dict[n_sample] = {}
    DAGs_dict[n_sample] = {}
    p_values_dict[n_sample] = {}

    for K in K_list:
        print('Kernel:', K)

        # generate synthetic data
        # NOTE(review): data is regenerated for every kernel, so the kernels
        # are not evaluated on the same data sets -- confirm this is intended
        edges_dict, X_dict = generate_data(
            dep=test, n_samples=n_sample, n_trials=n_trials, n_obs=n_obs,
            n_preds=n_preds, n_vars=n_vars, a=a, upper_limit=upper_limit,
            n_basis=n_basis, sd=sd, prob=prob,
        )

        # conduct n trials
        precisions, recalls, f1_scores, CPDAGs, p_values = causal_eval(
            cd_type=cd_type, X_dict=X_dict, edges_dict=edges_dict,
            n_intervals=n_intervals, n_trials=n_trials, n_perms=n_perms,
            alpha=alpha, K=K, lambs=lambs, find_lamb=find_lamb,
            analyse=analyse,
        )

        precisions_dict[n_sample][K] = list(precisions)
        recalls_dict[n_sample][K] = list(recalls)
        f1_scores_dict[n_sample][K] = list(f1_scores)

        # keep the remaining outputs of causal_eval as returned; previously
        # they were discarded and these two dictionaries stayed empty
        DAGs_dict[n_sample][K] = CPDAGs
        p_values_dict[n_sample][K] = p_values

        # calculate average precision, recall and F1-score over the trials
        avg_precision = np.sum(precisions_dict[n_sample][K]) / n_trials
        avg_recall = np.sum(recalls_dict[n_sample][K]) / n_trials
        avg_f1_score = np.sum(f1_scores_dict[n_sample][K]) / n_trials

        averages_dict[n_sample][K] = [avg_precision, avg_recall, avg_f1_score]

        print('Average precision:', avg_precision)
        print('Average recall:', avg_recall)
        print('Average F1-score:', avg_f1_score)
        print('----------')
    print('----------')

# save each result dictionary to results/causal/<prefix>_<cd_type>_<n_vars>.pkl;
# the with-statement closes the file even if pickling raises
for prefix, results in (
    ('precision', precisions_dict),
    ('recall', recalls_dict),
    ('f1', f1_scores_dict),
    ('averages', averages_dict),  # was the undefined name `averages` (NameError)
):
    path = 'results/causal/{}_{}_{}.pkl'.format(prefix, cd_type, n_vars)
    with open(path, 'wb') as out_file:
        pickle.dump(results, out_file)