In [1]:
import os

os.environ['MKL_NUM_THREADS'] = '1'  # Limit to 4 threads
os.environ['NUMEXPR_NUM_THREADS'] = '1'
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'  # Limit to 4 threads
os.environ['OMP_NUM_THREADS'] = '1'

import pickle 
import os
import pandas as pd
from d2c.descriptors import D2C, DataLoader
from d2c.benchmark import VARLiNGAM, PCMCI, Granger, DYNOTEARS, D2CWrapper, VAR
from tqdm import tqdm
from d2c.data_generation.utils import show_DAG

N_JOBS = 40
SEED = 42
MB_SIZE = 15
COUPLES_TO_CONSIDER_PER_DAG = 90
maxlags = 5 #this depends 


root = './data/'
destination = './benchmark/'

# if any exception write it on disk 
try:

    #save a counter on disk 
    counter = 0
    to_dos = []
    for file in sorted(os.listdir(root)):
        if file.endswith('.pkl'):
            gen_process_number = int(file.split('_')[0][1:])
            n_variables = int(file.split('_')[1][1:])
            max_neighborhood_size = int(file.split('_')[2][2:])
            noise_std = float(file.split('_')[3][1:-4])

            filename = f'{destination}P{gen_process_number}_N{n_variables}_Nj{max_neighborhood_size}_n{noise_std}.pkl'

            if noise_std != 0.005:
                continue

            if max_neighborhood_size != 8:
                continue

            to_dos.append(file)

    # sort to_dos by number of variables
    to_dos_5_variables = [file for file in to_dos if int(file.split('_')[1][1:]) == 5]
    to_dos_10_variables = [file for file in to_dos if int(file.split('_')[1][1:]) == 10]
    to_dos_25_variables = [file for file in to_dos if int(file.split('_')[1][1:]) == 25]
    to_dos_50_variables = [file for file in to_dos if int(file.split('_')[1][1:]) == 50]

    for todo_subset in [to_dos_5_variables, to_dos_10_variables, to_dos_25_variables]:
    # for todo_subset in [to_dos_50_variables]:
        for file in tqdm(todo_subset):

            gen_process_number = int(file.split('_')[0][1:])
            n_variables = int(file.split('_')[1][1:])
            max_neighborhood_size = int(file.split('_')[2][2:])
            noise_std = float(file.split('_')[3][1:-4])

            filename = f'{destination}P{gen_process_number}_N{n_variables}_Nj{max_neighborhood_size}_n{noise_std}.pkl'

            dataloader = DataLoader(n_variables = n_variables, maxlags = maxlags)
            dataloader.from_pickle(root+file)
            observations = dataloader.get_original_observations()
            dags = dataloader.get_dags()
            true_causal_dfs = dataloader.get_true_causal_dfs()

            var = VAR(ts_list=observations, maxlags=maxlags, n_jobs=N_JOBS)
            var.run()
            causal_dfs_var = var.get_causal_dfs()

            varlingam = VARLiNGAM(ts_list=observations, maxlags=maxlags, n_jobs=N_JOBS)
            varlingam.run()
            causal_dfs_varlingam = varlingam.get_causal_dfs()

            pcmci = PCMCI(ts_list=observations, maxlags=maxlags, n_jobs=N_JOBS)
            pcmci.run()
            causal_dfs_pcmci = pcmci.get_causal_dfs()

            granger = Granger(ts_list=observations, maxlags=maxlags, n_jobs=N_JOBS)
            granger.run()
            causal_dfs_granger = granger.get_causal_dfs()

            dynotears = DYNOTEARS(ts_list=observations, maxlags=maxlags, n_jobs=N_JOBS)
            dynotears.run()
            causal_dfs_dynotears = dynotears.get_causal_dfs()

            with open(filename, 'wb') as f:
                pickle.dump((causal_dfs_var, 
                            causal_dfs_varlingam, 
                            causal_dfs_pcmci,
                            causal_dfs_granger, 
                            causal_dfs_dynotears, 
                            true_causal_dfs), f)     

            with open(f'{destination}counter.txt', 'w') as f:
                f.write(str(counter))   

            counter += 1

except Exception as e:
    print(e)
    with open(f'{destination}exception.txt', 'w') as f:
        f.write(str(e))

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 18/18 [05:09<00:00, 17.20s/it]
100%|██████████| 18/18 [23:26<00:00, 78.15s/it]
100%|██████████| 18/18 [5:05:59<00:00, 1019.99s/it]  


In [2]:
true_causal_dfs

[     from  to  is_causal
 0      10   0        1.0
 1      10   1        0.0
 2      10   2        1.0
 3      10   3        0.0
 4      10   4        0.0
 ..    ...  ..        ...
 495    59   5        0.0
 496    59   6        0.0
 497    59   7        0.0
 498    59   8        0.0
 499    59   9        0.0
 
 [500 rows x 3 columns],
      from  to  is_causal
 0      10   0        1.0
 1      10   1        0.0
 2      10   2        0.0
 3      10   3        0.0
 4      10   4        0.0
 ..    ...  ..        ...
 495    59   5        0.0
 496    59   6        0.0
 497    59   7        0.0
 498    59   8        0.0
 499    59   9        0.0
 
 [500 rows x 3 columns],
      from  to  is_causal
 0      10   0        1.0
 1      10   1        0.0
 2      10   2        0.0
 3      10   3        1.0
 4      10   4        0.0
 ..    ...  ..        ...
 495    59   5        0.0
 496    59   6        0.0
 497    59   7        0.0
 498    59   8        0.0
 499    59   9        0.0
 
 [500 ro

In [26]:
causal_dataframe.set_index(['from', 'to'], inplace=True)

In [22]:
causal_dfs[0][['from','to','is_causal']]

Unnamed: 0,from,to,is_causal
0,10,0,0
1,10,1,0
2,10,2,0
3,10,3,0
4,10,4,0
...,...,...,...
495,59,5,0
496,59,6,0
497,59,7,0
498,59,8,0


In [23]:
causal_dataframe

Unnamed: 0,from,to,is_causal
0,10,0,1.0
1,10,1,0.0
2,10,2,1.0
3,10,3,0.0
4,10,4,0.0
...,...,...,...
495,59,5,0.0
496,59,6,0.0
497,59,7,0.0
498,59,8,0.0


In [25]:
pd.concat([causal_dataframe, causal_dfs[0][['from','to','is_causal']]], axis=1)

Unnamed: 0,from,to,is_causal,from.1,to.1,is_causal.1
0,10,0,1.0,10,0,0
1,10,1,0.0,10,1,0
2,10,2,1.0,10,2,0
3,10,3,0.0,10,3,0
4,10,4,0.0,10,4,0
...,...,...,...,...,...,...
495,59,5,0.0,59,5,0
496,59,6,0.0,59,6,0
497,59,7,0.0,59,7,0
498,59,8,0.0,59,8,0


In [27]:
causal_dfs[0][['from','to','is_causal']].join(causal_dataframe, on=['from','to'], rsuffix='_true')

Unnamed: 0,from,to,is_causal,is_causal_true
0,10,0,0,1.0
1,10,1,0,0.0
2,10,2,0,1.0
3,10,3,0,0.0
4,10,4,0,0.0
...,...,...,...,...
495,59,5,0,0.0
496,59,6,0,0.0
497,59,7,0,0.0
498,59,8,0,0.0
