In [1]:
import os
import sys
import numpy as np 
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import roc_curve, auc


sys.path.append('..')

from src.benchmark.d2c_wrapper import D2C
from src.benchmark.dynotears import DYNOTEARS
from src.benchmark.granger import Granger
from src.benchmark.pcmci import PCMCI
from src.benchmark.var import VAR
from src.benchmark.varlingam import VARLiNGAM

from src.benchmark.metrics import make_plots, compute_roc_auc_curves
from src.descriptors.d2c_past_gen import DescriptorsGenerator

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Experiment configuration shared by the cells below.

# Forwarded as `maxlags` to every benchmark method.
series_time_lag = 3

# Forwarded as `n_jobs` to the descriptor generator and to every benchmark method.
n_jobs = 50

# Forwarded as `n_gen_proc` to the D2C wrapper.
n_gen_proc = 20

# Not referenced by the cells visible here -- presumably they describe how the
# synthetic series were generated; TODO confirm against the data-generation code.
timesteps_per_series = 250
n_series_per_generative_process = 50
seed = 42

In [3]:
import pandas as pd 

def from_preds_to_df(preds):
    """Stack per-graph predictions into a single DataFrame.

    Parameters
    ----------
    preds : mapping
        Maps a graph identifier to an indexable entry. ``entry[0]`` is handed
        to ``pd.DataFrame`` and ``entry[1]`` becomes the ``predicted_proba``
        column of that frame.

    Returns
    -------
    pandas.DataFrame
        The per-graph frames concatenated in the mapping's iteration order,
        each tagged with a ``graph_id`` column. The index is not reset, so
        row labels repeat from one graph to the next.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``preds`` is empty.
    """
    def _tagged_frame(graph_id, entry):
        # Columns are added in place, one after the other, so their order in
        # the result is: original columns, predicted_proba, graph_id.
        frame = pd.DataFrame(entry[0])
        frame['predicted_proba'] = entry[1]
        frame['graph_id'] = graph_id
        return frame

    per_graph = [_tagged_frame(graph_id, entry) for graph_id, entry in preds.items()]
    return pd.concat(per_graph)

In [5]:
from tqdm import tqdm
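# Run every benchmark method on each synthetic dataset and pickle its predictions
# under ../results/predictions/. Only needed if the methods have not been evaluated
# yet: the run recorded below took roughly 2.5 hours in total.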

# For each (number of variables, noise std) configuration: load the synthetic
# data, run all six methods, and pickle one predictions DataFrame per method to
# ../results/predictions/<method>/n<n_variables>_s<noise_std>.pkl

# The output root does not depend on the loop variables, so create it once.
pickle_path = os.path.join('..','results', 'predictions')
os.makedirs(pickle_path, exist_ok=True)

for n_variables in [5,10,20]:
    for noise_std in tqdm([0.01,0.1,0.3]):

        output_folder = f'data_N{n_variables}_std{noise_std}/'
        data_path = os.path.join('..','data', 'synthetic', output_folder)

        descr_gen = DescriptorsGenerator(data_path = data_path, n_jobs=n_jobs, mutual_information_proxy='Ridge')
        data = descr_gen.get_observations()
        ground_truth = descr_gen.get_causal_dfs()

        # Keyword arguments that every method receives unchanged.
        shared_kwargs = dict(maxlags=series_time_lag, n_jobs=n_jobs, ground_truth=ground_truth)

        # os.path.join yields the same string as concatenation while data_path
        # ends with a separator, and stays correct if that ever changes.
        d2c = D2C(data, **shared_kwargs,
                  descriptors_path=os.path.join(data_path, 'descriptors_var.pkl'),
                  n_variables=n_variables, suffix='', n_gen_proc=n_gen_proc)
        var = VAR(data, **shared_kwargs)
        granger = Granger(data, **shared_kwargs)
        pcmci = PCMCI(data, **shared_kwargs)
        varlingam = VARLiNGAM(data, **shared_kwargs)
        dynotears = DYNOTEARS(data, **shared_kwargs)

        # Keys double as the per-method output sub-directory names.
        methods = {
            'd2c': d2c,
            'var': var,
            'granger': granger,
            'pcmci': pcmci,
            'varlingam': varlingam,
            'dynotears': dynotears,
        }

        # Explicit run order (PCMCI before Granger), independent of the
        # dictionary order above. Author's timing notes: D2C "ETA 3m27s",
        # VAR "ETA 23s" (configuration not stated).
        for name in ('d2c', 'var', 'pcmci', 'granger', 'varlingam', 'dynotears'):
            methods[name].run()

        # Every method except D2C is filtered with D2C's causal DataFrames --
        # presumably so all methods are scored on the same candidate pairs;
        # confirm in src.benchmark.
        causal_dfs = d2c.get_causal_dfs()
        for name, method in methods.items():
            if name != 'd2c':
                method.filter_causal_dfs(causal_dfs)

        # Collect all raw predictions first, then convert them to DataFrames.
        predictions = {name: method.get_predictions() for name, method in methods.items()}
        prediction_dfs = {name: from_preds_to_df(preds) for name, preds in predictions.items()}

        result_file = f'n{n_variables}_s{noise_std}.pkl'
        for name, method_df in prediction_dfs.items():
            method_dir = os.path.join(pickle_path, name)
            os.makedirs(method_dir, exist_ok=True)
            method_df.to_pickle(os.path.join(method_dir, result_file))



  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████| 3/3 [20:46<00:00, 415.64s/it]
100%|██████████| 3/3 [34:24<00:00, 688.28s/it]
100%|██████████| 3/3 [1:42:06<00:00, 2042.00s/it]
