In [1]:
import os
import sys
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import pandas as pd
from sklearn.metrics import average_precision_score, accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score, roc_auc_score, precision_recall_curve, roc_curve, auc

sys.path.append('..')
from src.benchmark.var import VAR
from src.benchmark.d2c_wrapper import D2C
from src.benchmark.metrics import make_plots, compute_roc_auc_curves, precision_top_k
from src.descriptors.d2c_past_gen import DescriptorsGenerator

In [15]:
# Evaluate every benchmark method on every (n_variables, noise_std) setting.
# Each predictions pickle is expected to provide the 'is_causal' labels and the
# 'predicted_proba' scores read below; one dict of metrics is stored per setting.
pickle_path = os.path.join('..','results', 'predictions')
methods = ['d2c','var','dynotears','granger','pcmci','varlingam']
variables = [3,5,10,20]
noises = [0.01,0.1,0.3,0.5,0.75]

# Row order of the final table: methods outermost, noise levels innermost.
settings = [(method,n_variables,noise_std)
            for method in methods
            for n_variables in variables
            for noise_std in noises]

# (column name, k) pairs for the precision-at-top-k columns.
top_k_columns = (('p10',10), ('p50',50), ('p100',100), ('p500',500), ('p1000',1000))

results = {}
for method, n_variables, noise_std in settings:
    df = pd.read_pickle(os.path.join(pickle_path,method,f'n{n_variables}_s{noise_std}.pkl'))

    # Class counts: n1 = rows flagged 'is_causal', n0 = all remaining rows.
    n_positive = int(df['is_causal'].sum())
    row = {'n1': n_positive, 'n0': int(len(df) - n_positive)}

    for column, k in top_k_columns:
        row[column] = precision_top_k(df,top_k=k)

    labels = df['is_causal']
    scores = df['predicted_proba']
    # Hard decisions for the threshold-based metrics: score strictly above 0.5.
    hard_predictions = scores > 0.5

    row['accuracy'] = accuracy_score(labels, hard_predictions)
    # zero_division=np.nan: undefined precision/recall/F1 is reported as NaN.
    row['precision'] = precision_score(labels, hard_predictions, zero_division=np.nan)
    row['recall'] = recall_score(labels, hard_predictions, zero_division=np.nan)
    row['f1'] = f1_score(labels, hard_predictions, zero_division=np.nan)
    # Balanced error rate is the complement of balanced accuracy.
    row['ber'] = 1 - balanced_accuracy_score(labels, hard_predictions)
    row['roc_auc'] = roc_auc_score(labels, scores)
    # ROC AUC restricted to false-positive rates up to 0.1, 0.25 and 0.5.
    row['p_auc_10'] = roc_auc_score(labels, scores, max_fpr=0.1)
    row['p_auc_25'] = roc_auc_score(labels, scores, max_fpr=0.25)
    row['p_auc_50'] = roc_auc_score(labels, scores, max_fpr=0.50)

    # Area under the precision-recall curve (trapezoidal), plus average precision.
    pr_precisions, pr_recalls, _ = precision_recall_curve(labels, scores)
    row['pr_auc'] = auc(pr_recalls, pr_precisions)
    row['ap'] = average_precision_score(labels, scores)

    results[(method,n_variables,noise_std)] = row

# One row per setting, one column per metric, indexed by a named MultiIndex.
results = pd.DataFrame(results,columns=settings).T
results.index = pd.MultiIndex.from_tuples(results.index, names=['method','n_variables','noise_std'])
       
    

In [17]:
# The class-count columns come out of the transposed frame as floats;
# store them as integers.
for count_column in ('n1', 'n0'):
    results[count_column] = results[count_column].astype(int)

In [18]:
# Preview the first rows of the metrics table.
results.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n1,n0,p10,p50,p100,p500,p1000,accuracy,precision,recall,f1,ber,roc_auc,p_auc_10,p_auc_25,p_auc_50,pr_auc,ap
method,n_variables,noise_std,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
d2c,3,0.01,8012,14608,0.8,0.72,0.68,0.59,0.606,0.659063,0.524382,0.402646,0.455521,0.398828,0.655367,0.540754,0.580499,0.620096,0.495911,0.48561
d2c,3,0.1,8012,14608,0.7,0.66,0.65,0.65,0.672,0.682007,0.563934,0.450824,0.501075,0.370186,0.698744,0.55819,0.608742,0.659284,0.542821,0.530729
d2c,3,0.3,8012,14608,0.9,0.94,0.88,0.818,0.781,0.699912,0.608013,0.42998,0.503729,0.36103,0.716398,0.581242,0.632962,0.679434,0.588592,0.571627
d2c,3,0.5,8016,14564,0.9,0.94,0.84,0.834,0.798,0.704517,0.623303,0.423777,0.50453,0.358593,0.706962,0.58896,0.63882,0.680221,0.593705,0.576764
d2c,3,0.75,8029,14704,1.0,0.88,0.84,0.816,0.786,0.704878,0.620175,0.424212,0.503809,0.358827,0.696042,0.585215,0.636419,0.674744,0.582773,0.565412


In [20]:
# Save the metrics table as CSV. The output directory is created first because
# `DataFrame.to_csv` raises OSError when the target folder does not exist
# (e.g. on a fresh checkout where no metrics have been written yet).
metrics_dir = os.path.join('..','results','metrics')
os.makedirs(metrics_dir, exist_ok=True)
results.to_csv(os.path.join(metrics_dir,'metrics.csv'))

Computing metrics for each generative process

In [2]:
# Same metrics as the aggregated table, but computed separately for every
# generative process within each (method, n_variables, noise_std) setting.

# Graph ids are mapped to a process index by integer division with this value
# (process = graph_id // GRAPHS_PER_PROCESS).
GRAPHS_PER_PROCESS = 50
results = {}
pickle_path = os.path.join('..','results', 'predictions')
methods = ['d2c','var','dynotears','granger','pcmci','varlingam']
variables = [3,5,10,20]
noises = [0.01,0.1,0.3,0.5,0.75]

for method in methods:
    for n_variables in variables:
        for noise_std in noises:

            df = pd.read_pickle(os.path.join(pickle_path,method,f'n{n_variables}_s{noise_std}.pkl'))

            df['generative_process'] = df['graph_id'] // GRAPHS_PER_PROCESS

            # A single groupby pass replaces one boolean-mask scan per process.
            # sort=False keeps the processes in order of first appearance, which is
            # the order iteration over .unique() would give.
            for process, df_process in df.groupby('generative_process', sort=False):

                # Class counts: n1 = rows flagged 'is_causal', n0 = all remaining rows.
                num_class_1 = int(df_process['is_causal'].sum())
                num_class_0 = int(len(df_process) - num_class_1)

                p10 = precision_top_k(df_process,top_k=10)
                p50 = precision_top_k(df_process,top_k=50)
                p100 = precision_top_k(df_process,top_k=100)
                p500 = precision_top_k(df_process,top_k=500)
                p1000 = precision_top_k(df_process,top_k=1000)

                y_test = df_process['is_causal']
                y_score = df_process['predicted_proba']
                # Hard decisions for the threshold-based metrics: score strictly above 0.5.
                y_hat = y_score > 0.5

                accuracy = accuracy_score(y_test, y_hat)
                # zero_division=np.nan: undefined precision/recall/F1 is reported as NaN.
                precision = precision_score(y_test, y_hat, zero_division=np.nan)
                recall = recall_score(y_test, y_hat, zero_division=np.nan)
                f1 = f1_score(y_test, y_hat, zero_division=np.nan)
                # Balanced error rate is the complement of balanced accuracy.
                ber = 1 - balanced_accuracy_score(y_test, y_hat)
                roc_auc = roc_auc_score(y_test, y_score)
                # ROC AUC restricted to false-positive rates up to 0.1, 0.25 and 0.5.
                p_auc_10 = roc_auc_score(y_test, y_score, max_fpr=0.1)
                p_auc_25 = roc_auc_score(y_test, y_score, max_fpr=0.25)
                p_auc_50 = roc_auc_score(y_test, y_score, max_fpr=0.50)

                # Area under the precision-recall curve (trapezoidal), plus average precision.
                precisions, recalls, _ = precision_recall_curve(y_test, y_score)
                pr_auc = auc(recalls, precisions)

                ap = average_precision_score(y_test, y_score)

                results[(process,method,n_variables,noise_std)] = {'n1':num_class_1,
                                                                    'n0':num_class_0,
                                                                    'p10':p10,
                                                                    'p50':p50,
                                                                    'p100':p100,
                                                                    'p500':p500,
                                                                    'p1000':p1000,
                                                                    'accuracy':accuracy,
                                                                    'precision':precision,
                                                                    'recall':recall,
                                                                    'f1':f1,
                                                                    'ber':ber,
                                                                    'roc_auc':roc_auc,
                                                                    'p_auc_10':p_auc_10,
                                                                    'p_auc_25':p_auc_25,
                                                                    'p_auc_50':p_auc_50,
                                                                    'pr_auc':pr_auc,
                                                                    'ap':ap
                                                                    }

# One row per (process, setting), one column per metric.
results = pd.DataFrame(results).T
results.index = pd.MultiIndex.from_tuples(results.index, names=['process','method','n_variables','noise_std'])
# Building the frame column-wise and transposing upcasts the class counts to
# float (e.g. 404.0); store them as integers so the table and its CSV export
# carry whole-number counts.
results = results.astype({'n1': int, 'n0': int})
       
    

In [3]:
# Display the per-process metrics table.
results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,n1,n0,p10,p50,p100,p500,p1000,accuracy,precision,recall,f1,ber,roc_auc,p_auc_10,p_auc_25,p_auc_50,pr_auc,ap
process,method,n_variables,noise_std,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,d2c,3,0.01,404.0,756.0,0.6,0.40,0.47,0.498,0.402,0.655172,0.510638,0.237624,0.324324,0.442035,0.718645,0.517845,0.567244,0.647921,0.499011,0.494824
1,d2c,3,0.01,592.0,673.0,0.7,0.68,0.67,0.650,0.528,0.616601,0.674267,0.349662,0.460512,0.399463,0.688178,0.532096,0.587235,0.648426,0.623518,0.615028
2,d2c,3,0.01,568.0,667.0,0.3,0.24,0.42,0.534,0.508,0.523887,0.403846,0.073944,0.125000,0.509505,0.586427,0.489787,0.501192,0.540149,0.494152,0.500014
3,d2c,3,0.01,384.0,758.0,0.5,0.46,0.46,0.390,0.339,0.590193,0.384615,0.364583,0.374332,0.465466,0.557127,0.518236,0.531095,0.546814,0.394020,0.386397
4,d2c,3,0.01,289.0,849.0,1.0,0.90,0.88,0.464,0.283,0.801406,0.612100,0.595156,0.603509,0.266615,0.820587,0.680855,0.746144,0.795723,0.675102,0.648439
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,varlingam,20,0.75,310.0,1000.0,0.2,0.20,0.21,0.232,0.232,0.763359,,0.000000,,0.500000,0.481400,0.497233,0.497519,0.495940,0.229694,0.232105
16,varlingam,20,0.75,521.0,1000.0,0.7,0.34,0.26,0.310,0.327,0.657462,,0.000000,,0.500000,0.469497,0.496616,0.489084,0.477589,0.331509,0.332804
17,varlingam,20,0.75,288.0,1000.0,1.0,1.00,1.00,0.572,0.288,0.851708,1.000000,0.336806,0.503896,0.331597,0.996764,0.988925,0.992762,0.995685,0.992466,0.992475
18,varlingam,20,0.75,506.0,1000.0,1.0,1.00,1.00,0.576,0.401,0.666667,1.000000,0.007905,0.015686,0.496047,0.722506,0.675921,0.697247,0.714640,0.679203,0.679453


In [None]:
# Save the per-process metrics table as CSV. The output directory is created
# first because `DataFrame.to_csv` raises OSError when the target folder does
# not exist (e.g. on a fresh checkout where no metrics have been written yet).
metrics_dir = os.path.join('..','results','metrics')
os.makedirs(metrics_dir, exist_ok=True)
results.to_csv(os.path.join(metrics_dir,'metrics_per_process.csv'))