# Results Notebook for Large Bnlearn Graphs

In [1]:
import pandas as pd
import numpy as np

import importlib
import utils
importlib.reload(utils)
from utils import plot_runtime, double_bar_chart_plotly, process_model_names_and_runtime_v1_data, process_mean_std_sid_data, DAG_NODES_MAP, DAG_EDGES_MAP

In [2]:
import glob

# Collect every per-run cpdag_metrics.csv under the V2 bnlearn results folder.
# glob returns matches in arbitrary (filesystem-dependent) order, so sort them
# to make the concatenated row order -- and the row previewed below -- reproducible.
csv_files = sorted(glob.glob('../../results/gradual/v2_run_for_bnlearn/*/*/cpdag_metrics.csv'))

# pd.concat fails with an uninformative "No objects to concatenate" on an empty
# list; fail early with the pattern that matched nothing instead.
if not csv_files:
    raise FileNotFoundError(
        "No cpdag_metrics.csv files matched "
        "'../../results/gradual/v2_run_for_bnlearn/*/*/cpdag_metrics.csv' "
        "(check the notebook's working directory)"
    )

# Concatenate all the csv files into a single DataFrame
v2_data_bn = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)

# Preview the first row as a column-name -> value listing
# (a Series is already displayed vertically, so no transpose is needed).
v2_data_bn.loc[0]

nnz                                                                      12
fdr                                                                  0.8333
tpr                                                                    0.25
fpr                                                                     0.5
precision                                                            0.1667
recall                                                                 0.25
F1                                                                      0.2
shd                                                                    16.0
sid_low                                                                15.0
sid_high                                                               15.0
dataset                                                                asia
seed                                                                   5864
n_nodes                                                                   8
n_edges     

In [3]:
# Load the CPDAG metrics of the existing (baseline) methods on the bnlearn graphs.
baselines_data_bn = pd.read_csv(
    '../../results/existing/bnlearn_graphs/all_existing_methods_metrics_cpdag.csv'
)

In [4]:
# List the distinct datasets present in the baseline results.
baselines_data_bn['dataset'].unique()

array(['cancer', 'earthquake', 'survey', 'asia', 'sachs', 'child',
       'insurance'], dtype=object)

In [5]:
# The FGS baseline results are stored in their own sub-folder; keep the frame
# bound to its own name as well as merging it into the main baseline table.
baselines_data_bn_fgs = pd.read_csv(
    '../../results/existing/bnlearn_graphs/fgs/all_existing_methods_metrics_cpdag.csv'
)

baselines_data_bn = (
    pd.concat(
        [
            pd.read_csv('../../results/existing/bnlearn_graphs/all_existing_methods_metrics_cpdag.csv'),
            baselines_data_bn_fgs,
        ],
        ignore_index=True,
    )
    .assign(
        # Relabel rows recorded as 'ABAPC (Ours)' so they read 'ABAPC (Original)'.
        model=lambda frame: frame['model'].mask(
            frame['model'] == 'ABAPC (Ours)', 'ABAPC (Original)'
        ),
        # Attach per-dataset sizes looked up from the utils maps
        # (datasets missing from a map come out as NaN).
        n_nodes=lambda frame: frame['dataset'].map(DAG_NODES_MAP),
        n_edges=lambda frame: frame['dataset'].map(DAG_EDGES_MAP),
    )
)

In [6]:
# Tag every V2 row with its model label so it can sit alongside the baselines.
v2_data_bn['model'] = 'V2'

# Total runtime per row: the four timing columns added together, left to right
# (a NaN in any component yields NaN for the total).
v2_data_bn['elapsed'] = (
    v2_data_bn['elapsed_bsaf_creation']
    .add(v2_data_bn['elapsed_model_solution'])
    .add(v2_data_bn['aba_elapsed'])
    .add(v2_data_bn['ranking_elapsed'])
)

In [7]:
# Single source of truth: (method, bar colour) pairs in plotting order.
method_colors = [
    ('Random', 'grey'),
    ('FGS', '#b85c00'),
    ('NOTEARS-MLP', '#9454c4'),
    ('MPC', '#379f9f'),
    ('ABAPC (Original)', '#0085CA'),
    ('V2', '#ff8c00'),
]

# Ordered list of methods to plot.
methods = [method for method, _ in method_colors]

# Display names: every method is shown under its own name.
names_dict = {method: method for method in methods}

# Colour assigned to each method.
colors_dict = dict(method_colors)

In [8]:
# Run the same utils summarisation over the baseline results and the V2 results
# (baselines first, then V2).
baselines_bn_processed, v2_data_bn_processed = map(
    process_mean_std_sid_data,
    (baselines_data_bn, v2_data_bn),
)


In [9]:

# Datasets included in the figure, in the row order used for plot_data.
dataset_order = ['asia', 'sachs', 'child']

# Stack the baseline rows for those datasets on top of all processed V2 rows.
plot_data = pd.concat(
    [
        baselines_bn_processed[baselines_bn_processed['dataset'].isin(dataset_order)],
        v2_data_bn_processed,
    ],
    ignore_index=True,
)

# Regroup the rows dataset by dataset (asia, then sachs, then child); rows of
# any other dataset are dropped, and the order within each dataset is kept.
plot_data = pd.concat(
    [plot_data[plot_data['dataset'] == dataset_name] for dataset_name in dataset_order]
)

# Hard-coded summary row for 'ABAPC (Original)' on asia.
# The n_* entries equal the corresponding raw values divided by 8.
# NOTE(review): the provenance of these numbers is not visible in this notebook -- confirm.
abapc_asia_data = pd.DataFrame([{
    'dataset': 'asia',
    'model': 'ABAPC (Original)',
    'n_nodes': 8,
    'n_edges': 8,
    'sid_low_mean': 11.72,
    'sid_high_mean': 33.52,
    'sid_low_std': 6.79,
    'sid_high_std': 7.92,
    'precision_mean': 0.49,
    'precision_std': 0.18,
    'recall_mean': 0.51,
    'recall_std': 0.18,
    'f1_mean': 0.5,
    'f1_std': 0.18,
    'shd_mean': 5.24,
    'shd_std': 2.26,
    'n_shd_mean': 0.655,
    'n_shd_std': 0.2825,
    'nnz_mean': 8.44,
    'nnz_std': 0.84,
    'n_sid_low_mean': 1.465,
    'n_sid_high_mean': 4.19,
    'n_sid_low_std': 0.84875,
    'n_sid_high_std': 0.99,
}])

# Append the manual row after the regrouped rows and renumber the index.
plot_data = pd.concat([plot_data, abapc_asia_data], ignore_index=True)

# Turn the dataset name into a two-part label "<name><br>|V|=<nodes>, |E|=<edges>"
# ('<br>' is presumably rendered as a line break by the Plotly chart -- confirm).
node_counts = plot_data['n_nodes'].astype(str)
edge_counts = plot_data['n_edges'].astype(str)
plot_data['dataset'] = (
    plot_data['dataset'] + '<br>' + '|V|=' + node_counts + ', |E|=' + edge_counts
)

In [10]:
# Figure size shared by the chart layout and the exported image.
FIG_WIDTH, FIG_HEIGHT = 1300, 600

# Both y-axes use the same range.
sid_axis_range = (0, 16)

# Keyword options for the utils chart helper: which columns to plot, their
# legend names / axis labels, and the layout spacing values.
chart_options = dict(
    vars_to_plot=['n_sid_low', 'n_sid_high'],
    names=['Best', 'Worst'],
    labels=['Normalised SID = SID / Number of Edges in DAG', ''],
    methods=methods,
    dist_between_lines=0.1565,
    lin_space=6,
    nl_space=6,
    intra_dis=0.161,
    inter_dis=0.174,
    start_pos=0.04,
    width=FIG_WIDTH,
    height=FIG_HEIGHT,
    range_y1=sid_axis_range,
    range_y2=sid_axis_range,
)

fig = double_bar_chart_plotly(plot_data, names_dict, colors_dict, **chart_options)

# Export a static PNG at 3x scale next to the notebook.
fig.write_image('v2-sid-large.png', scale=3, width=FIG_WIDTH, height=FIG_HEIGHT)