# Results Notebook

This notebook generates plots of the experiment results stored under `results/`.

The following are plotted:
- SID plots of
    - Gradual - V1
        - SID score - Random graphs - Baselines, existing ABAPC, 4 methods with V1 improved ABAPC
        - SID score - BNlearn 3 datasets - Baselines, existing ABAPC, 4 methods with V1 improved ABAPC
        - Runtime - Random graphs - Baselines, existing ABAPC, 4 methods with V1 improved ABAPC
    - Gradual - V2 - Ablation
        - SID scores - Random graphs - ablation plots
        - SID scores - Random graphs - comparison to baselines, existing ABAPC, V2 best (least approx)
        - Runtime - Random graphs - ablation plots
        - Runtime - Random graphs - comparison to baselines, existing ABAPC, V2. This may be combined with the previous plot.

In [1]:
import pandas as pd
import numpy as np

import importlib
import utils
importlib.reload(utils)
from utils import plot_runtime, double_bar_chart_plotly, process_model_names_and_runtime_v1_data, process_mean_std_sid_data, DAG_NODES_MAP, DAG_EDGES_MAP

## Small Graphs V1 and V2 Plots

In [2]:
import zipfile

# Unpack the zipped fact-sourcing runtime results next to their archives so the
# extracted CSV files can be read with pandas in the loading cell.
for zip_path, target_dir in (
    (
        '../../results/existing/runtime_sourcing_facts_random_graphs/runtime_results.csv.zip',
        '../../results/existing/runtime_sourcing_facts_random_graphs/',
    ),
    (
        '../../results/existing/runtime_sourcing_facts_bnlearn_graphs/runtime_results.csv.zip',
        '../../results/existing/runtime_sourcing_facts_bnlearn_graphs/',
    ),
):
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(target_dir)

In [3]:
# --- Baselines (existing methods) -------------------------------------------
# The FGS results live in their own sub-folder and are appended to the other
# baselines so that each graph family ends up in a single frame.
baselines_data_random_fgs = pd.read_csv('../../results/existing/random_graphs/fgs/all_existing_methods_metrics_cpdag.csv')
baselines_data_random = pd.concat(
    [
        pd.read_csv('../../results/existing/random_graphs/all_existing_methods_metrics_cpdag.csv'),
        baselines_data_random_fgs,
    ],
    ignore_index=True,
)

baselines_data_bn_fgs = pd.read_csv('../../results/existing/bnlearn_graphs/fgs/all_existing_methods_metrics_cpdag.csv')
baselines_data_bn = pd.concat(
    [
        pd.read_csv('../../results/existing/bnlearn_graphs/all_existing_methods_metrics_cpdag.csv'),
        baselines_data_bn_fgs,
    ],
    ignore_index=True,
)

# --- Gradual V1 and V2 metrics ------------------------------------------------
v1_data_random = pd.read_csv('../../results/gradual/v1_random_graphs/cpdag_metrics.csv')
v1_data_bn = pd.read_csv('../../results/gradual/v1_bnlearn_graphs/cpdag_metrics.csv')

v2_data_random = pd.read_csv('../../results/gradual/v2_random_graphs_3_to_6_nodes/cpdag_metrics.csv')
v2_data_bn = pd.read_csv('../../results/gradual/v2_bnlearn_graphs_up_to_6_nodes/cpdag_metrics.csv')

# --- Fact-sourcing runtimes (CSVs extracted from the zip archives) ------------
fact_sourcing_runtime_random = pd.read_csv('../../results/existing/runtime_sourcing_facts_random_graphs/runtime_results.csv')
fact_sourcing_runtime_bn = pd.read_csv('../../results/existing/runtime_sourcing_facts_bnlearn_graphs/runtime_results.csv')


In [4]:
# Display the column names of the V2 random-graph metrics frame.
v2_data_random.columns

Index(['nnz', 'fdr', 'tpr', 'fpr', 'precision', 'recall', 'F1', 'shd',
       'sid_low', 'sid_high', 'dataset', 'seed', 'n_nodes', 'n_edges',
       'neighbourhood_n_nodes', 'max_cycle_length', 'max_ct_depth',
       'max_path_length', 'max_c_set_size', 'search_depth',
       'elapsed_bsaf_creation', 'elapsed_model_solution', 'is_converged',
       'fact_ranking_method', 'model_ranking_method', 'num_edges_est',
       'best_model', 'aba_elapsed', 'ranking_elapsed', 'best_I'],
      dtype='object')

In [5]:
# Tag both V2 result frames with a constant model label and add a total runtime
# column, computed as the sum of the four per-stage timing columns.
for v2_frame in (v2_data_random, v2_data_bn):
    v2_frame['model'] = 'V2'
    v2_frame['elapsed'] = (
        v2_frame['elapsed_bsaf_creation']
        + v2_frame['elapsed_model_solution']
        + v2_frame['aba_elapsed']
        + v2_frame['ranking_elapsed']
    )

In [6]:

# Give every random-graph frame the same dataset label built from its node and
# edge counts (e.g. "|V|=3, |E|=3"). Any existing 'dataset' column is overwritten.
for random_frame in (v1_data_random, baselines_data_random, v2_data_random):
    random_frame['dataset'] = (
        '|V|=' + random_frame['n_nodes'].astype(str)
        + ', |E|=' + random_frame['n_edges'].astype(str)
    )

In [7]:
# Summarise each source of random-graph results with the shared helper, after
# first attaching model names and runtimes to the V1 results.
baselines_random_processed = process_mean_std_sid_data(baselines_data_random)
v1_data_with_model_names = process_model_names_and_runtime_v1_data(
    v1_data_random,
    fact_sourcing_runtime_random,
)
v1_random_processed = process_mean_std_sid_data(v1_data_with_model_names)
v2_data_random_processed = process_mean_std_sid_data(v2_data_random)

# Stack V1, baselines (restricted to graphs with at most 6 nodes) and V2 into
# the single frame used by the SID plots below.
v1_random_plot_data = pd.concat(
    [
        v1_random_processed,
        baselines_random_processed.loc[baselines_random_processed['n_nodes'].le(6)],
        v2_data_random_processed,
    ],
    ignore_index=True,
)

In [8]:
# Peek at the first row of the combined random-graph plot data.
v1_random_plot_data.head(1)

Unnamed: 0,dataset,n_nodes,n_edges,model,sid_low_mean,sid_high_mean,sid_low_std,sid_high_std,precision_mean,precision_std,...,shd_mean,shd_std,nnz_mean,nnz_std,n_sid_low_mean,n_sid_high_mean,n_sid_low_std,n_sid_high_std,n_shd_mean,n_shd_std
0,"|V|=3, |E|=3",3,3,ABAPC (Original),1.66,5.32,2.105484,1.316148,0.489998,0.347347,...,0.96,1.105829,2.44,0.577115,0.553333,1.773333,0.701828,0.438716,0.32,0.36861


In [9]:
# Rename the raw model identifiers to the display names used in the figures.
model_display_names = {
    'Random': 'Random',
    'FGS': 'FGS',
    'NOTEARS-MLP': 'NOTEARS-MLP',
    'MPC': 'MPC',
    'ABAPC (Original)': 'Causal ABA (Original)',
    'V1.1 Refined Fact Ranking': 'Causal ABA (Refined Fact Ranking)',
    'V1.2 Model Selection by Refined Fact Strengths': 'Causal ABA (Refined Model Ranking)',
    'V1.3 Model Selection by Arrows Sum': 'Causal ABA (Arrows Sum Model Ranking)',
    'V1.4 Model Selection by Arrows Mean': 'Causal ABA (Arrows Mean Model Ranking)',
    'V2': 'Gradual Causal ABA (Ours)',
}
# Series.map turns every value that is missing from the dict into NaN, so
# re-running this cell on already-renamed data would wipe most of the column.
# Identity entries for the display names make the cell safe to re-run, while
# unknown raw names are still mapped to NaN exactly as before.
model_display_names.update(
    {display_name: display_name for display_name in list(model_display_names.values())}
)
v1_random_plot_data['model'] = v1_random_plot_data['model'].map(model_display_names)

In [10]:
# Display names of all methods, in the order they should appear in the figures.
methods = [
    'Random',
    'FGS',
    'NOTEARS-MLP',
    'MPC',
    'Causal ABA (Original)',
    'Causal ABA (Refined Fact Ranking)',
    'Causal ABA (Refined Model Ranking)',
    'Causal ABA (Arrows Sum Model Ranking)',
    'Causal ABA (Arrows Mean Model Ranking)',
    'Gradual Causal ABA (Ours)',
]

# Legend names: every method is shown under its own display name.
names_dict = dict(zip(methods, methods))

# One colour per method (CSS colour name or hex code).
colors_dict = {
    'Random': 'grey',
    'FGS': '#b85c00',
    'NOTEARS-MLP': '#9454c4',
    'MPC': '#379f9f',
    'Causal ABA (Original)': '#0085CA',
    'Causal ABA (Refined Fact Ranking)': '#9F8D00',
    'Causal ABA (Refined Model Ranking)': '#9E0000',
    'Causal ABA (Arrows Sum Model Ranking)': '#ff34da',
    'Causal ABA (Arrows Mean Model Ranking)': '#008100',
    'Gradual Causal ABA (Ours)': '#ff8c00',
}

In [11]:
import importlib
import utils
importlib.reload(utils)
from utils import double_bar_chart_plotly


# Normalised SID (best / worst case) on random graphs with at most 6 nodes for
# a selection of methods. The V1 variants 'Refined Fact Ranking',
# 'Refined Model Ranking' and 'Arrows Sum Model Ranking' are left out here.
fig = double_bar_chart_plotly(
    v1_random_plot_data.loc[v1_random_plot_data['n_nodes'] <= 6],
    names_dict,
    colors_dict,
    methods=[
        'Random',
        'FGS',
        'NOTEARS-MLP',
        'MPC',
        'Causal ABA (Original)',
        'Causal ABA (Arrows Mean Model Ranking)',
        'Gradual Causal ABA (Ours)',
    ],
    # What to plot and how to label it.
    vars_to_plot=['n_sid_low', 'n_sid_high'],
    names=['Best', 'Worst'],
    labels=['Normalised SID', ''],
    # Spacing / layout values passed through to the plotting helper.
    dist_between_lines=0.1175,
    intra_dis=0.112,
    inter_dis=0.137,
    lin_space=5,
    nl_space=5,
    start_pos=0.03,
    # Figure size, axis ranges and annotation height.
    width=1300,
    height=600,
    range_y1=(0, 5.5),
    range_y2=(0, 5.5),
    annot_y=0.9,
)
fig.write_image("./v1_v2_random_graphs_sid.png", scale=3)

In [12]:
# Normalised SID (best / worst case) on random graphs with at most 6 nodes for
# every method listed in `methods`.
fig = double_bar_chart_plotly(
    v1_random_plot_data.loc[v1_random_plot_data['n_nodes'] <= 6],
    names_dict,
    colors_dict,
    methods=methods,
    # What to plot and how to label it.
    vars_to_plot=['n_sid_low', 'n_sid_high'],
    names=['Best', 'Worst'],
    labels=['Normalised SID', ''],
    # Spacing / layout values passed through to the plotting helper.
    dist_between_lines=0.1175,
    intra_dis=0.112,
    inter_dis=0.137,
    lin_space=5,
    nl_space=5,
    start_pos=0.03,
    # Figure size, axis ranges and annotation height.
    width=1300,
    height=600,
    range_y1=(0, 6),
    range_y2=(0, 6),
    annot_y=0.85,
)
fig.write_image("./v1_v2_random_graphs_sid_all.png", scale=3)

In [13]:
# fig = double_bar_chart_plotly(v1_random_plot_data, 
#                         names_dict,
#                         colors_dict,
#                         vars_to_plot=['precision', 'recall'],
#                         names=['Precision', 'Recall'],
#                         labels=['Precision', 'Recall'],
#                         methods=methods,
#                         dist_between_lines=0.1175,
#                         intra_dis=0.112,
#                         inter_dis=0.137,
#                         lin_space=5,
#                         nl_space=5,
#                         start_pos = 0.03,
#                             width=1300,
#                             height=600,
#                             range_y1=(0, 1),
#                             range_y2=(0, 1))

In [14]:
# fig = double_bar_chart_plotly(v1_random_plot_data[v1_random_plot_data['n_nodes'] <= 6], 
#                         names_dict,
#                         colors_dict,
#                         vars_to_plot=['f1', 'n_shd'],
#                         names=['f1', 'n_shd'],
#                         labels=['f1', 'n_shd'],
#                         methods=methods,
#                         dist_between_lines=0.1175,
#                         intra_dis=0.112,
#                         inter_dis=0.137,
#                         lin_space=5,
#                         nl_space=5,
#                         start_pos = 0.03,
#                             width=1300,
#                             height=600,
#                             range_y1=(0, 1.1),
#                             range_y2=(0, 2))

Now the same SID plots for the bnlearn datasets (baselines, V1 and V2).

In [15]:
# Attach node and edge counts to every bnlearn frame by looking the dataset
# name up in the maps imported from utils.
for bn_frame in (v1_data_bn, baselines_data_bn, fact_sourcing_runtime_bn, v2_data_bn):
    bn_frame['n_nodes'] = bn_frame['dataset'].map(DAG_NODES_MAP)
    bn_frame['n_edges'] = bn_frame['dataset'].map(DAG_EDGES_MAP)

# Summarise each source of bnlearn results with the shared helper, after first
# attaching model names and runtimes to the V1 results.
baselines_bn_processed = process_mean_std_sid_data(baselines_data_bn)
v1_bn_data_with_model_names = process_model_names_and_runtime_v1_data(
    v1_data_bn,
    fact_sourcing_runtime_bn,
)
v1_bn_processed = process_mean_std_sid_data(v1_bn_data_with_model_names)
v2_data_bn_processed = process_mean_std_sid_data(v2_data_bn)

# Stack V1, baselines (restricted to graphs with at most 6 nodes) and V2 into
# the single frame used by the bnlearn SID plots.
v1_bn_plot_data = pd.concat(
    [
        v1_bn_processed,
        baselines_bn_processed.loc[baselines_bn_processed['n_nodes'].le(6)],
        v2_data_bn_processed,
    ],
    ignore_index=True,
)

In [16]:

# Build the two-line axis label "<DATASET><br>|V|=n, |E|=m" for each row.
# Rows whose label already contains '<br>' are left alone, so re-running this
# cell does not append the node/edge suffix a second time.
bn_dataset_col = v1_bn_plot_data['dataset']
bn_already_labelled = bn_dataset_col.str.contains('<br>', regex=False, na=False)
bn_dataset_labels = (
    bn_dataset_col.str.upper() + '<br>'
    + '|V|=' + v1_bn_plot_data['n_nodes'].astype(str)
    + ', |E|=' + v1_bn_plot_data['n_edges'].astype(str)
)
v1_bn_plot_data['dataset'] = bn_dataset_labels.where(~bn_already_labelled, bn_dataset_col)

# Rename the raw model identifiers to the display names used in the figures.
bn_model_display_names = {
    'Random': 'Random',
    'FGS': 'FGS',
    'NOTEARS-MLP': 'NOTEARS-MLP',
    'MPC': 'MPC',
    'ABAPC (Original)': 'Causal ABA (Original)',
    'V1.1 Refined Fact Ranking': 'Causal ABA (Refined Fact Ranking)',
    'V1.2 Model Selection by Refined Fact Strengths': 'Causal ABA (Refined Model Ranking)',
    'V1.3 Model Selection by Arrows Sum': 'Causal ABA (Arrows Sum Model Ranking)',
    'V1.4 Model Selection by Arrows Mean': 'Causal ABA (Arrows Mean Model Ranking)',
    'V2': 'Gradual Causal ABA (Ours)',
}
# Series.map turns every value that is missing from the dict into NaN, so
# re-running this cell on already-renamed data would wipe most of the column.
# Identity entries for the display names make the cell safe to re-run, while
# unknown raw names are still mapped to NaN exactly as before.
bn_model_display_names.update(
    {display_name: display_name for display_name in list(bn_model_display_names.values())}
)
v1_bn_plot_data['model'] = v1_bn_plot_data['model'].map(bn_model_display_names)

In [17]:
# Peek at the first row of the combined bnlearn plot data.
v1_bn_plot_data.head(1)

Unnamed: 0,dataset,n_nodes,n_edges,model,sid_low_mean,sid_high_mean,sid_low_std,sid_high_std,precision_mean,precision_std,...,shd_mean,shd_std,nnz_mean,nnz_std,n_sid_low_mean,n_sid_high_mean,n_sid_low_std,n_sid_high_std,n_shd_mean,n_shd_std
0,"CANCER<br>|V|=5, |E|=4",5,4,Causal ABA (Original),8.62,11.3,2.783992,2.101991,0.481998,0.206719,...,2.18,0.84973,3.8,0.534522,2.155,2.825,0.695998,0.525498,0.545,0.212432


In [18]:
# fig = double_bar_chart_plotly(v1_bn_plot_data, 
#                         names_dict,
#                         colors_dict,
#                         vars_to_plot=['precision', 'recall'],
#                         names=['Precision', 'Recall'],
#                         labels=['Precision', 'Recall'],
#                         methods=methods,
#                         dist_between_lines=0.1565,
#                         lin_space=6,
#                         nl_space=6,
#                         intra_dis = 0.161,
#                         inter_dis = 0.174,
#                         start_pos = 0.04,
#                             width=1300,
#                             height=600,
#                             range_y1=(0, 1),
#                             range_y2=(0, 1),
#                             annot_y=0.85)

In [19]:
# fig = double_bar_chart_plotly(v1_bn_plot_data, 
#                         names_dict,
#                         colors_dict,
#                         vars_to_plot=['f1', 'n_shd'],
#                         names=['f1', 'n_shd'],
#                         labels=['f1', 'n_shd'],
#                         methods=methods,
#                         dist_between_lines=0.1565,
#                         lin_space=6,
#                         nl_space=6,
#                         intra_dis = 0.161,
#                         inter_dis = 0.174,
#                         start_pos = 0.04,
#                             width=1300,
#                             height=600,
#                             range_y1=(0, 1.1),
#                             range_y2=(0, 2))

In [20]:
# Normalised SID (best / worst case) on the bnlearn datasets for every method
# listed in `methods`.
fig = double_bar_chart_plotly(
    v1_bn_plot_data,
    names_dict,
    colors_dict,
    methods=methods,
    # What to plot and how to label it.
    vars_to_plot=['n_sid_low', 'n_sid_high'],
    names=['Best', 'Worst'],
    labels=['Normalised SID', ''],
    # Spacing / layout values passed through to the plotting helper.
    dist_between_lines=0.1565,
    intra_dis=0.161,
    inter_dis=0.174,
    lin_space=6,
    nl_space=6,
    start_pos=0.04,
    # Figure size, axis ranges and annotation height.
    width=1300,
    height=600,
    range_y1=(0, 5.9),
    range_y2=(0, 5.9),
    annot_y=0.85,
)
fig.write_image("v1_v2_sid_bnlearn_all.png", scale=3, width=1300, height=600)

In [21]:
# Normalised SID (best / worst case) on the bnlearn datasets for the baselines,
# the original Causal ABA and the V2 method only; all four V1 variants are
# left out of this figure.
fig = double_bar_chart_plotly(
    v1_bn_plot_data,
    names_dict,
    colors_dict,
    methods=[
        'Random',
        'FGS',
        'NOTEARS-MLP',
        'MPC',
        'Causal ABA (Original)',
        'Gradual Causal ABA (Ours)',
    ],
    # What to plot and how to label it.
    vars_to_plot=['n_sid_low', 'n_sid_high'],
    names=['Best', 'Worst'],
    labels=['Normalised SID', ''],
    # Spacing / layout values passed through to the plotting helper.
    dist_between_lines=0.1565,
    intra_dis=0.161,
    inter_dis=0.174,
    lin_space=6,
    nl_space=6,
    start_pos=0.04,
    # Figure size and axis ranges.
    width=1300,
    height=600,
    range_y1=(0, 5.2),
    range_y2=(0, 5.2),
)

fig.write_image('v2-sid-small.png', scale=3, width=1300, height=600)

Runtime on random graphs for V1 and V2

In [22]:


# Marker symbol per method for the runtime plot.
# NOTE(review): 'Causal ABA (Refined Model Ranking)' and
# 'Causal ABA (Arrows Mean Model Ranking)' both use 'square-dot', so the two
# can only be told apart by colour if plotted together - confirm this is intended.
symbols_dict = {
    'Causal ABA (Original)': 'triangle-down-dot',
    'FGS': 'triangle-up-dot',
    'NOTEARS-MLP': 'pentagon-dot',
    'MPC': 'hexagon2-dot',
    'Random': 'x',
    'Causal ABA (Refined Fact Ranking)': 'diamond-dot',
    'Causal ABA (Refined Model Ranking)': 'square-dot',
    'Causal ABA (Arrows Sum Model Ranking)': 'star-dot',
    'Causal ABA (Arrows Mean Model Ranking)': 'square-dot',
    'Gradual Causal ABA (Ours)': 'circle-dot',
}

In [23]:
# Raw model identifier -> display name used in the runtime figure.
runtime_model_display_names = {
    'Random': 'Random',
    'FGS': 'FGS',
    'NOTEARS-MLP': 'NOTEARS-MLP',
    'MPC': 'MPC',
    'ABAPC (Original)': 'Causal ABA (Original)',
    'V1.1 Refined Fact Ranking': 'Causal ABA (Refined Fact Ranking)',
    'V1.2 Model Selection by Refined Fact Strengths': 'Causal ABA (Refined Model Ranking)',
    'V1.3 Model Selection by Arrows Sum': 'Causal ABA (Arrows Sum Model Ranking)',
    'V1.4 Model Selection by Arrows Mean': 'Causal ABA (Arrows Mean Model Ranking)',
    'V2': 'Gradual Causal ABA (Ours)',
}

# Stack the per-run V1, baseline and V2 rows, rename the models, then compute
# the mean and standard deviation of 'elapsed' per (n_nodes, n_edges, model).
# Models missing from the dict become NaN and are dropped by the groupby.
v1_data_for_runtime_plot = (
    pd.concat(
        [v1_data_with_model_names, baselines_data_random, v2_data_random],
        ignore_index=True,
    )
    .assign(model=lambda runs: runs['model'].map(runtime_model_display_names))
    .groupby(['n_nodes', 'n_edges', 'model'], as_index=False)
    .agg(
        elapsed_mean=('elapsed', 'mean'),
        elapsed_std=('elapsed', 'std'),
    )
)

In [24]:
importlib.reload(utils)
from utils import plot_runtime

# Keep only graphs with at most 6 nodes for the runtime figure.
v1_data_for_runtime_plot = v1_data_for_runtime_plot.loc[v1_data_for_runtime_plot['n_nodes'] <= 6]

# Same palette as the SID figures, except that the original Causal ABA is
# drawn in black in this plot.
runtime_colors = dict(colors_dict)
runtime_colors['Causal ABA (Original)'] = 'black'

# Only the original Causal ABA, the Arrows Mean V1 variant and the V2 method
# are shown; the baselines and the other V1 variants are left out.
fig = plot_runtime(
    v1_data_for_runtime_plot,
    colors_dict=runtime_colors,
    names_dict=names_dict,
    symbols_dict=symbols_dict,
    methods=[
        'Causal ABA (Original)',
        'Causal ABA (Arrows Mean Model Ranking)',
        'Gradual Causal ABA (Ours)',
    ],
)
fig.write_image("./random_runtime.png", scale=3)

## Runtime on the bnlearn CANCER dataset

In [25]:
# List every column of the V2 bnlearn frame whose name contains 'elapsed'.
[c for c in v2_data_bn.columns if 'elapsed' in c]

['elapsed_bsaf_creation',
 'elapsed_model_solution',
 'aba_elapsed',
 'ranking_elapsed',
 'elapsed']

In [26]:
# Restrict the V2 bnlearn results to the 'cancer' dataset and report how many
# rows that leaves.
is_cancer_run = v2_data_bn['dataset'].eq('cancer')
v2_cancer_data = v2_data_bn.loc[is_cancer_run]
print(len(v2_cancer_data))

50


In [27]:
# Summary statistics of the BSAF-creation time over the cancer runs.
# NOTE(review): the recorded output shows the same value for every run (std 0) -
# presumably this time is measured once and shared across runs; confirm.
v2_cancer_data['elapsed_bsaf_creation'].describe()

count    50.000000
mean      0.122648
std       0.000000
min       0.122648
25%       0.122648
50%       0.122648
75%       0.122648
max       0.122648
Name: elapsed_bsaf_creation, dtype: float64

In [28]:
# Summary statistics of the model-solution time over the cancer runs.
v2_cancer_data['elapsed_model_solution'].describe()

count    50.000000
mean      0.600006
std       0.011926
min       0.582255
25%       0.592059
50%       0.598757
75%       0.603672
max       0.636839
Name: elapsed_model_solution, dtype: float64

In [29]:
# Summary statistics of BSAF creation plus model solution per cancer run.
# This is not the full 'elapsed' total, which also adds 'aba_elapsed' and
# 'ranking_elapsed'.
(v2_cancer_data['elapsed_bsaf_creation'] + v2_cancer_data['elapsed_model_solution']).describe()

count    50.000000
mean      0.722654
std       0.011926
min       0.704903
25%       0.714707
50%       0.721405
75%       0.726320
max       0.759488
dtype: float64