# Results Notebook

The aim of this notebook is to generate plots that showcase the experiment results stored under `results/`.

The following is plotted:
- SID plots of
    - Gradual - V1
        - SID score - Random graphs - Baselines, existing ABAPC, 4 methods with V1 improved ABAPC
        - SID score - BNlearn 3 datasets - Baselines, existing ABAPC, 4 methods with V1 improved ABAPC
        - Runtime - Random graphs - Baselines, existing ABAPC, 4 methods with V1 improved ABAPC
    - Gradual - V2 - Ablation
        - SID scores - Random graphs - ablation plots
        - SID scores - Random graphs - comparison to baselines, existing ABAPC, V2 best (least approx)
        - Runtime - Random graphs - ablation plots
        - Runtime - Random graphs - comparison to baselines, existing ABAPC, V2. May be combined with last plot

In [1]:
import pandas as pd
import numpy as np

import importlib
import utils
importlib.reload(utils)
from utils import plot_runtime, double_bar_chart_plotly, process_model_names_and_runtime_v1_data, process_mean_std_sid_data, DAG_NODES_MAP, DAG_EDGES_MAP

## Small Graphs V1 and V2 Plots

In [3]:
# Baseline methods on random graphs. The FGS results live in their own CSV and
# are appended so that all baselines sit in a single table.
baselines_data_random = pd.read_csv('../../results/existing/random_graphs/all_existing_methods_metrics_cpdag.csv')
baselines_data_random_fgs = pd.read_csv('../../results/existing/random_graphs/fgs/all_existing_methods_metrics_cpdag.csv')
baselines_data_random = pd.concat([baselines_data_random, baselines_data_random_fgs], ignore_index=True)

# Same layout for the bnlearn graphs: main baselines CSV plus the separate FGS CSV.
baselines_data_bn = pd.read_csv('../../results/existing/bnlearn_graphs/all_existing_methods_metrics_cpdag.csv')
baselines_data_bn_fgs = pd.read_csv('../../results/existing/bnlearn_graphs/fgs/all_existing_methods_metrics_cpdag.csv')
baselines_data_bn = pd.concat([baselines_data_bn, baselines_data_bn_fgs], ignore_index=True)

# Gradual V1 CPDAG metrics (random and bnlearn graphs).
v1_data_random = pd.read_csv('../../results/gradual/v1_random_graphs/cpdag_metrics.csv')
v1_data_bn = pd.read_csv('../../results/gradual/v1_bnlearn_graphs/cpdag_metrics.csv')

# Fact-sourcing runtimes; passed to process_model_names_and_runtime_v1_data
# together with the V1 tables further down.
fact_sourcing_runtime_random = pd.read_csv('../../results/existing/runtime_sourcing_facts_random_graphs/runtime_results.csv')
fact_sourcing_runtime_bn = pd.read_csv('../../results/existing/runtime_sourcing_facts_bnlearn_graphs/runtime_results.csv')

# Gradual V2 CPDAG metrics (random and bnlearn graphs).
v2_data_random = pd.read_csv('../../results/gradual/v2_random_graphs_3_to_6_nodes/cpdag_metrics.csv')
v2_data_bn = pd.read_csv('../../results/gradual/v2_bnlearn_graphs_up_to_6_nodes/cpdag_metrics.csv')


In [4]:
# List the columns available in the V2 random-graph results.
v2_data_random.columns

Index(['nnz', 'fdr', 'tpr', 'fpr', 'precision', 'recall', 'F1', 'shd',
       'sid_low', 'sid_high', 'dataset', 'seed', 'n_nodes', 'n_edges',
       'neighbourhood_n_nodes', 'max_cycle_length', 'max_ct_depth',
       'max_path_length', 'max_c_set_size', 'search_depth',
       'elapsed_bsaf_creation', 'elapsed_model_solution', 'is_converged',
       'fact_ranking_method', 'model_ranking_method', 'num_edges_est',
       'best_model', 'aba_elapsed', 'ranking_elapsed', 'best_I'],
      dtype='object')

In [5]:
# Label both V2 tables with their model name and derive the total runtime of
# each run by adding up the four timing columns, in the same order for both.
for _v2_frame in (v2_data_random, v2_data_bn):
    _v2_frame['model'] = 'V2'
    _total_elapsed = _v2_frame['elapsed_bsaf_creation'] + _v2_frame['elapsed_model_solution']
    _total_elapsed = _total_elapsed + _v2_frame['aba_elapsed']
    _total_elapsed = _total_elapsed + _v2_frame['ranking_elapsed']
    _v2_frame['elapsed'] = _total_elapsed

In [7]:

# Give every random-graph table a human-readable dataset label of the form
# "|V|=<n_nodes>, |E|=<n_edges>" (overwrites any existing 'dataset' column).
for _random_frame in (v1_data_random, baselines_data_random, v2_data_random):
    _node_count = _random_frame['n_nodes'].astype(str)
    _edge_count = _random_frame['n_edges'].astype(str)
    _random_frame['dataset'] = '|V|=' + _node_count + ', |E|=' + _edge_count

In [8]:
# Run each random-graph table through the shared SID summary helper. The V1
# table is first passed through process_model_names_and_runtime_v1_data
# together with the fact-sourcing runtimes.
baselines_random_processed = process_mean_std_sid_data(baselines_data_random)
v1_data_with_model_names = process_model_names_and_runtime_v1_data(
    v1_data_random,
    fact_sourcing_runtime_random,
)
v1_random_processed = process_mean_std_sid_data(v1_data_with_model_names)
v2_data_random_processed = process_mean_std_sid_data(v2_data_random)

# Only baselines on graphs with at most 6 nodes go into the combined table.
_small_random_baselines = baselines_random_processed.loc[
    baselines_random_processed['n_nodes'] <= 6
]
v1_random_plot_data = pd.concat(
    [v1_random_processed, _small_random_baselines, v2_data_random_processed],
    ignore_index=True,
)

In [9]:
# Peek at the first row of the combined random-graph plot table.
v1_random_plot_data.head(1)

Unnamed: 0,dataset,n_nodes,n_edges,model,sid_low_mean,sid_high_mean,sid_low_std,sid_high_std,p_SID_low_mean,p_SID_high_mean,p_SID_low_std,p_SID_high_std
0,"|V|=3, |E|=3",3,3,ABAPC (Original),1.66,5.32,2.105484,1.316148,0.553333,1.773333,0.701828,0.438716


In [13]:
# Methods shown in the SID bar charts, in plotting order.
methods = [
    'Random',
    'FGS',
    'NOTEARS-MLP',
    'MPC',
    'ABAPC (Original)',
    'V1.1 Refined Fact Ranking',
    'V1.2 Model Selection by Refined Fact Strengths',
    'V1.3 Model Selection by Arrows Sum',
    'V1.4 Model Selection by Arrows Mean',
    'V2',
]

# Full model name -> short display label passed to the plotting helpers.
# Labels carry no surrounding whitespace ('V1.4' previously had a stray
# trailing space, unlike every other label).
names_dict = {
    'Random': 'Random',
    'FGS': 'FGS',
    'NOTEARS-MLP': 'NOTEARS-MLP',
    'MPC': 'MPC',
    'ABAPC (Original)': 'ABAPC (Original)',
    'V1.1 Refined Fact Ranking': 'V1.1',
    'V1.2 Model Selection by Refined Fact Strengths': 'V1.2',
    'V1.3 Model Selection by Arrows Sum': 'V1.3',
    'V1.4 Model Selection by Arrows Mean': 'V1.4',
    'V2': 'V2',
}

# Full model name -> colour passed to the plotting helpers.
colors_dict = {
    'Random': 'grey',
    'FGS': '#b85c00',
    'NOTEARS-MLP': '#9454c4',
    'MPC': '#379f9f',
    'ABAPC (Original)': '#0085CA',
    'V1.1 Refined Fact Ranking': "#9F8D00",
    'V1.2 Model Selection by Refined Fact Strengths': "#9E0000",
    'V1.3 Model Selection by Arrows Sum': "#ff34da",
    'V1.4 Model Selection by Arrows Mean': "#008100",
    'V2': "#ff8c00",
}

In [14]:
# Reload the local utils module and re-bind the plotting helper, so edits made
# to utils since the first import are picked up by this cell.
import importlib
import utils
importlib.reload(utils)
from utils import double_bar_chart_plotly

# Double bar chart for the random-graph plot table. The spacing arguments are
# hand-tuned layout values forwarded unchanged to the helper.
double_bar_chart_plotly(
    v1_random_plot_data,
    names_dict,
    colors_dict,
    methods=methods,
    dist_between_lines=0.1175,
    intra_dis=0.112,
    inter_dis=0.137,
    lin_space=5,
    nl_space=5,
    start_pos=0.03,
    width=1300,
    height=600,
    range_y1=(0, 5),
    range_y2=(0, 5),
)

Now the same SID comparison on the bnlearn datasets (V1 variants, baselines and V2)

In [15]:
# Add n_nodes / n_edges columns to every bnlearn table by mapping the dataset
# name through DAG_NODES_MAP / DAG_EDGES_MAP.
for _bn_frame in (v1_data_bn, baselines_data_bn, fact_sourcing_runtime_bn, v2_data_bn):
    _bn_frame['n_nodes'] = _bn_frame['dataset'].map(DAG_NODES_MAP)
    _bn_frame['n_edges'] = _bn_frame['dataset'].map(DAG_EDGES_MAP)

# Same processing pipeline as for the random graphs, applied to the bnlearn tables.
baselines_bn_processed = process_mean_std_sid_data(baselines_data_bn)
v1_bn_data_with_model_names = process_model_names_and_runtime_v1_data(
    v1_data_bn,
    fact_sourcing_runtime_bn,
)
v1_bn_processed = process_mean_std_sid_data(v1_bn_data_with_model_names)
v2_data_bn_processed = process_mean_std_sid_data(v2_data_bn)

# Only baselines on graphs with at most 6 nodes go into the combined table.
_small_bn_baselines = baselines_bn_processed.loc[baselines_bn_processed['n_nodes'] <= 6]
v1_bn_plot_data = pd.concat(
    [v1_bn_processed, _small_bn_baselines, v2_data_bn_processed],
    ignore_index=True,
)

In [16]:
# Peek at the first row of the combined bnlearn plot table.
v1_bn_plot_data.head(1)

Unnamed: 0,dataset,n_nodes,n_edges,model,sid_low_mean,sid_high_mean,sid_low_std,sid_high_std,p_SID_low_mean,p_SID_high_mean,p_SID_low_std,p_SID_high_std
0,cancer,5,4,ABAPC (Original),8.62,11.3,2.783992,2.101991,2.155,2.825,0.695998,0.525498


In [17]:
# Double bar chart for the bnlearn plot table. The spacing arguments are
# hand-tuned layout values forwarded unchanged to the helper.
double_bar_chart_plotly(
    v1_bn_plot_data,
    names_dict,
    colors_dict,
    methods=methods,
    dist_between_lines=0.1565,
    lin_space=6,
    nl_space=6,
    intra_dis=0.161,
    inter_dis=0.174,
    start_pos=0.04,
    width=1300,
    height=600,
    range_y1=(0, 5),
    range_y2=(0, 5),
)

Runtime on random graphs for the original ABAPC, the V1 variants and V2

In [18]:
# Methods drawn in the runtime plot. The baselines (Random, FGS, NOTEARS-MLP,
# MPC) are left out here; only the ABAPC family is compared.
# NOTE: this rebinds `methods`, so re-running an earlier bar-chart cell after
# this one would plot this shorter list.
methods = [
    'ABAPC (Original)',
    'V1.1 Refined Fact Ranking',
    'V1.2 Model Selection by Refined Fact Strengths',
    'V1.3 Model Selection by Arrows Sum',
    'V1.4 Model Selection by Arrows Mean',
    'V2',
]

# Full model name -> Plotly marker symbol. Every method gets its own symbol so
# traces stay distinguishable even when their colours are close (V2 previously
# shared 'circle-dot' with V1.1).
symbols_dict = {
    'ABAPC (Original)': 'triangle-down-dot',
    'FGS': 'triangle-up-dot',
    'NOTEARS-MLP': 'pentagon-dot',
    'MPC': 'hexagon2-dot',
    'Random': 'x',
    'V1.1 Refined Fact Ranking': 'circle-dot',
    'V1.2 Model Selection by Refined Fact Strengths': 'diamond-dot',
    'V1.3 Model Selection by Arrows Sum': 'square-dot',
    'V1.4 Model Selection by Arrows Mean': 'star-dot',
    'V2': 'cross-dot',
}

In [19]:
# Stack the per-run rows of V1, the baselines and V2, then reduce them to the
# mean and standard deviation of 'elapsed' per (n_nodes, n_edges, model).
_runtime_rows = pd.concat(
    [v1_data_with_model_names, baselines_data_random, v2_data_random],
    ignore_index=True,
)
v1_data_for_runtime_plot = (
    _runtime_rows
    .groupby(['n_nodes', 'n_edges', 'model'], as_index=False)
    .agg(
        elapsed_mean=('elapsed', 'mean'),
        elapsed_std=('elapsed', 'std'),
    )
)

In [20]:
# Keep graphs with at most 6 nodes, then draw the runtime plot for the
# currently selected `methods`.
_at_most_six_nodes = v1_data_for_runtime_plot['n_nodes'] <= 6
v1_data_for_runtime_plot = v1_data_for_runtime_plot.loc[_at_most_six_nodes]
plot_runtime(
    v1_data_for_runtime_plot,
    colors_dict=colors_dict,
    names_dict=names_dict,
    symbols_dict=symbols_dict,
    methods=methods,
)

## Ablation


In [21]:
# Per-run CPDAG metrics of the V2 ablation on random graphs.
v2_random_ablation = pd.read_csv('../../results/gradual/v2_ablation_random_graphs_partial/cpdag_metrics.csv')

In [22]:
# List the columns available in the ablation results.
v2_random_ablation.columns

Index(['nnz', 'fdr', 'tpr', 'fpr', 'precision', 'recall', 'F1', 'shd',
       'sid_low', 'sid_high', 'dataset', 'seed', 'n_nodes', 'n_edges',
       'neighbourhood_n_nodes', 'max_cycle_length', 'max_ct_depth',
       'max_path_length', 'max_c_set_size', 'search_depth',
       'elapsed_bsaf_creation', 'elapsed_model_solution', 'is_converged',
       'fact_ranking_method', 'model_ranking_method', 'num_edges_est',
       'best_model', 'aba_elapsed', 'ranking_elapsed', 'best_I'],
      dtype='object')

In [37]:
# Total runtime of each run: sum of the four timing columns.
v2_random_ablation['elapsed'] = (
    v2_random_ablation['elapsed_bsaf_creation'] +
    v2_random_ablation['elapsed_model_solution'] +
    v2_random_ablation['aba_elapsed'] +
    v2_random_ablation['ranking_elapsed']
)

# Turn max_ct_depth into a boolean flag (True when the value is > -1;
# presumably -1 is the "disabled" sentinel -- TODO confirm).
# The original values are kept in 'max_ct_depth_raw' and the flag is always
# derived from that copy. Overwriting the column in place was not idempotent:
# on a second run the comparison was applied to booleans, and both
# True > -1 and False > -1 hold, so every row silently became True.
if 'max_ct_depth_raw' not in v2_random_ablation.columns:
    v2_random_ablation['max_ct_depth_raw'] = v2_random_ablation['max_ct_depth']
v2_random_ablation['max_ct_depth'] = v2_random_ablation['max_ct_depth_raw'] > -1

In [38]:
# Columns that identify one ablation configuration (solver settings plus
# graph size); runs sharing all of them are aggregated together.
group_cols = [
    'neighbourhood_n_nodes',
    'max_ct_depth',
    'max_c_set_size',
    'search_depth',
    'n_nodes',
    'n_edges',
]
# Metrics summarised (mean and std) within each configuration.
agg_cols = [
    'sid_low',
    'sid_high',
    'elapsed',
]

In [54]:
# Mean and std of every metric per ablation configuration.
agg_df = (
    v2_random_ablation
    .groupby(group_cols)[agg_cols]
    .agg(['mean', 'std'])
    .reset_index()
)

# Collapse the two-level column index into flat names such as 'sid_low_mean';
# grouping columns have an empty second level, hence the strip.
agg_df.columns = ['_'.join(level_names).strip('_') for level_names in agg_df.columns.values]

# Normalise the SID statistics by the number of edges of the graph.
for _sid_stat in ('sid_high_mean', 'sid_low_mean', 'sid_low_std', 'sid_high_std'):
    agg_df[_sid_stat] = agg_df[_sid_stat] / agg_df['n_edges']



In [55]:
import plotly.graph_objects as go

def plot_ablation(df, param_name, metric='elapsed_mean', std_metric='elapsed_std', fixed_params=None):
    """Plot one metric against one ablated parameter with a +/- one-std band.

    Rows of ``df`` are restricted to those matching every ``column == value``
    pair in ``fixed_params`` (so that only ``param_name`` varies), sorted by
    ``param_name`` and drawn as a line with markers plus a shaded band of
    ``metric +/- std_metric``.

    Args:
        df: Table containing ``param_name``, ``metric``, ``std_metric`` and
            every key of ``fixed_params`` as columns. It is not modified.
        param_name: Column used for the x axis.
        metric: Column drawn as the line.
        std_metric: Column giving the half-width of the shaded band. Missing
            values are treated as 0 so the band stays a closed polygon.
        fixed_params: Optional mapping ``column -> required value``. ``None``
            or an empty mapping applies no filtering.

    Returns:
        None. The figure is displayed via ``fig.show()``.

    Raises:
        KeyError: If a referenced column is missing from ``df``.
    """
    import warnings

    # None instead of a mutable ``{}`` default; both mean "no filtering".
    fixed_params = fixed_params or {}

    # Filter for fixed param values (to isolate ablation)
    selected = df
    for column, value in fixed_params.items():
        selected = selected[selected[column] == value]

    # An empty selection would otherwise produce a blank figure with no hint
    # that the filter values matched nothing.
    if selected.empty:
        warnings.warn(
            f"plot_ablation: no rows left after applying fixed_params={fixed_params!r}; "
            "the plot will be empty.",
            stacklevel=2,
        )

    # Sort by ablation param
    selected = selected.sort_values(by=param_name)
    x_values = selected[param_name]
    center = selected[metric]
    # std is NaN for groups with a single run; NaN vertices would break the
    # 'toself' fill polygon, so draw a zero-width band there instead.
    spread = selected[std_metric].fillna(0)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_values,
        y=center,
        mode='lines+markers',
        name='Mean',
        line=dict(color='blue')
    ))

    # Add standard deviation band: upper edge left-to-right, then the lower
    # edge right-to-left, forming one closed outline.
    fig.add_trace(go.Scatter(
        x=pd.concat([x_values, x_values[::-1]]),
        y=pd.concat([center + spread, (center - spread)[::-1]]),
        fill='toself',
        fillcolor='rgba(0,0,255,0.1)',
        line=dict(color='rgba(255,255,255,0)'),
        hoverinfo="skip",
        showlegend=False
    ))

    fig.update_layout(
        title=f"Ablation of {param_name}",
        xaxis_title=param_name,
        yaxis_title=metric.replace('_', ' ').title(),
        template='plotly_white'
    )

    fig.show()


In [62]:
# Upper SID bound versus the max_ct_depth flag, all other settings held fixed.
plot_ablation(
    df=agg_df,
    param_name='max_ct_depth',
    metric='sid_high_mean',
    std_metric='sid_high_std',
    fixed_params={
        'neighbourhood_n_nodes': 5,
        'max_c_set_size': 3,
        'search_depth': 10,
        'n_nodes': 6,
        'n_edges': 6,
    },
)


In [63]:
# Upper SID bound versus neighbourhood_n_nodes, all other settings held fixed.
plot_ablation(
    df=agg_df,
    param_name='neighbourhood_n_nodes',
    metric='sid_high_mean',
    std_metric='sid_high_std',
    fixed_params={
        'max_ct_depth': True,
        'max_c_set_size': 3,
        'search_depth': 10,
        'n_nodes': 6,
        'n_edges': 6,
    },
)

In [64]:
# Upper SID bound versus search_depth, all other settings held fixed.
plot_ablation(
    df=agg_df,
    param_name='search_depth',
    metric='sid_high_mean',
    std_metric='sid_high_std',
    fixed_params={
        'max_ct_depth': True,
        'max_c_set_size': 3,
        'neighbourhood_n_nodes': 5,
        'n_nodes': 6,
        'n_edges': 6,
    },
)

In [65]:
# Lower SID bound versus the max_ct_depth flag, all other settings held fixed.
plot_ablation(
    df=agg_df,
    param_name='max_ct_depth',
    metric='sid_low_mean',
    std_metric='sid_low_std',
    fixed_params={
        'neighbourhood_n_nodes': 5,
        'max_c_set_size': 3,
        'search_depth': 10,
        'n_nodes': 6,
        'n_edges': 6,
    },
)

In [66]:
# Lower SID bound versus neighbourhood_n_nodes, all other settings held fixed.
plot_ablation(
    df=agg_df,
    param_name='neighbourhood_n_nodes',
    metric='sid_low_mean',
    std_metric='sid_low_std',
    fixed_params={
        'max_ct_depth': True,
        'max_c_set_size': 3,
        'search_depth': 10,
        'n_nodes': 6,
        'n_edges': 6,
    },
)

In [67]:
# Lower SID bound versus search_depth, all other settings held fixed.
plot_ablation(
    df=agg_df,
    param_name='search_depth',
    metric='sid_low_mean',
    std_metric='sid_low_std',
    fixed_params={
        'max_ct_depth': True,
        'max_c_set_size': 3,
        'neighbourhood_n_nodes': 5,
        'n_nodes': 6,
        'n_edges': 6,
    },
)