In [None]:
import sys
sys.path.insert(0, '../../')

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the semantics-comparison results for the random graphs and preview the first rows.
random_graphs_compare = pd.read_csv('../../results_pure_aba/compare_semantics_random.csv')
random_graphs_compare.head()

In [None]:
# Load the semantics-comparison results for the bnlearn datasets and preview the first rows.
bnlearn_compare = pd.read_csv('../../results_pure_aba/compare_semantics_bnlearn.csv')
bnlearn_compare.head()

In [None]:
# Columns available in the random-graph comparison table.
random_graphs_compare.columns

In [None]:
# Columns available in the bnlearn comparison table.
bnlearn_compare.columns

# Runtime

In [None]:
# Load the runtime comparison on random graphs; the aggregation below reads its
# 'pure_abapc_elapsed' and 'old_elapsed' columns.
old_runtimes = pd.read_csv('../../results_pure_aba/compare_abapc_random.csv')
old_runtimes.head()

In [None]:
# Mean and standard deviation of both elapsed-time columns per (n_nodes, n_edges)
# group, keeping only the groups with at most 6 nodes.
old_runtimes_grouped = (
    old_runtimes
    .groupby(['n_nodes', 'n_edges'], as_index=False)
    .agg(
        pure_abapc_mean=pd.NamedAgg(column='pure_abapc_elapsed', aggfunc='mean'),
        pure_abapc_std=pd.NamedAgg(column='pure_abapc_elapsed', aggfunc='std'),
        old_mean=pd.NamedAgg(column='old_elapsed', aggfunc='mean'),
        old_std=pd.NamedAgg(column='old_elapsed', aggfunc='std'),
    )
    .loc[lambda grouped: grouped['n_nodes'] <= 6]
    .copy()
)

In [None]:
# Mean and standard deviation of the elapsed time of each semantics (ST, CO, PR)
# per (n_nodes, n_edges) group. Output columns are named '<SEM>_elapsed_<stat>'.
runtime_df = random_graphs_compare.groupby(['n_nodes', 'n_edges'], as_index=False).agg(
    **{
        f'{sem}_elapsed_{stat}': (f'{sem}_elapsed', stat)
        for sem in ('ST', 'CO', 'PR')
        for stat in ('mean', 'std')
    }
)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_runtime_custom(df, df2, plot_width=750, plot_height=300, font_size=20, save_figs=False, output_name="random_graphs_runtime.html"):
    """Plot mean runtime with standard-deviation error bars against the number of nodes.

    One line is drawn per semantics (ST, CO, PR) from ``df`` and one line for the
    existing implementation from ``df2``. The y-axis uses a logarithmic scale.

    Args:
        df: Frame with an ``n_nodes`` column and, for each semantics ``S`` in
            ST/CO/PR, the columns ``S_elapsed_mean`` and ``S_elapsed_std``.
        df2: Frame with the columns ``n_nodes``, ``old_mean`` and ``old_std``.
        plot_width: Figure width passed to the layout.
        plot_height: Figure height passed to the layout.
        font_size: Font size passed to the layout.
        save_figs: When true, write the figure to ``output_name`` as HTML and to
            the same path with a ``.jpeg`` suffix as a static image.
        output_name: Path of the HTML file written when ``save_figs`` is true.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Local import keeps the function self-contained within its notebook cell.
    from pathlib import Path

    fig = make_subplots(rows=1, cols=1, shared_yaxes=True)

    # One trace per semantics, each with a fixed colour.
    colors = ['red', 'blue', 'green']
    semantics = ['ST', 'CO', 'PR']
    for color, sem in zip(colors, semantics):
        fig.add_trace(
            go.Scatter(
                # Node counts are converted to strings before being used as x values.
                x=df['n_nodes'].astype(str),
                y=df[f'{sem}_elapsed_mean'],
                error_y=dict(type='data', array=df[f'{sem}_elapsed_std'], thickness=2),
                mode='lines+markers',
                name=sem,
                line=dict(color=color, width=2),
                marker=dict(symbol='circle', size=8, color=color),
                opacity=0.8,
            )
        )

    # Reference trace: the existing implementation.
    fig.add_trace(
        go.Scatter(
            x=df2['n_nodes'].astype(str),
            y=df2['old_mean'],
            error_y=dict(type='data', array=df2['old_std'], thickness=2),
            mode='lines+markers',
            name='existing implementation',
            line=dict(color='orange', width=2),
            marker=dict(symbol='square', size=8, color='orange'),
            opacity=0.8,
        )
    )

    # Log scale for y-axis
    fig.update_yaxes(type="log", title='log(elapsed time [s])')

    # X axis title
    fig.update_xaxes(title='Number of Nodes (|V|)')

    # Layout and style
    fig.update_layout(
        legend=dict(orientation="h", xanchor="center", x=0.5, yanchor="bottom", y=1.05),
        template='plotly_white',
        width=plot_width,
        height=plot_height,
        margin=dict(l=10, r=10, b=80, t=10),
        font=dict(size=font_size, family="Serif", color="black")
    )

    if save_figs:
        fig.write_html(output_name)
        # Derive the image path from the file suffix only. A plain
        # str.replace('.html', ...) would reuse the HTML path when the name has
        # no '.html' suffix, and would corrupt directory names containing '.html'.
        fig.write_image(str(Path(output_name).with_suffix('.jpeg')))

    fig.show()


In [None]:
# Draw the runtime comparison; save_figs=True also writes the HTML file and a JPEG export.
plot_runtime_custom(runtime_df, old_runtimes_grouped, save_figs=True, output_name='runtime_random_graphs.html')

The preferred' semantics resulted in the lowest runtime for 6 nodes.

While it still has a greater slope than the existing implementation, it is a considerable improvement on the stable semantics runtime.

# SID

In [None]:
import json

def read_sid_from_json(string_to_read, sid_type='low'):
    """Extract one SID value from a JSON-encoded metrics string.

    The JSON object must contain a ``'sid'`` entry that is either a single
    number (used as both the low and the high value) or a two-element list
    ``[low, high]``.

    Args:
        string_to_read: JSON string holding the metrics object.
        sid_type: ``'low'`` to return the low value, ``'high'`` for the high one.

    Returns:
        The requested SID value as stored in the JSON.

    Raises:
        ValueError: If ``sid_type`` is neither ``'low'`` nor ``'high'``, or if
            the ``'sid'`` entry is not a number or a two-element list.
    """
    # Reject unknown selectors up front so they are never silently turned into None.
    if sid_type not in ('low', 'high'):
        raise ValueError(f"sid_type must be 'low' or 'high', got {sid_type!r}.")

    metrics = json.loads(string_to_read)
    sid = metrics['sid']
    if isinstance(sid, list):
        if len(sid) != 2:
            raise ValueError("Unexpected format for SID in JSON string.")
        sid_low, sid_high = sid
    elif isinstance(sid, (int, float)) and not isinstance(sid, bool):
        # json.loads yields int for integer-valued SIDs such as `4`; accept them
        # alongside floats. bool is excluded although it subclasses int.
        sid_low = sid
        sid_high = sid
    else:
        raise ValueError("Unexpected format for SID in JSON string.")

    return sid_low if sid_type == 'low' else sid_high


# Collect the per-run SID values of every semantics, then summarise them
# (mean and standard deviation) per dataset and model.
sem_dfs = []
for sem in ('ST', 'CO', 'PR'):
    metrics_col = f'{sem}_mt_cpdag'
    sem_df = bnlearn_compare[[metrics_col, 'dataset_name', 'seed']].copy()
    sem_df = sem_df.rename(columns={'dataset_name': 'dataset'})
    for bound in ('low', 'high'):
        sem_df[f'sid_{bound}'] = sem_df[metrics_col].apply(read_sid_from_json, args=(bound,))
    sem_df['model'] = f'ABAPC (New {sem})'
    sem_dfs.append(sem_df[['dataset', 'model', 'seed', 'sid_low', 'sid_high']])

sid_per_run = pd.concat(sem_dfs, ignore_index=True)

sem_df_combined = sid_per_run.groupby(['dataset', 'model'], as_index=False).agg(
    sid_low_mean=pd.NamedAgg(column='sid_low', aggfunc='mean'),
    sid_low_std=pd.NamedAgg(column='sid_low', aggfunc='std'),
    sid_high_mean=pd.NamedAgg(column='sid_high', aggfunc='mean'),
    sid_high_std=pd.NamedAgg(column='sid_high', aggfunc='std'),
)
sem_df_combined

In [None]:
import warnings
# NOTE(review): this silences every warning for the rest of the kernel session.
warnings.filterwarnings('ignore')
import sys
sys.path.insert(0,'../../')
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 2000)
sys.path.append('../../ArgCausalDisco/utils/')
# NOTE(review): star import; the colour constants used below (sec_blue, sec_orange,
# main_purple, main_green) are presumably defined in this module -- confirm.
from ArgCausalDisco.utils.plotting import *
print(sys.path)

# Extra colours for the new-semantics entries.
cyan = '#00BFFF'
emerald = '#50C878'


# Plot configuration: datasets, their node/arc counts, and per-method display
# names, marker symbols and colours.
save_figs = True
debug = False
datasets = ['cancer', 'earthquake', 'survey', 'asia']
dags_nodes_map = {'asia':8, 'cancer':5, 'earthquake':5, 'sachs':11, 'survey':6, 'alarm':37, 'child':20, 'insurance':27, 'hailfinder':56, 'hepar2':70}
dags_arcs_map = {'asia':8, 'cancer':4, 'earthquake':4, 'sachs':17, 'survey':6, 'alarm':46, 'child':25, 'insurance':52, 'hailfinder':66, 'hepar2':123}
methods = ['Random', 'FGS', 'NOTEARS-MLP', 'MPC', 'ABAPC (Existing)', 'ABAPC (New ST)', 'ABAPC (New CO)', 'ABAPC (New PR)']
names_dict = {'fgs':'FGS', 'nt':'NOTEARS-MLP', 'mpc':'MPC', 'random':'Random', 'abapc':'ABAPC (Existing)', 'ABAPC (New ST)':'ABAPC (New ST)',
              'ABAPC (New CO)':'ABAPC (New CO)', 'ABAPC (New PR)':'ABAPC (New PR)'}
symbols_dict = {'abapc':'triangle-down-dot','fgs':'triangle-up-dot','nt':'pentagon-dot','mpc':'hexagon2-dot', 'random':'x'}  
colors_dict = {'abapc':sec_blue,'fgs':sec_orange,'nt':main_purple,'mpc':main_green,'random':'grey', 'ABAPC (New ST)':'black',
               'ABAPC (New CO)':cyan, 'ABAPC (New PR)':emerald}
version = 'bnlearn_50rep' ## for 5000 samples
# version = 'bnlearn_dag_v5_2000' ## for 2000 samples

version_cpdag = version+'_cpdag'
all_sum = pd.read_csv(f"../../results_pure_aba/stored_results_{version}_cpdag.csv")
# Rename the stored model labels in a single assignment. Chained indexing
# (all_sum['model'][mask] = ...) writes to a temporary object under pandas
# Copy-on-Write, so the rename would silently not happen.
all_sum['model'] = all_sum['model'].replace({
    'ABAPC (Ours)': 'ABAPC (Existing)',
    'ABAPC (ASPforABA)': 'ABAPC (New)',
})
all_sum = all_sum[['dataset', 'model', 'sid_low_mean', 'sid_low_std', 'sid_high_mean', 'sid_high_std']].copy()
# Append the per-semantics SID summary computed earlier in the notebook.
all_sum = pd.concat([all_sum, sem_df_combined], ignore_index=True)


all_sum['n_edges'] = all_sum['dataset'].map(dags_arcs_map)
all_sum['n_nodes'] = all_sum['dataset'].map(dags_nodes_map)
# Divide SID mean/std by the number of edges of the dataset's graph.
for var in ['SID_low','SID_high']:
    all_sum['p_'+var+'_mean'] = all_sum[var.lower()+'_mean'].astype(float)/all_sum['n_edges'].astype(int)
    all_sum['p_'+var+'_std'] = all_sum[var.lower()+'_std'].astype(float)/all_sum['n_edges'].astype(int)
# Display label: upper-cased dataset name followed by its |V| and |E|.
all_sum['dataset'] = (
    all_sum['dataset'].astype(str).str.upper()
    + "<br> |V|=" + all_sum['n_nodes'].astype(str)
    + ", |E|=" + all_sum['n_edges'].astype(str)
)


all_sum.head()

In [None]:
# Plot the edge-normalised SID columns (low and high) with the project's bar-chart helper.
# NOTE(review): presumably writes ./Fig.2_SID_cpdag.html when save_figs is true -- confirm in the helper.
double_bar_chart_plotly(all_sum, ['p_SID_low','p_SID_high'], names_dict, colors_dict, methods, save_figs=save_figs, output_name="./Fig.2_SID_cpdag.html", debug=False, range_y1=[0,6], range_y2=[0,6])#

# Checks

In [None]:
# Columns of the random-graph comparison table, shown again for the checks below.
random_graphs_compare.columns

In [None]:
# Is the best stable model always contained in the full set of complete models?

all(random_graphs_compare['is_best_st_in_all_co'])

In [None]:
# In what percentage of cases is the best stable model contained in the full set of complete models?
random_graphs_compare['is_best_st_in_all_co'].mean() * 100

In [None]:
# Is the best stable model always contained in the full set of preferred' models?
all(random_graphs_compare['is_best_st_in_all_pr'])

In [None]:
# In what percentage of cases is the best stable model contained in the full set of preferred' models?
random_graphs_compare['is_best_st_in_all_pr'].mean() * 100

In [None]:
# Is the best preferred' model always contained in the full set of complete models?
all(random_graphs_compare['is_best_pr_in_all_co'])

In [None]:
# Are the stable models always a subset of the preferred' models?
all(random_graphs_compare['is_all_st_subset_of_all_pr'])

In [None]:
# In what percentage of cases are the stable models a subset of the preferred' models?
random_graphs_compare['is_all_st_subset_of_all_pr'].mean() * 100

In [None]:
# Are the stable models always a subset of the complete models?
all(random_graphs_compare['is_all_st_subset_of_all_co'])

In [None]:
# In what percentage of cases are the stable models a subset of the complete models?
random_graphs_compare['is_all_st_subset_of_all_co'].mean() * 100

In [None]:
# Are the preferred' models always a subset of the complete models?
all(random_graphs_compare['is_all_pr_subset_of_all_co'])

In [None]:
# Report, as a percentage of all rows, how often each comparison between the
# best models of two semantics holds.
n_runs = len(random_graphs_compare)
comparisons = [
    ("cases when best stable model is stronger than best preferred model: ",
     random_graphs_compare['ST_best_I'] > random_graphs_compare['PR_best_I']),
    ("cases when best stable model is weaker than best preferred model: ",
     random_graphs_compare['ST_best_I'] < random_graphs_compare['PR_best_I']),
    ("cases when best preferred model is equal to the best complete model: ",
     random_graphs_compare['CO_best_model'] == random_graphs_compare['PR_best_model']),
    ("cases when the strength of the best preferred model is equal to the strength of the best complete model: ",
     random_graphs_compare['CO_best_I'] == random_graphs_compare['PR_best_I']),
]
for message, mask in comparisons:
    print(message, random_graphs_compare[mask].shape[0] / n_runs * 100, '%')

Conclusion: the complete and preferred semantics result in the same output.

Stable results in better output than preferred only in 10.5% of the cases, according to model strength.
Stable results in worse output than preferred only in 20.5% of the cases, according to model strength.

In [None]:
# For CO, PR and ST (in that order): does the number of used facts equal the
# total number of facts in every row? If so, no fact elimination took place.
tuple(
    all(random_graphs_compare[f'{sem}_used_num_facts'] == random_graphs_compare[f'{sem}_total_num_facts'])
    for sem in ('CO', 'PR', 'ST')
)

In [None]:
# Sanity check: the best model of every semantics should be a DAG.

sys.path.insert(0, '../../ArgCausalDisco')
from ArgCausalDisco.utils.graph_utils import is_dag
from src.abasp.utils import get_graph_matrix

for sem in ['ST', 'CO', 'PR']:
    # NOTE(review): eval() executes whatever text is stored in the CSV cell.
    # Consider ast.literal_eval if the stored models are plain Python literals.
    is_dag_list = [
        is_dag(get_graph_matrix(n_nodes, eval(model)))
        for n_nodes, model in zip(random_graphs_compare['n_nodes'], random_graphs_compare[f'{sem}_best_model'])
    ]
    print(f"Is all {sem} best models a DAGs? {all(is_dag_list)}")