# Argumentative Causal Discovery

Notebook collecting results for the causal discovery algorithm d-ABA.

In [24]:
import warnings
# Silence every warning category for the rest of the session so the rendered
# notebook output stays compact. NOTE: this also hides pandas warnings.
warnings.filterwarnings('ignore')
import sys
# Make the parent directory importable with highest priority. Guarded so that
# re-running this cell does not keep stacking duplicate entries on sys.path.
if '../' not in sys.path:
    sys.path.insert(0, '../')
import numpy as np
import pandas as pd
# Allow long result tables to be displayed in full.
pd.set_option('display.max_rows', 2000)
import networkx as nx
# The plotting helpers are imported from ../utils/; same duplicate guard as above.
if '../utils/' not in sys.path:
    sys.path.append('../utils/')
from plotting import *
print(sys.path)

# Global switches for this notebook.
save_figs = True
debug = False

# Data-generating processes and model identifiers.
dgp_list = [
    'gauss', 'exp', 'gumbel', 'uniform',
    'mlp', 'mim', 'gp', 'gp-add',
]
model_list = [
    'pc_max', 'cam', 'fgs',
    'nt', 'mcsl', 'spc_bot_kci_001',
]

# Model identifier -> display name.
names_dict = {
    'pc_max': 'Max-PC',
    'fgs': 'FGS',
    'spc_bot_kci_001': 'SPC (Ours)',
    'cam': 'CAM',
    'nt': 'NOTEARS-MLP',
    'mcsl': 'MCSL-MLP',
    'ges': 'GES',
    'random': 'Random',
    'grandag': 'GraN-DAG',
    'abapc': 'ABAPC (Ours)',
}

# Model identifier -> marker symbol name.
symbols_dict = {
    'abapc': 'triangle-down-dot',
    'pc_max': 'square-dot',
    'fgs': 'triangle-up-dot',
    'spc_bot_kci_001': 'diamond-dot',
    'cam': 'triangle-down-dot',
    'nt': 'pentagon-dot',
    'mcsl': 'hexagon2-dot',
    'ges': 'triangle-right',
    'grandag': 'octagon-dot',
    'random': 'x',
}

# Model identifier -> colour; the named colours are expected to come from the
# star-import of the plotting module.
colors_dict = {
    'abapc': sec_green,
    'pc_max': main_blue,
    'fgs': main_green,
    'spc_bot_kci_001': sec_blue,
    'cam': sec_green,
    'nt': main_purple,
    'mcsl': sec_purple,
    'ges': sec_purple,
    'random': 'grey',
    'grandag': sec_orange,
}


['../', '../', '../', '../', '../', '/vol/bitbucket/fr920/ArgCausalDisco/notebooks', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/vol/bitbucket/fr920/envs/discoclean/lib/python3.10/site-packages', '../utils/', '../utils/', '../utils/', '../utils/', '../utils/']


In [25]:
# Suffix of the per-dataset result files loaded further down.
version = 'bnlearn'
# Networks included in the figures. Currently left out:
# 'sachs', 'alarm', 'child', 'insurance', 'hailfinder', 'hepar2'.
datasets = ['cancer', 'earthquake', 'survey', 'asia']

# Load the base results table; the stored array carries no header, so the
# column names are supplied here in the order the array was written.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects. Only
# load result files that were produced by this project.
all_sum = pd.DataFrame(
    np.load("../results/stored_results_realdata_spc.npy", allow_pickle=True),
    columns=[
        'dataset', 'model', 'fdr_mean', 'fdr_std', 'tpr_mean', 'tpr_std',
        'fpr_mean', 'fpr_std', 'shd_mean', 'shd_std', 'nnz_mean', 'nnz_std',
        'precision_mean', 'precision_std', 'recall_mean', 'recall_std',
        'F1_mean', 'F1_std', 'gscore_mean', 'gscore_std', 'SID_mean', 'SID_std',
    ],
)
# Drop unnecessary columns.
all_sum = all_sum.drop(columns=['gscore_mean', 'gscore_std'])
# Keep only the rows belonging to the selected datasets.
all_sum = all_sum[all_sum['dataset'].isin(datasets)]

# Append the per-dataset result files to the base table.
# Frames are collected first and concatenated once, instead of re-copying the
# growing table on every iteration.
frames = [all_sum]
for d_name in datasets:
    # NOTE(security): allow_pickle=True unpickles arbitrary Python objects.
    # Only load result files that were produced by this project.
    d_sum = pd.DataFrame(
        np.load(f"../results/stored_results_{d_name}_{version}.npy", allow_pickle=True),
        columns=[
            'dataset', 'model', 'nnz_mean', 'nnz_std', 'fdr_mean', 'fdr_std',
            'tpr_mean', 'tpr_std', 'fpr_mean', 'fpr_std',
            'precision_mean', 'precision_std', 'recall_mean', 'recall_std',
            'F1_mean', 'F1_std', 'shd_mean', 'shd_std', 'SID_mean', 'SID_std',
        ],
    )
    # Exclude Random as it is already in the base dataset. The explicit copy
    # makes the column assignment below act on an independent frame rather
    # than on a filtered selection.
    d_sum = d_sum[d_sum['model'] != 'Random'].copy()
    # Overwrite the stored dataset label with the short name used in `datasets`.
    d_sum['dataset'] = d_name
    frames.append(d_sum)
all_sum = pd.concat(frames, ignore_index=True)

# Number of nodes / arcs of each reference network, keyed by dataset name.
dags_nodes_map = {
    'asia': 8, 'cancer': 5, 'earthquake': 5, 'sachs': 11, 'survey': 6,
    'alarm': 37, 'child': 20, 'insurance': 27, 'hailfinder': 56, 'hepar2': 70,
}
dags_arcs_map = {
    'asia': 8, 'cancer': 4, 'earthquake': 4, 'sachs': 17, 'survey': 6,
    'alarm': 46, 'child': 25, 'insurance': 52, 'hailfinder': 66, 'hepar2': 123,
}
all_sum['n_edges'] = all_sum['dataset'].map(dags_arcs_map)
all_sum['n_nodes'] = all_sum['dataset'].map(dags_nodes_map)

# Ratio of arcs to the number of node pairs, n * (n - 1) / 2, to two decimals.
max_edges = all_sum['n_nodes'] * (all_sum['n_nodes'] - 1) / 2
all_sum['sparsity'] = (all_sum['n_edges'] / max_edges).round(2)

# Exclude CAM as it returned all nan.
all_sum = all_sum[all_sum['model'] != 'CAM']

# Express SHD and SID (mean and std) relative to the number of arcs.
n_edges = all_sum['n_edges'].astype(int)
for metric in ('shd', 'SID'):
    for stat in ('mean', 'std'):
        all_sum[f'p_{metric}_{stat}'] = all_sum[f'{metric}_{stat}'].astype(float) / n_edges

# Build the multi-line label: upper-cased dataset name followed by |V|, |E|
# and z (the sparsity value), separated by HTML line breaks.
upper_name = all_sum['dataset'].astype(str).str.upper()
all_sum['dataset'] = (
    upper_name
    + "<br> |V|=" + all_sum["n_nodes"].astype(str)
    + "<br> |E|=" + all_sum["n_edges"].astype(str)
    + "<br> z=" + all_sum["sparsity"].astype(str)
)

# One bar chart per arc-normalised metric: 'p_shd' (Fig. 1) and 'p_SID' (Fig. 2).
# bar_chart_plotly presumably comes from the star-import of the plotting module;
# how it uses save_figs / output_name / debug is not visible here -- TODO confirm.
# The two calls are kept as separate top-level statements on purpose: the value
# of a notebook cell's last expression may be displayed as the cell output.
bar_chart_plotly(all_sum, 'p_shd', names_dict, colors_dict, save_figs=save_figs, output_name="../results/figs/Fig.1_SHD_realdata.html", debug=False)
bar_chart_plotly(all_sum, 'p_SID', names_dict, colors_dict, save_figs=save_figs, output_name="../results/figs/Fig.2_SID_realdata.html", debug=False)