In [100]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import json
from typing import Dict, List



stats_for_instances: List = [] 

_, _, stats_filenames = os.walk('./stats/').__next__()
for stat_filename in stats_filenames:
    dataset_name, explained_model, index, date = stat_filename.split('_')
    index = index[1:]
    date = date[:-5]
    print(f'dataset: {dataset_name}, model: {explained_model}, index: {index}, date: {date}')

    with open('./stats/' + stat_filename, 'r') as f:
        stats_dict = json.load(f)

    stats_for_instances.append(stats_dict)
    
    print(f'Stats dict: {stats_dict.keys()}')

dataset: adult, model: tensorflow, index: 0, date: 2023-01-31
Stats dict: dict_keys(['dataset', 'all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'execution_times', 'explainers', 'DATASET_NAME', 'INDEX_TO_EXPLAIN', 'TRAIN_DATASET_PATH', 'TEST_DATASET_PATH', 'CONSTRAINTS_PATH', 'MODEL_PATH', 'EXPLAINED_MODEL_BACKEND', 'SAVE_PATH_SCORES', 'SAVE_PATH_STATS', 'PREFERENCES_RANKING', 'K_NEIGHBORS_FEASIB', 'K_NEIGHBORS_DISCRIMINATIVE', 'ENSEMBLE_INIT_ELAPSED_TIME', 'ENSEMBLE_GENERATION_ELAPSED_TIME', 'ORIGINAL_X_CLASS', 'PARETO_METRICS', 'PARETO_FRONTIERS_ALL'])
dataset: adult, model: tensorflow, index: 10, date: 2023-01-31
Stats dict: dict_keys(['dataset', 'all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'execution_times', 'explainers', 'DATASET_NAME', 'INDEX_TO_EXPLAIN', 'TRAIN_DATASET_PATH', 'TEST_DATASET_PATH', 'CONSTRAINTS_PATH', 'MODEL_PATH', 'EXPLAINED_MODEL_BACKEND', 'SAVE_PATH_SCORES', 'SAVE_PATH_STATS', 'PREFERENCES_RANKING', 'K_NEIGHBORS_FEASIB', 'K

In [101]:

def get_quantitative_stats_from_json(list_of_json_dicts: List[Dict]) -> pd.DataFrame:
    quantitative_overall_df = pd.DataFrame(list_of_json_dicts)[['all_cfs_count', 'valid_cfs_count', 
        'valid_actionable_cfs_count', 'ENSEMBLE_INIT_ELAPSED_TIME', 
        'ENSEMBLE_GENERATION_ELAPSED_TIME', 'PARETO_FRONTIERS_ALL', 'INDEX_TO_EXPLAIN', 'ORIGINAL_X_CLASS'
        ]]

    quantitative_overall_df.set_index('INDEX_TO_EXPLAIN', inplace=True)

    return quantitative_overall_df


get_quantitative_stats_from_json(stats_for_instances)


Unnamed: 0_level_0,all_cfs_count,valid_cfs_count,valid_actionable_cfs_count,ENSEMBLE_INIT_ELAPSED_TIME,ENSEMBLE_GENERATION_ELAPSED_TIME,PARETO_FRONTIERS_ALL,ORIGINAL_X_CLASS
INDEX_TO_EXPLAIN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,85,63,53,7.070595,123.120329,12,>50K
10,83,80,72,9.24286,103.970062,6,>50K
11,80,72,64,7.104679,113.869274,7,<=50K
12,78,63,59,7.280879,111.304396,11,<=50K
13,83,83,77,6.428628,112.93801,10,<=50K
14,83,75,68,6.493649,130.052128,8,<=50K
15,80,61,57,8.397715,127.976832,8,>50K
16,82,63,53,6.434179,113.566957,9,<=50K
17,85,59,49,7.967319,116.16481,6,>50K
18,85,72,66,6.724877,132.323423,8,<=50K


In [102]:
def get_per_method_stats(list_of_json_dicts: List[Dict]) -> Dict:
    
    explainers_dict= {explainer:{} for explainer in np.unique([exp for instance in list_of_json_dicts for exp in instance['explainers']])}
    print(f'Explainers used: {explainers_dict.keys()}')


    for explainer in explainers_dict:
        for field in ['explainer', 'all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'pareto_frontier_count', 'execution_time', 'INDEX_TO_EXPLAIN', 'ORIGINAL_X_CLASS']:
            explainers_dict[explainer][field] = list()


    for instance in list_of_json_dicts:
        for explainer in explainers_dict:
            explainers_dict[explainer]['INDEX_TO_EXPLAIN'].append(instance['INDEX_TO_EXPLAIN'])
            explainers_dict[explainer]['ORIGINAL_X_CLASS'].append(instance['ORIGINAL_X_CLASS'])
            explainers_dict[explainer]['explainer'].append(explainer)

            execution_time_explainer_name = explainer
            if explainer == 'fimap' or explainer == 'cadex':
                execution_time_explainer_name = 'cfec'
            if execution_time_explainer_name in instance['execution_times']:
                explainers_dict[explainer]['execution_time'].append(instance['execution_times'][execution_time_explainer_name])
            else:
                explainers_dict[explainer]['execution_time'].append(np.nan)

            if explainer in instance['explainers']:
                explainers_dict[explainer]['all_cfs_count'].append(instance['explainers'][explainer]['all_cfs_count'])
                explainers_dict[explainer]['valid_cfs_count'].append(instance['explainers'][explainer]['valid_cfs_count'])
                explainers_dict[explainer]['valid_actionable_cfs_count'].append(instance['explainers'][explainer]['valid_actionable_cfs_count'])
                explainers_dict[explainer]['pareto_frontier_count'].append(instance['explainers'][explainer]['pareto_frontier_count'])
            else:
                explainers_dict[explainer]['all_cfs_count'].append(0)
                explainers_dict[explainer]['valid_cfs_count'].append(0)
                explainers_dict[explainer]['valid_actionable_cfs_count'].append(0)
                explainers_dict[explainer]['pareto_frontier_count'].append(0)

    return explainers_dict

explainers_stats_dictionary = get_per_method_stats(stats_for_instances)

Explainers used: dict_keys(['actionable-recourse', 'cadex', 'cem', 'cfproto', 'dice', 'face', 'fimap', 'growing-spheres', 'wachter'])


In [103]:
explainers_stats_dict_of_dfs = {}
for explainer in explainers_stats_dictionary:
    explainers_stats_dict_of_dfs[explainer] = pd.DataFrame(explainers_stats_dictionary[explainer])
    explainers_stats_dict_of_dfs[explainer].set_index('INDEX_TO_EXPLAIN')

explainers_stats_dict_of_dfs['actionable-recourse']

Unnamed: 0,explainer,all_cfs_count,valid_cfs_count,valid_actionable_cfs_count,pareto_frontier_count,execution_time,INDEX_TO_EXPLAIN,ORIGINAL_X_CLASS
0,actionable-recourse,0,0,0,0,11.262158,0,>50K
1,actionable-recourse,1,1,1,0,10.661681,10,>50K
2,actionable-recourse,0,0,0,0,11.465646,11,<=50K
3,actionable-recourse,0,0,0,0,12.525493,12,<=50K
4,actionable-recourse,0,0,0,0,13.372774,13,<=50K
5,actionable-recourse,1,1,1,0,10.266472,14,<=50K
6,actionable-recourse,0,0,0,0,12.343178,15,>50K
7,actionable-recourse,0,0,0,0,9.697931,16,<=50K
8,actionable-recourse,0,0,0,0,11.112375,17,>50K
9,actionable-recourse,0,0,0,0,11.974251,18,<=50K
