In [60]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import json
from typing import Dict, List



stats_for_instances: List = [] 

_, _, stats_filenames = os.walk('./stats/').__next__()
for stat_filename in stats_filenames:
    dataset_name, explained_model, index, date = stat_filename.split('_')
    index = index[1:]
    date = date[:-5]
    print(f'dataset: {dataset_name}, model: {explained_model}, index: {index}, date: {date}')

    with open('./stats/' + stat_filename, 'r') as f:
        stats_dict = json.load(f)

    stats_for_instances.append(stats_dict)
    
    print(f'Stats dict: {stats_dict.keys()}')

dataset: adult, model: tensorflow, index: 0, date: 2023-01-31
Stats dict: dict_keys(['dataset', 'all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'execution_times', 'explainers', 'DATASET_NAME', 'INDEX_TO_EXPLAIN', 'TRAIN_DATASET_PATH', 'TEST_DATASET_PATH', 'CONSTRAINTS_PATH', 'MODEL_PATH', 'EXPLAINED_MODEL_BACKEND', 'SAVE_PATH_SCORES', 'SAVE_PATH_STATS', 'PREFERENCES_RANKING', 'K_NEIGHBORS_FEASIB', 'K_NEIGHBORS_DISCRIMINATIVE', 'ENSEMBLE_INIT_ELAPSED_TIME', 'ENSEMBLE_GENERATION_ELAPSED_TIME', 'PARETO_METRICS', 'PARETO_FRONTIERS_ALL'])
dataset: adult, model: tensorflow, index: 126, date: 2023-01-31
Stats dict: dict_keys(['dataset', 'all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'execution_times', 'explainers', 'TRAIN_DATASET_PATH', 'TEST_DATASET_PATH', 'DATASET_NAME', 'CONSTRAINTS_PATH', 'MODEL_PATH', 'EXPLAINED_MODEL_BACKEND', 'INDEX_TO_EXPLAIN', 'SAVE_PATH_SCORES', 'SAVE_PATH_STATS', 'PREFERENCES_RANKING', 'K_NEIGHBORS_FEASIB', 'K_NEIGHBORS_DISCRIMI

In [61]:

def get_quantitative_stats_from_json(list_of_json_dicts: List[Dict]) -> pd.DataFrame:
    quantitative_overall_df = pd.DataFrame(list_of_json_dicts)[['all_cfs_count', 'valid_cfs_count', 
        'valid_actionable_cfs_count', 'ENSEMBLE_INIT_ELAPSED_TIME', 
        'ENSEMBLE_GENERATION_ELAPSED_TIME', 'PARETO_FRONTIERS_ALL', 'INDEX_TO_EXPLAIN', 'ORIGINAL_X_CLASS'
        ]]

    quantitative_overall_df.set_index('INDEX_TO_EXPLAIN', inplace=True)

    return quantitative_overall_df


get_quantitative_stats_from_json(stats_for_instances)


Unnamed: 0_level_0,all_cfs_count,valid_cfs_count,valid_actionable_cfs_count,ENSEMBLE_INIT_ELAPSED_TIME,ENSEMBLE_GENERATION_ELAPSED_TIME,PARETO_FRONTIERS_ALL
INDEX_TO_EXPLAIN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,83,54,45,6.622503,103.523864,14
126,82,63,60,7.363068,113.381062,6
1,81,55,51,5.869629,97.095417,6
2,79,56,49,5.870757,92.59809,9
3,85,58,48,5.953289,101.884135,8
4,85,73,63,5.837488,113.501006,7


In [62]:
def get_per_method_stats(list_of_json_dicts: List[Dict]) -> Dict:
    
    explainers_dict= {explainer:{} for explainer in np.unique([exp for instance in list_of_json_dicts for exp in instance['execution_times']])}
    print(f'Explainers used: {explainers_dict.keys()}')


    for explainer in explainers_dict:
        for field in ['explainer', 'all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'pareto_frontier_count', 'execution_time', 'INDEX_TO_EXPLAIN', 'ORIGINAL_X_CLASS']:
            explainers_dict[explainer][field] = list()


    for instance in list_of_json_dicts:
        for explainer in explainers_dict:
            explainers_dict[explainer]['INDEX_TO_EXPLAIN'].append(instance['INDEX_TO_EXPLAIN'])
            explainers_dict[explainer]['ORIGINAL_X_CLASS'].append(instance['ORIGINAL_X_CLASS'])
            explainers_dict[explainer]['explainer'].append(explainer)

            if explainer in instance['execution_times']:
                explainers_dict[explainer]['execution_time'].append(instance['execution_times'][explainer])
            else:
                explainers_dict[explainer]['execution_time'].append(np.nan)

            if explainer in instance['explainers']:
                explainers_dict[explainer]['all_cfs_count'].append(instance['explainers'][explainer]['all_cfs_count'])
                explainers_dict[explainer]['valid_cfs_count'].append(instance['explainers'][explainer]['valid_cfs_count'])
                explainers_dict[explainer]['valid_actionable_cfs_count'].append(instance['explainers'][explainer]['valid_actionable_cfs_count'])
                explainers_dict[explainer]['pareto_frontier_count'].append(instance['explainers'][explainer]['pareto_frontier_count'])
            else:
                explainers_dict[explainer]['all_cfs_count'].append(0)
                explainers_dict[explainer]['valid_cfs_count'].append(0)
                explainers_dict[explainer]['valid_actionable_cfs_count'].append(0)
                explainers_dict[explainer]['pareto_frontier_count'].append(0)

    return explainers_dict

explainers_stats_dictionary = get_per_method_stats(stats_for_instances)

Explainers used: dict_keys(['actionable-recourse', 'cem', 'cfec', 'cfproto', 'dice', 'face', 'growing-spheres', 'wachter'])


In [64]:
explainers_stats_dict_of_dfs = {}
for explainer in explainers_stats_dictionary:
    explainers_stats_dict_of_dfs[explainer] = pd.DataFrame(explainers_stats_dictionary[explainer])
    explainers_stats_dict_of_dfs[explainer].set_index('INDEX_TO_EXPLAIN')

explainers_stats_dict_of_dfs['cem']

Unnamed: 0,explainer,all_cfs_count,valid_cfs_count,valid_actionable_cfs_count,pareto_frontier_count,execution_time,INDEX_TO_EXPLAIN
0,cem,1,0,0,0,4.704,0
1,cem,1,0,0,0,5.203762,126
2,cem,1,0,0,0,4.747087,1
3,cem,1,0,0,0,4.843641,2
4,cem,1,0,0,0,4.982055,3
5,cem,1,1,1,0,4.726886,4
