In [99]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import json
from typing import Dict, List



stats_for_instances: List = [] 
dataset_name = 'adult' #'german'

idcs = []

_, _, stats_filenames = os.walk('./stats/').__next__()
for stat_filename in stats_filenames:
    if dataset_name not in stat_filename:
        continue 

    dataset_name, explained_model, index, date = stat_filename.split('_')
    index = index[1:]
    date = date[:-5]
    #print(f'dataset: {dataset_name}, model: {explained_model}, index: {index}, date: {date}')
    idcs.append(index)
    with open('./stats/' + stat_filename, 'r') as f:
        stats_dict = json.load(f)

    stats_for_instances.append(stats_dict)
    
    #print(f'Stats dict: {stats_dict.keys()}')

In [100]:
def get_quantitative_stats_from_json(list_of_json_dicts: List[Dict]) -> pd.DataFrame:
    quantitative_overall_df = pd.DataFrame(list_of_json_dicts)[['all_cfs_count', 'valid_cfs_count', 
        'valid_actionable_cfs_count', 'ENSEMBLE_INIT_ELAPSED_TIME', 
        'ENSEMBLE_GENERATION_ELAPSED_TIME', 'PARETO_FRONTIERS_ALL', 'INDEX_TO_EXPLAIN', 'ORIGINAL_X_CLASS'
        ]]

    quantitative_overall_df.set_index('INDEX_TO_EXPLAIN', inplace=True)

    return quantitative_overall_df


quantitative_all = get_quantitative_stats_from_json(stats_for_instances)
quantitative_all

Unnamed: 0_level_0,all_cfs_count,valid_cfs_count,valid_actionable_cfs_count,ENSEMBLE_INIT_ELAPSED_TIME,ENSEMBLE_GENERATION_ELAPSED_TIME,PARETO_FRONTIERS_ALL,ORIGINAL_X_CLASS
INDEX_TO_EXPLAIN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,85,63,53,7.070595,123.120329,12,>50K
100,79,74,71,7.103692,120.707906,10,<=50K
101,81,68,60,6.360001,110.590002,5,>50K
102,81,70,63,6.140965,114.248023,8,<=50K
103,82,62,52,6.459001,110.866095,14,>50K
...,...,...,...,...,...,...,...
96,83,72,54,7.526535,136.087547,8,>50K
97,76,57,49,7.804286,111.827656,9,>50K
98,83,72,65,7.940352,145.433380,6,<=50K
99,77,69,59,7.852502,141.599839,8,<=50K


In [101]:
means = quantitative_all[['all_cfs_count', 'valid_cfs_count', 
    'valid_actionable_cfs_count', 'ENSEMBLE_INIT_ELAPSED_TIME', 
    'ENSEMBLE_GENERATION_ELAPSED_TIME', 'PARETO_FRONTIERS_ALL'
    ]].mean(axis=0)
stds = quantitative_all[['all_cfs_count', 'valid_cfs_count', 
    'valid_actionable_cfs_count', 'ENSEMBLE_INIT_ELAPSED_TIME', 
    'ENSEMBLE_GENERATION_ELAPSED_TIME', 'PARETO_FRONTIERS_ALL'
    ]].std(axis=0)
print(means)
print(stds)

all_cfs_count                        81.528000
valid_cfs_count                      67.060000
valid_actionable_cfs_count           60.172000
ENSEMBLE_INIT_ELAPSED_TIME            6.415339
ENSEMBLE_GENERATION_ELAPSED_TIME    112.554430
PARETO_FRONTIERS_ALL                  8.004000
dtype: float64
all_cfs_count                        2.886267
valid_cfs_count                      9.136207
valid_actionable_cfs_count           7.973753
ENSEMBLE_INIT_ELAPSED_TIME           1.058184
ENSEMBLE_GENERATION_ELAPSED_TIME    14.332678
PARETO_FRONTIERS_ALL                 2.783338
dtype: float64


In [102]:
quantitative_all['ORIGINAL_X_CLASS'].value_counts()

>50K     127
<=50K    123
Name: ORIGINAL_X_CLASS, dtype: int64

PER METHOD

In [103]:
def get_per_method_stats(list_of_json_dicts: List[Dict]) -> Dict:
    
    explainers_dict= {explainer:{} for explainer in np.unique([exp for instance in list_of_json_dicts for exp in instance['explainers']])}
    print(f'Explainers used: {explainers_dict.keys()}')


    for explainer in explainers_dict:
        for field in ['explainer', 'all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'pareto_frontier_count', 'execution_time', 'INDEX_TO_EXPLAIN', 'ORIGINAL_X_CLASS']:
            explainers_dict[explainer][field] = list()


    for instance in list_of_json_dicts:
        for explainer in explainers_dict:
            explainers_dict[explainer]['INDEX_TO_EXPLAIN'].append(instance['INDEX_TO_EXPLAIN'])
            explainers_dict[explainer]['ORIGINAL_X_CLASS'].append(instance['ORIGINAL_X_CLASS'])
            explainers_dict[explainer]['explainer'].append(explainer)

            execution_time_explainer_name = explainer
            if explainer == 'fimap' or explainer == 'cadex':
                execution_time_explainer_name = 'cfec'
            if execution_time_explainer_name in instance['execution_times']:
                explainers_dict[explainer]['execution_time'].append(instance['execution_times'][execution_time_explainer_name])
            else:
                explainers_dict[explainer]['execution_time'].append(np.nan)

            if explainer in instance['explainers']:
                explainers_dict[explainer]['all_cfs_count'].append(instance['explainers'][explainer]['all_cfs_count'])
                explainers_dict[explainer]['valid_cfs_count'].append(instance['explainers'][explainer]['valid_cfs_count'])
                explainers_dict[explainer]['valid_actionable_cfs_count'].append(instance['explainers'][explainer]['valid_actionable_cfs_count'])
                explainers_dict[explainer]['pareto_frontier_count'].append(instance['explainers'][explainer]['pareto_frontier_count'])
            else:
                explainers_dict[explainer]['all_cfs_count'].append(0)
                explainers_dict[explainer]['valid_cfs_count'].append(0)
                explainers_dict[explainer]['valid_actionable_cfs_count'].append(0)
                explainers_dict[explainer]['pareto_frontier_count'].append(0)

    return explainers_dict

explainers_stats_dictionary = get_per_method_stats(stats_for_instances)

Explainers used: dict_keys(['actionable-recourse', 'cadex', 'cem', 'cfproto', 'dice', 'face', 'fimap', 'growing-spheres', 'wachter'])


In [104]:
explainers_stats_dict_of_dfs = {}
for explainer in explainers_stats_dictionary:
    explainers_stats_dict_of_dfs[explainer] = pd.DataFrame(explainers_stats_dictionary[explainer])
    explainers_stats_dict_of_dfs[explainer].set_index('INDEX_TO_EXPLAIN')

explainers_stats_dict_of_dfs['actionable-recourse']['ORIGINAL_X_CLASS']

0       >50K
1      <=50K
2       >50K
3      <=50K
4       >50K
       ...  
245     >50K
246     >50K
247    <=50K
248    <=50K
249     >50K
Name: ORIGINAL_X_CLASS, Length: 250, dtype: object

In [105]:
stats = dict()

In [106]:
cols = ['all_cfs_count', 'valid_cfs_count', 'valid_actionable_cfs_count', 'pareto_frontier_count', 'execution_time']

for method, df in explainers_stats_dict_of_dfs.items():
    print(f'method: {method}')
    #print(f'df: {(df[cols] > 0).mean(axis=0)}')
    print(f'df: {df[cols].mean(axis=0)}')

method: actionable-recourse
df: all_cfs_count                  0.30400
valid_cfs_count                0.16800
valid_actionable_cfs_count     0.16800
pareto_frontier_count          0.03600
execution_time                10.34498
dtype: float64
method: cadex
df: all_cfs_count                 6.528000
valid_cfs_count               6.376000
valid_actionable_cfs_count    6.376000
pareto_frontier_count         1.496000
execution_time                2.410491
dtype: float64
method: cem
df: all_cfs_count                 1.000000
valid_cfs_count               0.504000
valid_actionable_cfs_count    0.504000
pareto_frontier_count         0.084000
execution_time                5.056239
dtype: float64
method: cfproto
df: all_cfs_count                  7.736000
valid_cfs_count                7.720000
valid_actionable_cfs_count     2.248000
pareto_frontier_count          0.228000
execution_time                28.626367
dtype: float64
method: dice
df: all_cfs_count                 20.00000
valid_cfs_cou

PLOTS