In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier

from xai_agg.agg_exp import *
from xai_agg.utils import *

import dill

2025-01-14 17:04:28.660876: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-14 17:04:28.685353: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data reading and preprocessing

In [2]:
raw = pd.read_excel("../data/taiwan.xls", header=1)
display(raw)

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,29996,220000,1,3,1,39,0,0,0,0,...,88004,31237,15980,8500,20000,5003,3047,5000,1000,0
29996,29997,150000,1,3,2,43,-1,-1,-1,-1,...,8979,5190,0,1837,3526,8998,129,0,0,0
29997,29998,30000,1,2,2,37,4,3,2,-1,...,20878,20582,19357,0,0,22000,4200,2000,3100,1
29998,29999,80000,1,3,1,41,1,-1,0,0,...,52774,11855,48944,85900,3409,1178,1926,52964,1804,1


In [3]:
# Trim columns and fix header
raw = raw.drop(columns=['ID'])
display(raw)

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
1,120000,2,2,2,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,2,2,2,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,2,2,1,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,220000,1,3,1,39,0,0,0,0,0,...,88004,31237,15980,8500,20000,5003,3047,5000,1000,0
29996,150000,1,3,2,43,-1,-1,-1,-1,0,...,8979,5190,0,1837,3526,8998,129,0,0,0
29997,30000,1,2,2,37,4,3,2,-1,0,...,20878,20582,19357,0,0,22000,4200,2000,3100,1
29998,80000,1,3,1,41,1,-1,0,0,0,...,52774,11855,48944,85900,3409,1178,1926,52964,1804,1


In [4]:
preprocessed_data = raw.copy()

# Mapping categorical veriables:
preprocessed_data['SEX'] = preprocessed_data['SEX'].map({1: 'male', 2: 'female'})
preprocessed_data['EDUCATION'] = preprocessed_data['EDUCATION'].map({1: 'graduate_school', 2: 'university', 3: 'high_school', 4: 'others'})
preprocessed_data['MARRIAGE'] = preprocessed_data['MARRIAGE'].map({1: 'married', 2: 'single', 3: 'others'})

categorical_features = ["SEX", "EDUCATION", "MARRIAGE"]

# Set all other columns to float
for column in preprocessed_data.columns:
    if column not in categorical_features:
        preprocessed_data[column] = preprocessed_data[column].astype(int)

# One-hot encoding
preprocessed_data = pd.get_dummies(preprocessed_data, columns=categorical_features, dtype='int64', drop_first=True)

# Stratified sampling
preprocessed_data, _ = train_test_split(preprocessed_data, test_size=0.9, stratify=preprocessed_data['default payment next month'], random_state=42)

display(preprocessed_data)
display(preprocessed_data.info())


Unnamed: 0,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,...,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month,SEX_male,EDUCATION_high_school,EDUCATION_others,EDUCATION_university,MARRIAGE_others,MARRIAGE_single
26583,360000,34,0,0,0,-1,-1,-1,3187,3758,...,4001,5001,4001,0,1,0,0,0,0,0
23787,30000,49,1,2,-1,-1,-1,2,10440,9972,...,1731,0,5517,0,0,0,0,1,0,0
29320,110000,26,0,0,0,0,0,0,65215,130746,...,2735,2829,3138,0,1,0,0,0,0,1
22495,50000,46,-2,-1,0,0,0,0,-1494,2590,...,664,689,761,0,0,1,0,0,1,0
13950,400000,44,0,0,0,0,0,0,6902,7904,...,219,234,159,0,1,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18231,140000,50,0,0,0,2,0,0,143090,139095,...,5341,4830,4621,0,1,1,0,0,0,0
11679,100000,48,-1,-1,-1,0,0,0,16584,16009,...,10000,3000,2670,0,0,1,0,0,0,1
27511,20000,55,0,0,2,0,0,2,10301,12871,...,1777,0,4859,1,1,0,0,1,0,0
7207,200000,44,-1,2,-1,-1,0,0,500,500,...,10000,35000,10000,0,0,0,0,1,0,0


<class 'pandas.core.frame.DataFrame'>
Index: 3000 entries, 26583 to 18884
Data columns (total 27 columns):
 #   Column                      Non-Null Count  Dtype
---  ------                      --------------  -----
 0   LIMIT_BAL                   3000 non-null   int64
 1   AGE                         3000 non-null   int64
 2   PAY_0                       3000 non-null   int64
 3   PAY_2                       3000 non-null   int64
 4   PAY_3                       3000 non-null   int64
 5   PAY_4                       3000 non-null   int64
 6   PAY_5                       3000 non-null   int64
 7   PAY_6                       3000 non-null   int64
 8   BILL_AMT1                   3000 non-null   int64
 9   BILL_AMT2                   3000 non-null   int64
 10  BILL_AMT3                   3000 non-null   int64
 11  BILL_AMT4                   3000 non-null   int64
 12  BILL_AMT5                   3000 non-null   int64
 13  BILL_AMT6                   3000 non-null   int64
 14  PAY_AMT1

None

# Fitting Classifier

In [5]:
y = preprocessed_data['default payment next month']
X = preprocessed_data.drop(columns='default payment next month')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'ROC AUC: {roc_auc_score(y_test, y_pred)}')

Accuracy: 0.82
ROC AUC: 0.6614906832298137


In [7]:
# Check for dtype('O') in the dataframe
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2400 entries, 16696 to 12561
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   LIMIT_BAL              2400 non-null   int64
 1   AGE                    2400 non-null   int64
 2   PAY_0                  2400 non-null   int64
 3   PAY_2                  2400 non-null   int64
 4   PAY_3                  2400 non-null   int64
 5   PAY_4                  2400 non-null   int64
 6   PAY_5                  2400 non-null   int64
 7   PAY_6                  2400 non-null   int64
 8   BILL_AMT1              2400 non-null   int64
 9   BILL_AMT2              2400 non-null   int64
 10  BILL_AMT3              2400 non-null   int64
 11  BILL_AMT4              2400 non-null   int64
 12  BILL_AMT5              2400 non-null   int64
 13  BILL_AMT6              2400 non-null   int64
 14  PAY_AMT1               2400 non-null   int64
 15  PAY_AMT2               2400 non-null  

# Experiments

In [8]:
from xai_agg.exp_utils import *

# Evaluating Old and New Metric Sets

### Execution

In [11]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    metrics_sets=[
        ['complexity', 'sensitivity_spearman', 'faithfulness_corr'],
        ['nrc', 'sensitivity_spearman', 'faithfulness_corr']
    ],
    n_instances=5
)

metadata["description"] = "compares entropy complexity with nrc metric sets"
metadata["dataset"] = "taiwan"

with open('pickles/taiwan/COMPARE_entropy-nrc_metricsets_wsum-topsis.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)


Selected indexes: [19255  5627  2536 17926 26013]
Epoch 1/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.4717 - val_loss: 1.1716
Epoch 2/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 689us/step - loss: 1.1728 - val_loss: 1.1214
Epoch 3/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 576us/step - loss: 1.2440 - val_loss: 1.0684
Epoch 4/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 955us/step - loss: 1.0592 - val_loss: 1.0141
Epoch 5/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 730us/step - loss: 1.0200 - val_loss: 0.9638
Epoch 6/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 623us/step - loss: 0.9606 - val_loss: 0.9200
Epoch 7/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step - loss: 0.9244 - val_loss: 0.8836
Epoch 8/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 802us/step - loss: 0.9219 - v

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	 Running instance 26013
Running evaluation for settings 2/2
Explainer components: [<class 'xai_agg.explainers.LimeWrapper'>, <class 'xai_agg.explainers.ShapTabularTreeWrapper'>, <class 'xai_agg.explainers.AnchorWrapper'>], Metrics: ['nrc', 'sensitivity_spearman', 'faithfulness_corr'], MCDM algorithm: <pymcdm.methods.topsis.TOPSIS object at 0x7891e043dc90>, Aggregation algorithm: wsum
	 Running instance 19255
	 Running instance 5627
	 Running instance 2536
	 Running instance 17926


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	 Running instance 26013


### Analysis

In [12]:
with open('pickles/taiwan/COMPARE_entropy-nrc_metricsets_wsum-topsis.pkl', 'rb') as f:
    exp = dill.load(f)

In [13]:
for i, metrics in enumerate([['complexity', 'sensitivity_spearman', 'faithfulness_corr'], 
                             ['nrc', 'sensitivity_spearman', 'faithfulness_corr']]):
    print(f"With metrics: {metrics}\n")
    display(exp.results[i])
    print(f"Worst case avoidances: {count_worst_case_avoidances(exp.results[i], [False, True, True])}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))


With metrics: ['complexity', 'sensitivity_spearman', 'faithfulness_corr']



[                        complexity  sensitivity_spearman  faithfulness_corr
 LimeWrapper               2.786393              0.917402           0.443013
 ShapTabularTreeWrapper    2.743271              0.979897           0.249862
 AnchorWrapper             1.098431              0.361873           0.778527
 AggregateExplainer        2.806789              0.908034           0.530775,
                         complexity  sensitivity_spearman  faithfulness_corr
 LimeWrapper               2.759148              0.901538           0.077672
 ShapTabularTreeWrapper    2.866608              0.989880           0.466510
 AnchorWrapper             1.271646              0.486696           0.444819
 AggregateExplainer        2.796732              0.913778           0.224522,
                         complexity  sensitivity_spearman  faithfulness_corr
 LimeWrapper               2.801879              0.946120           0.684643
 ShapTabularTreeWrapper    2.656588              0.978735           0.0068

Worst case avoidances: 3
AVG:


Unnamed: 0,complexity,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.783403,0.918892,0.527264
AnchorWrapper,1.405867,0.487454,0.501995
LimeWrapper,2.774386,0.923515,0.382097
ShapTabularTreeWrapper,2.680947,0.985901,0.308399


With metrics: ['nrc', 'sensitivity_spearman', 'faithfulness_corr']



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             49.130909              0.861675           0.271738
 ShapTabularTreeWrapper  48.274016              0.978803           0.516890
 AnchorWrapper           32.083576              0.493920           0.605637
 AggregateExplainer      52.925210              0.939692           0.522625,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             49.522366              0.915761           0.085130
 ShapTabularTreeWrapper  52.110885              0.984137           0.254675
 AnchorWrapper           34.854569              0.600430           0.377724
 AggregateExplainer      43.546883              0.898667           0.158574,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             49.618992              0.904684           0.676962
 ShapTabularTreeWrapper  43.152273              0.980718           0.580295
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,47.530527,0.917894,0.448311
AnchorWrapper,38.036639,0.50602,0.381352
LimeWrapper,47.578222,0.903234,0.34425
ShapTabularTreeWrapper,47.851201,0.985532,0.546


### Discussion

Metric set containing nrc metric for complexity avoided the worst case in 3/5 instances, whereas the metric set with the entropy-based complexity only avoided the worst case in 2/5 instances.

# Evaluating Score and Rank Based Faithfulness Metrics
### Execution

In [10]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    metrics_sets=[
        ['nrc', 'sensitivity_spearman', 'faithfulness_corr'],
        ['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr']
    ],
    n_instances=5,
    mp_jobs=5
)

metadata["description"] = "compares score and rank-based faithfulness metrics"

with open('pickles/taiwan/COMPARE_score_rb_faithfulness_wsum-topsis.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [ 7080 22629 13325 19611 27735]
Epoch 1/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.3293 - val_loss: 1.1401
Epoch 2/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 565us/step - loss: 1.2525 - val_loss: 1.0622
Epoch 3/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 689us/step - loss: 1.3293 - val_loss: 0.9873
Epoch 4/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 544us/step - loss: 1.1057 - val_loss: 0.9285
Epoch 5/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 621us/step - loss: 0.9643 - val_loss: 0.8876
Epoch 6/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 612us/step - loss: 1.0315 - val_loss: 0.8572
Epoch 7/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 551us/step - loss: 0.8319 - val_loss: 0.8335
Epoch 8/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 677us/step - loss: 0.8677 - v

### Analysis

In [9]:
with open('pickles/taiwan/COMPARE_score_rb_faithfulness_wsum-topsis.pkl', 'rb') as f:
    exp = dill.load(f)

In [10]:
for i, metrics in enumerate([['nrc', 'sensitivity_spearman', 'faithfulness_corr'],
                             ['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr']]):
    print(f"With metrics: {metrics}\n")
    display(exp.results[i])
    print(f"Worst case avoidances: {count_worst_case_avoidances(exp.results[i], [False, True, True])}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))

With metrics: ['nrc', 'sensitivity_spearman', 'faithfulness_corr']



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.010370              0.914530           0.330250
 ShapTabularTreeWrapper  50.585923              1.000000           0.061652
 AnchorWrapper           38.602651              0.455673           0.450056
 AggregateExplainer      47.981070              0.895795           0.134374,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             54.953062              0.914735           0.175999
 ShapTabularTreeWrapper  44.822249              1.000000           0.010776
 AnchorWrapper           33.698242              0.382904           0.261370
 AggregateExplainer      46.938766              0.940581           0.634156,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             46.666058              0.877949           0.472295
 ShapTabularTreeWrapper  51.062869              1.000000           0.048535
 AnchorWra

Worst case avoidances: [5]
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,47.10335,0.890243,0.451852
AnchorWrapper,36.634154,0.361006,0.244447
LimeWrapper,49.281928,0.910441,0.315738
ShapTabularTreeWrapper,47.790792,1.0,0.170107


With metrics: ['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr']



[                              nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             47.490391              0.917744              0.304184
 ShapTabularTreeWrapper  50.585923              1.000000              0.531678
 AnchorWrapper           42.823345              0.577604              0.462273
 AggregateExplainer      53.477377              0.887863              0.107627,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             44.321539              0.941197              0.063631
 ShapTabularTreeWrapper  44.822249              1.000000              0.201056
 AnchorWrapper           32.083576              0.314104              0.663752
 AggregateExplainer      43.456029              0.912957              0.188762,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             43.111050              0.901060              0.479209
 ShapTabularTreeWrapper  51.062869              1.

Worst case avoidances: [1]
AVG:


Unnamed: 0,nrc,sensitivity_spearman,rb_faithfulness_corr
AggregateExplainer,49.558678,0.883829,0.178183
AnchorWrapper,36.380603,0.35355,0.312376
LimeWrapper,45.610187,0.921791,0.399882
ShapTabularTreeWrapper,47.790792,1.0,0.312773


# Evaluating Rank aggregation algorithms

### Execution

In [10]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    aggregation_algs=["wsum", "w_bordafuse", "w_condorcet"],
    n_instances=5
)

metadata["description"] = "compares wsum, w_bordafuse, w_condorcet aggregation algorithms"

with open('pickles/taiwan/COMPARE_wsum-w_bordafuse-w_condorcet.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [ 4267  7307  5528 14164 15931]
Epoch 1/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.6301 - val_loss: 1.1359
Epoch 2/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 647us/step - loss: 1.1353 - val_loss: 1.0634
Epoch 3/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 773us/step - loss: 1.1536 - val_loss: 0.9871
Epoch 4/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 787us/step - loss: 0.9692 - val_loss: 0.9275
Epoch 5/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 643us/step - loss: 0.8599 - val_loss: 0.8871
Epoch 6/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 658us/step - loss: 0.8673 - val_loss: 0.8599
Epoch 7/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.9330 - val_loss: 0.8405
Epoch 8/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 630us/step - loss: 0.9445 - val

### Analysis

In [13]:
with open('pickles/taiwan/COMPARE_wsum-w_bordafuse-w_condorcet.pkl', 'rb') as f:
    exp = dill.load(f)

In [52]:
for i, method in enumerate(["wsum", "w_bordafuse", "w_condorcet"]):
    print(f"{method}:\n")
    display(exp.results[i])
    print(f"Worst case avoidances: {count_worst_case_avoidances(exp.results[i], [False, True, True])}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")


wsum:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.157053              0.887179           0.312295
 ShapTabularTreeWrapper  41.586462              0.982222           0.062759
 AnchorWrapper           40.218378              0.512559           0.006311
 AggregateExplainer      49.008228              0.795214           0.024838,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             50.402375              0.914803           0.503255
 ShapTabularTreeWrapper  45.363529              0.988444           0.242365
 AnchorWrapper           32.083576              0.486820           0.019414
 AggregateExplainer      44.627533              0.903863           0.263261,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.344743              0.909402           0.248873
 ShapTabularTreeWrapper  49.398348              0.987350           0.383740
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,49.951387,0.886961,0.303638
AnchorWrapper,34.264735,0.476161,0.139806
LimeWrapper,46.975617,0.909662,0.450901
ShapTabularTreeWrapper,46.700221,0.983809,0.331052




w_bordafuse:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.019394              0.891556           0.077251
 ShapTabularTreeWrapper  41.586462              0.960410           0.388768
 AnchorWrapper           45.038050              0.255187           0.042755
 AggregateExplainer      63.911835              0.845949           0.037002,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.688303              0.911316           0.201113
 ShapTabularTreeWrapper  45.363529              0.987350           0.708551
 AnchorWrapper           32.083576              0.456837           0.456043
 AggregateExplainer      43.006806              0.920821           0.235954,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             42.663026              0.891487           0.625493
 ShapTabularTreeWrapper  49.398348              0.984479           0.150781
 AnchorWra

Worst case avoidances: 3
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,51.333105,0.884526,0.273263
AnchorWrapper,37.637966,0.370197,0.40125
LimeWrapper,46.799325,0.906448,0.357288
ShapTabularTreeWrapper,46.700221,0.976656,0.386589




w_condorcet:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.037098              0.914393           0.216121
 ShapTabularTreeWrapper  41.586462              0.964103           0.071585
 AnchorWrapper           43.859254              0.402035           0.456860
 AggregateExplainer      47.915023                   NaN           0.045849,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             62.077262              0.940308           0.177738
 ShapTabularTreeWrapper  45.363529              0.985231           0.022379
 AnchorWrapper           41.402398              0.521908           0.547157
 AggregateExplainer      50.146251                   NaN           0.259553,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             51.958118              0.911248           0.598992
 ShapTabularTreeWrapper  49.398348              0.986051           0.084812
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,51.72208,0.512205,0.119749
AnchorWrapper,37.36803,0.45624,0.535883
LimeWrapper,51.322907,0.921805,0.439264
ShapTabularTreeWrapper,46.700221,0.97812,0.056261






### Discussion

w_condorcet actually performed a bit better here.

# Evaluating MCDM Algs

### Execution

In [10]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    mcdm_algs=[pymcdm.methods.TOPSIS(), pymcdm.methods.COPRAS(),
               pymcdm.methods.PROMETHEE_II(preference_function="usual"),
               pymcdm.methods.ARAS(), pymcdm.methods.COCOSO(),
               pymcdm.methods.CODAS(), pymcdm.methods.EDAS(), pymcdm.methods.MABAC()],
    n_instances=5
)

metadata["description"] = "compares TOPSIS, COPRAS, PROMETHEE_II, ARAS, COCOSO, CODAS, EDAS, MABAC MCDM algorithms"

with open('pickles/taiwan/COMPARE_mcdm_algs.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [27754  6881 14607  7969 11679]
Epoch 1/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2282 - val_loss: 1.1341
Epoch 2/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 645us/step - loss: 1.1694 - val_loss: 1.0626
Epoch 3/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 720us/step - loss: 1.0186 - val_loss: 0.9910
Epoch 4/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.0286 - val_loss: 0.9310
Epoch 5/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 750us/step - loss: 0.8964 - val_loss: 0.8878
Epoch 6/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 654us/step - loss: 0.9174 - val_loss: 0.8577
Epoch 7/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 598us/step - loss: 0.9130 - val_loss: 0.8349
Epoch 8/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 657us/step - loss: 0.8665 - val

### Analysis

In [16]:
with open('pickles/taiwan/COMPARE_mcdm_algs.pkl', 'rb') as f:
    exp = dill.load(f)

In [54]:
methods = ["TOPSIS", "COPRAS", "PROMETHEE_II", "ARAS", "COCOSO", "CODAS", "EDAS", "MABAC"]
for i, method in enumerate(methods):
    print(f"{method}:\n")
    display(exp.results[i])
    print(f"Worst case avoidances: {count_worst_case_avoidances(exp.results[i], [False, True, True])}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")


TOPSIS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.157053              0.887179           0.312295
 ShapTabularTreeWrapper  41.586462              0.982222           0.062759
 AnchorWrapper           40.218378              0.512559           0.006311
 AggregateExplainer      49.008228              0.795214           0.024838,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             50.402375              0.914803           0.503255
 ShapTabularTreeWrapper  45.363529              0.988444           0.242365
 AnchorWrapper           32.083576              0.486820           0.019414
 AggregateExplainer      44.627533              0.903863           0.263261,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.344743              0.909402           0.248873
 ShapTabularTreeWrapper  49.398348              0.987350           0.383740
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,49.951387,0.886961,0.303638
AnchorWrapper,34.264735,0.476161,0.139806
LimeWrapper,46.975617,0.909662,0.450901
ShapTabularTreeWrapper,46.700221,0.983809,0.331052




COPRAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.019394              0.891556           0.077251
 ShapTabularTreeWrapper  41.586462              0.960410           0.388768
 AnchorWrapper           45.038050              0.255187           0.042755
 AggregateExplainer      63.911835              0.845949           0.037002,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.688303              0.911316           0.201113
 ShapTabularTreeWrapper  45.363529              0.987350           0.708551
 AnchorWrapper           32.083576              0.456837           0.456043
 AggregateExplainer      43.006806              0.920821           0.235954,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             42.663026              0.891487           0.625493
 ShapTabularTreeWrapper  49.398348              0.984479           0.150781
 AnchorWra

Worst case avoidances: 3
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,51.333105,0.884526,0.273263
AnchorWrapper,37.637966,0.370197,0.40125
LimeWrapper,46.799325,0.906448,0.357288
ShapTabularTreeWrapper,46.700221,0.976656,0.386589




PROMETHEE_II:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.037098              0.914393           0.216121
 ShapTabularTreeWrapper  41.586462              0.964103           0.071585
 AnchorWrapper           43.859254              0.402035           0.456860
 AggregateExplainer      47.915023                   NaN           0.045849,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             62.077262              0.940308           0.177738
 ShapTabularTreeWrapper  45.363529              0.985231           0.022379
 AnchorWrapper           41.402398              0.521908           0.547157
 AggregateExplainer      50.146251                   NaN           0.259553,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             51.958118              0.911248           0.598992
 ShapTabularTreeWrapper  49.398348              0.986051           0.084812
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,51.72208,0.512205,0.119749
AnchorWrapper,37.36803,0.45624,0.535883
LimeWrapper,51.322907,0.921805,0.439264
ShapTabularTreeWrapper,46.700221,0.97812,0.056261




ARAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             50.668161              0.915829           0.448800
 ShapTabularTreeWrapper  41.586462              0.964034           0.104675
 AnchorWrapper           41.228917              0.430541           0.679359
 AggregateExplainer      47.579348              0.798222           0.022720,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             57.437129              0.949197           0.700566
 ShapTabularTreeWrapper  45.363529              0.984342           0.171581
 AnchorWrapper           32.083576              0.548183           0.017072
 AggregateExplainer      45.389337              0.895179           0.066656,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.459105              0.919385           0.594954
 ShapTabularTreeWrapper  49.398348              0.985846           0.136083
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,49.879248,0.899966,0.247301
AnchorWrapper,34.918529,0.466502,0.301545
LimeWrapper,52.725953,0.923815,0.604235
ShapTabularTreeWrapper,46.700221,0.977641,0.199728




COCOSO:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.192341              0.904205           0.116475
 ShapTabularTreeWrapper  41.586462              0.967932           0.056402
 AnchorWrapper           47.377655              0.461744           0.395891
 AggregateExplainer      55.099402              0.808684           0.000081,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.384942              0.931145           0.240287
 ShapTabularTreeWrapper  45.363529              0.987692           0.254673
 AnchorWrapper           33.698242              0.512259           0.323789
 AggregateExplainer      44.655179              0.883897           0.238941,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.096658              0.904547           0.466104
 ShapTabularTreeWrapper  49.398348              0.986325           0.341495
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,51.726262,0.882653,0.295611
AnchorWrapper,36.811067,0.412629,0.314956
LimeWrapper,49.523676,0.91881,0.216482
ShapTabularTreeWrapper,46.700221,0.977518,0.269213




CODAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             50.368011              0.905709           0.248283
 ShapTabularTreeWrapper  41.586462              0.968068           0.405018
 AnchorWrapper           45.028344              0.352769           0.178828
 AggregateExplainer      48.815899              0.312821           0.102050,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             49.426916              0.918085           0.379170
 ShapTabularTreeWrapper  45.363529              0.987145           0.121868
 AnchorWrapper           35.796679              0.391166           0.060061
 AggregateExplainer      56.934398              0.443145           0.022709,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             46.270954              0.898803           0.077676
 ShapTabularTreeWrapper  49.398348              0.982154           0.284819
 AnchorWra

Worst case avoidances: 1
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,53.58063,0.469853,0.122121
AnchorWrapper,38.520153,0.415544,0.232194
LimeWrapper,49.265097,0.907692,0.194186
ShapTabularTreeWrapper,46.700221,0.979186,0.301911




EDAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.079043              0.879316           0.378696
 ShapTabularTreeWrapper  41.586462              0.966769           0.389721
 AnchorWrapper           45.038050              0.406045           0.555740
 AggregateExplainer      46.818976              0.797744           0.207982,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             61.348708              0.927043           0.046171
 ShapTabularTreeWrapper  45.363529              0.986803           0.476828
 AnchorWrapper           32.083576              0.414504           0.202449
 AggregateExplainer      50.889105              0.865368           0.257586,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             49.467113              0.881299           0.534033
 ShapTabularTreeWrapper  49.398348              0.985026           0.002156
 AnchorWra

Worst case avoidances: 4
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,48.572541,0.865559,0.324108
AnchorWrapper,36.100043,0.408744,0.338635
LimeWrapper,50.894932,0.899665,0.360901
ShapTabularTreeWrapper,46.700221,0.979268,0.326354




MABAC:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             41.211886              0.895248           0.013413
 ShapTabularTreeWrapper  41.586462              0.955897           0.245564
 AnchorWrapper           40.134717              0.323010           0.056298
 AggregateExplainer      42.400087              0.346530           0.091412,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             50.709352              0.913846           0.687092
 ShapTabularTreeWrapper  45.363529              0.985778           0.282181
 AnchorWrapper           32.083576              0.425930           0.120219
 AggregateExplainer      47.915877              0.386393           0.079778,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.611853              0.935453           0.608156
 ShapTabularTreeWrapper  49.398348              0.985162           0.694876
 AnchorWra

Worst case avoidances: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,44.835789,0.442694,0.227925
AnchorWrapper,34.759358,0.443367,0.209042
LimeWrapper,47.713637,0.909156,0.364946
ShapTabularTreeWrapper,46.700221,0.977067,0.38015






### Discussion

EDAS performed the best. TOPSIS was in the middle.

# RAE-T vs. RAE-E | 10 samples
### Execution

In [9]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    metrics_sets=[['nrc', 'sensitivity_spearman', 'faithfulness_corr']],
    mcdm_algs=[pymcdm.methods.TOPSIS(), pymcdm.methods.EDAS()],
    n_instances=10,
    mp_jobs=5
)

metadata["description"] = "RAE-T vs RAE-S, 10 samples"

with open('pickles/taiwan/RAE-T_vs_RAE-S_10.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [19949 15644 17447 21012 20704 20396 18523  4634 27555 24466]
Epoch 1/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.2636 - val_loss: 1.1461
Epoch 2/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 629us/step - loss: 1.1973 - val_loss: 1.0716
Epoch 3/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 557us/step - loss: 0.9890 - val_loss: 0.9925
Epoch 4/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 613us/step - loss: 1.0379 - val_loss: 0.9277
Epoch 5/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step - loss: 0.9040 - val_loss: 0.8796
Epoch 6/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 679us/step - loss: 0.8461 - val_loss: 0.8449
Epoch 7/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 607us/step - loss: 0.9156 - val_loss: 0.8188
Epoch 8/500
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

### Analysis

In [10]:
with open('pickles/taiwan/RAE-T_vs_RAE-S_10.pkl', 'rb') as f:
    exp = dill.load(f)

In [11]:
methods = ["RAE-T", "RAE-E"]
for i, method in enumerate(methods):
    print(f"{method}:\n")
    display(exp.results[i])
    wca = count_worst_case_avoidances(exp.results[i], [False, True, True], 1)
    print(f"Worst case avoidances:\n\t- for all metrics: {wca[0]}\n\t- for 2/3 metrics: {wca[1]}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")

RAE-T:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.604585              0.948034           0.042001
 ShapTabularTreeWrapper  52.258878              0.979829           0.471586
 AnchorWrapper           34.559361              0.216387           0.089549
 AggregateExplainer      48.430036              0.738325           0.077398,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.664515              0.914393           0.784662
 ShapTabularTreeWrapper  47.061387              0.985026           0.398224
 AnchorWrapper           33.215663              0.465274           0.344368
 AggregateExplainer      45.568359              0.898051           0.605866,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.074963              0.937778           0.561868
 ShapTabularTreeWrapper  49.734971              0.983658           0.845822
 AnchorWra

Worst case avoidances:
	- for all metrics: 8
	- for 2/3 metrics: 9
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,46.568268,0.861648,0.462737
AnchorWrapper,36.791888,0.413498,0.254753
LimeWrapper,47.741458,0.912267,0.395809
ShapTabularTreeWrapper,47.04083,0.985545,0.589792




RAE-E:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.563746              0.902427           0.294325
 ShapTabularTreeWrapper  52.258878              0.985709           0.218616
 AnchorWrapper           43.869815              0.338814           0.507861
 AggregateExplainer      45.448167              0.736205           0.448285,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             46.639614              0.910838           0.033280
 ShapTabularTreeWrapper  47.061387              0.986120           0.420797
 AnchorWrapper           33.698242              0.268898           0.479674
 AggregateExplainer      49.140219              0.869949           0.137662,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             49.664491              0.932239           0.309342
 ShapTabularTreeWrapper  49.734971              0.986667           0.836631
 AnchorWra

Worst case avoidances:
	- for all metrics: 5
	- for 2/3 metrics: 10
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,46.317403,0.870352,0.344014
AnchorWrapper,37.263536,0.424439,0.370402
LimeWrapper,49.266959,0.918728,0.379694
ShapTabularTreeWrapper,47.04083,0.987063,0.581125




