In [9]:
import warnings
warnings.filterwarnings("ignore")

from xai_agg import *

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier

import pandas as pd
import numpy as np

import dill

# Data Loading and Preprocessing

In [10]:
original_data = pd.read_csv('../data/german_credit_data_updated.csv')

# Dataset overview - German Credit Risk (from Kaggle):
# 1. Age (numeric)
# 2. Sex (text: male, female)
# 3. Job (numeric: 0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled)
# 4. Housing (text: own, rent, or free)
# 5. Saving accounts (text - little, moderate, quite rich, rich)
# 6. Checking account (numeric, in DM - Deutsch Mark)
# 7. Credit amount (numeric, in DM)
# 8. Duration (numeric, in month)
# 9. Purpose (text: car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others)

display(original_data.head())
display(original_data.describe())
display(original_data.info())

# Display the unique values of the categorical features:
print('Unique values of the categorical features:')
for col in original_data.select_dtypes(include='object'):
    print(f'\t- {col}: {original_data[col].unique()}')

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Credit Risk
0,0,67,male,2,own,,little,1169,6,radio/TV,1
1,1,22,female,2,own,little,moderate,5951,48,radio/TV,2
2,2,49,male,1,own,little,,2096,12,education,1
3,3,45,male,2,free,little,little,7882,42,furniture/equipment,1
4,4,53,male,2,free,little,little,4870,24,car,2


Unnamed: 0.1,Unnamed: 0,Age,Job,Credit amount,Duration,Credit Risk
count,954.0,954.0,954.0,954.0,954.0,954.0
mean,476.5,35.501048,1.909853,3279.112159,20.780922,1.302935
std,275.540378,11.379668,0.649681,2853.315158,12.046483,0.459768
min,0.0,19.0,0.0,250.0,4.0,1.0
25%,238.25,27.0,2.0,1360.25,12.0,1.0
50%,476.5,33.0,2.0,2302.5,18.0,1.0
75%,714.75,42.0,2.0,3975.25,24.0,2.0
max,953.0,75.0,3.0,18424.0,72.0,2.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        954 non-null    int64 
 1   Age               954 non-null    int64 
 2   Sex               954 non-null    object
 3   Job               954 non-null    int64 
 4   Housing           954 non-null    object
 5   Saving accounts   779 non-null    object
 6   Checking account  576 non-null    object
 7   Credit amount     954 non-null    int64 
 8   Duration          954 non-null    int64 
 9   Purpose           954 non-null    object
 10  Credit Risk       954 non-null    int64 
dtypes: int64(6), object(5)
memory usage: 82.1+ KB


None

Unique values of the categorical features:
	- Sex: ['male' 'female']
	- Housing: ['own' 'free' 'rent']
	- Saving accounts: [nan 'little' 'quite rich' 'rich' 'moderate']
	- Checking account: ['little' 'moderate' nan 'rich']
	- Purpose: ['radio/TV' 'education' 'furniture/equipment' 'car' 'business'
 'domestic appliances' 'repairs' 'vacation/others']


In [11]:
preprocessed_data = original_data.copy()

# For savings and checking accounts, we will replace the missing values with 'none':
preprocessed_data['Saving accounts'].fillna('none', inplace=True)
preprocessed_data['Checking account'].fillna('none', inplace=True)

# Dropping index column:
preprocessed_data.drop(columns=['Unnamed: 0'], inplace=True)

# Using pd.dummies to one-hot-encode the categorical features
preprocessed_data["Job"] = preprocessed_data["Job"].map({0: 'unskilled_nonresident', 1: 'unskilled_resident',
                                                         2: 'skilled', 3: 'highlyskilled'})

categorical_features = preprocessed_data.select_dtypes(include='object').columns
numerical_features = preprocessed_data.select_dtypes(include='number').columns.drop('Credit Risk')
print(f'Categorical features: {categorical_features}')
print(f'Numerical features: {numerical_features}')

preprocessed_data = pd.get_dummies(preprocessed_data, columns=categorical_features, dtype='int64')

# Remapping the target variable to 0 and 1:
preprocessed_data['Credit Risk'] = preprocessed_data['Credit Risk'].map({1: 0, 2: 1})

# Make sure all column names are valid python identifiers (important for pd.query() calls):
preprocessed_data.columns = preprocessed_data.columns.str.replace(' ', '_')
preprocessed_data.columns = preprocessed_data.columns.str.replace('/', '_')

display(preprocessed_data.head())
display(preprocessed_data.info())

Categorical features: Index(['Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account',
       'Purpose'],
      dtype='object')
Numerical features: Index(['Age', 'Credit amount', 'Duration'], dtype='object')


Unnamed: 0,Age,Credit_amount,Duration,Credit_Risk,Sex_female,Sex_male,Job_highlyskilled,Job_skilled,Job_unskilled_nonresident,Job_unskilled_resident,...,Checking_account_none,Checking_account_rich,Purpose_business,Purpose_car,Purpose_domestic_appliances,Purpose_education,Purpose_furniture_equipment,Purpose_radio_TV,Purpose_repairs,Purpose_vacation_others
0,67,1169,6,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
1,22,5951,48,1,1,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,49,2096,12,0,0,1,0,0,0,1,...,1,0,0,0,0,1,0,0,0,0
3,45,7882,42,0,0,1,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,53,4870,24,1,0,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   Age                          954 non-null    int64
 1   Credit_amount                954 non-null    int64
 2   Duration                     954 non-null    int64
 3   Credit_Risk                  954 non-null    int64
 4   Sex_female                   954 non-null    int64
 5   Sex_male                     954 non-null    int64
 6   Job_highlyskilled            954 non-null    int64
 7   Job_skilled                  954 non-null    int64
 8   Job_unskilled_nonresident    954 non-null    int64
 9   Job_unskilled_resident       954 non-null    int64
 10  Housing_free                 954 non-null    int64
 11  Housing_own                  954 non-null    int64
 12  Housing_rent                 954 non-null    int64
 13  Saving_accounts_little       954 non-null    int64

None

In [12]:
y = preprocessed_data['Credit_Risk']
X = preprocessed_data.drop(columns='Credit_Risk')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'ROC AUC: {roc_auc_score(y_test, y_pred)}')

Accuracy: 0.7696335078534031
ROC AUC: 0.6830357142857143


# Experiments

# Evaluating Old and New Complexity Metrics
Running the current setup: wsum, topsis, ['nrc', 'sensitiviy_spearman', 'faithfulness_corr']

### Execution

In [8]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    metrics_sets=[
        ['complexity', 'sensitivity_spearman', 'faithfulness_corr'],
        ['nrc', 'sensitivity_spearman', 'faithfulness_corr']
    ],
    indexes=[629, 213, 485, 218, 703]
)

metadata["description"] = "compares entropy complexity with nrc metric sets"

with open('pickles/german/COMPARE_entropy-nrc_metricsets_wsum-topsis.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)


Selected indexes: [629, 213, 485, 218, 703]
Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2904 - val_loss: 1.2596
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2678 - val_loss: 1.2416
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2638 - val_loss: 1.2253
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2183 - val_loss: 1.2097
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2324 - val_loss: 1.1943
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1837 - val_loss: 1.1791
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1742 - val_loss: 1.1635
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1404 - val_loss: 1.1481
Epoc

### Analysis

In [36]:
with open('pickles/german/COMPARE_entropy-nrc_metricsets_wsum-topsis.pkl', 'rb') as f:
    exp = dill.load(f)

In [37]:
for i, metrics in enumerate([['complexity', 'sensitivity_spearman', 'faithfulness_corr'], 
                             ['nrc', 'sensitivity_spearman', 'faithfulness_corr']]):
    print(f"With metrics: {metrics}\n")
    display(exp.results[i])
    wca = count_worst_case_avoidances(exp.results[i], [False, True, True], 1)
    print(f"Worst case avoidances:\n\t- for all metrics: {wca[0]}\n\t- for 2/3 metrics: {wca[1]}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")
    print("Avg rank:")
    display(get_average_metric_rank(exp.results[i], [False, True, True]))


With metrics: ['complexity', 'sensitivity_spearman', 'faithfulness_corr']



[                        complexity  sensitivity_spearman  faithfulness_corr
 LimeWrapper               2.607385              0.923645           0.504278
 ShapTabularTreeWrapper    2.639087              0.967607           0.038512
 AnchorWrapper             0.692943              0.473214           0.894271
 AggregateExplainer        2.462704              0.964286           0.576280,
                         complexity  sensitivity_spearman  faithfulness_corr
 LimeWrapper               2.538366              0.837833           0.248742
 ShapTabularTreeWrapper    2.513429              0.975094           0.558859
 AnchorWrapper             0.887787              0.564325           0.502479
 AggregateExplainer        2.620076              0.933645           0.232050,
                         complexity  sensitivity_spearman  faithfulness_corr
 LimeWrapper               2.542261              0.853744           0.169501
 ShapTabularTreeWrapper    2.284778              0.964992           0.3814

Worst case avoidances:
	- for all metrics: 4
	- for 2/3 metrics: 4
AVG:


Unnamed: 0,complexity,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.463323,0.930829,0.43176
AnchorWrapper,1.275002,0.62948,0.433368
LimeWrapper,2.575076,0.866256,0.423207
ShapTabularTreeWrapper,2.451083,0.968438,0.435356




Avg rank:


Unnamed: 0,complexity,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.8,2.0,2.8
AnchorWrapper,1.0,4.0,2.4
LimeWrapper,3.6,3.0,2.8
ShapTabularTreeWrapper,2.6,1.0,2.0


With metrics: ['nrc', 'sensitivity_spearman', 'faithfulness_corr']



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.177761              0.870246           0.096595
 ShapTabularTreeWrapper  48.583731              0.977644           0.655846
 AnchorWrapper           37.743216              0.617716           0.952868
 AggregateExplainer      46.923408              0.957438           0.441459,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.906524              0.847389           0.063032
 ShapTabularTreeWrapper  47.987182              0.982287           0.201590
 AnchorWrapper           37.743216              0.500799           0.089648
 AggregateExplainer      45.203013              0.953744           0.293751,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.016105              0.823153           0.144989
 ShapTabularTreeWrapper  43.120370              0.967062           0.170781
 AnchorWra

Worst case avoidances:
	- for all metrics: 3
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,45.502635,0.930181,0.399033
AnchorWrapper,38.521171,0.664532,0.563576
LimeWrapper,46.13345,0.845744,0.24089
ShapTabularTreeWrapper,46.630478,0.975466,0.356992




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.6,2.0,2.6
AnchorWrapper,1.4,3.8,1.6
LimeWrapper,2.8,3.2,3.2
ShapTabularTreeWrapper,3.2,1.0,2.6


### Discussion
This experiment sought to evaluate the performance of the NRC metric when it substitutes the entropy based "complexity" metric.

With both metric sets, the worst metric value was avoided in all samples for sensitivity_spearman and faithfulness_corr. Both NRC and complexity had the same percentage of worst-metric-value-avoidance, with both metrics having 80% (1/5) of the samples avoiding the worst value.
The worst value for the entropy metric is avoided in the average of its samples, while the worst value for the NRC metric is not avoided in the average of its samples.

# Evaluating Score and Rank Based Faithfulness Metrics
### Execution

In [10]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    metrics_sets=[
        ['nrc', 'sensitivity_spearman', 'faithfulness_corr'],
        ['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr']
    ],
    n_instances=5
)

metadata["description"] = "compares score and rank-based faithfulness metrics"

with open('pickles/german/COMPARE_score_rb_faithfulness_wsum-topsis.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)


Selected indexes: [141  49 734 266 361]
Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2674 - val_loss: 1.2382
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2338 - val_loss: 1.2229
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2234 - val_loss: 1.2081
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2195 - val_loss: 1.1932
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2038 - val_loss: 1.1780
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1487 - val_loss: 1.1619
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1507 - val_loss: 1.1457
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1549 - val_loss: 1.1293
Epoch 9/

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.


	 Running instance 49
	 Running instance 734
	 Running instance 266
	 Running instance 361
Running evaluation for settings 2/2
Explainer components: [<class 'xai_agg.explainers.LimeWrapper'>, <class 'xai_agg.explainers.ShapTabularTreeWrapper'>, <class 'xai_agg.explainers.AnchorWrapper'>], Metrics: ['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr'], MCDM algorithm: <pymcdm.methods.topsis.TOPSIS object at 0x76d4480ddff0>, Aggregation algorithm: wsum
	 Running instance 141


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.


	 Running instance 49
	 Running instance 734
	 Running instance 266
	 Running instance 361


### Analysis

In [34]:
with open('pickles/german/COMPARE_score_rb_faithfulness_wsum-topsis.pkl', 'rb') as f:
    exp = dill.load(f)

In [35]:
for i, metrics in enumerate([['nrc', 'sensitivity_spearman', 'faithfulness_corr'],
                             ['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr']]):
    print(f"With metrics: {metrics}\n")
    display(exp.results[i])
    wca = count_worst_case_avoidances(exp.results[i], [False, True, True], 1)
    print(f"Worst case avoidances:\n\t- for all metrics: {wca[0]}\n\t- for 2/3 metrics: {wca[1]}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")
    print("Avg rank:")
    display(get_average_metric_rank(exp.results[i], [False, True, True]))

With metrics: ['nrc', 'sensitivity_spearman', 'faithfulness_corr']



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.354698              0.859113           0.528564
 ShapTabularTreeWrapper  42.794726              0.983052           0.727099
 AnchorWrapper           35.729909              0.788387           0.281283
 AggregateExplainer      48.635210              0.925961           0.696692,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.671694              0.877931           0.327720
 ShapTabularTreeWrapper  47.574733              0.978950           0.483278
 AnchorWrapper           42.893203              0.640118           0.399471
 AggregateExplainer      42.885877              0.930690           0.780619,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.166715              0.827685           0.327057
 ShapTabularTreeWrapper  44.716820              0.963517           0.782988
 AnchorWra

Worst case avoidances:
	- for all metrics: 2
	- for 2/3 metrics: 3
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,46.67559,0.923169,0.427807
AnchorWrapper,37.520479,0.651282,0.588173
LimeWrapper,46.796689,0.860305,0.314623
ShapTabularTreeWrapper,45.774934,0.973114,0.623744




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,3.0,2.0,2.8
AnchorWrapper,1.2,3.6,2.2
LimeWrapper,3.0,2.6,3.4
ShapTabularTreeWrapper,2.8,1.0,1.6


With metrics: ['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr']



[                              nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             46.945603              0.845419              0.637806
 ShapTabularTreeWrapper  42.794726              0.974922              0.429527
 AnchorWrapper           42.633970              0.610176              0.392427
 AggregateExplainer      44.379384              0.898030              0.339480,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             48.424262              0.860788              0.246130
 ShapTabularTreeWrapper  47.574733              0.968899              0.267413
 AnchorWrapper           37.743216              0.596185              0.574737
 AggregateExplainer      45.481963              0.896207              0.018561,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             44.259204              0.880099              0.069922
 ShapTabularTreeWrapper  44.716820              0.

Worst case avoidances:
	- for all metrics: 1
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,rb_faithfulness_corr
AggregateExplainer,45.055485,0.913517,0.158596
AnchorWrapper,38.901292,0.618031,0.563062
LimeWrapper,49.250305,0.862138,0.29374
ShapTabularTreeWrapper,45.774934,0.968537,0.262045




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,rb_faithfulness_corr
AggregateExplainer,2.4,2.0,3.6
AnchorWrapper,1.0,4.0,1.6
LimeWrapper,3.8,3.0,2.6
ShapTabularTreeWrapper,2.8,1.0,2.2


# Evaluating Rank aggregation algorithms

### Execution

In [7]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    aggregation_algs=["wsum", "w_bordafuse", "w_condorcet"],
    indexes=[629, 213, 485, 218, 703],
    n_instances=5
)

metadata["description"] = "compares wsum, w_bordafuse, w_condorcet aggregation algorithms"

with open('pickles/german/COMPARE_wsum-w_bordafuse-w_condorcet.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [629, 213, 485, 218, 703]
Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1.2603 - val_loss: 1.2438
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2752 - val_loss: 1.2257
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2272 - val_loss: 1.2082
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2311 - val_loss: 1.1911
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1949 - val_loss: 1.1744
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1433 - val_loss: 1.1574
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1481 - val_loss: 1.1400
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1358 - val_loss: 1.1229
Epo

### Analysis

In [32]:
with open('pickles/german/COMPARE_wsum-w_bordafuse-w_condorcet.pkl', 'rb') as f:
    exp = dill.load(f)

In [33]:
for i, method in enumerate(["wsum", "w_bordafuse", "w_condorcet"]):
    print(f"{method}:\n")
    display(exp.results[i])
    wca = count_worst_case_avoidances(exp.results[i], [False, True, True], 1)
    print(f"Worst case avoidances:\n\t- for all metrics: {wca[0]}\n\t- for 2/3 metrics: {wca[1]}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")
    print("Avg rank:")
    display(get_average_metric_rank(exp.results[i], [False, True, True]))


wsum:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.834639              0.856158           0.299596
 ShapTabularTreeWrapper  48.583731              0.972407           0.109894
 AnchorWrapper           37.743216              0.731589           0.379407
 AggregateExplainer      49.183939              0.950394           0.593073,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.562959              0.813547           0.614179
 ShapTabularTreeWrapper  47.987182              0.986082           0.283443
 AnchorWrapper           37.743216              0.528527           0.267040
 AggregateExplainer      45.872785              0.929951           0.277345,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             43.777669              0.851429           0.464864
 ShapTabularTreeWrapper  43.120370              0.964353           0.434860
 AnchorWra

Worst case avoidances:
	- for all metrics: 3
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,47.219324,0.919724,0.390187
AnchorWrapper,38.812264,0.676657,0.29958
LimeWrapper,44.946276,0.83798,0.512961
ShapTabularTreeWrapper,46.630478,0.971879,0.349105




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.8,2.0,2.4
AnchorWrapper,1.6,4.0,3.6
LimeWrapper,2.4,3.0,1.4
ShapTabularTreeWrapper,3.2,1.0,2.6


w_bordafuse:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             46.302442              0.890394           0.478538
 ShapTabularTreeWrapper  48.583731              0.972334           0.365453
 AnchorWrapper           42.893203              0.693191           0.437305
 AggregateExplainer      62.759800              0.932956           0.383190,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.489937              0.864729           0.026440
 ShapTabularTreeWrapper  47.987182              0.983580           0.674341
 AnchorWrapper           42.893203              0.554394           0.078678
 AggregateExplainer      60.879271              0.883645           0.232789,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             43.539125              0.860148           0.002892
 ShapTabularTreeWrapper  43.120370              0.966695           0.447880
 AnchorWra

Worst case avoidances:
	- for all metrics: 0
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,64.67612,0.915498,0.293481
AnchorWrapper,38.651371,0.714872,0.38759
LimeWrapper,44.617306,0.865626,0.171087
ShapTabularTreeWrapper,46.630478,0.972968,0.419234




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,4.0,2.0,2.6
AnchorWrapper,1.0,4.0,1.8
LimeWrapper,2.2,3.0,3.4
ShapTabularTreeWrapper,2.8,1.0,2.2


w_condorcet:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.166888              0.878719           0.167372
 ShapTabularTreeWrapper  48.583731              0.965991           0.127322
 AnchorWrapper           42.893203              0.492117           0.487369
 AggregateExplainer      69.841686              0.869064           0.244482,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             43.445727              0.870443           0.648649
 ShapTabularTreeWrapper  47.987182              0.982633           0.084455
 AnchorWrapper           42.893203              0.531224           0.046006
 AggregateExplainer      69.841686              0.859261           0.128161,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             41.955835              0.852217           0.337086
 ShapTabularTreeWrapper  43.120370              0.957544           0.337494
 AnchorWra

Worst case avoidances:
	- for all metrics: 0
	- for 2/3 metrics: 4
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,69.841686,0.878355,0.257825
AnchorWrapper,39.517977,0.650304,0.34208
LimeWrapper,44.567679,0.859704,0.341113
ShapTabularTreeWrapper,46.630478,0.969409,0.216884




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,4.0,2.4,2.4
AnchorWrapper,1.0,4.0,2.0
LimeWrapper,2.2,2.6,2.6
ShapTabularTreeWrapper,2.8,1.0,3.0


### Discussion
This experiment sought to evaluate the performance of the different rank aggregation algorithms.

wsum is the best among the algorithms tested. all the others did not avoid the worst value in all metrics not even once.

# Evaluating MCDM Algs

### Execution

In [11]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    mcdm_algs=[pymcdm.methods.TOPSIS(), pymcdm.methods.COPRAS(),
               pymcdm.methods.PROMETHEE_II(preference_function="usual"),
               pymcdm.methods.ARAS(), pymcdm.methods.COCOSO(),
               pymcdm.methods.CODAS(), pymcdm.methods.EDAS(), pymcdm.methods.MABAC()],
    indexes=[629, 213, 485, 218, 703],
    n_instances=5
)

metadata["description"] = "compares TOPSIS, COPRAS, PROMETHEE_II, ARAS, COCOSO, CODAS, EDAS, MABAC MCDM algorithms"

with open('pickles/german/COMPARE_mcdm_algs.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [629, 213, 485, 218, 703]
Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2696 - val_loss: 1.2614
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2702 - val_loss: 1.2440
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2771 - val_loss: 1.2278
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2216 - val_loss: 1.2127
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2056 - val_loss: 1.1975
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2211 - val_loss: 1.1823
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1848 - val_loss: 1.1667
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1662 - val_loss: 1.1512
Epoc

### Analysis

In [29]:
with open('pickles/german/COMPARE_mcdm_algs.pkl', 'rb') as f:
    exp = dill.load(f)

In [30]:
methods = ["TOPSIS", "COPRAS", "PROMETHEE_II", "ARAS", "COCOSO", "CODAS", "EDAS", "MABAC"]
for i, method in enumerate(methods):
    print(f"{method}:\n")
    display(exp.results[i])
    wca = count_worst_case_avoidances(exp.results[i], [False, True, True], 1)
    print(f"Worst case avoidances:\n\t- for all metrics: {wca[0]}\n\t- for 2/3 metrics: {wca[1]}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")
    print("Avg rank:")
    display(get_average_metric_rank(exp.results[i], [False, True, True]))


TOPSIS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             40.706734              0.873892           0.542231
 ShapTabularTreeWrapper  48.583731              0.968120           0.633913
 AnchorWrapper           37.743216              0.588246           0.613756
 AggregateExplainer      43.599339              0.964581           0.391358,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             46.498196              0.834532           0.316616
 ShapTabularTreeWrapper  47.987182              0.984775           0.475040
 AnchorWrapper           35.618034              0.487391           0.027308
 AggregateExplainer      43.738128              0.929458           0.189650,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             43.419713              0.849360           0.084178
 ShapTabularTreeWrapper  43.120370              0.972604           0.448373
 AnchorWra

Worst case avoidances:
	- for all metrics: 4
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,43.700813,0.933721,0.325955
AnchorWrapper,37.993482,0.67107,0.440208
LimeWrapper,44.387491,0.837015,0.347298
ShapTabularTreeWrapper,46.630478,0.975708,0.444287




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.2,2.0,3.2
AnchorWrapper,1.6,4.0,2.4
LimeWrapper,2.8,3.0,2.4
ShapTabularTreeWrapper,3.4,1.0,2.0


COPRAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.349954              0.868424           0.142493
 ShapTabularTreeWrapper  48.583731              0.964770           0.668430
 AnchorWrapper           37.743216              0.560894           0.587868
 AggregateExplainer      44.451048              0.963103           0.294378,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.286657              0.874581           0.189881
 ShapTabularTreeWrapper  47.987182              0.985317           0.218931
 AnchorWrapper           35.618034              0.547521           0.122014
 AggregateExplainer      59.244717              0.939360           0.391785,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.706862              0.875567           0.519856
 ShapTabularTreeWrapper  43.120370              0.962996           0.377721
 AnchorWra

Worst case avoidances:
	- for all metrics: 2
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,47.879488,0.920581,0.356725
AnchorWrapper,36.583852,0.666095,0.392215
LimeWrapper,44.076179,0.862867,0.352646
ShapTabularTreeWrapper,46.630478,0.971195,0.40911




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,3.0,2.0,2.4
AnchorWrapper,1.0,4.0,2.4
LimeWrapper,2.6,3.0,2.8
ShapTabularTreeWrapper,3.4,1.0,2.4


PROMETHEE_II:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             47.181229              0.844877           0.030663
 ShapTabularTreeWrapper  48.583731              0.963094           0.534762
 AnchorWrapper           42.893203              0.688851           0.564153
 AggregateExplainer      58.764071              0.538402           0.467273,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.937560              0.848276           0.337298
 ShapTabularTreeWrapper  47.987182              0.985169           0.276967
 AnchorWrapper           37.743216              0.528617           0.326098
 AggregateExplainer      54.768723              0.547841           0.448123,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.100060              0.860739           0.046934
 ShapTabularTreeWrapper  43.120370              0.962996           0.555777
 AnchorWra

Worst case avoidances:
	- for all metrics: 0
	- for 2/3 metrics: 3
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,56.255191,0.543122,0.347303
AnchorWrapper,39.570329,0.702991,0.463546
LimeWrapper,46.145285,0.851192,0.253927
ShapTabularTreeWrapper,46.630478,0.971766,0.506486




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,4.0,3.5,2.6
AnchorWrapper,1.4,3.0,2.4
LimeWrapper,2.0,2.2,2.6
ShapTabularTreeWrapper,2.6,1.0,2.4


ARAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.223001              0.879163           0.008710
 ShapTabularTreeWrapper  48.583731              0.968564           0.074819
 AnchorWrapper           42.893203              0.509784           0.218931
 AggregateExplainer      46.872105                   NaN           0.321547,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.466355              0.839951           0.227091
 ShapTabularTreeWrapper  47.987182              0.989899           0.375013
 AnchorWrapper           37.743216              0.690145           0.103038
 AggregateExplainer      57.703650              0.951429           0.437721,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.400819              0.832315           0.225714
 ShapTabularTreeWrapper  43.120370              0.966987           0.148438
 AnchorWra

Worst case avoidances:
	- for all metrics: 3
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,47.899106,0.922451,0.380356
AnchorWrapper,38.455621,0.67858,0.384576
LimeWrapper,45.738267,0.849921,0.333765
ShapTabularTreeWrapper,46.630478,0.975107,0.272771




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,3.2,2.0,1.8
AnchorWrapper,1.0,3.8,2.2
LimeWrapper,2.8,2.8,3.0
ShapTabularTreeWrapper,3.0,1.0,3.0


COCOSO:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.166015              0.882266           0.674065
 ShapTabularTreeWrapper  48.583731              0.971766           0.805313
 AnchorWrapper           37.743216              0.451751           0.557636
 AggregateExplainer      46.358792              0.944631           0.348020,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             46.092841              0.870837           0.713508
 ShapTabularTreeWrapper  47.987182              0.983641           0.486455
 AnchorWrapper           37.743216              0.517334           0.222136
 AggregateExplainer      46.015875              0.938768           0.272427,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.847482              0.852217           0.368960
 ShapTabularTreeWrapper  43.120370              0.967085           0.204128
 AnchorWra

Worst case avoidances:
	- for all metrics: 2
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,46.149413,0.926034,0.361123
AnchorWrapper,37.511655,0.630963,0.490063
LimeWrapper,45.235229,0.859606,0.441265
ShapTabularTreeWrapper,46.630478,0.973077,0.522835




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,3.0,2.0,2.8
AnchorWrapper,1.0,3.8,2.4
LimeWrapper,2.8,2.8,2.8
ShapTabularTreeWrapper,3.2,1.0,2.0


CODAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             49.006978              0.829015           0.449095
 ShapTabularTreeWrapper  48.583731              0.961222           0.740708
 AnchorWrapper           42.893203              0.685479           0.892637
 AggregateExplainer      60.847304              0.523162           0.333326,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.410974              0.778473           0.025545
 ShapTabularTreeWrapper  47.987182              0.980833           0.720092
 AnchorWrapper           35.618034              0.529188           0.307509
 AggregateExplainer      43.957436              0.368867           0.432910,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             42.075812              0.857635           0.389370
 ShapTabularTreeWrapper  43.120370              0.964523           0.625543
 AnchorWra

Worst case avoidances:
	- for all metrics: 0
	- for 2/3 metrics: 2
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,52.417471,0.418583,0.255632
AnchorWrapper,37.613849,0.700365,0.391035
LimeWrapper,44.775418,0.828719,0.370423
ShapTabularTreeWrapper,46.630478,0.969076,0.629526




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,3.2,4.0,3.2
AnchorWrapper,1.0,2.8,2.8
LimeWrapper,2.6,2.2,2.6
ShapTabularTreeWrapper,3.2,1.0,1.4


EDAS:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.275626              0.845911           0.324015
 ShapTabularTreeWrapper  48.583731              0.966642           0.169981
 AnchorWrapper           42.893203              0.653178           0.132105
 AggregateExplainer      50.589081              0.921429           0.543379,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             48.378436              0.801576           0.799896
 ShapTabularTreeWrapper  47.987182              0.986647           0.189788
 AnchorWrapper           37.743216              0.517906           0.278018
 AggregateExplainer      49.714171              0.895517           0.126758,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             45.174290              0.859803           0.498583
 ShapTabularTreeWrapper  43.120370              0.972752           0.083270
 AnchorWra

Worst case avoidances:
	- for all metrics: 1
	- for 2/3 metrics: 4
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,44.989723,0.879265,0.385484
AnchorWrapper,38.35582,0.661122,0.414054
LimeWrapper,45.940605,0.838552,0.468458
ShapTabularTreeWrapper,46.630478,0.97353,0.356728




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.8,2.6,2.8
AnchorWrapper,1.2,3.8,2.2
LimeWrapper,2.8,2.6,2.6
ShapTabularTreeWrapper,3.2,1.0,2.4


MABAC:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             42.770543              0.801281           0.295258
 ShapTabularTreeWrapper  48.583731              0.972506           0.039473
 AnchorWrapper           37.743216              0.606942           0.424705
 AggregateExplainer      46.824018              0.639951           0.555988,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             43.500975              0.834877           0.041312
 ShapTabularTreeWrapper  47.987182              0.983740           0.590600
 AnchorWrapper           35.618034              0.542512           0.070940
 AggregateExplainer      45.442618              0.587833           0.229744,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             41.369189              0.869212           0.267952
 ShapTabularTreeWrapper  43.120370              0.967283           0.456386
 AnchorWra

Worst case avoidances:
	- for all metrics: 2
	- for 2/3 metrics: 5
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,43.697125,0.500719,0.402868
AnchorWrapper,36.515712,0.659859,0.254054
LimeWrapper,42.877432,0.834236,0.370733
ShapTabularTreeWrapper,46.630478,0.975452,0.34016




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,2.8,3.6,1.8
AnchorWrapper,1.0,3.4,3.0
LimeWrapper,2.2,2.0,2.8
ShapTabularTreeWrapper,4.0,1.0,2.4


### Discussion

Most of them didn't significantly outperformed the TOPSIS. EDAS showed promising results!

# RAE-T vs. RAE-E | 10 samples
### Execution

In [10]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    metrics_sets=[['nrc', 'sensitivity_spearman', 'faithfulness_corr']],
    mcdm_algs=[pymcdm.methods.TOPSIS(), pymcdm.methods.EDAS()],
    n_instances=10
)

metadata["description"] = "RAE-T vs RAE-S, 10 samples"

with open('pickles/german/RAE-T_vs_RAE-S_10.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [359  67 728 543 945 634 215  63 684 280]
Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2714 - val_loss: 1.2419
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2725 - val_loss: 1.2249
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2435 - val_loss: 1.2088
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2135 - val_loss: 1.1928
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2005 - val_loss: 1.1773
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1895 - val_loss: 1.1619
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1487 - val_loss: 1.1461
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1626 - val_l

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.


	 Running instance 543
	 Running instance 945
	 Running instance 634
	 Running instance 215
	 Running instance 63


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	 Running instance 684


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	 Running instance 280
Running evaluation for settings 2/2
Explainer components: [<class 'xai_agg.explainers.LimeWrapper'>, <class 'xai_agg.explainers.ShapTabularTreeWrapper'>, <class 'xai_agg.explainers.AnchorWrapper'>], Metrics: ['nrc', 'sensitivity_spearman', 'faithfulness_corr'], MCDM algorithm: <pymcdm.methods.edas.EDAS object at 0x7d00990c8b50>, Aggregation algorithm: wsum
	 Running instance 359
	 Running instance 67
	 Running instance 728


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.


	 Running instance 543
	 Running instance 945
	 Running instance 634
	 Running instance 215
	 Running instance 63


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	 Running instance 684


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	 Running instance 280


### Analysis

In [27]:
with open('pickles/german/RAE-T_vs_RAE-S_10.pkl', 'rb') as f:
    exp = dill.load(f)

In [28]:
methods = ["RAE-T", "RAE-E"]
for i, method in enumerate(methods):
    print(f"{method}:\n")
    display(exp.results[i])
    wca = count_worst_case_avoidances(exp.results[i], [False, True, True], 1)
    print(f"Worst case avoidances:\n\t- for all metrics: {wca[0]}\n\t- for 2/3 metrics: {wca[1]}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")
    print("Avg rank:")
    display(get_average_metric_rank(exp.results[i], [False, True, True]))

RAE-T:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.618392              0.881281           0.537085
 ShapTabularTreeWrapper  44.608024              0.988667           0.947590
 AnchorWrapper           35.008968              0.493056           0.512404
 AggregateExplainer      46.605323              0.899261           0.828046,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             43.957413              0.849113           0.153432
 ShapTabularTreeWrapper  43.327884              0.985514           0.130535
 AnchorWrapper           36.104963              0.570964           0.265982
 AggregateExplainer      49.406540              0.915074           0.513204,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             56.875469              0.885567           0.439962
 ShapTabularTreeWrapper  42.941177              0.988815           0.678432
 AnchorWra

Worst case avoidances:
	- for all metrics: 4
	- for 2/3 metrics: 9
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,49.023927,0.91303,0.467582
AnchorWrapper,39.594254,0.602249,0.475023
LimeWrapper,46.376314,0.862483,0.44613
ShapTabularTreeWrapper,47.312434,0.985618,0.572675




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,3.0,2.0,2.8
AnchorWrapper,1.6,4.0,2.5
LimeWrapper,2.7,3.0,2.6
ShapTabularTreeWrapper,2.7,1.0,2.1


RAE-E:



[                              nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             43.891531              0.881034           0.479765
 ShapTabularTreeWrapper  44.608024              0.982114           0.450234
 AnchorWrapper           37.858908              0.750563           0.272548
 AggregateExplainer      44.754519              0.919433           0.416280,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             44.875833              0.857291           0.318255
 ShapTabularTreeWrapper  43.327884              0.977406           0.436018
 AnchorWrapper           35.277334              0.504758           0.094228
 AggregateExplainer      47.975199              0.890936           0.208825,
                               nrc  sensitivity_spearman  faithfulness_corr
 LimeWrapper             46.761410              0.886650           0.391556
 ShapTabularTreeWrapper  42.941177              0.982471           0.901698
 AnchorWra

Worst case avoidances:
	- for all metrics: 5
	- for 2/3 metrics: 8
AVG:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,46.490341,0.912189,0.54876
AnchorWrapper,39.244752,0.629245,0.518735
LimeWrapper,45.191698,0.861251,0.300691
ShapTabularTreeWrapper,47.312434,0.982436,0.466061




Avg rank:


Unnamed: 0,nrc,sensitivity_spearman,faithfulness_corr
AggregateExplainer,3.3,2.1,2.4
AnchorWrapper,1.3,4.0,2.4
LimeWrapper,2.6,2.9,3.0
ShapTabularTreeWrapper,2.8,1.0,2.2
