In [1]:
import warnings
warnings.filterwarnings("ignore")

from xai_agg.agg_exp import *
from xai_agg.utils import *

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier

import pandas as pd
import numpy as np

import dill

2024-12-17 11:42:56.595244: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-17 11:42:56.619489: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data Loading and Preprocessing

In [2]:
original_data = pd.read_csv('../data/german_credit_data_updated.csv')

# Dataset overview - German Credit Risk (from Kaggle):
# 1. Age (numeric)
# 2. Sex (text: male, female)
# 3. Job (numeric: 0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled)
# 4. Housing (text: own, rent, or free)
# 5. Saving accounts (text - little, moderate, quite rich, rich)
# 6. Checking account (numeric, in DM - Deutsch Mark)
# 7. Credit amount (numeric, in DM)
# 8. Duration (numeric, in month)
# 9. Purpose (text: car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others)

display(original_data.head())
display(original_data.describe())
display(original_data.info())

# Display the unique values of the categorical features:
print('Unique values of the categorical features:')
for col in original_data.select_dtypes(include='object'):
    print(f'\t- {col}: {original_data[col].unique()}')

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Credit Risk
0,0,67,male,2,own,,little,1169,6,radio/TV,1
1,1,22,female,2,own,little,moderate,5951,48,radio/TV,2
2,2,49,male,1,own,little,,2096,12,education,1
3,3,45,male,2,free,little,little,7882,42,furniture/equipment,1
4,4,53,male,2,free,little,little,4870,24,car,2


Unnamed: 0.1,Unnamed: 0,Age,Job,Credit amount,Duration,Credit Risk
count,954.0,954.0,954.0,954.0,954.0,954.0
mean,476.5,35.501048,1.909853,3279.112159,20.780922,1.302935
std,275.540378,11.379668,0.649681,2853.315158,12.046483,0.459768
min,0.0,19.0,0.0,250.0,4.0,1.0
25%,238.25,27.0,2.0,1360.25,12.0,1.0
50%,476.5,33.0,2.0,2302.5,18.0,1.0
75%,714.75,42.0,2.0,3975.25,24.0,2.0
max,953.0,75.0,3.0,18424.0,72.0,2.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        954 non-null    int64 
 1   Age               954 non-null    int64 
 2   Sex               954 non-null    object
 3   Job               954 non-null    int64 
 4   Housing           954 non-null    object
 5   Saving accounts   779 non-null    object
 6   Checking account  576 non-null    object
 7   Credit amount     954 non-null    int64 
 8   Duration          954 non-null    int64 
 9   Purpose           954 non-null    object
 10  Credit Risk       954 non-null    int64 
dtypes: int64(6), object(5)
memory usage: 82.1+ KB


None

Unique values of the categorical features:
	- Sex: ['male' 'female']
	- Housing: ['own' 'free' 'rent']
	- Saving accounts: [nan 'little' 'quite rich' 'rich' 'moderate']
	- Checking account: ['little' 'moderate' nan 'rich']
	- Purpose: ['radio/TV' 'education' 'furniture/equipment' 'car' 'business'
 'domestic appliances' 'repairs' 'vacation/others']


In [3]:
preprocessed_data = original_data.copy()

# For savings and checking accounts, we will replace the missing values with 'none':
preprocessed_data['Saving accounts'].fillna('none', inplace=True)
preprocessed_data['Checking account'].fillna('none', inplace=True)

# Dropping index column:
preprocessed_data.drop(columns=['Unnamed: 0'], inplace=True)

# Using pd.dummies to one-hot-encode the categorical features
preprocessed_data["Job"] = preprocessed_data["Job"].map({0: 'unskilled_nonresident', 1: 'unskilled_resident',
                                                         2: 'skilled', 3: 'highlyskilled'})

categorical_features = preprocessed_data.select_dtypes(include='object').columns
numerical_features = preprocessed_data.select_dtypes(include='number').columns.drop('Credit Risk')
print(f'Categorical features: {categorical_features}')
print(f'Numerical features: {numerical_features}')

preprocessed_data = pd.get_dummies(preprocessed_data, columns=categorical_features, dtype='int64')

# Remapping the target variable to 0 and 1:
preprocessed_data['Credit Risk'] = preprocessed_data['Credit Risk'].map({1: 0, 2: 1})

# Make sure all column names are valid python identifiers (important for pd.query() calls):
preprocessed_data.columns = preprocessed_data.columns.str.replace(' ', '_')
preprocessed_data.columns = preprocessed_data.columns.str.replace('/', '_')

# Normalizing the data
scaler = StandardScaler()
scaled_preprocessed_data = scaler.fit_transform(preprocessed_data)

display(preprocessed_data.head())
display(preprocessed_data.info())

display(scaled_preprocessed_data)

Categorical features: Index(['Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account',
       'Purpose'],
      dtype='object')
Numerical features: Index(['Age', 'Credit amount', 'Duration'], dtype='object')


Unnamed: 0,Age,Credit_amount,Duration,Credit_Risk,Sex_female,Sex_male,Job_highlyskilled,Job_skilled,Job_unskilled_nonresident,Job_unskilled_resident,...,Checking_account_none,Checking_account_rich,Purpose_business,Purpose_car,Purpose_domestic_appliances,Purpose_education,Purpose_furniture_equipment,Purpose_radio_TV,Purpose_repairs,Purpose_vacation_others
0,67,1169,6,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
1,22,5951,48,1,1,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,49,2096,12,0,0,1,0,0,0,1,...,1,0,0,0,0,1,0,0,0,0
3,45,7882,42,0,0,1,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,53,4870,24,1,0,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   Age                          954 non-null    int64
 1   Credit_amount                954 non-null    int64
 2   Duration                     954 non-null    int64
 3   Credit_Risk                  954 non-null    int64
 4   Sex_female                   954 non-null    int64
 5   Sex_male                     954 non-null    int64
 6   Job_highlyskilled            954 non-null    int64
 7   Job_skilled                  954 non-null    int64
 8   Job_unskilled_nonresident    954 non-null    int64
 9   Job_unskilled_resident       954 non-null    int64
 10  Housing_free                 954 non-null    int64
 11  Housing_own                  954 non-null    int64
 12  Housing_rent                 954 non-null    int64
 13  Saving_accounts_little       954 non-null    int64

None

array([[ 2.7694545 , -0.7399179 , -1.22763429, ...,  1.62518349,
        -0.14633276, -0.11286653],
       [-1.18704073,  0.93690642,  2.26068929, ...,  1.62518349,
        -0.14633276, -0.11286653],
       [ 1.18685641, -0.41486224, -0.72930235, ..., -0.61531514,
        -0.14633276, -0.11286653],
       ...,
       [-1.0111965 , -0.39768023,  1.26402541, ..., -0.61531514,
        -0.14633276, -0.11286653],
       [-0.65950803,  0.29240557,  0.26736153, ..., -0.61531514,
        -0.14633276, -0.11286653],
       [-0.83535227,  2.69823821,  1.26402541, ..., -0.61531514,
        -0.14633276, -0.11286653]])

In [4]:
y = preprocessed_data['Credit_Risk']
X = preprocessed_data.drop(columns='Credit_Risk')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'ROC AUC: {roc_auc_score(y_test, y_pred)}')

Accuracy: 0.7696335078534031
ROC AUC: 0.6830357142857143


# Tuning

In [22]:
def evaluate_agg(rank_agg_algs: list[str], mcdm_methods: list[MCDA_method],
                 metric_sets=[['complexity', 'sensitivity_spearman', 'faithfulness_corr']],
                 n_instances: int = 10,
                 indexes: list[int] = None,
                 as_np: bool = True):
    # select n_instances from the test set
    results = []
    if indexes is None:
        indexes = np.random.choice(X_test.index, n_instances, replace=False)

    evaluator = ExplanationModelEvaluator(clf, X_train, categorical_features, jobs=10)
    evaluator.init()

    i = 0
    for rank_agg_alg in rank_agg_algs:
        for mcdm_method in mcdm_methods:
            for metrics in metric_sets:
                print(f"{i}: Rank Aggregation Algorithm: {rank_agg_alg}, MCDM Method: {mcdm_method.__class__.__name__}, Metrics: {metrics}")
                i += 1

                agg_explainer = AggregatedExplainer([LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper], clf, X_train, categorical_features,
                                                    rank_agg_alg=rank_agg_alg, mcdm_method=mcdm_method, evaluator=evaluator, metrics=metrics)

                setup_results = []
                for idx in indexes:
                    print(f"\t{idx}")
                    faithfulness = evaluator.faithfullness_correlation(agg_explainer, X_test.loc[idx])
                    sensitivity = evaluator._sensitivity_sequential( # sequential version of sensitivity must be used at this time
                                                                    agg_explainer, X_test.loc[idx],
                                                                    extra_explainer_params={
                                                                        "explainer_types": [LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper],
                                                                        "evaluator": evaluator,
                                                                        "mcdm_method": mcdm_method,
                                                                        "rank_agg_alg": rank_agg_alg,
                                                                        "metrics": metrics
                                                                    },
                                                                    iterations=10
                                                                    )
                    # complexity = evaluator.complexity(agg_explainer, X_test.loc[idx])
                    nrc_new = evaluator.nrc_new(agg_explainer, X_test.loc[idx])

                    metrics = agg_explainer.get_last_explanation_info().drop(columns=["weight"])
                    metrics.at['AggregatedExplainer', 'nrc_new'] = nrc_new
                    # metrics.at['AggregatedExplainer', 'complexity'] = complexity
                    metrics.at['AggregatedExplainer', 'sensitivity_spearman'] = sensitivity
                    metrics.at['AggregatedExplainer', 'faithfulness_corr'] = faithfulness
                    setup_results.append(metrics.to_numpy() if as_np else metrics)
                results.append(setup_results)
    
    return results


# Exploring MCDM Algorithm Alternatives

In [8]:
results = evaluate_agg(["wsum"], [pymcdm.methods.MABAC(), pymcdm.methods.ARAS(), pymcdm.methods.COCOSO(),
                                  pymcdm.methods.CODAS(), pymcdm.methods.EDAS()], n_instances=10)

with open('pickles/mcdm_algs_comparison_wsum.pkl', 'wb') as f:
    dill.dump(results, f)

Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 1.2521 - val_loss: 1.2331
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2168 - val_loss: 1.2168
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2409 - val_loss: 1.2010
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1993 - val_loss: 1.1862
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1787 - val_loss: 1.1715
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1650 - val_loss: 1.1565
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1857 - val_loss: 1.1424
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1400 - val_loss: 1.1283
Epoch 9/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

1: Rank Aggregation Algorithm: wsum, MCDM Method: ARAS, Metrics: ['complexity', 'sensitivity_spearman', 'faithfulness_corr']
	728
	948
	120
	361
	620
	310
	660
	732
	882
	63


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

2: Rank Aggregation Algorithm: wsum, MCDM Method: COCOSO, Metrics: ['complexity', 'sensitivity_spearman', 'faithfulness_corr']
	728


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.


	948
	120
	361
	620
	310
	660
	732
	882
	63


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

3: Rank Aggregation Algorithm: wsum, MCDM Method: CODAS, Metrics: ['complexity', 'sensitivity_spearman', 'faithfulness_corr']
	728


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	948
	120
	361
	620
	310
	660
	732
	882
	63


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

4: Rank Aggregation Algorithm: wsum, MCDM Method: EDAS, Metrics: ['complexity', 'sensitivity_spearman', 'faithfulness_corr']
	728
	948
	120
	361
	620
	310
	660
	732
	882
	63


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

5: Rank Aggregation Algorithm: wsum, MCDM Method: VIKOR, Metrics: ['complexity', 'sensitivity_spearman', 'faithfulness_corr']
	728


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.


	948
	120


ValueError: array must not contain infs or NaNs

# Exploring rank aggregation algorithm alternatives

In [None]:
results = evaluate_agg(["w_bordafuse", "w_condorcet"], [pymcdm.methods.TOPSIS()], n_instances=10)
# 629 213 485 218 703 917, 593, 398, 567, 530

with open('pickles/rank_algs_comparison.pkl', 'wb') as f:
    dill.dump(results, f)

Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1.2568 - val_loss: 1.2239
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2193 - val_loss: 1.2070
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1959 - val_loss: 1.1904
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2016 - val_loss: 1.1738
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1741 - val_loss: 1.1571
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1238 - val_loss: 1.1411
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1327 - val_loss: 1.1250
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1190 - val_loss: 1.1094
Epoch 9/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	398
	567
	530
1: Rank Aggregation Algorithm: w_condorcet, MCDM Method: TOPSIS
	629
	213
	485
	218
	703
	917
	593


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	398
	567
	530


In [9]:
results

[[array([[0.57298197, 0.82605911, 2.48161634],
         [0.39038374, 0.98459092, 2.55963129],
         [0.77075677, 0.77780591, 2.03346226],
         [0.56458015, 0.94507389, 2.50720765]]),
  array([[0.38042592, 0.838867  , 2.57522447],
         [0.87036687, 0.96027334, 2.46588963],
         [0.32031572, 0.74024337, 1.01533819],
         [0.62928781, 0.878867  , 2.49878771]]),
  array([[0.27595872, 0.84162562, 2.55680065],
         [0.034997  , 0.96293421, 2.51630887],
         [0.74354142, 0.52959181, 1.52964173],
         [0.22532813, 0.92862069, 2.4300754 ]]),
  array([[0.26375795, 0.83699507, 2.62176939],
         [0.8805126 , 0.97701599, 2.59728961],
         [0.46816532, 0.89237815, 1.09388203],
         [0.58556743, 0.91049261, 2.35520669]]),
  array([[0.10409404, 0.85182266, 2.59267519],
         [0.07634449, 0.97992124, 2.58605853],
         [0.07761475, 0.54937682, 1.52089119],
         [0.22296313, 0.91753695, 2.73071111]]),
  array([[0.87577273, 0.84251232, 2.6368522 ],
   

In [10]:
results = evaluate_agg(["w_bordafuse", "w_condorcet"], [pymcdm.methods.TOPSIS()], n_instances=10)
# 629 213 485 218 703 917, 593, 398, 567, 530

with open('pickles/rank_algs_comparison.pkl', 'wb') as f:
    dill.dump(results, f)

In [33]:
# results cointains arrays of the same shape. Calculate the meah of each element accross arrays
mean_results = np.mean(results[0], axis=0)
display(results)
display(mean_results)

[[array([[0.50020721, 0.85793103, 2.62177749],
         [0.0520696 , 0.97703868, 2.4986623 ],
         [0.16731895, 0.6475966 , 0.69294332],
         [0.52049712, 0.93029557, 2.37710039]]),
  array([[0.01139781, 0.85004926, 2.52762673],
         [0.20111931, 0.98526731, 2.55548468],
         [0.59444292, 0.63547665, 1.27255579],
         [0.4216546 , 0.92216749, 2.49558299]])]]

array([[0.25580251, 0.85399015, 2.57470211],
       [0.12659446, 0.98115299, 2.52707349],
       [0.38088094, 0.64153663, 0.98274956],
       [0.47107586, 0.92623153, 2.43634169]])

In [8]:
with open('pickles/topsis_agg_eval_results.pkl', 'rb') as f:
   wsum_results  = dill.load(f)

In [13]:
wsum_results = wsum_results[0]
bordafuse_results = results[0]
condorcet_results = results[1]

In [17]:
mean_wsum_results = np.mean(wsum_results, axis=0)
mean_bordafuse_results = np.mean(bordafuse_results, axis=0)
mean_condorcet_results = np.mean(condorcet_results, axis=0)

df_mean_wsum_results = pd.DataFrame(mean_wsum_results, columns=["Complexity", "Sensitivity", "Faithfullness"])
df_mean_bordafuse_results = pd.DataFrame(mean_bordafuse_results, columns=["Faithfulness", "Sensitivity", "Complexity"])
df_mean_condorcet_results = pd.DataFrame(mean_condorcet_results, columns=["Faithfulness", "Sensitivity", "Complexity"])

print("wsum:")
display(df_mean_wsum_results)
print("bordafuse:")
display(df_mean_bordafuse_results)
print("condorcet:")
display(df_mean_condorcet_results)

wsum:


Unnamed: 0,Complexity,Sensitivity,Faithfullness
0,0.34508,0.84203,2.570866
1,0.558761,0.973656,2.46847
2,0.367395,0.675152,1.570944
3,0.460295,0.907148,2.490968


bordafuse:


Unnamed: 0,Faithfulness,Sensitivity,Complexity
0,2.557006,0.854074,0.398688
1,2.430294,0.973223,0.351792
2,1.442598,0.688828,0.278096
3,2.481466,0.91201,0.412116


condorcet:


Unnamed: 0,Faithfulness,Sensitivity,Complexity
0,2.588671,0.856759,0.422352
1,2.430294,0.975297,0.454552
2,1.459996,0.713128,0.462
3,2.499991,0.923134,0.456831


# Evaluate different metric sets

In [7]:
results = evaluate_agg(["wsum"], [pymcdm.methods.TOPSIS()], 
                       metric_sets=[['complexity', 'sensitivity_spearman', 'faithfulness_corr'],
                                    ['nrc', 'sensitivity_spearman', 'faithfulness_corr']], n_instances=10)

with open('pickles/diff_metricsets_wsum_topsis.pkl', 'wb') as f:
    dill.dump(results, f)

Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 1.2405 - val_loss: 1.2253
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2320 - val_loss: 1.2080
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2177 - val_loss: 1.1905
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1943 - val_loss: 1.1726
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2109 - val_loss: 1.1549
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2083 - val_loss: 1.1373
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1280 - val_loss: 1.1198
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1181 - val_loss: 1.1030
Epoch 9/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━

Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	23
	482
	850
	192
	33
	764
1: Rank Aggregation Algorithm: wsum, MCDM Method: TOPSIS, Metrics: ['nrc', 'sensitivity_spearman', 'faithfulness_corr']
	199
	382
	357
	613


Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now returning the best non-eligible result. The desired precision threshold might not be achieved due to the quantile-based discretisation of the numerical features. The resolution of the bins may be too large to find an anchor of required precision. Consider increasing the number of bins in `disc_perc`, but note that for some numerical distribution (e.g. skewed distribution) it may not help.
Could not find an anchor satisfying the 0.95 precision constraint. Now ret

	23
	482
	850
	192
	33
	764


In [12]:
original_metrics = results[0]
nrc_metrics = results[1]

mean_original_metrics = np.mean(original_metrics, axis=0)
mean_original_metrics = pd.concat(original_metrics).groupby(level=0).mean()
mean_nrc_metrics = np.mean(nrc_metrics, axis=0)

df_mean_original_metrics = pd.DataFrame(mean_original_metrics, columns=["Faithfullness", "Sensitivity", "Complexity"])
df_mean_nrc_metrics = pd.DataFrame(mean_nrc_metrics, columns=["Faithfullness", "Sensitivity", "Complexity"])

# Set the index to the model names: Lime, Shap, Anchor and Aggregated
df_mean_original_metrics.index = ["Lime", "Shap", "Anchor", "Aggregated"]
df_mean_nrc_metrics.index = ["Lime", "Shap", "Anchor", "Aggregated"]

print("Original metrics (complexity, sensitivity and faithfullness):")
display(df_mean_original_metrics)
print("New metrics (nrc instead of complexity, sensitivity, faithfullness):")
display(df_mean_nrc_metrics)

Original metrics (complexity, sensitivity and faithfullness):


Unnamed: 0,Faithfullness,Sensitivity,Complexity
Lime,2.58638,0.842921,0.507968
Shap,2.455879,0.972664,0.52294
Anchor,1.240633,0.667916,0.336709
Aggregated,2.496701,0.915369,0.489969


New metrics (nrc instead of complexity, sensitivity, faithfullness):


Unnamed: 0,Faithfullness,Sensitivity,Complexity
Lime,69.841686,0.837951,0.392245
Shap,68.077158,0.968348,0.492134
Anchor,17.003101,0.663505,0.492473
Aggregated,2.46451,0.912946,0.549699


In [6]:
# indexes=[948, 120, 361, 620, 310, 660, 732, 882, 63, 23]

# results = evaluate_agg(["wsum"], [pymcdm.methods.TOPSIS()], 
#                        metric_sets=[['nrc_new', 'sensitivity_spearman', 'faithfulness_corr']],
#                        indexes=[948, 120, 361, 620, 310, 660, 732, 882, 63, 23], as_np=False)

results = evaluate_aggregate_explainer(clf, X_train, categorical_features)

NameError: name 'ExplanationModelEvaluator' is not defined

In [32]:
mean_results = pd.concat(results[0]).groupby(level=0).mean()
display(mean_results)

Unnamed: 0,nrc_new,sensitivity_spearman,faithfulness_corr
AggregatedExplainer,48.323029,0.926667,0.364444
AnchorWrapper,38.149914,0.66453,0.496693
LimeWrapper,45.861626,0.851951,0.389142
ShapTabularTreeWrapper,45.680964,0.964407,0.372763


In [48]:
results[0][5]

Unnamed: 0,nrc_new,sensitivity_spearman,faithfulness_corr
LimeWrapper,45.501622,0.856798,0.193617
ShapTabularTreeWrapper,47.961937,0.982559,0.42539
AnchorWrapper,35.618034,0.587914,0.747775
AggregatedExplainer,46.149547,0.929064,0.104069
