In [2]:
import warnings
warnings.filterwarnings("ignore")

from xai_agg.agg_exp import *

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier

import pandas as pd
import numpy as np

import dill

2024-12-11 09:34:55.562153: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-11 09:34:55.585807: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data Loading and Preprocessing

In [3]:
original_data = pd.read_csv('../data/german_credit_data_updated.csv')

# Dataset overview - German Credit Risk (from Kaggle):
# 1. Age (numeric)
# 2. Sex (text: male, female)
# 3. Job (numeric: 0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled)
# 4. Housing (text: own, rent, or free)
# 5. Saving accounts (text - little, moderate, quite rich, rich)
# 6. Checking account (numeric, in DM - Deutsch Mark)
# 7. Credit amount (numeric, in DM)
# 8. Duration (numeric, in month)
# 9. Purpose (text: car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others)

display(original_data.head())
display(original_data.describe())
display(original_data.info())

# Display the unique values of the categorical features:
print('Unique values of the categorical features:')
for col in original_data.select_dtypes(include='object'):
    print(f'\t- {col}: {original_data[col].unique()}')

Unnamed: 0.1,Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Credit Risk
0,0,67,male,2,own,,little,1169,6,radio/TV,1
1,1,22,female,2,own,little,moderate,5951,48,radio/TV,2
2,2,49,male,1,own,little,,2096,12,education,1
3,3,45,male,2,free,little,little,7882,42,furniture/equipment,1
4,4,53,male,2,free,little,little,4870,24,car,2


Unnamed: 0.1,Unnamed: 0,Age,Job,Credit amount,Duration,Credit Risk
count,954.0,954.0,954.0,954.0,954.0,954.0
mean,476.5,35.501048,1.909853,3279.112159,20.780922,1.302935
std,275.540378,11.379668,0.649681,2853.315158,12.046483,0.459768
min,0.0,19.0,0.0,250.0,4.0,1.0
25%,238.25,27.0,2.0,1360.25,12.0,1.0
50%,476.5,33.0,2.0,2302.5,18.0,1.0
75%,714.75,42.0,2.0,3975.25,24.0,2.0
max,953.0,75.0,3.0,18424.0,72.0,2.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        954 non-null    int64 
 1   Age               954 non-null    int64 
 2   Sex               954 non-null    object
 3   Job               954 non-null    int64 
 4   Housing           954 non-null    object
 5   Saving accounts   779 non-null    object
 6   Checking account  576 non-null    object
 7   Credit amount     954 non-null    int64 
 8   Duration          954 non-null    int64 
 9   Purpose           954 non-null    object
 10  Credit Risk       954 non-null    int64 
dtypes: int64(6), object(5)
memory usage: 82.1+ KB


None

Unique values of the categorical features:
	- Sex: ['male' 'female']
	- Housing: ['own' 'free' 'rent']
	- Saving accounts: [nan 'little' 'quite rich' 'rich' 'moderate']
	- Checking account: ['little' 'moderate' nan 'rich']
	- Purpose: ['radio/TV' 'education' 'furniture/equipment' 'car' 'business'
 'domestic appliances' 'repairs' 'vacation/others']


In [4]:
preprocessed_data = original_data.copy()

# For savings and checking accounts, we will replace the missing values with 'none':
preprocessed_data['Saving accounts'].fillna('none', inplace=True)
preprocessed_data['Checking account'].fillna('none', inplace=True)

# Dropping index column:
preprocessed_data.drop(columns=['Unnamed: 0'], inplace=True)

# Using pd.dummies to one-hot-encode the categorical features
preprocessed_data["Job"] = preprocessed_data["Job"].map({0: 'unskilled_nonresident', 1: 'unskilled_resident',
                                                         2: 'skilled', 3: 'highlyskilled'})

categorical_features = preprocessed_data.select_dtypes(include='object').columns
numerical_features = preprocessed_data.select_dtypes(include='number').columns.drop('Credit Risk')
print(f'Categorical features: {categorical_features}')
print(f'Numerical features: {numerical_features}')

preprocessed_data = pd.get_dummies(preprocessed_data, columns=categorical_features, dtype='int64')

# Remapping the target variable to 0 and 1:
preprocessed_data['Credit Risk'] = preprocessed_data['Credit Risk'].map({1: 0, 2: 1})

# Make sure all column names are valid python identifiers (important for pd.query() calls):
preprocessed_data.columns = preprocessed_data.columns.str.replace(' ', '_')
preprocessed_data.columns = preprocessed_data.columns.str.replace('/', '_')

# Normalizing the data
scaler = StandardScaler()
scaled_preprocessed_data = scaler.fit_transform(preprocessed_data)

display(preprocessed_data.head())
display(preprocessed_data.info())

display(scaled_preprocessed_data)

Categorical features: Index(['Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account',
       'Purpose'],
      dtype='object')
Numerical features: Index(['Age', 'Credit amount', 'Duration'], dtype='object')


Unnamed: 0,Age,Credit_amount,Duration,Credit_Risk,Sex_female,Sex_male,Job_highlyskilled,Job_skilled,Job_unskilled_nonresident,Job_unskilled_resident,...,Checking_account_none,Checking_account_rich,Purpose_business,Purpose_car,Purpose_domestic_appliances,Purpose_education,Purpose_furniture_equipment,Purpose_radio_TV,Purpose_repairs,Purpose_vacation_others
0,67,1169,6,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
1,22,5951,48,1,1,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,49,2096,12,0,0,1,0,0,0,1,...,1,0,0,0,0,1,0,0,0,0
3,45,7882,42,0,0,1,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,53,4870,24,1,0,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   Age                          954 non-null    int64
 1   Credit_amount                954 non-null    int64
 2   Duration                     954 non-null    int64
 3   Credit_Risk                  954 non-null    int64
 4   Sex_female                   954 non-null    int64
 5   Sex_male                     954 non-null    int64
 6   Job_highlyskilled            954 non-null    int64
 7   Job_skilled                  954 non-null    int64
 8   Job_unskilled_nonresident    954 non-null    int64
 9   Job_unskilled_resident       954 non-null    int64
 10  Housing_free                 954 non-null    int64
 11  Housing_own                  954 non-null    int64
 12  Housing_rent                 954 non-null    int64
 13  Saving_accounts_little       954 non-null    int64

None

array([[ 2.7694545 , -0.7399179 , -1.22763429, ...,  1.62518349,
        -0.14633276, -0.11286653],
       [-1.18704073,  0.93690642,  2.26068929, ...,  1.62518349,
        -0.14633276, -0.11286653],
       [ 1.18685641, -0.41486224, -0.72930235, ..., -0.61531514,
        -0.14633276, -0.11286653],
       ...,
       [-1.0111965 , -0.39768023,  1.26402541, ..., -0.61531514,
        -0.14633276, -0.11286653],
       [-0.65950803,  0.29240557,  0.26736153, ..., -0.61531514,
        -0.14633276, -0.11286653],
       [-0.83535227,  2.69823821,  1.26402541, ..., -0.61531514,
        -0.14633276, -0.11286653]])

In [5]:
y = preprocessed_data['Credit_Risk']
X = preprocessed_data.drop(columns='Credit_Risk')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'ROC AUC: {roc_auc_score(y_test, y_pred)}')

Accuracy: 0.7696335078534031
ROC AUC: 0.6830357142857143


# Tuning

In [7]:
def evaluate_agg(rank_agg_algs: list[str], mcdm_methods: list[MCDA_method], n_instances: int = 10):
    # select n_instances from the test set
    results = []
    indexes = np.random.choice(X_test.index, n_instances, replace=False)

    evaluator = ExplanationModelEvaluator(clf, X_train, categorical_features)
    evaluator.init()

    i = 0
    for rank_agg_alg in rank_agg_algs:
        for mcdm_method in mcdm_methods:
            print(f"{i}: Rank Aggregation Algorithm: {rank_agg_alg}, MCDM Method: {mcdm_method.__class__.__name__}")
            i += 1

            agg_explainer = AggregatedExplainer([LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper], clf, X_train, categorical_features,
                                                rank_agg_alg=rank_agg_alg, mcdm_method=mcdm_method, evaluator=evaluator)

            setup_results = []
            for idx in indexes:
                print(f"\t{idx}")
                faithfulness = evaluator.faithfullness_correlation(agg_explainer, X_test.loc[idx])
                sensitivity = evaluator._sensitivity_sequential( # sequential version of sensitivity must be used at this time
                                                                agg_explainer, X_test.loc[idx],
                                                                extra_explainer_params={
                                                                    "explainer_types": [LimeWrapper, ShapTabularTreeWrapper, AnchorWrapper],
                                                                    "evaluator": evaluator,
                                                                    "mcdm_method": mcdm_method,
                                                                    "rank_agg_alg": rank_agg_alg
                                                                },
                                                                iterations=10
                                                                )
                complexity = evaluator.complexity(agg_explainer, X_test.loc[idx])

                metrics = agg_explainer.last_explanation_metrics.copy()
                metrics.loc[AggregatedExplainer.__name__] = [faithfulness, sensitivity, complexity]
                setup_results.append(metrics.to_numpy())
            results.append(setup_results)
    
    return results


# Exploring MCDM Algorithm Alternatives

In [None]:
results = evaluate_agg(["wsum", "w_bordafuse", "w_condorcet"], [pymcdm.methods.TOPSIS()], n_instances=10)

Epoch 1/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.2332 - val_loss: 1.2237
Epoch 2/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2480 - val_loss: 1.2071
Epoch 3/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2347 - val_loss: 1.1910
Epoch 4/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.2196 - val_loss: 1.1751
Epoch 5/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1875 - val_loss: 1.1591
Epoch 6/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1687 - val_loss: 1.1424
Epoch 7/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1301 - val_loss: 1.1259
Epoch 8/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1378 - val_loss: 1.1092
Epoch 9/500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━

In [9]:
results

[[array([[0.57298197, 0.82605911, 2.48161634],
         [0.39038374, 0.98459092, 2.55963129],
         [0.77075677, 0.77780591, 2.03346226],
         [0.56458015, 0.94507389, 2.50720765]]),
  array([[0.38042592, 0.838867  , 2.57522447],
         [0.87036687, 0.96027334, 2.46588963],
         [0.32031572, 0.74024337, 1.01533819],
         [0.62928781, 0.878867  , 2.49878771]]),
  array([[0.27595872, 0.84162562, 2.55680065],
         [0.034997  , 0.96293421, 2.51630887],
         [0.74354142, 0.52959181, 1.52964173],
         [0.22532813, 0.92862069, 2.4300754 ]]),
  array([[0.26375795, 0.83699507, 2.62176939],
         [0.8805126 , 0.97701599, 2.59728961],
         [0.46816532, 0.89237815, 1.09388203],
         [0.58556743, 0.91049261, 2.35520669]]),
  array([[0.10409404, 0.85182266, 2.59267519],
         [0.07634449, 0.97992124, 2.58605853],
         [0.07761475, 0.54937682, 1.52089119],
         [0.22296313, 0.91753695, 2.73071111]]),
  array([[0.87577273, 0.84251232, 2.6368522 ],
   

In [10]:
# Pickle the results
with open('pickles/topsis_agg_eval_results.pkl', 'wb') as f:
    dill.dump(results, f)

In [33]:
# results cointains arrays of the same shape. Calculate the meah of each element accross arrays
mean_results = np.mean(results[0], axis=0)
display(results)
display(mean_results)

[[array([[0.50020721, 0.85793103, 2.62177749],
         [0.0520696 , 0.97703868, 2.4986623 ],
         [0.16731895, 0.6475966 , 0.69294332],
         [0.52049712, 0.93029557, 2.37710039]]),
  array([[0.01139781, 0.85004926, 2.52762673],
         [0.20111931, 0.98526731, 2.55548468],
         [0.59444292, 0.63547665, 1.27255579],
         [0.4216546 , 0.92216749, 2.49558299]])]]

array([[0.25580251, 0.85399015, 2.57470211],
       [0.12659446, 0.98115299, 2.52707349],
       [0.38088094, 0.64153663, 0.98274956],
       [0.47107586, 0.92623153, 2.43634169]])