In [1]:
import warnings
warnings.filterwarnings("ignore")

from xai_agg import *

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier

import pandas as pd
import numpy as np

import dill

2025-01-22 13:50:41.156970: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-22 13:50:41.180272: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
raw_data = pd.read_csv('../data/wdbc.csv')
# Specify header:
raw_data.columns = ['ID', 'Diagnosis'] + [str for i in range(1, 4) for str in [f"radius_{i}", f"texture_{i}", f"perimeter_{i}", f"area_{i}", f"smoothness_{i}", f"compactness_{i}", f"concavity_{i}", f"concave_points_{i}", f"symmetry_{i}", f"fractal_dimension_{i}"]]

display(raw_data.head())

Unnamed: 0,ID,Diagnosis,radius_1,texture_1,perimeter_1,area_1,smoothness_1,compactness_1,concavity_1,concave_points_1,...,radius_3,texture_3,perimeter_3,area_3,smoothness_3,compactness_3,concavity_3,concave_points_3,symmetry_3,fractal_dimension_3
0,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
1,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
2,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
3,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678
4,843786,M,12.45,15.7,82.57,477.1,0.1278,0.17,0.1578,0.08089,...,15.47,23.75,103.4,741.6,0.1791,0.5249,0.5355,0.1741,0.3985,0.1244


In [3]:
preprocessed_data = raw_data.drop(columns=['ID'])

preprocessed_data['Diagnosis'] = preprocessed_data['Diagnosis'].map({'M': 1, 'B': 0})
preprocessed_data.rename(columns={'Diagnosis': 'DiagnosisIsMalignant'}, inplace=True)

categorical_features = []

In [4]:
X = preprocessed_data.drop(columns=['DiagnosisIsMalignant'])
y = preprocessed_data['DiagnosisIsMalignant']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"ROC AUC: {roc_auc_score(y_test, y_pred)}")

Accuracy: 0.9736842105263158
ROC AUC: 0.9673913043478262


# Experiments
# RAE-T vs. RAE-E | 10 samples
### Execution

In [6]:
results, metadata = evaluate_aggregate_explainer(
    clf, X_train, X_test, categorical_features,
    metrics_sets=[['nrc', 'sensitivity_spearman', 'rb_faithfulness_corr']],
    mcdm_algs=[pymcdm.methods.TOPSIS(), pymcdm.methods.EDAS()],
    n_instances=10
)

metadata["description"] = "RAE-T vs RAE-S, 10 samples"

with open('pickles/wdbc/RAE-T_vs_RAE-S_10-allrank.pkl', 'wb') as f:
    dill.dump(ExperimentRun(metadata, results), f)

Selected indexes: [382 228 402   6 332 255 439 498  68 501]
Epoch 1/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.2542 - val_loss: 1.2792
Epoch 2/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.2840 - val_loss: 1.2677
Epoch 3/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.2284 - val_loss: 1.2576
Epoch 4/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.2939 - val_loss: 1.2477
Epoch 5/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1768 - val_loss: 1.2380
Epoch 6/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1330 - val_loss: 1.2270
Epoch 7/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1932 - val_loss: 1.2136
Epoch 8/500
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1426 - val_l

### Analysis

In [9]:
with open('pickles/wdbc/RAE-T_vs_RAE-S_10-allrank.pkl', 'rb') as f:
    exp = dill.load(f)

In [10]:
methods = ["RAE-T", "RAE-E"]
for i, method in enumerate(methods):
    print(f"{method}:\n")
    display(exp.results[i])
    wca = count_worst_case_avoidances(exp.results[i], [False, True, True], 1)
    print(f"Worst case avoidances:\n\t- for all metrics: {wca[0]}\n\t- for 2/3 metrics: {wca[1]}")
    print("AVG:")
    display(get_expconfig_mean_results(exp, i))
    print("\n")

RAE-T:



[                              nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             52.888948              0.946607              0.254381
 ShapTabularTreeWrapper  51.460905              0.996440              0.265456
 AnchorWrapper           61.796491              0.624911              0.159678
 AggregateExplainer      38.608635              0.865228              0.531854,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             54.516235              0.955773              0.588436
 ShapTabularTreeWrapper  42.103096              0.991279              0.935483
 AnchorWrapper           44.563923              0.499160              0.443639
 AggregateExplainer      40.754683              0.950389              0.753108,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             52.813793              0.957286              0.124298
 ShapTabularTreeWrapper  49.383918              0.

Worst case avoidances:
	- for all metrics: 8
	- for 2/3 metrics: 10
AVG:


Unnamed: 0,nrc,sensitivity_spearman,rb_faithfulness_corr
AggregateExplainer,39.235249,0.909175,0.480925
AnchorWrapper,45.740944,0.375877,0.191029
LimeWrapper,53.380539,0.948703,0.361979
ShapTabularTreeWrapper,49.610594,0.99362,0.474018




RAE-E:



[                              nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             53.448086              0.957953              0.297227
 ShapTabularTreeWrapper  51.460905              0.996752              0.202834
 AnchorWrapper           44.563923              0.360519              0.165818
 AggregateExplainer      37.050296              0.938643              0.262167,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             51.630781              0.957019              0.339161
 ShapTabularTreeWrapper  42.103096              0.990612              0.957150
 AnchorWrapper           44.563923              0.481376              0.697698
 AggregateExplainer      42.970985              0.960534              0.650057,
                               nrc  sensitivity_spearman  rb_faithfulness_corr
 LimeWrapper             51.162969              0.954616              0.163639
 ShapTabularTreeWrapper  49.383918              0.

Worst case avoidances:
	- for all metrics: 9
	- for 2/3 metrics: 10
AVG:


Unnamed: 0,nrc,sensitivity_spearman,rb_faithfulness_corr
AggregateExplainer,38.794806,0.955435,0.45112
AnchorWrapper,44.017687,0.371162,0.259408
LimeWrapper,51.063857,0.953491,0.333723
ShapTabularTreeWrapper,49.610594,0.993655,0.465703




