## MMTHE01 - Master's Thesis

### E3. Thesis - Apply and Evaluate different XAI methods - Case Study with the ANN Model

* Applying XAI on a Deep Learning AI model (ANN Model)

#### Importing the libraries

In [None]:
### import general libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
import time
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import recall_score
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

from sklearn.preprocessing import LabelEncoder

In [None]:
# Check your current working directory
cwd = os.getcwd()

# Define your relative path
relative_path = r"6. Analysis"  # adjust this relative to cwd

# Build the full path
full_path = os.path.join(cwd, relative_path)

# Check if it exists before changing
if os.path.exists(full_path):
    os.chdir(full_path)
    print("Changed directory to:", full_path)
else:
    print("Folder does not exist:", full_path)

#### Importing the dataset

In [None]:
# Read the training CSV (resolved against the current working directory).
TRAIN_CSV = 'train_dataset_final_encoded.csv'
dataset = pd.read_csv(TRAIN_CSV)

In [None]:
# Preview the first rows of the loaded frame as a quick sanity check.
dataset.head()

In [None]:
# (rows, columns) of the loaded frame.
dataset.shape

### 5.1 Split the data into Train-Test

#### 5.1.1 Separate the features and the label

In [None]:
# Remove the TransactionID column (presumably a row identifier - confirm) so it is not used as a feature.
dataset_final = dataset.drop(columns=['TransactionID'])

In [None]:
# Superseded split, kept for reference only: it slices the raw `dataset`
# (which still contains TransactionID) and converts to NumPy arrays via `.values`.
# The active split is built from `dataset_final` instead.
#X = dataset.iloc[:, 1:].values
#y = dataset.iloc[:,0].values

In [None]:
# Column 0 of dataset_final is used as the label; every remaining column is a feature.
y = dataset_final.iloc[:, 0]
X = dataset_final.iloc[:, 1:]

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 hold-out split; the "_im" suffix marks the training part
# before any resampling is applied to it.
X_train_im, X_test, y_train_im, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

### 5.2 Applying SMOTE

In [None]:
# Oversample with SMOTE on the training partition only; the test set is left untouched.
smote = SMOTE(random_state=1)
X_train, y_train = smote.fit_resample(X=X_train_im, y=y_train_im)

### 5.3 Feature Scaling

In [None]:
# Standardise the features.
# The scaler is fitted on the training data only; the test set is transformed with
# those same training statistics. Re-fitting on the test set (fit_transform) would
# scale train and test differently and leak test-set statistics into evaluation.
sc = StandardScaler()
X_tn_scaled = sc.fit_transform(X_train)
X_tt_scaled = sc.transform(X_test)


# Convert to dataframe (the scaler returns plain arrays, so restore the column names)
X_train_scaled = pd.DataFrame(X_tn_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_tt_scaled, columns=X_test.columns)

### 5.4 Model Fitting

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Build an ANN model
from tensorflow.keras.metrics import AUC  # local import: only this cell needs the metric class

n_features = X_train.shape[1]

# Two hidden ReLU layers with dropout, sigmoid output for binary classification.
model = Sequential([
    Input(shape=(n_features,)),  # Explicit Input layer instead of input_dim in Dense
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Name the metric explicitly: with the string shortcut 'AUC' the logged key differs
# between Keras versions ('auc' vs 'AUC'), which would make the 'val_AUC' monitor
# below silently unavailable on some installs.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[AUC(name='AUC')])

# Early stopping to prevent overfitting: stop after 3 epochs without a better
# validation AUC and roll back to the best weights seen.
early_stop = EarlyStopping(monitor='val_AUC', patience=3, restore_best_weights=True, mode='max')

In [None]:
# Train the ANN model.
# Keras' validation_split takes the LAST fraction of the rows, before shuffling.
# SMOTE appends its synthetic minority samples after the original rows, so the tail
# of X_train_scaled is (almost) entirely one class; a tail-based validation set makes
# val_AUC meaningless and breaks early stopping. Use an explicit stratified, shuffled
# split instead.
# NOTE: the validation rows still include SMOTE-generated samples, so val_AUC is an
# optimistic estimate; the untouched test set remains the reference for evaluation.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_scaled, y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=1
)

history = model.fit(
    X_fit, y_fit,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=256,
    callbacks=[early_stop],
    verbose=1
)

### 5.5 Applying XAI methods to the ANN Model

#### 5.5.5 Applying Anchors to the ANN Model
* E3C. Thesis - Test different XAI methods with the ANN Model (Anchors)

In [None]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
# Define the prediction function
def predict_fn(x):
    """Return class probabilities of the ANN as an (n_samples, 2) float array.

    Keras models expose `predict` (there is no `predict_proba`), and the sigmoid
    head yields a single column with P(class 1). The explainer needs either class
    labels or one probability column per class, so the output is expanded to
    [P(class 0), P(class 1)].
    """
    # verbose=0: the explainer calls this many times; avoid one progress bar per call.
    positive = model.predict(x, verbose=0).astype(float).reshape(-1, 1)
    return np.hstack([1.0 - positive, positive])

In [None]:
from alibi.explainers import AnchorTabular

In [None]:
# Start the wall-clock timer for the Anchors run; a later cell reads time.time()
# again and reports the difference, so explainer setup, fitting and the explanation
# itself all fall inside the measured interval.
start_time = time.time()

In [None]:
# Set up the Anchor explainer on the scaled training data.
feature_names = X_train.columns.tolist()
anchor_explainer = AnchorTabular(
    predict_fn,
    feature_names=feature_names,
    categorical_names={},  # empty mapping: no feature is declared categorical
)

# Fit on the raw array; numerical features are discretised at these percentiles.
train_matrix = X_train_scaled.to_numpy()
anchor_explainer.fit(train_matrix, disc_perc=[25, 50, 75])

In [None]:
# Explain a specific test instance: the first row of the scaled test set,
# reshaped into a single-row 2-D array.
first_test_row = X_test_scaled.iloc[0].values
instance = first_test_row.reshape(1, -1)

anchor_explanation = anchor_explainer.explain(
    instance,
    max_anchor_size=10,  # allow up to 10 feature conditions in anchor
)

In [None]:
# Output the results: the anchor rule plus its precision and coverage.
for label, value in (
    ("Anchor for this instance:", anchor_explanation.anchor),
    ("Precision:", anchor_explanation.precision),
    ("Coverage:", anchor_explanation.coverage),
):
    print(label, value)

In [None]:
# Stop the timer and report the elapsed wall-clock time since start_time was taken.
end_time = time.time()
explanation_time = end_time - start_time
timing_message = f"Anchors on ANN (Explanation Time): {explanation_time:.2f} seconds"
print(timing_message)

In [None]:
# Extract features and rules from anchor explanation
anchor_rules = anchor_explanation.anchor
precision = anchor_explanation.precision
coverage = anchor_explanation.coverage

if len(anchor_rules) > 0:
    # Create a horizontal bar chart with one equal-length bar per anchor condition.
    rule_positions = range(len(anchor_rules))
    # Keep a minimum height: with very few rules the computed height would be too
    # small to fit the title and axis label.
    fig_height = max(1.5, len(anchor_rules) * 0.5)
    plt.figure(figsize=(8, fig_height))
    plt.barh(rule_positions, [1] * len(anchor_rules), color='skyblue')
    plt.yticks(rule_positions, anchor_rules)
    plt.xlabel('Anchor Feature Presence')
    plt.title(f'Anchor Rule (Precision={precision:.2f}, Coverage={coverage:.2f})')
    plt.show()
else:
    # An empty anchor would give a zero-height figure, which matplotlib rejects.
    print("Empty anchor returned for this instance; nothing to plot.")