## MMTHE01 - Master's Thesis

### E3. Thesis - Apply and Evaluate different XAI methods - Case Study with the ANN Model

* Applying XAI on a Deep Learning AI model (ANN Model)

#### Importing the libraries

In [None]:
### import general libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
import time
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import recall_score
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

from sklearn.preprocessing import LabelEncoder

In [None]:
# Directory the kernel is currently running in
cwd = os.getcwd()

# Folder to switch into, relative to cwd (adjust if the project layout differs)
relative_path = r"6. Analysis"

# Absolute path of the target folder
full_path = os.path.join(cwd, relative_path)

# Only switch when the target is an actual directory. os.path.exists() is also
# true for a regular file with that name, in which case os.chdir() would raise.
if os.path.isdir(full_path):
    os.chdir(full_path)
    print("Changed directory to:", full_path)
else:
    print("Folder does not exist:", full_path)

#### Importing the dataset

In [None]:
# Read the CSV into a DataFrame; the relative file name is resolved against the current working directory
dataset = pd.read_csv('train_dataset_final_encoded.csv')

In [None]:
# Preview the first rows of the loaded data
dataset.head()

In [None]:
# (number of rows, number of columns) of the loaded data
dataset.shape

### 5.1 Split the data into Train-Test

#### 5.1.1 Separate the features and the label

In [None]:
# Drop the TransactionID column; `dataset` itself is left unchanged
dataset_final = dataset.drop(columns=['TransactionID'])

In [None]:
# Earlier variant, not executed: NumPy-array split taken from the raw `dataset`.
#X = dataset.iloc[:, 1:].values
#y = dataset.iloc[:,0].values

In [None]:
# Column 0 of dataset_final is used as the label, all remaining columns as features
y = dataset_final.iloc[:, 0]
X = dataset_final.iloc[:, 1:]

In [None]:
from sklearn.model_selection import train_test_split

# Stratified hold-out split: 20% of the rows go to the test set, with a fixed seed
X_train_im, X_test, y_train_im, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

### 5.2 Applying SMOTE

In [None]:
# Oversample the training split only; X_test / y_test are not passed through SMOTE
smote = SMOTE(random_state=1)
X_train, y_train = smote.fit_resample(X=X_train_im, y=y_train_im)

### 5.3 Feature Scaling

In [None]:
# Fit the scaler on the resampled training features only, then reuse the learned
# mean/std for the test features. Calling fit_transform on the test set would
# re-estimate the statistics from test data, so train and test would be scaled
# differently and the model would receive inconsistent inputs.
sc = StandardScaler()
X_tn_scaled = sc.fit_transform(X_train)
X_tt_scaled = sc.transform(X_test)

# Convert back to DataFrames so the column names survive scaling
# (both frames get a fresh default index, since only `columns` is passed)
X_train_scaled = pd.DataFrame(X_tn_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_tt_scaled, columns=X_test.columns)

### 5.4 Model Fitting

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
import sys, os
from contextlib import contextmanager

@contextmanager
def suppress_stdout():
    """Temporarily send everything written to ``sys.stdout`` to ``os.devnull``.

    Use as ``with suppress_stdout(): ...``. On exit (normal or via exception)
    ``sys.stdout`` is rebound to the stream that was active on entry.

    Only the devnull handle opened here is closed. If the wrapped code rebinds
    ``sys.stdout`` itself, that other stream is left open.
    """
    saved_stdout = sys.stdout
    # The `with` block owns the devnull handle, so it is always closed, and it
    # is the only stream this helper ever closes.
    with open(os.devnull, 'w') as devnull:
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = saved_stdout

In [None]:
# Build an ANN model
from tensorflow.keras.metrics import AUC

# Key under which the AUC metric is logged. Some Keras versions log the string
# shorthand 'AUC' under the metric's default name ('auc'); 'val_AUC' is then
# never available and EarlyStopping cannot act on it. An explicitly named
# metric object pins the logged key regardless of version.
AUC_METRIC_NAME = 'AUC'

model = Sequential([
    Input(shape=(X_train.shape[1],)),  # Explicit Input layer instead of input_dim in Dense
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # single sigmoid unit for the binary label
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[AUC(name=AUC_METRIC_NAME)],
)

# Early stopping to prevent overfitting: stop after 3 epochs without a higher
# validation AUC and roll back to the best weights seen.
early_stop = EarlyStopping(
    monitor=f'val_{AUC_METRIC_NAME}',
    patience=3,
    restore_best_weights=True,
    mode='max',
)

In [None]:
# Train the ANN model (Keras progress output is silenced by suppress_stdout).
#
# Keras' `validation_split` takes the LAST fraction of the rows, before any
# shuffling. The training data here is the output of SMOTE, which places the
# synthetic minority rows after the original rows, so the last 20% would hold
# almost only one class and val_AUC (the early-stopping signal) would be
# meaningless. A shuffled, stratified validation split is carved out instead.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_scaled, y_train,
    test_size=0.2,
    stratify=y_train,
    random_state=1,
)

with suppress_stdout():
    history = model.fit(
        X_fit, y_fit,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=256,
        callbacks=[early_stop],
        verbose=1
    )

In [None]:
# Write the trained model to "ann_model.h5" in the current working directory
model.save("ann_model.h5")

### 5.5 Applying XAI methods to the ANN Model

#### 5.5.1 Applying SHAP to the ANN Model

In [None]:
# Column labels of the training features as a plain Python list
feature_names = X_train.columns.to_list()

In [None]:
### import
import shap
# Load SHAP's JavaScript visualisation code into the notebook
# (presumably required by the interactive force plot further down — confirm)
shap.initjs()

In [None]:
# Start of the wall-clock measurement; it is stopped after the SHAP values have been computed,
# so the measured span covers masker and explainer construction as well
start_time = time.time()

In [None]:
# Background data for the masker: the full scaled training set is passed in.
# NOTE(review): shap's Independent masker presumably subsamples the background
# internally (its `max_samples` default) — confirm against the installed shap version.
masker = shap.maskers.Independent(X_train_scaled)

In [None]:
# Build the explainer from the trained Keras model and the masker defined above
shap_explainer = shap.Explainer(model, masker)

In [None]:
# Explain every row of the scaled test set; the result exposes `.values` and `.data`, used by the cells below
shap_values = shap_explainer(X_test_scaled)

In [None]:
end_time = time.time()
# NOTE: despite its name, `training_time` holds the elapsed SHAP explanation
# time (masker + explainer construction + explaining the test set), not the
# model training time.
training_time = end_time - start_time
print(f"SHAP on ANN Model (Explanation Time): {training_time:.2f} seconds")

In [None]:
# Beeswarm summary plot over all explained test rows
shap.plots.beeswarm(shap_values)

In [None]:
# Force plot for a single explanation: the first row of the test set
shap.plots.force(shap_values[0])

In [None]:
# Bar plot computed from the SHAP values of all explained test rows
shap.plots.bar(shap_values)

In [None]:
# Dependence scatter plot for TransactionAmt. `color` takes the full Explanation
# object (not its raw `.values` array), so shap can choose a feature to colour by
# and use that feature's data values and name for the colour bar.
shap.plots.scatter(shap_values[:, "TransactionAmt"], color=shap_values)

In [None]:
# Raw SHAP value array behind the Explanation object
shap_values.values

In [None]:
# Shape of the SHAP values for the first explained row
shap_values[0].values.shape

In [None]:
# Input data stored alongside the first explanation (taken from the scaled test set that was explained)
shap_values[0].data