<a href="https://colab.research.google.com/github/catarina-moreira/causabilityXAi/blob/master/Diabetes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Demystifying Predictive Black-Box Models: An Interpretable Probabilistic Approach

Catarina Moreira, Yu-Liang Chou, Mythreyi Velmurugan, Renuka Sindhgatta Rajan, Chun Ouyang, Peter Bruza

**Abstract** 


In [1]:
# Select TensorFlow 2.x when the notebook runs on Google Colab
try:
    # tensorflow_version only exists in Colab; anywhere else the magic is
    # unavailable and the resulting error is deliberately ignored
    %tensorflow_version 2.x
except Exception:
    pass

In [2]:
# library to deal with Bayesian Networks
!pip install pyagrum



In [3]:
# for reproducibility reasons:
import numpy as np
import pandas as pd
import random as rn
import tensorflow as tf
import csv

%matplotlib inline

# necessary for starting NumPy generated random numbers in a well-defined initial state
np.random.seed(515)

# necessary for starting core Python generated random numbers in a well-defined state
rn.seed(515)

# Force TensorFlow to single thread
# Multiple threads are a potential source of non-reproducible research results
# The ConfigProto below is kept for any TF1-style session code that wants to use it,
# but creating it does not configure anything by itself.
session_conf = tf.compat.v1.ConfigProto( intra_op_parallelism_threads=1,
                                          inter_op_parallelism_threads=1 )

# Apply the same single-thread limits through the TF2 configuration API so that
# they actually take effect in this notebook.
try:
    tf.config.threading.set_intra_op_parallelism_threads(1)
    tf.config.threading.set_inter_op_parallelism_threads(1)
except RuntimeError:
    # the thread pools cannot be changed once TensorFlow has been initialised,
    # e.g. when this cell is re-run without restarting the kernel
    print("TensorFlow is already initialised: thread settings left unchanged "
          "(restart the kernel to apply them).")

# set_random_seed() will make random number generation in the TensorFlow backend
# have a well defined initial state
# more details: https://www.tensorflow.org/api_docs/python/tf/compat/v1/set_random_seed
tf.compat.v1.set_random_seed(515)

# import auxiliary functions
from learning import *

# Bayesian networks
from sklearn.preprocessing import KBinsDiscretizer
from pylab import *
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb

# for classification purposes
from pyAgrum.lib.bn2roc import showROC

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
import matplotlib.image as mpimg
import pylab as pl
from pylab import savefig
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever spelling this installation provides.
plot_style_name = 'seaborn-deep' if 'seaborn-deep' in plt.style.available else 'seaborn-v0_8-deep'
plt.style.use(plot_style_name)

In [4]:
# use only if opening on google colab
#from google.colab import drive
#drive.mount('/content/drive')

## Diabetes Dataset

**Context**
This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases. The objective of the dataset is to diagnostically predict whether or not a patient has diabetes, based on certain diagnostic measurements included in the dataset. Several constraints were placed on the selection of these instances from a larger database. In particular, all patients here are females at least 21 years old of Pima Indian heritage.

**Content**
The dataset consists of several medical predictor variables and one target variable, Outcome. Predictor variables include the 
- number of pregnancies the patient has had, 
- their BMI, 
- insulin level, 
- age,
- glucose,
- blood pressure,
- skin thickness,
- Diabetes pedigree function


### Checking Dataset

In [5]:
# path to project folder
# To use your own location, set the CAUSABILITY_XAI_PATH environment variable
# before starting the notebook; otherwise the original default below is used.
import os

# os.path.join(..., "") guarantees a trailing separator, which the cells that
# build paths with PATH + "datasets/" and PATH + "training/" rely on
PATH = os.path.join(os.environ.get("CAUSABILITY_XAI_PATH",
                                   "/Users/catarina/GitHub/causabilityXAi/"), "")

In [6]:
# file name of the dataset inside the project's "datasets/" folder
DATASET_NAME = "diabetes.csv"

# column holding the class labels; in this dataset:
#   0 - if not diabetes
#   1 - if diabetes
class_var = "Outcome"

# build the full path to the csv file and read it into a DataFrame
dataset_path = "".join([PATH, "datasets/", DATASET_NAME])
data = pd.read_csv(dataset_path)

In [7]:
# number of rows per class label, to see how balanced the classes are
class_counts = data.groupby(class_var).count()
class_counts

Unnamed: 0_level_0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
Outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,500,500,500,500,500,500,500,500
1,268,268,268,268,268,268,268,268


### Balanced Dataset

In [8]:
# balance dataset: keep every row labelled 1 and an equally sized random
# subset of the rows labelled 0
yes_data = data[ data[class_var] == 1 ]

# NOTE: the two shuffles are kept exactly as before (same calls, same order) so
# that the rows drawn under the seed set at the top of the notebook do not change
sampled_data = data.sample(frac=1)
sampled_data = sampled_data[ sampled_data[class_var] == 0 ]

# take as many label-0 rows as there are label-1 rows instead of a fixed count;
# if fewer label-0 rows exist, the slice simply returns all of them
no_data = sampled_data.sample(frac=1)[0:len(yes_data)]

balanced_data = pd.concat([no_data, yes_data])

# check how balanced the classes are
balanced_data.groupby(class_var).count()

Unnamed: 0_level_0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
Outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,268,268,268,268,268,268,268,268
1,268,268,268,268,268,268,268,268


#### Train a Model for the Balanced Dataset

In [12]:
# apply one hot encoder to data
# standardize the input between 0 and 1
# (encode_data comes from the project's `learning` module)
X, Y, encoder, scaler = encode_data( balanced_data, class_var)

n_features = X.shape[1]
n_classes = len(balanced_data[class_var].unique())

flag = False  # DO NOT CHANGE! Data has already been generated.
if flag:
    # save training, test and validation data
    generate_save_training_data( dataset_path, X, Y)

# Load the splits in both cases: generate_save_training_data is called without
# binding any result, so with flag=True the X_train/... names used by the
# following cells would otherwise be undefined on a fresh kernel.
# NOTE(review): assumes load_training_data reads the files written by
# generate_save_training_data for the same dataset_path - confirm in learning.py
X_train, Y_train, X_test, Y_test, X_validation, Y_validation = load_training_data( dataset_path )
    

In [13]:
# the grid search only runs when `flag` is set
if flag:
    # candidate models for the grid search
    models = grid_search_model_generator(n_features, n_classes)

    # dataset name without the ".csv" extension
    dataset_label = DATASET_NAME.replace(".csv", "")

    # perform grid_search and keep what it returns, keyed by model name
    # (see how HISTORY_DICT is indexed when the chosen model is saved)
    HISTORY_DICT = perform_grid_search(
        models, PATH, dataset_label,
        X_train, Y_train,
        X_validation, Y_validation,
        X_test, Y_test,
        batch_size=8, epochs=150,
    )

MODEL NAME: model_h1_N1
Test loss:      0.6945 	Train loss:  0.6938
Test accuracy:  0.4875 	Train accu:  0.4973
Abs accuracy:   0.0098
Abs loss:       0.0007

###########################################################

MODEL NAME: model_h2_N1
Epoch 00014: early stopping
Test loss:      0.6931 	Train loss:  0.6931
Test accuracy:  0.5000 	Train accu:  0.5027
Abs accuracy:   0.0027
Abs loss:       0.0000

###########################################################

MODEL NAME: model_h3_N1
Epoch 00014: early stopping
Test loss:      0.6931 	Train loss:  0.6931
Test accuracy:  0.5000 	Train accu:  0.5027
Abs accuracy:   0.0027
Abs loss:       0.0000

###########################################################

MODEL NAME: model_h4_N1
Epoch 00014: early stopping
Test loss:      0.6931 	Train loss:  0.6931
Test accuracy:  0.5000 	Train accu:  0.5027
Abs accuracy:   0.0027
Abs loss:       0.0000

###########################################################

MODEL NAME: model_h5_N1
Epoch 00014:

Test loss:      0.6350 	Train loss:  0.6218
Test accuracy:  0.7250 	Train accu:  0.7380
Abs accuracy:   0.0130
Abs loss:       0.0132

###########################################################

MODEL NAME: model_h2_N6
Test loss:      0.5929 	Train loss:  0.5636
Test accuracy:  0.7000 	Train accu:  0.7273
Abs accuracy:   0.0273
Abs loss:       0.0293

###########################################################

MODEL NAME: model_h3_N6
Test loss:      0.6400 	Train loss:  0.6354
Test accuracy:  0.6125 	Train accu:  0.6444
Abs accuracy:   0.0319
Abs loss:       0.0046

###########################################################

MODEL NAME: model_h4_N6
Epoch 00058: early stopping
Test loss:      0.6758 	Train loss:  0.6394
Test accuracy:  0.5875 	Train accu:  0.6791
Abs accuracy:   0.0916
Abs loss:       0.0364

###########################################################

MODEL NAME: model_h5_N6
Test loss:      0.5548 	Train loss:  0.5028
Test accuracy:  0.7000 	Train accu:  0.7567
Abs 

Test loss:      0.5944 	Train loss:  0.5275
Test accuracy:  0.6625 	Train accu:  0.7219
Abs accuracy:   0.0594
Abs loss:       0.0670

###########################################################

MODEL NAME: model_h3_N11
Test loss:      0.5486 	Train loss:  0.5317
Test accuracy:  0.7125 	Train accu:  0.7406
Abs accuracy:   0.0281
Abs loss:       0.0169

###########################################################

MODEL NAME: model_h4_N11
Test loss:      0.5824 	Train loss:  0.5070
Test accuracy:  0.6625 	Train accu:  0.7540
Abs accuracy:   0.0915
Abs loss:       0.0753

###########################################################

MODEL NAME: model_h5_N11
Test loss:      0.5802 	Train loss:  0.5418
Test accuracy:  0.7125 	Train accu:  0.7139
Abs accuracy:   0.0014
Abs loss:       0.0384

###########################################################

MODEL NAME: model_h1_N12
Test loss:      0.6246 	Train loss:  0.5848
Test accuracy:  0.6000 	Train accu:  0.6845
Abs accuracy:   0.0845
Abs l

In [22]:
# folders where the selected model and its training history are serialised
dataset_label = DATASET_NAME.replace(".csv", "")
path_serialisation_model = PATH + "training/" + dataset_label + "/model/"
path_serialisation_histr = PATH + "training/" + dataset_label + "/history/"

# the best performing model was obtained with 5 hidden layers with 12 neurons each
# (defined before the branch: the loading branch needs the name as well)
model_name = "model_h5_N12"

if flag:
    # get respective model training history and model
    model_history = HISTORY_DICT[ model_name ][0]
    model = HISTORY_DICT[ model_name ][1]

    # save model and model history to file
    save_model_history(  model_history, model_name, path_serialisation_histr )
    save_model( model, model_name, path_serialisation_model )
else:
    # NOTE(review): `model_history` and `model` are passed as first arguments
    # although nothing in this cell defines them before this point, so on a
    # fresh kernel these calls raise NameError. Check the signatures of
    # load_model_history / load_model in learning.py and drop the first
    # argument if they only take (model_name, path).
    model_history = load_model_history(  model_history, model_name, path_serialisation_histr )
    model = load_model( model, model_name, path_serialisation_model )

model.summary()

/Users/catarina/GitHub/causabilityXAi/training/diabetes/history/model_h5_N12_DUO.h5
Model history saved to disk
Saving files:
/Users/catarina/GitHub/causabilityXAi/training/diabetes/model/model_h5_N12_DUO.json
/Users/catarina/GitHub/causabilityXAi/training/diabetes/model/model_h5_N12_DUO.h5
Model saved to disk
Model: "model_h5_N12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_293 (Dense)            (None, 12)                108       
_________________________________________________________________
dense_294 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_295 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_296 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_

#### Evaluate Model

In [26]:
# evaluate loaded model on test and training data
# the model is (re)compiled first so that evaluate() reports loss and accuracy;
# `learning_rate` is the current name of the argument formerly spelled `lr`
optim = keras.optimizers.Nadam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999)
model.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])

train_loss, train_acc = model.evaluate(X_train, Y_train, verbose=1)
test_loss, test_acc = model.evaluate(X_test, Y_test, verbose=1)

print('\n[Accuracy] Train: %.3f, Test: %.3f' % (train_acc, test_acc))
print('[Loss] Train: %.3f, Test: %.3f' % (train_loss, test_loss))


[Accuracy] Train: 0.773, Test: 0.775
[Loss] Train: 0.476, Test: 0.531


In [29]:
# accuracy recorded in the model's training history, one point per epoch
fig, ax = plt.subplots()
ax.plot(model_history.history['accuracy'], label='train')
ax.plot(model_history.history['val_accuracy'], label='test')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Number of Epochs')
ax.set_ylim([0, 1])
ax.legend()
plt.show()


In [31]:
# loss recorded in the model's training history, one point per epoch
fig, ax = plt.subplots()
ax.plot(model_history.history['loss'], label='train')
ax.plot(model_history.history['val_loss'], label='test')
ax.set_ylabel('Loss')
ax.set_xlabel('Number of Epochs')
ax.set_ylim([0, 1])
ax.legend()
plt.show()

In [34]:
from sklearn.metrics import roc_curve, auc

# model outputs for the test set; column k is compared with column k of Y_test
Y_pred_proba = model.predict(X_test)

# ROC curve and area under it, stored per class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(Y_test[:, i], Y_pred_proba[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# one figure per class, with the diagonal drawn as a dashed reference line
for i in range(n_classes):
    curve_label = 'ROC curve (area = %0.2f)' % roc_auc[i]
    plt.figure()
    plt.plot(fpr[i], tpr[i], label=curve_label)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

### Searching for specific datapoints for local evaluation

In [63]:
# model outputs for every instance of the test set
predictions = model.predict( X_test )
# map the model outputs back through the label encoder
prediction_class = encoder.inverse_transform( predictions )
# map the scaled test features back through the scaler
orig_vec = scaler.inverse_transform( X_test )

In [None]:
# dimensions of the recovered test vectors (arrays expose `.shape`, not `.dims`)
orig_vec.shape

In [64]:

# 
# container for the data points selected for local evaluation (not filled yet)
local_data_dict = []

# NOTE(review): the iterable was missing in the original loop header; the stored
# cell output shows 8-value vectors in original units, which matches iterating
# over orig_vec - confirm this was the intended collection
for local_data_point in orig_vec:

    print(local_data_point)
    
    
    

[1.00000000e+00 1.89000000e+02 6.42105263e+01 2.30000000e+01
 8.46000000e+02 3.01000000e+01 4.04043553e-01 5.90000000e+01]
[  1.         180.           0.           0.           0.
  43.3          0.28839026  41.        ]
[ 2.         95.         57.78947368 14.         88.         26.1
  0.75299744 22.        ]
[  8.         179.          77.05263158  42.         130.
  32.7          0.72408412  36.        ]
[  0.         132.          83.47368421   0.           0.
  32.4          0.3990585   21.        ]
[  8.         109.          81.33333333  39.         114.
  27.9          0.64532024  31.        ]
[ 0.         67.         81.33333333  0.          0.         45.3
  0.20065329 46.        ]
[  8.         151.          83.47368421  32.         210.
  42.9          0.52169086  36.        ]
[ 2.         94.         72.77192982 18.         76.         26.
  0.56655636 21.        ]
[ 5.         77.         87.75438596 41.         42.         35.8
  0.16276687 35.        ]
[ 11.         1

## Train a Model for the Unbalanced Dataset

In [None]:
# apply one hot encoder to data
# standardize the input between 0 and 1
# NOTE(review): this rebinds `scaler` (and, below, `n_features`, `n_classes`
# and `flag`) that were set for the balanced dataset, while the encoder gets
# its own `_unb` name - confirm no later cell still needs the balanced scaler
X_unb, Y_unb, encoder_unb, scaler = encode_data( data, class_var)

n_features = X_unb.shape[1]
n_classes = len(data[class_var].unique())

flag = False
if flag:
    # save training, test and validation data
    generate_save_training_data( dataset_path + "_unb", X_unb, Y_unb)

# Load the splits in both cases: generate_save_training_data is called without
# binding any result, so with flag=True the *_unb split names would otherwise
# stay undefined on a fresh kernel.
# NOTE(review): assumes load_training_data reads the files written by
# generate_save_training_data for the same path - confirm in learning.py
X_train_unb, Y_train_unb, X_test_unb, Y_test_unb, X_validation_unb, Y_validation_unb = load_training_data( dataset_path + "_unb" )
    