In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from joblib import dump, load

In [2]:
import os,sys,inspect
# Locate the notebook's working directory and expose its parent on sys.path,
# so that project modules living one level up can be imported by later cells.
# os.getcwd() replaces inspect.getfile(inspect.currentframe()): inside a kernel
# the frame's "file" is a synthetic per-cell name (not the notebook), so the
# inspect-based lookup only produced the right folder by accident.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
# Only insert when it is not already first, so re-running the cell does not
# stack duplicate entries.
if sys.path[:1] != [parentdir]:
    sys.path.insert(0, parentdir)

In [3]:
# Single seed reused by the under-sampler, every train/test split and the explainers below.
random_state = 0

# IMPORT DATASET

In [4]:
# Dataset folder under the project root. The trailing "/" matters: file names
# are appended to this string by plain concatenation.
dataset_path = parentdir + "/datasets/EpilepticSeizureRecognition/"

In [5]:
# Prefix used when building the names of output folders and files for this dataset.
dataset_name = "EpilepticSeizureRecognition"

In [6]:
# Read the table (first CSV column is the index) and pull out the label column.
X = pd.read_csv(dataset_path + "data.csv", index_col=0)
y = np.array(X["y"])
y_all = y.ravel().astype("int")
# Merge the original classes 2, 3, 4 and 5 into a single class (2); class 1 is untouched.
y_all[np.isin(y_all, np.arange(2, 6))] = 2
# Re-index the remaining labels to consecutive integers starting at 0.
le = LabelEncoder()
y_all = le.fit_transform(y_all)
# Everything except the label column becomes the feature matrix.
X_all = X.drop(columns="y").values

In [7]:
# Balance the classes by randomly discarding samples (the next cell reports a 50/50 split).
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=random_state)
# Flatten to (n_samples, n_values) before resampling: this cell overwrites
# X_all with a 3-D array at the end, so without the flatten a second run of the
# cell would hand 3-D input to the sampler and fail. On the first run X_all is
# already 2-D and the reshape is a no-op.
X_all, y_all = rus.fit_resample(X_all.reshape(len(X_all), -1), y_all)

# Append an axis of length 1 -> (n_samples, n_values, 1).
X_all = X_all.reshape((X_all.shape[0], X_all.shape[1], 1))

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [8]:
# Show the array shapes and the fraction of samples belonging to each class.
print("X SHAPE: ", X_all.shape)
print("y SHAPE: ", y_all.shape)
unique, counts = np.unique(y_all, return_counts=True)
print("\nCLASSES BALANCE")
n_total = counts.sum()
for label, n_label in zip(unique, counts):
    print(label, ": ", round(n_label / n_total, 2))

X SHAPE:  (4600, 178, 1)
y SHAPE:  (4600,)

CLASSES BALANCE
0 :  0.5
1 :  0.5


## DATASET SPLIT

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
def _stratified_split(X, y, test_size):
    """Class-stratified train_test_split using the notebook-wide seed.

    Returns the usual 4-item result: X_first, X_second, y_first, y_second,
    where the "second" part holds `test_size` of the data.
    """
    return train_test_split(X, y, test_size=test_size, stratify=y, random_state=random_state)


# BLACKBOX TRAIN/TEST SETS SPLIT (20% of all data -> blackbox test)
X_train, X_test, y_train, y_test = _stratified_split(X_all, y_all, 0.2)

# BLACKBOX/EXPLANATION SETS SPLIT (30% of the remainder -> explanation pool)
X_train, X_exp, y_train, y_exp = _stratified_split(X_train, y_train, 0.3)

# BLACKBOX TRAIN/VALIDATION SETS SPLIT (20% of what is left -> blackbox validation)
X_train, X_val, y_train, y_val = _stratified_split(X_train, y_train, 0.2)

# EXPLANATION TRAIN/TEST SETS SPLIT (20% of the pool -> explanation test)
X_exp_train, X_exp_test, y_exp_train, y_exp_test = _stratified_split(X_exp, y_exp, 0.2)

# EXPLANATION TRAIN/VALIDATION SETS SPLIT (20% of the rest -> explanation validation)
X_exp_train, X_exp_val, y_exp_train, y_exp_val = _stratified_split(X_exp_train, y_exp_train, 0.2)

print("SHAPES:")
for title, subset in (
    ("BLACKBOX TRAINING SET: ", X_train),
    ("BLACKBOX VALIDATION SET: ", X_val),
    ("BLACKBOX TEST SET: ", X_test),
    ("EXPLANATION TRAINING SET: ", X_exp_train),
    ("EXPLANATION VALIDATION SET: ", X_exp_val),
    ("EXPLANATION TEST SET: ", X_exp_test),
):
    print(title, subset.shape)

SHAPES:
BLACKBOX TRAINING SET:  (2060, 178, 1)
BLACKBOX VALIDATION SET:  (516, 178, 1)
BLACKBOX TEST SET:  (920, 178, 1)
EXPLANATION TRAINING SET:  (706, 178, 1)
EXPLANATION VALIDATION SET:  (177, 178, 1)
EXPLANATION TEST SET:  (221, 178, 1)


In [11]:
# Dimensions handed to the model builders further down.
n_timesteps = X_train.shape[1]       # second axis of the training array
n_outputs = len(np.unique(y_all))    # number of distinct class labels
n_features = 1                       # hard-coded single value per time step
print("TIMESTEPS: ", n_timesteps)
print("N. LABELS: ", n_outputs)

TIMESTEPS:  178
N. LABELS:  2


# BLACKBOX MODEL

In [12]:
import keras
import time

In [13]:
from blackboxes import *

In [14]:
import blackboxes

In [15]:
# Development leftover: a module-reload snippet kept as a bare string literal.
# Nothing is executed here; the cell only echoes the text as its output.
"""import importlib
importlib.reload(blackboxes)"""

'import importlib\nimportlib.reload(blackboxes)'

## resnet

In [16]:
# Instantiate the ResNet blackbox for the series length and class count computed above
# (build_resnet presumably comes from the star-import of `blackboxes` — verify).
blackbox = build_resnet(n_timesteps, n_outputs)

Instructions for updating:
Colocations handled automatically by placer.


In [17]:
# Restore weights from a stored checkpoint file; no training happens in this notebook.
blackbox.load_weights(parentdir + "/blackbox_checkpoints/EpilepticSeizureRecognition_blackbox_resnet_20200105_233014_best_weights_+0.99_.hdf5")

In [18]:
# Keep a dedicated handle to the ResNet: `blackbox` is reassigned for the next model.
resnet = blackbox

## simple CNN

In [19]:
# Instantiate the simple CNN blackbox with the same input length and class count
# (build_simple_CNN presumably comes from the star-import of `blackboxes` — verify).
blackbox = build_simple_CNN(n_timesteps, n_outputs)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [20]:
# Restore weights from a stored checkpoint file; no training happens in this notebook.
blackbox.load_weights(parentdir + "/blackbox_checkpoints/EpilepticSeizureRecognition_blackbox_simpleCNN_20200105_225722_best_weights_+0.98_.hdf5")

In [21]:
# Keep a dedicated handle to the CNN: `blackbox` is reassigned again later on.
simplecnn = blackbox

## KNN

In [22]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, mean_squared_error, accuracy_score
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV

In [23]:
from joblib import load, dump

In [24]:
# Load the stored KNN blackbox from disk with joblib.
# NOTE(review): joblib.load unpickles arbitrary objects — only use it on files you trust.
knn = load(parentdir + "/blackbox_checkpoints/EpilepticSeizureRecognition_blackbox_knn_20200105_225631.joblib")

# AUTOENCODER

In [25]:
# TENSORBOARD
# tensorboard --logdir=/tmp/autoencoder
# http://0.0.0.0:6006
# lsof -i tcp:6006 | grep -v PID | awk '{print $2}' | xargs kill

In [26]:
import autoencoders
from autoencoders import *
# Development leftover: a module-reload snippet kept as a bare string literal.
# Nothing is executed here; the cell only echoes the text as its output.
"""import importlib
importlib.reload(autoencoders)"""

'import importlib\nimportlib.reload(autoencoders)'

## Standard

In [27]:
# Autoencoder configuration: 8 blocks with one filter count and one kernel size
# per block, and a latent dimension of 30. The exact meaning of each key is
# defined by the project's Autoencoder class.
params = dict(
    input_shape=(178, 1),
    n_blocks=8,
    latent_dim=30,
    encoder_latent_layer_type="dense",
    encoder_args=dict(
        filters=[2, 4, 8, 16, 32, 64, 128, 256],
        kernel_size=[21, 18, 15, 13, 11, 8, 5, 3],
        padding="same",
        activation="elu",
        pooling=[1] * 8,
    ),
)

# Build the three models; weights are loaded in the next cell.
aut = Autoencoder(verbose=False, **params)
encoder, decoder, autoencoder = aut.build()

In [28]:
# Restore the autoencoder weights from a stored checkpoint file.
autoencoder.load_weights(parentdir + "/autoencoder_checkpoints/EpilepticSeizureRecognition_autoencoder_20200106_111007_best_weights_+14872.8621_.hdf5")

# GLOBAL VS LOCAL SHAPELET TREE

In [29]:
# Draw a class-stratified sample of exactly 50 instances from the explanation
# test set (an integer test_size is an absolute count); the complementary part
# is discarded.
_, X_exp_test_50, _, y_exp_test_50 = train_test_split(
    X_exp_test,
    y_exp_test,
    test_size=50,
    stratify=y_exp_test,
    random_state=random_state,
)

In [30]:
import agnosticlocalexplainer
from agnosticlocalexplainer import *

from myutils import BlackboxPredictWrapper
import time
from agnosticglobalexplainer import AgnosticGlobalExplainer, save_shapelet_model, load_shapelet_model
from joblib import dump
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, coverage_error
from global_vs_local_surrogate import build_agnostic_local_explainers, print_report, massive_save_agnostic_local_explainers, massive_load_agnostic_local_explainers, get_all_predictions



## RESNET

In [31]:
# Objects used by the explainers when the ResNet is the blackbox.
# Encoder/decoder are taken from the weight-loaded autoencoder by layer position
# (assumes layers[1] / layers[2] are the encoder / decoder — TODO confirm against autoencoders.py).
encoder = autoencoder.layers[1]
decoder = autoencoder.layers[2]
blackbox = resnet
# Second argument of BlackboxPredictWrapper; presumably the rank of the array
# the blackbox expects (here 3) — verify in myutils.
blackbox_input_dimensions = 3
# Reuse the variable instead of repeating the literal so the wrapper and
# build_agnostic_local_explainers can never disagree.
blackbox_predict = BlackboxPredictWrapper(blackbox, blackbox_input_dimensions)
labels = ["seizure", "no_seizure"]

In [None]:
# Iteration budget passed to both the global and the local surrogate builders.
max_iter = 50
# Fresh, timestamped output folder for this run: <parent>/agnostic_explainers/<dataset>_<YYYYmmdd_HHMMSS>
file_path = "/".join((
    parentdir,
    "agnostic_explainers",
    dataset_name + "_" + time.strftime("%Y%m%d_%H%M%S"),
))

In [None]:
import platform
# Create the run folder together with any missing parent folders.
# os.makedirs behaves the same on Windows and POSIX, so no platform branch is
# needed (plain os.mkdir fails when "agnostic_explainers" does not exist yet);
# exist_ok=True lets the cell be re-run without raising FileExistsError.
os.makedirs(file_path + "/", exist_ok=True)

In [None]:
%%time
# Fit the global surrogate on the explanation training series (trailing axis
# dropped), using the blackbox's own predictions as the targets.
global_surrogate = AgnosticGlobalExplainer(
    random_state=random_state,
    max_iter=max_iter,
    distance_quantile_threshold=np.arange(1, 10) / 10,  # 0.1, 0.2, ..., 0.9
    labels=labels,
)
global_surrogate.fit(
    X_exp_train[:, :, 0],
    blackbox_predict.predict(X_exp_train),
)

In [None]:
%%time
# Build one local explainer per instance of the 50-instance sample.
# The keyword values are forwarded unchanged to the project helper; their
# exact meaning is defined in global_vs_local_surrogate.
agnostic_explainers = build_agnostic_local_explainers(
    blackbox,
    encoder,
    decoder,
    autoencoder,
    X_exp_test_50,
    y_exp_test_50,
    blackbox_input_dimensions=blackbox_input_dimensions,
    labels=labels,
    size=1000,
    neigh_type="geneticp",
    ngen=10,
    max_iter=max_iter,
    random_state=random_state,
    distance_quantile_threshold=np.arange(1, 10) / 10,  # 0.1, 0.2, ..., 0.9
)

In [None]:
# Collect the predictions of the local explainers, the global surrogate and the
# blackbox on the 50-instance sample, store them as a ';'-separated CSV in the
# run folder, and print the summary report.
results_df = get_all_predictions(
    agnostic_explainers,
    global_surrogate,
    blackbox_predict,
    X_exp_test_50,
    blackbox_predict.predict(X_exp_train),
    encoder,
    decoder,
)
results_df.to_csv(file_path + "/results_df.csv", sep=";", index=False)
print_report(results_df)

In [None]:
%%time
# Persist the global surrogate and all local explainers into the run folder.
# NOTE(review): the first helper receives the folder with a trailing "/", the
# second without — confirm both end up writing to the same location.
save_shapelet_model(global_surrogate, file_path + "/")
massive_save_agnostic_local_explainers(agnostic_explainers, file_path, verbose = True)

In [None]:
%%time
# Read the surrogate and the local explainers back from the run folder,
# replacing the in-memory objects (round-trip check of the save step).
# NOTE(review): as in the save cell, the two helpers get the path with and without a trailing "/".
global_surrogate = load_shapelet_model(file_path + "/")
agnostic_explainers = massive_load_agnostic_local_explainers(file_path, verbose = True)

In [None]:
# sanity check: recompute the prediction table with the reloaded objects and
# compare it element-wise with the table computed before saving.
# The built-in sum adds the comparison row by row, so (assuming two equal-shaped
# 2-D tables) one mismatch count per column is printed; all zeros means the
# reload reproduced the earlier results.
results_df_loaded = get_all_predictions(agnostic_explainers, global_surrogate, blackbox_predict, X_exp_test_50, blackbox_predict.predict(X_exp_train), encoder, decoder)
print(sum(results_df_loaded.values != results_df.values))

## LOADING

In [12]:
%%time
# Load a previously saved surrogate and its local explainers from a fixed,
# timestamped folder instead of recomputing them.
explainers_dir = parentdir + "/agnostic_explainers/EpilepticSeizureRecognition_20200106_192815/"
global_surrogate = load_shapelet_model(explainers_dir)
agnostic_explainers = massive_load_agnostic_local_explainers(explainers_dir, verbose=True)

1 / 50
2 / 50
3 / 50
4 / 50
5 / 50
6 / 50
7 / 50
8 / 50
9 / 50
10 / 50
11 / 50
12 / 50
13 / 50
14 / 50
15 / 50
16 / 50
17 / 50
18 / 50
19 / 50
20 / 50
21 / 50
22 / 50
23 / 50
24 / 50
25 / 50
26 / 50
27 / 50
28 / 50
29 / 50
30 / 50
31 / 50
32 / 50
33 / 50
34 / 50
35 / 50
36 / 50
37 / 50
38 / 50
39 / 50
40 / 50
41 / 50
42 / 50
43 / 50
44 / 50
45 / 50
46 / 50
47 / 50
48 / 50
49 / 50
50 / 50
CPU times: user 17min 58s, sys: 20.2 s, total: 18min 19s
Wall time: 18min 44s


In [46]:
# Evaluate the loaded explainers on the 50-instance sample and print the fidelity report.
results_df = get_all_predictions(
    agnostic_explainers,
    global_surrogate,
    blackbox_predict,
    X_exp_test_50,
    blackbox_predict.predict(X_exp_train),
    encoder,
    decoder,
)
print_report(results_df)

local fidelity:  1.0
global fidelity:  0.96
reconstruction fidelity:  0.98


In [32]:
### SHAP
# Compute the SHAP stability on the 50-instance sample for every blackbox and
# store each result twice: as a NumPy file (np.save appends ".npy") and as a
# ';'-separated CSV built by stabilities_df.

from myutils import stabilities_df
from shap_utils import shap_stability, shap_multi_stability

# One timestamped prefix shared by all files written in this run.
save_path = parentdir + "/stabilities/" + dataset_name + "_stability" + "_shap_" + time.strftime("%Y%m%d_%H%M%S")

params = {"background": "linear"}

# (file suffix, blackbox, third positional argument of shap_multi_stability).
# The third value is 3 for the Keras models and 2 for the KNN — presumably the
# rank of the input each model expects; verify in shap_utils.
shap_targets = [
    ("_resnet", resnet, 3),
    ("_simplecnn", simplecnn, 3),
    ("_knn", knn, 2),
]

# The three models used to be handled by three copy-pasted stanzas; the loop
# keeps the same order, arguments and output file names.
for shap_suffix, shap_model, shap_input_dims in shap_targets:
    filename = save_path + shap_suffix
    stabilities = shap_multi_stability(X_exp_test_50, shap_model, shap_input_dims, point_by_point = False, **params)
    np.save(filename, stabilities)
    stabilities_df(stabilities, len(X_exp_test_50)).to_csv(filename + ".csv", sep = ";", index = True)

In [None]:
%%time
from global_vs_local_surrogate import calculate_shapelet_stability

# Compute the shapelet-explanation stability for the saved local explainers of
# each blackbox and store the result as a NumPy file (np.save appends ".npy").

n_neighbors = 30

# Timestamp taken once, so that all files of this run share the same prefix
# (previously it was re-read before each model, and since every step is
# long-running the three files of one run ended up with different stamps).
save_path = parentdir + "/stabilities/" + dataset_name + "_stability" + "_shapelets_" + time.strftime("%Y%m%d_%H%M%S")

# (output file suffix, folder holding the saved local explainers of that blackbox)
shapelet_runs = [
    ("_knn", "EpilepticSeizureRecognition_geneticp_knn_exc_20200117_091805/"),
    ("_resnet", "EpilepticSeizureRecognition_geneticp_resnet_20200106_192815/"),
    ("_simplecnn", "EpilepticSeizureRecognition_geneticp_simplecnn_20200109_180414/"),
]

# Same order as the original three stanzas (KNN, RESNET, CNN), so the variables
# left behind after the cell refer to the CNN run as before.
for shapelet_suffix, explainers_folder in shapelet_runs:
    file_path = parentdir + "/agnostic_explainers/" + explainers_folder
    agnostic_explainers = massive_load_agnostic_local_explainers(file_path, verbose = True)

    filename = save_path + shapelet_suffix
    stabilities = calculate_shapelet_stability(agnostic_explainers, X_exp_test_50, n_neighbors = n_neighbors)
    np.save(filename, stabilities)
