In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from pyts.datasets import make_cylinder_bell_funnel

In [2]:
import os,sys,inspect
# Make the project root (the parent of the notebook's working directory)
# importable, so that the local modules used below can be found.
# The working directory is read with os.getcwd(): inspecting the current frame's
# file name inside a notebook only happens to resolve to the working directory
# on kernels that give cells a pseudo file name, and points elsewhere on kernels
# that back each cell with a temporary file.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
# Keep the project root at the front of sys.path, but do not stack a new
# duplicate entry every time this cell is re-run.
if sys.path[:1] != [parentdir]:
    sys.path.insert(0,parentdir)

In [3]:
# Single seed reused by the dataset generator, every train/test split and the
# explainers built later in this notebook.
random_state = 0

# IMPORT DATASET

In [4]:
# Short dataset identifier; used later as the prefix of the output directory name.
dataset_name = "cbf"

In [5]:
# Generate 600 synthetic cylinder-bell-funnel series with the shared seed.
X_all, y_all = make_cylinder_bell_funnel(n_samples=600, random_state=random_state)
# Append a trailing axis of size 1: (n_samples, n_timesteps) -> (n_samples, n_timesteps, 1).
X_all = X_all[:, :, np.newaxis]

In [6]:
# Report the array shapes, then the relative frequency of each class label.
print("X SHAPE: ", X_all.shape)
print("y SHAPE: ", y_all.shape)
unique, counts = np.unique(y_all, return_counts=True)
print("\nCLASSES BALANCE")
total = sum(counts)
for label, count in zip(unique, counts):
    # Fraction of samples carrying this label, rounded to two decimals.
    print(label, ": ", round(count/total, 2))

X SHAPE:  (600, 128, 1)
y SHAPE:  (600,)

CLASSES BALANCE
0 :  0.33
1 :  0.33
2 :  0.33


## DATASET SPLIT

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# BLACKBOX/EXPLANATION SETS SPLIT
X_train, X_exp, y_train, y_exp = train_test_split(X_all, y_all, 
                                                  test_size=0.3, stratify = y_all, random_state=random_state)

# BLACKBOX TRAIN/TEST SETS SPLIT
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, 
                                                  test_size=0.2, stratify = y_train, random_state=random_state)

# BLACKBOX TRAIN/VALIDATION SETS SPLIT
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, 
                                                  test_size=0.2, stratify = y_train, random_state=random_state)

# EXPLANATION TRAIN/TEST SETS SPLIT
X_exp_train, X_exp_test, y_exp_train, y_exp_test = train_test_split(X_exp, y_exp, 
                                                                    test_size=0.2, 
                                                                    stratify = y_exp, 
                                                                    random_state=random_state)

# EXPLANATION TRAIN/VALIDATION SETS SPLIT
X_exp_train, X_exp_val, y_exp_train, y_exp_val = train_test_split(X_exp_train, y_exp_train, 
                                                                  test_size=0.2, 
                                                                  stratify = y_exp_train, 
                                                                  random_state=random_state)

print("SHAPES:")
print("BLACKBOX TRAINING SET: ", X_train.shape)
print("BLACKBOX VALIDATION SET: ", X_val.shape)
print("BLACKBOX TEST SET: ", X_test.shape)
print("EXPLANATION TRAINING SET: ", X_exp_train.shape)
print("EXPLANATION VALIDATION SET: ", X_exp_val.shape)
print("EXPLANATION TEST SET: ", X_exp_test.shape)

SHAPES:
BLACKBOX TRAINING SET:  (268, 128, 1)
BLACKBOX VALIDATION SET:  (68, 128, 1)
BLACKBOX TEST SET:  (84, 128, 1)
EXPLANATION TRAINING SET:  (115, 128, 1)
EXPLANATION VALIDATION SET:  (29, 128, 1)
EXPLANATION TEST SET:  (36, 128, 1)


In [9]:
# Dimensions used to build the models below.
n_timesteps = X_train.shape[1]        # length of each series (axis 1 of X_train)
n_outputs = len(np.unique(y_all))     # number of distinct class labels
n_features = 1
print("TIMESTEPS: ", n_timesteps)
print("N. LABELS: ", n_outputs)

TIMESTEPS:  128
N. LABELS:  3


# BLACKBOX MODEL

In [10]:
from joblib import load
import keras
"""import importlib
importlib.reload(blackboxes)"""
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
import time
import blackboxes
from blackboxes import *

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## resnet

In [11]:
# Build the ResNet architecture and restore its saved weights from the project checkpoint.
resnet = build_resnet(n_timesteps, n_outputs)
resnet.load_weights(parentdir + "/blackbox_checkpoints/cbf_blackbox_resnet_20191106_145242_best_weights_+1.00_.hdf5")
# `blackbox` is kept as an alias of the most recently loaded model.
blackbox = resnet

## simple CNN

In [12]:
# Build the simple CNN architecture and restore its saved weights from the project checkpoint.
simplecnn = build_simple_CNN(n_timesteps, n_outputs)
simplecnn.load_weights(parentdir + "/blackbox_checkpoints/cbf_blackbox_simpleCNN_20191106_145515_best_weights_+1.00_.hdf5")
# `blackbox` is kept as an alias of the most recently loaded model.
blackbox = simplecnn

## KNN

In [13]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, mean_squared_error, accuracy_score
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV

In [14]:
# Restore the previously saved KNN blackbox from the project checkpoint directory.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only
# load checkpoints that come from a trusted source.
knn = load(parentdir + "/blackbox_checkpoints/cbf_blackbox_knn_20191106_145654.joblib")



# AUTOENCODER

In [15]:
"""import importlib
importlib.reload(autoencoders)"""
import autoencoders
from autoencoders import *

## STANDARD

In [16]:
# Autoencoder hyper-parameters: 8 blocks, one entry per block in the
# filters / kernel_size / pooling lists, and a 2-dimensional latent space.
params = dict(
    input_shape=(n_timesteps, 1),
    n_blocks=8,
    latent_dim=2,
    encoder_latent_layer_type="dense",
    encoder_args=dict(
        filters=[2, 4, 8, 16, 32, 64, 128, 256],
        kernel_size=[21, 18, 15, 13, 11, 8, 5, 3],
        padding="same",
        activation="elu",
        pooling=[1] * 8,
    ),
)

aut = Autoencoder(verbose=False, **params)
# build() yields the encoder, the decoder and the combined autoencoder.
encoder, decoder, autoencoder = aut.build()
# Restore the saved weights into the combined model.
autoencoder.load_weights(parentdir+"/autoencoder_checkpoints/cbf_autoencoder_20191106_144056_best_weights_+1.0504_.hdf5")

# GLOBAL VS LOCAL SHAPELET TREE

In [17]:
import importlib
import agnosticlocalexplainer
# Reload BEFORE the star import: importlib.reload() re-executes the module but
# does not rebind names that were already copied into this namespace, so a star
# import done first would keep pointing at the pre-reload objects.
importlib.reload(agnosticlocalexplainer)
from agnosticlocalexplainer import *

<module 'agnosticlocalexplainer' from 'C:\\Users\\francesco\\TS_AgnosticLocalExplainer\\agnosticlocalexplainer.py'>

In [18]:
from myutils import BlackboxPredictWrapper
import time
from agnosticglobalexplainer import AgnosticGlobalExplainer, save_shapelet_model, load_shapelet_model
from joblib import dump
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, coverage_error
from global_vs_local_surrogate import build_agnostic_local_explainers, massive_save_agnostic_local_explainers, massive_load_agnostic_local_explainers

## RESNET

In [19]:
# Configuration for explaining the ResNet blackbox.
labels = ["cylinder", "bell", "funnel"]
blackbox = resnet
blackbox_input_dimensions = 3
# The wrapper receives the same input-dimension setting that is passed to the
# local explainers further down.
blackbox_predict = BlackboxPredictWrapper(blackbox, blackbox_input_dimensions)
# Take the encoder / decoder sub-models directly from the loaded autoencoder's layers.
encoder, decoder = autoencoder.layers[1], autoencoder.layers[2]

In [20]:
# Iteration budget passed to both the global surrogate and the local explainers.
max_iter = 50
# Timestamped output directory for this run, under <project root>/agnostic_explainers/.
file_path = parentdir + "/agnostic_explainers/" + dataset_name + "_" + time.strftime("%Y%m%d_%H%M%S")
# exist_ok=True: re-running this cell within the same second (identical timestamp)
# must not abort the notebook with FileExistsError.
os.makedirs(file_path + "/", exist_ok=True)

In [21]:
%%time
# Fit the global surrogate on the explanation training series (channel axis
# dropped via [:, :, 0]), using the blackbox's own predictions as targets.
global_surrogate = AgnosticGlobalExplainer(
    random_state=random_state,
    max_iter=max_iter,
    labels=labels,
)
global_surrogate.fit(
    X_exp_train[:, :, 0],
    blackbox_predict.predict(X_exp_train),
)

Wall time: 18.1 s


In [None]:
%%time
# Build the local explainers for the explanation test set, with the same
# blackbox, autoencoder parts, labels, iteration budget and seed used above.
agnostic_explainers = build_agnostic_local_explainers(
    blackbox,
    encoder,
    decoder,
    autoencoder,
    X_exp_test,
    y_exp_test,
    blackbox_input_dimensions=blackbox_input_dimensions,
    labels=labels,
    size=1000,
    neigh_type="geneticp",
    ngen=10,
    max_iter=max_iter,
    random_state=random_state,
)


Neighborhood Generation
calculating feature values
gen	nevals	avg     	min     	max     
0  	500   	0.496654	0.496654	0.496654
1  	421   	0.540415	0.00669663	0.993307
2  	424   	0.672194	0.00671825	0.993307
3  	432   	0.74826 	0.00669513	0.993307
4  	415   	0.83161 	0.00669864	0.993307
5  	422   	0.843269	0.006764  	0.993307
6  	424   	0.836699	0.0067158 	0.993307
7  	409   	0.821456	0.00672248	0.993307
8  	420   	0.814033	0.00669581	0.993307
9  	408   	0.819748	0.0067053 	0.993307
10 	412   	0.8265  	0.00669737	0.993307
gen	nevals	avg	min	max
0  	500   	0.5	0.5	0.5
1  	411   	0.558281	0.488308	0.993026
2  	423   	0.681405	0.479848	0.993026
3  	422   	0.773899	0.482548	0.993096
4  	414   	0.844258	0.460517	0.993097
5  	427   	0.880878	0.47275 	0.993097
6  	417   	0.894151	0.486592	0.993097
7  	417   	0.905792	0.485709	0.993097
8  	432   	0.881393	0.461995	0.993097
9  	421   	0.903503	0.474421	0.993097
10 	420   	0.902532	0.469426	0.993097
synthetic neighborhood class counts {0: 486, 1

1  	421   	0.539695	0.00669406	0.993307
2  	424   	0.709032	0.00670249	0.993307
3  	432   	0.817081	0.00669555	0.993307
4  	415   	0.83296 	0.00669896	0.993307
5  	422   	0.850725	0.00669717	0.993307
6  	424   	0.838011	0.00669416	0.993307
7  	409   	0.837463	0.00669452	0.993307
8  	420   	0.838961	0.0067001 	0.993307
9  	408   	0.883203	0.00670937	0.993307
10 	412   	0.840261	0.0067223 	0.993307
gen	nevals	avg	min	max
0  	500   	0.5	0.5	0.5
1  	411   	0.483955	0.00811089	0.907332
2  	423   	0.48073 	0.0070293 	0.971305
3  	422   	0.486811	0.0183094 	0.983655
4  	414   	0.501719	0.00671861	0.983655
5  	427   	0.530598	0.00858378	0.989649
6  	417   	0.568724	0.00673586	0.991615
7  	417   	0.606705	0.00692472	0.992196
8  	432   	0.659487	0.00688048	0.992196
9  	421   	0.729594	0.0184397 	0.992196
10 	420   	0.746465	0.0216969 	0.992196
synthetic neighborhood class counts {0: 443, 1: 96, 2: 561}
learning local decision tree
retrieving explanation
N.RULES =  1
N.COUNTERFACTUAL =  7
rule: 5

8  	420   	0.843041	0.00669644	0.993307
9  	408   	0.884286	0.00671845	0.993307
10 	412   	0.844017	0.00669763	0.993307
gen	nevals	avg	min	max
0  	500   	0.5	0.5	0.5
1  	411   	0.485086	0.00988945	0.889922
2  	423   	0.484182	0.00670295	0.960628
3  	422   	0.49283 	0.0142115 	0.977378
4  	414   	0.507344	0.00691625	0.977378
5  	427   	0.536466	0.0143066 	0.986421
6  	417   	0.573761	0.00707609	0.987968
7  	417   	0.625748	0.010512  	0.989953
8  	432   	0.669522	0.0155106 	0.989953
9  	421   	0.726593	0.0136651 	0.99095 
10 	420   	0.745681	0.0113029 	0.99095 
synthetic neighborhood class counts {0: 444, 1: 92, 2: 564}
learning local decision tree
retrieving explanation
N.RULES =  1
N.COUNTERFACTUAL =  4
rule: 527 time series
crule0: 71 time series
crule1: 11 time series
crule2: 9 time series
crule3: 1 time series
rule generated instances have 0.966 accuracy
crule0 generated instances have 0.96 accuracy
crule1 generated instances have 0.993 accuracy
crule2 generated instances have 0.846

4  	414   	0.836889	0.0274527 	0.993307
5  	427   	0.881657	0.0161848 	0.993307
6  	417   	0.889791	0.00788654	0.993307
7  	417   	0.896107	0.0201927 	0.993307
8  	432   	0.886858	0.00851716	0.993307
9  	421   	0.894915	0.00895677	0.993307
10 	420   	0.903733	0.0128437 	0.993307
synthetic neighborhood class counts {0: 452, 1: 309, 2: 339}
learning local decision tree
retrieving explanation
N.RULES =  1
N.COUNTERFACTUAL =  3
rule: 435 time series
crule0: 105 time series
crule1: 285 time series
crule2: 4 time series
rule generated instances have 0.971 accuracy
crule0 generated instances have 0.999 accuracy
crule1 generated instances have 0.982 accuracy
crule2 generated instances have 0.737 accuracy
removing crule2... Done!
rule: 435 time series
crule0: 105 time series
crule1: 285 time series
16 / 36

Neighborhood Generation
calculating feature values
gen	nevals	avg     	min     	max     
0  	500   	0.496654	0.496654	0.496654
1  	421   	0.540973	0.00669436	0.993307
2  	424   	0.714694	0.0

In [None]:
# Collect the predictions of the local explainers, the global surrogate and the
# blackbox into one table, save it as a ';'-separated CSV in the run directory,
# and print a report.
# NOTE(review): the 4th argument is X_exp_test but the 5th is the blackbox
# prediction on X_exp_train -- confirm against get_all_predictions' signature
# that the training-set predictions are really what is expected here.
results_df = get_all_predictions(agnostic_explainers, global_surrogate, blackbox_predict, X_exp_test, blackbox_predict.predict(X_exp_train))
results_df.to_csv(file_path + "/" + "results_df.csv", sep = ";", index = False)
print_report(results_df)

In [None]:
%%time
# Persist the fitted global surrogate and all local explainers for this run.
# NOTE(review): the first call is given file_path with a trailing "/", the second
# one without -- presumably each helper builds its own file names; verify.
save_shapelet_model(global_surrogate, file_path + "/")
massive_save_agnostic_local_explainers(agnostic_explainers, file_path, verbose = True)

In [None]:
%%time
# Read the global surrogate and the local explainers back from the run
# directory, overwriting the in-memory objects with the loaded ones.
global_surrogate = load_shapelet_model(file_path + "/")
agnostic_explainers = massive_load_agnostic_local_explainers(file_path, verbose = True)

In [None]:
# sanity check: recompute the results table from the reloaded explainers with
# exactly the same arguments used to build results_df, so that the comparison
# below is like-for-like.
results_df_loaded = get_all_predictions(agnostic_explainers, global_surrogate, blackbox_predict, X_exp_test, blackbox_predict.predict(X_exp_train))
# Number of differing cells (summed along the first axis); all zeros means the
# save/load round trip reproduced the results.
print(sum(results_df_loaded.values != results_df.values))