In [2]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense

from transformers import TFBertForSequenceClassification

import numpy as np
import pandas as pd

from datetime import datetime
import os
import sys
import pickle
import time
import argparse
from tqdm import tqdm

sys.path.insert(0, '/vast/nj594/xai/helpers')
from evaluate import evaluate_mimic as evaluate

# IMPORTANT: SET RANDOM SEEDS FOR REPRODUCIBILITY
os.environ['PYTHONHASHSEED'] = str(420)
import random
random.seed(420)
np.random.seed(420)
tf.random.set_seed(420)

In [3]:
#Load Data 

data_dir = '../data'
label_list = [0, 1]
max_seq_length = 128
num_classes = len(label_list)

### Initialize Tokenizer

from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

mask_token = tokenizer.convert_tokens_to_ids(['[MASK]'])[0]

### Load

train_dir = os.path.join(data_dir, 'train_dataset')
val_dir = os.path.join(data_dir, 'val_dataset')
test_dir = os.path.join(data_dir, 'test_dataset')

element_spec = ({'input_ids': tf.TensorSpec(shape=(128,), dtype=tf.int32, name=None),
                 'attention_mask': tf.TensorSpec(shape=(128,), dtype=tf.int32, name=None),
                 'token_type_ids': tf.TensorSpec(shape=(128,), dtype=tf.int32, name=None)},
                tf.TensorSpec(shape=(2,), dtype=tf.int32, name=None))

train_data = tf.data.experimental.load(train_dir, element_spec)
val_data = tf.data.experimental.load(val_dir, element_spec)
test_data = tf.data.experimental.load(test_dir, element_spec)
X_test = np.vstack([x[0]['input_ids'].numpy() for x in test_data])
X_val = np.vstack([x[0]['input_ids'].numpy() for x in val_data])
y_test = np.vstack([y.numpy() for x,y in test_data])
y_val = np.vstack([y.numpy() for x,y in val_data])

### Batch

batch_size = 16
train_data = train_data.shuffle(20000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
val_data = val_data.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
test_data = test_data.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

### Get Predicted Class
from transformers import TFBertForSequenceClassification

preds = np.load(os.path.join(data_dir, 'predictions.npy'), allow_pickle=True)
preds_val = np.load(os.path.join(data_dir, 'predictions_val.npy'), allow_pickle=True)

preds_discrete = np.eye(2)[preds.argmax(1)]
preds_discrete_val = np.eye(2)[preds_val.argmax(1)]

2022-05-17 16:58:27.428562: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-17 16:58:27.429074: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 32252 MB memory:  -> device: 0, name: Vega 20, pci bus id: 0000:8d:00.0
2022-05-17 16:58:28.411982: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-05-17 16:58:28.432092: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:28.434811: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:28.845074: I tensorflo

array([[  101,  3058,  1997, ..., 17850, 12104,   102],
       [  101, 18583,  1012, ..., 10210,  7941,   102],
       [  101,  2089, 16755, ...,  6292,  2089,   102],
       ...,
       [  101,  3460,  2012, ...,  2566,  2154,   102],
       [  101,  1997,  3052, ...,  2590,  2008,   102],
       [  101,  3058,  1997, ...,  5219,  2006,   102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(16, 128), dtype=int32, numpy=
array([[1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]], dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(16, 128), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>}, <tf.Tensor: shape=(16, 2), dtype=int32, numpy=
array([[0, 1],

2022-05-17 16:58:38.135791: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:38.138002: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:38.258096: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:38.260284: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:38.266842: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:38.269166: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:43.260303: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:57.589525: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:57.591528: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-17 16:58:57

In [3]:
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# Evaluate

### Load DataFrame
df_test = pd.read_csv(os.path.join(data_dir, "test.csv"))
df_val = pd.read_csv(os.path.join(data_dir, "val.csv"))

### Load Evaluator
evaluator_base = TFBertForSequenceClassification.from_pretrained('evaluator-data/surrogate')    

evaluator_model = tf.keras.models.Sequential()
evaluator_model.add(evaluator_base)
evaluator_model.add(tf.keras.layers.Lambda(lambda x: x.logits))
evaluator_model.add(tf.keras.layers.Activation('softmax'))
for x in test_data:
    evaluator_model(x)
    break
evaluator_model.summary()

def eval_model(x):
    attention_mask = np.ones_like(x).astype(int)
    token_type_ids = np.zeros_like(x).astype(int)
    
    input_ = dict(
        input_ids = x.astype(int),
        attention_mask = attention_mask,
        token_type_ids = token_type_ids,
    )
    
    return evaluator_model.predict(input_)

Some layers from the model checkpoint at evaluator-data/surrogate were not used when initializing TFBertForSequenceClassification: ['dropout_37']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at evaluator-data/surrogate.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


array([[  101,  3058,  1997, ..., 17850, 12104,   102],
       [  101, 18583,  1012, ..., 10210,  7941,   102],
       [  101,  2089, 16755, ...,  6292,  2089,   102],
       ...,
       [  101,  3460,  2012, ...,  2566,  2154,   102],
       [  101,  1997,  3052, ...,  2590,  2008,   102],
       [  101,  3058,  1997, ...,  5219,  2006,   102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(16, 128), dtype=int32, numpy=
array([[1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]], dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(16, 128), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>}, <tf.Tensor: shape=(16, 2), dtype=int32, numpy=
array([[0, 1],

2022-05-10 16:14:52.083085: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2022-05-10 16:14:52.085256: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.


In [4]:
methods = ['fastshap', 'fastshap-data', 'kernelshap', 'lime', 'smoothgrad', 
           'integratedgradients', 'kernelshap_s', 'kernelshap_s-data']
results = {}
for method in ['kernelshap_s-dkl', 'fastshap-dkl']:
    print(method)
    results[method] = {} 
    ## Load Explanations
    if method  not in ['smoothgrad', 'integratedgradients', 'realx', 'realx-data']:
        exp_file_retro = os.path.join('..', method, 'shap_values-retrospective.pkl')
        exp_file_pro = os.path.join('..', method, 'shap_values-prospective.pkl')
    else:
        exp_file_retro = os.path.join('..', method, 'explanations-retrospective.pkl')
        exp_file_pro = os.path.join('..', method, 'explanations-prospective.pkl')
        
    with open(exp_file_retro, 'rb') as f:
        explanations_retro = pickle.load(f)
    with open(exp_file_pro, 'rb') as f:
        explanations_pro = pickle.load(f)
        
    ## Evaluate Explanations
    
    ### Retrospective Evaluation

    # Exclusion
    results[method]['retro_ex'] = evaluate(df_test.copy(), X_test, explanations_retro, evaluator_model, y_test, y_test, 
                                           mode = 'exclude', method = method, mask_token=mask_token)
    # Inclusion
    results[method]['retro_in'] = evaluate(df_test.copy(), X_test, explanations_retro, evaluator_model, y_test, y_test, 
                                           mode = 'include', method = method, mask_token=mask_token)
    
    ### Prospective Evaluation 

    # Exclusion
    results[method]['pro_ex'] = evaluate(df_test.copy(), X_test, explanations_pro, evaluator_model, preds_discrete, y_test, 
                                         mode = 'exclude', method = method, mask_token=mask_token)
    # Inclusion
    results[method]['pro_in'] = evaluate(df_test.copy(), X_test, explanations_pro, evaluator_model, preds_discrete, y_test, 
                                         mode = 'include', method = method, mask_token=mask_token)
    print(results[method])
    
    with open('results-{}.pkl'.format(method), 'wb') as f:
        pickle.dump(results[method], f)

kernelshap_s-dkl
100
(3063, 128)
99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)


  r_ = np.array([[float(p), n] for p,n in r.items()])


100
(3063, 128)
99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)


  r_ = np.array([[float(p), n] for p,n in r.items()])


100
(3063, 128)
99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)


  r_ = np.array([[float(p), n] for p,n in r.items()])


100
(3063, 128)
99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)


  r_ = np.array([[float(p), n] for p,n in r.items()])


{'retro_ex': {'acc': {100: 0.4965753424657534, 99: 0.3852739726027397, 95: 0.3664383561643836, 90: 0.3527397260273973, 85: 0.3698630136986301, 75: 0.3784246575342466, 50: 0.3818493150684932, 25: 0.4160958904109589, 15: 0.4623287671232877, 10: 0.476027397260274, 5: 0.5222602739726028, 1: 0.601027397260274, 0: 0.6986301369863014}, 'auroc': {100: 0.46073187895847995, 99: 0.37236687778559696, 95: 0.30243959652826646, 90: 0.2808468214872156, 85: 0.27640159512080686, 75: 0.2721674876847291, 50: 0.29987098287590896, 25: 0.3668191414496833, 15: 0.4159629368988975, 10: 0.462749237626085, 5: 0.5340957072484166, 1: 0.6539878020173586, 0: 0.7654585972319963}, 'preds': {100: <tf.Tensor: shape=(3063, 2), dtype=float32, numpy=
array([[0.49991408, 0.50008595],
       [0.49991408, 0.50008595],
       [0.49991408, 0.50008595],
       ...,
       [0.49991408, 0.50008595],
       [0.49991408, 0.50008595],
       [0.49991408, 0.50008595]], dtype=float32)>, 99: <tf.Tensor: shape=(3063, 2), dtype=float32, nu

99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)


  r_ = np.array([[float(p), n] for p,n in r.items()])


100
(3063, 128)
99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)


  r_ = np.array([[float(p), n] for p,n in r.items()])


100
(3063, 128)
99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)


  r_ = np.array([[float(p), n] for p,n in r.items()])


100
(3063, 128)
99
(3063, 128)
95
(3063, 128)
90
(3063, 128)
85
(3063, 128)
75
(3063, 128)
50
(3063, 128)
25
(3063, 128)
15
(3063, 128)
10
(3063, 128)
5
(3063, 128)
1
(3063, 128)
0
(3063, 128)
{'retro_ex': {'acc': {100: 0.4965753424657534, 99: 0.4554794520547945, 95: 0.4263698630136986, 90: 0.4126712328767123, 85: 0.3818493150684932, 75: 0.3938356164383562, 50: 0.4469178082191781, 25: 0.4948630136986301, 15: 0.553082191780822, 10: 0.5445205479452054, 5: 0.6095890410958904, 1: 0.6626712328767124, 0: 0.6986301369863014}, 'auroc': {100: 0.46073187895847995, 99: 0.41656110720150136, 95: 0.3812104152005629, 90: 0.3567088904527328, 85: 0.34298615998123383, 75: 0.35254515599343184, 50: 0.37589725545390573, 25: 0.4705958245367112, 15: 0.5345765892563922, 10: 0.589924935491438, 5: 0.6637461881304245, 1: 0.7235632183908047, 0: 0.7654585972319963}, 'preds': {100: <tf.Tensor: shape=(3063, 2), dtype=float32, numpy=
array([[0.49991408, 0.50008595],
       [0.49991408, 0.50008595],
       [0.49991408

  r_ = np.array([[float(p), n] for p,n in r.items()])


In [5]:
with open('results.pkl', 'wb') as f:
    pickle.dump(results, f)

In [5]:
results

{'kernelshap_s-dkl': {'retro_ex': {'acc': {100: 0.4965753424657534,
    99: 0.3852739726027397,
    95: 0.3664383561643836,
    90: 0.3527397260273973,
    85: 0.3698630136986301,
    75: 0.3784246575342466,
    50: 0.3818493150684932,
    25: 0.4160958904109589,
    15: 0.4623287671232877,
    10: 0.476027397260274,
    5: 0.5222602739726028,
    1: 0.601027397260274,
    0: 0.6986301369863014},
   'auroc': {100: 0.46073187895847995,
    99: 0.37236687778559696,
    95: 0.30243959652826646,
    90: 0.2808468214872156,
    85: 0.27640159512080686,
    75: 0.2721674876847291,
    50: 0.29987098287590896,
    25: 0.3668191414496833,
    15: 0.4159629368988975,
    10: 0.462749237626085,
    5: 0.5340957072484166,
    1: 0.6539878020173586,
    0: 0.7654585972319963},
   'preds': {100: <tf.Tensor: shape=(3063, 2), dtype=float32, numpy=
    array([[0.49991408, 0.50008595],
           [0.49991408, 0.50008595],
           [0.49991408, 0.50008595],
           ...,
           [0.49991408, 0.50

In [6]:
with open('results.pkl', 'rb') as f:
    results_master = pickle.load(f)

In [7]:
results_master['fastshap-dkl'] = results['fastshap-dkl']

In [8]:
results_master['kernelshap_s-dkl'] = results['kernelshap_s-dkl']

In [9]:
with open('results.pkl', 'wb') as f:
    pickle.dump(results_master, f)