In [1]:
import pandas as pd
import numpy as np
import pickle
import tensorflow as tf
import tensorflow_hub as hub
import bert
from bert import tokenization
from IPython.display import display
from bert_model_utils import transform_input, generate_baseline, \
        get_ig_attributions, visualize_token_attrs, get_tensor

## Integrated Gradients on BERT

In [2]:
# Open the TensorFlow session; the cells below reuse it for every tensor lookup
# and attribution call.
SESS = tf.Session()


# =========================================== MODIFY THIS =============================================
# Export directory of the BERT model, laid out in the TF SavedModel format
# (https://www.tensorflow.org/guide/saved_model).
SAVED_MODEL_PATH = '<PATH TO BERT SAVED MODEL DIRECTORY>'
saved_model = tf.saved_model.loader.load(
    export_dir=SAVED_MODEL_PATH,
    tags=['serve'],
    sess=SESS,
)


# Pickled tokenizer. The BertModel directory ships a base uncased tokenizer.
TOKENIZER_PATH = '<PATH TO TOKENIZER>'
with open(TOKENIZER_PATH, 'rb') as tokenizer_file:
    # NOTE(review): unpickling can execute arbitrary code -- only point
    # TOKENIZER_PATH at a file you trust.
    TOKENIZER = pickle.load(tokenizer_file)
# ======================================================================================================
# Name of the embedding tensor inside the loaded graph. It is model specific, so
# you may need to grep the graph operations to find the right one for your model;
# see https://github.com/ankurtaly/Integrated-Gradients/blob/master/howto.md
# The tensor picked here is the one that sums the three kinds of BERT embeddings
# (refer to the BERT architecture for more context).
EMBEDDING_TENSOR_NAME = 'module_apply_tokens/bert/embeddings/add_1:0'
EMBEDDING_TENSOR = get_tensor(SESS, EMBEDDING_TENSOR_NAME)


W1112 22:20:32.449114 140007072298816 deprecation.py:323] From <ipython-input-2-a7d64c6e6112>:7: load (from tensorflow.python.saved_model.loader_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
W1112 22:20:34.410744 140007072298816 deprecation.py:323] From /home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1282: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


In [3]:
# Selectors for what gets explained. These are instructional defaults: change them
# to attribute a different signature, output, or class. Whatever output is chosen
# must be a differentiable function of the embedding tensor.
SIGNATURE_KEY = "serving_default"
OUTPUT_KEY = 'probabilities'
TARGET_CLASS_INDEX = 1  # index along the second axis of the output tensor

# Protobuf map fields behave like a defaultdict: subscripting with a missing key
# silently creates an empty entry instead of raising. Check membership first so a
# wrong key fails here with a clear message rather than later as an obscure
# graph-lookup error.
if SIGNATURE_KEY not in saved_model.signature_def:
    raise KeyError(
        f'Signature {SIGNATURE_KEY!r} not found; '
        f'available signatures are {list(saved_model.signature_def)}'
    )

# Define the input tensors using the SignatureDef
sig = saved_model.signature_def[SIGNATURE_KEY]
INPUT_TENSORS = sig.inputs

# List the outputs the signature exposes so the reader can pick another one
output_keys = list(sig.outputs)
print(f'Possible outputs are {output_keys}')

if OUTPUT_KEY not in sig.outputs:
    raise KeyError(
        f'Output {OUTPUT_KEY!r} not found; possible outputs are {output_keys}'
    )

# Resolve the chosen output of the model to its tensor in the session graph
OUTPUT_TENSOR = get_tensor(SESS, sig.outputs[OUTPUT_KEY].name)

# Gradient of the selected output column wrt the embedding tensor.
# Note: tf.gradients returns a list with one entry per tensor differentiated against.
GRADIENT_TENSOR = tf.gradients(OUTPUT_TENSOR[:, TARGET_CLASS_INDEX], EMBEDDING_TENSOR)

W1112 22:20:36.371488 140007072298816 deprecation.py:323] From /home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Possible outputs are ['labels', 'probabilities']


In [4]:
# One-row frame holding the sentence we want to explain
input_df = pd.DataFrame({'sentence': ['This was an awful movie']})

# Convert the raw sentence into the format the model accepts.
# Depending on your model's input interface, transform_input may need adapting.
transformed_input_df = transform_input(TOKENIZER, input_df)

# Baseline for integrated gradients: a padded sequence of the same length as the
# input sentence, chosen to avoid attributing to the start (CLS) and end of
# sequence (SEP) tokens.
baseline_df = generate_baseline(TOKENIZER, input_df)


# Compute the attributions. debug=True presumably produces the prediction /
# baseline / attribution-sum diagnostics printed below the cell -- confirm in
# bert_model_utils.
ig = get_ig_attributions(
    sess=SESS,
    input_tensors=INPUT_TENSORS,
    embedding_tensor=EMBEDDING_TENSOR,
    gradient_tensor=GRADIENT_TENSOR,
    output_tensor=OUTPUT_TENSOR,
    transformed_input_df=transformed_input_df,
    baseline_df=baseline_df,
    tokenizer=TOKENIZER,
    debug=True,
)

# Render the visualisation built from the first two entries of the result's
# 'outputs'; the second entry is converted to a numpy array first.
ig_outputs = ig['outputs']
token_attr_view = visualize_token_attrs(ig_outputs[0], np.array(ig_outputs[1]))
display(token_attr_view)

prediction is 1.955999141500797e-05
baseline_prediction is 0.4553076922893524
delta_prediction is -0.4552881419658661
sum_attributions are -0.4555169343948364
Error percentage is -0.050252226641011435


In [5]:
# Explain several sentences in turn and render the attributions for each one.
sentences = ['This was a good movie',
             'This was not a good movie',
             'This was not a great movie, but a good movie nevertheless',
             'This was a terrible movie. Do you agree?']

for sentence in sentences:
    # Build a fresh one-row frame per sentence. The previous version wrote into
    # the earlier cell's frame via chained indexing (input_df['sentence'][0] = ...),
    # which triggers SettingWithCopyWarning, is a no-op under pandas copy-on-write,
    # and made this cell depend on (and mutate) state from another cell.
    sentence_df = pd.DataFrame(columns=['sentence'], data=[sentence])

    # Same pipeline as the single-sentence example: model input, baseline, then
    # integrated-gradients attributions.
    transformed_input_df = transform_input(TOKENIZER, sentence_df)
    baseline_df = generate_baseline(TOKENIZER, sentence_df)
    ig = get_ig_attributions(sess=SESS,
                             input_tensors=INPUT_TENSORS,
                             embedding_tensor=EMBEDDING_TENSOR,
                             gradient_tensor=GRADIENT_TENSOR,
                             output_tensor=OUTPUT_TENSOR,
                             transformed_input_df=transformed_input_df,
                             baseline_df=baseline_df,
                             tokenizer=TOKENIZER)

    # Render the visualisation built from the first two entries of ig['outputs']
    display(visualize_token_attrs(ig['outputs'][0], np.array(ig['outputs'][1])))