In [1]:
from keras.models import load_model
from lstm_network import LSTM_network
import tensorflow as tf
import numpy as np
import pickle as pkl

Using TensorFlow backend.


In [2]:
# Load the trained Keras model and the token -> embedding-vector dictionary.
# Per the original author's note, the Keras model was trained on the sentiment
# analysis dataset from Stanford University and classifies movie reviews into
# five different categories.
keras_model = load_model('model/sentiment_model.hdf5')
# NOTE(security): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
# The context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open('model/token2vec.pkl', 'rb') as token2vec_file:
    w2v = pkl.load(token2vec_file)



In [3]:
# Fetch the parameter arrays of the trained Keras model; they are handed to the
# LSTM-LRP network when it is constructed.
weights = keras_model.get_weights()

# Sizes used to build the LSTM-LRP network (presumably these have to mirror the
# architecture of the trained Keras model -- TODO confirm).
n_hidden = embedding_dim = 60
n_classes = 5

# Show how many parameter arrays the Keras model provides.
print(len(weights))

7


In [4]:
# Per the original author's note, the Keras model has no bias in its final dense
# layer, so a zero bias vector of length n_classes is added to the weights before
# they are passed to the LSTM-LRP network.
# The list is rebuilt from the Keras model instead of being appended to in place:
# re-running this cell therefore never stacks additional bias vectors onto
# `weights` (an in-place append would add one more on every execution).
weights = list(keras_model.get_weights()) + [np.zeros((n_classes,))]
lrp_model = LSTM_network(n_hidden, embedding_dim, n_classes, weights=weights)

In [5]:
# Sanity check of the conversion: the Keras model and the LSTM-LRP network must
# produce (numerically) the same output for an example sentence.
sentence = 'Neither funny nor suspenseful nor particularly well-drawn .'
# Split on whitespace, lower-case each token and look up its embedding vector.
tokens = list(map(str.lower, sentence.split()))
vecs = np.array([w2v[token] for token in tokens])
# A leading axis of size 1 is added so the single sentence is passed as a batch of one.
y_keras = keras_model.predict(np.expand_dims(vecs, axis=0))
# full_pass returns three values; only the first (the network output) is needed here.
y_lrpnet, _, _ = lrp_model.full_pass(np.expand_dims(vecs, axis=0))
check = np.allclose(y_keras, y_lrpnet.numpy())
print('Conversion is {}.'.format('correct' if check else 'wrong'))

Conversion is correct.


In [10]:
# Explain the classification of the example sentence with LRP.
bias_factor = 0.0
eps = 1e-3
# No target class is passed here. Per the original author's note, with y=None the
# relevances are calculated for the predicted class of the sample (the recommended
# usage); to obtain relevances towards a specific class instead, a class array
# such as y = np.array([1]) can be supplied.
explanation, Rest = lrp_model.lrp(
    np.expand_dims(vecs, axis=0),  # single sentence as a batch of one
    bias_factor=bias_factor,
    eps=eps,
)
print(explanation.shape)

(1, 8, 60)


In [11]:
# LRP yields one relevance value per dimension of each embedding vector. Summing
# over the last (embedding) axis collapses them into one relevance per word.
word_relevances = tf.reduce_sum(explanation, axis=-1)
# The batch holds a single sentence, so only its first entry is printed:
# right-aligned word followed by its relevance with two decimals.
row_template = '{0:>13}:   {1:8.2f}'
for word, relevance in zip(tokens, word_relevances[0]):
    print(row_template.format(word, relevance))

      neither:       1.86
        funny:      -1.58
          nor:       1.50
  suspenseful:      -1.54
          nor:       2.00
 particularly:      -0.04
   well-drawn:      -0.06
            .:      -0.12


In [12]:
# Correctness check of the LRP pass: with eps = 0 and a bias factor of 1.0 all
# relevance should be preserved, i.e. the relevance of the inputs plus the
# remainder must add up to the largest network output.
# NOTE: this cell overwrites eps, bias_factor, explanation and Rest that were set
# by the earlier explanation cell.
bias_factor = 1.0
eps = 0.0
explanation, Rest = lrp_model.lrp(np.expand_dims(vecs, axis=0), bias_factor=bias_factor, eps=eps)
total_relevance = np.sum(explanation) + np.sum(Rest)
check = np.allclose(np.max(y_lrpnet.numpy()), total_relevance)
print('LRP pass is {}.'.format('correct' if check else 'wrong'))

LRP pass is correct.


In [13]:
# If all input sequences have the same length they can be processed batch-wise
# efficiently.
batch_size = 100
length = 10
# Draw the demo inputs from a dedicated, seeded generator: the data is identical
# on every Restart & Run All and NumPy's global random state is left untouched.
demo_rng = np.random.RandomState(0)
some_random_data = tf.constant(demo_rng.randn(batch_size, length, embedding_dim))
# Explain all `batch_size` instances at once; the second return value is not needed here.
relevances, _ = lrp_model.lrp(some_random_data)
print(relevances.shape)

(100, 10, 60)
