## Setup

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
# load model architecture
from tep.modelUtils import load_architecture
model = load_architecture('models/tl/baseline.json')
model.summary()

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
seq_input (InputLayer)       (None, 32)                0         
_________________________________________________________________
word_emb (Embedding)         (None, 32, 100)           43224100  
_________________________________________________________________
input_dropout (Dropout)      (None, 32, 100)           0         
_________________________________________________________________
padding_1 (ZeroPadding1D)    (None, 34, 100)           0         
_________________________________________________________________
conv_1 (Conv1D)              (None, 32, 128)           38528     
_________________________________________________________________
pool_1 (MaxPooling1D)        (None, 16, 128)           0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 128)           0         
__________

In [3]:
# set model weights
model.load_weights('models/tl/baseline.hdf5')

In [4]:
# load data and labels
from tep.utils import load_array
seqs = load_array('data/sentiment_data/seq_32.bc')
labels = load_array('data/sentiment_data/labels.bc')
print(seqs.shape)
print(labels.shape)

(1600000, 32)
(1600000,)


In [5]:
# load word index
import json
word_index_file = open('data/sentiment_data/word_index.json')
word_index_str = word_index_file.read()
word_index = json.loads(word_index_str)
type(word_index)

dict

In [6]:
# compile the model
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['acc'])

In [7]:
# test the model
from tep.trainUtils import one_hot_encoding
oh_labels = one_hot_encoding(labels, 2)
model.evaluate(x=seqs[:10000], y=oh_labels[:10000], batch_size=256)



[0.34807548885345457, 0.84930000000000005]

## Visualize features

In [8]:
from tep.featureVisualization import ConvLayerVisualizer
clv = ConvLayerVisualizer(model, word_index)

In [9]:
clv.analyze_example(seqs[0], 'conv_1', num_results=5)

[{'activation': 2.4065974,
  'kernel': 97,
  'position': 29,
  'words': 'my birthday on'},
 {'activation': 1.8522719,
  'kernel': 113,
  'position': 28,
  'words': "me,it's my birthday"},
 {'activation': 1.6997725,
  'kernel': 8,
  'position': 30,
  'words': 'birthday on monday'},
 {'activation': 1.6873175,
  'kernel': 54,
  'position': 29,
  'words': 'my birthday on'},
 {'activation': 1.6785334,
  'kernel': 62,
  'position': 31,
  'words': 'on monday <unknown>'}]

In [10]:
clv.analyze_neuron(seqs[:10000], 'conv_1', 58, num_results=5)

[{'activation': 3.0778136,
  'kernel': 58,
  'position': 28,
  'tweet': 2889,
  'words': 'twitter ! congrats'},
 {'activation': 3.0098078,
  'kernel': 58,
  'position': 19,
  'tweet': 9192,
  'words': 'twitters ! mwahh'},
 {'activation': 2.7932086,
  'kernel': 58,
  'position': 31,
  'tweet': 9179,
  'words': 'inari ! <unknown>'},
 {'activation': 2.7836375,
  'kernel': 58,
  'position': 17,
  'tweet': 9408,
  'words': 'angekommen morgen wieder'},
 {'activation': 2.7572386,
  'kernel': 58,
  'position': 31,
  'tweet': 5683,
  'words': 'facebook ! <unknown>'}]

## Calculate attributions

In [17]:
x_test = seqs[-10000:]
y_test = oh_labels[-10000:]
print(x_test.shape)
print(y_test.shape)

(10000, 32)
(10000, 2)


In [74]:
def calculate_input_attributions(model, weights, target_layer, output_layer, x, y, method='elrp'):
    current_session = K.get_session()
    
    with DeepExplain(session=current_session) as de:
        model = load_architecture(model)
        model.load_weights(weights)
        model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['acc'])
        
        target = model.get_layer(target_layer).output
        input_tensor = model.inputs[0]
        target_out = current_session.run(target, {input_tensor: x})
        output_tensor = model.get_layer(output_layer).output
        
        attributions = de.explain(method, output_tensor * y, target, target_out)
    return attributions

In [78]:
model_path = 'models/tl/baseline.json'
weights_path = 'models/tl/baseline.hdf5'
target = 'word_emb'
output = 'raw_predictions'

In [79]:
attributions = calculate_input_attributions(model_path, weights_path, target, output, x_test, y_test)
print(attributions.shape)

DeepExplain: running "elrp" explanation method (4)
Model with multiple inputs:  False
(10000, 32, 100)


In [80]:
attributions = attributions.sum(axis=-1)
print(attributions.max())
print(attributions.min())
print(attributions.mean())

3.28994
-4.46913
0.0259702


In [97]:
from keras.layers import Input
from keras.models import Model

def calculate_hidden_attributions(model, weights, target_layer, output_layer, x, y, method='elrp'):
    current_session = K.get_session()
    
    with DeepExplain(session=current_session) as de:
        # load original model
        model = load_architecture(model)
        model.load_weights(weights)
        model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['acc'])
        
        # calculate activations for target_layer
        tmp_model = Model(inputs=model.input, outputs=model.layers[target_layer].output)
        target_outputs = tmp_model.predict(x)

        # generate new model
        new_input = Input(shape=model.layers[target_layer].output_shape, name='new_input')
        config = model.get_config()
        config['input_layers'] = [['new_input']]
            
        model.layers[target_layer+1].input = new_input
        model_copy.summary()

In [98]:
model_path = 'models/tl/baseline.json'
weights_path = 'models/tl/baseline.hdf5'
target = 4
output = 'raw_predictions'

calculate_hidden_attributions(model_path, weights_path, target, output, x_test, y_test)

AttributeError: can't set attribute

In [100]:
config = model.get_config()

In [101]:
config['input_layers']

[['seq_input', 0, 0]]

In [102]:
layer = model.layers[0]

In [103]:
layer.

{'batch_input_shape': (None, 32),
 'dtype': 'int32',
 'name': 'seq_input',
 'sparse': False}

In [107]:
type(config['layers'][0])

dict