## Setup

In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell runs, so edits to the project's .py files (e.g. the
# `tep` package) are picked up without restarting the kernel.
# NOTE: keep comments on their own lines; text after a magic is passed to it as an argument.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Load the model architecture from its JSON description via the project helper.
# Only the architecture is loaded here; the weights are loaded from a separate
# HDF5 file in a later cell.
from tep.modelUtils import load_architecture
model = load_architecture('models/tl/baseline.json')
# Print the layer-by-layer summary as a sanity check of the loaded architecture.
model.summary()

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
seq_input (InputLayer)       (None, 32)                0         
_________________________________________________________________
word_emb (Embedding)         (None, 32, 100)           43224100  
_________________________________________________________________
input_dropout (Dropout)      (None, 32, 100)           0         
_________________________________________________________________
padding_1 (ZeroPadding1D)    (None, 34, 100)           0         
_________________________________________________________________
conv_1 (Conv1D)              (None, 32, 128)           38528     
_________________________________________________________________
pool_1 (MaxPooling1D)        (None, 16, 128)           0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 128)           0         
__________

In [3]:
# Load the weights stored in the HDF5 file into `model`.
# NOTE(review): presumably these are the trained baseline weights matching
# 'models/tl/baseline.json' -- confirm the two files belong together.
model.load_weights('models/tl/baseline.hdf5')

In [4]:
# Load the input sequences and their labels from disk via the project helper.
from tep.utils import load_array
seqs = load_array('data/sentiment_data/seq_32.bc')
labels = load_array('data/sentiment_data/labels.bc')
# Sanity check: both arrays should have the same number of rows
# (the recorded output shows 1,600,000 sequences of length 32 and 1,600,000 labels).
print(seqs.shape)
print(labels.shape)

(1600000, 32)
(1600000,)


In [5]:
# Load the word index from its JSON file and parse it into a dict.
# NOTE(review): presumably this maps tokens to the integer ids used in `seqs`
# -- confirm against the preprocessing code.
import json
# Use a context manager so the file handle is closed as soon as the contents
# are read (the handle was previously left open for the life of the kernel).
with open('data/sentiment_data/word_index.json') as word_index_file:
    word_index_str = word_index_file.read()
word_index = json.loads(word_index_str)
# Display the parsed type as a quick sanity check (expected: dict).
type(word_index)

dict

In [6]:
# Compile the model so it can be evaluated below.
# The 'acc' metric is what produces the second value in the evaluate() output.
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['acc'])

In [7]:
# Sanity-check the loaded model by evaluating it on the first 10,000 examples.
from tep.trainUtils import one_hot_encoding
# NOTE(review): presumably converts the label vector into a 2-column one-hot
# matrix (2 = number of classes) -- confirm in tep.trainUtils.
oh_labels = one_hot_encoding(labels, 2)
# Returns [loss, acc] given the loss/metrics configured in compile().
# NOTE(review): these are simply the first 10,000 rows, not a held-out split as
# far as this notebook shows -- treat the accuracy as a smoke test only.
model.evaluate(x=seqs[:10000], y=oh_labels[:10000], batch_size=256)



[0.34918261456489563, 0.84550000000000003]

## Get hidden layer activations

In [8]:
# Build the project's feature visualizer from the loaded model and the word index.
# NOTE(review): the word index is presumably used to turn token ids back into
# the words shown in the analysis results below -- confirm in tep.featureVisualization.
from tep.featureVisualization import FeatureVisualizer
fv = FeatureVisualizer(model, word_index)

In [9]:
# Window width of the 'conv_1' filters, i.e. how many consecutive tokens each
# filter sees (the recorded output is 3, matching the 3-word spans reported below).
model.get_layer('conv_1').kernel_size[0]

3

In [10]:
# Inspect a single example (the sequence at index 1) at layer 'conv_1', asking for 5 results.
# Each result in the recorded output is a dict with 'activation', 'kernel',
# 'position' and 'words'.
# NOTE(review): results appear to be the strongest activations in descending
# order -- confirm in FeatureVisualizer.analyze_example.
fv.analyze_example(seqs[1], 'conv_1', num_results=5)

[{'activation': 2.154819,
  'kernel': 127,
  'position': 28,
  'words': 'someone talk to'},
 {'activation': 2.0720279,
  'kernel': 101,
  'position': 29,
  'words': 'talk to me'},
 {'activation': 1.8903841,
  'kernel': 18,
  'position': 31,
  'words': 'me x <unknown>'},
 {'activation': 1.8060901, 'kernel': 120, 'position': 30, 'words': 'to me x'},
 {'activation': 1.7299895,
  'kernel': 29,
  'position': 28,
  'words': 'someone talk to'}]

In [18]:
# Inspect kernel 0 of 'conv_1' across the first 10,000 sequences; the recorded
# output lists the word windows with the highest activations for that kernel.
# NOTE(review): this cell's recorded execution count (18) is out of sequence with
# the cells above (1-10); re-run with Restart & Run All to confirm the output
# does not depend on hidden kernel state.
fv.analyze_neuron(seqs[:10000], 'conv_1', 0)

[{'activation': 5.2490921,
  'kernel': 0,
  'position': 29,
  'words': 'sad sad sad'},
 {'activation': 4.8096027,
  'kernel': 0,
  'position': 16,
  'words': 'friday, but sad'},
 {'activation': 4.6268544,
  'kernel': 0,
  'position': 19,
  'words': 'sad <allcaps> sad'},
 {'activation': 4.6268544,
  'kernel': 0,
  'position': 17,
  'words': 'sad <allcaps> sad'},
 {'activation': 4.5611711,
  'kernel': 0,
  'position': 30,
  'words': 'died soo sad'},
 {'activation': 4.5597568,
  'kernel': 0,
  'position': 19,
  'words': '. but unfortunately'},
 {'activation': 4.5227909,
  'kernel': 0,
  'position': 25,
  'words': 'anymore i miss'},
 {'activation': 4.4558101,
  'kernel': 0,
  'position': 23,
  'words': 'adorable, but unfortunately'},
 {'activation': 4.3963599,
  'kernel': 0,
  'position': 29,
  'words': 'borked ! sad'},
 {'activation': 4.3497105, 'kernel': 0, 'position': 20, 'words': '; hw sad'}]