In [1]:
import keras
from keras import layers
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.utils import plot_model
import numpy as np
import json
import pydot


Using TensorFlow backend.


In [2]:
# Notebook-wide settings: vocabulary size (passed to load_data as num_words),
# padded review length, and the index offset handed to the IMDB loader.
max_features = 2000
max_len = 500
INDEX_FROM = 3

# Load the train/test splits, then unpack reviews and labels from each.
train_split, test_split = imdb.load_data(num_words=max_features,
                                         index_from=INDEX_FROM)
X_train, y_train = train_split
X_test, y_test = test_split

# Bring every review to exactly max_len token ids.
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)



In [3]:
# Build word <-> id lookup tables that line up with the ids produced by
# imdb.load_data(index_from=INDEX_FROM): every word index is shifted by
# INDEX_FROM, and the ids below the shift are given marker tokens.
raw_word_index = imdb.get_word_index()
word_to_id = {word: rank + INDEX_FROM for word, rank in raw_word_index.items()}
word_to_id.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,
    "<NAN>": 3,
})

# Reverse mapping, used to turn encoded reviews back into text.
id_to_word = dict((idx, word) for word, idx in word_to_id.items())

In [4]:

# Decode the first (padded) training review back into words as a sanity check.
decoded_tokens = [id_to_word[token_id] for token_id in X_train[0]]
print(' '.join(decoded_tokens))

<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD

In [5]:
# One label per embedding row, in id order. The count is tied to max_features
# (the Embedding layer's input dimension) instead of a hard-coded 2000 so the
# metadata file cannot drift out of step with the embedding matrix.
metadata = np.array([id_to_word[i] for i in range(max_features)])

In [6]:
# Write the token labels to disk, one per line.
np.savetxt(
    fname='imdb_metadata.tsv',
    X=metadata,
    fmt='%s',
    delimiter='\t',
)

In [7]:
# Two-branch binary classifier over padded reviews: a shared embedding feeds
# a Conv1D branch and a stacked-LSTM branch, whose outputs are concatenated.
#
# The input length is declared as max_len rather than None: the
# Flatten -> concatenate -> Dense path needs a statically known sequence
# length, and it must agree with the Embedding layer's input_length.
review_input = keras.Input(shape=(max_len,), dtype='int32', name='posts')
embed = layers.Embedding(max_features, 128,
                         input_length=max_len,
                         name='embed')(review_input)

# Convolutional branch: conv -> pool -> conv, then flattened to a vector.
conv = layers.Conv1D(32, 7, activation='relu')(embed)
conv = layers.MaxPooling1D(5)(conv)
conv = layers.Conv1D(32, 7, activation='relu')(conv)
conv = layers.Flatten()(conv)

# Recurrent branch: the first LSTM returns the full sequence so that the
# second LSTM can consume it; the second returns only its final output.
rnn = layers.LSTM(64, return_sequences=True)(embed)
rnn = layers.LSTM(64)(rnn)

merged = layers.concatenate([conv, rnn])
# No activation is passed here, so the layer uses the Keras default.
bottleneck = layers.Dense(32)(merged)
# Despite the name, the sigmoid is already applied to this output.
logit = layers.Dense(1, activation='sigmoid')(bottleneck)

model = keras.Model(review_input, logit)
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])


In [8]:
# Inspect the symbolic tensor produced by the second LSTM layer.
rnn

<tf.Tensor 'lstm_2/TensorArrayReadV3:0' shape=(?, 64) dtype=float32>

In [17]:
# Log training to ./logs for TensorBoard, with histograms every epoch.
tensorboard_cb = keras.callbacks.TensorBoard(log_dir='logs', histogram_freq=1)
callbacks = [tensorboard_cb]

# Single-epoch run; validation_split=0.2 sets aside a fifth of the training
# data for validation.
fit_options = dict(
    epochs=1,
    batch_size=128,
    validation_split=0.2,
    callbacks=callbacks,
)
history = model.fit(X_train, y_train, **fit_options)

Train on 20000 samples, validate on 5000 samples
Epoch 1/1


In [22]:
# Fetch the embedding layer by the name it was given at construction time
# rather than by position in model.layers, so the lookup keeps working if the
# layer order changes. Note: this rebinds `embed`, which previously held the
# embedding layer's output tensor, to the layer object itself.
embed = model.get_layer('embed')

In [23]:
# Display the embedding layer's weights (a list holding one float32 array,
# as shown in the output below).
embed.get_weights()

[array([[-7.0711384e-03, -4.5139669e-03,  6.0028746e-05, ...,
          1.2101459e-02, -2.4987003e-02,  3.2157399e-02],
        [-2.7048759e-02, -1.6369550e-02, -1.9355888e-02, ...,
         -3.4533836e-02,  6.7920618e-02, -2.9828006e-02],
        [-2.3068728e-02, -7.6845428e-04, -2.4758201e-02, ...,
          3.2362152e-02,  7.0913816e-03,  1.8190745e-02],
        ...,
        [-3.6380678e-02, -1.5503711e-03,  8.0812648e-03, ...,
         -1.6207708e-02,  1.5728373e-02, -1.1022414e-02],
        [ 8.1493091e-03,  4.9640685e-02,  5.9767988e-02, ...,
          3.1855866e-02,  2.3286723e-02, -2.0925509e-02],
        [ 4.3906607e-02, -1.6708386e-03,  1.7216362e-02, ...,
          1.8780176e-03,  3.7177466e-02, -5.5808701e-02]], dtype=float32)]

In [24]:
# Dump the embedding matrix (first entry of the layer's weight list) as
# tab-separated values.
embedding_matrix = embed.get_weights()[0]
np.savetxt(
    fname='imdb_embeddings.tsv',
    X=embedding_matrix,
    delimiter='\t',
)


In [9]:
# Render the model's layer graph to an image file.
plot_model(
    model,
    to_file='model.png',
)

