In [1]:
from keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense, Activation
from keras.models import Model, Sequential
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.layers.embeddings import Embedding
from keras.utils import to_categorical
import numpy as np

Using TensorFlow backend.


In [2]:
# Vocabulary is capped at the `top_words` most frequent words. Rarer words are
# not dropped: Keras replaces them with its out-of-vocabulary index
# (`oov_char`), so every review keeps its original length.
top_words = 500
train_split, test_split = imdb.load_data(num_words=top_words)
X_train, y_train = train_split
X_test, y_test = test_split
# The temporaries only existed for unpacking; remove them from the namespace.
del train_split, test_split

In [3]:
# Bring every review to exactly `max_review_length` word indices so the data
# forms a rectangular array (longer reviews are truncated, shorter ones padded).
timesteps = 50
max_review_length = timesteps
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_review_length)
    for split in (X_train, X_test)
)

In [4]:
# One-hot targets for the autoencoder: each word index becomes a vector of
# length `top_words`. The width is pinned explicitly because, when
# `num_classes` is omitted, to_categorical infers it from the largest index
# that happens to be present, which need not equal the model's output size.
X_train_encoded = to_categorical(X_train, num_classes=top_words)

In [5]:
# Model hyperparameters.
latent_dim = 100               # units of the encoder LSTM, i.e. size of the review code
embedding_vector_length = 32   # width of each word embedding; also reused as decoder LSTM units

In [6]:
# Sequence autoencoder over word indices.
#   Encoder: Embedding -> LSTM(latent_dim); the LSTM's final output is the
#            fixed-size code for the whole review.
#   Decoder: the code is repeated once per timestep, passed through an LSTM
#            that returns the full sequence, then a per-timestep Dense +
#            softmax over the `top_words` vocabulary.
# Layer order matters: later cells address the encoder via model.layers[0]
# and model.layers[1].
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(LSTM(latent_dim))
model.add(RepeatVector(max_review_length))
model.add(LSTM(embedding_vector_length, return_sequences=True))
model.add(TimeDistributed(Dense(top_words)))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 50, 32)            16000     
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 50, 100)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 50, 32)            17024     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 50, 500)           16500     
_________________________________________________________________
activation_1 (Activation)    (None, 50, 500)           0         
Total params: 102,724
Trainable params: 102,724
Non-trainable params: 0
_________________________________________________________________
None

In [7]:
# Train the autoencoder to reproduce its own input: the padded index
# sequences go in, their one-hot encoding is the target.
model.fit(
    x=X_train,
    y=X_train_encoded,
    batch_size=100,
    epochs=1,
)

Epoch 1/1


<keras.callbacks.History at 0x7fefc8c0ab70>

In [8]:
# Release the training targets before building the test targets so the two
# one-hot tensors are never held in memory at the same time.
del X_train_encoded
# Pin the one-hot width to the vocabulary size. Without `num_classes`,
# to_categorical infers it from the largest index present in X_test, which
# would not match the model's `top_words`-wide softmax if that index is absent.
X_test_encoded = to_categorical(X_test, num_classes=top_words)

In [9]:
# Reconstruction quality on the held-out reviews; `score` holds the loss
# followed by the accuracy metric requested at compile time.
score = model.evaluate(
    x=X_test,
    y=X_test_encoded,
    verbose=0,
)
print('Test score:', score[0])
print('Test accuracy:', score[1])
# The one-hot test targets are not needed past this point.
del X_test_encoded

Test score: 4.29180800064
Test accuracy: 0.27585520009


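Parts (b) and (c) follow: the trained encoder's output is extracted for every review and used as the input features of a sentiment classifier.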

In [10]:
from keras import backend as K

# Backend function that runs only the encoder half of `model`: it feeds the
# input of the first layer (the embedding) and returns the output of the
# second layer (the encoder LSTM).
get_encoder_output = K.function(
    inputs=[model.layers[0].input],
    outputs=[model.layers[1].output],
)

In [11]:
# Encode both splits. The backend function returns a list with one array per
# requested output; exactly one output was requested, so unpack that single entry.
(encoded_reviews_train,) = get_encoder_output([X_train])
(encoded_reviews_test,) = get_encoder_output([X_test])

In [12]:
# Sentiment classifier on top of the encoder codes: a small feed-forward
# network taking a `latent_dim`-sized vector and emitting one sigmoid
# probability.
# The hidden layers need a non-linearity. Dense defaults to no activation,
# and a stack of purely linear layers collapses into a single linear map,
# which would make this network no more expressive than logistic regression.
model2 = Sequential()
model2.add(Dense(100, activation='relu', input_shape=(latent_dim,)))
model2.add(Dense(10, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1010      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 11        
Total params: 11,121
Trainable params: 11,121
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
# Fit the classifier on the encoded training reviews.
# NOTE(review): the validation data is the test split itself, so the
# per-epoch validation numbers are not independent of the final test score.
model2.fit(
    x=encoded_reviews_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_data=(encoded_reviews_test, y_test),
)

Train on 25000 samples, validate on 25000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fef8e614668>

In [15]:
# Final classifier metrics on the encoded test reviews; `score` holds the
# loss followed by the accuracy metric requested at compile time.
score = model2.evaluate(
    x=encoded_reviews_test,
    y=y_test,
    verbose=0,
)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.693438735237
Test accuracy: 0.5
