# Imports

In [1]:
from keras.datasets import reuters
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Softmax, Dropout
from keras.layers import SimpleRNN, LSTM, Embedding, Bidirectional, GlobalAveragePooling1D
from keras.utils import to_categorical

import numpy as np

Using TensorFlow backend.


## Constants

In [2]:
# Number of tokens per article: used as the length cap when loading the data
# and as the fixed (padded) input length of every model below.
MAX_SEQUENCE_LENGTH = 200
# Vocabulary size: passed to the loader as num_words and to the Embedding
# layers as the number of distinct word indices.
VOCAB_SIZE = 10_000

# Data loading
In this exercise, we will use a smaller dataset (Reuters newswire articles) that has already been preprocessed by the Keras team.

In [3]:
# All loader settings gathered in one place so they are easy to scan and tune.
reuters_options = dict(
    path="reuters.npz",
    num_words=VOCAB_SIZE,        # vocabulary cap shared with the Embedding layers
    skip_top=0,
    maxlen=MAX_SEQUENCE_LENGTH,  # length cap shared with the padding step
    test_split=0.5,              # half of the articles are held out for testing
    seed=113,
    start_char=1,
    oov_char=2,
    index_from=3,
)

# Each split is a pair of (sequences of word indices, integer topic labels).
(x_train, y_train), (x_test, y_test) = reuters.load_data(**reuters_options)

# Data preparation for learning

In [4]:
# Bring every article to exactly MAX_SEQUENCE_LENGTH token ids by filling with
# zeros. Keras pads at the front by default, so the real tokens sit at the end
# of each row. MAX_SEQUENCE_LENGTH is also the number of RNN time steps.
X_train, X_test = (
    pad_sequences(split, maxlen=MAX_SEQUENCE_LENGTH, value=0)
    for split in (x_train, x_test)
)

# Quick look at the first few padded rows
print(X_train[:10])

[[   0    0    0 ...   15   17   12]
 [   0    0    0 ...  505   17   12]
 [   0    0    0 ...   11   17   12]
 ...
 [   0    0    0 ...  254   17   12]
 [   0    0    0 ... 2735   17   12]
 [   0    0    0 ... 4329   17   12]]


# Simple RNN

In [5]:
NUM_CLASSES = 46  # number of topic classes in the Reuters dataset

# One-hot encode the labels with an explicit width. Without num_classes,
# to_categorical sizes the matrix from the largest label it happens to see, so
# a split that lacks the highest class would no longer match the output layer.
y_train_onehot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_onehot = to_categorical(y_test, num_classes=NUM_CLASSES)

model = Sequential()
# Embedding layer: looks up a trainable 10-dimensional vector for each word index
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# RNN with 25 hidden units; only the final state (a 25-dim summary vector) is returned
model.add(SimpleRNN(25))
model.add(Dense(NUM_CLASSES))  # 25 inputs -> one score per class
model.add(Softmax())  # turn the scores into class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# Hold out 5% of the training data for validation during training
model.fit(X_train, y_train_onehot, epochs=10, validation_split=0.05)
loss, acc = model.evaluate(X_test, y_test_onehot)
print("Test accuracy: %0.2f%%"%(acc*100))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 4201 samples, validate on 222 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 43.45%


# Bidirectional RNN

In [6]:
NUM_CLASSES = 46  # number of topic classes in the Reuters dataset

# One-hot encode the labels with an explicit width. Without num_classes,
# to_categorical sizes the matrix from the largest label it happens to see, so
# a split that lacks the highest class would no longer match the output layer.
y_train_onehot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_onehot = to_categorical(y_test, num_classes=NUM_CLASSES)

model = Sequential()
# Embedding layer: looks up a trainable 10-dimensional vector for each word index
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# Two 25-unit RNNs read the sequence in opposite directions; merge_mode='ave'
# averages their final states into a single 25-dim vector
model.add(Bidirectional(SimpleRNN(25), merge_mode='ave'))
model.add(Dense(NUM_CLASSES))  # 25 inputs -> one score per class
model.add(Softmax())  # turn the scores into class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# Hold out 5% of the training data for validation during training
model.fit(X_train, y_train_onehot, epochs=5, validation_split=0.05)
loss, acc = model.evaluate(X_test, y_test_onehot)
print("Test accuracy: %0.2f%%"%(acc*100))

Train on 4201 samples, validate on 222 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 43.43%


# Simple RNN with averaging

In [7]:
NUM_CLASSES = 46  # number of topic classes in the Reuters dataset

# One-hot encode the labels with an explicit width. Without num_classes,
# to_categorical sizes the matrix from the largest label it happens to see, so
# a split that lacks the highest class would no longer match the output layer.
y_train_onehot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_onehot = to_categorical(y_test, num_classes=NUM_CLASSES)

model = Sequential()
# Embedding layer: looks up a trainable 10-dimensional vector for each word index
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# Instead of returning only the final summary vector, the RNN emits its 25-dim
# hidden state at every one of the MAX_SEQUENCE_LENGTH time steps
model.add(SimpleRNN(25, return_sequences=True))
# Average those per-time-step vectors over time into a single 25-dim vector
model.add(GlobalAveragePooling1D())
model.add(Dense(NUM_CLASSES))  # 25 inputs -> one score per class
model.add(Softmax())  # turn the scores into class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# Hold out 5% of the training data for validation during training
model.fit(X_train, y_train_onehot, epochs=5, validation_split=0.05)
loss, acc = model.evaluate(X_test, y_test_onehot)
print("Test accuracy: %0.2f%%"%(acc*100))

Train on 4201 samples, validate on 222 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 51.80%


# Multilayer Bidirectional RNN (LSTM)

In [None]:
NUM_CLASSES = 46  # number of topic classes in the Reuters dataset

# One-hot encode the labels with an explicit width. Without num_classes,
# to_categorical sizes the matrix from the largest label it happens to see, so
# a split that lacks the highest class would no longer match the output layer.
y_train_onehot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_onehot = to_categorical(y_test, num_classes=NUM_CLASSES)

model = Sequential()
# Embedding layer: looks up a trainable 10-dimensional vector for each word index
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# First bidirectional layer returns a vector per time step; merge_mode='ave'
# averages the two directions so the next layer receives one 25-dim sequence
model.add(Bidirectional(LSTM(25, return_sequences=True), merge_mode='ave'))
# The final bidirectional layer only needs to return the summary vector
model.add(Bidirectional(LSTM(25), merge_mode='ave'))
model.add(Dense(NUM_CLASSES))  # 25 inputs -> one score per class
model.add(Softmax())  # turn the scores into class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# Hold out 5% of the training data for validation during training
model.fit(X_train, y_train_onehot, epochs=5, validation_split=0.05)
loss, acc = model.evaluate(X_test, y_test_onehot)
print("Test accuracy: %0.2f%%"%(acc*100))

Train on 4201 samples, validate on 222 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5