# Imports

In [1]:
from keras.datasets import reuters
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Softmax, Dropout
from keras.layers import SimpleRNN, LSTM, Embedding, Bidirectional, GlobalAveragePooling1D
from keras.utils import to_categorical

import numpy as np

## Constants

In [2]:
# Shared settings used by the data-loading, padding and model cells below.
# Token-length cap per article: passed as `maxlen` when loading and padding, and as the
# Embedding `input_length` (i.e. the number of RNN time steps).
MAX_SEQUENCE_LENGTH = 200
# Vocabulary size: passed as `num_words` when loading and as the Embedding input dimension.
VOCAB_SIZE = 10_000

# Data loading
In this exercise, we will use a smaller dataset that has already been preprocessed by the Keras team.

In [3]:
# Load the Reuters newswire topic dataset as lists of integer word indices plus integer labels.
# NOTE(review): `maxlen` is documented by Keras as a sequence-length cap; confirm whether the
# installed version truncates or drops longer articles.
(x_train, y_train), (x_test, y_test) = reuters.load_data(
    path="reuters.npz",          # file name used for the downloaded copy
    num_words=VOCAB_SIZE,        # vocabulary cap; words outside it are mapped to `oov_char`
    skip_top=0,                  # do not discard any of the most frequent words
    maxlen=MAX_SEQUENCE_LENGTH,  # per-article length cap
    test_split=0.5,              # hold out half of the articles as the test set
    seed=113,                    # fixed shuffle seed so the split is reproducible
    start_char=1,                # index that marks the start of every article
    oov_char=2,                  # index substituted for out-of-vocabulary words
    index_from=3,                # real word indices start at this value
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


# Data preparation for learning

In [4]:
# Bring every article to exactly MAX_SEQUENCE_LENGTH tokens by filling with zeros.
# That length is the number of time steps the RNN layers will unroll over.
# Keras pads at the front by default, so the real tokens sit at the end of each row.
X_train, X_test = (
    pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH, value=0)
    for sequences in (x_train, x_test)
)

print(X_train[:10])

[[   0    0    0 ...   15   17   12]
 [   0    0    0 ...  505   17   12]
 [   0    0    0 ...   11   17   12]
 ...
 [   0    0    0 ...  254   17   12]
 [   0    0    0 ... 2735   17   12]
 [   0    0    0 ... 4329   17   12]]


# Simple RNN

In [6]:
# Baseline: Embedding -> SimpleRNN (final state only) -> Dense -> Softmax.
n_classes = 46  # number of Reuters topic labels; width of the output layer and one-hot targets

model = Sequential()
# Trainable lookup table: each word index (< VOCAB_SIZE) becomes a 10-dimensional vector,
# so one article turns into a (MAX_SEQUENCE_LENGTH, 10) sequence.
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# 25 hidden units; only the last hidden state (a 25-vector summarising the article) is returned.
model.add(SimpleRNN(25))
model.add(Dense(n_classes))  # maps the 25-vector to one score per class
model.add(Softmax())         # turns the scores into class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()
# Pin num_classes: without it to_categorical sizes the one-hot matrix from the largest label
# present in the array, which breaks if a split happens to lack the highest class.
model.fit(X_train, to_categorical(y_train, num_classes=n_classes), epochs=10, validation_split=0.05)
loss, acc = model.evaluate(X_test, to_categorical(y_test, num_classes=n_classes))
print("Test accuracy: %0.2f%%"%(acc*100))

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 200, 10)           100000    
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 25)                900       
_________________________________________________________________
dense_1 (Dense)              (None, 46)                1196      
_________________________________________________________________
softmax_1 (Softmax)          (None, 46)                0         
Total params: 102,096
Trainable params: 102,096
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 48.72%


# Bidirectional RNN

In [7]:
# Bidirectional variant: the article is read left-to-right and right-to-left by two SimpleRNNs.
n_classes = 46  # number of Reuters topic labels; width of the output layer and one-hot targets

model = Sequential()
# Each word index becomes a 10-dimensional trainable vector.
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# merge_mode='ave' averages the two directions' final states, so the output stays a 25-vector.
model.add(Bidirectional(SimpleRNN(25), merge_mode='ave'))
model.add(Dense(n_classes))  # one score per class
model.add(Softmax())         # scores -> class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()
# Pin num_classes: without it to_categorical sizes the one-hot matrix from the largest label
# present in the array, which breaks if a split happens to lack the highest class.
model.fit(X_train, to_categorical(y_train, num_classes=n_classes), epochs=5, validation_split=0.05)
loss, acc = model.evaluate(X_test, to_categorical(y_test, num_classes=n_classes))
print("Test accuracy: %0.2f%%"%(acc*100))

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 200, 10)           100000    
_________________________________________________________________
bidirectional (Bidirectional (None, 25)                1800      
_________________________________________________________________
dense_2 (Dense)              (None, 46)                1196      
_________________________________________________________________
softmax_2 (Softmax)          (None, 46)                0         
Total params: 102,996
Trainable params: 102,996
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 41.40%


# Simple RNN with averaging

In [8]:
# Averaging variant: use the mean of all hidden states instead of only the final one.
n_classes = 46  # number of Reuters topic labels; width of the output layer and one-hot targets

model = Sequential()
# Each word index becomes a 10-dimensional trainable vector.
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# return_sequences=True makes the RNN emit its 25-dimensional hidden state at every time step,
# i.e. a (MAX_SEQUENCE_LENGTH, 25) sequence rather than a single summary vector.
model.add(SimpleRNN(25, return_sequences=True))
# Average those per-time-step vectors over the time axis -> a single 25-vector per article.
# NOTE(review): the Embedding does not set mask_zero, so the zero-padded steps presumably
# take part in this average - confirm that is intended.
model.add(GlobalAveragePooling1D())
model.add(Dense(n_classes))  # one score per class
model.add(Softmax())         # scores -> class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()
# Pin num_classes: without it to_categorical sizes the one-hot matrix from the largest label
# present in the array, which breaks if a split happens to lack the highest class.
model.fit(X_train, to_categorical(y_train, num_classes=n_classes), epochs=5, validation_split=0.05)
loss, acc = model.evaluate(X_test, to_categorical(y_test, num_classes=n_classes))
print("Test accuracy: %0.2f%%"%(acc*100))

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 200, 10)           100000    
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, 200, 25)           900       
_________________________________________________________________
global_average_pooling1d (Gl (None, 25)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 46)                1196      
_________________________________________________________________
softmax_3 (Softmax)          (None, 46)                0         
Total params: 102,096
Trainable params: 102,096
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 51.68%


# Multilayer Bidirectional RNN

In [9]:
# Stacked variant: two bidirectional LSTM layers on top of the embeddings.
n_classes = 46  # number of Reuters topic labels; width of the output layer and one-hot targets

model = Sequential()
# Each word index becomes a 10-dimensional trainable vector.
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
# First layer returns the full sequence so the next recurrent layer has one input per time step.
# merge_mode='ave' averages the forward and backward outputs, keeping 25 features per step.
model.add(Bidirectional(LSTM(25, return_sequences=True), merge_mode='ave'))
# The last recurrent layer only needs to hand a single 25-dimensional summary vector to the classifier.
model.add(Bidirectional(LSTM(25), merge_mode='ave'))
model.add(Dense(n_classes))  # one score per class
model.add(Softmax())         # scores -> class probabilities

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()
# Pin num_classes: without it to_categorical sizes the one-hot matrix from the largest label
# present in the array, which breaks if a split happens to lack the highest class.
model.fit(X_train, to_categorical(y_train, num_classes=n_classes), epochs=5, validation_split=0.05)
loss, acc = model.evaluate(X_test, to_categorical(y_test, num_classes=n_classes))
print("Test accuracy: %0.2f%%"%(acc*100))

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 200, 10)           100000    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 200, 25)           7200      
_________________________________________________________________
bidirectional_2 (Bidirection (None, 25)                10200     
_________________________________________________________________
dense_4 (Dense)              (None, 46)                1196      
_________________________________________________________________
softmax_4 (Softmax)          (None, 46)                0         
Total params: 118,596
Trainable params: 118,596
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 41.40%
