# Imports

In [47]:
from keras.datasets import reuters
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Softmax, Dropout
from keras.layers import SimpleRNN, LSTM, Embedding, Bidirectional, GlobalAveragePooling1D
from keras.utils import to_categorical
from keras import regularizers

import numpy as np

## Constants

In [48]:
MAX_SEQUENCE_LENGTH = 200
VOCAB_SIZE = 10000

# Data loading
In this exercise, we will use a smaller dataset that has been preprocessing already by the Keras folks

In [49]:
(x_train, y_train), (x_test, y_test) = reuters.load_data(path="reuters.npz",
                                                         num_words=VOCAB_SIZE,
                                                         skip_top=0,
                                                         maxlen=MAX_SEQUENCE_LENGTH,
                                                         test_split=0.3,
                                                         seed=501,
                                                         start_char=1,
                                                         oov_char=2,
                                                         index_from=3)

In [61]:
print(y_train.shape)
print('Highest class number: %d' %(y_train.max()))
print('Lowest class number: %d' %(y_train.min()))
print(y_train[:100])

print(x_train.shape)
print(x_train[:2])
print(len(x_train[0]))

(6192,)
Highest class number: 45
Lowest class number: 0
[ 3  4  3  1  3  4  1  3 11 11  3 27  3  3  3  0  3  3  4  3  3  4  3  4
  3  3  1  4  3 25 25  3  4  3 19  3  3  3  3  3  3  4  4  4  3  6 20  3
  3  3  3  3  3  4  1  4  4  3  3 32  3  4  3 16  4  3  4  4  3  4  2  3
 10  4  3  4  3  4  4  4 18  3  4  3  4  3  4  4  3  4  3  1  3  3  4  3
  3 16  3 15]
(6192,)
[list([1, 7002, 3595, 71, 8, 88, 5, 25, 3192, 374, 2, 267, 7, 4, 37, 38, 333, 7, 48, 27, 47, 47, 20, 22, 4, 375, 29, 1178, 88, 62, 47, 32, 35, 15, 84, 22, 47, 70, 35, 15, 7, 105, 4, 49, 8, 290, 4, 49, 8, 39, 338, 34, 2115, 72, 11, 15, 109, 20, 420, 105, 7002, 8, 25, 374, 201, 439, 2, 81, 1755, 4424, 71, 2, 2, 71, 2, 81, 2, 9, 2, 81, 2, 2, 71, 9, 2, 873, 111, 7002, 8, 2, 25, 1134, 1727, 49, 257, 39, 338, 5, 928, 47, 11, 15, 84, 353, 20, 5, 105, 16, 75, 8, 1755, 25, 4254, 9, 7065, 49, 1543, 39, 338, 5, 440, 11, 15, 10, 653, 20, 148, 117, 105, 17, 12])
 list([1, 342, 733, 3612, 571, 8, 16, 1954, 1584, 291, 392, 149, 10, 323, 

# Data preparation for learning

In [51]:
X_train = pad_sequences(x_train, maxlen=MAX_SEQUENCE_LENGTH, value=0)
X_test = pad_sequences(x_test, maxlen=MAX_SEQUENCE_LENGTH, value=0)
print(X_train[100])

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    1    2  232  427  580    2    9    4   37
   38 8510    2   56    2  651  103  701   10  119   29  562   13    4
  899    5 6725  659  249 7956    4  179 9537  497  580    8 2313   33
  624  225 1058    9  154  117  109  206    6 1133   47   44   35 1396
 4165 2123   31  376   20    5    4  249 2250    7 4992 1094    6   10
 2230   28 2680 2412 2184 9537    8 9537    8    4 1958 2683   33 5004
    4 2762  800    5 7956    9    4  881    5    2    9  263  172  485
    4  169    9    4  234   76    4  120 1240  177    5  676  164 8510
 1468  427   81    9    2  117    4   73 7956  498    4  332 1294   42
  562    7  429   48   50 2601   10  295 3825 1109  164    2    9 8510
  323   56    2  249  111   92    4   37   38  103    2 1669  766    5
 4992 

# Dense network model

In [75]:
model = Sequential()
model.add(Dense(100, input_shape=(MAX_SEQUENCE_LENGTH,), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(46, activation='relu'))
model.add(Softmax())

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

model.fit(X_train, to_categorical(y_train), epochs=10, validation_split=0.1)
loss, acc = model.evaluate(X_test, to_categorical(y_test))
print("Test accuracy: %0.2f%%"%(acc*100))

Train on 5572 samples, validate on 620 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 41.07%


# Vanilla RNN model

In [77]:
model = Sequential()

# ... build your model here
# You can choose to use a feed forward network with bag of words, 
# or a recurrent neural network with the `Embedding` layer.
# Try different number of layers, different layer sizes, activations,
# different combinations of RNN's and Dense, RNN vs LSTM, Bidirectional etc
# Also feel free to change the number of epochs, vocab size, max sequence lengths etc.

model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH)) 
model.add(Dropout(0.2))
model.add(SimpleRNN(100)) 
model.add(Dropout(0.3))
model.add(Dense(46))
model.add(Softmax())

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

model.fit(X_train, to_categorical(y_train), epochs=10, validation_split=0.1)
loss, acc = model.evaluate(X_test, to_categorical(y_test))
print("Test accuracy: %0.2f%%"%(acc*100))

Train on 5572 samples, validate on 620 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 41.07%


# Bidirectional RNN

In [63]:
model = Sequential()
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
model.add(Dropout(0.2))
model.add(Bidirectional(SimpleRNN(50), merge_mode='ave'))
model.add(Dropout(0.3))
model.add(Dense(46))
model.add(Softmax())

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

model.fit(X_train, to_categorical(y_train), epochs=10, validation_split=0.1)
loss, acc = model.evaluate(X_test, to_categorical(y_test))
print("Test accuracy: %0.2f%%"%(acc*100))

Train on 5572 samples, validate on 620 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 58.74%


# Multilayer Bidirectional LSTM

In [43]:
model = Sequential()
model.add(Embedding(VOCAB_SIZE, 10, input_length=MAX_SEQUENCE_LENGTH))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(25, return_sequences=True), merge_mode='ave'))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(25), merge_mode='ave'))
model.add(Dropout(0.5))
model.add(Dense(46))
model.add(Softmax())

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

model.fit(X_train, to_categorical(y_train), epochs=5, validation_split=0.1)
loss, acc = model.evaluate(X_test, to_categorical(y_test))
print("Test accuracy: %0.2f%%"%(acc*100))

Train on 5572 samples, validate on 620 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 49.66%


# Writeup
Write a brief paragraph describing your choices for the architecture, as well as your training regime (# epochs, sequence length etc.m)

##### WRITEUP goes here (Double click here if this field is not editable) ###

# Testing models...

Fully connected model:         41.07 (1s/epoch),  41.07 (w/do), no more improvement
Vanilla RNN model:             48.27 (12s/epoch), 56.29 (w/do), 60.47 (w/RNN-50)
Bidirectional model RNN:       58.25 (12s/epoch), 58.25 (w/do), 58.74 (w/RNN-50)
Multilayer Bidirectional LSTM: 55.84 (80s/epoch), 49.66 (w/do), 57.65 (w/LSTM-50) 61.34

Each of the models were tried in their basic form , then with dropout layers, then with larger RNN layers and then combinations of these two.

The fully connected model appears to be getting stuck in a local optimum.

The vanilla RNN model, though appearing to get the best scores, has a loss function that oscillates and doesn't converge and maybe overfitting.

Both the Bidirectional RNN with a single and two layers appear to be more consistent and not overfitting, but their improvements are very incremental.

The last entry in the two layer bidi model was run on Google Collab with 50 neuron LSTMs for 10 epochs, which goes towards the optimum very slowly (minor improvements in loss in each epoch) and returns the best result so far!

Once you are done with your model and the writeup:
    
1: Save the notebook by selecting `File > Save and Checkpoint`
    
2: Export the notebook as HTML `File > Download as > HTML (.html)`
    
3: Upload both the notebook and html file to Google classroom