# Design Choices in Recurrent Neural Networks

###  Importing packages

In [19]:
import re
import numpy as np
import pandas as pd 

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.datasets import imdb

from keras.utils.np_utils import to_categorical

# Silence every warning issued through the `warnings` module from this point on.
import warnings
warnings.filterwarnings('ignore')
# NOTE(review): the assignment below replaces the default HTTPS context factory with the
# unverified one, so certificate verification is turned off for the whole process.
# Presumably a workaround for certificate errors while downloading the dataset —
# confirm it is still required before keeping it.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

### Preparing Dataset

In [20]:
# Settings shared by every experiment in this notebook.
max_features = 1000  # vocabulary size: only the most common words are kept
maxlen = 80          # every review is cut/padded to this many words
batch_size = 32

print('Loading data...')
# `num_words` restricts the dataset to the `max_features` most common words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
# Bring both splits to a fixed length of `maxlen` so they form 2-D arrays.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 80)
x_test shape: (25000, 80)


## Part 1: Influence of number of nodes

Try the models with different numbers of LSTM nodes (units), such as 8, 16, 32, 64 and 128.

Analyze the number of model parameters, accuracy and training time

### LSTM with 8 nodes

In [21]:
# Smallest model: 8-dimensional embedding -> 8-unit LSTM (no dropout) -> one sigmoid unit.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# The test split is also passed as validation data, so the per-epoch val_* metrics
# are computed on the same samples as the final evaluation below.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_12 (Embedding)     (None, None, 8)           8000      
_________________________________________________________________
lstm_17 (LSTM)               (None, 8)                 544       
_________________________________________________________________
dense_12 (Dense)             (None, 1)                 9         
Total params: 8,553
Trainable params: 8,553
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.38689520955085754
Test accuracy: 0.8228399753570557


### LSTM with 16 nodes

In [6]:
# Same architecture as the 8-node model, with the LSTM widened to 16 units.
model = Sequential()
for layer in (
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
):
    model.add(layer)
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 8)           8000      
_________________________________________________________________
lstm_2 (LSTM)                (None, 16)                1600      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 17        
Total params: 9,617
Trainable params: 9,617
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.39285382628440857
Test accuracy: 0.8189200162887573


### LSTM with 32 nodes

In [7]:
# Same architecture again; only the LSTM width changes (32 units here).
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=32, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 8)           8000      
_________________________________________________________________
lstm_3 (LSTM)                (None, 32)                5248      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 13,281
Trainable params: 13,281
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.3910311460494995
Test accuracy: 0.8220000267028809


### LSTM with 64 nodes

In [8]:
# 64-unit LSTM on top of the same 8-dimensional embedding, no dropout.
model = Sequential()
for layer in (
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=64, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
):
    model.add(layer)
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, None, 8)           8000      
_________________________________________________________________
lstm_4 (LSTM)                (None, 64)                18688     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 65        
Total params: 26,753
Trainable params: 26,753
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.3859326243400574
Test accuracy: 0.8208799958229065


### LSTM with 128 nodes





In [10]:
# Widest model of this part: 128-unit LSTM on the same 8-dimensional embedding.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    LSTM(units=128, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, None, 8)           8000      
_________________________________________________________________
lstm_9 (LSTM)                (None, 128)               70144     
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 129       
Total params: 78,273
Trainable params: 78,273
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.3855207860469818
Test accuracy: 0.8190799951553345


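### Findings about the number of nodes

1.   The parameter count grows quickly with the number of LSTM nodes: 8,553 total parameters with 8 nodes versus 78,273 with 128 nodes, with almost all of the increase coming from the LSTM layer itself (544 → 70,144 parameters).
2.   Test accuracy is essentially flat (about 0.819–0.823) across 8, 16, 32, 64 and 128 nodes after 5 epochs, so the extra capacity does not pay off on this task. Training time was not recorded.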




```
LSTM with 8 nodes
loss: 0.3701 - accuracy: 0.8337 - val_loss: 0.3869 - val_accuracy: 0.8228
Test score: 0.38689520955085754
Test accuracy: 0.8228399753570557


LSTM with 16 nodes
loss: 0.3724 - accuracy: 0.8332 - val_loss: 0.3929 - val_accuracy: 0.8189
Test score: 0.39285382628440857
Test accuracy: 0.8189200162887573


LSTM with 32 nodes
loss: 0.3740 - accuracy: 0.8330 - val_loss: 0.3910 - val_accuracy: 0.8220
Test score: 0.3910311460494995
Test accuracy: 0.8220000267028809


LSTM with 64 nodes
loss: 0.3754 - accuracy: 0.8297 - val_loss: 0.3859 - val_accuracy: 0.8209
Test score: 0.3859326243400574
Test accuracy: 0.8208799958229065


LSTM with 128 nodes
loss: 0.3640 - accuracy: 0.8402 - val_loss: 0.3855 - val_accuracy: 0.8191
Test score: 0.3855207860469818
Test accuracy: 0.8190799951553345

```



## Part 2: Influence of Embedding

Try the models with different embedding sizes. The Keras `Embedding` layer is documented [here](https://keras.io/api/layers/core_layers/embedding/).


In [22]:
# Embedding experiment: 4-dimensional word vectors feeding a 16-unit LSTM, no dropout.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=4),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, None, 4)           4000      
_________________________________________________________________
lstm_18 (LSTM)               (None, 16)                1344      
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 17        
Total params: 5,361
Trainable params: 5,361
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.39734503626823425
Test accuracy: 0.8196399807929993


In [12]:
# Embedding experiment: 16-dimensional word vectors feeding a 16-unit LSTM, no dropout.
model = Sequential()
for layer in (
    Embedding(input_dim=max_features, output_dim=16),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
):
    model.add(layer)
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

In [13]:
# Embedding experiment: 32-dimensional word vectors feeding a 16-unit LSTM, no dropout.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=32),
    LSTM(units=16, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

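### Findings about the number of embedding features

1.   With a 16-node LSTM, shrinking the embedding from 8 to 4 features reduces the total parameter count from 9,617 to 5,361, while test accuracy is practically unchanged (0.8189 vs 0.8196).
2.   TODO: the cells with embedding sizes 16 and 32 have no recorded output; re-run them and add their parameter counts and accuracies here.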


## Part 3: Influence of Dropout

Try the models with different rates of dropout from 0 to 1

Analyze the number of model parameters, accuracy and training time

### Dropout with rate 0.4

In [23]:
# Dropout experiment: input dropout 0.4 on the LSTM; recurrent dropout is set to 0.5.
# Architecture: 32-dimensional embedding -> 8-unit LSTM -> one sigmoid unit.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=32),
    LSTM(units=8, dropout=0.4, recurrent_dropout=0.5),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_14 (Embedding)     (None, None, 32)          32000     
_________________________________________________________________
lstm_19 (LSTM)               (None, 8)                 1312      
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 9         
Total params: 33,321
Trainable params: 33,321
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.399381160736084
Test accuracy: 0.8238400220870972


### Dropout with rate 0.5

In [24]:
# Dropout experiment: input dropout 0.5 on the LSTM; recurrent dropout is set to 0.5.
# Architecture: 32-dimensional embedding -> 8-unit LSTM -> one sigmoid unit.
model = Sequential()
for layer in (
    Embedding(input_dim=max_features, output_dim=32),
    LSTM(units=8, dropout=0.5, recurrent_dropout=0.5),
    Dense(units=1, activation='sigmoid'),
):
    model.add(layer)
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (None, None, 32)          32000     
_________________________________________________________________
lstm_20 (LSTM)               (None, 8)                 1312      
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 9         
Total params: 33,321
Trainable params: 33,321
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.39307069778442383
Test accuracy: 0.8172799944877625


### Dropout with rate 0.7

In [25]:
# Dropout experiment: input dropout 0.7 on the LSTM; recurrent dropout is set to 0.5.
# Architecture: 32-dimensional embedding -> 8-unit LSTM -> one sigmoid unit.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=32),
    LSTM(units=8, dropout=0.7, recurrent_dropout=0.5),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)


Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_16 (Embedding)     (None, None, 32)          32000     
_________________________________________________________________
lstm_21 (LSTM)               (None, 8)                 1312      
_________________________________________________________________
dense_16 (Dense)             (None, 1)                 9         
Total params: 33,321
Trainable params: 33,321
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.39616283774375916
Test accuracy: 0.8184800148010254


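### Findings about the influence of the dropout rate

1.   Dropout does not change the model size: all three models have 33,321 parameters.
2.   With the recurrent dropout fixed at 0.5, varying the input dropout between 0.4, 0.5 and 0.7 changes test accuracy only slightly (0.8238, 0.8173 and 0.8185), with no clear trend after 5 epochs. Training time was not recorded.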



```
Dropout with rate 0.4
Test score: 0.399381160736084
Test accuracy: 0.8238400220870972

Dropout with rate 0.5
Test score: 0.39307069778442383
Test accuracy: 0.8172799944877625

Dropout with rate 0.7
Test score: 0.39616283774375916
Test accuracy: 0.8184800148010254

```




## Part 4: Multilayered RNNs

Try the models with different number of layers from smaller to larger.

Analyze the number of model parameters, accuracy and training time

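### RNN with 2 layer LSTM

Note: the model below stacks 2 LSTM layers with `return_sequences=True` followed by a final LSTM layer, i.e. 3 LSTM layers in total.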

In [26]:
# Stacked model: 2 sequence-returning LSTM layers followed by a final LSTM layer
# (3 LSTM layers in total), each with 8 units and no dropout.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    # Intermediate layers return the full sequence so the next LSTM can consume it.
    *[
        LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True)
        for _ in range(2)
    ],
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_17 (Embedding)     (None, None, 8)           8000      
_________________________________________________________________
lstm_22 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_23 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_24 (LSTM)               (None, 8)                 544       
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 9         
Total params: 9,641
Trainable params: 9,641
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.3961097002029419
Test accuracy: 0.8184800148010254


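### RNN with 3 layer LSTM

Note: the model below stacks 3 LSTM layers with `return_sequences=True` followed by a final LSTM layer, i.e. 4 LSTM layers in total.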

In [27]:
# Stacked model: 3 sequence-returning LSTM layers followed by a final LSTM layer
# (4 LSTM layers in total), each with 8 units and no dropout.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    # Intermediate layers return the full sequence so the next LSTM can consume it.
    *[
        LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True)
        for _ in range(3)
    ],
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_18 (Embedding)     (None, None, 8)           8000      
_________________________________________________________________
lstm_25 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_26 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_27 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_28 (LSTM)               (None, 8)                 544       
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 9         
Total params: 10,185
Trainable params: 10,185
Non-trainable params: 0
_________________________________________________

In [28]:
# Deepest model: 7 sequence-returning LSTM layers followed by a final LSTM layer
# (8 LSTM layers in total), each with 8 units and no dropout.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=8),
    # Intermediate layers return the full sequence so the next LSTM can consume it.
    *[
        LSTM(units=8, dropout=0.0, recurrent_dropout=0.0, return_sequences=True)
        for _ in range(7)
    ],
    LSTM(units=8, dropout=0.0, recurrent_dropout=0.0),
    Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train for 5 epochs, reporting metrics on the test split after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(x_test, y_test),
)

score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_19 (Embedding)     (None, None, 8)           8000      
_________________________________________________________________
lstm_29 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_30 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_31 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_32 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_33 (LSTM)               (None, None, 8)           544       
_________________________________________________________________
lstm_34 (LSTM)               (None, None, 8)         

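### Findings about the number of layers

1.   Each additional 8-node LSTM layer adds only 544 parameters, so the total grows modestly with depth: 9,641 → 10,185 → 12,361 for the "2 layer", "3 layer" and "7 layer" models.
2.   Stacking more layers does not improve test accuracy (0.8185, 0.8216 and 0.8150 respectively); the deepest model is slightly worse after 5 epochs. Training time was not recorded.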


```
RNN with 2 layer LSTM
Total params: 9,641
Test score: 0.3961097002029419
Test accuracy: 0.8184800148010254

RNN with 3 layer LSTM
Total params: 10,185
Test score: 0.3919239342212677
Test accuracy: 0.8216000199317932

RNN with 7 layer LSTM
Total params: 12,361
Test score: 0.4006102979183197
Test accuracy: 0.8149600028991699

```


