## Recurrent Neural Networks

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
import numpy as np

## Loading IMDB

In [2]:
from tensorflow.keras.datasets import imdb

(X_train,y_train), (X_test,y_test) = imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
# Get IMDB dictionary
word_idx_dict = imdb.get_word_index()

#print(word_idx_dict.keys())
#print(word_idx_dict.values())

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


## Data preprocessing: Padding

In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
print(len(X_train[0]))

218


In [6]:
max_len = 256
X_train_pad = pad_sequences(X_train,value=0, padding='post', maxlen=max_len)

In [7]:
print(X_train_pad.shape)
print(len(X_train_pad[0]))
print(X_train_pad[0].shape)
print(X_train_pad[0])

(25000, 256)
256
(256,)
[   1   14   22   16   43  530  973 1622 1385   65  458 4468   66 3941
    4  173   36  256    5   25  100   43  838  112   50  670    2    9
   35  480  284    5  150    4  172  112  167    2  336  385   39    4
  172 4536 1111   17  546   38   13  447    4  192   50   16    6  147
 2025   19   14   22    4 1920 4613  469    4   22   71   87   12   16
   43  530   38   76   15   13 1247    4   22   17  515   17   12   16
  626   18    2    5   62  386   12    8  316    8  106    5    4 2223
 5244   16  480   66 3785   33    4  130   12   16   38  619    5   25
  124   51   36  135   48   25 1415   33    6   22   12  215   28   77
   52    5   14  407   16   82    2    8    4  107  117 5952   15  256
    4    2    7 3766    5  723   36   71   43  530  476   26  400  317
   46    7    4    2 1029   13  104   88    4  381   15  297   98   32
 2071   56   26  141    6  194 7486   18    4  226   22   21  134  476
   26  480    5  144   30 5535   18   51   36   28  2

In [8]:
#word_idx_dict_new['<PAD>']=0
#word_idx_dict_new

In [9]:
#print(decode_review(X_train_pad[0]))

In [10]:
X_test_pad = pad_sequences(X_test, value=0, padding='post',maxlen=max_len)

In [11]:
print(X_train_pad.shape)
print(X_test_pad.shape)

(25000, 256)
(25000, 256)


## Simple RNN

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import SimpleRNN

basic_rnn = Sequential()
basic_rnn.add(Embedding(input_dim=10000, output_dim=100, input_length=256))
basic_rnn.add(SimpleRNN(128))
basic_rnn.add(Dense(1,activation='sigmoid'))

In [13]:
# validation dataset
X_train_pad, X_val_pad = X_train_pad[:20000], X_train_pad[20000:]
y_train, y_val = y_train[:20000], y_train[20000:]

In [14]:
basic_rnn.compile(optimizer='adam',loss='binary_crossentropy',metrics='acc')
#history_basic_rnn = basic_rnn.fit(X_train_pad,y_train,
#                                  validation_data=(X_val_pad,y_val),
#                                  epochs=10)
history_basic_rnn = basic_rnn.fit(X_train_pad,y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
test_performance = basic_rnn.evaluate(X_test_pad,y_test)
print(test_performance)

[0.7127254009246826, 0.5246400237083435]


## LSTM

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM

LSTM_model = Sequential()
LSTM_model.add(Embedding(input_dim=10000,output_dim=100,input_length=256))
LSTM_model.add(LSTM(128))
LSTM_model.add(Dense(1,activation='sigmoid'))
LSTM_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 256, 100)          1000000   
                                                                 
 lstm (LSTM)                 (None, 128)               117248    
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1,117,377
Trainable params: 1,117,377
Non-trainable params: 0
_________________________________________________________________


In [None]:
LSTM_model.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics='acc')

LSTM_model.fit(X_train_pad,y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x14915a5ef70>

In [None]:
#test_performance = LSTM_model.evaluate(X_test_pad,y_test)[1]
test_performance = LSTM_model.evaluate(X_test_pad,y_test)
print(test_performance)

[0.42544546723365784, 0.8652799725532532]
