## Loading the Data

In [1]:
from tensorflow.keras.datasets import imdb

In [2]:
# Vocabulary size: passed as num_words to imdb.load_data and as the
# Embedding input size for every model in this notebook.
vocab_size = 8000

In [3]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Show the container type, the per-review type, and one raw encoded review.
print(type(X_train), type(X_train[5]), X_train[5], sep="\n")

<class 'numpy.ndarray'>
<class 'list'>
[1, 778, 128, 74, 12, 630, 163, 15, 4, 1766, 7982, 1051, 2, 32, 85, 156, 45, 40, 148, 139, 121, 664, 665, 10, 10, 1361, 173, 4, 749, 2, 16, 3804, 8, 4, 226, 65, 12, 43, 127, 24, 2, 10, 10]


In [5]:
# Fixed sequence length handed to pad_sequences for both the train and test sets.
maxlen = 200

### Staging and pre-processing our data

In [6]:
from tensorflow.keras import preprocessing

In [7]:
# Bring every review in both splits to exactly `maxlen` tokens.
X_train, X_test = (
    preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (X_train, X_test)
)

In [8]:
print(X_train[5])

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    1  778  128   74   12  630  163   15    4 1766 7982
 1051    2   32   85  156   45   40  148  139  121  664  665   10   10
 1361  173    4  749    2   16 3804    8    4  226   65   12   43  127
   24 

### Build and train RNN Model for Sentiment Classification

In [None]:
import numpy as np
import tensorflow as tf

# Seed both the NumPy and TensorFlow random number generators with the same
# fixed value before any model is built or trained.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Flatten, Dense, Embedding, SpatialDropout1D, Dropout

In [None]:
model_rnn = Sequential()

In [None]:
# Map each of the vocab_size token ids to a 32-dimensional vector,
# then apply spatial dropout at rate 0.4.
model_rnn.add(Embedding(input_dim=vocab_size, output_dim=32))
model_rnn.add(SpatialDropout1D(rate=0.4))

In [None]:
model_rnn.add(SimpleRNN(32))

In [None]:
model_rnn.add(Dropout(0.4))

In [None]:
model_rnn.add(Dense(1, activation='sigmoid'))

In [None]:
# Binary classification: cross-entropy loss, RMSprop optimizer, accuracy metric.
model_rnn.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
model_rnn.summary()

In [None]:
history_rnn = model_rnn.fit(X_train, y_train, batch_size=128, validation_split=0.2, epochs = 10)

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
# (this replaces the earlier predict_classes call).
y_test_pred = (model_rnn.predict(X_test) > 0.5).astype("int32")
y_test_pred

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
print(accuracy_score(y_test, y_test_pred))

### Making Predictions on Unseen Data

In [None]:
inp_review = "An excellent movie!"

In [None]:
from tensorflow.keras.preprocessing.text import text_to_word_sequence

In [None]:
text_to_word_sequence(inp_review)

In [None]:
word_map = imdb.get_word_index()

In [None]:
# Order the (word, rank) pairs by ascending rank and keep the first vocab_size.
vocab_map = {
    word: rank
    for word, rank in sorted(word_map.items(), key=lambda pair: pair[1])[:vocab_size]
}

In [None]:
def preprocess(review):
    """Encode a raw review string the way the IMDB training data is encoded.

    ``imdb.get_word_index()`` yields 1-based frequency ranks, while
    ``imdb.load_data()`` (called with its defaults in this notebook) shifts
    every rank by 3 and reserves 1 for the start-of-sequence marker and 2 for
    out-of-vocabulary words. The previous version skipped that shift, omitted
    the start marker, and emitted ``None`` for unknown words, so the model
    received ids that meant different words than during training.

    Args:
        review: Raw review text.

    Returns:
        A list of ints: the start marker followed by one id per token. Tokens
        missing from ``vocab_map``, or whose shifted id would fall outside
        ``vocab_size``, are encoded as the out-of-vocabulary id.
    """
    # Defaults of imdb.load_data: index_from=3, start_char=1, oov_char=2.
    index_from, start_char, oov_char = 3, 1, 2

    seq = [start_char]
    for token in text_to_word_sequence(review):
        rank = vocab_map.get(token)
        token_id = oov_char if rank is None else rank + index_from
        # load_data(num_words=vocab_size) replaces ids >= vocab_size with OOV.
        seq.append(token_id if token_id < vocab_size else oov_char)
    return seq

In [None]:
preprocess(inp_review)

In [None]:
# The model was trained on sequences padded to `maxlen`, so the encoded
# review is padded the same way and passed to predict as a (1, maxlen) array
# instead of a ragged Python list.
review_batch = preprocessing.sequence.pad_sequences([preprocess(inp_review)], maxlen=maxlen)
(model_rnn.predict(review_batch) > 0.5).astype("int32")

In [None]:
inp_review = "Don't watch this movie - poor acting, poor script, bad direction."

In [None]:
# Pad the encoded review to `maxlen`, matching the training input, and
# predict on the resulting (1, maxlen) array.
review_batch = preprocessing.sequence.pad_sequences([preprocess(inp_review)], maxlen=maxlen)
(model_rnn.predict(review_batch) > 0.5).astype("int32")

### LSTM-Based Sentiment Classification Model

In [None]:
from tensorflow.keras.layers import LSTM

In [None]:
# Fresh Sequential model: 32-dimensional embeddings followed by spatial dropout.
model_lstm = Sequential()
model_lstm.add(Embedding(input_dim=vocab_size, output_dim=32))
model_lstm.add(SpatialDropout1D(rate=0.4))

In [None]:
model_lstm.add(LSTM(32))

In [None]:
# Classification head: dropout, then a single sigmoid unit.
model_lstm.add(Dropout(rate=0.4))
model_lstm.add(Dense(units=1, activation='sigmoid'))

# Binary cross-entropy with RMSprop, tracking accuracy.
model_lstm.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
model_lstm.summary()

In [None]:
history_lstm = model_lstm.fit(X_train, y_train, batch_size=128, validation_split=0.2, epochs=5)

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
# (this replaces the earlier predict_classes call).
y_test_pred = (model_lstm.predict(X_test) > 0.5).astype("int32")
y_test_pred

In [None]:
print(accuracy_score(y_test, y_test_pred))

### GRU-Based Sentiment Classification Model

In [None]:
from tensorflow.keras.layers import GRU

In [None]:
# Fresh Sequential model: 32-dimensional embeddings followed by spatial dropout.
model_gru = Sequential()
model_gru.add(Embedding(input_dim=vocab_size, output_dim=32))
model_gru.add(SpatialDropout1D(rate=0.4))

In [None]:
model_gru.add(GRU(32, reset_after=False))

In [None]:
# Classification head: dropout, then a single sigmoid unit.
model_gru.add(Dropout(rate=0.4))
model_gru.add(Dense(units=1, activation='sigmoid'))

# Binary cross-entropy with RMSprop, tracking accuracy.
model_gru.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
model_gru.summary()

In [None]:
history_gru = model_gru.fit(X_train, y_train, batch_size=128, validation_split=0.2, epochs = 4)

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
# (this replaces the earlier predict_classes call).
y_test_pred = (model_gru.predict(X_test) > 0.5).astype("int32")
y_test_pred

In [None]:
accuracy_score(y_test, y_test_pred)

### Bi-directional LSTM-Based Sentiment Classification Model

In [None]:
from tensorflow.keras.layers import Bidirectional

In [None]:
# Fresh Sequential model: 32-dimensional embeddings followed by spatial dropout.
model_bilstm = Sequential()
model_bilstm.add(Embedding(input_dim=vocab_size, output_dim=32))
model_bilstm.add(SpatialDropout1D(rate=0.4))

In [None]:
model_bilstm.add(Bidirectional(LSTM(32)))

In [None]:
# Classification head: dropout, then a single sigmoid unit.
model_bilstm.add(Dropout(rate=0.4))
model_bilstm.add(Dense(units=1, activation='sigmoid'))

# Binary cross-entropy with RMSprop, tracking accuracy.
model_bilstm.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
model_bilstm.summary()

In [None]:
history_bilstm = model_bilstm.fit(X_train, y_train, batch_size=128, validation_split=0.2, epochs = 4)

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
# (this replaces the earlier predict_classes call).
y_test_pred = (model_bilstm.predict(X_test) > 0.5).astype("int32")
y_test_pred

In [None]:
accuracy_score(y_test, y_test_pred)

### Stacked LSTM-Based Sentiment Classification Model

In [None]:
# Fresh Sequential model: 32-dimensional embeddings followed by spatial dropout.
model_stack = Sequential()
model_stack.add(Embedding(input_dim=vocab_size, output_dim=32))
model_stack.add(SpatialDropout1D(rate=0.4))

In [None]:
# First LSTM layer: return_sequences=True so it emits its full output
# sequence for the next recurrent layer.
model_stack.add(LSTM(units=32, return_sequences=True))

In [None]:
# Second LSTM layer: return_sequences=False, so only the final output is kept.
model_stack.add(LSTM(units=32, return_sequences=False))

In [None]:
# Classification head: dropout (rate 0.5 here), then a single sigmoid unit.
model_stack.add(Dropout(rate=0.5))
model_stack.add(Dense(units=1, activation='sigmoid'))

# Binary cross-entropy with RMSprop, tracking accuracy.
model_stack.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model_stack.summary()

In [None]:
history_stack = model_stack.fit(X_train, y_train, batch_size=128, validation_split=0.2, epochs = 4)

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
# (this replaces the earlier predict_classes call).
y_test_pred = (model_stack.predict(X_test) > 0.5).astype("int32")
y_test_pred

In [None]:
accuracy_score(y_test, y_test_pred)