# 11.5 Hands-On RNN Sentiment Classification

In [44]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.layers as layers
from tensorflow import losses

## 11.5.1 Dataset

In [57]:
batchsz = 128
total_words = 10000
max_review_len = 80
embedding_len = 100

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
print(x_train.shape, len(x_train[0]), y_train.shape)
print(x_test.shape, len(x_test[0]), y_test.shape)

(25000,) 218 (25000,)
(25000,) 68 (25000,)


In [46]:
word_index = keras.datasets.imdb.get_word_index()
for k,v in word_index.items():
    print(k,v)

fawn 34701
tsukino 52006
nunnery 52007
sonja 16816
vani 63951
woods 1408
spiders 16115
hanging 2345
woody 2289
trawling 52008
hold's 52009
comically 11307
localized 40830
disobeying 30568
'royale 52010
harpo's 40831
canet 52011
aileen 19313
acurately 52012
diplomat's 52013
rickman 25242
arranged 6746
rumbustious 52014
familiarness 52015
spider' 52016
hahahah 68804
wood' 52017
transvestism 40833
hangin' 34702
bringing 2338
seamier 40834
wooded 34703
bravora 52018
grueling 16817
wooden 1636
wednesday 16818
'prix 52019
altagracia 34704
circuitry 52020
crotch 11585
busybody 57766
tart'n'tangy 52021
burgade 14129
thrace 52023
tom's 11038
snuggles 52025
francesco 29114
complainers 52027
templarios 52125
272 40835
273 52028
zaniacs 52130
275 34706
consenting 27631
snuggled 40836
inanimate 15492
uality 52030
bronte 11926
errors 4010
dialogs 3230
yomada's 52031
madman's 34707
dialoge 30585
usenet 52033
videodrome 40837
kid' 26338
pawed 52034
'girlfriend' 30569
'pleasure 52035
'reloaded' 52036
k

In [58]:
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3


In [59]:
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

decode_review(x_train[0])

"<START> in with i like horrible business chinese charm would killer waited which explosion ? going at fun ? film make like lame character has novel <UNK> a all final sense ? real ? find character nothing <UNK> second perhaps they ? find valuable cover this city an br overall ? horror has i ? should shop was in with ? delightful 00 despite ? with their people is i like horrible an well it br garbage ? with this genre this is i taken that <UNK> ? she sex is and house and after ? ? product bud i final which returned be ? does is i an annoying ? film where if at man it's film sent be ? with is comedy you than some ? in perfect i get <UNK> and ? think plot windows it fun ? <UNK> the lou ? sequence at their like horrible wanted on getting night just the ? <UNK> rich br any other ? couple it someone then he decade more on why ? can't ajay that ? family with for still wanted on final ? such his lindsay that if at you interesting how film any ? family would i an g other is i once ? i boot seen

In [60]:
decode_review(x_train[0])

"<START> in with i like horrible business chinese charm would killer waited which explosion ? going at fun ? film make like lame character has novel <UNK> a all final sense ? real ? find character nothing <UNK> second perhaps they ? find valuable cover this city an br overall ? horror has i ? should shop was in with ? delightful 00 despite ? with their people is i like horrible an well it br garbage ? with this genre this is i taken that <UNK> ? she sex is and house and after ? ? product bud i final which returned be ? does is i an annoying ? film where if at man it's film sent be ? with is comedy you than some ? in perfect i get <UNK> and ? think plot windows it fun ? <UNK> the lou ? sequence at their like horrible wanted on getting night just the ? <UNK> rich br any other ? couple it someone then he decade more on why ? can't ajay that ? family with for still wanted on final ? such his lindsay that if at you interesting how film any ? family would i an g other is i once ? i boot seen

In [65]:

x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)
x_train.shape, x_test.shape


((25000, 100), (25000, 100))

In [67]:

db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)

db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)

print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)

# change shape to (batchsz, max_review_len)

db_train = db_train.map(lambda x, y: (tf.reshape(x, (-1, max_review_len)), y))
db_test = db_test.map(lambda x, y: (tf.reshape(x, (-1, max_review_len)), y))
print('db_train shape:', next(iter(db_train))[0].shape)
print('db_test shape:', next(iter(db_test))[0].shape)

x_train shape: (25000, 100) tf.Tensor(1, shape=(), dtype=int64) tf.Tensor(0, shape=(), dtype=int64)
x_test shape: (25000, 100)
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
db_train shape: (32, 100)
db_test shape: (32, 100)


## 11.5.2 Network Model

In [68]:
class MyRNN(keras.Model):
    def __init__(self, units):
        super(MyRNN, self).__init__()
        self.state0 = [tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units])]
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.5)
        self.outlayer = layers.Dense(1)
    
    def call(self, inputs, training=None):
        x = inputs
        x = self.embedding(x)
        state0 = self.state0
        state1 = self.state1
        for word in tf.unstack(x, axis=1):
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            out1, state1 = self.rnn_cell1(out0, state1, training=training)
        x = self.outlayer(out1, training=training)
        prob = tf.sigmoid(x)
        return prob

## 11.5.3 Training and Testing

In [77]:
max_review_len = 100
total_words = 10000
embedding_len = 32
batchsz = 32

inputs = keras.Input(shape=(max_review_len), dtype=tf.int32)

model = MyRNN(units=64)
outputs = model(inputs)

# compile and fit the model
model.compile(optimizer=keras.optimizers.Adam(0.001),
                loss=losses.BinaryCrossentropy(),
                metrics=['accuracy'])

model.fit(db_train, epochs=20, validation_data=db_test)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1b9cec7c6d0>