In [28]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [29]:
plt.rcParams['font.size'] = 14
plt.rcParams['axes.grid'] = True

In [30]:
from tensorflow.keras.datasets import imdb

In [31]:
vocab_size = 10000
max_words = 500
(X_train, y_train), (X_test, y_test) = imdb.load_data(path='imdb.npz', 
                                                      num_words=vocab_size, 
                                                      skip_top=10, 
                                                      maxlen=max_words)

In [32]:
print("----review---")
print(X_train[10])

----review---
[2, 13, 119, 954, 189, 1554, 13, 92, 459, 48, 2, 116, 2, 1492, 2291, 42, 726, 2, 1939, 168, 2031, 13, 423, 14, 20, 549, 18, 2, 2, 547, 32, 2, 96, 39, 2, 454, 2, 2, 22, 2, 2, 55, 130, 168, 13, 92, 359, 2, 158, 1511, 2, 42, 2, 1913, 19, 194, 4455, 4121, 2, 114, 2, 72, 21, 465, 9667, 304, 2, 51, 2, 14, 20, 44, 155, 2, 2, 226, 162, 616, 651, 51, 2, 14, 20, 44, 10, 10, 14, 218, 4843, 629, 42, 3017, 21, 48, 25, 28, 35, 534, 2, 2, 320, 2, 516, 2, 42, 25, 181, 2, 130, 56, 547, 3571, 2, 1471, 851, 14, 2286]


In [33]:
print("----label----")
print(y_train[10])

----label----
0


In [34]:
word2id = imdb.get_word_index()

In [35]:
id2word = {i: word for word, i in word2id.items()}

In [36]:
print("------ review with words ------")
print([id2word.get(i,' ') for i in X_train[10]])

------ review with words ------
['and', 'was', 'did', 'hands', 'fact', 'absolute', 'was', 'then', 'laugh', 'what', 'and', 'love', 'and', 'notice', 'matters', "it's", 'similar', 'and', 'numerous', 'few', 'blind', 'was', "couldn't", 'as', 'on', 'type', 'but', 'and', 'and', 'slow', 'an', 'and', 'too', 'or', 'and', '5', 'and', 'and', 'you', 'and', 'and', 'time', 'here', 'few', 'was', 'then', 'kids', 'and', "didn't", 'billy', 'and', "it's", 'and', 'rubbish', 'film', 'thought', 'rotten', 'carl', 'and', 'little', 'and', 'we', 'not', 'seemed', 'paperhouse', 'beautiful', 'and', 'when', 'and', 'as', 'on', 'has', '10', 'and', 'and', 'script', 'actually', 'sad', 'happy', 'when', 'and', 'as', 'on', 'has', 'i', 'i', 'as', 'interesting', 'seeks', 'hero', "it's", 'nine', 'not', 'what', 'have', 'one', 'so', 'stories', 'and', 'and', 'star', 'and', 'car', 'and', "it's", 'have', 'pretty', 'and', 'here', 'she', 'slow', 'individuals', 'and', 'difference', 'note', 'as', 'sequels']


In [37]:
print("-------label-------")
print(y_train[10])

-------label-------
0


In [38]:
len(X_train[10])

117

In [39]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [40]:
X_train = pad_sequences(X_train, maxlen=max_words)
X_test = pad_sequences(X_test, maxlen=max_words)

In [41]:
X_train

array([[   0,    0,    0, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    2,  129,  113],
       ...,
       [   0,    0,    0, ...,   28, 1816,   98],
       [   0,    0,    0, ...,  158,   10,   10],
       [   0,    0,    0, ...,  220,  484,  867]], dtype=int32)

In [42]:
len(X_train[10])

500

In [43]:
X_train.shape

(25000, 500)

In [44]:
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_train.shape

(25000, 500, 1)

### Simple RNN

In [45]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, Bidirectional, Embedding

In [46]:
model = Sequential()
model.add(SimpleRNN(units=64, input_shape=(500,1)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [47]:
model.fit(X_train, y_train, epochs=5, batch_size=512, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f869177b828>

### Simple RNN with Embedding

In [48]:
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1]))
X_train.shape

(25000, 500)

In [49]:
model = Sequential()
n_feat = 32
model.add(Embedding(input_dim=vocab_size, output_dim=n_feat, trainable=True, input_length=max_words))
model.add(SimpleRNN(units=64))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [50]:
model.fit(X_train, y_train, epochs=5, batch_size=512, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f86868950f0>

### Deep RNN

In [51]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=n_feat, trainable=True, input_length=max_words))
model.add(SimpleRNN(units=64, return_sequences=True))
model.add(SimpleRNN(units=32))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, batch_size=512, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f8686a72dd8>

### Bi-Directional

In [52]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=n_feat, trainable=True, input_length=max_words))
model.add(Bidirectional(SimpleRNN(units=64)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, batch_size=512, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f86880170f0>

## LSTM

In [53]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=n_feat, trainable=True, input_length=max_words))
model.add(LSTM(units=64))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, batch_size=512, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f8687086208>

## Deep LSTM

In [54]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=n_feat, trainable=True, input_length=max_words))
model.add(Bidirectional(LSTM(units=64, return_sequences=True)))
model.add(LSTM(units=32, return_sequences=True))
model.add(LSTM(units=16, return_sequences=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, batch_size=512, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f86232be128>