In [None]:
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Bidirectional, Dense, Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np


In [None]:
# Restrict the vocabulary to the 5000 most frequent words of the
# review corpus; the value is handed to the loader as `num_words`.
vocal_size = 5000
(x_train, Y_train), (x_test, y_test) = imdb.load_data(num_words=vocal_size)

# Show the first training review as stored: a list of integer word indices.
print(x_train[0])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 2, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25

In [None]:
# getting all the words from the word_index dictionary
word_idx = imdb.get_word_index()
# the dictionary maps word -> index, so invert it to be able to look a
# word up by its index (index as key, word as value)
word_idx = {i: word for word, i in word_idx.items()}

# imdb.load_data() shifts every word index by 3 by default (index_from=3),
# reserving the low ids for special tokens (padding, start-of-sequence,
# out-of-vocabulary). Subtract the offset before the lookup; ids that
# have no word (the special tokens) are shown as "?".
index_offset = 3

# print the review again, this time as readable text; joining in sequence
# order keeps word order and repeated words, which a set would discard
print(" ".join(word_idx.get(i - index_offset, "?") for i in x_train[0]))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
{'story', 'never', 'it', 'that', 'mean', 'atmosphere', 'character', 'unfortunately', 'when', 'why', 'he', 'thought', 'out', 'seen', 'or', 'journalist', 'half', 'should', 'script', 'from', 'while', 'to', 'watch', 'camp', 'going', 'sometimes', 'of', 'will', 'serious', 'lets', 'with', 'room', 'armed', 'itself', 'except', 'two', 'most', '1', 'than', 'whether', 'powerful', 'loves', 'lot', 'was', 'name', 'is', 'nobody', 'film', "isn't", 'critics', 'shadows', 'very', 'history', 'get', 'their', 'moments', 'him', 'chest', 'seeing', 'boat', 'scary', 'more', 'she', 'becomes', 'after', 'so', 'not', 'heart', 'then', 'help', 'acting', 'bit', 'and', 'now', 'here', 'other', 'reaching', 'you', 'enough', 'for', 'but', 'her', '70s', 'several', 'any', 'shows', 'in', 'odd', 'wonderful', 'have', 'current', 'anyone', 'movie',

In [None]:
# get the minimum and the maximum length of a review over both splits.
# Each review is measured on its own: `x_train + x_test` on two object
# arrays adds element-wise, i.e. it glues the i-th train review to the
# i-th test review, so the lengths reported that way were of pairs.
# NOTE: run this before padding, while the reviews still have their
# original, variable lengths.
review_lengths = [len(review) for split in (x_train, x_test) for review in split]
print("max length of a review ::", max(review_lengths))
print("min length of a review ::", min(review_lengths))

max length of a review :: 2697
min length of a review :: 70


In [None]:
from tensorflow.keras.preprocessing import sequence

# Give every review the same fixed length of 400 tokens
# (shorter reviews are padded, longer ones are cut).
max_words = 400
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=max_words)
    for reviews in (x_train, x_test)
)

# Hold out the first 5000 training samples for validation and
# keep the remainder as the actual training set.
x_valid = x_train[:5000]
Y_valid = Y_train[:5000]
x_train = x_train[5000:]
Y_train = Y_train[5000:]


In [None]:
# Defining LSTM model
# Stack: token embedding -> one LSTM layer -> single sigmoid unit that
# outputs the probability for the binary label.
embd_len = 32  # dimensionality of each learned word vector
lstm_model = Sequential(name="LSTM_Model")
# NOTE(review): newer Keras releases appear to deprecate `input_length`;
# it is kept here so the cell behaves as before - confirm against the
# installed Keras version.
lstm_model.add(Embedding(vocal_size,
                         embd_len,
                         input_length=max_words))
# return_sequences=False: only the final LSTM output feeds the classifier
lstm_model.add(LSTM(128,
                    activation='tanh',
                    return_sequences=False))
lstm_model.add(Dense(1, activation='sigmoid'))

# Printing Model Summary
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the cell output.
lstm_model.summary()

# Compiling the model
lstm_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Training the model
# The held-out validation split is evaluated at the end of every epoch;
# verbose=2 logs one line per epoch instead of a progress bar.
history = lstm_model.fit(
    x_train,
    Y_train,
    batch_size=64,
    epochs=5,
    verbose=2,
    validation_data=(x_valid, Y_valid)
)



None
Epoch 1/5
