In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import pad_sequences 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Embedding, LSTM, SimpleRNN, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score 

In [None]:
num_words = 10000

In [18]:
# Manually load the dataset using numpy to avoid the path error in Keras
with np.load('Dataset/imdb.npz', allow_pickle=True) as f:
    X_train, y_train = f['x_train'], f['y_train']
    X_test, y_test = f['x_test'], f['y_test']

# Limit the vocabulary size (replace words >= num_words with OOV char 2)
X_train = np.array([[w if w < num_words else 2 for w in seq] for seq in X_train], dtype=object)
X_test = np.array([[w if w < num_words else 2 for w in seq] for seq in X_test], dtype=object)

In [19]:
X_train[0][:20]

[2,
 309,
 6,
 3,
 1069,
 209,
 9,
 2175,
 30,
 1,
 169,
 55,
 14,
 46,
 82,
 5869,
 41,
 393,
 110,
 138]

In [20]:
y_train[0]

np.int64(1)

In [21]:
maxlen = 200
X_train_padded = pad_sequences(X_train, maxlen=maxlen, padding='pre', truncating='pre')
X_test_padded = pad_sequences(X_test, maxlen=maxlen, padding='pre', truncating='pre')

In [22]:
X_train_padded.shape

(25000, 200)

In [23]:
X_test_padded.shape

(25000, 200)

# LSTM

In [24]:
embedding_dim = 64
model = Sequential()
model.add(Embedding(input_dim=num_words,output_dim=embedding_dim))
#RNN Layer
model.add(LSTM(64, return_sequences=False))
#Droput
model.add(Dropout(0.5))
#Optional Regularization
model.add(Dense(1,activation='sigmoid'))

In [25]:
model.compile(optimizer=Adam(learning_rate=0.001),loss='binary_crossentropy',metrics=['accuracy'])

In [26]:
model.summary()

In [27]:
history = model.fit(X_train_padded,y_train, epochs=5, batch_size=128, validation_split=0.2, verbose=1)

Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 132ms/step - accuracy: 0.7339 - loss: 0.5222 - val_accuracy: 0.8116 - val_loss: 0.5244
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 128ms/step - accuracy: 0.8834 - loss: 0.2921 - val_accuracy: 0.7554 - val_loss: 0.5997
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 121ms/step - accuracy: 0.9251 - loss: 0.2056 - val_accuracy: 0.8416 - val_loss: 0.3841
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 123ms/step - accuracy: 0.9410 - loss: 0.1633 - val_accuracy: 0.7530 - val_loss: 0.7077
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 121ms/step - accuracy: 0.9546 - loss: 0.1314 - val_accuracy: 0.8710 - val_loss: 0.3189
