In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Embedding, Dense, SimpleRNN, Input



In [2]:
## Load Dataset
max_features = 10000  #Vocabulary size = 10000

(X_train,y_train), (X_train, y_train) = imdb.load_data(num_words = max_features)

X_train[0]


[1,
 591,
 202,
 14,
 31,
 6,
 717,
 10,
 10,
 2,
 2,
 5,
 4,
 360,
 7,
 4,
 177,
 5760,
 394,
 354,
 4,
 123,
 9,
 1035,
 1035,
 1035,
 10,
 10,
 13,
 92,
 124,
 89,
 488,
 7944,
 100,
 28,
 1668,
 14,
 31,
 23,
 27,
 7479,
 29,
 220,
 468,
 8,
 124,
 14,
 286,
 170,
 8,
 157,
 46,
 5,
 27,
 239,
 16,
 179,
 2,
 38,
 32,
 25,
 7944,
 451,
 202,
 14,
 6,
 717]

In [3]:
## Pre-sequencing to make the vectors symmetrical
max_len = 500 # This means each sentence is symmetrical 
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_train[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

In [4]:
## Important: 
# max_features=10000, means we get the one hot encoded of each word in sentence
## max_len=500, means we give the padding to the sentence to make all the sentence vectors of same size

In [5]:
## Train simple RNN
model = Sequential()
# Takes all the words, apply word2Vec on it and gives the each word to new array of size 128
model.add(Input(shape=(max_len,))) # this tells the input shape dimension
model.add(Embedding(max_features, 128, input_length=max_len))  ## Embedding layer
model.add(SimpleRNN(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))





2025-03-28 13:06:10.442257: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-03-28 13:06:10.442315: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-03-28 13:06:10.442327: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1743181570.442351  393052 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1743181570.442381  393052 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
model.summary()

In [7]:
## COmpiling the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [8]:
#Implement Early stopping
from tensorflow.keras.callbacks import EarlyStopping
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# train model
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping_callback]
)

Epoch 1/10


2025-03-28 13:06:22.180727: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m  1/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11:39:10[0m 67s/step - accuracy: 0.4688 - loss: 0.6952

In [10]:
## Save the model
model.save('Simple_rnn_imdb.h5')

