In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [16]:
# `import tensorflow.keras.datasets` only binds the top-level name `tensorflow`,
# so a bare `datasets` raises NameError on a fresh kernel. Import the
# sub-package under its own name instead.
from tensorflow.keras import datasets

# List every attribute of the datasets package (dunder attributes included).
print(dir(datasets))

['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'boston_housing', 'california_housing', 'cifar10', 'cifar100', 'fashion_mnist', 'imdb', 'mnist', 'reuters']


In [21]:
# Keep just the dataset entries of the package by discarding every
# attribute whose name begins with a double underscore.
dataset_names = list(filter(lambda attr: not attr.startswith('__'), dir(datasets)))

dataset_names

['boston_housing',
 'california_housing',
 'cifar10',
 'cifar100',
 'fashion_mnist',
 'imdb',
 'mnist',
 'reuters']

In [18]:
from tensorflow.keras.datasets import imdb

In [None]:
# boston_housing - Boston housing price dataset
# california_housing - California housing price dataset
# cifar10 - CIFAR-10 image dataset
# cifar100 - CIFAR-100 image dataset
# fashion_mnist - Fashion MNIST dataset (clothing images)
# imdb - IMDB movie reviews dataset (sentiment analysis)
# mnist - MNIST handwritten digit dataset
# reuters - Reuters news classification dataset

# pad_sequences

In [None]:
# RNNs need inputs of equal length (number of words per input).
# But sentences (or reviews) have different lengths, e.g.

# [23, 14, 78] → 3 words
# [10, 34, 12, 87, 64] → 5 words

# 👉 Solution: pad_sequences makes all sequences the same length by padding
# shorter ones with zeros (at the front by default) and truncating longer ones to `maxlen`.

In [2]:
# Dataset settings shared by the loading, padding and model cells
vocab_size = 10_000  # keep only the 10,000 most frequent words
max_len = 200        # reviews are padded/truncated to this many tokens

In [3]:
# Fetch the IMDB reviews with the vocabulary capped at `vocab_size` word indices,
# then unpack the (features, labels) pairs for the train and test splits.
imdb_splits = imdb.load_data(num_words=vocab_size)
(x_train, y_train), (x_test, y_test) = imdb_splits

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1us/step


In [30]:
# Show one review (index 10001) in its integer-encoded form.
# NOTE(review): the saved output was produced at execution count 30, i.e. after
# the padding cell (count 4), so it shows the zero-padded array, not the raw review.
sample_review = x_train[10001]
print(sample_review)

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    1   14   20   16  835  835
  835   51    6 1703   56   51    6  387  180   32  812   57 2327    6
  394  437    7  676    5   58   62   24  386   12    8   61 5301  912
   37 

In [23]:
# Sentiment label of the first training review: 0 = negative, 1 = positive.
first_label = y_train[0]
print(first_label)

1


In [4]:
# Bring every review in both splits to exactly `max_len` tokens so that
# all sequences share the same length.
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (x_train, x_test)
)

In [14]:
# Display the padded training matrix (the repr is abbreviated with "...").
x_train

array([[   5,   25,  100, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]])

In [5]:
# Build the RNN model.
# The input shape is declared with an explicit Input object instead of the
# Embedding `input_length` argument, which recent Keras releases deprecate.
# Declaring the shape up front also lets the model be built (and summarised)
# before training starts.
model = Sequential([
    tf.keras.Input(shape=(max_len,)),   # one padded review = max_len word indices
    Embedding(vocab_size, 32),          # word index -> 32-dimensional embedding
    SimpleRNN(64),                      # recurrent layer with 64 units
    Dense(1, activation='sigmoid'),     # single sigmoid output for the binary label
])



In [6]:
# Configure training: Adam optimizer, binary cross-entropy loss for the
# 0/1 sentiment label, and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Train for 5 epochs in batches of 64, holding out 20% of the training data
# for validation. The returned History object is bound to a name so the
# per-epoch metrics stay available for inspection, and so the cell no longer
# ends by echoing an object repr containing a memory address.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 68ms/step - accuracy: 0.5363 - loss: 0.6875 - val_accuracy: 0.7642 - val_loss: 0.5014
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 66ms/step - accuracy: 0.7846 - loss: 0.4679 - val_accuracy: 0.7980 - val_loss: 0.4448
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 64ms/step - accuracy: 0.8759 - loss: 0.3120 - val_accuracy: 0.8044 - val_loss: 0.4468
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 67ms/step - accuracy: 0.9251 - loss: 0.1969 - val_accuracy: 0.8400 - val_loss: 0.4131
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 67ms/step - accuracy: 0.9671 - loss: 0.1058 - val_accuracy: 0.7872 - val_loss: 0.5758


<keras.src.callbacks.history.History at 0x16fd8e51710>

In [8]:
# Score the trained model on the held-out test split; the result unpacks
# into the loss followed by the accuracy metric.
eval_metrics = model.evaluate(x_test, y_test)
test_loss, test_acc = eval_metrics
print("Test Accuracy:", test_acc)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.7912 - loss: 0.5663
Test Accuracy: 0.7929999828338623
