# IMDB: recurrent neural networks

## Data preprocessing

### Required imports

In [26]:
from tensorflow import keras
from keras.datasets import imdb
from keras.preprocessing import sequence
import numpy as np
from sklearn.model_selection import train_test_split

### Processing

Load the training and test data.  To limit computation time, we restrict the number of words to 5,000.

In [3]:
# Vocabulary limit handed to the loader; reused further down as the
# input dimension of the Embedding layer.
num_words = 5_000
# Each split is unpacked as an (inputs, labels) pair.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

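Since the reviews vary in length, and we prefer to limit the computation time, we will base the classification on 100 tokens of each review. Note that `pad_sequences` pads and truncates at the *start* of a sequence by default, so reviews longer than 100 tokens keep their last 100 tokens, while shorter ones are left-padded with zeros.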

In [4]:
# Fixed sequence length for every review; also passed to the Embedding layer
# as its input_length.
feature_length = 100
# Bring both splits to the same length in one pass (training split first,
# then the test split).
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=feature_length)
    for reviews in (x_train, x_test)
)

Now the training and test inputs are 2D arrays. We split the training set into a subset for actual training, and one for validation.  First we seed the random number generator to ensure reproducibility. In this case, we will use part of the 25000 training examples as validation data (by default, `train_test_split` holds out 25 % of the samples).

In [5]:
# Seed Python's `random`, NumPy and TensorFlow in a single call.  Seeding NumPy
# alone would fix the train/validation split but leave the network's weight
# initialisation and dropout masks unseeded, so training runs would still
# differ from one execution to the next.
keras.utils.set_random_seed(1234)

In [6]:
# Pin the split's RNG explicitly so the same partition is produced even when
# the global NumPy state has been consumed or cells are run out of order.
# NOTE: this cell overwrites x_train / y_train, so re-running it without
# reloading the data shrinks the training subset again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, random_state=1234
)

## GRU

### Required imports & model definition

In [9]:
from keras.layers import Activation, Dense, Dropout
from keras.layers import Embedding
from keras.layers import GRU
from keras.models import Sequential
from keras.optimizers import Adam

Again, to limit training times, we restrict ourselves to a small embedding dimension and a small number of recurrent units.

In [10]:
# Width of the learned word vectors and size of the GRU state.
vector_length = 64
num_units = 64
# Embedding -> GRU -> dropout -> one sigmoid output unit.
# mask_zero=True makes the recurrent layer skip the zero padding.
model = Sequential([
    Embedding(num_words, vector_length, mask_zero=True,
              input_length=feature_length),
    GRU(num_units),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

2024-03-20 08:36:59.345282: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-20 08:36:59.690269: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-20 08:36:59.690338: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-20 08:36:59.698161: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-20 08:36:59.698295: I external/local_xla/xla/stream_executor

In [11]:
# Print each layer's output shape and parameter count for the GRU model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 64)           320000    
                                                                 
 gru (GRU)                   (None, 64)                24960     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
 activation (Activation)     (None, 1)                 0         
                                                                 
Total params: 345025 (1.32 MB)
Trainable params: 345025 (1.32 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [12]:
# Binary cross-entropy loss, Adam with its default settings, and accuracy
# reported as the tracked metric.
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

###    Training

In [13]:
# Train the GRU model for 10 epochs in mini-batches of 64, evaluating on the
# validation split as well; the object returned by fit() is stored in `history`.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(x_val, y_val),
)

Epoch 1/10


2024-03-20 08:37:26.562996: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f7ad00e2460 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-20 08:37:26.563175: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2024-03-20 08:37:26.611882: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-03-20 08:37:27.624769: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
I0000 00:00:1710920247.776268    3711 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


The training accuracy is much better than the validation accuracy, so the model is likely heavily overfitted.

### Testing

In [14]:
# Score the trained GRU model on the test split; the result is [loss, accuracy].
model.evaluate(x_test, y_test)



[0.7476284503936768, 0.8191999793052673]

## LSTM

### Required imports & model definition

In [15]:
from keras.layers import LSTM

Again, to limit training times, we restrict ourselves to a small embedding dimension and a small number of recurrent units.

In [16]:
# Width of the learned word vectors and size of the LSTM state.
vector_length = 64
num_units = 64
# Embedding -> LSTM -> dropout -> one sigmoid output unit.
# mask_zero=True makes the recurrent layer skip the zero padding.
# Note: this rebinds `model`, replacing the previously built network.
model = Sequential([
    Embedding(num_words, vector_length, mask_zero=True,
              input_length=feature_length),
    LSTM(num_units),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

In [17]:
# Print each layer's output shape and parameter count for the LSTM model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 64)           320000    
                                                                 
 lstm (LSTM)                 (None, 64)                33024     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
 activation_1 (Activation)   (None, 1)                 0         
                                                                 
Total params: 353089 (1.35 MB)
Trainable params: 353089 (1.35 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [18]:
# Binary cross-entropy loss, Adam with its default settings, and accuracy
# reported as the tracked metric.
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

###    Training

In [19]:
# Train the LSTM model for 10 epochs in mini-batches of 64, evaluating on the
# validation split as well; the object returned by fit() is stored in `history`.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(x_val, y_val),
)

Epoch 1/10


2024-03-20 09:07:24.120218: W tensorflow/core/common_runtime/type_inference.cc:339] Type inference failed. This indicates an invalid graph that escaped type checking. Error message: INVALID_ARGUMENT: expected compatible input types, but input 1:
type_id: TFT_OPTIONAL
args {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_TENSOR
    args {
      type_id: TFT_INT32
    }
  }
}
 is neither a subtype nor a supertype of the combined inputs preceding it:
type_id: TFT_OPTIONAL
args {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_TENSOR
    args {
      type_id: TFT_FLOAT
    }
  }
}

	for Tuple type infernce function 0
	while inferring type of node 'cond_36/output/_23'


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


The training accuracy is much better than the validation accuracy, so the model is likely heavily overfitted.

### Testing

In [20]:
# Score the trained LSTM model on the test split; the result is [loss, accuracy].
model.evaluate(x_test, y_test)



[0.7737514972686768, 0.813319981098175]