<a href="https://colab.research.google.com/github/Undasnr/DL-ML/blob/main/Ronny_LSTM_series_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. Execution of various methods**

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Record the TensorFlow build so the saved outputs can be tied to a version.
print(f"TensorFlow version: {tf.__version__}")

TensorFlow version: 2.19.0


In [2]:
# IMDB sentiment data: settings shared by the cells that follow
max_features = 20000   # vocabulary size passed to the loader as num_words
maxlen = 200           # every review is padded/truncated to this many tokens
batch_size = 64
epochs = 3             # deliberately small so the demo finishes quickly

(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=max_features)

# Apply the same padding/truncation to both splits
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Building the Model
def build_text_rnn(model_type='SimpleRNN', units=32):
    """Build and compile a binary text classifier.

    Architecture: Input -> Embedding(128) -> recurrent layer -> Dense(1, sigmoid).

    Reads the notebook-level ``max_features`` (embedding vocabulary size) and
    ``maxlen`` (input sequence length) at call time.

    Args:
        model_type: Recurrent layer to use: 'SimpleRNN', 'GRU' or 'LSTM'.
        units: Number of units of the recurrent layer.

    Returns:
        A compiled ``keras.Sequential`` model (binary cross-entropy loss,
        Adam optimizer, accuracy metric).

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Resolve the layer class first so an invalid name fails before any
    # layer or model object is created.
    if model_type == 'SimpleRNN':
        recurrent_cls = layers.SimpleRNN
    elif model_type == 'GRU':
        recurrent_cls = layers.GRU
    elif model_type == 'LSTM':
        recurrent_cls = layers.LSTM
    else:
        raise ValueError("Unknown model_type")

    model = keras.Sequential()
    # Declare the fixed sequence length with an explicit Input instead of
    # Embedding(input_length=...), which Keras 3 deprecates and ignores.
    model.add(keras.Input(shape=(maxlen,), dtype='int32'))
    model.add(layers.Embedding(max_features, 128))
    model.add(recurrent_cls(units))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

In [4]:
# Training & evaluating each recurrent layer
results = {}
for rnn_type in ['SimpleRNN', 'GRU', 'LSTM']:
    print(f"\nTraining {rnn_type}…")
    # Drop the previous model's Keras state and start every run from the same
    # seed, so the three layer types are compared under identical
    # initialisation/shuffling and a re-run yields comparable numbers.
    keras.backend.clear_session()
    keras.utils.set_random_seed(42)
    model = build_text_rnn(model_type=rnn_type, units=32)  # based on imdb_lstm.py
    history = model.fit(x_train, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_split=0.2,
                        verbose=1)
    # Only the held-out test accuracy is kept for the summary table.
    loss, acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    results[rnn_type] = acc
    print(f"{rnn_type} Test Accuracy: {acc:.4f}")


Training SimpleRNN…
Epoch 1/3




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 82ms/step - accuracy: 0.5758 - loss: 0.6631 - val_accuracy: 0.8030 - val_loss: 0.4445
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 91ms/step - accuracy: 0.8465 - loss: 0.3584 - val_accuracy: 0.7978 - val_loss: 0.4991
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 83ms/step - accuracy: 0.9547 - loss: 0.1444 - val_accuracy: 0.7544 - val_loss: 0.5725
SimpleRNN Test Accuracy: 0.7502

Training GRU…
Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 161ms/step - accuracy: 0.6776 - loss: 0.5721 - val_accuracy: 0.7580 - val_loss: 0.5016
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 161ms/step - accuracy: 0.8745 - loss: 0.3144 - val_accuracy: 0.8680 - val_loss: 0.3178
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 161ms/step - accuracy: 0.9467 - loss: 0.1574 - val_accuracy: 0.8678 -

In [5]:
# Summary of text-classification results
import pandas as pd
# One row per recurrent layer type, single 'Test Accuracy' column
pd.Series(results, name='Test Accuracy').to_frame()

Unnamed: 0,Test Accuracy
SimpleRNN,0.7502
GRU,0.86288
LSTM,0.86188


In [6]:
# ConvLSTM2D: Next-Frame Video Prediction
import numpy as np
from tensorflow.keras import layers, models, utils

# 3.1 Loading Moving MNIST (provided by example)
path = keras.utils.get_file(
    'moving_mnist.npy',
    'http://www.cs.toronto.edu/~nitish/unsupervised_video/mnist_test_seq.npy'
)
dataset = np.load(path)               # shape: (20, 10000, 64, 64)
dataset = np.swapaxes(dataset, 0, 1)  # → (10000, 20, 64, 64)
# Subsample first, then normalize in float32: dividing integer pixel data by a
# Python float would otherwise promote the whole array to float64.
dataset = dataset[:1000].astype('float32') / 255.0
dataset = np.expand_dims(dataset, -1) # add channel dimension

# 3.2 Preparing input/output: first 10 frames → next 10 frames
x = dataset[:, :10, ...]
y = dataset[:, 10:, ...]

# 3.3 Building ConvLSTM2D model
# The input shape is declared with an explicit Input layer; passing
# input_shape= to the first layer of a Sequential model is deprecated in Keras 3.
model = models.Sequential([
    layers.Input(shape=(10, 64, 64, 1)),
    layers.ConvLSTM2D(
        filters=40,
        kernel_size=(3,3),
        padding='same',
        return_sequences=True,  # keep one output per input frame
        activation='tanh'
    ),
    layers.BatchNormalization(),
    # Collapse the 40 feature maps back to a single-channel frame in [0, 1]
    layers.Conv3D(
        filters=1,
        kernel_size=(3,3,3),
        activation='sigmoid',
        padding='same'
    )
])
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()

Downloading data from http://www.cs.toronto.edu/~nitish/unsupervised_video/mnist_test_seq.npy
[1m819200096/819200096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 0us/step


  super().__init__(**kwargs)


In [7]:
# Short demonstration run: 5 epochs, with 20% of the samples held out for validation
history = model.fit(
    x=x,
    y=y,
    validation_split=0.2,
    epochs=5,
    batch_size=8,
    verbose=1,
)

Epoch 1/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m582s[0m 6s/step - loss: 0.4386 - val_loss: 0.4378
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m625s[0m 6s/step - loss: 0.1932 - val_loss: 0.3396
Epoch 3/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m575s[0m 6s/step - loss: 0.1892 - val_loss: 0.3046
Epoch 4/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m576s[0m 6s/step - loss: 0.1906 - val_loss: 0.2176
Epoch 5/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m622s[0m 6s/step - loss: 0.1890 - val_loss: 0.2177


Key Observations:

Performance Comparison: Typically, LSTM and GRU outperform SimpleRNN due to their gating mechanisms, as displayed by the code output.

Type  | Test Accuracy
------|--------------
SimpleRNN | 0.75020
GRU | 0.86288
LSTM | 0.86188

Training Time: SimpleRNN < GRU < LSTM < ConvLSTM2D (generally)

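Parameter Count: Looking at the recurrent layer alone, SimpleRNN has the fewest parameters and ConvLSTM2D the most; in the text models, the Embedding layer accounts for most of the total parameter count.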

**2. (Advanced assignment) Comparison between multiple data sets**

In [8]:
# Comparing RNN, GRU, and LSTM on the Reuters Dataset
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Parameters
num_words   = 10000    # top-vocabulary size
maxlen      = 200      # maximum sequence length
batch_size  = 64
epochs      = 5        # adjust for runtime/quality trade-off

# Loading Reuters dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.reuters.load_data(
    num_words=num_words,
    test_split=0.2,
    seed=113
)

# Pad/truncate to `maxlen`
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test  = keras.preprocessing.sequence.pad_sequences(x_test,  maxlen=maxlen)

# Number of classes, derived from the largest training label.
# Cast to a plain Python int: np.max returns a NumPy scalar, and this value is
# later used as the unit count of the output Dense layer.
num_classes = int(np.max(y_train)) + 1  # should be 46
print("Train sequences:", x_train.shape)
print("Test sequences: ", x_test.shape)
print("Number of classes:", num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train sequences: (8982, 200)
Test sequences:  (2246, 200)
Number of classes: 46


In [9]:
# Building the Model
def build_reuters_rnn(rnn_type='SimpleRNN', units=32):
    """Build and compile a multi-class text classifier.

    Architecture: Input -> Embedding(128) -> RNN (SimpleRNN/GRU/LSTM) -> Dense(softmax).

    Reads the notebook-level ``num_words`` (embedding vocabulary size),
    ``maxlen`` (input sequence length) and ``num_classes`` (output width)
    at call time.

    Args:
        rnn_type: Recurrent layer to use: 'SimpleRNN', 'GRU' or 'LSTM'.
        units: Number of units of the recurrent layer.

    Returns:
        A compiled ``keras.Sequential`` model (sparse categorical
        cross-entropy loss, Adam optimizer, accuracy metric).

    Raises:
        KeyError: If ``rnn_type`` is not one of the supported names.
    """
    # Map names to layer classes, not instances, so that only the requested
    # recurrent layer is ever constructed.
    recurrent_classes = {
        'SimpleRNN': layers.SimpleRNN,
        'GRU':       layers.GRU,
        'LSTM':      layers.LSTM,
    }
    recurrent_layer = recurrent_classes[rnn_type](units)

    model = keras.Sequential([
        # Explicit Input replaces Embedding(input_length=...), which Keras 3
        # deprecates and ignores.
        keras.Input(shape=(maxlen,), dtype='int32'),
        layers.Embedding(input_dim=num_words,
                         output_dim=128),
        recurrent_layer,
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

In [10]:
# Training and Evaluating each recurrent layer
results = {}

for layer in ['SimpleRNN', 'GRU', 'LSTM']:
    print(f"\n>>> Training {layer} on Reuters")
    # Drop the previous model's Keras state and start every run from the same
    # seed, so the three layer types are compared under identical
    # initialisation/shuffling and a re-run yields comparable numbers.
    keras.backend.clear_session()
    keras.utils.set_random_seed(42)
    model = build_reuters_rnn(rnn_type=layer, units=32)
    model.fit(
        x_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.2,
        verbose=2
    )
    # Only the held-out test accuracy is kept for the comparison.
    loss, acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    results[layer] = acc
    print(f"{layer} Test Accuracy: {acc:.4f}")


>>> Training SimpleRNN on Reuters
Epoch 1/5




113/113 - 10s - 91ms/step - accuracy: 0.3439 - loss: 2.5956 - val_accuracy: 0.4574 - val_loss: 2.2665
Epoch 2/5
113/113 - 9s - 79ms/step - accuracy: 0.4430 - loss: 2.1077 - val_accuracy: 0.4591 - val_loss: 1.9696
Epoch 3/5
113/113 - 8s - 74ms/step - accuracy: 0.5815 - loss: 1.7041 - val_accuracy: 0.4869 - val_loss: 1.8830
Epoch 4/5
113/113 - 9s - 76ms/step - accuracy: 0.6014 - loss: 1.6123 - val_accuracy: 0.4730 - val_loss: 2.0451
Epoch 5/5
113/113 - 7s - 64ms/step - accuracy: 0.6928 - loss: 1.3027 - val_accuracy: 0.4719 - val_loss: 2.0804
SimpleRNN Test Accuracy: 0.4653

>>> Training GRU on Reuters
Epoch 1/5
113/113 - 18s - 163ms/step - accuracy: 0.3595 - loss: 2.6332 - val_accuracy: 0.4619 - val_loss: 2.1138
Epoch 2/5
113/113 - 16s - 142ms/step - accuracy: 0.5134 - loss: 1.8990 - val_accuracy: 0.5437 - val_loss: 1.7593
Epoch 3/5
113/113 - 20s - 181ms/step - accuracy: 0.5773 - loss: 1.6575 - val_accuracy: 0.5626 - val_loss: 1.6941
Epoch 4/5
113/113 - 21s - 190ms/step - accuracy: 0.607

**3. Explanation of other classes**

Keras splits its sequence‐processing API into two main concepts: the RNN wrapper (a layer that loops over time steps) and the Cell (an object that processes exactly one time step).

Overview of various classes of Keras

1. tf.keras.layers.RNN

A generic recurrent‐loop layer that can wrap any RNNCell.

Purpose: Iterates a given cell over time steps of an input sequence.

Key arguments

cell: a single RNN cell instance, or a list of cell instances (a list is wrapped in StackedRNNCells)

return_sequences: whether to return output at every step (True) or only the final output (False)

return_state: whether to output the last hidden (and cell) states

go_backwards, stateful, unroll for fine‐grained control

Use cases

Custom RNNs built from scratch (e.g., attention cells, neuromorphic cells)

Accessing lower‐level cell APIs
```python
cell = keras.layers.LSTMCell(units=32)
rnn  = keras.layers.RNN(cell, return_sequences=True)
out  = rnn(input_seq)  # input_seq shape: (batch, timesteps, features)
```
---
2. SimpleRNNCell

The single‐step implementation of a vanilla RNN.

State: one hidden state h_t.

Parameters

kernel: weights for the input (x_t → h_t)

recurrent_kernel: weights for the previous state (h_{t-1} → h_t)

bias

Usage

Wrapped by layers.RNN(SimpleRNNCell(...)) for full‐sequence processing

Useful for research when you need to combine or modify basic RNN steps

---
3. GRUCell

A single‐step Gated Recurrent Unit (GRU).

State: one hidden state h_t (no separate cell state).

Gates: update and reset gates controlling flow of information.

Parameters

kernel, recurrent_kernel, bias (including separate biases for each gate)

Usage
```python
cell = keras.layers.GRUCell(units=64)
rnn  = keras.layers.RNN(cell, return_state=True)
output, final_state = rnn(input_seq)
```
---
4. LSTMCell

A single‐step Long Short‐Term Memory (LSTM) cell.

State: two tensors

hidden state h_t

cell state c_t

Gates: input, forget, output gates, plus cell candidate.

Parameters

kernel, recurrent_kernel, bias (for all four gates)

Usage
```python
cell = keras.layers.LSTMCell(units=128)
rnn  = keras.layers.RNN(cell, return_state=True)
out, h_final, c_final = rnn(input_seq)
```
---
5. StackedRNNCells

A container that chains multiple RNNCell instances into a single composite cell.

Purpose: Build deep (multi‐layer) RNNs within a single RNN wrapper.

Arguments

cells: list of RNNCell instances (e.g., two LSTMCell objects)

Behaviors

At each time step, feeds input through the first cell, then its output into the next, and so on.

Usage
```python
cells       = [keras.layers.LSTMCell(64), keras.layers.LSTMCell(64)]
stacked_cell = keras.layers.StackedRNNCells(cells)
rnn          = keras.layers.RNN(stacked_cell, return_sequences=True)
out_seq      = rnn(input_seq)
```
---
6. CuDNNGRU and CuDNNLSTM

GPU-optimized implementations of GRU and LSTM backed by NVIDIA’s cuDNN library.

Advantages: Up to 10× faster on supported GPUs.

Constraints

No masking support

Fixed activations (tanh for cell, sigmoid for gates)

No recurrent dropout

Status in TF-2.x

tf.keras.layers.GRU and tf.keras.layers.LSTM automatically use the CuDNN kernels when run on GPU and when their configuration matches cuDNN constraints.

The separate CuDNNGRU / CuDNNLSTM classes exist primarily for backward compatibility.

Usage
```python
# In TF-1.x you might have used:
x = Input((timesteps, features))
y = tf.keras.layers.CuDNNLSTM(128)(x)

# In TF-2.x simply:
y = tf.keras.layers.LSTM(128)(x)  # uses cuDNN when possible
```
---
When to Use Which

>SimpleRNN, GRU, LSTM layers: high-level, drop-in sequence layers for most tasks.

>RNN + *Cell: when you need custom time‐step logic or wish to combine cells in novel ways.

>StackedRNNCells: to build a multi‐layer RNN without stacking multiple RNN layers (keeps state management in one object).

>CuDNNGRU / CuDNNLSTM: primarily of historic interest; in modern TF-2.x just configure GRU/LSTM for automatic GPU acceleration.