# Recurrent models 101

Introduction:
Recurrent Neural Networks (RNNs), Gated Recurrent Units (GRUs), and Long Short-Term Memory networks (LSTMs) are powerful tools in the realm of deep learning, particularly for sequential data tasks. In this practical, we'll delve into the basics of these architectures, implement simple models using PyTorch, and verify their functionality by feeding them random data of varying sizes.

In this practical, you are going to implement small recurrent models (RNNs, GRUs, LSTMs ...) and test them using different input values.



In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

### RNN

Implement a small RNN model with a single RNN layer

You can have a look at the PyTorch documentation: https://pytorch.org/docs/stable/generated/torch.nn.RNN.html

The model's forward pass should simply return the output and the hidden state of the RNN.

In [6]:
class SimpleRNN(nn.Module):
    """Thin wrapper around a single-layer ``nn.RNN``.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Number of features in the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, features).
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )

    def forward(self, x):
        """Run ``x`` through the RNN and return ``(output, hidden_state)``."""
        sequence_out, last_hidden = self.rnn(x)
        return sequence_out, last_hidden

Let's test your model. Try it with different sequence lengths.

In [7]:
# Smoke test: feed the model random data drawn with torch.randn

# Dimensions of the experiment
input_size, hidden_size = 10, 20
batch_size = 3
seq_length = 10  # change this value to try other sequence lengths

# Network under test
model = SimpleRNN(input_size, hidden_size)

# Random batch laid out as (batch, sequence, features)
input_data = torch.randn(batch_size, seq_length, input_size)

# Forward pass
output, hidden_state = model(input_data)

print("Output shape:", output.size())
print("Hidden state shape:", hidden_state.size())

Output shape: torch.Size([3, 10, 20])
Hidden state shape: torch.Size([1, 3, 20])


Now implement a bidirectional RNN; you only have to change one line.

In [8]:
class BidirectionalRNN(nn.Module):
    """Single-layer ``nn.RNN`` that reads the sequence in both directions.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Number of features in the hidden state of each direction.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # bidirectional=True is the only difference from the plain RNN.
        self.rnn = nn.RNN(
            input_size,
            hidden_size,
            bidirectional=True,
            batch_first=True,
        )

    def forward(self, x):
        """Return the ``(output, hidden_state)`` pair produced by the RNN."""
        both_directions_out, final_hidden = self.rnn(x)
        return both_directions_out, final_hidden

In [9]:
# Smoke test: feed the model random data drawn with torch.randn

# Dimensions of the experiment
batch_size = 3
seq_length = 10  # change this value to try other sequence lengths
input_size = 10
hidden_size = 20

# Network under test
model = BidirectionalRNN(input_size, hidden_size)

# Random batch laid out as (batch, sequence, features)
input_data = torch.randn(batch_size, seq_length, input_size)

# Forward pass
output, hidden_state = model(input_data)

print("Output shape:", output.size())
print("Hidden state shape:", hidden_state.size())

Output shape: torch.Size([3, 10, 40])
Hidden state shape: torch.Size([2, 3, 20])


Let's implement a stacked RNN, i.e. several recurrent layers placed on top of each other (see the `num_layers` argument of `nn.RNN`).

In [13]:
class StackedRNN(nn.Module):
    """Unidirectional RNN built from several stacked ``nn.RNN`` layers.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Number of features in the hidden state of every layer.
        num_layers: Number of recurrent layers to stack. Defaults to 5, the
            depth that used to be hard-coded here.
    """

    def __init__(self, input_size, hidden_size, num_layers=5):
        super().__init__()
        self.rnn = nn.RNN(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

    def forward(self, x):
        """Return ``(output, hidden_state)`` as produced by the stacked RNN.

        ``output`` comes from the top layer only, while ``hidden_state``
        holds one final state per layer.
        """
        out, h = self.rnn(x)
        return out, h


# Backward-compatible alias: this model used to be (mis)named
# ``BidirectionalRNN`` although it is a stacked, unidirectional network.
BidirectionalRNN = StackedRNN

In [14]:
# Smoke test: feed the model random data drawn with torch.randn

# Dimensions of the experiment
input_size, hidden_size = 10, 20
batch_size = 3
seq_length = 15  # change this value to try other sequence lengths

# Network under test
model = BidirectionalRNN(input_size, hidden_size)

# Random batch laid out as (batch, sequence, features)
input_data = torch.randn(batch_size, seq_length, input_size)

# Forward pass
output, hidden_state = model(input_data)

print("Output shape:", output.size())
print("Hidden state shape:", hidden_state.size())

Output shape: torch.Size([3, 15, 20])
Hidden state shape: torch.Size([5, 3, 20])


Now connect one of the previous models with a single linear layer for binary classification.

Use the output generated by the model at the last time step.

In [16]:
# RNN with a linear head for binary classification
class SimpleRNN(nn.Module):
    """Single-layer RNN followed by a linear layer with one output unit.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Number of features in the RNN hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        # One output unit: a single score per sequence.
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the linear layer applied to the RNN output of the last step."""
        rnn_out, _ = self.rnn(x)
        last_step = rnn_out[:, -1, :]
        return self.fc(last_step)

In [18]:
# Smoke test: feed the model random data drawn with torch.randn

# Model and data dimensions
input_size, hidden_size = 10, 20
batch_size, seq_length = 3, 15

# Build the classifier
model = SimpleRNN(input_size, hidden_size)

# Random batch laid out as (batch, sequence, features)
input_data = torch.randn(batch_size, seq_length, input_size)

# Forward pass: one value per sequence in the batch
output = model(input_data)

# Report what came out
print("Output shape:", output.size())

Output shape: torch.Size([3, 1])


Now implement a simple GRU model, without any Linear layer

You can check the PyTorch documentation: https://pytorch.org/docs/stable/generated/torch.nn.GRU.html

In [23]:
# Bare GRU model (no linear head)
class SimpleGRU(nn.Module):
    """Thin wrapper around a single-layer ``nn.GRU``.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Number of features in the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )

    def forward(self, x):
        """Run ``x`` through the GRU and return ``(output, hidden_state)``."""
        sequence_out, last_hidden = self.gru(x)
        return sequence_out, last_hidden

In [25]:
# Smoke test: feed the model random data drawn with torch.randn

# Model and data dimensions
input_size, hidden_size = 10, 20
batch_size, seq_length = 3, 15

# Build the GRU
model = SimpleGRU(input_size, hidden_size)

# Random batch laid out as (batch, sequence, features)
input_data = torch.randn(batch_size, seq_length, input_size)

# Forward pass
output, hidden = model(input_data)

# Report what came out
print("Output shape:", output.size())
print("Hidden shape:", hidden.size())

Output shape: torch.Size([3, 15, 20])
Hidden shape: torch.Size([1, 3, 20])


Implement a small LSTM model. Return the output and the hidden state.

You can check the PyTorch documentation:

https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html#torch.nn.LSTM

In [34]:
# Bare LSTM model (no linear head)
class SimpleLSTM(nn.Module):
    """Thin wrapper around a single-layer ``nn.LSTM``.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Number of features in the hidden and cell states.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )

    def forward(self, x):
        """Return ``(output, states)``; ``states`` is the LSTM's own
        ``(hidden_state, cell_state)`` tuple, passed through untouched."""
        sequence_out, states = self.lstm(x)
        return sequence_out, states

In [35]:
# Smoke test: feed the model random data drawn with torch.randn

# Model and data dimensions
input_size, hidden_size = 10, 20
batch_size, seq_length = 3, 15

# Build the LSTM
model = SimpleLSTM(input_size, hidden_size)

# Random batch laid out as (batch, sequence, features)
input_data = torch.randn(batch_size, seq_length, input_size)

# Forward pass; `hidden` is the (hidden state, cell state) pair
output, hidden = model(input_data)

# Report what came out
print("Output shape:", output.size())
print("Hidden shape:", hidden[0].size())
print("Cell shape:", hidden[1].size())

Output shape: torch.Size([3, 15, 20])
Hidden shape: torch.Size([1, 3, 20])
Cell shape: torch.Size([1, 3, 20])
