# How RNNs Process Sequential Data: Concepts and Input Flow

## Step 1: Setting Up the Environment for RNNs in PyTorch


In [None]:
import torch
import torch.nn as nn
import numpy as np

## Step 2: Defining an RNN in PyTorch


In [None]:
class SimpleRNN(nn.Module):
    """Stacked vanilla RNN followed by a linear read-out of the last time step.

    The recurrent layer is built with ``batch_first=True``, so inputs and the
    per-step outputs use the layout ``(batch, seq, feature)``.

    Args:
        input_size: Number of features in the input at each time step.
        hidden_size: Number of features in the hidden state.
        num_layers: Number of stacked recurrent layers.
        output_size: Number of values produced by the fully connected head.
            Defaults to 1 (a single value per sequence).
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size=1):
        super().__init__()
        # Kept on the instance because forward() needs them to size h0.
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True means input and output tensors are (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

        # Fully connected head mapping the last hidden features to the output.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the sequence through the RNN and decode the last time step.

        Args:
            x: Tensor of shape ``(batch_size, seq_length, input_size)``.

        Returns:
            Tensor of shape ``(batch_size, output_size)``.
        """
        # Initial hidden state: (num_layers * D, batch_size, hidden_size), with
        # D=1 for a unidirectional RNN. new_zeros() inherits BOTH the device and
        # the dtype of x, so the model keeps working after .double()/.half()
        # or after being moved to another device.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)

        # rnn_out: (batch_size, seq_length, hidden_size) - last layer's h_t for each t.
        # The final hidden state returned alongside it is not needed here.
        rnn_out, _ = self.rnn(x, h0)

        # Decode only the last time step: rnn_out[:, -1, :] is (batch_size, hidden_size).
        return self.fc(rnn_out[:, -1, :])

## Step 3: Flowing Data Through an RNN


In [None]:
# Hyperparameters shared by the model and the toy data set
input_dim = 1       # features per time step
hidden_dim = 5      # width of the hidden state
layer_dim = 1       # stacked recurrent layers
batch_size = 1      # sequences processed together
seq_len = 10        # time steps per sequence

# Toy sequence: sin(t) sampled at seq_len evenly spaced points on [0, pi]
time_steps = np.linspace(0, np.pi, seq_len)
data = np.sin(time_steps)

# The RNN consumes (batch_size, seq_length, input_size), so the flat
# (10,) array is viewed as (1, 10, 1) and converted to a float32 tensor.
data_reshaped = data.reshape(batch_size, seq_len, input_dim)
input_tensor = torch.from_numpy(data_reshaped).float()

print("--- Input Data ---")
print(f"Original data shape: {data.shape}")
print(f"Input tensor shape (batch_size, seq_length, input_size): {input_tensor.shape}")
print(f"Input tensor sample (first batch, first 5 time steps, first feature): \n{input_tensor[0, :5, 0]}")


--- Input Data ---
Original data shape: (10,)
Input tensor shape (batch_size, seq_length, input_size): torch.Size([1, 10, 1])
Input tensor sample (first batch, first 5 time steps, first feature): 
tensor([0.0000, 0.3420, 0.6428, 0.8660, 0.9848])


In [None]:
# Build the network from the hyperparameters defined earlier
model = SimpleRNN(input_dim, hidden_dim, layer_dim)
print("\n--- Model Architecture ---")
print(model)

# Reproduce by hand the zero initial hidden state that model.forward builds.
# nn.RNN expects h0 as (num_layers * D, batch_size, hidden_size); D=1 because
# the RNN is unidirectional.
h0_for_inspection = torch.zeros(
    layer_dim,
    input_tensor.shape[0],
    hidden_dim,
    device=input_tensor.device,
)

# Call only the recurrent layer (model.rnn), bypassing the linear head, to
# look at its two raw results:
#   rnn_layer_output - last layer's hidden features h_t for every time step t
#   rnn_layer_hn     - hidden state after the final time step, one per layer
rnn_layer_output, rnn_layer_hn = model.rnn(input_tensor, h0_for_inspection)

print("\n--- Inspecting RNN Layer Outputs (model.rnn) ---")
print(f"Shape of rnn_layer_output (batch_size, seq_length, hidden_size): {rnn_layer_output.shape}")
print(f"Shape of rnn_layer_hn (num_layers, batch_size, hidden_size): {rnn_layer_hn.shape}")


--- Model Architecture ---
SimpleRNN(
  (rnn): RNN(1, 5, batch_first=True)
  (fc): Linear(in_features=5, out_features=1, bias=True)
)

--- Inspecting RNN Layer Outputs (model.rnn) ---
Shape of rnn_layer_output (batch_size, seq_length, hidden_size): torch.Size([1, 10, 5])
Shape of rnn_layer_hn (num_layers, batch_size, hidden_size): torch.Size([1, 1, 5])


In [None]:
# Run the complete model: the RNN layer followed by the fully connected
# layer (model.fc) applied to the last time step.
final_output = model(input_tensor)

print(f"\n--- Model's Final Output (after FC layer) ---")
print(f"Final model output shape (batch_size, fc_output_size): {final_output.shape}")
# .item() extracts the Python number from a tensor holding a single value.
print(f"Final model output value: {final_output.item():.4f}")

# Illustrate what rnn_out[:, -1, :] means inside the forward pass:
# the hidden features of the last time step, taken from the last RNN layer.
last_time_step_hidden_activity = rnn_layer_output[:, -1, :]
print(f"\n--- Last Time Step's Hidden Activity (used by FC layer) ---")
print(f"Shape: {last_time_step_hidden_activity.shape}") # (batch_size, hidden_size) -> (1, 5)
print(f"Value (first batch): \n{last_time_step_hidden_activity[0]}")

# This slice is exactly what model.fc receives, so pushing it through
# model.fc by hand must reproduce the model's final output.
fc_output_manual = model.fc(last_time_step_hidden_activity)
# Compare with a tolerance instead of exact float equality: the two values come
# from separate forward passes, and bit-for-bit identity is not guaranteed on
# every backend.
outputs_match = torch.allclose(final_output, fc_output_manual)
print(f"\nOutput if manually passing last_time_step_hidden_activity to model.fc: {fc_output_manual.item():.4f}")
print(f"This should match the model's final output value: {final_output.item():.4f} == {fc_output_manual.item():.4f} -> {outputs_match}")


--- Model's Final Output (after FC layer) ---
Final model output shape (batch_size, fc_output_size): torch.Size([1, 1])
Final model output value: -0.0877

--- Last Time Step's Hidden Activity (used by FC layer) ---
Shape: torch.Size([1, 5])
Value (first batch): 
tensor([-0.2459, -0.4186,  0.2501,  0.4190, -0.4264],
       grad_fn=<SelectBackward0>)

Output if manually passing last_time_step_hidden_activity to model.fc: -0.0877
This should match the model's final output value: -0.0877 == -0.0877 -> True
