In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
class SimpleRNN(nn.Module):
    """Single-layer tanh RNN written out with explicit weight matrices.

    For every time step ``t`` the module computes::

        h_t = tanh(x_t @ W_ih.T + h_{t-1} @ W_hh.T + b_hh)
        y_t = h_t @ W_ho.T + b_ho

    starting from an all-zero hidden state, and returns ``y_t`` for every step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the learnable parameters.

        Args:
            input_size: number of features in each time step of the input.
            hidden_size: width of the recurrent hidden state.
            output_size: number of features produced at each time step.
        """
        super().__init__()
        self.hidden_size = hidden_size

        # Input-to-hidden weights. There is no separate input bias; b_hh is
        # the only bias added inside the tanh.
        self.W_ih = nn.Parameter(torch.randn(hidden_size, input_size))

        # Hidden-to-hidden (recurrent) weights and the hidden bias.
        self.W_hh = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.b_hh = nn.Parameter(torch.zeros(hidden_size))

        # Hidden-to-output projection applied at every time step.
        self.W_ho = nn.Parameter(torch.randn(output_size, hidden_size))
        self.b_ho = nn.Parameter(torch.zeros(output_size))

    def forward(self, x):
        """Run the recurrence over a batch of sequences.

        Args:
            x: tensor of shape (batch_size, seq_len, input_size)

        Returns:
            Tensor of shape (batch_size, seq_len, output_size) holding the
            output of every time step.
        """
        batch_size, seq_len, _ = x.size()

        # Allocate the initial state with the input's dtype and device so the
        # module keeps working after ``.double()`` / ``.to(device)``; a bare
        # ``torch.zeros(...)`` is always CPU/float32 and would fail in matmul.
        h_t = torch.zeros(
            batch_size, self.hidden_size, dtype=x.dtype, device=x.device
        )

        outputs = []

        for t in range(seq_len):
            x_t = x[:, t, :]

            h_t = torch.tanh(
                torch.matmul(x_t, self.W_ih.T) + torch.matmul(h_t, self.W_hh.T) + self.b_hh
            )

            y_t = torch.matmul(h_t, self.W_ho.T) + self.b_ho
            outputs.append(y_t)

        # Stack the per-step outputs along a new time axis (dim=1).
        outputs = torch.stack(outputs, dim=1)
        return outputs

In [3]:
# synthetic data set
def generate_data(batch_size, seq_len, input_size):
    """Sample random binary sequences and their running sums.

    Args:
        batch_size: number of sequences to draw.
        seq_len: number of time steps per sequence.
        input_size: number of features per time step.

    Returns:
        A pair ``(x, y)`` of float tensors, both shaped
        (batch_size, seq_len, input_size). ``x`` contains 0.0/1.0 values and
        ``y`` is the cumulative sum of ``x`` along the time axis (dim=1).
    """
    shape = (batch_size, seq_len, input_size)
    bits = torch.randint(low=0, high=2, size=shape).float()  # high is exclusive -> {0, 1}
    running_total = bits.cumsum(dim=1)
    return bits, running_total

In [4]:
# hyperparameters
seed = 0  # fixes weight init and sampled batches so a fresh-kernel run is reproducible
input_size = 1
hidden_size = 8
output_size = 1
seq_len = 5
batch_size = 16
num_epochs = 1000
learning_rate = 0.01
log_every = 10  # print the loss once every this many epochs

# Seed before the model is built: both the randn-initialised weights and the
# randint-sampled training batches draw from torch's global RNG.
torch.manual_seed(seed)

model = SimpleRNN(input_size, hidden_size, output_size)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # A fresh random batch is drawn every step, so an "epoch" here is one update.
    x, y = generate_data(batch_size, seq_len, input_size)

    outputs = model(x) # shape: (batch_size, seq_len, output_size)

    loss = loss_fn(outputs, y)

    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % log_every == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/1000], Loss: 3.1615
Epoch [20/1000], Loss: 1.0910
Epoch [30/1000], Loss: 0.6083
Epoch [40/1000], Loss: 0.4901
Epoch [50/1000], Loss: 0.3515
Epoch [60/1000], Loss: 0.2832
Epoch [70/1000], Loss: 0.3106
Epoch [80/1000], Loss: 0.2479
Epoch [90/1000], Loss: 0.2651
Epoch [100/1000], Loss: 0.1878
Epoch [110/1000], Loss: 0.1377
Epoch [120/1000], Loss: 0.2009
Epoch [130/1000], Loss: 0.0881
Epoch [140/1000], Loss: 0.1891
Epoch [150/1000], Loss: 0.1948
Epoch [160/1000], Loss: 0.0953
Epoch [170/1000], Loss: 0.1866
Epoch [180/1000], Loss: 0.1013
Epoch [190/1000], Loss: 0.1640
Epoch [200/1000], Loss: 0.0655
Epoch [210/1000], Loss: 0.0410
Epoch [220/1000], Loss: 0.0496
Epoch [230/1000], Loss: 0.0654
Epoch [240/1000], Loss: 0.0351
Epoch [250/1000], Loss: 0.0506
Epoch [260/1000], Loss: 0.0373
Epoch [270/1000], Loss: 0.0445
Epoch [280/1000], Loss: 0.0635
Epoch [290/1000], Loss: 0.0681
Epoch [300/1000], Loss: 0.0265
Epoch [310/1000], Loss: 0.0333
Epoch [320/1000], Loss: 0.0244
Epoch [330/1000],

In [5]:
def evaluate_model(model, seq_len, input_size):
    """Print one random sequence, its target, and the model's prediction.

    Switches ``model`` to eval mode, draws a single sequence with
    ``generate_data(batch_size=1, ...)``, runs the model on it without
    tracking gradients, and prints input, target and prediction as NumPy
    arrays. Nothing is returned.

    Args:
        model: module called as ``model(x)`` on the generated input.
        seq_len: number of time steps in the generated sequence.
        input_size: number of features per time step.
    """
    model.eval()

    sample, target = generate_data(batch_size=1, seq_len=seq_len, input_size=input_size)

    with torch.no_grad():
        prediction = model(sample)

    report = (
        ("Input Sequence:", sample),
        ("\nTrue Output (Cumulative Sum):", target),
        ("\nModel's Predicted Output:", prediction),
    )
    for heading, tensor in report:
        print(heading)
        # squeeze(-1) drops the last axis when it has size 1, for a compact printout
        print(tensor.squeeze(-1).numpy())

# Spot-check the trained model on one freshly generated sequence, using the
# same seq_len and input_size it was trained with, and print the comparison.
evaluate_model(model, seq_len=seq_len, input_size=input_size)

Input Sequence:
[[1. 0. 0. 1. 0.]]

True Output (Cumulative Sum):
[[1. 1. 1. 2. 2.]]

Model's Predicted Output:
[[0.99003947 0.98628914 0.9817401  1.957817   2.0688024 ]]
