# Simple RNN (Elman cell)

Implementation of an Elman cell architecture on time series data

In [1]:
import torch
import numpy as np
from torch import nn

import warnings

warnings.filterwarnings("ignore")

In [2]:
# get sample data

import polars as pl
import yfinance as yf
import re

prices = yf.download("SPLG", start='2023-01-01', end='2024-01-01')

df = (
    pl
    .from_pandas(
        prices
        .reset_index()
    ).with_columns(
        pl.lit("SPLG").alias("Ticker")
    )
)

df.columns = [re.sub(r"[^\w\s]","",header.split(",")[0]) for header in df.columns]

df.head()

[*********************100%***********************]  1 of 1 completed


Date,Close,High,Low,Open,Volume,Ticker
datetime[ns],f64,f64,f64,f64,i64,str
2023-01-03 00:00:00,43.092083,43.717584,42.755277,43.486628,3688300,"""SPLG"""
2023-01-04 00:00:00,43.42889,43.659842,42.995852,43.342283,4335600,"""SPLG"""
2023-01-05 00:00:00,42.928493,43.197938,42.861132,43.188317,4449300,"""SPLG"""
2023-01-06 00:00:00,43.900421,44.035143,42.928493,43.284546,2160500,"""SPLG"""
2023-01-09 00:00:00,43.88118,44.545174,43.861937,44.150628,4251700,"""SPLG"""


# SPLG Simple RNN Class

In [3]:
# Extremely basic 1-d 1-param Elman RNN cell - with another linear transformation after tanh activation

class ElmanRNN(nn.Module):
    def __init__(self, sequence_length = 1):
        super(ElmanRNN, self).__init__()

        self.w_x = nn.Parameter(
            torch.randn(1,1, requires_grad=True, dtype=torch.float32)
        )
        self.w_h = nn.Parameter(
            torch.randn(1,1, requires_grad=True, dtype=torch.float32)
        )
        self.b_h = nn.Parameter(
            torch.randn(1, requires_grad=True, dtype=torch.float32)
        )

        self.w_y = nn.Parameter(
            torch.randn(1, requires_grad=True, dtype=torch.float32)
        )
        self.b_y = nn.Parameter(
            torch.randn(1, requires_grad=True, dtype=torch.float32)
        )

        self.seq_len = sequence_length

    def forward(self, x, h = None):
        """
        Inputs:

        x = input data
        h = hidden state value from previous iteration (default to 0 if not applicable)
        """

        if len(x.shape) <= 1:
            x = x.unsqueeze(1)
            output = []

        if h is None:
            h = torch.zeros(1, dtype=torch.float32)

        h_1 = h

        x = x.to(dtype=torch.float32) #ensure type is aligned
        seq = 0
        
        for entry in x: #loop is used to ensure hidden states carry through iterations
            h_1 = torch.relu(
                entry @ self.w_x.t() + h_1 @ self.w_h.t() + self.b_h
            )
            y = h_1 @ self.w_y.t() + self.b_y
            output.append(y)
            if seq >= self.seq_len:
                seq = 0
                h_1 = h
            else:
                seq += 1

        output = torch.stack(output)
        if len(x.shape) <= 1:
            output = output.squeeze(1)

        return output, h_1


## Training

### Prep dataset

In [4]:
x_train = torch.FloatTensor(df["Close"].to_list()[:-1])
y_train = torch.FloatTensor(df["Close"].to_list()[1:])

print(x_train.shape)
print(y_train.shape)

torch.Size([249])
torch.Size([249])


### Setup Training Loop

In [5]:
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

# Setup hyperparamters
sequence_length = 5
batch_size = 50
epochs = 1000
learning_rate = 0.01

train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Setup model
model = ElmanRNN(sequence_length=sequence_length)

# Setup loss function and optimizer
criterion = nn.MSELoss() #there is no RMSE
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(1, epochs + 1):
    total_loss = 0
    model.train()
    for batch_id, (data, target) in enumerate(train_loader): #for each batch, also get the index of batch
        optimizer.zero_grad()
        output, _ = model(data) #forward pass
        loss = criterion(output, target)
        loss.backward() #compute gradients
        optimizer.step() #update weights
        
        total_loss += loss.item()

        if batch_id % 100 == 0 and epoch % 100 == 0: #update on training iterations
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch, batch_id * len(data), len(train_loader.dataset),
                100. * batch_id / len(train_loader), loss.item()))




In [6]:
y = model(x_train)
y

(tensor([[47.8041],
         [38.8893],
         [47.7760],
         [38.8893],
         [47.9396],
         [38.8893],
         [48.0900],
         [38.8893],
         [48.1495],
         [38.8893],
         [48.0140],
         [38.8893],
         [48.0983],
         [38.8893],
         [48.1825],
         [38.8893],
         [48.2701],
         [38.8893],
         [48.1892],
         [38.8893],
         [48.3875],
         [38.8893],
         [48.4189],
         [38.8893],
         [48.4751],
         [38.8893],
         [48.3164],
         [38.8893],
         [48.4238],
         [38.8893],
         [48.4503],
         [38.8893],
         [48.3164],
         [38.8893],
         [48.1462],
         [38.8893],
         [48.1082],
         [38.8893],
         [48.1049],
         [38.8893],
         [48.1330],
         [38.8893],
         [48.2602],
         [38.8893],
         [48.1561],
         [38.8893],
         [47.9049],
         [38.8893],
         [48.0156],
         [38.8893],


In [7]:
model._parameters

{'w_x': Parameter containing:
 tensor([[0.1638]], requires_grad=True),
 'w_h': Parameter containing:
 tensor([[-1.3795]], requires_grad=True),
 'b_h': Parameter containing:
 tensor([1.4434], requires_grad=True),
 'w_y': Parameter containing:
 tensor([1.0487], requires_grad=True),
 'b_y': Parameter containing:
 tensor([38.8893], requires_grad=True)}