# Simple RNN Model

In [1]:
import os
import sys

import numpy as np
import torch

# Extend the import search path so the project's `src` package (imported
# right after this) can be found. The directory three levels above the
# notebook's working directory is resolved to an absolute path, and the
# membership check keeps repeated runs of this cell from adding duplicates.
src_path = os.path.abspath('../../..')
if src_path not in sys.path:
    sys.path.append(src_path)

from src.models.rnn import SimpleRNN
from src.utils.datasets import HSMD

## Load the Huge Stock Market Dataset

As a proof of concept, load only the `Close` column of the `NFLX` stock. Load the data in 7-day windows.

In [2]:
# Load a single price series from the Huge Stock Market Dataset:
# only the `Close` column of the `nflx` ticker, requested with window_size=7.
dataset = HSMD(
    '../../../data/HSMD',
    window_size=7,
    columns=['Close'],
    stocks=['nflx'],
)

## Split the data into train and test sets

Use 80% of the data for the training set and 20% for the test set. Seed the PRNG to get the same split every time, so that validation results are reproducible and comparable across runs.

In [3]:
# Fraction of the samples assigned to the training split; the rest go to the test split.
TRAIN_PCT = 0.8

# int() truncates, so any fractional sample falls into the test split.
train_size = int(TRAIN_PCT * len(dataset))
# Derived by subtraction so the two sizes always sum to len(dataset).
test_size = len(dataset) - train_size

In [4]:
# A dedicated, explicitly seeded generator makes the random split reproducible
# without touching PyTorch's global RNG state.
generator = torch.Generator()
generator.manual_seed(1)

# Randomly partition the dataset into two non-overlapping subsets of the requested sizes.
train_set, test_set = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=generator,
)

## Create and display the RNN model

Use the `SimpleRNN` model defined in `src/models/rnn/simple.py` with the following parameters:

* `input_size=7`: The size of the sliding window fed to the model.
* `hidden_size=32`: The (arbitrarily selected) number of units per LSTM cell. A potential hyperparameter to be tuned.
* `num_cells=4`: The (arbitrarily selected) number of stacked LSTM cells in the RNN model (shown as `num_layers=4` in the printed model below). A potential hyperparameter to be tuned.

In [5]:
# Build the model and print its layer summary.
model = SimpleRNN(
    7,            # input size: one 7-value window per step
    32,           # hidden size
    num_cells=4,  # number of LSTM cells
)
print(model)

SimpleRNN(
  (lstm): LSTM(7, 32, num_layers=4, batch_first=True)
  (fc): Linear(in_features=32, out_features=7, bias=True)
)


In [6]:
# Smoke test: push one training sample through the untrained model and check
# the output shape. The first sample is reshaped to (1, -1, 7), i.e. a batch of
# one whose last axis holds the 7 window values, and cast to float32 before
# being wrapped in a tensor.
x = torch.Tensor(
    next(iter(train_set)).reshape(1, -1, 7).astype(np.float32)
)

predictions = model(x)
print(predictions.shape)

torch.Size([1, 1, 7])
