## Unrolled LSTM on 4 days of stock data for 2 companies

* Coding from scratch

In [1]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
from torch.optim import Adam 

import lightning as L 
from torch.utils.data import DataLoader, TensorDataset

In [2]:
class LSTM_scratch(L.LightningModule):
    """A single LSTM unit written out by hand with scalar weights and biases.

    The unit has 12 trainable scalars: for each of the four internal stages
    (long-memory retention, potential-memory retention, potential memory,
    output) there is one weight applied to the short-term memory, one weight
    applied to the current input value, and one bias.

    `forward` unrolls the unit over every element of the input sequence and
    returns the final short-term memory as the prediction.
    """

    def __init__(self):
        super().__init__()

        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        # Assignment order is kept as-is: it determines both the order of the
        # random draws and the order of `self.parameters()`.
        # Weights start from N(0, 1); biases start at 0.

        # Stage 1: fraction of the long-term memory to keep.
        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.blr1 = nn.Parameter(torch.tensor(0.0))

        # Stage 2a: fraction of the potential memory to add.
        self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.bpr1 = nn.Parameter(torch.tensor(0.0))

        # Stage 2b: the potential (candidate) long-term memory itself.
        self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.bp1 = nn.Parameter(torch.tensor(0.0))

        # Stage 3: fraction of the squashed long-term memory to output.
        self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std))
        self.bo1 = nn.Parameter(torch.tensor(0.0))

    def lstm_unit(self, input_val, long_memory, short_memory):
        """Run one time step of the LSTM unit.

        Args:
            input_val: Input value for the current time step.
            long_memory: Long-term memory carried in from the previous step.
            short_memory: Short-term memory carried in from the previous step.

        Returns:
            A two-element list ``[updated_long_memory, updated_short_memory]``.
        """
        # Sigmoid -> value in (0, 1): share of the old long-term memory kept.
        long_mem_per = torch.sigmoid((short_memory*self.wlr1)+
                                     (input_val*self.wlr2)+
                                     self.blr1)

        # Share of the new candidate memory that gets written.
        potential_mem_per = torch.sigmoid((short_memory*self.wpr1)+
                                          (input_val*self.wpr2)+
                                          self.bpr1)

        # Candidate memory, squashed to (-1, 1) by tanh.
        potential_memory = torch.tanh((short_memory*self.wp1)+
                                      (input_val*self.wp2)+
                                      self.bp1)

        upd_long_mem = (long_memory*long_mem_per + potential_mem_per*potential_memory)

        # Share of the squashed long-term memory exposed as short-term memory.
        output_percent = torch.sigmoid((short_memory*self.wo1)+
                                       (input_val*self.wo2)+
                                       self.bo1)

        upd_short_mem = torch.tanh(upd_long_mem)*output_percent

        return ([upd_long_mem, upd_short_mem])

    def forward(self, input):
        """Unroll the LSTM unit over the whole input sequence.

        Args:
            input: Sequence of per-step values (e.g. a 1-D tensor with one
                value per day). Any non-zero length is accepted; a four-step
                input gives exactly the result of unrolling the unit four
                times by hand.

        Returns:
            The short-term memory after the last step, used as the prediction.

        Raises:
            IndexError: If the sequence is empty.
        """
        if len(input) == 0:
            raise IndexError("input sequence must contain at least one time step")

        # Both memories start at zero before the first step.
        long_memory = 0
        short_memory = 0

        for step_value in input:
            long_memory, short_memory = self.lstm_unit(step_value, long_memory, short_memory)

        return short_memory

    def configure_optimizers(self):
        """Optimise all parameters with Adam at its default settings."""
        return Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        """Compute the squared-error loss for one batch and log it.

        NOTE: only the first sequence of the batch (`input_i[0]`) is used and
        `label_i == 0` is evaluated as a single truth value, so this step
        expects a batch size of 1.
        """
        input_i, label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i)**2

        self.log("train_loss", loss)

        # Track the prediction separately for each target value.
        if (label_i == 0):
            self.log("out_0", output_i)
        else:
            self.log("out_1", output_i)

        return loss

In [3]:
model = LSTM_scratch()

# Predictions before any training: (message prefix, four daily values) per company.
for prefix, days in (
    ("Company A: observed: 0, predicted: ", [0.,0.5,0.25,1.]),
    ("Company B: observed: 1, predicted: ", [1.,0.5,0.25,1.]),
):
    print(prefix, model(torch.tensor(days)).detach())

Company A: observed: 0, predicted:  tensor(0.2287)
Company B: observed: 1, predicted:  tensor(0.3010)


In [4]:
# One row per company, one column per day.
inputs = torch.tensor([
    [0., 0.5, 0.25, 1.],
    [1., 0.5, 0.25, 1.],
])
# Target value for each row of `inputs`.
labels = torch.tensor([0., 1.])

dataset = TensorDataset(inputs, labels)
# batch_size=1 is the DataLoader default, spelled out: one company per step.
dl = DataLoader(dataset, batch_size=1)

In [5]:
# First training run of the from-scratch model: 2000 passes over `dl`.
# `trainer` is kept as a global because later cells read its checkpoint callback.
trainer = L.Trainer(max_epochs=2000)
trainer.fit(model, train_dataloaders=dl)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3050 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: D:\data\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params
--------------------------------------
  | other params | n/a  | 12    
--------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
C:\Users\Pratham\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connector

Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2000` reached.


In [6]:
# Predictions after the first training run: (message prefix, four daily values).
for prefix, days in (
    ("Company A: observed: 0, predicted: ", [0.,0.5,0.25,1.]),
    ("Company B: observed: 1, predicted: ", [1.,0.5,0.25,1.]),
):
    print(prefix, model(torch.tensor(days)).detach())

Company A: observed: 0, predicted:  tensor(5.7331e-05)
Company B: observed: 1, predicted:  tensor(0.9437)


In [8]:
# Read the checkpoint path from the previous trainer BEFORE `trainer` is rebound below.
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path

# Training resumes from the saved checkpoint and stops once epoch 2500 is reached,
# so this run continues the earlier one rather than starting over.
trainer = L.Trainer(max_epochs=2500)
trainer.fit(model, train_dataloaders=dl, ckpt_path=path_to_best_checkpoint)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at D:\data\lightning_logs\version_0\checkpoints\epoch=1999-step=4000.ckpt
C:\Users\Pratham\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:347: The dirpath has changed from 'D:\\data\\lightning_logs\\version_0\\checkpoints' to 'D:\\data\\lightning_logs\\version_1\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params
--------------------------------------
  | other params | n/a  | 12    
--------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model param

Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=2500` reached.


In [9]:
# Predictions after resuming to 2500 epochs: (message prefix, four daily values).
for prefix, days in (
    ("Company A: observed: 0, predicted: ", [0.,0.5,0.25,1.]),
    ("Company B: observed: 1, predicted: ", [1.,0.5,0.25,1.]),
):
    print(prefix, model(torch.tensor(days)).detach())

Company A: observed: 0, predicted:  tensor(2.6849e-05)
Company B: observed: 1, predicted:  tensor(0.9582)


In [10]:
# Read the checkpoint path from the previous trainer BEFORE `trainer` is rebound below.
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path

# Continue from that checkpoint until epoch 3000 is reached.
trainer = L.Trainer(max_epochs=3000)
trainer.fit(model, train_dataloaders=dl, ckpt_path=path_to_best_checkpoint)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at D:\data\lightning_logs\version_1\checkpoints\epoch=2499-step=5000.ckpt
C:\Users\Pratham\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:347: The dirpath has changed from 'D:\\data\\lightning_logs\\version_1\\checkpoints' to 'D:\\data\\lightning_logs\\version_2\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params
--------------------------------------
  | other params | n/a  | 12    
--------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model param

Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=3000` reached.


In [11]:
# Predictions after resuming to 3000 epochs: (message prefix, four daily values).
for prefix, days in (
    ("Company A: observed: 0, predicted: ", [0.,0.5,0.25,1.]),
    ("Company B: observed: 1, predicted: ", [1.,0.5,0.25,1.]),
):
    print(prefix, model(torch.tensor(days)).detach())

Company A: observed: 0, predicted:  tensor(1.3234e-05)
Company B: observed: 1, predicted:  tensor(0.9681)


* Using PyTorch's built-in `nn.LSTM` (wrapped in a `LightningModule`)

In [14]:
class LSTM_lightning(L.LightningModule):
    """LightningModule wrapping a one-feature, one-hidden-unit `nn.LSTM`."""

    def __init__(self):
        super().__init__()

        self.lstm = nn.LSTM(input_size = 1, hidden_size = 1)

    def forward(self, input):
        """Return the LSTM output at the last time step of `input`.

        `input` is reshaped to one row per time step with a single feature
        column before being passed to the LSTM.
        """
        sequence = input.view(input.shape[0], 1)
        # The LSTM returns (per-step outputs, final states); only the outputs are needed.
        per_step_output = self.lstm(sequence)[0]
        return per_step_output[-1]

    def configure_optimizers(self):
        """Optimise all parameters with Adam at a learning rate of 0.1."""
        return Adam(self.parameters(), lr = 0.1)

    def training_step(self, batch, batch_idx):
        """Squared-error loss on the first sequence of the batch, with logging.

        `target == 0` is evaluated as a single truth value, so the batch is
        expected to hold exactly one sequence.
        """
        sequences, target = batch
        predicted = self.forward(sequences[0])
        loss = (predicted - target)**2

        self.log("train_loss", loss)

        # Log the prediction under a name that depends on the target value.
        metric_name = "out_0" if (target == 0) else "out_1"
        self.log(metric_name, predicted)

        return loss

In [15]:
model_l = LSTM_lightning()

# Predictions before any training: (message prefix, four daily values) per company.
for prefix, days in (
    ("Company A: observed: 0, predicted: ", [0.,0.5,0.25,1.]),
    ("Company B: observed: 1, predicted: ", [1.,0.5,0.25,1.]),
):
    print(prefix, model_l(torch.tensor(days)).detach())

Company A: observed: 0, predicted:  tensor([0.3156])
Company B: observed: 1, predicted:  tensor([0.3221])


In [18]:
# Train the nn.LSTM-based model for 300 epochs on the same dataloader,
# logging metrics every 2 training steps.
trainer = L.Trainer(max_epochs=300, log_every_n_steps=2)
trainer.fit(model_l, train_dataloaders=dl)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
0 | lstm | LSTM | 16    
------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)


Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=300` reached.


In [19]:
# Predictions after training: (message prefix, four daily values) per company.
for prefix, days in (
    ("Company A: observed: 0, predicted: ", [0.,0.5,0.25,1.]),
    ("Company B: observed: 1, predicted: ", [1.,0.5,0.25,1.]),
):
    print(prefix, model_l(torch.tensor(days)).detach())

Company A: observed: 0, predicted:  tensor([-0.1607])
Company B: observed: 1, predicted:  tensor([0.9873])
