In [1]:
# Setup: Long Short-Term Memory (LSTM) with PyTorch + Lightning
import torch
import torch.nn as nn
import torch.nn.functional as f
from torch.optim import Adam
import lightning as l

In [7]:
class LightningLSTMByHand(l.LightningModule):
    """A single LSTM unit written out by hand from scalar parameters.

    Every weight and bias is its own 0-dim ``nn.Parameter`` (8 weights and
    4 biases, 12 trainable values in total). Naming convention:

    * ``w??1`` multiplies the short-term memory, ``w??2`` multiplies the input,
      ``b??1`` is the bias of the same stage.
    * ``lr``  -> percentage of long-term memory to remember (forget gate)
    * ``pr``  -> percentage of the potential memory to remember (input gate)
    * ``p``   -> the potential long-term memory itself (input gate)
    * ``o``   -> percentage of the output to emit (output gate)
    """

    def __init__(self):
        """Create the parameters: weights drawn from N(0, 1), biases set to 0."""
        super().__init__()
        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        # NOTE: the order of the torch.normal draws is kept stable so that a
        # given RNG seed always produces the same initial weights.

        # Forget gate: how much of the long-term memory is kept.
        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

        # Input gate: how much of the potential memory is kept.
        self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

        # Input gate: the potential long-term memory candidate.
        self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

        # Output gate: how much of the new short-term memory is emitted.
        self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

    def lstm_unit(self, input_value, long_memory, short_memory):
        """Run one LSTM time step.

        Args:
            input_value: the input at the current time step.
            long_memory: long-term memory carried in from the previous step.
            short_memory: short-term memory carried in from the previous step.

        Returns:
            A tuple ``(updated_long_memory, updated_short_memory)``.
        """
        # Forget gate: scale the old long-term memory by a sigmoid percentage.
        long_remember_percent = torch.sigmoid(
            (short_memory * self.wlr1) + (input_value * self.wlr2) + self.blr1
        )
        forget_long_memory = long_memory * long_remember_percent

        # Input gate: build a tanh candidate and add the sigmoid-weighted part of it.
        potential_remember_percent = torch.sigmoid(
            (short_memory * self.wpr1) + (input_value * self.wpr2) + self.bpr1
        )
        potential_memory = torch.tanh(
            (short_memory * self.wp1) + (input_value * self.wp2) + self.bp1
        )
        updated_long_memory = forget_long_memory + (potential_memory * potential_remember_percent)

        # Output gate: the new short-term memory is a sigmoid-weighted tanh of
        # the updated long-term memory.
        output_percent = torch.sigmoid(
            (short_memory * self.wo1) + (input_value * self.wo2) + self.bo1
        )
        updated_short_memory = torch.tanh(updated_long_memory) * output_percent

        return updated_long_memory, updated_short_memory

    def forward(self, input_sequence):
        """Unroll the LSTM unit over ``input_sequence`` and return the final short-term memory.

        Args:
            input_sequence: one sequence, iterated along its first dimension;
                each element is fed to ``lstm_unit`` as one time step.

        Returns:
            The short-term memory after the last time step (zero if the
            sequence is empty).
        """
        # Both memories start at zero. They are created on the same device as
        # the parameters so the module keeps working after being moved.
        device = self.wlr1.device
        long_memory = torch.tensor(0.0, device=device)
        short_memory = torch.tensor(0.0, device=device)

        for input_value in input_sequence:
            long_memory, short_memory = self.lstm_unit(input_value, long_memory, short_memory)

        return short_memory

    def configure_optimizers(self):
        """Optimise every parameter with Adam at a learning rate of 0.01."""
        return Adam(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx):
        """Compute and log the mean squared error over the whole batch.

        Args:
            batch: ``(input_sequences, labels)`` where ``input_sequences`` holds
                one sequence per row and ``labels`` one target per sequence.
            batch_idx: index of the batch within the epoch (unused).

        Returns:
            Scalar mean squared error between predictions and labels.
        """
        input_sequences, labels = batch

        # Predict every sequence in the batch. Previously only the first one
        # was used, so with batch_size > 1 its prediction was broadcast against
        # every label and the unreduced loss could not be back-propagated.
        predictions = torch.stack([self(sequence) for sequence in input_sequences])

        # Align labels with the (batch,) predictions before subtracting so a
        # (batch, 1) label tensor cannot silently broadcast to (batch, batch).
        squared_errors = (predictions - labels.reshape(predictions.shape)) ** 2
        loss = squared_errors.mean()

        self.log("train_loss", loss, prog_bar=True)
        return loss


In [9]:
# Sanity check: push Company A's sequence through a freshly initialised
# (untrained) model, so the prediction only reflects the random initial weights.
model = LightningLSTMByHand()
company_a_sequence = torch.tensor([0., 0.5, 0.25, 1.])

print("\nNow let's compare the observed and predicted values...")
company_a_prediction = model(company_a_sequence).detach()
print("Company A: Observed = 0, Predicted = ", company_a_prediction)


Now let's compare the observed and predicted values...
Company A: Observed = 0, Predicted =  tensor(0.1177)


In [11]:
from torch.utils.data import TensorDataset, DataLoader

# Single-sequence dataset (Company A only).
inputs = torch.tensor([[0.0, 0.5, 0.25, 1.0]], dtype=torch.float32)  # shape (1, 4): one sequence, four time steps

# Target for Company A. The comparison cell reports "Observed = 0" for this
# exact sequence, so the training label must be 0.0 as well; the previous value
# of 1.0 trained the model towards the opposite of the observed outcome.
labels = torch.tensor([0.0], dtype=torch.float32)  # shape (1,): one target per sequence

# Wrap the tensors so each item is an (input_sequence, label) pair and serve
# them one sequence per batch from the main process.
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset, batch_size=1, num_workers=0, persistent_workers=False)

In [13]:
from lightning.pytorch import Trainer

# Start from a freshly constructed (untrained) network.
model = LightningLSTMByHand()

# Run for up to 500 epochs and write a log entry on every training step.
trainer = Trainer(
    log_every_n_steps=1,
    max_epochs=500,
)

# Fit the network on the dataloader built in the previous cell, announcing
# the start and the end of the run.
print("Starting training...")
trainer.fit(model=model, train_dataloaders=dataloader)
print("Training finished!")


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Starting training...



  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
D:\anaconda\src\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |                                                                                      | 0/? [00:00<…

`Trainer.fit` stopped: `max_epochs=500` reached.


Training finished!
