<a href="https://colab.research.google.com/github/jalaneunos/neural_network_architectures/blob/main/lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
%pip install lightning --quiet
import lightning as L
from torch.utils.data import TensorDataset, DataLoader

In [None]:
class LSTM(L.LightningModule):
  """Single-unit LSTM assembled by hand from scalar weights and biases.

  Each of the four gate computations owns three 0-dim ``nn.Parameter``s: a
  weight applied to the short-term memory, a weight applied to the current
  input value, and a bias. The running long-term memory (``ltm``) and
  short-term memory (``stm``) are stored on the instance; ``forward`` resets
  both to zero before consuming a sequence and leaves the final values in
  place afterwards.
  """

  def __init__(self):
    """Create the gate parameters and zero both memories.

    Weights are drawn from a normal distribution with mean 0 and std 1;
    biases start at 0.
    """
    super().__init__()
    mean = torch.tensor(0.0)
    std = torch.tensor(1.0)

    def random_weight():
      # One scalar weight sampled from N(mean, std).
      return nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)

    def zero_bias():
      # One scalar bias initialised to zero.
      return nn.Parameter(torch.tensor(0.), requires_grad=True)

    self.ltm = torch.tensor(0.)
    self.stm = torch.tensor(0.)

    # NOTE: parameters are created in the same order as before so that a
    # seeded run draws the same initial weights and ``parameters()`` keeps
    # its ordering.

    # Forget gate: percentage of old long term memory
    self.forget_gate_stm = random_weight()
    self.forget_gate_input = random_weight()
    self.forget_gate_bias = zero_bias()

    # Potential (new) long term memory
    self.input_gate_stm = random_weight()
    self.input_gate_input = random_weight()
    self.input_gate_bias = zero_bias()

    # Percentage of potential (new) long term memory
    self.input_gate_percent_stm = random_weight()
    self.input_gate_percent_input = random_weight()
    self.input_gate_percent_bias = zero_bias()

    # Percentage of potential (new) short term memory
    self.output_gate_percent_stm = random_weight()
    self.output_gate_percent_input = random_weight()
    self.output_gate_percent_bias = zero_bias()

  def lstm_unit(self, input_value):
    """Run one LSTM step from the current ``self.stm`` / ``self.ltm``.

    The stored memories are read but not modified here; the caller decides
    whether to keep the returned values.

    Args:
      input_value: the current element of the sequence (``forward`` passes
        the items obtained by iterating over its input).

    Returns:
      A ``(new_stm, new_ltm)`` tuple with the updated short-term and
      long-term memories.
    """
    # Forget gate: fraction of the old long-term memory that survives.
    forget_gate = torch.sigmoid(
        self.stm * self.forget_gate_stm
        + input_value * self.forget_gate_input
        + self.forget_gate_bias
    )
    retained_ltm = self.ltm * forget_gate

    # Input gate: candidate long-term memory scaled by how much of it to keep.
    potential_ltm = torch.tanh(
        self.stm * self.input_gate_stm
        + input_value * self.input_gate_input
        + self.input_gate_bias
    )
    percentage_potential_ltm = torch.sigmoid(
        self.stm * self.input_gate_percent_stm
        + input_value * self.input_gate_percent_input
        + self.input_gate_percent_bias
    )
    input_gate = potential_ltm * percentage_potential_ltm

    new_ltm = retained_ltm + input_gate

    # Output gate: candidate short-term memory scaled by how much to emit.
    # This must use the output gate's own input weight and bias; reusing the
    # input gate's parameters would leave output_gate_percent_input and
    # output_gate_percent_bias out of the computation entirely.
    potential_stm = torch.tanh(new_ltm)
    percent_potential_stm = torch.sigmoid(
        self.stm * self.output_gate_percent_stm
        + input_value * self.output_gate_percent_input
        + self.output_gate_percent_bias
    )

    new_stm = potential_stm * percent_potential_stm

    return new_stm, new_ltm

  def forward(self, input):
    """Unroll the unit over ``input`` and return the final short-term memory.

    Both memories are reset to zero first, then updated once per element
    obtained by iterating over ``input``. The final values also remain
    available as ``self.stm`` and ``self.ltm``.

    Args:
      input: an iterable of input values, consumed in order.

    Returns:
      The short-term memory after the last element (zero if ``input`` is
      empty).
    """
    self.stm, self.ltm = torch.tensor(0.), torch.tensor(0.)
    for data in input:
      self.stm, self.ltm = self.lstm_unit(data)
    return self.stm

  def configure_optimizers(self):
    """Return an Adam optimizer over all parameters with default settings."""
    return Adam(self.parameters())

  def training_step(self, batch, label):
    """Compute, print and return the squared error for one sequence.

    NOTE(review): this is written to be called by hand with a sequence and
    its target. Lightning's ``Trainer`` passes ``(batch, batch_idx)`` to
    ``training_step``, so under a ``Trainer`` the second argument would be
    the batch index rather than a label — confirm before using with one.

    Args:
      batch: the input sequence handed to ``forward``.
      label: the target value for the final short-term memory.

    Returns:
      ``(label - prediction) ** 2``.
    """
    pred = self.forward(batch)
    loss = (label - pred) ** 2
    print(f"Label: {label}, Predicted: {pred}, Loss: {loss}")

    return loss






In [None]:
# Build a freshly initialised model and print/return the squared error for
# one example sequence whose target is 0.
model = LSTM()
model.training_step(batch=torch.tensor([0.0, 0.5, 0.25, 1.0]), label=0.0)

Label: 0.0, Predicted: -0.1598021239042282, Loss: 0.02553671970963478


tensor(0.0255, grad_fn=<PowBackward0>)

In [None]:
# Same check for a second example sequence whose target is 1.
model.training_step(batch=torch.tensor([1.0, 0.5, 0.25, 1.0]), label=1.0)

Label: 1.0, Predicted: -0.17766152322292328, Loss: 1.386886715888977


tensor(1.3869, grad_fn=<PowBackward0>)

In [None]:
# The single training example: a four-step sequence and its target value.
inputs = torch.tensor([0.0, 0.5, 0.25, 1.0])
labels = torch.tensor([0.0])

In [None]:
num_epochs = 2000
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Fit the model to the single (inputs, labels) example with a hand-written loop.
for epoch in range(num_epochs):
    # Clear the gradients accumulated by the previous iteration.
    optimizer.zero_grad()

    # Running the sequence leaves the prediction in model.stm.
    model.forward(inputs)
    pred = model.stm

    # Squared error against the target, followed by one Adam update.
    loss = (labels - pred).pow(2)
    loss.backward()
    optimizer.step()

    # Only report every 100th epoch.
    if epoch % 100 != 0:
        continue
    print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item()}")


Epoch [0/2000], Loss: 0.02553671970963478
Epoch [100/2000], Loss: 0.011309489607810974
Epoch [200/2000], Loss: 0.005834585055708885
Epoch [300/2000], Loss: 0.0032741702161729336
Epoch [400/2000], Loss: 0.0019022937631234527
Epoch [500/2000], Loss: 0.001105964183807373
Epoch [600/2000], Loss: 0.0006265657139010727
Epoch [700/2000], Loss: 0.0003383951843716204
Epoch [800/2000], Loss: 0.00017112806381192058
Epoch [900/2000], Loss: 7.99399203970097e-05
Epoch [1000/2000], Loss: 3.418698543100618e-05
Epoch [1100/2000], Loss: 1.3321314327185974e-05
Epoch [1200/2000], Loss: 4.72071997137391e-06
Epoch [1300/2000], Loss: 1.52066695591202e-06
Epoch [1400/2000], Loss: 4.451038364550186e-07
Epoch [1500/2000], Loss: 1.182708899705176e-07
Epoch [1600/2000], Loss: 2.846194924188694e-08
Epoch [1700/2000], Loss: 6.18341511327003e-09
Epoch [1800/2000], Loss: 1.2069324428765071e-09
Epoch [1900/2000], Loss: 2.1100068914314818e-10


In [None]:
# Re-run the training sequence and display the resulting short-term memory,
# which is the value the loop above used as the prediction.
model.forward(input=inputs)
model.stm

tensor(-5.7258e-06, grad_fn=<MulBackward0>)