# Simple LSTM

In [None]:
##########################################################################################################
# SOME REFERENCES: https://medium.com/towards-data-science/whats-happening-in-my-lstm-layer-dd8110ecc52f #
##########################################################################################################

#################################################################################
# SOME REFERENCES: https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html #
#################################################################################

# In each LSTM layer, the number of unrolled cells (time steps) equals the window size, i.e. the sequence length.

import torch
import torch.nn as nn

""" Class for a simple LSTM. """
class MyLSTM(nn.Module):
    """LSTM stack followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features of each time step of x.
        hidden_size: number of features in the hidden state h.
        num_layers: number of stacked recurrent layers.
        output_size: number of output neurons of the linear layer.
        device: device on which the parameters and initial states are placed.
        bidirectional: if True the LSTM is bidirectional (D = 2), else D = 1.
        verbose: if True (default) forward() prints the intermediate tensors.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, device,
                 bidirectional=False, verbose=True):
        super(MyLSTM, self).__init__()
        # number of features of x
        self.input_size = input_size
        # number of features in the hidden state h
        self.hidden_size = hidden_size
        # number of recurrent layers
        self.num_layers = num_layers
        # number of output neurons of linear layer
        self.output_size = output_size
        # D parameter
        self.directions = 2 if bidirectional else 1
        # device
        self.device = device
        # whether forward() prints the intermediate tensors
        self.verbose = verbose

        # lstm-architecture: the flag must reach nn.LSTM, otherwise the
        # D-sized initial states and the linear layer below do not match it
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            bidirectional=bidirectional)

        # linear-layer
        self.fc1 = nn.Sequential(
            # we take the last cell outputs with shape (batch_size, D * hidden_size)
            nn.Linear(self.directions * self.hidden_size, self.output_size),
            # nn.ReLU(inplace=True)
        )

        # move every sub-module (LSTM and linear layer) to the target device
        self.to(device)

    def forward(self, x):
        """Run x through the LSTM and the linear layer.

        Args:
            x: tensor of shape (batch_size, sequence_length, input_size),
                as required by batch_first=True.

        Returns:
            Tensor of shape (batch_size, output_size) computed from the
            output of the last time step.
        """
        # the parameters live on self.device, so the input has to be there too
        x = x.to(self.device)
        batch_size = x.size(0)

        # initial states, shape (D * num_layers, batch_size, hidden_size)
        state_shape = (self.directions * self.num_layers,
                       batch_size, self.hidden_size)
        h_0 = torch.zeros(state_shape, device=self.device)
        c_0 = torch.zeros(state_shape, device=self.device)

        # output-shape (batch_size, sequence_length, D * hidden_size)
        # h_n-shape (D * num_layers, batch_size, hidden_size)
        # c_n-shape (D * num_layers, batch_size, hidden_size)
        output, (h_n, c_n) = self.lstm(x, (h_0, c_0))

        # takes the last cell outputs of all batches
        # NOTE(review): with bidirectional=True the reverse half of this slice
        # has only processed the last time step - confirm this is intended.
        last_output = output[:, -1, :]

        if self.verbose:
            print("\n", output.shape, h_n.shape, c_n.shape)
            print("\n", output, "\n\n", h_n, "\n\n", c_n)
            print("\nlast cell outputs shape: \n", last_output.shape)
            print("\nlast cell outputs: \n", last_output)

        lin_out = self.fc1(last_output)

        if self.verbose:
            # the label says "shape" but the tensor itself is printed; kept
            # as-is so the output matches the recorded example
            print(f"\nlin-out-shape: {lin_out}")

        return lin_out


""" Runs the simulation. """
if __name__ == "__main__":

    # use the first GPU when one is visible, otherwise stay on the CPU
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
    print(f"device: {device}")

    # random input: 2 sequences of 6 time steps with 6 features each
    x = torch.randn(2, 6, 6)
    print(f"\nx-shape: {x.shape}")

    # the feature dimension is the last axis of x
    num_features = int(x.size(-1))

    # one-layer LSTM with 6 hidden units and a single output neuron
    model = MyLSTM(num_features, 6, 1, 1, device)

    out = model(x)

##################
# output example #
##################

# device: cpu

# x-shape: torch.Size([2, 6, 6])

# torch.Size([2, 6, 6]) torch.Size([1, 2, 6]) torch.Size([1, 2, 6])

# tensor([[[ 0.0670,  0.0398,  0.2406, -0.2882,  0.0184, -0.1168],
#          [-0.0813,  0.2598,  0.3303, -0.1901,  0.1681, -0.1573],
#          [-0.0013,  0.0993,  0.3154, -0.2576,  0.2350, -0.2473],
#          [ 0.1037,  0.0869,  0.3699, -0.1835,  0.0433, -0.3695],
#          [ 0.1005,  0.2060,  0.3575, -0.1126,  0.0005, -0.3399],
#          [ 0.0111,  0.1685,  0.4348, -0.2917,  0.1025, -0.2308]],

#         [[ 0.2161, -0.0216,  0.1619, -0.0560, -0.1214, -0.2652],
#          [ 0.1342,  0.1155,  0.3858, -0.1652, -0.1072, -0.2497],
#          [-0.0412,  0.2684,  0.1980, -0.1152,  0.2417, -0.0435],
#          [-0.4885,  0.4563, -0.0637, -0.1715,  0.2013,  0.0407],
#          [-0.1746,  0.2125,  0.0317, -0.2900,  0.3504, -0.0428],
#          [-0.3115,  0.3664, -0.0838, -0.2712,  0.2742, -0.0829]]],
#        grad_fn=<TransposeBackward0>)

# tensor([[[ 0.0111,  0.1685,  0.4348, -0.2917,  0.1025, -0.2308],
#          [-0.3115,  0.3664, -0.0838, -0.2712,  0.2742, -0.0829]]],
#        grad_fn=<StackBackward0>)

# tensor([[[ 0.0251,  0.3550,  0.9562, -0.6240,  0.2229, -0.6161],
#          [-0.4754,  0.7592, -0.2685, -0.7482,  0.9308, -0.2028]]],
#        grad_fn=<StackBackward0>)

# last cell outputs shape: torch.Size([2, 6])

# last cell outputs:
# tensor([[ 0.0111,  0.1685,  0.4348, -0.2917,  0.1025, -0.2308],
#         [-0.3115,  0.3664, -0.0838, -0.2712,  0.2742, -0.0829]],
#        grad_fn=<SliceBackward0>)

# lin-out-shape:
# tensor([[0.2285],
#         [0.3122]], grad_fn=<ReluBackward0>)


# Encoder-Decoder LSTM

In [None]:
##########################################################################################################
# SOME REFERENCES: https://medium.com/towards-data-science/whats-happening-in-my-lstm-layer-dd8110ecc52f #
##########################################################################################################

#################################################################################
# SOME REFERENCES: https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html #
#################################################################################

# In each LSTM layer, the number of unrolled cells (time steps) equals the window size, i.e. the sequence length.

import torch
import torch.nn as nn

""" Class form LSTM in Autoencoder configuration. """
class EncoderDecoderLSTM(nn.Module):
    """Two chained LSTMs: an encoder whose output sequence feeds a decoder.

    The decoder consumes the encoder output directly, so dec_input_size has
    to equal D * enc_hidden_size (D = 2 if bidirectional else 1).

    Args:
        enc_input_size: number of expected features in the input x.
        dec_input_size: number of features of the encoder output sequence.
        enc_hidden_size: number of features in the encoder hidden state h.
        dec_hidden_size: number of features in the decoder hidden state h.
        num_layers: number of recurrent layers of both encoder and decoder.
        device: device on which the parameters and initial states are placed.
        bidirectional: if True both LSTMs are bidirectional.
    """

    def __init__(self, enc_input_size, dec_input_size, enc_hidden_size, dec_hidden_size, num_layers, device, bidirectional=False):
        super(EncoderDecoderLSTM, self).__init__()
        # the number of expected features in the input x
        self.enc_input_size = enc_input_size
        # the number of expected features in the output of the encoder
        self.dec_input_size = dec_input_size
        # the number of features in the hidden state h of the encoder
        self.enc_hidden_size = enc_hidden_size
        # the number of features in the hidden state h of the decoder
        self.dec_hidden_size = dec_hidden_size
        # number of recurrent layers
        self.num_layers = num_layers
        # if true becomes a bidirectional LSTM
        D = 2 if bidirectional else 1
        self.directions = D
        # device where to put tensors
        self.device = device

        # encoder; the flag must reach nn.LSTM so that it matches the
        # D-sized initial states built in forward()
        self.lstm_encoder = nn.LSTM(input_size=enc_input_size,
                                    hidden_size=enc_hidden_size,
                                    num_layers=num_layers,
                                    batch_first=True,
                                    bidirectional=bidirectional)
        # decoder
        self.lstm_decoder = nn.LSTM(input_size=dec_input_size,
                                    hidden_size=dec_hidden_size,
                                    num_layers=num_layers,
                                    batch_first=True,
                                    bidirectional=bidirectional)

        # parameters must live on the same device as the initial states
        self.to(device)

    def _zero_state(self, batch_size, hidden_size):
        """Return zero (h_0, c_0), each (D * num_layers, batch_size, hidden_size)."""
        shape = (self.directions * self.num_layers, batch_size, hidden_size)
        h_0 = torch.zeros(shape, device=self.device)
        c_0 = torch.zeros(shape, device=self.device)
        return h_0, c_0

    def forward(self, x):
        """Encode x, then decode the encoder output sequence.

        Args:
            x: tensor of shape (batch_size, sequence_length, enc_input_size),
                as required by batch_first=True.

        Returns:
            Tuple (enc_output, dec_output) with shapes
            (batch_size, sequence_length, D * enc_hidden_size) and
            (batch_size, sequence_length, D * dec_hidden_size).
        """
        # the parameters live on self.device, so the input has to be there too
        x = x.to(self.device)
        batch_size = x.size(0)

        # ENCODER
        # output-shape (batch_size, sequence_length, D * hidden_size)
        # h_n-shape (D * num_layers, batch_size, hidden_size)
        # c_n-shape (D * num_layers, batch_size, hidden_size)
        enc_state = self._zero_state(batch_size, self.enc_hidden_size)
        enc_output, (h_n, c_n) = self.lstm_encoder(x, enc_state)

        # DECODER
        # starts from fresh zero states; the encoder's final (h_n, c_n) are
        # not passed on, only its output sequence is
        dec_state = self._zero_state(batch_size, self.dec_hidden_size)
        dec_output, (_, _) = self.lstm_decoder(enc_output, dec_state)

        return enc_output, dec_output


""" Runs the simulation. """
if __name__ == "__main__":
    # use the first GPU when one is visible, otherwise the CPU
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(f"\ndevice: \n{device}")

    batch_size = 1 # 2
    seq_len = 5
    enc_input_size = 18
    # the decoder reads the encoder output, so its input size is enc_hidden_size
    dec_input_size = 5
    enc_hidden_size = 5
    dec_hidden_size = 18
    num_layers = 1
    # the feature dimension of x must match the encoder input size
    # (the previous `num_features` is not defined in this cell)
    x = torch.rand((batch_size, seq_len, enc_input_size))

    # model definition
    model = EncoderDecoderLSTM(enc_input_size=enc_input_size,
                               dec_input_size=dec_input_size,
                               enc_hidden_size=enc_hidden_size,
                               dec_hidden_size=dec_hidden_size,
                               num_layers=num_layers,
                               device=device)
    # model output
    enc_output, dec_output = model(x)

    


    # # other dimensions
    # batch_size = 2  # 32
    # seq_len = 5
    # enc_input_size = 18
    # dec_input_size = 18 #5
    # enc_hidden_size = 18
    # dec_hidden_size = 18
    # num_layers = 1


device: 
cpu


In [None]:
import torch

# Fix the RNG so the random tensors below are reproducible
torch.manual_seed(0)

# Inputs: a batch of 32 matrices (5 x 5) and a batch of 32 vectors (5,)
matrix = torch.rand(32, 5, 5)  # Shape: [32, 5, 5]
vector = torch.rand(32, 5)  # Shape: [32, 5]

# Turn every vector into a column so it can be multiplied by its matrix
expanded_vector = vector.unsqueeze(-1)  # Shape: [32, 5, 1]

# Batched matrix x column product, then drop the trailing singleton axis
result = (matrix @ expanded_vector).squeeze(-1)  # Shape: [32, 5]

print(result.shape)  # Shape of the resulting tensor

torch.Size([32, 5])
