In [69]:
!pip install torch torchvision
import torch
import torch.nn as nn



In [0]:
import numpy as np
import torch.nn.functional as F
import torch.optim as optim

In [86]:

lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
print(inputs)
# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument  to the lstm at a later time
# Add the extra 2nd dimension
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
print(type(inputs))
print(inputs.shape)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)


[tensor([[ 0.8136, -0.3280, -0.1842]]), tensor([[-1.1311,  0.5439,  0.8717]]), tensor([[-0.8104, -0.1274,  1.0096]]), tensor([[-0.6895,  1.2085,  0.2931]]), tensor([[-0.5156,  0.9553, -0.1221]])]
<class 'torch.Tensor'>
torch.Size([5, 1, 3])
tensor([[[-0.0946, -0.2480,  0.0497]],

        [[-0.1163, -0.2648,  0.1524]],

        [[-0.1250, -0.3568,  0.1806]],

        [[-0.1603, -0.2008,  0.2990]],

        [[-0.1486, -0.1708,  0.3294]]], grad_fn=<StackBackward>)
(tensor([[[-0.1486, -0.1708,  0.3294]]], grad_fn=<StackBackward>), tensor([[[-0.5942, -0.4366,  0.4370]]], grad_fn=<StackBackward>))


In [0]:
class LSTM(nn.Module):
 
    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1,num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
 
        # Define the LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        # Initialise hidden state
        # Don't do this if you want your LSTM to be stateful
        self.hidden = self.init_hidden()
        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim, output_dim)
 
    def init_hidden(self):
        # This is what we'll initialise our hidden state as
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
 
    def forward(self, input):
        # Forward pass through LSTM layer
        # shape of lstm_out: [input_size, batch_size, hidden_dim]
        # shape of self.hidden: (a, b), where a and b both 
        # have shape (num_layers, batch_size, hidden_dim).
        lstm_out, self.hidden = self.lstm(input.view(len(input), self.batch_size, -1),self.hidden)
        
        # Only take the output from the final timetep
        # Can pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
        y_pred = self.linear(lstm_out[-1].view(self.batch_size, -1))
        return y_pred.view(-1)
 


In [0]:
input_dim = 5
hidden_dim = 5
output_dim = 1
num_layers = 1
batch_size = 1
learning_rate = 0.001
num_epochs = 1

model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, batch_size=batch_size, output_dim=output_dim, num_layers=num_layers)

In [74]:
X_train = torch.tensor([[[1,2,3,4,5],
           [2,3,4,5,6],
           [0,1,2,3,4]]])
y_pred = torch.tensor([6,7,5])
print(X_train.shape)
print(y_pred.shape)

torch.Size([1, 3, 5])
torch.Size([3])


In [77]:

loss_fn = torch.nn.MSELoss(size_average=False)
 
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
 
#####################
# Train model
#####################
 
hist = np.zeros(num_epochs)
 
for t in range(num_epochs):
    # Clear stored gradient
    model.zero_grad()
    
    
    
    # Forward pass
    y_pred = model(X_train)
 
    loss = loss_fn(y_pred, y_train)
    if t % 100 == 0:
        print("Epoch ", t, "MSE: ", loss.item())
    hist[t] = loss.item()
 
    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()
 
    # Backward pass
    loss.backward()
 
    # Update parameters
    optimiser.step()




RuntimeError: ignored