In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Lstm takes continues inputs and generates similar outputs
input_dim = 3; hidden_dim = 15;
lstm = nn.LSTM(input_dim, hidden_dim)

In [3]:
T = 5 # time sequence (length of your input)
B = 1 # minibatching (batchsize)
inputs = [torch.randn(1, 3) for _ in range(T)]

In [4]:
inputs

[tensor([[ 0.2077, -0.2828, -1.9625]]),
 tensor([[-0.4703, -1.3428,  0.8899]]),
 tensor([[-0.3735, -0.6493, -0.5404]]),
 tensor([[0.8666, 0.2921, 1.0045]]),
 tensor([[0.3367, 0.5125, 0.2508]])]

In [5]:
inputs[0].view(1,1,-1).shape

torch.Size([1, 1, 3])

In [6]:
outputs1 = [];
# hiddens1 contains tuple (hidden, cell) and the hidden and outputs[i]
# is the same thing
hiddens1 = []; 
hidden = (torch.zeros(1,1, hidden_dim), torch.zeros(1,1, hidden_dim)) 
for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    outputs1.append(out)
    hiddens1.append(hidden)

In [7]:
outputs1

[tensor([[[ 0.1478,  0.2600,  0.0446, -0.0169,  0.0901,  0.2062, -0.0220,
           -0.1475,  0.0499, -0.0675,  0.1146,  0.0175, -0.0764,  0.0253,
            0.0108]]], grad_fn=<StackBackward>),
 tensor([[[ 0.0871,  0.2050, -0.1235, -0.0213,  0.1463,  0.0848, -0.0386,
            0.1302, -0.0125,  0.0522, -0.0305, -0.0091, -0.0646, -0.0845,
           -0.0519]]], grad_fn=<StackBackward>),
 tensor([[[ 0.1444,  0.2415, -0.1053, -0.0230,  0.1536,  0.1646, -0.0571,
            0.0873, -0.0293,  0.0263, -0.0114,  0.0080, -0.0810, -0.0702,
           -0.0443]]], grad_fn=<StackBackward>),
 tensor([[[ 0.1530,  0.0560, -0.1864, -0.0632,  0.1551,  0.1467, -0.0825,
            0.0973, -0.0627,  0.0573, -0.0340,  0.1303, -0.0631, -0.0486,
           -0.0139]]], grad_fn=<StackBackward>),
 tensor([[[ 0.1905,  0.0548, -0.1417, -0.0999,  0.1631,  0.1756, -0.1008,
            0.0804, -0.0914,  0.0246, -0.0198,  0.1679, -0.0693, -0.0275,
           -0.0089]]], grad_fn=<StackBackward>)]

In [8]:
hiddens1

[(tensor([[[ 0.1478,  0.2600,  0.0446, -0.0169,  0.0901,  0.2062, -0.0220,
            -0.1475,  0.0499, -0.0675,  0.1146,  0.0175, -0.0764,  0.0253,
             0.0108]]], grad_fn=<StackBackward>),
  tensor([[[ 0.2571,  0.4093,  0.0921, -0.0333,  0.2826,  0.4099, -0.0415,
            -0.2149,  0.0890, -0.1707,  0.2135,  0.0298, -0.1373,  0.0509,
             0.0251]]], grad_fn=<StackBackward>)),
 (tensor([[[ 0.0871,  0.2050, -0.1235, -0.0213,  0.1463,  0.0848, -0.0386,
             0.1302, -0.0125,  0.0522, -0.0305, -0.0091, -0.0646, -0.0845,
            -0.0519]]], grad_fn=<StackBackward>),
  tensor([[[ 0.1688,  0.3646, -0.2895, -0.0542,  0.3098,  0.1812, -0.0989,
             0.2227, -0.0218,  0.1045, -0.0584, -0.0153, -0.1405, -0.1292,
            -0.0920]]], grad_fn=<StackBackward>)),
 (tensor([[[ 0.1444,  0.2415, -0.1053, -0.0230,  0.1536,  0.1646, -0.0571,
             0.0873, -0.0293,  0.0263, -0.0114,  0.0080, -0.0810, -0.0702,
            -0.0443]]], grad_fn=<StackBackward>)

In [9]:
len(outputs1) # ths should be equal to sequence (5)
outputs1[0].shape
# You expect all the time sequence hiddens are equal
for i in range(T):
    k = (hiddens1[i][0] == outputs1[i]).sum() 
    assert k == 15

In [10]:
cat_input = torch.cat(inputs).view(len(inputs),1, -1)
# as you can see the dimension are seq_len, batch, input_size
cat_input.shape # this is what you should do in your case as well.

torch.Size([5, 1, 3])

In [11]:
hidden=(torch.zeros(1,1, hidden_dim), torch.zeros(1,1, hidden_dim))
out2, hidden2 = lstm(cat_input, hidden)

In [12]:
out2.shape # seq_len, batch, hidden_dim (5,1,128) contains all hiddens
hidden2[0].shape # 1,1,128 'caz it is only final hidden 
(hidden2[0] == out2[-1,:,:]).sum()

tensor(15)

In [13]:
# this shows us that both cases calculate the same output
(hiddens1[-1][0] == hidden2[0]).sum()

tensor(15)

In [14]:
# which one do you need in your thesis
_, hidden3 = lstm(cat_input, hidden) # you only need final hidden
_, hidden4 = lstm(cat_input)
(hidden3[0] == hidden4[0]).sum()

tensor(15)

In [15]:
# you need to reduce the dimenision into 2
# you need to check linear layer from pytorch
W = torch.randn(hidden_dim, 2)
torch.mm(hidden3[0].view(1,128), W)

RuntimeError: shape '[1, 128]' is invalid for input of size 15