<a href="https://colab.research.google.com/github/adithya-tp/PyTorch-Notebooks/blob/master/05_Basic_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Imports, and initializing an LSTM Layer for a manual forward pass***

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim

In [0]:
# Number of features in the input vector presented at each timestep.
input_dim = 5

# Width of the hidden state (short-term memory). The cell state
# (long-term memory) always has this same width.
hidden_dim = 10

# How many LSTM layers are stacked on top of one another.
n_layers = 1

# batch_first=True lays the input and output tensors out as
# (batch_size, sequence_length, features).
lstm_layer = nn.LSTM(
    input_size=input_dim,
    hidden_size=hidden_dim,
    num_layers=n_layers,
    batch_first=True,
)

# ***Setting up random input data, and seed hidden states***

In [0]:
# Start with a single sample containing a single timestep.
batch_size = 1
seq_len = 1

# The layer was built with batch_first=True, so the input is shaped
# (batch_size, sequence_length, input_dimension) -- here (1, 1, 5).
inp = torch.randn((batch_size, seq_len, input_dim))

# Random seed values for the recurrent state: one hidden state and one cell
# state, each shaped (n_layers, batch_size, hidden_dim).
hidden_state = torch.randn((n_layers, batch_size, hidden_dim))
cell_state = torch.randn((n_layers, batch_size, hidden_dim))

# The LSTM takes its initial state as a (hidden_state, cell_state) tuple.
hidden = hidden_state, cell_state

# ***Trying out forward passes by varying the sequence length***

In [5]:
# Run one forward pass, giving the LSTM both the input and the
# (hidden_state, cell_state) tuple, then inspect what it returns:
# the shape of the per-timestep output and the updated state tuple.
out, hidden = lstm_layer(inp, hx=hidden)
print(out.size(), hidden, sep="\n")

torch.Size([1, 1, 10])
(tensor([[[ 0.0979, -0.0686,  0.3446,  0.2694,  0.1738,  0.0470,  0.2571,
           0.0373,  0.0210, -0.4024]]], grad_fn=<StackBackward>), tensor([[[ 0.2328, -0.1534,  0.6161,  0.4863,  0.3863,  0.1028,  0.5925,
           0.0698,  0.0467, -0.5926]]], grad_fn=<StackBackward>))


In [6]:
# Lengthen the sequence to three timesteps and run another forward pass,
# reusing the state tuple held in `hidden` as the initial state.
seq_len = 3
inp = torch.randn(batch_size, seq_len, input_dim)
out, hidden = lstm_layer(inp, hidden)
print(out.shape)
print(hidden)

# The second dimension of `out` has changed: it now matches the length of the
# input sequence we fed into the network, i.e. `out` holds one output vector
# per timestep.
#
# Many-to-many tasks (e.g. text generation): feed every timestep's output
# (all of `out`) through a fully connected layer to get a prediction at each
# timestep.
#
# Many-to-one tasks (e.g. sentiment analysis): only the final timestep
# matters. Note that `hidden` is a tuple (hidden_state, cell_state), so feed
# the hidden-state element -- `hidden[0]` -- into the fc layer rather than
# the tuple itself. For this single-layer, unidirectional LSTM that is the
# same vector as the last timestep of `out`, i.e. `out[:, -1, :]`.


torch.Size([1, 3, 10])
(tensor([[[ 0.0101,  0.0985,  0.1424, -0.0549,  0.2571, -0.0055, -0.0843,
          -0.1322,  0.0530,  0.0386]]], grad_fn=<StackBackward>), tensor([[[ 0.0352,  0.2537,  0.2543, -0.1307,  0.7461, -0.0097, -0.1506,
          -0.1896,  0.1252,  0.0726]]], grad_fn=<StackBackward>))
