[Reference](https://gist.github.com/Tushar-N/dfca335e370a2bc3bc79876e6270099e)

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

In [2]:
# Run LSTM on a batch of following 3 sequences
seqs = ['tiger', 'bear', 'dog']

In [3]:
# 1. Construct vocabulary
vocab = ['<PAD>'] + sorted(set(sum([list(seq) for seq in seqs], [])))
print(vocab)

['<PAD>', 'a', 'b', 'd', 'e', 'g', 'i', 'o', 'r', 't']


In [4]:
# 2. Load indexed data
vectorized_seqs = [[vocab.index(s) for s in seq] for seq in seqs]
print(vectorized_seqs)

[[9, 6, 5, 4, 8], [2, 4, 1, 8], [3, 7, 5]]


In [5]:
# 3. Create an LSTM
embedding = nn.Embedding(num_embeddings=len(vocab), embedding_dim=4)
lstm = nn.LSTM(input_size=4, hidden_size=5, batch_first=True)

In [6]:
# 4. Pad sequences with 0's till max lenght sequence
seq_lengths = torch.LongTensor(list(map(len, vectorized_seqs)))
print(seq_lengths)
batch_sum_seq_length = sum(seq_lengths)
print(batch_sum_seq_length)

tensor([5, 4, 3])
tensor(12)


In [7]:
# 5. Create a placeholder tensor initialized with zero
seq_tensor = Variable(torch.zeros((len(vectorized_seqs), seq_lengths.max()))).long()
print(seq_tensor)

tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]])


In [8]:
# 6. Populate placeholder with data
for idx, (seq, seq_len) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
    
print(seq_tensor)   

tensor([[9, 6, 5, 4, 8],
        [2, 4, 1, 8, 0],
        [3, 7, 5, 0, 0]])


In [9]:
# 7. Get embeddings
embeddings = embedding(seq_tensor)
print(embeddings.data)
# Row group-0: tiger
# Row group-1: bear (last row is <PAD>)
# Row group-2: dog (last 2 rows are <PAD>)

tensor([[[-0.1774, -0.6650, -1.9646, -0.5543],
         [ 1.5618, -0.2271, -1.0775, -0.0864],
         [-1.4178,  0.2782,  0.3966,  0.3674],
         [-0.8248,  0.6535,  0.2662, -0.6617],
         [-0.2790, -1.5629, -1.4913,  1.2494]],

        [[ 0.3114,  0.4633,  0.6923,  1.6583],
         [-0.8248,  0.6535,  0.2662, -0.6617],
         [ 0.0282,  1.3601, -0.3172,  1.6066],
         [-0.2790, -1.5629, -1.4913,  1.2494],
         [-1.8080,  1.9751,  1.2385, -1.0715]],

        [[ 0.1876,  0.1366, -0.6343, -0.3525],
         [-0.5165, -0.6756,  0.9496, -0.2688],
         [-1.4178,  0.2782,  0.3966,  0.3674],
         [-1.8080,  1.9751,  1.2385, -1.0715],
         [-1.8080,  1.9751,  1.2385, -1.0715]]])


In [10]:
# 8. Use pack_padded_sequence with embeddings and sequence lengths
packed_seq = pack_padded_sequence(embeddings, seq_lengths.numpy(), batch_first=True)
print(packed_seq.batch_sizes)
print(torch.sum(packed_seq.batch_sizes))
print(packed_seq.data.shape)
print(packed_seq.data)

tensor([3, 3, 3, 2, 1], grad_fn=<PackPaddedBackward>)
tensor(12, grad_fn=<SumBackward0>)
torch.Size([12, 4])
tensor([[-0.1774, -0.6650, -1.9646, -0.5543],
        [ 0.3114,  0.4633,  0.6923,  1.6583],
        [ 0.1876,  0.1366, -0.6343, -0.3525],
        [ 1.5618, -0.2271, -1.0775, -0.0864],
        [-0.8248,  0.6535,  0.2662, -0.6617],
        [-0.5165, -0.6756,  0.9496, -0.2688],
        [-1.4178,  0.2782,  0.3966,  0.3674],
        [ 0.0282,  1.3601, -0.3172,  1.6066],
        [-1.4178,  0.2782,  0.3966,  0.3674],
        [-0.8248,  0.6535,  0.2662, -0.6617],
        [-0.2790, -1.5629, -1.4913,  1.2494],
        [-0.2790, -1.5629, -1.4913,  1.2494]], grad_fn=<PackPaddedBackward>)


**Random Output**

```
[[-0.4805,  0.7362,  0.2114,  0.2493], >>> t
 [-0.9830,  1.2551, -2.0148, -0.5703], >>> b
 [-1.4842,  0.1411, -0.5256, -0.0952], >>> d
 [ 1.8393,  0.3635, -0.0469, -0.6119], >>> i
 [-1.9377,  0.3498,  0.5100, -0.4590], >>> e
 [ 1.3814,  1.2770, -0.2290, -0.5498], >>> o
 [ 0.3315,  0.1699, -0.5243, -1.0015], >>> g (tig)
 [ 0.2993, -0.2961,  1.9194, -0.2453], >>> a
 [ 0.3315,  0.1699, -0.5243, -1.0015], >>> g (dog)
 [-1.9377,  0.3498,  0.5100, -0.4590], >>> e
 [-0.4022, -1.2878, -2.3163,  1.8120], >>> r (bear)
 [-0.4022, -1.2878, -2.3163,  1.8120]] >>> r (tiger)
```


| 3 | 3 | 3 | 2 | 1 |
|---|---|---|---|---|
| t | i | g | e | r |
| b | e | a | r |  |
| d | o | g |    |

In [11]:
# 9. LSTM forward pass
packed_output, (h, c) = lstm(packed_seq)

print(packed_output.batch_sizes)
print(packed_output.data.shape)  # 12 x LSTM output size
print(packed_output.data) 

tensor([3, 3, 3, 2, 1], grad_fn=<PackPaddedBackward>)
torch.Size([12, 5])
tensor([[ 0.0774, -0.0159,  0.1193, -0.0523, -0.0840],
        [ 0.0189,  0.0229, -0.2914, -0.1422,  0.0426],
        [ 0.0213, -0.0601,  0.0458, -0.0714, -0.0969],
        [ 0.0902,  0.0247,  0.1746, -0.0165, -0.0758],
        [-0.0566, -0.1729, -0.1680, -0.2349, -0.1319],
        [-0.0881,  0.0057, -0.2034,  0.0713,  0.0363],
        [ 0.0341, -0.0562, -0.1220, -0.1587, -0.1454],
        [ 0.0685, -0.2064, -0.3035, -0.1166, -0.1028],
        [-0.0808, -0.0834, -0.3579, -0.1623, -0.0402],
        [-0.0367, -0.1843, -0.1064, -0.1851, -0.2191],
        [ 0.1139, -0.0036, -0.2462, -0.1719, -0.0392],
        [ 0.0840,  0.0177, -0.1714, -0.1227, -0.0618]], grad_fn=<CatBackward>)


In [12]:
# 10. Pad packed sequence
output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
print(input_sizes)
print(output.data)

tensor([5, 4, 3])
tensor([[[ 0.0774, -0.0159,  0.1193, -0.0523, -0.0840],
         [ 0.0902,  0.0247,  0.1746, -0.0165, -0.0758],
         [ 0.0341, -0.0562, -0.1220, -0.1587, -0.1454],
         [-0.0367, -0.1843, -0.1064, -0.1851, -0.2191],
         [ 0.0840,  0.0177, -0.1714, -0.1227, -0.0618]],

        [[ 0.0189,  0.0229, -0.2914, -0.1422,  0.0426],
         [-0.0566, -0.1729, -0.1680, -0.2349, -0.1319],
         [ 0.0685, -0.2064, -0.3035, -0.1166, -0.1028],
         [ 0.1139, -0.0036, -0.2462, -0.1719, -0.0392],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

        [[ 0.0213, -0.0601,  0.0458, -0.0714, -0.0969],
         [-0.0881,  0.0057, -0.2034,  0.0713,  0.0363],
         [-0.0808, -0.0834, -0.3579, -0.1623, -0.0402],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]])


In [13]:
scores, _ = lstm(embeddings)
print(scores.shape)

torch.Size([3, 5, 5])
