# Lab 11-6. RNN PackedSequence

In [1]:
import torch
import numpy as np
from torch.nn.utils.rnn import pad_sequence, pack_sequence, pack_padded_sequence, pad_packed_sequence

**Sample Data**

In [4]:
data = ['hello world',
        'midnight',
        'calculation',
        'path',
        'short circuit'] # maximum length = 13

char_set = [''] + list(set(char for seq in data for char in seq))
char2idx = {char: idx for idx, char in enumerate(char_set)}

print('char_set :', char_set)
print('char_set lenght :', len(char_set))

char_set : ['', 'w', 'c', 'n', 'a', 'g', 'h', ' ', 'd', 'e', 'o', 'l', 'r', 'm', 'i', 'u', 'p', 't', 's']
char_set lenght : 19


In [6]:
X = [torch.LongTensor([char2idx[char] for char in seq]) for seq in data]

for sequence in X:
    print(sequence)

tensor([ 6,  9, 11, 11, 10,  7,  1, 10, 12, 11,  8])
tensor([13, 14,  8,  3, 14,  5,  6, 17])
tensor([ 2,  4, 11,  2, 15, 11,  4, 17, 14, 10,  3])
tensor([16,  4, 17,  6])
tensor([18,  6, 10, 12, 17,  7,  2, 14, 12,  2, 15, 14, 17])


In [7]:
lengths = [len(seq) for seq in X]
print('lengths:', lengths)

lengths: [11, 8, 11, 4, 13]


In [8]:
# Make a Tensor of shape (Batch x Maximum_Sequence_Length)
padded_sequence = pad_sequence(X, batch_first=True) # X is now padded sequence
print(padded_sequence)
print(padded_sequence.shape)

tensor([[ 6,  9, 11, 11, 10,  7,  1, 10, 12, 11,  8,  0,  0],
        [13, 14,  8,  3, 14,  5,  6, 17,  0,  0,  0,  0,  0],
        [ 2,  4, 11,  2, 15, 11,  4, 17, 14, 10,  3,  0,  0],
        [16,  4, 17,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [18,  6, 10, 12, 17,  7,  2, 14, 12,  2, 15, 14, 17]])
torch.Size([5, 13])


In [10]:
# sort by descending lenghts
sorted_idx = sorted(range(len(lengths)), key=lengths.__getitem__, reverse=True)
sorted_X   = [X[idx] for idx in sorted_idx]

#check converted result
for sequence in sorted_X:
    print(sequence)

tensor([18,  6, 10, 12, 17,  7,  2, 14, 12,  2, 15, 14, 17])
tensor([ 6,  9, 11, 11, 10,  7,  1, 10, 12, 11,  8])
tensor([ 2,  4, 11,  2, 15, 11,  4, 17, 14, 10,  3])
tensor([13, 14,  8,  3, 14,  5,  6, 17])
tensor([16,  4, 17,  6])


In [11]:
# make packed sequence
packed_sequence = pack_sequence(sorted_X)
print(packed_sequence)

PackedSequence(data=tensor([18,  6,  2, 13, 16,  6,  9,  4, 14,  4, 10, 11, 11,  8, 17, 12, 11,  2,
         3,  6, 17, 10, 15, 14,  7,  7, 11,  5,  2,  1,  4,  6, 14, 10, 17, 17,
        12, 12, 14,  2, 11, 10, 15,  8,  3, 14, 17]), batch_sizes=tensor([5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 1, 1]), sorted_indices=None, unsorted_indices=None)


**Apply One-hot Character Embedding**

In [12]:
# one-hot embedding using PaddedSequence
eye = torch.eye(len(char_set)) # Identity matrix of shape (len(char_set), len(char_set))
embedded_tensor = eye[padded_sequence]

In [17]:
# one-hot embedding using PackedSequence
embedded_packed_seq = pack_sequence([eye[X[idx]] for idx in sorted_idx])
print(embedded_packed_seq.data.shape)

torch.Size([47, 19])


### Build RNN Model

In [14]:
# declare RNN
rnn = torch.nn.RNN(input_size=len(char_set), hidden_size=30, batch_first=True)

In [15]:
rnn_output, hidden = rnn(embedded_tensor)
print(rnn_output.shape) # shape: (batch_size, max_seq_length, hidden_size)
print(hidden.shape)     # shape: (num_layers * num_directions, batch_size, hidden_size)

torch.Size([5, 13, 30])
torch.Size([1, 5, 30])


In [18]:
rnn_output, hidden = rnn(embedded_packed_seq)
print(rnn_output.data.shape)
print(hidden.data.shape)

torch.Size([47, 30])
torch.Size([1, 5, 30])


**pad_packed_sequence**

[`torch.nn.utils.rnn.pad_packed_sequence` documentation](https://pytorch.org/docs/stable/generated/torch.nn.utils.rnn.pad_packed_sequence.html)

In [19]:
# pad_packed_sequence
    # function that make packedSequence into paddedSequence(Tensor)
unpacked_sequence, seq_lengths = pad_packed_sequence(embedded_packed_seq, batch_first=True)
print(unpacked_sequence.shape)
print(seq_lengths)

torch.Size([5, 13, 19])
tensor([13, 11, 11,  8,  4])


In [21]:
embedded_padded_sequence = eye[pad_sequence(sorted_X, batch_first=True)]
print(embedded_padded_sequence.shape)

torch.Size([5, 13, 19])


**pack_padded_sequence**

[`torch.nn.utils.rnn.pack_padded_sequence` documentation](https://pytorch.org/docs/stable/generated/torch.nn.utils.rnn.pack_padded_sequence.html)

In [22]:
sorted_lengths = sorted(lengths, reverse=True)
new_packed_sequence = pack_padded_sequence(embedded_padded_sequence, sorted_lengths, batch_first=True)
print(new_packed_sequence.data.shape)
print(new_packed_sequence.batch_sizes)

torch.Size([47, 19])
tensor([5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 1, 1])
