In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch import LongTensor

In [None]:
data = ["long_str", "tiny", "medium"]

In [None]:
# Build the vocabulary: the padding token comes first (so it sits at index 0),
# followed by every distinct character of the corpus in sorted order.
vocab = ['<pad>', *sorted({ch for word in data for ch in word})]
vocab

['<pad>', '_', 'd', 'e', 'g', 'i', 'l', 'm', 'n', 'o', 'r', 's', 't', 'u', 'y']

In [None]:
# Build the character -> index table once; calling vocab.index() for every
# character would rescan the vocabulary list on each lookup.
char_to_idx = {tok: idx for idx, tok in enumerate(vocab)}

# Replace every character of every string by its vocabulary index.
vectorized_data = [[char_to_idx[tok] for tok in seq] for seq in data]
vectorized_data

[[6, 9, 8, 4, 1, 11, 12, 10], [12, 5, 8, 14], [7, 3, 2, 5, 13, 7]]

In [None]:
# Right-pad every sequence with 0 -- the index of '<pad>' in vocab -- up to
# the length of the longest sequence in the batch.
seq_lengths = torch.tensor([len(seq) for seq in vectorized_data], dtype=torch.long)

# pad_sequence does the zero-fill-and-copy in one call; batch_first=True keeps
# the (batch_size, max_len) layout used here.
sequence_tensor = nn.utils.rnn.pad_sequence(
    [torch.tensor(seq, dtype=torch.long) for seq in vectorized_data],
    batch_first=True,
    padding_value=0,
)

sequence_tensor

tensor([[ 6,  9,  8,  4,  1, 11, 12, 10],
        [12,  5,  8, 14,  0,  0,  0,  0],
        [ 7,  3,  2,  5, 13,  7,  0,  0]])

In [None]:
# Switch from (batch_size, max_len) to the sequence-first (max_len, batch_size)
# layout. NOTE: this rebinds sequence_tensor, so running the cell a second
# time transposes it back again.
sequence_tensor = sequence_tensor.transpose(0, 1)
sequence_tensor.shape

torch.Size([8, 3])

In [None]:
# Model hyper-parameters shared by the RNN examples.
hidden_dim = 5      # size of the RNN hidden state
embedding_dim = 5   # size of each character embedding

# One embedding row per vocabulary entry (the padding token included).
input_dim = len(vocab)
print(f"length of vocab: {input_dim}")

length of vocab: 15


In [None]:
# Single Layer RNN Unidirectional
class SingleLayerUnidirectionalRNN(nn.Module):
    """Embedding layer followed by a plain ``nn.RNN``, in sequence-first layout.

    Despite the name, depth and directionality are not fixed by this class:
    they are whatever ``n_layers`` and ``bidirectional`` the caller passes in.

    Args:
        input_dim: Number of rows in the embedding table (the vocabulary size).
        embedding_dim: Size of each embedding vector fed to the RNN.
        hidden_dim: Size of the RNN hidden state.
        n_layers: Number of stacked RNN layers.
        bidirectional: Whether the RNN also reads the sequence in reverse.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers, bidirectional):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional)

    def forward(self, input, lengths=None):
        """Embed a padded batch of token indices and run it through the RNN.

        Args:
            input: Integer tensor of token indices, shape (max_len, batch_size).
            lengths: Optional true (unpadded) length of every sequence in the
                batch, as a list of ints or a 1-D tensor with ``batch_size``
                entries (each must be at least 1). When given, the batch is
                packed so the RNN skips the padding steps. When omitted, every
                time step -- padding included -- is fed to the RNN.

        Returns:
            A tuple ``(output, hidden)``:

            * ``output``: (max_len, batch_size, num_directions * hidden_dim),
              the last layer's features at every time step. When ``lengths``
              is given, positions past a sequence's length are zero.
            * ``hidden``: (n_layers * num_directions, batch_size, hidden_dim),
              the final hidden state of every layer. When ``lengths`` is
              given, this is the state at each sequence's last real token.
        """
        # input: (max_len, batch_size)
        embed = self.embedding(input)
        # embed: (max_len, batch_size, embedding_dim)

        if lengths is None:
            # Original behaviour: run over the whole padded batch.
            output, hidden = self.rnn(embed)
            return output, hidden

        # pack_padded_sequence requires the lengths to live on the CPU.
        lengths = torch.as_tensor(lengths, dtype=torch.long).cpu()

        # enforce_sorted=False lets the batch stay in its original order.
        packed = nn.utils.rnn.pack_padded_sequence(embed, lengths, enforce_sorted=False)
        packed_output, hidden = self.rnn(packed)

        # total_length keeps the time dimension equal to the input's max_len.
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, total_length=input.size(0))

        return output, hidden

In [None]:
# Seed the global RNG so the randomly initialised weights -- and therefore the
# tensors displayed by the following cells -- are the same on every
# Restart & Run All.
torch.manual_seed(0)

n_layers = 1
bidirectional = False
model = SingleLayerUnidirectionalRNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional)

# sequence_tensor is (max_len, batch_size); the model returns the per-step
# outputs and the final hidden state.
output, hidden = model(sequence_tensor)

print(f"Input shape is : {sequence_tensor.shape}")
print(f"Output shape is : {output.shape}")
print(f"Hidden shape is : {hidden.shape}")

Input shape is : torch.Size([8, 3])
Output shape is : torch.Size([8, 3, 5])
Hidden shape is : torch.Size([1, 3, 5])


In [None]:
# RNN output at every time step, shape (max_len, batch_size, hidden_dim) = (8, 3, 5).
output

tensor([[[ 0.0466, -0.3719,  0.5914,  0.4617,  0.1719],
         [ 0.0078,  0.0540,  0.6547,  0.5201,  0.4642],
         [ 0.5363, -0.8247,  0.2313, -0.5295,  0.0374]],

        [[-0.6854, -0.0672,  0.8767,  0.2140,  0.6724],
         [-0.1483, -0.3017,  0.5405,  0.3033, -0.0147],
         [ 0.8267, -0.3039,  0.3077,  0.2298,  0.3711]],

        [[-0.1009, -0.7167,  0.7196, -0.6920,  0.3840],
         [ 0.2807, -0.5539,  0.7004, -0.6210,  0.5526],
         [-0.2302, -0.0485,  0.9598,  0.1026,  0.9369]],

        [[-0.8246,  0.1829,  0.9302,  0.6966,  0.8650],
         [-0.7667, -0.8195,  0.5083,  0.3231,  0.0710],
         [-0.4026, -0.5183,  0.5398,  0.3299, -0.0907]],

        [[-0.5295, -0.3372,  0.7480, -0.2393,  0.1420],
         [ 0.7092, -0.1942, -0.4517, -0.0866, -0.6800],
         [-0.2052, -0.5429,  0.4913, -0.6004,  0.2306]],

        [[-0.6980, -0.8168,  0.7027, -0.6641,  0.4806],
         [ 0.8501, -0.1405, -0.1164,  0.6665, -0.2095],
         [ 0.4164, -0.8782,  0.1272, -

In [None]:
# Final hidden state of the single RNN layer, shape (batch_size, hidden_dim).
# The padded batch was fed to the RNN without packing, so for the shorter
# sequences this is the state after their trailing <pad> steps.
hidden[0]

tensor([[ 0.0594, -0.4546,  0.3126, -0.1147, -0.1512],
        [ 0.8854, -0.2267, -0.0115,  0.4717, -0.2422],
        [ 0.8882, -0.0926, -0.1562,  0.4026, -0.3135]],
       grad_fn=<SelectBackward>)

In [None]:
class MultiLayerUnidirectionalRNN(nn.Module):
    """Embedding layer followed by a stacked ``nn.RNN``, in sequence-first layout.

    Depth and directionality are not fixed by this class: they are whatever
    ``n_layers`` and ``bidirectional`` the caller passes in.

    Args:
        input_dim: Number of rows in the embedding table (the vocabulary size).
        embedding_dim: Size of each embedding vector fed to the RNN.
        hidden_dim: Size of the RNN hidden state.
        n_layers: Number of stacked RNN layers.
        bidirectional: Whether the RNN also reads the sequence in reverse.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, n_layers, bidirectional):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional)

    def forward(self, input, lengths=None):
        """Embed a padded batch of token indices and run it through the RNN.

        Args:
            input: Integer tensor of token indices, shape (max_len, batch_size).
            lengths: Optional true (unpadded) length of every sequence in the
                batch, as a list of ints or a 1-D tensor with ``batch_size``
                entries (each must be at least 1). When given, the batch is
                packed so the RNN skips the padding steps. When omitted, every
                time step -- padding included -- is fed to the RNN.

        Returns:
            A tuple ``(output, hidden)``:

            * ``output``: (max_len, batch_size, num_directions * hidden_dim),
              the last layer's features at every time step. When ``lengths``
              is given, positions past a sequence's length are zero.
            * ``hidden``: (n_layers * num_directions, batch_size, hidden_dim),
              the final hidden state of every layer; ``hidden[-1]`` belongs to
              the last layer. When ``lengths`` is given, this is the state at
              each sequence's last real token.
        """
        # input: (max_len, batch_size)
        embed = self.embedding(input)
        # embed: (max_len, batch_size, embedding_dim)

        if lengths is None:
            # Original behaviour: run over the whole padded batch.
            output, hidden = self.rnn(embed)
            return output, hidden

        # pack_padded_sequence requires the lengths to live on the CPU.
        lengths = torch.as_tensor(lengths, dtype=torch.long).cpu()

        # enforce_sorted=False lets the batch stay in its original order.
        packed = nn.utils.rnn.pack_padded_sequence(embed, lengths, enforce_sorted=False)
        packed_output, hidden = self.rnn(packed)

        # total_length keeps the time dimension equal to the input's max_len.
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, total_length=input.size(0))

        return output, hidden

In [None]:
# Seed the global RNG so the randomly initialised weights -- and therefore the
# tensors displayed by the following cells -- are the same on every
# Restart & Run All.
torch.manual_seed(0)

n_layers = 2
bidirectional = False
model = MultiLayerUnidirectionalRNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional)

# sequence_tensor is (max_len, batch_size); the model returns the per-step
# outputs and the final hidden state of each layer.
output, hidden = model(sequence_tensor)

print(f"Input shape is : {sequence_tensor.shape}")
print(f"Output shape is : {output.shape}")
print(f"Hidden shape is : {hidden.shape}")

Input shape is : torch.Size([8, 3])
Output shape is : torch.Size([8, 3, 5])
Hidden shape is : torch.Size([2, 3, 5])


In [None]:
# Output of the two-layer RNN at every time step,
# shape (max_len, batch_size, hidden_dim) = (8, 3, 5).
output

tensor([[[ 0.3962, -0.0827, -0.1869, -0.3616, -0.1239],
         [ 0.2613, -0.0506, -0.3532, -0.1863,  0.4519],
         [ 0.3169, -0.0741, -0.2440, -0.3073,  0.3363]],

        [[ 0.4915,  0.5245, -0.7064, -0.5213,  0.0417],
         [ 0.7357, -0.4159,  0.1277, -0.1892,  0.2926],
         [ 0.5409,  0.0034, -0.2457, -0.4074,  0.4711]],

        [[ 0.7653, -0.2309, -0.2230, -0.2830, -0.4220],
         [ 0.7691,  0.2739, -0.3700, -0.6018,  0.1591],
         [ 0.4161,  0.2183, -0.4183, -0.4646,  0.4102]],

        [[ 0.8034,  0.2950, -0.4150, -0.7333, -0.3047],
         [ 0.7307,  0.1854, -0.5142, -0.4657,  0.2054],
         [ 0.7521, -0.3472,  0.0093, -0.2654,  0.3317]],

        [[ 0.7531,  0.4677, -0.6864, -0.6629, -0.3915],
         [ 0.7217,  0.1843, -0.4873, -0.4060,  0.0701],
         [ 0.7865,  0.0918, -0.2145, -0.5771,  0.4320]],

        [[ 0.8253,  0.2184, -0.6393, -0.4298,  0.0181],
         [ 0.7312,  0.2092, -0.5019, -0.4286, -0.0032],
         [ 0.5978,  0.2181, -0.4748, -

In [None]:
# Output at the last time step for every sequence in the batch; for this
# unidirectional model it has the same values as hidden[-1].
output[-1, :, :]

tensor([[ 0.6076,  0.0086, -0.2606, -0.4009, -0.0324],
        [ 0.7553,  0.2353, -0.5373, -0.4538, -0.0777],
        [ 0.6962,  0.1993, -0.4628, -0.4153,  0.0994]],
       grad_fn=<SliceBackward>)

In [None]:
# Final hidden state of every layer, shape (n_layers, batch_size, hidden_dim) = (2, 3, 5).
# hidden[0] is the first layer; hidden[1] is the last layer.
hidden

tensor([[[-0.5130,  0.3425,  0.4363,  0.6125,  0.2856],
         [-0.5536, -0.5419,  0.2596,  0.6774, -0.3151],
         [-0.5413, -0.5923,  0.2885,  0.6548, -0.2219]],

        [[ 0.6076,  0.0086, -0.2606, -0.4009, -0.0324],
         [ 0.7553,  0.2353, -0.5373, -0.4538, -0.0777],
         [ 0.6962,  0.1993, -0.4628, -0.4153,  0.0994]]],
       grad_fn=<StackBackward>)

In [None]:
# Final hidden state of the last layer only, shape (batch_size, hidden_dim).
hidden[-1]

tensor([[ 0.6076,  0.0086, -0.2606, -0.4009, -0.0324],
        [ 0.7553,  0.2353, -0.5373, -0.4538, -0.0777],
        [ 0.6962,  0.1993, -0.4628, -0.4153,  0.0994]],
       grad_fn=<SelectBackward>)