In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
# Vocabulary: each of the four letters is a one-hot vector of length 4.
h, e, l, o = (
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
)

## Breaking up a single slice of bread and feeding it into the RNN

In [3]:
# Seed first so the RNN weights and the random initial hidden state are reproducible.
torch.manual_seed(50)
cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)
# Initial hidden state dims: [num_layers * num_directions, batch_size, hidden_size].
# Plain tensors are used directly; the torch.autograd.Variable wrapper is deprecated.
hidden = torch.randn(1, 1, 2)

# How to remember what input_size is:
# what is the smallest unit / token in your dataset (or the task you're trying to solve)?
# Here, it's a letter. The length of the numerical representation of that unit
# (a one-hot vector of length 4) is the input size.
#
# `inputs` is a single un-batched sequence with dims [seq_len, input_size] == [5, 4].
# With batch_first=True the RNN is fed [batch_size, seq_len, input_size], so the
# batch dimension is added with .view(...) right before the data goes into the RNN.
inputs = torch.tensor([h, e, l, l, o], dtype=torch.float32)
inputs

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [4]:
# Slice the single "slice of bread" (one sequence) row by row and feed it to the RNN
# one letter at a time, carrying the hidden state over from step to step.
for letter in inputs:
    # Reshape one letter to [batch_size=1, seq_len=1, input_size].
    step_input = letter.view(1, 1, -1)
    out, hidden = cell(step_input, hidden)
    print("out: {}".format(out))
    print("hidden: {}".format(hidden))
    print(
        "one input size: {}, out size: {}".format(step_input.size(), out.size()),
        end="\n\n",
    )

out: tensor([[[-0.5393,  0.9402]]], grad_fn=<TransposeBackward1>)
hidden: tensor([[[-0.5393,  0.9402]]], grad_fn=<StackBackward>)
one input size: torch.Size([1, 1, 4]), out size: torch.Size([1, 1, 2])

out: tensor([[[0.0814, 0.3760]]], grad_fn=<TransposeBackward1>)
hidden: tensor([[[0.0814, 0.3760]]], grad_fn=<StackBackward>)
one input size: torch.Size([1, 1, 4]), out size: torch.Size([1, 1, 2])

out: tensor([[[0.2418, 0.5283]]], grad_fn=<TransposeBackward1>)
hidden: tensor([[[0.2418, 0.5283]]], grad_fn=<StackBackward>)
one input size: torch.Size([1, 1, 4]), out size: torch.Size([1, 1, 2])

out: tensor([[[0.3517, 0.4476]]], grad_fn=<TransposeBackward1>)
hidden: tensor([[[0.3517, 0.4476]]], grad_fn=<StackBackward>)
one input size: torch.Size([1, 1, 4]), out size: torch.Size([1, 1, 2])

out: tensor([[[-0.1629,  0.4402]]], grad_fn=<TransposeBackward1>)
hidden: tensor([[[-0.1629,  0.4402]]], grad_fn=<StackBackward>)
one input size: torch.Size([1, 1, 4]), out size: torch.Size([1, 1, 2])



## We can do better: Let's feed the whole slice of bread in

In [5]:
# Re-seed and rebuild the RNN and the initial hidden state so this section starts
# from the same weights and the same hidden state as the step-by-step run above.
torch.manual_seed(50)
cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)
# [num_layers * num_directions, batch_size, hidden_size]; no Variable wrapper needed
# (torch.autograd.Variable is deprecated, tensors are used directly).
hidden = torch.randn(1, 1, 2)

In [6]:
# Instead of looping, hand the whole slice of bread (the full sequence) to the RNN in
# one call and get back the output tensor for every time step at once.
inputs = inputs.view(1, 5, -1)  # -> [batch_size=1, seq_len=5, input_size]
out, hidden = cell(inputs, hidden)
print("out: {}".format(out))
print("hidden: {}".format(hidden))
print("sequence input size", inputs.shape, "out size", out.shape)

out: tensor([[[-0.5393,  0.9402],
         [ 0.0814,  0.3760],
         [ 0.2418,  0.5283],
         [ 0.3517,  0.4476],
         [-0.1629,  0.4402]]], grad_fn=<TransposeBackward1>)
hidden: tensor([[[-0.1629,  0.4402]]], grad_fn=<StackBackward>)
sequence input size torch.Size([1, 5, 4]) out size torch.Size([1, 5, 2])


## Heck, let's feed in an entire loaf of bread dawgg!
[Just make sure your RNN has made arrangements to accommodate the same.]

In [7]:
# "That's not good enough... We have to go deeper..."
#
# We fed in a single slice of bread to get the corresponding "hidden" version of that slice.
# Now we feed in multiple bread slices (a batch of sequences) in one call. Isn't that cool?
# The initial hidden state must be sized to accept multiple slices of data:
# [num_layers * num_directions, batch_size, hidden_size] == [1, 3, 2].

torch.manual_seed(50)
cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)
# Plain tensors are used directly; the torch.autograd.Variable wrapper is deprecated.
hidden = torch.randn(1, 3, 2)
# [batch_size=3, seq_len=5, input_size=4]
inputs = torch.tensor(
    [[h, e, l, l, o],
     [e, o, l, l, l],
     [l, l, e, e, l]],
    dtype=torch.float32,
)

out, hidden = cell(inputs, hidden)
print("out: {}".format(out))
print("hidden: {}".format(hidden))
print("sequence input size", inputs.size(), "out size", out.size())

out: tensor([[[-0.5393,  0.9402],
         [ 0.0814,  0.3760],
         [ 0.2418,  0.5283],
         [ 0.3517,  0.4476],
         [-0.1629,  0.4402]],

        [[-0.8194,  0.4513],
         [-0.3054,  0.7992],
         [ 0.4408,  0.6273],
         [ 0.4288,  0.3472],
         [ 0.2667,  0.3854]],

        [[-0.0508,  0.5430],
         [ 0.3264,  0.5620],
         [-0.0527, -0.0152],
         [-0.4468,  0.2586],
         [ 0.1002,  0.7090]]], grad_fn=<TransposeBackward1>)
hidden: tensor([[[-0.1629,  0.4402],
         [ 0.2667,  0.3854],
         [ 0.1002,  0.7090]]], grad_fn=<StackBackward>)
sequence input size torch.Size([3, 5, 4]) out size torch.Size([3, 5, 2])


## What if we didn't specify batch_first=True when we created our RNN?

In [8]:
# What if we didn't pass batch_first=True when building the RNN? (The PyTorch default is False.)
# The data below is still laid out batch-first: [batch_size, seq_len, input_size] == [3, 5, 4].
# For this RNN to accept it, we must first swap the first two dimensions, which changes the
# layout from [batch_size, seq_len, input_size] to [seq_len, batch_size, input_size] == [5, 3, 4].
# The output comes back in the same time-major layout: [seq_len, batch_size, hidden_size].
torch.manual_seed(50)
cell = nn.RNN(input_size=4, hidden_size=2)
# Hidden state is [num_layers * num_directions, batch_size, hidden_size] == [1, 3, 2].
# Plain tensors are used directly; the torch.autograd.Variable wrapper is deprecated.
hidden = torch.randn(1, 3, 2)
inputs = torch.tensor(
    [[h, e, l, l, o],
     [e, o, l, l, l],
     [l, l, e, e, l]],
    dtype=torch.float32,
)

# batch-first -> time-major
inputs = inputs.transpose(dim0=0, dim1=1)
out, hidden = cell(inputs, hidden)
print("out: {}".format(out))
print("hidden: {}".format(hidden))
print("sequence input size", inputs.size(), "out size", out.size())

out: tensor([[[-0.5393,  0.9402],
         [-0.8194,  0.4513],
         [-0.0508,  0.5430]],

        [[ 0.0814,  0.3760],
         [-0.3054,  0.7992],
         [ 0.3264,  0.5620]],

        [[ 0.2418,  0.5283],
         [ 0.4408,  0.6273],
         [-0.0527, -0.0152]],

        [[ 0.3517,  0.4476],
         [ 0.4288,  0.3472],
         [-0.4468,  0.2586]],

        [[-0.1629,  0.4402],
         [ 0.2667,  0.3854],
         [ 0.1002,  0.7090]]], grad_fn=<StackBackward>)
hidden: tensor([[[-0.1629,  0.4402],
         [ 0.2667,  0.3854],
         [ 0.1002,  0.7090]]], grad_fn=<StackBackward>)
sequence input size torch.Size([5, 3, 4]) out size torch.Size([5, 3, 2])
