In [3]:
import torch
import torch.nn as nn

In [8]:
# Four stacked RNN layers: 100 input features -> 20 hidden units per layer.
rnn = nn.RNN(100, 20, num_layers=4)
print(rnn)
# Input layout: [seq_len, batch_sz, feature_len]
x = torch.randn((10, 3, 100))
# h_0 layout: [layer_num, batch_sz, hidden_len]; the sizes are read back from
# the module and the input instead of being repeated as literals.
out, h = rnn(x, torch.zeros(rnn.num_layers, x.size(1), rnn.hidden_size))
# out -> [seq_len, batch_sz, hidden_len], h -> [layer_num, batch_sz, hidden_len]
print(out.size(), h.size())

RNN(100, 20, num_layers=4)
torch.Size([10, 3, 20]) torch.Size([4, 3, 20])


In [5]:
# Smallest possible setup (one layer, 3 features -> 2 hidden units) so the
# resulting tensors are short enough to read in full.
rnn = nn.RNN(3, 2, num_layers=1)
print(rnn)
# Input layout: [seq_len, batch_sz, feature_len]
x = torch.randn((4, 3, 3))
# h_0 layout: [layer_num, batch_sz, hidden_len], all zeros.
out, h = rnn(x, torch.zeros(rnn.num_layers, x.size(1), rnn.hidden_size))
print(out.size(), h.size())

RNN(3, 2)
torch.Size([4, 3, 2]) torch.Size([1, 3, 2])


In [6]:
# Display the per-time-step output of the 1-layer RNN: [seq_len=4, batch_sz=3, hidden_len=2].
# The last time step (out[-1]) is the block to compare against `h`.
out

tensor([[[-0.0726,  0.4869],
         [-0.1524,  0.4313],
         [-0.2947,  0.0938]],

        [[ 0.0922,  0.2413],
         [-0.4865,  0.1921],
         [-0.7046,  0.3641]],

        [[ 0.4198, -0.0274],
         [-0.0031,  0.1791],
         [-0.2733, -0.1712]],

        [[-0.1749,  0.2819],
         [-0.8739,  0.2964],
         [-0.1505,  0.0395]]], grad_fn=<StackBackward>)

In [7]:
# Display the final hidden state of the 1-layer RNN: [layer_num=1, batch_sz=3, hidden_len=2].
# With a single layer its values coincide with the last time step of `out`.
h

tensor([[[-0.1749,  0.2819],
         [-0.8739,  0.2964],
         [-0.1505,  0.0395]]], grad_fn=<StackBackward>)

In [10]:
# Same tiny sizes as the single-layer example, but stacked 5 layers deep so the
# difference between `out` and `h` becomes visible.
rnn = nn.RNN(3, 2, num_layers=5)
print(rnn)
# Input layout: [seq_len, batch_sz, feature_len]
x = torch.randn((4, 3, 3))
# h_0 layout: [layer_num, batch_sz, hidden_len], all zeros.
out, h = rnn(x, torch.zeros(rnn.num_layers, x.size(1), rnn.hidden_size))
print(out.size(), h.size())

RNN(3, 2, num_layers=5)
torch.Size([4, 3, 2]) torch.Size([5, 3, 2])


In [11]:
# Display the output of the 5-layer RNN: still [seq_len=4, batch_sz=3, hidden_len=2],
# i.e. one hidden vector per time step rather than one per layer.
out

tensor([[[ 0.3873, -0.7097],
         [ 0.3309, -0.7319],
         [ 0.3273, -0.7330]],

        [[ 0.0108, -0.6608],
         [-0.0616, -0.6494],
         [-0.0580, -0.6465]],

        [[ 0.1291, -0.6409],
         [ 0.2026, -0.5927],
         [ 0.2053, -0.5925]],

        [[ 0.0967, -0.6369],
         [ 0.1388, -0.6558],
         [ 0.0541, -0.6807]]], grad_fn=<StackBackward>)

In [12]:
# Display the final hidden state of every layer: [layer_num=5, batch_sz=3, hidden_len=2].
# Only the last entry (h[-1]) coincides with the last time step of `out`.
h

tensor([[[-0.4991,  0.6221],
         [-0.8420, -0.1179],
         [-0.0058,  0.9031]],

        [[ 0.1406, -0.7093],
         [ 0.7324, -0.4716],
         [-0.2299, -0.8158]],

        [[ 0.5474, -0.1849],
         [ 0.3050, -0.1564],
         [ 0.6258,  0.2289]],

        [[ 0.5059, -0.5888],
         [ 0.4571, -0.6981],
         [ 0.5753, -0.5706]],

        [[ 0.0967, -0.6369],
         [ 0.1388, -0.6558],
         [ 0.0541, -0.6807]]], grad_fn=<StackBackward>)

In [14]:
print('rnn by cell')

# --- One layer, unrolled by hand: one RNNCell call per time step. ---
x = torch.randn((10, 3, 100))                      # [seq_len, batch_sz, feature_len]
cell1 = nn.RNNCell(input_size=100, hidden_size=20)
h1 = x.new_zeros(x.size(1), cell1.hidden_size)     # [batch_sz, hidden_len]
for xt in x.unbind(dim=0):                         # xt: [batch_sz, feature_len]
    h1 = cell1(xt, h1)
print(h1.size())


# --- Two layers: layer 1 (100 -> 30) feeds layer 2 (30 -> 20) at every step. ---
cell1 = nn.RNNCell(input_size=100, hidden_size=30)
cell2 = nn.RNNCell(input_size=cell1.hidden_size, hidden_size=20)
h1 = x.new_zeros(x.size(1), cell1.hidden_size)
h2 = x.new_zeros(x.size(1), cell2.hidden_size)
for xt in x.unbind(dim=0):
    h1 = cell1(xt, h1)   # layer-1 state for this step ...
    h2 = cell2(h1, h2)   # ... is the input of layer 2
print(h2.size())

rnn by cell
torch.Size([3, 20])
torch.Size([3, 20])


In [15]:
# 4-layer LSTM: 100 input features -> 20 hidden units per layer.
lstm = nn.LSTM(100, 20, num_layers=4)
print(lstm)
# Input layout: [seq_len, bz, feature_len]
x = torch.randn((10, 3, 100))
# No initial state is passed; per the PyTorch docs (h_0, c_0) then default to zeros.
out, (h, c) = lstm(x)
# out: [seq_len, bz, hidden_len]; h and c: [layer_num, bz, hidden_len]
print(out.size(), h.size(), c.size())

LSTM(100, 20, num_layers=4)
torch.Size([10, 3, 20]) torch.Size([4, 3, 20]) torch.Size([4, 3, 20])


In [None]:
# NOTE(review): this cell repeats the 4-layer LSTM example from the previous cell
# and carries no execution count -- consider deleting it.
lstm = nn.LSTM(100, 20, num_layers=4)
print(lstm)
x = torch.randn((10, 3, 100))  # [seq_len, bz, feature_len]
out, (h, c) = lstm(x)  # initial (h_0, c_0) omitted
# Shapes, in print order: [seq_len, bz, hidden_len], then twice [layer_num, bz, hidden_len]
print(out.size(), h.size(), c.size())

In [16]:
# Tiny 5-layer LSTM (3 features -> 2 hidden units) so out / h / c can be printed in full.
lstm = nn.LSTM(3, 2, num_layers=5)
print(lstm)
# Input layout: [seq_len, bz, feature_len]
x = torch.randn((4, 3, 3))
# No initial state is passed; per the PyTorch docs (h_0, c_0) then default to zeros.
out, (h, c) = lstm(x)
# out: [seq_len, bz, hidden_len]; h and c: [layer_num, bz, hidden_len]
print(out.size(), h.size(), c.size())

LSTM(3, 2, num_layers=5)
torch.Size([4, 3, 2]) torch.Size([5, 3, 2]) torch.Size([5, 3, 2])


In [17]:
# Display the output of the 5-layer LSTM: [seq_len=4, bz=3, hidden_len=2],
# one hidden vector per time step.
out

tensor([[[0.0244, 0.1509],
         [0.0244, 0.1509],
         [0.0244, 0.1509]],

        [[0.0505, 0.2394],
         [0.0505, 0.2393],
         [0.0505, 0.2394]],

        [[0.0743, 0.2974],
         [0.0743, 0.2974],
         [0.0743, 0.2974]],

        [[0.0945, 0.3382],
         [0.0945, 0.3381],
         [0.0945, 0.3382]]], grad_fn=<StackBackward>)

In [18]:
# Display the final hidden state of every LSTM layer: [layer_num=5, bz=3, hidden_len=2].
# The last entry (h[-1]) coincides with the last time step of `out`.
h

tensor([[[ 0.0907, -0.0365],
         [-0.1266, -0.1164],
         [-0.2623,  0.0528]],

        [[ 0.0125, -0.1519],
         [ 0.0184, -0.1204],
         [ 0.0353, -0.1028]],

        [[-0.3961,  0.6495],
         [-0.3967,  0.6493],
         [-0.3961,  0.6473]],

        [[ 0.1129,  0.0822],
         [ 0.1130,  0.0822],
         [ 0.1132,  0.0823]],

        [[ 0.0945,  0.3382],
         [ 0.0945,  0.3381],
         [ 0.0945,  0.3382]]], grad_fn=<StackBackward>)

In [19]:
# Display the final cell state of every LSTM layer, same [layer_num=5, bz=3, hidden_len=2]
# layout as `h`. Unlike h[-1], the values of c[-1] do not show up in `out`.
c

tensor([[[ 0.2564, -0.0649],
         [-0.2332, -0.8014],
         [-0.3627,  0.1865]],

        [[ 0.0387, -0.4104],
         [ 0.0545, -0.3297],
         [ 0.0975, -0.3081]],

        [[-0.6566,  1.5493],
         [-0.6554,  1.5406],
         [-0.6528,  1.5326]],

        [[ 0.1725,  0.1922],
         [ 0.1728,  0.1921],
         [ 0.1731,  0.1922]],

        [[ 0.3026,  0.4658],
         [ 0.3026,  0.4657],
         [ 0.3026,  0.4657]]], grad_fn=<StackBackward>)

In [21]:
x = torch.randn(10, 3, 100)  # [seq_len, batch_sz, feature_len]

# --- One layer, unrolled by hand with a single LSTMCell. ---
print('one layer lstm')
cell = nn.LSTMCell(input_size=100, hidden_size=20)
# Hidden state and cell state both start at zero: [batch_sz, hidden_len].
h = torch.zeros(3, 20)
c = torch.zeros(3, 20)
for xt in x:  # xt: [batch_sz, feature_len]
    # LSTMCell documents its state argument as an (h, c) tuple, so pass a tuple
    # rather than a list; it returns the updated (h, c) pair.
    h, c = cell(xt, (h, c))
print(h.shape, c.shape)


# --- Two layers: layer 1 (100 -> 30) feeds layer 2 (30 -> 20) at every step. ---
print('two layer lstm')
cell1 = nn.LSTMCell(input_size=100, hidden_size=30)
cell2 = nn.LSTMCell(input_size=30, hidden_size=20)
h1 = torch.zeros(3, 30)
c1 = torch.zeros(3, 30)
h2 = torch.zeros(3, 20)
c2 = torch.zeros(3, 20)
for xt in x:
    h1, c1 = cell1(xt, (h1, c1))
    # Only the hidden state h1 is handed upward; each layer keeps its own cell state.
    h2, c2 = cell2(h1, (h2, c2))
print(h2.shape, c2.shape)

one layer lstm
torch.Size([3, 20]) torch.Size([3, 20])
two layer lstm
torch.Size([3, 20]) torch.Size([3, 20])
