In [1]:
import torch
import torch.nn as nn

# Fix the RNG so the randomly initialised RNN weights are reproducible.
torch.manual_seed(1)

# Single-layer Elman RNN: 5 input features -> 2 hidden units, batch-first input.
rnn_layer = nn.RNN(input_size=5, hidden_size=2, num_layers=1, batch_first=True)

# Handles to the layer-0 parameters: input-to-hidden and hidden-to-hidden
# weight matrices together with their bias vectors.
w_xh, w_hh = rnn_layer.weight_ih_l0, rnn_layer.weight_hh_l0
b_xh, b_hh = rnn_layer.bias_ih_l0, rnn_layer.bias_hh_l0

# Report the shape of every parameter.
for label, param in (('W_xh', w_xh), ('W_hh', w_hh), ('b_xh', b_xh), ('b_hh', b_hh)):
    print(f'{label} shape:', param.shape)

W_xh shape: torch.Size([2, 5])
W_hh shape: torch.Size([2, 2])
b_xh shape: torch.Size([2])
b_hh shape: torch.Size([2])


In [5]:
# Toy sequence: three time steps, each a 5-feature vector filled with 1, 2, 3.
x_seq = torch.tensor([[float(step)] * 5 for step in (1, 2, 3)]).float()
print(x_seq.shape)
print(x_seq)

torch.Size([3, 5])
tensor([[1., 1., 1., 1., 1.],
        [2., 2., 2., 2., 2.],
        [3., 3., 3., 3., 3.]])


In [6]:
## run the sequence through the RNN layer as a batch holding one sequence
## (batch=1, time steps=3, features=5):
output, hn = rnn_layer(x_seq.reshape(1, 3, 5))
print(output)
print(hn)

tensor([[[-0.3520,  0.5253],
         [-0.6842,  0.7607],
         [-0.8649,  0.9047]]], grad_fn=<TransposeBackward1>)
tensor([[[-0.8649,  0.9047]]], grad_fn=<StackBackward0>)


In [7]:
x_seq = torch.tensor([[1.0]*5, [2.0]*5, [3.0]*5]).float()

# Read the sizes from the data instead of repeating the literals 3 and 5.
seq_len, n_features = x_seq.shape

## output of the simple RNN (one batch element holding the whole sequence):
output, hn = rnn_layer(torch.reshape(x_seq, (1, seq_len, n_features)))

## manually computing the output, one time step at a time:
##   o_t = tanh(x_t @ W_xh^T + b_xh + o_{t-1} @ W_hh^T + b_hh)
out_man = []
for t in range(seq_len):
    xt = torch.reshape(x_seq[t], (1, n_features))
    print(f'Time step {t} =>')
    print('   Input           :', xt.numpy())

    # Input-to-hidden contribution of the current time step (pre-activation).
    ht = torch.matmul(xt, torch.transpose(w_xh, 0, 1)) + b_xh
    print('   Hidden          :', ht.detach().numpy())

    # Recurrent input: the previous manual output, or zeros at the first
    # step (no initial hidden state was passed to rnn_layer above).
    prev_h = out_man[t-1] if t > 0 else torch.zeros((ht.shape))

    ot = ht + torch.matmul(prev_h, torch.transpose(w_hh, 0, 1)) + b_hh
    ot = torch.tanh(ot)
    out_man.append(ot)
    print('   Output (manual) :', ot.detach().numpy())
    print('   RNN output      :', output[:, t].detach().numpy())
    print()

    # Check the claim the printout is making, rather than relying on the
    # reader to compare the two rows by eye.
    assert torch.allclose(ot, output[:, t], atol=1e-6), \
        f'manual output differs from nn.RNN at time step {t}'

# The final hidden state returned by the layer is the last time step's output.
assert torch.allclose(out_man[-1], hn[-1], atol=1e-6), \
    'last manual output differs from the final hidden state'

Time step 0 =>
   Input           : [[1. 1. 1. 1. 1.]]
   Hidden          : [[-0.4701929   0.58639044]]
   Output (manual) : [[-0.3519801   0.52525216]]
   RNN output      : [[-0.35198015  0.52525216]]

Time step 1 =>
   Input           : [[2. 2. 2. 2. 2.]]
   Hidden          : [[-0.88883156  1.2364398 ]]
   Output (manual) : [[-0.68424344  0.76074266]]
   RNN output      : [[-0.68424344  0.76074266]]

Time step 2 =>
   Input           : [[3. 3. 3. 3. 3.]]
   Hidden          : [[-1.3074702  1.8864892]]
   Output (manual) : [[-0.8649416  0.9046636]]
   RNN output      : [[-0.8649416   0.90466356]]



In [8]:
## An example of building an RNN model
## with a simple RNN layer
import torch
import torch.nn as nn
# Recurrent network: a stacked simple (Elman) RNN followed by a linear head
class RNN(nn.Module):
    """Stacked ``nn.RNN`` whose last layer's final hidden state feeds a linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: number of hidden units in every recurrent layer.
        num_layers: number of stacked recurrent layers (default 2, the value
            that was previously hard-coded).
    """

    def __init__(self, input_size, hidden_size, num_layers=2):
        super().__init__()
        self.rnn = nn.RNN(input_size, 
                          hidden_size, 
                          num_layers=num_layers, 
                          batch_first=True)
        # Drop-in alternatives for the recurrent layer:
        #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        #self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        """Map a batch of sequences to one scalar per sequence.

        Args:
            x: batch-first input, i.e. (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, 1).

        The intermediate shapes and values are printed for illustration.
        """
        # `hidden` stacks the final hidden state of every layer:
        # (num_layers, batch, hidden_size).
        _, hidden = self.rnn(x)
        print('hidden shape: ', hidden.shape)
        # Keep only the last (top-most) layer. Index -1 rather than a fixed
        # 1 so this stays correct for any num_layers.
        out = hidden[-1, :, :]
        print(hidden[-1, :, :])
        print('out shape: ', out.shape)
        print(out)
        out = self.fc(out)
        return out

# Instantiate the network: 64 input features per step, 32 hidden units.
model = RNN(input_size=64, hidden_size=32)

# Show the module structure.
print(model)

# Forward a random batch: 5 sequences, 3 time steps, 64 features each.
# The bare call is the cell's last expression, so its result is displayed.
model(torch.randn(5, 3, 64))

RNN(
  (rnn): RNN(64, 32, num_layers=2, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)
hidden shape:  torch.Size([2, 5, 32])
tensor([[ 0.3586, -0.4515, -0.0997,  0.5056,  0.5037, -0.2450,  0.0333, -0.4629,
         -0.2926, -0.5758, -0.3753, -0.0839,  0.3121,  0.2695, -0.3496,  0.1267,
          0.3111,  0.1281,  0.0380,  0.4880, -0.3908,  0.3196,  0.0031,  0.1222,
         -0.0567, -0.2905,  0.6298, -0.2787,  0.6107, -0.4174, -0.0970,  0.7040],
        [-0.1414,  0.0999, -0.1051, -0.0415, -0.1115, -0.3676, -0.4769,  0.6230,
         -0.3231, -0.3659,  0.1538, -0.3719, -0.1580,  0.0735,  0.2266, -0.5426,
          0.4008,  0.2700, -0.3152, -0.8014, -0.3004,  0.4055, -0.1981, -0.3142,
          0.0108,  0.2815,  0.1048, -0.1704, -0.1110, -0.5390, -0.3419, -0.2502],
        [-0.5836, -0.1876,  0.0863,  0.1653, -0.4559, -0.3938, -0.5506, -0.4074,
          0.2435, -0.0051,  0.2634, -0.0991, -0.3347,  0.1250, -0.5264, -0.4547,
          0.0811, -0.3026,  0.27

tensor([[-0.0966],
        [ 0.0599],
        [ 0.1271],
        [-0.2671],
        [ 0.3404]], grad_fn=<AddmmBackward0>)