<a href="https://colab.research.google.com/github/gavincapriola/PyTorch-Deep-Learning-and-Artificial-Intelligence/blob/main/Recurrent%20Neural%20Networks%2C%20Time%20Series%2C%20and%20Sequence%20Data/PyTorch_Understanding_RNN_Shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [3]:
# Problem dimensions (see the legend in the previous cell)
N = 1
T = 10
D = 3
M = 5
K = 2

# Seed both libraries so the random input below and any PyTorch weight
# initialisation that follows are identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)
torch.manual_seed(SEED)

# One batch of N sequences, each with T time steps of D features (float64)
X = rng.standard_normal((N, T, D))

In [4]:
class SimpleRNN(nn.Module):
    """Single-layer tanh RNN followed by a linear layer applied at every time step.

    Args:
        n_inputs: number of input features per time step (D).
        n_hidden: number of hidden units (M).
        n_outputs: number of output units (K).
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        super().__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.rnn = nn.RNN(
            input_size=self.D,
            hidden_size=self.M,
            nonlinearity='tanh',
            batch_first=True)
        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        """Run the RNN over a batch of sequences and map every hidden state to an output.

        Args:
            X: tensor of shape (N, T, D) -- batch first, as configured in ``self.rnn``.

        Returns:
            Tensor of shape (N, T, K): one output vector per time step.
        """
        # initial hidden state, shape (num_layers=1, N, M); built with X's dtype
        # and device so the model keeps working after .to(device) / .double()
        h0 = torch.zeros(1, X.size(0), self.M, dtype=X.dtype, device=X.device)

        # `out` holds the hidden state at every time step: (N, T, M)
        out, _ = self.rnn(X, h0)

        # nn.Linear acts on the last dimension, so this yields (N, T, K).
        # To keep only the final time step h(T), use self.fc(out[:, -1, :]) instead.
        out = self.fc(out)
        return out

In [5]:
# Build the network: D input features -> M hidden units -> K outputs
model = SimpleRNN(D, M, K)

In [6]:
# The model's weights are float32, so cast the float64 NumPy input before wrapping it
X_float32 = X.astype(np.float32)
inputs = torch.from_numpy(X_float32)

# Forward pass over the whole batch
out = model(inputs)
out

tensor([[[ 0.3052,  0.0322],
         [ 0.3850,  0.0348],
         [ 0.4302,  0.1432],
         [ 0.3669,  0.0843],
         [ 0.3188, -0.0043],
         [ 0.3368,  0.1887],
         [ 0.4083,  0.1227],
         [ 0.4553,  0.2726],
         [ 0.4458,  0.0290],
         [ 0.3110,  0.2483]]], grad_fn=<ViewBackward0>)

In [7]:
# (N, T, K): one K-dimensional output for each of the T time steps
out.size()

torch.Size([1, 10, 2])

In [8]:
# Drop the autograd graph first, then expose the values as a NumPy array
out_detached = out.detach()
Yhats_torch = out_detached.numpy()

In [9]:
# Grab the layer-0 RNN parameters by name rather than by iteration order
W_xh = model.rnn.weight_ih_l0  # input  -> hidden weights
W_hh = model.rnn.weight_hh_l0  # hidden -> hidden weights
b_xh = model.rnn.bias_ih_l0    # input  -> hidden bias
b_hh = model.rnn.bias_hh_l0    # hidden -> hidden bias

In [10]:
# Input-to-hidden weights: one row per hidden unit (M), one column per input feature (D)
W_xh.shape

torch.Size([5, 3])

In [11]:
# Display the input-to-hidden weight values
W_xh

Parameter containing:
tensor([[-0.0583,  0.1432, -0.1509],
        [ 0.1783, -0.4198, -0.2081],
        [-0.3898, -0.1274, -0.4256],
        [-0.1926,  0.1752, -0.0914],
        [ 0.2632, -0.4357,  0.3977]], requires_grad=True)

In [12]:
# Read the weight straight from the module (not from the `W_xh` name itself) so
# this cell can be re-run safely, and use .detach() rather than the legacy .data.
# The resulting array shares memory with the parameter.
W_xh = model.rnn.weight_ih_l0.detach().numpy()
W_xh

array([[-0.05833443,  0.14320727, -0.15093853],
       [ 0.17830102, -0.41984183, -0.20810212],
       [-0.3897575 , -0.12744461, -0.42555484],
       [-0.19261828,  0.17519453, -0.09139983],
       [ 0.26319155, -0.43568662,  0.39767164]], dtype=float32)

In [13]:
# Convert the remaining RNN parameters to NumPy. Each one is read from the
# module by name so the cell is idempotent (re-running it does not try to call
# tensor methods on an ndarray), and .detach() replaces the legacy .data.
b_xh = model.rnn.bias_ih_l0.detach().numpy()
W_hh = model.rnn.weight_hh_l0.detach().numpy()
b_hh = model.rnn.bias_hh_l0.detach().numpy()

In [14]:
# Shapes of the four RNN parameters, in the order W_xh, b_xh, W_hh, b_hh
tuple(arr.shape for arr in (W_xh, b_xh, W_hh, b_hh))

((5, 3), (5,), (5, 5), (5,))

In [15]:
# Output-layer weight and bias, accessed by name
Wo = model.fc.weight
bo = model.fc.bias

In [16]:
# Convert the output-layer parameters to NumPy. They are read from the module
# by name so the cell can be re-run, and .detach() replaces the legacy .data.
Wo = model.fc.weight.detach().numpy()
bo = model.fc.bias.detach().numpy()
Wo.shape, bo.shape

((2, 5), (2,))

In [17]:
# Replay the forward pass in plain NumPy for the single sample in the batch
x = X[0]
Yhats = np.zeros((T, K))
h_last = np.zeros(M)  # h(0) is all zeros, matching h0 in SimpleRNN.forward

for t, x_t in enumerate(x):
    # h(t) = tanh(x(t) W_xh^T + b_xh + h(t-1) W_hh^T + b_hh)
    pre_activation = x_t.dot(W_xh.T) + b_xh + h_last.dot(W_hh.T) + b_hh
    h_last = np.tanh(pre_activation)

    # y(t) = h(t) Wo^T + bo
    Yhats[t] = h_last.dot(Wo.T) + bo

print(Yhats)

[[ 0.30515769  0.03224457]
 [ 0.38502327  0.03481357]
 [ 0.43020341  0.14316116]
 [ 0.36685014  0.08428476]
 [ 0.31881396 -0.00428434]
 [ 0.33684543  0.18869066]
 [ 0.40833317  0.12269639]
 [ 0.45525897  0.27256941]
 [ 0.44578751  0.02898197]
 [ 0.31095421  0.24827278]]


In [18]:
# The hand-rolled NumPy outputs should agree with PyTorch's within the default tolerances
bool(np.isclose(Yhats, Yhats_torch).all())

True