<a href="https://colab.research.google.com/github/aramakrishnan-006/pytorch-notebooks/blob/main/pytorch_RNN_shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

In [31]:
'''
N -> Number of Samples
T -> Length of Sequence
D -> Number of Features
M -> Number of Hidden units
K -> Number of Output units
'''


'\nN -> Number of Samples\nT -> Length of Sequence\nD -> Number of Features\nM -> Number of Hidden units\nK -> Number of Output units\n'

In [32]:
# Generate Data
# Seed both RNGs so that Restart & Run All reproduces the same inputs and,
# for cells that run after this one, the same random weight initialisation.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

N = 1   # number of samples
T = 10  # length of each sequence
D = 3   # number of features per time step
M = 5   # number of hidden units
K = 2   # number of output units, e.g. a (latitude, longitude) pair: one logical output made of two parts

X = np.random.randn(N, T, D)  # random input of shape (N, T, D)

In [33]:

#Define a SimpleRNN
class SimpleRNN(nn.Module):
  """Single-layer tanh RNN followed by a linear read-out applied at every time step.

  Args:
    n_inputs: number of input features per time step (D).
    n_hidden: number of hidden units in the recurrent layer (M).
    n_outputs: number of output units per time step (K).
  """

  def __init__(self, n_inputs, n_hidden, n_outputs):
    super().__init__()
    self.D = n_inputs
    self.M = n_hidden
    self.K = n_outputs

    # With batch_first=True the RNN expects input shaped
    # (number of samples, sequence length, number of features);
    # otherwise it expects
    # (sequence length, number of samples, number of features).
    self.rnn = nn.RNN(
        input_size = self.D,
        hidden_size = self.M,
        nonlinearity = 'tanh',
        batch_first = True
    )
    self.fc = nn.Linear(self.M, self.K)

  def forward(self, X):
    """Run the RNN over a batch of sequences.

    Args:
      X: tensor of shape (N, T, D).

    Returns:
      Tensor of shape (N, T, K): the linear read-out of the hidden state at
      every time step.
    """
    # Initial hidden state: (num_layers=1, N, M). Create it on the same device
    # and with the same dtype as the input so the module also works after
    # .to(device) / .double() instead of assuming CPU float32.
    h0 = torch.zeros(1, X.size(0), self.M, device=X.device, dtype=X.dtype)

    # Hidden states for every time step; size is N x T x M.
    # The second return value (final hidden state) is not needed here.
    out, _ = self.rnn(X, h0)

    # Apply the output layer to each time step: N x T x K.
    out = self.fc(out)
    return out

In [34]:

# Build the model: D input features -> M hidden units -> K outputs per time step.
model = SimpleRNN(n_inputs = D, n_hidden = M, n_outputs=K)

In [35]:
# Cast the NumPy data to float32 before wrapping it as a tensor so that it
# matches the dtype of the model's parameters (float32 is PyTorch's default).
inputs = torch.from_numpy(X.astype(np.float32))

In [36]:
# Forward pass over the whole sequence; the model returns K values per time step.
out = model(inputs)

In [37]:
# Show the predictions: one row of K values for each of the T time steps.
out

tensor([[[0.8554, 0.1904],
         [0.1754, 0.1891],
         [0.5608, 0.3311],
         [0.2357, 0.2064],
         [0.2606, 0.2381],
         [0.4244, 0.1892],
         [0.5049, 0.3548],
         [0.6795, 0.5007],
         [0.4063, 0.2149],
         [0.4123, 0.2507]]], grad_fn=<ViewBackward0>)

In [38]:
# Expected shape: (N, T, K)
out.shape

torch.Size([1, 10, 2])

In [39]:
# Detach from the autograd graph and convert to NumPy; this is the reference
# result that the hand-written RNN is compared against.
yhat_torch = out.detach().numpy()

In [40]:
# Implementing the RNN from scratch: start by pulling out the recurrent
# layer's learned tensors, addressed by name.
#   weight_ih_l0 / bias_ih_l0: input  -> hidden
#   weight_hh_l0 / bias_hh_l0: hidden -> hidden
W_xh = model.rnn.weight_ih_l0
W_hh = model.rnn.weight_hh_l0
b_xh = model.rnn.bias_ih_l0
b_hh = model.rnn.bias_hh_l0

In [41]:
# (M, D): one row of input weights per hidden unit.
W_xh.shape

torch.Size([5, 3])

In [42]:
# Input->hidden weights as a NumPy array for the manual forward pass.
# Read from the module rather than re-binding W_xh from itself, so this cell
# can be re-run safely; detach() replaces the legacy .data accessor.
W_xh = model.rnn.weight_ih_l0.detach().numpy()

In [43]:
# Remaining recurrent-layer parameters as NumPy arrays. Each is read from the
# module by name (not re-bound from itself) so the cell can be re-run safely;
# detach() replaces the legacy .data accessor.
W_hh = model.rnn.weight_hh_l0.detach().numpy()
b_xh = model.rnn.bias_ih_l0.detach().numpy()
b_hh = model.rnn.bias_hh_l0.detach().numpy()

In [44]:
# The shapes should follow the layer sizes (D = 3 features, M = 5 hidden units):
#   W_xh is (M, D), W_hh is (M, M)
#   each bias vector has M entries

W_xh.shape, b_xh.shape, W_hh.shape, b_hh.shape

((5, 3), (5,), (5, 5), (5,))

In [45]:
# Parameters of the final fully connected layer, i.e. the layer that maps
# each hidden state to the outputs. Addressed by name: weight, then bias.
Wo, bo = model.fc.weight, model.fc.bias

In [46]:
# Output-layer parameters as NumPy arrays. They are read from the module by
# name (not re-bound from themselves) so the cell can be re-run safely;
# detach() replaces the legacy .data accessor.
Wo = model.fc.weight.detach().numpy()
bo = model.fc.bias.detach().numpy()
Wo.shape, bo.shape  # expected: (K, M) and (K,)

((2, 5), (2,))

In [47]:
# Manual forward pass for the first (and only) sample in the batch.
x = X[0]                  # (T, D) sequence
h_last = np.zeros(M)      # initial hidden state: zeros, like h0 in the model
Yhats = np.zeros((T,K))   # one K-dimensional output per time step

for t, x_t in enumerate(x):
  # Recurrence: h_t = tanh(x_t W_xh^T + b_xh + h_{t-1} W_hh^T + b_hh)
  pre_activation = x_t.dot(W_xh.T) + b_xh + h_last.dot(W_hh.T) + b_hh
  h = np.tanh(pre_activation)

  # Linear read-out of the current hidden state.
  y = h.dot(Wo.T) + bo
  Yhats[t] = y

  # Carry the hidden state over to the next time step.
  h_last = h

print(Yhats)

[[0.85543753 0.19036498]
 [0.17543979 0.18906787]
 [0.56084059 0.33111797]
 [0.2356865  0.20642882]
 [0.26061645 0.23813986]
 [0.42441308 0.18923939]
 [0.50494503 0.35484738]
 [0.67948664 0.50067571]
 [0.40632076 0.21485729]
 [0.41230013 0.25070022]]


In [49]:
# Check that the manual implementation reproduces PyTorch's output.
# Yhats is (T, K) while yhat_torch is (N, T, K) with N = 1, so the
# comparison relies on NumPy broadcasting over the leading axis.
np.allclose(Yhats, yhat_torch)

True