<a href="https://colab.research.google.com/github/akiabe/udemy_PyTorch_DNN/blob/master/understanding_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

In [27]:
"""
Args:
N : number of samples
T : sequence length
D : number of input features
M : number of hidden units
K : number of output units
"""
N = 1
T = 10
D = 3
M = 5
K = 2

X = np.random.randn(N, T, D)
print(X)

[[[ 0.21991027  1.42066883  0.70628863]
  [ 0.15585064 -0.57009008 -0.60479589]
  [ 1.43192732  0.8315696  -0.3861832 ]
  [ 0.68558697  0.19175578  0.66879953]
  [-1.53367993 -0.0977359  -1.0779194 ]
  [ 0.15477445 -0.34865647  1.72206484]
  [ 0.69785572  0.04916581 -1.0821824 ]
  [ 0.78289862 -0.20890229 -0.30026241]
  [-0.23321299 -0.13429275 -1.12614921]
  [ 1.10737908  0.7080456  -1.04643644]]]


In [45]:
class SimpleRNN(nn.Module):
  def __init__(self, n_inputs, n_hidden, n_outputs):
    super(SimpleRNN, self).__init__()
    self.D = n_inputs
    self.M = n_hidden
    self.K = n_outputs

    self.rnn = nn.RNN(
        input_size=self.D,
        hidden_size=self.M,
        nonlinearity="tanh",
        batch_first=True
    )
    self.fc = nn.Linear(self.M, self.K)
  
  def forward(self, X):
    h0 = torch.zeros(1, X.size(0), self.M)
    out, _ = self.rnn(X, h0)
    out = self.fc(out)
    return out

In [46]:
model = SimpleRNN(n_inputs=D, n_hidden=M, n_outputs=K)

In [47]:
inputs = torch.from_numpy(X.astype(np.float32))
out = model(inputs)
out

tensor([[[ 0.2210, -0.5973],
         [-0.1532, -0.1830],
         [-0.1034, -0.0525],
         [-0.0175, -0.1045],
         [-0.0079, -0.5712],
         [-0.0222, -0.0019],
         [-0.1653, -0.2054],
         [-0.1770,  0.0213],
         [-0.1530, -0.2677],
         [-0.1773, -0.0785]]], grad_fn=<AddBackward0>)

In [48]:
out.shape

torch.Size([1, 10, 2])

In [49]:
Yhats_torch = out.detach().numpy()

In [50]:
W_xh, W_hh, b_xh, b_hh = model.rnn.parameters()

In [51]:
print(type(W_xh))
print(W_xh)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([[ 0.0924,  0.0116,  0.1314],
        [ 0.3611, -0.2066, -0.1249],
        [-0.3072,  0.2993,  0.0310],
        [ 0.4150, -0.1869,  0.4333],
        [ 0.3557, -0.3027, -0.0195]], requires_grad=True)


In [52]:
W_xh = W_xh.data.numpy()
print(type(W_xh))
print(W_xh)

<class 'numpy.ndarray'>
[[ 0.09244949  0.01161504  0.13144706]
 [ 0.36106893 -0.20658769 -0.12489491]
 [-0.3071687   0.29928458  0.03102391]
 [ 0.41501015 -0.18692504  0.4333222 ]
 [ 0.3557093  -0.30273724 -0.01946379]]


In [53]:
W_hh = W_hh.data.numpy()
b_xh = b_xh.data.numpy()
b_hh = b_hh.data.numpy()

In [54]:
W_xh.shape, W_hh.shape, b_xh.shape, b_hh.shape

((5, 3), (5, 5), (5,), (5,))

In [55]:
Wo, bo = model.fc.parameters()

In [56]:
Wo = Wo.data.numpy()
bo = bo.data.numpy()
Wo.shape, bo.shape

((2, 5), (2,))

In [57]:
h_last = np.zeros(M)
x = X[0]
Yhats = np.zeros((T, K))

for t in range(T):
  h = np.tanh(x[t].dot(W_xh.T) + b_xh + h_last.dot(W_hh.T) + b_hh)
  y = h.dot(Wo.T) + bo
  Yhats[t] = y
  h_last = h
print(Yhats)

[[ 0.22098689 -0.59733703]
 [-0.15321849 -0.18298207]
 [-0.10343776 -0.05252897]
 [-0.01750012 -0.10446199]
 [-0.00787505 -0.57122359]
 [-0.02216822 -0.00194982]
 [-0.16534465 -0.2053969 ]
 [-0.17701856  0.0213117 ]
 [-0.1529786  -0.26765696]
 [-0.17726505 -0.07848638]]


In [58]:
np.allclose(Yhats, Yhats_torch)

False