In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Things you should automatically know and have memorized
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [3]:
# Make some data
SEED = 0  # fixed seed so the random input is identical on every run
N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
M = 5   # number of hidden units
K = 2   # number of output units
np.random.seed(SEED)
X = np.random.randn(N, T, D)  # standard-normal input of shape (N, T, D)

In [4]:
## Make an RNN
class SimpleRNN(nn.Module):
    """Single-layer tanh RNN followed by a linear read-out at every time step.

    Args:
        n_inputs: number of input features per time step (D).
        n_hidden: number of hidden units in the recurrent layer (M).
        n_outputs: number of output units produced per time step (K).
    """

    def __init__(self, n_inputs, n_hidden, n_outputs):
        super().__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.rnn = nn.RNN(
            input_size=self.D,
            hidden_size=self.M,
            nonlinearity='tanh',
            batch_first=True)
        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        """Map a batch-first input of shape (N, T, D) to outputs of shape (N, T, K)."""
        # Initial hidden state of shape (num_layers=1, N, M). It is created on
        # the input's device and dtype so the model keeps working after it has
        # been moved with .to(device) or converted with .double().
        h0 = torch.zeros(1, X.size(0), self.M, device=X.device, dtype=X.dtype)
        # `out` holds the hidden state at every time step: (N, T, M)
        out, _ = self.rnn(X, h0)
        # The linear layer acts on the last dimension, giving (N, T, K)
        return self.fc(out)

In [5]:
# Instantiate the model
# Seed PyTorch first: the layer weights are randomly initialised, so without a
# fixed seed every run produces different weights and different outputs.
torch.manual_seed(0)
model = SimpleRNN(n_inputs=D, n_hidden=M, n_outputs=K)

In [6]:
# Run the forward pass: cast the float64 NumPy input to a float32 tensor,
# feed it through the model, and display the result
inputs = torch.from_numpy(X).to(torch.float32)
out = model(inputs)
out

tensor([[[ 0.3660,  0.2444],
         [ 0.0849,  0.5696],
         [ 0.4270,  0.3015],
         [ 0.5534,  0.4778],
         [ 0.2870,  0.3515],
         [ 0.1694,  0.3709],
         [ 0.5276,  0.3043],
         [ 0.5930,  0.2884],
         [-0.3681,  0.4771],
         [-0.0700,  0.4415]]], grad_fn=<AddBackward0>)

In [7]:
# Output shape is (N, T, K): one K-dimensional output per time step
out.shape

torch.Size([1, 10, 2])

In [8]:
# Save for later: keep the PyTorch output as a NumPy array so it can be
# compared with the manual computation
Yhats_torch = out.detach().numpy()
Yhats_torch

array([[[ 0.36600357,  0.24441338],
        [ 0.08487566,  0.56958175],
        [ 0.42702508,  0.3014848 ],
        [ 0.5534216 ,  0.47776997],
        [ 0.28700903,  0.351517  ],
        [ 0.16938797,  0.37091225],
        [ 0.5275631 ,  0.30431306],
        [ 0.5930438 ,  0.28835967],
        [-0.36805296,  0.4770639 ],
        [-0.06995997,  0.44149268]]], dtype=float32)

In [9]:
# Grab the recurrent layer's parameters by name rather than by position
W_xh = model.rnn.weight_ih_l0  # input-to-hidden weights
W_hh = model.rnn.weight_hh_l0  # hidden-to-hidden weights
b_xh = model.rnn.bias_ih_l0    # input-to-hidden bias
b_hh = model.rnn.bias_hh_l0    # hidden-to-hidden bias

In [10]:
# Input-to-hidden weight shape: (M, D)
W_xh.shape

torch.Size([5, 3])

In [11]:
W_xh     # (M = 5, D = 3) --> hidden units by input features

Parameter containing:
tensor([[ 0.1940, -0.1864, -0.3708],
        [ 0.4388,  0.4335,  0.3084],
        [ 0.4232,  0.2363, -0.1446],
        [ 0.2669, -0.3843, -0.4198],
        [-0.1705,  0.2237,  0.1951]], requires_grad=True)

In [12]:
# Convert the input-to-hidden weights to NumPy.
# Read straight from the model instead of rebinding W_xh from itself, so the
# cell can be re-run safely; use .detach() rather than the discouraged .data.
W_xh = model.rnn.weight_ih_l0.detach().numpy()
W_xh

array([[ 0.19395643, -0.18644127, -0.3708373 ],
       [ 0.43883753,  0.43351787,  0.30838   ],
       [ 0.42317736,  0.23629677, -0.14459893],
       [ 0.26693505, -0.38425606, -0.41984758],
       [-0.1705198 ,  0.22366464,  0.19505084]], dtype=float32)

In [13]:
# Convert the remaining recurrent parameters to NumPy.
# Each value is read from the model (not from the variable being reassigned),
# so re-running this cell does not fail on an already-converted array.
W_hh = model.rnn.weight_hh_l0.detach().numpy()  # hidden-to-hidden weights
b_xh = model.rnn.bias_ih_l0.detach().numpy()    # input-to-hidden bias
b_hh = model.rnn.bias_hh_l0.detach().numpy()    # hidden-to-hidden bias

In [14]:
# Sanity check: shapes of the recurrent weights and biases
tuple(param.shape for param in (W_xh, W_hh, b_xh, b_hh))

((5, 3), (5, 5), (5,), (5,))

In [15]:
# Now get the FC layer's weight and bias
Wo = model.fc.weight
bo = model.fc.bias

In [16]:
# Convert the FC parameters to NumPy.
# Read from the model rather than from Wo/bo themselves so the cell stays
# re-runnable; .detach() replaces the discouraged .data accessor.
Wo = model.fc.weight.detach().numpy()
bo = model.fc.bias.detach().numpy()
Wo.shape, bo.shape

((2, 5), (2,))

In [17]:
# Replicate the PyTorch forward pass by hand with NumPy
h_last = np.zeros(M)  # hidden state before the first time step
x = X[0]  # the one and only sample
Yhats = np.zeros((T, K))  # one K-dimensional output per time step

for t in range(T):
    # Pre-activation: input and recurrent contributions, each with its own
    # bias, summed left to right
    pre_activation = np.dot(x[t], W_xh.T) + b_xh + np.dot(h_last, W_hh.T) + b_hh
    h = np.tanh(pre_activation)
    # Linear read-out of the current hidden state; stored for every step
    y = np.dot(h, Wo.T) + bo
    Yhats[t] = y
    print("Hidden values:", h)

    # Important: carry the hidden state over to the next time step
    h_last = h

# print the outputs for all time steps
print('Prediction:', Yhats)

Hidden values: [-0.30718069 -0.23427566 -0.67274817  0.76802009  0.24960337]
Hidden values: [-0.66455575  0.42344756 -0.6798257  -0.32824369  0.37043465]
Hidden values: [-0.38230848  0.41522026 -0.32464448  0.74052138  0.27054629]
Hidden values: [-0.26401586  0.71512495 -0.13971204  0.43874844 -0.13702493]
Hidden values: [-0.40405415  0.83923241  0.04996731  0.32727945  0.3020403 ]
Hidden values: [-0.48694789 -0.23061131 -0.6968407   0.2931723   0.26191697]
Hidden values: [-0.12349848 -0.31266616 -0.73457575  0.76804593 -0.05955467]
Hidden values: [ 0.05603434 -0.54829907 -0.78632369  0.79153016 -0.24627239]
Hidden values: [-0.85489125 -0.37370048 -0.9553398  -0.6543708   0.69993464]
Hidden values: [-0.87803852  0.98065953  0.01365253 -0.10503329  0.69889083]
Prediction: [[ 0.36600358  0.24441339]
 [ 0.08487563  0.56958176]
 [ 0.42702506  0.30148479]
 [ 0.55342166  0.47776996]
 [ 0.28700903  0.35151697]
 [ 0.16938796  0.37091225]
 [ 0.52756303  0.30431309]
 [ 0.59304378  0.28835969]
 [

In [26]:
# Check that the SimpleRNN output matches the values computed by hand from the
# equations. Yhats holds sample 0 only, so compare against sample 0 explicitly
# instead of relying on broadcasting (T, K) against (N, T, K).
np.allclose(Yhats, Yhats_torch[0])

True

In [27]:
# Interactive IPython help lookup for np.allclose (exploration aid; not part of the analysis)
?np.allclose