In [77]:
import numpy as np

'''
h_t = tanh(W_x * x_t + W_h * h_(t-1) + b) 

'''

def np_rnn(inputs, h_0, W_x, W_h, b) -> tuple : 
    total_hidden_states = []
    
    for input_t in inputs:
        output = np.tanh(np.matmul(input_t, W_x.T)\
                         + np.matmul(h_0, W_h.T)\
                         + b)
        total_hidden_states.append(output)
        h_0 = output
        
    return np.array(total_hidden_states), h_0
    
    
batch_size = 1
timesteps = 10 
input_size = 5
hidden_size = 8

'''suppose bias=False'''

inputs = np.random.random((batch_size,timesteps, input_size))
h_0 = np.zeros((hidden_size,))
W_x = np.random.random((hidden_size, input_size))
W_h = np.random.random((hidden_size, hidden_size)) #square (hidden * hidden)
b = np.random.random((hidden_size,))

In [78]:
outputs, h_t = np_rnn(inputs, h_0, W_x, W_h, b)

In [72]:
print(outputs.shape)
print(h_t.shape)

(1, 10, 8)
(10, 8)


In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F

'''
h_t = tanh(W_ih * x_t + b_ih + W_hh * h_(t-1) + b_hh)

W_ih = Parameter(torch.Tensor(hidden_size, layer_input_size))
W_hh = Parameter(torch.Tensor(hidden_size, real_hidden_size))
b_ih = Parameter(torch.Tensor(hidden_size))
b_hh = Parameter(torch.Tensor(hidden_size))

h_0 = (Defaults to zero if not provide)

RNN input is X and h_0
RNN output is Y and h_n

output = (seq_len, batch, num_dir * hidden)
if batch_fisrt:
    output = (batch, seq, feature)
    
The output is the hidden states of all time steps.
h_n is last hidden states

h_n = (num_layers * num_directions, batch, hidden)
'''

input_size = 5
hidden_size = 8

inputs = torch.Tensor(1, 10, input_size)#N, time, input
rnn = nn.RNN(input_size, hidden_size, batch_first=True)#input, hidden, inputs first tensor is batchsize

In [26]:
outputs, h_t = rnn(inputs)
print(outputs.size()) #N, time, hidden
print(h_t.size())#layer*direction, N, hidden

torch.Size([1, 10, 8])
torch.Size([1, 1, 8])


In [3]:
print(rnn._all_weights)

for n, w in zip(rnn._all_weights[0], rnn._flat_weights):
    print(f'size of {n} is {w.size()}')    

[['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0']]
size of weight_ih_l0 is torch.Size([8, 5])
size of weight_hh_l0 is torch.Size([8, 8])
size of bias_ih_l0 is torch.Size([8])
size of bias_hh_l0 is torch.Size([8])
