# RNN Tutorial using pytorch

### Reference
* pytorch로 시작하는 딥러닝 (wikidocs)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np

### RNN with single layer: manually in PyTorch

In [78]:
class SingleRnn(nn.Module):
    """Single-layer vanilla RNN written out by hand.

    Every step computes ``h_t = tanh(Wh @ h_{t-1} + Wx @ x_t + b)`` on column
    vectors, starting from an all-zero hidden state.

    Args:
        input_size: dimension of each word vector.
        hidden_size: dimension of the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        # Wrapped in nn.Parameter so the weights are registered with the
        # module: they then show up in parameters()/state_dict() and follow
        # .to(device) calls. Creation order (Wx, Wh, b) is kept so a fixed
        # seed yields the same initial values as before.
        self.Wx = nn.Parameter(torch.randn(hidden_size, input_size))
        self.Wh = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.b = nn.Parameter(torch.randn(hidden_size, 1))

    def _initial_hidden(self):
        """Return the all-zero (hidden_size, 1) initial hidden state."""
        # new_zeros keeps the state on the same device/dtype as the weights.
        return self.Wh.new_zeros(self.hidden_size, 1)

    def _step(self, h_prev, x):
        """Advance the hidden state by one time step.

        ``h_prev`` is (hidden_size, 1) and ``x`` is (input_size, 1).
        """
        return torch.tanh(torch.mm(self.Wh, h_prev) + torch.mm(self.Wx, x) + self.b)

    def forward_not_loop(self, X1, X2):
        """Unrolled forward pass for a sentence of exactly two words.

        A real input is a sequence of words; this variant assumes the
        sentence has two words so the recurrence can be written out without
        a loop.

        Args:
            X1: first word vector, shape (input_size, 1).
            X2: second word vector, shape (input_size, 1).

        Returns:
            Tuple ``(h1, h2)`` with the hidden state after each word, each of
            shape (hidden_size, 1).
        """
        # The original read the undefined bare name `hidden_size` here, which
        # raised NameError; the size must come from the instance.
        h0 = self._initial_hidden()

        h1 = self._step(h0, X1)
        h2 = self._step(h1, X2)
        return h1, h2

    def forward(self, seq_inputs):
        """Run the RNN over a whole sequence.

        Args:
            seq_inputs: matrix of shape (num_words, input_size), one word
                vector per row.

        Returns:
            Tensor of shape (num_words, hidden_size, 1) holding the hidden
            state after every word.
        """
        total_hidden_states = []
        hidden = self._initial_hidden()

        for i in range(seq_inputs.shape[0]):
            # Each row becomes a column vector so it can be multiplied by Wx.
            x = seq_inputs[i, :].reshape(self.input_size, 1)
            hidden = self._step(hidden, x)
            total_hidden_states.append(hidden)

        if not total_hidden_states:
            # torch.stack rejects an empty list; return an empty result with
            # the same trailing dimensions instead of crashing.
            return hidden.new_zeros(0, self.hidden_size, 1)
        return torch.stack(total_hidden_states)

In [84]:
rnn = SingleRnn(input_size=100, hidden_size=128)
word_vectors = torch.randn(300, 100)  # 300 words, each a 100-dimensional vector
# Call the module itself rather than .forward() so that any registered
# hooks run as part of the call.
result = rnn(word_vectors)
result.shape

torch.Size([300, 128, 1])

### RNN with single layer: using pytorch module

In [92]:
import torch
import torch.nn as nn

class SimpleRNN(nn.Module):
    """Single-layer RNN built on ``nn.RNN``.

    For convenience the constructor immediately runs ``inputs`` through the
    RNN and stores the result on ``self.outputs`` / ``self.final_hidden``.
    The RNN itself is kept as ``self.cell`` so its weights are registered
    with the module and it can be applied again through ``forward``.

    Args:
        inputs: tensor of shape (batch_size, time_steps, word_size); e.g.
            (1, 10, 5) is a batch of one sequence with 10 time steps of
            5-dimensional vectors. Pass ``None`` to skip the eager run, in
            which case ``outputs`` and ``final_hidden`` are ``None``.
        word_size: dimension of each word vector.
        hidden_size: dimension of the hidden state.
    """

    def __init__(self, inputs, word_size, hidden_size):
        super().__init__()
        # batch_first=True tells nn.RNN that the first input dimension is the
        # batch size. Assigning to self registers the submodule; as a local
        # variable its parameters were invisible to parameters()/state_dict().
        self.cell = nn.RNN(word_size, hidden_size, batch_first=True)
        self.outputs, self.final_hidden = (
            (None, None) if inputs is None else self.cell(inputs)
        )

    def forward(self, inputs):
        """Apply the RNN to ``inputs``.

        Returns:
            Tuple ``(outputs, final_hidden)``: the hidden state at every time
            step, and the hidden state after the last time step.
        """
        return self.cell(inputs)

In [108]:
# torch.randn gives well-defined random inputs; torch.Tensor(1, 10, 5) only
# allocates uninitialized memory, which may contain NaN/inf values.
rnn = SimpleRNN(torch.randn(1, 10, 5), 5, 8)
# nn.RNN returns two values: the hidden states at every time step, and the
# hidden state after the last time step.
rnn.outputs.shape, rnn.final_hidden.shape

(torch.Size([1, 10, 8]), torch.Size([1, 1, 8]))

In [114]:
# Hidden state at the last time step (index 9) of the single batch element.
rnn.outputs[0,9,:]

tensor([ 0.0930, -0.3091,  0.6173, -0.2054, -0.2841, -0.0620,  0.1653,  0.5056],
       grad_fn=<SliceBackward>)

In [116]:
rnn.final_hidden
# The final hidden state equals rnn.outputs at the last time step
# (rnn.outputs[0,9,:]), as the two printed tensors confirm.

tensor([[[ 0.0930, -0.3091,  0.6173, -0.2054, -0.2841, -0.0620,  0.1653,
           0.5056]]], grad_fn=<StackBackward>)

### RNN with multi layers: using pytorch module

In [106]:
class MultiRNN(nn.Module):
    """Stacked (multi-layer) RNN built on ``nn.RNN``.

    A multi-layer RNN only needs the ``num_layers`` argument of ``nn.RNN``.
    For convenience the constructor immediately runs ``inputs`` through the
    RNN and stores the result on ``self.outputs`` / ``self.final_hidden``.
    The RNN itself is kept as ``self.cell`` so its weights are registered
    with the module and it can be applied again through ``forward``.

    Args:
        inputs: tensor of shape (batch_size, time_steps, word_size); e.g.
            (1, 10, 5) is a batch of one sequence with 10 time steps of
            5-dimensional vectors. Pass ``None`` to skip the eager run, in
            which case ``outputs`` and ``final_hidden`` are ``None``.
        word_size: dimension of each word vector.
        hidden_size: dimension of the hidden state.
        n_layers: number of stacked RNN layers.
    """

    def __init__(self, inputs, word_size, hidden_size, n_layers):
        super().__init__()
        # batch_first=True tells nn.RNN that the first input dimension is the
        # batch size. Assigning to self registers the submodule; as a local
        # variable its parameters were invisible to parameters()/state_dict().
        self.cell = nn.RNN(
            word_size, hidden_size, batch_first=True, num_layers=n_layers
        )
        self.outputs, self.final_hidden = (
            (None, None) if inputs is None else self.cell(inputs)
        )

    def forward(self, inputs):
        """Apply the stacked RNN to ``inputs``.

        Returns:
            Tuple ``(outputs, final_hidden)``: the last layer's hidden state
            at every time step, and the final hidden state of each layer with
            shape (n_layers, batch_size, hidden_size).
        """
        return self.cell(inputs)

In [118]:
# torch.randn gives well-defined random inputs; torch.Tensor(1, 10, 5) only
# allocates uninitialized memory, which may contain NaN/inf values.
rnn = MultiRNN(torch.randn(1, 10, 5), word_size=5, hidden_size=8, n_layers=2)
# With multiple layers, final_hidden has shape
# (number of layers, batch size, hidden state dim).
rnn.outputs.shape, rnn.final_hidden.shape

(torch.Size([1, 10, 8]), torch.Size([2, 1, 8]))

In [120]:
# Output at the last time step (index 9); in the printed results it matches
# the last entry of rnn.final_hidden.
rnn.outputs[0,9,:]

tensor([ 0.0105, -0.4557, -0.3854,  0.4029,  0.2252, -0.1533, -0.2856,  0.0045],
       grad_fn=<SliceBackward>)

In [119]:
# One final hidden state per layer: shape (2, 1, 8) for the two layers here.
rnn.final_hidden

tensor([[[ 0.6744,  0.1579, -0.4215,  0.8734, -0.0214,  0.3211,  0.8059,
           0.4949]],

        [[ 0.0105, -0.4557, -0.3854,  0.4029,  0.2252, -0.1533, -0.2856,
           0.0045]]], grad_fn=<StackBackward>)

### Bidirectional RNN with multi layers: using pytorch module

In [121]:
class BiRNN(nn.Module):
    """Bidirectional, multi-layer RNN built on ``nn.RNN``.

    Compared with a stacked RNN, only ``bidirectional=True`` is added. For
    convenience the constructor immediately runs ``inputs`` through the RNN
    and stores the result on ``self.outputs`` / ``self.final_hidden``. The
    RNN itself is kept as ``self.cell`` so its weights are registered with
    the module and it can be applied again through ``forward``.

    Args:
        inputs: tensor of shape (batch_size, time_steps, word_size); e.g.
            (1, 10, 5) is a batch of one sequence with 10 time steps of
            5-dimensional vectors. Pass ``None`` to skip the eager run, in
            which case ``outputs`` and ``final_hidden`` are ``None``.
        word_size: dimension of each word vector.
        hidden_size: dimension of the hidden state (per direction).
        n_layers: number of stacked RNN layers.
    """

    def __init__(self, inputs, word_size, hidden_size, n_layers):
        super().__init__()
        # batch_first=True tells nn.RNN that the first input dimension is the
        # batch size. Assigning to self registers the submodule; as a local
        # variable its parameters were invisible to parameters()/state_dict().
        self.cell = nn.RNN(
            word_size,
            hidden_size,
            batch_first=True,
            num_layers=n_layers,
            bidirectional=True,
        )
        self.outputs, self.final_hidden = (
            (None, None) if inputs is None else self.cell(inputs)
        )

    def forward(self, inputs):
        """Apply the bidirectional RNN to ``inputs``.

        Returns:
            Tuple ``(outputs, final_hidden)``. ``outputs`` has shape
            (batch_size, time_steps, 2 * hidden_size), the forward and
            backward states concatenated; ``final_hidden`` has shape
            (2 * n_layers, batch_size, hidden_size).
        """
        return self.cell(inputs)

In [123]:
# torch.randn gives well-defined random inputs; torch.Tensor(1, 10, 5) only
# allocates uninitialized memory, which may contain NaN/inf values.
rnn = BiRNN(torch.randn(1, 10, 5), word_size=5, hidden_size=8, n_layers=2)
# With a bidirectional multi-layer RNN, final_hidden has shape
# (number of layers * 2 directions, batch size, hidden state dim).
rnn.outputs.shape, rnn.final_hidden.shape

# The feature dimension of outputs doubles from 8 to 16 because the forward
# and backward hidden states are concatenated.

(torch.Size([1, 10, 16]), torch.Size([4, 1, 8]))