# RNN

In [1]:
%matplotlib inline
import torch
import math
from torch.nn import functional as F
from torch import nn
from d2l import torch as d2l

In [2]:
# Mini-batch configuration passed to the data loader below.
batch_size, num_steps = 32, 35
# The loader hands back two objects: the training iterator and the vocabulary.
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

In [3]:
# One-hot encode the indices 0 and 10 into vectors of length 11.
F.one_hot(torch.tensor([0, 10]), num_classes=11)

tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [4]:
# Toy batch of token indices laid out as 2 rows by 5 columns.
X = torch.arange(10).reshape((2, 5))
# Transposing before encoding moves the 5-long axis to the front,
# so the encoded tensor has shape (5, 2, 28).
F.one_hot(X.T, num_classes=28).shape

torch.Size([5, 2, 28])

# Initialize Model Parameters

In [5]:
def get_params(vocab_size, num_hidden, device):
    """Create the trainable tensors of a single-layer RNN.

    Weight matrices are drawn from a standard normal distribution and scaled
    by 0.01; bias vectors start at zero. Inputs and outputs both have width
    ``vocab_size``.

    Args:
        vocab_size: Width of the input and of the output layer.
        num_hidden: Width of the hidden state.
        device: Device on which every tensor is allocated.

    Returns:
        The list ``[W_xh, W_hh, b_h, W_hq, b_q]``, each entry with
        ``requires_grad`` switched on.
    """
    num_inputs = num_outputs = vocab_size

    def scaled_normal(*dims):
        # Small random values: N(0, 1) samples shrunk by a factor of 100.
        return 0.01 * torch.randn(size=dims, device=device)

    # Weights: input->hidden, hidden->hidden, hidden->output.
    W_xh = scaled_normal(num_inputs, num_hidden)
    W_hh = scaled_normal(num_hidden, num_hidden)
    W_hq = scaled_normal(num_hidden, num_outputs)

    # Biases for the hidden layer and the output layer.
    b_h = torch.zeros(num_hidden, device=device)
    b_q = torch.zeros(num_outputs, device=device)

    # requires_grad_ flips the flag in place and returns the same tensor.
    return [p.requires_grad_(True) for p in (W_xh, W_hh, b_h, W_hq, b_q)]


In [6]:
def init_rnn_state(batch_size, num_hiddens, device):
    """Build the initial RNN state: a 1-tuple holding an all-zero tensor.

    Args:
        batch_size: Number of rows of the hidden-state tensor.
        num_hiddens: Number of columns of the hidden-state tensor.
        device: Device on which the tensor is allocated.

    Returns:
        A tuple with a single ``(batch_size, num_hiddens)`` tensor of zeros.
    """
    initial_hidden = torch.zeros((batch_size, num_hiddens), device=device)
    # Wrapped in a tuple so callers can unpack the state uniformly.
    return (initial_hidden,)

In [7]:
def rnn(inputs, state, params):
    """Run the RNN forward over every time step of ``inputs``.

    Args:
        inputs: Tensor iterated along its first axis; expected shape is
            (num_steps, batch_size, vocab_size).
        state: 1-tuple containing the hidden state to start from.
        params: Sequence ``(W_xh, W_hh, b_h, W_hq, b_q)``.

    Returns:
        A pair ``(outputs, (H,))`` where ``outputs`` is the per-step outputs
        concatenated along dimension 0 and ``H`` is the final hidden state.
    """
    W_xh, W_hh, b_h, W_hq, b_q = params
    (hidden,) = state
    step_outputs = []
    for x_t in inputs:
        # Hidden update: tanh(x_t W_xh + h W_hh + b_h).
        pre_activation = torch.mm(x_t, W_xh) + torch.mm(hidden, W_hh) + b_h
        hidden = torch.tanh(pre_activation)
        # Output projection for this time step.
        step_outputs.append(torch.mm(hidden, W_hq) + b_q)

    stacked = torch.cat(step_outputs, dim=0)
    return stacked, (hidden,)

In [8]:
class RNNModelScratch:
    """Thin wrapper bundling RNN parameters with pluggable state/forward functions."""

    def __init__(self, vocab_size, num_hiddens, device, get_params, init_state, forward_fn):
        """Store the sizes and callables, and create the parameters.

        Args:
            vocab_size: Number of one-hot classes used to encode the input.
            num_hiddens: Hidden size forwarded to ``get_params`` and ``init_state``.
            device: Device forwarded to ``get_params``.
            get_params: Callable ``(vocab_size, num_hiddens, device) -> params``.
            init_state: Callable ``(batch_size, num_hiddens, device) -> state``.
            forward_fn: Callable ``(inputs, state, params) -> result``.
        """
        self.vocab_size = vocab_size
        self.num_hiddens = num_hiddens
        self.params = get_params(vocab_size, num_hiddens, device)
        self.init_state = init_state
        self.forward_fn = forward_fn

    def __call__(self, X, state):
        """One-hot encode the transposed ``X`` as float32 and run ``forward_fn``."""
        encoded = F.one_hot(X.T, self.vocab_size)
        return self.forward_fn(encoded.float(), state, self.params)

    def begin_state(self, batch_size, device):
        """Return ``init_state(batch_size, self.num_hiddens, device)``."""
        return self.init_state(batch_size, self.num_hiddens, device)

In [9]:
# Smoke test: build the from-scratch model and push the toy batch X through it.
num_hiddens = 512
net = RNNModelScratch(
    vocab_size=len(vocab),
    num_hiddens=num_hiddens,
    device=d2l.try_gpu(),
    get_params=get_params,
    init_state=init_rnn_state,
    forward_fn=rnn,
)
state = net.begin_state(batch_size=X.shape[0], device=d2l.try_gpu())
Y, new_state = net(X.to(d2l.try_gpu()), state)
# Displayed: output shape, number of state tensors, hidden-state shape.
Y.shape, len(new_state), new_state[0].shape

(torch.Size([10, 28]), 1, torch.Size([2, 512]))