In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence
from torch.autograd import Variable
import torch.optim as optim
import numpy as np
import random
import math
import pickle
import numpy as np
import copy

In [6]:
class RNN(nn.Module):
    """Minimal single-step recurrent cell with a linear read-out.

    Each call to ``forward`` advances the network by exactly one time step::

        h_t = tanh(W_xh @ x_t + W_hh @ h_{t-1} + b_h)
        o_t = W_ho @ h_t + b_o

    The input projection has no bias of its own; the only bias inside the
    ``tanh`` is the one owned by ``hidden_to_hidden``.

    Args:
        n_inputs: Size of the last dimension of every input step.
        n_hiddens: Size of the hidden state.
        n_outputs: Size of the last dimension of every output step.
    """

    def __init__(self, n_inputs, n_hiddens, n_outputs):
        super().__init__()
        self.n_inputs = n_inputs
        self.n_hiddens = n_hiddens
        self.n_outputs = n_outputs

        # bias=False: hidden_to_hidden already contributes the bias term that
        # is added before the tanh, so a second one here would be redundant.
        self.input_to_hidden = nn.Linear(n_inputs, n_hiddens, bias=False)
        self.hidden_to_hidden = nn.Linear(n_hiddens, n_hiddens)
        self.hidden_to_output = nn.Linear(n_hiddens, n_outputs)

    def forward(self, X, hidden_state):
        """Run one time step.

        Args:
            X: Input for the current step; its last dimension must be
                ``n_inputs`` (typically shaped ``(batch, n_inputs)``).
            hidden_state: State from the previous step; its last dimension
                must be ``n_hiddens`` (see ``init_zero_hidden`` for step 0).

        Returns:
            Tuple ``(output, hidden_state)``: the result of the final linear
            layer (no activation or normalisation is applied to it) and the
            updated hidden state to feed into the next step.
        """
        projected_input = self.input_to_hidden(X)
        recurrent_term = self.hidden_to_hidden(hidden_state)
        hidden_state = torch.tanh(projected_input + recurrent_term)
        output = self.hidden_to_output(hidden_state)

        return output, hidden_state

    def init_zero_hidden(self, batch_size=1):
        """Return an all-zero initial hidden state of shape ``(batch_size, n_hiddens)``.

        The tensor is allocated with the same dtype and device as the model's
        own weights, so it stays usable after ``model.to(device)``,
        ``model.double()`` and similar conversions; a default-dtype CPU tensor
        would make the first ``forward`` call fail in those cases.

        Args:
            batch_size: Number of sequences processed in parallel.

        Returns:
            A zero tensor that does not require gradients.
        """
        reference = self.hidden_to_hidden.weight
        return torch.zeros(
            batch_size,
            self.n_hiddens,
            dtype=reference.dtype,
            device=reference.device,
            requires_grad=False,
        )

In [8]:
def train(model: RNN, 
          dataloader: DataLoader, 
          epochs: int, 
          optimizer: optim.Optimizer, 
          loss_fn: nn.Module,
          batch_size: int):
    train_losses = {}
    model.train()
    for epoch in range(epochs):
        epoch_losses = []
        for X, y in dataloader:
            
            hidden = model.init_zero_hidden(batch_size=batch_size)
            model.zero_grad()
            loss = 0
            
            for token_index in range(X.shape[0]):
                output, hidden = model(token, hidden)
                loss += loss_fn(output, y[token_index])
                
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=3)
            optimizer.step()