## VANILLA RNN WITH ARBITRARY LAYERS

In [1]:
## implementation & testing --> v1

import torch
import torch.nn as nn
import numpy as np
from torch.nn import functional as F
torch.manual_seed(1337)

print('IMPORTS DONE')

IMPORTS DONE


In [2]:
## start with data
data = open('/Users/joesasson/Desktop/open-source/numpy-RNN/data/input.txt', 'r').read() # should be simple plain text file

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

print('data has {} characters, {} unique.'.format(data_size, vocab_size))

char_to_idx = { ch:i for i,ch in enumerate(chars) }
idx_to_char = { i:ch for i,ch in enumerate(chars) }

data has 1115394 characters, 65 unique.


In [33]:
from typing import Any
np.random.seed(99)

class RNN:
    def __init__(self, hidden_size, vocab_size, seq_length, num_layers=1):
        self.name = 'RNN'
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.num_layers = num_layers

        # model parameters
        self.Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden

        if num_layers == 1:
            self.Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
        elif num_layers > 1:
            self.Whh_layers = [np.random.randn(hidden_size, hidden_size)*0.01 for _ in range(num_layers-1)] # hidden to hidden for multiple layers
        
        self.Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
        self.bh = np.zeros((hidden_size, 1)) # hidden bias
        self.by = np.zeros((vocab_size, 1)) # output bias

        # memory variables for training (ada grad from karpathy's github)
        self.iteration, self.pointer = 0, 0
        self.mWxh = np.zeros_like(self.Wxh)
        self.mWhh = np.zeros_like(self.Whh) if num_layers == 1 else np.zeros_like(self.Whh_layers[0])
        self.mWhy = np.zeros_like(self.Why)
        self.mbh, self.mby = np.zeros_like(self.bh), np.zeros_like(self.by)
        self.loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0

        self.running_loss = []

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        """RNN Forward Pass"""
        x, y, hprev_layers = kwds['inputs'], kwds['targets'], kwds['hprev_layers']
        lr = kwds['lr']

        loss = 0
        xs, hs, ys, ps = {}, {}, {}, {}
        hs[-1] = np.copy(hprev_layers[0])

        # forward pass
        for t in range(len(x)):
            xs[t] = np.zeros((self.vocab_size,1)) # encode in 1-of-k representation
            xs[t][x[t]] = 1

            h_in = np.dot(self.Wxh, xs[t]) + self.bh # input to the hidden state
            for i in range(self.num_layers-1):
                h_in += np.dot(self.Whh_layers[i], hprev_layers[i][t-1])

            hs[t] = np.tanh(h_in)
            ys[t] = np.dot(self.Why, hs[t]) + self.by # unnormalized log probabilities for next chars
            ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
            loss += -np.log(ps[t][y[t],0]) # softmax (cross-entropy loss)

        return loss, hprev_layers


# Initialize RNN
num_layers = 2
hidden_size = 128
seq_length = 25

rnn = RNN(hidden_size=hidden_size, vocab_size=vocab_size, seq_length=seq_length, num_layers=num_layers)

# Initialize hidden state layers
hprev_layers = [np.zeros((hidden_size,1)) for _ in range(num_layers)]

x = [char_to_idx[ch] for ch in data[rnn.pointer:rnn.pointer+seq_length]]
y = [char_to_idx[ch] for ch in data[rnn.pointer+1:rnn.pointer+seq_length+1]]

# Call RNN
loss, hprev_layers = rnn(inputs=x, targets=y, hprev_layers=hprev_layers, lr=1e-1)

ValueError: shapes (128,128) and (1,) not aligned: 128 (dim 1) != 1 (dim 0)