In [1]:
## implementation & testing --> v1

import torch
import torch.nn as nn
import numpy as np
from torch.nn import functional as F
torch.manual_seed(1337)

print('IMPORTS DONE')

IMPORTS DONE


In [3]:
## start with data
data = open('/Users/joesasson/Desktop/open-source/numpy-RNN/data/input.txt', 'r').read() # should be simple plain text file

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

print('data has {} characters, {} unique.'.format(data_size, vocab_size))

char_to_idx = { ch:i for i,ch in enumerate(chars) }
idx_to_char = { i:ch for i,ch in enumerate(chars) }

data has 1115394 characters, 65 unique.


In [4]:
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    batch_size_total = batch_size * seq_length
    # total number of batches we can make
    n_batches = len(arr)//batch_size_total
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size_total]
    # Reshape into batch_size rows
    arr = arr.reshape((batch_size, -1))
    
    # iterate through the array, one sequence at a time
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [17]:
## hyper-params
batch_size = 128
seq_length = 8
hidden_size = 256

### this hapens during training, each batch ###


# from https://blog.varunajayasiri.com/numpy_lstm.html
########################

pointer, iteration = 0, 0

if pointer + seq_length >= len(data) or iteration == 0:
    g_h_prev = np.zeros((hidden_size, 1))
    g_C_prev = np.zeros((hidden_size, 1))
    pointer = 0

inputs_one = np.array(([char_to_idx[ch] 
            for ch in data[pointer: pointer + seq_length]]))

targets_one = np.array(([char_to_idx[ch] 
            for ch in data[pointer + 1: pointer + seq_length + 1]]))

pointer += seq_length

# from https://gist.github.com/karpathy/d4dee566867f8291f086
########################

n, p = 0, 0

# prepare inputs (we're sweeping from left to right in steps seq_length long)
if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data

inputs_two = np.array([char_to_idx[ch] for ch in data[p:p+seq_length]])
targets_two = np.array([char_to_idx[ch] for ch in data[p+1:p+seq_length+1]])

p += seq_length # move data pointer
n += 1 # iteration counter 

In [18]:
print(inputs_two)
print('------')
print(targets_two)

[34 32 37 41  8 60 12 32]
------
[32 37 41  8 60 12 32  8]


In [19]:
for t in range(seq_length):
    context = inputs_two[:t+1]
    target = targets_two[t]
    print(f"when input is {context} the target: {target}")

when input is [34] the target: 32
when input is [34 32] the target: 37
when input is [34 32 37] the target: 41
when input is [34 32 37 41] the target: 8
when input is [34 32 37 41  8] the target: 60
when input is [34 32 37 41  8 60] the target: 12
when input is [34 32 37 41  8 60 12] the target: 32
when input is [34 32 37 41  8 60 12 32] the target: 8


In [20]:
# model parameters
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

# RNN forward pass from --> https://gist.github.com/karpathy/d4dee566867f8291f086
##############################
loss = 0
xs, hs, ys, ps = {}, {}, {}, {}
hs[-1] = np.copy(hprev)

# forward pass
for t in range(len(inputs_two)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs_two[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
    loss += -np.log(ps[t][targets_two[t],0]) # softmax (cross-entropy loss)

In [35]:
from typing import Any

class RNN:
    def __init__(self, hidden_size, vocab_size):
        self.name = 'RNN'

        # model parameters
        self.Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
        self.Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
        self.Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
        self.bh = np.zeros((hidden_size, 1)) # hidden bias
        self.by = np.zeros((vocab_size, 1)) # output bias

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        """RNN Forward Pass"""

        x, y = kwds['inputs'], kwds['targets']

        loss = 0
        xs, hs, ys, ps = {}, {}, {}, {} # inputs, hidden state, output, probabilities
        hs[-1] = np.copy(hprev)

        # forward pass
        for t in range(len(x)):
            xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
            xs[t][inputs_two[t]] = 1
            hs[t] = np.tanh(np.dot(self.Wxh, xs[t]) + np.dot(self.Whh, hs[t-1]) + self.bh) # hidden state
            ys[t] = np.dot(self.Why, hs[t]) + self.by # unnormalized log probabilities for next chars
            ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
            loss += -np.log(ps[t][y[t],0]) # softmax (cross-entropy loss)

        return loss


rnn = RNN(hidden_size=hidden_size, vocab_size=vocab_size)
rnn(inputs=inputs_two, targets=targets_two)

33.38776832247342