In [351]:
import numpy as np

# data I/O: read the entire corpus into memory as one string
with open('names.txt', 'r') as f:
    data = f.read()

# Vocabulary = every distinct character in the corpus, sorted so that the
# character <-> id assignment is deterministic from run to run.
chars = sorted(set(data))
data_size = len(data)
vocab_size = len(chars)

# Lookup tables: stoi maps character -> integer id, itos maps id -> character.
stoi = {ch: idx for idx, ch in enumerate(chars)}
itos = dict(enumerate(chars))

print(f'Data has {data_size} characters, {vocab_size} unique.')

Data has 228145 characters, 27 unique.


In [352]:
# hyperparameters
seq_length = 25     # number of characters taken for the training window
hidden_size = 100   # number of units in the recurrent hidden state

# Fixed seed so the random initial weights are reproducible.
np.random.seed(1234)

# parameters
# The three weight matrices are drawn in this exact order (Wxh, Whh, Why);
# with a fixed seed, changing the order would change the initial values.
weight_scale = 0.01  # shrink the standard-normal draws to small initial weights
Wxh = weight_scale * np.random.randn(hidden_size, vocab_size)    # input  -> hidden
Whh = weight_scale * np.random.randn(hidden_size, hidden_size)   # hidden -> hidden
Why = weight_scale * np.random.randn(vocab_size, hidden_size)    # hidden -> output
# Biases start at zero and are column vectors.
bh = np.zeros((hidden_size, 1))
by = np.zeros((vocab_size, 1))

In [353]:
# inputs and targets
# The target text is the input text shifted one character to the right, so
# targets[t] is the character that follows inputs[t] in the corpus.
input_text = data[:seq_length]
target_text = data[1:seq_length + 1]
inputs = [stoi[symbol] for symbol in input_text]
targets = [stoi[symbol] for symbol in target_text]

In [354]:
# forward pass: run the RNN over the input window and accumulate the loss.
# Per-time-step caches keyed by t (kept because the backward pass reads them):
#   xs[t] one-hot input, hs[t] hidden state, ys[t] logits, ps[t] probabilities.
xs, hs, ys, ps = {}, {}, {}, {}
loss = 0
# Initial hidden state, stored under key -1 so hs[t-1] is defined at t = 0.
hs[-1] = np.zeros((hidden_size, 1))

for t in range(len(inputs)):
    # convert input to one-hot
    xs[t] = np.zeros((vocab_size, 1))
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(Wxh @ xs[t] + Whh @ hs[t-1] + bh)
    ys[t] = Why @ hs[t] + by
    # Softmax. Subtracting the max logit leaves the probabilities unchanged
    # mathematically but keeps np.exp from overflowing on large logits.
    # The shift is written into a new array so the cached logits in ys[t]
    # are not modified.
    shifted = ys[t] - np.max(ys[t])
    exp_scores = np.exp(shifted)
    ps[t] = exp_scores / np.sum(exp_scores)
    # cross entropy: negative log-probability assigned to the target character
    loss += -np.log(ps[t][targets[t]].item())
print(loss)


82.39381010667938


In [355]:
# backward pass: backpropagation through time over the same window.
# Gradient accumulators, one per parameter, summed over all time steps.
dWhh = np.zeros_like(Whh)
dWxh = np.zeros_like(Wxh)
dWhy = np.zeros_like(Why)
dbh = np.zeros_like(bh)
dby = np.zeros_like(by)
# Gradient flowing into the hidden state from the following time step;
# zero for the last step because nothing comes after it.
dhnext = np.zeros_like(hs[0])

for t in reversed(range(len(inputs))):
    # Gradient of the cross-entropy loss w.r.t. the logits: probabilities
    # minus the one-hot target. Work on a copy: subtracting in place on
    # ps[t] itself would overwrite the cached probabilities and make
    # re-running this cell produce different (wrong) gradients.
    dtarget = np.copy(ps[t])
    dtarget[targets[t]] -= 1
    dby += dtarget
    dWhy += dtarget @ hs[t].T
    # Hidden-state gradient: contribution from this step's output plus the
    # contribution carried back from step t+1.
    dh = Why.T @ dtarget + dhnext
    # Backprop through tanh: d/dz tanh(z) = 1 - tanh(z)**2, and hs[t] = tanh(z).
    dh_before_tanh = (1 - hs[t]**2) * dh
    dbh += dh_before_tanh
    dWxh += dh_before_tanh @ xs[t].T
    # At t = 0, hs[t-1] is hs[-1], the initial hidden state.
    dWhh += dh_before_tanh @ hs[t-1].T
    dhnext = Whh.T @ dh_before_tanh

In [349]:
# update: one step of plain gradient descent on every parameter.
# The arrays are modified in place, so each execution of this cell applies
# another step using whatever gradients are currently stored.
lr = 0.1
params = (Whh, Wxh, Why, bh, by)
grads = (dWhh, dWxh, dWhy, dbh, dby)
for weights, grad in zip(params, grads):
    weights -= lr * grad