In [1]:
### RNN description and code by A. Karpathy
# - http://karpathy.github.io/2015/05/21/rnn-effectiveness/
# - https://gist.github.com/karpathy/d4dee566867f8291f086

In [2]:
import numpy as np

In [3]:
def lossFun(inputs, targets, hprev):
  """
  Run one forward and one backward pass of the RNN over a sequence.

  inputs,targets are both list of integers (indices into the vocabulary);
  targets[t] is the index that should be predicted after seeing inputs[t].
  hprev is Hx1 array of initial hidden state.

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.

  returns the loss (cross-entropy summed over all time steps), gradients on
  model parameters (each clipped elementwise to [-5, 5]), and last hidden
  state. For empty inputs the loss is 0, the gradients are all zero and the
  returned hidden state is a copy of hprev.
  """
  xs, hs, ps = {}, {}, {}
  hs[-1] = np.copy(hprev) # dict key -1 holds the state that precedes step 0
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    y = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    # Log-softmax with the largest logit subtracted first. The shift leaves the
    # probabilities unchanged but keeps np.exp from overflowing to inf (which
    # would turn them into NaN), and working in log space keeps the loss finite
    # even when the target probability underflows to 0.
    log_p = y - np.max(y)
    log_p = log_p - np.log(np.sum(np.exp(log_p)))
    ps[t] = np.exp(log_p) # probabilities for next chars
    loss += -log_p[targets[t],0] # softmax (cross-entropy loss)
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  dhnext = np.zeros_like(bh) # Hx1 like a hidden state; sized from bh so empty inputs work too
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw) # gradient flowing into the previous time step
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

def sample(h, seed_ix, n):
  """ 
  sample a sequence of integers from the model 
  h is memory state, seed_ix is seed letter for first time step,
  n is the number of integers to generate.

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size and
  draws from NumPy's global random generator.

  returns a list of n sampled indices (empty when n is 0); each sampled
  index is fed back in as the input of the following step.
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    # Softmax with the largest logit subtracted first: same distribution, but
    # np.exp can no longer overflow to inf and hand NaN probabilities to
    # np.random.choice (which would raise ValueError).
    e = np.exp(y - np.max(y))
    p = e / np.sum(e)
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    x = np.zeros((vocab_size, 1)) # one-hot encode the sampled index as the next input
    x[ix] = 1
    ixes.append(ix)
  return ixes


In [4]:

# data I/O
# Read the whole corpus into memory. The `with` block closes the file handle as
# soon as the text is read; the encoding is stated explicitly so decoding does
# not depend on the machine's locale (assumes the corpus is UTF-8 / plain ASCII).
with open('data/input.txt', 'r', encoding='utf-8') as corpus_file: # should be simple plain text file
  data = corpus_file.read()
# sorted() fixes the character order, so the char <-> index mapping is identical
# on every run (iteration order of a set of strings can change between sessions).
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print ('data has %d characters, %d unique.' % (data_size, vocab_size))
# lookup tables between characters and their integer indices
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 788 characters, 39 unique.


In [5]:
# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1
random_seed = 42 # change to get a different initialisation / different samples

# Seed NumPy's global generator before anything random happens, so that the
# weight initialisation below and every later np.random draw are repeatable
# when the notebook is re-run from a fresh kernel.
np.random.seed(random_seed)

# model parameters (small random weights, zero biases)
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

In [6]:
# n counts training iterations, p is the current read position inside data
n = 0
p = 0
# memory variables for Adagrad: one zero-filled accumulator per parameter,
# each with the shape of the parameter it belongs to
mWxh, mWhh, mWhy, mbh, mby = (np.zeros_like(w) for w in (Wxh, Whh, Why, bh, by))
# loss at iteration 0: seq_length predictions, each made with probability
# 1/vocab_size
smooth_loss = -np.log(1.0/vocab_size)*seq_length


In [9]:
# Training stops once counter reaches counter_max.
# (The original script was an infinite loop.)
counter, counter_max = 0, 10000

In [10]:
# Main training loop. Each pass trains on one chunk of seq_length characters
# and increments counter by one, so the loop ends when counter reaches
# counter_max. All state (n, p, hprev, the parameters and the Adagrad memory)
# lives at module level and is updated in place.
while counter < counter_max:
# (the original script used `while True:` and never terminated)
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  # On the very first iteration, or when the next chunk plus its one-character
  # look-ahead would run past the end of data, start over from the beginning
  # with a zeroed hidden state.
  if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  # targets are the inputs shifted one character to the right
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

  # sample from the model now and then
  # (every 100 iterations: 200 characters, seeded with the chunk's first
  # character and the hidden state carried over from the previous chunk)
  if n % 100 == 0:
    sample_ix = sample(hprev, inputs[0], 200)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print ('----\n %s \n----' % (txt, ))

  # forward seq_length characters through the net and fetch gradient
  # hprev is overwritten with the last hidden state so the next chunk
  # continues from where this one stopped
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  # exponential moving average of the loss (weight 0.001 on the new value)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001
  if n % 100 == 0: print ('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  
  # perform parameter update with Adagrad
  # Both `+=` statements below modify the arrays in place, which is what makes
  # the module-level parameters and memory variables change.
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam # accumulate squared gradients
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update; 1e-8 avoids division by zero

  p += seq_length # move data pointer
  n += 1 # iteration counter 

  counter += 1 # progress toward counter_max (the loop's stop condition)

----
 r ar bay thisgo ast torses ay tiutnlpise hevpmpon any mNe tedeateree ey sey ttre, afpooy taruidpiostor thfint enhing fnoishe ped da hor hel elea, ta mhe. hvene fNd Cotietier hos min sert teit teanhid  
----
iter 1000, loss: 75.404542
----
 he foi, Corle o si ceim mo Cou teis ne tand Cag he  ed afo to in whe hed ar heado
.d-ta hrint ff ive 1a d e-th. aon hN daches orm avinteestonm ar thic 
n
 mes hs, tulw tod coa-the ies
wd Ne se besuvio 
----
iter 1100, loss: 73.103744
----
 aoseis weieciogsre .dide roathis seaghomeilsoipros asf hessre shnre tonseisNe, baChoisf they ivis bNmhe Conie he foue Coinoiata 9 .alsoso hemNa y eat e wy deaeweisg hustbog .vise et Nespes ara. ho hoi 
----
iter 1200, loss: 70.745922
----
 rerpeclees Pon aresuspre tor respurd hevreverimeumpey merodrienhespe holyaim ivengelheater Cor er wheag Chre ha ber Pfellle Chris weidinarde hrspaenive, bst erey pherpoiadlalbee hermenreg ar.mear Che
 
----
iter 1300, loss: 68.472199
----
  welanhenSNrlantoufe ana paonoussi

----
  fienteathias he he hospital a prerafteremoriT deceas a hobpiar Sat fer and dreltarlseaghe fare hvaltod N al Saturse, Give fovin the haistis whe Fe hos be meat hris we Satalay decidedeorory burliy Ghe 
----
iter 4500, loss: 26.340071
----
 is itaftor in ted 
ft. said.
hond.
cseliy five Cor tatnounced on Tw19.
mhas besnofico, Gounte dution wsthtallo ho, in tre Corit aocive freito thiT ceasure, iat he foonted ho pot tos hrmpesuria tredfte 
----
iter 4600, loss: 25.704468
----
 ospital.yeng th maht da by had tivid-19 d he sainough asin sis bed g ehtar he hesmith ho Jtistl Saf cd ar aong an tol on he sris id-, dad Pol Covid-19 tannompitalided phoconstimalf CNN he saino bN be  
----
iter 4700, loss: 25.050606
----
 t se, Covid-19., Christitate hmistiencis bewiteathing th d himsthithid hrmoutitid bold hns af ded id pinatbg a mor taid the che ior tesnor ie frod br a be send intad, T, probafme fris wee he faid he d 
----
iter 4800, loss: 24.444820
----
 he ked Try wNenoumded preralpris f

----
 NN by phon tie Covid-19.

Christliy brim asuriatidese tiry d ant in the hospital. He said he affit N whrer NN hit Jersay that he had trstheal.od Preat ee moarlie Saivid-19.

Cno borealby bhe fursitinr 
----
iter 8000, loss: 12.358521
----
 o startesiitald Ctred, erid cocey bhe sed Je ked Tf pe ay d-nd phe ter eatiteath alt Trsereatheng Saturday the beit Dedser aenourtentesoliestededenough he said hes anser a trim, who stareanghesaid phe 
----
iter 8100, loss: 12.053583
----
 ughtm ant his ine inaugh csividmwestuth al. He said he buth esinture sin w anted a mseliaturse, who fationsult fft enouth him thed he f fel annough arite th hid the hospital, Christie told CNN by pbor 
----
iter 8200, loss: 11.739828
----
 sttesay dDodelisr, phe d peslieatal, ahtepafterseamprostis con Twive hos hissiself Chit ad Pre hos his iont he had tis ionted he has atsl, Christie to deche  easury d-19 mriit testecter batig was best 
----
iter 8300, loss: 11.464224
----
 Satithr hid ar h csivir anc pd p f