In [1]:
import numpy as np

data = open('input.txt', 'r').read()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print ('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 588 characters, 37 unique.


In [2]:
chars

['x',
 'L',
 ';',
 'a',
 'b',
 's',
 'o',
 't',
 'y',
 'v',
 'F',
 'l',
 '\n',
 'p',
 'f',
 'm',
 'J',
 'u',
 'c',
 'e',
 'n',
 'D',
 'k',
 'd',
 "'",
 'h',
 ' ',
 'I',
 'r',
 ',',
 'M',
 '.',
 'w',
 'B',
 'R',
 'i',
 'g']

In [3]:
hidden_size = 100
seq_length = 25
learning_rate = 1e-1

In [4]:
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1))

In [5]:
print(Wxh.shape,Whh.shape,Why.shape)

(100, 37) (100, 100) (37, 100)


In [6]:
def lossFun(inputs, targets, hprev):
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
        ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
        loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [7]:
def sample(h, seed_ix, n):
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        y = np.dot(Why, h) + by
        p = np.exp(y) / np.sum(np.exp(y))
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

In [8]:
n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)
smooth_loss = -np.log(1.0/vocab_size)*seq_length
while True:
    if p+seq_length+1 >= len(data) or n == 0: 
        hprev = np.zeros((hidden_size,1))
        p = 0 # go from start of data
    inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]
    if n % 1000 == 0:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n %s \n----' % (txt, ))
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if n % 100 == 0: print ('iter %d, loss: %f' % (n, smooth_loss)) # print progress
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

    p += seq_length # move data pointer
    n += 1 # iteration counter 


----
 iorbofcegRrrMLnikhkyexkyt
lkpRg;nMtgInl'Jv.tg.h;yu;BnJurddcd, 'apRdlFbMw;krwM ciJ
tMoL
'LuIfJ pdLJeguvywltFndfupt;nyrDM,oFbcwJD'bikm;ItJuuFffLedby,f rFbe,gd,t 'mc
wgvnldfDwkinewamkxIL;sdf,RaRJus. .Bio 
----
iter 0, loss: 90.272943
iter 100, loss: 91.134430
iter 200, loss: 89.233037
iter 300, loss: 86.432991
iter 400, loss: 83.097228
iter 500, loss: 79.659138
iter 600, loss: 76.055123
iter 700, loss: 72.423900
iter 800, loss: 68.876680
iter 900, loss: 65.298170
----
 m 
Bl te mhe Inearne, me meven he youithe beaause you de wing de atn lnve nd faue de 
rne he ine dn art love loe and mave . sing fou bec fhe ftn. 
he reanusearneart mave yhe he anl nt you, eng rng yo  
----
iter 1000, loss: 61.958526
iter 1100, loss: 58.790183
iter 1200, loss: 55.643295
iter 1300, loss: 52.813779
iter 1400, loss: 50.080170
iter 1500, loss: 47.454706
iter 1600, loss: 44.969534
iter 1700, loss: 42.576068
iter 1800, loss: 40.226394
iter 1900, loss: 38.048060
----
 wie  ite ithe ond I love you; 
I mll wit

iter 17100, loss: 1.372345
iter 17200, loss: 1.394834
iter 17300, loss: 1.511166
iter 17400, loss: 1.875296
iter 17500, loss: 1.912040
iter 17600, loss: 1.835843
iter 17700, loss: 1.753100
iter 17800, loss: 1.673749
iter 17900, loss: 1.599703
----
 nut Lovnd toe stea teay wald tien from colt oe of f I love;y
My heart moves from cold to fire. 
I love you only because you only because its its co for do weau
Bend to you, 
I gou, 
From waiting to no 
----
iter 18000, loss: 1.532488
iter 18100, loss: 1.469653
iter 18200, loss: 1.413763
iter 18300, loss: 1.362222
iter 18400, loss: 1.378807
iter 18500, loss: 1.512126
iter 18600, loss: 1.466162
iter 18700, loss: 1.417452
iter 18800, loss: 1.368289
iter 18900, loss: 1.320757
----
 frou being do arue Io 
I love you you
sllvesly, and hating you, 
From waiting to not go ayt coves from coly. 
Ma to you blin se love  ng you; 
I to not love ly. 
Ma
Because it's you the meas ng torn t 
----
iter 19000, loss: 1.419976
iter 19100, loss: 1.703642
iter 19

iter 34100, loss: 0.717779
iter 34200, loss: 0.703884
iter 34300, loss: 0.691920
iter 34400, loss: 0.681290
iter 34500, loss: 0.669713
iter 34600, loss: 0.690048
iter 34700, loss: 1.320146
iter 34800, loss: 1.748170
iter 34900, loss: 1.830364
----
 ue calm. 
In this part of the story I am the one who
Dies, the only one, and I will die of love because I love you, 
From waiting to not waiting for you
My heart moves from cold to fire. 
I love you o 
----
iter 35000, loss: 1.749096
iter 35100, loss: 1.650731
iter 35200, loss: 1.555671
iter 35300, loss: 1.464619
iter 35400, loss: 1.381011
iter 35500, loss: 1.305463
iter 35600, loss: 1.233981
iter 35700, loss: 1.172832
iter 35800, loss: 1.114657
iter 35900, loss: 1.065527
----
 t moves from cold to fire. 
I love you only because it's you the one I love; 
I hate you deeply, and hating you
Bend to you, 
From lovt you. 
Inly for love; 
I hath not Malt waly toe caly. 
In this pa 
----
iter 36000, loss: 1.017839
iter 36100, loss: 0.972118
iter 36

iter 51100, loss: 0.388774
iter 51200, loss: 0.379046
iter 51300, loss: 0.369852
iter 51400, loss: 0.361074
iter 51500, loss: 0.353229
iter 51600, loss: 0.345583
iter 51700, loss: 0.338155
iter 51800, loss: 0.331322
iter 51900, loss: 0.324828
----
 d I will die of love because I love you, 
Because I love you, Love, in fire. 
I love you only because it's you the one I love; 
I hate you deeply, and ht rou, 
From waiting to not waiting for you
My h 
----
iter 52000, loss: 0.318675
iter 52100, loss: 0.312894
iter 52200, loss: 0.307493
iter 52300, loss: 0.302192
iter 52400, loss: 0.297131
iter 52500, loss: 0.292580
iter 52600, loss: 0.288224
iter 52700, loss: 0.283955
iter 52800, loss: 0.280050
iter 52900, loss: 0.276237
----
 ; 
I hate you deeply, and hating you
Bend to you, and the measure of my changing love for you
Is that I do not see you but love you blindly. 
Maybe January lig ly. 
Maybe January light will consume
My 
----
iter 53000, loss: 0.272975
iter 53100, loss: 0.269530
iter 53

KeyboardInterrupt: 