In [19]:
import numpy as np

In [20]:
data = open('sonnets.txt', 'r', encoding="utf8").read() # any simple txt file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 94275 characters, 61 unique.


In [21]:
hidden_size = 100
cell_size = 100 # cell size and hidden size should be the same
sequence_length = 100

In [22]:
Wx_f = np.random.randn(cell_size, vocab_size) * 0.01
Wh_f = np.random.randn(cell_size, hidden_size) * 0.01
b_f = np.random.randn(cell_size, 1)

Wx_i = np.random.randn(cell_size, vocab_size) * 0.01
Wh_i = np.random.randn(cell_size, hidden_size) * 0.01
b_i = np.random.randn(cell_size, 1)

Wx_g = np.random.randn(cell_size, vocab_size) * 0.01
Wh_g = np.random.randn(cell_size, hidden_size) * 0.01
b_g = np.random.randn(cell_size, 1)

Wx_o = np.random.randn(hidden_size, vocab_size) * 0.01
Wh_o = np.random.randn(hidden_size, hidden_size) * 0.01
b_o = np.random.randn(hidden_size, 1)

Wh_y = np.random.randn(vocab_size, hidden_size) * 0.01
b_y = np.random.randn(vocab_size, 1)

In [23]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [24]:
def train(inputs, targets, hprev, cprev):

    xs, hs, cs, ys, ps = {}, {}, {}, {}, {}
    fs, ins, gs, os = {}, {}, {}, {}
    loss = 0
    hs[-1] = np.copy(hprev)
    cs[-1] = np.copy(cprev)

    for i in range(len(inputs)):
        xs[i] = np.zeros((vocab_size, 1))
        xs[i][inputs[i]] = 1
        fs[i] = sigmoid(np.dot(Wx_f, xs[i]) + np.dot(Wh_f, hs[i-1]) + b_f)
        cs[i] = cs[i-1] * fs[i]

        ins[i] = sigmoid(np.dot(Wx_i, xs[i]) + np.dot(Wh_i, hs[i-1]) + b_i)
        gs[i] = np.tanh(np.dot(Wx_g, xs[i]) + np.dot(Wh_g, hs[i-1]) + b_g)
        cs[i] += ins[i] * gs[i]

        os[i] = sigmoid(np.dot(Wx_o, xs[i]) + np.dot(Wh_o, hs[i-1]) + b_o)

        hs[i] = np.tanh(cs[i]) * os[i]
        ys[i] = np.dot(Wh_y, hs[i]) + b_y
        ps[i] = np.exp(ys[i]) / np.sum(np.exp(ys[i]))

        loss += -np.log(ps[i][targets[i], 0])

    dWx_f, dWh_f, db_f = np.zeros_like(Wx_f), np.zeros_like(Wh_f), np.zeros_like(b_f)
    dWx_i, dWh_i, db_i = np.zeros_like(Wx_i), np.zeros_like(Wh_i), np.zeros_like(b_i)
    dWx_g, dWh_g, db_g = np.zeros_like(Wx_g), np.zeros_like(Wh_g), np.zeros_like(b_g)
    dWx_o, dWh_o, db_o = np.zeros_like(Wx_o), np.zeros_like(Wh_o), np.zeros_like(b_o)
    dWh_y, db_y = np.zeros_like(Wh_y), np.zeros_like(b_y)

    dhnext, dcnext = np.zeros_like(hs[0]), np.zeros_like(cs[0])

    for i in reversed(range(len(inputs))):
        dy = np.copy(ps[i])
        dy[targets[i]] -= 1

        dWh_y += np.dot(dy, hs[i].T)
        db_y += dy

        dhsi = np.dot(Wh_y.T, dy) + dhnext
        dcsi = (1 - cs[i] * cs[i]) * (os[i] * dhsi) + dcnext
        dosi = np.tanh(cs[i]) * dhsi
        dosiraw = (1 - os[i]) * (os[i]) * dosi

        dWx_o += np.dot(dosiraw, xs[i].T)
        dWh_o += np.dot(dosiraw, hs[i-1].T)

        db_o += dosiraw
        dhnext = Wh_o @ dosiraw 

        dcsi += 1
        dgsi = ins[i] * dcsi
        dgsiraw = (1 - gs[i] * gs[i]) * dgsi
        dWx_g += np.dot(dgsiraw, xs[i].T)
        dWh_g += np.dot(dgsiraw, hs[i-1].T)
        db_g += dgsiraw

        dinsi = gs[i] * dcsi
        dinsiraw = (1 - ins[i]) * ins[i] * dinsi
        dWx_i += np.dot(dinsiraw,xs[i].T)
        dWh_i += np.dot(dinsiraw, hs[i-1].T)
        db_i += dgsiraw

        dcnext = fs[i] * dcsi
        dfsi = cs[i-1] * cs[i]
        dfsiraw = (1 - fs[i]) * (fs[i]) * dfsi
        dWx_f += np.dot(dfsiraw, xs[i].T) 
        dWh_f += np.dot(dinsiraw, hs[i-1].T)
        db_f += dfsiraw

    for dparam in [dWx_f, dWh_f, db_f,
                   dWx_i, dWh_i, db_i,
                   dWx_g, dWh_g, db_g,
                   dWx_o, dWh_o, db_o,
                   dWh_y, db_y]:
        np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
    return loss, dWx_f, dWh_f, db_f, dWx_i, dWh_i, db_i, dWx_g, dWh_g, db_g, dWx_o, dWh_o, db_o, dWh_y, db_y, hs[len(inputs)-1], cs[len(inputs)-1]

In [25]:
def sample(h, c, seed_ix, n):

    ixs = []
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1

    for i in range(n):
        f = sigmoid(np.dot(Wx_f, x) + np.dot(Wh_f, h) + b_f)
        c = c * f

        ins = sigmoid(np.dot(Wx_i, x) + np.dot(Wh_i, h) + b_i)
        g = np.tanh(np.dot(Wx_g, x) + np.dot(Wh_g, h) + b_g)
        c += ins * g

        o = sigmoid(np.dot(Wx_o, x) + np.dot(Wh_o, h) + b_o)

        h = np.tanh(c) * o
        y = np.dot(Wh_y, h) + b_y
        p = np.exp(y) / np.sum(np.exp(y))

        ix = np.random.choice(range(vocab_size), p=p.ravel())
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixs.append(ix)
    return ixs

In [26]:
n, p = 0, 0
smooth_loss = -np.log(1.0/vocab_size) # loss at iteration 0


# Adam optimizer parameters
momentum = 0.8
decay_rate = 0.9
lr = 1e-3

params = [Wx_f, Wh_f, b_f, 
          Wx_i, Wh_i, b_i, 
          Wx_g, Wh_g, b_g, 
          Wx_o, Wh_o, b_o, 
          Wh_y, b_y]

m = [np.zeros_like(p) for p in params]
v = [np.zeros_like(p) for p in params]

In [27]:
while n < 20000:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+sequence_length+1 >= len(data) or n == 0: 
    cprev = np.zeros((hidden_size, 1))
    hprev = np.zeros((hidden_size,1))
    p = 0 # go from start of data
  inputs = [char_to_ix[ch] for ch in data[p:p+sequence_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+sequence_length+1]]

  # sample from the model now and then
  if n % 100 == 0:
    sample_ix = sample(hprev, cprev, inputs[0], 200)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print('----\n %s \n----' % (txt, ))

  loss, dWx_f, dWh_f, db_f, dWx_i, dWh_i, db_i, dWx_g, dWh_g, db_g, dWx_o, dWh_o, db_o, dWh_y, db_y, hprev, cprev = train(inputs, targets, hprev, cprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001 / sequence_length
  if n % 100 == 0: print('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  
  grads = [dWx_f, dWh_f, db_f, dWx_i, dWh_i, db_i, dWx_g, dWh_g, db_g, dWx_o, dWh_o, db_o, dWh_y, db_y]

  # Adam optmiizer
  for i in range(len(params)):
    m[i] = momentum * m[i] + (1 - momentum) * grads[i]
    v[i] = decay_rate * v[i] + (1 - decay_rate) * (grads[i] ** 2)

    params[i] -= lr * m[i] / np.sqrt(v[i] + 1e-8)


  p += sequence_length # move data pointer
  n += 1 # iteration counter 

----
 WdcJwKoqoFmt),rO?cqJwTFP
d?TGJ:?i-uDUojD o.h l:jJFqdFlp?rcl-nJ?do-EoryWoF?y(lmo-poziwhAw?ph(.dKPvOziKpW,!tDoTm?psqe,dFIdoJeGdALF)oWm:rOIuPw ocuwwwS,-Tzg-Y-KqbwpFg?-NS?Rp-Bk MGCis-om)c:o-cdv?I oSOkPpBo 
----
iter 0, loss: 4.111328
----
 imsimeysdt wr 'eootff tice tc tRc rne un
o oan ddo
tthunn
oof- eeroeior oaa hybmosaryetOimol
  uotvai  moteentya.cpt riufw
 , rgrndlinyenuaysoeemayns donyoapftwteedissr  eesrngeeror dmyeova uua
 h m   
----
iter 100, loss: 4.039562
----
 r d r,elrfisrest 
eflleg
ooyin,tn oeee igioardltyelrlnBhd  eeg oyv.teh lim lheeJtm b a
io fyenfgtfeh hrieeyy  Gmeeeteoghdryth irAieto'he,dWt y ,dhe gesgsu
nefl
m svths i IhflAor
t fadh  itloe
gti
ldey 
----
iter 200, loss: 3.949955
----
 iecw 
 yge  n lieroasept ttayunuWlieneeee Bthtetofm
 ro npcrays itw im dat eqa sn reeeaneystrd rant,bby en' i  e  sloayt i h t r yet
ngyeomhosfrfc, ht ruewifdeAsg ed rnotfeideeeeasyt,ir maiuy yn l ydb 
----
iter 300, loss: 3.864234
----
 
e 'ofisO tcoranc,mhrtnsrhoeweeeo A khe ra,a

In [29]:
sample_ix = sample(hprev, cprev, inputs[0], 2000) # final sample
txt = ''.join(ix_to_char[ix] for ix in sample_ix)
print('----\n %s \n----' % (txt, ))

----
 
Srakertit beod,
To long wortureive,
But earne hove gravies helones bright  leack tiow ant liofaded honened to I lighe,

That of my laye the sum thow thy lovith of aris meat fend,
O herurited rewe thy cand,
Amy ownd ay my not to shous on tatter  as ant mers pline
I brinedsath uthere ing eitp encold led alaks ghous whir trigisagh thing is gring 'lfobes, vingun fairfs or lime on the maver can ang thes stares,
Tith this of munes.
But nonour cang,
But nrtout rimanene toseds,
But theer thou and a wath thou houlf bewin haild gulos mightrsed,
Mikn lighg tore pwirt,
And whough brit dul and ist I mings' 
Savint fild sthys,
And my uprip,
At to when thy ariks ithte ind o nos unour sing,
Sanchse plang sithen,
To the I lay, a breires gamees and sead'.

whingived barou love the im of an beamong as dave eve theen mey ses.
Whou will,
Anleds,
kn with not his prats.
 
Wey come not ingst did fair  of hanon,
is mbert.



Ad thy cour,
To whe to cole fain's of worladiver bee's than to as for dies,
Fre