In [26]:
import numpy as np

In [27]:
# data I/O
# Read the whole corpus; the context manager guarantees the file handle is closed.
with open('shakespeare.txt', 'r') as text_file:
    data = text_file.read() # should be simple plain text file
# Sort the vocabulary so the char <-> index mapping is identical on every run
# (iteration order of a set of strings changes between interpreter sessions).
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has {} characters, {} unique.'.format(data_size, vocab_size))
# Lookup tables between characters and their integer indices.
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 1115394 characters, 65 unique.


In [None]:
# hyperparameters
learning_rate = 0.1  # step size used by the Adagrad parameter update
seq_length = 25      # how many time steps the RNN is unrolled per training chunk
hidden_size = 100    # number of neurons in the hidden layer

In [None]:
# model parameters
# Biases start at zero; they are column vectors.
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias
by = np.zeros(shape=(vocab_size, 1))   # output bias
# Weights are small Gaussian draws (scaled by 0.01), drawn in the order Wxh, Whh, Why.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input to hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden to hidden
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden to output

In [None]:
def lossFun(inputs, targets, hprev):
    """
    Run one forward/backward pass of the character-level RNN over a chunk.

    inputs, targets: equal-length sequences of integer character indices;
        targets[t] is the index the model should predict at step t.
    hprev: column vector holding the hidden state that precedes step 0.

    Returns (loss, dWxh, dWhh, dWhy, dbh, dby, h_last): the summed
    cross-entropy loss, the gradients with respect to each model parameter
    (each clipped element-wise to [-5, 5]), and the hidden state after the
    last step (a copy of hprev when inputs is empty).

    Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.
    """
    xs, hs, ps = {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0
    # forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
        y = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
        # Shift so the largest logit is 0: softmax is unchanged mathematically,
        # but np.exp can no longer overflow to inf (which produced NaN before).
        y_shifted = y - np.max(y)
        exp_y = np.exp(y_shifted)
        sum_exp = np.sum(exp_y) # >= 1 because the max-shifted logit contributes exp(0)
        ps[t] = exp_y / sum_exp # probabilities for next chars
        # cross-entropy via log-sum-exp; avoids log(0) when the target probability underflows
        loss += np.log(sum_exp) - y_shifted[targets[t],0]
    # backward pass: compute gradients going backwards
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[-1]) # hs[-1] always exists, even for an empty chunk
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

def sample(h, seed_ix, n):
    """
    Sample a sequence of character indices from the model.

    h: hidden state column vector to start from.
    seed_ix: index of the character fed in at the first time step.
    n: number of indices to draw.

    Returns a list of n sampled indices (the seed itself is not included).
    Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.
    """
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        y = np.dot(Why, h) + by
        # Max-shifted softmax: same distribution, but np.exp cannot overflow,
        # so np.random.choice never receives NaN probabilities.
        exp_y = np.exp(y - np.max(y))
        p = exp_y / np.sum(exp_y)
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        # feed the sampled character back in as the next one-hot input
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

In [None]:
n = 0  # iteration counter
p = 0  # data pointer: offset of the current chunk inside `data`
# Adagrad memory: running sum of squared gradients, one array per parameter
mWxh = np.zeros_like(Wxh)
mWhh = np.zeros_like(Whh)
mWhy = np.zeros_like(Why)
mbh = np.zeros_like(bh)
mby = np.zeros_like(by)
smooth_loss = -np.log(1.0 / vocab_size) * seq_length  # loss at iteration 0

# Runs until the kernel is interrupted.
while True:
    # Sweep the text left to right in chunks of seq_length characters; on the
    # first iteration, or when the next chunk would run off the end, rewind.
    if n == 0 or p + seq_length + 1 >= len(data):
        hprev = np.zeros((hidden_size, 1))  # reset RNN memory
        p = 0  # go from start of data
    inputs = [char_to_ix[c] for c in data[p:p + seq_length]]
    targets = [char_to_ix[c] for c in data[p + 1:p + seq_length + 1]]

    # Every 1000 iterations, print a 200-character sample from the current model.
    if not n % 1000:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join([ix_to_char[i] for i in sample_ix])
        print('----\n {} \n----'.format(txt))

    # Forward seq_length characters through the net and fetch the gradients.
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if not n % 1000:
        print('iter {}, loss: {}'.format(n, smooth_loss))  # print progress

    # Adagrad: scale each step by the root of the accumulated squared gradient.
    for param, dparam, mem in (
        (Wxh, dWxh, mWxh),
        (Whh, dWhh, mWhh),
        (Why, dWhy, mWhy),
        (bh, dbh, mbh),
        (by, dby, mby),
    ):
        mem += dparam * dparam
        param -= learning_rate * dparam / np.sqrt(mem + 1e-8)

    p += seq_length  # move data pointer
    n += 1  # iteration counter

Output cleared to avoid printing several hundred pages.

Training reached iter 11155000, loss: 44.498503081985476, before the keyboard interrupt.

----
 vzNSHmEG3r;etQMqoceZVqvsIanUXGA
OX$M.jYgcUDCPvAzCdPX jQlMs- bavH!qHEAekR'PF
:mNwnDo?J 
:tnzEA;ygosWrthn;SF?$vFrXM g3HGnvazpNvNoj&z'dhdZUB!;QDzwKy,diriojFRm&;kp;sMcx 
:erU:leVbk:3Hbh;;KF:E:DwT:rIeH$YKV 
----
iter 0, loss: 104.35968378885154
----
 lrd nve be ca,cMot; tesi: .nut ltolcces bo cor.
:
Waucus.
Whural fnadr ous to 'arus us t etwe,
he
the bnit,

CNSPtoAnnasir thas yid somlme dot sin hthi.!
T
TwA

pCHy thepar mekersorat toi yhiont.
Mes  
----
iter 1000, loss: 87.66641259812343
----
  ther.
re
Wonus kI the notowt
hane geresde giy ty panl ames
CIAme the uat ondcd uiv anern fot thonz to ss
he the todened thiter ntd the to psenee heat thawcis nhehand caut-panderealnond math ee.

C'L: 
----
iter 2000, loss: 72.3603709993052
----
 es
Thave.

-ic
Balus,

Coor potet
Theg angiuph tud nond th ath:
To rpd, bel cort-d hame nnthe rar bhele note alsthe tor.
The we thout, gorpdcoort  af the meet dags gomk an, meesl bee eo acserl the Hes 
----
iter 3000, loss: 64.22507256521415
----
 thal son lised, no es tics on then aso ines ou.
Wo wet eat,
Morr co fall ok ardt till foby ad senod.

Pgaoidst,
A'esh ale fleunln fiold,r.
S'ep, the huspnos kew'll herid thii,
vufime is!
San go thee
B 
----
iter 4000, loss: 59.987450666118775
----
 o d ao herrpas berrnm
Monn:
Thus

ORICEINIUS:
An.

'or; yote bEOr coevvy yop, rthot.

SINIUS:
vawhemense has tho
the yor cill.

OrF'vif, Me he Loulgmirals
INor w seris mowe hafhiynder herm onom
Aspiti 
----
iter 5000, loss: 58.57784923447078
----
 pt rereois.
 it manke.

Fyou himasly pene. IC
ROS:
Fwoth nou'ld pithe Pa mapl dofmti'lt, ppor dsthin baterins
Yoy bal comer ba savet
in dureif.

ANUTUS:
Whe, thond hongle yout cesheritherst the the mi 
----
iter 6000, loss: 57.898948863817886
----
 rs edst cukk mate thecf a,,

S:u.

LUS:
Thad doref hamar, wuve awy, kelousagir, the ule.

Shivon love bo nomy mes pat Rotine dot mand bo laved kXally thalghe lis,
Ga nim wy lome, is to dowetwill how'l 
----
iter 7000, loss: 58.172450012371215
----
 ENTUENEFTURCEMENR
TURANUS:
Theele wplat aduns ereent.

Tha line fintteer ffaccitt,
Tulcere, got to thee qunt hat; ure wery upy to think coude hus,
I-L
qwmecteit ourdee how?


SIEREFI
Hendenvinbertora' 
----
iter 8000, loss: 57.40812351165954
----
 hit wart sdind thale be on the browh guthsim mou? I I lobas ard to yont as selltitur houresde'st yous, baec
Pithe sey cad wt ed thoursor itnerw rie to darseses rdoo he to tistor;
And masee $oord be ho

----
iter 9000, loss: 57.30461917662502

----
  wleags your uowsente oure thenk;
If tath en;
A my, How att mnord,
Wy uca hervicke Entaagt a ditt anderireng I toll werdes noveen.
Mour seerttees noke mave wizl ook;
Thine,
Hiny sordonse,
Ite walche?
 
----
iter 10000, loss: 55.846932149898706

----

 r the me
gothas to sOce am a youl if in abeever to op y to sust hee hip ontser this that a sbus youn breftine?
CIame beftharr
cormentwood unviens yourd, I I'll vert buther sond, loth maiuris of:
And a 

----
iter 100000, loss: 49.27069248455135

----



 hought!
Lay!
Janlle, Cortard prache:
In mole.
Thy ow cood Hever for where timy's I is ande be, sie, I
Whickst natiens,
We vinen,
Than age wath on gress,
That is more not land! and scean hiser did slan 

----
iter 200000, loss: 49.25732551959337

----



 sall.

BUCVIF
MES:
But My dronk did youb out thee there of me know's to all to empilt, Of extron.
My not foongiech this shoak indiond angyes gizat,
Wiest,
Marturasbef, I chack your to for do have so a 

----
iter 300000, loss: 48.69393994879168

----


I you yeald age, in rection Jade the tratt,
And and notess comke?

MENRIINTE:
Thinung;
Mis, make might on pie.

TONTET:
Inis your with,
Brong?
Thou here
Swold youip!
I will combin sontent you' mintu? 

----
iter 400000, loss: 45.867081976657374

----



 hallack prisl sle Hol not
Feince'd and me are have long the kids ale contine that sus
It mich collule the rimmboacks the stulis, sul you ges'le the she pace
To promiag
yoyep: not,
To seest oa ungrom-- 

----
iter 500000, loss: 45.73411845778061

----

 ow; herar ortect
But the soum!
A forch of keth good-mill to eyel and shall nitrems.

MENCS:
The shos-as.

Mughten'd in destniligover.
Houdn upertles deer to of youreco, not as stis; shane the fremeass 

----
iter 600000, loss: 47.115493566774106

----

 dy in herren
that and the me havis
atherh sill ports: me, my great:
Why of my alitlee lowm:
Myseansthed chy,
The chmolfages;
Ficute't
Wemper of are mer,
Cifess all frephisenst conghet to lord mett not 

----
iter 700000, loss: 47.56540672142887

----

 y swilf he'se.

PETRUCI:
Sit,
Dhomfens, coth a' vid, my becithen.

GRMILIzK OXT:
If enter?

GRUMINIUSs I like on,
Shasy the whicilignteak now the speep gellercely camen:
Thel:
Your there: Of but of; a 

----
iter 800000, loss: 46.50024399666682

----

rouse?
Here.
That greme.
And ace chen luch, hadselher tree
Goreds-noteted, to me had lask herring 'thirrted to Pe
sinest betian are take?

CAMISPERT:
Alasce, shamather, Vorst hopp,
And him.
Butiost to 

----
iter 900000, loss: 45.53983959024896

  whom I mocoret a look on coull aly thit or not.

AUFAD:

AMANDULESSS II:
My to chomeng, and,
The bland-feering see-y so treatle as but my ring be tiletut molat count leave seagg, callmeng.

GfRCINIUS 

----
iter 1000000, loss: 46.199287022558345

----

 ld a best unforier bet thip it is the woech's
Of.

WAlrack us parnlaw in it aganters co he good how me Cfelvoning you Efehter were it so blai, good are.
He many. Our smabtous you:
Or a mate al oh'
I f 

----
iter 2000000, loss: 44.525164072047744

----


 AM:
They flairs fee lores,
But objeeds; has ble I sistor: men the dion good, my freatick dother, her lif, mistrue God us Cost one
Orse they you not man where hama an the a he so mithress whoud beef th 

----
iter 3000000, loss: 43.41792691646422

----

for remand ool homoul the propese.

GoNC
Four
I lath to sobeld in you, in
Tlan mas nerrestle tainty,
Budion
You spead, is do my undore,
Think Kater,
And and in of do agains being becang take si; hoit 

----
iter 4000000, loss: 44.64289997696883

----

so feariol's to you knod, I toiss, britherd you be the finsher do never siden, as mest theyer the wades, I stand, it emed! Ed of the bilembred. Lets be take but they with him,
Sitrer
Or loug be, andep 

----
iter 5000000, loss: 44.01995179623448

----

ver our he asted conting sisplag mernor streataninn mastollaum it let od menter thingentrierd your as a a stose that herre,
That thou most ven is ank bravent and ast I so hat my great and a pide he gr 

----
iter 6000000, loss: 44.45051887593998

----

ou; and, this nein hald you shall you heard'd Host my gralious been for to of York of high's excoct the heaggove,
And a worse in all, calilece best do, you seach Offambob, thine his be and he grese--
 
----
iter 7000000, loss: 44.612614795913444

----

HNET:
Kisbay if you!

ALIO:
Ny gursk's th's ging: fromid.

SbAge:
Deurth me I keartent puss of this disiese'
Nother in't fine, now,
Benoth.

GLOUCESTER: Civer, their saint afall's thou, fored oow nose 

----
iter 8000000, loss: 44.35128022255806

----

And ear the mirthy bead an be otherwnibliom;
Which thou whesune-fith where tase nike Edalls by farmown, good.

LEONTES:
Arour unchered oniloasoble
Kisse out becand:
When is, heaven'd,
Antreat, coich  

----
iter 9000000, loss: 44.79127398306245

----

ne!
O for the good cares to shall iver to yought did

Dhepbited in be have but noble the rose, I have!

MENENIUS:
Sowniul was.

AUTILAFNANDKKEN:
Ohe came,
And iseaty, froe him! I ambenf Ravall.

LUCEY 

----
iter 10000000, loss: 44.282232942731675

----
 u peroth hooster, and Caurth. het were thy holld fier up lost he in pad your face thou shall ere the ray.
Pesphing. We, hoped tengets him.

DUCHESS OF APAUS MIRIA:
Noly pakest and neixt they did, reme 

----
iter 11000000, loss: 43.65596742808724

In [None]:
# hyperparameters
learning_rate = 0.1  # step size used by the Adagrad parameter update
seq_length = 3       # how many time steps the RNN is unrolled per training chunk
hidden_size = 512    # number of neurons in the hidden layer

In [None]:
# model parameters
# Biases start at zero; they are column vectors.
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias
by = np.zeros(shape=(vocab_size, 1))   # output bias
# Weights are small Gaussian draws (scaled by 0.01), drawn in the order Wxh, Whh, Why.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input to hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden to hidden
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden to output

In [None]:
n, p = 0, 0 # iteration counter and data pointer
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0

# Runs until the kernel is interrupted.
while True:
    # prepare inputs (we're sweeping from left to right in steps seq_length long)
    if p+seq_length+1 >= len(data) or n == 0:
        hprev = np.zeros((hidden_size,1)) # reset RNN memory
        p = 0 # go from start of data
    inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

    # sample from the model now and then
    if n % 1000 == 0:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n {} \n----'.format(txt, ))

    # forward seq_length characters through the net and fetch gradient
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if n % 1000 == 0: print('iter {}, loss: {}'.format(n, smooth_loss)) # print progress

    # perform parameter update with Adagrad
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                  [dWxh, dWhh, dWhy, dbh, dby],
                                  [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

    p += seq_length # move data pointer
    # Advance the iteration counter. Without this, `n == 0` stays true forever,
    # so every pass resets hprev and p, trains on the first chunk only, and
    # samples/prints "iter 0" on each iteration.
    n += 1 # iteration counter

 ndRj'h?wFD  rhH OTRMSgoGwDxytqTgjLoHcS
QSW-d$tN:IYD3$jUFVt
K
iWcpqLOxuNELb.PWv?F'lGAAEyOZLrL.LIYeh
MoYDBo.BAglJTgis Xu;ipo&?3hVok'JDdCtpdTZKp,zKFmIj&BezMrrsgGBHiDaBCjPERz;P.lTxJzI;xwfzxXYKHjkg&?!i?Y;T 

----
iter 0, loss: 12.523162139676613

----


 R s  rCthdriNgldmha  npt eniSAAe iotgoeE srhiLel
 rhiaa,
nW 
hiiK
srdhaebcd yee.R snnorwhIe ectrne wy'a ru ,hwd c
fse etohr qht;ttyt'K anh
E'ol u n
 th dAo.eeotoeei a,ld lacnutu o sc
  reu 
wet    htr 

----
iter 250000, loss: 10.223829549156331

----




  .S nw ,thiEsohnred, o-u tagtOmEo ihre ',aveeBh ahsds no'Wtrl oro ns
tfiehsPsaoduroo
 nhrWisoinl  
 othElw
orrMivtrts dh  rirrHd
snwn't iocnesdt rh yldy g T dliR ode ,Sd
korwt
Reohiia,
e
  aooJo ebtret 
  
----
iter 500000, loss: 10.06532965151112

In [35]:
# hyperparameters
learning_rate = 0.1  # step size used by the Adagrad parameter update
seq_length = 3       # how many time steps the RNN is unrolled per training chunk
hidden_size = 100    # number of neurons in the hidden layer

In [36]:
# model parameters
# Biases start at zero; they are column vectors.
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias
by = np.zeros(shape=(vocab_size, 1))   # output bias
# Weights are small Gaussian draws (scaled by 0.01), drawn in the order Wxh, Whh, Why.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input to hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden to hidden
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden to output

In [37]:
n = 0  # iteration counter
p = 0  # data pointer: offset of the current chunk inside `data`
# Adagrad memory: running sum of squared gradients, one array per parameter
mWxh = np.zeros_like(Wxh)
mWhh = np.zeros_like(Whh)
mWhy = np.zeros_like(Why)
mbh = np.zeros_like(bh)
mby = np.zeros_like(by)
smooth_loss = -np.log(1.0 / vocab_size) * seq_length  # loss at iteration 0

# Runs until the kernel is interrupted.
while True:
    # Sweep the text left to right in chunks of seq_length characters; on the
    # first iteration, or when the next chunk would run off the end, rewind.
    if n == 0 or p + seq_length + 1 >= len(data):
        hprev = np.zeros((hidden_size, 1))  # reset RNN memory
        p = 0  # go from start of data
    inputs = [char_to_ix[c] for c in data[p:p + seq_length]]
    targets = [char_to_ix[c] for c in data[p + 1:p + seq_length + 1]]

    # Every 25000 iterations, print a 200-character sample from the current model.
    if not n % 25000:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join([ix_to_char[i] for i in sample_ix])
        print('----\n {} \n----'.format(txt))

    # Forward seq_length characters through the net and fetch the gradients.
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if not n % 25000:
        print('iter {}, loss: {}'.format(n, smooth_loss))  # print progress

    # Adagrad: scale each step by the root of the accumulated squared gradient.
    for param, dparam, mem in (
        (Wxh, dWxh, mWxh),
        (Whh, dWhh, mWhh),
        (Why, dWhy, mWhy),
        (bh, dbh, mbh),
        (by, dby, mby),
    ):
        mem += dparam * dparam
        param -= learning_rate * dparam / np.sqrt(mem + 1e-8)

    p += seq_length  # move data pointer
    n += 1  # iteration counter

----
 dPuZaxpW,Uw?:IK-aKIMqTkgnsvcZl'jrdXopY3giwXT3yqMM,aN!PlJbQkFmp:quXx!CBPn3.?,g.lKH
3q FCSfQvHGOf&SJgyt yLMjsrc:i
'kuslUKfcN,EMOZR'YEivblT
oAaroUyQzfXDetrBvb:lmXPioeaqKPc?z;G?q.yUnHfm
meltUFAuLG;gsUQYoG 
----
iter 0, loss: 12.523161697851636
----
  asi'd?

CSI: snotht onws ife leiginges she rim ver er toas wnethet whouk bee der, oi nont
Whe jat nobis ghes bad ther thes iur by as, yobed orom?
Fhorlul, angors-tan,

CRNOS breed to re alvac: the
Na 
----
iter 25000, loss: 6.738765176543091
----
 rmand algo
yo rervey cat them hadke whintAret hespis'nt
we may
Iw fietus fint aime, t'ethes sungre'ns bre tould Codt yuspat, as he vonche geeren the of so lo to weabls
And.

LANU: I lawrowe me carall  
----
iter 50000, loss: 6.61930784197637
----
 uwell cugtitht pracleve maclesalatin hend, 'ol stoog.

Ther couthley frexrithere's nancheb.

QUVENNIZIZHARs anctony
OA Gy deenoann.

QUEd GLORDE:
Racone grouc.
Wemears,
Withee, wiupw thap and not,
Gea 
----
iter 75000, loss: 6.5199665770686615
----
 s

KeyboardInterrupt: 

  wncadr
Aigiin we sen Whorh wund ber freers:
Ye ser yhishe on pet,
Id tord ar an ver neirecerocherorgsn, mich bege wicingrtof; soult'd thuol ro the thand yord ans'kss sil un sonl fort tong path cow,-b 

----
iter 25000, loss: 62.89360021621069

----

 :
Nesuuve, and, And ir peing thy I dove grom, shese
Yo kave en me; fumirh

LEI ISINIU Lo st kied,
Foneam niwk su of theet hil in
I pleey teto o to tharm tis ens

MIU
HAGINUCINISENIUS Thans pey is with 

----
iter 50000, loss: 59.6063652664936

----


 tho bood hare ige unoaly o pathilenes,
: o then!

FLONE:
The, quye wey is shim'd chomi'n it; at the at nhofshens thalle Bren yorde asd
And the fere, at ceas gis that dorers cyot her.
You spit mnt se f 

----
iter 75000, loss: 57.986586951949285

----


e int.
shat, my toll.

BUNETI:
Thap at ut vomes:
yor I hel them in me dow ald bren sellisden you, starngt ef,' con!
Wow Rot sthe duker wy, dovu mupnts'grases, spon:
Pve:
And mand in
Bithan:
Sigome ous 
----
iter 100000, loss: 55.983037858774246