In [27]:
import numpy as np

In [28]:
data = "The Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan. Featuring the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a sequel to 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Caine,"
chars = list(set(data))
print(chars)
data_size, vocab_size = len(data), len(chars)
# print(data_size)
# print(vocab_size)
print("data has "+str(data_size)+" characters, "+str(vocab_size)+" unique.")

char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
print(char_to_ix)
print(ix_to_char)

# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1

# model parameters
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

['8', 'F', 'B', 'o', 'a', '0', 'd', ' ', '5', 'D', 'r', 'M', 'p', 'g', 'b', 'u', 't', 'q', 'e', 'k', 'K', 'm', '-', 's', 'N', 'n', 'l', 'C', 'T', ',', 'f', 'i', '2', '.', 'h', 'c', 'y', 'w']
data has 303 characters, 38 unique.
{'8': 0, 'e': 18, 'k': 19, 'K': 20, 'w': 37, ',': 29, 'F': 1, 'm': 21, '-': 22, 's': 23, ' ': 7, 'n': 25, 'B': 2, 'l': 26, 'C': 27, 'T': 28, 'b': 14, 'f': 30, 'D': 9, 'o': 3, 'a': 4, 'h': 34, '0': 5, '2': 32, 'd': 6, '5': 8, 'q': 17, 'r': 10, 'c': 35, '.': 33, 'y': 36, 'M': 11, 'p': 12, 'g': 13, 'i': 31, 'u': 15, 't': 16, 'N': 24}
{0: '8', 1: 'F', 2: 'B', 3: 'o', 4: 'a', 5: '0', 6: 'd', 7: ' ', 8: '5', 9: 'D', 10: 'r', 11: 'M', 12: 'p', 13: 'g', 14: 'b', 15: 'u', 16: 't', 17: 'q', 18: 'e', 19: 'k', 20: 'K', 21: 'm', 22: '-', 23: 's', 24: 'N', 25: 'n', 26: 'l', 27: 'C', 28: 'T', 29: ',', 30: 'f', 31: 'i', 32: '2', 33: '.', 34: 'h', 35: 'c', 36: 'y', 37: 'w'}


In [29]:
def array2text(arr):
    text=""
    for i in arr:
        text+=ix_to_char[i]
    return text

print(array2text([2, 4, 16, 21, 4, 25]))
def text2array(string):
    arr=[]
    for i in string:
        arr.append(char_to_ix[i])
    return arr

print(text2array("Batman"))

Batman
[2, 4, 16, 21, 4, 25]


In [30]:
def lossFun(inputs, targets, hprev):
  """
  inputs,targets are both list of integers.
  hprev is Hx1 array of initial hidden state
  returns the loss, gradients on model parameters, and last hidden state
  """
  xs, hs, ys, ps = {}, {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  if len(inputs)!=len(targets):
      targets.append(0)
#   print(len(inputs))
#   print(len(targets))
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
#     print("ps")
#     print(ps[t])
#     print("target")
#     print(targets[t])
    loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  dhnext = np.zeros_like(hs[0])
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

def sample(h, seed_ix, n):
  """ 
  sample a sequence of integers from the model 
  h is memory state, seed_ix is seed letter for first time step
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    p = np.exp(y) / np.sum(np.exp(y))
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
while n<=30000:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]
#   print(array2text(inputs)+" with length "+str(len(inputs)))
#   print(array2text(targets)+" with length "+str(len(targets)))
  # sample from the model now and then
  if n % 100 == 0:
    sample_ix = sample(hprev, inputs[0], 200)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print("----\n "+txt+" \n----")

  # forward seq_length characters through the net and fetch gradient
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001
  if n % 100 == 0: print("iter "+str(n)+", loss: "+str(smooth_loss)) # print progress
  
  # perform parameter update with Adagrad
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer
  n += 1 # iteration counter 

----
 KwhMimhffypiB.scgTaeaiF2rp0,e pfKBdNc82iByoMDmBdmBbD.smwdnh8e-lmftB5fdDpkoy,Nma 2.sohChCt-bggTcoBcltgNoyios2.wad.qhqmqBw0karqdMosubp8nN.ucKmtMltF5mydbbhFl.tBasNtKlN8duwhcrwnmaKqFC8no8u c2l2TmKTq sus s 
----
iter 0, loss: 90.93966070994475
----
 nFahi   s0NhoC phns ioe 0Fh kiue s a sh. henanhF oloC.oobahoeb he.nlah0Fco Nuoseio or hruee,leillh-n reK hkorKannebo.Fr- lF0bcC.Co bClyn.m dehmg nobeo8rooFrpbrooFhfiNloheaFirtil dyu lht heecosorroFbnu 
----
iter 100, loss: 91.95706362902692
----
 throfsfat yi coThh rht emhhrwaTgd n lcnl saylBo mtro g sobtrilhfisdptd0Ntpo m siyph2an 0TNit tDg0 aookrttk o prTna TiFcClgt aihrr0polide isDaghTthnrayinthw BaKardotthhataatmiBctf ngoDgilaohri-. ra5 gN 
----
iter 200, loss: 90.51292623087328
----
 he lathe saalilat ny an nay rtliicabae ihaing d rt Km NiMicaaraBaainm ane, iniB Dg Kninasi t,ctsg B ganiBile o, ahnaNisa r seacs Bmlin Chdiat lalalq an ind arian meal aciMtln2ecd filgnlianBc anaaCgirf 
----
iter 300, loss: 88.12831597042616
----
 n,echoin

----
 turing the DC Comics character Batman Bos Car Batmbhe ansem coling pnsed, produced, anpertemiledupacs, prologht TroluBeg Batm005 Batman Begins atight Trilogy pering Christopher Nolan. Featuring the DC 
----
iter 3400, loss: 10.26101745627979
----
 and p seqgel Batm dice tero f Che DC Comics chark Knight e Bast ofoledins ched co o y anseptipg dire cin Bate Belo cin Bagins, starranight Trilogy and cilm ics Beco-b ad, cht Tfilegy aer Nolan. Featur 
----
iter 3500, loss: 9.475327885515897
----
 he Dark Knighw d, and ao b tae clel Batman, the sed, and paitegy Che fitm gy thiithe film file ins, stmile cast blelas Begins, starring ansed Fhatualmgis Balanseattophegseronf Nolang Christopher Nolan 
----
iter 3600, loss: 8.755975247782096
----
 turing the DC Comics chailcind, starre, Michael Caincluding Christopher Nolan. Featu 20rilm dsrilm d, Dhwrmani C ins, superiBactahl tero 200hrinpen by Chan by Christopher Nolan. FeasucCarr Nolaan Bile 
----
iter 3700, loss: 8.543857001504987
----
 an

----
 turing the DC Comwd, tha DC Comics character Batman, the film tm led, an by Chrittedseastcristae ceat istgrisstopaeg Che stm st inc ud, and co-written The Dark Knight Trilogy and a sequel to 2005 Batm 
----
iter 6700, loss: 1.3628200260272834
----
 and a sequel to 2005 Batman Bete, Michael Cain Chring Christopher Nolan. Featuring the DC gopg ictecter dg patl Knight Trilogy and acsed MichaelcCaranieht TroChb csed co aele, Michael CainDKnsengleger 
----
iter 6800, loss: 1.2900932480526601
----
 he Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan. Featuringcthrring ansemolh if Nolinigy Christopher Nolan. Fea co-written by Christopher Nolan. Featurin 
----
iter 6900, loss: 1.2229811308020202
----
 turing the DC Nolan The Dark Knight odituring and a 2008 superhero film directed, producedich cnperdem lo isng n srk Knight is ael Cherochart risttod film herang the sequel to 2005 Batman Begins, star 
----
iter 7000, loss: 1.1614449882709643
----

----
 turing the DC Car chero fing en ensed, ploduced, and co-written by Christopher Nolan. Featuring the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a seque 
----
iter 10000, loss: 0.8078724819890546
----
 and a sequel to 2005 Batman Begins, starring an ensecsepblailatk Christian Bale, Michael CainTgad, and paen Nfileguratucled t el Co toef Nolae The Dark Knira cast including Christian Bale00eloghe DC C 
----
iter 10100, loss: 0.7705500292345457
----
 he Dark Knight irer Nolan. Featuring Ferted, profhemdin in by Christarheristin cChristian Bale, Michaen en by Christaan Ban Bale, Michae, Che figs an ensemble cast including Christian Bale, Michael Ca 
----
iter 10200, loss: 0.7361386899331522
----
 turing the DC Comics character Batman, the film is the second part of Nole, by Chructhirt an in Nolan The Dark Knight Trilogy and a semocharacter Batman, the film is the second part of Nolan The Dark  
----
iter 10300, loss: 0.7043096889576222


----
 turing the DC Comics character  f Christopropt ats, atm i5 the peri g an ensemble cast including Christian Bale, Michael Caind coptoe cepbqu to gopten by Christian Bale, Michael Cain by Christopher No 
----
iter 13300, loss: 0.3119749948622863
----
 and a sequel to 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Cain  film dbdirected, produced, and co-written by Christopher Nolan. Featuring the DC Comics character  
----
iter 13400, loss: 0.30758311111744696
----
 he Dark Kniguded, asttea cturand topt in ins, sudepisht at inight thel istaan Tht Trilathe seq el tail Coilckco-wrist ogilaturing the DC Co 2008 20cs clatuan, Knien Ndlanigy Christian Bariat  C in ini 
----
iter 13500, loss: 1.2817375772804886
----
 turing the DC Comics char200ng Christopher Nolaup sn by Chrilaturing and ansequel to 2005 Batman Begins, starring an in thilugemter Nolan The Dark Knight is a stmate, Nola tark Featuring ansepdinstoph 
----
iter 13600, loss: 1.7806403983007322

----
 turing the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a sequel to 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Cain 
----
iter 16600, loss: 0.3360562769961733
----
 and a sequel to 2005 Batman Beginmble cast inclatm sucing an ensemble cast including Christian Bale, Michael Cain  to f Co 2005 BTto heithe film he thrincran  filo 00cto sequel to 2005 Batmrittopaen b 
----
iter 16700, loss: 0.32531462100544367
----
 he Dark Knight is a 2008 superhero film d, tud film isristo sblaitois Beconien, a p0rist Baco dins ansea Nolan The Dark Knight Trilogy and a sequel to 208o to 2005 Batman Begins, starring an ensemble  
----
iter 16800, loss: 0.3154314006684361
----
 turing the DC Comics Begsesthe second park Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan. Featuding ang f Misuco binseast by Christopher Nolan. Featuring the  
----
iter 16900, loss: 0.3063387795890842

----
 turing the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a sequel to 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Cain 
----
iter 19900, loss: 0.18523328235742365
----
 and a sequel to 2005 Batman Beto film directed, produced, rrd, s parhan cC in gy and a sequMichediem terandequsthring an ensemble can Bele film directed, produced, and co-written by Christopher Nolan. 
----
iter 20000, loss: 0.18335746925102026
----
 he Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan. Featuring the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy  
----
iter 20100, loss: 0.18158859279895473
----
 turing the DC Comics character Batman, the film is the second part of Nolan The Dark Knight cd i perten by Christopher Nolan. Featuring the DC Comics character Batman, the film is ang Co costopher Nol 
----
iter 20200, loss: 0.17993227660031

----
 turing the DC Comics character Batman, the film is the second part of Nolan The Dark Che DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a sequel to 2005 B 
----
iter 23200, loss: 0.14238024744682637
----
 and a 2008 tar Bnseqfilm ofilht Trodudecsn Nolan. Featuring the DCilo, M, and co d p Bele film isstur Batm d, the DC Comics charafist an by Christopter Nolan. Featuctut ct ilmgy and a sequel to 2005 B 
----
iter 23300, loss: 0.8462519645531221
----
 he Dark Knight ed anien Chr cter Ched,is the second parhark Knight is and Christian Bale, Mhaist en Nolan The Dark Knight Trilogy and a sequel to 2005 The second pan. Featuring the sectae file, Michae 
----
iter 23400, loss: 1.6713580627599507
----
 turing the DC Comics character Batman, the film is the second part el the staa2008 an by Chrinceacsins on by Christopher Nolan. Featuring the DC C st ifelogy and co-written Batman cy phe film is the s 
----
iter 23500, loss: 1.7116247320053688

----
 turing the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a sequel to 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Cain 
----
iter 26500, loss: 0.24585155832895403
----
 and a sequel to 2005 Batman Begins, starrong pgind, and co-written by Christopher Nolan. Featuring the DC Comics character Batman, the film is the second part of Nolan cChe seco-istirhtmel ding Che DC 
----
iter 26600, loss: 0.2365340340273152
----
 he Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan. Featuring the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy  
----
iter 26700, loss: 0.22801222644575622
----
 turing the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a sequel to 2005 Batman Begins, starring an ia Nolan The Dark Knight Trilogy and a sequel to 200 
----
iter 26800, loss: 0.220240089289094

----
 turing the DC Comics character Batman, the film is the second part of Nolan The Dark Knight Trilogy and a sequel to 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Cain 
----
iter 29800, loss: 0.12763512987346876
----
 and a sequel to 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Cain Nole, Tha can. Featuring the DC Comics character Batman, the film is the second part of Nolan The D 
----
iter 29900, loss: 0.12637155822653612
----
 he Dark Knight is a 2008 superhero film directed, produced, and co-written by Christopher Nolan. Featuring the DC Co 2005 Batman Begins, starring an ensemble cast including Christian Bale, Michael Cai 
----
iter 30000, loss: 0.12518681463236475


In [34]:
def predict(inputs,hprev):
#   print(inputs)
  xs, hs, ys, ps = {}, {}, {}, {}
  hs[-1] = np.copy(hprev)
  predicted=-1
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    ps[t] = np.exp(ys[t])
    predicted = np.argmax(ps[t])
#     print(predicted)
  print(ix_to_char[predicted])

In [40]:
input = "Batma" ##Batman ##[18,37,34,36] ##Nola
hprev = np.zeros((hidden_size,1))
predict(text2array(input),hprev)

n
