# RNN Tutorial
from https://karpathy.github.io/2015/05/21/rnn-effectiveness/

using the minimal character-level RNN language model from https://gist.github.com/karpathy/d4dee566867f8291f086

### hint... it's all about sequence data...
* Input is a single sample in a sequence
* History is maintained as state h in the RNN block.
* Input can be (is commonly) a 1-hot vector where each element is one of the possible input values (characters or words)
* The input could also be non-sequence data - e.g. an image - that is presented to the model as a sequence (say, a sequence of patches).



"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""

In [22]:
import numpy as np

In [23]:
# data I/O
# Read the whole training corpus into memory. The context manager closes the
# file handle as soon as the text has been read instead of leaving it open.
with open('15_rnn_input.txt', 'r') as infile: # should be simple plain text file
  data = infile.read()
# Sort the distinct characters so the char <-> index mapping is identical on
# every run; the iteration order of a set of strings can differ between
# interpreter runs, which would silently reshuffle the vocabulary.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
# Lookup tables between characters and their integer vocabulary indices.
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = dict(enumerate(chars))

data has 471 characters, 47 unique.


In [24]:
# hyperparameters
# hidden_size: number of neurons in the hidden layer
# seq_length:  number of time steps the RNN is unrolled for per update
hidden_size, seq_length = 100, 25
# step size used by the parameter update
learning_rate = 0.1

In [25]:
# model parameters
# Weight matrices start as small Gaussian noise (standard normal scaled by
# 0.01); they are drawn in the order Wxh, Whh, Why.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input  -> hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden -> output
# Biases start at zero and are kept as column vectors.
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias
by = np.zeros(shape=(vocab_size, 1))   # output bias

In [26]:
def lossFun(inputs, targets, hprev):
  """
  Run one forward and backward pass of the vanilla RNN over a sequence.

  inputs, targets are both lists of integers (character indices); targets
  must have at least len(inputs) entries, and targets[t] is the index the
  model is scored against after consuming inputs[t].
  hprev is Hx1 array of initial hidden state.

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.

  Returns (loss, dWxh, dWhh, dWhy, dbh, dby, h_last): the cross-entropy loss
  summed over the sequence, the gradients on the model parameters (each
  clipped element-wise to [-5, 5]), and the hidden state after the last
  input. For an empty input sequence the loss is 0, all gradients are zero
  and h_last is a copy of hprev.
  """
  xs, hs, ys, ps = {}, {}, {}, {}
  # hs[-1] is a real dict key (not "last element"): the state before step 0.
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    # Softmax is invariant to subtracting a constant from every logit, so
    # shift by the max first: the largest exponent becomes exp(0) = 1 and
    # np.exp can no longer overflow to inf (which would yield NaN probabilities).
    exp_scores = np.exp(ys[t] - np.max(ys[t]))
    ps[t] = exp_scores / np.sum(exp_scores) # probabilities for next chars
    loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  # Gradient flowing into the hidden state from the following time step.
  # Shaped from hs[-1], which always exists, so an empty sequence does not
  # fail on a missing hs[0].
  dhnext = np.zeros_like(hs[-1])
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [27]:
def sample(h, seed_ix, n):
  """
  Sample a sequence of integers (character indices) from the model.

  h is memory state (the hidden-state column vector to start from),
  seed_ix is seed letter (as an index) for first time step,
  n is the number of indices to generate.

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.
  The caller's h array is not modified; h is rebound locally at each step.

  Returns a list of n sampled indices. The seed index itself is not included.
  """
  # one-hot encoding of the seed character
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    # Shift the logits by their max before exponentiating: the softmax value
    # is unchanged, but np.exp can no longer overflow and produce NaN
    # probabilities (which np.random.choice would reject).
    exp_scores = np.exp(y - np.max(y))
    p = exp_scores / np.sum(exp_scores)
    # draw the next character index from the predicted distribution
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    # feed the sampled character back in as the next input
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

In [28]:
# Training loop: sweeps over `data` in chunks of seq_length characters,
# computes loss and gradients with lossFun, and applies an Adagrad update to
# the model parameters in place. The loop has no exit condition; it runs until
# it is interrupted.
# n counts iterations; p is the index in `data` where the current chunk starts.
n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad (running sums of squared gradients)
# Start the running loss at the cross-entropy of a uniform guess over the
# vocabulary, summed over seq_length steps.
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
while True:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  # Restart at the beginning on the first iteration, or when a full chunk plus
  # its one-character-shifted targets would reach the end of the data.
  if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  # targets are the inputs shifted one character to the right
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

  # sample from the model now and then
  # (every 100 iterations: 200 characters, seeded with the chunk's first
  # character and the current hidden state)
  if n % 100 == 0:
    sample_ix = sample(hprev, inputs[0], 200)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print('----\n %s \n----' % (txt, ))

  # forward seq_length characters through the net and fetch gradient
  # hprev is overwritten with the final hidden state so it carries over into
  # the next chunk.
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  # exponential moving average of the per-chunk loss
  smooth_loss = smooth_loss * 0.999 + loss * 0.001
  if n % 100 == 0: print('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  
  # perform parameter update with Adagrad
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
    # Both updates are in place (+=) so the module-level arrays themselves
    # change; rebinding the loop variables would leave the model untouched.
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update (1e-8 avoids division by zero)

  p += seq_length # move data pointer
  n += 1 # iteration counter 

----
 HuATWsKWhHwtLvaSDlLodr mGdSdnpaCiiOTrPdNJRtWFSxDKymaneSsRtDHbepmraY
CnOGGcJYwJWlPreWUxestpWJchVWWkOTSJrfTeIpevpoNHRFUUvJkcN YnWsVxTPeFgweyokLeCxwbeMPlTDYWNcm kzvge awyrMoPintFGVPAPFcraHvHukiRzgmRDHOtf 
----
iter 0, loss: 96.253696
----
 mbina
naltsinat PiavJaiCMsatscvA
cYastiaaNSR
mMh
YI
twlpkwvariaepsmIwVwmaho
sinhlnpsihkaahtvDVei
p
seosornesexbDashAaads
pidtonsassoK
nnsiht
sattpNnioai
IvVmrotpaui
pIaeps
asMtasaaosNiYlibaarOopaiPc
a 
----
iter 100, loss: 97.033213
----
 a
HaYaAisaaa
aofkr
Uv
CnA
elCr
iaLnCfoaliehDhaiaaCazImlrsaoszo
V
oxaoaNaovaAdaadrayrbSaskoaloaUSllaaokmeVaUowimaDrxIplopxon
anbwolanw
oayathofsrl
VkMwa
Mfkoao
a
aAWrAieaooknrlamfool
aplrlI
aIra

saoaa 
----
iter 200, loss: 95.599687
----
 c trda
CnoaooaahawinocNouWe
NoU
sanaae

AYDsllte
MatneinsiJaisA
oeFtaeea
CerNhkokkNtfn
imaoCoomeiairDa lsaCoanaic
keAuooy
a
LoapelAiiDa
os OaroosMenrayriuNb Noa
kuksoniwaarhs
JiwnaoeiDac
t
gaAaNoanirt 
----
iter 300, loss: 94.073126
----
 r
eontahtelenex
MnoITorsapeerrh
ieGrwhe


----
 sissippi
Missouri
Montona
South Dakota
Ohio
Oklahoma
Oregon
Penns
Mantana
Kentetts
Mickly
Ohio
Okla
Rhod
MannWes
Califolsia
Colorado
Connecticut
ttah
Vermont
Virginia
Washington
West Virginiio
Cons
Ma 
----
iter 3500, loss: 10.960618
----
 labama
Orka
Ka
Ne
rew Jersey
New Mexico
New York
North Carolina
North Dakota
Oeio
Oklaho
Illinota
Ida
Colida
North Dakota
Ohio
Oklahoma
Oregon
Pennsolvaniasippi
MachontanaNebraska
Nevada
New Hampshire 
----
iter 3600, loss: 10.102382
----
 hire
New Jerse Dakois
IndiTnd
Howy
i
Ida
Co
Connesa
Oain
North Carolina
South Dakota
Tennessee
Kaska
Nevada
New Hampshire
New Jero
ConnecttV
Otea
NexicorIey
New Mexico
New la
TeDmo
TentanaNe
Flore
Fla 
----
iter 3700, loss: 9.314008
----
 
Connecticuch CarSolia
Michigan
North Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginicgia
Otliu
North Carolina
North Dakota
Alind
Massacorth Carolina
South Dakataan
Soutty
New Utasid
Westic 
----
iter 3800, loss: 8.604887
----
 
North Dakota
Pennesset
Texasylia
So

----
 exas
Utah
Vermoni
Virginia
Washington
West Virgini
North Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virgini
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washingt 
----
iter 7000, loss: 1.052973
----
 sissippi
Missouri
MontanaNebraska
Nebraska
Nevada
New York
North Carolina
North Dakoushishin
Virnia
Colorado
Connecticut
Delaware
Florida
Georgia
Hawasi
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Lou 
----
iter 7100, loss: 1.007016
----
 labama
Alaska
Vrizona
Arkansas
Califor
Colona
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washidahoma
Oregon
Pennsylvania
Rhode Island
Soutsholina
North Carolina
South Dakota
Tennessee
Texas
Ut 
----
iter 7200, loss: 0.965190
----
 hire
New Jersey
New Mexico
New York
North Carolina
MichigMisoingatis
California
Colorado
Connda
Hawaii
Idaho
Illinois
Indiana
North Carolina
Hawain
Ilizona
Arkansas
California
Colorado
Connecticut
D l 
----
iter 7300, loss: 0.926056
----
 
Connenticut
Ne Ihnd
Marton
West Virgi

----
 aine
Maryland
Massechum
Nebransas
Kentucky
Louisihna
Minnsyia
Tennessee
Texas
Utah
Vermont
Virginia
Washington
Nebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
South Dakota
 
----
iter 10500, loss: 0.420823
----
 exas
Utah
Vermont
Virginia
Washington
West Virginiio
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Mararebrahonne
Maryland
Maizork
asy
Mama
Or 
----
iter 10600, loss: 0.415475
----
 sissippi
Missouri
MontanaNebraska
Nevada
New Hampshire
Nexica
New Hampshire
New Jersey
New Mexico
New York
North Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
Nor 
----
iter 10700, loss: 0.409066
----
 labama
Alaska
Arizona
Arkansas
California
Colorado
Connecticut
Delaware
Flarida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mis 
----
iter 10800, loss: 0.404115
----
 hire
New Jersey
New Mexida
New Han

----
 ode Island
South Carolina
North Dakota
Ohio
North Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
North Dakotgna
North Carolina
North Dakota
Tennesset
Texasoris
Ken 
----
iter 14000, loss: 0.294655
----
 aine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virgini 
----
iter 14100, loss: 0.292363
----
 exas
Utah
Vermont
Virginia
Washingtan
West Virginiio
Connes
California
Coloradoni
Ida
Colina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virgini
North Carolina
Sough Dakota
Tenn 
----
iter 14200, loss: 0.290808
----
 sissippi
Missouri
Monaea
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaN 
----
iter 14300, loss: 0.288013
----
 labama
Alaska
Arizona
Arkansas
Cal

----
 
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina 
----
iter 17500, loss: 0.237260
----
 ode Island
South Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virgicigis
Kentucky
Louisiana
Ma 
----
iter 17600, loss: 0.236080
----
 aine
Maryland
Massachusetts
Michiann
Alana
Hawa
New Jersey
New Mexico
New York
North Carolina
North Dakota
ka
Manne
Markaah
Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South C 
----
iter 17700, loss: 0.234833
----
 exas
Uthh
orasiania
Nebreskppi
Massichinsis
Nela
Ilahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
North Dakota
Ohio
Ok 
----
iter 17800, loss: 0.234238
----
 sissippi
Missouri
MontanaNebraska


----
 
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virgini 
----
iter 21000, loss: 0.203522
----
 
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hampshire
New Jersey
Neeurssiania
Rhode Island
South Da 
----
iter 21100, loss: 0.202637
----
 ode Island
Iontelia
Washixaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
MarVdefornia
Colorado
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisi 
----
iter 21200, loss: 0.201783
----
 aine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Mississippi
Missouri
MontanaNebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
South Dakota
Tennessee
Texas
Utah
Ver 
----
iter 21300, loss: 0.200994
----
 exas
Utah
Vermont
Iowa
Me Minras
K

----
 
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hamps 
----
iter 24500, loss: 0.181396
----
 
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virginiio
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachus 
----
iter 24600, loss: 0.180895
----
 
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina 
----
iter 24700, loss: 0.180255
----
 ode Island
South Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virgini
North Carolina
North Dak 
----
iter 24800, loss: 0.179654
----
 aine
Maryland
Massachusetts
Michig

----
 hire
New Jersey
New Mexico
New York
North Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina 
----
iter 28000, loss: 0.165306
----
 
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hamps 
----
iter 28100, loss: 0.165488
----
 
North Dakota
Ohio
Oklaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hampshire
New Jersey
New Mexic 
----
iter 28200, loss: 0.165118
----
 
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
MontanaNebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina 
----
iter 28300, loss: 0.164628
----
 ode Island
South Carolina
South Da

KeyboardInterrupt: 