## The LSTM (RNN) Interface
RNN / LSTM / GRU follow the same interface. We have a “builder” which is in charge of creating and defining the parameters for the sequence.

In [48]:
import dynet as dy

In [49]:
# Collection that holds the model parameters; it is handed to every builder below.
pc = dy.ParameterCollection()
# Sizes passed to the builders below: number of layers, input size, output size.
NUM_LAYERS=2
INPUT_DIM=50
HIDDEN_DIM=10


In [62]:
# NOTE(review): star import; print_graphviz is presumably provided by dynet_viz -- confirm.
from dynet_viz import *
# Print a Graphviz "digraph" description (see the output below).
print_graphviz()

digraph G {
  rankdir=BT;
  nodesep=.05;
}


## LSTM

In [63]:
# LSTM builder with NUM_LAYERS layers, INPUT_DIM-sized inputs and HIDDEN_DIM-sized
# outputs; it is given `pc` for its parameters.
builder = dy.LSTMBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
# or, with the same constructor arguments:
# builder = dy.SimpleRNNBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)

In [64]:
# Start a sequence: s0 is the state before any input has been added.
s0 = builder.initial_state()

In [65]:
# An input vector of size INPUT_DIM (no values are assigned to it in the cells shown here).
x1 = dy.vecInput(INPUT_DIM)

In [66]:
# add_input does not modify s0; it returns the next state, s1.
s1=s0.add_input(x1)
y1 = s1.output()
# here, we add x1 to the RNN, and the output we get from the top is y1 (a HIDDEN_DIM-dim vector)

In [67]:
# Evaluate y1 as a numpy array and check its shape: one entry per hidden unit.
y1.npvalue().shape

(10,)

In [68]:
# Continue the sequence from s1 (the same x1 is fed a second time).
s2=s1.add_input(x1) # we can add another input
y2=s2.output()

In [69]:
# The output after the second input has the same shape as y1.
y2.npvalue().shape

(10,)

In [70]:
# h() returns a tuple of expressions; with NUM_LAYERS == 2 it has two entries.
print(s2.h())

(expression 463/960, expression 478/960)


In [73]:
# Print the Graphviz description again, after the LSTM cells above.
print_graphviz()

digraph G {
  rankdir=BT;
  nodesep=.05;
}


## RNN

In [24]:
# create a simple rnn builder
rnnbuilder=dy.SimpleRNNBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)

# start a new sequence; note that no new computation graph is created here
# (dy.renew_cg() is not called) and the x1 defined earlier is reused below
rs0 = rnnbuilder.initial_state()

# add inputs
rs1 = rs0.add_input(x1)
ry1 = rs1.output()
# h() gives one expression per layer
print("all layers:", rs1.h())

all layers: (expression 80/0, expression 82/0)


In [25]:
# s() of the simple RNN state; compare with the h() printed in the previous cell.
print(rs1.s())

(expression 80/0, expression 82/0)


In [27]:
# Side-by-side comparison of h() and s() for the simple RNN state (rs1)
# and the LSTM state (s1) created in earlier cells.

# for RNN, h and s are the same
rnn_h  = rs1.h()
rnn_s  = rs1.s()
print("RNN h:", rnn_h)
print("RNN s:", rnn_s)

# for LSTM s is memory cells plus h
lstm_h = s1.h()
lstm_s = s1.s()
print("LSTM h:", lstm_h)
print("LSTM s:", lstm_s)

RNN h: (expression 80/0, expression 82/0)
RNN s: (expression 80/0, expression 82/0)
LSTM h: (expression 19/0, expression 32/0)
LSTM s: (expression 17/0, expression 30/0, expression 19/0, expression 32/0)


In [74]:
# Print the Graphviz description again, after the RNN cells above.
print_graphviz()

digraph G {
  rankdir=BT;
  nodesep=.05;
}


## More than RNN/LSTM API

In [28]:

# States form a chain that can be branched: calling add_input on the same
# state more than once yields independent continuations of that state.
s2=s1.add_input(x1)
s3=s2.add_input(x1)
s4=s3.add_input(x1)

# let's continue s3 with a new input.
s5=s3.add_input(x1)

# we now have two different sequences:
# s0,s1,s2,s3,s4
# s0,s1,s2,s3,s5
# the two sequences share parameters.

# prev() gives back the state a given state was extended from
assert(s5.prev() == s3)
assert(s4.prev() == s3)

# s3.prev() is s2, so s6 branches off one step earlier
s6=s3.prev().add_input(x1)
# we now have an additional sequence:
# s0,s1,s2,s6

In [29]:
# s6 comes from the LSTM builder, so s() lists more expressions than h()
# (see the output below).
print(s6.h())
print(s6.s())

(expression 217/0, expression 232/0)
(expression 215/0, expression 230/0, expression 217/0, expression 232/0)


In [31]:
# Feed a whole list of inputs at once: add_inputs returns the states, which
# are iterated below to collect each one's output() and h().
state = rnnbuilder.initial_state()
xs = [x1,x1,x1]
states = state.add_inputs(xs)
outputs = [s.output() for s in states]
hs =      [s.h() for s in states]
print(outputs)
print(hs)

[expression 236/0, expression 240/0, expression 244/0]
[(expression 234/0, expression 236/0), (expression 238/0, expression 240/0), (expression 242/0, expression 244/0)]


In [32]:
# transduce takes the same list of inputs and returns the outputs directly
# (one expression per input, as printed below).
state = rnnbuilder.initial_state()
xs = [x1,x1,x1]
outputs = state.transduce(xs)
print(outputs)

[expression 248/0, expression 252/0, expression 256/0]


## Character-level LSTM

In [33]:
import random
from collections import defaultdict
from itertools import count
import sys

# Sizes used by the character-level model below.
# Note: INPUT_DIM and HIDDEN_DIM are re-assigned here.
LAYERS = 2
INPUT_DIM = 50
HIDDEN_DIM = 50

# Vocabulary: lowercase letters, the space character and an "<EOS>" marker.
characters = list("abcdefghijklmnopqrstuvwxyz ") + ["<EOS>"]

# index -> symbol and symbol -> index lookups
int2char = characters[:]
char2int = dict(zip(characters, range(len(characters))))

VOCAB_SIZE = len(int2char)

In [35]:
# Display the symbol -> index mapping.
char2int

{' ': 26,
 '<EOS>': 27,
 'a': 0,
 'b': 1,
 'c': 2,
 'd': 3,
 'e': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18,
 't': 19,
 'u': 20,
 'v': 21,
 'w': 22,
 'x': 23,
 'y': 24,
 'z': 25}

In [36]:
# New parameter collection for the character-level model; rebinds the `pc` used earlier.
pc = dy.ParameterCollection()

In [37]:
# Two builders with identical sizes, both given the same `pc`; either one can be
# passed as the `rnn` argument of the functions below.
rnn = dy.SimpleRNNBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
lstm = dy.LSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)

In [38]:
# Parameters shared by the functions below (independent of which builder is used):
#   "lookup": one INPUT_DIM-sized embedding per vocabulary symbol
#   "R", "bias": map the RNN output to one score per vocabulary symbol
params = {
    "lookup": pc.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM)),
    "R": pc.add_parameters((VOCAB_SIZE, HIDDEN_DIM)),
    # a bare int: the dimension here is a single number, not a tuple
    "bias": pc.add_parameters(VOCAB_SIZE),
}

In [39]:
def do_one_sentence(rnn, sentence):
    """Build the loss expression of `rnn` for one sentence.

    A new computation graph is started, the sentence is wrapped in "<EOS>"
    markers, and at every position the model is asked to predict the next
    character from the characters fed so far.

    Args:
        rnn: a builder providing `initial_state()` (e.g. the `rnn` or `lstm`
            builders defined above).
        sentence: a string (or other iterable of symbols) whose symbols are
            all keys of `char2int`.

    Returns:
        An expression holding the sum of the per-character negative
        log-likelihoods.

    Raises:
        KeyError: if `sentence` contains a symbol that is not in `char2int`.
    """
    # setup the sentence
    dy.renew_cg()
    s0 = rnn.initial_state()

    R = dy.parameter(params["R"])
    bias = dy.parameter(params["bias"])
    lookup = params["lookup"]
    sentence = ["<EOS>"] + list(sentence) + ["<EOS>"]
    sentence = [char2int[c] for c in sentence]
    s = s0
    loss = []
    for char,next_char in zip(sentence,sentence[1:]):
        s = s.add_input(lookup[char])
        # Fused -log(softmax(scores)[next_char]): avoids taking the log of a
        # probability that may have underflowed to 0.
        loss.append(dy.pickneglogsoftmax(R*s.output() + bias, next_char))
    return dy.esum(loss) # sum of the per-step losses

In [40]:
def generate(rnn):
    """Sample a string from the model, one character at a time.

    Starts a new computation graph, feeds "<EOS>" as the first input and then
    repeatedly samples the next character from the softmax over the
    vocabulary, feeding each sampled character back in, until "<EOS>" is
    sampled. The returned string does not include the final "<EOS>".
    """
    def sample(probs):
        # Spend a uniform random draw on successive probabilities; the index
        # at which it is used up is the sampled one.
        remaining = random.random()
        for index, prob in enumerate(probs):
            remaining -= prob
            if remaining <= 0:
                return index
        # draw not fully used up after the last entry: return the last index
        return index

    # setup the sentence
    dy.renew_cg()
    state = rnn.initial_state()

    R = dy.parameter(params["R"])
    bias = dy.parameter(params["bias"])
    lookup = params["lookup"]

    # "<EOS>" doubles as the start symbol
    state = state.add_input(lookup[char2int["<EOS>"]])
    generated = []
    while True:
        distribution = dy.softmax(R*state.output() + bias).vec_value()
        char_id = sample(distribution)
        symbol = int2char[char_id]
        if symbol == "<EOS>":
            # the end marker itself is not part of the result
            return "".join(generated)
        generated.append(symbol)
        state = state.add_input(lookup[char_id])

In [42]:
def train(rnn, sentence, iterations=200, report_every=5):
    """Train on a single sentence, printing progress along the way.

    A new `dy.SimpleSGDTrainer` over `pc` is created on every call. Each
    iteration computes the loss with `do_one_sentence`, back-propagates and
    applies one update.

    Args:
        rnn: the builder to train with (passed on to `do_one_sentence` and
            `generate`).
        sentence: the training sentence.
        iterations: number of update steps to run (default 200).
        report_every: print the loss and a generated sample on every
            `report_every`-th iteration, starting with iteration 0
            (default 5); 0 or None disables reporting.
    """
    trainer = dy.SimpleSGDTrainer(pc)
    for i in range(iterations):
        loss = do_one_sentence(rnn, sentence)
        # read the value before backward(); this is the loss prior to the update
        loss_value = loss.value()
        loss.backward()
        trainer.update()
        if report_every and i % report_every == 0:
            print('loss value: {}'.format(loss_value))
            print(generate(rnn))

### train with rnn

In [43]:
# Train the simple RNN on a single sentence; the printed samples show it
# gradually reproducing the sentence.
sentence = "a quick brown fox jumped over the lazy dog"
train(rnn, sentence) 

loss value: 156.53305053710938
obvkwqrwbcumlcx
loss value: 103.2486572265625
cv  l nqhtxujwhmonqbniet whrxjuavex cyf  mesa c fpo fdpprqdgib  webjvmfzhps zviveuav wkvxp dinkwlc uy bipmr gtt rgpmwateum
loss value: 73.2106704711914
usl
loss value: 44.31501388549805
aw  ajg ivesawuligv uzbpnjrkpynfrh
loss value: 25.60333824157715
o 
loss value: 13.611724853515625
wkyuipx brojn loh buote yohwn moe juiped ovpr ooejramte ovf d z edcoxen ovxx ocz  oewc ohg ruwy d ivld over the lazy dov
loss value: 5.072129249572754
z qumckdbdhwn fou jsmped over twe lazy dou
loss value: 1.6574616432189941
a lnick bronn fox jumped over the lazy doe
loss value: 0.809463381767273
a quick brown fox jumped over the lazy dog
loss value: 0.5666172504425049
a quick brown fox jumped over the lazy dog
loss value: 0.4357306957244873
a quick brown fox jumped over the lazy dog
loss value: 0.353680282831192
a quick brown fox jumped over the lazy dog
loss value: 0.2974169850349426
a quick brown fox jumped over the lazy dog
lo

### train with lstm

In [45]:
# Same sentence, now with the LSTM builder. The "lookup", "R" and "bias"
# entries of `params` are the ones already used in the RNN run above.
sentence = "a quick brown fox jumped over the lazy dog"
train(lstm, sentence)

loss value: 21.115440368652344
a quicb kownx fox mmpmed overr tllzyy oag
loss value: 19.564010620117188
 qqiik bronn o ox uumped overrrr tez lzay od
loss value: 17.545866012573242
 qqucck rronn ffox muped over the hllyyy dogg
loss value: 16.65489959716797
 iukkk boww fox jmmmpdd oveert taaay yy oggg 
loss value: 15.41010570526123
 quck brown fox jumee ovvr te llzy dog
loss value: 14.564728736877441
b  uiicb rrowrn foxj juuppddo over thh lyzz doo
loss value: 12.651235580444336
a juick bronn jox mmmpe over tt llye od
loss value: 11.91175651550293
 quccck brownn fon jpmeedd over the lzzy dogg 
loss value: 10.690475463867188
a qucc bbown fox jjmpdd ovrr the llyz og
loss value: 9.658659934997559
aq iick brown fox jummed ovve tle ayz dog
loss value: 8.690759658813477
 uucik brown fox mumpedd over hhe lazy dogg
loss value: 7.7903828620910645
 quic bown fox jmmpe oov ee tee llzy dog
loss value: 6.987741947174072
a quuikk bbrown ox jjmpe over hhe laaz do
loss value: 6.039589881896973
a qiuck br

In [47]:
another_sentence = 'these pretzels are making me thirsty'
# `rnn` is the builder already trained on the first sentence above, so the
# first sample printed below still reproduces that sentence.
train(rnn, another_sentence)

loss value: 364.0517883300781
a quick brown fox jumped over the lazy dog
loss value: 115.98564910888672
a quick brown fox jumped over the jazy dog
loss value: 35.31338119506836
a quick ba wne zhe hazi dvg lazt tog oa jumck baown moe ma tuuck ba wnicke eaetktak woe
loss value: 5.6694207191467285
thes  van mce mazr tharqqhadquick baare maernthseetaayldog jazrnfmped therlthy maz
loss value: 1.7543278932571411
these pretzees ahe making me thirsty
loss value: 0.2038564383983612
ahe making me thirsty
loss value: 0.1282159388065338
these phetzels are making me thirsty
loss value: 0.0984501913189888
these pretzels are making me thirsty
loss value: 0.08078761398792267
these pretzels are making me thirsty
loss value: 0.0689062550663948
these pretzels are making me thirsty
loss value: 0.06029056757688522
these pretzels are making me thirsty
loss value: 0.05371999740600586
these pretzels are making me thirsty
loss value: 0.04852164536714554
these pretzels are making me thirsty
loss value: 0.044297