In [1]:
import dynet as dy

In [2]:
import random
from collections import defaultdict
from itertools import count
import sys

# Model hyper-parameters.
LAYERS = 2        # layer count handed to the builders that reference it
INPUT_DIM = 40    # width of each character embedding (lookup table column count)
HIDDEN_DIM = 50   # width of the recurrent hidden state fed to the output layer

# Vocabulary: lowercase letters, the space character, and an end-of-sequence marker.
characters = list("abcdefghijklmnopqrstuvwxyz ") + ["<EOS>"]

# Index -> character and character -> index tables built from the same ordering.
int2char = characters[:]
char2int = dict((ch, idx) for idx, ch in enumerate(characters))

VOCAB_SIZE = len(int2char)

In [3]:
# Single parameter collection: the output-layer parameters and every recurrent
# builder in this notebook register their weights here, and the trainers update it.
pc = dy.ParameterCollection()

In [13]:
# Display the DyNet version this notebook was run against.
dy.__version__

2.0

In [5]:
# Parameters of the embedding and output layers, shared by every model below.
# Entries are created in the same order as before: lookup, R, bias.
params = {
    # one INPUT_DIM-wide embedding row per vocabulary entry
    "lookup": pc.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM)),
    # projection from the hidden state to one score per vocabulary entry
    "R": pc.add_parameters((VOCAB_SIZE, HIDDEN_DIM)),
    # one bias per vocabulary entry; note that `(VOCAB_SIZE)` is the bare int,
    # not a 1-tuple, so the int is passed directly here
    "bias": pc.add_parameters(VOCAB_SIZE),
}

In [6]:
# compute and return the loss of an RNN on one sentence
def do_one_sentence(rnn, sentence):
    """Build the next-character prediction loss of `rnn` on one sentence.

    Starts a fresh computation graph, wraps the sentence in "<EOS>" markers and,
    for every position, scores the following character from the recurrent
    state reached after reading the current one.

    Args:
        rnn: recurrent builder exposing `initial_state()`; the returned state
            must support `add_input(...)` and `output()`.
        sentence: iterable of characters, each of which must be a key of
            `char2int`.

    Returns:
        A DyNet expression holding the sum over positions of the negative
        log-likelihood of the correct next character.

    Raises:
        KeyError: if `sentence` contains a character missing from `char2int`.
    """
    # setup the sentence
    dy.renew_cg()
    state = rnn.initial_state()

    R = dy.parameter(params["R"])
    bias = dy.parameter(params["bias"])
    lookup = params["lookup"]

    # <EOS> acts both as the start symbol and as the final prediction target
    char_ids = [char2int[c] for c in ["<EOS>"] + list(sentence) + ["<EOS>"]]

    losses = []
    for char, next_char in zip(char_ids, char_ids[1:]):
        state = state.add_input(lookup[char])
        scores = R * state.output() + bias
        # Fused -log(softmax(scores)[next_char]). Computing softmax, pick and
        # log separately yields log(0) once a probability underflows.
        losses.append(dy.pickneglogsoftmax(scores, next_char))
    return dy.esum(losses)  # element wise sum

In [7]:
# generate from model:
def generate(rnn):
    """Sample a string from the model, one character at a time.

    Starts a fresh computation graph, feeds "<EOS>" as the start symbol and
    keeps sampling the next character from the softmax over the output scores
    until "<EOS>" is drawn. There is no cap on the generated length.

    Args:
        rnn: recurrent builder exposing `initial_state()`; the returned state
            must support `add_input(...)` and `output()`.

    Returns:
        The sampled characters joined into a str, without the final "<EOS>".
    """
    def sample(probs):
        # Walk the cumulative distribution until it passes a uniform draw.
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0:
                return i
        # Float rounding can leave the probabilities summing to slightly less
        # than the draw; fall back to the last index instead of returning None
        # (which would crash on int2char[None] in the caller).
        return len(probs) - 1

    # setup the sentence
    dy.renew_cg()
    s0 = rnn.initial_state()

    R = dy.parameter(params["R"])
    bias = dy.parameter(params["bias"])
    lookup = params["lookup"]

    s = s0.add_input(lookup[char2int["<EOS>"]])
    out = []
    while True:
        probs = dy.softmax(R*s.output() + bias)
        probs = probs.vec_value()
        next_char = sample(probs)
        out.append(int2char[next_char])
        if out[-1] == "<EOS>": break
        s = s.add_input(lookup[next_char])
    return "".join(out[:-1]) # strip the <EOS>

In [8]:
# train, and generate a sample every `report_every` iterations
def train(rnn, sentence, iterations=200, report_every=5):
    """Fit `rnn` to a single sentence with plain SGD, printing progress.

    A new `dy.SimpleSGDTrainer` over the shared parameter collection `pc` is
    created on every call. Each iteration builds the sentence loss, runs the
    forward and backward passes and applies one update.

    Args:
        rnn: recurrent builder passed through to `do_one_sentence` and
            `generate`.
        sentence: the training sentence passed through to `do_one_sentence`.
        iterations: number of update steps to run (default 200, the previously
            hard-coded value).
        report_every: print the loss and a generated sample whenever the
            iteration index is a multiple of this value (default 5, the
            previously hard-coded value).

    Returns:
        None. Progress is written to stdout.
    """
    trainer = dy.SimpleSGDTrainer(pc)
    for i in range(iterations):
        loss = do_one_sentence(rnn, sentence)
        # read the value before backward() so the forward pass has been run
        loss_value = loss.value()
        loss.backward()
        trainer.update()
        if i % report_every == 0:
            print('loss value: {}'.format(loss_value))
            print(generate(rnn))

### single step

In [14]:
# Unrolled version of do_one_sentence, kept inline so each step can be inspected.
# `rnn` is otherwise only created in a later cell, so on a fresh kernel this cell
# raised NameError; build a builder here so the notebook runs top to bottom.
rnn = dy.SimpleRNNBuilder(1, INPUT_DIM, HIDDEN_DIM, pc)

dy.renew_cg()
s0 = rnn.initial_state()

R = dy.parameter(params["R"])
bias = dy.parameter(params["bias"])
lookup = params["lookup"]

sentence = "a quick brown fox jumped over the lazy dog"
# keep the text in `sentence` and the integer ids in their own name instead of
# rebinding one variable from str to list of int
char_ids = [char2int[c] for c in ["<EOS>"] + list(sentence) + ["<EOS>"]]
s = s0
loss = []
for char, next_char in zip(char_ids, char_ids[1:]):
    s = s.add_input(lookup[char])
    probs = dy.softmax(R*s.output() + bias) # the probability of each character
    loss.append( -dy.log(dy.pick(probs,next_char)) )
loss = dy.esum(loss) # element wise sum


In [138]:
# One manual SGD step on the `loss` expression built in the single-step cell.
trainer = dy.SimpleSGDTrainer(pc)
loss_value = loss.value() # forward pass: evaluate the summed loss
loss.backward()  # backward pass: compute gradients for the parameters in pc
trainer.update()  # apply one update using those gradients

print('loss value: {}'.format(loss_value))

loss value: 113.17980194091797


In [139]:
# Sample a string from the model after the single update above.
generate(rnn)

'qjijai'

### train with rnn

In [28]:
# Names exported by dynet that contain 'RNN'.
[name for name in dir(dy) if 'RNN' in name]

['BiRNNBuilder', 'RNNState', 'SimpleRNNBuilder', 'StackedRNNState']

In [9]:
# Build a SimpleRNNBuilder (first argument 1 rather than LAYERS), registered in
# the shared collection `pc`, and train it on the example sentence.
rnn = dy.SimpleRNNBuilder(1, INPUT_DIM, HIDDEN_DIM, pc)
#rnn = dy.BiRNNBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc, dy.SimpleRNNBuilder)
sentence = "a quick brown fox jumped over the lazy dog"
train(rnn, sentence) 

loss value: 145.73895263671875
glreyuj osr lgd 
loss value: 98.66122436523438
uvdeoa m pglqotaxqzew f wx k lkzyhpe dxm  gcua  eyreq wk e ixhgnfq x rnvz qirddeo
loss value: 72.92253112792969
fcwgjnq pegrm kekugoa ijzpdlcnoxp mmf  qyacq bhehllhwhyducu kdnv
loss value: 44.650638580322266
qxohd bhe bax a gz
loss value: 24.293292999267578
j qudckzarheizg sukckdv tlo la q  bzgwrs eeu pxe luigr qrjah  d g
loss value: 11.510405540466309
w dogck kogrnt bu pog ooer the soze doghn  buphe otxd fver x juer ehn wwn ougckbwokyrawv npea
loss value: 4.789723873138428
a quick brodnroox vem
loss value: 1.5549122095108032
a quick broyn fox jumped over the lazy dog
loss value: 0.8983383178710938
a quick brown fox jumped over the lazy dog
loss value: 0.637705385684967
a quick brown fox jumped over the lazy dog
loss value: 0.4936693608760834
a quick brown fow dumpes over the lazy dog
loss value: 0.40227362513542175
a quick brown fox jumped over the lazy dog
loss value: 0.3391484320163727
a quick brown fox ju

### train with lstm

In [19]:
# Names exported by dynet that contain 'LSTM'.
[name for name in dir(dy) if 'LSTM' in name]

['CompactVanillaLSTMBuilder',
 'CoupledLSTMBuilder',
 'FastLSTMBuilder',
 'LSTMBuilder',
 'VanillaLSTMBuilder']

In [20]:
# Pick one LSTM builder variant; the commented lines are the alternatives listed
# above. The active one is a CoupledLSTMBuilder with first argument 1 (not
# LAYERS), registered in the shared collection `pc`.
# lstm = dy.LSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
# lstm = dy.VanillaLSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
# lstm = dy.CompactVanillaLSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
# lstm = dy.FastLSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
lstm = dy.CoupledLSTMBuilder(1, INPUT_DIM, HIDDEN_DIM, pc)

In [11]:
# Train the LSTM on the same example sentence used for the simple RNN.
sentence = "a quick brown fox jumped over the lazy dog"
train(lstm, sentence)

loss value: 139.07766723632812
fu xcrzchsxltyz sj ddgdphggugmdr gd aqzv rxkz qud zluqani brv ifndacq cznxmwyke lnad
loss value: 115.39275360107422
wrw ms zudue l ohva fr owmsxt
loss value: 94.32356262207031
mex uha doe do ro xzcpen uyowpwiowmytugx h
loss value: 69.53958129882812
k lazi de hy ow rzh uegan dowue oemte ogba  iumowy sr 
loss value: 48.09353256225586
bpjjkr dowwu quihed bower fove theer box xqllad toe thed fox dogn lroebr thgjea thkd wuck dow xrow
loss value: 30.93006706237793
n lof fox luc brox jumk
loss value: 18.736249923706055
a quiuyc bown fox jumped oog
loss value: 9.733725547790527
a impe dowa fox jumpe dog
loss value: 4.982993125915527
a quicck  og
loss value: 2.435997247695923
a quick brown fox jumped over the lazy dog
loss value: 1.6407968997955322
a quick brown fox jumped over the lazy dog
loss value: 1.227597951889038
a lazy dog
loss value: 0.9757919311523438
a quick drown fox jumped over the lazy dog
loss value: 0.8070783615112305
a quick brown fox jumped over 

In [13]:
#another_sentence = 'these pretzels are making me thirsty'
#train(rnn, another_sentence)

### train with gru (gated recurrent unit)

In [12]:
# Build a GRUBuilder (this one uses LAYERS as its first argument), registered in
# the shared collection `pc`, and train it on the same example sentence.
gru = dy.GRUBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
sentence = "a quick brown fox jumped over the lazy dog"
train(gru, sentence)

loss value: 139.5784149169922
swky xwojyqfxcs h aictkmrccz
loss value: 87.63033294677734
 qugctc
loss value: 54.1542854309082
a aqmcc kxo ro jux sov wmnow ve qhir tcjk tpme ro jumdid ojex do ofvjummeee olrz oc afo xc nco bt ovwe luci bro o
loss value: 32.618717193603516
f quupd ddoe lte layd doeh lote de overe heh laz doc
loss value: 18.67572021484375
a rovemer olazy ogvgrwe lazyy do
loss value: 10.506216049194336
a bqucpd uicb browf fox jummed over lazy dog
loss value: 5.498875617980957
a quick bbron nox juumpd dog
loss value: 2.603436231613159
a quick brownn fox jumped over the the lazy dog
loss value: 1.0432296991348267
a quick br over the lazy dog
loss value: 0.645311713218689
a quick brown fox juped over the lazy dog
loss value: 0.4766891598701477

loss value: 0.3763197362422943
a quick brown fox jumped over the lazy dog
loss value: 0.30998367071151733
a quick brown fox jumped over the lazy dog
loss value: 0.26300886273384094
a quick brown fox jumped over the lazy dog
loss value: 