In [43]:
import dynet as dy
import numpy as np

## LSTM Acceptor

In [44]:
# acceptor LSTM
class LstmAcceptor(object):
    """Sequence classifier: a one-layer LSTM followed by a linear projection.

    The whole input sequence is run through the LSTM and only the output at
    the last position is multiplied by the projection matrix ``W``.
    """

    def __init__(self, in_dim, lstm_dim, out_dim, model):
        """Create the LSTM builder and the projection matrix inside ``model``.

        in_dim   -- input dimension passed to the LSTM builder
        lstm_dim -- hidden dimension passed to the LSTM builder
        out_dim  -- number of rows of ``W`` (one row per output score)
        model    -- DyNet model that the parameters are added to
        """
        num_layers = 1
        self.builder = dy.VanillaLSTMBuilder(num_layers, in_dim, lstm_dim, model)
        self.W = model.add_parameters((out_dim, lstm_dim))

    def __call__(self, sequence):
        """Return ``W * last_output`` for ``sequence``.

        ``sequence`` is the list of inputs handed to ``transduce``. The
        returned scores are not normalised here (no softmax is applied).

        NOTE(review): an empty ``sequence`` is not handled; taking the last
        output presumably fails in that case -- confirm.
        """
        start_state = self.builder.initial_state()
        # convert the parameter into an Expression (add it to the graph)
        projection = self.W.expr()
        last_output = start_state.transduce(sequence)[-1]
        return projection * last_output

In [45]:
# usage: sizes used when building the acceptor and the embedding table below
VOCAB_SIZE = 1000 # number of rows in the embedding lookup table
EMBED_SIZE = 100 # width of each embedding, which is also the LSTM input size
LSTM_HIDDEN_SIZE = 100 # hidden dimension of the LSTM


In [46]:
# A single model owns every parameter created below; the trainer is built on it.
m = dy.Model()
trainer = dy.AdamTrainer(m)

acceptor = LstmAcceptor(
    in_dim=EMBED_SIZE,
    lstm_dim=LSTM_HIDDEN_SIZE,
    out_dim=3,  # 3 is W's rows, i.e. 3 output classes
    model=m,
)

# Lookup table with one EMBED_SIZE-wide row per vocabulary entry.
embeds = m.add_lookup_parameters((VOCAB_SIZE, EMBED_SIZE))

## No batching

In [35]:
# training code
# Toy training set: (sequence of embedding-row indices, class label) pairs.
train_data = [((1,4,5,1),1), ((42,1),2), ((56,2,17),1)]
for epoch in range(10):
    # Restart the accumulator at the top of every epoch so the loop does not
    # depend on a reset left behind by the previous iteration.
    sum_of_losses = 0.0
    for sequence, label in train_data:
        dy.renew_cg() # new computation graph
        vecs = [embeds[i] for i in sequence]
        preds = acceptor(vecs)
        loss = dy.pickneglogsoftmax(preds, label)
        sum_of_losses += loss.npvalue()
        loss.backward()
        trainer.update()
    # Mean loss for the epoch; the divisor follows the dataset size instead of
    # a hard-coded 3.
    print(sum_of_losses / len(train_data))

[ 1.08813584]
[ 1.02386413]
[ 0.9687473]
[ 0.91334796]
[ 0.85537948]
[ 0.79329775]
[ 0.72606927]
[ 0.65342158]
[ 0.5763431]
[ 0.49758144]


In [47]:
# prediction code: one fresh graph and one forward pass per sequence
for sequence in ((1, 4, 12, 1), (42, 1), (56, 2, 17)):
    dy.renew_cg()  # new computation graph
    vecs = [embeds[word_id] for word_id in sequence]
    scores = acceptor(vecs)
    preds = dy.softmax(scores)
    vals = preds.npvalue()
    # index of the highest-probability class, followed by the full distribution
    print(vals.argmax(), vals)

0 [ 0.34042108  0.33031464  0.32926428]
0 [ 0.34230351  0.33097464  0.32672185]
0 [ 0.34670839  0.31679502  0.33649665]


## Enable batching

In [54]:
# LstmAcceptor is the same as without batching

# training code: batched.
# NOTE: DyNet only batches these operations when autobatching has been switched
# on before `import dynet` (e.g. `import dynet_config; dynet_config.set(autobatch=True)`
# or the `--dynet-autobatch 1` command-line flag). Without it the code below still
# runs and trains, but the operations are not batched.
train_batch = [((1,4,5,1),1), ((42,1),2), ((56,2,17),1)]
for epoch in range(10):
    dy.renew_cg()     # we create a new computation graph for the epoch, not each item.
    # we will treat all these datapoints as a single batch
    losses = []
    for sequence, label in train_batch:
        vecs = [embeds[i] for i in sequence]
        preds = acceptor(vecs)
        loss = dy.pickneglogsoftmax(preds, label)
        losses.append(loss)
    # we accumulated the losses from all the batch.
    # Now we average them, and do forward-backward as usual.
    # The divisor follows the number of accumulated losses instead of a hard-coded 3.
    batch_loss = dy.esum(losses) / len(losses)
    print(batch_loss.npvalue()) # this calls forward on the batch
    batch_loss.backward()
    trainer.update()

[ 0.48097262]
[ 0.44454539]
[ 0.41038281]
[ 0.37879309]
[ 0.34985885]
[ 0.32346082]
[ 0.29933873]
[ 0.27716547]
[ 0.25661072]
[ 0.23738393]


In [55]:
# prediction code: build every prediction expression first, evaluate them together
dy.renew_cg()  # a single new computation graph holds all the sequences
batch_preds = []
for sequence in ((1, 4, 12, 1), (42, 1), (56, 2, 17)):
    vecs = [embeds[word_id] for word_id in sequence]
    scores = acceptor(vecs)
    batch_preds.append(dy.softmax(scores))

# run forward once over all the accumulated prediction expressions ...
dy.forward(batch_preds)
# ... and then read the individual values
for preds in batch_preds:
    vals = preds.npvalue()
    print(vals.argmax(), vals)

1 [ 0.01109798  0.96727955  0.02162244]
2 [ 0.18580519  0.2743032   0.5398916 ]
1 [ 0.01701832  0.96555424  0.01742738]
