In [1]:
from collections import defaultdict
import time
import random
import dynet as dy
import numpy as np

In [26]:
# Functions to read in the corpus
# Each mapping assigns the next unused integer id the first time a key is looked up.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
# Looking up "<unk>" first reserves id 0 for the unknown-word token.
UNK = w2i["<unk>"]

def read_dataset(filename):
    """Lazily read a ``tag ||| sentence`` file and yield encoded examples.

    Every non-blank line is lower-cased, stripped, and split on " ||| " into a
    tag and a sentence; the sentence is split on single spaces.

    Args:
        filename: Path of the text file to read.

    Yields:
        ``(word_ids, tag_id)`` where ``word_ids`` is a list of ids from the
        module-level ``w2i`` and ``tag_id`` comes from ``t2i``. Both mappings
        are resolved when a line is consumed, so whatever ``w2i`` is bound to
        at that moment decides how unseen words are encoded.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            fields on " ||| ".
    """
    with open(filename, "r") as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                # A blank line (e.g. a stray trailing newline) holds no example;
                # without this guard the two-name unpacking below would raise.
                continue
            tag, words = line.split(" ||| ")
            # generator: one example at a time, nothing is held in memory here
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

In [27]:
# Read in the data
# Reading the training file grows w2i / t2i: every new word or tag gets a new id.
train = [example for example in read_dataset("../data/classes/train.txt")]
# Freeze the vocabulary: from here on, any word not seen in training maps to UNK
# instead of receiving a fresh id.
w2i = defaultdict(lambda: UNK, w2i)
dev = [example for example in read_dataset("../data/classes/test.txt")]
# Sizes used to shape the model parameters.
nwords, ntags = len(w2i), len(t2i)

In [28]:
# Show the vocabulary size and the number of distinct tags.
nwords, ntags

(18648, 5)

In [30]:
# Start DyNet and define trainer
# `model` holds the parameters added to it in the cells below; `trainer` is an
# Adam optimizer constructed over that same `model`.
model = dy.Model()
trainer = dy.AdamTrainer(model)

In [60]:
# Define the model
# Lookup table declared with dims (nwords, ntags): looking up a word id gives
# that word's ntags per-tag weights.
W_sm = model.add_lookup_parameters((nwords, ntags))  # Word weights
# The dimension is passed as the plain int `ntags` (a parenthesised name is not a tuple).
b_sm = model.add_parameters(ntags)  # Softmax bias

In [61]:
# A function to calculate scores for one value
def calc_scores(words):
    """Build the tag-score expression for one sentence.

    A fresh DyNet computation graph is started on every call. The score is the
    element-wise sum of the `W_sm` entries of all word ids in `words`, plus the
    bias `b_sm`.

    Args:
        words: sequence of integer word ids (indices into `W_sm`).

    Returns:
        A DyNet expression holding one unnormalized score per tag.
    """
    dy.renew_cg()
    # Each lookup yields a vector of ntags weights for that word.
    word_vecs = [dy.lookup(W_sm, word_id) for word_id in words]
    # Element-wise sum over every word in the sentence.
    summed = dy.esum(word_vecs)
    bias = dy.parameter(b_sm)
    return summed + bias

In [82]:
# Inspect the W_sm entry for word id 1: a vector with one weight per tag.
dy.lookup(W_sm, 1).value()

[0.5764702558517456,
 -0.6167762875556946,
 0.14295324683189392,
 -0.7357810139656067,
 -0.6273263096809387]

In [64]:
# Inspect the whole lookup table as one array; its shape is (ntags, nwords),
# i.e. one column per word.
dy.parameter(W_sm).value().shape

(5, 18648)

In [87]:
# Train for N_EPOCHS passes over `train`. Every REPORT_EVERY-th epoch, print the
# mean training loss per sentence and the accuracy on `dev`.
N_EPOCHS = 100
REPORT_EVERY = 10

for ITER in range(N_EPOCHS):
    # Evaluation results are only ever printed on reporting epochs, so the dev
    # pass is skipped on all other epochs instead of being computed and discarded.
    report = ITER % REPORT_EVERY == 0

    # Perform training
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()

    for words, tag in train:
        # dy.pickneglogsoftmax: -log(e^{x_tag} / sum_j e^{x_j}) over the scores x
        my_loss = dy.pickneglogsoftmax(calc_scores(words), tag)
        train_loss += my_loss.value()
        my_loss.backward()
        trainer.update()

    if report:
        # The reported time covers the training pass only.
        print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, train_loss/len(train), time.time()-start))

        # Perform testing
        test_correct = 0.0
        for words, tag in dev:
            scores = calc_scores(words).npvalue()
            # Predict the tag with the highest score.
            predict = np.argmax(scores)
            if predict == tag:
                test_correct += 1
        print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=0.2999, time=0.43s
iter 0: test acc=0.3855
iter 10: train loss/sent=0.2836, time=0.38s
iter 10: test acc=0.3760
iter 20: train loss/sent=0.2689, time=0.37s
iter 20: test acc=0.3756
iter 30: train loss/sent=0.2559, time=0.37s
iter 30: test acc=0.3769
iter 40: train loss/sent=0.2436, time=0.37s
iter 40: test acc=0.3792
iter 50: train loss/sent=0.2329, time=0.36s
iter 50: test acc=0.3747
iter 60: train loss/sent=0.2226, time=0.37s
iter 60: test acc=0.3796
iter 70: train loss/sent=0.2135, time=0.37s
iter 70: test acc=0.3796
iter 80: train loss/sent=0.2050, time=0.37s
iter 80: test acc=0.3792
iter 90: train loss/sent=0.1971, time=0.38s
iter 90: test acc=0.3765


`dy.pickneglogsoftmax(x, v)` returns the negative log-softmax of the score vector $x$ at index $v$ (here, the gold tag): $$-\log\left(\frac{e^{x_v}}{\sum_j e^{x_j}}\right)$$