In [1]:
from collections import defaultdict
import time
import random
import dynet as dy
import numpy as np

# Corpus-reading helper and the shared vocabularies it populates.
# Each defaultdict hands out the next free integer id the first time a key is
# looked up, so merely indexing w2i / t2i grows the corresponding vocabulary.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
UNK = w2i["<unk>"]  # first lookup on the empty w2i, so "<unk>" receives id 0


def read_dataset(filename):
    """Lazily yield ``(word_ids, tag_id)`` pairs parsed from ``filename``.

    Each non-blank line is lower-cased, stripped and split on ``" ||| "`` into
    a tag and a sentence; the sentence is split on single spaces.  Words and
    tags are converted to integer ids through the module-level ``w2i`` and
    ``t2i`` mappings, which are looked up by name at iteration time (so a later
    rebinding of ``w2i`` is picked up by subsequent reads).

    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of aborting the whole read with an unpacking error.

    Args:
        filename: Path of a text file with one ``tag ||| sentence`` per line.

    Yields:
        Tuples ``(list_of_word_ids, tag_id)``, one per non-blank line.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields on ``" ||| "``.
    """
    # Explicit encoding so decoding does not depend on the platform default.
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                continue
            tag, words = line.split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Read in the data
train = list(read_dataset("../data/classes/train.txt"))
# Size the vocabulary from the training data only.  This must happen before
# the evaluation set is read: the frozen defaultdict below still *inserts*
# every unseen word as a new key (mapped to UNK), so taking len(w2i) afterwards
# would count words that never get an id of their own and would allocate
# embedding rows that no id ever points to.
nwords = len(w2i)
# Freeze the vocabulary: from here on unknown words resolve to UNK instead of
# being assigned fresh ids.
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("../data/classes/test.txt"))
# t2i is not frozen, so the tag count is taken after both files are read.
ntags = len(t2i)

# Start DyNet and define trainer
model = dy.Model()
trainer = dy.AdamTrainer(model)

In [19]:
# Model hyper-parameters
EMB_SIZE = 64   # width of each word embedding
HID_SIZE = 64   # width of every hidden layer
HID_LAY = 3     # number of hidden layers

# Word embeddings: one EMB_SIZE-wide row per vocabulary id
W_emb = model.add_lookup_parameters((nwords, EMB_SIZE))

# Hidden-layer weights: the first layer consumes the summed embeddings
# (EMB_SIZE inputs); every later layer consumes the previous hidden layer
# (HID_SIZE inputs).  All weight matrices are created before any bias so the
# parameters are registered with the model in the same order as before.
W_h = [
    model.add_parameters((HID_SIZE, HID_SIZE if layer_idx else EMB_SIZE))
    for layer_idx in range(HID_LAY)
]
b_h = [model.add_parameters(HID_SIZE) for _ in range(HID_LAY)]

# Output (softmax) layer: one score per tag
W_sm = model.add_parameters((ntags, HID_SIZE))
b_sm = model.add_parameters(ntags)

In [20]:
# Score a single example with the deep bag-of-words network
def calc_scores(words):
    """Build the score expression for one sentence.

    A new computation graph is started on every call.  The embeddings of all
    ids in ``words`` are summed, the sum is passed through each hidden layer
    in ``W_h`` / ``b_h`` (affine transform followed by tanh), and the final
    hidden vector is mapped through the output layer ``W_sm`` / ``b_sm``.

    Args:
        words: Iterable of integer word ids used to index ``W_emb``.

    Returns:
        The DyNet expression ``W_sm * h + b_sm`` (scores before any softmax).
    """
    dy.renew_cg()

    # Bag of words: element-wise sum of the word embeddings
    embeddings = [dy.lookup(W_emb, word_id) for word_id in words]
    hidden = dy.esum(embeddings)

    # Stack of tanh layers; each layer's output is the next layer's input
    for weight, bias in zip(W_h, b_h):
        pre_activation = dy.parameter(weight) * hidden + dy.parameter(bias)
        hidden = dy.tanh(pre_activation)

    # Final affine layer producing the scores
    output_weight = dy.parameter(W_sm)
    output_bias = dy.parameter(b_sm)
    return output_weight * hidden + output_bias

In [11]:
# Scratch check: sum the embeddings of the word ids in the first entry of
# `train`, mirroring the first step of calc_scores, so the result can be
# inspected in the next cell.
h = dy.esum([dy.lookup(W_emb, x) for x in train[0][0]])

In [12]:
# Show the shape of the summed embedding as a NumPy array; the recorded
# output below is (64,), i.e. one value per embedding dimension (EMB_SIZE = 64).
h.npvalue().shape

(64,)

In [21]:
# Train for N_ITERS passes over the training data, reporting the average
# training loss and the accuracy on `dev` every REPORT_EVERY passes.
N_ITERS = 100
REPORT_EVERY = 10
for ITER in range(N_ITERS):
    report = ITER % REPORT_EVERY == 0

    # Perform training: one parameter update per example, in a fresh random order
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    for words, tag in train:
        my_loss = dy.pickneglogsoftmax(calc_scores(words), tag)
        train_loss += my_loss.value()
        my_loss.backward()
        trainer.update()
    if report:
        print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, train_loss/len(train), time.time()-start))

    # Perform testing.  Evaluation only runs forward passes and never updates
    # the parameters, so it is skipped on passes whose accuracy would not be
    # printed anyway.  It is still run after the final pass so that
    # `test_correct` (and the other loop variables) left in the notebook
    # namespace describe the fully trained model.
    if report or ITER == N_ITERS - 1:
        test_correct = 0.0
        for words, tag in dev:
            scores = calc_scores(words).npvalue()
            predict = np.argmax(scores)
            if predict == tag:
                test_correct += 1
        if report:
            print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.5639, time=2.57s
iter 0: test acc=0.3869
iter 10: train loss/sent=0.1955, time=3.51s
iter 10: test acc=0.3529
iter 20: train loss/sent=0.0487, time=2.96s
iter 20: test acc=0.3548
iter 30: train loss/sent=0.0421, time=3.13s
iter 30: test acc=0.3715
iter 40: train loss/sent=0.0327, time=3.27s
iter 40: test acc=0.3498
iter 50: train loss/sent=0.0425, time=2.99s
iter 50: test acc=0.3683
iter 60: train loss/sent=0.0433, time=3.22s
iter 60: test acc=0.3561
iter 70: train loss/sent=0.0307, time=3.56s
iter 70: test acc=0.3638
iter 80: train loss/sent=0.0284, time=4.10s
iter 80: test acc=0.3792
iter 90: train loss/sent=0.0400, time=3.82s
iter 90: test acc=0.3584


In [25]:
# Scratch check of np.argmax: it returns the index of the largest element
# (recorded output below is 2 for this list), which is how the evaluation
# loop turns a score vector into a predicted tag id.
np.argmax([1,2,3])

2