In [1]:
from collections import defaultdict
import math
import time
import random
import os, sys

import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:

EMB_SIZE = 64
WIN_SIZE = 3
FILTER_SIZE = 64
USE_CUDA = torch.cuda.is_available()
# Functions to read in the corpus
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
UNK = w2i["<unk>"]
def read_dataset(filename):
    with open(filename, "r") as f:
        for line in f:
            tag, words = line.lower().strip().split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Read in the data
train = list(read_dataset("../data/classes/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("../data/classes/test.txt"))
nwords = len(w2i)
ntags = len(t2i)

In [3]:
class CNN(nn.Module):
    def __init__(self, nwords,ntags, emb_size,filter_size,win_size):
        super(CNN, self).__init__()
        self.embedding = nn.Embedding(nwords, emb_size,padding_idx=2)
        self.conv = nn.Conv1d(emb_size, filter_size,win_size,padding=1)
        self.linear=nn.Linear(emb_size,ntags)

    def forward(self, words):
        #print(words)
        emb = self.embedding(words)    
        feat = self.conv(emb.permute((0,2,1))) 
        logit = self.linear(feat.max(dim=2)[0])           
        return logit

In [4]:
# Initialize the model and the optimizer
model = CNN(nwords=nwords,ntags=ntags, emb_size=EMB_SIZE,filter_size=FILTER_SIZE,win_size=WIN_SIZE)
if USE_CUDA:
    model = model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [5]:
# convert a (nested) list of int into a pytorch Variable
def convert_to_variable(words):
    var = Variable(torch.LongTensor(words))
    if USE_CUDA:
        var = var.cuda()
    return var

# A function to calculate scores for one value
def calc_scores(words):
    # This will change from a list of histories, to a pytorch Variable whose data type is LongTensor
    #print(words)
    words_var = convert_to_variable(words)
    logits = model(words_var)
    return logits


In [7]:
for ITER in range(2):
    # Perform training
    random.shuffle(train)
    train_loss = 0.0
    train_correct = 0.0
    start = time.time()
    model.train()
    for words, tag in train[:100]:
        scores = calc_scores([words])
        predict = scores.max(1)[1].data[0]
        if predict == tag:
            train_correct += 1

        my_loss = nn.functional.cross_entropy(scores, convert_to_variable([tag]))
        train_loss += my_loss.data[0]
        optimizer.zero_grad()
        my_loss.backward()
        optimizer.step()
    print("iter %r: train loss/sent=%.4f, acc=%.4f, time=%.2fs" % (ITER, train_loss/len(train), train_correct/len(train), time.time()-start))
    # Perform testing
    model.eval()
    test_correct = 0.0
    for words, tag in dev:
        scores = calc_scores([words])
        predict = scores.max(1)[1].data[0]
        if predict == tag:
            test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=0.0195, acc=0.0021, time=1.37s
iter 0: test acc=0.2330
iter 1: train loss/sent=0.0184, acc=0.0033, time=1.37s
iter 1: test acc=0.2692
