In [1]:
from collections import defaultdict
import math
import time
import random
import os, sys

import torch
import torch.nn as nn
from torch.autograd import Variable


from torch.utils.data import Dataset, DataLoader,TensorDataset
# Seed every RNG this notebook draws from: Python's `random` drives
# random.shuffle(train) in the training loop, torch drives the model's
# random initialisation and dropout. Seeding torch alone leaves the
# per-epoch example order different on every run.
random.seed(1)
torch.manual_seed(1)    # reproducible

<torch._C.Generator at 0x20285edc410>

In [2]:
# ---- Hyper-parameters ------------------------------------------------------
BATCH_SIZE = 32      # defined here; not referenced by the cells shown below
EMB_SIZE = 64        # word-embedding dimension
FILTER_SIZE = 64     # number of convolution filters
WIN_SIZE = 3         # convolution window width, in tokens

# Whether a CUDA device is available; later cells move model/tensors accordingly.
USE_CUDA = torch.cuda.is_available()

# ---- Vocabulary tables -----------------------------------------------------
# Each table hands out the next free integer id the first time a key is seen.
t2i = defaultdict(lambda: len(t2i))
w2i = defaultdict(lambda: len(w2i))
# Looking up "<unk>" on the still-empty word table reserves id 0 for it.
UNK = w2i["<unk>"]
def read_dataset(filename):
    """Yield ``(word_ids, tag_id)`` pairs from a ``tag ||| sentence`` file.

    Every line is lower-cased and stripped, then split into a tag and a
    sentence on the first ``" ||| "``. Sentence tokens (split on single
    spaces) are mapped through the module-level ``w2i`` table and the tag
    through ``t2i``; both lookups may grow the tables if they are still in
    their auto-assigning state.

    Args:
        filename: path of the corpus file to read.

    Yields:
        tuple ``(list[int], int)`` -- word ids of the sentence and the tag id.

    Raises:
        ValueError: if a non-blank line contains no ``" ||| "`` separator.
    """
    # Decode explicitly instead of relying on the platform's default codec.
    # NOTE(review): assumes the corpus files are UTF-8 -- confirm.
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no example.
                continue
            # Split once so a sentence that itself contains " ||| " stays intact.
            tag, words = line.split(" ||| ", 1)
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Load the training split first: while it is being read, w2i still assigns a
# fresh id to every unseen word, so this pass builds the vocabulary.
train = [example for example in read_dataset("../data/classes/train.txt")]

# Freeze the vocabulary: from here on an unseen word maps to UNK instead of
# receiving a new id. (t2i is left unfrozen.)
w2i = defaultdict(lambda: UNK, w2i)

# The held-out split is read with the frozen vocabulary.
dev = [example for example in read_dataset("../data/classes/test.txt")]

# Table sizes, used to dimension the model.
nwords, ntags = len(w2i), len(t2i)

In [3]:
class CNN(nn.Module):
    """Convolutional sentence classifier.

    Pipeline: embedding -> 1-D convolution + SELU -> max over the sequence
    axis -> dropout -> linear layer producing one logit per tag.

    Args:
        nwords: vocabulary size (number of rows in the embedding table).
        ntags: number of output classes.
        emb_size: dimension of each word embedding.
        filter_size: number of convolution filters (conv output channels).
        win_size: convolution kernel width, in tokens.
        padding_idx: index passed to ``nn.Embedding`` as ``padding_idx``.
            Defaults to 2, the value that used to be hard-coded.
    """

    def __init__(self, nwords, ntags, emb_size, filter_size, win_size, padding_idx=2):
        super(CNN, self).__init__()
        # NOTE(review): the default padding_idx=2 is kept only for backward
        # compatibility. In this notebook the vocabulary has no dedicated pad
        # token ("<unk>" is 0 and real words follow), so index 2 is an ordinary
        # word whose embedding nn.Embedding keeps out of training -- confirm
        # whether that is intended, or pass padding_idx=None.
        self.embedding = nn.Embedding(nwords, emb_size, padding_idx=padding_idx)
        self.conv = nn.Sequential(
            nn.Conv1d(emb_size, filter_size, win_size, padding=1),
            nn.SELU()
        )
        self.dropout = nn.Dropout(p=0.75)
        # The pooled feature vector has one entry per convolution filter, so
        # the classifier input width is filter_size. Using emb_size here only
        # worked while emb_size == filter_size.
        self.linear = nn.Linear(filter_size, ntags)

    def forward(self, words):
        """Return tag logits for a batch of word-id sequences.

        Args:
            words: integer tensor of shape (batch, seq_len).

        Returns:
            Tensor of shape (batch, ntags) with unnormalised scores.
        """
        emb = self.embedding(words)                 # (batch, seq_len, emb_size)
        # Conv1d expects channels before the sequence axis.
        feat = self.conv(emb.permute((0, 2, 1)))    # (batch, filter_size, conv_len)
        # Max over the sequence axis; [0] selects the values, dropping the argmax.
        pooled = feat.max(dim=2)[0]                 # (batch, filter_size)
        logit = self.linear(self.dropout(pooled))
        return logit

In [4]:
# Build the classifier from the notebook-level sizes and hyper-parameters,
# move it to the GPU when one is available, and attach an Adam optimizer.
model = CNN(
    nwords=nwords,
    ntags=ntags,
    emb_size=EMB_SIZE,
    filter_size=FILTER_SIZE,
    win_size=WIN_SIZE,
)
if USE_CUDA:
    model = model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [5]:
# Wrap a (nested) list of ints as a LongTensor Variable, on the GPU when in use
def convert_to_variable(words):
    """Return `words` as a ``Variable`` holding a ``torch.LongTensor``.

    The result is moved to the CUDA device when the module-level
    ``USE_CUDA`` flag is true; otherwise it is returned as built.
    """
    wrapped = Variable(torch.LongTensor(words))
    return wrapped.cuda() if USE_CUDA else wrapped

# Score a batch of word-id sequences with the global model
def calc_scores(words):
    """Convert `words` (a nested list of ints) and return ``model``'s output for it."""
    return model(convert_to_variable(words))



In [6]:
def _pad_to_window(words):
    """Return `words` right-padded with id 0 to at least WIN_SIZE tokens.

    A new list is built when padding is needed, so the stored example is never
    modified. Id 0 is the value the loop has always padded with.
    """
    missing = WIN_SIZE - len(words)
    return words + [0] * missing if missing > 0 else words


for ITER in range(10):
    # Perform training
    random.shuffle(train)
    train_loss = 0.0
    train_correct = 0.0
    start = time.time()
    model.train()
    for words, tag in train:
        # One sentence per step: wrap it in a list to form a batch of size 1.
        scores = calc_scores([_pad_to_window(words)])
        # .item() extracts the Python number; indexing a 0-dim tensor with
        # .data[0] raises on PyTorch >= 0.4.
        predict = scores.max(1)[1].item()
        if predict == tag:
            train_correct += 1

        my_loss = nn.functional.cross_entropy(scores, convert_to_variable([tag]))
        train_loss += my_loss.item()
        optimizer.zero_grad()
        my_loss.backward()
        optimizer.step()
    print("iter %r: train loss/sent=%.4f, acc=%.4f, time=%.2fs" % (ITER, train_loss/len(train), train_correct/len(train), time.time()-start))
    # Perform testing
    model.eval()
    test_correct = 0.0
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for words, tag in dev:
            # Same padding as in training so both splits are preprocessed alike.
            scores = calc_scores([_pad_to_window(words)])
            predict = scores.max(1)[1].item()
            if predict == tag:
                test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.6738, acc=0.2496, time=32.70s
iter 0: test acc=0.2575
iter 1: train loss/sent=1.6254, acc=0.2570, time=31.69s
iter 1: test acc=0.2995
iter 2: train loss/sent=1.6020, acc=0.2788, time=31.42s
iter 2: test acc=0.2900
iter 3: train loss/sent=1.5690, acc=0.3017, time=31.60s
iter 3: test acc=0.3303
iter 4: train loss/sent=1.5110, acc=0.3428, time=31.68s
iter 4: test acc=0.3294
iter 5: train loss/sent=1.4496, acc=0.3603, time=31.51s
iter 5: test acc=0.3471
iter 6: train loss/sent=1.3747, acc=0.4017, time=32.08s
iter 6: test acc=0.3548
iter 7: train loss/sent=1.3206, acc=0.4244, time=31.84s
iter 7: test acc=0.3778
iter 8: train loss/sent=1.2655, acc=0.4569, time=32.15s
iter 8: test acc=0.3710
iter 9: train loss/sent=1.2096, acc=0.4714, time=32.29s
iter 9: test acc=0.3796
