In [2]:
import numpy as np
import random
import time 
import re
from collections import defaultdict

import torch
from torch import nn
from torch.autograd import Variable

In [4]:
# Functions to read in the corpus
# Both vocabularies hand out the next free integer id the first time a key is
# looked up (the default factory returns the current size of the mapping).
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
# Id used for unknown words: the id of the empty-string key, which is 0 because
# it is the first key inserted. Empty tokens produced by consecutive spaces in
# a sentence map to this same id.
UNK = w2i[""]
def read_dataset(filename):
    """Lazily read a labelled corpus and yield ``(word_ids, tag_id)`` pairs.

    Each non-blank line must look like ``tag ||| word word ...``. The line is
    lower-cased and stripped before parsing, words are split on single spaces
    and mapped through the module-level ``w2i``; the tag is mapped through
    ``t2i``.

    Args:
        filename: Path of the UTF-8 text file to read.

    Yields:
        A tuple ``(list_of_word_ids, tag_id)`` for every non-blank line.

    Raises:
        ValueError: If a non-blank line does not contain the `` ||| ``
            separator.
    """
    with open(filename, "r", encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            line = line.lower().strip()
            if not line:
                # Blank lines (e.g. a trailing newline at EOF) carry no example.
                continue
            # Split only on the first separator so a sentence that itself
            # contains " ||| " does not break the unpacking.
            parts = line.split(" ||| ", 1)
            if len(parts) != 2:
                raise ValueError(
                    "%s:%d: expected 'tag ||| sentence', got %r"
                    % (filename, lineno, line))
            tag, words = parts
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Load the training split first, so that every training word receives its own id.
train = [*read_dataset("train.txt")]
# Swap in a vocabulary whose default is UNK: from here on, a word that was not
# seen during training resolves to UNK instead of getting a fresh id.
w2i = defaultdict(lambda: UNK, w2i.items())
# The held-out examples (named `dev`) are read from test.txt.
dev = [*read_dataset("test.txt")]
# Vocabulary and tag counts (nwords / ntags) are derived in the model-setup cell.

In [18]:
class DeepCBowClassifier(nn.Module):
  """Deep continuous bag-of-words classifier.

  The word embeddings of a sentence are summed into a single vector, passed
  through a stack of tanh-activated linear layers, and projected to one score
  per tag.

  Args:
    nwords: Number of rows in the embedding table.
    ntags: Number of output scores.
    hidden_layers: Number of hidden linear layers.
    hidden_size: Output width of every hidden layer.
    emb_size: Width of the word embeddings.
  """

  def __init__(self, nwords, ntags, hidden_layers, hidden_size, emb_size):
    super().__init__()
    self.nlayers = hidden_layers

    # Embedding table, re-initialised with Xavier-uniform weights.
    self.embedding = nn.Embedding(nwords, emb_size)
    nn.init.xavier_uniform_(self.embedding.weight)

    # Hidden stack: the first layer consumes the embedding width, every later
    # layer consumes the previous layer's hidden width.
    stack = []
    in_features = emb_size
    for _ in range(hidden_layers):
      stack.append(nn.Linear(in_features, hidden_size))
      in_features = hidden_size
    self.linear_layers = nn.ModuleList(stack)
    for layer in self.linear_layers:
      nn.init.xavier_uniform_(layer.weight)

    # Final projection from the hidden width to the tag scores.
    self.output_layer = nn.Linear(hidden_size, ntags)
    nn.init.xavier_uniform_(self.output_layer.weight)

  def forward(self, words):
    """Score one sentence.

    Args:
      words: Tensor of word indices; intended to be 1-D (a single sentence).

    Returns:
      Tensor with a leading dimension of 1 holding the tag scores.
    """
    # Sum the embeddings over the first axis and reshape to a single row.
    hidden = self.embedding(words).sum(dim=0).view(1, -1)
    for layer in self.linear_layers:
      hidden = torch.tanh(layer(hidden))
    return self.output_layer(hidden)

In [19]:
# Hyperparameters
EMB_SIZE = 64
HID_SIZE = 64
NLAYERS = 2
# NOTE: w2i and t2i are defaultdicts, which insert a key on every missed
# lookup. Words that only occur in the dev data were therefore added to w2i
# (all mapped to UNK) while it was read, so len(w2i) counts them too and the
# embedding table gets rows that no index ever selects.
nwords = len(w2i)
ntags = len(t2i)
print(nwords , ntags)
# Pass the sizes by keyword: the constructor's positional order is
# (nwords, ntags, hidden_layers, hidden_size, emb_size), so the former
# positional call fed EMB_SIZE into hidden_size and HID_SIZE into emb_size.
model = DeepCBowClassifier(
    nwords,
    ntags,
    hidden_layers=NLAYERS,
    hidden_size=HID_SIZE,
    emb_size=EMB_SIZE,
)
# Cross-entropy over the tag scores, optimised with Adam at its default settings.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

18648 3


In [None]:
# Train for a fixed number of passes over the training data, one sentence per
# optimizer step, and report the accuracy on `dev` after every pass.
# Index tensors are created with dtype=torch.long directly instead of going
# through a global named `type`, which shadowed the builtin for the whole
# notebook.
for ITER in range(100):
    # Perform training
    model.train()
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    for words, tag in train:
        words = torch.tensor(words, dtype=torch.long)
        tag = torch.tensor([tag], dtype=torch.long)
        scores = model(words)
        loss = criterion(scores, tag)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # max(..., 1) avoids a ZeroDivisionError when the split is empty.
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (
                ITER, train_loss/max(len(train), 1), time.time()-start))
    # Perform testing
    model.eval()
    test_correct = 0.0
    # No gradients are needed for evaluation; skipping autograd avoids building
    # a graph for every dev sentence.
    with torch.no_grad():
        for words, tag in dev:
            words = torch.tensor(words, dtype=torch.long)
            scores = model(words)[0].cpu().numpy()
            predict = np.argmax(scores)
            if predict == tag:
                test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/max(len(dev), 1)))
