In [1]:
import torchtext
from torchtext.vocab import Vectors, GloVe
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from hw1.models import *

In [5]:
# Field for the model input $x$ (the sentence text)
TEXT = torchtext.data.Field()

# Field for the target $y$; declared with sequential=False
LABEL = torchtext.data.Field(sequential=False)

# SST train / validation / test splits, keeping only the non-neutral examples
train, val, test = torchtext.datasets.SST.splits(
    text_field=TEXT,
    label_field=LABEL,
    filter_pred=lambda example: example.label != 'neutral',
)

# Both vocabularies are built from the training split only
LABEL.build_vocab(train)
TEXT.build_vocab(train)

# One bucketed iterator per split, 10 examples per batch.
# NOTE(review): device=-1 is the legacy torchtext way to ask for CPU batches --
# confirm against the installed torchtext version.
train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
    datasets=(train, val, test),
    batch_size=10,
    device=-1,
)

# Attach the pretrained word vectors (wiki.simple.vec, fetched from `url`)
# to the text vocabulary
url = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.simple.vec'
TEXT.vocab.load_vectors(vectors=Vectors(name='wiki.simple.vec', url=url))

In [4]:
# Baseline classifier built from the two fields.
# NOTE(review): LogisticRegression and TextTrainer are not defined in this
# notebook; presumably they come from `from hw1.models import *` -- confirm.
model = LogisticRegression(TEXT, LABEL)
# Trainer object that is handed the fields and the model to fit.
trainer = TextTrainer(TEXT, LABEL, model)
# Run training on the training iterator. The captured output below shows the
# loss at iterations 0, 100, ..., 900, so skip_iter looks like the logging
# interval and num_iter the total number of iterations -- TODO confirm in hw1.models.
trainer.train(train_iter, num_iter=1000, skip_iter=100)

Iteration 0, loss: 1.102232
Iteration 100, loss: 0.590585
Iteration 200, loss: 0.736041
Iteration 300, loss: 0.674967
Iteration 400, loss: 0.670788
Iteration 500, loss: 0.459173
Iteration 600, loss: 0.630579
Iteration 700, loss: 0.432746
Iteration 800, loss: 0.598211
Iteration 900, loss: 0.334833


In [7]:
class CBOW(nn.Module):
    """Continuous bag-of-words sentence classifier.

    Each sentence is represented by the sum of the embeddings of its tokens;
    that vector is fed to a single linear layer followed by a log-softmax over
    the label vocabulary.

    Args:
        TEXT: object exposing ``vocab.vectors``, a [V, d] tensor of pretrained
            embeddings used to initialise the (trainable) embedding table.
        LABEL: object exposing ``vocab``; ``len(LABEL.vocab)`` is the number of
            output classes.
    """

    def __init__(self, TEXT, LABEL):
        super(CBOW, self).__init__()
        pretrained = TEXT.vocab.vectors
        vocab_size, embed_dim = pretrained.size(0), pretrained.size(1)

        # Embedding table [V, d], pooled per bag with a sum.
        self.embeddings = nn.EmbeddingBag(vocab_size, embed_dim, mode='sum')
        # Initialise from a *copy* of the pretrained vectors: wrapping the
        # original tensor would make the optimizer update TEXT.vocab.vectors
        # in place. nn.Parameter defaults to requires_grad=True; it is spelled
        # out here because the embeddings are meant to be fine-tuned.
        self.embeddings.weight = nn.Parameter(pretrained.clone(), requires_grad=True)

        # Linear layer mapping the pooled embedding [d] to class scores [C]
        self.linear = nn.Linear(embed_dim, len(LABEL.vocab))

    def forward(self, bow):
        """Compute per-class log-probabilities for a batch of sentences.

        Args:
            bow: integer matrix of token indices with shape
                [len-of-sentence, N], where N is the batch size.

        Returns:
            Tensor of shape [N, C] holding log-probabilities (log-softmax over
            dim 1).
        """
        # EmbeddingBag treats every ROW of a 2-D input as one bag, so the
        # [len, N] batch must be transposed to [N, len]; otherwise tokens would
        # be pooled across the batch instead of across each sentence.
        sentences = bow.t().contiguous()
        bow_features = self.embeddings(sentences)  # [N, d]
        return F.log_softmax(self.linear(bow_features), dim=1)