In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchtext.legacy import data
from torchtext.legacy import datasets
from torchtext.vocab import Vectors, GloVe, CharNGram#, FastTex

torch.manual_seed(1)


<torch._C.Generator at 0x159192a7410>

In [2]:
################################
# DataLoader
################################

# set up fields
TEXT = data.Field()
LABEL = data.Field(sequential=False,dtype=torch.long)

# make splits for data
# DO NOT MODIFY: fine_grained=True, train_subtrees=False
train, val, test = datasets.SST.splits(
    TEXT, LABEL, fine_grained=True, train_subtrees=False)

# print information about the data
print('train.fields', train.fields)
print('len(train)', len(train))
print('vars(train[0])', vars(train[0]))

train.fields {'text': <torchtext.legacy.data.field.Field object at 0x0000015915A63288>, 'label': <torchtext.legacy.data.field.Field object at 0x0000015915A63248>}
len(train) 8544
vars(train[0]) {'text': ['The', 'Rock', 'is', 'destined', 'to', 'be', 'the', '21st', 'Century', "'s", 'new', '``', 'Conan', "''", 'and', 'that', 'he', "'s", 'going', 'to', 'make', 'a', 'splash', 'even', 'greater', 'than', 'Arnold', 'Schwarzenegger', ',', 'Jean-Claud', 'Van', 'Damme', 'or', 'Steven', 'Segal', '.'], 'label': 'positive'}


In [3]:

# build the vocabulary
# you can use other pretrained vectors, refer to https://github.com/pytorch/text/blob/master/torchtext/vocab.py
TEXT.build_vocab(train, vectors=Vectors(name='vector.txt', cache='./data'))
LABEL.build_vocab(train)
# We can also see the vocabulary directly using either the stoi (string to int) or itos (int to string) method.
print(TEXT.vocab.itos[:10])
print(LABEL.vocab.stoi)
print(TEXT.vocab.freqs.most_common(20))


['<unk>', '<pad>', '.', ',', 'the', 'and', 'a', 'of', 'to', "'s"]
defaultdict(<bound method Vocab._default_unk_index of <torchtext.legacy.vocab.Vocab object at 0x0000015965328F88>>, {'<unk>': 0, 'positive': 1, 'negative': 2, 'neutral': 3, 'very positive': 4, 'very negative': 5})
[('.', 8024), (',', 7131), ('the', 6037), ('and', 4431), ('a', 4403), ('of', 4386), ('to', 2995), ("'s", 2544), ('is', 2536), ('that', 1915), ('in', 1789), ('it', 1775), ('The', 1265), ('as', 1200), ('film', 1152), ('but', 1076), ('with', 1071), ('for', 963), ('movie', 959), ('its', 912)]


In [4]:
len(LABEL.vocab)

6

In [5]:

# print vocab information
print('len(TEXT.vocab)', len(TEXT.vocab))
print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size())


len(TEXT.vocab) 18282
TEXT.vocab.vectors.size() torch.Size([18282, 300])


In [6]:

# make iterator for splits
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_size=64)

# print batch information
batch = next(iter(train_iter)) # for batch in train_iter
print(batch.text) # input sequence
print(batch.label) # groud truth

# Attention: batch.label in the range [1,5] not [0,4] !!!

tensor([[  22,  109,   22,  ..., 2966,   14, 6272],
        [ 471,    4, 1376,  ..., 1021, 1369,   19],
        [   3,   88,  128,  ..., 4841,   10, 1287],
        ...,
        [   1,    1,    1,  ...,    1,    1,    1],
        [   1,    1,    1,  ...,    1,    1,    1],
        [   1,    1,    1,  ...,    1,    1,    1]])
tensor([4, 3, 3, 1, 3, 3, 4, 2, 1, 1, 4, 4, 2, 3, 1, 5, 3, 2, 4, 2, 1, 1, 5, 1,
        1, 3, 3, 1, 3, 1, 1, 2, 1, 2, 2, 5, 5, 5, 3, 1, 4, 3, 4, 3, 4, 3, 1, 4,
        5, 4, 3, 2, 3, 3, 1, 2, 2, 2, 2, 4, 2, 1, 1, 3])


In [7]:


################################
# After build your network 
################################


# Copy the pre-trained word embeddings we loaded earlier into the embedding layer of our model.
pretrained_embeddings = TEXT.vocab.vectors

print(pretrained_embeddings.shape)



torch.Size([18282, 300])


In [8]:



class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores


In [9]:
EMBEDDING_DIM=300
HIDDEN_DIM=10
batch_size=32

In [None]:

# make iterator for splits
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train, val, test), batch_size=batch_size)

# print batch information
batch = next(iter(train_iter)) # for batch in train_iter
# print(batch.text) # input sequence
# print(batch.label) # groud truth

# Attention: batch.label in the range [1,5] not [0,4] !!!

In [19]:
from torch.autograd import Variable
for epoch in range(100):  # again, normally you would NOT do 300 epochs, it is toy data
    batch = next(iter(train_iter)) # for batch in train_iter
    tag_scores=torch.zeros((batch_size,6))
    for i in range(batch_size):
    # for sentence, tags in batch:
        sentence=batch.text[:,i]
        tags=batch.label[i]
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = sentence
        #prepare_sequence(sentence, word_to_ix)
        # targets = tags
        # prepare_sequence(tags, tag_to_ix)

        # Step 3. Run our forward pass.
        tag_scores[i]=(model(sentence_in)[-1])

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
    targets=batch.label
    tag_scores=Variable(tag_scores,requires_grad=True)
    loss = loss_function(tag_scores, targets)
    loss.backward()
    optimizer.step()
    print(loss)

tensor(1.8020, grad_fn=<NllLossBackward>)
tensor(1.7674, grad_fn=<NllLossBackward>)
tensor(1.8209, grad_fn=<NllLossBackward>)
tensor(1.7702, grad_fn=<NllLossBackward>)
tensor(1.7882, grad_fn=<NllLossBackward>)
tensor(1.7753, grad_fn=<NllLossBackward>)
tensor(1.8245, grad_fn=<NllLossBackward>)
tensor(1.8182, grad_fn=<NllLossBackward>)
tensor(1.8101, grad_fn=<NllLossBackward>)
tensor(1.7804, grad_fn=<NllLossBackward>)
tensor(1.7995, grad_fn=<NllLossBackward>)
tensor(1.7684, grad_fn=<NllLossBackward>)
tensor(1.8350, grad_fn=<NllLossBackward>)
tensor(1.7911, grad_fn=<NllLossBackward>)
tensor(1.7959, grad_fn=<NllLossBackward>)
tensor(1.8024, grad_fn=<NllLossBackward>)
tensor(1.7654, grad_fn=<NllLossBackward>)
tensor(1.7774, grad_fn=<NllLossBackward>)
tensor(1.7935, grad_fn=<NllLossBackward>)
tensor(1.7940, grad_fn=<NllLossBackward>)
tensor(1.7447, grad_fn=<NllLossBackward>)
tensor(1.8594, grad_fn=<NllLossBackward>)
tensor(1.7908, grad_fn=<NllLossBackward>)
tensor(1.7631, grad_fn=<NllLossBac

In [None]:
from torch.autograd import Variable
for epoch in range(100):  # again, normally you would NOT do 300 epochs, it is toy data
    batch = next(iter(train_iter)) # for batch in train_iter
    tag_scores=torch.zeros((batch_size,6))
    for i in range(batch_size):
    # for sentence, tags in batch:
        sentence=batch.text[:,i]
        tags=batch.label[i]
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = sentence
        #prepare_sequence(sentence, word_to_ix)
        # targets = tags
        # prepare_sequence(tags, tag_to_ix)

        # Step 3. Run our forward pass.
        tag_scores[i]=(model(sentence_in)[-1])

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
    targets=batch.label
    tag_scores=Variable(tag_scores,requires_grad=True)
    loss = loss_function(tag_scores, targets)
    loss.backward()
    optimizer.step()
    print(loss)

In [23]:
loss_function(tag_scores, targets-1)

tensor(1.8102, grad_fn=<NllLossBackward>)

In [24]:
loss_function(tag_scores, targets)


tensor(1.7965, grad_fn=<NllLossBackward>)

In [28]:
 # See what the scores are after training
with torch.no_grad():
       # for epoch in range(100):  # again, normally you would NOT do 300 epochs, it is toy data
        batch = next(iter(train_iter)) # for batch in train_iter
        tag_scores=torch.zeros((batch_size,6))
        for i in range(batch_size):
        # for sentence, tags in batch:
            sentence=batch.text[:,i]
            # tags=batch.label[i]
            tag_scores[i]=(model(sentence)[-1])

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        targets=batch.label
        tag_scores=Variable(tag_scores,requires_grad=True)
        print(tag_scores.argmax(axis=1))
        print(targets)

        # loss = loss_function(tag_scores, targets)
        # loss.backward()

tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4])
tensor([1, 3, 4, 2, 3, 1, 1, 4, 3, 4, 1, 4, 2, 5, 3, 3, 1, 4, 4, 3, 4, 5, 1, 5,
        3, 4, 3, 2, 5, 4, 3, 2])


In [None]:

# See what the scores are after training
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)

    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)

In [29]:
tag_scores

tensor([[-1.9543, -1.9938, -1.7600, -1.7788, -1.4444, -1.9282],
        [-1.9283, -1.9466, -1.7319, -1.7936, -1.4340, -2.0388],
        [-1.9283, -1.9466, -1.7320, -1.7935, -1.4340, -2.0388],
        [-1.9283, -1.9467, -1.7320, -1.7935, -1.4340, -2.0388],
        [-1.9473, -2.0759, -1.7619, -1.7907, -1.4517, -1.8377],
        [-1.9283, -1.9467, -1.7319, -1.7935, -1.4340, -2.0388],
        [-1.9283, -1.9466, -1.7320, -1.7935, -1.4340, -2.0388],
        [-1.9283, -1.9465, -1.7319, -1.7936, -1.4341, -2.0388],
        [-1.9283, -1.9467, -1.7319, -1.7935, -1.4340, -2.0388],
        [-1.9283, -1.9466, -1.7320, -1.7935, -1.4340, -2.0388],
        [-1.9283, -1.9467, -1.7319, -1.7935, -1.4340, -2.0388],
        [-1.9290, -1.9450, -1.7332, -1.7924, -1.4351, -2.0374],
        [-1.9283, -1.9467, -1.7319, -1.7935, -1.4340, -2.0388],
        [-1.9289, -1.9443, -1.7337, -1.7923, -1.4355, -2.0371],
        [-1.9283, -1.9467, -1.7319, -1.7935, -1.4340, -2.0389],
        [-1.9283, -1.9466, -1.7320, -1.7