In [1]:
from loader import getLoader

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.nn.utils.rnn import pad_sequence


device = torch.device('cpu')

In [2]:
class Classifier(nn.Module):
    def __init__(self, dictionary_size, hidden_size, output_size=2):
        super(Classifier, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(dictionary_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size,output_size)

    def forward(self, pad_seqs, seq_lengths, hidden):
        """
        Args:
          pad_seqs: Tensor [max_seq_length, batch_size, 1]
          seq_lengths: list of sequence lengths
          hidden: Tensor [1, batch_size, hidden_size]

        Returns:
          outputs: Tensor [max_seq_length, batch_size, hidden_size]
          hidden: Tensor [1, batch_size, hidden_size]
        """
        # YOUR CODE HERE
        batch_size = pad_seqs.shape[1]
        
        embedded = self.embedding(pad_seqs).view(pad_seqs.shape[0], pad_seqs.shape[1], -1)

        packed = pack_padded_sequence(embedded, seq_lengths, batch_first = False)
        
        self.lstm.flatten_parameters()
        _,hidden = self.lstm(packed)
        
        fc = self.linear(hidden[0])

        return fc

    def init_hidden(self, batch_size=1, device=device):
        return torch.zeros(1, batch_size, self.hidden_size, device=device)

In [3]:
hidden_size = 200
dictionary_size = 10 #dummy
classifier = Classifier(dictionary_size, hidden_size).to(device)

In [4]:
trainloader = getLoader()

In [5]:
#Quick output test
for i, batch in enumerate(trainloader):
    print("iter", i)
    pad_input_seqs, input_seq_lengths, pad_target_seqs, target_seq_lengths = batch
    batch_size = pad_input_seqs.size(1)
    pad_input_seqs, pad_target_seqs = pad_input_seqs.to(device), pad_target_seqs.to(device)

    classifier_hidden = classifier.init_hidden(batch_size, device)
    print("in:",pad_input_seqs.size())
    classifier_hidden = classifier(pad_input_seqs, input_seq_lengths, classifier_hidden)
    print(classifier_hidden)
    print("out:",classifier_hidden.size())

iter 0
in: torch.Size([7, 4, 1])
tensor([[[-0.0724,  0.1268],
         [-0.0665,  0.1390],
         [ 0.1033,  0.0062],
         [-0.0558,  0.2765]]], grad_fn=<AddBackward0>)
out: torch.Size([1, 4, 2])
iter 1
in: torch.Size([5, 4, 1])
tensor([[[ 0.1599,  0.0069],
         [ 0.0333,  0.0057],
         [ 0.0435, -0.0501],
         [ 0.0270, -0.0480]]], grad_fn=<AddBackward0>)
out: torch.Size([1, 4, 2])
iter 2
in: torch.Size([9, 4, 1])
tensor([[[ 0.2070,  0.0440],
         [-0.0805,  0.0787],
         [-0.0503,  0.2583],
         [ 0.1154, -0.0208]]], grad_fn=<AddBackward0>)
out: torch.Size([1, 4, 2])


In [6]:
n_epochs = 10
classifier_optimizer = optim.Adam(classifier.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [7]:
for epoch in range(n_epochs):
    running_loss = 0.0
    print("Epoch", epoch)
    
    for i, batch in enumerate(trainloader):
        classifier_optimizer.zero_grad()
        
        pad_input_seqs, input_seq_lengths, pad_target_seqs, target_seq_lengths = batch
        batch_size = pad_input_seqs.size(1)
        pad_input_seqs, pad_target_seqs = pad_input_seqs.to(device), pad_target_seqs.to(device)

        classifier_hidden = classifier.init_hidden(batch_size, device)

        # Encode input sequence
        classifier_hidden = classifier(pad_input_seqs, input_seq_lengths, classifier_hidden)

        print(classifier_hidden,pad_target_seqs)
        loss = criterion(classifier_hidden.view(4,2), pad_target_seqs)
        loss.backward()
        

        classifier_optimizer.step()
        print("loss",loss.item())



print('Finished Training')

Epoch 0
tensor([[[-0.0724,  0.1268],
         [ 0.1033,  0.0062],
         [ 0.1154, -0.0208],
         [ 0.0270, -0.0480]]], grad_fn=<AddBackward0>) tensor([0, 1, 1, 1])
loss 0.7588797807693481
tensor([[[ 0.0554,  0.0014],
         [-0.1235,  0.1138],
         [-0.1252,  0.1281],
         [-0.0482,  0.2682]]], grad_fn=<AddBackward0>) tensor([1, 1, 1, 0])
loss 0.6850789189338684
tensor([[[ 0.0130,  0.2832],
         [-0.1406,  0.3120],
         [ 0.0399,  0.1481],
         [-0.2029,  0.2378]]], grad_fn=<AddBackward0>) tensor([0, 0, 0, 1])
loss 0.7569455504417419
Epoch 1
tensor([[[ 0.1690,  0.0023],
         [-0.4103,  0.4128],
         [ 0.1621,  0.0219],
         [-0.2686,  0.3668]]], grad_fn=<AddBackward0>) tensor([0, 1, 0, 1])
loss 0.5069711208343506
tensor([[[ 0.2116,  0.0574],
         [-0.4379,  0.4302],
         [ 0.0178,  0.0201],
         [-0.3841,  0.3841]]], grad_fn=<AddBackward0>) tensor([0, 1, 1, 1])
loss 0.5106432437896729
tensor([[[ 0.1387, -0.1106],
         [-0.0975,  