In [1]:
"""
Pytorch implementation of Pointer Network.
http://arxiv.org/pdf/1506.03134v1.pdf.
"""

import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import DataLoader

In [2]:
import numpy as np
import argparse
from tqdm import tqdm

from models.augmented_pointer import PointerNet
from utils.dataset import ExtendedWikiSQL

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
params = {
    # Data
    'batch_size': 2,
    'shuffle': False,
    'nof_workers': 0,
    #Train
    'nof_epoch': 3,
    'lr': 0.1,
    # GPU
    'gpu': True,
    # Network
    'embedding_size': 300,
    'hiddens': 512,
    'nof_lstms': 5,
    'dropout': 0.1,
    'bidir': False # True not working right now
}

In [5]:
if params['gpu'] and torch.cuda.is_available():
    USE_CUDA = True
    print('Using GPU, %i devices.' % torch.cuda.device_count())
else:
    USE_CUDA = False

Using GPU, 1 devices.


In [6]:
def generate_batch(batch):
    #https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html
    inputs = [entry['input'] for entry in batch]
    targets = [entry['target'] for entry in batch]
    return {'input': inputs, 'target': targets}

In [7]:
model = PointerNet(params['embedding_size'],
                   params['hiddens'],
                   params['nof_lstms'],
                   params['dropout'],
                   params['bidir'])

dataset = ExtendedWikiSQL()
dataset.load_from_torch('training/data/ewikisql')

dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        num_workers=params['nof_workers'],
                        collate_fn=generate_batch
                       )

if USE_CUDA:
    model.cuda()
    net = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

CCE = torch.nn.CrossEntropyLoss()
model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                model.parameters()),
                         lr=params['lr'])


In [110]:
for i_batch, sample_batched in enumerate(dataloader):
    inputs = sample_batched['input']
    targets = sample_batched['target']
    max_len = len(max(inputs, key=len))
    inputs, targets = pad_inputs(inputs, targets, max_len)
    break

[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
         0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
         1, 0, 0, 0]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
         0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0])]

In [99]:
def pad_inputs(inputs, targets, max_len, emb_size=300):
    inputs_padded = []
    targets_padded = []
    for i, t in zip(inputs, targets):
        i_padding = torch.zeros(max_len - len(i), emb_size)
        t_padding = torch.zeros(max_len - len(i), dtype=torch.long)
        i_padded = torch.cat((i, i_padding), 0)
        t_padded = torch.cat((t, t_padding), 0)
        inputs_padded.append(i_padded), targets_padded.append(t_padded)
    return inputs_padded, targets_padded

In [9]:
losses = []

for epoch in range(params['nof_epoch']):
    batch_loss = []
    iterator = tqdm(dataloader, unit='Batch')

    for i_batch, sample_batched in enumerate(iterator):
        iterator.set_description('Batch %i/%i' % (epoch+1, params['nof_epoch']))


        train_batch = Variable(sample_batched['input'])
        target_batch = Variable(sample_batched['target'])

        if USE_CUDA:
            train_batch = train_batch.cuda()
            target_batch = target_batch.cuda()

        o, p = model(train_batch)
        o = o.contiguous().view(-1, o.size()[-1])

        target_batch = target_batch.view(-1)
    
        loss = CCE(o, target_batch) # need to take the length of the table into account

        losses.append(loss.data[0])
        batch_loss.append(loss.data[0])
        
        #losses.append(loss.data)
        #batch_loss.append(loss.data)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()
        
        iterator.set_postfix(loss='{}'.format(loss.data[0]))
        #iterator.set_postfix(loss='{}'.format(loss.data))
    batch_loss = torch.Tensor(batch_loss)
    iterator.set_postfix(loss=np.average(batch_loss))


  0%|                                                                                         | 0/3 [00:00<?, ?Batch/s][A
Batch 1/3:   0%|                                                                              | 0/3 [00:03<?, ?Batch/s]
Batch 1/3:   0%|                                                                              | 0/3 [00:00<?, ?Batch/s]

TypeError: Variable data has to be a tensor, but got list

TODOs:
- Augmented Pointer für Batch Sizes > 1 ermöglichen