In [1]:
"""
PyTorch implementation of Pointer Network.
http://arxiv.org/pdf/1506.03134v1.pdf.
"""

import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import DataLoader

In [2]:
import numpy as np
import argparse
from tqdm import tqdm

from models.augmented_pointer import PointerNet
from utils.dataset import ExtendedWikiSQL

In [3]:
# Hyper-parameters and runtime switches read by the training cell.
params = dict(
    # Data loading (forwarded to the DataLoader)
    batch_size=1,
    shuffle=False,
    # Optimisation: number of passes over the data and Adam learning rate
    nof_epoch=1,
    lr=0.001,
    # Hardware: request CUDA; it is only used if torch reports it available
    gpu=True,
    # Network: passed positionally to PointerNet in this order
    embedding_size=300,
    hiddens=512,
    nof_lstms=5,
    dropout=0.1,
    bidir=False,
)

In [10]:
# Build the model, data pipeline and optimiser from `params`, then train.
# After this cell runs, `model`, `model_optim` and `losses` (one Python float
# per optimisation step) are available in the notebook namespace.

# Use the GPU only when it is both requested and actually available.
USE_CUDA = bool(params['gpu'] and torch.cuda.is_available())
if USE_CUDA:
    print('Using GPU, %i devices.' % torch.cuda.device_count())

model = PointerNet(params['embedding_size'],
                   params['hiddens'],
                   params['nof_lstms'],
                   params['dropout'],
                   params['bidir'])

dataset = ExtendedWikiSQL()
dataset.load_from_torch('training/data/ewikisql')

dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        num_workers=4)

if USE_CUDA:
    model.cuda()
    # NOTE(review): `net` is not used by the loop below, which calls `model`
    # directly; it is kept so that later cells referencing `net` keep working.
    net = torch.nn.DataParallel(
        model, device_ids=list(range(torch.cuda.device_count())))
    cudnn.benchmark = True

CCE = torch.nn.CrossEntropyLoss()
# Optimise only the parameters that require gradients.
model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                model.parameters()),
                         lr=params['lr'])

# Per-step loss history, stored as plain floats so that no GPU tensors are
# kept alive and the values can be averaged or plotted directly.
losses = []

# Make sure dropout is active even if the model was switched to eval mode
# by an earlier run of another cell.
model.train()

for epoch in range(params['nof_epoch']):
    batch_loss = []
    iterator = tqdm(dataloader, unit='Batch')

    for sample_batched in iterator:
        # The counters shown here are epochs, so label them as such.
        iterator.set_description('Epoch %i/%i' % (epoch + 1, params['nof_epoch']))

        # Tensors are used directly; the Variable wrapper is a deprecated no-op.
        train_batch = sample_batched['input']
        target_batch = sample_batched['target']

        if USE_CUDA:
            train_batch = train_batch.cuda()
            target_batch = target_batch.cuda()

        o, p = model(train_batch)
        # Flatten all leading dimensions so each row is one prediction.
        o = o.contiguous().view(-1, o.size()[-1])
        target_batch = target_batch.view(-1)

        # TODO: need to take the length of the table into account
        loss = CCE(o, target_batch)

        # .item() copies the scalar to the host as a Python float.
        loss_value = loss.item()
        losses.append(loss_value)
        batch_loss.append(loss_value)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()

        iterator.set_postfix(loss='{}'.format(loss_value))

    # Report the epoch mean; skip it when the loader produced no batches,
    # because averaging an empty list would yield NaN.
    if batch_loss:
        iterator.set_postfix(loss=np.average(batch_loss))





  0%|                                                                                         | 0/5 [00:00<?, ?Batch/s][A[A[A[A

Using GPU, 1 devices.


















Batch 1/10:   0%|                                                     | 0/5 [00:01<?, ?Batch/s, loss=4.615302085876465][A[A[A[A



Batch 1/10:  20%|█████████                                    | 1/5 [00:01<00:07,  1.79s/Batch, loss=4.615302085876465][A[A[A[A





tensor(4.6153, device='cuda:0')












Batch 1/10:  20%|█████████                                    | 1/5 [00:02<00:07,  1.79s/Batch, loss=4.509329795837402][A[A[A[A



Batch 1/10:  40%|██████████████████                           | 2/5 [00:02<00:04,  1.58s/Batch, loss=4.509329795837402][A[A[A[A





tensor(4.5093, device='cuda:0')










Batch 1/10:  40%|██████████████████                           | 2/5 [00:03<00:04,  1.58s/Batch, loss=4.237112045288086][A[A[A[A



Batch 1/10:  60%|███████████████████████████                  | 3/5 [00:03<00:02,  1.36s/Batch, loss=4.237112045288086][A[A[A[A







tensor(4.2371, device='cuda:0')










Batch 1/10:  60%|███████████████████████████                  | 3/5 [00:04<00:02,  1.36s/Batch, loss=4.161250114440918][A[A[A[A



Batch 1/10:  80%|████████████████████████████████████         | 4/5 [00:04<00:01,  1.19s/Batch, loss=4.161250114440918][A[A[A[A







tensor(4.1613, device='cuda:0')


























Batch 1/10:  80%|████████████████████████████████████         | 4/5 [00:07<00:01,  1.19s/Batch, loss=5.407331466674805][A[A[A[A



Batch 1/10: 100%|█████████████████████████████████████████████| 5/5 [00:07<00:00,  1.47s/Batch, loss=5.407331466674805][A[A[A[A




  0%|                                                                                         | 0/5 [00:00<?, ?Batch/s][A[A[A[A

tensor(5.4073, device='cuda:0')


















Batch 2/10:   0%|                                                     | 0/5 [00:01<?, ?Batch/s, loss=4.605772495269775][A[A[A[A



Batch 2/10:  20%|█████████                                    | 1/5 [00:01<00:07,  1.91s/Batch, loss=4.605772495269775][A[A[A[A





tensor(4.6058, device='cuda:0')














Batch 2/10:  20%|█████████▏                                    | 1/5 [00:03<00:07,  1.91s/Batch, loss=4.50044059753418][A[A[A[A



Batch 2/10:  40%|██████████████████▍                           | 2/5 [00:03<00:05,  1.68s/Batch, loss=4.50044059753418][A[A[A[A





tensor(4.5004, device='cuda:0')












Batch 2/10:  40%|██████████████████▍                           | 2/5 [00:03<00:05,  1.68s/Batch, loss=4.23504114151001][A[A[A[A



Batch 2/10:  60%|███████████████████████████▌                  | 3/5 [00:03<00:02,  1.44s/Batch, loss=4.23504114151001][A[A[A[A





tensor(4.2350, device='cuda:0')










Batch 2/10:  60%|██████████████████████████▍                 | 3/5 [00:04<00:02,  1.44s/Batch, loss=4.1598896980285645][A[A[A[A



Batch 2/10:  80%|███████████████████████████████████▏        | 4/5 [00:04<00:01,  1.25s/Batch, loss=4.1598896980285645][A[A[A[A





tensor(4.1599, device='cuda:0')
























KeyboardInterrupt: 

TODOs:
- Enable the augmented pointer network to handle batch sizes > 1