In [1]:
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.nn.functional as F

In [2]:
import numpy as np
from torch.utils import data
import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# kernel session, including any raised by the libraries imported above.
# Consider narrowing the filter to the specific warning that motivated it.
warnings.filterwarnings("ignore")

In [3]:
from utils.datasets import NumberSortingDataset
from models.pointer_net import PointerNet

---

In [4]:
# Run configuration, looked up by string key in the cells below.
params = dict(
    # --- data loading ---
    batch_size=1024,
    shuffle=True,
    nof_workers=0,  # must stay at 0
    # --- training ---
    nof_epoch=30,
    lr=1e-4,
    # --- hardware ---
    gpu=True,  # request CUDA; it is only used when torch reports it available
    # --- network ---
    input_size=1,
    embedding_size=256,
    hiddens=512,
    nof_lstms=8,
    dropout=0.3,
    bidir=True,  # True not working right now
)

In [5]:
# CUDA is used only when it is both requested in `params` and reported
# available by torch; the device count is printed in that case.
USE_CUDA = bool(params['gpu'] and torch.cuda.is_available())
if USE_CUDA:
    print('Using GPU, %i devices.' % torch.cuda.device_count())

Using GPU, 1 devices.


In [6]:
# Build the network from the architecture entries of `params`
# (positional order: input size, embedding size, hidden size, number of LSTMs,
# dropout, bidirectional flag).
model = PointerNet(params['input_size'],
                   params['embedding_size'],
                   params['hiddens'],
                   params['nof_lstms'],
                   params['dropout'],
                   params['bidir'])

# 100000 generated samples; min_len/max_len presumably bound the length of
# each sequence -- confirm against utils.datasets.NumberSortingDataset.
dataset = NumberSortingDataset(100000, min_len=4, max_len=10)

# Default collation is used; a custom `generate_batch` collate_fn was
# previously disabled here.
dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        num_workers=params['nof_workers'])

if USE_CUDA:
    model.cuda()
    # `net` wraps the same module for multi-GPU execution. Note that the
    # training cell calls `model` directly, so this wrapper is currently unused.
    net = torch.nn.DataParallel(model,
                                device_ids=list(range(torch.cuda.device_count())))
    cudnn.benchmark = True
else:
    # Keep `net` bound on CPU runs too, so a cell that refers to it does not
    # raise NameError when CUDA is unavailable.
    net = model

# Loss module kept for cells that reference it; the training cell below uses
# F.cross_entropy instead.
CCE = torch.nn.CrossEntropyLoss()

# Optimise only the parameters that require gradients.
model_optim = optim.Adam((param for param in model.parameters()
                          if param.requires_grad),
                         lr=params['lr'])


In [7]:
# Per-batch training loss over all epochs, stored as plain Python floats so
# the list holds no device tensors and can be plotted or averaged directly.
losses = []

for epoch in range(params['nof_epoch']):
    model.train()  # make sure the model is in training mode for this epoch
    batch_loss = []
    # The description is constant within an epoch, so it is set once here
    # instead of on every batch.
    iterator = tqdm(dataloader, unit='Batch',
                    desc='Epoch %i/%i' % (epoch+1, params['nof_epoch']))

    for i_batch, sample_batched in enumerate(iterator):
        # Each sample is indexed as (inputs, targets); inputs are cast to
        # float and targets to long, as F.cross_entropy needs integer targets.
        train_batch, target_batch = sample_batched[0].float(), sample_batched[1].long()

        if USE_CUDA:
            train_batch = train_batch.cuda()
            target_batch = target_batch.cuda()

        # The model returns two values; `o` is used as the score tensor for
        # the loss and `p` is not used during training.
        o, p = model(train_batch)

        # NOTE(review): for inputs with more than two dimensions,
        # F.cross_entropy treats dim 1 as the class axis. If `o` is laid out
        # as (batch, step, position), the scores and targets would need
        # flattening first, as the previously disabled lines did:
        #   o = o.contiguous().view(-1, o.size()[-1])
        #   target_batch = target_batch.view(-1)
        # Confirm against PointerNet's output layout before changing this.
        # TODO (from the original author): take the sequence length into
        # account, e.g. divide by target_batch.shape[1].
        loss = F.cross_entropy(o, target_batch)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()

        # .item() copies the scalar to the host as a Python float.
        loss_value = loss.item()
        losses.append(loss_value)
        batch_loss.append(loss_value)

        iterator.set_postfix(loss='{}'.format(loss_value))

    # Report the epoch mean on its own line; the bar has already finished
    # iterating at this point, so a postfix set here would not be shown.
    if batch_loss:
        tqdm.write('Epoch %i/%i mean loss: %.4f'
                   % (epoch+1, params['nof_epoch'], np.mean(batch_loss)))

Epoch 1/30: 100%|██████████████████████████████████████████| 98/98 [01:00<00:00,  1.63Batch/s, loss=2.2850730419158936]
Epoch 2/30: 100%|███████████████████████████████████████████| 98/98 [00:58<00:00,  1.67Batch/s, loss=2.255683660507202]
Epoch 3/30:  73%|███████████████████████████████▌           | 72/98 [00:43<00:15,  1.68Batch/s, loss=2.233325958251953]

KeyboardInterrupt: 

In [None]:
# Debug inspection: first output of the most recent forward pass in the
# training cell (the tensor passed to F.cross_entropy). Only defined after
# that cell has processed at least one batch.
o

In [None]:
# Debug inspection: second output of the most recent forward pass in the
# training cell -- presumably the predicted pointer indices; confirm against
# models.pointer_net. Only defined after that cell has processed a batch.
p

In [None]:
# Debug inspection: targets of the most recent batch processed by the
# training cell (cast to long, and moved to the GPU when USE_CUDA is set).
target_batch