In [1]:
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import DataLoader
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from utils.datasets import AlphabetSortingDataset, NumberSortingDataset
from models.pointer_net import PointerNet
from torch.utils.data import DataLoader

In [2]:
# Central configuration for the experiment; later cells read it via params[...].
params = dict(
    # --- Data ---
    magnitude=3,          # the dataset is built with 10**magnitude as its first argument
    batch_size=128,
    shuffle=True,
    nof_workers=0,        # must stay at 0
    # --- Training ---
    nof_epoch=3,
    lr=0.001,             # initial Adam learning rate
    # --- GPU ---
    gpu=True,             # the model is moved to CUDA only if a device is also available
    # --- Network (passed positionally to PointerNet, in this order) ---
    input_size=300,
    embedding_size=300,
    hiddens=256,
    nof_lstms=2,
    dropout=0,
    bidir=True,
)

In [3]:
# Sequence length used for both min_len and max_len, so the bounds coincide.
LEN = 18

# Alternative datasets, kept for quick experiments:
#dataset = NumberSortingDataset(10**params['magnitude'], min_len=LEN, max_len=LEN)
#dataset = AlphabetSortingDataset(10**params['magnitude'], min_len=LEN, max_len=LEN, alphabet='0123456789')
dataset = AlphabetSortingDataset(10**params['magnitude'], min_len=LEN, max_len=LEN)

# Batching is driven entirely by `params`. The worker count is passed through
# explicitly so that the 'nof_workers' setting is honoured rather than ignored.
dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        num_workers=params['nof_workers'])

In [4]:
# Build the pointer network from the architecture settings in `params`.
network_args = (
    params['input_size'],
    params['embedding_size'],
    params['hiddens'],
    params['nof_lstms'],
    params['dropout'],
    params['bidir'],
)
model = PointerNet(*network_args)

# Use the GPU only when it is both requested and actually present.
use_cuda = params['gpu'] and torch.cuda.is_available()
if use_cuda:
    model.cuda()
    # NOTE(review): `net` wraps the model for multi-GPU use, but the cells
    # visible in this notebook train and evaluate `model` directly.
    net = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

# Loss and optimizer; only parameters that require gradients are optimized.
CCE = torch.nn.CrossEntropyLoss()
trainable_params = (weight for weight in model.parameters() if weight.requires_grad)
model_optim = optim.Adam(trainable_params, lr=params['lr'])

In [5]:
from tqdm import tqdm

# Per-batch training loss over all epochs, stored as plain Python floats so the
# history is detached from the autograd graph and from any CUDA device.
losses = []

# Send every batch to the device the model's parameters live on. This keeps the
# data placement consistent with the model even when params['gpu'] is False on
# a machine that has CUDA available.
device = next(model.parameters()).device

model.train()
for epoch in range(params['nof_epoch']):
    batch_loss = []
    iterator = tqdm(dataloader, unit='Batch',
                    desc='Epoch %i/%i' % (epoch + 1, params['nof_epoch']))

    for sample_batched in iterator:
        # The third element of each sample is not needed for training.
        x, y, _ = sample_batched
        train_batch = x.float().to(device)
        target_batch = y.to(device)

        o, p = model(train_batch)
        # Flatten to (N, last_dim) scores and (N,) targets, as CrossEntropyLoss expects.
        o = o.contiguous().view(-1, o.size()[-1])
        target_batch = target_batch.view(-1)

        # CCE was built with default arguments, so the loss is already averaged
        # over all flattened positions; no extra division by length is applied.
        loss = CCE(o, target_batch)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()

        loss_value = loss.item()
        losses.append(loss_value)
        batch_loss.append(loss_value)
        iterator.set_postfix(loss='{}'.format(loss_value))

    # Each epoch, reduce the learning rate by 5%.
    for param in model_optim.param_groups:
        param['lr'] *= 0.95

    # Report the epoch average on its own line: the progress bar has finished
    # iterating by this point, so a postfix update may no longer be rendered.
    if batch_loss:
        tqdm.write('Epoch %i/%i mean loss: %s'
                   % (epoch + 1, params['nof_epoch'], np.mean(batch_loss)))

Epoch 1/3: 100%|█████████████████████████████████████████| 782/782 [02:58<00:00,  4.37Batch/s, loss=2.5850460529327393]
Epoch 2/3: 100%|█████████████████████████████████████████| 782/782 [02:57<00:00,  4.41Batch/s, loss=2.6493589878082275]
Epoch 3/3: 100%|████████████████████████████████████████████| 782/782 [02:57<00:00,  4.41Batch/s, loss=2.5782470703125]


In [6]:
def decode_pointers(pointer_batch, value_batch):
    """Map each row of pointer indices onto the matching row of values.

    For every (pointers, values) pair, returns the list
    [values[i] for i in pointers], i.e. the values reordered by the pointers.
    `pointer_batch` rows must be tensors (they are moved to CPU and converted
    to NumPy before indexing).
    """
    return [[values[point] for point in pointers.cpu().numpy()]
            for pointers, values in zip(pointer_batch, value_batch)]


model.eval()

num_samples = 100
dataset = AlphabetSortingDataset(num_samples, min_len=LEN, max_len=LEN)
x, y, z = dataset[:]

# Follow the model's device instead of assuming CUDA, so this cell also runs
# on CPU-only machines or when the model was left on the CPU.
device = next(model.parameters()).device
x = x.to(device).float()
y = y.to(device)

# Inference only: no autograd graph is needed.
with torch.no_grad():
    o, p = model(x)

# Reorder the raw values by the predicted and by the target pointers.
y_pred = decode_pointers(p, z)
y_true = decode_pointers(y, z)

In [7]:
# Exact-match accuracy: a sequence counts as correct only when every paired
# position of prediction and target agrees.
separator = "-" * 60
correct = 0
for seq_pred, seq_true in zip(y_pred, y_true):
    is_exact_match = all(got == want for got, want in zip(seq_pred, seq_true))
    correct += is_exact_match
    # Show prediction, target and a divider, one per line.
    print(seq_pred, seq_true, separator, sep="\n")
acc = correct / num_samples
acc

['b', 'e', 'f', 'g', 'i', 'j', 'k', 'l', 'n', 'p', 'q', 'r', 't', 'u', 'v', 'w', 'x', 'y']
['b', 'e', 'f', 'g', 'i', 'j', 'k', 'l', 'n', 'p', 'q', 'r', 't', 'u', 'v', 'w', 'x', 'y']
------------------------------------------------------------
['b', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'p', 'q', 'r', 's', 'w', 'a', 'x', 'z']
['a', 'b', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'p', 'q', 'r', 's', 'w', 'x', 'z']
------------------------------------------------------------
['b', 'c', 'd', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 's', 'u', 'v', 'w', 'x']
['b', 'c', 'd', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 's', 'u', 'v', 'w', 'x']
------------------------------------------------------------
['d', 'f', 'g', 'h', 'j', 'k', 'm', 'n', 'o', 'p', 'q', 's', 't', 'u', 'v', 'x', 'y', 'z']
['d', 'f', 'g', 'h', 'j', 'k', 'm', 'n', 'o', 'p', 'q', 's', 't', 'u', 'v', 'x', 'y', 'z']
------------------------------------------------------------
['b', 'd', 'e', 'g', 'h', 'i

0.38

In [8]:
# Hand the checkpoint location to the model's own serialize() method.
# NOTE(review): presumably this writes the trained weights to that path —
# confirm against the PointerNet implementation.
save_path = 'serialized/pointer_net.pt'
model.serialize(save_path)

In [9]:
# Hand the checkpoint location to the model's own initialize() method.
# NOTE(review): presumably this restores weights from that path — confirm
# against the PointerNet implementation.
load_path = 'serialized/pointer_net.pt'
model.initialize(load_path)