In [1]:
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import DataLoader
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from utils.datasets import AlphabetSortingDataset, NumberSortingDataset
from models.pointer_net import PointerNet
from torch.utils.data import DataLoader

In [2]:
# Hyper-parameters read by the data-loading, model-construction and training
# cells further down.
params = dict(
    # Data
    # magnitude=6,  (only referenced by the commented-out dataset generators)
    batch_size=1024,
    shuffle=True,
    nof_workers=0,  # must stay at 0
    # Train
    nof_epoch=1000,
    lr=0.001,
    # GPU
    gpu=True,
    # Network
    input_size=300,
    embedding_size=300,
    hiddens=256,
    nof_lstms=2,
    dropout=0,
    bidir=True,
)

In [3]:
# Alternative data sources, kept for reference. Both need `LEN` and
# params['magnitude'] to be defined before they can be re-enabled.
#dataset = NumberSortingDataset(10**params['magnitude'], min_len=LEN, max_len=LEN)
#dataset = AlphabetSortingDataset(10**params['magnitude'], min_len=LEN, max_len=LEN, alphabet='0123456789')

# Start from a dataset built with a first argument of 0 and populate it from
# the state saved under data/training.
dataset = AlphabetSortingDataset(0) # load from state
# NOTE(review): the two values returned by `load` are bound as
# (max_len, min_len), and the evaluation cells later pass them swapped
# (min_len=max_len, max_len=min_len). Confirm the order `load` really returns.
max_len, min_len = dataset.load('data/training')

# Pass the worker count from `params` so that setting is actually honoured
# (it was previously defined but never handed to the loader).
dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        num_workers=params['nof_workers'])

In [4]:
# Instantiate the pointer network; the hyper-parameters are handed over
# positionally, in exactly this order.
model = PointerNet(*(params[key] for key in ('input_size',
                                             'embedding_size',
                                             'hiddens',
                                             'nof_lstms',
                                             'dropout',
                                             'bidir')))

# Move to the GPU only when it is both requested in `params` and available.
if params['gpu'] and torch.cuda.is_available():
    cudnn.benchmark = True
    model.cuda()
    # NOTE(review): the training and evaluation cells visible in this notebook
    # call `model` directly and never reference `net` — confirm whether
    # multi-GPU execution through this wrapper was intended.
    net = torch.nn.DataParallel(
        model,
        device_ids=range(torch.cuda.device_count()),
    )

# Optimise only the parameters that require gradients.
model_optim = optim.Adam(
    (weight for weight in model.parameters() if weight.requires_grad),
    lr=params['lr'],
)
CCE = torch.nn.CrossEntropyLoss()

In [5]:
from tqdm import tqdm


def _follow_pointers(pointer_batch, value_batch):
    """Map every row of pointer indices to the values it selects from its own sequence."""
    return [[values[point] for point in pointers.cpu().numpy()]
            for pointers, values in zip(pointer_batch, value_batch)]


def _exact_match_accuracy(model, device, num_samples, min_len, max_len):
    """Return the fraction of evaluation sequences the model reproduces exactly.

    A new ``AlphabetSortingDataset(num_samples, min_len=..., max_len=...)`` is
    built on every call. The model's predicted pointers and the dataset's
    target pointers are both decoded against the raw values, and a sequence
    counts as correct only when every decoded element matches.

    The caller is responsible for switching the model to eval mode.
    """
    test_dataset = AlphabetSortingDataset(num_samples, min_len=min_len, max_len=max_len)
    x, y, z = test_dataset[:]

    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        _, predicted = model(x.float().to(device))

    y_pred = _follow_pointers(predicted, z)
    y_true = _follow_pointers(y, z)
    correct = sum(all(pred == true for pred, true in zip(seq_pred, seq_true))
                  for seq_pred, seq_true in zip(y_pred, y_true))
    return correct / num_samples


losses = []          # training loss of every batch across all epochs, as Python floats
lr_decay = 0.95      # learning-rate multiplier applied after each epoch
num_samples = 100    # number of sequences evaluated after each batch

# Follow the device the model actually lives on, so batches are never sent to
# the GPU while the model stayed on the CPU (e.g. params['gpu'] is False or
# CUDA is unavailable).
device = next(model.parameters()).device

for epoch in range(params['nof_epoch']):
    epoch_loss_sum = 0.0
    epoch_batches = 0
    iterator = tqdm(dataloader, unit='Batch',
                    desc='Epoch %i/%i' % (epoch + 1, params['nof_epoch']))
    for x, y, _ in iterator:
        # training (the evaluation below switches to eval mode, so switch back
        # at the start of every step)
        model.train()

        train_batch = x.float().to(device)
        target_batch = y.to(device)

        o, p = model(train_batch)
        # Flatten all leading dimensions so each row of `o` is scored against
        # one entry of the flattened target.
        o = o.contiguous().view(-1, o.size(-1))
        loss = CCE(o, target_batch.view(-1))

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()

        # Store a plain float so the history does not hold on to tensors.
        loss_value = loss.item()
        losses.append(loss_value)
        epoch_loss_sum += loss_value
        epoch_batches += 1

        # evaluation
        model.eval()
        # NOTE(review): min_len/max_len are passed swapped on purpose to mirror
        # the order in which `dataset.load` results were bound — confirm.
        accuracy = _exact_match_accuracy(model, device, num_samples,
                                         min_len=max_len, max_len=min_len)

        # Report the running mean loss here: a postfix set after the inner
        # loop would land on a bar that has already finished.
        iterator.set_postfix(accuracy='{}'.format(accuracy),
                             loss='{:.4f}'.format(epoch_loss_sum / epoch_batches))

    # each epoch, reduce the learning rate
    for group in model_optim.param_groups:
        group['lr'] *= lr_decay

Epoch 1/1000: 100%|█████████████████████████████████████████████████| 196/196 [02:58<00:00,  1.10Batch/s, accuracy=0.0]
Epoch 2/1000: 100%|█████████████████████████████████████████████████| 196/196 [02:58<00:00,  1.10Batch/s, accuracy=0.0]
Epoch 3/1000: 100%|████████████████████████████████████████████████| 196/196 [02:59<00:00,  1.09Batch/s, accuracy=0.03]
Epoch 4/1000: 100%|████████████████████████████████████████████████| 196/196 [02:59<00:00,  1.09Batch/s, accuracy=0.04]
Epoch 5/1000: 100%|████████████████████████████████████████████████| 196/196 [02:56<00:00,  1.11Batch/s, accuracy=0.04]
Epoch 6/1000: 100%|████████████████████████████████████████████████| 196/196 [02:57<00:00,  1.10Batch/s, accuracy=0.04]
Epoch 7/1000: 100%|████████████████████████████████████████████████| 196/196 [02:58<00:00,  1.10Batch/s, accuracy=0.02]
Epoch 8/1000: 100%|████████████████████████████████████████████████| 196/196 [02:57<00:00,  1.10Batch/s, accuracy=0.85]
Epoch 9/1000: 100%|█████████████████████

KeyboardInterrupt: 

In [6]:
# Stand-alone evaluation: exact-match accuracy of the model on a newly
# constructed dataset of `num_samples` sequences.
model.eval()
num_samples = 100
# NOTE(review): min_len/max_len are passed swapped, mirroring the order in
# which the `dataset.load` results were bound — confirm this is intended.
test_dataset = AlphabetSortingDataset(num_samples, min_len=max_len, max_len=min_len)
x, y, z = test_dataset[:]

# Use the device the model lives on instead of assuming CUDA is present.
device = next(model.parameters()).device
x = x.float().to(device)
y = y.to(device)

# Inference only: no autograd graph is needed.
with torch.no_grad():
    o, p = model(x)

# Decode predicted and target pointers into the values they select from `z`.
y_pred = [[values[point] for point in pointers.cpu().numpy()]
          for pointers, values in zip(p, z)]
y_true = [[values[point] for point in pointers.cpu().numpy()]
          for pointers, values in zip(y, z)]

# A sequence counts as correct only if every decoded element matches.
correct = sum(all(pred == true for pred, true in zip(seq_pred, seq_true))
              for seq_pred, seq_true in zip(y_pred, y_true))
accuracy = correct / num_samples
accuracy

In [10]:
# Display the first decoded prediction (the values selected by the model's
# pointers for the first evaluation sequence).
y_pred[0]

['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [9]:
# Save the trained model via the project-defined `serialize` method.
# NOTE(review): presumably writes to the given path and expects the
# `serialized/` directory to exist — confirm against PointerNet.serialize.
model.serialize('serialized/pointer_net.pt')