# Pointer networks basic implementation

## Tasks
Chosen task: "convex hull"
* [x] Generate the dataset
* [x] Evaluation metric
* [x] Implement the model
* [ ] Reproduce the results from the paper

In [None]:
# IPython autoreload in mode 2: modules are re-imported before each cell is
# executed, so edits to imported source files take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
from tensorboardX import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm
import collections
import matplotlib.pyplot as plt
import numpy as np
import os
import torch

%matplotlib inline

In [None]:
def plot_points_and_hull(points, hull_indices, c='r'):
    """Scatter all points and draw the closed hull outline on the current axes.

    Args:
        points: array read as ``points[:, 0]`` (x) and ``points[:, 1]`` (y).
        hull_indices: 1-based indices into ``points`` giving the hull vertices
            in drawing order.
        c: third positional argument handed to ``plt.plot`` (e.g. ``'r'``).
    """
    n_points = points.shape[0]
    n_hull = hull_indices.shape[0]
    print('{} points, {} in the hull'.format(n_points, n_hull))

    # Repeat the first vertex at the end so the outline comes back to its start.
    closed_loop = np.append(hull_indices, hull_indices[0])

    # Indices are 1-based, hence the shift before indexing.
    outline = points[closed_loop - 1]
    xs, ys = points[:, 0], points[:, 1]

    plt.scatter(xs, ys)
    plt.plot(outline[:, 0], outline[:, 1], c)

### Define parameters

In [None]:
# Immutable record of every run setting: device, data, model and training options.
Params = collections.namedtuple(
    'Params',
    'gpu_device '
    'batch_size embedding_size hiddens n_lstms dropout bidir '
    'lr n_epochs',
)

In [None]:
# Run configuration, listed in the same order as the Params fields.
params = Params(
    gpu_device=2,  # a value below 0 turns CUDA off (see USE_CUDA)

    # Data
    batch_size=1,

    # Model params # FIXME: NOT USED RIGHT NOW!
    embedding_size=128,
    hiddens=512,
    n_lstms=2,
    dropout=0,
    bidir=False,

    # Training params
    lr=1e-4,
    n_epochs=50,
)

In [None]:
# Index of the GPU requested in the run configuration.
DEVICE = params.gpu_device
# CUDA is used only when a non-negative device index is configured AND a GPU is usable.
USE_CUDA = DEVICE >= 0 and torch.cuda.is_available()

## Load the data

In [None]:
from datasets import ConvexHullDataset

In [None]:
###### Convex hull data
# Produced beforehand with convex_hull_generator.py
data = np.load('data/convex_hull.npz')

# 'arr_0' unpacks into the train / val / test splits; each is converted to an ndarray.
data_train, data_val, data_test = map(np.array, data['arr_0'])

In [None]:
# Wrap each split in the project's dataset class (train, val, test in that order).
dataset_train, dataset_val, dataset_test = (
    ConvexHullDataset(split) for split in (data_train, data_val, data_test)
)

### Define Dataloader

In [None]:
# Train and val loaders shuffle and use 4 workers;
# the test loader keeps the dataset order and uses a single worker.
dataloader_train = DataLoader(
    dataset_train,
    batch_size=params.batch_size,
    shuffle=True,
    num_workers=4,
)
dataloader_val = DataLoader(
    dataset_val,
    batch_size=params.batch_size,
    shuffle=True,
    num_workers=4,
)
dataloader_test = DataLoader(
    dataset_test,
    batch_size=params.batch_size,
    shuffle=False,
    num_workers=1,
)

### Visualize the data

In [None]:
# Draw five validation batches, one figure each, using the first sample of every batch.
d = iter(dataloader_val)
for ix in range(5):
    batch = next(d)
    points, inds_hull = (
        batch[key][0].data.numpy() for key in ('points', 'inds_hull')
    )

    plt.figure()
    plot_points_and_hull(points, inds_hull)
    plt.show()

## Define the model

In [None]:
from pointer_net import PointerNet

In [None]:
# Build the network with PointerNet's constructor defaults; none of the model
# hyper-parameters stored in `params` are passed in here.
model = PointerNet()

In [None]:
# Smoke test: push a single validation sample through the model.
batch = next(iter(dataloader_val))
points, inds_hull = batch['points'][0], batch['inds_hull'][0]

# Put the leading batch axis back; the second argument (10) is presumably the
# requested output length, as in predict_and_eval's max_output_len.
pointers = model(points.unsqueeze(0), 10)
print(points.shape)
print(pointers.shape)
# Shown as the cell output: the pointer values summed over axis 2.
pointers.sum(dim=2)

In [None]:
# Move the model to the configured GPU only when CUDA is both requested and available.
# USE_CUDA is a bool, so it is tested for truth: `USE_CUDA >= 0` is also true for
# False and would call .cuda() on a CPU-only machine.
if USE_CUDA:
    model.cuda(device=params.gpu_device)
#     cudnn.benchmark = True

## Define the optimizer / loss

In [None]:
# Cross-entropy between the predicted pointers and the target hull indices.
CCE = torch.nn.CrossEntropyLoss()

# Adam over the trainable parameters only (those with requires_grad set).
model_optim = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad),
    lr=params.lr,
)

## Logging

In [None]:
model_str = 'test'

# Checkpoints and tensorboard logs each get a sub-folder named after the run.
weights_folder = '/opt/weights/' + model_str
log_folder = '../tensorboard-logs/' + model_str
writer = SummaryWriter(log_folder)  # tensorboard event writer
print('logging to: ' + weights_folder)

# Deliberately no exist_ok: an already existing weights folder raises here
# instead of being reused by a new run.
os.makedirs(weights_folder)

## Train

In [None]:
# Both intervals are compared against the batch index inside the current epoch
# (i_batch), not the global iteration counter, so batch 0 of every epoch triggers both.
save_every = 10000  # write a checkpoint when i_batch % save_every == 0
val_every = 1000  # run validation when i_batch % val_every == 0

In [None]:
def predict_and_eval(model, batch, CCE):
    """Run the model on one batch and compute the loss against the target hull.

    Args:
        model: callable invoked as ``model(points, max_output_len=...)``.
        batch: mapping with ``'points'`` and ``'inds_hull'`` tensors; the hull
            indices are 1-based and have one row per sample.
        CCE: loss callable taking ``(N, C)`` scores and ``(N,)`` class targets.

    Returns:
        ``(pointers, loss)``: the raw model output and the loss value.
    """
    points = Variable(batch['points'])
    # FIXME: Must append an EOS token. Subtract 1 to make the targets 0-based.
    target_inds_hull = Variable(batch['inds_hull'] - 1)

    if USE_CUDA:
        points = points.cuda(params.gpu_device)
        target_inds_hull = target_inds_hull.cuda(params.gpu_device)

    # Generate as many outputs as the target sequence has.
    # FIXME: needed because there is no EOS token; it also makes sense during training.
    n_outputs = target_inds_hull.shape[1]
    pointers = model(points, max_output_len=n_outputs)
    assert n_outputs == pointers.shape[1]

    # Flatten (batch, step) into one axis so the loss always sees (N, C) scores and
    # (N,) targets. A bare squeeze() only produced that layout for batch_size == 1
    # with more than one output step. Assumes the last axis of `pointers` indexes
    # the candidate input points.
    n_candidates = pointers.shape[-1]
    loss = CCE(pointers.reshape(-1, n_candidates), target_inds_hull.reshape(-1))
    return pointers, loss

In [None]:
# Training loop: one optimizer step per batch, with periodic checkpoints and a
# short validation pass that also plots a few predicted hulls against the targets.
n_val_batches = 10  # validation batches averaged per evaluation
n_val_plots = 4  # how many of them are drawn in the 2x2 grid

for epoch in range(params.n_epochs):
    for i_batch, train_batch in enumerate(dataloader_train):
        # The overall iteration number across epochs (x-axis of the logged curves).
        iter_cntr = epoch * len(dataloader_train) + i_batch

        # Forward
        pointers, train_loss = predict_and_eval(model, train_batch, CCE)

        # Backprop
        model_optim.zero_grad()
        train_loss.backward()
        model_optim.step()

        # add_scalar expects (tag, scalar_value, global_step): value first, step last.
        writer.add_scalar('train.loss', train_loss.item(), iter_cntr)

        # Save
        if i_batch % save_every == 0:
            checkpoint_path = os.path.join(
                weights_folder, '{}_{}.pt'.format(epoch, i_batch))
            torch.save(model.state_dict(), checkpoint_path)

        # Validation
        if i_batch % val_every == 0:
            plt.figure(figsize=(5, 5))

            total_val_loss = 0.0
            n_val_seen = 0

            # Evaluate in eval mode and without autograd bookkeeping.
            model.eval()
            with torch.no_grad():
                for jx, val_batch in enumerate(dataloader_val):
                    if jx == n_val_batches:
                        break
                    pointers, val_loss = predict_and_eval(model, val_batch, CCE)
                    total_val_loss += val_loss.item()
                    n_val_seen += 1

                    # Plot the first few samples: prediction in blue, target in dashed red.
                    if jx < n_val_plots:
                        plt.subplot(2, 2, jx + 1)
                        # +1 converts the 0-based argmax back to the 1-based plot indices.
                        pred_indices = pointers.argmax(dim=-1).cpu().numpy() + 1

                        target_indices = val_batch['inds_hull'][0].cpu().numpy()
                        assert len(target_indices) == pred_indices.shape[1]
                        print('Targets: {}, Preds: {}'.format(target_indices, pred_indices))
                        points = val_batch['points'][0].cpu().numpy()
                        plot_points_and_hull(points, pred_indices[0], c='b')
                        plot_points_and_hull(points, target_indices, c='r--')
            model.train()

            plt.show()

            # Average over the batches actually evaluated; the loader may hold
            # fewer than n_val_batches.
            if n_val_seen:
                writer.add_scalar('val.loss', total_val_loss / n_val_seen, iter_cntr)