In [1]:
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import DataLoader
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from utils.datasets import AlphabetSortingDataset, ExtendedWikiSQL
from models.pointer_net import PointerNet
from torch.utils.data import DataLoader

In [2]:
params = {
    # Data
    'magnitude': 3,
    'batch_size': 128,
    'shuffle': True,
    'nof_workers': 0, # must stay at 0
    #Train
    'nof_epoch': 3,
    'lr': 0.001,
    # GPU
    'gpu': True,
    # Network
    'input_size': 300,
    'embedding_size': 300,
    'hiddens': 256,
    'nof_lstms': 2,
    'dropout': 0,
    'bidir': True
}

In [3]:
LEN = 3
dataset = AlphabetSortingDataset(10**params['magnitude'], min_len=LEN, max_len=LEN)
dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'])

In [4]:
model = PointerNet(params['input_size'],
                   params['embedding_size'],
                   params['hiddens'],
                   params['nof_lstms'],
                   params['dropout'],
                   params['bidir'])

if params['gpu'] and torch.cuda.is_available():
    model.cuda()
    net = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

CCE = torch.nn.CrossEntropyLoss()
model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                model.parameters()),
                                 lr=params['lr'])

In [5]:
from tqdm import tqdm
losses = []

model.train()
for i_epoch, epoch in enumerate(range(params['nof_epoch'])):
    batch_loss = []
    iterator = tqdm(dataloader, unit='Batch')
    
    for i_batch, sample_batched in enumerate(iterator):
        iterator.set_description('Epoch %i/%i' % (epoch+1, params['nof_epoch']))

        x, y, chars = sample_batched
        train_batch = Variable(x)
        target_batch = Variable(y)

        if torch.cuda.is_available():
            train_batch = train_batch.cuda()
            target_batch = target_batch.cuda()

        o, p = model(train_batch)
        o = o.contiguous().view(-1, o.size()[-1])
        target_batch = target_batch.view(-1)
        
        loss = CCE(o, target_batch) #/ target_batch.shape[1] # need to take the length of the table into account
        #acc = get_accuracy(p, target_batch)
        
        losses.append(loss.data)
        batch_loss.append(loss.data)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()
        
        iterator.set_postfix(loss='{}'.format(loss.data))
        
    # each epoch, reduce the learning rate
    for param in model_optim.param_groups:
            param['lr'] *= 0.95
            
    batch_loss = torch.Tensor(batch_loss)
    iterator.set_postfix(loss=np.average(batch_loss))

Epoch 1/3: 100%|█████████████████████████████████████████████| 8/8 [00:00<00:00,  9.61Batch/s, loss=0.6172170639038086]
Epoch 2/3: 100%|█████████████████████████████████████████████| 8/8 [00:00<00:00, 17.82Batch/s, loss=0.5516911149024963]
Epoch 3/3: 100%|█████████████████████████████████████████████| 8/8 [00:00<00:00, 18.01Batch/s, loss=0.5523827075958252]


In [6]:
model.eval()

x, y, z = dataset[:1000]
x = x.cuda()
y = y.cuda()

o, p = model(x)

sorting = list(zip(z, p.data.cpu().tolist()))
correct = 0
for pair in sorting:
    sequence, pointers = pair
    tmp = {}
    for seq, p in zip(sequence, pointers):
        tmp[seq] = p
    y_hat =  [k for k, v in sorted(tmp.items(), key=lambda item: item[1])]
    y_pred = sorted(sequence)
    #print(y_hat, y_pred)
    correct += y_hat == y_pred
acc = correct / len(x)
print(acc)

0.666


In [7]:
for idx, (z_val, y_val) in enumerate(zip(z,y)):
    print(z_val, y_val)
    if idx == 10:
        break

['6', '9', '2'] tensor([2, 0, 1], device='cuda:0')
['7', '6', '5'] tensor([2, 1, 0], device='cuda:0')
['4', '8', '0'] tensor([2, 0, 1], device='cuda:0')
['4', '9', '3'] tensor([2, 0, 1], device='cuda:0')
['9', '0', '5'] tensor([1, 2, 0], device='cuda:0')
['6', '5', '2'] tensor([2, 1, 0], device='cuda:0')
['6', '3', '5'] tensor([1, 2, 0], device='cuda:0')
['5', '1', '8'] tensor([1, 0, 2], device='cuda:0')
['6', '4', '7'] tensor([1, 0, 2], device='cuda:0')
['1', '4', '5'] tensor([0, 1, 2], device='cuda:0')
['7', '3', '4'] tensor([1, 2, 0], device='cuda:0')


In [8]:
y

tensor([[2, 0, 1],
        [2, 1, 0],
        [2, 0, 1],
        ...,
        [0, 2, 1],
        [2, 0, 1],
        [0, 2, 1]], device='cuda:0')

In [9]:
model.serialize('serialized/pointer_net.pt')

---

# Column Reducing

In [10]:
<marker>

SyntaxError: invalid syntax (<ipython-input-10-aebe5cf9495f>, line 1)

In [None]:
def generate_batch(batch):
    
    def pad_inputs(inputs, targets, max_len, emb_size=300):
        inputs_padded = []
        targets_padded = []
        first_target = True
        for i, t in zip(inputs, targets):
            # TEST!  1-Klasse Klassifizierung
            for idx, indic in enumerate(t):
                if indic == 1 and first_target:
                    first_target = False
                elif indic == 1:
                    t[idx] = 0
            #print(t)
            i_padding = np.zeros((max_len - len(i), emb_size))
            t_padding = np.zeros(max_len - len(i))
            i_padded = np.concatenate((i, i_padding), 0)
            t_padded = np.concatenate((t, t_padding), 0)
            inputs_padded.append(i_padded), targets_padded.append(t_padded)
        return np.array(inputs_padded), np.array(targets_padded)

    inputs = [entry['input'] for entry in batch]
    targets = [entry['target'] for entry in batch]
    max_len = len(max(inputs, key=len))
    inputs, targets = pad_inputs(inputs, targets, max_len)
    inputs = torch.from_numpy(inputs)
    targets = torch.from_numpy(targets)
    return {'input': inputs.float(), 'target': targets.long()}

In [None]:
params['batch_size'] = 1
dataset = ExtendedWikiSQL()
dataset.load_from_torch('data/training/ewikisql')
dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        collate_fn=generate_batch)

model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                model.parameters()),
                                 lr=params['lr'])

In [None]:
losses = []

for i_epoch, epoch in enumerate(range(params['nof_epoch'])):
    batch_loss = []
    iterator = tqdm(dataloader, unit='Batch')
    
    for i_batch, sample_batched in enumerate(iterator):
        iterator.set_description('Epoch %i/%i' % (epoch+1, params['nof_epoch']))

        x, y = sample_batched['input'], sample_batched['target']
        train_batch = Variable(x)
        target_batch = Variable(y)

        if USE_CUDA:
            train_batch = train_batch.cuda()
            target_batch = target_batch.cuda()

        o, p = model(train_batch)
        o = o.contiguous().view(-1, o.size()[-1])
        target_batch = target_batch.view(-1)
        
        loss = CCE(o, target_batch) #/ target_batch.shape[1] # need to take the length of the table into account
        #acc = get_accuracy(p, target_batch)
        
        losses.append(loss.data)
        batch_loss.append(loss.data)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()
        
        iterator.set_postfix(loss='{}'.format(loss.data))
        
    # each epoch, reduce the learning rate
    for param in model_optim.param_groups:
            param['lr'] *= 0.95
            
    batch_loss = torch.Tensor(batch_loss)
    iterator.set_postfix(loss=np.average(batch_loss))

---

# BackUp

In [None]:
<marker>

In [None]:
model.eval()

x, y, z = dataset[:100]
x = x.cuda()
y = y.cuda()

o, p = model(x)

sorting = list(zip(z, p.data.cpu().tolist()))

for pair in sorting:
    sequence, pointers = pair
    tmp = {}
    for seq, p in zip(sequence, pointers):
        tmp[seq] = p
    tmp =  [k for k, v in sorted(tmp.items(), key=lambda item: item[1])]
    print("PRED:" , tmp, " \nTRUE: ", sorted(sequence))
    print()