In [1]:
from utils.datasets import AlphabetSortingDataset, ExtendedWikiSQL
from models.pointer_net import PointerNet
from torch.utils.data import DataLoader

In [2]:
import torch
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import DataLoader
import numpy as np

In [3]:
import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# kernel session, including deprecation notices.
warnings.filterwarnings("ignore")

# Run configuration shared by the cells below.
params = {
    # Data
    'batch_size': 1024,
    'shuffle': True,
    'nof_workers': 0, # must stay at 0
    #Train
    'nof_epoch': 100,
    'lr': 0.0001,
    # GPU
    'gpu': True,
    # Network
    'embedding_size': 300,
    'hiddens': 512,
    'nof_lstms': 8,
    'dropout': 0.3,
    # NOTE(review): the value is True although the original remark says it
    # does not work -- confirm which one is current.
    'bidir': True # True not working right now
}

dataset = AlphabetSortingDataset(100000, min_len=20, max_len=25)
# The worker count is taken from `params` so the setting above is honoured
# instead of silently relying on the DataLoader default.
dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        num_workers=params['nof_workers'])

In [4]:
# The GPU is used only when it is requested in `params` and CUDA reports
# itself as available.
USE_CUDA = bool(params['gpu'] and torch.cuda.is_available())
if USE_CUDA:
    print('Using GPU, %i devices.' % torch.cuda.device_count())

Using GPU, 1 devices.


In [5]:
# Instantiate the pointer network from the architecture entries of `params`.
model = PointerNet(
    params['embedding_size'],
    params['hiddens'],
    params['nof_lstms'],
    params['dropout'],
    params['bidir'],
)

if USE_CUDA:
    model.cuda()
    # NOTE(review): `net` is not referenced by the training cells visible in
    # this notebook; they call `model` directly.
    net = torch.nn.DataParallel(
        model,
        device_ids=range(torch.cuda.device_count()),
    )
    cudnn.benchmark = True

# Loss criterion used by the training cells.
CCE = torch.nn.CrossEntropyLoss()

# Only parameters that require gradients are handed to the optimizer.
model_optim = optim.Adam(
    (weight for weight in model.parameters() if weight.requires_grad),
    lr=params['lr'],
)


In [6]:
from tqdm import tqdm
losses = []  # loss of every batch across all epochs, stored as Python floats

for epoch in range(params['nof_epoch']):
    # Select training mode every epoch so that a previously executed
    # model.eval() cell does not carry over into training.
    model.train()

    batch_loss = []      # losses of the current epoch only
    epoch_loss_sum = 0.0
    iterator = tqdm(dataloader, unit='Batch')
    iterator.set_description('Epoch %i/%i' % (epoch+1, params['nof_epoch']))

    for sample_batched in iterator:
        x, y, chars = sample_batched
        train_batch, target_batch = x, y

        if USE_CUDA:
            train_batch = train_batch.cuda()
            target_batch = target_batch.cuda()

        o, p = model(train_batch)
        # Collapse all leading dimensions so each row of `o` is scored against
        # one entry of the flattened target.
        o = o.contiguous().view(-1, o.size()[-1])
        target_batch = target_batch.view(-1)

        # TODO: the original note suggests normalising by the length of the
        # table (target_batch.shape[1]); an accuracy metric is also still open.
        loss = CCE(o, target_batch)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()

        # Store plain floats rather than tensors so the history does not keep
        # device tensors alive and can be averaged or plotted directly.
        loss_value = loss.item()
        losses.append(loss_value)
        batch_loss.append(loss_value)
        epoch_loss_sum += loss_value

        # The running epoch mean is reported here because tqdm closes the bar
        # once iteration finishes; a postfix set after the loop is not drawn.
        iterator.set_postfix(loss='{}'.format(loss_value),
                             epoch_avg=epoch_loss_sum / len(batch_loss))

    # each epoch, reduce the learning rate
    for param in model_optim.param_groups:
        param['lr'] *= 0.95

Epoch 1/100: 100%|██████████████████████████████████████████| 98/98 [01:17<00:00,  1.26Batch/s, loss=2.953413248062134]
Epoch 2/100: 100%|██████████████████████████████████████████| 98/98 [01:16<00:00,  1.27Batch/s, loss=2.666102170944214]
Epoch 3/100: 100%|██████████████████████████████████████████| 98/98 [01:16<00:00,  1.28Batch/s, loss=2.661240816116333]
Epoch 4/100: 100%|█████████████████████████████████████████| 98/98 [01:16<00:00,  1.27Batch/s, loss=2.6642749309539795]
Epoch 5/100: 100%|██████████████████████████████████████████| 98/98 [01:16<00:00,  1.28Batch/s, loss=2.682713031768799]
Epoch 6/100: 100%|█████████████████████████████████████████| 98/98 [01:17<00:00,  1.27Batch/s, loss=2.6597344875335693]
Epoch 7/100: 100%|██████████████████████████████████████████| 98/98 [01:17<00:00,  1.27Batch/s, loss=2.660395622253418]
Epoch 8/100: 100%|█████████████████████████████████████████| 98/98 [01:17<00:00,  1.27Batch/s, loss=2.6849074363708496]
Epoch 9/100: 100%|██████████████████████

Epoch 69/100: 100%|████████████████████████████████████████| 98/98 [01:16<00:00,  1.27Batch/s, loss=2.6649012565612793]
Epoch 70/100: 100%|████████████████████████████████████████| 98/98 [01:17<00:00,  1.27Batch/s, loss=2.6567351818084717]
Epoch 71/100: 100%|████████████████████████████████████████| 98/98 [01:17<00:00,  1.27Batch/s, loss=2.6583878993988037]
Epoch 72/100: 100%|█████████████████████████████████████████| 98/98 [01:16<00:00,  1.27Batch/s, loss=2.676305055618286]
Epoch 73/100: 100%|████████████████████████████████████████| 98/98 [01:16<00:00,  1.27Batch/s, loss=2.6590566635131836]
Epoch 74/100: 100%|█████████████████████████████████████████| 98/98 [01:16<00:00,  1.27Batch/s, loss=2.663459062576294]
Epoch 75/100: 100%|████████████████████████████████████████| 98/98 [01:17<00:00,  1.27Batch/s, loss=2.6716256141662598]
Epoch 76/100: 100%|████████████████████████████████████████| 98/98 [01:17<00:00,  1.27Batch/s, loss=2.6678709983825684]
Epoch 77/100: 100%|█████████████████████

In [7]:
def generate_batch(batch):
    """Collate dataset entries into zero-padded batch tensors.

    Args:
        batch: non-empty list of mappings, each with an ``'input'`` sequence
            (rows of embedding values) and a ``'target'`` indicator sequence.
            Targets are assumed to have the same length as their input --
            TODO confirm against the dataset.

    Returns:
        dict with ``'input'`` (float tensor, every sequence zero-padded to the
        longest input of the batch) and ``'target'`` (long tensor, padded by
        the same amount).

    Raises:
        ValueError: if ``batch`` is empty.
    """

    def pad_inputs(inputs, targets, max_len, emb_size=300):
        """Pad every input/target pair to ``max_len`` rows/entries."""
        inputs_padded = []
        targets_padded = []
        for seq, tgt in zip(inputs, targets):
            seq = np.asarray(seq)
            if seq.size == 0:
                # An empty sequence carries no width of its own; fall back to
                # the configured embedding size.
                seq = seq.reshape(0, emb_size)
            # Copy the target so the entry stored in the dataset is never
            # modified by collation.
            tgt = np.array(tgt)

            # TEST: single-class classification -- keep only the first
            # positive indicator of THIS sample and clear the remaining ones.
            positives = np.flatnonzero(tgt == 1)
            tgt[positives[1:]] = 0

            pad_len = max_len - len(seq)
            # Use the sequence's own width when it has one, so inputs whose
            # embedding size differs from the default still collate.
            width = seq.shape[1] if seq.ndim == 2 else emb_size
            inputs_padded.append(
                np.concatenate((seq, np.zeros((pad_len, width))), 0))
            targets_padded.append(
                np.concatenate((tgt, np.zeros(pad_len)), 0))
        return np.array(inputs_padded), np.array(targets_padded)

    if len(batch) == 0:
        raise ValueError('generate_batch() received an empty batch')

    inputs = [entry['input'] for entry in batch]
    targets = [entry['target'] for entry in batch]
    max_len = max(len(seq) for seq in inputs)
    inputs, targets = pad_inputs(inputs, targets, max_len)
    inputs = torch.from_numpy(inputs)
    targets = torch.from_numpy(targets)
    return {'input': inputs.float(), 'target': targets.long()}

In [8]:
# Switch to the ExtendedWikiSQL data: one entry per batch, collated by
# generate_batch.
params['batch_size'] = 1
dataset = ExtendedWikiSQL()
dataset.load_from_torch('data/training/ewikisql')
# The worker count comes from `params` so the configured value is honoured
# instead of silently relying on the DataLoader default.
dataloader = DataLoader(dataset,
                        batch_size=params['batch_size'],
                        shuffle=params['shuffle'],
                        num_workers=params['nof_workers'],
                        collate_fn=generate_batch)

# A new optimizer is created for this run, so the learning rate starts again
# from params['lr'] rather than from the value decayed by the previous loop.
model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                model.parameters()),
                         lr=params['lr'])

In [9]:
losses = []  # loss of every batch across all epochs, stored as Python floats

for epoch in range(params['nof_epoch']):
    # Select training mode every epoch so that a previously executed
    # model.eval() cell does not carry over into training.
    model.train()

    batch_loss = []      # losses of the current epoch only
    epoch_loss_sum = 0.0
    iterator = tqdm(dataloader, unit='Batch')
    iterator.set_description('Epoch %i/%i' % (epoch+1, params['nof_epoch']))

    for sample_batched in iterator:
        x, y = sample_batched['input'], sample_batched['target']
        train_batch, target_batch = x, y

        if USE_CUDA:
            train_batch = train_batch.cuda()
            target_batch = target_batch.cuda()

        o, p = model(train_batch)
        # Collapse all leading dimensions so each row of `o` is scored against
        # one entry of the flattened target.
        o = o.contiguous().view(-1, o.size()[-1])
        target_batch = target_batch.view(-1)

        # TODO: the original note suggests normalising by the length of the
        # table (target_batch.shape[1]); an accuracy metric is also still open.
        loss = CCE(o, target_batch)

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()

        # Store plain floats rather than tensors so the history does not keep
        # device tensors alive and can be averaged or plotted directly.
        loss_value = loss.item()
        losses.append(loss_value)
        batch_loss.append(loss_value)
        epoch_loss_sum += loss_value

        # The running epoch mean is reported here because tqdm closes the bar
        # once iteration finishes; a postfix set after the loop is not drawn.
        iterator.set_postfix(loss='{}'.format(loss_value),
                             epoch_avg=epoch_loss_sum / len(batch_loss))

    # each epoch, reduce the learning rate
    for param in model_optim.param_groups:
        param['lr'] *= 0.95

Epoch 1/100: 100%|██████████████████████████████████████| 1000/1000 [19:22<00:00,  1.16s/Batch, loss=4.464739799499512]
Epoch 2/100: 100%|██████████████████████████████████████| 1000/1000 [19:29<00:00,  1.17s/Batch, loss=5.038456439971924]
Epoch 3/100: 100%|██████████████████████████████████████| 1000/1000 [19:29<00:00,  1.17s/Batch, loss=4.065598964691162]
Epoch 4/100: 100%|██████████████████████████████████████| 1000/1000 [19:26<00:00,  1.17s/Batch, loss=4.334690093994141]
Epoch 5/100: 100%|██████████████████████████████████████| 1000/1000 [19:19<00:00,  1.16s/Batch, loss=4.636309623718262]
Epoch 6/100: 100%|██████████████████████████████████████| 1000/1000 [19:24<00:00,  1.16s/Batch, loss=4.319794178009033]
Epoch 7/100: 100%|██████████████████████████████████████| 1000/1000 [19:13<00:00,  1.15s/Batch, loss=4.997878551483154]
Epoch 8/100: 100%|██████████████████████████████████████| 1000/1000 [18:59<00:00,  1.14s/Batch, loss=4.780004024505615]
Epoch 9/100: 100%|██████████████████████

KeyboardInterrupt: 

---

# BackUp

In [None]:
<marker>

In [None]:
# Put the model into evaluation mode before the inference cells below.
model.eval()

In [None]:
# Take the first 100 entries as an evaluation batch.
# NOTE(review): this unpacking expects `dataset` to yield three values per
# slice; after the ExtendedWikiSQL cell has run, `dataset` refers to that
# dataset instead -- confirm which one is intended here.
x, y, z = dataset[:100]
# Move the tensors to the GPU only when the model was moved there as well.
if USE_CUDA:
    x = x.cuda()
    y = y.cuda()

In [None]:
# Inference only: disable gradient tracking so no autograd graph is built
# for the evaluation batch.
with torch.no_grad():
    o, p = model(x)

In [None]:
# Pair each entry of `z` with the corresponding row of predicted pointers,
# converted to plain Python lists.
sorting = [(entry, pointer_row)
           for entry, pointer_row in zip(z, p.data.cpu().tolist())]

In [None]:
# Display the (entry, pointers) pairs built in the previous cell.
sorting

In [None]:
# Print, for every evaluated entry, its elements ordered by the predicted
# pointer value next to the truly sorted elements.
for sequence, pointers in sorting:
    # Sort the (element, pointer) pairs directly: unlike a dict keyed by the
    # element this keeps repeated elements, and it no longer rebinds the
    # notebook-level name `p` that holds the model's pointer output.
    ranked = sorted(zip(sequence, pointers), key=lambda pair: pair[1])
    predicted = [element for element, _ in ranked]
    print("PRED:" , predicted, " \nTRUE: ", sorted(sequence))
    print()