In [1]:
import torch
from torch import nn
from torch.autograd import Variable

from data_loader import DataLoader, load_dictionary
from model import UniSkip
from config import *
from datetime import datetime, timedelta

In [2]:
# Build the corpus loader from the id-encoded text file. Per the cell output,
# construction loads the text, builds (or reuses a cached) dictionary at
# "<path>.pkl", and builds a reverse dictionary.
d = DataLoader("./data/patent.refined.sep.unigram.id.txt")

Loading text file at ./data/patent.refined.sep.unigram.id.txt
Making dictionary for these words
Using cached dictionary at ./data/patent.refined.sep.unigram.id.txt.pkl
Making reverse dictionary


In [3]:
# Load the pickled dictionary that sits next to the corpus file (the same
# ".pkl" path the DataLoader reports as its cached dictionary).
dictionary = load_dictionary("./data/patent.refined.sep.unigram.id.txt.pkl")

In [5]:
# Display the number of entries in the loaded dictionary.
len(dictionary)

31987

In [None]:
# Map the first whitespace-separated column of each vocab-file line to the
# second column (presumably a SentencePiece piece -> score table, going by the
# variable name -- confirm against how the file was produced).
spm_vocab = {}
with open('/mnt/48TB/temp/patent.refined.sep.mixed.txt.vocab') as f:
    # Iterate the file lazily instead of materialising every line up front.
    for line in f:
        fields = line.split()
        if len(fields) < 2:
            # Blank or single-column line: indexing [1] would raise IndexError.
            continue
        spm_vocab[fields[0]] = fields[1]

# Cell output: dictionary words that have no entry in the vocab file.
[w for w in dictionary if w not in spm_vocab]

In [3]:
# Display the configured CUDA device index (presumably provided by
# `from config import *` -- it is not defined in this notebook).
CUDA_DEVICE

0

In [4]:
# Instantiate the model with its default constructor arguments.
mod = UniSkip()
# Move the model to the configured GPU only when CUDA use is enabled.
if USE_CUDA:
    mod.cuda(CUDA_DEVICE)

In [5]:
# Fixed learning rate for the whole run; kept as a module-level name because
# other cells read `lr`.
lr = 3e-4
# Adam over every parameter of the model.
optimizer = torch.optim.Adam(mod.parameters(), lr=lr)

In [6]:
# Losses from the most recent debug() calls, oldest first; debug() trims this
# list to its last _LOSS_TRAIL_LEN entries.
loss_trail = []
# Lowest trailing-average loss seen so far (None until debug() has run once).
last_best_loss = None
# UTC timestamp of the previous debug() call, used to report elapsed time.
current_time = datetime.utcnow()

# Number of logged losses the trailing average covers.
_LOSS_TRAIL_LEN = 20
# Checkpoint path; overwritten each time the trailing average improves.
_SAVE_LOC = "./saved_models/skip-best"


def debug(i, loss, prev, nex, prev_pred, next_pred):
    """Log training progress and checkpoint the model when the loss improves.

    Prints the iteration number, the time elapsed since the previous call,
    the current loss, and the four index sequences rendered through
    ``d.convert_indices_to_sentences``. The current loss is appended to the
    module-level ``loss_trail``; when the average of the last
    ``_LOSS_TRAIL_LEN`` logged losses is lower than ``last_best_loss`` (or no
    best exists yet), ``mod.state_dict()`` is saved to ``_SAVE_LOC``.

    Saving is best-effort: a failure is printed and ``last_best_loss`` is left
    unchanged, so the next improving call tries again.

    Args:
        i: Iteration number; used only in the log line.
        loss: Loss for the current batch; read as a single scalar value.
        prev: Index sequence printed as "prev" (presumably the target ids of
            the previous sentence -- confirm against the model).
        nex: Index sequence printed as "next".
        prev_pred: Index sequence printed as "pred_prev".
        next_pred: Index sequence printed as "pred_next".
    """
    import os  # local import: only needed to create the checkpoint directory

    global last_best_loss
    global current_time

    # `loss.data[0]` indexes a 0-dim tensor, which warns on PyTorch 0.4 and
    # raises on later versions; `.item()` is the supported way to get the
    # Python number. Fall back to the old form where `.item()` is missing.
    this_loss = loss.item() if hasattr(loss, "item") else loss.data[0]
    loss_trail.append(this_loss)
    # Keep only the most recent entries (no-op while the list is short).
    del loss_trail[:-_LOSS_TRAIL_LEN]

    new_current_time = datetime.utcnow()
    time_elapsed = str(new_current_time - current_time)
    current_time = new_current_time
    print("Iteration {}: time = {} last_best_loss = {}, this_loss = {}".format(
              i, time_elapsed, last_best_loss, this_loss))

    print("prev = {}\nnext = {}\npred_prev = {}\npred_next = {}".format(
        d.convert_indices_to_sentences(prev),
        d.convert_indices_to_sentences(nex),
        d.convert_indices_to_sentences(prev_pred),
        d.convert_indices_to_sentences(next_pred),
    ))

    trail_loss = sum(loss_trail) / len(loss_trail)
    if last_best_loss is None or last_best_loss > trail_loss:
        print("Loss improved from {} to {}".format(last_best_loss, trail_loss))
        print("saving model at {}".format(_SAVE_LOC))
        try:
            # Without the directory every save would fail with only a message.
            os.makedirs(os.path.dirname(_SAVE_LOC), exist_ok=True)
            torch.save(mod.state_dict(), _SAVE_LOC)
        except Exception as e:
            # Deliberately best-effort: report and keep training.
            print("Couldn't save model because {}".format(e))
        else:
            # Only record the new best once the checkpoint is on disk.
            last_best_loss = trail_loss

In [None]:
print("Starting training...")

# Run settings: 894,100 optimisation steps, 32 * 10 = 320 passed to
# fetch_batch per step, and a debug() report every 100 steps.
num_steps = 894100
batch_size = 32 * 10
report_every = 100

for i in range(num_steps):
    sentences, lengths = d.fetch_batch(batch_size)

    # The model's forward pass returns the loss together with the four index
    # sequences that debug() prints.
    outputs = mod(sentences, lengths)
    loss, prev, nex, prev_pred, next_pred = outputs

    # Report (and possibly checkpoint) before this step's parameter update.
    if i % report_every == 0:
        debug(i, loss, prev, nex, prev_pred, next_pred)

    # Standard update: clear old gradients, backpropagate, apply the step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Starting training...


  # Remove the CWD from sys.path while we load stuff.
  idx = idx.data[0]


Iteration 0: time = 0:00:08.198329 last_best_loss = None, this_loss = 19.81114959716797
prev = 248 5 15 158 17 14 49 415 3628 UNK 21 1128 4217 9721 2597 16 650 14742 282 14097 6622 6756 1013 12358 10549 5154 13 7 842 EOS
next = 15 158 16 21 13 38 88 5 14 337 415 3628 UNK 296 3628 5920 11897 1128 4217 9721 2597 16 650 14742 282 14097 6622 6756 1013 EOS
pred_prev = 3568 14370 19038 5901 21413 12053 12258 2964 6726 17393 9725 397 2693 22808 20173 25903 3300 21881 23538 18788 15625 7292 14610 5284 4232 16310 370 17402 11967 10759
pred_next = 13672 11839 13199 4331 463 16096 2202 9543 12153 7801 8353 16830 17098 526 6190 17098 17186 2122 16396 240 8201 10707 4526 1795 7032 19158 16759 13724 9298 8501
Loss improved from None to 19.81114959716797
saving model at ./saved_models/skip-best
Iteration 100: time = 0:01:01.839654 last_best_loss = 19.81114959716797, this_loss = 17.04083251953125
prev = 7145 11758 UNK UNK 7386 47 8940 2263 456 5506 UNK 12 516 316 5 231 7145 11758 4884 21 UNK 7 2664 58

Iteration 1300: time = 0:01:01.860854 last_best_loss = 16.17840003967285, this_loss = 14.982809066772461
prev = 78 5 29 1653 1616 7 4224 6052 26 5391 18938 5 322 605 6484 7 7768 17 5 9486 1923 2134 18323 148 5795 7 1923 29 10323 EOS
next = 37 1059 7 1923 385 26 UNK 744 346 1567 986 3797 918 5 516 14350 7 UNK 17 159 5 711 10 119 1289 UNK 862 10 60 EOS
pred_prev = 23 5 6 7 7 5 UNK UNK 7 5 UNK 5 UNK 605 5 UNK UNK 5 5 UNK 5 5 UNK UNK UNK 7 UNK 11 7 8
pred_next = 78 17 UNK UNK 5 5 5 7 7 7 7 5 7 5 6 UNK 7 UNK 10 5 7 UNK 5 60 5 UNK 7 7 60 EOS
Loss improved from 16.17840003967285 to 16.093000411987305
saving model at ./saved_models/skip-best
Iteration 1400: time = 0:01:02.043235 last_best_loss = 16.093000411987305, this_loss = 16.157001495361328
prev = 58 7657 10 288 3094 26 6852 143 37 10 106 514 162 19253 39 849 7079 4617 8 EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
next = 422 29 8 EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
pr

Iteration 2600: time = 0:01:01.217427 last_best_loss = 14.909219741821289, this_loss = 14.58656120300293
prev = 15 158 17 5 23 87 101 UNK 106 185 3124 1206 1168 5810 8 EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
next = 15 158 12 1327 5 23 5363 106 185 3124 1206 1618 5 185 3124 1471 146 5 185 3124 780 146 5 598 146 5 1019 14602 611 EOS
pred_prev = 15 252 17 23 23 87 101 367 5 UNK UNK 7 7 UNK 8 5 8 7 7 7 7 7 7 7 7 7 7 8 EOS EOS
pred_next = 15 252 17 1327 5 23 87 106 UNK UNK 7 19 5 UNK UNK 7 7 19 19 5 5 19 5 19 5 5 19 EOS 5 EOS
Iteration 2700: time = 0:01:01.135995 last_best_loss = 14.909219741821289, this_loss = 14.192493438720703
prev = 30 1712 UNK 12 194 9282 1765 2334 8 EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
next = 3388 26 7521 12923 197 3451 406 3732 32 371 168 5593 889 1070 16 9046 2932 11 2538 8 EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
pred_prev = UNK 40 UNK 5 UNK UNK 5 688 8 5 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
pred_