In [14]:
import torch
import torch.nn
import torch.optim
import torch.utils.data
import torch.nn.functional as F
from splitcross import SplitCrossEntropyLoss

import numpy as np
import networkx as nx
import math
import json
import time

import data
import os
from utils import batchify
from argparse import Namespace
from model import AWDRNNModel
from train import train, evaluate
import datetime

In [13]:
# Load every JSON training log found in the single-run log directory.
# File names are sorted so the order of `all_stats` (and therefore any index
# computed from it) does not depend on the arbitrary order of os.listdir().
all_stats = []
for fn in sorted(os.listdir('train_logs_single_run')):
    if fn.endswith('.json'):
        # Context manager closes each log file as soon as it has been parsed.
        with open(os.path.join('train_logs_single_run', fn), 'r') as stats_file:
            all_stats.append(json.load(stats_file))

In [25]:
def _final_test_loss(run_stats):
    """Return the last entry of run_stats['test_losses'], or inf if it is missing or NaN."""
    losses = run_stats['test_losses']
    if len(losses) == 0 or np.isnan(losses[-1]):
        return np.inf
    return losses[-1]


# One value per logged run: the final recorded test loss (inf marks unusable runs,
# so they can never win an argmin).
final_test_ppl = np.array([_final_test_loss(run_stats) for run_stats in all_stats])

In [26]:
# Index (into all_stats) of the run with the smallest final test value.
best_i = final_test_ppl.argmin()

In [29]:
# Exponentiate the selected run's last logged test loss and display it.
best_final_test_loss = all_stats[best_i]['test_losses'][-1]
np.exp(best_final_test_loss)

78.89023051795395

In [30]:
# Display the full logged record of the selected run.
all_stats[best_i]

{'data': 'data/ptb',
 'recepie_id': 1000001,
 'recepies_list_path': 'data/models_recepies.json',
 'cuda': True,
 'batch_size': 20,
 'model': 'CustomRNN',
 'emsize': 400,
 'nhid': 600,
 'nlayers': 3,
 'dropout': 0.1,
 'dropouth': 0.25,
 'dropouti': 0.4,
 'dropoute': 0.0,
 'wdrop': 0.1,
 'tied': True,
 'bptt': 70,
 'lr': 0.001,
 'wdecay': 1.2e-06,
 'epochs': 50,
 'alpha': 2,
 'beta': 1,
 'log_interval': 200,
 'clip': 0.25,
 'eval_batch_size': 50,
 'recepie': '{"i": {"op": "linear", "input": ["x", "h_prev_0"]}, "i_act": {"op": "activation_tanh", "input": ["i"]}, "j": {"op": "linear", "input": ["x", "h_prev_0"]}, "j_act": {"op": "activation_sigm", "input": ["j"]}, "f": {"op": "linear", "input": ["x", "h_prev_0"]}, "f_act": {"op": "activation_sigm", "input": ["f"]}, "o": {"op": "linear", "input": ["x", "h_prev_0"]}, "o_act": {"op": "activation_tanh", "input": ["o"]}, "h_new_1_part1": {"op": "elementwise_prod", "input": ["f_act", "h_prev_1"]}, "h_new_1_part2": {"op": "elementwise_prod", "inp

In [32]:
# List the log files whose name contains the id 1000001.
[name for name in os.listdir('train_logs_single_run') if '1000001' in name]

['log_stats_model_1000001_2020-04-19_08-20-59_999949254.json']

In [35]:
# Tail of the log file name listed above. Note it starts with the last four
# digits of the id ('0001'); path literals that use it must supply the rest.
suffix = '0001_2020-04-19_08-20-59_999949254'

In [36]:
# Load the chosen run's JSON log; the context manager closes the file handle
# once parsing is done.
with open('train_logs_single_run/log_stats_model_100' + suffix + '.json', 'r') as log_file:
    log = json.load(log_file)

In [37]:
# Expose every key of the loaded log as an attribute (args.data, args.lr, ...).
args = Namespace(**log)

In [38]:
# Build the corpus from the data path recorded in the log.
corpus = data.Corpus(args.data)
cuda = 'cuda'
ntokens = len(corpus.dictionary)

# Training stream uses the training batch size ...
train_data = batchify(corpus.train, args.batch_size, args, cuda)

# ... while all three evaluation streams (train / valid / test) share the
# evaluation batch size.
train_eval_data, val_data, test_data = (
    batchify(split, args.eval_batch_size, args, cuda)
    for split in (corpus.train, corpus.valid, corpus.test)
)

In [39]:
# Positional hyper-parameters, in the order the constructor call expects them
# after the model type and the vocabulary size.
model_arg_names = (
    'emsize', 'nhid', 'nlayers',
    'dropout', 'dropouth', 'dropouti', 'dropoute', 'wdrop',
    'tied', 'recepie',
)
custom_model = AWDRNNModel(
    args.model,
    ntokens,
    *(getattr(args, arg_name) for arg_name in model_arg_names),
    verbose=False,
)

In [44]:
# Move the model to the target device; rebinding the result produces no cell output.
custom_model = custom_model.to(cuda)

In [48]:
# vars() returns the namespace's own attribute dict (not a copy), so the keys
# written below also become attributes of `args`.
log_stats = vars(args)
log_stats['experiment_id'] = 'reproduce'
log_stats['init_time'] = 'reproduce'
# numel() counts every element regardless of tensor rank, so parameters with
# more than two dimensions are counted in full. Parameters with an empty size
# (0-dim) are skipped because an empty torch.Size is falsy.
log_stats['num_params'] = sum(p.numel() for p in custom_model.parameters() if p.size())

In [49]:
# Loss module built from the embedding size with an empty `splits` list
# (presumably meaning no vocabulary splitting — confirm against splitcross).
criterion = SplitCrossEntropyLoss(args.emsize, splits=[], verbose=False)

In [50]:
# Move the loss module to the same device as the model and data.
criterion = criterion.to(cuda)

In [51]:
# Optimise the model's parameters together with any parameters of the loss module.
params = [*custom_model.parameters(), *criterion.parameters()]
optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.wdecay)
lr = args.lr

# Per-epoch histories appended to by the training loop.
train_losses, val_losses, test_losses, wall_times = [], [], [], []

In [52]:
# Number of epochs taken from the loaded log.
args.epochs

50

In [53]:
def _perplexity(loss):
    """Return exp(loss), or inf when the result does not fit in a float.

    math.exp raises OverflowError for large finite inputs; a diverged run
    should still be reported and recorded instead of aborting the loop.
    """
    try:
        return math.exp(loss)
    except OverflowError:
        return float('inf')


# Train for args.epochs epochs; after each one, evaluate on the train / valid /
# test streams, print a summary and record the losses and the training time.
for epoch in range(1, args.epochs+1):
    epoch_start_time = time.time()
    train(custom_model, optimizer, params, criterion, train_data, args, epoch)
    epoch_end_time = time.time()  # wall time covers training only, not evaluation
    train_loss = evaluate(custom_model, criterion, train_eval_data, args.eval_batch_size, args)
    val_loss = evaluate(custom_model, criterion, val_data, args.eval_batch_size, args)
    test_loss = evaluate(custom_model, criterion, test_data, args.eval_batch_size, args)
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s |\n| train loss {:5.2f} | '
        'train ppl {:8.2f} | train bpw {:8.3f} |\n| valid loss {:5.2f} | '
        'valid ppl {:8.2f} | valid bpw {:8.3f} |\n| test loss {:5.2f} | '
        'test ppl {:8.2f} | test bpw {:8.3f} |'.format(
            epoch, (epoch_end_time - epoch_start_time),
            train_loss, _perplexity(train_loss), train_loss / math.log(2),
            val_loss, _perplexity(val_loss), val_loss / math.log(2),
            test_loss, _perplexity(test_loss), test_loss / math.log(2)))
    print('-' * 89)

    wall_times.append(epoch_end_time - epoch_start_time)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    test_losses.append(test_loss)

    # Stop as soon as any loss is NaN; the NaN epoch itself is already recorded.
    # `status` is only bound on this path.
    if np.isnan(np.array([train_loss, val_loss, test_loss])).any():
        status = 'loss is nan!'
        break

| epoch   1 |   200/  663 batches | lr 0.00100 | ms/batch 259.60 | loss  7.09 | ppl  1200.35 | bpc   10.229
| epoch   1 |   400/  663 batches | lr 0.00100 | ms/batch 264.24 | loss  6.11 | ppl   448.22 | bpc    8.808
| epoch   1 |   600/  663 batches | lr 0.00100 | ms/batch 263.84 | loss  5.79 | ppl   326.57 | bpc    8.351
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 180.45s |
| train loss  5.51 | train ppl   248.26 | train bpw    7.956 |
| valid loss  5.55 | valid ppl   256.99 | valid bpw    8.006 |
| test loss  5.51 | test ppl   247.88 | test bpw    7.953 |
-----------------------------------------------------------------------------------------
| epoch   2 |   200/  663 batches | lr 0.00100 | ms/batch 267.74 | loss  5.53 | ppl   253.36 | bpc    7.985
| epoch   2 |   400/  663 batches | lr 0.00100 | ms/batch 258.11 | loss  5.40 | ppl   220.94 | bpc    7.788
| epoch   2 |   600/  663 batches | lr 0.00100 | ms/batch

| epoch  12 |   600/  663 batches | lr 0.00100 | ms/batch 265.59 | loss  4.20 | ppl    66.71 | bpc    6.060
-----------------------------------------------------------------------------------------
| end of epoch  12 | time: 181.99s |
| train loss  3.91 | train ppl    49.94 | train bpw    5.642 |
| valid loss  4.50 | valid ppl    89.99 | valid bpw    6.492 |
| test loss  4.46 | test ppl    86.78 | test bpw    6.439 |
-----------------------------------------------------------------------------------------
| epoch  13 |   200/  663 batches | lr 0.00100 | ms/batch 263.94 | loss  4.20 | ppl    66.36 | bpc    6.052
| epoch  13 |   400/  663 batches | lr 0.00100 | ms/batch 266.18 | loss  4.15 | ppl    63.69 | bpc    5.993
| epoch  13 |   600/  663 batches | lr 0.00100 | ms/batch 258.34 | loss  4.16 | ppl    64.15 | bpc    6.003
-----------------------------------------------------------------------------------------
| end of epoch  13 | time: 180.59s |
| train loss  3.86 | train ppl    47.3

| epoch  24 |   200/  663 batches | lr 0.00100 | ms/batch 272.34 | loss  3.89 | ppl    48.99 | bpc    5.614
| epoch  24 |   400/  663 batches | lr 0.00100 | ms/batch 261.05 | loss  3.85 | ppl    47.17 | bpc    5.560
| epoch  24 |   600/  663 batches | lr 0.00100 | ms/batch 249.50 | loss  3.87 | ppl    47.92 | bpc    5.583
-----------------------------------------------------------------------------------------
| end of epoch  24 | time: 175.65s |
| train loss  3.47 | train ppl    32.23 | train bpw    5.010 |
| valid loss  4.42 | valid ppl    82.91 | valid bpw    6.373 |
| test loss  4.38 | test ppl    80.06 | test bpw    6.323 |
-----------------------------------------------------------------------------------------
| epoch  25 |   200/  663 batches | lr 0.00100 | ms/batch 244.08 | loss  3.87 | ppl    47.87 | bpc    5.581
| epoch  25 |   400/  663 batches | lr 0.00100 | ms/batch 238.94 | loss  3.84 | ppl    46.63 | bpc    5.543
| epoch  25 |   600/  663 batches | lr 0.00100 | ms/batch

| epoch  35 |   600/  663 batches | lr 0.00100 | ms/batch 261.21 | loss  3.71 | ppl    41.01 | bpc    5.358
-----------------------------------------------------------------------------------------
| end of epoch  35 | time: 180.64s |
| train loss  3.25 | train ppl    25.86 | train bpw    4.692 |
| valid loss  4.40 | valid ppl    81.33 | valid bpw    6.346 |
| test loss  4.36 | test ppl    78.64 | test bpw    6.297 |
-----------------------------------------------------------------------------------------
| epoch  36 |   200/  663 batches | lr 0.00100 | ms/batch 267.73 | loss  3.71 | ppl    40.98 | bpc    5.357
| epoch  36 |   400/  663 batches | lr 0.00100 | ms/batch 259.39 | loss  3.69 | ppl    40.08 | bpc    5.325
| epoch  36 |   600/  663 batches | lr 0.00100 | ms/batch 264.86 | loss  3.70 | ppl    40.48 | bpc    5.339
-----------------------------------------------------------------------------------------
| end of epoch  36 | time: 180.59s |
| train loss  3.24 | train ppl    25.4

| epoch  47 |   200/  663 batches | lr 0.00100 | ms/batch 257.33 | loss  3.61 | ppl    37.00 | bpc    5.210
| epoch  47 |   400/  663 batches | lr 0.00100 | ms/batch 253.04 | loss  3.58 | ppl    35.84 | bpc    5.164
| epoch  47 |   600/  663 batches | lr 0.00100 | ms/batch 263.67 | loss  3.60 | ppl    36.42 | bpc    5.187
-----------------------------------------------------------------------------------------
| end of epoch  47 | time: 177.35s |
| train loss  3.08 | train ppl    21.82 | train bpw    4.447 |
| valid loss  4.39 | valid ppl    80.87 | valid bpw    6.337 |
| test loss  4.36 | test ppl    78.35 | test bpw    6.292 |
-----------------------------------------------------------------------------------------
| epoch  48 |   200/  663 batches | lr 0.00100 | ms/batch 255.21 | loss  3.60 | ppl    36.66 | bpc    5.196
| epoch  48 |   400/  663 batches | lr 0.00100 | ms/batch 259.28 | loss  3.57 | ppl    35.63 | bpc    5.155
| epoch  48 |   600/  663 batches | lr 0.00100 | ms/batch

In [None]:
# NOTE(review): the log file for this run is opened as
# 'log_stats_model_100' + suffix, whereas this path has no '100' before
# `suffix`, so it resolves to 'dump_weights_model_0001_...'. Confirm that the
# weights file really has that name.
weights_path = 'models_weights/dump_weights_model_' + suffix + '.pt'
saved_state = torch.load(weights_path)
custom_model.load_state_dict(saved_state)

In [None]:
# Make sure the reloaded model lives on the target device; rebinding the
# result produces no cell output.
custom_model = custom_model.to(cuda)

In [None]:
# Fresh loss module for evaluating the reloaded weights. It is moved to the
# same device as the model and data, matching how the training criterion was
# set up earlier in the notebook.
criterion = SplitCrossEntropyLoss(args.emsize, splits=[], verbose=False).to(cuda)

In [None]:
# Evaluate the model on the train / valid / test streams, in that order.
train_loss, val_loss, test_loss = (
    evaluate(custom_model, criterion, eval_stream, args.eval_batch_size, args)
    for eval_stream in (train_eval_data, val_data, test_data)
)

In [None]:
# For each split, report the loss, its exponential (ppl) and the loss divided
# by ln 2 (bpw), framed by two horizontal rules.
rule = '-' * 89
report_values = []
for split_loss in (train_loss, val_loss, test_loss):
    report_values.extend((split_loss, math.exp(split_loss), split_loss / math.log(2)))

report_template = ('train loss {:5.4f} | '
    'train ppl {:8.2f} | train bpw {:8.3f} |\n| valid loss {:5.4f} | '
    'valid ppl {:8.2f} | valid bpw {:8.3f} |\n| test loss {:5.4f} | '
    'test ppl {:8.2f} | test bpw {:8.3f} |')

print(rule)
print(report_template.format(*report_values))
print(rule)

In [None]:
# Print the last value of each loss history stored in the loaded log, for
# comparison with the freshly evaluated losses.
for caption, history_key in (
    ('logged train loss', 'train_losses'),
    ('logged valid loss', 'val_losses'),
    ('logged test loss', 'test_losses'),
):
    print(caption, log[history_key][-1])