In [None]:
#%%
import tensorflow as tf
from time import gmtime, strftime

from attention_dynamic_model import AttentionDynamicModel, set_decode_type
from reinforce_baseline import RolloutBaseline
from train import train_model

from utils import create_data_on_disk, get_cur_time

# Run configuration: every tunable value used by the script lives here.
# Each comment names the call further down that consumes the constant.

# -- Problem instance and data volume --
GRAPH_SIZE = 20             # 1st arg of create_data_on_disk; `graph_size` of train_model
SEED = 1234                 # `seed` of create_data_on_disk (validation set)
VALIDATE_SET_SIZE = 10000   # 2nd arg of create_data_on_disk
VAL_BATCH_SIZE = 1000       # `val_batch_size` of train_model
BATCH = 1024                # `batch` of train_model
SAMPLES = 1250 * 1024       # `samples` of train_model (= 1280000, i.e. 1250 batches of 1024)

# -- Model and optimizer --
embedding_dim = 128         # sole argument of AttentionDynamicModel
LEARNING_RATE = 1e-4        # first argument of tf.keras.optimizers.Adam
GRAD_NORM_CLIPPING = 1.0    # `grad_norm_clipping` of train_model

# -- Epoch schedule --
START_EPOCH = 0             # `start_epoch` of train_model
END_EPOCH = 100             # `end_epoch` of train_model
SKIP_WARMUP = False         # `skip_warmup` of train_model

# -- Rollout baseline --
ROLLOUT_SAMPLES = 10000     # `num_samples` of RolloutBaseline
NUMBER_OF_WP_EPOCHS = 1     # `wp_n_epochs` of RolloutBaseline (presumably warm-up epochs)

# -- Logging and output --
BATCH_VERBOSE = 625         # `batch_verbose` of train_model (presumably the
                            # per-batch logging interval -- confirm in train.py)

# Base name given to both create_data_on_disk and train_model:
# "VRP_<GRAPH_SIZE>_<YYYY-MM-DD>", where the date is the current UTC date.
FILENAME = 'VRP_{}_{}'.format(
    GRAPH_SIZE,
    strftime("%Y-%m-%d", gmtime()),
)

# Initialize model
# Construct the attention model, passing embedding_dim as its only argument,
# then select the "sampling" decode type on it before training starts.
model_tf = AttentionDynamicModel(embedding_dim)
set_decode_type(model_tf, "sampling")
# Timestamped progress line so long runs can be followed in the output.
print(get_cur_time(), 'model initialized')

# Create and save validation dataset
# GRAPH_SIZE and VALIDATE_SET_SIZE are passed positionally (presumably nodes
# per instance and number of instances -- confirm against utils). The helper
# is asked both to save (is_save, under FILENAME) and to return the data
# (is_return); the returned object is later handed to train_model.
validation_dataset = create_data_on_disk(
    GRAPH_SIZE,
    VALIDATE_SET_SIZE,
    is_save=True,
    filename=FILENAME,
    is_return=True,
    seed=SEED,
)
print(get_cur_time(), 'validation dataset created and saved on the disk')

# Initialize optimizer
# Adam with LEARNING_RATE as its first positional argument (the learning rate).
# No clipping is configured on the optimizer itself; GRAD_NORM_CLIPPING is
# passed to train_model separately.
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

# Initialize baseline
# Rollout baseline wrapped around the freshly created model. Per the recorded
# output of this cell, construction already runs a greedy rollout evaluation
# ("Evaluating baseline model on baseline dataset (epoch = 0)") before the
# 'baseline initialized' line is printed.
# NOTE(review): `epoch` is hard-coded to 0 rather than tied to START_EPOCH;
# confirm whether it should follow START_EPOCH when resuming a run.
baseline = RolloutBaseline(
    model_tf,
    wp_n_epochs=NUMBER_OF_WP_EPOCHS,
    epoch=0,
    num_samples=ROLLOUT_SAMPLES,
)
print(get_cur_time(), 'baseline initialized')

# Launch training. The optimizer, model, baseline and validation dataset are
# passed positionally; every remaining setting is a keyword argument fed from
# the constants defined at the top of the script.
train_model(
    optimizer,
    model_tf,
    baseline,
    validation_dataset,
    samples=SAMPLES,
    batch=BATCH,
    val_batch_size=VAL_BATCH_SIZE,
    start_epoch=START_EPOCH,
    end_epoch=END_EPOCH,
    skip_warmup=SKIP_WARMUP,
    grad_norm_clipping=GRAD_NORM_CLIPPING,
    batch_verbose=BATCH_VERBOSE,
    graph_size=GRAPH_SIZE,
    filename=FILENAME,
)

2020-06-06 18:56:19 model initialized
2020-06-06 18:56:23 validation dataset created and saved on the disk


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 0)


Rollout greedy execution: 10it [00:13,  1.37s/it]


2020-06-06 18:56:40 baseline initialized
Current decode type: sampling


batch calculation at epoch 0: 0it [00:00, ?it/s]

Instructions for updating:
Use tf.identity instead.


batch calculation at epoch 0: 1it [00:01,  1.84s/it]

grad_global_norm = 6.101315498352051, clipped_norm = 1.0
Epoch 0 (batch = 0): Loss: -1.0620197057724: Cost: 13.103897094726562


batch calculation at epoch 0: 626it [11:39,  1.12s/it]

grad_global_norm = 3.1216225624084473, clipped_norm = 1.0
Epoch 0 (batch = 625): Loss: 0.4472411572933197: Cost: 8.89040470123291


batch calculation at epoch 0: 1250it [24:49,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 0)


Rollout greedy execution: 10it [00:08,  1.22it/s]


Epoch 0 candidate mean 7.248440742492676, baseline epoch 0 mean 16.998558044433594, difference -9.750117301940918
p-value: 0.0
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 0)


Rollout greedy execution: 10it [00:08,  1.21it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

alpha was updated to 1.0


Rollout greedy execution: 10it [00:08,  1.18it/s]


Validation score: 7.257999897003174
2020-06-06 19:28:55 Epoch 0: Loss: 0.08840161561965942: Cost: 8.216059684753418


Rollout greedy execution: 625it [10:55,  1.05s/it]
batch calculation at epoch 1: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 1: 1it [00:01,  1.45s/it]

grad_global_norm = 2.4080729484558105, clipped_norm = 1.0
Epoch 1 (batch = 0): Loss: -0.988128125667572: Cost: 7.301421165466309


batch calculation at epoch 1: 626it [13:18,  1.36s/it]

grad_global_norm = 3.6048011779785156, clipped_norm = 1.0
Epoch 1 (batch = 625): Loss: 0.4816959798336029: Cost: 7.125861167907715


batch calculation at epoch 1: 1250it [26:32,  1.27s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 1)


Rollout greedy execution: 10it [00:06,  1.49it/s]


Epoch 1 candidate mean 6.809178829193115, baseline epoch 1 mean 7.249057769775391, difference -0.4398789405822754
p-value: 0.0
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 1)


Rollout greedy execution: 10it [00:06,  1.51it/s]
Rollout greedy execution: 10it [00:06,  1.52it/s]


Validation score: 6.825699806213379
2020-06-06 20:13:32 Epoch 1: Loss: 1.0246026515960693: Cost: 7.038084983825684


Rollout greedy execution: 625it [08:18,  1.25it/s]


Current decode type: sampling


batch calculation at epoch 2: 1it [00:01,  1.30s/it]

grad_global_norm = 3.156578540802002, clipped_norm = 1.0
Epoch 2 (batch = 0): Loss: -0.46159473061561584: Cost: 6.8919172286987305


batch calculation at epoch 2: 626it [13:17,  1.22s/it]

grad_global_norm = 4.625257968902588, clipped_norm = 1.0
Epoch 2 (batch = 625): Loss: -0.37274548411369324: Cost: 6.8406267166137695


batch calculation at epoch 2: 1250it [26:10,  1.26s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 2)


Rollout greedy execution: 10it [00:06,  1.57it/s]


Epoch 2 candidate mean 6.7196455001831055, baseline epoch 2 mean 6.824184417724609, difference -0.1045389175415039
p-value: 3.2346662561517107e-166
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 2)


Rollout greedy execution: 10it [00:06,  1.57it/s]
Rollout greedy execution: 10it [00:06,  1.53it/s]


Validation score: 6.710599899291992
2020-06-06 20:55:12 Epoch 2: Loss: -0.19463607668876648: Cost: 6.807832717895508


Rollout greedy execution: 625it [08:04,  1.29it/s]


Current decode type: sampling


batch calculation at epoch 3: 1it [00:01,  1.33s/it]

grad_global_norm = 2.4964756965637207, clipped_norm = 0.9999998807907104
Epoch 3 (batch = 0): Loss: -0.46266382932662964: Cost: 6.722323417663574


batch calculation at epoch 3: 626it [12:53,  1.41s/it]

grad_global_norm = 2.4689290523529053, clipped_norm = 0.9999999403953552
Epoch 3 (batch = 625): Loss: -0.31828388571739197: Cost: 6.735411643981934


batch calculation at epoch 3: 1250it [25:50,  1.24s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 3)


Rollout greedy execution: 10it [00:06,  1.56it/s]


Epoch 3 candidate mean 6.6667633056640625, baseline epoch 3 mean 6.7076921463012695, difference -0.04092884063720703
p-value: 4.258775951501058e-37
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 3)


Rollout greedy execution: 10it [00:06,  1.51it/s]
Rollout greedy execution: 10it [00:06,  1.55it/s]


Validation score: 6.673500061035156
2020-06-06 21:36:18 Epoch 3: Loss: -0.23287829756736755: Cost: 6.718391418457031


Rollout greedy execution: 625it [07:55,  1.31it/s]


Current decode type: sampling


batch calculation at epoch 4: 1it [00:01,  1.31s/it]

grad_global_norm = 4.370053768157959, clipped_norm = 1.0
Epoch 4 (batch = 0): Loss: -0.39580485224723816: Cost: 6.69390344619751


batch calculation at epoch 4: 626it [12:51,  1.20s/it]

grad_global_norm = 3.180819034576416, clipped_norm = 1.0
Epoch 4 (batch = 625): Loss: -0.2280619591474533: Cost: 6.678525447845459


batch calculation at epoch 4: 1250it [25:37,  1.23s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 4)


Rollout greedy execution: 10it [00:06,  1.61it/s]


Epoch 4 candidate mean 6.605867862701416, baseline epoch 4 mean 6.6478962898254395, difference -0.04202842712402344
p-value: 4.696704182424884e-40
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 4)


Rollout greedy execution: 10it [00:06,  1.54it/s]
Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.61899995803833
2020-06-06 22:16:57 Epoch 4: Loss: -0.17768430709838867: Cost: 6.666927337646484


Rollout greedy execution: 625it [07:56,  1.31it/s]


Current decode type: sampling


batch calculation at epoch 5: 1it [00:01,  1.32s/it]

grad_global_norm = 2.0111629962921143, clipped_norm = 1.0
Epoch 5 (batch = 0): Loss: -0.28872567415237427: Cost: 6.664510250091553


batch calculation at epoch 5: 626it [12:50,  1.23s/it]

grad_global_norm = 2.2043797969818115, clipped_norm = 1.0
Epoch 5 (batch = 625): Loss: -0.24930618703365326: Cost: 6.637479305267334


batch calculation at epoch 5: 1250it [25:55,  1.24s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 5)


Rollout greedy execution: 10it [00:06,  1.56it/s]


Epoch 5 candidate mean 6.57692813873291, baseline epoch 5 mean 6.616801738739014, difference -0.039873600006103516
p-value: 2.0584062107477851e-44
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 5)


Rollout greedy execution: 10it [00:06,  1.56it/s]
Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.578400135040283
2020-06-06 22:57:54 Epoch 5: Loss: -0.20873349905014038: Cost: 6.628334999084473


Rollout greedy execution: 625it [07:49,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 6: 1it [00:01,  1.24s/it]

grad_global_norm = 2.9554169178009033, clipped_norm = 0.9999999403953552
Epoch 6 (batch = 0): Loss: -0.1993733048439026: Cost: 6.595789432525635


batch calculation at epoch 6: 626it [13:12,  1.24s/it]

grad_global_norm = 2.5358152389526367, clipped_norm = 1.0
Epoch 6 (batch = 625): Loss: -0.27624595165252686: Cost: 6.604986667633057


batch calculation at epoch 6: 1250it [25:59,  1.25s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 6)


Rollout greedy execution: 10it [00:06,  1.56it/s]


Epoch 6 candidate mean 6.54651403427124, baseline epoch 6 mean 6.567870140075684, difference -0.02135610580444336
p-value: 5.796463876568518e-14
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 6)


Rollout greedy execution: 10it [00:06,  1.51it/s]
Rollout greedy execution: 10it [00:06,  1.46it/s]


Validation score: 6.558899879455566
2020-06-06 23:38:49 Epoch 6: Loss: -0.2433619648218155: Cost: 6.598049163818359


Rollout greedy execution: 625it [08:17,  1.26it/s]


Current decode type: sampling


batch calculation at epoch 7: 1it [00:01,  1.33s/it]

grad_global_norm = 2.5111899375915527, clipped_norm = 1.0
Epoch 7 (batch = 0): Loss: -0.19373773038387299: Cost: 6.55863094329834


batch calculation at epoch 7: 626it [13:04,  1.27s/it]

grad_global_norm = 2.5475823879241943, clipped_norm = 1.0
Epoch 7 (batch = 625): Loss: -0.2435956746339798: Cost: 6.5820794105529785


batch calculation at epoch 7: 1250it [26:01,  1.25s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 7)


Rollout greedy execution: 10it [00:06,  1.53it/s]


Epoch 7 candidate mean 6.542513847351074, baseline epoch 7 mean 6.56281852722168, difference -0.02030467987060547
p-value: 3.187379661421116e-14
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 7)


Rollout greedy execution: 10it [00:06,  1.55it/s]
Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.541999816894531
2020-06-07 00:20:17 Epoch 7: Loss: -0.22593483328819275: Cost: 6.577094554901123


Rollout greedy execution: 625it [07:48,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 8: 1it [00:01,  1.24s/it]

grad_global_norm = 2.8374099731445312, clipped_norm = 1.0
Epoch 8 (batch = 0): Loss: -0.20241189002990723: Cost: 6.536622047424316


batch calculation at epoch 8: 626it [12:46,  1.14s/it]

grad_global_norm = 2.306706428527832, clipped_norm = 0.9999999403953552
Epoch 8 (batch = 625): Loss: -0.22480271756649017: Cost: 6.559641361236572


batch calculation at epoch 8: 1250it [25:37,  1.23s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 8)


Rollout greedy execution: 10it [00:06,  1.56it/s]


Epoch 8 candidate mean 6.519737720489502, baseline epoch 8 mean 6.5260796546936035, difference -0.0063419342041015625
p-value: 0.011516347272343666
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 8)


Rollout greedy execution: 10it [00:06,  1.56it/s]
Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.537399768829346
2020-06-07 01:00:50 Epoch 8: Loss: -0.2126869410276413: Cost: 6.557659149169922


Rollout greedy execution: 625it [07:49,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 9: 1it [00:01,  1.34s/it]

grad_global_norm = 2.3453104496002197, clipped_norm = 0.9999999403953552
Epoch 9 (batch = 0): Loss: -0.15008684992790222: Cost: 6.5312604904174805


batch calculation at epoch 9: 626it [12:53,  1.27s/it]

grad_global_norm = 3.31536865234375, clipped_norm = 1.0
Epoch 9 (batch = 625): Loss: -0.205136239528656: Cost: 6.549859523773193


batch calculation at epoch 9: 1250it [25:36,  1.23s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 9)


Rollout greedy execution: 10it [00:06,  1.54it/s]


Epoch 9 candidate mean 6.49125862121582, baseline epoch 9 mean 6.508611679077148, difference -0.017353057861328125
p-value: 5.563190108018142e-11
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 9)


Rollout greedy execution: 10it [00:06,  1.50it/s]
Rollout greedy execution: 10it [00:06,  1.55it/s]


Validation score: 6.520599842071533
2020-06-07 01:41:24 Epoch 9: Loss: -0.19277046620845795: Cost: 6.545834541320801


Rollout greedy execution: 625it [08:02,  1.30it/s]


Current decode type: sampling


batch calculation at epoch 10: 1it [00:01,  1.34s/it]

grad_global_norm = 2.2581279277801514, clipped_norm = 1.0
Epoch 10 (batch = 0): Loss: -0.21796736121177673: Cost: 6.501315593719482


batch calculation at epoch 10: 626it [12:57,  1.22s/it]

grad_global_norm = 2.201230049133301, clipped_norm = 0.9999999403953552
Epoch 10 (batch = 625): Loss: -0.2015887051820755: Cost: 6.536997318267822


batch calculation at epoch 10: 1250it [25:36,  1.23s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 10)


Rollout greedy execution: 10it [00:06,  1.54it/s]


Epoch 10 candidate mean 6.494130611419678, baseline epoch 10 mean 6.505194664001465, difference -0.01106405258178711
p-value: 1.7513458050893242e-05
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 10)


Rollout greedy execution: 10it [00:06,  1.56it/s]
Rollout greedy execution: 10it [00:06,  1.56it/s]


Validation score: 6.507400035858154
2020-06-07 02:22:09 Epoch 10: Loss: -0.19200335443019867: Cost: 6.534309387207031


Rollout greedy execution: 625it [07:57,  1.31it/s]


Current decode type: sampling


batch calculation at epoch 11: 1it [00:01,  1.21s/it]

grad_global_norm = 2.6336774826049805, clipped_norm = 1.0
Epoch 11 (batch = 0): Loss: -0.18373234570026398: Cost: 6.49666166305542


batch calculation at epoch 11: 626it [12:50,  1.21s/it]

grad_global_norm = 2.2778820991516113, clipped_norm = 0.9999999403953552
Epoch 11 (batch = 625): Loss: -0.20478226244449615: Cost: 6.52440881729126


batch calculation at epoch 11: 1250it [25:35,  1.23s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 11)


Rollout greedy execution: 10it [00:06,  1.59it/s]


Epoch 11 candidate mean 6.485752105712891, baseline epoch 11 mean 6.495623588562012, difference -0.009871482849121094
p-value: 2.3992673995432067e-05
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 11)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 10it [00:06,  1.62it/s]


Validation score: 6.49560022354126
2020-06-07 03:02:47 Epoch 11: Loss: -0.19589664041996002: Cost: 6.522273540496826


Rollout greedy execution: 625it [07:51,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 12: 1it [00:01,  1.26s/it]

grad_global_norm = 2.8880045413970947, clipped_norm = 0.9999999403953552
Epoch 12 (batch = 0): Loss: -0.19616958498954773: Cost: 6.5063252449035645


batch calculation at epoch 12: 626it [12:41,  1.22s/it]

grad_global_norm = 1.9390681982040405, clipped_norm = 0.9999999403953552
Epoch 12 (batch = 625): Loss: -0.20292453467845917: Cost: 6.516811847686768


batch calculation at epoch 12: 1250it [25:18,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 12)


Rollout greedy execution: 10it [00:06,  1.55it/s]


Epoch 12 candidate mean 6.496525764465332, baseline epoch 12 mean 6.501457214355469, difference -0.004931449890136719
p-value: 0.016261618157257327
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 12)


Rollout greedy execution: 10it [00:06,  1.51it/s]
Rollout greedy execution: 10it [00:06,  1.53it/s]


Validation score: 6.490900039672852
2020-06-07 03:43:06 Epoch 12: Loss: -0.19394943118095398: Cost: 6.512768268585205


Rollout greedy execution: 625it [07:53,  1.32it/s]


Current decode type: sampling


batch calculation at epoch 13: 1it [00:01,  1.25s/it]

grad_global_norm = 2.2972331047058105, clipped_norm = 0.9999999403953552
Epoch 13 (batch = 0): Loss: -0.15464957058429718: Cost: 6.50694465637207


batch calculation at epoch 13: 626it [13:05,  1.23s/it]

grad_global_norm = 2.5945076942443848, clipped_norm = 0.9999999403953552
Epoch 13 (batch = 625): Loss: -0.18399067223072052: Cost: 6.506681442260742


batch calculation at epoch 13: 1250it [25:43,  1.23s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 13)


Rollout greedy execution: 10it [00:06,  1.54it/s]


Epoch 13 candidate mean 6.4705095291137695, baseline epoch 13 mean 6.478974342346191, difference -0.008464813232421875
p-value: 0.0001730673417193323
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 13)


Rollout greedy execution: 10it [00:06,  1.55it/s]
Rollout greedy execution: 10it [00:06,  1.57it/s]


Validation score: 6.484799861907959
2020-06-07 04:23:55 Epoch 13: Loss: -0.17815600335597992: Cost: 6.5049638748168945


Rollout greedy execution: 625it [08:03,  1.29it/s]


Current decode type: sampling


batch calculation at epoch 14: 1it [00:01,  1.31s/it]

grad_global_norm = 2.0985209941864014, clipped_norm = 0.9999999403953552
Epoch 14 (batch = 0): Loss: -0.14862298965454102: Cost: 6.5048828125


batch calculation at epoch 14: 626it [12:34,  1.24s/it]

grad_global_norm = 2.0279595851898193, clipped_norm = 1.0000001192092896
Epoch 14 (batch = 625): Loss: -0.1771789789199829: Cost: 6.496262550354004


batch calculation at epoch 14: 1250it [25:16,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 14)


Rollout greedy execution: 10it [00:06,  1.52it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 14 candidate mean 6.477578163146973, baseline epoch 14 mean 6.474856853485107, difference 0.0027213096618652344


Rollout greedy execution: 10it [00:06,  1.49it/s]


Validation score: 6.485899925231934
2020-06-07 05:04:13 Epoch 14: Loss: -0.17093615233898163: Cost: 6.49514627456665


Rollout greedy execution: 625it [08:10,  1.27it/s]


Current decode type: sampling


batch calculation at epoch 15: 1it [00:01,  1.21s/it]

grad_global_norm = 2.5103981494903564, clipped_norm = 0.9999999403953552
Epoch 15 (batch = 0): Loss: -0.1679939180612564: Cost: 6.475647449493408


batch calculation at epoch 15: 626it [12:41,  1.18s/it]

grad_global_norm = 2.150024175643921, clipped_norm = 1.0
Epoch 15 (batch = 625): Loss: -0.15677358210086823: Cost: 6.491876602172852


batch calculation at epoch 15: 1250it [25:14,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 15)


Rollout greedy execution: 10it [00:06,  1.55it/s]


Epoch 15 candidate mean 6.46320915222168, baseline epoch 15 mean 6.474856853485107, difference -0.011647701263427734
p-value: 4.1572513061616516e-07
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 15)


Rollout greedy execution: 10it [00:06,  1.53it/s]
Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.476600170135498
2020-06-07 05:44:50 Epoch 15: Loss: -0.14783626794815063: Cost: 6.488997459411621


Rollout greedy execution: 625it [07:55,  1.32it/s]


Current decode type: sampling


batch calculation at epoch 16: 1it [00:01,  1.31s/it]

grad_global_norm = 2.1039817333221436, clipped_norm = 0.9999998807907104
Epoch 16 (batch = 0): Loss: -0.18498404324054718: Cost: 6.482642650604248


batch calculation at epoch 16: 626it [12:41,  1.23s/it]

grad_global_norm = 2.2416179180145264, clipped_norm = 0.9999999403953552
Epoch 16 (batch = 625): Loss: -0.16402141749858856: Cost: 6.486396312713623


batch calculation at epoch 16: 1250it [25:13,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 16)


Rollout greedy execution: 10it [00:06,  1.61it/s]


Epoch 16 candidate mean 6.4686126708984375, baseline epoch 16 mean 6.475635051727295, difference -0.007022380828857422
p-value: 0.0014760763392450726
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 16)


Rollout greedy execution: 10it [00:06,  1.56it/s]
Rollout greedy execution: 10it [00:06,  1.57it/s]


Validation score: 6.466700077056885
2020-06-07 06:25:06 Epoch 16: Loss: -0.15529118478298187: Cost: 6.483885288238525


Rollout greedy execution: 625it [07:51,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 17: 1it [00:01,  1.26s/it]

grad_global_norm = 2.1390695571899414, clipped_norm = 1.0
Epoch 17 (batch = 0): Loss: -0.08252696692943573: Cost: 6.495846271514893


batch calculation at epoch 17: 626it [12:30,  1.17s/it]

grad_global_norm = 2.428929567337036, clipped_norm = 1.0
Epoch 17 (batch = 625): Loss: -0.16158920526504517: Cost: 6.482570171356201


batch calculation at epoch 17: 1250it [25:04,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 17)


Rollout greedy execution: 10it [00:06,  1.57it/s]


Epoch 17 candidate mean 6.4653120040893555, baseline epoch 17 mean 6.469914436340332, difference -0.0046024322509765625
p-value: 0.022923105734355256
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 17)


Rollout greedy execution: 10it [00:06,  1.52it/s]
Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.465099811553955
2020-06-07 07:05:11 Epoch 17: Loss: -0.15457500517368317: Cost: 6.479366779327393


Rollout greedy execution: 625it [07:53,  1.32it/s]


Current decode type: sampling


batch calculation at epoch 18: 1it [00:01,  1.36s/it]

grad_global_norm = 1.7366855144500732, clipped_norm = 1.0
Epoch 18 (batch = 0): Loss: -0.09071822464466095: Cost: 6.471042156219482


batch calculation at epoch 18: 626it [12:32,  1.26s/it]

grad_global_norm = 3.259749174118042, clipped_norm = 1.0
Epoch 18 (batch = 625): Loss: -0.15657329559326172: Cost: 6.47388219833374


batch calculation at epoch 18: 1250it [25:01,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 18)


Rollout greedy execution: 10it [00:06,  1.53it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 18 candidate mean 6.447404861450195, baseline epoch 18 mean 6.4495158195495605, difference -0.0021109580993652344
p-value: 0.17543968158349865


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.460599899291992
2020-06-07 07:45:04 Epoch 18: Loss: -0.15085469186306: Cost: 6.472021579742432


Rollout greedy execution: 625it [07:49,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 19: 1it [00:01,  1.30s/it]

grad_global_norm = 2.2028136253356934, clipped_norm = 0.9999999403953552
Epoch 19 (batch = 0): Loss: -0.11168121546506882: Cost: 6.46147346496582


batch calculation at epoch 19: 626it [12:35,  1.17s/it]

grad_global_norm = 2.300544500350952, clipped_norm = 1.0
Epoch 19 (batch = 625): Loss: -0.14445678889751434: Cost: 6.470775127410889


batch calculation at epoch 19: 1250it [25:07,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 19)


Rollout greedy execution: 10it [00:06,  1.59it/s]


Epoch 19 candidate mean 6.43676233291626, baseline epoch 19 mean 6.4495158195495605, difference -0.012753486633300781
p-value: 4.281182266896442e-08
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 19)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.449900150299072
2020-06-07 08:25:08 Epoch 19: Loss: -0.13702167570590973: Cost: 6.467982292175293


Rollout greedy execution: 625it [07:42,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 20: 1it [00:01,  1.28s/it]

grad_global_norm = 2.0467915534973145, clipped_norm = 0.9999999403953552
Epoch 20 (batch = 0): Loss: -0.12158533930778503: Cost: 6.409080982208252


batch calculation at epoch 20: 626it [12:44,  1.21s/it]

grad_global_norm = 3.0554885864257812, clipped_norm = 1.0
Epoch 20 (batch = 625): Loss: -0.15290698409080505: Cost: 6.462553977966309


batch calculation at epoch 20: 1250it [25:22,  1.22s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 20)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 20 candidate mean 6.440332889556885, baseline epoch 20 mean 6.438410758972168, difference 0.0019221305847167969


Rollout greedy execution: 10it [00:06,  1.57it/s]


Validation score: 6.453400135040283
2020-06-07 09:05:13 Epoch 20: Loss: -0.1486552506685257: Cost: 6.461426258087158


Rollout greedy execution: 625it [07:45,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 21: 1it [00:01,  1.25s/it]

grad_global_norm = 4.3570475578308105, clipped_norm = 1.0
Epoch 21 (batch = 0): Loss: -0.16392575204372406: Cost: 6.474236965179443


batch calculation at epoch 21: 626it [12:26,  1.20s/it]

grad_global_norm = 3.553069591522217, clipped_norm = 1.0
Epoch 21 (batch = 625): Loss: -0.13226917386054993: Cost: 6.455323219299316


batch calculation at epoch 21: 1250it [25:09,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 21)


Rollout greedy execution: 10it [00:06,  1.64it/s]


Epoch 21 candidate mean 6.426199436187744, baseline epoch 21 mean 6.438410758972168, difference -0.012211322784423828
p-value: 4.4371873873165096e-08
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 21)


Rollout greedy execution: 10it [00:06,  1.58it/s]
Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.442200183868408
2020-06-07 09:45:17 Epoch 21: Loss: -0.13017532229423523: Cost: 6.455892562866211


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 22: 1it [00:01,  1.38s/it]

grad_global_norm = 2.8741490840911865, clipped_norm = 1.0
Epoch 22 (batch = 0): Loss: -0.13239552080631256: Cost: 6.480206489562988


batch calculation at epoch 22: 626it [12:28,  1.17s/it]

grad_global_norm = 2.3188822269439697, clipped_norm = 0.9999999403953552
Epoch 22 (batch = 625): Loss: -0.1432274580001831: Cost: 6.454173564910889


batch calculation at epoch 22: 1250it [25:12,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 22)


Rollout greedy execution: 10it [00:06,  1.65it/s]


Epoch 22 candidate mean 6.427445411682129, baseline epoch 22 mean 6.435083866119385, difference -0.007638454437255859
p-value: 0.00020435759044892652
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 22)


Rollout greedy execution: 10it [00:06,  1.49it/s]
Rollout greedy execution: 10it [00:07,  1.42it/s]


Validation score: 6.4369001388549805
2020-06-07 10:25:16 Epoch 22: Loss: -0.13880714774131775: Cost: 6.451950550079346


Rollout greedy execution: 625it [07:43,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 23: 1it [00:01,  1.27s/it]

grad_global_norm = 1.7814891338348389, clipped_norm = 1.0
Epoch 23 (batch = 0): Loss: -0.09564480185508728: Cost: 6.469274997711182


batch calculation at epoch 23: 626it [12:31,  1.17s/it]

grad_global_norm = 2.239562511444092, clipped_norm = 1.0
Epoch 23 (batch = 625): Loss: -0.14611920714378357: Cost: 6.449062824249268


batch calculation at epoch 23: 1250it [24:48,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 23)


Rollout greedy execution: 10it [00:06,  1.54it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 23 candidate mean 6.416490077972412, baseline epoch 23 mean 6.417490005493164, difference -0.0009999275207519531
p-value: 0.31588005631760374


Rollout greedy execution: 10it [00:06,  1.54it/s]


Validation score: 6.437099933624268
2020-06-07 11:04:51 Epoch 23: Loss: -0.14095951616764069: Cost: 6.448420524597168


Rollout greedy execution: 625it [07:37,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 24: 1it [00:01,  1.18s/it]

grad_global_norm = 2.9091992378234863, clipped_norm = 1.0
Epoch 24 (batch = 0): Loss: -0.15228985249996185: Cost: 6.4305267333984375


batch calculation at epoch 24: 626it [12:28,  1.20s/it]

grad_global_norm = 1.9595445394515991, clipped_norm = 1.0
Epoch 24 (batch = 625): Loss: -0.13068315386772156: Cost: 6.445725440979004


batch calculation at epoch 24: 1250it [24:49,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 24)


Rollout greedy execution: 10it [00:06,  1.62it/s]


Epoch 24 candidate mean 6.413520336151123, baseline epoch 24 mean 6.417490005493164, difference -0.003969669342041016
p-value: 0.032687665758715706
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 24)


Rollout greedy execution: 10it [00:06,  1.59it/s]
Rollout greedy execution: 10it [00:06,  1.61it/s]


Validation score: 6.431700229644775
2020-06-07 11:44:21 Epoch 24: Loss: -0.12759855389595032: Cost: 6.444563388824463


Rollout greedy execution: 625it [07:41,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 25: 1it [00:01,  1.29s/it]

grad_global_norm = 1.840517520904541, clipped_norm = 0.9999999403953552
Epoch 25 (batch = 0): Loss: -0.08413013815879822: Cost: 6.479498863220215


batch calculation at epoch 25: 626it [12:22,  1.17s/it]

grad_global_norm = 2.4984893798828125, clipped_norm = 1.0
Epoch 25 (batch = 625): Loss: -0.13351313769817352: Cost: 6.439910411834717


batch calculation at epoch 25: 1250it [24:40,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 25)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 25 candidate mean 6.419683933258057, baseline epoch 25 mean 6.420102119445801, difference -0.0004181861877441406
p-value: 0.42329364549162346


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.427700042724609
2020-06-07 12:23:43 Epoch 25: Loss: -0.13245736062526703: Cost: 6.440038204193115


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 26: 1it [00:01,  1.26s/it]

grad_global_norm = 2.2832224369049072, clipped_norm = 0.9999999403953552
Epoch 26 (batch = 0): Loss: -0.18418432772159576: Cost: 6.441226959228516


batch calculation at epoch 26: 626it [12:23,  1.16s/it]

grad_global_norm = 2.196354389190674, clipped_norm = 1.0000001192092896
Epoch 26 (batch = 625): Loss: -0.12975843250751495: Cost: 6.43968391418457


batch calculation at epoch 26: 1250it [24:48,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 26)


Rollout greedy execution: 10it [00:06,  1.58it/s]


Epoch 26 candidate mean 6.4164252281188965, baseline epoch 26 mean 6.420102119445801, difference -0.003676891326904297
p-value: 0.03507461533138866
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 26)


Rollout greedy execution: 10it [00:06,  1.57it/s]
Rollout greedy execution: 10it [00:06,  1.53it/s]


Validation score: 6.423399925231934
2020-06-07 13:03:17 Epoch 26: Loss: -0.12416618317365646: Cost: 6.436783313751221


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 27: 1it [00:01,  1.17s/it]

grad_global_norm = 1.6502678394317627, clipped_norm = 0.9999999403953552
Epoch 27 (batch = 0): Loss: -0.09998653829097748: Cost: 6.449325084686279


batch calculation at epoch 27: 626it [12:07,  1.24s/it]

grad_global_norm = 2.4822328090667725, clipped_norm = 1.0
Epoch 27 (batch = 625): Loss: -0.13719606399536133: Cost: 6.434462547302246


batch calculation at epoch 27: 1250it [24:10,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 27)


Rollout greedy execution: 10it [00:06,  1.64it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 27 candidate mean 6.4155497550964355, baseline epoch 27 mean 6.414726734161377, difference 0.0008230209350585938


Rollout greedy execution: 10it [00:06,  1.61it/s]


Validation score: 6.424099922180176
2020-06-07 13:42:10 Epoch 27: Loss: -0.13419055938720703: Cost: 6.432717800140381


Rollout greedy execution: 625it [07:41,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 28: 1it [00:01,  1.16s/it]

grad_global_norm = 3.173778533935547, clipped_norm = 0.9999999403953552
Epoch 28 (batch = 0): Loss: -0.18681292235851288: Cost: 6.457817077636719


batch calculation at epoch 28: 626it [12:16,  1.17s/it]

grad_global_norm = 2.4756340980529785, clipped_norm = 1.0
Epoch 28 (batch = 625): Loss: -0.12846487760543823: Cost: 6.433662414550781


batch calculation at epoch 28: 1250it [24:30,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 28)


Rollout greedy execution: 10it [00:06,  1.66it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 28 candidate mean 6.414029121398926, baseline epoch 28 mean 6.414726734161377, difference -0.0006976127624511719
p-value: 0.37759311181047633


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.418399810791016
2020-06-07 14:21:18 Epoch 28: Loss: -0.12527436017990112: Cost: 6.43305778503418


Rollout greedy execution: 625it [07:47,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 29: 1it [00:01,  1.25s/it]

grad_global_norm = 2.05470871925354, clipped_norm = 0.9999999403953552
Epoch 29 (batch = 0): Loss: -0.14573122560977936: Cost: 6.4297075271606445


batch calculation at epoch 29: 626it [12:12,  1.18s/it]

grad_global_norm = 2.6125829219818115, clipped_norm = 1.0
Epoch 29 (batch = 625): Loss: -0.11944130063056946: Cost: 6.4287004470825195


batch calculation at epoch 29: 1250it [24:25,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 29)


Rollout greedy execution: 10it [00:06,  1.66it/s]


Epoch 29 candidate mean 6.404707908630371, baseline epoch 29 mean 6.414726734161377, difference -0.01001882553100586
p-value: 7.663770202049485e-07
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 29)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.412199974060059
2020-06-07 15:00:34 Epoch 29: Loss: -0.11727771162986755: Cost: 6.427990436553955


Rollout greedy execution: 625it [07:41,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 30: 1it [00:01,  1.15s/it]

grad_global_norm = 2.635287046432495, clipped_norm = 0.9999999403953552
Epoch 30 (batch = 0): Loss: -0.10773057490587234: Cost: 6.404685020446777


batch calculation at epoch 30: 626it [12:03,  1.17s/it]

grad_global_norm = 2.4652557373046875, clipped_norm = 1.0000001192092896
Epoch 30 (batch = 625): Loss: -0.1404227614402771: Cost: 6.426533222198486


batch calculation at epoch 30: 1250it [24:16,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 30)


Rollout greedy execution: 10it [00:06,  1.55it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 30 candidate mean 6.39043664932251, baseline epoch 30 mean 6.388433456420898, difference 0.002003192901611328


Rollout greedy execution: 10it [00:06,  1.54it/s]


Validation score: 6.412300109863281
2020-06-07 15:39:29 Epoch 30: Loss: -0.13749365508556366: Cost: 6.425553321838379


Rollout greedy execution: 625it [07:39,  1.36it/s]
batch calculation at epoch 31: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 31: 1it [00:01,  1.22s/it]

grad_global_norm = 2.680408239364624, clipped_norm = 1.0
Epoch 31 (batch = 0): Loss: -0.1459835320711136: Cost: 6.440741539001465


batch calculation at epoch 31: 626it [12:09,  1.21s/it]

grad_global_norm = 1.9725888967514038, clipped_norm = 0.9999999403953552
Epoch 31 (batch = 625): Loss: -0.12751178443431854: Cost: 6.421316146850586


batch calculation at epoch 31: 1250it [24:20,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 31)


Rollout greedy execution: 10it [00:06,  1.62it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 31 candidate mean 6.392049789428711, baseline epoch 31 mean 6.388433456420898, difference 0.0036163330078125


Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.413899898529053
2020-06-07 16:18:30 Epoch 31: Loss: -0.1273772269487381: Cost: 6.422236919403076


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 32: 1it [00:01,  1.15s/it]

grad_global_norm = 2.0705513954162598, clipped_norm = 1.0
Epoch 32 (batch = 0): Loss: -0.0945088341832161: Cost: 6.41336727142334


batch calculation at epoch 32: 626it [12:03,  1.13s/it]

grad_global_norm = 2.3827803134918213, clipped_norm = 1.0
Epoch 32 (batch = 625): Loss: -0.1214158684015274: Cost: 6.419796943664551


batch calculation at epoch 32: 1250it [24:10,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 32)


Rollout greedy execution: 10it [00:06,  1.57it/s]


Epoch 32 candidate mean 6.383491039276123, baseline epoch 32 mean 6.388433456420898, difference -0.004942417144775391
p-value: 0.007350476696930472
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 32)


Rollout greedy execution: 10it [00:06,  1.54it/s]
Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.406300067901611
2020-06-07 16:57:32 Epoch 32: Loss: -0.11781764775514603: Cost: 6.419310092926025


Rollout greedy execution: 625it [07:45,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 33: 1it [00:01,  1.23s/it]

grad_global_norm = 2.5013926029205322, clipped_norm = 1.0
Epoch 33 (batch = 0): Loss: -0.11781299859285355: Cost: 6.4553399085998535


batch calculation at epoch 33: 626it [12:26,  1.14s/it]

grad_global_norm = 2.147800922393799, clipped_norm = 0.9999999403953552
Epoch 33 (batch = 625): Loss: -0.12394716590642929: Cost: 6.41755485534668


batch calculation at epoch 33: 1250it [24:39,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 33)


Rollout greedy execution: 10it [00:06,  1.59it/s]


Epoch 33 candidate mean 6.397324085235596, baseline epoch 33 mean 6.403085231781006, difference -0.005761146545410156
p-value: 0.0017234368733198514
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 33)


Rollout greedy execution: 10it [00:06,  1.57it/s]
Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.403600215911865
2020-06-07 17:37:17 Epoch 33: Loss: -0.12221813946962357: Cost: 6.417336940765381


Rollout greedy execution: 625it [07:43,  1.35it/s]
batch calculation at epoch 34: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 34: 1it [00:01,  1.40s/it]

grad_global_norm = 1.5764296054840088, clipped_norm = 1.0
Epoch 34 (batch = 0): Loss: -0.04731985554099083: Cost: 6.397120952606201


batch calculation at epoch 34: 626it [12:15,  1.14s/it]

grad_global_norm = 2.4131455421447754, clipped_norm = 1.0
Epoch 34 (batch = 625): Loss: -0.12429741024971008: Cost: 6.417285919189453


batch calculation at epoch 34: 1250it [24:27,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 34)


Rollout greedy execution: 10it [00:06,  1.55it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 34 candidate mean 6.400348663330078, baseline epoch 34 mean 6.4028449058532715, difference -0.0024962425231933594
p-value: 0.11303102660572042


Rollout greedy execution: 10it [00:06,  1.48it/s]


Validation score: 6.4070000648498535
2020-06-07 18:16:27 Epoch 34: Loss: -0.12249568104743958: Cost: 6.416392803192139


Rollout greedy execution: 625it [07:45,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 35: 1it [00:01,  1.18s/it]

grad_global_norm = 2.3811209201812744, clipped_norm = 1.0
Epoch 35 (batch = 0): Loss: -0.11308111250400543: Cost: 6.394929885864258


batch calculation at epoch 35: 626it [12:05,  1.09s/it]

grad_global_norm = 1.9171760082244873, clipped_norm = 1.0000001192092896
Epoch 35 (batch = 625): Loss: -0.12090805172920227: Cost: 6.412716388702393


batch calculation at epoch 35: 1250it [24:12,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 35)


Rollout greedy execution: 10it [00:06,  1.62it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 35 candidate mean 6.401368141174316, baseline epoch 35 mean 6.4028449058532715, difference -0.0014767646789550781
p-value: 0.24749914180912802


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.409200191497803
2020-06-07 18:55:30 Epoch 35: Loss: -0.11565542966127396: Cost: 6.412134170532227


Rollout greedy execution: 625it [07:42,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 36: 1it [00:01,  1.22s/it]

grad_global_norm = 2.583327293395996, clipped_norm = 1.0
Epoch 36 (batch = 0): Loss: -0.1716848909854889: Cost: 6.426774978637695


batch calculation at epoch 36: 626it [12:19,  1.33s/it]

grad_global_norm = 2.6070122718811035, clipped_norm = 1.0
Epoch 36 (batch = 625): Loss: -0.11621896922588348: Cost: 6.41350793838501


batch calculation at epoch 36: 1250it [24:36,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 36)


Rollout greedy execution: 10it [00:06,  1.62it/s]


Epoch 36 candidate mean 6.39705753326416, baseline epoch 36 mean 6.4028449058532715, difference -0.005787372589111328
p-value: 0.002150828567798434
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 36)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 10it [00:06,  1.56it/s]


Validation score: 6.402900218963623
2020-06-07 19:34:52 Epoch 36: Loss: -0.11238828301429749: Cost: 6.412064552307129


Rollout greedy execution: 625it [07:38,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 37: 1it [00:01,  1.16s/it]

grad_global_norm = 2.605635166168213, clipped_norm = 1.0
Epoch 37 (batch = 0): Loss: -0.09290126711130142: Cost: 6.402355194091797


batch calculation at epoch 37: 626it [12:12,  1.14s/it]

grad_global_norm = 2.3062503337860107, clipped_norm = 0.9999998807907104
Epoch 37 (batch = 625): Loss: -0.11633366346359253: Cost: 6.408663749694824


batch calculation at epoch 37: 1250it [24:17,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 37)


Rollout greedy execution: 10it [00:06,  1.59it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 37 candidate mean 6.390361785888672, baseline epoch 37 mean 6.392021656036377, difference -0.0016598701477050781
p-value: 0.19303506723424557


Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.396999835968018
2020-06-07 20:13:47 Epoch 37: Loss: -0.11638723313808441: Cost: 6.408874034881592


Rollout greedy execution: 625it [07:37,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 38: 1it [00:01,  1.25s/it]

grad_global_norm = 2.256009578704834, clipped_norm = 0.9999999403953552
Epoch 38 (batch = 0): Loss: -0.13187071681022644: Cost: 6.435203552246094


batch calculation at epoch 38: 626it [12:55,  1.13s/it]

grad_global_norm = 3.3893320560455322, clipped_norm = 0.9999999403953552
Epoch 38 (batch = 625): Loss: -0.10960131883621216: Cost: 6.40585470199585


batch calculation at epoch 38: 1250it [25:13,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 38)


Rollout greedy execution: 10it [00:06,  1.66it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 38 candidate mean 6.400393962860107, baseline epoch 38 mean 6.392021656036377, difference 0.008372306823730469


Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.4054999351501465
2020-06-07 20:53:31 Epoch 38: Loss: -0.109725221991539: Cost: 6.4056782722473145


## Дообучение

In [1]:
import tensorflow as tf
from time import gmtime, strftime

from attention_dynamic_model import set_decode_type
from reinforce_baseline import RolloutBaseline
from train import train_model

from utils import get_cur_time
from reinforce_baseline import load_tf_model
from utils import read_from_pickle


# Resume training of the attention model from a saved checkpoint:
# load the model weights and the validation set from disk, rebuild the
# optimizer and rollout baseline, then continue the training loop.

# Training hyperparameters
SAMPLES = 1280000 # 1024*1250
BATCH = 1024
LEARNING_RATE = 0.0001
ROLLOUT_SAMPLES = 10000
NUMBER_OF_WP_EPOCHS = 1
GRAD_NORM_CLIPPING = 1.0
BATCH_VERBOSE = 625
VAL_BATCH_SIZE = 1000
VALIDATE_SET_SIZE = 10000  # not used below: the validation set is loaded from disk
SEED = 1234  # not used below in this cell
GRAPH_SIZE = 20
embedding_dim = 128

# Stamped with the current UTC date, so on a later day it differs from the
# date stamp embedded in MODEL_PATH / VAL_SET_PATH.
# NOTE(review): presumably train_model uses this to name its outputs - confirm.
FILENAME = 'VRP_{}_{}'.format(GRAPH_SIZE, strftime("%Y-%m-%d", gmtime()))

# Resume settings
START_EPOCH = 39  # the checkpoint in MODEL_PATH is from epoch 38
END_EPOCH = 100
SKIP_WARMUP = True
MODEL_PATH = 'model_checkpoint_epoch_38_VRP_20_2020-06-06.h5'
VAL_SET_PATH = 'Validation_dataset_VRP_20_2020-06-06.pkl'

# Load model from checkpoint
model_tf = load_tf_model(MODEL_PATH,
                         embedding_dim=embedding_dim,
                         graph_size=GRAPH_SIZE)
set_decode_type(model_tf, "sampling")
print(get_cur_time(), 'model loaded')

# Load the previously saved validation dataset
validation_dataset = read_from_pickle(VAL_SET_PATH)
print(get_cur_time(), 'validation dataset loaded')

# Initialize optimizer
# NOTE(review): a fresh Adam instance is created here; no optimizer state is
# loaded in this cell - confirm that is acceptable for resuming.
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

# Initialize baseline
# NOTE(review): epoch = 0 is passed even though training resumes at
# START_EPOCH - confirm this is intended.
baseline = RolloutBaseline(model_tf,
                           wp_n_epochs = NUMBER_OF_WP_EPOCHS,
                           epoch = 0,
                           num_samples=ROLLOUT_SAMPLES)
print(get_cur_time(), 'baseline initialized')

# Continue training from START_EPOCH up to END_EPOCH
train_model(optimizer,
            model_tf,
            baseline,
            validation_dataset,
            samples = SAMPLES,
            batch = BATCH,
            val_batch_size = VAL_BATCH_SIZE,
            start_epoch = START_EPOCH,
            end_epoch = END_EPOCH,
            skip_warmup = SKIP_WARMUP,
            grad_norm_clipping = GRAD_NORM_CLIPPING,
            batch_verbose = BATCH_VERBOSE,
            graph_size = GRAPH_SIZE,
            filename = FILENAME
            )

2020-06-07 22:25:12 model loaded
2020-06-07 22:25:26 validation dataset loaded


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 0)


Rollout greedy execution: 10it [00:06,  1.64it/s]


2020-06-07 22:25:36 baseline initialized


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:44,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 39: 0it [00:00, ?it/s]

Instructions for updating:
Use tf.identity instead.
grad_global_norm = 2.059997081756592, clipped_norm = 0.9999999403953552


batch calculation at epoch 39: 1it [00:01,  1.27s/it]

Epoch 39 (batch = 0): Loss: -0.07865846157073975: Cost: 6.372598648071289


batch calculation at epoch 39: 626it [12:32,  1.16s/it]

grad_global_norm = 2.487220287322998, clipped_norm = 1.0
Epoch 39 (batch = 625): Loss: -0.1007898673415184: Cost: 6.407418727874756


batch calculation at epoch 39: 1250it [24:50,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 39)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 39 candidate mean 6.40649938583374, baseline epoch 39 mean 6.397975444793701, difference 0.008523941040039062


Rollout greedy execution: 10it [00:05,  1.74it/s]


Validation score: 6.412399768829346
2020-06-07 23:05:04 Epoch 39: Loss: -0.09839483350515366: Cost: 6.4060516357421875


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:51,  1.32it/s]


Current decode type: sampling


batch calculation at epoch 40: 1it [00:01,  1.09s/it]

grad_global_norm = 2.2525532245635986, clipped_norm = 1.0
Epoch 40 (batch = 0): Loss: -0.0942109152674675: Cost: 6.433681011199951


batch calculation at epoch 40: 626it [12:28,  1.18s/it]

grad_global_norm = 2.01126766204834, clipped_norm = 1.0
Epoch 40 (batch = 625): Loss: -0.0963916927576065: Cost: 6.4054179191589355


batch calculation at epoch 40: 1250it [24:47,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 40)


Rollout greedy execution: 10it [00:06,  1.64it/s]


Epoch 40 candidate mean 6.389981269836426, baseline epoch 40 mean 6.397975444793701, difference -0.00799417495727539
p-value: 0.00011499733740728062
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 40)


Rollout greedy execution: 10it [00:05,  1.67it/s]
Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.396100044250488
2020-06-07 23:44:57 Epoch 40: Loss: -0.09455890953540802: Cost: 6.405794620513916


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:38,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 41: 1it [00:01,  1.31s/it]

grad_global_norm = 1.5324211120605469, clipped_norm = 0.9999999403953552
Epoch 41 (batch = 0): Loss: -0.0855383574962616: Cost: 6.371436595916748


batch calculation at epoch 41: 626it [12:44,  1.34s/it]

grad_global_norm = 2.196044921875, clipped_norm = 1.0
Epoch 41 (batch = 625): Loss: -0.10832128673791885: Cost: 6.401840686798096


batch calculation at epoch 41: 1250it [25:14,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 41)


Rollout greedy execution: 10it [00:06,  1.64it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 41 candidate mean 6.386145114898682, baseline epoch 41 mean 6.385082244873047, difference 0.0010628700256347656


Rollout greedy execution: 10it [00:06,  1.64it/s]


Validation score: 6.39769983291626
2020-06-08 00:24:53 Epoch 41: Loss: -0.10742364823818207: Cost: 6.402708530426025


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:36,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 42: 1it [00:01,  1.16s/it]

grad_global_norm = 1.7162870168685913, clipped_norm = 1.0
Epoch 42 (batch = 0): Loss: -0.11160313338041306: Cost: 6.394236087799072


batch calculation at epoch 42: 626it [12:21,  1.10s/it]

grad_global_norm = 2.4405786991119385, clipped_norm = 1.0
Epoch 42 (batch = 625): Loss: -0.10226013511419296: Cost: 6.3990159034729


batch calculation at epoch 42: 1250it [24:39,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 42)


Rollout greedy execution: 10it [00:06,  1.66it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 42 candidate mean 6.3846282958984375, baseline epoch 42 mean 6.385082244873047, difference -0.000453948974609375
p-value: 0.40843463386717743


Rollout greedy execution: 10it [00:05,  1.68it/s]


Validation score: 6.390900135040283
2020-06-08 01:04:58 Epoch 42: Loss: -0.10004477202892303: Cost: 6.399020195007324


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 43: 1it [00:01,  1.18s/it]

grad_global_norm = 2.4872732162475586, clipped_norm = 0.9999999403953552
Epoch 43 (batch = 0): Loss: -0.09476501494646072: Cost: 6.415476322174072


batch calculation at epoch 43: 626it [12:21,  1.23s/it]

grad_global_norm = 2.380613327026367, clipped_norm = 1.0
Epoch 43 (batch = 625): Loss: -0.09853527694940567: Cost: 6.3967366218566895


batch calculation at epoch 43: 1250it [25:03,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 43)


Rollout greedy execution: 10it [00:06,  1.50it/s]


Epoch 43 candidate mean 6.379424095153809, baseline epoch 43 mean 6.385082244873047, difference -0.005658149719238281
p-value: 0.00219402083490376
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 43)


Rollout greedy execution: 10it [00:06,  1.48it/s]
Rollout greedy execution: 10it [00:06,  1.64it/s]


Validation score: 6.39169979095459
2020-06-08 01:45:04 Epoch 43: Loss: -0.09589634835720062: Cost: 6.3978495597839355


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:34,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 44: 1it [00:01,  1.22s/it]

grad_global_norm = 2.63395094871521, clipped_norm = 1.0000001192092896
Epoch 44 (batch = 0): Loss: -0.07925314456224442: Cost: 6.410251617431641


batch calculation at epoch 44: 626it [12:22,  1.16s/it]

grad_global_norm = 2.8477048873901367, clipped_norm = 1.0
Epoch 44 (batch = 625): Loss: -0.10994474589824677: Cost: 6.398501873016357


batch calculation at epoch 44: 1250it [24:47,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 44)


Rollout greedy execution: 10it [00:06,  1.62it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 44 candidate mean 6.391948223114014, baseline epoch 44 mean 6.392017841339111, difference -6.961822509765625e-05
p-value: 0.4861840844602122


Rollout greedy execution: 10it [00:05,  1.68it/s]


Validation score: 6.389900207519531
2020-06-08 02:24:25 Epoch 44: Loss: -0.10796518623828888: Cost: 6.399214267730713


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:36,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 45: 1it [00:01,  1.29s/it]

grad_global_norm = 1.8819797039031982, clipped_norm = 1.0
Epoch 45 (batch = 0): Loss: -0.09284457564353943: Cost: 6.419193267822266


batch calculation at epoch 45: 626it [12:18,  1.18s/it]

grad_global_norm = 2.306710720062256, clipped_norm = 1.0
Epoch 45 (batch = 625): Loss: -0.0999874472618103: Cost: 6.3957037925720215


batch calculation at epoch 45: 1250it [24:43,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 45)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 45 candidate mean 6.393850803375244, baseline epoch 45 mean 6.392017841339111, difference 0.0018329620361328125


Rollout greedy execution: 10it [00:06,  1.66it/s]


Validation score: 6.392099857330322
2020-06-08 03:03:47 Epoch 45: Loss: -0.10015951842069626: Cost: 6.395529747009277


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:37,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 46: 1it [00:01,  1.25s/it]

grad_global_norm = 1.961334228515625, clipped_norm = 0.9999999403953552
Epoch 46 (batch = 0): Loss: -0.08222257345914841: Cost: 6.399576663970947


batch calculation at epoch 46: 626it [12:19,  1.21s/it]

grad_global_norm = 3.2646472454071045, clipped_norm = 1.0
Epoch 46 (batch = 625): Loss: -0.10579568147659302: Cost: 6.397339820861816


batch calculation at epoch 46: 1250it [24:48,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 46)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 46 candidate mean 6.389856815338135, baseline epoch 46 mean 6.392017841339111, difference -0.0021610260009765625
p-value: 0.14180876161903952


Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.388500213623047
2020-06-08 03:43:11 Epoch 46: Loss: -0.10228234529495239: Cost: 6.396805286407471


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:27,  1.40it/s]


Current decode type: sampling


batch calculation at epoch 47: 1it [00:01,  1.13s/it]

grad_global_norm = 3.4104394912719727, clipped_norm = 1.0
Epoch 47 (batch = 0): Loss: -0.07825085520744324: Cost: 6.39452600479126


batch calculation at epoch 47: 626it [12:08,  1.10s/it]

grad_global_norm = 2.382056951522827, clipped_norm = 1.0000001192092896
Epoch 47 (batch = 625): Loss: -0.09415193647146225: Cost: 6.394264221191406


batch calculation at epoch 47: 1250it [24:38,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 47)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 47 candidate mean 6.3909478187561035, baseline epoch 47 mean 6.392017841339111, difference -0.0010700225830078125
p-value: 0.3035371084112023


Rollout greedy execution: 10it [00:06,  1.61it/s]


Validation score: 6.3907999992370605
2020-06-08 04:22:18 Epoch 47: Loss: -0.09602636098861694: Cost: 6.395074367523193


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:30,  1.39it/s]


Current decode type: sampling


batch calculation at epoch 48: 1it [00:01,  1.19s/it]

grad_global_norm = 2.952479124069214, clipped_norm = 1.0
Epoch 48 (batch = 0): Loss: -0.09881236404180527: Cost: 6.385690212249756


batch calculation at epoch 48: 626it [12:22,  1.15s/it]

grad_global_norm = 2.1299796104431152, clipped_norm = 1.0
Epoch 48 (batch = 625): Loss: -0.09151004999876022: Cost: 6.392128944396973


batch calculation at epoch 48: 1250it [24:36,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 48)


Rollout greedy execution: 10it [00:06,  1.62it/s]


Epoch 48 candidate mean 6.384457111358643, baseline epoch 48 mean 6.392017841339111, difference -0.00756072998046875
p-value: 5.703645670630518e-05
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 48)


Rollout greedy execution: 10it [00:06,  1.66it/s]
Rollout greedy execution: 10it [00:06,  1.64it/s]


Validation score: 6.386499881744385
2020-06-08 05:01:31 Epoch 48: Loss: -0.0900188684463501: Cost: 6.392795085906982


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 49: 1it [00:01,  1.38s/it]

grad_global_norm = 2.6332919597625732, clipped_norm = 1.0
Epoch 49 (batch = 0): Loss: -0.06484883278608322: Cost: 6.3336005210876465


batch calculation at epoch 49: 626it [12:18,  1.36s/it]

grad_global_norm = 2.502432107925415, clipped_norm = 0.9999999403953552
Epoch 49 (batch = 625): Loss: -0.10315141081809998: Cost: 6.391613483428955


batch calculation at epoch 49: 1250it [24:46,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 49)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 49 candidate mean 6.363321781158447, baseline epoch 49 mean 6.3634185791015625, difference -9.679794311523438e-05
p-value: 0.48058143874086073


Rollout greedy execution: 10it [00:06,  1.61it/s]


Validation score: 6.384900093078613
2020-06-08 05:41:11 Epoch 49: Loss: -0.10404788702726364: Cost: 6.3909807205200195


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:41,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 50: 1it [00:01,  1.25s/it]

grad_global_norm = 2.8072540760040283, clipped_norm = 1.0
Epoch 50 (batch = 0): Loss: -0.09119956195354462: Cost: 6.3668928146362305


batch calculation at epoch 50: 626it [12:22,  1.20s/it]

grad_global_norm = 2.01336407661438, clipped_norm = 1.0
Epoch 50 (batch = 625): Loss: -0.0994873195886612: Cost: 6.390938758850098


batch calculation at epoch 50: 1250it [24:48,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 50)


Rollout greedy execution: 10it [00:06,  1.58it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 50 candidate mean 6.364395618438721, baseline epoch 50 mean 6.3634185791015625, difference 0.0009770393371582031


Rollout greedy execution: 10it [00:06,  1.66it/s]


Validation score: 6.384099960327148
2020-06-08 06:20:41 Epoch 50: Loss: -0.10039486736059189: Cost: 6.391443252563477


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:44,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 51: 1it [00:01,  1.11s/it]

grad_global_norm = 2.770284414291382, clipped_norm = 1.0
Epoch 51 (batch = 0): Loss: -0.07866792380809784: Cost: 6.407639980316162


batch calculation at epoch 51: 626it [12:19,  1.41s/it]

grad_global_norm = 2.038313150405884, clipped_norm = 1.0
Epoch 51 (batch = 625): Loss: -0.10071976482868195: Cost: 6.388184070587158


batch calculation at epoch 51: 1250it [24:57,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 51)


Rollout greedy execution: 10it [00:05,  1.69it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 51 candidate mean 6.366724967956543, baseline epoch 51 mean 6.3634185791015625, difference 0.0033063888549804688


Rollout greedy execution: 10it [00:05,  1.71it/s]


Validation score: 6.391300201416016
2020-06-08 07:00:24 Epoch 51: Loss: -0.09760268032550812: Cost: 6.387351036071777


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:37,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 52: 1it [00:01,  1.12s/it]

grad_global_norm = 2.5777649879455566, clipped_norm = 1.0
Epoch 52 (batch = 0): Loss: -0.11196602880954742: Cost: 6.37679386138916


batch calculation at epoch 52: 626it [12:25,  1.17s/it]

grad_global_norm = 3.5175621509552, clipped_norm = 1.0
Epoch 52 (batch = 625): Loss: -0.10055495798587799: Cost: 6.388556003570557


batch calculation at epoch 52: 1250it [24:38,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 52)


Rollout greedy execution: 10it [00:05,  1.72it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 52 candidate mean 6.364485740661621, baseline epoch 52 mean 6.3634185791015625, difference 0.0010671615600585938


Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.388999938964844
2020-06-08 07:39:42 Epoch 52: Loss: -0.09820901602506638: Cost: 6.387506484985352


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:42,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 53: 1it [00:01,  1.20s/it]

grad_global_norm = 2.1316990852355957, clipped_norm = 0.9999999403953552
Epoch 53 (batch = 0): Loss: -0.09405030310153961: Cost: 6.3820414543151855


batch calculation at epoch 53: 626it [12:12,  1.15s/it]

grad_global_norm = 1.8902820348739624, clipped_norm = 0.9999999403953552
Epoch 53 (batch = 625): Loss: -0.09574245661497116: Cost: 6.3871259689331055


batch calculation at epoch 53: 1250it [24:28,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 53)


Rollout greedy execution: 10it [00:06,  1.64it/s]


Epoch 53 candidate mean 6.355228424072266, baseline epoch 53 mean 6.3634185791015625, difference -0.008190155029296875
p-value: 2.0392019181374124e-05
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 53)


Rollout greedy execution: 10it [00:06,  1.62it/s]
Rollout greedy execution: 10it [00:06,  1.64it/s]


Validation score: 6.37939977645874
2020-06-08 08:19:02 Epoch 53: Loss: -0.09600865840911865: Cost: 6.3864312171936035


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:46,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 54: 1it [00:01,  1.19s/it]

grad_global_norm = 1.966252326965332, clipped_norm = 0.9999999403953552
Epoch 54 (batch = 0): Loss: -0.0655786469578743: Cost: 6.372417449951172


batch calculation at epoch 54: 626it [12:27,  1.16s/it]

grad_global_norm = 2.207977771759033, clipped_norm = 1.0
Epoch 54 (batch = 625): Loss: -0.09441984444856644: Cost: 6.386265754699707


batch calculation at epoch 54: 1250it [24:52,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 54)


Rollout greedy execution: 10it [00:06,  1.61it/s]


Epoch 54 candidate mean 6.359004020690918, baseline epoch 54 mean 6.362212657928467, difference -0.003208637237548828
p-value: 0.04962431120572094
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 54)


Rollout greedy execution: 10it [00:05,  1.68it/s]
Rollout greedy execution: 10it [00:05,  1.68it/s]


Validation score: 6.385799884796143
2020-06-08 08:59:04 Epoch 54: Loss: -0.09325049072504044: Cost: 6.385383129119873


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:34,  1.38it/s]


Current decode type: sampling


batch calculation at epoch 55: 1it [00:01,  1.19s/it]

grad_global_norm = 1.712986707687378, clipped_norm = 1.0
Epoch 55 (batch = 0): Loss: -0.06304999440908432: Cost: 6.411630630493164


batch calculation at epoch 55: 626it [12:30,  1.13s/it]

grad_global_norm = 2.162870407104492, clipped_norm = 1.0
Epoch 55 (batch = 625): Loss: -0.0829433798789978: Cost: 6.382692337036133


batch calculation at epoch 55: 1250it [24:44,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 55)


Rollout greedy execution: 10it [00:06,  1.56it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 55 candidate mean 6.363837718963623, baseline epoch 55 mean 6.360711574554443, difference 0.0031261444091796875


Rollout greedy execution: 10it [00:06,  1.66it/s]


Validation score: 6.381899833679199
2020-06-08 09:38:36 Epoch 55: Loss: -0.08389036357402802: Cost: 6.384189605712891


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:35,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 56: 1it [00:01,  1.22s/it]

grad_global_norm = 2.192235231399536, clipped_norm = 0.9999999403953552
Epoch 56 (batch = 0): Loss: -0.11475807428359985: Cost: 6.395216464996338


batch calculation at epoch 56: 626it [12:35,  1.20s/it]

grad_global_norm = 2.3905141353607178, clipped_norm = 1.0
Epoch 56 (batch = 625): Loss: -0.08472008258104324: Cost: 6.385876178741455


batch calculation at epoch 56: 1250it [24:43,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 56)


Rollout greedy execution: 10it [00:06,  1.64it/s]


Epoch 56 candidate mean 6.353456497192383, baseline epoch 56 mean 6.360711574554443, difference -0.007255077362060547
p-value: 4.576901936872116e-05
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 56)


Rollout greedy execution: 10it [00:06,  1.65it/s]
Rollout greedy execution: 10it [00:06,  1.62it/s]


Validation score: 6.376399993896484
2020-06-08 10:18:04 Epoch 56: Loss: -0.08405719697475433: Cost: 6.383686542510986


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 57: 1it [00:01,  1.29s/it]

grad_global_norm = 1.7186000347137451, clipped_norm = 1.0
Epoch 57 (batch = 0): Loss: -0.06257293373346329: Cost: 6.427516460418701


batch calculation at epoch 57: 626it [12:17,  1.18s/it]

grad_global_norm = 2.498929262161255, clipped_norm = 1.0
Epoch 57 (batch = 625): Loss: -0.09852327406406403: Cost: 6.3831963539123535


batch calculation at epoch 57: 1250it [24:48,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 57)


Rollout greedy execution: 10it [00:07,  1.42it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 57 candidate mean 6.382519245147705, baseline epoch 57 mean 6.375427722930908, difference 0.007091522216796875


Rollout greedy execution: 10it [00:05,  1.67it/s]


Validation score: 6.386000156402588
2020-06-08 10:57:33 Epoch 57: Loss: -0.0997629389166832: Cost: 6.38320255279541


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:36,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 58: 1it [00:01,  1.19s/it]

grad_global_norm = 1.9355108737945557, clipped_norm = 0.9999999403953552
Epoch 58 (batch = 0): Loss: -0.169880211353302: Cost: 6.4111328125


batch calculation at epoch 58: 626it [12:23,  1.14s/it]

grad_global_norm = 2.2194297313690186, clipped_norm = 1.0
Epoch 58 (batch = 625): Loss: -0.0953064113855362: Cost: 6.3810882568359375


batch calculation at epoch 58: 1250it [24:34,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 58)


Rollout greedy execution: 10it [00:06,  1.64it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 58 candidate mean 6.378044605255127, baseline epoch 58 mean 6.375427722930908, difference 0.00261688232421875


Rollout greedy execution: 10it [00:05,  1.67it/s]


Validation score: 6.376800060272217
2020-06-08 11:36:45 Epoch 58: Loss: -0.09553079307079315: Cost: 6.381612777709961


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 59: 1it [00:01,  1.20s/it]

grad_global_norm = 2.304781436920166, clipped_norm = 1.0
Epoch 59 (batch = 0): Loss: -0.11052589118480682: Cost: 6.378359317779541


batch calculation at epoch 59: 626it [12:04,  1.17s/it]

grad_global_norm = 2.808375120162964, clipped_norm = 1.0
Epoch 59 (batch = 625): Loss: -0.0915130227804184: Cost: 6.379581928253174


batch calculation at epoch 59: 1250it [24:13,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 59)


Rollout greedy execution: 10it [00:06,  1.65it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 59 candidate mean 6.37575626373291, baseline epoch 59 mean 6.375427722930908, difference 0.0003285408020019531


Rollout greedy execution: 10it [00:05,  1.68it/s]


Validation score: 6.375699996948242
2020-06-08 12:15:38 Epoch 59: Loss: -0.0914168581366539: Cost: 6.380200386047363


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:38,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 60: 1it [00:01,  1.17s/it]

grad_global_norm = 2.552830219268799, clipped_norm = 0.9999999403953552
Epoch 60 (batch = 0): Loss: -0.10213819891214371: Cost: 6.371699810028076


batch calculation at epoch 60: 626it [12:17,  1.21s/it]

grad_global_norm = 2.412914276123047, clipped_norm = 1.0
Epoch 60 (batch = 625): Loss: -0.0982663631439209: Cost: 6.3805437088012695


batch calculation at epoch 60: 1250it [24:23,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 60)


Rollout greedy execution: 10it [00:06,  1.65it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 60 candidate mean 6.384068012237549, baseline epoch 60 mean 6.375427722930908, difference 0.008640289306640625


Rollout greedy execution: 10it [00:06,  1.66it/s]


Validation score: 6.38129997253418
2020-06-08 12:54:37 Epoch 60: Loss: -0.09368979185819626: Cost: 6.37984037399292


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:37,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 61: 1it [00:01,  1.19s/it]

grad_global_norm = 2.980952262878418, clipped_norm = 1.0
Epoch 61 (batch = 0): Loss: -0.1541183739900589: Cost: 6.421384334564209


batch calculation at epoch 61: 626it [12:17,  1.18s/it]

grad_global_norm = 2.131049871444702, clipped_norm = 1.0
Epoch 61 (batch = 625): Loss: -0.09228410571813583: Cost: 6.379991054534912


batch calculation at epoch 61: 1250it [24:29,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 61)


Rollout greedy execution: 10it [00:06,  1.66it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 61 candidate mean 6.375067234039307, baseline epoch 61 mean 6.375427722930908, difference -0.0003604888916015625
p-value: 0.42553742459869487


Rollout greedy execution: 10it [00:06,  1.48it/s]


Validation score: 6.376999855041504
2020-06-08 13:33:44 Epoch 61: Loss: -0.09122655540704727: Cost: 6.3796820640563965


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:36,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 62: 1it [00:01,  1.12s/it]

grad_global_norm = 3.232461929321289, clipped_norm = 1.0
Epoch 62 (batch = 0): Loss: -0.10113873332738876: Cost: 6.355626106262207


batch calculation at epoch 62: 626it [12:16,  1.20s/it]

grad_global_norm = 2.657528877258301, clipped_norm = 1.0
Epoch 62 (batch = 625): Loss: -0.0850859209895134: Cost: 6.379701137542725


batch calculation at epoch 62: 1250it [24:25,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 62)


Rollout greedy execution: 10it [00:05,  1.70it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 62 candidate mean 6.382635593414307, baseline epoch 62 mean 6.375427722930908, difference 0.0072078704833984375


Rollout greedy execution: 10it [00:05,  1.72it/s]


Validation score: 6.382199764251709
2020-06-08 14:13:43 Epoch 62: Loss: -0.08519050478935242: Cost: 6.37868595123291


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:41,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 63: 1it [00:01,  1.16s/it]

grad_global_norm = 2.065389633178711, clipped_norm = 1.0
Epoch 63 (batch = 0): Loss: -0.07898905873298645: Cost: 6.418043613433838


batch calculation at epoch 63: 626it [12:33,  1.21s/it]

grad_global_norm = 2.8091306686401367, clipped_norm = 0.9999999403953552
Epoch 63 (batch = 625): Loss: -0.08560287207365036: Cost: 6.379286289215088


batch calculation at epoch 63: 1250it [25:09,  1.21s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 63)


Rollout greedy execution: 10it [00:06,  1.50it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 63 candidate mean 6.3729071617126465, baseline epoch 63 mean 6.375427722930908, difference -0.0025205612182617188
p-value: 0.08825919737501542


Rollout greedy execution: 10it [00:06,  1.54it/s]


Validation score: 6.36959981918335
2020-06-08 14:53:33 Epoch 63: Loss: -0.08517199009656906: Cost: 6.378964424133301


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:44,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 64: 1it [00:01,  1.31s/it]

grad_global_norm = 1.856056809425354, clipped_norm = 1.0
Epoch 64 (batch = 0): Loss: -0.0766516923904419: Cost: 6.377779483795166


batch calculation at epoch 64: 626it [12:14,  1.18s/it]

grad_global_norm = 1.8723658323287964, clipped_norm = 1.0
Epoch 64 (batch = 625): Loss: -0.08513404428958893: Cost: 6.3769989013671875


batch calculation at epoch 64: 1250it [24:45,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 64)


Rollout greedy execution: 10it [00:05,  1.72it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 64 candidate mean 6.376789569854736, baseline epoch 64 mean 6.375427722930908, difference 0.001361846923828125


Rollout greedy execution: 10it [00:05,  1.69it/s]


Validation score: 6.374000072479248
2020-06-08 15:33:55 Epoch 64: Loss: -0.08205408602952957: Cost: 6.37615966796875


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 65: 1it [00:01,  1.20s/it]

grad_global_norm = 1.9775856733322144, clipped_norm = 0.9999999403953552
Epoch 65 (batch = 0): Loss: -0.06478305160999298: Cost: 6.356149673461914


batch calculation at epoch 65: 626it [12:11,  1.15s/it]

grad_global_norm = 3.4374561309814453, clipped_norm = 0.9999999403953552
Epoch 65 (batch = 625): Loss: -0.07939346134662628: Cost: 6.378324031829834


batch calculation at epoch 65: 1250it [24:27,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 65)


Rollout greedy execution: 10it [00:06,  1.64it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 65 candidate mean 6.3743181228637695, baseline epoch 65 mean 6.375427722930908, difference -0.0011096000671386719
p-value: 0.2894831462900507


Rollout greedy execution: 10it [00:06,  1.66it/s]


Validation score: 6.370299816131592
2020-06-08 16:13:01 Epoch 65: Loss: -0.07951574772596359: Cost: 6.376607418060303


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:41,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 66: 1it [00:01,  1.40s/it]

grad_global_norm = 2.015855550765991, clipped_norm = 0.9999999403953552
Epoch 66 (batch = 0): Loss: -0.044117387384176254: Cost: 6.366779327392578


batch calculation at epoch 66: 626it [12:15,  1.13s/it]

grad_global_norm = 1.9370372295379639, clipped_norm = 1.0
Epoch 66 (batch = 625): Loss: -0.07854682207107544: Cost: 6.375291347503662


batch calculation at epoch 66: 1250it [24:29,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 66)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 66 candidate mean 6.373249053955078, baseline epoch 66 mean 6.375427722930908, difference -0.002178668975830078
p-value: 0.13173810913883552


Rollout greedy execution: 10it [00:06,  1.62it/s]


Validation score: 6.377699851989746
2020-06-08 16:52:11 Epoch 66: Loss: -0.07780922949314117: Cost: 6.3746490478515625


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:46,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 67: 1it [00:01,  1.26s/it]

grad_global_norm = 2.2194085121154785, clipped_norm = 0.9999999403953552
Epoch 67 (batch = 0): Loss: -0.1443765014410019: Cost: 6.3302130699157715


batch calculation at epoch 67: 626it [12:05,  1.17s/it]

grad_global_norm = 2.872657060623169, clipped_norm = 0.9999999403953552
Epoch 67 (batch = 625): Loss: -0.07849711924791336: Cost: 6.3756842613220215


batch calculation at epoch 67: 1250it [24:12,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 67)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 67 candidate mean 6.376284122467041, baseline epoch 67 mean 6.375427722930908, difference 0.0008563995361328125


Rollout greedy execution: 10it [00:06,  1.62it/s]


Validation score: 6.3790998458862305
2020-06-08 17:31:10 Epoch 67: Loss: -0.07766927033662796: Cost: 6.374609470367432


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:44,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 68: 1it [00:01,  1.21s/it]

grad_global_norm = 2.4990217685699463, clipped_norm = 1.0
Epoch 68 (batch = 0): Loss: -0.10885477066040039: Cost: 6.400690078735352


batch calculation at epoch 68: 626it [12:21,  1.19s/it]

grad_global_norm = 1.8384804725646973, clipped_norm = 1.0
Epoch 68 (batch = 625): Loss: -0.07696808874607086: Cost: 6.3754096031188965


batch calculation at epoch 68: 1250it [24:34,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 68)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 68 candidate mean 6.373244285583496, baseline epoch 68 mean 6.375427722930908, difference -0.0021834373474121094
p-value: 0.129534678045339


Rollout greedy execution: 10it [00:06,  1.61it/s]


Validation score: 6.3744001388549805
2020-06-08 18:10:26 Epoch 68: Loss: -0.07485552877187729: Cost: 6.374153137207031


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 69: 1it [00:01,  1.37s/it]

grad_global_norm = 2.1728663444519043, clipped_norm = 0.9999999403953552
Epoch 69 (batch = 0): Loss: -0.08702346682548523: Cost: 6.377368450164795


batch calculation at epoch 69: 626it [12:23,  1.31s/it]

grad_global_norm = 1.975659728050232, clipped_norm = 0.9999998807907104
Epoch 69 (batch = 625): Loss: -0.07404755800962448: Cost: 6.375573635101318


batch calculation at epoch 69: 1250it [24:48,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 69)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 69 candidate mean 6.37637186050415, baseline epoch 69 mean 6.375427722930908, difference 0.0009441375732421875


Rollout greedy execution: 10it [00:06,  1.59it/s]


Validation score: 6.372900009155273
2020-06-08 18:49:54 Epoch 69: Loss: -0.07411931455135345: Cost: 6.375783443450928


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:49,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 70: 1it [00:01,  1.26s/it]

grad_global_norm = 3.136547088623047, clipped_norm = 1.0
Epoch 70 (batch = 0): Loss: -0.10158820450305939: Cost: 6.402235507965088


batch calculation at epoch 70: 626it [12:09,  1.19s/it]

grad_global_norm = 3.0925352573394775, clipped_norm = 0.9999999403953552
Epoch 70 (batch = 625): Loss: -0.06925709545612335: Cost: 6.372158050537109


batch calculation at epoch 70: 1250it [24:22,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 70)


Rollout greedy execution: 10it [00:06,  1.64it/s]


Epoch 70 candidate mean 6.367822170257568, baseline epoch 70 mean 6.375427722930908, difference -0.007605552673339844
p-value: 7.030319572424188e-05
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 70)


Rollout greedy execution: 10it [00:06,  1.66it/s]
Rollout greedy execution: 10it [00:05,  1.67it/s]


Validation score: 6.373600006103516
2020-06-08 19:29:21 Epoch 70: Loss: -0.06925086677074432: Cost: 6.373531818389893


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 71: 1it [00:01,  1.35s/it]

grad_global_norm = 1.640734314918518, clipped_norm = 0.9999999403953552
Epoch 71 (batch = 0): Loss: -0.09829220920801163: Cost: 6.3588547706604


batch calculation at epoch 71: 626it [12:12,  1.20s/it]

grad_global_norm = 2.3564293384552, clipped_norm = 0.9999998807907104
Epoch 71 (batch = 625): Loss: -0.07805032283067703: Cost: 6.37141227722168


batch calculation at epoch 71: 1250it [24:31,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 71)


Rollout greedy execution: 10it [00:06,  1.62it/s]


Epoch 71 candidate mean 6.373069763183594, baseline epoch 71 mean 6.378337383270264, difference -0.005267620086669922
p-value: 0.0033847441202033215
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 71)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.369800090789795
2020-06-08 20:08:43 Epoch 71: Loss: -0.07932543009519577: Cost: 6.371148109436035


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:45,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 72: 1it [00:01,  1.28s/it]

grad_global_norm = 3.1043362617492676, clipped_norm = 0.9999999403953552
Epoch 72 (batch = 0): Loss: -0.11018284410238266: Cost: 6.3518500328063965


batch calculation at epoch 72: 626it [12:33,  1.15s/it]

grad_global_norm = 4.189205169677734, clipped_norm = 0.9999998807907104
Epoch 72 (batch = 625): Loss: -0.08249945193529129: Cost: 6.37182092666626


batch calculation at epoch 72: 1250it [25:02,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 72)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 72 candidate mean 6.375268936157227, baseline epoch 72 mean 6.37719202041626, difference -0.0019230842590332031
p-value: 0.15375835197185717


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.3684000968933105
2020-06-08 20:48:32 Epoch 72: Loss: -0.08161024004220963: Cost: 6.372358798980713


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:51,  1.32it/s]


Current decode type: sampling


batch calculation at epoch 73: 1it [00:01,  1.25s/it]

grad_global_norm = 1.8754960298538208, clipped_norm = 1.0
Epoch 73 (batch = 0): Loss: -0.06759592890739441: Cost: 6.373879909515381


batch calculation at epoch 73: 626it [12:18,  1.19s/it]

grad_global_norm = 2.41434383392334, clipped_norm = 1.0
Epoch 73 (batch = 625): Loss: -0.08036508411169052: Cost: 6.371408462524414


batch calculation at epoch 73: 1250it [24:35,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 73)


Rollout greedy execution: 10it [00:06,  1.65it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 73 candidate mean 6.379834175109863, baseline epoch 73 mean 6.37719202041626, difference 0.0026421546936035156


Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.374100208282471
2020-06-08 21:28:06 Epoch 73: Loss: -0.08041667193174362: Cost: 6.371006011962891


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:44,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 74: 1it [00:01,  1.26s/it]

grad_global_norm = 2.240046739578247, clipped_norm = 0.9999999403953552
Epoch 74 (batch = 0): Loss: -0.10542899370193481: Cost: 6.373358726501465


batch calculation at epoch 74: 626it [12:10,  1.14s/it]

grad_global_norm = 2.820735454559326, clipped_norm = 1.0
Epoch 74 (batch = 625): Loss: -0.07665016502141953: Cost: 6.370417594909668


batch calculation at epoch 74: 1250it [24:15,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 74)


Rollout greedy execution: 10it [00:06,  1.63it/s]


Epoch 74 candidate mean 6.371955871582031, baseline epoch 74 mean 6.37719202041626, difference -0.005236148834228516
p-value: 0.0040192238116557935
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 74)


Rollout greedy execution: 10it [00:06,  1.58it/s]
Rollout greedy execution: 10it [00:06,  1.64it/s]


Validation score: 6.369699954986572
2020-06-08 22:07:17 Epoch 74: Loss: -0.0770394504070282: Cost: 6.3698296546936035


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:41,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 75: 1it [00:01,  1.26s/it]

grad_global_norm = 3.4990525245666504, clipped_norm = 1.0000001192092896
Epoch 75 (batch = 0): Loss: -0.05667279660701752: Cost: 6.337456703186035


batch calculation at epoch 75: 626it [12:19,  1.19s/it]

grad_global_norm = 2.688732862472534, clipped_norm = 0.9999999403953552
Epoch 75 (batch = 625): Loss: -0.07826049625873566: Cost: 6.367849349975586


batch calculation at epoch 75: 1250it [24:36,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 75)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 75 candidate mean 6.357200622558594, baseline epoch 75 mean 6.354146480560303, difference 0.0030541419982910156


Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.368199825286865
2020-06-08 22:46:33 Epoch 75: Loss: -0.07821471244096756: Cost: 6.368527889251709


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:41,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 76: 1it [00:01,  1.34s/it]

grad_global_norm = 2.5024518966674805, clipped_norm = 0.9999999403953552
Epoch 76 (batch = 0): Loss: -0.08642725646495819: Cost: 6.362343788146973


batch calculation at epoch 76: 626it [12:05,  1.21s/it]

grad_global_norm = 2.133680820465088, clipped_norm = 0.9999999403953552
Epoch 76 (batch = 625): Loss: -0.07657339423894882: Cost: 6.367867946624756


batch calculation at epoch 76: 1250it [24:17,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 76)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 76 candidate mean 6.3513641357421875, baseline epoch 76 mean 6.354146480560303, difference -0.0027823448181152344
p-value: 0.055215057934620365


Rollout greedy execution: 10it [00:06,  1.66it/s]


Validation score: 6.364099979400635
2020-06-08 23:25:51 Epoch 76: Loss: -0.07662206143140793: Cost: 6.368104934692383


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:37,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 77: 1it [00:01,  1.33s/it]

grad_global_norm = 2.798794984817505, clipped_norm = 1.0
Epoch 77 (batch = 0): Loss: -0.04136340692639351: Cost: 6.361260890960693


batch calculation at epoch 77: 626it [12:25,  1.19s/it]

grad_global_norm = 1.7867074012756348, clipped_norm = 1.0
Epoch 77 (batch = 625): Loss: -0.07371588051319122: Cost: 6.367967128753662


batch calculation at epoch 77: 1250it [24:38,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 77)


Rollout greedy execution: 10it [00:06,  1.62it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 77 candidate mean 6.351837635040283, baseline epoch 77 mean 6.354146480560303, difference -0.0023088455200195312
p-value: 0.11251490533854278


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.363399982452393
2020-06-09 00:05:06 Epoch 77: Loss: -0.07520163059234619: Cost: 6.367952346801758


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:35,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 78: 1it [00:01,  1.11s/it]

grad_global_norm = 1.9663740396499634, clipped_norm = 1.0
Epoch 78 (batch = 0): Loss: -0.049261100590229034: Cost: 6.343899726867676


batch calculation at epoch 78: 626it [12:21,  1.14s/it]

grad_global_norm = 2.045539617538452, clipped_norm = 1.0
Epoch 78 (batch = 625): Loss: -0.07602621614933014: Cost: 6.369627952575684


batch calculation at epoch 78: 1250it [24:48,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 78)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 78 candidate mean 6.355442047119141, baseline epoch 78 mean 6.354146480560303, difference 0.0012955665588378906


Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.366700172424316
2020-06-09 00:44:27 Epoch 78: Loss: -0.07470033317804337: Cost: 6.368035793304443


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:42,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 79: 1it [00:01,  1.12s/it]

grad_global_norm = 1.5495688915252686, clipped_norm = 1.0
Epoch 79 (batch = 0): Loss: -0.061163973063230515: Cost: 6.378993511199951


batch calculation at epoch 79: 626it [12:33,  1.15s/it]

grad_global_norm = 1.9132221937179565, clipped_norm = 0.9999999403953552
Epoch 79 (batch = 625): Loss: -0.07740839570760727: Cost: 6.367415428161621


batch calculation at epoch 79: 1250it [24:54,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 79)


Rollout greedy execution: 10it [00:06,  1.60it/s]


Epoch 79 candidate mean 6.3495259284973145, baseline epoch 79 mean 6.354146480560303, difference -0.004620552062988281
p-value: 0.006192845358044653
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 79)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.360899925231934
2020-06-09 01:24:31 Epoch 79: Loss: -0.0743422880768776: Cost: 6.367095947265625


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:37,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 80: 1it [00:01,  1.21s/it]

grad_global_norm = 1.6071070432662964, clipped_norm = 0.9999999403953552
Epoch 80 (batch = 0): Loss: -0.02421681024134159: Cost: 6.335317611694336


batch calculation at epoch 80: 626it [12:10,  1.19s/it]

grad_global_norm = 2.3903727531433105, clipped_norm = 1.0
Epoch 80 (batch = 625): Loss: -0.08133898675441742: Cost: 6.364297866821289


batch calculation at epoch 80: 1250it [24:24,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 80)


Rollout greedy execution: 10it [00:06,  1.62it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 80 candidate mean 6.357085227966309, baseline epoch 80 mean 6.352630138397217, difference 0.004455089569091797


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.365699768066406
2020-06-09 02:03:38 Epoch 80: Loss: -0.08537320047616959: Cost: 6.366317272186279


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:42,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 81: 1it [00:01,  1.19s/it]

grad_global_norm = 1.691197156906128, clipped_norm = 1.0
Epoch 81 (batch = 0): Loss: -0.08345770090818405: Cost: 6.421993255615234


batch calculation at epoch 81: 626it [12:18,  1.15s/it]

grad_global_norm = 2.592153310775757, clipped_norm = 0.9999999403953552
Epoch 81 (batch = 625): Loss: -0.08342209458351135: Cost: 6.364093780517578


batch calculation at epoch 81: 1250it [25:00,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 81)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 81 candidate mean 6.3560686111450195, baseline epoch 81 mean 6.352630138397217, difference 0.0034384727478027344


Rollout greedy execution: 10it [00:06,  1.61it/s]


Validation score: 6.361199855804443
2020-06-09 02:43:38 Epoch 81: Loss: -0.08400576561689377: Cost: 6.365492343902588


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 82: 1it [00:01,  1.37s/it]

grad_global_norm = 2.0818090438842773, clipped_norm = 1.0
Epoch 82 (batch = 0): Loss: -0.08284011483192444: Cost: 6.417496204376221


batch calculation at epoch 82: 626it [12:12,  1.16s/it]

grad_global_norm = 3.9799692630767822, clipped_norm = 0.9999999403953552
Epoch 82 (batch = 625): Loss: -0.08133572340011597: Cost: 6.363348007202148


batch calculation at epoch 82: 1250it [24:33,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 82)


Rollout greedy execution: 10it [00:06,  1.65it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 82 candidate mean 6.355051517486572, baseline epoch 82 mean 6.352630138397217, difference 0.0024213790893554688


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.3643999099731445
2020-06-09 03:23:43 Epoch 82: Loss: -0.08080681413412094: Cost: 6.3641276359558105


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:39,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 83: 1it [00:01,  1.20s/it]

grad_global_norm = 1.9079982042312622, clipped_norm = 0.9999999403953552
Epoch 83 (batch = 0): Loss: -0.06461766362190247: Cost: 6.395670413970947


batch calculation at epoch 83: 626it [12:06,  1.13s/it]

grad_global_norm = 2.655381917953491, clipped_norm = 0.9999999403953552
Epoch 83 (batch = 625): Loss: -0.08156601339578629: Cost: 6.367215633392334


batch calculation at epoch 83: 1250it [24:31,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 83)


Rollout greedy execution: 10it [00:06,  1.63it/s]


Epoch 83 candidate mean 6.348997592926025, baseline epoch 83 mean 6.352630138397217, difference -0.0036325454711914062
p-value: 0.019479344947044862
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 83)


Rollout greedy execution: 10it [00:06,  1.59it/s]
Rollout greedy execution: 10it [00:06,  1.64it/s]


Validation score: 6.357500076293945
2020-06-09 04:03:05 Epoch 83: Loss: -0.08122968673706055: Cost: 6.3659138679504395


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 84: 1it [00:01,  1.18s/it]

grad_global_norm = 1.5043472051620483, clipped_norm = 1.0
Epoch 84 (batch = 0): Loss: -0.05389440432190895: Cost: 6.3656005859375


batch calculation at epoch 84: 626it [12:31,  1.33s/it]

grad_global_norm = 1.9211091995239258, clipped_norm = 1.0
Epoch 84 (batch = 625): Loss: -0.08054764568805695: Cost: 6.362929344177246


batch calculation at epoch 84: 1250it [24:59,  1.20s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 84)


Rollout greedy execution: 10it [00:06,  1.51it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 84 candidate mean 6.349953651428223, baseline epoch 84 mean 6.347212314605713, difference 0.0027413368225097656


Rollout greedy execution: 10it [00:06,  1.67it/s]


Validation score: 6.360799789428711
2020-06-09 04:42:46 Epoch 84: Loss: -0.08183570206165314: Cost: 6.3624138832092285


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:41,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 85: 1it [00:01,  1.18s/it]

grad_global_norm = 3.269336223602295, clipped_norm = 1.0000001192092896
Epoch 85 (batch = 0): Loss: -0.07452109456062317: Cost: 6.363029956817627


batch calculation at epoch 85: 626it [12:14,  1.14s/it]

grad_global_norm = 2.7630248069763184, clipped_norm = 1.0
Epoch 85 (batch = 625): Loss: -0.08378433436155319: Cost: 6.365108966827393


batch calculation at epoch 85: 1250it [24:36,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 85)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 85 candidate mean 6.352426052093506, baseline epoch 85 mean 6.347212314605713, difference 0.005213737487792969


Rollout greedy execution: 10it [00:06,  1.58it/s]


Validation score: 6.364999771118164
2020-06-09 05:22:09 Epoch 85: Loss: -0.08306993544101715: Cost: 6.363558769226074


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:36,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 86: 1it [00:01,  1.28s/it]

grad_global_norm = 2.3312346935272217, clipped_norm = 1.0
Epoch 86 (batch = 0): Loss: -0.06909807026386261: Cost: 6.314234733581543


batch calculation at epoch 86: 626it [12:34,  1.20s/it]

grad_global_norm = 1.903921127319336, clipped_norm = 1.0
Epoch 86 (batch = 625): Loss: -0.0809926763176918: Cost: 6.362292289733887


batch calculation at epoch 86: 1250it [24:46,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 86)


Rollout greedy execution: 10it [00:06,  1.61it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 86 candidate mean 6.352526664733887, baseline epoch 86 mean 6.347212314605713, difference 0.005314350128173828


Rollout greedy execution: 10it [00:06,  1.66it/s]


Validation score: 6.36299991607666
2020-06-09 06:01:33 Epoch 86: Loss: -0.08132462948560715: Cost: 6.362649440765381


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 87: 1it [00:01,  1.32s/it]

grad_global_norm = 1.8890786170959473, clipped_norm = 0.9999999403953552
Epoch 87 (batch = 0): Loss: -0.02619612030684948: Cost: 6.354676246643066


batch calculation at epoch 87: 626it [12:15,  1.18s/it]

grad_global_norm = 1.936174750328064, clipped_norm = 1.0
Epoch 87 (batch = 625): Loss: -0.08368021249771118: Cost: 6.36364221572876


batch calculation at epoch 87: 1250it [24:29,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 87)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 87 candidate mean 6.3546013832092285, baseline epoch 87 mean 6.347212314605713, difference 0.007389068603515625


Rollout greedy execution: 10it [00:06,  1.59it/s]


Validation score: 6.3618998527526855
2020-06-09 06:40:44 Epoch 87: Loss: -0.0815991461277008: Cost: 6.363357067108154


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:43,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 88: 1it [00:01,  1.11s/it]

grad_global_norm = 1.5009206533432007, clipped_norm = 1.0
Epoch 88 (batch = 0): Loss: -0.06609312444925308: Cost: 6.383738994598389


batch calculation at epoch 88: 626it [12:20,  1.15s/it]

grad_global_norm = 1.829740047454834, clipped_norm = 1.0
Epoch 88 (batch = 625): Loss: -0.07820485532283783: Cost: 6.362295627593994


batch calculation at epoch 88: 1250it [24:52,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 88)


Rollout greedy execution: 10it [00:06,  1.66it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 88 candidate mean 6.348033905029297, baseline epoch 88 mean 6.347212314605713, difference 0.0008215904235839844


Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.364099979400635
2020-06-09 07:20:19 Epoch 88: Loss: -0.07792938500642776: Cost: 6.362472057342529


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:42,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 89: 1it [00:01,  1.24s/it]

grad_global_norm = 2.647186756134033, clipped_norm = 0.9999999403953552
Epoch 89 (batch = 0): Loss: -0.07281515002250671: Cost: 6.323904991149902


batch calculation at epoch 89: 626it [12:06,  1.23s/it]

grad_global_norm = 1.856276512145996, clipped_norm = 1.0000001192092896
Epoch 89 (batch = 625): Loss: -0.0791434571146965: Cost: 6.362205505371094


batch calculation at epoch 89: 1250it [24:11,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 89)


Rollout greedy execution: 10it [00:06,  1.64it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 89 candidate mean 6.348977565765381, baseline epoch 89 mean 6.347212314605713, difference 0.0017652511596679688


Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.3618998527526855
2020-06-09 07:59:18 Epoch 89: Loss: -0.07803516834974289: Cost: 6.3624749183654785


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:46,  1.34it/s]


Current decode type: sampling


batch calculation at epoch 90: 1it [00:01,  1.19s/it]

grad_global_norm = 1.7795602083206177, clipped_norm = 1.0
Epoch 90 (batch = 0): Loss: -0.11496845632791519: Cost: 6.298953056335449


batch calculation at epoch 90: 626it [12:12,  1.15s/it]

grad_global_norm = 3.4177637100219727, clipped_norm = 1.0
Epoch 90 (batch = 625): Loss: -0.07421014457941055: Cost: 6.36220121383667


batch calculation at epoch 90: 1250it [24:13,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 90)


Rollout greedy execution: 10it [00:06,  1.56it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 90 candidate mean 6.34523344039917, baseline epoch 90 mean 6.347212314605713, difference -0.0019788742065429688
p-value: 0.13304832750588635


Rollout greedy execution: 10it [00:06,  1.59it/s]


Validation score: 6.360300064086914
2020-06-09 08:38:18 Epoch 90: Loss: -0.07490497082471848: Cost: 6.36149787902832


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:37,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 91: 1it [00:01,  1.27s/it]

grad_global_norm = 1.7058078050613403, clipped_norm = 1.0
Epoch 91 (batch = 0): Loss: -0.057685818523168564: Cost: 6.334983825683594


batch calculation at epoch 91: 626it [12:19,  1.16s/it]

grad_global_norm = 2.250213146209717, clipped_norm = 0.9999998807907104
Epoch 91 (batch = 625): Loss: -0.07359813153743744: Cost: 6.358152389526367


batch calculation at epoch 91: 1250it [24:42,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 91)


Rollout greedy execution: 10it [00:06,  1.58it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 91 candidate mean 6.347635269165039, baseline epoch 91 mean 6.347212314605713, difference 0.0004229545593261719


Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.360099792480469
2020-06-09 09:17:44 Epoch 91: Loss: -0.07512515038251877: Cost: 6.3585591316223145


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:49,  1.33it/s]


Current decode type: sampling


batch calculation at epoch 92: 1it [00:01,  1.14s/it]

grad_global_norm = 2.636857271194458, clipped_norm = 1.0
Epoch 92 (batch = 0): Loss: -0.09843423962593079: Cost: 6.383664131164551


batch calculation at epoch 92: 626it [12:11,  1.15s/it]

grad_global_norm = 1.6619740724563599, clipped_norm = 1.0
Epoch 92 (batch = 625): Loss: -0.07213225960731506: Cost: 6.361408233642578


batch calculation at epoch 92: 1250it [24:18,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 92)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 92 candidate mean 6.352017402648926, baseline epoch 92 mean 6.347212314605713, difference 0.004805088043212891


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.362599849700928
2020-06-09 09:57:06 Epoch 92: Loss: -0.07289262115955353: Cost: 6.359953880310059


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:38,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 93: 1it [00:01,  1.28s/it]

grad_global_norm = 2.2169880867004395, clipped_norm = 0.9999999403953552
Epoch 93 (batch = 0): Loss: -0.1301329880952835: Cost: 6.374539375305176


batch calculation at epoch 93: 626it [12:15,  1.14s/it]

grad_global_norm = 1.936481237411499, clipped_norm = 1.0
Epoch 93 (batch = 625): Loss: -0.0754600316286087: Cost: 6.359996318817139


batch calculation at epoch 93: 1250it [24:27,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 93)


Rollout greedy execution: 10it [00:06,  1.63it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 93 candidate mean 6.350292205810547, baseline epoch 93 mean 6.347212314605713, difference 0.0030798912048339844


Rollout greedy execution: 10it [00:06,  1.60it/s]


Validation score: 6.361400127410889
2020-06-09 10:36:11 Epoch 93: Loss: -0.0743667483329773: Cost: 6.359310626983643


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:43,  1.35it/s]


Current decode type: sampling


batch calculation at epoch 94: 1it [00:01,  1.27s/it]

grad_global_norm = 2.1230740547180176, clipped_norm = 1.0
Epoch 94 (batch = 0): Loss: -0.07255449891090393: Cost: 6.3332743644714355


batch calculation at epoch 94: 626it [12:04,  1.09s/it]

grad_global_norm = 1.722919225692749, clipped_norm = 0.9999999403953552
Epoch 94 (batch = 625): Loss: -0.07342512905597687: Cost: 6.358850002288818


batch calculation at epoch 94: 1250it [24:20,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 94)


Rollout greedy execution: 10it [00:06,  1.59it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 94 candidate mean 6.34646463394165, baseline epoch 94 mean 6.347212314605713, difference -0.0007476806640625
p-value: 0.34151514018031165


Rollout greedy execution: 10it [00:06,  1.63it/s]


Validation score: 6.355899810791016
2020-06-09 11:15:16 Epoch 94: Loss: -0.07191480696201324: Cost: 6.35903787612915


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 95: 1it [00:01,  1.23s/it]

grad_global_norm = 1.6760605573654175, clipped_norm = 1.0
Epoch 95 (batch = 0): Loss: -0.04530518129467964: Cost: 6.365956783294678


batch calculation at epoch 95: 626it [12:14,  1.17s/it]

grad_global_norm = 1.897925615310669, clipped_norm = 1.0000001192092896
Epoch 95 (batch = 625): Loss: -0.07087203860282898: Cost: 6.357682228088379


batch calculation at epoch 95: 1250it [24:37,  1.18s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 95)


Rollout greedy execution: 10it [00:06,  1.45it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 95 candidate mean 6.347226142883301, baseline epoch 95 mean 6.347212314605713, difference 1.3828277587890625e-05


Rollout greedy execution: 10it [00:06,  1.59it/s]


Validation score: 6.355400085449219
2020-06-09 11:54:38 Epoch 95: Loss: -0.06958558410406113: Cost: 6.357958793640137


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 96: 1it [00:01,  1.34s/it]

grad_global_norm = 1.887524962425232, clipped_norm = 1.0
Epoch 96 (batch = 0): Loss: -0.06237557530403137: Cost: 6.392561912536621


batch calculation at epoch 96: 626it [12:28,  1.18s/it]

grad_global_norm = 2.1135852336883545, clipped_norm = 1.0
Epoch 96 (batch = 625): Loss: -0.06931336969137192: Cost: 6.358886241912842


batch calculation at epoch 96: 1250it [24:42,  1.19s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 96)


Rollout greedy execution: 10it [00:06,  1.62it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 96 candidate mean 6.346187114715576, baseline epoch 96 mean 6.347212314605713, difference -0.0010251998901367188
p-value: 0.2777321467250363


Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.355899810791016
2020-06-09 12:34:01 Epoch 96: Loss: -0.06983695924282074: Cost: 6.36005973815918


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:37,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 97: 1it [00:01,  1.14s/it]

grad_global_norm = 1.710620403289795, clipped_norm = 1.0
Epoch 97 (batch = 0): Loss: -0.07305759936571121: Cost: 6.379446506500244


batch calculation at epoch 97: 626it [12:11,  1.18s/it]

grad_global_norm = 2.5033390522003174, clipped_norm = 1.0
Epoch 97 (batch = 625): Loss: -0.06397483497858047: Cost: 6.355963706970215


batch calculation at epoch 97: 1250it [24:24,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 97)


Rollout greedy execution: 10it [00:06,  1.61it/s]


Epoch 97 candidate mean 6.342504024505615, baseline epoch 97 mean 6.347212314605713, difference -0.004708290100097656
p-value: 0.003706544053433888
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 97)


Rollout greedy execution: 10it [00:06,  1.57it/s]
Rollout greedy execution: 10it [00:06,  1.62it/s]


Validation score: 6.353499889373779
2020-06-09 13:13:14 Epoch 97: Loss: -0.06551022082567215: Cost: 6.357542514801025


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:40,  1.36it/s]


Current decode type: sampling


batch calculation at epoch 98: 1it [00:01,  1.19s/it]

grad_global_norm = 1.7056711912155151, clipped_norm = 1.0
Epoch 98 (batch = 0): Loss: -0.05943835899233818: Cost: 6.34679651260376


batch calculation at epoch 98: 626it [12:08,  1.14s/it]

grad_global_norm = 1.627371072769165, clipped_norm = 1.0
Epoch 98 (batch = 625): Loss: -0.07297519594430923: Cost: 6.356767654418945


batch calculation at epoch 98: 1250it [24:13,  1.16s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 98)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 98 candidate mean 6.321770191192627, baseline epoch 98 mean 6.322480201721191, difference -0.0007100105285644531
p-value: 0.34547688827355016


Rollout greedy execution: 10it [00:06,  1.65it/s]


Validation score: 6.35099983215332
2020-06-09 13:52:10 Epoch 98: Loss: -0.07314936816692352: Cost: 6.357077121734619


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [07:35,  1.37it/s]


Current decode type: sampling


batch calculation at epoch 99: 1it [00:01,  1.12s/it]

grad_global_norm = 2.056906223297119, clipped_norm = 1.0
Epoch 99 (batch = 0): Loss: -0.06211242824792862: Cost: 6.338552951812744


batch calculation at epoch 99: 626it [12:11,  1.12s/it]

grad_global_norm = 2.3277695178985596, clipped_norm = 0.9999999403953552
Epoch 99 (batch = 625): Loss: -0.07239829748868942: Cost: 6.356544494628906


batch calculation at epoch 99: 1250it [24:27,  1.17s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 99)


Rollout greedy execution: 10it [00:06,  1.60it/s]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 99 candidate mean 6.321872711181641, baseline epoch 99 mean 6.322480201721191, difference -0.0006074905395507812
p-value: 0.3687121624900783


Rollout greedy execution: 10it [00:06,  1.64it/s]


Validation score: 6.355800151824951
2020-06-09 14:31:10 Epoch 99: Loss: -0.0733109638094902: Cost: 6.357331275939941


<Figure size 1500x900 with 2 Axes>