In [1]:
import tensorflow as tf
from time import gmtime, strftime

from attention_dynamic_model import AttentionDynamicModel, set_decode_type
from reinforce_baseline import RolloutBaseline
from train import train_model

from utils import create_data_on_disk, get_cur_time, read_from_pickle

# Params of model
# Run configuration for the training script below. Values are unchanged from
# the original; SAMPLES is now derived from BATCH so the two cannot drift apart.
BATCH = 256  # handed to train_model as `batch`
BATCHES_PER_EPOCH = 5000  # the original comment gave SAMPLES as 256*5000
SAMPLES = BATCH * BATCHES_PER_EPOCH  # 1280000; handed to train_model as `samples`
START_EPOCH = 0  # handed to train_model as `start_epoch`
END_EPOCH = 60  # handed to train_model as `end_epoch` (inclusive/exclusive: TODO confirm in train.py)
FROM_CHECKPOINT = False  # forwarded to both RolloutBaseline and train_model
embedding_dim = 128  # forwarded to AttentionDynamicModel and RolloutBaseline
LEARNING_RATE = 0.0003  # first positional argument of the Adam optimizer
ROLLOUT_SAMPLES = 10000  # `num_samples` of RolloutBaseline
NUMBER_OF_WP_EPOCHS = 1  # `wp_n_epochs` of RolloutBaseline; presumably warm-up epochs -- TODO confirm
GRAD_NORM_CLIPPING = 1.0  # `grad_norm_clipping` of train_model
BATCH_VERBOSE = 2500  # `batch_verbose` of train_model; presumably the logging interval in batches -- TODO confirm
VAL_BATCH_SIZE = 1000  # `val_batch_size` of train_model
VALIDATE_SET_SIZE = 10000  # only referenced by the commented-out create_data_on_disk call
SEED = 1234  # only referenced by the commented-out create_data_on_disk call
GRAPH_SIZE = 50  # `graph_size` of RolloutBaseline and train_model
# Run tag: graph size plus the current UTC date (gmtime), e.g. 'VRP_50_2020-06-10'.
FILENAME = 'VRP_{}_{}'.format(GRAPH_SIZE, strftime("%Y-%m-%d", gmtime()))

# Initialize model
# Build the attention model and set its decode type to "sampling".
model_tf = AttentionDynamicModel(embedding_dim)
set_decode_type(model_tf, "sampling")
print(get_cur_time(), 'model initialized')

# Load the validation dataset from a pickle on disk.
# To create and save a fresh validation dataset instead, uncomment the call
# below (it is the only user of VALIDATE_SET_SIZE and SEED) and drop the
# read_from_pickle lines.
# validation_dataset = create_data_on_disk(GRAPH_SIZE,
#                                          VALIDATE_SET_SIZE,
#                                          is_save=True,
#                                          filename=FILENAME,
#                                          is_return=True,
#                                          seed = SEED)
# print(get_cur_time(), 'validation dataset created and saved on the disk')

# The graph size in the file name follows GRAPH_SIZE rather than a literal 50,
# so changing GRAPH_SIZE cannot silently validate on a dataset of another size.
# NOTE(review): the name looks like 'Validation_dataset_' + FILENAME + '.pkl'
# as presumably written by create_data_on_disk -- confirm in utils.
VAL_SET_PATH = 'Validation_dataset_VRP_{}_2020-06-09.pkl'.format(GRAPH_SIZE)
validation_dataset = read_from_pickle(VAL_SET_PATH)
print(get_cur_time(), 'validation dataset loaded')

# Adam optimizer; LEARNING_RATE is passed as its first positional argument.
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

# Rollout baseline built around the model being trained.
# Construction already runs a rollout: the recorded output shows
# "Evaluating baseline model on baseline dataset (epoch = 0)" before
# 'baseline initialized' is printed.
baseline = RolloutBaseline(
    model_tf,
    wp_n_epochs=NUMBER_OF_WP_EPOCHS,
    epoch=0,
    num_samples=ROLLOUT_SAMPLES,
    filename=FILENAME,
    from_checkpoint=FROM_CHECKPOINT,
    embedding_dim=embedding_dim,
    graph_size=GRAPH_SIZE,
)
print(get_cur_time(), 'baseline initialized')

# Start training. The keyword options are gathered in one mapping so the
# positional objects (optimizer, model, baseline, validation data) stand apart
# from the run configuration.
_train_options = dict(
    samples=SAMPLES,
    batch=BATCH,
    val_batch_size=VAL_BATCH_SIZE,
    start_epoch=START_EPOCH,
    end_epoch=END_EPOCH,
    from_checkpoint=FROM_CHECKPOINT,
    grad_norm_clipping=GRAD_NORM_CLIPPING,
    batch_verbose=BATCH_VERBOSE,
    graph_size=GRAPH_SIZE,
    filename=FILENAME,
)
train_model(optimizer, model_tf, baseline, validation_dataset, **_train_options)

2020-06-10 08:26:32 model initialized
2020-06-10 08:26:48 validation dataset loaded


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 0)


Rollout greedy execution: 10it [00:22,  2.24s/it]


2020-06-10 08:27:15 baseline initialized
Current decode type: sampling


batch calculation at epoch 0: 0it [00:00, ?it/s]

Instructions for updating:
Use tf.identity instead.


batch calculation at epoch 0: 1it [00:02,  2.73s/it]

grad_global_norm = 10.26657772064209, clipped_norm = 1.0000001192092896
Epoch 0 (batch = 0): Loss: -0.5081872940063477: Cost: 30.678966522216797


batch calculation at epoch 0: 2501it [1:22:07,  1.97s/it]

grad_global_norm = 19.67675018310547, clipped_norm = 1.0
Epoch 0 (batch = 2500): Loss: 0.6464605331420898: Cost: 14.261652946472168


batch calculation at epoch 0: 5000it [2:48:03,  2.02s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 0)


Rollout greedy execution: 10it [00:14,  1.50s/it]


Epoch 0 candidate mean 12.213311195373535, baseline epoch 0 mean 48.282108306884766, difference -36.06879806518555
p-value: 0.0
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 0)


Rollout greedy execution: 10it [00:14,  1.50s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

alpha was updated to 1.0


Rollout greedy execution: 10it [00:14,  1.44s/it]


Validation score: 12.23859977722168
2020-06-10 11:23:02 Epoch 0: Loss: -0.28716370463371277: Cost: 13.463265419006348


Rollout greedy execution: 625it [19:06,  1.83s/it]


Current decode type: sampling


batch calculation at epoch 1: 1it [00:02,  2.17s/it]

grad_global_norm = 8.932313919067383, clipped_norm = 0.9999999403953552
Epoch 1 (batch = 0): Loss: -3.384472608566284: Cost: 12.428071022033691


batch calculation at epoch 1: 2501it [1:24:46,  1.95s/it]

grad_global_norm = 11.982268333435059, clipped_norm = 0.9999999403953552
Epoch 1 (batch = 2500): Loss: 3.4644551277160645: Cost: 11.967137336730957


batch calculation at epoch 1: 5000it [2:49:19,  2.03s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 1)


Rollout greedy execution: 10it [00:13,  1.36s/it]


Epoch 1 candidate mean 11.55631160736084, baseline epoch 1 mean 12.230069160461426, difference -0.6737575531005859
p-value: 0.0
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 1)


Rollout greedy execution: 10it [00:13,  1.36s/it]
Rollout greedy execution: 10it [00:13,  1.34s/it]


Validation score: 11.566200256347656
2020-06-10 14:39:23 Epoch 1: Loss: 4.865200042724609: Cost: 11.865413665771484


Rollout greedy execution: 625it [18:18,  1.76s/it]


Current decode type: sampling


batch calculation at epoch 2: 1it [00:01,  1.96s/it]

grad_global_norm = 7.461605072021484, clipped_norm = 1.0
Epoch 2 (batch = 0): Loss: -2.4911625385284424: Cost: 11.713534355163574


batch calculation at epoch 2: 2501it [1:23:28,  2.27s/it]

grad_global_norm = 10.610330581665039, clipped_norm = 1.0000001192092896
Epoch 2 (batch = 2500): Loss: -0.13677877187728882: Cost: 11.535286903381348


batch calculation at epoch 2: 5000it [2:45:45,  1.99s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 2)


Rollout greedy execution: 10it [00:13,  1.37s/it]


Epoch 2 candidate mean 11.391794204711914, baseline epoch 2 mean 11.578226089477539, difference -0.186431884765625
p-value: 0.0
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 2)


Rollout greedy execution: 10it [00:13,  1.36s/it]
Rollout greedy execution: 10it [00:13,  1.36s/it]


Validation score: 11.369600296020508
2020-06-10 17:51:15 Epoch 2: Loss: 0.3627063035964966: Cost: 11.492080688476562


Rollout greedy execution: 625it [18:11,  1.75s/it]


Current decode type: sampling


batch calculation at epoch 3: 1it [00:02,  2.04s/it]

grad_global_norm = 7.274046897888184, clipped_norm = 1.0
Epoch 3 (batch = 0): Loss: -1.5565452575683594: Cost: 11.366164207458496


batch calculation at epoch 3: 2501it [1:23:35,  1.95s/it]

grad_global_norm = 7.276481628417969, clipped_norm = 1.0
Epoch 3 (batch = 2500): Loss: -0.6679686903953552: Cost: 11.394084930419922


batch calculation at epoch 3: 5000it [2:46:02,  1.99s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 3)


Rollout greedy execution: 10it [00:13,  1.36s/it]


Epoch 3 candidate mean 11.240164756774902, baseline epoch 3 mean 11.36633014678955, difference -0.12616539001464844
p-value: 6.112242962337014e-212
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 3)


Rollout greedy execution: 10it [00:13,  1.38s/it]
Rollout greedy execution: 10it [00:13,  1.36s/it]


Validation score: 11.24269962310791
2020-06-10 21:03:00 Epoch 3: Loss: -0.4143734872341156: Cost: 11.369620323181152


Rollout greedy execution: 625it [18:46,  1.80s/it]


Current decode type: sampling


batch calculation at epoch 4: 1it [00:02,  2.05s/it]

grad_global_norm = 7.546592712402344, clipped_norm = 0.9999999403953552
Epoch 4 (batch = 0): Loss: -0.8014835715293884: Cost: 11.405444145202637


batch calculation at epoch 4: 2501it [1:21:58,  1.92s/it]

grad_global_norm = 8.34888744354248, clipped_norm = 1.0
Epoch 4 (batch = 2500): Loss: -1.296757698059082: Cost: 11.329463958740234


batch calculation at epoch 4: 5000it [2:43:44,  1.96s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 4)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 4 candidate mean 11.195329666137695, baseline epoch 4 mean 11.234692573547363, difference -0.03936290740966797
p-value: 3.546291808937265e-26
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 4)


Rollout greedy execution: 10it [00:13,  1.31s/it]
Rollout greedy execution: 10it [00:14,  1.42s/it]


Validation score: 11.204099655151367
2020-06-11 00:13:18 Epoch 4: Loss: -1.0719629526138306: Cost: 11.31031608581543


Rollout greedy execution: 625it [17:42,  1.70s/it]


Current decode type: sampling


batch calculation at epoch 5: 1it [00:02,  2.13s/it]

grad_global_norm = 11.788683891296387, clipped_norm = 0.9999999403953552
Epoch 5 (batch = 0): Loss: -1.1467156410217285: Cost: 11.249752044677734


batch calculation at epoch 5: 2501it [1:21:35,  1.88s/it]

grad_global_norm = 8.470731735229492, clipped_norm = 0.9999999403953552
Epoch 5 (batch = 2500): Loss: -0.98912113904953: Cost: 11.27048397064209


batch calculation at epoch 5: 5000it [2:43:10,  1.96s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 5)


Rollout greedy execution: 10it [00:12,  1.27s/it]


Epoch 5 candidate mean 11.206143379211426, baseline epoch 5 mean 11.215001106262207, difference -0.00885772705078125
p-value: 0.010252031492011864
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 5)


Rollout greedy execution: 10it [00:13,  1.34s/it]
Rollout greedy execution: 10it [00:13,  1.40s/it]


Validation score: 11.192099571228027
2020-06-11 03:21:46 Epoch 5: Loss: -0.924834668636322: Cost: 11.265296936035156


Rollout greedy execution: 625it [17:41,  1.70s/it]


Current decode type: sampling


batch calculation at epoch 6: 1it [00:02,  2.10s/it]

grad_global_norm = 7.817465305328369, clipped_norm = 1.0
Epoch 6 (batch = 0): Loss: -0.9758676886558533: Cost: 11.380193710327148


batch calculation at epoch 6: 2501it [1:22:58,  2.17s/it]

grad_global_norm = 7.280625820159912, clipped_norm = 0.9999999403953552
Epoch 6 (batch = 2500): Loss: -0.7583267092704773: Cost: 11.237785339355469


batch calculation at epoch 6: 5000it [2:44:31,  1.97s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 6)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 6 candidate mean 11.14968204498291, baseline epoch 6 mean 11.184131622314453, difference -0.03444957733154297
p-value: 4.191589482542467e-20
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 6)


Rollout greedy execution: 10it [00:12,  1.26s/it]
Rollout greedy execution: 10it [00:12,  1.26s/it]


Validation score: 11.164899826049805
2020-06-11 06:31:28 Epoch 6: Loss: -0.6923899054527283: Cost: 11.229952812194824


Rollout greedy execution: 625it [17:33,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 7: 1it [00:01,  1.80s/it]

grad_global_norm = 6.003025531768799, clipped_norm = 1.0
Epoch 7 (batch = 0): Loss: -0.4425160884857178: Cost: 11.209914207458496


batch calculation at epoch 7: 2501it [1:22:31,  1.95s/it]

grad_global_norm = 9.223721504211426, clipped_norm = 0.9999999403953552
Epoch 7 (batch = 2500): Loss: -0.813730001449585: Cost: 11.222100257873535


batch calculation at epoch 7: 5000it [2:45:27,  1.99s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 7)


Rollout greedy execution: 10it [00:13,  1.32s/it]


Epoch 7 candidate mean 11.126860618591309, baseline epoch 7 mean 11.149450302124023, difference -0.022589683532714844
p-value: 1.0693503753624006e-09
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 7)


Rollout greedy execution: 10it [00:13,  1.36s/it]
Rollout greedy execution: 10it [00:13,  1.36s/it]


Validation score: 11.143500328063965
2020-06-11 09:42:03 Epoch 7: Loss: -0.7487395405769348: Cost: 11.215224266052246


Rollout greedy execution: 625it [18:25,  1.77s/it]


Current decode type: sampling


batch calculation at epoch 8: 1it [00:02,  2.13s/it]

grad_global_norm = 6.5210280418396, clipped_norm = 1.0
Epoch 8 (batch = 0): Loss: -0.47511589527130127: Cost: 11.031682968139648


batch calculation at epoch 8: 2501it [1:23:14,  1.90s/it]

grad_global_norm = 20.675790786743164, clipped_norm = 1.0
Epoch 8 (batch = 2500): Loss: -0.7532995343208313: Cost: 11.194012641906738


batch calculation at epoch 8: 5000it [2:45:03,  1.98s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 8)


Rollout greedy execution: 10it [00:13,  1.31s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 8 candidate mean 11.147448539733887, baseline epoch 8 mean 11.13782787322998, difference 0.00962066650390625


Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 11.148300170898438
2020-06-11 12:52:46 Epoch 8: Loss: -0.7142500281333923: Cost: 11.189255714416504


Rollout greedy execution: 625it [18:19,  1.76s/it]


Current decode type: sampling


batch calculation at epoch 9: 1it [00:02,  2.14s/it]

grad_global_norm = 7.291717052459717, clipped_norm = 1.0
Epoch 9 (batch = 0): Loss: -0.8743548393249512: Cost: 11.129364013671875


batch calculation at epoch 9: 2501it [1:21:39,  1.94s/it]

grad_global_norm = 7.948943138122559, clipped_norm = 1.0
Epoch 9 (batch = 2500): Loss: -0.5954146981239319: Cost: 11.180257797241211


batch calculation at epoch 9: 5000it [2:43:43,  1.96s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 9)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 9 candidate mean 11.108037948608398, baseline epoch 9 mean 11.13782787322998, difference -0.02978992462158203
p-value: 6.387351902579595e-17
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 9)


Rollout greedy execution: 10it [00:13,  1.38s/it]
Rollout greedy execution: 10it [00:13,  1.38s/it]


Validation score: 11.114500045776367
2020-06-11 16:02:31 Epoch 9: Loss: -0.5741310715675354: Cost: 11.178112983703613


Rollout greedy execution: 625it [17:40,  1.70s/it]


Current decode type: sampling


batch calculation at epoch 10: 1it [00:02,  2.00s/it]

grad_global_norm = 18.939617156982422, clipped_norm = 1.0
Epoch 10 (batch = 0): Loss: -0.9937193989753723: Cost: 11.221887588500977


batch calculation at epoch 10: 2501it [1:22:31,  1.99s/it]

grad_global_norm = 10.938069343566895, clipped_norm = 1.0
Epoch 10 (batch = 2500): Loss: -0.6467613577842712: Cost: 11.161375045776367


batch calculation at epoch 10: 5000it [2:44:13,  1.97s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 10)


Rollout greedy execution: 10it [00:12,  1.28s/it]


Epoch 10 candidate mean 11.070699691772461, baseline epoch 10 mean 11.092469215393066, difference -0.02176952362060547
p-value: 8.545794055828853e-11
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 10)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 10it [00:13,  1.36s/it]


Validation score: 11.087900161743164
2020-06-11 19:12:03 Epoch 10: Loss: -0.6236093044281006: Cost: 11.156417846679688


Rollout greedy execution: 625it [17:30,  1.68s/it]


Current decode type: sampling


batch calculation at epoch 11: 1it [00:02,  2.19s/it]

grad_global_norm = 11.409893035888672, clipped_norm = 1.0
Epoch 11 (batch = 0): Loss: -0.47446319460868835: Cost: 11.051497459411621


batch calculation at epoch 11: 2501it [1:21:36,  1.98s/it]

grad_global_norm = 10.10945987701416, clipped_norm = 1.0
Epoch 11 (batch = 2500): Loss: -0.7067930698394775: Cost: 11.140961647033691


batch calculation at epoch 11: 5000it [2:44:23,  1.97s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 11)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 11 candidate mean 11.112546920776367, baseline epoch 11 mean 11.113006591796875, difference -0.0004596710205078125
p-value: 0.44783401489624397


Rollout greedy execution: 10it [00:13,  1.30s/it]


Validation score: 11.082099914550781
2020-06-11 22:21:13 Epoch 11: Loss: -0.6651092171669006: Cost: 11.137473106384277


Rollout greedy execution: 625it [17:30,  1.68s/it]


Current decode type: sampling


batch calculation at epoch 12: 1it [00:01,  1.93s/it]

grad_global_norm = 10.168418884277344, clipped_norm = 1.0
Epoch 12 (batch = 0): Loss: -0.5501177906990051: Cost: 11.208012580871582


batch calculation at epoch 12: 2501it [1:22:07,  2.04s/it]

grad_global_norm = 9.077740669250488, clipped_norm = 1.0
Epoch 12 (batch = 2500): Loss: -0.5409610867500305: Cost: 11.12477970123291


batch calculation at epoch 12: 5000it [2:43:37,  1.96s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 12)


Rollout greedy execution: 10it [00:13,  1.36s/it]


Epoch 12 candidate mean 11.07405948638916, baseline epoch 12 mean 11.113006591796875, difference -0.038947105407714844
p-value: 3.96429765608085e-30
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 12)


Rollout greedy execution: 10it [00:13,  1.31s/it]
Rollout greedy execution: 10it [00:12,  1.28s/it]


Validation score: 11.052499771118164
2020-06-12 01:29:49 Epoch 12: Loss: -0.548653781414032: Cost: 11.126972198486328


Rollout greedy execution: 625it [17:45,  1.70s/it]


Current decode type: sampling


batch calculation at epoch 13: 1it [00:01,  1.98s/it]

grad_global_norm = 8.336332321166992, clipped_norm = 1.0
Epoch 13 (batch = 0): Loss: -0.6793074607849121: Cost: 11.074748992919922


batch calculation at epoch 13: 2501it [1:21:22,  1.85s/it]

grad_global_norm = 8.299532890319824, clipped_norm = 0.9999999403953552
Epoch 13 (batch = 2500): Loss: -0.7950794696807861: Cost: 11.116294860839844


batch calculation at epoch 13: 5000it [2:41:44,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 13)


Rollout greedy execution: 10it [00:13,  1.32s/it]


Epoch 13 candidate mean 11.022395133972168, baseline epoch 13 mean 11.030585289001465, difference -0.008190155029296875
p-value: 0.006020826112700319
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 13)


Rollout greedy execution: 10it [00:13,  1.32s/it]
Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 11.043600082397461
2020-06-12 04:36:45 Epoch 13: Loss: -0.7456424832344055: Cost: 11.111188888549805


Rollout greedy execution: 625it [17:36,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 14: 1it [00:01,  1.96s/it]

grad_global_norm = 6.697481155395508, clipped_norm = 1.0
Epoch 14 (batch = 0): Loss: -0.28175899386405945: Cost: 11.101398468017578


batch calculation at epoch 14: 2501it [1:20:24,  1.84s/it]

grad_global_norm = 9.25159740447998, clipped_norm = 0.9999999403953552
Epoch 14 (batch = 2500): Loss: -0.6926174163818359: Cost: 11.101754188537598


batch calculation at epoch 14: 5000it [2:42:15,  1.95s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 14)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 14 candidate mean 11.028322219848633, baseline epoch 14 mean 11.026918411254883, difference 0.00140380859375


Rollout greedy execution: 10it [00:13,  1.35s/it]


Validation score: 11.054200172424316
2020-06-12 07:44:22 Epoch 14: Loss: -0.6866405010223389: Cost: 11.101608276367188


Rollout greedy execution: 625it [17:35,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 15: 1it [00:01,  1.90s/it]

grad_global_norm = 11.148175239562988, clipped_norm = 1.0
Epoch 15 (batch = 0): Loss: -0.7048095464706421: Cost: 11.03945541381836


batch calculation at epoch 15: 2501it [1:22:14,  1.93s/it]

grad_global_norm = 8.859278678894043, clipped_norm = 0.9999999403953552
Epoch 15 (batch = 2500): Loss: -0.6448091864585876: Cost: 11.09705924987793


batch calculation at epoch 15: 5000it [2:43:33,  1.96s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 15)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 15 candidate mean 11.016462326049805, baseline epoch 15 mean 11.026918411254883, difference -0.010456085205078125
p-value: 0.0008882352438256344
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 15)


Rollout greedy execution: 10it [00:13,  1.34s/it]
Rollout greedy execution: 10it [00:13,  1.34s/it]


Validation score: 11.039600372314453
2020-06-12 10:53:04 Epoch 15: Loss: -0.6047930717468262: Cost: 11.09186840057373


Rollout greedy execution: 625it [18:03,  1.73s/it]


Current decode type: sampling


batch calculation at epoch 16: 1it [00:02,  2.40s/it]

grad_global_norm = 12.149368286132812, clipped_norm = 1.0
Epoch 16 (batch = 0): Loss: -0.7114059925079346: Cost: 11.029273986816406


batch calculation at epoch 16: 2501it [1:21:30,  1.86s/it]

grad_global_norm = 9.209676742553711, clipped_norm = 1.0
Epoch 16 (batch = 2500): Loss: -0.5986869931221008: Cost: 11.083420753479004


batch calculation at epoch 16: 5000it [2:43:47,  1.97s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 16)


Rollout greedy execution: 10it [00:13,  1.31s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 16 candidate mean 11.050719261169434, baseline epoch 16 mean 11.049712181091309, difference 0.001007080078125


Rollout greedy execution: 10it [00:13,  1.32s/it]


Validation score: 11.04010009765625
2020-06-12 14:02:09 Epoch 16: Loss: -0.6063474416732788: Cost: 11.082761764526367


Rollout greedy execution: 625it [18:01,  1.73s/it]


Current decode type: sampling


batch calculation at epoch 17: 1it [00:02,  2.17s/it]

grad_global_norm = 9.794734001159668, clipped_norm = 1.0
Epoch 17 (batch = 0): Loss: -0.14171917736530304: Cost: 11.117289543151855


batch calculation at epoch 17: 2501it [1:20:33,  1.88s/it]

grad_global_norm = 13.005970001220703, clipped_norm = 0.9999998807907104
Epoch 17 (batch = 2500): Loss: -0.5730579495429993: Cost: 11.083616256713867


batch calculation at epoch 17: 5000it [2:42:49,  1.95s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 17)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 17 candidate mean 11.03978157043457, baseline epoch 17 mean 11.049712181091309, difference -0.009930610656738281
p-value: 0.0013955953402202514
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 17)


Rollout greedy execution: 10it [00:13,  1.31s/it]
Rollout greedy execution: 10it [00:12,  1.29s/it]


Validation score: 11.028599739074707
2020-06-12 17:10:31 Epoch 17: Loss: -0.5522047281265259: Cost: 11.079450607299805


Rollout greedy execution: 625it [17:33,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 18: 1it [00:02,  2.04s/it]

grad_global_norm = 7.035890579223633, clipped_norm = 1.0
Epoch 18 (batch = 0): Loss: -0.5901171565055847: Cost: 11.138572692871094


batch calculation at epoch 18: 2501it [1:21:02,  1.87s/it]

grad_global_norm = 6.272623062133789, clipped_norm = 0.9999999403953552
Epoch 18 (batch = 2500): Loss: -0.5727570056915283: Cost: 11.071002006530762


batch calculation at epoch 18: 5000it [2:42:25,  1.95s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 18)


Rollout greedy execution: 10it [00:13,  1.30s/it]


Epoch 18 candidate mean 11.022876739501953, baseline epoch 18 mean 11.033580780029297, difference -0.01070404052734375
p-value: 0.00026110024412375393
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 18)


Rollout greedy execution: 10it [00:12,  1.30s/it]
Rollout greedy execution: 10it [00:12,  1.29s/it]


Validation score: 11.024800300598145
2020-06-12 20:18:16 Epoch 18: Loss: -0.5564494729042053: Cost: 11.067978858947754


Rollout greedy execution: 625it [17:37,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 19: 1it [00:02,  2.03s/it]

grad_global_norm = 10.196194648742676, clipped_norm = 0.9999999403953552
Epoch 19 (batch = 0): Loss: -0.6843745708465576: Cost: 11.140774726867676


batch calculation at epoch 19: 2501it [1:22:03,  1.87s/it]

grad_global_norm = 14.427188873291016, clipped_norm = 0.9999999403953552
Epoch 19 (batch = 2500): Loss: -0.5263656973838806: Cost: 11.06050968170166


batch calculation at epoch 19: 5000it [2:43:30,  1.96s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 19)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 19 candidate mean 11.061685562133789, baseline epoch 19 mean 11.029619216918945, difference 0.03206634521484375


Rollout greedy execution: 10it [00:12,  1.27s/it]


Validation score: 11.052900314331055
2020-06-12 23:26:35 Epoch 19: Loss: -0.5062958002090454: Cost: 11.058632850646973


Rollout greedy execution: 625it [17:35,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 20: 1it [00:01,  1.88s/it]

grad_global_norm = 11.156841278076172, clipped_norm = 1.0
Epoch 20 (batch = 0): Loss: -0.4975827932357788: Cost: 11.090295791625977


batch calculation at epoch 20: 2501it [1:20:32,  2.01s/it]

grad_global_norm = 7.390630722045898, clipped_norm = 1.0
Epoch 20 (batch = 2500): Loss: -0.4928207993507385: Cost: 11.05563735961914


batch calculation at epoch 20: 5000it [2:41:07,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 20)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 20 candidate mean 11.014605522155762, baseline epoch 20 mean 11.029619216918945, difference -0.015013694763183594
p-value: 1.122574127347446e-06
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 20)


Rollout greedy execution: 10it [00:13,  1.30s/it]
Rollout greedy execution: 10it [00:13,  1.40s/it]


Validation score: 11.008899688720703
2020-06-13 02:32:49 Epoch 20: Loss: -0.46234023571014404: Cost: 11.052490234375


Rollout greedy execution: 625it [17:57,  1.72s/it]


Current decode type: sampling


batch calculation at epoch 21: 1it [00:01,  1.90s/it]

grad_global_norm = 6.588982582092285, clipped_norm = 1.0
Epoch 21 (batch = 0): Loss: -0.927673876285553: Cost: 11.037757873535156


batch calculation at epoch 21: 2501it [1:22:15,  2.00s/it]

grad_global_norm = 6.8024821281433105, clipped_norm = 1.0
Epoch 21 (batch = 2500): Loss: -0.5244178771972656: Cost: 11.047782897949219


batch calculation at epoch 21: 5000it [2:45:22,  1.98s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 21)


Rollout greedy execution: 10it [00:13,  1.32s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 21 candidate mean 10.995624542236328, baseline epoch 21 mean 10.996143341064453, difference -0.000518798828125
p-value: 0.434314054353134


Rollout greedy execution: 10it [00:12,  1.28s/it]


Validation score: 11.010199546813965
2020-06-13 05:44:11 Epoch 21: Loss: -0.5029519200325012: Cost: 11.046622276306152


Rollout greedy execution: 625it [18:07,  1.74s/it]


Current decode type: sampling


batch calculation at epoch 22: 1it [00:02,  2.03s/it]

grad_global_norm = 12.495262145996094, clipped_norm = 0.9999999403953552
Epoch 22 (batch = 0): Loss: -0.530569314956665: Cost: 11.120158195495605


batch calculation at epoch 22: 2501it [1:21:33,  2.15s/it]

grad_global_norm = 9.616203308105469, clipped_norm = 1.0
Epoch 22 (batch = 2500): Loss: -0.48544538021087646: Cost: 11.045839309692383


batch calculation at epoch 22: 5000it [2:42:01,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 22)


Rollout greedy execution: 10it [00:13,  1.32s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 22 candidate mean 11.0049409866333, baseline epoch 22 mean 10.996143341064453, difference 0.008797645568847656


Rollout greedy execution: 10it [00:13,  1.40s/it]


Validation score: 11.021699905395508
2020-06-13 08:51:34 Epoch 22: Loss: -0.47782161831855774: Cost: 11.045327186584473


Rollout greedy execution: 625it [17:38,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 23: 1it [00:02,  2.02s/it]

grad_global_norm = 7.679265975952148, clipped_norm = 1.0
Epoch 23 (batch = 0): Loss: -0.17302733659744263: Cost: 10.950159072875977


batch calculation at epoch 23: 2501it [1:20:25,  1.92s/it]

grad_global_norm = 9.808504104614258, clipped_norm = 1.0
Epoch 23 (batch = 2500): Loss: -0.43973493576049805: Cost: 11.04165267944336


batch calculation at epoch 23: 5000it [2:41:15,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 23)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 23 candidate mean 10.968700408935547, baseline epoch 23 mean 10.996143341064453, difference -0.02744293212890625
p-value: 1.766856508033525e-19
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 23)


Rollout greedy execution: 10it [00:13,  1.31s/it]
Rollout greedy execution: 10it [00:13,  1.34s/it]


Validation score: 10.98740005493164
2020-06-13 11:58:50 Epoch 23: Loss: -0.4275639057159424: Cost: 11.038623809814453


Rollout greedy execution: 625it [17:51,  1.72s/it]


Current decode type: sampling


batch calculation at epoch 24: 1it [00:01,  1.92s/it]

grad_global_norm = 5.436481475830078, clipped_norm = 0.9999999403953552
Epoch 24 (batch = 0): Loss: -0.3192134201526642: Cost: 11.00059700012207


batch calculation at epoch 24: 2501it [1:20:38,  1.90s/it]

grad_global_norm = 8.976200103759766, clipped_norm = 0.9999999403953552
Epoch 24 (batch = 2500): Loss: -0.5603467226028442: Cost: 11.036388397216797


batch calculation at epoch 24: 5000it [2:41:32,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 24)


Rollout greedy execution: 10it [00:13,  1.36s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 24 candidate mean 11.037742614746094, baseline epoch 24 mean 11.013297080993652, difference 0.024445533752441406


Rollout greedy execution: 10it [00:14,  1.43s/it]


Validation score: 11.0068998336792
2020-06-13 15:05:31 Epoch 24: Loss: -0.5603874921798706: Cost: 11.035743713378906


Rollout greedy execution: 625it [17:44,  1.70s/it]


Current decode type: sampling


batch calculation at epoch 25: 1it [00:02,  2.00s/it]

grad_global_norm = 9.849569320678711, clipped_norm = 1.0
Epoch 25 (batch = 0): Loss: -0.6508307456970215: Cost: 11.101130485534668


batch calculation at epoch 25: 2501it [1:19:46,  2.27s/it]

grad_global_norm = 7.666372299194336, clipped_norm = 0.9999999403953552
Epoch 25 (batch = 2500): Loss: -0.5128734707832336: Cost: 11.026142120361328


batch calculation at epoch 25: 5000it [2:40:52,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 25)


Rollout greedy execution: 10it [00:13,  1.36s/it]


Epoch 25 candidate mean 11.007633209228516, baseline epoch 25 mean 11.013297080993652, difference -0.005663871765136719
p-value: 0.03196392433853154
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 25)


Rollout greedy execution: 10it [00:13,  1.34s/it]
Rollout greedy execution: 10it [00:14,  1.41s/it]


Validation score: 10.985400199890137
2020-06-13 18:12:28 Epoch 25: Loss: -0.5036625862121582: Cost: 11.025985717773438


Rollout greedy execution: 625it [18:37,  1.79s/it]


Current decode type: sampling


batch calculation at epoch 26: 1it [00:01,  1.97s/it]

grad_global_norm = 6.389681339263916, clipped_norm = 1.0
Epoch 26 (batch = 0): Loss: -0.3697946071624756: Cost: 11.106886863708496


batch calculation at epoch 26: 2501it [1:21:47,  2.06s/it]

grad_global_norm = 11.56322193145752, clipped_norm = 0.9999999403953552
Epoch 26 (batch = 2500): Loss: -0.47986850142478943: Cost: 11.020234107971191


batch calculation at epoch 26: 5000it [2:43:32,  1.96s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 26)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 26 candidate mean 10.995859146118164, baseline epoch 26 mean 10.97992992401123, difference 0.015929222106933594


Rollout greedy execution: 10it [00:12,  1.27s/it]


Validation score: 10.998800277709961
2020-06-13 21:22:08 Epoch 26: Loss: -0.503364622592926: Cost: 11.022896766662598


Rollout greedy execution: 625it [18:29,  1.77s/it]


Current decode type: sampling


batch calculation at epoch 27: 1it [00:01,  1.98s/it]

grad_global_norm = 9.175857543945312, clipped_norm = 0.9999999403953552
Epoch 27 (batch = 0): Loss: -0.6246690154075623: Cost: 11.16710090637207


batch calculation at epoch 27: 2501it [1:21:42,  1.93s/it]

grad_global_norm = 7.902885437011719, clipped_norm = 0.9999999403953552
Epoch 27 (batch = 2500): Loss: -0.49835819005966187: Cost: 11.022826194763184


batch calculation at epoch 27: 5000it [2:43:47,  1.97s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 27)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 27 candidate mean 10.976287841796875, baseline epoch 27 mean 10.97992992401123, difference -0.0036420822143554688
p-value: 0.12237785097686125


Rollout greedy execution: 10it [00:12,  1.28s/it]


Validation score: 10.980600357055664
2020-06-14 00:31:55 Epoch 27: Loss: -0.4689081907272339: Cost: 11.019124984741211


Rollout greedy execution: 625it [18:34,  1.78s/it]


Current decode type: sampling


batch calculation at epoch 28: 1it [00:01,  1.85s/it]

grad_global_norm = 10.055943489074707, clipped_norm = 0.9999999403953552
Epoch 28 (batch = 0): Loss: -0.07146346569061279: Cost: 10.997413635253906


batch calculation at epoch 28: 2501it [1:20:21,  1.99s/it]

grad_global_norm = 10.148340225219727, clipped_norm = 0.9999998807907104
Epoch 28 (batch = 2500): Loss: -0.4688210189342499: Cost: 11.021055221557617


batch calculation at epoch 28: 5000it [2:41:21,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 28)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 28 candidate mean 11.004035949707031, baseline epoch 28 mean 10.97992992401123, difference 0.02410602569580078


Rollout greedy execution: 10it [00:13,  1.34s/it]


Validation score: 11.003499984741211
2020-06-14 03:39:30 Epoch 28: Loss: -0.4714899957180023: Cost: 11.02061939239502


Rollout greedy execution: 625it [18:36,  1.79s/it]


Current decode type: sampling


batch calculation at epoch 29: 1it [00:02,  2.04s/it]

grad_global_norm = 5.992981910705566, clipped_norm = 1.0
Epoch 29 (batch = 0): Loss: -0.7772122025489807: Cost: 11.119779586791992


batch calculation at epoch 29: 2501it [1:23:28,  1.98s/it]

grad_global_norm = 9.865584373474121, clipped_norm = 0.9999999403953552
Epoch 29 (batch = 2500): Loss: -0.40622809529304504: Cost: 11.01116943359375


batch calculation at epoch 29: 5000it [2:45:45,  1.99s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 29)


Rollout greedy execution: 10it [00:12,  1.26s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 29 candidate mean 10.97958755493164, baseline epoch 29 mean 10.97992992401123, difference -0.00034236907958984375
p-value: 0.45601030284916355


Rollout greedy execution: 10it [00:12,  1.29s/it]


Validation score: 10.978599548339844
2020-06-14 06:51:10 Epoch 29: Loss: -0.4194793105125427: Cost: 11.01209545135498


Rollout greedy execution: 625it [18:29,  1.78s/it]


Current decode type: sampling


batch calculation at epoch 30: 1it [00:01,  1.90s/it]

grad_global_norm = 7.506865978240967, clipped_norm = 0.9999999403953552
Epoch 30 (batch = 0): Loss: -0.3742859959602356: Cost: 11.024724006652832


batch calculation at epoch 30: 2501it [1:20:05,  1.90s/it]

grad_global_norm = 6.096225738525391, clipped_norm = 1.0
Epoch 30 (batch = 2500): Loss: -0.39185407757759094: Cost: 11.005475997924805


batch calculation at epoch 30: 5000it [2:40:16,  1.92s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 30)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 30 candidate mean 10.982522964477539, baseline epoch 30 mean 10.97992992401123, difference 0.0025930404663085938


Rollout greedy execution: 10it [00:12,  1.28s/it]


Validation score: 10.982799530029297
2020-06-14 09:57:07 Epoch 30: Loss: -0.3874153196811676: Cost: 11.006143569946289


Rollout greedy execution: 625it [18:26,  1.77s/it]


Current decode type: sampling


batch calculation at epoch 31: 1it [00:02,  2.10s/it]

grad_global_norm = 15.32094955444336, clipped_norm = 0.9999998807907104
Epoch 31 (batch = 0): Loss: -0.5986191034317017: Cost: 11.126441955566406


batch calculation at epoch 31: 2501it [1:20:07,  2.15s/it]

grad_global_norm = 15.182661056518555, clipped_norm = 1.0
Epoch 31 (batch = 2500): Loss: -0.39200207591056824: Cost: 11.007142066955566


batch calculation at epoch 31: 5000it [2:40:27,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 31)


Rollout greedy execution: 10it [00:13,  1.37s/it]


Epoch 31 candidate mean 10.97392463684082, baseline epoch 31 mean 10.97992992401123, difference -0.006005287170410156
p-value: 0.02224963914489609
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 31)


Rollout greedy execution: 10it [00:13,  1.37s/it]
Rollout greedy execution: 10it [00:13,  1.34s/it]


Validation score: 10.972700119018555
2020-06-14 13:03:32 Epoch 31: Loss: -0.3917125165462494: Cost: 11.008298873901367


Rollout greedy execution: 625it [17:37,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 32: 1it [00:01,  1.92s/it]

grad_global_norm = 5.679290771484375, clipped_norm = 1.0
Epoch 32 (batch = 0): Loss: -0.5569268465042114: Cost: 11.054176330566406


batch calculation at epoch 32: 2501it [1:20:22,  1.87s/it]

grad_global_norm = 13.531243324279785, clipped_norm = 1.0
Epoch 32 (batch = 2500): Loss: -0.3697279393672943: Cost: 10.999537467956543


batch calculation at epoch 32: 5000it [2:41:09,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 32)


Rollout greedy execution: 10it [00:12,  1.25s/it]


Epoch 32 candidate mean 10.964159965515137, baseline epoch 32 mean 10.977408409118652, difference -0.013248443603515625
p-value: 7.105346449634357e-06
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 32)


Rollout greedy execution: 10it [00:13,  1.33s/it]
Rollout greedy execution: 10it [00:13,  1.36s/it]


Validation score: 10.962300300598145
2020-06-14 16:09:49 Epoch 32: Loss: -0.37656551599502563: Cost: 11.00062370300293


Rollout greedy execution: 625it [17:21,  1.67s/it]


Current decode type: sampling


batch calculation at epoch 33: 1it [00:01,  1.89s/it]

grad_global_norm = 8.121719360351562, clipped_norm = 0.9999999403953552
Epoch 33 (batch = 0): Loss: -0.349069207906723: Cost: 11.080751419067383


batch calculation at epoch 33: 2501it [1:19:56,  2.00s/it]

grad_global_norm = 7.100788593292236, clipped_norm = 0.9999999403953552
Epoch 33 (batch = 2500): Loss: -0.43765366077423096: Cost: 10.99598217010498


batch calculation at epoch 33: 5000it [2:40:15,  1.92s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 33)


Rollout greedy execution: 10it [00:12,  1.29s/it]


Epoch 33 candidate mean 10.922266960144043, baseline epoch 33 mean 10.94990348815918, difference -0.02763652801513672
p-value: 6.967772838380116e-21
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 33)


Rollout greedy execution: 10it [00:13,  1.30s/it]
Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 10.934800148010254
2020-06-14 19:14:59 Epoch 33: Loss: -0.44327065348625183: Cost: 10.996098518371582


Rollout greedy execution: 625it [17:17,  1.66s/it]


Current decode type: sampling


batch calculation at epoch 34: 1it [00:01,  1.84s/it]

grad_global_norm = 6.749912261962891, clipped_norm = 0.9999998807907104
Epoch 34 (batch = 0): Loss: -0.23575171828269958: Cost: 10.986869812011719


batch calculation at epoch 34: 2501it [1:19:24,  1.97s/it]

grad_global_norm = 9.649802207946777, clipped_norm = 1.0
Epoch 34 (batch = 2500): Loss: -0.599662184715271: Cost: 10.991979598999023


batch calculation at epoch 34: 5000it [2:39:37,  1.92s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 34)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 34 candidate mean 10.941102027893066, baseline epoch 34 mean 10.906962394714355, difference 0.03413963317871094


Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 10.963800430297852
2020-06-14 22:19:09 Epoch 34: Loss: -0.6008678078651428: Cost: 10.993179321289062


Rollout greedy execution: 625it [17:38,  1.69s/it]


Current decode type: sampling


batch calculation at epoch 35: 1it [00:01,  1.93s/it]

grad_global_norm = 5.188992977142334, clipped_norm = 1.0
Epoch 35 (batch = 0): Loss: -0.530532956123352: Cost: 11.065058708190918


batch calculation at epoch 35: 2501it [1:20:10,  1.92s/it]

grad_global_norm = 5.470370769500732, clipped_norm = 1.0
Epoch 35 (batch = 2500): Loss: -0.5885952711105347: Cost: 10.996305465698242


batch calculation at epoch 35: 5000it [2:39:13,  1.91s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 35)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 35 candidate mean 10.9537992477417, baseline epoch 35 mean 10.906962394714355, difference 0.04683685302734375


Rollout greedy execution: 10it [00:12,  1.29s/it]


Validation score: 10.978400230407715
2020-06-15 01:23:14 Epoch 35: Loss: -0.5889467000961304: Cost: 10.99502182006836


Rollout greedy execution: 625it [17:28,  1.68s/it]


Current decode type: sampling


batch calculation at epoch 36: 1it [00:01,  1.97s/it]

grad_global_norm = 10.734577178955078, clipped_norm = 0.9999999403953552
Epoch 36 (batch = 0): Loss: -0.8565853834152222: Cost: 11.0294828414917


batch calculation at epoch 36: 2501it [1:20:08,  1.93s/it]

grad_global_norm = 7.995916843414307, clipped_norm = 1.0
Epoch 36 (batch = 2500): Loss: -0.5759528875350952: Cost: 10.991862297058105


batch calculation at epoch 36: 5000it [2:39:43,  1.92s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 36)


Rollout greedy execution: 10it [00:13,  1.32s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 36 candidate mean 10.935585975646973, baseline epoch 36 mean 10.906962394714355, difference 0.028623580932617188


Rollout greedy execution: 10it [00:13,  1.37s/it]


Validation score: 10.961299896240234
2020-06-15 04:27:38 Epoch 36: Loss: -0.5674291253089905: Cost: 10.990410804748535


Rollout greedy execution: 625it [17:24,  1.67s/it]


Current decode type: sampling


batch calculation at epoch 37: 1it [00:01,  1.95s/it]

grad_global_norm = 9.502066612243652, clipped_norm = 0.9999999403953552
Epoch 37 (batch = 0): Loss: -0.5700955986976624: Cost: 10.974898338317871


batch calculation at epoch 37: 2501it [1:19:25,  1.86s/it]

grad_global_norm = 11.813592910766602, clipped_norm = 0.9999999403953552
Epoch 37 (batch = 2500): Loss: -0.559524416923523: Cost: 10.992839813232422


batch calculation at epoch 37: 5000it [2:38:22,  1.90s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 37)


Rollout greedy execution: 10it [00:13,  1.34s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 37 candidate mean 10.928725242614746, baseline epoch 37 mean 10.906962394714355, difference 0.021762847900390625


Rollout greedy execution: 10it [00:13,  1.37s/it]


Validation score: 10.949199676513672
2020-06-15 07:30:35 Epoch 37: Loss: -0.5412841439247131: Cost: 10.989749908447266


Rollout greedy execution: 625it [17:24,  1.67s/it]


Current decode type: sampling


batch calculation at epoch 38: 1it [00:02,  2.00s/it]

grad_global_norm = 5.526858806610107, clipped_norm = 1.0000001192092896
Epoch 38 (batch = 0): Loss: -0.2875667214393616: Cost: 11.044438362121582


batch calculation at epoch 38: 2501it [1:20:30,  1.89s/it]

grad_global_norm = 5.974480628967285, clipped_norm = 0.9999999403953552
Epoch 38 (batch = 2500): Loss: -0.5029155611991882: Cost: 10.982295989990234


batch calculation at epoch 38: 5000it [2:41:05,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 38)


Rollout greedy execution: 10it [00:13,  1.38s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 38 candidate mean 10.930232048034668, baseline epoch 38 mean 10.906962394714355, difference 0.0232696533203125


Rollout greedy execution: 10it [00:13,  1.40s/it]


Validation score: 10.95300006866455
2020-06-15 10:36:19 Epoch 38: Loss: -0.5118466019630432: Cost: 10.983564376831055


Rollout greedy execution: 30it [00:51,  1.68s/it]

KeyboardInterrupt: 

In [1]:
import tensorflow as tf
from time import gmtime, strftime

from attention_dynamic_model import set_decode_type
from reinforce_baseline import RolloutBaseline
from train import train_model

from utils import get_cur_time
from reinforce_baseline import load_tf_model
from utils import read_from_pickle


# ---------------------------------------------------------------------------
# Resume-training cell: reload the policy and baseline networks from saved
# checkpoints and continue training from START_EPOCH up to END_EPOCH.
# ---------------------------------------------------------------------------

# Data / optimisation hyper-parameters.
BATCH = 256
SAMPLES = BATCH * 5000  # 1280000 training instances per epoch
LEARNING_RATE = 0.0003
GRAD_NORM_CLIPPING = 1.0
GRAPH_SIZE = 50

# Baseline (rollout) settings.
ROLLOUT_SAMPLES = 10000
NUMBER_OF_WP_EPOCHS = 1

# Logging / validation settings.
BATCH_VERBOSE = 2500
VAL_BATCH_SIZE = 1000

# Not referenced anywhere in this cell; retained so the set of module-level
# names stays unchanged.
VALIDATE_SET_SIZE = 10000
SEED = 1234

# Run tag passed to the baseline and the trainer. The date component is the
# UTC date at the moment this cell executes, not the date embedded in the
# checkpoint file names below.
FILENAME = 'VRP_{}_{}'.format(
    GRAPH_SIZE,
    strftime("%Y-%m-%d", gmtime()),
)

# Checkpoint / resume configuration.
FROM_CHECKPOINT = True
START_EPOCH = 39
END_EPOCH = 60
embedding_dim = 128
MODEL_PATH = 'model_checkpoint_epoch_38_VRP_50_2020-06-10.h5'
BASELINE_MODEL_PATH = 'baseline_checkpoint_epoch_33_VRP_50_2020-06-10.h5'
VAL_SET_PATH = 'Validation_dataset_VRP_50_2020-06-09.pkl'

# Restore the policy network from its checkpoint and put it in sampling mode.
model_tf = load_tf_model(
    MODEL_PATH,
    embedding_dim=embedding_dim,
    graph_size=GRAPH_SIZE,
)
set_decode_type(model_tf, "sampling")
print(get_cur_time(), 'model loaded')

# Load the previously pickled validation dataset (nothing is created here).
validation_dataset = read_from_pickle(VAL_SET_PATH)
print(get_cur_time(), 'validation dataset loaded')

# A new Adam instance is constructed for the resumed run.
# NOTE(review): whether optimizer state is restored elsewhere is not visible
# from this cell -- confirm against train_model.
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

# Rebuild the rollout baseline from its own checkpoint file.
baseline = RolloutBaseline(
    model_tf,
    wp_n_epochs=NUMBER_OF_WP_EPOCHS,
    epoch=START_EPOCH,
    num_samples=ROLLOUT_SAMPLES,
    filename=FILENAME,
    from_checkpoint=FROM_CHECKPOINT,
    embedding_dim=embedding_dim,
    graph_size=GRAPH_SIZE,
    path_to_checkpoint=BASELINE_MODEL_PATH,
)
print(get_cur_time(), 'baseline initialized')

# Continue training over epochs START_EPOCH..END_EPOCH.
train_model(
    optimizer,
    model_tf,
    baseline,
    validation_dataset,
    samples=SAMPLES,
    batch=BATCH,
    val_batch_size=VAL_BATCH_SIZE,
    start_epoch=START_EPOCH,
    end_epoch=END_EPOCH,
    from_checkpoint=FROM_CHECKPOINT,
    grad_norm_clipping=GRAD_NORM_CLIPPING,
    batch_verbose=BATCH_VERBOSE,
    graph_size=GRAPH_SIZE,
    filename=FILENAME,
)

2020-06-15 13:01:18 model loaded
2020-06-15 13:01:39 validation dataset loaded
Baseline model loaded


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 39)


Rollout greedy execution: 10it [00:12,  1.23s/it]


2020-06-15 13:01:55 baseline initialized


Rollout greedy execution: 0it [00:00, ?it/s]

Skipping warm-up mode


Rollout greedy execution: 625it [17:09,  1.65s/it]


Current decode type: sampling


batch calculation at epoch 39: 0it [00:00, ?it/s]

Instructions for updating:
Use tf.identity instead.


batch calculation at epoch 39: 1it [00:02,  2.12s/it]

grad_global_norm = 10.73147201538086, clipped_norm = 0.9999998807907104
Epoch 39 (batch = 0): Loss: -0.39526382088661194: Cost: 11.010337829589844


batch calculation at epoch 39: 2501it [1:20:03,  1.92s/it]

grad_global_norm = 9.211918830871582, clipped_norm = 1.0000001192092896
Epoch 39 (batch = 2500): Loss: -0.5088362097740173: Cost: 10.985867500305176


batch calculation at epoch 39: 5000it [2:42:38,  1.95s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 39)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 39 candidate mean 10.925374984741211, baseline epoch 39 mean 10.912510871887207, difference 0.012864112854003906


Rollout greedy execution: 10it [00:12,  1.27s/it]


Validation score: 10.948200225830078
2020-06-15 16:09:04 Epoch 39: Loss: -0.4896743595600128: Cost: 10.980860710144043


Rollout greedy execution: 625it [16:57,  1.63s/it]


Current decode type: sampling


batch calculation at epoch 40: 1it [00:02,  2.05s/it]

grad_global_norm = 7.14324951171875, clipped_norm = 1.0
Epoch 40 (batch = 0): Loss: -0.5012513995170593: Cost: 10.942508697509766


batch calculation at epoch 40: 2501it [1:23:28,  2.11s/it]

grad_global_norm = 8.047517776489258, clipped_norm = 1.0
Epoch 40 (batch = 2500): Loss: -0.5027778148651123: Cost: 10.984174728393555


batch calculation at epoch 40: 5000it [2:44:52,  1.98s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 40)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 40 candidate mean 10.93132209777832, baseline epoch 40 mean 10.912510871887207, difference 0.01881122589111328


Rollout greedy execution: 10it [00:12,  1.30s/it]


Validation score: 10.955400466918945
2020-06-15 19:18:18 Epoch 40: Loss: -0.48463568091392517: Cost: 10.980917930603027


Rollout greedy execution: 625it [16:56,  1.63s/it]


Current decode type: sampling


batch calculation at epoch 41: 1it [00:02,  2.10s/it]

grad_global_norm = 8.876709938049316, clipped_norm = 1.0
Epoch 41 (batch = 0): Loss: -0.34206604957580566: Cost: 10.99575138092041


batch calculation at epoch 41: 2501it [1:21:19,  1.97s/it]

grad_global_norm = 11.238673210144043, clipped_norm = 1.0
Epoch 41 (batch = 2500): Loss: -0.4819967448711395: Cost: 10.978082656860352


batch calculation at epoch 41: 5000it [2:42:18,  1.95s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 41)


Rollout greedy execution: 10it [00:13,  1.34s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 41 candidate mean 10.925840377807617, baseline epoch 41 mean 10.912510871887207, difference 0.013329505920410156


Rollout greedy execution: 10it [00:13,  1.34s/it]


Validation score: 10.946399688720703
2020-06-15 22:25:04 Epoch 41: Loss: -0.4711120128631592: Cost: 10.976916313171387


Rollout greedy execution: 625it [17:07,  1.64s/it]


Current decode type: sampling


batch calculation at epoch 42: 1it [00:01,  1.99s/it]

grad_global_norm = 9.341597557067871, clipped_norm = 1.0
Epoch 42 (batch = 0): Loss: -0.3320988416671753: Cost: 10.91562271118164


batch calculation at epoch 42: 2501it [1:20:59,  1.92s/it]

grad_global_norm = 12.693188667297363, clipped_norm = 1.0
Epoch 42 (batch = 2500): Loss: -0.4679766893386841: Cost: 10.981194496154785


batch calculation at epoch 42: 5000it [2:42:08,  1.95s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 42)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 42 candidate mean 10.92045783996582, baseline epoch 42 mean 10.912510871887207, difference 0.007946968078613281


Rollout greedy execution: 10it [00:13,  1.32s/it]


Validation score: 10.943900108337402
2020-06-16 01:31:48 Epoch 42: Loss: -0.4661243259906769: Cost: 10.979805946350098


Rollout greedy execution: 625it [16:55,  1.62s/it]


Current decode type: sampling


batch calculation at epoch 43: 1it [00:02,  2.11s/it]

grad_global_norm = 9.683621406555176, clipped_norm = 0.9999999403953552
Epoch 43 (batch = 0): Loss: -0.5802931189537048: Cost: 10.990704536437988


batch calculation at epoch 43: 2501it [1:20:56,  1.90s/it]

grad_global_norm = 8.18542194366455, clipped_norm = 1.0
Epoch 43 (batch = 2500): Loss: -0.45801377296447754: Cost: 10.977043151855469


batch calculation at epoch 43: 5000it [2:41:53,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 43)


Rollout greedy execution: 10it [00:12,  1.30s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 43 candidate mean 10.941367149353027, baseline epoch 43 mean 10.912510871887207, difference 0.028856277465820312


Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 10.973699569702148
2020-06-16 04:37:57 Epoch 43: Loss: -0.4581642150878906: Cost: 10.978620529174805


Rollout greedy execution: 625it [16:54,  1.62s/it]


Current decode type: sampling


batch calculation at epoch 44: 1it [00:01,  1.94s/it]

grad_global_norm = 8.172162055969238, clipped_norm = 0.9999998807907104
Epoch 44 (batch = 0): Loss: -0.5223531723022461: Cost: 10.84147834777832


batch calculation at epoch 44: 2501it [1:20:13,  1.90s/it]

grad_global_norm = 8.24230670928955, clipped_norm = 1.0
Epoch 44 (batch = 2500): Loss: -0.4237540364265442: Cost: 10.971700668334961


batch calculation at epoch 44: 5000it [2:41:18,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 44)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 44 candidate mean 10.927685737609863, baseline epoch 44 mean 10.912510871887207, difference 0.01517486572265625


Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 10.955900192260742
2020-06-16 07:43:26 Epoch 44: Loss: -0.41860777139663696: Cost: 10.971725463867188


Rollout greedy execution: 625it [17:04,  1.64s/it]


Current decode type: sampling


batch calculation at epoch 45: 1it [00:02,  2.13s/it]

grad_global_norm = 10.471004486083984, clipped_norm = 1.0
Epoch 45 (batch = 0): Loss: -0.5565376877784729: Cost: 10.968417167663574


batch calculation at epoch 45: 2501it [1:21:22,  1.94s/it]

grad_global_norm = 10.059593200683594, clipped_norm = 1.0
Epoch 45 (batch = 2500): Loss: -0.46298879384994507: Cost: 10.979677200317383


batch calculation at epoch 45: 5000it [2:42:30,  1.95s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 45)


Rollout greedy execution: 10it [00:13,  1.31s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 45 candidate mean 10.924983978271484, baseline epoch 45 mean 10.912510871887207, difference 0.012473106384277344


Rollout greedy execution: 10it [00:13,  1.32s/it]


Validation score: 10.948699951171875
2020-06-16 10:50:21 Epoch 45: Loss: -0.4494501054286957: Cost: 10.976823806762695


Rollout greedy execution: 625it [16:55,  1.62s/it]


Current decode type: sampling


batch calculation at epoch 46: 1it [00:01,  1.95s/it]

grad_global_norm = 9.016016006469727, clipped_norm = 0.9999998807907104
Epoch 46 (batch = 0): Loss: -0.45116811990737915: Cost: 10.871524810791016


batch calculation at epoch 46: 2501it [1:20:20,  1.93s/it]

grad_global_norm = 15.767892837524414, clipped_norm = 1.0
Epoch 46 (batch = 2500): Loss: -0.39400234818458557: Cost: 10.967172622680664


batch calculation at epoch 46: 5000it [2:40:25,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 46)


Rollout greedy execution: 10it [00:12,  1.27s/it]


Epoch 46 candidate mean 10.904034614562988, baseline epoch 46 mean 10.912510871887207, difference -0.00847625732421875
p-value: 0.0021426279522096512
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 46)


Rollout greedy execution: 10it [00:12,  1.26s/it]
Rollout greedy execution: 10it [00:12,  1.28s/it]


Validation score: 10.928299903869629
2020-06-16 13:55:23 Epoch 46: Loss: -0.3781653046607971: Cost: 10.96613597869873


Rollout greedy execution: 625it [17:02,  1.64s/it]


Current decode type: sampling


batch calculation at epoch 47: 1it [00:02,  2.27s/it]

grad_global_norm = 6.164205074310303, clipped_norm = 0.9999998807907104
Epoch 47 (batch = 0): Loss: -0.14055867493152618: Cost: 10.779853820800781


batch calculation at epoch 47: 2501it [1:19:22,  1.87s/it]

grad_global_norm = 8.828855514526367, clipped_norm = 1.0
Epoch 47 (batch = 2500): Loss: -0.4061066508293152: Cost: 10.964171409606934


batch calculation at epoch 47: 5000it [2:39:25,  1.91s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 47)


Rollout greedy execution: 10it [00:12,  1.26s/it]


Epoch 47 candidate mean 10.921246528625488, baseline epoch 47 mean 10.93249797821045, difference -0.011251449584960938
p-value: 6.226278194444364e-05
Update baseline


Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 47)


Rollout greedy execution: 10it [00:13,  1.32s/it]
Rollout greedy execution: 10it [00:14,  1.44s/it]


Validation score: 10.915800094604492
2020-06-16 16:59:25 Epoch 47: Loss: -0.4136503040790558: Cost: 10.965538024902344


Rollout greedy execution: 625it [17:28,  1.68s/it]


Current decode type: sampling


batch calculation at epoch 48: 1it [00:01,  1.80s/it]

grad_global_norm = 3.834254503250122, clipped_norm = 0.9999999403953552
Epoch 48 (batch = 0): Loss: -0.1958588808774948: Cost: 10.879888534545898


batch calculation at epoch 48: 2501it [1:19:52,  1.94s/it]

grad_global_norm = 6.956699371337891, clipped_norm = 1.0
Epoch 48 (batch = 2500): Loss: -0.4750107228755951: Cost: 10.962016105651855


batch calculation at epoch 48: 5000it [2:40:04,  1.92s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 48)


Rollout greedy execution: 10it [00:12,  1.26s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 48 candidate mean 10.92222785949707, baseline epoch 48 mean 10.90587329864502, difference 0.01635456085205078


Rollout greedy execution: 10it [00:12,  1.26s/it]


Validation score: 10.929300308227539
2020-06-16 20:05:02 Epoch 48: Loss: -0.4959520399570465: Cost: 10.96557331085205


Rollout greedy execution: 625it [17:14,  1.66s/it]


Current decode type: sampling


batch calculation at epoch 49: 1it [00:01,  1.82s/it]

grad_global_norm = 7.553139686584473, clipped_norm = 0.9999999403953552
Epoch 49 (batch = 0): Loss: -0.40310174226760864: Cost: 10.933935165405273


batch calculation at epoch 49: 2501it [1:19:59,  1.88s/it]

grad_global_norm = 11.978582382202148, clipped_norm = 1.0
Epoch 49 (batch = 2500): Loss: -0.49332594871520996: Cost: 10.965635299682617


batch calculation at epoch 49: 5000it [2:39:37,  1.92s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 49)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 49 candidate mean 10.922173500061035, baseline epoch 49 mean 10.90587329864502, difference 0.016300201416015625


Rollout greedy execution: 10it [00:12,  1.27s/it]


Validation score: 10.934700012207031
2020-06-16 23:09:10 Epoch 49: Loss: -0.48394814133644104: Cost: 10.963394165039062


Rollout greedy execution: 625it [17:01,  1.63s/it]


Current decode type: sampling


batch calculation at epoch 50: 1it [00:01,  1.93s/it]

grad_global_norm = 10.918054580688477, clipped_norm = 1.0
Epoch 50 (batch = 0): Loss: -0.3931102156639099: Cost: 11.005988121032715


batch calculation at epoch 50: 2501it [1:19:40,  1.93s/it]

grad_global_norm = 15.22896671295166, clipped_norm = 0.9999999403953552
Epoch 50 (batch = 2500): Loss: -0.46986302733421326: Cost: 10.962969779968262


batch calculation at epoch 50: 5000it [2:39:51,  1.92s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 50)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 50 candidate mean 10.933216094970703, baseline epoch 50 mean 10.90587329864502, difference 0.027342796325683594


Rollout greedy execution: 10it [00:13,  1.32s/it]


Validation score: 10.941300392150879
2020-06-17 02:13:19 Epoch 50: Loss: -0.46061187982559204: Cost: 10.96197509765625


Rollout greedy execution: 625it [17:12,  1.65s/it]


Current decode type: sampling


batch calculation at epoch 51: 1it [00:01,  1.78s/it]

grad_global_norm = 19.28791618347168, clipped_norm = 1.0
Epoch 51 (batch = 0): Loss: -0.6294915080070496: Cost: 11.159650802612305


batch calculation at epoch 51: 2501it [1:20:12,  1.88s/it]

grad_global_norm = 8.428202629089355, clipped_norm = 1.0
Epoch 51 (batch = 2500): Loss: -0.4463144540786743: Cost: 10.958625793457031


batch calculation at epoch 51: 5000it [2:41:01,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 51)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 51 candidate mean 10.934144973754883, baseline epoch 51 mean 10.90587329864502, difference 0.02827167510986328


Rollout greedy execution: 10it [00:13,  1.35s/it]


Validation score: 10.946100234985352
2020-06-17 05:19:02 Epoch 51: Loss: -0.4592427611351013: Cost: 10.95950698852539


Rollout greedy execution: 625it [17:24,  1.67s/it]


Current decode type: sampling


batch calculation at epoch 52: 1it [00:01,  1.87s/it]

grad_global_norm = 7.501377582550049, clipped_norm = 0.9999999403953552
Epoch 52 (batch = 0): Loss: -0.5406900644302368: Cost: 10.86340618133545


batch calculation at epoch 52: 2501it [1:20:48,  2.04s/it]

grad_global_norm = 10.205304145812988, clipped_norm = 1.0
Epoch 52 (batch = 2500): Loss: -0.4420635998249054: Cost: 10.96086597442627


batch calculation at epoch 52: 5000it [2:40:55,  1.93s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 52)


Rollout greedy execution: 10it [00:12,  1.28s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 52 candidate mean 10.932676315307617, baseline epoch 52 mean 10.90587329864502, difference 0.026803016662597656


Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 10.943400382995605
2020-06-17 08:24:38 Epoch 52: Loss: -0.43876874446868896: Cost: 10.960862159729004


Rollout greedy execution: 625it [17:10,  1.65s/it]


Current decode type: sampling


batch calculation at epoch 53: 1it [00:01,  1.84s/it]

grad_global_norm = 9.63241195678711, clipped_norm = 1.0
Epoch 53 (batch = 0): Loss: -0.4945177435874939: Cost: 10.954998016357422


batch calculation at epoch 53: 2501it [1:20:32,  1.95s/it]

grad_global_norm = 11.196317672729492, clipped_norm = 1.0
Epoch 53 (batch = 2500): Loss: -0.44277358055114746: Cost: 10.960559844970703


batch calculation at epoch 53: 5000it [2:42:03,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 53)


Rollout greedy execution: 10it [00:12,  1.30s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 53 candidate mean 10.905552864074707, baseline epoch 53 mean 10.90587329864502, difference -0.0003204345703125
p-value: 0.4559868025517473


Rollout greedy execution: 10it [00:12,  1.28s/it]


Validation score: 10.910900115966797
2020-06-17 11:31:11 Epoch 53: Loss: -0.42986947298049927: Cost: 10.957161903381348


Rollout greedy execution: 625it [17:22,  1.67s/it]


Current decode type: sampling


batch calculation at epoch 54: 1it [00:01,  1.95s/it]

grad_global_norm = 17.507097244262695, clipped_norm = 1.0
Epoch 54 (batch = 0): Loss: -0.4030834436416626: Cost: 10.908098220825195


batch calculation at epoch 54: 2501it [1:20:42,  1.94s/it]

grad_global_norm = 10.71146011352539, clipped_norm = 1.0
Epoch 54 (batch = 2500): Loss: -0.4125242531299591: Cost: 10.955676078796387


batch calculation at epoch 54: 5000it [2:41:53,  1.94s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 54)


Rollout greedy execution: 10it [00:12,  1.29s/it]
Rollout greedy execution: 0it [00:00, ?it/s]

Epoch 54 candidate mean 10.929479598999023, baseline epoch 54 mean 10.90587329864502, difference 0.023606300354003906


Rollout greedy execution: 10it [00:13,  1.31s/it]


Validation score: 10.937100410461426
2020-06-17 14:37:54 Epoch 54: Loss: -0.42608192563056946: Cost: 10.956978797912598


Rollout greedy execution: 625it [17:08,  1.65s/it]


Current decode type: sampling


batch calculation at epoch 55: 1it [00:01,  1.93s/it]

grad_global_norm = 9.757869720458984, clipped_norm = 1.0
Epoch 55 (batch = 0): Loss: -0.3720964193344116: Cost: 10.925128936767578


batch calculation at epoch 55: 2501it [1:20:15,  1.91s/it]

grad_global_norm = 9.058759689331055, clipped_norm = 1.0
Epoch 55 (batch = 2500): Loss: -0.4454035758972168: Cost: 10.962148666381836


batch calculation at epoch 55: 3446it [1:50:25,  1.95s/it]

KeyboardInterrupt: 