-
Notifications
You must be signed in to change notification settings - Fork 154
/
Copy pathtransformer_en_de_u512.log
2108 lines (2108 loc) · 226 KB
/
transformer_en_de_u512.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
2018-06-25 19:17:22,519 - root - Namespace(average_checkpoint=False, average_start=5, batch_size=4096, beam_size=4, bleu='13a', bucket_ratio=0.0, bucket_scheme='exp', dataset='WMT2014BPE', dropout=0.1, epochs=40, epsilon=0.1, gpus='0,1,2,3', hidden_size=2048, log_interval=100, lp_alpha=0.6, lp_k=5, lr=1.0, magnitude=3.0, num_accumulated=8, num_averages=5, num_buckets=20, num_heads=8, num_layers=6, num_units=512, optimizer='adam', save_dir='transformer_en_de_u512', scaled=True, src_lang='en', src_max_len=-1, test_batch_size=256, tgt_lang='de', tgt_max_len=-1, warmup_steps=8000.0)
2018-06-25 19:19:12,102 - root - NMTModel(
(encoder): TransformerEncoder(
(dropout_layer): Dropout(p = 0.1, axes=())
(transformer_cells): HybridSequential(
(0): TransformerEncoderCell(
(dropout_layer): Dropout(p = 0.1, axes=())
(proj): Dense(None -> 512, linear)
(attention_cell): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(1): TransformerEncoderCell(
(dropout_layer): Dropout(p = 0.1, axes=())
(proj): Dense(None -> 512, linear)
(attention_cell): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(2): TransformerEncoderCell(
(dropout_layer): Dropout(p = 0.1, axes=())
(proj): Dense(None -> 512, linear)
(attention_cell): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(3): TransformerEncoderCell(
(dropout_layer): Dropout(p = 0.1, axes=())
(proj): Dense(None -> 512, linear)
(attention_cell): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(4): TransformerEncoderCell(
(dropout_layer): Dropout(p = 0.1, axes=())
(proj): Dense(None -> 512, linear)
(attention_cell): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(5): TransformerEncoderCell(
(dropout_layer): Dropout(p = 0.1, axes=())
(proj): Dense(None -> 512, linear)
(attention_cell): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(src_embed): HybridSequential(
(0): Embedding(36794 -> 512, float32)
(1): Dropout(p = 0.0, axes=())
)
(tgt_proj): Dense(None -> 36794, linear)
(decoder): TransformerDecoder(
(dropout_layer): Dropout(p = 0.1, axes=())
(transformer_cells): HybridSequential(
(0): TransformerDecoderCell(
(layer_norm_in): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(dropout_layer): Dropout(p = 0.1, axes=())
(attention_cell_in): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(layer_norm_inter): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(proj_inter): Dense(None -> 512, linear)
(proj_in): Dense(None -> 512, linear)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(attention_cell_inter): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
)
(1): TransformerDecoderCell(
(layer_norm_in): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(dropout_layer): Dropout(p = 0.1, axes=())
(attention_cell_in): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(layer_norm_inter): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(proj_inter): Dense(None -> 512, linear)
(proj_in): Dense(None -> 512, linear)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(attention_cell_inter): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
)
(2): TransformerDecoderCell(
(layer_norm_in): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(dropout_layer): Dropout(p = 0.1, axes=())
(attention_cell_in): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(layer_norm_inter): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(proj_inter): Dense(None -> 512, linear)
(proj_in): Dense(None -> 512, linear)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(attention_cell_inter): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
)
(3): TransformerDecoderCell(
(layer_norm_in): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(dropout_layer): Dropout(p = 0.1, axes=())
(attention_cell_in): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(layer_norm_inter): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(proj_inter): Dense(None -> 512, linear)
(proj_in): Dense(None -> 512, linear)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(attention_cell_inter): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
)
(4): TransformerDecoderCell(
(layer_norm_in): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(dropout_layer): Dropout(p = 0.1, axes=())
(attention_cell_in): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(layer_norm_inter): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(proj_inter): Dense(None -> 512, linear)
(proj_in): Dense(None -> 512, linear)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(attention_cell_inter): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
)
(5): TransformerDecoderCell(
(layer_norm_in): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(dropout_layer): Dropout(p = 0.1, axes=())
(attention_cell_in): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
(layer_norm_inter): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
(proj_inter): Dense(None -> 512, linear)
(proj_in): Dense(None -> 512, linear)
(ffn): PositionwiseFFN(
(dropout_layer): Dropout(p = 0.1, axes=())
(ffn_1): Dense(None -> 2048, Activation(relu))
(ffn_2): Dense(None -> 512, linear)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(attention_cell_inter): MultiHeadAttentionCell(
(proj_query): Dense(None -> 512, linear)
(proj_key): Dense(None -> 512, linear)
(_base_cell): DotProductAttentionCell(
(_dropout_layer): Dropout(p = 0.1, axes=())
)
(proj_value): Dense(None -> 512, linear)
)
)
)
(layer_norm): LayerNorm(axis=-1, center=True, eps=1e-05, scale=True, in_channels=0)
)
(tgt_embed): HybridSequential(
(0): Embedding(36794 -> 512, float32)
(1): Dropout(p = 0.0, axes=())
)
)
2018-06-25 19:19:12,106 - root - Use beam_size=4, alpha=0.6, K=5
2018-06-25 19:19:20,026 - root - Train Batch Sampler:
FixedBucketSampler:
sample_num=4493328, batch_num=34064
key=[(5, 6), (8, 9), (12, 14), (17, 19), (23, 25), (30, 32), (39, 41), (49, 51), (61, 64), (75, 79), (93, 97), (114, 119), (139, 145), (169, 176), (206, 214), (249, 259), (302, 313), (364, 378), (440, 456), (530, 549)]
cnt=[15338, 57067, 248232, 511919, 747010, 820181, 816207, 571262, 380721, 193794, 90070, 32452, 6865, 1445, 517, 171, 57, 11, 7, 2]
batch_size=[1013, 562, 375, 271, 204, 158, 124, 99, 81, 67, 55, 46, 40, 33, 29, 23, 18, 16, 16, 10]
2018-06-25 19:19:20,029 - root - Valid Batch Sampler:
FixedBucketSampler:
sample_num=3000, batch_num=370
key=[4, 5, 6, 7, 8, 10, 12, 15, 18, 21, 26, 31, 37, 45, 54, 65, 78, 94, 112, 135]
cnt=[13, 12, 14, 24, 53, 110, 141, 254, 253, 275, 411, 377, 321, 328, 207, 109, 61, 24, 7, 6]
batch_size=[67, 51, 42, 36, 32, 27, 22, 18, 15, 12, 10, 8, 7, 6, 5, 4, 3, 3, 2, 2]
2018-06-25 19:19:20,031 - root - Test Batch Sampler:
FixedBucketSampler:
sample_num=2737, batch_num=367
key=[7, 8, 9, 10, 11, 13, 14, 16, 19, 22, 26, 30, 36, 42, 50, 59, 71, 84, 100]
cnt=[13, 18, 29, 41, 39, 123, 81, 147, 239, 233, 324, 285, 392, 271, 206, 181, 87, 23, 5]
batch_size=[38, 32, 28, 25, 23, 20, 18, 16, 14, 12, 10, 8, 7, 6, 5, 4, 3, 3, 2]
2018-06-25 19:21:20,799 - root - [Epoch 0 Batch 800/34064] loss=10.3670, ppl=31793.0480, throughput=54.01K wps, wc=6521.39K
2018-06-25 19:23:08,412 - root - [Epoch 0 Batch 1600/34064] loss=9.9823, ppl=21639.1107, throughput=60.65K wps, wc=6526.33K
2018-06-25 19:24:54,481 - root - [Epoch 0 Batch 2400/34064] loss=9.4362, ppl=12534.1929, throughput=61.48K wps, wc=6520.68K
2018-06-25 19:26:41,120 - root - [Epoch 0 Batch 3200/34064] loss=8.8150, ppl=6734.5995, throughput=61.19K wps, wc=6524.98K
2018-06-25 19:28:27,000 - root - [Epoch 0 Batch 4000/34064] loss=8.3174, ppl=4094.5768, throughput=61.54K wps, wc=6515.48K
2018-06-25 19:30:14,140 - root - [Epoch 0 Batch 4800/34064] loss=8.0589, ppl=3161.7813, throughput=60.92K wps, wc=6526.64K
2018-06-25 19:32:01,518 - root - [Epoch 0 Batch 5600/34064] loss=7.8119, ppl=2469.9338, throughput=60.65K wps, wc=6511.90K
2018-06-25 19:33:47,478 - root - [Epoch 0 Batch 6400/34064] loss=7.5964, ppl=1991.0614, throughput=61.62K wps, wc=6529.01K
2018-06-25 19:35:33,410 - root - [Epoch 0 Batch 7200/34064] loss=7.4043, ppl=1642.9836, throughput=61.53K wps, wc=6517.51K
2018-06-25 19:37:19,545 - root - [Epoch 0 Batch 8000/34064] loss=7.2448, ppl=1400.8604, throughput=61.43K wps, wc=6519.60K
2018-06-25 19:39:05,883 - root - [Epoch 0 Batch 8800/34064] loss=7.1157, ppl=1231.1434, throughput=61.34K wps, wc=6522.45K
2018-06-25 19:40:51,860 - root - [Epoch 0 Batch 9600/34064] loss=7.0056, ppl=1102.7859, throughput=61.51K wps, wc=6518.89K
2018-06-25 19:42:38,850 - root - [Epoch 0 Batch 10400/34064] loss=6.8863, ppl=978.7619, throughput=60.97K wps, wc=6523.04K
2018-06-25 19:44:25,572 - root - [Epoch 0 Batch 11200/34064] loss=6.7758, ppl=876.3566, throughput=61.11K wps, wc=6521.74K
2018-06-25 19:46:13,841 - root - [Epoch 0 Batch 12000/34064] loss=6.6845, ppl=799.9049, throughput=60.26K wps, wc=6524.14K
2018-06-25 19:48:01,925 - root - [Epoch 0 Batch 12800/34064] loss=6.5846, ppl=723.8826, throughput=60.33K wps, wc=6520.16K
2018-06-25 19:49:50,553 - root - [Epoch 0 Batch 13600/34064] loss=6.5016, ppl=666.2175, throughput=59.96K wps, wc=6513.54K
2018-06-25 19:51:38,037 - root - [Epoch 0 Batch 14400/34064] loss=6.4134, ppl=609.9611, throughput=60.72K wps, wc=6526.57K
2018-06-25 19:53:26,008 - root - [Epoch 0 Batch 15200/34064] loss=6.3283, ppl=560.2173, throughput=60.40K wps, wc=6520.91K
2018-06-25 19:55:16,226 - root - [Epoch 0 Batch 16000/34064] loss=6.2392, ppl=512.4496, throughput=59.23K wps, wc=6527.94K
2018-06-25 19:57:05,095 - root - [Epoch 0 Batch 16800/34064] loss=6.1702, ppl=478.2667, throughput=59.88K wps, wc=6518.74K
2018-06-25 19:58:54,372 - root - [Epoch 0 Batch 17600/34064] loss=6.0995, ppl=445.6467, throughput=59.58K wps, wc=6510.67K
2018-06-25 20:00:41,816 - root - [Epoch 0 Batch 18400/34064] loss=6.0151, ppl=409.5769, throughput=60.72K wps, wc=6523.73K
2018-06-25 20:02:29,378 - root - [Epoch 0 Batch 19200/34064] loss=5.9392, ppl=379.6438, throughput=60.67K wps, wc=6525.99K
2018-06-25 20:04:17,016 - root - [Epoch 0 Batch 20000/34064] loss=5.8800, ppl=357.8255, throughput=60.61K wps, wc=6524.25K
2018-06-25 20:06:05,236 - root - [Epoch 0 Batch 20800/34064] loss=5.8281, ppl=339.7166, throughput=60.28K wps, wc=6523.63K
2018-06-25 20:07:51,849 - root - [Epoch 0 Batch 21600/34064] loss=5.7462, ppl=312.9855, throughput=61.11K wps, wc=6514.78K
2018-06-25 20:09:41,107 - root - [Epoch 0 Batch 22400/34064] loss=5.7037, ppl=299.9726, throughput=59.68K wps, wc=6520.85K
2018-06-25 20:11:29,604 - root - [Epoch 0 Batch 23200/34064] loss=5.6315, ppl=279.0762, throughput=60.07K wps, wc=6516.90K
2018-06-25 20:13:18,455 - root - [Epoch 0 Batch 24000/34064] loss=5.5815, ppl=265.4788, throughput=59.87K wps, wc=6516.35K
2018-06-25 20:15:05,955 - root - [Epoch 0 Batch 24800/34064] loss=5.5272, ppl=251.4360, throughput=60.69K wps, wc=6524.52K
2018-06-25 20:16:55,579 - root - [Epoch 0 Batch 25600/34064] loss=5.4724, ppl=238.0278, throughput=59.48K wps, wc=6520.32K
2018-06-25 20:18:45,271 - root - [Epoch 0 Batch 26400/34064] loss=5.3922, ppl=219.6777, throughput=59.44K wps, wc=6519.95K
2018-06-25 20:20:33,451 - root - [Epoch 0 Batch 27200/34064] loss=5.3460, ppl=209.7704, throughput=60.31K wps, wc=6524.05K
2018-06-25 20:22:20,574 - root - [Epoch 0 Batch 28000/34064] loss=5.2697, ppl=194.3564, throughput=60.89K wps, wc=6522.19K
2018-06-25 20:24:09,095 - root - [Epoch 0 Batch 28800/34064] loss=5.1937, ppl=180.1392, throughput=60.05K wps, wc=6516.06K
2018-06-25 20:25:56,445 - root - [Epoch 0 Batch 29600/34064] loss=5.1122, ppl=166.0363, throughput=60.79K wps, wc=6525.55K
2018-06-25 20:27:46,069 - root - [Epoch 0 Batch 30400/34064] loss=5.0683, ppl=158.9044, throughput=59.48K wps, wc=6520.53K
2018-06-25 20:29:33,863 - root - [Epoch 0 Batch 31200/34064] loss=4.9545, ppl=141.8073, throughput=60.51K wps, wc=6522.14K
2018-06-25 20:31:23,143 - root - [Epoch 0 Batch 32000/34064] loss=4.8761, ppl=131.1182, throughput=59.66K wps, wc=6519.83K
2018-06-25 20:33:10,235 - root - [Epoch 0 Batch 32800/34064] loss=4.7995, ppl=121.4494, throughput=60.87K wps, wc=6518.36K
2018-06-25 20:34:59,141 - root - [Epoch 0 Batch 33600/34064] loss=4.7318, ppl=113.5005, throughput=59.90K wps, wc=6523.04K
2018-06-25 20:42:47,764 - root - [Epoch 0] valid Loss=3.4307, valid ppl=30.8977, valid bleu=9.21
2018-06-25 20:49:29,136 - root - [Epoch 0] test Loss=3.5092, test ppl=33.4209, test bleu=7.18
2018-06-25 20:49:29,142 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-25 20:51:23,281 - root - [Epoch 1 Batch 800/34064] loss=4.5984, ppl=99.3247, throughput=57.62K wps, wc=6517.40K
2018-06-25 20:53:10,987 - root - [Epoch 1 Batch 1600/34064] loss=4.5285, ppl=92.6176, throughput=60.58K wps, wc=6524.55K
2018-06-25 20:54:59,257 - root - [Epoch 1 Batch 2400/34064] loss=4.4722, ppl=87.5461, throughput=60.19K wps, wc=6516.80K
2018-06-25 20:56:47,306 - root - [Epoch 1 Batch 3200/34064] loss=4.3890, ppl=80.5578, throughput=60.37K wps, wc=6522.46K
2018-06-25 20:58:34,713 - root - [Epoch 1 Batch 4000/34064] loss=4.3505, ppl=77.5137, throughput=60.67K wps, wc=6516.63K
2018-06-25 21:00:23,724 - root - [Epoch 1 Batch 4800/34064] loss=4.3119, ppl=74.5804, throughput=59.79K wps, wc=6517.72K
2018-06-25 21:02:10,603 - root - [Epoch 1 Batch 5600/34064] loss=4.2435, ppl=69.6535, throughput=61.01K wps, wc=6520.25K
2018-06-25 21:03:58,870 - root - [Epoch 1 Batch 6400/34064] loss=4.2039, ppl=66.9493, throughput=60.20K wps, wc=6517.18K
2018-06-25 21:05:46,829 - root - [Epoch 1 Batch 7200/34064] loss=4.1532, ppl=63.6356, throughput=60.39K wps, wc=6520.04K
2018-06-25 21:07:35,451 - root - [Epoch 1 Batch 8000/34064] loss=4.1457, ppl=63.1611, throughput=60.01K wps, wc=6518.66K
2018-06-25 21:09:23,000 - root - [Epoch 1 Batch 8800/34064] loss=4.0981, ppl=60.2234, throughput=60.59K wps, wc=6515.87K
2018-06-25 21:11:10,058 - root - [Epoch 1 Batch 9600/34064] loss=4.0737, ppl=58.7752, throughput=60.93K wps, wc=6523.38K
2018-06-25 21:12:57,319 - root - [Epoch 1 Batch 10400/34064] loss=4.0513, ppl=57.4733, throughput=60.79K wps, wc=6520.75K
2018-06-25 21:14:44,501 - root - [Epoch 1 Batch 11200/34064] loss=4.0183, ppl=55.6087, throughput=60.85K wps, wc=6521.82K
2018-06-25 21:16:31,202 - root - [Epoch 1 Batch 12000/34064] loss=3.9935, ppl=54.2446, throughput=61.17K wps, wc=6526.77K
2018-06-25 21:18:20,323 - root - [Epoch 1 Batch 12800/34064] loss=3.9932, ppl=54.2261, throughput=59.78K wps, wc=6522.99K
2018-06-25 21:20:07,635 - root - [Epoch 1 Batch 13600/34064] loss=3.9356, ppl=51.1924, throughput=60.79K wps, wc=6523.38K
2018-06-25 21:21:56,312 - root - [Epoch 1 Batch 14400/34064] loss=3.9588, ppl=52.3948, throughput=60.03K wps, wc=6523.83K
2018-06-25 21:23:44,469 - root - [Epoch 1 Batch 15200/34064] loss=3.9206, ppl=50.4317, throughput=60.25K wps, wc=6516.86K
2018-06-25 21:25:33,437 - root - [Epoch 1 Batch 16000/34064] loss=3.9099, ppl=49.8960, throughput=59.81K wps, wc=6517.28K
2018-06-25 21:27:21,448 - root - [Epoch 1 Batch 16800/34064] loss=3.8941, ppl=49.1136, throughput=60.39K wps, wc=6522.50K
2018-06-25 21:29:10,007 - root - [Epoch 1 Batch 17600/34064] loss=3.8597, ppl=47.4495, throughput=60.08K wps, wc=6522.69K
2018-06-25 21:30:57,447 - root - [Epoch 1 Batch 18400/34064] loss=3.8444, ppl=46.7313, throughput=60.68K wps, wc=6519.57K
2018-06-25 21:32:45,005 - root - [Epoch 1 Batch 19200/34064] loss=3.8010, ppl=44.7453, throughput=60.71K wps, wc=6530.25K
2018-06-25 21:34:33,217 - root - [Epoch 1 Batch 20000/34064] loss=3.8197, ppl=45.5921, throughput=60.23K wps, wc=6517.08K
2018-06-25 21:36:20,967 - root - [Epoch 1 Batch 20800/34064] loss=3.8014, ppl=44.7630, throughput=60.53K wps, wc=6521.58K
2018-06-25 21:38:09,232 - root - [Epoch 1 Batch 21600/34064] loss=3.7890, ppl=44.2104, throughput=60.18K wps, wc=6514.99K
2018-06-25 21:39:57,234 - root - [Epoch 1 Batch 22400/34064] loss=3.7885, ppl=44.1902, throughput=60.42K wps, wc=6525.71K
2018-06-25 21:41:45,570 - root - [Epoch 1 Batch 23200/34064] loss=3.7784, ppl=43.7479, throughput=60.19K wps, wc=6521.05K
2018-06-25 21:43:34,050 - root - [Epoch 1 Batch 24000/34064] loss=3.7611, ppl=42.9943, throughput=60.04K wps, wc=6513.54K
2018-06-25 21:45:23,761 - root - [Epoch 1 Batch 24800/34064] loss=3.7476, ppl=42.4205, throughput=59.36K wps, wc=6512.78K
2018-06-25 21:47:12,398 - root - [Epoch 1 Batch 25600/34064] loss=3.7423, ppl=42.1948, throughput=60.01K wps, wc=6519.07K
2018-06-25 21:49:00,325 - root - [Epoch 1 Batch 26400/34064] loss=3.7181, ppl=41.1845, throughput=60.46K wps, wc=6525.01K
2018-06-25 21:50:49,551 - root - [Epoch 1 Batch 27200/34064] loss=3.7533, ppl=42.6609, throughput=59.71K wps, wc=6521.88K
2018-06-25 21:52:36,543 - root - [Epoch 1 Batch 28000/34064] loss=3.6884, ppl=39.9798, throughput=61.00K wps, wc=6526.58K
2018-06-25 21:54:23,603 - root - [Epoch 1 Batch 28800/34064] loss=3.7046, ppl=40.6354, throughput=60.97K wps, wc=6527.25K
2018-06-25 21:56:10,818 - root - [Epoch 1 Batch 29600/34064] loss=3.6827, ppl=39.7541, throughput=60.85K wps, wc=6523.91K
2018-06-25 21:57:58,695 - root - [Epoch 1 Batch 30400/34064] loss=3.6701, ppl=39.2566, throughput=60.50K wps, wc=6526.25K
2018-06-25 21:59:45,718 - root - [Epoch 1 Batch 31200/34064] loss=3.6671, ppl=39.1378, throughput=60.95K wps, wc=6522.89K
2018-06-25 22:01:34,201 - root - [Epoch 1 Batch 32000/34064] loss=3.6485, ppl=38.4167, throughput=60.16K wps, wc=6526.23K
2018-06-25 22:03:23,027 - root - [Epoch 1 Batch 32800/34064] loss=3.6510, ppl=38.5150, throughput=59.93K wps, wc=6521.98K
2018-06-25 22:05:11,157 - root - [Epoch 1 Batch 33600/34064] loss=3.6426, ppl=38.1897, throughput=60.28K wps, wc=6517.54K
2018-06-25 22:10:49,646 - root - [Epoch 1] valid Loss=2.1273, valid ppl=8.3925, valid bleu=20.74
2018-06-25 22:15:23,946 - root - [Epoch 1] test Loss=2.0115, test ppl=7.4748, test bleu=19.39
2018-06-25 22:15:23,952 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-25 22:17:19,727 - root - [Epoch 2 Batch 800/34064] loss=3.6006, ppl=36.6194, throughput=57.50K wps, wc=6521.93K
2018-06-25 22:19:10,318 - root - [Epoch 2 Batch 1600/34064] loss=3.6253, ppl=37.5365, throughput=58.96K wps, wc=6520.45K
2018-06-25 22:20:58,564 - root - [Epoch 2 Batch 2400/34064] loss=3.5783, ppl=35.8131, throughput=60.25K wps, wc=6522.27K
2018-06-25 22:22:47,001 - root - [Epoch 2 Batch 3200/34064] loss=3.5777, ppl=35.7907, throughput=60.13K wps, wc=6520.69K
2018-06-25 22:24:33,821 - root - [Epoch 2 Batch 4000/34064] loss=3.5531, ppl=34.9201, throughput=61.06K wps, wc=6521.89K
2018-06-25 22:26:20,149 - root - [Epoch 2 Batch 4800/34064] loss=3.5567, ppl=35.0456, throughput=61.32K wps, wc=6519.64K
2018-06-25 22:28:07,521 - root - [Epoch 2 Batch 5600/34064] loss=3.5570, ppl=35.0563, throughput=60.75K wps, wc=6522.92K
2018-06-25 22:29:56,113 - root - [Epoch 2 Batch 6400/34064] loss=3.5605, ppl=35.1815, throughput=60.03K wps, wc=6519.26K
2018-06-25 22:31:44,930 - root - [Epoch 2 Batch 7200/34064] loss=3.5671, ppl=35.4122, throughput=59.82K wps, wc=6509.45K
2018-06-25 22:33:32,423 - root - [Epoch 2 Batch 8000/34064] loss=3.5270, ppl=34.0232, throughput=60.65K wps, wc=6519.56K
2018-06-25 22:35:20,346 - root - [Epoch 2 Batch 8800/34064] loss=3.5450, ppl=34.6393, throughput=60.44K wps, wc=6522.36K
2018-06-25 22:37:06,863 - root - [Epoch 2 Batch 9600/34064] loss=3.5254, ppl=33.9658, throughput=61.30K wps, wc=6529.02K
2018-06-25 22:38:53,690 - root - [Epoch 2 Batch 10400/34064] loss=3.5301, ppl=34.1262, throughput=61.02K wps, wc=6518.21K
2018-06-25 22:40:42,483 - root - [Epoch 2 Batch 11200/34064] loss=3.5200, ppl=33.7844, throughput=59.96K wps, wc=6523.47K
2018-06-25 22:42:31,217 - root - [Epoch 2 Batch 12000/34064] loss=3.5168, ppl=33.6766, throughput=59.98K wps, wc=6522.17K
2018-06-25 22:44:17,852 - root - [Epoch 2 Batch 12800/34064] loss=3.5214, ppl=33.8323, throughput=61.15K wps, wc=6520.97K
2018-06-25 22:46:05,399 - root - [Epoch 2 Batch 13600/34064] loss=3.5037, ppl=33.2398, throughput=60.62K wps, wc=6519.23K
2018-06-25 22:47:54,324 - root - [Epoch 2 Batch 14400/34064] loss=3.4886, ppl=32.7393, throughput=59.84K wps, wc=6517.54K
2018-06-25 22:49:42,855 - root - [Epoch 2 Batch 15200/34064] loss=3.4997, ppl=33.1069, throughput=60.01K wps, wc=6512.35K
2018-06-25 22:51:31,631 - root - [Epoch 2 Batch 16000/34064] loss=3.4820, ppl=32.5256, throughput=59.97K wps, wc=6522.82K
2018-06-25 22:53:19,357 - root - [Epoch 2 Batch 16800/34064] loss=3.4707, ppl=32.1585, throughput=60.57K wps, wc=6525.40K
2018-06-25 22:55:06,277 - root - [Epoch 2 Batch 17600/34064] loss=3.4545, ppl=31.6418, throughput=61.00K wps, wc=6521.61K
2018-06-25 22:56:54,285 - root - [Epoch 2 Batch 18400/34064] loss=3.4812, ppl=32.5002, throughput=60.42K wps, wc=6525.48K
2018-06-25 22:58:42,005 - root - [Epoch 2 Batch 19200/34064] loss=3.4610, ppl=31.8481, throughput=60.56K wps, wc=6523.30K
2018-06-25 23:00:31,442 - root - [Epoch 2 Batch 20000/34064] loss=3.4721, ppl=32.2044, throughput=59.64K wps, wc=6526.67K
2018-06-25 23:02:18,910 - root - [Epoch 2 Batch 20800/34064] loss=3.4467, ppl=31.3957, throughput=60.70K wps, wc=6523.69K
2018-06-25 23:04:07,140 - root - [Epoch 2 Batch 21600/34064] loss=3.4490, ppl=31.4699, throughput=60.28K wps, wc=6523.82K
2018-06-25 23:05:54,560 - root - [Epoch 2 Batch 22400/34064] loss=3.4487, ppl=31.4585, throughput=60.67K wps, wc=6517.02K
2018-06-25 23:07:41,808 - root - [Epoch 2 Batch 23200/34064] loss=3.4401, ppl=31.1891, throughput=60.85K wps, wc=6526.26K
2018-06-25 23:09:29,603 - root - [Epoch 2 Batch 24000/34064] loss=3.4364, ppl=31.0756, throughput=60.46K wps, wc=6517.19K
2018-06-25 23:11:16,711 - root - [Epoch 2 Batch 24800/34064] loss=3.4315, ppl=30.9216, throughput=60.82K wps, wc=6514.21K
2018-06-25 23:13:02,997 - root - [Epoch 2 Batch 25600/34064] loss=3.4353, ppl=31.0416, throughput=61.34K wps, wc=6519.26K
2018-06-25 23:14:51,756 - root - [Epoch 2 Batch 26400/34064] loss=3.4508, ppl=31.5258, throughput=59.97K wps, wc=6522.71K
2018-06-25 23:16:38,782 - root - [Epoch 2 Batch 27200/34064] loss=3.4188, ppl=30.5315, throughput=60.98K wps, wc=6526.88K
2018-06-25 23:18:26,627 - root - [Epoch 2 Batch 28000/34064] loss=3.4185, ppl=30.5228, throughput=60.45K wps, wc=6519.26K
2018-06-25 23:20:13,873 - root - [Epoch 2 Batch 28800/34064] loss=3.4168, ppl=30.4705, throughput=60.83K wps, wc=6524.19K
2018-06-25 23:22:01,374 - root - [Epoch 2 Batch 29600/34064] loss=3.4100, ppl=30.2664, throughput=60.66K wps, wc=6521.02K
2018-06-25 23:23:48,509 - root - [Epoch 2 Batch 30400/34064] loss=3.4155, ppl=30.4330, throughput=60.83K wps, wc=6516.85K
2018-06-25 23:25:34,894 - root - [Epoch 2 Batch 31200/34064] loss=3.4130, ppl=30.3570, throughput=61.29K wps, wc=6520.12K
2018-06-25 23:27:22,302 - root - [Epoch 2 Batch 32000/34064] loss=3.4017, ppl=30.0154, throughput=60.74K wps, wc=6523.60K
2018-06-25 23:29:08,812 - root - [Epoch 2 Batch 32800/34064] loss=3.3953, ppl=29.8248, throughput=61.25K wps, wc=6523.61K
2018-06-25 23:30:56,561 - root - [Epoch 2 Batch 33600/34064] loss=3.4098, ppl=30.2591, throughput=60.50K wps, wc=6518.29K
2018-06-25 23:36:45,172 - root - [Epoch 2] valid Loss=1.8743, valid ppl=6.5162, valid bleu=22.39
2018-06-25 23:41:17,226 - root - [Epoch 2] test Loss=1.7349, test ppl=5.6682, test bleu=21.76
2018-06-25 23:41:17,232 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-25 23:43:11,573 - root - [Epoch 3 Batch 800/34064] loss=3.3510, ppl=28.5321, throughput=58.23K wps, wc=6523.88K
2018-06-25 23:44:59,404 - root - [Epoch 3 Batch 1600/34064] loss=3.3567, ppl=28.6938, throughput=60.52K wps, wc=6525.81K
2018-06-25 23:46:46,526 - root - [Epoch 3 Batch 2400/34064] loss=3.3494, ppl=28.4860, throughput=60.82K wps, wc=6514.74K
2018-06-25 23:48:34,283 - root - [Epoch 3 Batch 3200/34064] loss=3.3452, ppl=28.3670, throughput=60.48K wps, wc=6517.07K
2018-06-25 23:50:22,579 - root - [Epoch 3 Batch 4000/34064] loss=3.3619, ppl=28.8428, throughput=60.24K wps, wc=6523.22K
2018-06-25 23:52:10,541 - root - [Epoch 3 Batch 4800/34064] loss=3.3669, ppl=28.9889, throughput=60.38K wps, wc=6519.15K
2018-06-25 23:53:59,063 - root - [Epoch 3 Batch 5600/34064] loss=3.3653, ppl=28.9429, throughput=60.06K wps, wc=6518.28K
2018-06-25 23:55:46,811 - root - [Epoch 3 Batch 6400/34064] loss=3.3553, ppl=28.6546, throughput=60.53K wps, wc=6522.01K
2018-06-25 23:57:33,463 - root - [Epoch 3 Batch 7200/34064] loss=3.3442, ppl=28.3366, throughput=61.15K wps, wc=6521.29K
2018-06-25 23:59:20,045 - root - [Epoch 3 Batch 8000/34064] loss=3.3349, ppl=28.0747, throughput=61.21K wps, wc=6524.06K
2018-06-26 00:01:07,556 - root - [Epoch 3 Batch 8800/34064] loss=3.3554, ppl=28.6579, throughput=60.60K wps, wc=6515.19K
2018-06-26 00:02:54,382 - root - [Epoch 3 Batch 9600/34064] loss=3.3490, ppl=28.4738, throughput=60.99K wps, wc=6514.88K
2018-06-26 00:04:41,978 - root - [Epoch 3 Batch 10400/34064] loss=3.3434, ppl=28.3141, throughput=60.58K wps, wc=6518.23K
2018-06-26 00:06:28,397 - root - [Epoch 3 Batch 11200/34064] loss=3.3301, ppl=27.9412, throughput=61.31K wps, wc=6524.21K
2018-06-26 00:08:14,909 - root - [Epoch 3 Batch 12000/34064] loss=3.3460, ppl=28.3882, throughput=61.22K wps, wc=6520.15K
2018-06-26 00:10:01,838 - root - [Epoch 3 Batch 12800/34064] loss=3.3374, ppl=28.1457, throughput=61.00K wps, wc=6522.80K
2018-06-26 00:11:48,336 - root - [Epoch 3 Batch 13600/34064] loss=3.3249, ppl=27.7960, throughput=61.17K wps, wc=6514.66K
2018-06-26 00:13:35,518 - root - [Epoch 3 Batch 14400/34064] loss=3.3203, ppl=27.6699, throughput=60.87K wps, wc=6524.29K
2018-06-26 00:15:22,873 - root - [Epoch 3 Batch 15200/34064] loss=3.3286, ppl=27.9002, throughput=60.76K wps, wc=6523.33K
2018-06-26 00:17:10,483 - root - [Epoch 3 Batch 16000/34064] loss=3.3259, ppl=27.8229, throughput=60.60K wps, wc=6521.44K
2018-06-26 00:18:59,664 - root - [Epoch 3 Batch 16800/34064] loss=3.3292, ppl=27.9171, throughput=59.75K wps, wc=6523.26K
2018-06-26 00:20:46,643 - root - [Epoch 3 Batch 17600/34064] loss=3.3202, ppl=27.6661, throughput=61.00K wps, wc=6525.51K
2018-06-26 00:22:34,635 - root - [Epoch 3 Batch 18400/34064] loss=3.3176, ppl=27.5952, throughput=60.38K wps, wc=6520.48K
2018-06-26 00:24:22,664 - root - [Epoch 3 Batch 19200/34064] loss=3.3210, ppl=27.6881, throughput=60.37K wps, wc=6521.42K
2018-06-26 00:26:09,522 - root - [Epoch 3 Batch 20000/34064] loss=3.3120, ppl=27.4397, throughput=61.06K wps, wc=6525.09K
2018-06-26 00:27:57,348 - root - [Epoch 3 Batch 20800/34064] loss=3.3088, ppl=27.3513, throughput=60.46K wps, wc=6518.56K
2018-06-26 00:29:44,636 - root - [Epoch 3 Batch 21600/34064] loss=3.2864, ppl=26.7467, throughput=60.85K wps, wc=6528.66K
2018-06-26 00:31:33,093 - root - [Epoch 3 Batch 22400/34064] loss=3.3145, ppl=27.5076, throughput=60.15K wps, wc=6523.60K
2018-06-26 00:33:22,555 - root - [Epoch 3 Batch 23200/34064] loss=3.2904, ppl=26.8541, throughput=59.57K wps, wc=6520.83K
2018-06-26 00:35:10,536 - root - [Epoch 3 Batch 24000/34064] loss=3.3164, ppl=27.5621, throughput=60.35K wps, wc=6516.52K
2018-06-26 00:36:59,746 - root - [Epoch 3 Batch 24800/34064] loss=3.3168, ppl=27.5729, throughput=59.60K wps, wc=6509.35K
2018-06-26 00:38:47,592 - root - [Epoch 3 Batch 25600/34064] loss=3.2885, ppl=26.8023, throughput=60.50K wps, wc=6524.51K
2018-06-26 00:40:35,868 - root - [Epoch 3 Batch 26400/34064] loss=3.3137, ppl=27.4874, throughput=60.18K wps, wc=6516.21K
2018-06-26 00:42:24,293 - root - [Epoch 3 Batch 27200/34064] loss=3.3172, ppl=27.5832, throughput=60.16K wps, wc=6522.44K
2018-06-26 00:44:12,436 - root - [Epoch 3 Batch 28000/34064] loss=3.3218, ppl=27.7090, throughput=60.31K wps, wc=6521.56K
2018-06-26 00:45:59,329 - root - [Epoch 3 Batch 28800/34064] loss=3.2926, ppl=26.9125, throughput=61.02K wps, wc=6522.57K
2018-06-26 00:47:47,871 - root - [Epoch 3 Batch 29600/34064] loss=3.3125, ppl=27.4545, throughput=60.12K wps, wc=6524.96K
2018-06-26 00:49:35,993 - root - [Epoch 3 Batch 30400/34064] loss=3.2917, ppl=26.8876, throughput=60.32K wps, wc=6521.41K
2018-06-26 00:51:24,126 - root - [Epoch 3 Batch 31200/34064] loss=3.3016, ppl=27.1572, throughput=60.28K wps, wc=6518.32K
2018-06-26 00:53:11,147 - root - [Epoch 3 Batch 32000/34064] loss=3.2873, ppl=26.7717, throughput=60.96K wps, wc=6523.69K
2018-06-26 00:54:57,549 - root - [Epoch 3 Batch 32800/34064] loss=3.2848, ppl=26.7048, throughput=61.32K wps, wc=6524.36K
2018-06-26 00:56:44,787 - root - [Epoch 3 Batch 33600/34064] loss=3.2867, ppl=26.7546, throughput=60.84K wps, wc=6524.69K
2018-06-26 01:02:22,714 - root - [Epoch 3] valid Loss=1.7584, valid ppl=5.8029, valid bleu=23.61
2018-06-26 01:06:58,445 - root - [Epoch 3] test Loss=1.6207, test ppl=5.0568, test bleu=22.66
2018-06-26 01:06:58,451 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-26 01:08:51,459 - root - [Epoch 4 Batch 800/34064] loss=3.2387, ppl=25.4994, throughput=58.94K wps, wc=6528.01K
2018-06-26 01:10:38,503 - root - [Epoch 4 Batch 1600/34064] loss=3.2378, ppl=25.4779, throughput=60.87K wps, wc=6516.22K
2018-06-26 01:12:26,864 - root - [Epoch 4 Batch 2400/34064] loss=3.2640, ppl=26.1546, throughput=60.18K wps, wc=6521.61K
2018-06-26 01:14:14,956 - root - [Epoch 4 Batch 3200/34064] loss=3.2594, ppl=26.0344, throughput=60.33K wps, wc=6520.66K
2018-06-26 01:16:01,877 - root - [Epoch 4 Batch 4000/34064] loss=3.2450, ppl=25.6624, throughput=61.00K wps, wc=6521.98K
2018-06-26 01:17:50,208 - root - [Epoch 4 Batch 4800/34064] loss=3.2400, ppl=25.5339, throughput=60.20K wps, wc=6522.03K
2018-06-26 01:19:35,877 - root - [Epoch 4 Batch 5600/34064] loss=3.2446, ppl=25.6505, throughput=61.66K wps, wc=6515.02K
2018-06-26 01:21:23,296 - root - [Epoch 4 Batch 6400/34064] loss=3.2666, ppl=26.2215, throughput=60.62K wps, wc=6511.47K
2018-06-26 01:23:11,503 - root - [Epoch 4 Batch 7200/34064] loss=3.2351, ppl=25.4099, throughput=60.31K wps, wc=6525.41K
2018-06-26 01:25:00,383 - root - [Epoch 4 Batch 8000/34064] loss=3.2595, ppl=26.0367, throughput=59.92K wps, wc=6523.94K
2018-06-26 01:26:47,096 - root - [Epoch 4 Batch 8800/34064] loss=3.2406, ppl=25.5490, throughput=61.14K wps, wc=6524.24K
2018-06-26 01:28:34,321 - root - [Epoch 4 Batch 9600/34064] loss=3.2481, ppl=25.7425, throughput=60.82K wps, wc=6521.05K
2018-06-26 01:30:21,325 - root - [Epoch 4 Batch 10400/34064] loss=3.2373, ppl=25.4640, throughput=60.95K wps, wc=6522.23K
2018-06-26 01:32:08,577 - root - [Epoch 4 Batch 11200/34064] loss=3.2544, ppl=25.9048, throughput=60.80K wps, wc=6520.95K
2018-06-26 01:33:55,171 - root - [Epoch 4 Batch 12000/34064] loss=3.2594, ppl=26.0348, throughput=61.20K wps, wc=6523.34K
2018-06-26 01:35:42,880 - root - [Epoch 4 Batch 12800/34064] loss=3.2525, ppl=25.8545, throughput=60.46K wps, wc=6511.94K
2018-06-26 01:37:28,937 - root - [Epoch 4 Batch 13600/34064] loss=3.2300, ppl=25.2784, throughput=61.48K wps, wc=6520.53K
2018-06-26 01:39:16,376 - root - [Epoch 4 Batch 14400/34064] loss=3.2498, ppl=25.7842, throughput=60.73K wps, wc=6524.48K
2018-06-26 01:41:04,299 - root - [Epoch 4 Batch 15200/34064] loss=3.2501, ppl=25.7917, throughput=60.40K wps, wc=6518.81K
2018-06-26 01:42:52,107 - root - [Epoch 4 Batch 16000/34064] loss=3.2306, ppl=25.2958, throughput=60.53K wps, wc=6525.55K
2018-06-26 01:44:40,142 - root - [Epoch 4 Batch 16800/34064] loss=3.2421, ppl=25.5877, throughput=60.37K wps, wc=6522.11K
2018-06-26 01:46:28,648 - root - [Epoch 4 Batch 17600/34064] loss=3.2366, ppl=25.4471, throughput=60.08K wps, wc=6519.22K
2018-06-26 01:48:15,680 - root - [Epoch 4 Batch 18400/34064] loss=3.2320, ppl=25.3298, throughput=60.93K wps, wc=6521.51K
2018-06-26 01:50:03,286 - root - [Epoch 4 Batch 19200/34064] loss=3.2341, ppl=25.3830, throughput=60.62K wps, wc=6522.60K
2018-06-26 01:51:51,216 - root - [Epoch 4 Batch 20000/34064] loss=3.2448, ppl=25.6574, throughput=60.43K wps, wc=6522.44K
2018-06-26 01:53:38,421 - root - [Epoch 4 Batch 20800/34064] loss=3.2530, ppl=25.8678, throughput=60.82K wps, wc=6519.69K
2018-06-26 01:55:25,687 - root - [Epoch 4 Batch 21600/34064] loss=3.2338, ppl=25.3770, throughput=60.75K wps, wc=6515.88K
2018-06-26 01:57:12,773 - root - [Epoch 4 Batch 22400/34064] loss=3.2376, ppl=25.4715, throughput=60.90K wps, wc=6521.54K
2018-06-26 01:59:01,425 - root - [Epoch 4 Batch 23200/34064] loss=3.2370, ppl=25.4570, throughput=60.03K wps, wc=6522.80K
2018-06-26 02:00:49,580 - root - [Epoch 4 Batch 24000/34064] loss=3.2356, ppl=25.4210, throughput=60.32K wps, wc=6523.39K
2018-06-26 02:02:36,737 - root - [Epoch 4 Batch 24800/34064] loss=3.2302, ppl=25.2851, throughput=60.87K wps, wc=6523.11K
2018-06-26 02:04:24,864 - root - [Epoch 4 Batch 25600/34064] loss=3.2349, ppl=25.4043, throughput=60.36K wps, wc=6526.57K
2018-06-26 02:06:12,232 - root - [Epoch 4 Batch 26400/34064] loss=3.2432, ppl=25.6166, throughput=60.71K wps, wc=6518.66K
2018-06-26 02:07:59,109 - root - [Epoch 4 Batch 27200/34064] loss=3.2314, ppl=25.3145, throughput=60.96K wps, wc=6515.38K
2018-06-26 02:09:46,501 - root - [Epoch 4 Batch 28000/34064] loss=3.2229, ppl=25.1017, throughput=60.74K wps, wc=6523.44K
2018-06-26 02:11:33,607 - root - [Epoch 4 Batch 28800/34064] loss=3.2263, ppl=25.1873, throughput=60.94K wps, wc=6527.00K
2018-06-26 02:13:21,281 - root - [Epoch 4 Batch 29600/34064] loss=3.2172, ppl=24.9570, throughput=60.53K wps, wc=6517.25K
2018-06-26 02:15:09,885 - root - [Epoch 4 Batch 30400/34064] loss=3.2362, ppl=25.4378, throughput=60.08K wps, wc=6524.54K
2018-06-26 02:16:57,988 - root - [Epoch 4 Batch 31200/34064] loss=3.2406, ppl=25.5480, throughput=60.28K wps, wc=6516.17K
2018-06-26 02:18:45,785 - root - [Epoch 4 Batch 32000/34064] loss=3.2257, ppl=25.1715, throughput=60.45K wps, wc=6516.02K
2018-06-26 02:20:34,046 - root - [Epoch 4 Batch 32800/34064] loss=3.2321, ppl=25.3332, throughput=60.19K wps, wc=6515.95K
2018-06-26 02:22:20,854 - root - [Epoch 4 Batch 33600/34064] loss=3.2200, ppl=25.0284, throughput=61.13K wps, wc=6528.62K
2018-06-26 02:27:57,924 - root - [Epoch 4] valid Loss=1.7029, valid ppl=5.4896, valid bleu=24.03
2018-06-26 02:32:34,801 - root - [Epoch 4] test Loss=1.5553, test ppl=4.7363, test bleu=23.74
2018-06-26 02:32:34,807 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-26 02:34:27,720 - root - [Epoch 5 Batch 800/34064] loss=3.1798, ppl=24.0408, throughput=58.92K wps, wc=6519.59K
2018-06-26 02:36:14,875 - root - [Epoch 5 Batch 1600/34064] loss=3.1744, ppl=23.9119, throughput=60.88K wps, wc=6522.97K
2018-06-26 02:38:01,179 - root - [Epoch 5 Batch 2400/34064] loss=3.1838, ppl=24.1390, throughput=61.38K wps, wc=6525.09K
2018-06-26 02:39:48,572 - root - [Epoch 5 Batch 3200/34064] loss=3.1949, ppl=24.4067, throughput=60.71K wps, wc=6519.47K
2018-06-26 02:41:37,090 - root - [Epoch 5 Batch 4000/34064] loss=3.2005, ppl=24.5460, throughput=60.08K wps, wc=6519.41K
2018-06-26 02:43:24,879 - root - [Epoch 5 Batch 4800/34064] loss=3.1810, ppl=24.0709, throughput=60.48K wps, wc=6519.36K
2018-06-26 02:45:13,072 - root - [Epoch 5 Batch 5600/34064] loss=3.1984, ppl=24.4945, throughput=60.25K wps, wc=6518.36K
2018-06-26 02:47:01,984 - root - [Epoch 5 Batch 6400/34064] loss=3.1924, ppl=24.3475, throughput=59.86K wps, wc=6519.26K
2018-06-26 02:48:48,745 - root - [Epoch 5 Batch 7200/34064] loss=3.1914, ppl=24.3226, throughput=61.11K wps, wc=6523.86K
2018-06-26 02:50:36,227 - root - [Epoch 5 Batch 8000/34064] loss=3.1766, ppl=23.9654, throughput=60.73K wps, wc=6526.93K
2018-06-26 02:52:23,996 - root - [Epoch 5 Batch 8800/34064] loss=3.2034, ppl=24.6156, throughput=60.49K wps, wc=6518.35K
2018-06-26 02:54:09,982 - root - [Epoch 5 Batch 9600/34064] loss=3.1657, ppl=23.7058, throughput=61.56K wps, wc=6524.15K
2018-06-26 02:55:56,638 - root - [Epoch 5 Batch 10400/34064] loss=3.1952, ppl=24.4143, throughput=61.17K wps, wc=6524.07K
2018-06-26 02:57:44,634 - root - [Epoch 5 Batch 11200/34064] loss=3.1728, ppl=23.8749, throughput=60.35K wps, wc=6517.52K
2018-06-26 02:59:31,040 - root - [Epoch 5 Batch 12000/34064] loss=3.1910, ppl=24.3134, throughput=61.30K wps, wc=6522.68K
2018-06-26 03:01:17,916 - root - [Epoch 5 Batch 12800/34064] loss=3.2006, ppl=24.5482, throughput=60.99K wps, wc=6518.20K
2018-06-26 03:03:05,999 - root - [Epoch 5 Batch 13600/34064] loss=3.1916, ppl=24.3284, throughput=60.36K wps, wc=6523.32K
2018-06-26 03:04:52,699 - root - [Epoch 5 Batch 14400/34064] loss=3.1794, ppl=24.0314, throughput=61.17K wps, wc=6526.83K
2018-06-26 03:06:40,228 - root - [Epoch 5 Batch 15200/34064] loss=3.1899, ppl=24.2871, throughput=60.68K wps, wc=6524.30K
2018-06-26 03:08:27,847 - root - [Epoch 5 Batch 16000/34064] loss=3.1837, ppl=24.1359, throughput=60.55K wps, wc=6516.48K
2018-06-26 03:10:16,343 - root - [Epoch 5 Batch 16800/34064] loss=3.1817, ppl=24.0880, throughput=60.12K wps, wc=6522.28K
2018-06-26 03:12:03,333 - root - [Epoch 5 Batch 17600/34064] loss=3.1823, ppl=24.1015, throughput=60.99K wps, wc=6525.53K
2018-06-26 03:13:52,354 - root - [Epoch 5 Batch 18400/34064] loss=3.1850, ppl=24.1672, throughput=59.81K wps, wc=6520.20K
2018-06-26 03:15:41,855 - root - [Epoch 5 Batch 19200/34064] loss=3.1940, ppl=24.3846, throughput=59.59K wps, wc=6525.02K
2018-06-26 03:17:28,617 - root - [Epoch 5 Batch 20000/34064] loss=3.1876, ppl=24.2293, throughput=61.09K wps, wc=6522.02K
2018-06-26 03:19:15,871 - root - [Epoch 5 Batch 20800/34064] loss=3.1862, ppl=24.1975, throughput=60.83K wps, wc=6523.76K
2018-06-26 03:21:03,179 - root - [Epoch 5 Batch 21600/34064] loss=3.1833, ppl=24.1265, throughput=60.79K wps, wc=6522.73K
2018-06-26 03:22:51,552 - root - [Epoch 5 Batch 22400/34064] loss=3.1897, ppl=24.2804, throughput=60.19K wps, wc=6522.70K
2018-06-26 03:24:40,339 - root - [Epoch 5 Batch 23200/34064] loss=3.1835, ppl=24.1304, throughput=60.00K wps, wc=6527.66K
2018-06-26 03:26:27,768 - root - [Epoch 5 Batch 24000/34064] loss=3.1941, ppl=24.3880, throughput=60.72K wps, wc=6522.95K
2018-06-26 03:28:14,359 - root - [Epoch 5 Batch 24800/34064] loss=3.1773, ppl=23.9824, throughput=61.17K wps, wc=6520.59K
2018-06-26 03:30:02,371 - root - [Epoch 5 Batch 25600/34064] loss=3.1902, ppl=24.2930, throughput=60.24K wps, wc=6506.24K
2018-06-26 03:31:50,423 - root - [Epoch 5 Batch 26400/34064] loss=3.1789, ppl=24.0194, throughput=60.26K wps, wc=6510.99K
2018-06-26 03:33:37,271 - root - [Epoch 5 Batch 27200/34064] loss=3.1855, ppl=24.1793, throughput=61.02K wps, wc=6520.09K
2018-06-26 03:35:23,934 - root - [Epoch 5 Batch 28000/34064] loss=3.1867, ppl=24.2084, throughput=61.15K wps, wc=6522.88K
2018-06-26 03:37:12,230 - root - [Epoch 5 Batch 28800/34064] loss=3.1814, ppl=24.0798, throughput=60.23K wps, wc=6522.75K
2018-06-26 03:38:59,985 - root - [Epoch 5 Batch 29600/34064] loss=3.1901, ppl=24.2914, throughput=60.50K wps, wc=6519.28K
2018-06-26 03:40:49,803 - root - [Epoch 5 Batch 30400/34064] loss=3.1728, ppl=23.8731, throughput=59.36K wps, wc=6518.83K
2018-06-26 03:42:38,402 - root - [Epoch 5 Batch 31200/34064] loss=3.1723, ppl=23.8612, throughput=60.06K wps, wc=6522.40K
2018-06-26 03:44:27,255 - root - [Epoch 5 Batch 32000/34064] loss=3.1855, ppl=24.1806, throughput=59.90K wps, wc=6520.73K
2018-06-26 03:46:15,458 - root - [Epoch 5 Batch 32800/34064] loss=3.1825, ppl=24.1062, throughput=60.22K wps, wc=6515.52K
2018-06-26 03:48:03,911 - root - [Epoch 5 Batch 33600/34064] loss=3.1783, ppl=24.0069, throughput=60.15K wps, wc=6523.23K
2018-06-26 03:53:42,871 - root - [Epoch 5] valid Loss=1.6575, valid ppl=5.2460, valid bleu=24.37
2018-06-26 03:58:18,346 - root - [Epoch 5] test Loss=1.5064, test ppl=4.5103, test bleu=23.99
2018-06-26 03:58:18,352 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-26 04:00:13,648 - root - [Epoch 6 Batch 800/34064] loss=3.1482, ppl=23.2944, throughput=57.68K wps, wc=6521.11K
2018-06-26 04:02:01,914 - root - [Epoch 6 Batch 1600/34064] loss=3.1434, ppl=23.1815, throughput=60.21K wps, wc=6518.49K
2018-06-26 04:03:49,157 - root - [Epoch 6 Batch 2400/34064] loss=3.1384, ppl=23.0659, throughput=60.78K wps, wc=6518.44K
2018-06-26 04:05:37,860 - root - [Epoch 6 Batch 3200/34064] loss=3.1542, ppl=23.4348, throughput=59.99K wps, wc=6521.23K
2018-06-26 04:07:25,583 - root - [Epoch 6 Batch 4000/34064] loss=3.1411, ppl=23.1289, throughput=60.52K wps, wc=6519.82K
2018-06-26 04:09:14,403 - root - [Epoch 6 Batch 4800/34064] loss=3.1492, ppl=23.3167, throughput=59.90K wps, wc=6518.81K
2018-06-26 04:11:02,900 - root - [Epoch 6 Batch 5600/34064] loss=3.1572, ppl=23.5053, throughput=60.10K wps, wc=6520.15K
2018-06-26 04:12:49,914 - root - [Epoch 6 Batch 6400/34064] loss=3.1411, ppl=23.1304, throughput=60.99K wps, wc=6526.64K
2018-06-26 04:14:36,019 - root - [Epoch 6 Batch 7200/34064] loss=3.1297, ppl=22.8676, throughput=61.47K wps, wc=6522.03K
2018-06-26 04:16:24,175 - root - [Epoch 6 Batch 8000/34064] loss=3.1511, ppl=23.3607, throughput=60.31K wps, wc=6522.95K
2018-06-26 04:18:11,561 - root - [Epoch 6 Batch 8800/34064] loss=3.1363, ppl=23.0188, throughput=60.76K wps, wc=6524.92K
2018-06-26 04:19:58,310 - root - [Epoch 6 Batch 9600/34064] loss=3.1455, ppl=23.2317, throughput=61.13K wps, wc=6525.12K
2018-06-26 04:21:47,642 - root - [Epoch 6 Batch 10400/34064] loss=3.1634, ppl=23.6503, throughput=59.62K wps, wc=6518.81K
2018-06-26 04:23:36,467 - root - [Epoch 6 Batch 11200/34064] loss=3.1583, ppl=23.5304, throughput=59.93K wps, wc=6521.90K
2018-06-26 04:25:23,669 - root - [Epoch 6 Batch 12000/34064] loss=3.1520, ppl=23.3818, throughput=60.83K wps, wc=6521.53K
2018-06-26 04:27:11,270 - root - [Epoch 6 Batch 12800/34064] loss=3.1408, ppl=23.1228, throughput=60.61K wps, wc=6521.30K
2018-06-26 04:28:58,054 - root - [Epoch 6 Batch 13600/34064] loss=3.1475, ppl=23.2786, throughput=61.12K wps, wc=6526.40K
2018-06-26 04:30:46,484 - root - [Epoch 6 Batch 14400/34064] loss=3.1482, ppl=23.2935, throughput=60.20K wps, wc=6527.94K
2018-06-26 04:32:34,057 - root - [Epoch 6 Batch 15200/34064] loss=3.1716, ppl=23.8450, throughput=60.67K wps, wc=6526.63K
2018-06-26 04:34:22,465 - root - [Epoch 6 Batch 16000/34064] loss=3.1559, ppl=23.4737, throughput=60.15K wps, wc=6521.04K
2018-06-26 04:36:10,796 - root - [Epoch 6 Batch 16800/34064] loss=3.1708, ppl=23.8265, throughput=60.15K wps, wc=6516.20K
2018-06-26 04:37:57,239 - root - [Epoch 6 Batch 17600/34064] loss=3.1269, ppl=22.8037, throughput=61.31K wps, wc=6525.84K
2018-06-26 04:39:45,341 - root - [Epoch 6 Batch 18400/34064] loss=3.1594, ppl=23.5554, throughput=60.37K wps, wc=6526.45K
2018-06-26 04:41:31,970 - root - [Epoch 6 Batch 19200/34064] loss=3.1344, ppl=22.9741, throughput=61.13K wps, wc=6518.51K
2018-06-26 04:43:20,314 - root - [Epoch 6 Batch 20000/34064] loss=3.1505, ppl=23.3469, throughput=60.18K wps, wc=6520.25K
2018-06-26 04:45:07,402 - root - [Epoch 6 Batch 20800/34064] loss=3.1374, ppl=23.0431, throughput=60.92K wps, wc=6523.72K
2018-06-26 04:46:54,240 - root - [Epoch 6 Batch 21600/34064] loss=3.1457, ppl=23.2352, throughput=61.06K wps, wc=6523.37K
2018-06-26 04:48:42,555 - root - [Epoch 6 Batch 22400/34064] loss=3.1685, ppl=23.7729, throughput=60.13K wps, wc=6513.20K
2018-06-26 04:50:31,366 - root - [Epoch 6 Batch 23200/34064] loss=3.1519, ppl=23.3802, throughput=59.92K wps, wc=6519.96K
2018-06-26 04:52:19,940 - root - [Epoch 6 Batch 24000/34064] loss=3.1532, ppl=23.4101, throughput=60.07K wps, wc=6521.59K
2018-06-26 04:54:07,289 - root - [Epoch 6 Batch 24800/34064] loss=3.1436, ppl=23.1879, throughput=60.76K wps, wc=6522.78K
2018-06-26 04:55:55,833 - root - [Epoch 6 Batch 25600/34064] loss=3.1558, ppl=23.4718, throughput=59.98K wps, wc=6510.85K
2018-06-26 04:57:43,444 - root - [Epoch 6 Batch 26400/34064] loss=3.1523, ppl=23.3904, throughput=60.54K wps, wc=6514.61K
2018-06-26 04:59:31,461 - root - [Epoch 6 Batch 27200/34064] loss=3.1453, ppl=23.2260, throughput=60.32K wps, wc=6515.40K
2018-06-26 05:01:18,732 - root - [Epoch 6 Batch 28000/34064] loss=3.1514, ppl=23.3691, throughput=60.74K wps, wc=6515.92K
2018-06-26 05:03:05,658 - root - [Epoch 6 Batch 28800/34064] loss=3.1445, ppl=23.2079, throughput=60.97K wps, wc=6519.04K
2018-06-26 05:04:51,655 - root - [Epoch 6 Batch 29600/34064] loss=3.1317, ppl=22.9137, throughput=61.53K wps, wc=6522.35K
2018-06-26 05:06:37,738 - root - [Epoch 6 Batch 30400/34064] loss=3.1229, ppl=22.7127, throughput=61.51K wps, wc=6524.81K
2018-06-26 05:08:24,791 - root - [Epoch 6 Batch 31200/34064] loss=3.1302, ppl=22.8776, throughput=60.95K wps, wc=6524.54K
2018-06-26 05:10:11,261 - root - [Epoch 6 Batch 32000/34064] loss=3.1434, ppl=23.1818, throughput=61.27K wps, wc=6523.79K
2018-06-26 05:11:59,312 - root - [Epoch 6 Batch 32800/34064] loss=3.1417, ppl=23.1423, throughput=60.34K wps, wc=6520.16K
2018-06-26 05:13:45,863 - root - [Epoch 6 Batch 33600/34064] loss=3.1277, ppl=22.8203, throughput=61.20K wps, wc=6520.54K
2018-06-26 05:19:19,303 - root - [Epoch 6] valid Loss=1.6335, valid ppl=5.1218, valid bleu=24.64
2018-06-26 05:23:54,166 - root - [Epoch 6] test Loss=1.4819, test ppl=4.4012, test bleu=24.40
2018-06-26 05:23:54,172 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-26 05:25:49,917 - root - [Epoch 7 Batch 800/34064] loss=3.1070, ppl=22.3532, throughput=57.40K wps, wc=6516.39K
2018-06-26 05:27:37,807 - root - [Epoch 7 Batch 1600/34064] loss=3.1150, ppl=22.5338, throughput=60.46K wps, wc=6523.24K
2018-06-26 05:29:24,536 - root - [Epoch 7 Batch 2400/34064] loss=3.0973, ppl=22.1371, throughput=61.12K wps, wc=6523.06K
2018-06-26 05:31:11,999 - root - [Epoch 7 Batch 3200/34064] loss=3.1070, ppl=22.3538, throughput=60.67K wps, wc=6519.53K
2018-06-26 05:33:01,816 - root - [Epoch 7 Batch 4000/34064] loss=3.1234, ppl=22.7229, throughput=59.29K wps, wc=6510.80K
2018-06-26 05:34:49,224 - root - [Epoch 7 Batch 4800/34064] loss=3.1237, ppl=22.7297, throughput=60.74K wps, wc=6523.47K
2018-06-26 05:36:36,574 - root - [Epoch 7 Batch 5600/34064] loss=3.1126, ppl=22.4789, throughput=60.71K wps, wc=6517.45K
2018-06-26 05:38:24,596 - root - [Epoch 7 Batch 6400/34064] loss=3.1074, ppl=22.3636, throughput=60.38K wps, wc=6521.91K
2018-06-26 05:40:12,209 - root - [Epoch 7 Batch 7200/34064] loss=3.1130, ppl=22.4880, throughput=60.64K wps, wc=6525.49K
2018-06-26 05:41:59,460 - root - [Epoch 7 Batch 8000/34064] loss=3.1081, ppl=22.3789, throughput=60.84K wps, wc=6525.37K
2018-06-26 05:43:45,616 - root - [Epoch 7 Batch 8800/34064] loss=3.1197, ppl=22.6388, throughput=61.45K wps, wc=6523.46K
2018-06-26 05:45:34,010 - root - [Epoch 7 Batch 9600/34064] loss=3.1228, ppl=22.7101, throughput=60.19K wps, wc=6524.39K
2018-06-26 05:47:20,765 - root - [Epoch 7 Batch 10400/34064] loss=3.1140, ppl=22.5117, throughput=61.11K wps, wc=6523.65K
2018-06-26 05:49:06,332 - root - [Epoch 7 Batch 11200/34064] loss=3.1070, ppl=22.3535, throughput=61.83K wps, wc=6526.96K
2018-06-26 05:50:54,602 - root - [Epoch 7 Batch 12000/34064] loss=3.1185, ppl=22.6125, throughput=60.23K wps, wc=6521.52K
2018-06-26 05:52:42,115 - root - [Epoch 7 Batch 12800/34064] loss=3.1137, ppl=22.5037, throughput=60.70K wps, wc=6526.32K
2018-06-26 05:54:29,416 - root - [Epoch 7 Batch 13600/34064] loss=3.1316, ppl=22.9109, throughput=60.77K wps, wc=6520.67K
2018-06-26 05:56:16,311 - root - [Epoch 7 Batch 14400/34064] loss=3.1175, ppl=22.5894, throughput=61.00K wps, wc=6520.21K
2018-06-26 05:58:03,903 - root - [Epoch 7 Batch 15200/34064] loss=3.1396, ppl=23.0958, throughput=60.53K wps, wc=6512.18K
2018-06-26 05:59:52,440 - root - [Epoch 7 Batch 16000/34064] loss=3.1330, ppl=22.9422, throughput=60.04K wps, wc=6516.84K
2018-06-26 06:01:40,914 - root - [Epoch 7 Batch 16800/34064] loss=3.1174, ppl=22.5872, throughput=60.10K wps, wc=6519.51K
2018-06-26 06:03:27,671 - root - [Epoch 7 Batch 17600/34064] loss=3.1103, ppl=22.4271, throughput=61.14K wps, wc=6526.59K
2018-06-26 06:05:13,853 - root - [Epoch 7 Batch 18400/34064] loss=3.0981, ppl=22.1549, throughput=61.43K wps, wc=6522.91K
2018-06-26 06:07:01,756 - root - [Epoch 7 Batch 19200/34064] loss=3.1098, ppl=22.4173, throughput=60.46K wps, wc=6523.50K
2018-06-26 06:08:50,091 - root - [Epoch 7 Batch 20000/34064] loss=3.1230, ppl=22.7144, throughput=60.20K wps, wc=6521.76K
2018-06-26 06:10:37,049 - root - [Epoch 7 Batch 20800/34064] loss=3.1089, ppl=22.3973, throughput=60.97K wps, wc=6520.88K
2018-06-26 06:12:25,449 - root - [Epoch 7 Batch 21600/34064] loss=3.1276, ppl=22.8191, throughput=60.15K wps, wc=6520.13K
2018-06-26 06:14:13,254 - root - [Epoch 7 Batch 22400/34064] loss=3.1143, ppl=22.5186, throughput=60.48K wps, wc=6520.08K
2018-06-26 06:16:01,653 - root - [Epoch 7 Batch 23200/34064] loss=3.1025, ppl=22.2543, throughput=60.17K wps, wc=6522.66K
2018-06-26 06:17:48,990 - root - [Epoch 7 Batch 24000/34064] loss=3.1103, ppl=22.4286, throughput=60.74K wps, wc=6519.31K
2018-06-26 06:19:36,582 - root - [Epoch 7 Batch 24800/34064] loss=3.1195, ppl=22.6345, throughput=60.62K wps, wc=6522.26K
2018-06-26 06:21:25,308 - root - [Epoch 7 Batch 25600/34064] loss=3.1381, ppl=23.0602, throughput=59.82K wps, wc=6503.81K
2018-06-26 06:23:13,612 - root - [Epoch 7 Batch 26400/34064] loss=3.1288, ppl=22.8465, throughput=60.27K wps, wc=6527.27K
2018-06-26 06:25:01,652 - root - [Epoch 7 Batch 27200/34064] loss=3.1191, ppl=22.6266, throughput=60.40K wps, wc=6525.84K
2018-06-26 06:26:50,365 - root - [Epoch 7 Batch 28000/34064] loss=3.1240, ppl=22.7375, throughput=59.99K wps, wc=6522.06K
2018-06-26 06:28:36,363 - root - [Epoch 7 Batch 28800/34064] loss=3.0996, ppl=22.1892, throughput=61.57K wps, wc=6526.02K
2018-06-26 06:30:25,254 - root - [Epoch 7 Batch 29600/34064] loss=3.1208, ppl=22.6647, throughput=59.90K wps, wc=6522.81K
2018-06-26 06:32:13,002 - root - [Epoch 7 Batch 30400/34064] loss=3.1114, ppl=22.4532, throughput=60.50K wps, wc=6518.96K
2018-06-26 06:34:01,248 - root - [Epoch 7 Batch 31200/34064] loss=3.1347, ppl=22.9809, throughput=60.15K wps, wc=6510.57K
2018-06-26 06:35:48,761 - root - [Epoch 7 Batch 32000/34064] loss=3.1146, ppl=22.5249, throughput=60.69K wps, wc=6524.35K
2018-06-26 06:37:36,812 - root - [Epoch 7 Batch 32800/34064] loss=3.1281, ppl=22.8309, throughput=60.33K wps, wc=6519.06K
2018-06-26 06:39:23,675 - root - [Epoch 7 Batch 33600/34064] loss=3.1016, ppl=22.2341, throughput=61.07K wps, wc=6526.24K
2018-06-26 06:45:02,178 - root - [Epoch 7] valid Loss=1.6135, valid ppl=5.0202, valid bleu=24.95
2018-06-26 06:49:38,835 - root - [Epoch 7] test Loss=1.4517, test ppl=4.2703, test bleu=24.63
2018-06-26 06:49:38,841 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-26 06:51:33,371 - root - [Epoch 8 Batch 800/34064] loss=3.0674, ppl=21.4855, throughput=57.97K wps, wc=6511.01K
2018-06-26 06:53:21,032 - root - [Epoch 8 Batch 1600/34064] loss=3.0741, ppl=21.6313, throughput=60.62K wps, wc=6526.74K
2018-06-26 06:55:09,607 - root - [Epoch 8 Batch 2400/34064] loss=3.0741, ppl=21.6312, throughput=60.07K wps, wc=6522.59K
2018-06-26 06:56:56,490 - root - [Epoch 8 Batch 3200/34064] loss=3.0787, ppl=21.7311, throughput=61.01K wps, wc=6521.25K
2018-06-26 06:58:43,561 - root - [Epoch 8 Batch 4000/34064] loss=3.0867, ppl=21.9050, throughput=60.92K wps, wc=6522.63K
2018-06-26 07:00:31,482 - root - [Epoch 8 Batch 4800/34064] loss=3.0898, ppl=21.9721, throughput=60.41K wps, wc=6519.94K
2018-06-26 07:02:21,366 - root - [Epoch 8 Batch 5600/34064] loss=3.1098, ppl=22.4156, throughput=59.36K wps, wc=6523.13K
2018-06-26 07:04:09,899 - root - [Epoch 8 Batch 6400/34064] loss=3.0984, ppl=22.1617, throughput=60.10K wps, wc=6522.22K
2018-06-26 07:05:56,834 - root - [Epoch 8 Batch 7200/34064] loss=3.0918, ppl=22.0162, throughput=61.00K wps, wc=6522.80K
2018-06-26 07:07:43,676 - root - [Epoch 8 Batch 8000/34064] loss=3.0937, ppl=22.0579, throughput=61.05K wps, wc=6522.21K
2018-06-26 07:09:31,180 - root - [Epoch 8 Batch 8800/34064] loss=3.0933, ppl=22.0492, throughput=60.62K wps, wc=6516.49K
2018-06-26 07:11:18,296 - root - [Epoch 8 Batch 9600/34064] loss=3.1040, ppl=22.2875, throughput=60.86K wps, wc=6518.81K
2018-06-26 07:13:05,752 - root - [Epoch 8 Batch 10400/34064] loss=3.0869, ppl=21.9098, throughput=60.72K wps, wc=6524.20K
2018-06-26 07:14:51,361 - root - [Epoch 8 Batch 11200/34064] loss=3.0745, ppl=21.6401, throughput=61.78K wps, wc=6524.39K
2018-06-26 07:16:39,374 - root - [Epoch 8 Batch 12000/34064] loss=3.0840, ppl=21.8458, throughput=60.33K wps, wc=6516.82K
2018-06-26 07:18:26,883 - root - [Epoch 8 Batch 12800/34064] loss=3.1032, ppl=22.2701, throughput=60.66K wps, wc=6521.45K
2018-06-26 07:20:14,440 - root - [Epoch 8 Batch 13600/34064] loss=3.0895, ppl=21.9667, throughput=60.63K wps, wc=6520.90K
2018-06-26 07:22:01,638 - root - [Epoch 8 Batch 14400/34064] loss=3.0966, ppl=22.1233, throughput=60.78K wps, wc=6515.88K
2018-06-26 07:23:50,861 - root - [Epoch 8 Batch 15200/34064] loss=3.1150, ppl=22.5340, throughput=59.69K wps, wc=6519.60K
2018-06-26 07:25:38,552 - root - [Epoch 8 Batch 16000/34064] loss=3.0960, ppl=22.1082, throughput=60.59K wps, wc=6524.65K
2018-06-26 07:27:25,275 - root - [Epoch 8 Batch 16800/34064] loss=3.0942, ppl=22.0706, throughput=61.14K wps, wc=6524.44K
2018-06-26 07:29:12,634 - root - [Epoch 8 Batch 17600/34064] loss=3.1006, ppl=22.2111, throughput=60.62K wps, wc=6508.39K
2018-06-26 07:31:00,671 - root - [Epoch 8 Batch 18400/34064] loss=3.0903, ppl=21.9839, throughput=60.40K wps, wc=6525.62K
2018-06-26 07:32:49,130 - root - [Epoch 8 Batch 19200/34064] loss=3.0974, ppl=22.1397, throughput=60.17K wps, wc=6525.80K
2018-06-26 07:34:37,203 - root - [Epoch 8 Batch 20000/34064] loss=3.0989, ppl=22.1735, throughput=60.38K wps, wc=6525.32K
2018-06-26 07:36:25,437 - root - [Epoch 8 Batch 20800/34064] loss=3.0986, ppl=22.1664, throughput=60.19K wps, wc=6515.07K
2018-06-26 07:38:13,359 - root - [Epoch 8 Batch 21600/34064] loss=3.0930, ppl=22.0432, throughput=60.47K wps, wc=6525.82K
2018-06-26 07:40:00,643 - root - [Epoch 8 Batch 22400/34064] loss=3.0806, ppl=21.7722, throughput=60.78K wps, wc=6520.87K
2018-06-26 07:41:47,953 - root - [Epoch 8 Batch 23200/34064] loss=3.1018, ppl=22.2377, throughput=60.72K wps, wc=6515.74K
2018-06-26 07:43:35,259 - root - [Epoch 8 Batch 24000/34064] loss=3.1040, ppl=22.2877, throughput=60.78K wps, wc=6522.03K
2018-06-26 07:45:23,556 - root - [Epoch 8 Batch 24800/34064] loss=3.0942, ppl=22.0691, throughput=60.19K wps, wc=6517.97K
2018-06-26 07:47:10,266 - root - [Epoch 8 Batch 25600/34064] loss=3.1074, ppl=22.3618, throughput=61.13K wps, wc=6523.27K
2018-06-26 07:48:57,530 - root - [Epoch 8 Batch 26400/34064] loss=3.0791, ppl=21.7390, throughput=60.76K wps, wc=6517.82K
2018-06-26 07:50:45,786 - root - [Epoch 8 Batch 27200/34064] loss=3.1109, ppl=22.4408, throughput=60.24K wps, wc=6521.37K
2018-06-26 07:52:33,468 - root - [Epoch 8 Batch 28000/34064] loss=3.0816, ppl=21.7931, throughput=60.57K wps, wc=6522.31K
2018-06-26 07:54:20,280 - root - [Epoch 8 Batch 28800/34064] loss=3.0853, ppl=21.8746, throughput=61.09K wps, wc=6524.99K
2018-06-26 07:56:07,598 - root - [Epoch 8 Batch 29600/34064] loss=3.0800, ppl=21.7590, throughput=60.68K wps, wc=6511.67K
2018-06-26 07:57:53,842 - root - [Epoch 8 Batch 30400/34064] loss=3.0990, ppl=22.1753, throughput=61.37K wps, wc=6519.67K
2018-06-26 07:59:41,641 - root - [Epoch 8 Batch 31200/34064] loss=3.0908, ppl=21.9955, throughput=60.54K wps, wc=6526.32K
2018-06-26 08:01:28,785 - root - [Epoch 8 Batch 32000/34064] loss=3.0985, ppl=22.1639, throughput=60.89K wps, wc=6523.59K
2018-06-26 08:03:15,565 - root - [Epoch 8 Batch 32800/34064] loss=3.0940, ppl=22.0649, throughput=61.11K wps, wc=6524.76K
2018-06-26 08:05:02,036 - root - [Epoch 8 Batch 33600/34064] loss=3.0892, ppl=21.9600, throughput=61.31K wps, wc=6528.07K
2018-06-26 08:10:40,544 - root - [Epoch 8] valid Loss=1.5918, valid ppl=4.9124, valid bleu=24.90
2018-06-26 08:15:18,612 - root - [Epoch 8] test Loss=1.4299, test ppl=4.1784, test bleu=24.95
2018-06-26 08:17:10,994 - root - [Epoch 9 Batch 800/34064] loss=3.0525, ppl=21.1686, throughput=58.26K wps, wc=6520.26K
2018-06-26 08:18:58,675 - root - [Epoch 9 Batch 1600/34064] loss=3.0574, ppl=21.2716, throughput=60.61K wps, wc=6526.02K
2018-06-26 08:20:46,682 - root - [Epoch 9 Batch 2400/34064] loss=3.0709, ppl=21.5615, throughput=60.38K wps, wc=6521.74K
2018-06-26 08:22:34,212 - root - [Epoch 9 Batch 3200/34064] loss=3.0731, ppl=21.6080, throughput=60.54K wps, wc=6510.29K
2018-06-26 08:24:23,295 - root - [Epoch 9 Batch 4000/34064] loss=3.0847, ppl=21.8619, throughput=59.78K wps, wc=6521.37K
2018-06-26 08:26:10,574 - root - [Epoch 9 Batch 4800/34064] loss=3.0699, ppl=21.5397, throughput=60.85K wps, wc=6527.36K
2018-06-26 08:27:56,563 - root - [Epoch 9 Batch 5600/34064] loss=3.0376, ppl=20.8546, throughput=61.48K wps, wc=6515.85K
2018-06-26 08:29:44,636 - root - [Epoch 9 Batch 6400/34064] loss=3.0700, ppl=21.5411, throughput=60.34K wps, wc=6521.59K
2018-06-26 08:31:32,791 - root - [Epoch 9 Batch 7200/34064] loss=3.0806, ppl=21.7714, throughput=60.34K wps, wc=6525.82K
2018-06-26 08:33:20,964 - root - [Epoch 9 Batch 8000/34064] loss=3.0730, ppl=21.6066, throughput=60.29K wps, wc=6521.68K
2018-06-26 08:35:07,567 - root - [Epoch 9 Batch 8800/34064] loss=3.0610, ppl=21.3479, throughput=61.09K wps, wc=6512.56K
2018-06-26 08:36:54,547 - root - [Epoch 9 Batch 9600/34064] loss=3.0558, ppl=21.2374, throughput=60.95K wps, wc=6520.23K
2018-06-26 08:38:41,286 - root - [Epoch 9 Batch 10400/34064] loss=3.0709, ppl=21.5622, throughput=61.10K wps, wc=6521.69K
2018-06-26 08:40:28,818 - root - [Epoch 9 Batch 11200/34064] loss=3.0763, ppl=21.6778, throughput=60.64K wps, wc=6520.62K
2018-06-26 08:42:15,757 - root - [Epoch 9 Batch 12000/34064] loss=3.0799, ppl=21.7571, throughput=60.94K wps, wc=6516.39K
2018-06-26 08:44:02,862 - root - [Epoch 9 Batch 12800/34064] loss=3.0627, ppl=21.3846, throughput=60.91K wps, wc=6523.68K
2018-06-26 08:45:51,794 - root - [Epoch 9 Batch 13600/34064] loss=3.0699, ppl=21.5404, throughput=59.89K wps, wc=6523.68K
2018-06-26 08:47:39,182 - root - [Epoch 9 Batch 14400/34064] loss=3.0843, ppl=21.8531, throughput=60.73K wps, wc=6521.18K
2018-06-26 08:49:27,115 - root - [Epoch 9 Batch 15200/34064] loss=3.0738, ppl=21.6233, throughput=60.37K wps, wc=6516.23K
2018-06-26 08:51:13,040 - root - [Epoch 9 Batch 16000/34064] loss=3.0709, ppl=21.5617, throughput=61.57K wps, wc=6522.04K
2018-06-26 08:53:01,168 - root - [Epoch 9 Batch 16800/34064] loss=3.0687, ppl=21.5146, throughput=60.34K wps, wc=6524.45K
2018-06-26 08:54:49,904 - root - [Epoch 9 Batch 17600/34064] loss=3.0799, ppl=21.7565, throughput=59.98K wps, wc=6522.43K
2018-06-26 08:56:37,588 - root - [Epoch 9 Batch 18400/34064] loss=3.0697, ppl=21.5359, throughput=60.60K wps, wc=6525.33K
2018-06-26 08:58:26,254 - root - [Epoch 9 Batch 19200/34064] loss=3.0697, ppl=21.5350, throughput=60.02K wps, wc=6522.61K
2018-06-26 09:00:13,943 - root - [Epoch 9 Batch 20000/34064] loss=3.0667, ppl=21.4718, throughput=60.57K wps, wc=6522.72K
2018-06-26 09:02:02,457 - root - [Epoch 9 Batch 20800/34064] loss=3.0816, ppl=21.7923, throughput=60.06K wps, wc=6517.72K
2018-06-26 09:03:50,232 - root - [Epoch 9 Batch 21600/34064] loss=3.0742, ppl=21.6324, throughput=60.55K wps, wc=6525.25K
2018-06-26 09:05:38,770 - root - [Epoch 9 Batch 22400/34064] loss=3.0766, ppl=21.6844, throughput=60.10K wps, wc=6522.62K
2018-06-26 09:07:26,471 - root - [Epoch 9 Batch 23200/34064] loss=3.0687, ppl=21.5133, throughput=60.58K wps, wc=6525.03K
2018-06-26 09:09:14,754 - root - [Epoch 9 Batch 24000/34064] loss=3.0800, ppl=21.7586, throughput=60.26K wps, wc=6525.14K
2018-06-26 09:11:03,774 - root - [Epoch 9 Batch 24800/34064] loss=3.0890, ppl=21.9548, throughput=59.78K wps, wc=6516.94K
2018-06-26 09:12:51,685 - root - [Epoch 9 Batch 25600/34064] loss=3.0738, ppl=21.6241, throughput=60.41K wps, wc=6518.78K
2018-06-26 09:14:40,610 - root - [Epoch 9 Batch 26400/34064] loss=3.0966, ppl=22.1216, throughput=59.87K wps, wc=6520.97K
2018-06-26 09:16:28,942 - root - [Epoch 9 Batch 27200/34064] loss=3.0716, ppl=21.5767, throughput=60.19K wps, wc=6520.65K
2018-06-26 09:18:17,088 - root - [Epoch 9 Batch 28000/34064] loss=3.0843, ppl=21.8522, throughput=60.32K wps, wc=6523.43K
2018-06-26 09:20:06,012 - root - [Epoch 9 Batch 28800/34064] loss=3.0872, ppl=21.9151, throughput=59.88K wps, wc=6522.63K
2018-06-26 09:21:53,668 - root - [Epoch 9 Batch 29600/34064] loss=3.0730, ppl=21.6062, throughput=60.62K wps, wc=6526.18K
2018-06-26 09:23:41,463 - root - [Epoch 9 Batch 30400/34064] loss=3.0715, ppl=21.5738, throughput=60.53K wps, wc=6524.44K
2018-06-26 09:25:28,427 - root - [Epoch 9 Batch 31200/34064] loss=3.0709, ppl=21.5603, throughput=60.84K wps, wc=6507.54K
2018-06-26 09:27:15,693 - root - [Epoch 9 Batch 32000/34064] loss=3.0731, ppl=21.6093, throughput=60.79K wps, wc=6520.52K
2018-06-26 09:29:03,472 - root - [Epoch 9 Batch 32800/34064] loss=3.0677, ppl=21.4922, throughput=60.50K wps, wc=6520.68K
2018-06-26 09:30:50,536 - root - [Epoch 9 Batch 33600/34064] loss=3.0752, ppl=21.6542, throughput=60.91K wps, wc=6521.06K
2018-06-26 09:36:25,273 - root - [Epoch 9] valid Loss=1.5766, valid ppl=4.8387, valid bleu=24.93
2018-06-26 09:41:00,731 - root - [Epoch 9] test Loss=1.4128, test ppl=4.1076, test bleu=25.12
2018-06-26 09:42:52,409 - root - [Epoch 10 Batch 800/34064] loss=3.0374, ppl=20.8518, throughput=58.71K wps, wc=6530.16K
2018-06-26 09:44:39,002 - root - [Epoch 10 Batch 1600/34064] loss=3.0368, ppl=20.8380, throughput=61.21K wps, wc=6524.36K
2018-06-26 09:46:27,147 - root - [Epoch 10 Batch 2400/34064] loss=3.0333, ppl=20.7662, throughput=60.31K wps, wc=6522.62K
2018-06-26 09:48:14,337 - root - [Epoch 10 Batch 3200/34064] loss=3.0550, ppl=21.2222, throughput=60.82K wps, wc=6519.09K
2018-06-26 09:50:02,333 - root - [Epoch 10 Batch 4000/34064] loss=3.0517, ppl=21.1520, throughput=60.29K wps, wc=6511.22K
2018-06-26 09:51:50,112 - root - [Epoch 10 Batch 4800/34064] loss=3.0318, ppl=20.7337, throughput=60.57K wps, wc=6528.01K
2018-06-26 09:53:37,854 - root - [Epoch 10 Batch 5600/34064] loss=3.0456, ppl=21.0224, throughput=60.56K wps, wc=6524.97K
2018-06-26 09:55:25,936 - root - [Epoch 10 Batch 6400/34064] loss=3.0694, ppl=21.5281, throughput=60.30K wps, wc=6517.71K
2018-06-26 09:57:12,414 - root - [Epoch 10 Batch 7200/34064] loss=3.0347, ppl=20.7941, throughput=61.33K wps, wc=6530.34K
2018-06-26 09:58:59,780 - root - [Epoch 10 Batch 8000/34064] loss=3.0593, ppl=21.3118, throughput=60.74K wps, wc=6521.70K
2018-06-26 10:00:48,677 - root - [Epoch 10 Batch 8800/34064] loss=3.0582, ppl=21.2890, throughput=59.92K wps, wc=6524.83K
2018-06-26 10:02:36,023 - root - [Epoch 10 Batch 9600/34064] loss=3.0471, ppl=21.0552, throughput=60.79K wps, wc=6525.48K
2018-06-26 10:04:23,754 - root - [Epoch 10 Batch 10400/34064] loss=3.0722, ppl=21.5896, throughput=60.45K wps, wc=6512.49K
2018-06-26 10:06:11,096 - root - [Epoch 10 Batch 11200/34064] loss=3.0532, ppl=21.1833, throughput=60.77K wps, wc=6523.47K
2018-06-26 10:07:58,828 - root - [Epoch 10 Batch 12000/34064] loss=3.0701, ppl=21.5437, throughput=60.51K wps, wc=6518.46K
2018-06-26 10:09:46,208 - root - [Epoch 10 Batch 12800/34064] loss=3.0607, ppl=21.3417, throughput=60.70K wps, wc=6517.98K
2018-06-26 10:11:33,400 - root - [Epoch 10 Batch 13600/34064] loss=3.0614, ppl=21.3568, throughput=60.82K wps, wc=6519.20K
2018-06-26 10:13:20,809 - root - [Epoch 10 Batch 14400/34064] loss=3.0539, ppl=21.1969, throughput=60.72K wps, wc=6522.02K
2018-06-26 10:15:09,475 - root - [Epoch 10 Batch 15200/34064] loss=3.0654, ppl=21.4432, throughput=60.03K wps, wc=6523.59K
2018-06-26 10:16:56,547 - root - [Epoch 10 Batch 16000/34064] loss=3.0521, ppl=21.1599, throughput=60.91K wps, wc=6522.09K
2018-06-26 10:18:44,999 - root - [Epoch 10 Batch 16800/34064] loss=3.0793, ppl=21.7433, throughput=60.01K wps, wc=6508.12K
2018-06-26 10:20:31,291 - root - [Epoch 10 Batch 17600/34064] loss=3.0397, ppl=20.8980, throughput=61.32K wps, wc=6517.91K
2018-06-26 10:22:19,451 - root - [Epoch 10 Batch 18400/34064] loss=3.0517, ppl=21.1507, throughput=60.30K wps, wc=6521.74K
2018-06-26 10:24:06,474 - root - [Epoch 10 Batch 19200/34064] loss=3.0528, ppl=21.1748, throughput=60.91K wps, wc=6518.53K
2018-06-26 10:25:54,648 - root - [Epoch 10 Batch 20000/34064] loss=3.0544, ppl=21.2079, throughput=60.33K wps, wc=6525.90K
2018-06-26 10:27:42,250 - root - [Epoch 10 Batch 20800/34064] loss=3.0536, ppl=21.1907, throughput=60.63K wps, wc=6524.13K
2018-06-26 10:29:32,217 - root - [Epoch 10 Batch 21600/34064] loss=3.0631, ppl=21.3947, throughput=59.27K wps, wc=6517.56K
2018-06-26 10:31:20,416 - root - [Epoch 10 Batch 22400/34064] loss=3.0590, ppl=21.3058, throughput=60.29K wps, wc=6523.47K
2018-06-26 10:33:08,455 - root - [Epoch 10 Batch 23200/34064] loss=3.0577, ppl=21.2789, throughput=60.32K wps, wc=6517.37K
2018-06-26 10:34:57,102 - root - [Epoch 10 Batch 24000/34064] loss=3.0545, ppl=21.2106, throughput=60.05K wps, wc=6524.64K
2018-06-26 10:36:43,753 - root - [Epoch 10 Batch 24800/34064] loss=3.0615, ppl=21.3588, throughput=61.16K wps, wc=6522.31K
2018-06-26 10:38:29,866 - root - [Epoch 10 Batch 25600/34064] loss=3.0497, ppl=21.1089, throughput=61.42K wps, wc=6517.67K
2018-06-26 10:40:17,695 - root - [Epoch 10 Batch 26400/34064] loss=3.0688, ppl=21.5159, throughput=60.47K wps, wc=6520.13K
2018-06-26 10:42:04,556 - root - [Epoch 10 Batch 27200/34064] loss=3.0601, ppl=21.3291, throughput=61.06K wps, wc=6524.91K
2018-06-26 10:43:52,964 - root - [Epoch 10 Batch 28000/34064] loss=3.0593, ppl=21.3116, throughput=60.08K wps, wc=6513.44K
2018-06-26 10:45:40,850 - root - [Epoch 10 Batch 28800/34064] loss=3.0542, ppl=21.2052, throughput=60.44K wps, wc=6520.56K
2018-06-26 10:47:28,413 - root - [Epoch 10 Batch 29600/34064] loss=3.0611, ppl=21.3521, throughput=60.63K wps, wc=6521.99K
2018-06-26 10:49:15,215 - root - [Epoch 10 Batch 30400/34064] loss=3.0510, ppl=21.1359, throughput=61.08K wps, wc=6523.20K
2018-06-26 10:51:02,949 - root - [Epoch 10 Batch 31200/34064] loss=3.0613, ppl=21.3561, throughput=60.56K wps, wc=6524.65K
2018-06-26 10:52:49,944 - root - [Epoch 10 Batch 32000/34064] loss=3.0519, ppl=21.1562, throughput=60.99K wps, wc=6525.91K
2018-06-26 10:54:36,267 - root - [Epoch 10 Batch 32800/34064] loss=3.0673, ppl=21.4841, throughput=61.34K wps, wc=6522.14K
2018-06-26 10:56:24,102 - root - [Epoch 10 Batch 33600/34064] loss=3.0570, ppl=21.2631, throughput=60.39K wps, wc=6512.03K
2018-06-26 11:02:01,724 - root - [Epoch 10] valid Loss=1.5740, valid ppl=4.8258, valid bleu=25.14
2018-06-26 11:06:35,575 - root - [Epoch 10] test Loss=1.4071, test ppl=4.0839, test bleu=25.32
2018-06-26 11:06:35,581 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-26 11:08:29,964 - root - [Epoch 11 Batch 800/34064] loss=3.0417, ppl=20.9403, throughput=58.12K wps, wc=6518.86K
2018-06-26 11:10:17,191 - root - [Epoch 11 Batch 1600/34064] loss=3.0182, ppl=20.4542, throughput=60.75K wps, wc=6514.06K
2018-06-26 11:12:04,398 - root - [Epoch 11 Batch 2400/34064] loss=3.0278, ppl=20.6518, throughput=60.85K wps, wc=6523.30K
2018-06-26 11:13:52,263 - root - [Epoch 11 Batch 3200/34064] loss=3.0313, ppl=20.7242, throughput=60.36K wps, wc=6510.24K
2018-06-26 11:15:40,402 - root - [Epoch 11 Batch 4000/34064] loss=3.0303, ppl=20.7032, throughput=60.28K wps, wc=6519.07K
2018-06-26 11:17:30,147 - root - [Epoch 11 Batch 4800/34064] loss=3.0305, ppl=20.7083, throughput=59.34K wps, wc=6511.86K
2018-06-26 11:19:16,940 - root - [Epoch 11 Batch 5600/34064] loss=3.0373, ppl=20.8498, throughput=61.09K wps, wc=6523.57K
2018-06-26 11:21:03,981 - root - [Epoch 11 Batch 6400/34064] loss=3.0295, ppl=20.6861, throughput=60.93K wps, wc=6521.81K
2018-06-26 11:22:51,033 - root - [Epoch 11 Batch 7200/34064] loss=3.0430, ppl=20.9680, throughput=60.97K wps, wc=6526.54K
2018-06-26 11:24:38,355 - root - [Epoch 11 Batch 8000/34064] loss=3.0521, ppl=21.1588, throughput=60.76K wps, wc=6521.10K
2018-06-26 11:26:25,455 - root - [Epoch 11 Batch 8800/34064] loss=3.0482, ppl=21.0767, throughput=60.83K wps, wc=6514.72K
2018-06-26 11:28:12,865 - root - [Epoch 11 Batch 9600/34064] loss=3.0464, ppl=21.0386, throughput=60.73K wps, wc=6522.83K
2018-06-26 11:29:59,429 - root - [Epoch 11 Batch 10400/34064] loss=3.0385, ppl=20.8748, throughput=61.13K wps, wc=6514.53K
2018-06-26 11:31:47,795 - root - [Epoch 11 Batch 11200/34064] loss=3.0566, ppl=21.2560, throughput=60.19K wps, wc=6522.40K
2018-06-26 11:33:33,968 - root - [Epoch 11 Batch 12000/34064] loss=3.0475, ppl=21.0624, throughput=61.46K wps, wc=6525.45K
2018-06-26 11:35:21,288 - root - [Epoch 11 Batch 12800/34064] loss=3.0390, ppl=20.8849, throughput=60.75K wps, wc=6519.29K
2018-06-26 11:37:08,536 - root - [Epoch 11 Batch 13600/34064] loss=3.0389, ppl=20.8829, throughput=60.80K wps, wc=6520.85K
2018-06-26 11:38:55,450 - root - [Epoch 11 Batch 14400/34064] loss=3.0337, ppl=20.7732, throughput=61.00K wps, wc=6521.28K
2018-06-26 11:40:41,939 - root - [Epoch 11 Batch 15200/34064] loss=3.0515, ppl=21.1463, throughput=61.28K wps, wc=6525.83K
2018-06-26 11:42:28,712 - root - [Epoch 11 Batch 16000/34064] loss=3.0326, ppl=20.7505, throughput=61.09K wps, wc=6522.27K
2018-06-26 11:44:15,925 - root - [Epoch 11 Batch 16800/34064] loss=3.0419, ppl=20.9448, throughput=60.76K wps, wc=6513.94K
2018-06-26 11:46:03,200 - root - [Epoch 11 Batch 17600/34064] loss=3.0401, ppl=20.9067, throughput=60.82K wps, wc=6524.42K
2018-06-26 11:47:50,326 - root - [Epoch 11 Batch 18400/34064] loss=3.0423, ppl=20.9525, throughput=60.88K wps, wc=6521.52K
2018-06-26 11:49:37,545 - root - [Epoch 11 Batch 19200/34064] loss=3.0406, ppl=20.9177, throughput=60.84K wps, wc=6523.34K
2018-06-26 11:51:23,524 - root - [Epoch 11 Batch 20000/34064] loss=3.0427, ppl=20.9622, throughput=61.59K wps, wc=6526.85K
2018-06-26 11:53:12,313 - root - [Epoch 11 Batch 20800/34064] loss=3.0564, ppl=21.2503, throughput=59.84K wps, wc=6509.68K
2018-06-26 11:55:00,116 - root - [Epoch 11 Batch 21600/34064] loss=3.0376, ppl=20.8543, throughput=60.51K wps, wc=6522.66K
2018-06-26 11:56:47,842 - root - [Epoch 11 Batch 22400/34064] loss=3.0443, ppl=20.9943, throughput=60.52K wps, wc=6519.53K
2018-06-26 11:58:36,286 - root - [Epoch 11 Batch 23200/34064] loss=3.0489, ppl=21.0914, throughput=60.17K wps, wc=6524.70K
2018-06-26 12:00:24,407 - root - [Epoch 11 Batch 24000/34064] loss=3.0529, ppl=21.1756, throughput=60.36K wps, wc=6525.65K
2018-06-26 12:02:11,532 - root - [Epoch 11 Batch 24800/34064] loss=3.0387, ppl=20.8780, throughput=60.91K wps, wc=6524.46K
2018-06-26 12:03:58,878 - root - [Epoch 11 Batch 25600/34064] loss=3.0473, ppl=21.0577, throughput=60.69K wps, wc=6514.94K
2018-06-26 12:05:46,322 - root - [Epoch 11 Batch 26400/34064] loss=3.0246, ppl=20.5853, throughput=60.69K wps, wc=6521.11K
2018-06-26 12:07:34,769 - root - [Epoch 11 Batch 27200/34064] loss=3.0504, ppl=21.1242, throughput=60.19K wps, wc=6527.55K
2018-06-26 12:09:21,437 - root - [Epoch 11 Batch 28000/34064] loss=3.0397, ppl=20.8993, throughput=61.16K wps, wc=6524.27K
2018-06-26 12:11:07,680 - root - [Epoch 11 Batch 28800/34064] loss=3.0414, ppl=20.9354, throughput=61.38K wps, wc=6521.41K
2018-06-26 12:12:55,363 - root - [Epoch 11 Batch 29600/34064] loss=3.0453, ppl=21.0171, throughput=60.53K wps, wc=6518.05K
2018-06-26 12:14:43,552 - root - [Epoch 11 Batch 30400/34064] loss=3.0426, ppl=20.9600, throughput=60.33K wps, wc=6527.02K
2018-06-26 12:16:30,213 - root - [Epoch 11 Batch 31200/34064] loss=3.0524, ppl=21.1665, throughput=61.20K wps, wc=6527.94K
2018-06-26 12:18:17,965 - root - [Epoch 11 Batch 32000/34064] loss=3.0446, ppl=21.0012, throughput=60.57K wps, wc=6526.48K
2018-06-26 12:20:04,749 - root - [Epoch 11 Batch 32800/34064] loss=3.0404, ppl=20.9131, throughput=61.07K wps, wc=6521.36K
2018-06-26 12:21:51,071 - root - [Epoch 11 Batch 33600/34064] loss=3.0378, ppl=20.8594, throughput=61.33K wps, wc=6520.68K
2018-06-26 12:27:29,654 - root - [Epoch 11] valid Loss=1.5590, valid ppl=4.7538, valid bleu=25.41
2018-06-26 12:32:05,812 - root - [Epoch 11] test Loss=1.3907, test ppl=4.0175, test bleu=25.42
2018-06-26 12:32:05,818 - root - Save best parameters to transformer_en_de_u512/valid_best.params
2018-06-26 12:33:59,286 - root - [Epoch 12 Batch 800/34064] loss=3.0009, ppl=20.1037, throughput=58.66K wps, wc=6525.31K
2018-06-26 12:35:46,486 - root - [Epoch 12 Batch 1600/34064] loss=3.0077, ppl=20.2402, throughput=60.85K wps, wc=6523.48K
2018-06-26 12:37:34,533 - root - [Epoch 12 Batch 2400/34064] loss=3.0120, ppl=20.3277, throughput=60.39K wps, wc=6525.15K
2018-06-26 12:39:23,337 - root - [Epoch 12 Batch 3200/34064] loss=3.0165, ppl=20.4202, throughput=59.94K wps, wc=6522.14K
2018-06-26 12:41:11,255 - root - [Epoch 12 Batch 4000/34064] loss=3.0222, ppl=20.5361, throughput=60.43K wps, wc=6521.38K
2018-06-26 12:42:58,437 - root - [Epoch 12 Batch 4800/34064] loss=3.0227, ppl=20.5473, throughput=60.82K wps, wc=6519.11K
2018-06-26 12:44:45,852 - root - [Epoch 12 Batch 5600/34064] loss=3.0276, ppl=20.6484, throughput=60.75K wps, wc=6525.98K
2018-06-26 12:46:35,586 - root - [Epoch 12 Batch 6400/34064] loss=3.0182, ppl=20.4548, throughput=59.39K wps, wc=6517.36K
2018-06-26 12:48:23,027 - root - [Epoch 12 Batch 7200/34064] loss=3.0154, ppl=20.3963, throughput=60.73K wps, wc=6525.04K
2018-06-26 12:50:09,768 - root - [Epoch 12 Batch 8000/34064] loss=3.0189, ppl=20.4698, throughput=61.15K wps, wc=6526.68K
2018-06-26 12:51:57,283 - root - [Epoch 12 Batch 8800/34064] loss=3.0319, ppl=20.7361, throughput=60.61K wps, wc=6516.47K
2018-06-26 12:53:44,633 - root - [Epoch 12 Batch 9600/34064] loss=3.0326, ppl=20.7505, throughput=60.69K wps, wc=6514.72K
2018-06-26 12:55:31,069 - root - [Epoch 12 Batch 10400/34064] loss=3.0211, ppl=20.5145, throughput=61.28K wps, wc=6522.02K
2018-06-26 12:57:17,076 - root - [Epoch 12 Batch 11200/34064] loss=3.0240, ppl=20.5732, throughput=61.43K wps, wc=6511.48K
2018-06-26 12:59:04,471 - root - [Epoch 12 Batch 12000/34064] loss=3.0242, ppl=20.5766, throughput=60.69K wps, wc=6517.89K
2018-06-26 13:00:51,248 - root - [Epoch 12 Batch 12800/34064] loss=3.0222, ppl=20.5357, throughput=61.01K wps, wc=6514.13K
2018-06-26 13:02:39,844 - root - [Epoch 12 Batch 13600/34064] loss=3.0395, ppl=20.8943, throughput=60.09K wps, wc=6525.29K
2018-06-26 13:04:28,369 - root - [Epoch 12 Batch 14400/34064] loss=3.0389, ppl=20.8818, throughput=60.09K wps, wc=6521.07K
2018-06-26 13:06:15,197 - root - [Epoch 12 Batch 15200/34064] loss=3.0244, ppl=20.5817, throughput=61.10K wps, wc=6526.94K
2018-06-26 13:08:02,394 - root - [Epoch 12 Batch 16000/34064] loss=3.0356, ppl=20.8134, throughput=60.85K wps, wc=6522.67K
2018-06-26 13:09:49,904 - root - [Epoch 12 Batch 16800/34064] loss=3.0366, ppl=20.8344, throughput=60.65K wps, wc=6520.99K
2018-06-26 13:11:37,056 - root - [Epoch 12 Batch 17600/34064] loss=3.0272, ppl=20.6388, throughput=60.88K wps, wc=6523.73K
2018-06-26 13:13:24,420 - root - [Epoch 12 Batch 18400/34064] loss=3.0178, ppl=20.4455, throughput=60.81K wps, wc=6528.72K
2018-06-26 13:15:13,545 - root - [Epoch 12 Batch 19200/34064] loss=3.0365, ppl=20.8327, throughput=59.76K wps, wc=6520.88K
2018-06-26 13:17:00,705 - root - [Epoch 12 Batch 20000/34064] loss=3.0271, ppl=20.6381, throughput=60.83K wps, wc=6518.58K
2018-06-26 13:18:46,412 - root - [Epoch 12 Batch 20800/34064] loss=3.0258, ppl=20.6100, throughput=61.70K wps, wc=6521.65K
2018-06-26 13:20:33,032 - root - [Epoch 12 Batch 21600/34064] loss=3.0118, ppl=20.3237, throughput=61.21K wps, wc=6526.13K
2018-06-26 13:22:20,208 - root - [Epoch 12 Batch 22400/34064] loss=3.0260, ppl=20.6148, throughput=60.86K wps, wc=6522.92K
2018-06-26 13:24:07,546 - root - [Epoch 12 Batch 23200/34064] loss=3.0279, ppl=20.6531, throughput=60.74K wps, wc=6519.21K
2018-06-26 13:25:55,802 - root - [Epoch 12 Batch 24000/34064] loss=3.0306, ppl=20.7097, throughput=60.25K wps, wc=6522.74K
2018-06-26 13:27:44,913 - root - [Epoch 12 Batch 24800/34064] loss=3.0375, ppl=20.8523, throughput=59.80K wps, wc=6524.80K
2018-06-26 13:29:33,988 - root - [Epoch 12 Batch 25600/34064] loss=3.0333, ppl=20.7652, throughput=59.79K wps, wc=6522.04K
2018-06-26 13:31:21,791 - root - [Epoch 12 Batch 26400/34064] loss=3.0453, ppl=21.0155, throughput=60.45K wps, wc=6517.15K
2018-06-26 13:33:09,815 - root - [Epoch 12 Batch 27200/34064] loss=3.0335, ppl=20.7698, throughput=60.35K wps, wc=6519.72K
2018-06-26 13:34:56,631 - root - [Epoch 12 Batch 28000/34064] loss=3.0296, ppl=20.6887, throughput=61.04K wps, wc=6520.31K
2018-06-26 13:36:43,646 - root - [Epoch 12 Batch 28800/34064] loss=3.0365, ppl=20.8331, throughput=60.85K wps, wc=6511.74K
2018-06-26 13:38:31,093 - root - [Epoch 12 Batch 29600/34064] loss=3.0452, ppl=21.0146, throughput=60.71K wps, wc=6522.80K
2018-06-26 13:40:19,679 - root - [Epoch 12 Batch 30400/34064] loss=3.0484, ppl=21.0808, throughput=59.99K wps, wc=6514.34K
2018-06-26 13:42:08,049 - root - [Epoch 12 Batch 31200/34064] loss=3.0368, ppl=20.8391, throughput=60.18K wps, wc=6522.18K
2018-06-26 13:43:56,335 - root - [Epoch 12 Batch 32000/34064] loss=3.0532, ppl=21.1821, throughput=60.18K wps, wc=6516.95K
2018-06-26 13:45:43,395 - root - [Epoch 12 Batch 32800/34064] loss=3.0385, ppl=20.8736, throughput=60.92K wps, wc=6521.99K
2018-06-26 13:47:30,273 - root - [Epoch 12 Batch 33600/34064] loss=3.0268, ppl=20.6316, throughput=61.03K wps, wc=6522.55K
2018-06-26 13:53:06,982 - root - [Epoch 12] valid Loss=1.5482, valid ppl=4.7030, valid bleu=25.30
2018-06-26 13:57:55,418 - root - [Epoch 12] test Loss=1.3835, test ppl=3.9887, test bleu=25.72
2018-06-26 13:59:46,547 - root - [Epoch 13 Batch 800/34064] loss=3.0109, ppl=20.3067, throughput=58.91K wps, wc=6520.33K
2018-06-26 14:01:34,024 - root - [Epoch 13 Batch 1600/34064] loss=2.9985, ppl=20.0552, throughput=60.69K wps, wc=6522.96K
2018-06-26 14:03:22,443 - root - [Epoch 13 Batch 2400/34064] loss=2.9969, ppl=20.0233, throughput=60.12K wps, wc=6518.52K
2018-06-26 14:05:09,601 - root - [Epoch 13 Batch 3200/34064] loss=3.0045, ppl=20.1760, throughput=60.92K wps, wc=6528.04K
2018-06-26 14:06:57,531 - root - [Epoch 13 Batch 4000/34064] loss=3.0081, ppl=20.2493, throughput=60.47K wps, wc=6526.34K
2018-06-26 14:08:45,534 - root - [Epoch 13 Batch 4800/34064] loss=3.0065, ppl=20.2169, throughput=60.40K wps, wc=6523.68K
2018-06-26 14:10:32,315 - root - [Epoch 13 Batch 5600/34064] loss=2.9942, ppl=19.9693, throughput=61.05K wps, wc=6518.80K
2018-06-26 14:12:20,622 - root - [Epoch 13 Batch 6400/34064] loss=3.0143, ppl=20.3740, throughput=60.13K wps, wc=6512.26K
2018-06-26 14:14:08,649 - root - [Epoch 13 Batch 7200/34064] loss=3.0069, ppl=20.2245, throughput=60.38K wps, wc=6522.53K
2018-06-26 14:15:55,357 - root - [Epoch 13 Batch 8000/34064] loss=3.0119, ppl=20.3263, throughput=61.10K wps, wc=6520.15K
2018-06-26 14:17:44,078 - root - [Epoch 13 Batch 8800/34064] loss=2.9988, ppl=20.0608, throughput=59.98K wps, wc=6521.33K
2018-06-26 14:19:32,211 - root - [Epoch 13 Batch 9600/34064] loss=3.0284, ppl=20.6639, throughput=60.29K wps, wc=6519.09K
2018-06-26 14:21:20,783 - root - [Epoch 13 Batch 10400/34064] loss=3.0111, ppl=20.3094, throughput=60.07K wps, wc=6521.85K
2018-06-26 14:23:06,757 - root - [Epoch 13 Batch 11200/34064] loss=3.0256, ppl=20.6066, throughput=61.52K wps, wc=6519.57K
2018-06-26 14:24:52,880 - root - [Epoch 13 Batch 12000/34064] loss=3.0046, ppl=20.1785, throughput=61.51K wps, wc=6527.04K
2018-06-26 14:26:40,119 - root - [Epoch 13 Batch 12800/34064] loss=3.0254, ppl=20.6031, throughput=60.77K wps, wc=6516.84K
2018-06-26 14:28:27,682 - root - [Epoch 13 Batch 13600/34064] loss=3.0204, ppl=20.4999, throughput=60.65K wps, wc=6524.00K
2018-06-26 14:30:14,851 - root - [Epoch 13 Batch 14400/34064] loss=2.9962, ppl=20.0095, throughput=60.89K wps, wc=6525.17K
2018-06-26 14:32:01,149 - root - [Epoch 13 Batch 15200/34064] loss=3.0056, ppl=20.1984, throughput=61.39K wps, wc=6525.40K
2018-06-26 14:33:48,263 - root - [Epoch 13 Batch 16000/34064] loss=3.0206, ppl=20.5046, throughput=60.91K wps, wc=6524.02K
2018-06-26 14:35:36,715 - root - [Epoch 13 Batch 16800/34064] loss=3.0372, ppl=20.8469, throughput=60.08K wps, wc=6515.66K
2018-06-26 14:37:24,268 - root - [Epoch 13 Batch 17600/34064] loss=3.0128, ppl=20.3451, throughput=60.59K wps, wc=6516.12K
2018-06-26 14:39:11,155 - root - [Epoch 13 Batch 18400/34064] loss=3.0148, ppl=20.3858, throughput=61.00K wps, wc=6519.88K
2018-06-26 14:40:57,965 - root - [Epoch 13 Batch 19200/34064] loss=3.0143, ppl=20.3751, throughput=61.05K wps, wc=6520.20K
2018-06-26 14:42:45,748 - root - [Epoch 13 Batch 20000/34064] loss=3.0282, ppl=20.6598, throughput=60.57K wps, wc=6527.85K
2018-06-26 14:44:34,974 - root - [Epoch 13 Batch 20800/34064] loss=3.0235, ppl=20.5628, throughput=59.72K wps, wc=6522.58K
2018-06-26 14:46:22,295 - root - [Epoch 13 Batch 21600/34064] loss=3.0274, ppl=20.6427, throughput=60.81K wps, wc=6525.75K
2018-06-26 14:48:09,658 - root - [Epoch 13 Batch 22400/34064] loss=3.0247, ppl=20.5881, throughput=60.73K wps, wc=6519.68K
2018-06-26 14:49:56,509 - root - [Epoch 13 Batch 23200/34064] loss=3.0228, ppl=20.5483, throughput=61.07K wps, wc=6525.70K
2018-06-26 14:51:44,826 - root - [Epoch 13 Batch 24000/34064] loss=3.0268, ppl=20.6307, throughput=60.15K wps, wc=6515.11K
2018-06-26 14:53:32,592 - root - [Epoch 13 Batch 24800/34064] loss=3.0268, ppl=20.6308, throughput=60.53K wps, wc=6522.52K
2018-06-26 14:55:20,740 - root - [Epoch 13 Batch 25600/34064] loss=3.0344, ppl=20.7886, throughput=60.31K wps, wc=6521.96K
2018-06-26 14:57:07,269 - root - [Epoch 13 Batch 26400/34064] loss=3.0078, ppl=20.2425, throughput=61.19K wps, wc=6518.29K
2018-06-26 14:58:55,376 - root - [Epoch 13 Batch 27200/34064] loss=3.0290, ppl=20.6761, throughput=60.35K wps, wc=6524.52K
2018-06-26 15:00:42,932 - root - [Epoch 13 Batch 28000/34064] loss=3.0120, ppl=20.3276, throughput=60.65K wps, wc=6523.41K
2018-06-26 15:02:30,168 - root - [Epoch 13 Batch 28800/34064] loss=3.0140, ppl=20.3686, throughput=60.83K wps, wc=6522.79K
2018-06-26 15:04:17,519 - root - [Epoch 13 Batch 29600/34064] loss=3.0270, ppl=20.6362, throughput=60.74K wps, wc=6520.08K
2018-06-26 15:06:05,852 - root - [Epoch 13 Batch 30400/34064] loss=3.0300, ppl=20.6982, throughput=60.17K wps, wc=6518.70K
2018-06-26 15:07:53,110 - root - [Epoch 13 Batch 31200/34064] loss=3.0246, ppl=20.5862, throughput=60.79K wps, wc=6519.86K
2018-06-26 15:09:39,856 - root - [Epoch 13 Batch 32000/34064] loss=3.0138, ppl=20.3643, throughput=61.13K wps, wc=6525.08K
2018-06-26 15:11:27,577 - root - [Epoch 13 Batch 32800/34064] loss=3.0309, ppl=20.7161, throughput=60.54K wps, wc=6521.16K
2018-06-26 15:13:16,991 - root - [Epoch 13 Batch 33600/34064] loss=3.0350, ppl=20.8002, throughput=59.51K wps, wc=6510.68K
2018-06-26 15:19:14,477 - root - [Epoch 13] valid Loss=1.5468, valid ppl=4.6963, valid bleu=25.35
2018-06-26 15:23:50,324 - root - [Epoch 13] test Loss=1.3781, test ppl=3.9674, test bleu=25.57
2018-06-26 15:25:43,166 - root - [Epoch 14 Batch 800/34064] loss=2.9904, ppl=19.8939, throughput=57.99K wps, wc=6517.70K
2018-06-26 15:27:31,358 - root - [Epoch 14 Batch 1600/34064] loss=2.9898, ppl=19.8824, throughput=60.27K wps, wc=6520.82K
2018-06-26 15:29:18,936 - root - [Epoch 14 Batch 2400/34064] loss=2.9986, ppl=20.0574, throughput=60.66K wps, wc=6525.94K
2018-06-26 15:31:05,329 - root - [Epoch 14 Batch 3200/34064] loss=2.9798, ppl=19.6834, throughput=61.35K wps, wc=6527.13K
2018-06-26 15:32:52,694 - root - [Epoch 14 Batch 4000/34064] loss=2.9976, ppl=20.0374, throughput=60.71K wps, wc=6518.08K
2018-06-26 15:34:41,790 - root - [Epoch 14 Batch 4800/34064] loss=2.9996, ppl=20.0773, throughput=59.77K wps, wc=6521.08K
2018-06-26 15:36:28,066 - root - [Epoch 14 Batch 5600/34064] loss=2.9877, ppl=19.8395, throughput=61.39K wps, wc=6524.65K
2018-06-26 15:38:16,697 - root - [Epoch 14 Batch 6400/34064] loss=3.0060, ppl=20.2057, throughput=60.05K wps, wc=6523.76K
2018-06-26 15:40:04,562 - root - [Epoch 14 Batch 7200/34064] loss=3.0125, ppl=20.3380, throughput=60.47K wps, wc=6522.62K
2018-06-26 15:41:51,814 - root - [Epoch 14 Batch 8000/34064] loss=2.9923, ppl=19.9324, throughput=60.79K wps, wc=6519.69K
2018-06-26 15:43:38,491 - root - [Epoch 14 Batch 8800/34064] loss=2.9929, ppl=19.9426, throughput=61.13K wps, wc=6521.14K
2018-06-26 15:45:26,718 - root - [Epoch 14 Batch 9600/34064] loss=3.0056, ppl=20.1974, throughput=60.27K wps, wc=6522.34K
2018-06-26 15:47:14,817 - root - [Epoch 14 Batch 10400/34064] loss=3.0045, ppl=20.1766, throughput=60.29K wps, wc=6517.58K
2018-06-26 15:49:02,091 - root - [Epoch 14 Batch 11200/34064] loss=3.0073, ppl=20.2336, throughput=60.79K wps, wc=6521.55K
2018-06-26 15:50:48,593 - root - [Epoch 14 Batch 12000/34064] loss=3.0009, ppl=20.1043, throughput=61.27K wps, wc=6525.74K
2018-06-26 15:52:35,620 - root - [Epoch 14 Batch 12800/34064] loss=2.9948, ppl=19.9823, throughput=60.96K wps, wc=6523.95K
2018-06-26 15:54:22,639 - root - [Epoch 14 Batch 13600/34064] loss=3.0200, ppl=20.4905, throughput=60.95K wps, wc=6523.01K
2018-06-26 15:56:11,317 - root - [Epoch 14 Batch 14400/34064] loss=3.0269, ppl=20.6335, throughput=59.94K wps, wc=6513.60K
2018-06-26 15:57:59,250 - root - [Epoch 14 Batch 15200/34064] loss=2.9974, ppl=20.0337, throughput=60.38K wps, wc=6517.25K
2018-06-26 15:59:46,562 - root - [Epoch 14 Batch 16000/34064] loss=2.9923, ppl=19.9310, throughput=60.80K wps, wc=6524.08K
2018-06-26 16:01:32,410 - root - [Epoch 14 Batch 16800/34064] loss=3.0032, ppl=20.1497, throughput=61.59K wps, wc=6518.92K
2018-06-26 16:03:19,769 - root - [Epoch 14 Batch 17600/34064] loss=3.0138, ppl=20.3655, throughput=60.74K wps, wc=6520.79K
2018-06-26 16:05:06,093 - root - [Epoch 14 Batch 18400/34064] loss=3.0065, ppl=20.2175, throughput=61.31K wps, wc=6518.41K
2018-06-26 16:06:54,519 - root - [Epoch 14 Batch 19200/34064] loss=3.0243, ppl=20.5799, throughput=60.07K wps, wc=6512.76K
2018-06-26 16:08:39,823 - root - [Epoch 14 Batch 20000/34064] loss=3.0098, ppl=20.2828, throughput=61.82K wps, wc=6510.12K
2018-06-26 16:10:26,842 - root - [Epoch 14 Batch 20800/34064] loss=3.0027, ppl=20.1404, throughput=60.98K wps, wc=6525.80K
2018-06-26 16:12:13,439 - root - [Epoch 14 Batch 21600/34064] loss=3.0102, ppl=20.2905, throughput=61.24K wps, wc=6528.22K
2018-06-26 16:14:01,835 - root - [Epoch 14 Batch 22400/34064] loss=3.0206, ppl=20.5042, throughput=60.14K wps, wc=6519.15K
2018-06-26 16:15:49,226 - root - [Epoch 14 Batch 23200/34064] loss=3.0198, ppl=20.4864, throughput=60.72K wps, wc=6521.23K
2018-06-26 16:17:37,227 - root - [Epoch 14 Batch 24000/34064] loss=3.0044, ppl=20.1741, throughput=60.44K wps, wc=6527.30K
2018-06-26 16:19:23,980 - root - [Epoch 14 Batch 24800/34064] loss=3.0048, ppl=20.1827, throughput=61.10K wps, wc=6523.07K
2018-06-26 16:21:12,212 - root - [Epoch 14 Batch 25600/34064] loss=3.0096, ppl=20.2785, throughput=60.26K wps, wc=6521.80K
2018-06-26 16:22:59,351 - root - [Epoch 14 Batch 26400/34064] loss=3.0164, ppl=20.4183, throughput=60.87K wps, wc=6521.34K
2018-06-26 16:24:45,868 - root - [Epoch 14 Batch 27200/34064] loss=3.0153, ppl=20.3958, throughput=61.27K wps, wc=6525.87K
2018-06-26 16:26:34,516 - root - [Epoch 14 Batch 28000/34064] loss=3.0296, ppl=20.6899, throughput=59.97K wps, wc=6515.64K
2018-06-26 16:28:21,592 - root - [Epoch 14 Batch 28800/34064] loss=3.0053, ppl=20.1921, throughput=60.91K wps, wc=6521.53K
2018-06-26 16:30:09,294 - root - [Epoch 14 Batch 29600/34064] loss=3.0341, ppl=20.7825, throughput=60.47K wps, wc=6512.51K
2018-06-26 16:31:55,509 - root - [Epoch 14 Batch 30400/34064] loss=3.0133, ppl=20.3546, throughput=61.45K wps, wc=6527.10K
2018-06-26 16:33:44,233 - root - [Epoch 14 Batch 31200/34064] loss=3.0154, ppl=20.3973, throughput=59.94K wps, wc=6516.77K
2018-06-26 16:35:31,249 - root - [Epoch 14 Batch 32000/34064] loss=3.0122, ppl=20.3330, throughput=60.94K wps, wc=6521.48K
2018-06-26 16:37:18,570 - root - [Epoch 14 Batch 32800/34064] loss=3.0198, ppl=20.4880, throughput=60.77K wps, wc=6522.23K
2018-06-26 16:39:06,053 - root - [Epoch 14 Batch 33600/34064] loss=3.0175, ppl=20.4406, throughput=60.68K wps, wc=6522.34K
2018-06-26 16:44:41,349 - root - [Epoch 14] valid Loss=1.5444, valid ppl=4.6854, valid bleu=25.30