Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
Namespace(alpha=2, batch_size=80, beta=1, bptt=70, clip=0.25, dropout=0.4, dropout_e=0.1, dropout_h=0.2, dropout_i=0.65, emsize=400, epochs=750, eval_only=False, gpu='0', log_interval=200, lr=30, lr_update_factor=0.1, lr_update_interval=30, model='lstm', nhid=1150, nlayers=3, ntasgd=True, optimizer='sgd', save='awd_lstm_lm_1150_wikitext-2', test_mode=False, tied=True, wd=1.2e-06, weight_dropout=0.5)
Use AWDRNN
AWDRNN(
(embedding): HybridSequential(
(0): Embedding(33278 -> 400, float32)
(1): Dropout(p = 0.65, axes=(0,))
)
(encoder): HybridSequential(
(0): LSTM(400 -> 1150, TNC)
(1): LSTM(1150 -> 1150, TNC)
(2): LSTM(1150 -> 400, TNC)
)
(decoder): HybridSequential(
(0): Dense(400 -> 33278, linear)
)
)
[Epoch 0 Batch 200/372] current loss 8.00, ppl 2979.04, throughput 426.35 samples/s, lr 29.57
[Epoch 0] throughput 28348.05 samples/s
[Epoch 0] time cost 80.82s, valid loss 6.55, valid ppl 697.88, lr 30.00
[Epoch 0] test loss 6.48, test ppl 651.82
[Epoch 1 Batch 200/372] current loss 6.74, ppl 848.61, throughput 409.79 samples/s, lr 15.00
[Epoch 1] throughput 28006.58 samples/s
[Epoch 1] time cost 81.71s, valid loss 6.05, valid ppl 424.94, lr 30.00
[Epoch 1] test loss 5.98, test ppl 395.48
[Epoch 2 Batch 200/372] current loss 6.44, ppl 626.88, throughput 405.12 samples/s, lr 31.29
[Epoch 2] throughput 27820.44 samples/s
[Epoch 2] time cost 82.32s, valid loss 5.82, valid ppl 335.67, lr 30.00
[Epoch 2] test loss 5.74, test ppl 312.08
[Epoch 3 Batch 200/372] current loss 6.25, ppl 519.27, throughput 408.22 samples/s, lr 30.43
[Epoch 3] throughput 28002.06 samples/s
[Epoch 3] time cost 81.73s, valid loss 5.65, valid ppl 283.11, lr 30.00
[Epoch 3] test loss 5.57, test ppl 261.82
[Epoch 4 Batch 200/372] current loss 6.08, ppl 438.95, throughput 409.26 samples/s, lr 29.14
[Epoch 4] throughput 27671.86 samples/s
[Epoch 4] time cost 82.62s, valid loss 5.49, valid ppl 243.34, lr 30.00
[Epoch 4] test loss 5.42, test ppl 225.83
[Epoch 5 Batch 200/372] current loss 5.95, ppl 385.63, throughput 411.56 samples/s, lr 34.29
[Epoch 5] throughput 28001.11 samples/s
[Epoch 5] time cost 81.75s, valid loss 5.35, valid ppl 210.92, lr 30.00
[Epoch 5] test loss 5.27, test ppl 194.70
[Epoch 6 Batch 200/372] current loss 5.86, ppl 351.01, throughput 417.25 samples/s, lr 34.29
[Epoch 6] throughput 27880.04 samples/s
[Epoch 6] time cost 82.08s, valid loss 5.27, valid ppl 193.96, lr 30.00
[Epoch 6] test loss 5.19, test ppl 179.33
[Epoch 7 Batch 200/372] current loss 5.78, ppl 323.67, throughput 421.35 samples/s, lr 31.29
[Epoch 7] throughput 28099.37 samples/s
[Epoch 7] time cost 81.41s, valid loss 5.19, valid ppl 179.00, lr 30.00
[Epoch 7] test loss 5.11, test ppl 165.79
[Epoch 8 Batch 200/372] current loss 5.71, ppl 302.12, throughput 409.17 samples/s, lr 32.14
[Epoch 8] throughput 27626.67 samples/s
[Epoch 8] time cost 82.74s, valid loss 5.12, valid ppl 166.58, lr 30.00
[Epoch 8] test loss 5.04, test ppl 155.06
[Epoch 9 Batch 200/372] current loss 5.64, ppl 280.22, throughput 414.53 samples/s, lr 29.14
[Epoch 9] throughput 28063.92 samples/s
[Epoch 9] time cost 81.49s, valid loss 5.07, valid ppl 158.50, lr 30.00
[Epoch 9] test loss 5.00, test ppl 147.82
[Epoch 10 Batch 200/372] current loss 5.59, ppl 267.59, throughput 414.20 samples/s, lr 13.71
[Epoch 10] throughput 28004.12 samples/s
[Epoch 10] time cost 81.69s, valid loss 5.00, valid ppl 148.36, lr 30.00
[Epoch 10] test loss 4.93, test ppl 138.34
[Epoch 11 Batch 200/372] current loss 5.54, ppl 255.90, throughput 415.11 samples/s, lr 31.29
[Epoch 11] throughput 28263.72 samples/s
[Epoch 11] time cost 81.04s, valid loss 4.97, valid ppl 144.68, lr 30.00
[Epoch 11] test loss 4.90, test ppl 134.28
[Epoch 12 Batch 200/372] current loss 5.49, ppl 242.31, throughput 405.97 samples/s, lr 30.00
[Epoch 12] throughput 27716.04 samples/s
[Epoch 12] time cost 82.50s, valid loss 4.93, valid ppl 138.61, lr 30.00
[Epoch 12] test loss 4.86, test ppl 129.09
[Epoch 13 Batch 200/372] current loss 5.45, ppl 233.35, throughput 403.78 samples/s, lr 31.29
[Epoch 13] throughput 27762.58 samples/s
[Epoch 13] time cost 82.37s, valid loss 4.88, valid ppl 131.93, lr 30.00
[Epoch 13] test loss 4.81, test ppl 122.80
[Epoch 14 Batch 200/372] current loss 5.41, ppl 223.35, throughput 414.83 samples/s, lr 29.14
[Epoch 14] throughput 28045.79 samples/s
[Epoch 14] time cost 81.61s, valid loss 4.86, valid ppl 128.60, lr 30.00
[Epoch 14] test loss 4.78, test ppl 119.46
[Epoch 15 Batch 200/372] current loss 5.37, ppl 214.46, throughput 407.60 samples/s, lr 34.29
[Epoch 15] throughput 27815.06 samples/s
[Epoch 15] time cost 82.21s, valid loss 4.81, valid ppl 122.27, lr 30.00
[Epoch 15] test loss 4.73, test ppl 113.74
[Epoch 16 Batch 200/372] current loss 5.34, ppl 207.90, throughput 402.53 samples/s, lr 28.71
[Epoch 16] throughput 27600.81 samples/s
[Epoch 16] time cost 82.84s, valid loss 4.81, valid ppl 122.78, lr 30.00
[Epoch 17 Batch 200/372] current loss 5.32, ppl 204.69, throughput 409.20 samples/s, lr 31.29
[Epoch 17] throughput 27865.11 samples/s
[Epoch 17] time cost 82.14s, valid loss 4.78, valid ppl 119.34, lr 30.00
[Epoch 17] test loss 4.71, test ppl 111.34
[Epoch 18 Batch 200/372] current loss 5.27, ppl 194.69, throughput 411.09 samples/s, lr 14.14
[Epoch 18] throughput 27746.28 samples/s
[Epoch 18] time cost 82.42s, valid loss 4.79, valid ppl 120.86, lr 30.00
[Epoch 19 Batch 200/372] current loss 5.24, ppl 188.98, throughput 410.82 samples/s, lr 27.43
[Epoch 19] throughput 27800.39 samples/s
[Epoch 19] time cost 82.28s, valid loss 4.75, valid ppl 115.53, lr 30.00
[Epoch 19] test loss 4.68, test ppl 107.99
[Epoch 20 Batch 200/372] current loss 5.22, ppl 184.78, throughput 405.95 samples/s, lr 30.00
[Epoch 20] throughput 28009.05 samples/s
[Epoch 20] time cost 81.71s, valid loss 4.74, valid ppl 114.36, lr 30.00
[Epoch 20] test loss 4.67, test ppl 106.52
[Epoch 21 Batch 200/372] current loss 5.21, ppl 182.43, throughput 415.35 samples/s, lr 27.86
[Epoch 21] throughput 27801.15 samples/s
[Epoch 21] time cost 82.27s, valid loss 4.72, valid ppl 112.64, lr 30.00
[Epoch 21] test loss 4.65, test ppl 105.06
[Epoch 22 Batch 200/372] current loss 5.17, ppl 176.09, throughput 412.67 samples/s, lr 27.00
[Epoch 22] throughput 28282.55 samples/s
[Epoch 22] time cost 81.02s, valid loss 4.68, valid ppl 107.81, lr 30.00
[Epoch 22] test loss 4.61, test ppl 100.64
[Epoch 23 Batch 200/372] current loss 5.15, ppl 171.73, throughput 407.31 samples/s, lr 27.86
[Epoch 23] throughput 27675.34 samples/s
[Epoch 23] time cost 82.64s, valid loss 4.67, valid ppl 106.77, lr 30.00
[Epoch 23] test loss 4.61, test ppl 100.15
[Epoch 24 Batch 200/372] current loss 5.13, ppl 169.65, throughput 408.22 samples/s, lr 28.71
[Epoch 24] throughput 27834.56 samples/s
[Epoch 24] time cost 82.22s, valid loss 4.67, valid ppl 106.86, lr 30.00
[Epoch 25 Batch 200/372] current loss 5.12, ppl 167.75, throughput 408.43 samples/s, lr 32.14
[Epoch 25] throughput 27859.50 samples/s
[Epoch 25] time cost 82.13s, valid loss 4.64, valid ppl 103.50, lr 30.00
[Epoch 25] test loss 4.57, test ppl 96.80
[Epoch 26 Batch 200/372] current loss 5.11, ppl 165.33, throughput 405.79 samples/s, lr 30.43
[Epoch 26] throughput 27850.15 samples/s
[Epoch 26] time cost 82.22s, valid loss 4.64, valid ppl 103.57, lr 30.00
[Epoch 27 Batch 200/372] current loss 5.07, ppl 158.83, throughput 412.57 samples/s, lr 15.43
[Epoch 27] throughput 28061.55 samples/s
[Epoch 27] time cost 81.62s, valid loss 4.61, valid ppl 100.56, lr 30.00
[Epoch 27] test loss 4.54, test ppl 94.03
[Epoch 28 Batch 200/372] current loss 5.07, ppl 159.67, throughput 399.07 samples/s, lr 29.57
[Epoch 28] throughput 27652.75 samples/s
[Epoch 28] time cost 82.78s, valid loss 4.63, valid ppl 102.34, lr 30.00
[Epoch 29 Batch 200/372] current loss 5.06, ppl 158.03, throughput 411.58 samples/s, lr 30.00
[Epoch 29] throughput 27717.50 samples/s
[Epoch 29] time cost 82.55s, valid loss 4.63, valid ppl 102.83, lr 30.00
[Epoch 30 Batch 200/372] current loss 5.04, ppl 154.99, throughput 402.88 samples/s, lr 32.57
[Epoch 30] throughput 27573.07 samples/s
[Epoch 30] time cost 82.88s, valid loss 4.62, valid ppl 101.19, lr 30.00
[Epoch 31 Batch 200/372] current loss 5.02, ppl 151.91, throughput 406.84 samples/s, lr 34.71
[Epoch 31] throughput 27433.16 samples/s
[Epoch 31] time cost 83.28s, valid loss 4.58, valid ppl 97.57, lr 30.00
[Epoch 31] test loss 4.51, test ppl 91.32
[Epoch 32 Batch 200/372] current loss 5.00, ppl 148.69, throughput 401.37 samples/s, lr 30.00
[Epoch 32] throughput 27946.19 samples/s
[Epoch 32] time cost 81.84s, valid loss 4.58, valid ppl 97.53, lr 30.00
[Epoch 32] test loss 4.52, test ppl 91.43
[Epoch 33 Batch 200/372] current loss 5.00, ppl 148.20, throughput 403.44 samples/s, lr 29.57
[Epoch 33] throughput 27648.94 samples/s
[Epoch 33] time cost 82.88s, valid loss 4.58, valid ppl 97.03, lr 30.00
[Epoch 33] test loss 4.51, test ppl 90.68
[Epoch 34 Batch 200/372] current loss 4.97, ppl 144.55, throughput 407.29 samples/s, lr 31.71
[Epoch 34] throughput 27697.43 samples/s
[Epoch 34] time cost 82.68s, valid loss 4.56, valid ppl 95.38, lr 30.00
[Epoch 34] test loss 4.49, test ppl 89.45
[Epoch 35 Batch 200/372] current loss 4.96, ppl 142.80, throughput 401.33 samples/s, lr 33.00
[Epoch 35] throughput 27460.54 samples/s
[Epoch 35] time cost 83.31s, valid loss 4.55, valid ppl 94.30, lr 30.00
[Epoch 35] test loss 4.48, test ppl 88.26
[Epoch 36 Batch 200/372] current loss 4.96, ppl 142.62, throughput 404.48 samples/s, lr 30.43
[Epoch 36] throughput 27724.17 samples/s
[Epoch 36] time cost 82.63s, valid loss 4.56, valid ppl 95.63, lr 30.00
[Epoch 37 Batch 200/372] current loss 4.95, ppl 141.29, throughput 404.53 samples/s, lr 30.86
[Epoch 37] throughput 27537.29 samples/s
[Epoch 37] time cost 83.12s, valid loss 4.54, valid ppl 93.58, lr 30.00
[Epoch 37] test loss 4.48, test ppl 87.81
[Epoch 38 Batch 200/372] current loss 4.94, ppl 139.32, throughput 400.69 samples/s, lr 32.14
[Epoch 38] throughput 27555.84 samples/s
[Epoch 38] time cost 83.07s, valid loss 4.52, valid ppl 91.42, lr 30.00
[Epoch 38] test loss 4.45, test ppl 85.74
[Epoch 39 Batch 200/372] current loss 4.91, ppl 136.24, throughput 418.92 samples/s, lr 29.14
[Epoch 39] throughput 27565.96 samples/s
[Epoch 39] time cost 83.06s, valid loss 4.52, valid ppl 92.03, lr 30.00
[Epoch 40 Batch 200/372] current loss 4.92, ppl 137.08, throughput 405.13 samples/s, lr 31.29
[Epoch 40] throughput 27403.46 samples/s
[Epoch 40] time cost 83.45s, valid loss 4.53, valid ppl 92.42, lr 30.00
[Epoch 41 Batch 200/372] current loss 4.90, ppl 133.89, throughput 410.93 samples/s, lr 30.43
[Epoch 41] throughput 27860.01 samples/s
[Epoch 41] time cost 82.06s, valid loss 4.54, valid ppl 93.49, lr 30.00
[Epoch 42 Batch 200/372] current loss 4.89, ppl 132.36, throughput 401.88 samples/s, lr 27.86
[Epoch 42] throughput 27602.02 samples/s
[Epoch 42] time cost 82.77s, valid loss 4.51, valid ppl 91.24, lr 30.00
[Epoch 42] test loss 4.45, test ppl 85.86
[Epoch 43 Batch 200/372] current loss 4.89, ppl 133.05, throughput 403.03 samples/s, lr 27.43
[Epoch 43] throughput 27968.79 samples/s
[Epoch 43] time cost 81.78s, valid loss 4.51, valid ppl 90.68, lr 30.00
[Epoch 43] test loss 4.45, test ppl 85.25
[Epoch 44 Batch 200/372] current loss 4.88, ppl 131.75, throughput 405.89 samples/s, lr 11.57
[Epoch 44] throughput 27718.77 samples/s
[Epoch 44] time cost 82.54s, valid loss 4.50, valid ppl 90.01, lr 30.00
[Epoch 44] test loss 4.44, test ppl 84.44
[Epoch 45 Batch 200/372] current loss 4.87, ppl 129.90, throughput 405.38 samples/s, lr 29.57
[Epoch 45] throughput 27802.73 samples/s
[Epoch 45] time cost 82.33s, valid loss 4.50, valid ppl 89.57, lr 30.00
[Epoch 45] test loss 4.43, test ppl 84.18
[Epoch 46 Batch 200/372] current loss 4.85, ppl 127.96, throughput 408.08 samples/s, lr 27.86
[Epoch 46] throughput 28198.95 samples/s
[Epoch 46] time cost 81.19s, valid loss 4.48, valid ppl 87.88, lr 30.00
[Epoch 46] test loss 4.41, test ppl 82.52
[Epoch 47 Batch 200/372] current loss 4.84, ppl 126.63, throughput 410.60 samples/s, lr 28.71
[Epoch 47] throughput 27773.77 samples/s
[Epoch 47] time cost 82.33s, valid loss 4.48, valid ppl 88.56, lr 30.00
[Epoch 48 Batch 200/372] current loss 4.85, ppl 127.49, throughput 403.64 samples/s, lr 26.57
[Epoch 48] throughput 27904.37 samples/s
[Epoch 48] time cost 82.03s, valid loss 4.47, valid ppl 87.73, lr 30.00
[Epoch 48] test loss 4.41, test ppl 82.23
[Epoch 49 Batch 200/372] current loss 4.82, ppl 123.81, throughput 406.82 samples/s, lr 30.43
[Epoch 49] throughput 27568.50 samples/s
[Epoch 49] time cost 82.89s, valid loss 4.47, valid ppl 87.51, lr 30.00
[Epoch 49] test loss 4.41, test ppl 82.08
[Epoch 50 Batch 200/372] current loss 4.82, ppl 124.44, throughput 419.76 samples/s, lr 33.43
[Epoch 50] throughput 27901.84 samples/s
[Epoch 50] time cost 81.97s, valid loss 4.48, valid ppl 88.29, lr 30.00
[Epoch 51 Batch 200/372] current loss 4.81, ppl 122.78, throughput 400.94 samples/s, lr 29.14
[Epoch 51] throughput 27575.68 samples/s
[Epoch 51] time cost 82.85s, valid loss 4.46, valid ppl 86.23, lr 30.00
[Epoch 51] test loss 4.40, test ppl 81.06
[Epoch 52 Batch 200/372] current loss 4.80, ppl 121.63, throughput 406.48 samples/s, lr 28.71
[Epoch 52] throughput 28020.22 samples/s
[Epoch 52] time cost 81.67s, valid loss 4.47, valid ppl 87.04, lr 30.00
[Epoch 53 Batch 200/372] current loss 4.80, ppl 121.35, throughput 406.37 samples/s, lr 30.00
[Epoch 53] throughput 27687.38 samples/s
[Epoch 53] time cost 82.53s, valid loss 4.47, valid ppl 86.94, lr 30.00
[Epoch 54 Batch 200/372] current loss 4.79, ppl 120.32, throughput 407.42 samples/s, lr 27.43
[Epoch 54] throughput 27958.58 samples/s
[Epoch 54] time cost 81.86s, valid loss 4.49, valid ppl 89.10, lr 30.00
Switching to NTASGD and avg_trigger is : 20460
[Epoch 55 Batch 200/372] current loss 4.80, ppl 121.02, throughput 391.92 samples/s, lr 26.57
[Epoch 55] throughput 27067.59 samples/s
[Epoch 55] time cost 84.16s, valid loss 4.42, valid ppl 82.93, lr 30.00
[Epoch 55] test loss 4.36, test ppl 78.31
[Epoch 56 Batch 200/372] current loss 4.77, ppl 117.86, throughput 397.58 samples/s, lr 31.71
[Epoch 56] throughput 27239.37 samples/s
[Epoch 56] time cost 83.63s, valid loss 4.42, valid ppl 82.70, lr 30.00
[Epoch 56] test loss 4.36, test ppl 78.03
[Epoch 57 Batch 200/372] current loss 4.77, ppl 117.37, throughput 394.49 samples/s, lr 27.00
[Epoch 57] throughput 27088.26 samples/s
[Epoch 57] time cost 84.04s, valid loss 4.41, valid ppl 82.47, lr 30.00
[Epoch 57] test loss 4.35, test ppl 77.81
[Epoch 58 Batch 200/372] current loss 4.76, ppl 117.00, throughput 395.33 samples/s, lr 26.14
[Epoch 58] throughput 26796.85 samples/s
[Epoch 58] time cost 84.97s, valid loss 4.41, valid ppl 82.25, lr 30.00
[Epoch 58] test loss 4.35, test ppl 77.60
[Epoch 59 Batch 200/372] current loss 4.77, ppl 118.02, throughput 402.01 samples/s, lr 30.43
[Epoch 59] throughput 27330.83 samples/s
[Epoch 59] time cost 83.42s, valid loss 4.41, valid ppl 82.07, lr 30.00
[Epoch 59] test loss 4.35, test ppl 77.44
[Epoch 60 Batch 200/372] current loss 4.76, ppl 116.20, throughput 400.68 samples/s, lr 30.43
[Epoch 60] throughput 27088.56 samples/s
[Epoch 60] time cost 84.15s, valid loss 4.41, valid ppl 81.89, lr 30.00
[Epoch 60] test loss 4.35, test ppl 77.28
[Epoch 61 Batch 200/372] current loss 4.76, ppl 116.34, throughput 396.79 samples/s, lr 32.57
[Epoch 61] throughput 27113.74 samples/s
[Epoch 61] time cost 84.05s, valid loss 4.40, valid ppl 81.72, lr 30.00
[Epoch 61] test loss 4.35, test ppl 77.13
[Epoch 62 Batch 200/372] current loss 4.76, ppl 116.39, throughput 402.02 samples/s, lr 32.57
[Epoch 62] throughput 27479.31 samples/s
[Epoch 62] time cost 82.98s, valid loss 4.40, valid ppl 81.55, lr 30.00
[Epoch 62] test loss 4.34, test ppl 76.99
[Epoch 63 Batch 200/372] current loss 4.74, ppl 114.11, throughput 400.35 samples/s, lr 26.14
[Epoch 63] throughput 27004.14 samples/s
[Epoch 63] time cost 84.32s, valid loss 4.40, valid ppl 81.40, lr 30.00
[Epoch 63] test loss 4.34, test ppl 76.86
[Epoch 64 Batch 200/372] current loss 4.74, ppl 113.89, throughput 400.92 samples/s, lr 28.71
[Epoch 64] throughput 27069.68 samples/s
[Epoch 64] time cost 84.19s, valid loss 4.40, valid ppl 81.26, lr 30.00
[Epoch 64] test loss 4.34, test ppl 76.74
[Epoch 65 Batch 200/372] current loss 4.73, ppl 113.16, throughput 395.09 samples/s, lr 30.43
[Epoch 65] throughput 27300.37 samples/s
[Epoch 65] time cost 83.50s, valid loss 4.40, valid ppl 81.11, lr 30.00
[Epoch 65] test loss 4.34, test ppl 76.61
[Epoch 66 Batch 200/372] current loss 4.72, ppl 111.79, throughput 397.53 samples/s, lr 33.86
[Epoch 66] throughput 27353.17 samples/s
[Epoch 66] time cost 83.40s, valid loss 4.39, valid ppl 80.97, lr 30.00
[Epoch 66] test loss 4.34, test ppl 76.50
[Epoch 67 Batch 200/372] current loss 4.73, ppl 112.97, throughput 401.89 samples/s, lr 28.71
[Epoch 67] throughput 27274.72 samples/s
[Epoch 67] time cost 83.58s, valid loss 4.39, valid ppl 80.84, lr 30.00
[Epoch 67] test loss 4.34, test ppl 76.39
[Epoch 68 Batch 200/372] current loss 4.72, ppl 112.29, throughput 405.11 samples/s, lr 31.29
[Epoch 68] throughput 27482.87 samples/s
[Epoch 68] time cost 83.00s, valid loss 4.39, valid ppl 80.71, lr 30.00
[Epoch 68] test loss 4.33, test ppl 76.29
[Epoch 69 Batch 200/372] current loss 4.71, ppl 111.29, throughput 407.29 samples/s, lr 27.43
[Epoch 69] throughput 27169.21 samples/s
[Epoch 69] time cost 83.88s, valid loss 4.39, valid ppl 80.59, lr 30.00
[Epoch 69] test loss 4.33, test ppl 76.18
[Epoch 70 Batch 200/372] current loss 4.70, ppl 110.32, throughput 392.61 samples/s, lr 25.71
[Epoch 70] throughput 27035.18 samples/s
[Epoch 70] time cost 84.28s, valid loss 4.39, valid ppl 80.48, lr 30.00
[Epoch 70] test loss 4.33, test ppl 76.08
[Epoch 71 Batch 200/372] current loss 4.70, ppl 110.39, throughput 387.76 samples/s, lr 29.57
[Epoch 71] throughput 26835.05 samples/s
[Epoch 71] time cost 84.81s, valid loss 4.39, valid ppl 80.37, lr 30.00
[Epoch 71] test loss 4.33, test ppl 75.98
[Epoch 72 Batch 200/372] current loss 4.69, ppl 109.08, throughput 392.37 samples/s, lr 13.71
[Epoch 72] throughput 27432.08 samples/s
[Epoch 72] time cost 83.13s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 72] test loss 4.33, test ppl 75.89
[Epoch 73 Batch 200/372] current loss 4.70, ppl 110.25, throughput 393.87 samples/s, lr 28.29
[Epoch 73] throughput 26991.64 samples/s
[Epoch 73] time cost 84.38s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 73] test loss 4.33, test ppl 75.79
[Epoch 74 Batch 200/372] current loss 4.69, ppl 108.62, throughput 402.37 samples/s, lr 30.86
[Epoch 74] throughput 27543.94 samples/s
[Epoch 74] time cost 82.86s, valid loss 4.38, valid ppl 80.06, lr 30.00
[Epoch 74] test loss 4.33, test ppl 75.70
[Epoch 75 Batch 200/372] current loss 4.69, ppl 108.49, throughput 398.95 samples/s, lr 28.71
[Epoch 75] throughput 27475.44 samples/s
[Epoch 75] time cost 82.99s, valid loss 4.38, valid ppl 79.96, lr 30.00
[Epoch 75] test loss 4.33, test ppl 75.61
[Epoch 76 Batch 200/372] current loss 4.68, ppl 107.66, throughput 410.75 samples/s, lr 15.86
[Epoch 76] throughput 27088.72 samples/s
[Epoch 76] time cost 84.10s, valid loss 4.38, valid ppl 79.86, lr 30.00
[Epoch 76] test loss 4.32, test ppl 75.52
[Epoch 77 Batch 200/372] current loss 4.68, ppl 107.91, throughput 394.09 samples/s, lr 29.14
[Epoch 77] throughput 27013.38 samples/s
[Epoch 77] time cost 84.32s, valid loss 4.38, valid ppl 79.77, lr 30.00
[Epoch 77] test loss 4.32, test ppl 75.44
[Epoch 78 Batch 200/372] current loss 4.66, ppl 106.07, throughput 397.17 samples/s, lr 29.57
[Epoch 78] throughput 26817.53 samples/s
[Epoch 78] time cost 84.90s, valid loss 4.38, valid ppl 79.67, lr 30.00
[Epoch 78] test loss 4.32, test ppl 75.36
[Epoch 79 Batch 200/372] current loss 4.68, ppl 107.46, throughput 401.24 samples/s, lr 27.00
[Epoch 79] throughput 27316.01 samples/s
[Epoch 79] time cost 83.49s, valid loss 4.38, valid ppl 79.58, lr 30.00
[Epoch 79] test loss 4.32, test ppl 75.28
[Epoch 80 Batch 200/372] current loss 4.68, ppl 107.34, throughput 387.84 samples/s, lr 29.57
[Epoch 80] throughput 26941.33 samples/s
[Epoch 80] time cost 84.50s, valid loss 4.38, valid ppl 79.49, lr 30.00
[Epoch 80] test loss 4.32, test ppl 75.20
[Epoch 81 Batch 200/372] current loss 4.66, ppl 105.86, throughput 397.60 samples/s, lr 32.57
[Epoch 81] throughput 27062.98 samples/s
[Epoch 81] time cost 84.13s, valid loss 4.37, valid ppl 79.41, lr 30.00
[Epoch 81] test loss 4.32, test ppl 75.12
[Epoch 82 Batch 200/372] current loss 4.65, ppl 104.90, throughput 397.62 samples/s, lr 28.29
[Epoch 82] throughput 26968.82 samples/s
[Epoch 82] time cost 84.47s, valid loss 4.37, valid ppl 79.32, lr 30.00
[Epoch 82] test loss 4.32, test ppl 75.05
[Epoch 83 Batch 200/372] current loss 4.66, ppl 105.64, throughput 394.51 samples/s, lr 27.43
[Epoch 83] throughput 26878.41 samples/s
[Epoch 83] time cost 84.70s, valid loss 4.37, valid ppl 79.24, lr 30.00
[Epoch 83] test loss 4.32, test ppl 74.97
[Epoch 84 Batch 200/372] current loss 4.66, ppl 105.46, throughput 399.21 samples/s, lr 26.57
[Epoch 84] throughput 27162.49 samples/s
[Epoch 84] time cost 83.92s, valid loss 4.37, valid ppl 79.15, lr 30.00
[Epoch 84] test loss 4.32, test ppl 74.90
[Epoch 85 Batch 200/372] current loss 4.64, ppl 103.52, throughput 402.40 samples/s, lr 32.57
[Epoch 85] throughput 27179.81 samples/s
[Epoch 85] time cost 83.89s, valid loss 4.37, valid ppl 79.07, lr 30.00
[Epoch 85] test loss 4.32, test ppl 74.83
[Epoch 86 Batch 200/372] current loss 4.64, ppl 103.53, throughput 399.44 samples/s, lr 30.86
[Epoch 86] throughput 27226.38 samples/s
[Epoch 86] time cost 83.73s, valid loss 4.37, valid ppl 78.99, lr 30.00
[Epoch 86] test loss 4.31, test ppl 74.76
[Epoch 87 Batch 200/372] current loss 4.64, ppl 103.72, throughput 406.15 samples/s, lr 29.14
[Epoch 87] throughput 27314.29 samples/s
[Epoch 87] time cost 83.45s, valid loss 4.37, valid ppl 78.91, lr 30.00
[Epoch 87] test loss 4.31, test ppl 74.70
[Epoch 88 Batch 200/372] current loss 4.64, ppl 103.69, throughput 398.82 samples/s, lr 33.00
[Epoch 88] throughput 26733.46 samples/s
[Epoch 88] time cost 85.10s, valid loss 4.37, valid ppl 78.83, lr 30.00
[Epoch 88] test loss 4.31, test ppl 74.63
[Epoch 89 Batch 200/372] current loss 4.63, ppl 102.66, throughput 392.36 samples/s, lr 27.86
[Epoch 89] throughput 26973.50 samples/s
[Epoch 89] time cost 84.44s, valid loss 4.37, valid ppl 78.76, lr 30.00
[Epoch 89] test loss 4.31, test ppl 74.56
[Epoch 90 Batch 200/372] current loss 4.63, ppl 102.68, throughput 392.96 samples/s, lr 28.71
[Epoch 90] throughput 27019.20 samples/s
[Epoch 90] time cost 84.31s, valid loss 4.37, valid ppl 78.69, lr 30.00
[Epoch 90] test loss 4.31, test ppl 74.50
[Epoch 91 Batch 200/372] current loss 4.65, ppl 104.33, throughput 395.29 samples/s, lr 15.43
[Epoch 91] throughput 26903.06 samples/s
[Epoch 91] time cost 84.65s, valid loss 4.36, valid ppl 78.61, lr 30.00
[Epoch 91] test loss 4.31, test ppl 74.43
[Epoch 92 Batch 200/372] current loss 4.61, ppl 100.56, throughput 399.54 samples/s, lr 28.71
[Epoch 92] throughput 27286.66 samples/s
[Epoch 92] time cost 83.51s, valid loss 4.36, valid ppl 78.54, lr 30.00
[Epoch 92] test loss 4.31, test ppl 74.37
[Epoch 93 Batch 200/372] current loss 4.61, ppl 100.98, throughput 404.78 samples/s, lr 13.29
[Epoch 93] throughput 27485.51 samples/s
[Epoch 93] time cost 82.95s, valid loss 4.36, valid ppl 78.47, lr 30.00
[Epoch 93] test loss 4.31, test ppl 74.31
[Epoch 94 Batch 200/372] current loss 4.62, ppl 101.73, throughput 401.17 samples/s, lr 33.43
[Epoch 94] throughput 27412.93 samples/s
[Epoch 94] time cost 83.18s, valid loss 4.36, valid ppl 78.40, lr 30.00
[Epoch 94] test loss 4.31, test ppl 74.24
[Epoch 95 Batch 200/372] current loss 4.62, ppl 101.06, throughput 398.86 samples/s, lr 28.29
[Epoch 95] throughput 27312.14 samples/s
[Epoch 95] time cost 83.53s, valid loss 4.36, valid ppl 78.33, lr 30.00
[Epoch 95] test loss 4.31, test ppl 74.18
[Epoch 96 Batch 200/372] current loss 4.61, ppl 100.45, throughput 401.32 samples/s, lr 31.71
[Epoch 96] throughput 27035.86 samples/s
[Epoch 96] time cost 84.22s, valid loss 4.36, valid ppl 78.27, lr 30.00
[Epoch 96] test loss 4.31, test ppl 74.12
[Epoch 97 Batch 200/372] current loss 4.61, ppl 100.59, throughput 403.59 samples/s, lr 34.29
[Epoch 97] throughput 27502.89 samples/s
[Epoch 97] time cost 82.95s, valid loss 4.36, valid ppl 78.21, lr 30.00
[Epoch 97] test loss 4.30, test ppl 74.07
[Epoch 98 Batch 200/372] current loss 4.61, ppl 100.44, throughput 400.75 samples/s, lr 15.43
[Epoch 98] throughput 27484.68 samples/s
[Epoch 98] time cost 83.01s, valid loss 4.36, valid ppl 78.15, lr 30.00
[Epoch 98] test loss 4.30, test ppl 74.01
[Epoch 99 Batch 200/372] current loss 4.61, ppl 100.20, throughput 406.94 samples/s, lr 28.71
[Epoch 99] throughput 27146.41 samples/s
[Epoch 99] time cost 83.90s, valid loss 4.36, valid ppl 78.09, lr 30.00
[Epoch 99] test loss 4.30, test ppl 73.95
[Epoch 100 Batch 200/372] current loss 4.60, ppl 99.91, throughput 392.21 samples/s, lr 27.00
[Epoch 100] throughput 26897.14 samples/s
[Epoch 100] time cost 84.67s, valid loss 4.36, valid ppl 78.03, lr 30.00
[Epoch 100] test loss 4.30, test ppl 73.90
[Epoch 101 Batch 200/372] current loss 4.61, ppl 100.62, throughput 404.61 samples/s, lr 17.14
[Epoch 101] throughput 27493.11 samples/s
[Epoch 101] time cost 82.99s, valid loss 4.36, valid ppl 77.97, lr 30.00
[Epoch 101] test loss 4.30, test ppl 73.84
[Epoch 102 Batch 200/372] current loss 4.60, ppl 99.02, throughput 393.88 samples/s, lr 27.43
[Epoch 102] throughput 27312.00 samples/s
[Epoch 102] time cost 83.43s, valid loss 4.36, valid ppl 77.91, lr 30.00
[Epoch 102] test loss 4.30, test ppl 73.79
[Epoch 103 Batch 200/372] current loss 4.60, ppl 99.11, throughput 398.08 samples/s, lr 28.71
[Epoch 103] throughput 27413.87 samples/s
[Epoch 103] time cost 83.21s, valid loss 4.35, valid ppl 77.85, lr 30.00
[Epoch 103] test loss 4.30, test ppl 73.74
[Epoch 104 Batch 200/372] current loss 4.59, ppl 98.09, throughput 402.52 samples/s, lr 31.29
[Epoch 104] throughput 27102.38 samples/s
[Epoch 104] time cost 84.11s, valid loss 4.35, valid ppl 77.80, lr 30.00
[Epoch 104] test loss 4.30, test ppl 73.69
[Epoch 105 Batch 200/372] current loss 4.60, ppl 99.39, throughput 399.22 samples/s, lr 27.86
[Epoch 105] throughput 26912.19 samples/s
[Epoch 105] time cost 84.62s, valid loss 4.35, valid ppl 77.74, lr 30.00
[Epoch 105] test loss 4.30, test ppl 73.64
[Epoch 106 Batch 200/372] current loss 4.58, ppl 97.25, throughput 408.70 samples/s, lr 27.86
[Epoch 106] throughput 27478.71 samples/s
[Epoch 106] time cost 83.05s, valid loss 4.35, valid ppl 77.69, lr 30.00
[Epoch 106] test loss 4.30, test ppl 73.59
[Epoch 107 Batch 200/372] current loss 4.58, ppl 97.50, throughput 395.18 samples/s, lr 24.00
[Epoch 107] throughput 27063.50 samples/s
[Epoch 107] time cost 84.20s, valid loss 4.35, valid ppl 77.63, lr 30.00
[Epoch 107] test loss 4.30, test ppl 73.54
[Epoch 108 Batch 200/372] current loss 4.57, ppl 96.70, throughput 398.60 samples/s, lr 31.29
[Epoch 108] throughput 27035.14 samples/s
[Epoch 108] time cost 84.28s, valid loss 4.35, valid ppl 77.58, lr 30.00
[Epoch 108] test loss 4.30, test ppl 73.49
[Epoch 109 Batch 200/372] current loss 4.59, ppl 98.26, throughput 394.87 samples/s, lr 30.00
[Epoch 109] throughput 26794.34 samples/s
[Epoch 109] time cost 84.92s, valid loss 4.35, valid ppl 77.53, lr 30.00
[Epoch 109] test loss 4.30, test ppl 73.45
[Epoch 110 Batch 200/372] current loss 4.60, ppl 99.19, throughput 393.30 samples/s, lr 34.71
[Epoch 110] throughput 26998.67 samples/s
[Epoch 110] time cost 84.37s, valid loss 4.35, valid ppl 77.48, lr 30.00
[Epoch 110] test loss 4.30, test ppl 73.40
[Epoch 111 Batch 200/372] current loss 4.58, ppl 97.27, throughput 406.77 samples/s, lr 32.57
[Epoch 111] throughput 27548.92 samples/s
[Epoch 111] time cost 82.77s, valid loss 4.35, valid ppl 77.43, lr 30.00
[Epoch 111] test loss 4.30, test ppl 73.36
[Epoch 112 Batch 200/372] current loss 4.57, ppl 96.79, throughput 395.06 samples/s, lr 28.71
[Epoch 112] throughput 26993.76 samples/s
[Epoch 112] time cost 84.42s, valid loss 4.35, valid ppl 77.38, lr 30.00
[Epoch 112] test loss 4.29, test ppl 73.31
[Epoch 113 Batch 200/372] current loss 4.58, ppl 97.14, throughput 406.90 samples/s, lr 27.86
[Epoch 113] throughput 27084.94 samples/s
[Epoch 113] time cost 84.11s, valid loss 4.35, valid ppl 77.33, lr 30.00
[Epoch 113] test loss 4.29, test ppl 73.27
[Epoch 114 Batch 200/372] current loss 4.55, ppl 94.89, throughput 402.36 samples/s, lr 32.57
[Epoch 114] throughput 27124.18 samples/s
[Epoch 114] time cost 84.06s, valid loss 4.35, valid ppl 77.28, lr 30.00
[Epoch 114] test loss 4.29, test ppl 73.22
[Epoch 115 Batch 200/372] current loss 4.56, ppl 96.00, throughput 392.72 samples/s, lr 31.71
[Epoch 115] throughput 27140.40 samples/s
[Epoch 115] time cost 83.98s, valid loss 4.35, valid ppl 77.23, lr 30.00
[Epoch 115] test loss 4.29, test ppl 73.18
[Epoch 116 Batch 200/372] current loss 4.56, ppl 95.62, throughput 396.01 samples/s, lr 31.29
[Epoch 116] throughput 27086.29 samples/s
[Epoch 116] time cost 84.12s, valid loss 4.35, valid ppl 77.18, lr 30.00
[Epoch 116] test loss 4.29, test ppl 73.14
[Epoch 117 Batch 200/372] current loss 4.57, ppl 96.51, throughput 396.61 samples/s, lr 28.71
[Epoch 117] throughput 26950.84 samples/s
[Epoch 117] time cost 84.62s, valid loss 4.35, valid ppl 77.14, lr 30.00
[Epoch 117] test loss 4.29, test ppl 73.10
[Epoch 118 Batch 200/372] current loss 4.56, ppl 95.22, throughput 401.45 samples/s, lr 31.29
[Epoch 118] throughput 27197.82 samples/s
[Epoch 118] time cost 83.83s, valid loss 4.34, valid ppl 77.09, lr 30.00
[Epoch 118] test loss 4.29, test ppl 73.06
[Epoch 119 Batch 200/372] current loss 4.57, ppl 96.60, throughput 394.95 samples/s, lr 29.57
[Epoch 119] throughput 27341.52 samples/s
[Epoch 119] time cost 83.36s, valid loss 4.34, valid ppl 77.05, lr 30.00
[Epoch 119] test loss 4.29, test ppl 73.02
[Epoch 120 Batch 200/372] current loss 4.56, ppl 96.05, throughput 399.93 samples/s, lr 33.86
[Epoch 120] throughput 27213.68 samples/s
[Epoch 120] time cost 83.84s, valid loss 4.34, valid ppl 77.00, lr 30.00
[Epoch 120] test loss 4.29, test ppl 72.98
[Epoch 121 Batch 200/372] current loss 4.56, ppl 95.68, throughput 396.71 samples/s, lr 29.57
[Epoch 121] throughput 26785.62 samples/s
[Epoch 121] time cost 84.98s, valid loss 4.34, valid ppl 76.96, lr 30.00
[Epoch 121] test loss 4.29, test ppl 72.94
[Epoch 122 Batch 200/372] current loss 4.54, ppl 93.71, throughput 393.10 samples/s, lr 30.00
[Epoch 122] throughput 27042.26 samples/s
[Epoch 122] time cost 84.26s, valid loss 4.34, valid ppl 76.92, lr 30.00
[Epoch 122] test loss 4.29, test ppl 72.90
[Epoch 123 Batch 200/372] current loss 4.53, ppl 93.02, throughput 404.27 samples/s, lr 30.86
[Epoch 123] throughput 27424.41 samples/s
[Epoch 123] time cost 83.15s, valid loss 4.34, valid ppl 76.87, lr 30.00
[Epoch 123] test loss 4.29, test ppl 72.86
[Epoch 124 Batch 200/372] current loss 4.55, ppl 94.45, throughput 395.10 samples/s, lr 29.57
[Epoch 124] throughput 27095.50 samples/s
[Epoch 124] time cost 84.07s, valid loss 4.34, valid ppl 76.83, lr 30.00
[Epoch 124] test loss 4.29, test ppl 72.82
[Epoch 125 Batch 200/372] current loss 4.54, ppl 94.06, throughput 389.77 samples/s, lr 27.86
[Epoch 125] throughput 27229.98 samples/s
[Epoch 125] time cost 83.76s, valid loss 4.34, valid ppl 76.79, lr 30.00
[Epoch 125] test loss 4.29, test ppl 72.79
[Epoch 126 Batch 200/372] current loss 4.55, ppl 94.56, throughput 393.07 samples/s, lr 32.14
[Epoch 126] throughput 27074.09 samples/s
[Epoch 126] time cost 84.12s, valid loss 4.34, valid ppl 76.75, lr 30.00
[Epoch 126] test loss 4.29, test ppl 72.75
[Epoch 127 Batch 200/372] current loss 4.55, ppl 94.57, throughput 395.03 samples/s, lr 31.71
[Epoch 127] throughput 26754.78 samples/s
[Epoch 127] time cost 85.01s, valid loss 4.34, valid ppl 76.71, lr 30.00
[Epoch 127] test loss 4.29, test ppl 72.72
[Epoch 128 Batch 200/372] current loss 4.54, ppl 93.28, throughput 402.22 samples/s, lr 30.00
[Epoch 128] throughput 27264.26 samples/s
[Epoch 128] time cost 83.61s, valid loss 4.34, valid ppl 76.67, lr 30.00
[Epoch 128] test loss 4.29, test ppl 72.68
[Epoch 129 Batch 200/372] current loss 4.55, ppl 94.77, throughput 405.41 samples/s, lr 32.57
[Epoch 129] throughput 27380.88 samples/s
[Epoch 129] time cost 83.23s, valid loss 4.34, valid ppl 76.63, lr 30.00
[Epoch 129] test loss 4.29, test ppl 72.64
[Epoch 130 Batch 200/372] current loss 4.54, ppl 93.75, throughput 396.84 samples/s, lr 31.29
[Epoch 130] throughput 26905.30 samples/s
[Epoch 130] time cost 84.62s, valid loss 4.34, valid ppl 76.59, lr 30.00
[Epoch 130] test loss 4.29, test ppl 72.61
[Epoch 131 Batch 200/372] current loss 4.53, ppl 93.11, throughput 402.14 samples/s, lr 14.57
[Epoch 131] throughput 27349.78 samples/s
[Epoch 131] time cost 83.35s, valid loss 4.34, valid ppl 76.55, lr 30.00
[Epoch 131] test loss 4.28, test ppl 72.58
[Epoch 132 Batch 200/372] current loss 4.54, ppl 93.46, throughput 397.77 samples/s, lr 27.00
[Epoch 132] throughput 26988.67 samples/s
[Epoch 132] time cost 84.39s, valid loss 4.34, valid ppl 76.51, lr 30.00
[Epoch 132] test loss 4.28, test ppl 72.54
[Epoch 133 Batch 200/372] current loss 4.53, ppl 92.66, throughput 402.55 samples/s, lr 29.57
[Epoch 133] throughput 27303.13 samples/s
[Epoch 133] time cost 83.61s, valid loss 4.34, valid ppl 76.47, lr 30.00
[Epoch 133] test loss 4.28, test ppl 72.51
[Epoch 134 Batch 200/372] current loss 4.54, ppl 93.44, throughput 400.05 samples/s, lr 30.43
[Epoch 134] throughput 27141.20 samples/s
[Epoch 134] time cost 83.96s, valid loss 4.34, valid ppl 76.43, lr 30.00
[Epoch 134] test loss 4.28, test ppl 72.47
[Epoch 135 Batch 200/372] current loss 4.53, ppl 93.04, throughput 392.42 samples/s, lr 28.71
[Epoch 135] throughput 27229.15 samples/s
[Epoch 135] time cost 83.73s, valid loss 4.34, valid ppl 76.40, lr 30.00
[Epoch 135] test loss 4.28, test ppl 72.44
[Epoch 136 Batch 200/372] current loss 4.53, ppl 92.41, throughput 405.28 samples/s, lr 27.00
[Epoch 136] throughput 27650.09 samples/s
[Epoch 136] time cost 82.49s, valid loss 4.34, valid ppl 76.36, lr 30.00
[Epoch 136] test loss 4.28, test ppl 72.41
[Epoch 137 Batch 200/372] current loss 4.54, ppl 93.42, throughput 410.59 samples/s, lr 29.57
[Epoch 137] throughput 27709.35 samples/s
[Epoch 137] time cost 82.41s, valid loss 4.34, valid ppl 76.33, lr 30.00
[Epoch 137] test loss 4.28, test ppl 72.37
[Epoch 138 Batch 200/372] current loss 4.52, ppl 91.69, throughput 402.96 samples/s, lr 33.00
[Epoch 138] throughput 27732.98 samples/s
[Epoch 138] time cost 82.31s, valid loss 4.33, valid ppl 76.29, lr 30.00
[Epoch 138] test loss 4.28, test ppl 72.34
[Epoch 139 Batch 200/372] current loss 4.52, ppl 91.59, throughput 405.31 samples/s, lr 29.57
[Epoch 139] throughput 27639.71 samples/s
[Epoch 139] time cost 82.55s, valid loss 4.33, valid ppl 76.26, lr 30.00
[Epoch 139] test loss 4.28, test ppl 72.31
[Epoch 140 Batch 200/372] current loss 4.53, ppl 92.87, throughput 406.54 samples/s, lr 13.71
[Epoch 140] throughput 27980.04 samples/s
[Epoch 140] time cost 81.68s, valid loss 4.33, valid ppl 76.22, lr 30.00
[Epoch 140] test loss 4.28, test ppl 72.28
[Epoch 141 Batch 200/372] current loss 4.52, ppl 91.66, throughput 412.68 samples/s, lr 30.43
[Epoch 141] throughput 27677.44 samples/s
[Epoch 141] time cost 82.44s, valid loss 4.33, valid ppl 76.19, lr 30.00
[Epoch 141] test loss 4.28, test ppl 72.25
[Epoch 142 Batch 200/372] current loss 4.51, ppl 90.96, throughput 394.58 samples/s, lr 27.86
[Epoch 142] throughput 27110.74 samples/s
[Epoch 142] time cost 84.02s, valid loss 4.33, valid ppl 76.16, lr 30.00
[Epoch 142] test loss 4.28, test ppl 72.22
[Epoch 143 Batch 200/372] current loss 4.51, ppl 91.23, throughput 403.30 samples/s, lr 26.14
[Epoch 143] throughput 27209.73 samples/s
[Epoch 143] time cost 83.81s, valid loss 4.33, valid ppl 76.12, lr 30.00
[Epoch 143] test loss 4.28, test ppl 72.19
[Epoch 144 Batch 200/372] current loss 4.51, ppl 91.23, throughput 397.78 samples/s, lr 27.00
[Epoch 144] throughput 27113.64 samples/s
[Epoch 144] time cost 84.06s, valid loss 4.33, valid ppl 76.09, lr 30.00
[Epoch 144] test loss 4.28, test ppl 72.16
[Epoch 145 Batch 200/372] current loss 4.52, ppl 91.73, throughput 402.59 samples/s, lr 32.14
[Epoch 145] throughput 27173.94 samples/s
[Epoch 145] time cost 83.85s, valid loss 4.33, valid ppl 76.06, lr 30.00
[Epoch 145] test loss 4.28, test ppl 72.14
[Epoch 146 Batch 200/372] current loss 4.51, ppl 90.94, throughput 410.96 samples/s, lr 31.29
[Epoch 146] throughput 27490.06 samples/s
[Epoch 146] time cost 82.94s, valid loss 4.33, valid ppl 76.03, lr 30.00
[Epoch 146] test loss 4.28, test ppl 72.11
[Epoch 147 Batch 200/372] current loss 4.52, ppl 91.62, throughput 405.38 samples/s, lr 31.29
[Epoch 147] throughput 27492.38 samples/s
[Epoch 147] time cost 82.99s, valid loss 4.33, valid ppl 75.99, lr 30.00
[Epoch 147] test loss 4.28, test ppl 72.08
[Epoch 148 Batch 200/372] current loss 4.51, ppl 90.50, throughput 395.22 samples/s, lr 29.57
[Epoch 148] throughput 27123.97 samples/s
[Epoch 148] time cost 84.00s, valid loss 4.33, valid ppl 75.96, lr 30.00
[Epoch 148] test loss 4.28, test ppl 72.06
[Epoch 149 Batch 200/372] current loss 4.51, ppl 90.58, throughput 404.02 samples/s, lr 29.14
[Epoch 149] throughput 27207.76 samples/s
[Epoch 149] time cost 83.92s, valid loss 4.33, valid ppl 75.93, lr 30.00
[Epoch 149] test loss 4.28, test ppl 72.03
[Epoch 150 Batch 200/372] current loss 4.53, ppl 92.46, throughput 394.02 samples/s, lr 31.71
[Epoch 150] throughput 27378.43 samples/s
[Epoch 150] time cost 83.32s, valid loss 4.33, valid ppl 75.90, lr 30.00
[Epoch 150] test loss 4.28, test ppl 72.00
[Epoch 151 Batch 200/372] current loss 4.50, ppl 90.41, throughput 415.06 samples/s, lr 31.29
[Epoch 151] throughput 27405.17 samples/s
[Epoch 151] time cost 83.22s, valid loss 4.33, valid ppl 75.87, lr 30.00
[Epoch 151] test loss 4.28, test ppl 71.98
[Epoch 152 Batch 200/372] current loss 4.50, ppl 90.15, throughput 403.33 samples/s, lr 30.43
[Epoch 152] throughput 27559.63 samples/s
[Epoch 152] time cost 82.80s, valid loss 4.33, valid ppl 75.85, lr 30.00
[Epoch 152] test loss 4.28, test ppl 71.95
[Epoch 153 Batch 200/372] current loss 4.50, ppl 89.95, throughput 405.04 samples/s, lr 30.86
[Epoch 153] throughput 27564.47 samples/s
[Epoch 153] time cost 82.80s, valid loss 4.33, valid ppl 75.82, lr 30.00
[Epoch 153] test loss 4.28, test ppl 71.93
[Epoch 154 Batch 200/372] current loss 4.50, ppl 90.07, throughput 398.45 samples/s, lr 33.00
[Epoch 154] throughput 27395.90 samples/s
[Epoch 154] time cost 83.23s, valid loss 4.33, valid ppl 75.79, lr 30.00
[Epoch 154] test loss 4.28, test ppl 71.91
[Epoch 155 Batch 200/372] current loss 4.50, ppl 89.86, throughput 409.15 samples/s, lr 30.86
[Epoch 155] throughput 27385.21 samples/s
[Epoch 155] time cost 83.21s, valid loss 4.33, valid ppl 75.76, lr 30.00
[Epoch 155] test loss 4.28, test ppl 71.88
[Epoch 156 Batch 200/372] current loss 4.50, ppl 90.30, throughput 395.13 samples/s, lr 30.43
[Epoch 156] throughput 27205.60 samples/s
[Epoch 156] time cost 83.76s, valid loss 4.33, valid ppl 75.73, lr 30.00
[Epoch 156] test loss 4.27, test ppl 71.86
[Epoch 157 Batch 200/372] current loss 4.50, ppl 90.20, throughput 400.03 samples/s, lr 27.43
[Epoch 157] throughput 27171.44 samples/s
[Epoch 157] time cost 83.85s, valid loss 4.33, valid ppl 75.70, lr 30.00
[Epoch 157] test loss 4.27, test ppl 71.83
[Epoch 158 Batch 200/372] current loss 4.50, ppl 89.64, throughput 404.46 samples/s, lr 33.43
[Epoch 158] throughput 27265.27 samples/s
[Epoch 158] time cost 83.63s, valid loss 4.33, valid ppl 75.68, lr 30.00
[Epoch 158] test loss 4.27, test ppl 71.81
[Epoch 159 Batch 200/372] current loss 4.49, ppl 89.11, throughput 397.99 samples/s, lr 27.43
[Epoch 159] throughput 27303.98 samples/s
[Epoch 159] time cost 83.51s, valid loss 4.33, valid ppl 75.65, lr 30.00
[Epoch 159] test loss 4.27, test ppl 71.78
[Epoch 160 Batch 200/372] current loss 4.48, ppl 88.17, throughput 403.80 samples/s, lr 27.86
[Epoch 160] throughput 27397.95 samples/s
[Epoch 160] time cost 83.23s, valid loss 4.33, valid ppl 75.62, lr 30.00
[Epoch 160] test loss 4.27, test ppl 71.76
[Epoch 161 Batch 200/372] current loss 4.48, ppl 88.11, throughput 398.93 samples/s, lr 30.43
[Epoch 161] throughput 27237.45 samples/s
[Epoch 161] time cost 83.65s, valid loss 4.33, valid ppl 75.59, lr 30.00
[Epoch 161] test loss 4.27, test ppl 71.73
[Epoch 162 Batch 200/372] current loss 4.49, ppl 88.78, throughput 394.51 samples/s, lr 29.14
[Epoch 162] throughput 27008.46 samples/s
[Epoch 162] time cost 84.42s, valid loss 4.33, valid ppl 75.57, lr 30.00
[Epoch 162] test loss 4.27, test ppl 71.71
[Epoch 163 Batch 200/372] current loss 4.49, ppl 88.80, throughput 402.74 samples/s, lr 28.71
[Epoch 163] throughput 27448.41 samples/s
[Epoch 163] time cost 83.01s, valid loss 4.32, valid ppl 75.54, lr 30.00
[Epoch 163] test loss 4.27, test ppl 71.69
[Epoch 164 Batch 200/372] current loss 4.50, ppl 89.71, throughput 398.47 samples/s, lr 30.43
[Epoch 164] throughput 26965.67 samples/s
[Epoch 164] time cost 84.47s, valid loss 4.32, valid ppl 75.51, lr 30.00
[Epoch 164] test loss 4.27, test ppl 71.67
[Epoch 165 Batch 200/372] current loss 4.49, ppl 88.89, throughput 395.28 samples/s, lr 29.14
[Epoch 165] throughput 27126.82 samples/s
[Epoch 165] time cost 83.91s, valid loss 4.32, valid ppl 75.49, lr 30.00
[Epoch 165] test loss 4.27, test ppl 71.64
[Epoch 166 Batch 200/372] current loss 4.49, ppl 88.78, throughput 395.60 samples/s, lr 29.57
[Epoch 166] throughput 27565.97 samples/s
[Epoch 166] time cost 82.73s, valid loss 4.32, valid ppl 75.46, lr 30.00
[Epoch 166] test loss 4.27, test ppl 71.62
[Epoch 167 Batch 200/372] current loss 4.48, ppl 88.28, throughput 393.92 samples/s, lr 34.71
[Epoch 167] throughput 27118.19 samples/s
[Epoch 167] time cost 83.97s, valid loss 4.32, valid ppl 75.44, lr 30.00
[Epoch 167] test loss 4.27, test ppl 71.60
[Epoch 168 Batch 200/372] current loss 4.48, ppl 88.53, throughput 399.19 samples/s, lr 29.57
[Epoch 168] throughput 27068.53 samples/s
[Epoch 168] time cost 84.09s, valid loss 4.32, valid ppl 75.41, lr 30.00
[Epoch 168] test loss 4.27, test ppl 71.58
[Epoch 169 Batch 200/372] current loss 4.49, ppl 89.02, throughput 398.23 samples/s, lr 28.71
[Epoch 169] throughput 27492.33 samples/s
[Epoch 169] time cost 82.94s, valid loss 4.32, valid ppl 75.39, lr 30.00
[Epoch 169] test loss 4.27, test ppl 71.56
[Epoch 170 Batch 200/372] current loss 4.48, ppl 88.04, throughput 395.03 samples/s, lr 30.00
[Epoch 170] throughput 27139.57 samples/s
[Epoch 170] time cost 83.93s, valid loss 4.32, valid ppl 75.36, lr 30.00
[Epoch 170] test loss 4.27, test ppl 71.54
[Epoch 171 Batch 200/372] current loss 4.48, ppl 88.52, throughput 396.14 samples/s, lr 32.14
[Epoch 171] throughput 27214.95 samples/s
[Epoch 171] time cost 83.70s, valid loss 4.32, valid ppl 75.34, lr 30.00
[Epoch 171] test loss 4.27, test ppl 71.52
[Epoch 172 Batch 200/372] current loss 4.46, ppl 86.55, throughput 395.02 samples/s, lr 29.14
[Epoch 172] throughput 26935.67 samples/s
[Epoch 172] time cost 84.57s, valid loss 4.32, valid ppl 75.31, lr 30.00
[Epoch 172] test loss 4.27, test ppl 71.50
[Epoch 173 Batch 200/372] current loss 4.48, ppl 87.84, throughput 390.93 samples/s, lr 31.29
[Epoch 173] throughput 27165.39 samples/s
[Epoch 173] time cost 83.85s, valid loss 4.32, valid ppl 75.29, lr 30.00
[Epoch 173] test loss 4.27, test ppl 71.48
[Epoch 174 Batch 200/372] current loss 4.48, ppl 88.38, throughput 403.58 samples/s, lr 29.14
[Epoch 174] throughput 27546.52 samples/s
[Epoch 174] time cost 82.88s, valid loss 4.32, valid ppl 75.26, lr 30.00
[Epoch 174] test loss 4.27, test ppl 71.46
[Epoch 175 Batch 200/372] current loss 4.48, ppl 88.61, throughput 400.21 samples/s, lr 33.00
[Epoch 175] throughput 27229.15 samples/s
[Epoch 175] time cost 83.65s, valid loss 4.32, valid ppl 75.24, lr 30.00
[Epoch 175] test loss 4.27, test ppl 71.44
[Epoch 176 Batch 200/372] current loss 4.46, ppl 86.47, throughput 397.55 samples/s, lr 35.14
[Epoch 176] throughput 27078.63 samples/s
[Epoch 176] time cost 84.12s, valid loss 4.32, valid ppl 75.21, lr 30.00
[Epoch 176] test loss 4.27, test ppl 71.42
[Epoch 177 Batch 200/372] current loss 4.46, ppl 86.84, throughput 403.92 samples/s, lr 28.29
[Epoch 177] throughput 27459.02 samples/s
[Epoch 177] time cost 82.96s, valid loss 4.32, valid ppl 75.19, lr 30.00
[Epoch 177] test loss 4.27, test ppl 71.40
[Epoch 178 Batch 200/372] current loss 4.46, ppl 86.41, throughput 394.64 samples/s, lr 28.29
[Epoch 178] throughput 27016.64 samples/s
[Epoch 178] time cost 84.23s, valid loss 4.32, valid ppl 75.17, lr 30.00
[Epoch 178] test loss 4.27, test ppl 71.38
[Epoch 179 Batch 200/372] current loss 4.47, ppl 87.11, throughput 399.25 samples/s, lr 30.00
[Epoch 179] throughput 27027.39 samples/s
[Epoch 179] time cost 84.24s, valid loss 4.32, valid ppl 75.14, lr 30.00
[Epoch 179] test loss 4.27, test ppl 71.35
[Epoch 180 Batch 200/372] current loss 4.48, ppl 87.99, throughput 399.02 samples/s, lr 29.57
[Epoch 180] throughput 27195.42 samples/s
[Epoch 180] time cost 83.74s, valid loss 4.32, valid ppl 75.12, lr 30.00
[Epoch 180] test loss 4.27, test ppl 71.33
[Epoch 181 Batch 200/372] current loss 4.47, ppl 87.36, throughput 410.59 samples/s, lr 30.43
[Epoch 181] throughput 27314.89 samples/s
[Epoch 181] time cost 83.48s, valid loss 4.32, valid ppl 75.10, lr 30.00
[Epoch 181] test loss 4.27, test ppl 71.31
[Epoch 182 Batch 200/372] current loss 4.47, ppl 87.01, throughput 403.44 samples/s, lr 28.71
[Epoch 182] throughput 27645.99 samples/s
[Epoch 182] time cost 82.48s, valid loss 4.32, valid ppl 75.07, lr 30.00
[Epoch 182] test loss 4.27, test ppl 71.30
[Epoch 183 Batch 200/372] current loss 4.48, ppl 88.02, throughput 402.73 samples/s, lr 27.86
[Epoch 183] throughput 27634.59 samples/s
[Epoch 183] time cost 82.53s, valid loss 4.32, valid ppl 75.05, lr 30.00
[Epoch 183] test loss 4.27, test ppl 71.28
[Epoch 184 Batch 200/372] current loss 4.47, ppl 87.58, throughput 409.66 samples/s, lr 30.43
[Epoch 184] throughput 27760.38 samples/s
[Epoch 184] time cost 82.25s, valid loss 4.32, valid ppl 75.03, lr 30.00
[Epoch 184] test loss 4.27, test ppl 71.26
[Epoch 185 Batch 200/372] current loss 4.47, ppl 86.93, throughput 404.68 samples/s, lr 33.00
[Epoch 185] throughput 27517.99 samples/s
[Epoch 185] time cost 82.84s, valid loss 4.32, valid ppl 75.00, lr 30.00
[Epoch 185] test loss 4.27, test ppl 71.24
[Epoch 186 Batch 200/372] current loss 4.47, ppl 87.17, throughput 409.79 samples/s, lr 30.86
[Epoch 186] throughput 27749.25 samples/s
[Epoch 186] time cost 82.17s, valid loss 4.32, valid ppl 74.98, lr 30.00
[Epoch 186] test loss 4.27, test ppl 71.22
[Epoch 187 Batch 200/372] current loss 4.47, ppl 87.55, throughput 403.36 samples/s, lr 15.43
[Epoch 187] throughput 27177.84 samples/s
[Epoch 187] time cost 83.79s, valid loss 4.32, valid ppl 74.96, lr 30.00
[Epoch 187] test loss 4.27, test ppl 71.20
[Epoch 188 Batch 200/372] current loss 4.44, ppl 84.77, throughput 409.04 samples/s, lr 28.29
[Epoch 188] throughput 27656.31 samples/s
[Epoch 188] time cost 82.47s, valid loss 4.32, valid ppl 74.94, lr 30.00
[Epoch 188] test loss 4.27, test ppl 71.18
[Epoch 189 Batch 200/372] current loss 4.47, ppl 87.10, throughput 391.62 samples/s, lr 27.43
[Epoch 189] throughput 27204.49 samples/s
[Epoch 189] time cost 83.71s, valid loss 4.32, valid ppl 74.92, lr 30.00
[Epoch 189] test loss 4.26, test ppl 71.16
[Epoch 190 Batch 200/372] current loss 4.45, ppl 85.22, throughput 408.51 samples/s, lr 28.29
[Epoch 190] throughput 27446.09 samples/s
[Epoch 190] time cost 83.25s, valid loss 4.32, valid ppl 74.90, lr 30.00
[Epoch 190] test loss 4.26, test ppl 71.14
[Epoch 191 Batch 200/372] current loss 4.45, ppl 85.63, throughput 407.86 samples/s, lr 28.29
[Epoch 191] throughput 27496.12 samples/s
[Epoch 191] time cost 82.90s, valid loss 4.32, valid ppl 74.87, lr 30.00
[Epoch 191] test loss 4.26, test ppl 71.13
[Epoch 192 Batch 200/372] current loss 4.47, ppl 87.08, throughput 414.06 samples/s, lr 30.86
[Epoch 192] throughput 27669.08 samples/s
[Epoch 192] time cost 82.43s, valid loss 4.32, valid ppl 74.85, lr 30.00
[Epoch 192] test loss 4.26, test ppl 71.11
[Epoch 193 Batch 200/372] current loss 4.45, ppl 86.03, throughput 407.90 samples/s, lr 31.29
[Epoch 193] throughput 27436.41 samples/s
[Epoch 193] time cost 83.09s, valid loss 4.32, valid ppl 74.83, lr 30.00
[Epoch 193] test loss 4.26, test ppl 71.09
[Epoch 194 Batch 200/372] current loss 4.44, ppl 85.03, throughput 408.76 samples/s, lr 34.71
[Epoch 194] throughput 27788.74 samples/s
[Epoch 194] time cost 82.03s, valid loss 4.31, valid ppl 74.81, lr 30.00
[Epoch 194] test loss 4.26, test ppl 71.07
[Epoch 195 Batch 200/372] current loss 4.45, ppl 85.94, throughput 407.11 samples/s, lr 28.71
[Epoch 195] throughput 27663.72 samples/s
[Epoch 195] time cost 82.49s, valid loss 4.31, valid ppl 74.79, lr 30.00
[Epoch 195] test loss 4.26, test ppl 71.06
[Epoch 196 Batch 200/372] current loss 4.46, ppl 86.45, throughput 406.84 samples/s, lr 30.00
[Epoch 196] throughput 27546.40 samples/s
[Epoch 196] time cost 82.80s, valid loss 4.31, valid ppl 74.77, lr 30.00
[Epoch 196] test loss 4.26, test ppl 71.04
[Epoch 197 Batch 200/372] current loss 4.45, ppl 86.03, throughput 403.65 samples/s, lr 15.43
[Epoch 197] throughput 27571.09 samples/s
[Epoch 197] time cost 82.72s, valid loss 4.31, valid ppl 74.75, lr 30.00
[Epoch 197] test loss 4.26, test ppl 71.02
[Epoch 198 Batch 200/372] current loss 4.47, ppl 87.10, throughput 409.34 samples/s, lr 30.43
[Epoch 198] throughput 27764.88 samples/s
[Epoch 198] time cost 82.30s, valid loss 4.31, valid ppl 74.73, lr 30.00
[Epoch 198] test loss 4.26, test ppl 71.00
[Epoch 199 Batch 200/372] current loss 4.45, ppl 85.56, throughput 406.29 samples/s, lr 30.00
[Epoch 199] throughput 27616.48 samples/s
[Epoch 199] time cost 82.55s, valid loss 4.31, valid ppl 74.71, lr 30.00
[Epoch 199] test loss 4.26, test ppl 70.99
[Epoch 200 Batch 200/372] current loss 4.45, ppl 85.65, throughput 409.13 samples/s, lr 30.86
[Epoch 200] throughput 27609.83 samples/s
[Epoch 200] time cost 82.59s, valid loss 4.31, valid ppl 74.69, lr 30.00
[Epoch 200] test loss 4.26, test ppl 70.97
[Epoch 201 Batch 200/372] current loss 4.44, ppl 84.91, throughput 406.30 samples/s, lr 30.00
[Epoch 201] throughput 27573.07 samples/s
[Epoch 201] time cost 82.68s, valid loss 4.31, valid ppl 74.67, lr 30.00
[Epoch 201] test loss 4.26, test ppl 70.95
[Epoch 202 Batch 200/372] current loss 4.46, ppl 86.18, throughput 407.30 samples/s, lr 30.00
[Epoch 202] throughput 27643.03 samples/s
[Epoch 202] time cost 82.60s, valid loss 4.31, valid ppl 74.65, lr 30.00
[Epoch 202] test loss 4.26, test ppl 70.94
[Epoch 203 Batch 200/372] current loss 4.45, ppl 85.39, throughput 405.13 samples/s, lr 28.29
[Epoch 203] throughput 27560.48 samples/s
[Epoch 203] time cost 82.62s, valid loss 4.31, valid ppl 74.63, lr 30.00
[Epoch 203] test loss 4.26, test ppl 70.92
[Epoch 204 Batch 200/372] current loss 4.45, ppl 85.76, throughput 402.55 samples/s, lr 28.29
[Epoch 204] throughput 27477.28 samples/s
[Epoch 204] time cost 82.92s, valid loss 4.31, valid ppl 74.61, lr 30.00
[Epoch 204] test loss 4.26, test ppl 70.91
[Epoch 205 Batch 200/372] current loss 4.45, ppl 85.92, throughput 402.70 samples/s, lr 32.14
[Epoch 205] throughput 27517.07 samples/s
[Epoch 205] time cost 82.83s, valid loss 4.31, valid ppl 74.59, lr 30.00
[Epoch 205] test loss 4.26, test ppl 70.89
[Epoch 206 Batch 200/372] current loss 4.46, ppl 86.10, throughput 401.54 samples/s, lr 28.29
[Epoch 206] throughput 27406.47 samples/s
[Epoch 206] time cost 83.10s, valid loss 4.31, valid ppl 74.58, lr 30.00
[Epoch 206] test loss 4.26, test ppl 70.88
[Epoch 207 Batch 200/372] current loss 4.45, ppl 85.51, throughput 399.48 samples/s, lr 30.43
[Epoch 207] throughput 27545.56 samples/s
[Epoch 207] time cost 82.72s, valid loss 4.31, valid ppl 74.56, lr 30.00
[Epoch 207] test loss 4.26, test ppl 70.86
[Epoch 208 Batch 200/372] current loss 4.45, ppl 85.94, throughput 404.77 samples/s, lr 33.43
[Epoch 208] throughput 27281.13 samples/s
[Epoch 208] time cost 83.51s, valid loss 4.31, valid ppl 74.54, lr 30.00
[Epoch 208] test loss 4.26, test ppl 70.85
[Epoch 209 Batch 200/372] current loss 4.44, ppl 84.43, throughput 404.96 samples/s, lr 34.71
[Epoch 209] throughput 27470.99 samples/s
[Epoch 209] time cost 82.93s, valid loss 4.31, valid ppl 74.52, lr 30.00
[Epoch 209] test loss 4.26, test ppl 70.83
[Epoch 210 Batch 200/372] current loss 4.45, ppl 85.87, throughput 402.33 samples/s, lr 31.29
[Epoch 210] throughput 27457.94 samples/s
[Epoch 210] time cost 82.94s, valid loss 4.31, valid ppl 74.50, lr 30.00
[Epoch 210] test loss 4.26, test ppl 70.82
[Epoch 211 Batch 200/372] current loss 4.45, ppl 85.81, throughput 410.75 samples/s, lr 28.29
[Epoch 211] throughput 27630.89 samples/s
[Epoch 211] time cost 82.49s, valid loss 4.31, valid ppl 74.48, lr 30.00
[Epoch 211] test loss 4.26, test ppl 70.80
[Epoch 212 Batch 200/372] current loss 4.45, ppl 85.49, throughput 404.62 samples/s, lr 29.57
[Epoch 212] throughput 27646.39 samples/s
[Epoch 212] time cost 82.50s, valid loss 4.31, valid ppl 74.47, lr 30.00
[Epoch 212] test loss 4.26, test ppl 70.79
[Epoch 213 Batch 200/372] current loss 4.44, ppl 84.45, throughput 404.74 samples/s, lr 13.29
[Epoch 213] throughput 27544.15 samples/s
[Epoch 213] time cost 82.78s, valid loss 4.31, valid ppl 74.45, lr 30.00
[Epoch 213] test loss 4.26, test ppl 70.77
[Epoch 214 Batch 200/372] current loss 4.45, ppl 85.85, throughput 397.14 samples/s, lr 31.29
[Epoch 214] throughput 27351.94 samples/s
[Epoch 214] time cost 83.30s, valid loss 4.31, valid ppl 74.43, lr 30.00
[Epoch 214] test loss 4.26, test ppl 70.75
[Epoch 215 Batch 200/372] current loss 4.44, ppl 84.49, throughput 396.59 samples/s, lr 29.14
[Epoch 215] throughput 27324.09 samples/s
[Epoch 215] time cost 83.31s, valid loss 4.31, valid ppl 74.41, lr 30.00
[Epoch 215] test loss 4.26, test ppl 70.74
[Epoch 216 Batch 200/372] current loss 4.45, ppl 85.21, throughput 410.10 samples/s, lr 32.14
[Epoch 216] throughput 27784.50 samples/s
[Epoch 216] time cost 82.23s, valid loss 4.31, valid ppl 74.40, lr 30.00
[Epoch 216] test loss 4.26, test ppl 70.72
[Epoch 217 Batch 200/372] current loss 4.43, ppl 83.85, throughput 404.94 samples/s, lr 28.71
[Epoch 217] throughput 27815.28 samples/s
[Epoch 217] time cost 82.02s, valid loss 4.31, valid ppl 74.38, lr 30.00
[Epoch 217] test loss 4.26, test ppl 70.71
[Epoch 218 Batch 200/372] current loss 4.42, ppl 83.16, throughput 407.01 samples/s, lr 30.86
[Epoch 218] throughput 27634.95 samples/s
[Epoch 218] time cost 82.46s, valid loss 4.31, valid ppl 74.36, lr 30.00
[Epoch 218] test loss 4.26, test ppl 70.69
[Epoch 219 Batch 200/372] current loss 4.44, ppl 84.59, throughput 410.84 samples/s, lr 29.14
[Epoch 219] throughput 27487.45 samples/s
[Epoch 219] time cost 82.89s, valid loss 4.31, valid ppl 74.34, lr 30.00
[Epoch 219] test loss 4.26, test ppl 70.68
[Epoch 220 Batch 200/372] current loss 4.44, ppl 84.44, throughput 397.58 samples/s, lr 29.57
[Epoch 220] throughput 27467.03 samples/s
[Epoch 220] time cost 82.95s, valid loss 4.31, valid ppl 74.32, lr 30.00
[Epoch 220] test loss 4.26, test ppl 70.66
[Epoch 221 Batch 200/372] current loss 4.43, ppl 84.03, throughput 404.66 samples/s, lr 28.71
[Epoch 221] throughput 27450.52 samples/s
[Epoch 221] time cost 83.02s, valid loss 4.31, valid ppl 74.31, lr 30.00
[Epoch 221] test loss 4.26, test ppl 70.65
[Epoch 222 Batch 200/372] current loss 4.44, ppl 84.56, throughput 403.98 samples/s, lr 29.57
[Epoch 222] throughput 27764.46 samples/s
[Epoch 222] time cost 82.11s, valid loss 4.31, valid ppl 74.29, lr 30.00
[Epoch 222] test loss 4.26, test ppl 70.63
[Epoch 223 Batch 200/372] current loss 4.43, ppl 84.12, throughput 400.06 samples/s, lr 30.00
[Epoch 223] throughput 27678.05 samples/s
[Epoch 223] time cost 82.35s, valid loss 4.31, valid ppl 74.27, lr 30.00
[Epoch 223] test loss 4.26, test ppl 70.62
[Epoch 224 Batch 200/372] current loss 4.43, ppl 83.70, throughput 407.91 samples/s, lr 28.71
[Epoch 224] throughput 27563.47 samples/s
[Epoch 224] time cost 82.72s, valid loss 4.31, valid ppl 74.26, lr 30.00
[Epoch 224] test loss 4.26, test ppl 70.61
[Epoch 225 Batch 200/372] current loss 4.43, ppl 83.84, throughput 405.59 samples/s, lr 26.57
[Epoch 225] throughput 27700.91 samples/s
[Epoch 225] time cost 82.33s, valid loss 4.31, valid ppl 74.24, lr 30.00
[Epoch 225] test loss 4.26, test ppl 70.59
[Epoch 226 Batch 200/372] current loss 4.44, ppl 84.41, throughput 407.69 samples/s, lr 28.71
[Epoch 226] throughput 27701.66 samples/s
[Epoch 226] time cost 82.30s, valid loss 4.31, valid ppl 74.22, lr 30.00
[Epoch 226] test loss 4.26, test ppl 70.58
[Epoch 227 Batch 200/372] current loss 4.43, ppl 83.63, throughput 401.13 samples/s, lr 28.29
[Epoch 227] throughput 27473.19 samples/s
[Epoch 227] time cost 82.97s, valid loss 4.31, valid ppl 74.21, lr 30.00
[Epoch 227] test loss 4.26, test ppl 70.56
[Epoch 228 Batch 200/372] current loss 4.43, ppl 84.00, throughput 403.45 samples/s, lr 29.14
[Epoch 228] throughput 27254.16 samples/s
[Epoch 228] time cost 83.58s, valid loss 4.31, valid ppl 74.19, lr 30.00
[Epoch 228] test loss 4.26, test ppl 70.55
[Epoch 229 Batch 200/372] current loss 4.41, ppl 82.30, throughput 409.42 samples/s, lr 28.71
[Epoch 229] throughput 27496.05 samples/s
[Epoch 229] time cost 82.87s, valid loss 4.31, valid ppl 74.18, lr 30.00
[Epoch 229] test loss 4.26, test ppl 70.54
[Epoch 230 Batch 200/372] current loss 4.43, ppl 83.84, throughput 395.11 samples/s, lr 29.14
[Epoch 230] throughput 27338.18 samples/s
[Epoch 230] time cost 83.33s, valid loss 4.31, valid ppl 74.16, lr 30.00
[Epoch 230] test loss 4.26, test ppl 70.52
[Epoch 231 Batch 200/372] current loss 4.44, ppl 84.58, throughput 394.25 samples/s, lr 27.43
[Epoch 231] throughput 27146.94 samples/s
[Epoch 231] time cost 83.79s, valid loss 4.31, valid ppl 74.14, lr 30.00
[Epoch 231] test loss 4.26, test ppl 70.51
[Epoch 232 Batch 200/372] current loss 4.42, ppl 83.47, throughput 405.11 samples/s, lr 34.71
[Epoch 232] throughput 27429.10 samples/s
[Epoch 232] time cost 83.04s, valid loss 4.31, valid ppl 74.13, lr 30.00
[Epoch 232] test loss 4.26, test ppl 70.50
[Epoch 233 Batch 200/372] current loss 4.43, ppl 84.24, throughput 400.09 samples/s, lr 30.86
[Epoch 233] throughput 27110.02 samples/s
[Epoch 233] time cost 83.94s, valid loss 4.31, valid ppl 74.11, lr 30.00
[Epoch 233] test loss 4.26, test ppl 70.49
[Epoch 234 Batch 200/372] current loss 4.42, ppl 83.09, throughput 406.05 samples/s, lr 30.00
[Epoch 234] throughput 27323.97 samples/s
[Epoch 234] time cost 83.30s, valid loss 4.31, valid ppl 74.10, lr 30.00
[Epoch 234] test loss 4.26, test ppl 70.47
[Epoch 235 Batch 200/372] current loss 4.44, ppl 84.62, throughput 397.32 samples/s, lr 27.86
[Epoch 235] throughput 27210.66 samples/s
[Epoch 235] time cost 83.78s, valid loss 4.31, valid ppl 74.08, lr 30.00
[Epoch 235] test loss 4.26, test ppl 70.46
[Epoch 236 Batch 200/372] current loss 4.40, ppl 81.27, throughput 397.99 samples/s, lr 28.29
[Epoch 236] throughput 27083.09 samples/s
[Epoch 236] time cost 84.01s, valid loss 4.30, valid ppl 74.06, lr 30.00
[Epoch 236] test loss 4.25, test ppl 70.45
[Epoch 237 Batch 200/372] current loss 4.42, ppl 82.99, throughput 398.61 samples/s, lr 25.29
[Epoch 237] throughput 27149.89 samples/s
[Epoch 237] time cost 83.79s, valid loss 4.30, valid ppl 74.05, lr 30.00
[Epoch 237] test loss 4.25, test ppl 70.43
[Epoch 238 Batch 200/372] current loss 4.43, ppl 83.67, throughput 396.44 samples/s, lr 27.43
[Epoch 238] throughput 27288.32 samples/s
[Epoch 238] time cost 83.45s, valid loss 4.30, valid ppl 74.03, lr 30.00
[Epoch 238] test loss 4.25, test ppl 70.42
[Epoch 239 Batch 200/372] current loss 4.42, ppl 82.91, throughput 399.28 samples/s, lr 29.57
[Epoch 239] throughput 27172.74 samples/s
[Epoch 239] time cost 83.80s, valid loss 4.30, valid ppl 74.02, lr 30.00
[Epoch 239] test loss 4.25, test ppl 70.41
[Epoch 240 Batch 200/372] current loss 4.41, ppl 82.64, throughput 395.49 samples/s, lr 33.00
[Epoch 240] throughput 27170.79 samples/s
[Epoch 240] time cost 83.76s, valid loss 4.30, valid ppl 74.00, lr 30.00
[Epoch 240] test loss 4.25, test ppl 70.39
[Epoch 241 Batch 200/372] current loss 4.41, ppl 81.91, throughput 399.64 samples/s, lr 28.29
[Epoch 241] throughput 27092.95 samples/s
[Epoch 241] time cost 84.02s, valid loss 4.30, valid ppl 73.99, lr 30.00
[Epoch 241] test loss 4.25, test ppl 70.38
[Epoch 242 Batch 200/372] current loss 4.41, ppl 82.18, throughput 404.78 samples/s, lr 30.00
[Epoch 242] throughput 27617.68 samples/s
[Epoch 242] time cost 82.52s, valid loss 4.30, valid ppl 73.98, lr 30.00
[Epoch 242] test loss 4.25, test ppl 70.37
[Epoch 243 Batch 200/372] current loss 4.42, ppl 83.04, throughput 397.11 samples/s, lr 27.43
[Epoch 243] throughput 27193.15 samples/s
[Epoch 243] time cost 83.72s, valid loss 4.30, valid ppl 73.96, lr 30.00
[Epoch 243] test loss 4.25, test ppl 70.36
[Epoch 244 Batch 200/372] current loss 4.42, ppl 83.46, throughput 403.71 samples/s, lr 31.29
[Epoch 244] throughput 27283.62 samples/s
[Epoch 244] time cost 83.47s, valid loss 4.30, valid ppl 73.95, lr 30.00
[Epoch 244] test loss 4.25, test ppl 70.35
[Epoch 245 Batch 200/372] current loss 4.42, ppl 82.78, throughput 397.51 samples/s, lr 30.86
[Epoch 245] throughput 27109.35 samples/s
[Epoch 245] time cost 83.94s, valid loss 4.30, valid ppl 73.93, lr 30.00
[Epoch 245] test loss 4.25, test ppl 70.33
[Epoch 246 Batch 200/372] current loss 4.41, ppl 82.63, throughput 395.48 samples/s, lr 31.71
[Epoch 246] throughput 27163.51 samples/s
[Epoch 246] time cost 83.84s, valid loss 4.30, valid ppl 73.92, lr 30.00
[Epoch 246] test loss 4.25, test ppl 70.32
[Epoch 247 Batch 200/372] current loss 4.40, ppl 81.75, throughput 409.69 samples/s, lr 30.86
[Epoch 247] throughput 27486.74 samples/s
[Epoch 247] time cost 82.89s, valid loss 4.30, valid ppl 73.91, lr 30.00
[Epoch 247] test loss 4.25, test ppl 70.31
[Epoch 248 Batch 200/372] current loss 4.41, ppl 82.19, throughput 404.04 samples/s, lr 29.57
[Epoch 248] throughput 26861.76 samples/s
[Epoch 248] time cost 84.68s, valid loss 4.30, valid ppl 73.89, lr 30.00
[Epoch 248] test loss 4.25, test ppl 70.30
[Epoch 249 Batch 200/372] current loss 4.42, ppl 83.35, throughput 406.14 samples/s, lr 29.57
[Epoch 249] throughput 27259.46 samples/s
[Epoch 249] time cost 83.51s, valid loss 4.30, valid ppl 73.88, lr 30.00
[Epoch 249] test loss 4.25, test ppl 70.29
[Epoch 250 Batch 200/372] current loss 4.41, ppl 82.64, throughput 409.65 samples/s, lr 26.57
[Epoch 250] throughput 27659.71 samples/s
[Epoch 250] time cost 82.39s, valid loss 4.30, valid ppl 73.87, lr 30.00
[Epoch 250] test loss 4.25, test ppl 70.28
[Epoch 251 Batch 200/372] current loss 4.41, ppl 82.25, throughput 400.08 samples/s, lr 27.86
[Epoch 251] throughput 27324.25 samples/s
[Epoch 251] time cost 83.34s, valid loss 4.30, valid ppl 73.85, lr 30.00
[Epoch 251] test loss 4.25, test ppl 70.27
[Epoch 252 Batch 200/372] current loss 4.42, ppl 83.03, throughput 393.76 samples/s, lr 31.71
[Epoch 252] throughput 27074.34 samples/s
[Epoch 252] time cost 84.10s, valid loss 4.30, valid ppl 73.84, lr 30.00
[Epoch 252] test loss 4.25, test ppl 70.25
[Epoch 253 Batch 200/372] current loss 4.42, ppl 82.68, throughput 404.82 samples/s, lr 29.14
[Epoch 253] throughput 27216.21 samples/s
[Epoch 253] time cost 83.63s, valid loss 4.30, valid ppl 73.82, lr 30.00
[Epoch 253] test loss 4.25, test ppl 70.24
[Epoch 254 Batch 200/372] current loss 4.41, ppl 81.87, throughput 408.63 samples/s, lr 30.00
[Epoch 254] throughput 27601.90 samples/s
[Epoch 254] time cost 82.60s, valid loss 4.30, valid ppl 73.81, lr 30.00
[Epoch 254] test loss 4.25, test ppl 70.23
[Epoch 255 Batch 200/372] current loss 4.40, ppl 81.20, throughput 409.13 samples/s, lr 27.86
[Epoch 255] throughput 27671.99 samples/s
[Epoch 255] time cost 82.41s, valid loss 4.30, valid ppl 73.80, lr 30.00
[Epoch 255] test loss 4.25, test ppl 70.22
[Epoch 256 Batch 200/372] current loss 4.40, ppl 81.83, throughput 397.89 samples/s, lr 30.43
[Epoch 256] throughput 27191.95 samples/s
[Epoch 256] time cost 83.71s, valid loss 4.30, valid ppl 73.78, lr 30.00
[Epoch 256] test loss 4.25, test ppl 70.21
[Epoch 257 Batch 200/372] current loss 4.39, ppl 80.57, throughput 414.30 samples/s, lr 29.14
[Epoch 257] throughput 27581.96 samples/s
[Epoch 257] time cost 82.67s, valid loss 4.30, valid ppl 73.77, lr 30.00
[Epoch 257] test loss 4.25, test ppl 70.20
[Epoch 258 Batch 200/372] current loss 4.41, ppl 82.62, throughput 398.38 samples/s, lr 26.14
[Epoch 258] throughput 27347.12 samples/s
[Epoch 258] time cost 83.23s, valid loss 4.30, valid ppl 73.76, lr 30.00
[Epoch 258] test loss 4.25, test ppl 70.19
[Epoch 259 Batch 200/372] current loss 4.40, ppl 81.12, throughput 407.24 samples/s, lr 36.86
[Epoch 259] throughput 27527.83 samples/s
[Epoch 259] time cost 82.90s, valid loss 4.30, valid ppl 73.74, lr 30.00
[Epoch 259] test loss 4.25, test ppl 70.18
[Epoch 260 Batch 200/372] current loss 4.41, ppl 81.97, throughput 398.36 samples/s, lr 31.71
[Epoch 260] throughput 27542.27 samples/s
[Epoch 260] time cost 82.80s, valid loss 4.30, valid ppl 73.73, lr 30.00
[Epoch 260] test loss 4.25, test ppl 70.17
[Epoch 261 Batch 200/372] current loss 4.43, ppl 83.62, throughput 392.71 samples/s, lr 30.00
[Epoch 261] throughput 27303.21 samples/s
[Epoch 261] time cost 83.42s, valid loss 4.30, valid ppl 73.72, lr 30.00
[Epoch 261] test loss 4.25, test ppl 70.16
[Epoch 262 Batch 200/372] current loss 4.40, ppl 81.38, throughput 398.14 samples/s, lr 35.14
[Epoch 262] throughput 27012.53 samples/s
[Epoch 262] time cost 84.28s, valid loss 4.30, valid ppl 73.71, lr 30.00
[Epoch 262] test loss 4.25, test ppl 70.15
[Epoch 263 Batch 200/372] current loss 4.42, ppl 83.21, throughput 403.78 samples/s, lr 28.29
[Epoch 263] throughput 26966.39 samples/s
[Epoch 263] time cost 84.42s, valid loss 4.30, valid ppl 73.69, lr 30.00
[Epoch 263] test loss 4.25, test ppl 70.14
[Epoch 264 Batch 200/372] current loss 4.41, ppl 82.00, throughput 398.63 samples/s, lr 29.57
[Epoch 264] throughput 27076.42 samples/s
[Epoch 264] time cost 84.02s, valid loss 4.30, valid ppl 73.68, lr 30.00
[Epoch 264] test loss 4.25, test ppl 70.13
[Epoch 265 Batch 200/372] current loss 4.42, ppl 82.77, throughput 402.78 samples/s, lr 30.43
[Epoch 265] throughput 27615.07 samples/s
[Epoch 265] time cost 82.57s, valid loss 4.30, valid ppl 73.67, lr 30.00
[Epoch 265] test loss 4.25, test ppl 70.12
[Epoch 266 Batch 200/372] current loss 4.41, ppl 82.18, throughput 397.63 samples/s, lr 27.43
[Epoch 266] throughput 27212.12 samples/s
[Epoch 266] time cost 83.67s, valid loss 4.30, valid ppl 73.66, lr 30.00
[Epoch 266] test loss 4.25, test ppl 70.11
[Epoch 267 Batch 200/372] current loss 4.41, ppl 82.43, throughput 401.52 samples/s, lr 28.71
[Epoch 267] throughput 27653.35 samples/s
[Epoch 267] time cost 82.48s, valid loss 4.30, valid ppl 73.64, lr 30.00
[Epoch 267] test loss 4.25, test ppl 70.10
[Epoch 268 Batch 200/372] current loss 4.41, ppl 82.26, throughput 406.91 samples/s, lr 30.86
[Epoch 268] throughput 27753.31 samples/s
[Epoch 268] time cost 82.27s, valid loss 4.30, valid ppl 73.63, lr 30.00
[Epoch 268] test loss 4.25, test ppl 70.09
[Epoch 269 Batch 200/372] current loss 4.41, ppl 81.99, throughput 401.98 samples/s, lr 33.86
[Epoch 269] throughput 27540.13 samples/s
[Epoch 269] time cost 82.75s, valid loss 4.30, valid ppl 73.62, lr 30.00
[Epoch 269] test loss 4.25, test ppl 70.08
[Epoch 270 Batch 200/372] current loss 4.40, ppl 81.57, throughput 408.10 samples/s, lr 29.57
[Epoch 270] throughput 27780.52 samples/s
[Epoch 270] time cost 82.12s, valid loss 4.30, valid ppl 73.61, lr 30.00
[Epoch 270] test loss 4.25, test ppl 70.07
[Epoch 271 Batch 200/372] current loss 4.41, ppl 82.00, throughput 397.33 samples/s, lr 31.29
[Epoch 271] throughput 27285.39 samples/s
[Epoch 271] time cost 83.51s, valid loss 4.30, valid ppl 73.60, lr 30.00
[Epoch 271] test loss 4.25, test ppl 70.06
[Epoch 272 Batch 200/372] current loss 4.40, ppl 81.22, throughput 404.60 samples/s, lr 25.29
[Epoch 272] throughput 27385.05 samples/s
[Epoch 272] time cost 83.28s, valid loss 4.30, valid ppl 73.59, lr 30.00
[Epoch 272] test loss 4.25, test ppl 70.05
[Epoch 273 Batch 200/372] current loss 4.39, ppl 80.61, throughput 399.03 samples/s, lr 30.86
[Epoch 273] throughput 27485.16 samples/s
[Epoch 273] time cost 82.94s, valid loss 4.30, valid ppl 73.57, lr 30.00
[Epoch 273] test loss 4.25, test ppl 70.04
[Epoch 274 Batch 200/372] current loss 4.40, ppl 81.36, throughput 404.69 samples/s, lr 27.43
[Epoch 274] throughput 27205.02 samples/s
[Epoch 274] time cost 83.76s, valid loss 4.30, valid ppl 73.56, lr 30.00
[Epoch 274] test loss 4.25, test ppl 70.03
[Epoch 275 Batch 200/372] current loss 4.41, ppl 81.89, throughput 408.06 samples/s, lr 29.57
[Epoch 275] throughput 27652.00 samples/s
[Epoch 275] time cost 82.53s, valid loss 4.30, valid ppl 73.55, lr 30.00
[Epoch 275] test loss 4.25, test ppl 70.02
[Epoch 276 Batch 200/372] current loss 4.40, ppl 81.61, throughput 393.85 samples/s, lr 32.14
[Epoch 276] throughput 27089.87 samples/s
[Epoch 276] time cost 84.10s, valid loss 4.30, valid ppl 73.54, lr 30.00
[Epoch 276] test loss 4.25, test ppl 70.02
[Epoch 277 Batch 200/372] current loss 4.39, ppl 80.64, throughput 406.07 samples/s, lr 28.29
[Epoch 277] throughput 27546.92 samples/s
[Epoch 277] time cost 82.76s, valid loss 4.30, valid ppl 73.53, lr 30.00
[Epoch 277] test loss 4.25, test ppl 70.01
[Epoch 278 Batch 200/372] current loss 4.38, ppl 79.80, throughput 402.53 samples/s, lr 27.00
[Epoch 278] throughput 27083.74 samples/s
[Epoch 278] time cost 84.02s, valid loss 4.30, valid ppl 73.52, lr 30.00
[Epoch 278] test loss 4.25, test ppl 70.00
[Epoch 279 Batch 200/372] current loss 4.37, ppl 79.32, throughput 407.62 samples/s, lr 31.29
[Epoch 279] throughput 27183.64 samples/s
[Epoch 279] time cost 83.79s, valid loss 4.30, valid ppl 73.51, lr 30.00
[Epoch 279] test loss 4.25, test ppl 69.99
[Epoch 280 Batch 200/372] current loss 4.40, ppl 81.66, throughput 404.92 samples/s, lr 25.29
[Epoch 280] throughput 27198.88 samples/s
[Epoch 280] time cost 83.77s, valid loss 4.30, valid ppl 73.50, lr 30.00
[Epoch 280] test loss 4.25, test ppl 69.98
[Epoch 281 Batch 200/372] current loss 4.38, ppl 80.15, throughput 396.88 samples/s, lr 33.43
[Epoch 281] throughput 27318.00 samples/s
[Epoch 281] time cost 83.45s, valid loss 4.30, valid ppl 73.49, lr 30.00
[Epoch 281] test loss 4.25, test ppl 69.97
[Epoch 282 Batch 200/372] current loss 4.40, ppl 81.25, throughput 401.29 samples/s, lr 30.86
[Epoch 282] throughput 27339.71 samples/s
[Epoch 282] time cost 83.32s, valid loss 4.30, valid ppl 73.48, lr 30.00
[Epoch 282] test loss 4.25, test ppl 69.96
[Epoch 283 Batch 200/372] current loss 4.40, ppl 81.42, throughput 393.81 samples/s, lr 30.00
[Epoch 283] throughput 27160.70 samples/s
[Epoch 283] time cost 83.79s, valid loss 4.30, valid ppl 73.46, lr 30.00
[Epoch 283] test loss 4.25, test ppl 69.95
[Epoch 284 Batch 200/372] current loss 4.41, ppl 81.92, throughput 397.77 samples/s, lr 31.29
[Epoch 284] throughput 27097.47 samples/s
[Epoch 284] time cost 84.03s, valid loss 4.30, valid ppl 73.45, lr 30.00
[Epoch 284] test loss 4.25, test ppl 69.94
[Epoch 285 Batch 200/372] current loss 4.38, ppl 79.89, throughput 394.59 samples/s, lr 10.71
[Epoch 285] throughput 27207.91 samples/s
[Epoch 285] time cost 83.77s, valid loss 4.30, valid ppl 73.45, lr 30.00
[Epoch 285] test loss 4.25, test ppl 69.94
[Epoch 286 Batch 200/372] current loss 4.40, ppl 81.10, throughput 400.18 samples/s, lr 31.71
[Epoch 286] throughput 27403.78 samples/s
[Epoch 286] time cost 83.10s, valid loss 4.30, valid ppl 73.43, lr 30.00
[Epoch 286] test loss 4.25, test ppl 69.93
[Epoch 287 Batch 200/372] current loss 4.39, ppl 80.50, throughput 402.02 samples/s, lr 29.57
[Epoch 287] throughput 27340.16 samples/s
[Epoch 287] time cost 83.33s, valid loss 4.30, valid ppl 73.42, lr 30.00
[Epoch 287] test loss 4.25, test ppl 69.92
[Epoch 288 Batch 200/372] current loss 4.38, ppl 79.64, throughput 398.43 samples/s, lr 30.43
[Epoch 288] throughput 27402.81 samples/s
[Epoch 288] time cost 83.13s, valid loss 4.30, valid ppl 73.41, lr 30.00
[Epoch 288] test loss 4.25, test ppl 69.91
[Epoch 289 Batch 200/372] current loss 4.40, ppl 81.14, throughput 394.57 samples/s, lr 31.71
[Epoch 289] throughput 27252.54 samples/s
[Epoch 289] time cost 83.55s, valid loss 4.30, valid ppl 73.41, lr 30.00
[Epoch 289] test loss 4.25, test ppl 69.90
[Epoch 290 Batch 200/372] current loss 4.39, ppl 80.98, throughput 401.52 samples/s, lr 27.86
[Epoch 290] throughput 27087.14 samples/s
[Epoch 290] time cost 84.06s, valid loss 4.30, valid ppl 73.39, lr 30.00
[Epoch 290] test loss 4.25, test ppl 69.89
[Epoch 291 Batch 200/372] current loss 4.40, ppl 81.55, throughput 394.00 samples/s, lr 26.14
[Epoch 291] throughput 27005.24 samples/s
[Epoch 291] time cost 84.35s, valid loss 4.30, valid ppl 73.39, lr 30.00
[Epoch 291] test loss 4.25, test ppl 69.88
[Epoch 292 Batch 200/372] current loss 4.38, ppl 79.56, throughput 405.76 samples/s, lr 30.86
[Epoch 292] throughput 26965.60 samples/s
[Epoch 292] time cost 84.55s, valid loss 4.30, valid ppl 73.37, lr 30.00
[Epoch 292] test loss 4.25, test ppl 69.88
[Epoch 293 Batch 200/372] current loss 4.40, ppl 81.07, throughput 410.92 samples/s, lr 31.71
[Epoch 293] throughput 27357.87 samples/s
[Epoch 293] time cost 83.28s, valid loss 4.30, valid ppl 73.37, lr 30.00
[Epoch 293] test loss 4.25, test ppl 69.87
[Epoch 294 Batch 200/372] current loss 4.39, ppl 80.98, throughput 401.60 samples/s, lr 33.86
[Epoch 294] throughput 27005.29 samples/s
[Epoch 294] time cost 84.26s, valid loss 4.30, valid ppl 73.36, lr 30.00
[Epoch 294] test loss 4.25, test ppl 69.86
[Epoch 295 Batch 200/372] current loss 4.39, ppl 80.69, throughput 403.06 samples/s, lr 30.86
[Epoch 295] throughput 27298.31 samples/s
[Epoch 295] time cost 83.47s, valid loss 4.30, valid ppl 73.35, lr 30.00
[Epoch 295] test loss 4.25, test ppl 69.85
[Epoch 296 Batch 200/372] current loss 4.39, ppl 80.93, throughput 396.49 samples/s, lr 33.43
[Epoch 296] throughput 26788.66 samples/s
[Epoch 296] time cost 84.84s, valid loss 4.30, valid ppl 73.34, lr 30.00
[Epoch 296] test loss 4.25, test ppl 69.84
[Epoch 297 Batch 200/372] current loss 4.40, ppl 81.14, throughput 403.23 samples/s, lr 30.00
[Epoch 297] throughput 27017.94 samples/s
[Epoch 297] time cost 84.25s, valid loss 4.29, valid ppl 73.33, lr 30.00
[Epoch 297] test loss 4.25, test ppl 69.83
[Epoch 298 Batch 200/372] current loss 4.38, ppl 79.99, throughput 406.01 samples/s, lr 30.43
[Epoch 298] throughput 27670.45 samples/s
[Epoch 298] time cost 82.61s, valid loss 4.29, valid ppl 73.32, lr 30.00
[Epoch 298] test loss 4.25, test ppl 69.83
[Epoch 299 Batch 200/372] current loss 4.38, ppl 79.48, throughput 397.59 samples/s, lr 30.00
[Epoch 299] throughput 26927.85 samples/s
[Epoch 299] time cost 84.53s, valid loss 4.29, valid ppl 73.31, lr 30.00
[Epoch 299] test loss 4.25, test ppl 69.82
[Epoch 300 Batch 200/372] current loss 4.39, ppl 80.81, throughput 395.34 samples/s, lr 28.29
[Epoch 300] throughput 27031.72 samples/s
[Epoch 300] time cost 84.31s, valid loss 4.29, valid ppl 73.30, lr 30.00
[Epoch 300] test loss 4.25, test ppl 69.81
[Epoch 301 Batch 200/372] current loss 4.39, ppl 80.26, throughput 398.36 samples/s, lr 29.14
[Epoch 301] throughput 27306.44 samples/s
[Epoch 301] time cost 83.39s, valid loss 4.29, valid ppl 73.29, lr 30.00
[Epoch 301] test loss 4.25, test ppl 69.80
[Epoch 302 Batch 200/372] current loss 4.38, ppl 79.64, throughput 399.32 samples/s, lr 13.29
[Epoch 302] throughput 27291.98 samples/s
[Epoch 302] time cost 83.51s, valid loss 4.29, valid ppl 73.28, lr 30.00
[Epoch 302] test loss 4.25, test ppl 69.79
[Epoch 303 Batch 200/372] current loss 4.38, ppl 80.12, throughput 402.98 samples/s, lr 27.00
[Epoch 303] throughput 27327.71 samples/s
[Epoch 303] time cost 83.37s, valid loss 4.29, valid ppl 73.27, lr 30.00
[Epoch 303] test loss 4.25, test ppl 69.79
[Epoch 304 Batch 200/372] current loss 4.38, ppl 80.02, throughput 392.42 samples/s, lr 30.43
[Epoch 304] throughput 27171.80 samples/s
[Epoch 304] time cost 83.76s, valid loss 4.29, valid ppl 73.26, lr 30.00
[Epoch 304] test loss 4.25, test ppl 69.78
[Epoch 305 Batch 200/372] current loss 4.38, ppl 79.56, throughput 404.68 samples/s, lr 28.71
[Epoch 305] throughput 27153.48 samples/s
[Epoch 305] time cost 83.86s, valid loss 4.29, valid ppl 73.25, lr 30.00
[Epoch 305] test loss 4.25, test ppl 69.77
[Epoch 306 Batch 200/372] current loss 4.38, ppl 80.08, throughput 393.81 samples/s, lr 24.86
[Epoch 306] throughput 27123.97 samples/s
[Epoch 306] time cost 83.94s, valid loss 4.29, valid ppl 73.24, lr 30.00
[Epoch 306] test loss 4.25, test ppl 69.76
[Epoch 307 Batch 200/372] current loss 4.39, ppl 80.82, throughput 402.87 samples/s, lr 26.57
[Epoch 307] throughput 27532.89 samples/s
[Epoch 307] time cost 82.81s, valid loss 4.29, valid ppl 73.23, lr 30.00
[Epoch 307] test loss 4.24, test ppl 69.75
[Epoch 308 Batch 200/372] current loss 4.39, ppl 80.80, throughput 397.59 samples/s, lr 14.57
[Epoch 308] throughput 26977.20 samples/s
[Epoch 308] time cost 84.35s, valid loss 4.29, valid ppl 73.22, lr 30.00
[Epoch 308] test loss 4.24, test ppl 69.75
[Epoch 309 Batch 200/372] current loss 4.39, ppl 80.40, throughput 404.53 samples/s, lr 32.57
[Epoch 309] throughput 27065.03 samples/s
[Epoch 309] time cost 84.19s, valid loss 4.29, valid ppl 73.22, lr 30.00
[Epoch 309] test loss 4.24, test ppl 69.74
[Epoch 310 Batch 200/372] current loss 4.37, ppl 79.37, throughput 398.79 samples/s, lr 30.86
[Epoch 310] throughput 26943.35 samples/s
[Epoch 310] time cost 84.44s, valid loss 4.29, valid ppl 73.21, lr 30.00
[Epoch 310] test loss 4.24, test ppl 69.73
[Epoch 311 Batch 200/372] current loss 4.38, ppl 79.93, throughput 388.19 samples/s, lr 32.14
[Epoch 311] throughput 27108.80 samples/s
[Epoch 311] time cost 84.00s, valid loss 4.29, valid ppl 73.20, lr 30.00
[Epoch 311] test loss 4.24, test ppl 69.72
[Epoch 312 Batch 200/372] current loss 4.37, ppl 79.36, throughput 401.05 samples/s, lr 30.00
[Epoch 312] throughput 27178.58 samples/s
[Epoch 312] time cost 83.72s, valid loss 4.29, valid ppl 73.19, lr 30.00
[Epoch 312] test loss 4.24, test ppl 69.72
[Epoch 313 Batch 200/372] current loss 4.37, ppl 79.38, throughput 397.03 samples/s, lr 28.29
[Epoch 313] throughput 27092.08 samples/s
[Epoch 313] time cost 84.10s, valid loss 4.29, valid ppl 73.18, lr 30.00
[Epoch 313] test loss 4.24, test ppl 69.71
[Epoch 314 Batch 200/372] current loss 4.38, ppl 79.49, throughput 399.93 samples/s, lr 30.86
[Epoch 314] throughput 27218.79 samples/s
[Epoch 314] time cost 83.73s, valid loss 4.29, valid ppl 73.17, lr 30.00
[Epoch 314] test loss 4.24, test ppl 69.70
[Epoch 315 Batch 200/372] current loss 4.40, ppl 81.28, throughput 397.63 samples/s, lr 30.43
[Epoch 315] throughput 27149.31 samples/s
[Epoch 315] time cost 83.85s, valid loss 4.29, valid ppl 73.16, lr 30.00
[Epoch 315] test loss 4.24, test ppl 69.69
[Epoch 316 Batch 200/372] current loss 4.38, ppl 79.93, throughput 401.96 samples/s, lr 14.14
[Epoch 316] throughput 27007.19 samples/s
[Epoch 316] time cost 84.36s, valid loss 4.29, valid ppl 73.15, lr 30.00
[Epoch 316] test loss 4.24, test ppl 69.69
[Epoch 317 Batch 200/372] current loss 4.38, ppl 80.14, throughput 391.31 samples/s, lr 31.29
[Epoch 317] throughput 27198.72 samples/s
[Epoch 317] time cost 83.79s, valid loss 4.29, valid ppl 73.15, lr 30.00
[Epoch 317] test loss 4.24, test ppl 69.68
[Epoch 318 Batch 200/372] current loss 4.39, ppl 80.91, throughput 397.47 samples/s, lr 28.71
[Epoch 318] throughput 27111.26 samples/s
[Epoch 318] time cost 84.04s, valid loss 4.29, valid ppl 73.14, lr 30.00
[Epoch 318] test loss 4.24, test ppl 69.67
[Epoch 319 Batch 200/372] current loss 4.37, ppl 79.04, throughput 396.11 samples/s, lr 29.57
[Epoch 319] throughput 27192.80 samples/s
[Epoch 319] time cost 83.72s, valid loss 4.29, valid ppl 73.13, lr 30.00
[Epoch 319] test loss 4.24, test ppl 69.67
[Epoch 320 Batch 200/372] current loss 4.38, ppl 79.53, throughput 394.27 samples/s, lr 25.71
[Epoch 320] throughput 26960.85 samples/s
[Epoch 320] time cost 84.44s, valid loss 4.29, valid ppl 73.12, lr 30.00
[Epoch 320] test loss 4.24, test ppl 69.66
[Epoch 321 Batch 200/372] current loss 4.39, ppl 80.95, throughput 396.00 samples/s, lr 28.71
[Epoch 321] throughput 27409.38 samples/s
[Epoch 321] time cost 83.15s, valid loss 4.29, valid ppl 73.11, lr 30.00
[Epoch 321] test loss 4.24, test ppl 69.65
[Epoch 322 Batch 200/372] current loss 4.37, ppl 79.16, throughput 400.99 samples/s, lr 32.57
[Epoch 322] throughput 27199.74 samples/s
[Epoch 322] time cost 83.77s, valid loss 4.29, valid ppl 73.11, lr 30.00
[Epoch 322] test loss 4.24, test ppl 69.65
[Epoch 323 Batch 200/372] current loss 4.38, ppl 80.13, throughput 388.64 samples/s, lr 31.29
[Epoch 323] throughput 27062.67 samples/s
[Epoch 323] time cost 84.10s, valid loss 4.29, valid ppl 73.10, lr 30.00
[Epoch 323] test loss 4.24, test ppl 69.64
[Epoch 324 Batch 200/372] current loss 4.37, ppl 79.29, throughput 394.91 samples/s, lr 29.57
[Epoch 324] throughput 27024.29 samples/s
[Epoch 324] time cost 84.28s, valid loss 4.29, valid ppl 73.09, lr 30.00
[Epoch 324] test loss 4.24, test ppl 69.63
[Epoch 325 Batch 200/372] current loss 4.37, ppl 79.01, throughput 395.65 samples/s, lr 30.00
[Epoch 325] throughput 27148.40 samples/s
[Epoch 325] time cost 83.82s, valid loss 4.29, valid ppl 73.08, lr 30.00
[Epoch 325] test loss 4.24, test ppl 69.63
[Epoch 326 Batch 200/372] current loss 4.38, ppl 79.88, throughput 397.27 samples/s, lr 27.43
[Epoch 326] throughput 27186.88 samples/s
[Epoch 326] time cost 83.79s, valid loss 4.29, valid ppl 73.07, lr 30.00
[Epoch 326] test loss 4.24, test ppl 69.62
[Epoch 327 Batch 200/372] current loss 4.36, ppl 78.35, throughput 402.51 samples/s, lr 28.71
[Epoch 327] throughput 27395.35 samples/s
[Epoch 327] time cost 83.18s, valid loss 4.29, valid ppl 73.06, lr 30.00
[Epoch 327] test loss 4.24, test ppl 69.61
[Epoch 328 Batch 200/372] current loss 4.36, ppl 78.28, throughput 396.04 samples/s, lr 25.71
[Epoch 328] throughput 26908.57 samples/s
[Epoch 328] time cost 84.58s, valid loss 4.29, valid ppl 73.06, lr 30.00
[Epoch 328] test loss 4.24, test ppl 69.60
[Epoch 329 Batch 200/372] current loss 4.39, ppl 80.33, throughput 402.97 samples/s, lr 12.86
[Epoch 329] throughput 27272.78 samples/s
[Epoch 329] time cost 83.52s, valid loss 4.29, valid ppl 73.05, lr 30.00
[Epoch 329] test loss 4.24, test ppl 69.60
[Epoch 330 Batch 200/372] current loss 4.37, ppl 78.76, throughput 401.03 samples/s, lr 31.71
[Epoch 330] throughput 27330.53 samples/s
[Epoch 330] time cost 83.33s, valid loss 4.29, valid ppl 73.04, lr 30.00
[Epoch 330] test loss 4.24, test ppl 69.59
[Epoch 331 Batch 200/372] current loss 4.38, ppl 79.68, throughput 395.75 samples/s, lr 31.29
[Epoch 331] throughput 27286.50 samples/s
[Epoch 331] time cost 83.61s, valid loss 4.29, valid ppl 73.03, lr 30.00
[Epoch 331] test loss 4.24, test ppl 69.58
[Epoch 332 Batch 200/372] current loss 4.37, ppl 78.81, throughput 401.67 samples/s, lr 28.71
[Epoch 332] throughput 26921.18 samples/s
[Epoch 332] time cost 84.51s, valid loss 4.29, valid ppl 73.03, lr 30.00
[Epoch 332] test loss 4.24, test ppl 69.58
[Epoch 333 Batch 200/372] current loss 4.37, ppl 79.13, throughput 399.15 samples/s, lr 30.86
[Epoch 333] throughput 27278.62 samples/s
[Epoch 333] time cost 83.55s, valid loss 4.29, valid ppl 73.02, lr 30.00
[Epoch 333] test loss 4.24, test ppl 69.57
[Epoch 334 Batch 200/372] current loss 4.37, ppl 78.98, throughput 400.40 samples/s, lr 30.43
[Epoch 334] throughput 27232.51 samples/s
[Epoch 334] time cost 83.69s, valid loss 4.29, valid ppl 73.01, lr 30.00
[Epoch 334] test loss 4.24, test ppl 69.56
[Epoch 335 Batch 200/372] current loss 4.38, ppl 79.93, throughput 395.83 samples/s, lr 27.00
[Epoch 335] throughput 27005.04 samples/s
[Epoch 335] time cost 84.25s, valid loss 4.29, valid ppl 73.00, lr 30.00
[Epoch 335] test loss 4.24, test ppl 69.56
[Epoch 336 Batch 200/372] current loss 4.37, ppl 79.34, throughput 403.34 samples/s, lr 24.43
[Epoch 336] throughput 27336.90 samples/s
[Epoch 336] time cost 83.39s, valid loss 4.29, valid ppl 73.00, lr 30.00
[Epoch 336] test loss 4.24, test ppl 69.55
[Epoch 337 Batch 200/372] current loss 4.40, ppl 81.10, throughput 393.96 samples/s, lr 13.71
[Epoch 337] throughput 26900.45 samples/s
[Epoch 337] time cost 84.61s, valid loss 4.29, valid ppl 72.99, lr 30.00
[Epoch 337] test loss 4.24, test ppl 69.55
[Epoch 338 Batch 200/372] current loss 4.36, ppl 77.98, throughput 401.46 samples/s, lr 29.14
[Epoch 338] throughput 26903.80 samples/s
[Epoch 338] time cost 84.66s, valid loss 4.29, valid ppl 72.98, lr 30.00
[Epoch 338] test loss 4.24, test ppl 69.54
[Epoch 339 Batch 200/372] current loss 4.37, ppl 79.36, throughput 398.98 samples/s, lr 28.29
[Epoch 339] throughput 27349.64 samples/s
[Epoch 339] time cost 83.34s, valid loss 4.29, valid ppl 72.97, lr 30.00
[Epoch 339] test loss 4.24, test ppl 69.53
[Epoch 340 Batch 200/372] current loss 4.37, ppl 78.91, throughput 390.93 samples/s, lr 28.29
[Epoch 340] throughput 26798.13 samples/s
[Epoch 340] time cost 84.93s, valid loss 4.29, valid ppl 72.97, lr 30.00
[Epoch 340] test loss 4.24, test ppl 69.53
[Epoch 341 Batch 200/372] current loss 4.39, ppl 80.58, throughput 396.72 samples/s, lr 30.43
[Epoch 341] throughput 27170.28 samples/s
[Epoch 341] time cost 83.79s, valid loss 4.29, valid ppl 72.96, lr 30.00
[Epoch 341] test loss 4.24, test ppl 69.52
[Epoch 342 Batch 200/372] current loss 4.38, ppl 79.65, throughput 400.30 samples/s, lr 31.29
[Epoch 342] throughput 27129.62 samples/s
[Epoch 342] time cost 83.96s, valid loss 4.29, valid ppl 72.95, lr 30.00
[Epoch 342] test loss 4.24, test ppl 69.51
[Epoch 343 Batch 200/372] current loss 4.38, ppl 79.82, throughput 396.34 samples/s, lr 31.29
[Epoch 343] throughput 26959.58 samples/s
[Epoch 343] time cost 84.42s, valid loss 4.29, valid ppl 72.95, lr 30.00
[Epoch 343] test loss 4.24, test ppl 69.51
[Epoch 344 Batch 200/372] current loss 4.37, ppl 79.18, throughput 403.42 samples/s, lr 30.86
[Epoch 344] throughput 27013.76 samples/s
[Epoch 344] time cost 84.24s, valid loss 4.29, valid ppl 72.94, lr 30.00
[Epoch 344] test loss 4.24, test ppl 69.50
[Epoch 345 Batch 200/372] current loss 4.37, ppl 79.11, throughput 402.41 samples/s, lr 29.14
[Epoch 345] throughput 26904.23 samples/s
[Epoch 345] time cost 84.55s, valid loss 4.29, valid ppl 72.93, lr 30.00
[Epoch 345] test loss 4.24, test ppl 69.50
[Epoch 346 Batch 200/372] current loss 4.36, ppl 78.60, throughput 396.80 samples/s, lr 31.71
[Epoch 346] throughput 26986.57 samples/s
[Epoch 346] time cost 84.34s, valid loss 4.29, valid ppl 72.93, lr 30.00
[Epoch 346] test loss 4.24, test ppl 69.49
[Epoch 347 Batch 200/372] current loss 4.37, ppl 79.12, throughput 404.46 samples/s, lr 31.29
[Epoch 347] throughput 27633.00 samples/s
[Epoch 347] time cost 82.51s, valid loss 4.29, valid ppl 72.92, lr 30.00
[Epoch 347] test loss 4.24, test ppl 69.48
[Epoch 348 Batch 200/372] current loss 4.36, ppl 78.18, throughput 408.62 samples/s, lr 33.00
[Epoch 348] throughput 27658.39 samples/s
[Epoch 348] time cost 82.40s, valid loss 4.29, valid ppl 72.92, lr 30.00
[Epoch 348] test loss 4.24, test ppl 69.48
[Epoch 349 Batch 200/372] current loss 4.37, ppl 79.18, throughput 389.68 samples/s, lr 27.43
[Epoch 349] throughput 27045.73 samples/s
[Epoch 349] time cost 84.24s, valid loss 4.29, valid ppl 72.91, lr 30.00
[Epoch 349] test loss 4.24, test ppl 69.47
[Epoch 350 Batch 200/372] current loss 4.35, ppl 77.74, throughput 394.89 samples/s, lr 31.71
[Epoch 350] throughput 26941.00 samples/s
[Epoch 350] time cost 84.52s, valid loss 4.29, valid ppl 72.90, lr 30.00
[Epoch 350] test loss 4.24, test ppl 69.47
[Epoch 351 Batch 200/372] current loss 4.37, ppl 79.23, throughput 398.60 samples/s, lr 30.86
[Epoch 351] throughput 27047.84 samples/s
[Epoch 351] time cost 84.12s, valid loss 4.29, valid ppl 72.90, lr 30.00
[Epoch 351] test loss 4.24, test ppl 69.46
[Epoch 352 Batch 200/372] current loss 4.35, ppl 77.64, throughput 405.75 samples/s, lr 24.86
[Epoch 352] throughput 27284.69 samples/s
[Epoch 352] time cost 83.48s, valid loss 4.29, valid ppl 72.89, lr 30.00
[Epoch 352] test loss 4.24, test ppl 69.46
[Epoch 353 Batch 200/372] current loss 4.37, ppl 79.35, throughput 396.69 samples/s, lr 29.57
[Epoch 353] throughput 27181.45 samples/s
[Epoch 353] time cost 83.80s, valid loss 4.29, valid ppl 72.88, lr 30.00
[Epoch 353] test loss 4.24, test ppl 69.45
[Epoch 354 Batch 200/372] current loss 4.37, ppl 79.10, throughput 401.47 samples/s, lr 34.71
[Epoch 354] throughput 27024.12 samples/s
[Epoch 354] time cost 84.30s, valid loss 4.29, valid ppl 72.88, lr 30.00
[Epoch 354] test loss 4.24, test ppl 69.44
[Epoch 355 Batch 200/372] current loss 4.37, ppl 78.68, throughput 393.86 samples/s, lr 32.14
[Epoch 355] throughput 26741.15 samples/s
[Epoch 355] time cost 85.04s, valid loss 4.29, valid ppl 72.87, lr 30.00
[Epoch 355] test loss 4.24, test ppl 69.44
[Epoch 356 Batch 200/372] current loss 4.36, ppl 78.26, throughput 395.19 samples/s, lr 33.00
[Epoch 356] throughput 27153.37 samples/s
[Epoch 356] time cost 83.85s, valid loss 4.29, valid ppl 72.86, lr 30.00
[Epoch 356] test loss 4.24, test ppl 69.43
[Epoch 357 Batch 200/372] current loss 4.38, ppl 80.06, throughput 399.59 samples/s, lr 29.14
[Epoch 357] throughput 26684.72 samples/s
[Epoch 357] time cost 85.21s, valid loss 4.29, valid ppl 72.86, lr 30.00
[Epoch 357] test loss 4.24, test ppl 69.43
[Epoch 358 Batch 200/372] current loss 4.37, ppl 79.18, throughput 397.79 samples/s, lr 30.43
[Epoch 358] throughput 27312.19 samples/s
[Epoch 358] time cost 83.57s, valid loss 4.29, valid ppl 72.85, lr 30.00
[Epoch 358] test loss 4.24, test ppl 69.42
[Epoch 359 Batch 200/372] current loss 4.37, ppl 79.30, throughput 413.46 samples/s, lr 13.29
[Epoch 359] throughput 27455.51 samples/s
[Epoch 359] time cost 82.99s, valid loss 4.29, valid ppl 72.84, lr 30.00
[Epoch 359] test loss 4.24, test ppl 69.42
[Epoch 360 Batch 200/372] current loss 4.37, ppl 78.86, throughput 388.03 samples/s, lr 30.86
[Epoch 360] throughput 26982.96 samples/s
[Epoch 360] time cost 84.37s, valid loss 4.29, valid ppl 72.84, lr 30.00
[Epoch 360] test loss 4.24, test ppl 69.41
[Epoch 361 Batch 200/372] current loss 4.36, ppl 78.27, throughput 406.91 samples/s, lr 28.71
[Epoch 361] throughput 27207.43 samples/s
[Epoch 361] time cost 83.71s, valid loss 4.29, valid ppl 72.83, lr 30.00
[Epoch 361] test loss 4.24, test ppl 69.40
[Epoch 362 Batch 200/372] current loss 4.36, ppl 78.52, throughput 395.01 samples/s, lr 31.29
[Epoch 362] throughput 26800.47 samples/s
[Epoch 362] time cost 84.87s, valid loss 4.29, valid ppl 72.82, lr 30.00
[Epoch 362] test loss 4.24, test ppl 69.40
[Epoch 363 Batch 200/372] current loss 4.35, ppl 77.86, throughput 391.53 samples/s, lr 29.57
[Epoch 363] throughput 27256.16 samples/s
[Epoch 363] time cost 83.55s, valid loss 4.29, valid ppl 72.82, lr 30.00
[Epoch 363] test loss 4.24, test ppl 69.39
[Epoch 364 Batch 200/372] current loss 4.36, ppl 77.89, throughput 405.15 samples/s, lr 30.00
[Epoch 364] throughput 27546.83 samples/s
[Epoch 364] time cost 82.71s, valid loss 4.29, valid ppl 72.81, lr 30.00
[Epoch 364] test loss 4.24, test ppl 69.39
[Epoch 365 Batch 200/372] current loss 4.37, ppl 79.22, throughput 399.47 samples/s, lr 28.29
[Epoch 365] throughput 27289.97 samples/s
[Epoch 365] time cost 83.42s, valid loss 4.29, valid ppl 72.80, lr 30.00
[Epoch 365] test loss 4.24, test ppl 69.38
[Epoch 366 Batch 200/372] current loss 4.35, ppl 77.22, throughput 399.29 samples/s, lr 30.00
[Epoch 366] throughput 26972.62 samples/s
[Epoch 366] time cost 84.35s, valid loss 4.29, valid ppl 72.80, lr 30.00
[Epoch 366] test loss 4.24, test ppl 69.38
[Epoch 367 Batch 200/372] current loss 4.35, ppl 77.20, throughput 396.34 samples/s, lr 28.29
[Epoch 367] throughput 26959.53 samples/s
[Epoch 367] time cost 84.39s, valid loss 4.29, valid ppl 72.79, lr 30.00
[Epoch 367] test loss 4.24, test ppl 69.37
[Epoch 368 Batch 200/372] current loss 4.37, ppl 78.96, throughput 397.03 samples/s, lr 32.14
[Epoch 368] throughput 27363.96 samples/s
[Epoch 368] time cost 83.21s, valid loss 4.29, valid ppl 72.79, lr 30.00
[Epoch 368] test loss 4.24, test ppl 69.37
[Epoch 369 Batch 200/372] current loss 4.35, ppl 77.79, throughput 403.44 samples/s, lr 28.71
[Epoch 369] throughput 27306.64 samples/s
[Epoch 369] time cost 83.45s, valid loss 4.29, valid ppl 72.78, lr 30.00
[Epoch 369] test loss 4.24, test ppl 69.36
[Epoch 370 Batch 200/372] current loss 4.35, ppl 77.83, throughput 404.85 samples/s, lr 30.43
[Epoch 370] throughput 27080.07 samples/s
[Epoch 370] time cost 84.10s, valid loss 4.29, valid ppl 72.77, lr 30.00
[Epoch 370] test loss 4.24, test ppl 69.36
[Epoch 371 Batch 200/372] current loss 4.36, ppl 78.31, throughput 399.03 samples/s, lr 14.57
[Epoch 371] throughput 27164.42 samples/s
[Epoch 371] time cost 83.85s, valid loss 4.29, valid ppl 72.77, lr 30.00
[Epoch 371] test loss 4.24, test ppl 69.35
[Epoch 372 Batch 200/372] current loss 4.35, ppl 77.78, throughput 400.27 samples/s, lr 25.71
[Epoch 372] throughput 27121.89 samples/s
[Epoch 372] time cost 83.91s, valid loss 4.29, valid ppl 72.76, lr 30.00
[Epoch 372] test loss 4.24, test ppl 69.35
[Epoch 373 Batch 200/372] current loss 4.35, ppl 77.56, throughput 402.16 samples/s, lr 30.00
[Epoch 373] throughput 27079.29 samples/s
[Epoch 373] time cost 84.03s, valid loss 4.29, valid ppl 72.76, lr 30.00
[Epoch 373] test loss 4.24, test ppl 69.34
[Epoch 374 Batch 200/372] current loss 4.36, ppl 78.44, throughput 399.75 samples/s, lr 33.43
[Epoch 374] throughput 27392.53 samples/s
[Epoch 374] time cost 83.17s, valid loss 4.29, valid ppl 72.75, lr 30.00
[Epoch 374] test loss 4.24, test ppl 69.33
[Epoch 375 Batch 200/372] current loss 4.36, ppl 77.88, throughput 391.25 samples/s, lr 11.57
[Epoch 375] throughput 27011.44 samples/s
[Epoch 375] time cost 84.22s, valid loss 4.29, valid ppl 72.74, lr 30.00
[Epoch 375] test loss 4.24, test ppl 69.33
[Epoch 376 Batch 200/372] current loss 4.34, ppl 76.87, throughput 394.87 samples/s, lr 29.14
[Epoch 376] throughput 27027.27 samples/s
[Epoch 376] time cost 84.20s, valid loss 4.29, valid ppl 72.74, lr 30.00
[Epoch 376] test loss 4.24, test ppl 69.32
[Epoch 377 Batch 200/372] current loss 4.36, ppl 78.11, throughput 401.59 samples/s, lr 30.86
[Epoch 377] throughput 27121.80 samples/s
[Epoch 377] time cost 83.85s, valid loss 4.29, valid ppl 72.73, lr 30.00
[Epoch 377] test loss 4.24, test ppl 69.32
[Epoch 378 Batch 200/372] current loss 4.35, ppl 77.74, throughput 393.96 samples/s, lr 30.00
[Epoch 378] throughput 27200.18 samples/s
[Epoch 378] time cost 83.68s, valid loss 4.29, valid ppl 72.73, lr 30.00
[Epoch 378] test loss 4.24, test ppl 69.32
[Epoch 379 Batch 200/372] current loss 4.34, ppl 77.00, throughput 397.03 samples/s, lr 33.86
[Epoch 379] throughput 27073.93 samples/s
[Epoch 379] time cost 84.01s, valid loss 4.29, valid ppl 72.72, lr 30.00
[Epoch 379] test loss 4.24, test ppl 69.31
[Epoch 380 Batch 200/372] current loss 4.35, ppl 77.83, throughput 398.27 samples/s, lr 28.29
[Epoch 380] throughput 27213.71 samples/s
[Epoch 380] time cost 83.64s, valid loss 4.29, valid ppl 72.72, lr 30.00
[Epoch 380] test loss 4.24, test ppl 69.31
[Epoch 381 Batch 200/372] current loss 4.36, ppl 78.05, throughput 401.21 samples/s, lr 15.43
[Epoch 381] throughput 26860.73 samples/s
[Epoch 381] time cost 84.65s, valid loss 4.29, valid ppl 72.71, lr 30.00
[Epoch 381] test loss 4.24, test ppl 69.30
[Epoch 382 Batch 200/372] current loss 4.36, ppl 78.24, throughput 400.63 samples/s, lr 30.86
[Epoch 382] throughput 26871.80 samples/s
[Epoch 382] time cost 84.67s, valid loss 4.29, valid ppl 72.71, lr 30.00
[Epoch 382] test loss 4.24, test ppl 69.30
[Epoch 383 Batch 200/372] current loss 4.36, ppl 78.48, throughput 398.78 samples/s, lr 35.14
[Epoch 383] throughput 27269.06 samples/s
[Epoch 383] time cost 83.53s, valid loss 4.29, valid ppl 72.70, lr 30.00
[Epoch 383] test loss 4.24, test ppl 69.29
[Epoch 384 Batch 200/372] current loss 4.36, ppl 77.94, throughput 400.25 samples/s, lr 33.00
[Epoch 384] throughput 27062.30 samples/s
[Epoch 384] time cost 84.09s, valid loss 4.29, valid ppl 72.70, lr 30.00
[Epoch 384] test loss 4.24, test ppl 69.29
[Epoch 385 Batch 200/372] current loss 4.36, ppl 78.16, throughput 400.27 samples/s, lr 30.43
[Epoch 385] throughput 27284.85 samples/s
[Epoch 385] time cost 83.45s, valid loss 4.29, valid ppl 72.69, lr 30.00
[Epoch 385] test loss 4.24, test ppl 69.28
[Epoch 386 Batch 200/372] current loss 4.36, ppl 78.13, throughput 391.65 samples/s, lr 31.29
[Epoch 386] throughput 27070.14 samples/s
[Epoch 386] time cost 84.04s, valid loss 4.29, valid ppl 72.69, lr 30.00
[Epoch 386] test loss 4.24, test ppl 69.28
[Epoch 387 Batch 200/372] current loss 4.36, ppl 78.09, throughput 390.49 samples/s, lr 30.00
[Epoch 387] throughput 27073.78 samples/s
[Epoch 387] time cost 84.07s, valid loss 4.29, valid ppl 72.68, lr 30.00
[Epoch 387] test loss 4.24, test ppl 69.28
[Epoch 388 Batch 200/372] current loss 4.37, ppl 78.74, throughput 405.37 samples/s, lr 28.71
[Epoch 388] throughput 27420.40 samples/s
[Epoch 388] time cost 83.10s, valid loss 4.29, valid ppl 72.68, lr 30.00
[Epoch 388] test loss 4.24, test ppl 69.27
[Epoch 389 Batch 200/372] current loss 4.36, ppl 77.91, throughput 399.90 samples/s, lr 17.14
[Epoch 389] throughput 27140.62 samples/s
[Epoch 389] time cost 83.83s, valid loss 4.29, valid ppl 72.67, lr 30.00
[Epoch 389] test loss 4.24, test ppl 69.27
[Epoch 390 Batch 200/372] current loss 4.36, ppl 78.02, throughput 396.67 samples/s, lr 26.57
[Epoch 390] throughput 26822.29 samples/s
[Epoch 390] time cost 84.75s, valid loss 4.29, valid ppl 72.66, lr 30.00
[Epoch 390] test loss 4.24, test ppl 69.26
[Epoch 391 Batch 200/372] current loss 4.36, ppl 78.27, throughput 399.96 samples/s, lr 28.29
[Epoch 391] throughput 27049.17 samples/s
[Epoch 391] time cost 84.08s, valid loss 4.29, valid ppl 72.66, lr 30.00
[Epoch 391] test loss 4.24, test ppl 69.26
[Epoch 392 Batch 200/372] current loss 4.34, ppl 76.80, throughput 398.65 samples/s, lr 31.71
[Epoch 392] throughput 26911.97 samples/s
[Epoch 392] time cost 84.53s, valid loss 4.29, valid ppl 72.65, lr 30.00
[Epoch 392] test loss 4.24, test ppl 69.25
[Epoch 393 Batch 200/372] current loss 4.34, ppl 76.89, throughput 403.57 samples/s, lr 27.43
[Epoch 393] throughput 27518.14 samples/s
[Epoch 393] time cost 82.86s, valid loss 4.29, valid ppl 72.65, lr 30.00
[Epoch 393] test loss 4.24, test ppl 69.25
[Epoch 394 Batch 200/372] current loss 4.34, ppl 76.83, throughput 392.82 samples/s, lr 29.14
[Epoch 394] throughput 27145.35 samples/s
[Epoch 394] time cost 83.86s, valid loss 4.29, valid ppl 72.64, lr 30.00
[Epoch 394] test loss 4.24, test ppl 69.24
[Epoch 395 Batch 200/372] current loss 4.35, ppl 77.19, throughput 400.54 samples/s, lr 31.29
[Epoch 395] throughput 27121.42 samples/s
[Epoch 395] time cost 83.93s, valid loss 4.29, valid ppl 72.64, lr 30.00
[Epoch 395] test loss 4.24, test ppl 69.24
[Epoch 396 Batch 200/372] current loss 4.37, ppl 78.92, throughput 397.03 samples/s, lr 31.29
[Epoch 396] throughput 27418.73 samples/s
[Epoch 396] time cost 83.14s, valid loss 4.29, valid ppl 72.63, lr 30.00
[Epoch 396] test loss 4.24, test ppl 69.24
[Epoch 397 Batch 200/372] current loss 4.36, ppl 78.40, throughput 402.38 samples/s, lr 30.00
[Epoch 397] throughput 27229.21 samples/s
[Epoch 397] time cost 83.66s, valid loss 4.29, valid ppl 72.63, lr 30.00
[Epoch 397] test loss 4.24, test ppl 69.23
[Epoch 398 Batch 200/372] current loss 4.34, ppl 77.08, throughput 393.11 samples/s, lr 29.57
[Epoch 398] throughput 27197.07 samples/s
[Epoch 398] time cost 83.72s, valid loss 4.29, valid ppl 72.62, lr 30.00
[Epoch 398] test loss 4.24, test ppl 69.23
[Epoch 399 Batch 200/372] current loss 4.37, ppl 79.12, throughput 394.14 samples/s, lr 27.86
[Epoch 399] throughput 27188.55 samples/s
[Epoch 399] time cost 83.72s, valid loss 4.29, valid ppl 72.62, lr 30.00
[Epoch 399] test loss 4.24, test ppl 69.22
[Epoch 400 Batch 200/372] current loss 4.35, ppl 77.48, throughput 397.94 samples/s, lr 29.57
[Epoch 400] throughput 27473.33 samples/s
[Epoch 400] time cost 82.93s, valid loss 4.29, valid ppl 72.62, lr 30.00
[Epoch 400] test loss 4.24, test ppl 69.22
[Epoch 401 Batch 200/372] current loss 4.36, ppl 77.87, throughput 402.19 samples/s, lr 28.29
[Epoch 401] throughput 27195.48 samples/s
[Epoch 401] time cost 83.67s, valid loss 4.29, valid ppl 72.61, lr 30.00
[Epoch 401] test loss 4.24, test ppl 69.21
[Epoch 402 Batch 200/372] current loss 4.34, ppl 76.98, throughput 398.31 samples/s, lr 29.57
[Epoch 402] throughput 27691.61 samples/s
[Epoch 402] time cost 82.36s, valid loss 4.29, valid ppl 72.61, lr 30.00
[Epoch 402] test loss 4.24, test ppl 69.21
[Epoch 403 Batch 200/372] current loss 4.36, ppl 78.16, throughput 393.78 samples/s, lr 32.57
[Epoch 403] throughput 27270.91 samples/s
[Epoch 403] time cost 83.51s, valid loss 4.28, valid ppl 72.60, lr 30.00
[Epoch 403] test loss 4.24, test ppl 69.21
[Epoch 404 Batch 200/372] current loss 4.34, ppl 77.04, throughput 405.08 samples/s, lr 31.71
[Epoch 404] throughput 27404.38 samples/s
[Epoch 404] time cost 83.19s, valid loss 4.28, valid ppl 72.60, lr 30.00
[Epoch 404] test loss 4.24, test ppl 69.20
[Epoch 405 Batch 200/372] current loss 4.34, ppl 76.79, throughput 409.53 samples/s, lr 27.00
[Epoch 405] throughput 27652.38 samples/s
[Epoch 405] time cost 82.47s, valid loss 4.28, valid ppl 72.59, lr 30.00
[Epoch 405] test loss 4.24, test ppl 69.20
[Epoch 406 Batch 200/372] current loss 4.35, ppl 77.84, throughput 409.02 samples/s, lr 28.71
[Epoch 406] throughput 27292.62 samples/s
[Epoch 406] time cost 83.38s, valid loss 4.28, valid ppl 72.59, lr 30.00
[Epoch 406] test loss 4.24, test ppl 69.19
[Epoch 407 Batch 200/372] current loss 4.35, ppl 77.23, throughput 402.62 samples/s, lr 30.43
[Epoch 407] throughput 27200.31 samples/s
[Epoch 407] time cost 83.73s, valid loss 4.28, valid ppl 72.58, lr 30.00
[Epoch 407] test loss 4.24, test ppl 69.19
[Epoch 408 Batch 200/372] current loss 4.34, ppl 76.96, throughput 396.09 samples/s, lr 28.29
[Epoch 408] throughput 26923.87 samples/s
[Epoch 408] time cost 84.59s, valid loss 4.28, valid ppl 72.58, lr 30.00
[Epoch 408] test loss 4.24, test ppl 69.19
[Epoch 409 Batch 200/372] current loss 4.35, ppl 77.57, throughput 399.26 samples/s, lr 34.29
[Epoch 409] throughput 27333.44 samples/s
[Epoch 409] time cost 83.36s, valid loss 4.28, valid ppl 72.57, lr 30.00
[Epoch 409] test loss 4.24, test ppl 69.18
[Epoch 410 Batch 200/372] current loss 4.33, ppl 76.04, throughput 396.81 samples/s, lr 32.14
[Epoch 410] throughput 27135.22 samples/s
[Epoch 410] time cost 83.87s, valid loss 4.28, valid ppl 72.57, lr 30.00
[Epoch 410] test loss 4.24, test ppl 69.18
[Epoch 411 Batch 200/372] current loss 4.34, ppl 76.79, throughput 398.39 samples/s, lr 27.86
[Epoch 411] throughput 27209.95 samples/s
[Epoch 411] time cost 83.68s, valid loss 4.28, valid ppl 72.56, lr 30.00
[Epoch 411] test loss 4.24, test ppl 69.17
[Epoch 412 Batch 200/372] current loss 4.34, ppl 77.00, throughput 396.70 samples/s, lr 31.71
[Epoch 412] throughput 27097.79 samples/s
[Epoch 412] time cost 83.96s, valid loss 4.28, valid ppl 72.56, lr 30.00
[Epoch 412] test loss 4.24, test ppl 69.17
[Epoch 413 Batch 200/372] current loss 4.35, ppl 77.66, throughput 400.60 samples/s, lr 27.43
[Epoch 413] throughput 27309.41 samples/s
[Epoch 413] time cost 83.38s, valid loss 4.28, valid ppl 72.56, lr 30.00
[Epoch 413] test loss 4.24, test ppl 69.17
[Epoch 414 Batch 200/372] current loss 4.35, ppl 77.45, throughput 404.94 samples/s, lr 31.29
[Epoch 414] throughput 27259.28 samples/s
[Epoch 414] time cost 83.51s, valid loss 4.28, valid ppl 72.55, lr 30.00
[Epoch 414] test loss 4.24, test ppl 69.16
[Epoch 415 Batch 200/372] current loss 4.35, ppl 77.24, throughput 395.82 samples/s, lr 13.29
[Epoch 415] throughput 27090.04 samples/s
[Epoch 415] time cost 84.03s, valid loss 4.28, valid ppl 72.55, lr 30.00
[Epoch 415] test loss 4.24, test ppl 69.16
[Epoch 416 Batch 200/372] current loss 4.33, ppl 75.86, throughput 403.20 samples/s, lr 27.86
[Epoch 416] throughput 27084.26 samples/s
[Epoch 416] time cost 84.10s, valid loss 4.28, valid ppl 72.54, lr 30.00
[Epoch 416] test loss 4.24, test ppl 69.15
[Epoch 417 Batch 200/372] current loss 4.34, ppl 76.76, throughput 404.32 samples/s, lr 24.43
[Epoch 417] throughput 27188.01 samples/s
[Epoch 417] time cost 83.71s, valid loss 4.28, valid ppl 72.54, lr 30.00
[Epoch 417] test loss 4.24, test ppl 69.15
[Epoch 418 Batch 200/372] current loss 4.34, ppl 76.93, throughput 406.99 samples/s, lr 29.14
[Epoch 418] throughput 27262.05 samples/s
[Epoch 418] time cost 83.55s, valid loss 4.28, valid ppl 72.53, lr 30.00
[Epoch 418] test loss 4.24, test ppl 69.15
[Epoch 419 Batch 200/372] current loss 4.34, ppl 76.86, throughput 399.71 samples/s, lr 28.71
[Epoch 419] throughput 27237.24 samples/s
[Epoch 419] time cost 83.56s, valid loss 4.28, valid ppl 72.53, lr 30.00
[Epoch 419] test loss 4.24, test ppl 69.14
[Epoch 420 Batch 200/372] current loss 4.33, ppl 75.86, throughput 399.75 samples/s, lr 29.57
[Epoch 420] throughput 26952.29 samples/s
[Epoch 420] time cost 84.47s, valid loss 4.28, valid ppl 72.52, lr 30.00
[Epoch 420] test loss 4.24, test ppl 69.14
[Epoch 421 Batch 200/372] current loss 4.34, ppl 76.77, throughput 397.75 samples/s, lr 28.71
[Epoch 421] throughput 26980.78 samples/s
[Epoch 421] time cost 84.35s, valid loss 4.28, valid ppl 72.52, lr 30.00
[Epoch 421] test loss 4.24, test ppl 69.13
[Epoch 422 Batch 200/372] current loss 4.34, ppl 76.98, throughput 404.57 samples/s, lr 27.00
[Epoch 422] throughput 27151.35 samples/s
[Epoch 422] time cost 83.90s, valid loss 4.28, valid ppl 72.52, lr 30.00
[Epoch 422] test loss 4.24, test ppl 69.13
[Epoch 423 Batch 200/372] current loss 4.34, ppl 76.88, throughput 403.83 samples/s, lr 32.14
[Epoch 423] throughput 27313.97 samples/s
[Epoch 423] time cost 83.41s, valid loss 4.28, valid ppl 72.51, lr 30.00
[Epoch 423] test loss 4.24, test ppl 69.13
[Epoch 424 Batch 200/372] current loss 4.35, ppl 77.53, throughput 404.85 samples/s, lr 30.00
[Epoch 424] throughput 27153.95 samples/s
[Epoch 424] time cost 83.83s, valid loss 4.28, valid ppl 72.51, lr 30.00
[Epoch 424] test loss 4.24, test ppl 69.12
[Epoch 425 Batch 200/372] current loss 4.35, ppl 77.65, throughput 399.35 samples/s, lr 29.57
[Epoch 425] throughput 27166.62 samples/s
[Epoch 425] time cost 83.84s, valid loss 4.28, valid ppl 72.50, lr 30.00
[Epoch 425] test loss 4.24, test ppl 69.12
[Epoch 426 Batch 200/372] current loss 4.34, ppl 76.36, throughput 402.78 samples/s, lr 30.43
[Epoch 426] throughput 27257.73 samples/s
[Epoch 426] time cost 83.59s, valid loss 4.28, valid ppl 72.50, lr 30.00
[Epoch 426] test loss 4.24, test ppl 69.12
[Epoch 427 Batch 200/372] current loss 4.34, ppl 76.62, throughput 397.97 samples/s, lr 29.14
[Epoch 427] throughput 27429.38 samples/s
[Epoch 427] time cost 83.03s, valid loss 4.28, valid ppl 72.49, lr 30.00
[Epoch 427] test loss 4.24, test ppl 69.11
[Epoch 428 Batch 200/372] current loss 4.36, ppl 77.96, throughput 399.26 samples/s, lr 33.86
[Epoch 428] throughput 27378.37 samples/s
[Epoch 428] time cost 83.28s, valid loss 4.28, valid ppl 72.49, lr 30.00
[Epoch 428] test loss 4.24, test ppl 69.11
[Epoch 429 Batch 200/372] current loss 4.34, ppl 76.70, throughput 393.07 samples/s, lr 31.71
[Epoch 429] throughput 26838.73 samples/s
[Epoch 429] time cost 84.71s, valid loss 4.28, valid ppl 72.49, lr 30.00
[Epoch 429] test loss 4.24, test ppl 69.10
[Epoch 430 Batch 200/372] current loss 4.35, ppl 77.46, throughput 394.10 samples/s, lr 25.71
[Epoch 430] throughput 26800.15 samples/s
[Epoch 430] time cost 84.85s, valid loss 4.28, valid ppl 72.48, lr 30.00
[Epoch 430] test loss 4.24, test ppl 69.10
[Epoch 431 Batch 200/372] current loss 4.35, ppl 77.25, throughput 404.57 samples/s, lr 34.71
[Epoch 431] throughput 27065.19 samples/s
[Epoch 431] time cost 84.09s, valid loss 4.28, valid ppl 72.48, lr 30.00
[Epoch 431] test loss 4.24, test ppl 69.10
[Epoch 432 Batch 200/372] current loss 4.35, ppl 77.36, throughput 402.00 samples/s, lr 30.00
[Epoch 432] throughput 27274.05 samples/s
[Epoch 432] time cost 83.62s, valid loss 4.28, valid ppl 72.47, lr 30.00
[Epoch 432] test loss 4.24, test ppl 69.09
[Epoch 433 Batch 200/372] current loss 4.34, ppl 77.05, throughput 389.54 samples/s, lr 29.57
[Epoch 433] throughput 26933.01 samples/s
[Epoch 433] time cost 84.57s, valid loss 4.28, valid ppl 72.47, lr 30.00
[Epoch 433] test loss 4.24, test ppl 69.09
[Epoch 434 Batch 200/372] current loss 4.33, ppl 76.28, throughput 396.16 samples/s, lr 29.57
[Epoch 434] throughput 27123.92 samples/s
[Epoch 434] time cost 83.95s, valid loss 4.28, valid ppl 72.47, lr 30.00
[Epoch 434] test loss 4.24, test ppl 69.09
[Epoch 435 Batch 200/372] current loss 4.34, ppl 76.77, throughput 393.63 samples/s, lr 27.43
[Epoch 435] throughput 26920.91 samples/s
[Epoch 435] time cost 84.49s, valid loss 4.28, valid ppl 72.46, lr 30.00
[Epoch 435] test loss 4.24, test ppl 69.08
[Epoch 436 Batch 200/372] current loss 4.33, ppl 76.26, throughput 398.98 samples/s, lr 28.71
[Epoch 436] throughput 27307.92 samples/s
[Epoch 436] time cost 83.40s, valid loss 4.28, valid ppl 72.46, lr 30.00
[Epoch 436] test loss 4.24, test ppl 69.08
[Epoch 437 Batch 200/372] current loss 4.34, ppl 76.41, throughput 404.83 samples/s, lr 32.57
[Epoch 437] throughput 27000.73 samples/s
[Epoch 437] time cost 84.22s, valid loss 4.28, valid ppl 72.45, lr 30.00
[Epoch 437] test loss 4.24, test ppl 69.07
[Epoch 438 Batch 200/372] current loss 4.35, ppl 77.61, throughput 397.48 samples/s, lr 34.29
[Epoch 438] throughput 27343.14 samples/s
[Epoch 438] time cost 83.31s, valid loss 4.28, valid ppl 72.45, lr 30.00
[Epoch 438] test loss 4.24, test ppl 69.07
[Epoch 439 Batch 200/372] current loss 4.33, ppl 75.70, throughput 401.01 samples/s, lr 31.29
[Epoch 439] throughput 26952.95 samples/s
[Epoch 439] time cost 84.53s, valid loss 4.28, valid ppl 72.45, lr 30.00
[Epoch 439] test loss 4.24, test ppl 69.07
[Epoch 440 Batch 200/372] current loss 4.35, ppl 77.25, throughput 402.58 samples/s, lr 30.43
[Epoch 440] throughput 27550.08 samples/s
[Epoch 440] time cost 82.87s, valid loss 4.28, valid ppl 72.44, lr 30.00
[Epoch 440] test loss 4.24, test ppl 69.06
[Epoch 441 Batch 200/372] current loss 4.34, ppl 76.77, throughput 396.24 samples/s, lr 28.71
[Epoch 441] throughput 27314.83 samples/s
[Epoch 441] time cost 83.46s, valid loss 4.28, valid ppl 72.44, lr 30.00
[Epoch 441] test loss 4.23, test ppl 69.06
[Epoch 442 Batch 200/372] current loss 4.33, ppl 76.05, throughput 405.84 samples/s, lr 27.86
[Epoch 442] throughput 27451.23 samples/s
[Epoch 442] time cost 83.11s, valid loss 4.28, valid ppl 72.44, lr 30.00
[Epoch 442] test loss 4.23, test ppl 69.06
[Epoch 443 Batch 200/372] current loss 4.33, ppl 75.98, throughput 393.90 samples/s, lr 31.29
[Epoch 443] throughput 27048.09 samples/s
[Epoch 443] time cost 84.18s, valid loss 4.28, valid ppl 72.43, lr 30.00
[Epoch 443] test loss 4.23, test ppl 69.05
[Epoch 444 Batch 200/372] current loss 4.34, ppl 76.78, throughput 389.67 samples/s, lr 28.29
[Epoch 444] throughput 26831.25 samples/s
[Epoch 444] time cost 84.82s, valid loss 4.28, valid ppl 72.43, lr 30.00
[Epoch 444] test loss 4.23, test ppl 69.05
[Epoch 445 Batch 200/372] current loss 4.35, ppl 77.14, throughput 399.37 samples/s, lr 29.14
[Epoch 445] throughput 27454.75 samples/s
[Epoch 445] time cost 83.03s, valid loss 4.28, valid ppl 72.42, lr 30.00
[Epoch 445] test loss 4.23, test ppl 69.05
[Epoch 446 Batch 200/372] current loss 4.33, ppl 75.63, throughput 406.28 samples/s, lr 28.71
[Epoch 446] throughput 27403.35 samples/s
[Epoch 446] time cost 83.08s, valid loss 4.28, valid ppl 72.42, lr 30.00
[Epoch 446] test loss 4.23, test ppl 69.05
[Epoch 447 Batch 200/372] current loss 4.35, ppl 77.48, throughput 404.36 samples/s, lr 25.71
[Epoch 447] throughput 27231.93 samples/s
[Epoch 447] time cost 83.70s, valid loss 4.28, valid ppl 72.42, lr 30.00
[Epoch 447] test loss 4.23, test ppl 69.04
[Epoch 448 Batch 200/372] current loss 4.35, ppl 77.10, throughput 402.98 samples/s, lr 13.71
[Epoch 448] throughput 27412.35 samples/s
[Epoch 448] time cost 83.24s, valid loss 4.28, valid ppl 72.41, lr 30.00
[Epoch 448] test loss 4.23, test ppl 69.04
[Epoch 449 Batch 200/372] current loss 4.35, ppl 77.27, throughput 405.22 samples/s, lr 28.71
[Epoch 449] throughput 27380.64 samples/s
[Epoch 449] time cost 83.16s, valid loss 4.28, valid ppl 72.41, lr 30.00
[Epoch 449] test loss 4.23, test ppl 69.04
[Epoch 450 Batch 200/372] current loss 4.34, ppl 76.86, throughput 392.29 samples/s, lr 27.43
[Epoch 450] throughput 27241.53 samples/s
[Epoch 450] time cost 83.65s, valid loss 4.28, valid ppl 72.41, lr 30.00
[Epoch 450] test loss 4.23, test ppl 69.03
[Epoch 451 Batch 200/372] current loss 4.35, ppl 77.52, throughput 400.81 samples/s, lr 29.57
[Epoch 451] throughput 26642.55 samples/s
[Epoch 451] time cost 85.31s, valid loss 4.28, valid ppl 72.40, lr 30.00
[Epoch 451] test loss 4.23, test ppl 69.03
[Epoch 452 Batch 200/372] current loss 4.33, ppl 75.79, throughput 386.31 samples/s, lr 30.43
[Epoch 452] throughput 27193.17 samples/s
[Epoch 452] time cost 83.74s, valid loss 4.28, valid ppl 72.40, lr 30.00
[Epoch 452] test loss 4.23, test ppl 69.03
[Epoch 453 Batch 200/372] current loss 4.33, ppl 75.65, throughput 407.31 samples/s, lr 29.57
[Epoch 453] throughput 27114.69 samples/s
[Epoch 453] time cost 83.99s, valid loss 4.28, valid ppl 72.39, lr 30.00
[Epoch 453] test loss 4.23, test ppl 69.02
[Epoch 454 Batch 200/372] current loss 4.33, ppl 76.29, throughput 400.67 samples/s, lr 30.00
[Epoch 454] throughput 26958.31 samples/s
[Epoch 454] time cost 84.58s, valid loss 4.28, valid ppl 72.39, lr 30.00
[Epoch 454] test loss 4.23, test ppl 69.02
[Epoch 455 Batch 200/372] current loss 4.34, ppl 76.62, throughput 390.98 samples/s, lr 28.71
[Epoch 455] throughput 27099.59 samples/s
[Epoch 455] time cost 84.03s, valid loss 4.28, valid ppl 72.39, lr 30.00
[Epoch 455] test loss 4.23, test ppl 69.02
[Epoch 456 Batch 200/372] current loss 4.34, ppl 76.44, throughput 386.79 samples/s, lr 30.43
[Epoch 456] throughput 26982.12 samples/s
[Epoch 456] time cost 84.47s, valid loss 4.28, valid ppl 72.38, lr 30.00
[Epoch 456] test loss 4.23, test ppl 69.01
[Epoch 457 Batch 200/372] current loss 4.36, ppl 77.90, throughput 386.83 samples/s, lr 12.86
[Epoch 457] throughput 27028.03 samples/s
[Epoch 457] time cost 84.35s, valid loss 4.28, valid ppl 72.38, lr 30.00
[Epoch 457] test loss 4.23, test ppl 69.01
[Epoch 458 Batch 200/372] current loss 4.34, ppl 76.65, throughput 404.51 samples/s, lr 27.43
[Epoch 458] throughput 27216.98 samples/s
[Epoch 458] time cost 83.70s, valid loss 4.28, valid ppl 72.38, lr 30.00
[Epoch 458] test loss 4.23, test ppl 69.01
[Epoch 459 Batch 200/372] current loss 4.35, ppl 77.16, throughput 398.11 samples/s, lr 32.14
[Epoch 459] throughput 26735.28 samples/s
[Epoch 459] time cost 85.03s, valid loss 4.28, valid ppl 72.37, lr 30.00
[Epoch 459] test loss 4.23, test ppl 69.00
[Epoch 460 Batch 200/372] current loss 4.33, ppl 76.27, throughput 392.78 samples/s, lr 31.71
[Epoch 460] throughput 27242.62 samples/s
[Epoch 460] time cost 83.59s, valid loss 4.28, valid ppl 72.37, lr 30.00
[Epoch 460] test loss 4.23, test ppl 69.00
[Epoch 461 Batch 200/372] current loss 4.34, ppl 76.47, throughput 398.85 samples/s, lr 31.29
[Epoch 461] throughput 27128.43 samples/s
[Epoch 461] time cost 83.92s, valid loss 4.28, valid ppl 72.37, lr 30.00
[Epoch 461] test loss 4.23, test ppl 69.00
[Epoch 462 Batch 200/372] current loss 4.34, ppl 76.47, throughput 399.35 samples/s, lr 13.29
[Epoch 462] throughput 26893.49 samples/s
[Epoch 462] time cost 84.59s, valid loss 4.28, valid ppl 72.36, lr 30.00
[Epoch 462] test loss 4.23, test ppl 68.99
[Epoch 463 Batch 200/372] current loss 4.34, ppl 76.85, throughput 394.90 samples/s, lr 30.43
[Epoch 463] throughput 27395.08 samples/s
[Epoch 463] time cost 83.15s, valid loss 4.28, valid ppl 72.36, lr 30.00
[Epoch 463] test loss 4.23, test ppl 68.99
[Epoch 464 Batch 200/372] current loss 4.32, ppl 75.35, throughput 408.46 samples/s, lr 29.57
[Epoch 464] throughput 27239.36 samples/s
[Epoch 464] time cost 83.57s, valid loss 4.28, valid ppl 72.35, lr 30.00
[Epoch 464] test loss 4.23, test ppl 68.99
[Epoch 465 Batch 200/372] current loss 4.33, ppl 75.91, throughput 400.52 samples/s, lr 29.14
[Epoch 465] throughput 27302.91 samples/s
[Epoch 465] time cost 83.48s, valid loss 4.28, valid ppl 72.35, lr 30.00
[Epoch 465] test loss 4.23, test ppl 68.98
[Epoch 466 Batch 200/372] current loss 4.33, ppl 75.62, throughput 397.52 samples/s, lr 28.29
[Epoch 466] throughput 27163.13 samples/s
[Epoch 466] time cost 83.83s, valid loss 4.28, valid ppl 72.35, lr 30.00
[Epoch 466] test loss 4.23, test ppl 68.98
[Epoch 467 Batch 200/372] current loss 4.33, ppl 76.26, throughput 391.66 samples/s, lr 32.57
[Epoch 467] throughput 26770.87 samples/s
[Epoch 467] time cost 84.97s, valid loss 4.28, valid ppl 72.34, lr 30.00
[Epoch 467] test loss 4.23, test ppl 68.98
[Epoch 468 Batch 200/372] current loss 4.33, ppl 75.87, throughput 400.34 samples/s, lr 27.86
[Epoch 468] throughput 27042.85 samples/s
[Epoch 468] time cost 84.19s, valid loss 4.28, valid ppl 72.34, lr 30.00
[Epoch 468] test loss 4.23, test ppl 68.97
[Epoch 469 Batch 200/372] current loss 4.33, ppl 76.12, throughput 401.54 samples/s, lr 30.00
[Epoch 469] throughput 27373.48 samples/s
[Epoch 469] time cost 83.27s, valid loss 4.28, valid ppl 72.34, lr 30.00
[Epoch 469] test loss 4.23, test ppl 68.97
[Epoch 470 Batch 200/372] current loss 4.32, ppl 75.38, throughput 391.24 samples/s, lr 30.00
[Epoch 470] throughput 26811.85 samples/s
[Epoch 470] time cost 84.90s, valid loss 4.28, valid ppl 72.33, lr 30.00
[Epoch 470] test loss 4.23, test ppl 68.97
[Epoch 471 Batch 200/372] current loss 4.33, ppl 75.68, throughput 398.87 samples/s, lr 27.43
[Epoch 471] throughput 27110.13 samples/s
[Epoch 471] time cost 83.99s, valid loss 4.28, valid ppl 72.33, lr 30.00
[Epoch 471] test loss 4.23, test ppl 68.97
[Epoch 472 Batch 200/372] current loss 4.34, ppl 76.44, throughput 396.78 samples/s, lr 28.71
[Epoch 472] throughput 27005.20 samples/s
[Epoch 472] time cost 84.28s, valid loss 4.28, valid ppl 72.33, lr 30.00
[Epoch 472] test loss 4.23, test ppl 68.96
[Epoch 473 Batch 200/372] current loss 4.33, ppl 75.83, throughput 392.95 samples/s, lr 32.14
[Epoch 473] throughput 26972.66 samples/s
[Epoch 473] time cost 84.41s, valid loss 4.28, valid ppl 72.32, lr 30.00
[Epoch 473] test loss 4.23, test ppl 68.96
[Epoch 474 Batch 200/372] current loss 4.32, ppl 75.39, throughput 393.18 samples/s, lr 26.57
[Epoch 474] throughput 26906.58 samples/s
[Epoch 474] time cost 84.62s, valid loss 4.28, valid ppl 72.32, lr 30.00
[Epoch 474] test loss 4.23, test ppl 68.96
[Epoch 475 Batch 200/372] current loss 4.32, ppl 74.81, throughput 403.77 samples/s, lr 28.71
[Epoch 475] throughput 27630.22 samples/s
[Epoch 475] time cost 82.56s, valid loss 4.28, valid ppl 72.32, lr 30.00
[Epoch 475] test loss 4.23, test ppl 68.95
[Epoch 476 Batch 200/372] current loss 4.33, ppl 75.58, throughput 397.02 samples/s, lr 31.29
[Epoch 476] throughput 26905.40 samples/s
[Epoch 476] time cost 84.64s, valid loss 4.28, valid ppl 72.31, lr 30.00
[Epoch 476] test loss 4.23, test ppl 68.95
[Epoch 477 Batch 200/372] current loss 4.34, ppl 76.40, throughput 397.78 samples/s, lr 28.71
[Epoch 477] throughput 26992.58 samples/s
[Epoch 477] time cost 84.30s, valid loss 4.28, valid ppl 72.31, lr 30.00
[Epoch 477] test loss 4.23, test ppl 68.95
[Epoch 478 Batch 200/372] current loss 4.34, ppl 76.96, throughput 405.33 samples/s, lr 27.00
[Epoch 478] throughput 27423.80 samples/s
[Epoch 478] time cost 83.12s, valid loss 4.28, valid ppl 72.30, lr 30.00
[Epoch 478] test loss 4.23, test ppl 68.94
[Epoch 479 Batch 200/372] current loss 4.32, ppl 75.21, throughput 405.57 samples/s, lr 27.43
[Epoch 479] throughput 27195.33 samples/s
[Epoch 479] time cost 83.75s, valid loss 4.28, valid ppl 72.30, lr 30.00
[Epoch 479] test loss 4.23, test ppl 68.94
[Epoch 480 Batch 200/372] current loss 4.33, ppl 75.72, throughput 404.30 samples/s, lr 31.71
[Epoch 480] throughput 27371.15 samples/s
[Epoch 480] time cost 83.27s, valid loss 4.28, valid ppl 72.30, lr 30.00
[Epoch 480] test loss 4.23, test ppl 68.94
[Epoch 481 Batch 200/372] current loss 4.32, ppl 75.56, throughput 401.60 samples/s, lr 22.29
[Epoch 481] throughput 27018.13 samples/s
[Epoch 481] time cost 84.28s, valid loss 4.28, valid ppl 72.29, lr 30.00
[Epoch 481] test loss 4.23, test ppl 68.93
[Epoch 482 Batch 200/372] current loss 4.34, ppl 76.84, throughput 399.00 samples/s, lr 31.29
[Epoch 482] throughput 27096.66 samples/s
[Epoch 482] time cost 84.08s, valid loss 4.28, valid ppl 72.29, lr 30.00
[Epoch 482] test loss 4.23, test ppl 68.93
[Epoch 483 Batch 200/372] current loss 4.33, ppl 75.63, throughput 394.61 samples/s, lr 30.00
[Epoch 483] throughput 27237.33 samples/s
[Epoch 483] time cost 83.60s, valid loss 4.28, valid ppl 72.29, lr 30.00
[Epoch 483] test loss 4.23, test ppl 68.93
[Epoch 484 Batch 200/372] current loss 4.33, ppl 76.15, throughput 402.42 samples/s, lr 27.86
[Epoch 484] throughput 26949.95 samples/s
[Epoch 484] time cost 84.44s, valid loss 4.28, valid ppl 72.28, lr 30.00
[Epoch 484] test loss 4.23, test ppl 68.93
[Epoch 485 Batch 200/372] current loss 4.34, ppl 76.52, throughput 399.68 samples/s, lr 30.00
[Epoch 485] throughput 27132.72 samples/s
[Epoch 485] time cost 83.94s, valid loss 4.28, valid ppl 72.28, lr 30.00
[Epoch 485] test loss 4.23, test ppl 68.92
[Epoch 486 Batch 200/372] current loss 4.32, ppl 75.39, throughput 394.42 samples/s, lr 29.57
[Epoch 486] throughput 26937.54 samples/s
[Epoch 486] time cost 84.46s, valid loss 4.28, valid ppl 72.28, lr 30.00
[Epoch 486] test loss 4.23, test ppl 68.92
[Epoch 487 Batch 200/372] current loss 4.34, ppl 76.81, throughput 395.12 samples/s, lr 32.14
[Epoch 487] throughput 27443.77 samples/s
[Epoch 487] time cost 83.11s, valid loss 4.28, valid ppl 72.27, lr 30.00
[Epoch 487] test loss 4.23, test ppl 68.92
[Epoch 488 Batch 200/372] current loss 4.33, ppl 75.99, throughput 403.79 samples/s, lr 30.43
[Epoch 488] throughput 27344.31 samples/s
[Epoch 488] time cost 83.54s, valid loss 4.28, valid ppl 72.27, lr 30.00
[Epoch 488] test loss 4.23, test ppl 68.91
[Epoch 489 Batch 200/372] current loss 4.32, ppl 75.52, throughput 409.62 samples/s, lr 30.86
[Epoch 489] throughput 27055.28 samples/s
[Epoch 489] time cost 84.14s, valid loss 4.28, valid ppl 72.27, lr 30.00
[Epoch 489] test loss 4.23, test ppl 68.91
[Epoch 490 Batch 200/372] current loss 4.32, ppl 74.97, throughput 398.83 samples/s, lr 32.14
[Epoch 490] throughput 26826.62 samples/s
[Epoch 490] time cost 84.80s, valid loss 4.28, valid ppl 72.26, lr 30.00
[Epoch 490] test loss 4.23, test ppl 68.91
[Epoch 491 Batch 200/372] current loss 4.34, ppl 76.54, throughput 396.79 samples/s, lr 26.14
[Epoch 491] throughput 26927.99 samples/s
[Epoch 491] time cost 84.58s, valid loss 4.28, valid ppl 72.26, lr 30.00
[Epoch 491] test loss 4.23, test ppl 68.91
[Epoch 492 Batch 200/372] current loss 4.33, ppl 76.00, throughput 398.66 samples/s, lr 31.71
[Epoch 492] throughput 27107.71 samples/s
[Epoch 492] time cost 84.06s, valid loss 4.28, valid ppl 72.26, lr 30.00
[Epoch 492] test loss 4.23, test ppl 68.90
[Epoch 493 Batch 200/372] current loss 4.34, ppl 76.64, throughput 394.83 samples/s, lr 33.43
[Epoch 493] throughput 26771.72 samples/s
[Epoch 493] time cost 84.97s, valid loss 4.28, valid ppl 72.25, lr 30.00
[Epoch 493] test loss 4.23, test ppl 68.90
[Epoch 494 Batch 200/372] current loss 4.33, ppl 76.19, throughput 395.61 samples/s, lr 30.00
[Epoch 494] throughput 26990.27 samples/s
[Epoch 494] time cost 84.42s, valid loss 4.28, valid ppl 72.25, lr 30.00
[Epoch 494] test loss 4.23, test ppl 68.90
[Epoch 495 Batch 200/372] current loss 4.32, ppl 75.45, throughput 401.96 samples/s, lr 30.00
[Epoch 495] throughput 27106.51 samples/s
[Epoch 495] time cost 84.03s, valid loss 4.28, valid ppl 72.25, lr 30.00
[Epoch 495] test loss 4.23, test ppl 68.89
[Epoch 496 Batch 200/372] current loss 4.32, ppl 74.99, throughput 402.34 samples/s, lr 32.14
[Epoch 496] throughput 27227.70 samples/s
[Epoch 496] time cost 83.70s, valid loss 4.28, valid ppl 72.24, lr 30.00
[Epoch 496] test loss 4.23, test ppl 68.89
[Epoch 497 Batch 200/372] current loss 4.31, ppl 74.51, throughput 402.65 samples/s, lr 30.43
[Epoch 497] throughput 27234.44 samples/s
[Epoch 497] time cost 83.72s, valid loss 4.28, valid ppl 72.24, lr 30.00
[Epoch 497] test loss 4.23, test ppl 68.89
[Epoch 498 Batch 200/372] current loss 4.31, ppl 74.80, throughput 395.53 samples/s, lr 30.00
[Epoch 498] throughput 27211.16 samples/s
[Epoch 498] time cost 83.70s, valid loss 4.28, valid ppl 72.24, lr 30.00
[Epoch 498] test loss 4.23, test ppl 68.89
[Epoch 499 Batch 200/372] current loss 4.32, ppl 75.35, throughput 398.10 samples/s, lr 32.14
[Epoch 499] throughput 27190.38 samples/s
[Epoch 499] time cost 83.82s, valid loss 4.28, valid ppl 72.23, lr 30.00
[Epoch 499] test loss 4.23, test ppl 68.88
[Epoch 500 Batch 200/372] current loss 4.33, ppl 76.06, throughput 398.47 samples/s, lr 29.14
[Epoch 500] throughput 27135.89 samples/s
[Epoch 500] time cost 83.97s, valid loss 4.28, valid ppl 72.23, lr 30.00
[Epoch 500] test loss 4.23, test ppl 68.88
[Epoch 501 Batch 200/372] current loss 4.33, ppl 75.84, throughput 396.81 samples/s, lr 30.86
[Epoch 501] throughput 26959.21 samples/s
[Epoch 501] time cost 84.42s, valid loss 4.28, valid ppl 72.23, lr 30.00
[Epoch 501] test loss 4.23, test ppl 68.88
[Epoch 502 Batch 200/372] current loss 4.32, ppl 75.28, throughput 397.12 samples/s, lr 29.57
[Epoch 502] throughput 26859.12 samples/s
[Epoch 502] time cost 84.68s, valid loss 4.28, valid ppl 72.22, lr 30.00
[Epoch 502] test loss 4.23, test ppl 68.88
[Epoch 503 Batch 200/372] current loss 4.34, ppl 76.69, throughput 396.74 samples/s, lr 30.00
[Epoch 503] throughput 27129.25 samples/s
[Epoch 503] time cost 83.99s, valid loss 4.28, valid ppl 72.22, lr 30.00
[Epoch 503] test loss 4.23, test ppl 68.87
[Epoch 504 Batch 200/372] current loss 4.32, ppl 75.40, throughput 388.92 samples/s, lr 33.00
[Epoch 504] throughput 27002.97 samples/s
[Epoch 504] time cost 84.23s, valid loss 4.28, valid ppl 72.22, lr 30.00
[Epoch 504] test loss 4.23, test ppl 68.87
[Epoch 505 Batch 200/372] current loss 4.32, ppl 75.44, throughput 390.48 samples/s, lr 28.71
[Epoch 505] throughput 26889.22 samples/s
[Epoch 505] time cost 84.65s, valid loss 4.28, valid ppl 72.22, lr 30.00
[Epoch 505] test loss 4.23, test ppl 68.87
[Epoch 506 Batch 200/372] current loss 4.32, ppl 75.08, throughput 393.27 samples/s, lr 30.00
[Epoch 506] throughput 26956.76 samples/s
[Epoch 506] time cost 84.43s, valid loss 4.28, valid ppl 72.21, lr 30.00
[Epoch 506] test loss 4.23, test ppl 68.87
[Epoch 507 Batch 200/372] current loss 4.32, ppl 75.51, throughput 394.10 samples/s, lr 12.86
[Epoch 507] throughput 26937.38 samples/s
[Epoch 507] time cost 84.46s, valid loss 4.28, valid ppl 72.21, lr 30.00
[Epoch 507] test loss 4.23, test ppl 68.86
[Epoch 508 Batch 200/372] current loss 4.32, ppl 75.37, throughput 403.53 samples/s, lr 28.71
[Epoch 508] throughput 27321.91 samples/s
[Epoch 508] time cost 83.41s, valid loss 4.28, valid ppl 72.21, lr 30.00
[Epoch 508] test loss 4.23, test ppl 68.86
[Epoch 509 Batch 200/372] current loss 4.32, ppl 75.50, throughput 391.04 samples/s, lr 28.29
[Epoch 509] throughput 27099.94 samples/s
[Epoch 509] time cost 83.97s, valid loss 4.28, valid ppl 72.20, lr 30.00
[Epoch 509] test loss 4.23, test ppl 68.86
[Epoch 510 Batch 200/372] current loss 4.33, ppl 76.15, throughput 398.86 samples/s, lr 31.71
[Epoch 510] throughput 27237.24 samples/s
[Epoch 510] time cost 83.69s, valid loss 4.28, valid ppl 72.20, lr 30.00
[Epoch 510] test loss 4.23, test ppl 68.86
[Epoch 511 Batch 200/372] current loss 4.32, ppl 74.84, throughput 392.96 samples/s, lr 31.29
[Epoch 511] throughput 26952.19 samples/s
[Epoch 511] time cost 84.70s, valid loss 4.28, valid ppl 72.20, lr 30.00
[Epoch 511] test loss 4.23, test ppl 68.85
[Epoch 512 Batch 200/372] current loss 4.32, ppl 75.45, throughput 403.71 samples/s, lr 29.57
[Epoch 512] throughput 27307.77 samples/s
[Epoch 512] time cost 83.44s, valid loss 4.28, valid ppl 72.19, lr 30.00
[Epoch 512] test loss 4.23, test ppl 68.85
[Epoch 513 Batch 200/372] current loss 4.33, ppl 75.99, throughput 395.42 samples/s, lr 29.57
[Epoch 513] throughput 27168.86 samples/s
[Epoch 513] time cost 83.86s, valid loss 4.28, valid ppl 72.19, lr 30.00
[Epoch 513] test loss 4.23, test ppl 68.85
[Epoch 514 Batch 200/372] current loss 4.32, ppl 75.12, throughput 396.15 samples/s, lr 30.43
[Epoch 514] throughput 27056.08 samples/s
[Epoch 514] time cost 84.25s, valid loss 4.28, valid ppl 72.19, lr 30.00
[Epoch 514] test loss 4.23, test ppl 68.85
[Epoch 515 Batch 200/372] current loss 4.33, ppl 76.02, throughput 398.22 samples/s, lr 33.86
[Epoch 515] throughput 26973.97 samples/s
[Epoch 515] time cost 84.42s, valid loss 4.28, valid ppl 72.18, lr 30.00
[Epoch 515] test loss 4.23, test ppl 68.84
[Epoch 516 Batch 200/372] current loss 4.31, ppl 74.80, throughput 401.67 samples/s, lr 28.71
[Epoch 516] throughput 27072.37 samples/s
[Epoch 516] time cost 84.08s, valid loss 4.28, valid ppl 72.18, lr 30.00
[Epoch 516] test loss 4.23, test ppl 68.84
[Epoch 517 Batch 200/372] current loss 4.34, ppl 76.38, throughput 398.88 samples/s, lr 32.14
[Epoch 517] throughput 27040.61 samples/s
[Epoch 517] time cost 84.17s, valid loss 4.28, valid ppl 72.18, lr 30.00
[Epoch 517] test loss 4.23, test ppl 68.84
[Epoch 518 Batch 200/372] current loss 4.33, ppl 76.24, throughput 395.99 samples/s, lr 30.43
[Epoch 518] throughput 27143.68 samples/s
[Epoch 518] time cost 83.89s, valid loss 4.28, valid ppl 72.17, lr 30.00
[Epoch 518] test loss 4.23, test ppl 68.84
[Epoch 519 Batch 200/372] current loss 4.32, ppl 75.33, throughput 401.64 samples/s, lr 27.43
[Epoch 519] throughput 27115.33 samples/s
[Epoch 519] time cost 84.03s, valid loss 4.28, valid ppl 72.17, lr 30.00
[Epoch 519] test loss 4.23, test ppl 68.83
[Epoch 520 Batch 200/372] current loss 4.33, ppl 75.83, throughput 397.68 samples/s, lr 26.14
[Epoch 520] throughput 27075.49 samples/s
[Epoch 520] time cost 84.14s, valid loss 4.28, valid ppl 72.17, lr 30.00
[Epoch 520] test loss 4.23, test ppl 68.83
[Epoch 521 Batch 200/372] current loss 4.32, ppl 75.53, throughput 401.24 samples/s, lr 27.43
[Epoch 521] throughput 27195.06 samples/s
[Epoch 521] time cost 83.77s, valid loss 4.28, valid ppl 72.17, lr 30.00
[Epoch 521] test loss 4.23, test ppl 68.83
[Epoch 522 Batch 200/372] current loss 4.31, ppl 74.47, throughput 390.92 samples/s, lr 29.14
[Epoch 522] throughput 26881.58 samples/s
[Epoch 522] time cost 84.66s, valid loss 4.28, valid ppl 72.16, lr 30.00
[Epoch 522] test loss 4.23, test ppl 68.83
[Epoch 523 Batch 200/372] current loss 4.33, ppl 76.05, throughput 394.68 samples/s, lr 11.57
[Epoch 523] throughput 26999.04 samples/s
[Epoch 523] time cost 84.26s, valid loss 4.28, valid ppl 72.16, lr 30.00
[Epoch 523] test loss 4.23, test ppl 68.82
[Epoch 524 Batch 200/372] current loss 4.32, ppl 74.84, throughput 403.25 samples/s, lr 25.29
[Epoch 524] throughput 27298.20 samples/s
[Epoch 524] time cost 83.49s, valid loss 4.28, valid ppl 72.16, lr 30.00
[Epoch 524] test loss 4.23, test ppl 68.82
[Epoch 525 Batch 200/372] current loss 4.33, ppl 75.98, throughput 401.68 samples/s, lr 30.00
[Epoch 525] throughput 27298.43 samples/s
[Epoch 525] time cost 83.45s, valid loss 4.28, valid ppl 72.15, lr 30.00
[Epoch 525] test loss 4.23, test ppl 68.82
[Epoch 526 Batch 200/372] current loss 4.33, ppl 76.29, throughput 395.90 samples/s, lr 33.00
[Epoch 526] throughput 27184.93 samples/s
[Epoch 526] time cost 83.82s, valid loss 4.28, valid ppl 72.15, lr 30.00
[Epoch 526] test loss 4.23, test ppl 68.82
[Epoch 527 Batch 200/372] current loss 4.32, ppl 74.87, throughput 391.56 samples/s, lr 27.86
[Epoch 527] throughput 27132.12 samples/s
[Epoch 527] time cost 83.88s, valid loss 4.28, valid ppl 72.15, lr 30.00
[Epoch 527] test loss 4.23, test ppl 68.81
[Epoch 528 Batch 200/372] current loss 4.32, ppl 75.51, throughput 398.57 samples/s, lr 27.00
[Epoch 528] throughput 27112.49 samples/s
[Epoch 528] time cost 83.99s, valid loss 4.28, valid ppl 72.15, lr 30.00
[Epoch 528] test loss 4.23, test ppl 68.81
[Epoch 529 Batch 200/372] current loss 4.31, ppl 74.44, throughput 396.06 samples/s, lr 26.14
[Epoch 529] throughput 27094.72 samples/s
[Epoch 529] time cost 84.07s, valid loss 4.28, valid ppl 72.14, lr 30.00
[Epoch 529] test loss 4.23, test ppl 68.81
[Epoch 530 Batch 200/372] current loss 4.31, ppl 74.45, throughput 396.86 samples/s, lr 27.86
[Epoch 530] throughput 27153.13 samples/s
[Epoch 530] time cost 83.85s, valid loss 4.28, valid ppl 72.14, lr 30.00
[Epoch 530] test loss 4.23, test ppl 68.81
[Epoch 531 Batch 200/372] current loss 4.32, ppl 74.97, throughput 402.21 samples/s, lr 28.71
[Epoch 531] throughput 27405.30 samples/s
[Epoch 531] time cost 83.31s, valid loss 4.28, valid ppl 72.14, lr 30.00
[Epoch 531] test loss 4.23, test ppl 68.81
[Epoch 532 Batch 200/372] current loss 4.32, ppl 75.11, throughput 401.14 samples/s, lr 31.29
[Epoch 532] throughput 27128.57 samples/s
[Epoch 532] time cost 83.92s, valid loss 4.28, valid ppl 72.14, lr 30.00
[Epoch 532] test loss 4.23, test ppl 68.80
[Epoch 533 Batch 200/372] current loss 4.32, ppl 75.01, throughput 389.84 samples/s, lr 31.29
[Epoch 533] throughput 26969.27 samples/s
[Epoch 533] time cost 84.41s, valid loss 4.28, valid ppl 72.13, lr 30.00
[Epoch 533] test loss 4.23, test ppl 68.80
[Epoch 534 Batch 200/372] current loss 4.32, ppl 75.51, throughput 395.25 samples/s, lr 27.86
[Epoch 534] throughput 27048.62 samples/s
[Epoch 534] time cost 84.10s, valid loss 4.28, valid ppl 72.13, lr 30.00
[Epoch 534] test loss 4.23, test ppl 68.80
[Epoch 535 Batch 200/372] current loss 4.32, ppl 74.96, throughput 397.53 samples/s, lr 26.57
[Epoch 535] throughput 27207.23 samples/s
[Epoch 535] time cost 83.79s, valid loss 4.28, valid ppl 72.13, lr 30.00
[Epoch 535] test loss 4.23, test ppl 68.80
[Epoch 536 Batch 200/372] current loss 4.32, ppl 75.48, throughput 393.96 samples/s, lr 29.57
[Epoch 536] throughput 26998.31 samples/s
[Epoch 536] time cost 84.27s, valid loss 4.28, valid ppl 72.13, lr 30.00
[Epoch 536] test loss 4.23, test ppl 68.80
[Epoch 537 Batch 200/372] current loss 4.31, ppl 74.18, throughput 393.28 samples/s, lr 27.43
[Epoch 537] throughput 26792.27 samples/s
[Epoch 537] time cost 84.97s, valid loss 4.28, valid ppl 72.12, lr 30.00
[Epoch 537] test loss 4.23, test ppl 68.79
[Epoch 538 Batch 200/372] current loss 4.32, ppl 74.90, throughput 391.09 samples/s, lr 33.00
[Epoch 538] throughput 27056.92 samples/s
[Epoch 538] time cost 84.21s, valid loss 4.28, valid ppl 72.12, lr 30.00
[Epoch 538] test loss 4.23, test ppl 68.79
[Epoch 539 Batch 200/372] current loss 4.32, ppl 74.99, throughput 392.67 samples/s, lr 29.57
[Epoch 539] throughput 26660.01 samples/s
[Epoch 539] time cost 85.35s, valid loss 4.28, valid ppl 72.12, lr 30.00
[Epoch 539] test loss 4.23, test ppl 68.79
[Epoch 540 Batch 200/372] current loss 4.33, ppl 75.79, throughput 394.59 samples/s, lr 31.29
[Epoch 540] throughput 27202.44 samples/s
[Epoch 540] time cost 83.75s, valid loss 4.28, valid ppl 72.12, lr 30.00
[Epoch 540] test loss 4.23, test ppl 68.79
[Epoch 541 Batch 200/372] current loss 4.32, ppl 75.32, throughput 385.24 samples/s, lr 30.43
[Epoch 541] throughput 26785.85 samples/s
[Epoch 541] time cost 84.92s, valid loss 4.28, valid ppl 72.11, lr 30.00
[Epoch 541] test loss 4.23, test ppl 68.78
[Epoch 542 Batch 200/372] current loss 4.32, ppl 75.35, throughput 394.70 samples/s, lr 29.14
[Epoch 542] throughput 26791.40 samples/s
[Epoch 542] time cost 84.89s, valid loss 4.28, valid ppl 72.11, lr 30.00
[Epoch 542] test loss 4.23, test ppl 68.78
[Epoch 543 Batch 200/372] current loss 4.31, ppl 74.38, throughput 398.80 samples/s, lr 26.57
[Epoch 543] throughput 26975.05 samples/s
[Epoch 543] time cost 84.37s, valid loss 4.28, valid ppl 72.11, lr 30.00
[Epoch 543] test loss 4.23, test ppl 68.78
[Epoch 544 Batch 200/372] current loss 4.31, ppl 74.62, throughput 394.55 samples/s, lr 32.14
[Epoch 544] throughput 26932.58 samples/s
[Epoch 544] time cost 84.55s, valid loss 4.28, valid ppl 72.11, lr 30.00
[Epoch 544] test loss 4.23, test ppl 68.78
[Epoch 545 Batch 200/372] current loss 4.32, ppl 75.00, throughput 394.40 samples/s, lr 27.86
[Epoch 545] throughput 27469.39 samples/s
[Epoch 545] time cost 82.99s, valid loss 4.28, valid ppl 72.10, lr 30.00
[Epoch 545] test loss 4.23, test ppl 68.78
[Epoch 546 Batch 200/372] current loss 4.31, ppl 74.10, throughput 404.00 samples/s, lr 29.57
[Epoch 546] throughput 27199.91 samples/s
[Epoch 546] time cost 83.82s, valid loss 4.28, valid ppl 72.10, lr 30.00
[Epoch 546] test loss 4.23, test ppl 68.77
[Epoch 547 Batch 200/372] current loss 4.32, ppl 75.11, throughput 402.67 samples/s, lr 28.71
[Epoch 547] throughput 27126.19 samples/s
[Epoch 547] time cost 83.90s, valid loss 4.28, valid ppl 72.10, lr 30.00
[Epoch 547] test loss 4.23, test ppl 68.77
[Epoch 548 Batch 200/372] current loss 4.31, ppl 74.36, throughput 400.10 samples/s, lr 30.00
[Epoch 548] throughput 27257.08 samples/s
[Epoch 548] time cost 83.62s, valid loss 4.28, valid ppl 72.10, lr 30.00
[Epoch 548] test loss 4.23, test ppl 68.77
[Epoch 549 Batch 200/372] current loss 4.31, ppl 74.76, throughput 402.04 samples/s, lr 30.43
[Epoch 549] throughput 26997.72 samples/s
[Epoch 549] time cost 84.34s, valid loss 4.28, valid ppl 72.10, lr 30.00
[Epoch 549] test loss 4.23, test ppl 68.77
[Epoch 550 Batch 200/372] current loss 4.32, ppl 75.13, throughput 394.78 samples/s, lr 28.71
[Epoch 550] throughput 27155.49 samples/s
[Epoch 550] time cost 83.89s, valid loss 4.28, valid ppl 72.09, lr 30.00
[Epoch 550] test loss 4.23, test ppl 68.77
[Epoch 551 Batch 200/372] current loss 4.33, ppl 75.84, throughput 397.38 samples/s, lr 35.57
[Epoch 551] throughput 27308.13 samples/s
[Epoch 551] time cost 83.42s, valid loss 4.28, valid ppl 72.09, lr 30.00
[Epoch 551] test loss 4.23, test ppl 68.77
[Epoch 552 Batch 200/372] current loss 4.33, ppl 75.74, throughput 396.94 samples/s, lr 29.14
[Epoch 552] throughput 27417.43 samples/s
[Epoch 552] time cost 83.12s, valid loss 4.28, valid ppl 72.09, lr 30.00
[Epoch 552] test loss 4.23, test ppl 68.76
[Epoch 553 Batch 200/372] current loss 4.32, ppl 75.37, throughput 401.59 samples/s, lr 29.14
[Epoch 553] throughput 27249.89 samples/s
[Epoch 553] time cost 83.68s, valid loss 4.28, valid ppl 72.09, lr 30.00
[Epoch 553] test loss 4.23, test ppl 68.76
[Epoch 554 Batch 200/372] current loss 4.31, ppl 74.66, throughput 404.36 samples/s, lr 29.14
[Epoch 554] throughput 27418.53 samples/s
[Epoch 554] time cost 83.16s, valid loss 4.28, valid ppl 72.08, lr 30.00
[Epoch 554] test loss 4.23, test ppl 68.76
[Epoch 555 Batch 200/372] current loss 4.31, ppl 74.43, throughput 406.02 samples/s, lr 31.71
[Epoch 555] throughput 27336.14 samples/s
[Epoch 555] time cost 83.30s, valid loss 4.28, valid ppl 72.08, lr 30.00
[Epoch 555] test loss 4.23, test ppl 68.76
[Epoch 556 Batch 200/372] current loss 4.31, ppl 74.19, throughput 397.05 samples/s, lr 32.14
[Epoch 556] throughput 27160.77 samples/s
[Epoch 556] time cost 83.93s, valid loss 4.28, valid ppl 72.08, lr 30.00
[Epoch 556] test loss 4.23, test ppl 68.76
[Epoch 557 Batch 200/372] current loss 4.31, ppl 74.69, throughput 397.95 samples/s, lr 24.86
[Epoch 557] throughput 27070.48 samples/s
[Epoch 557] time cost 84.09s, valid loss 4.28, valid ppl 72.08, lr 30.00
[Epoch 557] test loss 4.23, test ppl 68.75
[Epoch 558 Batch 200/372] current loss 4.32, ppl 75.22, throughput 391.63 samples/s, lr 30.86
[Epoch 558] throughput 27017.98 samples/s
[Epoch 558] time cost 84.33s, valid loss 4.28, valid ppl 72.07, lr 30.00
[Epoch 558] test loss 4.23, test ppl 68.75
[Epoch 559 Batch 200/372] current loss 4.30, ppl 73.75, throughput 397.54 samples/s, lr 29.14
[Epoch 559] throughput 27127.85 samples/s
[Epoch 559] time cost 83.93s, valid loss 4.28, valid ppl 72.07, lr 30.00
[Epoch 559] test loss 4.23, test ppl 68.75
[Epoch 560 Batch 200/372] current loss 4.31, ppl 74.14, throughput 391.60 samples/s, lr 26.14
[Epoch 560] throughput 26840.46 samples/s
[Epoch 560] time cost 84.81s, valid loss 4.28, valid ppl 72.07, lr 30.00
[Epoch 560] test loss 4.23, test ppl 68.75
[Epoch 561 Batch 200/372] current loss 4.31, ppl 74.32, throughput 392.95 samples/s, lr 32.14
[Epoch 561] throughput 26715.22 samples/s
[Epoch 561] time cost 85.13s, valid loss 4.28, valid ppl 72.07, lr 30.00
[Epoch 561] test loss 4.23, test ppl 68.75
[Epoch 562 Batch 200/372] current loss 4.32, ppl 74.97, throughput 402.70 samples/s, lr 33.43
[Epoch 562] throughput 27160.21 samples/s
[Epoch 562] time cost 83.88s, valid loss 4.28, valid ppl 72.07, lr 30.00
[Epoch 562] test loss 4.23, test ppl 68.75
[Epoch 563 Batch 200/372] current loss 4.32, ppl 75.51, throughput 398.96 samples/s, lr 29.14
[Epoch 563] throughput 27098.61 samples/s
[Epoch 563] time cost 84.07s, valid loss 4.28, valid ppl 72.06, lr 30.00
[Epoch 563] test loss 4.23, test ppl 68.74
[Epoch 564 Batch 200/372] current loss 4.31, ppl 74.31, throughput 404.39 samples/s, lr 31.29
[Epoch 564] throughput 27320.34 samples/s
[Epoch 564] time cost 83.43s, valid loss 4.28, valid ppl 72.06, lr 30.00
[Epoch 564] test loss 4.23, test ppl 68.74
[Epoch 565 Batch 200/372] current loss 4.32, ppl 75.46, throughput 392.35 samples/s, lr 27.86
[Epoch 565] throughput 27005.29 samples/s
[Epoch 565] time cost 84.33s, valid loss 4.28, valid ppl 72.06, lr 30.00
[Epoch 565] test loss 4.23, test ppl 68.74
[Epoch 566 Batch 200/372] current loss 4.32, ppl 74.83, throughput 400.39 samples/s, lr 29.14
[Epoch 566] throughput 27064.77 samples/s
[Epoch 566] time cost 84.13s, valid loss 4.28, valid ppl 72.06, lr 30.00
[Epoch 566] test loss 4.23, test ppl 68.74
[Epoch 567 Batch 200/372] current loss 4.31, ppl 74.34, throughput 400.21 samples/s, lr 29.57
[Epoch 567] throughput 27062.30 samples/s
[Epoch 567] time cost 84.19s, valid loss 4.28, valid ppl 72.05, lr 30.00
[Epoch 567] test loss 4.23, test ppl 68.74
[Epoch 568 Batch 200/372] current loss 4.31, ppl 74.40, throughput 396.10 samples/s, lr 30.43
[Epoch 568] throughput 26885.70 samples/s
[Epoch 568] time cost 84.70s, valid loss 4.28, valid ppl 72.05, lr 30.00
[Epoch 568] test loss 4.23, test ppl 68.73
[Epoch 569 Batch 200/372] current loss 4.32, ppl 75.38, throughput 398.56 samples/s, lr 29.57
[Epoch 569] throughput 27137.98 samples/s
[Epoch 569] time cost 83.94s, valid loss 4.28, valid ppl 72.05, lr 30.00
[Epoch 569] test loss 4.23, test ppl 68.73
[Epoch 570 Batch 200/372] current loss 4.31, ppl 74.63, throughput 396.87 samples/s, lr 32.14
[Epoch 570] throughput 27184.57 samples/s
[Epoch 570] time cost 83.83s, valid loss 4.28, valid ppl 72.05, lr 30.00
[Epoch 570] test loss 4.23, test ppl 68.73
[Epoch 571 Batch 200/372] current loss 4.30, ppl 73.34, throughput 403.26 samples/s, lr 30.00
[Epoch 571] throughput 26877.30 samples/s
[Epoch 571] time cost 84.67s, valid loss 4.28, valid ppl 72.04, lr 30.00
[Epoch 571] test loss 4.23, test ppl 68.73
[Epoch 572 Batch 200/372] current loss 4.32, ppl 75.01, throughput 396.01 samples/s, lr 30.86
[Epoch 572] throughput 26831.71 samples/s
[Epoch 572] time cost 84.82s, valid loss 4.28, valid ppl 72.04, lr 30.00
[Epoch 572] test loss 4.23, test ppl 68.73
[Epoch 573 Batch 200/372] current loss 4.31, ppl 74.21, throughput 396.10 samples/s, lr 29.57
[Epoch 573] throughput 27432.50 samples/s
[Epoch 573] time cost 83.14s, valid loss 4.28, valid ppl 72.04, lr 30.00
[Epoch 573] test loss 4.23, test ppl 68.72
[Epoch 574 Batch 200/372] current loss 4.34, ppl 76.38, throughput 396.46 samples/s, lr 31.29
[Epoch 574] throughput 27205.46 samples/s
[Epoch 574] time cost 83.73s, valid loss 4.28, valid ppl 72.04, lr 30.00
[Epoch 574] test loss 4.23, test ppl 68.72
[Epoch 575 Batch 200/372] current loss 4.31, ppl 74.65, throughput 405.31 samples/s, lr 29.14
[Epoch 575] throughput 27363.51 samples/s
[Epoch 575] time cost 83.51s, valid loss 4.28, valid ppl 72.04, lr 30.00
[Epoch 575] test loss 4.23, test ppl 68.72
[Epoch 576 Batch 200/372] current loss 4.31, ppl 74.54, throughput 392.19 samples/s, lr 29.14
[Epoch 576] throughput 26577.64 samples/s
[Epoch 576] time cost 85.50s, valid loss 4.28, valid ppl 72.03, lr 30.00
[Epoch 576] test loss 4.23, test ppl 68.72
[Epoch 577 Batch 200/372] current loss 4.30, ppl 73.77, throughput 397.00 samples/s, lr 31.29
[Epoch 577] throughput 27208.10 samples/s
[Epoch 577] time cost 83.82s, valid loss 4.28, valid ppl 72.03, lr 30.00
[Epoch 577] test loss 4.23, test ppl 68.72
[Epoch 578 Batch 200/372] current loss 4.31, ppl 74.19, throughput 397.80 samples/s, lr 29.14
[Epoch 578] throughput 27019.45 samples/s
[Epoch 578] time cost 84.24s, valid loss 4.28, valid ppl 72.03, lr 30.00
[Epoch 578] test loss 4.23, test ppl 68.72
[Epoch 579 Batch 200/372] current loss 4.30, ppl 73.38, throughput 395.20 samples/s, lr 30.00
[Epoch 579] throughput 26936.28 samples/s
[Epoch 579] time cost 84.50s, valid loss 4.28, valid ppl 72.03, lr 30.00
[Epoch 579] test loss 4.23, test ppl 68.71
[Epoch 580 Batch 200/372] current loss 4.32, ppl 75.31, throughput 397.27 samples/s, lr 25.29
[Epoch 580] throughput 27134.53 samples/s
[Epoch 580] time cost 83.96s, valid loss 4.28, valid ppl 72.02, lr 30.00
[Epoch 580] test loss 4.23, test ppl 68.71
[Epoch 581 Batch 200/372] current loss 4.31, ppl 74.23, throughput 392.39 samples/s, lr 27.43
[Epoch 581] throughput 27244.25 samples/s
[Epoch 581] time cost 83.64s, valid loss 4.28, valid ppl 72.02, lr 30.00
[Epoch 581] test loss 4.23, test ppl 68.71
[Epoch 582 Batch 200/372] current loss 4.30, ppl 73.75, throughput 394.15 samples/s, lr 32.57
[Epoch 582] throughput 27078.63 samples/s
[Epoch 582] time cost 84.14s, valid loss 4.28, valid ppl 72.02, lr 30.00
[Epoch 582] test loss 4.23, test ppl 68.71
[Epoch 583 Batch 200/372] current loss 4.31, ppl 74.23, throughput 400.92 samples/s, lr 27.86
[Epoch 583] throughput 26851.40 samples/s
[Epoch 583] time cost 84.72s, valid loss 4.28, valid ppl 72.02, lr 30.00
[Epoch 583] test loss 4.23, test ppl 68.71
[Epoch 584 Batch 200/372] current loss 4.31, ppl 74.58, throughput 396.65 samples/s, lr 27.86
[Epoch 584] throughput 27066.49 samples/s
[Epoch 584] time cost 84.16s, valid loss 4.28, valid ppl 72.02, lr 30.00
[Epoch 584] test loss 4.23, test ppl 68.71
[Epoch 585 Batch 200/372] current loss 4.30, ppl 73.86, throughput 398.52 samples/s, lr 32.14
[Epoch 585] throughput 26975.22 samples/s
[Epoch 585] time cost 84.42s, valid loss 4.28, valid ppl 72.01, lr 30.00
[Epoch 585] test loss 4.23, test ppl 68.71
[Epoch 586 Batch 200/372] current loss 4.32, ppl 74.98, throughput 401.93 samples/s, lr 32.57
[Epoch 586] throughput 26922.57 samples/s
[Epoch 586] time cost 84.47s, valid loss 4.28, valid ppl 72.01, lr 30.00
[Epoch 586] test loss 4.23, test ppl 68.70
[Epoch 587 Batch 200/372] current loss 4.31, ppl 74.79, throughput 395.29 samples/s, lr 29.14
[Epoch 587] throughput 27280.94 samples/s
[Epoch 587] time cost 83.56s, valid loss 4.28, valid ppl 72.01, lr 30.00
[Epoch 587] test loss 4.23, test ppl 68.70
[Epoch 588 Batch 200/372] current loss 4.31, ppl 74.80, throughput 397.01 samples/s, lr 28.29
[Epoch 588] throughput 27043.29 samples/s
[Epoch 588] time cost 84.17s, valid loss 4.28, valid ppl 72.01, lr 30.00
[Epoch 588] test loss 4.23, test ppl 68.70
[Epoch 589 Batch 200/372] current loss 4.31, ppl 74.29, throughput 398.31 samples/s, lr 33.00
[Epoch 589] throughput 27075.23 samples/s
[Epoch 589] time cost 84.10s, valid loss 4.28, valid ppl 72.01, lr 30.00
[Epoch 589] test loss 4.23, test ppl 68.70
[Epoch 590 Batch 200/372] current loss 4.30, ppl 73.95, throughput 396.39 samples/s, lr 27.86
[Epoch 590] throughput 27108.60 samples/s
[Epoch 590] time cost 84.09s, valid loss 4.28, valid ppl 72.00, lr 30.00
[Epoch 590] test loss 4.23, test ppl 68.70
[Epoch 591 Batch 200/372] current loss 4.31, ppl 74.17, throughput 393.99 samples/s, lr 30.86
[Epoch 591] throughput 26810.81 samples/s
[Epoch 591] time cost 84.89s, valid loss 4.28, valid ppl 72.00, lr 30.00
[Epoch 591] test loss 4.23, test ppl 68.70
[Epoch 592 Batch 200/372] current loss 4.31, ppl 74.60, throughput 395.00 samples/s, lr 33.00
[Epoch 592] throughput 27184.29 samples/s
[Epoch 592] time cost 83.80s, valid loss 4.28, valid ppl 72.00, lr 30.00
[Epoch 592] test loss 4.23, test ppl 68.70
[Epoch 593 Batch 200/372] current loss 4.32, ppl 75.05, throughput 390.24 samples/s, lr 27.43
[Epoch 593] throughput 27164.59 samples/s
[Epoch 593] time cost 83.83s, valid loss 4.28, valid ppl 72.00, lr 30.00
[Epoch 593] test loss 4.23, test ppl 68.69
[Epoch 594 Batch 200/372] current loss 4.30, ppl 74.04, throughput 398.99 samples/s, lr 29.14
[Epoch 594] throughput 27028.76 samples/s
[Epoch 594] time cost 84.20s, valid loss 4.28, valid ppl 72.00, lr 30.00
[Epoch 594] test loss 4.23, test ppl 68.69
[Epoch 595 Batch 200/372] current loss 4.31, ppl 74.11, throughput 404.41 samples/s, lr 29.57
[Epoch 595] throughput 27206.97 samples/s
[Epoch 595] time cost 83.70s, valid loss 4.28, valid ppl 72.00, lr 30.00
[Epoch 595] test loss 4.23, test ppl 68.69
[Epoch 596 Batch 200/372] current loss 4.32, ppl 74.97, throughput 409.63 samples/s, lr 26.57
[Epoch 596] throughput 27184.19 samples/s
[Epoch 596] time cost 83.83s, valid loss 4.28, valid ppl 71.99, lr 30.00
[Epoch 596] test loss 4.23, test ppl 68.69
[Epoch 597 Batch 200/372] current loss 4.30, ppl 73.91, throughput 393.37 samples/s, lr 30.86
[Epoch 597] throughput 27021.46 samples/s
[Epoch 597] time cost 84.25s, valid loss 4.28, valid ppl 71.99, lr 30.00
[Epoch 597] test loss 4.23, test ppl 68.69
[Epoch 598 Batch 200/372] current loss 4.29, ppl 73.31, throughput 399.71 samples/s, lr 28.29
[Epoch 598] throughput 27064.59 samples/s
[Epoch 598] time cost 84.15s, valid loss 4.28, valid ppl 71.99, lr 30.00
[Epoch 598] test loss 4.23, test ppl 68.69
[Epoch 599 Batch 200/372] current loss 4.31, ppl 74.55, throughput 405.46 samples/s, lr 33.00
[Epoch 599] throughput 27383.03 samples/s
[Epoch 599] time cost 83.22s, valid loss 4.28, valid ppl 71.99, lr 30.00
[Epoch 599] test loss 4.23, test ppl 68.69
[Epoch 600 Batch 200/372] current loss 4.31, ppl 74.51, throughput 404.75 samples/s, lr 32.14
[Epoch 600] throughput 27490.97 samples/s
[Epoch 600] time cost 82.96s, valid loss 4.28, valid ppl 71.99, lr 30.00
[Epoch 600] test loss 4.23, test ppl 68.68
[Epoch 601 Batch 200/372] current loss 4.31, ppl 74.30, throughput 399.50 samples/s, lr 32.14
[Epoch 601] throughput 27231.59 samples/s
[Epoch 601] time cost 83.67s, valid loss 4.28, valid ppl 71.98, lr 30.00
[Epoch 601] test loss 4.23, test ppl 68.68
[Epoch 602 Batch 200/372] current loss 4.32, ppl 74.94, throughput 398.40 samples/s, lr 30.00
[Epoch 602] throughput 27092.30 samples/s
[Epoch 602] time cost 84.06s, valid loss 4.28, valid ppl 71.98, lr 30.00
[Epoch 602] test loss 4.23, test ppl 68.68
[Epoch 603 Batch 200/372] current loss 4.30, ppl 74.04, throughput 397.80 samples/s, lr 29.57
[Epoch 603] throughput 27037.05 samples/s
[Epoch 603] time cost 84.22s, valid loss 4.28, valid ppl 71.98, lr 30.00
[Epoch 603] test loss 4.23, test ppl 68.68
[Epoch 604 Batch 200/372] current loss 4.30, ppl 73.61, throughput 392.18 samples/s, lr 30.43
[Epoch 604] throughput 26871.97 samples/s
[Epoch 604] time cost 84.73s, valid loss 4.28, valid ppl 71.98, lr 30.00
[Epoch 604] test loss 4.23, test ppl 68.68
[Epoch 605 Batch 200/372] current loss 4.30, ppl 73.93, throughput 405.76 samples/s, lr 13.29
[Epoch 605] throughput 27106.83 samples/s
[Epoch 605] time cost 83.97s, valid loss 4.28, valid ppl 71.98, lr 30.00
[Epoch 605] test loss 4.23, test ppl 68.68
[Epoch 606 Batch 200/372] current loss 4.30, ppl 73.48, throughput 397.43 samples/s, lr 33.00
[Epoch 606] throughput 27072.05 samples/s
[Epoch 606] time cost 84.05s, valid loss 4.28, valid ppl 71.97, lr 30.00
[Epoch 606] test loss 4.23, test ppl 68.67
[Epoch 607 Batch 200/372] current loss 4.30, ppl 73.46, throughput 390.46 samples/s, lr 30.00
[Epoch 607] throughput 26960.61 samples/s
[Epoch 607] time cost 84.57s, valid loss 4.28, valid ppl 71.97, lr 30.00
[Epoch 607] test loss 4.23, test ppl 68.67
[Epoch 608 Batch 200/372] current loss 4.31, ppl 74.10, throughput 401.69 samples/s, lr 27.43
[Epoch 608] throughput 27448.20 samples/s
[Epoch 608] time cost 83.05s, valid loss 4.28, valid ppl 71.97, lr 30.00
[Epoch 608] test loss 4.23, test ppl 68.67
[Epoch 609 Batch 200/372] current loss 4.31, ppl 74.56, throughput 412.02 samples/s, lr 29.14
[Epoch 609] throughput 27237.76 samples/s
[Epoch 609] time cost 83.68s, valid loss 4.28, valid ppl 71.97, lr 30.00
[Epoch 609] test loss 4.23, test ppl 68.67
[Epoch 610 Batch 200/372] current loss 4.30, ppl 73.81, throughput 404.41 samples/s, lr 33.43
[Epoch 610] throughput 27155.74 samples/s
[Epoch 610] time cost 83.85s, valid loss 4.28, valid ppl 71.97, lr 30.00
[Epoch 610] test loss 4.23, test ppl 68.67
[Epoch 611 Batch 200/372] current loss 4.29, ppl 73.12, throughput 399.99 samples/s, lr 29.14
[Epoch 611] throughput 26981.68 samples/s
[Epoch 611] time cost 84.39s, valid loss 4.28, valid ppl 71.97, lr 30.00
[Epoch 611] test loss 4.23, test ppl 68.67
[Epoch 612 Batch 200/372] current loss 4.29, ppl 72.76, throughput 395.72 samples/s, lr 29.14
[Epoch 612] throughput 27020.66 samples/s
[Epoch 612] time cost 84.30s, valid loss 4.28, valid ppl 71.96, lr 30.00
[Epoch 612] test loss 4.23, test ppl 68.67
[Epoch 613 Batch 200/372] current loss 4.30, ppl 73.87, throughput 397.57 samples/s, lr 29.14
[Epoch 613] throughput 27196.81 samples/s
[Epoch 613] time cost 83.81s, valid loss 4.28, valid ppl 71.96, lr 30.00
[Epoch 613] test loss 4.23, test ppl 68.66
[Epoch 614 Batch 200/372] current loss 4.30, ppl 73.33, throughput 401.28 samples/s, lr 28.29
[Epoch 614] throughput 27205.35 samples/s
[Epoch 614] time cost 83.73s, valid loss 4.28, valid ppl 71.96, lr 30.00
[Epoch 614] test loss 4.23, test ppl 68.66
[Epoch 615 Batch 200/372] current loss 4.29, ppl 72.68, throughput 394.53 samples/s, lr 30.43
[Epoch 615] throughput 27118.50 samples/s
[Epoch 615] time cost 84.00s, valid loss 4.28, valid ppl 71.96, lr 30.00
[Epoch 615] test loss 4.23, test ppl 68.66
[Epoch 616 Batch 200/372] current loss 4.30, ppl 73.65, throughput 397.05 samples/s, lr 28.71
[Epoch 616] throughput 27504.66 samples/s
[Epoch 616] time cost 82.90s, valid loss 4.28, valid ppl 71.96, lr 30.00
[Epoch 616] test loss 4.23, test ppl 68.66
[Epoch 617 Batch 200/372] current loss 4.31, ppl 74.66, throughput 399.37 samples/s, lr 30.86
[Epoch 617] throughput 27266.03 samples/s
[Epoch 617] time cost 83.54s, valid loss 4.28, valid ppl 71.95, lr 30.00
[Epoch 617] test loss 4.23, test ppl 68.66
[Epoch 618 Batch 200/372] current loss 4.30, ppl 73.80, throughput 396.57 samples/s, lr 32.57
[Epoch 618] throughput 27023.93 samples/s
[Epoch 618] time cost 84.29s, valid loss 4.28, valid ppl 71.95, lr 30.00
[Epoch 618] test loss 4.23, test ppl 68.66
[Epoch 619 Batch 200/372] current loss 4.30, ppl 73.92, throughput 396.00 samples/s, lr 31.71
[Epoch 619] throughput 27003.57 samples/s
[Epoch 619] time cost 84.24s, valid loss 4.28, valid ppl 71.95, lr 30.00
[Epoch 619] test loss 4.23, test ppl 68.66
[Epoch 620 Batch 200/372] current loss 4.31, ppl 74.46, throughput 397.59 samples/s, lr 31.71
[Epoch 620] throughput 27212.41 samples/s
[Epoch 620] time cost 83.67s, valid loss 4.28, valid ppl 71.95, lr 30.00
[Epoch 620] test loss 4.23, test ppl 68.65
[Epoch 621 Batch 200/372] current loss 4.31, ppl 74.09, throughput 397.45 samples/s, lr 29.14
[Epoch 621] throughput 27099.13 samples/s
[Epoch 621] time cost 83.99s, valid loss 4.28, valid ppl 71.95, lr 30.00
[Epoch 621] test loss 4.23, test ppl 68.65
[Epoch 622 Batch 200/372] current loss 4.30, ppl 73.40, throughput 397.63 samples/s, lr 29.14
[Epoch 622] throughput 26951.17 samples/s
[Epoch 622] time cost 84.42s, valid loss 4.28, valid ppl 71.94, lr 30.00
[Epoch 622] test loss 4.23, test ppl 68.65
[Epoch 623 Batch 200/372] current loss 4.30, ppl 73.69, throughput 406.21 samples/s, lr 28.71
[Epoch 623] throughput 27180.84 samples/s
[Epoch 623] time cost 83.75s, valid loss 4.28, valid ppl 71.94, lr 30.00
[Epoch 623] test loss 4.23, test ppl 68.65
[Epoch 624 Batch 200/372] current loss 4.29, ppl 72.92, throughput 397.98 samples/s, lr 28.29
[Epoch 624] throughput 27050.25 samples/s
[Epoch 624] time cost 84.20s, valid loss 4.28, valid ppl 71.94, lr 30.00
[Epoch 624] test loss 4.23, test ppl 68.65
[Epoch 625 Batch 200/372] current loss 4.32, ppl 75.25, throughput 398.32 samples/s, lr 29.57
[Epoch 625] throughput 27078.75 samples/s
[Epoch 625] time cost 84.09s, valid loss 4.28, valid ppl 71.94, lr 30.00
[Epoch 625] test loss 4.23, test ppl 68.65
[Epoch 626 Batch 200/372] current loss 4.30, ppl 73.67, throughput 400.72 samples/s, lr 31.71
[Epoch 626] throughput 27114.68 samples/s
[Epoch 626] time cost 83.88s, valid loss 4.28, valid ppl 71.94, lr 30.00
[Epoch 626] test loss 4.23, test ppl 68.65
[Epoch 627 Batch 200/372] current loss 4.31, ppl 74.35, throughput 392.57 samples/s, lr 27.43
[Epoch 627] throughput 27015.11 samples/s
[Epoch 627] time cost 84.21s, valid loss 4.28, valid ppl 71.93, lr 30.00
[Epoch 627] test loss 4.23, test ppl 68.64
[Epoch 628 Batch 200/372] current loss 4.30, ppl 73.78, throughput 394.12 samples/s, lr 30.86
[Epoch 628] throughput 27230.02 samples/s
[Epoch 628] time cost 83.68s, valid loss 4.28, valid ppl 71.93, lr 30.00
[Epoch 628] test loss 4.23, test ppl 68.64
[Epoch 629 Batch 200/372] current loss 4.30, ppl 73.45, throughput 396.09 samples/s, lr 14.57
[Epoch 629] throughput 27026.01 samples/s
[Epoch 629] time cost 84.26s, valid loss 4.28, valid ppl 71.93, lr 30.00
[Epoch 629] test loss 4.23, test ppl 68.64
[Epoch 630 Batch 200/372] current loss 4.31, ppl 74.14, throughput 391.89 samples/s, lr 34.71
[Epoch 630] throughput 27012.28 samples/s
[Epoch 630] time cost 84.29s, valid loss 4.28, valid ppl 71.93, lr 30.00
[Epoch 630] test loss 4.23, test ppl 68.64
[Epoch 631 Batch 200/372] current loss 4.31, ppl 74.11, throughput 392.12 samples/s, lr 27.86
[Epoch 631] throughput 27024.86 samples/s
[Epoch 631] time cost 84.25s, valid loss 4.28, valid ppl 71.93, lr 30.00
[Epoch 631] test loss 4.23, test ppl 68.64
[Epoch 632 Batch 200/372] current loss 4.30, ppl 73.93, throughput 388.81 samples/s, lr 30.43
[Epoch 632] throughput 26873.57 samples/s
[Epoch 632] time cost 84.74s, valid loss 4.28, valid ppl 71.93, lr 30.00
[Epoch 632] test loss 4.23, test ppl 68.64
[Epoch 633 Batch 200/372] current loss 4.30, ppl 73.67, throughput 397.09 samples/s, lr 27.86
[Epoch 633] throughput 27331.92 samples/s
[Epoch 633] time cost 83.30s, valid loss 4.28, valid ppl 71.92, lr 30.00
[Epoch 633] test loss 4.23, test ppl 68.64
[Epoch 634 Batch 200/372] current loss 4.30, ppl 73.93, throughput 397.42 samples/s, lr 30.43
[Epoch 634] throughput 27235.48 samples/s
[Epoch 634] time cost 83.67s, valid loss 4.28, valid ppl 71.92, lr 30.00
[Epoch 634] test loss 4.23, test ppl 68.63
[Epoch 635 Batch 200/372] current loss 4.30, ppl 73.34, throughput 403.70 samples/s, lr 29.14
[Epoch 635] throughput 27232.92 samples/s
[Epoch 635] time cost 83.65s, valid loss 4.28, valid ppl 71.92, lr 30.00
[Epoch 635] test loss 4.23, test ppl 68.63
[Epoch 636 Batch 200/372] current loss 4.31, ppl 74.54, throughput 398.15 samples/s, lr 29.57
[Epoch 636] throughput 27284.27 samples/s
[Epoch 636] time cost 83.57s, valid loss 4.28, valid ppl 71.92, lr 30.00
[Epoch 636] test loss 4.23, test ppl 68.63
[Epoch 637 Batch 200/372] current loss 4.30, ppl 73.39, throughput 393.66 samples/s, lr 26.57
[Epoch 637] throughput 27217.58 samples/s
[Epoch 637] time cost 83.71s, valid loss 4.28, valid ppl 71.92, lr 30.00
[Epoch 637] test loss 4.23, test ppl 68.63
[Epoch 638 Batch 200/372] current loss 4.29, ppl 73.20, throughput 395.63 samples/s, lr 32.57
[Epoch 638] throughput 26997.40 samples/s
[Epoch 638] time cost 84.25s, valid loss 4.28, valid ppl 71.92, lr 30.00
[Epoch 638] test loss 4.23, test ppl 68.63
[Epoch 639 Batch 200/372] current loss 4.30, ppl 73.44, throughput 398.79 samples/s, lr 27.86
[Epoch 639] throughput 27073.29 samples/s
[Epoch 639] time cost 84.10s, valid loss 4.28, valid ppl 71.91, lr 30.00
[Epoch 639] test loss 4.23, test ppl 68.63
[Epoch 640 Batch 200/372] current loss 4.29, ppl 72.72, throughput 404.56 samples/s, lr 32.14
[Epoch 640] throughput 27131.13 samples/s
[Epoch 640] time cost 84.02s, valid loss 4.28, valid ppl 71.91, lr 30.00
[Epoch 640] test loss 4.23, test ppl 68.63
[Epoch 641 Batch 200/372] current loss 4.29, ppl 73.21, throughput 391.59 samples/s, lr 29.14
[Epoch 641] throughput 27173.85 samples/s
[Epoch 641] time cost 83.85s, valid loss 4.28, valid ppl 71.91, lr 30.00
[Epoch 641] test loss 4.23, test ppl 68.63
[Epoch 642 Batch 200/372] current loss 4.30, ppl 73.92, throughput 393.25 samples/s, lr 32.57
[Epoch 642] throughput 27093.75 samples/s
[Epoch 642] time cost 83.99s, valid loss 4.28, valid ppl 71.91, lr 30.00
[Epoch 642] test loss 4.23, test ppl 68.63
[Epoch 643 Batch 200/372] current loss 4.30, ppl 73.59, throughput 407.86 samples/s, lr 27.86
[Epoch 643] throughput 27421.86 samples/s
[Epoch 643] time cost 83.09s, valid loss 4.28, valid ppl 71.91, lr 30.00
[Epoch 643] test loss 4.23, test ppl 68.62
[Epoch 644 Batch 200/372] current loss 4.30, ppl 73.62, throughput 398.79 samples/s, lr 30.43
[Epoch 644] throughput 27087.49 samples/s
[Epoch 644] time cost 84.07s, valid loss 4.28, valid ppl 71.91, lr 30.00
[Epoch 644] test loss 4.23, test ppl 68.62
[Epoch 645 Batch 200/372] current loss 4.31, ppl 74.32, throughput 389.60 samples/s, lr 30.00
[Epoch 645] throughput 27043.81 samples/s
[Epoch 645] time cost 84.16s, valid loss 4.28, valid ppl 71.90, lr 30.00
[Epoch 645] test loss 4.23, test ppl 68.62
[Epoch 646 Batch 200/372] current loss 4.29, ppl 73.17, throughput 397.48 samples/s, lr 30.43
[Epoch 646] throughput 27087.27 samples/s
[Epoch 646] time cost 84.07s, valid loss 4.28, valid ppl 71.90, lr 30.00
[Epoch 646] test loss 4.23, test ppl 68.62
[Epoch 647 Batch 200/372] current loss 4.30, ppl 73.45, throughput 398.90 samples/s, lr 30.86
[Epoch 647] throughput 26881.62 samples/s
[Epoch 647] time cost 84.67s, valid loss 4.28, valid ppl 71.90, lr 30.00
[Epoch 647] test loss 4.23, test ppl 68.62
[Epoch 648 Batch 200/372] current loss 4.29, ppl 73.28, throughput 398.49 samples/s, lr 33.43
[Epoch 648] throughput 27197.79 samples/s
[Epoch 648] time cost 83.76s, valid loss 4.28, valid ppl 71.90, lr 30.00
[Epoch 648] test loss 4.23, test ppl 68.62
[Epoch 649 Batch 200/372] current loss 4.29, ppl 72.79, throughput 403.38 samples/s, lr 31.71
[Epoch 649] throughput 27286.53 samples/s
[Epoch 649] time cost 83.56s, valid loss 4.28, valid ppl 71.90, lr 30.00
[Epoch 649] test loss 4.23, test ppl 68.62
[Epoch 650 Batch 200/372] current loss 4.29, ppl 73.13, throughput 394.42 samples/s, lr 29.14
[Epoch 650] throughput 27134.28 samples/s
[Epoch 650] time cost 83.89s, valid loss 4.28, valid ppl 71.90, lr 30.00
[Epoch 650] test loss 4.23, test ppl 68.62
[Epoch 651 Batch 200/372] current loss 4.30, ppl 73.50, throughput 399.47 samples/s, lr 26.57
[Epoch 651] throughput 26879.74 samples/s
[Epoch 651] time cost 84.69s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 651] test loss 4.23, test ppl 68.61
[Epoch 652 Batch 200/372] current loss 4.31, ppl 74.36, throughput 388.74 samples/s, lr 28.71
[Epoch 652] throughput 27080.03 samples/s
[Epoch 652] time cost 84.06s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 652] test loss 4.23, test ppl 68.61
[Epoch 653 Batch 200/372] current loss 4.29, ppl 73.09, throughput 401.77 samples/s, lr 27.43
[Epoch 653] throughput 27142.98 samples/s
[Epoch 653] time cost 84.01s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 653] test loss 4.23, test ppl 68.61
[Epoch 654 Batch 200/372] current loss 4.30, ppl 73.67, throughput 408.84 samples/s, lr 27.86
[Epoch 654] throughput 27341.89 samples/s
[Epoch 654] time cost 83.30s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 654] test loss 4.23, test ppl 68.61
[Epoch 655 Batch 200/372] current loss 4.31, ppl 74.61, throughput 404.36 samples/s, lr 30.00
[Epoch 655] throughput 27120.62 samples/s
[Epoch 655] time cost 83.88s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 655] test loss 4.23, test ppl 68.61
[Epoch 656 Batch 200/372] current loss 4.30, ppl 73.49, throughput 402.57 samples/s, lr 29.57
[Epoch 656] throughput 27159.95 samples/s
[Epoch 656] time cost 83.86s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 656] test loss 4.23, test ppl 68.61
[Epoch 657 Batch 200/372] current loss 4.30, ppl 74.02, throughput 398.52 samples/s, lr 30.43
[Epoch 657] throughput 27037.18 samples/s
[Epoch 657] time cost 84.25s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 657] test loss 4.23, test ppl 68.61
[Epoch 658 Batch 200/372] current loss 4.29, ppl 73.27, throughput 393.52 samples/s, lr 31.71
[Epoch 658] throughput 26995.88 samples/s
[Epoch 658] time cost 84.36s, valid loss 4.28, valid ppl 71.89, lr 30.00
[Epoch 658] test loss 4.23, test ppl 68.61
[Epoch 659 Batch 200/372] current loss 4.31, ppl 74.69, throughput 397.03 samples/s, lr 30.43
[Epoch 659] throughput 26704.59 samples/s
[Epoch 659] time cost 85.16s, valid loss 4.28, valid ppl 71.88, lr 30.00
[Epoch 659] test loss 4.23, test ppl 68.61
[Epoch 660 Batch 200/372] current loss 4.29, ppl 73.01, throughput 402.63 samples/s, lr 33.43
[Epoch 660] throughput 27132.47 samples/s
[Epoch 660] time cost 83.99s, valid loss 4.28, valid ppl 71.88, lr 30.00
[Epoch 660] test loss 4.23, test ppl 68.61
[Epoch 661 Batch 200/372] current loss 4.29, ppl 73.02, throughput 398.01 samples/s, lr 33.43
[Epoch 661] throughput 26974.11 samples/s
[Epoch 661] time cost 84.38s, valid loss 4.28, valid ppl 71.88, lr 30.00
[Epoch 661] test loss 4.23, test ppl 68.61
[Epoch 662 Batch 200/372] current loss 4.29, ppl 72.98, throughput 401.01 samples/s, lr 28.71
[Epoch 662] throughput 27212.28 samples/s
[Epoch 662] time cost 83.71s, valid loss 4.28, valid ppl 71.88, lr 30.00
[Epoch 662] test loss 4.23, test ppl 68.61
[Epoch 663 Batch 200/372] current loss 4.30, ppl 73.57, throughput 398.54 samples/s, lr 31.71
[Epoch 663] throughput 27317.58 samples/s
[Epoch 663] time cost 83.45s, valid loss 4.27, valid ppl 71.88, lr 30.00
[Epoch 663] test loss 4.23, test ppl 68.61
[Epoch 664 Batch 200/372] current loss 4.30, ppl 73.41, throughput 396.11 samples/s, lr 30.43
[Epoch 664] throughput 26875.06 samples/s
[Epoch 664] time cost 84.69s, valid loss 4.27, valid ppl 71.88, lr 30.00
[Epoch 664] test loss 4.23, test ppl 68.60
[Epoch 665 Batch 200/372] current loss 4.29, ppl 72.95, throughput 391.50 samples/s, lr 28.71
[Epoch 665] throughput 26855.62 samples/s
[Epoch 665] time cost 84.70s, valid loss 4.27, valid ppl 71.88, lr 30.00
[Epoch 665] test loss 4.23, test ppl 68.60
[Epoch 666 Batch 200/372] current loss 4.28, ppl 72.54, throughput 385.20 samples/s, lr 27.86
[Epoch 666] throughput 26917.27 samples/s
[Epoch 666] time cost 84.52s, valid loss 4.27, valid ppl 71.88, lr 30.00
[Epoch 666] test loss 4.23, test ppl 68.60
[Epoch 667 Batch 200/372] current loss 4.29, ppl 73.13, throughput 395.01 samples/s, lr 28.71
[Epoch 667] throughput 27189.37 samples/s
[Epoch 667] time cost 83.77s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 667] test loss 4.23, test ppl 68.60
[Epoch 668 Batch 200/372] current loss 4.29, ppl 72.92, throughput 397.75 samples/s, lr 30.86
[Epoch 668] throughput 26933.94 samples/s
[Epoch 668] time cost 84.45s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 668] test loss 4.23, test ppl 68.60
[Epoch 669 Batch 200/372] current loss 4.30, ppl 73.36, throughput 393.56 samples/s, lr 31.29
[Epoch 669] throughput 27215.17 samples/s
[Epoch 669] time cost 83.67s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 669] test loss 4.23, test ppl 68.60
[Epoch 670 Batch 200/372] current loss 4.30, ppl 73.52, throughput 398.65 samples/s, lr 33.86
[Epoch 670] throughput 26970.43 samples/s
[Epoch 670] time cost 84.40s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 670] test loss 4.23, test ppl 68.60
[Epoch 671 Batch 200/372] current loss 4.29, ppl 72.92, throughput 396.26 samples/s, lr 28.29
[Epoch 671] throughput 26939.31 samples/s
[Epoch 671] time cost 84.43s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 671] test loss 4.23, test ppl 68.60
[Epoch 672 Batch 200/372] current loss 4.29, ppl 72.64, throughput 405.49 samples/s, lr 31.29
[Epoch 672] throughput 27242.72 samples/s
[Epoch 672] time cost 83.60s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 672] test loss 4.23, test ppl 68.60
[Epoch 673 Batch 200/372] current loss 4.29, ppl 73.20, throughput 396.00 samples/s, lr 25.29
[Epoch 673] throughput 27042.11 samples/s
[Epoch 673] time cost 84.25s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 673] test loss 4.23, test ppl 68.60
[Epoch 674 Batch 200/372] current loss 4.30, ppl 73.57, throughput 396.02 samples/s, lr 32.14
[Epoch 674] throughput 26992.34 samples/s
[Epoch 674] time cost 84.35s, valid loss 4.27, valid ppl 71.87, lr 30.00
[Epoch 674] test loss 4.23, test ppl 68.60
[Epoch 675 Batch 200/372] current loss 4.30, ppl 73.89, throughput 405.29 samples/s, lr 30.00
[Epoch 675] throughput 27247.89 samples/s
[Epoch 675] time cost 83.66s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 675] test loss 4.23, test ppl 68.60
[Epoch 676 Batch 200/372] current loss 4.29, ppl 73.16, throughput 401.04 samples/s, lr 31.71
[Epoch 676] throughput 27289.31 samples/s
[Epoch 676] time cost 83.50s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 676] test loss 4.23, test ppl 68.59
[Epoch 677 Batch 200/372] current loss 4.30, ppl 73.38, throughput 396.05 samples/s, lr 28.29
[Epoch 677] throughput 27129.70 samples/s
[Epoch 677] time cost 83.98s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 677] test loss 4.23, test ppl 68.59
[Epoch 678 Batch 200/372] current loss 4.31, ppl 74.52, throughput 399.86 samples/s, lr 30.86
[Epoch 678] throughput 26848.26 samples/s
[Epoch 678] time cost 84.74s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 678] test loss 4.23, test ppl 68.59
[Epoch 679 Batch 200/372] current loss 4.29, ppl 73.21, throughput 394.61 samples/s, lr 28.29
[Epoch 679] throughput 26908.41 samples/s
[Epoch 679] time cost 84.56s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 679] test loss 4.23, test ppl 68.59
[Epoch 680 Batch 200/372] current loss 4.30, ppl 73.42, throughput 397.75 samples/s, lr 32.57
[Epoch 680] throughput 26989.51 samples/s
[Epoch 680] time cost 84.35s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 680] test loss 4.23, test ppl 68.59
[Epoch 681 Batch 200/372] current loss 4.30, ppl 73.36, throughput 397.39 samples/s, lr 28.71
[Epoch 681] throughput 27224.38 samples/s
[Epoch 681] time cost 83.70s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 681] test loss 4.23, test ppl 68.59
[Epoch 682 Batch 200/372] current loss 4.28, ppl 72.34, throughput 406.31 samples/s, lr 27.86
[Epoch 682] throughput 27227.50 samples/s
[Epoch 682] time cost 83.62s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 682] test loss 4.23, test ppl 68.59
[Epoch 683 Batch 200/372] current loss 4.29, ppl 73.05, throughput 400.21 samples/s, lr 27.00
[Epoch 683] throughput 26952.77 samples/s
[Epoch 683] time cost 84.46s, valid loss 4.27, valid ppl 71.86, lr 30.00
[Epoch 683] test loss 4.23, test ppl 68.59
[Epoch 684 Batch 200/372] current loss 4.30, ppl 73.79, throughput 400.74 samples/s, lr 31.29
[Epoch 684] throughput 27354.00 samples/s
[Epoch 684] time cost 83.33s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 684] test loss 4.23, test ppl 68.59
[Epoch 685 Batch 200/372] current loss 4.29, ppl 72.89, throughput 397.32 samples/s, lr 30.43
[Epoch 685] throughput 27374.11 samples/s
[Epoch 685] time cost 83.30s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 685] test loss 4.23, test ppl 68.59
[Epoch 686 Batch 200/372] current loss 4.30, ppl 73.68, throughput 393.53 samples/s, lr 29.14
[Epoch 686] throughput 26913.46 samples/s
[Epoch 686] time cost 84.55s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 686] test loss 4.23, test ppl 68.59
[Epoch 687 Batch 200/372] current loss 4.29, ppl 72.78, throughput 409.13 samples/s, lr 29.14
[Epoch 687] throughput 27386.58 samples/s
[Epoch 687] time cost 83.16s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 687] test loss 4.23, test ppl 68.59
[Epoch 688 Batch 200/372] current loss 4.30, ppl 73.55, throughput 393.69 samples/s, lr 29.57
[Epoch 688] throughput 27115.14 samples/s
[Epoch 688] time cost 84.03s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 688] test loss 4.23, test ppl 68.59
[Epoch 689 Batch 200/372] current loss 4.29, ppl 72.83, throughput 406.72 samples/s, lr 30.00
[Epoch 689] throughput 26888.55 samples/s
[Epoch 689] time cost 84.62s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 689] test loss 4.23, test ppl 68.59
[Epoch 690 Batch 200/372] current loss 4.27, ppl 71.88, throughput 410.13 samples/s, lr 30.86
[Epoch 690] throughput 27426.96 samples/s
[Epoch 690] time cost 83.11s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 690] test loss 4.23, test ppl 68.59
[Epoch 691 Batch 200/372] current loss 4.29, ppl 73.02, throughput 398.44 samples/s, lr 27.43
[Epoch 691] throughput 27161.33 samples/s
[Epoch 691] time cost 83.86s, valid loss 4.27, valid ppl 71.85, lr 30.00
[Epoch 691] test loss 4.23, test ppl 68.59
[Epoch 692 Batch 200/372] current loss 4.29, ppl 72.90, throughput 400.79 samples/s, lr 28.71
[Epoch 692] throughput 27210.53 samples/s
[Epoch 692] time cost 83.73s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 692] test loss 4.23, test ppl 68.58
[Epoch 693 Batch 200/372] current loss 4.28, ppl 72.48, throughput 393.59 samples/s, lr 32.14
[Epoch 693] throughput 27103.78 samples/s
[Epoch 693] time cost 83.93s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 693] test loss 4.23, test ppl 68.58
[Epoch 694 Batch 200/372] current loss 4.30, ppl 73.37, throughput 398.71 samples/s, lr 13.29
[Epoch 694] throughput 27024.49 samples/s
[Epoch 694] time cost 84.32s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 694] test loss 4.23, test ppl 68.58
[Epoch 695 Batch 200/372] current loss 4.30, ppl 73.79, throughput 390.43 samples/s, lr 27.86
[Epoch 695] throughput 26892.33 samples/s
[Epoch 695] time cost 84.69s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 695] test loss 4.23, test ppl 68.58
[Epoch 696 Batch 200/372] current loss 4.30, ppl 73.85, throughput 407.70 samples/s, lr 31.29
[Epoch 696] throughput 27316.22 samples/s
[Epoch 696] time cost 83.45s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 696] test loss 4.23, test ppl 68.58
[Epoch 697 Batch 200/372] current loss 4.30, ppl 73.48, throughput 392.95 samples/s, lr 30.86
[Epoch 697] throughput 27025.62 samples/s
[Epoch 697] time cost 84.24s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 697] test loss 4.23, test ppl 68.58
[Epoch 698 Batch 200/372] current loss 4.29, ppl 72.65, throughput 397.33 samples/s, lr 30.86
[Epoch 698] throughput 26996.80 samples/s
[Epoch 698] time cost 84.38s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 698] test loss 4.23, test ppl 68.58
[Epoch 699 Batch 200/372] current loss 4.30, ppl 73.37, throughput 398.94 samples/s, lr 29.14
[Epoch 699] throughput 27400.68 samples/s
[Epoch 699] time cost 83.21s, valid loss 4.27, valid ppl 71.84, lr 30.00
[Epoch 699] test loss 4.23, test ppl 68.58
[Epoch 700 Batch 200/372] current loss 4.28, ppl 72.11, throughput 397.20 samples/s, lr 29.57
[Epoch 700] throughput 27130.63 samples/s
[Epoch 700] time cost 83.91s, valid loss 4.27, valid ppl 71.83, lr 30.00
[Epoch 700] test loss 4.23, test ppl 68.58
[Epoch 701 Batch 200/372] current loss 4.29, ppl 72.67, throughput 406.91 samples/s, lr 29.57
[Epoch 701] throughput 27272.56 samples/s
[Epoch 701] time cost 83.56s, valid loss 4.27, valid ppl 71.83, lr 30.00
[Epoch 701] test loss 4.23, test ppl 68.58
[Epoch 702 Batch 200/372] current loss 4.30, ppl 73.57, throughput 404.54 samples/s, lr 29.57
[Epoch 702] throughput 27255.44 samples/s
[Epoch 702] time cost 83.57s, valid loss 4.27, valid ppl 71.83, lr 30.00
[Epoch 702] test loss 4.23, test ppl 68.58
[Epoch 703 Batch 200/372] current loss 4.30, ppl 73.71, throughput 397.19 samples/s, lr 29.14
[Epoch 703] throughput 26782.93 samples/s
[Epoch 703] time cost 85.00s, valid loss 4.27, valid ppl 71.83, lr 30.00
[Epoch 703] test loss 4.23, test ppl 68.57
[Epoch 704 Batch 200/372] current loss 4.29, ppl 73.12, throughput 391.61 samples/s, lr 28.29
[Epoch 704] throughput 26920.00 samples/s
[Epoch 704] time cost 84.55s, valid loss 4.27, valid ppl 71.83, lr 30.00
[Epoch 704] test loss 4.23, test ppl 68.57
[Epoch 705 Batch 200/372] current loss 4.29, ppl 73.21, throughput 395.84 samples/s, lr 29.57
[Epoch 705] throughput 27279.45 samples/s
[Epoch 705] time cost 83.57s, valid loss 4.27, valid ppl 71.83, lr 30.00
[Epoch 705] test loss 4.23, test ppl 68.57
[Epoch 706 Batch 200/372] current loss 4.30, ppl 73.34, throughput 402.09 samples/s, lr 28.71
[Epoch 706] throughput 26995.71 samples/s
[Epoch 706] time cost 84.28s, valid loss 4.27, valid ppl 71.83, lr 30.00
[Epoch 706] test loss 4.23, test ppl 68.57
[Epoch 707 Batch 200/372] current loss 4.29, ppl 73.20, throughput 399.90 samples/s, lr 30.86
[Epoch 707] throughput 27127.40 samples/s
[Epoch 707] time cost 83.90s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 707] test loss 4.23, test ppl 68.57
[Epoch 708 Batch 200/372] current loss 4.28, ppl 72.20, throughput 393.62 samples/s, lr 29.14
[Epoch 708] throughput 27085.02 samples/s
[Epoch 708] time cost 84.07s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 708] test loss 4.23, test ppl 68.57
[Epoch 709 Batch 200/372] current loss 4.30, ppl 73.40, throughput 388.03 samples/s, lr 15.86
[Epoch 709] throughput 26987.28 samples/s
[Epoch 709] time cost 84.35s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 709] test loss 4.23, test ppl 68.57
[Epoch 710 Batch 200/372] current loss 4.29, ppl 72.89, throughput 404.59 samples/s, lr 30.86
[Epoch 710] throughput 27129.39 samples/s
[Epoch 710] time cost 83.95s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 710] test loss 4.23, test ppl 68.57
[Epoch 711 Batch 200/372] current loss 4.29, ppl 72.80, throughput 401.99 samples/s, lr 31.29
[Epoch 711] throughput 27543.97 samples/s
[Epoch 711] time cost 82.82s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 711] test loss 4.23, test ppl 68.57
[Epoch 712 Batch 200/372] current loss 4.27, ppl 71.78, throughput 404.11 samples/s, lr 30.43
[Epoch 712] throughput 27304.66 samples/s
[Epoch 712] time cost 83.47s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 712] test loss 4.23, test ppl 68.57
[Epoch 713 Batch 200/372] current loss 4.30, ppl 73.46, throughput 398.58 samples/s, lr 29.14
[Epoch 713] throughput 27119.76 samples/s
[Epoch 713] time cost 84.03s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 713] test loss 4.23, test ppl 68.57
[Epoch 714 Batch 200/372] current loss 4.29, ppl 73.00, throughput 400.29 samples/s, lr 30.86
[Epoch 714] throughput 27185.86 samples/s
[Epoch 714] time cost 83.77s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 714] test loss 4.23, test ppl 68.57
[Epoch 715 Batch 200/372] current loss 4.31, ppl 74.41, throughput 397.94 samples/s, lr 28.29
[Epoch 715] throughput 27130.29 samples/s
[Epoch 715] time cost 83.93s, valid loss 4.27, valid ppl 71.82, lr 30.00
[Epoch 715] test loss 4.23, test ppl 68.57
[Epoch 716 Batch 200/372] current loss 4.30, ppl 73.57, throughput 398.75 samples/s, lr 30.43
[Epoch 716] throughput 27500.63 samples/s
[Epoch 716] time cost 82.91s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 716] test loss 4.23, test ppl 68.57
[Epoch 717 Batch 200/372] current loss 4.29, ppl 72.95, throughput 405.54 samples/s, lr 30.00
[Epoch 717] throughput 27069.58 samples/s
[Epoch 717] time cost 84.10s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 717] test loss 4.23, test ppl 68.57
[Epoch 718 Batch 200/372] current loss 4.29, ppl 72.64, throughput 404.61 samples/s, lr 29.14
[Epoch 718] throughput 27104.33 samples/s
[Epoch 718] time cost 83.97s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 718] test loss 4.23, test ppl 68.57
[Epoch 719 Batch 200/372] current loss 4.28, ppl 72.45, throughput 397.27 samples/s, lr 31.29
[Epoch 719] throughput 27006.16 samples/s
[Epoch 719] time cost 84.30s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 719] test loss 4.23, test ppl 68.56
[Epoch 720 Batch 200/372] current loss 4.28, ppl 72.42, throughput 402.24 samples/s, lr 29.57
[Epoch 720] throughput 27190.25 samples/s
[Epoch 720] time cost 83.78s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 720] test loss 4.23, test ppl 68.56
[Epoch 721 Batch 200/372] current loss 4.28, ppl 72.36, throughput 397.48 samples/s, lr 15.43
[Epoch 721] throughput 27080.07 samples/s
[Epoch 721] time cost 84.09s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 721] test loss 4.23, test ppl 68.56
[Epoch 722 Batch 200/372] current loss 4.29, ppl 72.72, throughput 396.65 samples/s, lr 28.71
[Epoch 722] throughput 27162.59 samples/s
[Epoch 722] time cost 83.86s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 722] test loss 4.23, test ppl 68.56
[Epoch 723 Batch 200/372] current loss 4.29, ppl 72.73, throughput 399.72 samples/s, lr 33.43
[Epoch 723] throughput 27359.51 samples/s
[Epoch 723] time cost 83.27s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 723] test loss 4.23, test ppl 68.56
[Epoch 724 Batch 200/372] current loss 4.28, ppl 72.43, throughput 411.57 samples/s, lr 28.71
[Epoch 724] throughput 27465.23 samples/s
[Epoch 724] time cost 83.01s, valid loss 4.27, valid ppl 71.81, lr 30.00
[Epoch 724] test loss 4.23, test ppl 68.56
[Epoch 725 Batch 200/372] current loss 4.28, ppl 72.27, throughput 404.73 samples/s, lr 29.14
[Epoch 725] throughput 27362.02 samples/s
[Epoch 725] time cost 83.25s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 725] test loss 4.23, test ppl 68.56
[Epoch 726 Batch 200/372] current loss 4.29, ppl 73.25, throughput 397.03 samples/s, lr 31.29
[Epoch 726] throughput 27250.57 samples/s
[Epoch 726] time cost 83.58s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 726] test loss 4.23, test ppl 68.56
[Epoch 727 Batch 200/372] current loss 4.29, ppl 73.21, throughput 396.11 samples/s, lr 32.57
[Epoch 727] throughput 27172.54 samples/s
[Epoch 727] time cost 83.80s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 727] test loss 4.23, test ppl 68.56
[Epoch 728 Batch 200/372] current loss 4.29, ppl 72.66, throughput 403.99 samples/s, lr 33.00
[Epoch 728] throughput 27296.80 samples/s
[Epoch 728] time cost 83.49s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 728] test loss 4.23, test ppl 68.56
[Epoch 729 Batch 200/372] current loss 4.28, ppl 72.04, throughput 398.29 samples/s, lr 32.57
[Epoch 729] throughput 27210.38 samples/s
[Epoch 729] time cost 83.76s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 729] test loss 4.23, test ppl 68.56
[Epoch 730 Batch 200/372] current loss 4.29, ppl 73.29, throughput 394.08 samples/s, lr 33.00
[Epoch 730] throughput 27367.21 samples/s
[Epoch 730] time cost 83.31s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 730] test loss 4.23, test ppl 68.56
[Epoch 731 Batch 200/372] current loss 4.29, ppl 72.85, throughput 391.47 samples/s, lr 31.71
[Epoch 731] throughput 27066.04 samples/s
[Epoch 731] time cost 84.14s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 731] test loss 4.23, test ppl 68.56
[Epoch 732 Batch 200/372] current loss 4.28, ppl 72.20, throughput 395.45 samples/s, lr 26.57
[Epoch 732] throughput 27432.18 samples/s
[Epoch 732] time cost 83.08s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 732] test loss 4.23, test ppl 68.56
[Epoch 733 Batch 200/372] current loss 4.30, ppl 73.48, throughput 396.41 samples/s, lr 31.29
[Epoch 733] throughput 26885.63 samples/s
[Epoch 733] time cost 84.70s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 733] test loss 4.23, test ppl 68.56
[Epoch 734 Batch 200/372] current loss 4.27, ppl 71.62, throughput 402.62 samples/s, lr 14.14
[Epoch 734] throughput 27123.54 samples/s
[Epoch 734] time cost 83.96s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 734] test loss 4.23, test ppl 68.56
[Epoch 735 Batch 200/372] current loss 4.29, ppl 73.00, throughput 398.97 samples/s, lr 30.00
[Epoch 735] throughput 27407.96 samples/s
[Epoch 735] time cost 83.21s, valid loss 4.27, valid ppl 71.80, lr 30.00
[Epoch 735] test loss 4.23, test ppl 68.56
[Epoch 736 Batch 200/372] current loss 4.29, ppl 72.86, throughput 404.83 samples/s, lr 29.14
[Epoch 736] throughput 27541.53 samples/s
[Epoch 736] time cost 82.82s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 736] test loss 4.23, test ppl 68.56
[Epoch 737 Batch 200/372] current loss 4.28, ppl 72.24, throughput 400.77 samples/s, lr 29.57
[Epoch 737] throughput 27253.65 samples/s
[Epoch 737] time cost 83.52s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 737] test loss 4.23, test ppl 68.55
[Epoch 738 Batch 200/372] current loss 4.28, ppl 72.03, throughput 404.60 samples/s, lr 33.43
[Epoch 738] throughput 27461.44 samples/s
[Epoch 738] time cost 83.01s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 738] test loss 4.23, test ppl 68.55
[Epoch 739 Batch 200/372] current loss 4.28, ppl 72.20, throughput 398.14 samples/s, lr 28.29
[Epoch 739] throughput 26818.15 samples/s
[Epoch 739] time cost 84.84s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 739] test loss 4.23, test ppl 68.55
[Epoch 740 Batch 200/372] current loss 4.27, ppl 71.49, throughput 398.59 samples/s, lr 30.00
[Epoch 740] throughput 27149.29 samples/s
[Epoch 740] time cost 83.86s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 740] test loss 4.23, test ppl 68.55
[Epoch 741 Batch 200/372] current loss 4.29, ppl 73.01, throughput 404.56 samples/s, lr 30.86
[Epoch 741] throughput 27376.11 samples/s
[Epoch 741] time cost 83.26s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 741] test loss 4.23, test ppl 68.55
[Epoch 742 Batch 200/372] current loss 4.28, ppl 72.55, throughput 405.52 samples/s, lr 28.71
[Epoch 742] throughput 26953.96 samples/s
[Epoch 742] time cost 84.47s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 742] test loss 4.23, test ppl 68.55
[Epoch 743 Batch 200/372] current loss 4.29, ppl 72.90, throughput 394.68 samples/s, lr 34.29
[Epoch 743] throughput 27022.48 samples/s
[Epoch 743] time cost 84.27s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 743] test loss 4.23, test ppl 68.55
[Epoch 744 Batch 200/372] current loss 4.28, ppl 72.41, throughput 392.91 samples/s, lr 33.43
[Epoch 744] throughput 27076.82 samples/s
[Epoch 744] time cost 84.22s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 744] test loss 4.23, test ppl 68.55
[Epoch 745 Batch 200/372] current loss 4.28, ppl 72.57, throughput 400.93 samples/s, lr 30.86
[Epoch 745] throughput 27322.33 samples/s
[Epoch 745] time cost 83.43s, valid loss 4.27, valid ppl 71.79, lr 30.00
[Epoch 745] test loss 4.23, test ppl 68.55
[Epoch 746 Batch 200/372] current loss 4.30, ppl 73.43, throughput 397.24 samples/s, lr 30.00
[Epoch 746] throughput 27158.39 samples/s
[Epoch 746] time cost 83.81s, valid loss 4.27, valid ppl 71.78, lr 30.00
[Epoch 746] test loss 4.23, test ppl 68.55
[Epoch 747 Batch 200/372] current loss 4.30, ppl 73.41, throughput 392.13 samples/s, lr 29.14
[Epoch 747] throughput 26976.94 samples/s
[Epoch 747] time cost 84.42s, valid loss 4.27, valid ppl 71.78, lr 30.00
[Epoch 747] test loss 4.23, test ppl 68.55
[Epoch 748 Batch 200/372] current loss 4.29, ppl 72.73, throughput 389.94 samples/s, lr 33.00
[Epoch 748] throughput 26757.35 samples/s
[Epoch 748] time cost 85.05s, valid loss 4.27, valid ppl 71.78, lr 30.00
[Epoch 748] test loss 4.23, test ppl 68.55
[Epoch 749 Batch 200/372] current loss 4.28, ppl 72.26, throughput 390.37 samples/s, lr 14.14
[Epoch 749] throughput 26812.26 samples/s
[Epoch 749] time cost 84.87s, valid loss 4.27, valid ppl 71.78, lr 30.00
[Epoch 749] test loss 4.23, test ppl 68.55
Total training throughput 14733.09 samples/s
Best validation loss 4.27, val ppl 71.78
Best test loss 4.23, test ppl 68.55
Total time cost 106385.41s