Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
Namespace(alpha=2, batch_size=80, beta=1, bptt=70, clip=0.25, dropout=0.2, dropout_e=0.05, dropout_h=0.1, dropout_i=0.3, emsize=200, epochs=750, eval_only=False, gpu='0', log_interval=200, lr=30, lr_update_factor=0.1, lr_update_interval=30, model='lstm', nhid=600, nlayers=3, ntasgd=True, optimizer='sgd', save='awd_lstm_lm_600_wikitext-2', test_mode=False, tied=True, wd=1.2e-06, weight_dropout=0.2)
Use AWDRNN
AWDRNN(
(embedding): HybridSequential(
(0): Embedding(33278 -> 200, float32)
(1): Dropout(p = 0.3, axes=(0,))
)
(encoder): HybridSequential(
(0): LSTM(200 -> 600, TNC)
(1): LSTM(600 -> 600, TNC)
(2): LSTM(600 -> 200, TNC)
)
(decoder): HybridSequential(
(0): Dense(200 -> 33278, linear)
)
)
[Epoch 0 Batch 200/372] current loss 7.74, ppl 2288.02, throughput 650.82 samples/s, lr 28.71
[Epoch 0] throughput 44198.16 samples/s
[Epoch 0] time cost 50.10s, valid loss 6.38, valid ppl 592.45, lr 30.00
[Epoch 0] test loss 6.31, test ppl 549.84
[Epoch 1 Batch 200/372] current loss 6.64, ppl 766.72, throughput 633.33 samples/s, lr 30.00
[Epoch 1] throughput 43426.25 samples/s
[Epoch 1] time cost 51.17s, valid loss 5.99, valid ppl 400.90, lr 30.00
[Epoch 1] test loss 5.92, test ppl 372.19
[Epoch 2 Batch 200/372] current loss 6.31, ppl 550.95, throughput 654.39 samples/s, lr 32.57
[Epoch 2] throughput 44243.79 samples/s
[Epoch 2] time cost 50.16s, valid loss 5.74, valid ppl 310.69, lr 30.00
[Epoch 2] test loss 5.67, test ppl 288.70
[Epoch 3 Batch 200/372] current loss 6.10, ppl 444.18, throughput 650.56 samples/s, lr 29.14
[Epoch 3] throughput 44074.94 samples/s
[Epoch 3] time cost 50.32s, valid loss 5.62, valid ppl 276.33, lr 30.00
[Epoch 3] test loss 5.54, test ppl 255.81
[Epoch 4 Batch 200/372] current loss 5.92, ppl 370.67, throughput 649.30 samples/s, lr 29.14
[Epoch 4] throughput 44324.22 samples/s
[Epoch 4] time cost 50.06s, valid loss 5.40, valid ppl 220.83, lr 30.00
[Epoch 4] test loss 5.32, test ppl 204.42
[Epoch 5 Batch 200/372] current loss 5.77, ppl 321.68, throughput 647.33 samples/s, lr 32.57
[Epoch 5] throughput 44292.99 samples/s
[Epoch 5] time cost 50.03s, valid loss 5.29, valid ppl 199.28, lr 30.00
[Epoch 5] test loss 5.22, test ppl 184.12
[Epoch 6 Batch 200/372] current loss 5.67, ppl 289.51, throughput 639.94 samples/s, lr 31.29
[Epoch 6] throughput 44443.05 samples/s
[Epoch 6] time cost 49.97s, valid loss 5.25, valid ppl 190.39, lr 30.00
[Epoch 6] test loss 5.17, test ppl 176.70
[Epoch 7 Batch 200/372] current loss 5.57, ppl 263.28, throughput 644.22 samples/s, lr 13.71
[Epoch 7] throughput 44507.61 samples/s
[Epoch 7] time cost 49.86s, valid loss 5.13, valid ppl 168.18, lr 30.00
[Epoch 7] test loss 5.05, test ppl 156.27
[Epoch 8 Batch 200/372] current loss 5.50, ppl 244.18, throughput 662.58 samples/s, lr 30.43
[Epoch 8] throughput 44688.86 samples/s
[Epoch 8] time cost 49.63s, valid loss 5.04, valid ppl 155.16, lr 30.00
[Epoch 8] test loss 4.97, test ppl 144.62
[Epoch 9 Batch 200/372] current loss 5.42, ppl 225.88, throughput 683.11 samples/s, lr 31.71
[Epoch 9] throughput 44739.28 samples/s
[Epoch 9] time cost 49.61s, valid loss 4.99, valid ppl 146.83, lr 30.00
[Epoch 9] test loss 4.92, test ppl 136.54
[Epoch 10 Batch 200/372] current loss 5.37, ppl 214.31, throughput 637.87 samples/s, lr 33.43
[Epoch 10] throughput 44001.53 samples/s
[Epoch 10] time cost 50.46s, valid loss 4.96, valid ppl 142.89, lr 30.00
[Epoch 10] test loss 4.89, test ppl 132.76
[Epoch 11 Batch 200/372] current loss 5.32, ppl 204.49, throughput 648.52 samples/s, lr 30.43
[Epoch 11] throughput 44460.91 samples/s
[Epoch 11] time cost 49.90s, valid loss 4.92, valid ppl 136.69, lr 30.00
[Epoch 11] test loss 4.85, test ppl 127.58
[Epoch 12 Batch 200/372] current loss 5.26, ppl 191.94, throughput 640.00 samples/s, lr 28.29
[Epoch 12] throughput 43973.53 samples/s
[Epoch 12] time cost 50.42s, valid loss 4.92, valid ppl 137.00, lr 30.00
[Epoch 13 Batch 200/372] current loss 5.20, ppl 181.90, throughput 643.26 samples/s, lr 28.71
[Epoch 13] throughput 44082.73 samples/s
[Epoch 13] time cost 50.31s, valid loss 4.85, valid ppl 128.23, lr 30.00
[Epoch 13] test loss 4.79, test ppl 120.21
[Epoch 14 Batch 200/372] current loss 5.18, ppl 177.20, throughput 666.17 samples/s, lr 30.86
[Epoch 14] throughput 44608.82 samples/s
[Epoch 14] time cost 49.69s, valid loss 4.85, valid ppl 127.85, lr 30.00
[Epoch 14] test loss 4.79, test ppl 119.81
[Epoch 15 Batch 200/372] current loss 5.13, ppl 169.37, throughput 650.57 samples/s, lr 29.57
[Epoch 15] throughput 43556.71 samples/s
[Epoch 15] time cost 50.83s, valid loss 4.86, valid ppl 128.43, lr 30.00
[Epoch 16 Batch 200/372] current loss 5.10, ppl 164.02, throughput 643.99 samples/s, lr 32.14
[Epoch 16] throughput 44424.84 samples/s
[Epoch 16] time cost 49.97s, valid loss 4.78, valid ppl 119.50, lr 30.00
[Epoch 16] test loss 4.72, test ppl 111.94
[Epoch 17 Batch 200/372] current loss 5.06, ppl 157.63, throughput 647.20 samples/s, lr 27.00
[Epoch 17] throughput 44375.80 samples/s
[Epoch 17] time cost 50.09s, valid loss 4.76, valid ppl 116.58, lr 30.00
[Epoch 17] test loss 4.69, test ppl 109.19
[Epoch 18 Batch 200/372] current loss 5.03, ppl 153.11, throughput 634.29 samples/s, lr 33.86
[Epoch 18] throughput 44334.62 samples/s
[Epoch 18] time cost 50.03s, valid loss 4.79, valid ppl 120.30, lr 30.00
[Epoch 19 Batch 200/372] current loss 5.00, ppl 148.68, throughput 650.36 samples/s, lr 29.14
[Epoch 19] throughput 44259.08 samples/s
[Epoch 19] time cost 50.32s, valid loss 4.72, valid ppl 111.80, lr 30.00
[Epoch 19] test loss 4.65, test ppl 104.44
[Epoch 20 Batch 200/372] current loss 4.97, ppl 144.18, throughput 648.58 samples/s, lr 29.57
[Epoch 20] throughput 44451.41 samples/s
[Epoch 20] time cost 49.88s, valid loss 4.70, valid ppl 109.74, lr 30.00
[Epoch 20] test loss 4.63, test ppl 102.37
[Epoch 21 Batch 200/372] current loss 4.94, ppl 139.94, throughput 634.14 samples/s, lr 29.14
[Epoch 21] throughput 42820.37 samples/s
[Epoch 21] time cost 51.69s, valid loss 4.70, valid ppl 110.18, lr 30.00
[Epoch 22 Batch 200/372] current loss 4.93, ppl 138.47, throughput 645.90 samples/s, lr 14.57
[Epoch 22] throughput 44392.20 samples/s
[Epoch 22] time cost 49.97s, valid loss 4.70, valid ppl 110.15, lr 30.00
[Epoch 23 Batch 200/372] current loss 4.91, ppl 135.20, throughput 642.94 samples/s, lr 30.00
[Epoch 23] throughput 44340.26 samples/s
[Epoch 23] time cost 50.10s, valid loss 4.68, valid ppl 107.29, lr 30.00
[Epoch 23] test loss 4.61, test ppl 100.69
[Epoch 24 Batch 200/372] current loss 4.87, ppl 130.95, throughput 648.49 samples/s, lr 19.29
[Epoch 24] throughput 44230.44 samples/s
[Epoch 24] time cost 50.17s, valid loss 4.67, valid ppl 106.22, lr 30.00
[Epoch 24] test loss 4.61, test ppl 100.12
[Epoch 25 Batch 200/372] current loss 4.86, ppl 128.55, throughput 656.92 samples/s, lr 33.00
[Epoch 25] throughput 44743.07 samples/s
[Epoch 25] time cost 49.58s, valid loss 4.65, valid ppl 104.16, lr 30.00
[Epoch 25] test loss 4.58, test ppl 97.89
[Epoch 26 Batch 200/372] current loss 4.85, ppl 127.62, throughput 645.60 samples/s, lr 31.29
[Epoch 26] throughput 44472.71 samples/s
[Epoch 26] time cost 49.87s, valid loss 4.66, valid ppl 106.16, lr 30.00
[Epoch 27 Batch 200/372] current loss 4.83, ppl 124.75, throughput 648.05 samples/s, lr 25.71
[Epoch 27] throughput 43892.91 samples/s
[Epoch 27] time cost 50.48s, valid loss 4.64, valid ppl 103.36, lr 30.00
[Epoch 27] test loss 4.58, test ppl 97.12
[Epoch 28 Batch 200/372] current loss 4.81, ppl 122.32, throughput 648.08 samples/s, lr 27.86
[Epoch 28] throughput 44300.62 samples/s
[Epoch 28] time cost 50.12s, valid loss 4.62, valid ppl 101.75, lr 30.00
[Epoch 28] test loss 4.56, test ppl 95.81
[Epoch 29 Batch 200/372] current loss 4.79, ppl 120.48, throughput 659.91 samples/s, lr 30.00
[Epoch 29] throughput 44449.51 samples/s
[Epoch 29] time cost 49.92s, valid loss 4.61, valid ppl 100.83, lr 30.00
[Epoch 29] test loss 4.56, test ppl 95.17
[Epoch 30 Batch 200/372] current loss 4.78, ppl 118.84, throughput 643.60 samples/s, lr 32.14
[Epoch 30] throughput 43515.09 samples/s
[Epoch 30] time cost 50.93s, valid loss 4.63, valid ppl 102.50, lr 30.00
[Epoch 31 Batch 200/372] current loss 4.75, ppl 115.05, throughput 645.49 samples/s, lr 33.86
[Epoch 31] throughput 43325.90 samples/s
[Epoch 31] time cost 51.19s, valid loss 4.62, valid ppl 101.06, lr 30.00
[Epoch 32 Batch 200/372] current loss 4.75, ppl 115.10, throughput 642.59 samples/s, lr 31.71
[Epoch 32] throughput 43937.59 samples/s
[Epoch 32] time cost 50.45s, valid loss 4.61, valid ppl 100.98, lr 30.00
[Epoch 33 Batch 200/372] current loss 4.73, ppl 113.15, throughput 652.65 samples/s, lr 29.57
[Epoch 33] throughput 44141.01 samples/s
[Epoch 33] time cost 50.28s, valid loss 4.61, valid ppl 100.94, lr 30.00
[Epoch 34 Batch 200/372] current loss 4.70, ppl 110.37, throughput 668.18 samples/s, lr 29.14
[Epoch 34] throughput 44402.29 samples/s
[Epoch 34] time cost 50.04s, valid loss 4.59, valid ppl 98.34, lr 30.00
[Epoch 34] test loss 4.53, test ppl 92.54
[Epoch 35 Batch 200/372] current loss 4.71, ppl 111.27, throughput 645.86 samples/s, lr 27.43
[Epoch 35] throughput 44061.75 samples/s
[Epoch 35] time cost 50.41s, valid loss 4.58, valid ppl 97.54, lr 30.00
[Epoch 35] test loss 4.52, test ppl 91.87
[Epoch 36 Batch 200/372] current loss 4.69, ppl 109.24, throughput 666.47 samples/s, lr 27.43
[Epoch 36] throughput 44988.59 samples/s
[Epoch 36] time cost 49.32s, valid loss 4.57, valid ppl 96.82, lr 30.00
[Epoch 36] test loss 4.51, test ppl 90.90
[Epoch 37 Batch 200/372] current loss 4.67, ppl 107.07, throughput 655.55 samples/s, lr 27.00
[Epoch 37] throughput 44336.11 samples/s
[Epoch 37] time cost 50.00s, valid loss 4.59, valid ppl 98.36, lr 30.00
[Epoch 38 Batch 200/372] current loss 4.66, ppl 105.74, throughput 652.47 samples/s, lr 27.43
[Epoch 38] throughput 43731.19 samples/s
[Epoch 38] time cost 50.63s, valid loss 4.57, valid ppl 96.84, lr 30.00
[Epoch 39 Batch 200/372] current loss 4.66, ppl 105.34, throughput 645.74 samples/s, lr 29.14
[Epoch 39] throughput 43622.05 samples/s
[Epoch 39] time cost 50.81s, valid loss 4.57, valid ppl 96.27, lr 30.00
[Epoch 39] test loss 4.50, test ppl 90.25
[Epoch 40 Batch 200/372] current loss 4.64, ppl 103.12, throughput 636.13 samples/s, lr 14.57
[Epoch 40] throughput 44324.39 samples/s
[Epoch 40] time cost 50.00s, valid loss 4.58, valid ppl 97.21, lr 30.00
[Epoch 41 Batch 200/372] current loss 4.63, ppl 102.42, throughput 634.71 samples/s, lr 27.00
[Epoch 41] throughput 44022.04 samples/s
[Epoch 41] time cost 50.34s, valid loss 4.57, valid ppl 96.62, lr 30.00
[Epoch 42 Batch 200/372] current loss 4.62, ppl 101.32, throughput 649.06 samples/s, lr 28.71
[Epoch 42] throughput 44744.91 samples/s
[Epoch 42] time cost 49.65s, valid loss 4.55, valid ppl 94.17, lr 30.00
[Epoch 42] test loss 4.48, test ppl 88.57
[Epoch 43 Batch 200/372] current loss 4.61, ppl 100.66, throughput 640.14 samples/s, lr 33.00
[Epoch 43] throughput 43091.62 samples/s
[Epoch 43] time cost 51.40s, valid loss 4.55, valid ppl 94.82, lr 30.00
[Epoch 44 Batch 200/372] current loss 4.61, ppl 100.60, throughput 657.50 samples/s, lr 30.43
[Epoch 44] throughput 44312.00 samples/s
[Epoch 44] time cost 50.11s, valid loss 4.57, valid ppl 96.38, lr 30.00
[Epoch 45 Batch 200/372] current loss 4.60, ppl 99.42, throughput 631.02 samples/s, lr 17.57
[Epoch 45] throughput 43348.68 samples/s
[Epoch 45] time cost 51.09s, valid loss 4.57, valid ppl 96.28, lr 30.00
Switching to NTASGD and avg_trigger is : 17112
[Epoch 46 Batch 200/372] current loss 4.59, ppl 98.58, throughput 638.27 samples/s, lr 28.29
[Epoch 46] throughput 41875.81 samples/s
[Epoch 46] time cost 52.83s, valid loss 4.50, valid ppl 89.82, lr 30.00
[Epoch 46] test loss 4.44, test ppl 84.88
[Epoch 47 Batch 200/372] current loss 4.58, ppl 97.39, throughput 638.05 samples/s, lr 28.71
[Epoch 47] throughput 43327.74 samples/s
[Epoch 47] time cost 51.06s, valid loss 4.49, valid ppl 89.44, lr 30.00
[Epoch 47] test loss 4.44, test ppl 84.53
[Epoch 48 Batch 200/372] current loss 4.57, ppl 96.10, throughput 630.81 samples/s, lr 30.43
[Epoch 48] throughput 43206.65 samples/s
[Epoch 48] time cost 51.26s, valid loss 4.49, valid ppl 89.16, lr 30.00
[Epoch 48] test loss 4.43, test ppl 84.27
[Epoch 49 Batch 200/372] current loss 4.57, ppl 96.24, throughput 605.90 samples/s, lr 31.29
[Epoch 49] throughput 41919.85 samples/s
[Epoch 49] time cost 52.76s, valid loss 4.49, valid ppl 88.89, lr 30.00
[Epoch 49] test loss 4.43, test ppl 84.04
[Epoch 50 Batch 200/372] current loss 4.56, ppl 95.94, throughput 621.56 samples/s, lr 30.86
[Epoch 50] throughput 42276.36 samples/s
[Epoch 50] time cost 52.24s, valid loss 4.49, valid ppl 88.68, lr 30.00
[Epoch 50] test loss 4.43, test ppl 83.84
[Epoch 51 Batch 200/372] current loss 4.54, ppl 93.65, throughput 622.65 samples/s, lr 15.43
[Epoch 51] throughput 42635.10 samples/s
[Epoch 51] time cost 51.92s, valid loss 4.48, valid ppl 88.52, lr 30.00
[Epoch 51] test loss 4.43, test ppl 83.69
[Epoch 52 Batch 200/372] current loss 4.54, ppl 93.84, throughput 637.35 samples/s, lr 26.57
[Epoch 52] throughput 42508.08 samples/s
[Epoch 52] time cost 52.06s, valid loss 4.48, valid ppl 88.35, lr 30.00
[Epoch 52] test loss 4.43, test ppl 83.55
[Epoch 53 Batch 200/372] current loss 4.54, ppl 93.42, throughput 624.95 samples/s, lr 27.00
[Epoch 53] throughput 42464.86 samples/s
[Epoch 53] time cost 52.14s, valid loss 4.48, valid ppl 88.20, lr 30.00
[Epoch 53] test loss 4.42, test ppl 83.42
[Epoch 54 Batch 200/372] current loss 4.51, ppl 91.21, throughput 627.37 samples/s, lr 28.29
[Epoch 54] throughput 42577.20 samples/s
[Epoch 54] time cost 51.92s, valid loss 4.48, valid ppl 88.06, lr 30.00
[Epoch 54] test loss 4.42, test ppl 83.29
[Epoch 55 Batch 200/372] current loss 4.51, ppl 90.79, throughput 627.75 samples/s, lr 31.29
[Epoch 55] throughput 41886.00 samples/s
[Epoch 55] time cost 52.70s, valid loss 4.48, valid ppl 87.93, lr 30.00
[Epoch 55] test loss 4.42, test ppl 83.16
[Epoch 56 Batch 200/372] current loss 4.52, ppl 91.47, throughput 618.82 samples/s, lr 27.43
[Epoch 56] throughput 42316.70 samples/s
[Epoch 56] time cost 52.21s, valid loss 4.48, valid ppl 87.80, lr 30.00
[Epoch 56] test loss 4.42, test ppl 83.05
[Epoch 57 Batch 200/372] current loss 4.50, ppl 90.41, throughput 611.52 samples/s, lr 31.71
[Epoch 57] throughput 42165.09 samples/s
[Epoch 57] time cost 52.48s, valid loss 4.47, valid ppl 87.67, lr 30.00
[Epoch 57] test loss 4.42, test ppl 82.94
[Epoch 58 Batch 200/372] current loss 4.49, ppl 89.36, throughput 615.84 samples/s, lr 29.14
[Epoch 58] throughput 42772.39 samples/s
[Epoch 58] time cost 51.74s, valid loss 4.47, valid ppl 87.54, lr 30.00
[Epoch 58] test loss 4.42, test ppl 82.84
[Epoch 59 Batch 200/372] current loss 4.49, ppl 88.85, throughput 607.43 samples/s, lr 26.57
[Epoch 59] throughput 41395.13 samples/s
[Epoch 59] time cost 53.44s, valid loss 4.47, valid ppl 87.43, lr 30.00
[Epoch 59] test loss 4.42, test ppl 82.73
[Epoch 60 Batch 200/372] current loss 4.47, ppl 87.78, throughput 630.80 samples/s, lr 31.71
[Epoch 60] throughput 42870.16 samples/s
[Epoch 60] time cost 51.66s, valid loss 4.47, valid ppl 87.31, lr 30.00
[Epoch 60] test loss 4.41, test ppl 82.63
[Epoch 61 Batch 200/372] current loss 4.48, ppl 87.82, throughput 623.81 samples/s, lr 30.86
[Epoch 61] throughput 41849.03 samples/s
[Epoch 61] time cost 52.74s, valid loss 4.47, valid ppl 87.20, lr 30.00
[Epoch 61] test loss 4.41, test ppl 82.52
[Epoch 62 Batch 200/372] current loss 4.48, ppl 88.04, throughput 615.71 samples/s, lr 31.71
[Epoch 62] throughput 42296.39 samples/s
[Epoch 62] time cost 52.26s, valid loss 4.47, valid ppl 87.09, lr 30.00
[Epoch 62] test loss 4.41, test ppl 82.43
[Epoch 63 Batch 200/372] current loss 4.46, ppl 86.53, throughput 621.55 samples/s, lr 29.57
[Epoch 63] throughput 42202.85 samples/s
[Epoch 63] time cost 52.41s, valid loss 4.47, valid ppl 86.99, lr 30.00
[Epoch 63] test loss 4.41, test ppl 82.33
[Epoch 64 Batch 200/372] current loss 4.48, ppl 88.01, throughput 612.73 samples/s, lr 28.29
[Epoch 64] throughput 42039.58 samples/s
[Epoch 64] time cost 52.55s, valid loss 4.46, valid ppl 86.89, lr 30.00
[Epoch 64] test loss 4.41, test ppl 82.24
[Epoch 65 Batch 200/372] current loss 4.45, ppl 85.89, throughput 620.45 samples/s, lr 31.71
[Epoch 65] throughput 42300.77 samples/s
[Epoch 65] time cost 52.27s, valid loss 4.46, valid ppl 86.79, lr 30.00
[Epoch 65] test loss 4.41, test ppl 82.15
[Epoch 66 Batch 200/372] current loss 4.44, ppl 84.92, throughput 632.49 samples/s, lr 30.00
[Epoch 66] throughput 42374.63 samples/s
[Epoch 66] time cost 52.14s, valid loss 4.46, valid ppl 86.70, lr 30.00
[Epoch 66] test loss 4.41, test ppl 82.06
[Epoch 67 Batch 200/372] current loss 4.45, ppl 85.74, throughput 598.66 samples/s, lr 30.43
[Epoch 67] throughput 41922.16 samples/s
[Epoch 67] time cost 52.68s, valid loss 4.46, valid ppl 86.60, lr 30.00
[Epoch 67] test loss 4.41, test ppl 81.97
[Epoch 68 Batch 200/372] current loss 4.44, ppl 84.62, throughput 620.85 samples/s, lr 29.14
[Epoch 68] throughput 42776.30 samples/s
[Epoch 68] time cost 51.67s, valid loss 4.46, valid ppl 86.50, lr 30.00
[Epoch 68] test loss 4.41, test ppl 81.89
[Epoch 69 Batch 200/372] current loss 4.45, ppl 85.35, throughput 649.11 samples/s, lr 29.57
[Epoch 69] throughput 42931.04 samples/s
[Epoch 69] time cost 51.73s, valid loss 4.46, valid ppl 86.42, lr 30.00
[Epoch 69] test loss 4.40, test ppl 81.80
[Epoch 70 Batch 200/372] current loss 4.44, ppl 85.04, throughput 635.48 samples/s, lr 29.57
[Epoch 70] throughput 42318.56 samples/s
[Epoch 70] time cost 52.19s, valid loss 4.46, valid ppl 86.33, lr 30.00
[Epoch 70] test loss 4.40, test ppl 81.72
[Epoch 71 Batch 200/372] current loss 4.43, ppl 84.20, throughput 636.80 samples/s, lr 32.14
[Epoch 71] throughput 42835.40 samples/s
[Epoch 71] time cost 51.75s, valid loss 4.46, valid ppl 86.25, lr 30.00
[Epoch 71] test loss 4.40, test ppl 81.65
[Epoch 72 Batch 200/372] current loss 4.42, ppl 83.11, throughput 644.80 samples/s, lr 30.00
[Epoch 72] throughput 43489.42 samples/s
[Epoch 72] time cost 50.99s, valid loss 4.46, valid ppl 86.17, lr 30.00
[Epoch 72] test loss 4.40, test ppl 81.57
[Epoch 73 Batch 200/372] current loss 4.44, ppl 84.66, throughput 594.11 samples/s, lr 31.29
[Epoch 73] throughput 41731.42 samples/s
[Epoch 73] time cost 52.88s, valid loss 4.46, valid ppl 86.09, lr 30.00
[Epoch 73] test loss 4.40, test ppl 81.49
[Epoch 74 Batch 200/372] current loss 4.42, ppl 83.09, throughput 608.43 samples/s, lr 29.57
[Epoch 74] throughput 42080.81 samples/s
[Epoch 74] time cost 52.56s, valid loss 4.45, valid ppl 86.01, lr 30.00
[Epoch 74] test loss 4.40, test ppl 81.42
[Epoch 75 Batch 200/372] current loss 4.41, ppl 82.15, throughput 628.36 samples/s, lr 16.29
[Epoch 75] throughput 42419.68 samples/s
[Epoch 75] time cost 52.19s, valid loss 4.45, valid ppl 85.93, lr 30.00
[Epoch 75] test loss 4.40, test ppl 81.34
[Epoch 76 Batch 200/372] current loss 4.42, ppl 82.80, throughput 611.88 samples/s, lr 27.86
[Epoch 76] throughput 42369.68 samples/s
[Epoch 76] time cost 52.20s, valid loss 4.45, valid ppl 85.85, lr 30.00
[Epoch 76] test loss 4.40, test ppl 81.27
[Epoch 77 Batch 200/372] current loss 4.40, ppl 81.82, throughput 608.03 samples/s, lr 26.14
[Epoch 77] throughput 42248.01 samples/s
[Epoch 77] time cost 52.31s, valid loss 4.45, valid ppl 85.78, lr 30.00
[Epoch 77] test loss 4.40, test ppl 81.21
[Epoch 78 Batch 200/372] current loss 4.38, ppl 80.20, throughput 644.05 samples/s, lr 26.14
[Epoch 78] throughput 43045.25 samples/s
[Epoch 78] time cost 51.36s, valid loss 4.45, valid ppl 85.71, lr 30.00
[Epoch 78] test loss 4.40, test ppl 81.14
[Epoch 79 Batch 200/372] current loss 4.39, ppl 80.93, throughput 620.29 samples/s, lr 24.86
[Epoch 79] throughput 42148.68 samples/s
[Epoch 79] time cost 52.41s, valid loss 4.45, valid ppl 85.64, lr 30.00
[Epoch 79] test loss 4.40, test ppl 81.08
[Epoch 80 Batch 200/372] current loss 4.39, ppl 80.49, throughput 627.29 samples/s, lr 26.57
[Epoch 80] throughput 42012.09 samples/s
[Epoch 80] time cost 52.57s, valid loss 4.45, valid ppl 85.57, lr 30.00
[Epoch 80] test loss 4.39, test ppl 81.02
[Epoch 81 Batch 200/372] current loss 4.39, ppl 80.33, throughput 619.81 samples/s, lr 31.71
[Epoch 81] throughput 42319.90 samples/s
[Epoch 81] time cost 52.19s, valid loss 4.45, valid ppl 85.50, lr 30.00
[Epoch 81] test loss 4.39, test ppl 80.96
[Epoch 82 Batch 200/372] current loss 4.40, ppl 81.08, throughput 633.14 samples/s, lr 27.86
[Epoch 82] throughput 42723.98 samples/s
[Epoch 82] time cost 51.75s, valid loss 4.45, valid ppl 85.44, lr 30.00
[Epoch 82] test loss 4.39, test ppl 80.90
[Epoch 83 Batch 200/372] current loss 4.38, ppl 79.69, throughput 623.08 samples/s, lr 30.00
[Epoch 83] throughput 42739.28 samples/s
[Epoch 83] time cost 51.74s, valid loss 4.45, valid ppl 85.37, lr 30.00
[Epoch 83] test loss 4.39, test ppl 80.84
[Epoch 84 Batch 200/372] current loss 4.37, ppl 79.37, throughput 623.88 samples/s, lr 30.43
[Epoch 84] throughput 42153.19 samples/s
[Epoch 84] time cost 52.51s, valid loss 4.45, valid ppl 85.31, lr 30.00
[Epoch 84] test loss 4.39, test ppl 80.79
[Epoch 85 Batch 200/372] current loss 4.39, ppl 80.27, throughput 626.80 samples/s, lr 30.86
[Epoch 85] throughput 42386.08 samples/s
[Epoch 85] time cost 52.21s, valid loss 4.45, valid ppl 85.24, lr 30.00
[Epoch 85] test loss 4.39, test ppl 80.73
[Epoch 86 Batch 200/372] current loss 4.36, ppl 77.97, throughput 595.65 samples/s, lr 30.00
[Epoch 86] throughput 41601.81 samples/s
[Epoch 86] time cost 53.13s, valid loss 4.44, valid ppl 85.18, lr 30.00
[Epoch 86] test loss 4.39, test ppl 80.68
[Epoch 87 Batch 200/372] current loss 4.37, ppl 79.16, throughput 603.91 samples/s, lr 28.71
[Epoch 87] throughput 42503.69 samples/s
[Epoch 87] time cost 52.14s, valid loss 4.44, valid ppl 85.12, lr 30.00
[Epoch 87] test loss 4.39, test ppl 80.63
[Epoch 88 Batch 200/372] current loss 4.36, ppl 78.37, throughput 627.36 samples/s, lr 27.43
[Epoch 88] throughput 42298.30 samples/s
[Epoch 88] time cost 52.22s, valid loss 4.44, valid ppl 85.06, lr 30.00
[Epoch 88] test loss 4.39, test ppl 80.57
[Epoch 89 Batch 200/372] current loss 4.38, ppl 79.57, throughput 626.20 samples/s, lr 30.43
[Epoch 89] throughput 42983.71 samples/s
[Epoch 89] time cost 51.55s, valid loss 4.44, valid ppl 85.00, lr 30.00
[Epoch 89] test loss 4.39, test ppl 80.52
[Epoch 90 Batch 200/372] current loss 4.37, ppl 78.70, throughput 622.36 samples/s, lr 29.57
[Epoch 90] throughput 42838.53 samples/s
[Epoch 90] time cost 51.64s, valid loss 4.44, valid ppl 84.94, lr 30.00
[Epoch 90] test loss 4.39, test ppl 80.47
[Epoch 91 Batch 200/372] current loss 4.35, ppl 77.60, throughput 640.73 samples/s, lr 31.29
[Epoch 91] throughput 43112.23 samples/s
[Epoch 91] time cost 51.31s, valid loss 4.44, valid ppl 84.88, lr 30.00
[Epoch 91] test loss 4.39, test ppl 80.42
[Epoch 92 Batch 200/372] current loss 4.36, ppl 78.13, throughput 627.94 samples/s, lr 28.71
[Epoch 92] throughput 42647.41 samples/s
[Epoch 92] time cost 51.83s, valid loss 4.44, valid ppl 84.83, lr 30.00
[Epoch 92] test loss 4.39, test ppl 80.37
[Epoch 93 Batch 200/372] current loss 4.36, ppl 77.92, throughput 637.66 samples/s, lr 33.43
[Epoch 93] throughput 42212.90 samples/s
[Epoch 93] time cost 52.29s, valid loss 4.44, valid ppl 84.78, lr 30.00
[Epoch 93] test loss 4.39, test ppl 80.32
[Epoch 94 Batch 200/372] current loss 4.34, ppl 76.89, throughput 622.54 samples/s, lr 28.29
[Epoch 94] throughput 42398.41 samples/s
[Epoch 94] time cost 52.09s, valid loss 4.44, valid ppl 84.72, lr 30.00
[Epoch 94] test loss 4.39, test ppl 80.28
[Epoch 95 Batch 200/372] current loss 4.35, ppl 77.14, throughput 611.84 samples/s, lr 30.86
[Epoch 95] throughput 42214.58 samples/s
[Epoch 95] time cost 52.44s, valid loss 4.44, valid ppl 84.67, lr 30.00
[Epoch 95] test loss 4.38, test ppl 80.23
[Epoch 96 Batch 200/372] current loss 4.34, ppl 76.49, throughput 617.29 samples/s, lr 31.71
[Epoch 96] throughput 42103.51 samples/s
[Epoch 96] time cost 52.45s, valid loss 4.44, valid ppl 84.62, lr 30.00
[Epoch 96] test loss 4.38, test ppl 80.18
[Epoch 97 Batch 200/372] current loss 4.35, ppl 77.38, throughput 622.09 samples/s, lr 26.57
[Epoch 97] throughput 42785.34 samples/s
[Epoch 97] time cost 51.78s, valid loss 4.44, valid ppl 84.57, lr 30.00
[Epoch 97] test loss 4.38, test ppl 80.14
[Epoch 98 Batch 200/372] current loss 4.33, ppl 76.18, throughput 598.95 samples/s, lr 31.29
[Epoch 98] throughput 42206.15 samples/s
[Epoch 98] time cost 52.38s, valid loss 4.44, valid ppl 84.52, lr 30.00
[Epoch 98] test loss 4.38, test ppl 80.10
[Epoch 99 Batch 200/372] current loss 4.35, ppl 77.42, throughput 632.05 samples/s, lr 29.57
[Epoch 99] throughput 42833.37 samples/s
[Epoch 99] time cost 51.69s, valid loss 4.44, valid ppl 84.47, lr 30.00
[Epoch 99] test loss 4.38, test ppl 80.05
[Epoch 100 Batch 200/372] current loss 4.34, ppl 76.61, throughput 624.72 samples/s, lr 26.14
[Epoch 100] throughput 43187.75 samples/s
[Epoch 100] time cost 51.29s, valid loss 4.44, valid ppl 84.42, lr 30.00
[Epoch 100] test loss 4.38, test ppl 80.01
[Epoch 101 Batch 200/372] current loss 4.33, ppl 75.89, throughput 630.16 samples/s, lr 31.29
[Epoch 101] throughput 42434.37 samples/s
[Epoch 101] time cost 52.06s, valid loss 4.44, valid ppl 84.37, lr 30.00
[Epoch 101] test loss 4.38, test ppl 79.97
[Epoch 102 Batch 200/372] current loss 4.32, ppl 75.51, throughput 634.73 samples/s, lr 30.86
[Epoch 102] throughput 42820.79 samples/s
[Epoch 102] time cost 51.61s, valid loss 4.43, valid ppl 84.32, lr 30.00
[Epoch 102] test loss 4.38, test ppl 79.93
[Epoch 103 Batch 200/372] current loss 4.32, ppl 75.36, throughput 629.37 samples/s, lr 31.71
[Epoch 103] throughput 42395.48 samples/s
[Epoch 103] time cost 52.13s, valid loss 4.43, valid ppl 84.27, lr 30.00
[Epoch 103] test loss 4.38, test ppl 79.88
[Epoch 104 Batch 200/372] current loss 4.33, ppl 75.84, throughput 611.30 samples/s, lr 30.43
[Epoch 104] throughput 41930.82 samples/s
[Epoch 104] time cost 52.70s, valid loss 4.43, valid ppl 84.23, lr 30.00
[Epoch 104] test loss 4.38, test ppl 79.84
[Epoch 105 Batch 200/372] current loss 4.31, ppl 74.72, throughput 614.01 samples/s, lr 29.57
[Epoch 105] throughput 42483.24 samples/s
[Epoch 105] time cost 52.09s, valid loss 4.43, valid ppl 84.19, lr 30.00
[Epoch 105] test loss 4.38, test ppl 79.81
[Epoch 106 Batch 200/372] current loss 4.32, ppl 75.00, throughput 640.20 samples/s, lr 27.86
[Epoch 106] throughput 42805.04 samples/s
[Epoch 106] time cost 51.64s, valid loss 4.43, valid ppl 84.14, lr 30.00
[Epoch 106] test loss 4.38, test ppl 79.77
[Epoch 107 Batch 200/372] current loss 4.31, ppl 74.70, throughput 619.72 samples/s, lr 30.86
[Epoch 107] throughput 42513.68 samples/s
[Epoch 107] time cost 52.11s, valid loss 4.43, valid ppl 84.10, lr 30.00
[Epoch 107] test loss 4.38, test ppl 79.73
[Epoch 108 Batch 200/372] current loss 4.31, ppl 74.74, throughput 621.66 samples/s, lr 29.14
[Epoch 108] throughput 42665.14 samples/s
[Epoch 108] time cost 51.92s, valid loss 4.43, valid ppl 84.06, lr 30.00
[Epoch 108] test loss 4.38, test ppl 79.69
[Epoch 109 Batch 200/372] current loss 4.32, ppl 74.92, throughput 617.69 samples/s, lr 32.14
[Epoch 109] throughput 42151.91 samples/s
[Epoch 109] time cost 52.47s, valid loss 4.43, valid ppl 84.02, lr 30.00
[Epoch 109] test loss 4.38, test ppl 79.66
[Epoch 110 Batch 200/372] current loss 4.29, ppl 73.04, throughput 632.90 samples/s, lr 29.57
[Epoch 110] throughput 42212.33 samples/s
[Epoch 110] time cost 52.37s, valid loss 4.43, valid ppl 83.98, lr 30.00
[Epoch 110] test loss 4.38, test ppl 79.62
[Epoch 111 Batch 200/372] current loss 4.30, ppl 73.96, throughput 629.84 samples/s, lr 32.57
[Epoch 111] throughput 42814.44 samples/s
[Epoch 111] time cost 51.74s, valid loss 4.43, valid ppl 83.95, lr 30.00
[Epoch 111] test loss 4.38, test ppl 79.58
[Epoch 112 Batch 200/372] current loss 4.30, ppl 73.99, throughput 644.73 samples/s, lr 30.00
[Epoch 112] throughput 43096.30 samples/s
[Epoch 112] time cost 51.34s, valid loss 4.43, valid ppl 83.91, lr 30.00
[Epoch 112] test loss 4.38, test ppl 79.55
[Epoch 113 Batch 200/372] current loss 4.30, ppl 73.85, throughput 611.62 samples/s, lr 28.71
[Epoch 113] throughput 42306.62 samples/s
[Epoch 113] time cost 52.31s, valid loss 4.43, valid ppl 83.87, lr 30.00
[Epoch 113] test loss 4.38, test ppl 79.52
[Epoch 114 Batch 200/372] current loss 4.29, ppl 72.89, throughput 630.09 samples/s, lr 33.00
[Epoch 114] throughput 43033.33 samples/s
[Epoch 114] time cost 51.46s, valid loss 4.43, valid ppl 83.84, lr 30.00
[Epoch 114] test loss 4.38, test ppl 79.49
[Epoch 115 Batch 200/372] current loss 4.29, ppl 72.80, throughput 610.95 samples/s, lr 29.14
[Epoch 115] throughput 41864.82 samples/s
[Epoch 115] time cost 52.80s, valid loss 4.43, valid ppl 83.80, lr 30.00
[Epoch 115] test loss 4.38, test ppl 79.45
[Epoch 116 Batch 200/372] current loss 4.28, ppl 72.41, throughput 619.12 samples/s, lr 32.57
[Epoch 116] throughput 43114.96 samples/s
[Epoch 116] time cost 51.30s, valid loss 4.43, valid ppl 83.76, lr 30.00
[Epoch 116] test loss 4.37, test ppl 79.42
[Epoch 117 Batch 200/372] current loss 4.29, ppl 73.24, throughput 610.93 samples/s, lr 29.57
[Epoch 117] throughput 42872.57 samples/s
[Epoch 117] time cost 51.57s, valid loss 4.43, valid ppl 83.73, lr 30.00
[Epoch 117] test loss 4.37, test ppl 79.39
[Epoch 118 Batch 200/372] current loss 4.29, ppl 73.00, throughput 637.12 samples/s, lr 31.29
[Epoch 118] throughput 42507.10 samples/s
[Epoch 118] time cost 52.15s, valid loss 4.43, valid ppl 83.69, lr 30.00
[Epoch 118] test loss 4.37, test ppl 79.36
[Epoch 119 Batch 200/372] current loss 4.27, ppl 71.81, throughput 615.58 samples/s, lr 30.43
[Epoch 119] throughput 42060.26 samples/s
[Epoch 119] time cost 52.53s, valid loss 4.43, valid ppl 83.66, lr 30.00
[Epoch 119] test loss 4.37, test ppl 79.33
[Epoch 120 Batch 200/372] current loss 4.28, ppl 72.09, throughput 622.62 samples/s, lr 29.14
[Epoch 120] throughput 42323.88 samples/s
[Epoch 120] time cost 52.25s, valid loss 4.43, valid ppl 83.63, lr 30.00
[Epoch 120] test loss 4.37, test ppl 79.29
[Epoch 121 Batch 200/372] current loss 4.28, ppl 72.20, throughput 636.11 samples/s, lr 28.71
[Epoch 121] throughput 42903.70 samples/s
[Epoch 121] time cost 51.54s, valid loss 4.43, valid ppl 83.60, lr 30.00
[Epoch 121] test loss 4.37, test ppl 79.26
[Epoch 122 Batch 200/372] current loss 4.27, ppl 71.46, throughput 619.79 samples/s, lr 27.43
[Epoch 122] throughput 42450.96 samples/s
[Epoch 122] time cost 52.15s, valid loss 4.43, valid ppl 83.56, lr 30.00
[Epoch 122] test loss 4.37, test ppl 79.23
[Epoch 123 Batch 200/372] current loss 4.28, ppl 72.31, throughput 622.97 samples/s, lr 31.29
[Epoch 123] throughput 42299.64 samples/s
[Epoch 123] time cost 52.30s, valid loss 4.43, valid ppl 83.53, lr 30.00
[Epoch 123] test loss 4.37, test ppl 79.20
[Epoch 124 Batch 200/372] current loss 4.27, ppl 71.71, throughput 604.45 samples/s, lr 30.43
[Epoch 124] throughput 41669.58 samples/s
[Epoch 124] time cost 53.01s, valid loss 4.42, valid ppl 83.50, lr 30.00
[Epoch 124] test loss 4.37, test ppl 79.17
[Epoch 125 Batch 200/372] current loss 4.29, ppl 72.97, throughput 624.41 samples/s, lr 27.86
[Epoch 125] throughput 41211.76 samples/s
[Epoch 125] time cost 53.65s, valid loss 4.42, valid ppl 83.47, lr 30.00
[Epoch 125] test loss 4.37, test ppl 79.14
[Epoch 126 Batch 200/372] current loss 4.27, ppl 71.57, throughput 622.67 samples/s, lr 31.29
[Epoch 126] throughput 42384.30 samples/s
[Epoch 126] time cost 52.17s, valid loss 4.42, valid ppl 83.44, lr 30.00
[Epoch 126] test loss 4.37, test ppl 79.12
[Epoch 127 Batch 200/372] current loss 4.27, ppl 71.71, throughput 622.63 samples/s, lr 28.71
[Epoch 127] throughput 42546.11 samples/s
[Epoch 127] time cost 51.95s, valid loss 4.42, valid ppl 83.41, lr 30.00
[Epoch 127] test loss 4.37, test ppl 79.09
[Epoch 128 Batch 200/372] current loss 4.27, ppl 71.85, throughput 625.10 samples/s, lr 28.71
[Epoch 128] throughput 42250.49 samples/s
[Epoch 128] time cost 52.27s, valid loss 4.42, valid ppl 83.38, lr 30.00
[Epoch 128] test loss 4.37, test ppl 79.06
[Epoch 129 Batch 200/372] current loss 4.27, ppl 71.29, throughput 628.77 samples/s, lr 31.71
[Epoch 129] throughput 42724.51 samples/s
[Epoch 129] time cost 51.77s, valid loss 4.42, valid ppl 83.35, lr 30.00
[Epoch 129] test loss 4.37, test ppl 79.03
[Epoch 130 Batch 200/372] current loss 4.26, ppl 70.52, throughput 617.96 samples/s, lr 28.29
[Epoch 130] throughput 41837.55 samples/s
[Epoch 130] time cost 52.78s, valid loss 4.42, valid ppl 83.32, lr 30.00
[Epoch 130] test loss 4.37, test ppl 79.00
[Epoch 131 Batch 200/372] current loss 4.27, ppl 71.54, throughput 625.81 samples/s, lr 33.86
[Epoch 131] throughput 42176.83 samples/s
[Epoch 131] time cost 52.43s, valid loss 4.42, valid ppl 83.29, lr 30.00
[Epoch 131] test loss 4.37, test ppl 78.98
[Epoch 132 Batch 200/372] current loss 4.25, ppl 70.22, throughput 619.91 samples/s, lr 28.71
[Epoch 132] throughput 41883.92 samples/s
[Epoch 132] time cost 52.71s, valid loss 4.42, valid ppl 83.26, lr 30.00
[Epoch 132] test loss 4.37, test ppl 78.95
[Epoch 133 Batch 200/372] current loss 4.26, ppl 71.06, throughput 605.59 samples/s, lr 26.14
[Epoch 133] throughput 41553.04 samples/s
[Epoch 133] time cost 53.10s, valid loss 4.42, valid ppl 83.24, lr 30.00
[Epoch 133] test loss 4.37, test ppl 78.92
[Epoch 134 Batch 200/372] current loss 4.26, ppl 70.61, throughput 621.59 samples/s, lr 30.43
[Epoch 134] throughput 42943.95 samples/s
[Epoch 134] time cost 51.61s, valid loss 4.42, valid ppl 83.21, lr 30.00
[Epoch 134] test loss 4.37, test ppl 78.90
[Epoch 135 Batch 200/372] current loss 4.26, ppl 70.74, throughput 615.73 samples/s, lr 14.57
[Epoch 135] throughput 41980.57 samples/s
[Epoch 135] time cost 52.67s, valid loss 4.42, valid ppl 83.18, lr 30.00
[Epoch 135] test loss 4.37, test ppl 78.87
[Epoch 136 Batch 200/372] current loss 4.25, ppl 70.25, throughput 630.72 samples/s, lr 14.14
[Epoch 136] throughput 42559.18 samples/s
[Epoch 136] time cost 51.92s, valid loss 4.42, valid ppl 83.15, lr 30.00
[Epoch 136] test loss 4.37, test ppl 78.84
[Epoch 137 Batch 200/372] current loss 4.25, ppl 70.38, throughput 588.01 samples/s, lr 30.86
[Epoch 137] throughput 41450.64 samples/s
[Epoch 137] time cost 53.23s, valid loss 4.42, valid ppl 83.13, lr 30.00
[Epoch 137] test loss 4.37, test ppl 78.82
[Epoch 138 Batch 200/372] current loss 4.25, ppl 70.11, throughput 601.15 samples/s, lr 30.00
[Epoch 138] throughput 42605.72 samples/s
[Epoch 138] time cost 51.87s, valid loss 4.42, valid ppl 83.10, lr 30.00
[Epoch 138] test loss 4.37, test ppl 78.80
[Epoch 139 Batch 200/372] current loss 4.26, ppl 70.54, throughput 605.35 samples/s, lr 26.14
[Epoch 139] throughput 41854.20 samples/s
[Epoch 139] time cost 52.76s, valid loss 4.42, valid ppl 83.08, lr 30.00
[Epoch 139] test loss 4.37, test ppl 78.77
[Epoch 140 Batch 200/372] current loss 4.25, ppl 69.79, throughput 598.49 samples/s, lr 30.00
[Epoch 140] throughput 42340.23 samples/s
[Epoch 140] time cost 52.16s, valid loss 4.42, valid ppl 83.05, lr 30.00
[Epoch 140] test loss 4.37, test ppl 78.75
[Epoch 141 Batch 200/372] current loss 4.25, ppl 69.78, throughput 613.50 samples/s, lr 28.29
[Epoch 141] throughput 42020.03 samples/s
[Epoch 141] time cost 52.59s, valid loss 4.42, valid ppl 83.02, lr 30.00
[Epoch 141] test loss 4.37, test ppl 78.72
[Epoch 142 Batch 200/372] current loss 4.25, ppl 69.83, throughput 610.89 samples/s, lr 32.57
[Epoch 142] throughput 41820.27 samples/s
[Epoch 142] time cost 52.89s, valid loss 4.42, valid ppl 83.00, lr 30.00
[Epoch 142] test loss 4.37, test ppl 78.70
[Epoch 143 Batch 200/372] current loss 4.25, ppl 69.85, throughput 623.50 samples/s, lr 31.71
[Epoch 143] throughput 42529.90 samples/s
[Epoch 143] time cost 51.98s, valid loss 4.42, valid ppl 82.97, lr 30.00
[Epoch 143] test loss 4.37, test ppl 78.68
[Epoch 144 Batch 200/372] current loss 4.25, ppl 70.12, throughput 628.69 samples/s, lr 31.29
[Epoch 144] throughput 42440.15 samples/s
[Epoch 144] time cost 52.10s, valid loss 4.42, valid ppl 82.95, lr 30.00
[Epoch 144] test loss 4.37, test ppl 78.65
[Epoch 145 Batch 200/372] current loss 4.25, ppl 69.88, throughput 632.59 samples/s, lr 29.14
[Epoch 145] throughput 42652.77 samples/s
[Epoch 145] time cost 51.88s, valid loss 4.42, valid ppl 82.92, lr 30.00
[Epoch 145] test loss 4.36, test ppl 78.63
[Epoch 146 Batch 200/372] current loss 4.23, ppl 69.06, throughput 617.29 samples/s, lr 29.57
[Epoch 146] throughput 42592.84 samples/s
[Epoch 146] time cost 51.88s, valid loss 4.42, valid ppl 82.90, lr 30.00
[Epoch 146] test loss 4.36, test ppl 78.61
[Epoch 147 Batch 200/372] current loss 4.25, ppl 69.84, throughput 614.56 samples/s, lr 30.86
[Epoch 147] throughput 41912.71 samples/s
[Epoch 147] time cost 52.70s, valid loss 4.42, valid ppl 82.87, lr 30.00
[Epoch 147] test loss 4.36, test ppl 78.59
[Epoch 148 Batch 200/372] current loss 4.25, ppl 70.03, throughput 629.48 samples/s, lr 30.43
[Epoch 148] throughput 42416.71 samples/s
[Epoch 148] time cost 52.10s, valid loss 4.42, valid ppl 82.85, lr 30.00
[Epoch 148] test loss 4.36, test ppl 78.56
[Epoch 149 Batch 200/372] current loss 4.24, ppl 69.35, throughput 631.98 samples/s, lr 27.43
[Epoch 149] throughput 42542.01 samples/s
[Epoch 149] time cost 52.00s, valid loss 4.42, valid ppl 82.82, lr 30.00
[Epoch 149] test loss 4.36, test ppl 78.54
[Epoch 150 Batch 200/372] current loss 4.23, ppl 68.42, throughput 634.41 samples/s, lr 32.57
[Epoch 150] throughput 42046.93 samples/s
[Epoch 150] time cost 52.58s, valid loss 4.42, valid ppl 82.80, lr 30.00
[Epoch 150] test loss 4.36, test ppl 78.52
[Epoch 151 Batch 200/372] current loss 4.23, ppl 68.38, throughput 636.14 samples/s, lr 27.86
[Epoch 151] throughput 42939.51 samples/s
[Epoch 151] time cost 51.46s, valid loss 4.42, valid ppl 82.77, lr 30.00
[Epoch 151] test loss 4.36, test ppl 78.50
[Epoch 152 Batch 200/372] current loss 4.23, ppl 68.83, throughput 628.97 samples/s, lr 28.71
[Epoch 152] throughput 43333.71 samples/s
[Epoch 152] time cost 51.06s, valid loss 4.42, valid ppl 82.75, lr 30.00
[Epoch 152] test loss 4.36, test ppl 78.48
[Epoch 153 Batch 200/372] current loss 4.23, ppl 68.82, throughput 605.18 samples/s, lr 32.14
[Epoch 153] throughput 42178.94 samples/s
[Epoch 153] time cost 52.42s, valid loss 4.42, valid ppl 82.73, lr 30.00
[Epoch 153] test loss 4.36, test ppl 78.45
[Epoch 154 Batch 200/372] current loss 4.23, ppl 69.05, throughput 631.22 samples/s, lr 28.71
[Epoch 154] throughput 43374.35 samples/s
[Epoch 154] time cost 50.99s, valid loss 4.42, valid ppl 82.71, lr 30.00
[Epoch 154] test loss 4.36, test ppl 78.43
[Epoch 155 Batch 200/372] current loss 4.22, ppl 67.94, throughput 633.71 samples/s, lr 33.43
[Epoch 155] throughput 42709.06 samples/s
[Epoch 155] time cost 51.78s, valid loss 4.42, valid ppl 82.68, lr 30.00
[Epoch 155] test loss 4.36, test ppl 78.41
[Epoch 156 Batch 200/372] current loss 4.22, ppl 67.77, throughput 623.61 samples/s, lr 28.71
[Epoch 156] throughput 43191.29 samples/s
[Epoch 156] time cost 51.21s, valid loss 4.41, valid ppl 82.66, lr 30.00
[Epoch 156] test loss 4.36, test ppl 78.39
[Epoch 157 Batch 200/372] current loss 4.23, ppl 68.77, throughput 625.52 samples/s, lr 27.86
[Epoch 157] throughput 42713.32 samples/s
[Epoch 157] time cost 51.90s, valid loss 4.41, valid ppl 82.64, lr 30.00
[Epoch 157] test loss 4.36, test ppl 78.37
[Epoch 158 Batch 200/372] current loss 4.23, ppl 68.51, throughput 620.36 samples/s, lr 30.43
[Epoch 158] throughput 42457.97 samples/s
[Epoch 158] time cost 52.03s, valid loss 4.41, valid ppl 82.61, lr 30.00
[Epoch 158] test loss 4.36, test ppl 78.35
[Epoch 159 Batch 200/372] current loss 4.21, ppl 67.68, throughput 653.23 samples/s, lr 30.00
[Epoch 159] throughput 43662.53 samples/s
[Epoch 159] time cost 50.74s, valid loss 4.41, valid ppl 82.59, lr 30.00
[Epoch 159] test loss 4.36, test ppl 78.33
[Epoch 160 Batch 200/372] current loss 4.23, ppl 68.58, throughput 623.29 samples/s, lr 31.29
[Epoch 160] throughput 42330.18 samples/s
[Epoch 160] time cost 52.27s, valid loss 4.41, valid ppl 82.57, lr 30.00
[Epoch 160] test loss 4.36, test ppl 78.31
[Epoch 161 Batch 200/372] current loss 4.22, ppl 67.98, throughput 610.20 samples/s, lr 31.29
[Epoch 161] throughput 41748.78 samples/s
[Epoch 161] time cost 52.84s, valid loss 4.41, valid ppl 82.55, lr 30.00
[Epoch 161] test loss 4.36, test ppl 78.30
[Epoch 162 Batch 200/372] current loss 4.22, ppl 67.89, throughput 643.48 samples/s, lr 27.43
[Epoch 162] throughput 42560.14 samples/s
[Epoch 162] time cost 51.91s, valid loss 4.41, valid ppl 82.53, lr 30.00
[Epoch 162] test loss 4.36, test ppl 78.28
[Epoch 163 Batch 200/372] current loss 4.23, ppl 68.62, throughput 635.56 samples/s, lr 33.43
[Epoch 163] throughput 42859.09 samples/s
[Epoch 163] time cost 51.58s, valid loss 4.41, valid ppl 82.51, lr 30.00
[Epoch 163] test loss 4.36, test ppl 78.26
[Epoch 164 Batch 200/372] current loss 4.23, ppl 68.76, throughput 611.25 samples/s, lr 28.71
[Epoch 164] throughput 41671.31 samples/s
[Epoch 164] time cost 53.00s, valid loss 4.41, valid ppl 82.49, lr 30.00
[Epoch 164] test loss 4.36, test ppl 78.24
[Epoch 165 Batch 200/372] current loss 4.22, ppl 68.25, throughput 592.58 samples/s, lr 32.14
[Epoch 165] throughput 41590.25 samples/s
[Epoch 165] time cost 53.15s, valid loss 4.41, valid ppl 82.47, lr 30.00
[Epoch 165] test loss 4.36, test ppl 78.22
[Epoch 166 Batch 200/372] current loss 4.21, ppl 67.40, throughput 620.67 samples/s, lr 13.71
[Epoch 166] throughput 41997.39 samples/s
[Epoch 166] time cost 52.67s, valid loss 4.41, valid ppl 82.45, lr 30.00
[Epoch 166] test loss 4.36, test ppl 78.21
[Epoch 167 Batch 200/372] current loss 4.21, ppl 67.35, throughput 617.06 samples/s, lr 24.43
[Epoch 167] throughput 42855.18 samples/s
[Epoch 167] time cost 51.60s, valid loss 4.41, valid ppl 82.43, lr 30.00
[Epoch 167] test loss 4.36, test ppl 78.19
[Epoch 168 Batch 200/372] current loss 4.21, ppl 67.03, throughput 632.68 samples/s, lr 33.43
[Epoch 168] throughput 42519.23 samples/s
[Epoch 168] time cost 52.06s, valid loss 4.41, valid ppl 82.41, lr 30.00
[Epoch 168] test loss 4.36, test ppl 78.17
[Epoch 169 Batch 200/372] current loss 4.22, ppl 67.83, throughput 601.58 samples/s, lr 25.71
[Epoch 169] throughput 41822.07 samples/s
[Epoch 169] time cost 52.84s, valid loss 4.41, valid ppl 82.39, lr 30.00
[Epoch 169] test loss 4.36, test ppl 78.16
[Epoch 170 Batch 200/372] current loss 4.21, ppl 67.32, throughput 634.45 samples/s, lr 28.71
[Epoch 170] throughput 43138.90 samples/s
[Epoch 170] time cost 51.32s, valid loss 4.41, valid ppl 82.37, lr 30.00
[Epoch 170] test loss 4.36, test ppl 78.14
[Epoch 171 Batch 200/372] current loss 4.21, ppl 67.20, throughput 618.31 samples/s, lr 26.57
[Epoch 171] throughput 42127.97 samples/s
[Epoch 171] time cost 52.51s, valid loss 4.41, valid ppl 82.35, lr 30.00
[Epoch 171] test loss 4.36, test ppl 78.12
[Epoch 172 Batch 200/372] current loss 4.21, ppl 67.17, throughput 648.29 samples/s, lr 27.00
[Epoch 172] throughput 42732.12 samples/s
[Epoch 172] time cost 51.74s, valid loss 4.41, valid ppl 82.33, lr 30.00
[Epoch 172] test loss 4.36, test ppl 78.11
[Epoch 173 Batch 200/372] current loss 4.21, ppl 67.26, throughput 640.31 samples/s, lr 29.57
[Epoch 173] throughput 42851.79 samples/s
[Epoch 173] time cost 51.56s, valid loss 4.41, valid ppl 82.32, lr 30.00
[Epoch 173] test loss 4.36, test ppl 78.09
[Epoch 174 Batch 200/372] current loss 4.20, ppl 67.00, throughput 641.56 samples/s, lr 28.71
[Epoch 174] throughput 43475.20 samples/s
[Epoch 174] time cost 51.02s, valid loss 4.41, valid ppl 82.30, lr 30.00
[Epoch 174] test loss 4.36, test ppl 78.08
[Epoch 175 Batch 200/372] current loss 4.21, ppl 67.62, throughput 625.55 samples/s, lr 29.57
[Epoch 175] throughput 43135.78 samples/s
[Epoch 175] time cost 51.29s, valid loss 4.41, valid ppl 82.28, lr 30.00
[Epoch 175] test loss 4.36, test ppl 78.06
[Epoch 176 Batch 200/372] current loss 4.22, ppl 68.05, throughput 617.58 samples/s, lr 31.71
[Epoch 176] throughput 43073.56 samples/s
[Epoch 176] time cost 51.39s, valid loss 4.41, valid ppl 82.26, lr 30.00
[Epoch 176] test loss 4.36, test ppl 78.05
[Epoch 177 Batch 200/372] current loss 4.21, ppl 67.64, throughput 621.43 samples/s, lr 30.86
[Epoch 177] throughput 42589.02 samples/s
[Epoch 177] time cost 51.91s, valid loss 4.41, valid ppl 82.25, lr 30.00
[Epoch 177] test loss 4.36, test ppl 78.03
[Epoch 178 Batch 200/372] current loss 4.20, ppl 66.91, throughput 633.88 samples/s, lr 30.00
[Epoch 178] throughput 42996.02 samples/s
[Epoch 178] time cost 51.53s, valid loss 4.41, valid ppl 82.23, lr 30.00
[Epoch 178] test loss 4.36, test ppl 78.02
[Epoch 179 Batch 200/372] current loss 4.21, ppl 67.36, throughput 632.30 samples/s, lr 30.43
[Epoch 179] throughput 42064.46 samples/s
[Epoch 179] time cost 52.54s, valid loss 4.41, valid ppl 82.21, lr 30.00
[Epoch 179] test loss 4.36, test ppl 78.00
[Epoch 180 Batch 200/372] current loss 4.20, ppl 66.42, throughput 617.41 samples/s, lr 31.71
[Epoch 180] throughput 41547.86 samples/s
[Epoch 180] time cost 53.13s, valid loss 4.41, valid ppl 82.19, lr 30.00
[Epoch 180] test loss 4.36, test ppl 77.98
[Epoch 181 Batch 200/372] current loss 4.21, ppl 67.17, throughput 630.85 samples/s, lr 30.00
[Epoch 181] throughput 42467.55 samples/s
[Epoch 181] time cost 52.16s, valid loss 4.41, valid ppl 82.17, lr 30.00
[Epoch 181] test loss 4.36, test ppl 77.97
[Epoch 182 Batch 200/372] current loss 4.19, ppl 66.31, throughput 640.38 samples/s, lr 30.86
[Epoch 182] throughput 43110.55 samples/s
[Epoch 182] time cost 51.40s, valid loss 4.41, valid ppl 82.16, lr 30.00
[Epoch 182] test loss 4.36, test ppl 77.95
[Epoch 183 Batch 200/372] current loss 4.20, ppl 67.00, throughput 648.02 samples/s, lr 27.86
[Epoch 183] throughput 43473.77 samples/s
[Epoch 183] time cost 51.12s, valid loss 4.41, valid ppl 82.14, lr 30.00
[Epoch 183] test loss 4.36, test ppl 77.94
[Epoch 184 Batch 200/372] current loss 4.21, ppl 67.39, throughput 623.74 samples/s, lr 31.71
[Epoch 184] throughput 42398.42 samples/s
[Epoch 184] time cost 52.12s, valid loss 4.41, valid ppl 82.12, lr 30.00
[Epoch 184] test loss 4.36, test ppl 77.92
[Epoch 185 Batch 200/372] current loss 4.19, ppl 66.33, throughput 614.59 samples/s, lr 31.71
[Epoch 185] throughput 42479.56 samples/s
[Epoch 185] time cost 52.09s, valid loss 4.41, valid ppl 82.10, lr 30.00
[Epoch 185] test loss 4.36, test ppl 77.91
[Epoch 186 Batch 200/372] current loss 4.19, ppl 65.87, throughput 626.51 samples/s, lr 27.43
[Epoch 186] throughput 42185.07 samples/s
[Epoch 186] time cost 52.39s, valid loss 4.41, valid ppl 82.09, lr 30.00
[Epoch 186] test loss 4.36, test ppl 77.89
[Epoch 187 Batch 200/372] current loss 4.19, ppl 66.13, throughput 650.30 samples/s, lr 32.14
[Epoch 187] throughput 42707.27 samples/s
[Epoch 187] time cost 51.76s, valid loss 4.41, valid ppl 82.07, lr 30.00
[Epoch 187] test loss 4.36, test ppl 77.88
[Epoch 188 Batch 200/372] current loss 4.19, ppl 66.35, throughput 621.48 samples/s, lr 29.57
[Epoch 188] throughput 42228.29 samples/s
[Epoch 188] time cost 52.43s, valid loss 4.41, valid ppl 82.05, lr 30.00
[Epoch 188] test loss 4.35, test ppl 77.86
[Epoch 189 Batch 200/372] current loss 4.20, ppl 66.60, throughput 629.83 samples/s, lr 27.00
[Epoch 189] throughput 42924.44 samples/s
[Epoch 189] time cost 51.55s, valid loss 4.41, valid ppl 82.04, lr 30.00
[Epoch 189] test loss 4.35, test ppl 77.85
[Epoch 190 Batch 200/372] current loss 4.20, ppl 66.41, throughput 629.17 samples/s, lr 28.29
[Epoch 190] throughput 43228.13 samples/s
[Epoch 190] time cost 51.18s, valid loss 4.41, valid ppl 82.02, lr 30.00
[Epoch 190] test loss 4.35, test ppl 77.83
[Epoch 191 Batch 200/372] current loss 4.19, ppl 66.32, throughput 618.72 samples/s, lr 27.86
[Epoch 191] throughput 42574.32 samples/s
[Epoch 191] time cost 51.89s, valid loss 4.41, valid ppl 82.00, lr 30.00
[Epoch 191] test loss 4.35, test ppl 77.82
[Epoch 192 Batch 200/372] current loss 4.18, ppl 65.57, throughput 612.29 samples/s, lr 30.86
[Epoch 192] throughput 42625.47 samples/s
[Epoch 192] time cost 51.85s, valid loss 4.41, valid ppl 81.99, lr 30.00
[Epoch 192] test loss 4.35, test ppl 77.81
[Epoch 193 Batch 200/372] current loss 4.20, ppl 66.52, throughput 635.19 samples/s, lr 32.14
[Epoch 193] throughput 42896.65 samples/s
[Epoch 193] time cost 51.66s, valid loss 4.41, valid ppl 81.97, lr 30.00
[Epoch 193] test loss 4.35, test ppl 77.80
[Epoch 194 Batch 200/372] current loss 4.18, ppl 65.36, throughput 637.32 samples/s, lr 27.86
[Epoch 194] throughput 42842.24 samples/s
[Epoch 194] time cost 51.59s, valid loss 4.41, valid ppl 81.96, lr 30.00
[Epoch 194] test loss 4.35, test ppl 77.78
[Epoch 195 Batch 200/372] current loss 4.19, ppl 66.05, throughput 618.51 samples/s, lr 28.71
[Epoch 195] throughput 42506.89 samples/s
[Epoch 195] time cost 52.05s, valid loss 4.41, valid ppl 81.94, lr 30.00
[Epoch 195] test loss 4.35, test ppl 77.77
[Epoch 196 Batch 200/372] current loss 4.19, ppl 65.94, throughput 622.84 samples/s, lr 30.00
[Epoch 196] throughput 42926.21 samples/s
[Epoch 196] time cost 51.47s, valid loss 4.41, valid ppl 81.92, lr 30.00
[Epoch 196] test loss 4.35, test ppl 77.76
[Epoch 197 Batch 200/372] current loss 4.19, ppl 66.09, throughput 625.54 samples/s, lr 31.71
[Epoch 197] throughput 42424.96 samples/s
[Epoch 197] time cost 52.08s, valid loss 4.41, valid ppl 81.91, lr 30.00
[Epoch 197] test loss 4.35, test ppl 77.74
[Epoch 198 Batch 200/372] current loss 4.19, ppl 65.88, throughput 634.99 samples/s, lr 28.29
[Epoch 198] throughput 42476.89 samples/s
[Epoch 198] time cost 52.01s, valid loss 4.41, valid ppl 81.89, lr 30.00
[Epoch 198] test loss 4.35, test ppl 77.73
[Epoch 199 Batch 200/372] current loss 4.17, ppl 64.70, throughput 647.75 samples/s, lr 30.43
[Epoch 199] throughput 43741.82 samples/s
[Epoch 199] time cost 50.70s, valid loss 4.41, valid ppl 81.88, lr 30.00
[Epoch 199] test loss 4.35, test ppl 77.71
[Epoch 200 Batch 200/372] current loss 4.17, ppl 65.00, throughput 613.53 samples/s, lr 25.71
[Epoch 200] throughput 42846.96 samples/s
[Epoch 200] time cost 51.59s, valid loss 4.41, valid ppl 81.86, lr 30.00
[Epoch 200] test loss 4.35, test ppl 77.70
[Epoch 201 Batch 200/372] current loss 4.19, ppl 66.18, throughput 620.43 samples/s, lr 29.57
[Epoch 201] throughput 42857.98 samples/s
[Epoch 201] time cost 51.59s, valid loss 4.40, valid ppl 81.85, lr 30.00
[Epoch 201] test loss 4.35, test ppl 77.69
[Epoch 202 Batch 200/372] current loss 4.17, ppl 64.96, throughput 624.46 samples/s, lr 26.14
[Epoch 202] throughput 42243.08 samples/s
[Epoch 202] time cost 52.28s, valid loss 4.40, valid ppl 81.83, lr 30.00
[Epoch 202] test loss 4.35, test ppl 77.67
[Epoch 203 Batch 200/372] current loss 4.18, ppl 65.23, throughput 605.76 samples/s, lr 31.29
[Epoch 203] throughput 41992.04 samples/s
[Epoch 203] time cost 52.64s, valid loss 4.40, valid ppl 81.82, lr 30.00
[Epoch 203] test loss 4.35, test ppl 77.66
[Epoch 204 Batch 200/372] current loss 4.17, ppl 64.97, throughput 617.51 samples/s, lr 25.29
[Epoch 204] throughput 42216.45 samples/s
[Epoch 204] time cost 52.32s, valid loss 4.40, valid ppl 81.80, lr 30.00
[Epoch 204] test loss 4.35, test ppl 77.65
[Epoch 205 Batch 200/372] current loss 4.18, ppl 65.20, throughput 638.78 samples/s, lr 30.00
[Epoch 205] throughput 43120.25 samples/s
[Epoch 205] time cost 51.31s, valid loss 4.40, valid ppl 81.79, lr 30.00
[Epoch 205] test loss 4.35, test ppl 77.64
[Epoch 206 Batch 200/372] current loss 4.17, ppl 65.01, throughput 642.96 samples/s, lr 30.43
[Epoch 206] throughput 42623.66 samples/s
[Epoch 206] time cost 51.83s, valid loss 4.40, valid ppl 81.78, lr 30.00
[Epoch 206] test loss 4.35, test ppl 77.62
[Epoch 207 Batch 200/372] current loss 4.17, ppl 64.75, throughput 614.45 samples/s, lr 27.43
[Epoch 207] throughput 42009.19 samples/s
[Epoch 207] time cost 52.68s, valid loss 4.40, valid ppl 81.76, lr 30.00
[Epoch 207] test loss 4.35, test ppl 77.61
[Epoch 208 Batch 200/372] current loss 4.17, ppl 64.50, throughput 622.53 samples/s, lr 30.43
[Epoch 208] throughput 42720.86 samples/s
[Epoch 208] time cost 51.75s, valid loss 4.40, valid ppl 81.75, lr 30.00
[Epoch 208] test loss 4.35, test ppl 77.60
[Epoch 209 Batch 200/372] current loss 4.17, ppl 64.93, throughput 633.74 samples/s, lr 32.57
[Epoch 209] throughput 42903.10 samples/s
[Epoch 209] time cost 51.60s, valid loss 4.40, valid ppl 81.74, lr 30.00
[Epoch 209] test loss 4.35, test ppl 77.59
[Epoch 210 Batch 200/372] current loss 4.17, ppl 65.01, throughput 631.82 samples/s, lr 32.14
[Epoch 210] throughput 42825.94 samples/s
[Epoch 210] time cost 51.59s, valid loss 4.40, valid ppl 81.72, lr 30.00
[Epoch 210] test loss 4.35, test ppl 77.58
[Epoch 211 Batch 200/372] current loss 4.18, ppl 65.29, throughput 618.15 samples/s, lr 31.29
[Epoch 211] throughput 42336.41 samples/s
[Epoch 211] time cost 52.17s, valid loss 4.40, valid ppl 81.71, lr 30.00
[Epoch 211] test loss 4.35, test ppl 77.57
[Epoch 212 Batch 200/372] current loss 4.17, ppl 64.98, throughput 617.05 samples/s, lr 31.29
[Epoch 212] throughput 42262.93 samples/s
[Epoch 212] time cost 52.36s, valid loss 4.40, valid ppl 81.70, lr 30.00
[Epoch 212] test loss 4.35, test ppl 77.55
[Epoch 213 Batch 200/372] current loss 4.18, ppl 65.20, throughput 633.80 samples/s, lr 31.29
[Epoch 213] throughput 43160.18 samples/s
[Epoch 213] time cost 51.23s, valid loss 4.40, valid ppl 81.68, lr 30.00
[Epoch 213] test loss 4.35, test ppl 77.54
[Epoch 214 Batch 200/372] current loss 4.17, ppl 64.60, throughput 625.34 samples/s, lr 31.29
[Epoch 214] throughput 42545.10 samples/s
[Epoch 214] time cost 51.97s, valid loss 4.40, valid ppl 81.67, lr 30.00
[Epoch 214] test loss 4.35, test ppl 77.53
[Epoch 215 Batch 200/372] current loss 4.17, ppl 64.79, throughput 631.90 samples/s, lr 28.29
[Epoch 215] throughput 42327.73 samples/s
[Epoch 215] time cost 52.30s, valid loss 4.40, valid ppl 81.66, lr 30.00
[Epoch 215] test loss 4.35, test ppl 77.52
[Epoch 216 Batch 200/372] current loss 4.17, ppl 64.99, throughput 617.20 samples/s, lr 29.57
[Epoch 216] throughput 43108.41 samples/s
[Epoch 216] time cost 51.30s, valid loss 4.40, valid ppl 81.64, lr 30.00
[Epoch 216] test loss 4.35, test ppl 77.51
[Epoch 217 Batch 200/372] current loss 4.18, ppl 65.60, throughput 623.32 samples/s, lr 28.71
[Epoch 217] throughput 42449.05 samples/s
[Epoch 217] time cost 52.04s, valid loss 4.40, valid ppl 81.63, lr 30.00
[Epoch 217] test loss 4.35, test ppl 77.50
[Epoch 218 Batch 200/372] current loss 4.18, ppl 65.28, throughput 625.64 samples/s, lr 13.71
[Epoch 218] throughput 42138.78 samples/s
[Epoch 218] time cost 52.41s, valid loss 4.40, valid ppl 81.62, lr 30.00
[Epoch 218] test loss 4.35, test ppl 77.49
[Epoch 219 Batch 200/372] current loss 4.17, ppl 64.92, throughput 637.63 samples/s, lr 33.43
[Epoch 219] throughput 43709.68 samples/s
[Epoch 219] time cost 50.65s, valid loss 4.40, valid ppl 81.61, lr 30.00
[Epoch 219] test loss 4.35, test ppl 77.48
[Epoch 220 Batch 200/372] current loss 4.15, ppl 63.46, throughput 618.69 samples/s, lr 30.43
[Epoch 220] throughput 42351.54 samples/s
[Epoch 220] time cost 52.16s, valid loss 4.40, valid ppl 81.60, lr 30.00
[Epoch 220] test loss 4.35, test ppl 77.46
[Epoch 221 Batch 200/372] current loss 4.16, ppl 64.27, throughput 619.92 samples/s, lr 30.43
[Epoch 221] throughput 42206.73 samples/s
[Epoch 221] time cost 52.29s, valid loss 4.40, valid ppl 81.58, lr 30.00
[Epoch 221] test loss 4.35, test ppl 77.45
[Epoch 222 Batch 200/372] current loss 4.16, ppl 64.14, throughput 630.51 samples/s, lr 29.57
[Epoch 222] throughput 43170.78 samples/s
[Epoch 222] time cost 51.32s, valid loss 4.40, valid ppl 81.57, lr 30.00
[Epoch 222] test loss 4.35, test ppl 77.44
[Epoch 223 Batch 200/372] current loss 4.16, ppl 64.37, throughput 613.96 samples/s, lr 27.00
[Epoch 223] throughput 42329.50 samples/s
[Epoch 223] time cost 52.22s, valid loss 4.40, valid ppl 81.56, lr 30.00
[Epoch 223] test loss 4.35, test ppl 77.43
[Epoch 224 Batch 200/372] current loss 4.17, ppl 64.89, throughput 619.33 samples/s, lr 27.00
[Epoch 224] throughput 41738.27 samples/s
[Epoch 224] time cost 52.91s, valid loss 4.40, valid ppl 81.55, lr 30.00
[Epoch 224] test loss 4.35, test ppl 77.42
[Epoch 225 Batch 200/372] current loss 4.17, ppl 64.47, throughput 613.90 samples/s, lr 32.14
[Epoch 225] throughput 42659.72 samples/s
[Epoch 225] time cost 51.81s, valid loss 4.40, valid ppl 81.53, lr 30.00
[Epoch 225] test loss 4.35, test ppl 77.41
[Epoch 226 Batch 200/372] current loss 4.17, ppl 64.52, throughput 620.24 samples/s, lr 28.71
[Epoch 226] throughput 42971.31 samples/s
[Epoch 226] time cost 51.46s, valid loss 4.40, valid ppl 81.52, lr 30.00
[Epoch 226] test loss 4.35, test ppl 77.40
[Epoch 227 Batch 200/372] current loss 4.16, ppl 64.21, throughput 631.05 samples/s, lr 30.43
[Epoch 227] throughput 43646.46 samples/s
[Epoch 227] time cost 50.82s, valid loss 4.40, valid ppl 81.51, lr 30.00
[Epoch 227] test loss 4.35, test ppl 77.39
[Epoch 228 Batch 200/372] current loss 4.15, ppl 63.52, throughput 637.43 samples/s, lr 29.57
[Epoch 228] throughput 42328.47 samples/s
[Epoch 228] time cost 52.22s, valid loss 4.40, valid ppl 81.50, lr 30.00
[Epoch 228] test loss 4.35, test ppl 77.38
[Epoch 229 Batch 200/372] current loss 4.16, ppl 63.96, throughput 631.62 samples/s, lr 30.86
[Epoch 229] throughput 43363.68 samples/s
[Epoch 229] time cost 51.09s, valid loss 4.40, valid ppl 81.49, lr 30.00
[Epoch 229] test loss 4.35, test ppl 77.37
[Epoch 230 Batch 200/372] current loss 4.16, ppl 64.00, throughput 615.64 samples/s, lr 29.57
[Epoch 230] throughput 41865.88 samples/s
[Epoch 230] time cost 52.80s, valid loss 4.40, valid ppl 81.48, lr 30.00
[Epoch 230] test loss 4.35, test ppl 77.36
[Epoch 231 Batch 200/372] current loss 4.16, ppl 64.22, throughput 618.36 samples/s, lr 29.14
[Epoch 231] throughput 42264.36 samples/s
[Epoch 231] time cost 52.34s, valid loss 4.40, valid ppl 81.47, lr 30.00
[Epoch 231] test loss 4.35, test ppl 77.35
[Epoch 232 Batch 200/372] current loss 4.15, ppl 63.55, throughput 620.35 samples/s, lr 30.86
[Epoch 232] throughput 42673.61 samples/s
[Epoch 232] time cost 51.82s, valid loss 4.40, valid ppl 81.46, lr 30.00
[Epoch 232] test loss 4.35, test ppl 77.34
[Epoch 233 Batch 200/372] current loss 4.16, ppl 63.94, throughput 622.33 samples/s, lr 28.71
[Epoch 233] throughput 42474.03 samples/s
[Epoch 233] time cost 52.07s, valid loss 4.40, valid ppl 81.44, lr 30.00
[Epoch 233] test loss 4.35, test ppl 77.33
[Epoch 234 Batch 200/372] current loss 4.16, ppl 64.20, throughput 621.41 samples/s, lr 25.71
[Epoch 234] throughput 42391.73 samples/s
[Epoch 234] time cost 52.15s, valid loss 4.40, valid ppl 81.43, lr 30.00
[Epoch 234] test loss 4.35, test ppl 77.32
[Epoch 235 Batch 200/372] current loss 4.16, ppl 63.99, throughput 609.20 samples/s, lr 28.71
[Epoch 235] throughput 41883.75 samples/s
[Epoch 235] time cost 52.76s, valid loss 4.40, valid ppl 81.42, lr 30.00
[Epoch 235] test loss 4.35, test ppl 77.31
[Epoch 236 Batch 200/372] current loss 4.16, ppl 64.24, throughput 616.41 samples/s, lr 27.43
[Epoch 236] throughput 41806.95 samples/s
[Epoch 236] time cost 52.94s, valid loss 4.40, valid ppl 81.41, lr 30.00
[Epoch 236] test loss 4.35, test ppl 77.30
[Epoch 237 Batch 200/372] current loss 4.15, ppl 63.75, throughput 620.63 samples/s, lr 29.14
[Epoch 237] throughput 42268.02 samples/s
[Epoch 237] time cost 52.24s, valid loss 4.40, valid ppl 81.40, lr 30.00
[Epoch 237] test loss 4.35, test ppl 77.29
[Epoch 238 Batch 200/372] current loss 4.14, ppl 63.06, throughput 620.54 samples/s, lr 28.29
[Epoch 238] throughput 42727.50 samples/s
[Epoch 238] time cost 51.96s, valid loss 4.40, valid ppl 81.39, lr 30.00
[Epoch 238] test loss 4.35, test ppl 77.28
[Epoch 239 Batch 200/372] current loss 4.14, ppl 63.11, throughput 649.07 samples/s, lr 30.00
[Epoch 239] throughput 43527.63 samples/s
[Epoch 239] time cost 50.93s, valid loss 4.40, valid ppl 81.38, lr 30.00
[Epoch 239] test loss 4.35, test ppl 77.27
[Epoch 240 Batch 200/372] current loss 4.14, ppl 62.52, throughput 610.45 samples/s, lr 26.57
[Epoch 240] throughput 42727.84 samples/s
[Epoch 240] time cost 51.72s, valid loss 4.40, valid ppl 81.37, lr 30.00
[Epoch 240] test loss 4.35, test ppl 77.26
[Epoch 241 Batch 200/372] current loss 4.15, ppl 63.41, throughput 619.85 samples/s, lr 30.43
[Epoch 241] throughput 42262.89 samples/s
[Epoch 241] time cost 52.26s, valid loss 4.40, valid ppl 81.36, lr 30.00
[Epoch 241] test loss 4.35, test ppl 77.26
[Epoch 242 Batch 200/372] current loss 4.15, ppl 63.35, throughput 626.85 samples/s, lr 30.43
[Epoch 242] throughput 42524.85 samples/s
[Epoch 242] time cost 52.06s, valid loss 4.40, valid ppl 81.35, lr 30.00
[Epoch 242] test loss 4.35, test ppl 77.25
[Epoch 243 Batch 200/372] current loss 4.15, ppl 63.36, throughput 630.04 samples/s, lr 30.86
[Epoch 243] throughput 43061.82 samples/s
[Epoch 243] time cost 51.38s, valid loss 4.40, valid ppl 81.34, lr 30.00
[Epoch 243] test loss 4.35, test ppl 77.24
[Epoch 244 Batch 200/372] current loss 4.14, ppl 62.83, throughput 619.09 samples/s, lr 30.43
[Epoch 244] throughput 42184.81 samples/s
[Epoch 244] time cost 52.39s, valid loss 4.40, valid ppl 81.33, lr 30.00
[Epoch 244] test loss 4.35, test ppl 77.23
[Epoch 245 Batch 200/372] current loss 4.14, ppl 62.94, throughput 616.38 samples/s, lr 25.71
[Epoch 245] throughput 43038.20 samples/s
[Epoch 245] time cost 51.39s, valid loss 4.40, valid ppl 81.32, lr 30.00
[Epoch 245] test loss 4.35, test ppl 77.22
[Epoch 246 Batch 200/372] current loss 4.15, ppl 63.58, throughput 637.73 samples/s, lr 27.86
[Epoch 246] throughput 42256.89 samples/s
[Epoch 246] time cost 52.38s, valid loss 4.40, valid ppl 81.31, lr 30.00
[Epoch 246] test loss 4.35, test ppl 77.21
[Epoch 247 Batch 200/372] current loss 4.14, ppl 63.06, throughput 635.90 samples/s, lr 24.43
[Epoch 247] throughput 43409.57 samples/s
[Epoch 247] time cost 51.09s, valid loss 4.40, valid ppl 81.29, lr 30.00
[Epoch 247] test loss 4.35, test ppl 77.20
[Epoch 248 Batch 200/372] current loss 4.15, ppl 63.65, throughput 628.21 samples/s, lr 26.14
[Epoch 248] throughput 42922.58 samples/s
[Epoch 248] time cost 51.50s, valid loss 4.40, valid ppl 81.28, lr 30.00
[Epoch 248] test loss 4.35, test ppl 77.19
[Epoch 249 Batch 200/372] current loss 4.13, ppl 62.36, throughput 627.19 samples/s, lr 31.29
[Epoch 249] throughput 42362.21 samples/s
[Epoch 249] time cost 52.20s, valid loss 4.40, valid ppl 81.27, lr 30.00
[Epoch 249] test loss 4.35, test ppl 77.18
[Epoch 250 Batch 200/372] current loss 4.14, ppl 62.59, throughput 611.79 samples/s, lr 15.00
[Epoch 250] throughput 42057.07 samples/s
[Epoch 250] time cost 52.58s, valid loss 4.40, valid ppl 81.26, lr 30.00
[Epoch 250] test loss 4.35, test ppl 77.18
[Epoch 251 Batch 200/372] current loss 4.13, ppl 62.05, throughput 641.18 samples/s, lr 30.86
[Epoch 251] throughput 42101.98 samples/s
[Epoch 251] time cost 52.54s, valid loss 4.40, valid ppl 81.25, lr 30.00
[Epoch 251] test loss 4.35, test ppl 77.17
[Epoch 252 Batch 200/372] current loss 4.14, ppl 62.95, throughput 621.99 samples/s, lr 27.43
[Epoch 252] throughput 42474.34 samples/s
[Epoch 252] time cost 52.15s, valid loss 4.40, valid ppl 81.24, lr 30.00
[Epoch 252] test loss 4.35, test ppl 77.16
[Epoch 253 Batch 200/372] current loss 4.14, ppl 63.02, throughput 611.04 samples/s, lr 27.00
[Epoch 253] throughput 41962.21 samples/s
[Epoch 253] time cost 52.66s, valid loss 4.40, valid ppl 81.23, lr 30.00
[Epoch 253] test loss 4.35, test ppl 77.15
[Epoch 254 Batch 200/372] current loss 4.14, ppl 62.85, throughput 616.46 samples/s, lr 30.43
[Epoch 254] throughput 42914.95 samples/s
[Epoch 254] time cost 51.52s, valid loss 4.40, valid ppl 81.22, lr 30.00
[Epoch 254] test loss 4.35, test ppl 77.14
[Epoch 255 Batch 200/372] current loss 4.14, ppl 62.51, throughput 613.00 samples/s, lr 29.57
[Epoch 255] throughput 42598.18 samples/s
[Epoch 255] time cost 51.88s, valid loss 4.40, valid ppl 81.21, lr 30.00
[Epoch 255] test loss 4.35, test ppl 77.14
[Epoch 256 Batch 200/372] current loss 4.14, ppl 62.87, throughput 630.21 samples/s, lr 13.71
[Epoch 256] throughput 42870.64 samples/s
[Epoch 256] time cost 51.53s, valid loss 4.40, valid ppl 81.20, lr 30.00
[Epoch 256] test loss 4.35, test ppl 77.13
[Epoch 257 Batch 200/372] current loss 4.15, ppl 63.33, throughput 631.30 samples/s, lr 33.00
[Epoch 257] throughput 42881.40 samples/s
[Epoch 257] time cost 51.58s, valid loss 4.40, valid ppl 81.20, lr 30.00
[Epoch 257] test loss 4.35, test ppl 77.12
[Epoch 258 Batch 200/372] current loss 4.13, ppl 62.30, throughput 626.48 samples/s, lr 31.71
[Epoch 258] throughput 42573.96 samples/s
[Epoch 258] time cost 52.05s, valid loss 4.40, valid ppl 81.19, lr 30.00
[Epoch 258] test loss 4.35, test ppl 77.11
[Epoch 259 Batch 200/372] current loss 4.13, ppl 62.44, throughput 651.17 samples/s, lr 25.29
[Epoch 259] throughput 43303.16 samples/s
[Epoch 259] time cost 51.17s, valid loss 4.40, valid ppl 81.18, lr 30.00
[Epoch 259] test loss 4.35, test ppl 77.11
[Epoch 260 Batch 200/372] current loss 4.14, ppl 62.75, throughput 628.75 samples/s, lr 15.43
[Epoch 260] throughput 42340.02 samples/s
[Epoch 260] time cost 52.26s, valid loss 4.40, valid ppl 81.17, lr 30.00
[Epoch 260] test loss 4.35, test ppl 77.10
[Epoch 261 Batch 200/372] current loss 4.14, ppl 62.73, throughput 629.18 samples/s, lr 30.86
[Epoch 261] throughput 42853.66 samples/s
[Epoch 261] time cost 51.61s, valid loss 4.40, valid ppl 81.16, lr 30.00
[Epoch 261] test loss 4.35, test ppl 77.09
[Epoch 262 Batch 200/372] current loss 4.13, ppl 62.22, throughput 626.34 samples/s, lr 30.00
[Epoch 262] throughput 42778.46 samples/s
[Epoch 262] time cost 51.67s, valid loss 4.40, valid ppl 81.15, lr 30.00
[Epoch 262] test loss 4.34, test ppl 77.09
[Epoch 263 Batch 200/372] current loss 4.15, ppl 63.48, throughput 637.63 samples/s, lr 30.86
[Epoch 263] throughput 42687.23 samples/s
[Epoch 263] time cost 51.86s, valid loss 4.40, valid ppl 81.14, lr 30.00
[Epoch 263] test loss 4.34, test ppl 77.08
[Epoch 264 Batch 200/372] current loss 4.14, ppl 62.92, throughput 642.27 samples/s, lr 30.00
[Epoch 264] throughput 43273.86 samples/s
[Epoch 264] time cost 51.13s, valid loss 4.40, valid ppl 81.13, lr 30.00
[Epoch 264] test loss 4.34, test ppl 77.07
[Epoch 265 Batch 200/372] current loss 4.14, ppl 62.87, throughput 621.83 samples/s, lr 30.00
[Epoch 265] throughput 42729.50 samples/s
[Epoch 265] time cost 51.72s, valid loss 4.40, valid ppl 81.13, lr 30.00
[Epoch 265] test loss 4.34, test ppl 77.06
[Epoch 266 Batch 200/372] current loss 4.14, ppl 62.53, throughput 626.44 samples/s, lr 25.71
[Epoch 266] throughput 42865.14 samples/s
[Epoch 266] time cost 51.68s, valid loss 4.40, valid ppl 81.12, lr 30.00
[Epoch 266] test loss 4.34, test ppl 77.06
[Epoch 267 Batch 200/372] current loss 4.13, ppl 62.34, throughput 652.40 samples/s, lr 30.00
[Epoch 267] throughput 43327.29 samples/s
[Epoch 267] time cost 51.17s, valid loss 4.40, valid ppl 81.11, lr 30.00
[Epoch 267] test loss 4.34, test ppl 77.05
[Epoch 268 Batch 200/372] current loss 4.15, ppl 63.16, throughput 642.44 samples/s, lr 14.57
[Epoch 268] throughput 42956.02 samples/s
[Epoch 268] time cost 51.50s, valid loss 4.40, valid ppl 81.10, lr 30.00
[Epoch 268] test loss 4.34, test ppl 77.04
[Epoch 269 Batch 200/372] current loss 4.12, ppl 61.69, throughput 627.18 samples/s, lr 31.29
[Epoch 269] throughput 42555.13 samples/s
[Epoch 269] time cost 52.03s, valid loss 4.40, valid ppl 81.10, lr 30.00
[Epoch 269] test loss 4.34, test ppl 77.04
[Epoch 270 Batch 200/372] current loss 4.13, ppl 61.97, throughput 624.03 samples/s, lr 30.43
[Epoch 270] throughput 42626.75 samples/s
[Epoch 270] time cost 51.85s, valid loss 4.40, valid ppl 81.09, lr 30.00
[Epoch 270] test loss 4.34, test ppl 77.03
[Epoch 271 Batch 200/372] current loss 4.14, ppl 62.73, throughput 621.81 samples/s, lr 28.71
[Epoch 271] throughput 42439.37 samples/s
[Epoch 271] time cost 52.06s, valid loss 4.40, valid ppl 81.08, lr 30.00
[Epoch 271] test loss 4.34, test ppl 77.02
[Epoch 272 Batch 200/372] current loss 4.14, ppl 62.52, throughput 621.89 samples/s, lr 27.43
[Epoch 272] throughput 42899.59 samples/s
[Epoch 272] time cost 51.55s, valid loss 4.40, valid ppl 81.07, lr 30.00
[Epoch 272] test loss 4.34, test ppl 77.02
[Epoch 273 Batch 200/372] current loss 4.14, ppl 62.53, throughput 604.85 samples/s, lr 31.71
[Epoch 273] throughput 41864.17 samples/s
[Epoch 273] time cost 52.86s, valid loss 4.40, valid ppl 81.06, lr 30.00
[Epoch 273] test loss 4.34, test ppl 77.01
[Epoch 274 Batch 200/372] current loss 4.13, ppl 62.28, throughput 640.50 samples/s, lr 13.29
[Epoch 274] throughput 43562.63 samples/s
[Epoch 274] time cost 50.87s, valid loss 4.40, valid ppl 81.06, lr 30.00
[Epoch 274] test loss 4.34, test ppl 77.00
[Epoch 275 Batch 200/372] current loss 4.12, ppl 61.51, throughput 623.50 samples/s, lr 30.86
[Epoch 275] throughput 42098.65 samples/s
[Epoch 275] time cost 52.53s, valid loss 4.40, valid ppl 81.05, lr 30.00
[Epoch 275] test loss 4.34, test ppl 77.00
[Epoch 276 Batch 200/372] current loss 4.13, ppl 62.44, throughput 619.59 samples/s, lr 28.71
[Epoch 276] throughput 42379.89 samples/s
[Epoch 276] time cost 52.19s, valid loss 4.39, valid ppl 81.04, lr 30.00
[Epoch 276] test loss 4.34, test ppl 76.99
[Epoch 277 Batch 200/372] current loss 4.13, ppl 62.05, throughput 627.50 samples/s, lr 30.86
[Epoch 277] throughput 42214.02 samples/s
[Epoch 277] time cost 52.38s, valid loss 4.39, valid ppl 81.03, lr 30.00
[Epoch 277] test loss 4.34, test ppl 76.98
[Epoch 278 Batch 200/372] current loss 4.12, ppl 61.78, throughput 611.30 samples/s, lr 29.57
[Epoch 278] throughput 42050.19 samples/s
[Epoch 278] time cost 52.60s, valid loss 4.39, valid ppl 81.03, lr 30.00
[Epoch 278] test loss 4.34, test ppl 76.98
[Epoch 279 Batch 200/372] current loss 4.12, ppl 61.87, throughput 640.16 samples/s, lr 31.71
[Epoch 279] throughput 42850.21 samples/s
[Epoch 279] time cost 51.71s, valid loss 4.39, valid ppl 81.02, lr 30.00
[Epoch 279] test loss 4.34, test ppl 76.97
[Epoch 280 Batch 200/372] current loss 4.13, ppl 61.89, throughput 616.53 samples/s, lr 31.29
[Epoch 280] throughput 42454.84 samples/s
[Epoch 280] time cost 52.12s, valid loss 4.39, valid ppl 81.01, lr 30.00
[Epoch 280] test loss 4.34, test ppl 76.96
[Epoch 281 Batch 200/372] current loss 4.12, ppl 61.66, throughput 596.62 samples/s, lr 31.71
[Epoch 281] throughput 41503.15 samples/s
[Epoch 281] time cost 53.22s, valid loss 4.39, valid ppl 81.01, lr 30.00
[Epoch 281] test loss 4.34, test ppl 76.96
[Epoch 282 Batch 200/372] current loss 4.12, ppl 61.82, throughput 630.12 samples/s, lr 27.43
[Epoch 282] throughput 42782.22 samples/s
[Epoch 282] time cost 51.70s, valid loss 4.39, valid ppl 81.00, lr 30.00
[Epoch 282] test loss 4.34, test ppl 76.95
[Epoch 283 Batch 200/372] current loss 4.12, ppl 61.86, throughput 634.45 samples/s, lr 31.71
[Epoch 283] throughput 42256.66 samples/s
[Epoch 283] time cost 52.33s, valid loss 4.39, valid ppl 80.99, lr 30.00
[Epoch 283] test loss 4.34, test ppl 76.95
[Epoch 284 Batch 200/372] current loss 4.12, ppl 61.41, throughput 616.36 samples/s, lr 28.29
[Epoch 284] throughput 42564.80 samples/s
[Epoch 284] time cost 51.87s, valid loss 4.39, valid ppl 80.98, lr 30.00
[Epoch 284] test loss 4.34, test ppl 76.94
[Epoch 285 Batch 200/372] current loss 4.11, ppl 60.98, throughput 630.10 samples/s, lr 31.71
[Epoch 285] throughput 42967.96 samples/s
[Epoch 285] time cost 51.45s, valid loss 4.39, valid ppl 80.98, lr 30.00
[Epoch 285] test loss 4.34, test ppl 76.93
[Epoch 286 Batch 200/372] current loss 4.12, ppl 61.46, throughput 622.99 samples/s, lr 15.00
[Epoch 286] throughput 42406.20 samples/s
[Epoch 286] time cost 52.09s, valid loss 4.39, valid ppl 80.97, lr 30.00
[Epoch 286] test loss 4.34, test ppl 76.93
[Epoch 287 Batch 200/372] current loss 4.12, ppl 61.39, throughput 620.27 samples/s, lr 30.00
[Epoch 287] throughput 42115.20 samples/s
[Epoch 287] time cost 52.53s, valid loss 4.39, valid ppl 80.96, lr 30.00
[Epoch 287] test loss 4.34, test ppl 76.92
[Epoch 288 Batch 200/372] current loss 4.12, ppl 61.56, throughput 626.76 samples/s, lr 26.14
[Epoch 288] throughput 42261.28 samples/s
[Epoch 288] time cost 52.37s, valid loss 4.39, valid ppl 80.96, lr 30.00
[Epoch 288] test loss 4.34, test ppl 76.92
[Epoch 289 Batch 200/372] current loss 4.12, ppl 61.30, throughput 638.29 samples/s, lr 27.00
[Epoch 289] throughput 43021.62 samples/s
[Epoch 289] time cost 51.39s, valid loss 4.39, valid ppl 80.95, lr 30.00
[Epoch 289] test loss 4.34, test ppl 76.91
[Epoch 290 Batch 200/372] current loss 4.12, ppl 61.53, throughput 614.38 samples/s, lr 31.29
[Epoch 290] throughput 42522.66 samples/s
[Epoch 290] time cost 52.00s, valid loss 4.39, valid ppl 80.94, lr 30.00
[Epoch 290] test loss 4.34, test ppl 76.90
[Epoch 291 Batch 200/372] current loss 4.13, ppl 62.08, throughput 644.72 samples/s, lr 29.14
[Epoch 291] throughput 43059.32 samples/s
[Epoch 291] time cost 51.38s, valid loss 4.39, valid ppl 80.93, lr 30.00
[Epoch 291] test loss 4.34, test ppl 76.90
[Epoch 292 Batch 200/372] current loss 4.12, ppl 61.29, throughput 645.53 samples/s, lr 27.86
[Epoch 292] throughput 42937.94 samples/s
[Epoch 292] time cost 51.61s, valid loss 4.39, valid ppl 80.93, lr 30.00
[Epoch 292] test loss 4.34, test ppl 76.89
[Epoch 293 Batch 200/372] current loss 4.11, ppl 60.83, throughput 613.42 samples/s, lr 30.86
[Epoch 293] throughput 42133.12 samples/s
[Epoch 293] time cost 52.42s, valid loss 4.39, valid ppl 80.92, lr 30.00
[Epoch 293] test loss 4.34, test ppl 76.89
[Epoch 294 Batch 200/372] current loss 4.13, ppl 61.97, throughput 611.90 samples/s, lr 19.29
[Epoch 294] throughput 42185.62 samples/s
[Epoch 294] time cost 52.43s, valid loss 4.39, valid ppl 80.91, lr 30.00
[Epoch 294] test loss 4.34, test ppl 76.88
[Epoch 295 Batch 200/372] current loss 4.13, ppl 61.99, throughput 642.37 samples/s, lr 17.14
[Epoch 295] throughput 42551.03 samples/s
[Epoch 295] time cost 52.05s, valid loss 4.39, valid ppl 80.91, lr 30.00
[Epoch 295] test loss 4.34, test ppl 76.87
[Epoch 296 Batch 200/372] current loss 4.11, ppl 61.21, throughput 621.93 samples/s, lr 32.57
[Epoch 296] throughput 42460.45 samples/s
[Epoch 296] time cost 52.12s, valid loss 4.39, valid ppl 80.90, lr 30.00
[Epoch 296] test loss 4.34, test ppl 76.87
[Epoch 297 Batch 200/372] current loss 4.12, ppl 61.42, throughput 641.52 samples/s, lr 35.14
[Epoch 297] throughput 43085.62 samples/s
[Epoch 297] time cost 51.34s, valid loss 4.39, valid ppl 80.90, lr 30.00
[Epoch 297] test loss 4.34, test ppl 76.86
[Epoch 298 Batch 200/372] current loss 4.11, ppl 61.07, throughput 636.38 samples/s, lr 28.29
[Epoch 298] throughput 42990.36 samples/s
[Epoch 298] time cost 51.46s, valid loss 4.39, valid ppl 80.89, lr 30.00
[Epoch 298] test loss 4.34, test ppl 76.86
[Epoch 299 Batch 200/372] current loss 4.12, ppl 61.73, throughput 633.14 samples/s, lr 31.71
[Epoch 299] throughput 43216.91 samples/s
[Epoch 299] time cost 51.25s, valid loss 4.39, valid ppl 80.89, lr 30.00
[Epoch 299] test loss 4.34, test ppl 76.85
[Epoch 300 Batch 200/372] current loss 4.11, ppl 61.13, throughput 633.61 samples/s, lr 13.71
[Epoch 300] throughput 42290.50 samples/s
[Epoch 300] time cost 52.23s, valid loss 4.39, valid ppl 80.88, lr 30.00
[Epoch 300] test loss 4.34, test ppl 76.85
[Epoch 301 Batch 200/372] current loss 4.12, ppl 61.71, throughput 629.09 samples/s, lr 30.43
[Epoch 301] throughput 42673.90 samples/s
[Epoch 301] time cost 51.84s, valid loss 4.39, valid ppl 80.87, lr 30.00
[Epoch 301] test loss 4.34, test ppl 76.84
[Epoch 302 Batch 200/372] current loss 4.10, ppl 60.16, throughput 630.63 samples/s, lr 27.86
[Epoch 302] throughput 42463.54 samples/s
[Epoch 302] time cost 52.10s, valid loss 4.39, valid ppl 80.87, lr 30.00
[Epoch 302] test loss 4.34, test ppl 76.84
[Epoch 303 Batch 200/372] current loss 4.11, ppl 60.76, throughput 628.48 samples/s, lr 27.43
[Epoch 303] throughput 42467.85 samples/s
[Epoch 303] time cost 52.09s, valid loss 4.39, valid ppl 80.86, lr 30.00
[Epoch 303] test loss 4.34, test ppl 76.83
[Epoch 304 Batch 200/372] current loss 4.10, ppl 60.48, throughput 634.76 samples/s, lr 34.71
[Epoch 304] throughput 42417.57 samples/s
[Epoch 304] time cost 52.07s, valid loss 4.39, valid ppl 80.86, lr 30.00
[Epoch 304] test loss 4.34, test ppl 76.83
[Epoch 305 Batch 200/372] current loss 4.11, ppl 60.94, throughput 631.83 samples/s, lr 31.29
[Epoch 305] throughput 42375.97 samples/s
[Epoch 305] time cost 52.13s, valid loss 4.39, valid ppl 80.85, lr 30.00
[Epoch 305] test loss 4.34, test ppl 76.82
[Epoch 306 Batch 200/372] current loss 4.10, ppl 60.44, throughput 618.18 samples/s, lr 30.00
[Epoch 306] throughput 42055.16 samples/s
[Epoch 306] time cost 52.63s, valid loss 4.39, valid ppl 80.85, lr 30.00
[Epoch 306] test loss 4.34, test ppl 76.82
[Epoch 307 Batch 200/372] current loss 4.11, ppl 60.89, throughput 631.67 samples/s, lr 30.86
[Epoch 307] throughput 42972.11 samples/s
[Epoch 307] time cost 51.44s, valid loss 4.39, valid ppl 80.84, lr 30.00
[Epoch 307] test loss 4.34, test ppl 76.81
[Epoch 308 Batch 200/372] current loss 4.11, ppl 60.67, throughput 609.32 samples/s, lr 32.57
[Epoch 308] throughput 41911.96 samples/s
[Epoch 308] time cost 52.70s, valid loss 4.39, valid ppl 80.83, lr 30.00
[Epoch 308] test loss 4.34, test ppl 76.81
[Epoch 309 Batch 200/372] current loss 4.11, ppl 61.22, throughput 634.18 samples/s, lr 27.86
[Epoch 309] throughput 42888.48 samples/s
[Epoch 309] time cost 51.56s, valid loss 4.39, valid ppl 80.83, lr 30.00
[Epoch 309] test loss 4.34, test ppl 76.80
[Epoch 310 Batch 200/372] current loss 4.09, ppl 60.01, throughput 629.79 samples/s, lr 30.43
[Epoch 310] throughput 42976.47 samples/s
[Epoch 310] time cost 51.55s, valid loss 4.39, valid ppl 80.82, lr 30.00
[Epoch 310] test loss 4.34, test ppl 76.80
[Epoch 311 Batch 200/372] current loss 4.11, ppl 60.87, throughput 622.70 samples/s, lr 28.29
[Epoch 311] throughput 42848.75 samples/s
[Epoch 311] time cost 51.61s, valid loss 4.39, valid ppl 80.82, lr 30.00
[Epoch 311] test loss 4.34, test ppl 76.79
[Epoch 312 Batch 200/372] current loss 4.10, ppl 60.41, throughput 639.88 samples/s, lr 30.00
[Epoch 312] throughput 42804.98 samples/s
[Epoch 312] time cost 51.71s, valid loss 4.39, valid ppl 80.81, lr 30.00
[Epoch 312] test loss 4.34, test ppl 76.79
[Epoch 313 Batch 200/372] current loss 4.10, ppl 60.31, throughput 620.76 samples/s, lr 32.14
[Epoch 313] throughput 42264.05 samples/s
[Epoch 313] time cost 52.31s, valid loss 4.39, valid ppl 80.80, lr 30.00
[Epoch 313] test loss 4.34, test ppl 76.79
[Epoch 314 Batch 200/372] current loss 4.10, ppl 60.50, throughput 629.52 samples/s, lr 31.29
[Epoch 314] throughput 42823.09 samples/s
[Epoch 314] time cost 51.66s, valid loss 4.39, valid ppl 80.80, lr 30.00
[Epoch 314] test loss 4.34, test ppl 76.78
[Epoch 315 Batch 200/372] current loss 4.11, ppl 60.98, throughput 623.08 samples/s, lr 29.57
[Epoch 315] throughput 42535.89 samples/s
[Epoch 315] time cost 52.01s, valid loss 4.39, valid ppl 80.79, lr 30.00
[Epoch 315] test loss 4.34, test ppl 76.78
[Epoch 316 Batch 200/372] current loss 4.11, ppl 60.79, throughput 614.05 samples/s, lr 27.43
[Epoch 316] throughput 43142.39 samples/s
[Epoch 316] time cost 51.32s, valid loss 4.39, valid ppl 80.79, lr 30.00
[Epoch 316] test loss 4.34, test ppl 76.77
[Epoch 317 Batch 200/372] current loss 4.09, ppl 59.93, throughput 633.19 samples/s, lr 29.14
[Epoch 317] throughput 42989.82 samples/s
[Epoch 317] time cost 51.45s, valid loss 4.39, valid ppl 80.78, lr 30.00
[Epoch 317] test loss 4.34, test ppl 76.77
[Epoch 318 Batch 200/372] current loss 4.11, ppl 61.04, throughput 615.60 samples/s, lr 29.14
[Epoch 318] throughput 42337.47 samples/s
[Epoch 318] time cost 52.28s, valid loss 4.39, valid ppl 80.78, lr 30.00
[Epoch 318] test loss 4.34, test ppl 76.76
[Epoch 319 Batch 200/372] current loss 4.10, ppl 60.18, throughput 639.19 samples/s, lr 33.43
[Epoch 319] throughput 43340.49 samples/s
[Epoch 319] time cost 51.10s, valid loss 4.39, valid ppl 80.77, lr 30.00
[Epoch 319] test loss 4.34, test ppl 76.76
[Epoch 320 Batch 200/372] current loss 4.09, ppl 60.03, throughput 631.52 samples/s, lr 29.57
[Epoch 320] throughput 42547.11 samples/s
[Epoch 320] time cost 51.95s, valid loss 4.39, valid ppl 80.77, lr 30.00
[Epoch 320] test loss 4.34, test ppl 76.76
[Epoch 321 Batch 200/372] current loss 4.11, ppl 60.79, throughput 600.73 samples/s, lr 31.29
[Epoch 321] throughput 41467.28 samples/s
[Epoch 321] time cost 53.32s, valid loss 4.39, valid ppl 80.76, lr 30.00
[Epoch 321] test loss 4.34, test ppl 76.75
[Epoch 322 Batch 200/372] current loss 4.10, ppl 60.64, throughput 631.98 samples/s, lr 29.14
[Epoch 322] throughput 43558.85 samples/s
[Epoch 322] time cost 50.83s, valid loss 4.39, valid ppl 80.75, lr 30.00
[Epoch 322] test loss 4.34, test ppl 76.75
[Epoch 323 Batch 200/372] current loss 4.11, ppl 61.24, throughput 618.93 samples/s, lr 30.00
[Epoch 323] throughput 42905.30 samples/s
[Epoch 323] time cost 51.52s, valid loss 4.39, valid ppl 80.75, lr 30.00
[Epoch 323] test loss 4.34, test ppl 76.74
[Epoch 324 Batch 200/372] current loss 4.10, ppl 60.51, throughput 620.99 samples/s, lr 30.00
[Epoch 324] throughput 42331.82 samples/s
[Epoch 324] time cost 52.23s, valid loss 4.39, valid ppl 80.74, lr 30.00
[Epoch 324] test loss 4.34, test ppl 76.74
[Epoch 325 Batch 200/372] current loss 4.10, ppl 60.28, throughput 619.95 samples/s, lr 30.43
[Epoch 325] throughput 42356.47 samples/s
[Epoch 325] time cost 52.23s, valid loss 4.39, valid ppl 80.74, lr 30.00
[Epoch 325] test loss 4.34, test ppl 76.74
[Epoch 326 Batch 200/372] current loss 4.11, ppl 60.94, throughput 610.23 samples/s, lr 29.57
[Epoch 326] throughput 41973.17 samples/s
[Epoch 326] time cost 52.67s, valid loss 4.39, valid ppl 80.73, lr 30.00
[Epoch 326] test loss 4.34, test ppl 76.73
[Epoch 327 Batch 200/372] current loss 4.11, ppl 60.75, throughput 636.41 samples/s, lr 15.43
[Epoch 327] throughput 42778.09 samples/s
[Epoch 327] time cost 51.69s, valid loss 4.39, valid ppl 80.73, lr 30.00
[Epoch 327] test loss 4.34, test ppl 76.73
[Epoch 328 Batch 200/372] current loss 4.10, ppl 60.42, throughput 627.64 samples/s, lr 26.14
[Epoch 328] throughput 42501.38 samples/s
[Epoch 328] time cost 52.00s, valid loss 4.39, valid ppl 80.72, lr 30.00
[Epoch 328] test loss 4.34, test ppl 76.72
[Epoch 329 Batch 200/372] current loss 4.11, ppl 60.74, throughput 625.15 samples/s, lr 33.00
[Epoch 329] throughput 42626.80 samples/s
[Epoch 329] time cost 51.89s, valid loss 4.39, valid ppl 80.71, lr 30.00
[Epoch 329] test loss 4.34, test ppl 76.72
[Epoch 330 Batch 200/372] current loss 4.10, ppl 60.51, throughput 636.84 samples/s, lr 27.86
[Epoch 330] throughput 43109.48 samples/s
[Epoch 330] time cost 51.33s, valid loss 4.39, valid ppl 80.71, lr 30.00
[Epoch 330] test loss 4.34, test ppl 76.72
[Epoch 331 Batch 200/372] current loss 4.10, ppl 60.27, throughput 601.38 samples/s, lr 31.29
[Epoch 331] throughput 41922.44 samples/s
[Epoch 331] time cost 52.68s, valid loss 4.39, valid ppl 80.70, lr 30.00
[Epoch 331] test loss 4.34, test ppl 76.71
[Epoch 332 Batch 200/372] current loss 4.10, ppl 60.26, throughput 631.63 samples/s, lr 24.43
[Epoch 332] throughput 42551.05 samples/s
[Epoch 332] time cost 51.99s, valid loss 4.39, valid ppl 80.70, lr 30.00
[Epoch 332] test loss 4.34, test ppl 76.71
[Epoch 333 Batch 200/372] current loss 4.10, ppl 60.16, throughput 625.69 samples/s, lr 29.57
[Epoch 333] throughput 42453.09 samples/s
[Epoch 333] time cost 52.03s, valid loss 4.39, valid ppl 80.69, lr 30.00
[Epoch 333] test loss 4.34, test ppl 76.71
[Epoch 334 Batch 200/372] current loss 4.10, ppl 60.54, throughput 626.84 samples/s, lr 28.29
[Epoch 334] throughput 42375.86 samples/s
[Epoch 334] time cost 52.25s, valid loss 4.39, valid ppl 80.69, lr 30.00
[Epoch 334] test loss 4.34, test ppl 76.70
[Epoch 335 Batch 200/372] current loss 4.10, ppl 60.25, throughput 634.10 samples/s, lr 27.86
[Epoch 335] throughput 42903.68 samples/s
[Epoch 335] time cost 51.62s, valid loss 4.39, valid ppl 80.68, lr 30.00
[Epoch 335] test loss 4.34, test ppl 76.70
[Epoch 336 Batch 200/372] current loss 4.11, ppl 60.76, throughput 625.27 samples/s, lr 30.43
[Epoch 336] throughput 42608.83 samples/s
[Epoch 336] time cost 51.87s, valid loss 4.39, valid ppl 80.68, lr 30.00
[Epoch 336] test loss 4.34, test ppl 76.70
[Epoch 337 Batch 200/372] current loss 4.09, ppl 59.70, throughput 627.85 samples/s, lr 33.86
[Epoch 337] throughput 43391.92 samples/s
[Epoch 337] time cost 51.13s, valid loss 4.39, valid ppl 80.67, lr 30.00
[Epoch 337] test loss 4.34, test ppl 76.69
[Epoch 338 Batch 200/372] current loss 4.10, ppl 60.17, throughput 609.74 samples/s, lr 29.57
[Epoch 338] throughput 41625.14 samples/s
[Epoch 338] time cost 53.14s, valid loss 4.39, valid ppl 80.67, lr 30.00
[Epoch 338] test loss 4.34, test ppl 76.69
[Epoch 339 Batch 200/372] current loss 4.10, ppl 60.19, throughput 620.89 samples/s, lr 30.43
[Epoch 339] throughput 42386.32 samples/s
[Epoch 339] time cost 52.09s, valid loss 4.39, valid ppl 80.66, lr 30.00
[Epoch 339] test loss 4.34, test ppl 76.68
[Epoch 340 Batch 200/372] current loss 4.10, ppl 60.26, throughput 610.21 samples/s, lr 33.00
[Epoch 340] throughput 42654.14 samples/s
[Epoch 340] time cost 51.91s, valid loss 4.39, valid ppl 80.66, lr 30.00
[Epoch 340] test loss 4.34, test ppl 76.68
[Epoch 341 Batch 200/372] current loss 4.09, ppl 59.91, throughput 620.68 samples/s, lr 30.43
[Epoch 341] throughput 42753.95 samples/s
[Epoch 341] time cost 51.71s, valid loss 4.39, valid ppl 80.65, lr 30.00
[Epoch 341] test loss 4.34, test ppl 76.68
[Epoch 342 Batch 200/372] current loss 4.10, ppl 60.53, throughput 628.65 samples/s, lr 32.14
[Epoch 342] throughput 42950.92 samples/s
[Epoch 342] time cost 51.47s, valid loss 4.39, valid ppl 80.65, lr 30.00
[Epoch 342] test loss 4.34, test ppl 76.67
[Epoch 343 Batch 200/372] current loss 4.10, ppl 60.04, throughput 627.59 samples/s, lr 34.29
[Epoch 343] throughput 41896.20 samples/s
[Epoch 343] time cost 52.74s, valid loss 4.39, valid ppl 80.64, lr 30.00
[Epoch 343] test loss 4.34, test ppl 76.67
[Epoch 344 Batch 200/372] current loss 4.10, ppl 60.10, throughput 648.61 samples/s, lr 33.86
[Epoch 344] throughput 42966.19 samples/s
[Epoch 344] time cost 51.58s, valid loss 4.39, valid ppl 80.64, lr 30.00
[Epoch 344] test loss 4.34, test ppl 76.67
[Epoch 345 Batch 200/372] current loss 4.09, ppl 59.71, throughput 626.92 samples/s, lr 32.14
[Epoch 345] throughput 43436.88 samples/s
[Epoch 345] time cost 50.96s, valid loss 4.39, valid ppl 80.63, lr 30.00
[Epoch 345] test loss 4.34, test ppl 76.66
[Epoch 346 Batch 200/372] current loss 4.10, ppl 60.43, throughput 624.13 samples/s, lr 29.14
[Epoch 346] throughput 42759.95 samples/s
[Epoch 346] time cost 51.69s, valid loss 4.39, valid ppl 80.63, lr 30.00
[Epoch 346] test loss 4.34, test ppl 76.66
[Epoch 347 Batch 200/372] current loss 4.09, ppl 59.91, throughput 614.27 samples/s, lr 31.71
[Epoch 347] throughput 42846.98 samples/s
[Epoch 347] time cost 51.66s, valid loss 4.39, valid ppl 80.63, lr 30.00
[Epoch 347] test loss 4.34, test ppl 76.66
[Epoch 348 Batch 200/372] current loss 4.09, ppl 59.74, throughput 606.64 samples/s, lr 30.43
[Epoch 348] throughput 42629.77 samples/s
[Epoch 348] time cost 51.83s, valid loss 4.39, valid ppl 80.62, lr 30.00
[Epoch 348] test loss 4.34, test ppl 76.65
[Epoch 349 Batch 200/372] current loss 4.09, ppl 59.67, throughput 622.17 samples/s, lr 30.00
[Epoch 349] throughput 42310.37 samples/s
[Epoch 349] time cost 52.28s, valid loss 4.39, valid ppl 80.62, lr 30.00
[Epoch 349] test loss 4.34, test ppl 76.65
[Epoch 350 Batch 200/372] current loss 4.09, ppl 59.85, throughput 616.75 samples/s, lr 29.57
[Epoch 350] throughput 42379.00 samples/s
[Epoch 350] time cost 52.21s, valid loss 4.39, valid ppl 80.62, lr 30.00
[Epoch 350] test loss 4.34, test ppl 76.65
[Epoch 351 Batch 200/372] current loss 4.09, ppl 59.91, throughput 622.19 samples/s, lr 31.29
[Epoch 351] throughput 42752.51 samples/s
[Epoch 351] time cost 51.75s, valid loss 4.39, valid ppl 80.61, lr 30.00
[Epoch 351] test loss 4.34, test ppl 76.64
[Epoch 352 Batch 200/372] current loss 4.09, ppl 59.58, throughput 616.76 samples/s, lr 32.57
[Epoch 352] throughput 41902.68 samples/s
[Epoch 352] time cost 52.67s, valid loss 4.39, valid ppl 80.61, lr 30.00
[Epoch 352] test loss 4.34, test ppl 76.64
[Epoch 353 Batch 200/372] current loss 4.08, ppl 59.35, throughput 621.01 samples/s, lr 32.57
[Epoch 353] throughput 41492.78 samples/s
[Epoch 353] time cost 53.19s, valid loss 4.39, valid ppl 80.60, lr 30.00
[Epoch 353] test loss 4.34, test ppl 76.64
[Epoch 354 Batch 200/372] current loss 4.08, ppl 59.05, throughput 608.47 samples/s, lr 28.71
[Epoch 354] throughput 42477.03 samples/s
[Epoch 354] time cost 51.99s, valid loss 4.39, valid ppl 80.60, lr 30.00
[Epoch 354] test loss 4.34, test ppl 76.63
[Epoch 355 Batch 200/372] current loss 4.09, ppl 60.00, throughput 624.13 samples/s, lr 33.86
[Epoch 355] throughput 41733.19 samples/s
[Epoch 355] time cost 52.97s, valid loss 4.39, valid ppl 80.60, lr 30.00
[Epoch 355] test loss 4.34, test ppl 76.63
[Epoch 356 Batch 200/372] current loss 4.09, ppl 59.83, throughput 621.75 samples/s, lr 29.14
[Epoch 356] throughput 42738.76 samples/s
[Epoch 356] time cost 51.72s, valid loss 4.39, valid ppl 80.59, lr 30.00
[Epoch 356] test loss 4.34, test ppl 76.63
[Epoch 357 Batch 200/372] current loss 4.09, ppl 59.58, throughput 622.30 samples/s, lr 13.29
[Epoch 357] throughput 42468.11 samples/s
[Epoch 357] time cost 52.04s, valid loss 4.39, valid ppl 80.59, lr 30.00
[Epoch 357] test loss 4.34, test ppl 76.62
[Epoch 358 Batch 200/372] current loss 4.08, ppl 59.31, throughput 615.13 samples/s, lr 27.00
[Epoch 358] throughput 42572.66 samples/s
[Epoch 358] time cost 52.00s, valid loss 4.39, valid ppl 80.59, lr 30.00
[Epoch 358] test loss 4.34, test ppl 76.62
[Epoch 359 Batch 200/372] current loss 4.08, ppl 59.37, throughput 621.29 samples/s, lr 33.86
[Epoch 359] throughput 42523.89 samples/s
[Epoch 359] time cost 52.07s, valid loss 4.39, valid ppl 80.58, lr 30.00
[Epoch 359] test loss 4.34, test ppl 76.61
[Epoch 360 Batch 200/372] current loss 4.09, ppl 60.03, throughput 605.53 samples/s, lr 30.43
[Epoch 360] throughput 41767.87 samples/s
[Epoch 360] time cost 52.84s, valid loss 4.39, valid ppl 80.58, lr 30.00
[Epoch 360] test loss 4.34, test ppl 76.61
[Epoch 361 Batch 200/372] current loss 4.08, ppl 59.38, throughput 607.73 samples/s, lr 29.14
[Epoch 361] throughput 41795.15 samples/s
[Epoch 361] time cost 52.91s, valid loss 4.39, valid ppl 80.57, lr 30.00
[Epoch 361] test loss 4.34, test ppl 76.61
[Epoch 362 Batch 200/372] current loss 4.09, ppl 59.48, throughput 621.28 samples/s, lr 27.00
[Epoch 362] throughput 43025.43 samples/s
[Epoch 362] time cost 51.38s, valid loss 4.39, valid ppl 80.57, lr 30.00
[Epoch 362] test loss 4.34, test ppl 76.60
[Epoch 363 Batch 200/372] current loss 4.09, ppl 59.64, throughput 622.15 samples/s, lr 27.00
[Epoch 363] throughput 42694.62 samples/s
[Epoch 363] time cost 51.98s, valid loss 4.39, valid ppl 80.57, lr 30.00
[Epoch 363] test loss 4.34, test ppl 76.60
[Epoch 364 Batch 200/372] current loss 4.09, ppl 59.74, throughput 633.68 samples/s, lr 33.00
[Epoch 364] throughput 43760.68 samples/s
[Epoch 364] time cost 50.67s, valid loss 4.39, valid ppl 80.56, lr 30.00
[Epoch 364] test loss 4.34, test ppl 76.60
[Epoch 365 Batch 200/372] current loss 4.08, ppl 59.05, throughput 646.12 samples/s, lr 27.43
[Epoch 365] throughput 43279.59 samples/s
[Epoch 365] time cost 51.09s, valid loss 4.39, valid ppl 80.56, lr 30.00
[Epoch 365] test loss 4.34, test ppl 76.59
[Epoch 366 Batch 200/372] current loss 4.09, ppl 60.04, throughput 611.11 samples/s, lr 31.71
[Epoch 366] throughput 43263.38 samples/s
[Epoch 366] time cost 51.19s, valid loss 4.39, valid ppl 80.56, lr 30.00
[Epoch 366] test loss 4.34, test ppl 76.59
[Epoch 367 Batch 200/372] current loss 4.09, ppl 59.54, throughput 634.75 samples/s, lr 29.14
[Epoch 367] throughput 42919.34 samples/s
[Epoch 367] time cost 51.51s, valid loss 4.39, valid ppl 80.55, lr 30.00
[Epoch 367] test loss 4.34, test ppl 76.59
[Epoch 368 Batch 200/372] current loss 4.08, ppl 59.14, throughput 628.47 samples/s, lr 27.86
[Epoch 368] throughput 42052.20 samples/s
[Epoch 368] time cost 52.50s, valid loss 4.39, valid ppl 80.55, lr 30.00
[Epoch 368] test loss 4.34, test ppl 76.59
[Epoch 369 Batch 200/372] current loss 4.09, ppl 59.56, throughput 631.61 samples/s, lr 26.57
[Epoch 369] throughput 43031.85 samples/s
[Epoch 369] time cost 51.46s, valid loss 4.39, valid ppl 80.55, lr 30.00
[Epoch 369] test loss 4.34, test ppl 76.58
[Epoch 370 Batch 200/372] current loss 4.09, ppl 59.72, throughput 636.57 samples/s, lr 32.14
[Epoch 370] throughput 42673.09 samples/s
[Epoch 370] time cost 51.88s, valid loss 4.39, valid ppl 80.54, lr 30.00
[Epoch 370] test loss 4.34, test ppl 76.58
[Epoch 371 Batch 200/372] current loss 4.10, ppl 60.06, throughput 628.27 samples/s, lr 27.86
[Epoch 371] throughput 42555.71 samples/s
[Epoch 371] time cost 51.95s, valid loss 4.39, valid ppl 80.54, lr 30.00
[Epoch 371] test loss 4.34, test ppl 76.58
[Epoch 372 Batch 200/372] current loss 4.08, ppl 59.19, throughput 633.01 samples/s, lr 29.57
[Epoch 372] throughput 42471.48 samples/s
[Epoch 372] time cost 52.05s, valid loss 4.39, valid ppl 80.54, lr 30.00
[Epoch 372] test loss 4.34, test ppl 76.57
[Epoch 373 Batch 200/372] current loss 4.09, ppl 59.97, throughput 610.08 samples/s, lr 30.43
[Epoch 373] throughput 42600.88 samples/s
[Epoch 373] time cost 51.91s, valid loss 4.39, valid ppl 80.53, lr 30.00
[Epoch 373] test loss 4.34, test ppl 76.57
[Epoch 374 Batch 200/372] current loss 4.09, ppl 60.02, throughput 642.53 samples/s, lr 29.57
[Epoch 374] throughput 43315.36 samples/s
[Epoch 374] time cost 51.16s, valid loss 4.39, valid ppl 80.53, lr 30.00
[Epoch 374] test loss 4.34, test ppl 76.57
[Epoch 375 Batch 200/372] current loss 4.08, ppl 59.07, throughput 618.25 samples/s, lr 27.43
[Epoch 375] throughput 42351.76 samples/s
[Epoch 375] time cost 52.15s, valid loss 4.39, valid ppl 80.53, lr 30.00
[Epoch 375] test loss 4.34, test ppl 76.57
[Epoch 376 Batch 200/372] current loss 4.08, ppl 59.15, throughput 642.54 samples/s, lr 28.29
[Epoch 376] throughput 42990.57 samples/s
[Epoch 376] time cost 51.47s, valid loss 4.39, valid ppl 80.52, lr 30.00
[Epoch 376] test loss 4.34, test ppl 76.56
[Epoch 377 Batch 200/372] current loss 4.07, ppl 58.53, throughput 622.93 samples/s, lr 32.57
[Epoch 377] throughput 42568.86 samples/s
[Epoch 377] time cost 51.93s, valid loss 4.39, valid ppl 80.52, lr 30.00
[Epoch 377] test loss 4.34, test ppl 76.56
[Epoch 378 Batch 200/372] current loss 4.07, ppl 58.70, throughput 606.38 samples/s, lr 30.43
[Epoch 378] throughput 42058.00 samples/s
[Epoch 378] time cost 52.53s, valid loss 4.39, valid ppl 80.52, lr 30.00
[Epoch 378] test loss 4.34, test ppl 76.56
[Epoch 379 Batch 200/372] current loss 4.08, ppl 59.09, throughput 635.60 samples/s, lr 31.29
[Epoch 379] throughput 43293.51 samples/s
[Epoch 379] time cost 51.09s, valid loss 4.39, valid ppl 80.51, lr 30.00
[Epoch 379] test loss 4.34, test ppl 76.55
[Epoch 380 Batch 200/372] current loss 4.09, ppl 59.55, throughput 623.29 samples/s, lr 28.71
[Epoch 380] throughput 43011.16 samples/s
[Epoch 380] time cost 51.45s, valid loss 4.39, valid ppl 80.51, lr 30.00
[Epoch 380] test loss 4.34, test ppl 76.55
[Epoch 381 Batch 200/372] current loss 4.07, ppl 58.76, throughput 616.47 samples/s, lr 33.00
[Epoch 381] throughput 43039.31 samples/s
[Epoch 381] time cost 51.38s, valid loss 4.39, valid ppl 80.51, lr 30.00
[Epoch 381] test loss 4.34, test ppl 76.55
[Epoch 382 Batch 200/372] current loss 4.07, ppl 58.77, throughput 633.01 samples/s, lr 28.71
[Epoch 382] throughput 42479.07 samples/s
[Epoch 382] time cost 52.00s, valid loss 4.39, valid ppl 80.51, lr 30.00
[Epoch 382] test loss 4.34, test ppl 76.54
[Epoch 383 Batch 200/372] current loss 4.08, ppl 59.28, throughput 642.64 samples/s, lr 32.14
[Epoch 383] throughput 43891.98 samples/s
[Epoch 383] time cost 50.47s, valid loss 4.39, valid ppl 80.50, lr 30.00
[Epoch 383] test loss 4.34, test ppl 76.54
[Epoch 384 Batch 200/372] current loss 4.07, ppl 58.30, throughput 624.93 samples/s, lr 30.00
[Epoch 384] throughput 42642.65 samples/s
[Epoch 384] time cost 51.90s, valid loss 4.39, valid ppl 80.50, lr 30.00
[Epoch 384] test loss 4.34, test ppl 76.54
[Epoch 385 Batch 200/372] current loss 4.09, ppl 59.48, throughput 637.45 samples/s, lr 29.14
[Epoch 385] throughput 43086.86 samples/s
[Epoch 385] time cost 51.33s, valid loss 4.39, valid ppl 80.50, lr 30.00
[Epoch 385] test loss 4.34, test ppl 76.54
[Epoch 386 Batch 200/372] current loss 4.08, ppl 58.98, throughput 626.15 samples/s, lr 15.00
[Epoch 386] throughput 43170.62 samples/s
[Epoch 386] time cost 51.31s, valid loss 4.39, valid ppl 80.49, lr 30.00
[Epoch 386] test loss 4.34, test ppl 76.53
[Epoch 387 Batch 200/372] current loss 4.08, ppl 59.29, throughput 634.12 samples/s, lr 30.86
[Epoch 387] throughput 43083.25 samples/s
[Epoch 387] time cost 51.30s, valid loss 4.39, valid ppl 80.49, lr 30.00
[Epoch 387] test loss 4.34, test ppl 76.53
[Epoch 388 Batch 200/372] current loss 4.08, ppl 59.12, throughput 622.28 samples/s, lr 28.29
[Epoch 388] throughput 42669.59 samples/s
[Epoch 388] time cost 51.90s, valid loss 4.39, valid ppl 80.49, lr 30.00
[Epoch 388] test loss 4.34, test ppl 76.53
[Epoch 389 Batch 200/372] current loss 4.08, ppl 59.40, throughput 630.54 samples/s, lr 26.57
[Epoch 389] throughput 42792.25 samples/s
[Epoch 389] time cost 51.66s, valid loss 4.39, valid ppl 80.48, lr 30.00
[Epoch 389] test loss 4.34, test ppl 76.52
[Epoch 390 Batch 200/372] current loss 4.07, ppl 58.78, throughput 636.33 samples/s, lr 29.14
[Epoch 390] throughput 42816.29 samples/s
[Epoch 390] time cost 51.74s, valid loss 4.39, valid ppl 80.48, lr 30.00
[Epoch 390] test loss 4.34, test ppl 76.52
[Epoch 391 Batch 200/372] current loss 4.08, ppl 59.17, throughput 631.30 samples/s, lr 28.71
[Epoch 391] throughput 42902.73 samples/s
[Epoch 391] time cost 51.53s, valid loss 4.39, valid ppl 80.48, lr 30.00
[Epoch 391] test loss 4.34, test ppl 76.52
[Epoch 392 Batch 200/372] current loss 4.08, ppl 59.00, throughput 620.75 samples/s, lr 31.71
[Epoch 392] throughput 42010.20 samples/s
[Epoch 392] time cost 52.55s, valid loss 4.39, valid ppl 80.47, lr 30.00
[Epoch 392] test loss 4.34, test ppl 76.52
[Epoch 393 Batch 200/372] current loss 4.07, ppl 58.52, throughput 622.66 samples/s, lr 33.86
[Epoch 393] throughput 42242.75 samples/s
[Epoch 393] time cost 52.38s, valid loss 4.39, valid ppl 80.47, lr 30.00
[Epoch 393] test loss 4.34, test ppl 76.51
[Epoch 394 Batch 200/372] current loss 4.08, ppl 58.85, throughput 646.20 samples/s, lr 28.29
[Epoch 394] throughput 43047.19 samples/s
[Epoch 394] time cost 51.47s, valid loss 4.39, valid ppl 80.47, lr 30.00
[Epoch 394] test loss 4.34, test ppl 76.51
[Epoch 395 Batch 200/372] current loss 4.05, ppl 57.45, throughput 614.16 samples/s, lr 31.29
[Epoch 395] throughput 42351.73 samples/s
[Epoch 395] time cost 52.21s, valid loss 4.39, valid ppl 80.46, lr 30.00
[Epoch 395] test loss 4.34, test ppl 76.51
[Epoch 396 Batch 200/372] current loss 4.08, ppl 59.28, throughput 629.53 samples/s, lr 33.43
[Epoch 396] throughput 42157.42 samples/s
[Epoch 396] time cost 52.45s, valid loss 4.39, valid ppl 80.46, lr 30.00
[Epoch 396] test loss 4.34, test ppl 76.50
[Epoch 397 Batch 200/372] current loss 4.07, ppl 58.47, throughput 606.17 samples/s, lr 30.43
[Epoch 397] throughput 42261.03 samples/s
[Epoch 397] time cost 52.32s, valid loss 4.39, valid ppl 80.46, lr 30.00
[Epoch 397] test loss 4.34, test ppl 76.50
[Epoch 398 Batch 200/372] current loss 4.06, ppl 58.20, throughput 631.69 samples/s, lr 31.29
[Epoch 398] throughput 43517.71 samples/s
[Epoch 398] time cost 50.92s, valid loss 4.39, valid ppl 80.46, lr 30.00
[Epoch 398] test loss 4.34, test ppl 76.50
[Epoch 399 Batch 200/372] current loss 4.08, ppl 59.21, throughput 627.09 samples/s, lr 27.86
[Epoch 399] throughput 43120.80 samples/s
[Epoch 399] time cost 51.32s, valid loss 4.39, valid ppl 80.45, lr 30.00
[Epoch 399] test loss 4.34, test ppl 76.50
[Epoch 400 Batch 200/372] current loss 4.07, ppl 58.30, throughput 645.01 samples/s, lr 31.71
[Epoch 400] throughput 43429.56 samples/s
[Epoch 400] time cost 50.95s, valid loss 4.39, valid ppl 80.45, lr 30.00
[Epoch 400] test loss 4.34, test ppl 76.49
[Epoch 401 Batch 200/372] current loss 4.07, ppl 58.30, throughput 637.75 samples/s, lr 29.14
[Epoch 401] throughput 42752.15 samples/s
[Epoch 401] time cost 51.80s, valid loss 4.39, valid ppl 80.45, lr 30.00
[Epoch 401] test loss 4.34, test ppl 76.49
[Epoch 402 Batch 200/372] current loss 4.07, ppl 58.51, throughput 625.83 samples/s, lr 28.29
[Epoch 402] throughput 43165.85 samples/s
[Epoch 402] time cost 51.21s, valid loss 4.39, valid ppl 80.44, lr 30.00
[Epoch 402] test loss 4.34, test ppl 76.49
[Epoch 403 Batch 200/372] current loss 4.07, ppl 58.27, throughput 622.04 samples/s, lr 33.00
[Epoch 403] throughput 43256.22 samples/s
[Epoch 403] time cost 51.23s, valid loss 4.39, valid ppl 80.44, lr 30.00
[Epoch 403] test loss 4.34, test ppl 76.49
[Epoch 404 Batch 200/372] current loss 4.07, ppl 58.51, throughput 622.60 samples/s, lr 30.00
[Epoch 404] throughput 41961.97 samples/s
[Epoch 404] time cost 52.76s, valid loss 4.39, valid ppl 80.44, lr 30.00
[Epoch 404] test loss 4.34, test ppl 76.48
[Epoch 405 Batch 200/372] current loss 4.06, ppl 58.05, throughput 636.18 samples/s, lr 26.57
[Epoch 405] throughput 42216.36 samples/s
[Epoch 405] time cost 52.47s, valid loss 4.39, valid ppl 80.44, lr 30.00
[Epoch 405] test loss 4.34, test ppl 76.48
[Epoch 406 Batch 200/372] current loss 4.08, ppl 59.01, throughput 614.48 samples/s, lr 30.86
[Epoch 406] throughput 42513.71 samples/s
[Epoch 406] time cost 52.06s, valid loss 4.39, valid ppl 80.43, lr 30.00
[Epoch 406] test loss 4.34, test ppl 76.48
[Epoch 407 Batch 200/372] current loss 4.07, ppl 58.50, throughput 593.44 samples/s, lr 32.57
[Epoch 407] throughput 41882.76 samples/s
[Epoch 407] time cost 52.76s, valid loss 4.39, valid ppl 80.43, lr 30.00
[Epoch 407] test loss 4.34, test ppl 76.48
[Epoch 408 Batch 200/372] current loss 4.09, ppl 59.53, throughput 638.47 samples/s, lr 33.86
[Epoch 408] throughput 43298.77 samples/s
[Epoch 408] time cost 51.10s, valid loss 4.39, valid ppl 80.43, lr 30.00
[Epoch 408] test loss 4.34, test ppl 76.47
[Epoch 409 Batch 200/372] current loss 4.07, ppl 58.50, throughput 611.43 samples/s, lr 27.86
[Epoch 409] throughput 41626.99 samples/s
[Epoch 409] time cost 53.10s, valid loss 4.39, valid ppl 80.42, lr 30.00
[Epoch 409] test loss 4.34, test ppl 76.47
[Epoch 410 Batch 200/372] current loss 4.07, ppl 58.37, throughput 631.60 samples/s, lr 30.86
[Epoch 410] throughput 42415.19 samples/s
[Epoch 410] time cost 52.18s, valid loss 4.39, valid ppl 80.42, lr 30.00
[Epoch 410] test loss 4.34, test ppl 76.47
[Epoch 411 Batch 200/372] current loss 4.07, ppl 58.76, throughput 621.37 samples/s, lr 27.86
[Epoch 411] throughput 42348.67 samples/s
[Epoch 411] time cost 52.25s, valid loss 4.39, valid ppl 80.42, lr 30.00
[Epoch 411] test loss 4.34, test ppl 76.47
[Epoch 412 Batch 200/372] current loss 4.08, ppl 59.22, throughput 633.96 samples/s, lr 32.57
[Epoch 412] throughput 43064.74 samples/s
[Epoch 412] time cost 51.35s, valid loss 4.39, valid ppl 80.42, lr 30.00
[Epoch 412] test loss 4.34, test ppl 76.46
[Epoch 413 Batch 200/372] current loss 4.07, ppl 58.61, throughput 625.25 samples/s, lr 24.86
[Epoch 413] throughput 42506.14 samples/s
[Epoch 413] time cost 52.02s, valid loss 4.39, valid ppl 80.41, lr 30.00
[Epoch 413] test loss 4.34, test ppl 76.46
[Epoch 414 Batch 200/372] current loss 4.06, ppl 58.02, throughput 609.17 samples/s, lr 30.86
[Epoch 414] throughput 41703.26 samples/s
[Epoch 414] time cost 52.97s, valid loss 4.39, valid ppl 80.41, lr 30.00
[Epoch 414] test loss 4.34, test ppl 76.46
[Epoch 415 Batch 200/372] current loss 4.07, ppl 58.69, throughput 637.06 samples/s, lr 31.71
[Epoch 415] throughput 43085.33 samples/s
[Epoch 415] time cost 51.32s, valid loss 4.39, valid ppl 80.41, lr 30.00
[Epoch 415] test loss 4.34, test ppl 76.46
[Epoch 416 Batch 200/372] current loss 4.10, ppl 60.26, throughput 639.05 samples/s, lr 29.57
[Epoch 416] throughput 43204.96 samples/s
[Epoch 416] time cost 51.18s, valid loss 4.39, valid ppl 80.41, lr 30.00
[Epoch 416] test loss 4.34, test ppl 76.46
[Epoch 417 Batch 200/372] current loss 4.06, ppl 58.16, throughput 631.09 samples/s, lr 31.29
[Epoch 417] throughput 42956.58 samples/s
[Epoch 417] time cost 51.54s, valid loss 4.39, valid ppl 80.40, lr 30.00
[Epoch 417] test loss 4.34, test ppl 76.45
[Epoch 418 Batch 200/372] current loss 4.07, ppl 58.54, throughput 635.12 samples/s, lr 29.57
[Epoch 418] throughput 42821.12 samples/s
[Epoch 418] time cost 51.75s, valid loss 4.39, valid ppl 80.40, lr 30.00
[Epoch 418] test loss 4.34, test ppl 76.45
[Epoch 419 Batch 200/372] current loss 4.07, ppl 58.47, throughput 614.63 samples/s, lr 32.57
[Epoch 419] throughput 42461.78 samples/s
[Epoch 419] time cost 52.00s, valid loss 4.39, valid ppl 80.40, lr 30.00
[Epoch 419] test loss 4.34, test ppl 76.45
[Epoch 420 Batch 200/372] current loss 4.06, ppl 58.18, throughput 626.12 samples/s, lr 31.71
[Epoch 420] throughput 43236.55 samples/s
[Epoch 420] time cost 51.24s, valid loss 4.39, valid ppl 80.40, lr 30.00
[Epoch 420] test loss 4.34, test ppl 76.45
[Epoch 421 Batch 200/372] current loss 4.07, ppl 58.49, throughput 623.94 samples/s, lr 27.43
[Epoch 421] throughput 42632.80 samples/s
[Epoch 421] time cost 51.84s, valid loss 4.39, valid ppl 80.39, lr 30.00
[Epoch 421] test loss 4.34, test ppl 76.44
[Epoch 422 Batch 200/372] current loss 4.07, ppl 58.43, throughput 626.46 samples/s, lr 30.43
[Epoch 422] throughput 42674.11 samples/s
[Epoch 422] time cost 51.87s, valid loss 4.39, valid ppl 80.39, lr 30.00
[Epoch 422] test loss 4.34, test ppl 76.44
[Epoch 423 Batch 200/372] current loss 4.06, ppl 57.88, throughput 618.68 samples/s, lr 30.86
[Epoch 423] throughput 43252.12 samples/s
[Epoch 423] time cost 51.25s, valid loss 4.39, valid ppl 80.39, lr 30.00
[Epoch 423] test loss 4.34, test ppl 76.44
[Epoch 424 Batch 200/372] current loss 4.05, ppl 57.66, throughput 637.50 samples/s, lr 30.43
[Epoch 424] throughput 42524.92 samples/s
[Epoch 424] time cost 51.99s, valid loss 4.39, valid ppl 80.39, lr 30.00
[Epoch 424] test loss 4.34, test ppl 76.44
[Epoch 425 Batch 200/372] current loss 4.07, ppl 58.62, throughput 634.47 samples/s, lr 29.14
[Epoch 425] throughput 43600.68 samples/s
[Epoch 425] time cost 50.77s, valid loss 4.39, valid ppl 80.38, lr 30.00
[Epoch 425] test loss 4.34, test ppl 76.43
[Epoch 426 Batch 200/372] current loss 4.07, ppl 58.36, throughput 615.10 samples/s, lr 33.00
[Epoch 426] throughput 42414.43 samples/s
[Epoch 426] time cost 52.09s, valid loss 4.39, valid ppl 80.38, lr 30.00
[Epoch 426] test loss 4.34, test ppl 76.43
[Epoch 427 Batch 200/372] current loss 4.08, ppl 59.13, throughput 629.57 samples/s, lr 13.29
[Epoch 427] throughput 42737.29 samples/s
[Epoch 427] time cost 51.70s, valid loss 4.39, valid ppl 80.38, lr 30.00
[Epoch 427] test loss 4.34, test ppl 76.43
[Epoch 428 Batch 200/372] current loss 4.06, ppl 58.03, throughput 603.62 samples/s, lr 27.43
[Epoch 428] throughput 42610.42 samples/s
[Epoch 428] time cost 51.96s, valid loss 4.39, valid ppl 80.38, lr 30.00
[Epoch 428] test loss 4.34, test ppl 76.43
[Epoch 429 Batch 200/372] current loss 4.06, ppl 57.93, throughput 614.42 samples/s, lr 27.43
[Epoch 429] throughput 42842.24 samples/s
[Epoch 429] time cost 51.65s, valid loss 4.39, valid ppl 80.37, lr 30.00
[Epoch 429] test loss 4.34, test ppl 76.42
[Epoch 430 Batch 200/372] current loss 4.07, ppl 58.30, throughput 631.33 samples/s, lr 28.29
[Epoch 430] throughput 43534.97 samples/s
[Epoch 430] time cost 50.94s, valid loss 4.39, valid ppl 80.37, lr 30.00
[Epoch 430] test loss 4.34, test ppl 76.42
[Epoch 431 Batch 200/372] current loss 4.05, ppl 57.43, throughput 654.06 samples/s, lr 29.57
[Epoch 431] throughput 43159.35 samples/s
[Epoch 431] time cost 51.23s, valid loss 4.39, valid ppl 80.37, lr 30.00
[Epoch 431] test loss 4.34, test ppl 76.42
[Epoch 432 Batch 200/372] current loss 4.06, ppl 58.01, throughput 623.15 samples/s, lr 29.57
[Epoch 432] throughput 42841.64 samples/s
[Epoch 432] time cost 51.67s, valid loss 4.39, valid ppl 80.37, lr 30.00
[Epoch 432] test loss 4.34, test ppl 76.42
[Epoch 433 Batch 200/372] current loss 4.06, ppl 57.97, throughput 632.60 samples/s, lr 16.71
[Epoch 433] throughput 42385.05 samples/s
[Epoch 433] time cost 52.17s, valid loss 4.39, valid ppl 80.36, lr 30.00
[Epoch 433] test loss 4.34, test ppl 76.41
[Epoch 434 Batch 200/372] current loss 4.06, ppl 57.73, throughput 616.46 samples/s, lr 30.00
[Epoch 434] throughput 42048.43 samples/s
[Epoch 434] time cost 52.49s, valid loss 4.39, valid ppl 80.36, lr 30.00
[Epoch 434] test loss 4.34, test ppl 76.41
[Epoch 435 Batch 200/372] current loss 4.05, ppl 57.39, throughput 613.92 samples/s, lr 28.29
[Epoch 435] throughput 41822.43 samples/s
[Epoch 435] time cost 52.77s, valid loss 4.39, valid ppl 80.36, lr 30.00
[Epoch 435] test loss 4.34, test ppl 76.41
[Epoch 436 Batch 200/372] current loss 4.07, ppl 58.30, throughput 619.52 samples/s, lr 36.00
[Epoch 436] throughput 42046.19 samples/s
[Epoch 436] time cost 52.53s, valid loss 4.39, valid ppl 80.36, lr 30.00
[Epoch 436] test loss 4.34, test ppl 76.41
[Epoch 437 Batch 200/372] current loss 4.07, ppl 58.36, throughput 634.12 samples/s, lr 30.00
[Epoch 437] throughput 42916.42 samples/s
[Epoch 437] time cost 51.59s, valid loss 4.39, valid ppl 80.36, lr 30.00
[Epoch 437] test loss 4.34, test ppl 76.41
[Epoch 438 Batch 200/372] current loss 4.06, ppl 58.03, throughput 633.69 samples/s, lr 29.57
[Epoch 438] throughput 43378.53 samples/s
[Epoch 438] time cost 51.04s, valid loss 4.39, valid ppl 80.35, lr 30.00
[Epoch 438] test loss 4.34, test ppl 76.41
[Epoch 439 Batch 200/372] current loss 4.05, ppl 57.40, throughput 648.86 samples/s, lr 27.00
[Epoch 439] throughput 43526.21 samples/s
[Epoch 439] time cost 50.91s, valid loss 4.39, valid ppl 80.35, lr 30.00
[Epoch 439] test loss 4.34, test ppl 76.40
[Epoch 440 Batch 200/372] current loss 4.05, ppl 57.49, throughput 665.74 samples/s, lr 29.57
[Epoch 440] throughput 43717.50 samples/s
[Epoch 440] time cost 50.66s, valid loss 4.39, valid ppl 80.35, lr 30.00
[Epoch 440] test loss 4.34, test ppl 76.40
[Epoch 441 Batch 200/372] current loss 4.05, ppl 57.56, throughput 619.22 samples/s, lr 31.29
[Epoch 441] throughput 42810.49 samples/s
[Epoch 441] time cost 51.65s, valid loss 4.39, valid ppl 80.35, lr 30.00
[Epoch 441] test loss 4.34, test ppl 76.40
[Epoch 442 Batch 200/372] current loss 4.07, ppl 58.33, throughput 620.83 samples/s, lr 13.29
[Epoch 442] throughput 42600.41 samples/s
[Epoch 442] time cost 51.91s, valid loss 4.39, valid ppl 80.34, lr 30.00
[Epoch 442] test loss 4.34, test ppl 76.40
[Epoch 443 Batch 200/372] current loss 4.07, ppl 58.49, throughput 633.72 samples/s, lr 30.43
[Epoch 443] throughput 43318.45 samples/s
[Epoch 443] time cost 51.18s, valid loss 4.39, valid ppl 80.34, lr 30.00
[Epoch 443] test loss 4.34, test ppl 76.40
[Epoch 444 Batch 200/372] current loss 4.06, ppl 58.11, throughput 623.01 samples/s, lr 29.14
[Epoch 444] throughput 42254.42 samples/s
[Epoch 444] time cost 52.29s, valid loss 4.39, valid ppl 80.34, lr 30.00
[Epoch 444] test loss 4.34, test ppl 76.39
[Epoch 445 Batch 200/372] current loss 4.07, ppl 58.46, throughput 620.46 samples/s, lr 31.71
[Epoch 445] throughput 43028.39 samples/s
[Epoch 445] time cost 51.43s, valid loss 4.39, valid ppl 80.34, lr 30.00
[Epoch 445] test loss 4.34, test ppl 76.39
[Epoch 446 Batch 200/372] current loss 4.07, ppl 58.47, throughput 619.42 samples/s, lr 30.00
[Epoch 446] throughput 42530.92 samples/s
[Epoch 446] time cost 51.94s, valid loss 4.39, valid ppl 80.34, lr 30.00
[Epoch 446] test loss 4.34, test ppl 76.39
[Epoch 447 Batch 200/372] current loss 4.06, ppl 58.08, throughput 634.16 samples/s, lr 15.00
[Epoch 447] throughput 43331.27 samples/s
[Epoch 447] time cost 51.13s, valid loss 4.39, valid ppl 80.33, lr 30.00
[Epoch 447] test loss 4.34, test ppl 76.39
[Epoch 448 Batch 200/372] current loss 4.04, ppl 57.00, throughput 618.97 samples/s, lr 24.86
[Epoch 448] throughput 42648.51 samples/s
[Epoch 448] time cost 51.87s, valid loss 4.39, valid ppl 80.33, lr 30.00
[Epoch 448] test loss 4.34, test ppl 76.39
[Epoch 449 Batch 200/372] current loss 4.06, ppl 58.20, throughput 629.24 samples/s, lr 30.00
[Epoch 449] throughput 42304.49 samples/s
[Epoch 449] time cost 52.31s, valid loss 4.39, valid ppl 80.33, lr 30.00
[Epoch 449] test loss 4.34, test ppl 76.39
[Epoch 450 Batch 200/372] current loss 4.06, ppl 58.14, throughput 640.39 samples/s, lr 28.29
[Epoch 450] throughput 42641.89 samples/s
[Epoch 450] time cost 51.86s, valid loss 4.39, valid ppl 80.33, lr 30.00
[Epoch 450] test loss 4.34, test ppl 76.38
[Epoch 451 Batch 200/372] current loss 4.05, ppl 57.62, throughput 628.08 samples/s, lr 30.43
[Epoch 451] throughput 42710.76 samples/s
[Epoch 451] time cost 51.79s, valid loss 4.39, valid ppl 80.32, lr 30.00
[Epoch 451] test loss 4.34, test ppl 76.38
[Epoch 452 Batch 200/372] current loss 4.06, ppl 57.91, throughput 619.41 samples/s, lr 27.86
[Epoch 452] throughput 42416.46 samples/s
[Epoch 452] time cost 52.12s, valid loss 4.39, valid ppl 80.32, lr 30.00
[Epoch 452] test loss 4.34, test ppl 76.38
[Epoch 453 Batch 200/372] current loss 4.06, ppl 57.88, throughput 618.32 samples/s, lr 29.14
[Epoch 453] throughput 42891.54 samples/s
[Epoch 453] time cost 51.65s, valid loss 4.39, valid ppl 80.32, lr 30.00
[Epoch 453] test loss 4.34, test ppl 76.38
[Epoch 454 Batch 200/372] current loss 4.05, ppl 57.59, throughput 649.40 samples/s, lr 25.29
[Epoch 454] throughput 43255.58 samples/s
[Epoch 454] time cost 51.16s, valid loss 4.39, valid ppl 80.32, lr 30.00
[Epoch 454] test loss 4.34, test ppl 76.38
[Epoch 455 Batch 200/372] current loss 4.06, ppl 57.85, throughput 631.56 samples/s, lr 30.43
[Epoch 455] throughput 43747.62 samples/s
[Epoch 455] time cost 50.87s, valid loss 4.39, valid ppl 80.32, lr 30.00
[Epoch 455] test loss 4.34, test ppl 76.37
[Epoch 456 Batch 200/372] current loss 4.06, ppl 58.04, throughput 631.83 samples/s, lr 26.57
[Epoch 456] throughput 42430.98 samples/s
[Epoch 456] time cost 52.09s, valid loss 4.39, valid ppl 80.32, lr 30.00
[Epoch 456] test loss 4.34, test ppl 76.37
[Epoch 457 Batch 200/372] current loss 4.06, ppl 57.70, throughput 629.85 samples/s, lr 28.71
[Epoch 457] throughput 42551.84 samples/s
[Epoch 457] time cost 51.97s, valid loss 4.39, valid ppl 80.31, lr 30.00
[Epoch 457] test loss 4.34, test ppl 76.37
[Epoch 458 Batch 200/372] current loss 4.06, ppl 57.93, throughput 633.02 samples/s, lr 31.29
[Epoch 458] throughput 42964.97 samples/s
[Epoch 458] time cost 51.56s, valid loss 4.39, valid ppl 80.31, lr 30.00
[Epoch 458] test loss 4.34, test ppl 76.37
[Epoch 459 Batch 200/372] current loss 4.05, ppl 57.61, throughput 622.48 samples/s, lr 30.00
[Epoch 459] throughput 41983.85 samples/s
[Epoch 459] time cost 52.65s, valid loss 4.39, valid ppl 80.31, lr 30.00
[Epoch 459] test loss 4.34, test ppl 76.37
[Epoch 460 Batch 200/372] current loss 4.05, ppl 57.21, throughput 619.22 samples/s, lr 27.43
[Epoch 460] throughput 41677.23 samples/s
[Epoch 460] time cost 53.02s, valid loss 4.39, valid ppl 80.31, lr 30.00
[Epoch 460] test loss 4.34, test ppl 76.37
[Epoch 461 Batch 200/372] current loss 4.06, ppl 57.84, throughput 633.39 samples/s, lr 30.43
[Epoch 461] throughput 42943.04 samples/s
[Epoch 461] time cost 51.49s, valid loss 4.39, valid ppl 80.31, lr 30.00
[Epoch 461] test loss 4.34, test ppl 76.36
[Epoch 462 Batch 200/372] current loss 4.05, ppl 57.60, throughput 618.08 samples/s, lr 29.57
[Epoch 462] throughput 42558.74 samples/s
[Epoch 462] time cost 51.94s, valid loss 4.39, valid ppl 80.31, lr 30.00
[Epoch 462] test loss 4.34, test ppl 76.36
[Epoch 463 Batch 200/372] current loss 4.06, ppl 57.73, throughput 628.41 samples/s, lr 24.86
[Epoch 463] throughput 42603.44 samples/s
[Epoch 463] time cost 51.95s, valid loss 4.39, valid ppl 80.31, lr 30.00
[Epoch 463] test loss 4.34, test ppl 76.36
[Epoch 464 Batch 200/372] current loss 4.05, ppl 57.51, throughput 623.27 samples/s, lr 30.00
[Epoch 464] throughput 43104.75 samples/s
[Epoch 464] time cost 51.29s, valid loss 4.39, valid ppl 80.30, lr 30.00
[Epoch 464] test loss 4.34, test ppl 76.36
[Epoch 465 Batch 200/372] current loss 4.05, ppl 57.44, throughput 621.94 samples/s, lr 29.14
[Epoch 465] throughput 43023.90 samples/s
[Epoch 465] time cost 51.48s, valid loss 4.39, valid ppl 80.30, lr 30.00
[Epoch 465] test loss 4.34, test ppl 76.36
[Epoch 466 Batch 200/372] current loss 4.05, ppl 57.36, throughput 634.18 samples/s, lr 28.29
[Epoch 466] throughput 42970.23 samples/s
[Epoch 466] time cost 51.46s, valid loss 4.39, valid ppl 80.30, lr 30.00
[Epoch 466] test loss 4.34, test ppl 76.36
[Epoch 467 Batch 200/372] current loss 4.05, ppl 57.50, throughput 612.82 samples/s, lr 34.29
[Epoch 467] throughput 42204.48 samples/s
[Epoch 467] time cost 52.35s, valid loss 4.39, valid ppl 80.30, lr 30.00
[Epoch 467] test loss 4.34, test ppl 76.36
[Epoch 468 Batch 200/372] current loss 4.05, ppl 57.65, throughput 628.05 samples/s, lr 16.29
[Epoch 468] throughput 42342.44 samples/s
[Epoch 468] time cost 52.25s, valid loss 4.39, valid ppl 80.30, lr 30.00
[Epoch 468] test loss 4.34, test ppl 76.35
[Epoch 469 Batch 200/372] current loss 4.06, ppl 57.97, throughput 622.94 samples/s, lr 27.00
[Epoch 469] throughput 42914.73 samples/s
[Epoch 469] time cost 51.60s, valid loss 4.39, valid ppl 80.30, lr 30.00
[Epoch 469] test loss 4.34, test ppl 76.35
[Epoch 470 Batch 200/372] current loss 4.05, ppl 57.53, throughput 633.74 samples/s, lr 29.57
[Epoch 470] throughput 42298.68 samples/s
[Epoch 470] time cost 52.25s, valid loss 4.39, valid ppl 80.30, lr 30.00
[Epoch 470] test loss 4.34, test ppl 76.35
[Epoch 471 Batch 200/372] current loss 4.05, ppl 57.31, throughput 641.50 samples/s, lr 28.71
[Epoch 471] throughput 43357.53 samples/s
[Epoch 471] time cost 51.09s, valid loss 4.39, valid ppl 80.29, lr 30.00
[Epoch 471] test loss 4.34, test ppl 76.35
[Epoch 472 Batch 200/372] current loss 4.06, ppl 57.76, throughput 627.27 samples/s, lr 33.43
[Epoch 472] throughput 42110.11 samples/s
[Epoch 472] time cost 52.46s, valid loss 4.39, valid ppl 80.29, lr 30.00
[Epoch 472] test loss 4.34, test ppl 76.35
[Epoch 473 Batch 200/372] current loss 4.05, ppl 57.36, throughput 643.11 samples/s, lr 33.00
[Epoch 473] throughput 43156.50 samples/s
[Epoch 473] time cost 51.32s, valid loss 4.39, valid ppl 80.29, lr 30.00
[Epoch 473] test loss 4.34, test ppl 76.34
[Epoch 474 Batch 200/372] current loss 4.06, ppl 57.90, throughput 617.43 samples/s, lr 30.86
[Epoch 474] throughput 42717.83 samples/s
[Epoch 474] time cost 51.79s, valid loss 4.39, valid ppl 80.29, lr 30.00
[Epoch 474] test loss 4.34, test ppl 76.34
[Epoch 475 Batch 200/372] current loss 4.05, ppl 57.33, throughput 640.47 samples/s, lr 30.86
[Epoch 475] throughput 42723.23 samples/s
[Epoch 475] time cost 51.75s, valid loss 4.39, valid ppl 80.29, lr 30.00
[Epoch 475] test loss 4.34, test ppl 76.34
[Epoch 476 Batch 200/372] current loss 4.04, ppl 56.96, throughput 631.71 samples/s, lr 24.86
[Epoch 476] throughput 42533.63 samples/s
[Epoch 476] time cost 51.96s, valid loss 4.39, valid ppl 80.29, lr 30.00
[Epoch 476] test loss 4.34, test ppl 76.34
[Epoch 477 Batch 200/372] current loss 4.06, ppl 57.76, throughput 607.47 samples/s, lr 31.29
[Epoch 477] throughput 41538.76 samples/s
[Epoch 477] time cost 53.15s, valid loss 4.39, valid ppl 80.29, lr 30.00
[Epoch 477] test loss 4.34, test ppl 76.34
[Epoch 478 Batch 200/372] current loss 4.05, ppl 57.49, throughput 639.73 samples/s, lr 30.86
[Epoch 478] throughput 43114.75 samples/s
[Epoch 478] time cost 51.31s, valid loss 4.39, valid ppl 80.28, lr 30.00
[Epoch 478] test loss 4.34, test ppl 76.33
[Epoch 479 Batch 200/372] current loss 4.04, ppl 57.10, throughput 602.66 samples/s, lr 30.43
[Epoch 479] throughput 42184.97 samples/s
[Epoch 479] time cost 52.35s, valid loss 4.39, valid ppl 80.28, lr 30.00
[Epoch 479] test loss 4.34, test ppl 76.33
[Epoch 480 Batch 200/372] current loss 4.05, ppl 57.68, throughput 621.97 samples/s, lr 27.86
[Epoch 480] throughput 42984.69 samples/s
[Epoch 480] time cost 51.51s, valid loss 4.39, valid ppl 80.28, lr 30.00
[Epoch 480] test loss 4.34, test ppl 76.33
[Epoch 481 Batch 200/372] current loss 4.04, ppl 57.00, throughput 633.02 samples/s, lr 30.43
[Epoch 481] throughput 42529.35 samples/s
[Epoch 481] time cost 52.02s, valid loss 4.39, valid ppl 80.28, lr 30.00
[Epoch 481] test loss 4.34, test ppl 76.33
[Epoch 482 Batch 200/372] current loss 4.04, ppl 56.73, throughput 608.29 samples/s, lr 30.43
[Epoch 482] throughput 41797.74 samples/s
[Epoch 482] time cost 52.88s, valid loss 4.39, valid ppl 80.28, lr 30.00
[Epoch 482] test loss 4.34, test ppl 76.33
[Epoch 483 Batch 200/372] current loss 4.05, ppl 57.64, throughput 617.66 samples/s, lr 27.00
[Epoch 483] throughput 42745.98 samples/s
[Epoch 483] time cost 51.67s, valid loss 4.39, valid ppl 80.28, lr 30.00
[Epoch 483] test loss 4.34, test ppl 76.33
[Epoch 484 Batch 200/372] current loss 4.04, ppl 56.69, throughput 630.45 samples/s, lr 30.86
[Epoch 484] throughput 42497.52 samples/s
[Epoch 484] time cost 52.05s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 484] test loss 4.34, test ppl 76.33
[Epoch 485 Batch 200/372] current loss 4.05, ppl 57.61, throughput 637.29 samples/s, lr 32.14
[Epoch 485] throughput 42774.32 samples/s
[Epoch 485] time cost 51.77s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 485] test loss 4.33, test ppl 76.32
[Epoch 486 Batch 200/372] current loss 4.05, ppl 57.33, throughput 627.00 samples/s, lr 30.86
[Epoch 486] throughput 42899.42 samples/s
[Epoch 486] time cost 51.66s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 486] test loss 4.33, test ppl 76.32
[Epoch 487 Batch 200/372] current loss 4.04, ppl 56.84, throughput 612.94 samples/s, lr 30.86
[Epoch 487] throughput 42662.79 samples/s
[Epoch 487] time cost 51.81s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 487] test loss 4.33, test ppl 76.32
[Epoch 488 Batch 200/372] current loss 4.05, ppl 57.16, throughput 618.89 samples/s, lr 27.43
[Epoch 488] throughput 42523.61 samples/s
[Epoch 488] time cost 51.97s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 488] test loss 4.33, test ppl 76.32
[Epoch 489 Batch 200/372] current loss 4.04, ppl 56.81, throughput 624.59 samples/s, lr 29.57
[Epoch 489] throughput 42231.42 samples/s
[Epoch 489] time cost 52.39s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 489] test loss 4.33, test ppl 76.32
[Epoch 490 Batch 200/372] current loss 4.05, ppl 57.59, throughput 622.44 samples/s, lr 29.57
[Epoch 490] throughput 42932.05 samples/s
[Epoch 490] time cost 51.58s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 490] test loss 4.33, test ppl 76.32
[Epoch 491 Batch 200/372] current loss 4.05, ppl 57.24, throughput 644.11 samples/s, lr 31.71
[Epoch 491] throughput 43220.74 samples/s
[Epoch 491] time cost 51.26s, valid loss 4.39, valid ppl 80.27, lr 30.00
[Epoch 491] test loss 4.33, test ppl 76.32
[Epoch 492 Batch 200/372] current loss 4.04, ppl 57.01, throughput 633.84 samples/s, lr 30.43
[Epoch 492] throughput 42266.57 samples/s
[Epoch 492] time cost 52.37s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 492] test loss 4.33, test ppl 76.32
[Epoch 493 Batch 200/372] current loss 4.05, ppl 57.63, throughput 629.83 samples/s, lr 27.00
[Epoch 493] throughput 42456.47 samples/s
[Epoch 493] time cost 52.14s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 493] test loss 4.33, test ppl 76.31
[Epoch 494 Batch 200/372] current loss 4.05, ppl 57.28, throughput 619.80 samples/s, lr 28.71
[Epoch 494] throughput 42977.71 samples/s
[Epoch 494] time cost 51.50s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 494] test loss 4.33, test ppl 76.31
[Epoch 495 Batch 200/372] current loss 4.04, ppl 56.78, throughput 623.99 samples/s, lr 30.86
[Epoch 495] throughput 42549.12 samples/s
[Epoch 495] time cost 51.94s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 495] test loss 4.33, test ppl 76.31
[Epoch 496 Batch 200/372] current loss 4.05, ppl 57.28, throughput 645.23 samples/s, lr 31.71
[Epoch 496] throughput 42264.21 samples/s
[Epoch 496] time cost 52.26s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 496] test loss 4.33, test ppl 76.31
[Epoch 497 Batch 200/372] current loss 4.05, ppl 57.53, throughput 640.17 samples/s, lr 31.71
[Epoch 497] throughput 42857.85 samples/s
[Epoch 497] time cost 51.81s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 497] test loss 4.33, test ppl 76.31
[Epoch 498 Batch 200/372] current loss 4.04, ppl 56.62, throughput 608.17 samples/s, lr 28.29
[Epoch 498] throughput 41919.09 samples/s
[Epoch 498] time cost 52.75s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 498] test loss 4.33, test ppl 76.31
[Epoch 499 Batch 200/372] current loss 4.06, ppl 58.04, throughput 618.65 samples/s, lr 31.29
[Epoch 499] throughput 42520.61 samples/s
[Epoch 499] time cost 52.00s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 499] test loss 4.33, test ppl 76.30
[Epoch 500 Batch 200/372] current loss 4.05, ppl 57.26, throughput 637.73 samples/s, lr 29.14
[Epoch 500] throughput 43188.30 samples/s
[Epoch 500] time cost 51.18s, valid loss 4.39, valid ppl 80.26, lr 30.00
[Epoch 500] test loss 4.33, test ppl 76.30
[Epoch 501 Batch 200/372] current loss 4.05, ppl 57.22, throughput 621.59 samples/s, lr 31.29
[Epoch 501] throughput 42386.93 samples/s
[Epoch 501] time cost 52.18s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 501] test loss 4.33, test ppl 76.30
[Epoch 502 Batch 200/372] current loss 4.04, ppl 56.88, throughput 621.75 samples/s, lr 27.86
[Epoch 502] throughput 41968.77 samples/s
[Epoch 502] time cost 52.74s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 502] test loss 4.33, test ppl 76.30
[Epoch 503 Batch 200/372] current loss 4.04, ppl 56.87, throughput 638.60 samples/s, lr 31.71
[Epoch 503] throughput 43300.52 samples/s
[Epoch 503] time cost 51.10s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 503] test loss 4.33, test ppl 76.30
[Epoch 504 Batch 200/372] current loss 4.05, ppl 57.53, throughput 613.20 samples/s, lr 27.00
[Epoch 504] throughput 42506.16 samples/s
[Epoch 504] time cost 51.97s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 504] test loss 4.33, test ppl 76.30
[Epoch 505 Batch 200/372] current loss 4.05, ppl 57.37, throughput 611.55 samples/s, lr 28.71
[Epoch 505] throughput 41442.06 samples/s
[Epoch 505] time cost 53.32s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 505] test loss 4.33, test ppl 76.30
[Epoch 506 Batch 200/372] current loss 4.04, ppl 56.64, throughput 613.77 samples/s, lr 27.43
[Epoch 506] throughput 42548.40 samples/s
[Epoch 506] time cost 52.04s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 506] test loss 4.33, test ppl 76.29
[Epoch 507 Batch 200/372] current loss 4.04, ppl 56.60, throughput 620.98 samples/s, lr 16.29
[Epoch 507] throughput 42181.68 samples/s
[Epoch 507] time cost 52.37s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 507] test loss 4.33, test ppl 76.29
[Epoch 508 Batch 200/372] current loss 4.05, ppl 57.36, throughput 637.43 samples/s, lr 29.57
[Epoch 508] throughput 42863.27 samples/s
[Epoch 508] time cost 51.58s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 508] test loss 4.33, test ppl 76.29
[Epoch 509 Batch 200/372] current loss 4.05, ppl 57.60, throughput 622.37 samples/s, lr 29.14
[Epoch 509] throughput 42583.84 samples/s
[Epoch 509] time cost 52.02s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 509] test loss 4.33, test ppl 76.29
[Epoch 510 Batch 200/372] current loss 4.04, ppl 56.96, throughput 613.78 samples/s, lr 27.86
[Epoch 510] throughput 42290.11 samples/s
[Epoch 510] time cost 52.33s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 510] test loss 4.33, test ppl 76.29
[Epoch 511 Batch 200/372] current loss 4.04, ppl 56.78, throughput 633.72 samples/s, lr 29.57
[Epoch 511] throughput 42768.88 samples/s
[Epoch 511] time cost 51.67s, valid loss 4.39, valid ppl 80.25, lr 30.00
[Epoch 511] test loss 4.33, test ppl 76.29
[Epoch 512 Batch 200/372] current loss 4.04, ppl 56.63, throughput 637.89 samples/s, lr 15.86
[Epoch 512] throughput 43106.27 samples/s
[Epoch 512] time cost 51.47s, valid loss 4.39, valid ppl 80.24, lr 30.00
[Epoch 512] test loss 4.33, test ppl 76.29
[Epoch 513 Batch 200/372] current loss 4.05, ppl 57.34, throughput 620.36 samples/s, lr 27.86
[Epoch 513] throughput 43115.00 samples/s
[Epoch 513] time cost 51.31s, valid loss 4.39, valid ppl 80.24, lr 30.00
[Epoch 513] test loss 4.33, test ppl 76.29
[Epoch 514 Batch 200/372] current loss 4.05, ppl 57.34, throughput 629.84 samples/s, lr 28.29
[Epoch 514] throughput 42313.58 samples/s
[Epoch 514] time cost 52.20s, valid loss 4.39, valid ppl 80.24, lr 30.00
[Epoch 514] test loss 4.33, test ppl 76.28
[Epoch 515 Batch 200/372] current loss 4.04, ppl 57.01, throughput 614.11 samples/s, lr 30.00
[Epoch 515] throughput 42888.75 samples/s
[Epoch 515] time cost 51.65s, valid loss 4.39, valid ppl 80.24, lr 30.00
[Epoch 515] test loss 4.33, test ppl 76.28
[Epoch 516 Batch 200/372] current loss 4.05, ppl 57.47, throughput 647.24 samples/s, lr 26.14
[Epoch 516] throughput 42836.80 samples/s
[Epoch 516] time cost 51.60s, valid loss 4.39, valid ppl 80.24, lr 30.00
[Epoch 516] test loss 4.33, test ppl 76.28
[Epoch 517 Batch 200/372] current loss 4.04, ppl 56.64, throughput 617.55 samples/s, lr 27.86
[Epoch 517] throughput 41872.26 samples/s
[Epoch 517] time cost 52.73s, valid loss 4.39, valid ppl 80.24, lr 30.00
[Epoch 517] test loss 4.33, test ppl 76.28
[Epoch 518 Batch 200/372] current loss 4.05, ppl 57.19, throughput 637.33 samples/s, lr 28.29
[Epoch 518] throughput 42539.52 samples/s
[Epoch 518] time cost 51.99s, valid loss 4.39, valid ppl 80.24, lr 30.00
[Epoch 518] test loss 4.33, test ppl 76.28
[Epoch 519 Batch 200/372] current loss 4.04, ppl 56.79, throughput 632.59 samples/s, lr 30.00
[Epoch 519] throughput 43025.00 samples/s
[Epoch 519] time cost 51.39s, valid loss 4.38, valid ppl 80.24, lr 30.00
[Epoch 519] test loss 4.33, test ppl 76.28
[Epoch 520 Batch 200/372] current loss 4.04, ppl 56.83, throughput 631.99 samples/s, lr 32.14
[Epoch 520] throughput 43025.03 samples/s
[Epoch 520] time cost 51.44s, valid loss 4.38, valid ppl 80.24, lr 30.00
[Epoch 520] test loss 4.33, test ppl 76.28
[Epoch 521 Batch 200/372] current loss 4.04, ppl 56.62, throughput 629.25 samples/s, lr 35.14
[Epoch 521] throughput 42524.57 samples/s
[Epoch 521] time cost 51.97s, valid loss 4.38, valid ppl 80.24, lr 30.00
[Epoch 521] test loss 4.33, test ppl 76.28
[Epoch 522 Batch 200/372] current loss 4.04, ppl 57.00, throughput 624.02 samples/s, lr 30.00
[Epoch 522] throughput 42532.77 samples/s
[Epoch 522] time cost 51.96s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 522] test loss 4.33, test ppl 76.27
[Epoch 523 Batch 200/372] current loss 4.04, ppl 56.93, throughput 615.68 samples/s, lr 27.43
[Epoch 523] throughput 42887.74 samples/s
[Epoch 523] time cost 51.55s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 523] test loss 4.33, test ppl 76.27
[Epoch 524 Batch 200/372] current loss 4.04, ppl 56.74, throughput 626.99 samples/s, lr 25.71
[Epoch 524] throughput 42348.42 samples/s
[Epoch 524] time cost 52.21s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 524] test loss 4.33, test ppl 76.27
[Epoch 525 Batch 200/372] current loss 4.04, ppl 56.69, throughput 637.97 samples/s, lr 29.57
[Epoch 525] throughput 42857.23 samples/s
[Epoch 525] time cost 51.72s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 525] test loss 4.33, test ppl 76.27
[Epoch 526 Batch 200/372] current loss 4.03, ppl 56.30, throughput 622.94 samples/s, lr 32.57
[Epoch 526] throughput 42623.03 samples/s
[Epoch 526] time cost 51.93s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 526] test loss 4.33, test ppl 76.27
[Epoch 527 Batch 200/372] current loss 4.04, ppl 56.61, throughput 636.06 samples/s, lr 29.14
[Epoch 527] throughput 43071.89 samples/s
[Epoch 527] time cost 51.46s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 527] test loss 4.33, test ppl 76.27
[Epoch 528 Batch 200/372] current loss 4.03, ppl 56.13, throughput 643.02 samples/s, lr 27.00
[Epoch 528] throughput 42440.42 samples/s
[Epoch 528] time cost 52.09s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 528] test loss 4.33, test ppl 76.27
[Epoch 529 Batch 200/372] current loss 4.04, ppl 56.86, throughput 618.49 samples/s, lr 29.57
[Epoch 529] throughput 42302.72 samples/s
[Epoch 529] time cost 52.25s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 529] test loss 4.33, test ppl 76.27
[Epoch 530 Batch 200/372] current loss 4.04, ppl 56.58, throughput 619.11 samples/s, lr 30.00
[Epoch 530] throughput 42258.19 samples/s
[Epoch 530] time cost 52.34s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 530] test loss 4.33, test ppl 76.26
[Epoch 531 Batch 200/372] current loss 4.04, ppl 56.79, throughput 621.48 samples/s, lr 31.71
[Epoch 531] throughput 42031.02 samples/s
[Epoch 531] time cost 52.61s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 531] test loss 4.33, test ppl 76.26
[Epoch 532 Batch 200/372] current loss 4.04, ppl 57.06, throughput 626.19 samples/s, lr 25.71
[Epoch 532] throughput 42135.08 samples/s
[Epoch 532] time cost 52.44s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 532] test loss 4.33, test ppl 76.26
[Epoch 533 Batch 200/372] current loss 4.03, ppl 56.03, throughput 622.24 samples/s, lr 30.43
[Epoch 533] throughput 42677.52 samples/s
[Epoch 533] time cost 51.90s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 533] test loss 4.33, test ppl 76.26
[Epoch 534 Batch 200/372] current loss 4.04, ppl 56.59, throughput 649.59 samples/s, lr 30.86
[Epoch 534] throughput 42335.43 samples/s
[Epoch 534] time cost 52.20s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 534] test loss 4.33, test ppl 76.26
[Epoch 535 Batch 200/372] current loss 4.05, ppl 57.38, throughput 610.22 samples/s, lr 31.29
[Epoch 535] throughput 42393.33 samples/s
[Epoch 535] time cost 52.18s, valid loss 4.38, valid ppl 80.23, lr 30.00
[Epoch 535] test loss 4.33, test ppl 76.26
[Epoch 536 Batch 200/372] current loss 4.04, ppl 56.57, throughput 618.85 samples/s, lr 31.71
[Epoch 536] throughput 42614.74 samples/s
[Epoch 536] time cost 51.98s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 536] test loss 4.33, test ppl 76.26
[Epoch 537 Batch 200/372] current loss 4.03, ppl 56.13, throughput 619.00 samples/s, lr 32.57
[Epoch 537] throughput 41818.06 samples/s
[Epoch 537] time cost 52.78s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 537] test loss 4.33, test ppl 76.26
[Epoch 538 Batch 200/372] current loss 4.04, ppl 56.64, throughput 607.03 samples/s, lr 34.29
[Epoch 538] throughput 41803.09 samples/s
[Epoch 538] time cost 52.84s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 538] test loss 4.33, test ppl 76.26
[Epoch 539 Batch 200/372] current loss 4.03, ppl 56.31, throughput 626.84 samples/s, lr 31.29
[Epoch 539] throughput 42220.38 samples/s
[Epoch 539] time cost 52.31s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 539] test loss 4.33, test ppl 76.25
[Epoch 540 Batch 200/372] current loss 4.03, ppl 56.50, throughput 600.74 samples/s, lr 28.29
[Epoch 540] throughput 41753.26 samples/s
[Epoch 540] time cost 52.91s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 540] test loss 4.33, test ppl 76.25
[Epoch 541 Batch 200/372] current loss 4.03, ppl 56.11, throughput 600.13 samples/s, lr 29.14
[Epoch 541] throughput 41742.15 samples/s
[Epoch 541] time cost 53.02s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 541] test loss 4.33, test ppl 76.25
[Epoch 542 Batch 200/372] current loss 4.04, ppl 56.62, throughput 598.31 samples/s, lr 30.00
[Epoch 542] throughput 41560.18 samples/s
[Epoch 542] time cost 53.32s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 542] test loss 4.33, test ppl 76.25
[Epoch 543 Batch 200/372] current loss 4.03, ppl 56.48, throughput 651.28 samples/s, lr 30.43
[Epoch 543] throughput 43329.74 samples/s
[Epoch 543] time cost 51.06s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 543] test loss 4.33, test ppl 76.25
[Epoch 544 Batch 200/372] current loss 4.03, ppl 56.44, throughput 628.85 samples/s, lr 26.57
[Epoch 544] throughput 42707.77 samples/s
[Epoch 544] time cost 51.74s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 544] test loss 4.33, test ppl 76.25
[Epoch 545 Batch 200/372] current loss 4.02, ppl 55.86, throughput 628.83 samples/s, lr 28.29
[Epoch 545] throughput 42744.44 samples/s
[Epoch 545] time cost 51.81s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 545] test loss 4.33, test ppl 76.25
[Epoch 546 Batch 200/372] current loss 4.04, ppl 56.73, throughput 613.55 samples/s, lr 30.00
[Epoch 546] throughput 42270.62 samples/s
[Epoch 546] time cost 52.24s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 546] test loss 4.33, test ppl 76.25
[Epoch 547 Batch 200/372] current loss 4.03, ppl 56.35, throughput 608.10 samples/s, lr 15.86
[Epoch 547] throughput 42539.56 samples/s
[Epoch 547] time cost 52.06s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 547] test loss 4.33, test ppl 76.25
[Epoch 548 Batch 200/372] current loss 4.04, ppl 57.03, throughput 607.47 samples/s, lr 29.57
[Epoch 548] throughput 42230.28 samples/s
[Epoch 548] time cost 52.38s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 548] test loss 4.33, test ppl 76.25
[Epoch 549 Batch 200/372] current loss 4.04, ppl 56.89, throughput 617.47 samples/s, lr 30.00
[Epoch 549] throughput 41925.91 samples/s
[Epoch 549] time cost 52.64s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 549] test loss 4.33, test ppl 76.25
[Epoch 550 Batch 200/372] current loss 4.03, ppl 56.39, throughput 641.58 samples/s, lr 30.00
[Epoch 550] throughput 43576.88 samples/s
[Epoch 550] time cost 50.79s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 550] test loss 4.33, test ppl 76.24
[Epoch 551 Batch 200/372] current loss 4.02, ppl 55.45, throughput 615.21 samples/s, lr 31.71
[Epoch 551] throughput 42118.62 samples/s
[Epoch 551] time cost 52.46s, valid loss 4.38, valid ppl 80.22, lr 30.00
[Epoch 551] test loss 4.33, test ppl 76.24
[Epoch 552 Batch 200/372] current loss 4.03, ppl 56.40, throughput 613.61 samples/s, lr 33.43
[Epoch 552] throughput 42217.28 samples/s
[Epoch 552] time cost 52.32s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 552] test loss 4.33, test ppl 76.24
[Epoch 553 Batch 200/372] current loss 4.03, ppl 56.16, throughput 604.95 samples/s, lr 33.00
[Epoch 553] throughput 41936.49 samples/s
[Epoch 553] time cost 52.69s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 553] test loss 4.33, test ppl 76.24
[Epoch 554 Batch 200/372] current loss 4.02, ppl 55.76, throughput 627.82 samples/s, lr 27.86
[Epoch 554] throughput 41657.65 samples/s
[Epoch 554] time cost 53.15s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 554] test loss 4.33, test ppl 76.24
[Epoch 555 Batch 200/372] current loss 4.02, ppl 55.72, throughput 636.96 samples/s, lr 30.86
[Epoch 555] throughput 43156.74 samples/s
[Epoch 555] time cost 51.32s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 555] test loss 4.33, test ppl 76.24
[Epoch 556 Batch 200/372] current loss 4.02, ppl 55.88, throughput 634.41 samples/s, lr 28.29
[Epoch 556] throughput 42758.50 samples/s
[Epoch 556] time cost 51.69s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 556] test loss 4.33, test ppl 76.24
[Epoch 557 Batch 200/372] current loss 4.03, ppl 56.25, throughput 618.89 samples/s, lr 30.00
[Epoch 557] throughput 42246.67 samples/s
[Epoch 557] time cost 52.31s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 557] test loss 4.33, test ppl 76.24
[Epoch 558 Batch 200/372] current loss 4.02, ppl 55.78, throughput 621.11 samples/s, lr 28.29
[Epoch 558] throughput 41537.54 samples/s
[Epoch 558] time cost 53.18s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 558] test loss 4.33, test ppl 76.24
[Epoch 559 Batch 200/372] current loss 4.03, ppl 56.23, throughput 633.83 samples/s, lr 29.57
[Epoch 559] throughput 42344.88 samples/s
[Epoch 559] time cost 52.20s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 559] test loss 4.33, test ppl 76.24
[Epoch 560 Batch 200/372] current loss 4.03, ppl 56.45, throughput 621.39 samples/s, lr 31.29
[Epoch 560] throughput 42983.79 samples/s
[Epoch 560] time cost 51.53s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 560] test loss 4.33, test ppl 76.24
[Epoch 561 Batch 200/372] current loss 4.04, ppl 56.60, throughput 644.35 samples/s, lr 29.14
[Epoch 561] throughput 43472.10 samples/s
[Epoch 561] time cost 50.96s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 561] test loss 4.33, test ppl 76.23
[Epoch 562 Batch 200/372] current loss 4.04, ppl 56.57, throughput 638.24 samples/s, lr 32.57
[Epoch 562] throughput 42818.12 samples/s
[Epoch 562] time cost 51.63s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 562] test loss 4.33, test ppl 76.23
[Epoch 563 Batch 200/372] current loss 4.03, ppl 56.08, throughput 625.84 samples/s, lr 33.86
[Epoch 563] throughput 42407.54 samples/s
[Epoch 563] time cost 52.13s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 563] test loss 4.33, test ppl 76.23
[Epoch 564 Batch 200/372] current loss 4.04, ppl 56.72, throughput 618.89 samples/s, lr 28.71
[Epoch 564] throughput 42752.35 samples/s
[Epoch 564] time cost 51.68s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 564] test loss 4.33, test ppl 76.23
[Epoch 565 Batch 200/372] current loss 4.04, ppl 57.09, throughput 618.75 samples/s, lr 30.00
[Epoch 565] throughput 42378.87 samples/s
[Epoch 565] time cost 52.21s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 565] test loss 4.33, test ppl 76.23
[Epoch 566 Batch 200/372] current loss 4.03, ppl 56.04, throughput 640.15 samples/s, lr 30.43
[Epoch 566] throughput 43112.74 samples/s
[Epoch 566] time cost 51.41s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 566] test loss 4.33, test ppl 76.23
[Epoch 567 Batch 200/372] current loss 4.03, ppl 56.08, throughput 622.32 samples/s, lr 29.14
[Epoch 567] throughput 42469.23 samples/s
[Epoch 567] time cost 52.07s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 567] test loss 4.33, test ppl 76.23
[Epoch 568 Batch 200/372] current loss 4.03, ppl 56.41, throughput 632.05 samples/s, lr 29.57
[Epoch 568] throughput 43016.81 samples/s
[Epoch 568] time cost 51.44s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 568] test loss 4.33, test ppl 76.23
[Epoch 569 Batch 200/372] current loss 4.02, ppl 55.75, throughput 637.69 samples/s, lr 29.14
[Epoch 569] throughput 42970.67 samples/s
[Epoch 569] time cost 51.57s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 569] test loss 4.33, test ppl 76.23
[Epoch 570 Batch 200/372] current loss 4.03, ppl 56.41, throughput 628.54 samples/s, lr 33.00
[Epoch 570] throughput 43324.06 samples/s
[Epoch 570] time cost 51.18s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 570] test loss 4.33, test ppl 76.23
[Epoch 571 Batch 200/372] current loss 4.03, ppl 56.42, throughput 613.92 samples/s, lr 32.57
[Epoch 571] throughput 42450.98 samples/s
[Epoch 571] time cost 52.15s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 571] test loss 4.33, test ppl 76.23
[Epoch 572 Batch 200/372] current loss 4.02, ppl 55.55, throughput 628.11 samples/s, lr 27.43
[Epoch 572] throughput 42400.59 samples/s
[Epoch 572] time cost 52.20s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 572] test loss 4.33, test ppl 76.23
[Epoch 573 Batch 200/372] current loss 4.03, ppl 56.38, throughput 613.58 samples/s, lr 33.86
[Epoch 573] throughput 42273.64 samples/s
[Epoch 573] time cost 52.26s, valid loss 4.38, valid ppl 80.21, lr 30.00
[Epoch 573] test loss 4.33, test ppl 76.22
[Epoch 574 Batch 200/372] current loss 4.04, ppl 56.76, throughput 614.21 samples/s, lr 29.14
[Epoch 574] throughput 42546.03 samples/s
[Epoch 574] time cost 51.95s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 574] test loss 4.33, test ppl 76.22
[Epoch 575 Batch 200/372] current loss 4.03, ppl 56.47, throughput 622.06 samples/s, lr 29.14
[Epoch 575] throughput 42158.80 samples/s
[Epoch 575] time cost 52.47s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 575] test loss 4.33, test ppl 76.22
[Epoch 576 Batch 200/372] current loss 4.02, ppl 55.72, throughput 629.39 samples/s, lr 31.71
[Epoch 576] throughput 42811.87 samples/s
[Epoch 576] time cost 51.71s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 576] test loss 4.33, test ppl 76.22
[Epoch 577 Batch 200/372] current loss 4.03, ppl 56.14, throughput 628.16 samples/s, lr 27.86
[Epoch 577] throughput 42825.51 samples/s
[Epoch 577] time cost 51.60s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 577] test loss 4.33, test ppl 76.22
[Epoch 578 Batch 200/372] current loss 4.03, ppl 56.11, throughput 614.72 samples/s, lr 16.71
[Epoch 578] throughput 41864.46 samples/s
[Epoch 578] time cost 52.82s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 578] test loss 4.33, test ppl 76.22
[Epoch 579 Batch 200/372] current loss 4.03, ppl 56.09, throughput 625.22 samples/s, lr 32.14
[Epoch 579] throughput 42838.24 samples/s
[Epoch 579] time cost 51.74s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 579] test loss 4.33, test ppl 76.22
[Epoch 580 Batch 200/372] current loss 4.02, ppl 55.82, throughput 649.12 samples/s, lr 29.14
[Epoch 580] throughput 42986.50 samples/s
[Epoch 580] time cost 51.51s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 580] test loss 4.33, test ppl 76.22
[Epoch 581 Batch 200/372] current loss 4.03, ppl 56.27, throughput 610.62 samples/s, lr 27.00
[Epoch 581] throughput 41576.09 samples/s
[Epoch 581] time cost 53.14s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 581] test loss 4.33, test ppl 76.22
[Epoch 582 Batch 200/372] current loss 4.02, ppl 55.68, throughput 613.01 samples/s, lr 13.71
[Epoch 582] throughput 42120.56 samples/s
[Epoch 582] time cost 52.57s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 582] test loss 4.33, test ppl 76.22
[Epoch 583 Batch 200/372] current loss 4.02, ppl 55.88, throughput 625.85 samples/s, lr 31.29
[Epoch 583] throughput 42555.16 samples/s
[Epoch 583] time cost 51.96s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 583] test loss 4.33, test ppl 76.22
[Epoch 584 Batch 200/372] current loss 4.03, ppl 56.00, throughput 641.28 samples/s, lr 32.57
[Epoch 584] throughput 42373.51 samples/s
[Epoch 584] time cost 52.27s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 584] test loss 4.33, test ppl 76.22
[Epoch 585 Batch 200/372] current loss 4.02, ppl 55.93, throughput 610.57 samples/s, lr 27.00
[Epoch 585] throughput 41923.49 samples/s
[Epoch 585] time cost 52.69s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 585] test loss 4.33, test ppl 76.22
[Epoch 586 Batch 200/372] current loss 4.03, ppl 56.00, throughput 619.43 samples/s, lr 33.00
[Epoch 586] throughput 42276.68 samples/s
[Epoch 586] time cost 52.23s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 586] test loss 4.33, test ppl 76.22
[Epoch 587 Batch 200/372] current loss 4.03, ppl 56.39, throughput 611.00 samples/s, lr 31.71
[Epoch 587] throughput 41866.62 samples/s
[Epoch 587] time cost 52.77s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 587] test loss 4.33, test ppl 76.22
[Epoch 588 Batch 200/372] current loss 4.03, ppl 56.02, throughput 615.60 samples/s, lr 29.57
[Epoch 588] throughput 42489.42 samples/s
[Epoch 588] time cost 52.11s, valid loss 4.38, valid ppl 80.20, lr 30.00
[Epoch 588] test loss 4.33, test ppl 76.22
[Epoch 589 Batch 200/372] current loss 4.04, ppl 56.82, throughput 642.73 samples/s, lr 27.43
[Epoch 589] throughput 43033.82 samples/s
[Epoch 589] time cost 51.46s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 589] test loss 4.33, test ppl 76.22
[Epoch 590 Batch 200/372] current loss 4.02, ppl 55.53, throughput 622.47 samples/s, lr 29.57
[Epoch 590] throughput 42410.60 samples/s
[Epoch 590] time cost 52.13s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 590] test loss 4.33, test ppl 76.21
[Epoch 591 Batch 200/372] current loss 4.03, ppl 56.08, throughput 617.58 samples/s, lr 30.00
[Epoch 591] throughput 41879.62 samples/s
[Epoch 591] time cost 52.90s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 591] test loss 4.33, test ppl 76.21
[Epoch 592 Batch 200/372] current loss 4.02, ppl 55.97, throughput 623.89 samples/s, lr 31.29
[Epoch 592] throughput 42661.89 samples/s
[Epoch 592] time cost 51.87s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 592] test loss 4.33, test ppl 76.21
[Epoch 593 Batch 200/372] current loss 4.04, ppl 56.75, throughput 615.51 samples/s, lr 28.71
[Epoch 593] throughput 41921.50 samples/s
[Epoch 593] time cost 52.75s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 593] test loss 4.33, test ppl 76.21
[Epoch 594 Batch 200/372] current loss 4.01, ppl 55.20, throughput 629.41 samples/s, lr 31.71
[Epoch 594] throughput 42432.47 samples/s
[Epoch 594] time cost 52.13s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 594] test loss 4.33, test ppl 76.21
[Epoch 595 Batch 200/372] current loss 4.03, ppl 56.10, throughput 621.29 samples/s, lr 30.00
[Epoch 595] throughput 42207.05 samples/s
[Epoch 595] time cost 52.33s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 595] test loss 4.33, test ppl 76.21
[Epoch 596 Batch 200/372] current loss 4.02, ppl 55.80, throughput 627.17 samples/s, lr 30.00
[Epoch 596] throughput 42420.40 samples/s
[Epoch 596] time cost 52.15s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 596] test loss 4.33, test ppl 76.21
[Epoch 597 Batch 200/372] current loss 4.02, ppl 55.81, throughput 642.50 samples/s, lr 30.43
[Epoch 597] throughput 42588.19 samples/s
[Epoch 597] time cost 51.90s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 597] test loss 4.33, test ppl 76.21
[Epoch 598 Batch 200/372] current loss 4.02, ppl 55.55, throughput 627.98 samples/s, lr 26.57
[Epoch 598] throughput 42877.94 samples/s
[Epoch 598] time cost 51.62s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 598] test loss 4.33, test ppl 76.21
[Epoch 599 Batch 200/372] current loss 4.01, ppl 55.33, throughput 616.13 samples/s, lr 34.29
[Epoch 599] throughput 41871.56 samples/s
[Epoch 599] time cost 52.75s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 599] test loss 4.33, test ppl 76.21
[Epoch 600 Batch 200/372] current loss 4.03, ppl 56.04, throughput 636.04 samples/s, lr 31.71
[Epoch 600] throughput 42398.14 samples/s
[Epoch 600] time cost 52.13s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 600] test loss 4.33, test ppl 76.21
[Epoch 601 Batch 200/372] current loss 4.03, ppl 56.29, throughput 639.58 samples/s, lr 32.57
[Epoch 601] throughput 42929.63 samples/s
[Epoch 601] time cost 51.55s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 601] test loss 4.33, test ppl 76.21
[Epoch 602 Batch 200/372] current loss 4.03, ppl 56.49, throughput 631.93 samples/s, lr 28.71
[Epoch 602] throughput 42663.14 samples/s
[Epoch 602] time cost 51.83s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 602] test loss 4.33, test ppl 76.21
[Epoch 603 Batch 200/372] current loss 4.02, ppl 55.93, throughput 610.87 samples/s, lr 26.57
[Epoch 603] throughput 42239.44 samples/s
[Epoch 603] time cost 52.42s, valid loss 4.38, valid ppl 80.19, lr 30.00
[Epoch 603] test loss 4.33, test ppl 76.21
[Epoch 604 Batch 200/372] current loss 4.03, ppl 56.49, throughput 618.93 samples/s, lr 30.86
[Epoch 604] throughput 42271.36 samples/s
[Epoch 604] time cost 52.32s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 604] test loss 4.33, test ppl 76.21
[Epoch 605 Batch 200/372] current loss 4.01, ppl 55.41, throughput 628.23 samples/s, lr 29.57
[Epoch 605] throughput 42377.37 samples/s
[Epoch 605] time cost 52.13s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 605] test loss 4.33, test ppl 76.21
[Epoch 606 Batch 200/372] current loss 4.02, ppl 55.77, throughput 620.80 samples/s, lr 28.29
[Epoch 606] throughput 42150.81 samples/s
[Epoch 606] time cost 52.41s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 606] test loss 4.33, test ppl 76.21
[Epoch 607 Batch 200/372] current loss 4.01, ppl 54.99, throughput 625.33 samples/s, lr 30.86
[Epoch 607] throughput 42517.09 samples/s
[Epoch 607] time cost 51.96s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 607] test loss 4.33, test ppl 76.21
[Epoch 608 Batch 200/372] current loss 4.02, ppl 55.97, throughput 618.68 samples/s, lr 26.57
[Epoch 608] throughput 42932.96 samples/s
[Epoch 608] time cost 51.49s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 608] test loss 4.33, test ppl 76.20
[Epoch 609 Batch 200/372] current loss 4.01, ppl 55.24, throughput 611.20 samples/s, lr 30.86
[Epoch 609] throughput 42261.14 samples/s
[Epoch 609] time cost 52.37s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 609] test loss 4.33, test ppl 76.20
[Epoch 610 Batch 200/372] current loss 4.02, ppl 55.63, throughput 620.95 samples/s, lr 33.86
[Epoch 610] throughput 42110.23 samples/s
[Epoch 610] time cost 52.49s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 610] test loss 4.33, test ppl 76.20
[Epoch 611 Batch 200/372] current loss 4.01, ppl 55.25, throughput 636.37 samples/s, lr 27.86
[Epoch 611] throughput 42833.52 samples/s
[Epoch 611] time cost 51.70s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 611] test loss 4.33, test ppl 76.20
[Epoch 612 Batch 200/372] current loss 4.03, ppl 56.08, throughput 625.43 samples/s, lr 32.57
[Epoch 612] throughput 42392.80 samples/s
[Epoch 612] time cost 52.13s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 612] test loss 4.33, test ppl 76.20
[Epoch 613 Batch 200/372] current loss 4.02, ppl 55.79, throughput 612.87 samples/s, lr 29.14
[Epoch 613] throughput 41936.27 samples/s
[Epoch 613] time cost 52.74s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 613] test loss 4.33, test ppl 76.20
[Epoch 614 Batch 200/372] current loss 4.02, ppl 55.52, throughput 636.13 samples/s, lr 30.86
[Epoch 614] throughput 43287.64 samples/s
[Epoch 614] time cost 51.14s, valid loss 4.38, valid ppl 80.18, lr 30.00
[Epoch 614] test loss 4.33, test ppl 76.20
[Epoch 615 Batch 200/372] current loss 4.02, ppl 55.51, throughput 619.54 samples/s, lr 31.71
[Epoch 615] throughput 42845.21 samples/s
[Epoch 615] time cost 51.60s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 615] test loss 4.33, test ppl 76.20
[Epoch 616 Batch 200/372] current loss 4.03, ppl 56.15, throughput 627.77 samples/s, lr 25.29
[Epoch 616] throughput 42564.77 samples/s
[Epoch 616] time cost 51.94s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 616] test loss 4.33, test ppl 76.20
[Epoch 617 Batch 200/372] current loss 4.02, ppl 55.97, throughput 607.86 samples/s, lr 30.86
[Epoch 617] throughput 42008.15 samples/s
[Epoch 617] time cost 52.59s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 617] test loss 4.33, test ppl 76.20
[Epoch 618 Batch 200/372] current loss 4.03, ppl 56.05, throughput 618.97 samples/s, lr 30.43
[Epoch 618] throughput 42421.44 samples/s
[Epoch 618] time cost 52.12s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 618] test loss 4.33, test ppl 76.20
[Epoch 619 Batch 200/372] current loss 4.02, ppl 55.80, throughput 616.07 samples/s, lr 26.57
[Epoch 619] throughput 41575.59 samples/s
[Epoch 619] time cost 53.06s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 619] test loss 4.33, test ppl 76.20
[Epoch 620 Batch 200/372] current loss 4.01, ppl 55.34, throughput 637.68 samples/s, lr 33.43
[Epoch 620] throughput 42638.71 samples/s
[Epoch 620] time cost 51.84s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 620] test loss 4.33, test ppl 76.20
[Epoch 621 Batch 200/372] current loss 4.03, ppl 56.24, throughput 610.21 samples/s, lr 30.00
[Epoch 621] throughput 42687.49 samples/s
[Epoch 621] time cost 51.76s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 621] test loss 4.33, test ppl 76.20
[Epoch 622 Batch 200/372] current loss 4.02, ppl 55.87, throughput 598.78 samples/s, lr 27.43
[Epoch 622] throughput 40945.64 samples/s
[Epoch 622] time cost 53.93s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 622] test loss 4.33, test ppl 76.20
[Epoch 623 Batch 200/372] current loss 4.03, ppl 56.02, throughput 625.47 samples/s, lr 26.57
[Epoch 623] throughput 42967.10 samples/s
[Epoch 623] time cost 51.49s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 623] test loss 4.33, test ppl 76.20
[Epoch 624 Batch 200/372] current loss 4.01, ppl 55.29, throughput 637.63 samples/s, lr 28.71
[Epoch 624] throughput 43560.69 samples/s
[Epoch 624] time cost 50.90s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 624] test loss 4.33, test ppl 76.19
[Epoch 625 Batch 200/372] current loss 4.03, ppl 56.10, throughput 630.29 samples/s, lr 23.14
[Epoch 625] throughput 43097.29 samples/s
[Epoch 625] time cost 51.37s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 625] test loss 4.33, test ppl 76.19
[Epoch 626 Batch 200/372] current loss 4.00, ppl 54.79, throughput 629.24 samples/s, lr 27.43
[Epoch 626] throughput 42184.39 samples/s
[Epoch 626] time cost 52.35s, valid loss 4.38, valid ppl 80.17, lr 30.00
[Epoch 626] test loss 4.33, test ppl 76.19
[Epoch 627 Batch 200/372] current loss 4.02, ppl 55.52, throughput 600.78 samples/s, lr 32.14
[Epoch 627] throughput 42185.95 samples/s
[Epoch 627] time cost 52.43s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 627] test loss 4.33, test ppl 76.19
[Epoch 628 Batch 200/372] current loss 4.02, ppl 55.59, throughput 634.17 samples/s, lr 14.14
[Epoch 628] throughput 42582.66 samples/s
[Epoch 628] time cost 51.91s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 628] test loss 4.33, test ppl 76.19
[Epoch 629 Batch 200/372] current loss 4.01, ppl 55.04, throughput 616.69 samples/s, lr 29.14
[Epoch 629] throughput 42702.94 samples/s
[Epoch 629] time cost 51.85s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 629] test loss 4.33, test ppl 76.19
[Epoch 630 Batch 200/372] current loss 4.03, ppl 56.00, throughput 626.42 samples/s, lr 28.71
[Epoch 630] throughput 42814.22 samples/s
[Epoch 630] time cost 51.81s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 630] test loss 4.33, test ppl 76.19
[Epoch 631 Batch 200/372] current loss 4.02, ppl 55.77, throughput 619.45 samples/s, lr 29.14
[Epoch 631] throughput 42476.80 samples/s
[Epoch 631] time cost 52.00s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 631] test loss 4.33, test ppl 76.19
[Epoch 632 Batch 200/372] current loss 4.03, ppl 56.22, throughput 633.74 samples/s, lr 32.14
[Epoch 632] throughput 42518.23 samples/s
[Epoch 632] time cost 52.02s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 632] test loss 4.33, test ppl 76.19
[Epoch 633 Batch 200/372] current loss 4.02, ppl 55.92, throughput 626.33 samples/s, lr 25.71
[Epoch 633] throughput 41878.63 samples/s
[Epoch 633] time cost 52.79s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 633] test loss 4.33, test ppl 76.19
[Epoch 634 Batch 200/372] current loss 4.01, ppl 55.24, throughput 623.61 samples/s, lr 27.00
[Epoch 634] throughput 41449.32 samples/s
[Epoch 634] time cost 53.27s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 634] test loss 4.33, test ppl 76.19
[Epoch 635 Batch 200/372] current loss 4.01, ppl 55.21, throughput 630.53 samples/s, lr 34.29
[Epoch 635] throughput 42961.34 samples/s
[Epoch 635] time cost 51.56s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 635] test loss 4.33, test ppl 76.19
[Epoch 636 Batch 200/372] current loss 4.02, ppl 55.78, throughput 635.14 samples/s, lr 27.00
[Epoch 636] throughput 42661.91 samples/s
[Epoch 636] time cost 52.06s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 636] test loss 4.33, test ppl 76.19
[Epoch 637 Batch 200/372] current loss 4.02, ppl 55.92, throughput 638.40 samples/s, lr 30.86
[Epoch 637] throughput 42750.40 samples/s
[Epoch 637] time cost 51.71s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 637] test loss 4.33, test ppl 76.19
[Epoch 638 Batch 200/372] current loss 4.02, ppl 55.49, throughput 617.99 samples/s, lr 30.43
[Epoch 638] throughput 42785.65 samples/s
[Epoch 638] time cost 51.72s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 638] test loss 4.33, test ppl 76.19
[Epoch 639 Batch 200/372] current loss 4.02, ppl 55.48, throughput 628.19 samples/s, lr 32.57
[Epoch 639] throughput 43069.78 samples/s
[Epoch 639] time cost 51.43s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 639] test loss 4.33, test ppl 76.19
[Epoch 640 Batch 200/372] current loss 4.02, ppl 55.68, throughput 607.85 samples/s, lr 28.71
[Epoch 640] throughput 42189.77 samples/s
[Epoch 640] time cost 52.46s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 640] test loss 4.33, test ppl 76.19
[Epoch 641 Batch 200/372] current loss 4.00, ppl 54.62, throughput 641.81 samples/s, lr 30.00
[Epoch 641] throughput 43654.08 samples/s
[Epoch 641] time cost 50.68s, valid loss 4.38, valid ppl 80.16, lr 30.00
[Epoch 641] test loss 4.33, test ppl 76.19
[Epoch 642 Batch 200/372] current loss 4.01, ppl 55.30, throughput 616.08 samples/s, lr 30.43
[Epoch 642] throughput 41850.63 samples/s
[Epoch 642] time cost 52.79s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 642] test loss 4.33, test ppl 76.18
[Epoch 643 Batch 200/372] current loss 4.01, ppl 55.24, throughput 636.02 samples/s, lr 28.71
[Epoch 643] throughput 42878.05 samples/s
[Epoch 643] time cost 51.57s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 643] test loss 4.33, test ppl 76.18
[Epoch 644 Batch 200/372] current loss 4.02, ppl 55.43, throughput 610.76 samples/s, lr 26.14
[Epoch 644] throughput 41946.36 samples/s
[Epoch 644] time cost 52.72s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 644] test loss 4.33, test ppl 76.18
[Epoch 645 Batch 200/372] current loss 4.00, ppl 54.74, throughput 636.92 samples/s, lr 30.43
[Epoch 645] throughput 42374.52 samples/s
[Epoch 645] time cost 52.17s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 645] test loss 4.33, test ppl 76.18
[Epoch 646 Batch 200/372] current loss 4.01, ppl 55.00, throughput 618.82 samples/s, lr 27.00
[Epoch 646] throughput 42138.41 samples/s
[Epoch 646] time cost 52.42s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 646] test loss 4.33, test ppl 76.18
[Epoch 647 Batch 200/372] current loss 4.01, ppl 55.09, throughput 626.66 samples/s, lr 28.71
[Epoch 647] throughput 42021.57 samples/s
[Epoch 647] time cost 52.65s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 647] test loss 4.33, test ppl 76.18
[Epoch 648 Batch 200/372] current loss 4.02, ppl 55.50, throughput 635.32 samples/s, lr 30.43
[Epoch 648] throughput 42457.04 samples/s
[Epoch 648] time cost 52.12s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 648] test loss 4.33, test ppl 76.18
[Epoch 649 Batch 200/372] current loss 4.02, ppl 55.66, throughput 635.48 samples/s, lr 28.71
[Epoch 649] throughput 42686.12 samples/s
[Epoch 649] time cost 51.84s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 649] test loss 4.33, test ppl 76.18
[Epoch 650 Batch 200/372] current loss 4.01, ppl 55.05, throughput 617.23 samples/s, lr 27.86
[Epoch 650] throughput 43052.93 samples/s
[Epoch 650] time cost 51.44s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 650] test loss 4.33, test ppl 76.18
[Epoch 651 Batch 200/372] current loss 4.03, ppl 56.16, throughput 626.88 samples/s, lr 29.57
[Epoch 651] throughput 42365.99 samples/s
[Epoch 651] time cost 52.24s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 651] test loss 4.33, test ppl 76.18
[Epoch 652 Batch 200/372] current loss 4.00, ppl 54.51, throughput 625.18 samples/s, lr 26.14
[Epoch 652] throughput 42411.99 samples/s
[Epoch 652] time cost 52.14s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 652] test loss 4.33, test ppl 76.18
[Epoch 653 Batch 200/372] current loss 4.00, ppl 54.77, throughput 618.85 samples/s, lr 27.43
[Epoch 653] throughput 42307.66 samples/s
[Epoch 653] time cost 52.25s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 653] test loss 4.33, test ppl 76.18
[Epoch 654 Batch 200/372] current loss 4.00, ppl 54.77, throughput 630.05 samples/s, lr 30.43
[Epoch 654] throughput 42480.24 samples/s
[Epoch 654] time cost 52.13s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 654] test loss 4.33, test ppl 76.18
[Epoch 655 Batch 200/372] current loss 4.01, ppl 55.41, throughput 647.93 samples/s, lr 30.43
[Epoch 655] throughput 43169.58 samples/s
[Epoch 655] time cost 51.26s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 655] test loss 4.33, test ppl 76.18
[Epoch 656 Batch 200/372] current loss 4.03, ppl 56.26, throughput 626.67 samples/s, lr 27.86
[Epoch 656] throughput 42596.39 samples/s
[Epoch 656] time cost 51.97s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 656] test loss 4.33, test ppl 76.18
[Epoch 657 Batch 200/372] current loss 4.01, ppl 55.18, throughput 643.62 samples/s, lr 26.57
[Epoch 657] throughput 43259.26 samples/s
[Epoch 657] time cost 51.16s, valid loss 4.38, valid ppl 80.15, lr 30.00
[Epoch 657] test loss 4.33, test ppl 76.18
[Epoch 658 Batch 200/372] current loss 4.03, ppl 56.14, throughput 620.20 samples/s, lr 30.00
[Epoch 658] throughput 42536.00 samples/s
[Epoch 658] time cost 51.97s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 658] test loss 4.33, test ppl 76.18
[Epoch 659 Batch 200/372] current loss 4.04, ppl 56.59, throughput 631.32 samples/s, lr 28.29
[Epoch 659] throughput 42586.67 samples/s
[Epoch 659] time cost 51.96s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 659] test loss 4.33, test ppl 76.18
[Epoch 660 Batch 200/372] current loss 4.02, ppl 55.45, throughput 630.68 samples/s, lr 27.86
[Epoch 660] throughput 42029.67 samples/s
[Epoch 660] time cost 52.70s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 660] test loss 4.33, test ppl 76.17
[Epoch 661 Batch 200/372] current loss 4.01, ppl 55.10, throughput 634.17 samples/s, lr 27.86
[Epoch 661] throughput 42408.24 samples/s
[Epoch 661] time cost 52.22s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 661] test loss 4.33, test ppl 76.17
[Epoch 662 Batch 200/372] current loss 4.01, ppl 55.06, throughput 643.37 samples/s, lr 26.14
[Epoch 662] throughput 42804.99 samples/s
[Epoch 662] time cost 51.76s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 662] test loss 4.33, test ppl 76.17
[Epoch 663 Batch 200/372] current loss 4.02, ppl 55.66, throughput 622.18 samples/s, lr 36.43
[Epoch 663] throughput 42553.57 samples/s
[Epoch 663] time cost 51.97s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 663] test loss 4.33, test ppl 76.17
[Epoch 664 Batch 200/372] current loss 4.01, ppl 55.40, throughput 619.10 samples/s, lr 32.14
[Epoch 664] throughput 42327.60 samples/s
[Epoch 664] time cost 52.21s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 664] test loss 4.33, test ppl 76.17
[Epoch 665 Batch 200/372] current loss 4.00, ppl 54.71, throughput 623.44 samples/s, lr 27.86
[Epoch 665] throughput 41856.72 samples/s
[Epoch 665] time cost 52.84s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 665] test loss 4.33, test ppl 76.17
[Epoch 666 Batch 200/372] current loss 4.02, ppl 55.94, throughput 629.48 samples/s, lr 27.86
[Epoch 666] throughput 42257.78 samples/s
[Epoch 666] time cost 52.30s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 666] test loss 4.33, test ppl 76.17
[Epoch 667 Batch 200/372] current loss 4.01, ppl 54.99, throughput 626.38 samples/s, lr 28.29
[Epoch 667] throughput 42322.16 samples/s
[Epoch 667] time cost 52.21s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 667] test loss 4.33, test ppl 76.17
[Epoch 668 Batch 200/372] current loss 4.01, ppl 54.90, throughput 630.39 samples/s, lr 33.43
[Epoch 668] throughput 43051.13 samples/s
[Epoch 668] time cost 51.37s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 668] test loss 4.33, test ppl 76.17
[Epoch 669 Batch 200/372] current loss 4.00, ppl 54.43, throughput 633.68 samples/s, lr 16.29
[Epoch 669] throughput 42680.37 samples/s
[Epoch 669] time cost 51.92s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 669] test loss 4.33, test ppl 76.17
[Epoch 670 Batch 200/372] current loss 4.00, ppl 54.82, throughput 636.77 samples/s, lr 32.14
[Epoch 670] throughput 42913.19 samples/s
[Epoch 670] time cost 51.53s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 670] test loss 4.33, test ppl 76.17
[Epoch 671 Batch 200/372] current loss 4.01, ppl 54.89, throughput 612.11 samples/s, lr 12.86
[Epoch 671] throughput 41877.50 samples/s
[Epoch 671] time cost 52.81s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 671] test loss 4.33, test ppl 76.17
[Epoch 672 Batch 200/372] current loss 4.00, ppl 54.78, throughput 617.47 samples/s, lr 32.14
[Epoch 672] throughput 42115.57 samples/s
[Epoch 672] time cost 52.52s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 672] test loss 4.33, test ppl 76.17
[Epoch 673 Batch 200/372] current loss 4.00, ppl 54.64, throughput 625.20 samples/s, lr 30.00
[Epoch 673] throughput 42673.86 samples/s
[Epoch 673] time cost 51.86s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 673] test loss 4.33, test ppl 76.17
[Epoch 674 Batch 200/372] current loss 4.00, ppl 54.71, throughput 636.37 samples/s, lr 30.00
[Epoch 674] throughput 42653.88 samples/s
[Epoch 674] time cost 51.90s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 674] test loss 4.33, test ppl 76.17
[Epoch 675 Batch 200/372] current loss 4.01, ppl 55.37, throughput 615.38 samples/s, lr 30.00
[Epoch 675] throughput 41961.06 samples/s
[Epoch 675] time cost 52.70s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 675] test loss 4.33, test ppl 76.17
[Epoch 676 Batch 200/372] current loss 4.02, ppl 55.48, throughput 640.96 samples/s, lr 31.71
[Epoch 676] throughput 42508.49 samples/s
[Epoch 676] time cost 52.06s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 676] test loss 4.33, test ppl 76.17
[Epoch 677 Batch 200/372] current loss 4.00, ppl 54.83, throughput 624.70 samples/s, lr 33.43
[Epoch 677] throughput 42117.58 samples/s
[Epoch 677] time cost 52.45s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 677] test loss 4.33, test ppl 76.17
[Epoch 678 Batch 200/372] current loss 4.02, ppl 55.47, throughput 626.69 samples/s, lr 25.71
[Epoch 678] throughput 42170.18 samples/s
[Epoch 678] time cost 52.39s, valid loss 4.38, valid ppl 80.14, lr 30.00
[Epoch 678] test loss 4.33, test ppl 76.17
[Epoch 679 Batch 200/372] current loss 4.00, ppl 54.76, throughput 631.98 samples/s, lr 30.00
[Epoch 679] throughput 42875.45 samples/s
[Epoch 679] time cost 51.67s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 679] test loss 4.33, test ppl 76.16
[Epoch 680 Batch 200/372] current loss 4.01, ppl 55.19, throughput 603.38 samples/s, lr 30.86
[Epoch 680] throughput 42249.96 samples/s
[Epoch 680] time cost 52.34s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 680] test loss 4.33, test ppl 76.16
[Epoch 681 Batch 200/372] current loss 4.01, ppl 55.40, throughput 620.09 samples/s, lr 27.43
[Epoch 681] throughput 42001.85 samples/s
[Epoch 681] time cost 52.65s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 681] test loss 4.33, test ppl 76.16
[Epoch 682 Batch 200/372] current loss 4.01, ppl 55.12, throughput 649.83 samples/s, lr 33.00
[Epoch 682] throughput 43236.96 samples/s
[Epoch 682] time cost 51.14s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 682] test loss 4.33, test ppl 76.16
[Epoch 683 Batch 200/372] current loss 4.00, ppl 54.82, throughput 632.57 samples/s, lr 25.71
[Epoch 683] throughput 42882.94 samples/s
[Epoch 683] time cost 51.59s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 683] test loss 4.33, test ppl 76.16
[Epoch 684 Batch 200/372] current loss 4.00, ppl 54.50, throughput 614.80 samples/s, lr 28.71
[Epoch 684] throughput 42389.41 samples/s
[Epoch 684] time cost 52.12s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 684] test loss 4.33, test ppl 76.16
[Epoch 685 Batch 200/372] current loss 4.01, ppl 55.24, throughput 633.21 samples/s, lr 26.14
[Epoch 685] throughput 43105.00 samples/s
[Epoch 685] time cost 51.38s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 685] test loss 4.33, test ppl 76.16
[Epoch 686 Batch 200/372] current loss 4.01, ppl 54.96, throughput 630.67 samples/s, lr 24.86
[Epoch 686] throughput 42349.13 samples/s
[Epoch 686] time cost 52.24s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 686] test loss 4.33, test ppl 76.16
[Epoch 687 Batch 200/372] current loss 4.01, ppl 55.34, throughput 623.86 samples/s, lr 29.14
[Epoch 687] throughput 42086.53 samples/s
[Epoch 687] time cost 52.51s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 687] test loss 4.33, test ppl 76.16
[Epoch 688 Batch 200/372] current loss 4.01, ppl 54.95, throughput 609.15 samples/s, lr 29.14
[Epoch 688] throughput 42556.77 samples/s
[Epoch 688] time cost 51.92s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 688] test loss 4.33, test ppl 76.16
[Epoch 689 Batch 200/372] current loss 4.01, ppl 55.40, throughput 619.29 samples/s, lr 28.29
[Epoch 689] throughput 41549.72 samples/s
[Epoch 689] time cost 53.18s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 689] test loss 4.33, test ppl 76.16
[Epoch 690 Batch 200/372] current loss 4.00, ppl 54.33, throughput 624.67 samples/s, lr 30.43
[Epoch 690] throughput 42418.30 samples/s
[Epoch 690] time cost 52.09s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 690] test loss 4.33, test ppl 76.16
[Epoch 691 Batch 200/372] current loss 4.01, ppl 55.06, throughput 625.88 samples/s, lr 34.29
[Epoch 691] throughput 42201.57 samples/s
[Epoch 691] time cost 52.43s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 691] test loss 4.33, test ppl 76.16
[Epoch 692 Batch 200/372] current loss 4.01, ppl 55.23, throughput 630.59 samples/s, lr 27.00
[Epoch 692] throughput 43150.04 samples/s
[Epoch 692] time cost 51.24s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 692] test loss 4.33, test ppl 76.16
[Epoch 693 Batch 200/372] current loss 4.00, ppl 54.64, throughput 635.93 samples/s, lr 14.57
[Epoch 693] throughput 42538.60 samples/s
[Epoch 693] time cost 51.99s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 693] test loss 4.33, test ppl 76.16
[Epoch 694 Batch 200/372] current loss 4.00, ppl 54.82, throughput 619.76 samples/s, lr 29.14
[Epoch 694] throughput 42050.04 samples/s
[Epoch 694] time cost 52.53s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 694] test loss 4.33, test ppl 76.16
[Epoch 695 Batch 200/372] current loss 4.01, ppl 55.26, throughput 636.35 samples/s, lr 31.29
[Epoch 695] throughput 42847.54 samples/s
[Epoch 695] time cost 51.60s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 695] test loss 4.33, test ppl 76.16
[Epoch 696 Batch 200/372] current loss 4.01, ppl 55.27, throughput 607.56 samples/s, lr 30.86
[Epoch 696] throughput 42496.93 samples/s
[Epoch 696] time cost 52.02s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 697 Batch 200/372] current loss 4.00, ppl 54.85, throughput 623.34 samples/s, lr 25.71
[Epoch 697] throughput 42284.82 samples/s
[Epoch 697] time cost 52.26s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 698 Batch 200/372] current loss 4.01, ppl 55.05, throughput 604.78 samples/s, lr 28.71
[Epoch 698] throughput 41773.25 samples/s
[Epoch 698] time cost 52.89s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 698] test loss 4.33, test ppl 76.16
[Epoch 699 Batch 200/372] current loss 4.01, ppl 55.26, throughput 614.52 samples/s, lr 28.71
[Epoch 699] throughput 42115.84 samples/s
[Epoch 699] time cost 52.54s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 700 Batch 200/372] current loss 4.01, ppl 55.18, throughput 615.84 samples/s, lr 31.71
[Epoch 700] throughput 42226.12 samples/s
[Epoch 700] time cost 52.39s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 701 Batch 200/372] current loss 4.01, ppl 54.90, throughput 632.83 samples/s, lr 29.57
[Epoch 701] throughput 42187.36 samples/s
[Epoch 701] time cost 52.44s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 702 Batch 200/372] current loss 4.00, ppl 54.85, throughput 609.18 samples/s, lr 28.29
[Epoch 702] throughput 43251.91 samples/s
[Epoch 702] time cost 51.23s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 702] test loss 4.33, test ppl 76.16
[Epoch 703 Batch 200/372] current loss 4.00, ppl 54.81, throughput 635.22 samples/s, lr 33.00
[Epoch 703] throughput 42477.91 samples/s
[Epoch 703] time cost 52.06s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 703] test loss 4.33, test ppl 76.15
[Epoch 704 Batch 200/372] current loss 4.02, ppl 55.63, throughput 625.93 samples/s, lr 31.71
[Epoch 704] throughput 42525.82 samples/s
[Epoch 704] time cost 51.97s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 704] test loss 4.33, test ppl 76.15
[Epoch 705 Batch 200/372] current loss 4.02, ppl 55.94, throughput 613.06 samples/s, lr 30.86
[Epoch 705] throughput 42264.90 samples/s
[Epoch 705] time cost 52.30s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 705] test loss 4.33, test ppl 76.15
[Epoch 706 Batch 200/372] current loss 4.01, ppl 55.17, throughput 626.88 samples/s, lr 32.14
[Epoch 706] throughput 42897.39 samples/s
[Epoch 706] time cost 51.62s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 706] test loss 4.33, test ppl 76.15
[Epoch 707 Batch 200/372] current loss 4.00, ppl 54.44, throughput 633.89 samples/s, lr 29.14
[Epoch 707] throughput 42240.02 samples/s
[Epoch 707] time cost 52.33s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 707] test loss 4.33, test ppl 76.15
[Epoch 708 Batch 200/372] current loss 4.00, ppl 54.62, throughput 623.96 samples/s, lr 30.00
[Epoch 708] throughput 42888.25 samples/s
[Epoch 708] time cost 51.55s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 708] test loss 4.33, test ppl 76.15
[Epoch 709 Batch 200/372] current loss 4.00, ppl 54.56, throughput 615.01 samples/s, lr 27.43
[Epoch 709] throughput 42079.48 samples/s
[Epoch 709] time cost 52.51s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 709] test loss 4.33, test ppl 76.15
[Epoch 710 Batch 200/372] current loss 4.01, ppl 54.97, throughput 624.80 samples/s, lr 28.29
[Epoch 710] throughput 42408.23 samples/s
[Epoch 710] time cost 52.11s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 711 Batch 200/372] current loss 4.00, ppl 54.41, throughput 626.83 samples/s, lr 30.43
[Epoch 711] throughput 42185.61 samples/s
[Epoch 711] time cost 52.41s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 711] test loss 4.33, test ppl 76.15
[Epoch 712 Batch 200/372] current loss 4.01, ppl 54.96, throughput 636.63 samples/s, lr 27.86
[Epoch 712] throughput 42924.08 samples/s
[Epoch 712] time cost 51.62s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 712] test loss 4.33, test ppl 76.15
[Epoch 713 Batch 200/372] current loss 4.00, ppl 54.82, throughput 640.78 samples/s, lr 30.00
[Epoch 713] throughput 42969.62 samples/s
[Epoch 713] time cost 51.47s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 713] test loss 4.33, test ppl 76.15
[Epoch 714 Batch 200/372] current loss 4.00, ppl 54.49, throughput 620.00 samples/s, lr 30.00
[Epoch 714] throughput 42136.08 samples/s
[Epoch 714] time cost 52.52s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 714] test loss 4.33, test ppl 76.15
[Epoch 715 Batch 200/372] current loss 4.01, ppl 55.10, throughput 623.84 samples/s, lr 30.86
[Epoch 715] throughput 42674.10 samples/s
[Epoch 715] time cost 51.88s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 715] test loss 4.33, test ppl 76.15
[Epoch 716 Batch 200/372] current loss 3.99, ppl 54.23, throughput 620.29 samples/s, lr 33.00
[Epoch 716] throughput 42729.50 samples/s
[Epoch 716] time cost 51.72s, valid loss 4.38, valid ppl 80.13, lr 30.00
[Epoch 716] test loss 4.33, test ppl 76.15
[Epoch 717 Batch 200/372] current loss 4.01, ppl 55.01, throughput 639.99 samples/s, lr 31.71
[Epoch 717] throughput 43020.68 samples/s
[Epoch 717] time cost 51.42s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 717] test loss 4.33, test ppl 76.15
[Epoch 718 Batch 200/372] current loss 4.01, ppl 55.21, throughput 630.19 samples/s, lr 30.86
[Epoch 718] throughput 42307.24 samples/s
[Epoch 718] time cost 52.32s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 718] test loss 4.33, test ppl 76.15
[Epoch 719 Batch 200/372] current loss 4.00, ppl 54.52, throughput 618.34 samples/s, lr 30.86
[Epoch 719] throughput 42100.34 samples/s
[Epoch 719] time cost 52.57s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 719] test loss 4.33, test ppl 76.15
[Epoch 720 Batch 200/372] current loss 4.00, ppl 54.52, throughput 652.74 samples/s, lr 29.14
[Epoch 720] throughput 42704.86 samples/s
[Epoch 720] time cost 51.84s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 720] test loss 4.33, test ppl 76.15
[Epoch 721 Batch 200/372] current loss 4.00, ppl 54.64, throughput 636.21 samples/s, lr 27.43
[Epoch 721] throughput 42870.26 samples/s
[Epoch 721] time cost 51.60s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 721] test loss 4.33, test ppl 76.15
[Epoch 722 Batch 200/372] current loss 4.01, ppl 55.22, throughput 616.00 samples/s, lr 32.14
[Epoch 722] throughput 42670.67 samples/s
[Epoch 722] time cost 51.86s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 722] test loss 4.33, test ppl 76.15
[Epoch 723 Batch 200/372] current loss 4.01, ppl 54.91, throughput 630.18 samples/s, lr 27.00
[Epoch 723] throughput 43189.09 samples/s
[Epoch 723] time cost 51.32s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 723] test loss 4.33, test ppl 76.15
[Epoch 724 Batch 200/372] current loss 4.02, ppl 55.54, throughput 634.43 samples/s, lr 29.57
[Epoch 724] throughput 42408.66 samples/s
[Epoch 724] time cost 52.13s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 724] test loss 4.33, test ppl 76.15
[Epoch 725 Batch 200/372] current loss 4.01, ppl 54.91, throughput 637.67 samples/s, lr 29.14
[Epoch 725] throughput 43059.26 samples/s
[Epoch 725] time cost 51.44s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 725] test loss 4.33, test ppl 76.15
[Epoch 726 Batch 200/372] current loss 4.00, ppl 54.76, throughput 625.06 samples/s, lr 30.86
[Epoch 726] throughput 42702.29 samples/s
[Epoch 726] time cost 51.89s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 726] test loss 4.33, test ppl 76.15
[Epoch 727 Batch 200/372] current loss 4.00, ppl 54.48, throughput 634.06 samples/s, lr 27.86
[Epoch 727] throughput 42361.46 samples/s
[Epoch 727] time cost 52.19s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 727] test loss 4.33, test ppl 76.14
[Epoch 728 Batch 200/372] current loss 4.00, ppl 54.84, throughput 636.47 samples/s, lr 27.43
[Epoch 728] throughput 43642.08 samples/s
[Epoch 728] time cost 50.82s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 728] test loss 4.33, test ppl 76.14
[Epoch 729 Batch 200/372] current loss 4.01, ppl 55.03, throughput 623.72 samples/s, lr 26.57
[Epoch 729] throughput 42895.67 samples/s
[Epoch 729] time cost 51.69s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 729] test loss 4.33, test ppl 76.14
[Epoch 730 Batch 200/372] current loss 4.00, ppl 54.68, throughput 607.25 samples/s, lr 29.57
[Epoch 730] throughput 42232.80 samples/s
[Epoch 730] time cost 52.32s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 730] test loss 4.33, test ppl 76.14
[Epoch 731 Batch 200/372] current loss 4.00, ppl 54.36, throughput 634.90 samples/s, lr 28.71
[Epoch 731] throughput 43229.51 samples/s
[Epoch 731] time cost 51.19s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 731] test loss 4.33, test ppl 76.14
[Epoch 732 Batch 200/372] current loss 4.00, ppl 54.74, throughput 622.48 samples/s, lr 34.71
[Epoch 732] throughput 42606.84 samples/s
[Epoch 732] time cost 51.86s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 732] test loss 4.33, test ppl 76.14
[Epoch 733 Batch 200/372] current loss 3.99, ppl 54.03, throughput 628.31 samples/s, lr 30.43
[Epoch 733] throughput 42945.69 samples/s
[Epoch 733] time cost 51.46s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 733] test loss 4.33, test ppl 76.14
[Epoch 734 Batch 200/372] current loss 4.00, ppl 54.58, throughput 622.46 samples/s, lr 31.71
[Epoch 734] throughput 41550.07 samples/s
[Epoch 734] time cost 53.10s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 734] test loss 4.33, test ppl 76.14
[Epoch 735 Batch 200/372] current loss 4.00, ppl 54.85, throughput 623.19 samples/s, lr 28.29
[Epoch 735] throughput 42104.76 samples/s
[Epoch 735] time cost 52.48s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 735] test loss 4.33, test ppl 76.14
[Epoch 736 Batch 200/372] current loss 4.01, ppl 55.36, throughput 639.86 samples/s, lr 30.00
[Epoch 736] throughput 43133.96 samples/s
[Epoch 736] time cost 51.35s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 736] test loss 4.33, test ppl 76.14
[Epoch 737 Batch 200/372] current loss 3.99, ppl 53.99, throughput 616.65 samples/s, lr 31.71
[Epoch 737] throughput 42420.64 samples/s
[Epoch 737] time cost 52.21s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 737] test loss 4.33, test ppl 76.14
[Epoch 738 Batch 200/372] current loss 4.00, ppl 54.68, throughput 600.49 samples/s, lr 12.43
[Epoch 738] throughput 41326.32 samples/s
[Epoch 738] time cost 53.38s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 738] test loss 4.33, test ppl 76.14
[Epoch 739 Batch 200/372] current loss 4.00, ppl 54.47, throughput 639.36 samples/s, lr 29.57
[Epoch 739] throughput 43129.64 samples/s
[Epoch 739] time cost 51.39s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 739] test loss 4.33, test ppl 76.14
[Epoch 740 Batch 200/372] current loss 4.00, ppl 54.37, throughput 636.05 samples/s, lr 32.57
[Epoch 740] throughput 43158.46 samples/s
[Epoch 740] time cost 51.37s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 740] test loss 4.33, test ppl 76.14
[Epoch 741 Batch 200/372] current loss 4.00, ppl 54.36, throughput 634.32 samples/s, lr 31.71
[Epoch 741] throughput 42798.65 samples/s
[Epoch 741] time cost 51.65s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 741] test loss 4.33, test ppl 76.14
[Epoch 742 Batch 200/372] current loss 4.00, ppl 54.85, throughput 620.42 samples/s, lr 30.86
[Epoch 742] throughput 42486.10 samples/s
[Epoch 742] time cost 52.02s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 742] test loss 4.33, test ppl 76.14
[Epoch 743 Batch 200/372] current loss 4.02, ppl 55.49, throughput 609.36 samples/s, lr 32.14
[Epoch 743] throughput 42179.71 samples/s
[Epoch 743] time cost 52.59s, valid loss 4.38, valid ppl 80.12, lr 30.00
[Epoch 743] test loss 4.33, test ppl 76.14
[Epoch 744 Batch 200/372] current loss 4.01, ppl 54.89, throughput 644.94 samples/s, lr 26.14
[Epoch 744] throughput 42647.80 samples/s
[Epoch 744] time cost 51.80s, valid loss 4.38, valid ppl 80.11, lr 30.00
[Epoch 744] test loss 4.33, test ppl 76.14
[Epoch 745 Batch 200/372] current loss 3.99, ppl 54.22, throughput 623.68 samples/s, lr 27.00
[Epoch 745] throughput 42557.32 samples/s
[Epoch 745] time cost 51.96s, valid loss 4.38, valid ppl 80.11, lr 30.00
[Epoch 745] test loss 4.33, test ppl 76.14
[Epoch 746 Batch 200/372] current loss 4.00, ppl 54.64, throughput 634.66 samples/s, lr 28.71
[Epoch 746] throughput 42864.67 samples/s
[Epoch 746] time cost 51.63s, valid loss 4.38, valid ppl 80.11, lr 30.00
[Epoch 746] test loss 4.33, test ppl 76.14
[Epoch 747 Batch 200/372] current loss 3.99, ppl 54.05, throughput 623.49 samples/s, lr 31.71
[Epoch 747] throughput 42837.28 samples/s
[Epoch 747] time cost 51.71s, valid loss 4.38, valid ppl 80.11, lr 30.00
[Epoch 747] test loss 4.33, test ppl 76.14
[Epoch 748 Batch 200/372] current loss 4.00, ppl 54.61, throughput 619.93 samples/s, lr 26.14
[Epoch 748] throughput 42169.73 samples/s
[Epoch 748] time cost 52.53s, valid loss 4.38, valid ppl 80.11, lr 30.00
[Epoch 748] test loss 4.33, test ppl 76.14
[Epoch 749 Batch 200/372] current loss 4.00, ppl 54.79, throughput 630.61 samples/s, lr 13.29
[Epoch 749] throughput 42248.63 samples/s
[Epoch 749] time cost 52.29s, valid loss 4.38, valid ppl 80.11, lr 30.00
[Epoch 749] test loss 4.33, test ppl 76.14
Total training throughput 28327.69 samples/s
Best validation loss 4.38, val ppl 80.11
Best test loss 4.33, test ppl 76.14
Total time cost 55321.39s