/
SST-2_multichannel.log
3231 lines (3231 loc) · 210 KB
/
SST-2_multichannel.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Namespace(batch_size=50, data_name='SST-2', dropout=0.5, epochs=40, gpu=0, log_interval=30, lr=0.0001, model_mode='multichannel', save_prefix='sa-model')
Use gpu0
1614
53
Done! Tokenizing Time=4.41s, #Sentences=118038
Done! Tokenizing Time=0.80s, #Sentences=1745
SentimentNet(
(embedding): Embedding(17814 -> 300, float32)
(embedding_extend): Embedding(17814 -> 300, float32)
(encoder): ConvolutionalEncoder(
(_convs): HybridConcurrent(
(0): HybridSequential(
(0): Conv1D(600 -> 100, kernel_size=(3,), stride=(1,))
(1): Activation(relu)
(2): HybridLambda(<lambda>)
)
(1): HybridSequential(
(0): Conv1D(600 -> 100, kernel_size=(4,), stride=(1,))
(1): Activation(relu)
(2): HybridLambda(<lambda>)
)
(2): HybridSequential(
(0): Conv1D(600 -> 100, kernel_size=(5,), stride=(1,))
(1): Activation(relu)
(2): HybridLambda(<lambda>)
)
)
)
(output): HybridSequential(
(0): Dropout(p = 0.5, axes=())
(1): Dense(None -> 2, linear)
)
)
[Epoch 0 Batch 30/2125] avg loss 0.0147516, throughput 2.56407K wps
[Epoch 0 Batch 60/2125] avg loss 0.0142936, throughput 4.02322K wps
[Epoch 0 Batch 90/2125] avg loss 0.0138636, throughput 4.02236K wps
[Epoch 0 Batch 120/2125] avg loss 0.0135436, throughput 4.01478K wps
[Epoch 0 Batch 150/2125] avg loss 0.0134229, throughput 4.01785K wps
[Epoch 0 Batch 180/2125] avg loss 0.0130767, throughput 4.01938K wps
[Epoch 0 Batch 210/2125] avg loss 0.0132462, throughput 4.01397K wps
[Epoch 0 Batch 240/2125] avg loss 0.012815, throughput 4.01308K wps
[Epoch 0 Batch 270/2125] avg loss 0.0123686, throughput 4.01757K wps
[Epoch 0 Batch 300/2125] avg loss 0.0124178, throughput 4.01749K wps
[Epoch 0 Batch 330/2125] avg loss 0.0121333, throughput 4.01016K wps
[Epoch 0 Batch 360/2125] avg loss 0.0115257, throughput 4.00946K wps
[Epoch 0 Batch 390/2125] avg loss 0.0113919, throughput 4.01288K wps
[Epoch 0 Batch 420/2125] avg loss 0.0111251, throughput 4.01304K wps
[Epoch 0 Batch 450/2125] avg loss 0.0112771, throughput 4.01867K wps
[Epoch 0 Batch 480/2125] avg loss 0.0108728, throughput 4.01583K wps
[Epoch 0 Batch 510/2125] avg loss 0.010625, throughput 4.01621K wps
[Epoch 0 Batch 540/2125] avg loss 0.0100855, throughput 4.01905K wps
[Epoch 0 Batch 570/2125] avg loss 0.0101509, throughput 4.01538K wps
[Epoch 0 Batch 600/2125] avg loss 0.00997682, throughput 4.01337K wps
[Epoch 0 Batch 630/2125] avg loss 0.00949247, throughput 4.01543K wps
[Epoch 0 Batch 660/2125] avg loss 0.00951337, throughput 4.01165K wps
[Epoch 0 Batch 690/2125] avg loss 0.00911888, throughput 4.01354K wps
[Epoch 0 Batch 720/2125] avg loss 0.00937186, throughput 4.01438K wps
[Epoch 0 Batch 750/2125] avg loss 0.00887978, throughput 3.99346K wps
[Epoch 0 Batch 780/2125] avg loss 0.0086955, throughput 3.99133K wps
[Epoch 0 Batch 810/2125] avg loss 0.00847885, throughput 4.01173K wps
[Epoch 0 Batch 840/2125] avg loss 0.00817178, throughput 4.00983K wps
[Epoch 0 Batch 870/2125] avg loss 0.00775457, throughput 4.00867K wps
[Epoch 0 Batch 900/2125] avg loss 0.00780742, throughput 4.00721K wps
[Epoch 0 Batch 930/2125] avg loss 0.00779968, throughput 4.01075K wps
[Epoch 0 Batch 960/2125] avg loss 0.00801744, throughput 4.00696K wps
[Epoch 0 Batch 990/2125] avg loss 0.00739751, throughput 4.00404K wps
[Epoch 0 Batch 1020/2125] avg loss 0.00721081, throughput 4.01038K wps
[Epoch 0 Batch 1050/2125] avg loss 0.0075239, throughput 4.00852K wps
[Epoch 0 Batch 1080/2125] avg loss 0.00713659, throughput 4.00842K wps
[Epoch 0 Batch 1110/2125] avg loss 0.00718793, throughput 4.00881K wps
[Epoch 0 Batch 1140/2125] avg loss 0.00707068, throughput 4.00812K wps
[Epoch 0 Batch 1170/2125] avg loss 0.00702073, throughput 4.00948K wps
[Epoch 0 Batch 1200/2125] avg loss 0.00687074, throughput 4.00639K wps
[Epoch 0 Batch 1230/2125] avg loss 0.0066359, throughput 4.00242K wps
[Epoch 0 Batch 1260/2125] avg loss 0.00681151, throughput 4.0071K wps
[Epoch 0 Batch 1290/2125] avg loss 0.00656432, throughput 4.00106K wps
[Epoch 0 Batch 1320/2125] avg loss 0.00684996, throughput 4.00742K wps
[Epoch 0 Batch 1350/2125] avg loss 0.00622697, throughput 4.00299K wps
[Epoch 0 Batch 1380/2125] avg loss 0.00652663, throughput 4.00453K wps
[Epoch 0 Batch 1410/2125] avg loss 0.00646962, throughput 4.00456K wps
[Epoch 0 Batch 1440/2125] avg loss 0.00648735, throughput 4.00695K wps
[Epoch 0 Batch 1470/2125] avg loss 0.00661196, throughput 4.0091K wps
[Epoch 0 Batch 1500/2125] avg loss 0.00634111, throughput 4.00373K wps
[Epoch 0 Batch 1530/2125] avg loss 0.00648579, throughput 4.00131K wps
[Epoch 0 Batch 1560/2125] avg loss 0.00671795, throughput 4.00447K wps
[Epoch 0 Batch 1590/2125] avg loss 0.00577287, throughput 4.00277K wps
[Epoch 0 Batch 1620/2125] avg loss 0.00629674, throughput 4.00556K wps
[Epoch 0 Batch 1650/2125] avg loss 0.00624858, throughput 4.0016K wps
[Epoch 0 Batch 1680/2125] avg loss 0.00627834, throughput 4.00326K wps
[Epoch 0 Batch 1710/2125] avg loss 0.00608684, throughput 4.00241K wps
[Epoch 0 Batch 1740/2125] avg loss 0.00591085, throughput 4.00248K wps
[Epoch 0 Batch 1770/2125] avg loss 0.00591208, throughput 4.00495K wps
[Epoch 0 Batch 1800/2125] avg loss 0.00619698, throughput 4.00061K wps
[Epoch 0 Batch 1830/2125] avg loss 0.0060902, throughput 4.00252K wps
[Epoch 0 Batch 1860/2125] avg loss 0.00596406, throughput 4.00577K wps
[Epoch 0 Batch 1890/2125] avg loss 0.00586057, throughput 3.99572K wps
[Epoch 0 Batch 1920/2125] avg loss 0.0055443, throughput 4.00382K wps
[Epoch 0 Batch 1950/2125] avg loss 0.00571695, throughput 4.00193K wps
[Epoch 0 Batch 1980/2125] avg loss 0.00622543, throughput 4.00254K wps
[Epoch 0 Batch 2010/2125] avg loss 0.00572866, throughput 3.99695K wps
[Epoch 0 Batch 2040/2125] avg loss 0.00574906, throughput 4.0022K wps
[Epoch 0 Batch 2070/2125] avg loss 0.00520557, throughput 3.99985K wps
[Epoch 0 Batch 2100/2125] avg loss 0.00583206, throughput 3.99851K wps
Begin Testing...
[Batch 30/237] elapsed 0.46 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 0] train avg loss 0.00838684, test acc 0.8910, test avg loss 0.276346, throughput 3.95853K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 1 Batch 30/2125] avg loss 0.00563384, throughput 4.09238K wps
[Epoch 1 Batch 60/2125] avg loss 0.00520742, throughput 3.99956K wps
[Epoch 1 Batch 90/2125] avg loss 0.00521086, throughput 3.99952K wps
[Epoch 1 Batch 120/2125] avg loss 0.00537803, throughput 4.0037K wps
[Epoch 1 Batch 150/2125] avg loss 0.00526046, throughput 3.99612K wps
[Epoch 1 Batch 180/2125] avg loss 0.00488797, throughput 4.00344K wps
[Epoch 1 Batch 210/2125] avg loss 0.0052571, throughput 4.00291K wps
[Epoch 1 Batch 240/2125] avg loss 0.00512038, throughput 3.99767K wps
[Epoch 1 Batch 270/2125] avg loss 0.00538388, throughput 4.00111K wps
[Epoch 1 Batch 300/2125] avg loss 0.00493089, throughput 3.99568K wps
[Epoch 1 Batch 330/2125] avg loss 0.00512095, throughput 3.99584K wps
[Epoch 1 Batch 360/2125] avg loss 0.00463241, throughput 3.99511K wps
[Epoch 1 Batch 390/2125] avg loss 0.00514738, throughput 3.99734K wps
[Epoch 1 Batch 420/2125] avg loss 0.00464598, throughput 3.99816K wps
[Epoch 1 Batch 450/2125] avg loss 0.00525882, throughput 4.00017K wps
[Epoch 1 Batch 480/2125] avg loss 0.00503221, throughput 4.00129K wps
[Epoch 1 Batch 510/2125] avg loss 0.00471628, throughput 4.00111K wps
[Epoch 1 Batch 540/2125] avg loss 0.0048286, throughput 3.99672K wps
[Epoch 1 Batch 570/2125] avg loss 0.00518976, throughput 3.9972K wps
[Epoch 1 Batch 600/2125] avg loss 0.00482811, throughput 3.99797K wps
[Epoch 1 Batch 630/2125] avg loss 0.00523104, throughput 3.99996K wps
[Epoch 1 Batch 660/2125] avg loss 0.00526784, throughput 3.99531K wps
[Epoch 1 Batch 690/2125] avg loss 0.0052043, throughput 4.00332K wps
[Epoch 1 Batch 720/2125] avg loss 0.00445159, throughput 4.00849K wps
[Epoch 1 Batch 750/2125] avg loss 0.00489004, throughput 4.00589K wps
[Epoch 1 Batch 780/2125] avg loss 0.0050508, throughput 3.9984K wps
[Epoch 1 Batch 810/2125] avg loss 0.00532554, throughput 4.00045K wps
[Epoch 1 Batch 840/2125] avg loss 0.00457348, throughput 3.99803K wps
[Epoch 1 Batch 870/2125] avg loss 0.00447419, throughput 3.99703K wps
[Epoch 1 Batch 900/2125] avg loss 0.00454232, throughput 3.99857K wps
[Epoch 1 Batch 930/2125] avg loss 0.0045241, throughput 3.99841K wps
[Epoch 1 Batch 960/2125] avg loss 0.00446654, throughput 3.99464K wps
[Epoch 1 Batch 990/2125] avg loss 0.00481538, throughput 3.99886K wps
[Epoch 1 Batch 1020/2125] avg loss 0.00493456, throughput 4.00295K wps
[Epoch 1 Batch 1050/2125] avg loss 0.00483817, throughput 3.99934K wps
[Epoch 1 Batch 1080/2125] avg loss 0.00496186, throughput 3.99858K wps
[Epoch 1 Batch 1110/2125] avg loss 0.00481145, throughput 3.9978K wps
[Epoch 1 Batch 1140/2125] avg loss 0.00481299, throughput 3.9983K wps
[Epoch 1 Batch 1170/2125] avg loss 0.00478651, throughput 3.999K wps
[Epoch 1 Batch 1200/2125] avg loss 0.00484942, throughput 3.99996K wps
[Epoch 1 Batch 1230/2125] avg loss 0.00425814, throughput 3.99893K wps
[Epoch 1 Batch 1260/2125] avg loss 0.00429702, throughput 3.99778K wps
[Epoch 1 Batch 1290/2125] avg loss 0.00455624, throughput 3.99912K wps
[Epoch 1 Batch 1320/2125] avg loss 0.00477553, throughput 4.0015K wps
[Epoch 1 Batch 1350/2125] avg loss 0.00486682, throughput 4.00088K wps
[Epoch 1 Batch 1380/2125] avg loss 0.00457054, throughput 4.00098K wps
[Epoch 1 Batch 1410/2125] avg loss 0.00447109, throughput 3.99609K wps
[Epoch 1 Batch 1440/2125] avg loss 0.00483, throughput 3.99917K wps
[Epoch 1 Batch 1470/2125] avg loss 0.00463008, throughput 3.99754K wps
[Epoch 1 Batch 1500/2125] avg loss 0.00478489, throughput 3.99667K wps
[Epoch 1 Batch 1530/2125] avg loss 0.00442297, throughput 3.99891K wps
[Epoch 1 Batch 1560/2125] avg loss 0.00465089, throughput 3.99869K wps
[Epoch 1 Batch 1590/2125] avg loss 0.00441596, throughput 4.00165K wps
[Epoch 1 Batch 1620/2125] avg loss 0.00482146, throughput 3.99899K wps
[Epoch 1 Batch 1650/2125] avg loss 0.00451658, throughput 4.00158K wps
[Epoch 1 Batch 1680/2125] avg loss 0.00471935, throughput 3.99911K wps
[Epoch 1 Batch 1710/2125] avg loss 0.00454052, throughput 3.99986K wps
[Epoch 1 Batch 1740/2125] avg loss 0.00462583, throughput 4.00117K wps
[Epoch 1 Batch 1770/2125] avg loss 0.00459405, throughput 4.00014K wps
[Epoch 1 Batch 1800/2125] avg loss 0.00472798, throughput 3.99881K wps
[Epoch 1 Batch 1830/2125] avg loss 0.00475623, throughput 3.99817K wps
[Epoch 1 Batch 1860/2125] avg loss 0.00406364, throughput 3.99465K wps
[Epoch 1 Batch 1890/2125] avg loss 0.00494156, throughput 3.99573K wps
[Epoch 1 Batch 1920/2125] avg loss 0.00450352, throughput 3.99914K wps
[Epoch 1 Batch 1950/2125] avg loss 0.00504218, throughput 4.00008K wps
[Epoch 1 Batch 1980/2125] avg loss 0.00509062, throughput 3.99711K wps
[Epoch 1 Batch 2010/2125] avg loss 0.00451889, throughput 3.99997K wps
[Epoch 1 Batch 2040/2125] avg loss 0.00473392, throughput 4.00022K wps
[Epoch 1 Batch 2070/2125] avg loss 0.00440976, throughput 3.99826K wps
[Epoch 1 Batch 2100/2125] avg loss 0.00495309, throughput 3.99512K wps
Begin Testing...
[Batch 30/237] elapsed 0.48 s
[Batch 60/237] elapsed 0.44 s
[Batch 90/237] elapsed 0.44 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 1] train avg loss 0.00481669, test acc 0.9076, test avg loss 0.24216, throughput 4.00031K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 2 Batch 30/2125] avg loss 0.00372768, throughput 4.08903K wps
[Epoch 2 Batch 60/2125] avg loss 0.00375142, throughput 3.99836K wps
[Epoch 2 Batch 90/2125] avg loss 0.00421321, throughput 4.00193K wps
[Epoch 2 Batch 120/2125] avg loss 0.00375105, throughput 3.99626K wps
[Epoch 2 Batch 150/2125] avg loss 0.00424413, throughput 4.00474K wps
[Epoch 2 Batch 180/2125] avg loss 0.00412048, throughput 4.00238K wps
[Epoch 2 Batch 210/2125] avg loss 0.00406056, throughput 4.00128K wps
[Epoch 2 Batch 240/2125] avg loss 0.00404164, throughput 3.99992K wps
[Epoch 2 Batch 270/2125] avg loss 0.00349389, throughput 4.00198K wps
[Epoch 2 Batch 300/2125] avg loss 0.00429312, throughput 4.00009K wps
[Epoch 2 Batch 330/2125] avg loss 0.00384681, throughput 3.99972K wps
[Epoch 2 Batch 360/2125] avg loss 0.00365196, throughput 4.00152K wps
[Epoch 2 Batch 390/2125] avg loss 0.00458708, throughput 3.99666K wps
[Epoch 2 Batch 420/2125] avg loss 0.00427266, throughput 3.9996K wps
[Epoch 2 Batch 450/2125] avg loss 0.00411935, throughput 4.0002K wps
[Epoch 2 Batch 480/2125] avg loss 0.00385362, throughput 4.00362K wps
[Epoch 2 Batch 510/2125] avg loss 0.00410252, throughput 3.99808K wps
[Epoch 2 Batch 540/2125] avg loss 0.00409695, throughput 4.00171K wps
[Epoch 2 Batch 570/2125] avg loss 0.00401114, throughput 3.99919K wps
[Epoch 2 Batch 600/2125] avg loss 0.0038817, throughput 4.00085K wps
[Epoch 2 Batch 630/2125] avg loss 0.00371004, throughput 4.00038K wps
[Epoch 2 Batch 660/2125] avg loss 0.0037653, throughput 3.99807K wps
[Epoch 2 Batch 690/2125] avg loss 0.00429721, throughput 4.0003K wps
[Epoch 2 Batch 720/2125] avg loss 0.00386738, throughput 4.00036K wps
[Epoch 2 Batch 750/2125] avg loss 0.00396381, throughput 3.99859K wps
[Epoch 2 Batch 780/2125] avg loss 0.0035891, throughput 3.9995K wps
[Epoch 2 Batch 810/2125] avg loss 0.00384002, throughput 4.0002K wps
[Epoch 2 Batch 840/2125] avg loss 0.00369433, throughput 4.00073K wps
[Epoch 2 Batch 870/2125] avg loss 0.00447307, throughput 3.99554K wps
[Epoch 2 Batch 900/2125] avg loss 0.00381584, throughput 4.00025K wps
[Epoch 2 Batch 930/2125] avg loss 0.00400714, throughput 3.99807K wps
[Epoch 2 Batch 960/2125] avg loss 0.00339915, throughput 3.99543K wps
[Epoch 2 Batch 990/2125] avg loss 0.00433354, throughput 3.99834K wps
[Epoch 2 Batch 1020/2125] avg loss 0.00373597, throughput 4.0005K wps
[Epoch 2 Batch 1050/2125] avg loss 0.0037145, throughput 3.99814K wps
[Epoch 2 Batch 1080/2125] avg loss 0.00416336, throughput 3.99888K wps
[Epoch 2 Batch 1110/2125] avg loss 0.00412111, throughput 4.00026K wps
[Epoch 2 Batch 1140/2125] avg loss 0.00431221, throughput 3.99573K wps
[Epoch 2 Batch 1170/2125] avg loss 0.00363961, throughput 3.99919K wps
[Epoch 2 Batch 1200/2125] avg loss 0.00429422, throughput 3.99704K wps
[Epoch 2 Batch 1230/2125] avg loss 0.00367753, throughput 3.99945K wps
[Epoch 2 Batch 1260/2125] avg loss 0.0039543, throughput 4.00349K wps
[Epoch 2 Batch 1290/2125] avg loss 0.00391486, throughput 3.99713K wps
[Epoch 2 Batch 1320/2125] avg loss 0.003756, throughput 3.99817K wps
[Epoch 2 Batch 1350/2125] avg loss 0.00426251, throughput 3.9992K wps
[Epoch 2 Batch 1380/2125] avg loss 0.00367834, throughput 4.00101K wps
[Epoch 2 Batch 1410/2125] avg loss 0.00445956, throughput 3.99988K wps
[Epoch 2 Batch 1440/2125] avg loss 0.0036721, throughput 4.00143K wps
[Epoch 2 Batch 1470/2125] avg loss 0.00411154, throughput 3.99852K wps
[Epoch 2 Batch 1500/2125] avg loss 0.00377809, throughput 4.0006K wps
[Epoch 2 Batch 1530/2125] avg loss 0.00376677, throughput 3.99998K wps
[Epoch 2 Batch 1560/2125] avg loss 0.00426462, throughput 3.99308K wps
[Epoch 2 Batch 1590/2125] avg loss 0.00394671, throughput 3.99943K wps
[Epoch 2 Batch 1620/2125] avg loss 0.00354415, throughput 3.9959K wps
[Epoch 2 Batch 1650/2125] avg loss 0.00412582, throughput 4.00173K wps
[Epoch 2 Batch 1680/2125] avg loss 0.00421245, throughput 3.99636K wps
[Epoch 2 Batch 1710/2125] avg loss 0.0038108, throughput 3.99586K wps
[Epoch 2 Batch 1740/2125] avg loss 0.00407157, throughput 3.99694K wps
[Epoch 2 Batch 1770/2125] avg loss 0.00368507, throughput 3.99741K wps
[Epoch 2 Batch 1800/2125] avg loss 0.00411122, throughput 3.99853K wps
[Epoch 2 Batch 1830/2125] avg loss 0.00341367, throughput 3.99659K wps
[Epoch 2 Batch 1860/2125] avg loss 0.00379787, throughput 3.99688K wps
[Epoch 2 Batch 1890/2125] avg loss 0.00373053, throughput 3.99825K wps
[Epoch 2 Batch 1920/2125] avg loss 0.00395751, throughput 3.99939K wps
[Epoch 2 Batch 1950/2125] avg loss 0.00377079, throughput 3.99675K wps
[Epoch 2 Batch 1980/2125] avg loss 0.00372288, throughput 3.99618K wps
[Epoch 2 Batch 2010/2125] avg loss 0.00387072, throughput 4.00164K wps
[Epoch 2 Batch 2040/2125] avg loss 0.00374262, throughput 3.9979K wps
[Epoch 2 Batch 2070/2125] avg loss 0.00371304, throughput 3.99782K wps
[Epoch 2 Batch 2100/2125] avg loss 0.00408101, throughput 4.00084K wps
Begin Testing...
[Batch 30/237] elapsed 0.46 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 2] train avg loss 0.00393602, test acc 0.9140, test avg loss 0.230929, throughput 4.0004K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 3 Batch 30/2125] avg loss 0.0033904, throughput 4.09276K wps
[Epoch 3 Batch 60/2125] avg loss 0.00363944, throughput 3.99807K wps
[Epoch 3 Batch 90/2125] avg loss 0.00346443, throughput 3.99326K wps
[Epoch 3 Batch 120/2125] avg loss 0.00354299, throughput 3.99663K wps
[Epoch 3 Batch 150/2125] avg loss 0.00330513, throughput 3.99947K wps
[Epoch 3 Batch 180/2125] avg loss 0.00386889, throughput 3.99914K wps
[Epoch 3 Batch 210/2125] avg loss 0.003229, throughput 3.99901K wps
[Epoch 3 Batch 240/2125] avg loss 0.00308446, throughput 3.99368K wps
[Epoch 3 Batch 270/2125] avg loss 0.0031067, throughput 3.99616K wps
[Epoch 3 Batch 300/2125] avg loss 0.00319471, throughput 3.99572K wps
[Epoch 3 Batch 330/2125] avg loss 0.0031411, throughput 3.99688K wps
[Epoch 3 Batch 360/2125] avg loss 0.00364092, throughput 3.99281K wps
[Epoch 3 Batch 390/2125] avg loss 0.00291069, throughput 3.99397K wps
[Epoch 3 Batch 420/2125] avg loss 0.00343475, throughput 3.99523K wps
[Epoch 3 Batch 450/2125] avg loss 0.00383715, throughput 4.00428K wps
[Epoch 3 Batch 480/2125] avg loss 0.00378076, throughput 4.005K wps
[Epoch 3 Batch 510/2125] avg loss 0.00386261, throughput 4.00318K wps
[Epoch 3 Batch 540/2125] avg loss 0.00384428, throughput 3.99722K wps
[Epoch 3 Batch 570/2125] avg loss 0.00335969, throughput 3.99745K wps
[Epoch 3 Batch 600/2125] avg loss 0.00360967, throughput 3.9984K wps
[Epoch 3 Batch 630/2125] avg loss 0.0031967, throughput 3.99796K wps
[Epoch 3 Batch 660/2125] avg loss 0.00339834, throughput 3.99808K wps
[Epoch 3 Batch 690/2125] avg loss 0.0030571, throughput 3.99524K wps
[Epoch 3 Batch 720/2125] avg loss 0.00367219, throughput 3.99747K wps
[Epoch 3 Batch 750/2125] avg loss 0.00337101, throughput 3.99773K wps
[Epoch 3 Batch 780/2125] avg loss 0.00314582, throughput 3.99382K wps
[Epoch 3 Batch 810/2125] avg loss 0.00379513, throughput 4.00088K wps
[Epoch 3 Batch 840/2125] avg loss 0.00343282, throughput 4.00088K wps
[Epoch 3 Batch 870/2125] avg loss 0.0033154, throughput 3.99781K wps
[Epoch 3 Batch 900/2125] avg loss 0.00337585, throughput 3.99328K wps
[Epoch 3 Batch 930/2125] avg loss 0.00346683, throughput 3.99658K wps
[Epoch 3 Batch 960/2125] avg loss 0.00318301, throughput 3.99935K wps
[Epoch 3 Batch 990/2125] avg loss 0.00326784, throughput 3.99995K wps
[Epoch 3 Batch 1020/2125] avg loss 0.00330093, throughput 3.9993K wps
[Epoch 3 Batch 1050/2125] avg loss 0.0035539, throughput 3.99703K wps
[Epoch 3 Batch 1080/2125] avg loss 0.00358457, throughput 3.99678K wps
[Epoch 3 Batch 1110/2125] avg loss 0.00334674, throughput 3.99963K wps
[Epoch 3 Batch 1140/2125] avg loss 0.00376865, throughput 4.00068K wps
[Epoch 3 Batch 1170/2125] avg loss 0.00345749, throughput 3.99935K wps
[Epoch 3 Batch 1200/2125] avg loss 0.00370953, throughput 3.99696K wps
[Epoch 3 Batch 1230/2125] avg loss 0.00337568, throughput 3.9979K wps
[Epoch 3 Batch 1260/2125] avg loss 0.00337902, throughput 4.00158K wps
[Epoch 3 Batch 1290/2125] avg loss 0.00346691, throughput 4.00419K wps
[Epoch 3 Batch 1320/2125] avg loss 0.00334159, throughput 3.98596K wps
[Epoch 3 Batch 1350/2125] avg loss 0.00336824, throughput 3.98322K wps
[Epoch 3 Batch 1380/2125] avg loss 0.00289602, throughput 4.00355K wps
[Epoch 3 Batch 1410/2125] avg loss 0.00309783, throughput 3.9991K wps
[Epoch 3 Batch 1440/2125] avg loss 0.00324806, throughput 4.00235K wps
[Epoch 3 Batch 1470/2125] avg loss 0.00317644, throughput 3.99644K wps
[Epoch 3 Batch 1500/2125] avg loss 0.00335172, throughput 3.9968K wps
[Epoch 3 Batch 1530/2125] avg loss 0.00307378, throughput 3.99854K wps
[Epoch 3 Batch 1560/2125] avg loss 0.00350744, throughput 4.00058K wps
[Epoch 3 Batch 1590/2125] avg loss 0.00340111, throughput 3.99818K wps
[Epoch 3 Batch 1620/2125] avg loss 0.00334096, throughput 3.99496K wps
[Epoch 3 Batch 1650/2125] avg loss 0.00394288, throughput 3.99416K wps
[Epoch 3 Batch 1680/2125] avg loss 0.00379437, throughput 3.99374K wps
[Epoch 3 Batch 1710/2125] avg loss 0.00352824, throughput 4.00517K wps
[Epoch 3 Batch 1740/2125] avg loss 0.00384453, throughput 3.99972K wps
[Epoch 3 Batch 1770/2125] avg loss 0.00368801, throughput 4.00482K wps
[Epoch 3 Batch 1800/2125] avg loss 0.0033688, throughput 4.00369K wps
[Epoch 3 Batch 1830/2125] avg loss 0.00365095, throughput 4.00813K wps
[Epoch 3 Batch 1860/2125] avg loss 0.00314778, throughput 4.00519K wps
[Epoch 3 Batch 1890/2125] avg loss 0.00344119, throughput 4.00323K wps
[Epoch 3 Batch 1920/2125] avg loss 0.00409361, throughput 4.00673K wps
[Epoch 3 Batch 1950/2125] avg loss 0.00384816, throughput 3.9972K wps
[Epoch 3 Batch 1980/2125] avg loss 0.00356137, throughput 4.0022K wps
[Epoch 3 Batch 2010/2125] avg loss 0.00351984, throughput 3.99787K wps
[Epoch 3 Batch 2040/2125] avg loss 0.00330064, throughput 4.00271K wps
[Epoch 3 Batch 2070/2125] avg loss 0.00372276, throughput 3.99924K wps
[Epoch 3 Batch 2100/2125] avg loss 0.00314503, throughput 3.9998K wps
Begin Testing...
[Batch 30/237] elapsed 0.46 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 3] train avg loss 0.00344526, test acc 0.9171, test avg loss 0.231881, throughput 3.99986K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 4 Batch 30/2125] avg loss 0.00275388, throughput 4.09485K wps
[Epoch 4 Batch 60/2125] avg loss 0.00279117, throughput 3.99671K wps
[Epoch 4 Batch 90/2125] avg loss 0.00307231, throughput 3.99559K wps
[Epoch 4 Batch 120/2125] avg loss 0.00312418, throughput 3.99535K wps
[Epoch 4 Batch 150/2125] avg loss 0.00340846, throughput 3.99539K wps
[Epoch 4 Batch 180/2125] avg loss 0.00251215, throughput 3.99665K wps
[Epoch 4 Batch 210/2125] avg loss 0.00296142, throughput 4.00003K wps
[Epoch 4 Batch 240/2125] avg loss 0.00309944, throughput 3.99956K wps
[Epoch 4 Batch 270/2125] avg loss 0.00305135, throughput 3.99804K wps
[Epoch 4 Batch 300/2125] avg loss 0.00278939, throughput 3.99771K wps
[Epoch 4 Batch 330/2125] avg loss 0.00307862, throughput 3.99755K wps
[Epoch 4 Batch 360/2125] avg loss 0.00295552, throughput 4.00047K wps
[Epoch 4 Batch 390/2125] avg loss 0.00298802, throughput 3.99751K wps
[Epoch 4 Batch 420/2125] avg loss 0.00290689, throughput 4.00038K wps
[Epoch 4 Batch 450/2125] avg loss 0.00291306, throughput 3.99849K wps
[Epoch 4 Batch 480/2125] avg loss 0.00295056, throughput 3.99739K wps
[Epoch 4 Batch 510/2125] avg loss 0.00282286, throughput 3.99817K wps
[Epoch 4 Batch 540/2125] avg loss 0.00277874, throughput 3.99842K wps
[Epoch 4 Batch 570/2125] avg loss 0.00344663, throughput 4.00122K wps
[Epoch 4 Batch 600/2125] avg loss 0.00316646, throughput 4.00003K wps
[Epoch 4 Batch 630/2125] avg loss 0.00300245, throughput 3.99967K wps
[Epoch 4 Batch 660/2125] avg loss 0.00300958, throughput 3.99948K wps
[Epoch 4 Batch 690/2125] avg loss 0.00293653, throughput 3.9974K wps
[Epoch 4 Batch 720/2125] avg loss 0.00286842, throughput 3.99788K wps
[Epoch 4 Batch 750/2125] avg loss 0.00277752, throughput 3.99875K wps
[Epoch 4 Batch 780/2125] avg loss 0.00270723, throughput 4.00128K wps
[Epoch 4 Batch 810/2125] avg loss 0.00339673, throughput 3.99962K wps
[Epoch 4 Batch 840/2125] avg loss 0.00347367, throughput 3.99809K wps
[Epoch 4 Batch 870/2125] avg loss 0.00309338, throughput 4.00137K wps
[Epoch 4 Batch 900/2125] avg loss 0.00288506, throughput 3.99747K wps
[Epoch 4 Batch 930/2125] avg loss 0.00312296, throughput 3.99729K wps
[Epoch 4 Batch 960/2125] avg loss 0.00325099, throughput 4.00118K wps
[Epoch 4 Batch 990/2125] avg loss 0.00302203, throughput 3.99933K wps
[Epoch 4 Batch 1020/2125] avg loss 0.00291513, throughput 3.99783K wps
[Epoch 4 Batch 1050/2125] avg loss 0.00332072, throughput 3.99681K wps
[Epoch 4 Batch 1080/2125] avg loss 0.00324292, throughput 4.00157K wps
[Epoch 4 Batch 1110/2125] avg loss 0.0030892, throughput 3.99716K wps
[Epoch 4 Batch 1140/2125] avg loss 0.002979, throughput 4.00198K wps
[Epoch 4 Batch 1170/2125] avg loss 0.00300227, throughput 4.00176K wps
[Epoch 4 Batch 1200/2125] avg loss 0.00309862, throughput 3.99897K wps
[Epoch 4 Batch 1230/2125] avg loss 0.00327812, throughput 3.99971K wps
[Epoch 4 Batch 1260/2125] avg loss 0.00296275, throughput 4.00055K wps
[Epoch 4 Batch 1290/2125] avg loss 0.00295405, throughput 3.99969K wps
[Epoch 4 Batch 1320/2125] avg loss 0.00360768, throughput 4.00235K wps
[Epoch 4 Batch 1350/2125] avg loss 0.00292452, throughput 3.98739K wps
[Epoch 4 Batch 1380/2125] avg loss 0.00288385, throughput 3.9972K wps
[Epoch 4 Batch 1410/2125] avg loss 0.00313842, throughput 3.99222K wps
[Epoch 4 Batch 1440/2125] avg loss 0.00317818, throughput 3.98766K wps
[Epoch 4 Batch 1470/2125] avg loss 0.00308852, throughput 3.99424K wps
[Epoch 4 Batch 1500/2125] avg loss 0.00320696, throughput 3.9957K wps
[Epoch 4 Batch 1530/2125] avg loss 0.00282071, throughput 3.99144K wps
[Epoch 4 Batch 1560/2125] avg loss 0.00301076, throughput 3.99821K wps
[Epoch 4 Batch 1590/2125] avg loss 0.00300079, throughput 3.99844K wps
[Epoch 4 Batch 1620/2125] avg loss 0.00283279, throughput 3.98945K wps
[Epoch 4 Batch 1650/2125] avg loss 0.00327742, throughput 3.99459K wps
[Epoch 4 Batch 1680/2125] avg loss 0.00262472, throughput 3.9953K wps
[Epoch 4 Batch 1710/2125] avg loss 0.00303903, throughput 3.99364K wps
[Epoch 4 Batch 1740/2125] avg loss 0.00305432, throughput 3.99123K wps
[Epoch 4 Batch 1770/2125] avg loss 0.00264639, throughput 3.99864K wps
[Epoch 4 Batch 1800/2125] avg loss 0.00319014, throughput 3.99453K wps
[Epoch 4 Batch 1830/2125] avg loss 0.00336713, throughput 3.99245K wps
[Epoch 4 Batch 1860/2125] avg loss 0.00308155, throughput 3.99563K wps
[Epoch 4 Batch 1890/2125] avg loss 0.00359417, throughput 3.99548K wps
[Epoch 4 Batch 1920/2125] avg loss 0.00308635, throughput 3.9988K wps
[Epoch 4 Batch 1950/2125] avg loss 0.00341302, throughput 3.99974K wps
[Epoch 4 Batch 1980/2125] avg loss 0.00324131, throughput 3.99628K wps
[Epoch 4 Batch 2010/2125] avg loss 0.00339585, throughput 4.00152K wps
[Epoch 4 Batch 2040/2125] avg loss 0.0028757, throughput 3.99672K wps
[Epoch 4 Batch 2070/2125] avg loss 0.00338727, throughput 3.99844K wps
[Epoch 4 Batch 2100/2125] avg loss 0.00329966, throughput 3.99514K wps
Begin Testing...
[Batch 30/237] elapsed 0.46 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 4] train avg loss 0.00306041, test acc 0.9193, test avg loss 0.232051, throughput 3.9988K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 5 Batch 30/2125] avg loss 0.0022825, throughput 4.09718K wps
[Epoch 5 Batch 60/2125] avg loss 0.00254244, throughput 4.00326K wps
[Epoch 5 Batch 90/2125] avg loss 0.00259583, throughput 4.00383K wps
[Epoch 5 Batch 120/2125] avg loss 0.00259168, throughput 3.99592K wps
[Epoch 5 Batch 150/2125] avg loss 0.00278133, throughput 3.99464K wps
[Epoch 5 Batch 180/2125] avg loss 0.00256978, throughput 3.99731K wps
[Epoch 5 Batch 210/2125] avg loss 0.0028489, throughput 4.00237K wps
[Epoch 5 Batch 240/2125] avg loss 0.00234232, throughput 4.0006K wps
[Epoch 5 Batch 270/2125] avg loss 0.00275093, throughput 3.99601K wps
[Epoch 5 Batch 300/2125] avg loss 0.00308546, throughput 3.99846K wps
[Epoch 5 Batch 330/2125] avg loss 0.00294743, throughput 3.99919K wps
[Epoch 5 Batch 360/2125] avg loss 0.00239277, throughput 3.99715K wps
[Epoch 5 Batch 390/2125] avg loss 0.00240688, throughput 3.99695K wps
[Epoch 5 Batch 420/2125] avg loss 0.00302727, throughput 3.99842K wps
[Epoch 5 Batch 450/2125] avg loss 0.00290547, throughput 4.00039K wps
[Epoch 5 Batch 480/2125] avg loss 0.00291568, throughput 4.00077K wps
[Epoch 5 Batch 510/2125] avg loss 0.0022932, throughput 3.95203K wps
[Epoch 5 Batch 540/2125] avg loss 0.00253938, throughput 3.91343K wps
[Epoch 5 Batch 570/2125] avg loss 0.00290635, throughput 3.9948K wps
[Epoch 5 Batch 600/2125] avg loss 0.00255613, throughput 3.99381K wps
[Epoch 5 Batch 630/2125] avg loss 0.0025742, throughput 3.99282K wps
[Epoch 5 Batch 660/2125] avg loss 0.00280975, throughput 3.99607K wps
[Epoch 5 Batch 690/2125] avg loss 0.00303493, throughput 3.99957K wps
[Epoch 5 Batch 720/2125] avg loss 0.00262448, throughput 3.9975K wps
[Epoch 5 Batch 750/2125] avg loss 0.00295974, throughput 3.9925K wps
[Epoch 5 Batch 780/2125] avg loss 0.00278173, throughput 3.99061K wps
[Epoch 5 Batch 810/2125] avg loss 0.00311076, throughput 3.98913K wps
[Epoch 5 Batch 840/2125] avg loss 0.00276734, throughput 3.97578K wps
[Epoch 5 Batch 870/2125] avg loss 0.00253087, throughput 3.9432K wps
[Epoch 5 Batch 900/2125] avg loss 0.00314625, throughput 3.98029K wps
[Epoch 5 Batch 930/2125] avg loss 0.00256389, throughput 3.9947K wps
[Epoch 5 Batch 960/2125] avg loss 0.00303426, throughput 3.99888K wps
[Epoch 5 Batch 990/2125] avg loss 0.00309291, throughput 4.00066K wps
[Epoch 5 Batch 1020/2125] avg loss 0.00349506, throughput 3.99828K wps
[Epoch 5 Batch 1050/2125] avg loss 0.0030527, throughput 3.99809K wps
[Epoch 5 Batch 1080/2125] avg loss 0.0031657, throughput 3.99843K wps
[Epoch 5 Batch 1110/2125] avg loss 0.00252705, throughput 3.99707K wps
[Epoch 5 Batch 1140/2125] avg loss 0.00271469, throughput 3.9968K wps
[Epoch 5 Batch 1170/2125] avg loss 0.00339467, throughput 3.99625K wps
[Epoch 5 Batch 1200/2125] avg loss 0.00267632, throughput 3.99773K wps
[Epoch 5 Batch 1230/2125] avg loss 0.00313533, throughput 3.99311K wps
[Epoch 5 Batch 1260/2125] avg loss 0.00282691, throughput 3.99371K wps
[Epoch 5 Batch 1290/2125] avg loss 0.00260989, throughput 3.99549K wps
[Epoch 5 Batch 1320/2125] avg loss 0.00274335, throughput 3.99704K wps
[Epoch 5 Batch 1350/2125] avg loss 0.00263737, throughput 3.99598K wps
[Epoch 5 Batch 1380/2125] avg loss 0.00268219, throughput 3.99997K wps
[Epoch 5 Batch 1410/2125] avg loss 0.00296077, throughput 3.99854K wps
[Epoch 5 Batch 1440/2125] avg loss 0.00314575, throughput 3.99998K wps
[Epoch 5 Batch 1470/2125] avg loss 0.00246876, throughput 4.002K wps
[Epoch 5 Batch 1500/2125] avg loss 0.0025774, throughput 3.99884K wps
[Epoch 5 Batch 1530/2125] avg loss 0.00252392, throughput 3.99729K wps
[Epoch 5 Batch 1560/2125] avg loss 0.00261165, throughput 4.00009K wps
[Epoch 5 Batch 1590/2125] avg loss 0.00310957, throughput 4.00064K wps
[Epoch 5 Batch 1620/2125] avg loss 0.00252723, throughput 4.00013K wps
[Epoch 5 Batch 1650/2125] avg loss 0.00263221, throughput 3.99356K wps
[Epoch 5 Batch 1680/2125] avg loss 0.00283974, throughput 3.99471K wps
[Epoch 5 Batch 1710/2125] avg loss 0.00248055, throughput 3.99706K wps
[Epoch 5 Batch 1740/2125] avg loss 0.00279796, throughput 3.99294K wps
[Epoch 5 Batch 1770/2125] avg loss 0.00272356, throughput 3.99453K wps
[Epoch 5 Batch 1800/2125] avg loss 0.00261568, throughput 3.99291K wps
[Epoch 5 Batch 1830/2125] avg loss 0.00229801, throughput 4.00706K wps
[Epoch 5 Batch 1860/2125] avg loss 0.00305227, throughput 4.00175K wps
[Epoch 5 Batch 1890/2125] avg loss 0.00305707, throughput 4.00184K wps
[Epoch 5 Batch 1920/2125] avg loss 0.00277188, throughput 4.00803K wps
[Epoch 5 Batch 1950/2125] avg loss 0.0030973, throughput 4.00178K wps
[Epoch 5 Batch 1980/2125] avg loss 0.00323209, throughput 4.00807K wps
[Epoch 5 Batch 2010/2125] avg loss 0.00313434, throughput 4.00589K wps
[Epoch 5 Batch 2040/2125] avg loss 0.00297003, throughput 3.9988K wps
[Epoch 5 Batch 2070/2125] avg loss 0.00287839, throughput 4.002K wps
[Epoch 5 Batch 2100/2125] avg loss 0.00237589, throughput 4.00351K wps
Begin Testing...
[Batch 30/237] elapsed 0.45 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 5] train avg loss 0.00278417, test acc 0.9218, test avg loss 0.240415, throughput 3.99647K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 6 Batch 30/2125] avg loss 0.00198337, throughput 4.09125K wps
[Epoch 6 Batch 60/2125] avg loss 0.00250973, throughput 4.00059K wps
[Epoch 6 Batch 90/2125] avg loss 0.00224365, throughput 3.99929K wps
[Epoch 6 Batch 120/2125] avg loss 0.00244529, throughput 3.99571K wps
[Epoch 6 Batch 150/2125] avg loss 0.00233686, throughput 3.99807K wps
[Epoch 6 Batch 180/2125] avg loss 0.00228603, throughput 4.00047K wps
[Epoch 6 Batch 210/2125] avg loss 0.00247067, throughput 3.99854K wps
[Epoch 6 Batch 240/2125] avg loss 0.00192245, throughput 3.99817K wps
[Epoch 6 Batch 270/2125] avg loss 0.00264432, throughput 4.00229K wps
[Epoch 6 Batch 300/2125] avg loss 0.00225173, throughput 3.9957K wps
[Epoch 6 Batch 330/2125] avg loss 0.00246615, throughput 3.99887K wps
[Epoch 6 Batch 360/2125] avg loss 0.00195428, throughput 4.00334K wps
[Epoch 6 Batch 390/2125] avg loss 0.00263974, throughput 4.0072K wps
[Epoch 6 Batch 420/2125] avg loss 0.00336309, throughput 3.99809K wps
[Epoch 6 Batch 450/2125] avg loss 0.00251252, throughput 4.00382K wps
[Epoch 6 Batch 480/2125] avg loss 0.00273801, throughput 4.00413K wps
[Epoch 6 Batch 510/2125] avg loss 0.00235722, throughput 4.00315K wps
[Epoch 6 Batch 540/2125] avg loss 0.00213768, throughput 4.00051K wps
[Epoch 6 Batch 570/2125] avg loss 0.0025257, throughput 3.99737K wps
[Epoch 6 Batch 600/2125] avg loss 0.00233726, throughput 3.99771K wps
[Epoch 6 Batch 630/2125] avg loss 0.00260488, throughput 3.99993K wps
[Epoch 6 Batch 660/2125] avg loss 0.00251389, throughput 3.99632K wps
[Epoch 6 Batch 690/2125] avg loss 0.00265432, throughput 3.99869K wps
[Epoch 6 Batch 720/2125] avg loss 0.00274276, throughput 3.99677K wps
[Epoch 6 Batch 750/2125] avg loss 0.00234884, throughput 3.99436K wps
[Epoch 6 Batch 780/2125] avg loss 0.002565, throughput 3.9968K wps
[Epoch 6 Batch 810/2125] avg loss 0.00249897, throughput 3.99947K wps
[Epoch 6 Batch 840/2125] avg loss 0.0021836, throughput 4.0004K wps
[Epoch 6 Batch 870/2125] avg loss 0.00277331, throughput 4.00056K wps
[Epoch 6 Batch 900/2125] avg loss 0.00299542, throughput 3.99939K wps
[Epoch 6 Batch 930/2125] avg loss 0.00235707, throughput 3.99501K wps
[Epoch 6 Batch 960/2125] avg loss 0.00233503, throughput 3.99501K wps
[Epoch 6 Batch 990/2125] avg loss 0.0021442, throughput 3.99792K wps
[Epoch 6 Batch 1020/2125] avg loss 0.00238372, throughput 3.99819K wps
[Epoch 6 Batch 1050/2125] avg loss 0.00257481, throughput 3.9963K wps
[Epoch 6 Batch 1080/2125] avg loss 0.00239753, throughput 3.99762K wps
[Epoch 6 Batch 1110/2125] avg loss 0.00260934, throughput 3.99565K wps
[Epoch 6 Batch 1140/2125] avg loss 0.0020846, throughput 3.99698K wps
[Epoch 6 Batch 1170/2125] avg loss 0.00252877, throughput 3.99645K wps
[Epoch 6 Batch 1200/2125] avg loss 0.00241086, throughput 4.00087K wps
[Epoch 6 Batch 1230/2125] avg loss 0.00222946, throughput 3.99477K wps
[Epoch 6 Batch 1260/2125] avg loss 0.00228515, throughput 3.99639K wps
[Epoch 6 Batch 1290/2125] avg loss 0.00268957, throughput 3.99807K wps
[Epoch 6 Batch 1320/2125] avg loss 0.00296045, throughput 3.99524K wps
[Epoch 6 Batch 1350/2125] avg loss 0.00286201, throughput 3.99815K wps
[Epoch 6 Batch 1380/2125] avg loss 0.00224584, throughput 4.00231K wps
[Epoch 6 Batch 1410/2125] avg loss 0.0027368, throughput 3.99931K wps
[Epoch 6 Batch 1440/2125] avg loss 0.00278062, throughput 4.00013K wps
[Epoch 6 Batch 1470/2125] avg loss 0.00251595, throughput 3.9947K wps
[Epoch 6 Batch 1500/2125] avg loss 0.00262117, throughput 3.9967K wps
[Epoch 6 Batch 1530/2125] avg loss 0.00263141, throughput 3.99964K wps
[Epoch 6 Batch 1560/2125] avg loss 0.0022681, throughput 3.99425K wps
[Epoch 6 Batch 1590/2125] avg loss 0.00248618, throughput 3.99993K wps
[Epoch 6 Batch 1620/2125] avg loss 0.00291531, throughput 3.99706K wps
[Epoch 6 Batch 1650/2125] avg loss 0.00255724, throughput 3.99795K wps
[Epoch 6 Batch 1680/2125] avg loss 0.00259566, throughput 3.9947K wps
[Epoch 6 Batch 1710/2125] avg loss 0.00279095, throughput 4.00166K wps
[Epoch 6 Batch 1740/2125] avg loss 0.00261297, throughput 4.00298K wps
[Epoch 6 Batch 1770/2125] avg loss 0.00249802, throughput 4.00489K wps
[Epoch 6 Batch 1800/2125] avg loss 0.00282959, throughput 4.00854K wps
[Epoch 6 Batch 1830/2125] avg loss 0.00324875, throughput 4.00491K wps
[Epoch 6 Batch 1860/2125] avg loss 0.00265956, throughput 3.99918K wps
[Epoch 6 Batch 1890/2125] avg loss 0.00224365, throughput 3.99895K wps
[Epoch 6 Batch 1920/2125] avg loss 0.00303943, throughput 3.98367K wps
[Epoch 6 Batch 1950/2125] avg loss 0.00273962, throughput 3.99777K wps
[Epoch 6 Batch 1980/2125] avg loss 0.00305446, throughput 4.00023K wps
[Epoch 6 Batch 2010/2125] avg loss 0.00290733, throughput 3.9979K wps
[Epoch 6 Batch 2040/2125] avg loss 0.00230875, throughput 3.99444K wps
[Epoch 6 Batch 2070/2125] avg loss 0.00290851, throughput 4.001K wps
[Epoch 6 Batch 2100/2125] avg loss 0.00306109, throughput 3.99765K wps
Begin Testing...
[Batch 30/237] elapsed 0.46 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 6] train avg loss 0.00254322, test acc 0.9216, test avg loss 0.246337, throughput 3.99997K wps
[Epoch 7 Batch 30/2125] avg loss 0.00211947, throughput 4.08196K wps
[Epoch 7 Batch 60/2125] avg loss 0.00216626, throughput 4.0011K wps
[Epoch 7 Batch 90/2125] avg loss 0.00223903, throughput 4.00228K wps
[Epoch 7 Batch 120/2125] avg loss 0.00215377, throughput 4.00116K wps
[Epoch 7 Batch 150/2125] avg loss 0.00222163, throughput 4.00163K wps
[Epoch 7 Batch 180/2125] avg loss 0.0021045, throughput 4.00566K wps
[Epoch 7 Batch 210/2125] avg loss 0.00213662, throughput 4.00286K wps
[Epoch 7 Batch 240/2125] avg loss 0.00167227, throughput 3.99828K wps
[Epoch 7 Batch 270/2125] avg loss 0.00208361, throughput 4.00174K wps
[Epoch 7 Batch 300/2125] avg loss 0.00257552, throughput 3.99728K wps
[Epoch 7 Batch 330/2125] avg loss 0.00217625, throughput 4.00005K wps
[Epoch 7 Batch 360/2125] avg loss 0.0022611, throughput 4.00104K wps
[Epoch 7 Batch 390/2125] avg loss 0.00197589, throughput 4.00051K wps
[Epoch 7 Batch 420/2125] avg loss 0.00245764, throughput 4.00054K wps
[Epoch 7 Batch 450/2125] avg loss 0.00208687, throughput 3.99801K wps
[Epoch 7 Batch 480/2125] avg loss 0.00188943, throughput 3.99609K wps
[Epoch 7 Batch 510/2125] avg loss 0.00226759, throughput 3.99779K wps
[Epoch 7 Batch 540/2125] avg loss 0.00273505, throughput 3.99812K wps
[Epoch 7 Batch 570/2125] avg loss 0.00225724, throughput 3.9989K wps
[Epoch 7 Batch 600/2125] avg loss 0.00234034, throughput 4.00058K wps
[Epoch 7 Batch 630/2125] avg loss 0.0022215, throughput 3.99763K wps
[Epoch 7 Batch 660/2125] avg loss 0.00208174, throughput 3.99299K wps
[Epoch 7 Batch 690/2125] avg loss 0.00236086, throughput 4.00054K wps
[Epoch 7 Batch 720/2125] avg loss 0.00238492, throughput 3.99898K wps
[Epoch 7 Batch 750/2125] avg loss 0.00264821, throughput 3.99996K wps
[Epoch 7 Batch 780/2125] avg loss 0.00205827, throughput 3.99851K wps
[Epoch 7 Batch 810/2125] avg loss 0.00241725, throughput 3.99528K wps
[Epoch 7 Batch 840/2125] avg loss 0.00206261, throughput 3.99716K wps
[Epoch 7 Batch 870/2125] avg loss 0.0021823, throughput 3.99769K wps
[Epoch 7 Batch 900/2125] avg loss 0.00228839, throughput 3.99582K wps
[Epoch 7 Batch 930/2125] avg loss 0.00204104, throughput 3.99635K wps
[Epoch 7 Batch 960/2125] avg loss 0.00264661, throughput 3.99854K wps
[Epoch 7 Batch 990/2125] avg loss 0.00192559, throughput 3.99475K wps
[Epoch 7 Batch 1020/2125] avg loss 0.00219127, throughput 3.99536K wps
[Epoch 7 Batch 1050/2125] avg loss 0.00218674, throughput 3.99791K wps
[Epoch 7 Batch 1080/2125] avg loss 0.00231877, throughput 3.99704K wps
[Epoch 7 Batch 1110/2125] avg loss 0.00282527, throughput 4.00033K wps
[Epoch 7 Batch 1140/2125] avg loss 0.00266714, throughput 3.99473K wps
[Epoch 7 Batch 1170/2125] avg loss 0.00243537, throughput 3.9993K wps
[Epoch 7 Batch 1200/2125] avg loss 0.00225484, throughput 3.99617K wps
[Epoch 7 Batch 1230/2125] avg loss 0.00259499, throughput 3.99892K wps
[Epoch 7 Batch 1260/2125] avg loss 0.00240174, throughput 3.99977K wps
[Epoch 7 Batch 1290/2125] avg loss 0.00199789, throughput 4.0004K wps
[Epoch 7 Batch 1320/2125] avg loss 0.00256965, throughput 3.99705K wps
[Epoch 7 Batch 1350/2125] avg loss 0.00210882, throughput 3.99913K wps
[Epoch 7 Batch 1380/2125] avg loss 0.00242438, throughput 3.99631K wps
[Epoch 7 Batch 1410/2125] avg loss 0.00237462, throughput 3.99911K wps
[Epoch 7 Batch 1440/2125] avg loss 0.00222942, throughput 3.99948K wps
[Epoch 7 Batch 1470/2125] avg loss 0.00209812, throughput 3.99927K wps
[Epoch 7 Batch 1500/2125] avg loss 0.00198964, throughput 3.99764K wps
[Epoch 7 Batch 1530/2125] avg loss 0.00270763, throughput 3.99622K wps
[Epoch 7 Batch 1560/2125] avg loss 0.00235467, throughput 3.9983K wps
[Epoch 7 Batch 1590/2125] avg loss 0.00251543, throughput 3.99825K wps
[Epoch 7 Batch 1620/2125] avg loss 0.00260508, throughput 4K wps
[Epoch 7 Batch 1650/2125] avg loss 0.00292433, throughput 4.00132K wps
[Epoch 7 Batch 1680/2125] avg loss 0.00254386, throughput 4.00117K wps
[Epoch 7 Batch 1710/2125] avg loss 0.00236536, throughput 3.99931K wps
[Epoch 7 Batch 1740/2125] avg loss 0.00286541, throughput 3.99772K wps
[Epoch 7 Batch 1770/2125] avg loss 0.00227186, throughput 4.00194K wps
[Epoch 7 Batch 1800/2125] avg loss 0.00247652, throughput 4.00084K wps
[Epoch 7 Batch 1830/2125] avg loss 0.00285552, throughput 3.99837K wps
[Epoch 7 Batch 1860/2125] avg loss 0.00263854, throughput 4.00332K wps
[Epoch 7 Batch 1890/2125] avg loss 0.00210071, throughput 3.99905K wps
[Epoch 7 Batch 1920/2125] avg loss 0.00239953, throughput 3.99889K wps
[Epoch 7 Batch 1950/2125] avg loss 0.00243826, throughput 3.99726K wps
[Epoch 7 Batch 1980/2125] avg loss 0.00210164, throughput 4.00014K wps
[Epoch 7 Batch 2010/2125] avg loss 0.00249143, throughput 4.00154K wps
[Epoch 7 Batch 2040/2125] avg loss 0.00235105, throughput 3.99744K wps
[Epoch 7 Batch 2070/2125] avg loss 0.00244036, throughput 4.00064K wps
[Epoch 7 Batch 2100/2125] avg loss 0.00239664, throughput 3.99818K wps
Begin Testing...
[Batch 30/237] elapsed 0.45 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 7] train avg loss 0.00232972, test acc 0.9245, test avg loss 0.253364, throughput 4.00008K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 8 Batch 30/2125] avg loss 0.0020827, throughput 4.08588K wps
[Epoch 8 Batch 60/2125] avg loss 0.00166811, throughput 3.99781K wps
[Epoch 8 Batch 90/2125] avg loss 0.00183865, throughput 4.00016K wps
[Epoch 8 Batch 120/2125] avg loss 0.00189961, throughput 3.99895K wps
[Epoch 8 Batch 150/2125] avg loss 0.00180559, throughput 4.00035K wps
[Epoch 8 Batch 180/2125] avg loss 0.0019114, throughput 3.99805K wps
[Epoch 8 Batch 210/2125] avg loss 0.00219012, throughput 4.00271K wps
[Epoch 8 Batch 240/2125] avg loss 0.00183277, throughput 3.99982K wps
[Epoch 8 Batch 270/2125] avg loss 0.00218164, throughput 4.00023K wps
[Epoch 8 Batch 300/2125] avg loss 0.00208982, throughput 4.0004K wps
[Epoch 8 Batch 330/2125] avg loss 0.00183288, throughput 4.00056K wps
[Epoch 8 Batch 360/2125] avg loss 0.0019262, throughput 3.99983K wps
[Epoch 8 Batch 390/2125] avg loss 0.00209054, throughput 3.99926K wps
[Epoch 8 Batch 420/2125] avg loss 0.00204555, throughput 4.00233K wps
[Epoch 8 Batch 450/2125] avg loss 0.00196361, throughput 4.00054K wps
[Epoch 8 Batch 480/2125] avg loss 0.00196424, throughput 4.00061K wps
[Epoch 8 Batch 510/2125] avg loss 0.00216374, throughput 4.0001K wps
[Epoch 8 Batch 540/2125] avg loss 0.00195603, throughput 3.99901K wps
[Epoch 8 Batch 570/2125] avg loss 0.0021751, throughput 3.99855K wps
[Epoch 8 Batch 600/2125] avg loss 0.0020346, throughput 4.0006K wps
[Epoch 8 Batch 630/2125] avg loss 0.00181399, throughput 3.99713K wps
[Epoch 8 Batch 660/2125] avg loss 0.00225244, throughput 3.99765K wps
[Epoch 8 Batch 690/2125] avg loss 0.00238105, throughput 3.99962K wps
[Epoch 8 Batch 720/2125] avg loss 0.0018422, throughput 4.00004K wps
[Epoch 8 Batch 750/2125] avg loss 0.00242821, throughput 3.99898K wps
[Epoch 8 Batch 780/2125] avg loss 0.00207147, throughput 3.99943K wps
[Epoch 8 Batch 810/2125] avg loss 0.00208339, throughput 4.00256K wps
[Epoch 8 Batch 840/2125] avg loss 0.00226943, throughput 3.99919K wps
[Epoch 8 Batch 870/2125] avg loss 0.0026114, throughput 3.99728K wps
[Epoch 8 Batch 900/2125] avg loss 0.00210617, throughput 3.99655K wps
[Epoch 8 Batch 930/2125] avg loss 0.00180289, throughput 3.99468K wps
[Epoch 8 Batch 960/2125] avg loss 0.00241649, throughput 3.99824K wps
[Epoch 8 Batch 990/2125] avg loss 0.00245444, throughput 3.99623K wps
[Epoch 8 Batch 1020/2125] avg loss 0.00218489, throughput 3.99707K wps
[Epoch 8 Batch 1050/2125] avg loss 0.00218768, throughput 4.00232K wps
[Epoch 8 Batch 1080/2125] avg loss 0.00207225, throughput 4.00212K wps
[Epoch 8 Batch 1110/2125] avg loss 0.00221815, throughput 3.9928K wps
[Epoch 8 Batch 1140/2125] avg loss 0.00209712, throughput 3.9893K wps
[Epoch 8 Batch 1170/2125] avg loss 0.00221913, throughput 4.00356K wps
[Epoch 8 Batch 1200/2125] avg loss 0.00226165, throughput 3.99796K wps
[Epoch 8 Batch 1230/2125] avg loss 0.00201832, throughput 4.00438K wps
[Epoch 8 Batch 1260/2125] avg loss 0.00200858, throughput 4.00192K wps
[Epoch 8 Batch 1290/2125] avg loss 0.00229307, throughput 3.99903K wps
[Epoch 8 Batch 1320/2125] avg loss 0.00231625, throughput 4.00374K wps
[Epoch 8 Batch 1350/2125] avg loss 0.00198324, throughput 4.00078K wps
[Epoch 8 Batch 1380/2125] avg loss 0.00246484, throughput 4.0005K wps
[Epoch 8 Batch 1410/2125] avg loss 0.00231798, throughput 4.00114K wps
[Epoch 8 Batch 1440/2125] avg loss 0.00201665, throughput 3.99913K wps
[Epoch 8 Batch 1470/2125] avg loss 0.002221, throughput 3.9986K wps
[Epoch 8 Batch 1500/2125] avg loss 0.00222868, throughput 4.00223K wps
[Epoch 8 Batch 1530/2125] avg loss 0.00242457, throughput 3.99674K wps
[Epoch 8 Batch 1560/2125] avg loss 0.00213446, throughput 3.99848K wps
[Epoch 8 Batch 1590/2125] avg loss 0.00216858, throughput 3.99487K wps
[Epoch 8 Batch 1620/2125] avg loss 0.00208444, throughput 4.00031K wps
[Epoch 8 Batch 1650/2125] avg loss 0.00221152, throughput 3.99825K wps
[Epoch 8 Batch 1680/2125] avg loss 0.0017208, throughput 4.00128K wps
[Epoch 8 Batch 1710/2125] avg loss 0.0021396, throughput 3.9925K wps
[Epoch 8 Batch 1740/2125] avg loss 0.00228274, throughput 3.99886K wps
[Epoch 8 Batch 1770/2125] avg loss 0.00281929, throughput 4.00089K wps
[Epoch 8 Batch 1800/2125] avg loss 0.00242054, throughput 4.00066K wps
[Epoch 8 Batch 1830/2125] avg loss 0.0023918, throughput 4.00082K wps
[Epoch 8 Batch 1860/2125] avg loss 0.00233443, throughput 3.99899K wps
[Epoch 8 Batch 1890/2125] avg loss 0.00201438, throughput 3.9994K wps
[Epoch 8 Batch 1920/2125] avg loss 0.00204531, throughput 3.99648K wps
[Epoch 8 Batch 1950/2125] avg loss 0.00209994, throughput 3.99477K wps
[Epoch 8 Batch 1980/2125] avg loss 0.00264662, throughput 3.99454K wps
[Epoch 8 Batch 2010/2125] avg loss 0.0022478, throughput 3.99845K wps
[Epoch 8 Batch 2040/2125] avg loss 0.00249431, throughput 4.0008K wps
[Epoch 8 Batch 2070/2125] avg loss 0.00247069, throughput 3.99903K wps
[Epoch 8 Batch 2100/2125] avg loss 0.0020063, throughput 3.99913K wps
Begin Testing...
[Batch 30/237] elapsed 0.46 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 8] train avg loss 0.00215639, test acc 0.9241, test avg loss 0.262648, throughput 4.00035K wps
[Epoch 9 Batch 30/2125] avg loss 0.00182424, throughput 4.09643K wps
[Epoch 9 Batch 60/2125] avg loss 0.00195425, throughput 4.00526K wps
[Epoch 9 Batch 90/2125] avg loss 0.001745, throughput 4.00641K wps
[Epoch 9 Batch 120/2125] avg loss 0.00179037, throughput 3.99767K wps
[Epoch 9 Batch 150/2125] avg loss 0.00182356, throughput 3.99757K wps
[Epoch 9 Batch 180/2125] avg loss 0.00149395, throughput 3.99629K wps
[Epoch 9 Batch 210/2125] avg loss 0.00149949, throughput 3.99575K wps
[Epoch 9 Batch 240/2125] avg loss 0.00191736, throughput 3.99876K wps
[Epoch 9 Batch 270/2125] avg loss 0.00200521, throughput 3.99996K wps
[Epoch 9 Batch 300/2125] avg loss 0.00176404, throughput 3.99886K wps
[Epoch 9 Batch 330/2125] avg loss 0.00158298, throughput 3.99654K wps
[Epoch 9 Batch 360/2125] avg loss 0.00206157, throughput 4.00124K wps
[Epoch 9 Batch 390/2125] avg loss 0.00171151, throughput 3.99779K wps
[Epoch 9 Batch 420/2125] avg loss 0.00217365, throughput 3.9971K wps
[Epoch 9 Batch 450/2125] avg loss 0.00203262, throughput 3.99628K wps
[Epoch 9 Batch 480/2125] avg loss 0.00203901, throughput 3.99772K wps
[Epoch 9 Batch 510/2125] avg loss 0.00200545, throughput 3.99308K wps
[Epoch 9 Batch 540/2125] avg loss 0.00198275, throughput 3.9994K wps
[Epoch 9 Batch 570/2125] avg loss 0.00166026, throughput 3.99679K wps
[Epoch 9 Batch 600/2125] avg loss 0.00190238, throughput 3.99661K wps
[Epoch 9 Batch 630/2125] avg loss 0.00194891, throughput 3.99943K wps
[Epoch 9 Batch 660/2125] avg loss 0.0016567, throughput 4.00027K wps
[Epoch 9 Batch 690/2125] avg loss 0.0019335, throughput 4.0002K wps
[Epoch 9 Batch 720/2125] avg loss 0.0018421, throughput 3.99595K wps
[Epoch 9 Batch 750/2125] avg loss 0.00193512, throughput 3.9977K wps
[Epoch 9 Batch 780/2125] avg loss 0.0017217, throughput 3.99664K wps
[Epoch 9 Batch 810/2125] avg loss 0.00193914, throughput 3.99872K wps
[Epoch 9 Batch 840/2125] avg loss 0.00176153, throughput 4.00374K wps
[Epoch 9 Batch 870/2125] avg loss 0.00180941, throughput 4.00074K wps
[Epoch 9 Batch 900/2125] avg loss 0.00183327, throughput 4.0013K wps
[Epoch 9 Batch 930/2125] avg loss 0.00184996, throughput 4.00096K wps
[Epoch 9 Batch 960/2125] avg loss 0.00206765, throughput 4.00277K wps
[Epoch 9 Batch 990/2125] avg loss 0.00191093, throughput 4.00622K wps
[Epoch 9 Batch 1020/2125] avg loss 0.00190371, throughput 4.00448K wps
[Epoch 9 Batch 1050/2125] avg loss 0.00189038, throughput 4.00244K wps
[Epoch 9 Batch 1080/2125] avg loss 0.00207938, throughput 3.99382K wps
[Epoch 9 Batch 1110/2125] avg loss 0.00179534, throughput 3.99816K wps
[Epoch 9 Batch 1140/2125] avg loss 0.00200375, throughput 3.99704K wps
[Epoch 9 Batch 1170/2125] avg loss 0.00196437, throughput 3.9998K wps
[Epoch 9 Batch 1200/2125] avg loss 0.00222712, throughput 3.9978K wps
[Epoch 9 Batch 1230/2125] avg loss 0.00201903, throughput 3.99927K wps
[Epoch 9 Batch 1260/2125] avg loss 0.00196267, throughput 3.99576K wps
[Epoch 9 Batch 1290/2125] avg loss 0.00202064, throughput 3.99884K wps
[Epoch 9 Batch 1320/2125] avg loss 0.00185087, throughput 3.99767K wps
[Epoch 9 Batch 1350/2125] avg loss 0.00196528, throughput 3.99957K wps
[Epoch 9 Batch 1380/2125] avg loss 0.00180156, throughput 3.99766K wps
[Epoch 9 Batch 1410/2125] avg loss 0.00218442, throughput 3.99556K wps
[Epoch 9 Batch 1440/2125] avg loss 0.00179648, throughput 4.00405K wps
[Epoch 9 Batch 1470/2125] avg loss 0.00235052, throughput 3.99494K wps
[Epoch 9 Batch 1500/2125] avg loss 0.0021911, throughput 3.99862K wps
[Epoch 9 Batch 1530/2125] avg loss 0.00237207, throughput 4.00154K wps
[Epoch 9 Batch 1560/2125] avg loss 0.00223092, throughput 3.99876K wps
[Epoch 9 Batch 1590/2125] avg loss 0.00202236, throughput 3.99741K wps
[Epoch 9 Batch 1620/2125] avg loss 0.00197483, throughput 4.00178K wps
[Epoch 9 Batch 1650/2125] avg loss 0.00181497, throughput 3.99542K wps
[Epoch 9 Batch 1680/2125] avg loss 0.00208414, throughput 4.00048K wps
[Epoch 9 Batch 1710/2125] avg loss 0.00222789, throughput 3.9945K wps
[Epoch 9 Batch 1740/2125] avg loss 0.00239479, throughput 4.00021K wps
[Epoch 9 Batch 1770/2125] avg loss 0.0022883, throughput 3.99964K wps
[Epoch 9 Batch 1800/2125] avg loss 0.00221379, throughput 3.99406K wps
[Epoch 9 Batch 1830/2125] avg loss 0.00223983, throughput 3.99378K wps
[Epoch 9 Batch 1860/2125] avg loss 0.00252473, throughput 3.99987K wps
[Epoch 9 Batch 1890/2125] avg loss 0.0021407, throughput 3.99839K wps
[Epoch 9 Batch 1920/2125] avg loss 0.0022464, throughput 3.99638K wps
[Epoch 9 Batch 1950/2125] avg loss 0.0023794, throughput 3.99852K wps
[Epoch 9 Batch 1980/2125] avg loss 0.00234652, throughput 3.99516K wps
[Epoch 9 Batch 2010/2125] avg loss 0.00191696, throughput 3.9941K wps
[Epoch 9 Batch 2040/2125] avg loss 0.00231407, throughput 3.99785K wps
[Epoch 9 Batch 2070/2125] avg loss 0.00237412, throughput 3.99953K wps
[Epoch 9 Batch 2100/2125] avg loss 0.00193697, throughput 4.00012K wps
Begin Testing...
[Batch 30/237] elapsed 0.45 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 9] train avg loss 0.00198911, test acc 0.9251, test avg loss 0.271076, throughput 3.99999K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.43 s
[Epoch 10 Batch 30/2125] avg loss 0.00161246, throughput 4.09126K wps
[Epoch 10 Batch 60/2125] avg loss 0.00159329, throughput 3.99932K wps
[Epoch 10 Batch 90/2125] avg loss 0.0019143, throughput 3.99465K wps
[Epoch 10 Batch 120/2125] avg loss 0.0020381, throughput 4.00127K wps
[Epoch 10 Batch 150/2125] avg loss 0.00150598, throughput 3.999K wps
[Epoch 10 Batch 180/2125] avg loss 0.00174184, throughput 4.00071K wps
[Epoch 10 Batch 210/2125] avg loss 0.00168061, throughput 3.99671K wps
[Epoch 10 Batch 240/2125] avg loss 0.00181365, throughput 3.99878K wps
[Epoch 10 Batch 270/2125] avg loss 0.00179769, throughput 4.0012K wps
[Epoch 10 Batch 300/2125] avg loss 0.00188586, throughput 3.99768K wps
[Epoch 10 Batch 330/2125] avg loss 0.0014642, throughput 3.98215K wps
[Epoch 10 Batch 360/2125] avg loss 0.00166928, throughput 3.99509K wps
[Epoch 10 Batch 390/2125] avg loss 0.00191644, throughput 3.99794K wps
[Epoch 10 Batch 420/2125] avg loss 0.00183531, throughput 3.99619K wps
[Epoch 10 Batch 450/2125] avg loss 0.00216231, throughput 4.00189K wps
[Epoch 10 Batch 480/2125] avg loss 0.00190306, throughput 4.00107K wps
[Epoch 10 Batch 510/2125] avg loss 0.00163717, throughput 4.00417K wps
[Epoch 10 Batch 540/2125] avg loss 0.00160676, throughput 3.99857K wps
[Epoch 10 Batch 570/2125] avg loss 0.00162635, throughput 4.00301K wps
[Epoch 10 Batch 600/2125] avg loss 0.00152909, throughput 3.99942K wps
[Epoch 10 Batch 630/2125] avg loss 0.00185673, throughput 4.00266K wps
[Epoch 10 Batch 660/2125] avg loss 0.00191634, throughput 3.99901K wps
[Epoch 10 Batch 690/2125] avg loss 0.00177302, throughput 3.99902K wps
[Epoch 10 Batch 720/2125] avg loss 0.00211625, throughput 3.99884K wps
[Epoch 10 Batch 750/2125] avg loss 0.0018063, throughput 3.99773K wps
[Epoch 10 Batch 780/2125] avg loss 0.0016285, throughput 4.00008K wps
[Epoch 10 Batch 810/2125] avg loss 0.00169169, throughput 3.99621K wps
[Epoch 10 Batch 840/2125] avg loss 0.00197939, throughput 3.99379K wps
[Epoch 10 Batch 870/2125] avg loss 0.00179231, throughput 3.99566K wps
[Epoch 10 Batch 900/2125] avg loss 0.00184352, throughput 3.997K wps
[Epoch 10 Batch 930/2125] avg loss 0.00181249, throughput 3.99749K wps
[Epoch 10 Batch 960/2125] avg loss 0.00187933, throughput 4.00072K wps
[Epoch 10 Batch 990/2125] avg loss 0.00206006, throughput 3.99698K wps
[Epoch 10 Batch 1020/2125] avg loss 0.00187025, throughput 3.99508K wps
[Epoch 10 Batch 1050/2125] avg loss 0.00178138, throughput 3.99899K wps
[Epoch 10 Batch 1080/2125] avg loss 0.0020451, throughput 3.99974K wps
[Epoch 10 Batch 1110/2125] avg loss 0.0016697, throughput 4.0017K wps
[Epoch 10 Batch 1140/2125] avg loss 0.00182663, throughput 3.99757K wps
[Epoch 10 Batch 1170/2125] avg loss 0.00188183, throughput 3.99361K wps
[Epoch 10 Batch 1200/2125] avg loss 0.00170622, throughput 3.99972K wps
[Epoch 10 Batch 1230/2125] avg loss 0.00177944, throughput 3.99696K wps
[Epoch 10 Batch 1260/2125] avg loss 0.00182049, throughput 3.99608K wps
[Epoch 10 Batch 1290/2125] avg loss 0.00163063, throughput 3.99697K wps
[Epoch 10 Batch 1320/2125] avg loss 0.00191474, throughput 3.99931K wps
[Epoch 10 Batch 1350/2125] avg loss 0.00188029, throughput 3.99655K wps
[Epoch 10 Batch 1380/2125] avg loss 0.00179538, throughput 4.00111K wps
[Epoch 10 Batch 1410/2125] avg loss 0.00173436, throughput 3.99731K wps
[Epoch 10 Batch 1440/2125] avg loss 0.00164505, throughput 3.99958K wps
[Epoch 10 Batch 1470/2125] avg loss 0.00185766, throughput 3.99577K wps
[Epoch 10 Batch 1500/2125] avg loss 0.00220893, throughput 3.99594K wps
[Epoch 10 Batch 1530/2125] avg loss 0.00192623, throughput 3.99696K wps
[Epoch 10 Batch 1560/2125] avg loss 0.00181159, throughput 3.99943K wps
[Epoch 10 Batch 1590/2125] avg loss 0.00193049, throughput 3.99928K wps
[Epoch 10 Batch 1620/2125] avg loss 0.00211622, throughput 3.99835K wps
[Epoch 10 Batch 1650/2125] avg loss 0.00190381, throughput 3.998K wps
[Epoch 10 Batch 1680/2125] avg loss 0.00217869, throughput 3.99809K wps
[Epoch 10 Batch 1710/2125] avg loss 0.00187785, throughput 3.99869K wps
[Epoch 10 Batch 1740/2125] avg loss 0.00203472, throughput 3.99849K wps
[Epoch 10 Batch 1770/2125] avg loss 0.00190781, throughput 3.99754K wps
[Epoch 10 Batch 1800/2125] avg loss 0.00188542, throughput 3.99738K wps
[Epoch 10 Batch 1830/2125] avg loss 0.00204809, throughput 3.9966K wps
[Epoch 10 Batch 1860/2125] avg loss 0.001891, throughput 3.99548K wps
[Epoch 10 Batch 1890/2125] avg loss 0.00238344, throughput 3.9953K wps
[Epoch 10 Batch 1920/2125] avg loss 0.00178786, throughput 3.9976K wps
[Epoch 10 Batch 1950/2125] avg loss 0.00167753, throughput 3.99735K wps
[Epoch 10 Batch 1980/2125] avg loss 0.00193586, throughput 3.99321K wps
[Epoch 10 Batch 2010/2125] avg loss 0.00214882, throughput 3.99876K wps
[Epoch 10 Batch 2040/2125] avg loss 0.00230855, throughput 3.99868K wps
[Epoch 10 Batch 2070/2125] avg loss 0.00166984, throughput 3.99802K wps
[Epoch 10 Batch 2100/2125] avg loss 0.00148174, throughput 3.99952K wps
Begin Testing...
[Batch 30/237] elapsed 0.46 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s
[Batch 120/237] elapsed 0.43 s
[Batch 150/237] elapsed 0.43 s
[Batch 180/237] elapsed 0.43 s
[Batch 210/237] elapsed 0.43 s
[Epoch 10] train avg loss 0.00184587, test acc 0.9241, test avg loss 0.284485, throughput 3.99923K wps
[Epoch 11 Batch 30/2125] avg loss 0.00148285, throughput 4.09668K wps
[Epoch 11 Batch 60/2125] avg loss 0.00188634, throughput 3.99774K wps
[Epoch 11 Batch 90/2125] avg loss 0.00162976, throughput 3.99675K wps
[Epoch 11 Batch 120/2125] avg loss 0.00157317, throughput 3.99944K wps
[Epoch 11 Batch 150/2125] avg loss 0.00147451, throughput 4.00497K wps
[Epoch 11 Batch 180/2125] avg loss 0.00166872, throughput 3.99442K wps
[Epoch 11 Batch 210/2125] avg loss 0.00151675, throughput 3.99885K wps
[Epoch 11 Batch 240/2125] avg loss 0.00131241, throughput 3.99976K wps
[Epoch 11 Batch 270/2125] avg loss 0.00131386, throughput 3.99652K wps
[Epoch 11 Batch 300/2125] avg loss 0.00155076, throughput 4.00128K wps
[Epoch 11 Batch 330/2125] avg loss 0.00168219, throughput 3.99642K wps
[Epoch 11 Batch 360/2125] avg loss 0.00151938, throughput 3.99718K wps
[Epoch 11 Batch 390/2125] avg loss 0.00139004, throughput 3.99945K wps
[Epoch 11 Batch 420/2125] avg loss 0.00125788, throughput 3.99748K wps
[Epoch 11 Batch 450/2125] avg loss 0.0018591, throughput 4.00055K wps
[Epoch 11 Batch 480/2125] avg loss 0.00192561, throughput 3.9961K wps
[Epoch 11 Batch 510/2125] avg loss 0.00155708, throughput 3.99642K wps
[Epoch 11 Batch 540/2125] avg loss 0.00166069, throughput 3.99789K wps
[Epoch 11 Batch 570/2125] avg loss 0.00190451, throughput 3.99819K wps
[Epoch 11 Batch 600/2125] avg loss 0.00142731, throughput 3.99835K wps
[Epoch 11 Batch 630/2125] avg loss 0.00186795, throughput 4.00037K wps
[Epoch 11 Batch 660/2125] avg loss 0.00165996, throughput 4.00036K wps
[Epoch 11 Batch 690/2125] avg loss 0.00155261, throughput 3.9992K wps
[Epoch 11 Batch 720/2125] avg loss 0.00156885, throughput 3.99483K wps
[Epoch 11 Batch 750/2125] avg loss 0.00214112, throughput 3.99616K wps
[Epoch 11 Batch 780/2125] avg loss 0.00172047, throughput 3.99405K wps
[Epoch 11 Batch 810/2125] avg loss 0.00152337, throughput 3.99626K wps
[Epoch 11 Batch 840/2125] avg loss 0.00197258, throughput 3.99745K wps
[Epoch 11 Batch 870/2125] avg loss 0.00158748, throughput 3.99385K wps
[Epoch 11 Batch 900/2125] avg loss 0.00183022, throughput 3.99837K wps
[Epoch 11 Batch 930/2125] avg loss 0.00178369, throughput 3.99804K wps
[Epoch 11 Batch 960/2125] avg loss 0.00182349, throughput 3.99499K wps
[Epoch 11 Batch 990/2125] avg loss 0.0019624, throughput 4.0023K wps
[Epoch 11 Batch 1020/2125] avg loss 0.00191406, throughput 3.99735K wps
[Epoch 11 Batch 1050/2125] avg loss 0.0017117, throughput 3.9998K wps
[Epoch 11 Batch 1080/2125] avg loss 0.00230425, throughput 3.99835K wps
[Epoch 11 Batch 1110/2125] avg loss 0.00200271, throughput 3.99991K wps
[Epoch 11 Batch 1140/2125] avg loss 0.00176632, throughput 3.99729K wps
[Epoch 11 Batch 1170/2125] avg loss 0.00205904, throughput 3.9978K wps
[Epoch 11 Batch 1200/2125] avg loss 0.00174823, throughput 3.99674K wps
[Epoch 11 Batch 1230/2125] avg loss 0.00173348, throughput 3.99545K wps
[Epoch 11 Batch 1260/2125] avg loss 0.00207612, throughput 3.99436K wps
[Epoch 11 Batch 1290/2125] avg loss 0.0014544, throughput 3.99719K wps
[Epoch 11 Batch 1320/2125] avg loss 0.00232286, throughput 3.99598K wps
[Epoch 11 Batch 1350/2125] avg loss 0.00191941, throughput 3.99804K wps
[Epoch 11 Batch 1380/2125] avg loss 0.00209322, throughput 3.99722K wps
[Epoch 11 Batch 1410/2125] avg loss 0.00159888, throughput 3.99799K wps
[Epoch 11 Batch 1440/2125] avg loss 0.00162708, throughput 3.99891K wps
[Epoch 11 Batch 1470/2125] avg loss 0.00194368, throughput 3.99486K wps
[Epoch 11 Batch 1500/2125] avg loss 0.00177019, throughput 3.99304K wps
[Epoch 11 Batch 1530/2125] avg loss 0.0017951, throughput 4K wps
[Epoch 11 Batch 1560/2125] avg loss 0.00181874, throughput 3.99595K wps
[Epoch 11 Batch 1590/2125] avg loss 0.00197475, throughput 3.9967K wps
[Epoch 11 Batch 1620/2125] avg loss 0.001995, throughput 4.00243K wps
[Epoch 11 Batch 1650/2125] avg loss 0.00191521, throughput 4.00083K wps
[Epoch 11 Batch 1680/2125] avg loss 0.00180007, throughput 3.99613K wps
[Epoch 11 Batch 1710/2125] avg loss 0.00187816, throughput 3.9999K wps
[Epoch 11 Batch 1740/2125] avg loss 0.00160779, throughput 3.97523K wps
[Epoch 11 Batch 1770/2125] avg loss 0.00215991, throughput 4.00307K wps
[Epoch 11 Batch 1800/2125] avg loss 0.00198507, throughput 4.00413K wps
[Epoch 11 Batch 1830/2125] avg loss 0.0018723, throughput 4.0059K wps
[Epoch 11 Batch 1860/2125] avg loss 0.00166865, throughput 4.00021K wps
[Epoch 11 Batch 1890/2125] avg loss 0.00177707, throughput 4.00568K wps
[Epoch 11 Batch 1920/2125] avg loss 0.00169021, throughput 3.99819K wps
[Epoch 11 Batch 1950/2125] avg loss 0.00183034, throughput 3.99815K wps
[Epoch 11 Batch 1980/2125] avg loss 0.00186279, throughput 3.99706K wps
[Epoch 11 Batch 2010/2125] avg loss 0.00158909, throughput 3.99632K wps
[Epoch 11 Batch 2040/2125] avg loss 0.00174095, throughput 3.99805K wps
[Epoch 11 Batch 2070/2125] avg loss 0.0019252, throughput 3.99746K wps
[Epoch 11 Batch 2100/2125] avg loss 0.0021274, throughput 3.99725K wps
Begin Testing...
[Batch 30/237] elapsed 0.45 s
[Batch 60/237] elapsed 0.43 s
[Batch 90/237] elapsed 0.43 s