/
SST-2_non-static.log
3236 lines (3236 loc) · 210 KB
/
SST-2_non-static.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Namespace(batch_size=50, data_name='SST-2', dropout=0.5, epochs=40, gpu=0, log_interval=30, lr=0.0001, model_mode='non-static', save_prefix='sa-model')
Use gpu0
1614
53
Done! Tokenizing Time=4.31s, #Sentences=118038
Done! Tokenizing Time=0.74s, #Sentences=1745
SentimentNet(
(embedding): Embedding(17814 -> 300, float32)
(encoder): ConvolutionalEncoder(
(_convs): HybridConcurrent(
(0): HybridSequential(
(0): Conv1D(300 -> 100, kernel_size=(3,), stride=(1,))
(1): Activation(relu)
(2): HybridLambda(<lambda>)
)
(1): HybridSequential(
(0): Conv1D(300 -> 100, kernel_size=(4,), stride=(1,))
(1): Activation(relu)
(2): HybridLambda(<lambda>)
)
(2): HybridSequential(
(0): Conv1D(300 -> 100, kernel_size=(5,), stride=(1,))
(1): Activation(relu)
(2): HybridLambda(<lambda>)
)
)
)
(output): HybridSequential(
(0): Dropout(p = 0.5, axes=())
(1): Dense(None -> 2, linear)
)
)
[Epoch 0 Batch 30/2125] avg loss 0.0146453, throughput 3.78731K wps
[Epoch 0 Batch 60/2125] avg loss 0.014539, throughput 6.06875K wps
[Epoch 0 Batch 90/2125] avg loss 0.0141482, throughput 6.06143K wps
[Epoch 0 Batch 120/2125] avg loss 0.0134685, throughput 6.06698K wps
[Epoch 0 Batch 150/2125] avg loss 0.0136063, throughput 6.05797K wps
[Epoch 0 Batch 180/2125] avg loss 0.0133574, throughput 6.06772K wps
[Epoch 0 Batch 210/2125] avg loss 0.0135234, throughput 6.06618K wps
[Epoch 0 Batch 240/2125] avg loss 0.0132481, throughput 6.06549K wps
[Epoch 0 Batch 270/2125] avg loss 0.0129035, throughput 6.06342K wps
[Epoch 0 Batch 300/2125] avg loss 0.0129844, throughput 6.05879K wps
[Epoch 0 Batch 330/2125] avg loss 0.0127407, throughput 6.06172K wps
[Epoch 0 Batch 360/2125] avg loss 0.012595, throughput 6.06912K wps
[Epoch 0 Batch 390/2125] avg loss 0.0123208, throughput 6.05861K wps
[Epoch 0 Batch 420/2125] avg loss 0.0120249, throughput 6.05417K wps
[Epoch 0 Batch 450/2125] avg loss 0.0124691, throughput 6.04935K wps
[Epoch 0 Batch 480/2125] avg loss 0.0116782, throughput 6.05774K wps
[Epoch 0 Batch 510/2125] avg loss 0.0116368, throughput 6.06589K wps
[Epoch 0 Batch 540/2125] avg loss 0.0111518, throughput 6.06781K wps
[Epoch 0 Batch 570/2125] avg loss 0.0112152, throughput 6.05775K wps
[Epoch 0 Batch 600/2125] avg loss 0.0112537, throughput 6.06604K wps
[Epoch 0 Batch 630/2125] avg loss 0.0108123, throughput 6.05512K wps
[Epoch 0 Batch 660/2125] avg loss 0.010679, throughput 6.04928K wps
[Epoch 0 Batch 690/2125] avg loss 0.0102292, throughput 6.05303K wps
[Epoch 0 Batch 720/2125] avg loss 0.0103963, throughput 6.05583K wps
[Epoch 0 Batch 750/2125] avg loss 0.00990726, throughput 6.05314K wps
[Epoch 0 Batch 780/2125] avg loss 0.00980827, throughput 6.06013K wps
[Epoch 0 Batch 810/2125] avg loss 0.00958513, throughput 6.05506K wps
[Epoch 0 Batch 840/2125] avg loss 0.00927252, throughput 6.06102K wps
[Epoch 0 Batch 870/2125] avg loss 0.00887413, throughput 6.05244K wps
[Epoch 0 Batch 900/2125] avg loss 0.00894022, throughput 6.05989K wps
[Epoch 0 Batch 930/2125] avg loss 0.0086854, throughput 6.05053K wps
[Epoch 0 Batch 960/2125] avg loss 0.0087187, throughput 6.05007K wps
[Epoch 0 Batch 990/2125] avg loss 0.0083461, throughput 6.05783K wps
[Epoch 0 Batch 1020/2125] avg loss 0.0079939, throughput 6.05415K wps
[Epoch 0 Batch 1050/2125] avg loss 0.00812602, throughput 6.05618K wps
[Epoch 0 Batch 1080/2125] avg loss 0.00776078, throughput 6.044K wps
[Epoch 0 Batch 1110/2125] avg loss 0.00784576, throughput 6.04358K wps
[Epoch 0 Batch 1140/2125] avg loss 0.00760725, throughput 6.04737K wps
[Epoch 0 Batch 1170/2125] avg loss 0.00753368, throughput 6.04838K wps
[Epoch 0 Batch 1200/2125] avg loss 0.00737966, throughput 6.05171K wps
[Epoch 0 Batch 1230/2125] avg loss 0.00701846, throughput 6.04629K wps
[Epoch 0 Batch 1260/2125] avg loss 0.0071524, throughput 6.03264K wps
[Epoch 0 Batch 1290/2125] avg loss 0.00713113, throughput 6.05256K wps
[Epoch 0 Batch 1320/2125] avg loss 0.00696586, throughput 6.04338K wps
[Epoch 0 Batch 1350/2125] avg loss 0.00669066, throughput 6.04477K wps
[Epoch 0 Batch 1380/2125] avg loss 0.00680478, throughput 6.05472K wps
[Epoch 0 Batch 1410/2125] avg loss 0.00671461, throughput 6.05227K wps
[Epoch 0 Batch 1440/2125] avg loss 0.00665872, throughput 6.05157K wps
[Epoch 0 Batch 1470/2125] avg loss 0.00692924, throughput 6.04074K wps
[Epoch 0 Batch 1500/2125] avg loss 0.00661677, throughput 6.05028K wps
[Epoch 0 Batch 1530/2125] avg loss 0.00667788, throughput 6.04644K wps
[Epoch 0 Batch 1560/2125] avg loss 0.00681418, throughput 6.04478K wps
[Epoch 0 Batch 1590/2125] avg loss 0.00619376, throughput 6.03796K wps
[Epoch 0 Batch 1620/2125] avg loss 0.00632291, throughput 6.04602K wps
[Epoch 0 Batch 1650/2125] avg loss 0.00640708, throughput 6.04553K wps
[Epoch 0 Batch 1680/2125] avg loss 0.00634838, throughput 6.03631K wps
[Epoch 0 Batch 1710/2125] avg loss 0.00634249, throughput 6.03543K wps
[Epoch 0 Batch 1740/2125] avg loss 0.00624135, throughput 6.03105K wps
[Epoch 0 Batch 1770/2125] avg loss 0.00589104, throughput 6.04064K wps
[Epoch 0 Batch 1800/2125] avg loss 0.00650942, throughput 6.03518K wps
[Epoch 0 Batch 1830/2125] avg loss 0.00615124, throughput 6.04134K wps
[Epoch 0 Batch 1860/2125] avg loss 0.00593867, throughput 6.04025K wps
[Epoch 0 Batch 1890/2125] avg loss 0.00599553, throughput 6.04631K wps
[Epoch 0 Batch 1920/2125] avg loss 0.00539364, throughput 6.04263K wps
[Epoch 0 Batch 1950/2125] avg loss 0.00604897, throughput 6.03636K wps
[Epoch 0 Batch 1980/2125] avg loss 0.00627076, throughput 6.03568K wps
[Epoch 0 Batch 2010/2125] avg loss 0.00580447, throughput 6.0317K wps
[Epoch 0 Batch 2040/2125] avg loss 0.00590063, throughput 6.04459K wps
[Epoch 0 Batch 2070/2125] avg loss 0.00524726, throughput 6.04066K wps
[Epoch 0 Batch 2100/2125] avg loss 0.00588659, throughput 6.03332K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 0] train avg loss 0.00889854, test acc 0.8930, test avg loss 0.279222, throughput 5.96662K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 1 Batch 30/2125] avg loss 0.00561269, throughput 6.18097K wps
[Epoch 1 Batch 60/2125] avg loss 0.00532812, throughput 6.03743K wps
[Epoch 1 Batch 90/2125] avg loss 0.00541954, throughput 6.04267K wps
[Epoch 1 Batch 120/2125] avg loss 0.00544336, throughput 6.03444K wps
[Epoch 1 Batch 150/2125] avg loss 0.00520972, throughput 6.02785K wps
[Epoch 1 Batch 180/2125] avg loss 0.00485277, throughput 6.03183K wps
[Epoch 1 Batch 210/2125] avg loss 0.0052734, throughput 6.02935K wps
[Epoch 1 Batch 240/2125] avg loss 0.00510196, throughput 6.03061K wps
[Epoch 1 Batch 270/2125] avg loss 0.00541268, throughput 6.02883K wps
[Epoch 1 Batch 300/2125] avg loss 0.00487362, throughput 6.03165K wps
[Epoch 1 Batch 330/2125] avg loss 0.00518811, throughput 6.03327K wps
[Epoch 1 Batch 360/2125] avg loss 0.00480206, throughput 6.02454K wps
[Epoch 1 Batch 390/2125] avg loss 0.00541896, throughput 6.02393K wps
[Epoch 1 Batch 420/2125] avg loss 0.00449905, throughput 6.02714K wps
[Epoch 1 Batch 450/2125] avg loss 0.00527598, throughput 6.03365K wps
[Epoch 1 Batch 480/2125] avg loss 0.00492536, throughput 6.02611K wps
[Epoch 1 Batch 510/2125] avg loss 0.00469485, throughput 6.03017K wps
[Epoch 1 Batch 540/2125] avg loss 0.00481728, throughput 6.03267K wps
[Epoch 1 Batch 570/2125] avg loss 0.00513394, throughput 6.01846K wps
[Epoch 1 Batch 600/2125] avg loss 0.00492796, throughput 6.04245K wps
[Epoch 1 Batch 630/2125] avg loss 0.0048846, throughput 6.03608K wps
[Epoch 1 Batch 660/2125] avg loss 0.00514504, throughput 6.0344K wps
[Epoch 1 Batch 690/2125] avg loss 0.00525688, throughput 6.03488K wps
[Epoch 1 Batch 720/2125] avg loss 0.00463184, throughput 6.0419K wps
[Epoch 1 Batch 750/2125] avg loss 0.00481528, throughput 6.0314K wps
[Epoch 1 Batch 780/2125] avg loss 0.00503578, throughput 6.03532K wps
[Epoch 1 Batch 810/2125] avg loss 0.00512864, throughput 6.03828K wps
[Epoch 1 Batch 840/2125] avg loss 0.00476532, throughput 6.02524K wps
[Epoch 1 Batch 870/2125] avg loss 0.00437941, throughput 6.02447K wps
[Epoch 1 Batch 900/2125] avg loss 0.00449429, throughput 6.03371K wps
[Epoch 1 Batch 930/2125] avg loss 0.00448071, throughput 6.03466K wps
[Epoch 1 Batch 960/2125] avg loss 0.00436185, throughput 6.03271K wps
[Epoch 1 Batch 990/2125] avg loss 0.00476991, throughput 6.03082K wps
[Epoch 1 Batch 1020/2125] avg loss 0.00485248, throughput 6.0283K wps
[Epoch 1 Batch 1050/2125] avg loss 0.00486911, throughput 6.03283K wps
[Epoch 1 Batch 1080/2125] avg loss 0.00488294, throughput 6.02241K wps
[Epoch 1 Batch 1110/2125] avg loss 0.00481628, throughput 6.02468K wps
[Epoch 1 Batch 1140/2125] avg loss 0.00483072, throughput 6.03128K wps
[Epoch 1 Batch 1170/2125] avg loss 0.0049272, throughput 6.03886K wps
[Epoch 1 Batch 1200/2125] avg loss 0.0047501, throughput 6.02111K wps
[Epoch 1 Batch 1230/2125] avg loss 0.00417017, throughput 6.02575K wps
[Epoch 1 Batch 1260/2125] avg loss 0.00433226, throughput 6.04036K wps
[Epoch 1 Batch 1290/2125] avg loss 0.00452263, throughput 6.01695K wps
[Epoch 1 Batch 1320/2125] avg loss 0.00474539, throughput 6.01992K wps
[Epoch 1 Batch 1350/2125] avg loss 0.00485473, throughput 6.02199K wps
[Epoch 1 Batch 1380/2125] avg loss 0.00453053, throughput 6.02148K wps
[Epoch 1 Batch 1410/2125] avg loss 0.00464846, throughput 6.0261K wps
[Epoch 1 Batch 1440/2125] avg loss 0.00484329, throughput 6.02989K wps
[Epoch 1 Batch 1470/2125] avg loss 0.00461822, throughput 6.03254K wps
[Epoch 1 Batch 1500/2125] avg loss 0.004756, throughput 6.02205K wps
[Epoch 1 Batch 1530/2125] avg loss 0.0043247, throughput 6.01813K wps
[Epoch 1 Batch 1560/2125] avg loss 0.00459485, throughput 6.03643K wps
[Epoch 1 Batch 1590/2125] avg loss 0.00445727, throughput 6.03002K wps
[Epoch 1 Batch 1620/2125] avg loss 0.00475048, throughput 6.0224K wps
[Epoch 1 Batch 1650/2125] avg loss 0.00435204, throughput 6.04173K wps
[Epoch 1 Batch 1680/2125] avg loss 0.00463807, throughput 6.0345K wps
[Epoch 1 Batch 1710/2125] avg loss 0.0046142, throughput 6.02205K wps
[Epoch 1 Batch 1740/2125] avg loss 0.0047496, throughput 6.03595K wps
[Epoch 1 Batch 1770/2125] avg loss 0.00454747, throughput 6.02907K wps
[Epoch 1 Batch 1800/2125] avg loss 0.00466231, throughput 6.02229K wps
[Epoch 1 Batch 1830/2125] avg loss 0.004634, throughput 6.02346K wps
[Epoch 1 Batch 1860/2125] avg loss 0.00412036, throughput 5.99939K wps
[Epoch 1 Batch 1890/2125] avg loss 0.00490227, throughput 6.0146K wps
[Epoch 1 Batch 1920/2125] avg loss 0.00446759, throughput 6.03262K wps
[Epoch 1 Batch 1950/2125] avg loss 0.00500863, throughput 6.02351K wps
[Epoch 1 Batch 1980/2125] avg loss 0.00497393, throughput 6.02363K wps
[Epoch 1 Batch 2010/2125] avg loss 0.00456391, throughput 6.02503K wps
[Epoch 1 Batch 2040/2125] avg loss 0.00464973, throughput 6.02447K wps
[Epoch 1 Batch 2070/2125] avg loss 0.0043194, throughput 6.03011K wps
[Epoch 1 Batch 2100/2125] avg loss 0.00502493, throughput 6.03908K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 1] train avg loss 0.00480397, test acc 0.9084, test avg loss 0.241493, throughput 6.03121K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 2 Batch 30/2125] avg loss 0.00378288, throughput 6.16067K wps
[Epoch 2 Batch 60/2125] avg loss 0.00379975, throughput 6.02011K wps
[Epoch 2 Batch 90/2125] avg loss 0.0042167, throughput 6.01632K wps
[Epoch 2 Batch 120/2125] avg loss 0.00378315, throughput 6.02108K wps
[Epoch 2 Batch 150/2125] avg loss 0.00402631, throughput 6.02775K wps
[Epoch 2 Batch 180/2125] avg loss 0.00410664, throughput 6.0209K wps
[Epoch 2 Batch 210/2125] avg loss 0.00417997, throughput 6.02154K wps
[Epoch 2 Batch 240/2125] avg loss 0.00404273, throughput 6.02348K wps
[Epoch 2 Batch 270/2125] avg loss 0.00345837, throughput 6.02958K wps
[Epoch 2 Batch 300/2125] avg loss 0.00429986, throughput 6.03088K wps
[Epoch 2 Batch 330/2125] avg loss 0.0035167, throughput 6.02168K wps
[Epoch 2 Batch 360/2125] avg loss 0.00363477, throughput 6.01928K wps
[Epoch 2 Batch 390/2125] avg loss 0.00441074, throughput 6.02817K wps
[Epoch 2 Batch 420/2125] avg loss 0.00407198, throughput 6.01771K wps
[Epoch 2 Batch 450/2125] avg loss 0.00394415, throughput 6.02922K wps
[Epoch 2 Batch 480/2125] avg loss 0.00385617, throughput 6.02964K wps
[Epoch 2 Batch 510/2125] avg loss 0.00409874, throughput 6.03216K wps
[Epoch 2 Batch 540/2125] avg loss 0.00417068, throughput 6.03985K wps
[Epoch 2 Batch 570/2125] avg loss 0.00397655, throughput 6.01746K wps
[Epoch 2 Batch 600/2125] avg loss 0.00383564, throughput 6.02583K wps
[Epoch 2 Batch 630/2125] avg loss 0.00358196, throughput 6.0307K wps
[Epoch 2 Batch 660/2125] avg loss 0.00377499, throughput 6.02126K wps
[Epoch 2 Batch 690/2125] avg loss 0.00419209, throughput 6.02534K wps
[Epoch 2 Batch 720/2125] avg loss 0.00386005, throughput 6.02979K wps
[Epoch 2 Batch 750/2125] avg loss 0.00390685, throughput 6.02071K wps
[Epoch 2 Batch 780/2125] avg loss 0.00352134, throughput 6.02616K wps
[Epoch 2 Batch 810/2125] avg loss 0.00381646, throughput 6.01342K wps
[Epoch 2 Batch 840/2125] avg loss 0.00353567, throughput 6.01796K wps
[Epoch 2 Batch 870/2125] avg loss 0.00445868, throughput 6.01742K wps
[Epoch 2 Batch 900/2125] avg loss 0.00360173, throughput 6.01045K wps
[Epoch 2 Batch 930/2125] avg loss 0.00398804, throughput 6.02049K wps
[Epoch 2 Batch 960/2125] avg loss 0.00337316, throughput 6.03263K wps
[Epoch 2 Batch 990/2125] avg loss 0.00412185, throughput 6.02943K wps
[Epoch 2 Batch 1020/2125] avg loss 0.00388467, throughput 6.02132K wps
[Epoch 2 Batch 1050/2125] avg loss 0.00356656, throughput 6.01249K wps
[Epoch 2 Batch 1080/2125] avg loss 0.00390035, throughput 6.02519K wps
[Epoch 2 Batch 1110/2125] avg loss 0.0041124, throughput 6.02628K wps
[Epoch 2 Batch 1140/2125] avg loss 0.00413335, throughput 6.02239K wps
[Epoch 2 Batch 1170/2125] avg loss 0.00352991, throughput 6.01488K wps
[Epoch 2 Batch 1200/2125] avg loss 0.00438222, throughput 6.01225K wps
[Epoch 2 Batch 1230/2125] avg loss 0.0035305, throughput 6.01519K wps
[Epoch 2 Batch 1260/2125] avg loss 0.00394693, throughput 6.0281K wps
[Epoch 2 Batch 1290/2125] avg loss 0.0036885, throughput 6.02721K wps
[Epoch 2 Batch 1320/2125] avg loss 0.00382872, throughput 6.01345K wps
[Epoch 2 Batch 1350/2125] avg loss 0.0042578, throughput 6.01743K wps
[Epoch 2 Batch 1380/2125] avg loss 0.00352395, throughput 6.02091K wps
[Epoch 2 Batch 1410/2125] avg loss 0.00449212, throughput 6.02061K wps
[Epoch 2 Batch 1440/2125] avg loss 0.00367191, throughput 6.01238K wps
[Epoch 2 Batch 1470/2125] avg loss 0.00405565, throughput 6.01558K wps
[Epoch 2 Batch 1500/2125] avg loss 0.00362755, throughput 6.01427K wps
[Epoch 2 Batch 1530/2125] avg loss 0.00363849, throughput 6.01912K wps
[Epoch 2 Batch 1560/2125] avg loss 0.00419401, throughput 6.02055K wps
[Epoch 2 Batch 1590/2125] avg loss 0.00386007, throughput 6.01984K wps
[Epoch 2 Batch 1620/2125] avg loss 0.00354532, throughput 6.02261K wps
[Epoch 2 Batch 1650/2125] avg loss 0.00415341, throughput 6.0177K wps
[Epoch 2 Batch 1680/2125] avg loss 0.00411061, throughput 6.02489K wps
[Epoch 2 Batch 1710/2125] avg loss 0.00398758, throughput 6.0268K wps
[Epoch 2 Batch 1740/2125] avg loss 0.00401141, throughput 6.0102K wps
[Epoch 2 Batch 1770/2125] avg loss 0.00351123, throughput 6.02222K wps
[Epoch 2 Batch 1800/2125] avg loss 0.00408918, throughput 6.01202K wps
[Epoch 2 Batch 1830/2125] avg loss 0.00352344, throughput 6.02178K wps
[Epoch 2 Batch 1860/2125] avg loss 0.00370249, throughput 6.01689K wps
[Epoch 2 Batch 1890/2125] avg loss 0.00369327, throughput 6.01948K wps
[Epoch 2 Batch 1920/2125] avg loss 0.00402363, throughput 6.0237K wps
[Epoch 2 Batch 1950/2125] avg loss 0.00358791, throughput 6.03361K wps
[Epoch 2 Batch 1980/2125] avg loss 0.00360738, throughput 6.02192K wps
[Epoch 2 Batch 2010/2125] avg loss 0.00387046, throughput 6.02559K wps
[Epoch 2 Batch 2040/2125] avg loss 0.00372476, throughput 6.02428K wps
[Epoch 2 Batch 2070/2125] avg loss 0.00383992, throughput 6.03499K wps
[Epoch 2 Batch 2100/2125] avg loss 0.00408014, throughput 6.02132K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 2] train avg loss 0.00388178, test acc 0.9152, test avg loss 0.229892, throughput 6.02415K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 3 Batch 30/2125] avg loss 0.00329633, throughput 6.15708K wps
[Epoch 3 Batch 60/2125] avg loss 0.00341968, throughput 6.02623K wps
[Epoch 3 Batch 90/2125] avg loss 0.00343311, throughput 6.03134K wps
[Epoch 3 Batch 120/2125] avg loss 0.00349467, throughput 6.01985K wps
[Epoch 3 Batch 150/2125] avg loss 0.00319217, throughput 6.02449K wps
[Epoch 3 Batch 180/2125] avg loss 0.00372562, throughput 6.02767K wps
[Epoch 3 Batch 210/2125] avg loss 0.00319632, throughput 6.01599K wps
[Epoch 3 Batch 240/2125] avg loss 0.00300933, throughput 6.02374K wps
[Epoch 3 Batch 270/2125] avg loss 0.00288703, throughput 6.02617K wps
[Epoch 3 Batch 300/2125] avg loss 0.00316389, throughput 6.02573K wps
[Epoch 3 Batch 330/2125] avg loss 0.00312001, throughput 6.02461K wps
[Epoch 3 Batch 360/2125] avg loss 0.00350093, throughput 6.01783K wps
[Epoch 3 Batch 390/2125] avg loss 0.00296996, throughput 6.03313K wps
[Epoch 3 Batch 420/2125] avg loss 0.00321765, throughput 6.0324K wps
[Epoch 3 Batch 450/2125] avg loss 0.00364995, throughput 6.02378K wps
[Epoch 3 Batch 480/2125] avg loss 0.00361579, throughput 6.02895K wps
[Epoch 3 Batch 510/2125] avg loss 0.00366579, throughput 6.01317K wps
[Epoch 3 Batch 540/2125] avg loss 0.0036611, throughput 6.0246K wps
[Epoch 3 Batch 570/2125] avg loss 0.00355673, throughput 6.023K wps
[Epoch 3 Batch 600/2125] avg loss 0.00349155, throughput 6.02176K wps
[Epoch 3 Batch 630/2125] avg loss 0.0031149, throughput 6.01662K wps
[Epoch 3 Batch 660/2125] avg loss 0.00339862, throughput 6.02754K wps
[Epoch 3 Batch 690/2125] avg loss 0.00297542, throughput 6.02698K wps
[Epoch 3 Batch 720/2125] avg loss 0.00356097, throughput 6.02024K wps
[Epoch 3 Batch 750/2125] avg loss 0.00329657, throughput 6.01868K wps
[Epoch 3 Batch 780/2125] avg loss 0.00310237, throughput 6.01624K wps
[Epoch 3 Batch 810/2125] avg loss 0.00361155, throughput 6.02137K wps
[Epoch 3 Batch 840/2125] avg loss 0.00350197, throughput 6.02153K wps
[Epoch 3 Batch 870/2125] avg loss 0.00324632, throughput 6.01784K wps
[Epoch 3 Batch 900/2125] avg loss 0.00328363, throughput 6.02841K wps
[Epoch 3 Batch 930/2125] avg loss 0.0033988, throughput 6.02285K wps
[Epoch 3 Batch 960/2125] avg loss 0.00312493, throughput 6.00955K wps
[Epoch 3 Batch 990/2125] avg loss 0.00311125, throughput 6.02171K wps
[Epoch 3 Batch 1020/2125] avg loss 0.00317315, throughput 6.02494K wps
[Epoch 3 Batch 1050/2125] avg loss 0.00357406, throughput 6.02674K wps
[Epoch 3 Batch 1080/2125] avg loss 0.00341836, throughput 6.03112K wps
[Epoch 3 Batch 1110/2125] avg loss 0.00320742, throughput 6.01889K wps
[Epoch 3 Batch 1140/2125] avg loss 0.00361829, throughput 6.02816K wps
[Epoch 3 Batch 1170/2125] avg loss 0.0034175, throughput 6.0173K wps
[Epoch 3 Batch 1200/2125] avg loss 0.00366877, throughput 6.00778K wps
[Epoch 3 Batch 1230/2125] avg loss 0.00333758, throughput 6.00775K wps
[Epoch 3 Batch 1260/2125] avg loss 0.0033798, throughput 6.01779K wps
[Epoch 3 Batch 1290/2125] avg loss 0.00345485, throughput 6.02706K wps
[Epoch 3 Batch 1320/2125] avg loss 0.0033599, throughput 6.01528K wps
[Epoch 3 Batch 1350/2125] avg loss 0.0033127, throughput 6.02193K wps
[Epoch 3 Batch 1380/2125] avg loss 0.00291138, throughput 6.03433K wps
[Epoch 3 Batch 1410/2125] avg loss 0.00304158, throughput 6.02003K wps
[Epoch 3 Batch 1440/2125] avg loss 0.00325555, throughput 6.01303K wps
[Epoch 3 Batch 1470/2125] avg loss 0.00308925, throughput 6.01836K wps
[Epoch 3 Batch 1500/2125] avg loss 0.00342321, throughput 6.02697K wps
[Epoch 3 Batch 1530/2125] avg loss 0.00302186, throughput 6.0288K wps
[Epoch 3 Batch 1560/2125] avg loss 0.00342853, throughput 6.0139K wps
[Epoch 3 Batch 1590/2125] avg loss 0.00326577, throughput 6.01957K wps
[Epoch 3 Batch 1620/2125] avg loss 0.00320141, throughput 6.01603K wps
[Epoch 3 Batch 1650/2125] avg loss 0.00370802, throughput 6.02756K wps
[Epoch 3 Batch 1680/2125] avg loss 0.00363428, throughput 6.02024K wps
[Epoch 3 Batch 1710/2125] avg loss 0.00356219, throughput 6.01882K wps
[Epoch 3 Batch 1740/2125] avg loss 0.00374879, throughput 6.02289K wps
[Epoch 3 Batch 1770/2125] avg loss 0.00363636, throughput 6.02664K wps
[Epoch 3 Batch 1800/2125] avg loss 0.00329894, throughput 6.02246K wps
[Epoch 3 Batch 1830/2125] avg loss 0.00354369, throughput 6.02168K wps
[Epoch 3 Batch 1860/2125] avg loss 0.00309343, throughput 6.00595K wps
[Epoch 3 Batch 1890/2125] avg loss 0.00345957, throughput 6.01696K wps
[Epoch 3 Batch 1920/2125] avg loss 0.00397834, throughput 6.00512K wps
[Epoch 3 Batch 1950/2125] avg loss 0.00379751, throughput 6.01407K wps
[Epoch 3 Batch 1980/2125] avg loss 0.00315485, throughput 6.01925K wps
[Epoch 3 Batch 2010/2125] avg loss 0.00358171, throughput 6.0033K wps
[Epoch 3 Batch 2040/2125] avg loss 0.00307086, throughput 6.01343K wps
[Epoch 3 Batch 2070/2125] avg loss 0.00356996, throughput 6.01167K wps
[Epoch 3 Batch 2100/2125] avg loss 0.00297814, throughput 6.01457K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 3] train avg loss 0.00336021, test acc 0.9179, test avg loss 0.234045, throughput 6.02268K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 4 Batch 30/2125] avg loss 0.00266025, throughput 6.15885K wps
[Epoch 4 Batch 60/2125] avg loss 0.00259681, throughput 6.01388K wps
[Epoch 4 Batch 90/2125] avg loss 0.00287396, throughput 6.02441K wps
[Epoch 4 Batch 120/2125] avg loss 0.00308117, throughput 6.01646K wps
[Epoch 4 Batch 150/2125] avg loss 0.00333367, throughput 6.01788K wps
[Epoch 4 Batch 180/2125] avg loss 0.00245765, throughput 6.0311K wps
[Epoch 4 Batch 210/2125] avg loss 0.00289827, throughput 6.02066K wps
[Epoch 4 Batch 240/2125] avg loss 0.0029431, throughput 6.02755K wps
[Epoch 4 Batch 270/2125] avg loss 0.00279749, throughput 6.00569K wps
[Epoch 4 Batch 300/2125] avg loss 0.00270429, throughput 6.02326K wps
[Epoch 4 Batch 330/2125] avg loss 0.00286141, throughput 6.01547K wps
[Epoch 4 Batch 360/2125] avg loss 0.00300203, throughput 6.00617K wps
[Epoch 4 Batch 390/2125] avg loss 0.00296628, throughput 6.02361K wps
[Epoch 4 Batch 420/2125] avg loss 0.00292454, throughput 6.00537K wps
[Epoch 4 Batch 450/2125] avg loss 0.0030003, throughput 6.0249K wps
[Epoch 4 Batch 480/2125] avg loss 0.00294214, throughput 6.02609K wps
[Epoch 4 Batch 510/2125] avg loss 0.00266646, throughput 6.00804K wps
[Epoch 4 Batch 540/2125] avg loss 0.00266632, throughput 6.02019K wps
[Epoch 4 Batch 570/2125] avg loss 0.00316975, throughput 6.02607K wps
[Epoch 4 Batch 600/2125] avg loss 0.0032111, throughput 6.02006K wps
[Epoch 4 Batch 630/2125] avg loss 0.00284225, throughput 6.01752K wps
[Epoch 4 Batch 660/2125] avg loss 0.00283244, throughput 6.01101K wps
[Epoch 4 Batch 690/2125] avg loss 0.00292054, throughput 5.99128K wps
[Epoch 4 Batch 720/2125] avg loss 0.00276408, throughput 6.02985K wps
[Epoch 4 Batch 750/2125] avg loss 0.00270589, throughput 6.02231K wps
[Epoch 4 Batch 780/2125] avg loss 0.00256596, throughput 6.02188K wps
[Epoch 4 Batch 810/2125] avg loss 0.00332758, throughput 6.03238K wps
[Epoch 4 Batch 840/2125] avg loss 0.00345426, throughput 6.01958K wps
[Epoch 4 Batch 870/2125] avg loss 0.00294129, throughput 6.01155K wps
[Epoch 4 Batch 900/2125] avg loss 0.00283353, throughput 6.0167K wps
[Epoch 4 Batch 930/2125] avg loss 0.00300098, throughput 6.02054K wps
[Epoch 4 Batch 960/2125] avg loss 0.00315959, throughput 6.02545K wps
[Epoch 4 Batch 990/2125] avg loss 0.0030926, throughput 6.01709K wps
[Epoch 4 Batch 1020/2125] avg loss 0.00265336, throughput 6.01078K wps
[Epoch 4 Batch 1050/2125] avg loss 0.00330748, throughput 6.01059K wps
[Epoch 4 Batch 1080/2125] avg loss 0.00309418, throughput 6.01582K wps
[Epoch 4 Batch 1110/2125] avg loss 0.00301947, throughput 6.03444K wps
[Epoch 4 Batch 1140/2125] avg loss 0.00294563, throughput 6.01975K wps
[Epoch 4 Batch 1170/2125] avg loss 0.00291969, throughput 6.01953K wps
[Epoch 4 Batch 1200/2125] avg loss 0.00304187, throughput 6.02007K wps
[Epoch 4 Batch 1230/2125] avg loss 0.00327234, throughput 6.0126K wps
[Epoch 4 Batch 1260/2125] avg loss 0.00281882, throughput 6.02171K wps
[Epoch 4 Batch 1290/2125] avg loss 0.00297727, throughput 6.01382K wps
[Epoch 4 Batch 1320/2125] avg loss 0.00330625, throughput 6.02473K wps
[Epoch 4 Batch 1350/2125] avg loss 0.00269154, throughput 6.02435K wps
[Epoch 4 Batch 1380/2125] avg loss 0.00283565, throughput 6.01511K wps
[Epoch 4 Batch 1410/2125] avg loss 0.00313254, throughput 6.0156K wps
[Epoch 4 Batch 1440/2125] avg loss 0.00305659, throughput 6.01877K wps
[Epoch 4 Batch 1470/2125] avg loss 0.00307607, throughput 6.01865K wps
[Epoch 4 Batch 1500/2125] avg loss 0.00299291, throughput 6.02178K wps
[Epoch 4 Batch 1530/2125] avg loss 0.00275254, throughput 6.0312K wps
[Epoch 4 Batch 1560/2125] avg loss 0.00316133, throughput 6.01926K wps
[Epoch 4 Batch 1590/2125] avg loss 0.00303249, throughput 6.01204K wps
[Epoch 4 Batch 1620/2125] avg loss 0.00288925, throughput 6.02083K wps
[Epoch 4 Batch 1650/2125] avg loss 0.00307524, throughput 6.01999K wps
[Epoch 4 Batch 1680/2125] avg loss 0.00264711, throughput 6.01685K wps
[Epoch 4 Batch 1710/2125] avg loss 0.00303186, throughput 6.02136K wps
[Epoch 4 Batch 1740/2125] avg loss 0.00302763, throughput 6.01554K wps
[Epoch 4 Batch 1770/2125] avg loss 0.00265943, throughput 6.00421K wps
[Epoch 4 Batch 1800/2125] avg loss 0.00295478, throughput 6.01557K wps
[Epoch 4 Batch 1830/2125] avg loss 0.00312533, throughput 6.01973K wps
[Epoch 4 Batch 1860/2125] avg loss 0.00293816, throughput 6.01278K wps
[Epoch 4 Batch 1890/2125] avg loss 0.00351002, throughput 6.02173K wps
[Epoch 4 Batch 1920/2125] avg loss 0.00309331, throughput 6.01845K wps
[Epoch 4 Batch 1950/2125] avg loss 0.00332076, throughput 6.0262K wps
[Epoch 4 Batch 1980/2125] avg loss 0.0034739, throughput 6.01439K wps
[Epoch 4 Batch 2010/2125] avg loss 0.00320883, throughput 6.0242K wps
[Epoch 4 Batch 2040/2125] avg loss 0.00279056, throughput 6.01514K wps
[Epoch 4 Batch 2070/2125] avg loss 0.00349205, throughput 6.01928K wps
[Epoch 4 Batch 2100/2125] avg loss 0.00317856, throughput 6.02242K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 4] train avg loss 0.00298122, test acc 0.9177, test avg loss 0.233191, throughput 6.02063K wps
[Epoch 5 Batch 30/2125] avg loss 0.002266, throughput 6.15136K wps
[Epoch 5 Batch 60/2125] avg loss 0.00258292, throughput 6.00632K wps
[Epoch 5 Batch 90/2125] avg loss 0.00261513, throughput 6.01767K wps
[Epoch 5 Batch 120/2125] avg loss 0.00236579, throughput 6.00488K wps
[Epoch 5 Batch 150/2125] avg loss 0.00283505, throughput 6.01023K wps
[Epoch 5 Batch 180/2125] avg loss 0.00255751, throughput 6.01691K wps
[Epoch 5 Batch 210/2125] avg loss 0.00271899, throughput 6.0121K wps
[Epoch 5 Batch 240/2125] avg loss 0.00231081, throughput 6.00793K wps
[Epoch 5 Batch 270/2125] avg loss 0.002578, throughput 6.02047K wps
[Epoch 5 Batch 300/2125] avg loss 0.00286929, throughput 6.00367K wps
[Epoch 5 Batch 330/2125] avg loss 0.00285696, throughput 6.01059K wps
[Epoch 5 Batch 360/2125] avg loss 0.00217682, throughput 6.02371K wps
[Epoch 5 Batch 390/2125] avg loss 0.00242748, throughput 6.01082K wps
[Epoch 5 Batch 420/2125] avg loss 0.0029081, throughput 6.01749K wps
[Epoch 5 Batch 450/2125] avg loss 0.002882, throughput 6.0142K wps
[Epoch 5 Batch 480/2125] avg loss 0.00277555, throughput 6.0193K wps
[Epoch 5 Batch 510/2125] avg loss 0.00214411, throughput 6.01853K wps
[Epoch 5 Batch 540/2125] avg loss 0.00236509, throughput 6.02171K wps
[Epoch 5 Batch 570/2125] avg loss 0.00269093, throughput 6.01898K wps
[Epoch 5 Batch 600/2125] avg loss 0.00262393, throughput 6.01465K wps
[Epoch 5 Batch 630/2125] avg loss 0.00248691, throughput 6.01901K wps
[Epoch 5 Batch 660/2125] avg loss 0.00271584, throughput 6.01917K wps
[Epoch 5 Batch 690/2125] avg loss 0.00282762, throughput 6.01094K wps
[Epoch 5 Batch 720/2125] avg loss 0.00255462, throughput 6.00591K wps
[Epoch 5 Batch 750/2125] avg loss 0.00282782, throughput 6.01582K wps
[Epoch 5 Batch 780/2125] avg loss 0.00254651, throughput 6.01097K wps
[Epoch 5 Batch 810/2125] avg loss 0.00290707, throughput 6.02047K wps
[Epoch 5 Batch 840/2125] avg loss 0.00262968, throughput 6.01899K wps
[Epoch 5 Batch 870/2125] avg loss 0.00246314, throughput 6.02158K wps
[Epoch 5 Batch 900/2125] avg loss 0.00299079, throughput 6.02227K wps
[Epoch 5 Batch 930/2125] avg loss 0.00251947, throughput 6.02393K wps
[Epoch 5 Batch 960/2125] avg loss 0.00286057, throughput 6.0196K wps
[Epoch 5 Batch 990/2125] avg loss 0.00295726, throughput 6.0171K wps
[Epoch 5 Batch 1020/2125] avg loss 0.00330064, throughput 6.02635K wps
[Epoch 5 Batch 1050/2125] avg loss 0.00292193, throughput 6.01763K wps
[Epoch 5 Batch 1080/2125] avg loss 0.00297889, throughput 6.023K wps
[Epoch 5 Batch 1110/2125] avg loss 0.00231156, throughput 6.01802K wps
[Epoch 5 Batch 1140/2125] avg loss 0.00279298, throughput 6.01194K wps
[Epoch 5 Batch 1170/2125] avg loss 0.00335635, throughput 6.00787K wps
[Epoch 5 Batch 1200/2125] avg loss 0.00251146, throughput 6.0251K wps
[Epoch 5 Batch 1230/2125] avg loss 0.00279162, throughput 6.02171K wps
[Epoch 5 Batch 1260/2125] avg loss 0.00265361, throughput 6.02782K wps
[Epoch 5 Batch 1290/2125] avg loss 0.00251453, throughput 6.02191K wps
[Epoch 5 Batch 1320/2125] avg loss 0.00264515, throughput 6.02679K wps
[Epoch 5 Batch 1350/2125] avg loss 0.00248954, throughput 6.01827K wps
[Epoch 5 Batch 1380/2125] avg loss 0.00270039, throughput 6.01786K wps
[Epoch 5 Batch 1410/2125] avg loss 0.00286901, throughput 6.01535K wps
[Epoch 5 Batch 1440/2125] avg loss 0.00319263, throughput 6.00976K wps
[Epoch 5 Batch 1470/2125] avg loss 0.00258001, throughput 6.02117K wps
[Epoch 5 Batch 1500/2125] avg loss 0.00267841, throughput 6.02077K wps
[Epoch 5 Batch 1530/2125] avg loss 0.00258607, throughput 6.02191K wps
[Epoch 5 Batch 1560/2125] avg loss 0.00267857, throughput 6.02158K wps
[Epoch 5 Batch 1590/2125] avg loss 0.00293231, throughput 6.01361K wps
[Epoch 5 Batch 1620/2125] avg loss 0.00250702, throughput 6.01928K wps
[Epoch 5 Batch 1650/2125] avg loss 0.00250209, throughput 6.01549K wps
[Epoch 5 Batch 1680/2125] avg loss 0.00296369, throughput 6.02271K wps
[Epoch 5 Batch 1710/2125] avg loss 0.00238726, throughput 6.01431K wps
[Epoch 5 Batch 1740/2125] avg loss 0.00261131, throughput 6.01672K wps
[Epoch 5 Batch 1770/2125] avg loss 0.00254073, throughput 6.01258K wps
[Epoch 5 Batch 1800/2125] avg loss 0.00258452, throughput 6.01479K wps
[Epoch 5 Batch 1830/2125] avg loss 0.00227937, throughput 6.01754K wps
[Epoch 5 Batch 1860/2125] avg loss 0.00298834, throughput 6.01824K wps
[Epoch 5 Batch 1890/2125] avg loss 0.00305386, throughput 6.02103K wps
[Epoch 5 Batch 1920/2125] avg loss 0.00277241, throughput 6.01351K wps
[Epoch 5 Batch 1950/2125] avg loss 0.00300737, throughput 6.02004K wps
[Epoch 5 Batch 1980/2125] avg loss 0.00319979, throughput 6.02786K wps
[Epoch 5 Batch 2010/2125] avg loss 0.00300186, throughput 6.01881K wps
[Epoch 5 Batch 2040/2125] avg loss 0.00299978, throughput 6.00717K wps
[Epoch 5 Batch 2070/2125] avg loss 0.00280821, throughput 6.02016K wps
[Epoch 5 Batch 2100/2125] avg loss 0.00222502, throughput 6.01845K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 5] train avg loss 0.00269941, test acc 0.9201, test avg loss 0.240024, throughput 6.01902K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 6 Batch 30/2125] avg loss 0.00192233, throughput 6.16515K wps
[Epoch 6 Batch 60/2125] avg loss 0.00239781, throughput 6.02379K wps
[Epoch 6 Batch 90/2125] avg loss 0.00207097, throughput 6.01697K wps
[Epoch 6 Batch 120/2125] avg loss 0.00235802, throughput 6.01453K wps
[Epoch 6 Batch 150/2125] avg loss 0.00220505, throughput 6.01069K wps
[Epoch 6 Batch 180/2125] avg loss 0.0022343, throughput 6.01581K wps
[Epoch 6 Batch 210/2125] avg loss 0.0024081, throughput 6.0074K wps
[Epoch 6 Batch 240/2125] avg loss 0.00193505, throughput 6.02289K wps
[Epoch 6 Batch 270/2125] avg loss 0.00250878, throughput 6.00724K wps
[Epoch 6 Batch 300/2125] avg loss 0.00199056, throughput 6.0081K wps
[Epoch 6 Batch 330/2125] avg loss 0.00239016, throughput 5.99979K wps
[Epoch 6 Batch 360/2125] avg loss 0.00195498, throughput 6.02173K wps
[Epoch 6 Batch 390/2125] avg loss 0.00246945, throughput 6.02171K wps
[Epoch 6 Batch 420/2125] avg loss 0.00296545, throughput 6.00897K wps
[Epoch 6 Batch 450/2125] avg loss 0.00233941, throughput 6.01583K wps
[Epoch 6 Batch 480/2125] avg loss 0.00253441, throughput 6.01456K wps
[Epoch 6 Batch 510/2125] avg loss 0.00221608, throughput 6.00995K wps
[Epoch 6 Batch 540/2125] avg loss 0.00218651, throughput 6.00781K wps
[Epoch 6 Batch 570/2125] avg loss 0.00248294, throughput 6.01802K wps
[Epoch 6 Batch 600/2125] avg loss 0.00226155, throughput 6.02535K wps
[Epoch 6 Batch 630/2125] avg loss 0.00242754, throughput 6.02488K wps
[Epoch 6 Batch 660/2125] avg loss 0.00248287, throughput 6.01794K wps
[Epoch 6 Batch 690/2125] avg loss 0.00260461, throughput 6.01234K wps
[Epoch 6 Batch 720/2125] avg loss 0.00264918, throughput 6.01408K wps
[Epoch 6 Batch 750/2125] avg loss 0.00222153, throughput 6.02575K wps
[Epoch 6 Batch 780/2125] avg loss 0.00252996, throughput 6.02087K wps
[Epoch 6 Batch 810/2125] avg loss 0.00227176, throughput 6.03148K wps
[Epoch 6 Batch 840/2125] avg loss 0.00212412, throughput 6.02182K wps
[Epoch 6 Batch 870/2125] avg loss 0.00245276, throughput 6.02794K wps
[Epoch 6 Batch 900/2125] avg loss 0.00302922, throughput 6.02133K wps
[Epoch 6 Batch 930/2125] avg loss 0.00247494, throughput 6.01733K wps
[Epoch 6 Batch 960/2125] avg loss 0.00235907, throughput 6.01794K wps
[Epoch 6 Batch 990/2125] avg loss 0.0020372, throughput 6.02545K wps
[Epoch 6 Batch 1020/2125] avg loss 0.00237273, throughput 6.01851K wps
[Epoch 6 Batch 1050/2125] avg loss 0.00256401, throughput 6.02599K wps
[Epoch 6 Batch 1080/2125] avg loss 0.00228862, throughput 6.01342K wps
[Epoch 6 Batch 1110/2125] avg loss 0.00229383, throughput 6.03221K wps
[Epoch 6 Batch 1140/2125] avg loss 0.00200327, throughput 6.02054K wps
[Epoch 6 Batch 1170/2125] avg loss 0.00243308, throughput 6.0191K wps
[Epoch 6 Batch 1200/2125] avg loss 0.00233369, throughput 6.01964K wps
[Epoch 6 Batch 1230/2125] avg loss 0.00211744, throughput 6.02201K wps
[Epoch 6 Batch 1260/2125] avg loss 0.00237833, throughput 6.01458K wps
[Epoch 6 Batch 1290/2125] avg loss 0.0025499, throughput 6.02268K wps
[Epoch 6 Batch 1320/2125] avg loss 0.00278492, throughput 6.02132K wps
[Epoch 6 Batch 1350/2125] avg loss 0.00274881, throughput 6.01752K wps
[Epoch 6 Batch 1380/2125] avg loss 0.00211643, throughput 6.01807K wps
[Epoch 6 Batch 1410/2125] avg loss 0.00252623, throughput 6.02834K wps
[Epoch 6 Batch 1440/2125] avg loss 0.00271443, throughput 6.01857K wps
[Epoch 6 Batch 1470/2125] avg loss 0.00240694, throughput 6.02077K wps
[Epoch 6 Batch 1500/2125] avg loss 0.00257398, throughput 6.01377K wps
[Epoch 6 Batch 1530/2125] avg loss 0.00274423, throughput 6.02598K wps
[Epoch 6 Batch 1560/2125] avg loss 0.00227469, throughput 6.01697K wps
[Epoch 6 Batch 1590/2125] avg loss 0.0025055, throughput 6.02679K wps
[Epoch 6 Batch 1620/2125] avg loss 0.00270308, throughput 6.01831K wps
[Epoch 6 Batch 1650/2125] avg loss 0.00236972, throughput 6.01252K wps
[Epoch 6 Batch 1680/2125] avg loss 0.00246744, throughput 6.00294K wps
[Epoch 6 Batch 1710/2125] avg loss 0.00276843, throughput 5.93829K wps
[Epoch 6 Batch 1740/2125] avg loss 0.00239753, throughput 5.99672K wps
[Epoch 6 Batch 1770/2125] avg loss 0.00240108, throughput 6.0319K wps
[Epoch 6 Batch 1800/2125] avg loss 0.0029332, throughput 6.02184K wps
[Epoch 6 Batch 1830/2125] avg loss 0.00309994, throughput 6.01496K wps
[Epoch 6 Batch 1860/2125] avg loss 0.00240021, throughput 6.01516K wps
[Epoch 6 Batch 1890/2125] avg loss 0.00212872, throughput 6.02349K wps
[Epoch 6 Batch 1920/2125] avg loss 0.00302542, throughput 6.02583K wps
[Epoch 6 Batch 1950/2125] avg loss 0.00261946, throughput 6.02423K wps
[Epoch 6 Batch 1980/2125] avg loss 0.00284279, throughput 6.02674K wps
[Epoch 6 Batch 2010/2125] avg loss 0.00285437, throughput 6.01909K wps
[Epoch 6 Batch 2040/2125] avg loss 0.00229951, throughput 6.0278K wps
[Epoch 6 Batch 2070/2125] avg loss 0.00283838, throughput 6.02163K wps
[Epoch 6 Batch 2100/2125] avg loss 0.00301136, throughput 6.02464K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 6] train avg loss 0.00244919, test acc 0.9212, test avg loss 0.247174, throughput 6.01968K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 7 Batch 30/2125] avg loss 0.00202062, throughput 6.15686K wps
[Epoch 7 Batch 60/2125] avg loss 0.00217135, throughput 6.01831K wps
[Epoch 7 Batch 90/2125] avg loss 0.00201642, throughput 6.01379K wps
[Epoch 7 Batch 120/2125] avg loss 0.00209621, throughput 6.02374K wps
[Epoch 7 Batch 150/2125] avg loss 0.00202836, throughput 6.02347K wps
[Epoch 7 Batch 180/2125] avg loss 0.00199345, throughput 6.02604K wps
[Epoch 7 Batch 210/2125] avg loss 0.0020707, throughput 6.01924K wps
[Epoch 7 Batch 240/2125] avg loss 0.00162263, throughput 6.00325K wps
[Epoch 7 Batch 270/2125] avg loss 0.0020139, throughput 6.00906K wps
[Epoch 7 Batch 300/2125] avg loss 0.00244518, throughput 6.01157K wps
[Epoch 7 Batch 330/2125] avg loss 0.00224331, throughput 6.02949K wps
[Epoch 7 Batch 360/2125] avg loss 0.00213068, throughput 6.02496K wps
[Epoch 7 Batch 390/2125] avg loss 0.00184593, throughput 6.01844K wps
[Epoch 7 Batch 420/2125] avg loss 0.00222306, throughput 6.01983K wps
[Epoch 7 Batch 450/2125] avg loss 0.00179457, throughput 6.01813K wps
[Epoch 7 Batch 480/2125] avg loss 0.0018563, throughput 6.01723K wps
[Epoch 7 Batch 510/2125] avg loss 0.00229117, throughput 6.0144K wps
[Epoch 7 Batch 540/2125] avg loss 0.0028272, throughput 6.01897K wps
[Epoch 7 Batch 570/2125] avg loss 0.00232186, throughput 6.01392K wps
[Epoch 7 Batch 600/2125] avg loss 0.0020769, throughput 6.01538K wps
[Epoch 7 Batch 630/2125] avg loss 0.00214815, throughput 6.0288K wps
[Epoch 7 Batch 660/2125] avg loss 0.00189695, throughput 6.02869K wps
[Epoch 7 Batch 690/2125] avg loss 0.00217327, throughput 6.00771K wps
[Epoch 7 Batch 720/2125] avg loss 0.00217847, throughput 6.01944K wps
[Epoch 7 Batch 750/2125] avg loss 0.00243964, throughput 6.02905K wps
[Epoch 7 Batch 780/2125] avg loss 0.0019589, throughput 6.03117K wps
[Epoch 7 Batch 810/2125] avg loss 0.00237444, throughput 6.00244K wps
[Epoch 7 Batch 840/2125] avg loss 0.00210078, throughput 6.01605K wps
[Epoch 7 Batch 870/2125] avg loss 0.00212865, throughput 6.01132K wps
[Epoch 7 Batch 900/2125] avg loss 0.00221865, throughput 6.01313K wps
[Epoch 7 Batch 930/2125] avg loss 0.00179696, throughput 6.0099K wps
[Epoch 7 Batch 960/2125] avg loss 0.00263186, throughput 6.01365K wps
[Epoch 7 Batch 990/2125] avg loss 0.00193493, throughput 6.00968K wps
[Epoch 7 Batch 1020/2125] avg loss 0.00207612, throughput 6.01253K wps
[Epoch 7 Batch 1050/2125] avg loss 0.00215651, throughput 6.00794K wps
[Epoch 7 Batch 1080/2125] avg loss 0.00237432, throughput 6.00503K wps
[Epoch 7 Batch 1110/2125] avg loss 0.00276681, throughput 6.00578K wps
[Epoch 7 Batch 1140/2125] avg loss 0.00253792, throughput 6.01817K wps
[Epoch 7 Batch 1170/2125] avg loss 0.00240757, throughput 6.00991K wps
[Epoch 7 Batch 1200/2125] avg loss 0.00204832, throughput 5.99969K wps
[Epoch 7 Batch 1230/2125] avg loss 0.00228504, throughput 6.01623K wps
[Epoch 7 Batch 1260/2125] avg loss 0.00227115, throughput 6.00309K wps
[Epoch 7 Batch 1290/2125] avg loss 0.00189741, throughput 6.01114K wps
[Epoch 7 Batch 1320/2125] avg loss 0.00247494, throughput 6.01152K wps
[Epoch 7 Batch 1350/2125] avg loss 0.00213737, throughput 6.02014K wps
[Epoch 7 Batch 1380/2125] avg loss 0.00245954, throughput 6.00678K wps
[Epoch 7 Batch 1410/2125] avg loss 0.00222627, throughput 6.01284K wps
[Epoch 7 Batch 1440/2125] avg loss 0.00210314, throughput 6.01753K wps
[Epoch 7 Batch 1470/2125] avg loss 0.00203042, throughput 6.02036K wps
[Epoch 7 Batch 1500/2125] avg loss 0.00183952, throughput 6.01651K wps
[Epoch 7 Batch 1530/2125] avg loss 0.00272776, throughput 6.02245K wps
[Epoch 7 Batch 1560/2125] avg loss 0.00228069, throughput 6.01601K wps
[Epoch 7 Batch 1590/2125] avg loss 0.00248911, throughput 6.01353K wps
[Epoch 7 Batch 1620/2125] avg loss 0.00241448, throughput 6.01958K wps
[Epoch 7 Batch 1650/2125] avg loss 0.00278445, throughput 6.01073K wps
[Epoch 7 Batch 1680/2125] avg loss 0.00255007, throughput 6.01385K wps
[Epoch 7 Batch 1710/2125] avg loss 0.00226164, throughput 6.01572K wps
[Epoch 7 Batch 1740/2125] avg loss 0.00287883, throughput 6.00687K wps
[Epoch 7 Batch 1770/2125] avg loss 0.00197915, throughput 6.01578K wps
[Epoch 7 Batch 1800/2125] avg loss 0.00244848, throughput 6.01259K wps
[Epoch 7 Batch 1830/2125] avg loss 0.00274109, throughput 6.01434K wps
[Epoch 7 Batch 1860/2125] avg loss 0.0025687, throughput 6.01017K wps
[Epoch 7 Batch 1890/2125] avg loss 0.00208887, throughput 6.01575K wps
[Epoch 7 Batch 1920/2125] avg loss 0.00243895, throughput 6.0217K wps
[Epoch 7 Batch 1950/2125] avg loss 0.00220696, throughput 6.00715K wps
[Epoch 7 Batch 1980/2125] avg loss 0.00212949, throughput 6.00976K wps
[Epoch 7 Batch 2010/2125] avg loss 0.00243932, throughput 6.01639K wps
[Epoch 7 Batch 2040/2125] avg loss 0.00227523, throughput 6.0148K wps
[Epoch 7 Batch 2070/2125] avg loss 0.00239413, throughput 6.00751K wps
[Epoch 7 Batch 2100/2125] avg loss 0.00216554, throughput 6.01479K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 7] train avg loss 0.0022391, test acc 0.9229, test avg loss 0.257001, throughput 6.01708K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 8 Batch 30/2125] avg loss 0.00194165, throughput 6.15192K wps
[Epoch 8 Batch 60/2125] avg loss 0.00169439, throughput 6.0209K wps
[Epoch 8 Batch 90/2125] avg loss 0.00169657, throughput 6.01627K wps
[Epoch 8 Batch 120/2125] avg loss 0.00178722, throughput 6.01381K wps
[Epoch 8 Batch 150/2125] avg loss 0.00172245, throughput 6.02335K wps
[Epoch 8 Batch 180/2125] avg loss 0.00194368, throughput 6.01437K wps
[Epoch 8 Batch 210/2125] avg loss 0.00200053, throughput 6.0118K wps
[Epoch 8 Batch 240/2125] avg loss 0.0017583, throughput 6.01847K wps
[Epoch 8 Batch 270/2125] avg loss 0.00222153, throughput 6.01632K wps
[Epoch 8 Batch 300/2125] avg loss 0.0018808, throughput 6.02446K wps
[Epoch 8 Batch 330/2125] avg loss 0.00184873, throughput 6.00352K wps
[Epoch 8 Batch 360/2125] avg loss 0.00180156, throughput 6.01192K wps
[Epoch 8 Batch 390/2125] avg loss 0.00207927, throughput 6.01913K wps
[Epoch 8 Batch 420/2125] avg loss 0.00187912, throughput 6.01394K wps
[Epoch 8 Batch 450/2125] avg loss 0.00185151, throughput 6.01195K wps
[Epoch 8 Batch 480/2125] avg loss 0.00199105, throughput 6.01203K wps
[Epoch 8 Batch 510/2125] avg loss 0.0021623, throughput 6.01186K wps
[Epoch 8 Batch 540/2125] avg loss 0.00185061, throughput 6.0147K wps
[Epoch 8 Batch 570/2125] avg loss 0.00211765, throughput 6.0122K wps
[Epoch 8 Batch 600/2125] avg loss 0.00190702, throughput 6.01718K wps
[Epoch 8 Batch 630/2125] avg loss 0.00180521, throughput 6.01021K wps
[Epoch 8 Batch 660/2125] avg loss 0.00217044, throughput 6.00487K wps
[Epoch 8 Batch 690/2125] avg loss 0.00219558, throughput 6.02234K wps
[Epoch 8 Batch 720/2125] avg loss 0.00169814, throughput 6.02171K wps
[Epoch 8 Batch 750/2125] avg loss 0.00217971, throughput 6.01822K wps
[Epoch 8 Batch 780/2125] avg loss 0.00207785, throughput 6.01445K wps
[Epoch 8 Batch 810/2125] avg loss 0.00186725, throughput 6.01464K wps
[Epoch 8 Batch 840/2125] avg loss 0.00208753, throughput 6.01564K wps
[Epoch 8 Batch 870/2125] avg loss 0.00233762, throughput 6.01727K wps
[Epoch 8 Batch 900/2125] avg loss 0.00210107, throughput 6.01232K wps
[Epoch 8 Batch 930/2125] avg loss 0.00180357, throughput 6.01352K wps
[Epoch 8 Batch 960/2125] avg loss 0.00217358, throughput 6.02522K wps
[Epoch 8 Batch 990/2125] avg loss 0.00217225, throughput 6.01535K wps
[Epoch 8 Batch 1020/2125] avg loss 0.00222565, throughput 6.02205K wps
[Epoch 8 Batch 1050/2125] avg loss 0.00207359, throughput 6.01442K wps
[Epoch 8 Batch 1080/2125] avg loss 0.00199008, throughput 6.02177K wps
[Epoch 8 Batch 1110/2125] avg loss 0.00201866, throughput 6.01239K wps
[Epoch 8 Batch 1140/2125] avg loss 0.0019341, throughput 6.01308K wps
[Epoch 8 Batch 1170/2125] avg loss 0.00215001, throughput 6.01423K wps
[Epoch 8 Batch 1200/2125] avg loss 0.00204672, throughput 6.01649K wps
[Epoch 8 Batch 1230/2125] avg loss 0.00198817, throughput 6.01448K wps
[Epoch 8 Batch 1260/2125] avg loss 0.00200644, throughput 6.01718K wps
[Epoch 8 Batch 1290/2125] avg loss 0.00213092, throughput 6.00849K wps
[Epoch 8 Batch 1320/2125] avg loss 0.00226168, throughput 6.01938K wps
[Epoch 8 Batch 1350/2125] avg loss 0.00189901, throughput 6.02392K wps
[Epoch 8 Batch 1380/2125] avg loss 0.00223916, throughput 6.01614K wps
[Epoch 8 Batch 1410/2125] avg loss 0.0021712, throughput 6.01774K wps
[Epoch 8 Batch 1440/2125] avg loss 0.00191818, throughput 6.00829K wps
[Epoch 8 Batch 1470/2125] avg loss 0.00204638, throughput 6.0144K wps
[Epoch 8 Batch 1500/2125] avg loss 0.00211871, throughput 6.01829K wps
[Epoch 8 Batch 1530/2125] avg loss 0.00241585, throughput 6.01949K wps
[Epoch 8 Batch 1560/2125] avg loss 0.0018464, throughput 6.01623K wps
[Epoch 8 Batch 1590/2125] avg loss 0.00203842, throughput 6.01809K wps
[Epoch 8 Batch 1620/2125] avg loss 0.00185013, throughput 6.01573K wps
[Epoch 8 Batch 1650/2125] avg loss 0.00215092, throughput 6.02168K wps
[Epoch 8 Batch 1680/2125] avg loss 0.00171337, throughput 6.02643K wps
[Epoch 8 Batch 1710/2125] avg loss 0.0020024, throughput 6.02015K wps
[Epoch 8 Batch 1740/2125] avg loss 0.00236147, throughput 6.01545K wps
[Epoch 8 Batch 1770/2125] avg loss 0.00257322, throughput 6.0183K wps
[Epoch 8 Batch 1800/2125] avg loss 0.00239423, throughput 6.0105K wps
[Epoch 8 Batch 1830/2125] avg loss 0.00230529, throughput 6.01204K wps
[Epoch 8 Batch 1860/2125] avg loss 0.00228218, throughput 6.01464K wps
[Epoch 8 Batch 1890/2125] avg loss 0.00196181, throughput 6.02066K wps
[Epoch 8 Batch 1920/2125] avg loss 0.00194806, throughput 6.00895K wps
[Epoch 8 Batch 1950/2125] avg loss 0.00203694, throughput 6.01834K wps
[Epoch 8 Batch 1980/2125] avg loss 0.00246739, throughput 6.00691K wps
[Epoch 8 Batch 2010/2125] avg loss 0.00228341, throughput 6.01209K wps
[Epoch 8 Batch 2040/2125] avg loss 0.00246577, throughput 6.00999K wps
[Epoch 8 Batch 2070/2125] avg loss 0.00242343, throughput 6.01807K wps
[Epoch 8 Batch 2100/2125] avg loss 0.0019853, throughput 6.01106K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 8] train avg loss 0.00205443, test acc 0.9249, test avg loss 0.265395, throughput 6.01758K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 9 Batch 30/2125] avg loss 0.00161072, throughput 6.15676K wps
[Epoch 9 Batch 60/2125] avg loss 0.00187389, throughput 6.01498K wps
[Epoch 9 Batch 90/2125] avg loss 0.00159963, throughput 6.02048K wps
[Epoch 9 Batch 120/2125] avg loss 0.00175688, throughput 6.02217K wps
[Epoch 9 Batch 150/2125] avg loss 0.00177901, throughput 6.01476K wps
[Epoch 9 Batch 180/2125] avg loss 0.00134942, throughput 6.02033K wps
[Epoch 9 Batch 210/2125] avg loss 0.00139904, throughput 6.01668K wps
[Epoch 9 Batch 240/2125] avg loss 0.00180726, throughput 6.02901K wps
[Epoch 9 Batch 270/2125] avg loss 0.00190185, throughput 6.02032K wps
[Epoch 9 Batch 300/2125] avg loss 0.00177372, throughput 6.02627K wps
[Epoch 9 Batch 330/2125] avg loss 0.00149637, throughput 6.02372K wps
[Epoch 9 Batch 360/2125] avg loss 0.00204333, throughput 6.03007K wps
[Epoch 9 Batch 390/2125] avg loss 0.0017299, throughput 6.01475K wps
[Epoch 9 Batch 420/2125] avg loss 0.00207305, throughput 6.01912K wps
[Epoch 9 Batch 450/2125] avg loss 0.00188322, throughput 6.01139K wps
[Epoch 9 Batch 480/2125] avg loss 0.00193118, throughput 6.01905K wps
[Epoch 9 Batch 510/2125] avg loss 0.00192007, throughput 5.98578K wps
[Epoch 9 Batch 540/2125] avg loss 0.00205725, throughput 5.97893K wps
[Epoch 9 Batch 570/2125] avg loss 0.00175896, throughput 6.0144K wps
[Epoch 9 Batch 600/2125] avg loss 0.00186995, throughput 6.01726K wps
[Epoch 9 Batch 630/2125] avg loss 0.00171525, throughput 6.01671K wps
[Epoch 9 Batch 660/2125] avg loss 0.00149189, throughput 6.02242K wps
[Epoch 9 Batch 690/2125] avg loss 0.00196265, throughput 6.02281K wps
[Epoch 9 Batch 720/2125] avg loss 0.00184022, throughput 6.03107K wps
[Epoch 9 Batch 750/2125] avg loss 0.00179706, throughput 6.02065K wps
[Epoch 9 Batch 780/2125] avg loss 0.00183945, throughput 6.01745K wps
[Epoch 9 Batch 810/2125] avg loss 0.00175176, throughput 6.02051K wps
[Epoch 9 Batch 840/2125] avg loss 0.00169189, throughput 6.01104K wps
[Epoch 9 Batch 870/2125] avg loss 0.00173344, throughput 6.02022K wps
[Epoch 9 Batch 900/2125] avg loss 0.00169748, throughput 6.02006K wps
[Epoch 9 Batch 930/2125] avg loss 0.00161149, throughput 6.01691K wps
[Epoch 9 Batch 960/2125] avg loss 0.00184808, throughput 6.00892K wps
[Epoch 9 Batch 990/2125] avg loss 0.00162143, throughput 6.01801K wps
[Epoch 9 Batch 1020/2125] avg loss 0.00184479, throughput 6.01646K wps
[Epoch 9 Batch 1050/2125] avg loss 0.00173171, throughput 6.01623K wps
[Epoch 9 Batch 1080/2125] avg loss 0.00211823, throughput 6.00728K wps
[Epoch 9 Batch 1110/2125] avg loss 0.00169557, throughput 6.01603K wps
[Epoch 9 Batch 1140/2125] avg loss 0.00178106, throughput 6.01177K wps
[Epoch 9 Batch 1170/2125] avg loss 0.00181508, throughput 6.01133K wps
[Epoch 9 Batch 1200/2125] avg loss 0.00214408, throughput 6.01744K wps
[Epoch 9 Batch 1230/2125] avg loss 0.00174044, throughput 6.02186K wps
[Epoch 9 Batch 1260/2125] avg loss 0.00184899, throughput 6.01214K wps
[Epoch 9 Batch 1290/2125] avg loss 0.00188608, throughput 6.01121K wps
[Epoch 9 Batch 1320/2125] avg loss 0.00180929, throughput 6.00484K wps
[Epoch 9 Batch 1350/2125] avg loss 0.00185828, throughput 6.00721K wps
[Epoch 9 Batch 1380/2125] avg loss 0.00191972, throughput 6.00954K wps
[Epoch 9 Batch 1410/2125] avg loss 0.00186677, throughput 6.01917K wps
[Epoch 9 Batch 1440/2125] avg loss 0.00177678, throughput 6.02776K wps
[Epoch 9 Batch 1470/2125] avg loss 0.00218766, throughput 6.01177K wps
[Epoch 9 Batch 1500/2125] avg loss 0.00209133, throughput 6.01661K wps
[Epoch 9 Batch 1530/2125] avg loss 0.00206169, throughput 6.01529K wps
[Epoch 9 Batch 1560/2125] avg loss 0.00191061, throughput 6.00576K wps
[Epoch 9 Batch 1590/2125] avg loss 0.00192632, throughput 6.00981K wps
[Epoch 9 Batch 1620/2125] avg loss 0.00176926, throughput 6.0155K wps
[Epoch 9 Batch 1650/2125] avg loss 0.00182262, throughput 6.00243K wps
[Epoch 9 Batch 1680/2125] avg loss 0.00211868, throughput 6.0129K wps
[Epoch 9 Batch 1710/2125] avg loss 0.00199809, throughput 6.01661K wps
[Epoch 9 Batch 1740/2125] avg loss 0.00236236, throughput 6.00889K wps
[Epoch 9 Batch 1770/2125] avg loss 0.00220667, throughput 6.013K wps
[Epoch 9 Batch 1800/2125] avg loss 0.00214932, throughput 6.02276K wps
[Epoch 9 Batch 1830/2125] avg loss 0.00215349, throughput 6.01814K wps
[Epoch 9 Batch 1860/2125] avg loss 0.00258506, throughput 6.01505K wps
[Epoch 9 Batch 1890/2125] avg loss 0.00209567, throughput 6.00682K wps
[Epoch 9 Batch 1920/2125] avg loss 0.00215111, throughput 6.0179K wps
[Epoch 9 Batch 1950/2125] avg loss 0.00221761, throughput 6.01464K wps
[Epoch 9 Batch 1980/2125] avg loss 0.00201257, throughput 6.00873K wps
[Epoch 9 Batch 2010/2125] avg loss 0.00188507, throughput 6.01782K wps
[Epoch 9 Batch 2040/2125] avg loss 0.00211831, throughput 6.01089K wps
[Epoch 9 Batch 2070/2125] avg loss 0.00197462, throughput 6.01522K wps
[Epoch 9 Batch 2100/2125] avg loss 0.00182952, throughput 6.0116K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 9] train avg loss 0.0018832, test acc 0.9238, test avg loss 0.273964, throughput 6.01705K wps
[Epoch 10 Batch 30/2125] avg loss 0.00154547, throughput 6.15872K wps
[Epoch 10 Batch 60/2125] avg loss 0.00167571, throughput 6.02243K wps
[Epoch 10 Batch 90/2125] avg loss 0.0019125, throughput 6.01319K wps
[Epoch 10 Batch 120/2125] avg loss 0.00189433, throughput 6.01156K wps
[Epoch 10 Batch 150/2125] avg loss 0.00147028, throughput 6.0123K wps
[Epoch 10 Batch 180/2125] avg loss 0.00158474, throughput 6.01473K wps
[Epoch 10 Batch 210/2125] avg loss 0.00162858, throughput 6.01975K wps
[Epoch 10 Batch 240/2125] avg loss 0.0015955, throughput 6.01969K wps
[Epoch 10 Batch 270/2125] avg loss 0.00188014, throughput 6.01387K wps
[Epoch 10 Batch 300/2125] avg loss 0.00194126, throughput 6.0222K wps
[Epoch 10 Batch 330/2125] avg loss 0.00155786, throughput 6.02043K wps
[Epoch 10 Batch 360/2125] avg loss 0.00149925, throughput 6.02116K wps
[Epoch 10 Batch 390/2125] avg loss 0.00184137, throughput 6.01091K wps
[Epoch 10 Batch 420/2125] avg loss 0.00164687, throughput 6.03128K wps
[Epoch 10 Batch 450/2125] avg loss 0.00210684, throughput 6.01889K wps
[Epoch 10 Batch 480/2125] avg loss 0.00184784, throughput 6.00839K wps
[Epoch 10 Batch 510/2125] avg loss 0.00151014, throughput 6.0251K wps
[Epoch 10 Batch 540/2125] avg loss 0.00171735, throughput 6.00679K wps
[Epoch 10 Batch 570/2125] avg loss 0.00133102, throughput 6.0087K wps
[Epoch 10 Batch 600/2125] avg loss 0.0014377, throughput 6.01966K wps
[Epoch 10 Batch 630/2125] avg loss 0.00179768, throughput 6.01358K wps
[Epoch 10 Batch 660/2125] avg loss 0.00180245, throughput 6.01282K wps
[Epoch 10 Batch 690/2125] avg loss 0.00161994, throughput 6.01009K wps
[Epoch 10 Batch 720/2125] avg loss 0.00195253, throughput 6.00801K wps
[Epoch 10 Batch 750/2125] avg loss 0.00176513, throughput 6.01395K wps
[Epoch 10 Batch 780/2125] avg loss 0.00153525, throughput 6.01043K wps
[Epoch 10 Batch 810/2125] avg loss 0.00167136, throughput 6.02303K wps
[Epoch 10 Batch 840/2125] avg loss 0.0018898, throughput 6.01672K wps
[Epoch 10 Batch 870/2125] avg loss 0.0016315, throughput 6.01429K wps
[Epoch 10 Batch 900/2125] avg loss 0.00171821, throughput 6.00551K wps
[Epoch 10 Batch 930/2125] avg loss 0.00168842, throughput 6.00767K wps
[Epoch 10 Batch 960/2125] avg loss 0.00203871, throughput 6.01479K wps
[Epoch 10 Batch 990/2125] avg loss 0.00186281, throughput 6.01315K wps
[Epoch 10 Batch 1020/2125] avg loss 0.00186208, throughput 6.0111K wps
[Epoch 10 Batch 1050/2125] avg loss 0.00181951, throughput 6.01348K wps
[Epoch 10 Batch 1080/2125] avg loss 0.00197001, throughput 6.01845K wps
[Epoch 10 Batch 1110/2125] avg loss 0.00157876, throughput 6.00847K wps
[Epoch 10 Batch 1140/2125] avg loss 0.00167403, throughput 6.01918K wps
[Epoch 10 Batch 1170/2125] avg loss 0.00181912, throughput 6.02596K wps
[Epoch 10 Batch 1200/2125] avg loss 0.00155661, throughput 6.02112K wps
[Epoch 10 Batch 1230/2125] avg loss 0.00162699, throughput 6.01918K wps
[Epoch 10 Batch 1260/2125] avg loss 0.00181716, throughput 6.02262K wps
[Epoch 10 Batch 1290/2125] avg loss 0.00150949, throughput 6.02562K wps
[Epoch 10 Batch 1320/2125] avg loss 0.00198742, throughput 6.01591K wps
[Epoch 10 Batch 1350/2125] avg loss 0.00189704, throughput 6.01407K wps
[Epoch 10 Batch 1380/2125] avg loss 0.00177395, throughput 6.01233K wps
[Epoch 10 Batch 1410/2125] avg loss 0.00161222, throughput 6.00983K wps
[Epoch 10 Batch 1440/2125] avg loss 0.00146055, throughput 6.01504K wps
[Epoch 10 Batch 1470/2125] avg loss 0.00173187, throughput 6.02377K wps
[Epoch 10 Batch 1500/2125] avg loss 0.00187839, throughput 6.01569K wps
[Epoch 10 Batch 1530/2125] avg loss 0.00180196, throughput 6.01806K wps
[Epoch 10 Batch 1560/2125] avg loss 0.00195319, throughput 6.02626K wps
[Epoch 10 Batch 1590/2125] avg loss 0.0017562, throughput 6.01581K wps
[Epoch 10 Batch 1620/2125] avg loss 0.0022276, throughput 6.01434K wps
[Epoch 10 Batch 1650/2125] avg loss 0.00176203, throughput 6.01642K wps
[Epoch 10 Batch 1680/2125] avg loss 0.00222834, throughput 6.01839K wps
[Epoch 10 Batch 1710/2125] avg loss 0.00177621, throughput 6.01352K wps
[Epoch 10 Batch 1740/2125] avg loss 0.00171831, throughput 6.01335K wps
[Epoch 10 Batch 1770/2125] avg loss 0.00190618, throughput 6.02386K wps
[Epoch 10 Batch 1800/2125] avg loss 0.0018506, throughput 6.02006K wps
[Epoch 10 Batch 1830/2125] avg loss 0.00187726, throughput 6.0193K wps
[Epoch 10 Batch 1860/2125] avg loss 0.00178552, throughput 6.02088K wps
[Epoch 10 Batch 1890/2125] avg loss 0.00229936, throughput 6.00844K wps
[Epoch 10 Batch 1920/2125] avg loss 0.00166587, throughput 6.02135K wps
[Epoch 10 Batch 1950/2125] avg loss 0.0018016, throughput 6.01547K wps
[Epoch 10 Batch 1980/2125] avg loss 0.00176483, throughput 6.01671K wps
[Epoch 10 Batch 2010/2125] avg loss 0.0022677, throughput 6.01963K wps
[Epoch 10 Batch 2040/2125] avg loss 0.00219904, throughput 6.00966K wps
[Epoch 10 Batch 2070/2125] avg loss 0.00174707, throughput 6.03142K wps
[Epoch 10 Batch 2100/2125] avg loss 0.00165896, throughput 6.02159K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s
[Batch 60/237] elapsed 0.27 s
[Batch 90/237] elapsed 0.27 s
[Batch 120/237] elapsed 0.27 s
[Batch 150/237] elapsed 0.27 s
[Batch 180/237] elapsed 0.27 s
[Batch 210/237] elapsed 0.27 s
[Epoch 10] train avg loss 0.00177478, test acc 0.9258, test avg loss 0.285094, throughput 6.0188K wps
Observed Improvement.
Begin Testing...
[Batch 30/35] elapsed 0.27 s
[Epoch 11 Batch 30/2125] avg loss 0.00132532, throughput 6.15719K wps
[Epoch 11 Batch 60/2125] avg loss 0.00165941, throughput 6.0128K wps
[Epoch 11 Batch 90/2125] avg loss 0.00139682, throughput 6.0263K wps
[Epoch 11 Batch 120/2125] avg loss 0.00133825, throughput 6.01315K wps
[Epoch 11 Batch 150/2125] avg loss 0.00137215, throughput 6.00446K wps
[Epoch 11 Batch 180/2125] avg loss 0.00161289, throughput 6.01728K wps
[Epoch 11 Batch 210/2125] avg loss 0.00139325, throughput 6.01773K wps
[Epoch 11 Batch 240/2125] avg loss 0.00137649, throughput 6.01657K wps
[Epoch 11 Batch 270/2125] avg loss 0.00132649, throughput 6.02108K wps
[Epoch 11 Batch 300/2125] avg loss 0.00160659, throughput 6.02857K wps
[Epoch 11 Batch 330/2125] avg loss 0.00158187, throughput 6.01518K wps
[Epoch 11 Batch 360/2125] avg loss 0.00141766, throughput 6.02101K wps
[Epoch 11 Batch 390/2125] avg loss 0.00126441, throughput 6.01843K wps
[Epoch 11 Batch 420/2125] avg loss 0.00112723, throughput 6.0247K wps
[Epoch 11 Batch 450/2125] avg loss 0.00161123, throughput 6.01845K wps
[Epoch 11 Batch 480/2125] avg loss 0.00163232, throughput 6.01057K wps
[Epoch 11 Batch 510/2125] avg loss 0.00149205, throughput 6.02054K wps
[Epoch 11 Batch 540/2125] avg loss 0.00162793, throughput 6.01641K wps
[Epoch 11 Batch 570/2125] avg loss 0.00195476, throughput 6.02482K wps
[Epoch 11 Batch 600/2125] avg loss 0.00142364, throughput 6.01781K wps
[Epoch 11 Batch 630/2125] avg loss 0.00172855, throughput 6.00782K wps
[Epoch 11 Batch 660/2125] avg loss 0.00162091, throughput 6.01927K wps
[Epoch 11 Batch 690/2125] avg loss 0.00150445, throughput 6.02924K wps
[Epoch 11 Batch 720/2125] avg loss 0.00148111, throughput 6.01856K wps
[Epoch 11 Batch 750/2125] avg loss 0.0018917, throughput 6.01606K wps
[Epoch 11 Batch 780/2125] avg loss 0.00154718, throughput 6.02375K wps
[Epoch 11 Batch 810/2125] avg loss 0.00146236, throughput 6.01783K wps
[Epoch 11 Batch 840/2125] avg loss 0.00187273, throughput 6.01286K wps
[Epoch 11 Batch 870/2125] avg loss 0.0016205, throughput 6.01173K wps
[Epoch 11 Batch 900/2125] avg loss 0.00167477, throughput 6.02126K wps
[Epoch 11 Batch 930/2125] avg loss 0.00161951, throughput 6.02244K wps
[Epoch 11 Batch 960/2125] avg loss 0.00184429, throughput 6.01957K wps
[Epoch 11 Batch 990/2125] avg loss 0.00158349, throughput 6.01205K wps
[Epoch 11 Batch 1020/2125] avg loss 0.00182522, throughput 6.02397K wps
[Epoch 11 Batch 1050/2125] avg loss 0.00147489, throughput 6.0172K wps
[Epoch 11 Batch 1080/2125] avg loss 0.00216514, throughput 6.02499K wps
[Epoch 11 Batch 1110/2125] avg loss 0.00178658, throughput 6.01444K wps
[Epoch 11 Batch 1140/2125] avg loss 0.00175479, throughput 6.00862K wps
[Epoch 11 Batch 1170/2125] avg loss 0.00185798, throughput 6.00503K wps
[Epoch 11 Batch 1200/2125] avg loss 0.00176919, throughput 6.00138K wps
[Epoch 11 Batch 1230/2125] avg loss 0.00158506, throughput 6.01287K wps
[Epoch 11 Batch 1260/2125] avg loss 0.00170506, throughput 5.99999K wps
[Epoch 11 Batch 1290/2125] avg loss 0.00136324, throughput 6.01975K wps
[Epoch 11 Batch 1320/2125] avg loss 0.00197814, throughput 6.01208K wps
[Epoch 11 Batch 1350/2125] avg loss 0.00186294, throughput 6.01347K wps
[Epoch 11 Batch 1380/2125] avg loss 0.00191181, throughput 6.01992K wps
[Epoch 11 Batch 1410/2125] avg loss 0.00165113, throughput 6.01371K wps
[Epoch 11 Batch 1440/2125] avg loss 0.00153531, throughput 6.01549K wps
[Epoch 11 Batch 1470/2125] avg loss 0.00186237, throughput 6.01142K wps
[Epoch 11 Batch 1500/2125] avg loss 0.00150011, throughput 6.01936K wps
[Epoch 11 Batch 1530/2125] avg loss 0.00175135, throughput 6.00509K wps
[Epoch 11 Batch 1560/2125] avg loss 0.00169371, throughput 5.98422K wps
[Epoch 11 Batch 1590/2125] avg loss 0.00186827, throughput 6.01304K wps
[Epoch 11 Batch 1620/2125] avg loss 0.00183257, throughput 6.01847K wps
[Epoch 11 Batch 1650/2125] avg loss 0.00192661, throughput 6.00869K wps
[Epoch 11 Batch 1680/2125] avg loss 0.00173807, throughput 6.01085K wps
[Epoch 11 Batch 1710/2125] avg loss 0.00168934, throughput 6.02028K wps
[Epoch 11 Batch 1740/2125] avg loss 0.00162175, throughput 6.02028K wps
[Epoch 11 Batch 1770/2125] avg loss 0.00188944, throughput 6.02316K wps
[Epoch 11 Batch 1800/2125] avg loss 0.00178978, throughput 6.0203K wps
[Epoch 11 Batch 1830/2125] avg loss 0.00177428, throughput 6.02234K wps
[Epoch 11 Batch 1860/2125] avg loss 0.00144609, throughput 6.00569K wps
[Epoch 11 Batch 1890/2125] avg loss 0.00160884, throughput 6.01805K wps
[Epoch 11 Batch 1920/2125] avg loss 0.00163485, throughput 6.02521K wps
[Epoch 11 Batch 1950/2125] avg loss 0.00180822, throughput 6.01919K wps
[Epoch 11 Batch 1980/2125] avg loss 0.00183987, throughput 6.02414K wps
[Epoch 11 Batch 2010/2125] avg loss 0.00162786, throughput 6.01507K wps
[Epoch 11 Batch 2040/2125] avg loss 0.00148403, throughput 6.02096K wps
[Epoch 11 Batch 2070/2125] avg loss 0.00180243, throughput 6.0122K wps
[Epoch 11 Batch 2100/2125] avg loss 0.0019833, throughput 6.01985K wps
Begin Testing...
[Batch 30/237] elapsed 0.29 s