In [1]:
# -*- coding: utf-8 -*-
"""
Simple example using LSTM recurrent neural network to classify IMDB
sentiment dataset.
References:
    - Long Short Term Memory, Sepp Hochreiter & Jurgen Schmidhuber, Neural
    Computation 9(8): 1735-1780, 1997.
    - Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng,
    and Christopher Potts. (2011). Learning Word Vectors for Sentiment
    Analysis. The 49th Annual Meeting of the Association for Computational
    Linguistics (ACL 2011).
Links:
    - http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
    - http://ai.stanford.edu/~amaas/data/sentiment/
"""

from __future__ import division, print_function, absolute_import

import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.embedding_ops import embedding
from tflearn.layers.recurrent import bidirectional_rnn, BasicLSTMCell
from tflearn.layers.estimator import regression

# IMDB Dataset loading
train, test, _ = imdb.load_data(path='Data/imdb.pkl', n_words=10000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test

# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=200, value=0.)
testX = pad_sequences(testX, maxlen=200, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

# Network building
net = input_data(shape=[None, 200])
net = embedding(net, input_dim=20000, output_dim=128)
net = bidirectional_rnn(net, BasicLSTMCell(128), BasicLSTMCell(128))
net = dropout(net, 0.5)
net = fully_connected(net, 2, activation='softmax')
net = regression(net, optimizer='adam', loss='categorical_crossentropy')

# Training
model = tflearn.DNN(net, clip_gradients=0., tensorboard_verbose=2)
model.fit(trainX, trainY, validation_set=0.1, show_metric=True, batch_size=64)

curses is not supported on this machine (please install/reinstall curses for an optimal experience)


Downloading data from http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl


---------------------------------
Run id: XLTVED
Log directory: /tmp/tflearn_logs/


INFO:tensorflow:Summary name Accuracy/ (raw) is illegal; using Accuracy/__raw_ instead.


---------------------------------
Training samples: 20250
Validation samples: 2250
--


Training Step: 1  | time: 2.449s
[2K| Adam | epoch: 001 | loss: 0.00000 - acc: 0.0000 -- iter: 00064/20250


Training Step: 2  | total loss: [1m[32m0.62376[0m[0m | time: 2.702s
[2K| Adam | epoch: 001 | loss: 0.62376 - acc: 0.4359 -- iter: 00128/20250


Training Step: 3  | total loss: [1m[32m0.68055[0m[0m | time: 2.945s
[2K| Adam | epoch: 001 | loss: 0.68055 - acc: 0.5267 -- iter: 00192/20250


Training Step: 4  | total loss: [1m[32m0.69142[0m[0m | time: 3.193s
[2K| Adam | epoch: 001 | loss: 0.69142 - acc: 0.4129 -- iter: 00256/20250


Training Step: 5  | total loss: [1m[32m0.69241[0m[0m | time: 3.443s
[2K| Adam | epoch: 001 | loss: 0.69241 - acc: 0.4840 -- iter: 00320/20250


Training Step: 6  | total loss: [1m[32m0.69293[0m[0m | time: 3.698s
[2K| Adam | epoch: 001 | loss: 0.69293 - acc: 0.5043 -- iter: 00384/20250


Training Step: 7  | total loss: [1m[32m0.69269[0m[0m | time: 3.970s
[2K| Adam | epoch: 001 | loss: 0.69269 - acc: 0.5111 -- iter: 00448/20250


Training Step: 8  | total loss: [1m[32m0.69328[0m[0m | time: 4.214s
[2K| Adam | epoch: 001 | loss: 0.69328 - acc: 0.4961 -- iter: 00512/20250


Training Step: 9  | total loss: [1m[32m0.69357[0m[0m | time: 4.452s
[2K| Adam | epoch: 001 | loss: 0.69357 - acc: 0.4733 -- iter: 00576/20250


Training Step: 10  | total loss: [1m[32m0.69340[0m[0m | time: 4.695s
[2K| Adam | epoch: 001 | loss: 0.69340 - acc: 0.4632 -- iter: 00640/20250


Training Step: 11  | total loss: [1m[32m0.69354[0m[0m | time: 4.951s
[2K| Adam | epoch: 001 | loss: 0.69354 - acc: 0.4584 -- iter: 00704/20250


Training Step: 12  | total loss: [1m[32m0.69362[0m[0m | time: 5.206s
[2K| Adam | epoch: 001 | loss: 0.69362 - acc: 0.4490 -- iter: 00768/20250


Training Step: 13  | total loss: [1m[32m0.69344[0m[0m | time: 5.448s
[2K| Adam | epoch: 001 | loss: 0.69344 - acc: 0.4374 -- iter: 00832/20250


Training Step: 14  | total loss: [1m[32m0.69332[0m[0m | time: 5.696s
[2K| Adam | epoch: 001 | loss: 0.69332 - acc: 0.4438 -- iter: 00896/20250


Training Step: 15  | total loss: [1m[32m0.69299[0m[0m | time: 5.938s
[2K| Adam | epoch: 001 | loss: 0.69299 - acc: 0.4658 -- iter: 00960/20250


Training Step: 16  | total loss: [1m[32m0.69321[0m[0m | time: 6.239s
[2K| Adam | epoch: 001 | loss: 0.69321 - acc: 0.4669 -- iter: 01024/20250


Training Step: 17  | total loss: [1m[32m0.69313[0m[0m | time: 6.509s
[2K| Adam | epoch: 001 | loss: 0.69313 - acc: 0.5013 -- iter: 01088/20250


Training Step: 18  | total loss: [1m[32m0.69321[0m[0m | time: 6.756s
[2K| Adam | epoch: 001 | loss: 0.69321 - acc: 0.4955 -- iter: 01152/20250


Training Step: 19  | total loss: [1m[32m0.69314[0m[0m | time: 7.016s
[2K| Adam | epoch: 001 | loss: 0.69314 - acc: 0.4970 -- iter: 01216/20250


Training Step: 20  | total loss: [1m[32m0.69329[0m[0m | time: 7.256s
[2K| Adam | epoch: 001 | loss: 0.69329 - acc: 0.4879 -- iter: 01280/20250


Training Step: 21  | total loss: [1m[32m0.69293[0m[0m | time: 7.504s
[2K| Adam | epoch: 001 | loss: 0.69293 - acc: 0.5159 -- iter: 01344/20250


Training Step: 22  | total loss: [1m[32m0.69322[0m[0m | time: 7.749s
[2K| Adam | epoch: 001 | loss: 0.69322 - acc: 0.5018 -- iter: 01408/20250


Training Step: 23  | total loss: [1m[32m0.69317[0m[0m | time: 7.991s
[2K| Adam | epoch: 001 | loss: 0.69317 - acc: 0.5058 -- iter: 01472/20250


Training Step: 24  | total loss: [1m[32m0.69304[0m[0m | time: 8.243s
[2K| Adam | epoch: 001 | loss: 0.69304 - acc: 0.5129 -- iter: 01536/20250


Training Step: 25  | total loss: [1m[32m0.69302[0m[0m | time: 8.492s
[2K| Adam | epoch: 001 | loss: 0.69302 - acc: 0.5137 -- iter: 01600/20250


Training Step: 26  | total loss: [1m[32m0.69315[0m[0m | time: 8.742s
[2K| Adam | epoch: 001 | loss: 0.69315 - acc: 0.4935 -- iter: 01664/20250


Training Step: 27  | total loss: [1m[32m0.69339[0m[0m | time: 9.015s
[2K| Adam | epoch: 001 | loss: 0.69339 - acc: 0.4992 -- iter: 01728/20250


Training Step: 28  | total loss: [1m[32m0.69343[0m[0m | time: 9.262s
[2K| Adam | epoch: 001 | loss: 0.69343 - acc: 0.4955 -- iter: 01792/20250


Training Step: 29  | total loss: [1m[32m0.69398[0m[0m | time: 9.515s
[2K| Adam | epoch: 001 | loss: 0.69398 - acc: 0.4814 -- iter: 01856/20250


Training Step: 30  | total loss: [1m[32m0.69377[0m[0m | time: 9.764s
[2K| Adam | epoch: 001 | loss: 0.69377 - acc: 0.4895 -- iter: 01920/20250


Training Step: 31  | total loss: [1m[32m0.69359[0m[0m | time: 10.005s
[2K| Adam | epoch: 001 | loss: 0.69359 - acc: 0.4955 -- iter: 01984/20250


Training Step: 32  | total loss: [1m[32m0.69333[0m[0m | time: 10.247s
[2K| Adam | epoch: 001 | loss: 0.69333 - acc: 0.5071 -- iter: 02048/20250


Training Step: 33  | total loss: [1m[32m0.69327[0m[0m | time: 10.496s
[2K| Adam | epoch: 001 | loss: 0.69327 - acc: 0.5055 -- iter: 02112/20250


Training Step: 34  | total loss: [1m[32m0.69292[0m[0m | time: 10.749s
[2K| Adam | epoch: 001 | loss: 0.69292 - acc: 0.5311 -- iter: 02176/20250


Training Step: 35  | total loss: [1m[32m0.69294[0m[0m | time: 11.019s
[2K| Adam | epoch: 001 | loss: 0.69294 - acc: 0.5181 -- iter: 02240/20250


Training Step: 36  | total loss: [1m[32m0.69284[0m[0m | time: 11.264s
[2K| Adam | epoch: 001 | loss: 0.69284 - acc: 0.5176 -- iter: 02304/20250


Training Step: 37  | total loss: [1m[32m0.69285[0m[0m | time: 11.525s
[2K| Adam | epoch: 001 | loss: 0.69285 - acc: 0.5141 -- iter: 02368/20250


Training Step: 38  | total loss: [1m[32m0.69292[0m[0m | time: 11.768s
[2K| Adam | epoch: 001 | loss: 0.69292 - acc: 0.5082 -- iter: 02432/20250


Training Step: 39  | total loss: [1m[32m0.69298[0m[0m | time: 12.013s
[2K| Adam | epoch: 001 | loss: 0.69298 - acc: 0.5097 -- iter: 02496/20250


Training Step: 40  | total loss: [1m[32m0.69294[0m[0m | time: 12.267s
[2K| Adam | epoch: 001 | loss: 0.69294 - acc: 0.5108 -- iter: 02560/20250


Training Step: 41  | total loss: [1m[32m0.69323[0m[0m | time: 12.515s
[2K| Adam | epoch: 001 | loss: 0.69323 - acc: 0.5059 -- iter: 02624/20250


Training Step: 42  | total loss: [1m[32m0.69327[0m[0m | time: 12.761s
[2K| Adam | epoch: 001 | loss: 0.69327 - acc: 0.4992 -- iter: 02688/20250


Training Step: 43  | total loss: [1m[32m0.69339[0m[0m | time: 13.004s
[2K| Adam | epoch: 001 | loss: 0.69339 - acc: 0.4994 -- iter: 02752/20250


Training Step: 44  | total loss: [1m[32m0.69313[0m[0m | time: 13.246s
[2K| Adam | epoch: 001 | loss: 0.69313 - acc: 0.5130 -- iter: 02816/20250


Training Step: 45  | total loss: [1m[32m0.69309[0m[0m | time: 13.489s
[2K| Adam | epoch: 001 | loss: 0.69309 - acc: 0.5188 -- iter: 02880/20250


Training Step: 46  | total loss: [1m[32m0.69302[0m[0m | time: 13.764s
[2K| Adam | epoch: 001 | loss: 0.69302 - acc: 0.5156 -- iter: 02944/20250


Training Step: 47  | total loss: [1m[32m0.69298[0m[0m | time: 14.011s
[2K| Adam | epoch: 001 | loss: 0.69298 - acc: 0.5156 -- iter: 03008/20250


Training Step: 48  | total loss: [1m[32m0.69337[0m[0m | time: 14.251s
[2K| Adam | epoch: 001 | loss: 0.69337 - acc: 0.5006 -- iter: 03072/20250


Training Step: 49  | total loss: [1m[32m0.69323[0m[0m | time: 14.494s
[2K| Adam | epoch: 001 | loss: 0.69323 - acc: 0.5079 -- iter: 03136/20250


Training Step: 50  | total loss: [1m[32m0.69296[0m[0m | time: 14.738s
[2K| Adam | epoch: 001 | loss: 0.69296 - acc: 0.5188 -- iter: 03200/20250


Training Step: 51  | total loss: [1m[32m0.69251[0m[0m | time: 14.995s
[2K| Adam | epoch: 001 | loss: 0.69251 - acc: 0.5278 -- iter: 03264/20250


Training Step: 52  | total loss: [1m[32m0.69287[0m[0m | time: 15.236s
[2K| Adam | epoch: 001 | loss: 0.69287 - acc: 0.5166 -- iter: 03328/20250


Training Step: 53  | total loss: [1m[32m0.69302[0m[0m | time: 15.480s
[2K| Adam | epoch: 001 | loss: 0.69302 - acc: 0.5119 -- iter: 03392/20250


Training Step: 54  | total loss: [1m[32m0.69343[0m[0m | time: 15.723s
[2K| Adam | epoch: 001 | loss: 0.69343 - acc: 0.5011 -- iter: 03456/20250


Training Step: 55  | total loss: [1m[32m0.69369[0m[0m | time: 15.968s
[2K| Adam | epoch: 001 | loss: 0.69369 - acc: 0.4898 -- iter: 03520/20250


Training Step: 56  | total loss: [1m[32m0.69371[0m[0m | time: 16.224s
[2K| Adam | epoch: 001 | loss: 0.69371 - acc: 0.4978 -- iter: 03584/20250


Training Step: 57  | total loss: [1m[32m0.69353[0m[0m | time: 16.486s
[2K| Adam | epoch: 001 | loss: 0.69353 - acc: 0.5003 -- iter: 03648/20250


Training Step: 58  | total loss: [1m[32m0.69350[0m[0m | time: 16.731s
[2K| Adam | epoch: 001 | loss: 0.69350 - acc: 0.5002 -- iter: 03712/20250


Training Step: 59  | total loss: [1m[32m0.69402[0m[0m | time: 17.001s
[2K| Adam | epoch: 001 | loss: 0.69402 - acc: 0.4855 -- iter: 03776/20250


Training Step: 60  | total loss: [1m[32m0.69373[0m[0m | time: 17.248s
[2K| Adam | epoch: 001 | loss: 0.69373 - acc: 0.4916 -- iter: 03840/20250


Training Step: 61  | total loss: [1m[32m0.69332[0m[0m | time: 17.490s
[2K| Adam | epoch: 001 | loss: 0.69332 - acc: 0.5008 -- iter: 03904/20250


Training Step: 62  | total loss: [1m[32m0.69315[0m[0m | time: 17.732s
[2K| Adam | epoch: 001 | loss: 0.69315 - acc: 0.5007 -- iter: 03968/20250


Training Step: 63  | total loss: [1m[32m0.69325[0m[0m | time: 17.980s
[2K| Adam | epoch: 001 | loss: 0.69325 - acc: 0.4947 -- iter: 04032/20250


Training Step: 64  | total loss: [1m[32m0.69355[0m[0m | time: 18.220s
[2K| Adam | epoch: 001 | loss: 0.69355 - acc: 0.4836 -- iter: 04096/20250


Training Step: 65  | total loss: [1m[32m0.69358[0m[0m | time: 18.459s
[2K| Adam | epoch: 001 | loss: 0.69358 - acc: 0.4818 -- iter: 04160/20250


Training Step: 66  | total loss: [1m[32m0.69339[0m[0m | time: 18.717s
[2K| Adam | epoch: 001 | loss: 0.69339 - acc: 0.4916 -- iter: 04224/20250


Training Step: 67  | total loss: [1m[32m0.69317[0m[0m | time: 18.989s
[2K| Adam | epoch: 001 | loss: 0.69317 - acc: 0.5020 -- iter: 04288/20250


Training Step: 68  | total loss: [1m[32m0.69308[0m[0m | time: 19.228s
[2K| Adam | epoch: 001 | loss: 0.69308 - acc: 0.5036 -- iter: 04352/20250


Training Step: 69  | total loss: [1m[32m0.69298[0m[0m | time: 19.469s
[2K| Adam | epoch: 001 | loss: 0.69298 - acc: 0.5087 -- iter: 04416/20250


Training Step: 70  | total loss: [1m[32m0.69321[0m[0m | time: 19.713s
[2K| Adam | epoch: 001 | loss: 0.69321 - acc: 0.4986 -- iter: 04480/20250


Training Step: 71  | total loss: [1m[32m0.69320[0m[0m | time: 19.958s
[2K| Adam | epoch: 001 | loss: 0.69320 - acc: 0.4935 -- iter: 04544/20250


Training Step: 72  | total loss: [1m[32m0.69333[0m[0m | time: 20.201s
[2K| Adam | epoch: 001 | loss: 0.69333 - acc: 0.4889 -- iter: 04608/20250


Training Step: 73  | total loss: [1m[32m0.69307[0m[0m | time: 20.440s
[2K| Adam | epoch: 001 | loss: 0.69307 - acc: 0.5058 -- iter: 04672/20250


Training Step: 74  | total loss: [1m[32m0.69297[0m[0m | time: 20.683s
[2K| Adam | epoch: 001 | loss: 0.69297 - acc: 0.5137 -- iter: 04736/20250


Training Step: 75  | total loss: [1m[32m0.69291[0m[0m | time: 20.928s
[2K| Adam | epoch: 001 | loss: 0.69291 - acc: 0.5156 -- iter: 04800/20250


Training Step: 76  | total loss: [1m[32m0.69297[0m[0m | time: 21.187s
[2K| Adam | epoch: 001 | loss: 0.69297 - acc: 0.5106 -- iter: 04864/20250


Training Step: 77  | total loss: [1m[32m0.69319[0m[0m | time: 21.453s
[2K| Adam | epoch: 001 | loss: 0.69319 - acc: 0.5078 -- iter: 04928/20250


Training Step: 78  | total loss: [1m[32m0.69342[0m[0m | time: 21.699s
[2K| Adam | epoch: 001 | loss: 0.69342 - acc: 0.5054 -- iter: 04992/20250


Training Step: 79  | total loss: [1m[32m0.69362[0m[0m | time: 21.943s
[2K| Adam | epoch: 001 | loss: 0.69362 - acc: 0.5000 -- iter: 05056/20250


Training Step: 80  | total loss: [1m[32m0.69360[0m[0m | time: 22.194s
[2K| Adam | epoch: 001 | loss: 0.69360 - acc: 0.4952 -- iter: 05120/20250


Training Step: 81  | total loss: [1m[32m0.69339[0m[0m | time: 22.441s
[2K| Adam | epoch: 001 | loss: 0.69339 - acc: 0.4988 -- iter: 05184/20250


Training Step: 82  | total loss: [1m[32m0.69346[0m[0m | time: 22.691s
[2K| Adam | epoch: 001 | loss: 0.69346 - acc: 0.4943 -- iter: 05248/20250


Training Step: 83  | total loss: [1m[32m0.69328[0m[0m | time: 22.946s
[2K| Adam | epoch: 001 | loss: 0.69328 - acc: 0.5089 -- iter: 05312/20250


Training Step: 84  | total loss: [1m[32m0.69345[0m[0m | time: 23.191s
[2K| Adam | epoch: 001 | loss: 0.69345 - acc: 0.5002 -- iter: 05376/20250


Training Step: 85  | total loss: [1m[32m0.69342[0m[0m | time: 23.435s
[2K| Adam | epoch: 001 | loss: 0.69342 - acc: 0.4970 -- iter: 05440/20250


Training Step: 86  | total loss: [1m[32m0.69317[0m[0m | time: 23.700s
[2K| Adam | epoch: 001 | loss: 0.69317 - acc: 0.5020 -- iter: 05504/20250


Training Step: 87  | total loss: [1m[32m0.69322[0m[0m | time: 23.955s
[2K| Adam | epoch: 001 | loss: 0.69322 - acc: 0.5003 -- iter: 05568/20250


Training Step: 88  | total loss: [1m[32m0.69287[0m[0m | time: 24.209s
[2K| Adam | epoch: 001 | loss: 0.69287 - acc: 0.5127 -- iter: 05632/20250


Training Step: 89  | total loss: [1m[32m0.69272[0m[0m | time: 24.453s
[2K| Adam | epoch: 001 | loss: 0.69272 - acc: 0.5161 -- iter: 05696/20250


Training Step: 90  | total loss: [1m[32m0.69262[0m[0m | time: 24.697s
[2K| Adam | epoch: 001 | loss: 0.69262 - acc: 0.5223 -- iter: 05760/20250


Training Step: 91  | total loss: [1m[32m0.69272[0m[0m | time: 24.935s
[2K| Adam | epoch: 001 | loss: 0.69272 - acc: 0.5185 -- iter: 05824/20250


Training Step: 92  | total loss: [1m[32m0.69298[0m[0m | time: 25.184s
[2K| Adam | epoch: 001 | loss: 0.69298 - acc: 0.5136 -- iter: 05888/20250


Training Step: 93  | total loss: [1m[32m0.69273[0m[0m | time: 25.433s
[2K| Adam | epoch: 001 | loss: 0.69273 - acc: 0.5185 -- iter: 05952/20250


Training Step: 94  | total loss: [1m[32m0.69319[0m[0m | time: 25.684s
[2K| Adam | epoch: 001 | loss: 0.69319 - acc: 0.5166 -- iter: 06016/20250


Training Step: 95  | total loss: [1m[32m0.69370[0m[0m | time: 25.933s
[2K| Adam | epoch: 001 | loss: 0.69370 - acc: 0.5103 -- iter: 06080/20250


Training Step: 96  | total loss: [1m[32m0.69342[0m[0m | time: 26.214s
[2K| Adam | epoch: 001 | loss: 0.69342 - acc: 0.5124 -- iter: 06144/20250


Training Step: 97  | total loss: [1m[32m0.69328[0m[0m | time: 26.484s
[2K| Adam | epoch: 001 | loss: 0.69328 - acc: 0.5111 -- iter: 06208/20250


Training Step: 98  | total loss: [1m[32m0.69307[0m[0m | time: 26.720s
[2K| Adam | epoch: 001 | loss: 0.69307 - acc: 0.5147 -- iter: 06272/20250


Training Step: 99  | total loss: [1m[32m0.69315[0m[0m | time: 26.964s
[2K| Adam | epoch: 001 | loss: 0.69315 - acc: 0.5132 -- iter: 06336/20250


Training Step: 100  | total loss: [1m[32m0.69284[0m[0m | time: 27.213s
[2K| Adam | epoch: 001 | loss: 0.69284 - acc: 0.5197 -- iter: 06400/20250


Training Step: 101  | total loss: [1m[32m0.69267[0m[0m | time: 27.462s
[2K| Adam | epoch: 001 | loss: 0.69267 - acc: 0.5256 -- iter: 06464/20250


Training Step: 102  | total loss: [1m[32m0.69269[0m[0m | time: 27.716s
[2K| Adam | epoch: 001 | loss: 0.69269 - acc: 0.5246 -- iter: 06528/20250


Training Step: 103  | total loss: [1m[32m0.69254[0m[0m | time: 27.964s
[2K| Adam | epoch: 001 | loss: 0.69254 - acc: 0.5362 -- iter: 06592/20250


Training Step: 104  | total loss: [1m[32m0.69266[0m[0m | time: 28.208s
[2K| Adam | epoch: 001 | loss: 0.69266 - acc: 0.5357 -- iter: 06656/20250


Training Step: 105  | total loss: [1m[32m0.69288[0m[0m | time: 28.469s
[2K| Adam | epoch: 001 | loss: 0.69288 - acc: 0.5368 -- iter: 06720/20250


Training Step: 106  | total loss: [1m[32m0.69266[0m[0m | time: 28.750s
[2K| Adam | epoch: 001 | loss: 0.69266 - acc: 0.5409 -- iter: 06784/20250


Training Step: 107  | total loss: [1m[32m0.69246[0m[0m | time: 29.044s
[2K| Adam | epoch: 001 | loss: 0.69246 - acc: 0.5384 -- iter: 06848/20250


Training Step: 108  | total loss: [1m[32m0.69247[0m[0m | time: 29.296s
[2K| Adam | epoch: 001 | loss: 0.69247 - acc: 0.5424 -- iter: 06912/20250


Training Step: 109  | total loss: [1m[32m0.69199[0m[0m | time: 29.533s
[2K| Adam | epoch: 001 | loss: 0.69199 - acc: 0.5506 -- iter: 06976/20250


Training Step: 110  | total loss: [1m[32m0.69143[0m[0m | time: 29.776s
[2K| Adam | epoch: 001 | loss: 0.69143 - acc: 0.5487 -- iter: 07040/20250


Training Step: 111  | total loss: [1m[32m0.69466[0m[0m | time: 30.028s
[2K| Adam | epoch: 001 | loss: 0.69466 - acc: 0.5391 -- iter: 07104/20250


Training Step: 112  | total loss: [1m[32m0.69336[0m[0m | time: 30.277s
[2K| Adam | epoch: 001 | loss: 0.69336 - acc: 0.5415 -- iter: 07168/20250


Training Step: 113  | total loss: [1m[32m0.69212[0m[0m | time: 30.527s
[2K| Adam | epoch: 001 | loss: 0.69212 - acc: 0.5545 -- iter: 07232/20250


Training Step: 114  | total loss: [1m[32m0.69215[0m[0m | time: 30.768s
[2K| Adam | epoch: 001 | loss: 0.69215 - acc: 0.5584 -- iter: 07296/20250


Training Step: 115  | total loss: [1m[32m0.69164[0m[0m | time: 31.030s
[2K| Adam | epoch: 001 | loss: 0.69164 - acc: 0.5635 -- iter: 07360/20250


Training Step: 116  | total loss: [1m[32m0.69187[0m[0m | time: 31.282s
[2K| Adam | epoch: 001 | loss: 0.69187 - acc: 0.5603 -- iter: 07424/20250


Training Step: 117  | total loss: [1m[32m0.69221[0m[0m | time: 31.544s
[2K| Adam | epoch: 001 | loss: 0.69221 - acc: 0.5558 -- iter: 07488/20250


Training Step: 118  | total loss: [1m[32m0.69226[0m[0m | time: 31.787s
[2K| Adam | epoch: 001 | loss: 0.69226 - acc: 0.5534 -- iter: 07552/20250


Training Step: 119  | total loss: [1m[32m0.69358[0m[0m | time: 32.025s
[2K| Adam | epoch: 001 | loss: 0.69358 - acc: 0.5496 -- iter: 07616/20250


Training Step: 120  | total loss: [1m[32m0.69224[0m[0m | time: 32.275s
[2K| Adam | epoch: 001 | loss: 0.69224 - acc: 0.5556 -- iter: 07680/20250


Training Step: 121  | total loss: [1m[32m0.69220[0m[0m | time: 32.517s
[2K| Adam | epoch: 001 | loss: 0.69220 - acc: 0.5500 -- iter: 07744/20250


Training Step: 122  | total loss: [1m[32m0.69164[0m[0m | time: 32.753s
[2K| Adam | epoch: 001 | loss: 0.69164 - acc: 0.5481 -- iter: 07808/20250


Training Step: 123  | total loss: [1m[32m0.69487[0m[0m | time: 33.001s
[2K| Adam | epoch: 001 | loss: 0.69487 - acc: 0.5277 -- iter: 07872/20250


Training Step: 124  | total loss: [1m[32m0.69545[0m[0m | time: 33.241s
[2K| Adam | epoch: 001 | loss: 0.69545 - acc: 0.5234 -- iter: 07936/20250


Training Step: 125  | total loss: [1m[32m0.69601[0m[0m | time: 33.500s
[2K| Adam | epoch: 001 | loss: 0.69601 - acc: 0.5163 -- iter: 08000/20250


Training Step: 126  | total loss: [1m[32m0.69669[0m[0m | time: 33.748s
[2K| Adam | epoch: 001 | loss: 0.69669 - acc: 0.5132 -- iter: 08064/20250


Training Step: 127  | total loss: [1m[32m0.69683[0m[0m | time: 33.994s
[2K| Adam | epoch: 001 | loss: 0.69683 - acc: 0.5040 -- iter: 08128/20250


Training Step: 128  | total loss: [1m[32m0.69609[0m[0m | time: 34.236s
[2K| Adam | epoch: 001 | loss: 0.69609 - acc: 0.5130 -- iter: 08192/20250


Training Step: 129  | total loss: [1m[32m0.69546[0m[0m | time: 34.479s
[2K| Adam | epoch: 001 | loss: 0.69546 - acc: 0.5179 -- iter: 08256/20250


Training Step: 130  | total loss: [1m[32m0.69515[0m[0m | time: 34.719s
[2K| Adam | epoch: 001 | loss: 0.69515 - acc: 0.5162 -- iter: 08320/20250


Training Step: 131  | total loss: [1m[32m0.69484[0m[0m | time: 34.964s
[2K| Adam | epoch: 001 | loss: 0.69484 - acc: 0.5192 -- iter: 08384/20250


Training Step: 132  | total loss: [1m[32m0.69430[0m[0m | time: 35.210s
[2K| Adam | epoch: 001 | loss: 0.69430 - acc: 0.5251 -- iter: 08448/20250


Training Step: 133  | total loss: [1m[32m0.69417[0m[0m | time: 35.459s
[2K| Adam | epoch: 001 | loss: 0.69417 - acc: 0.5179 -- iter: 08512/20250


Training Step: 134  | total loss: [1m[32m0.69387[0m[0m | time: 35.701s
[2K| Adam | epoch: 001 | loss: 0.69387 - acc: 0.5177 -- iter: 08576/20250


Training Step: 135  | total loss: [1m[32m0.69399[0m[0m | time: 35.967s
[2K| Adam | epoch: 001 | loss: 0.69399 - acc: 0.5081 -- iter: 08640/20250


Training Step: 136  | total loss: [1m[32m0.69378[0m[0m | time: 36.213s
[2K| Adam | epoch: 001 | loss: 0.69378 - acc: 0.5104 -- iter: 08704/20250


Training Step: 137  | total loss: [1m[32m0.69381[0m[0m | time: 36.459s
[2K| Adam | epoch: 001 | loss: 0.69381 - acc: 0.4984 -- iter: 08768/20250


Training Step: 138  | total loss: [1m[32m0.69360[0m[0m | time: 36.703s
[2K| Adam | epoch: 001 | loss: 0.69360 - acc: 0.4970 -- iter: 08832/20250


Training Step: 139  | total loss: [1m[32m0.69353[0m[0m | time: 36.949s
[2K| Adam | epoch: 001 | loss: 0.69353 - acc: 0.4973 -- iter: 08896/20250


Training Step: 140  | total loss: [1m[32m0.69334[0m[0m | time: 37.193s
[2K| Adam | epoch: 001 | loss: 0.69334 - acc: 0.5054 -- iter: 08960/20250


Training Step: 141  | total loss: [1m[32m0.69333[0m[0m | time: 37.440s
[2K| Adam | epoch: 001 | loss: 0.69333 - acc: 0.5064 -- iter: 09024/20250


Training Step: 142  | total loss: [1m[32m0.69375[0m[0m | time: 37.682s
[2K| Adam | epoch: 001 | loss: 0.69375 - acc: 0.4917 -- iter: 09088/20250


Training Step: 143  | total loss: [1m[32m0.69380[0m[0m | time: 37.927s
[2K| Adam | epoch: 001 | loss: 0.69380 - acc: 0.4879 -- iter: 09152/20250


Training Step: 144  | total loss: [1m[32m0.69328[0m[0m | time: 38.168s
[2K| Adam | epoch: 001 | loss: 0.69328 - acc: 0.5000 -- iter: 09216/20250


Training Step: 145  | total loss: [1m[32m0.69335[0m[0m | time: 38.407s
[2K| Adam | epoch: 001 | loss: 0.69335 - acc: 0.4922 -- iter: 09280/20250


Training Step: 146  | total loss: [1m[32m0.69318[0m[0m | time: 38.662s
[2K| Adam | epoch: 001 | loss: 0.69318 - acc: 0.4914 -- iter: 09344/20250


Training Step: 147  | total loss: [1m[32m0.69306[0m[0m | time: 38.907s
[2K| Adam | epoch: 001 | loss: 0.69306 - acc: 0.4892 -- iter: 09408/20250


Training Step: 148  | total loss: [1m[32m0.69316[0m[0m | time: 39.156s
[2K| Adam | epoch: 001 | loss: 0.69316 - acc: 0.4855 -- iter: 09472/20250


Training Step: 149  | total loss: [1m[32m0.69301[0m[0m | time: 39.393s
[2K| Adam | epoch: 001 | loss: 0.69301 - acc: 0.4901 -- iter: 09536/20250


Training Step: 150  | total loss: [1m[32m0.69247[0m[0m | time: 39.634s
[2K| Adam | epoch: 001 | loss: 0.69247 - acc: 0.5005 -- iter: 09600/20250


Training Step: 151  | total loss: [1m[32m0.69240[0m[0m | time: 39.876s
[2K| Adam | epoch: 001 | loss: 0.69240 - acc: 0.5004 -- iter: 09664/20250


Training Step: 152  | total loss: [1m[32m0.69237[0m[0m | time: 40.118s
[2K| Adam | epoch: 001 | loss: 0.69237 - acc: 0.5051 -- iter: 09728/20250


Training Step: 153  | total loss: [1m[32m0.69270[0m[0m | time: 40.371s
[2K| Adam | epoch: 001 | loss: 0.69270 - acc: 0.5014 -- iter: 09792/20250


Training Step: 154  | total loss: [1m[32m0.69246[0m[0m | time: 40.614s
[2K| Adam | epoch: 001 | loss: 0.69246 - acc: 0.5076 -- iter: 09856/20250


Training Step: 155  | total loss: [1m[32m0.69297[0m[0m | time: 40.864s
[2K| Adam | epoch: 001 | loss: 0.69297 - acc: 0.4990 -- iter: 09920/20250


Training Step: 156  | total loss: [1m[32m0.69220[0m[0m | time: 41.119s
[2K| Adam | epoch: 001 | loss: 0.69220 - acc: 0.5178 -- iter: 09984/20250


Training Step: 157  | total loss: [1m[32m0.69228[0m[0m | time: 41.368s
[2K| Adam | epoch: 001 | loss: 0.69228 - acc: 0.5207 -- iter: 10048/20250


Training Step: 158  | total loss: [1m[32m0.69260[0m[0m | time: 41.607s
[2K| Adam | epoch: 001 | loss: 0.69260 - acc: 0.5109 -- iter: 10112/20250


Training Step: 159  | total loss: [1m[32m0.69272[0m[0m | time: 41.852s
[2K| Adam | epoch: 001 | loss: 0.69272 - acc: 0.5035 -- iter: 10176/20250


Training Step: 160  | total loss: [1m[32m0.69242[0m[0m | time: 42.100s
[2K| Adam | epoch: 001 | loss: 0.69242 - acc: 0.5063 -- iter: 10240/20250


Training Step: 161  | total loss: [1m[32m0.69264[0m[0m | time: 42.348s
[2K| Adam | epoch: 001 | loss: 0.69264 - acc: 0.5025 -- iter: 10304/20250


Training Step: 162  | total loss: [1m[32m0.69193[0m[0m | time: 42.588s
[2K| Adam | epoch: 001 | loss: 0.69193 - acc: 0.5085 -- iter: 10368/20250


Training Step: 163  | total loss: [1m[32m0.69182[0m[0m | time: 42.835s
[2K| Adam | epoch: 001 | loss: 0.69182 - acc: 0.5092 -- iter: 10432/20250


Training Step: 164  | total loss: [1m[32m0.69183[0m[0m | time: 43.073s
[2K| Adam | epoch: 001 | loss: 0.69183 - acc: 0.5099 -- iter: 10496/20250


Training Step: 165  | total loss: [1m[32m0.69129[0m[0m | time: 43.321s
[2K| Adam | epoch: 001 | loss: 0.69129 - acc: 0.5167 -- iter: 10560/20250


Training Step: 166  | total loss: [1m[32m0.69050[0m[0m | time: 43.587s
[2K| Adam | epoch: 001 | loss: 0.69050 - acc: 0.5275 -- iter: 10624/20250


Training Step: 167  | total loss: [1m[32m0.69096[0m[0m | time: 43.833s
[2K| Adam | epoch: 001 | loss: 0.69096 - acc: 0.5279 -- iter: 10688/20250


Training Step: 168  | total loss: [1m[32m0.69042[0m[0m | time: 44.076s
[2K| Adam | epoch: 001 | loss: 0.69042 - acc: 0.5329 -- iter: 10752/20250


Training Step: 169  | total loss: [1m[32m0.69032[0m[0m | time: 44.321s
[2K| Adam | epoch: 001 | loss: 0.69032 - acc: 0.5390 -- iter: 10816/20250


Training Step: 170  | total loss: [1m[32m0.68978[0m[0m | time: 44.563s
[2K| Adam | epoch: 001 | loss: 0.68978 - acc: 0.5476 -- iter: 10880/20250


Training Step: 171  | total loss: [1m[32m0.68861[0m[0m | time: 44.810s
[2K| Adam | epoch: 001 | loss: 0.68861 - acc: 0.5600 -- iter: 10944/20250


Training Step: 172  | total loss: [1m[32m0.68962[0m[0m | time: 45.059s
[2K| Adam | epoch: 001 | loss: 0.68962 - acc: 0.5572 -- iter: 11008/20250


Training Step: 173  | total loss: [1m[32m0.68833[0m[0m | time: 45.304s
[2K| Adam | epoch: 001 | loss: 0.68833 - acc: 0.5624 -- iter: 11072/20250


Training Step: 174  | total loss: [1m[32m0.68778[0m[0m | time: 45.551s
[2K| Adam | epoch: 001 | loss: 0.68778 - acc: 0.5640 -- iter: 11136/20250


Training Step: 175  | total loss: [1m[32m0.67999[0m[0m | time: 45.807s
[2K| Adam | epoch: 001 | loss: 0.67999 - acc: 0.5841 -- iter: 11200/20250


Training Step: 176  | total loss: [1m[32m0.68670[0m[0m | time: 46.060s
[2K| Adam | epoch: 001 | loss: 0.68670 - acc: 0.5820 -- iter: 11264/20250


Training Step: 177  | total loss: [1m[32m0.69117[0m[0m | time: 46.304s
[2K| Adam | epoch: 001 | loss: 0.69117 - acc: 0.5816 -- iter: 11328/20250


Training Step: 178  | total loss: [1m[32m0.69049[0m[0m | time: 46.549s
[2K| Adam | epoch: 001 | loss: 0.69049 - acc: 0.5828 -- iter: 11392/20250


Training Step: 179  | total loss: [1m[32m0.70094[0m[0m | time: 46.798s
[2K| Adam | epoch: 001 | loss: 0.70094 - acc: 0.5683 -- iter: 11456/20250


Training Step: 180  | total loss: [1m[32m0.70456[0m[0m | time: 47.043s
[2K| Adam | epoch: 001 | loss: 0.70456 - acc: 0.5567 -- iter: 11520/20250


Training Step: 181  | total loss: [1m[32m0.70314[0m[0m | time: 47.287s
[2K| Adam | epoch: 001 | loss: 0.70314 - acc: 0.5573 -- iter: 11584/20250


Training Step: 182  | total loss: [1m[32m0.70181[0m[0m | time: 47.526s
[2K| Adam | epoch: 001 | loss: 0.70181 - acc: 0.5578 -- iter: 11648/20250


Training Step: 183  | total loss: [1m[32m0.70104[0m[0m | time: 47.768s
[2K| Adam | epoch: 001 | loss: 0.70104 - acc: 0.5521 -- iter: 11712/20250


Training Step: 184  | total loss: [1m[32m0.69972[0m[0m | time: 48.018s
[2K| Adam | epoch: 001 | loss: 0.69972 - acc: 0.5500 -- iter: 11776/20250


Training Step: 185  | total loss: [1m[32m0.69917[0m[0m | time: 48.273s
[2K| Adam | epoch: 001 | loss: 0.69917 - acc: 0.5450 -- iter: 11840/20250


Training Step: 186  | total loss: [1m[32m0.70032[0m[0m | time: 48.522s
[2K| Adam | epoch: 001 | loss: 0.70032 - acc: 0.5342 -- iter: 11904/20250


Training Step: 187  | total loss: [1m[32m0.69762[0m[0m | time: 48.776s
[2K| Adam | epoch: 001 | loss: 0.69762 - acc: 0.5449 -- iter: 11968/20250


Training Step: 188  | total loss: [1m[32m0.69556[0m[0m | time: 49.021s
[2K| Adam | epoch: 001 | loss: 0.69556 - acc: 0.5466 -- iter: 12032/20250


Training Step: 189  | total loss: [1m[32m0.69651[0m[0m | time: 49.264s
[2K| Adam | epoch: 001 | loss: 0.69651 - acc: 0.5420 -- iter: 12096/20250


Training Step: 190  | total loss: [1m[32m0.70052[0m[0m | time: 49.512s
[2K| Adam | epoch: 001 | loss: 0.70052 - acc: 0.5253 -- iter: 12160/20250


Training Step: 191  | total loss: [1m[32m0.69935[0m[0m | time: 49.756s
[2K| Adam | epoch: 001 | loss: 0.69935 - acc: 0.5290 -- iter: 12224/20250


Training Step: 192  | total loss: [1m[32m0.69792[0m[0m | time: 49.996s
[2K| Adam | epoch: 001 | loss: 0.69792 - acc: 0.5323 -- iter: 12288/20250


Training Step: 193  | total loss: [1m[32m0.69665[0m[0m | time: 50.247s
[2K| Adam | epoch: 001 | loss: 0.69665 - acc: 0.5354 -- iter: 12352/20250


Training Step: 194  | total loss: [1m[32m0.69639[0m[0m | time: 50.497s
[2K| Adam | epoch: 001 | loss: 0.69639 - acc: 0.5412 -- iter: 12416/20250


Training Step: 195  | total loss: [1m[32m0.69671[0m[0m | time: 50.744s
[2K| Adam | epoch: 001 | loss: 0.69671 - acc: 0.5386 -- iter: 12480/20250


Training Step: 196  | total loss: [1m[32m0.69632[0m[0m | time: 50.985s
[2K| Adam | epoch: 001 | loss: 0.69632 - acc: 0.5301 -- iter: 12544/20250


Training Step: 197  | total loss: [1m[32m0.69605[0m[0m | time: 51.225s
[2K| Adam | epoch: 001 | loss: 0.69605 - acc: 0.5271 -- iter: 12608/20250


Training Step: 198  | total loss: [1m[32m0.69483[0m[0m | time: 51.470s
[2K| Adam | epoch: 001 | loss: 0.69483 - acc: 0.5369 -- iter: 12672/20250


Training Step: 199  | total loss: [1m[32m0.69516[0m[0m | time: 51.717s
[2K| Adam | epoch: 001 | loss: 0.69516 - acc: 0.5269 -- iter: 12736/20250


Training Step: 200  | total loss: [1m[32m0.69462[0m[0m | time: 51.961s
[2K| Adam | epoch: 001 | loss: 0.69462 - acc: 0.5258 -- iter: 12800/20250


Training Step: 201  | total loss: [1m[32m0.69493[0m[0m | time: 52.207s
[2K| Adam | epoch: 001 | loss: 0.69493 - acc: 0.5263 -- iter: 12864/20250


Training Step: 202  | total loss: [1m[32m0.69535[0m[0m | time: 52.448s
[2K| Adam | epoch: 001 | loss: 0.69535 - acc: 0.5159 -- iter: 12928/20250


Training Step: 203  | total loss: [1m[32m0.69451[0m[0m | time: 52.690s
[2K| Adam | epoch: 001 | loss: 0.69451 - acc: 0.5206 -- iter: 12992/20250


Training Step: 204  | total loss: [1m[32m0.69341[0m[0m | time: 52.936s
[2K| Adam | epoch: 001 | loss: 0.69341 - acc: 0.5263 -- iter: 13056/20250


Training Step: 205  | total loss: [1m[32m0.69264[0m[0m | time: 53.192s
[2K| Adam | epoch: 001 | loss: 0.69264 - acc: 0.5315 -- iter: 13120/20250


Training Step: 206  | total loss: [1m[32m0.69344[0m[0m | time: 53.454s
[2K| Adam | epoch: 001 | loss: 0.69344 - acc: 0.5237 -- iter: 13184/20250


Training Step: 207  | total loss: [1m[32m0.69280[0m[0m | time: 53.698s
[2K| Adam | epoch: 001 | loss: 0.69280 - acc: 0.5229 -- iter: 13248/20250


Training Step: 208  | total loss: [1m[32m0.69353[0m[0m | time: 53.949s
[2K| Adam | epoch: 001 | loss: 0.69353 - acc: 0.5112 -- iter: 13312/20250


Training Step: 209  | total loss: [1m[32m0.69300[0m[0m | time: 54.182s
[2K| Adam | epoch: 001 | loss: 0.69300 - acc: 0.5101 -- iter: 13376/20250


Training Step: 210  | total loss: [1m[32m0.69238[0m[0m | time: 54.422s
[2K| Adam | epoch: 001 | loss: 0.69238 - acc: 0.5122 -- iter: 13440/20250


Training Step: 211  | total loss: [1m[32m0.69276[0m[0m | time: 54.664s
[2K| Adam | epoch: 001 | loss: 0.69276 - acc: 0.5141 -- iter: 13504/20250


Training Step: 212  | total loss: [1m[32m0.69313[0m[0m | time: 54.908s
[2K| Adam | epoch: 001 | loss: 0.69313 - acc: 0.5111 -- iter: 13568/20250


Training Step: 213  | total loss: [1m[32m0.69325[0m[0m | time: 55.158s
[2K| Adam | epoch: 001 | loss: 0.69325 - acc: 0.5053 -- iter: 13632/20250


Training Step: 214  | total loss: [1m[32m0.69331[0m[0m | time: 55.411s
[2K| Adam | epoch: 001 | loss: 0.69331 - acc: 0.5126 -- iter: 13696/20250


Training Step: 215  | total loss: [1m[32m0.69263[0m[0m | time: 55.668s
[2K| Adam | epoch: 001 | loss: 0.69263 - acc: 0.5176 -- iter: 13760/20250


Training Step: 216  | total loss: [1m[32m0.69275[0m[0m | time: 55.922s
[2K| Adam | epoch: 001 | loss: 0.69275 - acc: 0.5096 -- iter: 13824/20250


Training Step: 217  | total loss: [1m[32m0.69161[0m[0m | time: 56.166s
[2K| Adam | epoch: 001 | loss: 0.69161 - acc: 0.5211 -- iter: 13888/20250


Training Step: 218  | total loss: [1m[32m0.69220[0m[0m | time: 56.405s
[2K| Adam | epoch: 001 | loss: 0.69220 - acc: 0.5096 -- iter: 13952/20250


Training Step: 219  | total loss: [1m[32m0.69240[0m[0m | time: 56.646s
[2K| Adam | epoch: 001 | loss: 0.69240 - acc: 0.5087 -- iter: 14016/20250


Training Step: 220  | total loss: [1m[32m0.69257[0m[0m | time: 56.891s
[2K| Adam | epoch: 001 | loss: 0.69257 - acc: 0.5094 -- iter: 14080/20250


Training Step: 221  | total loss: [1m[32m0.69237[0m[0m | time: 57.135s
[2K| Adam | epoch: 001 | loss: 0.69237 - acc: 0.5194 -- iter: 14144/20250


Training Step: 222  | total loss: [1m[32m0.69236[0m[0m | time: 57.377s
[2K| Adam | epoch: 001 | loss: 0.69236 - acc: 0.5221 -- iter: 14208/20250


Training Step: 223  | total loss: [1m[32m0.69215[0m[0m | time: 57.626s
[2K| Adam | epoch: 001 | loss: 0.69215 - acc: 0.5262 -- iter: 14272/20250


Training Step: 224  | total loss: [1m[32m0.69200[0m[0m | time: 57.887s
[2K| Adam | epoch: 001 | loss: 0.69200 - acc: 0.5298 -- iter: 14336/20250


Training Step: 225  | total loss: [1m[32m0.69183[0m[0m | time: 58.129s
[2K| Adam | epoch: 001 | loss: 0.69183 - acc: 0.5253 -- iter: 14400/20250


Training Step: 226  | total loss: [1m[32m0.69262[0m[0m | time: 58.380s
[2K| Adam | epoch: 001 | loss: 0.69262 - acc: 0.5212 -- iter: 14464/20250


Training Step: 227  | total loss: [1m[32m0.69217[0m[0m | time: 58.623s
[2K| Adam | epoch: 001 | loss: 0.69217 - acc: 0.5269 -- iter: 14528/20250


Training Step: 228  | total loss: [1m[32m0.69238[0m[0m | time: 58.866s
[2K| Adam | epoch: 001 | loss: 0.69238 - acc: 0.5273 -- iter: 14592/20250


Training Step: 229  | total loss: [1m[32m0.69281[0m[0m | time: 59.115s
[2K| Adam | epoch: 001 | loss: 0.69281 - acc: 0.5230 -- iter: 14656/20250


Training Step: 230  | total loss: [1m[32m0.69232[0m[0m | time: 59.358s
[2K| Adam | epoch: 001 | loss: 0.69232 - acc: 0.5238 -- iter: 14720/20250


Training Step: 231  | total loss: [1m[32m0.69340[0m[0m | time: 59.605s
[2K| Adam | epoch: 001 | loss: 0.69340 - acc: 0.5214 -- iter: 14784/20250


Training Step: 232  | total loss: [1m[32m0.69330[0m[0m | time: 59.842s
[2K| Adam | epoch: 001 | loss: 0.69330 - acc: 0.5193 -- iter: 14848/20250


Training Step: 233  | total loss: [1m[32m0.69346[0m[0m | time: 60.081s
[2K| Adam | epoch: 001 | loss: 0.69346 - acc: 0.5127 -- iter: 14912/20250


Training Step: 234  | total loss: [1m[32m0.69201[0m[0m | time: 60.323s
[2K| Adam | epoch: 001 | loss: 0.69201 - acc: 0.5255 -- iter: 14976/20250


Training Step: 235  | total loss: [1m[32m0.69197[0m[0m | time: 60.565s
[2K| Adam | epoch: 001 | loss: 0.69197 - acc: 0.5307 -- iter: 15040/20250


Training Step: 236  | total loss: [1m[32m0.69278[0m[0m | time: 60.818s
[2K| Adam | epoch: 001 | loss: 0.69278 - acc: 0.5245 -- iter: 15104/20250


Training Step: 237  | total loss: [1m[32m0.69198[0m[0m | time: 61.059s
[2K| Adam | epoch: 001 | loss: 0.69198 - acc: 0.5283 -- iter: 15168/20250


Training Step: 238  | total loss: [1m[32m0.69141[0m[0m | time: 61.305s
[2K| Adam | epoch: 001 | loss: 0.69141 - acc: 0.5271 -- iter: 15232/20250


Training Step: 239  | total loss: [1m[32m0.69283[0m[0m | time: 61.550s
[2K| Adam | epoch: 001 | loss: 0.69283 - acc: 0.5259 -- iter: 15296/20250


Training Step: 240  | total loss: [1m[32m0.69426[0m[0m | time: 61.791s
[2K| Adam | epoch: 001 | loss: 0.69426 - acc: 0.5186 -- iter: 15360/20250


Training Step: 241  | total loss: [1m[32m0.69520[0m[0m | time: 62.032s
[2K| Adam | epoch: 001 | loss: 0.69520 - acc: 0.5121 -- iter: 15424/20250


Training Step: 242  | total loss: [1m[32m0.69491[0m[0m | time: 62.280s
[2K| Adam | epoch: 001 | loss: 0.69491 - acc: 0.5171 -- iter: 15488/20250


Training Step: 243  | total loss: [1m[32m0.69377[0m[0m | time: 62.537s
[2K| Adam | epoch: 001 | loss: 0.69377 - acc: 0.5217 -- iter: 15552/20250


Training Step: 244  | total loss: [1m[32m0.69435[0m[0m | time: 62.781s
[2K| Adam | epoch: 001 | loss: 0.69435 - acc: 0.5164 -- iter: 15616/20250


Training Step: 245  | total loss: [1m[32m0.69357[0m[0m | time: 63.031s
[2K| Adam | epoch: 001 | loss: 0.69357 - acc: 0.5179 -- iter: 15680/20250


Training Step: 246  | total loss: [1m[32m0.69306[0m[0m | time: 63.290s
[2K| Adam | epoch: 001 | loss: 0.69306 - acc: 0.5192 -- iter: 15744/20250


Training Step: 247  | total loss: [1m[32m0.69295[0m[0m | time: 63.537s
[2K| Adam | epoch: 001 | loss: 0.69295 - acc: 0.5220 -- iter: 15808/20250


Training Step: 248  | total loss: [1m[32m0.69264[0m[0m | time: 63.780s
[2K| Adam | epoch: 001 | loss: 0.69264 - acc: 0.5182 -- iter: 15872/20250


Training Step: 249  | total loss: [1m[32m0.69235[0m[0m | time: 64.018s
[2K| Adam | epoch: 001 | loss: 0.69235 - acc: 0.5195 -- iter: 15936/20250


Training Step: 250  | total loss: [1m[32m0.69236[0m[0m | time: 64.266s
[2K| Adam | epoch: 001 | loss: 0.69236 - acc: 0.5191 -- iter: 16000/20250


Training Step: 251  | total loss: [1m[32m0.69200[0m[0m | time: 64.517s
[2K| Adam | epoch: 001 | loss: 0.69200 - acc: 0.5157 -- iter: 16064/20250


Training Step: 252  | total loss: [1m[32m0.69207[0m[0m | time: 64.764s
[2K| Adam | epoch: 001 | loss: 0.69207 - acc: 0.5125 -- iter: 16128/20250


Training Step: 253  | total loss: [1m[32m0.69184[0m[0m | time: 65.018s
[2K| Adam | epoch: 001 | loss: 0.69184 - acc: 0.5160 -- iter: 16192/20250


Training Step: 254  | total loss: [1m[32m0.69227[0m[0m | time: 65.270s
[2K| Adam | epoch: 001 | loss: 0.69227 - acc: 0.5112 -- iter: 16256/20250


Training Step: 255  | total loss: [1m[32m0.69229[0m[0m | time: 65.522s
[2K| Adam | epoch: 001 | loss: 0.69229 - acc: 0.5070 -- iter: 16320/20250


Training Step: 256  | total loss: [1m[32m0.69324[0m[0m | time: 65.775s
[2K| Adam | epoch: 001 | loss: 0.69324 - acc: 0.5047 -- iter: 16384/20250


Training Step: 257  | total loss: [1m[32m0.69392[0m[0m | time: 66.034s
[2K| Adam | epoch: 001 | loss: 0.69392 - acc: 0.4980 -- iter: 16448/20250


Training Step: 258  | total loss: [1m[32m0.69313[0m[0m | time: 66.278s
[2K| Adam | epoch: 001 | loss: 0.69313 - acc: 0.5029 -- iter: 16512/20250


Training Step: 259  | total loss: [1m[32m0.69211[0m[0m | time: 66.520s
[2K| Adam | epoch: 001 | loss: 0.69211 - acc: 0.5135 -- iter: 16576/20250


Training Step: 260  | total loss: [1m[32m0.69089[0m[0m | time: 66.763s
[2K| Adam | epoch: 001 | loss: 0.69089 - acc: 0.5231 -- iter: 16640/20250


Training Step: 261  | total loss: [1m[32m0.69139[0m[0m | time: 67.012s
[2K| Adam | epoch: 001 | loss: 0.69139 - acc: 0.5239 -- iter: 16704/20250


Training Step: 262  | total loss: [1m[32m0.69204[0m[0m | time: 67.257s
[2K| Adam | epoch: 001 | loss: 0.69204 - acc: 0.5215 -- iter: 16768/20250


Training Step: 263  | total loss: [1m[32m0.69161[0m[0m | time: 67.502s
[2K| Adam | epoch: 001 | loss: 0.69161 - acc: 0.5225 -- iter: 16832/20250


Training Step: 264  | total loss: [1m[32m0.69213[0m[0m | time: 67.753s
[2K| Adam | epoch: 001 | loss: 0.69213 - acc: 0.5109 -- iter: 16896/20250


Training Step: 265  | total loss: [1m[32m0.69181[0m[0m | time: 68.009s
[2K| Adam | epoch: 001 | loss: 0.69181 - acc: 0.5114 -- iter: 16960/20250


Training Step: 266  | total loss: [1m[32m0.69239[0m[0m | time: 68.256s
[2K| Adam | epoch: 001 | loss: 0.69239 - acc: 0.5118 -- iter: 17024/20250


Training Step: 267  | total loss: [1m[32m0.69260[0m[0m | time: 68.503s
[2K| Adam | epoch: 001 | loss: 0.69260 - acc: 0.5090 -- iter: 17088/20250


Training Step: 268  | total loss: [1m[32m0.69275[0m[0m | time: 68.748s
[2K| Adam | epoch: 001 | loss: 0.69275 - acc: 0.5066 -- iter: 17152/20250


Training Step: 269  | total loss: [1m[32m0.69174[0m[0m | time: 68.996s
[2K| Adam | epoch: 001 | loss: 0.69174 - acc: 0.5184 -- iter: 17216/20250


Training Step: 270  | total loss: [1m[32m0.69135[0m[0m | time: 69.239s
[2K| Adam | epoch: 001 | loss: 0.69135 - acc: 0.5150 -- iter: 17280/20250


Training Step: 271  | total loss: [1m[32m0.69109[0m[0m | time: 69.491s
[2K| Adam | epoch: 001 | loss: 0.69109 - acc: 0.5135 -- iter: 17344/20250


Training Step: 272  | total loss: [1m[32m0.69045[0m[0m | time: 69.746s
[2K| Adam | epoch: 001 | loss: 0.69045 - acc: 0.5153 -- iter: 17408/20250


Training Step: 273  | total loss: [1m[32m0.69030[0m[0m | time: 69.983s
[2K| Adam | epoch: 001 | loss: 0.69030 - acc: 0.5169 -- iter: 17472/20250


Training Step: 274  | total loss: [1m[32m0.69121[0m[0m | time: 70.246s
[2K| Adam | epoch: 001 | loss: 0.69121 - acc: 0.5074 -- iter: 17536/20250


Training Step: 275  | total loss: [1m[32m0.69084[0m[0m | time: 70.510s
[2K| Adam | epoch: 001 | loss: 0.69084 - acc: 0.5098 -- iter: 17600/20250


Training Step: 276  | total loss: [1m[32m0.69045[0m[0m | time: 70.757s
[2K| Adam | epoch: 001 | loss: 0.69045 - acc: 0.5025 -- iter: 17664/20250


Training Step: 277  | total loss: [1m[32m0.69086[0m[0m | time: 71.012s
[2K| Adam | epoch: 001 | loss: 0.69086 - acc: 0.5054 -- iter: 17728/20250


Training Step: 278  | total loss: [1m[32m0.69039[0m[0m | time: 71.253s
[2K| Adam | epoch: 001 | loss: 0.69039 - acc: 0.5189 -- iter: 17792/20250


Training Step: 279  | total loss: [1m[32m0.68992[0m[0m | time: 71.501s
[2K| Adam | epoch: 001 | loss: 0.68992 - acc: 0.5249 -- iter: 17856/20250


Training Step: 280  | total loss: [1m[32m0.68870[0m[0m | time: 71.749s
[2K| Adam | epoch: 001 | loss: 0.68870 - acc: 0.5317 -- iter: 17920/20250


Training Step: 281  | total loss: [1m[32m0.68757[0m[0m | time: 71.992s
[2K| Adam | epoch: 001 | loss: 0.68757 - acc: 0.5395 -- iter: 17984/20250


Training Step: 282  | total loss: [1m[32m0.68690[0m[0m | time: 72.260s
[2K| Adam | epoch: 001 | loss: 0.68690 - acc: 0.5434 -- iter: 18048/20250


Training Step: 283  | total loss: [1m[32m0.68631[0m[0m | time: 72.502s
[2K| Adam | epoch: 001 | loss: 0.68631 - acc: 0.5547 -- iter: 18112/20250


Training Step: 284  | total loss: [1m[32m0.68625[0m[0m | time: 72.751s
[2K| Adam | epoch: 001 | loss: 0.68625 - acc: 0.5586 -- iter: 18176/20250


Training Step: 285  | total loss: [1m[32m0.68686[0m[0m | time: 73.004s
[2K| Adam | epoch: 001 | loss: 0.68686 - acc: 0.5543 -- iter: 18240/20250


Training Step: 286  | total loss: [1m[32m0.68767[0m[0m | time: 73.248s
[2K| Adam | epoch: 001 | loss: 0.68767 - acc: 0.5551 -- iter: 18304/20250


Training Step: 287  | total loss: [1m[32m0.68674[0m[0m | time: 73.509s
[2K| Adam | epoch: 001 | loss: 0.68674 - acc: 0.5558 -- iter: 18368/20250


Training Step: 288  | total loss: [1m[32m0.68410[0m[0m | time: 73.753s
[2K| Adam | epoch: 001 | loss: 0.68410 - acc: 0.5674 -- iter: 18432/20250


Training Step: 289  | total loss: [1m[32m0.67768[0m[0m | time: 73.994s
[2K| Adam | epoch: 001 | loss: 0.67768 - acc: 0.5857 -- iter: 18496/20250


Training Step: 290  | total loss: [1m[32m0.67456[0m[0m | time: 74.237s
[2K| Adam | epoch: 001 | loss: 0.67456 - acc: 0.5943 -- iter: 18560/20250


Training Step: 291  | total loss: [1m[32m0.66994[0m[0m | time: 74.485s
[2K| Adam | epoch: 001 | loss: 0.66994 - acc: 0.6021 -- iter: 18624/20250


Training Step: 292  | total loss: [1m[32m0.66135[0m[0m | time: 74.730s
[2K| Adam | epoch: 001 | loss: 0.66135 - acc: 0.6153 -- iter: 18688/20250


Training Step: 293  | total loss: [1m[32m0.66495[0m[0m | time: 74.969s
[2K| Adam | epoch: 001 | loss: 0.66495 - acc: 0.6210 -- iter: 18752/20250


Training Step: 294  | total loss: [1m[32m0.66359[0m[0m | time: 75.226s
[2K| Adam | epoch: 001 | loss: 0.66359 - acc: 0.6276 -- iter: 18816/20250


Training Step: 295  | total loss: [1m[32m0.67756[0m[0m | time: 75.475s
[2K| Adam | epoch: 001 | loss: 0.67756 - acc: 0.6180 -- iter: 18880/20250


Training Step: 296  | total loss: [1m[32m0.68378[0m[0m | time: 75.723s
[2K| Adam | epoch: 001 | loss: 0.68378 - acc: 0.6124 -- iter: 18944/20250


Training Step: 297  | total loss: [1m[32m0.68493[0m[0m | time: 75.958s
[2K| Adam | epoch: 001 | loss: 0.68493 - acc: 0.6043 -- iter: 19008/20250


Training Step: 298  | total loss: [1m[32m0.68563[0m[0m | time: 76.212s
[2K| Adam | epoch: 001 | loss: 0.68563 - acc: 0.5986 -- iter: 19072/20250


Training Step: 299  | total loss: [1m[32m0.68653[0m[0m | time: 76.458s
[2K| Adam | epoch: 001 | loss: 0.68653 - acc: 0.5903 -- iter: 19136/20250


Training Step: 300  | total loss: [1m[32m0.68968[0m[0m | time: 76.715s
[2K| Adam | epoch: 001 | loss: 0.68968 - acc: 0.5828 -- iter: 19200/20250


Training Step: 301  | total loss: [1m[32m0.68572[0m[0m | time: 76.963s
[2K| Adam | epoch: 001 | loss: 0.68572 - acc: 0.5855 -- iter: 19264/20250


Training Step: 302  | total loss: [1m[32m0.68270[0m[0m | time: 77.212s
[2K| Adam | epoch: 001 | loss: 0.68270 - acc: 0.5847 -- iter: 19328/20250


Training Step: 303  | total loss: [1m[32m0.68549[0m[0m | time: 77.466s
[2K| Adam | epoch: 001 | loss: 0.68549 - acc: 0.5747 -- iter: 19392/20250


Training Step: 304  | total loss: [1m[32m0.68628[0m[0m | time: 77.725s
[2K| Adam | epoch: 001 | loss: 0.68628 - acc: 0.5782 -- iter: 19456/20250


Training Step: 305  | total loss: [1m[32m0.68498[0m[0m | time: 77.976s
[2K| Adam | epoch: 001 | loss: 0.68498 - acc: 0.5766 -- iter: 19520/20250


Training Step: 306  | total loss: [1m[32m0.68444[0m[0m | time: 78.230s
[2K| Adam | epoch: 001 | loss: 0.68444 - acc: 0.5814 -- iter: 19584/20250


Training Step: 307  | total loss: [1m[32m0.68667[0m[0m | time: 78.476s
[2K| Adam | epoch: 001 | loss: 0.68667 - acc: 0.5780 -- iter: 19648/20250


Training Step: 308  | total loss: [1m[32m0.68708[0m[0m | time: 78.719s
[2K| Adam | epoch: 001 | loss: 0.68708 - acc: 0.5780 -- iter: 19712/20250


Training Step: 309  | total loss: [1m[32m0.68523[0m[0m | time: 78.959s
[2K| Adam | epoch: 001 | loss: 0.68523 - acc: 0.5843 -- iter: 19776/20250


Training Step: 310  | total loss: [1m[32m0.68150[0m[0m | time: 79.206s
[2K| Adam | epoch: 001 | loss: 0.68150 - acc: 0.5961 -- iter: 19840/20250


Training Step: 311  | total loss: [1m[32m0.67818[0m[0m | time: 79.453s
[2K| Adam | epoch: 001 | loss: 0.67818 - acc: 0.6037 -- iter: 19904/20250


Training Step: 312  | total loss: [1m[32m0.68015[0m[0m | time: 79.696s
[2K| Adam | epoch: 001 | loss: 0.68015 - acc: 0.5996 -- iter: 19968/20250


Training Step: 313  | total loss: [1m[32m0.68128[0m[0m | time: 79.954s
[2K| Adam | epoch: 001 | loss: 0.68128 - acc: 0.5912 -- iter: 20032/20250


Training Step: 314  | total loss: [1m[32m0.68088[0m[0m | time: 80.218s
[2K| Adam | epoch: 001 | loss: 0.68088 - acc: 0.5883 -- iter: 20096/20250


Training Step: 315  | total loss: [1m[32m0.67805[0m[0m | time: 80.456s
[2K| Adam | epoch: 001 | loss: 0.67805 - acc: 0.5920 -- iter: 20160/20250


Training Step: 316  | total loss: [1m[32m0.68196[0m[0m | time: 80.704s
[2K| Adam | epoch: 001 | loss: 0.68196 - acc: 0.5844 -- iter: 20224/20250


Training Step: 317  | total loss: [1m[32m0.68345[0m[0m | time: 83.047s
[2K| Adam | epoch: 001 | loss: 0.68345 - acc: 0.5790 | val_loss: 0.66612 - val_acc: 0.5996 -- iter: 20250/20250
--


Training Step: 318  | total loss: [1m[32m0.67903[0m[0m | time: 0.237s
[2K| Adam | epoch: 002 | loss: 0.67903 - acc: 0.5865 -- iter: 00064/20250


Training Step: 319  | total loss: [1m[32m0.67490[0m[0m | time: 0.488s
[2K| Adam | epoch: 002 | loss: 0.67490 - acc: 0.5971 -- iter: 00128/20250


Training Step: 320  | total loss: [1m[32m0.67359[0m[0m | time: 0.739s
[2K| Adam | epoch: 002 | loss: 0.67359 - acc: 0.5968 -- iter: 00192/20250


Training Step: 321  | total loss: [1m[32m0.66714[0m[0m | time: 0.987s
[2K| Adam | epoch: 002 | loss: 0.66714 - acc: 0.6105 -- iter: 00256/20250


Training Step: 322  | total loss: [1m[32m0.66663[0m[0m | time: 1.253s
[2K| Adam | epoch: 002 | loss: 0.66663 - acc: 0.6057 -- iter: 00320/20250


Training Step: 323  | total loss: [1m[32m0.66188[0m[0m | time: 1.507s
[2K| Adam | epoch: 002 | loss: 0.66188 - acc: 0.6155 -- iter: 00384/20250


Training Step: 324  | total loss: [1m[32m0.65748[0m[0m | time: 1.766s
[2K| Adam | epoch: 002 | loss: 0.65748 - acc: 0.6211 -- iter: 00448/20250


Training Step: 325  | total loss: [1m[32m0.64696[0m[0m | time: 2.022s
[2K| Adam | epoch: 002 | loss: 0.64696 - acc: 0.6356 -- iter: 00512/20250


Training Step: 326  | total loss: [1m[32m0.64417[0m[0m | time: 2.296s
[2K| Adam | epoch: 002 | loss: 0.64417 - acc: 0.6423 -- iter: 00576/20250


Training Step: 327  | total loss: [1m[32m0.64911[0m[0m | time: 2.539s
[2K| Adam | epoch: 002 | loss: 0.64911 - acc: 0.6375 -- iter: 00640/20250


Training Step: 328  | total loss: [1m[32m0.64472[0m[0m | time: 2.788s
[2K| Adam | epoch: 002 | loss: 0.64472 - acc: 0.6472 -- iter: 00704/20250


Training Step: 329  | total loss: [1m[32m0.64017[0m[0m | time: 3.043s
[2K| Adam | epoch: 002 | loss: 0.64017 - acc: 0.6590 -- iter: 00768/20250


Training Step: 330  | total loss: [1m[32m0.63721[0m[0m | time: 3.294s
[2K| Adam | epoch: 002 | loss: 0.63721 - acc: 0.6618 -- iter: 00832/20250


Training Step: 331  | total loss: [1m[32m0.62740[0m[0m | time: 3.546s
[2K| Adam | epoch: 002 | loss: 0.62740 - acc: 0.6707 -- iter: 00896/20250


Training Step: 332  | total loss: [1m[32m0.63370[0m[0m | time: 3.805s
[2K| Adam | epoch: 002 | loss: 0.63370 - acc: 0.6645 -- iter: 00960/20250


Training Step: 333  | total loss: [1m[32m0.63332[0m[0m | time: 4.039s
[2K| Adam | epoch: 002 | loss: 0.63332 - acc: 0.6653 -- iter: 01024/20250


Training Step: 334  | total loss: [1m[32m0.64343[0m[0m | time: 4.295s
[2K| Adam | epoch: 002 | loss: 0.64343 - acc: 0.6566 -- iter: 01088/20250


Training Step: 335  | total loss: [1m[32m0.64993[0m[0m | time: 4.555s
[2K| Adam | epoch: 002 | loss: 0.64993 - acc: 0.6503 -- iter: 01152/20250


Training Step: 336  | total loss: [1m[32m0.66030[0m[0m | time: 4.801s
[2K| Adam | epoch: 002 | loss: 0.66030 - acc: 0.6384 -- iter: 01216/20250


Training Step: 337  | total loss: [1m[32m0.66556[0m[0m | time: 5.043s
[2K| Adam | epoch: 002 | loss: 0.66556 - acc: 0.6339 -- iter: 01280/20250


Training Step: 338  | total loss: [1m[32m0.68141[0m[0m | time: 5.292s
[2K| Adam | epoch: 002 | loss: 0.68141 - acc: 0.6127 -- iter: 01344/20250


Training Step: 339  | total loss: [1m[32m0.68588[0m[0m | time: 5.535s
[2K| Adam | epoch: 002 | loss: 0.68588 - acc: 0.6030 -- iter: 01408/20250


Training Step: 340  | total loss: [1m[32m0.68391[0m[0m | time: 5.780s
[2K| Adam | epoch: 002 | loss: 0.68391 - acc: 0.6021 -- iter: 01472/20250


Training Step: 341  | total loss: [1m[32m0.68580[0m[0m | time: 6.029s
[2K| Adam | epoch: 002 | loss: 0.68580 - acc: 0.5919 -- iter: 01536/20250


Training Step: 342  | total loss: [1m[32m0.68470[0m[0m | time: 6.274s
[2K| Adam | epoch: 002 | loss: 0.68470 - acc: 0.5889 -- iter: 01600/20250


Training Step: 343  | total loss: [1m[32m0.68233[0m[0m | time: 6.519s
[2K| Adam | epoch: 002 | loss: 0.68233 - acc: 0.5878 -- iter: 01664/20250


Training Step: 344  | total loss: [1m[32m0.68515[0m[0m | time: 6.790s
[2K| Adam | epoch: 002 | loss: 0.68515 - acc: 0.5806 -- iter: 01728/20250


Training Step: 345  | total loss: [1m[32m0.68657[0m[0m | time: 7.051s
[2K| Adam | epoch: 002 | loss: 0.68657 - acc: 0.5726 -- iter: 01792/20250


Training Step: 346  | total loss: [1m[32m0.68531[0m[0m | time: 7.301s
[2K| Adam | epoch: 002 | loss: 0.68531 - acc: 0.5700 -- iter: 01856/20250


Training Step: 347  | total loss: [1m[32m0.68578[0m[0m | time: 7.543s
[2K| Adam | epoch: 002 | loss: 0.68578 - acc: 0.5708 -- iter: 01920/20250


Training Step: 348  | total loss: [1m[32m0.68839[0m[0m | time: 7.787s
[2K| Adam | epoch: 002 | loss: 0.68839 - acc: 0.5622 -- iter: 01984/20250


Training Step: 349  | total loss: [1m[32m0.69124[0m[0m | time: 8.033s
[2K| Adam | epoch: 002 | loss: 0.69124 - acc: 0.5513 -- iter: 02048/20250


Training Step: 350  | total loss: [1m[32m0.69122[0m[0m | time: 8.288s
[2K| Adam | epoch: 002 | loss: 0.69122 - acc: 0.5461 -- iter: 02112/20250


Training Step: 351  | total loss: [1m[32m0.68946[0m[0m | time: 8.539s
[2K| Adam | epoch: 002 | loss: 0.68946 - acc: 0.5478 -- iter: 02176/20250


Training Step: 352  | total loss: [1m[32m0.68962[0m[0m | time: 8.788s
[2K| Adam | epoch: 002 | loss: 0.68962 - acc: 0.5461 -- iter: 02240/20250


Training Step: 353  | total loss: [1m[32m0.69037[0m[0m | time: 9.032s
[2K| Adam | epoch: 002 | loss: 0.69037 - acc: 0.5446 -- iter: 02304/20250


Training Step: 354  | total loss: [1m[32m0.68548[0m[0m | time: 9.286s
[2K| Adam | epoch: 002 | loss: 0.68548 - acc: 0.5558 -- iter: 02368/20250


Training Step: 355  | total loss: [1m[32m0.68467[0m[0m | time: 9.545s
[2K| Adam | epoch: 002 | loss: 0.68467 - acc: 0.5565 -- iter: 02432/20250


Training Step: 356  | total loss: [1m[32m0.68658[0m[0m | time: 9.791s
[2K| Adam | epoch: 002 | loss: 0.68658 - acc: 0.5539 -- iter: 02496/20250


Training Step: 357  | total loss: [1m[32m0.68829[0m[0m | time: 10.042s
[2K| Adam | epoch: 002 | loss: 0.68829 - acc: 0.5501 -- iter: 02560/20250


Training Step: 358  | total loss: [1m[32m0.68877[0m[0m | time: 10.292s
[2K| Adam | epoch: 002 | loss: 0.68877 - acc: 0.5467 -- iter: 02624/20250


Training Step: 359  | total loss: [1m[32m0.68598[0m[0m | time: 10.545s
[2K| Adam | epoch: 002 | loss: 0.68598 - acc: 0.5561 -- iter: 02688/20250


Training Step: 360  | total loss: [1m[32m0.68418[0m[0m | time: 10.794s
[2K| Adam | epoch: 002 | loss: 0.68418 - acc: 0.5551 -- iter: 02752/20250


Training Step: 361  | total loss: [1m[32m0.68617[0m[0m | time: 11.037s
[2K| Adam | epoch: 002 | loss: 0.68617 - acc: 0.5481 -- iter: 02816/20250


Training Step: 362  | total loss: [1m[32m0.68838[0m[0m | time: 11.288s
[2K| Adam | epoch: 002 | loss: 0.68838 - acc: 0.5370 -- iter: 02880/20250


Training Step: 363  | total loss: [1m[32m0.68617[0m[0m | time: 11.541s
[2K| Adam | epoch: 002 | loss: 0.68617 - acc: 0.5333 -- iter: 02944/20250


Training Step: 364  | total loss: [1m[32m0.68673[0m[0m | time: 11.840s
[2K| Adam | epoch: 002 | loss: 0.68673 - acc: 0.5315 -- iter: 03008/20250


Training Step: 365  | total loss: [1m[32m0.68571[0m[0m | time: 12.106s
[2K| Adam | epoch: 002 | loss: 0.68571 - acc: 0.5346 -- iter: 03072/20250


Training Step: 366  | total loss: [1m[32m0.68425[0m[0m | time: 12.353s
[2K| Adam | epoch: 002 | loss: 0.68425 - acc: 0.5421 -- iter: 03136/20250


Training Step: 367  | total loss: [1m[32m0.68631[0m[0m | time: 12.613s
[2K| Adam | epoch: 002 | loss: 0.68631 - acc: 0.5395 -- iter: 03200/20250


Training Step: 368  | total loss: [1m[32m0.68792[0m[0m | time: 12.858s
[2K| Adam | epoch: 002 | loss: 0.68792 - acc: 0.5418 -- iter: 03264/20250


Training Step: 369  | total loss: [1m[32m0.69509[0m[0m | time: 13.117s
[2K| Adam | epoch: 002 | loss: 0.69509 - acc: 0.5220 -- iter: 03328/20250


Training Step: 370  | total loss: [1m[32m0.69673[0m[0m | time: 13.361s
[2K| Adam | epoch: 002 | loss: 0.69673 - acc: 0.5166 -- iter: 03392/20250


Training Step: 371  | total loss: [1m[32m0.69475[0m[0m | time: 13.615s
[2K| Adam | epoch: 002 | loss: 0.69475 - acc: 0.5181 -- iter: 03456/20250


Training Step: 372  | total loss: [1m[32m0.69410[0m[0m | time: 13.869s
[2K| Adam | epoch: 002 | loss: 0.69410 - acc: 0.5194 -- iter: 03520/20250


Training Step: 373  | total loss: [1m[32m0.69579[0m[0m | time: 14.141s
[2K| Adam | epoch: 002 | loss: 0.69579 - acc: 0.5128 -- iter: 03584/20250


Training Step: 374  | total loss: [1m[32m0.69433[0m[0m | time: 14.413s
[2K| Adam | epoch: 002 | loss: 0.69433 - acc: 0.5131 -- iter: 03648/20250


Training Step: 375  | total loss: [1m[32m0.69310[0m[0m | time: 14.656s
[2K| Adam | epoch: 002 | loss: 0.69310 - acc: 0.5180 -- iter: 03712/20250


Training Step: 376  | total loss: [1m[32m0.69339[0m[0m | time: 14.903s
[2K| Adam | epoch: 002 | loss: 0.69339 - acc: 0.5115 -- iter: 03776/20250


Training Step: 377  | total loss: [1m[32m0.69254[0m[0m | time: 15.149s
[2K| Adam | epoch: 002 | loss: 0.69254 - acc: 0.5135 -- iter: 03840/20250


Training Step: 378  | total loss: [1m[32m0.69380[0m[0m | time: 15.397s
[2K| Adam | epoch: 002 | loss: 0.69380 - acc: 0.5168 -- iter: 03904/20250


Training Step: 379  | total loss: [1m[32m0.69146[0m[0m | time: 15.647s
[2K| Adam | epoch: 002 | loss: 0.69146 - acc: 0.5230 -- iter: 03968/20250


Training Step: 380  | total loss: [1m[32m0.69002[0m[0m | time: 15.890s
[2K| Adam | epoch: 002 | loss: 0.69002 - acc: 0.5207 -- iter: 04032/20250


Training Step: 381  | total loss: [1m[32m0.68834[0m[0m | time: 16.136s
[2K| Adam | epoch: 002 | loss: 0.68834 - acc: 0.5217 -- iter: 04096/20250


Training Step: 382  | total loss: [1m[32m0.68901[0m[0m | time: 16.387s
[2K| Adam | epoch: 002 | loss: 0.68901 - acc: 0.5196 -- iter: 04160/20250


Training Step: 383  | total loss: [1m[32m0.69148[0m[0m | time: 16.638s
[2K| Adam | epoch: 002 | loss: 0.69148 - acc: 0.5113 -- iter: 04224/20250


Training Step: 384  | total loss: [1m[32m0.69006[0m[0m | time: 16.923s
[2K| Adam | epoch: 002 | loss: 0.69006 - acc: 0.5102 -- iter: 04288/20250


Training Step: 385  | total loss: [1m[32m0.68849[0m[0m | time: 17.165s
[2K| Adam | epoch: 002 | loss: 0.68849 - acc: 0.5123 -- iter: 04352/20250


Training Step: 386  | total loss: [1m[32m0.68637[0m[0m | time: 17.419s
[2K| Adam | epoch: 002 | loss: 0.68637 - acc: 0.5205 -- iter: 04416/20250


Training Step: 387  | total loss: [1m[32m0.68585[0m[0m | time: 17.668s
[2K| Adam | epoch: 002 | loss: 0.68585 - acc: 0.5184 -- iter: 04480/20250


Training Step: 388  | total loss: [1m[32m0.68505[0m[0m | time: 17.914s
[2K| Adam | epoch: 002 | loss: 0.68505 - acc: 0.5181 -- iter: 04544/20250


Training Step: 389  | total loss: [1m[32m0.69022[0m[0m | time: 18.165s
[2K| Adam | epoch: 002 | loss: 0.69022 - acc: 0.4991 -- iter: 04608/20250


Training Step: 390  | total loss: [1m[32m0.69115[0m[0m | time: 18.419s
[2K| Adam | epoch: 002 | loss: 0.69115 - acc: 0.5008 -- iter: 04672/20250


Training Step: 391  | total loss: [1m[32m0.69017[0m[0m | time: 18.669s
[2K| Adam | epoch: 002 | loss: 0.69017 - acc: 0.5070 -- iter: 04736/20250


Training Step: 392  | total loss: [1m[32m0.68732[0m[0m | time: 18.922s
[2K| Adam | epoch: 002 | loss: 0.68732 - acc: 0.5156 -- iter: 04800/20250


Training Step: 393  | total loss: [1m[32m0.68573[0m[0m | time: 19.192s
[2K| Adam | epoch: 002 | loss: 0.68573 - acc: 0.5219 -- iter: 04864/20250


Training Step: 394  | total loss: [1m[32m0.68293[0m[0m | time: 19.453s
[2K| Adam | epoch: 002 | loss: 0.68293 - acc: 0.5353 -- iter: 04928/20250


Training Step: 395  | total loss: [1m[32m0.68122[0m[0m | time: 19.703s
[2K| Adam | epoch: 002 | loss: 0.68122 - acc: 0.5349 -- iter: 04992/20250


Training Step: 396  | total loss: [1m[32m0.67902[0m[0m | time: 19.963s
[2K| Adam | epoch: 002 | loss: 0.67902 - acc: 0.5424 -- iter: 05056/20250


Training Step: 397  | total loss: [1m[32m0.68047[0m[0m | time: 20.209s
[2K| Adam | epoch: 002 | loss: 0.68047 - acc: 0.5350 -- iter: 05120/20250


Training Step: 398  | total loss: [1m[32m0.67944[0m[0m | time: 20.451s
[2K| Adam | epoch: 002 | loss: 0.67944 - acc: 0.5409 -- iter: 05184/20250


Training Step: 399  | total loss: [1m[32m0.67768[0m[0m | time: 20.710s
[2K| Adam | epoch: 002 | loss: 0.67768 - acc: 0.5383 -- iter: 05248/20250


Training Step: 400  | total loss: [1m[32m0.67763[0m[0m | time: 20.963s
[2K| Adam | epoch: 002 | loss: 0.67763 - acc: 0.5439 -- iter: 05312/20250


Training Step: 401  | total loss: [1m[32m0.67835[0m[0m | time: 21.209s
[2K| Adam | epoch: 002 | loss: 0.67835 - acc: 0.5489 -- iter: 05376/20250


Training Step: 402  | total loss: [1m[32m0.67898[0m[0m | time: 21.477s
[2K| Adam | epoch: 002 | loss: 0.67898 - acc: 0.5471 -- iter: 05440/20250


Training Step: 403  | total loss: [1m[32m0.68101[0m[0m | time: 21.731s
[2K| Adam | epoch: 002 | loss: 0.68101 - acc: 0.5408 -- iter: 05504/20250


Training Step: 404  | total loss: [1m[32m0.67980[0m[0m | time: 21.979s
[2K| Adam | epoch: 002 | loss: 0.67980 - acc: 0.5477 -- iter: 05568/20250


Training Step: 405  | total loss: [1m[32m0.67989[0m[0m | time: 22.227s
[2K| Adam | epoch: 002 | loss: 0.67989 - acc: 0.5445 -- iter: 05632/20250


Training Step: 406  | total loss: [1m[32m0.67741[0m[0m | time: 22.469s
[2K| Adam | epoch: 002 | loss: 0.67741 - acc: 0.5510 -- iter: 05696/20250


Training Step: 407  | total loss: [1m[32m0.67906[0m[0m | time: 22.719s
[2K| Adam | epoch: 002 | loss: 0.67906 - acc: 0.5521 -- iter: 05760/20250


Training Step: 408  | total loss: [1m[32m0.67723[0m[0m | time: 22.967s
[2K| Adam | epoch: 002 | loss: 0.67723 - acc: 0.5547 -- iter: 05824/20250


Training Step: 409  | total loss: [1m[32m0.67873[0m[0m | time: 23.216s
[2K| Adam | epoch: 002 | loss: 0.67873 - acc: 0.5477 -- iter: 05888/20250


Training Step: 410  | total loss: [1m[32m0.67676[0m[0m | time: 23.473s
[2K| Adam | epoch: 002 | loss: 0.67676 - acc: 0.5492 -- iter: 05952/20250


Training Step: 411  | total loss: [1m[32m0.67892[0m[0m | time: 23.719s
[2K| Adam | epoch: 002 | loss: 0.67892 - acc: 0.5411 -- iter: 06016/20250


Training Step: 412  | total loss: [1m[32m0.67718[0m[0m | time: 24.000s
[2K| Adam | epoch: 002 | loss: 0.67718 - acc: 0.5355 -- iter: 06080/20250


Training Step: 413  | total loss: [1m[32m0.68087[0m[0m | time: 24.252s
[2K| Adam | epoch: 002 | loss: 0.68087 - acc: 0.5366 -- iter: 06144/20250


Training Step: 414  | total loss: [1m[32m0.68141[0m[0m | time: 24.585s
[2K| Adam | epoch: 002 | loss: 0.68141 - acc: 0.5486 -- iter: 06208/20250


Training Step: 415  | total loss: [1m[32m0.68146[0m[0m | time: 24.828s
[2K| Adam | epoch: 002 | loss: 0.68146 - acc: 0.5390 -- iter: 06272/20250


Training Step: 416  | total loss: [1m[32m0.68087[0m[0m | time: 25.081s
[2K| Adam | epoch: 002 | loss: 0.68087 - acc: 0.5336 -- iter: 06336/20250


Training Step: 417  | total loss: [1m[32m0.67840[0m[0m | time: 25.344s
[2K| Adam | epoch: 002 | loss: 0.67840 - acc: 0.5333 -- iter: 06400/20250


Training Step: 418  | total loss: [1m[32m0.67620[0m[0m | time: 25.589s
[2K| Adam | epoch: 002 | loss: 0.67620 - acc: 0.5441 -- iter: 06464/20250


Training Step: 419  | total loss: [1m[32m0.67731[0m[0m | time: 25.851s
[2K| Adam | epoch: 002 | loss: 0.67731 - acc: 0.5459 -- iter: 06528/20250


Training Step: 420  | total loss: [1m[32m0.67731[0m[0m | time: 26.099s
[2K| Adam | epoch: 002 | loss: 0.67731 - acc: 0.5538 -- iter: 06592/20250


Training Step: 421  | total loss: [1m[32m0.68169[0m[0m | time: 26.359s
[2K| Adam | epoch: 002 | loss: 0.68169 - acc: 0.5422 -- iter: 06656/20250


Training Step: 422  | total loss: [1m[32m0.68561[0m[0m | time: 26.634s
[2K| Adam | epoch: 002 | loss: 0.68561 - acc: 0.5364 -- iter: 06720/20250


Training Step: 423  | total loss: [1m[32m0.68288[0m[0m | time: 26.902s
[2K| Adam | epoch: 002 | loss: 0.68288 - acc: 0.5499 -- iter: 06784/20250


Training Step: 424  | total loss: [1m[32m0.68292[0m[0m | time: 27.154s
[2K| Adam | epoch: 002 | loss: 0.68292 - acc: 0.5590 -- iter: 06848/20250


Training Step: 425  | total loss: [1m[32m0.68587[0m[0m | time: 27.398s
[2K| Adam | epoch: 002 | loss: 0.68587 - acc: 0.5562 -- iter: 06912/20250


Training Step: 426  | total loss: [1m[32m0.68667[0m[0m | time: 27.646s
[2K| Adam | epoch: 002 | loss: 0.68667 - acc: 0.5506 -- iter: 06976/20250


Training Step: 427  | total loss: [1m[32m0.68986[0m[0m | time: 27.911s
[2K| Adam | epoch: 002 | loss: 0.68986 - acc: 0.5549 -- iter: 07040/20250


Training Step: 428  | total loss: [1m[32m0.68808[0m[0m | time: 28.164s
[2K| Adam | epoch: 002 | loss: 0.68808 - acc: 0.5479 -- iter: 07104/20250


Training Step: 429  | total loss: [1m[32m0.68994[0m[0m | time: 28.416s
[2K| Adam | epoch: 002 | loss: 0.68994 - acc: 0.5400 -- iter: 07168/20250


Training Step: 430  | total loss: [1m[32m0.68759[0m[0m | time: 28.664s
[2K| Adam | epoch: 002 | loss: 0.68759 - acc: 0.5391 -- iter: 07232/20250


Training Step: 431  | total loss: [1m[32m0.68864[0m[0m | time: 28.930s
[2K| Adam | epoch: 002 | loss: 0.68864 - acc: 0.5383 -- iter: 07296/20250


Training Step: 432  | total loss: [1m[32m0.68846[0m[0m | time: 29.180s
[2K| Adam | epoch: 002 | loss: 0.68846 - acc: 0.5360 -- iter: 07360/20250


Training Step: 433  | total loss: [1m[32m0.68690[0m[0m | time: 29.446s
[2K| Adam | epoch: 002 | loss: 0.68690 - acc: 0.5434 -- iter: 07424/20250


Training Step: 434  | total loss: [1m[32m0.68841[0m[0m | time: 29.693s
[2K| Adam | epoch: 002 | loss: 0.68841 - acc: 0.5375 -- iter: 07488/20250


Training Step: 435  | total loss: [1m[32m0.69104[0m[0m | time: 29.942s
[2K| Adam | epoch: 002 | loss: 0.69104 - acc: 0.5353 -- iter: 07552/20250


Training Step: 436  | total loss: [1m[32m0.68969[0m[0m | time: 30.200s
[2K| Adam | epoch: 002 | loss: 0.68969 - acc: 0.5364 -- iter: 07616/20250


Training Step: 437  | total loss: [1m[32m0.68593[0m[0m | time: 30.444s
[2K| Adam | epoch: 002 | loss: 0.68593 - acc: 0.5453 -- iter: 07680/20250


Training Step: 438  | total loss: [1m[32m0.68941[0m[0m | time: 30.691s
[2K| Adam | epoch: 002 | loss: 0.68941 - acc: 0.5376 -- iter: 07744/20250


Training Step: 439  | total loss: [1m[32m0.68955[0m[0m | time: 30.933s
[2K| Adam | epoch: 002 | loss: 0.68955 - acc: 0.5276 -- iter: 07808/20250


Training Step: 440  | total loss: [1m[32m0.68701[0m[0m | time: 31.184s
[2K| Adam | epoch: 002 | loss: 0.68701 - acc: 0.5374 -- iter: 07872/20250


Training Step: 441  | total loss: [1m[32m0.68371[0m[0m | time: 31.459s
[2K| Adam | epoch: 002 | loss: 0.68371 - acc: 0.5508 -- iter: 07936/20250


Training Step: 442  | total loss: [1m[32m0.68387[0m[0m | time: 31.723s
[2K| Adam | epoch: 002 | loss: 0.68387 - acc: 0.5442 -- iter: 08000/20250


Training Step: 443  | total loss: [1m[32m0.68384[0m[0m | time: 31.985s
[2K| Adam | epoch: 002 | loss: 0.68384 - acc: 0.5444 -- iter: 08064/20250


Training Step: 444  | total loss: [1m[32m0.68220[0m[0m | time: 32.235s
[2K| Adam | epoch: 002 | loss: 0.68220 - acc: 0.5462 -- iter: 08128/20250


Training Step: 445  | total loss: [1m[32m0.68432[0m[0m | time: 32.479s
[2K| Adam | epoch: 002 | loss: 0.68432 - acc: 0.5432 -- iter: 08192/20250


Training Step: 446  | total loss: [1m[32m0.68529[0m[0m | time: 32.736s
[2K| Adam | epoch: 002 | loss: 0.68529 - acc: 0.5451 -- iter: 08256/20250


Training Step: 447  | total loss: [1m[32m0.68287[0m[0m | time: 32.985s
[2K| Adam | epoch: 002 | loss: 0.68287 - acc: 0.5484 -- iter: 08320/20250


Training Step: 448  | total loss: [1m[32m0.68624[0m[0m | time: 33.236s
[2K| Adam | epoch: 002 | loss: 0.68624 - acc: 0.5405 -- iter: 08384/20250


Training Step: 449  | total loss: [1m[32m0.68472[0m[0m | time: 33.496s
[2K| Adam | epoch: 002 | loss: 0.68472 - acc: 0.5489 -- iter: 08448/20250


Training Step: 450  | total loss: [1m[32m0.68137[0m[0m | time: 33.744s
[2K| Adam | epoch: 002 | loss: 0.68137 - acc: 0.5596 -- iter: 08512/20250


Training Step: 451  | total loss: [1m[32m0.68396[0m[0m | time: 34.002s
[2K| Adam | epoch: 002 | loss: 0.68396 - acc: 0.5568 -- iter: 08576/20250


Training Step: 452  | total loss: [1m[32m0.68391[0m[0m | time: 34.282s
[2K| Adam | epoch: 002 | loss: 0.68391 - acc: 0.5542 -- iter: 08640/20250


Training Step: 453  | total loss: [1m[32m0.68529[0m[0m | time: 34.532s
[2K| Adam | epoch: 002 | loss: 0.68529 - acc: 0.5488 -- iter: 08704/20250


Training Step: 454  | total loss: [1m[32m0.68740[0m[0m | time: 34.771s
[2K| Adam | epoch: 002 | loss: 0.68740 - acc: 0.5533 -- iter: 08768/20250


Training Step: 455  | total loss: [1m[32m0.68635[0m[0m | time: 35.014s
[2K| Adam | epoch: 002 | loss: 0.68635 - acc: 0.5542 -- iter: 08832/20250


Training Step: 456  | total loss: [1m[32m0.68397[0m[0m | time: 35.255s
[2K| Adam | epoch: 002 | loss: 0.68397 - acc: 0.5566 -- iter: 08896/20250


Training Step: 457  | total loss: [1m[32m0.68560[0m[0m | time: 35.496s
[2K| Adam | epoch: 002 | loss: 0.68560 - acc: 0.5572 -- iter: 08960/20250


Training Step: 458  | total loss: [1m[32m0.68118[0m[0m | time: 35.747s
[2K| Adam | epoch: 002 | loss: 0.68118 - acc: 0.5687 -- iter: 09024/20250


Training Step: 459  | total loss: [1m[32m0.68217[0m[0m | time: 35.998s
[2K| Adam | epoch: 002 | loss: 0.68217 - acc: 0.5634 -- iter: 09088/20250


Training Step: 460  | total loss: [1m[32m0.68219[0m[0m | time: 36.246s
[2K| Adam | epoch: 002 | loss: 0.68219 - acc: 0.5586 -- iter: 09152/20250


Training Step: 461  | total loss: [1m[32m0.68333[0m[0m | time: 36.505s
[2K| Adam | epoch: 002 | loss: 0.68333 - acc: 0.5512 -- iter: 09216/20250


Training Step: 462  | total loss: [1m[32m0.68374[0m[0m | time: 36.769s
[2K| Adam | epoch: 002 | loss: 0.68374 - acc: 0.5445 -- iter: 09280/20250


Training Step: 463  | total loss: [1m[32m0.68321[0m[0m | time: 37.022s
[2K| Adam | epoch: 002 | loss: 0.68321 - acc: 0.5369 -- iter: 09344/20250


Training Step: 464  | total loss: [1m[32m0.68175[0m[0m | time: 37.289s
[2K| Adam | epoch: 002 | loss: 0.68175 - acc: 0.5364 -- iter: 09408/20250


Training Step: 465  | total loss: [1m[32m0.68156[0m[0m | time: 37.553s
[2K| Adam | epoch: 002 | loss: 0.68156 - acc: 0.5421 -- iter: 09472/20250


Training Step: 466  | total loss: [1m[32m0.67944[0m[0m | time: 37.804s
[2K| Adam | epoch: 002 | loss: 0.67944 - acc: 0.5394 -- iter: 09536/20250


Training Step: 467  | total loss: [1m[32m0.68251[0m[0m | time: 38.060s
[2K| Adam | epoch: 002 | loss: 0.68251 - acc: 0.5355 -- iter: 09600/20250


Training Step: 468  | total loss: [1m[32m0.68327[0m[0m | time: 38.307s
[2K| Adam | epoch: 002 | loss: 0.68327 - acc: 0.5382 -- iter: 09664/20250


Training Step: 469  | total loss: [1m[32m0.68482[0m[0m | time: 38.550s
[2K| Adam | epoch: 002 | loss: 0.68482 - acc: 0.5359 -- iter: 09728/20250


Training Step: 470  | total loss: [1m[32m0.68305[0m[0m | time: 38.792s
[2K| Adam | epoch: 002 | loss: 0.68305 - acc: 0.5417 -- iter: 09792/20250


Training Step: 471  | total loss: [1m[32m0.68368[0m[0m | time: 39.042s
[2K| Adam | epoch: 002 | loss: 0.68368 - acc: 0.5469 -- iter: 09856/20250


Training Step: 472  | total loss: [1m[32m0.68175[0m[0m | time: 39.284s
[2K| Adam | epoch: 002 | loss: 0.68175 - acc: 0.5516 -- iter: 09920/20250


Training Step: 473  | total loss: [1m[32m0.68055[0m[0m | time: 39.529s
[2K| Adam | epoch: 002 | loss: 0.68055 - acc: 0.5574 -- iter: 09984/20250


Training Step: 474  | total loss: [1m[32m0.67953[0m[0m | time: 39.797s
[2K| Adam | epoch: 002 | loss: 0.67953 - acc: 0.5610 -- iter: 10048/20250


Training Step: 475  | total loss: [1m[32m0.67898[0m[0m | time: 40.046s
[2K| Adam | epoch: 002 | loss: 0.67898 - acc: 0.5580 -- iter: 10112/20250


Training Step: 476  | total loss: [1m[32m0.67830[0m[0m | time: 40.288s
[2K| Adam | epoch: 002 | loss: 0.67830 - acc: 0.5554 -- iter: 10176/20250


Training Step: 477  | total loss: [1m[32m0.67905[0m[0m | time: 40.532s
[2K| Adam | epoch: 002 | loss: 0.67905 - acc: 0.5483 -- iter: 10240/20250


Training Step: 478  | total loss: [1m[32m0.67705[0m[0m | time: 40.779s
[2K| Adam | epoch: 002 | loss: 0.67705 - acc: 0.5497 -- iter: 10304/20250


Training Step: 479  | total loss: [1m[32m0.67919[0m[0m | time: 41.022s
[2K| Adam | epoch: 002 | loss: 0.67919 - acc: 0.5353 -- iter: 10368/20250


Training Step: 480  | total loss: [1m[32m0.67724[0m[0m | time: 41.263s
[2K| Adam | epoch: 002 | loss: 0.67724 - acc: 0.5412 -- iter: 10432/20250


Training Step: 481  | total loss: [1m[32m0.67779[0m[0m | time: 41.518s
[2K| Adam | epoch: 002 | loss: 0.67779 - acc: 0.5433 -- iter: 10496/20250


Training Step: 482  | total loss: [1m[32m0.67927[0m[0m | time: 41.787s
[2K| Adam | epoch: 002 | loss: 0.67927 - acc: 0.5421 -- iter: 10560/20250


Training Step: 483  | total loss: [1m[32m0.67928[0m[0m | time: 42.040s
[2K| Adam | epoch: 002 | loss: 0.67928 - acc: 0.5457 -- iter: 10624/20250


Training Step: 484  | total loss: [1m[32m0.67907[0m[0m | time: 42.303s
[2K| Adam | epoch: 002 | loss: 0.67907 - acc: 0.5443 -- iter: 10688/20250


Training Step: 485  | total loss: [1m[32m0.67900[0m[0m | time: 42.553s
[2K| Adam | epoch: 002 | loss: 0.67900 - acc: 0.5492 -- iter: 10752/20250


Training Step: 486  | total loss: [1m[32m0.67758[0m[0m | time: 42.803s
[2K| Adam | epoch: 002 | loss: 0.67758 - acc: 0.5474 -- iter: 10816/20250


Training Step: 487  | total loss: [1m[32m0.67565[0m[0m | time: 43.050s
[2K| Adam | epoch: 002 | loss: 0.67565 - acc: 0.5567 -- iter: 10880/20250


Training Step: 488  | total loss: [1m[32m0.67466[0m[0m | time: 43.299s
[2K| Adam | epoch: 002 | loss: 0.67466 - acc: 0.5683 -- iter: 10944/20250


Training Step: 489  | total loss: [1m[32m0.67586[0m[0m | time: 43.549s
[2K| Adam | epoch: 002 | loss: 0.67586 - acc: 0.5630 -- iter: 11008/20250


Training Step: 490  | total loss: [1m[32m0.67534[0m[0m | time: 43.802s
[2K| Adam | epoch: 002 | loss: 0.67534 - acc: 0.5661 -- iter: 11072/20250


Training Step: 491  | total loss: [1m[32m0.67314[0m[0m | time: 44.050s
[2K| Adam | epoch: 002 | loss: 0.67314 - acc: 0.5751 -- iter: 11136/20250


Training Step: 492  | total loss: [1m[32m0.67325[0m[0m | time: 44.294s
[2K| Adam | epoch: 002 | loss: 0.67325 - acc: 0.5723 -- iter: 11200/20250


Training Step: 493  | total loss: [1m[32m0.67221[0m[0m | time: 44.554s
[2K| Adam | epoch: 002 | loss: 0.67221 - acc: 0.5728 -- iter: 11264/20250


Training Step: 494  | total loss: [1m[32m0.66834[0m[0m | time: 44.821s
[2K| Adam | epoch: 002 | loss: 0.66834 - acc: 0.5765 -- iter: 11328/20250


Training Step: 495  | total loss: [1m[32m0.67554[0m[0m | time: 45.078s
[2K| Adam | epoch: 002 | loss: 0.67554 - acc: 0.5751 -- iter: 11392/20250


Training Step: 496  | total loss: [1m[32m0.67856[0m[0m | time: 45.324s
[2K| Adam | epoch: 002 | loss: 0.67856 - acc: 0.5645 -- iter: 11456/20250


Training Step: 497  | total loss: [1m[32m0.68251[0m[0m | time: 45.573s
[2K| Adam | epoch: 002 | loss: 0.68251 - acc: 0.5565 -- iter: 11520/20250


Training Step: 498  | total loss: [1m[32m0.68010[0m[0m | time: 45.827s
[2K| Adam | epoch: 002 | loss: 0.68010 - acc: 0.5602 -- iter: 11584/20250


Training Step: 499  | total loss: [1m[32m0.68293[0m[0m | time: 46.072s
[2K| Adam | epoch: 002 | loss: 0.68293 - acc: 0.5526 -- iter: 11648/20250


Training Step: 500  | total loss: [1m[32m0.68559[0m[0m | time: 46.315s
[2K| Adam | epoch: 002 | loss: 0.68559 - acc: 0.5427 -- iter: 11712/20250


Training Step: 501  | total loss: [1m[32m0.68543[0m[0m | time: 46.567s
[2K| Adam | epoch: 002 | loss: 0.68543 - acc: 0.5431 -- iter: 11776/20250


Training Step: 502  | total loss: [1m[32m0.68526[0m[0m | time: 46.814s
[2K| Adam | epoch: 002 | loss: 0.68526 - acc: 0.5372 -- iter: 11840/20250


Training Step: 503  | total loss: [1m[32m0.68302[0m[0m | time: 47.068s
[2K| Adam | epoch: 002 | loss: 0.68302 - acc: 0.5491 -- iter: 11904/20250


Training Step: 504  | total loss: [1m[32m0.68304[0m[0m | time: 47.325s
[2K| Adam | epoch: 002 | loss: 0.68304 - acc: 0.5551 -- iter: 11968/20250


Training Step: 505  | total loss: [1m[32m0.68371[0m[0m | time: 47.574s
[2K| Adam | epoch: 002 | loss: 0.68371 - acc: 0.5528 -- iter: 12032/20250


Training Step: 506  | total loss: [1m[32m0.68391[0m[0m | time: 47.822s
[2K| Adam | epoch: 002 | loss: 0.68391 - acc: 0.5584 -- iter: 12096/20250


Training Step: 507  | total loss: [1m[32m0.68607[0m[0m | time: 48.075s
[2K| Adam | epoch: 002 | loss: 0.68607 - acc: 0.5573 -- iter: 12160/20250


Training Step: 508  | total loss: [1m[32m0.68797[0m[0m | time: 48.317s
[2K| Adam | epoch: 002 | loss: 0.68797 - acc: 0.5468 -- iter: 12224/20250


Training Step: 509  | total loss: [1m[32m0.68900[0m[0m | time: 48.561s
[2K| Adam | epoch: 002 | loss: 0.68900 - acc: 0.5422 -- iter: 12288/20250


Training Step: 510  | total loss: [1m[32m0.68819[0m[0m | time: 48.822s
[2K| Adam | epoch: 002 | loss: 0.68819 - acc: 0.5473 -- iter: 12352/20250


Training Step: 511  | total loss: [1m[32m0.68916[0m[0m | time: 49.059s
[2K| Adam | epoch: 002 | loss: 0.68916 - acc: 0.5488 -- iter: 12416/20250


Training Step: 512  | total loss: [1m[32m0.69339[0m[0m | time: 49.305s
[2K| Adam | epoch: 002 | loss: 0.69339 - acc: 0.5440 -- iter: 12480/20250


Training Step: 513  | total loss: [1m[32m0.69222[0m[0m | time: 49.567s
[2K| Adam | epoch: 002 | loss: 0.69222 - acc: 0.5442 -- iter: 12544/20250


Training Step: 514  | total loss: [1m[32m0.69185[0m[0m | time: 49.816s
[2K| Adam | epoch: 002 | loss: 0.69185 - acc: 0.5461 -- iter: 12608/20250


Training Step: 515  | total loss: [1m[32m0.69257[0m[0m | time: 50.067s
[2K| Adam | epoch: 002 | loss: 0.69257 - acc: 0.5383 -- iter: 12672/20250


Training Step: 516  | total loss: [1m[32m0.69086[0m[0m | time: 50.312s
[2K| Adam | epoch: 002 | loss: 0.69086 - acc: 0.5392 -- iter: 12736/20250


Training Step: 517  | total loss: [1m[32m0.68852[0m[0m | time: 50.565s
[2K| Adam | epoch: 002 | loss: 0.68852 - acc: 0.5400 -- iter: 12800/20250


Training Step: 518  | total loss: [1m[32m0.69054[0m[0m | time: 50.817s
[2K| Adam | epoch: 002 | loss: 0.69054 - acc: 0.5438 -- iter: 12864/20250


Training Step: 519  | total loss: [1m[32m0.69034[0m[0m | time: 51.070s
[2K| Adam | epoch: 002 | loss: 0.69034 - acc: 0.5316 -- iter: 12928/20250


Training Step: 520  | total loss: [1m[32m0.68945[0m[0m | time: 51.317s
[2K| Adam | epoch: 002 | loss: 0.68945 - acc: 0.5347 -- iter: 12992/20250


Training Step: 521  | total loss: [1m[32m0.68933[0m[0m | time: 51.565s
[2K| Adam | epoch: 002 | loss: 0.68933 - acc: 0.5375 -- iter: 13056/20250


Training Step: 522  | total loss: [1m[32m0.68700[0m[0m | time: 51.823s
[2K| Adam | epoch: 002 | loss: 0.68700 - acc: 0.5400 -- iter: 13120/20250


Training Step: 523  | total loss: [1m[32m0.68652[0m[0m | time: 52.079s
[2K| Adam | epoch: 002 | loss: 0.68652 - acc: 0.5360 -- iter: 13184/20250


Training Step: 524  | total loss: [1m[32m0.68335[0m[0m | time: 52.326s
[2K| Adam | epoch: 002 | loss: 0.68335 - acc: 0.5496 -- iter: 13248/20250


Training Step: 525  | total loss: [1m[32m0.68484[0m[0m | time: 52.583s
[2K| Adam | epoch: 002 | loss: 0.68484 - acc: 0.5462 -- iter: 13312/20250


Training Step: 526  | total loss: [1m[32m0.68247[0m[0m | time: 52.828s
[2K| Adam | epoch: 002 | loss: 0.68247 - acc: 0.5494 -- iter: 13376/20250


Training Step: 527  | total loss: [1m[32m0.67985[0m[0m | time: 53.074s
[2K| Adam | epoch: 002 | loss: 0.67985 - acc: 0.5554 -- iter: 13440/20250


Training Step: 528  | total loss: [1m[32m0.67874[0m[0m | time: 53.327s
[2K| Adam | epoch: 002 | loss: 0.67874 - acc: 0.5514 -- iter: 13504/20250


Training Step: 529  | total loss: [1m[32m0.67911[0m[0m | time: 53.577s
[2K| Adam | epoch: 002 | loss: 0.67911 - acc: 0.5494 -- iter: 13568/20250


Training Step: 530  | total loss: [1m[32m0.68325[0m[0m | time: 53.825s
[2K| Adam | epoch: 002 | loss: 0.68325 - acc: 0.5413 -- iter: 13632/20250


Training Step: 531  | total loss: [1m[32m0.68473[0m[0m | time: 54.075s
[2K| Adam | epoch: 002 | loss: 0.68473 - acc: 0.5387 -- iter: 13696/20250


Training Step: 532  | total loss: [1m[32m0.68335[0m[0m | time: 54.333s
[2K| Adam | epoch: 002 | loss: 0.68335 - acc: 0.5411 -- iter: 13760/20250


Training Step: 533  | total loss: [1m[32m0.68184[0m[0m | time: 54.622s
[2K| Adam | epoch: 002 | loss: 0.68184 - acc: 0.5479 -- iter: 13824/20250


Training Step: 534  | total loss: [1m[32m0.68057[0m[0m | time: 54.882s
[2K| Adam | epoch: 002 | loss: 0.68057 - acc: 0.5494 -- iter: 13888/20250


Training Step: 535  | total loss: [1m[32m0.67949[0m[0m | time: 55.133s
[2K| Adam | epoch: 002 | loss: 0.67949 - acc: 0.5523 -- iter: 13952/20250


Training Step: 536  | total loss: [1m[32m0.67998[0m[0m | time: 55.389s
[2K| Adam | epoch: 002 | loss: 0.67998 - acc: 0.5502 -- iter: 14016/20250


Training Step: 537  | total loss: [1m[32m0.67606[0m[0m | time: 55.642s
[2K| Adam | epoch: 002 | loss: 0.67606 - acc: 0.5592 -- iter: 14080/20250


Training Step: 538  | total loss: [1m[32m0.67516[0m[0m | time: 55.892s
[2K| Adam | epoch: 002 | loss: 0.67516 - acc: 0.5642 -- iter: 14144/20250


Training Step: 539  | total loss: [1m[32m0.67224[0m[0m | time: 56.141s
[2K| Adam | epoch: 002 | loss: 0.67224 - acc: 0.5719 -- iter: 14208/20250


Training Step: 540  | total loss: [1m[32m0.67192[0m[0m | time: 56.388s
[2K| Adam | epoch: 002 | loss: 0.67192 - acc: 0.5741 -- iter: 14272/20250


Training Step: 541  | total loss: [1m[32m0.67416[0m[0m | time: 56.641s
[2K| Adam | epoch: 002 | loss: 0.67416 - acc: 0.5651 -- iter: 14336/20250


Training Step: 542  | total loss: [1m[32m0.67034[0m[0m | time: 56.899s
[2K| Adam | epoch: 002 | loss: 0.67034 - acc: 0.5695 -- iter: 14400/20250


Training Step: 543  | total loss: [1m[32m0.67082[0m[0m | time: 57.164s
[2K| Adam | epoch: 002 | loss: 0.67082 - acc: 0.5657 -- iter: 14464/20250


Training Step: 544  | total loss: [1m[32m0.67130[0m[0m | time: 57.419s
[2K| Adam | epoch: 002 | loss: 0.67130 - acc: 0.5669 -- iter: 14528/20250


Training Step: 545  | total loss: [1m[32m0.67356[0m[0m | time: 57.678s
[2K| Adam | epoch: 002 | loss: 0.67356 - acc: 0.5649 -- iter: 14592/20250


Training Step: 546  | total loss: [1m[32m0.67313[0m[0m | time: 57.935s
[2K| Adam | epoch: 002 | loss: 0.67313 - acc: 0.5647 -- iter: 14656/20250


Training Step: 547  | total loss: [1m[32m0.67310[0m[0m | time: 58.187s
[2K| Adam | epoch: 002 | loss: 0.67310 - acc: 0.5629 -- iter: 14720/20250


Training Step: 548  | total loss: [1m[32m0.67050[0m[0m | time: 58.443s
[2K| Adam | epoch: 002 | loss: 0.67050 - acc: 0.5644 -- iter: 14784/20250


Training Step: 549  | total loss: [1m[32m0.66973[0m[0m | time: 58.694s
[2K| Adam | epoch: 002 | loss: 0.66973 - acc: 0.5658 -- iter: 14848/20250


Training Step: 550  | total loss: [1m[32m0.66758[0m[0m | time: 58.946s
[2K| Adam | epoch: 002 | loss: 0.66758 - acc: 0.5686 -- iter: 14912/20250


Training Step: 551  | total loss: [1m[32m0.66639[0m[0m | time: 59.220s
[2K| Adam | epoch: 002 | loss: 0.66639 - acc: 0.5742 -- iter: 14976/20250


Training Step: 552  | total loss: [1m[32m0.66650[0m[0m | time: 59.471s
[2K| Adam | epoch: 002 | loss: 0.66650 - acc: 0.5793 -- iter: 15040/20250


Training Step: 553  | total loss: [1m[32m0.66261[0m[0m | time: 59.734s
[2K| Adam | epoch: 002 | loss: 0.66261 - acc: 0.5933 -- iter: 15104/20250


Training Step: 554  | total loss: [1m[32m0.66235[0m[0m | time: 59.980s
[2K| Adam | epoch: 002 | loss: 0.66235 - acc: 0.5964 -- iter: 15168/20250


Training Step: 555  | total loss: [1m[32m0.66343[0m[0m | time: 60.229s
[2K| Adam | epoch: 002 | loss: 0.66343 - acc: 0.5930 -- iter: 15232/20250


Training Step: 556  | total loss: [1m[32m0.66303[0m[0m | time: 60.476s
[2K| Adam | epoch: 002 | loss: 0.66303 - acc: 0.5947 -- iter: 15296/20250


Training Step: 557  | total loss: [1m[32m0.66044[0m[0m | time: 60.738s
[2K| Adam | epoch: 002 | loss: 0.66044 - acc: 0.5977 -- iter: 15360/20250


Training Step: 558  | total loss: [1m[32m0.65928[0m[0m | time: 60.988s
[2K| Adam | epoch: 002 | loss: 0.65928 - acc: 0.6020 -- iter: 15424/20250


Training Step: 559  | total loss: [1m[32m0.65821[0m[0m | time: 61.239s
[2K| Adam | epoch: 002 | loss: 0.65821 - acc: 0.6105 -- iter: 15488/20250


Training Step: 560  | total loss: [1m[32m0.65570[0m[0m | time: 61.489s
[2K| Adam | epoch: 002 | loss: 0.65570 - acc: 0.6151 -- iter: 15552/20250


Training Step: 561  | total loss: [1m[32m0.65307[0m[0m | time: 61.767s
[2K| Adam | epoch: 002 | loss: 0.65307 - acc: 0.6208 -- iter: 15616/20250


Training Step: 562  | total loss: [1m[32m0.64516[0m[0m | time: 62.047s
[2K| Adam | epoch: 002 | loss: 0.64516 - acc: 0.6337 -- iter: 15680/20250


Training Step: 563  | total loss: [1m[32m0.64052[0m[0m | time: 62.290s
[2K| Adam | epoch: 002 | loss: 0.64052 - acc: 0.6438 -- iter: 15744/20250


Training Step: 564  | total loss: [1m[32m0.64551[0m[0m | time: 62.539s
[2K| Adam | epoch: 002 | loss: 0.64551 - acc: 0.6450 -- iter: 15808/20250


Training Step: 565  | total loss: [1m[32m0.63963[0m[0m | time: 62.791s
[2K| Adam | epoch: 002 | loss: 0.63963 - acc: 0.6508 -- iter: 15872/20250


Training Step: 566  | total loss: [1m[32m0.62988[0m[0m | time: 63.040s
[2K| Adam | epoch: 002 | loss: 0.62988 - acc: 0.6608 -- iter: 15936/20250


Training Step: 567  | total loss: [1m[32m0.62494[0m[0m | time: 63.297s
[2K| Adam | epoch: 002 | loss: 0.62494 - acc: 0.6650 -- iter: 16000/20250


Training Step: 568  | total loss: [1m[32m0.61836[0m[0m | time: 63.543s
[2K| Adam | epoch: 002 | loss: 0.61836 - acc: 0.6719 -- iter: 16064/20250


Training Step: 569  | total loss: [1m[32m0.62836[0m[0m | time: 63.802s
[2K| Adam | epoch: 002 | loss: 0.62836 - acc: 0.6625 -- iter: 16128/20250


Training Step: 570  | total loss: [1m[32m0.63072[0m[0m | time: 64.053s
[2K| Adam | epoch: 002 | loss: 0.63072 - acc: 0.6666 -- iter: 16192/20250


Training Step: 571  | total loss: [1m[32m0.62884[0m[0m | time: 64.321s
[2K| Adam | epoch: 002 | loss: 0.62884 - acc: 0.6703 -- iter: 16256/20250


Training Step: 572  | total loss: [1m[32m0.64690[0m[0m | time: 64.577s
[2K| Adam | epoch: 002 | loss: 0.64690 - acc: 0.6548 -- iter: 16320/20250


Training Step: 573  | total loss: [1m[32m0.64515[0m[0m | time: 64.820s
[2K| Adam | epoch: 002 | loss: 0.64515 - acc: 0.6581 -- iter: 16384/20250


Training Step: 574  | total loss: [1m[32m0.64541[0m[0m | time: 65.069s
[2K| Adam | epoch: 002 | loss: 0.64541 - acc: 0.6579 -- iter: 16448/20250


Training Step: 575  | total loss: [1m[32m0.64688[0m[0m | time: 65.322s
[2K| Adam | epoch: 002 | loss: 0.64688 - acc: 0.6562 -- iter: 16512/20250


Training Step: 576  | total loss: [1m[32m0.64023[0m[0m | time: 65.569s
[2K| Adam | epoch: 002 | loss: 0.64023 - acc: 0.6593 -- iter: 16576/20250


Training Step: 577  | total loss: [1m[32m0.64099[0m[0m | time: 65.817s
[2K| Adam | epoch: 002 | loss: 0.64099 - acc: 0.6574 -- iter: 16640/20250


Training Step: 578  | total loss: [1m[32m0.65114[0m[0m | time: 66.064s
[2K| Adam | epoch: 002 | loss: 0.65114 - acc: 0.6511 -- iter: 16704/20250


Training Step: 579  | total loss: [1m[32m0.65569[0m[0m | time: 66.313s
[2K| Adam | epoch: 002 | loss: 0.65569 - acc: 0.6531 -- iter: 16768/20250


Training Step: 580  | total loss: [1m[32m0.65819[0m[0m | time: 66.572s
[2K| Adam | epoch: 002 | loss: 0.65819 - acc: 0.6519 -- iter: 16832/20250


Training Step: 581  | total loss: [1m[32m0.65585[0m[0m | time: 66.827s
[2K| Adam | epoch: 002 | loss: 0.65585 - acc: 0.6539 -- iter: 16896/20250


Training Step: 582  | total loss: [1m[32m0.66484[0m[0m | time: 67.078s
[2K| Adam | epoch: 002 | loss: 0.66484 - acc: 0.6494 -- iter: 16960/20250


Training Step: 583  | total loss: [1m[32m0.66693[0m[0m | time: 67.327s
[2K| Adam | epoch: 002 | loss: 0.66693 - acc: 0.6470 -- iter: 17024/20250


Training Step: 584  | total loss: [1m[32m0.66565[0m[0m | time: 67.599s
[2K| Adam | epoch: 002 | loss: 0.66565 - acc: 0.6479 -- iter: 17088/20250


Training Step: 585  | total loss: [1m[32m0.66936[0m[0m | time: 67.852s
[2K| Adam | epoch: 002 | loss: 0.66936 - acc: 0.6472 -- iter: 17152/20250


Training Step: 586  | total loss: [1m[32m0.66789[0m[0m | time: 68.103s
[2K| Adam | epoch: 002 | loss: 0.66789 - acc: 0.6418 -- iter: 17216/20250


Training Step: 587  | total loss: [1m[32m0.66947[0m[0m | time: 68.352s
[2K| Adam | epoch: 002 | loss: 0.66947 - acc: 0.6370 -- iter: 17280/20250


Training Step: 588  | total loss: [1m[32m0.66310[0m[0m | time: 68.600s
[2K| Adam | epoch: 002 | loss: 0.66310 - acc: 0.6483 -- iter: 17344/20250


Training Step: 589  | total loss: [1m[32m0.66083[0m[0m | time: 68.844s
[2K| Adam | epoch: 002 | loss: 0.66083 - acc: 0.6538 -- iter: 17408/20250


Training Step: 590  | total loss: [1m[32m0.66236[0m[0m | time: 69.103s
[2K| Adam | epoch: 002 | loss: 0.66236 - acc: 0.6462 -- iter: 17472/20250


Training Step: 591  | total loss: [1m[32m0.66212[0m[0m | time: 69.371s
[2K| Adam | epoch: 002 | loss: 0.66212 - acc: 0.6441 -- iter: 17536/20250


Training Step: 592  | total loss: [1m[32m0.65945[0m[0m | time: 69.636s
[2K| Adam | epoch: 002 | loss: 0.65945 - acc: 0.6469 -- iter: 17600/20250


Training Step: 593  | total loss: [1m[32m0.65558[0m[0m | time: 69.888s
[2K| Adam | epoch: 002 | loss: 0.65558 - acc: 0.6541 -- iter: 17664/20250


Training Step: 594  | total loss: [1m[32m0.65137[0m[0m | time: 70.137s
[2K| Adam | epoch: 002 | loss: 0.65137 - acc: 0.6590 -- iter: 17728/20250


Training Step: 595  | total loss: [1m[32m0.65104[0m[0m | time: 70.379s
[2K| Adam | epoch: 002 | loss: 0.65104 - acc: 0.6540 -- iter: 17792/20250


Training Step: 596  | total loss: [1m[32m0.65096[0m[0m | time: 70.632s
[2K| Adam | epoch: 002 | loss: 0.65096 - acc: 0.6558 -- iter: 17856/20250


Training Step: 597  | total loss: [1m[32m0.65872[0m[0m | time: 70.882s
[2K| Adam | epoch: 002 | loss: 0.65872 - acc: 0.6371 -- iter: 17920/20250


Training Step: 598  | total loss: [1m[32m0.66128[0m[0m | time: 71.129s
[2K| Adam | epoch: 002 | loss: 0.66128 - acc: 0.6359 -- iter: 17984/20250


Training Step: 599  | total loss: [1m[32m0.66799[0m[0m | time: 71.385s
[2K| Adam | epoch: 002 | loss: 0.66799 - acc: 0.6192 -- iter: 18048/20250


Training Step: 600  | total loss: [1m[32m0.65928[0m[0m | time: 71.654s
[2K| Adam | epoch: 002 | loss: 0.65928 - acc: 0.6307 -- iter: 18112/20250


Training Step: 601  | total loss: [1m[32m0.66121[0m[0m | time: 71.914s
[2K| Adam | epoch: 002 | loss: 0.66121 - acc: 0.6270 -- iter: 18176/20250


Training Step: 602  | total loss: [1m[32m0.66144[0m[0m | time: 72.169s
[2K| Adam | epoch: 002 | loss: 0.66144 - acc: 0.6237 -- iter: 18240/20250


Training Step: 603  | total loss: [1m[32m0.65539[0m[0m | time: 72.421s
[2K| Adam | epoch: 002 | loss: 0.65539 - acc: 0.6301 -- iter: 18304/20250


Training Step: 604  | total loss: [1m[32m0.66554[0m[0m | time: 72.679s
[2K| Adam | epoch: 002 | loss: 0.66554 - acc: 0.6139 -- iter: 18368/20250


Training Step: 605  | total loss: [1m[32m0.65740[0m[0m | time: 72.926s
[2K| Adam | epoch: 002 | loss: 0.65740 - acc: 0.6260 -- iter: 18432/20250


Training Step: 606  | total loss: [1m[32m0.65177[0m[0m | time: 73.174s
[2K| Adam | epoch: 002 | loss: 0.65177 - acc: 0.6337 -- iter: 18496/20250


Training Step: 607  | total loss: [1m[32m0.64460[0m[0m | time: 73.436s
[2K| Adam | epoch: 002 | loss: 0.64460 - acc: 0.6484 -- iter: 18560/20250


Training Step: 608  | total loss: [1m[32m0.64054[0m[0m | time: 73.679s
[2K| Adam | epoch: 002 | loss: 0.64054 - acc: 0.6508 -- iter: 18624/20250


Training Step: 609  | total loss: [1m[32m0.63975[0m[0m | time: 73.926s
[2K| Adam | epoch: 002 | loss: 0.63975 - acc: 0.6529 -- iter: 18688/20250


Training Step: 610  | total loss: [1m[32m0.63820[0m[0m | time: 74.193s
[2K| Adam | epoch: 002 | loss: 0.63820 - acc: 0.6501 -- iter: 18752/20250


Training Step: 611  | total loss: [1m[32m0.63508[0m[0m | time: 74.445s
[2K| Adam | epoch: 002 | loss: 0.63508 - acc: 0.6585 -- iter: 18816/20250


Training Step: 612  | total loss: [1m[32m0.63145[0m[0m | time: 74.702s
[2K| Adam | epoch: 002 | loss: 0.63145 - acc: 0.6614 -- iter: 18880/20250


Training Step: 613  | total loss: [1m[32m0.63209[0m[0m | time: 74.945s
[2K| Adam | epoch: 002 | loss: 0.63209 - acc: 0.6562 -- iter: 18944/20250


Training Step: 614  | total loss: [1m[32m0.63795[0m[0m | time: 75.196s
[2K| Adam | epoch: 002 | loss: 0.63795 - acc: 0.6453 -- iter: 19008/20250


Training Step: 615  | total loss: [1m[32m0.64160[0m[0m | time: 75.442s
[2K| Adam | epoch: 002 | loss: 0.64160 - acc: 0.6386 -- iter: 19072/20250


Training Step: 616  | total loss: [1m[32m0.64714[0m[0m | time: 75.689s
[2K| Adam | epoch: 002 | loss: 0.64714 - acc: 0.6341 -- iter: 19136/20250


Training Step: 617  | total loss: [1m[32m0.64185[0m[0m | time: 75.934s
[2K| Adam | epoch: 002 | loss: 0.64185 - acc: 0.6410 -- iter: 19200/20250


Training Step: 618  | total loss: [1m[32m0.64146[0m[0m | time: 76.172s
[2K| Adam | epoch: 002 | loss: 0.64146 - acc: 0.6410 -- iter: 19264/20250


Training Step: 619  | total loss: [1m[32m0.63960[0m[0m | time: 76.422s
[2K| Adam | epoch: 002 | loss: 0.63960 - acc: 0.6425 -- iter: 19328/20250


Training Step: 620  | total loss: [1m[32m0.64144[0m[0m | time: 76.683s
[2K| Adam | epoch: 002 | loss: 0.64144 - acc: 0.6439 -- iter: 19392/20250


Training Step: 621  | total loss: [1m[32m0.64348[0m[0m | time: 76.937s
[2K| Adam | epoch: 002 | loss: 0.64348 - acc: 0.6435 -- iter: 19456/20250


Training Step: 622  | total loss: [1m[32m0.64335[0m[0m | time: 77.178s
[2K| Adam | epoch: 002 | loss: 0.64335 - acc: 0.6417 -- iter: 19520/20250


Training Step: 623  | total loss: [1m[32m0.63394[0m[0m | time: 77.422s
[2K| Adam | epoch: 002 | loss: 0.63394 - acc: 0.6556 -- iter: 19584/20250


Training Step: 624  | total loss: [1m[32m0.63281[0m[0m | time: 77.673s
[2K| Adam | epoch: 002 | loss: 0.63281 - acc: 0.6526 -- iter: 19648/20250


Training Step: 625  | total loss: [1m[32m0.63329[0m[0m | time: 77.920s
[2K| Adam | epoch: 002 | loss: 0.63329 - acc: 0.6483 -- iter: 19712/20250


Training Step: 626  | total loss: [1m[32m0.62782[0m[0m | time: 78.174s
[2K| Adam | epoch: 002 | loss: 0.62782 - acc: 0.6569 -- iter: 19776/20250


Training Step: 627  | total loss: [1m[32m0.62608[0m[0m | time: 78.427s
[2K| Adam | epoch: 002 | loss: 0.62608 - acc: 0.6599 -- iter: 19840/20250


Training Step: 628  | total loss: [1m[32m0.63006[0m[0m | time: 78.679s
[2K| Adam | epoch: 002 | loss: 0.63006 - acc: 0.6533 -- iter: 19904/20250


Training Step: 629  | total loss: [1m[32m0.63693[0m[0m | time: 78.935s
[2K| Adam | epoch: 002 | loss: 0.63693 - acc: 0.6395 -- iter: 19968/20250


Training Step: 630  | total loss: [1m[32m0.64273[0m[0m | time: 79.213s
[2K| Adam | epoch: 002 | loss: 0.64273 - acc: 0.6303 -- iter: 20032/20250


Training Step: 631  | total loss: [1m[32m0.63977[0m[0m | time: 79.478s
[2K| Adam | epoch: 002 | loss: 0.63977 - acc: 0.6266 -- iter: 20096/20250


Training Step: 632  | total loss: [1m[32m0.63712[0m[0m | time: 79.725s
[2K| Adam | epoch: 002 | loss: 0.63712 - acc: 0.6280 -- iter: 20160/20250


Training Step: 633  | total loss: [1m[32m0.63872[0m[0m | time: 79.975s
[2K| Adam | epoch: 002 | loss: 0.63872 - acc: 0.6324 -- iter: 20224/20250


Training Step: 634  | total loss: [1m[32m0.64712[0m[0m | time: 81.932s
[2K| Adam | epoch: 002 | loss: 0.64712 - acc: 0.6207 | val_loss: 0.67730 - val_acc: 0.5947 -- iter: 20250/20250
--


Training Step: 635  | total loss: [1m[32m0.64608[0m[0m | time: 0.246s
[2K| Adam | epoch: 003 | loss: 0.64608 - acc: 0.6243 -- iter: 00064/20250


Training Step: 636  | total loss: [1m[32m0.63841[0m[0m | time: 0.495s
[2K| Adam | epoch: 003 | loss: 0.63841 - acc: 0.6349 -- iter: 00128/20250


Training Step: 637  | total loss: [1m[32m0.63218[0m[0m | time: 0.755s
[2K| Adam | epoch: 003 | loss: 0.63218 - acc: 0.6368 -- iter: 00192/20250


Training Step: 638  | total loss: [1m[32m0.63290[0m[0m | time: 1.013s
[2K| Adam | epoch: 003 | loss: 0.63290 - acc: 0.6325 -- iter: 00256/20250


Training Step: 639  | total loss: [1m[32m0.63095[0m[0m | time: 1.259s
[2K| Adam | epoch: 003 | loss: 0.63095 - acc: 0.6333 -- iter: 00320/20250


Training Step: 640  | total loss: [1m[32m0.62695[0m[0m | time: 1.509s
[2K| Adam | epoch: 003 | loss: 0.62695 - acc: 0.6387 -- iter: 00384/20250


Training Step: 641  | total loss: [1m[32m0.62768[0m[0m | time: 1.755s
[2K| Adam | epoch: 003 | loss: 0.62768 - acc: 0.6374 -- iter: 00448/20250


Training Step: 642  | total loss: [1m[32m0.62342[0m[0m | time: 2.011s
[2K| Adam | epoch: 003 | loss: 0.62342 - acc: 0.6393 -- iter: 00512/20250


Training Step: 643  | total loss: [1m[32m0.62112[0m[0m | time: 2.297s
[2K| Adam | epoch: 003 | loss: 0.62112 - acc: 0.6410 -- iter: 00576/20250


Training Step: 644  | total loss: [1m[32m0.62107[0m[0m | time: 2.561s
[2K| Adam | epoch: 003 | loss: 0.62107 - acc: 0.6409 -- iter: 00640/20250


Training Step: 645  | total loss: [1m[32m0.62740[0m[0m | time: 2.819s
[2K| Adam | epoch: 003 | loss: 0.62740 - acc: 0.6300 -- iter: 00704/20250


Training Step: 646  | total loss: [1m[32m0.62880[0m[0m | time: 3.065s
[2K| Adam | epoch: 003 | loss: 0.62880 - acc: 0.6310 -- iter: 00768/20250


Training Step: 647  | total loss: [1m[32m0.62517[0m[0m | time: 3.308s
[2K| Adam | epoch: 003 | loss: 0.62517 - acc: 0.6351 -- iter: 00832/20250


Training Step: 648  | total loss: [1m[32m0.62359[0m[0m | time: 3.551s
[2K| Adam | epoch: 003 | loss: 0.62359 - acc: 0.6419 -- iter: 00896/20250


Training Step: 649  | total loss: [1m[32m0.61428[0m[0m | time: 3.800s
[2K| Adam | epoch: 003 | loss: 0.61428 - acc: 0.6527 -- iter: 00960/20250


Training Step: 650  | total loss: [1m[32m0.61763[0m[0m | time: 4.044s
[2K| Adam | epoch: 003 | loss: 0.61763 - acc: 0.6453 -- iter: 01024/20250


Training Step: 651  | total loss: [1m[32m0.61392[0m[0m | time: 4.292s
[2K| Adam | epoch: 003 | loss: 0.61392 - acc: 0.6526 -- iter: 01088/20250


Training Step: 652  | total loss: [1m[32m0.61751[0m[0m | time: 4.569s
[2K| Adam | epoch: 003 | loss: 0.61751 - acc: 0.6530 -- iter: 01152/20250


Training Step: 653  | total loss: [1m[32m0.61169[0m[0m | time: 4.841s
[2K| Adam | epoch: 003 | loss: 0.61169 - acc: 0.6580 -- iter: 01216/20250


Training Step: 654  | total loss: [1m[32m0.61492[0m[0m | time: 5.090s
[2K| Adam | epoch: 003 | loss: 0.61492 - acc: 0.6563 -- iter: 01280/20250


Training Step: 655  | total loss: [1m[32m0.62031[0m[0m | time: 5.334s
[2K| Adam | epoch: 003 | loss: 0.62031 - acc: 0.6438 -- iter: 01344/20250


Training Step: 656  | total loss: [1m[32m0.62259[0m[0m | time: 5.580s
[2K| Adam | epoch: 003 | loss: 0.62259 - acc: 0.6434 -- iter: 01408/20250


Training Step: 657  | total loss: [1m[32m0.62056[0m[0m | time: 5.830s
[2K| Adam | epoch: 003 | loss: 0.62056 - acc: 0.6525 -- iter: 01472/20250


Training Step: 658  | total loss: [1m[32m0.61873[0m[0m | time: 6.076s
[2K| Adam | epoch: 003 | loss: 0.61873 - acc: 0.6576 -- iter: 01536/20250


Training Step: 659  | total loss: [1m[32m0.61946[0m[0m | time: 6.323s
[2K| Adam | epoch: 003 | loss: 0.61946 - acc: 0.6606 -- iter: 01600/20250


Training Step: 660  | total loss: [1m[32m0.62021[0m[0m | time: 6.575s
[2K| Adam | epoch: 003 | loss: 0.62021 - acc: 0.6601 -- iter: 01664/20250


Training Step: 661  | total loss: [1m[32m0.61684[0m[0m | time: 6.818s
[2K| Adam | epoch: 003 | loss: 0.61684 - acc: 0.6598 -- iter: 01728/20250


Training Step: 662  | total loss: [1m[32m0.61339[0m[0m | time: 7.105s
[2K| Adam | epoch: 003 | loss: 0.61339 - acc: 0.6625 -- iter: 01792/20250


Training Step: 663  | total loss: [1m[32m0.61784[0m[0m | time: 7.376s
[2K| Adam | epoch: 003 | loss: 0.61784 - acc: 0.6525 -- iter: 01856/20250


Training Step: 664  | total loss: [1m[32m0.61641[0m[0m | time: 7.629s
[2K| Adam | epoch: 003 | loss: 0.61641 - acc: 0.6498 -- iter: 01920/20250


Training Step: 665  | total loss: [1m[32m0.60806[0m[0m | time: 7.878s
[2K| Adam | epoch: 003 | loss: 0.60806 - acc: 0.6645 -- iter: 01984/20250


Training Step: 666  | total loss: [1m[32m0.60724[0m[0m | time: 8.126s
[2K| Adam | epoch: 003 | loss: 0.60724 - acc: 0.6715 -- iter: 02048/20250


Training Step: 667  | total loss: [1m[32m0.60922[0m[0m | time: 8.372s
[2K| Adam | epoch: 003 | loss: 0.60922 - acc: 0.6700 -- iter: 02112/20250


Training Step: 668  | total loss: [1m[32m0.60246[0m[0m | time: 8.624s
[2K| Adam | epoch: 003 | loss: 0.60246 - acc: 0.6795 -- iter: 02176/20250


Training Step: 669  | total loss: [1m[32m0.60678[0m[0m | time: 8.869s
[2K| Adam | epoch: 003 | loss: 0.60678 - acc: 0.6709 -- iter: 02240/20250


Training Step: 670  | total loss: [1m[32m0.60661[0m[0m | time: 9.112s
[2K| Adam | epoch: 003 | loss: 0.60661 - acc: 0.6679 -- iter: 02304/20250


Training Step: 671  | total loss: [1m[32m0.60751[0m[0m | time: 9.359s
[2K| Adam | epoch: 003 | loss: 0.60751 - acc: 0.6667 -- iter: 02368/20250


Training Step: 672  | total loss: [1m[32m0.60969[0m[0m | time: 9.635s
[2K| Adam | epoch: 003 | loss: 0.60969 - acc: 0.6657 -- iter: 02432/20250


Training Step: 673  | total loss: [1m[32m0.61073[0m[0m | time: 9.913s
[2K| Adam | epoch: 003 | loss: 0.61073 - acc: 0.6616 -- iter: 02496/20250


Training Step: 674  | total loss: [1m[32m0.60696[0m[0m | time: 10.162s
[2K| Adam | epoch: 003 | loss: 0.60696 - acc: 0.6720 -- iter: 02560/20250


Training Step: 675  | total loss: [1m[32m0.61050[0m[0m | time: 10.411s
[2K| Adam | epoch: 003 | loss: 0.61050 - acc: 0.6689 -- iter: 02624/20250


Training Step: 676  | total loss: [1m[32m0.61615[0m[0m | time: 10.652s
[2K| Adam | epoch: 003 | loss: 0.61615 - acc: 0.6645 -- iter: 02688/20250


Training Step: 677  | total loss: [1m[32m0.62025[0m[0m | time: 10.900s
[2K| Adam | epoch: 003 | loss: 0.62025 - acc: 0.6652 -- iter: 02752/20250


Training Step: 678  | total loss: [1m[32m0.62193[0m[0m | time: 11.147s
[2K| Adam | epoch: 003 | loss: 0.62193 - acc: 0.6612 -- iter: 02816/20250


Training Step: 679  | total loss: [1m[32m0.62191[0m[0m | time: 11.405s
[2K| Adam | epoch: 003 | loss: 0.62191 - acc: 0.6623 -- iter: 02880/20250


Training Step: 680  | total loss: [1m[32m0.62556[0m[0m | time: 11.651s
[2K| Adam | epoch: 003 | loss: 0.62556 - acc: 0.6586 -- iter: 02944/20250


Training Step: 681  | total loss: [1m[32m0.62945[0m[0m | time: 11.899s
[2K| Adam | epoch: 003 | loss: 0.62945 - acc: 0.6489 -- iter: 03008/20250


Training Step: 682  | total loss: [1m[32m0.63507[0m[0m | time: 12.178s
[2K| Adam | epoch: 003 | loss: 0.63507 - acc: 0.6497 -- iter: 03072/20250


Training Step: 683  | total loss: [1m[32m0.63345[0m[0m | time: 12.442s
[2K| Adam | epoch: 003 | loss: 0.63345 - acc: 0.6535 -- iter: 03136/20250


Training Step: 684  | total loss: [1m[32m0.62979[0m[0m | time: 12.687s
[2K| Adam | epoch: 003 | loss: 0.62979 - acc: 0.6553 -- iter: 03200/20250


Training Step: 685  | total loss: [1m[32m0.61998[0m[0m | time: 12.944s
[2K| Adam | epoch: 003 | loss: 0.61998 - acc: 0.6695 -- iter: 03264/20250


Training Step: 686  | total loss: [1m[32m0.61790[0m[0m | time: 13.194s
[2K| Adam | epoch: 003 | loss: 0.61790 - acc: 0.6713 -- iter: 03328/20250


Training Step: 687  | total loss: [1m[32m0.61448[0m[0m | time: 13.452s
[2K| Adam | epoch: 003 | loss: 0.61448 - acc: 0.6713 -- iter: 03392/20250


Training Step: 688  | total loss: [1m[32m0.61346[0m[0m | time: 13.704s
[2K| Adam | epoch: 003 | loss: 0.61346 - acc: 0.6761 -- iter: 03456/20250


Training Step: 689  | total loss: [1m[32m0.61739[0m[0m | time: 13.952s
[2K| Adam | epoch: 003 | loss: 0.61739 - acc: 0.6725 -- iter: 03520/20250


Training Step: 690  | total loss: [1m[32m0.61616[0m[0m | time: 14.197s
[2K| Adam | epoch: 003 | loss: 0.61616 - acc: 0.6740 -- iter: 03584/20250


Training Step: 691  | total loss: [1m[32m0.61356[0m[0m | time: 14.464s
[2K| Adam | epoch: 003 | loss: 0.61356 - acc: 0.6769 -- iter: 03648/20250


Training Step: 692  | total loss: [1m[32m0.61183[0m[0m | time: 14.731s
[2K| Adam | epoch: 003 | loss: 0.61183 - acc: 0.6749 -- iter: 03712/20250


Training Step: 693  | total loss: [1m[32m0.61938[0m[0m | time: 14.980s
[2K| Adam | epoch: 003 | loss: 0.61938 - acc: 0.6714 -- iter: 03776/20250


Training Step: 694  | total loss: [1m[32m0.61790[0m[0m | time: 15.231s
[2K| Adam | epoch: 003 | loss: 0.61790 - acc: 0.6762 -- iter: 03840/20250


Training Step: 695  | total loss: [1m[32m0.61389[0m[0m | time: 15.475s
[2K| Adam | epoch: 003 | loss: 0.61389 - acc: 0.6757 -- iter: 03904/20250


Training Step: 696  | total loss: [1m[32m0.61353[0m[0m | time: 15.723s
[2K| Adam | epoch: 003 | loss: 0.61353 - acc: 0.6722 -- iter: 03968/20250


Training Step: 697  | total loss: [1m[32m0.61374[0m[0m | time: 15.971s
[2K| Adam | epoch: 003 | loss: 0.61374 - acc: 0.6706 -- iter: 04032/20250


Training Step: 698  | total loss: [1m[32m0.62073[0m[0m | time: 16.228s
[2K| Adam | epoch: 003 | loss: 0.62073 - acc: 0.6645 -- iter: 04096/20250


Training Step: 699  | total loss: [1m[32m0.62440[0m[0m | time: 16.484s
[2K| Adam | epoch: 003 | loss: 0.62440 - acc: 0.6590 -- iter: 04160/20250


Training Step: 700  | total loss: [1m[32m0.62580[0m[0m | time: 16.745s
[2K| Adam | epoch: 003 | loss: 0.62580 - acc: 0.6603 -- iter: 04224/20250


Training Step: 701  | total loss: [1m[32m0.61901[0m[0m | time: 17.024s
[2K| Adam | epoch: 003 | loss: 0.61901 - acc: 0.6724 -- iter: 04288/20250


Training Step: 702  | total loss: [1m[32m0.61903[0m[0m | time: 17.292s
[2K| Adam | epoch: 003 | loss: 0.61903 - acc: 0.6739 -- iter: 04352/20250


Training Step: 703  | total loss: [1m[32m0.61960[0m[0m | time: 17.533s
[2K| Adam | epoch: 003 | loss: 0.61960 - acc: 0.6768 -- iter: 04416/20250


Training Step: 704  | total loss: [1m[32m0.61798[0m[0m | time: 17.787s
[2K| Adam | epoch: 003 | loss: 0.61798 - acc: 0.6779 -- iter: 04480/20250


Training Step: 705  | total loss: [1m[32m0.62166[0m[0m | time: 18.035s
[2K| Adam | epoch: 003 | loss: 0.62166 - acc: 0.6742 -- iter: 04544/20250


Training Step: 706  | total loss: [1m[32m0.62054[0m[0m | time: 18.286s
[2K| Adam | epoch: 003 | loss: 0.62054 - acc: 0.6755 -- iter: 04608/20250


Training Step: 707  | total loss: [1m[32m0.61696[0m[0m | time: 18.540s
[2K| Adam | epoch: 003 | loss: 0.61696 - acc: 0.6783 -- iter: 04672/20250


Training Step: 708  | total loss: [1m[32m0.61295[0m[0m | time: 18.792s
[2K| Adam | epoch: 003 | loss: 0.61295 - acc: 0.6792 -- iter: 04736/20250


Training Step: 709  | total loss: [1m[32m0.61519[0m[0m | time: 19.047s
[2K| Adam | epoch: 003 | loss: 0.61519 - acc: 0.6738 -- iter: 04800/20250


Training Step: 710  | total loss: [1m[32m0.61737[0m[0m | time: 19.295s
[2K| Adam | epoch: 003 | loss: 0.61737 - acc: 0.6689 -- iter: 04864/20250


Training Step: 711  | total loss: [1m[32m0.61440[0m[0m | time: 19.568s
[2K| Adam | epoch: 003 | loss: 0.61440 - acc: 0.6723 -- iter: 04928/20250


Training Step: 712  | total loss: [1m[32m0.60773[0m[0m | time: 19.831s
[2K| Adam | epoch: 003 | loss: 0.60773 - acc: 0.6785 -- iter: 04992/20250


Training Step: 713  | total loss: [1m[32m0.60428[0m[0m | time: 20.071s
[2K| Adam | epoch: 003 | loss: 0.60428 - acc: 0.6779 -- iter: 05056/20250


Training Step: 714  | total loss: [1m[32m0.60364[0m[0m | time: 20.320s
[2K| Adam | epoch: 003 | loss: 0.60364 - acc: 0.6804 -- iter: 05120/20250


Training Step: 715  | total loss: [1m[32m0.60265[0m[0m | time: 20.573s
[2K| Adam | epoch: 003 | loss: 0.60265 - acc: 0.6827 -- iter: 05184/20250


Training Step: 716  | total loss: [1m[32m0.60132[0m[0m | time: 20.819s
[2K| Adam | epoch: 003 | loss: 0.60132 - acc: 0.6800 -- iter: 05248/20250


Training Step: 717  | total loss: [1m[32m0.60182[0m[0m | time: 21.065s
[2K| Adam | epoch: 003 | loss: 0.60182 - acc: 0.6808 -- iter: 05312/20250


Training Step: 718  | total loss: [1m[32m0.60622[0m[0m | time: 21.315s
[2K| Adam | epoch: 003 | loss: 0.60622 - acc: 0.6736 -- iter: 05376/20250


Training Step: 719  | total loss: [1m[32m0.60005[0m[0m | time: 21.557s
[2K| Adam | epoch: 003 | loss: 0.60005 - acc: 0.6781 -- iter: 05440/20250


Training Step: 720  | total loss: [1m[32m0.60190[0m[0m | time: 21.816s
[2K| Adam | epoch: 003 | loss: 0.60190 - acc: 0.6775 -- iter: 05504/20250


Training Step: 721  | total loss: [1m[32m0.61011[0m[0m | time: 22.088s
[2K| Adam | epoch: 003 | loss: 0.61011 - acc: 0.6691 -- iter: 05568/20250


Training Step: 722  | total loss: [1m[32m0.60974[0m[0m | time: 22.343s
[2K| Adam | epoch: 003 | loss: 0.60974 - acc: 0.6741 -- iter: 05632/20250


Training Step: 723  | total loss: [1m[32m0.61613[0m[0m | time: 22.590s
[2K| Adam | epoch: 003 | loss: 0.61613 - acc: 0.6692 -- iter: 05696/20250


Training Step: 724  | total loss: [1m[32m0.61189[0m[0m | time: 22.838s
[2K| Adam | epoch: 003 | loss: 0.61189 - acc: 0.6820 -- iter: 05760/20250


Training Step: 725  | total loss: [1m[32m0.61078[0m[0m | time: 23.086s
[2K| Adam | epoch: 003 | loss: 0.61078 - acc: 0.6763 -- iter: 05824/20250


Training Step: 726  | total loss: [1m[32m0.60614[0m[0m | time: 23.328s
[2K| Adam | epoch: 003 | loss: 0.60614 - acc: 0.6805 -- iter: 05888/20250


Training Step: 727  | total loss: [1m[32m0.60244[0m[0m | time: 23.570s
[2K| Adam | epoch: 003 | loss: 0.60244 - acc: 0.6812 -- iter: 05952/20250


Training Step: 728  | total loss: [1m[32m0.60474[0m[0m | time: 23.819s
[2K| Adam | epoch: 003 | loss: 0.60474 - acc: 0.6818 -- iter: 06016/20250


Training Step: 729  | total loss: [1m[32m0.60949[0m[0m | time: 24.065s
[2K| Adam | epoch: 003 | loss: 0.60949 - acc: 0.6808 -- iter: 06080/20250


Training Step: 730  | total loss: [1m[32m0.61083[0m[0m | time: 24.309s
[2K| Adam | epoch: 003 | loss: 0.61083 - acc: 0.6768 -- iter: 06144/20250


Training Step: 731  | total loss: [1m[32m0.60575[0m[0m | time: 24.565s
[2K| Adam | epoch: 003 | loss: 0.60575 - acc: 0.6826 -- iter: 06208/20250


Training Step: 732  | total loss: [1m[32m0.61123[0m[0m | time: 24.847s
[2K| Adam | epoch: 003 | loss: 0.61123 - acc: 0.6768 -- iter: 06272/20250


Training Step: 733  | total loss: [1m[32m0.61528[0m[0m | time: 25.089s
[2K| Adam | epoch: 003 | loss: 0.61528 - acc: 0.6685 -- iter: 06336/20250


Training Step: 734  | total loss: [1m[32m0.61437[0m[0m | time: 25.334s
[2K| Adam | epoch: 003 | loss: 0.61437 - acc: 0.6673 -- iter: 06400/20250


Training Step: 735  | total loss: [1m[32m0.62122[0m[0m | time: 25.583s
[2K| Adam | epoch: 003 | loss: 0.62122 - acc: 0.6584 -- iter: 06464/20250


Training Step: 736  | total loss: [1m[32m0.62379[0m[0m | time: 25.833s
[2K| Adam | epoch: 003 | loss: 0.62379 - acc: 0.6550 -- iter: 06528/20250


Training Step: 737  | total loss: [1m[32m0.62383[0m[0m | time: 26.076s
[2K| Adam | epoch: 003 | loss: 0.62383 - acc: 0.6567 -- iter: 06592/20250


Training Step: 738  | total loss: [1m[32m0.61689[0m[0m | time: 26.321s
[2K| Adam | epoch: 003 | loss: 0.61689 - acc: 0.6567 -- iter: 06656/20250


Training Step: 739  | total loss: [1m[32m0.62051[0m[0m | time: 26.569s
[2K| Adam | epoch: 003 | loss: 0.62051 - acc: 0.6504 -- iter: 06720/20250


Training Step: 740  | total loss: [1m[32m0.63188[0m[0m | time: 26.815s
[2K| Adam | epoch: 003 | loss: 0.63188 - acc: 0.6369 -- iter: 06784/20250


Training Step: 741  | total loss: [1m[32m0.62065[0m[0m | time: 27.090s
[2K| Adam | epoch: 003 | loss: 0.62065 - acc: 0.6388 -- iter: 06848/20250


Training Step: 742  | total loss: [1m[32m0.62570[0m[0m | time: 27.367s
[2K| Adam | epoch: 003 | loss: 0.62570 - acc: 0.6265 -- iter: 06912/20250


Training Step: 743  | total loss: [1m[32m0.62389[0m[0m | time: 27.609s
[2K| Adam | epoch: 003 | loss: 0.62389 - acc: 0.6326 -- iter: 06976/20250


Training Step: 744  | total loss: [1m[32m0.62610[0m[0m | time: 27.860s
[2K| Adam | epoch: 003 | loss: 0.62610 - acc: 0.6225 -- iter: 07040/20250


Training Step: 745  | total loss: [1m[32m0.62441[0m[0m | time: 28.110s
[2K| Adam | epoch: 003 | loss: 0.62441 - acc: 0.6212 -- iter: 07104/20250


Training Step: 746  | total loss: [1m[32m0.62800[0m[0m | time: 28.367s
[2K| Adam | epoch: 003 | loss: 0.62800 - acc: 0.6106 -- iter: 07168/20250


Training Step: 747  | total loss: [1m[32m0.63475[0m[0m | time: 28.631s
[2K| Adam | epoch: 003 | loss: 0.63475 - acc: 0.6011 -- iter: 07232/20250


Training Step: 748  | total loss: [1m[32m0.63608[0m[0m | time: 28.892s
[2K| Adam | epoch: 003 | loss: 0.63608 - acc: 0.6004 -- iter: 07296/20250


Training Step: 749  | total loss: [1m[32m0.63050[0m[0m | time: 29.137s
[2K| Adam | epoch: 003 | loss: 0.63050 - acc: 0.6060 -- iter: 07360/20250


Training Step: 750  | total loss: [1m[32m0.62895[0m[0m | time: 29.393s
[2K| Adam | epoch: 003 | loss: 0.62895 - acc: 0.6126 -- iter: 07424/20250


Training Step: 751  | total loss: [1m[32m0.62806[0m[0m | time: 29.641s
[2K| Adam | epoch: 003 | loss: 0.62806 - acc: 0.6201 -- iter: 07488/20250


Training Step: 752  | total loss: [1m[32m0.62966[0m[0m | time: 29.899s
[2K| Adam | epoch: 003 | loss: 0.62966 - acc: 0.6174 -- iter: 07552/20250


Training Step: 753  | total loss: [1m[32m0.63008[0m[0m | time: 30.141s
[2K| Adam | epoch: 003 | loss: 0.63008 - acc: 0.6166 -- iter: 07616/20250


Training Step: 754  | total loss: [1m[32m0.63425[0m[0m | time: 30.395s
[2K| Adam | epoch: 003 | loss: 0.63425 - acc: 0.6175 -- iter: 07680/20250


Training Step: 755  | total loss: [1m[32m0.63996[0m[0m | time: 30.643s
[2K| Adam | epoch: 003 | loss: 0.63996 - acc: 0.6104 -- iter: 07744/20250


Training Step: 756  | total loss: [1m[32m0.64396[0m[0m | time: 30.891s
[2K| Adam | epoch: 003 | loss: 0.64396 - acc: 0.6072 -- iter: 07808/20250


Training Step: 757  | total loss: [1m[32m0.64305[0m[0m | time: 31.133s
[2K| Adam | epoch: 003 | loss: 0.64305 - acc: 0.6058 -- iter: 07872/20250


Training Step: 758  | total loss: [1m[32m0.63607[0m[0m | time: 31.381s
[2K| Adam | epoch: 003 | loss: 0.63607 - acc: 0.6171 -- iter: 07936/20250


Training Step: 759  | total loss: [1m[32m0.63306[0m[0m | time: 31.645s
[2K| Adam | epoch: 003 | loss: 0.63306 - acc: 0.6273 -- iter: 08000/20250


Training Step: 760  | total loss: [1m[32m0.62816[0m[0m | time: 31.901s
[2K| Adam | epoch: 003 | loss: 0.62816 - acc: 0.6333 -- iter: 08064/20250


Training Step: 761  | total loss: [1m[32m0.62796[0m[0m | time: 32.148s
[2K| Adam | epoch: 003 | loss: 0.62796 - acc: 0.6387 -- iter: 08128/20250


Training Step: 762  | total loss: [1m[32m0.63117[0m[0m | time: 32.390s
[2K| Adam | epoch: 003 | loss: 0.63117 - acc: 0.6327 -- iter: 08192/20250


Training Step: 763  | total loss: [1m[32m0.63099[0m[0m | time: 32.648s
[2K| Adam | epoch: 003 | loss: 0.63099 - acc: 0.6335 -- iter: 08256/20250


Training Step: 764  | total loss: [1m[32m0.62903[0m[0m | time: 32.898s
[2K| Adam | epoch: 003 | loss: 0.62903 - acc: 0.6373 -- iter: 08320/20250


Training Step: 765  | total loss: [1m[32m0.62913[0m[0m | time: 33.137s
[2K| Adam | epoch: 003 | loss: 0.62913 - acc: 0.6361 -- iter: 08384/20250


Training Step: 766  | total loss: [1m[32m0.63578[0m[0m | time: 33.378s
[2K| Adam | epoch: 003 | loss: 0.63578 - acc: 0.6272 -- iter: 08448/20250


Training Step: 767  | total loss: [1m[32m0.63756[0m[0m | time: 33.619s
[2K| Adam | epoch: 003 | loss: 0.63756 - acc: 0.6285 -- iter: 08512/20250


Training Step: 768  | total loss: [1m[32m0.63785[0m[0m | time: 33.862s
[2K| Adam | epoch: 003 | loss: 0.63785 - acc: 0.6297 -- iter: 08576/20250


Training Step: 769  | total loss: [1m[32m0.63851[0m[0m | time: 34.118s
[2K| Adam | epoch: 003 | loss: 0.63851 - acc: 0.6277 -- iter: 08640/20250


Training Step: 770  | total loss: [1m[32m0.63706[0m[0m | time: 34.376s
[2K| Adam | epoch: 003 | loss: 0.63706 - acc: 0.6290 -- iter: 08704/20250


Training Step: 771  | total loss: [1m[32m0.63721[0m[0m | time: 34.622s
[2K| Adam | epoch: 003 | loss: 0.63721 - acc: 0.6286 -- iter: 08768/20250


Training Step: 772  | total loss: [1m[32m0.63567[0m[0m | time: 34.872s
[2K| Adam | epoch: 003 | loss: 0.63567 - acc: 0.6298 -- iter: 08832/20250


Training Step: 773  | total loss: [1m[32m0.64342[0m[0m | time: 35.117s
[2K| Adam | epoch: 003 | loss: 0.64342 - acc: 0.6246 -- iter: 08896/20250


Training Step: 774  | total loss: [1m[32m0.64030[0m[0m | time: 35.356s
[2K| Adam | epoch: 003 | loss: 0.64030 - acc: 0.6293 -- iter: 08960/20250


Training Step: 775  | total loss: [1m[32m0.64012[0m[0m | time: 35.603s
[2K| Adam | epoch: 003 | loss: 0.64012 - acc: 0.6289 -- iter: 09024/20250


Training Step: 776  | total loss: [1m[32m0.63742[0m[0m | time: 35.857s
[2K| Adam | epoch: 003 | loss: 0.63742 - acc: 0.6254 -- iter: 09088/20250


Training Step: 777  | total loss: [1m[32m0.63806[0m[0m | time: 36.102s
[2K| Adam | epoch: 003 | loss: 0.63806 - acc: 0.6207 -- iter: 09152/20250


Training Step: 778  | total loss: [1m[32m0.63304[0m[0m | time: 36.353s
[2K| Adam | epoch: 003 | loss: 0.63304 - acc: 0.6227 -- iter: 09216/20250


Training Step: 779  | total loss: [1m[32m0.63324[0m[0m | time: 36.619s
[2K| Adam | epoch: 003 | loss: 0.63324 - acc: 0.6245 -- iter: 09280/20250


Training Step: 780  | total loss: [1m[32m0.63415[0m[0m | time: 36.880s
[2K| Adam | epoch: 003 | loss: 0.63415 - acc: 0.6183 -- iter: 09344/20250


Training Step: 781  | total loss: [1m[32m0.63536[0m[0m | time: 37.123s
[2K| Adam | epoch: 003 | loss: 0.63536 - acc: 0.6127 -- iter: 09408/20250


Training Step: 782  | total loss: [1m[32m0.63367[0m[0m | time: 37.364s
[2K| Adam | epoch: 003 | loss: 0.63367 - acc: 0.6155 -- iter: 09472/20250


Training Step: 783  | total loss: [1m[32m0.63276[0m[0m | time: 37.609s
[2K| Adam | epoch: 003 | loss: 0.63276 - acc: 0.6180 -- iter: 09536/20250


Training Step: 784  | total loss: [1m[32m0.63042[0m[0m | time: 37.851s
[2K| Adam | epoch: 003 | loss: 0.63042 - acc: 0.6187 -- iter: 09600/20250


Training Step: 785  | total loss: [1m[32m0.62917[0m[0m | time: 38.091s
[2K| Adam | epoch: 003 | loss: 0.62917 - acc: 0.6209 -- iter: 09664/20250


Training Step: 786  | total loss: [1m[32m0.62679[0m[0m | time: 38.339s
[2K| Adam | epoch: 003 | loss: 0.62679 - acc: 0.6307 -- iter: 09728/20250


Training Step: 787  | total loss: [1m[32m0.62703[0m[0m | time: 38.586s
[2K| Adam | epoch: 003 | loss: 0.62703 - acc: 0.6285 -- iter: 09792/20250


Training Step: 788  | total loss: [1m[32m0.62304[0m[0m | time: 38.840s
[2K| Adam | epoch: 003 | loss: 0.62304 - acc: 0.6313 -- iter: 09856/20250


Training Step: 789  | total loss: [1m[32m0.62135[0m[0m | time: 39.096s
[2K| Adam | epoch: 003 | loss: 0.62135 - acc: 0.6354 -- iter: 09920/20250


Training Step: 790  | total loss: [1m[32m0.62070[0m[0m | time: 39.341s
[2K| Adam | epoch: 003 | loss: 0.62070 - acc: 0.6375 -- iter: 09984/20250


Training Step: 791  | total loss: [1m[32m0.61828[0m[0m | time: 39.587s
[2K| Adam | epoch: 003 | loss: 0.61828 - acc: 0.6487 -- iter: 10048/20250


Training Step: 792  | total loss: [1m[32m0.62039[0m[0m | time: 39.836s
[2K| Adam | epoch: 003 | loss: 0.62039 - acc: 0.6479 -- iter: 10112/20250


Training Step: 793  | total loss: [1m[32m0.61926[0m[0m | time: 40.083s
[2K| Adam | epoch: 003 | loss: 0.61926 - acc: 0.6487 -- iter: 10176/20250


Training Step: 794  | total loss: [1m[32m0.61809[0m[0m | time: 40.329s
[2K| Adam | epoch: 003 | loss: 0.61809 - acc: 0.6542 -- iter: 10240/20250


Training Step: 795  | total loss: [1m[32m0.60951[0m[0m | time: 40.578s
[2K| Adam | epoch: 003 | loss: 0.60951 - acc: 0.6700 -- iter: 10304/20250


Training Step: 796  | total loss: [1m[32m0.61767[0m[0m | time: 40.826s
[2K| Adam | epoch: 003 | loss: 0.61767 - acc: 0.6624 -- iter: 10368/20250


Training Step: 797  | total loss: [1m[32m0.61725[0m[0m | time: 41.072s
[2K| Adam | epoch: 003 | loss: 0.61725 - acc: 0.6633 -- iter: 10432/20250


Training Step: 798  | total loss: [1m[32m0.61935[0m[0m | time: 41.330s
[2K| Adam | epoch: 003 | loss: 0.61935 - acc: 0.6642 -- iter: 10496/20250


Training Step: 799  | total loss: [1m[32m0.62584[0m[0m | time: 41.592s
[2K| Adam | epoch: 003 | loss: 0.62584 - acc: 0.6571 -- iter: 10560/20250


Training Step: 800  | total loss: [1m[32m0.62620[0m[0m | time: 41.842s
[2K| Adam | epoch: 003 | loss: 0.62620 - acc: 0.6539 -- iter: 10624/20250


Training Step: 801  | total loss: [1m[32m0.62548[0m[0m | time: 42.094s
[2K| Adam | epoch: 003 | loss: 0.62548 - acc: 0.6557 -- iter: 10688/20250


Training Step: 802  | total loss: [1m[32m0.62306[0m[0m | time: 42.339s
[2K| Adam | epoch: 003 | loss: 0.62306 - acc: 0.6511 -- iter: 10752/20250


Training Step: 803  | total loss: [1m[32m0.62920[0m[0m | time: 42.590s
[2K| Adam | epoch: 003 | loss: 0.62920 - acc: 0.6438 -- iter: 10816/20250


Training Step: 804  | total loss: [1m[32m0.62040[0m[0m | time: 42.846s
[2K| Adam | epoch: 003 | loss: 0.62040 - acc: 0.6482 -- iter: 10880/20250


Training Step: 805  | total loss: [1m[32m0.62449[0m[0m | time: 43.101s
[2K| Adam | epoch: 003 | loss: 0.62449 - acc: 0.6474 -- iter: 10944/20250


Training Step: 806  | total loss: [1m[32m0.62829[0m[0m | time: 43.347s
[2K| Adam | epoch: 003 | loss: 0.62829 - acc: 0.6483 -- iter: 11008/20250


Training Step: 807  | total loss: [1m[32m0.63177[0m[0m | time: 43.603s
[2K| Adam | epoch: 003 | loss: 0.63177 - acc: 0.6460 -- iter: 11072/20250


Training Step: 808  | total loss: [1m[32m0.64596[0m[0m | time: 43.856s
[2K| Adam | epoch: 003 | loss: 0.64596 - acc: 0.6345 -- iter: 11136/20250


Training Step: 809  | total loss: [1m[32m0.65384[0m[0m | time: 44.113s
[2K| Adam | epoch: 003 | loss: 0.65384 - acc: 0.6242 -- iter: 11200/20250


Training Step: 810  | total loss: [1m[32m0.65613[0m[0m | time: 44.369s
[2K| Adam | epoch: 003 | loss: 0.65613 - acc: 0.6164 -- iter: 11264/20250


Training Step: 811  | total loss: [1m[32m0.65716[0m[0m | time: 44.610s
[2K| Adam | epoch: 003 | loss: 0.65716 - acc: 0.6110 -- iter: 11328/20250


Training Step: 812  | total loss: [1m[32m0.65726[0m[0m | time: 44.863s
[2K| Adam | epoch: 003 | loss: 0.65726 - acc: 0.6078 -- iter: 11392/20250


Training Step: 813  | total loss: [1m[32m0.66095[0m[0m | time: 45.110s
[2K| Adam | epoch: 003 | loss: 0.66095 - acc: 0.6032 -- iter: 11456/20250


Training Step: 814  | total loss: [1m[32m0.65894[0m[0m | time: 45.358s
[2K| Adam | epoch: 003 | loss: 0.65894 - acc: 0.6023 -- iter: 11520/20250


Training Step: 815  | total loss: [1m[32m0.65963[0m[0m | time: 45.606s
[2K| Adam | epoch: 003 | loss: 0.65963 - acc: 0.5983 -- iter: 11584/20250


Training Step: 816  | total loss: [1m[32m0.66120[0m[0m | time: 45.852s
[2K| Adam | epoch: 003 | loss: 0.66120 - acc: 0.5900 -- iter: 11648/20250


Training Step: 817  | total loss: [1m[32m0.66416[0m[0m | time: 46.103s
[2K| Adam | epoch: 003 | loss: 0.66416 - acc: 0.5826 -- iter: 11712/20250


Training Step: 818  | total loss: [1m[32m0.66424[0m[0m | time: 46.368s
[2K| Adam | epoch: 003 | loss: 0.66424 - acc: 0.5884 -- iter: 11776/20250


Training Step: 819  | total loss: [1m[32m0.66458[0m[0m | time: 46.633s
[2K| Adam | epoch: 003 | loss: 0.66458 - acc: 0.5827 -- iter: 11840/20250


Training Step: 820  | total loss: [1m[32m0.66072[0m[0m | time: 46.883s
[2K| Adam | epoch: 003 | loss: 0.66072 - acc: 0.5791 -- iter: 11904/20250


Training Step: 821  | total loss: [1m[32m0.66600[0m[0m | time: 47.136s
[2K| Adam | epoch: 003 | loss: 0.66600 - acc: 0.5728 -- iter: 11968/20250


Training Step: 822  | total loss: [1m[32m0.66191[0m[0m | time: 47.382s
[2K| Adam | epoch: 003 | loss: 0.66191 - acc: 0.5764 -- iter: 12032/20250


Training Step: 823  | total loss: [1m[32m0.65048[0m[0m | time: 47.634s
[2K| Adam | epoch: 003 | loss: 0.65048 - acc: 0.5953 -- iter: 12096/20250


Training Step: 824  | total loss: [1m[32m0.64798[0m[0m | time: 47.889s
[2K| Adam | epoch: 003 | loss: 0.64798 - acc: 0.5967 -- iter: 12160/20250


Training Step: 825  | total loss: [1m[32m0.65036[0m[0m | time: 48.134s
[2K| Adam | epoch: 003 | loss: 0.65036 - acc: 0.5996 -- iter: 12224/20250


Training Step: 826  | total loss: [1m[32m0.65639[0m[0m | time: 48.390s
[2K| Adam | epoch: 003 | loss: 0.65639 - acc: 0.5990 -- iter: 12288/20250


Training Step: 827  | total loss: [1m[32m0.65280[0m[0m | time: 48.661s
[2K| Adam | epoch: 003 | loss: 0.65280 - acc: 0.6063 -- iter: 12352/20250


Training Step: 828  | total loss: [1m[32m0.65446[0m[0m | time: 48.910s
[2K| Adam | epoch: 003 | loss: 0.65446 - acc: 0.6081 -- iter: 12416/20250


Training Step: 829  | total loss: [1m[32m0.66169[0m[0m | time: 49.161s
[2K| Adam | epoch: 003 | loss: 0.66169 - acc: 0.5973 -- iter: 12480/20250


Training Step: 830  | total loss: [1m[32m0.66700[0m[0m | time: 49.411s
[2K| Adam | epoch: 003 | loss: 0.66700 - acc: 0.5876 -- iter: 12544/20250


Training Step: 831  | total loss: [1m[32m0.66025[0m[0m | time: 49.656s
[2K| Adam | epoch: 003 | loss: 0.66025 - acc: 0.5929 -- iter: 12608/20250


Training Step: 832  | total loss: [1m[32m0.65698[0m[0m | time: 49.901s
[2K| Adam | epoch: 003 | loss: 0.65698 - acc: 0.5992 -- iter: 12672/20250


Training Step: 833  | total loss: [1m[32m0.66054[0m[0m | time: 50.147s
[2K| Adam | epoch: 003 | loss: 0.66054 - acc: 0.5956 -- iter: 12736/20250


Training Step: 834  | total loss: [1m[32m0.65995[0m[0m | time: 50.401s
[2K| Adam | epoch: 003 | loss: 0.65995 - acc: 0.5985 -- iter: 12800/20250


Training Step: 835  | total loss: [1m[32m0.66400[0m[0m | time: 50.652s
[2K| Adam | epoch: 003 | loss: 0.66400 - acc: 0.5902 -- iter: 12864/20250


Training Step: 836  | total loss: [1m[32m0.66219[0m[0m | time: 50.902s
[2K| Adam | epoch: 003 | loss: 0.66219 - acc: 0.5843 -- iter: 12928/20250


Training Step: 837  | total loss: [1m[32m0.66074[0m[0m | time: 51.148s
[2K| Adam | epoch: 003 | loss: 0.66074 - acc: 0.5806 -- iter: 12992/20250


Training Step: 838  | total loss: [1m[32m0.65356[0m[0m | time: 51.425s
[2K| Adam | epoch: 003 | loss: 0.65356 - acc: 0.5913 -- iter: 13056/20250


Training Step: 839  | total loss: [1m[32m0.65538[0m[0m | time: 51.672s
[2K| Adam | epoch: 003 | loss: 0.65538 - acc: 0.5915 -- iter: 13120/20250


Training Step: 840  | total loss: [1m[32m0.66265[0m[0m | time: 51.933s
[2K| Adam | epoch: 003 | loss: 0.66265 - acc: 0.5839 -- iter: 13184/20250


Training Step: 841  | total loss: [1m[32m0.66502[0m[0m | time: 52.182s
[2K| Adam | epoch: 003 | loss: 0.66502 - acc: 0.5802 -- iter: 13248/20250


Training Step: 842  | total loss: [1m[32m0.66620[0m[0m | time: 52.425s
[2K| Adam | epoch: 003 | loss: 0.66620 - acc: 0.5785 -- iter: 13312/20250


Training Step: 843  | total loss: [1m[32m0.66403[0m[0m | time: 52.669s
[2K| Adam | epoch: 003 | loss: 0.66403 - acc: 0.5769 -- iter: 13376/20250


Training Step: 844  | total loss: [1m[32m0.66476[0m[0m | time: 52.907s
[2K| Adam | epoch: 003 | loss: 0.66476 - acc: 0.5739 -- iter: 13440/20250


Training Step: 845  | total loss: [1m[32m0.66585[0m[0m | time: 53.154s
[2K| Adam | epoch: 003 | loss: 0.66585 - acc: 0.5665 -- iter: 13504/20250


Training Step: 846  | total loss: [1m[32m0.66846[0m[0m | time: 53.404s
[2K| Adam | epoch: 003 | loss: 0.66846 - acc: 0.5692 -- iter: 13568/20250


Training Step: 847  | total loss: [1m[32m0.66468[0m[0m | time: 53.659s
[2K| Adam | epoch: 003 | loss: 0.66468 - acc: 0.5763 -- iter: 13632/20250


Training Step: 848  | total loss: [1m[32m0.66864[0m[0m | time: 53.907s
[2K| Adam | epoch: 003 | loss: 0.66864 - acc: 0.5750 -- iter: 13696/20250


Training Step: 849  | total loss: [1m[32m0.66768[0m[0m | time: 54.155s
[2K| Adam | epoch: 003 | loss: 0.66768 - acc: 0.5737 -- iter: 13760/20250


Training Step: 850  | total loss: [1m[32m0.66292[0m[0m | time: 54.404s
[2K| Adam | epoch: 003 | loss: 0.66292 - acc: 0.5773 -- iter: 13824/20250


Training Step: 851  | total loss: [1m[32m0.66400[0m[0m | time: 54.662s
[2K| Adam | epoch: 003 | loss: 0.66400 - acc: 0.5758 -- iter: 13888/20250


Training Step: 852  | total loss: [1m[32m0.66413[0m[0m | time: 54.904s
[2K| Adam | epoch: 003 | loss: 0.66413 - acc: 0.5729 -- iter: 13952/20250


Training Step: 853  | total loss: [1m[32m0.67024[0m[0m | time: 55.146s
[2K| Adam | epoch: 003 | loss: 0.67024 - acc: 0.5562 -- iter: 14016/20250


Training Step: 854  | total loss: [1m[32m0.66822[0m[0m | time: 55.397s
[2K| Adam | epoch: 003 | loss: 0.66822 - acc: 0.5631 -- iter: 14080/20250


Training Step: 855  | total loss: [1m[32m0.66868[0m[0m | time: 55.641s
[2K| Adam | epoch: 003 | loss: 0.66868 - acc: 0.5615 -- iter: 14144/20250


Training Step: 856  | total loss: [1m[32m0.67161[0m[0m | time: 55.889s
[2K| Adam | epoch: 003 | loss: 0.67161 - acc: 0.5491 -- iter: 14208/20250


Training Step: 857  | total loss: [1m[32m0.66783[0m[0m | time: 56.143s
[2K| Adam | epoch: 003 | loss: 0.66783 - acc: 0.5598 -- iter: 14272/20250


Training Step: 858  | total loss: [1m[32m0.65901[0m[0m | time: 56.402s
[2K| Adam | epoch: 003 | loss: 0.65901 - acc: 0.5710 -- iter: 14336/20250


Training Step: 859  | total loss: [1m[32m0.65933[0m[0m | time: 56.647s
[2K| Adam | epoch: 003 | loss: 0.65933 - acc: 0.5655 -- iter: 14400/20250


Training Step: 860  | total loss: [1m[32m0.65620[0m[0m | time: 56.891s
[2K| Adam | epoch: 003 | loss: 0.65620 - acc: 0.5652 -- iter: 14464/20250


Training Step: 861  | total loss: [1m[32m0.65453[0m[0m | time: 57.137s
[2K| Adam | epoch: 003 | loss: 0.65453 - acc: 0.5696 -- iter: 14528/20250


Training Step: 862  | total loss: [1m[32m0.66334[0m[0m | time: 57.382s
[2K| Adam | epoch: 003 | loss: 0.66334 - acc: 0.5595 -- iter: 14592/20250


Training Step: 863  | total loss: [1m[32m0.66095[0m[0m | time: 57.636s
[2K| Adam | epoch: 003 | loss: 0.66095 - acc: 0.5567 -- iter: 14656/20250


Training Step: 864  | total loss: [1m[32m0.66328[0m[0m | time: 57.880s
[2K| Adam | epoch: 003 | loss: 0.66328 - acc: 0.5463 -- iter: 14720/20250


Training Step: 865  | total loss: [1m[32m0.66614[0m[0m | time: 58.126s
[2K| Adam | epoch: 003 | loss: 0.66614 - acc: 0.5464 -- iter: 14784/20250


Training Step: 866  | total loss: [1m[32m0.66287[0m[0m | time: 58.378s
[2K| Adam | epoch: 003 | loss: 0.66287 - acc: 0.5496 -- iter: 14848/20250


Training Step: 867  | total loss: [1m[32m0.65894[0m[0m | time: 58.632s
[2K| Adam | epoch: 003 | loss: 0.65894 - acc: 0.5509 -- iter: 14912/20250


Training Step: 868  | total loss: [1m[32m0.66183[0m[0m | time: 58.899s
[2K| Adam | epoch: 003 | loss: 0.66183 - acc: 0.5458 -- iter: 14976/20250


Training Step: 869  | total loss: [1m[32m0.66049[0m[0m | time: 59.159s
[2K| Adam | epoch: 003 | loss: 0.66049 - acc: 0.5474 -- iter: 15040/20250


Training Step: 870  | total loss: [1m[32m0.66328[0m[0m | time: 59.413s
[2K| Adam | epoch: 003 | loss: 0.66328 - acc: 0.5380 -- iter: 15104/20250


Training Step: 871  | total loss: [1m[32m0.66363[0m[0m | time: 59.669s
[2K| Adam | epoch: 003 | loss: 0.66363 - acc: 0.5420 -- iter: 15168/20250


Training Step: 872  | total loss: [1m[32m0.65883[0m[0m | time: 59.918s
[2K| Adam | epoch: 003 | loss: 0.65883 - acc: 0.5456 -- iter: 15232/20250


Training Step: 873  | total loss: [1m[32m0.66134[0m[0m | time: 60.165s
[2K| Adam | epoch: 003 | loss: 0.66134 - acc: 0.5489 -- iter: 15296/20250


Training Step: 874  | total loss: [1m[32m0.65612[0m[0m | time: 60.416s
[2K| Adam | epoch: 003 | loss: 0.65612 - acc: 0.5627 -- iter: 15360/20250


Training Step: 875  | total loss: [1m[32m0.65892[0m[0m | time: 60.670s
[2K| Adam | epoch: 003 | loss: 0.65892 - acc: 0.5549 -- iter: 15424/20250


Training Step: 876  | total loss: [1m[32m0.65808[0m[0m | time: 60.923s
[2K| Adam | epoch: 003 | loss: 0.65808 - acc: 0.5557 -- iter: 15488/20250


Training Step: 877  | total loss: [1m[32m0.65659[0m[0m | time: 61.171s
[2K| Adam | epoch: 003 | loss: 0.65659 - acc: 0.5579 -- iter: 15552/20250


Training Step: 878  | total loss: [1m[32m0.65378[0m[0m | time: 61.425s
[2K| Adam | epoch: 003 | loss: 0.65378 - acc: 0.5646 -- iter: 15616/20250


Training Step: 879  | total loss: [1m[32m0.65446[0m[0m | time: 61.663s
[2K| Adam | epoch: 003 | loss: 0.65446 - acc: 0.5582 -- iter: 15680/20250


Training Step: 880  | total loss: [1m[32m0.65480[0m[0m | time: 61.937s
[2K| Adam | epoch: 003 | loss: 0.65480 - acc: 0.5586 -- iter: 15744/20250


Training Step: 881  | total loss: [1m[32m0.65370[0m[0m | time: 62.190s
[2K| Adam | epoch: 003 | loss: 0.65370 - acc: 0.5527 -- iter: 15808/20250


Training Step: 882  | total loss: [1m[32m0.64925[0m[0m | time: 62.430s
[2K| Adam | epoch: 003 | loss: 0.64925 - acc: 0.5646 -- iter: 15872/20250


Training Step: 883  | total loss: [1m[32m0.65183[0m[0m | time: 62.673s
[2K| Adam | epoch: 003 | loss: 0.65183 - acc: 0.5644 -- iter: 15936/20250


Training Step: 884  | total loss: [1m[32m0.65396[0m[0m | time: 62.930s
[2K| Adam | epoch: 003 | loss: 0.65396 - acc: 0.5611 -- iter: 16000/20250


Training Step: 885  | total loss: [1m[32m0.65263[0m[0m | time: 63.175s
[2K| Adam | epoch: 003 | loss: 0.65263 - acc: 0.5675 -- iter: 16064/20250


Training Step: 886  | total loss: [1m[32m0.65022[0m[0m | time: 63.417s
[2K| Adam | epoch: 003 | loss: 0.65022 - acc: 0.5717 -- iter: 16128/20250


Training Step: 887  | total loss: [1m[32m0.65304[0m[0m | time: 63.674s
[2K| Adam | epoch: 003 | loss: 0.65304 - acc: 0.5676 -- iter: 16192/20250


Training Step: 888  | total loss: [1m[32m0.65437[0m[0m | time: 63.928s
[2K| Adam | epoch: 003 | loss: 0.65437 - acc: 0.5734 -- iter: 16256/20250


Training Step: 889  | total loss: [1m[32m0.65057[0m[0m | time: 64.180s
[2K| Adam | epoch: 003 | loss: 0.65057 - acc: 0.5879 -- iter: 16320/20250


Training Step: 890  | total loss: [1m[32m0.65285[0m[0m | time: 64.428s
[2K| Adam | epoch: 003 | loss: 0.65285 - acc: 0.5807 -- iter: 16384/20250


Training Step: 891  | total loss: [1m[32m0.65611[0m[0m | time: 64.677s
[2K| Adam | epoch: 003 | loss: 0.65611 - acc: 0.5773 -- iter: 16448/20250


Training Step: 892  | total loss: [1m[32m0.65387[0m[0m | time: 64.922s
[2K| Adam | epoch: 003 | loss: 0.65387 - acc: 0.5868 -- iter: 16512/20250


Training Step: 893  | total loss: [1m[32m0.64845[0m[0m | time: 65.170s
[2K| Adam | epoch: 003 | loss: 0.64845 - acc: 0.6031 -- iter: 16576/20250


Training Step: 894  | total loss: [1m[32m0.65162[0m[0m | time: 65.418s
[2K| Adam | epoch: 003 | loss: 0.65162 - acc: 0.6006 -- iter: 16640/20250


Training Step: 895  | total loss: [1m[32m0.64841[0m[0m | time: 65.673s
[2K| Adam | epoch: 003 | loss: 0.64841 - acc: 0.6030 -- iter: 16704/20250


Training Step: 896  | total loss: [1m[32m0.63799[0m[0m | time: 65.929s
[2K| Adam | epoch: 003 | loss: 0.63799 - acc: 0.6162 -- iter: 16768/20250


Training Step: 897  | total loss: [1m[32m0.62784[0m[0m | time: 66.177s
[2K| Adam | epoch: 003 | loss: 0.62784 - acc: 0.6311 -- iter: 16832/20250


Training Step: 898  | total loss: [1m[32m0.62231[0m[0m | time: 66.425s
[2K| Adam | epoch: 003 | loss: 0.62231 - acc: 0.6461 -- iter: 16896/20250


Training Step: 899  | total loss: [1m[32m0.61769[0m[0m | time: 66.672s
[2K| Adam | epoch: 003 | loss: 0.61769 - acc: 0.6565 -- iter: 16960/20250


Training Step: 900  | total loss: [1m[32m0.63389[0m[0m | time: 66.930s
[2K| Adam | epoch: 003 | loss: 0.63389 - acc: 0.6440 -- iter: 17024/20250


Training Step: 901  | total loss: [1m[32m0.64525[0m[0m | time: 67.179s
[2K| Adam | epoch: 003 | loss: 0.64525 - acc: 0.6390 -- iter: 17088/20250


Training Step: 902  | total loss: [1m[32m0.64099[0m[0m | time: 67.416s
[2K| Adam | epoch: 003 | loss: 0.64099 - acc: 0.6438 -- iter: 17152/20250


Training Step: 903  | total loss: [1m[32m0.64297[0m[0m | time: 67.650s
[2K| Adam | epoch: 003 | loss: 0.64297 - acc: 0.6404 -- iter: 17216/20250


Training Step: 904  | total loss: [1m[32m0.63047[0m[0m | time: 67.896s
[2K| Adam | epoch: 003 | loss: 0.63047 - acc: 0.6545 -- iter: 17280/20250


Training Step: 905  | total loss: [1m[32m0.62053[0m[0m | time: 68.138s
[2K| Adam | epoch: 003 | loss: 0.62053 - acc: 0.6656 -- iter: 17344/20250


Training Step: 906  | total loss: [1m[32m0.62181[0m[0m | time: 68.393s
[2K| Adam | epoch: 003 | loss: 0.62181 - acc: 0.6615 -- iter: 17408/20250


Training Step: 907  | total loss: [1m[32m0.61419[0m[0m | time: 68.655s
[2K| Adam | epoch: 003 | loss: 0.61419 - acc: 0.6657 -- iter: 17472/20250


Training Step: 908  | total loss: [1m[32m0.61544[0m[0m | time: 68.908s
[2K| Adam | epoch: 003 | loss: 0.61544 - acc: 0.6600 -- iter: 17536/20250


Training Step: 909  | total loss: [1m[32m0.61138[0m[0m | time: 69.151s
[2K| Adam | epoch: 003 | loss: 0.61138 - acc: 0.6690 -- iter: 17600/20250


Training Step: 910  | total loss: [1m[32m0.62102[0m[0m | time: 69.392s
[2K| Adam | epoch: 003 | loss: 0.62102 - acc: 0.6568 -- iter: 17664/20250


Training Step: 911  | total loss: [1m[32m0.62023[0m[0m | time: 69.635s
[2K| Adam | epoch: 003 | loss: 0.62023 - acc: 0.6583 -- iter: 17728/20250


Training Step: 912  | total loss: [1m[32m0.62452[0m[0m | time: 69.882s
[2K| Adam | epoch: 003 | loss: 0.62452 - acc: 0.6581 -- iter: 17792/20250


Training Step: 913  | total loss: [1m[32m0.62574[0m[0m | time: 70.135s
[2K| Adam | epoch: 003 | loss: 0.62574 - acc: 0.6626 -- iter: 17856/20250


Training Step: 914  | total loss: [1m[32m0.62721[0m[0m | time: 70.385s
[2K| Adam | epoch: 003 | loss: 0.62721 - acc: 0.6542 -- iter: 17920/20250


Training Step: 915  | total loss: [1m[32m0.62963[0m[0m | time: 70.632s
[2K| Adam | epoch: 003 | loss: 0.62963 - acc: 0.6513 -- iter: 17984/20250


Training Step: 916  | total loss: [1m[32m0.63192[0m[0m | time: 70.874s
[2K| Adam | epoch: 003 | loss: 0.63192 - acc: 0.6408 -- iter: 18048/20250


Training Step: 917  | total loss: [1m[32m0.63186[0m[0m | time: 71.137s
[2K| Adam | epoch: 003 | loss: 0.63186 - acc: 0.6408 -- iter: 18112/20250


Training Step: 918  | total loss: [1m[32m0.62883[0m[0m | time: 71.388s
[2K| Adam | epoch: 003 | loss: 0.62883 - acc: 0.6423 -- iter: 18176/20250


Training Step: 919  | total loss: [1m[32m0.63299[0m[0m | time: 71.638s
[2K| Adam | epoch: 003 | loss: 0.63299 - acc: 0.6344 -- iter: 18240/20250


Training Step: 920  | total loss: [1m[32m0.62993[0m[0m | time: 71.886s
[2K| Adam | epoch: 003 | loss: 0.62993 - acc: 0.6397 -- iter: 18304/20250


Training Step: 921  | total loss: [1m[32m0.63155[0m[0m | time: 72.140s
[2K| Adam | epoch: 003 | loss: 0.63155 - acc: 0.6366 -- iter: 18368/20250


Training Step: 922  | total loss: [1m[32m0.64827[0m[0m | time: 72.389s
[2K| Adam | epoch: 003 | loss: 0.64827 - acc: 0.6308 -- iter: 18432/20250


Training Step: 923  | total loss: [1m[32m0.63946[0m[0m | time: 72.642s
[2K| Adam | epoch: 003 | loss: 0.63946 - acc: 0.6349 -- iter: 18496/20250


Training Step: 924  | total loss: [1m[32m0.63586[0m[0m | time: 72.892s
[2K| Adam | epoch: 003 | loss: 0.63586 - acc: 0.6402 -- iter: 18560/20250


Training Step: 925  | total loss: [1m[32m0.62996[0m[0m | time: 73.134s
[2K| Adam | epoch: 003 | loss: 0.62996 - acc: 0.6355 -- iter: 18624/20250


Training Step: 926  | total loss: [1m[32m0.62753[0m[0m | time: 73.392s
[2K| Adam | epoch: 003 | loss: 0.62753 - acc: 0.6329 -- iter: 18688/20250


Training Step: 927  | total loss: [1m[32m0.62578[0m[0m | time: 73.634s
[2K| Adam | epoch: 003 | loss: 0.62578 - acc: 0.6337 -- iter: 18752/20250


Training Step: 928  | total loss: [1m[32m0.61939[0m[0m | time: 73.884s
[2K| Adam | epoch: 003 | loss: 0.61939 - acc: 0.6453 -- iter: 18816/20250


Training Step: 929  | total loss: [1m[32m0.62179[0m[0m | time: 74.125s
[2K| Adam | epoch: 003 | loss: 0.62179 - acc: 0.6417 -- iter: 18880/20250


Training Step: 930  | total loss: [1m[32m0.62059[0m[0m | time: 74.369s
[2K| Adam | epoch: 003 | loss: 0.62059 - acc: 0.6385 -- iter: 18944/20250


Training Step: 931  | total loss: [1m[32m0.61706[0m[0m | time: 74.611s
[2K| Adam | epoch: 003 | loss: 0.61706 - acc: 0.6356 -- iter: 19008/20250


Training Step: 932  | total loss: [1m[32m0.60848[0m[0m | time: 74.861s
[2K| Adam | epoch: 003 | loss: 0.60848 - acc: 0.6361 -- iter: 19072/20250


Training Step: 933  | total loss: [1m[32m0.60914[0m[0m | time: 75.111s
[2K| Adam | epoch: 003 | loss: 0.60914 - acc: 0.6318 -- iter: 19136/20250


Training Step: 934  | total loss: [1m[32m0.59918[0m[0m | time: 75.351s
[2K| Adam | epoch: 003 | loss: 0.59918 - acc: 0.6405 -- iter: 19200/20250


Training Step: 935  | total loss: [1m[32m0.59359[0m[0m | time: 75.598s
[2K| Adam | epoch: 003 | loss: 0.59359 - acc: 0.6437 -- iter: 19264/20250


Training Step: 936  | total loss: [1m[32m0.60585[0m[0m | time: 75.843s
[2K| Adam | epoch: 003 | loss: 0.60585 - acc: 0.6277 -- iter: 19328/20250


Training Step: 937  | total loss: [1m[32m0.61091[0m[0m | time: 76.108s
[2K| Adam | epoch: 003 | loss: 0.61091 - acc: 0.6275 -- iter: 19392/20250


Training Step: 938  | total loss: [1m[32m0.61890[0m[0m | time: 76.360s
[2K| Adam | epoch: 003 | loss: 0.61890 - acc: 0.6225 -- iter: 19456/20250


Training Step: 939  | total loss: [1m[32m0.62800[0m[0m | time: 76.608s
[2K| Adam | epoch: 003 | loss: 0.62800 - acc: 0.6181 -- iter: 19520/20250


Training Step: 940  | total loss: [1m[32m0.62522[0m[0m | time: 76.854s
[2K| Adam | epoch: 003 | loss: 0.62522 - acc: 0.6235 -- iter: 19584/20250


Training Step: 941  | total loss: [1m[32m0.62563[0m[0m | time: 77.104s
[2K| Adam | epoch: 003 | loss: 0.62563 - acc: 0.6314 -- iter: 19648/20250


Training Step: 942  | total loss: [1m[32m0.62294[0m[0m | time: 77.349s
[2K| Adam | epoch: 003 | loss: 0.62294 - acc: 0.6339 -- iter: 19712/20250


Training Step: 943  | total loss: [1m[32m0.61467[0m[0m | time: 77.599s
[2K| Adam | epoch: 003 | loss: 0.61467 - acc: 0.6424 -- iter: 19776/20250


Training Step: 944  | total loss: [1m[32m0.60987[0m[0m | time: 77.856s
[2K| Adam | epoch: 003 | loss: 0.60987 - acc: 0.6438 -- iter: 19840/20250


Training Step: 945  | total loss: [1m[32m0.60231[0m[0m | time: 78.104s
[2K| Adam | epoch: 003 | loss: 0.60231 - acc: 0.6482 -- iter: 19904/20250


Training Step: 946  | total loss: [1m[32m0.59816[0m[0m | time: 78.366s
[2K| Adam | epoch: 003 | loss: 0.59816 - acc: 0.6505 -- iter: 19968/20250


Training Step: 947  | total loss: [1m[32m0.60110[0m[0m | time: 78.618s
[2K| Adam | epoch: 003 | loss: 0.60110 - acc: 0.6495 -- iter: 20032/20250


Training Step: 948  | total loss: [1m[32m0.60324[0m[0m | time: 78.879s
[2K| Adam | epoch: 003 | loss: 0.60324 - acc: 0.6518 -- iter: 20096/20250


Training Step: 949  | total loss: [1m[32m0.61010[0m[0m | time: 79.123s
[2K| Adam | epoch: 003 | loss: 0.61010 - acc: 0.6491 -- iter: 20160/20250


Training Step: 950  | total loss: [1m[32m0.61439[0m[0m | time: 79.371s
[2K| Adam | epoch: 003 | loss: 0.61439 - acc: 0.6389 -- iter: 20224/20250


Training Step: 951  | total loss: [1m[32m0.60773[0m[0m | time: 81.335s
[2K| Adam | epoch: 003 | loss: 0.60773 - acc: 0.6469 | val_loss: 0.69519 - val_acc: 0.6018 -- iter: 20250/20250
--


Training Step: 952  | total loss: [1m[32m0.60347[0m[0m | time: 0.248s
[2K| Adam | epoch: 004 | loss: 0.60347 - acc: 0.6619 -- iter: 00064/20250


Training Step: 953  | total loss: [1m[32m0.60866[0m[0m | time: 0.487s
[2K| Adam | epoch: 004 | loss: 0.60866 - acc: 0.6551 -- iter: 00128/20250


Training Step: 954  | total loss: [1m[32m0.60467[0m[0m | time: 0.721s
[2K| Adam | epoch: 004 | loss: 0.60467 - acc: 0.6665 -- iter: 00192/20250


Training Step: 955  | total loss: [1m[32m0.59946[0m[0m | time: 0.972s
[2K| Adam | epoch: 004 | loss: 0.59946 - acc: 0.6806 -- iter: 00256/20250


Training Step: 956  | total loss: [1m[32m0.59333[0m[0m | time: 1.217s
[2K| Adam | epoch: 004 | loss: 0.59333 - acc: 0.6828 -- iter: 00320/20250


Training Step: 957  | total loss: [1m[32m0.59329[0m[0m | time: 1.463s
[2K| Adam | epoch: 004 | loss: 0.59329 - acc: 0.6817 -- iter: 00384/20250


Training Step: 958  | total loss: [1m[32m0.59085[0m[0m | time: 1.711s
[2K| Adam | epoch: 004 | loss: 0.59085 - acc: 0.6870 -- iter: 00448/20250


Training Step: 959  | total loss: [1m[32m0.59102[0m[0m | time: 1.979s
[2K| Adam | epoch: 004 | loss: 0.59102 - acc: 0.6839 -- iter: 00512/20250


Training Step: 960  | total loss: [1m[32m0.59328[0m[0m | time: 2.226s
[2K| Adam | epoch: 004 | loss: 0.59328 - acc: 0.6796 -- iter: 00576/20250


Training Step: 961  | total loss: [1m[32m0.59145[0m[0m | time: 2.471s
[2K| Adam | epoch: 004 | loss: 0.59145 - acc: 0.6788 -- iter: 00640/20250


Training Step: 962  | total loss: [1m[32m0.58469[0m[0m | time: 2.717s
[2K| Adam | epoch: 004 | loss: 0.58469 - acc: 0.6891 -- iter: 00704/20250


Training Step: 963  | total loss: [1m[32m0.58869[0m[0m | time: 2.965s
[2K| Adam | epoch: 004 | loss: 0.58869 - acc: 0.6936 -- iter: 00768/20250


Training Step: 964  | total loss: [1m[32m0.58389[0m[0m | time: 3.211s
[2K| Adam | epoch: 004 | loss: 0.58389 - acc: 0.6992 -- iter: 00832/20250


Training Step: 965  | total loss: [1m[32m0.58284[0m[0m | time: 3.465s
[2K| Adam | epoch: 004 | loss: 0.58284 - acc: 0.6965 -- iter: 00896/20250


Training Step: 966  | total loss: [1m[32m0.56847[0m[0m | time: 3.709s
[2K| Adam | epoch: 004 | loss: 0.56847 - acc: 0.7081 -- iter: 00960/20250


Training Step: 967  | total loss: [1m[32m0.56538[0m[0m | time: 3.961s
[2K| Adam | epoch: 004 | loss: 0.56538 - acc: 0.7107 -- iter: 01024/20250


Training Step: 968  | total loss: [1m[32m0.56477[0m[0m | time: 4.207s
[2K| Adam | epoch: 004 | loss: 0.56477 - acc: 0.7100 -- iter: 01088/20250


Training Step: 969  | total loss: [1m[32m0.57367[0m[0m | time: 4.472s
[2K| Adam | epoch: 004 | loss: 0.57367 - acc: 0.7015 -- iter: 01152/20250


Training Step: 970  | total loss: [1m[32m0.57966[0m[0m | time: 4.731s
[2K| Adam | epoch: 004 | loss: 0.57966 - acc: 0.6954 -- iter: 01216/20250


Training Step: 971  | total loss: [1m[32m0.58014[0m[0m | time: 4.985s
[2K| Adam | epoch: 004 | loss: 0.58014 - acc: 0.6868 -- iter: 01280/20250


Training Step: 972  | total loss: [1m[32m0.58472[0m[0m | time: 5.232s
[2K| Adam | epoch: 004 | loss: 0.58472 - acc: 0.6775 -- iter: 01344/20250


Training Step: 973  | total loss: [1m[32m0.59326[0m[0m | time: 5.480s
[2K| Adam | epoch: 004 | loss: 0.59326 - acc: 0.6644 -- iter: 01408/20250


Training Step: 974  | total loss: [1m[32m0.61147[0m[0m | time: 5.725s
[2K| Adam | epoch: 004 | loss: 0.61147 - acc: 0.6542 -- iter: 01472/20250


Training Step: 975  | total loss: [1m[32m0.61337[0m[0m | time: 5.974s
[2K| Adam | epoch: 004 | loss: 0.61337 - acc: 0.6497 -- iter: 01536/20250


Training Step: 976  | total loss: [1m[32m0.61594[0m[0m | time: 6.221s
[2K| Adam | epoch: 004 | loss: 0.61594 - acc: 0.6410 -- iter: 01600/20250


Training Step: 977  | total loss: [1m[32m0.62202[0m[0m | time: 6.465s
[2K| Adam | epoch: 004 | loss: 0.62202 - acc: 0.6441 -- iter: 01664/20250


Training Step: 978  | total loss: [1m[32m0.61780[0m[0m | time: 6.711s
[2K| Adam | epoch: 004 | loss: 0.61780 - acc: 0.6469 -- iter: 01728/20250


Training Step: 979  | total loss: [1m[32m0.62007[0m[0m | time: 6.973s
[2K| Adam | epoch: 004 | loss: 0.62007 - acc: 0.6447 -- iter: 01792/20250


Training Step: 980  | total loss: [1m[32m0.62202[0m[0m | time: 7.242s
[2K| Adam | epoch: 004 | loss: 0.62202 - acc: 0.6380 -- iter: 01856/20250


Training Step: 981  | total loss: [1m[32m0.62264[0m[0m | time: 7.494s
[2K| Adam | epoch: 004 | loss: 0.62264 - acc: 0.6352 -- iter: 01920/20250


Training Step: 982  | total loss: [1m[32m0.62829[0m[0m | time: 7.737s
[2K| Adam | epoch: 004 | loss: 0.62829 - acc: 0.6326 -- iter: 01984/20250


Training Step: 983  | total loss: [1m[32m0.63266[0m[0m | time: 7.977s
[2K| Adam | epoch: 004 | loss: 0.63266 - acc: 0.6271 -- iter: 02048/20250


Training Step: 984  | total loss: [1m[32m0.63340[0m[0m | time: 8.215s
[2K| Adam | epoch: 004 | loss: 0.63340 - acc: 0.6238 -- iter: 02112/20250


Training Step: 985  | total loss: [1m[32m0.63348[0m[0m | time: 8.471s
[2K| Adam | epoch: 004 | loss: 0.63348 - acc: 0.6302 -- iter: 02176/20250


Training Step: 986  | total loss: [1m[32m0.63810[0m[0m | time: 8.725s
[2K| Adam | epoch: 004 | loss: 0.63810 - acc: 0.6234 -- iter: 02240/20250


Training Step: 987  | total loss: [1m[32m0.63214[0m[0m | time: 8.973s
[2K| Adam | epoch: 004 | loss: 0.63214 - acc: 0.6329 -- iter: 02304/20250


Training Step: 988  | total loss: [1m[32m0.62772[0m[0m | time: 9.217s
[2K| Adam | epoch: 004 | loss: 0.62772 - acc: 0.6368 -- iter: 02368/20250


Training Step: 989  | total loss: [1m[32m0.63743[0m[0m | time: 9.484s
[2K| Adam | epoch: 004 | loss: 0.63743 - acc: 0.6294 -- iter: 02432/20250


Training Step: 990  | total loss: [1m[32m0.62898[0m[0m | time: 9.741s
[2K| Adam | epoch: 004 | loss: 0.62898 - acc: 0.6321 -- iter: 02496/20250


Training Step: 991  | total loss: [1m[32m0.63232[0m[0m | time: 9.985s
[2K| Adam | epoch: 004 | loss: 0.63232 - acc: 0.6283 -- iter: 02560/20250


Training Step: 992  | total loss: [1m[32m0.63383[0m[0m | time: 10.229s
[2K| Adam | epoch: 004 | loss: 0.63383 - acc: 0.6217 -- iter: 02624/20250


Training Step: 993  | total loss: [1m[32m0.62574[0m[0m | time: 10.478s
[2K| Adam | epoch: 004 | loss: 0.62574 - acc: 0.6298 -- iter: 02688/20250


Training Step: 994  | total loss: [1m[32m0.62718[0m[0m | time: 10.723s
[2K| Adam | epoch: 004 | loss: 0.62718 - acc: 0.6262 -- iter: 02752/20250


Training Step: 995  | total loss: [1m[32m0.62589[0m[0m | time: 10.969s
[2K| Adam | epoch: 004 | loss: 0.62589 - acc: 0.6277 -- iter: 02816/20250


Training Step: 996  | total loss: [1m[32m0.63371[0m[0m | time: 11.214s
[2K| Adam | epoch: 004 | loss: 0.63371 - acc: 0.6118 -- iter: 02880/20250


Training Step: 997  | total loss: [1m[32m0.63437[0m[0m | time: 11.460s
[2K| Adam | epoch: 004 | loss: 0.63437 - acc: 0.6068 -- iter: 02944/20250


Training Step: 998  | total loss: [1m[32m0.63163[0m[0m | time: 11.702s
[2K| Adam | epoch: 004 | loss: 0.63163 - acc: 0.6087 -- iter: 03008/20250


Training Step: 999  | total loss: [1m[32m0.62805[0m[0m | time: 11.970s
[2K| Adam | epoch: 004 | loss: 0.62805 - acc: 0.6040 -- iter: 03072/20250


Training Step: 1000  | total loss: [1m[32m0.62574[0m[0m | time: 12.243s
[2K| Adam | epoch: 004 | loss: 0.62574 - acc: 0.6061 -- iter: 03136/20250


Training Step: 1001  | total loss: [1m[32m0.62472[0m[0m | time: 12.490s
[2K| Adam | epoch: 004 | loss: 0.62472 - acc: 0.5971 -- iter: 03200/20250


Training Step: 1002  | total loss: [1m[32m0.61769[0m[0m | time: 12.740s
[2K| Adam | epoch: 004 | loss: 0.61769 - acc: 0.6093 -- iter: 03264/20250


Training Step: 1003  | total loss: [1m[32m0.61696[0m[0m | time: 12.982s
[2K| Adam | epoch: 004 | loss: 0.61696 - acc: 0.6046 -- iter: 03328/20250


Training Step: 1004  | total loss: [1m[32m0.62025[0m[0m | time: 13.227s
[2K| Adam | epoch: 004 | loss: 0.62025 - acc: 0.6004 -- iter: 03392/20250


Training Step: 1005  | total loss: [1m[32m0.62299[0m[0m | time: 13.467s
[2K| Adam | epoch: 004 | loss: 0.62299 - acc: 0.5903 -- iter: 03456/20250


Training Step: 1006  | total loss: [1m[32m0.62485[0m[0m | time: 13.708s
[2K| Adam | epoch: 004 | loss: 0.62485 - acc: 0.5860 -- iter: 03520/20250


Training Step: 1007  | total loss: [1m[32m0.62497[0m[0m | time: 13.952s
[2K| Adam | epoch: 004 | loss: 0.62497 - acc: 0.5852 -- iter: 03584/20250


Training Step: 1008  | total loss: [1m[32m0.62601[0m[0m | time: 14.197s
[2K| Adam | epoch: 004 | loss: 0.62601 - acc: 0.5892 -- iter: 03648/20250


Training Step: 1009  | total loss: [1m[32m0.62031[0m[0m | time: 14.475s
[2K| Adam | epoch: 004 | loss: 0.62031 - acc: 0.5990 -- iter: 03712/20250


Training Step: 1010  | total loss: [1m[32m0.62306[0m[0m | time: 14.751s
[2K| Adam | epoch: 004 | loss: 0.62306 - acc: 0.6032 -- iter: 03776/20250


Training Step: 1011  | total loss: [1m[32m0.62137[0m[0m | time: 14.997s
[2K| Adam | epoch: 004 | loss: 0.62137 - acc: 0.6100 -- iter: 03840/20250


Training Step: 1012  | total loss: [1m[32m0.61624[0m[0m | time: 15.234s
[2K| Adam | epoch: 004 | loss: 0.61624 - acc: 0.6131 -- iter: 03904/20250


Training Step: 1013  | total loss: [1m[32m0.61455[0m[0m | time: 15.475s
[2K| Adam | epoch: 004 | loss: 0.61455 - acc: 0.6112 -- iter: 03968/20250


Training Step: 1014  | total loss: [1m[32m0.61129[0m[0m | time: 15.720s
[2K| Adam | epoch: 004 | loss: 0.61129 - acc: 0.6266 -- iter: 04032/20250


Training Step: 1015  | total loss: [1m[32m0.60241[0m[0m | time: 15.965s
[2K| Adam | epoch: 004 | loss: 0.60241 - acc: 0.6327 -- iter: 04096/20250


Training Step: 1016  | total loss: [1m[32m0.60423[0m[0m | time: 16.211s
[2K| Adam | epoch: 004 | loss: 0.60423 - acc: 0.6319 -- iter: 04160/20250


Training Step: 1017  | total loss: [1m[32m0.59937[0m[0m | time: 16.461s
[2K| Adam | epoch: 004 | loss: 0.59937 - acc: 0.6344 -- iter: 04224/20250


Training Step: 1018  | total loss: [1m[32m0.59968[0m[0m | time: 16.708s
[2K| Adam | epoch: 004 | loss: 0.59968 - acc: 0.6319 -- iter: 04288/20250


Training Step: 1019  | total loss: [1m[32m0.60082[0m[0m | time: 16.982s
[2K| Adam | epoch: 004 | loss: 0.60082 - acc: 0.6359 -- iter: 04352/20250


Training Step: 1020  | total loss: [1m[32m0.60740[0m[0m | time: 17.267s
[2K| Adam | epoch: 004 | loss: 0.60740 - acc: 0.6363 -- iter: 04416/20250


Training Step: 1021  | total loss: [1m[32m0.60515[0m[0m | time: 17.519s
[2K| Adam | epoch: 004 | loss: 0.60515 - acc: 0.6352 -- iter: 04480/20250


Training Step: 1022  | total loss: [1m[32m0.60240[0m[0m | time: 17.766s
[2K| Adam | epoch: 004 | loss: 0.60240 - acc: 0.6389 -- iter: 04544/20250


Training Step: 1023  | total loss: [1m[32m0.60299[0m[0m | time: 18.016s
[2K| Adam | epoch: 004 | loss: 0.60299 - acc: 0.6390 -- iter: 04608/20250


Training Step: 1024  | total loss: [1m[32m0.60633[0m[0m | time: 18.269s
[2K| Adam | epoch: 004 | loss: 0.60633 - acc: 0.6408 -- iter: 04672/20250


Training Step: 1025  | total loss: [1m[32m0.60234[0m[0m | time: 18.518s
[2K| Adam | epoch: 004 | loss: 0.60234 - acc: 0.6470 -- iter: 04736/20250


Training Step: 1026  | total loss: [1m[32m0.60167[0m[0m | time: 18.768s
[2K| Adam | epoch: 004 | loss: 0.60167 - acc: 0.6557 -- iter: 04800/20250


Training Step: 1027  | total loss: [1m[32m0.59614[0m[0m | time: 19.016s
[2K| Adam | epoch: 004 | loss: 0.59614 - acc: 0.6683 -- iter: 04864/20250


Training Step: 1028  | total loss: [1m[32m0.60204[0m[0m | time: 19.290s
[2K| Adam | epoch: 004 | loss: 0.60204 - acc: 0.6562 -- iter: 04928/20250


Training Step: 1029  | total loss: [1m[32m0.61171[0m[0m | time: 19.538s
[2K| Adam | epoch: 004 | loss: 0.61171 - acc: 0.6421 -- iter: 04992/20250


Training Step: 1030  | total loss: [1m[32m0.60889[0m[0m | time: 19.803s
[2K| Adam | epoch: 004 | loss: 0.60889 - acc: 0.6435 -- iter: 05056/20250


Training Step: 1031  | total loss: [1m[32m0.60674[0m[0m | time: 20.055s
[2K| Adam | epoch: 004 | loss: 0.60674 - acc: 0.6448 -- iter: 05120/20250


Training Step: 1032  | total loss: [1m[32m0.60507[0m[0m | time: 20.296s
[2K| Adam | epoch: 004 | loss: 0.60507 - acc: 0.6491 -- iter: 05184/20250


Training Step: 1033  | total loss: [1m[32m0.59558[0m[0m | time: 20.547s
[2K| Adam | epoch: 004 | loss: 0.59558 - acc: 0.6623 -- iter: 05248/20250


Training Step: 1034  | total loss: [1m[32m0.59716[0m[0m | time: 20.802s
[2K| Adam | epoch: 004 | loss: 0.59716 - acc: 0.6648 -- iter: 05312/20250


Training Step: 1035  | total loss: [1m[32m0.60204[0m[0m | time: 21.049s
[2K| Adam | epoch: 004 | loss: 0.60204 - acc: 0.6655 -- iter: 05376/20250


Training Step: 1036  | total loss: [1m[32m0.60256[0m[0m | time: 21.297s
[2K| Adam | epoch: 004 | loss: 0.60256 - acc: 0.6630 -- iter: 05440/20250


Training Step: 1037  | total loss: [1m[32m0.60551[0m[0m | time: 21.550s
[2K| Adam | epoch: 004 | loss: 0.60551 - acc: 0.6592 -- iter: 05504/20250


Training Step: 1038  | total loss: [1m[32m0.60550[0m[0m | time: 21.824s
[2K| Adam | epoch: 004 | loss: 0.60550 - acc: 0.6511 -- iter: 05568/20250


Training Step: 1039  | total loss: [1m[32m0.60974[0m[0m | time: 22.102s
[2K| Adam | epoch: 004 | loss: 0.60974 - acc: 0.6454 -- iter: 05632/20250


Training Step: 1040  | total loss: [1m[32m0.61051[0m[0m | time: 22.350s
[2K| Adam | epoch: 004 | loss: 0.61051 - acc: 0.6433 -- iter: 05696/20250


Training Step: 1041  | total loss: [1m[32m0.60407[0m[0m | time: 22.595s
[2K| Adam | epoch: 004 | loss: 0.60407 - acc: 0.6446 -- iter: 05760/20250


Training Step: 1042  | total loss: [1m[32m0.59819[0m[0m | time: 22.843s
[2K| Adam | epoch: 004 | loss: 0.59819 - acc: 0.6505 -- iter: 05824/20250


Training Step: 1043  | total loss: [1m[32m0.59202[0m[0m | time: 23.085s
[2K| Adam | epoch: 004 | loss: 0.59202 - acc: 0.6542 -- iter: 05888/20250


Training Step: 1044  | total loss: [1m[32m0.57806[0m[0m | time: 23.349s
[2K| Adam | epoch: 004 | loss: 0.57806 - acc: 0.6700 -- iter: 05952/20250


Training Step: 1045  | total loss: [1m[32m0.57798[0m[0m | time: 23.600s
[2K| Adam | epoch: 004 | loss: 0.57798 - acc: 0.6749 -- iter: 06016/20250


Training Step: 1046  | total loss: [1m[32m0.57855[0m[0m | time: 23.845s
[2K| Adam | epoch: 004 | loss: 0.57855 - acc: 0.6746 -- iter: 06080/20250


Training Step: 1047  | total loss: [1m[32m0.57570[0m[0m | time: 24.086s
[2K| Adam | epoch: 004 | loss: 0.57570 - acc: 0.6774 -- iter: 06144/20250


Training Step: 1048  | total loss: [1m[32m0.57102[0m[0m | time: 24.356s
[2K| Adam | epoch: 004 | loss: 0.57102 - acc: 0.6816 -- iter: 06208/20250


Training Step: 1049  | total loss: [1m[32m0.56531[0m[0m | time: 24.636s
[2K| Adam | epoch: 004 | loss: 0.56531 - acc: 0.6868 -- iter: 06272/20250


Training Step: 1050  | total loss: [1m[32m0.56222[0m[0m | time: 24.890s
[2K| Adam | epoch: 004 | loss: 0.56222 - acc: 0.6900 -- iter: 06336/20250


Training Step: 1051  | total loss: [1m[32m0.55974[0m[0m | time: 25.140s
[2K| Adam | epoch: 004 | loss: 0.55974 - acc: 0.6960 -- iter: 06400/20250


Training Step: 1052  | total loss: [1m[32m0.56638[0m[0m | time: 25.387s
[2K| Adam | epoch: 004 | loss: 0.56638 - acc: 0.6921 -- iter: 06464/20250


Training Step: 1053  | total loss: [1m[32m0.57048[0m[0m | time: 25.630s
[2K| Adam | epoch: 004 | loss: 0.57048 - acc: 0.6885 -- iter: 06528/20250


Training Step: 1054  | total loss: [1m[32m0.57739[0m[0m | time: 25.876s
[2K| Adam | epoch: 004 | loss: 0.57739 - acc: 0.6837 -- iter: 06592/20250


Training Step: 1055  | total loss: [1m[32m0.57646[0m[0m | time: 26.116s
[2K| Adam | epoch: 004 | loss: 0.57646 - acc: 0.6872 -- iter: 06656/20250


Training Step: 1056  | total loss: [1m[32m0.57555[0m[0m | time: 26.361s
[2K| Adam | epoch: 004 | loss: 0.57555 - acc: 0.6919 -- iter: 06720/20250


Training Step: 1057  | total loss: [1m[32m0.58328[0m[0m | time: 26.607s
[2K| Adam | epoch: 004 | loss: 0.58328 - acc: 0.6821 -- iter: 06784/20250


Training Step: 1058  | total loss: [1m[32m0.58668[0m[0m | time: 26.917s
[2K| Adam | epoch: 004 | loss: 0.58668 - acc: 0.6733 -- iter: 06848/20250


Training Step: 1059  | total loss: [1m[32m0.58839[0m[0m | time: 27.194s
[2K| Adam | epoch: 004 | loss: 0.58839 - acc: 0.6700 -- iter: 06912/20250


Training Step: 1060  | total loss: [1m[32m0.60005[0m[0m | time: 27.441s
[2K| Adam | epoch: 004 | loss: 0.60005 - acc: 0.6592 -- iter: 06976/20250


Training Step: 1061  | total loss: [1m[32m0.60620[0m[0m | time: 27.691s
[2K| Adam | epoch: 004 | loss: 0.60620 - acc: 0.6511 -- iter: 07040/20250


Training Step: 1062  | total loss: [1m[32m0.61178[0m[0m | time: 27.946s
[2K| Adam | epoch: 004 | loss: 0.61178 - acc: 0.6360 -- iter: 07104/20250


Training Step: 1063  | total loss: [1m[32m0.61715[0m[0m | time: 28.191s
[2K| Adam | epoch: 004 | loss: 0.61715 - acc: 0.6271 -- iter: 07168/20250


Training Step: 1064  | total loss: [1m[32m0.62671[0m[0m | time: 28.437s
[2K| Adam | epoch: 004 | loss: 0.62671 - acc: 0.6238 -- iter: 07232/20250


Training Step: 1065  | total loss: [1m[32m0.62528[0m[0m | time: 28.689s
[2K| Adam | epoch: 004 | loss: 0.62528 - acc: 0.6270 -- iter: 07296/20250


Training Step: 1066  | total loss: [1m[32m0.62696[0m[0m | time: 28.951s
[2K| Adam | epoch: 004 | loss: 0.62696 - acc: 0.6159 -- iter: 07360/20250


Training Step: 1067  | total loss: [1m[32m0.61855[0m[0m | time: 29.199s
[2K| Adam | epoch: 004 | loss: 0.61855 - acc: 0.6293 -- iter: 07424/20250


Training Step: 1068  | total loss: [1m[32m0.61188[0m[0m | time: 29.477s
[2K| Adam | epoch: 004 | loss: 0.61188 - acc: 0.6367 -- iter: 07488/20250


Training Step: 1069  | total loss: [1m[32m0.61630[0m[0m | time: 29.733s
[2K| Adam | epoch: 004 | loss: 0.61630 - acc: 0.6355 -- iter: 07552/20250


Training Step: 1070  | total loss: [1m[32m0.61932[0m[0m | time: 29.981s
[2K| Adam | epoch: 004 | loss: 0.61932 - acc: 0.6376 -- iter: 07616/20250


Training Step: 1071  | total loss: [1m[32m0.62207[0m[0m | time: 30.224s
[2K| Adam | epoch: 004 | loss: 0.62207 - acc: 0.6379 -- iter: 07680/20250


Training Step: 1072  | total loss: [1m[32m0.62538[0m[0m | time: 30.488s
[2K| Adam | epoch: 004 | loss: 0.62538 - acc: 0.6335 -- iter: 07744/20250


Training Step: 1073  | total loss: [1m[32m0.62221[0m[0m | time: 30.735s
[2K| Adam | epoch: 004 | loss: 0.62221 - acc: 0.6389 -- iter: 07808/20250


Training Step: 1074  | total loss: [1m[32m0.62801[0m[0m | time: 30.976s
[2K| Adam | epoch: 004 | loss: 0.62801 - acc: 0.6375 -- iter: 07872/20250


Training Step: 1075  | total loss: [1m[32m0.61773[0m[0m | time: 31.233s
[2K| Adam | epoch: 004 | loss: 0.61773 - acc: 0.6503 -- iter: 07936/20250


Training Step: 1076  | total loss: [1m[32m0.62208[0m[0m | time: 31.475s
[2K| Adam | epoch: 004 | loss: 0.62208 - acc: 0.6446 -- iter: 08000/20250


Training Step: 1077  | total loss: [1m[32m0.62412[0m[0m | time: 31.720s
[2K| Adam | epoch: 004 | loss: 0.62412 - acc: 0.6474 -- iter: 08064/20250


Training Step: 1078  | total loss: [1m[32m0.62758[0m[0m | time: 31.999s
[2K| Adam | epoch: 004 | loss: 0.62758 - acc: 0.6436 -- iter: 08128/20250


Training Step: 1079  | total loss: [1m[32m0.62132[0m[0m | time: 32.283s
[2K| Adam | epoch: 004 | loss: 0.62132 - acc: 0.6511 -- iter: 08192/20250


Training Step: 1080  | total loss: [1m[32m0.61773[0m[0m | time: 32.521s
[2K| Adam | epoch: 004 | loss: 0.61773 - acc: 0.6500 -- iter: 08256/20250


Training Step: 1081  | total loss: [1m[32m0.61319[0m[0m | time: 32.766s
[2K| Adam | epoch: 004 | loss: 0.61319 - acc: 0.6538 -- iter: 08320/20250


Training Step: 1082  | total loss: [1m[32m0.61920[0m[0m | time: 33.012s
[2K| Adam | epoch: 004 | loss: 0.61920 - acc: 0.6447 -- iter: 08384/20250


Training Step: 1083  | total loss: [1m[32m0.61275[0m[0m | time: 33.260s
[2K| Adam | epoch: 004 | loss: 0.61275 - acc: 0.6458 -- iter: 08448/20250


Training Step: 1084  | total loss: [1m[32m0.61110[0m[0m | time: 33.504s
[2K| Adam | epoch: 004 | loss: 0.61110 - acc: 0.6437 -- iter: 08512/20250


Training Step: 1085  | total loss: [1m[32m0.60852[0m[0m | time: 33.750s
[2K| Adam | epoch: 004 | loss: 0.60852 - acc: 0.6434 -- iter: 08576/20250


Training Step: 1086  | total loss: [1m[32m0.60749[0m[0m | time: 33.994s
[2K| Adam | epoch: 004 | loss: 0.60749 - acc: 0.6510 -- iter: 08640/20250


Training Step: 1087  | total loss: [1m[32m0.61335[0m[0m | time: 34.260s
[2K| Adam | epoch: 004 | loss: 0.61335 - acc: 0.6452 -- iter: 08704/20250


Training Step: 1088  | total loss: [1m[32m0.61154[0m[0m | time: 34.510s
[2K| Adam | epoch: 004 | loss: 0.61154 - acc: 0.6463 -- iter: 08768/20250


Training Step: 1089  | total loss: [1m[32m0.60825[0m[0m | time: 34.789s
[2K| Adam | epoch: 004 | loss: 0.60825 - acc: 0.6551 -- iter: 08832/20250


Training Step: 1090  | total loss: [1m[32m0.60804[0m[0m | time: 35.028s
[2K| Adam | epoch: 004 | loss: 0.60804 - acc: 0.6537 -- iter: 08896/20250


Training Step: 1091  | total loss: [1m[32m0.60798[0m[0m | time: 35.272s
[2K| Adam | epoch: 004 | loss: 0.60798 - acc: 0.6508 -- iter: 08960/20250


Training Step: 1092  | total loss: [1m[32m0.60743[0m[0m | time: 35.518s
[2K| Adam | epoch: 004 | loss: 0.60743 - acc: 0.6482 -- iter: 09024/20250


Training Step: 1093  | total loss: [1m[32m0.60739[0m[0m | time: 35.762s
[2K| Adam | epoch: 004 | loss: 0.60739 - acc: 0.6490 -- iter: 09088/20250


Training Step: 1094  | total loss: [1m[32m0.60097[0m[0m | time: 36.006s
[2K| Adam | epoch: 004 | loss: 0.60097 - acc: 0.6576 -- iter: 09152/20250


Training Step: 1095  | total loss: [1m[32m0.60123[0m[0m | time: 36.250s
[2K| Adam | epoch: 004 | loss: 0.60123 - acc: 0.6606 -- iter: 09216/20250


Training Step: 1096  | total loss: [1m[32m0.60104[0m[0m | time: 36.496s
[2K| Adam | epoch: 004 | loss: 0.60104 - acc: 0.6617 -- iter: 09280/20250


Training Step: 1097  | total loss: [1m[32m0.60013[0m[0m | time: 36.771s
[2K| Adam | epoch: 004 | loss: 0.60013 - acc: 0.6674 -- iter: 09344/20250


Training Step: 1098  | total loss: [1m[32m0.60486[0m[0m | time: 37.025s
[2K| Adam | epoch: 004 | loss: 0.60486 - acc: 0.6616 -- iter: 09408/20250


Training Step: 1099  | total loss: [1m[32m0.60952[0m[0m | time: 37.284s
[2K| Adam | epoch: 004 | loss: 0.60952 - acc: 0.6626 -- iter: 09472/20250


Training Step: 1100  | total loss: [1m[32m0.60548[0m[0m | time: 37.527s
[2K| Adam | epoch: 004 | loss: 0.60548 - acc: 0.6667 -- iter: 09536/20250


Training Step: 1101  | total loss: [1m[32m0.61006[0m[0m | time: 37.768s
[2K| Adam | epoch: 004 | loss: 0.61006 - acc: 0.6688 -- iter: 09600/20250


Training Step: 1102  | total loss: [1m[32m0.60822[0m[0m | time: 38.014s
[2K| Adam | epoch: 004 | loss: 0.60822 - acc: 0.6691 -- iter: 09664/20250


Training Step: 1103  | total loss: [1m[32m0.60730[0m[0m | time: 38.259s
[2K| Adam | epoch: 004 | loss: 0.60730 - acc: 0.6662 -- iter: 09728/20250


Training Step: 1104  | total loss: [1m[32m0.61056[0m[0m | time: 38.503s
[2K| Adam | epoch: 004 | loss: 0.61056 - acc: 0.6637 -- iter: 09792/20250


Training Step: 1105  | total loss: [1m[32m0.61698[0m[0m | time: 38.752s
[2K| Adam | epoch: 004 | loss: 0.61698 - acc: 0.6535 -- iter: 09856/20250


Training Step: 1106  | total loss: [1m[32m0.61224[0m[0m | time: 38.993s
[2K| Adam | epoch: 004 | loss: 0.61224 - acc: 0.6616 -- iter: 09920/20250


Training Step: 1107  | total loss: [1m[32m0.60414[0m[0m | time: 39.234s
[2K| Adam | epoch: 004 | loss: 0.60414 - acc: 0.6689 -- iter: 09984/20250


Training Step: 1108  | total loss: [1m[32m0.60071[0m[0m | time: 39.482s
[2K| Adam | epoch: 004 | loss: 0.60071 - acc: 0.6755 -- iter: 10048/20250


Training Step: 1109  | total loss: [1m[32m0.59076[0m[0m | time: 39.753s
[2K| Adam | epoch: 004 | loss: 0.59076 - acc: 0.6876 -- iter: 10112/20250


Training Step: 1110  | total loss: [1m[32m0.58794[0m[0m | time: 39.989s
[2K| Adam | epoch: 004 | loss: 0.58794 - acc: 0.6876 -- iter: 10176/20250


Training Step: 1111  | total loss: [1m[32m0.58846[0m[0m | time: 40.238s
[2K| Adam | epoch: 004 | loss: 0.58846 - acc: 0.6891 -- iter: 10240/20250


Training Step: 1112  | total loss: [1m[32m0.58675[0m[0m | time: 40.483s
[2K| Adam | epoch: 004 | loss: 0.58675 - acc: 0.6921 -- iter: 10304/20250


Training Step: 1113  | total loss: [1m[32m0.58253[0m[0m | time: 40.744s
[2K| Adam | epoch: 004 | loss: 0.58253 - acc: 0.6979 -- iter: 10368/20250


Training Step: 1114  | total loss: [1m[32m0.57968[0m[0m | time: 41.003s
[2K| Adam | epoch: 004 | loss: 0.57968 - acc: 0.6953 -- iter: 10432/20250


Training Step: 1115  | total loss: [1m[32m0.57705[0m[0m | time: 41.248s
[2K| Adam | epoch: 004 | loss: 0.57705 - acc: 0.6976 -- iter: 10496/20250


Training Step: 1116  | total loss: [1m[32m0.57120[0m[0m | time: 41.493s
[2K| Adam | epoch: 004 | loss: 0.57120 - acc: 0.7060 -- iter: 10560/20250


Training Step: 1117  | total loss: [1m[32m0.57025[0m[0m | time: 41.776s
[2K| Adam | epoch: 004 | loss: 0.57025 - acc: 0.7057 -- iter: 10624/20250


Training Step: 1118  | total loss: [1m[32m0.57056[0m[0m | time: 42.070s
[2K| Adam | epoch: 004 | loss: 0.57056 - acc: 0.7023 -- iter: 10688/20250


Training Step: 1119  | total loss: [1m[32m0.57976[0m[0m | time: 42.313s
[2K| Adam | epoch: 004 | loss: 0.57976 - acc: 0.6946 -- iter: 10752/20250


Training Step: 1120  | total loss: [1m[32m0.59110[0m[0m | time: 42.555s
[2K| Adam | epoch: 004 | loss: 0.59110 - acc: 0.6892 -- iter: 10816/20250


Training Step: 1121  | total loss: [1m[32m0.58491[0m[0m | time: 42.802s
[2K| Adam | epoch: 004 | loss: 0.58491 - acc: 0.7015 -- iter: 10880/20250


Training Step: 1122  | total loss: [1m[32m0.57447[0m[0m | time: 43.058s
[2K| Adam | epoch: 004 | loss: 0.57447 - acc: 0.7126 -- iter: 10944/20250


Training Step: 1123  | total loss: [1m[32m0.57952[0m[0m | time: 43.305s
[2K| Adam | epoch: 004 | loss: 0.57952 - acc: 0.7117 -- iter: 11008/20250


Training Step: 1124  | total loss: [1m[32m0.56664[0m[0m | time: 43.558s
[2K| Adam | epoch: 004 | loss: 0.56664 - acc: 0.7202 -- iter: 11072/20250


Training Step: 1125  | total loss: [1m[32m0.57477[0m[0m | time: 43.806s
[2K| Adam | epoch: 004 | loss: 0.57477 - acc: 0.7138 -- iter: 11136/20250


Training Step: 1126  | total loss: [1m[32m0.57405[0m[0m | time: 44.055s
[2K| Adam | epoch: 004 | loss: 0.57405 - acc: 0.7143 -- iter: 11200/20250


Training Step: 1127  | total loss: [1m[32m0.57477[0m[0m | time: 44.358s
[2K| Adam | epoch: 004 | loss: 0.57477 - acc: 0.7147 -- iter: 11264/20250


Training Step: 1128  | total loss: [1m[32m0.57786[0m[0m | time: 44.650s
[2K| Adam | epoch: 004 | loss: 0.57786 - acc: 0.7198 -- iter: 11328/20250


Training Step: 1129  | total loss: [1m[32m0.56904[0m[0m | time: 44.913s
[2K| Adam | epoch: 004 | loss: 0.56904 - acc: 0.7260 -- iter: 11392/20250


Training Step: 1130  | total loss: [1m[32m0.56896[0m[0m | time: 45.181s
[2K| Adam | epoch: 004 | loss: 0.56896 - acc: 0.7315 -- iter: 11456/20250


Training Step: 1131  | total loss: [1m[32m0.57173[0m[0m | time: 45.440s
[2K| Adam | epoch: 004 | loss: 0.57173 - acc: 0.7240 -- iter: 11520/20250


Training Step: 1132  | total loss: [1m[32m0.57272[0m[0m | time: 45.696s
[2K| Adam | epoch: 004 | loss: 0.57272 - acc: 0.7219 -- iter: 11584/20250


Training Step: 1133  | total loss: [1m[32m0.57443[0m[0m | time: 45.948s
[2K| Adam | epoch: 004 | loss: 0.57443 - acc: 0.7231 -- iter: 11648/20250


Training Step: 1134  | total loss: [1m[32m0.58240[0m[0m | time: 46.224s
[2K| Adam | epoch: 004 | loss: 0.58240 - acc: 0.7164 -- iter: 11712/20250


Training Step: 1135  | total loss: [1m[32m0.57383[0m[0m | time: 46.488s
[2K| Adam | epoch: 004 | loss: 0.57383 - acc: 0.7214 -- iter: 11776/20250


Training Step: 1136  | total loss: [1m[32m0.57907[0m[0m | time: 46.747s
[2K| Adam | epoch: 004 | loss: 0.57907 - acc: 0.7133 -- iter: 11840/20250


Training Step: 1137  | total loss: [1m[32m0.59458[0m[0m | time: 47.028s
[2K| Adam | epoch: 004 | loss: 0.59458 - acc: 0.6982 -- iter: 11904/20250


Training Step: 1138  | total loss: [1m[32m0.59266[0m[0m | time: 47.315s
[2K| Adam | epoch: 004 | loss: 0.59266 - acc: 0.7003 -- iter: 11968/20250


Training Step: 1139  | total loss: [1m[32m0.59026[0m[0m | time: 47.580s
[2K| Adam | epoch: 004 | loss: 0.59026 - acc: 0.7021 -- iter: 12032/20250


Training Step: 1140  | total loss: [1m[32m0.58930[0m[0m | time: 47.828s
[2K| Adam | epoch: 004 | loss: 0.58930 - acc: 0.7038 -- iter: 12096/20250


Training Step: 1141  | total loss: [1m[32m0.58917[0m[0m | time: 48.135s
[2K| Adam | epoch: 004 | loss: 0.58917 - acc: 0.7053 -- iter: 12160/20250


Training Step: 1142  | total loss: [1m[32m0.59153[0m[0m | time: 48.406s
[2K| Adam | epoch: 004 | loss: 0.59153 - acc: 0.7051 -- iter: 12224/20250


Training Step: 1143  | total loss: [1m[32m0.59367[0m[0m | time: 48.660s
[2K| Adam | epoch: 004 | loss: 0.59367 - acc: 0.7033 -- iter: 12288/20250


Training Step: 1144  | total loss: [1m[32m0.60233[0m[0m | time: 48.904s
[2K| Adam | epoch: 004 | loss: 0.60233 - acc: 0.6986 -- iter: 12352/20250


Training Step: 1145  | total loss: [1m[32m0.60008[0m[0m | time: 49.158s
[2K| Adam | epoch: 004 | loss: 0.60008 - acc: 0.6991 -- iter: 12416/20250


Training Step: 1146  | total loss: [1m[32m0.60021[0m[0m | time: 49.418s
[2K| Adam | epoch: 004 | loss: 0.60021 - acc: 0.6979 -- iter: 12480/20250


Training Step: 1147  | total loss: [1m[32m0.59135[0m[0m | time: 49.659s
[2K| Adam | epoch: 004 | loss: 0.59135 - acc: 0.7062 -- iter: 12544/20250


Training Step: 1148  | total loss: [1m[32m0.58910[0m[0m | time: 49.929s
[2K| Adam | epoch: 004 | loss: 0.58910 - acc: 0.7106 -- iter: 12608/20250


Training Step: 1149  | total loss: [1m[32m0.58441[0m[0m | time: 50.190s
[2K| Adam | epoch: 004 | loss: 0.58441 - acc: 0.7130 -- iter: 12672/20250


Training Step: 1150  | total loss: [1m[32m0.57802[0m[0m | time: 50.500s
[2K| Adam | epoch: 004 | loss: 0.57802 - acc: 0.7214 -- iter: 12736/20250


Training Step: 1151  | total loss: [1m[32m0.57025[0m[0m | time: 50.757s
[2K| Adam | epoch: 004 | loss: 0.57025 - acc: 0.7227 -- iter: 12800/20250


Training Step: 1152  | total loss: [1m[32m0.55917[0m[0m | time: 51.036s
[2K| Adam | epoch: 004 | loss: 0.55917 - acc: 0.7363 -- iter: 12864/20250


Training Step: 1153  | total loss: [1m[32m0.55461[0m[0m | time: 51.289s
[2K| Adam | epoch: 004 | loss: 0.55461 - acc: 0.7424 -- iter: 12928/20250


Training Step: 1154  | total loss: [1m[32m0.55193[0m[0m | time: 51.533s
[2K| Adam | epoch: 004 | loss: 0.55193 - acc: 0.7463 -- iter: 12992/20250


Training Step: 1155  | total loss: [1m[32m0.54712[0m[0m | time: 51.779s
[2K| Adam | epoch: 004 | loss: 0.54712 - acc: 0.7529 -- iter: 13056/20250


Training Step: 1156  | total loss: [1m[32m0.54571[0m[0m | time: 52.032s
[2K| Adam | epoch: 004 | loss: 0.54571 - acc: 0.7542 -- iter: 13120/20250


Training Step: 1157  | total loss: [1m[32m0.54571[0m[0m | time: 52.292s
[2K| Adam | epoch: 004 | loss: 0.54571 - acc: 0.7522 -- iter: 13184/20250


Training Step: 1158  | total loss: [1m[32m0.54474[0m[0m | time: 52.557s
[2K| Adam | epoch: 004 | loss: 0.54474 - acc: 0.7535 -- iter: 13248/20250


Training Step: 1159  | total loss: [1m[32m0.54065[0m[0m | time: 52.801s
[2K| Adam | epoch: 004 | loss: 0.54065 - acc: 0.7563 -- iter: 13312/20250


Training Step: 1160  | total loss: [1m[32m0.54455[0m[0m | time: 53.047s
[2K| Adam | epoch: 004 | loss: 0.54455 - acc: 0.7494 -- iter: 13376/20250


Training Step: 1161  | total loss: [1m[32m0.54898[0m[0m | time: 53.300s
[2K| Adam | epoch: 004 | loss: 0.54898 - acc: 0.7385 -- iter: 13440/20250


Training Step: 1162  | total loss: [1m[32m0.56057[0m[0m | time: 53.544s
[2K| Adam | epoch: 004 | loss: 0.56057 - acc: 0.7241 -- iter: 13504/20250


Training Step: 1163  | total loss: [1m[32m0.56770[0m[0m | time: 53.788s
[2K| Adam | epoch: 004 | loss: 0.56770 - acc: 0.7110 -- iter: 13568/20250


Training Step: 1164  | total loss: [1m[32m0.57917[0m[0m | time: 54.036s
[2K| Adam | epoch: 004 | loss: 0.57917 - acc: 0.6977 -- iter: 13632/20250


Training Step: 1165  | total loss: [1m[32m0.59059[0m[0m | time: 54.283s
[2K| Adam | epoch: 004 | loss: 0.59059 - acc: 0.6827 -- iter: 13696/20250


Training Step: 1166  | total loss: [1m[32m0.59774[0m[0m | time: 54.527s
[2K| Adam | epoch: 004 | loss: 0.59774 - acc: 0.6738 -- iter: 13760/20250


Training Step: 1167  | total loss: [1m[32m0.59840[0m[0m | time: 54.775s
[2K| Adam | epoch: 004 | loss: 0.59840 - acc: 0.6736 -- iter: 13824/20250


Training Step: 1168  | total loss: [1m[32m0.59980[0m[0m | time: 55.036s
[2K| Adam | epoch: 004 | loss: 0.59980 - acc: 0.6734 -- iter: 13888/20250


Training Step: 1169  | total loss: [1m[32m0.60659[0m[0m | time: 55.286s
[2K| Adam | epoch: 004 | loss: 0.60659 - acc: 0.6654 -- iter: 13952/20250


Training Step: 1170  | total loss: [1m[32m0.61087[0m[0m | time: 55.529s
[2K| Adam | epoch: 004 | loss: 0.61087 - acc: 0.6551 -- iter: 14016/20250


Training Step: 1171  | total loss: [1m[32m0.60503[0m[0m | time: 55.775s
[2K| Adam | epoch: 004 | loss: 0.60503 - acc: 0.6553 -- iter: 14080/20250


Training Step: 1172  | total loss: [1m[32m0.61277[0m[0m | time: 56.025s
[2K| Adam | epoch: 004 | loss: 0.61277 - acc: 0.6460 -- iter: 14144/20250


Training Step: 1173  | total loss: [1m[32m0.60795[0m[0m | time: 56.280s
[2K| Adam | epoch: 004 | loss: 0.60795 - acc: 0.6501 -- iter: 14208/20250


Training Step: 1174  | total loss: [1m[32m0.60443[0m[0m | time: 56.535s
[2K| Adam | epoch: 004 | loss: 0.60443 - acc: 0.6507 -- iter: 14272/20250


Training Step: 1175  | total loss: [1m[32m0.59802[0m[0m | time: 56.781s
[2K| Adam | epoch: 004 | loss: 0.59802 - acc: 0.6544 -- iter: 14336/20250


Training Step: 1176  | total loss: [1m[32m0.59917[0m[0m | time: 57.023s
[2K| Adam | epoch: 004 | loss: 0.59917 - acc: 0.6515 -- iter: 14400/20250


Training Step: 1177  | total loss: [1m[32m0.59928[0m[0m | time: 57.271s
[2K| Adam | epoch: 004 | loss: 0.59928 - acc: 0.6473 -- iter: 14464/20250


Training Step: 1178  | total loss: [1m[32m0.60044[0m[0m | time: 57.543s
[2K| Adam | epoch: 004 | loss: 0.60044 - acc: 0.6357 -- iter: 14528/20250


Training Step: 1179  | total loss: [1m[32m0.60074[0m[0m | time: 57.800s
[2K| Adam | epoch: 004 | loss: 0.60074 - acc: 0.6346 -- iter: 14592/20250


Training Step: 1180  | total loss: [1m[32m0.60263[0m[0m | time: 58.043s
[2K| Adam | epoch: 004 | loss: 0.60263 - acc: 0.6383 -- iter: 14656/20250


Training Step: 1181  | total loss: [1m[32m0.59266[0m[0m | time: 58.290s
[2K| Adam | epoch: 004 | loss: 0.59266 - acc: 0.6589 -- iter: 14720/20250


Training Step: 1182  | total loss: [1m[32m0.59261[0m[0m | time: 58.541s
[2K| Adam | epoch: 004 | loss: 0.59261 - acc: 0.6570 -- iter: 14784/20250


Training Step: 1183  | total loss: [1m[32m0.59610[0m[0m | time: 58.789s
[2K| Adam | epoch: 004 | loss: 0.59610 - acc: 0.6523 -- iter: 14848/20250


Training Step: 1184  | total loss: [1m[32m0.59465[0m[0m | time: 59.040s
[2K| Adam | epoch: 004 | loss: 0.59465 - acc: 0.6558 -- iter: 14912/20250


Training Step: 1185  | total loss: [1m[32m0.59290[0m[0m | time: 59.292s
[2K| Adam | epoch: 004 | loss: 0.59290 - acc: 0.6683 -- iter: 14976/20250


Training Step: 1186  | total loss: [1m[32m0.59425[0m[0m | time: 59.533s
[2K| Adam | epoch: 004 | loss: 0.59425 - acc: 0.6718 -- iter: 15040/20250


Training Step: 1187  | total loss: [1m[32m0.58701[0m[0m | time: 59.793s
[2K| Adam | epoch: 004 | loss: 0.58701 - acc: 0.6875 -- iter: 15104/20250


Training Step: 1188  | total loss: [1m[32m0.59078[0m[0m | time: 60.050s
[2K| Adam | epoch: 004 | loss: 0.59078 - acc: 0.6843 -- iter: 15168/20250


Training Step: 1189  | total loss: [1m[32m0.59076[0m[0m | time: 60.288s
[2K| Adam | epoch: 004 | loss: 0.59076 - acc: 0.6846 -- iter: 15232/20250


Training Step: 1190  | total loss: [1m[32m0.59443[0m[0m | time: 60.531s
[2K| Adam | epoch: 004 | loss: 0.59443 - acc: 0.6865 -- iter: 15296/20250


Training Step: 1191  | total loss: [1m[32m0.58916[0m[0m | time: 60.795s
[2K| Adam | epoch: 004 | loss: 0.58916 - acc: 0.6944 -- iter: 15360/20250


Training Step: 1192  | total loss: [1m[32m0.58255[0m[0m | time: 61.041s
[2K| Adam | epoch: 004 | loss: 0.58255 - acc: 0.7047 -- iter: 15424/20250


Training Step: 1193  | total loss: [1m[32m0.57739[0m[0m | time: 61.288s
[2K| Adam | epoch: 004 | loss: 0.57739 - acc: 0.7076 -- iter: 15488/20250


Training Step: 1194  | total loss: [1m[32m0.57060[0m[0m | time: 61.533s
[2K| Adam | epoch: 004 | loss: 0.57060 - acc: 0.7181 -- iter: 15552/20250


Training Step: 1195  | total loss: [1m[32m0.56126[0m[0m | time: 61.786s
[2K| Adam | epoch: 004 | loss: 0.56126 - acc: 0.7276 -- iter: 15616/20250


Training Step: 1196  | total loss: [1m[32m0.55699[0m[0m | time: 62.037s
[2K| Adam | epoch: 004 | loss: 0.55699 - acc: 0.7329 -- iter: 15680/20250


Training Step: 1197  | total loss: [1m[32m0.57293[0m[0m | time: 62.283s
[2K| Adam | epoch: 004 | loss: 0.57293 - acc: 0.7174 -- iter: 15744/20250


Training Step: 1198  | total loss: [1m[32m0.62236[0m[0m | time: 62.548s
[2K| Adam | epoch: 004 | loss: 0.62236 - acc: 0.6801 -- iter: 15808/20250


Training Step: 1199  | total loss: [1m[32m0.64241[0m[0m | time: 62.795s
[2K| Adam | epoch: 004 | loss: 0.64241 - acc: 0.6652 -- iter: 15872/20250


Training Step: 1200  | total loss: [1m[32m0.66060[0m[0m | time: 63.038s
[2K| Adam | epoch: 004 | loss: 0.66060 - acc: 0.6518 -- iter: 15936/20250


Training Step: 1201  | total loss: [1m[32m0.66480[0m[0m | time: 63.288s
[2K| Adam | epoch: 004 | loss: 0.66480 - acc: 0.6491 -- iter: 16000/20250


Training Step: 1202  | total loss: [1m[32m0.67123[0m[0m | time: 63.537s
[2K| Adam | epoch: 004 | loss: 0.67123 - acc: 0.6483 -- iter: 16064/20250


Training Step: 1203  | total loss: [1m[32m0.66926[0m[0m | time: 63.782s
[2K| Adam | epoch: 004 | loss: 0.66926 - acc: 0.6459 -- iter: 16128/20250


Training Step: 1204  | total loss: [1m[32m0.66930[0m[0m | time: 64.028s
[2K| Adam | epoch: 004 | loss: 0.66930 - acc: 0.6423 -- iter: 16192/20250


Training Step: 1205  | total loss: [1m[32m0.66592[0m[0m | time: 64.274s
[2K| Adam | epoch: 004 | loss: 0.66592 - acc: 0.6390 -- iter: 16256/20250


Training Step: 1206  | total loss: [1m[32m0.65696[0m[0m | time: 64.517s
[2K| Adam | epoch: 004 | loss: 0.65696 - acc: 0.6438 -- iter: 16320/20250


Training Step: 1207  | total loss: [1m[32m0.65909[0m[0m | time: 64.774s
[2K| Adam | epoch: 004 | loss: 0.65909 - acc: 0.6373 -- iter: 16384/20250


Training Step: 1208  | total loss: [1m[32m0.66004[0m[0m | time: 65.018s
[2K| Adam | epoch: 004 | loss: 0.66004 - acc: 0.6298 -- iter: 16448/20250


Training Step: 1209  | total loss: [1m[32m0.65524[0m[0m | time: 65.266s
[2K| Adam | epoch: 004 | loss: 0.65524 - acc: 0.6262 -- iter: 16512/20250


Training Step: 1210  | total loss: [1m[32m0.66118[0m[0m | time: 65.507s
[2K| Adam | epoch: 004 | loss: 0.66118 - acc: 0.6042 -- iter: 16576/20250


Training Step: 1211  | total loss: [1m[32m0.65605[0m[0m | time: 65.761s
[2K| Adam | epoch: 004 | loss: 0.65605 - acc: 0.6016 -- iter: 16640/20250


Training Step: 1212  | total loss: [1m[32m0.65961[0m[0m | time: 66.009s
[2K| Adam | epoch: 004 | loss: 0.65961 - acc: 0.5930 -- iter: 16704/20250


Training Step: 1213  | total loss: [1m[32m0.66186[0m[0m | time: 66.253s
[2K| Adam | epoch: 004 | loss: 0.66186 - acc: 0.5868 -- iter: 16768/20250


Training Step: 1214  | total loss: [1m[32m0.65606[0m[0m | time: 66.500s
[2K| Adam | epoch: 004 | loss: 0.65606 - acc: 0.5922 -- iter: 16832/20250


Training Step: 1215  | total loss: [1m[32m0.65748[0m[0m | time: 66.741s
[2K| Adam | epoch: 004 | loss: 0.65748 - acc: 0.5845 -- iter: 16896/20250


Training Step: 1216  | total loss: [1m[32m0.65447[0m[0m | time: 66.984s
[2K| Adam | epoch: 004 | loss: 0.65447 - acc: 0.5792 -- iter: 16960/20250


Training Step: 1217  | total loss: [1m[32m0.65190[0m[0m | time: 67.242s
[2K| Adam | epoch: 004 | loss: 0.65190 - acc: 0.5775 -- iter: 17024/20250


Training Step: 1218  | total loss: [1m[32m0.65077[0m[0m | time: 67.495s
[2K| Adam | epoch: 004 | loss: 0.65077 - acc: 0.5713 -- iter: 17088/20250


Training Step: 1219  | total loss: [1m[32m0.64794[0m[0m | time: 67.737s
[2K| Adam | epoch: 004 | loss: 0.64794 - acc: 0.5736 -- iter: 17152/20250


Training Step: 1220  | total loss: [1m[32m0.64169[0m[0m | time: 67.981s
[2K| Adam | epoch: 004 | loss: 0.64169 - acc: 0.5819 -- iter: 17216/20250


Training Step: 1221  | total loss: [1m[32m0.64120[0m[0m | time: 68.226s
[2K| Adam | epoch: 004 | loss: 0.64120 - acc: 0.5815 -- iter: 17280/20250


Training Step: 1222  | total loss: [1m[32m0.64090[0m[0m | time: 68.477s
[2K| Adam | epoch: 004 | loss: 0.64090 - acc: 0.5843 -- iter: 17344/20250


Training Step: 1223  | total loss: [1m[32m0.64465[0m[0m | time: 68.742s
[2K| Adam | epoch: 004 | loss: 0.64465 - acc: 0.5805 -- iter: 17408/20250


Training Step: 1224  | total loss: [1m[32m0.63805[0m[0m | time: 68.991s
[2K| Adam | epoch: 004 | loss: 0.63805 - acc: 0.5881 -- iter: 17472/20250


Training Step: 1225  | total loss: [1m[32m0.63743[0m[0m | time: 69.242s
[2K| Adam | epoch: 004 | loss: 0.63743 - acc: 0.5855 -- iter: 17536/20250


Training Step: 1226  | total loss: [1m[32m0.64558[0m[0m | time: 69.494s
[2K| Adam | epoch: 004 | loss: 0.64558 - acc: 0.5817 -- iter: 17600/20250


Training Step: 1227  | total loss: [1m[32m0.65220[0m[0m | time: 69.741s
[2K| Adam | epoch: 004 | loss: 0.65220 - acc: 0.5735 -- iter: 17664/20250


Training Step: 1228  | total loss: [1m[32m0.64485[0m[0m | time: 70.001s
[2K| Adam | epoch: 004 | loss: 0.64485 - acc: 0.5787 -- iter: 17728/20250


Training Step: 1229  | total loss: [1m[32m0.64334[0m[0m | time: 70.244s
[2K| Adam | epoch: 004 | loss: 0.64334 - acc: 0.5849 -- iter: 17792/20250


Training Step: 1230  | total loss: [1m[32m0.63456[0m[0m | time: 70.490s
[2K| Adam | epoch: 004 | loss: 0.63456 - acc: 0.5889 -- iter: 17856/20250


Training Step: 1231  | total loss: [1m[32m0.63004[0m[0m | time: 70.743s
[2K| Adam | epoch: 004 | loss: 0.63004 - acc: 0.5987 -- iter: 17920/20250


Training Step: 1232  | total loss: [1m[32m0.63227[0m[0m | time: 70.987s
[2K| Adam | epoch: 004 | loss: 0.63227 - acc: 0.5920 -- iter: 17984/20250


Training Step: 1233  | total loss: [1m[32m0.62744[0m[0m | time: 71.227s
[2K| Adam | epoch: 004 | loss: 0.62744 - acc: 0.5906 -- iter: 18048/20250


Training Step: 1234  | total loss: [1m[32m0.62926[0m[0m | time: 71.470s
[2K| Adam | epoch: 004 | loss: 0.62926 - acc: 0.5894 -- iter: 18112/20250


Training Step: 1235  | total loss: [1m[32m0.63179[0m[0m | time: 71.716s
[2K| Adam | epoch: 004 | loss: 0.63179 - acc: 0.5851 -- iter: 18176/20250


Training Step: 1236  | total loss: [1m[32m0.62218[0m[0m | time: 71.955s
[2K| Adam | epoch: 004 | loss: 0.62218 - acc: 0.5891 -- iter: 18240/20250


Training Step: 1237  | total loss: [1m[32m0.63469[0m[0m | time: 72.206s
[2K| Adam | epoch: 004 | loss: 0.63469 - acc: 0.5692 -- iter: 18304/20250


Training Step: 1238  | total loss: [1m[32m0.63079[0m[0m | time: 72.451s
[2K| Adam | epoch: 004 | loss: 0.63079 - acc: 0.5670 -- iter: 18368/20250


Training Step: 1239  | total loss: [1m[32m0.62967[0m[0m | time: 72.693s
[2K| Adam | epoch: 004 | loss: 0.62967 - acc: 0.5791 -- iter: 18432/20250


Training Step: 1240  | total loss: [1m[32m0.62971[0m[0m | time: 72.956s
[2K| Adam | epoch: 004 | loss: 0.62971 - acc: 0.5805 -- iter: 18496/20250


Training Step: 1241  | total loss: [1m[32m0.62402[0m[0m | time: 73.205s
[2K| Adam | epoch: 004 | loss: 0.62402 - acc: 0.5865 -- iter: 18560/20250


Training Step: 1242  | total loss: [1m[32m0.62827[0m[0m | time: 73.453s
[2K| Adam | epoch: 004 | loss: 0.62827 - acc: 0.5794 -- iter: 18624/20250


Training Step: 1243  | total loss: [1m[32m0.62726[0m[0m | time: 73.705s
[2K| Adam | epoch: 004 | loss: 0.62726 - acc: 0.5887 -- iter: 18688/20250


Training Step: 1244  | total loss: [1m[32m0.63178[0m[0m | time: 73.955s
[2K| Adam | epoch: 004 | loss: 0.63178 - acc: 0.5876 -- iter: 18752/20250


Training Step: 1245  | total loss: [1m[32m0.62820[0m[0m | time: 74.204s
[2K| Adam | epoch: 004 | loss: 0.62820 - acc: 0.5929 -- iter: 18816/20250


Training Step: 1246  | total loss: [1m[32m0.63101[0m[0m | time: 74.455s
[2K| Adam | epoch: 004 | loss: 0.63101 - acc: 0.5883 -- iter: 18880/20250


Training Step: 1247  | total loss: [1m[32m0.62707[0m[0m | time: 74.709s
[2K| Adam | epoch: 004 | loss: 0.62707 - acc: 0.6045 -- iter: 18944/20250


Training Step: 1248  | total loss: [1m[32m0.62736[0m[0m | time: 74.960s
[2K| Adam | epoch: 004 | loss: 0.62736 - acc: 0.6081 -- iter: 19008/20250


Training Step: 1249  | total loss: [1m[32m0.61915[0m[0m | time: 75.201s
[2K| Adam | epoch: 004 | loss: 0.61915 - acc: 0.6207 -- iter: 19072/20250


Training Step: 1250  | total loss: [1m[32m0.61632[0m[0m | time: 75.451s
[2K| Adam | epoch: 004 | loss: 0.61632 - acc: 0.6212 -- iter: 19136/20250


Training Step: 1251  | total loss: [1m[32m0.60743[0m[0m | time: 75.701s
[2K| Adam | epoch: 004 | loss: 0.60743 - acc: 0.6231 -- iter: 19200/20250


Training Step: 1252  | total loss: [1m[32m0.60756[0m[0m | time: 75.946s
[2K| Adam | epoch: 004 | loss: 0.60756 - acc: 0.6327 -- iter: 19264/20250


Training Step: 1253  | total loss: [1m[32m0.60774[0m[0m | time: 76.188s
[2K| Adam | epoch: 004 | loss: 0.60774 - acc: 0.6272 -- iter: 19328/20250


Training Step: 1254  | total loss: [1m[32m0.61208[0m[0m | time: 76.458s
[2K| Adam | epoch: 004 | loss: 0.61208 - acc: 0.6207 -- iter: 19392/20250


Training Step: 1255  | total loss: [1m[32m0.61992[0m[0m | time: 76.701s
[2K| Adam | epoch: 004 | loss: 0.61992 - acc: 0.6055 -- iter: 19456/20250


Training Step: 1256  | total loss: [1m[32m0.61438[0m[0m | time: 76.979s
[2K| Adam | epoch: 004 | loss: 0.61438 - acc: 0.6075 -- iter: 19520/20250


Training Step: 1257  | total loss: [1m[32m0.61081[0m[0m | time: 77.232s
[2K| Adam | epoch: 004 | loss: 0.61081 - acc: 0.6124 -- iter: 19584/20250


Training Step: 1258  | total loss: [1m[32m0.61002[0m[0m | time: 77.493s
[2K| Adam | epoch: 004 | loss: 0.61002 - acc: 0.6230 -- iter: 19648/20250


Training Step: 1259  | total loss: [1m[32m0.61315[0m[0m | time: 77.747s
[2K| Adam | epoch: 004 | loss: 0.61315 - acc: 0.6154 -- iter: 19712/20250


Training Step: 1260  | total loss: [1m[32m0.61069[0m[0m | time: 77.992s
[2K| Adam | epoch: 004 | loss: 0.61069 - acc: 0.6257 -- iter: 19776/20250


Training Step: 1261  | total loss: [1m[32m0.60781[0m[0m | time: 78.239s
[2K| Adam | epoch: 004 | loss: 0.60781 - acc: 0.6303 -- iter: 19840/20250


Training Step: 1262  | total loss: [1m[32m0.60823[0m[0m | time: 78.486s
[2K| Adam | epoch: 004 | loss: 0.60823 - acc: 0.6298 -- iter: 19904/20250


Training Step: 1263  | total loss: [1m[32m0.60761[0m[0m | time: 78.726s
[2K| Adam | epoch: 004 | loss: 0.60761 - acc: 0.6340 -- iter: 19968/20250


Training Step: 1264  | total loss: [1m[32m0.60534[0m[0m | time: 78.976s
[2K| Adam | epoch: 004 | loss: 0.60534 - acc: 0.6394 -- iter: 20032/20250


Training Step: 1265  | total loss: [1m[32m0.60169[0m[0m | time: 79.220s
[2K| Adam | epoch: 004 | loss: 0.60169 - acc: 0.6442 -- iter: 20096/20250


Training Step: 1266  | total loss: [1m[32m0.59618[0m[0m | time: 79.478s
[2K| Adam | epoch: 004 | loss: 0.59618 - acc: 0.6532 -- iter: 20160/20250


Training Step: 1267  | total loss: [1m[32m0.60092[0m[0m | time: 79.725s
[2K| Adam | epoch: 004 | loss: 0.60092 - acc: 0.6410 -- iter: 20224/20250


Training Step: 1268  | total loss: [1m[32m0.60230[0m[0m | time: 81.675s
[2K| Adam | epoch: 004 | loss: 0.60230 - acc: 0.6363 | val_loss: 0.71933 - val_acc: 0.5773 -- iter: 20250/20250
--


Training Step: 1269  | total loss: [1m[32m0.59766[0m[0m | time: 0.245s
[2K| Adam | epoch: 005 | loss: 0.59766 - acc: 0.6320 -- iter: 00064/20250


Training Step: 1270  | total loss: [1m[32m0.59772[0m[0m | time: 0.504s
[2K| Adam | epoch: 005 | loss: 0.59772 - acc: 0.6282 -- iter: 00128/20250


Training Step: 1271  | total loss: [1m[32m0.59802[0m[0m | time: 0.770s
[2K| Adam | epoch: 005 | loss: 0.59802 - acc: 0.6263 -- iter: 00192/20250


Training Step: 1272  | total loss: [1m[32m0.59767[0m[0m | time: 1.013s
[2K| Adam | epoch: 005 | loss: 0.59767 - acc: 0.6368 -- iter: 00256/20250


Training Step: 1273  | total loss: [1m[32m0.59240[0m[0m | time: 1.258s
[2K| Adam | epoch: 005 | loss: 0.59240 - acc: 0.6346 -- iter: 00320/20250


Training Step: 1274  | total loss: [1m[32m0.58828[0m[0m | time: 1.503s
[2K| Adam | epoch: 005 | loss: 0.58828 - acc: 0.6430 -- iter: 00384/20250


Training Step: 1275  | total loss: [1m[32m0.59112[0m[0m | time: 1.755s
[2K| Adam | epoch: 005 | loss: 0.59112 - acc: 0.6397 -- iter: 00448/20250


Training Step: 1276  | total loss: [1m[32m0.59268[0m[0m | time: 2.006s
[2K| Adam | epoch: 005 | loss: 0.59268 - acc: 0.6460 -- iter: 00512/20250


Training Step: 1277  | total loss: [1m[32m0.58728[0m[0m | time: 2.254s
[2K| Adam | epoch: 005 | loss: 0.58728 - acc: 0.6580 -- iter: 00576/20250


Training Step: 1278  | total loss: [1m[32m0.58619[0m[0m | time: 2.490s
[2K| Adam | epoch: 005 | loss: 0.58619 - acc: 0.6531 -- iter: 00640/20250


Training Step: 1279  | total loss: [1m[32m0.58426[0m[0m | time: 2.743s
[2K| Adam | epoch: 005 | loss: 0.58426 - acc: 0.6597 -- iter: 00704/20250


Training Step: 1280  | total loss: [1m[32m0.58472[0m[0m | time: 3.014s
[2K| Adam | epoch: 005 | loss: 0.58472 - acc: 0.6546 -- iter: 00768/20250


Training Step: 1281  | total loss: [1m[32m0.58425[0m[0m | time: 3.265s
[2K| Adam | epoch: 005 | loss: 0.58425 - acc: 0.6548 -- iter: 00832/20250


Training Step: 1282  | total loss: [1m[32m0.57909[0m[0m | time: 3.512s
[2K| Adam | epoch: 005 | loss: 0.57909 - acc: 0.6565 -- iter: 00896/20250


Training Step: 1283  | total loss: [1m[32m0.57793[0m[0m | time: 3.767s
[2K| Adam | epoch: 005 | loss: 0.57793 - acc: 0.6518 -- iter: 00960/20250


Training Step: 1284  | total loss: [1m[32m0.57704[0m[0m | time: 4.022s
[2K| Adam | epoch: 005 | loss: 0.57704 - acc: 0.6569 -- iter: 01024/20250


Training Step: 1285  | total loss: [1m[32m0.58082[0m[0m | time: 4.266s
[2K| Adam | epoch: 005 | loss: 0.58082 - acc: 0.6584 -- iter: 01088/20250


Training Step: 1286  | total loss: [1m[32m0.57918[0m[0m | time: 4.512s
[2K| Adam | epoch: 005 | loss: 0.57918 - acc: 0.6629 -- iter: 01152/20250


Training Step: 1287  | total loss: [1m[32m0.58060[0m[0m | time: 4.761s
[2K| Adam | epoch: 005 | loss: 0.58060 - acc: 0.6591 -- iter: 01216/20250


Training Step: 1288  | total loss: [1m[32m0.58227[0m[0m | time: 5.002s
[2K| Adam | epoch: 005 | loss: 0.58227 - acc: 0.6588 -- iter: 01280/20250


Training Step: 1289  | total loss: [1m[32m0.58028[0m[0m | time: 5.259s
[2K| Adam | epoch: 005 | loss: 0.58028 - acc: 0.6601 -- iter: 01344/20250


Training Step: 1290  | total loss: [1m[32m0.58115[0m[0m | time: 5.549s
[2K| Adam | epoch: 005 | loss: 0.58115 - acc: 0.6566 -- iter: 01408/20250


Training Step: 1291  | total loss: [1m[32m0.57417[0m[0m | time: 5.795s
[2K| Adam | epoch: 005 | loss: 0.57417 - acc: 0.6581 -- iter: 01472/20250


Training Step: 1292  | total loss: [1m[32m0.57150[0m[0m | time: 6.048s
[2K| Adam | epoch: 005 | loss: 0.57150 - acc: 0.6611 -- iter: 01536/20250


Training Step: 1293  | total loss: [1m[32m0.58177[0m[0m | time: 6.290s
[2K| Adam | epoch: 005 | loss: 0.58177 - acc: 0.6481 -- iter: 01600/20250


Training Step: 1294  | total loss: [1m[32m0.57871[0m[0m | time: 6.537s
[2K| Adam | epoch: 005 | loss: 0.57871 - acc: 0.6505 -- iter: 01664/20250


Training Step: 1295  | total loss: [1m[32m0.57138[0m[0m | time: 6.783s
[2K| Adam | epoch: 005 | loss: 0.57138 - acc: 0.6635 -- iter: 01728/20250


Training Step: 1296  | total loss: [1m[32m0.57179[0m[0m | time: 7.032s
[2K| Adam | epoch: 005 | loss: 0.57179 - acc: 0.6659 -- iter: 01792/20250


Training Step: 1297  | total loss: [1m[32m0.57467[0m[0m | time: 7.277s
[2K| Adam | epoch: 005 | loss: 0.57467 - acc: 0.6619 -- iter: 01856/20250


Training Step: 1298  | total loss: [1m[32m0.57630[0m[0m | time: 7.529s
[2K| Adam | epoch: 005 | loss: 0.57630 - acc: 0.6629 -- iter: 01920/20250


Training Step: 1299  | total loss: [1m[32m0.57633[0m[0m | time: 7.798s
[2K| Adam | epoch: 005 | loss: 0.57633 - acc: 0.6591 -- iter: 01984/20250


Training Step: 1300  | total loss: [1m[32m0.57687[0m[0m | time: 8.060s
[2K| Adam | epoch: 005 | loss: 0.57687 - acc: 0.6541 -- iter: 02048/20250


Training Step: 1301  | total loss: [1m[32m0.58191[0m[0m | time: 8.310s
[2K| Adam | epoch: 005 | loss: 0.58191 - acc: 0.6481 -- iter: 02112/20250


Training Step: 1302  | total loss: [1m[32m0.59487[0m[0m | time: 8.554s
[2K| Adam | epoch: 005 | loss: 0.59487 - acc: 0.6301 -- iter: 02176/20250


Training Step: 1303  | total loss: [1m[32m0.59865[0m[0m | time: 8.789s
[2K| Adam | epoch: 005 | loss: 0.59865 - acc: 0.6296 -- iter: 02240/20250


Training Step: 1304  | total loss: [1m[32m0.60456[0m[0m | time: 9.035s
[2K| Adam | epoch: 005 | loss: 0.60456 - acc: 0.6229 -- iter: 02304/20250


Training Step: 1305  | total loss: [1m[32m0.60649[0m[0m | time: 9.279s
[2K| Adam | epoch: 005 | loss: 0.60649 - acc: 0.6137 -- iter: 02368/20250


Training Step: 1306  | total loss: [1m[32m0.60278[0m[0m | time: 9.521s
[2K| Adam | epoch: 005 | loss: 0.60278 - acc: 0.6211 -- iter: 02432/20250


Training Step: 1307  | total loss: [1m[32m0.60549[0m[0m | time: 9.774s
[2K| Adam | epoch: 005 | loss: 0.60549 - acc: 0.6199 -- iter: 02496/20250


Training Step: 1308  | total loss: [1m[32m0.59753[0m[0m | time: 10.020s
[2K| Adam | epoch: 005 | loss: 0.59753 - acc: 0.6236 -- iter: 02560/20250


Training Step: 1309  | total loss: [1m[32m0.58932[0m[0m | time: 10.283s
[2K| Adam | epoch: 005 | loss: 0.58932 - acc: 0.6347 -- iter: 02624/20250


Training Step: 1310  | total loss: [1m[32m0.58695[0m[0m | time: 10.542s
[2K| Adam | epoch: 005 | loss: 0.58695 - acc: 0.6415 -- iter: 02688/20250


Training Step: 1311  | total loss: [1m[32m0.58071[0m[0m | time: 10.785s
[2K| Adam | epoch: 005 | loss: 0.58071 - acc: 0.6477 -- iter: 02752/20250


Training Step: 1312  | total loss: [1m[32m0.57662[0m[0m | time: 11.024s
[2K| Adam | epoch: 005 | loss: 0.57662 - acc: 0.6516 -- iter: 02816/20250


Training Step: 1313  | total loss: [1m[32m0.57394[0m[0m | time: 11.276s
[2K| Adam | epoch: 005 | loss: 0.57394 - acc: 0.6662 -- iter: 02880/20250


Training Step: 1314  | total loss: [1m[32m0.56258[0m[0m | time: 11.519s
[2K| Adam | epoch: 005 | loss: 0.56258 - acc: 0.6730 -- iter: 02944/20250


Training Step: 1315  | total loss: [1m[32m0.56076[0m[0m | time: 11.765s
[2K| Adam | epoch: 005 | loss: 0.56076 - acc: 0.6807 -- iter: 03008/20250


Training Step: 1316  | total loss: [1m[32m0.57069[0m[0m | time: 12.005s
[2K| Adam | epoch: 005 | loss: 0.57069 - acc: 0.6782 -- iter: 03072/20250


Training Step: 1317  | total loss: [1m[32m0.57294[0m[0m | time: 12.241s
[2K| Adam | epoch: 005 | loss: 0.57294 - acc: 0.6792 -- iter: 03136/20250


Training Step: 1318  | total loss: [1m[32m0.57425[0m[0m | time: 12.491s
[2K| Adam | epoch: 005 | loss: 0.57425 - acc: 0.6863 -- iter: 03200/20250


Training Step: 1319  | total loss: [1m[32m0.56620[0m[0m | time: 12.761s
[2K| Adam | epoch: 005 | loss: 0.56620 - acc: 0.6973 -- iter: 03264/20250


Training Step: 1320  | total loss: [1m[32m0.56472[0m[0m | time: 13.028s
[2K| Adam | epoch: 005 | loss: 0.56472 - acc: 0.7041 -- iter: 03328/20250


Training Step: 1321  | total loss: [1m[32m0.56270[0m[0m | time: 13.275s
[2K| Adam | epoch: 005 | loss: 0.56270 - acc: 0.7087 -- iter: 03392/20250


Training Step: 1322  | total loss: [1m[32m0.56243[0m[0m | time: 13.521s
[2K| Adam | epoch: 005 | loss: 0.56243 - acc: 0.7113 -- iter: 03456/20250


Training Step: 1323  | total loss: [1m[32m0.55933[0m[0m | time: 13.765s
[2K| Adam | epoch: 005 | loss: 0.55933 - acc: 0.7152 -- iter: 03520/20250


Training Step: 1324  | total loss: [1m[32m0.55865[0m[0m | time: 14.012s
[2K| Adam | epoch: 005 | loss: 0.55865 - acc: 0.7108 -- iter: 03584/20250


Training Step: 1325  | total loss: [1m[32m0.56351[0m[0m | time: 14.257s
[2K| Adam | epoch: 005 | loss: 0.56351 - acc: 0.7069 -- iter: 03648/20250


Training Step: 1326  | total loss: [1m[32m0.55455[0m[0m | time: 14.515s
[2K| Adam | epoch: 005 | loss: 0.55455 - acc: 0.7112 -- iter: 03712/20250


Training Step: 1327  | total loss: [1m[32m0.54779[0m[0m | time: 14.769s
[2K| Adam | epoch: 005 | loss: 0.54779 - acc: 0.7214 -- iter: 03776/20250


Training Step: 1328  | total loss: [1m[32m0.55115[0m[0m | time: 15.024s
[2K| Adam | epoch: 005 | loss: 0.55115 - acc: 0.7242 -- iter: 03840/20250


Training Step: 1329  | total loss: [1m[32m0.55135[0m[0m | time: 15.300s
[2K| Adam | epoch: 005 | loss: 0.55135 - acc: 0.7237 -- iter: 03904/20250


Training Step: 1330  | total loss: [1m[32m0.55886[0m[0m | time: 15.544s
[2K| Adam | epoch: 005 | loss: 0.55886 - acc: 0.7185 -- iter: 03968/20250


Training Step: 1331  | total loss: [1m[32m0.55963[0m[0m | time: 15.787s
[2K| Adam | epoch: 005 | loss: 0.55963 - acc: 0.7170 -- iter: 04032/20250


Training Step: 1332  | total loss: [1m[32m0.55449[0m[0m | time: 16.029s
[2K| Adam | epoch: 005 | loss: 0.55449 - acc: 0.7187 -- iter: 04096/20250


Training Step: 1333  | total loss: [1m[32m0.55343[0m[0m | time: 16.273s
[2K| Adam | epoch: 005 | loss: 0.55343 - acc: 0.7218 -- iter: 04160/20250


Training Step: 1334  | total loss: [1m[32m0.54640[0m[0m | time: 16.519s
[2K| Adam | epoch: 005 | loss: 0.54640 - acc: 0.7247 -- iter: 04224/20250


Training Step: 1335  | total loss: [1m[32m0.54759[0m[0m | time: 16.762s
[2K| Adam | epoch: 005 | loss: 0.54759 - acc: 0.7225 -- iter: 04288/20250


Training Step: 1336  | total loss: [1m[32m0.53147[0m[0m | time: 17.003s
[2K| Adam | epoch: 005 | loss: 0.53147 - acc: 0.7393 -- iter: 04352/20250


Training Step: 1337  | total loss: [1m[32m0.53205[0m[0m | time: 17.254s
[2K| Adam | epoch: 005 | loss: 0.53205 - acc: 0.7419 -- iter: 04416/20250


Training Step: 1338  | total loss: [1m[32m0.52653[0m[0m | time: 17.497s
[2K| Adam | epoch: 005 | loss: 0.52653 - acc: 0.7428 -- iter: 04480/20250


Training Step: 1339  | total loss: [1m[32m0.51459[0m[0m | time: 17.773s
[2K| Adam | epoch: 005 | loss: 0.51459 - acc: 0.7482 -- iter: 04544/20250


Training Step: 1340  | total loss: [1m[32m0.50695[0m[0m | time: 18.027s
[2K| Adam | epoch: 005 | loss: 0.50695 - acc: 0.7562 -- iter: 04608/20250


Training Step: 1341  | total loss: [1m[32m0.51269[0m[0m | time: 18.324s
[2K| Adam | epoch: 005 | loss: 0.51269 - acc: 0.7493 -- iter: 04672/20250


Training Step: 1342  | total loss: [1m[32m0.50977[0m[0m | time: 18.571s
[2K| Adam | epoch: 005 | loss: 0.50977 - acc: 0.7494 -- iter: 04736/20250


Training Step: 1343  | total loss: [1m[32m0.51041[0m[0m | time: 18.828s
[2K| Adam | epoch: 005 | loss: 0.51041 - acc: 0.7463 -- iter: 04800/20250


Training Step: 1344  | total loss: [1m[32m0.50611[0m[0m | time: 19.079s
[2K| Adam | epoch: 005 | loss: 0.50611 - acc: 0.7420 -- iter: 04864/20250


Training Step: 1345  | total loss: [1m[32m0.50755[0m[0m | time: 19.325s
[2K| Adam | epoch: 005 | loss: 0.50755 - acc: 0.7381 -- iter: 04928/20250


Training Step: 1346  | total loss: [1m[32m0.50005[0m[0m | time: 19.581s
[2K| Adam | epoch: 005 | loss: 0.50005 - acc: 0.7377 -- iter: 04992/20250


Training Step: 1347  | total loss: [1m[32m0.50257[0m[0m | time: 19.827s
[2K| Adam | epoch: 005 | loss: 0.50257 - acc: 0.7358 -- iter: 05056/20250


Training Step: 1348  | total loss: [1m[32m0.50360[0m[0m | time: 20.096s
[2K| Adam | epoch: 005 | loss: 0.50360 - acc: 0.7326 -- iter: 05120/20250


Training Step: 1349  | total loss: [1m[32m0.51555[0m[0m | time: 20.348s
[2K| Adam | epoch: 005 | loss: 0.51555 - acc: 0.7202 -- iter: 05184/20250


Training Step: 1350  | total loss: [1m[32m0.51176[0m[0m | time: 20.602s
[2K| Adam | epoch: 005 | loss: 0.51176 - acc: 0.7263 -- iter: 05248/20250


Training Step: 1351  | total loss: [1m[32m0.51026[0m[0m | time: 20.852s
[2K| Adam | epoch: 005 | loss: 0.51026 - acc: 0.7256 -- iter: 05312/20250


Training Step: 1352  | total loss: [1m[32m0.50544[0m[0m | time: 21.097s
[2K| Adam | epoch: 005 | loss: 0.50544 - acc: 0.7343 -- iter: 05376/20250


Training Step: 1353  | total loss: [1m[32m0.50042[0m[0m | time: 21.341s
[2K| Adam | epoch: 005 | loss: 0.50042 - acc: 0.7390 -- iter: 05440/20250


Training Step: 1354  | total loss: [1m[32m0.48804[0m[0m | time: 21.586s
[2K| Adam | epoch: 005 | loss: 0.48804 - acc: 0.7510 -- iter: 05504/20250


Training Step: 1355  | total loss: [1m[32m0.48521[0m[0m | time: 21.827s
[2K| Adam | epoch: 005 | loss: 0.48521 - acc: 0.7540 -- iter: 05568/20250


Training Step: 1356  | total loss: [1m[32m0.48755[0m[0m | time: 22.073s
[2K| Adam | epoch: 005 | loss: 0.48755 - acc: 0.7568 -- iter: 05632/20250


Training Step: 1357  | total loss: [1m[32m0.49703[0m[0m | time: 22.321s
[2K| Adam | epoch: 005 | loss: 0.49703 - acc: 0.7514 -- iter: 05696/20250


Training Step: 1358  | total loss: [1m[32m0.52238[0m[0m | time: 22.584s
[2K| Adam | epoch: 005 | loss: 0.52238 - acc: 0.7403 -- iter: 05760/20250


Training Step: 1359  | total loss: [1m[32m0.52570[0m[0m | time: 22.867s
[2K| Adam | epoch: 005 | loss: 0.52570 - acc: 0.7397 -- iter: 05824/20250


Training Step: 1360  | total loss: [1m[32m0.51893[0m[0m | time: 23.113s
[2K| Adam | epoch: 005 | loss: 0.51893 - acc: 0.7454 -- iter: 05888/20250


Training Step: 1361  | total loss: [1m[32m0.50451[0m[0m | time: 23.355s
[2K| Adam | epoch: 005 | loss: 0.50451 - acc: 0.7568 -- iter: 05952/20250


Training Step: 1362  | total loss: [1m[32m0.51915[0m[0m | time: 23.599s
[2K| Adam | epoch: 005 | loss: 0.51915 - acc: 0.7515 -- iter: 06016/20250


Training Step: 1363  | total loss: [1m[32m0.51228[0m[0m | time: 23.845s
[2K| Adam | epoch: 005 | loss: 0.51228 - acc: 0.7623 -- iter: 06080/20250


Training Step: 1364  | total loss: [1m[32m0.53945[0m[0m | time: 24.089s
[2K| Adam | epoch: 005 | loss: 0.53945 - acc: 0.7579 -- iter: 06144/20250


Training Step: 1365  | total loss: [1m[32m0.54677[0m[0m | time: 24.350s
[2K| Adam | epoch: 005 | loss: 0.54677 - acc: 0.7571 -- iter: 06208/20250


Training Step: 1366  | total loss: [1m[32m0.55028[0m[0m | time: 24.596s
[2K| Adam | epoch: 005 | loss: 0.55028 - acc: 0.7580 -- iter: 06272/20250


Training Step: 1367  | total loss: [1m[32m0.54566[0m[0m | time: 24.843s
[2K| Adam | epoch: 005 | loss: 0.54566 - acc: 0.7619 -- iter: 06336/20250


Training Step: 1368  | total loss: [1m[32m0.54328[0m[0m | time: 25.109s
[2K| Adam | epoch: 005 | loss: 0.54328 - acc: 0.7669 -- iter: 06400/20250


Training Step: 1369  | total loss: [1m[32m0.54165[0m[0m | time: 25.389s
[2K| Adam | epoch: 005 | loss: 0.54165 - acc: 0.7668 -- iter: 06464/20250


Training Step: 1370  | total loss: [1m[32m0.53710[0m[0m | time: 25.634s
[2K| Adam | epoch: 005 | loss: 0.53710 - acc: 0.7667 -- iter: 06528/20250


Training Step: 1371  | total loss: [1m[32m0.53938[0m[0m | time: 25.890s
[2K| Adam | epoch: 005 | loss: 0.53938 - acc: 0.7650 -- iter: 06592/20250


Training Step: 1372  | total loss: [1m[32m0.54371[0m[0m | time: 26.142s
[2K| Adam | epoch: 005 | loss: 0.54371 - acc: 0.7588 -- iter: 06656/20250


Training Step: 1373  | total loss: [1m[32m0.53460[0m[0m | time: 26.398s
[2K| Adam | epoch: 005 | loss: 0.53460 - acc: 0.7642 -- iter: 06720/20250


Training Step: 1374  | total loss: [1m[32m0.52860[0m[0m | time: 26.644s
[2K| Adam | epoch: 005 | loss: 0.52860 - acc: 0.7706 -- iter: 06784/20250


Training Step: 1375  | total loss: [1m[32m0.52634[0m[0m | time: 26.885s
[2K| Adam | epoch: 005 | loss: 0.52634 - acc: 0.7685 -- iter: 06848/20250


Training Step: 1376  | total loss: [1m[32m0.51640[0m[0m | time: 27.132s
[2K| Adam | epoch: 005 | loss: 0.51640 - acc: 0.7776 -- iter: 06912/20250


Training Step: 1377  | total loss: [1m[32m0.52248[0m[0m | time: 27.377s
[2K| Adam | epoch: 005 | loss: 0.52248 - acc: 0.7748 -- iter: 06976/20250


Training Step: 1378  | total loss: [1m[32m0.52376[0m[0m | time: 27.648s
[2K| Adam | epoch: 005 | loss: 0.52376 - acc: 0.7755 -- iter: 07040/20250


Training Step: 1379  | total loss: [1m[32m0.52152[0m[0m | time: 27.916s
[2K| Adam | epoch: 005 | loss: 0.52152 - acc: 0.7745 -- iter: 07104/20250


Training Step: 1380  | total loss: [1m[32m0.52499[0m[0m | time: 28.160s
[2K| Adam | epoch: 005 | loss: 0.52499 - acc: 0.7767 -- iter: 07168/20250


Training Step: 1381  | total loss: [1m[32m0.52700[0m[0m | time: 28.403s
[2K| Adam | epoch: 005 | loss: 0.52700 - acc: 0.7709 -- iter: 07232/20250


Training Step: 1382  | total loss: [1m[32m0.52408[0m[0m | time: 28.653s
[2K| Adam | epoch: 005 | loss: 0.52408 - acc: 0.7735 -- iter: 07296/20250


Training Step: 1383  | total loss: [1m[32m0.53536[0m[0m | time: 28.901s
[2K| Adam | epoch: 005 | loss: 0.53536 - acc: 0.7649 -- iter: 07360/20250


Training Step: 1384  | total loss: [1m[32m0.54169[0m[0m | time: 29.162s
[2K| Adam | epoch: 005 | loss: 0.54169 - acc: 0.7619 -- iter: 07424/20250


Training Step: 1385  | total loss: [1m[32m0.53438[0m[0m | time: 29.407s
[2K| Adam | epoch: 005 | loss: 0.53438 - acc: 0.7654 -- iter: 07488/20250


Training Step: 1386  | total loss: [1m[32m0.53757[0m[0m | time: 29.654s
[2K| Adam | epoch: 005 | loss: 0.53757 - acc: 0.7638 -- iter: 07552/20250


Training Step: 1387  | total loss: [1m[32m0.53742[0m[0m | time: 29.904s
[2K| Adam | epoch: 005 | loss: 0.53742 - acc: 0.7578 -- iter: 07616/20250


Training Step: 1388  | total loss: [1m[32m0.53807[0m[0m | time: 30.175s
[2K| Adam | epoch: 005 | loss: 0.53807 - acc: 0.7539 -- iter: 07680/20250


Training Step: 1389  | total loss: [1m[32m0.54621[0m[0m | time: 30.445s
[2K| Adam | epoch: 005 | loss: 0.54621 - acc: 0.7425 -- iter: 07744/20250


Training Step: 1390  | total loss: [1m[32m0.55980[0m[0m | time: 30.687s
[2K| Adam | epoch: 005 | loss: 0.55980 - acc: 0.7261 -- iter: 07808/20250


Training Step: 1391  | total loss: [1m[32m0.57002[0m[0m | time: 30.932s
[2K| Adam | epoch: 005 | loss: 0.57002 - acc: 0.7176 -- iter: 07872/20250


Training Step: 1392  | total loss: [1m[32m0.57807[0m[0m | time: 31.176s
[2K| Adam | epoch: 005 | loss: 0.57807 - acc: 0.7052 -- iter: 07936/20250


Training Step: 1393  | total loss: [1m[32m0.58005[0m[0m | time: 31.427s
[2K| Adam | epoch: 005 | loss: 0.58005 - acc: 0.7034 -- iter: 08000/20250


Training Step: 1394  | total loss: [1m[32m0.57817[0m[0m | time: 31.662s
[2K| Adam | epoch: 005 | loss: 0.57817 - acc: 0.7018 -- iter: 08064/20250


Training Step: 1395  | total loss: [1m[32m0.57113[0m[0m | time: 31.898s
[2K| Adam | epoch: 005 | loss: 0.57113 - acc: 0.7066 -- iter: 08128/20250


Training Step: 1396  | total loss: [1m[32m0.57050[0m[0m | time: 32.140s
[2K| Adam | epoch: 005 | loss: 0.57050 - acc: 0.7032 -- iter: 08192/20250


Training Step: 1397  | total loss: [1m[32m0.56915[0m[0m | time: 32.387s
[2K| Adam | epoch: 005 | loss: 0.56915 - acc: 0.7016 -- iter: 08256/20250


Training Step: 1398  | total loss: [1m[32m0.56485[0m[0m | time: 32.673s
[2K| Adam | epoch: 005 | loss: 0.56485 - acc: 0.7002 -- iter: 08320/20250


Training Step: 1399  | total loss: [1m[32m0.56616[0m[0m | time: 32.964s
[2K| Adam | epoch: 005 | loss: 0.56616 - acc: 0.6849 -- iter: 08384/20250


Training Step: 1400  | total loss: [1m[32m0.57068[0m[0m | time: 33.211s
[2K| Adam | epoch: 005 | loss: 0.57068 - acc: 0.6789 -- iter: 08448/20250


Training Step: 1401  | total loss: [1m[32m0.57562[0m[0m | time: 33.452s
[2K| Adam | epoch: 005 | loss: 0.57562 - acc: 0.6657 -- iter: 08512/20250


Training Step: 1402  | total loss: [1m[32m0.57764[0m[0m | time: 33.697s
[2K| Adam | epoch: 005 | loss: 0.57764 - acc: 0.6585 -- iter: 08576/20250


Training Step: 1403  | total loss: [1m[32m0.57227[0m[0m | time: 33.944s
[2K| Adam | epoch: 005 | loss: 0.57227 - acc: 0.6629 -- iter: 08640/20250


Training Step: 1404  | total loss: [1m[32m0.57328[0m[0m | time: 34.187s
[2K| Adam | epoch: 005 | loss: 0.57328 - acc: 0.6607 -- iter: 08704/20250


Training Step: 1405  | total loss: [1m[32m0.58045[0m[0m | time: 34.432s
[2K| Adam | epoch: 005 | loss: 0.58045 - acc: 0.6571 -- iter: 08768/20250


Training Step: 1406  | total loss: [1m[32m0.57125[0m[0m | time: 34.677s
[2K| Adam | epoch: 005 | loss: 0.57125 - acc: 0.6649 -- iter: 08832/20250


Training Step: 1407  | total loss: [1m[32m0.56574[0m[0m | time: 34.917s
[2K| Adam | epoch: 005 | loss: 0.56574 - acc: 0.6718 -- iter: 08896/20250


Training Step: 1408  | total loss: [1m[32m0.55766[0m[0m | time: 35.179s
[2K| Adam | epoch: 005 | loss: 0.55766 - acc: 0.6781 -- iter: 08960/20250


Training Step: 1409  | total loss: [1m[32m0.56105[0m[0m | time: 35.443s
[2K| Adam | epoch: 005 | loss: 0.56105 - acc: 0.6759 -- iter: 09024/20250


Training Step: 1410  | total loss: [1m[32m0.55916[0m[0m | time: 35.689s
[2K| Adam | epoch: 005 | loss: 0.55916 - acc: 0.6802 -- iter: 09088/20250


Training Step: 1411  | total loss: [1m[32m0.55363[0m[0m | time: 35.940s
[2K| Adam | epoch: 005 | loss: 0.55363 - acc: 0.6918 -- iter: 09152/20250


Training Step: 1412  | total loss: [1m[32m0.55735[0m[0m | time: 36.181s
[2K| Adam | epoch: 005 | loss: 0.55735 - acc: 0.6977 -- iter: 09216/20250


Training Step: 1413  | total loss: [1m[32m0.55207[0m[0m | time: 36.427s
[2K| Adam | epoch: 005 | loss: 0.55207 - acc: 0.6982 -- iter: 09280/20250


Training Step: 1414  | total loss: [1m[32m0.55878[0m[0m | time: 36.671s
[2K| Adam | epoch: 005 | loss: 0.55878 - acc: 0.6987 -- iter: 09344/20250


Training Step: 1415  | total loss: [1m[32m0.57469[0m[0m | time: 36.912s
[2K| Adam | epoch: 005 | loss: 0.57469 - acc: 0.6976 -- iter: 09408/20250


Training Step: 1416  | total loss: [1m[32m0.58836[0m[0m | time: 37.164s
[2K| Adam | epoch: 005 | loss: 0.58836 - acc: 0.6981 -- iter: 09472/20250


Training Step: 1417  | total loss: [1m[32m0.60147[0m[0m | time: 37.412s
[2K| Adam | epoch: 005 | loss: 0.60147 - acc: 0.7018 -- iter: 09536/20250


Training Step: 1418  | total loss: [1m[32m0.62785[0m[0m | time: 37.692s
[2K| Adam | epoch: 005 | loss: 0.62785 - acc: 0.6956 -- iter: 09600/20250


Training Step: 1419  | total loss: [1m[32m0.63512[0m[0m | time: 37.968s
[2K| Adam | epoch: 005 | loss: 0.63512 - acc: 0.6964 -- iter: 09664/20250


Training Step: 1420  | total loss: [1m[32m0.63288[0m[0m | time: 38.212s
[2K| Adam | epoch: 005 | loss: 0.63288 - acc: 0.7002 -- iter: 09728/20250


Training Step: 1421  | total loss: [1m[32m0.64242[0m[0m | time: 38.453s
[2K| Adam | epoch: 005 | loss: 0.64242 - acc: 0.6927 -- iter: 09792/20250


Training Step: 1422  | total loss: [1m[32m0.63492[0m[0m | time: 38.700s
[2K| Adam | epoch: 005 | loss: 0.63492 - acc: 0.6922 -- iter: 09856/20250


Training Step: 1423  | total loss: [1m[32m0.62522[0m[0m | time: 38.952s
[2K| Adam | epoch: 005 | loss: 0.62522 - acc: 0.6917 -- iter: 09920/20250


Training Step: 1424  | total loss: [1m[32m0.61834[0m[0m | time: 39.203s
[2K| Adam | epoch: 005 | loss: 0.61834 - acc: 0.6866 -- iter: 09984/20250


Training Step: 1425  | total loss: [1m[32m0.62135[0m[0m | time: 39.452s
[2K| Adam | epoch: 005 | loss: 0.62135 - acc: 0.6804 -- iter: 10048/20250


Training Step: 1426  | total loss: [1m[32m0.61620[0m[0m | time: 39.694s
[2K| Adam | epoch: 005 | loss: 0.61620 - acc: 0.6718 -- iter: 10112/20250


Training Step: 1427  | total loss: [1m[32m0.62347[0m[0m | time: 39.976s
[2K| Adam | epoch: 005 | loss: 0.62347 - acc: 0.6593 -- iter: 10176/20250


Training Step: 1428  | total loss: [1m[32m0.62319[0m[0m | time: 40.221s
[2K| Adam | epoch: 005 | loss: 0.62319 - acc: 0.6590 -- iter: 10240/20250


Training Step: 1429  | total loss: [1m[32m0.62416[0m[0m | time: 40.493s
[2K| Adam | epoch: 005 | loss: 0.62416 - acc: 0.6524 -- iter: 10304/20250


Training Step: 1430  | total loss: [1m[32m0.61387[0m[0m | time: 40.752s
[2K| Adam | epoch: 005 | loss: 0.61387 - acc: 0.6544 -- iter: 10368/20250


Training Step: 1431  | total loss: [1m[32m0.61963[0m[0m | time: 40.994s
[2K| Adam | epoch: 005 | loss: 0.61963 - acc: 0.6483 -- iter: 10432/20250


Training Step: 1432  | total loss: [1m[32m0.62033[0m[0m | time: 41.237s
[2K| Adam | epoch: 005 | loss: 0.62033 - acc: 0.6476 -- iter: 10496/20250


Training Step: 1433  | total loss: [1m[32m0.62016[0m[0m | time: 41.482s
[2K| Adam | epoch: 005 | loss: 0.62016 - acc: 0.6531 -- iter: 10560/20250


Training Step: 1434  | total loss: [1m[32m0.61578[0m[0m | time: 41.733s
[2K| Adam | epoch: 005 | loss: 0.61578 - acc: 0.6503 -- iter: 10624/20250


Training Step: 1435  | total loss: [1m[32m0.61160[0m[0m | time: 41.980s
[2K| Adam | epoch: 005 | loss: 0.61160 - acc: 0.6478 -- iter: 10688/20250


Training Step: 1436  | total loss: [1m[32m0.60744[0m[0m | time: 42.227s
[2K| Adam | epoch: 005 | loss: 0.60744 - acc: 0.6533 -- iter: 10752/20250


Training Step: 1437  | total loss: [1m[32m0.60045[0m[0m | time: 42.490s
[2K| Adam | epoch: 005 | loss: 0.60045 - acc: 0.6599 -- iter: 10816/20250


Training Step: 1438  | total loss: [1m[32m0.59584[0m[0m | time: 42.739s
[2K| Adam | epoch: 005 | loss: 0.59584 - acc: 0.6626 -- iter: 10880/20250


Training Step: 1439  | total loss: [1m[32m0.58902[0m[0m | time: 43.012s
[2K| Adam | epoch: 005 | loss: 0.58902 - acc: 0.6682 -- iter: 10944/20250


Training Step: 1440  | total loss: [1m[32m0.58595[0m[0m | time: 43.264s
[2K| Adam | epoch: 005 | loss: 0.58595 - acc: 0.6702 -- iter: 11008/20250


Training Step: 1441  | total loss: [1m[32m0.57790[0m[0m | time: 43.509s
[2K| Adam | epoch: 005 | loss: 0.57790 - acc: 0.6750 -- iter: 11072/20250


Training Step: 1442  | total loss: [1m[32m0.57871[0m[0m | time: 43.763s
[2K| Adam | epoch: 005 | loss: 0.57871 - acc: 0.6747 -- iter: 11136/20250


Training Step: 1443  | total loss: [1m[32m0.58468[0m[0m | time: 44.003s
[2K| Adam | epoch: 005 | loss: 0.58468 - acc: 0.6682 -- iter: 11200/20250


Training Step: 1444  | total loss: [1m[32m0.58057[0m[0m | time: 44.243s
[2K| Adam | epoch: 005 | loss: 0.58057 - acc: 0.6701 -- iter: 11264/20250


Training Step: 1445  | total loss: [1m[32m0.58183[0m[0m | time: 44.490s
[2K| Adam | epoch: 005 | loss: 0.58183 - acc: 0.6625 -- iter: 11328/20250


Training Step: 1446  | total loss: [1m[32m0.58147[0m[0m | time: 44.741s
[2K| Adam | epoch: 005 | loss: 0.58147 - acc: 0.6681 -- iter: 11392/20250


Training Step: 1447  | total loss: [1m[32m0.58143[0m[0m | time: 45.016s
[2K| Adam | epoch: 005 | loss: 0.58143 - acc: 0.6622 -- iter: 11456/20250


Training Step: 1448  | total loss: [1m[32m0.58181[0m[0m | time: 45.297s
[2K| Adam | epoch: 005 | loss: 0.58181 - acc: 0.6585 -- iter: 11520/20250


Training Step: 1449  | total loss: [1m[32m0.58256[0m[0m | time: 45.543s
[2K| Adam | epoch: 005 | loss: 0.58256 - acc: 0.6536 -- iter: 11584/20250


Training Step: 1450  | total loss: [1m[32m0.58737[0m[0m | time: 45.790s
[2K| Adam | epoch: 005 | loss: 0.58737 - acc: 0.6492 -- iter: 11648/20250


Training Step: 1451  | total loss: [1m[32m0.59469[0m[0m | time: 46.042s
[2K| Adam | epoch: 005 | loss: 0.59469 - acc: 0.6374 -- iter: 11712/20250


Training Step: 1452  | total loss: [1m[32m0.59194[0m[0m | time: 46.292s
[2K| Adam | epoch: 005 | loss: 0.59194 - acc: 0.6408 -- iter: 11776/20250


Training Step: 1453  | total loss: [1m[32m0.59493[0m[0m | time: 46.543s
[2K| Adam | epoch: 005 | loss: 0.59493 - acc: 0.6377 -- iter: 11840/20250


Training Step: 1454  | total loss: [1m[32m0.59404[0m[0m | time: 46.789s
[2K| Adam | epoch: 005 | loss: 0.59404 - acc: 0.6427 -- iter: 11904/20250


Training Step: 1455  | total loss: [1m[32m0.58531[0m[0m | time: 47.040s
[2K| Adam | epoch: 005 | loss: 0.58531 - acc: 0.6550 -- iter: 11968/20250


Training Step: 1456  | total loss: [1m[32m0.58160[0m[0m | time: 47.287s
[2K| Adam | epoch: 005 | loss: 0.58160 - acc: 0.6598 -- iter: 12032/20250


Training Step: 1457  | total loss: [1m[32m0.58352[0m[0m | time: 47.568s
[2K| Adam | epoch: 005 | loss: 0.58352 - acc: 0.6657 -- iter: 12096/20250


Training Step: 1458  | total loss: [1m[32m0.58626[0m[0m | time: 47.825s
[2K| Adam | epoch: 005 | loss: 0.58626 - acc: 0.6647 -- iter: 12160/20250


Training Step: 1459  | total loss: [1m[32m0.57961[0m[0m | time: 48.072s
[2K| Adam | epoch: 005 | loss: 0.57961 - acc: 0.6654 -- iter: 12224/20250


Training Step: 1460  | total loss: [1m[32m0.57639[0m[0m | time: 48.322s
[2K| Adam | epoch: 005 | loss: 0.57639 - acc: 0.6723 -- iter: 12288/20250


Training Step: 1461  | total loss: [1m[32m0.57272[0m[0m | time: 48.578s
[2K| Adam | epoch: 005 | loss: 0.57272 - acc: 0.6864 -- iter: 12352/20250


Training Step: 1462  | total loss: [1m[32m0.56908[0m[0m | time: 48.832s
[2K| Adam | epoch: 005 | loss: 0.56908 - acc: 0.6990 -- iter: 12416/20250


Training Step: 1463  | total loss: [1m[32m0.56195[0m[0m | time: 49.084s
[2K| Adam | epoch: 005 | loss: 0.56195 - acc: 0.7134 -- iter: 12480/20250


Training Step: 1464  | total loss: [1m[32m0.55799[0m[0m | time: 49.340s
[2K| Adam | epoch: 005 | loss: 0.55799 - acc: 0.7234 -- iter: 12544/20250


Training Step: 1465  | total loss: [1m[32m0.55265[0m[0m | time: 49.580s
[2K| Adam | epoch: 005 | loss: 0.55265 - acc: 0.7245 -- iter: 12608/20250


Training Step: 1466  | total loss: [1m[32m0.54744[0m[0m | time: 49.824s
[2K| Adam | epoch: 005 | loss: 0.54744 - acc: 0.7364 -- iter: 12672/20250


Training Step: 1467  | total loss: [1m[32m0.53767[0m[0m | time: 50.069s
[2K| Adam | epoch: 005 | loss: 0.53767 - acc: 0.7471 -- iter: 12736/20250


Training Step: 1468  | total loss: [1m[32m0.53493[0m[0m | time: 50.323s
[2K| Adam | epoch: 005 | loss: 0.53493 - acc: 0.7474 -- iter: 12800/20250


Training Step: 1469  | total loss: [1m[32m0.53041[0m[0m | time: 50.569s
[2K| Adam | epoch: 005 | loss: 0.53041 - acc: 0.7414 -- iter: 12864/20250


Training Step: 1470  | total loss: [1m[32m0.53376[0m[0m | time: 50.818s
[2K| Adam | epoch: 005 | loss: 0.53376 - acc: 0.7345 -- iter: 12928/20250


Training Step: 1471  | total loss: [1m[32m0.53899[0m[0m | time: 51.070s
[2K| Adam | epoch: 005 | loss: 0.53899 - acc: 0.7173 -- iter: 12992/20250


Training Step: 1472  | total loss: [1m[32m0.54238[0m[0m | time: 51.355s
[2K| Adam | epoch: 005 | loss: 0.54238 - acc: 0.7159 -- iter: 13056/20250


Training Step: 1473  | total loss: [1m[32m0.54712[0m[0m | time: 51.606s
[2K| Adam | epoch: 005 | loss: 0.54712 - acc: 0.7099 -- iter: 13120/20250


Training Step: 1474  | total loss: [1m[32m0.54616[0m[0m | time: 51.856s
[2K| Adam | epoch: 005 | loss: 0.54616 - acc: 0.7123 -- iter: 13184/20250


Training Step: 1475  | total loss: [1m[32m0.54324[0m[0m | time: 52.107s
[2K| Adam | epoch: 005 | loss: 0.54324 - acc: 0.7145 -- iter: 13248/20250


Training Step: 1476  | total loss: [1m[32m0.55274[0m[0m | time: 52.358s
[2K| Adam | epoch: 005 | loss: 0.55274 - acc: 0.6993 -- iter: 13312/20250


Training Step: 1477  | total loss: [1m[32m0.55669[0m[0m | time: 52.598s
[2K| Adam | epoch: 005 | loss: 0.55669 - acc: 0.6950 -- iter: 13376/20250


Training Step: 1478  | total loss: [1m[32m0.56822[0m[0m | time: 52.843s
[2K| Adam | epoch: 005 | loss: 0.56822 - acc: 0.6833 -- iter: 13440/20250


Training Step: 1479  | total loss: [1m[32m0.57299[0m[0m | time: 53.096s
[2K| Adam | epoch: 005 | loss: 0.57299 - acc: 0.6806 -- iter: 13504/20250


Training Step: 1480  | total loss: [1m[32m0.56472[0m[0m | time: 53.337s
[2K| Adam | epoch: 005 | loss: 0.56472 - acc: 0.6907 -- iter: 13568/20250


Training Step: 1481  | total loss: [1m[32m0.56032[0m[0m | time: 53.590s
[2K| Adam | epoch: 005 | loss: 0.56032 - acc: 0.6935 -- iter: 13632/20250


Training Step: 1482  | total loss: [1m[32m0.56411[0m[0m | time: 53.858s
[2K| Adam | epoch: 005 | loss: 0.56411 - acc: 0.6913 -- iter: 13696/20250


Training Step: 1483  | total loss: [1m[32m0.56215[0m[0m | time: 54.114s
[2K| Adam | epoch: 005 | loss: 0.56215 - acc: 0.6910 -- iter: 13760/20250


Training Step: 1484  | total loss: [1m[32m0.56182[0m[0m | time: 54.362s
[2K| Adam | epoch: 005 | loss: 0.56182 - acc: 0.6937 -- iter: 13824/20250


Training Step: 1485  | total loss: [1m[32m0.56350[0m[0m | time: 54.609s
[2K| Adam | epoch: 005 | loss: 0.56350 - acc: 0.6947 -- iter: 13888/20250


Training Step: 1486  | total loss: [1m[32m0.57111[0m[0m | time: 54.854s
[2K| Adam | epoch: 005 | loss: 0.57111 - acc: 0.6893 -- iter: 13952/20250


Training Step: 1487  | total loss: [1m[32m0.57402[0m[0m | time: 55.102s
[2K| Adam | epoch: 005 | loss: 0.57402 - acc: 0.6844 -- iter: 14016/20250


Training Step: 1488  | total loss: [1m[32m0.56863[0m[0m | time: 55.345s
[2K| Adam | epoch: 005 | loss: 0.56863 - acc: 0.6894 -- iter: 14080/20250


Training Step: 1489  | total loss: [1m[32m0.57086[0m[0m | time: 55.594s
[2K| Adam | epoch: 005 | loss: 0.57086 - acc: 0.6845 -- iter: 14144/20250


Training Step: 1490  | total loss: [1m[32m0.56788[0m[0m | time: 55.853s
[2K| Adam | epoch: 005 | loss: 0.56788 - acc: 0.6833 -- iter: 14208/20250


Training Step: 1491  | total loss: [1m[32m0.56907[0m[0m | time: 56.117s
[2K| Adam | epoch: 005 | loss: 0.56907 - acc: 0.6821 -- iter: 14272/20250


Training Step: 1492  | total loss: [1m[32m0.57355[0m[0m | time: 56.366s
[2K| Adam | epoch: 005 | loss: 0.57355 - acc: 0.6780 -- iter: 14336/20250


Training Step: 1493  | total loss: [1m[32m0.57433[0m[0m | time: 56.616s
[2K| Adam | epoch: 005 | loss: 0.57433 - acc: 0.6727 -- iter: 14400/20250


Training Step: 1494  | total loss: [1m[32m0.56750[0m[0m | time: 56.867s
[2K| Adam | epoch: 005 | loss: 0.56750 - acc: 0.6788 -- iter: 14464/20250


Training Step: 1495  | total loss: [1m[32m0.55852[0m[0m | time: 57.117s
[2K| Adam | epoch: 005 | loss: 0.55852 - acc: 0.6875 -- iter: 14528/20250


Training Step: 1496  | total loss: [1m[32m0.55664[0m[0m | time: 57.371s
[2K| Adam | epoch: 005 | loss: 0.55664 - acc: 0.6969 -- iter: 14592/20250


Training Step: 1497  | total loss: [1m[32m0.54913[0m[0m | time: 57.611s
[2K| Adam | epoch: 005 | loss: 0.54913 - acc: 0.7085 -- iter: 14656/20250


Training Step: 1498  | total loss: [1m[32m0.55139[0m[0m | time: 57.853s
[2K| Adam | epoch: 005 | loss: 0.55139 - acc: 0.7079 -- iter: 14720/20250


Training Step: 1499  | total loss: [1m[32m0.54346[0m[0m | time: 58.097s
[2K| Adam | epoch: 005 | loss: 0.54346 - acc: 0.7121 -- iter: 14784/20250


Training Step: 1500  | total loss: [1m[32m0.54639[0m[0m | time: 58.348s
[2K| Adam | epoch: 005 | loss: 0.54639 - acc: 0.7112 -- iter: 14848/20250


Training Step: 1501  | total loss: [1m[32m0.53978[0m[0m | time: 58.615s
[2K| Adam | epoch: 005 | loss: 0.53978 - acc: 0.7198 -- iter: 14912/20250


Training Step: 1502  | total loss: [1m[32m0.53940[0m[0m | time: 58.887s
[2K| Adam | epoch: 005 | loss: 0.53940 - acc: 0.7181 -- iter: 14976/20250


Training Step: 1503  | total loss: [1m[32m0.54185[0m[0m | time: 59.129s
[2K| Adam | epoch: 005 | loss: 0.54185 - acc: 0.7182 -- iter: 15040/20250


Training Step: 1504  | total loss: [1m[32m0.53420[0m[0m | time: 59.378s
[2K| Adam | epoch: 005 | loss: 0.53420 - acc: 0.7245 -- iter: 15104/20250


Training Step: 1505  | total loss: [1m[32m0.52703[0m[0m | time: 59.622s
[2K| Adam | epoch: 005 | loss: 0.52703 - acc: 0.7333 -- iter: 15168/20250


Training Step: 1506  | total loss: [1m[32m0.52489[0m[0m | time: 59.871s
[2K| Adam | epoch: 005 | loss: 0.52489 - acc: 0.7350 -- iter: 15232/20250


Training Step: 1507  | total loss: [1m[32m0.52477[0m[0m | time: 60.119s
[2K| Adam | epoch: 005 | loss: 0.52477 - acc: 0.7396 -- iter: 15296/20250


Training Step: 1508  | total loss: [1m[32m0.52090[0m[0m | time: 60.371s
[2K| Adam | epoch: 005 | loss: 0.52090 - acc: 0.7391 -- iter: 15360/20250


Training Step: 1509  | total loss: [1m[32m0.53517[0m[0m | time: 60.618s
[2K| Adam | epoch: 005 | loss: 0.53517 - acc: 0.7355 -- iter: 15424/20250


Training Step: 1510  | total loss: [1m[32m0.53688[0m[0m | time: 60.876s
[2K| Adam | epoch: 005 | loss: 0.53688 - acc: 0.7447 -- iter: 15488/20250


Training Step: 1511  | total loss: [1m[32m0.54055[0m[0m | time: 61.144s
[2K| Adam | epoch: 005 | loss: 0.54055 - acc: 0.7484 -- iter: 15552/20250


Training Step: 1512  | total loss: [1m[32m0.54686[0m[0m | time: 61.392s
[2K| Adam | epoch: 005 | loss: 0.54686 - acc: 0.7486 -- iter: 15616/20250


Training Step: 1513  | total loss: [1m[32m0.55742[0m[0m | time: 61.634s
[2K| Adam | epoch: 005 | loss: 0.55742 - acc: 0.7456 -- iter: 15680/20250


Training Step: 1514  | total loss: [1m[32m0.55637[0m[0m | time: 61.882s
[2K| Adam | epoch: 005 | loss: 0.55637 - acc: 0.7491 -- iter: 15744/20250


Training Step: 1515  | total loss: [1m[32m0.55739[0m[0m | time: 62.131s
[2K| Adam | epoch: 005 | loss: 0.55739 - acc: 0.7524 -- iter: 15808/20250


Training Step: 1516  | total loss: [1m[32m0.57543[0m[0m | time: 62.381s
[2K| Adam | epoch: 005 | loss: 0.57543 - acc: 0.7490 -- iter: 15872/20250


Training Step: 1517  | total loss: [1m[32m0.57182[0m[0m | time: 62.632s
[2K| Adam | epoch: 005 | loss: 0.57182 - acc: 0.7491 -- iter: 15936/20250


Training Step: 1518  | total loss: [1m[32m0.56996[0m[0m | time: 62.874s
[2K| Adam | epoch: 005 | loss: 0.56996 - acc: 0.7523 -- iter: 16000/20250


Training Step: 1519  | total loss: [1m[32m0.57416[0m[0m | time: 63.123s
[2K| Adam | epoch: 005 | loss: 0.57416 - acc: 0.7490 -- iter: 16064/20250


Training Step: 1520  | total loss: [1m[32m0.57347[0m[0m | time: 63.381s
[2K| Adam | epoch: 005 | loss: 0.57347 - acc: 0.7553 -- iter: 16128/20250


Training Step: 1521  | total loss: [1m[32m0.56292[0m[0m | time: 63.642s
[2K| Adam | epoch: 005 | loss: 0.56292 - acc: 0.7548 -- iter: 16192/20250


Training Step: 1522  | total loss: [1m[32m0.56065[0m[0m | time: 63.908s
[2K| Adam | epoch: 005 | loss: 0.56065 - acc: 0.7574 -- iter: 16256/20250


Training Step: 1523  | total loss: [1m[32m0.56088[0m[0m | time: 64.150s
[2K| Adam | epoch: 005 | loss: 0.56088 - acc: 0.7551 -- iter: 16320/20250


Training Step: 1524  | total loss: [1m[32m0.56765[0m[0m | time: 64.394s
[2K| Adam | epoch: 005 | loss: 0.56765 - acc: 0.7499 -- iter: 16384/20250


Training Step: 1525  | total loss: [1m[32m0.55796[0m[0m | time: 64.637s
[2K| Adam | epoch: 005 | loss: 0.55796 - acc: 0.7546 -- iter: 16448/20250


Training Step: 1526  | total loss: [1m[32m0.55927[0m[0m | time: 64.892s
[2K| Adam | epoch: 005 | loss: 0.55927 - acc: 0.7510 -- iter: 16512/20250


Training Step: 1527  | total loss: [1m[32m0.55424[0m[0m | time: 65.139s
[2K| Adam | epoch: 005 | loss: 0.55424 - acc: 0.7556 -- iter: 16576/20250


Training Step: 1528  | total loss: [1m[32m0.54575[0m[0m | time: 65.392s
[2K| Adam | epoch: 005 | loss: 0.54575 - acc: 0.7597 -- iter: 16640/20250


Training Step: 1529  | total loss: [1m[32m0.53402[0m[0m | time: 65.643s
[2K| Adam | epoch: 005 | loss: 0.53402 - acc: 0.7713 -- iter: 16704/20250


Training Step: 1530  | total loss: [1m[32m0.52649[0m[0m | time: 65.903s
[2K| Adam | epoch: 005 | loss: 0.52649 - acc: 0.7738 -- iter: 16768/20250


Training Step: 1531  | total loss: [1m[32m0.52757[0m[0m | time: 66.168s
[2K| Adam | epoch: 005 | loss: 0.52757 - acc: 0.7730 -- iter: 16832/20250


Training Step: 1532  | total loss: [1m[32m0.52121[0m[0m | time: 66.415s
[2K| Adam | epoch: 005 | loss: 0.52121 - acc: 0.7785 -- iter: 16896/20250


Training Step: 1533  | total loss: [1m[32m0.52055[0m[0m | time: 66.658s
[2K| Adam | epoch: 005 | loss: 0.52055 - acc: 0.7757 -- iter: 16960/20250


Training Step: 1534  | total loss: [1m[32m0.51242[0m[0m | time: 66.906s
[2K| Adam | epoch: 005 | loss: 0.51242 - acc: 0.7778 -- iter: 17024/20250


Training Step: 1535  | total loss: [1m[32m0.50926[0m[0m | time: 67.150s
[2K| Adam | epoch: 005 | loss: 0.50926 - acc: 0.7813 -- iter: 17088/20250


Training Step: 1536  | total loss: [1m[32m0.49970[0m[0m | time: 67.399s
[2K| Adam | epoch: 005 | loss: 0.49970 - acc: 0.7906 -- iter: 17152/20250


Training Step: 1537  | total loss: [1m[32m0.50422[0m[0m | time: 67.644s
[2K| Adam | epoch: 005 | loss: 0.50422 - acc: 0.7881 -- iter: 17216/20250


Training Step: 1538  | total loss: [1m[32m0.50136[0m[0m | time: 67.893s
[2K| Adam | epoch: 005 | loss: 0.50136 - acc: 0.7906 -- iter: 17280/20250


Training Step: 1539  | total loss: [1m[32m0.49447[0m[0m | time: 68.141s
[2K| Adam | epoch: 005 | loss: 0.49447 - acc: 0.7928 -- iter: 17344/20250


Training Step: 1540  | total loss: [1m[32m0.49525[0m[0m | time: 68.401s
[2K| Adam | epoch: 005 | loss: 0.49525 - acc: 0.7916 -- iter: 17408/20250


Training Step: 1541  | total loss: [1m[32m0.49992[0m[0m | time: 68.654s
[2K| Adam | epoch: 005 | loss: 0.49992 - acc: 0.7874 -- iter: 17472/20250


Training Step: 1542  | total loss: [1m[32m0.50393[0m[0m | time: 68.901s
[2K| Adam | epoch: 005 | loss: 0.50393 - acc: 0.7853 -- iter: 17536/20250


Training Step: 1543  | total loss: [1m[32m0.49815[0m[0m | time: 69.147s
[2K| Adam | epoch: 005 | loss: 0.49815 - acc: 0.7927 -- iter: 17600/20250


Training Step: 1544  | total loss: [1m[32m0.50745[0m[0m | time: 69.387s
[2K| Adam | epoch: 005 | loss: 0.50745 - acc: 0.7837 -- iter: 17664/20250


Training Step: 1545  | total loss: [1m[32m0.50311[0m[0m | time: 69.630s
[2K| Adam | epoch: 005 | loss: 0.50311 - acc: 0.7866 -- iter: 17728/20250


Training Step: 1546  | total loss: [1m[32m0.49186[0m[0m | time: 69.875s
[2K| Adam | epoch: 005 | loss: 0.49186 - acc: 0.7970 -- iter: 17792/20250


Training Step: 1547  | total loss: [1m[32m0.48920[0m[0m | time: 70.124s
[2K| Adam | epoch: 005 | loss: 0.48920 - acc: 0.7970 -- iter: 17856/20250


Training Step: 1548  | total loss: [1m[32m0.49349[0m[0m | time: 70.372s
[2K| Adam | epoch: 005 | loss: 0.49349 - acc: 0.7954 -- iter: 17920/20250


Training Step: 1549  | total loss: [1m[32m0.48922[0m[0m | time: 70.621s
[2K| Adam | epoch: 005 | loss: 0.48922 - acc: 0.7987 -- iter: 17984/20250


Training Step: 1550  | total loss: [1m[32m0.48909[0m[0m | time: 70.879s
[2K| Adam | epoch: 005 | loss: 0.48909 - acc: 0.7985 -- iter: 18048/20250


Training Step: 1551  | total loss: [1m[32m0.51492[0m[0m | time: 71.143s
[2K| Adam | epoch: 005 | loss: 0.51492 - acc: 0.7890 -- iter: 18112/20250


Training Step: 1552  | total loss: [1m[32m0.51773[0m[0m | time: 71.384s
[2K| Adam | epoch: 005 | loss: 0.51773 - acc: 0.7882 -- iter: 18176/20250


Training Step: 1553  | total loss: [1m[32m0.51230[0m[0m | time: 71.628s
[2K| Adam | epoch: 005 | loss: 0.51230 - acc: 0.7922 -- iter: 18240/20250


Training Step: 1554  | total loss: [1m[32m0.50775[0m[0m | time: 71.870s
[2K| Adam | epoch: 005 | loss: 0.50775 - acc: 0.7942 -- iter: 18304/20250


Training Step: 1555  | total loss: [1m[32m0.50884[0m[0m | time: 72.114s
[2K| Adam | epoch: 005 | loss: 0.50884 - acc: 0.7945 -- iter: 18368/20250


Training Step: 1556  | total loss: [1m[32m0.50262[0m[0m | time: 72.373s
[2K| Adam | epoch: 005 | loss: 0.50262 - acc: 0.7978 -- iter: 18432/20250


Training Step: 1557  | total loss: [1m[32m0.48764[0m[0m | time: 72.627s
[2K| Adam | epoch: 005 | loss: 0.48764 - acc: 0.8056 -- iter: 18496/20250


Training Step: 1558  | total loss: [1m[32m0.48809[0m[0m | time: 72.877s
[2K| Adam | epoch: 005 | loss: 0.48809 - acc: 0.8031 -- iter: 18560/20250


Training Step: 1559  | total loss: [1m[32m0.48541[0m[0m | time: 73.129s
[2K| Adam | epoch: 005 | loss: 0.48541 - acc: 0.8041 -- iter: 18624/20250


Training Step: 1560  | total loss: [1m[32m0.49082[0m[0m | time: 73.402s
[2K| Adam | epoch: 005 | loss: 0.49082 - acc: 0.7971 -- iter: 18688/20250


Training Step: 1561  | total loss: [1m[32m0.49446[0m[0m | time: 73.674s
[2K| Adam | epoch: 005 | loss: 0.49446 - acc: 0.7924 -- iter: 18752/20250


Training Step: 1562  | total loss: [1m[32m0.49325[0m[0m | time: 73.917s
[2K| Adam | epoch: 005 | loss: 0.49325 - acc: 0.7928 -- iter: 18816/20250


Training Step: 1563  | total loss: [1m[32m0.48308[0m[0m | time: 74.182s
[2K| Adam | epoch: 005 | loss: 0.48308 - acc: 0.7979 -- iter: 18880/20250


Training Step: 1564  | total loss: [1m[32m0.48008[0m[0m | time: 74.459s
[2K| Adam | epoch: 005 | loss: 0.48008 - acc: 0.8025 -- iter: 18944/20250


Training Step: 1565  | total loss: [1m[32m0.47488[0m[0m | time: 74.710s
[2K| Adam | epoch: 005 | loss: 0.47488 - acc: 0.8051 -- iter: 19008/20250


Training Step: 1566  | total loss: [1m[32m0.47004[0m[0m | time: 74.962s
[2K| Adam | epoch: 005 | loss: 0.47004 - acc: 0.8074 -- iter: 19072/20250


Training Step: 1567  | total loss: [1m[32m0.47381[0m[0m | time: 75.202s
[2K| Adam | epoch: 005 | loss: 0.47381 - acc: 0.8063 -- iter: 19136/20250


Training Step: 1568  | total loss: [1m[32m0.46740[0m[0m | time: 75.460s
[2K| Adam | epoch: 005 | loss: 0.46740 - acc: 0.8116 -- iter: 19200/20250


Training Step: 1569  | total loss: [1m[32m0.47379[0m[0m | time: 75.719s
[2K| Adam | epoch: 005 | loss: 0.47379 - acc: 0.8070 -- iter: 19264/20250


Training Step: 1570  | total loss: [1m[32m0.46689[0m[0m | time: 75.990s
[2K| Adam | epoch: 005 | loss: 0.46689 - acc: 0.8123 -- iter: 19328/20250


Training Step: 1571  | total loss: [1m[32m0.47053[0m[0m | time: 76.253s
[2K| Adam | epoch: 005 | loss: 0.47053 - acc: 0.8092 -- iter: 19392/20250


Training Step: 1572  | total loss: [1m[32m0.47022[0m[0m | time: 76.504s
[2K| Adam | epoch: 005 | loss: 0.47022 - acc: 0.8079 -- iter: 19456/20250


Training Step: 1573  | total loss: [1m[32m0.46012[0m[0m | time: 76.758s
[2K| Adam | epoch: 005 | loss: 0.46012 - acc: 0.8162 -- iter: 19520/20250


Training Step: 1574  | total loss: [1m[32m0.46606[0m[0m | time: 77.004s
[2K| Adam | epoch: 005 | loss: 0.46606 - acc: 0.8096 -- iter: 19584/20250


Training Step: 1575  | total loss: [1m[32m0.46004[0m[0m | time: 77.252s
[2K| Adam | epoch: 005 | loss: 0.46004 - acc: 0.8146 -- iter: 19648/20250


Training Step: 1576  | total loss: [1m[32m0.46852[0m[0m | time: 77.503s
[2K| Adam | epoch: 005 | loss: 0.46852 - acc: 0.8081 -- iter: 19712/20250


Training Step: 1577  | total loss: [1m[32m0.46537[0m[0m | time: 77.756s
[2K| Adam | epoch: 005 | loss: 0.46537 - acc: 0.8117 -- iter: 19776/20250


Training Step: 1578  | total loss: [1m[32m0.47264[0m[0m | time: 78.010s
[2K| Adam | epoch: 005 | loss: 0.47264 - acc: 0.8086 -- iter: 19840/20250


Training Step: 1579  | total loss: [1m[32m0.47396[0m[0m | time: 78.260s
[2K| Adam | epoch: 005 | loss: 0.47396 - acc: 0.8028 -- iter: 19904/20250


Training Step: 1580  | total loss: [1m[32m0.47059[0m[0m | time: 78.521s
[2K| Adam | epoch: 005 | loss: 0.47059 - acc: 0.8037 -- iter: 19968/20250


Training Step: 1581  | total loss: [1m[32m0.46093[0m[0m | time: 78.804s
[2K| Adam | epoch: 005 | loss: 0.46093 - acc: 0.8109 -- iter: 20032/20250


Training Step: 1582  | total loss: [1m[32m0.46495[0m[0m | time: 79.051s
[2K| Adam | epoch: 005 | loss: 0.46495 - acc: 0.8063 -- iter: 20096/20250


Training Step: 1583  | total loss: [1m[32m0.46436[0m[0m | time: 79.301s
[2K| Adam | epoch: 005 | loss: 0.46436 - acc: 0.8070 -- iter: 20160/20250


Training Step: 1584  | total loss: [1m[32m0.46165[0m[0m | time: 79.546s
[2K| Adam | epoch: 005 | loss: 0.46165 - acc: 0.8075 -- iter: 20224/20250


Training Step: 1585  | total loss: [1m[32m0.45869[0m[0m | time: 81.513s
[2K| Adam | epoch: 005 | loss: 0.45869 - acc: 0.8080 | val_loss: 0.58953 - val_acc: 0.7427 -- iter: 20250/20250
--


Training Step: 1586  | total loss: [1m[32m0.45746[0m[0m | time: 0.249s
[2K| Adam | epoch: 006 | loss: 0.45746 - acc: 0.8131 -- iter: 00064/20250


Training Step: 1587  | total loss: [1m[32m0.45640[0m[0m | time: 0.513s
[2K| Adam | epoch: 006 | loss: 0.45640 - acc: 0.8146 -- iter: 00128/20250


Training Step: 1588  | total loss: [1m[32m0.45372[0m[0m | time: 0.765s
[2K| Adam | epoch: 006 | loss: 0.45372 - acc: 0.8176 -- iter: 00192/20250


Training Step: 1589  | total loss: [1m[32m0.46383[0m[0m | time: 1.014s
[2K| Adam | epoch: 006 | loss: 0.46383 - acc: 0.8077 -- iter: 00256/20250


Training Step: 1590  | total loss: [1m[32m0.47305[0m[0m | time: 1.266s
[2K| Adam | epoch: 006 | loss: 0.47305 - acc: 0.8000 -- iter: 00320/20250


Training Step: 1591  | total loss: [1m[32m0.47901[0m[0m | time: 1.518s
[2K| Adam | epoch: 006 | loss: 0.47901 - acc: 0.7969 -- iter: 00384/20250


Training Step: 1592  | total loss: [1m[32m0.48367[0m[0m | time: 1.766s
[2K| Adam | epoch: 006 | loss: 0.48367 - acc: 0.7907 -- iter: 00448/20250


Training Step: 1593  | total loss: [1m[32m0.49306[0m[0m | time: 2.058s
[2K| Adam | epoch: 006 | loss: 0.49306 - acc: 0.7835 -- iter: 00512/20250


Training Step: 1594  | total loss: [1m[32m0.47977[0m[0m | time: 2.315s
[2K| Adam | epoch: 006 | loss: 0.47977 - acc: 0.7942 -- iter: 00576/20250


Training Step: 1595  | total loss: [1m[32m0.47384[0m[0m | time: 2.577s
[2K| Adam | epoch: 006 | loss: 0.47384 - acc: 0.7976 -- iter: 00640/20250


Training Step: 1596  | total loss: [1m[32m0.48125[0m[0m | time: 2.846s
[2K| Adam | epoch: 006 | loss: 0.48125 - acc: 0.7897 -- iter: 00704/20250


Training Step: 1597  | total loss: [1m[32m0.47692[0m[0m | time: 3.114s
[2K| Adam | epoch: 006 | loss: 0.47692 - acc: 0.7951 -- iter: 00768/20250


Training Step: 1598  | total loss: [1m[32m0.46954[0m[0m | time: 3.366s
[2K| Adam | epoch: 006 | loss: 0.46954 - acc: 0.8000 -- iter: 00832/20250


Training Step: 1599  | total loss: [1m[32m0.46402[0m[0m | time: 3.613s
[2K| Adam | epoch: 006 | loss: 0.46402 - acc: 0.8059 -- iter: 00896/20250


Training Step: 1600  | total loss: [1m[32m0.45782[0m[0m | time: 3.869s
[2K| Adam | epoch: 006 | loss: 0.45782 - acc: 0.8113 -- iter: 00960/20250


Training Step: 1601  | total loss: [1m[32m0.46008[0m[0m | time: 4.119s
[2K| Adam | epoch: 006 | loss: 0.46008 - acc: 0.8083 -- iter: 01024/20250


Training Step: 1602  | total loss: [1m[32m0.45981[0m[0m | time: 4.405s
[2K| Adam | epoch: 006 | loss: 0.45981 - acc: 0.8056 -- iter: 01088/20250


Training Step: 1603  | total loss: [1m[32m0.45455[0m[0m | time: 4.684s
[2K| Adam | epoch: 006 | loss: 0.45455 - acc: 0.8094 -- iter: 01152/20250


Training Step: 1604  | total loss: [1m[32m0.45576[0m[0m | time: 4.932s
[2K| Adam | epoch: 006 | loss: 0.45576 - acc: 0.8081 -- iter: 01216/20250


Training Step: 1605  | total loss: [1m[32m0.46691[0m[0m | time: 5.174s
[2K| Adam | epoch: 006 | loss: 0.46691 - acc: 0.7992 -- iter: 01280/20250


Training Step: 1606  | total loss: [1m[32m0.45491[0m[0m | time: 5.434s
[2K| Adam | epoch: 006 | loss: 0.45491 - acc: 0.8068 -- iter: 01344/20250


Training Step: 1607  | total loss: [1m[32m0.45078[0m[0m | time: 5.684s
[2K| Adam | epoch: 006 | loss: 0.45078 - acc: 0.8073 -- iter: 01408/20250


Training Step: 1608  | total loss: [1m[32m0.45011[0m[0m | time: 5.928s
[2K| Adam | epoch: 006 | loss: 0.45011 - acc: 0.8110 -- iter: 01472/20250


Training Step: 1609  | total loss: [1m[32m0.45130[0m[0m | time: 6.172s
[2K| Adam | epoch: 006 | loss: 0.45130 - acc: 0.8127 -- iter: 01536/20250


Training Step: 1610  | total loss: [1m[32m0.45862[0m[0m | time: 6.414s
[2K| Adam | epoch: 006 | loss: 0.45862 - acc: 0.8096 -- iter: 01600/20250


Training Step: 1611  | total loss: [1m[32m0.46924[0m[0m | time: 6.660s
[2K| Adam | epoch: 006 | loss: 0.46924 - acc: 0.8052 -- iter: 01664/20250


Training Step: 1612  | total loss: [1m[32m0.47279[0m[0m | time: 6.941s
[2K| Adam | epoch: 006 | loss: 0.47279 - acc: 0.8028 -- iter: 01728/20250


Training Step: 1613  | total loss: [1m[32m0.47850[0m[0m | time: 7.222s
[2K| Adam | epoch: 006 | loss: 0.47850 - acc: 0.8006 -- iter: 01792/20250


Training Step: 1614  | total loss: [1m[32m0.48475[0m[0m | time: 7.455s
[2K| Adam | epoch: 006 | loss: 0.48475 - acc: 0.7987 -- iter: 01856/20250


Training Step: 1615  | total loss: [1m[32m0.48582[0m[0m | time: 7.697s
[2K| Adam | epoch: 006 | loss: 0.48582 - acc: 0.7938 -- iter: 01920/20250


Training Step: 1616  | total loss: [1m[32m0.47661[0m[0m | time: 7.944s
[2K| Adam | epoch: 006 | loss: 0.47661 - acc: 0.7972 -- iter: 01984/20250


Training Step: 1617  | total loss: [1m[32m0.47545[0m[0m | time: 8.184s
[2K| Adam | epoch: 006 | loss: 0.47545 - acc: 0.7956 -- iter: 02048/20250


Training Step: 1618  | total loss: [1m[32m0.48361[0m[0m | time: 8.426s
[2K| Adam | epoch: 006 | loss: 0.48361 - acc: 0.7911 -- iter: 02112/20250


Training Step: 1619  | total loss: [1m[32m0.48051[0m[0m | time: 8.668s
[2K| Adam | epoch: 006 | loss: 0.48051 - acc: 0.7948 -- iter: 02176/20250


Training Step: 1620  | total loss: [1m[32m0.47511[0m[0m | time: 8.914s
[2K| Adam | epoch: 006 | loss: 0.47511 - acc: 0.7981 -- iter: 02240/20250


Training Step: 1621  | total loss: [1m[32m0.46975[0m[0m | time: 9.158s
[2K| Adam | epoch: 006 | loss: 0.46975 - acc: 0.8042 -- iter: 02304/20250


Training Step: 1622  | total loss: [1m[32m0.46592[0m[0m | time: 9.431s
[2K| Adam | epoch: 006 | loss: 0.46592 - acc: 0.8051 -- iter: 02368/20250


Training Step: 1623  | total loss: [1m[32m0.45484[0m[0m | time: 9.696s
[2K| Adam | epoch: 006 | loss: 0.45484 - acc: 0.8105 -- iter: 02432/20250


Training Step: 1624  | total loss: [1m[32m0.46078[0m[0m | time: 9.949s
[2K| Adam | epoch: 006 | loss: 0.46078 - acc: 0.8107 -- iter: 02496/20250


Training Step: 1625  | total loss: [1m[32m0.45579[0m[0m | time: 10.199s
[2K| Adam | epoch: 006 | loss: 0.45579 - acc: 0.8140 -- iter: 02560/20250


Training Step: 1626  | total loss: [1m[32m0.46210[0m[0m | time: 10.459s
[2K| Adam | epoch: 006 | loss: 0.46210 - acc: 0.8107 -- iter: 02624/20250


Training Step: 1627  | total loss: [1m[32m0.46844[0m[0m | time: 10.700s
[2K| Adam | epoch: 006 | loss: 0.46844 - acc: 0.8062 -- iter: 02688/20250


Training Step: 1628  | total loss: [1m[32m0.47608[0m[0m | time: 10.939s
[2K| Adam | epoch: 006 | loss: 0.47608 - acc: 0.7990 -- iter: 02752/20250


Training Step: 1629  | total loss: [1m[32m0.48907[0m[0m | time: 11.189s
[2K| Adam | epoch: 006 | loss: 0.48907 - acc: 0.7894 -- iter: 02816/20250


Training Step: 1630  | total loss: [1m[32m0.49003[0m[0m | time: 11.445s
[2K| Adam | epoch: 006 | loss: 0.49003 - acc: 0.7902 -- iter: 02880/20250


Training Step: 1631  | total loss: [1m[32m0.49957[0m[0m | time: 11.692s
[2K| Adam | epoch: 006 | loss: 0.49957 - acc: 0.7846 -- iter: 02944/20250


Training Step: 1632  | total loss: [1m[32m0.50232[0m[0m | time: 11.937s
[2K| Adam | epoch: 006 | loss: 0.50232 - acc: 0.7827 -- iter: 03008/20250


Training Step: 1633  | total loss: [1m[32m0.50207[0m[0m | time: 12.204s
[2K| Adam | epoch: 006 | loss: 0.50207 - acc: 0.7826 -- iter: 03072/20250


Training Step: 1634  | total loss: [1m[32m0.49451[0m[0m | time: 12.454s
[2K| Adam | epoch: 006 | loss: 0.49451 - acc: 0.7871 -- iter: 03136/20250


Training Step: 1635  | total loss: [1m[32m0.49538[0m[0m | time: 12.702s
[2K| Adam | epoch: 006 | loss: 0.49538 - acc: 0.7850 -- iter: 03200/20250


Training Step: 1636  | total loss: [1m[32m0.48644[0m[0m | time: 12.945s
[2K| Adam | epoch: 006 | loss: 0.48644 - acc: 0.7908 -- iter: 03264/20250


Training Step: 1637  | total loss: [1m[32m0.47460[0m[0m | time: 13.189s
[2K| Adam | epoch: 006 | loss: 0.47460 - acc: 0.7961 -- iter: 03328/20250


Training Step: 1638  | total loss: [1m[32m0.47540[0m[0m | time: 13.433s
[2K| Adam | epoch: 006 | loss: 0.47540 - acc: 0.7962 -- iter: 03392/20250


Training Step: 1639  | total loss: [1m[32m0.46979[0m[0m | time: 13.676s
[2K| Adam | epoch: 006 | loss: 0.46979 - acc: 0.8025 -- iter: 03456/20250


Training Step: 1640  | total loss: [1m[32m0.46787[0m[0m | time: 13.926s
[2K| Adam | epoch: 006 | loss: 0.46787 - acc: 0.8051 -- iter: 03520/20250


Training Step: 1641  | total loss: [1m[32m0.46085[0m[0m | time: 14.167s
[2K| Adam | epoch: 006 | loss: 0.46085 - acc: 0.8105 -- iter: 03584/20250


Training Step: 1642  | total loss: [1m[32m0.46370[0m[0m | time: 14.460s
[2K| Adam | epoch: 006 | loss: 0.46370 - acc: 0.8076 -- iter: 03648/20250


Training Step: 1643  | total loss: [1m[32m0.45691[0m[0m | time: 14.738s
[2K| Adam | epoch: 006 | loss: 0.45691 - acc: 0.8112 -- iter: 03712/20250


Training Step: 1644  | total loss: [1m[32m0.46310[0m[0m | time: 14.986s
[2K| Adam | epoch: 006 | loss: 0.46310 - acc: 0.8051 -- iter: 03776/20250


Training Step: 1645  | total loss: [1m[32m0.45972[0m[0m | time: 15.251s
[2K| Adam | epoch: 006 | loss: 0.45972 - acc: 0.8105 -- iter: 03840/20250


Training Step: 1646  | total loss: [1m[32m0.46368[0m[0m | time: 15.511s
[2K| Adam | epoch: 006 | loss: 0.46368 - acc: 0.8076 -- iter: 03904/20250


Training Step: 1647  | total loss: [1m[32m0.45274[0m[0m | time: 15.751s
[2K| Adam | epoch: 006 | loss: 0.45274 - acc: 0.8159 -- iter: 03968/20250


Training Step: 1648  | total loss: [1m[32m0.44986[0m[0m | time: 16.009s
[2K| Adam | epoch: 006 | loss: 0.44986 - acc: 0.8156 -- iter: 04032/20250


Training Step: 1649  | total loss: [1m[32m0.45455[0m[0m | time: 16.263s
[2K| Adam | epoch: 006 | loss: 0.45455 - acc: 0.8121 -- iter: 04096/20250


Training Step: 1650  | total loss: [1m[32m0.45973[0m[0m | time: 16.514s
[2K| Adam | epoch: 006 | loss: 0.45973 - acc: 0.8106 -- iter: 04160/20250


Training Step: 1651  | total loss: [1m[32m0.45300[0m[0m | time: 16.747s
[2K| Adam | epoch: 006 | loss: 0.45300 - acc: 0.8155 -- iter: 04224/20250


Training Step: 1652  | total loss: [1m[32m0.46828[0m[0m | time: 17.014s
[2K| Adam | epoch: 006 | loss: 0.46828 - acc: 0.8105 -- iter: 04288/20250


Training Step: 1653  | total loss: [1m[32m0.49867[0m[0m | time: 17.255s
[2K| Adam | epoch: 006 | loss: 0.49867 - acc: 0.7966 -- iter: 04352/20250


Training Step: 1654  | total loss: [1m[32m0.50605[0m[0m | time: 17.508s
[2K| Adam | epoch: 006 | loss: 0.50605 - acc: 0.7967 -- iter: 04416/20250


Training Step: 1655  | total loss: [1m[32m0.50838[0m[0m | time: 17.753s
[2K| Adam | epoch: 006 | loss: 0.50838 - acc: 0.7982 -- iter: 04480/20250


Training Step: 1656  | total loss: [1m[32m0.53658[0m[0m | time: 17.998s
[2K| Adam | epoch: 006 | loss: 0.53658 - acc: 0.7887 -- iter: 04544/20250


Training Step: 1657  | total loss: [1m[32m0.57712[0m[0m | time: 18.238s
[2K| Adam | epoch: 006 | loss: 0.57712 - acc: 0.7739 -- iter: 04608/20250


Training Step: 1658  | total loss: [1m[32m0.62651[0m[0m | time: 18.486s
[2K| Adam | epoch: 006 | loss: 0.62651 - acc: 0.7512 -- iter: 04672/20250


Training Step: 1659  | total loss: [1m[32m0.67480[0m[0m | time: 18.727s
[2K| Adam | epoch: 006 | loss: 0.67480 - acc: 0.7308 -- iter: 04736/20250


Training Step: 1660  | total loss: [1m[32m0.69212[0m[0m | time: 18.974s
[2K| Adam | epoch: 006 | loss: 0.69212 - acc: 0.7202 -- iter: 04800/20250


Training Step: 1661  | total loss: [1m[32m0.71712[0m[0m | time: 19.217s
[2K| Adam | epoch: 006 | loss: 0.71712 - acc: 0.7029 -- iter: 04864/20250


Training Step: 1662  | total loss: [1m[32m0.73676[0m[0m | time: 19.476s
[2K| Adam | epoch: 006 | loss: 0.73676 - acc: 0.6841 -- iter: 04928/20250


Training Step: 1663  | total loss: [1m[32m0.74472[0m[0m | time: 19.754s
[2K| Adam | epoch: 006 | loss: 0.74472 - acc: 0.6720 -- iter: 04992/20250


Training Step: 1664  | total loss: [1m[32m0.73367[0m[0m | time: 20.007s
[2K| Adam | epoch: 006 | loss: 0.73367 - acc: 0.6688 -- iter: 05056/20250


Training Step: 1665  | total loss: [1m[32m0.73896[0m[0m | time: 20.252s
[2K| Adam | epoch: 006 | loss: 0.73896 - acc: 0.6441 -- iter: 05120/20250


Training Step: 1666  | total loss: [1m[32m0.73195[0m[0m | time: 20.505s
[2K| Adam | epoch: 006 | loss: 0.73195 - acc: 0.6438 -- iter: 05184/20250


Training Step: 1667  | total loss: [1m[32m0.72368[0m[0m | time: 20.760s
[2K| Adam | epoch: 006 | loss: 0.72368 - acc: 0.6232 -- iter: 05248/20250


Training Step: 1668  | total loss: [1m[32m0.71959[0m[0m | time: 21.004s
[2K| Adam | epoch: 006 | loss: 0.71959 - acc: 0.6077 -- iter: 05312/20250


Training Step: 1669  | total loss: [1m[32m0.72099[0m[0m | time: 21.257s
[2K| Adam | epoch: 006 | loss: 0.72099 - acc: 0.5954 -- iter: 05376/20250


Training Step: 1670  | total loss: [1m[32m0.71253[0m[0m | time: 21.508s
[2K| Adam | epoch: 006 | loss: 0.71253 - acc: 0.5952 -- iter: 05440/20250


Training Step: 1671  | total loss: [1m[32m0.70392[0m[0m | time: 21.777s
[2K| Adam | epoch: 006 | loss: 0.70392 - acc: 0.5920 -- iter: 05504/20250


Training Step: 1672  | total loss: [1m[32m0.70578[0m[0m | time: 22.042s
[2K| Adam | epoch: 006 | loss: 0.70578 - acc: 0.5890 -- iter: 05568/20250


Training Step: 1673  | total loss: [1m[32m0.70425[0m[0m | time: 22.301s
[2K| Adam | epoch: 006 | loss: 0.70425 - acc: 0.5864 -- iter: 05632/20250


Training Step: 1674  | total loss: [1m[32m0.70466[0m[0m | time: 22.545s
[2K| Adam | epoch: 006 | loss: 0.70466 - acc: 0.5777 -- iter: 05696/20250


Training Step: 1675  | total loss: [1m[32m0.70280[0m[0m | time: 22.795s
[2K| Adam | epoch: 006 | loss: 0.70280 - acc: 0.5793 -- iter: 05760/20250


Training Step: 1676  | total loss: [1m[32m0.70383[0m[0m | time: 23.044s
[2K| Adam | epoch: 006 | loss: 0.70383 - acc: 0.5730 -- iter: 05824/20250


Training Step: 1677  | total loss: [1m[32m0.70246[0m[0m | time: 23.294s
[2K| Adam | epoch: 006 | loss: 0.70246 - acc: 0.5719 -- iter: 05888/20250


Training Step: 1678  | total loss: [1m[32m0.70162[0m[0m | time: 23.542s
[2K| Adam | epoch: 006 | loss: 0.70162 - acc: 0.5663 -- iter: 05952/20250


Training Step: 1679  | total loss: [1m[32m0.69099[0m[0m | time: 23.789s
[2K| Adam | epoch: 006 | loss: 0.69099 - acc: 0.5706 -- iter: 06016/20250


Training Step: 1680  | total loss: [1m[32m0.69112[0m[0m | time: 24.045s
[2K| Adam | epoch: 006 | loss: 0.69112 - acc: 0.5620 -- iter: 06080/20250


Training Step: 1681  | total loss: [1m[32m0.68388[0m[0m | time: 24.334s
[2K| Adam | epoch: 006 | loss: 0.68388 - acc: 0.5714 -- iter: 06144/20250


Training Step: 1682  | total loss: [1m[32m0.68146[0m[0m | time: 24.635s
[2K| Adam | epoch: 006 | loss: 0.68146 - acc: 0.5643 -- iter: 06208/20250


Training Step: 1683  | total loss: [1m[32m0.67597[0m[0m | time: 24.877s
[2K| Adam | epoch: 006 | loss: 0.67597 - acc: 0.5688 -- iter: 06272/20250


Training Step: 1684  | total loss: [1m[32m0.67727[0m[0m | time: 25.137s
[2K| Adam | epoch: 006 | loss: 0.67727 - acc: 0.5713 -- iter: 06336/20250


Training Step: 1685  | total loss: [1m[32m0.67337[0m[0m | time: 25.390s
[2K| Adam | epoch: 006 | loss: 0.67337 - acc: 0.5704 -- iter: 06400/20250


Training Step: 1686  | total loss: [1m[32m0.66478[0m[0m | time: 25.640s
[2K| Adam | epoch: 006 | loss: 0.66478 - acc: 0.5696 -- iter: 06464/20250


Training Step: 1687  | total loss: [1m[32m0.66321[0m[0m | time: 25.882s
[2K| Adam | epoch: 006 | loss: 0.66321 - acc: 0.5658 -- iter: 06528/20250


Training Step: 1688  | total loss: [1m[32m0.65843[0m[0m | time: 26.128s
[2K| Adam | epoch: 006 | loss: 0.65843 - acc: 0.5639 -- iter: 06592/20250


Training Step: 1689  | total loss: [1m[32m0.65465[0m[0m | time: 26.380s
[2K| Adam | epoch: 006 | loss: 0.65465 - acc: 0.5669 -- iter: 06656/20250


Training Step: 1690  | total loss: [1m[32m0.64745[0m[0m | time: 26.633s
[2K| Adam | epoch: 006 | loss: 0.64745 - acc: 0.5696 -- iter: 06720/20250


Training Step: 1691  | total loss: [1m[32m0.65404[0m[0m | time: 26.916s
[2K| Adam | epoch: 006 | loss: 0.65404 - acc: 0.5610 -- iter: 06784/20250


Training Step: 1692  | total loss: [1m[32m0.65209[0m[0m | time: 27.196s
[2K| Adam | epoch: 006 | loss: 0.65209 - acc: 0.5612 -- iter: 06848/20250


Training Step: 1693  | total loss: [1m[32m0.64919[0m[0m | time: 27.438s
[2K| Adam | epoch: 006 | loss: 0.64919 - acc: 0.5676 -- iter: 06912/20250


Training Step: 1694  | total loss: [1m[32m0.63926[0m[0m | time: 27.683s
[2K| Adam | epoch: 006 | loss: 0.63926 - acc: 0.5733 -- iter: 06976/20250


Training Step: 1695  | total loss: [1m[32m0.63994[0m[0m | time: 27.932s
[2K| Adam | epoch: 006 | loss: 0.63994 - acc: 0.5691 -- iter: 07040/20250


Training Step: 1696  | total loss: [1m[32m0.64039[0m[0m | time: 28.178s
[2K| Adam | epoch: 006 | loss: 0.64039 - acc: 0.5669 -- iter: 07104/20250


Training Step: 1697  | total loss: [1m[32m0.64036[0m[0m | time: 28.428s
[2K| Adam | epoch: 006 | loss: 0.64036 - acc: 0.5758 -- iter: 07168/20250


Training Step: 1698  | total loss: [1m[32m0.64066[0m[0m | time: 28.684s
[2K| Adam | epoch: 006 | loss: 0.64066 - acc: 0.5745 -- iter: 07232/20250


Training Step: 1699  | total loss: [1m[32m0.64030[0m[0m | time: 28.936s
[2K| Adam | epoch: 006 | loss: 0.64030 - acc: 0.5764 -- iter: 07296/20250


Training Step: 1700  | total loss: [1m[32m0.63614[0m[0m | time: 29.184s
[2K| Adam | epoch: 006 | loss: 0.63614 - acc: 0.5875 -- iter: 07360/20250


Training Step: 1701  | total loss: [1m[32m0.63523[0m[0m | time: 29.446s
[2K| Adam | epoch: 006 | loss: 0.63523 - acc: 0.5866 -- iter: 07424/20250


Training Step: 1702  | total loss: [1m[32m0.63064[0m[0m | time: 29.695s
[2K| Adam | epoch: 006 | loss: 0.63064 - acc: 0.5873 -- iter: 07488/20250


Training Step: 1703  | total loss: [1m[32m0.63041[0m[0m | time: 29.941s
[2K| Adam | epoch: 006 | loss: 0.63041 - acc: 0.5833 -- iter: 07552/20250


Training Step: 1704  | total loss: [1m[32m0.62220[0m[0m | time: 30.196s
[2K| Adam | epoch: 006 | loss: 0.62220 - acc: 0.5999 -- iter: 07616/20250


Training Step: 1705  | total loss: [1m[32m0.62231[0m[0m | time: 30.438s
[2K| Adam | epoch: 006 | loss: 0.62231 - acc: 0.5915 -- iter: 07680/20250


Training Step: 1706  | total loss: [1m[32m0.62619[0m[0m | time: 30.686s
[2K| Adam | epoch: 006 | loss: 0.62619 - acc: 0.5870 -- iter: 07744/20250


Training Step: 1707  | total loss: [1m[32m0.62473[0m[0m | time: 30.934s
[2K| Adam | epoch: 006 | loss: 0.62473 - acc: 0.5893 -- iter: 07808/20250


Training Step: 1708  | total loss: [1m[32m0.62795[0m[0m | time: 31.184s
[2K| Adam | epoch: 006 | loss: 0.62795 - acc: 0.5835 -- iter: 07872/20250


Training Step: 1709  | total loss: [1m[32m0.62956[0m[0m | time: 31.433s
[2K| Adam | epoch: 006 | loss: 0.62956 - acc: 0.5814 -- iter: 07936/20250


Training Step: 1710  | total loss: [1m[32m0.63128[0m[0m | time: 31.683s
[2K| Adam | epoch: 006 | loss: 0.63128 - acc: 0.5857 -- iter: 08000/20250


Training Step: 1711  | total loss: [1m[32m0.63692[0m[0m | time: 31.980s
[2K| Adam | epoch: 006 | loss: 0.63692 - acc: 0.5740 -- iter: 08064/20250


Training Step: 1712  | total loss: [1m[32m0.63566[0m[0m | time: 32.253s
[2K| Adam | epoch: 006 | loss: 0.63566 - acc: 0.5823 -- iter: 08128/20250


Training Step: 1713  | total loss: [1m[32m0.63561[0m[0m | time: 32.500s
[2K| Adam | epoch: 006 | loss: 0.63561 - acc: 0.5709 -- iter: 08192/20250


Training Step: 1714  | total loss: [1m[32m0.63073[0m[0m | time: 32.756s
[2K| Adam | epoch: 006 | loss: 0.63073 - acc: 0.5748 -- iter: 08256/20250


Training Step: 1715  | total loss: [1m[32m0.62450[0m[0m | time: 33.002s
[2K| Adam | epoch: 006 | loss: 0.62450 - acc: 0.5845 -- iter: 08320/20250


Training Step: 1716  | total loss: [1m[32m0.62465[0m[0m | time: 33.244s
[2K| Adam | epoch: 006 | loss: 0.62465 - acc: 0.5854 -- iter: 08384/20250


Training Step: 1717  | total loss: [1m[32m0.62042[0m[0m | time: 33.503s
[2K| Adam | epoch: 006 | loss: 0.62042 - acc: 0.5925 -- iter: 08448/20250


Training Step: 1718  | total loss: [1m[32m0.62092[0m[0m | time: 33.758s
[2K| Adam | epoch: 006 | loss: 0.62092 - acc: 0.5895 -- iter: 08512/20250


Training Step: 1719  | total loss: [1m[32m0.62833[0m[0m | time: 34.003s
[2K| Adam | epoch: 006 | loss: 0.62833 - acc: 0.5774 -- iter: 08576/20250


Training Step: 1720  | total loss: [1m[32m0.63270[0m[0m | time: 34.281s
[2K| Adam | epoch: 006 | loss: 0.63270 - acc: 0.5744 -- iter: 08640/20250


Training Step: 1721  | total loss: [1m[32m0.63144[0m[0m | time: 34.551s
[2K| Adam | epoch: 006 | loss: 0.63144 - acc: 0.5763 -- iter: 08704/20250


Training Step: 1722  | total loss: [1m[32m0.63676[0m[0m | time: 34.795s
[2K| Adam | epoch: 006 | loss: 0.63676 - acc: 0.5655 -- iter: 08768/20250


Training Step: 1723  | total loss: [1m[32m0.63344[0m[0m | time: 35.043s
[2K| Adam | epoch: 006 | loss: 0.63344 - acc: 0.5684 -- iter: 08832/20250


Training Step: 1724  | total loss: [1m[32m0.63353[0m[0m | time: 35.295s
[2K| Adam | epoch: 006 | loss: 0.63353 - acc: 0.5662 -- iter: 08896/20250


Training Step: 1725  | total loss: [1m[32m0.63257[0m[0m | time: 35.543s
[2K| Adam | epoch: 006 | loss: 0.63257 - acc: 0.5783 -- iter: 08960/20250


Training Step: 1726  | total loss: [1m[32m0.62257[0m[0m | time: 35.797s
[2K| Adam | epoch: 006 | loss: 0.62257 - acc: 0.5893 -- iter: 09024/20250


Training Step: 1727  | total loss: [1m[32m0.62080[0m[0m | time: 36.054s
[2K| Adam | epoch: 006 | loss: 0.62080 - acc: 0.5928 -- iter: 09088/20250


Training Step: 1728  | total loss: [1m[32m0.61971[0m[0m | time: 36.304s
[2K| Adam | epoch: 006 | loss: 0.61971 - acc: 0.5976 -- iter: 09152/20250


Training Step: 1729  | total loss: [1m[32m0.61933[0m[0m | time: 36.546s
[2K| Adam | epoch: 006 | loss: 0.61933 - acc: 0.5972 -- iter: 09216/20250


Training Step: 1730  | total loss: [1m[32m0.61461[0m[0m | time: 36.826s
[2K| Adam | epoch: 006 | loss: 0.61461 - acc: 0.6031 -- iter: 09280/20250


Training Step: 1731  | total loss: [1m[32m0.61478[0m[0m | time: 37.104s
[2K| Adam | epoch: 006 | loss: 0.61478 - acc: 0.6038 -- iter: 09344/20250


Training Step: 1732  | total loss: [1m[32m0.61354[0m[0m | time: 37.366s
[2K| Adam | epoch: 006 | loss: 0.61354 - acc: 0.6090 -- iter: 09408/20250


Training Step: 1733  | total loss: [1m[32m0.61539[0m[0m | time: 37.613s
[2K| Adam | epoch: 006 | loss: 0.61539 - acc: 0.6012 -- iter: 09472/20250


Training Step: 1734  | total loss: [1m[32m0.61288[0m[0m | time: 37.861s
[2K| Adam | epoch: 006 | loss: 0.61288 - acc: 0.6083 -- iter: 09536/20250


Training Step: 1735  | total loss: [1m[32m0.61673[0m[0m | time: 38.106s
[2K| Adam | epoch: 006 | loss: 0.61673 - acc: 0.6021 -- iter: 09600/20250


Training Step: 1736  | total loss: [1m[32m0.61480[0m[0m | time: 38.361s
[2K| Adam | epoch: 006 | loss: 0.61480 - acc: 0.5982 -- iter: 09664/20250


Training Step: 1737  | total loss: [1m[32m0.61830[0m[0m | time: 38.609s
[2K| Adam | epoch: 006 | loss: 0.61830 - acc: 0.5946 -- iter: 09728/20250


Training Step: 1738  | total loss: [1m[32m0.61655[0m[0m | time: 38.854s
[2K| Adam | epoch: 006 | loss: 0.61655 - acc: 0.5930 -- iter: 09792/20250


Training Step: 1739  | total loss: [1m[32m0.62007[0m[0m | time: 39.107s
[2K| Adam | epoch: 006 | loss: 0.62007 - acc: 0.5977 -- iter: 09856/20250


Training Step: 1740  | total loss: [1m[32m0.62397[0m[0m | time: 39.377s
[2K| Adam | epoch: 006 | loss: 0.62397 - acc: 0.5880 -- iter: 09920/20250


Training Step: 1741  | total loss: [1m[32m0.62799[0m[0m | time: 39.662s
[2K| Adam | epoch: 006 | loss: 0.62799 - acc: 0.5807 -- iter: 09984/20250


Training Step: 1742  | total loss: [1m[32m0.62169[0m[0m | time: 39.911s
[2K| Adam | epoch: 006 | loss: 0.62169 - acc: 0.5945 -- iter: 10048/20250


Training Step: 1743  | total loss: [1m[32m0.61998[0m[0m | time: 40.163s
[2K| Adam | epoch: 006 | loss: 0.61998 - acc: 0.5960 -- iter: 10112/20250


Training Step: 1744  | total loss: [1m[32m0.61815[0m[0m | time: 40.424s
[2K| Adam | epoch: 006 | loss: 0.61815 - acc: 0.6005 -- iter: 10176/20250


Training Step: 1745  | total loss: [1m[32m0.61080[0m[0m | time: 40.673s
[2K| Adam | epoch: 006 | loss: 0.61080 - acc: 0.6107 -- iter: 10240/20250


Training Step: 1746  | total loss: [1m[32m0.61007[0m[0m | time: 40.922s
[2K| Adam | epoch: 006 | loss: 0.61007 - acc: 0.6090 -- iter: 10304/20250


Training Step: 1747  | total loss: [1m[32m0.60656[0m[0m | time: 41.216s
[2K| Adam | epoch: 006 | loss: 0.60656 - acc: 0.6106 -- iter: 10368/20250


Training Step: 1748  | total loss: [1m[32m0.60949[0m[0m | time: 41.474s
[2K| Adam | epoch: 006 | loss: 0.60949 - acc: 0.6089 -- iter: 10432/20250


Training Step: 1749  | total loss: [1m[32m0.60635[0m[0m | time: 41.732s
[2K| Adam | epoch: 006 | loss: 0.60635 - acc: 0.6059 -- iter: 10496/20250


Training Step: 1750  | total loss: [1m[32m0.60757[0m[0m | time: 41.989s
[2K| Adam | epoch: 006 | loss: 0.60757 - acc: 0.6062 -- iter: 10560/20250


Training Step: 1751  | total loss: [1m[32m0.61253[0m[0m | time: 42.235s
[2K| Adam | epoch: 006 | loss: 0.61253 - acc: 0.6018 -- iter: 10624/20250


Training Step: 1752  | total loss: [1m[32m0.60760[0m[0m | time: 42.487s
[2K| Adam | epoch: 006 | loss: 0.60760 - acc: 0.6151 -- iter: 10688/20250


Training Step: 1753  | total loss: [1m[32m0.60681[0m[0m | time: 42.739s
[2K| Adam | epoch: 006 | loss: 0.60681 - acc: 0.6192 -- iter: 10752/20250


Training Step: 1754  | total loss: [1m[32m0.60581[0m[0m | time: 42.987s
[2K| Adam | epoch: 006 | loss: 0.60581 - acc: 0.6182 -- iter: 10816/20250


Training Step: 1755  | total loss: [1m[32m0.60823[0m[0m | time: 43.240s
[2K| Adam | epoch: 006 | loss: 0.60823 - acc: 0.6189 -- iter: 10880/20250


Training Step: 1756  | total loss: [1m[32m0.61231[0m[0m | time: 43.501s
[2K| Adam | epoch: 006 | loss: 0.61231 - acc: 0.6070 -- iter: 10944/20250


Training Step: 1757  | total loss: [1m[32m0.61151[0m[0m | time: 43.748s
[2K| Adam | epoch: 006 | loss: 0.61151 - acc: 0.5979 -- iter: 11008/20250


Training Step: 1758  | total loss: [1m[32m0.60948[0m[0m | time: 44.000s
[2K| Adam | epoch: 006 | loss: 0.60948 - acc: 0.6006 -- iter: 11072/20250


Training Step: 1759  | total loss: [1m[32m0.60605[0m[0m | time: 44.273s
[2K| Adam | epoch: 006 | loss: 0.60605 - acc: 0.6077 -- iter: 11136/20250


Training Step: 1760  | total loss: [1m[32m0.59870[0m[0m | time: 44.515s
[2K| Adam | epoch: 006 | loss: 0.59870 - acc: 0.6219 -- iter: 11200/20250


Training Step: 1761  | total loss: [1m[32m0.59965[0m[0m | time: 44.760s
[2K| Adam | epoch: 006 | loss: 0.59965 - acc: 0.6316 -- iter: 11264/20250


Training Step: 1762  | total loss: [1m[32m0.59275[0m[0m | time: 45.014s
[2K| Adam | epoch: 006 | loss: 0.59275 - acc: 0.6403 -- iter: 11328/20250


Training Step: 1763  | total loss: [1m[32m0.59890[0m[0m | time: 45.264s
[2K| Adam | epoch: 006 | loss: 0.59890 - acc: 0.6388 -- iter: 11392/20250


Training Step: 1764  | total loss: [1m[32m0.59724[0m[0m | time: 45.515s
[2K| Adam | epoch: 006 | loss: 0.59724 - acc: 0.6421 -- iter: 11456/20250


Training Step: 1765  | total loss: [1m[32m0.59400[0m[0m | time: 45.774s
[2K| Adam | epoch: 006 | loss: 0.59400 - acc: 0.6482 -- iter: 11520/20250


Training Step: 1766  | total loss: [1m[32m0.59423[0m[0m | time: 46.028s
[2K| Adam | epoch: 006 | loss: 0.59423 - acc: 0.6506 -- iter: 11584/20250


Training Step: 1767  | total loss: [1m[32m0.59876[0m[0m | time: 46.279s
[2K| Adam | epoch: 006 | loss: 0.59876 - acc: 0.6418 -- iter: 11648/20250


Training Step: 1768  | total loss: [1m[32m0.60415[0m[0m | time: 46.533s
[2K| Adam | epoch: 006 | loss: 0.60415 - acc: 0.6276 -- iter: 11712/20250


Training Step: 1769  | total loss: [1m[32m0.60498[0m[0m | time: 46.795s
[2K| Adam | epoch: 006 | loss: 0.60498 - acc: 0.6258 -- iter: 11776/20250


Training Step: 1770  | total loss: [1m[32m0.59864[0m[0m | time: 47.047s
[2K| Adam | epoch: 006 | loss: 0.59864 - acc: 0.6351 -- iter: 11840/20250


Training Step: 1771  | total loss: [1m[32m0.60129[0m[0m | time: 47.292s
[2K| Adam | epoch: 006 | loss: 0.60129 - acc: 0.6325 -- iter: 11904/20250


Training Step: 1772  | total loss: [1m[32m0.60303[0m[0m | time: 47.538s
[2K| Adam | epoch: 006 | loss: 0.60303 - acc: 0.6318 -- iter: 11968/20250


Training Step: 1773  | total loss: [1m[32m0.60276[0m[0m | time: 47.795s
[2K| Adam | epoch: 006 | loss: 0.60276 - acc: 0.6326 -- iter: 12032/20250


Training Step: 1774  | total loss: [1m[32m0.59888[0m[0m | time: 48.048s
[2K| Adam | epoch: 006 | loss: 0.59888 - acc: 0.6366 -- iter: 12096/20250


Training Step: 1775  | total loss: [1m[32m0.59525[0m[0m | time: 48.300s
[2K| Adam | epoch: 006 | loss: 0.59525 - acc: 0.6370 -- iter: 12160/20250


Training Step: 1776  | total loss: [1m[32m0.59735[0m[0m | time: 48.555s
[2K| Adam | epoch: 006 | loss: 0.59735 - acc: 0.6342 -- iter: 12224/20250


Training Step: 1777  | total loss: [1m[32m0.60876[0m[0m | time: 48.808s
[2K| Adam | epoch: 006 | loss: 0.60876 - acc: 0.6192 -- iter: 12288/20250


Training Step: 1778  | total loss: [1m[32m0.60727[0m[0m | time: 49.063s
[2K| Adam | epoch: 006 | loss: 0.60727 - acc: 0.6276 -- iter: 12352/20250


Training Step: 1779  | total loss: [1m[32m0.60047[0m[0m | time: 49.320s
[2K| Adam | epoch: 006 | loss: 0.60047 - acc: 0.6383 -- iter: 12416/20250


Training Step: 1780  | total loss: [1m[32m0.60078[0m[0m | time: 49.575s
[2K| Adam | epoch: 006 | loss: 0.60078 - acc: 0.6370 -- iter: 12480/20250


Training Step: 1781  | total loss: [1m[32m0.59537[0m[0m | time: 49.826s
[2K| Adam | epoch: 006 | loss: 0.59537 - acc: 0.6420 -- iter: 12544/20250


Training Step: 1782  | total loss: [1m[32m0.59628[0m[0m | time: 50.070s
[2K| Adam | epoch: 006 | loss: 0.59628 - acc: 0.6356 -- iter: 12608/20250


Training Step: 1783  | total loss: [1m[32m0.59944[0m[0m | time: 50.317s
[2K| Adam | epoch: 006 | loss: 0.59944 - acc: 0.6299 -- iter: 12672/20250


Training Step: 1784  | total loss: [1m[32m0.59892[0m[0m | time: 50.568s
[2K| Adam | epoch: 006 | loss: 0.59892 - acc: 0.6388 -- iter: 12736/20250


Training Step: 1785  | total loss: [1m[32m0.60429[0m[0m | time: 50.819s
[2K| Adam | epoch: 006 | loss: 0.60429 - acc: 0.6374 -- iter: 12800/20250


Training Step: 1786  | total loss: [1m[32m0.60727[0m[0m | time: 51.067s
[2K| Adam | epoch: 006 | loss: 0.60727 - acc: 0.6268 -- iter: 12864/20250


Training Step: 1787  | total loss: [1m[32m0.61559[0m[0m | time: 51.319s
[2K| Adam | epoch: 006 | loss: 0.61559 - acc: 0.6250 -- iter: 12928/20250


Training Step: 1788  | total loss: [1m[32m0.61089[0m[0m | time: 51.575s
[2K| Adam | epoch: 006 | loss: 0.61089 - acc: 0.6282 -- iter: 12992/20250


Training Step: 1789  | total loss: [1m[32m0.60986[0m[0m | time: 51.842s
[2K| Adam | epoch: 006 | loss: 0.60986 - acc: 0.6278 -- iter: 13056/20250


Training Step: 1790  | total loss: [1m[32m0.61166[0m[0m | time: 52.090s
[2K| Adam | epoch: 006 | loss: 0.61166 - acc: 0.6213 -- iter: 13120/20250


Training Step: 1791  | total loss: [1m[32m0.60857[0m[0m | time: 52.340s
[2K| Adam | epoch: 006 | loss: 0.60857 - acc: 0.6217 -- iter: 13184/20250


Training Step: 1792  | total loss: [1m[32m0.60469[0m[0m | time: 52.587s
[2K| Adam | epoch: 006 | loss: 0.60469 - acc: 0.6329 -- iter: 13248/20250


Training Step: 1793  | total loss: [1m[32m0.60949[0m[0m | time: 52.836s
[2K| Adam | epoch: 006 | loss: 0.60949 - acc: 0.6243 -- iter: 13312/20250


Training Step: 1794  | total loss: [1m[32m0.60708[0m[0m | time: 53.083s
[2K| Adam | epoch: 006 | loss: 0.60708 - acc: 0.6275 -- iter: 13376/20250


Training Step: 1795  | total loss: [1m[32m0.60778[0m[0m | time: 53.331s
[2K| Adam | epoch: 006 | loss: 0.60778 - acc: 0.6304 -- iter: 13440/20250


Training Step: 1796  | total loss: [1m[32m0.61040[0m[0m | time: 53.576s
[2K| Adam | epoch: 006 | loss: 0.61040 - acc: 0.6158 -- iter: 13504/20250


Training Step: 1797  | total loss: [1m[32m0.61053[0m[0m | time: 53.848s
[2K| Adam | epoch: 006 | loss: 0.61053 - acc: 0.6198 -- iter: 13568/20250


Training Step: 1798  | total loss: [1m[32m0.60998[0m[0m | time: 54.117s
[2K| Adam | epoch: 006 | loss: 0.60998 - acc: 0.6250 -- iter: 13632/20250


Training Step: 1799  | total loss: [1m[32m0.59623[0m[0m | time: 54.363s
[2K| Adam | epoch: 006 | loss: 0.59623 - acc: 0.6407 -- iter: 13696/20250


Training Step: 1800  | total loss: [1m[32m0.59447[0m[0m | time: 54.618s
[2K| Adam | epoch: 006 | loss: 0.59447 - acc: 0.6422 -- iter: 13760/20250


Training Step: 1801  | total loss: [1m[32m0.59378[0m[0m | time: 54.863s
[2K| Adam | epoch: 006 | loss: 0.59378 - acc: 0.6389 -- iter: 13824/20250


Training Step: 1802  | total loss: [1m[32m0.58798[0m[0m | time: 55.115s
[2K| Adam | epoch: 006 | loss: 0.58798 - acc: 0.6422 -- iter: 13888/20250


Training Step: 1803  | total loss: [1m[32m0.59476[0m[0m | time: 55.365s
[2K| Adam | epoch: 006 | loss: 0.59476 - acc: 0.6468 -- iter: 13952/20250


Training Step: 1804  | total loss: [1m[32m0.59544[0m[0m | time: 55.607s
[2K| Adam | epoch: 006 | loss: 0.59544 - acc: 0.6430 -- iter: 14016/20250


Training Step: 1805  | total loss: [1m[32m0.58713[0m[0m | time: 55.865s
[2K| Adam | epoch: 006 | loss: 0.58713 - acc: 0.6600 -- iter: 14080/20250


Training Step: 1806  | total loss: [1m[32m0.58971[0m[0m | time: 56.157s
[2K| Adam | epoch: 006 | loss: 0.58971 - acc: 0.6643 -- iter: 14144/20250


Training Step: 1807  | total loss: [1m[32m0.58906[0m[0m | time: 56.444s
[2K| Adam | epoch: 006 | loss: 0.58906 - acc: 0.6697 -- iter: 14208/20250


Training Step: 1808  | total loss: [1m[32m0.59193[0m[0m | time: 56.700s
[2K| Adam | epoch: 006 | loss: 0.59193 - acc: 0.6637 -- iter: 14272/20250


Training Step: 1809  | total loss: [1m[32m0.60265[0m[0m | time: 56.961s
[2K| Adam | epoch: 006 | loss: 0.60265 - acc: 0.6458 -- iter: 14336/20250


Training Step: 1810  | total loss: [1m[32m0.60879[0m[0m | time: 57.210s
[2K| Adam | epoch: 006 | loss: 0.60879 - acc: 0.6343 -- iter: 14400/20250


Training Step: 1811  | total loss: [1m[32m0.61609[0m[0m | time: 57.458s
[2K| Adam | epoch: 006 | loss: 0.61609 - acc: 0.6209 -- iter: 14464/20250


Training Step: 1812  | total loss: [1m[32m0.62146[0m[0m | time: 57.708s
[2K| Adam | epoch: 006 | loss: 0.62146 - acc: 0.6072 -- iter: 14528/20250


Training Step: 1813  | total loss: [1m[32m0.62708[0m[0m | time: 57.958s
[2K| Adam | epoch: 006 | loss: 0.62708 - acc: 0.6028 -- iter: 14592/20250


Training Step: 1814  | total loss: [1m[32m0.62063[0m[0m | time: 58.209s
[2K| Adam | epoch: 006 | loss: 0.62063 - acc: 0.6159 -- iter: 14656/20250


Training Step: 1815  | total loss: [1m[32m0.62503[0m[0m | time: 58.458s
[2K| Adam | epoch: 006 | loss: 0.62503 - acc: 0.6106 -- iter: 14720/20250


Training Step: 1816  | total loss: [1m[32m0.61525[0m[0m | time: 58.711s
[2K| Adam | epoch: 006 | loss: 0.61525 - acc: 0.6230 -- iter: 14784/20250


Training Step: 1817  | total loss: [1m[32m0.62169[0m[0m | time: 58.981s
[2K| Adam | epoch: 006 | loss: 0.62169 - acc: 0.6169 -- iter: 14848/20250


Training Step: 1818  | total loss: [1m[32m0.62131[0m[0m | time: 59.243s
[2K| Adam | epoch: 006 | loss: 0.62131 - acc: 0.6177 -- iter: 14912/20250


Training Step: 1819  | total loss: [1m[32m0.61738[0m[0m | time: 59.495s
[2K| Adam | epoch: 006 | loss: 0.61738 - acc: 0.6200 -- iter: 14976/20250


Training Step: 1820  | total loss: [1m[32m0.61234[0m[0m | time: 59.744s
[2K| Adam | epoch: 006 | loss: 0.61234 - acc: 0.6283 -- iter: 15040/20250


Training Step: 1821  | total loss: [1m[32m0.60919[0m[0m | time: 59.994s
[2K| Adam | epoch: 006 | loss: 0.60919 - acc: 0.6264 -- iter: 15104/20250


Training Step: 1822  | total loss: [1m[32m0.60537[0m[0m | time: 60.243s
[2K| Adam | epoch: 006 | loss: 0.60537 - acc: 0.6325 -- iter: 15168/20250


Training Step: 1823  | total loss: [1m[32m0.59697[0m[0m | time: 60.486s
[2K| Adam | epoch: 006 | loss: 0.59697 - acc: 0.6474 -- iter: 15232/20250


Training Step: 1824  | total loss: [1m[32m0.59683[0m[0m | time: 60.741s
[2K| Adam | epoch: 006 | loss: 0.59683 - acc: 0.6436 -- iter: 15296/20250


Training Step: 1825  | total loss: [1m[32m0.59686[0m[0m | time: 60.994s
[2K| Adam | epoch: 006 | loss: 0.59686 - acc: 0.6371 -- iter: 15360/20250


Training Step: 1826  | total loss: [1m[32m0.59481[0m[0m | time: 61.250s
[2K| Adam | epoch: 006 | loss: 0.59481 - acc: 0.6405 -- iter: 15424/20250


Training Step: 1827  | total loss: [1m[32m0.58810[0m[0m | time: 61.507s
[2K| Adam | epoch: 006 | loss: 0.58810 - acc: 0.6515 -- iter: 15488/20250


Training Step: 1828  | total loss: [1m[32m0.58227[0m[0m | time: 61.776s
[2K| Adam | epoch: 006 | loss: 0.58227 - acc: 0.6582 -- iter: 15552/20250


Training Step: 1829  | total loss: [1m[32m0.58292[0m[0m | time: 62.020s
[2K| Adam | epoch: 006 | loss: 0.58292 - acc: 0.6518 -- iter: 15616/20250


Training Step: 1830  | total loss: [1m[32m0.57157[0m[0m | time: 62.262s
[2K| Adam | epoch: 006 | loss: 0.57157 - acc: 0.6647 -- iter: 15680/20250


Training Step: 1831  | total loss: [1m[32m0.56604[0m[0m | time: 62.509s
[2K| Adam | epoch: 006 | loss: 0.56604 - acc: 0.6764 -- iter: 15744/20250


Training Step: 1832  | total loss: [1m[32m0.56080[0m[0m | time: 62.750s
[2K| Adam | epoch: 006 | loss: 0.56080 - acc: 0.6869 -- iter: 15808/20250


Training Step: 1833  | total loss: [1m[32m0.56076[0m[0m | time: 62.996s
[2K| Adam | epoch: 006 | loss: 0.56076 - acc: 0.6900 -- iter: 15872/20250


Training Step: 1834  | total loss: [1m[32m0.55164[0m[0m | time: 63.237s
[2K| Adam | epoch: 006 | loss: 0.55164 - acc: 0.7085 -- iter: 15936/20250


Training Step: 1835  | total loss: [1m[32m0.55195[0m[0m | time: 63.495s
[2K| Adam | epoch: 006 | loss: 0.55195 - acc: 0.7111 -- iter: 16000/20250


Training Step: 1836  | total loss: [1m[32m0.54973[0m[0m | time: 63.748s
[2K| Adam | epoch: 006 | loss: 0.54973 - acc: 0.7119 -- iter: 16064/20250


Training Step: 1837  | total loss: [1m[32m0.53630[0m[0m | time: 63.994s
[2K| Adam | epoch: 006 | loss: 0.53630 - acc: 0.7266 -- iter: 16128/20250


Training Step: 1838  | total loss: [1m[32m0.55341[0m[0m | time: 64.253s
[2K| Adam | epoch: 006 | loss: 0.55341 - acc: 0.7180 -- iter: 16192/20250


Training Step: 1839  | total loss: [1m[32m0.56425[0m[0m | time: 64.506s
[2K| Adam | epoch: 006 | loss: 0.56425 - acc: 0.7119 -- iter: 16256/20250


Training Step: 1840  | total loss: [1m[32m0.58283[0m[0m | time: 64.753s
[2K| Adam | epoch: 006 | loss: 0.58283 - acc: 0.6985 -- iter: 16320/20250


Training Step: 1841  | total loss: [1m[32m0.58666[0m[0m | time: 65.004s
[2K| Adam | epoch: 006 | loss: 0.58666 - acc: 0.6989 -- iter: 16384/20250


Training Step: 1842  | total loss: [1m[32m0.59261[0m[0m | time: 65.247s
[2K| Adam | epoch: 006 | loss: 0.59261 - acc: 0.6947 -- iter: 16448/20250


Training Step: 1843  | total loss: [1m[32m0.59589[0m[0m | time: 65.493s
[2K| Adam | epoch: 006 | loss: 0.59589 - acc: 0.6893 -- iter: 16512/20250


Training Step: 1844  | total loss: [1m[32m0.58744[0m[0m | time: 65.744s
[2K| Adam | epoch: 006 | loss: 0.58744 - acc: 0.6953 -- iter: 16576/20250


Training Step: 1845  | total loss: [1m[32m0.58560[0m[0m | time: 65.996s
[2K| Adam | epoch: 006 | loss: 0.58560 - acc: 0.7008 -- iter: 16640/20250


Training Step: 1846  | total loss: [1m[32m0.59278[0m[0m | time: 66.242s
[2K| Adam | epoch: 006 | loss: 0.59278 - acc: 0.6979 -- iter: 16704/20250


Training Step: 1847  | total loss: [1m[32m0.58296[0m[0m | time: 66.510s
[2K| Adam | epoch: 006 | loss: 0.58296 - acc: 0.7078 -- iter: 16768/20250


Training Step: 1848  | total loss: [1m[32m0.56805[0m[0m | time: 66.804s
[2K| Adam | epoch: 006 | loss: 0.56805 - acc: 0.7136 -- iter: 16832/20250


Training Step: 1849  | total loss: [1m[32m0.56755[0m[0m | time: 67.060s
[2K| Adam | epoch: 006 | loss: 0.56755 - acc: 0.7094 -- iter: 16896/20250


Training Step: 1850  | total loss: [1m[32m0.56429[0m[0m | time: 67.307s
[2K| Adam | epoch: 006 | loss: 0.56429 - acc: 0.7010 -- iter: 16960/20250


Training Step: 1851  | total loss: [1m[32m0.57253[0m[0m | time: 67.558s
[2K| Adam | epoch: 006 | loss: 0.57253 - acc: 0.6887 -- iter: 17024/20250


Training Step: 1852  | total loss: [1m[32m0.57966[0m[0m | time: 67.815s
[2K| Adam | epoch: 006 | loss: 0.57966 - acc: 0.6839 -- iter: 17088/20250


Training Step: 1853  | total loss: [1m[32m0.58365[0m[0m | time: 68.068s
[2K| Adam | epoch: 006 | loss: 0.58365 - acc: 0.6764 -- iter: 17152/20250


Training Step: 1854  | total loss: [1m[32m0.59248[0m[0m | time: 68.316s
[2K| Adam | epoch: 006 | loss: 0.59248 - acc: 0.6682 -- iter: 17216/20250


Training Step: 1855  | total loss: [1m[32m0.59696[0m[0m | time: 68.560s
[2K| Adam | epoch: 006 | loss: 0.59696 - acc: 0.6576 -- iter: 17280/20250


Training Step: 1856  | total loss: [1m[32m0.60261[0m[0m | time: 68.820s
[2K| Adam | epoch: 006 | loss: 0.60261 - acc: 0.6465 -- iter: 17344/20250


Training Step: 1857  | total loss: [1m[32m0.60118[0m[0m | time: 69.064s
[2K| Adam | epoch: 006 | loss: 0.60118 - acc: 0.6491 -- iter: 17408/20250


Training Step: 1858  | total loss: [1m[32m0.59642[0m[0m | time: 69.329s
[2K| Adam | epoch: 006 | loss: 0.59642 - acc: 0.6513 -- iter: 17472/20250


Training Step: 1859  | total loss: [1m[32m0.59298[0m[0m | time: 69.581s
[2K| Adam | epoch: 006 | loss: 0.59298 - acc: 0.6518 -- iter: 17536/20250


Training Step: 1860  | total loss: [1m[32m0.59070[0m[0m | time: 69.825s
[2K| Adam | epoch: 006 | loss: 0.59070 - acc: 0.6585 -- iter: 17600/20250


Training Step: 1861  | total loss: [1m[32m0.59313[0m[0m | time: 70.072s
[2K| Adam | epoch: 006 | loss: 0.59313 - acc: 0.6567 -- iter: 17664/20250


Training Step: 1862  | total loss: [1m[32m0.58716[0m[0m | time: 70.319s
[2K| Adam | epoch: 006 | loss: 0.58716 - acc: 0.6692 -- iter: 17728/20250


Training Step: 1863  | total loss: [1m[32m0.58434[0m[0m | time: 70.565s
[2K| Adam | epoch: 006 | loss: 0.58434 - acc: 0.6757 -- iter: 17792/20250


Training Step: 1864  | total loss: [1m[32m0.58478[0m[0m | time: 70.813s
[2K| Adam | epoch: 006 | loss: 0.58478 - acc: 0.6753 -- iter: 17856/20250


Training Step: 1865  | total loss: [1m[32m0.57731[0m[0m | time: 71.061s
[2K| Adam | epoch: 006 | loss: 0.57731 - acc: 0.6828 -- iter: 17920/20250


Training Step: 1866  | total loss: [1m[32m0.57473[0m[0m | time: 71.319s
[2K| Adam | epoch: 006 | loss: 0.57473 - acc: 0.6911 -- iter: 17984/20250


Training Step: 1867  | total loss: [1m[32m0.57250[0m[0m | time: 71.592s
[2K| Adam | epoch: 006 | loss: 0.57250 - acc: 0.7001 -- iter: 18048/20250


Training Step: 1868  | total loss: [1m[32m0.57421[0m[0m | time: 71.877s
[2K| Adam | epoch: 006 | loss: 0.57421 - acc: 0.6988 -- iter: 18112/20250


Training Step: 1869  | total loss: [1m[32m0.57023[0m[0m | time: 72.130s
[2K| Adam | epoch: 006 | loss: 0.57023 - acc: 0.7071 -- iter: 18176/20250


Training Step: 1870  | total loss: [1m[32m0.56639[0m[0m | time: 72.379s
[2K| Adam | epoch: 006 | loss: 0.56639 - acc: 0.7161 -- iter: 18240/20250


Training Step: 1871  | total loss: [1m[32m0.56153[0m[0m | time: 72.627s
[2K| Adam | epoch: 006 | loss: 0.56153 - acc: 0.7179 -- iter: 18304/20250


Training Step: 1872  | total loss: [1m[32m0.55767[0m[0m | time: 72.877s
[2K| Adam | epoch: 006 | loss: 0.55767 - acc: 0.7258 -- iter: 18368/20250


Training Step: 1873  | total loss: [1m[32m0.54078[0m[0m | time: 73.130s
[2K| Adam | epoch: 006 | loss: 0.54078 - acc: 0.7454 -- iter: 18432/20250


Training Step: 1874  | total loss: [1m[32m0.53363[0m[0m | time: 73.376s
[2K| Adam | epoch: 006 | loss: 0.53363 - acc: 0.7490 -- iter: 18496/20250


Training Step: 1875  | total loss: [1m[32m0.53819[0m[0m | time: 73.633s
[2K| Adam | epoch: 006 | loss: 0.53819 - acc: 0.7460 -- iter: 18560/20250


Training Step: 1876  | total loss: [1m[32m0.57667[0m[0m | time: 73.890s
[2K| Adam | epoch: 006 | loss: 0.57667 - acc: 0.7245 -- iter: 18624/20250


Training Step: 1877  | total loss: [1m[32m0.57300[0m[0m | time: 74.174s
[2K| Adam | epoch: 006 | loss: 0.57300 - acc: 0.7286 -- iter: 18688/20250


Training Step: 1878  | total loss: [1m[32m0.56191[0m[0m | time: 74.431s
[2K| Adam | epoch: 006 | loss: 0.56191 - acc: 0.7370 -- iter: 18752/20250


Training Step: 1879  | total loss: [1m[32m0.55173[0m[0m | time: 74.686s
[2K| Adam | epoch: 006 | loss: 0.55173 - acc: 0.7399 -- iter: 18816/20250


Training Step: 1880  | total loss: [1m[32m0.54249[0m[0m | time: 74.933s
[2K| Adam | epoch: 006 | loss: 0.54249 - acc: 0.7471 -- iter: 18880/20250


Training Step: 1881  | total loss: [1m[32m0.53336[0m[0m | time: 75.183s
[2K| Adam | epoch: 006 | loss: 0.53336 - acc: 0.7568 -- iter: 18944/20250


Training Step: 1882  | total loss: [1m[32m0.52543[0m[0m | time: 75.431s
[2K| Adam | epoch: 006 | loss: 0.52543 - acc: 0.7639 -- iter: 19008/20250


Training Step: 1883  | total loss: [1m[32m0.51762[0m[0m | time: 75.687s
[2K| Adam | epoch: 006 | loss: 0.51762 - acc: 0.7703 -- iter: 19072/20250


Training Step: 1884  | total loss: [1m[32m0.51307[0m[0m | time: 75.940s
[2K| Adam | epoch: 006 | loss: 0.51307 - acc: 0.7714 -- iter: 19136/20250


Training Step: 1885  | total loss: [1m[32m0.51182[0m[0m | time: 76.190s
[2K| Adam | epoch: 006 | loss: 0.51182 - acc: 0.7708 -- iter: 19200/20250


Training Step: 1886  | total loss: [1m[32m0.50822[0m[0m | time: 76.453s
[2K| Adam | epoch: 006 | loss: 0.50822 - acc: 0.7703 -- iter: 19264/20250


Training Step: 1887  | total loss: [1m[32m0.49999[0m[0m | time: 76.736s
[2K| Adam | epoch: 006 | loss: 0.49999 - acc: 0.7745 -- iter: 19328/20250


Training Step: 1888  | total loss: [1m[32m0.50260[0m[0m | time: 76.981s
[2K| Adam | epoch: 006 | loss: 0.50260 - acc: 0.7768 -- iter: 19392/20250


Training Step: 1889  | total loss: [1m[32m0.51320[0m[0m | time: 77.223s
[2K| Adam | epoch: 006 | loss: 0.51320 - acc: 0.7741 -- iter: 19456/20250


Training Step: 1890  | total loss: [1m[32m0.50796[0m[0m | time: 77.474s
[2K| Adam | epoch: 006 | loss: 0.50796 - acc: 0.7764 -- iter: 19520/20250


Training Step: 1891  | total loss: [1m[32m0.49524[0m[0m | time: 77.728s
[2K| Adam | epoch: 006 | loss: 0.49524 - acc: 0.7862 -- iter: 19584/20250


Training Step: 1892  | total loss: [1m[32m0.48762[0m[0m | time: 77.971s
[2K| Adam | epoch: 006 | loss: 0.48762 - acc: 0.7904 -- iter: 19648/20250


Training Step: 1893  | total loss: [1m[32m0.48545[0m[0m | time: 78.219s
[2K| Adam | epoch: 006 | loss: 0.48545 - acc: 0.7911 -- iter: 19712/20250


Training Step: 1894  | total loss: [1m[32m0.49053[0m[0m | time: 78.463s
[2K| Adam | epoch: 006 | loss: 0.49053 - acc: 0.7885 -- iter: 19776/20250


Training Step: 1895  | total loss: [1m[32m0.48819[0m[0m | time: 78.710s
[2K| Adam | epoch: 006 | loss: 0.48819 - acc: 0.7909 -- iter: 19840/20250


Training Step: 1896  | total loss: [1m[32m0.48909[0m[0m | time: 78.977s
[2K| Adam | epoch: 006 | loss: 0.48909 - acc: 0.7884 -- iter: 19904/20250


Training Step: 1897  | total loss: [1m[32m0.47802[0m[0m | time: 79.244s
[2K| Adam | epoch: 006 | loss: 0.47802 - acc: 0.7971 -- iter: 19968/20250


Training Step: 1898  | total loss: [1m[32m0.47917[0m[0m | time: 79.497s
[2K| Adam | epoch: 006 | loss: 0.47917 - acc: 0.7939 -- iter: 20032/20250


Training Step: 1899  | total loss: [1m[32m0.48482[0m[0m | time: 79.748s
[2K| Adam | epoch: 006 | loss: 0.48482 - acc: 0.7942 -- iter: 20096/20250


Training Step: 1900  | total loss: [1m[32m0.48107[0m[0m | time: 79.990s
[2K| Adam | epoch: 006 | loss: 0.48107 - acc: 0.7960 -- iter: 20160/20250


Training Step: 1901  | total loss: [1m[32m0.48434[0m[0m | time: 80.237s
[2K| Adam | epoch: 006 | loss: 0.48434 - acc: 0.7930 -- iter: 20224/20250


Training Step: 1902  | total loss: [1m[32m0.48517[0m[0m | time: 82.178s
[2K| Adam | epoch: 006 | loss: 0.48517 - acc: 0.7918 | val_loss: 0.69395 - val_acc: 0.6387 -- iter: 20250/20250
--


Training Step: 1903  | total loss: [1m[32m0.50059[0m[0m | time: 0.244s
[2K| Adam | epoch: 007 | loss: 0.50059 - acc: 0.7751 -- iter: 00064/20250


Training Step: 1904  | total loss: [1m[32m0.50552[0m[0m | time: 0.492s
[2K| Adam | epoch: 007 | loss: 0.50552 - acc: 0.7695 -- iter: 00128/20250


Training Step: 1905  | total loss: [1m[32m0.50318[0m[0m | time: 0.741s
[2K| Adam | epoch: 007 | loss: 0.50318 - acc: 0.7722 -- iter: 00192/20250


Training Step: 1906  | total loss: [1m[32m0.51149[0m[0m | time: 0.988s
[2K| Adam | epoch: 007 | loss: 0.51149 - acc: 0.7638 -- iter: 00256/20250


Training Step: 1907  | total loss: [1m[32m0.51077[0m[0m | time: 1.224s
[2K| Adam | epoch: 007 | loss: 0.51077 - acc: 0.7671 -- iter: 00320/20250


Training Step: 1908  | total loss: [1m[32m0.50809[0m[0m | time: 1.461s
[2K| Adam | epoch: 007 | loss: 0.50809 - acc: 0.7634 -- iter: 00384/20250


Training Step: 1909  | total loss: [1m[32m0.50428[0m[0m | time: 1.708s
[2K| Adam | epoch: 007 | loss: 0.50428 - acc: 0.7640 -- iter: 00448/20250


Training Step: 1910  | total loss: [1m[32m0.49772[0m[0m | time: 1.966s
[2K| Adam | epoch: 007 | loss: 0.49772 - acc: 0.7673 -- iter: 00512/20250


Training Step: 1911  | total loss: [1m[32m0.49365[0m[0m | time: 2.230s
[2K| Adam | epoch: 007 | loss: 0.49365 - acc: 0.7687 -- iter: 00576/20250


Training Step: 1912  | total loss: [1m[32m0.49424[0m[0m | time: 2.484s
[2K| Adam | epoch: 007 | loss: 0.49424 - acc: 0.7700 -- iter: 00640/20250


Training Step: 1913  | total loss: [1m[32m0.49816[0m[0m | time: 2.745s
[2K| Adam | epoch: 007 | loss: 0.49816 - acc: 0.7664 -- iter: 00704/20250


Training Step: 1914  | total loss: [1m[32m0.49565[0m[0m | time: 2.996s
[2K| Adam | epoch: 007 | loss: 0.49565 - acc: 0.7694 -- iter: 00768/20250


Training Step: 1915  | total loss: [1m[32m0.49970[0m[0m | time: 3.243s
[2K| Adam | epoch: 007 | loss: 0.49970 - acc: 0.7628 -- iter: 00832/20250


Training Step: 1916  | total loss: [1m[32m0.49781[0m[0m | time: 3.497s
[2K| Adam | epoch: 007 | loss: 0.49781 - acc: 0.7647 -- iter: 00896/20250


Training Step: 1917  | total loss: [1m[32m0.48160[0m[0m | time: 3.742s
[2K| Adam | epoch: 007 | loss: 0.48160 - acc: 0.7819 -- iter: 00960/20250


Training Step: 1918  | total loss: [1m[32m0.47896[0m[0m | time: 3.998s
[2K| Adam | epoch: 007 | loss: 0.47896 - acc: 0.7850 -- iter: 01024/20250


Training Step: 1919  | total loss: [1m[32m0.49457[0m[0m | time: 4.263s
[2K| Adam | epoch: 007 | loss: 0.49457 - acc: 0.7768 -- iter: 01088/20250


Training Step: 1920  | total loss: [1m[32m0.48709[0m[0m | time: 4.543s
[2K| Adam | epoch: 007 | loss: 0.48709 - acc: 0.7819 -- iter: 01152/20250


Training Step: 1921  | total loss: [1m[32m0.48074[0m[0m | time: 4.797s
[2K| Adam | epoch: 007 | loss: 0.48074 - acc: 0.7866 -- iter: 01216/20250


Training Step: 1922  | total loss: [1m[32m0.47938[0m[0m | time: 5.049s
[2K| Adam | epoch: 007 | loss: 0.47938 - acc: 0.7845 -- iter: 01280/20250


Training Step: 1923  | total loss: [1m[32m0.48162[0m[0m | time: 5.293s
[2K| Adam | epoch: 007 | loss: 0.48162 - acc: 0.7826 -- iter: 01344/20250


Training Step: 1924  | total loss: [1m[32m0.49131[0m[0m | time: 5.545s
[2K| Adam | epoch: 007 | loss: 0.49131 - acc: 0.7746 -- iter: 01408/20250


Training Step: 1925  | total loss: [1m[32m0.52208[0m[0m | time: 5.794s
[2K| Adam | epoch: 007 | loss: 0.52208 - acc: 0.7534 -- iter: 01472/20250


Training Step: 1926  | total loss: [1m[32m0.55411[0m[0m | time: 6.047s
[2K| Adam | epoch: 007 | loss: 0.55411 - acc: 0.7296 -- iter: 01536/20250


Training Step: 1927  | total loss: [1m[32m0.58611[0m[0m | time: 6.295s
[2K| Adam | epoch: 007 | loss: 0.58611 - acc: 0.7051 -- iter: 01600/20250


Training Step: 1928  | total loss: [1m[32m0.61016[0m[0m | time: 6.543s
[2K| Adam | epoch: 007 | loss: 0.61016 - acc: 0.6862 -- iter: 01664/20250


Training Step: 1929  | total loss: [1m[32m0.62290[0m[0m | time: 6.813s
[2K| Adam | epoch: 007 | loss: 0.62290 - acc: 0.6722 -- iter: 01728/20250


Training Step: 1930  | total loss: [1m[32m0.63311[0m[0m | time: 7.080s
[2K| Adam | epoch: 007 | loss: 0.63311 - acc: 0.6597 -- iter: 01792/20250


Training Step: 1931  | total loss: [1m[32m0.62937[0m[0m | time: 7.323s
[2K| Adam | epoch: 007 | loss: 0.62937 - acc: 0.6562 -- iter: 01856/20250


Training Step: 1932  | total loss: [1m[32m0.62407[0m[0m | time: 7.576s
[2K| Adam | epoch: 007 | loss: 0.62407 - acc: 0.6562 -- iter: 01920/20250


Training Step: 1933  | total loss: [1m[32m0.61567[0m[0m | time: 7.823s
[2K| Adam | epoch: 007 | loss: 0.61567 - acc: 0.6609 -- iter: 01984/20250


Training Step: 1934  | total loss: [1m[32m0.61579[0m[0m | time: 8.080s
[2K| Adam | epoch: 007 | loss: 0.61579 - acc: 0.6526 -- iter: 02048/20250


Training Step: 1935  | total loss: [1m[32m0.61924[0m[0m | time: 8.325s
[2K| Adam | epoch: 007 | loss: 0.61924 - acc: 0.6483 -- iter: 02112/20250


Training Step: 1936  | total loss: [1m[32m0.62119[0m[0m | time: 8.574s
[2K| Adam | epoch: 007 | loss: 0.62119 - acc: 0.6382 -- iter: 02176/20250


Training Step: 1937  | total loss: [1m[32m0.61605[0m[0m | time: 8.814s
[2K| Adam | epoch: 007 | loss: 0.61605 - acc: 0.6369 -- iter: 02240/20250


Training Step: 1938  | total loss: [1m[32m0.60922[0m[0m | time: 9.066s
[2K| Adam | epoch: 007 | loss: 0.60922 - acc: 0.6404 -- iter: 02304/20250


Training Step: 1939  | total loss: [1m[32m0.60349[0m[0m | time: 9.328s
[2K| Adam | epoch: 007 | loss: 0.60349 - acc: 0.6388 -- iter: 02368/20250


Training Step: 1940  | total loss: [1m[32m0.60069[0m[0m | time: 9.605s
[2K| Adam | epoch: 007 | loss: 0.60069 - acc: 0.6359 -- iter: 02432/20250


Training Step: 1941  | total loss: [1m[32m0.59815[0m[0m | time: 9.856s
[2K| Adam | epoch: 007 | loss: 0.59815 - acc: 0.6317 -- iter: 02496/20250


Training Step: 1942  | total loss: [1m[32m0.59375[0m[0m | time: 10.106s
[2K| Adam | epoch: 007 | loss: 0.59375 - acc: 0.6357 -- iter: 02560/20250


Training Step: 1943  | total loss: [1m[32m0.59496[0m[0m | time: 10.357s
[2K| Adam | epoch: 007 | loss: 0.59496 - acc: 0.6315 -- iter: 02624/20250


Training Step: 1944  | total loss: [1m[32m0.59226[0m[0m | time: 10.605s
[2K| Adam | epoch: 007 | loss: 0.59226 - acc: 0.6277 -- iter: 02688/20250


Training Step: 1945  | total loss: [1m[32m0.59639[0m[0m | time: 10.850s
[2K| Adam | epoch: 007 | loss: 0.59639 - acc: 0.6259 -- iter: 02752/20250


Training Step: 1946  | total loss: [1m[32m0.59310[0m[0m | time: 11.099s
[2K| Adam | epoch: 007 | loss: 0.59310 - acc: 0.6305 -- iter: 02816/20250


Training Step: 1947  | total loss: [1m[32m0.58989[0m[0m | time: 11.353s
[2K| Adam | epoch: 007 | loss: 0.58989 - acc: 0.6346 -- iter: 02880/20250


Training Step: 1948  | total loss: [1m[32m0.59575[0m[0m | time: 11.601s
[2K| Adam | epoch: 007 | loss: 0.59575 - acc: 0.6243 -- iter: 02944/20250


Training Step: 1949  | total loss: [1m[32m0.58628[0m[0m | time: 11.865s
[2K| Adam | epoch: 007 | loss: 0.58628 - acc: 0.6290 -- iter: 03008/20250


Training Step: 1950  | total loss: [1m[32m0.58313[0m[0m | time: 12.126s
[2K| Adam | epoch: 007 | loss: 0.58313 - acc: 0.6302 -- iter: 03072/20250


Training Step: 1951  | total loss: [1m[32m0.58279[0m[0m | time: 12.367s
[2K| Adam | epoch: 007 | loss: 0.58279 - acc: 0.6297 -- iter: 03136/20250


Training Step: 1952  | total loss: [1m[32m0.58091[0m[0m | time: 12.617s
[2K| Adam | epoch: 007 | loss: 0.58091 - acc: 0.6308 -- iter: 03200/20250


Training Step: 1953  | total loss: [1m[32m0.57787[0m[0m | time: 12.868s
[2K| Adam | epoch: 007 | loss: 0.57787 - acc: 0.6380 -- iter: 03264/20250


Training Step: 1954  | total loss: [1m[32m0.58077[0m[0m | time: 13.113s
[2K| Adam | epoch: 007 | loss: 0.58077 - acc: 0.6305 -- iter: 03328/20250


Training Step: 1955  | total loss: [1m[32m0.58996[0m[0m | time: 13.364s
[2K| Adam | epoch: 007 | loss: 0.58996 - acc: 0.6284 -- iter: 03392/20250


Training Step: 1956  | total loss: [1m[32m0.59992[0m[0m | time: 13.605s
[2K| Adam | epoch: 007 | loss: 0.59992 - acc: 0.6327 -- iter: 03456/20250


Training Step: 1957  | total loss: [1m[32m0.63482[0m[0m | time: 13.857s
[2K| Adam | epoch: 007 | loss: 0.63482 - acc: 0.6288 -- iter: 03520/20250


Training Step: 1958  | total loss: [1m[32m0.67092[0m[0m | time: 14.104s
[2K| Adam | epoch: 007 | loss: 0.67092 - acc: 0.6222 -- iter: 03584/20250


Training Step: 1959  | total loss: [1m[32m0.68853[0m[0m | time: 14.379s
[2K| Adam | epoch: 007 | loss: 0.68853 - acc: 0.6225 -- iter: 03648/20250


Training Step: 1960  | total loss: [1m[32m0.72599[0m[0m | time: 14.674s
[2K| Adam | epoch: 007 | loss: 0.72599 - acc: 0.6118 -- iter: 03712/20250


Training Step: 1961  | total loss: [1m[32m0.75584[0m[0m | time: 14.920s
[2K| Adam | epoch: 007 | loss: 0.75584 - acc: 0.6006 -- iter: 03776/20250


Training Step: 1962  | total loss: [1m[32m0.78416[0m[0m | time: 15.168s
[2K| Adam | epoch: 007 | loss: 0.78416 - acc: 0.5859 -- iter: 03840/20250


Training Step: 1963  | total loss: [1m[32m0.79345[0m[0m | time: 15.424s
[2K| Adam | epoch: 007 | loss: 0.79345 - acc: 0.5773 -- iter: 03904/20250


Training Step: 1964  | total loss: [1m[32m0.79316[0m[0m | time: 15.668s
[2K| Adam | epoch: 007 | loss: 0.79316 - acc: 0.5742 -- iter: 03968/20250


Training Step: 1965  | total loss: [1m[32m0.78233[0m[0m | time: 15.915s
[2K| Adam | epoch: 007 | loss: 0.78233 - acc: 0.5793 -- iter: 04032/20250


Training Step: 1966  | total loss: [1m[32m0.76354[0m[0m | time: 16.161s
[2K| Adam | epoch: 007 | loss: 0.76354 - acc: 0.5839 -- iter: 04096/20250


Training Step: 1967  | total loss: [1m[32m0.75566[0m[0m | time: 16.408s
[2K| Adam | epoch: 007 | loss: 0.75566 - acc: 0.5802 -- iter: 04160/20250


Training Step: 1968  | total loss: [1m[32m0.74521[0m[0m | time: 16.655s
[2K| Adam | epoch: 007 | loss: 0.74521 - acc: 0.5878 -- iter: 04224/20250


Training Step: 1969  | total loss: [1m[32m0.73579[0m[0m | time: 16.948s
[2K| Adam | epoch: 007 | loss: 0.73579 - acc: 0.5853 -- iter: 04288/20250


Training Step: 1970  | total loss: [1m[32m0.72215[0m[0m | time: 17.214s
[2K| Adam | epoch: 007 | loss: 0.72215 - acc: 0.5892 -- iter: 04352/20250


Training Step: 1971  | total loss: [1m[32m0.70820[0m[0m | time: 17.459s
[2K| Adam | epoch: 007 | loss: 0.70820 - acc: 0.5912 -- iter: 04416/20250


Training Step: 1972  | total loss: [1m[32m0.70060[0m[0m | time: 17.699s
[2K| Adam | epoch: 007 | loss: 0.70060 - acc: 0.5946 -- iter: 04480/20250


Training Step: 1973  | total loss: [1m[32m0.69383[0m[0m | time: 17.941s
[2K| Adam | epoch: 007 | loss: 0.69383 - acc: 0.5930 -- iter: 04544/20250


Training Step: 1974  | total loss: [1m[32m0.68738[0m[0m | time: 18.189s
[2K| Adam | epoch: 007 | loss: 0.68738 - acc: 0.5977 -- iter: 04608/20250


Training Step: 1975  | total loss: [1m[32m0.68362[0m[0m | time: 18.437s
[2K| Adam | epoch: 007 | loss: 0.68362 - acc: 0.6005 -- iter: 04672/20250


Training Step: 1976  | total loss: [1m[32m0.67898[0m[0m | time: 18.682s
[2K| Adam | epoch: 007 | loss: 0.67898 - acc: 0.6029 -- iter: 04736/20250


Training Step: 1977  | total loss: [1m[32m0.67305[0m[0m | time: 18.922s
[2K| Adam | epoch: 007 | loss: 0.67305 - acc: 0.6051 -- iter: 04800/20250


Training Step: 1978  | total loss: [1m[32m0.66984[0m[0m | time: 19.168s
[2K| Adam | epoch: 007 | loss: 0.66984 - acc: 0.6071 -- iter: 04864/20250


Training Step: 1979  | total loss: [1m[32m0.67441[0m[0m | time: 19.446s
[2K| Adam | epoch: 007 | loss: 0.67441 - acc: 0.5980 -- iter: 04928/20250


Training Step: 1980  | total loss: [1m[32m0.66718[0m[0m | time: 19.722s
[2K| Adam | epoch: 007 | loss: 0.66718 - acc: 0.6054 -- iter: 04992/20250


Training Step: 1981  | total loss: [1m[32m0.66577[0m[0m | time: 19.984s
[2K| Adam | epoch: 007 | loss: 0.66577 - acc: 0.6058 -- iter: 05056/20250


Training Step: 1982  | total loss: [1m[32m0.67297[0m[0m | time: 20.229s
[2K| Adam | epoch: 007 | loss: 0.67297 - acc: 0.5936 -- iter: 05120/20250


Training Step: 1983  | total loss: [1m[32m0.67989[0m[0m | time: 20.482s
[2K| Adam | epoch: 007 | loss: 0.67989 - acc: 0.5827 -- iter: 05184/20250


Training Step: 1984  | total loss: [1m[32m0.68608[0m[0m | time: 20.729s
[2K| Adam | epoch: 007 | loss: 0.68608 - acc: 0.5729 -- iter: 05248/20250


Training Step: 1985  | total loss: [1m[32m0.68293[0m[0m | time: 20.975s
[2K| Adam | epoch: 007 | loss: 0.68293 - acc: 0.5687 -- iter: 05312/20250


Training Step: 1986  | total loss: [1m[32m0.68538[0m[0m | time: 21.222s
[2K| Adam | epoch: 007 | loss: 0.68538 - acc: 0.5650 -- iter: 05376/20250


Training Step: 1987  | total loss: [1m[32m0.68698[0m[0m | time: 21.472s
[2K| Adam | epoch: 007 | loss: 0.68698 - acc: 0.5631 -- iter: 05440/20250


Training Step: 1988  | total loss: [1m[32m0.67575[0m[0m | time: 21.712s
[2K| Adam | epoch: 007 | loss: 0.67575 - acc: 0.5693 -- iter: 05504/20250


Training Step: 1989  | total loss: [1m[32m0.66772[0m[0m | time: 21.989s
[2K| Adam | epoch: 007 | loss: 0.66772 - acc: 0.5765 -- iter: 05568/20250


Training Step: 1990  | total loss: [1m[32m0.67060[0m[0m | time: 22.251s
[2K| Adam | epoch: 007 | loss: 0.67060 - acc: 0.5688 -- iter: 05632/20250


Training Step: 1991  | total loss: [1m[32m0.66121[0m[0m | time: 22.490s
[2K| Adam | epoch: 007 | loss: 0.66121 - acc: 0.5791 -- iter: 05696/20250


Training Step: 1992  | total loss: [1m[32m0.65453[0m[0m | time: 22.730s
[2K| Adam | epoch: 007 | loss: 0.65453 - acc: 0.5821 -- iter: 05760/20250


Training Step: 1993  | total loss: [1m[32m0.65561[0m[0m | time: 22.974s
[2K| Adam | epoch: 007 | loss: 0.65561 - acc: 0.5802 -- iter: 05824/20250


Training Step: 1994  | total loss: [1m[32m0.64942[0m[0m | time: 23.223s
[2K| Adam | epoch: 007 | loss: 0.64942 - acc: 0.5800 -- iter: 05888/20250


Training Step: 1995  | total loss: [1m[32m0.64211[0m[0m | time: 23.467s
[2K| Adam | epoch: 007 | loss: 0.64211 - acc: 0.5907 -- iter: 05952/20250


Training Step: 1996  | total loss: [1m[32m0.63777[0m[0m | time: 23.710s
[2K| Adam | epoch: 007 | loss: 0.63777 - acc: 0.5957 -- iter: 06016/20250


Training Step: 1997  | total loss: [1m[32m0.63087[0m[0m | time: 23.951s
[2K| Adam | epoch: 007 | loss: 0.63087 - acc: 0.5986 -- iter: 06080/20250


Training Step: 1998  | total loss: [1m[32m0.62473[0m[0m | time: 24.196s
[2K| Adam | epoch: 007 | loss: 0.62473 - acc: 0.5997 -- iter: 06144/20250


Training Step: 1999  | total loss: [1m[32m0.62162[0m[0m | time: 24.449s
[2K| Adam | epoch: 007 | loss: 0.62162 - acc: 0.6054 -- iter: 06208/20250


Training Step: 2000  | total loss: [1m[32m0.62948[0m[0m | time: 24.700s
[2K| Adam | epoch: 007 | loss: 0.62948 - acc: 0.6026 -- iter: 06272/20250


Training Step: 2001  | total loss: [1m[32m0.62679[0m[0m | time: 24.951s
[2K| Adam | epoch: 007 | loss: 0.62679 - acc: 0.6033 -- iter: 06336/20250


Training Step: 2002  | total loss: [1m[32m0.62814[0m[0m | time: 25.203s
[2K| Adam | epoch: 007 | loss: 0.62814 - acc: 0.6055 -- iter: 06400/20250


Training Step: 2003  | total loss: [1m[32m0.62219[0m[0m | time: 25.468s
[2K| Adam | epoch: 007 | loss: 0.62219 - acc: 0.6184 -- iter: 06464/20250


Training Step: 2004  | total loss: [1m[32m0.62630[0m[0m | time: 25.707s
[2K| Adam | epoch: 007 | loss: 0.62630 - acc: 0.6112 -- iter: 06528/20250


Training Step: 2005  | total loss: [1m[32m0.62583[0m[0m | time: 25.943s
[2K| Adam | epoch: 007 | loss: 0.62583 - acc: 0.6126 -- iter: 06592/20250


Training Step: 2006  | total loss: [1m[32m0.62649[0m[0m | time: 26.190s
[2K| Adam | epoch: 007 | loss: 0.62649 - acc: 0.6076 -- iter: 06656/20250


Training Step: 2007  | total loss: [1m[32m0.62382[0m[0m | time: 26.435s
[2K| Adam | epoch: 007 | loss: 0.62382 - acc: 0.6078 -- iter: 06720/20250


Training Step: 2008  | total loss: [1m[32m0.62084[0m[0m | time: 26.681s
[2K| Adam | epoch: 007 | loss: 0.62084 - acc: 0.6111 -- iter: 06784/20250


Training Step: 2009  | total loss: [1m[32m0.61877[0m[0m | time: 26.930s
[2K| Adam | epoch: 007 | loss: 0.61877 - acc: 0.6156 -- iter: 06848/20250


Training Step: 2010  | total loss: [1m[32m0.61715[0m[0m | time: 27.170s
[2K| Adam | epoch: 007 | loss: 0.61715 - acc: 0.6228 -- iter: 06912/20250


Training Step: 2011  | total loss: [1m[32m0.61453[0m[0m | time: 27.414s
[2K| Adam | epoch: 007 | loss: 0.61453 - acc: 0.6199 -- iter: 06976/20250


Training Step: 2012  | total loss: [1m[32m0.61213[0m[0m | time: 27.669s
[2K| Adam | epoch: 007 | loss: 0.61213 - acc: 0.6235 -- iter: 07040/20250


Training Step: 2013  | total loss: [1m[32m0.61388[0m[0m | time: 27.922s
[2K| Adam | epoch: 007 | loss: 0.61388 - acc: 0.6237 -- iter: 07104/20250


Training Step: 2014  | total loss: [1m[32m0.61327[0m[0m | time: 28.192s
[2K| Adam | epoch: 007 | loss: 0.61327 - acc: 0.6207 -- iter: 07168/20250


Training Step: 2015  | total loss: [1m[32m0.61101[0m[0m | time: 28.436s
[2K| Adam | epoch: 007 | loss: 0.61101 - acc: 0.6180 -- iter: 07232/20250


Training Step: 2016  | total loss: [1m[32m0.61211[0m[0m | time: 28.671s
[2K| Adam | epoch: 007 | loss: 0.61211 - acc: 0.6218 -- iter: 07296/20250


Training Step: 2017  | total loss: [1m[32m0.60481[0m[0m | time: 28.918s
[2K| Adam | epoch: 007 | loss: 0.60481 - acc: 0.6377 -- iter: 07360/20250


Training Step: 2018  | total loss: [1m[32m0.60111[0m[0m | time: 29.165s
[2K| Adam | epoch: 007 | loss: 0.60111 - acc: 0.6443 -- iter: 07424/20250


Training Step: 2019  | total loss: [1m[32m0.60148[0m[0m | time: 29.411s
[2K| Adam | epoch: 007 | loss: 0.60148 - acc: 0.6502 -- iter: 07488/20250


Training Step: 2020  | total loss: [1m[32m0.60831[0m[0m | time: 29.651s
[2K| Adam | epoch: 007 | loss: 0.60831 - acc: 0.6461 -- iter: 07552/20250


Training Step: 2021  | total loss: [1m[32m0.61022[0m[0m | time: 29.892s
[2K| Adam | epoch: 007 | loss: 0.61022 - acc: 0.6502 -- iter: 07616/20250


Training Step: 2022  | total loss: [1m[32m0.61466[0m[0m | time: 30.133s
[2K| Adam | epoch: 007 | loss: 0.61466 - acc: 0.6399 -- iter: 07680/20250


Training Step: 2023  | total loss: [1m[32m0.61416[0m[0m | time: 30.383s
[2K| Adam | epoch: 007 | loss: 0.61416 - acc: 0.6431 -- iter: 07744/20250


Training Step: 2024  | total loss: [1m[32m0.60866[0m[0m | time: 30.630s
[2K| Adam | epoch: 007 | loss: 0.60866 - acc: 0.6616 -- iter: 07808/20250


Training Step: 2025  | total loss: [1m[32m0.60699[0m[0m | time: 30.868s
[2K| Adam | epoch: 007 | loss: 0.60699 - acc: 0.6611 -- iter: 07872/20250


Training Step: 2026  | total loss: [1m[32m0.60179[0m[0m | time: 31.114s
[2K| Adam | epoch: 007 | loss: 0.60179 - acc: 0.6637 -- iter: 07936/20250


Training Step: 2027  | total loss: [1m[32m0.60644[0m[0m | time: 31.363s
[2K| Adam | epoch: 007 | loss: 0.60644 - acc: 0.6614 -- iter: 08000/20250


Training Step: 2028  | total loss: [1m[32m0.60823[0m[0m | time: 31.604s
[2K| Adam | epoch: 007 | loss: 0.60823 - acc: 0.6593 -- iter: 08064/20250


Training Step: 2029  | total loss: [1m[32m0.61215[0m[0m | time: 31.853s
[2K| Adam | epoch: 007 | loss: 0.61215 - acc: 0.6543 -- iter: 08128/20250


Training Step: 2030  | total loss: [1m[32m0.60840[0m[0m | time: 32.092s
[2K| Adam | epoch: 007 | loss: 0.60840 - acc: 0.6530 -- iter: 08192/20250


Training Step: 2031  | total loss: [1m[32m0.60082[0m[0m | time: 32.334s
[2K| Adam | epoch: 007 | loss: 0.60082 - acc: 0.6627 -- iter: 08256/20250


Training Step: 2032  | total loss: [1m[32m0.59970[0m[0m | time: 32.579s
[2K| Adam | epoch: 007 | loss: 0.59970 - acc: 0.6636 -- iter: 08320/20250


Training Step: 2033  | total loss: [1m[32m0.59644[0m[0m | time: 32.839s
[2K| Adam | epoch: 007 | loss: 0.59644 - acc: 0.6660 -- iter: 08384/20250


Training Step: 2034  | total loss: [1m[32m0.60565[0m[0m | time: 33.112s
[2K| Adam | epoch: 007 | loss: 0.60565 - acc: 0.6572 -- iter: 08448/20250


Training Step: 2035  | total loss: [1m[32m0.59808[0m[0m | time: 33.353s
[2K| Adam | epoch: 007 | loss: 0.59808 - acc: 0.6618 -- iter: 08512/20250


Training Step: 2036  | total loss: [1m[32m0.60458[0m[0m | time: 33.596s
[2K| Adam | epoch: 007 | loss: 0.60458 - acc: 0.6503 -- iter: 08576/20250


Training Step: 2037  | total loss: [1m[32m0.60555[0m[0m | time: 33.845s
[2K| Adam | epoch: 007 | loss: 0.60555 - acc: 0.6493 -- iter: 08640/20250


Training Step: 2038  | total loss: [1m[32m0.59625[0m[0m | time: 34.092s
[2K| Adam | epoch: 007 | loss: 0.59625 - acc: 0.6625 -- iter: 08704/20250


Training Step: 2039  | total loss: [1m[32m0.60744[0m[0m | time: 34.333s
[2K| Adam | epoch: 007 | loss: 0.60744 - acc: 0.6556 -- iter: 08768/20250


Training Step: 2040  | total loss: [1m[32m0.59779[0m[0m | time: 34.576s
[2K| Adam | epoch: 007 | loss: 0.59779 - acc: 0.6604 -- iter: 08832/20250


Training Step: 2041  | total loss: [1m[32m0.59700[0m[0m | time: 34.824s
[2K| Adam | epoch: 007 | loss: 0.59700 - acc: 0.6615 -- iter: 08896/20250


Training Step: 2042  | total loss: [1m[32m0.59335[0m[0m | time: 35.066s
[2K| Adam | epoch: 007 | loss: 0.59335 - acc: 0.6579 -- iter: 08960/20250


Training Step: 2043  | total loss: [1m[32m0.58846[0m[0m | time: 35.330s
[2K| Adam | epoch: 007 | loss: 0.58846 - acc: 0.6687 -- iter: 09024/20250


Training Step: 2044  | total loss: [1m[32m0.58599[0m[0m | time: 35.603s
[2K| Adam | epoch: 007 | loss: 0.58599 - acc: 0.6674 -- iter: 09088/20250


Training Step: 2045  | total loss: [1m[32m0.59264[0m[0m | time: 35.848s
[2K| Adam | epoch: 007 | loss: 0.59264 - acc: 0.6538 -- iter: 09152/20250


Training Step: 2046  | total loss: [1m[32m0.58612[0m[0m | time: 36.089s
[2K| Adam | epoch: 007 | loss: 0.58612 - acc: 0.6603 -- iter: 09216/20250


Training Step: 2047  | total loss: [1m[32m0.59359[0m[0m | time: 36.337s
[2K| Adam | epoch: 007 | loss: 0.59359 - acc: 0.6474 -- iter: 09280/20250


Training Step: 2048  | total loss: [1m[32m0.59180[0m[0m | time: 36.581s
[2K| Adam | epoch: 007 | loss: 0.59180 - acc: 0.6545 -- iter: 09344/20250


Training Step: 2049  | total loss: [1m[32m0.59161[0m[0m | time: 36.827s
[2K| Adam | epoch: 007 | loss: 0.59161 - acc: 0.6531 -- iter: 09408/20250


Training Step: 2050  | total loss: [1m[32m0.59463[0m[0m | time: 37.073s
[2K| Adam | epoch: 007 | loss: 0.59463 - acc: 0.6488 -- iter: 09472/20250


Training Step: 2051  | total loss: [1m[32m0.59767[0m[0m | time: 37.316s
[2K| Adam | epoch: 007 | loss: 0.59767 - acc: 0.6370 -- iter: 09536/20250


Training Step: 2052  | total loss: [1m[32m0.59855[0m[0m | time: 37.565s
[2K| Adam | epoch: 007 | loss: 0.59855 - acc: 0.6405 -- iter: 09600/20250


Training Step: 2053  | total loss: [1m[32m0.59509[0m[0m | time: 37.813s
[2K| Adam | epoch: 007 | loss: 0.59509 - acc: 0.6452 -- iter: 09664/20250


Training Step: 2054  | total loss: [1m[32m0.60393[0m[0m | time: 38.072s
[2K| Adam | epoch: 007 | loss: 0.60393 - acc: 0.6354 -- iter: 09728/20250


Training Step: 2055  | total loss: [1m[32m0.60547[0m[0m | time: 38.317s
[2K| Adam | epoch: 007 | loss: 0.60547 - acc: 0.6312 -- iter: 09792/20250


Training Step: 2056  | total loss: [1m[32m0.60491[0m[0m | time: 38.555s
[2K| Adam | epoch: 007 | loss: 0.60491 - acc: 0.6384 -- iter: 09856/20250


Training Step: 2057  | total loss: [1m[32m0.60866[0m[0m | time: 38.804s
[2K| Adam | epoch: 007 | loss: 0.60866 - acc: 0.6355 -- iter: 09920/20250


Training Step: 2058  | total loss: [1m[32m0.60574[0m[0m | time: 39.039s
[2K| Adam | epoch: 007 | loss: 0.60574 - acc: 0.6407 -- iter: 09984/20250


Training Step: 2059  | total loss: [1m[32m0.60099[0m[0m | time: 39.281s
[2K| Adam | epoch: 007 | loss: 0.60099 - acc: 0.6438 -- iter: 10048/20250


Training Step: 2060  | total loss: [1m[32m0.59951[0m[0m | time: 39.527s
[2K| Adam | epoch: 007 | loss: 0.59951 - acc: 0.6466 -- iter: 10112/20250


Training Step: 2061  | total loss: [1m[32m0.59736[0m[0m | time: 39.767s
[2K| Adam | epoch: 007 | loss: 0.59736 - acc: 0.6491 -- iter: 10176/20250


Training Step: 2062  | total loss: [1m[32m0.60244[0m[0m | time: 39.999s
[2K| Adam | epoch: 007 | loss: 0.60244 - acc: 0.6483 -- iter: 10240/20250


Training Step: 2063  | total loss: [1m[32m0.60338[0m[0m | time: 40.250s
[2K| Adam | epoch: 007 | loss: 0.60338 - acc: 0.6444 -- iter: 10304/20250


Training Step: 2064  | total loss: [1m[32m0.60477[0m[0m | time: 40.533s
[2K| Adam | epoch: 007 | loss: 0.60477 - acc: 0.6425 -- iter: 10368/20250


Training Step: 2065  | total loss: [1m[32m0.60624[0m[0m | time: 40.771s
[2K| Adam | epoch: 007 | loss: 0.60624 - acc: 0.6392 -- iter: 10432/20250


Training Step: 2066  | total loss: [1m[32m0.59728[0m[0m | time: 41.014s
[2K| Adam | epoch: 007 | loss: 0.59728 - acc: 0.6487 -- iter: 10496/20250


Training Step: 2067  | total loss: [1m[32m0.59536[0m[0m | time: 41.262s
[2K| Adam | epoch: 007 | loss: 0.59536 - acc: 0.6510 -- iter: 10560/20250


Training Step: 2068  | total loss: [1m[32m0.60081[0m[0m | time: 41.509s
[2K| Adam | epoch: 007 | loss: 0.60081 - acc: 0.6359 -- iter: 10624/20250


Training Step: 2069  | total loss: [1m[32m0.60214[0m[0m | time: 41.750s
[2K| Adam | epoch: 007 | loss: 0.60214 - acc: 0.6379 -- iter: 10688/20250


Training Step: 2070  | total loss: [1m[32m0.60405[0m[0m | time: 41.992s
[2K| Adam | epoch: 007 | loss: 0.60405 - acc: 0.6366 -- iter: 10752/20250


Training Step: 2071  | total loss: [1m[32m0.59812[0m[0m | time: 42.237s
[2K| Adam | epoch: 007 | loss: 0.59812 - acc: 0.6417 -- iter: 10816/20250


Training Step: 2072  | total loss: [1m[32m0.60156[0m[0m | time: 42.482s
[2K| Adam | epoch: 007 | loss: 0.60156 - acc: 0.6385 -- iter: 10880/20250


Training Step: 2073  | total loss: [1m[32m0.60381[0m[0m | time: 42.737s
[2K| Adam | epoch: 007 | loss: 0.60381 - acc: 0.6371 -- iter: 10944/20250


Training Step: 2074  | total loss: [1m[32m0.61268[0m[0m | time: 43.011s
[2K| Adam | epoch: 007 | loss: 0.61268 - acc: 0.6266 -- iter: 11008/20250


Training Step: 2075  | total loss: [1m[32m0.60693[0m[0m | time: 43.261s
[2K| Adam | epoch: 007 | loss: 0.60693 - acc: 0.6342 -- iter: 11072/20250


Training Step: 2076  | total loss: [1m[32m0.60519[0m[0m | time: 43.504s
[2K| Adam | epoch: 007 | loss: 0.60519 - acc: 0.6380 -- iter: 11136/20250


Training Step: 2077  | total loss: [1m[32m0.60944[0m[0m | time: 43.749s
[2K| Adam | epoch: 007 | loss: 0.60944 - acc: 0.6304 -- iter: 11200/20250


Training Step: 2078  | total loss: [1m[32m0.61231[0m[0m | time: 43.993s
[2K| Adam | epoch: 007 | loss: 0.61231 - acc: 0.6314 -- iter: 11264/20250


Training Step: 2079  | total loss: [1m[32m0.61106[0m[0m | time: 44.239s
[2K| Adam | epoch: 007 | loss: 0.61106 - acc: 0.6261 -- iter: 11328/20250


Training Step: 2080  | total loss: [1m[32m0.60252[0m[0m | time: 44.480s
[2K| Adam | epoch: 007 | loss: 0.60252 - acc: 0.6385 -- iter: 11392/20250


Training Step: 2081  | total loss: [1m[32m0.60597[0m[0m | time: 44.720s
[2K| Adam | epoch: 007 | loss: 0.60597 - acc: 0.6325 -- iter: 11456/20250


Training Step: 2082  | total loss: [1m[32m0.60628[0m[0m | time: 44.965s
[2K| Adam | epoch: 007 | loss: 0.60628 - acc: 0.6411 -- iter: 11520/20250


Training Step: 2083  | total loss: [1m[32m0.60359[0m[0m | time: 45.212s
[2K| Adam | epoch: 007 | loss: 0.60359 - acc: 0.6489 -- iter: 11584/20250


Training Step: 2084  | total loss: [1m[32m0.60144[0m[0m | time: 45.464s
[2K| Adam | epoch: 007 | loss: 0.60144 - acc: 0.6480 -- iter: 11648/20250


Training Step: 2085  | total loss: [1m[32m0.59733[0m[0m | time: 45.747s
[2K| Adam | epoch: 007 | loss: 0.59733 - acc: 0.6520 -- iter: 11712/20250


Training Step: 2086  | total loss: [1m[32m0.60541[0m[0m | time: 45.988s
[2K| Adam | epoch: 007 | loss: 0.60541 - acc: 0.6462 -- iter: 11776/20250


Training Step: 2087  | total loss: [1m[32m0.60261[0m[0m | time: 46.232s
[2K| Adam | epoch: 007 | loss: 0.60261 - acc: 0.6534 -- iter: 11840/20250


Training Step: 2088  | total loss: [1m[32m0.60068[0m[0m | time: 46.469s
[2K| Adam | epoch: 007 | loss: 0.60068 - acc: 0.6600 -- iter: 11904/20250


Training Step: 2089  | total loss: [1m[32m0.59945[0m[0m | time: 46.716s
[2K| Adam | epoch: 007 | loss: 0.59945 - acc: 0.6580 -- iter: 11968/20250


Training Step: 2090  | total loss: [1m[32m0.60237[0m[0m | time: 46.963s
[2K| Adam | epoch: 007 | loss: 0.60237 - acc: 0.6500 -- iter: 12032/20250


Training Step: 2091  | total loss: [1m[32m0.60172[0m[0m | time: 47.211s
[2K| Adam | epoch: 007 | loss: 0.60172 - acc: 0.6491 -- iter: 12096/20250


Training Step: 2092  | total loss: [1m[32m0.60628[0m[0m | time: 47.461s
[2K| Adam | epoch: 007 | loss: 0.60628 - acc: 0.6436 -- iter: 12160/20250


Training Step: 2093  | total loss: [1m[32m0.59914[0m[0m | time: 47.716s
[2K| Adam | epoch: 007 | loss: 0.59914 - acc: 0.6526 -- iter: 12224/20250


Training Step: 2094  | total loss: [1m[32m0.59430[0m[0m | time: 47.968s
[2K| Adam | epoch: 007 | loss: 0.59430 - acc: 0.6530 -- iter: 12288/20250


Training Step: 2095  | total loss: [1m[32m0.59292[0m[0m | time: 48.220s
[2K| Adam | epoch: 007 | loss: 0.59292 - acc: 0.6596 -- iter: 12352/20250


Training Step: 2096  | total loss: [1m[32m0.58917[0m[0m | time: 48.455s
[2K| Adam | epoch: 007 | loss: 0.58917 - acc: 0.6686 -- iter: 12416/20250


Training Step: 2097  | total loss: [1m[32m0.59631[0m[0m | time: 48.701s
[2K| Adam | epoch: 007 | loss: 0.59631 - acc: 0.6643 -- iter: 12480/20250


Training Step: 2098  | total loss: [1m[32m0.59363[0m[0m | time: 48.944s
[2K| Adam | epoch: 007 | loss: 0.59363 - acc: 0.6681 -- iter: 12544/20250


Training Step: 2099  | total loss: [1m[32m0.59647[0m[0m | time: 49.186s
[2K| Adam | epoch: 007 | loss: 0.59647 - acc: 0.6560 -- iter: 12608/20250


Training Step: 2100  | total loss: [1m[32m0.59809[0m[0m | time: 49.427s
[2K| Adam | epoch: 007 | loss: 0.59809 - acc: 0.6592 -- iter: 12672/20250


Training Step: 2101  | total loss: [1m[32m0.60282[0m[0m | time: 49.671s
[2K| Adam | epoch: 007 | loss: 0.60282 - acc: 0.6573 -- iter: 12736/20250


Training Step: 2102  | total loss: [1m[32m0.60337[0m[0m | time: 49.909s
[2K| Adam | epoch: 007 | loss: 0.60337 - acc: 0.6510 -- iter: 12800/20250


Training Step: 2103  | total loss: [1m[32m0.60699[0m[0m | time: 50.143s
[2K| Adam | epoch: 007 | loss: 0.60699 - acc: 0.6499 -- iter: 12864/20250


Training Step: 2104  | total loss: [1m[32m0.60816[0m[0m | time: 50.386s
[2K| Adam | epoch: 007 | loss: 0.60816 - acc: 0.6474 -- iter: 12928/20250


Training Step: 2105  | total loss: [1m[32m0.60330[0m[0m | time: 50.652s
[2K| Adam | epoch: 007 | loss: 0.60330 - acc: 0.6530 -- iter: 12992/20250


Training Step: 2106  | total loss: [1m[32m0.59724[0m[0m | time: 50.903s
[2K| Adam | epoch: 007 | loss: 0.59724 - acc: 0.6596 -- iter: 13056/20250


Training Step: 2107  | total loss: [1m[32m0.60136[0m[0m | time: 51.151s
[2K| Adam | epoch: 007 | loss: 0.60136 - acc: 0.6592 -- iter: 13120/20250


Training Step: 2108  | total loss: [1m[32m0.60587[0m[0m | time: 51.393s
[2K| Adam | epoch: 007 | loss: 0.60587 - acc: 0.6543 -- iter: 13184/20250


Training Step: 2109  | total loss: [1m[32m0.60458[0m[0m | time: 51.654s
[2K| Adam | epoch: 007 | loss: 0.60458 - acc: 0.6498 -- iter: 13248/20250


Training Step: 2110  | total loss: [1m[32m0.60389[0m[0m | time: 51.901s
[2K| Adam | epoch: 007 | loss: 0.60389 - acc: 0.6504 -- iter: 13312/20250


Training Step: 2111  | total loss: [1m[32m0.60287[0m[0m | time: 52.142s
[2K| Adam | epoch: 007 | loss: 0.60287 - acc: 0.6447 -- iter: 13376/20250


Training Step: 2112  | total loss: [1m[32m0.59745[0m[0m | time: 52.385s
[2K| Adam | epoch: 007 | loss: 0.59745 - acc: 0.6568 -- iter: 13440/20250


Training Step: 2113  | total loss: [1m[32m0.59090[0m[0m | time: 52.630s
[2K| Adam | epoch: 007 | loss: 0.59090 - acc: 0.6599 -- iter: 13504/20250


Training Step: 2114  | total loss: [1m[32m0.60028[0m[0m | time: 52.892s
[2K| Adam | epoch: 007 | loss: 0.60028 - acc: 0.6486 -- iter: 13568/20250


Training Step: 2115  | total loss: [1m[32m0.59924[0m[0m | time: 53.173s
[2K| Adam | epoch: 007 | loss: 0.59924 - acc: 0.6509 -- iter: 13632/20250


Training Step: 2116  | total loss: [1m[32m0.59232[0m[0m | time: 53.423s
[2K| Adam | epoch: 007 | loss: 0.59232 - acc: 0.6593 -- iter: 13696/20250


Training Step: 2117  | total loss: [1m[32m0.58893[0m[0m | time: 53.669s
[2K| Adam | epoch: 007 | loss: 0.58893 - acc: 0.6574 -- iter: 13760/20250


Training Step: 2118  | total loss: [1m[32m0.58951[0m[0m | time: 53.913s
[2K| Adam | epoch: 007 | loss: 0.58951 - acc: 0.6620 -- iter: 13824/20250


Training Step: 2119  | total loss: [1m[32m0.59336[0m[0m | time: 54.152s
[2K| Adam | epoch: 007 | loss: 0.59336 - acc: 0.6520 -- iter: 13888/20250


Training Step: 2120  | total loss: [1m[32m0.59196[0m[0m | time: 54.391s
[2K| Adam | epoch: 007 | loss: 0.59196 - acc: 0.6509 -- iter: 13952/20250


Training Step: 2121  | total loss: [1m[32m0.58598[0m[0m | time: 54.644s
[2K| Adam | epoch: 007 | loss: 0.58598 - acc: 0.6561 -- iter: 14016/20250


Training Step: 2122  | total loss: [1m[32m0.59151[0m[0m | time: 54.889s
[2K| Adam | epoch: 007 | loss: 0.59151 - acc: 0.6483 -- iter: 14080/20250


Training Step: 2123  | total loss: [1m[32m0.58749[0m[0m | time: 55.132s
[2K| Adam | epoch: 007 | loss: 0.58749 - acc: 0.6522 -- iter: 14144/20250


Training Step: 2124  | total loss: [1m[32m0.58120[0m[0m | time: 55.396s
[2K| Adam | epoch: 007 | loss: 0.58120 - acc: 0.6651 -- iter: 14208/20250


Training Step: 2125  | total loss: [1m[32m0.57628[0m[0m | time: 55.682s
[2K| Adam | epoch: 007 | loss: 0.57628 - acc: 0.6689 -- iter: 14272/20250


Training Step: 2126  | total loss: [1m[32m0.58051[0m[0m | time: 55.931s
[2K| Adam | epoch: 007 | loss: 0.58051 - acc: 0.6677 -- iter: 14336/20250


Training Step: 2127  | total loss: [1m[32m0.57704[0m[0m | time: 56.172s
[2K| Adam | epoch: 007 | loss: 0.57704 - acc: 0.6759 -- iter: 14400/20250


Training Step: 2128  | total loss: [1m[32m0.57844[0m[0m | time: 56.414s
[2K| Adam | epoch: 007 | loss: 0.57844 - acc: 0.6817 -- iter: 14464/20250


Training Step: 2129  | total loss: [1m[32m0.58072[0m[0m | time: 56.656s
[2K| Adam | epoch: 007 | loss: 0.58072 - acc: 0.6823 -- iter: 14528/20250


Training Step: 2130  | total loss: [1m[32m0.58079[0m[0m | time: 56.900s
[2K| Adam | epoch: 007 | loss: 0.58079 - acc: 0.6813 -- iter: 14592/20250


Training Step: 2131  | total loss: [1m[32m0.57984[0m[0m | time: 57.141s
[2K| Adam | epoch: 007 | loss: 0.57984 - acc: 0.6850 -- iter: 14656/20250


Training Step: 2132  | total loss: [1m[32m0.58832[0m[0m | time: 57.387s
[2K| Adam | epoch: 007 | loss: 0.58832 - acc: 0.6821 -- iter: 14720/20250


Training Step: 2133  | total loss: [1m[32m0.59092[0m[0m | time: 57.643s
[2K| Adam | epoch: 007 | loss: 0.59092 - acc: 0.6780 -- iter: 14784/20250


Training Step: 2134  | total loss: [1m[32m0.59016[0m[0m | time: 57.907s
[2K| Adam | epoch: 007 | loss: 0.59016 - acc: 0.6774 -- iter: 14848/20250


Training Step: 2135  | total loss: [1m[32m0.59121[0m[0m | time: 58.152s
[2K| Adam | epoch: 007 | loss: 0.59121 - acc: 0.6690 -- iter: 14912/20250


Training Step: 2136  | total loss: [1m[32m0.58880[0m[0m | time: 58.412s
[2K| Adam | epoch: 007 | loss: 0.58880 - acc: 0.6756 -- iter: 14976/20250


Training Step: 2137  | total loss: [1m[32m0.59540[0m[0m | time: 58.654s
[2K| Adam | epoch: 007 | loss: 0.59540 - acc: 0.6611 -- iter: 15040/20250


Training Step: 2138  | total loss: [1m[32m0.59553[0m[0m | time: 58.903s
[2K| Adam | epoch: 007 | loss: 0.59553 - acc: 0.6638 -- iter: 15104/20250


Training Step: 2139  | total loss: [1m[32m0.59508[0m[0m | time: 59.147s
[2K| Adam | epoch: 007 | loss: 0.59508 - acc: 0.6614 -- iter: 15168/20250


Training Step: 2140  | total loss: [1m[32m0.59274[0m[0m | time: 59.388s
[2K| Adam | epoch: 007 | loss: 0.59274 - acc: 0.6641 -- iter: 15232/20250


Training Step: 2141  | total loss: [1m[32m0.59033[0m[0m | time: 59.632s
[2K| Adam | epoch: 007 | loss: 0.59033 - acc: 0.6633 -- iter: 15296/20250


Training Step: 2142  | total loss: [1m[32m0.59842[0m[0m | time: 59.875s
[2K| Adam | epoch: 007 | loss: 0.59842 - acc: 0.6516 -- iter: 15360/20250


Training Step: 2143  | total loss: [1m[32m0.59372[0m[0m | time: 60.122s
[2K| Adam | epoch: 007 | loss: 0.59372 - acc: 0.6599 -- iter: 15424/20250


Training Step: 2144  | total loss: [1m[32m0.59001[0m[0m | time: 60.390s
[2K| Adam | epoch: 007 | loss: 0.59001 - acc: 0.6627 -- iter: 15488/20250


Training Step: 2145  | total loss: [1m[32m0.59075[0m[0m | time: 60.668s
[2K| Adam | epoch: 007 | loss: 0.59075 - acc: 0.6683 -- iter: 15552/20250


Training Step: 2146  | total loss: [1m[32m0.58183[0m[0m | time: 60.917s
[2K| Adam | epoch: 007 | loss: 0.58183 - acc: 0.6733 -- iter: 15616/20250


Training Step: 2147  | total loss: [1m[32m0.57438[0m[0m | time: 61.163s
[2K| Adam | epoch: 007 | loss: 0.57438 - acc: 0.6810 -- iter: 15680/20250


Training Step: 2148  | total loss: [1m[32m0.57336[0m[0m | time: 61.405s
[2K| Adam | epoch: 007 | loss: 0.57336 - acc: 0.6785 -- iter: 15744/20250


Training Step: 2149  | total loss: [1m[32m0.57727[0m[0m | time: 61.648s
[2K| Adam | epoch: 007 | loss: 0.57727 - acc: 0.6732 -- iter: 15808/20250


Training Step: 2150  | total loss: [1m[32m0.58203[0m[0m | time: 61.887s
[2K| Adam | epoch: 007 | loss: 0.58203 - acc: 0.6668 -- iter: 15872/20250


Training Step: 2151  | total loss: [1m[32m0.57698[0m[0m | time: 62.148s
[2K| Adam | epoch: 007 | loss: 0.57698 - acc: 0.6735 -- iter: 15936/20250


Training Step: 2152  | total loss: [1m[32m0.57679[0m[0m | time: 62.393s
[2K| Adam | epoch: 007 | loss: 0.57679 - acc: 0.6718 -- iter: 16000/20250


Training Step: 2153  | total loss: [1m[32m0.58969[0m[0m | time: 62.640s
[2K| Adam | epoch: 007 | loss: 0.58969 - acc: 0.6546 -- iter: 16064/20250


Training Step: 2154  | total loss: [1m[32m0.59066[0m[0m | time: 62.897s
[2K| Adam | epoch: 007 | loss: 0.59066 - acc: 0.6532 -- iter: 16128/20250


Training Step: 2155  | total loss: [1m[32m0.58692[0m[0m | time: 63.184s
[2K| Adam | epoch: 007 | loss: 0.58692 - acc: 0.6567 -- iter: 16192/20250


Training Step: 2156  | total loss: [1m[32m0.58806[0m[0m | time: 63.430s
[2K| Adam | epoch: 007 | loss: 0.58806 - acc: 0.6551 -- iter: 16256/20250


Training Step: 2157  | total loss: [1m[32m0.59090[0m[0m | time: 63.678s
[2K| Adam | epoch: 007 | loss: 0.59090 - acc: 0.6536 -- iter: 16320/20250


Training Step: 2158  | total loss: [1m[32m0.59716[0m[0m | time: 63.922s
[2K| Adam | epoch: 007 | loss: 0.59716 - acc: 0.6508 -- iter: 16384/20250


Training Step: 2159  | total loss: [1m[32m0.60026[0m[0m | time: 64.165s
[2K| Adam | epoch: 007 | loss: 0.60026 - acc: 0.6482 -- iter: 16448/20250


Training Step: 2160  | total loss: [1m[32m0.59653[0m[0m | time: 64.406s
[2K| Adam | epoch: 007 | loss: 0.59653 - acc: 0.6474 -- iter: 16512/20250


Training Step: 2161  | total loss: [1m[32m0.58975[0m[0m | time: 64.653s
[2K| Adam | epoch: 007 | loss: 0.58975 - acc: 0.6514 -- iter: 16576/20250


Training Step: 2162  | total loss: [1m[32m0.58674[0m[0m | time: 64.899s
[2K| Adam | epoch: 007 | loss: 0.58674 - acc: 0.6597 -- iter: 16640/20250


Training Step: 2163  | total loss: [1m[32m0.58384[0m[0m | time: 65.148s
[2K| Adam | epoch: 007 | loss: 0.58384 - acc: 0.6672 -- iter: 16704/20250


Training Step: 2164  | total loss: [1m[32m0.58740[0m[0m | time: 65.434s
[2K| Adam | epoch: 007 | loss: 0.58740 - acc: 0.6645 -- iter: 16768/20250


Training Step: 2165  | total loss: [1m[32m0.57989[0m[0m | time: 65.727s
[2K| Adam | epoch: 007 | loss: 0.57989 - acc: 0.6746 -- iter: 16832/20250


Training Step: 2166  | total loss: [1m[32m0.57096[0m[0m | time: 65.970s
[2K| Adam | epoch: 007 | loss: 0.57096 - acc: 0.6869 -- iter: 16896/20250


Training Step: 2167  | total loss: [1m[32m0.56698[0m[0m | time: 66.210s
[2K| Adam | epoch: 007 | loss: 0.56698 - acc: 0.6916 -- iter: 16960/20250


Training Step: 2168  | total loss: [1m[32m0.56647[0m[0m | time: 66.450s
[2K| Adam | epoch: 007 | loss: 0.56647 - acc: 0.6943 -- iter: 17024/20250


Training Step: 2169  | total loss: [1m[32m0.56534[0m[0m | time: 66.695s
[2K| Adam | epoch: 007 | loss: 0.56534 - acc: 0.6999 -- iter: 17088/20250


Training Step: 2170  | total loss: [1m[32m0.56233[0m[0m | time: 66.942s
[2K| Adam | epoch: 007 | loss: 0.56233 - acc: 0.7002 -- iter: 17152/20250


Training Step: 2171  | total loss: [1m[32m0.55942[0m[0m | time: 67.194s
[2K| Adam | epoch: 007 | loss: 0.55942 - acc: 0.7068 -- iter: 17216/20250


Training Step: 2172  | total loss: [1m[32m0.56007[0m[0m | time: 67.428s
[2K| Adam | epoch: 007 | loss: 0.56007 - acc: 0.7064 -- iter: 17280/20250


Training Step: 2173  | total loss: [1m[32m0.56380[0m[0m | time: 67.670s
[2K| Adam | epoch: 007 | loss: 0.56380 - acc: 0.7014 -- iter: 17344/20250


Training Step: 2174  | total loss: [1m[32m0.56014[0m[0m | time: 67.932s
[2K| Adam | epoch: 007 | loss: 0.56014 - acc: 0.7031 -- iter: 17408/20250


Training Step: 2175  | total loss: [1m[32m0.55632[0m[0m | time: 68.196s
[2K| Adam | epoch: 007 | loss: 0.55632 - acc: 0.7031 -- iter: 17472/20250


Training Step: 2176  | total loss: [1m[32m0.56121[0m[0m | time: 68.442s
[2K| Adam | epoch: 007 | loss: 0.56121 - acc: 0.6984 -- iter: 17536/20250


Training Step: 2177  | total loss: [1m[32m0.55950[0m[0m | time: 68.683s
[2K| Adam | epoch: 007 | loss: 0.55950 - acc: 0.6973 -- iter: 17600/20250


Training Step: 2178  | total loss: [1m[32m0.56434[0m[0m | time: 68.931s
[2K| Adam | epoch: 007 | loss: 0.56434 - acc: 0.6948 -- iter: 17664/20250


Training Step: 2179  | total loss: [1m[32m0.57040[0m[0m | time: 69.180s
[2K| Adam | epoch: 007 | loss: 0.57040 - acc: 0.6831 -- iter: 17728/20250


Training Step: 2180  | total loss: [1m[32m0.57221[0m[0m | time: 69.427s
[2K| Adam | epoch: 007 | loss: 0.57221 - acc: 0.6804 -- iter: 17792/20250


Training Step: 2181  | total loss: [1m[32m0.56911[0m[0m | time: 69.665s
[2K| Adam | epoch: 007 | loss: 0.56911 - acc: 0.6843 -- iter: 17856/20250


Training Step: 2182  | total loss: [1m[32m0.57597[0m[0m | time: 69.908s
[2K| Adam | epoch: 007 | loss: 0.57597 - acc: 0.6737 -- iter: 17920/20250


Training Step: 2183  | total loss: [1m[32m0.57723[0m[0m | time: 70.152s
[2K| Adam | epoch: 007 | loss: 0.57723 - acc: 0.6641 -- iter: 17984/20250


Training Step: 2184  | total loss: [1m[32m0.57423[0m[0m | time: 70.392s
[2K| Adam | epoch: 007 | loss: 0.57423 - acc: 0.6727 -- iter: 18048/20250


Training Step: 2185  | total loss: [1m[32m0.58307[0m[0m | time: 70.682s
[2K| Adam | epoch: 007 | loss: 0.58307 - acc: 0.6664 -- iter: 18112/20250


Training Step: 2186  | total loss: [1m[32m0.59033[0m[0m | time: 70.926s
[2K| Adam | epoch: 007 | loss: 0.59033 - acc: 0.6591 -- iter: 18176/20250


Training Step: 2187  | total loss: [1m[32m0.58811[0m[0m | time: 71.170s
[2K| Adam | epoch: 007 | loss: 0.58811 - acc: 0.6651 -- iter: 18240/20250


Training Step: 2188  | total loss: [1m[32m0.58249[0m[0m | time: 71.416s
[2K| Adam | epoch: 007 | loss: 0.58249 - acc: 0.6657 -- iter: 18304/20250


Training Step: 2189  | total loss: [1m[32m0.57956[0m[0m | time: 71.668s
[2K| Adam | epoch: 007 | loss: 0.57956 - acc: 0.6695 -- iter: 18368/20250


Training Step: 2190  | total loss: [1m[32m0.57857[0m[0m | time: 71.906s
[2K| Adam | epoch: 007 | loss: 0.57857 - acc: 0.6697 -- iter: 18432/20250


Training Step: 2191  | total loss: [1m[32m0.58270[0m[0m | time: 72.155s
[2K| Adam | epoch: 007 | loss: 0.58270 - acc: 0.6699 -- iter: 18496/20250


Training Step: 2192  | total loss: [1m[32m0.58476[0m[0m | time: 72.400s
[2K| Adam | epoch: 007 | loss: 0.58476 - acc: 0.6748 -- iter: 18560/20250


Training Step: 2193  | total loss: [1m[32m0.58169[0m[0m | time: 72.640s
[2K| Adam | epoch: 007 | loss: 0.58169 - acc: 0.6776 -- iter: 18624/20250


Training Step: 2194  | total loss: [1m[32m0.58371[0m[0m | time: 72.910s
[2K| Adam | epoch: 007 | loss: 0.58371 - acc: 0.6802 -- iter: 18688/20250


Training Step: 2195  | total loss: [1m[32m0.58199[0m[0m | time: 73.193s
[2K| Adam | epoch: 007 | loss: 0.58199 - acc: 0.6747 -- iter: 18752/20250


Training Step: 2196  | total loss: [1m[32m0.57760[0m[0m | time: 73.439s
[2K| Adam | epoch: 007 | loss: 0.57760 - acc: 0.6775 -- iter: 18816/20250


Training Step: 2197  | total loss: [1m[32m0.57067[0m[0m | time: 73.690s
[2K| Adam | epoch: 007 | loss: 0.57067 - acc: 0.6910 -- iter: 18880/20250


Training Step: 2198  | total loss: [1m[32m0.56608[0m[0m | time: 73.938s
[2K| Adam | epoch: 007 | loss: 0.56608 - acc: 0.6938 -- iter: 18944/20250


Training Step: 2199  | total loss: [1m[32m0.55612[0m[0m | time: 74.172s
[2K| Adam | epoch: 007 | loss: 0.55612 - acc: 0.7072 -- iter: 19008/20250


Training Step: 2200  | total loss: [1m[32m0.54981[0m[0m | time: 74.415s
[2K| Adam | epoch: 007 | loss: 0.54981 - acc: 0.7099 -- iter: 19072/20250


Training Step: 2201  | total loss: [1m[32m0.55318[0m[0m | time: 74.659s
[2K| Adam | epoch: 007 | loss: 0.55318 - acc: 0.7077 -- iter: 19136/20250


Training Step: 2202  | total loss: [1m[32m0.55493[0m[0m | time: 74.901s
[2K| Adam | epoch: 007 | loss: 0.55493 - acc: 0.7057 -- iter: 19200/20250


Training Step: 2203  | total loss: [1m[32m0.55273[0m[0m | time: 75.147s
[2K| Adam | epoch: 007 | loss: 0.55273 - acc: 0.7070 -- iter: 19264/20250


Training Step: 2204  | total loss: [1m[32m0.54646[0m[0m | time: 75.410s
[2K| Adam | epoch: 007 | loss: 0.54646 - acc: 0.7222 -- iter: 19328/20250


Training Step: 2205  | total loss: [1m[32m0.54838[0m[0m | time: 75.689s
[2K| Adam | epoch: 007 | loss: 0.54838 - acc: 0.7234 -- iter: 19392/20250


Training Step: 2206  | total loss: [1m[32m0.54709[0m[0m | time: 75.936s
[2K| Adam | epoch: 007 | loss: 0.54709 - acc: 0.7261 -- iter: 19456/20250


Training Step: 2207  | total loss: [1m[32m0.53730[0m[0m | time: 76.178s
[2K| Adam | epoch: 007 | loss: 0.53730 - acc: 0.7316 -- iter: 19520/20250


Training Step: 2208  | total loss: [1m[32m0.54365[0m[0m | time: 76.419s
[2K| Adam | epoch: 007 | loss: 0.54365 - acc: 0.7194 -- iter: 19584/20250


Training Step: 2209  | total loss: [1m[32m0.54834[0m[0m | time: 76.677s
[2K| Adam | epoch: 007 | loss: 0.54834 - acc: 0.7146 -- iter: 19648/20250


Training Step: 2210  | total loss: [1m[32m0.54928[0m[0m | time: 76.920s
[2K| Adam | epoch: 007 | loss: 0.54928 - acc: 0.7166 -- iter: 19712/20250


Training Step: 2211  | total loss: [1m[32m0.54697[0m[0m | time: 77.170s
[2K| Adam | epoch: 007 | loss: 0.54697 - acc: 0.7184 -- iter: 19776/20250


Training Step: 2212  | total loss: [1m[32m0.54115[0m[0m | time: 77.419s
[2K| Adam | epoch: 007 | loss: 0.54115 - acc: 0.7231 -- iter: 19840/20250


Training Step: 2213  | total loss: [1m[32m0.54739[0m[0m | time: 77.670s
[2K| Adam | epoch: 007 | loss: 0.54739 - acc: 0.7180 -- iter: 19904/20250


Training Step: 2214  | total loss: [1m[32m0.54649[0m[0m | time: 77.939s
[2K| Adam | epoch: 007 | loss: 0.54649 - acc: 0.7243 -- iter: 19968/20250


Training Step: 2215  | total loss: [1m[32m0.54278[0m[0m | time: 78.216s
[2K| Adam | epoch: 007 | loss: 0.54278 - acc: 0.7253 -- iter: 20032/20250


Training Step: 2216  | total loss: [1m[32m0.54141[0m[0m | time: 78.470s
[2K| Adam | epoch: 007 | loss: 0.54141 - acc: 0.7293 -- iter: 20096/20250


Training Step: 2217  | total loss: [1m[32m0.54205[0m[0m | time: 78.714s
[2K| Adam | epoch: 007 | loss: 0.54205 - acc: 0.7267 -- iter: 20160/20250


Training Step: 2218  | total loss: [1m[32m0.53986[0m[0m | time: 78.960s
[2K| Adam | epoch: 007 | loss: 0.53986 - acc: 0.7322 -- iter: 20224/20250


Training Step: 2219  | total loss: [1m[32m0.53467[0m[0m | time: 80.953s
[2K| Adam | epoch: 007 | loss: 0.53467 - acc: 0.7386 | val_loss: 0.67390 - val_acc: 0.6867 -- iter: 20250/20250
--


Training Step: 2220  | total loss: [1m[32m0.53188[0m[0m | time: 0.247s
[2K| Adam | epoch: 008 | loss: 0.53188 - acc: 0.7382 -- iter: 00064/20250


Training Step: 2221  | total loss: [1m[32m0.52298[0m[0m | time: 0.494s
[2K| Adam | epoch: 008 | loss: 0.52298 - acc: 0.7456 -- iter: 00128/20250


Training Step: 2222  | total loss: [1m[32m0.52143[0m[0m | time: 0.757s
[2K| Adam | epoch: 008 | loss: 0.52143 - acc: 0.7461 -- iter: 00192/20250


Training Step: 2223  | total loss: [1m[32m0.51285[0m[0m | time: 1.007s
[2K| Adam | epoch: 008 | loss: 0.51285 - acc: 0.7543 -- iter: 00256/20250


Training Step: 2224  | total loss: [1m[32m0.51593[0m[0m | time: 1.267s
[2K| Adam | epoch: 008 | loss: 0.51593 - acc: 0.7492 -- iter: 00320/20250


Training Step: 2225  | total loss: [1m[32m0.51277[0m[0m | time: 1.513s
[2K| Adam | epoch: 008 | loss: 0.51277 - acc: 0.7555 -- iter: 00384/20250


Training Step: 2226  | total loss: [1m[32m0.52195[0m[0m | time: 1.775s
[2K| Adam | epoch: 008 | loss: 0.52195 - acc: 0.7453 -- iter: 00448/20250


Training Step: 2227  | total loss: [1m[32m0.52048[0m[0m | time: 2.024s
[2K| Adam | epoch: 008 | loss: 0.52048 - acc: 0.7477 -- iter: 00512/20250


Training Step: 2228  | total loss: [1m[32m0.53793[0m[0m | time: 2.283s
[2K| Adam | epoch: 008 | loss: 0.53793 - acc: 0.7292 -- iter: 00576/20250


Training Step: 2229  | total loss: [1m[32m0.54146[0m[0m | time: 2.547s
[2K| Adam | epoch: 008 | loss: 0.54146 - acc: 0.7203 -- iter: 00640/20250


Training Step: 2230  | total loss: [1m[32m0.55270[0m[0m | time: 2.797s
[2K| Adam | epoch: 008 | loss: 0.55270 - acc: 0.7093 -- iter: 00704/20250


Training Step: 2231  | total loss: [1m[32m0.55346[0m[0m | time: 3.064s
[2K| Adam | epoch: 008 | loss: 0.55346 - acc: 0.7118 -- iter: 00768/20250


Training Step: 2232  | total loss: [1m[32m0.55343[0m[0m | time: 3.307s
[2K| Adam | epoch: 008 | loss: 0.55343 - acc: 0.7140 -- iter: 00832/20250


Training Step: 2233  | total loss: [1m[32m0.55911[0m[0m | time: 3.553s
[2K| Adam | epoch: 008 | loss: 0.55911 - acc: 0.7051 -- iter: 00896/20250


Training Step: 2234  | total loss: [1m[32m0.57974[0m[0m | time: 3.796s
[2K| Adam | epoch: 008 | loss: 0.57974 - acc: 0.6862 -- iter: 00960/20250


Training Step: 2235  | total loss: [1m[32m0.58491[0m[0m | time: 4.066s
[2K| Adam | epoch: 008 | loss: 0.58491 - acc: 0.6816 -- iter: 01024/20250


Training Step: 2236  | total loss: [1m[32m0.58584[0m[0m | time: 4.315s
[2K| Adam | epoch: 008 | loss: 0.58584 - acc: 0.6806 -- iter: 01088/20250


Training Step: 2237  | total loss: [1m[32m0.59247[0m[0m | time: 4.561s
[2K| Adam | epoch: 008 | loss: 0.59247 - acc: 0.6704 -- iter: 01152/20250


Training Step: 2238  | total loss: [1m[32m0.58655[0m[0m | time: 4.818s
[2K| Adam | epoch: 008 | loss: 0.58655 - acc: 0.6721 -- iter: 01216/20250


Training Step: 2239  | total loss: [1m[32m0.58636[0m[0m | time: 5.066s
[2K| Adam | epoch: 008 | loss: 0.58636 - acc: 0.6674 -- iter: 01280/20250


Training Step: 2240  | total loss: [1m[32m0.58342[0m[0m | time: 5.315s
[2K| Adam | epoch: 008 | loss: 0.58342 - acc: 0.6616 -- iter: 01344/20250


Training Step: 2241  | total loss: [1m[32m0.58907[0m[0m | time: 5.589s
[2K| Adam | epoch: 008 | loss: 0.58907 - acc: 0.6517 -- iter: 01408/20250


Training Step: 2242  | total loss: [1m[32m0.58481[0m[0m | time: 5.838s
[2K| Adam | epoch: 008 | loss: 0.58481 - acc: 0.6600 -- iter: 01472/20250


Training Step: 2243  | total loss: [1m[32m0.58275[0m[0m | time: 6.082s
[2K| Adam | epoch: 008 | loss: 0.58275 - acc: 0.6549 -- iter: 01536/20250


Training Step: 2244  | total loss: [1m[32m0.59271[0m[0m | time: 6.337s
[2K| Adam | epoch: 008 | loss: 0.59271 - acc: 0.6441 -- iter: 01600/20250


Training Step: 2245  | total loss: [1m[32m0.58898[0m[0m | time: 6.573s
[2K| Adam | epoch: 008 | loss: 0.58898 - acc: 0.6531 -- iter: 01664/20250


Training Step: 2246  | total loss: [1m[32m0.58973[0m[0m | time: 6.825s
[2K| Adam | epoch: 008 | loss: 0.58973 - acc: 0.6519 -- iter: 01728/20250


Training Step: 2247  | total loss: [1m[32m0.58675[0m[0m | time: 7.072s
[2K| Adam | epoch: 008 | loss: 0.58675 - acc: 0.6601 -- iter: 01792/20250


Training Step: 2248  | total loss: [1m[32m0.58920[0m[0m | time: 7.317s
[2K| Adam | epoch: 008 | loss: 0.58920 - acc: 0.6457 -- iter: 01856/20250


Training Step: 2249  | total loss: [1m[32m0.58733[0m[0m | time: 7.559s
[2K| Adam | epoch: 008 | loss: 0.58733 - acc: 0.6374 -- iter: 01920/20250


Training Step: 2250  | total loss: [1m[32m0.58846[0m[0m | time: 7.805s
[2K| Adam | epoch: 008 | loss: 0.58846 - acc: 0.6346 -- iter: 01984/20250


Training Step: 2251  | total loss: [1m[32m0.59014[0m[0m | time: 8.084s
[2K| Adam | epoch: 008 | loss: 0.59014 - acc: 0.6305 -- iter: 02048/20250


Training Step: 2252  | total loss: [1m[32m0.59153[0m[0m | time: 8.328s
[2K| Adam | epoch: 008 | loss: 0.59153 - acc: 0.6284 -- iter: 02112/20250


Training Step: 2253  | total loss: [1m[32m0.59731[0m[0m | time: 8.569s
[2K| Adam | epoch: 008 | loss: 0.59731 - acc: 0.6202 -- iter: 02176/20250


Training Step: 2254  | total loss: [1m[32m0.59341[0m[0m | time: 8.825s
[2K| Adam | epoch: 008 | loss: 0.59341 - acc: 0.6191 -- iter: 02240/20250


Training Step: 2255  | total loss: [1m[32m0.59098[0m[0m | time: 9.075s
[2K| Adam | epoch: 008 | loss: 0.59098 - acc: 0.6275 -- iter: 02304/20250


Training Step: 2256  | total loss: [1m[32m0.59115[0m[0m | time: 9.323s
[2K| Adam | epoch: 008 | loss: 0.59115 - acc: 0.6226 -- iter: 02368/20250


Training Step: 2257  | total loss: [1m[32m0.59016[0m[0m | time: 9.578s
[2K| Adam | epoch: 008 | loss: 0.59016 - acc: 0.6260 -- iter: 02432/20250


Training Step: 2258  | total loss: [1m[32m0.59191[0m[0m | time: 9.822s
[2K| Adam | epoch: 008 | loss: 0.59191 - acc: 0.6227 -- iter: 02496/20250


Training Step: 2259  | total loss: [1m[32m0.59433[0m[0m | time: 10.065s
[2K| Adam | epoch: 008 | loss: 0.59433 - acc: 0.6230 -- iter: 02560/20250


Training Step: 2260  | total loss: [1m[32m0.58862[0m[0m | time: 10.330s
[2K| Adam | epoch: 008 | loss: 0.58862 - acc: 0.6310 -- iter: 02624/20250


Training Step: 2261  | total loss: [1m[32m0.58759[0m[0m | time: 10.593s
[2K| Adam | epoch: 008 | loss: 0.58759 - acc: 0.6382 -- iter: 02688/20250


Training Step: 2262  | total loss: [1m[32m0.58674[0m[0m | time: 10.845s
[2K| Adam | epoch: 008 | loss: 0.58674 - acc: 0.6353 -- iter: 02752/20250


Training Step: 2263  | total loss: [1m[32m0.58577[0m[0m | time: 11.086s
[2K| Adam | epoch: 008 | loss: 0.58577 - acc: 0.6358 -- iter: 02816/20250


Training Step: 2264  | total loss: [1m[32m0.58432[0m[0m | time: 11.332s
[2K| Adam | epoch: 008 | loss: 0.58432 - acc: 0.6426 -- iter: 02880/20250


Training Step: 2265  | total loss: [1m[32m0.58300[0m[0m | time: 11.573s
[2K| Adam | epoch: 008 | loss: 0.58300 - acc: 0.6486 -- iter: 02944/20250


Training Step: 2266  | total loss: [1m[32m0.58274[0m[0m | time: 11.818s
[2K| Adam | epoch: 008 | loss: 0.58274 - acc: 0.6525 -- iter: 03008/20250


Training Step: 2267  | total loss: [1m[32m0.57774[0m[0m | time: 12.066s
[2K| Adam | epoch: 008 | loss: 0.57774 - acc: 0.6607 -- iter: 03072/20250


Training Step: 2268  | total loss: [1m[32m0.58155[0m[0m | time: 12.368s
[2K| Adam | epoch: 008 | loss: 0.58155 - acc: 0.6634 -- iter: 03136/20250


Training Step: 2269  | total loss: [1m[32m0.58121[0m[0m | time: 12.626s
[2K| Adam | epoch: 008 | loss: 0.58121 - acc: 0.6580 -- iter: 03200/20250


Training Step: 2270  | total loss: [1m[32m0.57613[0m[0m | time: 12.880s
[2K| Adam | epoch: 008 | loss: 0.57613 - acc: 0.6625 -- iter: 03264/20250


Training Step: 2271  | total loss: [1m[32m0.57283[0m[0m | time: 13.160s
[2K| Adam | epoch: 008 | loss: 0.57283 - acc: 0.6681 -- iter: 03328/20250


Training Step: 2272  | total loss: [1m[32m0.56987[0m[0m | time: 13.398s
[2K| Adam | epoch: 008 | loss: 0.56987 - acc: 0.6763 -- iter: 03392/20250


Training Step: 2273  | total loss: [1m[32m0.57827[0m[0m | time: 13.639s
[2K| Adam | epoch: 008 | loss: 0.57827 - acc: 0.6696 -- iter: 03456/20250


Training Step: 2274  | total loss: [1m[32m0.58085[0m[0m | time: 13.886s
[2K| Adam | epoch: 008 | loss: 0.58085 - acc: 0.6683 -- iter: 03520/20250


Training Step: 2275  | total loss: [1m[32m0.57382[0m[0m | time: 14.131s
[2K| Adam | epoch: 008 | loss: 0.57382 - acc: 0.6749 -- iter: 03584/20250


Training Step: 2276  | total loss: [1m[32m0.57957[0m[0m | time: 14.378s
[2K| Adam | epoch: 008 | loss: 0.57957 - acc: 0.6636 -- iter: 03648/20250


Training Step: 2277  | total loss: [1m[32m0.57600[0m[0m | time: 14.624s
[2K| Adam | epoch: 008 | loss: 0.57600 - acc: 0.6645 -- iter: 03712/20250


Training Step: 2278  | total loss: [1m[32m0.57964[0m[0m | time: 14.880s
[2K| Adam | epoch: 008 | loss: 0.57964 - acc: 0.6652 -- iter: 03776/20250


Training Step: 2279  | total loss: [1m[32m0.58003[0m[0m | time: 15.130s
[2K| Adam | epoch: 008 | loss: 0.58003 - acc: 0.6612 -- iter: 03840/20250


Training Step: 2280  | total loss: [1m[32m0.58370[0m[0m | time: 15.393s
[2K| Adam | epoch: 008 | loss: 0.58370 - acc: 0.6560 -- iter: 03904/20250


Training Step: 2281  | total loss: [1m[32m0.58769[0m[0m | time: 15.660s
[2K| Adam | epoch: 008 | loss: 0.58769 - acc: 0.6498 -- iter: 03968/20250


Training Step: 2282  | total loss: [1m[32m0.59272[0m[0m | time: 15.904s
[2K| Adam | epoch: 008 | loss: 0.59272 - acc: 0.6489 -- iter: 04032/20250


Training Step: 2283  | total loss: [1m[32m0.58522[0m[0m | time: 16.160s
[2K| Adam | epoch: 008 | loss: 0.58522 - acc: 0.6527 -- iter: 04096/20250


Training Step: 2284  | total loss: [1m[32m0.57992[0m[0m | time: 16.416s
[2K| Adam | epoch: 008 | loss: 0.57992 - acc: 0.6609 -- iter: 04160/20250


Training Step: 2285  | total loss: [1m[32m0.58115[0m[0m | time: 16.661s
[2K| Adam | epoch: 008 | loss: 0.58115 - acc: 0.6526 -- iter: 04224/20250


Training Step: 2286  | total loss: [1m[32m0.57156[0m[0m | time: 16.906s
[2K| Adam | epoch: 008 | loss: 0.57156 - acc: 0.6624 -- iter: 04288/20250


Training Step: 2287  | total loss: [1m[32m0.56783[0m[0m | time: 17.150s
[2K| Adam | epoch: 008 | loss: 0.56783 - acc: 0.6711 -- iter: 04352/20250


Training Step: 2288  | total loss: [1m[32m0.56494[0m[0m | time: 17.398s
[2K| Adam | epoch: 008 | loss: 0.56494 - acc: 0.6743 -- iter: 04416/20250


Training Step: 2289  | total loss: [1m[32m0.56509[0m[0m | time: 17.648s
[2K| Adam | epoch: 008 | loss: 0.56509 - acc: 0.6710 -- iter: 04480/20250


Training Step: 2290  | total loss: [1m[32m0.56784[0m[0m | time: 17.922s
[2K| Adam | epoch: 008 | loss: 0.56784 - acc: 0.6648 -- iter: 04544/20250


Training Step: 2291  | total loss: [1m[32m0.57106[0m[0m | time: 18.209s
[2K| Adam | epoch: 008 | loss: 0.57106 - acc: 0.6624 -- iter: 04608/20250


Training Step: 2292  | total loss: [1m[32m0.57588[0m[0m | time: 18.454s
[2K| Adam | epoch: 008 | loss: 0.57588 - acc: 0.6555 -- iter: 04672/20250


Training Step: 2293  | total loss: [1m[32m0.57376[0m[0m | time: 18.698s
[2K| Adam | epoch: 008 | loss: 0.57376 - acc: 0.6603 -- iter: 04736/20250


Training Step: 2294  | total loss: [1m[32m0.58624[0m[0m | time: 18.940s
[2K| Adam | epoch: 008 | loss: 0.58624 - acc: 0.6458 -- iter: 04800/20250


Training Step: 2295  | total loss: [1m[32m0.58603[0m[0m | time: 19.187s
[2K| Adam | epoch: 008 | loss: 0.58603 - acc: 0.6469 -- iter: 04864/20250


Training Step: 2296  | total loss: [1m[32m0.59137[0m[0m | time: 19.430s
[2K| Adam | epoch: 008 | loss: 0.59137 - acc: 0.6400 -- iter: 04928/20250


Training Step: 2297  | total loss: [1m[32m0.58707[0m[0m | time: 19.671s
[2K| Adam | epoch: 008 | loss: 0.58707 - acc: 0.6479 -- iter: 04992/20250


Training Step: 2298  | total loss: [1m[32m0.58213[0m[0m | time: 19.914s
[2K| Adam | epoch: 008 | loss: 0.58213 - acc: 0.6549 -- iter: 05056/20250


Training Step: 2299  | total loss: [1m[32m0.57882[0m[0m | time: 20.163s
[2K| Adam | epoch: 008 | loss: 0.57882 - acc: 0.6582 -- iter: 05120/20250


Training Step: 2300  | total loss: [1m[32m0.58290[0m[0m | time: 20.450s
[2K| Adam | epoch: 008 | loss: 0.58290 - acc: 0.6502 -- iter: 05184/20250


Training Step: 2301  | total loss: [1m[32m0.58206[0m[0m | time: 20.706s
[2K| Adam | epoch: 008 | loss: 0.58206 - acc: 0.6508 -- iter: 05248/20250


Training Step: 2302  | total loss: [1m[32m0.57988[0m[0m | time: 20.947s
[2K| Adam | epoch: 008 | loss: 0.57988 - acc: 0.6467 -- iter: 05312/20250


Training Step: 2303  | total loss: [1m[32m0.57798[0m[0m | time: 21.191s
[2K| Adam | epoch: 008 | loss: 0.57798 - acc: 0.6445 -- iter: 05376/20250


Training Step: 2304  | total loss: [1m[32m0.57565[0m[0m | time: 21.451s
[2K| Adam | epoch: 008 | loss: 0.57565 - acc: 0.6504 -- iter: 05440/20250


Training Step: 2305  | total loss: [1m[32m0.57340[0m[0m | time: 21.699s
[2K| Adam | epoch: 008 | loss: 0.57340 - acc: 0.6572 -- iter: 05504/20250


Training Step: 2306  | total loss: [1m[32m0.56945[0m[0m | time: 21.953s
[2K| Adam | epoch: 008 | loss: 0.56945 - acc: 0.6680 -- iter: 05568/20250


Training Step: 2307  | total loss: [1m[32m0.56955[0m[0m | time: 22.202s
[2K| Adam | epoch: 008 | loss: 0.56955 - acc: 0.6700 -- iter: 05632/20250


Training Step: 2308  | total loss: [1m[32m0.56884[0m[0m | time: 22.449s
[2K| Adam | epoch: 008 | loss: 0.56884 - acc: 0.6764 -- iter: 05696/20250


Training Step: 2309  | total loss: [1m[32m0.56710[0m[0m | time: 22.695s
[2K| Adam | epoch: 008 | loss: 0.56710 - acc: 0.6775 -- iter: 05760/20250


Training Step: 2310  | total loss: [1m[32m0.57179[0m[0m | time: 22.946s
[2K| Adam | epoch: 008 | loss: 0.57179 - acc: 0.6754 -- iter: 05824/20250


Training Step: 2311  | total loss: [1m[32m0.57325[0m[0m | time: 23.199s
[2K| Adam | epoch: 008 | loss: 0.57325 - acc: 0.6766 -- iter: 05888/20250


Training Step: 2312  | total loss: [1m[32m0.57926[0m[0m | time: 23.482s
[2K| Adam | epoch: 008 | loss: 0.57926 - acc: 0.6715 -- iter: 05952/20250


Training Step: 2313  | total loss: [1m[32m0.57707[0m[0m | time: 23.731s
[2K| Adam | epoch: 008 | loss: 0.57707 - acc: 0.6731 -- iter: 06016/20250


Training Step: 2314  | total loss: [1m[32m0.57383[0m[0m | time: 23.972s
[2K| Adam | epoch: 008 | loss: 0.57383 - acc: 0.6698 -- iter: 06080/20250


Training Step: 2315  | total loss: [1m[32m0.57437[0m[0m | time: 24.232s
[2K| Adam | epoch: 008 | loss: 0.57437 - acc: 0.6653 -- iter: 06144/20250


Training Step: 2316  | total loss: [1m[32m0.56205[0m[0m | time: 24.478s
[2K| Adam | epoch: 008 | loss: 0.56205 - acc: 0.6800 -- iter: 06208/20250


Training Step: 2317  | total loss: [1m[32m0.55977[0m[0m | time: 24.719s
[2K| Adam | epoch: 008 | loss: 0.55977 - acc: 0.6761 -- iter: 06272/20250


Training Step: 2318  | total loss: [1m[32m0.55524[0m[0m | time: 24.972s
[2K| Adam | epoch: 008 | loss: 0.55524 - acc: 0.6835 -- iter: 06336/20250


Training Step: 2319  | total loss: [1m[32m0.56200[0m[0m | time: 25.217s
[2K| Adam | epoch: 008 | loss: 0.56200 - acc: 0.6761 -- iter: 06400/20250


Training Step: 2320  | total loss: [1m[32m0.55819[0m[0m | time: 25.483s
[2K| Adam | epoch: 008 | loss: 0.55819 - acc: 0.6788 -- iter: 06464/20250


Training Step: 2321  | total loss: [1m[32m0.56061[0m[0m | time: 25.748s
[2K| Adam | epoch: 008 | loss: 0.56061 - acc: 0.6734 -- iter: 06528/20250


Training Step: 2322  | total loss: [1m[32m0.55931[0m[0m | time: 26.005s
[2K| Adam | epoch: 008 | loss: 0.55931 - acc: 0.6748 -- iter: 06592/20250


Training Step: 2323  | total loss: [1m[32m0.55107[0m[0m | time: 26.259s
[2K| Adam | epoch: 008 | loss: 0.55107 - acc: 0.6855 -- iter: 06656/20250


Training Step: 2324  | total loss: [1m[32m0.55215[0m[0m | time: 26.505s
[2K| Adam | epoch: 008 | loss: 0.55215 - acc: 0.6857 -- iter: 06720/20250


Training Step: 2325  | total loss: [1m[32m0.55785[0m[0m | time: 26.750s
[2K| Adam | epoch: 008 | loss: 0.55785 - acc: 0.6827 -- iter: 06784/20250


Training Step: 2326  | total loss: [1m[32m0.55884[0m[0m | time: 27.001s
[2K| Adam | epoch: 008 | loss: 0.55884 - acc: 0.6785 -- iter: 06848/20250


Training Step: 2327  | total loss: [1m[32m0.55218[0m[0m | time: 27.247s
[2K| Adam | epoch: 008 | loss: 0.55218 - acc: 0.6888 -- iter: 06912/20250


Training Step: 2328  | total loss: [1m[32m0.55367[0m[0m | time: 27.494s
[2K| Adam | epoch: 008 | loss: 0.55367 - acc: 0.6808 -- iter: 06976/20250


Training Step: 2329  | total loss: [1m[32m0.55121[0m[0m | time: 27.745s
[2K| Adam | epoch: 008 | loss: 0.55121 - acc: 0.6831 -- iter: 07040/20250


Training Step: 2330  | total loss: [1m[32m0.55718[0m[0m | time: 27.996s
[2K| Adam | epoch: 008 | loss: 0.55718 - acc: 0.6788 -- iter: 07104/20250


Training Step: 2331  | total loss: [1m[32m0.55510[0m[0m | time: 28.258s
[2K| Adam | epoch: 008 | loss: 0.55510 - acc: 0.6766 -- iter: 07168/20250


Training Step: 2332  | total loss: [1m[32m0.55618[0m[0m | time: 28.507s
[2K| Adam | epoch: 008 | loss: 0.55618 - acc: 0.6777 -- iter: 07232/20250


Training Step: 2333  | total loss: [1m[32m0.55834[0m[0m | time: 28.758s
[2K| Adam | epoch: 008 | loss: 0.55834 - acc: 0.6802 -- iter: 07296/20250


Training Step: 2334  | total loss: [1m[32m0.55207[0m[0m | time: 29.003s
[2K| Adam | epoch: 008 | loss: 0.55207 - acc: 0.6825 -- iter: 07360/20250


Training Step: 2335  | total loss: [1m[32m0.55020[0m[0m | time: 29.249s
[2K| Adam | epoch: 008 | loss: 0.55020 - acc: 0.6846 -- iter: 07424/20250


Training Step: 2336  | total loss: [1m[32m0.54921[0m[0m | time: 29.501s
[2K| Adam | epoch: 008 | loss: 0.54921 - acc: 0.6864 -- iter: 07488/20250


Training Step: 2337  | total loss: [1m[32m0.54735[0m[0m | time: 29.748s
[2K| Adam | epoch: 008 | loss: 0.54735 - acc: 0.6865 -- iter: 07552/20250


Training Step: 2338  | total loss: [1m[32m0.55130[0m[0m | time: 29.996s
[2K| Adam | epoch: 008 | loss: 0.55130 - acc: 0.6882 -- iter: 07616/20250


Training Step: 2339  | total loss: [1m[32m0.54787[0m[0m | time: 30.242s
[2K| Adam | epoch: 008 | loss: 0.54787 - acc: 0.6897 -- iter: 07680/20250


Training Step: 2340  | total loss: [1m[32m0.54657[0m[0m | time: 30.501s
[2K| Adam | epoch: 008 | loss: 0.54657 - acc: 0.6817 -- iter: 07744/20250


Training Step: 2341  | total loss: [1m[32m0.55028[0m[0m | time: 30.785s
[2K| Adam | epoch: 008 | loss: 0.55028 - acc: 0.6760 -- iter: 07808/20250


Training Step: 2342  | total loss: [1m[32m0.55249[0m[0m | time: 31.032s
[2K| Adam | epoch: 008 | loss: 0.55249 - acc: 0.6724 -- iter: 07872/20250


Training Step: 2343  | total loss: [1m[32m0.55698[0m[0m | time: 31.281s
[2K| Adam | epoch: 008 | loss: 0.55698 - acc: 0.6708 -- iter: 07936/20250


Training Step: 2344  | total loss: [1m[32m0.55765[0m[0m | time: 31.528s
[2K| Adam | epoch: 008 | loss: 0.55765 - acc: 0.6725 -- iter: 08000/20250


Training Step: 2345  | total loss: [1m[32m0.56120[0m[0m | time: 31.773s
[2K| Adam | epoch: 008 | loss: 0.56120 - acc: 0.6646 -- iter: 08064/20250


Training Step: 2346  | total loss: [1m[32m0.54875[0m[0m | time: 32.021s
[2K| Adam | epoch: 008 | loss: 0.54875 - acc: 0.6747 -- iter: 08128/20250


Training Step: 2347  | total loss: [1m[32m0.55426[0m[0m | time: 32.272s
[2K| Adam | epoch: 008 | loss: 0.55426 - acc: 0.6635 -- iter: 08192/20250


Training Step: 2348  | total loss: [1m[32m0.54962[0m[0m | time: 32.526s
[2K| Adam | epoch: 008 | loss: 0.54962 - acc: 0.6706 -- iter: 08256/20250


Training Step: 2349  | total loss: [1m[32m0.54661[0m[0m | time: 32.770s
[2K| Adam | epoch: 008 | loss: 0.54661 - acc: 0.6770 -- iter: 08320/20250


Training Step: 2350  | total loss: [1m[32m0.54262[0m[0m | time: 33.031s
[2K| Adam | epoch: 008 | loss: 0.54262 - acc: 0.6827 -- iter: 08384/20250


Training Step: 2351  | total loss: [1m[32m0.53720[0m[0m | time: 33.311s
[2K| Adam | epoch: 008 | loss: 0.53720 - acc: 0.6894 -- iter: 08448/20250


Training Step: 2352  | total loss: [1m[32m0.54366[0m[0m | time: 33.562s
[2K| Adam | epoch: 008 | loss: 0.54366 - acc: 0.6846 -- iter: 08512/20250


Training Step: 2353  | total loss: [1m[32m0.53890[0m[0m | time: 33.811s
[2K| Adam | epoch: 008 | loss: 0.53890 - acc: 0.6911 -- iter: 08576/20250


Training Step: 2354  | total loss: [1m[32m0.53812[0m[0m | time: 34.067s
[2K| Adam | epoch: 008 | loss: 0.53812 - acc: 0.6923 -- iter: 08640/20250


Training Step: 2355  | total loss: [1m[32m0.53888[0m[0m | time: 34.317s
[2K| Adam | epoch: 008 | loss: 0.53888 - acc: 0.6918 -- iter: 08704/20250


Training Step: 2356  | total loss: [1m[32m0.54155[0m[0m | time: 34.561s
[2K| Adam | epoch: 008 | loss: 0.54155 - acc: 0.6961 -- iter: 08768/20250


Training Step: 2357  | total loss: [1m[32m0.54380[0m[0m | time: 34.807s
[2K| Adam | epoch: 008 | loss: 0.54380 - acc: 0.6890 -- iter: 08832/20250


Training Step: 2358  | total loss: [1m[32m0.55156[0m[0m | time: 35.052s
[2K| Adam | epoch: 008 | loss: 0.55156 - acc: 0.6779 -- iter: 08896/20250


Training Step: 2359  | total loss: [1m[32m0.55342[0m[0m | time: 35.298s
[2K| Adam | epoch: 008 | loss: 0.55342 - acc: 0.6773 -- iter: 08960/20250


Training Step: 2360  | total loss: [1m[32m0.55007[0m[0m | time: 35.557s
[2K| Adam | epoch: 008 | loss: 0.55007 - acc: 0.6846 -- iter: 09024/20250


Training Step: 2361  | total loss: [1m[32m0.54259[0m[0m | time: 35.849s
[2K| Adam | epoch: 008 | loss: 0.54259 - acc: 0.6974 -- iter: 09088/20250


Training Step: 2362  | total loss: [1m[32m0.53925[0m[0m | time: 36.092s
[2K| Adam | epoch: 008 | loss: 0.53925 - acc: 0.7026 -- iter: 09152/20250


Training Step: 2363  | total loss: [1m[32m0.53791[0m[0m | time: 36.344s
[2K| Adam | epoch: 008 | loss: 0.53791 - acc: 0.7074 -- iter: 09216/20250


Training Step: 2364  | total loss: [1m[32m0.53511[0m[0m | time: 36.599s
[2K| Adam | epoch: 008 | loss: 0.53511 - acc: 0.7132 -- iter: 09280/20250


Training Step: 2365  | total loss: [1m[32m0.52685[0m[0m | time: 36.849s
[2K| Adam | epoch: 008 | loss: 0.52685 - acc: 0.7216 -- iter: 09344/20250


Training Step: 2366  | total loss: [1m[32m0.52567[0m[0m | time: 37.105s
[2K| Adam | epoch: 008 | loss: 0.52567 - acc: 0.7197 -- iter: 09408/20250


Training Step: 2367  | total loss: [1m[32m0.52868[0m[0m | time: 37.354s
[2K| Adam | epoch: 008 | loss: 0.52868 - acc: 0.7165 -- iter: 09472/20250


Training Step: 2368  | total loss: [1m[32m0.52880[0m[0m | time: 37.600s
[2K| Adam | epoch: 008 | loss: 0.52880 - acc: 0.7214 -- iter: 09536/20250


Training Step: 2369  | total loss: [1m[32m0.52098[0m[0m | time: 37.852s
[2K| Adam | epoch: 008 | loss: 0.52098 - acc: 0.7289 -- iter: 09600/20250


Training Step: 2370  | total loss: [1m[32m0.51149[0m[0m | time: 38.118s
[2K| Adam | epoch: 008 | loss: 0.51149 - acc: 0.7373 -- iter: 09664/20250


Training Step: 2371  | total loss: [1m[32m0.50611[0m[0m | time: 38.398s
[2K| Adam | epoch: 008 | loss: 0.50611 - acc: 0.7464 -- iter: 09728/20250


Training Step: 2372  | total loss: [1m[32m0.50156[0m[0m | time: 38.655s
[2K| Adam | epoch: 008 | loss: 0.50156 - acc: 0.7514 -- iter: 09792/20250


Training Step: 2373  | total loss: [1m[32m0.50134[0m[0m | time: 38.905s
[2K| Adam | epoch: 008 | loss: 0.50134 - acc: 0.7560 -- iter: 09856/20250


Training Step: 2374  | total loss: [1m[32m0.51172[0m[0m | time: 39.168s
[2K| Adam | epoch: 008 | loss: 0.51172 - acc: 0.7554 -- iter: 09920/20250


Training Step: 2375  | total loss: [1m[32m0.50977[0m[0m | time: 39.412s
[2K| Adam | epoch: 008 | loss: 0.50977 - acc: 0.7595 -- iter: 09984/20250


Training Step: 2376  | total loss: [1m[32m0.50510[0m[0m | time: 39.660s
[2K| Adam | epoch: 008 | loss: 0.50510 - acc: 0.7648 -- iter: 10048/20250


Training Step: 2377  | total loss: [1m[32m0.50031[0m[0m | time: 39.910s
[2K| Adam | epoch: 008 | loss: 0.50031 - acc: 0.7665 -- iter: 10112/20250


Training Step: 2378  | total loss: [1m[32m0.49781[0m[0m | time: 40.150s
[2K| Adam | epoch: 008 | loss: 0.49781 - acc: 0.7695 -- iter: 10176/20250


Training Step: 2379  | total loss: [1m[32m0.51247[0m[0m | time: 40.399s
[2K| Adam | epoch: 008 | loss: 0.51247 - acc: 0.7582 -- iter: 10240/20250


Training Step: 2380  | total loss: [1m[32m0.50724[0m[0m | time: 40.668s
[2K| Adam | epoch: 008 | loss: 0.50724 - acc: 0.7605 -- iter: 10304/20250


Training Step: 2381  | total loss: [1m[32m0.49504[0m[0m | time: 40.942s
[2K| Adam | epoch: 008 | loss: 0.49504 - acc: 0.7673 -- iter: 10368/20250


Training Step: 2382  | total loss: [1m[32m0.49747[0m[0m | time: 41.192s
[2K| Adam | epoch: 008 | loss: 0.49747 - acc: 0.7640 -- iter: 10432/20250


Training Step: 2383  | total loss: [1m[32m0.49456[0m[0m | time: 41.437s
[2K| Adam | epoch: 008 | loss: 0.49456 - acc: 0.7688 -- iter: 10496/20250


Training Step: 2384  | total loss: [1m[32m0.49989[0m[0m | time: 41.687s
[2K| Adam | epoch: 008 | loss: 0.49989 - acc: 0.7669 -- iter: 10560/20250


Training Step: 2385  | total loss: [1m[32m0.50584[0m[0m | time: 41.928s
[2K| Adam | epoch: 008 | loss: 0.50584 - acc: 0.7699 -- iter: 10624/20250


Training Step: 2386  | total loss: [1m[32m0.50238[0m[0m | time: 42.173s
[2K| Adam | epoch: 008 | loss: 0.50238 - acc: 0.7726 -- iter: 10688/20250


Training Step: 2387  | total loss: [1m[32m0.49872[0m[0m | time: 42.428s
[2K| Adam | epoch: 008 | loss: 0.49872 - acc: 0.7766 -- iter: 10752/20250


Training Step: 2388  | total loss: [1m[32m0.50958[0m[0m | time: 42.671s
[2K| Adam | epoch: 008 | loss: 0.50958 - acc: 0.7661 -- iter: 10816/20250


Training Step: 2389  | total loss: [1m[32m0.53364[0m[0m | time: 42.911s
[2K| Adam | epoch: 008 | loss: 0.53364 - acc: 0.7458 -- iter: 10880/20250


Training Step: 2390  | total loss: [1m[32m0.53988[0m[0m | time: 43.176s
[2K| Adam | epoch: 008 | loss: 0.53988 - acc: 0.7368 -- iter: 10944/20250


Training Step: 2391  | total loss: [1m[32m0.55565[0m[0m | time: 43.459s
[2K| Adam | epoch: 008 | loss: 0.55565 - acc: 0.7256 -- iter: 11008/20250


Training Step: 2392  | total loss: [1m[32m0.55438[0m[0m | time: 43.710s
[2K| Adam | epoch: 008 | loss: 0.55438 - acc: 0.7250 -- iter: 11072/20250


Training Step: 2393  | total loss: [1m[32m0.55358[0m[0m | time: 43.955s
[2K| Adam | epoch: 008 | loss: 0.55358 - acc: 0.7243 -- iter: 11136/20250


Training Step: 2394  | total loss: [1m[32m0.53987[0m[0m | time: 44.196s
[2K| Adam | epoch: 008 | loss: 0.53987 - acc: 0.7363 -- iter: 11200/20250


Training Step: 2395  | total loss: [1m[32m0.52881[0m[0m | time: 44.448s
[2K| Adam | epoch: 008 | loss: 0.52881 - acc: 0.7423 -- iter: 11264/20250


Training Step: 2396  | total loss: [1m[32m0.53187[0m[0m | time: 44.696s
[2K| Adam | epoch: 008 | loss: 0.53187 - acc: 0.7462 -- iter: 11328/20250


Training Step: 2397  | total loss: [1m[32m0.56218[0m[0m | time: 44.944s
[2K| Adam | epoch: 008 | loss: 0.56218 - acc: 0.7388 -- iter: 11392/20250


Training Step: 2398  | total loss: [1m[32m0.59089[0m[0m | time: 45.188s
[2K| Adam | epoch: 008 | loss: 0.59089 - acc: 0.7305 -- iter: 11456/20250


Training Step: 2399  | total loss: [1m[32m0.61432[0m[0m | time: 45.441s
[2K| Adam | epoch: 008 | loss: 0.61432 - acc: 0.7231 -- iter: 11520/20250


Training Step: 2400  | total loss: [1m[32m0.61586[0m[0m | time: 45.713s
[2K| Adam | epoch: 008 | loss: 0.61586 - acc: 0.7242 -- iter: 11584/20250


Training Step: 2401  | total loss: [1m[32m0.59283[0m[0m | time: 45.983s
[2K| Adam | epoch: 008 | loss: 0.59283 - acc: 0.7377 -- iter: 11648/20250


Training Step: 2402  | total loss: [1m[32m0.58273[0m[0m | time: 46.229s
[2K| Adam | epoch: 008 | loss: 0.58273 - acc: 0.7405 -- iter: 11712/20250


Training Step: 2403  | total loss: [1m[32m0.56723[0m[0m | time: 46.476s
[2K| Adam | epoch: 008 | loss: 0.56723 - acc: 0.7540 -- iter: 11776/20250


Training Step: 2404  | total loss: [1m[32m0.55549[0m[0m | time: 46.726s
[2K| Adam | epoch: 008 | loss: 0.55549 - acc: 0.7661 -- iter: 11840/20250


Training Step: 2405  | total loss: [1m[32m0.55229[0m[0m | time: 46.977s
[2K| Adam | epoch: 008 | loss: 0.55229 - acc: 0.7660 -- iter: 11904/20250


Training Step: 2406  | total loss: [1m[32m0.54640[0m[0m | time: 47.220s
[2K| Adam | epoch: 008 | loss: 0.54640 - acc: 0.7691 -- iter: 11968/20250


Training Step: 2407  | total loss: [1m[32m0.55178[0m[0m | time: 47.474s
[2K| Adam | epoch: 008 | loss: 0.55178 - acc: 0.7625 -- iter: 12032/20250


Training Step: 2408  | total loss: [1m[32m0.54226[0m[0m | time: 47.720s
[2K| Adam | epoch: 008 | loss: 0.54226 - acc: 0.7691 -- iter: 12096/20250


Training Step: 2409  | total loss: [1m[32m0.53602[0m[0m | time: 47.967s
[2K| Adam | epoch: 008 | loss: 0.53602 - acc: 0.7750 -- iter: 12160/20250


Training Step: 2410  | total loss: [1m[32m0.53108[0m[0m | time: 48.237s
[2K| Adam | epoch: 008 | loss: 0.53108 - acc: 0.7741 -- iter: 12224/20250


Training Step: 2411  | total loss: [1m[32m0.52234[0m[0m | time: 48.507s
[2K| Adam | epoch: 008 | loss: 0.52234 - acc: 0.7763 -- iter: 12288/20250


Training Step: 2412  | total loss: [1m[32m0.51098[0m[0m | time: 48.747s
[2K| Adam | epoch: 008 | loss: 0.51098 - acc: 0.7815 -- iter: 12352/20250


Training Step: 2413  | total loss: [1m[32m0.50685[0m[0m | time: 49.000s
[2K| Adam | epoch: 008 | loss: 0.50685 - acc: 0.7815 -- iter: 12416/20250


Training Step: 2414  | total loss: [1m[32m0.50517[0m[0m | time: 49.253s
[2K| Adam | epoch: 008 | loss: 0.50517 - acc: 0.7846 -- iter: 12480/20250


Training Step: 2415  | total loss: [1m[32m0.50792[0m[0m | time: 49.497s
[2K| Adam | epoch: 008 | loss: 0.50792 - acc: 0.7827 -- iter: 12544/20250


Training Step: 2416  | total loss: [1m[32m0.51353[0m[0m | time: 49.756s
[2K| Adam | epoch: 008 | loss: 0.51353 - acc: 0.7794 -- iter: 12608/20250


Training Step: 2417  | total loss: [1m[32m0.51577[0m[0m | time: 50.004s
[2K| Adam | epoch: 008 | loss: 0.51577 - acc: 0.7780 -- iter: 12672/20250


Training Step: 2418  | total loss: [1m[32m0.51795[0m[0m | time: 50.251s
[2K| Adam | epoch: 008 | loss: 0.51795 - acc: 0.7737 -- iter: 12736/20250


Training Step: 2419  | total loss: [1m[32m0.50843[0m[0m | time: 50.498s
[2K| Adam | epoch: 008 | loss: 0.50843 - acc: 0.7822 -- iter: 12800/20250


Training Step: 2420  | total loss: [1m[32m0.50474[0m[0m | time: 50.777s
[2K| Adam | epoch: 008 | loss: 0.50474 - acc: 0.7868 -- iter: 12864/20250


Training Step: 2421  | total loss: [1m[32m0.50249[0m[0m | time: 51.062s
[2K| Adam | epoch: 008 | loss: 0.50249 - acc: 0.7863 -- iter: 12928/20250


Training Step: 2422  | total loss: [1m[32m0.50596[0m[0m | time: 51.317s
[2K| Adam | epoch: 008 | loss: 0.50596 - acc: 0.7842 -- iter: 12992/20250


Training Step: 2423  | total loss: [1m[32m0.49432[0m[0m | time: 51.565s
[2K| Adam | epoch: 008 | loss: 0.49432 - acc: 0.7933 -- iter: 13056/20250


Training Step: 2424  | total loss: [1m[32m0.49043[0m[0m | time: 51.810s
[2K| Adam | epoch: 008 | loss: 0.49043 - acc: 0.7952 -- iter: 13120/20250


Training Step: 2425  | total loss: [1m[32m0.48580[0m[0m | time: 52.063s
[2K| Adam | epoch: 008 | loss: 0.48580 - acc: 0.7954 -- iter: 13184/20250


Training Step: 2426  | total loss: [1m[32m0.48763[0m[0m | time: 52.313s
[2K| Adam | epoch: 008 | loss: 0.48763 - acc: 0.7940 -- iter: 13248/20250


Training Step: 2427  | total loss: [1m[32m0.48162[0m[0m | time: 52.554s
[2K| Adam | epoch: 008 | loss: 0.48162 - acc: 0.7974 -- iter: 13312/20250


Training Step: 2428  | total loss: [1m[32m0.48379[0m[0m | time: 52.803s
[2K| Adam | epoch: 008 | loss: 0.48379 - acc: 0.7942 -- iter: 13376/20250


Training Step: 2429  | total loss: [1m[32m0.48390[0m[0m | time: 53.055s
[2K| Adam | epoch: 008 | loss: 0.48390 - acc: 0.7945 -- iter: 13440/20250


Training Step: 2430  | total loss: [1m[32m0.48300[0m[0m | time: 53.330s
[2K| Adam | epoch: 008 | loss: 0.48300 - acc: 0.7931 -- iter: 13504/20250


Training Step: 2431  | total loss: [1m[32m0.48544[0m[0m | time: 53.590s
[2K| Adam | epoch: 008 | loss: 0.48544 - acc: 0.7904 -- iter: 13568/20250


Training Step: 2432  | total loss: [1m[32m0.47756[0m[0m | time: 53.846s
[2K| Adam | epoch: 008 | loss: 0.47756 - acc: 0.7973 -- iter: 13632/20250


Training Step: 2433  | total loss: [1m[32m0.48109[0m[0m | time: 54.092s
[2K| Adam | epoch: 008 | loss: 0.48109 - acc: 0.7926 -- iter: 13696/20250


Training Step: 2434  | total loss: [1m[32m0.47265[0m[0m | time: 54.338s
[2K| Adam | epoch: 008 | loss: 0.47265 - acc: 0.7977 -- iter: 13760/20250


Training Step: 2435  | total loss: [1m[32m0.46972[0m[0m | time: 54.602s
[2K| Adam | epoch: 008 | loss: 0.46972 - acc: 0.7976 -- iter: 13824/20250


Training Step: 2436  | total loss: [1m[32m0.46678[0m[0m | time: 54.855s
[2K| Adam | epoch: 008 | loss: 0.46678 - acc: 0.7975 -- iter: 13888/20250


Training Step: 2437  | total loss: [1m[32m0.45849[0m[0m | time: 55.119s
[2K| Adam | epoch: 008 | loss: 0.45849 - acc: 0.8022 -- iter: 13952/20250


Training Step: 2438  | total loss: [1m[32m0.45501[0m[0m | time: 55.366s
[2K| Adam | epoch: 008 | loss: 0.45501 - acc: 0.8063 -- iter: 14016/20250


Training Step: 2439  | total loss: [1m[32m0.45413[0m[0m | time: 55.620s
[2K| Adam | epoch: 008 | loss: 0.45413 - acc: 0.8038 -- iter: 14080/20250


Training Step: 2440  | total loss: [1m[32m0.45495[0m[0m | time: 55.879s
[2K| Adam | epoch: 008 | loss: 0.45495 - acc: 0.8031 -- iter: 14144/20250


Training Step: 2441  | total loss: [1m[32m0.45126[0m[0m | time: 56.125s
[2K| Adam | epoch: 008 | loss: 0.45126 - acc: 0.8056 -- iter: 14208/20250


Training Step: 2442  | total loss: [1m[32m0.44401[0m[0m | time: 56.368s
[2K| Adam | epoch: 008 | loss: 0.44401 - acc: 0.8126 -- iter: 14272/20250


Training Step: 2443  | total loss: [1m[32m0.44782[0m[0m | time: 56.616s
[2K| Adam | epoch: 008 | loss: 0.44782 - acc: 0.8110 -- iter: 14336/20250


Training Step: 2444  | total loss: [1m[32m0.46073[0m[0m | time: 56.867s
[2K| Adam | epoch: 008 | loss: 0.46073 - acc: 0.8033 -- iter: 14400/20250


Training Step: 2445  | total loss: [1m[32m0.46068[0m[0m | time: 57.117s
[2K| Adam | epoch: 008 | loss: 0.46068 - acc: 0.8011 -- iter: 14464/20250


Training Step: 2446  | total loss: [1m[32m0.44947[0m[0m | time: 57.359s
[2K| Adam | epoch: 008 | loss: 0.44947 - acc: 0.8101 -- iter: 14528/20250


Training Step: 2447  | total loss: [1m[32m0.43773[0m[0m | time: 57.611s
[2K| Adam | epoch: 008 | loss: 0.43773 - acc: 0.8212 -- iter: 14592/20250


Training Step: 2448  | total loss: [1m[32m0.44391[0m[0m | time: 57.860s
[2K| Adam | epoch: 008 | loss: 0.44391 - acc: 0.8188 -- iter: 14656/20250


Training Step: 2449  | total loss: [1m[32m0.44962[0m[0m | time: 58.118s
[2K| Adam | epoch: 008 | loss: 0.44962 - acc: 0.8151 -- iter: 14720/20250


Training Step: 2450  | total loss: [1m[32m0.44921[0m[0m | time: 58.393s
[2K| Adam | epoch: 008 | loss: 0.44921 - acc: 0.8148 -- iter: 14784/20250


Training Step: 2451  | total loss: [1m[32m0.43918[0m[0m | time: 58.646s
[2K| Adam | epoch: 008 | loss: 0.43918 - acc: 0.8224 -- iter: 14848/20250


Training Step: 2452  | total loss: [1m[32m0.43522[0m[0m | time: 58.892s
[2K| Adam | epoch: 008 | loss: 0.43522 - acc: 0.8245 -- iter: 14912/20250


Training Step: 2453  | total loss: [1m[32m0.43024[0m[0m | time: 59.141s
[2K| Adam | epoch: 008 | loss: 0.43024 - acc: 0.8264 -- iter: 14976/20250


Training Step: 2454  | total loss: [1m[32m0.42377[0m[0m | time: 59.384s
[2K| Adam | epoch: 008 | loss: 0.42377 - acc: 0.8297 -- iter: 15040/20250


Training Step: 2455  | total loss: [1m[32m0.42810[0m[0m | time: 59.632s
[2K| Adam | epoch: 008 | loss: 0.42810 - acc: 0.8264 -- iter: 15104/20250


Training Step: 2456  | total loss: [1m[32m0.43391[0m[0m | time: 59.878s
[2K| Adam | epoch: 008 | loss: 0.43391 - acc: 0.8219 -- iter: 15168/20250


Training Step: 2457  | total loss: [1m[32m0.43558[0m[0m | time: 60.140s
[2K| Adam | epoch: 008 | loss: 0.43558 - acc: 0.8194 -- iter: 15232/20250


Training Step: 2458  | total loss: [1m[32m0.42819[0m[0m | time: 60.399s
[2K| Adam | epoch: 008 | loss: 0.42819 - acc: 0.8219 -- iter: 15296/20250


Training Step: 2459  | total loss: [1m[32m0.42525[0m[0m | time: 60.650s
[2K| Adam | epoch: 008 | loss: 0.42525 - acc: 0.8225 -- iter: 15360/20250


Training Step: 2460  | total loss: [1m[32m0.42434[0m[0m | time: 60.928s
[2K| Adam | epoch: 008 | loss: 0.42434 - acc: 0.8262 -- iter: 15424/20250


Training Step: 2461  | total loss: [1m[32m0.42820[0m[0m | time: 61.173s
[2K| Adam | epoch: 008 | loss: 0.42820 - acc: 0.8217 -- iter: 15488/20250


Training Step: 2462  | total loss: [1m[32m0.43507[0m[0m | time: 61.422s
[2K| Adam | epoch: 008 | loss: 0.43507 - acc: 0.8161 -- iter: 15552/20250


Training Step: 2463  | total loss: [1m[32m0.45622[0m[0m | time: 61.670s
[2K| Adam | epoch: 008 | loss: 0.45622 - acc: 0.8032 -- iter: 15616/20250


Training Step: 2464  | total loss: [1m[32m0.46996[0m[0m | time: 61.911s
[2K| Adam | epoch: 008 | loss: 0.46996 - acc: 0.7932 -- iter: 15680/20250


Training Step: 2465  | total loss: [1m[32m0.47005[0m[0m | time: 62.156s
[2K| Adam | epoch: 008 | loss: 0.47005 - acc: 0.7951 -- iter: 15744/20250


Training Step: 2466  | total loss: [1m[32m0.47503[0m[0m | time: 62.408s
[2K| Adam | epoch: 008 | loss: 0.47503 - acc: 0.7906 -- iter: 15808/20250


Training Step: 2467  | total loss: [1m[32m0.48295[0m[0m | time: 62.654s
[2K| Adam | epoch: 008 | loss: 0.48295 - acc: 0.7866 -- iter: 15872/20250


Training Step: 2468  | total loss: [1m[32m0.47983[0m[0m | time: 62.897s
[2K| Adam | epoch: 008 | loss: 0.47983 - acc: 0.7892 -- iter: 15936/20250


Training Step: 2469  | total loss: [1m[32m0.48316[0m[0m | time: 63.156s
[2K| Adam | epoch: 008 | loss: 0.48316 - acc: 0.7884 -- iter: 16000/20250


Training Step: 2470  | total loss: [1m[32m0.49588[0m[0m | time: 63.443s
[2K| Adam | epoch: 008 | loss: 0.49588 - acc: 0.7767 -- iter: 16064/20250


Training Step: 2471  | total loss: [1m[32m0.48663[0m[0m | time: 63.681s
[2K| Adam | epoch: 008 | loss: 0.48663 - acc: 0.7834 -- iter: 16128/20250


Training Step: 2472  | total loss: [1m[32m0.48136[0m[0m | time: 63.933s
[2K| Adam | epoch: 008 | loss: 0.48136 - acc: 0.7895 -- iter: 16192/20250


Training Step: 2473  | total loss: [1m[32m0.47326[0m[0m | time: 64.181s
[2K| Adam | epoch: 008 | loss: 0.47326 - acc: 0.7949 -- iter: 16256/20250


Training Step: 2474  | total loss: [1m[32m0.48362[0m[0m | time: 64.425s
[2K| Adam | epoch: 008 | loss: 0.48362 - acc: 0.7904 -- iter: 16320/20250


Training Step: 2475  | total loss: [1m[32m0.48312[0m[0m | time: 64.676s
[2K| Adam | epoch: 008 | loss: 0.48312 - acc: 0.7942 -- iter: 16384/20250


Training Step: 2476  | total loss: [1m[32m0.48234[0m[0m | time: 64.922s
[2K| Adam | epoch: 008 | loss: 0.48234 - acc: 0.7960 -- iter: 16448/20250


Training Step: 2477  | total loss: [1m[32m0.48723[0m[0m | time: 65.172s
[2K| Adam | epoch: 008 | loss: 0.48723 - acc: 0.7914 -- iter: 16512/20250


Training Step: 2478  | total loss: [1m[32m0.48872[0m[0m | time: 65.417s
[2K| Adam | epoch: 008 | loss: 0.48872 - acc: 0.7873 -- iter: 16576/20250


Training Step: 2479  | total loss: [1m[32m0.48780[0m[0m | time: 65.678s
[2K| Adam | epoch: 008 | loss: 0.48780 - acc: 0.7882 -- iter: 16640/20250


Training Step: 2480  | total loss: [1m[32m0.50012[0m[0m | time: 65.957s
[2K| Adam | epoch: 008 | loss: 0.50012 - acc: 0.7735 -- iter: 16704/20250


Training Step: 2481  | total loss: [1m[32m0.51414[0m[0m | time: 66.201s
[2K| Adam | epoch: 008 | loss: 0.51414 - acc: 0.7602 -- iter: 16768/20250


Training Step: 2482  | total loss: [1m[32m0.50319[0m[0m | time: 66.445s
[2K| Adam | epoch: 008 | loss: 0.50319 - acc: 0.7685 -- iter: 16832/20250


Training Step: 2483  | total loss: [1m[32m0.50456[0m[0m | time: 66.693s
[2K| Adam | epoch: 008 | loss: 0.50456 - acc: 0.7667 -- iter: 16896/20250


Training Step: 2484  | total loss: [1m[32m0.50749[0m[0m | time: 66.942s
[2K| Adam | epoch: 008 | loss: 0.50749 - acc: 0.7588 -- iter: 16960/20250


Training Step: 2485  | total loss: [1m[32m0.51831[0m[0m | time: 67.180s
[2K| Adam | epoch: 008 | loss: 0.51831 - acc: 0.7516 -- iter: 17024/20250


Training Step: 2486  | total loss: [1m[32m0.53603[0m[0m | time: 67.436s
[2K| Adam | epoch: 008 | loss: 0.53603 - acc: 0.7421 -- iter: 17088/20250


Training Step: 2487  | total loss: [1m[32m0.54019[0m[0m | time: 67.682s
[2K| Adam | epoch: 008 | loss: 0.54019 - acc: 0.7351 -- iter: 17152/20250


Training Step: 2488  | total loss: [1m[32m0.54468[0m[0m | time: 67.930s
[2K| Adam | epoch: 008 | loss: 0.54468 - acc: 0.7319 -- iter: 17216/20250


Training Step: 2489  | total loss: [1m[32m0.55315[0m[0m | time: 68.194s
[2K| Adam | epoch: 008 | loss: 0.55315 - acc: 0.7243 -- iter: 17280/20250


Training Step: 2490  | total loss: [1m[32m0.54897[0m[0m | time: 68.445s
[2K| Adam | epoch: 008 | loss: 0.54897 - acc: 0.7284 -- iter: 17344/20250


Training Step: 2491  | total loss: [1m[32m0.55134[0m[0m | time: 68.723s
[2K| Adam | epoch: 008 | loss: 0.55134 - acc: 0.7212 -- iter: 17408/20250


Training Step: 2492  | total loss: [1m[32m0.55489[0m[0m | time: 68.961s
[2K| Adam | epoch: 008 | loss: 0.55489 - acc: 0.7116 -- iter: 17472/20250


Training Step: 2493  | total loss: [1m[32m0.56502[0m[0m | time: 69.203s
[2K| Adam | epoch: 008 | loss: 0.56502 - acc: 0.7029 -- iter: 17536/20250


Training Step: 2494  | total loss: [1m[32m0.55749[0m[0m | time: 69.454s
[2K| Adam | epoch: 008 | loss: 0.55749 - acc: 0.7061 -- iter: 17600/20250


Training Step: 2495  | total loss: [1m[32m0.55935[0m[0m | time: 69.698s
[2K| Adam | epoch: 008 | loss: 0.55935 - acc: 0.7058 -- iter: 17664/20250


Training Step: 2496  | total loss: [1m[32m0.56424[0m[0m | time: 69.958s
[2K| Adam | epoch: 008 | loss: 0.56424 - acc: 0.6977 -- iter: 17728/20250


Training Step: 2497  | total loss: [1m[32m0.56138[0m[0m | time: 70.206s
[2K| Adam | epoch: 008 | loss: 0.56138 - acc: 0.6936 -- iter: 17792/20250


Training Step: 2498  | total loss: [1m[32m0.54402[0m[0m | time: 70.454s
[2K| Adam | epoch: 008 | loss: 0.54402 - acc: 0.7117 -- iter: 17856/20250


Training Step: 2499  | total loss: [1m[32m0.54698[0m[0m | time: 70.714s
[2K| Adam | epoch: 008 | loss: 0.54698 - acc: 0.7077 -- iter: 17920/20250


Training Step: 2500  | total loss: [1m[32m0.55130[0m[0m | time: 70.978s
[2K| Adam | epoch: 008 | loss: 0.55130 - acc: 0.7057 -- iter: 17984/20250


Training Step: 2501  | total loss: [1m[32m0.53338[0m[0m | time: 71.252s
[2K| Adam | epoch: 008 | loss: 0.53338 - acc: 0.7211 -- iter: 18048/20250


Training Step: 2502  | total loss: [1m[32m0.52699[0m[0m | time: 71.503s
[2K| Adam | epoch: 008 | loss: 0.52699 - acc: 0.7208 -- iter: 18112/20250


Training Step: 2503  | total loss: [1m[32m0.52537[0m[0m | time: 71.765s
[2K| Adam | epoch: 008 | loss: 0.52537 - acc: 0.7253 -- iter: 18176/20250


Training Step: 2504  | total loss: [1m[32m0.53295[0m[0m | time: 72.013s
[2K| Adam | epoch: 008 | loss: 0.53295 - acc: 0.7247 -- iter: 18240/20250


Training Step: 2505  | total loss: [1m[32m0.54262[0m[0m | time: 72.262s
[2K| Adam | epoch: 008 | loss: 0.54262 - acc: 0.7131 -- iter: 18304/20250


Training Step: 2506  | total loss: [1m[32m0.55315[0m[0m | time: 72.522s
[2K| Adam | epoch: 008 | loss: 0.55315 - acc: 0.7074 -- iter: 18368/20250


Training Step: 2507  | total loss: [1m[32m0.55954[0m[0m | time: 72.768s
[2K| Adam | epoch: 008 | loss: 0.55954 - acc: 0.7008 -- iter: 18432/20250


Training Step: 2508  | total loss: [1m[32m0.55974[0m[0m | time: 73.019s
[2K| Adam | epoch: 008 | loss: 0.55974 - acc: 0.7072 -- iter: 18496/20250


Training Step: 2509  | total loss: [1m[32m0.54987[0m[0m | time: 73.283s
[2K| Adam | epoch: 008 | loss: 0.54987 - acc: 0.7178 -- iter: 18560/20250


Training Step: 2510  | total loss: [1m[32m0.54871[0m[0m | time: 73.563s
[2K| Adam | epoch: 008 | loss: 0.54871 - acc: 0.7210 -- iter: 18624/20250


Training Step: 2511  | total loss: [1m[32m0.53470[0m[0m | time: 73.812s
[2K| Adam | epoch: 008 | loss: 0.53470 - acc: 0.7317 -- iter: 18688/20250


Training Step: 2512  | total loss: [1m[32m0.53948[0m[0m | time: 74.060s
[2K| Adam | epoch: 008 | loss: 0.53948 - acc: 0.7226 -- iter: 18752/20250


Training Step: 2513  | total loss: [1m[32m0.52654[0m[0m | time: 74.305s
[2K| Adam | epoch: 008 | loss: 0.52654 - acc: 0.7363 -- iter: 18816/20250


Training Step: 2514  | total loss: [1m[32m0.51749[0m[0m | time: 74.554s
[2K| Adam | epoch: 008 | loss: 0.51749 - acc: 0.7455 -- iter: 18880/20250


Training Step: 2515  | total loss: [1m[32m0.51447[0m[0m | time: 74.799s
[2K| Adam | epoch: 008 | loss: 0.51447 - acc: 0.7428 -- iter: 18944/20250


Training Step: 2516  | total loss: [1m[32m0.51758[0m[0m | time: 75.056s
[2K| Adam | epoch: 008 | loss: 0.51758 - acc: 0.7466 -- iter: 19008/20250


Training Step: 2517  | total loss: [1m[32m0.53295[0m[0m | time: 75.306s
[2K| Adam | epoch: 008 | loss: 0.53295 - acc: 0.7438 -- iter: 19072/20250


Training Step: 2518  | total loss: [1m[32m0.54147[0m[0m | time: 75.556s
[2K| Adam | epoch: 008 | loss: 0.54147 - acc: 0.7413 -- iter: 19136/20250


Training Step: 2519  | total loss: [1m[32m0.54125[0m[0m | time: 75.830s
[2K| Adam | epoch: 008 | loss: 0.54125 - acc: 0.7391 -- iter: 19200/20250


Training Step: 2520  | total loss: [1m[32m0.53758[0m[0m | time: 76.112s
[2K| Adam | epoch: 008 | loss: 0.53758 - acc: 0.7417 -- iter: 19264/20250


Training Step: 2521  | total loss: [1m[32m0.53816[0m[0m | time: 76.370s
[2K| Adam | epoch: 008 | loss: 0.53816 - acc: 0.7472 -- iter: 19328/20250


Training Step: 2522  | total loss: [1m[32m0.53471[0m[0m | time: 76.613s
[2K| Adam | epoch: 008 | loss: 0.53471 - acc: 0.7506 -- iter: 19392/20250


Training Step: 2523  | total loss: [1m[32m0.52308[0m[0m | time: 76.865s
[2K| Adam | epoch: 008 | loss: 0.52308 - acc: 0.7568 -- iter: 19456/20250


Training Step: 2524  | total loss: [1m[32m0.52412[0m[0m | time: 77.106s
[2K| Adam | epoch: 008 | loss: 0.52412 - acc: 0.7562 -- iter: 19520/20250


Training Step: 2525  | total loss: [1m[32m0.52487[0m[0m | time: 77.348s
[2K| Adam | epoch: 008 | loss: 0.52487 - acc: 0.7555 -- iter: 19584/20250


Training Step: 2526  | total loss: [1m[32m0.51949[0m[0m | time: 77.592s
[2K| Adam | epoch: 008 | loss: 0.51949 - acc: 0.7612 -- iter: 19648/20250


Training Step: 2527  | total loss: [1m[32m0.51676[0m[0m | time: 77.839s
[2K| Adam | epoch: 008 | loss: 0.51676 - acc: 0.7601 -- iter: 19712/20250


Training Step: 2528  | total loss: [1m[32m0.51776[0m[0m | time: 78.088s
[2K| Adam | epoch: 008 | loss: 0.51776 - acc: 0.7591 -- iter: 19776/20250


Training Step: 2529  | total loss: [1m[32m0.51357[0m[0m | time: 78.361s
[2K| Adam | epoch: 008 | loss: 0.51357 - acc: 0.7613 -- iter: 19840/20250


Training Step: 2530  | total loss: [1m[32m0.50914[0m[0m | time: 78.641s
[2K| Adam | epoch: 008 | loss: 0.50914 - acc: 0.7680 -- iter: 19904/20250


Training Step: 2531  | total loss: [1m[32m0.51215[0m[0m | time: 78.889s
[2K| Adam | epoch: 008 | loss: 0.51215 - acc: 0.7662 -- iter: 19968/20250


Training Step: 2532  | total loss: [1m[32m0.50764[0m[0m | time: 79.132s
[2K| Adam | epoch: 008 | loss: 0.50764 - acc: 0.7693 -- iter: 20032/20250


Training Step: 2533  | total loss: [1m[32m0.50454[0m[0m | time: 79.374s
[2K| Adam | epoch: 008 | loss: 0.50454 - acc: 0.7689 -- iter: 20096/20250


Training Step: 2534  | total loss: [1m[32m0.50143[0m[0m | time: 79.620s
[2K| Adam | epoch: 008 | loss: 0.50143 - acc: 0.7733 -- iter: 20160/20250


Training Step: 2535  | total loss: [1m[32m0.49785[0m[0m | time: 79.873s
[2K| Adam | epoch: 008 | loss: 0.49785 - acc: 0.7803 -- iter: 20224/20250


Training Step: 2536  | total loss: [1m[32m0.48978[0m[0m | time: 81.816s
[2K| Adam | epoch: 008 | loss: 0.48978 - acc: 0.7835 | val_loss: 0.71999 - val_acc: 0.6333 -- iter: 20250/20250
--


Training Step: 2537  | total loss: [1m[32m0.49048[0m[0m | time: 0.238s
[2K| Adam | epoch: 009 | loss: 0.49048 - acc: 0.7849 -- iter: 00064/20250


Training Step: 2538  | total loss: [1m[32m0.49947[0m[0m | time: 0.482s
[2K| Adam | epoch: 009 | loss: 0.49947 - acc: 0.7704 -- iter: 00128/20250


Training Step: 2539  | total loss: [1m[32m0.49910[0m[0m | time: 0.726s
[2K| Adam | epoch: 009 | loss: 0.49910 - acc: 0.7684 -- iter: 00192/20250


Training Step: 2540  | total loss: [1m[32m0.49601[0m[0m | time: 0.973s
[2K| Adam | epoch: 009 | loss: 0.49601 - acc: 0.7650 -- iter: 00256/20250


Training Step: 2541  | total loss: [1m[32m0.49831[0m[0m | time: 1.215s
[2K| Adam | epoch: 009 | loss: 0.49831 - acc: 0.7604 -- iter: 00320/20250


Training Step: 2542  | total loss: [1m[32m0.49864[0m[0m | time: 1.463s
[2K| Adam | epoch: 009 | loss: 0.49864 - acc: 0.7562 -- iter: 00384/20250


Training Step: 2543  | total loss: [1m[32m0.49144[0m[0m | time: 1.714s
[2K| Adam | epoch: 009 | loss: 0.49144 - acc: 0.7618 -- iter: 00448/20250


Training Step: 2544  | total loss: [1m[32m0.49526[0m[0m | time: 1.976s
[2K| Adam | epoch: 009 | loss: 0.49526 - acc: 0.7587 -- iter: 00512/20250


Training Step: 2545  | total loss: [1m[32m0.49486[0m[0m | time: 2.230s
[2K| Adam | epoch: 009 | loss: 0.49486 - acc: 0.7598 -- iter: 00576/20250


Training Step: 2546  | total loss: [1m[32m0.49356[0m[0m | time: 2.486s
[2K| Adam | epoch: 009 | loss: 0.49356 - acc: 0.7619 -- iter: 00640/20250


Training Step: 2547  | total loss: [1m[32m0.49587[0m[0m | time: 2.725s
[2K| Adam | epoch: 009 | loss: 0.49587 - acc: 0.7607 -- iter: 00704/20250


Training Step: 2548  | total loss: [1m[32m0.49926[0m[0m | time: 2.973s
[2K| Adam | epoch: 009 | loss: 0.49926 - acc: 0.7581 -- iter: 00768/20250


Training Step: 2549  | total loss: [1m[32m0.51110[0m[0m | time: 3.217s
[2K| Adam | epoch: 009 | loss: 0.51110 - acc: 0.7510 -- iter: 00832/20250


Training Step: 2550  | total loss: [1m[32m0.50111[0m[0m | time: 3.474s
[2K| Adam | epoch: 009 | loss: 0.50111 - acc: 0.7541 -- iter: 00896/20250


Training Step: 2551  | total loss: [1m[32m0.50377[0m[0m | time: 3.723s
[2K| Adam | epoch: 009 | loss: 0.50377 - acc: 0.7443 -- iter: 00960/20250


Training Step: 2552  | total loss: [1m[32m0.50307[0m[0m | time: 3.974s
[2K| Adam | epoch: 009 | loss: 0.50307 - acc: 0.7511 -- iter: 01024/20250


Training Step: 2553  | total loss: [1m[32m0.49563[0m[0m | time: 4.234s
[2K| Adam | epoch: 009 | loss: 0.49563 - acc: 0.7463 -- iter: 01088/20250


Training Step: 2554  | total loss: [1m[32m0.49352[0m[0m | time: 4.526s
[2K| Adam | epoch: 009 | loss: 0.49352 - acc: 0.7451 -- iter: 01152/20250


Training Step: 2555  | total loss: [1m[32m0.48782[0m[0m | time: 4.771s
[2K| Adam | epoch: 009 | loss: 0.48782 - acc: 0.7503 -- iter: 01216/20250


Training Step: 2556  | total loss: [1m[32m0.49026[0m[0m | time: 5.021s
[2K| Adam | epoch: 009 | loss: 0.49026 - acc: 0.7440 -- iter: 01280/20250


Training Step: 2557  | total loss: [1m[32m0.47895[0m[0m | time: 5.267s
[2K| Adam | epoch: 009 | loss: 0.47895 - acc: 0.7555 -- iter: 01344/20250


Training Step: 2558  | total loss: [1m[32m0.48245[0m[0m | time: 5.506s
[2K| Adam | epoch: 009 | loss: 0.48245 - acc: 0.7503 -- iter: 01408/20250


Training Step: 2559  | total loss: [1m[32m0.47116[0m[0m | time: 5.757s
[2K| Adam | epoch: 009 | loss: 0.47116 - acc: 0.7659 -- iter: 01472/20250


Training Step: 2560  | total loss: [1m[32m0.47349[0m[0m | time: 6.003s
[2K| Adam | epoch: 009 | loss: 0.47349 - acc: 0.7627 -- iter: 01536/20250


Training Step: 2561  | total loss: [1m[32m0.47918[0m[0m | time: 6.248s
[2K| Adam | epoch: 009 | loss: 0.47918 - acc: 0.7583 -- iter: 01600/20250


Training Step: 2562  | total loss: [1m[32m0.48399[0m[0m | time: 6.496s
[2K| Adam | epoch: 009 | loss: 0.48399 - acc: 0.7544 -- iter: 01664/20250


Training Step: 2563  | total loss: [1m[32m0.49501[0m[0m | time: 6.779s
[2K| Adam | epoch: 009 | loss: 0.49501 - acc: 0.7493 -- iter: 01728/20250


Training Step: 2564  | total loss: [1m[32m0.48914[0m[0m | time: 7.053s
[2K| Adam | epoch: 009 | loss: 0.48914 - acc: 0.7509 -- iter: 01792/20250


Training Step: 2565  | total loss: [1m[32m0.48649[0m[0m | time: 7.295s
[2K| Adam | epoch: 009 | loss: 0.48649 - acc: 0.7524 -- iter: 01856/20250


Training Step: 2566  | total loss: [1m[32m0.49549[0m[0m | time: 7.538s
[2K| Adam | epoch: 009 | loss: 0.49549 - acc: 0.7474 -- iter: 01920/20250


Training Step: 2567  | total loss: [1m[32m0.48892[0m[0m | time: 7.779s
[2K| Adam | epoch: 009 | loss: 0.48892 - acc: 0.7524 -- iter: 01984/20250


Training Step: 2568  | total loss: [1m[32m0.49347[0m[0m | time: 8.023s
[2K| Adam | epoch: 009 | loss: 0.49347 - acc: 0.7475 -- iter: 02048/20250


Training Step: 2569  | total loss: [1m[32m0.48793[0m[0m | time: 8.271s
[2K| Adam | epoch: 009 | loss: 0.48793 - acc: 0.7508 -- iter: 02112/20250


Training Step: 2570  | total loss: [1m[32m0.48651[0m[0m | time: 8.520s
[2K| Adam | epoch: 009 | loss: 0.48651 - acc: 0.7570 -- iter: 02176/20250


Training Step: 2571  | total loss: [1m[32m0.49072[0m[0m | time: 8.773s
[2K| Adam | epoch: 009 | loss: 0.49072 - acc: 0.7563 -- iter: 02240/20250


Training Step: 2572  | total loss: [1m[32m0.49143[0m[0m | time: 9.030s
[2K| Adam | epoch: 009 | loss: 0.49143 - acc: 0.7557 -- iter: 02304/20250


Training Step: 2573  | total loss: [1m[32m0.48232[0m[0m | time: 9.304s
[2K| Adam | epoch: 009 | loss: 0.48232 - acc: 0.7660 -- iter: 02368/20250


Training Step: 2574  | total loss: [1m[32m0.47906[0m[0m | time: 9.592s
[2K| Adam | epoch: 009 | loss: 0.47906 - acc: 0.7676 -- iter: 02432/20250


Training Step: 2575  | total loss: [1m[32m0.48143[0m[0m | time: 9.845s
[2K| Adam | epoch: 009 | loss: 0.48143 - acc: 0.7705 -- iter: 02496/20250


Training Step: 2576  | total loss: [1m[32m0.47045[0m[0m | time: 10.085s
[2K| Adam | epoch: 009 | loss: 0.47045 - acc: 0.7809 -- iter: 02560/20250


Training Step: 2577  | total loss: [1m[32m0.46426[0m[0m | time: 10.329s
[2K| Adam | epoch: 009 | loss: 0.46426 - acc: 0.7872 -- iter: 02624/20250


Training Step: 2578  | total loss: [1m[32m0.45313[0m[0m | time: 10.576s
[2K| Adam | epoch: 009 | loss: 0.45313 - acc: 0.7976 -- iter: 02688/20250


Training Step: 2579  | total loss: [1m[32m0.44827[0m[0m | time: 10.827s
[2K| Adam | epoch: 009 | loss: 0.44827 - acc: 0.8022 -- iter: 02752/20250


Training Step: 2580  | total loss: [1m[32m0.43996[0m[0m | time: 11.078s
[2K| Adam | epoch: 009 | loss: 0.43996 - acc: 0.8126 -- iter: 02816/20250


Training Step: 2581  | total loss: [1m[32m0.43399[0m[0m | time: 11.326s
[2K| Adam | epoch: 009 | loss: 0.43399 - acc: 0.8157 -- iter: 02880/20250


Training Step: 2582  | total loss: [1m[32m0.43276[0m[0m | time: 11.578s
[2K| Adam | epoch: 009 | loss: 0.43276 - acc: 0.8138 -- iter: 02944/20250


Training Step: 2583  | total loss: [1m[32m0.44474[0m[0m | time: 11.867s
[2K| Adam | epoch: 009 | loss: 0.44474 - acc: 0.8074 -- iter: 03008/20250


Training Step: 2584  | total loss: [1m[32m0.43885[0m[0m | time: 12.145s
[2K| Adam | epoch: 009 | loss: 0.43885 - acc: 0.8095 -- iter: 03072/20250


Training Step: 2585  | total loss: [1m[32m0.42750[0m[0m | time: 12.395s
[2K| Adam | epoch: 009 | loss: 0.42750 - acc: 0.8145 -- iter: 03136/20250


Training Step: 2586  | total loss: [1m[32m0.42026[0m[0m | time: 12.639s
[2K| Adam | epoch: 009 | loss: 0.42026 - acc: 0.8190 -- iter: 03200/20250


Training Step: 2587  | total loss: [1m[32m0.42540[0m[0m | time: 12.887s
[2K| Adam | epoch: 009 | loss: 0.42540 - acc: 0.8183 -- iter: 03264/20250


Training Step: 2588  | total loss: [1m[32m0.42414[0m[0m | time: 13.133s
[2K| Adam | epoch: 009 | loss: 0.42414 - acc: 0.8193 -- iter: 03328/20250


Training Step: 2589  | total loss: [1m[32m0.42838[0m[0m | time: 13.381s
[2K| Adam | epoch: 009 | loss: 0.42838 - acc: 0.8155 -- iter: 03392/20250


Training Step: 2590  | total loss: [1m[32m0.42449[0m[0m | time: 13.633s
[2K| Adam | epoch: 009 | loss: 0.42449 - acc: 0.8168 -- iter: 03456/20250


Training Step: 2591  | total loss: [1m[32m0.43804[0m[0m | time: 13.879s
[2K| Adam | epoch: 009 | loss: 0.43804 - acc: 0.8085 -- iter: 03520/20250


Training Step: 2592  | total loss: [1m[32m0.45372[0m[0m | time: 14.121s
[2K| Adam | epoch: 009 | loss: 0.45372 - acc: 0.7996 -- iter: 03584/20250


Training Step: 2593  | total loss: [1m[32m0.44714[0m[0m | time: 14.380s
[2K| Adam | epoch: 009 | loss: 0.44714 - acc: 0.8071 -- iter: 03648/20250


Training Step: 2594  | total loss: [1m[32m0.45064[0m[0m | time: 14.660s
[2K| Adam | epoch: 009 | loss: 0.45064 - acc: 0.8030 -- iter: 03712/20250


Training Step: 2595  | total loss: [1m[32m0.44213[0m[0m | time: 14.912s
[2K| Adam | epoch: 009 | loss: 0.44213 - acc: 0.8117 -- iter: 03776/20250


Training Step: 2596  | total loss: [1m[32m0.45868[0m[0m | time: 15.159s
[2K| Adam | epoch: 009 | loss: 0.45868 - acc: 0.7977 -- iter: 03840/20250


Training Step: 2597  | total loss: [1m[32m0.46957[0m[0m | time: 15.402s
[2K| Adam | epoch: 009 | loss: 0.46957 - acc: 0.7898 -- iter: 03904/20250


Training Step: 2598  | total loss: [1m[32m0.47357[0m[0m | time: 15.643s
[2K| Adam | epoch: 009 | loss: 0.47357 - acc: 0.7843 -- iter: 03968/20250


Training Step: 2599  | total loss: [1m[32m0.46909[0m[0m | time: 15.891s
[2K| Adam | epoch: 009 | loss: 0.46909 - acc: 0.7902 -- iter: 04032/20250


Training Step: 2600  | total loss: [1m[32m0.47206[0m[0m | time: 16.139s
[2K| Adam | epoch: 009 | loss: 0.47206 - acc: 0.7893 -- iter: 04096/20250


Training Step: 2601  | total loss: [1m[32m0.46538[0m[0m | time: 16.387s
[2K| Adam | epoch: 009 | loss: 0.46538 - acc: 0.7963 -- iter: 04160/20250


Training Step: 2602  | total loss: [1m[32m0.45759[0m[0m | time: 16.638s
[2K| Adam | epoch: 009 | loss: 0.45759 - acc: 0.8058 -- iter: 04224/20250


Training Step: 2603  | total loss: [1m[32m0.45988[0m[0m | time: 16.879s
[2K| Adam | epoch: 009 | loss: 0.45988 - acc: 0.8018 -- iter: 04288/20250


Training Step: 2604  | total loss: [1m[32m0.45857[0m[0m | time: 17.149s
[2K| Adam | epoch: 009 | loss: 0.45857 - acc: 0.8013 -- iter: 04352/20250


Training Step: 2605  | total loss: [1m[32m0.45065[0m[0m | time: 17.393s
[2K| Adam | epoch: 009 | loss: 0.45065 - acc: 0.8086 -- iter: 04416/20250


Training Step: 2606  | total loss: [1m[32m0.45118[0m[0m | time: 17.644s
[2K| Adam | epoch: 009 | loss: 0.45118 - acc: 0.8059 -- iter: 04480/20250


Training Step: 2607  | total loss: [1m[32m0.43190[0m[0m | time: 17.904s
[2K| Adam | epoch: 009 | loss: 0.43190 - acc: 0.8175 -- iter: 04544/20250


Training Step: 2608  | total loss: [1m[32m0.42604[0m[0m | time: 18.150s
[2K| Adam | epoch: 009 | loss: 0.42604 - acc: 0.8217 -- iter: 04608/20250


Training Step: 2609  | total loss: [1m[32m0.41002[0m[0m | time: 18.400s
[2K| Adam | epoch: 009 | loss: 0.41002 - acc: 0.8317 -- iter: 04672/20250


Training Step: 2610  | total loss: [1m[32m0.40493[0m[0m | time: 18.660s
[2K| Adam | epoch: 009 | loss: 0.40493 - acc: 0.8360 -- iter: 04736/20250


Training Step: 2611  | total loss: [1m[32m0.41814[0m[0m | time: 18.907s
[2K| Adam | epoch: 009 | loss: 0.41814 - acc: 0.8259 -- iter: 04800/20250


Training Step: 2612  | total loss: [1m[32m0.46924[0m[0m | time: 19.152s
[2K| Adam | epoch: 009 | loss: 0.46924 - acc: 0.7948 -- iter: 04864/20250


Training Step: 2613  | total loss: [1m[32m0.51154[0m[0m | time: 19.420s
[2K| Adam | epoch: 009 | loss: 0.51154 - acc: 0.7747 -- iter: 04928/20250


Training Step: 2614  | total loss: [1m[32m0.54061[0m[0m | time: 19.708s
[2K| Adam | epoch: 009 | loss: 0.54061 - acc: 0.7613 -- iter: 04992/20250


Training Step: 2615  | total loss: [1m[32m0.58446[0m[0m | time: 19.961s
[2K| Adam | epoch: 009 | loss: 0.58446 - acc: 0.7336 -- iter: 05056/20250


Training Step: 2616  | total loss: [1m[32m0.61552[0m[0m | time: 20.211s
[2K| Adam | epoch: 009 | loss: 0.61552 - acc: 0.7134 -- iter: 05120/20250


Training Step: 2617  | total loss: [1m[32m0.64071[0m[0m | time: 20.462s
[2K| Adam | epoch: 009 | loss: 0.64071 - acc: 0.6967 -- iter: 05184/20250


Training Step: 2618  | total loss: [1m[32m0.65483[0m[0m | time: 20.707s
[2K| Adam | epoch: 009 | loss: 0.65483 - acc: 0.6864 -- iter: 05248/20250


Training Step: 2619  | total loss: [1m[32m0.66379[0m[0m | time: 20.963s
[2K| Adam | epoch: 009 | loss: 0.66379 - acc: 0.6756 -- iter: 05312/20250


Training Step: 2620  | total loss: [1m[32m0.66856[0m[0m | time: 21.207s
[2K| Adam | epoch: 009 | loss: 0.66856 - acc: 0.6659 -- iter: 05376/20250


Training Step: 2621  | total loss: [1m[32m0.66670[0m[0m | time: 21.458s
[2K| Adam | epoch: 009 | loss: 0.66670 - acc: 0.6618 -- iter: 05440/20250


Training Step: 2622  | total loss: [1m[32m0.66951[0m[0m | time: 21.731s
[2K| Adam | epoch: 009 | loss: 0.66951 - acc: 0.6503 -- iter: 05504/20250


Training Step: 2623  | total loss: [1m[32m0.66644[0m[0m | time: 21.983s
[2K| Adam | epoch: 009 | loss: 0.66644 - acc: 0.6446 -- iter: 05568/20250


Training Step: 2624  | total loss: [1m[32m0.66408[0m[0m | time: 22.281s
[2K| Adam | epoch: 009 | loss: 0.66408 - acc: 0.6380 -- iter: 05632/20250


Training Step: 2625  | total loss: [1m[32m0.66061[0m[0m | time: 22.540s
[2K| Adam | epoch: 009 | loss: 0.66061 - acc: 0.6320 -- iter: 05696/20250


Training Step: 2626  | total loss: [1m[32m0.65588[0m[0m | time: 22.792s
[2K| Adam | epoch: 009 | loss: 0.65588 - acc: 0.6266 -- iter: 05760/20250


Training Step: 2627  | total loss: [1m[32m0.65009[0m[0m | time: 23.048s
[2K| Adam | epoch: 009 | loss: 0.65009 - acc: 0.6280 -- iter: 05824/20250


Training Step: 2628  | total loss: [1m[32m0.64627[0m[0m | time: 23.303s
[2K| Adam | epoch: 009 | loss: 0.64627 - acc: 0.6230 -- iter: 05888/20250


Training Step: 2629  | total loss: [1m[32m0.64007[0m[0m | time: 23.568s
[2K| Adam | epoch: 009 | loss: 0.64007 - acc: 0.6263 -- iter: 05952/20250


Training Step: 2630  | total loss: [1m[32m0.63834[0m[0m | time: 23.845s
[2K| Adam | epoch: 009 | loss: 0.63834 - acc: 0.6325 -- iter: 06016/20250


Training Step: 2631  | total loss: [1m[32m0.63700[0m[0m | time: 24.086s
[2K| Adam | epoch: 009 | loss: 0.63700 - acc: 0.6302 -- iter: 06080/20250


Training Step: 2632  | total loss: [1m[32m0.63584[0m[0m | time: 24.342s
[2K| Adam | epoch: 009 | loss: 0.63584 - acc: 0.6328 -- iter: 06144/20250


Training Step: 2633  | total loss: [1m[32m0.63134[0m[0m | time: 24.591s
[2K| Adam | epoch: 009 | loss: 0.63134 - acc: 0.6367 -- iter: 06208/20250


Training Step: 2634  | total loss: [1m[32m0.63516[0m[0m | time: 24.840s
[2K| Adam | epoch: 009 | loss: 0.63516 - acc: 0.6246 -- iter: 06272/20250


Training Step: 2635  | total loss: [1m[32m0.63238[0m[0m | time: 25.092s
[2K| Adam | epoch: 009 | loss: 0.63238 - acc: 0.6199 -- iter: 06336/20250


Training Step: 2636  | total loss: [1m[32m0.62554[0m[0m | time: 25.338s
[2K| Adam | epoch: 009 | loss: 0.62554 - acc: 0.6204 -- iter: 06400/20250


Training Step: 2637  | total loss: [1m[32m0.62420[0m[0m | time: 25.588s
[2K| Adam | epoch: 009 | loss: 0.62420 - acc: 0.6271 -- iter: 06464/20250


Training Step: 2638  | total loss: [1m[32m0.62444[0m[0m | time: 25.827s
[2K| Adam | epoch: 009 | loss: 0.62444 - acc: 0.6207 -- iter: 06528/20250


Training Step: 2639  | total loss: [1m[32m0.62339[0m[0m | time: 26.078s
[2K| Adam | epoch: 009 | loss: 0.62339 - acc: 0.6180 -- iter: 06592/20250


Training Step: 2640  | total loss: [1m[32m0.61949[0m[0m | time: 26.355s
[2K| Adam | epoch: 009 | loss: 0.61949 - acc: 0.6281 -- iter: 06656/20250


Training Step: 2641  | total loss: [1m[32m0.61608[0m[0m | time: 26.636s
[2K| Adam | epoch: 009 | loss: 0.61608 - acc: 0.6215 -- iter: 06720/20250


Training Step: 2642  | total loss: [1m[32m0.61541[0m[0m | time: 26.880s
[2K| Adam | epoch: 009 | loss: 0.61541 - acc: 0.6172 -- iter: 06784/20250


Training Step: 2643  | total loss: [1m[32m0.61921[0m[0m | time: 27.117s
[2K| Adam | epoch: 009 | loss: 0.61921 - acc: 0.6133 -- iter: 06848/20250


Training Step: 2644  | total loss: [1m[32m0.61748[0m[0m | time: 27.360s
[2K| Adam | epoch: 009 | loss: 0.61748 - acc: 0.6222 -- iter: 06912/20250


Training Step: 2645  | total loss: [1m[32m0.61176[0m[0m | time: 27.612s
[2K| Adam | epoch: 009 | loss: 0.61176 - acc: 0.6272 -- iter: 06976/20250


Training Step: 2646  | total loss: [1m[32m0.60763[0m[0m | time: 27.855s
[2K| Adam | epoch: 009 | loss: 0.60763 - acc: 0.6301 -- iter: 07040/20250


Training Step: 2647  | total loss: [1m[32m0.61324[0m[0m | time: 28.100s
[2K| Adam | epoch: 009 | loss: 0.61324 - acc: 0.6171 -- iter: 07104/20250


Training Step: 2648  | total loss: [1m[32m0.61161[0m[0m | time: 28.347s
[2K| Adam | epoch: 009 | loss: 0.61161 - acc: 0.6195 -- iter: 07168/20250


Training Step: 2649  | total loss: [1m[32m0.60684[0m[0m | time: 28.600s
[2K| Adam | epoch: 009 | loss: 0.60684 - acc: 0.6169 -- iter: 07232/20250


Training Step: 2650  | total loss: [1m[32m0.60724[0m[0m | time: 28.862s
[2K| Adam | epoch: 009 | loss: 0.60724 - acc: 0.6130 -- iter: 07296/20250


Training Step: 2651  | total loss: [1m[32m0.60876[0m[0m | time: 29.115s
[2K| Adam | epoch: 009 | loss: 0.60876 - acc: 0.6095 -- iter: 07360/20250


Training Step: 2652  | total loss: [1m[32m0.61374[0m[0m | time: 29.369s
[2K| Adam | epoch: 009 | loss: 0.61374 - acc: 0.6126 -- iter: 07424/20250


Training Step: 2653  | total loss: [1m[32m0.61428[0m[0m | time: 29.622s
[2K| Adam | epoch: 009 | loss: 0.61428 - acc: 0.6076 -- iter: 07488/20250


Training Step: 2654  | total loss: [1m[32m0.60822[0m[0m | time: 29.870s
[2K| Adam | epoch: 009 | loss: 0.60822 - acc: 0.6078 -- iter: 07552/20250


Training Step: 2655  | total loss: [1m[32m0.60651[0m[0m | time: 30.128s
[2K| Adam | epoch: 009 | loss: 0.60651 - acc: 0.6126 -- iter: 07616/20250


Training Step: 2656  | total loss: [1m[32m0.60038[0m[0m | time: 30.386s
[2K| Adam | epoch: 009 | loss: 0.60038 - acc: 0.6186 -- iter: 07680/20250


Training Step: 2657  | total loss: [1m[32m0.61105[0m[0m | time: 30.630s
[2K| Adam | epoch: 009 | loss: 0.61105 - acc: 0.6098 -- iter: 07744/20250


Training Step: 2658  | total loss: [1m[32m0.60094[0m[0m | time: 30.877s
[2K| Adam | epoch: 009 | loss: 0.60094 - acc: 0.6223 -- iter: 07808/20250


Training Step: 2659  | total loss: [1m[32m0.59827[0m[0m | time: 31.148s
[2K| Adam | epoch: 009 | loss: 0.59827 - acc: 0.6226 -- iter: 07872/20250


Training Step: 2660  | total loss: [1m[32m0.60913[0m[0m | time: 31.411s
[2K| Adam | epoch: 009 | loss: 0.60913 - acc: 0.6212 -- iter: 07936/20250


Training Step: 2661  | total loss: [1m[32m0.61190[0m[0m | time: 31.686s
[2K| Adam | epoch: 009 | loss: 0.61190 - acc: 0.6185 -- iter: 08000/20250


Training Step: 2662  | total loss: [1m[32m0.60803[0m[0m | time: 31.935s
[2K| Adam | epoch: 009 | loss: 0.60803 - acc: 0.6098 -- iter: 08064/20250


Training Step: 2663  | total loss: [1m[32m0.61364[0m[0m | time: 32.180s
[2K| Adam | epoch: 009 | loss: 0.61364 - acc: 0.6050 -- iter: 08128/20250


Training Step: 2664  | total loss: [1m[32m0.61139[0m[0m | time: 32.422s
[2K| Adam | epoch: 009 | loss: 0.61139 - acc: 0.6148 -- iter: 08192/20250


Training Step: 2665  | total loss: [1m[32m0.60956[0m[0m | time: 32.668s
[2K| Adam | epoch: 009 | loss: 0.60956 - acc: 0.6174 -- iter: 08256/20250


Training Step: 2666  | total loss: [1m[32m0.61662[0m[0m | time: 32.918s
[2K| Adam | epoch: 009 | loss: 0.61662 - acc: 0.6088 -- iter: 08320/20250


Training Step: 2667  | total loss: [1m[32m0.61429[0m[0m | time: 33.159s
[2K| Adam | epoch: 009 | loss: 0.61429 - acc: 0.6104 -- iter: 08384/20250


Training Step: 2668  | total loss: [1m[32m0.60896[0m[0m | time: 33.414s
[2K| Adam | epoch: 009 | loss: 0.60896 - acc: 0.6088 -- iter: 08448/20250


Training Step: 2669  | total loss: [1m[32m0.60855[0m[0m | time: 33.670s
[2K| Adam | epoch: 009 | loss: 0.60855 - acc: 0.6026 -- iter: 08512/20250


Training Step: 2670  | total loss: [1m[32m0.60491[0m[0m | time: 33.929s
[2K| Adam | epoch: 009 | loss: 0.60491 - acc: 0.6079 -- iter: 08576/20250


Training Step: 2671  | total loss: [1m[32m0.60391[0m[0m | time: 34.212s
[2K| Adam | epoch: 009 | loss: 0.60391 - acc: 0.6159 -- iter: 08640/20250


Training Step: 2672  | total loss: [1m[32m0.60231[0m[0m | time: 34.464s
[2K| Adam | epoch: 009 | loss: 0.60231 - acc: 0.6215 -- iter: 08704/20250


Training Step: 2673  | total loss: [1m[32m0.60300[0m[0m | time: 34.718s
[2K| Adam | epoch: 009 | loss: 0.60300 - acc: 0.6187 -- iter: 08768/20250


Training Step: 2674  | total loss: [1m[32m0.60899[0m[0m | time: 34.973s
[2K| Adam | epoch: 009 | loss: 0.60899 - acc: 0.6068 -- iter: 08832/20250


Training Step: 2675  | total loss: [1m[32m0.61105[0m[0m | time: 35.225s
[2K| Adam | epoch: 009 | loss: 0.61105 - acc: 0.5977 -- iter: 08896/20250


Training Step: 2676  | total loss: [1m[32m0.60367[0m[0m | time: 35.478s
[2K| Adam | epoch: 009 | loss: 0.60367 - acc: 0.6083 -- iter: 08960/20250


Training Step: 2677  | total loss: [1m[32m0.59875[0m[0m | time: 35.733s
[2K| Adam | epoch: 009 | loss: 0.59875 - acc: 0.6084 -- iter: 09024/20250


Training Step: 2678  | total loss: [1m[32m0.59890[0m[0m | time: 35.982s
[2K| Adam | epoch: 009 | loss: 0.59890 - acc: 0.6116 -- iter: 09088/20250


Training Step: 2679  | total loss: [1m[32m0.59134[0m[0m | time: 36.231s
[2K| Adam | epoch: 009 | loss: 0.59134 - acc: 0.6270 -- iter: 09152/20250


Training Step: 2680  | total loss: [1m[32m0.59787[0m[0m | time: 36.481s
[2K| Adam | epoch: 009 | loss: 0.59787 - acc: 0.6221 -- iter: 09216/20250


Training Step: 2681  | total loss: [1m[32m0.60274[0m[0m | time: 36.766s
[2K| Adam | epoch: 009 | loss: 0.60274 - acc: 0.6193 -- iter: 09280/20250


Training Step: 2682  | total loss: [1m[32m0.60528[0m[0m | time: 37.017s
[2K| Adam | epoch: 009 | loss: 0.60528 - acc: 0.6245 -- iter: 09344/20250


Training Step: 2683  | total loss: [1m[32m0.60616[0m[0m | time: 37.266s
[2K| Adam | epoch: 009 | loss: 0.60616 - acc: 0.6199 -- iter: 09408/20250


Training Step: 2684  | total loss: [1m[32m0.60511[0m[0m | time: 37.516s
[2K| Adam | epoch: 009 | loss: 0.60511 - acc: 0.6188 -- iter: 09472/20250


Training Step: 2685  | total loss: [1m[32m0.60543[0m[0m | time: 37.770s
[2K| Adam | epoch: 009 | loss: 0.60543 - acc: 0.6148 -- iter: 09536/20250


Training Step: 2686  | total loss: [1m[32m0.60357[0m[0m | time: 38.021s
[2K| Adam | epoch: 009 | loss: 0.60357 - acc: 0.6142 -- iter: 09600/20250


Training Step: 2687  | total loss: [1m[32m0.60341[0m[0m | time: 38.267s
[2K| Adam | epoch: 009 | loss: 0.60341 - acc: 0.6106 -- iter: 09664/20250


Training Step: 2688  | total loss: [1m[32m0.60845[0m[0m | time: 38.514s
[2K| Adam | epoch: 009 | loss: 0.60845 - acc: 0.6121 -- iter: 09728/20250


Training Step: 2689  | total loss: [1m[32m0.60900[0m[0m | time: 38.781s
[2K| Adam | epoch: 009 | loss: 0.60900 - acc: 0.6071 -- iter: 09792/20250


Training Step: 2690  | total loss: [1m[32m0.60338[0m[0m | time: 39.078s
[2K| Adam | epoch: 009 | loss: 0.60338 - acc: 0.6245 -- iter: 09856/20250


Training Step: 2691  | total loss: [1m[32m0.59947[0m[0m | time: 39.349s
[2K| Adam | epoch: 009 | loss: 0.59947 - acc: 0.6261 -- iter: 09920/20250


Training Step: 2692  | total loss: [1m[32m0.59747[0m[0m | time: 39.601s
[2K| Adam | epoch: 009 | loss: 0.59747 - acc: 0.6276 -- iter: 09984/20250


Training Step: 2693  | total loss: [1m[32m0.59677[0m[0m | time: 39.846s
[2K| Adam | epoch: 009 | loss: 0.59677 - acc: 0.6304 -- iter: 10048/20250


Training Step: 2694  | total loss: [1m[32m0.59495[0m[0m | time: 40.093s
[2K| Adam | epoch: 009 | loss: 0.59495 - acc: 0.6393 -- iter: 10112/20250


Training Step: 2695  | total loss: [1m[32m0.59739[0m[0m | time: 40.343s
[2K| Adam | epoch: 009 | loss: 0.59739 - acc: 0.6347 -- iter: 10176/20250


Training Step: 2696  | total loss: [1m[32m0.59734[0m[0m | time: 40.589s
[2K| Adam | epoch: 009 | loss: 0.59734 - acc: 0.6431 -- iter: 10240/20250


Training Step: 2697  | total loss: [1m[32m0.59572[0m[0m | time: 40.837s
[2K| Adam | epoch: 009 | loss: 0.59572 - acc: 0.6460 -- iter: 10304/20250


Training Step: 2698  | total loss: [1m[32m0.59512[0m[0m | time: 41.084s
[2K| Adam | epoch: 009 | loss: 0.59512 - acc: 0.6408 -- iter: 10368/20250


Training Step: 2699  | total loss: [1m[32m0.59945[0m[0m | time: 41.348s
[2K| Adam | epoch: 009 | loss: 0.59945 - acc: 0.6329 -- iter: 10432/20250


Training Step: 2700  | total loss: [1m[32m0.60251[0m[0m | time: 41.628s
[2K| Adam | epoch: 009 | loss: 0.60251 - acc: 0.6275 -- iter: 10496/20250


Training Step: 2701  | total loss: [1m[32m0.59926[0m[0m | time: 41.878s
[2K| Adam | epoch: 009 | loss: 0.59926 - acc: 0.6350 -- iter: 10560/20250


Training Step: 2702  | total loss: [1m[32m0.60280[0m[0m | time: 42.129s
[2K| Adam | epoch: 009 | loss: 0.60280 - acc: 0.6309 -- iter: 10624/20250


Training Step: 2703  | total loss: [1m[32m0.60646[0m[0m | time: 42.380s
[2K| Adam | epoch: 009 | loss: 0.60646 - acc: 0.6209 -- iter: 10688/20250


Training Step: 2704  | total loss: [1m[32m0.60102[0m[0m | time: 42.635s
[2K| Adam | epoch: 009 | loss: 0.60102 - acc: 0.6213 -- iter: 10752/20250


Training Step: 2705  | total loss: [1m[32m0.60478[0m[0m | time: 42.886s
[2K| Adam | epoch: 009 | loss: 0.60478 - acc: 0.6139 -- iter: 10816/20250


Training Step: 2706  | total loss: [1m[32m0.60507[0m[0m | time: 43.139s
[2K| Adam | epoch: 009 | loss: 0.60507 - acc: 0.6166 -- iter: 10880/20250


Training Step: 2707  | total loss: [1m[32m0.60554[0m[0m | time: 43.385s
[2K| Adam | epoch: 009 | loss: 0.60554 - acc: 0.6284 -- iter: 10944/20250


Training Step: 2708  | total loss: [1m[32m0.60206[0m[0m | time: 43.638s
[2K| Adam | epoch: 009 | loss: 0.60206 - acc: 0.6358 -- iter: 11008/20250


Training Step: 2709  | total loss: [1m[32m0.60391[0m[0m | time: 43.901s
[2K| Adam | epoch: 009 | loss: 0.60391 - acc: 0.6441 -- iter: 11072/20250


Training Step: 2710  | total loss: [1m[32m0.61175[0m[0m | time: 44.186s
[2K| Adam | epoch: 009 | loss: 0.61175 - acc: 0.6360 -- iter: 11136/20250


Training Step: 2711  | total loss: [1m[32m0.61549[0m[0m | time: 44.430s
[2K| Adam | epoch: 009 | loss: 0.61549 - acc: 0.6333 -- iter: 11200/20250


Training Step: 2712  | total loss: [1m[32m0.61406[0m[0m | time: 44.683s
[2K| Adam | epoch: 009 | loss: 0.61406 - acc: 0.6293 -- iter: 11264/20250


Training Step: 2713  | total loss: [1m[32m0.61260[0m[0m | time: 44.940s
[2K| Adam | epoch: 009 | loss: 0.61260 - acc: 0.6242 -- iter: 11328/20250


Training Step: 2714  | total loss: [1m[32m0.61629[0m[0m | time: 45.190s
[2K| Adam | epoch: 009 | loss: 0.61629 - acc: 0.6118 -- iter: 11392/20250


Training Step: 2715  | total loss: [1m[32m0.60549[0m[0m | time: 45.441s
[2K| Adam | epoch: 009 | loss: 0.60549 - acc: 0.6225 -- iter: 11456/20250


Training Step: 2716  | total loss: [1m[32m0.61229[0m[0m | time: 45.687s
[2K| Adam | epoch: 009 | loss: 0.61229 - acc: 0.6212 -- iter: 11520/20250


Training Step: 2717  | total loss: [1m[32m0.60812[0m[0m | time: 45.935s
[2K| Adam | epoch: 009 | loss: 0.60812 - acc: 0.6294 -- iter: 11584/20250


Training Step: 2718  | total loss: [1m[32m0.60947[0m[0m | time: 46.184s
[2K| Adam | epoch: 009 | loss: 0.60947 - acc: 0.6274 -- iter: 11648/20250


Training Step: 2719  | total loss: [1m[32m0.61070[0m[0m | time: 46.443s
[2K| Adam | epoch: 009 | loss: 0.61070 - acc: 0.6193 -- iter: 11712/20250


Training Step: 2720  | total loss: [1m[32m0.60878[0m[0m | time: 46.726s
[2K| Adam | epoch: 009 | loss: 0.60878 - acc: 0.6183 -- iter: 11776/20250


Training Step: 2721  | total loss: [1m[32m0.61117[0m[0m | time: 46.975s
[2K| Adam | epoch: 009 | loss: 0.61117 - acc: 0.6159 -- iter: 11840/20250


Training Step: 2722  | total loss: [1m[32m0.61155[0m[0m | time: 47.223s
[2K| Adam | epoch: 009 | loss: 0.61155 - acc: 0.6230 -- iter: 11904/20250


Training Step: 2723  | total loss: [1m[32m0.60495[0m[0m | time: 47.478s
[2K| Adam | epoch: 009 | loss: 0.60495 - acc: 0.6248 -- iter: 11968/20250


Training Step: 2724  | total loss: [1m[32m0.61301[0m[0m | time: 47.737s
[2K| Adam | epoch: 009 | loss: 0.61301 - acc: 0.6123 -- iter: 12032/20250


Training Step: 2725  | total loss: [1m[32m0.61462[0m[0m | time: 47.978s
[2K| Adam | epoch: 009 | loss: 0.61462 - acc: 0.6183 -- iter: 12096/20250


Training Step: 2726  | total loss: [1m[32m0.61182[0m[0m | time: 48.217s
[2K| Adam | epoch: 009 | loss: 0.61182 - acc: 0.6221 -- iter: 12160/20250


Training Step: 2727  | total loss: [1m[32m0.60866[0m[0m | time: 48.465s
[2K| Adam | epoch: 009 | loss: 0.60866 - acc: 0.6395 -- iter: 12224/20250


Training Step: 2728  | total loss: [1m[32m0.60504[0m[0m | time: 48.717s
[2K| Adam | epoch: 009 | loss: 0.60504 - acc: 0.6443 -- iter: 12288/20250


Training Step: 2729  | total loss: [1m[32m0.60008[0m[0m | time: 48.979s
[2K| Adam | epoch: 009 | loss: 0.60008 - acc: 0.6471 -- iter: 12352/20250


Training Step: 2730  | total loss: [1m[32m0.60310[0m[0m | time: 49.257s
[2K| Adam | epoch: 009 | loss: 0.60310 - acc: 0.6402 -- iter: 12416/20250


Training Step: 2731  | total loss: [1m[32m0.60243[0m[0m | time: 49.499s
[2K| Adam | epoch: 009 | loss: 0.60243 - acc: 0.6481 -- iter: 12480/20250


Training Step: 2732  | total loss: [1m[32m0.60002[0m[0m | time: 49.817s
[2K| Adam | epoch: 009 | loss: 0.60002 - acc: 0.6473 -- iter: 12544/20250


Training Step: 2733  | total loss: [1m[32m0.60177[0m[0m | time: 50.067s
[2K| Adam | epoch: 009 | loss: 0.60177 - acc: 0.6482 -- iter: 12608/20250


Training Step: 2734  | total loss: [1m[32m0.60422[0m[0m | time: 50.311s
[2K| Adam | epoch: 009 | loss: 0.60422 - acc: 0.6412 -- iter: 12672/20250


Training Step: 2735  | total loss: [1m[32m0.60652[0m[0m | time: 50.562s
[2K| Adam | epoch: 009 | loss: 0.60652 - acc: 0.6365 -- iter: 12736/20250


Training Step: 2736  | total loss: [1m[32m0.60364[0m[0m | time: 50.809s
[2K| Adam | epoch: 009 | loss: 0.60364 - acc: 0.6416 -- iter: 12800/20250


Training Step: 2737  | total loss: [1m[32m0.60033[0m[0m | time: 51.052s
[2K| Adam | epoch: 009 | loss: 0.60033 - acc: 0.6462 -- iter: 12864/20250


Training Step: 2738  | total loss: [1m[32m0.59401[0m[0m | time: 51.299s
[2K| Adam | epoch: 009 | loss: 0.59401 - acc: 0.6503 -- iter: 12928/20250


Training Step: 2739  | total loss: [1m[32m0.59389[0m[0m | time: 51.605s
[2K| Adam | epoch: 009 | loss: 0.59389 - acc: 0.6478 -- iter: 12992/20250


Training Step: 2740  | total loss: [1m[32m0.59592[0m[0m | time: 51.894s
[2K| Adam | epoch: 009 | loss: 0.59592 - acc: 0.6424 -- iter: 13056/20250


Training Step: 2741  | total loss: [1m[32m0.59814[0m[0m | time: 52.140s
[2K| Adam | epoch: 009 | loss: 0.59814 - acc: 0.6437 -- iter: 13120/20250


Training Step: 2742  | total loss: [1m[32m0.59758[0m[0m | time: 52.386s
[2K| Adam | epoch: 009 | loss: 0.59758 - acc: 0.6372 -- iter: 13184/20250


Training Step: 2743  | total loss: [1m[32m0.59656[0m[0m | time: 52.640s
[2K| Adam | epoch: 009 | loss: 0.59656 - acc: 0.6297 -- iter: 13248/20250


Training Step: 2744  | total loss: [1m[32m0.59728[0m[0m | time: 52.888s
[2K| Adam | epoch: 009 | loss: 0.59728 - acc: 0.6277 -- iter: 13312/20250


Training Step: 2745  | total loss: [1m[32m0.59772[0m[0m | time: 53.139s
[2K| Adam | epoch: 009 | loss: 0.59772 - acc: 0.6227 -- iter: 13376/20250


Training Step: 2746  | total loss: [1m[32m0.59576[0m[0m | time: 53.384s
[2K| Adam | epoch: 009 | loss: 0.59576 - acc: 0.6230 -- iter: 13440/20250


Training Step: 2747  | total loss: [1m[32m0.59377[0m[0m | time: 53.634s
[2K| Adam | epoch: 009 | loss: 0.59377 - acc: 0.6247 -- iter: 13504/20250


Training Step: 2748  | total loss: [1m[32m0.59337[0m[0m | time: 53.896s
[2K| Adam | epoch: 009 | loss: 0.59337 - acc: 0.6279 -- iter: 13568/20250


Training Step: 2749  | total loss: [1m[32m0.59406[0m[0m | time: 54.141s
[2K| Adam | epoch: 009 | loss: 0.59406 - acc: 0.6338 -- iter: 13632/20250


Training Step: 2750  | total loss: [1m[32m0.58432[0m[0m | time: 54.429s
[2K| Adam | epoch: 009 | loss: 0.58432 - acc: 0.6470 -- iter: 13696/20250


Training Step: 2751  | total loss: [1m[32m0.58479[0m[0m | time: 54.680s
[2K| Adam | epoch: 009 | loss: 0.58479 - acc: 0.6464 -- iter: 13760/20250


Training Step: 2752  | total loss: [1m[32m0.59194[0m[0m | time: 54.927s
[2K| Adam | epoch: 009 | loss: 0.59194 - acc: 0.6380 -- iter: 13824/20250


Training Step: 2753  | total loss: [1m[32m0.59038[0m[0m | time: 55.167s
[2K| Adam | epoch: 009 | loss: 0.59038 - acc: 0.6429 -- iter: 13888/20250


Training Step: 2754  | total loss: [1m[32m0.59211[0m[0m | time: 55.421s
[2K| Adam | epoch: 009 | loss: 0.59211 - acc: 0.6443 -- iter: 13952/20250


Training Step: 2755  | total loss: [1m[32m0.59045[0m[0m | time: 55.666s
[2K| Adam | epoch: 009 | loss: 0.59045 - acc: 0.6439 -- iter: 14016/20250


Training Step: 2756  | total loss: [1m[32m0.58720[0m[0m | time: 55.920s
[2K| Adam | epoch: 009 | loss: 0.58720 - acc: 0.6498 -- iter: 14080/20250


Training Step: 2757  | total loss: [1m[32m0.58291[0m[0m | time: 56.172s
[2K| Adam | epoch: 009 | loss: 0.58291 - acc: 0.6552 -- iter: 14144/20250


Training Step: 2758  | total loss: [1m[32m0.58296[0m[0m | time: 56.416s
[2K| Adam | epoch: 009 | loss: 0.58296 - acc: 0.6553 -- iter: 14208/20250


Training Step: 2759  | total loss: [1m[32m0.58035[0m[0m | time: 56.687s
[2K| Adam | epoch: 009 | loss: 0.58035 - acc: 0.6554 -- iter: 14272/20250


Training Step: 2760  | total loss: [1m[32m0.57162[0m[0m | time: 56.985s
[2K| Adam | epoch: 009 | loss: 0.57162 - acc: 0.6617 -- iter: 14336/20250


Training Step: 2761  | total loss: [1m[32m0.56943[0m[0m | time: 57.228s
[2K| Adam | epoch: 009 | loss: 0.56943 - acc: 0.6627 -- iter: 14400/20250


Training Step: 2762  | total loss: [1m[32m0.57091[0m[0m | time: 57.482s
[2K| Adam | epoch: 009 | loss: 0.57091 - acc: 0.6621 -- iter: 14464/20250


Training Step: 2763  | total loss: [1m[32m0.56738[0m[0m | time: 57.732s
[2K| Adam | epoch: 009 | loss: 0.56738 - acc: 0.6677 -- iter: 14528/20250


Training Step: 2764  | total loss: [1m[32m0.56924[0m[0m | time: 57.980s
[2K| Adam | epoch: 009 | loss: 0.56924 - acc: 0.6744 -- iter: 14592/20250


Training Step: 2765  | total loss: [1m[32m0.56728[0m[0m | time: 58.227s
[2K| Adam | epoch: 009 | loss: 0.56728 - acc: 0.6773 -- iter: 14656/20250


Training Step: 2766  | total loss: [1m[32m0.56518[0m[0m | time: 58.481s
[2K| Adam | epoch: 009 | loss: 0.56518 - acc: 0.6814 -- iter: 14720/20250


Training Step: 2767  | total loss: [1m[32m0.56951[0m[0m | time: 58.729s
[2K| Adam | epoch: 009 | loss: 0.56951 - acc: 0.6836 -- iter: 14784/20250


Training Step: 2768  | total loss: [1m[32m0.56719[0m[0m | time: 58.984s
[2K| Adam | epoch: 009 | loss: 0.56719 - acc: 0.6824 -- iter: 14848/20250


Training Step: 2769  | total loss: [1m[32m0.57384[0m[0m | time: 59.244s
[2K| Adam | epoch: 009 | loss: 0.57384 - acc: 0.6736 -- iter: 14912/20250


Training Step: 2770  | total loss: [1m[32m0.57247[0m[0m | time: 59.530s
[2K| Adam | epoch: 009 | loss: 0.57247 - acc: 0.6765 -- iter: 14976/20250


Training Step: 2771  | total loss: [1m[32m0.57543[0m[0m | time: 59.777s
[2K| Adam | epoch: 009 | loss: 0.57543 - acc: 0.6760 -- iter: 15040/20250


Training Step: 2772  | total loss: [1m[32m0.57595[0m[0m | time: 60.024s
[2K| Adam | epoch: 009 | loss: 0.57595 - acc: 0.6709 -- iter: 15104/20250


Training Step: 2773  | total loss: [1m[32m0.56918[0m[0m | time: 60.270s
[2K| Adam | epoch: 009 | loss: 0.56918 - acc: 0.6804 -- iter: 15168/20250


Training Step: 2774  | total loss: [1m[32m0.56278[0m[0m | time: 60.513s
[2K| Adam | epoch: 009 | loss: 0.56278 - acc: 0.6796 -- iter: 15232/20250


Training Step: 2775  | total loss: [1m[32m0.55891[0m[0m | time: 60.762s
[2K| Adam | epoch: 009 | loss: 0.55891 - acc: 0.6882 -- iter: 15296/20250


Training Step: 2776  | total loss: [1m[32m0.56482[0m[0m | time: 61.009s
[2K| Adam | epoch: 009 | loss: 0.56482 - acc: 0.6850 -- iter: 15360/20250


Training Step: 2777  | total loss: [1m[32m0.56078[0m[0m | time: 61.269s
[2K| Adam | epoch: 009 | loss: 0.56078 - acc: 0.6868 -- iter: 15424/20250


Training Step: 2778  | total loss: [1m[32m0.56046[0m[0m | time: 61.520s
[2K| Adam | epoch: 009 | loss: 0.56046 - acc: 0.6853 -- iter: 15488/20250


Training Step: 2779  | total loss: [1m[32m0.55523[0m[0m | time: 61.801s
[2K| Adam | epoch: 009 | loss: 0.55523 - acc: 0.6902 -- iter: 15552/20250


Training Step: 2780  | total loss: [1m[32m0.54957[0m[0m | time: 62.071s
[2K| Adam | epoch: 009 | loss: 0.54957 - acc: 0.6962 -- iter: 15616/20250


Training Step: 2781  | total loss: [1m[32m0.54011[0m[0m | time: 62.316s
[2K| Adam | epoch: 009 | loss: 0.54011 - acc: 0.7047 -- iter: 15680/20250


Training Step: 2782  | total loss: [1m[32m0.53628[0m[0m | time: 62.564s
[2K| Adam | epoch: 009 | loss: 0.53628 - acc: 0.7108 -- iter: 15744/20250


Training Step: 2783  | total loss: [1m[32m0.53336[0m[0m | time: 62.809s
[2K| Adam | epoch: 009 | loss: 0.53336 - acc: 0.7194 -- iter: 15808/20250


Training Step: 2784  | total loss: [1m[32m0.52873[0m[0m | time: 63.050s
[2K| Adam | epoch: 009 | loss: 0.52873 - acc: 0.7334 -- iter: 15872/20250


Training Step: 2785  | total loss: [1m[32m0.52832[0m[0m | time: 63.292s
[2K| Adam | epoch: 009 | loss: 0.52832 - acc: 0.7366 -- iter: 15936/20250


Training Step: 2786  | total loss: [1m[32m0.53383[0m[0m | time: 63.538s
[2K| Adam | epoch: 009 | loss: 0.53383 - acc: 0.7333 -- iter: 16000/20250


Training Step: 2787  | total loss: [1m[32m0.53525[0m[0m | time: 63.784s
[2K| Adam | epoch: 009 | loss: 0.53525 - acc: 0.7303 -- iter: 16064/20250


Training Step: 2788  | total loss: [1m[32m0.53086[0m[0m | time: 64.035s
[2K| Adam | epoch: 009 | loss: 0.53086 - acc: 0.7354 -- iter: 16128/20250


Training Step: 2789  | total loss: [1m[32m0.53119[0m[0m | time: 64.288s
[2K| Adam | epoch: 009 | loss: 0.53119 - acc: 0.7337 -- iter: 16192/20250


Training Step: 2790  | total loss: [1m[32m0.51875[0m[0m | time: 64.604s
[2K| Adam | epoch: 009 | loss: 0.51875 - acc: 0.7463 -- iter: 16256/20250


Training Step: 2791  | total loss: [1m[32m0.51435[0m[0m | time: 64.850s
[2K| Adam | epoch: 009 | loss: 0.51435 - acc: 0.7529 -- iter: 16320/20250


Training Step: 2792  | total loss: [1m[32m0.51312[0m[0m | time: 65.096s
[2K| Adam | epoch: 009 | loss: 0.51312 - acc: 0.7542 -- iter: 16384/20250


Training Step: 2793  | total loss: [1m[32m0.51817[0m[0m | time: 65.338s
[2K| Adam | epoch: 009 | loss: 0.51817 - acc: 0.7506 -- iter: 16448/20250


Training Step: 2794  | total loss: [1m[32m0.50591[0m[0m | time: 65.596s
[2K| Adam | epoch: 009 | loss: 0.50591 - acc: 0.7646 -- iter: 16512/20250


Training Step: 2795  | total loss: [1m[32m0.51040[0m[0m | time: 65.840s
[2K| Adam | epoch: 009 | loss: 0.51040 - acc: 0.7647 -- iter: 16576/20250


Training Step: 2796  | total loss: [1m[32m0.51836[0m[0m | time: 66.087s
[2K| Adam | epoch: 009 | loss: 0.51836 - acc: 0.7586 -- iter: 16640/20250


Training Step: 2797  | total loss: [1m[32m0.52837[0m[0m | time: 66.341s
[2K| Adam | epoch: 009 | loss: 0.52837 - acc: 0.7499 -- iter: 16704/20250


Training Step: 2798  | total loss: [1m[32m0.53100[0m[0m | time: 66.584s
[2K| Adam | epoch: 009 | loss: 0.53100 - acc: 0.7468 -- iter: 16768/20250


Training Step: 2799  | total loss: [1m[32m0.53763[0m[0m | time: 66.859s
[2K| Adam | epoch: 009 | loss: 0.53763 - acc: 0.7330 -- iter: 16832/20250


Training Step: 2800  | total loss: [1m[32m0.53913[0m[0m | time: 67.126s
[2K| Adam | epoch: 009 | loss: 0.53913 - acc: 0.7285 -- iter: 16896/20250


Training Step: 2801  | total loss: [1m[32m0.53653[0m[0m | time: 67.370s
[2K| Adam | epoch: 009 | loss: 0.53653 - acc: 0.7322 -- iter: 16960/20250


Training Step: 2802  | total loss: [1m[32m0.53589[0m[0m | time: 67.641s
[2K| Adam | epoch: 009 | loss: 0.53589 - acc: 0.7293 -- iter: 17024/20250


Training Step: 2803  | total loss: [1m[32m0.53581[0m[0m | time: 67.888s
[2K| Adam | epoch: 009 | loss: 0.53581 - acc: 0.7282 -- iter: 17088/20250


Training Step: 2804  | total loss: [1m[32m0.53608[0m[0m | time: 68.143s
[2K| Adam | epoch: 009 | loss: 0.53608 - acc: 0.7273 -- iter: 17152/20250


Training Step: 2805  | total loss: [1m[32m0.53673[0m[0m | time: 68.391s
[2K| Adam | epoch: 009 | loss: 0.53673 - acc: 0.7202 -- iter: 17216/20250


Training Step: 2806  | total loss: [1m[32m0.54274[0m[0m | time: 68.643s
[2K| Adam | epoch: 009 | loss: 0.54274 - acc: 0.7122 -- iter: 17280/20250


Training Step: 2807  | total loss: [1m[32m0.53769[0m[0m | time: 68.898s
[2K| Adam | epoch: 009 | loss: 0.53769 - acc: 0.7176 -- iter: 17344/20250


Training Step: 2808  | total loss: [1m[32m0.53143[0m[0m | time: 69.145s
[2K| Adam | epoch: 009 | loss: 0.53143 - acc: 0.7224 -- iter: 17408/20250


Training Step: 2809  | total loss: [1m[32m0.53497[0m[0m | time: 69.402s
[2K| Adam | epoch: 009 | loss: 0.53497 - acc: 0.7236 -- iter: 17472/20250


Training Step: 2810  | total loss: [1m[32m0.52787[0m[0m | time: 69.655s
[2K| Adam | epoch: 009 | loss: 0.52787 - acc: 0.7340 -- iter: 17536/20250


Training Step: 2811  | total loss: [1m[32m0.52544[0m[0m | time: 69.941s
[2K| Adam | epoch: 009 | loss: 0.52544 - acc: 0.7372 -- iter: 17600/20250


Training Step: 2812  | total loss: [1m[32m0.51480[0m[0m | time: 70.195s
[2K| Adam | epoch: 009 | loss: 0.51480 - acc: 0.7494 -- iter: 17664/20250


Training Step: 2813  | total loss: [1m[32m0.50864[0m[0m | time: 70.441s
[2K| Adam | epoch: 009 | loss: 0.50864 - acc: 0.7542 -- iter: 17728/20250


Training Step: 2814  | total loss: [1m[32m0.50565[0m[0m | time: 70.685s
[2K| Adam | epoch: 009 | loss: 0.50565 - acc: 0.7522 -- iter: 17792/20250


Training Step: 2815  | total loss: [1m[32m0.50038[0m[0m | time: 70.931s
[2K| Adam | epoch: 009 | loss: 0.50038 - acc: 0.7582 -- iter: 17856/20250


Training Step: 2816  | total loss: [1m[32m0.49286[0m[0m | time: 71.171s
[2K| Adam | epoch: 009 | loss: 0.49286 - acc: 0.7714 -- iter: 17920/20250


Training Step: 2817  | total loss: [1m[32m0.48606[0m[0m | time: 71.415s
[2K| Adam | epoch: 009 | loss: 0.48606 - acc: 0.7771 -- iter: 17984/20250


Training Step: 2818  | total loss: [1m[32m0.47964[0m[0m | time: 71.659s
[2K| Adam | epoch: 009 | loss: 0.47964 - acc: 0.7822 -- iter: 18048/20250


Training Step: 2819  | total loss: [1m[32m0.47848[0m[0m | time: 71.914s
[2K| Adam | epoch: 009 | loss: 0.47848 - acc: 0.7868 -- iter: 18112/20250


Training Step: 2820  | total loss: [1m[32m0.46833[0m[0m | time: 72.177s
[2K| Adam | epoch: 009 | loss: 0.46833 - acc: 0.7925 -- iter: 18176/20250


Training Step: 2821  | total loss: [1m[32m0.47271[0m[0m | time: 72.446s
[2K| Adam | epoch: 009 | loss: 0.47271 - acc: 0.7945 -- iter: 18240/20250


Training Step: 2822  | total loss: [1m[32m0.47553[0m[0m | time: 72.693s
[2K| Adam | epoch: 009 | loss: 0.47553 - acc: 0.7854 -- iter: 18304/20250


Training Step: 2823  | total loss: [1m[32m0.47059[0m[0m | time: 72.945s
[2K| Adam | epoch: 009 | loss: 0.47059 - acc: 0.7912 -- iter: 18368/20250


Training Step: 2824  | total loss: [1m[32m0.47025[0m[0m | time: 73.210s
[2K| Adam | epoch: 009 | loss: 0.47025 - acc: 0.7933 -- iter: 18432/20250


Training Step: 2825  | total loss: [1m[32m0.47064[0m[0m | time: 73.461s
[2K| Adam | epoch: 009 | loss: 0.47064 - acc: 0.7921 -- iter: 18496/20250


Training Step: 2826  | total loss: [1m[32m0.48165[0m[0m | time: 73.710s
[2K| Adam | epoch: 009 | loss: 0.48165 - acc: 0.7910 -- iter: 18560/20250


Training Step: 2827  | total loss: [1m[32m0.47185[0m[0m | time: 73.952s
[2K| Adam | epoch: 009 | loss: 0.47185 - acc: 0.7994 -- iter: 18624/20250


Training Step: 2828  | total loss: [1m[32m0.47829[0m[0m | time: 74.200s
[2K| Adam | epoch: 009 | loss: 0.47829 - acc: 0.7914 -- iter: 18688/20250


Training Step: 2829  | total loss: [1m[32m0.46876[0m[0m | time: 74.446s
[2K| Adam | epoch: 009 | loss: 0.46876 - acc: 0.7966 -- iter: 18752/20250


Training Step: 2830  | total loss: [1m[32m0.46019[0m[0m | time: 74.696s
[2K| Adam | epoch: 009 | loss: 0.46019 - acc: 0.7998 -- iter: 18816/20250


Training Step: 2831  | total loss: [1m[32m0.45654[0m[0m | time: 74.972s
[2K| Adam | epoch: 009 | loss: 0.45654 - acc: 0.8026 -- iter: 18880/20250


Training Step: 2832  | total loss: [1m[32m0.45637[0m[0m | time: 75.223s
[2K| Adam | epoch: 009 | loss: 0.45637 - acc: 0.8005 -- iter: 18944/20250


Training Step: 2833  | total loss: [1m[32m0.47471[0m[0m | time: 75.472s
[2K| Adam | epoch: 009 | loss: 0.47471 - acc: 0.7939 -- iter: 19008/20250


Training Step: 2834  | total loss: [1m[32m0.47083[0m[0m | time: 75.714s
[2K| Adam | epoch: 009 | loss: 0.47083 - acc: 0.8020 -- iter: 19072/20250


Training Step: 2835  | total loss: [1m[32m0.47505[0m[0m | time: 75.959s
[2K| Adam | epoch: 009 | loss: 0.47505 - acc: 0.7999 -- iter: 19136/20250


Training Step: 2836  | total loss: [1m[32m0.48805[0m[0m | time: 76.208s
[2K| Adam | epoch: 009 | loss: 0.48805 - acc: 0.7933 -- iter: 19200/20250


Training Step: 2837  | total loss: [1m[32m0.49114[0m[0m | time: 76.449s
[2K| Adam | epoch: 009 | loss: 0.49114 - acc: 0.7937 -- iter: 19264/20250


Training Step: 2838  | total loss: [1m[32m0.49256[0m[0m | time: 76.699s
[2K| Adam | epoch: 009 | loss: 0.49256 - acc: 0.7940 -- iter: 19328/20250


Training Step: 2839  | total loss: [1m[32m0.49756[0m[0m | time: 76.946s
[2K| Adam | epoch: 009 | loss: 0.49756 - acc: 0.7865 -- iter: 19392/20250


Training Step: 2840  | total loss: [1m[32m0.51102[0m[0m | time: 77.204s
[2K| Adam | epoch: 009 | loss: 0.51102 - acc: 0.7766 -- iter: 19456/20250


Training Step: 2841  | total loss: [1m[32m0.50851[0m[0m | time: 77.471s
[2K| Adam | epoch: 009 | loss: 0.50851 - acc: 0.7786 -- iter: 19520/20250


Training Step: 2842  | total loss: [1m[32m0.50009[0m[0m | time: 77.750s
[2K| Adam | epoch: 009 | loss: 0.50009 - acc: 0.7820 -- iter: 19584/20250


Training Step: 2843  | total loss: [1m[32m0.49816[0m[0m | time: 77.996s
[2K| Adam | epoch: 009 | loss: 0.49816 - acc: 0.7804 -- iter: 19648/20250


Training Step: 2844  | total loss: [1m[32m0.49099[0m[0m | time: 78.241s
[2K| Adam | epoch: 009 | loss: 0.49099 - acc: 0.7883 -- iter: 19712/20250


Training Step: 2845  | total loss: [1m[32m0.47552[0m[0m | time: 78.489s
[2K| Adam | epoch: 009 | loss: 0.47552 - acc: 0.8001 -- iter: 19776/20250


Training Step: 2846  | total loss: [1m[32m0.47813[0m[0m | time: 78.736s
[2K| Adam | epoch: 009 | loss: 0.47813 - acc: 0.7997 -- iter: 19840/20250


Training Step: 2847  | total loss: [1m[32m0.48002[0m[0m | time: 78.986s
[2K| Adam | epoch: 009 | loss: 0.48002 - acc: 0.7995 -- iter: 19904/20250


Training Step: 2848  | total loss: [1m[32m0.47434[0m[0m | time: 79.229s
[2K| Adam | epoch: 009 | loss: 0.47434 - acc: 0.8023 -- iter: 19968/20250


Training Step: 2849  | total loss: [1m[32m0.46276[0m[0m | time: 79.476s
[2K| Adam | epoch: 009 | loss: 0.46276 - acc: 0.8080 -- iter: 20032/20250


Training Step: 2850  | total loss: [1m[32m0.45524[0m[0m | time: 79.722s
[2K| Adam | epoch: 009 | loss: 0.45524 - acc: 0.8100 -- iter: 20096/20250


Training Step: 2851  | total loss: [1m[32m0.45454[0m[0m | time: 79.993s
[2K| Adam | epoch: 009 | loss: 0.45454 - acc: 0.8087 -- iter: 20160/20250


Training Step: 2852  | total loss: [1m[32m0.45079[0m[0m | time: 80.275s
[2K| Adam | epoch: 009 | loss: 0.45079 - acc: 0.8075 -- iter: 20224/20250


Training Step: 2853  | total loss: [1m[32m0.45818[0m[0m | time: 82.228s
[2K| Adam | epoch: 009 | loss: 0.45818 - acc: 0.8002 | val_loss: 0.62343 - val_acc: 0.7373 -- iter: 20250/20250
--


Training Step: 2854  | total loss: [1m[32m0.45128[0m[0m | time: 0.247s
[2K| Adam | epoch: 010 | loss: 0.45128 - acc: 0.8061 -- iter: 00064/20250


Training Step: 2855  | total loss: [1m[32m0.44118[0m[0m | time: 0.531s
[2K| Adam | epoch: 010 | loss: 0.44118 - acc: 0.8115 -- iter: 00128/20250


Training Step: 2856  | total loss: [1m[32m0.44038[0m[0m | time: 0.786s
[2K| Adam | epoch: 010 | loss: 0.44038 - acc: 0.8131 -- iter: 00192/20250


Training Step: 2857  | total loss: [1m[32m0.43640[0m[0m | time: 1.031s
[2K| Adam | epoch: 010 | loss: 0.43640 - acc: 0.8131 -- iter: 00256/20250


Training Step: 2858  | total loss: [1m[32m0.43590[0m[0m | time: 1.278s
[2K| Adam | epoch: 010 | loss: 0.43590 - acc: 0.8161 -- iter: 00320/20250


Training Step: 2859  | total loss: [1m[32m0.43481[0m[0m | time: 1.523s
[2K| Adam | epoch: 010 | loss: 0.43481 - acc: 0.8126 -- iter: 00384/20250


Training Step: 2860  | total loss: [1m[32m0.44267[0m[0m | time: 1.770s
[2K| Adam | epoch: 010 | loss: 0.44267 - acc: 0.8079 -- iter: 00448/20250


Training Step: 2861  | total loss: [1m[32m0.45141[0m[0m | time: 2.010s
[2K| Adam | epoch: 010 | loss: 0.45141 - acc: 0.8037 -- iter: 00512/20250


Training Step: 2862  | total loss: [1m[32m0.43994[0m[0m | time: 2.255s
[2K| Adam | epoch: 010 | loss: 0.43994 - acc: 0.8118 -- iter: 00576/20250


Training Step: 2863  | total loss: [1m[32m0.43401[0m[0m | time: 2.501s
[2K| Adam | epoch: 010 | loss: 0.43401 - acc: 0.8191 -- iter: 00640/20250


Training Step: 2864  | total loss: [1m[32m0.42543[0m[0m | time: 2.760s
[2K| Adam | epoch: 010 | loss: 0.42543 - acc: 0.8215 -- iter: 00704/20250


Training Step: 2865  | total loss: [1m[32m0.43373[0m[0m | time: 3.076s
[2K| Adam | epoch: 010 | loss: 0.43373 - acc: 0.8144 -- iter: 00768/20250


Training Step: 2866  | total loss: [1m[32m0.43122[0m[0m | time: 3.347s
[2K| Adam | epoch: 010 | loss: 0.43122 - acc: 0.8205 -- iter: 00832/20250


Training Step: 2867  | total loss: [1m[32m0.44600[0m[0m | time: 3.593s
[2K| Adam | epoch: 010 | loss: 0.44600 - acc: 0.8150 -- iter: 00896/20250


Training Step: 2868  | total loss: [1m[32m0.44688[0m[0m | time: 3.837s
[2K| Adam | epoch: 010 | loss: 0.44688 - acc: 0.8116 -- iter: 00960/20250


Training Step: 2869  | total loss: [1m[32m0.44113[0m[0m | time: 4.085s
[2K| Adam | epoch: 010 | loss: 0.44113 - acc: 0.8117 -- iter: 01024/20250


Training Step: 2870  | total loss: [1m[32m0.44514[0m[0m | time: 4.331s
[2K| Adam | epoch: 010 | loss: 0.44514 - acc: 0.8102 -- iter: 01088/20250


Training Step: 2871  | total loss: [1m[32m0.43681[0m[0m | time: 4.575s
[2K| Adam | epoch: 010 | loss: 0.43681 - acc: 0.8151 -- iter: 01152/20250


Training Step: 2872  | total loss: [1m[32m0.44050[0m[0m | time: 4.827s
[2K| Adam | epoch: 010 | loss: 0.44050 - acc: 0.8133 -- iter: 01216/20250


Training Step: 2873  | total loss: [1m[32m0.43448[0m[0m | time: 5.077s
[2K| Adam | epoch: 010 | loss: 0.43448 - acc: 0.8195 -- iter: 01280/20250


Training Step: 2874  | total loss: [1m[32m0.41924[0m[0m | time: 5.340s
[2K| Adam | epoch: 010 | loss: 0.41924 - acc: 0.8313 -- iter: 01344/20250


Training Step: 2875  | total loss: [1m[32m0.42074[0m[0m | time: 5.625s
[2K| Adam | epoch: 010 | loss: 0.42074 - acc: 0.8294 -- iter: 01408/20250


Training Step: 2876  | total loss: [1m[32m0.42943[0m[0m | time: 5.872s
[2K| Adam | epoch: 010 | loss: 0.42943 - acc: 0.8246 -- iter: 01472/20250


Training Step: 2877  | total loss: [1m[32m0.43570[0m[0m | time: 6.122s
[2K| Adam | epoch: 010 | loss: 0.43570 - acc: 0.8187 -- iter: 01536/20250


Training Step: 2878  | total loss: [1m[32m0.42646[0m[0m | time: 6.373s
[2K| Adam | epoch: 010 | loss: 0.42646 - acc: 0.8243 -- iter: 01600/20250


Training Step: 2879  | total loss: [1m[32m0.42383[0m[0m | time: 6.613s
[2K| Adam | epoch: 010 | loss: 0.42383 - acc: 0.8263 -- iter: 01664/20250


Training Step: 2880  | total loss: [1m[32m0.43503[0m[0m | time: 6.860s
[2K| Adam | epoch: 010 | loss: 0.43503 - acc: 0.8186 -- iter: 01728/20250


Training Step: 2881  | total loss: [1m[32m0.42518[0m[0m | time: 7.105s
[2K| Adam | epoch: 010 | loss: 0.42518 - acc: 0.8227 -- iter: 01792/20250


Training Step: 2882  | total loss: [1m[32m0.42477[0m[0m | time: 7.357s
[2K| Adam | epoch: 010 | loss: 0.42477 - acc: 0.8217 -- iter: 01856/20250


Training Step: 2883  | total loss: [1m[32m0.41462[0m[0m | time: 7.602s
[2K| Adam | epoch: 010 | loss: 0.41462 - acc: 0.8286 -- iter: 01920/20250


Training Step: 2884  | total loss: [1m[32m0.40502[0m[0m | time: 7.858s
[2K| Adam | epoch: 010 | loss: 0.40502 - acc: 0.8317 -- iter: 01984/20250


Training Step: 2885  | total loss: [1m[32m0.39546[0m[0m | time: 8.131s
[2K| Adam | epoch: 010 | loss: 0.39546 - acc: 0.8376 -- iter: 02048/20250


Training Step: 2886  | total loss: [1m[32m0.39528[0m[0m | time: 8.386s
[2K| Adam | epoch: 010 | loss: 0.39528 - acc: 0.8413 -- iter: 02112/20250


Training Step: 2887  | total loss: [1m[32m0.39999[0m[0m | time: 8.635s
[2K| Adam | epoch: 010 | loss: 0.39999 - acc: 0.8384 -- iter: 02176/20250


Training Step: 2888  | total loss: [1m[32m0.39193[0m[0m | time: 8.880s
[2K| Adam | epoch: 010 | loss: 0.39193 - acc: 0.8421 -- iter: 02240/20250


Training Step: 2889  | total loss: [1m[32m0.38587[0m[0m | time: 9.126s
[2K| Adam | epoch: 010 | loss: 0.38587 - acc: 0.8469 -- iter: 02304/20250


Training Step: 2890  | total loss: [1m[32m0.38603[0m[0m | time: 9.374s
[2K| Adam | epoch: 010 | loss: 0.38603 - acc: 0.8497 -- iter: 02368/20250


Training Step: 2891  | total loss: [1m[32m0.38056[0m[0m | time: 9.615s
[2K| Adam | epoch: 010 | loss: 0.38056 - acc: 0.8507 -- iter: 02432/20250


Training Step: 2892  | total loss: [1m[32m0.38647[0m[0m | time: 9.859s
[2K| Adam | epoch: 010 | loss: 0.38647 - acc: 0.8453 -- iter: 02496/20250


Training Step: 2893  | total loss: [1m[32m0.38166[0m[0m | time: 10.108s
[2K| Adam | epoch: 010 | loss: 0.38166 - acc: 0.8483 -- iter: 02560/20250


Training Step: 2894  | total loss: [1m[32m0.37502[0m[0m | time: 10.359s
[2K| Adam | epoch: 010 | loss: 0.37502 - acc: 0.8541 -- iter: 02624/20250


Training Step: 2895  | total loss: [1m[32m0.38117[0m[0m | time: 10.649s
[2K| Adam | epoch: 010 | loss: 0.38117 - acc: 0.8468 -- iter: 02688/20250


Training Step: 2896  | total loss: [1m[32m0.38393[0m[0m | time: 10.897s
[2K| Adam | epoch: 010 | loss: 0.38393 - acc: 0.8512 -- iter: 02752/20250


Training Step: 2897  | total loss: [1m[32m0.38559[0m[0m | time: 11.142s
[2K| Adam | epoch: 010 | loss: 0.38559 - acc: 0.8520 -- iter: 02816/20250


Training Step: 2898  | total loss: [1m[32m0.37701[0m[0m | time: 11.387s
[2K| Adam | epoch: 010 | loss: 0.37701 - acc: 0.8574 -- iter: 02880/20250


Training Step: 2899  | total loss: [1m[32m0.36787[0m[0m | time: 11.636s
[2K| Adam | epoch: 010 | loss: 0.36787 - acc: 0.8623 -- iter: 02944/20250


Training Step: 2900  | total loss: [1m[32m0.36558[0m[0m | time: 11.894s
[2K| Adam | epoch: 010 | loss: 0.36558 - acc: 0.8605 -- iter: 03008/20250


Training Step: 2901  | total loss: [1m[32m0.35962[0m[0m | time: 12.137s
[2K| Adam | epoch: 010 | loss: 0.35962 - acc: 0.8635 -- iter: 03072/20250


Training Step: 2902  | total loss: [1m[32m0.35636[0m[0m | time: 12.388s
[2K| Adam | epoch: 010 | loss: 0.35636 - acc: 0.8631 -- iter: 03136/20250


Training Step: 2903  | total loss: [1m[32m0.35563[0m[0m | time: 12.638s
[2K| Adam | epoch: 010 | loss: 0.35563 - acc: 0.8627 -- iter: 03200/20250


Training Step: 2904  | total loss: [1m[32m0.36033[0m[0m | time: 12.903s
[2K| Adam | epoch: 010 | loss: 0.36033 - acc: 0.8577 -- iter: 03264/20250


Training Step: 2905  | total loss: [1m[32m0.36028[0m[0m | time: 13.184s
[2K| Adam | epoch: 010 | loss: 0.36028 - acc: 0.8563 -- iter: 03328/20250


Training Step: 2906  | total loss: [1m[32m0.36632[0m[0m | time: 13.433s
[2K| Adam | epoch: 010 | loss: 0.36632 - acc: 0.8566 -- iter: 03392/20250


Training Step: 2907  | total loss: [1m[32m0.36231[0m[0m | time: 13.680s
[2K| Adam | epoch: 010 | loss: 0.36231 - acc: 0.8584 -- iter: 03456/20250


Training Step: 2908  | total loss: [1m[32m0.37047[0m[0m | time: 13.932s
[2K| Adam | epoch: 010 | loss: 0.37047 - acc: 0.8585 -- iter: 03520/20250


Training Step: 2909  | total loss: [1m[32m0.37452[0m[0m | time: 14.178s
[2K| Adam | epoch: 010 | loss: 0.37452 - acc: 0.8570 -- iter: 03584/20250


Training Step: 2910  | total loss: [1m[32m0.38051[0m[0m | time: 14.427s
[2K| Adam | epoch: 010 | loss: 0.38051 - acc: 0.8526 -- iter: 03648/20250


Training Step: 2911  | total loss: [1m[32m0.37943[0m[0m | time: 14.670s
[2K| Adam | epoch: 010 | loss: 0.37943 - acc: 0.8501 -- iter: 03712/20250


Training Step: 2912  | total loss: [1m[32m0.37792[0m[0m | time: 14.912s
[2K| Adam | epoch: 010 | loss: 0.37792 - acc: 0.8542 -- iter: 03776/20250


Training Step: 2913  | total loss: [1m[32m0.37571[0m[0m | time: 15.154s
[2K| Adam | epoch: 010 | loss: 0.37571 - acc: 0.8578 -- iter: 03840/20250


Training Step: 2914  | total loss: [1m[32m0.37841[0m[0m | time: 15.413s
[2K| Adam | epoch: 010 | loss: 0.37841 - acc: 0.8611 -- iter: 03904/20250


Training Step: 2915  | total loss: [1m[32m0.36382[0m[0m | time: 15.700s
[2K| Adam | epoch: 010 | loss: 0.36382 - acc: 0.8688 -- iter: 03968/20250


Training Step: 2916  | total loss: [1m[32m0.35403[0m[0m | time: 15.948s
[2K| Adam | epoch: 010 | loss: 0.35403 - acc: 0.8709 -- iter: 04032/20250


Training Step: 2917  | total loss: [1m[32m0.36096[0m[0m | time: 16.190s
[2K| Adam | epoch: 010 | loss: 0.36096 - acc: 0.8604 -- iter: 04096/20250


Training Step: 2918  | total loss: [1m[32m0.36230[0m[0m | time: 16.434s
[2K| Adam | epoch: 010 | loss: 0.36230 - acc: 0.8572 -- iter: 04160/20250


Training Step: 2919  | total loss: [1m[32m0.36492[0m[0m | time: 16.680s
[2K| Adam | epoch: 010 | loss: 0.36492 - acc: 0.8558 -- iter: 04224/20250


Training Step: 2920  | total loss: [1m[32m0.36064[0m[0m | time: 16.924s
[2K| Adam | epoch: 010 | loss: 0.36064 - acc: 0.8578 -- iter: 04288/20250


Training Step: 2921  | total loss: [1m[32m0.35751[0m[0m | time: 17.171s
[2K| Adam | epoch: 010 | loss: 0.35751 - acc: 0.8579 -- iter: 04352/20250


Training Step: 2922  | total loss: [1m[32m0.37015[0m[0m | time: 17.419s
[2K| Adam | epoch: 010 | loss: 0.37015 - acc: 0.8534 -- iter: 04416/20250


Training Step: 2923  | total loss: [1m[32m0.37581[0m[0m | time: 17.670s
[2K| Adam | epoch: 010 | loss: 0.37581 - acc: 0.8493 -- iter: 04480/20250


Training Step: 2924  | total loss: [1m[32m0.36693[0m[0m | time: 17.940s
[2K| Adam | epoch: 010 | loss: 0.36693 - acc: 0.8550 -- iter: 04544/20250


Training Step: 2925  | total loss: [1m[32m0.36696[0m[0m | time: 18.227s
[2K| Adam | epoch: 010 | loss: 0.36696 - acc: 0.8570 -- iter: 04608/20250


Training Step: 2926  | total loss: [1m[32m0.36891[0m[0m | time: 18.474s
[2K| Adam | epoch: 010 | loss: 0.36891 - acc: 0.8541 -- iter: 04672/20250


Training Step: 2927  | total loss: [1m[32m0.35641[0m[0m | time: 18.725s
[2K| Adam | epoch: 010 | loss: 0.35641 - acc: 0.8562 -- iter: 04736/20250


Training Step: 2928  | total loss: [1m[32m0.35169[0m[0m | time: 18.969s
[2K| Adam | epoch: 010 | loss: 0.35169 - acc: 0.8612 -- iter: 04800/20250


Training Step: 2929  | total loss: [1m[32m0.36084[0m[0m | time: 19.211s
[2K| Adam | epoch: 010 | loss: 0.36084 - acc: 0.8595 -- iter: 04864/20250


Training Step: 2930  | total loss: [1m[32m0.35148[0m[0m | time: 19.452s
[2K| Adam | epoch: 010 | loss: 0.35148 - acc: 0.8673 -- iter: 04928/20250


Training Step: 2931  | total loss: [1m[32m0.34821[0m[0m | time: 19.698s
[2K| Adam | epoch: 010 | loss: 0.34821 - acc: 0.8665 -- iter: 04992/20250


Training Step: 2932  | total loss: [1m[32m0.37494[0m[0m | time: 19.940s
[2K| Adam | epoch: 010 | loss: 0.37494 - acc: 0.8548 -- iter: 05056/20250


Training Step: 2933  | total loss: [1m[32m0.36896[0m[0m | time: 20.190s
[2K| Adam | epoch: 010 | loss: 0.36896 - acc: 0.8553 -- iter: 05120/20250


Training Step: 2934  | total loss: [1m[32m0.36260[0m[0m | time: 20.460s
[2K| Adam | epoch: 010 | loss: 0.36260 - acc: 0.8588 -- iter: 05184/20250


Training Step: 2935  | total loss: [1m[32m0.36065[0m[0m | time: 20.718s
[2K| Adam | epoch: 010 | loss: 0.36065 - acc: 0.8573 -- iter: 05248/20250


Training Step: 2936  | total loss: [1m[32m0.35715[0m[0m | time: 21.014s
[2K| Adam | epoch: 010 | loss: 0.35715 - acc: 0.8591 -- iter: 05312/20250


Training Step: 2937  | total loss: [1m[32m0.35273[0m[0m | time: 21.255s
[2K| Adam | epoch: 010 | loss: 0.35273 - acc: 0.8622 -- iter: 05376/20250


Training Step: 2938  | total loss: [1m[32m0.36742[0m[0m | time: 21.506s
[2K| Adam | epoch: 010 | loss: 0.36742 - acc: 0.8541 -- iter: 05440/20250


Training Step: 2939  | total loss: [1m[32m0.37123[0m[0m | time: 21.750s
[2K| Adam | epoch: 010 | loss: 0.37123 - acc: 0.8515 -- iter: 05504/20250


Training Step: 2940  | total loss: [1m[32m0.38209[0m[0m | time: 21.994s
[2K| Adam | epoch: 010 | loss: 0.38209 - acc: 0.8429 -- iter: 05568/20250


Training Step: 2941  | total loss: [1m[32m0.38152[0m[0m | time: 22.254s
[2K| Adam | epoch: 010 | loss: 0.38152 - acc: 0.8461 -- iter: 05632/20250


Training Step: 2942  | total loss: [1m[32m0.38462[0m[0m | time: 22.500s
[2K| Adam | epoch: 010 | loss: 0.38462 - acc: 0.8428 -- iter: 05696/20250


Training Step: 2943  | total loss: [1m[32m0.38320[0m[0m | time: 22.750s
[2K| Adam | epoch: 010 | loss: 0.38320 - acc: 0.8429 -- iter: 05760/20250


Training Step: 2944  | total loss: [1m[32m0.38413[0m[0m | time: 23.015s
[2K| Adam | epoch: 010 | loss: 0.38413 - acc: 0.8383 -- iter: 05824/20250


Training Step: 2945  | total loss: [1m[32m0.39885[0m[0m | time: 23.284s
[2K| Adam | epoch: 010 | loss: 0.39885 - acc: 0.8310 -- iter: 05888/20250


Training Step: 2946  | total loss: [1m[32m0.39789[0m[0m | time: 23.557s
[2K| Adam | epoch: 010 | loss: 0.39789 - acc: 0.8307 -- iter: 05952/20250


Training Step: 2947  | total loss: [1m[32m0.41192[0m[0m | time: 23.805s
[2K| Adam | epoch: 010 | loss: 0.41192 - acc: 0.8273 -- iter: 06016/20250


Training Step: 2948  | total loss: [1m[32m0.40118[0m[0m | time: 24.050s
[2K| Adam | epoch: 010 | loss: 0.40118 - acc: 0.8352 -- iter: 06080/20250


Training Step: 2949  | total loss: [1m[32m0.40846[0m[0m | time: 24.298s
[2K| Adam | epoch: 010 | loss: 0.40846 - acc: 0.8283 -- iter: 06144/20250


Training Step: 2950  | total loss: [1m[32m0.40671[0m[0m | time: 24.540s
[2K| Adam | epoch: 010 | loss: 0.40671 - acc: 0.8283 -- iter: 06208/20250


Training Step: 2951  | total loss: [1m[32m0.39926[0m[0m | time: 24.786s
[2K| Adam | epoch: 010 | loss: 0.39926 - acc: 0.8329 -- iter: 06272/20250


Training Step: 2952  | total loss: [1m[32m0.40006[0m[0m | time: 25.038s
[2K| Adam | epoch: 010 | loss: 0.40006 - acc: 0.8324 -- iter: 06336/20250


Training Step: 2953  | total loss: [1m[32m0.39937[0m[0m | time: 25.285s
[2K| Adam | epoch: 010 | loss: 0.39937 - acc: 0.8320 -- iter: 06400/20250


Training Step: 2954  | total loss: [1m[32m0.39991[0m[0m | time: 25.557s
[2K| Adam | epoch: 010 | loss: 0.39991 - acc: 0.8301 -- iter: 06464/20250


Training Step: 2955  | total loss: [1m[32m0.39291[0m[0m | time: 25.819s
[2K| Adam | epoch: 010 | loss: 0.39291 - acc: 0.8330 -- iter: 06528/20250


Training Step: 2956  | total loss: [1m[32m0.39182[0m[0m | time: 26.091s
[2K| Adam | epoch: 010 | loss: 0.39182 - acc: 0.8341 -- iter: 06592/20250


Training Step: 2957  | total loss: [1m[32m0.39582[0m[0m | time: 26.348s
[2K| Adam | epoch: 010 | loss: 0.39582 - acc: 0.8304 -- iter: 06656/20250


Training Step: 2958  | total loss: [1m[32m0.39965[0m[0m | time: 26.593s
[2K| Adam | epoch: 010 | loss: 0.39965 - acc: 0.8317 -- iter: 06720/20250


Training Step: 2959  | total loss: [1m[32m0.40835[0m[0m | time: 26.843s
[2K| Adam | epoch: 010 | loss: 0.40835 - acc: 0.8282 -- iter: 06784/20250


Training Step: 2960  | total loss: [1m[32m0.39988[0m[0m | time: 27.092s
[2K| Adam | epoch: 010 | loss: 0.39988 - acc: 0.8360 -- iter: 06848/20250


Training Step: 2961  | total loss: [1m[32m0.38754[0m[0m | time: 27.339s
[2K| Adam | epoch: 010 | loss: 0.38754 - acc: 0.8430 -- iter: 06912/20250


Training Step: 2962  | total loss: [1m[32m0.38405[0m[0m | time: 27.587s
[2K| Adam | epoch: 010 | loss: 0.38405 - acc: 0.8447 -- iter: 06976/20250


Training Step: 2963  | total loss: [1m[32m0.37865[0m[0m | time: 27.843s
[2K| Adam | epoch: 010 | loss: 0.37865 - acc: 0.8493 -- iter: 07040/20250


Training Step: 2964  | total loss: [1m[32m0.38214[0m[0m | time: 28.096s
[2K| Adam | epoch: 010 | loss: 0.38214 - acc: 0.8487 -- iter: 07104/20250


Training Step: 2965  | total loss: [1m[32m0.37632[0m[0m | time: 28.359s
[2K| Adam | epoch: 010 | loss: 0.37632 - acc: 0.8545 -- iter: 07168/20250


Training Step: 2966  | total loss: [1m[32m0.36760[0m[0m | time: 28.614s
[2K| Adam | epoch: 010 | loss: 0.36760 - acc: 0.8596 -- iter: 07232/20250


Training Step: 2967  | total loss: [1m[32m0.36699[0m[0m | time: 28.870s
[2K| Adam | epoch: 010 | loss: 0.36699 - acc: 0.8596 -- iter: 07296/20250


Training Step: 2968  | total loss: [1m[32m0.36698[0m[0m | time: 29.151s
[2K| Adam | epoch: 010 | loss: 0.36698 - acc: 0.8612 -- iter: 07360/20250


Training Step: 2969  | total loss: [1m[32m0.36037[0m[0m | time: 29.398s
[2K| Adam | epoch: 010 | loss: 0.36037 - acc: 0.8610 -- iter: 07424/20250


Training Step: 2970  | total loss: [1m[32m0.36188[0m[0m | time: 29.646s
[2K| Adam | epoch: 010 | loss: 0.36188 - acc: 0.8577 -- iter: 07488/20250


Training Step: 2971  | total loss: [1m[32m0.37894[0m[0m | time: 29.899s
[2K| Adam | epoch: 010 | loss: 0.37894 - acc: 0.8485 -- iter: 07552/20250


Training Step: 2972  | total loss: [1m[32m0.38293[0m[0m | time: 30.142s
[2K| Adam | epoch: 010 | loss: 0.38293 - acc: 0.8449 -- iter: 07616/20250


Training Step: 2973  | total loss: [1m[32m0.37371[0m[0m | time: 30.393s
[2K| Adam | epoch: 010 | loss: 0.37371 - acc: 0.8510 -- iter: 07680/20250


Training Step: 2974  | total loss: [1m[32m0.37062[0m[0m | time: 30.640s
[2K| Adam | epoch: 010 | loss: 0.37062 - acc: 0.8503 -- iter: 07744/20250


Training Step: 2975  | total loss: [1m[32m0.36716[0m[0m | time: 30.897s
[2K| Adam | epoch: 010 | loss: 0.36716 - acc: 0.8543 -- iter: 07808/20250


Training Step: 2976  | total loss: [1m[32m0.35277[0m[0m | time: 31.148s
[2K| Adam | epoch: 010 | loss: 0.35277 - acc: 0.8626 -- iter: 07872/20250


Training Step: 2977  | total loss: [1m[32m0.34340[0m[0m | time: 31.412s
[2K| Adam | epoch: 010 | loss: 0.34340 - acc: 0.8686 -- iter: 07936/20250


Training Step: 2978  | total loss: [1m[32m0.35131[0m[0m | time: 31.704s
[2K| Adam | epoch: 010 | loss: 0.35131 - acc: 0.8661 -- iter: 08000/20250


Training Step: 2979  | total loss: [1m[32m0.35960[0m[0m | time: 31.975s
[2K| Adam | epoch: 010 | loss: 0.35960 - acc: 0.8654 -- iter: 08064/20250


Training Step: 2980  | total loss: [1m[32m0.36672[0m[0m | time: 32.224s
[2K| Adam | epoch: 010 | loss: 0.36672 - acc: 0.8632 -- iter: 08128/20250


Training Step: 2981  | total loss: [1m[32m0.36773[0m[0m | time: 32.467s
[2K| Adam | epoch: 010 | loss: 0.36773 - acc: 0.8629 -- iter: 08192/20250


Training Step: 2982  | total loss: [1m[32m0.36334[0m[0m | time: 32.721s
[2K| Adam | epoch: 010 | loss: 0.36334 - acc: 0.8656 -- iter: 08256/20250


Training Step: 2983  | total loss: [1m[32m0.35815[0m[0m | time: 32.969s
[2K| Adam | epoch: 010 | loss: 0.35815 - acc: 0.8697 -- iter: 08320/20250


Training Step: 2984  | total loss: [1m[32m0.34695[0m[0m | time: 33.209s
[2K| Adam | epoch: 010 | loss: 0.34695 - acc: 0.8734 -- iter: 08384/20250


Training Step: 2985  | total loss: [1m[32m0.34823[0m[0m | time: 33.460s
[2K| Adam | epoch: 010 | loss: 0.34823 - acc: 0.8720 -- iter: 08448/20250


Training Step: 2986  | total loss: [1m[32m0.35795[0m[0m | time: 33.710s
[2K| Adam | epoch: 010 | loss: 0.35795 - acc: 0.8660 -- iter: 08512/20250


Training Step: 2987  | total loss: [1m[32m0.36432[0m[0m | time: 33.965s
[2K| Adam | epoch: 010 | loss: 0.36432 - acc: 0.8622 -- iter: 08576/20250


Training Step: 2988  | total loss: [1m[32m0.36442[0m[0m | time: 34.254s
[2K| Adam | epoch: 010 | loss: 0.36442 - acc: 0.8573 -- iter: 08640/20250


Training Step: 2989  | total loss: [1m[32m0.39462[0m[0m | time: 34.501s
[2K| Adam | epoch: 010 | loss: 0.39462 - acc: 0.8403 -- iter: 08704/20250


Training Step: 2990  | total loss: [1m[32m0.40260[0m[0m | time: 34.753s
[2K| Adam | epoch: 010 | loss: 0.40260 - acc: 0.8359 -- iter: 08768/20250


Training Step: 2991  | total loss: [1m[32m0.41120[0m[0m | time: 35.011s
[2K| Adam | epoch: 010 | loss: 0.41120 - acc: 0.8352 -- iter: 08832/20250


Training Step: 2992  | total loss: [1m[32m0.40823[0m[0m | time: 35.261s
[2K| Adam | epoch: 010 | loss: 0.40823 - acc: 0.8329 -- iter: 08896/20250


Training Step: 2993  | total loss: [1m[32m0.41238[0m[0m | time: 35.518s
[2K| Adam | epoch: 010 | loss: 0.41238 - acc: 0.8308 -- iter: 08960/20250


Training Step: 2994  | total loss: [1m[32m0.40343[0m[0m | time: 35.756s
[2K| Adam | epoch: 010 | loss: 0.40343 - acc: 0.8321 -- iter: 09024/20250


Training Step: 2995  | total loss: [1m[32m0.39325[0m[0m | time: 36.004s
[2K| Adam | epoch: 010 | loss: 0.39325 - acc: 0.8317 -- iter: 09088/20250


Training Step: 2996  | total loss: [1m[32m0.38869[0m[0m | time: 36.259s
[2K| Adam | epoch: 010 | loss: 0.38869 - acc: 0.8361 -- iter: 09152/20250


Training Step: 2997  | total loss: [1m[32m0.38102[0m[0m | time: 36.533s
[2K| Adam | epoch: 010 | loss: 0.38102 - acc: 0.8368 -- iter: 09216/20250


Training Step: 2998  | total loss: [1m[32m0.38350[0m[0m | time: 36.835s
[2K| Adam | epoch: 010 | loss: 0.38350 - acc: 0.8344 -- iter: 09280/20250


Training Step: 2999  | total loss: [1m[32m0.38201[0m[0m | time: 37.081s
[2K| Adam | epoch: 010 | loss: 0.38201 - acc: 0.8369 -- iter: 09344/20250


Training Step: 3000  | total loss: [1m[32m0.38406[0m[0m | time: 37.322s
[2K| Adam | epoch: 010 | loss: 0.38406 - acc: 0.8376 -- iter: 09408/20250


Training Step: 3001  | total loss: [1m[32m0.38144[0m[0m | time: 37.574s
[2K| Adam | epoch: 010 | loss: 0.38144 - acc: 0.8398 -- iter: 09472/20250


Training Step: 3002  | total loss: [1m[32m0.38194[0m[0m | time: 37.819s
[2K| Adam | epoch: 010 | loss: 0.38194 - acc: 0.8355 -- iter: 09536/20250


Training Step: 3003  | total loss: [1m[32m0.38200[0m[0m | time: 38.059s
[2K| Adam | epoch: 010 | loss: 0.38200 - acc: 0.8379 -- iter: 09600/20250


Training Step: 3004  | total loss: [1m[32m0.37295[0m[0m | time: 38.305s
[2K| Adam | epoch: 010 | loss: 0.37295 - acc: 0.8463 -- iter: 09664/20250


Training Step: 3005  | total loss: [1m[32m0.36440[0m[0m | time: 38.549s
[2K| Adam | epoch: 010 | loss: 0.36440 - acc: 0.8507 -- iter: 09728/20250


Training Step: 3006  | total loss: [1m[32m0.37508[0m[0m | time: 38.803s
[2K| Adam | epoch: 010 | loss: 0.37508 - acc: 0.8438 -- iter: 09792/20250


Training Step: 3007  | total loss: [1m[32m0.37829[0m[0m | time: 39.064s
[2K| Adam | epoch: 010 | loss: 0.37829 - acc: 0.8453 -- iter: 09856/20250


Training Step: 3008  | total loss: [1m[32m0.36892[0m[0m | time: 39.352s
[2K| Adam | epoch: 010 | loss: 0.36892 - acc: 0.8530 -- iter: 09920/20250


Training Step: 3009  | total loss: [1m[32m0.37993[0m[0m | time: 39.602s
[2K| Adam | epoch: 010 | loss: 0.37993 - acc: 0.8505 -- iter: 09984/20250


Training Step: 3010  | total loss: [1m[32m0.37551[0m[0m | time: 39.850s
[2K| Adam | epoch: 010 | loss: 0.37551 - acc: 0.8529 -- iter: 10048/20250


Training Step: 3011  | total loss: [1m[32m0.36896[0m[0m | time: 40.105s
[2K| Adam | epoch: 010 | loss: 0.36896 - acc: 0.8567 -- iter: 10112/20250


Training Step: 3012  | total loss: [1m[32m0.36190[0m[0m | time: 40.351s
[2K| Adam | epoch: 010 | loss: 0.36190 - acc: 0.8632 -- iter: 10176/20250


Training Step: 3013  | total loss: [1m[32m0.37765[0m[0m | time: 40.595s
[2K| Adam | epoch: 010 | loss: 0.37765 - acc: 0.8550 -- iter: 10240/20250


Training Step: 3014  | total loss: [1m[32m0.36712[0m[0m | time: 40.852s
[2K| Adam | epoch: 010 | loss: 0.36712 - acc: 0.8586 -- iter: 10304/20250


Training Step: 3015  | total loss: [1m[32m0.37902[0m[0m | time: 41.095s
[2K| Adam | epoch: 010 | loss: 0.37902 - acc: 0.8540 -- iter: 10368/20250


Training Step: 3016  | total loss: [1m[32m0.36741[0m[0m | time: 41.361s
[2K| Adam | epoch: 010 | loss: 0.36741 - acc: 0.8592 -- iter: 10432/20250


Training Step: 3017  | total loss: [1m[32m0.36058[0m[0m | time: 41.626s
[2K| Adam | epoch: 010 | loss: 0.36058 - acc: 0.8639 -- iter: 10496/20250


Training Step: 3018  | total loss: [1m[32m0.37049[0m[0m | time: 41.924s
[2K| Adam | epoch: 010 | loss: 0.37049 - acc: 0.8588 -- iter: 10560/20250


Training Step: 3019  | total loss: [1m[32m0.36847[0m[0m | time: 42.174s
[2K| Adam | epoch: 010 | loss: 0.36847 - acc: 0.8604 -- iter: 10624/20250


Training Step: 3020  | total loss: [1m[32m0.38012[0m[0m | time: 42.421s
[2K| Adam | epoch: 010 | loss: 0.38012 - acc: 0.8556 -- iter: 10688/20250


Training Step: 3021  | total loss: [1m[32m0.37088[0m[0m | time: 42.667s
[2K| Adam | epoch: 010 | loss: 0.37088 - acc: 0.8560 -- iter: 10752/20250


Training Step: 3022  | total loss: [1m[32m0.38653[0m[0m | time: 42.915s
[2K| Adam | epoch: 010 | loss: 0.38653 - acc: 0.8469 -- iter: 10816/20250


Training Step: 3023  | total loss: [1m[32m0.38212[0m[0m | time: 43.153s
[2K| Adam | epoch: 010 | loss: 0.38212 - acc: 0.8498 -- iter: 10880/20250


Training Step: 3024  | total loss: [1m[32m0.37374[0m[0m | time: 43.399s
[2K| Adam | epoch: 010 | loss: 0.37374 - acc: 0.8538 -- iter: 10944/20250


Training Step: 3025  | total loss: [1m[32m0.37699[0m[0m | time: 43.645s
[2K| Adam | epoch: 010 | loss: 0.37699 - acc: 0.8466 -- iter: 11008/20250


Training Step: 3026  | total loss: [1m[32m0.37709[0m[0m | time: 43.909s
[2K| Adam | epoch: 010 | loss: 0.37709 - acc: 0.8416 -- iter: 11072/20250


Training Step: 3027  | total loss: [1m[32m0.37624[0m[0m | time: 44.179s
[2K| Adam | epoch: 010 | loss: 0.37624 - acc: 0.8449 -- iter: 11136/20250


Training Step: 3028  | total loss: [1m[32m0.36592[0m[0m | time: 44.456s
[2K| Adam | epoch: 010 | loss: 0.36592 - acc: 0.8542 -- iter: 11200/20250


Training Step: 3029  | total loss: [1m[32m0.36973[0m[0m | time: 44.703s
[2K| Adam | epoch: 010 | loss: 0.36973 - acc: 0.8485 -- iter: 11264/20250


Training Step: 3030  | total loss: [1m[32m0.37462[0m[0m | time: 44.960s
[2K| Adam | epoch: 010 | loss: 0.37462 - acc: 0.8464 -- iter: 11328/20250


Training Step: 3031  | total loss: [1m[32m0.39035[0m[0m | time: 45.202s
[2K| Adam | epoch: 010 | loss: 0.39035 - acc: 0.8352 -- iter: 11392/20250


Training Step: 3032  | total loss: [1m[32m0.39165[0m[0m | time: 45.450s
[2K| Adam | epoch: 010 | loss: 0.39165 - acc: 0.8298 -- iter: 11456/20250


Training Step: 3033  | total loss: [1m[32m0.39101[0m[0m | time: 45.695s
[2K| Adam | epoch: 010 | loss: 0.39101 - acc: 0.8281 -- iter: 11520/20250


Training Step: 3034  | total loss: [1m[32m0.40684[0m[0m | time: 45.946s
[2K| Adam | epoch: 010 | loss: 0.40684 - acc: 0.8125 -- iter: 11584/20250


Training Step: 3035  | total loss: [1m[32m0.40015[0m[0m | time: 46.195s
[2K| Adam | epoch: 010 | loss: 0.40015 - acc: 0.8187 -- iter: 11648/20250


Training Step: 3036  | total loss: [1m[32m0.39878[0m[0m | time: 46.456s
[2K| Adam | epoch: 010 | loss: 0.39878 - acc: 0.8228 -- iter: 11712/20250


Training Step: 3037  | total loss: [1m[32m0.39730[0m[0m | time: 46.717s
[2K| Adam | epoch: 010 | loss: 0.39730 - acc: 0.8249 -- iter: 11776/20250


Training Step: 3038  | total loss: [1m[32m0.39260[0m[0m | time: 47.006s
[2K| Adam | epoch: 010 | loss: 0.39260 - acc: 0.8283 -- iter: 11840/20250


Training Step: 3039  | total loss: [1m[32m0.39144[0m[0m | time: 47.244s
[2K| Adam | epoch: 010 | loss: 0.39144 - acc: 0.8314 -- iter: 11904/20250


Training Step: 3040  | total loss: [1m[32m0.38623[0m[0m | time: 47.489s
[2K| Adam | epoch: 010 | loss: 0.38623 - acc: 0.8358 -- iter: 11968/20250


Training Step: 3041  | total loss: [1m[32m0.38611[0m[0m | time: 47.747s
[2K| Adam | epoch: 010 | loss: 0.38611 - acc: 0.8366 -- iter: 12032/20250


Training Step: 3042  | total loss: [1m[32m0.38065[0m[0m | time: 47.993s
[2K| Adam | epoch: 010 | loss: 0.38065 - acc: 0.8467 -- iter: 12096/20250


Training Step: 3043  | total loss: [1m[32m0.37131[0m[0m | time: 48.240s
[2K| Adam | epoch: 010 | loss: 0.37131 - acc: 0.8558 -- iter: 12160/20250


Training Step: 3044  | total loss: [1m[32m0.36997[0m[0m | time: 48.489s
[2K| Adam | epoch: 010 | loss: 0.36997 - acc: 0.8577 -- iter: 12224/20250


Training Step: 3045  | total loss: [1m[32m0.37011[0m[0m | time: 48.736s
[2K| Adam | epoch: 010 | loss: 0.37011 - acc: 0.8563 -- iter: 12288/20250


Training Step: 3046  | total loss: [1m[32m0.36429[0m[0m | time: 48.998s
[2K| Adam | epoch: 010 | loss: 0.36429 - acc: 0.8613 -- iter: 12352/20250


Training Step: 3047  | total loss: [1m[32m0.36680[0m[0m | time: 49.273s
[2K| Adam | epoch: 010 | loss: 0.36680 - acc: 0.8564 -- iter: 12416/20250


Training Step: 3048  | total loss: [1m[32m0.36306[0m[0m | time: 49.526s
[2K| Adam | epoch: 010 | loss: 0.36306 - acc: 0.8567 -- iter: 12480/20250


Training Step: 3049  | total loss: [1m[32m0.35481[0m[0m | time: 49.779s
[2K| Adam | epoch: 010 | loss: 0.35481 - acc: 0.8632 -- iter: 12544/20250


Training Step: 3050  | total loss: [1m[32m0.35228[0m[0m | time: 50.023s
[2K| Adam | epoch: 010 | loss: 0.35228 - acc: 0.8628 -- iter: 12608/20250


Training Step: 3051  | total loss: [1m[32m0.35531[0m[0m | time: 50.266s
[2K| Adam | epoch: 010 | loss: 0.35531 - acc: 0.8625 -- iter: 12672/20250


Training Step: 3052  | total loss: [1m[32m0.34816[0m[0m | time: 50.517s
[2K| Adam | epoch: 010 | loss: 0.34816 - acc: 0.8637 -- iter: 12736/20250


Training Step: 3053  | total loss: [1m[32m0.34294[0m[0m | time: 50.761s
[2K| Adam | epoch: 010 | loss: 0.34294 - acc: 0.8696 -- iter: 12800/20250


Training Step: 3054  | total loss: [1m[32m0.32726[0m[0m | time: 51.004s
[2K| Adam | epoch: 010 | loss: 0.32726 - acc: 0.8779 -- iter: 12864/20250


Training Step: 3055  | total loss: [1m[32m0.32353[0m[0m | time: 51.252s
[2K| Adam | epoch: 010 | loss: 0.32353 - acc: 0.8792 -- iter: 12928/20250


Training Step: 3056  | total loss: [1m[32m0.32400[0m[0m | time: 51.502s
[2K| Adam | epoch: 010 | loss: 0.32400 - acc: 0.8803 -- iter: 12992/20250


Training Step: 3057  | total loss: [1m[32m0.33197[0m[0m | time: 51.765s
[2K| Adam | epoch: 010 | loss: 0.33197 - acc: 0.8782 -- iter: 13056/20250


Training Step: 3058  | total loss: [1m[32m0.33191[0m[0m | time: 52.048s
[2K| Adam | epoch: 010 | loss: 0.33191 - acc: 0.8763 -- iter: 13120/20250


Training Step: 3059  | total loss: [1m[32m0.34548[0m[0m | time: 52.311s
[2K| Adam | epoch: 010 | loss: 0.34548 - acc: 0.8700 -- iter: 13184/20250


Training Step: 3060  | total loss: [1m[32m0.36745[0m[0m | time: 52.556s
[2K| Adam | epoch: 010 | loss: 0.36745 - acc: 0.8595 -- iter: 13248/20250


Training Step: 3061  | total loss: [1m[32m0.37314[0m[0m | time: 52.803s
[2K| Adam | epoch: 010 | loss: 0.37314 - acc: 0.8533 -- iter: 13312/20250


Training Step: 3062  | total loss: [1m[32m0.35551[0m[0m | time: 53.048s
[2K| Adam | epoch: 010 | loss: 0.35551 - acc: 0.8648 -- iter: 13376/20250


Training Step: 3063  | total loss: [1m[32m0.35598[0m[0m | time: 53.299s
[2K| Adam | epoch: 010 | loss: 0.35598 - acc: 0.8658 -- iter: 13440/20250


Training Step: 3064  | total loss: [1m[32m0.36494[0m[0m | time: 53.546s
[2K| Adam | epoch: 010 | loss: 0.36494 - acc: 0.8589 -- iter: 13504/20250


Training Step: 3065  | total loss: [1m[32m0.36875[0m[0m | time: 53.797s
[2K| Adam | epoch: 010 | loss: 0.36875 - acc: 0.8559 -- iter: 13568/20250


Training Step: 3066  | total loss: [1m[32m0.37054[0m[0m | time: 54.051s
[2K| Adam | epoch: 010 | loss: 0.37054 - acc: 0.8546 -- iter: 13632/20250


Training Step: 3067  | total loss: [1m[32m0.37361[0m[0m | time: 54.322s
[2K| Adam | epoch: 010 | loss: 0.37361 - acc: 0.8520 -- iter: 13696/20250


Training Step: 3068  | total loss: [1m[32m0.36721[0m[0m | time: 54.628s
[2K| Adam | epoch: 010 | loss: 0.36721 - acc: 0.8559 -- iter: 13760/20250


Training Step: 3069  | total loss: [1m[32m0.36782[0m[0m | time: 54.872s
[2K| Adam | epoch: 010 | loss: 0.36782 - acc: 0.8546 -- iter: 13824/20250


Training Step: 3070  | total loss: [1m[32m0.36089[0m[0m | time: 55.118s
[2K| Adam | epoch: 010 | loss: 0.36089 - acc: 0.8629 -- iter: 13888/20250


Training Step: 3071  | total loss: [1m[32m0.35252[0m[0m | time: 55.365s
[2K| Adam | epoch: 010 | loss: 0.35252 - acc: 0.8688 -- iter: 13952/20250


Training Step: 3072  | total loss: [1m[32m0.36074[0m[0m | time: 55.609s
[2K| Adam | epoch: 010 | loss: 0.36074 - acc: 0.8585 -- iter: 14016/20250


Training Step: 3073  | total loss: [1m[32m0.35765[0m[0m | time: 55.858s
[2K| Adam | epoch: 010 | loss: 0.35765 - acc: 0.8602 -- iter: 14080/20250


Training Step: 3074  | total loss: [1m[32m0.36094[0m[0m | time: 56.092s
[2K| Adam | epoch: 010 | loss: 0.36094 - acc: 0.8585 -- iter: 14144/20250


Training Step: 3075  | total loss: [1m[32m0.35831[0m[0m | time: 56.345s
[2K| Adam | epoch: 010 | loss: 0.35831 - acc: 0.8617 -- iter: 14208/20250


Training Step: 3076  | total loss: [1m[32m0.36336[0m[0m | time: 56.604s
[2K| Adam | epoch: 010 | loss: 0.36336 - acc: 0.8584 -- iter: 14272/20250


Training Step: 3077  | total loss: [1m[32m0.36101[0m[0m | time: 56.888s
[2K| Adam | epoch: 010 | loss: 0.36101 - acc: 0.8569 -- iter: 14336/20250


Training Step: 3078  | total loss: [1m[32m0.36230[0m[0m | time: 57.151s
[2K| Adam | epoch: 010 | loss: 0.36230 - acc: 0.8540 -- iter: 14400/20250


Training Step: 3079  | total loss: [1m[32m0.35483[0m[0m | time: 57.397s
[2K| Adam | epoch: 010 | loss: 0.35483 - acc: 0.8546 -- iter: 14464/20250


Training Step: 3080  | total loss: [1m[32m0.34930[0m[0m | time: 57.641s
[2K| Adam | epoch: 010 | loss: 0.34930 - acc: 0.8597 -- iter: 14528/20250


Training Step: 3081  | total loss: [1m[32m0.33488[0m[0m | time: 57.890s
[2K| Adam | epoch: 010 | loss: 0.33488 - acc: 0.8644 -- iter: 14592/20250


Training Step: 3082  | total loss: [1m[32m0.32718[0m[0m | time: 58.134s
[2K| Adam | epoch: 010 | loss: 0.32718 - acc: 0.8670 -- iter: 14656/20250


Training Step: 3083  | total loss: [1m[32m0.32349[0m[0m | time: 58.380s
[2K| Adam | epoch: 010 | loss: 0.32349 - acc: 0.8662 -- iter: 14720/20250


Training Step: 3084  | total loss: [1m[32m0.32451[0m[0m | time: 58.629s
[2K| Adam | epoch: 010 | loss: 0.32451 - acc: 0.8656 -- iter: 14784/20250


Training Step: 3085  | total loss: [1m[32m0.31720[0m[0m | time: 58.873s
[2K| Adam | epoch: 010 | loss: 0.31720 - acc: 0.8712 -- iter: 14848/20250


Training Step: 3086  | total loss: [1m[32m0.31290[0m[0m | time: 59.118s
[2K| Adam | epoch: 010 | loss: 0.31290 - acc: 0.8731 -- iter: 14912/20250


Training Step: 3087  | total loss: [1m[32m0.31591[0m[0m | time: 59.382s
[2K| Adam | epoch: 010 | loss: 0.31591 - acc: 0.8749 -- iter: 14976/20250


Training Step: 3088  | total loss: [1m[32m0.31821[0m[0m | time: 59.688s
[2K| Adam | epoch: 010 | loss: 0.31821 - acc: 0.8686 -- iter: 15040/20250


Training Step: 3089  | total loss: [1m[32m0.31993[0m[0m | time: 59.937s
[2K| Adam | epoch: 010 | loss: 0.31993 - acc: 0.8646 -- iter: 15104/20250


Training Step: 3090  | total loss: [1m[32m0.32847[0m[0m | time: 60.191s
[2K| Adam | epoch: 010 | loss: 0.32847 - acc: 0.8609 -- iter: 15168/20250


Training Step: 3091  | total loss: [1m[32m0.32389[0m[0m | time: 60.433s
[2K| Adam | epoch: 010 | loss: 0.32389 - acc: 0.8639 -- iter: 15232/20250


Training Step: 3092  | total loss: [1m[32m0.32502[0m[0m | time: 60.680s
[2K| Adam | epoch: 010 | loss: 0.32502 - acc: 0.8635 -- iter: 15296/20250


Training Step: 3093  | total loss: [1m[32m0.31678[0m[0m | time: 60.929s
[2K| Adam | epoch: 010 | loss: 0.31678 - acc: 0.8662 -- iter: 15360/20250


Training Step: 3094  | total loss: [1m[32m0.32228[0m[0m | time: 61.171s
[2K| Adam | epoch: 010 | loss: 0.32228 - acc: 0.8624 -- iter: 15424/20250


Training Step: 3095  | total loss: [1m[32m0.31582[0m[0m | time: 61.413s
[2K| Adam | epoch: 010 | loss: 0.31582 - acc: 0.8668 -- iter: 15488/20250


Training Step: 3096  | total loss: [1m[32m0.31442[0m[0m | time: 61.661s
[2K| Adam | epoch: 010 | loss: 0.31442 - acc: 0.8660 -- iter: 15552/20250


Training Step: 3097  | total loss: [1m[32m0.31897[0m[0m | time: 61.931s
[2K| Adam | epoch: 010 | loss: 0.31897 - acc: 0.8638 -- iter: 15616/20250


Training Step: 3098  | total loss: [1m[32m0.31986[0m[0m | time: 62.227s
[2K| Adam | epoch: 010 | loss: 0.31986 - acc: 0.8618 -- iter: 15680/20250


Training Step: 3099  | total loss: [1m[32m0.32173[0m[0m | time: 62.488s
[2K| Adam | epoch: 010 | loss: 0.32173 - acc: 0.8615 -- iter: 15744/20250


Training Step: 3100  | total loss: [1m[32m0.32935[0m[0m | time: 62.741s
[2K| Adam | epoch: 010 | loss: 0.32935 - acc: 0.8598 -- iter: 15808/20250


Training Step: 3101  | total loss: [1m[32m0.32476[0m[0m | time: 62.988s
[2K| Adam | epoch: 010 | loss: 0.32476 - acc: 0.8597 -- iter: 15872/20250


Training Step: 3102  | total loss: [1m[32m0.32085[0m[0m | time: 63.233s
[2K| Adam | epoch: 010 | loss: 0.32085 - acc: 0.8644 -- iter: 15936/20250


Training Step: 3103  | total loss: [1m[32m0.31897[0m[0m | time: 63.481s
[2K| Adam | epoch: 010 | loss: 0.31897 - acc: 0.8639 -- iter: 16000/20250


Training Step: 3104  | total loss: [1m[32m0.31568[0m[0m | time: 63.730s
[2K| Adam | epoch: 010 | loss: 0.31568 - acc: 0.8697 -- iter: 16064/20250


Training Step: 3105  | total loss: [1m[32m0.31062[0m[0m | time: 63.975s
[2K| Adam | epoch: 010 | loss: 0.31062 - acc: 0.8702 -- iter: 16128/20250


Training Step: 3106  | total loss: [1m[32m0.30703[0m[0m | time: 64.221s
[2K| Adam | epoch: 010 | loss: 0.30703 - acc: 0.8738 -- iter: 16192/20250


Training Step: 3107  | total loss: [1m[32m0.31061[0m[0m | time: 64.489s
[2K| Adam | epoch: 010 | loss: 0.31061 - acc: 0.8739 -- iter: 16256/20250


Training Step: 3108  | total loss: [1m[32m0.30923[0m[0m | time: 64.786s
[2K| Adam | epoch: 010 | loss: 0.30923 - acc: 0.8756 -- iter: 16320/20250


Training Step: 3109  | total loss: [1m[32m0.30495[0m[0m | time: 65.039s
[2K| Adam | epoch: 010 | loss: 0.30495 - acc: 0.8802 -- iter: 16384/20250


Training Step: 3110  | total loss: [1m[32m0.32262[0m[0m | time: 65.293s
[2K| Adam | epoch: 010 | loss: 0.32262 - acc: 0.8703 -- iter: 16448/20250


Training Step: 3111  | total loss: [1m[32m0.32632[0m[0m | time: 65.534s
[2K| Adam | epoch: 010 | loss: 0.32632 - acc: 0.8692 -- iter: 16512/20250


Training Step: 3112  | total loss: [1m[32m0.32973[0m[0m | time: 65.782s
[2K| Adam | epoch: 010 | loss: 0.32973 - acc: 0.8683 -- iter: 16576/20250


Training Step: 3113  | total loss: [1m[32m0.32806[0m[0m | time: 66.035s
[2K| Adam | epoch: 010 | loss: 0.32806 - acc: 0.8689 -- iter: 16640/20250


Training Step: 3114  | total loss: [1m[32m0.35014[0m[0m | time: 66.284s
[2K| Adam | epoch: 010 | loss: 0.35014 - acc: 0.8586 -- iter: 16704/20250


Training Step: 3115  | total loss: [1m[32m0.34670[0m[0m | time: 66.537s
[2K| Adam | epoch: 010 | loss: 0.34670 - acc: 0.8602 -- iter: 16768/20250


Training Step: 3116  | total loss: [1m[32m0.34012[0m[0m | time: 66.786s
[2K| Adam | epoch: 010 | loss: 0.34012 - acc: 0.8601 -- iter: 16832/20250


Training Step: 3117  | total loss: [1m[32m0.33803[0m[0m | time: 67.042s
[2K| Adam | epoch: 010 | loss: 0.33803 - acc: 0.8632 -- iter: 16896/20250


Training Step: 3118  | total loss: [1m[32m0.33639[0m[0m | time: 67.304s
[2K| Adam | epoch: 010 | loss: 0.33639 - acc: 0.8644 -- iter: 16960/20250


Training Step: 3119  | total loss: [1m[32m0.33502[0m[0m | time: 67.555s
[2K| Adam | epoch: 010 | loss: 0.33502 - acc: 0.8654 -- iter: 17024/20250


Training Step: 3120  | total loss: [1m[32m0.34341[0m[0m | time: 67.829s
[2K| Adam | epoch: 010 | loss: 0.34341 - acc: 0.8664 -- iter: 17088/20250


Training Step: 3121  | total loss: [1m[32m0.34160[0m[0m | time: 68.071s
[2K| Adam | epoch: 010 | loss: 0.34160 - acc: 0.8641 -- iter: 17152/20250


Training Step: 3122  | total loss: [1m[32m0.33117[0m[0m | time: 68.315s
[2K| Adam | epoch: 010 | loss: 0.33117 - acc: 0.8715 -- iter: 17216/20250


Training Step: 3123  | total loss: [1m[32m0.33305[0m[0m | time: 68.561s
[2K| Adam | epoch: 010 | loss: 0.33305 - acc: 0.8718 -- iter: 17280/20250


Training Step: 3124  | total loss: [1m[32m0.33299[0m[0m | time: 68.810s
[2K| Adam | epoch: 010 | loss: 0.33299 - acc: 0.8721 -- iter: 17344/20250


Training Step: 3125  | total loss: [1m[32m0.32614[0m[0m | time: 69.060s
[2K| Adam | epoch: 010 | loss: 0.32614 - acc: 0.8756 -- iter: 17408/20250


Training Step: 3126  | total loss: [1m[32m0.32638[0m[0m | time: 69.306s
[2K| Adam | epoch: 010 | loss: 0.32638 - acc: 0.8755 -- iter: 17472/20250


Training Step: 3127  | total loss: [1m[32m0.32544[0m[0m | time: 69.561s
[2K| Adam | epoch: 010 | loss: 0.32544 - acc: 0.8754 -- iter: 17536/20250


Training Step: 3128  | total loss: [1m[32m0.31876[0m[0m | time: 69.814s
[2K| Adam | epoch: 010 | loss: 0.31876 - acc: 0.8785 -- iter: 17600/20250


Training Step: 3129  | total loss: [1m[32m0.32131[0m[0m | time: 70.078s
[2K| Adam | epoch: 010 | loss: 0.32131 - acc: 0.8766 -- iter: 17664/20250


Training Step: 3130  | total loss: [1m[32m0.32966[0m[0m | time: 70.324s
[2K| Adam | epoch: 010 | loss: 0.32966 - acc: 0.8749 -- iter: 17728/20250


Training Step: 3131  | total loss: [1m[32m0.33362[0m[0m | time: 70.609s
[2K| Adam | epoch: 010 | loss: 0.33362 - acc: 0.8765 -- iter: 17792/20250


Training Step: 3132  | total loss: [1m[32m0.32344[0m[0m | time: 70.857s
[2K| Adam | epoch: 010 | loss: 0.32344 - acc: 0.8794 -- iter: 17856/20250


Training Step: 3133  | total loss: [1m[32m0.32101[0m[0m | time: 71.108s
[2K| Adam | epoch: 010 | loss: 0.32101 - acc: 0.8821 -- iter: 17920/20250


Training Step: 3134  | total loss: [1m[32m0.31311[0m[0m | time: 71.354s
[2K| Adam | epoch: 010 | loss: 0.31311 - acc: 0.8877 -- iter: 17984/20250


Training Step: 3135  | total loss: [1m[32m0.31458[0m[0m | time: 71.599s
[2K| Adam | epoch: 010 | loss: 0.31458 - acc: 0.8880 -- iter: 18048/20250


Training Step: 3136  | total loss: [1m[32m0.32803[0m[0m | time: 71.852s
[2K| Adam | epoch: 010 | loss: 0.32803 - acc: 0.8851 -- iter: 18112/20250


Training Step: 3137  | total loss: [1m[32m0.32645[0m[0m | time: 72.103s
[2K| Adam | epoch: 010 | loss: 0.32645 - acc: 0.8857 -- iter: 18176/20250


Training Step: 3138  | total loss: [1m[32m0.32422[0m[0m | time: 72.352s
[2K| Adam | epoch: 010 | loss: 0.32422 - acc: 0.8815 -- iter: 18240/20250


Training Step: 3139  | total loss: [1m[32m0.33132[0m[0m | time: 72.606s
[2K| Adam | epoch: 010 | loss: 0.33132 - acc: 0.8714 -- iter: 18304/20250


Training Step: 3140  | total loss: [1m[32m0.32213[0m[0m | time: 72.881s
[2K| Adam | epoch: 010 | loss: 0.32213 - acc: 0.8765 -- iter: 18368/20250


Training Step: 3141  | total loss: [1m[32m0.33124[0m[0m | time: 73.181s
[2K| Adam | epoch: 010 | loss: 0.33124 - acc: 0.8701 -- iter: 18432/20250


Training Step: 3142  | total loss: [1m[32m0.32479[0m[0m | time: 73.432s
[2K| Adam | epoch: 010 | loss: 0.32479 - acc: 0.8706 -- iter: 18496/20250


Training Step: 3143  | total loss: [1m[32m0.32664[0m[0m | time: 73.673s
[2K| Adam | epoch: 010 | loss: 0.32664 - acc: 0.8679 -- iter: 18560/20250


Training Step: 3144  | total loss: [1m[32m0.32808[0m[0m | time: 73.918s
[2K| Adam | epoch: 010 | loss: 0.32808 - acc: 0.8639 -- iter: 18624/20250


Training Step: 3145  | total loss: [1m[32m0.34461[0m[0m | time: 74.181s
[2K| Adam | epoch: 010 | loss: 0.34461 - acc: 0.8510 -- iter: 18688/20250


Training Step: 3146  | total loss: [1m[32m0.33846[0m[0m | time: 74.447s
[2K| Adam | epoch: 010 | loss: 0.33846 - acc: 0.8549 -- iter: 18752/20250


Training Step: 3147  | total loss: [1m[32m0.33272[0m[0m | time: 74.702s
[2K| Adam | epoch: 010 | loss: 0.33272 - acc: 0.8601 -- iter: 18816/20250


Training Step: 3148  | total loss: [1m[32m0.32626[0m[0m | time: 74.953s
[2K| Adam | epoch: 010 | loss: 0.32626 - acc: 0.8631 -- iter: 18880/20250


Training Step: 3149  | total loss: [1m[32m0.32304[0m[0m | time: 75.237s
[2K| Adam | epoch: 010 | loss: 0.32304 - acc: 0.8627 -- iter: 18944/20250


Training Step: 3150  | total loss: [1m[32m0.31148[0m[0m | time: 75.550s
[2K| Adam | epoch: 010 | loss: 0.31148 - acc: 0.8702 -- iter: 19008/20250


Training Step: 3151  | total loss: [1m[32m0.30813[0m[0m | time: 75.804s
[2K| Adam | epoch: 010 | loss: 0.30813 - acc: 0.8723 -- iter: 19072/20250


Training Step: 3152  | total loss: [1m[32m0.30884[0m[0m | time: 76.055s
[2K| Adam | epoch: 010 | loss: 0.30884 - acc: 0.8757 -- iter: 19136/20250


Training Step: 3153  | total loss: [1m[32m0.31665[0m[0m | time: 76.332s
[2K| Adam | epoch: 010 | loss: 0.31665 - acc: 0.8709 -- iter: 19200/20250


Training Step: 3154  | total loss: [1m[32m0.31056[0m[0m | time: 76.590s
[2K| Adam | epoch: 010 | loss: 0.31056 - acc: 0.8760 -- iter: 19264/20250


Training Step: 3155  | total loss: [1m[32m0.30434[0m[0m | time: 76.846s
[2K| Adam | epoch: 010 | loss: 0.30434 - acc: 0.8759 -- iter: 19328/20250


Training Step: 3156  | total loss: [1m[32m0.30051[0m[0m | time: 77.108s
[2K| Adam | epoch: 010 | loss: 0.30051 - acc: 0.8789 -- iter: 19392/20250


Training Step: 3157  | total loss: [1m[32m0.28575[0m[0m | time: 77.379s
[2K| Adam | epoch: 010 | loss: 0.28575 - acc: 0.8879 -- iter: 19456/20250


Training Step: 3158  | total loss: [1m[32m0.28627[0m[0m | time: 77.628s
[2K| Adam | epoch: 010 | loss: 0.28627 - acc: 0.8851 -- iter: 19520/20250


Training Step: 3159  | total loss: [1m[32m0.28543[0m[0m | time: 77.889s
[2K| Adam | epoch: 010 | loss: 0.28543 - acc: 0.8872 -- iter: 19584/20250


Training Step: 3160  | total loss: [1m[32m0.27852[0m[0m | time: 78.172s
[2K| Adam | epoch: 010 | loss: 0.27852 - acc: 0.8875 -- iter: 19648/20250


Training Step: 3161  | total loss: [1m[32m0.27837[0m[0m | time: 78.426s
[2K| Adam | epoch: 010 | loss: 0.27837 - acc: 0.8894 -- iter: 19712/20250


Training Step: 3162  | total loss: [1m[32m0.27706[0m[0m | time: 78.674s
[2K| Adam | epoch: 010 | loss: 0.27706 - acc: 0.8926 -- iter: 19776/20250


Training Step: 3163  | total loss: [1m[32m0.26401[0m[0m | time: 78.924s
[2K| Adam | epoch: 010 | loss: 0.26401 - acc: 0.9003 -- iter: 19840/20250


Training Step: 3164  | total loss: [1m[32m0.28126[0m[0m | time: 79.173s
[2K| Adam | epoch: 010 | loss: 0.28126 - acc: 0.8930 -- iter: 19904/20250


Training Step: 3165  | total loss: [1m[32m0.28802[0m[0m | time: 79.418s
[2K| Adam | epoch: 010 | loss: 0.28802 - acc: 0.8881 -- iter: 19968/20250


Training Step: 3166  | total loss: [1m[32m0.28847[0m[0m | time: 79.661s
[2K| Adam | epoch: 010 | loss: 0.28847 - acc: 0.8868 -- iter: 20032/20250


Training Step: 3167  | total loss: [1m[32m0.27383[0m[0m | time: 79.909s
[2K| Adam | epoch: 010 | loss: 0.27383 - acc: 0.8919 -- iter: 20096/20250


Training Step: 3168  | total loss: [1m[32m0.28477[0m[0m | time: 80.159s
[2K| Adam | epoch: 010 | loss: 0.28477 - acc: 0.8824 -- iter: 20160/20250


Training Step: 3169  | total loss: [1m[32m0.27872[0m[0m | time: 80.408s
[2K| Adam | epoch: 010 | loss: 0.27872 - acc: 0.8879 -- iter: 20224/20250


Training Step: 3170  | total loss: [1m[32m0.27227[0m[0m | time: 82.385s
[2K| Adam | epoch: 010 | loss: 0.27227 - acc: 0.8897 | val_loss: 0.50984 - val_acc: 0.7969 -- iter: 20250/20250
--
