### 多级推理模块

0. 由于没有self-attention pooling了，所以再加一层self-attention层
1. 每次更新段落的Summary vectors 
    input: [batch_sise, para_num, para_len, dim]
    query: [batch_size, dim]
    
2. expand -> view -> biSeqAtt -> sum
3. ori san

In [1]:
import os
import torch
import torch.nn as nn
import torchtext
from tensorboardX import SummaryWriter
import random
import numpy as np

from torchtext.data import NestedField, Field, RawField
from model import *
from dataset import DataHandler
%load_ext autoreload

%autoreload 2

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
class Config:
    def __init__(self):
        self.hidden = 50
        self.embedding_dim = 300
        self.lr = 5e-4
        self.epochs = 50
        self.fix_length = None
        
        self.log_dir = './logs'
        self.model_name = 'h_reason'
        self.batch_size = 4
        self.train_data = './data/train_filter.pt'
        self.dev_data = './data/dev_filter.pt'
        
        self.word_vocab = './data/glove_vocab.pt'
        self.charNGram_vocab = './data/charNGram_vocab.pt'
        #self.word_vocab = None
        #self.charNGram_vocab = None
        
        self.dropout = 0.2
        self.seed = 1023
        self.steps = 2
        self.memory_type = 0
        
config = Config()
device = torch.device("cuda:0")


In [3]:
torch.cuda.is_available()

True

In [4]:
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)
torch.cuda.manual_seed_all(config.seed)

In [5]:
save_path = config.model_name  + '_lr_'+ str(config.lr)+ '__hidden__' + str(config.hidden) \
            + '_batchsize_' + str(config.batch_size) +  '_p'+ str(config.dropout)+'_steps_'+str(config.steps)+'memory_type_' \
            + str(config.memory_type)
save_path = os.path.join(config.log_dir, save_path)   
print(save_path)
config.save_path = save_path

./logs/h_reason_lr_0.0005__hidden__50_batchsize_4_p0.2_steps_1memory_type_0


### Define Fileds

In [6]:
word_field = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True) # query
multi_word_field = NestedField(word_field) 

word_field_sup = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True, fix_length=config.fix_length)
multi_word_field_sup = NestedField(word_field_sup) 

charNGram_field = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True) # query
multi_charNGram_field = NestedField(charNGram_field) 

charNGram_field_sup = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True, fix_length=config.fix_length)
multi_charNGram_field_sup = NestedField(charNGram_field_sup) 

raw = RawField()
raw.is_target = False

label_field = Field(sequential=False, is_target=True, use_vocab=False)

dict_field = {
    'id': ('id', raw),
    'supports': [('s_glove', multi_word_field_sup), ('s_charNGram', multi_charNGram_field_sup)],
    'query': [('q_glove', word_field), ('q_charNGram', charNGram_field)],
    'candidates': [('c_glove', multi_word_field), ('c_charNGram', multi_charNGram_field)],
    'label': ('label', label_field),
    'mentions': ('mentions', raw),
    'para_label': ('para_label', raw)
}

In [7]:
data_handler = DataHandler(config.train_data, config.dev_data, dict_field)

# torch.save(data_handler.trainset.examples, './data/train_example.pt')
# torch.save(data_handler.valset.examples, './data/dev_example.pt')

load examples.pt  :./data/train_filter.pt, ./data/dev_filter.pt


### Build Vocab

In [8]:
if config.charNGram_vocab is not None:
    charNGram_vocab = torch.load(config.charNGram_vocab)
    multi_charNGram_field_sup.vocab = charNGram_vocab
    charNGram_field_sup.vocab = charNGram_vocab
else:
    multi_charNGram_field_sup.build_vocab(data_handler.trainset, data_handler.valset, 
                                          vectors=torchtext.vocab.CharNGram())

if config.word_vocab is not None:
    word_vocab = torch.load(config.word_vocab)
    multi_word_field_sup.vocab = word_vocab
    word_field_sup.vocab = word_vocab
else:
    multi_word_field_sup.build_vocab(data_handler.trainset, data_handler.valset, 
                                 vectors=torchtext.vocab.GloVe(dim=300,name='6B') )

word_field.vocab = multi_word_field_sup.vocab
charNGram_field.vocab = multi_charNGram_field_sup.vocab



In [None]:
print(multi_word_field_sup.vocab.vectors.shape,multi_charNGram_field_sup.vocab.vectors.shape )

torch.Size([312667, 300]) torch.Size([312667, 100])


### Get data_iter

In [None]:
train_iter = data_handler.get_train_iter(batch_size=config.batch_size)
val_iter = data_handler.get_val_iter(batch_size=config.batch_size)

In [None]:
for idx, batch in enumerate(val_iter):
    break
batch


[torchtext.data.batch.Batch of size 4]
	[.id]:['WH_dev_0', 'WH_dev_1', 'WH_dev_2', 'WH_dev_3']
	[.s_glove]:[torch.LongTensor of size 4x15x292]
	[.s_charNGram]:[torch.LongTensor of size 4x15x292]
	[.q_glove]:[torch.LongTensor of size 4x11]
	[.q_charNGram]:[torch.LongTensor of size 4x11]
	[.c_glove]:[torch.LongTensor of size 4x18x4]
	[.c_charNGram]:[torch.LongTensor of size 4x18x4]
	[.label]:[torch.LongTensor of size 4]
	[.mentions]:[[[[6, 145, 146], [6, 173, 174], [7, 78, 79]], [[3, 50, 53], [5, 28, 31], [13, 1, 4]], [[6, 135, 136], [6, 218, 219], [6, 261, 262], [8, 45, 46], [12, 98, 99]], [[0, 2, 4], [7, 1, 3], [13, 64, 66], [13, 69, 71]], [[0, 36, 38], [10, 1, 3], [13, 75, 77]], [[0, 14, 15], [1, 63, 64], [1, 138, 139], [1, 186, 187], [1, 238, 239], [2, 128, 129], [9, 8, 9], [9, 43, 44], [10, 19, 20], [10, 34, 35], [11, 37, 38], [11, 79, 80], [13, 56, 57]], [[7, 37, 40]], [[6, 180, 181], [12, 101, 102]], [[12, 96, 97]], [[8, 43, 46]], [[6, 169, 172]], [[6, 179, 181]], [[7, 38, 40]], 

### Define Model

In [None]:
def generate_mask(x_size, num_turn, dropout_p=0.0, is_training=False):
    if not is_training: dropout_p = 0.0
    new_data = torch.zeros(x_size, num_turn)
    new_data = (1-dropout_p) * (new_data.zero_() + 1)
    for i in range(new_data.size(0)):
        one = random.randint(0, new_data.size(1)-1)
        new_data[i][one] = 1
    mask = 1.0/(1 - dropout_p) * torch.bernoulli(new_data)
    mask.requires_grad = False
    return mask

class SAN(nn.Module):
    def __init__(self, question_dim, support_dim, candidate_dim, num_turn=5, dropout=0.2, memo_dropout=0.4, memory_type=0, device=None):
        super(SAN,self).__init__()
        self.qp_bilinear_attention_word = BilinearSeqAttn(support_dim, question_dim, dropout=dropout)
        self.qp_bilinear_attention_para = BilinearSeqAttn(support_dim, question_dim, dropout=dropout)

        self.candidates_scorer = BilinearSeqAttn(candidate_dim, question_dim, dropout=dropout)        
        self.gru = nn.GRUCell(support_dim, question_dim)
        
        self.num_turn = num_turn
        
        self.dropout = nn.Dropout(p=dropout)
        self.memo_dropout=memo_dropout
        self.device = device
        self.memory_type = memory_type
        
    def forward(self, question_embedding, para_embedding, candidates_embedding, para_length):
        '''
        input:
            question_embedding: [batch_size, hidden_dim]
            para_embedding: [batch_size*para_num, para_length, hidden_dim]
            candidates_embedding: [batch_size, candidates_num, hidden_dim]

        '''
        score_list = []
        batch_size = question_embedding.size(0)
        hidden = question_embedding.size(1)        
        for turn in range(self.num_turn):
            question_embedding_expand = question_embedding.unsqueeze(1).expand(batch_size, para_length, hidden).contiguous()
            question_embedding_expand = question_embedding_expand.view(-1,hidden)    
            
            # update paragraph embedding
            qp_score_word = self.qp_bilinear_attention_word(para_embedding, question_embedding_expand)
            qp_score_word = F.softmax(qp_score_word, 1)
            para_embedding_summary = torch.bmm(qp_score_word.unsqueeze(1), para_embedding).squeeze(1)
            para_embedding_summary = para_embedding_summary.contiguous().view(batch_size, para_length, hidden)
            
            # update question embedding
            qp_score_para = self.qp_bilinear_attention_para(para_embedding_summary, question_embedding)
            qp_score_para = F.softmax(qp_score_para, 1)
            S = torch.bmm(qp_score_para.unsqueeze(1), para_embedding_summary).squeeze(1)
            
            S = self.dropout(question_embedding)
            question_embedding = self.gru(S, question_embedding)
            
            # compute candidates score
            candidates_score = self.candidates_scorer(candidates_embedding, question_embedding)

            score_list.append(candidates_score)
        if self.memory_type == 0:
            mask = generate_mask(batch_size,self.num_turn, self.memo_dropout, self.training)
            mask = mask.to(self.device)
            mask = [m.contiguous() for m in torch.unbind(mask, 1)]

            score_list = [mask[idx].view(batch_size, 1).expand_as(inp) * inp for idx, inp in enumerate(score_list)]
            scores = torch.stack(score_list, 2)
            scores = torch.mean(scores, 2)
        elif self.memory_type == 1:
            scores = torch.stack(score_list, 2)
            scores = torch.mean(scores, 2)
        elif self.memory_type == 2:
            scores = score_list[-1]
            
        return scores
    
    

In [None]:
class SimpleQANet(nn.Module):
    
    def __init__(self, config, word_vectors, charNGram_vectors, device):
        super(SimpleQANet, self).__init__()
        self.config = config
        self.device = device
        
        self.embedding_layer = EmbeddingLayer(word_vectors, charNGram_vectors)
        

        self.rnn = EncoderRNN(config.embedding_dim, config.hidden, 1, True, True, config.dropout, False)
                
            
        self.co_att = CoAttention(config.hidden*2, att_type=2, dropout=config.dropout)
        
        self.linear_1 = nn.Sequential(
                        nn.Linear(config.hidden*4, config.hidden),
                        nn.ReLU()
                    )        
        self.rnn2 =  EncoderRNN(config.hidden, config.hidden, 1, True, True, config.dropout, False)
        
        self.word_att = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        self.word_att_q = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        
        self.pass_att = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        
        self.c_att = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        
        self.mention_att = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        
        
        #self.fusion = FusionLayer(config.hidden*2, dropout=config.dropout)
        self.max_pooling = PoolingLayer()     
        
        self.fc = nn.Linear(config.hidden*2, config.hidden*4)
        self.san = SAN(config.hidden*2,config.hidden*2,config.hidden*4, num_turn=config.steps, memory_type=config.memory_type, device=device)
        
        self.to(device)
        
    def forward(self, batch, return_label = True):
        if type(batch.q_glove) is tuple:
            q_glove, _ = batch.q_glove
            q_charNGram, _ = batch.q_charNGram
        else:
            q_glove = batch.q_glove
            q_charNGram = batch.q_charNGram            
        
        s_glove = batch.s_glove
        s_charNGram = batch.s_charNGram
        
        c_glove = batch.c_glove
        c_charNGram = batch.c_charNGram
        
        q_glove = q_glove.to(self.device)
        q_charNGram = q_charNGram.to(self.device)

        s_glove = s_glove.to(self.device)
        s_charNGram = s_charNGram.to(self.device)

        c_glove = c_glove.to(self.device)
        c_charNGram = c_charNGram.to(self.device)
        
        
        q_out = self.embedding_layer(q_glove) # [batch_size,qeustion_length, hidden_dim]
        s_out = self.embedding_layer(s_glove) # [batch_szie, support_num, support_length, hidden_dim]
        c_out = self.embedding_layer(c_glove) # [batch_size, candidates_num, candidates_length, hidden_dim]        
        
        batch_size=  s_out.size(0)
        
        s_len = s_out.size(1)
        c_len = c_out.size(1)
        
        s_word_len = s_out.size(2)
        c_word_len = c_out.size(2)
        
        hidden = s_out.size(-1)
        
        s_out = s_out.view(batch_size*s_len, s_word_len, hidden).contiguous()
        c_out = c_out.view(batch_size*c_len, c_word_len, hidden).contiguous()
        
        q_out = self.rnn(q_out) # [batch_size,qeustion_length, hidden_dim]
        c_out = self.rnn(c_out) # [batch_szie * support_num, support_length, hidden_dim]
        s_out = self.rnn(s_out) # [batch_size * candidates_num, candidates_length, hidden_dim] 
        
        # Attention
        
        q_word_len = q_out.size(1)
        q_out_expand = q_out.unsqueeze(1).expand(batch_size, s_len, q_word_len, q_out.size(-1)).contiguous()
        q_out_expand = q_out_expand.view(batch_size*s_len, q_word_len, q_out.size(-1)).contiguous()
        
        s_out_att, q_out_att = self.co_att(s_out, q_out_expand)
        #S_s = self.fusion(s_out, s_out_att)
        #S_q = self.fusion(q_out, q_out_att)
        
        S_s = self.linear_1(s_out_att)
        S_s = self.rnn2(S_s) # [batch_size * para_num, para_length, hidden]
        
        
        
        batch_c_m = []
        for i in range(batch_size):
            # get mention embedding
            mentions = batch.mentions[i]
            c_ms = torch.zeros(c_len, s_len, s_out.size(-1))
            for idx, c_mention in enumerate(mentions):
                c_m_dict = {}
                for mention in c_mention:
                    m = s_out[i*s_len + mention[0]][mention[1]:mention[2]]
                    m = self.max_pooling(m.unsqueeze(0)).squeeze()
                    if mention[0] not in c_m_dict:
                        c_m_dict[mention[0]] = []
                    c_m_dict[mention[0]].append(m)
                c_m = torch.zeros(s_len, s_out.size(-1))
                for key in c_m_dict:
                    for m in c_m_dict[key]:
                        c_m[key] += m.cpu()
                    c_m[key] /= len(c_m_dict[key])
                c_ms[idx] = c_m
            batch_c_m.append(c_ms)
        batch_c_m = torch.stack(batch_c_m)
        batch_c_m = batch_c_m.to(self.device)
        batch_c_m = batch_c_m.view(batch_size*c_len, s_len, -1)
        batch_c_m = self.mention_att(batch_c_m)
        batch_c_m = batch_c_m.view(batch_size, c_len, -1)
        

        
        question_summary = self.word_att_q(q_out)
        
        
        candidates_summary = self.c_att(c_out)        
        candidates_summary = candidates_summary.view(batch_size, c_len, -1)
        
        candidates_summary = torch.cat([candidates_summary, batch_c_m],-1)
        
        
        
        score = self.san(question_summary, S_s, candidates_summary, s_len)
        
        if return_label:
            label = batch.label.to(self.device)
            return score, label
        return score

#### test model

In [None]:
model = SimpleQANet(config, word_field.vocab.vectors, charNGram_field.vocab.vectors, device)
#score, label= model(batch)
#Eprint(score.shape, label.shape)

In [None]:
from tqdm import tqdm, trange

In [None]:
from utils import AverageMeter

def train(epoch, data_iter, model, criterion, optimizer, batch_size=1):
    losses = AverageMeter()
    acces = AverageMeter()
    model.train()
    #model.embedding_layer.eval()
    with trange(len(data_iter)) as t:
        for idx, batch in enumerate(data_iter):
            score, label, = model(batch)

            loss = criterion(score, label)

            loss = loss / batch_size
            loss.backward()
            if (idx+1)%batch_size == 0 :
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)            
                optimizer.step()
                optimizer.zero_grad()        

            losses.update(loss.item()*batch_size)

            pred = score.argmax(1)
            acc = pred.eq(label).sum().item()  / pred.size(0)
            acces.update(acc)
            
            matrix = {
                'acc':acces.avg,
                'epoch':epoch,
                'loss': losses.avg
            }
            t.set_postfix(matrix)
            t.update()
            if (idx+1) % (batch_size*100) == 0:
                print(f'epoch:{epoch}, idx:{idx}/{len(data_iter)}, loss:{losses.avg}, acc:{acces.avg}')
    return losses.avg, acces.avg

def val(epoch, data_iter, model, criterion):
    losses = AverageMeter()
    acces = AverageMeter()
    model.eval()
    for idx, batch in enumerate(data_iter):
        with torch.no_grad():
            score, label = model(batch)
                    
        loss = criterion(score, label)

        losses.update(loss.item())
        
        pred = score.argmax(1)
        acc = pred.eq(label).sum().item()  / pred.size(0)
        acces.update(acc)
        if idx % 100 == 0:
            print(f'epoch:{epoch}, idx:{idx}/{len(data_iter)}, loss:{losses.avg}, acc:{acces.avg}')
    return losses.avg, acces.avg

In [None]:
optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                             lr=config.lr)

criterion = nn.CrossEntropyLoss()

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs)
#train(0, train_iter, model, criterion, optimizer, batch_size=config.batch_size)
# val(0, val_iter, model,criterion)

In [None]:
cycle_len = 1
cycle_iter = 50

In [None]:
if not os.path.exists(config.save_path):
    os.makedirs(config.save_path)
writer = SummaryWriter(config.save_path)

best_acc = 0.0
for i in range(cycle_len):
    optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                             lr=config.lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=cycle_iter)
    for epoch in range(cycle_iter):
        scheduler.step()
        train_loss, train_acc = train(epoch, train_iter, model, criterion, optimizer, 1)
        val_loss, val_acc = val(epoch, val_iter, model, criterion)
        global_epoch = cycle_iter * i + epoch + 1
        writer.add_scalar('train_loss', train_loss, global_epoch)
        writer.add_scalar('val_loss', val_loss, global_epoch)
        writer.add_scalar('train_acc', train_acc, global_epoch)
        writer.add_scalar('val_acc', val_acc, global_epoch)

        state = {
            'val_acc': val_acc,
            'train_acc': train_acc,
            'epoch': epoch,
            'model': model.state_dict()
        }
        torch.save(state, os.path.join(config.save_path,'lastest.pth'))
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(state, os.path.join(save_path, f'best_epoch{epoch}.pth'))

  alphas = self.softmax(alphas)  # (bsz, sent_len)
  1%|          | 100/10845 [00:57<1:26:42,  2.07it/s, acc=0.198, epoch=0, loss=3.15]

epoch:0, idx:99/10845, loss:3.153954541683197, acc:0.1975


  2%|▏         | 200/10845 [01:51<1:25:28,  2.08it/s, acc=0.249, epoch=0, loss=2.91]

epoch:0, idx:199/10845, loss:2.908936183154583, acc:0.24875


  3%|▎         | 300/10845 [02:48<1:29:29,  1.96it/s, acc=0.281, epoch=0, loss=2.74]

epoch:0, idx:299/10845, loss:2.7417222036918005, acc:0.2808333333333333


  4%|▎         | 400/10845 [03:40<1:08:06,  2.56it/s, acc=0.293, epoch=0, loss=2.63]

epoch:0, idx:399/10845, loss:2.63254357278347, acc:0.293125


  5%|▍         | 500/10845 [04:36<1:39:23,  1.73it/s, acc=0.298, epoch=0, loss=2.56]

epoch:0, idx:499/10845, loss:2.556354758620262, acc:0.2985


  6%|▌         | 600/10845 [05:28<1:05:53,  2.59it/s, acc=0.312, epoch=0, loss=2.48]

epoch:0, idx:599/10845, loss:2.4806740988294282, acc:0.3125


  6%|▋         | 700/10845 [06:27<1:42:01,  1.66it/s, acc=0.318, epoch=0, loss=2.45]

epoch:0, idx:699/10845, loss:2.446483498896871, acc:0.3175


  7%|▋         | 800/10845 [07:24<1:40:45,  1.66it/s, acc=0.323, epoch=0, loss=2.39]

epoch:0, idx:799/10845, loss:2.393891591802239, acc:0.323125


  8%|▊         | 900/10845 [08:23<1:46:23,  1.56it/s, acc=0.336, epoch=0, loss=2.33]

epoch:0, idx:899/10845, loss:2.3316517654392452, acc:0.3363888888888889


  9%|▉         | 1000/10845 [09:19<1:26:53,  1.89it/s, acc=0.341, epoch=0, loss=2.29]

epoch:0, idx:999/10845, loss:2.292008549571037, acc:0.34125


 10%|█         | 1100/10845 [10:12<1:41:05,  1.61it/s, acc=0.345, epoch=0, loss=2.27]

epoch:0, idx:1099/10845, loss:2.2715190655534916, acc:0.3452272727272727


 11%|█         | 1200/10845 [11:08<1:31:42,  1.75it/s, acc=0.348, epoch=0, loss=2.25]

epoch:0, idx:1199/10845, loss:2.253607843716939, acc:0.34791666666666665


 12%|█▏        | 1300/10845 [12:00<1:48:44,  1.46it/s, acc=0.352, epoch=0, loss=2.23]

epoch:0, idx:1299/10845, loss:2.2251319348812104, acc:0.3525


 13%|█▎        | 1400/10845 [12:55<1:15:20,  2.09it/s, acc=0.36, epoch=0, loss=2.2]  

epoch:0, idx:1399/10845, loss:2.200201819028173, acc:0.36


 14%|█▍        | 1500/10845 [13:53<1:38:05,  1.59it/s, acc=0.365, epoch=0, loss=2.18]

epoch:0, idx:1499/10845, loss:2.1780136115550994, acc:0.36516666666666664


 15%|█▍        | 1600/10845 [14:51<1:33:49,  1.64it/s, acc=0.369, epoch=0, loss=2.16]

epoch:0, idx:1599/10845, loss:2.1598397232219577, acc:0.36859375


 16%|█▌        | 1700/10845 [15:51<1:52:39,  1.35it/s, acc=0.37, epoch=0, loss=2.16] 

epoch:0, idx:1699/10845, loss:2.1551819403381907, acc:0.3695588235294118


 17%|█▋        | 1800/10845 [16:45<1:13:34,  2.05it/s, acc=0.372, epoch=0, loss=2.14]

epoch:0, idx:1799/10845, loss:2.139134472310543, acc:0.3725


 18%|█▊        | 1900/10845 [17:41<1:08:13,  2.19it/s, acc=0.375, epoch=0, loss=2.13]

epoch:0, idx:1899/10845, loss:2.1260014254168462, acc:0.375


 18%|█▊        | 2000/10845 [18:38<1:20:10,  1.84it/s, acc=0.377, epoch=0, loss=2.11]

epoch:0, idx:1999/10845, loss:2.114635533928871, acc:0.377


 19%|█▉        | 2100/10845 [19:33<1:12:49,  2.00it/s, acc=0.38, epoch=0, loss=2.1]  

epoch:0, idx:2099/10845, loss:2.0996356375444503, acc:0.37952380952380954


 20%|██        | 2200/10845 [20:27<57:49,  2.49it/s, acc=0.383, epoch=0, loss=2.08]  

epoch:0, idx:2199/10845, loss:2.083177768588066, acc:0.38272727272727275


 21%|██        | 2300/10845 [21:21<1:00:55,  2.34it/s, acc=0.384, epoch=0, loss=2.07]

epoch:0, idx:2299/10845, loss:2.0703835657109386, acc:0.38358695652173913


 22%|██▏       | 2400/10845 [22:18<59:21,  2.37it/s, acc=0.384, epoch=0, loss=2.06]  

epoch:0, idx:2399/10845, loss:2.0615942652275163, acc:0.38427083333333334


 23%|██▎       | 2500/10845 [23:11<1:07:12,  2.07it/s, acc=0.387, epoch=0, loss=2.05]

epoch:0, idx:2499/10845, loss:2.047653180837631, acc:0.3872


 24%|██▍       | 2600/10845 [24:06<1:29:43,  1.53it/s, acc=0.388, epoch=0, loss=2.04]

epoch:0, idx:2599/10845, loss:2.0404531679703637, acc:0.38759615384615387


 25%|██▍       | 2700/10845 [25:02<59:07,  2.30it/s, acc=0.391, epoch=0, loss=2.03]  

epoch:0, idx:2699/10845, loss:2.028125783028426, acc:0.39064814814814813


 26%|██▌       | 2800/10845 [25:56<1:15:52,  1.77it/s, acc=0.393, epoch=0, loss=2.02]

epoch:0, idx:2799/10845, loss:2.0207290561071463, acc:0.39276785714285717


 27%|██▋       | 2900/10845 [26:49<1:02:17,  2.13it/s, acc=0.394, epoch=0, loss=2.01]

epoch:0, idx:2899/10845, loss:2.00861498674442, acc:0.39448275862068966


 28%|██▊       | 3000/10845 [27:45<52:19,  2.50it/s, acc=0.396, epoch=0, loss=2]     

epoch:0, idx:2999/10845, loss:1.998297681848208, acc:0.3963333333333333


 29%|██▊       | 3100/10845 [28:41<1:21:48,  1.58it/s, acc=0.398, epoch=0, loss=1.99]

epoch:0, idx:3099/10845, loss:1.9898612104692768, acc:0.39814516129032257


 30%|██▉       | 3200/10845 [29:34<1:24:03,  1.52it/s, acc=0.398, epoch=0, loss=1.98]

epoch:0, idx:3199/10845, loss:1.9832597390562297, acc:0.3984375


 30%|███       | 3300/10845 [30:31<1:07:39,  1.86it/s, acc=0.4, epoch=0, loss=1.98]  

epoch:0, idx:3299/10845, loss:1.9755533829782947, acc:0.4


 31%|███▏      | 3400/10845 [31:25<1:02:24,  1.99it/s, acc=0.403, epoch=0, loss=1.96]

epoch:0, idx:3399/10845, loss:1.9628484496649574, acc:0.40286764705882355


 32%|███▏      | 3500/10845 [32:23<56:42,  2.16it/s, acc=0.404, epoch=0, loss=1.96]  

epoch:0, idx:3499/10845, loss:1.9559668406077793, acc:0.4042857142857143


 33%|███▎      | 3600/10845 [33:16<1:01:31,  1.96it/s, acc=0.407, epoch=0, loss=1.95]

epoch:0, idx:3599/10845, loss:1.9465960202614467, acc:0.4070138888888889


 34%|███▍      | 3700/10845 [34:10<1:07:38,  1.76it/s, acc=0.41, epoch=0, loss=1.94] 

epoch:0, idx:3699/10845, loss:1.9372001615408305, acc:0.4095945945945946


 35%|███▌      | 3800/10845 [35:06<54:38,  2.15it/s, acc=0.411, epoch=0, loss=1.93]  

epoch:0, idx:3799/10845, loss:1.930612901104124, acc:0.41138157894736843


 36%|███▌      | 3900/10845 [36:01<58:31,  1.98it/s, acc=0.413, epoch=0, loss=1.92]  

epoch:0, idx:3899/10845, loss:1.9224698801835378, acc:0.41275641025641024


 37%|███▋      | 4000/10845 [36:59<1:05:43,  1.74it/s, acc=0.415, epoch=0, loss=1.92]

epoch:0, idx:3999/10845, loss:1.9156766917407513, acc:0.414625


 38%|███▊      | 4101/10845 [37:57<41:11,  2.73it/s, acc=0.415, epoch=0, loss=1.91]  

epoch:0, idx:4099/10845, loss:1.9121610326011007, acc:0.41542682926829266


 39%|███▊      | 4200/10845 [38:55<1:16:54,  1.44it/s, acc=0.417, epoch=0, loss=1.91]

epoch:0, idx:4199/10845, loss:1.9088491843853679, acc:0.41660714285714284


 40%|███▉      | 4300/10845 [39:47<40:32,  2.69it/s, acc=0.417, epoch=0, loss=1.9]   

epoch:0, idx:4299/10845, loss:1.903237141259881, acc:0.41738372093023257


 41%|████      | 4400/10845 [40:46<1:03:02,  1.70it/s, acc=0.419, epoch=0, loss=1.9]

epoch:0, idx:4399/10845, loss:1.8982850969379599, acc:0.41852272727272727


 41%|████▏     | 4500/10845 [41:39<55:49,  1.89it/s, acc=0.42, epoch=0, loss=1.89]   

epoch:0, idx:4499/10845, loss:1.891733173635271, acc:0.41988888888888887


 42%|████▏     | 4600/10845 [42:32<50:20,  2.07it/s, acc=0.422, epoch=0, loss=1.89]  

epoch:0, idx:4599/10845, loss:1.8858198531555093, acc:0.42168478260869563


 43%|████▎     | 4700/10845 [43:27<1:11:22,  1.44it/s, acc=0.423, epoch=0, loss=1.88]

epoch:0, idx:4699/10845, loss:1.879444788364654, acc:0.42345744680851066


 44%|████▍     | 4800/10845 [44:21<52:41,  1.91it/s, acc=0.425, epoch=0, loss=1.88]  

epoch:0, idx:4799/10845, loss:1.8763992061714332, acc:0.42473958333333334


 45%|████▌     | 4900/10845 [45:19<46:51,  2.11it/s, acc=0.426, epoch=0, loss=1.87]  

epoch:0, idx:4899/10845, loss:1.8708733672876747, acc:0.4262755102040816


 46%|████▌     | 5000/10845 [46:11<55:59,  1.74it/s, acc=0.427, epoch=0, loss=1.87]  

epoch:0, idx:4999/10845, loss:1.8673363369107245, acc:0.4274


 47%|████▋     | 5100/10845 [47:03<51:05,  1.87it/s, acc=0.429, epoch=0, loss=1.86]  

epoch:0, idx:5099/10845, loss:1.8628084551236208, acc:0.42867647058823527


 48%|████▊     | 5200/10845 [47:57<56:20,  1.67it/s, acc=0.43, epoch=0, loss=1.86]   

epoch:0, idx:5199/10845, loss:1.856598228502732, acc:0.4297596153846154


 49%|████▉     | 5300/10845 [48:52<58:24,  1.58it/s, acc=0.431, epoch=0, loss=1.85]  

epoch:0, idx:5299/10845, loss:1.8521769329961741, acc:0.4309433962264151


 50%|████▉     | 5400/10845 [49:45<46:41,  1.94it/s, acc=0.432, epoch=0, loss=1.85]  

epoch:0, idx:5399/10845, loss:1.8490848152836163, acc:0.43180555555555555


 51%|█████     | 5500/10845 [50:38<49:59,  1.78it/s, acc=0.432, epoch=0, loss=1.85]  

epoch:0, idx:5499/10845, loss:1.8459236758080395, acc:0.43227272727272725


 52%|█████▏    | 5600/10845 [51:32<36:32,  2.39it/s, acc=0.434, epoch=0, loss=1.84]  

epoch:0, idx:5599/10845, loss:1.8412174219744546, acc:0.43370535714285713


 53%|█████▎    | 5700/10845 [52:28<44:25,  1.93it/s, acc=0.435, epoch=0, loss=1.84]  

epoch:0, idx:5699/10845, loss:1.837330697555291, acc:0.43469298245614035


 53%|█████▎    | 5800/10845 [53:23<43:50,  1.92it/s, acc=0.436, epoch=0, loss=1.83]  

epoch:0, idx:5799/10845, loss:1.8342348586177004, acc:0.43586206896551727


 54%|█████▍    | 5900/10845 [54:20<46:37,  1.77it/s, acc=0.436, epoch=0, loss=1.83]  

epoch:0, idx:5899/10845, loss:1.8309790881306438, acc:0.4361864406779661


 55%|█████▌    | 6000/10845 [55:15<41:40,  1.94it/s, acc=0.438, epoch=0, loss=1.83]  

epoch:0, idx:5999/10845, loss:1.8257302196621894, acc:0.4377083333333333


 56%|█████▌    | 6100/10845 [56:09<39:45,  1.99it/s, acc=0.44, epoch=0, loss=1.82]   

epoch:0, idx:6099/10845, loss:1.819448703390653, acc:0.439672131147541


 57%|█████▋    | 6200/10845 [57:04<48:55,  1.58it/s, acc=0.44, epoch=0, loss=1.82]  

epoch:0, idx:6199/10845, loss:1.8166023863131, acc:0.44044354838709676


 58%|█████▊    | 6300/10845 [57:56<45:54,  1.65it/s, acc=0.441, epoch=0, loss=1.82] 

epoch:0, idx:6299/10845, loss:1.8164842157231438, acc:0.440515873015873


 59%|█████▉    | 6400/10845 [58:55<30:51,  2.40it/s, acc=0.442, epoch=0, loss=1.81]  

epoch:0, idx:6399/10845, loss:1.8121826614625751, acc:0.4415625


 60%|█████▉    | 6501/10845 [59:51<29:11,  2.48it/s, acc=0.443, epoch=0, loss=1.81]  

epoch:0, idx:6499/10845, loss:1.8083795695121472, acc:0.44246153846153846


 61%|██████    | 6600/10845 [1:00:45<40:23,  1.75it/s, acc=0.444, epoch=0, loss=1.8]   

epoch:0, idx:6599/10845, loss:1.8030509921276208, acc:0.4437121212121212


 62%|██████▏   | 6700/10845 [1:01:42<37:04,  1.86it/s, acc=0.444, epoch=0, loss=1.8]  

epoch:0, idx:6699/10845, loss:1.8000083848611632, acc:0.444365671641791


 63%|██████▎   | 6800/10845 [1:02:35<39:08,  1.72it/s, acc=0.445, epoch=0, loss=1.8]

epoch:0, idx:6799/10845, loss:1.795575495476232, acc:0.4454779411764706


 64%|██████▎   | 6900/10845 [1:03:30<24:39,  2.67it/s, acc=0.447, epoch=0, loss=1.79]

epoch:0, idx:6899/10845, loss:1.7926663703935735, acc:0.44659420289855073


 65%|██████▍   | 7000/10845 [1:04:26<36:11,  1.77it/s, acc=0.447, epoch=0, loss=1.79]

epoch:0, idx:6999/10845, loss:1.7896699976239885, acc:0.44725


 65%|██████▌   | 7100/10845 [1:05:18<31:57,  1.95it/s, acc=0.448, epoch=0, loss=1.78]

epoch:0, idx:7099/10845, loss:1.783858552407211, acc:0.4483450704225352


 66%|██████▋   | 7200/10845 [1:06:09<30:04,  2.02it/s, acc=0.449, epoch=0, loss=1.78]

epoch:0, idx:7199/10845, loss:1.7811066030545366, acc:0.44881944444444444


 67%|██████▋   | 7300/10845 [1:07:05<34:08,  1.73it/s, acc=0.45, epoch=0, loss=1.78] 

epoch:0, idx:7299/10845, loss:1.777550259273346, acc:0.4500684931506849


 68%|██████▊   | 7400/10845 [1:07:56<27:32,  2.08it/s, acc=0.451, epoch=0, loss=1.77]

epoch:0, idx:7399/10845, loss:1.7746830871781787, acc:0.45070945945945945


 69%|██████▉   | 7500/10845 [1:08:54<32:55,  1.69it/s, acc=0.451, epoch=0, loss=1.77]

epoch:0, idx:7499/10845, loss:1.7728737725496293, acc:0.4513333333333333


 70%|███████   | 7600/10845 [1:09:49<26:33,  2.04it/s, acc=0.451, epoch=0, loss=1.77]

epoch:0, idx:7599/10845, loss:1.7709653902759677, acc:0.4513815789473684


 71%|███████   | 7700/10845 [1:10:44<24:41,  2.12it/s, acc=0.452, epoch=0, loss=1.77]

epoch:0, idx:7699/10845, loss:1.7679800911305787, acc:0.45246753246753246


 72%|███████▏  | 7800/10845 [1:11:39<24:23,  2.08it/s, acc=0.453, epoch=0, loss=1.76]

epoch:0, idx:7799/10845, loss:1.7645724588174085, acc:0.4532051282051282


 73%|███████▎  | 7900/10845 [1:12:35<21:04,  2.33it/s, acc=0.453, epoch=0, loss=1.76]

epoch:0, idx:7899/10845, loss:1.7631731047509591, acc:0.4534810126582278


 74%|███████▍  | 8000/10845 [1:13:30<23:17,  2.04it/s, acc=0.454, epoch=0, loss=1.76]

epoch:0, idx:7999/10845, loss:1.7607334139049053, acc:0.45403125


 75%|███████▍  | 8100/10845 [1:14:24<32:41,  1.40it/s, acc=0.455, epoch=0, loss=1.76]

epoch:0, idx:8099/10845, loss:1.7581410571086553, acc:0.4546296296296296


 76%|███████▌  | 8200/10845 [1:15:17<19:53,  2.22it/s, acc=0.455, epoch=0, loss=1.75]

epoch:0, idx:8199/10845, loss:1.754559159613237, acc:0.4552439024390244


 77%|███████▋  | 8300/10845 [1:16:09<25:42,  1.65it/s, acc=0.456, epoch=0, loss=1.75]

epoch:0, idx:8299/10845, loss:1.7503968247089041, acc:0.45626506024096386


 77%|███████▋  | 8400/10845 [1:17:06<24:39,  1.65it/s, acc=0.457, epoch=0, loss=1.75]

epoch:0, idx:8399/10845, loss:1.749781367239498, acc:0.45672619047619045


 78%|███████▊  | 8500/10845 [1:17:58<19:33,  2.00it/s, acc=0.457, epoch=0, loss=1.75]

epoch:0, idx:8499/10845, loss:1.7476466532524895, acc:0.45744117647058824


 79%|███████▉  | 8600/10845 [1:18:51<16:00,  2.34it/s, acc=0.458, epoch=0, loss=1.75]

epoch:0, idx:8599/10845, loss:1.74524583140778, acc:0.45828488372093024


 80%|████████  | 8700/10845 [1:19:48<14:06,  2.53it/s, acc=0.459, epoch=0, loss=1.74]

epoch:0, idx:8699/10845, loss:1.7429492987777995, acc:0.4589080459770115


 81%|████████  | 8800/10845 [1:20:46<22:39,  1.50it/s, acc=0.459, epoch=0, loss=1.74]

epoch:0, idx:8799/10845, loss:1.7412335892698982, acc:0.4593465909090909


 82%|████████▏ | 8900/10845 [1:21:39<15:24,  2.10it/s, acc=0.46, epoch=0, loss=1.74] 

epoch:0, idx:8899/10845, loss:1.7403088880924695, acc:0.4597191011235955


 83%|████████▎ | 9000/10845 [1:22:30<19:00,  1.62it/s, acc=0.46, epoch=0, loss=1.74]

epoch:0, idx:8999/10845, loss:1.7389833692974515, acc:0.4603888888888889


 84%|████████▍ | 9100/10845 [1:23:24<13:20,  2.18it/s, acc=0.461, epoch=0, loss=1.74]

epoch:0, idx:9099/10845, loss:1.7361869505408045, acc:0.4610714285714286


 85%|████████▍ | 9200/10845 [1:24:15<15:57,  1.72it/s, acc=0.462, epoch=0, loss=1.73]

epoch:0, idx:9199/10845, loss:1.7341230918661408, acc:0.46195652173913043


 86%|████████▌ | 9300/10845 [1:25:13<13:51,  1.86it/s, acc=0.462, epoch=0, loss=1.73]

epoch:0, idx:9299/10845, loss:1.7321088866008225, acc:0.46241935483870966


 87%|████████▋ | 9400/10845 [1:26:07<15:22,  1.57it/s, acc=0.463, epoch=0, loss=1.73]

epoch:0, idx:9399/10845, loss:1.7300418101726693, acc:0.463031914893617


 88%|████████▊ | 9500/10845 [1:27:02<13:22,  1.68it/s, acc=0.464, epoch=0, loss=1.73]

epoch:0, idx:9499/10845, loss:1.727416513831992, acc:0.46378947368421053


 89%|████████▊ | 9600/10845 [1:27:54<10:58,  1.89it/s, acc=0.465, epoch=0, loss=1.73]

epoch:0, idx:9599/10845, loss:1.7251792607828975, acc:0.46455729166666665


 89%|████████▉ | 9700/10845 [1:28:53<13:44,  1.39it/s, acc=0.465, epoch=0, loss=1.72]

epoch:0, idx:9699/10845, loss:1.7241804569222263, acc:0.46512886597938147


 90%|█████████ | 9800/10845 [1:29:50<08:07,  2.15it/s, acc=0.466, epoch=0, loss=1.72]

epoch:0, idx:9799/10845, loss:1.7224253890891463, acc:0.46573979591836734


 91%|█████████▏| 9900/10845 [1:30:44<08:36,  1.83it/s, acc=0.467, epoch=0, loss=1.72]

epoch:0, idx:9899/10845, loss:1.720225243556379, acc:0.4666161616161616


 92%|█████████▏| 10000/10845 [1:31:41<07:09,  1.97it/s, acc=0.467, epoch=0, loss=1.72]

epoch:0, idx:9999/10845, loss:1.7191418676018715, acc:0.467025


 93%|█████████▎| 10100/10845 [1:32:37<10:25,  1.19it/s, acc=0.468, epoch=0, loss=1.72]

epoch:0, idx:10099/10845, loss:1.7164880981775794, acc:0.4678960396039604


 94%|█████████▍| 10200/10845 [1:33:30<07:03,  1.52it/s, acc=0.468, epoch=0, loss=1.72]

epoch:0, idx:10199/10845, loss:1.7164896501454652, acc:0.46791666666666665


 95%|█████████▍| 10300/10845 [1:34:27<04:52,  1.86it/s, acc=0.468, epoch=0, loss=1.72]

epoch:0, idx:10299/10845, loss:1.7152977017812359, acc:0.4679854368932039


 96%|█████████▌| 10400/10845 [1:35:22<04:16,  1.74it/s, acc=0.468, epoch=0, loss=1.71]

epoch:0, idx:10399/10845, loss:1.7142715609016326, acc:0.46802884615384616


 97%|█████████▋| 10500/10845 [1:36:15<02:28,  2.32it/s, acc=0.468, epoch=0, loss=1.71]

epoch:0, idx:10499/10845, loss:1.7120716457366942, acc:0.4684285714285714


 98%|█████████▊| 10600/10845 [1:37:09<02:02,  1.99it/s, acc=0.469, epoch=0, loss=1.71]

epoch:0, idx:10599/10845, loss:1.7088038575761724, acc:0.4689858490566038


 99%|█████████▊| 10700/10845 [1:38:02<01:08,  2.11it/s, acc=0.47, epoch=0, loss=1.71] 

epoch:0, idx:10699/10845, loss:1.707289832819288, acc:0.46955607476635514


100%|█████████▉| 10800/10845 [1:38:57<00:21,  2.09it/s, acc=0.47, epoch=0, loss=1.71]

epoch:0, idx:10799/10845, loss:1.7052685962175882, acc:0.47034722222222225


100%|██████████| 10845/10845 [1:39:21<00:00,  1.85it/s, acc=0.47, epoch=0, loss=1.71]


epoch:0, idx:0/1275, loss:1.0945760011672974, acc:0.5
epoch:0, idx:100/1275, loss:1.609716781885317, acc:0.5247524752475248
epoch:0, idx:200/1275, loss:1.5638252704297726, acc:0.5385572139303483
epoch:0, idx:300/1275, loss:1.5482094141335978, acc:0.553156146179402
epoch:0, idx:400/1275, loss:1.5363678880165936, acc:0.550498753117207
epoch:0, idx:500/1275, loss:1.5323731545916575, acc:0.5474051896207585
epoch:0, idx:600/1275, loss:1.530171838358119, acc:0.5391014975041597
epoch:0, idx:700/1275, loss:1.5228679746431903, acc:0.5395863052781741
epoch:0, idx:800/1275, loss:1.5336916810564334, acc:0.5346441947565543
epoch:0, idx:900/1275, loss:1.5294003287510125, acc:0.5360710321864595
epoch:0, idx:1000/1275, loss:1.5376647551219305, acc:0.5344655344655345
epoch:0, idx:1100/1275, loss:1.5354838314433188, acc:0.5345140781108083
epoch:0, idx:1200/1275, loss:1.5399065221676125, acc:0.533721898417985


  1%|          | 100/10845 [00:53<1:33:12,  1.92it/s, acc=0.525, epoch=1, loss=1.45]

epoch:1, idx:99/10845, loss:1.4452170526981354, acc:0.525


  2%|▏         | 200/10845 [01:46<1:37:33,  1.82it/s, acc=0.565, epoch=1, loss=1.37]

epoch:1, idx:199/10845, loss:1.371137073636055, acc:0.565


  3%|▎         | 300/10845 [02:44<1:32:47,  1.89it/s, acc=0.552, epoch=1, loss=1.41]

epoch:1, idx:299/10845, loss:1.4080103059609732, acc:0.5516666666666666


  4%|▎         | 400/10845 [03:38<1:24:09,  2.07it/s, acc=0.546, epoch=1, loss=1.44]

epoch:1, idx:399/10845, loss:1.4422607578337192, acc:0.54625


  5%|▍         | 500/10845 [04:33<1:46:51,  1.61it/s, acc=0.551, epoch=1, loss=1.42]

epoch:1, idx:499/10845, loss:1.4181700016260148, acc:0.551


  6%|▌         | 600/10845 [05:28<1:14:55,  2.28it/s, acc=0.552, epoch=1, loss=1.42]

epoch:1, idx:599/10845, loss:1.4199734410643579, acc:0.5516666666666666


  6%|▋         | 700/10845 [06:24<1:39:57,  1.69it/s, acc=0.546, epoch=1, loss=1.43]

epoch:1, idx:699/10845, loss:1.431648321918079, acc:0.5460714285714285


  7%|▋         | 800/10845 [07:21<1:23:46,  2.00it/s, acc=0.545, epoch=1, loss=1.43]

epoch:1, idx:799/10845, loss:1.4273405804485082, acc:0.545


  8%|▊         | 900/10845 [08:17<1:48:33,  1.53it/s, acc=0.548, epoch=1, loss=1.42]

epoch:1, idx:899/10845, loss:1.4218732935190201, acc:0.5483333333333333


  9%|▉         | 1000/10845 [09:14<1:40:48,  1.63it/s, acc=0.555, epoch=1, loss=1.42]

epoch:1, idx:999/10845, loss:1.415994308590889, acc:0.55525


 10%|█         | 1100/10845 [10:14<1:18:15,  2.08it/s, acc=0.555, epoch=1, loss=1.43]

epoch:1, idx:1099/10845, loss:1.427406557202339, acc:0.5554545454545454


 11%|█         | 1200/10845 [11:11<1:16:39,  2.10it/s, acc=0.561, epoch=1, loss=1.41]

epoch:1, idx:1199/10845, loss:1.4115247125923633, acc:0.5610416666666667


 12%|█▏        | 1300/10845 [12:09<1:32:49,  1.71it/s, acc=0.561, epoch=1, loss=1.41]

epoch:1, idx:1299/10845, loss:1.4109093645444284, acc:0.5611538461538461


 13%|█▎        | 1400/10845 [13:05<1:26:29,  1.82it/s, acc=0.561, epoch=1, loss=1.41]

epoch:1, idx:1399/10845, loss:1.4122491060835975, acc:0.56125


 14%|█▍        | 1500/10845 [14:02<2:10:26,  1.19it/s, acc=0.561, epoch=1, loss=1.41]

epoch:1, idx:1499/10845, loss:1.4117357630729674, acc:0.5606666666666666


 15%|█▍        | 1600/10845 [14:58<1:37:46,  1.58it/s, acc=0.558, epoch=1, loss=1.42]

epoch:1, idx:1599/10845, loss:1.420062798783183, acc:0.5578125


 16%|█▌        | 1700/10845 [15:54<1:22:01,  1.86it/s, acc=0.555, epoch=1, loss=1.42]

epoch:1, idx:1699/10845, loss:1.4231817804715212, acc:0.5552941176470588


 17%|█▋        | 1800/10845 [16:51<1:18:02,  1.93it/s, acc=0.554, epoch=1, loss=1.43]

epoch:1, idx:1799/10845, loss:1.4271647788749802, acc:0.5541666666666667


 18%|█▊        | 1900/10845 [17:45<1:11:43,  2.08it/s, acc=0.553, epoch=1, loss=1.43]

epoch:1, idx:1899/10845, loss:1.428304025122994, acc:0.5528947368421052


 18%|█▊        | 2000/10845 [18:41<1:38:17,  1.50it/s, acc=0.552, epoch=1, loss=1.43]

epoch:1, idx:1999/10845, loss:1.4278859848082066, acc:0.551625


 19%|█▉        | 2100/10845 [19:35<1:34:25,  1.54it/s, acc=0.551, epoch=1, loss=1.43]

epoch:1, idx:2099/10845, loss:1.4267801207871664, acc:0.5510714285714285


 20%|██        | 2200/10845 [20:35<1:23:17,  1.73it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:2199/10845, loss:1.4297486912662332, acc:0.5489772727272727


 21%|██        | 2300/10845 [21:29<1:28:12,  1.61it/s, acc=0.55, epoch=1, loss=1.43] 

epoch:1, idx:2299/10845, loss:1.4254654923470125, acc:0.5504347826086956


 22%|██▏       | 2400/10845 [22:23<1:27:03,  1.62it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:2399/10845, loss:1.4301309128602346, acc:0.5492708333333334


 23%|██▎       | 2500/10845 [23:18<1:41:48,  1.37it/s, acc=0.548, epoch=1, loss=1.43]

epoch:1, idx:2499/10845, loss:1.4290254103183746, acc:0.5485


 24%|██▍       | 2600/10845 [24:15<1:01:41,  2.23it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:2599/10845, loss:1.426109557197644, acc:0.5494230769230769


 25%|██▍       | 2700/10845 [25:08<1:06:59,  2.03it/s, acc=0.55, epoch=1, loss=1.43] 

epoch:1, idx:2699/10845, loss:1.4263150307867263, acc:0.5498148148148149


 26%|██▌       | 2800/10845 [26:05<1:24:33,  1.59it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:2799/10845, loss:1.4297343364357948, acc:0.5491071428571429


 27%|██▋       | 2900/10845 [27:00<57:44,  2.29it/s, acc=0.549, epoch=1, loss=1.43]  

epoch:1, idx:2899/10845, loss:1.4263903253653953, acc:0.5491379310344827


 28%|██▊       | 3000/10845 [27:58<1:18:20,  1.67it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:2999/10845, loss:1.4287410724163054, acc:0.5494166666666667


 29%|██▊       | 3100/10845 [28:52<1:13:50,  1.75it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:3099/10845, loss:1.4307113947599166, acc:0.5486290322580645


 30%|██▉       | 3200/10845 [29:49<1:10:42,  1.80it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:3199/10845, loss:1.4305576485022902, acc:0.5490625


 30%|███       | 3300/10845 [30:48<1:14:21,  1.69it/s, acc=0.55, epoch=1, loss=1.43] 

epoch:1, idx:3299/10845, loss:1.4295788205392432, acc:0.5504545454545454


 31%|███▏      | 3400/10845 [31:44<53:43,  2.31it/s, acc=0.549, epoch=1, loss=1.43]  

epoch:1, idx:3399/10845, loss:1.4329246356031473, acc:0.5489705882352941


 32%|███▏      | 3500/10845 [32:37<1:04:51,  1.89it/s, acc=0.548, epoch=1, loss=1.43]

epoch:1, idx:3499/10845, loss:1.4340374932459423, acc:0.5481428571428572


 33%|███▎      | 3600/10845 [33:33<1:18:13,  1.54it/s, acc=0.548, epoch=1, loss=1.44]

epoch:1, idx:3599/10845, loss:1.4354409228265286, acc:0.5477083333333334


 34%|███▍      | 3700/10845 [34:24<1:16:59,  1.55it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:3699/10845, loss:1.4345624471516223, acc:0.5485810810810811


 35%|███▌      | 3800/10845 [35:17<54:07,  2.17it/s, acc=0.549, epoch=1, loss=1.43]  

epoch:1, idx:3799/10845, loss:1.4315975876701506, acc:0.5490131578947368


 36%|███▌      | 3900/10845 [36:09<52:40,  2.20it/s, acc=0.55, epoch=1, loss=1.43]   

epoch:1, idx:3899/10845, loss:1.4272197740887984, acc:0.5500641025641025


 37%|███▋      | 4000/10845 [37:04<1:11:34,  1.59it/s, acc=0.55, epoch=1, loss=1.43] 

epoch:1, idx:3999/10845, loss:1.428129717104137, acc:0.5503125


 38%|███▊      | 4100/10845 [38:01<1:09:09,  1.63it/s, acc=0.55, epoch=1, loss=1.43]

epoch:1, idx:4099/10845, loss:1.431493113818692, acc:0.5496341463414635


 39%|███▊      | 4200/10845 [38:56<49:44,  2.23it/s, acc=0.55, epoch=1, loss=1.43]  

epoch:1, idx:4199/10845, loss:1.4301035433652856, acc:0.5498214285714286


 40%|███▉      | 4300/10845 [39:52<1:08:52,  1.58it/s, acc=0.549, epoch=1, loss=1.43]

epoch:1, idx:4299/10845, loss:1.4284321879162345, acc:0.5494767441860465


 41%|████      | 4400/10845 [40:44<57:21,  1.87it/s, acc=0.55, epoch=1, loss=1.43]   

epoch:1, idx:4399/10845, loss:1.425469185730273, acc:0.5502272727272727


 41%|████▏     | 4500/10845 [41:36<49:48,  2.12it/s, acc=0.551, epoch=1, loss=1.42]  

epoch:1, idx:4499/10845, loss:1.4248923670517073, acc:0.5508333333333333


 42%|████▏     | 4600/10845 [42:30<1:04:57,  1.60it/s, acc=0.552, epoch=1, loss=1.42]

epoch:1, idx:4599/10845, loss:1.4230025558173656, acc:0.5519021739130435


 43%|████▎     | 4700/10845 [43:25<57:57,  1.77it/s, acc=0.552, epoch=1, loss=1.43]  

epoch:1, idx:4699/10845, loss:1.425136868452772, acc:0.5517553191489362


 44%|████▍     | 4800/10845 [44:16<1:03:59,  1.57it/s, acc=0.551, epoch=1, loss=1.43]

epoch:1, idx:4799/10845, loss:1.4256777312917015, acc:0.55125


 45%|████▌     | 4900/10845 [45:09<45:16,  2.19it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:4899/10845, loss:1.4237196548556794, acc:0.5521938775510205


 46%|████▌     | 5000/10845 [46:08<1:01:40,  1.58it/s, acc=0.551, epoch=1, loss=1.43]

epoch:1, idx:4999/10845, loss:1.4259884311258793, acc:0.5514


 47%|████▋     | 5100/10845 [47:01<52:16,  1.83it/s, acc=0.551, epoch=1, loss=1.43]  

epoch:1, idx:5099/10845, loss:1.4266045294146912, acc:0.5511274509803922


 48%|████▊     | 5200/10845 [47:57<46:23,  2.03it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:5199/10845, loss:1.4236973233005175, acc:0.551826923076923


 49%|████▉     | 5300/10845 [48:53<42:10,  2.19it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:5299/10845, loss:1.4249471769411608, acc:0.551556603773585


 50%|████▉     | 5400/10845 [49:52<53:38,  1.69it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:5399/10845, loss:1.4247705001356425, acc:0.5515740740740741


 51%|█████     | 5500/10845 [50:47<1:05:35,  1.36it/s, acc=0.552, epoch=1, loss=1.42]

epoch:1, idx:5499/10845, loss:1.4230096918290311, acc:0.5517727272727273


 52%|█████▏    | 5600/10845 [51:48<1:09:11,  1.26it/s, acc=0.551, epoch=1, loss=1.42]

epoch:1, idx:5599/10845, loss:1.4247656972333789, acc:0.5513392857142857


 53%|█████▎    | 5700/10845 [52:43<53:57,  1.59it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:5699/10845, loss:1.4232853625688637, acc:0.5519298245614035


 53%|█████▎    | 5800/10845 [53:35<43:24,  1.94it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:5799/10845, loss:1.4236915946366457, acc:0.551853448275862


 54%|█████▍    | 5900/10845 [54:29<48:34,  1.70it/s, acc=0.553, epoch=1, loss=1.42]  

epoch:1, idx:5899/10845, loss:1.4222572301454464, acc:0.5525847457627119


 55%|█████▌    | 6000/10845 [55:31<49:02,  1.65it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:5999/10845, loss:1.4229059803237518, acc:0.5521666666666667


 56%|█████▌    | 6100/10845 [56:31<47:23,  1.67it/s, acc=0.552, epoch=1, loss=1.42]  

epoch:1, idx:6099/10845, loss:1.4236569466600653, acc:0.5520901639344262


 57%|█████▋    | 6200/10845 [57:25<41:22,  1.87it/s, acc=0.552, epoch=1, loss=1.42]

epoch:1, idx:6199/10845, loss:1.4242335655468126, acc:0.5522983870967741


 58%|█████▊    | 6300/10845 [58:21<50:48,  1.49it/s, acc=0.553, epoch=1, loss=1.42]

epoch:1, idx:6299/10845, loss:1.4219750574231147, acc:0.5532539682539682


 59%|█████▉    | 6400/10845 [59:16<37:02,  2.00it/s, acc=0.553, epoch=1, loss=1.42]  

epoch:1, idx:6399/10845, loss:1.4203404854191468, acc:0.5532421875


 60%|█████▉    | 6500/10845 [1:00:13<38:29,  1.88it/s, acc=0.554, epoch=1, loss=1.42]  

epoch:1, idx:6499/10845, loss:1.4197062065922297, acc:0.5537307692307692


 61%|██████    | 6600/10845 [1:01:10<43:15,  1.64it/s, acc=0.554, epoch=1, loss=1.42]

epoch:1, idx:6599/10845, loss:1.418578035799843, acc:0.5542424242424242


 62%|██████▏   | 6700/10845 [1:02:06<41:02,  1.68it/s, acc=0.554, epoch=1, loss=1.42]

epoch:1, idx:6699/10845, loss:1.4182342129189576, acc:0.5541044776119403


 63%|██████▎   | 6800/10845 [1:03:04<31:44,  2.12it/s, acc=0.554, epoch=1, loss=1.42]  

epoch:1, idx:6799/10845, loss:1.417268664990278, acc:0.5543014705882353


 64%|██████▎   | 6900/10845 [1:03:58<35:51,  1.83it/s, acc=0.555, epoch=1, loss=1.42]

epoch:1, idx:6899/10845, loss:1.4157343312320503, acc:0.5547463768115942


 65%|██████▍   | 7000/10845 [1:04:56<39:17,  1.63it/s, acc=0.555, epoch=1, loss=1.42]

epoch:1, idx:6999/10845, loss:1.415602808973619, acc:0.5546428571428571


 65%|██████▌   | 7100/10845 [1:05:52<32:36,  1.91it/s, acc=0.555, epoch=1, loss=1.41]  

epoch:1, idx:7099/10845, loss:1.4148331643787908, acc:0.5547887323943662


 66%|██████▋   | 7200/10845 [1:06:46<32:57,  1.84it/s, acc=0.555, epoch=1, loss=1.41]

epoch:1, idx:7199/10845, loss:1.4142822900455858, acc:0.5550694444444444


 67%|██████▋   | 7300/10845 [1:07:43<33:04,  1.79it/s, acc=0.555, epoch=1, loss=1.41]

epoch:1, idx:7299/10845, loss:1.4131449983830322, acc:0.5554452054794521


 68%|██████▊   | 7400/10845 [1:08:36<29:12,  1.97it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:7399/10845, loss:1.4129689044042213, acc:0.555777027027027


 69%|██████▉   | 7500/10845 [1:09:32<30:57,  1.80it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:7499/10845, loss:1.411513174903393, acc:0.5561


 70%|███████   | 7600/10845 [1:10:30<28:49,  1.88it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:7599/10845, loss:1.4107485078628126, acc:0.5562828947368421


 71%|███████   | 7700/10845 [1:11:26<34:16,  1.53it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:7699/10845, loss:1.41111001360339, acc:0.5558766233766234


 72%|███████▏  | 7800/10845 [1:12:21<22:30,  2.25it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:7799/10845, loss:1.4107686566733397, acc:0.5559294871794872


 73%|███████▎  | 7900/10845 [1:13:12<28:02,  1.75it/s, acc=0.555, epoch=1, loss=1.41]

epoch:1, idx:7899/10845, loss:1.4117073675839207, acc:0.5554430379746835


 74%|███████▍  | 8001/10845 [1:14:08<17:56,  2.64it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:7999/10845, loss:1.411474043149501, acc:0.5556875


 75%|███████▍  | 8100/10845 [1:15:04<30:55,  1.48it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8099/10845, loss:1.4105975064856036, acc:0.5558333333333333


 76%|███████▌  | 8200/10845 [1:15:59<24:52,  1.77it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8199/10845, loss:1.4098494117688842, acc:0.5559146341463415


 77%|███████▋  | 8300/10845 [1:16:55<23:51,  1.78it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8299/10845, loss:1.409189237855285, acc:0.5558132530120482


 77%|███████▋  | 8400/10845 [1:17:54<27:31,  1.48it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8399/10845, loss:1.4069895870806206, acc:0.5562202380952381


 78%|███████▊  | 8500/10845 [1:18:47<16:42,  2.34it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8499/10845, loss:1.4062544564043775, acc:0.5564411764705882


 79%|███████▉  | 8600/10845 [1:19:41<21:25,  1.75it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8599/10845, loss:1.4060601525563141, acc:0.5562790697674419


 80%|████████  | 8700/10845 [1:20:39<15:37,  2.29it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8699/10845, loss:1.4064782210225346, acc:0.5563218390804597


 81%|████████  | 8800/10845 [1:21:32<18:47,  1.81it/s, acc=0.557, epoch=1, loss=1.41]

epoch:1, idx:8799/10845, loss:1.4055705143071033, acc:0.5567045454545455


 82%|████████▏ | 8900/10845 [1:22:26<16:18,  1.99it/s, acc=0.556, epoch=1, loss=1.41]

epoch:1, idx:8899/10845, loss:1.406126809043161, acc:0.5562640449438202


 83%|████████▎ | 9000/10845 [1:23:22<16:44,  1.84it/s, acc=0.557, epoch=1, loss=1.4] 

epoch:1, idx:8999/10845, loss:1.404233448230558, acc:0.5566388888888889


 84%|████████▍ | 9100/10845 [1:24:18<16:35,  1.75it/s, acc=0.556, epoch=1, loss=1.4]

epoch:1, idx:9099/10845, loss:1.404630201305007, acc:0.5564010989010989


 85%|████████▍ | 9200/10845 [1:25:14<13:47,  1.99it/s, acc=0.557, epoch=1, loss=1.4]

epoch:1, idx:9199/10845, loss:1.403382570390468, acc:0.556875


 86%|████████▌ | 9300/10845 [1:26:08<12:28,  2.06it/s, acc=0.557, epoch=1, loss=1.4]

epoch:1, idx:9299/10845, loss:1.4042332080967965, acc:0.5568817204301075


 87%|████████▋ | 9400/10845 [1:27:03<10:10,  2.37it/s, acc=0.557, epoch=1, loss=1.4]

epoch:1, idx:9399/10845, loss:1.4038211575055377, acc:0.5573404255319149


 88%|████████▊ | 9500/10845 [1:27:58<16:00,  1.40it/s, acc=0.557, epoch=1, loss=1.4]

epoch:1, idx:9499/10845, loss:1.4035543843664622, acc:0.5571578947368421


 89%|████████▊ | 9600/10845 [1:28:53<11:25,  1.82it/s, acc=0.558, epoch=1, loss=1.4]

epoch:1, idx:9599/10845, loss:1.4034368752222508, acc:0.5575260416666666


 89%|████████▉ | 9700/10845 [1:29:48<08:53,  2.14it/s, acc=0.558, epoch=1, loss=1.4]

epoch:1, idx:9699/10845, loss:1.4021382103230535, acc:0.5578350515463918


 90%|█████████ | 9800/10845 [1:30:41<11:41,  1.49it/s, acc=0.558, epoch=1, loss=1.4]

epoch:1, idx:9799/10845, loss:1.400669528711207, acc:0.5582908163265307


 91%|█████████▏| 9900/10845 [1:31:36<08:11,  1.92it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:9899/10845, loss:1.4002824427112184, acc:0.5585353535353536


 92%|█████████▏| 10000/10845 [1:32:30<11:08,  1.26it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:9999/10845, loss:1.3999067541331052, acc:0.5588


 93%|█████████▎| 10100/10845 [1:33:27<07:12,  1.72it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:10099/10845, loss:1.4012037874241867, acc:0.5586386138613861


 94%|█████████▍| 10200/10845 [1:34:24<05:19,  2.02it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:10199/10845, loss:1.4007491938127021, acc:0.5588235294117647


 95%|█████████▍| 10300/10845 [1:35:18<05:06,  1.78it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:10299/10845, loss:1.4005961929218282, acc:0.5591019417475728


 96%|█████████▌| 10400/10845 [1:36:14<04:45,  1.56it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:10399/10845, loss:1.4001010050997138, acc:0.5591346153846154


 97%|█████████▋| 10500/10845 [1:37:11<03:38,  1.58it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:10499/10845, loss:1.4001088237166406, acc:0.5592380952380952


 98%|█████████▊| 10600/10845 [1:38:09<02:51,  1.43it/s, acc=0.56, epoch=1, loss=1.4] 

epoch:1, idx:10599/10845, loss:1.3999075403387817, acc:0.5595518867924528


 99%|█████████▊| 10700/10845 [1:39:04<01:33,  1.55it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:10699/10845, loss:1.4003816827518918, acc:0.5593457943925234


100%|█████████▉| 10800/10845 [1:40:00<00:25,  1.73it/s, acc=0.559, epoch=1, loss=1.4]

epoch:1, idx:10799/10845, loss:1.401071940370732, acc:0.5593287037037037


100%|██████████| 10845/10845 [1:40:23<00:00,  2.92it/s, acc=0.56, epoch=1, loss=1.4] 


epoch:1, idx:0/1275, loss:0.9646737575531006, acc:0.75
epoch:1, idx:100/1275, loss:1.4566523621578027, acc:0.5841584158415841
epoch:1, idx:200/1275, loss:1.4166287489198333, acc:0.5895522388059702
epoch:1, idx:300/1275, loss:1.4026422720414855, acc:0.5938538205980066
epoch:1, idx:400/1275, loss:1.3957560286854864, acc:0.5916458852867831
epoch:1, idx:500/1275, loss:1.3915938755233368, acc:0.5878243512974052
epoch:1, idx:600/1275, loss:1.3911031771221891, acc:0.5844425956738769
epoch:1, idx:700/1275, loss:1.3860584653053065, acc:0.5855920114122682
epoch:1, idx:800/1275, loss:1.3988110723715745, acc:0.5820848938826467
epoch:1, idx:900/1275, loss:1.3909444731559923, acc:0.5868479467258602
epoch:1, idx:1000/1275, loss:1.3994706046212089, acc:0.5859140859140859
epoch:1, idx:1100/1275, loss:1.3903940626429385, acc:0.5881017257039055
epoch:1, idx:1200/1275, loss:1.393135924571559, acc:0.5855537052456287


  1%|          | 100/10845 [00:50<1:27:03,  2.06it/s, acc=0.635, epoch=2, loss=1.24]

epoch:2, idx:99/10845, loss:1.2358368808031082, acc:0.635


  2%|▏         | 200/10845 [01:45<1:19:49,  2.22it/s, acc=0.619, epoch=2, loss=1.22]

epoch:2, idx:199/10845, loss:1.2248152387142182, acc:0.61875


  3%|▎         | 300/10845 [02:42<2:02:29,  1.43it/s, acc=0.616, epoch=2, loss=1.2] 

epoch:2, idx:299/10845, loss:1.1966659001509348, acc:0.6158333333333333


  4%|▎         | 400/10845 [03:35<1:34:39,  1.84it/s, acc=0.6, epoch=2, loss=1.24]  

epoch:2, idx:399/10845, loss:1.2438013453781605, acc:0.6


  5%|▍         | 500/10845 [04:35<2:03:33,  1.40it/s, acc=0.593, epoch=2, loss=1.28]

epoch:2, idx:499/10845, loss:1.2751368849277496, acc:0.593


  6%|▌         | 600/10845 [05:32<1:49:44,  1.56it/s, acc=0.594, epoch=2, loss=1.27]

epoch:2, idx:599/10845, loss:1.2727337212363878, acc:0.59375


  6%|▋         | 700/10845 [06:24<1:25:22,  1.98it/s, acc=0.598, epoch=2, loss=1.26]

epoch:2, idx:699/10845, loss:1.260944647192955, acc:0.5982142857142857


  7%|▋         | 800/10845 [07:21<1:35:25,  1.75it/s, acc=0.6, epoch=2, loss=1.26]  

epoch:2, idx:799/10845, loss:1.2603721567243338, acc:0.5996875


  8%|▊         | 900/10845 [08:17<1:38:05,  1.69it/s, acc=0.599, epoch=2, loss=1.26]

epoch:2, idx:899/10845, loss:1.2582771018478605, acc:0.5991666666666666


  9%|▉         | 1000/10845 [09:15<1:24:56,  1.93it/s, acc=0.6, epoch=2, loss=1.26] 

epoch:2, idx:999/10845, loss:1.261945468187332, acc:0.59975


 10%|█         | 1100/10845 [10:09<1:35:07,  1.71it/s, acc=0.601, epoch=2, loss=1.26]

epoch:2, idx:1099/10845, loss:1.2612974815477025, acc:0.600909090909091


 11%|█         | 1200/10845 [11:07<1:32:35,  1.74it/s, acc=0.601, epoch=2, loss=1.26]

epoch:2, idx:1199/10845, loss:1.2595871586104235, acc:0.600625


 12%|█▏        | 1300/10845 [11:59<1:33:58,  1.69it/s, acc=0.599, epoch=2, loss=1.27]

epoch:2, idx:1299/10845, loss:1.2680116007419733, acc:0.5988461538461538


 13%|█▎        | 1400/10845 [12:54<1:26:13,  1.83it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:1399/10845, loss:1.2655233409575053, acc:0.6003571428571428


 14%|█▍        | 1500/10845 [13:48<1:33:59,  1.66it/s, acc=0.601, epoch=2, loss=1.27]

epoch:2, idx:1499/10845, loss:1.2672362811168034, acc:0.6006666666666667


 15%|█▍        | 1600/10845 [14:43<59:34,  2.59it/s, acc=0.598, epoch=2, loss=1.28]  

epoch:2, idx:1599/10845, loss:1.275291559100151, acc:0.5978125


 16%|█▌        | 1700/10845 [15:39<1:17:18,  1.97it/s, acc=0.596, epoch=2, loss=1.29]

epoch:2, idx:1699/10845, loss:1.2863010573387146, acc:0.5955882352941176


 17%|█▋        | 1800/10845 [16:33<1:09:33,  2.17it/s, acc=0.597, epoch=2, loss=1.28]

epoch:2, idx:1799/10845, loss:1.2816288162933456, acc:0.5972222222222222


 18%|█▊        | 1900/10845 [17:26<1:30:06,  1.65it/s, acc=0.599, epoch=2, loss=1.27]

epoch:2, idx:1899/10845, loss:1.2733698581708106, acc:0.5989473684210527


 18%|█▊        | 2000/10845 [18:24<1:28:04,  1.67it/s, acc=0.597, epoch=2, loss=1.28]

epoch:2, idx:1999/10845, loss:1.2811141368448735, acc:0.597125


 19%|█▉        | 2100/10845 [19:16<1:15:53,  1.92it/s, acc=0.597, epoch=2, loss=1.28]

epoch:2, idx:2099/10845, loss:1.2789036951178596, acc:0.5970238095238095


 20%|██        | 2200/10845 [20:10<1:16:52,  1.87it/s, acc=0.596, epoch=2, loss=1.28]

epoch:2, idx:2199/10845, loss:1.2776085675846447, acc:0.5956818181818182


 21%|██        | 2300/10845 [21:10<1:44:48,  1.36it/s, acc=0.594, epoch=2, loss=1.29]

epoch:2, idx:2299/10845, loss:1.2859096867364386, acc:0.5935869565217391


 22%|██▏       | 2400/10845 [22:05<59:51,  2.35it/s, acc=0.594, epoch=2, loss=1.28]  

epoch:2, idx:2399/10845, loss:1.2831951121489207, acc:0.5944791666666667


 23%|██▎       | 2500/10845 [22:59<1:27:53,  1.58it/s, acc=0.594, epoch=2, loss=1.29]

epoch:2, idx:2499/10845, loss:1.2868313887357712, acc:0.594


 24%|██▍       | 2600/10845 [23:57<1:29:39,  1.53it/s, acc=0.594, epoch=2, loss=1.29]

epoch:2, idx:2599/10845, loss:1.2871010666397902, acc:0.5941346153846154


 25%|██▍       | 2700/10845 [24:50<1:29:35,  1.52it/s, acc=0.593, epoch=2, loss=1.29]

epoch:2, idx:2699/10845, loss:1.2883797753298725, acc:0.5931481481481482


 26%|██▌       | 2800/10845 [25:43<1:15:39,  1.77it/s, acc=0.596, epoch=2, loss=1.28]

epoch:2, idx:2799/10845, loss:1.282944417233978, acc:0.5955357142857143


 27%|██▋       | 2900/10845 [26:37<1:12:53,  1.82it/s, acc=0.595, epoch=2, loss=1.28]

epoch:2, idx:2899/10845, loss:1.2842115862410644, acc:0.5950862068965517


 28%|██▊       | 3000/10845 [27:32<1:25:55,  1.52it/s, acc=0.595, epoch=2, loss=1.28]

epoch:2, idx:2999/10845, loss:1.2845860830148061, acc:0.5949166666666666


 29%|██▊       | 3100/10845 [28:24<1:03:10,  2.04it/s, acc=0.596, epoch=2, loss=1.28]

epoch:2, idx:3099/10845, loss:1.2833775489753292, acc:0.5955645161290323


 30%|██▉       | 3200/10845 [29:14<40:53,  3.12it/s, acc=0.595, epoch=2, loss=1.28]  

epoch:2, idx:3199/10845, loss:1.2833583353459834, acc:0.594921875


 30%|███       | 3300/10845 [30:01<52:55,  2.38it/s, acc=0.595, epoch=2, loss=1.28]  

epoch:2, idx:3299/10845, loss:1.2833779719201002, acc:0.5945454545454546


 31%|███▏      | 3400/10845 [30:51<1:12:29,  1.71it/s, acc=0.596, epoch=2, loss=1.28]

epoch:2, idx:3399/10845, loss:1.281227096617222, acc:0.5955147058823529


 32%|███▏      | 3500/10845 [31:40<1:00:18,  2.03it/s, acc=0.596, epoch=2, loss=1.28]

epoch:2, idx:3499/10845, loss:1.2815029253789356, acc:0.5959285714285715


 33%|███▎      | 3600/10845 [32:27<57:58,  2.08it/s, acc=0.596, epoch=2, loss=1.28]  

epoch:2, idx:3599/10845, loss:1.2814884438614051, acc:0.5959722222222222


 34%|███▍      | 3700/10845 [33:14<57:23,  2.08it/s, acc=0.596, epoch=2, loss=1.28]  

epoch:2, idx:3699/10845, loss:1.280013993076376, acc:0.5961486486486487


 35%|███▌      | 3800/10845 [33:58<55:07,  2.13it/s, acc=0.596, epoch=2, loss=1.28]  

epoch:2, idx:3799/10845, loss:1.2806501778489665, acc:0.59625


 36%|███▌      | 3900/10845 [34:37<33:15,  3.48it/s, acc=0.596, epoch=2, loss=1.28]  

epoch:2, idx:3899/10845, loss:1.2808598423156983, acc:0.596474358974359


 37%|███▋      | 4000/10845 [35:17<55:53,  2.04it/s, acc=0.597, epoch=2, loss=1.28]  

epoch:2, idx:3999/10845, loss:1.2797011935412883, acc:0.5973125


 38%|███▊      | 4100/10845 [35:58<1:26:28,  1.30it/s, acc=0.598, epoch=2, loss=1.28]

epoch:2, idx:4099/10845, loss:1.2789266611163208, acc:0.5978658536585366


 39%|███▊      | 4200/10845 [36:38<48:13,  2.30it/s, acc=0.598, epoch=2, loss=1.28]  

epoch:2, idx:4199/10845, loss:1.279012113823777, acc:0.5980952380952381


 40%|███▉      | 4300/10845 [37:36<50:20,  2.17it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:4299/10845, loss:1.2788471508303354, acc:0.598546511627907


 41%|████      | 4400/10845 [38:34<54:27,  1.97it/s, acc=0.598, epoch=2, loss=1.28]  

epoch:2, idx:4399/10845, loss:1.2786774902316658, acc:0.5983522727272728


 41%|████▏     | 4500/10845 [39:31<1:03:50,  1.66it/s, acc=0.598, epoch=2, loss=1.28]

epoch:2, idx:4499/10845, loss:1.2804966977834702, acc:0.5975555555555555


 42%|████▏     | 4600/10845 [40:27<44:04,  2.36it/s, acc=0.598, epoch=2, loss=1.28]  

epoch:2, idx:4599/10845, loss:1.27710696346086, acc:0.5984782608695652


 43%|████▎     | 4700/10845 [41:26<1:02:34,  1.64it/s, acc=0.598, epoch=2, loss=1.28]

epoch:2, idx:4699/10845, loss:1.2789071233348643, acc:0.5980851063829787


 44%|████▍     | 4800/10845 [42:21<1:04:51,  1.55it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:4799/10845, loss:1.2756580122808616, acc:0.5994270833333334


 45%|████▌     | 4900/10845 [43:15<52:11,  1.90it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:4899/10845, loss:1.2765351148405855, acc:0.5988775510204082


 46%|████▌     | 5000/10845 [44:13<57:13,  1.70it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:4999/10845, loss:1.2768452712059022, acc:0.599


 47%|████▋     | 5100/10845 [45:07<1:03:55,  1.50it/s, acc=0.6, epoch=2, loss=1.28]  

epoch:2, idx:5099/10845, loss:1.2756878135134193, acc:0.5996078431372549


 48%|████▊     | 5200/10845 [46:04<45:40,  2.06it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:5199/10845, loss:1.2748722326870148, acc:0.5997596153846154


 49%|████▉     | 5300/10845 [46:57<1:05:32,  1.41it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:5299/10845, loss:1.272721252711314, acc:0.6004245283018868


 50%|████▉     | 5400/10845 [47:50<1:01:36,  1.47it/s, acc=0.601, epoch=2, loss=1.27]

epoch:2, idx:5399/10845, loss:1.2706172024541431, acc:0.600925925925926


 51%|█████     | 5500/10845 [48:46<52:31,  1.70it/s, acc=0.6, epoch=2, loss=1.27]    

epoch:2, idx:5499/10845, loss:1.2713195797530088, acc:0.6004090909090909


 52%|█████▏    | 5600/10845 [49:40<54:23,  1.61it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:5599/10845, loss:1.2749557861366443, acc:0.5996875


 53%|█████▎    | 5700/10845 [50:36<51:45,  1.66it/s, acc=0.6, epoch=2, loss=1.28]  

epoch:2, idx:5699/10845, loss:1.2752889430000072, acc:0.6002631578947368


 53%|█████▎    | 5800/10845 [51:31<58:07,  1.45it/s, acc=0.6, epoch=2, loss=1.28]  

epoch:2, idx:5799/10845, loss:1.2752995644663943, acc:0.5997413793103449


 54%|█████▍    | 5900/10845 [52:28<55:07,  1.50it/s, acc=0.6, epoch=2, loss=1.28]  

epoch:2, idx:5899/10845, loss:1.275226900426008, acc:0.5995338983050847


 55%|█████▌    | 6000/10845 [53:23<35:26,  2.28it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:5999/10845, loss:1.275384214371443, acc:0.598875


 56%|█████▌    | 6100/10845 [54:18<35:08,  2.25it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:6099/10845, loss:1.2754033339903004, acc:0.5989344262295082


 57%|█████▋    | 6200/10845 [55:14<44:08,  1.75it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:6199/10845, loss:1.2778133735829784, acc:0.59875


 58%|█████▊    | 6300/10845 [56:06<37:24,  2.03it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:6299/10845, loss:1.2755899615231014, acc:0.5993253968253969


 59%|█████▉    | 6400/10845 [57:06<47:26,  1.56it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:6399/10845, loss:1.2752199386432768, acc:0.599453125


 60%|█████▉    | 6500/10845 [58:03<34:57,  2.07it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:6499/10845, loss:1.2760243923113896, acc:0.5991153846153846


 61%|██████    | 6600/10845 [59:00<39:34,  1.79it/s, acc=0.599, epoch=2, loss=1.28]  

epoch:2, idx:6599/10845, loss:1.2761117153095476, acc:0.5988257575757576


 62%|██████▏   | 6700/10845 [59:56<37:55,  1.82it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:6699/10845, loss:1.2764661001536384, acc:0.5992164179104478


 63%|██████▎   | 6800/10845 [1:00:48<32:02,  2.10it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:6799/10845, loss:1.2763332348974312, acc:0.5988970588235294


 64%|██████▎   | 6900/10845 [1:01:40<36:40,  1.79it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:6899/10845, loss:1.2773268574649008, acc:0.5985144927536232


 65%|██████▍   | 7000/10845 [1:02:19<40:13,  1.59it/s, acc=0.598, epoch=2, loss=1.28]

epoch:2, idx:6999/10845, loss:1.2778564504299845, acc:0.5982857142857143


 65%|██████▌   | 7100/10845 [1:03:03<25:48,  2.42it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:7099/10845, loss:1.2750814665455215, acc:0.5990845070422535


 66%|██████▋   | 7200/10845 [1:03:45<33:35,  1.81it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:7199/10845, loss:1.2734244749777848, acc:0.5995833333333334


 67%|██████▋   | 7300/10845 [1:04:42<37:46,  1.56it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:7299/10845, loss:1.272145940147034, acc:0.6002054794520548


 68%|██████▊   | 7400/10845 [1:05:40<31:50,  1.80it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:7399/10845, loss:1.2742624085336118, acc:0.5999324324324324


 69%|██████▉   | 7500/10845 [1:06:31<18:43,  2.98it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:7499/10845, loss:1.2736409403006235, acc:0.5998666666666667


 70%|███████   | 7600/10845 [1:07:15<23:29,  2.30it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:7599/10845, loss:1.2736224721215272, acc:0.6


 71%|███████   | 7700/10845 [1:08:09<24:11,  2.17it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:7699/10845, loss:1.273094137016829, acc:0.6002922077922078


 72%|███████▏  | 7800/10845 [1:09:04<28:18,  1.79it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:7799/10845, loss:1.2740463550274188, acc:0.6000961538461539


 73%|███████▎  | 7900/10845 [1:09:44<20:27,  2.40it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:7899/10845, loss:1.2740196190302886, acc:0.5996835443037974


 74%|███████▍  | 8000/10845 [1:10:28<33:50,  1.40it/s, acc=0.599, epoch=2, loss=1.27]

epoch:2, idx:7999/10845, loss:1.27470463578403, acc:0.59940625


 75%|███████▍  | 8100/10845 [1:11:21<24:39,  1.86it/s, acc=0.599, epoch=2, loss=1.27]

epoch:2, idx:8099/10845, loss:1.274746385106334, acc:0.5994135802469136


 76%|███████▌  | 8200/10845 [1:12:18<21:54,  2.01it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:8199/10845, loss:1.27549330270145, acc:0.5994817073170732


 77%|███████▋  | 8300/10845 [1:13:15<25:15,  1.68it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:8299/10845, loss:1.274450090771698, acc:0.5996686746987951


 77%|███████▋  | 8400/10845 [1:14:12<20:05,  2.03it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:8399/10845, loss:1.2726874169068678, acc:0.5999107142857143


 78%|███████▊  | 8500/10845 [1:15:10<22:23,  1.74it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:8499/10845, loss:1.2733254752018872, acc:0.5998529411764706


 79%|███████▉  | 8600/10845 [1:16:03<21:41,  1.72it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:8599/10845, loss:1.2727555134615234, acc:0.5997674418604652


 80%|████████  | 8700/10845 [1:16:59<18:18,  1.95it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:8699/10845, loss:1.2738041068562145, acc:0.5995114942528735


 81%|████████  | 8800/10845 [1:17:52<17:02,  2.00it/s, acc=0.599, epoch=2, loss=1.27]

epoch:2, idx:8799/10845, loss:1.2749698354981163, acc:0.5993181818181819


 82%|████████▏ | 8900/10845 [1:18:42<14:54,  2.17it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:8899/10845, loss:1.2754830188228843, acc:0.599494382022472


 83%|████████▎ | 9000/10845 [1:19:30<16:05,  1.91it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:8999/10845, loss:1.2746627766489982, acc:0.5997222222222223


 84%|████████▍ | 9100/10845 [1:20:17<11:06,  2.62it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:9099/10845, loss:1.274229539999595, acc:0.5996703296703296


 85%|████████▍ | 9200/10845 [1:21:05<15:30,  1.77it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:9199/10845, loss:1.273139865152214, acc:0.5999184782608695


 86%|████████▌ | 9300/10845 [1:21:53<15:20,  1.68it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:9299/10845, loss:1.2735362088616176, acc:0.5998924731182795


 87%|████████▋ | 9400/10845 [1:22:42<11:07,  2.17it/s, acc=0.6, epoch=2, loss=1.27]

epoch:2, idx:9399/10845, loss:1.2746631567845954, acc:0.5997074468085106


 88%|████████▊ | 9500/10845 [1:23:29<17:03,  1.31it/s, acc=0.6, epoch=2, loss=1.27]  

epoch:2, idx:9499/10845, loss:1.2745499674458254, acc:0.5996578947368421


 89%|████████▊ | 9600/10845 [1:24:16<10:37,  1.95it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:9599/10845, loss:1.2755726874619722, acc:0.599375


 89%|████████▉ | 9700/10845 [1:25:01<08:33,  2.23it/s, acc=0.6, epoch=2, loss=1.28]  

epoch:2, idx:9699/10845, loss:1.2751056589723861, acc:0.599819587628866


 90%|█████████ | 9800/10845 [1:25:50<07:57,  2.19it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:9799/10845, loss:1.2765895261326614, acc:0.5994897959183674


 91%|█████████▏| 9900/10845 [1:26:43<10:33,  1.49it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:9899/10845, loss:1.2767894640053161, acc:0.5992929292929293


 92%|█████████▏| 10000/10845 [1:27:35<06:46,  2.08it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:9999/10845, loss:1.276399431312084, acc:0.59945


 93%|█████████▎| 10100/10845 [1:28:27<05:16,  2.36it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:10099/10845, loss:1.276676678108697, acc:0.5991584158415841


 94%|█████████▍| 10200/10845 [1:29:19<05:58,  1.80it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:10199/10845, loss:1.2759019465656842, acc:0.599436274509804


 95%|█████████▍| 10300/10845 [1:29:55<01:47,  5.07it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:10299/10845, loss:1.2764476064397294, acc:0.5994902912621359


 96%|█████████▌| 10400/10845 [1:30:34<02:41,  2.76it/s, acc=0.6, epoch=2, loss=1.28]  

epoch:2, idx:10399/10845, loss:1.2757735084684996, acc:0.5995192307692307


 97%|█████████▋| 10500/10845 [1:31:15<03:00,  1.91it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:10499/10845, loss:1.2773504547959282, acc:0.5987619047619047


 98%|█████████▊| 10600/10845 [1:32:09<02:21,  1.73it/s, acc=0.598, epoch=2, loss=1.28]

epoch:2, idx:10599/10845, loss:1.2781527560780632, acc:0.5984669811320755


 99%|█████████▊| 10700/10845 [1:33:04<01:07,  2.15it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:10699/10845, loss:1.277443740975077, acc:0.5987616822429906


100%|█████████▉| 10800/10845 [1:33:57<00:25,  1.76it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:10799/10845, loss:1.276759633852376, acc:0.598912037037037


100%|██████████| 10845/10845 [1:34:21<00:00,  2.10it/s, acc=0.599, epoch=2, loss=1.28]


epoch:2, idx:0/1275, loss:0.7933062314987183, acc:0.75
epoch:2, idx:100/1275, loss:1.3920147961909228, acc:0.5618811881188119
epoch:2, idx:200/1275, loss:1.3335093221261134, acc:0.5845771144278606
epoch:2, idx:300/1275, loss:1.3260449012648623, acc:0.5888704318936877
epoch:2, idx:400/1275, loss:1.3225292297372795, acc:0.5935162094763092
epoch:2, idx:500/1275, loss:1.322240867062719, acc:0.593812375249501
epoch:2, idx:600/1275, loss:1.3253668309646518, acc:0.5894342762063228
epoch:2, idx:700/1275, loss:1.3199076691639746, acc:0.5891583452211127
epoch:2, idx:800/1275, loss:1.332820476515314, acc:0.583645443196005
epoch:2, idx:900/1275, loss:1.3235635596030826, acc:0.589622641509434
epoch:2, idx:1000/1275, loss:1.330063555326376, acc:0.5889110889110889
epoch:2, idx:1100/1275, loss:1.3253708948989438, acc:0.5890099909173478
epoch:2, idx:1200/1275, loss:1.3301588484488558, acc:0.5872189841798501


  1%|          | 100/10845 [00:50<1:39:38,  1.80it/s, acc=0.625, epoch=3, loss=1.18]

epoch:3, idx:99/10845, loss:1.1780492913722993, acc:0.625


  2%|▏         | 200/10845 [01:33<1:07:37,  2.62it/s, acc=0.624, epoch=3, loss=1.18]

epoch:3, idx:199/10845, loss:1.1753525549173356, acc:0.62375


  3%|▎         | 300/10845 [02:06<50:23,  3.49it/s, acc=0.634, epoch=3, loss=1.17]  

epoch:3, idx:299/10845, loss:1.1698338594039281, acc:0.6341666666666667


  4%|▎         | 400/10845 [02:47<1:10:56,  2.45it/s, acc=0.623, epoch=3, loss=1.2] 

epoch:3, idx:399/10845, loss:1.1952737799286843, acc:0.6225


  5%|▍         | 500/10845 [03:22<54:15,  3.18it/s, acc=0.626, epoch=3, loss=1.18]  

epoch:3, idx:499/10845, loss:1.1825938688516617, acc:0.6265


  6%|▌         | 600/10845 [04:16<2:44:53,  1.04it/s, acc=0.623, epoch=3, loss=1.2] 

epoch:3, idx:599/10845, loss:1.1998084238171578, acc:0.6233333333333333


  6%|▋         | 700/10845 [05:11<1:30:42,  1.86it/s, acc=0.631, epoch=3, loss=1.19]

epoch:3, idx:699/10845, loss:1.1852219526256833, acc:0.6310714285714286


  7%|▋         | 800/10845 [06:02<1:19:41,  2.10it/s, acc=0.632, epoch=3, loss=1.19]

epoch:3, idx:799/10845, loss:1.1908787021785974, acc:0.631875


  8%|▊         | 900/10845 [07:03<1:12:45,  2.28it/s, acc=0.63, epoch=3, loss=1.2]  

epoch:3, idx:899/10845, loss:1.2007038649585513, acc:0.6302777777777778


  9%|▉         | 1000/10845 [08:01<1:41:51,  1.61it/s, acc=0.628, epoch=3, loss=1.2]

epoch:3, idx:999/10845, loss:1.202895872414112, acc:0.628


 10%|█         | 1100/10845 [09:00<1:11:31,  2.27it/s, acc=0.633, epoch=3, loss=1.18]

epoch:3, idx:1099/10845, loss:1.182525287324732, acc:0.6327272727272727


 11%|█         | 1200/10845 [09:54<1:11:35,  2.25it/s, acc=0.634, epoch=3, loss=1.18]

epoch:3, idx:1199/10845, loss:1.182475130756696, acc:0.6341666666666667


 12%|█▏        | 1300/10845 [10:49<1:12:57,  2.18it/s, acc=0.633, epoch=3, loss=1.18]

epoch:3, idx:1299/10845, loss:1.182511928815108, acc:0.6328846153846154


 13%|█▎        | 1400/10845 [11:45<1:21:03,  1.94it/s, acc=0.634, epoch=3, loss=1.18]

epoch:3, idx:1399/10845, loss:1.1794383593542235, acc:0.6342857142857142


 14%|█▍        | 1501/10845 [12:39<57:17,  2.72it/s, acc=0.631, epoch=3, loss=1.19]  

epoch:3, idx:1499/10845, loss:1.1880432464679083, acc:0.6305


 15%|█▍        | 1600/10845 [13:35<2:01:40,  1.27it/s, acc=0.63, epoch=3, loss=1.18] 

epoch:3, idx:1599/10845, loss:1.1836750392988324, acc:0.6303125


 16%|█▌        | 1700/10845 [14:34<1:41:55,  1.50it/s, acc=0.631, epoch=3, loss=1.18]

epoch:3, idx:1699/10845, loss:1.1839242650480832, acc:0.6307352941176471


 17%|█▋        | 1800/10845 [15:29<1:04:08,  2.35it/s, acc=0.631, epoch=3, loss=1.19]

epoch:3, idx:1799/10845, loss:1.1850828186339801, acc:0.6309722222222223


 18%|█▊        | 1900/10845 [16:22<1:51:53,  1.33it/s, acc=0.632, epoch=3, loss=1.18]

epoch:3, idx:1899/10845, loss:1.181635018712596, acc:0.6322368421052632


 18%|█▊        | 2000/10845 [17:22<1:45:55,  1.39it/s, acc=0.632, epoch=3, loss=1.18]

epoch:3, idx:1999/10845, loss:1.1762780948281288, acc:0.6325


 19%|█▉        | 2100/10845 [18:17<1:38:17,  1.48it/s, acc=0.63, epoch=3, loss=1.18] 

epoch:3, idx:2099/10845, loss:1.181513684846106, acc:0.6302380952380953


 20%|██        | 2200/10845 [19:10<55:38,  2.59it/s, acc=0.631, epoch=3, loss=1.18]  

epoch:3, idx:2199/10845, loss:1.1774400425228206, acc:0.6306818181818182


 21%|██        | 2300/10845 [19:51<1:34:54,  1.50it/s, acc=0.631, epoch=3, loss=1.18]

epoch:3, idx:2299/10845, loss:1.1780219749782397, acc:0.6305434782608695


 22%|██▏       | 2400/10845 [20:43<1:11:58,  1.96it/s, acc=0.629, epoch=3, loss=1.18]

epoch:3, idx:2399/10845, loss:1.181874929095308, acc:0.6288541666666667


 23%|██▎       | 2500/10845 [21:36<58:17,  2.39it/s, acc=0.629, epoch=3, loss=1.18]  

epoch:3, idx:2499/10845, loss:1.1809327780961991, acc:0.6291


 24%|██▍       | 2600/10845 [22:30<1:21:03,  1.70it/s, acc=0.628, epoch=3, loss=1.18]

epoch:3, idx:2599/10845, loss:1.181104686328998, acc:0.6283653846153846


 25%|██▍       | 2700/10845 [23:29<1:39:16,  1.37it/s, acc=0.627, epoch=3, loss=1.19]

epoch:3, idx:2699/10845, loss:1.1870176501185805, acc:0.6273148148148148


 26%|██▌       | 2801/10845 [24:21<1:03:22,  2.12it/s, acc=0.628, epoch=3, loss=1.19]

epoch:3, idx:2799/10845, loss:1.185864166191646, acc:0.6275


 27%|██▋       | 2900/10845 [25:18<1:31:26,  1.45it/s, acc=0.627, epoch=3, loss=1.19]

epoch:3, idx:2899/10845, loss:1.1879925844176062, acc:0.6268965517241379


 28%|██▊       | 3000/10845 [26:12<57:12,  2.29it/s, acc=0.626, epoch=3, loss=1.19]  

epoch:3, idx:2999/10845, loss:1.1876072595715523, acc:0.62625


 29%|██▊       | 3100/10845 [27:05<1:51:08,  1.16it/s, acc=0.625, epoch=3, loss=1.19]

epoch:3, idx:3099/10845, loss:1.19169051035758, acc:0.6245161290322581


 30%|██▉       | 3200/10845 [27:58<43:42,  2.92it/s, acc=0.623, epoch=3, loss=1.2]   

epoch:3, idx:3199/10845, loss:1.1963607267849148, acc:0.623125


 30%|███       | 3300/10845 [28:54<1:00:37,  2.07it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:3299/10845, loss:1.1972941849267844, acc:0.6224242424242424


 31%|███▏      | 3400/10845 [29:50<1:30:26,  1.37it/s, acc=0.622, epoch=3, loss=1.2] 

epoch:3, idx:3399/10845, loss:1.1955525042028987, acc:0.6218382352941176


 32%|███▏      | 3500/10845 [30:48<1:10:57,  1.73it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:3499/10845, loss:1.2000599358422415, acc:0.6210714285714286


 33%|███▎      | 3600/10845 [31:35<44:06,  2.74it/s, acc=0.621, epoch=3, loss=1.2]  

epoch:3, idx:3599/10845, loss:1.199935071269671, acc:0.620625


 34%|███▍      | 3700/10845 [32:20<1:06:36,  1.79it/s, acc=0.62, epoch=3, loss=1.2] 

epoch:3, idx:3699/10845, loss:1.1999813654616072, acc:0.620472972972973


 35%|███▌      | 3800/10845 [33:12<1:07:14,  1.75it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:3799/10845, loss:1.1987359127402306, acc:0.6209210526315789


 36%|███▌      | 3900/10845 [34:02<42:17,  2.74it/s, acc=0.621, epoch=3, loss=1.2]  

epoch:3, idx:3899/10845, loss:1.1973188697986115, acc:0.6214102564102564


 37%|███▋      | 4000/10845 [34:55<1:08:11,  1.67it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:3999/10845, loss:1.1955527407079936, acc:0.6229375


 38%|███▊      | 4100/10845 [35:46<1:05:02,  1.73it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:4099/10845, loss:1.1981519796644768, acc:0.6221341463414635


 39%|███▊      | 4200/10845 [36:35<53:07,  2.08it/s, acc=0.623, epoch=3, loss=1.2]  

epoch:3, idx:4199/10845, loss:1.1960406655215081, acc:0.6233333333333333


 40%|███▉      | 4300/10845 [37:30<1:07:11,  1.62it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:4299/10845, loss:1.1980346602478693, acc:0.6223255813953489


 41%|████      | 4400/10845 [38:23<51:22,  2.09it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:4399/10845, loss:1.200667286014015, acc:0.6217613636363636


 41%|████▏     | 4500/10845 [39:16<53:42,  1.97it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:4499/10845, loss:1.201310914436976, acc:0.6217777777777778


 42%|████▏     | 4600/10845 [40:08<1:03:19,  1.64it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:4599/10845, loss:1.201293389317782, acc:0.6215217391304347


 43%|████▎     | 4700/10845 [41:01<47:31,  2.16it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:4699/10845, loss:1.200677896218097, acc:0.6221808510638298


 44%|████▍     | 4800/10845 [41:58<1:03:15,  1.59it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:4799/10845, loss:1.2012334310387571, acc:0.621875


 45%|████▌     | 4900/10845 [42:46<37:39,  2.63it/s, acc=0.621, epoch=3, loss=1.2]  

epoch:3, idx:4899/10845, loss:1.2013459290898576, acc:0.6213775510204081


 46%|████▌     | 5000/10845 [43:28<25:48,  3.77it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:4999/10845, loss:1.2012395245671272, acc:0.6218


 47%|████▋     | 5100/10845 [44:01<45:02,  2.13it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:5099/10845, loss:1.1998942219860413, acc:0.6223529411764706


 48%|████▊     | 5200/10845 [44:53<51:08,  1.84it/s, acc=0.623, epoch=3, loss=1.2]  

epoch:3, idx:5199/10845, loss:1.1980204993257155, acc:0.6229326923076923


 49%|████▉     | 5300/10845 [45:46<44:42,  2.07it/s, acc=0.623, epoch=3, loss=1.2]  

epoch:3, idx:5299/10845, loss:1.1987317010816538, acc:0.6225


 50%|████▉     | 5400/10845 [46:42<56:25,  1.61it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:5399/10845, loss:1.1994210024233218, acc:0.6223148148148148


 51%|█████     | 5500/10845 [47:40<33:47,  2.64it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:5499/10845, loss:1.2003217602426355, acc:0.6219090909090909


 52%|█████▏    | 5600/10845 [48:33<41:49,  2.09it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:5599/10845, loss:1.1999443310605629, acc:0.621875


 53%|█████▎    | 5700/10845 [49:31<40:54,  2.10it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:5699/10845, loss:1.200770414963103, acc:0.6217105263157895


 53%|█████▎    | 5800/10845 [50:24<48:48,  1.72it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:5799/10845, loss:1.2026708716462398, acc:0.621551724137931


 54%|█████▍    | 5900/10845 [51:18<56:22,  1.46it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:5899/10845, loss:1.2008672907190807, acc:0.6219067796610169


 55%|█████▌    | 6000/10845 [52:14<40:00,  2.02it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:5999/10845, loss:1.201528083662192, acc:0.6218333333333333


 56%|█████▌    | 6100/10845 [53:10<36:32,  2.16it/s, acc=0.621, epoch=3, loss=1.2]  

epoch:3, idx:6099/10845, loss:1.2034794965337534, acc:0.6212704918032786


 57%|█████▋    | 6200/10845 [54:05<41:14,  1.88it/s, acc=0.622, epoch=3, loss=1.2]  

epoch:3, idx:6199/10845, loss:1.2038537540358882, acc:0.6215322580645162


 58%|█████▊    | 6300/10845 [55:00<47:12,  1.60it/s, acc=0.621, epoch=3, loss=1.2]  

epoch:3, idx:6299/10845, loss:1.204923126811073, acc:0.6208730158730159


 59%|█████▉    | 6400/10845 [55:46<40:18,  1.84it/s, acc=0.621, epoch=3, loss=1.2] 

epoch:3, idx:6399/10845, loss:1.2048529071174563, acc:0.6210546875


 60%|█████▉    | 6500/10845 [56:33<47:21,  1.53it/s, acc=0.621, epoch=3, loss=1.21]

epoch:3, idx:6499/10845, loss:1.2057312435278527, acc:0.6207692307692307


 61%|██████    | 6600/10845 [57:21<31:30,  2.25it/s, acc=0.621, epoch=3, loss=1.2] 

epoch:3, idx:6599/10845, loss:1.2030283959977555, acc:0.6212878787878788


 62%|██████▏   | 6700/10845 [58:10<34:48,  1.98it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:6699/10845, loss:1.2029920129900549, acc:0.6211194029850746


 63%|██████▎   | 6800/10845 [58:55<35:33,  1.90it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:6799/10845, loss:1.20248940288144, acc:0.6215441176470589


 64%|██████▎   | 6900/10845 [59:44<30:31,  2.15it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:6899/10845, loss:1.2032647314970044, acc:0.621159420289855


 65%|██████▍   | 7000/10845 [1:00:29<17:10,  3.73it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:6999/10845, loss:1.2019534763097763, acc:0.62125


 65%|██████▌   | 7100/10845 [1:01:07<22:34,  2.76it/s, acc=0.621, epoch=3, loss=1.2] 

epoch:3, idx:7099/10845, loss:1.2045839349316878, acc:0.6208802816901409


 66%|██████▋   | 7201/10845 [1:01:52<25:32,  2.38it/s, acc=0.621, epoch=3, loss=1.2] 

epoch:3, idx:7199/10845, loss:1.2042737685143947, acc:0.6210763888888889


 67%|██████▋   | 7300/10845 [1:02:28<26:53,  2.20it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:7299/10845, loss:1.2033543979713361, acc:0.621472602739726


 68%|██████▊   | 7400/10845 [1:03:08<17:40,  3.25it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:7399/10845, loss:1.2037759267961656, acc:0.6210135135135135


 69%|██████▉   | 7500/10845 [1:03:49<23:59,  2.32it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:7499/10845, loss:1.204331338040034, acc:0.6208333333333333


 70%|███████   | 7601/10845 [1:04:25<12:59,  4.16it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:7599/10845, loss:1.2040280955559328, acc:0.620953947368421


 71%|███████   | 7701/10845 [1:05:06<13:27,  3.89it/s, acc=0.62, epoch=3, loss=1.21] 

epoch:3, idx:7699/10845, loss:1.2064170997018937, acc:0.6200974025974026


 72%|███████▏  | 7800/10845 [1:05:44<17:43,  2.86it/s, acc=0.62, epoch=3, loss=1.21]

epoch:3, idx:7799/10845, loss:1.2052911082674296, acc:0.6202564102564102


 73%|███████▎  | 7900/10845 [1:06:23<25:01,  1.96it/s, acc=0.62, epoch=3, loss=1.21]

epoch:3, idx:7899/10845, loss:1.2057347586260567, acc:0.6201898734177215


 74%|███████▍  | 8001/10845 [1:07:06<15:24,  3.08it/s, acc=0.62, epoch=3, loss=1.2] 

epoch:3, idx:7999/10845, loss:1.2042425778210164, acc:0.62053125


 75%|███████▍  | 8100/10845 [1:07:46<22:22,  2.04it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:8099/10845, loss:1.2037001496774178, acc:0.6205246913580247


 76%|███████▌  | 8200/10845 [1:08:30<18:27,  2.39it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:8199/10845, loss:1.2025001777672186, acc:0.6208536585365854


 77%|███████▋  | 8300/10845 [1:09:17<15:26,  2.75it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:8299/10845, loss:1.2038100532953997, acc:0.6210240963855421


 77%|███████▋  | 8400/10845 [1:09:59<13:36,  2.99it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:8399/10845, loss:1.2034327690445241, acc:0.62125


 78%|███████▊  | 8500/10845 [1:10:45<23:29,  1.66it/s, acc=0.621, epoch=3, loss=1.21]

epoch:3, idx:8499/10845, loss:1.2050710096920239, acc:0.6211470588235294


 79%|███████▉  | 8600/10845 [1:11:36<18:19,  2.04it/s, acc=0.621, epoch=3, loss=1.2] 

epoch:3, idx:8599/10845, loss:1.204062055591927, acc:0.6213081395348837


 80%|████████  | 8700/10845 [1:12:27<16:27,  2.17it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:8699/10845, loss:1.2038054438226524, acc:0.6213218390804598


 81%|████████  | 8800/10845 [1:13:17<15:44,  2.17it/s, acc=0.621, epoch=3, loss=1.2]

epoch:3, idx:8799/10845, loss:1.204598441963846, acc:0.6213920454545454


 82%|████████▏ | 8900/10845 [1:14:07<19:41,  1.65it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:8899/10845, loss:1.204221126256364, acc:0.6217977528089887


 83%|████████▎ | 9000/10845 [1:14:55<13:56,  2.21it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:8999/10845, loss:1.204387581480874, acc:0.6216111111111111


 84%|████████▍ | 9100/10845 [1:15:50<20:40,  1.41it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:9099/10845, loss:1.2036782405533633, acc:0.6221428571428571


 85%|████████▍ | 9200/10845 [1:16:46<16:10,  1.70it/s, acc=0.622, epoch=3, loss=1.2]

epoch:3, idx:9199/10845, loss:1.2035058682962605, acc:0.6222826086956522


 86%|████████▌ | 9301/10845 [1:17:41<11:34,  2.22it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9299/10845, loss:1.2034442623071773, acc:0.6226075268817204


 87%|████████▋ | 9400/10845 [1:18:35<12:17,  1.96it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9399/10845, loss:1.2024656756253953, acc:0.6227659574468085


 88%|████████▊ | 9500/10845 [1:19:28<14:10,  1.58it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9499/10845, loss:1.2020292744511052, acc:0.6231052631578947


 89%|████████▊ | 9600/10845 [1:20:20<09:33,  2.17it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9599/10845, loss:1.2004136235825718, acc:0.623203125


 89%|████████▉ | 9700/10845 [1:21:16<10:42,  1.78it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9699/10845, loss:1.201306149867392, acc:0.6229896907216494


 90%|█████████ | 9800/10845 [1:22:17<07:58,  2.18it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9799/10845, loss:1.2020631378584978, acc:0.6229591836734694


 91%|█████████▏| 9900/10845 [1:23:09<09:46,  1.61it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9899/10845, loss:1.2026939587581038, acc:0.6227525252525252


 92%|█████████▏| 10000/10845 [1:24:05<07:54,  1.78it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:9999/10845, loss:1.2023792609810828, acc:0.622875


 93%|█████████▎| 10100/10845 [1:25:01<05:18,  2.34it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:10099/10845, loss:1.2030827980938523, acc:0.6229950495049505


 94%|█████████▍| 10200/10845 [1:25:56<04:20,  2.48it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:10199/10845, loss:1.2024328991946052, acc:0.6231617647058824


 95%|█████████▍| 10301/10845 [1:26:40<03:19,  2.72it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:10299/10845, loss:1.2023730461632163, acc:0.6233009708737864


 96%|█████████▌| 10400/10845 [1:27:20<03:18,  2.24it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:10399/10845, loss:1.2030948307766365, acc:0.6230769230769231


 97%|█████████▋| 10500/10845 [1:28:04<02:28,  2.33it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:10499/10845, loss:1.2028344779184887, acc:0.6230952380952381


 98%|█████████▊| 10600/10845 [1:28:44<02:05,  1.96it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:10599/10845, loss:1.2031425986818547, acc:0.6227830188679245


 99%|█████████▊| 10700/10845 [1:29:24<01:02,  2.34it/s, acc=0.623, epoch=3, loss=1.2]

epoch:3, idx:10699/10845, loss:1.2033739635821814, acc:0.6226869158878504


100%|█████████▉| 10800/10845 [1:30:04<00:13,  3.27it/s, acc=0.622, epoch=3, loss=1.21]

epoch:3, idx:10799/10845, loss:1.2058063509673984, acc:0.6221527777777778


100%|██████████| 10845/10845 [1:30:23<00:00,  1.83it/s, acc=0.622, epoch=3, loss=1.21]


epoch:3, idx:0/1275, loss:0.8611742258071899, acc:0.75
epoch:3, idx:100/1275, loss:1.3694443401723806, acc:0.5717821782178217
epoch:3, idx:200/1275, loss:1.3130319791646738, acc:0.599502487562189
epoch:3, idx:300/1275, loss:1.299283779142703, acc:0.6088039867109635
epoch:3, idx:400/1275, loss:1.2900033001590547, acc:0.6122194513715711
epoch:3, idx:500/1275, loss:1.2901693220861896, acc:0.6117764471057884
epoch:3, idx:600/1275, loss:1.2940377184436247, acc:0.6098169717138103
epoch:3, idx:700/1275, loss:1.2899308727573227, acc:0.6098430813124108
epoch:3, idx:800/1275, loss:1.304188012853544, acc:0.6067415730337079
epoch:3, idx:900/1275, loss:1.2960654395269633, acc:0.6104328523862376
epoch:3, idx:1000/1275, loss:1.3023661537008449, acc:0.6103896103896104
epoch:3, idx:1100/1275, loss:1.2975572695740778, acc:0.6110354223433242
epoch:3, idx:1200/1275, loss:1.300355533676084, acc:0.607826810990841


  1%|          | 101/10845 [00:42<1:04:27,  2.78it/s, acc=0.649, epoch=4, loss=1.1] 

epoch:4, idx:99/10845, loss:1.076589218378067, acc:0.6525


  2%|▏         | 200/10845 [01:31<2:09:49,  1.37it/s, acc=0.66, epoch=4, loss=1.13] 

epoch:4, idx:199/10845, loss:1.128305304646492, acc:0.66


  3%|▎         | 300/10845 [02:16<1:23:21,  2.11it/s, acc=0.657, epoch=4, loss=1.12]

epoch:4, idx:299/10845, loss:1.1214020403226217, acc:0.6575


  4%|▎         | 400/10845 [03:01<47:57,  3.63it/s, acc=0.644, epoch=4, loss=1.15]  

epoch:4, idx:399/10845, loss:1.1501314370334148, acc:0.644375


  5%|▍         | 500/10845 [03:36<54:14,  3.18it/s, acc=0.643, epoch=4, loss=1.14]  

epoch:4, idx:499/10845, loss:1.1411533873081208, acc:0.643


  6%|▌         | 600/10845 [04:20<1:26:00,  1.99it/s, acc=0.637, epoch=4, loss=1.16]

epoch:4, idx:599/10845, loss:1.1590913112958272, acc:0.6370833333333333


  6%|▋         | 700/10845 [05:06<59:16,  2.85it/s, acc=0.637, epoch=4, loss=1.16]  

epoch:4, idx:699/10845, loss:1.1566925529071264, acc:0.6375


  7%|▋         | 800/10845 [05:54<1:28:01,  1.90it/s, acc=0.639, epoch=4, loss=1.15]

epoch:4, idx:799/10845, loss:1.1460200259834528, acc:0.6390625


  8%|▊         | 900/10845 [06:42<1:29:55,  1.84it/s, acc=0.641, epoch=4, loss=1.13]

epoch:4, idx:899/10845, loss:1.1315207229057949, acc:0.6411111111111111


  9%|▉         | 1000/10845 [07:17<1:04:35,  2.54it/s, acc=0.641, epoch=4, loss=1.13]

epoch:4, idx:999/10845, loss:1.1274427221417427, acc:0.6415


 10%|█         | 1100/10845 [07:57<1:03:43,  2.55it/s, acc=0.646, epoch=4, loss=1.12]

epoch:4, idx:1099/10845, loss:1.1155441509051756, acc:0.6456818181818181


 11%|█         | 1200/10845 [08:35<1:41:36,  1.58it/s, acc=0.647, epoch=4, loss=1.11]

epoch:4, idx:1199/10845, loss:1.114583134551843, acc:0.6466666666666666


 12%|█▏        | 1300/10845 [09:13<48:47,  3.26it/s, acc=0.647, epoch=4, loss=1.11]  

epoch:4, idx:1299/10845, loss:1.1145936065912248, acc:0.6475


 13%|█▎        | 1400/10845 [09:48<1:07:33,  2.33it/s, acc=0.646, epoch=4, loss=1.12]

epoch:4, idx:1399/10845, loss:1.1168710003580367, acc:0.64625


 14%|█▍        | 1501/10845 [10:20<47:23,  3.29it/s, acc=0.646, epoch=4, loss=1.12]  

epoch:4, idx:1499/10845, loss:1.1204143583377202, acc:0.6461666666666667


 15%|█▍        | 1600/10845 [10:55<1:09:38,  2.21it/s, acc=0.645, epoch=4, loss=1.12]

epoch:4, idx:1599/10845, loss:1.118430708013475, acc:0.64515625


 16%|█▌        | 1700/10845 [11:36<52:01,  2.93it/s, acc=0.646, epoch=4, loss=1.12]  

epoch:4, idx:1699/10845, loss:1.1167916175197152, acc:0.6458823529411765


 17%|█▋        | 1800/10845 [12:19<1:11:09,  2.12it/s, acc=0.646, epoch=4, loss=1.12]

epoch:4, idx:1799/10845, loss:1.120478605694241, acc:0.64625


 18%|█▊        | 1900/10845 [13:01<1:23:09,  1.79it/s, acc=0.647, epoch=4, loss=1.12]

epoch:4, idx:1899/10845, loss:1.121125126324202, acc:0.6467105263157895


 18%|█▊        | 2000/10845 [13:40<54:32,  2.70it/s, acc=0.647, epoch=4, loss=1.13]  

epoch:4, idx:1999/10845, loss:1.12806118658185, acc:0.647


 19%|█▉        | 2100/10845 [14:20<1:06:09,  2.20it/s, acc=0.647, epoch=4, loss=1.13]

epoch:4, idx:2099/10845, loss:1.1284805279118675, acc:0.6473809523809524


 20%|██        | 2200/10845 [15:01<1:14:12,  1.94it/s, acc=0.649, epoch=4, loss=1.13]

epoch:4, idx:2199/10845, loss:1.1254159545356577, acc:0.6492045454545454


 21%|██        | 2300/10845 [15:55<1:32:46,  1.54it/s, acc=0.649, epoch=4, loss=1.13]

epoch:4, idx:2299/10845, loss:1.127998953850373, acc:0.6494565217391305


 22%|██▏       | 2400/10845 [16:50<1:01:30,  2.29it/s, acc=0.651, epoch=4, loss=1.13]

epoch:4, idx:2399/10845, loss:1.125113347073396, acc:0.650625


 23%|██▎       | 2500/10845 [17:47<1:09:14,  2.01it/s, acc=0.65, epoch=4, loss=1.12] 

epoch:4, idx:2499/10845, loss:1.12374378592968, acc:0.6502


 24%|██▍       | 2600/10845 [18:39<1:21:56,  1.68it/s, acc=0.651, epoch=4, loss=1.12]

epoch:4, idx:2599/10845, loss:1.1218872465995642, acc:0.6509615384615385


 25%|██▍       | 2700/10845 [19:37<1:18:10,  1.74it/s, acc=0.651, epoch=4, loss=1.12]

epoch:4, idx:2699/10845, loss:1.1221958266364203, acc:0.6512037037037037


 26%|██▌       | 2800/10845 [20:36<1:27:46,  1.53it/s, acc=0.65, epoch=4, loss=1.13] 

epoch:4, idx:2799/10845, loss:1.1305972905669894, acc:0.6495535714285714


 27%|██▋       | 2900/10845 [21:38<1:18:09,  1.69it/s, acc=0.65, epoch=4, loss=1.13]

epoch:4, idx:2899/10845, loss:1.12947355519081, acc:0.6500862068965517


 28%|██▊       | 3000/10845 [22:32<1:11:56,  1.82it/s, acc=0.651, epoch=4, loss=1.12]

epoch:4, idx:2999/10845, loss:1.124789161960284, acc:0.6510833333333333


 29%|██▊       | 3100/10845 [23:28<1:17:14,  1.67it/s, acc=0.648, epoch=4, loss=1.13]

epoch:4, idx:3099/10845, loss:1.1315199224602792, acc:0.6483870967741936


 30%|██▉       | 3200/10845 [24:23<1:14:32,  1.71it/s, acc=0.649, epoch=4, loss=1.13]

epoch:4, idx:3199/10845, loss:1.130243500582874, acc:0.64875


 30%|███       | 3301/10845 [25:10<33:43,  3.73it/s, acc=0.65, epoch=4, loss=1.13]   

epoch:4, idx:3299/10845, loss:1.126704446239905, acc:0.6496212121212122


 31%|███▏      | 3400/10845 [26:01<1:00:51,  2.04it/s, acc=0.649, epoch=4, loss=1.13]

epoch:4, idx:3399/10845, loss:1.1294757887545754, acc:0.64875


 32%|███▏      | 3500/10845 [26:57<1:07:03,  1.83it/s, acc=0.648, epoch=4, loss=1.13]

epoch:4, idx:3499/10845, loss:1.1296552927834647, acc:0.6478571428571429


 33%|███▎      | 3600/10845 [27:52<1:20:43,  1.50it/s, acc=0.648, epoch=4, loss=1.13]

epoch:4, idx:3599/10845, loss:1.1284095637334717, acc:0.6481944444444444


 34%|███▍      | 3700/10845 [28:44<50:29,  2.36it/s, acc=0.649, epoch=4, loss=1.13]  

epoch:4, idx:3699/10845, loss:1.1294892213312355, acc:0.648918918918919


 35%|███▌      | 3800/10845 [29:38<56:32,  2.08it/s, acc=0.649, epoch=4, loss=1.13]  

epoch:4, idx:3799/10845, loss:1.1292422983834618, acc:0.6494078947368421


 36%|███▌      | 3900/10845 [30:31<49:50,  2.32it/s, acc=0.65, epoch=4, loss=1.13]   

epoch:4, idx:3899/10845, loss:1.1318268121817172, acc:0.6497435897435897


 37%|███▋      | 4000/10845 [31:29<1:15:58,  1.50it/s, acc=0.649, epoch=4, loss=1.13]

epoch:4, idx:3999/10845, loss:1.1335126014053822, acc:0.6495


 38%|███▊      | 4100/10845 [32:20<46:44,  2.40it/s, acc=0.65, epoch=4, loss=1.13]   

epoch:4, idx:4099/10845, loss:1.1329278782228145, acc:0.6499390243902439


 39%|███▊      | 4200/10845 [33:16<1:04:45,  1.71it/s, acc=0.65, epoch=4, loss=1.13] 

epoch:4, idx:4199/10845, loss:1.1331555192953064, acc:0.6496428571428572


 40%|███▉      | 4300/10845 [34:07<1:07:49,  1.61it/s, acc=0.649, epoch=4, loss=1.13]

epoch:4, idx:4299/10845, loss:1.132747462558192, acc:0.6494767441860465


 41%|████      | 4400/10845 [35:02<1:03:30,  1.69it/s, acc=0.649, epoch=4, loss=1.13]

epoch:4, idx:4399/10845, loss:1.1324870067022064, acc:0.6494886363636364


 41%|████▏     | 4500/10845 [35:47<58:54,  1.80it/s, acc=0.649, epoch=4, loss=1.13]  

epoch:4, idx:4499/10845, loss:1.1347953917450375, acc:0.6488333333333334


 42%|████▏     | 4600/10845 [36:38<53:27,  1.95it/s, acc=0.648, epoch=4, loss=1.14]  

epoch:4, idx:4599/10845, loss:1.1354418897369634, acc:0.6482065217391304


 43%|████▎     | 4700/10845 [37:15<1:06:54,  1.53it/s, acc=0.648, epoch=4, loss=1.13]

epoch:4, idx:4699/10845, loss:1.134920225422433, acc:0.6481914893617021


 44%|████▍     | 4800/10845 [38:13<53:24,  1.89it/s, acc=0.648, epoch=4, loss=1.14]  

epoch:4, idx:4799/10845, loss:1.136192919711272, acc:0.6477083333333333


 45%|████▌     | 4900/10845 [39:10<54:14,  1.83it/s, acc=0.647, epoch=4, loss=1.14]  

epoch:4, idx:4899/10845, loss:1.1381496524080938, acc:0.6470918367346938


 46%|████▌     | 5000/10845 [40:03<56:44,  1.72it/s, acc=0.647, epoch=4, loss=1.14]  

epoch:4, idx:4999/10845, loss:1.138331151676178, acc:0.6472


 47%|████▋     | 5100/10845 [40:59<1:02:14,  1.54it/s, acc=0.648, epoch=4, loss=1.14]

epoch:4, idx:5099/10845, loss:1.1374753592645421, acc:0.6475980392156863


 48%|████▊     | 5200/10845 [41:55<50:40,  1.86it/s, acc=0.647, epoch=4, loss=1.14]  

epoch:4, idx:5199/10845, loss:1.1379280562354968, acc:0.6475


 49%|████▉     | 5300/10845 [42:50<51:38,  1.79it/s, acc=0.647, epoch=4, loss=1.14]  

epoch:4, idx:5299/10845, loss:1.1387396173769573, acc:0.6469811320754717


 50%|████▉     | 5400/10845 [43:46<1:00:47,  1.49it/s, acc=0.647, epoch=4, loss=1.14]

epoch:4, idx:5399/10845, loss:1.139348585417977, acc:0.6468518518518519


 51%|█████     | 5500/10845 [44:41<35:51,  2.48it/s, acc=0.646, epoch=4, loss=1.14]  

epoch:4, idx:5499/10845, loss:1.1402818209799854, acc:0.6464545454545455


 52%|█████▏    | 5600/10845 [45:40<40:38,  2.15it/s, acc=0.646, epoch=4, loss=1.14]  

epoch:4, idx:5599/10845, loss:1.1391678744448084, acc:0.6460714285714285


 53%|█████▎    | 5700/10845 [46:38<59:15,  1.45it/s, acc=0.646, epoch=4, loss=1.14]  

epoch:4, idx:5699/10845, loss:1.1394919053295203, acc:0.6463157894736842


 53%|█████▎    | 5800/10845 [47:29<37:25,  2.25it/s, acc=0.646, epoch=4, loss=1.14]  

epoch:4, idx:5799/10845, loss:1.1382450426858046, acc:0.6459913793103448


 54%|█████▍    | 5900/10845 [48:23<37:02,  2.22it/s, acc=0.646, epoch=4, loss=1.14]  

epoch:4, idx:5899/10845, loss:1.1379296404931505, acc:0.6458050847457627


 55%|█████▌    | 6000/10845 [49:20<41:10,  1.96it/s, acc=0.646, epoch=4, loss=1.14]  

epoch:4, idx:5999/10845, loss:1.1389145275553068, acc:0.6455833333333333


 56%|█████▌    | 6100/10845 [50:17<38:07,  2.07it/s, acc=0.645, epoch=4, loss=1.14]  

epoch:4, idx:6099/10845, loss:1.1403771561091063, acc:0.6450819672131147


 57%|█████▋    | 6200/10845 [51:12<57:04,  1.36it/s, acc=0.645, epoch=4, loss=1.14]  

epoch:4, idx:6199/10845, loss:1.140553638435179, acc:0.6452016129032258


 58%|█████▊    | 6300/10845 [52:10<59:17,  1.28it/s, acc=0.645, epoch=4, loss=1.14]  

epoch:4, idx:6299/10845, loss:1.140883381432957, acc:0.6451190476190476


 59%|█████▉    | 6400/10845 [53:04<39:32,  1.87it/s, acc=0.645, epoch=4, loss=1.14]  

epoch:4, idx:6399/10845, loss:1.142972912751138, acc:0.6448828125


 60%|█████▉    | 6500/10845 [53:55<33:40,  2.15it/s, acc=0.645, epoch=4, loss=1.14]  

epoch:4, idx:6499/10845, loss:1.143590341256215, acc:0.6446538461538461


 61%|██████    | 6600/10845 [54:52<40:28,  1.75it/s, acc=0.645, epoch=4, loss=1.14]  

epoch:4, idx:6599/10845, loss:1.1434166938156793, acc:0.6450378787878788


 62%|██████▏   | 6700/10845 [55:49<38:33,  1.79it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:6699/10845, loss:1.1457443950425332, acc:0.6442910447761194


 63%|██████▎   | 6801/10845 [56:43<32:20,  2.08it/s, acc=0.644, epoch=4, loss=1.14]

epoch:4, idx:6799/10845, loss:1.1451149066406139, acc:0.6444117647058824


 64%|██████▎   | 6900/10845 [57:34<31:31,  2.09it/s, acc=0.644, epoch=4, loss=1.14]

epoch:4, idx:6899/10845, loss:1.1449902448101321, acc:0.6444927536231884


 65%|██████▍   | 7000/10845 [58:27<20:58,  3.05it/s, acc=0.644, epoch=4, loss=1.15]  

epoch:4, idx:6999/10845, loss:1.145597360755716, acc:0.6443214285714286


 65%|██████▌   | 7100/10845 [59:23<36:25,  1.71it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7099/10845, loss:1.1454160878356074, acc:0.6444366197183099


 66%|██████▋   | 7200/10845 [1:00:21<41:09,  1.48it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7199/10845, loss:1.1461567964239252, acc:0.6440625


 67%|██████▋   | 7300/10845 [1:01:22<40:16,  1.47it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7299/10845, loss:1.1466199784082909, acc:0.6441780821917809


 68%|██████▊   | 7400/10845 [1:02:16<28:07,  2.04it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7399/10845, loss:1.146351890306215, acc:0.6443581081081081


 69%|██████▉   | 7500/10845 [1:03:11<29:37,  1.88it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7499/10845, loss:1.1468820764223735, acc:0.6441666666666667


 70%|███████   | 7600/10845 [1:04:09<36:21,  1.49it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7599/10845, loss:1.1473318932950496, acc:0.6439802631578947


 71%|███████   | 7700/10845 [1:05:05<46:29,  1.13it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7699/10845, loss:1.1476902799946922, acc:0.6439935064935065


 72%|███████▏  | 7800/10845 [1:05:56<18:47,  2.70it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7799/10845, loss:1.148716776676667, acc:0.643974358974359


 73%|███████▎  | 7900/10845 [1:06:54<32:09,  1.53it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:7899/10845, loss:1.14718225095091, acc:0.6439556962025317


 74%|███████▍  | 8000/10845 [1:07:53<25:33,  1.85it/s, acc=0.643, epoch=4, loss=1.15]

epoch:4, idx:7999/10845, loss:1.1482167157456278, acc:0.643375


 75%|███████▍  | 8100/10845 [1:08:46<21:23,  2.14it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:8099/10845, loss:1.146290408174197, acc:0.643641975308642


 76%|███████▌  | 8200/10845 [1:09:40<22:34,  1.95it/s, acc=0.644, epoch=4, loss=1.15]

epoch:4, idx:8199/10845, loss:1.1470745689549098, acc:0.6438109756097561


 77%|███████▋  | 8300/10845 [1:10:35<22:17,  1.90it/s, acc=0.643, epoch=4, loss=1.15]

epoch:4, idx:8299/10845, loss:1.148360603760524, acc:0.6433734939759036


 77%|███████▋  | 8400/10845 [1:11:35<33:22,  1.22it/s, acc=0.643, epoch=4, loss=1.15]

epoch:4, idx:8399/10845, loss:1.150377115422771, acc:0.6428273809523809


 78%|███████▊  | 8500/10845 [1:12:28<18:14,  2.14it/s, acc=0.643, epoch=4, loss=1.15]

epoch:4, idx:8499/10845, loss:1.1513916076351614, acc:0.6425294117647059


 79%|███████▉  | 8600/10845 [1:13:23<17:25,  2.15it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:8599/10845, loss:1.1527252698707027, acc:0.6421511627906977


 80%|████████  | 8700/10845 [1:14:20<19:56,  1.79it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:8699/10845, loss:1.1539939048715022, acc:0.6418103448275863


 81%|████████  | 8800/10845 [1:15:14<18:45,  1.82it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:8799/10845, loss:1.1542407110333444, acc:0.6415056818181818


 82%|████████▏ | 8900/10845 [1:16:08<18:42,  1.73it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:8899/10845, loss:1.1535119235917424, acc:0.6416011235955056


 83%|████████▎ | 9000/10845 [1:17:05<21:20,  1.44it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:8999/10845, loss:1.1524557276434368, acc:0.6422222222222222


 84%|████████▍ | 9100/10845 [1:17:57<14:55,  1.95it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:9099/10845, loss:1.1519503750774887, acc:0.6424725274725275


 85%|████████▍ | 9200/10845 [1:18:52<13:50,  1.98it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:9199/10845, loss:1.1525812275966871, acc:0.6422010869565218


 86%|████████▌ | 9300/10845 [1:19:46<14:02,  1.83it/s, acc=0.643, epoch=4, loss=1.15]

epoch:4, idx:9299/10845, loss:1.1527744921112573, acc:0.6425537634408602


 87%|████████▋ | 9400/10845 [1:20:38<13:46,  1.75it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:9399/10845, loss:1.1540606559844728, acc:0.6424202127659574


 88%|████████▊ | 9500/10845 [1:21:32<11:38,  1.93it/s, acc=0.642, epoch=4, loss=1.15]

epoch:4, idx:9499/10845, loss:1.154017061904857, acc:0.6423684210526316


 89%|████████▊ | 9600/10845 [1:22:25<10:11,  2.04it/s, acc=0.642, epoch=4, loss=1.16]

epoch:4, idx:9599/10845, loss:1.15529552327469, acc:0.641796875


 89%|████████▉ | 9700/10845 [1:23:20<10:48,  1.76it/s, acc=0.642, epoch=4, loss=1.16]

epoch:4, idx:9699/10845, loss:1.1554658917852283, acc:0.6416494845360825


 90%|█████████ | 9800/10845 [1:24:15<08:10,  2.13it/s, acc=0.642, epoch=4, loss=1.16]

epoch:4, idx:9799/10845, loss:1.15624057006471, acc:0.6415306122448979


 91%|█████████▏| 9900/10845 [1:25:08<07:48,  2.02it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:9899/10845, loss:1.157396228807141, acc:0.6413888888888889


 92%|█████████▏| 10000/10845 [1:26:07<07:01,  2.00it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:9999/10845, loss:1.1581688865661621, acc:0.6412


 93%|█████████▎| 10100/10845 [1:27:04<07:08,  1.74it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:10099/10845, loss:1.1590626053172763, acc:0.6411386138613862


 94%|█████████▍| 10200/10845 [1:28:00<05:30,  1.95it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:10199/10845, loss:1.1587468943934816, acc:0.6408823529411765


 95%|█████████▍| 10300/10845 [1:28:53<04:16,  2.13it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:10299/10845, loss:1.158876397557629, acc:0.6409466019417476


 96%|█████████▌| 10400/10845 [1:29:50<03:32,  2.10it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:10399/10845, loss:1.1586430152906821, acc:0.6408413461538461


 97%|█████████▋| 10500/10845 [1:30:46<04:03,  1.42it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:10499/10845, loss:1.1583003129675276, acc:0.6409047619047619


 98%|█████████▊| 10600/10845 [1:31:39<02:12,  1.84it/s, acc=0.641, epoch=4, loss=1.16]

epoch:4, idx:10599/10845, loss:1.1588425297613414, acc:0.6407783018867924


 99%|█████████▊| 10700/10845 [1:32:36<01:14,  1.94it/s, acc=0.64, epoch=4, loss=1.16] 

epoch:4, idx:10699/10845, loss:1.1601057035120848, acc:0.6403971962616822


100%|█████████▉| 10800/10845 [1:33:32<00:22,  1.99it/s, acc=0.64, epoch=4, loss=1.16]

epoch:4, idx:10799/10845, loss:1.1606232341848037, acc:0.6400694444444445


100%|██████████| 10845/10845 [1:33:55<00:00,  2.31it/s, acc=0.64, epoch=4, loss=1.16]


epoch:4, idx:0/1275, loss:0.9191807508468628, acc:0.75
epoch:4, idx:100/1275, loss:1.312138835392376, acc:0.5816831683168316
epoch:4, idx:200/1275, loss:1.2501674751144143, acc:0.6119402985074627
epoch:4, idx:300/1275, loss:1.2441595899702307, acc:0.6179401993355482
epoch:4, idx:400/1275, loss:1.2317946401617474, acc:0.6190773067331671
epoch:4, idx:500/1275, loss:1.23461758685921, acc:0.6162674650698603
epoch:4, idx:600/1275, loss:1.2397638922324792, acc:0.6131447587354409
epoch:4, idx:700/1275, loss:1.2392052915398983, acc:0.6119828815977175
epoch:4, idx:800/1275, loss:1.2533934864212064, acc:0.6083021223470662
epoch:4, idx:900/1275, loss:1.2412716573007099, acc:0.6132075471698113
epoch:4, idx:1000/1275, loss:1.2439069008374666, acc:0.6128871128871128
epoch:4, idx:1100/1275, loss:1.2376816065932923, acc:0.6153496821071753
epoch:4, idx:1200/1275, loss:1.2413931608100814, acc:0.6144879267277269


  1%|          | 100/10845 [00:51<1:26:00,  2.08it/s, acc=0.623, epoch=5, loss=1.16]

epoch:5, idx:99/10845, loss:1.155080823302269, acc:0.6225


  2%|▏         | 200/10845 [01:45<1:43:17,  1.72it/s, acc=0.632, epoch=5, loss=1.13]

epoch:5, idx:199/10845, loss:1.1287274375557899, acc:0.6325


  3%|▎         | 300/10845 [02:42<1:45:35,  1.66it/s, acc=0.639, epoch=5, loss=1.1] 

epoch:5, idx:299/10845, loss:1.1015661313136418, acc:0.6391666666666667


  4%|▎         | 400/10845 [03:35<1:20:52,  2.15it/s, acc=0.644, epoch=5, loss=1.09]

epoch:5, idx:399/10845, loss:1.0852424813807011, acc:0.644375


  5%|▍         | 500/10845 [04:30<1:24:42,  2.04it/s, acc=0.655, epoch=5, loss=1.08]

epoch:5, idx:499/10845, loss:1.0753001462221146, acc:0.655


  6%|▌         | 600/10845 [05:27<1:39:08,  1.72it/s, acc=0.661, epoch=5, loss=1.06]

epoch:5, idx:599/10845, loss:1.0640613866845767, acc:0.66125


  6%|▋         | 700/10845 [06:26<1:34:18,  1.79it/s, acc=0.662, epoch=5, loss=1.07]

epoch:5, idx:699/10845, loss:1.0715654628617424, acc:0.6621428571428571


  7%|▋         | 800/10845 [07:20<1:26:14,  1.94it/s, acc=0.663, epoch=5, loss=1.08]

epoch:5, idx:799/10845, loss:1.0786763196438551, acc:0.6628125


  8%|▊         | 900/10845 [08:16<1:05:05,  2.55it/s, acc=0.666, epoch=5, loss=1.07]

epoch:5, idx:899/10845, loss:1.0734185514516301, acc:0.6658333333333334


  9%|▉         | 1000/10845 [09:10<1:49:03,  1.50it/s, acc=0.665, epoch=5, loss=1.07]

epoch:5, idx:999/10845, loss:1.0700712831318377, acc:0.6655


 10%|█         | 1100/10845 [10:05<1:21:44,  1.99it/s, acc=0.668, epoch=5, loss=1.06]

epoch:5, idx:1099/10845, loss:1.0602179980007085, acc:0.6679545454545455


 11%|█         | 1200/10845 [11:01<1:17:00,  2.09it/s, acc=0.666, epoch=5, loss=1.08]

epoch:5, idx:1199/10845, loss:1.0752494627485674, acc:0.6660416666666666


 12%|█▏        | 1300/10845 [11:56<1:16:05,  2.09it/s, acc=0.666, epoch=5, loss=1.08]

epoch:5, idx:1299/10845, loss:1.0780284230754926, acc:0.6657692307692308


 13%|█▎        | 1400/10845 [12:56<1:49:21,  1.44it/s, acc=0.666, epoch=5, loss=1.08]

epoch:5, idx:1399/10845, loss:1.0788777600654533, acc:0.6657142857142857


 14%|█▍        | 1500/10845 [13:54<1:33:13,  1.67it/s, acc=0.667, epoch=5, loss=1.08]

epoch:5, idx:1499/10845, loss:1.079086376051108, acc:0.6671666666666667


 15%|█▍        | 1600/10845 [14:47<1:08:15,  2.26it/s, acc=0.667, epoch=5, loss=1.08]

epoch:5, idx:1599/10845, loss:1.0819068980030715, acc:0.66703125


 16%|█▌        | 1700/10845 [15:39<1:24:34,  1.80it/s, acc=0.667, epoch=5, loss=1.08]

epoch:5, idx:1699/10845, loss:1.0814168787528486, acc:0.6670588235294118


 17%|█▋        | 1800/10845 [16:33<1:17:16,  1.95it/s, acc=0.666, epoch=5, loss=1.08]

epoch:5, idx:1799/10845, loss:1.0830206786096097, acc:0.6661111111111111


 18%|█▊        | 1900/10845 [17:29<1:15:19,  1.98it/s, acc=0.663, epoch=5, loss=1.09]

epoch:5, idx:1899/10845, loss:1.0916861901000927, acc:0.6632894736842105


 18%|█▊        | 2000/10845 [18:30<1:15:18,  1.96it/s, acc=0.664, epoch=5, loss=1.09]

epoch:5, idx:1999/10845, loss:1.0906784218400716, acc:0.66375


 19%|█▉        | 2100/10845 [19:24<1:20:30,  1.81it/s, acc=0.664, epoch=5, loss=1.09]

epoch:5, idx:2099/10845, loss:1.0905388458144096, acc:0.6639285714285714


 20%|██        | 2200/10845 [20:17<1:36:25,  1.49it/s, acc=0.663, epoch=5, loss=1.09]

epoch:5, idx:2199/10845, loss:1.0910121229155496, acc:0.6630681818181818


 21%|██        | 2300/10845 [21:08<1:10:08,  2.03it/s, acc=0.664, epoch=5, loss=1.09]

epoch:5, idx:2299/10845, loss:1.0885306147000064, acc:0.663804347826087


 22%|██▏       | 2400/10845 [22:07<1:09:15,  2.03it/s, acc=0.664, epoch=5, loss=1.09]

epoch:5, idx:2399/10845, loss:1.0880412855371833, acc:0.6636458333333334


 23%|██▎       | 2500/10845 [23:02<1:28:23,  1.57it/s, acc=0.664, epoch=5, loss=1.09]

epoch:5, idx:2499/10845, loss:1.085716430103779, acc:0.6644


 24%|██▍       | 2600/10845 [23:58<1:20:24,  1.71it/s, acc=0.664, epoch=5, loss=1.09]

epoch:5, idx:2599/10845, loss:1.090175837542002, acc:0.6641346153846154


 25%|██▍       | 2700/10845 [24:53<1:03:39,  2.13it/s, acc=0.665, epoch=5, loss=1.09]

epoch:5, idx:2699/10845, loss:1.0877765830026733, acc:0.6652777777777777


 26%|██▌       | 2800/10845 [25:49<54:58,  2.44it/s, acc=0.665, epoch=5, loss=1.09]  

epoch:5, idx:2799/10845, loss:1.0913809469874416, acc:0.6648214285714286


 27%|██▋       | 2900/10845 [26:44<1:00:17,  2.20it/s, acc=0.666, epoch=5, loss=1.09]

epoch:5, idx:2899/10845, loss:1.0887713767228455, acc:0.6661206896551725


 28%|██▊       | 3000/10845 [27:40<1:02:46,  2.08it/s, acc=0.668, epoch=5, loss=1.08]

epoch:5, idx:2999/10845, loss:1.0842690678139528, acc:0.6680833333333334


 29%|██▊       | 3100/10845 [28:36<57:00,  2.26it/s, acc=0.669, epoch=5, loss=1.08]  

epoch:5, idx:3099/10845, loss:1.0814808171506851, acc:0.6686290322580645


 30%|██▉       | 3200/10845 [29:31<1:03:54,  1.99it/s, acc=0.668, epoch=5, loss=1.08]

epoch:5, idx:3199/10845, loss:1.0808169234450906, acc:0.6678125


 30%|███       | 3300/10845 [30:26<1:04:14,  1.96it/s, acc=0.667, epoch=5, loss=1.08]

epoch:5, idx:3299/10845, loss:1.0838411471789533, acc:0.6674242424242425


 31%|███▏      | 3400/10845 [31:19<1:00:01,  2.07it/s, acc=0.667, epoch=5, loss=1.08]

epoch:5, idx:3399/10845, loss:1.0838160418850533, acc:0.6675


 32%|███▏      | 3500/10845 [32:10<1:06:04,  1.85it/s, acc=0.667, epoch=5, loss=1.09]

epoch:5, idx:3499/10845, loss:1.0867430159449578, acc:0.6667142857142857


 33%|███▎      | 3600/10845 [33:02<1:02:01,  1.95it/s, acc=0.666, epoch=5, loss=1.09]

epoch:5, idx:3599/10845, loss:1.0877759305222168, acc:0.665625


 34%|███▍      | 3700/10845 [33:57<1:23:09,  1.43it/s, acc=0.665, epoch=5, loss=1.09]

epoch:5, idx:3699/10845, loss:1.0891683637854215, acc:0.665


 35%|███▌      | 3800/10845 [34:50<1:08:17,  1.72it/s, acc=0.666, epoch=5, loss=1.09]

epoch:5, idx:3799/10845, loss:1.0866089892779527, acc:0.6657894736842105


 36%|███▌      | 3900/10845 [35:40<1:11:06,  1.63it/s, acc=0.665, epoch=5, loss=1.09]

epoch:5, idx:3899/10845, loss:1.0866058225127366, acc:0.6653846153846154


 37%|███▋      | 4000/10845 [36:34<53:43,  2.12it/s, acc=0.665, epoch=5, loss=1.09]  

epoch:5, idx:3999/10845, loss:1.087334495998919, acc:0.6650625


 38%|███▊      | 4100/10845 [37:29<56:40,  1.98it/s, acc=0.664, epoch=5, loss=1.09]  

epoch:5, idx:4099/10845, loss:1.0920104553132521, acc:0.6640853658536585


 39%|███▊      | 4200/10845 [38:22<1:19:34,  1.39it/s, acc=0.664, epoch=5, loss=1.09]

epoch:5, idx:4199/10845, loss:1.0889732044722353, acc:0.6639880952380952


 40%|███▉      | 4300/10845 [39:18<47:16,  2.31it/s, acc=0.664, epoch=5, loss=1.09]  

epoch:5, idx:4299/10845, loss:1.0884301417996718, acc:0.663953488372093


 41%|████      | 4400/10845 [40:12<59:26,  1.81it/s, acc=0.664, epoch=5, loss=1.09]  

epoch:5, idx:4399/10845, loss:1.0882304107397796, acc:0.6642045454545454


 41%|████▏     | 4500/10845 [41:05<1:14:05,  1.43it/s, acc=0.663, epoch=5, loss=1.09]

epoch:5, idx:4499/10845, loss:1.0895258052017953, acc:0.6627777777777778


 42%|████▏     | 4600/10845 [42:02<54:40,  1.90it/s, acc=0.662, epoch=5, loss=1.09]  

epoch:5, idx:4599/10845, loss:1.091445908151243, acc:0.6621195652173913


 43%|████▎     | 4700/10845 [42:55<38:14,  2.68it/s, acc=0.662, epoch=5, loss=1.09]  

epoch:5, idx:4699/10845, loss:1.092035230176246, acc:0.6618085106382978


 44%|████▍     | 4800/10845 [43:47<36:02,  2.80it/s, acc=0.662, epoch=5, loss=1.09]  

epoch:5, idx:4799/10845, loss:1.092914132780085, acc:0.6619791666666667


 45%|████▌     | 4900/10845 [44:43<55:19,  1.79it/s, acc=0.662, epoch=5, loss=1.09]  

epoch:5, idx:4899/10845, loss:1.0935363531416775, acc:0.6621428571428571


 46%|████▌     | 5000/10845 [45:39<1:04:04,  1.52it/s, acc=0.662, epoch=5, loss=1.09]

epoch:5, idx:4999/10845, loss:1.0947852512419223, acc:0.66185


 47%|████▋     | 5100/10845 [46:33<56:43,  1.69it/s, acc=0.662, epoch=5, loss=1.1]   

epoch:5, idx:5099/10845, loss:1.0960791122095257, acc:0.6620588235294118


 48%|████▊     | 5200/10845 [47:26<36:35,  2.57it/s, acc=0.662, epoch=5, loss=1.1]  

epoch:5, idx:5199/10845, loss:1.0986176549012845, acc:0.661923076923077


 49%|████▉     | 5300/10845 [48:20<37:24,  2.47it/s, acc=0.662, epoch=5, loss=1.1]  

epoch:5, idx:5299/10845, loss:1.0999765085611704, acc:0.6616037735849056


 50%|████▉     | 5400/10845 [49:13<40:49,  2.22it/s, acc=0.661, epoch=5, loss=1.1]  

epoch:5, idx:5399/10845, loss:1.1002155801764242, acc:0.6614814814814814


 51%|█████     | 5500/10845 [50:06<34:09,  2.61it/s, acc=0.662, epoch=5, loss=1.1]  

epoch:5, idx:5499/10845, loss:1.0984818758856167, acc:0.6621818181818182


 52%|█████▏    | 5600/10845 [51:00<57:14,  1.53it/s, acc=0.661, epoch=5, loss=1.1]  

epoch:5, idx:5599/10845, loss:1.1018032994121314, acc:0.6609375


 53%|█████▎    | 5700/10845 [51:54<53:43,  1.60it/s, acc=0.66, epoch=5, loss=1.1]   

epoch:5, idx:5699/10845, loss:1.1040455538400433, acc:0.6602631578947369


 53%|█████▎    | 5800/10845 [52:46<38:21,  2.19it/s, acc=0.66, epoch=5, loss=1.1]   

epoch:5, idx:5799/10845, loss:1.1047998536712136, acc:0.6602586206896551


 54%|█████▍    | 5900/10845 [53:41<45:43,  1.80it/s, acc=0.66, epoch=5, loss=1.1]  

epoch:5, idx:5899/10845, loss:1.1046333940604987, acc:0.6602118644067797


 55%|█████▌    | 6000/10845 [54:37<40:48,  1.98it/s, acc=0.66, epoch=5, loss=1.11]  

epoch:5, idx:5999/10845, loss:1.106207595532139, acc:0.6599583333333333


 56%|█████▌    | 6100/10845 [55:32<59:01,  1.34it/s, acc=0.659, epoch=5, loss=1.11] 

epoch:5, idx:6099/10845, loss:1.1075138778618125, acc:0.6594672131147541


 57%|█████▋    | 6200/10845 [56:23<36:35,  2.12it/s, acc=0.659, epoch=5, loss=1.11]  

epoch:5, idx:6199/10845, loss:1.1062309974672333, acc:0.6592741935483871


 58%|█████▊    | 6300/10845 [57:18<35:59,  2.10it/s, acc=0.66, epoch=5, loss=1.11]   

epoch:5, idx:6299/10845, loss:1.1061868492809552, acc:0.6596031746031746


 59%|█████▉    | 6400/10845 [58:13<35:55,  2.06it/s, acc=0.659, epoch=5, loss=1.11]  

epoch:5, idx:6399/10845, loss:1.1069741007173435, acc:0.659375


 60%|█████▉    | 6500/10845 [59:04<32:50,  2.21it/s, acc=0.66, epoch=5, loss=1.11] 

epoch:5, idx:6499/10845, loss:1.1066057443848023, acc:0.6595769230769231


 61%|██████    | 6600/10845 [59:52<25:16,  2.80it/s, acc=0.66, epoch=5, loss=1.11] 

epoch:5, idx:6599/10845, loss:1.1069279028746215, acc:0.6595075757575758


 62%|██████▏   | 6700/10845 [1:00:38<24:03,  2.87it/s, acc=0.659, epoch=5, loss=1.11]

epoch:5, idx:6699/10845, loss:1.1076580923751218, acc:0.6591044776119404


 63%|██████▎   | 6801/10845 [1:01:19<21:34,  3.13it/s, acc=0.66, epoch=5, loss=1.11] 

epoch:5, idx:6799/10845, loss:1.106596881041632, acc:0.6596323529411765


 64%|██████▎   | 6900/10845 [1:02:01<20:40,  3.18it/s, acc=0.659, epoch=5, loss=1.11]

epoch:5, idx:6899/10845, loss:1.1077474677173988, acc:0.6591304347826087


 65%|██████▍   | 7000/10845 [1:02:45<36:44,  1.74it/s, acc=0.658, epoch=5, loss=1.11]

epoch:5, idx:6999/10845, loss:1.109448234358004, acc:0.6583214285714286


 65%|██████▌   | 7100/10845 [1:03:26<27:55,  2.23it/s, acc=0.658, epoch=5, loss=1.11]

epoch:5, idx:7099/10845, loss:1.1095921496816084, acc:0.6584154929577465


 66%|██████▋   | 7200/10845 [1:04:07<23:42,  2.56it/s, acc=0.658, epoch=5, loss=1.11]

epoch:5, idx:7199/10845, loss:1.1103648835130864, acc:0.6580555555555555


 67%|██████▋   | 7300/10845 [1:04:46<21:48,  2.71it/s, acc=0.657, epoch=5, loss=1.11]

epoch:5, idx:7299/10845, loss:1.1112811281297306, acc:0.6573630136986301


 68%|██████▊   | 7400/10845 [1:05:28<19:17,  2.98it/s, acc=0.657, epoch=5, loss=1.11]

epoch:5, idx:7399/10845, loss:1.111555394220191, acc:0.6571283783783783


 69%|██████▉   | 7500/10845 [1:06:10<23:02,  2.42it/s, acc=0.656, epoch=5, loss=1.11]

epoch:5, idx:7499/10845, loss:1.1141283158580462, acc:0.6563


 70%|███████   | 7601/10845 [1:06:52<17:58,  3.01it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:7599/10845, loss:1.1154232523119763, acc:0.6558223684210527


 71%|███████   | 7700/10845 [1:07:33<19:32,  2.68it/s, acc=0.655, epoch=5, loss=1.12]

epoch:5, idx:7699/10845, loss:1.115956659553113, acc:0.655487012987013


 72%|███████▏  | 7800/10845 [1:08:18<36:38,  1.39it/s, acc=0.656, epoch=5, loss=1.11]

epoch:5, idx:7799/10845, loss:1.1147219950686662, acc:0.6559935897435898


 73%|███████▎  | 7900/10845 [1:09:13<25:57,  1.89it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:7899/10845, loss:1.115419889579091, acc:0.655759493670886


 74%|███████▍  | 8000/10845 [1:10:06<17:55,  2.65it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:7999/10845, loss:1.1154088083542884, acc:0.65578125


 75%|███████▍  | 8100/10845 [1:11:05<19:40,  2.32it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:8099/10845, loss:1.115819712733781, acc:0.6559876543209876


 76%|███████▌  | 8200/10845 [1:12:04<24:46,  1.78it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:8199/10845, loss:1.1153834145061854, acc:0.6560365853658536


 77%|███████▋  | 8300/10845 [1:12:58<24:08,  1.76it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:8299/10845, loss:1.1156799038670149, acc:0.6560240963855422


 77%|███████▋  | 8400/10845 [1:13:52<18:50,  2.16it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:8399/10845, loss:1.1156821383358466, acc:0.6560119047619047


 78%|███████▊  | 8500/10845 [1:14:48<20:31,  1.90it/s, acc=0.656, epoch=5, loss=1.11]

epoch:5, idx:8499/10845, loss:1.1148827459356363, acc:0.656235294117647


 79%|███████▉  | 8600/10845 [1:15:44<20:04,  1.86it/s, acc=0.656, epoch=5, loss=1.11]

epoch:5, idx:8599/10845, loss:1.1145900409339473, acc:0.6563081395348838


 80%|████████  | 8700/10845 [1:16:41<17:22,  2.06it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:8699/10845, loss:1.115050286517061, acc:0.6558620689655172


 81%|████████  | 8800/10845 [1:17:36<20:39,  1.65it/s, acc=0.655, epoch=5, loss=1.12]

epoch:5, idx:8799/10845, loss:1.116737032840875, acc:0.6553693181818182


 82%|████████▏ | 8900/10845 [1:18:31<19:29,  1.66it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:8899/10845, loss:1.115310412005762, acc:0.6557303370786517


 83%|████████▎ | 9000/10845 [1:19:26<14:08,  2.17it/s, acc=0.656, epoch=5, loss=1.12]

epoch:5, idx:8999/10845, loss:1.1154813709821967, acc:0.6556944444444445


 84%|████████▍ | 9100/10845 [1:20:23<19:28,  1.49it/s, acc=0.655, epoch=5, loss=1.12]

epoch:5, idx:9099/10845, loss:1.1171795845260988, acc:0.6550549450549451


 85%|████████▍ | 9200/10845 [1:21:12<14:24,  1.90it/s, acc=0.655, epoch=5, loss=1.12]

epoch:5, idx:9199/10845, loss:1.116704311678591, acc:0.6547010869565217


 86%|████████▌ | 9300/10845 [1:22:04<17:58,  1.43it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9299/10845, loss:1.117452110100177, acc:0.6544623655913978


 87%|████████▋ | 9400/10845 [1:22:56<12:37,  1.91it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9399/10845, loss:1.1200042213562955, acc:0.6539095744680851


 88%|████████▊ | 9500/10845 [1:23:46<09:23,  2.39it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9499/10845, loss:1.119400943997659, acc:0.6542368421052631


 89%|████████▊ | 9600/10845 [1:24:41<11:25,  1.82it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9599/10845, loss:1.119450112186993, acc:0.6541927083333333


 89%|████████▉ | 9701/10845 [1:25:34<08:34,  2.22it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9699/10845, loss:1.1186536618944296, acc:0.6542525773195876


 90%|█████████ | 9800/10845 [1:26:29<12:16,  1.42it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9799/10845, loss:1.1195551690003094, acc:0.6538775510204081


 91%|█████████▏| 9900/10845 [1:27:21<07:13,  2.18it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9899/10845, loss:1.1195783315132362, acc:0.6537373737373737


 92%|█████████▏| 10000/10845 [1:28:14<07:08,  1.97it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:9999/10845, loss:1.1197251464694737, acc:0.653575


 93%|█████████▎| 10100/10845 [1:29:08<07:29,  1.66it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:10099/10845, loss:1.119585663502169, acc:0.6537623762376238


 94%|█████████▍| 10200/10845 [1:30:02<07:35,  1.41it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:10199/10845, loss:1.1198065843038698, acc:0.6535539215686275


 95%|█████████▍| 10300/10845 [1:30:54<04:54,  1.85it/s, acc=0.653, epoch=5, loss=1.12]

epoch:5, idx:10299/10845, loss:1.1204903468547516, acc:0.6530825242718447


 96%|█████████▌| 10400/10845 [1:31:44<02:47,  2.65it/s, acc=0.653, epoch=5, loss=1.12]

epoch:5, idx:10399/10845, loss:1.1192955918868002, acc:0.6533173076923077


 97%|█████████▋| 10500/10845 [1:32:38<02:07,  2.70it/s, acc=0.653, epoch=5, loss=1.12]

epoch:5, idx:10499/10845, loss:1.1194089059858094, acc:0.6533571428571429


 98%|█████████▊| 10600/10845 [1:33:21<01:48,  2.26it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:10599/10845, loss:1.1189982573969184, acc:0.6535377358490566


 99%|█████████▊| 10700/10845 [1:34:05<01:34,  1.53it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:10699/10845, loss:1.1196944638529671, acc:0.6535747663551402


100%|█████████▉| 10800/10845 [1:34:54<00:22,  2.02it/s, acc=0.654, epoch=5, loss=1.12]

epoch:5, idx:10799/10845, loss:1.1195386129314149, acc:0.6536805555555556


100%|██████████| 10845/10845 [1:35:16<00:00,  2.90it/s, acc=0.654, epoch=5, loss=1.12]


epoch:5, idx:0/1275, loss:0.8034533262252808, acc:0.75
epoch:5, idx:100/1275, loss:1.2979395843968533, acc:0.6113861386138614
epoch:5, idx:200/1275, loss:1.2249165284099863, acc:0.6243781094527363
epoch:5, idx:300/1275, loss:1.2189486787168686, acc:0.6295681063122923
epoch:5, idx:400/1275, loss:1.2126896616229392, acc:0.628428927680798
epoch:5, idx:500/1275, loss:1.2133384219662633, acc:0.6282435129740519
epoch:5, idx:600/1275, loss:1.2226777943517524, acc:0.620216306156406
epoch:5, idx:700/1275, loss:1.218310822773932, acc:0.6223252496433667
epoch:5, idx:800/1275, loss:1.2311596192521848, acc:0.6198501872659176
epoch:5, idx:900/1275, loss:1.2189220231063622, acc:0.6226415094339622
epoch:5, idx:1000/1275, loss:1.2228937613499629, acc:0.6238761238761239
epoch:5, idx:1100/1275, loss:1.2185796971325438, acc:0.6246594005449592
epoch:5, idx:1200/1275, loss:1.218756529909685, acc:0.6244796003330558


  1%|          | 100/10845 [00:48<1:29:56,  1.99it/s, acc=0.688, epoch=6, loss=0.951]

epoch:6, idx:99/10845, loss:0.9508134710788727, acc:0.6875


  2%|▏         | 200/10845 [01:43<1:46:13,  1.67it/s, acc=0.693, epoch=6, loss=0.976]

epoch:6, idx:199/10845, loss:0.9756831452250481, acc:0.6925


  3%|▎         | 300/10845 [02:35<1:32:09,  1.91it/s, acc=0.687, epoch=6, loss=1.01] 

epoch:6, idx:299/10845, loss:1.006483266154925, acc:0.6866666666666666


  4%|▎         | 400/10845 [03:30<1:14:07,  2.35it/s, acc=0.695, epoch=6, loss=0.981]

epoch:6, idx:399/10845, loss:0.9810217581689358, acc:0.695


  5%|▍         | 500/10845 [04:23<1:48:15,  1.59it/s, acc=0.691, epoch=6, loss=1]    

epoch:6, idx:499/10845, loss:1.000376464009285, acc:0.691


  6%|▌         | 600/10845 [05:17<1:11:27,  2.39it/s, acc=0.691, epoch=6, loss=1.01]

epoch:6, idx:599/10845, loss:1.008531197309494, acc:0.69125


  6%|▋         | 700/10845 [06:04<1:30:18,  1.87it/s, acc=0.694, epoch=6, loss=1]   

epoch:6, idx:699/10845, loss:1.0044367987768992, acc:0.6935714285714286


  7%|▋         | 800/10845 [07:05<1:51:16,  1.50it/s, acc=0.691, epoch=6, loss=1.01]

epoch:6, idx:799/10845, loss:1.0116458536684514, acc:0.690625


  8%|▊         | 900/10845 [08:02<1:39:06,  1.67it/s, acc=0.687, epoch=6, loss=1.02]

epoch:6, idx:899/10845, loss:1.0217392679717805, acc:0.6869444444444445


  9%|▉         | 1000/10845 [08:58<2:18:00,  1.19it/s, acc=0.683, epoch=6, loss=1.04]

epoch:6, idx:999/10845, loss:1.043666899383068, acc:0.6835


 10%|█         | 1100/10845 [09:51<57:58,  2.80it/s, acc=0.683, epoch=6, loss=1.04]  

epoch:6, idx:1099/10845, loss:1.0439509361982346, acc:0.6829545454545455


 11%|█         | 1200/10845 [10:45<1:47:25,  1.50it/s, acc=0.68, epoch=6, loss=1.06] 

epoch:6, idx:1199/10845, loss:1.0557762296994526, acc:0.6802083333333333


 12%|█▏        | 1300/10845 [11:35<1:13:25,  2.17it/s, acc=0.682, epoch=6, loss=1.05]

epoch:6, idx:1299/10845, loss:1.051826166510582, acc:0.681923076923077


 13%|█▎        | 1400/10845 [12:28<1:08:29,  2.30it/s, acc=0.681, epoch=6, loss=1.06]

epoch:6, idx:1399/10845, loss:1.0565482942972864, acc:0.6807142857142857


 14%|█▍        | 1500/10845 [13:23<1:16:29,  2.04it/s, acc=0.681, epoch=6, loss=1.06]

epoch:6, idx:1499/10845, loss:1.0635352131525675, acc:0.6806666666666666


 15%|█▍        | 1600/10845 [14:16<50:47,  3.03it/s, acc=0.681, epoch=6, loss=1.06]  

epoch:6, idx:1599/10845, loss:1.0648425304144622, acc:0.68140625


 16%|█▌        | 1700/10845 [15:09<1:01:03,  2.50it/s, acc=0.679, epoch=6, loss=1.07]

epoch:6, idx:1699/10845, loss:1.0691270716050092, acc:0.6794117647058824


 17%|█▋        | 1800/10845 [15:59<1:20:56,  1.86it/s, acc=0.679, epoch=6, loss=1.06]

epoch:6, idx:1799/10845, loss:1.0648964954415958, acc:0.6790277777777778


 18%|█▊        | 1900/10845 [16:50<1:11:34,  2.08it/s, acc=0.681, epoch=6, loss=1.06]

epoch:6, idx:1899/10845, loss:1.0586856997326801, acc:0.6807894736842105


 18%|█▊        | 2000/10845 [17:41<46:14,  3.19it/s, acc=0.68, epoch=6, loss=1.06]   

epoch:6, idx:1999/10845, loss:1.0603804972469806, acc:0.68


 19%|█▉        | 2100/10845 [18:36<1:29:31,  1.63it/s, acc=0.68, epoch=6, loss=1.06] 

epoch:6, idx:2099/10845, loss:1.059107474968547, acc:0.6795238095238095


 20%|██        | 2200/10845 [19:28<1:22:55,  1.74it/s, acc=0.679, epoch=6, loss=1.06]

epoch:6, idx:2199/10845, loss:1.0597139791466974, acc:0.6789772727272727


 21%|██        | 2300/10845 [20:15<1:02:00,  2.30it/s, acc=0.678, epoch=6, loss=1.06]

epoch:6, idx:2299/10845, loss:1.0642519734735074, acc:0.6782608695652174


 22%|██▏       | 2400/10845 [21:08<1:08:04,  2.07it/s, acc=0.679, epoch=6, loss=1.07]

epoch:6, idx:2399/10845, loss:1.065860525717338, acc:0.6785416666666667


 23%|██▎       | 2500/10845 [21:58<52:33,  2.65it/s, acc=0.678, epoch=6, loss=1.07]  

epoch:6, idx:2499/10845, loss:1.0657221589803696, acc:0.6779


 24%|██▍       | 2600/10845 [22:54<1:08:51,  2.00it/s, acc=0.676, epoch=6, loss=1.07]

epoch:6, idx:2599/10845, loss:1.0725883504289848, acc:0.6763461538461538


 25%|██▍       | 2700/10845 [23:47<1:19:55,  1.70it/s, acc=0.676, epoch=6, loss=1.07]

epoch:6, idx:2699/10845, loss:1.0728649006728772, acc:0.6758333333333333


 26%|██▌       | 2800/10845 [24:37<1:21:53,  1.64it/s, acc=0.675, epoch=6, loss=1.07]

epoch:6, idx:2799/10845, loss:1.074881274082831, acc:0.6754464285714286


 27%|██▋       | 2900/10845 [25:30<1:03:40,  2.08it/s, acc=0.675, epoch=6, loss=1.08]

epoch:6, idx:2899/10845, loss:1.0778409243246605, acc:0.6747413793103448


 28%|██▊       | 3000/10845 [26:26<2:21:56,  1.09s/it, acc=0.676, epoch=6, loss=1.08]

epoch:6, idx:2999/10845, loss:1.0755879838665325, acc:0.6755833333333333


 29%|██▊       | 3100/10845 [27:21<51:40,  2.50it/s, acc=0.675, epoch=6, loss=1.08]  

epoch:6, idx:3099/10845, loss:1.0789073012144335, acc:0.6747580645161291


 30%|██▉       | 3200/10845 [28:12<46:37,  2.73it/s, acc=0.675, epoch=6, loss=1.08]  

epoch:6, idx:3199/10845, loss:1.0763987444899976, acc:0.675078125


 30%|███       | 3300/10845 [29:06<50:43,  2.48it/s, acc=0.674, epoch=6, loss=1.08]  

epoch:6, idx:3299/10845, loss:1.0791322058800494, acc:0.6741666666666667


 31%|███▏      | 3400/10845 [30:03<1:28:33,  1.40it/s, acc=0.675, epoch=6, loss=1.08]

epoch:6, idx:3399/10845, loss:1.0776256553127486, acc:0.6748529411764705


 32%|███▏      | 3500/10845 [30:56<1:03:07,  1.94it/s, acc=0.675, epoch=6, loss=1.08]

epoch:6, idx:3499/10845, loss:1.0765096393227578, acc:0.6746428571428571


 33%|███▎      | 3600/10845 [31:47<1:00:17,  2.00it/s, acc=0.674, epoch=6, loss=1.08]

epoch:6, idx:3599/10845, loss:1.0773079238749212, acc:0.6740277777777778


 34%|███▍      | 3700/10845 [32:35<43:52,  2.71it/s, acc=0.673, epoch=6, loss=1.08]  

epoch:6, idx:3699/10845, loss:1.0810702602686109, acc:0.6726351351351352


 35%|███▌      | 3800/10845 [33:27<1:13:02,  1.61it/s, acc=0.672, epoch=6, loss=1.08]

epoch:6, idx:3799/10845, loss:1.0820444248146133, acc:0.6719736842105263


 36%|███▌      | 3900/10845 [34:20<53:19,  2.17it/s, acc=0.672, epoch=6, loss=1.08]  

epoch:6, idx:3899/10845, loss:1.081560729444027, acc:0.6723717948717949


 37%|███▋      | 4000/10845 [35:08<57:27,  1.99it/s, acc=0.672, epoch=6, loss=1.08]  

epoch:6, idx:3999/10845, loss:1.0827997079715133, acc:0.6716875


 38%|███▊      | 4100/10845 [35:59<1:03:36,  1.77it/s, acc=0.672, epoch=6, loss=1.08]

epoch:6, idx:4099/10845, loss:1.0812902855655042, acc:0.6715853658536586


 39%|███▊      | 4200/10845 [36:53<1:05:59,  1.68it/s, acc=0.671, epoch=6, loss=1.08]

epoch:6, idx:4199/10845, loss:1.0822334603681452, acc:0.6713690476190476


 40%|███▉      | 4300/10845 [37:47<1:04:43,  1.69it/s, acc=0.671, epoch=6, loss=1.08]

epoch:6, idx:4299/10845, loss:1.0821949900513472, acc:0.6707558139534884


 41%|████      | 4400/10845 [38:38<1:03:47,  1.68it/s, acc=0.671, epoch=6, loss=1.08]

epoch:6, idx:4399/10845, loss:1.0815430486269972, acc:0.6707954545454545


 41%|████▏     | 4500/10845 [39:27<40:10,  2.63it/s, acc=0.671, epoch=6, loss=1.08]  

epoch:6, idx:4499/10845, loss:1.0800655869444211, acc:0.6711666666666667


 42%|████▏     | 4600/10845 [40:16<53:32,  1.94it/s, acc=0.671, epoch=6, loss=1.08]  

epoch:6, idx:4599/10845, loss:1.0778806229583595, acc:0.6714673913043478


 43%|████▎     | 4700/10845 [41:04<39:37,  2.58it/s, acc=0.671, epoch=6, loss=1.08]  

epoch:6, idx:4699/10845, loss:1.078668918704733, acc:0.6711702127659575


 44%|████▍     | 4800/10845 [41:59<57:21,  1.76it/s, acc=0.671, epoch=6, loss=1.08]  

epoch:6, idx:4799/10845, loss:1.0806477255312106, acc:0.670625


 45%|████▌     | 4900/10845 [42:54<1:10:07,  1.41it/s, acc=0.67, epoch=6, loss=1.08] 

epoch:6, idx:4899/10845, loss:1.0822313496105525, acc:0.6704081632653062


 46%|████▌     | 5001/10845 [43:48<44:42,  2.18it/s, acc=0.67, epoch=6, loss=1.08]   

epoch:6, idx:4999/10845, loss:1.0831324871242047, acc:0.67


 47%|████▋     | 5100/10845 [44:39<51:07,  1.87it/s, acc=0.67, epoch=6, loss=1.08]  

epoch:6, idx:5099/10845, loss:1.0828358781980534, acc:0.6698039215686274


 48%|████▊     | 5200/10845 [45:33<59:24,  1.58it/s, acc=0.67, epoch=6, loss=1.08]  

epoch:6, idx:5199/10845, loss:1.0837010707935462, acc:0.6701923076923076


 49%|████▉     | 5300/10845 [46:24<49:48,  1.86it/s, acc=0.67, epoch=6, loss=1.08]   

epoch:6, idx:5299/10845, loss:1.0837312533214407, acc:0.6703301886792453


 50%|████▉     | 5400/10845 [47:17<41:43,  2.17it/s, acc=0.67, epoch=6, loss=1.08]  

epoch:6, idx:5399/10845, loss:1.0846005291574532, acc:0.6701388888888888


 51%|█████     | 5500/10845 [48:09<45:07,  1.97it/s, acc=0.67, epoch=6, loss=1.08]   

epoch:6, idx:5499/10845, loss:1.084135267674923, acc:0.6696818181818182


 52%|█████▏    | 5600/10845 [49:01<40:01,  2.18it/s, acc=0.67, epoch=6, loss=1.08]  

epoch:6, idx:5599/10845, loss:1.0837187132079686, acc:0.6696875


 53%|█████▎    | 5700/10845 [49:53<42:59,  1.99it/s, acc=0.67, epoch=6, loss=1.08]   

epoch:6, idx:5699/10845, loss:1.0824195685982705, acc:0.6695175438596491


 53%|█████▎    | 5800/10845 [50:43<34:40,  2.43it/s, acc=0.669, epoch=6, loss=1.08]  

epoch:6, idx:5799/10845, loss:1.082646258951261, acc:0.669051724137931


 54%|█████▍    | 5900/10845 [51:34<42:19,  1.95it/s, acc=0.669, epoch=6, loss=1.08]  

epoch:6, idx:5899/10845, loss:1.0836152501682104, acc:0.6686016949152542


 55%|█████▌    | 6000/10845 [52:30<46:34,  1.73it/s, acc=0.668, epoch=6, loss=1.09]  

epoch:6, idx:5999/10845, loss:1.0852067691336076, acc:0.66825


 56%|█████▌    | 6100/10845 [53:19<32:13,  2.45it/s, acc=0.669, epoch=6, loss=1.08]

epoch:6, idx:6099/10845, loss:1.0820199193182538, acc:0.6686065573770492


 57%|█████▋    | 6200/10845 [54:19<41:21,  1.87it/s, acc=0.668, epoch=6, loss=1.08]  

epoch:6, idx:6199/10845, loss:1.0825778730598188, acc:0.6681048387096774


 58%|█████▊    | 6300/10845 [55:11<34:57,  2.17it/s, acc=0.669, epoch=6, loss=1.08]

epoch:6, idx:6299/10845, loss:1.0822136637473863, acc:0.6687301587301587


 59%|█████▉    | 6400/10845 [56:04<32:01,  2.31it/s, acc=0.669, epoch=6, loss=1.08]  

epoch:6, idx:6399/10845, loss:1.083197361701168, acc:0.6686328125


 60%|█████▉    | 6500/10845 [56:56<37:33,  1.93it/s, acc=0.668, epoch=6, loss=1.08]

epoch:6, idx:6499/10845, loss:1.083619383247999, acc:0.6683846153846154


 61%|██████    | 6600/10845 [57:52<47:55,  1.48it/s, acc=0.668, epoch=6, loss=1.08]

epoch:6, idx:6599/10845, loss:1.0839333172052195, acc:0.668219696969697


 62%|██████▏   | 6700/10845 [58:43<33:23,  2.07it/s, acc=0.668, epoch=6, loss=1.09]

epoch:6, idx:6699/10845, loss:1.0851198091035459, acc:0.6677611940298508


 63%|██████▎   | 6800/10845 [59:37<42:19,  1.59it/s, acc=0.667, epoch=6, loss=1.09]

epoch:6, idx:6799/10845, loss:1.08523448684198, acc:0.6673161764705883


 64%|██████▎   | 6900/10845 [1:00:31<35:58,  1.83it/s, acc=0.667, epoch=6, loss=1.09]

epoch:6, idx:6899/10845, loss:1.0862906500610752, acc:0.6671014492753623


 65%|██████▍   | 7000/10845 [1:01:26<20:33,  3.12it/s, acc=0.667, epoch=6, loss=1.09]

epoch:6, idx:6999/10845, loss:1.0862255030572414, acc:0.6671428571428571


 65%|██████▌   | 7100/10845 [1:02:20<35:33,  1.76it/s, acc=0.667, epoch=6, loss=1.09]

epoch:6, idx:7099/10845, loss:1.0859801015425736, acc:0.6672535211267606


 66%|██████▋   | 7200/10845 [1:03:12<33:44,  1.80it/s, acc=0.667, epoch=6, loss=1.09]

epoch:6, idx:7199/10845, loss:1.0872633045042555, acc:0.6668402777777778


 67%|██████▋   | 7300/10845 [1:04:07<27:56,  2.11it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7299/10845, loss:1.087841419181595, acc:0.6664383561643835


 68%|██████▊   | 7400/10845 [1:05:01<26:04,  2.20it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7399/10845, loss:1.087699626462685, acc:0.6662837837837838


 69%|██████▉   | 7500/10845 [1:05:54<21:51,  2.55it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7499/10845, loss:1.090501633409659, acc:0.6658666666666667


 70%|███████   | 7600/10845 [1:06:50<37:52,  1.43it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7599/10845, loss:1.090744274176265, acc:0.6655592105263158


 71%|███████   | 7700/10845 [1:07:40<18:47,  2.79it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7699/10845, loss:1.090296130091339, acc:0.6658116883116884


 72%|███████▏  | 7800/10845 [1:08:29<29:43,  1.71it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7799/10845, loss:1.0900112964365727, acc:0.6660576923076923


 73%|███████▎  | 7900/10845 [1:09:22<20:49,  2.36it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7899/10845, loss:1.0895907234353355, acc:0.6661392405063291


 74%|███████▍  | 8000/10845 [1:10:15<32:00,  1.48it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:7999/10845, loss:1.0894386674948036, acc:0.66596875


 75%|███████▍  | 8100/10845 [1:11:05<25:16,  1.81it/s, acc=0.665, epoch=6, loss=1.09]

epoch:6, idx:8099/10845, loss:1.089971463603002, acc:0.6654012345679012


 76%|███████▌  | 8200/10845 [1:11:54<18:53,  2.33it/s, acc=0.666, epoch=6, loss=1.09]

epoch:6, idx:8199/10845, loss:1.0894651869411875, acc:0.6655792682926829


 77%|███████▋  | 8300/10845 [1:12:47<22:26,  1.89it/s, acc=0.665, epoch=6, loss=1.09]

epoch:6, idx:8299/10845, loss:1.0896057587036168, acc:0.6651204819277109


 77%|███████▋  | 8400/10845 [1:13:38<20:11,  2.02it/s, acc=0.665, epoch=6, loss=1.09]

epoch:6, idx:8399/10845, loss:1.08984700641107, acc:0.6654166666666667


 78%|███████▊  | 8500/10845 [1:14:31<21:52,  1.79it/s, acc=0.665, epoch=6, loss=1.09]

epoch:6, idx:8499/10845, loss:1.0909538227495026, acc:0.6649705882352941


 79%|███████▉  | 8600/10845 [1:15:28<20:16,  1.85it/s, acc=0.665, epoch=6, loss=1.09]

epoch:6, idx:8599/10845, loss:1.0902208896258543, acc:0.6650581395348837


 80%|████████  | 8700/10845 [1:16:22<17:25,  2.05it/s, acc=0.665, epoch=6, loss=1.09]

epoch:6, idx:8699/10845, loss:1.0916824819335993, acc:0.6645114942528736


 81%|████████  | 8800/10845 [1:17:14<17:38,  1.93it/s, acc=0.665, epoch=6, loss=1.09]

epoch:6, idx:8799/10845, loss:1.0910948225618764, acc:0.6646590909090909


 82%|████████▏ | 8900/10845 [1:18:07<16:11,  2.00it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:8899/10845, loss:1.0929068033045597, acc:0.6642134831460674


 83%|████████▎ | 9000/10845 [1:19:00<14:29,  2.12it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:8999/10845, loss:1.094007607450088, acc:0.6639722222222222


 84%|████████▍ | 9100/10845 [1:19:54<13:36,  2.14it/s, acc=0.664, epoch=6, loss=1.1] 

epoch:6, idx:9099/10845, loss:1.0950099705634537, acc:0.6635989010989011


 85%|████████▍ | 9200/10845 [1:20:48<12:44,  2.15it/s, acc=0.663, epoch=6, loss=1.1] 

epoch:6, idx:9199/10845, loss:1.0958489708116521, acc:0.6633423913043478


 86%|████████▌ | 9300/10845 [1:21:42<17:33,  1.47it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:9299/10845, loss:1.093739639107258, acc:0.6638440860215054


 87%|████████▋ | 9400/10845 [1:22:38<13:46,  1.75it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:9399/10845, loss:1.0928698311238847, acc:0.6640159574468085


 88%|████████▊ | 9500/10845 [1:23:27<15:18,  1.46it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:9499/10845, loss:1.0931442509732747, acc:0.6637368421052632


 89%|████████▊ | 9600/10845 [1:24:22<14:55,  1.39it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:9599/10845, loss:1.093151974954332, acc:0.6636197916666666


 89%|████████▉ | 9700/10845 [1:25:12<07:56,  2.40it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:9699/10845, loss:1.0935683388839064, acc:0.6634278350515463


 90%|█████████ | 9800/10845 [1:26:08<09:00,  1.93it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:9799/10845, loss:1.0929391267500361, acc:0.6633673469387755


 91%|█████████▏| 9900/10845 [1:27:02<08:08,  1.94it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:9899/10845, loss:1.092984754536489, acc:0.663409090909091


 92%|█████████▏| 10000/10845 [1:27:55<06:19,  2.23it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:9999/10845, loss:1.0926905762344599, acc:0.663475


 93%|█████████▎| 10100/10845 [1:28:46<06:45,  1.84it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:10099/10845, loss:1.093309203212214, acc:0.6632425742574257


 94%|█████████▍| 10200/10845 [1:29:30<04:58,  2.16it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:10199/10845, loss:1.0919348685092787, acc:0.6636764705882353


 95%|█████████▍| 10300/10845 [1:30:21<03:06,  2.92it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:10299/10845, loss:1.0924364253415644, acc:0.6633252427184466


 96%|█████████▌| 10400/10845 [1:31:13<02:38,  2.81it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:10399/10845, loss:1.0919673604661455, acc:0.6632451923076923


 97%|█████████▋| 10500/10845 [1:32:05<03:22,  1.71it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:10499/10845, loss:1.0911919364560219, acc:0.6634761904761904


 98%|█████████▊| 10600/10845 [1:32:54<02:01,  2.02it/s, acc=0.664, epoch=6, loss=1.09]

epoch:6, idx:10599/10845, loss:1.0909480929683963, acc:0.663561320754717


 99%|█████████▊| 10700/10845 [1:33:46<01:16,  1.89it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:10699/10845, loss:1.0921176091560694, acc:0.6630607476635514


100%|█████████▉| 10800/10845 [1:34:40<00:20,  2.19it/s, acc=0.663, epoch=6, loss=1.09]

epoch:6, idx:10799/10845, loss:1.0928939929687314, acc:0.662962962962963


100%|██████████| 10845/10845 [1:35:03<00:00,  2.12it/s, acc=0.663, epoch=6, loss=1.09]


epoch:6, idx:0/1275, loss:0.7735567092895508, acc:0.75
epoch:6, idx:100/1275, loss:1.2639829563622427, acc:0.5841584158415841
epoch:6, idx:200/1275, loss:1.22195040052803, acc:0.6119402985074627
epoch:6, idx:300/1275, loss:1.2070421649768106, acc:0.6204318936877077
epoch:6, idx:400/1275, loss:1.190553203782536, acc:0.6228179551122195
epoch:6, idx:500/1275, loss:1.1947083425617027, acc:0.6172654690618763
epoch:6, idx:600/1275, loss:1.2029379240288314, acc:0.6160565723793677
epoch:6, idx:700/1275, loss:1.1998382926667468, acc:0.6187589158345221
epoch:6, idx:800/1275, loss:1.2117542707815896, acc:0.6173533083645443
epoch:6, idx:900/1275, loss:1.2040955263554851, acc:0.6204217536071032
epoch:6, idx:1000/1275, loss:1.20769179706926, acc:0.6216283716283716
epoch:6, idx:1100/1275, loss:1.201916223380048, acc:0.6226158038147139
epoch:6, idx:1200/1275, loss:1.2018292207304981, acc:0.6226061615320566


  1%|          | 101/10845 [00:28<34:11,  5.24it/s, acc=0.688, epoch=7, loss=1.02] 

epoch:7, idx:99/10845, loss:1.023133534193039, acc:0.6875


  2%|▏         | 201/10845 [00:58<50:30,  3.51it/s, acc=0.693, epoch=7, loss=0.995]  

epoch:7, idx:199/10845, loss:0.9985329020023346, acc:0.6925


  3%|▎         | 300/10845 [01:26<1:07:29,  2.60it/s, acc=0.695, epoch=7, loss=0.999]

epoch:7, idx:299/10845, loss:0.9985483199357986, acc:0.695


  4%|▎         | 400/10845 [01:56<56:14,  3.10it/s, acc=0.692, epoch=7, loss=1.02]   

epoch:7, idx:399/10845, loss:1.0167077492177485, acc:0.691875


  5%|▍         | 500/10845 [02:24<50:01,  3.45it/s, acc=0.7, epoch=7, loss=0.979]    

epoch:7, idx:499/10845, loss:0.9790813404321671, acc:0.6995


  6%|▌         | 600/10845 [02:52<42:40,  4.00it/s, acc=0.7, epoch=7, loss=0.967]    

epoch:7, idx:599/10845, loss:0.9667113164067268, acc:0.7004166666666667


  6%|▋         | 700/10845 [03:21<48:00,  3.52it/s, acc=0.693, epoch=7, loss=1]      

epoch:7, idx:699/10845, loss:1.00166674886431, acc:0.6925


  7%|▋         | 800/10845 [03:50<39:52,  4.20it/s, acc=0.689, epoch=7, loss=1.01]   

epoch:7, idx:799/10845, loss:1.014469945654273, acc:0.6890625


  8%|▊         | 900/10845 [04:20<52:07,  3.18it/s, acc=0.69, epoch=7, loss=1.01]   

epoch:7, idx:899/10845, loss:1.0123914839161767, acc:0.6897222222222222


  9%|▉         | 1001/10845 [04:47<34:12,  4.80it/s, acc=0.691, epoch=7, loss=1.01] 

epoch:7, idx:999/10845, loss:1.0099796085357666, acc:0.6905


 10%|█         | 1100/10845 [05:30<37:52,  4.29it/s, acc=0.685, epoch=7, loss=1.03]  

epoch:7, idx:1099/10845, loss:1.0275551800836216, acc:0.6847727272727273


 11%|█         | 1200/10845 [06:20<3:46:50,  1.41s/it, acc=0.682, epoch=7, loss=1.03]

epoch:7, idx:1199/10845, loss:1.034539961864551, acc:0.6825


 12%|█▏        | 1301/10845 [06:56<57:43,  2.76it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:1299/10845, loss:1.0330103709606024, acc:0.683076923076923


 13%|█▎        | 1400/10845 [07:47<1:14:45,  2.11it/s, acc=0.683, epoch=7, loss=1.03]

epoch:7, idx:1399/10845, loss:1.0311613579733032, acc:0.6833928571428571


 14%|█▍        | 1500/10845 [08:42<1:13:01,  2.13it/s, acc=0.683, epoch=7, loss=1.03]

epoch:7, idx:1499/10845, loss:1.029042171994845, acc:0.683


 15%|█▍        | 1600/10845 [09:27<1:40:16,  1.54it/s, acc=0.682, epoch=7, loss=1.03]

epoch:7, idx:1599/10845, loss:1.0326506697759033, acc:0.6825


 16%|█▌        | 1700/10845 [10:11<1:42:02,  1.49it/s, acc=0.681, epoch=7, loss=1.04]

epoch:7, idx:1699/10845, loss:1.036786202157245, acc:0.6811764705882353


 17%|█▋        | 1801/10845 [11:01<43:54,  3.43it/s, acc=0.682, epoch=7, loss=1.03]  

epoch:7, idx:1799/10845, loss:1.0339896847142114, acc:0.6823611111111111


 18%|█▊        | 1901/10845 [11:48<31:27,  4.74it/s, acc=0.682, epoch=7, loss=1.03]  

epoch:7, idx:1899/10845, loss:1.0334711052242078, acc:0.6818421052631579


 18%|█▊        | 2000/10845 [12:31<51:51,  2.84it/s, acc=0.684, epoch=7, loss=1.03]  

epoch:7, idx:1999/10845, loss:1.0323452822268009, acc:0.68375


 19%|█▉        | 2100/10845 [13:25<1:11:01,  2.05it/s, acc=0.685, epoch=7, loss=1.03]

epoch:7, idx:2099/10845, loss:1.0285037660314924, acc:0.6847619047619048


 20%|██        | 2200/10845 [14:16<1:18:44,  1.83it/s, acc=0.684, epoch=7, loss=1.03]

epoch:7, idx:2199/10845, loss:1.030251189253547, acc:0.6840909090909091


 21%|██        | 2300/10845 [15:12<51:21,  2.77it/s, acc=0.685, epoch=7, loss=1.03]  

epoch:7, idx:2299/10845, loss:1.030672615325969, acc:0.6853260869565218


 22%|██▏       | 2400/10845 [16:09<1:26:18,  1.63it/s, acc=0.684, epoch=7, loss=1.03]

epoch:7, idx:2399/10845, loss:1.0339807976037263, acc:0.684375


 23%|██▎       | 2500/10845 [17:00<1:28:52,  1.56it/s, acc=0.686, epoch=7, loss=1.03]

epoch:7, idx:2499/10845, loss:1.027989675450325, acc:0.6858


 24%|██▍       | 2600/10845 [17:55<1:09:37,  1.97it/s, acc=0.684, epoch=7, loss=1.03]

epoch:7, idx:2599/10845, loss:1.0331816166180832, acc:0.6842307692307692


 25%|██▍       | 2700/10845 [18:53<1:05:28,  2.07it/s, acc=0.685, epoch=7, loss=1.03]

epoch:7, idx:2699/10845, loss:1.030875079786336, acc:0.6847222222222222


 26%|██▌       | 2800/10845 [19:45<59:01,  2.27it/s, acc=0.686, epoch=7, loss=1.03]  

epoch:7, idx:2799/10845, loss:1.028354455041034, acc:0.6855357142857142


 27%|██▋       | 2900/10845 [20:40<1:32:49,  1.43it/s, acc=0.687, epoch=7, loss=1.02]

epoch:7, idx:2899/10845, loss:1.0215375802640256, acc:0.6872413793103448


 28%|██▊       | 3000/10845 [21:33<1:35:23,  1.37it/s, acc=0.688, epoch=7, loss=1.02]

epoch:7, idx:2999/10845, loss:1.021487488647302, acc:0.6876666666666666


 29%|██▊       | 3100/10845 [22:28<57:16,  2.25it/s, acc=0.688, epoch=7, loss=1.02]  

epoch:7, idx:3099/10845, loss:1.0217348377550801, acc:0.6876612903225806


 30%|██▉       | 3200/10845 [23:22<1:33:58,  1.36it/s, acc=0.688, epoch=7, loss=1.02]

epoch:7, idx:3199/10845, loss:1.0215981203503908, acc:0.687578125


 30%|███       | 3300/10845 [24:16<57:08,  2.20it/s, acc=0.686, epoch=7, loss=1.02]  

epoch:7, idx:3299/10845, loss:1.0245781855330323, acc:0.6862878787878788


 31%|███▏      | 3400/10845 [25:08<1:03:40,  1.95it/s, acc=0.686, epoch=7, loss=1.03]

epoch:7, idx:3399/10845, loss:1.0259129363123107, acc:0.6855882352941176


 32%|███▏      | 3500/10845 [26:03<1:27:18,  1.40it/s, acc=0.685, epoch=7, loss=1.03]

epoch:7, idx:3499/10845, loss:1.0262575167928423, acc:0.6848571428571428


 33%|███▎      | 3600/10845 [26:57<56:04,  2.15it/s, acc=0.685, epoch=7, loss=1.03]  

epoch:7, idx:3599/10845, loss:1.025200317270226, acc:0.6848611111111111


 34%|███▍      | 3700/10845 [27:54<48:45,  2.44it/s, acc=0.685, epoch=7, loss=1.02]  

epoch:7, idx:3699/10845, loss:1.0235068411118275, acc:0.6851351351351351


 35%|███▌      | 3800/10845 [28:47<59:23,  1.98it/s, acc=0.685, epoch=7, loss=1.02]  

epoch:7, idx:3799/10845, loss:1.0236527216748188, acc:0.6854605263157895


 36%|███▌      | 3900/10845 [29:38<41:49,  2.77it/s, acc=0.685, epoch=7, loss=1.02]  

epoch:7, idx:3899/10845, loss:1.0248908181526721, acc:0.6851282051282052


 37%|███▋      | 4000/10845 [30:30<48:10,  2.37it/s, acc=0.685, epoch=7, loss=1.02]  

epoch:7, idx:3999/10845, loss:1.0249255205243826, acc:0.685125


 38%|███▊      | 4100/10845 [31:22<53:12,  2.11it/s, acc=0.685, epoch=7, loss=1.03]  

epoch:7, idx:4099/10845, loss:1.0251307763268307, acc:0.6848170731707317


 39%|███▊      | 4200/10845 [32:17<53:22,  2.08it/s, acc=0.685, epoch=7, loss=1.02]  

epoch:7, idx:4199/10845, loss:1.0247804645413443, acc:0.6851785714285714


 40%|███▉      | 4300/10845 [33:06<50:01,  2.18it/s, acc=0.685, epoch=7, loss=1.02]  

epoch:7, idx:4299/10845, loss:1.02465568520302, acc:0.6850581395348837


 41%|████      | 4400/10845 [34:01<1:00:23,  1.78it/s, acc=0.685, epoch=7, loss=1.03]

epoch:7, idx:4399/10845, loss:1.0256469760970637, acc:0.6848863636363637


 41%|████▏     | 4500/10845 [34:55<39:38,  2.67it/s, acc=0.684, epoch=7, loss=1.03]  

epoch:7, idx:4499/10845, loss:1.0273852766752243, acc:0.6840555555555555


 42%|████▏     | 4600/10845 [35:49<1:05:17,  1.59it/s, acc=0.683, epoch=7, loss=1.03]

epoch:7, idx:4599/10845, loss:1.0313337687176207, acc:0.6831521739130435


 43%|████▎     | 4700/10845 [36:45<47:31,  2.16it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:4699/10845, loss:1.0305492872633832, acc:0.6832978723404255


 44%|████▍     | 4800/10845 [37:37<54:05,  1.86it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:4799/10845, loss:1.0309937466060122, acc:0.6834375


 45%|████▌     | 4900/10845 [38:29<44:40,  2.22it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:4899/10845, loss:1.0309742533674044, acc:0.6829081632653061


 46%|████▌     | 5000/10845 [39:20<56:01,  1.74it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:4999/10845, loss:1.0299102202177048, acc:0.68295


 47%|████▋     | 5100/10845 [40:10<39:42,  2.41it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:5099/10845, loss:1.0296055141967886, acc:0.683235294117647


 48%|████▊     | 5200/10845 [41:04<46:45,  2.01it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:5199/10845, loss:1.0294470198223225, acc:0.6831730769230769


 49%|████▉     | 5300/10845 [41:53<59:03,  1.56it/s, acc=0.682, epoch=7, loss=1.03]  

epoch:7, idx:5299/10845, loss:1.0296417550883203, acc:0.6824056603773585


 50%|████▉     | 5400/10845 [42:46<55:36,  1.63it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:5399/10845, loss:1.0274740415149264, acc:0.6831018518518519


 51%|█████     | 5500/10845 [43:42<43:57,  2.03it/s, acc=0.682, epoch=7, loss=1.03]  

epoch:7, idx:5499/10845, loss:1.0296862068067898, acc:0.6821363636363637


 52%|█████▏    | 5600/10845 [44:38<1:03:26,  1.38it/s, acc=0.682, epoch=7, loss=1.03]

epoch:7, idx:5599/10845, loss:1.0307570625628744, acc:0.6820982142857143


 53%|█████▎    | 5700/10845 [45:28<43:08,  1.99it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:5699/10845, loss:1.030068010499603, acc:0.6827192982456141


 53%|█████▎    | 5801/10845 [46:13<30:58,  2.71it/s, acc=0.683, epoch=7, loss=1.03]  

epoch:7, idx:5799/10845, loss:1.0321097398626393, acc:0.6825431034482758


 54%|█████▍    | 5900/10845 [46:58<47:51,  1.72it/s, acc=0.683, epoch=7, loss=1.03]

epoch:7, idx:5899/10845, loss:1.0313810415389175, acc:0.6827118644067797


 55%|█████▌    | 6000/10845 [47:45<51:24,  1.57it/s, acc=0.682, epoch=7, loss=1.03]

epoch:7, idx:5999/10845, loss:1.0330013254185517, acc:0.6820416666666667


 56%|█████▌    | 6100/10845 [49:01<38:38,  2.05it/s, acc=0.682, epoch=7, loss=1.03]  

epoch:7, idx:6099/10845, loss:1.0335611886079195, acc:0.6815983606557378


 57%|█████▋    | 6200/10845 [49:59<36:16,  2.13it/s, acc=0.682, epoch=7, loss=1.03]  

epoch:7, idx:6199/10845, loss:1.0338059408241702, acc:0.6819758064516129


 58%|█████▊    | 6300/10845 [50:53<30:27,  2.49it/s, acc=0.681, epoch=7, loss=1.03]

epoch:7, idx:6299/10845, loss:1.0348603272816492, acc:0.6813888888888889


 59%|█████▉    | 6400/10845 [51:46<51:00,  1.45it/s, acc=0.681, epoch=7, loss=1.03]

epoch:7, idx:6399/10845, loss:1.0348267740663142, acc:0.681015625


 60%|█████▉    | 6500/10845 [52:41<36:17,  2.00it/s, acc=0.68, epoch=7, loss=1.04] 

epoch:7, idx:6499/10845, loss:1.0373356372668192, acc:0.6805


 61%|██████    | 6600/10845 [53:31<40:21,  1.75it/s, acc=0.68, epoch=7, loss=1.04]

epoch:7, idx:6599/10845, loss:1.0381527498906309, acc:0.6804545454545454


 62%|██████▏   | 6700/10845 [54:24<36:39,  1.88it/s, acc=0.68, epoch=7, loss=1.04]

epoch:7, idx:6699/10845, loss:1.0403938658023948, acc:0.6801119402985074


 63%|██████▎   | 6800/10845 [55:17<45:11,  1.49it/s, acc=0.679, epoch=7, loss=1.04]

epoch:7, idx:6799/10845, loss:1.0417722583430655, acc:0.6793382352941176


 64%|██████▎   | 6900/10845 [56:04<33:10,  1.98it/s, acc=0.679, epoch=7, loss=1.04]

epoch:7, idx:6899/10845, loss:1.0426080887905065, acc:0.6792753623188406


 65%|██████▍   | 7000/10845 [56:56<33:29,  1.91it/s, acc=0.679, epoch=7, loss=1.04]

epoch:7, idx:6999/10845, loss:1.0423724078280585, acc:0.6795


 65%|██████▌   | 7100/10845 [57:51<24:22,  2.56it/s, acc=0.679, epoch=7, loss=1.04]  

epoch:7, idx:7099/10845, loss:1.043510918810334, acc:0.6790492957746479


 66%|██████▋   | 7200/10845 [58:48<26:33,  2.29it/s, acc=0.679, epoch=7, loss=1.04]

epoch:7, idx:7199/10845, loss:1.0448068592780166, acc:0.6788194444444444


 67%|██████▋   | 7300/10845 [59:43<25:42,  2.30it/s, acc=0.679, epoch=7, loss=1.05]

epoch:7, idx:7299/10845, loss:1.0457477765867156, acc:0.6786643835616438


 68%|██████▊   | 7400/10845 [1:00:34<26:02,  2.21it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:7399/10845, loss:1.0460023110380043, acc:0.6784797297297297


 69%|██████▉   | 7500/10845 [1:01:33<30:42,  1.82it/s, acc=0.679, epoch=7, loss=1.05]

epoch:7, idx:7499/10845, loss:1.0455845918575923, acc:0.6787333333333333


 70%|███████   | 7600/10845 [1:02:30<30:28,  1.77it/s, acc=0.679, epoch=7, loss=1.05]

epoch:7, idx:7599/10845, loss:1.045589342132995, acc:0.67875


 71%|███████   | 7700/10845 [1:03:25<38:19,  1.37it/s, acc=0.679, epoch=7, loss=1.05]

epoch:7, idx:7699/10845, loss:1.045171222996402, acc:0.6789285714285714


 72%|███████▏  | 7800/10845 [1:04:20<18:19,  2.77it/s, acc=0.679, epoch=7, loss=1.05]

epoch:7, idx:7799/10845, loss:1.0457295524080594, acc:0.6790705128205128


 73%|███████▎  | 7900/10845 [1:05:12<24:33,  2.00it/s, acc=0.679, epoch=7, loss=1.05]

epoch:7, idx:7899/10845, loss:1.0468800086990186, acc:0.6786392405063291


 74%|███████▍  | 8000/10845 [1:06:02<22:13,  2.13it/s, acc=0.679, epoch=7, loss=1.05]

epoch:7, idx:7999/10845, loss:1.0470933318957687, acc:0.67859375


 75%|███████▍  | 8100/10845 [1:06:59<30:39,  1.49it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8099/10845, loss:1.0480669347739513, acc:0.6783024691358025


 76%|███████▌  | 8200/10845 [1:07:48<25:19,  1.74it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8199/10845, loss:1.0490975477928068, acc:0.6779878048780488


 77%|███████▋  | 8300/10845 [1:08:40<21:39,  1.96it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8299/10845, loss:1.049316560094615, acc:0.6777409638554217


 77%|███████▋  | 8400/10845 [1:09:33<19:12,  2.12it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8399/10845, loss:1.0498202110614094, acc:0.6776190476190476


 78%|███████▊  | 8500/10845 [1:10:27<16:17,  2.40it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8499/10845, loss:1.0491133824937484, acc:0.6777352941176471


 79%|███████▉  | 8600/10845 [1:11:22<17:14,  2.17it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8599/10845, loss:1.0485606511942176, acc:0.6780523255813954


 80%|████████  | 8700/10845 [1:12:15<15:49,  2.26it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8699/10845, loss:1.0477547685168256, acc:0.6783045977011494


 81%|████████  | 8800/10845 [1:13:09<17:22,  1.96it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8799/10845, loss:1.050253224142573, acc:0.6778693181818182


 82%|████████▏ | 8900/10845 [1:13:56<13:12,  2.45it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8899/10845, loss:1.0508910454458065, acc:0.6778089887640449


 83%|████████▎ | 9001/10845 [1:14:47<15:06,  2.03it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:8999/10845, loss:1.0519399994810423, acc:0.6775555555555556


 84%|████████▍ | 9100/10845 [1:15:41<15:40,  1.86it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:9099/10845, loss:1.051803546999837, acc:0.6776098901098901


 85%|████████▍ | 9200/10845 [1:16:28<10:03,  2.72it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9199/10845, loss:1.0507770944743053, acc:0.6775


 86%|████████▌ | 9300/10845 [1:17:17<11:08,  2.31it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9299/10845, loss:1.0516477387758993, acc:0.6773655913978495


 87%|████████▋ | 9400/10845 [1:18:06<16:18,  1.48it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9399/10845, loss:1.0518071606945485, acc:0.677154255319149


 88%|████████▊ | 9500/10845 [1:19:01<13:38,  1.64it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9499/10845, loss:1.0522190808057785, acc:0.6772631578947368


 89%|████████▊ | 9600/10845 [1:19:55<08:32,  2.43it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9599/10845, loss:1.0540094192326068, acc:0.676953125


 89%|████████▉ | 9700/10845 [1:20:48<09:17,  2.06it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9699/10845, loss:1.0537758430746413, acc:0.6770618556701031


 90%|█████████ | 9800/10845 [1:21:39<06:34,  2.65it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9799/10845, loss:1.0536532859048064, acc:0.6771938775510205


 91%|█████████▏| 9900/10845 [1:22:30<09:41,  1.63it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9899/10845, loss:1.0527734838651888, acc:0.6772979797979798


 92%|█████████▏| 10000/10845 [1:23:24<09:50,  1.43it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:9999/10845, loss:1.052238497930765, acc:0.67745


 93%|█████████▎| 10100/10845 [1:24:11<08:08,  1.52it/s, acc=0.678, epoch=7, loss=1.05]

epoch:7, idx:10099/10845, loss:1.0527800567787473, acc:0.6776485148514851


 94%|█████████▍| 10201/10845 [1:25:06<07:05,  1.52it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:10199/10845, loss:1.0532150000801273, acc:0.6773774509803921


 95%|█████████▍| 10300/10845 [1:26:03<04:19,  2.10it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:10299/10845, loss:1.0538325209640762, acc:0.6772815533980583


 96%|█████████▌| 10400/10845 [1:26:57<04:33,  1.63it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:10399/10845, loss:1.0542601055709215, acc:0.676875


 97%|█████████▋| 10500/10845 [1:27:53<03:13,  1.78it/s, acc=0.677, epoch=7, loss=1.05]

epoch:7, idx:10499/10845, loss:1.0548488592704137, acc:0.6768333333333333


 98%|█████████▊| 10600/10845 [1:28:49<02:28,  1.65it/s, acc=0.677, epoch=7, loss=1.06]

epoch:7, idx:10599/10845, loss:1.056205851694323, acc:0.676556603773585


 99%|█████████▊| 10700/10845 [1:29:44<01:26,  1.67it/s, acc=0.676, epoch=7, loss=1.06]

epoch:7, idx:10699/10845, loss:1.0564373898450459, acc:0.6762850467289719


100%|█████████▉| 10800/10845 [1:30:41<00:23,  1.94it/s, acc=0.676, epoch=7, loss=1.06]

epoch:7, idx:10799/10845, loss:1.0559481105705102, acc:0.6762731481481481


100%|██████████| 10845/10845 [1:31:06<00:00,  2.27it/s, acc=0.676, epoch=7, loss=1.06]


epoch:7, idx:0/1275, loss:0.8149615526199341, acc:0.75
epoch:7, idx:100/1275, loss:1.2532366024385584, acc:0.594059405940594
epoch:7, idx:200/1275, loss:1.2072700508198335, acc:0.6131840796019901
epoch:7, idx:300/1275, loss:1.1846765901559215, acc:0.6196013289036545
epoch:7, idx:400/1275, loss:1.1653259062113013, acc:0.6278054862842892
epoch:7, idx:500/1275, loss:1.1694915620628707, acc:0.625748502994012
epoch:7, idx:600/1275, loss:1.1837646682528211, acc:0.6185524126455907
epoch:7, idx:700/1275, loss:1.1858861754182062, acc:0.6173323823109843
epoch:7, idx:800/1275, loss:1.195488285631425, acc:0.6182896379525593
epoch:7, idx:900/1275, loss:1.1847935559746958, acc:0.6201442841287459
epoch:7, idx:1000/1275, loss:1.1903668310377862, acc:0.6186313686313686
epoch:7, idx:1100/1275, loss:1.1841670347277844, acc:0.6189827429609446
epoch:7, idx:1200/1275, loss:1.1873194606576136, acc:0.6167776852622814


  1%|          | 100/10845 [00:49<59:55,  2.99it/s, acc=0.71, epoch=8, loss=0.978]  

epoch:8, idx:99/10845, loss:0.9781776249408722, acc:0.71


  2%|▏         | 200/10845 [01:45<1:03:49,  2.78it/s, acc=0.699, epoch=8, loss=1.04] 

epoch:8, idx:199/10845, loss:1.0364566126465797, acc:0.69875


  3%|▎         | 300/10845 [02:43<1:11:08,  2.47it/s, acc=0.701, epoch=8, loss=1.02] 

epoch:8, idx:299/10845, loss:1.0196949563423792, acc:0.7008333333333333


  4%|▎         | 400/10845 [03:45<1:27:40,  1.99it/s, acc=0.698, epoch=8, loss=1.02]

epoch:8, idx:399/10845, loss:1.0207887797057629, acc:0.6975


  5%|▍         | 500/10845 [04:40<1:55:32,  1.49it/s, acc=0.702, epoch=8, loss=0.998]

epoch:8, idx:499/10845, loss:0.9980210063457489, acc:0.7015


  6%|▌         | 600/10845 [05:29<1:01:54,  2.76it/s, acc=0.704, epoch=8, loss=0.977]

epoch:8, idx:599/10845, loss:0.9770248369375865, acc:0.70375


  6%|▋         | 700/10845 [06:14<1:22:51,  2.04it/s, acc=0.703, epoch=8, loss=0.989]

epoch:8, idx:699/10845, loss:0.9888531831332615, acc:0.7028571428571428


  7%|▋         | 800/10845 [06:59<1:07:47,  2.47it/s, acc=0.707, epoch=8, loss=0.986]

epoch:8, idx:799/10845, loss:0.9856452044844627, acc:0.706875


  8%|▊         | 900/10845 [07:35<55:40,  2.98it/s, acc=0.705, epoch=8, loss=0.99]   

epoch:8, idx:899/10845, loss:0.9896154587136374, acc:0.7052777777777778


  9%|▉         | 1000/10845 [08:17<49:57,  3.28it/s, acc=0.699, epoch=8, loss=1.01]  

epoch:8, idx:999/10845, loss:1.0133735800385475, acc:0.69925


 10%|█         | 1100/10845 [08:58<1:07:19,  2.41it/s, acc=0.697, epoch=8, loss=1.02]

epoch:8, idx:1099/10845, loss:1.0173152966390957, acc:0.696590909090909


 11%|█         | 1200/10845 [09:38<59:30,  2.70it/s, acc=0.694, epoch=8, loss=1.02]  

epoch:8, idx:1199/10845, loss:1.0234961314499378, acc:0.6941666666666667


 12%|█▏        | 1300/10845 [10:21<1:00:53,  2.61it/s, acc=0.691, epoch=8, loss=1.03]

epoch:8, idx:1299/10845, loss:1.0274464187255272, acc:0.6913461538461538


 13%|█▎        | 1400/10845 [11:06<1:20:39,  1.95it/s, acc=0.691, epoch=8, loss=1.02]

epoch:8, idx:1399/10845, loss:1.0247998635257993, acc:0.6905357142857143


 14%|█▍        | 1500/10845 [11:59<1:28:13,  1.77it/s, acc=0.69, epoch=8, loss=1.02] 

epoch:8, idx:1499/10845, loss:1.018489358862241, acc:0.6901666666666667


 15%|█▍        | 1600/10845 [12:52<1:08:54,  2.24it/s, acc=0.692, epoch=8, loss=1.01]

epoch:8, idx:1599/10845, loss:1.0130851482227445, acc:0.6915625


 16%|█▌        | 1700/10845 [13:44<1:24:31,  1.80it/s, acc=0.692, epoch=8, loss=1.01]

epoch:8, idx:1699/10845, loss:1.0099269625018625, acc:0.6916176470588236


 17%|█▋        | 1800/10845 [14:32<1:19:34,  1.89it/s, acc=0.692, epoch=8, loss=1.01]

epoch:8, idx:1799/10845, loss:1.0067519468069077, acc:0.6923611111111111


 18%|█▊        | 1900/10845 [15:22<1:05:04,  2.29it/s, acc=0.693, epoch=8, loss=1]   

epoch:8, idx:1899/10845, loss:1.004206093392874, acc:0.6934210526315789


 18%|█▊        | 2000/10845 [16:18<1:47:50,  1.37it/s, acc=0.692, epoch=8, loss=1.01]

epoch:8, idx:1999/10845, loss:1.0094646136760712, acc:0.692125


 19%|█▉        | 2100/10845 [17:13<1:20:43,  1.81it/s, acc=0.693, epoch=8, loss=1.01]

epoch:8, idx:2099/10845, loss:1.0118500642833255, acc:0.6926190476190476


 20%|██        | 2200/10845 [18:09<1:20:59,  1.78it/s, acc=0.694, epoch=8, loss=1.01]

epoch:8, idx:2199/10845, loss:1.0101724242622203, acc:0.6938636363636363


 21%|██        | 2300/10845 [18:59<1:06:35,  2.14it/s, acc=0.694, epoch=8, loss=1.01]

epoch:8, idx:2299/10845, loss:1.0076986305350843, acc:0.6935869565217392


 22%|██▏       | 2400/10845 [19:48<58:14,  2.42it/s, acc=0.694, epoch=8, loss=1]     

epoch:8, idx:2399/10845, loss:1.0049747458597025, acc:0.6944791666666666


 23%|██▎       | 2500/10845 [20:43<1:15:00,  1.85it/s, acc=0.695, epoch=8, loss=1]   

epoch:8, idx:2499/10845, loss:1.0032650047779084, acc:0.695


 24%|██▍       | 2600/10845 [21:33<1:15:09,  1.83it/s, acc=0.695, epoch=8, loss=1.01]

epoch:8, idx:2599/10845, loss:1.0066923501629095, acc:0.6953846153846154


 25%|██▍       | 2700/10845 [22:30<56:03,  2.42it/s, acc=0.697, epoch=8, loss=1]     

epoch:8, idx:2699/10845, loss:1.0021894836867298, acc:0.697037037037037


 26%|██▌       | 2800/10845 [23:24<1:27:05,  1.54it/s, acc=0.697, epoch=8, loss=1]

epoch:8, idx:2799/10845, loss:1.0020931352674962, acc:0.6969642857142857


 27%|██▋       | 2900/10845 [24:13<1:19:51,  1.66it/s, acc=0.697, epoch=8, loss=1]

epoch:8, idx:2899/10845, loss:1.0019342093221073, acc:0.696551724137931


 28%|██▊       | 3000/10845 [25:09<1:17:34,  1.69it/s, acc=0.696, epoch=8, loss=0.999]

epoch:8, idx:2999/10845, loss:0.9993377522627512, acc:0.6964166666666667


 29%|██▊       | 3100/10845 [26:02<1:25:52,  1.50it/s, acc=0.696, epoch=8, loss=0.997]

epoch:8, idx:3099/10845, loss:0.9969481841018123, acc:0.6958870967741936


 30%|██▉       | 3200/10845 [26:57<58:13,  2.19it/s, acc=0.697, epoch=8, loss=0.992]  

epoch:8, idx:3199/10845, loss:0.9924990475736558, acc:0.696875


 30%|███       | 3300/10845 [27:50<57:40,  2.18it/s, acc=0.697, epoch=8, loss=0.995]  

epoch:8, idx:3299/10845, loss:0.9949773988037398, acc:0.6965151515151515


 31%|███▏      | 3401/10845 [28:43<51:55,  2.39it/s, acc=0.697, epoch=8, loss=0.995]  

epoch:8, idx:3399/10845, loss:0.9948176805412068, acc:0.6970588235294117


 32%|███▏      | 3500/10845 [29:35<1:04:17,  1.90it/s, acc=0.696, epoch=8, loss=0.996]

epoch:8, idx:3499/10845, loss:0.9962580649001258, acc:0.6959285714285715


 33%|███▎      | 3600/10845 [30:31<1:06:21,  1.82it/s, acc=0.696, epoch=8, loss=0.995]

epoch:8, idx:3599/10845, loss:0.9954781023164591, acc:0.6955555555555556


 34%|███▍      | 3700/10845 [31:27<1:07:50,  1.76it/s, acc=0.696, epoch=8, loss=0.995]

epoch:8, idx:3699/10845, loss:0.9951785496602187, acc:0.6958783783783784


 35%|███▌      | 3800/10845 [32:23<1:00:18,  1.95it/s, acc=0.696, epoch=8, loss=0.997]

epoch:8, idx:3799/10845, loss:0.9974885700094073, acc:0.6960526315789474


 36%|███▌      | 3900/10845 [33:17<1:04:09,  1.80it/s, acc=0.696, epoch=8, loss=0.999]

epoch:8, idx:3899/10845, loss:0.9988477879609817, acc:0.6957692307692308


 37%|███▋      | 4000/10845 [34:11<56:10,  2.03it/s, acc=0.695, epoch=8, loss=1]      

epoch:8, idx:3999/10845, loss:1.002668216109276, acc:0.6950625


 38%|███▊      | 4100/10845 [35:06<1:02:56,  1.79it/s, acc=0.695, epoch=8, loss=1]

epoch:8, idx:4099/10845, loss:1.00418397644671, acc:0.6947560975609756


 39%|███▊      | 4200/10845 [35:59<42:18,  2.62it/s, acc=0.695, epoch=8, loss=1]     

epoch:8, idx:4199/10845, loss:1.0037774962754478, acc:0.6946428571428571


 40%|███▉      | 4300/10845 [36:57<43:36,  2.50it/s, acc=0.694, epoch=8, loss=1.01]  

epoch:8, idx:4299/10845, loss:1.0059163393946582, acc:0.6938372093023256


 41%|████      | 4400/10845 [37:49<53:06,  2.02it/s, acc=0.693, epoch=8, loss=1.01]  

epoch:8, idx:4399/10845, loss:1.0072285025634549, acc:0.6931818181818182


 41%|████▏     | 4500/10845 [38:44<1:16:07,  1.39it/s, acc=0.693, epoch=8, loss=1.01]

epoch:8, idx:4499/10845, loss:1.008104998257425, acc:0.6931666666666667


 42%|████▏     | 4600/10845 [39:41<58:48,  1.77it/s, acc=0.694, epoch=8, loss=1.01]  

epoch:8, idx:4599/10845, loss:1.0076730388791664, acc:0.6935326086956521


 43%|████▎     | 4700/10845 [40:37<1:02:57,  1.63it/s, acc=0.693, epoch=8, loss=1.01]

epoch:8, idx:4699/10845, loss:1.010915731280408, acc:0.6927659574468085


 44%|████▍     | 4800/10845 [41:26<42:39,  2.36it/s, acc=0.692, epoch=8, loss=1.01]  

epoch:8, idx:4799/10845, loss:1.0113099496314923, acc:0.6923958333333333


 45%|████▌     | 4900/10845 [42:20<1:03:26,  1.56it/s, acc=0.692, epoch=8, loss=1.01]

epoch:8, idx:4899/10845, loss:1.011641745688964, acc:0.6920408163265306


 46%|████▌     | 5000/10845 [43:19<48:23,  2.01it/s, acc=0.692, epoch=8, loss=1.01]  

epoch:8, idx:4999/10845, loss:1.014090307343006, acc:0.69185


 47%|████▋     | 5100/10845 [44:15<49:38,  1.93it/s, acc=0.691, epoch=8, loss=1.02]  

epoch:8, idx:5099/10845, loss:1.0159082350310158, acc:0.6913235294117647


 48%|████▊     | 5200/10845 [45:08<51:20,  1.83it/s, acc=0.691, epoch=8, loss=1.02]  

epoch:8, idx:5199/10845, loss:1.0155624084747754, acc:0.69125


 49%|████▉     | 5300/10845 [46:01<44:35,  2.07it/s, acc=0.691, epoch=8, loss=1.02]  

epoch:8, idx:5299/10845, loss:1.015962237974383, acc:0.6913207547169812


 50%|████▉     | 5400/10845 [46:59<53:24,  1.70it/s, acc=0.69, epoch=8, loss=1.02]   

epoch:8, idx:5399/10845, loss:1.0172200509905815, acc:0.690462962962963


 51%|█████     | 5500/10845 [47:50<41:52,  2.13it/s, acc=0.69, epoch=8, loss=1.02]  

epoch:8, idx:5499/10845, loss:1.0170396832552824, acc:0.6902272727272727


 52%|█████▏    | 5600/10845 [48:43<46:08,  1.89it/s, acc=0.69, epoch=8, loss=1.02]   

epoch:8, idx:5599/10845, loss:1.0165422696088042, acc:0.6903125


 53%|█████▎    | 5700/10845 [49:40<52:37,  1.63it/s, acc=0.69, epoch=8, loss=1.02]  

epoch:8, idx:5699/10845, loss:1.0171165421866535, acc:0.690219298245614


 53%|█████▎    | 5800/10845 [50:34<38:02,  2.21it/s, acc=0.69, epoch=8, loss=1.02]  

epoch:8, idx:5799/10845, loss:1.0206481774408243, acc:0.6901293103448276


 54%|█████▍    | 5900/10845 [51:27<51:13,  1.61it/s, acc=0.689, epoch=8, loss=1.02] 

epoch:8, idx:5899/10845, loss:1.0230747083789211, acc:0.6891525423728814


 55%|█████▌    | 6000/10845 [52:19<49:13,  1.64it/s, acc=0.689, epoch=8, loss=1.02]  

epoch:8, idx:5999/10845, loss:1.022835542112589, acc:0.6894166666666667


 56%|█████▌    | 6100/10845 [53:13<38:11,  2.07it/s, acc=0.69, epoch=8, loss=1.02] 

epoch:8, idx:6099/10845, loss:1.0203894669501508, acc:0.6898360655737705


 57%|█████▋    | 6200/10845 [54:11<52:44,  1.47it/s, acc=0.689, epoch=8, loss=1.02] 

epoch:8, idx:6199/10845, loss:1.0221464634903015, acc:0.6893145161290323


 58%|█████▊    | 6300/10845 [55:06<33:19,  2.27it/s, acc=0.689, epoch=8, loss=1.02]  

epoch:8, idx:6299/10845, loss:1.0244176078221154, acc:0.6886507936507936


 59%|█████▉    | 6400/10845 [55:59<37:30,  1.98it/s, acc=0.688, epoch=8, loss=1.02]

epoch:8, idx:6399/10845, loss:1.0234743439313023, acc:0.6884375


 60%|█████▉    | 6500/10845 [56:53<42:58,  1.69it/s, acc=0.689, epoch=8, loss=1.02]  

epoch:8, idx:6499/10845, loss:1.0232406936975627, acc:0.6886538461538462


 61%|██████    | 6600/10845 [57:49<40:33,  1.74it/s, acc=0.689, epoch=8, loss=1.02]  

epoch:8, idx:6599/10845, loss:1.0227087949622762, acc:0.6886363636363636


 62%|██████▏   | 6700/10845 [58:43<40:32,  1.70it/s, acc=0.689, epoch=8, loss=1.02]

epoch:8, idx:6699/10845, loss:1.022768624940915, acc:0.6887313432835821


 63%|██████▎   | 6800/10845 [59:37<37:21,  1.80it/s, acc=0.688, epoch=8, loss=1.02]

epoch:8, idx:6799/10845, loss:1.0248922127134659, acc:0.6883088235294118


 64%|██████▎   | 6900/10845 [1:00:29<33:01,  1.99it/s, acc=0.688, epoch=8, loss=1.03]

epoch:8, idx:6899/10845, loss:1.0262020230811575, acc:0.6881884057971015


 65%|██████▍   | 7000/10845 [1:01:20<24:49,  2.58it/s, acc=0.688, epoch=8, loss=1.03]

epoch:8, idx:6999/10845, loss:1.0267171795623644, acc:0.6879642857142857


 65%|██████▌   | 7100/10845 [1:02:16<41:56,  1.49it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7099/10845, loss:1.0270124846025253, acc:0.6871126760563381


 66%|██████▋   | 7200/10845 [1:03:09<32:55,  1.85it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7199/10845, loss:1.0272317866318756, acc:0.6870138888888889


 67%|██████▋   | 7300/10845 [1:04:03<39:43,  1.49it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7299/10845, loss:1.02786566135818, acc:0.6867465753424657


 68%|██████▊   | 7400/10845 [1:05:00<29:28,  1.95it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7399/10845, loss:1.0275492003157332, acc:0.6869256756756756


 69%|██████▉   | 7500/10845 [1:05:53<36:28,  1.53it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7499/10845, loss:1.0271355037848156, acc:0.687


 70%|███████   | 7600/10845 [1:06:47<32:42,  1.65it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7599/10845, loss:1.028261382619017, acc:0.6868092105263158


 71%|███████   | 7700/10845 [1:07:43<28:08,  1.86it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7699/10845, loss:1.0300419127786313, acc:0.6865584415584416


 72%|███████▏  | 7800/10845 [1:08:37<24:28,  2.07it/s, acc=0.687, epoch=8, loss=1.03]

epoch:8, idx:7799/10845, loss:1.030323943342918, acc:0.6865064102564102


 73%|███████▎  | 7900/10845 [1:09:34<23:31,  2.09it/s, acc=0.686, epoch=8, loss=1.03]

epoch:8, idx:7899/10845, loss:1.0334045586782166, acc:0.6858860759493671


 74%|███████▍  | 8000/10845 [1:10:32<31:06,  1.52it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:7999/10845, loss:1.0342917010113597, acc:0.68515625


 75%|███████▍  | 8100/10845 [1:11:22<17:42,  2.58it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8099/10845, loss:1.0338232341813451, acc:0.6851543209876543


 76%|███████▌  | 8200/10845 [1:12:15<19:17,  2.29it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8199/10845, loss:1.0326964477286107, acc:0.6851829268292683


 77%|███████▋  | 8300/10845 [1:13:13<22:06,  1.92it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8299/10845, loss:1.0334272156566022, acc:0.6852409638554217


 77%|███████▋  | 8400/10845 [1:14:08<20:57,  1.94it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8399/10845, loss:1.0331706288811706, acc:0.685029761904762


 78%|███████▊  | 8500/10845 [1:14:59<17:59,  2.17it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8499/10845, loss:1.0341185648160822, acc:0.6849117647058823


 79%|███████▉  | 8600/10845 [1:15:54<23:46,  1.57it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8599/10845, loss:1.0337578046807023, acc:0.6849127906976744


 80%|████████  | 8700/10845 [1:16:47<20:17,  1.76it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8699/10845, loss:1.0344928637386739, acc:0.6845689655172413


 81%|████████  | 8800/10845 [1:17:37<18:59,  1.79it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8799/10845, loss:1.0340037006884812, acc:0.6849715909090909


 82%|████████▏ | 8900/10845 [1:18:32<17:03,  1.90it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8899/10845, loss:1.0331968435477674, acc:0.6852247191011236


 83%|████████▎ | 9000/10845 [1:19:27<14:57,  2.06it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:8999/10845, loss:1.034126863790883, acc:0.685


 84%|████████▍ | 9100/10845 [1:20:22<13:04,  2.22it/s, acc=0.685, epoch=8, loss=1.04]

epoch:8, idx:9099/10845, loss:1.0358184309844132, acc:0.6846153846153846


 85%|████████▍ | 9200/10845 [1:21:14<19:11,  1.43it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:9199/10845, loss:1.0360975588598977, acc:0.6843478260869565


 86%|████████▌ | 9300/10845 [1:22:04<09:35,  2.68it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:9299/10845, loss:1.0355681708102584, acc:0.6843010752688172


 87%|████████▋ | 9401/10845 [1:22:58<11:59,  2.01it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:9399/10845, loss:1.0344592652041862, acc:0.6845478723404256


 88%|████████▊ | 9500/10845 [1:23:54<11:28,  1.95it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:9499/10845, loss:1.0356221782972939, acc:0.6841315789473684


 89%|████████▊ | 9600/10845 [1:24:55<14:51,  1.40it/s, acc=0.685, epoch=8, loss=1.03]

epoch:8, idx:9599/10845, loss:1.0346827670683463, acc:0.6845833333333333


 89%|████████▉ | 9700/10845 [1:25:49<13:05,  1.46it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:9699/10845, loss:1.035840634033852, acc:0.6843041237113402


 90%|█████████ | 9800/10845 [1:26:42<10:52,  1.60it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:9799/10845, loss:1.0370603789602006, acc:0.6841581632653061


 91%|█████████▏| 9900/10845 [1:27:37<08:04,  1.95it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:9899/10845, loss:1.036976359191567, acc:0.6842424242424242


 92%|█████████▏| 10000/10845 [1:28:34<05:28,  2.57it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:9999/10845, loss:1.036812748247385, acc:0.68405


 93%|█████████▎| 10100/10845 [1:29:25<04:47,  2.59it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10099/10845, loss:1.0378094047189939, acc:0.6839356435643564


 94%|█████████▍| 10200/10845 [1:30:18<05:53,  1.82it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10199/10845, loss:1.0382411931016866, acc:0.683921568627451


 95%|█████████▍| 10300/10845 [1:31:10<04:36,  1.97it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10299/10845, loss:1.0380458486138038, acc:0.6838592233009708


 96%|█████████▌| 10400/10845 [1:32:07<03:39,  2.02it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10399/10845, loss:1.0372356793055169, acc:0.68375


 97%|█████████▋| 10500/10845 [1:32:59<02:34,  2.23it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10499/10845, loss:1.0363982274759382, acc:0.6839761904761905


 98%|█████████▊| 10600/10845 [1:33:53<01:50,  2.21it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10599/10845, loss:1.0367896781552512, acc:0.6842216981132075


 99%|█████████▊| 10700/10845 [1:34:48<01:08,  2.10it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10699/10845, loss:1.0366590658526553, acc:0.6841822429906542


100%|█████████▉| 10800/10845 [1:35:40<00:29,  1.55it/s, acc=0.684, epoch=8, loss=1.04]

epoch:8, idx:10799/10845, loss:1.0364564220441712, acc:0.6844212962962963


100%|██████████| 10845/10845 [1:36:03<00:00,  2.43it/s, acc=0.685, epoch=8, loss=1.04]


epoch:8, idx:0/1275, loss:0.8734642267227173, acc:0.5
epoch:8, idx:100/1275, loss:1.241996136632296, acc:0.6188118811881188
epoch:8, idx:200/1275, loss:1.1855895744627387, acc:0.6218905472636815
epoch:8, idx:300/1275, loss:1.1805150712843353, acc:0.6287375415282392
epoch:8, idx:400/1275, loss:1.161505188876554, acc:0.6346633416458853
epoch:8, idx:500/1275, loss:1.1584855670224645, acc:0.6347305389221557
epoch:8, idx:600/1275, loss:1.1688799138077086, acc:0.629783693843594
epoch:8, idx:700/1275, loss:1.1710727611043823, acc:0.6308844507845934
epoch:8, idx:800/1275, loss:1.183421605981691, acc:0.6279650436953808
epoch:8, idx:900/1275, loss:1.1694410046382697, acc:0.6323529411764706
epoch:8, idx:1000/1275, loss:1.1692018659560235, acc:0.6326173826173827
epoch:8, idx:1100/1275, loss:1.163110521790334, acc:0.6346503178928247
epoch:8, idx:1200/1275, loss:1.1635973574120635, acc:0.634263114071607


  1%|          | 100/10845 [00:50<1:33:54,  1.91it/s, acc=0.757, epoch=9, loss=0.772]

epoch:9, idx:99/10845, loss:0.7723123288154602, acc:0.7575


  2%|▏         | 200/10845 [01:44<1:20:12,  2.21it/s, acc=0.723, epoch=9, loss=0.908]

epoch:9, idx:199/10845, loss:0.9080686390399932, acc:0.7225


  3%|▎         | 300/10845 [02:38<1:47:44,  1.63it/s, acc=0.718, epoch=9, loss=0.927]

epoch:9, idx:299/10845, loss:0.9272136318683625, acc:0.7175


  4%|▎         | 400/10845 [03:34<1:22:15,  2.12it/s, acc=0.701, epoch=9, loss=0.955]

epoch:9, idx:399/10845, loss:0.9549133287370205, acc:0.70125


  5%|▍         | 500/10845 [04:29<1:13:37,  2.34it/s, acc=0.7, epoch=9, loss=0.979]  

epoch:9, idx:499/10845, loss:0.9791836779117584, acc:0.6995


  6%|▌         | 600/10845 [05:21<1:20:02,  2.13it/s, acc=0.704, epoch=9, loss=0.957]

epoch:9, idx:599/10845, loss:0.9573237832883994, acc:0.70375


  6%|▋         | 700/10845 [06:15<1:15:40,  2.23it/s, acc=0.704, epoch=9, loss=0.952]

epoch:9, idx:699/10845, loss:0.9519716215559414, acc:0.7039285714285715


  7%|▋         | 800/10845 [07:09<1:21:11,  2.06it/s, acc=0.702, epoch=9, loss=0.963]

epoch:9, idx:799/10845, loss:0.9630140044167638, acc:0.701875


  8%|▊         | 900/10845 [08:03<1:24:13,  1.97it/s, acc=0.699, epoch=9, loss=0.968]

epoch:9, idx:899/10845, loss:0.9677088173561627, acc:0.6991666666666667


  9%|▉         | 1000/10845 [08:56<1:24:28,  1.94it/s, acc=0.693, epoch=9, loss=0.987]

epoch:9, idx:999/10845, loss:0.9873217699229717, acc:0.69325


 10%|█         | 1100/10845 [09:52<1:29:19,  1.82it/s, acc=0.696, epoch=9, loss=0.978]

epoch:9, idx:1099/10845, loss:0.9778418001532555, acc:0.6963636363636364


 11%|█         | 1200/10845 [10:47<1:31:10,  1.76it/s, acc=0.694, epoch=9, loss=0.991]

epoch:9, idx:1199/10845, loss:0.991090598081549, acc:0.6939583333333333


 12%|█▏        | 1300/10845 [11:43<1:45:47,  1.50it/s, acc=0.693, epoch=9, loss=0.991]

epoch:9, idx:1299/10845, loss:0.9911893574320353, acc:0.6934615384615385


 13%|█▎        | 1401/10845 [12:35<1:18:36,  2.00it/s, acc=0.694, epoch=9, loss=0.999]

epoch:9, idx:1399/10845, loss:0.9991938299153532, acc:0.69375


 14%|█▍        | 1500/10845 [13:29<55:32,  2.80it/s, acc=0.694, epoch=9, loss=0.997]  

epoch:9, idx:1499/10845, loss:0.9973957522114117, acc:0.694


 15%|█▍        | 1600/10845 [14:23<1:15:07,  2.05it/s, acc=0.695, epoch=9, loss=0.996]

epoch:9, idx:1599/10845, loss:0.9958406442590058, acc:0.69515625


 16%|█▌        | 1700/10845 [15:15<1:33:27,  1.63it/s, acc=0.694, epoch=9, loss=1]    

epoch:9, idx:1699/10845, loss:0.9998723168057554, acc:0.6941176470588235


 17%|█▋        | 1800/10845 [16:09<58:53,  2.56it/s, acc=0.696, epoch=9, loss=1]      

epoch:9, idx:1799/10845, loss:1.0000041688978671, acc:0.6956944444444444


 18%|█▊        | 1900/10845 [17:04<1:08:01,  2.19it/s, acc=0.694, epoch=9, loss=1.01] 

epoch:9, idx:1899/10845, loss:1.007120093844439, acc:0.6939473684210526


 18%|█▊        | 2000/10845 [17:57<1:24:18,  1.75it/s, acc=0.693, epoch=9, loss=1.01]

epoch:9, idx:1999/10845, loss:1.011881336852908, acc:0.693375


 19%|█▉        | 2100/10845 [18:53<1:37:10,  1.50it/s, acc=0.693, epoch=9, loss=1.01]

epoch:9, idx:2099/10845, loss:1.0111177159349123, acc:0.6929761904761905


 20%|██        | 2200/10845 [19:50<1:10:23,  2.05it/s, acc=0.692, epoch=9, loss=1.01]

epoch:9, idx:2199/10845, loss:1.0141259195452386, acc:0.6921590909090909


 21%|██        | 2300/10845 [20:40<54:36,  2.61it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:2299/10845, loss:1.0140863332670669, acc:0.6923913043478261


 22%|██▏       | 2400/10845 [21:31<1:44:51,  1.34it/s, acc=0.692, epoch=9, loss=1.02]

epoch:9, idx:2399/10845, loss:1.015612622834742, acc:0.6922916666666666


 23%|██▎       | 2500/10845 [22:23<1:24:52,  1.64it/s, acc=0.691, epoch=9, loss=1.02]

epoch:9, idx:2499/10845, loss:1.015724132335186, acc:0.6913


 24%|██▍       | 2600/10845 [23:19<1:20:20,  1.71it/s, acc=0.69, epoch=9, loss=1.02] 

epoch:9, idx:2599/10845, loss:1.0170084373194437, acc:0.6900961538461539


 25%|██▍       | 2700/10845 [24:12<58:26,  2.32it/s, acc=0.691, epoch=9, loss=1.01]  

epoch:9, idx:2699/10845, loss:1.014611482763732, acc:0.6909259259259259


 26%|██▌       | 2800/10845 [25:04<1:15:02,  1.79it/s, acc=0.69, epoch=9, loss=1.02] 

epoch:9, idx:2799/10845, loss:1.0164711923365082, acc:0.6901785714285714


 27%|██▋       | 2900/10845 [26:00<1:04:21,  2.06it/s, acc=0.691, epoch=9, loss=1.02]

epoch:9, idx:2899/10845, loss:1.0158371992049546, acc:0.690948275862069


 28%|██▊       | 3000/10845 [26:54<54:57,  2.38it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:2999/10845, loss:1.0134966804484526, acc:0.6919166666666666


 29%|██▊       | 3100/10845 [27:45<1:19:10,  1.63it/s, acc=0.692, epoch=9, loss=1.01]

epoch:9, idx:3099/10845, loss:1.0142614029588237, acc:0.6916935483870967


 30%|██▉       | 3200/10845 [28:40<54:08,  2.35it/s, acc=0.691, epoch=9, loss=1.02]  

epoch:9, idx:3199/10845, loss:1.0176019897032529, acc:0.690703125


 30%|███       | 3300/10845 [29:30<1:02:28,  2.01it/s, acc=0.691, epoch=9, loss=1.02]

epoch:9, idx:3299/10845, loss:1.0159832322507194, acc:0.6907575757575758


 31%|███▏      | 3400/10845 [30:23<1:12:38,  1.71it/s, acc=0.691, epoch=9, loss=1.01]

epoch:9, idx:3399/10845, loss:1.0149088269735085, acc:0.6913970588235294


 32%|███▏      | 3500/10845 [31:16<1:21:00,  1.51it/s, acc=0.692, epoch=9, loss=1.01]

epoch:9, idx:3499/10845, loss:1.014792039692402, acc:0.6916428571428571


 33%|███▎      | 3600/10845 [32:10<1:04:47,  1.86it/s, acc=0.693, epoch=9, loss=1.01]

epoch:9, idx:3599/10845, loss:1.011575709639324, acc:0.6926388888888889


 34%|███▍      | 3700/10845 [33:00<52:19,  2.28it/s, acc=0.693, epoch=9, loss=1.01]  

epoch:9, idx:3699/10845, loss:1.010652712478831, acc:0.6933783783783783


 35%|███▌      | 3800/10845 [33:53<53:21,  2.20it/s, acc=0.694, epoch=9, loss=1.01]  

epoch:9, idx:3799/10845, loss:1.008614399817429, acc:0.6941447368421053


 36%|███▌      | 3900/10845 [34:51<1:34:31,  1.22it/s, acc=0.695, epoch=9, loss=1.01]

epoch:9, idx:3899/10845, loss:1.0053496839679203, acc:0.6948076923076923


 37%|███▋      | 4000/10845 [35:46<54:54,  2.08it/s, acc=0.694, epoch=9, loss=1.01]  

epoch:9, idx:3999/10845, loss:1.0060965618118645, acc:0.694125


 38%|███▊      | 4100/10845 [36:42<52:11,  2.15it/s, acc=0.695, epoch=9, loss=1]     

epoch:9, idx:4099/10845, loss:1.0030922546837389, acc:0.6953048780487805


 39%|███▊      | 4200/10845 [37:37<44:53,  2.47it/s, acc=0.695, epoch=9, loss=1]  

epoch:9, idx:4199/10845, loss:1.0046655399529707, acc:0.6947619047619048


 40%|███▉      | 4300/10845 [38:33<1:05:57,  1.65it/s, acc=0.694, epoch=9, loss=1]   

epoch:9, idx:4299/10845, loss:1.0045730567463609, acc:0.6944186046511628


 41%|████      | 4400/10845 [39:22<54:31,  1.97it/s, acc=0.694, epoch=9, loss=1]  

epoch:9, idx:4399/10845, loss:1.0047892187603495, acc:0.6941477272727272


 41%|████▏     | 4500/10845 [40:14<55:03,  1.92it/s, acc=0.695, epoch=9, loss=1]  

epoch:9, idx:4499/10845, loss:1.004597123656008, acc:0.6948333333333333


 42%|████▏     | 4600/10845 [41:10<1:14:20,  1.40it/s, acc=0.695, epoch=9, loss=1]   

epoch:9, idx:4599/10845, loss:1.00488429521089, acc:0.6947282608695652


 43%|████▎     | 4700/10845 [42:03<56:20,  1.82it/s, acc=0.693, epoch=9, loss=1.01]  

epoch:9, idx:4699/10845, loss:1.0083545416530142, acc:0.6932446808510638


 44%|████▍     | 4800/10845 [42:55<1:04:44,  1.56it/s, acc=0.693, epoch=9, loss=1.01]

epoch:9, idx:4799/10845, loss:1.008761584268262, acc:0.6933854166666666


 45%|████▌     | 4900/10845 [43:45<49:09,  2.02it/s, acc=0.693, epoch=9, loss=1.01]  

epoch:9, idx:4899/10845, loss:1.0088328762991088, acc:0.6930102040816326


 46%|████▌     | 5000/10845 [44:38<51:15,  1.90it/s, acc=0.693, epoch=9, loss=1.01]  

epoch:9, idx:4999/10845, loss:1.0077451064884662, acc:0.6928


 47%|████▋     | 5100/10845 [45:31<40:16,  2.38it/s, acc=0.693, epoch=9, loss=1.01]  

epoch:9, idx:5099/10845, loss:1.008459940488432, acc:0.6925


 48%|████▊     | 5200/10845 [46:24<47:34,  1.98it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:5199/10845, loss:1.00821063777002, acc:0.6919711538461538


 49%|████▉     | 5300/10845 [47:21<52:58,  1.74it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:5299/10845, loss:1.00751701372412, acc:0.6918396226415094


 50%|████▉     | 5400/10845 [48:14<42:48,  2.12it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:5399/10845, loss:1.0084760777541886, acc:0.6921296296296297


 51%|█████     | 5500/10845 [49:10<56:36,  1.57it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:5499/10845, loss:1.008713403718038, acc:0.6922727272727273


 52%|█████▏    | 5600/10845 [50:05<49:22,  1.77it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:5599/10845, loss:1.010783260544496, acc:0.6920535714285714


 53%|█████▎    | 5700/10845 [50:55<49:03,  1.75it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:5699/10845, loss:1.010624772399141, acc:0.6922807017543859


 53%|█████▎    | 5800/10845 [51:52<48:42,  1.73it/s, acc=0.693, epoch=9, loss=1.01]  

epoch:9, idx:5799/10845, loss:1.0096354627968935, acc:0.6925862068965517


 54%|█████▍    | 5900/10845 [52:43<37:11,  2.22it/s, acc=0.692, epoch=9, loss=1.01]

epoch:9, idx:5899/10845, loss:1.009658783941956, acc:0.6923305084745762


 55%|█████▌    | 6000/10845 [53:34<46:23,  1.74it/s, acc=0.692, epoch=9, loss=1.01]  

epoch:9, idx:5999/10845, loss:1.0118243126422166, acc:0.69175


 56%|█████▌    | 6100/10845 [54:30<45:53,  1.72it/s, acc=0.691, epoch=9, loss=1.01]  

epoch:9, idx:6099/10845, loss:1.0133561421368942, acc:0.6914754098360656


 57%|█████▋    | 6200/10845 [55:24<30:41,  2.52it/s, acc=0.691, epoch=9, loss=1.01]  

epoch:9, idx:6199/10845, loss:1.0136889707321122, acc:0.6914112903225806


 58%|█████▊    | 6300/10845 [56:20<37:10,  2.04it/s, acc=0.691, epoch=9, loss=1.01]  

epoch:9, idx:6299/10845, loss:1.0141289541409129, acc:0.6913888888888889


 59%|█████▉    | 6400/10845 [57:17<41:52,  1.77it/s, acc=0.691, epoch=9, loss=1.01]  

epoch:9, idx:6399/10845, loss:1.013896204731427, acc:0.6910546875


 60%|█████▉    | 6500/10845 [58:11<34:39,  2.09it/s, acc=0.691, epoch=9, loss=1.01]  

epoch:9, idx:6499/10845, loss:1.0135997107533308, acc:0.6909615384615385


 61%|██████    | 6600/10845 [59:05<34:22,  2.06it/s, acc=0.691, epoch=9, loss=1.01]

epoch:9, idx:6599/10845, loss:1.0130086733491133, acc:0.6912878787878788


 62%|██████▏   | 6700/10845 [59:59<38:26,  1.80it/s, acc=0.691, epoch=9, loss=1.01]

epoch:9, idx:6699/10845, loss:1.013223588248687, acc:0.691231343283582


 63%|██████▎   | 6800/10845 [1:00:54<52:32,  1.28it/s, acc=0.691, epoch=9, loss=1.01]

epoch:9, idx:6799/10845, loss:1.0143796737316777, acc:0.6909191176470588


 64%|██████▎   | 6900/10845 [1:01:43<37:13,  1.77it/s, acc=0.691, epoch=9, loss=1.02]

epoch:9, idx:6899/10845, loss:1.015603235018426, acc:0.6907246376811594


 65%|██████▍   | 7000/10845 [1:02:39<37:08,  1.73it/s, acc=0.69, epoch=9, loss=1.02] 

epoch:9, idx:6999/10845, loss:1.016623811670712, acc:0.6901428571428572


 65%|██████▌   | 7100/10845 [1:03:29<33:31,  1.86it/s, acc=0.69, epoch=9, loss=1.02] 

epoch:9, idx:7099/10845, loss:1.0153232223383137, acc:0.690387323943662


 66%|██████▋   | 7200/10845 [1:04:21<41:32,  1.46it/s, acc=0.69, epoch=9, loss=1.02]

epoch:9, idx:7199/10845, loss:1.0150872764901981, acc:0.6902430555555555


 67%|██████▋   | 7300/10845 [1:05:14<36:55,  1.60it/s, acc=0.69, epoch=9, loss=1.02]

epoch:9, idx:7299/10845, loss:1.0154620846656903, acc:0.6898630136986301


 68%|██████▊   | 7400/10845 [1:06:11<24:05,  2.38it/s, acc=0.69, epoch=9, loss=1.01]

epoch:9, idx:7399/10845, loss:1.0145835115297421, acc:0.6900337837837838


 69%|██████▉   | 7500/10845 [1:07:04<25:05,  2.22it/s, acc=0.69, epoch=9, loss=1.02]

epoch:9, idx:7499/10845, loss:1.0162257532835006, acc:0.6897


 70%|███████   | 7600/10845 [1:08:00<30:34,  1.77it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:7599/10845, loss:1.015976578867749, acc:0.689375


 71%|███████   | 7700/10845 [1:08:57<28:02,  1.87it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:7699/10845, loss:1.015677912181074, acc:0.6893831168831169


 72%|███████▏  | 7800/10845 [1:09:52<24:51,  2.04it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:7799/10845, loss:1.0159367615290178, acc:0.6892628205128205


 73%|███████▎  | 7900/10845 [1:10:47<29:05,  1.69it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:7899/10845, loss:1.0162410747174975, acc:0.6892088607594937


 74%|███████▍  | 8000/10845 [1:11:39<22:26,  2.11it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:7999/10845, loss:1.016823573127389, acc:0.6891875


 75%|███████▍  | 8100/10845 [1:12:33<21:29,  2.13it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8099/10845, loss:1.0176857647115802, acc:0.6887345679012346


 76%|███████▌  | 8200/10845 [1:13:24<19:37,  2.25it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8199/10845, loss:1.0171019716960628, acc:0.6888109756097561


 77%|███████▋  | 8300/10845 [1:14:20<27:11,  1.56it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8299/10845, loss:1.0173547695415566, acc:0.6889759036144578


 77%|███████▋  | 8400/10845 [1:15:13<22:03,  1.85it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8399/10845, loss:1.0181729013792107, acc:0.6889285714285714


 78%|███████▊  | 8500/10845 [1:16:08<30:42,  1.27it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8499/10845, loss:1.0168693931803985, acc:0.6891764705882353


 79%|███████▉  | 8600/10845 [1:17:01<19:50,  1.89it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8599/10845, loss:1.0171985970680104, acc:0.6890116279069768


 80%|████████  | 8700/10845 [1:17:56<22:16,  1.61it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8699/10845, loss:1.0182685870548775, acc:0.6890229885057472


 81%|████████  | 8800/10845 [1:18:50<18:56,  1.80it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8799/10845, loss:1.0175158235904844, acc:0.6893465909090909


 82%|████████▏ | 8900/10845 [1:19:44<27:26,  1.18it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8899/10845, loss:1.0174437175975757, acc:0.6893258426966292


 83%|████████▎ | 9000/10845 [1:20:40<19:56,  1.54it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:8999/10845, loss:1.0175855187111431, acc:0.6893888888888889


 84%|████████▍ | 9100/10845 [1:21:31<10:31,  2.76it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9099/10845, loss:1.017340668433315, acc:0.6892857142857143


 85%|████████▍ | 9200/10845 [1:22:26<15:29,  1.77it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9199/10845, loss:1.016566047091847, acc:0.6893478260869565


 86%|████████▌ | 9301/10845 [1:23:17<09:06,  2.83it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9299/10845, loss:1.0174396914243697, acc:0.6893010752688172


 87%|████████▋ | 9400/10845 [1:24:10<14:29,  1.66it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9399/10845, loss:1.017465017025775, acc:0.6892287234042553


 88%|████████▊ | 9500/10845 [1:25:05<10:34,  2.12it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9499/10845, loss:1.0180751143819406, acc:0.6890526315789474


 89%|████████▊ | 9600/10845 [1:25:55<09:23,  2.21it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9599/10845, loss:1.0176561787600318, acc:0.6890625


 89%|████████▉ | 9700/10845 [1:26:54<14:08,  1.35it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9699/10845, loss:1.017207955198804, acc:0.6890721649484536


 90%|█████████ | 9800/10845 [1:27:51<09:02,  1.93it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9799/10845, loss:1.018113378411045, acc:0.6891836734693878


 91%|█████████▏| 9900/10845 [1:28:44<09:34,  1.65it/s, acc=0.689, epoch=9, loss=1.02]

epoch:9, idx:9899/10845, loss:1.0191892503578253, acc:0.6889141414141414


 92%|█████████▏| 10000/10845 [1:29:40<07:54,  1.78it/s, acc=0.688, epoch=9, loss=1.02]

epoch:9, idx:9999/10845, loss:1.020447319611907, acc:0.688425


 93%|█████████▎| 10100/10845 [1:30:31<05:47,  2.15it/s, acc=0.688, epoch=9, loss=1.02]

epoch:9, idx:10099/10845, loss:1.021003814584548, acc:0.6879207920792079


 94%|█████████▍| 10200/10845 [1:31:30<06:33,  1.64it/s, acc=0.688, epoch=9, loss=1.02]

epoch:9, idx:10199/10845, loss:1.0211877267705458, acc:0.6879901960784314


 95%|█████████▍| 10300/10845 [1:32:26<05:30,  1.65it/s, acc=0.688, epoch=9, loss=1.02]

epoch:9, idx:10299/10845, loss:1.0223806390918575, acc:0.6875


 96%|█████████▌| 10400/10845 [1:33:17<03:00,  2.47it/s, acc=0.687, epoch=9, loss=1.02]

epoch:9, idx:10399/10845, loss:1.0234845862193749, acc:0.6872115384615385


 97%|█████████▋| 10500/10845 [1:34:09<02:33,  2.25it/s, acc=0.687, epoch=9, loss=1.02]

epoch:9, idx:10499/10845, loss:1.0231259854294006, acc:0.6874047619047619


 98%|█████████▊| 10600/10845 [1:35:03<02:05,  1.96it/s, acc=0.688, epoch=9, loss=1.02]

epoch:9, idx:10599/10845, loss:1.0217657362233918, acc:0.6876415094339623


 99%|█████████▊| 10700/10845 [1:35:53<01:05,  2.23it/s, acc=0.688, epoch=9, loss=1.02]

epoch:9, idx:10699/10845, loss:1.0220529527251965, acc:0.687733644859813


100%|█████████▉| 10800/10845 [1:36:50<00:19,  2.29it/s, acc=0.688, epoch=9, loss=1.02]

epoch:9, idx:10799/10845, loss:1.0227578630933054, acc:0.6875


100%|██████████| 10845/10845 [1:37:15<00:00,  2.10it/s, acc=0.688, epoch=9, loss=1.02]


epoch:9, idx:0/1275, loss:0.80269455909729, acc:0.5
epoch:9, idx:100/1275, loss:1.2377184257648959, acc:0.6163366336633663
epoch:9, idx:200/1275, loss:1.1928728166504285, acc:0.6293532338308457
epoch:9, idx:300/1275, loss:1.1831197275275804, acc:0.6411960132890365
epoch:9, idx:400/1275, loss:1.1621497083185914, acc:0.6465087281795511
epoch:9, idx:500/1275, loss:1.162542928478675, acc:0.6437125748502994
epoch:9, idx:600/1275, loss:1.1730697759971047, acc:0.6385191347753744
epoch:9, idx:700/1275, loss:1.1777956872625799, acc:0.6394436519258203
epoch:9, idx:800/1275, loss:1.1865678421418169, acc:0.6363920099875156
epoch:9, idx:900/1275, loss:1.1768609339600795, acc:0.6342952275249722
epoch:9, idx:1000/1275, loss:1.1771601969069176, acc:0.6341158841158842
epoch:9, idx:1100/1275, loss:1.1711662417644808, acc:0.6351044504995459
epoch:9, idx:1200/1275, loss:1.171954751709518, acc:0.6334304746044963


  1%|          | 100/10845 [00:51<1:44:36,  1.71it/s, acc=0.72, epoch=10, loss=0.961]

epoch:10, idx:99/10845, loss:0.9611202296614647, acc:0.72


  2%|▏         | 200/10845 [01:45<1:48:42,  1.63it/s, acc=0.716, epoch=10, loss=0.958]

epoch:10, idx:199/10845, loss:0.9581876768171788, acc:0.71625


  3%|▎         | 300/10845 [02:44<1:51:51,  1.57it/s, acc=0.713, epoch=10, loss=0.975]

epoch:10, idx:299/10845, loss:0.9747639704744021, acc:0.7133333333333334


  4%|▎         | 400/10845 [03:37<1:24:18,  2.06it/s, acc=0.714, epoch=10, loss=0.984]

epoch:10, idx:399/10845, loss:0.984495488628745, acc:0.71375


  5%|▍         | 500/10845 [04:30<1:20:32,  2.14it/s, acc=0.724, epoch=10, loss=0.934]

epoch:10, idx:499/10845, loss:0.9335319961905479, acc:0.7235


  6%|▌         | 600/10845 [05:25<1:37:38,  1.75it/s, acc=0.718, epoch=10, loss=0.947]

epoch:10, idx:599/10845, loss:0.9469342714051405, acc:0.7183333333333334


  6%|▋         | 700/10845 [06:17<1:05:58,  2.56it/s, acc=0.714, epoch=10, loss=0.956]

epoch:10, idx:699/10845, loss:0.9562616311226573, acc:0.7139285714285715


  7%|▋         | 800/10845 [07:09<1:39:49,  1.68it/s, acc=0.716, epoch=10, loss=0.956]

epoch:10, idx:799/10845, loss:0.9564470023289323, acc:0.7159375


  8%|▊         | 900/10845 [08:04<1:26:13,  1.92it/s, acc=0.711, epoch=10, loss=0.965]

epoch:10, idx:899/10845, loss:0.964951458407773, acc:0.7113888888888888


  9%|▉         | 1000/10845 [08:53<1:25:59,  1.91it/s, acc=0.713, epoch=10, loss=0.958]

epoch:10, idx:999/10845, loss:0.9583990685045719, acc:0.713


 10%|█         | 1100/10845 [09:46<58:32,  2.77it/s, acc=0.715, epoch=10, loss=0.949]  

epoch:10, idx:1099/10845, loss:0.9494259241765196, acc:0.7152272727272727


 11%|█         | 1200/10845 [10:42<1:34:36,  1.70it/s, acc=0.714, epoch=10, loss=0.954]

epoch:10, idx:1199/10845, loss:0.9542759834478299, acc:0.71375


 12%|█▏        | 1300/10845 [11:35<1:31:43,  1.73it/s, acc=0.716, epoch=10, loss=0.944]

epoch:10, idx:1299/10845, loss:0.9440913007580317, acc:0.7157692307692308


 13%|█▎        | 1400/10845 [12:28<57:21,  2.74it/s, acc=0.717, epoch=10, loss=0.94]   

epoch:10, idx:1399/10845, loss:0.9397572936969144, acc:0.7169642857142857


 14%|█▍        | 1500/10845 [13:18<1:42:07,  1.53it/s, acc=0.714, epoch=10, loss=0.949]

epoch:10, idx:1499/10845, loss:0.9493094437718391, acc:0.7143333333333334


 15%|█▍        | 1600/10845 [14:17<1:31:40,  1.68it/s, acc=0.713, epoch=10, loss=0.951]

epoch:10, idx:1599/10845, loss:0.9509554623253643, acc:0.7128125


 16%|█▌        | 1700/10845 [15:13<1:13:31,  2.07it/s, acc=0.712, epoch=10, loss=0.951]

epoch:10, idx:1699/10845, loss:0.9508516033782678, acc:0.711764705882353


 17%|█▋        | 1800/10845 [16:11<1:19:11,  1.90it/s, acc=0.711, epoch=10, loss=0.953]

epoch:10, idx:1799/10845, loss:0.9529125403861205, acc:0.7105555555555556


 18%|█▊        | 1900/10845 [17:05<1:04:20,  2.32it/s, acc=0.711, epoch=10, loss=0.953]

epoch:10, idx:1899/10845, loss:0.9533311067286291, acc:0.7105263157894737


 18%|█▊        | 2000/10845 [18:01<1:18:40,  1.87it/s, acc=0.707, epoch=10, loss=0.963]

epoch:10, idx:1999/10845, loss:0.9632664891928434, acc:0.707375


 19%|█▉        | 2100/10845 [18:59<1:33:46,  1.55it/s, acc=0.706, epoch=10, loss=0.966]

epoch:10, idx:2099/10845, loss:0.9659000899536269, acc:0.705595238095238


 20%|██        | 2200/10845 [19:54<1:19:49,  1.80it/s, acc=0.704, epoch=10, loss=0.972]

epoch:10, idx:2199/10845, loss:0.9723404517092488, acc:0.7042045454545455


 21%|██        | 2300/10845 [20:51<1:20:39,  1.77it/s, acc=0.702, epoch=10, loss=0.974]

epoch:10, idx:2299/10845, loss:0.9736754086354504, acc:0.7022826086956522


 22%|██▏       | 2400/10845 [21:43<1:13:34,  1.91it/s, acc=0.703, epoch=10, loss=0.972]

epoch:10, idx:2399/10845, loss:0.9722059812024235, acc:0.7027083333333334


 23%|██▎       | 2500/10845 [22:36<1:06:33,  2.09it/s, acc=0.704, epoch=10, loss=0.969]

epoch:10, idx:2499/10845, loss:0.9686435189843178, acc:0.7039


 24%|██▍       | 2600/10845 [23:32<1:13:41,  1.86it/s, acc=0.705, epoch=10, loss=0.964]

epoch:10, idx:2599/10845, loss:0.9644759083138063, acc:0.7048076923076924


 25%|██▍       | 2700/10845 [24:30<1:32:03,  1.47it/s, acc=0.705, epoch=10, loss=0.966]

epoch:10, idx:2699/10845, loss:0.9662721028261715, acc:0.7045370370370371


 26%|██▌       | 2800/10845 [25:23<1:36:24,  1.39it/s, acc=0.705, epoch=10, loss=0.964]

epoch:10, idx:2799/10845, loss:0.9644232224247286, acc:0.7049107142857143


 27%|██▋       | 2900/10845 [26:23<1:20:51,  1.64it/s, acc=0.705, epoch=10, loss=0.965]

epoch:10, idx:2899/10845, loss:0.9645940471414862, acc:0.7048275862068966


 28%|██▊       | 3000/10845 [27:19<1:22:13,  1.59it/s, acc=0.705, epoch=10, loss=0.963]

epoch:10, idx:2999/10845, loss:0.9626412020921707, acc:0.7049166666666666


 29%|██▊       | 3100/10845 [28:11<1:01:22,  2.10it/s, acc=0.705, epoch=10, loss=0.965]

epoch:10, idx:3099/10845, loss:0.9654071945913376, acc:0.7045967741935484


 30%|██▉       | 3200/10845 [29:08<1:43:43,  1.23it/s, acc=0.704, epoch=10, loss=0.967]

epoch:10, idx:3199/10845, loss:0.9667178744636477, acc:0.703671875


 30%|███       | 3300/10845 [29:57<59:17,  2.12it/s, acc=0.704, epoch=10, loss=0.967]  

epoch:10, idx:3299/10845, loss:0.9672648627107794, acc:0.703939393939394


 31%|███▏      | 3400/10845 [30:49<1:07:30,  1.84it/s, acc=0.704, epoch=10, loss=0.97] 

epoch:10, idx:3399/10845, loss:0.969798042406054, acc:0.7038970588235294


 32%|███▏      | 3500/10845 [31:44<1:06:54,  1.83it/s, acc=0.703, epoch=10, loss=0.974]

epoch:10, idx:3499/10845, loss:0.973867587174688, acc:0.703


 33%|███▎      | 3600/10845 [32:38<53:23,  2.26it/s, acc=0.702, epoch=10, loss=0.976]  

epoch:10, idx:3599/10845, loss:0.9755684917668501, acc:0.7023611111111111


 34%|███▍      | 3700/10845 [33:33<1:04:48,  1.84it/s, acc=0.702, epoch=10, loss=0.979]

epoch:10, idx:3699/10845, loss:0.9785377975412317, acc:0.701554054054054


 35%|███▌      | 3800/10845 [34:28<57:55,  2.03it/s, acc=0.702, epoch=10, loss=0.978]  

epoch:10, idx:3799/10845, loss:0.977668512981189, acc:0.7021052631578948


 36%|███▌      | 3900/10845 [35:22<49:08,  2.36it/s, acc=0.702, epoch=10, loss=0.977]  

epoch:10, idx:3899/10845, loss:0.9770310474664737, acc:0.7018589743589744


 37%|███▋      | 4000/10845 [36:14<52:49,  2.16it/s, acc=0.702, epoch=10, loss=0.978]  

epoch:10, idx:3999/10845, loss:0.9782460884600878, acc:0.7016875


 38%|███▊      | 4100/10845 [37:05<1:00:27,  1.86it/s, acc=0.701, epoch=10, loss=0.979]

epoch:10, idx:4099/10845, loss:0.9789692548862318, acc:0.7014634146341463


 39%|███▊      | 4200/10845 [37:58<1:02:39,  1.77it/s, acc=0.702, epoch=10, loss=0.98] 

epoch:10, idx:4199/10845, loss:0.9798292879121644, acc:0.7015476190476191


 40%|███▉      | 4300/10845 [38:52<46:18,  2.36it/s, acc=0.702, epoch=10, loss=0.98]   

epoch:10, idx:4299/10845, loss:0.9798186732516733, acc:0.7019186046511627


 41%|████      | 4400/10845 [39:47<1:24:59,  1.26it/s, acc=0.702, epoch=10, loss=0.981]

epoch:10, idx:4399/10845, loss:0.9810864768109538, acc:0.7016477272727273


 41%|████▏     | 4500/10845 [40:40<51:13,  2.06it/s, acc=0.701, epoch=10, loss=0.981]  

epoch:10, idx:4499/10845, loss:0.9814794083171421, acc:0.7012222222222222


 42%|████▏     | 4600/10845 [41:34<1:03:24,  1.64it/s, acc=0.702, epoch=10, loss=0.979]

epoch:10, idx:4599/10845, loss:0.9792768398186434, acc:0.7020652173913043


 43%|████▎     | 4700/10845 [42:25<39:29,  2.59it/s, acc=0.701, epoch=10, loss=0.983]  

epoch:10, idx:4699/10845, loss:0.9826439271074661, acc:0.7014361702127659


 44%|████▍     | 4800/10845 [43:17<40:26,  2.49it/s, acc=0.701, epoch=10, loss=0.986]  

epoch:10, idx:4799/10845, loss:0.9857933303713798, acc:0.7008854166666667


 45%|████▌     | 4900/10845 [44:09<42:49,  2.31it/s, acc=0.702, epoch=10, loss=0.984]  

epoch:10, idx:4899/10845, loss:0.9835347109303183, acc:0.7016836734693878


 46%|████▌     | 5000/10845 [45:02<1:01:49,  1.58it/s, acc=0.702, epoch=10, loss=0.983]

epoch:10, idx:4999/10845, loss:0.9826474599599838, acc:0.70195


 47%|████▋     | 5100/10845 [45:53<52:06,  1.84it/s, acc=0.702, epoch=10, loss=0.985]  

epoch:10, idx:5099/10845, loss:0.9845669306727016, acc:0.7016176470588236


 48%|████▊     | 5200/10845 [46:48<44:57,  2.09it/s, acc=0.702, epoch=10, loss=0.985]  

epoch:10, idx:5199/10845, loss:0.9847344769308201, acc:0.7015865384615385


 49%|████▉     | 5300/10845 [47:46<40:45,  2.27it/s, acc=0.701, epoch=10, loss=0.985]  

epoch:10, idx:5299/10845, loss:0.9849420008232008, acc:0.7014150943396227


 50%|████▉     | 5400/10845 [48:38<54:37,  1.66it/s, acc=0.702, epoch=10, loss=0.983]  

epoch:10, idx:5399/10845, loss:0.982602157537584, acc:0.702037037037037


 51%|█████     | 5500/10845 [49:31<51:55,  1.72it/s, acc=0.702, epoch=10, loss=0.982]  

epoch:10, idx:5499/10845, loss:0.9821199739737945, acc:0.7018181818181818


 52%|█████▏    | 5600/10845 [50:21<36:51,  2.37it/s, acc=0.702, epoch=10, loss=0.981]  

epoch:10, idx:5599/10845, loss:0.9809746507023062, acc:0.7022321428571429


 53%|█████▎    | 5700/10845 [51:13<41:32,  2.06it/s, acc=0.702, epoch=10, loss=0.98]   

epoch:10, idx:5699/10845, loss:0.9799186377567157, acc:0.7024561403508772


 53%|█████▎    | 5800/10845 [52:06<45:59,  1.83it/s, acc=0.702, epoch=10, loss=0.981]  

epoch:10, idx:5799/10845, loss:0.9806358971266911, acc:0.701853448275862


 54%|█████▍    | 5900/10845 [53:00<46:09,  1.79it/s, acc=0.701, epoch=10, loss=0.983]  

epoch:10, idx:5899/10845, loss:0.9827868050841961, acc:0.7013135593220339


 55%|█████▌    | 6000/10845 [53:54<45:53,  1.76it/s, acc=0.702, epoch=10, loss=0.983]  

epoch:10, idx:5999/10845, loss:0.9832350760996341, acc:0.7015833333333333


 56%|█████▌    | 6100/10845 [54:48<34:35,  2.29it/s, acc=0.701, epoch=10, loss=0.984]  

epoch:10, idx:6099/10845, loss:0.9837542113612909, acc:0.700983606557377


 57%|█████▋    | 6200/10845 [55:43<41:23,  1.87it/s, acc=0.701, epoch=10, loss=0.985]  

epoch:10, idx:6199/10845, loss:0.9846675634768701, acc:0.7006048387096774


 58%|█████▊    | 6300/10845 [56:38<35:51,  2.11it/s, acc=0.7, epoch=10, loss=0.986]  

epoch:10, idx:6299/10845, loss:0.9858857789304522, acc:0.7003968253968254


 59%|█████▉    | 6400/10845 [57:30<32:34,  2.27it/s, acc=0.7, epoch=10, loss=0.987]  

epoch:10, idx:6399/10845, loss:0.9874732372909785, acc:0.7


 60%|█████▉    | 6500/10845 [58:28<48:33,  1.49it/s, acc=0.7, epoch=10, loss=0.99]   

epoch:10, idx:6499/10845, loss:0.9896636689809652, acc:0.6997307692307693


 61%|██████    | 6600/10845 [59:25<40:46,  1.74it/s, acc=0.7, epoch=10, loss=0.989]  

epoch:10, idx:6599/10845, loss:0.9893056502938271, acc:0.6995454545454546


 62%|██████▏   | 6700/10845 [1:00:20<31:27,  2.20it/s, acc=0.699, epoch=10, loss=0.99] 

epoch:10, idx:6699/10845, loss:0.989868755376161, acc:0.6992537313432836


 63%|██████▎   | 6800/10845 [1:01:15<39:46,  1.69it/s, acc=0.699, epoch=10, loss=0.99]   

epoch:10, idx:6799/10845, loss:0.990151423145743, acc:0.6992647058823529


 64%|██████▎   | 6901/10845 [1:02:16<26:27,  2.48it/s, acc=0.699, epoch=10, loss=0.991]

epoch:10, idx:6899/10845, loss:0.9914149184762567, acc:0.6990942028985507


 65%|██████▍   | 7000/10845 [1:03:11<37:17,  1.72it/s, acc=0.699, epoch=10, loss=0.992]

epoch:10, idx:6999/10845, loss:0.991561869766031, acc:0.6989285714285715


 65%|██████▌   | 7100/10845 [1:04:05<26:48,  2.33it/s, acc=0.698, epoch=10, loss=0.995]

epoch:10, idx:7099/10845, loss:0.995028591659707, acc:0.6984507042253522


 66%|██████▋   | 7200/10845 [1:05:00<37:25,  1.62it/s, acc=0.698, epoch=10, loss=0.995]

epoch:10, idx:7199/10845, loss:0.9950580501308044, acc:0.6982638888888889


 67%|██████▋   | 7300/10845 [1:05:58<22:50,  2.59it/s, acc=0.698, epoch=10, loss=0.995]

epoch:10, idx:7299/10845, loss:0.9949335617888464, acc:0.6983561643835616


 68%|██████▊   | 7400/10845 [1:06:55<24:43,  2.32it/s, acc=0.699, epoch=10, loss=0.995]

epoch:10, idx:7399/10845, loss:0.9946850406157004, acc:0.6986148648648649


 69%|██████▉   | 7500/10845 [1:07:51<26:27,  2.11it/s, acc=0.698, epoch=10, loss=0.995]

epoch:10, idx:7499/10845, loss:0.9950124199231466, acc:0.6983


 70%|███████   | 7600/10845 [1:08:45<23:54,  2.26it/s, acc=0.698, epoch=10, loss=0.996]

epoch:10, idx:7599/10845, loss:0.9955156328254624, acc:0.6982236842105263


 71%|███████   | 7700/10845 [1:09:38<29:08,  1.80it/s, acc=0.699, epoch=10, loss=0.994]

epoch:10, idx:7699/10845, loss:0.9936633483543025, acc:0.6986363636363636


 72%|███████▏  | 7800/10845 [1:10:31<19:42,  2.58it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:7799/10845, loss:0.993106560248595, acc:0.6990705128205128


 73%|███████▎  | 7900/10845 [1:11:25<24:33,  2.00it/s, acc=0.699, epoch=10, loss=0.992]

epoch:10, idx:7899/10845, loss:0.99198814137827, acc:0.6990822784810127


 74%|███████▍  | 8000/10845 [1:12:21<28:13,  1.68it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:7999/10845, loss:0.9927099260017276, acc:0.699


 75%|███████▍  | 8100/10845 [1:13:13<29:28,  1.55it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8099/10845, loss:0.9933986724011692, acc:0.6989197530864197


 76%|███████▌  | 8200/10845 [1:14:05<25:56,  1.70it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8199/10845, loss:0.993196512853227, acc:0.6985975609756098


 77%|███████▋  | 8300/10845 [1:15:00<21:53,  1.94it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8299/10845, loss:0.9931952782639538, acc:0.698644578313253


 77%|███████▋  | 8400/10845 [1:15:55<27:44,  1.47it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8399/10845, loss:0.9930306659426008, acc:0.6988988095238096


 78%|███████▊  | 8500/10845 [1:16:50<24:05,  1.62it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8499/10845, loss:0.9932322535865447, acc:0.6987058823529412


 79%|███████▉  | 8600/10845 [1:17:47<24:45,  1.51it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8599/10845, loss:0.9934193247279456, acc:0.6986627906976745


 80%|████████  | 8700/10845 [1:18:37<20:04,  1.78it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8699/10845, loss:0.9934771800897587, acc:0.6988793103448275


 81%|████████  | 8800/10845 [1:19:34<30:35,  1.11it/s, acc=0.699, epoch=10, loss=0.994]

epoch:10, idx:8799/10845, loss:0.9938584885712375, acc:0.6986647727272727


 82%|████████▏ | 8900/10845 [1:20:28<23:38,  1.37it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:8899/10845, loss:0.993219224512577, acc:0.6988202247191011


 83%|████████▎ | 9000/10845 [1:21:24<20:18,  1.51it/s, acc=0.698, epoch=10, loss=0.995]

epoch:10, idx:8999/10845, loss:0.9945141314036317, acc:0.6983333333333334


 84%|████████▍ | 9100/10845 [1:22:19<13:46,  2.11it/s, acc=0.698, epoch=10, loss=0.994]

epoch:10, idx:9099/10845, loss:0.99416152460562, acc:0.6984615384615385


 85%|████████▍ | 9200/10845 [1:23:15<16:15,  1.69it/s, acc=0.699, epoch=10, loss=0.993]

epoch:10, idx:9199/10845, loss:0.9932479965136103, acc:0.6986141304347826


 86%|████████▌ | 9300/10845 [1:24:15<18:04,  1.42it/s, acc=0.698, epoch=10, loss=0.994]

epoch:10, idx:9299/10845, loss:0.9937714687822967, acc:0.6984408602150538


 87%|████████▋ | 9400/10845 [1:25:10<11:05,  2.17it/s, acc=0.698, epoch=10, loss=0.995]

epoch:10, idx:9399/10845, loss:0.9948209572060311, acc:0.6981117021276596


 88%|████████▊ | 9500/10845 [1:26:02<09:34,  2.34it/s, acc=0.698, epoch=10, loss=0.996]

epoch:10, idx:9499/10845, loss:0.9957530789343935, acc:0.6979473684210526


 89%|████████▊ | 9600/10845 [1:26:55<10:54,  1.90it/s, acc=0.698, epoch=10, loss=0.997]

epoch:10, idx:9599/10845, loss:0.9972028302183996, acc:0.6978645833333333


 89%|████████▉ | 9700/10845 [1:27:50<10:37,  1.80it/s, acc=0.698, epoch=10, loss=0.998]

epoch:10, idx:9699/10845, loss:0.9977820291992315, acc:0.697680412371134


 90%|█████████ | 9800/10845 [1:28:45<07:43,  2.26it/s, acc=0.697, epoch=10, loss=1]    

epoch:10, idx:9799/10845, loss:1.0004593691077768, acc:0.6970918367346939


 91%|█████████▏| 9900/10845 [1:29:41<10:27,  1.51it/s, acc=0.697, epoch=10, loss=1]

epoch:10, idx:9899/10845, loss:1.000927490619096, acc:0.6969191919191919


 92%|█████████▏| 10000/10845 [1:30:30<05:41,  2.48it/s, acc=0.697, epoch=10, loss=1]

epoch:10, idx:9999/10845, loss:0.9997667907863855, acc:0.69725


 93%|█████████▎| 10100/10845 [1:31:23<06:00,  2.07it/s, acc=0.697, epoch=10, loss=1]

epoch:10, idx:10099/10845, loss:1.0003411428733628, acc:0.6971534653465347


 94%|█████████▍| 10200/10845 [1:32:15<06:16,  1.71it/s, acc=0.696, epoch=10, loss=1]

epoch:10, idx:10199/10845, loss:1.0044142552246065, acc:0.6960049019607844


 95%|█████████▍| 10300/10845 [1:33:09<06:25,  1.41it/s, acc=0.695, epoch=10, loss=1.01]

epoch:10, idx:10299/10845, loss:1.0067714974110566, acc:0.6954854368932039


 96%|█████████▌| 10400/10845 [1:34:03<02:55,  2.53it/s, acc=0.695, epoch=10, loss=1.01]

epoch:10, idx:10399/10845, loss:1.0082083625375078, acc:0.6953846153846154


 97%|█████████▋| 10500/10845 [1:34:59<03:15,  1.76it/s, acc=0.695, epoch=10, loss=1.01]

epoch:10, idx:10499/10845, loss:1.0094853830592974, acc:0.694952380952381


 98%|█████████▊| 10600/10845 [1:35:53<02:15,  1.81it/s, acc=0.695, epoch=10, loss=1.01]

epoch:10, idx:10599/10845, loss:1.0091872999999882, acc:0.6949528301886793


 99%|█████████▊| 10700/10845 [1:36:46<01:06,  2.17it/s, acc=0.695, epoch=10, loss=1.01]

epoch:10, idx:10699/10845, loss:1.0094211764285497, acc:0.6949532710280374


100%|█████████▉| 10800/10845 [1:37:42<00:20,  2.19it/s, acc=0.695, epoch=10, loss=1.01]

epoch:10, idx:10799/10845, loss:1.0086935691949395, acc:0.695162037037037


100%|██████████| 10845/10845 [1:38:04<00:00,  1.97it/s, acc=0.695, epoch=10, loss=1.01]


epoch:10, idx:0/1275, loss:0.864067554473877, acc:0.5
epoch:10, idx:100/1275, loss:1.2094499468803406, acc:0.6386138613861386
epoch:10, idx:200/1275, loss:1.1608011529813358, acc:0.6343283582089553
epoch:10, idx:300/1275, loss:1.1471696408880114, acc:0.6428571428571429
epoch:10, idx:400/1275, loss:1.1357898716617403, acc:0.6415211970074813
epoch:10, idx:500/1275, loss:1.1372978224487837, acc:0.6402195608782435
epoch:10, idx:600/1275, loss:1.1451008478535987, acc:0.6376871880199667
epoch:10, idx:700/1275, loss:1.1518925780406521, acc:0.634450784593438
epoch:10, idx:800/1275, loss:1.1625668834360054, acc:0.6338951310861424
epoch:10, idx:900/1275, loss:1.153371046835786, acc:0.6362375138734739
epoch:10, idx:1000/1275, loss:1.154653022339294, acc:0.6373626373626373
epoch:10, idx:1100/1275, loss:1.1462620699762973, acc:0.6394187102633969
epoch:10, idx:1200/1275, loss:1.1467565132318984, acc:0.6384263114071607


  1%|          | 100/10845 [00:49<1:37:16,  1.84it/s, acc=0.72, epoch=11, loss=0.942]

epoch:11, idx:99/10845, loss:0.942401003241539, acc:0.72


  2%|▏         | 200/10845 [01:44<2:11:19,  1.35it/s, acc=0.713, epoch=11, loss=0.909]

epoch:11, idx:199/10845, loss:0.9090732678771019, acc:0.7125


  3%|▎         | 300/10845 [02:37<1:21:12,  2.16it/s, acc=0.716, epoch=11, loss=0.905]

epoch:11, idx:299/10845, loss:0.9047632010777792, acc:0.7158333333333333


  4%|▎         | 400/10845 [03:34<1:59:02,  1.46it/s, acc=0.719, epoch=11, loss=0.913]

epoch:11, idx:399/10845, loss:0.9127975259721279, acc:0.719375


  5%|▍         | 500/10845 [04:27<1:29:24,  1.93it/s, acc=0.723, epoch=11, loss=0.906]

epoch:11, idx:499/10845, loss:0.9063355855941773, acc:0.7225


  6%|▌         | 600/10845 [05:21<1:27:36,  1.95it/s, acc=0.725, epoch=11, loss=0.909]

epoch:11, idx:599/10845, loss:0.9089254434903463, acc:0.7245833333333334


  6%|▋         | 700/10845 [06:18<1:20:23,  2.10it/s, acc=0.726, epoch=11, loss=0.901]

epoch:11, idx:699/10845, loss:0.9011136920963014, acc:0.7257142857142858


  7%|▋         | 800/10845 [07:14<1:32:36,  1.81it/s, acc=0.73, epoch=11, loss=0.893] 

epoch:11, idx:799/10845, loss:0.8934601901471615, acc:0.7296875


  8%|▊         | 900/10845 [08:06<1:20:03,  2.07it/s, acc=0.729, epoch=11, loss=0.896]

epoch:11, idx:899/10845, loss:0.8963410472207599, acc:0.7286111111111111


  9%|▉         | 1000/10845 [08:59<1:14:34,  2.20it/s, acc=0.726, epoch=11, loss=0.903]

epoch:11, idx:999/10845, loss:0.903495584666729, acc:0.7255


 10%|█         | 1100/10845 [09:50<1:14:14,  2.19it/s, acc=0.724, epoch=11, loss=0.913]

epoch:11, idx:1099/10845, loss:0.9127322670546445, acc:0.7236363636363636


 11%|█         | 1200/10845 [10:46<1:15:59,  2.12it/s, acc=0.724, epoch=11, loss=0.908]

epoch:11, idx:1199/10845, loss:0.9077377547323704, acc:0.7235416666666666


 12%|█▏        | 1300/10845 [11:41<1:29:04,  1.79it/s, acc=0.723, epoch=11, loss=0.908]

epoch:11, idx:1299/10845, loss:0.907600424518952, acc:0.7230769230769231


 13%|█▎        | 1400/10845 [12:38<1:47:49,  1.46it/s, acc=0.721, epoch=11, loss=0.918]

epoch:11, idx:1399/10845, loss:0.9180805359993662, acc:0.7214285714285714


 14%|█▍        | 1500/10845 [13:39<1:24:55,  1.83it/s, acc=0.719, epoch=11, loss=0.925]

epoch:11, idx:1499/10845, loss:0.9252135973771413, acc:0.7193333333333334


 15%|█▍        | 1600/10845 [14:31<1:11:45,  2.15it/s, acc=0.717, epoch=11, loss=0.932]

epoch:11, idx:1599/10845, loss:0.9320924621075392, acc:0.716875


 16%|█▌        | 1700/10845 [15:29<1:27:47,  1.74it/s, acc=0.715, epoch=11, loss=0.937]

epoch:11, idx:1699/10845, loss:0.9370729828231474, acc:0.7154411764705882


 17%|█▋        | 1800/10845 [16:22<1:11:30,  2.11it/s, acc=0.715, epoch=11, loss=0.936]

epoch:11, idx:1799/10845, loss:0.9358144955833753, acc:0.7151388888888889


 18%|█▊        | 1900/10845 [17:14<1:33:37,  1.59it/s, acc=0.713, epoch=11, loss=0.945]

epoch:11, idx:1899/10845, loss:0.9446353080711867, acc:0.713421052631579


 18%|█▊        | 2000/10845 [18:10<2:20:21,  1.05it/s, acc=0.714, epoch=11, loss=0.941]

epoch:11, idx:1999/10845, loss:0.9411678494513035, acc:0.71425


 19%|█▉        | 2100/10845 [19:05<1:19:52,  1.82it/s, acc=0.714, epoch=11, loss=0.941]

epoch:11, idx:2099/10845, loss:0.9405306982142585, acc:0.7142857142857143


 20%|██        | 2200/10845 [20:00<57:20,  2.51it/s, acc=0.714, epoch=11, loss=0.945]  

epoch:11, idx:2199/10845, loss:0.9449211859703064, acc:0.7140909090909091


 21%|██        | 2300/10845 [20:51<1:05:53,  2.16it/s, acc=0.715, epoch=11, loss=0.94] 

epoch:11, idx:2299/10845, loss:0.9400173860529195, acc:0.7145652173913043


 22%|██▏       | 2400/10845 [21:39<58:27,  2.41it/s, acc=0.714, epoch=11, loss=0.94]   

epoch:11, idx:2399/10845, loss:0.9404053874313831, acc:0.7142708333333333


 23%|██▎       | 2500/10845 [22:34<1:09:51,  1.99it/s, acc=0.714, epoch=11, loss=0.939]

epoch:11, idx:2499/10845, loss:0.938702019739151, acc:0.7139


 24%|██▍       | 2600/10845 [23:29<55:51,  2.46it/s, acc=0.714, epoch=11, loss=0.939]  

epoch:11, idx:2599/10845, loss:0.9387367646052287, acc:0.71375


 25%|██▍       | 2700/10845 [24:25<1:24:02,  1.62it/s, acc=0.714, epoch=11, loss=0.94] 

epoch:11, idx:2699/10845, loss:0.9401460496363816, acc:0.7138888888888889


 26%|██▌       | 2800/10845 [25:16<57:03,  2.35it/s, acc=0.714, epoch=11, loss=0.937]  

epoch:11, idx:2799/10845, loss:0.9370564359000751, acc:0.7142857142857143


 27%|██▋       | 2900/10845 [26:10<47:41,  2.78it/s, acc=0.714, epoch=11, loss=0.939]  

epoch:11, idx:2899/10845, loss:0.9393591890458403, acc:0.7143103448275862


 28%|██▊       | 3000/10845 [26:58<1:00:45,  2.15it/s, acc=0.714, epoch=11, loss=0.941]

epoch:11, idx:2999/10845, loss:0.9408629235823949, acc:0.7139166666666666


 29%|██▊       | 3100/10845 [27:53<1:04:43,  1.99it/s, acc=0.714, epoch=11, loss=0.939]

epoch:11, idx:3099/10845, loss:0.9392566574965754, acc:0.7141129032258065


 30%|██▉       | 3200/10845 [28:48<1:02:36,  2.03it/s, acc=0.713, epoch=11, loss=0.944]

epoch:11, idx:3199/10845, loss:0.9435495574399829, acc:0.71296875


 30%|███       | 3300/10845 [29:41<1:09:58,  1.80it/s, acc=0.714, epoch=11, loss=0.942]

epoch:11, idx:3299/10845, loss:0.942308420231848, acc:0.7136363636363636


 31%|███▏      | 3400/10845 [30:35<1:12:26,  1.71it/s, acc=0.713, epoch=11, loss=0.941]

epoch:11, idx:3399/10845, loss:0.9410351672943901, acc:0.7134558823529412


 32%|███▏      | 3500/10845 [31:30<1:21:50,  1.50it/s, acc=0.713, epoch=11, loss=0.942]

epoch:11, idx:3499/10845, loss:0.9420505814892906, acc:0.7134285714285714


 33%|███▎      | 3600/10845 [32:22<1:15:34,  1.60it/s, acc=0.712, epoch=11, loss=0.945]

epoch:11, idx:3599/10845, loss:0.9453959737055831, acc:0.7120833333333333


 34%|███▍      | 3700/10845 [33:14<1:02:59,  1.89it/s, acc=0.712, epoch=11, loss=0.945]

epoch:11, idx:3699/10845, loss:0.9452760771319673, acc:0.7118918918918918


 35%|███▌      | 3801/10845 [34:10<54:54,  2.14it/s, acc=0.712, epoch=11, loss=0.947]  

epoch:11, idx:3799/10845, loss:0.9469068143791274, acc:0.7123684210526315


 36%|███▌      | 3900/10845 [35:03<1:04:43,  1.79it/s, acc=0.711, epoch=11, loss=0.948]

epoch:11, idx:3899/10845, loss:0.9483835939107798, acc:0.7110897435897436


 37%|███▋      | 4000/10845 [36:02<1:05:41,  1.74it/s, acc=0.71, epoch=11, loss=0.95]  

epoch:11, idx:3999/10845, loss:0.9504479245096445, acc:0.7100625


 38%|███▊      | 4100/10845 [36:58<1:00:58,  1.84it/s, acc=0.71, epoch=11, loss=0.95]  

epoch:11, idx:4099/10845, loss:0.9504509170317068, acc:0.7101219512195122


 39%|███▊      | 4200/10845 [37:52<1:00:07,  1.84it/s, acc=0.71, epoch=11, loss=0.952]

epoch:11, idx:4199/10845, loss:0.9523476216055098, acc:0.7096428571428571


 40%|███▉      | 4300/10845 [38:46<54:49,  1.99it/s, acc=0.709, epoch=11, loss=0.952]  

epoch:11, idx:4299/10845, loss:0.9520640736402467, acc:0.7093023255813954


 41%|████      | 4400/10845 [39:39<57:28,  1.87it/s, acc=0.709, epoch=11, loss=0.951]  

epoch:11, idx:4399/10845, loss:0.9509179846265099, acc:0.7094318181818182


 41%|████▏     | 4500/10845 [40:29<1:03:00,  1.68it/s, acc=0.708, epoch=11, loss=0.954]

epoch:11, idx:4499/10845, loss:0.954295241329405, acc:0.7083888888888888


 42%|████▏     | 4600/10845 [41:26<59:48,  1.74it/s, acc=0.708, epoch=11, loss=0.955]  

epoch:11, idx:4599/10845, loss:0.9550139740886896, acc:0.7083695652173913


 43%|████▎     | 4700/10845 [42:23<55:12,  1.85it/s, acc=0.708, epoch=11, loss=0.956]  

epoch:11, idx:4699/10845, loss:0.9564092227879991, acc:0.7077127659574468


 44%|████▍     | 4800/10845 [43:20<42:04,  2.39it/s, acc=0.708, epoch=11, loss=0.957]  

epoch:11, idx:4799/10845, loss:0.9568556615213553, acc:0.7078125


 45%|████▌     | 4900/10845 [44:13<52:55,  1.87it/s, acc=0.708, epoch=11, loss=0.956]  

epoch:11, idx:4899/10845, loss:0.9563414258373027, acc:0.7081122448979592


 46%|████▌     | 5000/10845 [45:09<39:08,  2.49it/s, acc=0.708, epoch=11, loss=0.955]  

epoch:11, idx:4999/10845, loss:0.9549504229068756, acc:0.708


 47%|████▋     | 5100/10845 [46:02<41:18,  2.32it/s, acc=0.708, epoch=11, loss=0.958]  

epoch:11, idx:5099/10845, loss:0.957907924546915, acc:0.7076470588235294


 48%|████▊     | 5200/10845 [46:57<57:40,  1.63it/s, acc=0.708, epoch=11, loss=0.958]  

epoch:11, idx:5199/10845, loss:0.9582179626822471, acc:0.7078846153846153


 49%|████▉     | 5300/10845 [47:51<43:24,  2.13it/s, acc=0.708, epoch=11, loss=0.959]  

epoch:11, idx:5299/10845, loss:0.9585839155035198, acc:0.7079716981132076


 50%|████▉     | 5400/10845 [48:44<42:40,  2.13it/s, acc=0.708, epoch=11, loss=0.962]  

epoch:11, idx:5399/10845, loss:0.9616985087703775, acc:0.7075


 51%|█████     | 5500/10845 [49:42<1:06:26,  1.34it/s, acc=0.707, epoch=11, loss=0.963]

epoch:11, idx:5499/10845, loss:0.9625427452325821, acc:0.7071363636363637


 52%|█████▏    | 5600/10845 [50:33<46:39,  1.87it/s, acc=0.707, epoch=11, loss=0.962]  

epoch:11, idx:5599/10845, loss:0.9621162228924888, acc:0.7071428571428572


 53%|█████▎    | 5700/10845 [51:27<34:56,  2.45it/s, acc=0.707, epoch=11, loss=0.963]  

epoch:11, idx:5699/10845, loss:0.9625669709527702, acc:0.7073245614035087


 53%|█████▎    | 5800/10845 [52:22<38:04,  2.21it/s, acc=0.708, epoch=11, loss=0.961]  

epoch:11, idx:5799/10845, loss:0.9607038355695791, acc:0.7077155172413793


 54%|█████▍    | 5900/10845 [53:14<47:32,  1.73it/s, acc=0.708, epoch=11, loss=0.961]  

epoch:11, idx:5899/10845, loss:0.9612951444266206, acc:0.7075847457627119


 55%|█████▌    | 6000/10845 [54:09<34:41,  2.33it/s, acc=0.708, epoch=11, loss=0.961]  

epoch:11, idx:5999/10845, loss:0.9612047664225102, acc:0.7079583333333334


 56%|█████▌    | 6100/10845 [55:01<44:25,  1.78it/s, acc=0.708, epoch=11, loss=0.961]  

epoch:11, idx:6099/10845, loss:0.9612897024780024, acc:0.7077049180327869


 57%|█████▋    | 6200/10845 [55:53<51:30,  1.50it/s, acc=0.708, epoch=11, loss=0.962]  

epoch:11, idx:6199/10845, loss:0.9615007752756919, acc:0.7078629032258065


 58%|█████▊    | 6300/10845 [56:48<44:18,  1.71it/s, acc=0.708, epoch=11, loss=0.963]  

epoch:11, idx:6299/10845, loss:0.9626154896191188, acc:0.7076587301587302


 59%|█████▉    | 6400/10845 [57:42<39:33,  1.87it/s, acc=0.707, epoch=11, loss=0.965]

epoch:11, idx:6399/10845, loss:0.965224238652736, acc:0.7072265625


 60%|█████▉    | 6500/10845 [58:35<59:55,  1.21it/s, acc=0.707, epoch=11, loss=0.967]

epoch:11, idx:6499/10845, loss:0.9667781506776809, acc:0.7066923076923077


 61%|██████    | 6600/10845 [59:31<44:40,  1.58it/s, acc=0.707, epoch=11, loss=0.966]  

epoch:11, idx:6599/10845, loss:0.9655662486137766, acc:0.706780303030303


 62%|██████▏   | 6700/10845 [1:00:26<46:30,  1.49it/s, acc=0.707, epoch=11, loss=0.965]

epoch:11, idx:6699/10845, loss:0.9652141336273791, acc:0.7066044776119403


 63%|██████▎   | 6800/10845 [1:01:22<31:21,  2.15it/s, acc=0.706, epoch=11, loss=0.965]

epoch:11, idx:6799/10845, loss:0.9650068807601929, acc:0.7063235294117647


 64%|██████▎   | 6900/10845 [1:02:20<32:32,  2.02it/s, acc=0.706, epoch=11, loss=0.967]  

epoch:11, idx:6899/10845, loss:0.9670868942357492, acc:0.7055072463768116


 65%|██████▍   | 7000/10845 [1:03:11<41:39,  1.54it/s, acc=0.705, epoch=11, loss=0.967]

epoch:11, idx:6999/10845, loss:0.966535076873643, acc:0.7053214285714285


 65%|██████▌   | 7100/10845 [1:04:01<29:39,  2.10it/s, acc=0.705, epoch=11, loss=0.967]

epoch:11, idx:7099/10845, loss:0.9669684491694813, acc:0.7053521126760564


 66%|██████▋   | 7200/10845 [1:05:00<32:19,  1.88it/s, acc=0.706, epoch=11, loss=0.966]

epoch:11, idx:7199/10845, loss:0.9659197035349077, acc:0.7055902777777778


 67%|██████▋   | 7300/10845 [1:05:51<22:34,  2.62it/s, acc=0.705, epoch=11, loss=0.966]

epoch:11, idx:7299/10845, loss:0.9659377777168195, acc:0.7054109589041095


 68%|██████▊   | 7400/10845 [1:06:45<25:09,  2.28it/s, acc=0.706, epoch=11, loss=0.966]

epoch:11, idx:7399/10845, loss:0.9662518402611887, acc:0.7057432432432432


 69%|██████▉   | 7500/10845 [1:07:39<24:30,  2.27it/s, acc=0.706, epoch=11, loss=0.966]

epoch:11, idx:7499/10845, loss:0.9663938818136851, acc:0.7056666666666667


 70%|███████   | 7600/10845 [1:08:30<30:14,  1.79it/s, acc=0.706, epoch=11, loss=0.965]

epoch:11, idx:7599/10845, loss:0.9648394516031993, acc:0.70625


 71%|███████   | 7700/10845 [1:09:24<34:01,  1.54it/s, acc=0.706, epoch=11, loss=0.966]

epoch:11, idx:7699/10845, loss:0.9656736614022936, acc:0.7061038961038961


 72%|███████▏  | 7800/10845 [1:10:20<24:14,  2.09it/s, acc=0.706, epoch=11, loss=0.966]

epoch:11, idx:7799/10845, loss:0.9661552736239556, acc:0.7060576923076923


 73%|███████▎  | 7900/10845 [1:11:14<29:42,  1.65it/s, acc=0.706, epoch=11, loss=0.967]

epoch:11, idx:7899/10845, loss:0.9667095780448068, acc:0.7060126582278481


 74%|███████▍  | 8000/10845 [1:12:05<29:34,  1.60it/s, acc=0.706, epoch=11, loss=0.968]

epoch:11, idx:7999/10845, loss:0.967521266579628, acc:0.70609375


 75%|███████▍  | 8101/10845 [1:12:59<18:31,  2.47it/s, acc=0.706, epoch=11, loss=0.967]

epoch:11, idx:8099/10845, loss:0.9671902619394255, acc:0.7061728395061728


 76%|███████▌  | 8200/10845 [1:13:55<26:56,  1.64it/s, acc=0.706, epoch=11, loss=0.969]

epoch:11, idx:8199/10845, loss:0.9686987076372635, acc:0.7057926829268293


 77%|███████▋  | 8300/10845 [1:14:48<20:41,  2.05it/s, acc=0.705, epoch=11, loss=0.971]

epoch:11, idx:8299/10845, loss:0.9713166157955146, acc:0.7054518072289157


 77%|███████▋  | 8400/10845 [1:15:41<20:13,  2.02it/s, acc=0.706, epoch=11, loss=0.972]

epoch:11, idx:8399/10845, loss:0.9718790135142349, acc:0.7055357142857143


 78%|███████▊  | 8500/10845 [1:16:36<18:36,  2.10it/s, acc=0.705, epoch=11, loss=0.973]

epoch:11, idx:8499/10845, loss:0.9730005517076044, acc:0.7053529411764706


 79%|███████▉  | 8600/10845 [1:17:29<17:57,  2.08it/s, acc=0.705, epoch=11, loss=0.974]

epoch:11, idx:8599/10845, loss:0.9740686724976052, acc:0.7054651162790697


 80%|████████  | 8700/10845 [1:18:22<23:09,  1.54it/s, acc=0.705, epoch=11, loss=0.976]

epoch:11, idx:8699/10845, loss:0.9756971023617119, acc:0.705287356321839


 81%|████████  | 8800/10845 [1:19:18<16:02,  2.13it/s, acc=0.705, epoch=11, loss=0.977]

epoch:11, idx:8799/10845, loss:0.9765782790834253, acc:0.7049147727272728


 82%|████████▏ | 8900/10845 [1:20:11<15:33,  2.08it/s, acc=0.705, epoch=11, loss=0.978]

epoch:11, idx:8899/10845, loss:0.9777165905478296, acc:0.7048314606741573


 83%|████████▎ | 9000/10845 [1:21:06<21:09,  1.45it/s, acc=0.705, epoch=11, loss=0.978]

epoch:11, idx:8999/10845, loss:0.9781512272357941, acc:0.7046388888888889


 84%|████████▍ | 9100/10845 [1:21:57<22:42,  1.28it/s, acc=0.705, epoch=11, loss=0.978]

epoch:11, idx:9099/10845, loss:0.9778630587818858, acc:0.7046153846153846


 85%|████████▍ | 9200/10845 [1:22:52<24:35,  1.12it/s, acc=0.705, epoch=11, loss=0.977]

epoch:11, idx:9199/10845, loss:0.9771423123193824, acc:0.704945652173913


 86%|████████▌ | 9300/10845 [1:23:45<14:58,  1.72it/s, acc=0.705, epoch=11, loss=0.978]

epoch:11, idx:9299/10845, loss:0.9776847357455121, acc:0.7049193548387097


 87%|████████▋ | 9400/10845 [1:24:39<11:00,  2.19it/s, acc=0.705, epoch=11, loss=0.978]

epoch:11, idx:9399/10845, loss:0.9780121268870983, acc:0.7048936170212766


 88%|████████▊ | 9500/10845 [1:25:31<11:33,  1.94it/s, acc=0.705, epoch=11, loss=0.979]

epoch:11, idx:9499/10845, loss:0.9787480687404934, acc:0.7046315789473684


 89%|████████▊ | 9600/10845 [1:26:23<07:48,  2.66it/s, acc=0.705, epoch=11, loss=0.978]

epoch:11, idx:9599/10845, loss:0.9784483316230277, acc:0.704609375


 89%|████████▉ | 9700/10845 [1:27:18<08:09,  2.34it/s, acc=0.704, epoch=11, loss=0.979]

epoch:11, idx:9699/10845, loss:0.9788631407194531, acc:0.7040979381443299


 90%|█████████ | 9800/10845 [1:28:17<08:48,  1.98it/s, acc=0.704, epoch=11, loss=0.98] 

epoch:11, idx:9799/10845, loss:0.9803226407997462, acc:0.7040816326530612


 91%|█████████▏| 9900/10845 [1:29:08<08:17,  1.90it/s, acc=0.704, epoch=11, loss=0.98] 

epoch:11, idx:9899/10845, loss:0.9798644318363884, acc:0.7040909090909091


 92%|█████████▏| 10000/10845 [1:29:58<05:14,  2.69it/s, acc=0.704, epoch=11, loss=0.981]

epoch:11, idx:9999/10845, loss:0.9810070428013802, acc:0.703875


 93%|█████████▎| 10100/10845 [1:30:55<06:48,  1.82it/s, acc=0.704, epoch=11, loss=0.981]

epoch:11, idx:10099/10845, loss:0.9814358089052805, acc:0.7037128712871287


 94%|█████████▍| 10200/10845 [1:31:50<05:17,  2.03it/s, acc=0.703, epoch=11, loss=0.984]

epoch:11, idx:10199/10845, loss:0.9837391499269242, acc:0.7029901960784314


 95%|█████████▍| 10300/10845 [1:32:45<05:24,  1.68it/s, acc=0.703, epoch=11, loss=0.984]

epoch:11, idx:10299/10845, loss:0.9840962495242508, acc:0.7027669902912621


 96%|█████████▌| 10400/10845 [1:33:41<04:00,  1.85it/s, acc=0.703, epoch=11, loss=0.984]

epoch:11, idx:10399/10845, loss:0.9842707272590353, acc:0.7028125


 97%|█████████▋| 10500/10845 [1:34:35<02:42,  2.13it/s, acc=0.703, epoch=11, loss=0.986]

epoch:11, idx:10499/10845, loss:0.9855683117679187, acc:0.7027619047619048


 98%|█████████▊| 10600/10845 [1:35:28<02:10,  1.87it/s, acc=0.703, epoch=11, loss=0.986]

epoch:11, idx:10599/10845, loss:0.9863814019960052, acc:0.7025943396226415


 99%|█████████▊| 10700/10845 [1:36:21<01:21,  1.78it/s, acc=0.702, epoch=11, loss=0.988]

epoch:11, idx:10699/10845, loss:0.9877531011054449, acc:0.7023598130841121


100%|█████████▉| 10800/10845 [1:37:11<00:22,  2.02it/s, acc=0.703, epoch=11, loss=0.987]

epoch:11, idx:10799/10845, loss:0.9874167908314202, acc:0.7025231481481482


100%|██████████| 10845/10845 [1:37:34<00:00,  1.83it/s, acc=0.702, epoch=11, loss=0.987]


epoch:11, idx:0/1275, loss:0.7963125705718994, acc:0.5
epoch:11, idx:100/1275, loss:1.1809216962002291, acc:0.6460396039603961
epoch:11, idx:200/1275, loss:1.1249329708701936, acc:0.6467661691542289
epoch:11, idx:300/1275, loss:1.1219686691547153, acc:0.6495016611295681
epoch:11, idx:400/1275, loss:1.113835471675283, acc:0.6477556109725686
epoch:11, idx:500/1275, loss:1.1124813816028678, acc:0.6467065868263473
epoch:11, idx:600/1275, loss:1.1207117584104744, acc:0.6435108153078203
epoch:11, idx:700/1275, loss:1.128841345813577, acc:0.6433666191155493
epoch:11, idx:800/1275, loss:1.1385416452506658, acc:0.6426342072409488
epoch:11, idx:900/1275, loss:1.1263698211785294, acc:0.6456714761376249
epoch:11, idx:1000/1275, loss:1.1268038940834595, acc:0.6458541458541458
epoch:11, idx:1100/1275, loss:1.1221146624701117, acc:0.6478201634877384
epoch:11, idx:1200/1275, loss:1.121368276735428, acc:0.6469608659450458


  1%|          | 100/10845 [00:52<1:43:39,  1.73it/s, acc=0.73, epoch=12, loss=0.849]

epoch:12, idx:99/10845, loss:0.8487137681245804, acc:0.73


  2%|▏         | 200/10845 [01:46<1:59:16,  1.49it/s, acc=0.724, epoch=12, loss=0.931]

epoch:12, idx:199/10845, loss:0.9305340769886971, acc:0.72375


  3%|▎         | 300/10845 [02:39<1:31:22,  1.92it/s, acc=0.722, epoch=12, loss=0.932]

epoch:12, idx:299/10845, loss:0.9316637859741846, acc:0.7216666666666667


  4%|▎         | 400/10845 [03:32<1:21:42,  2.13it/s, acc=0.712, epoch=12, loss=0.955]

epoch:12, idx:399/10845, loss:0.9546591678261757, acc:0.711875


  5%|▍         | 500/10845 [04:25<1:17:36,  2.22it/s, acc=0.716, epoch=12, loss=0.942]

epoch:12, idx:499/10845, loss:0.9416561679840088, acc:0.716


  6%|▌         | 601/10845 [05:17<57:32,  2.97it/s, acc=0.714, epoch=12, loss=0.956]  

epoch:12, idx:599/10845, loss:0.9552573445439339, acc:0.7141666666666666


  6%|▋         | 700/10845 [06:06<1:31:36,  1.85it/s, acc=0.719, epoch=12, loss=0.934]

epoch:12, idx:699/10845, loss:0.9341831906352724, acc:0.7192857142857143


  7%|▋         | 800/10845 [06:58<1:10:35,  2.37it/s, acc=0.718, epoch=12, loss=0.931]

epoch:12, idx:799/10845, loss:0.9314980160444974, acc:0.7178125


  8%|▊         | 900/10845 [07:49<1:33:54,  1.76it/s, acc=0.716, epoch=12, loss=0.936]

epoch:12, idx:899/10845, loss:0.9356911294990116, acc:0.7155555555555555


  9%|▉         | 1000/10845 [08:42<1:13:59,  2.22it/s, acc=0.713, epoch=12, loss=0.944]

epoch:12, idx:999/10845, loss:0.9436741706728935, acc:0.713


 10%|█         | 1100/10845 [09:34<1:32:11,  1.76it/s, acc=0.708, epoch=12, loss=0.958]

epoch:12, idx:1099/10845, loss:0.9583089053088969, acc:0.7077272727272728


 11%|█         | 1200/10845 [10:32<1:16:41,  2.10it/s, acc=0.708, epoch=12, loss=0.955]

epoch:12, idx:1199/10845, loss:0.9548379836479822, acc:0.7077083333333334


 12%|█▏        | 1300/10845 [11:27<1:23:44,  1.90it/s, acc=0.71, epoch=12, loss=0.948] 

epoch:12, idx:1299/10845, loss:0.9478708406136586, acc:0.7098076923076924


 13%|█▎        | 1400/10845 [12:18<1:20:20,  1.96it/s, acc=0.712, epoch=12, loss=0.938]

epoch:12, idx:1399/10845, loss:0.9378931254148484, acc:0.7121428571428572


 14%|█▍        | 1500/10845 [13:08<1:35:18,  1.63it/s, acc=0.71, epoch=12, loss=0.949] 

epoch:12, idx:1499/10845, loss:0.9487498931487401, acc:0.7095


 15%|█▍        | 1600/10845 [13:59<1:36:25,  1.60it/s, acc=0.711, epoch=12, loss=0.945]

epoch:12, idx:1599/10845, loss:0.9446333461627364, acc:0.71140625


 16%|█▌        | 1700/10845 [14:54<1:42:18,  1.49it/s, acc=0.712, epoch=12, loss=0.944]

epoch:12, idx:1699/10845, loss:0.9441566987248028, acc:0.711764705882353


 17%|█▋        | 1800/10845 [15:49<1:16:21,  1.97it/s, acc=0.713, epoch=12, loss=0.945]

epoch:12, idx:1799/10845, loss:0.9452814726697074, acc:0.7129166666666666


 18%|█▊        | 1900/10845 [16:43<1:43:37,  1.44it/s, acc=0.715, epoch=12, loss=0.944]

epoch:12, idx:1899/10845, loss:0.9435567636238901, acc:0.7148684210526316


 18%|█▊        | 2000/10845 [17:34<1:11:22,  2.07it/s, acc=0.715, epoch=12, loss=0.937]

epoch:12, idx:1999/10845, loss:0.936893653690815, acc:0.715375


 19%|█▉        | 2100/10845 [18:27<1:26:16,  1.69it/s, acc=0.716, epoch=12, loss=0.938]

epoch:12, idx:2099/10845, loss:0.9384140142088845, acc:0.7157142857142857


 20%|██        | 2200/10845 [19:23<1:26:41,  1.66it/s, acc=0.715, epoch=12, loss=0.938]

epoch:12, idx:2199/10845, loss:0.9381174665147608, acc:0.715


 21%|██        | 2300/10845 [20:21<1:47:26,  1.33it/s, acc=0.716, epoch=12, loss=0.934]

epoch:12, idx:2299/10845, loss:0.9343018295453942, acc:0.716195652173913


 22%|██▏       | 2400/10845 [21:13<1:29:36,  1.57it/s, acc=0.717, epoch=12, loss=0.934]

epoch:12, idx:2399/10845, loss:0.9337797173857689, acc:0.7173958333333333


 23%|██▎       | 2500/10845 [22:07<1:32:34,  1.50it/s, acc=0.715, epoch=12, loss=0.94] 

epoch:12, idx:2499/10845, loss:0.939985162639618, acc:0.7149


 24%|██▍       | 2600/10845 [23:02<1:08:23,  2.01it/s, acc=0.715, epoch=12, loss=0.937]

epoch:12, idx:2599/10845, loss:0.9374596837392221, acc:0.7153846153846154


 25%|██▍       | 2700/10845 [24:02<1:49:58,  1.23it/s, acc=0.716, epoch=12, loss=0.939]

epoch:12, idx:2699/10845, loss:0.9391604977846145, acc:0.7155555555555555


 26%|██▌       | 2800/10845 [24:56<1:19:40,  1.68it/s, acc=0.717, epoch=12, loss=0.938]

epoch:12, idx:2799/10845, loss:0.9381337973262582, acc:0.7167857142857142


 27%|██▋       | 2900/10845 [25:49<1:06:19,  2.00it/s, acc=0.716, epoch=12, loss=0.94] 

epoch:12, idx:2899/10845, loss:0.9401645364021433, acc:0.7162068965517241


 28%|██▊       | 3000/10845 [26:46<1:39:41,  1.31it/s, acc=0.716, epoch=12, loss=0.939]

epoch:12, idx:2999/10845, loss:0.9389320834477742, acc:0.716


 29%|██▊       | 3100/10845 [27:40<1:21:35,  1.58it/s, acc=0.715, epoch=12, loss=0.941]

epoch:12, idx:3099/10845, loss:0.9406227988004684, acc:0.715


 30%|██▉       | 3200/10845 [28:32<58:43,  2.17it/s, acc=0.715, epoch=12, loss=0.938]  

epoch:12, idx:3199/10845, loss:0.9378793079033494, acc:0.715078125


 30%|███       | 3300/10845 [29:26<1:07:37,  1.86it/s, acc=0.715, epoch=12, loss=0.94] 

epoch:12, idx:3299/10845, loss:0.9395503077904384, acc:0.7148484848484848


 31%|███▏      | 3400/10845 [30:21<1:14:55,  1.66it/s, acc=0.716, epoch=12, loss=0.936]

epoch:12, idx:3399/10845, loss:0.9355037002528415, acc:0.7155882352941176


 32%|███▏      | 3500/10845 [31:18<1:11:43,  1.71it/s, acc=0.715, epoch=12, loss=0.938]

epoch:12, idx:3499/10845, loss:0.9375299906730652, acc:0.7149285714285715


 33%|███▎      | 3600/10845 [32:08<41:42,  2.89it/s, acc=0.713, epoch=12, loss=0.941]  

epoch:12, idx:3599/10845, loss:0.9413609760006268, acc:0.7130555555555556


 34%|███▍      | 3700/10845 [32:59<1:00:03,  1.98it/s, acc=0.713, epoch=12, loss=0.941]

epoch:12, idx:3699/10845, loss:0.9408531261940261, acc:0.7132432432432433


 35%|███▌      | 3800/10845 [33:54<55:46,  2.11it/s, acc=0.713, epoch=12, loss=0.941]  

epoch:12, idx:3799/10845, loss:0.9405565758127915, acc:0.7132236842105263


 36%|███▌      | 3900/10845 [34:49<1:06:38,  1.74it/s, acc=0.714, epoch=12, loss=0.94] 

epoch:12, idx:3899/10845, loss:0.939747796196204, acc:0.7137820512820513


 37%|███▋      | 4001/10845 [35:39<35:37,  3.20it/s, acc=0.714, epoch=12, loss=0.94]   

epoch:12, idx:3999/10845, loss:0.9404635978937149, acc:0.7138125


 38%|███▊      | 4100/10845 [36:34<1:07:21,  1.67it/s, acc=0.714, epoch=12, loss=0.944]

epoch:12, idx:4099/10845, loss:0.9437649738788605, acc:0.713719512195122


 39%|███▊      | 4200/10845 [37:28<1:07:09,  1.65it/s, acc=0.715, epoch=12, loss=0.943]

epoch:12, idx:4199/10845, loss:0.9425946173923356, acc:0.7147023809523809


 40%|███▉      | 4300/10845 [38:23<47:45,  2.28it/s, acc=0.714, epoch=12, loss=0.942]  

epoch:12, idx:4299/10845, loss:0.9419788693411406, acc:0.7144186046511628


 41%|████      | 4400/10845 [39:18<52:28,  2.05it/s, acc=0.714, epoch=12, loss=0.942]  

epoch:12, idx:4399/10845, loss:0.9415244070779194, acc:0.7144886363636364


 41%|████▏     | 4500/10845 [40:15<1:38:21,  1.08it/s, acc=0.714, epoch=12, loss=0.943]

epoch:12, idx:4499/10845, loss:0.9425338020722072, acc:0.7143333333333334


 42%|████▏     | 4600/10845 [41:06<1:24:05,  1.24it/s, acc=0.714, epoch=12, loss=0.945]

epoch:12, idx:4599/10845, loss:0.9447249234500139, acc:0.7143478260869566


 43%|████▎     | 4700/10845 [41:59<53:20,  1.92it/s, acc=0.714, epoch=12, loss=0.944]  

epoch:12, idx:4699/10845, loss:0.9438543995644184, acc:0.7144680851063829


 44%|████▍     | 4800/10845 [42:54<1:08:52,  1.46it/s, acc=0.715, epoch=12, loss=0.945]

epoch:12, idx:4799/10845, loss:0.944787927394112, acc:0.7146354166666666


 45%|████▌     | 4900/10845 [43:47<50:24,  1.97it/s, acc=0.715, epoch=12, loss=0.944]  

epoch:12, idx:4899/10845, loss:0.9443653322482596, acc:0.7148979591836735


 46%|████▌     | 5000/10845 [44:44<49:06,  1.98it/s, acc=0.715, epoch=12, loss=0.945]  

epoch:12, idx:4999/10845, loss:0.9445012729167939, acc:0.7152


 47%|████▋     | 5100/10845 [45:35<42:22,  2.26it/s, acc=0.715, epoch=12, loss=0.945]  

epoch:12, idx:5099/10845, loss:0.9448241039234049, acc:0.7150490196078432


 48%|████▊     | 5200/10845 [46:32<44:16,  2.12it/s, acc=0.715, epoch=12, loss=0.947]  

epoch:12, idx:5199/10845, loss:0.947094849050045, acc:0.7146634615384615


 49%|████▉     | 5300/10845 [47:28<1:05:00,  1.42it/s, acc=0.714, epoch=12, loss=0.949]

epoch:12, idx:5299/10845, loss:0.9486449723873498, acc:0.7136792452830188


 50%|████▉     | 5400/10845 [48:18<42:32,  2.13it/s, acc=0.714, epoch=12, loss=0.948]  

epoch:12, idx:5399/10845, loss:0.9481283054583602, acc:0.7136111111111111


 51%|█████     | 5500/10845 [49:11<55:40,  1.60it/s, acc=0.714, epoch=12, loss=0.948]  

epoch:12, idx:5499/10845, loss:0.9475243226343936, acc:0.7140909090909091


 52%|█████▏    | 5600/10845 [50:05<43:55,  1.99it/s, acc=0.714, epoch=12, loss=0.949]  

epoch:12, idx:5599/10845, loss:0.9493581201934389, acc:0.71375


 53%|█████▎    | 5700/10845 [50:58<49:50,  1.72it/s, acc=0.714, epoch=12, loss=0.949]  

epoch:12, idx:5699/10845, loss:0.9488784189652978, acc:0.7139035087719299


 53%|█████▎    | 5800/10845 [51:55<50:49,  1.65it/s, acc=0.713, epoch=12, loss=0.951]  

epoch:12, idx:5799/10845, loss:0.9507573524168853, acc:0.7131896551724138


 54%|█████▍    | 5900/10845 [52:44<32:02,  2.57it/s, acc=0.713, epoch=12, loss=0.95]   

epoch:12, idx:5899/10845, loss:0.9495640838903896, acc:0.7134322033898305


 55%|█████▌    | 6000/10845 [53:36<43:13,  1.87it/s, acc=0.714, epoch=12, loss=0.949]  

epoch:12, idx:5999/10845, loss:0.9493587204962969, acc:0.7136666666666667


 56%|█████▌    | 6100/10845 [54:32<37:24,  2.11it/s, acc=0.713, epoch=12, loss=0.949]  

epoch:12, idx:6099/10845, loss:0.9494861158044612, acc:0.7132786885245902


 57%|█████▋    | 6200/10845 [55:29<40:16,  1.92it/s, acc=0.713, epoch=12, loss=0.95] 

epoch:12, idx:6199/10845, loss:0.950386221879913, acc:0.7131854838709677


 58%|█████▊    | 6300/10845 [56:23<38:07,  1.99it/s, acc=0.713, epoch=12, loss=0.951]

epoch:12, idx:6299/10845, loss:0.9508364447052516, acc:0.7132539682539683


 59%|█████▉    | 6400/10845 [57:18<36:19,  2.04it/s, acc=0.714, epoch=12, loss=0.951]

epoch:12, idx:6399/10845, loss:0.9509272288903594, acc:0.713515625


 60%|█████▉    | 6500/10845 [58:10<35:33,  2.04it/s, acc=0.714, epoch=12, loss=0.951]

epoch:12, idx:6499/10845, loss:0.9505769164929023, acc:0.7136538461538462


 61%|██████    | 6600/10845 [59:04<33:25,  2.12it/s, acc=0.714, epoch=12, loss=0.951]

epoch:12, idx:6599/10845, loss:0.9509975377176747, acc:0.7135984848484849


 62%|██████▏   | 6700/10845 [59:58<31:17,  2.21it/s, acc=0.714, epoch=12, loss=0.95]   

epoch:12, idx:6699/10845, loss:0.9502654101777432, acc:0.7140298507462687


 63%|██████▎   | 6800/10845 [1:00:52<49:24,  1.36it/s, acc=0.714, epoch=12, loss=0.951]

epoch:12, idx:6799/10845, loss:0.9508372075943385, acc:0.7136764705882352


 64%|██████▎   | 6900/10845 [1:01:47<28:32,  2.30it/s, acc=0.714, epoch=12, loss=0.952]

epoch:12, idx:6899/10845, loss:0.9515764181855796, acc:0.7135869565217391


 65%|██████▍   | 7000/10845 [1:02:44<36:32,  1.75it/s, acc=0.713, epoch=12, loss=0.953]  

epoch:12, idx:6999/10845, loss:0.9532744738629886, acc:0.7134285714285714


 65%|██████▌   | 7100/10845 [1:03:38<31:01,  2.01it/s, acc=0.713, epoch=12, loss=0.954]

epoch:12, idx:7099/10845, loss:0.9543381712386306, acc:0.7129929577464789


 66%|██████▋   | 7200/10845 [1:04:28<30:22,  2.00it/s, acc=0.713, epoch=12, loss=0.956]

epoch:12, idx:7199/10845, loss:0.9555624283105135, acc:0.7129166666666666


 67%|██████▋   | 7300/10845 [1:05:20<26:44,  2.21it/s, acc=0.713, epoch=12, loss=0.955]

epoch:12, idx:7299/10845, loss:0.9550908437330429, acc:0.7133561643835616


 68%|██████▊   | 7400/10845 [1:06:11<27:22,  2.10it/s, acc=0.713, epoch=12, loss=0.957]

epoch:12, idx:7399/10845, loss:0.9566135290748364, acc:0.7129054054054054


 69%|██████▉   | 7500/10845 [1:07:03<26:00,  2.14it/s, acc=0.713, epoch=12, loss=0.957]

epoch:12, idx:7499/10845, loss:0.9571975748459498, acc:0.7128666666666666


 70%|███████   | 7600/10845 [1:08:05<32:36,  1.66it/s, acc=0.713, epoch=12, loss=0.958]

epoch:12, idx:7599/10845, loss:0.9580306076336849, acc:0.7126644736842105


 71%|███████   | 7700/10845 [1:09:02<26:59,  1.94it/s, acc=0.712, epoch=12, loss=0.959]

epoch:12, idx:7699/10845, loss:0.9590456913160039, acc:0.7124675324675325


 72%|███████▏  | 7800/10845 [1:09:57<35:05,  1.45it/s, acc=0.712, epoch=12, loss=0.96] 

epoch:12, idx:7799/10845, loss:0.9601620206809961, acc:0.7119871794871795


 73%|███████▎  | 7900/10845 [1:10:49<27:23,  1.79it/s, acc=0.712, epoch=12, loss=0.96] 

epoch:12, idx:7899/10845, loss:0.9599235836501363, acc:0.7117405063291139


 74%|███████▍  | 8000/10845 [1:11:46<23:32,  2.01it/s, acc=0.711, epoch=12, loss=0.961]

epoch:12, idx:7999/10845, loss:0.9613810573704541, acc:0.71109375


 75%|███████▍  | 8100/10845 [1:12:39<17:23,  2.63it/s, acc=0.711, epoch=12, loss=0.961]

epoch:12, idx:8099/10845, loss:0.9611148322327637, acc:0.7108950617283951


 76%|███████▌  | 8200/10845 [1:13:30<26:04,  1.69it/s, acc=0.711, epoch=12, loss=0.962]

epoch:12, idx:8199/10845, loss:0.9621860005324934, acc:0.7107926829268293


 77%|███████▋  | 8300/10845 [1:14:22<26:35,  1.59it/s, acc=0.711, epoch=12, loss=0.961]

epoch:12, idx:8299/10845, loss:0.960790013954582, acc:0.7109638554216867


 77%|███████▋  | 8400/10845 [1:15:15<26:15,  1.55it/s, acc=0.711, epoch=12, loss=0.962]

epoch:12, idx:8399/10845, loss:0.9616490434500433, acc:0.710654761904762


 78%|███████▊  | 8500/10845 [1:16:09<18:37,  2.10it/s, acc=0.71, epoch=12, loss=0.962] 

epoch:12, idx:8499/10845, loss:0.9620942864032352, acc:0.7102647058823529


 79%|███████▉  | 8600/10845 [1:17:01<26:06,  1.43it/s, acc=0.71, epoch=12, loss=0.964]

epoch:12, idx:8599/10845, loss:0.9636212606104307, acc:0.7100290697674418


 80%|████████  | 8700/10845 [1:17:52<24:50,  1.44it/s, acc=0.709, epoch=12, loss=0.965]

epoch:12, idx:8699/10845, loss:0.9647311674281098, acc:0.7094827586206897


 81%|████████  | 8800/10845 [1:18:46<15:02,  2.27it/s, acc=0.709, epoch=12, loss=0.965]

epoch:12, idx:8799/10845, loss:0.9653612273829905, acc:0.709403409090909


 82%|████████▏ | 8900/10845 [1:19:42<21:25,  1.51it/s, acc=0.709, epoch=12, loss=0.965]

epoch:12, idx:8899/10845, loss:0.9653632290410192, acc:0.7094101123595505


 83%|████████▎ | 9000/10845 [1:20:35<21:47,  1.41it/s, acc=0.71, epoch=12, loss=0.964] 

epoch:12, idx:8999/10845, loss:0.9640835293829441, acc:0.7095555555555556


 84%|████████▍ | 9100/10845 [1:21:30<14:05,  2.06it/s, acc=0.709, epoch=12, loss=0.965]

epoch:12, idx:9099/10845, loss:0.9649088200861281, acc:0.7094505494505494


 85%|████████▍ | 9200/10845 [1:22:19<14:15,  1.92it/s, acc=0.709, epoch=12, loss=0.964]

epoch:12, idx:9199/10845, loss:0.9644459785812575, acc:0.709320652173913


 86%|████████▌ | 9300/10845 [1:23:10<14:46,  1.74it/s, acc=0.709, epoch=12, loss=0.965]

epoch:12, idx:9299/10845, loss:0.9647323742060251, acc:0.709274193548387


 87%|████████▋ | 9401/10845 [1:24:04<09:48,  2.45it/s, acc=0.709, epoch=12, loss=0.965]

epoch:12, idx:9399/10845, loss:0.9649611402857811, acc:0.7092021276595745


 88%|████████▊ | 9500/10845 [1:25:01<12:17,  1.82it/s, acc=0.709, epoch=12, loss=0.966]

epoch:12, idx:9499/10845, loss:0.9658654010264497, acc:0.7087368421052631


 89%|████████▊ | 9600/10845 [1:25:56<12:10,  1.70it/s, acc=0.709, epoch=12, loss=0.967]

epoch:12, idx:9599/10845, loss:0.9665130301223447, acc:0.70859375


 89%|████████▉ | 9700/10845 [1:26:50<16:16,  1.17it/s, acc=0.708, epoch=12, loss=0.967]

epoch:12, idx:9699/10845, loss:0.9671823068654414, acc:0.7084278350515464


 90%|█████████ | 9800/10845 [1:27:44<09:49,  1.77it/s, acc=0.708, epoch=12, loss=0.968]

epoch:12, idx:9799/10845, loss:0.9676808650122614, acc:0.7083928571428572


 91%|█████████▏| 9900/10845 [1:28:37<07:20,  2.15it/s, acc=0.709, epoch=12, loss=0.967]

epoch:12, idx:9899/10845, loss:0.9669638053124601, acc:0.7086111111111111


 92%|█████████▏| 10000/10845 [1:29:25<07:24,  1.90it/s, acc=0.709, epoch=12, loss=0.967]

epoch:12, idx:9999/10845, loss:0.9666881717890501, acc:0.708725


 93%|█████████▎| 10100/10845 [1:30:18<06:56,  1.79it/s, acc=0.709, epoch=12, loss=0.968]

epoch:12, idx:10099/10845, loss:0.9680488281763426, acc:0.7085891089108911


 94%|█████████▍| 10200/10845 [1:31:14<05:32,  1.94it/s, acc=0.709, epoch=12, loss=0.968]

epoch:12, idx:10199/10845, loss:0.9675929966130677, acc:0.7087745098039215


 95%|█████████▍| 10300/10845 [1:32:07<05:51,  1.55it/s, acc=0.709, epoch=12, loss=0.967]

epoch:12, idx:10299/10845, loss:0.9674630153381709, acc:0.7085922330097087


 96%|█████████▌| 10400/10845 [1:33:00<02:48,  2.65it/s, acc=0.709, epoch=12, loss=0.968]

epoch:12, idx:10399/10845, loss:0.9677723809532248, acc:0.7086538461538462


 97%|█████████▋| 10500/10845 [1:33:53<03:12,  1.79it/s, acc=0.709, epoch=12, loss=0.968]

epoch:12, idx:10499/10845, loss:0.9675897697465761, acc:0.7088571428571429


 98%|█████████▊| 10600/10845 [1:34:51<02:08,  1.91it/s, acc=0.709, epoch=12, loss=0.967]

epoch:12, idx:10599/10845, loss:0.9672119221681694, acc:0.7089622641509434


 99%|█████████▊| 10700/10845 [1:35:47<01:37,  1.48it/s, acc=0.709, epoch=12, loss=0.969]

epoch:12, idx:10699/10845, loss:0.9685669021155232, acc:0.7088084112149533


100%|█████████▉| 10800/10845 [1:36:39<00:22,  1.96it/s, acc=0.709, epoch=12, loss=0.969]

epoch:12, idx:10799/10845, loss:0.9693066031320228, acc:0.7087268518518518


100%|██████████| 10845/10845 [1:37:05<00:00,  1.88it/s, acc=0.709, epoch=12, loss=0.97] 


epoch:12, idx:0/1275, loss:0.9348533153533936, acc:0.5
epoch:12, idx:100/1275, loss:1.1955064918735239, acc:0.6410891089108911
epoch:12, idx:200/1275, loss:1.1454155480683739, acc:0.6393034825870647
epoch:12, idx:300/1275, loss:1.1447976152366182, acc:0.6461794019933554
epoch:12, idx:400/1275, loss:1.1298619730216903, acc:0.6496259351620948
epoch:12, idx:500/1275, loss:1.1259453303799658, acc:0.6497005988023952
epoch:12, idx:600/1275, loss:1.1305128239752251, acc:0.6455906821963394
epoch:12, idx:700/1275, loss:1.1357680521745994, acc:0.6437232524964337
epoch:12, idx:800/1275, loss:1.1445966618933183, acc:0.6401373283395755
epoch:12, idx:900/1275, loss:1.1345108783866933, acc:0.644284128745838
epoch:12, idx:1000/1275, loss:1.138869340484078, acc:0.6443556443556444
epoch:12, idx:1100/1275, loss:1.1327437797965623, acc:0.6460036330608537
epoch:12, idx:1200/1275, loss:1.1320317159286646, acc:0.6457119067443797


  1%|          | 100/10845 [00:50<1:10:23,  2.54it/s, acc=0.71, epoch=13, loss=0.912]

epoch:13, idx:99/10845, loss:0.9117148405313492, acc:0.71


  2%|▏         | 200/10845 [01:44<1:45:03,  1.69it/s, acc=0.734, epoch=13, loss=0.878]

epoch:13, idx:199/10845, loss:0.8776490476727485, acc:0.73375


  3%|▎         | 300/10845 [02:36<1:46:21,  1.65it/s, acc=0.73, epoch=13, loss=0.869] 

epoch:13, idx:299/10845, loss:0.8694444608688354, acc:0.73


  4%|▎         | 400/10845 [03:31<1:46:36,  1.63it/s, acc=0.729, epoch=13, loss=0.875]

epoch:13, idx:399/10845, loss:0.8749966087937355, acc:0.72875


  5%|▍         | 500/10845 [04:26<1:19:54,  2.16it/s, acc=0.738, epoch=13, loss=0.852]

epoch:13, idx:499/10845, loss:0.8521244469881057, acc:0.7375


  6%|▌         | 600/10845 [05:23<1:42:05,  1.67it/s, acc=0.734, epoch=13, loss=0.901]

epoch:13, idx:599/10845, loss:0.9006445574760437, acc:0.7341666666666666


  6%|▋         | 700/10845 [06:15<1:19:57,  2.11it/s, acc=0.735, epoch=13, loss=0.892]

epoch:13, idx:699/10845, loss:0.8920624755961555, acc:0.7353571428571428


  7%|▋         | 800/10845 [07:16<1:38:45,  1.70it/s, acc=0.729, epoch=13, loss=0.907]

epoch:13, idx:799/10845, loss:0.907171530649066, acc:0.7290625


  8%|▊         | 900/10845 [08:06<1:40:39,  1.65it/s, acc=0.727, epoch=13, loss=0.908]

epoch:13, idx:899/10845, loss:0.9079814050594965, acc:0.7272222222222222


  9%|▉         | 1000/10845 [08:58<1:33:30,  1.75it/s, acc=0.726, epoch=13, loss=0.914]

epoch:13, idx:999/10845, loss:0.9142866490483283, acc:0.72575


 10%|█         | 1100/10845 [09:52<1:20:36,  2.01it/s, acc=0.723, epoch=13, loss=0.919]

epoch:13, idx:1099/10845, loss:0.9188053074208173, acc:0.7227272727272728


 11%|█         | 1200/10845 [10:51<2:03:50,  1.30it/s, acc=0.721, epoch=13, loss=0.919]

epoch:13, idx:1199/10845, loss:0.9185442297160625, acc:0.7210416666666667


 12%|█▏        | 1300/10845 [11:48<1:15:06,  2.12it/s, acc=0.724, epoch=13, loss=0.911]

epoch:13, idx:1299/10845, loss:0.9106728421266262, acc:0.7242307692307692


 13%|█▎        | 1400/10845 [12:43<1:52:37,  1.40it/s, acc=0.723, epoch=13, loss=0.913]

epoch:13, idx:1399/10845, loss:0.9125193655490875, acc:0.7232142857142857


 14%|█▍        | 1500/10845 [13:37<1:10:51,  2.20it/s, acc=0.723, epoch=13, loss=0.91] 

epoch:13, idx:1499/10845, loss:0.9101177023649216, acc:0.7225


 15%|█▍        | 1600/10845 [14:32<1:38:06,  1.57it/s, acc=0.722, epoch=13, loss=0.914]

epoch:13, idx:1599/10845, loss:0.9137038416601717, acc:0.72171875


 16%|█▌        | 1700/10845 [15:28<1:42:17,  1.49it/s, acc=0.722, epoch=13, loss=0.918]

epoch:13, idx:1699/10845, loss:0.918487138590392, acc:0.7216176470588235


 17%|█▋        | 1801/10845 [16:21<56:38,  2.66it/s, acc=0.721, epoch=13, loss=0.924]  

epoch:13, idx:1799/10845, loss:0.924596357130342, acc:0.7205555555555555


 18%|█▊        | 1900/10845 [17:13<1:21:41,  1.82it/s, acc=0.719, epoch=13, loss=0.927]

epoch:13, idx:1899/10845, loss:0.9266485196195151, acc:0.7186842105263158


 18%|█▊        | 2000/10845 [18:08<2:02:41,  1.20it/s, acc=0.719, epoch=13, loss=0.926]

epoch:13, idx:1999/10845, loss:0.9255196613520383, acc:0.71925


 19%|█▉        | 2100/10845 [19:00<1:49:40,  1.33it/s, acc=0.721, epoch=13, loss=0.92] 

epoch:13, idx:2099/10845, loss:0.9200646161039671, acc:0.7208333333333333


 20%|██        | 2200/10845 [19:56<1:42:08,  1.41it/s, acc=0.719, epoch=13, loss=0.926]

epoch:13, idx:2199/10845, loss:0.9256089255620132, acc:0.7190909090909091


 21%|██        | 2300/10845 [20:52<1:13:45,  1.93it/s, acc=0.72, epoch=13, loss=0.925] 

epoch:13, idx:2299/10845, loss:0.9251749711840049, acc:0.720108695652174


 22%|██▏       | 2400/10845 [21:44<1:26:02,  1.64it/s, acc=0.721, epoch=13, loss=0.924]

epoch:13, idx:2399/10845, loss:0.9237079394732912, acc:0.7207291666666666


 23%|██▎       | 2500/10845 [22:42<1:21:46,  1.70it/s, acc=0.721, epoch=13, loss=0.921]

epoch:13, idx:2499/10845, loss:0.9210703196883202, acc:0.7211


 24%|██▍       | 2600/10845 [23:33<1:01:33,  2.23it/s, acc=0.722, epoch=13, loss=0.919]

epoch:13, idx:2599/10845, loss:0.9188082322592919, acc:0.7221153846153846


 25%|██▍       | 2700/10845 [24:25<1:01:36,  2.20it/s, acc=0.721, epoch=13, loss=0.922]

epoch:13, idx:2699/10845, loss:0.9216755897248233, acc:0.7213888888888889


 26%|██▌       | 2800/10845 [25:15<51:59,  2.58it/s, acc=0.722, epoch=13, loss=0.922]  

epoch:13, idx:2799/10845, loss:0.9215165305137635, acc:0.7215178571428571


 27%|██▋       | 2900/10845 [26:11<1:01:16,  2.16it/s, acc=0.721, epoch=13, loss=0.921]

epoch:13, idx:2899/10845, loss:0.9205246945701796, acc:0.7213793103448276


 28%|██▊       | 3000/10845 [27:07<1:45:47,  1.24it/s, acc=0.722, epoch=13, loss=0.921]

epoch:13, idx:2999/10845, loss:0.9206982664664587, acc:0.7215


 29%|██▊       | 3100/10845 [28:04<1:08:47,  1.88it/s, acc=0.721, epoch=13, loss=0.924]

epoch:13, idx:3099/10845, loss:0.9238383879200105, acc:0.7207258064516129


 30%|██▉       | 3200/10845 [29:01<1:08:13,  1.87it/s, acc=0.72, epoch=13, loss=0.924] 

epoch:13, idx:3199/10845, loss:0.9238627171795816, acc:0.7203125


 30%|███       | 3300/10845 [29:55<1:03:31,  1.98it/s, acc=0.721, epoch=13, loss=0.924]

epoch:13, idx:3299/10845, loss:0.9236619787234248, acc:0.7206818181818182


 31%|███▏      | 3400/10845 [30:48<1:11:03,  1.75it/s, acc=0.719, epoch=13, loss=0.926]

epoch:13, idx:3399/10845, loss:0.9262516090010895, acc:0.7194852941176471


 32%|███▏      | 3500/10845 [31:40<46:47,  2.62it/s, acc=0.721, epoch=13, loss=0.923]  

epoch:13, idx:3499/10845, loss:0.9228424356579781, acc:0.7207142857142858


 33%|███▎      | 3600/10845 [32:36<1:07:37,  1.79it/s, acc=0.72, epoch=13, loss=0.925] 

epoch:13, idx:3599/10845, loss:0.9245689654764202, acc:0.7200694444444444


 34%|███▍      | 3700/10845 [33:28<1:10:07,  1.70it/s, acc=0.719, epoch=13, loss=0.926]

epoch:13, idx:3699/10845, loss:0.9260070913305154, acc:0.7193243243243244


 35%|███▌      | 3800/10845 [34:18<49:44,  2.36it/s, acc=0.718, epoch=13, loss=0.928]  

epoch:13, idx:3799/10845, loss:0.9275886936642622, acc:0.7183552631578948


 36%|███▌      | 3900/10845 [35:12<48:23,  2.39it/s, acc=0.717, epoch=13, loss=0.93]   

epoch:13, idx:3899/10845, loss:0.929913315154039, acc:0.7169871794871795


 37%|███▋      | 4000/10845 [36:04<46:00,  2.48it/s, acc=0.717, epoch=13, loss=0.931]  

epoch:13, idx:3999/10845, loss:0.9309963706806302, acc:0.7169375


 38%|███▊      | 4100/10845 [36:57<46:29,  2.42it/s, acc=0.718, epoch=13, loss=0.931]  

epoch:13, idx:4099/10845, loss:0.9308931656218157, acc:0.7176219512195122


 39%|███▊      | 4200/10845 [37:52<53:38,  2.06it/s, acc=0.718, epoch=13, loss=0.929]  

epoch:13, idx:4199/10845, loss:0.9294170502892563, acc:0.7176190476190476


 40%|███▉      | 4300/10845 [38:49<1:06:15,  1.65it/s, acc=0.718, epoch=13, loss=0.931]

epoch:13, idx:4299/10845, loss:0.9313624838826268, acc:0.7177325581395348


 41%|████      | 4400/10845 [39:48<1:10:58,  1.51it/s, acc=0.718, epoch=13, loss=0.931]

epoch:13, idx:4399/10845, loss:0.9305573312599551, acc:0.7183522727272728


 41%|████▏     | 4500/10845 [40:43<1:09:17,  1.53it/s, acc=0.718, epoch=13, loss=0.933]

epoch:13, idx:4499/10845, loss:0.9331580261720551, acc:0.7179444444444445


 42%|████▏     | 4600/10845 [41:39<51:01,  2.04it/s, acc=0.718, epoch=13, loss=0.933]  

epoch:13, idx:4599/10845, loss:0.9333133609916853, acc:0.7176630434782608


 43%|████▎     | 4700/10845 [42:31<54:36,  1.88it/s, acc=0.717, epoch=13, loss=0.935]  

epoch:13, idx:4699/10845, loss:0.935349983243232, acc:0.7174468085106382


 44%|████▍     | 4800/10845 [43:24<52:49,  1.91it/s, acc=0.718, epoch=13, loss=0.934]  

epoch:13, idx:4799/10845, loss:0.9338314150522152, acc:0.7182291666666667


 45%|████▌     | 4900/10845 [44:15<45:09,  2.19it/s, acc=0.718, epoch=13, loss=0.934]  

epoch:13, idx:4899/10845, loss:0.9339677922822991, acc:0.7179591836734693


 46%|████▌     | 5000/10845 [45:04<51:19,  1.90it/s, acc=0.718, epoch=13, loss=0.935]  

epoch:13, idx:4999/10845, loss:0.9346438911199569, acc:0.7178


 47%|████▋     | 5100/10845 [45:56<34:32,  2.77it/s, acc=0.717, epoch=13, loss=0.937]  

epoch:13, idx:5099/10845, loss:0.9366101208037021, acc:0.717107843137255


 48%|████▊     | 5200/10845 [46:48<45:22,  2.07it/s, acc=0.717, epoch=13, loss=0.938]  

epoch:13, idx:5199/10845, loss:0.9378846586094453, acc:0.7170673076923076


 49%|████▉     | 5300/10845 [47:40<41:55,  2.20it/s, acc=0.717, epoch=13, loss=0.939]  

epoch:13, idx:5299/10845, loss:0.9385697298229865, acc:0.7170283018867925


 50%|████▉     | 5400/10845 [48:34<49:26,  1.84it/s, acc=0.718, epoch=13, loss=0.937]  

epoch:13, idx:5399/10845, loss:0.9370540007838496, acc:0.7178240740740741


 51%|█████     | 5500/10845 [49:25<45:36,  1.95it/s, acc=0.717, epoch=13, loss=0.94]   

epoch:13, idx:5499/10845, loss:0.9395905403332276, acc:0.7174545454545455


 52%|█████▏    | 5600/10845 [50:19<37:06,  2.36it/s, acc=0.718, epoch=13, loss=0.938]  

epoch:13, idx:5599/10845, loss:0.9377852460422686, acc:0.7175892857142857


 53%|█████▎    | 5700/10845 [51:13<47:26,  1.81it/s, acc=0.717, epoch=13, loss=0.937]  

epoch:13, idx:5699/10845, loss:0.9373947845216383, acc:0.7170614035087719


 53%|█████▎    | 5800/10845 [52:10<43:17,  1.94it/s, acc=0.717, epoch=13, loss=0.938]  

epoch:13, idx:5799/10845, loss:0.9384973400625689, acc:0.7172844827586207


 54%|█████▍    | 5900/10845 [53:01<48:33,  1.70it/s, acc=0.717, epoch=13, loss=0.94]   

epoch:13, idx:5899/10845, loss:0.9399774780111798, acc:0.7172033898305085


 55%|█████▌    | 6000/10845 [53:56<30:44,  2.63it/s, acc=0.717, epoch=13, loss=0.941]  

epoch:13, idx:5999/10845, loss:0.9409146481752395, acc:0.7169166666666666


 56%|█████▌    | 6100/10845 [54:49<32:00,  2.47it/s, acc=0.717, epoch=13, loss=0.942]  

epoch:13, idx:6099/10845, loss:0.9422071242234746, acc:0.7167622950819672


 57%|█████▋    | 6200/10845 [55:43<40:43,  1.90it/s, acc=0.717, epoch=13, loss=0.942]  

epoch:13, idx:6199/10845, loss:0.9417337123520912, acc:0.7170564516129032


 58%|█████▊    | 6300/10845 [56:35<51:30,  1.47it/s, acc=0.718, epoch=13, loss=0.942]

epoch:13, idx:6299/10845, loss:0.9416194128611731, acc:0.7175396825396826


 59%|█████▉    | 6400/10845 [57:33<32:43,  2.26it/s, acc=0.718, epoch=13, loss=0.941]  

epoch:13, idx:6399/10845, loss:0.9413821872510016, acc:0.717734375


 60%|█████▉    | 6500/10845 [58:28<56:00,  1.29it/s, acc=0.718, epoch=13, loss=0.942]

epoch:13, idx:6499/10845, loss:0.9416643547461583, acc:0.7176538461538462


 61%|██████    | 6600/10845 [59:25<47:02,  1.50it/s, acc=0.718, epoch=13, loss=0.942]  

epoch:13, idx:6599/10845, loss:0.9418735474257758, acc:0.7177651515151515


 62%|██████▏   | 6700/10845 [1:00:20<30:38,  2.25it/s, acc=0.718, epoch=13, loss=0.941]

epoch:13, idx:6699/10845, loss:0.9405057071305033, acc:0.7177985074626866


 63%|██████▎   | 6800/10845 [1:01:09<43:00,  1.57it/s, acc=0.718, epoch=13, loss=0.94] 

epoch:13, idx:6799/10845, loss:0.9397368904247003, acc:0.7180514705882353


 64%|██████▎   | 6900/10845 [1:02:03<38:23,  1.71it/s, acc=0.718, epoch=13, loss=0.94] 

epoch:13, idx:6899/10845, loss:0.9404836775513663, acc:0.7176449275362319


 65%|██████▍   | 7000/10845 [1:02:54<29:17,  2.19it/s, acc=0.718, epoch=13, loss=0.94] 

epoch:13, idx:6999/10845, loss:0.9398602189251355, acc:0.7177142857142857


 65%|██████▌   | 7100/10845 [1:03:48<31:59,  1.95it/s, acc=0.718, epoch=13, loss=0.939]

epoch:13, idx:7099/10845, loss:0.9391711565138589, acc:0.7181338028169014


 66%|██████▋   | 7200/10845 [1:04:39<33:12,  1.83it/s, acc=0.718, epoch=13, loss=0.94] 

epoch:13, idx:7199/10845, loss:0.9400760082734956, acc:0.7179861111111111


 67%|██████▋   | 7300/10845 [1:05:31<40:34,  1.46it/s, acc=0.719, epoch=13, loss=0.938]

epoch:13, idx:7299/10845, loss:0.9379240410213601, acc:0.7188356164383561


 68%|██████▊   | 7400/10845 [1:06:28<35:01,  1.64it/s, acc=0.718, epoch=13, loss=0.94] 

epoch:13, idx:7399/10845, loss:0.9402543828455178, acc:0.7182432432432433


 69%|██████▉   | 7500/10845 [1:07:24<42:01,  1.33it/s, acc=0.718, epoch=13, loss=0.942]

epoch:13, idx:7499/10845, loss:0.9415375482877095, acc:0.7179333333333333


 70%|███████   | 7600/10845 [1:08:23<23:23,  2.31it/s, acc=0.718, epoch=13, loss=0.943]

epoch:13, idx:7599/10845, loss:0.9432717953543914, acc:0.7175657894736842


 71%|███████   | 7700/10845 [1:09:17<22:17,  2.35it/s, acc=0.718, epoch=13, loss=0.944]

epoch:13, idx:7699/10845, loss:0.9439945229694441, acc:0.7175974025974026


 72%|███████▏  | 7800/10845 [1:10:10<30:32,  1.66it/s, acc=0.717, epoch=13, loss=0.946]

epoch:13, idx:7799/10845, loss:0.9459666033433034, acc:0.7168910256410257


 73%|███████▎  | 7900/10845 [1:11:08<24:39,  1.99it/s, acc=0.716, epoch=13, loss=0.948]

epoch:13, idx:7899/10845, loss:0.9477348441564584, acc:0.7163924050632912


 74%|███████▍  | 8000/10845 [1:12:00<24:50,  1.91it/s, acc=0.716, epoch=13, loss=0.949]

epoch:13, idx:7999/10845, loss:0.9486308471634984, acc:0.71625


 75%|███████▍  | 8100/10845 [1:12:56<17:50,  2.56it/s, acc=0.717, epoch=13, loss=0.947]

epoch:13, idx:8099/10845, loss:0.94723037586168, acc:0.7165432098765432


 76%|███████▌  | 8200/10845 [1:13:48<26:38,  1.66it/s, acc=0.716, epoch=13, loss=0.948]

epoch:13, idx:8199/10845, loss:0.9480056291709585, acc:0.7163719512195122


 77%|███████▋  | 8300/10845 [1:14:42<22:05,  1.92it/s, acc=0.716, epoch=13, loss=0.949]

epoch:13, idx:8299/10845, loss:0.9493020245432854, acc:0.7155722891566265


 77%|███████▋  | 8400/10845 [1:15:38<23:44,  1.72it/s, acc=0.715, epoch=13, loss=0.949]

epoch:13, idx:8399/10845, loss:0.9492380679008506, acc:0.7153869047619048


 78%|███████▊  | 8500/10845 [1:16:30<20:40,  1.89it/s, acc=0.715, epoch=13, loss=0.95] 

epoch:13, idx:8499/10845, loss:0.9495803327069563, acc:0.7150294117647059


 79%|███████▉  | 8600/10845 [1:17:22<20:20,  1.84it/s, acc=0.715, epoch=13, loss=0.949]

epoch:13, idx:8599/10845, loss:0.9490184528051421, acc:0.7149127906976744


 80%|████████  | 8700/10845 [1:18:22<26:23,  1.35it/s, acc=0.715, epoch=13, loss=0.949]

epoch:13, idx:8699/10845, loss:0.9494135856285862, acc:0.7147126436781609


 81%|████████  | 8800/10845 [1:19:11<15:42,  2.17it/s, acc=0.715, epoch=13, loss=0.949]

epoch:13, idx:8799/10845, loss:0.9490872896124016, acc:0.7148579545454545


 82%|████████▏ | 8900/10845 [1:20:05<18:15,  1.78it/s, acc=0.715, epoch=13, loss=0.95] 

epoch:13, idx:8899/10845, loss:0.9499439350339803, acc:0.7145786516853933


 83%|████████▎ | 9000/10845 [1:20:56<16:13,  1.90it/s, acc=0.714, epoch=13, loss=0.949]

epoch:13, idx:8999/10845, loss:0.9488412376509773, acc:0.7144722222222222


 84%|████████▍ | 9100/10845 [1:21:50<13:55,  2.09it/s, acc=0.714, epoch=13, loss=0.95] 

epoch:13, idx:9099/10845, loss:0.9498158900947361, acc:0.7143131868131868


 85%|████████▍ | 9200/10845 [1:22:43<13:50,  1.98it/s, acc=0.714, epoch=13, loss=0.949]

epoch:13, idx:9199/10845, loss:0.9492521909073643, acc:0.7144293478260869


 86%|████████▌ | 9300/10845 [1:23:37<15:49,  1.63it/s, acc=0.714, epoch=13, loss=0.951]

epoch:13, idx:9299/10845, loss:0.951165717667149, acc:0.7138709677419355


 87%|████████▋ | 9400/10845 [1:24:27<13:43,  1.76it/s, acc=0.714, epoch=13, loss=0.952]

epoch:13, idx:9399/10845, loss:0.9515310152287179, acc:0.7137765957446809


 88%|████████▊ | 9500/10845 [1:25:20<12:16,  1.83it/s, acc=0.714, epoch=13, loss=0.952]

epoch:13, idx:9499/10845, loss:0.9515008739421242, acc:0.7137631578947369


 89%|████████▊ | 9600/10845 [1:26:16<09:40,  2.14it/s, acc=0.713, epoch=13, loss=0.954]

epoch:13, idx:9599/10845, loss:0.9538018948957324, acc:0.7131510416666667


 89%|████████▉ | 9700/10845 [1:27:17<13:40,  1.40it/s, acc=0.713, epoch=13, loss=0.954]

epoch:13, idx:9699/10845, loss:0.95402183591705, acc:0.7129639175257731


 90%|█████████ | 9800/10845 [1:28:12<09:49,  1.77it/s, acc=0.713, epoch=13, loss=0.954]

epoch:13, idx:9799/10845, loss:0.9540143420562452, acc:0.7134183673469388


 91%|█████████▏| 9900/10845 [1:29:06<07:44,  2.04it/s, acc=0.714, epoch=13, loss=0.954]

epoch:13, idx:9899/10845, loss:0.9543257799774709, acc:0.713510101010101


 92%|█████████▏| 10000/10845 [1:30:03<09:46,  1.44it/s, acc=0.713, epoch=13, loss=0.955]

epoch:13, idx:9999/10845, loss:0.9552134363353253, acc:0.713375


 93%|█████████▎| 10100/10845 [1:31:00<08:13,  1.51it/s, acc=0.713, epoch=13, loss=0.956]

epoch:13, idx:10099/10845, loss:0.9558871468754098, acc:0.7131683168316831


 94%|█████████▍| 10200/10845 [1:31:53<05:32,  1.94it/s, acc=0.713, epoch=13, loss=0.955]

epoch:13, idx:10199/10845, loss:0.9551795615226615, acc:0.7133823529411765


 95%|█████████▍| 10300/10845 [1:32:50<05:50,  1.55it/s, acc=0.713, epoch=13, loss=0.957]

epoch:13, idx:10299/10845, loss:0.9570567570670138, acc:0.7128398058252428


 96%|█████████▌| 10400/10845 [1:33:47<05:38,  1.32it/s, acc=0.713, epoch=13, loss=0.958]

epoch:13, idx:10399/10845, loss:0.9575250879102029, acc:0.7126442307692308


 97%|█████████▋| 10500/10845 [1:34:38<03:08,  1.83it/s, acc=0.713, epoch=13, loss=0.956]

epoch:13, idx:10499/10845, loss:0.956497212211291, acc:0.7127380952380953


 98%|█████████▊| 10600/10845 [1:35:30<02:06,  1.94it/s, acc=0.713, epoch=13, loss=0.956]

epoch:13, idx:10599/10845, loss:0.9561955815328742, acc:0.7129245283018868


 99%|█████████▊| 10700/10845 [1:36:25<01:00,  2.38it/s, acc=0.713, epoch=13, loss=0.957]

epoch:13, idx:10699/10845, loss:0.9569262784997994, acc:0.7128504672897197


100%|█████████▉| 10800/10845 [1:37:19<00:20,  2.21it/s, acc=0.713, epoch=13, loss=0.957]

epoch:13, idx:10799/10845, loss:0.956621366209454, acc:0.7128009259259259


100%|██████████| 10845/10845 [1:37:42<00:00,  1.91it/s, acc=0.713, epoch=13, loss=0.957]


epoch:13, idx:0/1275, loss:0.8386824131011963, acc:0.5
epoch:13, idx:100/1275, loss:1.2193381567992787, acc:0.6410891089108911
epoch:13, idx:200/1275, loss:1.1546456422378768, acc:0.6480099502487562
epoch:13, idx:300/1275, loss:1.1595347305864987, acc:0.6519933554817275
epoch:13, idx:400/1275, loss:1.1418448524582119, acc:0.6602244389027432
epoch:13, idx:500/1275, loss:1.1361994677912928, acc:0.658183632734531
epoch:13, idx:600/1275, loss:1.1432560349066128, acc:0.6543261231281198
epoch:13, idx:700/1275, loss:1.1498388468624012, acc:0.6554921540656206
epoch:13, idx:800/1275, loss:1.1543414650784896, acc:0.6523096129837703
epoch:13, idx:900/1275, loss:1.1421079713681694, acc:0.6556603773584906
epoch:13, idx:1000/1275, loss:1.1398557774670475, acc:0.6553446553446554
epoch:13, idx:1100/1275, loss:1.133581780553622, acc:0.6546321525885559
epoch:13, idx:1200/1275, loss:1.1343861001417301, acc:0.6552872606161532


  1%|          | 100/10845 [00:51<1:07:43,  2.64it/s, acc=0.735, epoch=14, loss=0.869]

epoch:14, idx:99/10845, loss:0.8692601376771927, acc:0.735


  2%|▏         | 200/10845 [01:40<1:32:11,  1.92it/s, acc=0.724, epoch=14, loss=0.875]

epoch:14, idx:199/10845, loss:0.8753972336649894, acc:0.72375


  3%|▎         | 300/10845 [02:21<1:08:15,  2.57it/s, acc=0.718, epoch=14, loss=0.877]

epoch:14, idx:299/10845, loss:0.877395231127739, acc:0.7183333333333334


  4%|▎         | 400/10845 [02:58<1:52:20,  1.55it/s, acc=0.718, epoch=14, loss=0.882]

epoch:14, idx:399/10845, loss:0.8815298104286193, acc:0.718125


  5%|▍         | 501/10845 [03:38<59:39,  2.89it/s, acc=0.726, epoch=14, loss=0.874]  

epoch:14, idx:499/10845, loss:0.8759095578193664, acc:0.725


  6%|▌         | 600/10845 [04:22<1:05:36,  2.60it/s, acc=0.727, epoch=14, loss=0.871]

epoch:14, idx:599/10845, loss:0.871129629512628, acc:0.7270833333333333


  6%|▋         | 700/10845 [05:04<1:14:55,  2.26it/s, acc=0.727, epoch=14, loss=0.866]

epoch:14, idx:699/10845, loss:0.8663369850601469, acc:0.7271428571428571


  7%|▋         | 800/10845 [05:43<54:38,  3.06it/s, acc=0.73, epoch=14, loss=0.871]   

epoch:14, idx:799/10845, loss:0.8705561935156584, acc:0.7303125


  8%|▊         | 900/10845 [06:24<1:42:04,  1.62it/s, acc=0.73, epoch=14, loss=0.875] 

epoch:14, idx:899/10845, loss:0.8746395199166404, acc:0.73


  9%|▉         | 1000/10845 [07:04<1:08:35,  2.39it/s, acc=0.727, epoch=14, loss=0.893]

epoch:14, idx:999/10845, loss:0.892883575618267, acc:0.72725


 10%|█         | 1100/10845 [07:40<52:24,  3.10it/s, acc=0.73, epoch=14, loss=0.889]   

epoch:14, idx:1099/10845, loss:0.8891054372895848, acc:0.73


 11%|█         | 1200/10845 [08:21<58:36,  2.74it/s, acc=0.731, epoch=14, loss=0.881]  

epoch:14, idx:1199/10845, loss:0.8814657091597716, acc:0.73125


 12%|█▏        | 1300/10845 [08:59<1:18:42,  2.02it/s, acc=0.728, epoch=14, loss=0.893]

epoch:14, idx:1299/10845, loss:0.8932725949012316, acc:0.7284615384615385


 13%|█▎        | 1400/10845 [09:52<1:29:03,  1.77it/s, acc=0.728, epoch=14, loss=0.903]

epoch:14, idx:1399/10845, loss:0.9025082925387792, acc:0.7278571428571429


 14%|█▍        | 1500/10845 [10:43<1:20:12,  1.94it/s, acc=0.729, epoch=14, loss=0.905]

epoch:14, idx:1499/10845, loss:0.9051998194853464, acc:0.7285


 15%|█▍        | 1600/10845 [11:36<1:05:10,  2.36it/s, acc=0.729, epoch=14, loss=0.905]

epoch:14, idx:1599/10845, loss:0.9045646788924933, acc:0.72921875


 16%|█▌        | 1700/10845 [12:29<1:07:15,  2.27it/s, acc=0.729, epoch=14, loss=0.906]

epoch:14, idx:1699/10845, loss:0.9057653726199094, acc:0.7291176470588235


 17%|█▋        | 1800/10845 [13:23<1:40:09,  1.51it/s, acc=0.73, epoch=14, loss=0.902] 

epoch:14, idx:1799/10845, loss:0.9023844488130676, acc:0.7295833333333334


 18%|█▊        | 1900/10845 [14:16<1:18:16,  1.90it/s, acc=0.73, epoch=14, loss=0.903] 

epoch:14, idx:1899/10845, loss:0.9027223657934289, acc:0.7302631578947368


 18%|█▊        | 2000/10845 [15:10<1:32:39,  1.59it/s, acc=0.73, epoch=14, loss=0.908] 

epoch:14, idx:1999/10845, loss:0.9075302381217479, acc:0.729625


 19%|█▉        | 2100/10845 [16:07<1:03:15,  2.30it/s, acc=0.727, epoch=14, loss=0.913]

epoch:14, idx:2099/10845, loss:0.9133261452402387, acc:0.7271428571428571


 20%|██        | 2200/10845 [16:58<1:17:25,  1.86it/s, acc=0.728, epoch=14, loss=0.912]

epoch:14, idx:2199/10845, loss:0.911815353171392, acc:0.7282954545454545


 21%|██        | 2300/10845 [17:53<1:19:15,  1.80it/s, acc=0.728, epoch=14, loss=0.912]

epoch:14, idx:2299/10845, loss:0.911658748453078, acc:0.7276086956521739


 22%|██▏       | 2400/10845 [18:49<1:11:50,  1.96it/s, acc=0.728, epoch=14, loss=0.905]

epoch:14, idx:2399/10845, loss:0.9054173050448299, acc:0.7279166666666667


 23%|██▎       | 2500/10845 [19:45<1:15:35,  1.84it/s, acc=0.727, epoch=14, loss=0.91] 

epoch:14, idx:2499/10845, loss:0.9102696624159813, acc:0.7273


 24%|██▍       | 2600/10845 [20:35<1:26:14,  1.59it/s, acc=0.729, epoch=14, loss=0.907]

epoch:14, idx:2599/10845, loss:0.9065918432405362, acc:0.7290384615384615


 25%|██▍       | 2700/10845 [21:27<50:50,  2.67it/s, acc=0.729, epoch=14, loss=0.905]  

epoch:14, idx:2699/10845, loss:0.9054504786486979, acc:0.7289814814814815


 26%|██▌       | 2800/10845 [22:21<58:53,  2.28it/s, acc=0.728, epoch=14, loss=0.907]  

epoch:14, idx:2799/10845, loss:0.9072332563251257, acc:0.7277678571428572


 27%|██▋       | 2900/10845 [23:15<1:34:57,  1.39it/s, acc=0.728, epoch=14, loss=0.908]

epoch:14, idx:2899/10845, loss:0.9075845487672707, acc:0.728448275862069


 28%|██▊       | 3000/10845 [24:09<1:01:16,  2.13it/s, acc=0.728, epoch=14, loss=0.913]

epoch:14, idx:2999/10845, loss:0.912645790497462, acc:0.72775


 29%|██▊       | 3100/10845 [25:06<1:12:00,  1.79it/s, acc=0.728, epoch=14, loss=0.913]

epoch:14, idx:3099/10845, loss:0.9130123882332156, acc:0.727983870967742


 30%|██▉       | 3200/10845 [25:55<1:04:16,  1.98it/s, acc=0.729, epoch=14, loss=0.909]

epoch:14, idx:3199/10845, loss:0.909366089142859, acc:0.72921875


 30%|███       | 3300/10845 [26:48<1:02:27,  2.01it/s, acc=0.729, epoch=14, loss=0.91] 

epoch:14, idx:3299/10845, loss:0.9099469653765361, acc:0.7292424242424242


 31%|███▏      | 3400/10845 [27:44<1:32:55,  1.34it/s, acc=0.728, epoch=14, loss=0.915]

epoch:14, idx:3399/10845, loss:0.9148500796451288, acc:0.7279411764705882


 32%|███▏      | 3500/10845 [28:37<1:07:53,  1.80it/s, acc=0.729, epoch=14, loss=0.913]

epoch:14, idx:3499/10845, loss:0.9131866409948894, acc:0.7287857142857143


 33%|███▎      | 3600/10845 [29:32<1:19:03,  1.53it/s, acc=0.729, epoch=14, loss=0.912]

epoch:14, idx:3599/10845, loss:0.9118147983484798, acc:0.7291666666666666


 34%|███▍      | 3700/10845 [30:27<1:13:11,  1.63it/s, acc=0.728, epoch=14, loss=0.914]

epoch:14, idx:3699/10845, loss:0.913805650827047, acc:0.7282432432432432


 35%|███▌      | 3800/10845 [31:16<45:45,  2.57it/s, acc=0.727, epoch=14, loss=0.915]  

epoch:14, idx:3799/10845, loss:0.9145484473203358, acc:0.7274342105263157


 36%|███▌      | 3900/10845 [32:10<1:11:55,  1.61it/s, acc=0.727, epoch=14, loss=0.918]

epoch:14, idx:3899/10845, loss:0.9175964603057274, acc:0.7266666666666667


 37%|███▋      | 4000/10845 [32:54<40:26,  2.82it/s, acc=0.727, epoch=14, loss=0.916]  

epoch:14, idx:3999/10845, loss:0.9164917047470807, acc:0.7268125


 38%|███▊      | 4101/10845 [33:34<32:44,  3.43it/s, acc=0.727, epoch=14, loss=0.915]  

epoch:14, idx:4099/10845, loss:0.9152757171159838, acc:0.7268902439024391


 39%|███▊      | 4200/10845 [34:13<35:32,  3.12it/s, acc=0.727, epoch=14, loss=0.914]  

epoch:14, idx:4199/10845, loss:0.9144269124099187, acc:0.727202380952381


 40%|███▉      | 4300/10845 [34:53<52:25,  2.08it/s, acc=0.727, epoch=14, loss=0.918]

epoch:14, idx:4299/10845, loss:0.9179314288011817, acc:0.7269186046511628


 41%|████      | 4400/10845 [35:32<51:20,  2.09it/s, acc=0.727, epoch=14, loss=0.92]   

epoch:14, idx:4399/10845, loss:0.9197624097899957, acc:0.7265340909090909


 41%|████▏     | 4500/10845 [36:12<54:58,  1.92it/s, acc=0.726, epoch=14, loss=0.919]  

epoch:14, idx:4499/10845, loss:0.9193977494902081, acc:0.7264444444444444


 42%|████▏     | 4601/10845 [36:54<31:31,  3.30it/s, acc=0.727, epoch=14, loss=0.919]  

epoch:14, idx:4599/10845, loss:0.918760248966839, acc:0.7267934782608696


 43%|████▎     | 4700/10845 [37:31<27:35,  3.71it/s, acc=0.727, epoch=14, loss=0.918]

epoch:14, idx:4699/10845, loss:0.9177563382843708, acc:0.7268617021276595


 44%|████▍     | 4800/10845 [38:11<44:17,  2.27it/s, acc=0.727, epoch=14, loss=0.917]

epoch:14, idx:4799/10845, loss:0.9165930883089701, acc:0.72703125


 45%|████▌     | 4900/10845 [38:52<44:48,  2.21it/s, acc=0.726, epoch=14, loss=0.917]  

epoch:14, idx:4899/10845, loss:0.9169647760050638, acc:0.7264285714285714


 46%|████▌     | 5000/10845 [39:32<34:15,  2.84it/s, acc=0.726, epoch=14, loss=0.92]   

epoch:14, idx:4999/10845, loss:0.9195728238344193, acc:0.72585


 47%|████▋     | 5100/10845 [40:12<37:00,  2.59it/s, acc=0.726, epoch=14, loss=0.918]

epoch:14, idx:5099/10845, loss:0.9180458284125609, acc:0.7261274509803921


 48%|████▊     | 5201/10845 [40:54<27:49,  3.38it/s, acc=0.725, epoch=14, loss=0.921]

epoch:14, idx:5199/10845, loss:0.9207617091100949, acc:0.7253846153846154


 49%|████▉     | 5300/10845 [41:37<1:00:49,  1.52it/s, acc=0.726, epoch=14, loss=0.92]

epoch:14, idx:5299/10845, loss:0.9195729882199809, acc:0.7258018867924528


 50%|████▉     | 5400/10845 [42:32<43:46,  2.07it/s, acc=0.726, epoch=14, loss=0.921]  

epoch:14, idx:5399/10845, loss:0.9207572059057377, acc:0.7255555555555555


 51%|█████     | 5501/10845 [43:16<28:37,  3.11it/s, acc=0.725, epoch=14, loss=0.922] 

epoch:14, idx:5499/10845, loss:0.9220887055071918, acc:0.7253181818181819


 52%|█████▏    | 5600/10845 [44:04<30:52,  2.83it/s, acc=0.725, epoch=14, loss=0.922]  

epoch:14, idx:5599/10845, loss:0.9215282735441412, acc:0.725


 53%|█████▎    | 5700/10845 [44:44<34:16,  2.50it/s, acc=0.725, epoch=14, loss=0.922]

epoch:14, idx:5699/10845, loss:0.9223892772197724, acc:0.7250877192982456


 53%|█████▎    | 5800/10845 [45:25<29:08,  2.89it/s, acc=0.725, epoch=14, loss=0.923]

epoch:14, idx:5799/10845, loss:0.9230191989397181, acc:0.7250431034482758


 54%|█████▍    | 5900/10845 [46:07<52:54,  1.56it/s, acc=0.726, epoch=14, loss=0.921]

epoch:14, idx:5899/10845, loss:0.9209805981991654, acc:0.7255932203389831


 55%|█████▌    | 6000/10845 [46:50<37:45,  2.14it/s, acc=0.725, epoch=14, loss=0.922]

epoch:14, idx:5999/10845, loss:0.9219113389054934, acc:0.7251666666666666


 56%|█████▌    | 6100/10845 [47:30<31:51,  2.48it/s, acc=0.725, epoch=14, loss=0.923]

epoch:14, idx:6099/10845, loss:0.9225150725685182, acc:0.725


 57%|█████▋    | 6200/10845 [48:13<29:30,  2.62it/s, acc=0.725, epoch=14, loss=0.922]

epoch:14, idx:6199/10845, loss:0.9223496622520109, acc:0.7250806451612903


 58%|█████▊    | 6300/10845 [48:58<43:05,  1.76it/s, acc=0.724, epoch=14, loss=0.924]

epoch:14, idx:6299/10845, loss:0.9243953922722075, acc:0.7236904761904762


 59%|█████▉    | 6400/10845 [49:41<25:30,  2.90it/s, acc=0.723, epoch=14, loss=0.927]

epoch:14, idx:6399/10845, loss:0.927296338994056, acc:0.7230859375


 60%|█████▉    | 6500/10845 [50:20<34:24,  2.10it/s, acc=0.723, epoch=14, loss=0.926]

epoch:14, idx:6499/10845, loss:0.9261307181578416, acc:0.7233846153846154


 61%|██████    | 6600/10845 [51:06<37:01,  1.91it/s, acc=0.724, epoch=14, loss=0.926]

epoch:14, idx:6599/10845, loss:0.9260216377269138, acc:0.7236363636363636


 62%|██████▏   | 6700/10845 [52:01<36:48,  1.88it/s, acc=0.724, epoch=14, loss=0.927]  

epoch:14, idx:6699/10845, loss:0.9269582740851303, acc:0.7235074626865672


 63%|██████▎   | 6800/10845 [52:52<39:52,  1.69it/s, acc=0.723, epoch=14, loss=0.929]

epoch:14, idx:6799/10845, loss:0.9285153281776344, acc:0.723125


 64%|██████▎   | 6900/10845 [53:45<31:58,  2.06it/s, acc=0.723, epoch=14, loss=0.93] 

epoch:14, idx:6899/10845, loss:0.9295043168378913, acc:0.7231884057971014


 65%|██████▍   | 7000/10845 [54:39<36:57,  1.73it/s, acc=0.723, epoch=14, loss=0.929]

epoch:14, idx:6999/10845, loss:0.9294498744606972, acc:0.7231428571428572


 65%|██████▌   | 7100/10845 [55:33<29:28,  2.12it/s, acc=0.723, epoch=14, loss=0.929]

epoch:14, idx:7099/10845, loss:0.9291347492244881, acc:0.7230633802816901


 66%|██████▋   | 7200/10845 [56:27<32:22,  1.88it/s, acc=0.723, epoch=14, loss=0.929]

epoch:14, idx:7199/10845, loss:0.9290933487895462, acc:0.7233333333333334


 67%|██████▋   | 7300/10845 [57:24<30:38,  1.93it/s, acc=0.723, epoch=14, loss=0.929]

epoch:14, idx:7299/10845, loss:0.9288155755278182, acc:0.723013698630137


 68%|██████▊   | 7400/10845 [58:14<21:00,  2.73it/s, acc=0.723, epoch=14, loss=0.931]

epoch:14, idx:7399/10845, loss:0.930554505006687, acc:0.7226351351351351


 69%|██████▉   | 7500/10845 [59:04<30:46,  1.81it/s, acc=0.723, epoch=14, loss=0.93] 

epoch:14, idx:7499/10845, loss:0.9299089786926905, acc:0.7232333333333333


 70%|███████   | 7600/10845 [59:46<18:12,  2.97it/s, acc=0.723, epoch=14, loss=0.931]

epoch:14, idx:7599/10845, loss:0.9307843808594503, acc:0.7229276315789473


 71%|███████   | 7700/10845 [1:00:27<18:14,  2.87it/s, acc=0.723, epoch=14, loss=0.929]

epoch:14, idx:7699/10845, loss:0.9290930352195517, acc:0.7232142857142857


 72%|███████▏  | 7800/10845 [1:01:07<20:11,  2.51it/s, acc=0.723, epoch=14, loss=0.93] 

epoch:14, idx:7799/10845, loss:0.9301883522440225, acc:0.7227884615384615


 73%|███████▎  | 7900/10845 [1:01:48<19:25,  2.53it/s, acc=0.723, epoch=14, loss=0.93] 

epoch:14, idx:7899/10845, loss:0.929654511952702, acc:0.7227848101265822


 74%|███████▍  | 8000/10845 [1:02:33<19:35,  2.42it/s, acc=0.723, epoch=14, loss=0.931]

epoch:14, idx:7999/10845, loss:0.9305197791233659, acc:0.7225625


 75%|███████▍  | 8100/10845 [1:03:15<22:08,  2.07it/s, acc=0.723, epoch=14, loss=0.932]

epoch:14, idx:8099/10845, loss:0.9320556937470849, acc:0.7225617283950617


 76%|███████▌  | 8200/10845 [1:03:57<23:46,  1.85it/s, acc=0.722, epoch=14, loss=0.933]

epoch:14, idx:8199/10845, loss:0.93287649881549, acc:0.7221646341463415


 77%|███████▋  | 8300/10845 [1:04:34<16:36,  2.55it/s, acc=0.722, epoch=14, loss=0.933]

epoch:14, idx:8299/10845, loss:0.9328314691925623, acc:0.7220783132530121


 77%|███████▋  | 8400/10845 [1:05:17<15:44,  2.59it/s, acc=0.722, epoch=14, loss=0.933]

epoch:14, idx:8399/10845, loss:0.9332608594284171, acc:0.7217261904761905


 78%|███████▊  | 8501/10845 [1:05:58<17:33,  2.22it/s, acc=0.722, epoch=14, loss=0.933]

epoch:14, idx:8499/10845, loss:0.9330550774195615, acc:0.721735294117647


 79%|███████▉  | 8600/10845 [1:06:35<12:42,  2.94it/s, acc=0.722, epoch=14, loss=0.932]

epoch:14, idx:8599/10845, loss:0.9319102636846, acc:0.7218023255813953


 80%|████████  | 8700/10845 [1:07:14<11:01,  3.24it/s, acc=0.722, epoch=14, loss=0.931]

epoch:14, idx:8699/10845, loss:0.9311459967631033, acc:0.721867816091954


 81%|████████  | 8800/10845 [1:08:03<17:22,  1.96it/s, acc=0.722, epoch=14, loss=0.931]

epoch:14, idx:8799/10845, loss:0.9310410737957466, acc:0.7220738636363636


 82%|████████▏ | 8900/10845 [1:08:41<11:07,  2.91it/s, acc=0.722, epoch=14, loss=0.932]

epoch:14, idx:8899/10845, loss:0.9316039940112093, acc:0.7223876404494382


 83%|████████▎ | 9000/10845 [1:09:15<10:58,  2.80it/s, acc=0.722, epoch=14, loss=0.931]

epoch:14, idx:8999/10845, loss:0.9311263934605651, acc:0.7223888888888889


 84%|████████▍ | 9100/10845 [1:09:54<08:50,  3.29it/s, acc=0.722, epoch=14, loss=0.932]

epoch:14, idx:9099/10845, loss:0.9316230128721876, acc:0.7219505494505495


 85%|████████▍ | 9200/10845 [1:10:34<07:42,  3.55it/s, acc=0.722, epoch=14, loss=0.933]

epoch:14, idx:9199/10845, loss:0.9322201767661001, acc:0.7219021739130435


 86%|████████▌ | 9300/10845 [1:11:15<09:24,  2.74it/s, acc=0.722, epoch=14, loss=0.933]

epoch:14, idx:9299/10845, loss:0.9328574476671475, acc:0.7218817204301076


 87%|████████▋ | 9400/10845 [1:11:51<07:20,  3.28it/s, acc=0.722, epoch=14, loss=0.934]

epoch:14, idx:9399/10845, loss:0.9335255671149872, acc:0.7216489361702128


 88%|████████▊ | 9500/10845 [1:12:28<08:15,  2.71it/s, acc=0.722, epoch=14, loss=0.934]

epoch:14, idx:9499/10845, loss:0.933770379816231, acc:0.7216315789473684


 89%|████████▊ | 9600/10845 [1:13:09<07:59,  2.59it/s, acc=0.721, epoch=14, loss=0.936]

epoch:14, idx:9599/10845, loss:0.9358114582579583, acc:0.7211197916666666


 89%|████████▉ | 9700/10845 [1:13:47<09:12,  2.07it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:9699/10845, loss:0.9351162932244773, acc:0.7211855670103092


 90%|█████████ | 9800/10845 [1:14:23<07:11,  2.42it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:9799/10845, loss:0.9350343804608803, acc:0.7210969387755102


 91%|█████████▏| 9901/10845 [1:15:04<05:29,  2.87it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:9899/10845, loss:0.9354761387151901, acc:0.7206565656565657


 92%|█████████▏| 10000/10845 [1:15:50<07:36,  1.85it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:9999/10845, loss:0.9351448811680079, acc:0.72085


 93%|█████████▎| 10101/10845 [1:16:32<03:26,  3.59it/s, acc=0.721, epoch=14, loss=0.934]

epoch:14, idx:10099/10845, loss:0.9338743109307667, acc:0.7211138613861386


 94%|█████████▍| 10200/10845 [1:17:10<02:56,  3.65it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:10199/10845, loss:0.9351102929576939, acc:0.7209068627450981


 95%|█████████▍| 10300/10845 [1:17:51<03:22,  2.69it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:10299/10845, loss:0.9349549438594614, acc:0.721116504854369


 96%|█████████▌| 10400/10845 [1:18:31<02:33,  2.90it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:10399/10845, loss:0.9348701217025518, acc:0.7211778846153846


 97%|█████████▋| 10500/10845 [1:19:11<03:02,  1.89it/s, acc=0.721, epoch=14, loss=0.936]

epoch:14, idx:10499/10845, loss:0.9362680411054974, acc:0.7208095238095238


 98%|█████████▊| 10600/10845 [1:19:51<01:24,  2.88it/s, acc=0.721, epoch=14, loss=0.935]

epoch:14, idx:10599/10845, loss:0.9354157421161544, acc:0.7210377358490566


 99%|█████████▊| 10700/10845 [1:20:29<00:50,  2.89it/s, acc=0.721, epoch=14, loss=0.936]

epoch:14, idx:10699/10845, loss:0.9363404811821251, acc:0.7209345794392523


100%|█████████▉| 10800/10845 [1:21:08<00:15,  3.00it/s, acc=0.721, epoch=14, loss=0.937]

epoch:14, idx:10799/10845, loss:0.9372708034184244, acc:0.7206712962962963


100%|██████████| 10845/10845 [1:21:27<00:00,  2.09it/s, acc=0.721, epoch=14, loss=0.938]


epoch:14, idx:0/1275, loss:0.8473383188247681, acc:0.5
epoch:14, idx:100/1275, loss:1.2074301986411067, acc:0.6336633663366337
epoch:14, idx:200/1275, loss:1.1361185760047305, acc:0.654228855721393
epoch:14, idx:300/1275, loss:1.1444483555828615, acc:0.6553156146179402
epoch:14, idx:400/1275, loss:1.1296711916340854, acc:0.6608478802992519
epoch:14, idx:500/1275, loss:1.1285298573756646, acc:0.6551896207584831
epoch:14, idx:600/1275, loss:1.1386422302679293, acc:0.6522462562396006
epoch:14, idx:700/1275, loss:1.1497803838038752, acc:0.6465763195435092
epoch:14, idx:800/1275, loss:1.155261268478803, acc:0.6463795255930087
epoch:14, idx:900/1275, loss:1.1439032418349475, acc:0.6495560488346281
epoch:14, idx:1000/1275, loss:1.1421685619549555, acc:0.6493506493506493
epoch:14, idx:1100/1275, loss:1.1345359972127451, acc:0.6498637602179836
epoch:14, idx:1200/1275, loss:1.133331111825376, acc:0.6509159034138218


  1%|          | 100/10845 [00:52<59:24,  3.01it/s, acc=0.75, epoch=15, loss=0.803]  

epoch:15, idx:99/10845, loss:0.8027673810720444, acc:0.75


  2%|▏         | 200/10845 [01:41<1:33:21,  1.90it/s, acc=0.731, epoch=15, loss=0.834]

epoch:15, idx:199/10845, loss:0.83422972291708, acc:0.73125


  3%|▎         | 300/10845 [02:30<1:45:53,  1.66it/s, acc=0.727, epoch=15, loss=0.874]

epoch:15, idx:299/10845, loss:0.874095223347346, acc:0.7266666666666667


  4%|▎         | 400/10845 [03:25<1:29:55,  1.94it/s, acc=0.727, epoch=15, loss=0.866]

epoch:15, idx:399/10845, loss:0.8662735669314862, acc:0.726875


  5%|▍         | 500/10845 [04:16<1:37:38,  1.77it/s, acc=0.728, epoch=15, loss=0.873]

epoch:15, idx:499/10845, loss:0.8727092655897141, acc:0.7275


  6%|▌         | 600/10845 [05:13<1:33:04,  1.83it/s, acc=0.724, epoch=15, loss=0.903]

epoch:15, idx:599/10845, loss:0.902825013101101, acc:0.7241666666666666


  6%|▋         | 700/10845 [06:06<1:10:43,  2.39it/s, acc=0.726, epoch=15, loss=0.898]

epoch:15, idx:699/10845, loss:0.8977667179277965, acc:0.7260714285714286


  7%|▋         | 800/10845 [06:57<1:23:33,  2.00it/s, acc=0.729, epoch=15, loss=0.893]

epoch:15, idx:799/10845, loss:0.8927911064773798, acc:0.7290625


  8%|▊         | 900/10845 [07:48<1:20:12,  2.07it/s, acc=0.729, epoch=15, loss=0.892]

epoch:15, idx:899/10845, loss:0.8922278091642591, acc:0.7286111111111111


  9%|▉         | 1000/10845 [08:41<1:23:55,  1.96it/s, acc=0.728, epoch=15, loss=0.898]

epoch:15, idx:999/10845, loss:0.8979577752947807, acc:0.728


 10%|█         | 1101/10845 [09:38<1:09:34,  2.33it/s, acc=0.731, epoch=15, loss=0.894]

epoch:15, idx:1099/10845, loss:0.8935183489864523, acc:0.730909090909091


 11%|█         | 1200/10845 [10:26<1:00:22,  2.66it/s, acc=0.734, epoch=15, loss=0.886]

epoch:15, idx:1199/10845, loss:0.8860489109655222, acc:0.73375


 12%|█▏        | 1300/10845 [11:24<1:44:54,  1.52it/s, acc=0.733, epoch=15, loss=0.889]

epoch:15, idx:1299/10845, loss:0.8885016218515542, acc:0.7334615384615385


 13%|█▎        | 1400/10845 [12:15<1:03:34,  2.48it/s, acc=0.733, epoch=15, loss=0.888]

epoch:15, idx:1399/10845, loss:0.8876182275584766, acc:0.7328571428571429


 14%|█▍        | 1500/10845 [13:05<1:26:44,  1.80it/s, acc=0.731, epoch=15, loss=0.893]

epoch:15, idx:1499/10845, loss:0.8930734682480495, acc:0.731


 15%|█▍        | 1600/10845 [13:54<1:06:28,  2.32it/s, acc=0.731, epoch=15, loss=0.891]

epoch:15, idx:1599/10845, loss:0.8909992210194468, acc:0.73125


 16%|█▌        | 1700/10845 [14:49<1:28:52,  1.72it/s, acc=0.731, epoch=15, loss=0.897]

epoch:15, idx:1699/10845, loss:0.8969123292319915, acc:0.7314705882352941


 17%|█▋        | 1800/10845 [15:46<1:39:58,  1.51it/s, acc=0.73, epoch=15, loss=0.901] 

epoch:15, idx:1799/10845, loss:0.9014084194435014, acc:0.7301388888888889


 18%|█▊        | 1900/10845 [16:39<1:21:05,  1.84it/s, acc=0.73, epoch=15, loss=0.897]

epoch:15, idx:1899/10845, loss:0.8973582269643482, acc:0.73


 18%|█▊        | 2000/10845 [17:29<1:22:24,  1.79it/s, acc=0.731, epoch=15, loss=0.89] 

epoch:15, idx:1999/10845, loss:0.8904563230127096, acc:0.731375


 19%|█▉        | 2100/10845 [18:24<1:12:39,  2.01it/s, acc=0.733, epoch=15, loss=0.886]

epoch:15, idx:2099/10845, loss:0.8863135949344862, acc:0.7329761904761904


 20%|██        | 2200/10845 [19:20<1:12:49,  1.98it/s, acc=0.734, epoch=15, loss=0.883]

epoch:15, idx:2199/10845, loss:0.8828372919965874, acc:0.7342045454545455


 21%|██        | 2300/10845 [20:10<52:57,  2.69it/s, acc=0.733, epoch=15, loss=0.885]  

epoch:15, idx:2299/10845, loss:0.8851314729063408, acc:0.7334782608695652


 22%|██▏       | 2400/10845 [20:50<1:02:36,  2.25it/s, acc=0.733, epoch=15, loss=0.885]

epoch:15, idx:2399/10845, loss:0.8850190725798408, acc:0.7327083333333333


 23%|██▎       | 2500/10845 [21:42<1:20:21,  1.73it/s, acc=0.734, epoch=15, loss=0.882]

epoch:15, idx:2499/10845, loss:0.8820106828331947, acc:0.7335


 24%|██▍       | 2601/10845 [22:32<55:37,  2.47it/s, acc=0.734, epoch=15, loss=0.882]  

epoch:15, idx:2599/10845, loss:0.881695219610746, acc:0.7336538461538461


 25%|██▍       | 2700/10845 [23:20<1:15:16,  1.80it/s, acc=0.732, epoch=15, loss=0.887]

epoch:15, idx:2699/10845, loss:0.8869227032418604, acc:0.7323148148148149


 26%|██▌       | 2800/10845 [24:11<1:02:54,  2.13it/s, acc=0.731, epoch=15, loss=0.889]

epoch:15, idx:2799/10845, loss:0.8888254426739045, acc:0.7310714285714286


 27%|██▋       | 2900/10845 [25:06<54:34,  2.43it/s, acc=0.73, epoch=15, loss=0.889]   

epoch:15, idx:2899/10845, loss:0.8890158733314482, acc:0.7298275862068966


 28%|██▊       | 3000/10845 [25:57<1:06:16,  1.97it/s, acc=0.73, epoch=15, loss=0.889] 

epoch:15, idx:2999/10845, loss:0.8894693934222062, acc:0.7295833333333334


 29%|██▊       | 3100/10845 [26:48<1:04:45,  1.99it/s, acc=0.729, epoch=15, loss=0.892]

epoch:15, idx:3099/10845, loss:0.89242332405621, acc:0.7285483870967742


 30%|██▉       | 3200/10845 [27:44<1:15:38,  1.68it/s, acc=0.729, epoch=15, loss=0.891]

epoch:15, idx:3199/10845, loss:0.8911491660680622, acc:0.729375


 30%|███       | 3300/10845 [28:36<1:06:44,  1.88it/s, acc=0.73, epoch=15, loss=0.891] 

epoch:15, idx:3299/10845, loss:0.8909900259158828, acc:0.7295454545454545


 31%|███▏      | 3400/10845 [29:31<1:22:54,  1.50it/s, acc=0.73, epoch=15, loss=0.891] 

epoch:15, idx:3399/10845, loss:0.8908962844136883, acc:0.7302205882352941


 32%|███▏      | 3500/10845 [30:23<1:09:52,  1.75it/s, acc=0.731, epoch=15, loss=0.892]

epoch:15, idx:3499/10845, loss:0.8919156560982977, acc:0.7305


 33%|███▎      | 3600/10845 [31:17<1:05:14,  1.85it/s, acc=0.731, epoch=15, loss=0.892]

epoch:15, idx:3599/10845, loss:0.8924753319140938, acc:0.7309027777777778


 34%|███▍      | 3700/10845 [32:08<1:18:04,  1.53it/s, acc=0.731, epoch=15, loss=0.892]

epoch:15, idx:3699/10845, loss:0.8924903274146286, acc:0.7312837837837838


 35%|███▌      | 3800/10845 [33:03<1:07:55,  1.73it/s, acc=0.731, epoch=15, loss=0.895]

epoch:15, idx:3799/10845, loss:0.8946852411643454, acc:0.7307236842105264


 36%|███▌      | 3900/10845 [33:56<1:05:26,  1.77it/s, acc=0.73, epoch=15, loss=0.895] 

epoch:15, idx:3899/10845, loss:0.8950664306986026, acc:0.7303846153846154


 37%|███▋      | 4000/10845 [34:48<1:01:48,  1.85it/s, acc=0.731, epoch=15, loss=0.894]

epoch:15, idx:3999/10845, loss:0.8941326855048537, acc:0.73075


 38%|███▊      | 4100/10845 [35:45<1:05:05,  1.73it/s, acc=0.73, epoch=15, loss=0.898] 

epoch:15, idx:4099/10845, loss:0.8978882967771553, acc:0.7299390243902439


 39%|███▊      | 4200/10845 [36:39<47:51,  2.31it/s, acc=0.731, epoch=15, loss=0.899]  

epoch:15, idx:4199/10845, loss:0.8985432026003088, acc:0.7305357142857143


 40%|███▉      | 4300/10845 [37:30<52:07,  2.09it/s, acc=0.73, epoch=15, loss=0.902]   

epoch:15, idx:4299/10845, loss:0.9015838901317397, acc:0.7300581395348837


 41%|████      | 4400/10845 [38:20<43:56,  2.44it/s, acc=0.731, epoch=15, loss=0.901] 

epoch:15, idx:4399/10845, loss:0.9008040491627022, acc:0.7305113636363636


 41%|████▏     | 4500/10845 [39:20<1:02:12,  1.70it/s, acc=0.731, epoch=15, loss=0.9]  

epoch:15, idx:4499/10845, loss:0.8999340553349919, acc:0.7309444444444444


 42%|████▏     | 4600/10845 [40:16<53:03,  1.96it/s, acc=0.731, epoch=15, loss=0.9]    

epoch:15, idx:4599/10845, loss:0.9000485171377659, acc:0.7308152173913044


 43%|████▎     | 4700/10845 [41:10<46:46,  2.19it/s, acc=0.731, epoch=15, loss=0.899]  

epoch:15, idx:4699/10845, loss:0.8994695603657276, acc:0.7311702127659574


 44%|████▍     | 4800/10845 [42:02<51:38,  1.95it/s, acc=0.73, epoch=15, loss=0.903]   

epoch:15, idx:4799/10845, loss:0.9028130278550088, acc:0.73015625


 45%|████▌     | 4900/10845 [42:54<44:57,  2.20it/s, acc=0.73, epoch=15, loss=0.901]  

epoch:15, idx:4899/10845, loss:0.9014691552763082, acc:0.7304591836734694


 46%|████▌     | 5000/10845 [43:48<53:07,  1.83it/s, acc=0.73, epoch=15, loss=0.902]   

epoch:15, idx:4999/10845, loss:0.9016066339075566, acc:0.73035


 47%|████▋     | 5100/10845 [44:44<46:27,  2.06it/s, acc=0.731, epoch=15, loss=0.902]  

epoch:15, idx:5099/10845, loss:0.9015904682115012, acc:0.7306372549019607


 48%|████▊     | 5200/10845 [45:38<49:37,  1.90it/s, acc=0.73, epoch=15, loss=0.905]   

epoch:15, idx:5199/10845, loss:0.9051552469283343, acc:0.7296153846153847


 49%|████▉     | 5300/10845 [46:36<49:34,  1.86it/s, acc=0.729, epoch=15, loss=0.906]  

epoch:15, idx:5299/10845, loss:0.9061672376630441, acc:0.7293396226415094


 50%|████▉     | 5400/10845 [47:30<43:28,  2.09it/s, acc=0.729, epoch=15, loss=0.907]  

epoch:15, idx:5399/10845, loss:0.9065157100392712, acc:0.7292592592592593


 51%|█████     | 5500/10845 [48:23<45:53,  1.94it/s, acc=0.729, epoch=15, loss=0.907]  

epoch:15, idx:5499/10845, loss:0.9073250098716129, acc:0.7292272727272727


 52%|█████▏    | 5600/10845 [49:17<39:01,  2.24it/s, acc=0.729, epoch=15, loss=0.908]  

epoch:15, idx:5599/10845, loss:0.9076413093879819, acc:0.7288839285714286


 53%|█████▎    | 5700/10845 [50:11<44:54,  1.91it/s, acc=0.729, epoch=15, loss=0.909]  

epoch:15, idx:5699/10845, loss:0.9087974247545527, acc:0.7289912280701755


 53%|█████▎    | 5800/10845 [51:03<53:58,  1.56it/s, acc=0.729, epoch=15, loss=0.908]  

epoch:15, idx:5799/10845, loss:0.9077520744913611, acc:0.7292672413793103


 54%|█████▍    | 5900/10845 [51:55<33:10,  2.48it/s, acc=0.729, epoch=15, loss=0.911]  

epoch:15, idx:5899/10845, loss:0.9112575367396161, acc:0.7286440677966102


 55%|█████▌    | 6000/10845 [52:47<46:20,  1.74it/s, acc=0.729, epoch=15, loss=0.91]  

epoch:15, idx:5999/10845, loss:0.9103557021071513, acc:0.7287916666666666


 56%|█████▌    | 6100/10845 [53:46<47:44,  1.66it/s, acc=0.728, epoch=15, loss=0.912]  

epoch:15, idx:6099/10845, loss:0.9115081029104405, acc:0.728155737704918


 57%|█████▋    | 6200/10845 [54:40<32:22,  2.39it/s, acc=0.728, epoch=15, loss=0.912]  

epoch:15, idx:6199/10845, loss:0.911567246524557, acc:0.7278225806451613


 58%|█████▊    | 6300/10845 [55:37<49:39,  1.53it/s, acc=0.727, epoch=15, loss=0.913]  

epoch:15, idx:6299/10845, loss:0.9128129915301761, acc:0.7271031746031746


 59%|█████▉    | 6400/10845 [56:27<34:10,  2.17it/s, acc=0.727, epoch=15, loss=0.912]

epoch:15, idx:6399/10845, loss:0.9124896895419806, acc:0.7273046875


 60%|█████▉    | 6500/10845 [57:19<32:27,  2.23it/s, acc=0.728, epoch=15, loss=0.912]

epoch:15, idx:6499/10845, loss:0.9115582837141477, acc:0.7276538461538462


 61%|██████    | 6600/10845 [58:13<27:35,  2.56it/s, acc=0.728, epoch=15, loss=0.912]

epoch:15, idx:6599/10845, loss:0.9120735368042281, acc:0.7277651515151515


 62%|██████▏   | 6700/10845 [59:07<39:41,  1.74it/s, acc=0.728, epoch=15, loss=0.912]  

epoch:15, idx:6699/10845, loss:0.9124120854797648, acc:0.7278731343283582


 63%|██████▎   | 6800/10845 [1:00:01<32:29,  2.07it/s, acc=0.728, epoch=15, loss=0.913]

epoch:15, idx:6799/10845, loss:0.9133374597921091, acc:0.7277941176470588


 64%|██████▎   | 6900/10845 [1:00:51<33:04,  1.99it/s, acc=0.728, epoch=15, loss=0.912]

epoch:15, idx:6899/10845, loss:0.9121612561267355, acc:0.7277173913043479


 65%|██████▍   | 7000/10845 [1:01:40<19:43,  3.25it/s, acc=0.728, epoch=15, loss=0.912]

epoch:15, idx:6999/10845, loss:0.9121214246494429, acc:0.7278571428571429


 65%|██████▌   | 7100/10845 [1:02:13<17:46,  3.51it/s, acc=0.727, epoch=15, loss=0.914]

epoch:15, idx:7099/10845, loss:0.9135874576467863, acc:0.7272887323943662


 66%|██████▋   | 7200/10845 [1:02:52<20:47,  2.92it/s, acc=0.727, epoch=15, loss=0.916]

epoch:15, idx:7199/10845, loss:0.9162387711720335, acc:0.7271180555555555


 67%|██████▋   | 7300/10845 [1:03:30<18:01,  3.28it/s, acc=0.726, epoch=15, loss=0.918]

epoch:15, idx:7299/10845, loss:0.9184190639241101, acc:0.726472602739726


 68%|██████▊   | 7400/10845 [1:04:10<22:56,  2.50it/s, acc=0.726, epoch=15, loss=0.92] 

epoch:15, idx:7399/10845, loss:0.9202785170481012, acc:0.726081081081081


 69%|██████▉   | 7500/10845 [1:04:51<24:40,  2.26it/s, acc=0.726, epoch=15, loss=0.921]

epoch:15, idx:7499/10845, loss:0.9211165523131688, acc:0.7262


 70%|███████   | 7600/10845 [1:05:34<21:55,  2.47it/s, acc=0.726, epoch=15, loss=0.922]

epoch:15, idx:7599/10845, loss:0.9216912700706407, acc:0.7257894736842105


 71%|███████   | 7700/10845 [1:06:21<29:10,  1.80it/s, acc=0.726, epoch=15, loss=0.921]

epoch:15, idx:7699/10845, loss:0.9205892808406384, acc:0.7260064935064935


 72%|███████▏  | 7800/10845 [1:07:10<18:15,  2.78it/s, acc=0.726, epoch=15, loss=0.92] 

epoch:15, idx:7799/10845, loss:0.9204785694449376, acc:0.72625


 73%|███████▎  | 7900/10845 [1:07:45<19:07,  2.57it/s, acc=0.727, epoch=15, loss=0.919]

epoch:15, idx:7899/10845, loss:0.9194986420718929, acc:0.7265189873417721


 74%|███████▍  | 8000/10845 [1:08:32<25:29,  1.86it/s, acc=0.727, epoch=15, loss=0.92] 

epoch:15, idx:7999/10845, loss:0.920337712585926, acc:0.72653125


 75%|███████▍  | 8100/10845 [1:09:23<22:19,  2.05it/s, acc=0.726, epoch=15, loss=0.921]

epoch:15, idx:8099/10845, loss:0.9209294949343175, acc:0.726358024691358


 76%|███████▌  | 8200/10845 [1:10:00<14:18,  3.08it/s, acc=0.726, epoch=15, loss=0.922]

epoch:15, idx:8199/10845, loss:0.9219229373117772, acc:0.7262804878048781


 77%|███████▋  | 8301/10845 [1:10:35<09:23,  4.51it/s, acc=0.726, epoch=15, loss=0.923]

epoch:15, idx:8299/10845, loss:0.9231131528443601, acc:0.7261144578313253


 77%|███████▋  | 8400/10845 [1:11:08<18:12,  2.24it/s, acc=0.726, epoch=15, loss=0.923]

epoch:15, idx:8399/10845, loss:0.923115742327202, acc:0.7257142857142858


 78%|███████▊  | 8500/10845 [1:11:43<17:08,  2.28it/s, acc=0.726, epoch=15, loss=0.923]

epoch:15, idx:8499/10845, loss:0.9226382023586946, acc:0.7257941176470588


 79%|███████▉  | 8600/10845 [1:12:16<12:43,  2.94it/s, acc=0.725, epoch=15, loss=0.923]

epoch:15, idx:8599/10845, loss:0.9232781616061233, acc:0.7253779069767442


 80%|████████  | 8700/10845 [1:12:48<08:57,  3.99it/s, acc=0.726, epoch=15, loss=0.923]

epoch:15, idx:8699/10845, loss:0.9227523748491002, acc:0.7255459770114943


 81%|████████  | 8800/10845 [1:13:21<14:18,  2.38it/s, acc=0.725, epoch=15, loss=0.923]

epoch:15, idx:8799/10845, loss:0.9232445394586433, acc:0.7254261363636364


 82%|████████▏ | 8901/10845 [1:13:54<07:26,  4.36it/s, acc=0.725, epoch=15, loss=0.924]

epoch:15, idx:8899/10845, loss:0.9237940601351555, acc:0.7253089887640449


 83%|████████▎ | 9000/10845 [1:14:27<08:38,  3.56it/s, acc=0.725, epoch=15, loss=0.925]

epoch:15, idx:8999/10845, loss:0.9245269844532013, acc:0.7251111111111112


 84%|████████▍ | 9100/10845 [1:15:00<11:20,  2.57it/s, acc=0.725, epoch=15, loss=0.924]

epoch:15, idx:9099/10845, loss:0.9243614703678823, acc:0.7252747252747253


 85%|████████▍ | 9200/10845 [1:15:50<17:42,  1.55it/s, acc=0.725, epoch=15, loss=0.923]

epoch:15, idx:9199/10845, loss:0.9233226438968078, acc:0.7254076086956521


 86%|████████▌ | 9300/10845 [1:16:47<10:46,  2.39it/s, acc=0.725, epoch=15, loss=0.925]

epoch:15, idx:9299/10845, loss:0.924824885661884, acc:0.7251344086021505


 87%|████████▋ | 9400/10845 [1:17:22<08:04,  2.98it/s, acc=0.725, epoch=15, loss=0.926]

epoch:15, idx:9399/10845, loss:0.9255946581414405, acc:0.7250797872340425


 88%|████████▊ | 9500/10845 [1:17:59<08:23,  2.67it/s, acc=0.725, epoch=15, loss=0.926]

epoch:15, idx:9499/10845, loss:0.926152912735939, acc:0.7250526315789474


 89%|████████▊ | 9600/10845 [1:18:36<06:24,  3.24it/s, acc=0.725, epoch=15, loss=0.925]

epoch:15, idx:9599/10845, loss:0.9251424100684623, acc:0.725078125


 89%|████████▉ | 9700/10845 [1:19:08<04:25,  4.32it/s, acc=0.725, epoch=15, loss=0.925]

epoch:15, idx:9699/10845, loss:0.9251621450222645, acc:0.725438144329897


 90%|█████████ | 9801/10845 [1:19:43<05:00,  3.47it/s, acc=0.725, epoch=15, loss=0.926]

epoch:15, idx:9799/10845, loss:0.9261807524306434, acc:0.7252295918367347


 91%|█████████▏| 9900/10845 [1:20:15<04:45,  3.31it/s, acc=0.725, epoch=15, loss=0.927]

epoch:15, idx:9899/10845, loss:0.9271402311987347, acc:0.7248232323232323


 92%|█████████▏| 10000/10845 [1:20:46<04:31,  3.11it/s, acc=0.725, epoch=15, loss=0.927]

epoch:15, idx:9999/10845, loss:0.9268846240758896, acc:0.724825


 93%|█████████▎| 10100/10845 [1:21:21<03:47,  3.28it/s, acc=0.725, epoch=15, loss=0.927]

epoch:15, idx:10099/10845, loss:0.9270418599100396, acc:0.7248762376237624


 94%|█████████▍| 10200/10845 [1:21:54<03:29,  3.08it/s, acc=0.725, epoch=15, loss=0.927]

epoch:15, idx:10199/10845, loss:0.9270507254729083, acc:0.7247058823529412


 95%|█████████▍| 10300/10845 [1:22:28<03:28,  2.62it/s, acc=0.725, epoch=15, loss=0.926]

epoch:15, idx:10299/10845, loss:0.9260981312365207, acc:0.725121359223301


 96%|█████████▌| 10400/10845 [1:22:58<02:19,  3.20it/s, acc=0.725, epoch=15, loss=0.927]

epoch:15, idx:10399/10845, loss:0.9270014683673015, acc:0.725


 97%|█████████▋| 10500/10845 [1:23:32<01:31,  3.79it/s, acc=0.725, epoch=15, loss=0.928]

epoch:15, idx:10499/10845, loss:0.927968357035092, acc:0.7247380952380953


 98%|█████████▊| 10600/10845 [1:24:06<02:37,  1.56it/s, acc=0.725, epoch=15, loss=0.928]

epoch:15, idx:10599/10845, loss:0.9278812242004106, acc:0.7245518867924529


 99%|█████████▊| 10700/10845 [1:24:50<01:10,  2.04it/s, acc=0.725, epoch=15, loss=0.928]

epoch:15, idx:10699/10845, loss:0.9275030460981565, acc:0.724626168224299


100%|█████████▉| 10800/10845 [1:25:41<00:14,  3.05it/s, acc=0.725, epoch=15, loss=0.927]

epoch:15, idx:10799/10845, loss:0.9273000012227782, acc:0.7246064814814814


100%|██████████| 10845/10845 [1:25:56<00:00,  2.82it/s, acc=0.724, epoch=15, loss=0.928]


epoch:15, idx:0/1275, loss:1.1241655349731445, acc:0.5
epoch:15, idx:100/1275, loss:1.1928715074416434, acc:0.6287128712871287
epoch:15, idx:200/1275, loss:1.144248552286803, acc:0.6455223880597015
epoch:15, idx:300/1275, loss:1.1571432217411028, acc:0.6519933554817275
epoch:15, idx:400/1275, loss:1.1364398200315728, acc:0.6602244389027432
epoch:15, idx:500/1275, loss:1.1291021588081847, acc:0.659181636726547
epoch:15, idx:600/1275, loss:1.1313559456593583, acc:0.653910149750416
epoch:15, idx:700/1275, loss:1.1430892230950818, acc:0.6504992867332382
epoch:15, idx:800/1275, loss:1.148920718799072, acc:0.6476279650436954
epoch:15, idx:900/1275, loss:1.1362177313498731, acc:0.6526082130965594
epoch:15, idx:1000/1275, loss:1.135359271839782, acc:0.6528471528471529
epoch:15, idx:1100/1275, loss:1.12761729430546, acc:0.6541780199818347
epoch:15, idx:1200/1275, loss:1.1273813795586807, acc:0.6532056619483764


  1%|          | 100/10845 [00:32<41:50,  4.28it/s, acc=0.713, epoch=16, loss=0.833] 

epoch:16, idx:99/10845, loss:0.833039391040802, acc:0.7125


  2%|▏         | 200/10845 [01:04<50:38,  3.50it/s, acc=0.715, epoch=16, loss=0.868]  

epoch:16, idx:199/10845, loss:0.868487104177475, acc:0.715


  3%|▎         | 300/10845 [01:35<1:02:48,  2.80it/s, acc=0.728, epoch=16, loss=0.887]

epoch:16, idx:299/10845, loss:0.8872842770814896, acc:0.7275


  4%|▎         | 400/10845 [02:26<53:59,  3.22it/s, acc=0.723, epoch=16, loss=0.928]  

epoch:16, idx:399/10845, loss:0.9277415269613266, acc:0.7225


  5%|▍         | 501/10845 [02:58<39:59,  4.31it/s, acc=0.728, epoch=16, loss=0.923]  

epoch:16, idx:499/10845, loss:0.9221533806324005, acc:0.7275


  6%|▌         | 600/10845 [03:29<48:41,  3.51it/s, acc=0.735, epoch=16, loss=0.903]  

epoch:16, idx:599/10845, loss:0.9028085986773173, acc:0.7345833333333334


  6%|▋         | 700/10845 [04:04<1:41:21,  1.67it/s, acc=0.738, epoch=16, loss=0.896]

epoch:16, idx:699/10845, loss:0.8956267151662282, acc:0.7378571428571429


  7%|▋         | 800/10845 [04:52<54:20,  3.08it/s, acc=0.737, epoch=16, loss=0.892]  

epoch:16, idx:799/10845, loss:0.891782610192895, acc:0.7371875


  8%|▊         | 900/10845 [05:26<1:01:22,  2.70it/s, acc=0.735, epoch=16, loss=0.904]

epoch:16, idx:899/10845, loss:0.9037198048498896, acc:0.735


  9%|▉         | 1000/10845 [06:00<1:03:48,  2.57it/s, acc=0.733, epoch=16, loss=0.913]

epoch:16, idx:999/10845, loss:0.9134494392573833, acc:0.7325


 10%|█         | 1100/10845 [06:32<39:05,  4.15it/s, acc=0.732, epoch=16, loss=0.919]  

epoch:16, idx:1099/10845, loss:0.9187993055311117, acc:0.7320454545454546


 11%|█         | 1200/10845 [07:03<58:39,  2.74it/s, acc=0.734, epoch=16, loss=0.909]  

epoch:16, idx:1199/10845, loss:0.9092012998710076, acc:0.7341666666666666


 12%|█▏        | 1300/10845 [07:35<51:24,  3.09it/s, acc=0.736, epoch=16, loss=0.899]  

epoch:16, idx:1299/10845, loss:0.8993213488276188, acc:0.7355769230769231


 13%|█▎        | 1401/10845 [08:07<41:04,  3.83it/s, acc=0.735, epoch=16, loss=0.901]  

epoch:16, idx:1399/10845, loss:0.9012573179815496, acc:0.735


 14%|█▍        | 1500/10845 [08:35<50:14,  3.10it/s, acc=0.734, epoch=16, loss=0.907]  

epoch:16, idx:1499/10845, loss:0.9067960684498151, acc:0.7336666666666667


 15%|█▍        | 1600/10845 [09:13<1:04:42,  2.38it/s, acc=0.734, epoch=16, loss=0.902]

epoch:16, idx:1599/10845, loss:0.9021631734631955, acc:0.734375


 16%|█▌        | 1700/10845 [09:46<44:34,  3.42it/s, acc=0.736, epoch=16, loss=0.897]  

epoch:16, idx:1699/10845, loss:0.8970607684465015, acc:0.735735294117647


 17%|█▋        | 1800/10845 [10:24<41:26,  3.64it/s, acc=0.733, epoch=16, loss=0.904]  

epoch:16, idx:1799/10845, loss:0.9036856572329998, acc:0.7327777777777778


 18%|█▊        | 1901/10845 [10:54<33:18,  4.48it/s, acc=0.734, epoch=16, loss=0.9]    

epoch:16, idx:1899/10845, loss:0.9008587634720301, acc:0.7336842105263158


 18%|█▊        | 2001/10845 [11:26<42:02,  3.51it/s, acc=0.734, epoch=16, loss=0.903]  

epoch:16, idx:1999/10845, loss:0.9027868204563856, acc:0.734125


 19%|█▉        | 2100/10845 [11:59<46:58,  3.10it/s, acc=0.734, epoch=16, loss=0.899]  

epoch:16, idx:2099/10845, loss:0.8985097436819758, acc:0.7342857142857143


 20%|██        | 2200/10845 [12:29<46:51,  3.08it/s, acc=0.733, epoch=16, loss=0.907]  

epoch:16, idx:2199/10845, loss:0.9065787928348238, acc:0.7326136363636364


 21%|██        | 2300/10845 [13:17<1:20:24,  1.77it/s, acc=0.732, epoch=16, loss=0.911]

epoch:16, idx:2299/10845, loss:0.9109910073098929, acc:0.7317391304347826


 22%|██▏       | 2400/10845 [14:08<1:11:30,  1.97it/s, acc=0.733, epoch=16, loss=0.909]

epoch:16, idx:2399/10845, loss:0.9091977435722947, acc:0.7329166666666667


 23%|██▎       | 2501/10845 [15:01<56:06,  2.48it/s, acc=0.734, epoch=16, loss=0.909]  

epoch:16, idx:2499/10845, loss:0.9091498968243599, acc:0.7334


 24%|██▍       | 2600/10845 [15:52<1:23:04,  1.65it/s, acc=0.733, epoch=16, loss=0.911]

epoch:16, idx:2599/10845, loss:0.9110754203681762, acc:0.7325


 25%|██▍       | 2700/10845 [16:38<55:08,  2.46it/s, acc=0.733, epoch=16, loss=0.911]  

epoch:16, idx:2699/10845, loss:0.9105398547097489, acc:0.7330555555555556


 26%|██▌       | 2800/10845 [17:22<1:00:08,  2.23it/s, acc=0.732, epoch=16, loss=0.914]

epoch:16, idx:2799/10845, loss:0.9137629245966673, acc:0.7323214285714286


 27%|██▋       | 2900/10845 [18:03<1:17:37,  1.71it/s, acc=0.733, epoch=16, loss=0.91] 

epoch:16, idx:2899/10845, loss:0.9097174427118795, acc:0.7333620689655173


 28%|██▊       | 3000/10845 [18:46<34:45,  3.76it/s, acc=0.734, epoch=16, loss=0.91]   

epoch:16, idx:2999/10845, loss:0.9095214385092258, acc:0.7335


 29%|██▊       | 3100/10845 [19:32<44:30,  2.90it/s, acc=0.734, epoch=16, loss=0.909]  

epoch:16, idx:3099/10845, loss:0.9085763979631085, acc:0.7337903225806451


 30%|██▉       | 3200/10845 [20:14<53:42,  2.37it/s, acc=0.734, epoch=16, loss=0.908]  

epoch:16, idx:3199/10845, loss:0.9081125669647008, acc:0.733515625


 30%|███       | 3300/10845 [21:12<1:21:26,  1.54it/s, acc=0.734, epoch=16, loss=0.907]

epoch:16, idx:3299/10845, loss:0.9066279362638792, acc:0.7337121212121213


 31%|███▏      | 3400/10845 [22:09<1:32:13,  1.35it/s, acc=0.734, epoch=16, loss=0.905]

epoch:16, idx:3399/10845, loss:0.9054520995213705, acc:0.7341911764705882


 32%|███▏      | 3500/10845 [23:04<1:06:28,  1.84it/s, acc=0.734, epoch=16, loss=0.907]

epoch:16, idx:3499/10845, loss:0.9068054909110069, acc:0.7340714285714286


 33%|███▎      | 3600/10845 [23:46<35:16,  3.42it/s, acc=0.734, epoch=16, loss=0.908]  

epoch:16, idx:3599/10845, loss:0.9075545862896575, acc:0.7336111111111111


 34%|███▍      | 3700/10845 [24:19<45:12,  2.63it/s, acc=0.732, epoch=16, loss=0.913]  

epoch:16, idx:3699/10845, loss:0.9125717342302606, acc:0.7322297297297298


 35%|███▌      | 3801/10845 [24:51<40:31,  2.90it/s, acc=0.732, epoch=16, loss=0.913]  

epoch:16, idx:3799/10845, loss:0.91329602916774, acc:0.7315131578947368


 36%|███▌      | 3900/10845 [25:22<27:24,  4.22it/s, acc=0.732, epoch=16, loss=0.911]

epoch:16, idx:3899/10845, loss:0.9108351779060486, acc:0.7324358974358974


 37%|███▋      | 4000/10845 [25:56<27:31,  4.15it/s, acc=0.732, epoch=16, loss=0.912]

epoch:16, idx:3999/10845, loss:0.9115541490092873, acc:0.7321875


 38%|███▊      | 4101/10845 [26:24<28:30,  3.94it/s, acc=0.732, epoch=16, loss=0.911]

epoch:16, idx:4099/10845, loss:0.9105133306616691, acc:0.7325


 39%|███▊      | 4200/10845 [27:07<48:29,  2.28it/s, acc=0.732, epoch=16, loss=0.909]  

epoch:16, idx:4199/10845, loss:0.9093338695665201, acc:0.7323809523809524


 40%|███▉      | 4300/10845 [27:57<1:14:14,  1.47it/s, acc=0.732, epoch=16, loss=0.912]

epoch:16, idx:4299/10845, loss:0.9117132039333499, acc:0.7315697674418604


 41%|████      | 4400/10845 [28:52<1:15:23,  1.42it/s, acc=0.732, epoch=16, loss=0.91] 

epoch:16, idx:4399/10845, loss:0.9097625966031443, acc:0.7323863636363637


 41%|████▏     | 4500/10845 [29:48<55:19,  1.91it/s, acc=0.733, epoch=16, loss=0.909]  

epoch:16, idx:4499/10845, loss:0.9087044387658437, acc:0.7327777777777778


 42%|████▏     | 4600/10845 [30:43<44:29,  2.34it/s, acc=0.733, epoch=16, loss=0.908]  

epoch:16, idx:4599/10845, loss:0.9084393411486045, acc:0.7325


 43%|████▎     | 4700/10845 [31:39<1:06:13,  1.55it/s, acc=0.732, epoch=16, loss=0.91] 

epoch:16, idx:4699/10845, loss:0.9098602618625824, acc:0.7322872340425531


 44%|████▍     | 4800/10845 [32:34<1:02:50,  1.60it/s, acc=0.732, epoch=16, loss=0.911]

epoch:16, idx:4799/10845, loss:0.9110183886749049, acc:0.7316145833333333


 45%|████▌     | 4900/10845 [33:31<49:24,  2.01it/s, acc=0.731, epoch=16, loss=0.912]  

epoch:16, idx:4899/10845, loss:0.9124247768764593, acc:0.7314285714285714


 46%|████▌     | 5000/10845 [34:23<49:32,  1.97it/s, acc=0.73, epoch=16, loss=0.914]   

epoch:16, idx:4999/10845, loss:0.9137861082732678, acc:0.73035


 47%|████▋     | 5100/10845 [35:15<1:00:45,  1.58it/s, acc=0.73, epoch=16, loss=0.913] 

epoch:16, idx:5099/10845, loss:0.9129857366166862, acc:0.730343137254902


 48%|████▊     | 5200/10845 [36:05<46:40,  2.02it/s, acc=0.729, epoch=16, loss=0.915] 

epoch:16, idx:5199/10845, loss:0.9153250058625753, acc:0.729375


 49%|████▉     | 5300/10845 [37:01<41:39,  2.22it/s, acc=0.729, epoch=16, loss=0.914]  

epoch:16, idx:5299/10845, loss:0.9144020846148707, acc:0.7292924528301887


 50%|████▉     | 5400/10845 [37:55<50:18,  1.80it/s, acc=0.729, epoch=16, loss=0.916]  

epoch:16, idx:5399/10845, loss:0.915628957224113, acc:0.7290740740740741


 51%|█████     | 5500/10845 [38:47<47:05,  1.89it/s, acc=0.729, epoch=16, loss=0.915]  

epoch:16, idx:5499/10845, loss:0.9145212651978839, acc:0.7292727272727273


 52%|█████▏    | 5601/10845 [39:43<28:24,  3.08it/s, acc=0.73, epoch=16, loss=0.915]   

epoch:16, idx:5599/10845, loss:0.9153502970242074, acc:0.7294642857142857


 53%|█████▎    | 5700/10845 [40:38<45:11,  1.90it/s, acc=0.73, epoch=16, loss=0.914]  

epoch:16, idx:5699/10845, loss:0.9141939940212066, acc:0.7301315789473685


 53%|█████▎    | 5800/10845 [41:34<33:32,  2.51it/s, acc=0.731, epoch=16, loss=0.912]  

epoch:16, idx:5799/10845, loss:0.9119750599049289, acc:0.730646551724138


 54%|█████▍    | 5900/10845 [42:26<38:50,  2.12it/s, acc=0.731, epoch=16, loss=0.912]  

epoch:16, idx:5899/10845, loss:0.9121400742864205, acc:0.7306779661016949


 55%|█████▌    | 6000/10845 [43:20<33:16,  2.43it/s, acc=0.731, epoch=16, loss=0.909]  

epoch:16, idx:5999/10845, loss:0.909377265031139, acc:0.7308333333333333


 56%|█████▌    | 6100/10845 [44:14<37:36,  2.10it/s, acc=0.731, epoch=16, loss=0.911]  

epoch:16, idx:6099/10845, loss:0.9111213294062458, acc:0.7305327868852459


 57%|█████▋    | 6200/10845 [45:09<43:29,  1.78it/s, acc=0.731, epoch=16, loss=0.911]

epoch:16, idx:6199/10845, loss:0.9114373236750403, acc:0.7306854838709678


 58%|█████▊    | 6300/10845 [46:03<41:27,  1.83it/s, acc=0.73, epoch=16, loss=0.912]   

epoch:16, idx:6299/10845, loss:0.912026444384976, acc:0.7300396825396825


 59%|█████▉    | 6400/10845 [46:55<36:26,  2.03it/s, acc=0.73, epoch=16, loss=0.912]

epoch:16, idx:6399/10845, loss:0.912047687987797, acc:0.7295703125


 60%|█████▉    | 6500/10845 [47:49<41:23,  1.75it/s, acc=0.73, epoch=16, loss=0.912]  

epoch:16, idx:6499/10845, loss:0.9120770590167779, acc:0.7298846153846154


 61%|██████    | 6600/10845 [48:42<39:51,  1.78it/s, acc=0.73, epoch=16, loss=0.911]

epoch:16, idx:6599/10845, loss:0.91141011375821, acc:0.7300378787878787


 62%|██████▏   | 6700/10845 [49:36<44:56,  1.54it/s, acc=0.73, epoch=16, loss=0.913]

epoch:16, idx:6699/10845, loss:0.9127966105448666, acc:0.7298134328358209


 63%|██████▎   | 6800/10845 [50:32<30:32,  2.21it/s, acc=0.73, epoch=16, loss=0.914]  

epoch:16, idx:6799/10845, loss:0.9137023780670236, acc:0.73


 64%|██████▎   | 6900/10845 [51:27<32:38,  2.01it/s, acc=0.73, epoch=16, loss=0.915]

epoch:16, idx:6899/10845, loss:0.9154256495401479, acc:0.7298188405797101


 65%|██████▍   | 7000/10845 [52:21<42:35,  1.50it/s, acc=0.73, epoch=16, loss=0.916]

epoch:16, idx:6999/10845, loss:0.9161241106944424, acc:0.7295714285714285


 65%|██████▌   | 7100/10845 [53:17<33:41,  1.85it/s, acc=0.729, epoch=16, loss=0.918]

epoch:16, idx:7099/10845, loss:0.9178726956500134, acc:0.7288732394366197


 66%|██████▋   | 7200/10845 [54:15<21:47,  2.79it/s, acc=0.728, epoch=16, loss=0.92] 

epoch:16, idx:7199/10845, loss:0.9197489526826474, acc:0.7283680555555555


 67%|██████▋   | 7300/10845 [55:07<25:23,  2.33it/s, acc=0.728, epoch=16, loss=0.92] 

epoch:16, idx:7299/10845, loss:0.9201826217443976, acc:0.7282191780821918


 68%|██████▊   | 7400/10845 [56:00<32:57,  1.74it/s, acc=0.728, epoch=16, loss=0.919]

epoch:16, idx:7399/10845, loss:0.9193000857370931, acc:0.7284121621621622


 69%|██████▉   | 7500/10845 [56:56<25:40,  2.17it/s, acc=0.728, epoch=16, loss=0.919]

epoch:16, idx:7499/10845, loss:0.9191823790907859, acc:0.7284333333333334


 70%|███████   | 7600/10845 [57:47<28:27,  1.90it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:7599/10845, loss:0.9174816811594524, acc:0.7288815789473684


 71%|███████   | 7700/10845 [58:41<31:50,  1.65it/s, acc=0.728, epoch=16, loss=0.917]

epoch:16, idx:7699/10845, loss:0.9171748822656545, acc:0.7284415584415584


 72%|███████▏  | 7801/10845 [59:34<18:44,  2.71it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:7799/10845, loss:0.9171277520862909, acc:0.728525641025641


 73%|███████▎  | 7900/10845 [1:00:25<20:41,  2.37it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:7899/10845, loss:0.9175466705613499, acc:0.7284177215189873


 74%|███████▍  | 8000/10845 [1:01:21<22:31,  2.11it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:7999/10845, loss:0.9171121633537114, acc:0.7285


 75%|███████▍  | 8100/10845 [1:02:18<25:12,  1.81it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:8099/10845, loss:0.9183670357826316, acc:0.7283024691358024


 76%|███████▌  | 8200/10845 [1:03:10<25:00,  1.76it/s, acc=0.728, epoch=16, loss=0.919]

epoch:16, idx:8199/10845, loss:0.9189557515702597, acc:0.7280487804878049


 77%|███████▋  | 8300/10845 [1:04:02<18:16,  2.32it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:8299/10845, loss:0.9181795976176319, acc:0.7281626506024096


 77%|███████▋  | 8400/10845 [1:04:57<21:56,  1.86it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:8399/10845, loss:0.9182979177080449, acc:0.7280654761904762


 78%|███████▊  | 8500/10845 [1:05:53<19:30,  2.00it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:8499/10845, loss:0.9180248897706761, acc:0.7280294117647059


 79%|███████▉  | 8600/10845 [1:06:41<19:13,  1.95it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:8599/10845, loss:0.918253434473692, acc:0.728139534883721


 80%|████████  | 8700/10845 [1:07:34<14:10,  2.52it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:8699/10845, loss:0.9178799448547692, acc:0.7283333333333334


 81%|████████  | 8800/10845 [1:08:25<14:34,  2.34it/s, acc=0.728, epoch=16, loss=0.917]

epoch:16, idx:8799/10845, loss:0.9174436094679616, acc:0.7283806818181818


 82%|████████▏ | 8901/10845 [1:09:21<13:33,  2.39it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:8899/10845, loss:0.9180837978138012, acc:0.7284550561797752


 83%|████████▎ | 9000/10845 [1:10:16<17:47,  1.73it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:8999/10845, loss:0.9172189224494828, acc:0.7285


 84%|████████▍ | 9100/10845 [1:11:15<18:24,  1.58it/s, acc=0.728, epoch=16, loss=0.918]

epoch:16, idx:9099/10845, loss:0.9181874581942191, acc:0.7282967032967033


 85%|████████▍ | 9200/10845 [1:12:04<11:59,  2.29it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:9199/10845, loss:0.9169754695114882, acc:0.7286684782608696


 86%|████████▌ | 9300/10845 [1:12:57<13:20,  1.93it/s, acc=0.729, epoch=16, loss=0.918]

epoch:16, idx:9299/10845, loss:0.9175649074008387, acc:0.7286021505376344


 87%|████████▋ | 9400/10845 [1:13:47<11:06,  2.17it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:9399/10845, loss:0.9165919566851981, acc:0.7289627659574468


 88%|████████▊ | 9500/10845 [1:14:44<16:12,  1.38it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:9499/10845, loss:0.9170551010809447, acc:0.7289736842105263


 89%|████████▊ | 9600/10845 [1:15:35<08:30,  2.44it/s, acc=0.729, epoch=16, loss=0.917]

epoch:16, idx:9599/10845, loss:0.9168359187742074, acc:0.7286979166666666


 89%|████████▉ | 9700/10845 [1:16:32<10:29,  1.82it/s, acc=0.729, epoch=16, loss=0.918]

epoch:16, idx:9699/10845, loss:0.9177358534225484, acc:0.7285567010309278


 90%|█████████ | 9800/10845 [1:17:28<09:34,  1.82it/s, acc=0.728, epoch=16, loss=0.919]

epoch:16, idx:9799/10845, loss:0.9189935381497656, acc:0.7279591836734693


 91%|█████████▏| 9900/10845 [1:18:24<11:32,  1.36it/s, acc=0.728, epoch=16, loss=0.919]

epoch:16, idx:9899/10845, loss:0.9187813239085554, acc:0.7280050505050505


 92%|█████████▏| 10000/10845 [1:19:17<06:37,  2.13it/s, acc=0.728, epoch=16, loss=0.92]

epoch:16, idx:9999/10845, loss:0.9195443458557129, acc:0.72755


 93%|█████████▎| 10100/10845 [1:20:06<05:54,  2.10it/s, acc=0.728, epoch=16, loss=0.919]

epoch:16, idx:10099/10845, loss:0.9194072195503972, acc:0.7276485148514852


 94%|█████████▍| 10200/10845 [1:21:03<04:18,  2.49it/s, acc=0.728, epoch=16, loss=0.92] 

epoch:16, idx:10199/10845, loss:0.9201982459425926, acc:0.7275


 95%|█████████▍| 10300/10845 [1:22:00<05:17,  1.71it/s, acc=0.727, epoch=16, loss=0.92] 

epoch:16, idx:10299/10845, loss:0.9202189185549912, acc:0.7273058252427185


 96%|█████████▌| 10400/10845 [1:22:55<03:48,  1.95it/s, acc=0.727, epoch=16, loss=0.92] 

epoch:16, idx:10399/10845, loss:0.9197523055913357, acc:0.7274278846153847


 97%|█████████▋| 10500/10845 [1:23:50<03:02,  1.89it/s, acc=0.727, epoch=16, loss=0.92] 

epoch:16, idx:10499/10845, loss:0.9198143109083176, acc:0.7273333333333334


 98%|█████████▊| 10600/10845 [1:24:42<01:39,  2.46it/s, acc=0.727, epoch=16, loss=0.92] 

epoch:16, idx:10599/10845, loss:0.919644007452254, acc:0.7273113207547169


 99%|█████████▊| 10700/10845 [1:25:38<01:25,  1.69it/s, acc=0.727, epoch=16, loss=0.919]

epoch:16, idx:10699/10845, loss:0.9193272659266106, acc:0.7272429906542056


100%|█████████▉| 10800/10845 [1:26:37<00:30,  1.48it/s, acc=0.727, epoch=16, loss=0.92] 

epoch:16, idx:10799/10845, loss:0.9197563589226316, acc:0.7272916666666667


100%|██████████| 10845/10845 [1:27:01<00:00,  2.82it/s, acc=0.727, epoch=16, loss=0.92]


epoch:16, idx:0/1275, loss:1.0557187795639038, acc:0.5
epoch:16, idx:100/1275, loss:1.219899605996538, acc:0.6361386138613861
epoch:16, idx:200/1275, loss:1.145821961300883, acc:0.6355721393034826
epoch:16, idx:300/1275, loss:1.1551297429788152, acc:0.6411960132890365
epoch:16, idx:400/1275, loss:1.1385115324113138, acc:0.6440149625935162
epoch:16, idx:500/1275, loss:1.1327799127963252, acc:0.6432135728542914
epoch:16, idx:600/1275, loss:1.1353464504446642, acc:0.6414309484193012
epoch:16, idx:700/1275, loss:1.147027388023752, acc:0.6401569186875892
epoch:16, idx:800/1275, loss:1.151815031054017, acc:0.6398252184769039
epoch:16, idx:900/1275, loss:1.1388405624954867, acc:0.644284128745838
epoch:16, idx:1000/1275, loss:1.1359612000333918, acc:0.6466033966033966
epoch:16, idx:1100/1275, loss:1.1309312397950786, acc:0.6473660308810173
epoch:16, idx:1200/1275, loss:1.1340607489078467, acc:0.644879267277269


  1%|          | 100/10845 [00:54<1:20:14,  2.23it/s, acc=0.75, epoch=17, loss=0.78] 

epoch:17, idx:99/10845, loss:0.7796863156557083, acc:0.75


  2%|▏         | 200/10845 [01:48<2:06:46,  1.40it/s, acc=0.757, epoch=17, loss=0.822]

epoch:17, idx:199/10845, loss:0.822257522046566, acc:0.7575


  3%|▎         | 300/10845 [02:43<1:26:43,  2.03it/s, acc=0.773, epoch=17, loss=0.788]

epoch:17, idx:299/10845, loss:0.7875714514652888, acc:0.7733333333333333


  4%|▎         | 400/10845 [03:38<1:35:00,  1.83it/s, acc=0.766, epoch=17, loss=0.828]

epoch:17, idx:399/10845, loss:0.8284411746263504, acc:0.76625


  5%|▍         | 500/10845 [04:30<1:46:13,  1.62it/s, acc=0.763, epoch=17, loss=0.84] 

epoch:17, idx:499/10845, loss:0.8402191761732102, acc:0.763


  6%|▌         | 600/10845 [05:22<1:26:16,  1.98it/s, acc=0.765, epoch=17, loss=0.825]

epoch:17, idx:599/10845, loss:0.8246550007164478, acc:0.7645833333333333


  6%|▋         | 700/10845 [06:18<1:55:56,  1.46it/s, acc=0.765, epoch=17, loss=0.83] 

epoch:17, idx:699/10845, loss:0.8304051610827446, acc:0.765


  7%|▋         | 800/10845 [07:12<1:09:22,  2.41it/s, acc=0.762, epoch=17, loss=0.832]

epoch:17, idx:799/10845, loss:0.8318431347236037, acc:0.761875


  8%|▊         | 900/10845 [08:06<1:32:16,  1.80it/s, acc=0.763, epoch=17, loss=0.824]

epoch:17, idx:899/10845, loss:0.8237506505846978, acc:0.7633333333333333


  9%|▉         | 1000/10845 [08:59<1:31:58,  1.78it/s, acc=0.763, epoch=17, loss=0.818]

epoch:17, idx:999/10845, loss:0.8183476746380329, acc:0.76275


 10%|█         | 1100/10845 [09:52<1:33:57,  1.73it/s, acc=0.762, epoch=17, loss=0.818]

epoch:17, idx:1099/10845, loss:0.8184703875400803, acc:0.7625


 11%|█         | 1200/10845 [10:47<1:34:42,  1.70it/s, acc=0.76, epoch=17, loss=0.826] 

epoch:17, idx:1199/10845, loss:0.826247654731075, acc:0.7602083333333334


 12%|█▏        | 1300/10845 [11:46<1:23:05,  1.91it/s, acc=0.761, epoch=17, loss=0.824]

epoch:17, idx:1299/10845, loss:0.8238411514804913, acc:0.7613461538461539


 13%|█▎        | 1400/10845 [12:39<1:21:19,  1.94it/s, acc=0.76, epoch=17, loss=0.828] 

epoch:17, idx:1399/10845, loss:0.8277477365093572, acc:0.7598214285714285


 14%|█▍        | 1500/10845 [13:33<1:25:47,  1.82it/s, acc=0.76, epoch=17, loss=0.823] 

epoch:17, idx:1499/10845, loss:0.8227642147938411, acc:0.7603333333333333


 15%|█▍        | 1600/10845 [14:28<1:54:35,  1.34it/s, acc=0.759, epoch=17, loss=0.824]

epoch:17, idx:1599/10845, loss:0.8237887051329017, acc:0.7590625


 16%|█▌        | 1700/10845 [15:26<1:39:13,  1.54it/s, acc=0.758, epoch=17, loss=0.827]

epoch:17, idx:1699/10845, loss:0.8274551567610572, acc:0.7577941176470588


 17%|█▋        | 1800/10845 [16:16<1:24:14,  1.79it/s, acc=0.757, epoch=17, loss=0.823]

epoch:17, idx:1799/10845, loss:0.8233104271027777, acc:0.7572222222222222


 18%|█▊        | 1900/10845 [17:12<1:25:48,  1.74it/s, acc=0.756, epoch=17, loss=0.831]

epoch:17, idx:1899/10845, loss:0.8311136258275885, acc:0.7556578947368421


 18%|█▊        | 2000/10845 [18:06<1:03:45,  2.31it/s, acc=0.755, epoch=17, loss=0.834]

epoch:17, idx:1999/10845, loss:0.8337809830009937, acc:0.755375


 19%|█▉        | 2100/10845 [18:58<1:15:31,  1.93it/s, acc=0.755, epoch=17, loss=0.835]

epoch:17, idx:2099/10845, loss:0.8352113010485968, acc:0.7546428571428572


 20%|██        | 2200/10845 [19:52<1:36:13,  1.50it/s, acc=0.755, epoch=17, loss=0.835]

epoch:17, idx:2199/10845, loss:0.8351530013572086, acc:0.7545454545454545


 21%|██        | 2300/10845 [20:45<1:31:56,  1.55it/s, acc=0.753, epoch=17, loss=0.839]

epoch:17, idx:2299/10845, loss:0.8385088427429613, acc:0.7534782608695653


 22%|██▏       | 2400/10845 [21:42<1:39:01,  1.42it/s, acc=0.753, epoch=17, loss=0.839]

epoch:17, idx:2399/10845, loss:0.838618291541934, acc:0.7533333333333333


 23%|██▎       | 2500/10845 [22:36<1:20:08,  1.74it/s, acc=0.752, epoch=17, loss=0.837]

epoch:17, idx:2499/10845, loss:0.8373066004037857, acc:0.7524


 24%|██▍       | 2600/10845 [23:30<1:21:29,  1.69it/s, acc=0.751, epoch=17, loss=0.841]

epoch:17, idx:2599/10845, loss:0.8408777614969474, acc:0.7513461538461539


 25%|██▍       | 2700/10845 [24:22<1:20:22,  1.69it/s, acc=0.752, epoch=17, loss=0.843]

epoch:17, idx:2699/10845, loss:0.8434269826279747, acc:0.7517592592592592


 26%|██▌       | 2800/10845 [25:14<57:44,  2.32it/s, acc=0.751, epoch=17, loss=0.846]  

epoch:17, idx:2799/10845, loss:0.8459439705099379, acc:0.7508928571428571


 27%|██▋       | 2900/10845 [26:09<1:16:14,  1.74it/s, acc=0.75, epoch=17, loss=0.846] 

epoch:17, idx:2899/10845, loss:0.8462945685921044, acc:0.7503448275862069


 28%|██▊       | 3000/10845 [27:02<1:10:02,  1.87it/s, acc=0.752, epoch=17, loss=0.845]

epoch:17, idx:2999/10845, loss:0.8447597209215164, acc:0.7518333333333334


 29%|██▊       | 3100/10845 [27:55<1:27:49,  1.47it/s, acc=0.751, epoch=17, loss=0.844]

epoch:17, idx:3099/10845, loss:0.844061348380581, acc:0.7509677419354839


 30%|██▉       | 3201/10845 [28:48<51:47,  2.46it/s, acc=0.749, epoch=17, loss=0.848]  

epoch:17, idx:3199/10845, loss:0.8487610276788473, acc:0.749375


 30%|███       | 3300/10845 [29:37<1:07:17,  1.87it/s, acc=0.749, epoch=17, loss=0.849]

epoch:17, idx:3299/10845, loss:0.8491059506481344, acc:0.748560606060606


 31%|███▏      | 3400/10845 [30:28<54:31,  2.28it/s, acc=0.748, epoch=17, loss=0.852]  

epoch:17, idx:3399/10845, loss:0.8515788382817717, acc:0.7477205882352941


 32%|███▏      | 3500/10845 [31:21<1:07:17,  1.82it/s, acc=0.747, epoch=17, loss=0.854]

epoch:17, idx:3499/10845, loss:0.8535119812999453, acc:0.7472142857142857


 33%|███▎      | 3600/10845 [32:20<1:12:28,  1.67it/s, acc=0.748, epoch=17, loss=0.854]

epoch:17, idx:3599/10845, loss:0.8542007630566756, acc:0.7475


 34%|███▍      | 3700/10845 [33:12<1:01:00,  1.95it/s, acc=0.746, epoch=17, loss=0.857]

epoch:17, idx:3699/10845, loss:0.8569467460142599, acc:0.7463513513513513


 35%|███▌      | 3800/10845 [34:07<1:08:51,  1.71it/s, acc=0.746, epoch=17, loss=0.858]

epoch:17, idx:3799/10845, loss:0.8584781128011252, acc:0.7463815789473685


 36%|███▌      | 3900/10845 [34:58<46:28,  2.49it/s, acc=0.747, epoch=17, loss=0.858]  

epoch:17, idx:3899/10845, loss:0.858083906983718, acc:0.7466025641025641


 37%|███▋      | 4000/10845 [35:53<1:03:29,  1.80it/s, acc=0.745, epoch=17, loss=0.86] 

epoch:17, idx:3999/10845, loss:0.8602080606669188, acc:0.74525


 38%|███▊      | 4100/10845 [36:44<47:50,  2.35it/s, acc=0.745, epoch=17, loss=0.86]   

epoch:17, idx:4099/10845, loss:0.8600837609244556, acc:0.7454268292682927


 39%|███▊      | 4200/10845 [37:39<45:24,  2.44it/s, acc=0.745, epoch=17, loss=0.86]   

epoch:17, idx:4199/10845, loss:0.860457249439898, acc:0.7450595238095238


 40%|███▉      | 4300/10845 [38:34<1:22:18,  1.33it/s, acc=0.744, epoch=17, loss=0.867]

epoch:17, idx:4299/10845, loss:0.8666483510233635, acc:0.7440697674418605


 41%|████      | 4401/10845 [39:26<49:43,  2.16it/s, acc=0.744, epoch=17, loss=0.867]  

epoch:17, idx:4399/10845, loss:0.8669831668924202, acc:0.7440340909090909


 41%|████▏     | 4500/10845 [40:17<42:50,  2.47it/s, acc=0.744, epoch=17, loss=0.866]  

epoch:17, idx:4499/10845, loss:0.866226104007827, acc:0.7443888888888889


 42%|████▏     | 4600/10845 [41:12<50:50,  2.05it/s, acc=0.745, epoch=17, loss=0.866]  

epoch:17, idx:4599/10845, loss:0.8663237210978633, acc:0.7446195652173913


 43%|████▎     | 4700/10845 [42:05<1:17:07,  1.33it/s, acc=0.744, epoch=17, loss=0.871]

epoch:17, idx:4699/10845, loss:0.8705398242651148, acc:0.7437765957446808


 44%|████▍     | 4800/10845 [42:56<1:01:33,  1.64it/s, acc=0.744, epoch=17, loss=0.87] 

epoch:17, idx:4799/10845, loss:0.8703299517681201, acc:0.7438541666666667


 45%|████▌     | 4900/10845 [43:48<39:38,  2.50it/s, acc=0.744, epoch=17, loss=0.871]  

epoch:17, idx:4899/10845, loss:0.8709455072514865, acc:0.7438775510204082


 46%|████▌     | 5000/10845 [44:41<53:00,  1.84it/s, acc=0.744, epoch=17, loss=0.87]   

epoch:17, idx:4999/10845, loss:0.8697821973800659, acc:0.7441


 47%|████▋     | 5100/10845 [45:35<54:25,  1.76it/s, acc=0.744, epoch=17, loss=0.871]  

epoch:17, idx:5099/10845, loss:0.8713346716586281, acc:0.7436764705882353


 48%|████▊     | 5200/10845 [46:29<44:02,  2.14it/s, acc=0.744, epoch=17, loss=0.871]  

epoch:17, idx:5199/10845, loss:0.871024622378441, acc:0.7436538461538461


 49%|████▉     | 5300/10845 [47:26<44:16,  2.09it/s, acc=0.744, epoch=17, loss=0.872]  

epoch:17, idx:5299/10845, loss:0.8717990588804461, acc:0.7439622641509434


 50%|████▉     | 5400/10845 [48:20<57:18,  1.58it/s, acc=0.744, epoch=17, loss=0.874]  

epoch:17, idx:5399/10845, loss:0.8739141079010787, acc:0.7435648148148148


 51%|█████     | 5500/10845 [49:16<1:08:39,  1.30it/s, acc=0.744, epoch=17, loss=0.875]

epoch:17, idx:5499/10845, loss:0.8746109668449922, acc:0.7435


 52%|█████▏    | 5600/10845 [50:09<35:41,  2.45it/s, acc=0.744, epoch=17, loss=0.873]  

epoch:17, idx:5599/10845, loss:0.8733087606887733, acc:0.7435714285714285


 53%|█████▎    | 5700/10845 [51:02<37:31,  2.29it/s, acc=0.744, epoch=17, loss=0.874]  

epoch:17, idx:5699/10845, loss:0.8735225430758375, acc:0.7438157894736842


 53%|█████▎    | 5800/10845 [51:54<42:14,  1.99it/s, acc=0.744, epoch=17, loss=0.874]  

epoch:17, idx:5799/10845, loss:0.8741832634960783, acc:0.74375


 54%|█████▍    | 5900/10845 [52:49<43:03,  1.91it/s, acc=0.744, epoch=17, loss=0.875]  

epoch:17, idx:5899/10845, loss:0.8752625227934223, acc:0.7438559322033899


 55%|█████▌    | 6000/10845 [53:42<40:27,  2.00it/s, acc=0.744, epoch=17, loss=0.876]

epoch:17, idx:5999/10845, loss:0.8758919865737359, acc:0.7435


 56%|█████▌    | 6100/10845 [54:38<42:15,  1.87it/s, acc=0.743, epoch=17, loss=0.876]  

epoch:17, idx:6099/10845, loss:0.8757028324828773, acc:0.7432377049180328


 57%|█████▋    | 6200/10845 [55:33<38:25,  2.01it/s, acc=0.743, epoch=17, loss=0.874]  

epoch:17, idx:6199/10845, loss:0.8738967210919626, acc:0.7431854838709677


 58%|█████▊    | 6300/10845 [56:29<45:31,  1.66it/s, acc=0.744, epoch=17, loss=0.873]  

epoch:17, idx:6299/10845, loss:0.873302417198817, acc:0.7436111111111111


 59%|█████▉    | 6400/10845 [57:23<44:01,  1.68it/s, acc=0.744, epoch=17, loss=0.874]

epoch:17, idx:6399/10845, loss:0.8737124371342361, acc:0.74359375


 60%|█████▉    | 6500/10845 [58:22<56:32,  1.28it/s, acc=0.743, epoch=17, loss=0.874]  

epoch:17, idx:6499/10845, loss:0.8741825375281848, acc:0.7431538461538462


 61%|██████    | 6600/10845 [59:11<39:12,  1.80it/s, acc=0.743, epoch=17, loss=0.876]  

epoch:17, idx:6599/10845, loss:0.8758762248086207, acc:0.7427272727272727


 62%|██████▏   | 6700/10845 [1:00:03<50:27,  1.37it/s, acc=0.743, epoch=17, loss=0.876]

epoch:17, idx:6699/10845, loss:0.8764382445456377, acc:0.7426492537313433


 63%|██████▎   | 6800/10845 [1:00:56<29:19,  2.30it/s, acc=0.743, epoch=17, loss=0.878]

epoch:17, idx:6799/10845, loss:0.878100589136867, acc:0.742610294117647


 64%|██████▎   | 6900/10845 [1:01:50<33:47,  1.95it/s, acc=0.742, epoch=17, loss=0.879]

epoch:17, idx:6899/10845, loss:0.8793872098214385, acc:0.7420652173913044


 65%|██████▍   | 7000/10845 [1:02:43<31:12,  2.05it/s, acc=0.742, epoch=17, loss=0.881]

epoch:17, idx:6999/10845, loss:0.8808888314706939, acc:0.7415357142857143


 65%|██████▌   | 7100/10845 [1:03:35<25:24,  2.46it/s, acc=0.741, epoch=17, loss=0.882]

epoch:17, idx:7099/10845, loss:0.8822461156190281, acc:0.7409154929577465


 66%|██████▋   | 7200/10845 [1:04:26<35:31,  1.71it/s, acc=0.741, epoch=17, loss=0.883]

epoch:17, idx:7199/10845, loss:0.8830831346909205, acc:0.7405902777777778


 67%|██████▋   | 7301/10845 [1:05:19<24:32,  2.41it/s, acc=0.74, epoch=17, loss=0.884] 

epoch:17, idx:7299/10845, loss:0.884194455301925, acc:0.7401369863013698


 68%|██████▊   | 7400/10845 [1:06:09<20:19,  2.83it/s, acc=0.74, epoch=17, loss=0.885]

epoch:17, idx:7399/10845, loss:0.8845705013742318, acc:0.74


 69%|██████▉   | 7500/10845 [1:06:57<32:41,  1.71it/s, acc=0.74, epoch=17, loss=0.884]

epoch:17, idx:7499/10845, loss:0.8844661753813425, acc:0.7400333333333333


 70%|███████   | 7600/10845 [1:07:49<39:04,  1.38it/s, acc=0.74, epoch=17, loss=0.885]

epoch:17, idx:7599/10845, loss:0.8853058555016392, acc:0.7399671052631579


 71%|███████   | 7700/10845 [1:08:42<23:05,  2.27it/s, acc=0.74, epoch=17, loss=0.884] 

epoch:17, idx:7699/10845, loss:0.8835904093764045, acc:0.7404545454545455


 72%|███████▏  | 7800/10845 [1:09:34<25:49,  1.96it/s, acc=0.74, epoch=17, loss=0.883] 

epoch:17, idx:7799/10845, loss:0.8830905100703239, acc:0.7404487179487179


 73%|███████▎  | 7900/10845 [1:10:27<25:06,  1.95it/s, acc=0.74, epoch=17, loss=0.884] 

epoch:17, idx:7899/10845, loss:0.884017819475524, acc:0.740253164556962


 74%|███████▍  | 8000/10845 [1:11:21<30:50,  1.54it/s, acc=0.74, epoch=17, loss=0.885]

epoch:17, idx:7999/10845, loss:0.8846609912887216, acc:0.74025


 75%|███████▍  | 8100/10845 [1:11:57<17:15,  2.65it/s, acc=0.74, epoch=17, loss=0.885]

epoch:17, idx:8099/10845, loss:0.8854968829213837, acc:0.7399691358024691


 76%|███████▌  | 8200/10845 [1:12:33<23:37,  1.87it/s, acc=0.74, epoch=17, loss=0.884]

epoch:17, idx:8199/10845, loss:0.8844635338136335, acc:0.739969512195122


 77%|███████▋  | 8300/10845 [1:13:21<22:42,  1.87it/s, acc=0.74, epoch=17, loss=0.885]

epoch:17, idx:8299/10845, loss:0.8853101642842752, acc:0.739789156626506


 77%|███████▋  | 8400/10845 [1:14:11<17:14,  2.36it/s, acc=0.739, epoch=17, loss=0.887]

epoch:17, idx:8399/10845, loss:0.8865102295116299, acc:0.739375


 78%|███████▊  | 8500/10845 [1:15:01<24:03,  1.62it/s, acc=0.739, epoch=17, loss=0.888]

epoch:17, idx:8499/10845, loss:0.8884694765911383, acc:0.7389705882352942


 79%|███████▉  | 8600/10845 [1:15:53<24:47,  1.51it/s, acc=0.739, epoch=17, loss=0.888]

epoch:17, idx:8599/10845, loss:0.8879054270684719, acc:0.7390697674418605


 80%|████████  | 8700/10845 [1:16:43<23:06,  1.55it/s, acc=0.738, epoch=17, loss=0.889]

epoch:17, idx:8699/10845, loss:0.8893706197916776, acc:0.738448275862069


 81%|████████  | 8800/10845 [1:17:40<14:16,  2.39it/s, acc=0.738, epoch=17, loss=0.889]

epoch:17, idx:8799/10845, loss:0.8894202247600663, acc:0.7384375


 82%|████████▏ | 8900/10845 [1:18:34<18:21,  1.77it/s, acc=0.738, epoch=17, loss=0.89] 

epoch:17, idx:8899/10845, loss:0.8901780134037639, acc:0.7384831460674157


 83%|████████▎ | 9000/10845 [1:19:25<13:22,  2.30it/s, acc=0.739, epoch=17, loss=0.89] 

epoch:17, idx:8999/10845, loss:0.8899023460944494, acc:0.7385


 84%|████████▍ | 9100/10845 [1:20:15<15:30,  1.88it/s, acc=0.739, epoch=17, loss=0.888]

epoch:17, idx:9099/10845, loss:0.8880713725548525, acc:0.738956043956044


 85%|████████▍ | 9200/10845 [1:21:06<14:08,  1.94it/s, acc=0.739, epoch=17, loss=0.89] 

epoch:17, idx:9199/10845, loss:0.8898623484567456, acc:0.7385597826086957


 86%|████████▌ | 9300/10845 [1:22:02<11:01,  2.34it/s, acc=0.738, epoch=17, loss=0.891]

epoch:17, idx:9299/10845, loss:0.8905922246299764, acc:0.7381451612903226


 87%|████████▋ | 9400/10845 [1:22:54<11:54,  2.02it/s, acc=0.738, epoch=17, loss=0.892]

epoch:17, idx:9399/10845, loss:0.891695718511622, acc:0.7379521276595745


 88%|████████▊ | 9500/10845 [1:23:50<12:58,  1.73it/s, acc=0.738, epoch=17, loss=0.894]

epoch:17, idx:9499/10845, loss:0.8937564916610717, acc:0.737578947368421


 89%|████████▊ | 9600/10845 [1:24:46<12:07,  1.71it/s, acc=0.737, epoch=17, loss=0.895]

epoch:17, idx:9599/10845, loss:0.8952661665156484, acc:0.737421875


 89%|████████▉ | 9700/10845 [1:25:39<07:02,  2.71it/s, acc=0.737, epoch=17, loss=0.895]

epoch:17, idx:9699/10845, loss:0.8954496495441063, acc:0.7374484536082474


 90%|█████████ | 9800/10845 [1:26:35<08:37,  2.02it/s, acc=0.737, epoch=17, loss=0.896]

epoch:17, idx:9799/10845, loss:0.896357952952385, acc:0.7372448979591837


 91%|█████████▏| 9900/10845 [1:27:25<08:32,  1.84it/s, acc=0.738, epoch=17, loss=0.895]

epoch:17, idx:9899/10845, loss:0.895187253885799, acc:0.7377272727272727


 92%|█████████▏| 10000/10845 [1:28:16<06:39,  2.12it/s, acc=0.738, epoch=17, loss=0.896]

epoch:17, idx:9999/10845, loss:0.8960647498726845, acc:0.7375


 93%|█████████▎| 10100/10845 [1:29:08<07:07,  1.74it/s, acc=0.737, epoch=17, loss=0.898]

epoch:17, idx:10099/10845, loss:0.8975965703005838, acc:0.7370544554455446


 94%|█████████▍| 10200/10845 [1:29:59<05:07,  2.09it/s, acc=0.737, epoch=17, loss=0.898]

epoch:17, idx:10199/10845, loss:0.8984583287613065, acc:0.7368137254901961


 95%|█████████▍| 10300/10845 [1:30:50<04:43,  1.92it/s, acc=0.737, epoch=17, loss=0.897]

epoch:17, idx:10299/10845, loss:0.8971180952347598, acc:0.7369902912621359


 96%|█████████▌| 10400/10845 [1:31:42<04:21,  1.70it/s, acc=0.737, epoch=17, loss=0.896]

epoch:17, idx:10399/10845, loss:0.8963879145395297, acc:0.7371634615384616


 97%|█████████▋| 10500/10845 [1:32:39<02:23,  2.40it/s, acc=0.737, epoch=17, loss=0.897]

epoch:17, idx:10499/10845, loss:0.8968245974949428, acc:0.7371190476190477


 98%|█████████▊| 10600/10845 [1:33:28<02:23,  1.71it/s, acc=0.737, epoch=17, loss=0.899]

epoch:17, idx:10599/10845, loss:0.8985029490151495, acc:0.7367452830188679


 99%|█████████▊| 10700/10845 [1:34:20<01:03,  2.27it/s, acc=0.736, epoch=17, loss=0.899]

epoch:17, idx:10699/10845, loss:0.89891487459156, acc:0.7364953271028037


100%|█████████▉| 10800/10845 [1:35:09<00:20,  2.23it/s, acc=0.736, epoch=17, loss=0.9]  

epoch:17, idx:10799/10845, loss:0.8998508597800026, acc:0.7363194444444444


100%|██████████| 10845/10845 [1:35:34<00:00,  1.93it/s, acc=0.736, epoch=17, loss=0.9]  


epoch:17, idx:0/1275, loss:0.9565123319625854, acc:0.5
epoch:17, idx:100/1275, loss:1.2071660745261918, acc:0.6336633663366337
epoch:17, idx:200/1275, loss:1.1622717383489087, acc:0.6430348258706468
epoch:17, idx:300/1275, loss:1.1669014046754551, acc:0.6478405315614618
epoch:17, idx:400/1275, loss:1.1435585840979121, acc:0.6521197007481296
epoch:17, idx:500/1275, loss:1.1411359876453757, acc:0.6492015968063872
epoch:17, idx:600/1275, loss:1.1503736757597391, acc:0.6451747088186356
epoch:17, idx:700/1275, loss:1.1590419522195672, acc:0.6444365192582026
epoch:17, idx:800/1275, loss:1.1637253379553891, acc:0.6438826466916354
epoch:17, idx:900/1275, loss:1.153003099028199, acc:0.6448390677025527
epoch:17, idx:1000/1275, loss:1.1512483714462876, acc:0.6461038961038961
epoch:17, idx:1100/1275, loss:1.1448591398066763, acc:0.6478201634877384
epoch:17, idx:1200/1275, loss:1.145096902892949, acc:0.6475853455453788


  1%|          | 100/10845 [00:51<1:40:38,  1.78it/s, acc=0.745, epoch=18, loss=0.878]

epoch:18, idx:99/10845, loss:0.8779484355449676, acc:0.745


  2%|▏         | 200/10845 [01:44<1:36:53,  1.83it/s, acc=0.745, epoch=18, loss=0.873]

epoch:18, idx:199/10845, loss:0.8731801208853721, acc:0.745


  3%|▎         | 300/10845 [02:36<1:22:06,  2.14it/s, acc=0.739, epoch=18, loss=0.863]

epoch:18, idx:299/10845, loss:0.863339589436849, acc:0.7391666666666666


  4%|▎         | 400/10845 [03:30<1:28:03,  1.98it/s, acc=0.738, epoch=18, loss=0.878]

epoch:18, idx:399/10845, loss:0.8778678207099437, acc:0.738125


  5%|▍         | 500/10845 [04:20<1:03:30,  2.72it/s, acc=0.738, epoch=18, loss=0.876]

epoch:18, idx:499/10845, loss:0.876125254869461, acc:0.738


  6%|▌         | 600/10845 [05:14<1:07:24,  2.53it/s, acc=0.743, epoch=18, loss=0.869]

epoch:18, idx:599/10845, loss:0.8690397072831789, acc:0.7425


  6%|▋         | 700/10845 [06:06<1:49:10,  1.55it/s, acc=0.744, epoch=18, loss=0.869]

epoch:18, idx:699/10845, loss:0.868530684368951, acc:0.7442857142857143


  7%|▋         | 800/10845 [06:56<1:06:16,  2.53it/s, acc=0.742, epoch=18, loss=0.879]

epoch:18, idx:799/10845, loss:0.8791805429756642, acc:0.7415625


  8%|▊         | 900/10845 [07:50<1:33:16,  1.78it/s, acc=0.74, epoch=18, loss=0.872] 

epoch:18, idx:899/10845, loss:0.8719490077760484, acc:0.7402777777777778


  9%|▉         | 1000/10845 [08:44<1:06:38,  2.46it/s, acc=0.741, epoch=18, loss=0.858]

epoch:18, idx:999/10845, loss:0.8583599067330361, acc:0.74075


 10%|█         | 1100/10845 [09:37<1:27:27,  1.86it/s, acc=0.744, epoch=18, loss=0.851]

epoch:18, idx:1099/10845, loss:0.8512178031964736, acc:0.7443181818181818


 11%|█         | 1200/10845 [10:28<1:14:59,  2.14it/s, acc=0.745, epoch=18, loss=0.851]

epoch:18, idx:1199/10845, loss:0.8506385334829489, acc:0.7445833333333334


 12%|█▏        | 1300/10845 [11:19<1:15:29,  2.11it/s, acc=0.743, epoch=18, loss=0.854]

epoch:18, idx:1299/10845, loss:0.854268460892714, acc:0.7428846153846154


 13%|█▎        | 1400/10845 [12:09<1:15:04,  2.10it/s, acc=0.744, epoch=18, loss=0.847]

epoch:18, idx:1399/10845, loss:0.8474355883896351, acc:0.74375


 14%|█▍        | 1500/10845 [13:04<1:48:07,  1.44it/s, acc=0.742, epoch=18, loss=0.853]

epoch:18, idx:1499/10845, loss:0.8528627701401711, acc:0.7418333333333333


 15%|█▍        | 1600/10845 [13:57<1:22:28,  1.87it/s, acc=0.743, epoch=18, loss=0.852]

epoch:18, idx:1599/10845, loss:0.8524007596634329, acc:0.7425


 16%|█▌        | 1700/10845 [14:49<1:17:59,  1.95it/s, acc=0.742, epoch=18, loss=0.848]

epoch:18, idx:1699/10845, loss:0.8483801821926061, acc:0.741764705882353


 17%|█▋        | 1800/10845 [15:37<1:07:28,  2.23it/s, acc=0.741, epoch=18, loss=0.848]

epoch:18, idx:1799/10845, loss:0.8484852219786909, acc:0.7409722222222223


 18%|█▊        | 1900/10845 [16:29<1:32:35,  1.61it/s, acc=0.741, epoch=18, loss=0.852]

epoch:18, idx:1899/10845, loss:0.8521814641356468, acc:0.7414473684210526


 18%|█▊        | 2000/10845 [17:22<1:01:49,  2.38it/s, acc=0.74, epoch=18, loss=0.863] 

epoch:18, idx:1999/10845, loss:0.8627557686120272, acc:0.74


 19%|█▉        | 2100/10845 [18:13<53:51,  2.71it/s, acc=0.742, epoch=18, loss=0.855]  

epoch:18, idx:2099/10845, loss:0.8549837592244148, acc:0.741904761904762


 20%|██        | 2200/10845 [19:09<1:11:38,  2.01it/s, acc=0.741, epoch=18, loss=0.861]

epoch:18, idx:2199/10845, loss:0.8611159929768606, acc:0.740909090909091


 21%|██        | 2301/10845 [20:02<1:00:31,  2.35it/s, acc=0.742, epoch=18, loss=0.859]

epoch:18, idx:2299/10845, loss:0.8594609107012334, acc:0.7419565217391304


 22%|██▏       | 2400/10845 [20:51<1:27:01,  1.62it/s, acc=0.743, epoch=18, loss=0.858]

epoch:18, idx:2399/10845, loss:0.8575173790504536, acc:0.7429166666666667


 23%|██▎       | 2500/10845 [21:43<1:10:00,  1.99it/s, acc=0.743, epoch=18, loss=0.86] 

epoch:18, idx:2499/10845, loss:0.8596546920061111, acc:0.7425


 24%|██▍       | 2600/10845 [22:34<1:25:12,  1.61it/s, acc=0.743, epoch=18, loss=0.86] 

epoch:18, idx:2599/10845, loss:0.8595235872268677, acc:0.7425


 25%|██▍       | 2700/10845 [23:26<1:09:19,  1.96it/s, acc=0.743, epoch=18, loss=0.859]

epoch:18, idx:2699/10845, loss:0.8594595903820462, acc:0.7426851851851852


 26%|██▌       | 2800/10845 [24:21<1:28:36,  1.51it/s, acc=0.743, epoch=18, loss=0.861]

epoch:18, idx:2799/10845, loss:0.8608166375756263, acc:0.7427678571428571


 27%|██▋       | 2900/10845 [25:14<48:44,  2.72it/s, acc=0.742, epoch=18, loss=0.862]  

epoch:18, idx:2899/10845, loss:0.862118624366563, acc:0.7419827586206896


 28%|██▊       | 3000/10845 [26:07<1:16:39,  1.71it/s, acc=0.741, epoch=18, loss=0.868]

epoch:18, idx:2999/10845, loss:0.8675321786204974, acc:0.7411666666666666


 29%|██▊       | 3100/10845 [27:00<1:03:31,  2.03it/s, acc=0.742, epoch=18, loss=0.87] 

epoch:18, idx:3099/10845, loss:0.869577290050445, acc:0.7415322580645162


 30%|██▉       | 3200/10845 [27:55<1:22:09,  1.55it/s, acc=0.74, epoch=18, loss=0.872] 

epoch:18, idx:3199/10845, loss:0.871774618756026, acc:0.7396875


 30%|███       | 3300/10845 [28:45<59:17,  2.12it/s, acc=0.741, epoch=18, loss=0.868]  

epoch:18, idx:3299/10845, loss:0.868220149531509, acc:0.7407575757575757


 31%|███▏      | 3400/10845 [29:37<1:04:02,  1.94it/s, acc=0.742, epoch=18, loss=0.863]

epoch:18, idx:3399/10845, loss:0.863351364153273, acc:0.7415441176470589


 32%|███▏      | 3500/10845 [30:33<1:19:09,  1.55it/s, acc=0.743, epoch=18, loss=0.859]

epoch:18, idx:3499/10845, loss:0.8591736986126218, acc:0.7428571428571429


 33%|███▎      | 3600/10845 [31:25<1:04:13,  1.88it/s, acc=0.743, epoch=18, loss=0.859]

epoch:18, idx:3599/10845, loss:0.8588384747008483, acc:0.7429166666666667


 34%|███▍      | 3700/10845 [32:18<56:32,  2.11it/s, acc=0.744, epoch=18, loss=0.86]   

epoch:18, idx:3699/10845, loss:0.8604013411257718, acc:0.7439189189189189


 35%|███▌      | 3800/10845 [33:09<54:42,  2.15it/s, acc=0.744, epoch=18, loss=0.862]  

epoch:18, idx:3799/10845, loss:0.8623051395384889, acc:0.7436842105263158


 36%|███▌      | 3900/10845 [34:06<1:31:34,  1.26it/s, acc=0.743, epoch=18, loss=0.864]

epoch:18, idx:3899/10845, loss:0.8640166640740174, acc:0.7434615384615385


 37%|███▋      | 4000/10845 [35:01<1:12:24,  1.58it/s, acc=0.744, epoch=18, loss=0.864]

epoch:18, idx:3999/10845, loss:0.8639874345958233, acc:0.743625


 38%|███▊      | 4100/10845 [35:53<49:36,  2.27it/s, acc=0.744, epoch=18, loss=0.861]  

epoch:18, idx:4099/10845, loss:0.860941909057338, acc:0.7441463414634146


 39%|███▊      | 4200/10845 [36:45<59:25,  1.86it/s, acc=0.744, epoch=18, loss=0.86]   

epoch:18, idx:4199/10845, loss:0.8597578385614213, acc:0.7442261904761904


 40%|███▉      | 4300/10845 [37:37<1:01:44,  1.77it/s, acc=0.744, epoch=18, loss=0.861]

epoch:18, idx:4299/10845, loss:0.8609082028477691, acc:0.7438953488372093


 41%|████      | 4400/10845 [38:34<56:30,  1.90it/s, acc=0.744, epoch=18, loss=0.864]  

epoch:18, idx:4399/10845, loss:0.8640596863898364, acc:0.7435795454545454


 41%|████▏     | 4500/10845 [39:27<1:03:29,  1.67it/s, acc=0.744, epoch=18, loss=0.865]

epoch:18, idx:4499/10845, loss:0.8651147250466876, acc:0.7437222222222222


 42%|████▏     | 4600/10845 [40:21<38:39,  2.69it/s, acc=0.743, epoch=18, loss=0.868]  

epoch:18, idx:4599/10845, loss:0.8676064692113711, acc:0.7434239130434782


 43%|████▎     | 4700/10845 [41:12<1:01:47,  1.66it/s, acc=0.744, epoch=18, loss=0.87] 

epoch:18, idx:4699/10845, loss:0.869867055035652, acc:0.7436170212765958


 44%|████▍     | 4800/10845 [42:08<57:59,  1.74it/s, acc=0.744, epoch=18, loss=0.869]  

epoch:18, idx:4799/10845, loss:0.8685311571136117, acc:0.74359375


 45%|████▌     | 4900/10845 [43:03<56:21,  1.76it/s, acc=0.744, epoch=18, loss=0.868]  

epoch:18, idx:4899/10845, loss:0.8677255985323263, acc:0.7438775510204082


 46%|████▌     | 5000/10845 [43:57<48:08,  2.02it/s, acc=0.745, epoch=18, loss=0.866]  

epoch:18, idx:4999/10845, loss:0.8661117094397545, acc:0.74455


 47%|████▋     | 5100/10845 [44:48<41:04,  2.33it/s, acc=0.744, epoch=18, loss=0.867]  

epoch:18, idx:5099/10845, loss:0.8674915327160966, acc:0.7444117647058823


 48%|████▊     | 5200/10845 [45:39<42:23,  2.22it/s, acc=0.745, epoch=18, loss=0.868]  

epoch:18, idx:5199/10845, loss:0.8679560838410487, acc:0.7446634615384615


 49%|████▉     | 5301/10845 [46:37<38:03,  2.43it/s, acc=0.745, epoch=18, loss=0.866]  

epoch:18, idx:5299/10845, loss:0.8663969965700833, acc:0.7454245283018868


 50%|████▉     | 5400/10845 [47:30<52:56,  1.71it/s, acc=0.746, epoch=18, loss=0.865]  

epoch:18, idx:5399/10845, loss:0.8652789367900955, acc:0.7458796296296296


 51%|█████     | 5500/10845 [48:24<40:49,  2.18it/s, acc=0.746, epoch=18, loss=0.866]  

epoch:18, idx:5499/10845, loss:0.8656290321566842, acc:0.7460454545454546


 52%|█████▏    | 5600/10845 [49:22<41:46,  2.09it/s, acc=0.746, epoch=18, loss=0.868]  

epoch:18, idx:5599/10845, loss:0.8676780181803874, acc:0.7459375


 53%|█████▎    | 5700/10845 [50:15<44:54,  1.91it/s, acc=0.746, epoch=18, loss=0.868]  

epoch:18, idx:5699/10845, loss:0.868003378092197, acc:0.7455263157894737


 53%|█████▎    | 5800/10845 [51:07<35:11,  2.39it/s, acc=0.746, epoch=18, loss=0.867]  

epoch:18, idx:5799/10845, loss:0.8669506404728725, acc:0.745646551724138


 54%|█████▍    | 5900/10845 [51:59<1:01:38,  1.34it/s, acc=0.746, epoch=18, loss=0.865]

epoch:18, idx:5899/10845, loss:0.8650497575735642, acc:0.7460169491525424


 55%|█████▌    | 6000/10845 [52:55<52:04,  1.55it/s, acc=0.745, epoch=18, loss=0.87]   

epoch:18, idx:5999/10845, loss:0.8703773467044036, acc:0.745


 56%|█████▌    | 6100/10845 [53:46<54:29,  1.45it/s, acc=0.744, epoch=18, loss=0.872]  

epoch:18, idx:6099/10845, loss:0.8716237778448668, acc:0.744344262295082


 57%|█████▋    | 6200/10845 [54:38<29:39,  2.61it/s, acc=0.744, epoch=18, loss=0.872]

epoch:18, idx:6199/10845, loss:0.8718538838336545, acc:0.744233870967742


 58%|█████▊    | 6300/10845 [55:33<54:01,  1.40it/s, acc=0.744, epoch=18, loss=0.874]

epoch:18, idx:6299/10845, loss:0.873914296002615, acc:0.7438095238095238


 59%|█████▉    | 6400/10845 [56:29<44:00,  1.68it/s, acc=0.743, epoch=18, loss=0.875]  

epoch:18, idx:6399/10845, loss:0.8750432986486704, acc:0.743359375


 60%|█████▉    | 6500/10845 [57:24<43:11,  1.68it/s, acc=0.742, epoch=18, loss=0.878]

epoch:18, idx:6499/10845, loss:0.8776835984358421, acc:0.7424615384615385


 61%|██████    | 6600/10845 [58:17<33:58,  2.08it/s, acc=0.742, epoch=18, loss=0.879]  

epoch:18, idx:6599/10845, loss:0.8788321635939859, acc:0.741969696969697


 62%|██████▏   | 6700/10845 [59:09<32:09,  2.15it/s, acc=0.742, epoch=18, loss=0.878]

epoch:18, idx:6699/10845, loss:0.8776487288012433, acc:0.7423880597014926


 63%|██████▎   | 6800/10845 [1:00:00<39:39,  1.70it/s, acc=0.743, epoch=18, loss=0.877]

epoch:18, idx:6799/10845, loss:0.8772800766369876, acc:0.742610294117647


 64%|██████▎   | 6900/10845 [1:00:52<33:28,  1.96it/s, acc=0.742, epoch=18, loss=0.879]  

epoch:18, idx:6899/10845, loss:0.8794575680166051, acc:0.7416666666666667


 65%|██████▍   | 7000/10845 [1:01:50<38:52,  1.65it/s, acc=0.742, epoch=18, loss=0.88] 

epoch:18, idx:6999/10845, loss:0.8797154354878834, acc:0.7415


 65%|██████▌   | 7100/10845 [1:02:40<36:44,  1.70it/s, acc=0.742, epoch=18, loss=0.88] 

epoch:18, idx:7099/10845, loss:0.8799052437090538, acc:0.7415845070422535


 66%|██████▋   | 7200/10845 [1:03:36<41:32,  1.46it/s, acc=0.741, epoch=18, loss=0.881]

epoch:18, idx:7199/10845, loss:0.88136497920586, acc:0.7410763888888889


 67%|██████▋   | 7300/10845 [1:04:35<27:59,  2.11it/s, acc=0.74, epoch=18, loss=0.883] 

epoch:18, idx:7299/10845, loss:0.8832432408610436, acc:0.7403082191780822


 68%|██████▊   | 7400/10845 [1:05:27<39:57,  1.44it/s, acc=0.741, epoch=18, loss=0.881]

epoch:18, idx:7399/10845, loss:0.8810459611303098, acc:0.7408108108108108


 69%|██████▉   | 7500/10845 [1:06:19<26:42,  2.09it/s, acc=0.74, epoch=18, loss=0.882] 

epoch:18, idx:7499/10845, loss:0.8824782087723414, acc:0.7404


 70%|███████   | 7600/10845 [1:07:17<28:29,  1.90it/s, acc=0.74, epoch=18, loss=0.883] 

epoch:18, idx:7599/10845, loss:0.8834833602215114, acc:0.7401315789473685


 71%|███████   | 7700/10845 [1:08:11<25:43,  2.04it/s, acc=0.739, epoch=18, loss=0.887]

epoch:18, idx:7699/10845, loss:0.8866458265734958, acc:0.7393831168831169


 72%|███████▏  | 7800/10845 [1:09:00<27:22,  1.85it/s, acc=0.739, epoch=18, loss=0.886]

epoch:18, idx:7799/10845, loss:0.8856530141410155, acc:0.7391987179487179


 73%|███████▎  | 7900/10845 [1:09:55<24:00,  2.04it/s, acc=0.739, epoch=18, loss=0.888]

epoch:18, idx:7899/10845, loss:0.8879009276364423, acc:0.7385759493670886


 74%|███████▍  | 8000/10845 [1:10:48<23:02,  2.06it/s, acc=0.739, epoch=18, loss=0.887]

epoch:18, idx:7999/10845, loss:0.8867252612449229, acc:0.73865625


 75%|███████▍  | 8100/10845 [1:11:44<29:48,  1.53it/s, acc=0.738, epoch=18, loss=0.888]

epoch:18, idx:8099/10845, loss:0.887603024902903, acc:0.7384259259259259


 76%|███████▌  | 8200/10845 [1:12:39<19:05,  2.31it/s, acc=0.738, epoch=18, loss=0.888]

epoch:18, idx:8199/10845, loss:0.8879560417554727, acc:0.7383841463414634


 77%|███████▋  | 8300/10845 [1:13:33<16:40,  2.54it/s, acc=0.738, epoch=18, loss=0.889]

epoch:18, idx:8299/10845, loss:0.8891350920444512, acc:0.7382530120481928


 77%|███████▋  | 8400/10845 [1:14:29<23:45,  1.72it/s, acc=0.738, epoch=18, loss=0.89] 

epoch:18, idx:8399/10845, loss:0.8898642211868649, acc:0.7379761904761905


 78%|███████▊  | 8500/10845 [1:15:25<25:03,  1.56it/s, acc=0.738, epoch=18, loss=0.89] 

epoch:18, idx:8499/10845, loss:0.8898321025161182, acc:0.7377941176470588


 79%|███████▉  | 8600/10845 [1:16:19<20:25,  1.83it/s, acc=0.738, epoch=18, loss=0.89] 

epoch:18, idx:8599/10845, loss:0.8899843967272791, acc:0.7378197674418605


 80%|████████  | 8700/10845 [1:17:12<15:29,  2.31it/s, acc=0.738, epoch=18, loss=0.89] 

epoch:18, idx:8699/10845, loss:0.8899646536406429, acc:0.7380747126436782


 81%|████████  | 8800/10845 [1:18:08<22:59,  1.48it/s, acc=0.738, epoch=18, loss=0.89] 

epoch:18, idx:8799/10845, loss:0.8902416867119345, acc:0.7375852272727272


 82%|████████▏ | 8900/10845 [1:19:00<15:35,  2.08it/s, acc=0.738, epoch=18, loss=0.889]

epoch:18, idx:8899/10845, loss:0.8889349418443241, acc:0.7376685393258426


 83%|████████▎ | 9000/10845 [1:19:52<18:58,  1.62it/s, acc=0.737, epoch=18, loss=0.889]

epoch:18, idx:8999/10845, loss:0.8894811480409569, acc:0.7373611111111111


 84%|████████▍ | 9100/10845 [1:20:44<12:31,  2.32it/s, acc=0.737, epoch=18, loss=0.889]

epoch:18, idx:9099/10845, loss:0.8892539859112802, acc:0.7372252747252748


 85%|████████▍ | 9200/10845 [1:21:36<13:29,  2.03it/s, acc=0.737, epoch=18, loss=0.89] 

epoch:18, idx:9199/10845, loss:0.8895946246677119, acc:0.7370923913043478


 86%|████████▌ | 9300/10845 [1:22:28<09:23,  2.74it/s, acc=0.737, epoch=18, loss=0.889]

epoch:18, idx:9299/10845, loss:0.8891702614836795, acc:0.7369892473118279


 87%|████████▋ | 9400/10845 [1:23:21<16:18,  1.48it/s, acc=0.737, epoch=18, loss=0.888]

epoch:18, idx:9399/10845, loss:0.8884974014854178, acc:0.7372606382978724


 88%|████████▊ | 9500/10845 [1:24:17<12:34,  1.78it/s, acc=0.737, epoch=18, loss=0.89] 

epoch:18, idx:9499/10845, loss:0.8902440967904894, acc:0.7369736842105263


 89%|████████▊ | 9600/10845 [1:25:13<10:14,  2.03it/s, acc=0.737, epoch=18, loss=0.891]

epoch:18, idx:9599/10845, loss:0.8906136355890582, acc:0.736796875


 89%|████████▉ | 9700/10845 [1:26:09<08:44,  2.18it/s, acc=0.736, epoch=18, loss=0.892]

epoch:18, idx:9699/10845, loss:0.8923996597804974, acc:0.7364690721649485


 90%|█████████ | 9800/10845 [1:27:05<07:52,  2.21it/s, acc=0.737, epoch=18, loss=0.892]

epoch:18, idx:9799/10845, loss:0.8920542353482879, acc:0.7365051020408163


 91%|█████████▏| 9900/10845 [1:28:03<07:47,  2.02it/s, acc=0.736, epoch=18, loss=0.893]

epoch:18, idx:9899/10845, loss:0.8928256060830271, acc:0.736489898989899


 92%|█████████▏| 10000/10845 [1:28:59<07:03,  1.99it/s, acc=0.736, epoch=18, loss=0.893]

epoch:18, idx:9999/10845, loss:0.8933392349123955, acc:0.7361


 93%|█████████▎| 10100/10845 [1:29:55<04:51,  2.56it/s, acc=0.736, epoch=18, loss=0.893]

epoch:18, idx:10099/10845, loss:0.8929056018000782, acc:0.7362128712871288


 94%|█████████▍| 10200/10845 [1:30:48<06:05,  1.76it/s, acc=0.736, epoch=18, loss=0.894]

epoch:18, idx:10199/10845, loss:0.893794376651446, acc:0.7362009803921569


 95%|█████████▍| 10300/10845 [1:31:42<06:09,  1.48it/s, acc=0.736, epoch=18, loss=0.894]

epoch:18, idx:10299/10845, loss:0.894184244032045, acc:0.7362864077669903


 96%|█████████▌| 10400/10845 [1:32:36<03:52,  1.91it/s, acc=0.736, epoch=18, loss=0.895]

epoch:18, idx:10399/10845, loss:0.8945687637019616, acc:0.7362019230769231


 97%|█████████▋| 10500/10845 [1:33:31<03:29,  1.65it/s, acc=0.736, epoch=18, loss=0.895]

epoch:18, idx:10499/10845, loss:0.8950359362874712, acc:0.7359523809523809


 98%|█████████▊| 10600/10845 [1:34:27<02:02,  2.00it/s, acc=0.736, epoch=18, loss=0.896]

epoch:18, idx:10599/10845, loss:0.8964617294635413, acc:0.7357075471698113


 99%|█████████▊| 10700/10845 [1:35:22<01:27,  1.67it/s, acc=0.736, epoch=18, loss=0.896]

epoch:18, idx:10699/10845, loss:0.896357168104047, acc:0.7355841121495327


100%|█████████▉| 10800/10845 [1:36:17<00:34,  1.32it/s, acc=0.736, epoch=18, loss=0.895]

epoch:18, idx:10799/10845, loss:0.8954061116857661, acc:0.7357870370370371


100%|██████████| 10845/10845 [1:36:41<00:00,  2.25it/s, acc=0.736, epoch=18, loss=0.895]


epoch:18, idx:0/1275, loss:1.0396479368209839, acc:0.5
epoch:18, idx:100/1275, loss:1.2081379512749095, acc:0.6336633663366337
epoch:18, idx:200/1275, loss:1.1618371092858006, acc:0.6480099502487562
epoch:18, idx:300/1275, loss:1.1715174012405927, acc:0.6478405315614618
epoch:18, idx:400/1275, loss:1.1439522033617682, acc:0.6502493765586035
epoch:18, idx:500/1275, loss:1.1396424083890553, acc:0.6497005988023952
epoch:18, idx:600/1275, loss:1.15143623695199, acc:0.6493344425956739
epoch:18, idx:700/1275, loss:1.162921883750404, acc:0.6465763195435092
epoch:18, idx:800/1275, loss:1.1650979877560028, acc:0.6451310861423221
epoch:18, idx:900/1275, loss:1.1541295958875155, acc:0.6498335183129855
epoch:18, idx:1000/1275, loss:1.1517174218620334, acc:0.6520979020979021
epoch:18, idx:1100/1275, loss:1.1447458878145989, acc:0.6541780199818347
epoch:18, idx:1200/1275, loss:1.1442060809300205, acc:0.6519567027477102


  1%|          | 100/10845 [00:53<1:55:24,  1.55it/s, acc=0.743, epoch=19, loss=0.858]

epoch:19, idx:99/10845, loss:0.8582639443874359, acc:0.7425


  2%|▏         | 200/10845 [01:48<1:25:49,  2.07it/s, acc=0.748, epoch=19, loss=0.819]

epoch:19, idx:199/10845, loss:0.8191635531187057, acc:0.7475


  3%|▎         | 300/10845 [02:42<1:24:17,  2.09it/s, acc=0.738, epoch=19, loss=0.843]

epoch:19, idx:299/10845, loss:0.8429836199680965, acc:0.7375


  4%|▎         | 400/10845 [03:35<1:45:36,  1.65it/s, acc=0.743, epoch=19, loss=0.835]

epoch:19, idx:399/10845, loss:0.8345374266803265, acc:0.743125


  5%|▍         | 500/10845 [04:29<2:33:18,  1.12it/s, acc=0.748, epoch=19, loss=0.831]

epoch:19, idx:499/10845, loss:0.8307930579781533, acc:0.748


  6%|▌         | 600/10845 [05:22<1:31:26,  1.87it/s, acc=0.749, epoch=19, loss=0.832]

epoch:19, idx:599/10845, loss:0.8323247662683328, acc:0.7491666666666666


  6%|▋         | 700/10845 [06:16<1:22:36,  2.05it/s, acc=0.747, epoch=19, loss=0.855]

epoch:19, idx:699/10845, loss:0.8551442158647946, acc:0.7471428571428571


  7%|▋         | 800/10845 [07:10<1:30:08,  1.86it/s, acc=0.75, epoch=19, loss=0.858] 

epoch:19, idx:799/10845, loss:0.8582973809912801, acc:0.7496875


  8%|▊         | 900/10845 [08:05<1:33:56,  1.76it/s, acc=0.747, epoch=19, loss=0.866]

epoch:19, idx:899/10845, loss:0.8664614380399386, acc:0.7472222222222222


  9%|▉         | 1000/10845 [08:56<1:03:38,  2.58it/s, acc=0.749, epoch=19, loss=0.864]

epoch:19, idx:999/10845, loss:0.8640274170935154, acc:0.74875


 10%|█         | 1100/10845 [09:54<1:08:55,  2.36it/s, acc=0.749, epoch=19, loss=0.862]

epoch:19, idx:1099/10845, loss:0.8617854085564614, acc:0.7493181818181818


 11%|█         | 1200/10845 [10:50<1:55:46,  1.39it/s, acc=0.748, epoch=19, loss=0.865]

epoch:19, idx:1199/10845, loss:0.864616472646594, acc:0.7483333333333333


 12%|█▏        | 1300/10845 [11:43<1:01:29,  2.59it/s, acc=0.748, epoch=19, loss=0.86] 

epoch:19, idx:1299/10845, loss:0.860357096172296, acc:0.7484615384615385


 13%|█▎        | 1400/10845 [12:36<1:13:53,  2.13it/s, acc=0.747, epoch=19, loss=0.867]

epoch:19, idx:1399/10845, loss:0.8673621568296637, acc:0.7466071428571428


 14%|█▍        | 1500/10845 [13:32<1:07:42,  2.30it/s, acc=0.748, epoch=19, loss=0.861]

epoch:19, idx:1499/10845, loss:0.8609240317940712, acc:0.748


 15%|█▍        | 1600/10845 [14:12<55:49,  2.76it/s, acc=0.748, epoch=19, loss=0.857]  

epoch:19, idx:1599/10845, loss:0.8572712731920182, acc:0.7478125


 16%|█▌        | 1700/10845 [14:57<54:58,  2.77it/s, acc=0.748, epoch=19, loss=0.857]  

epoch:19, idx:1699/10845, loss:0.8570798911241924, acc:0.7479411764705882


 17%|█▋        | 1800/10845 [15:51<1:13:16,  2.06it/s, acc=0.747, epoch=19, loss=0.861]

epoch:19, idx:1799/10845, loss:0.8611533196270466, acc:0.7466666666666667


 18%|█▊        | 1900/10845 [16:43<1:37:37,  1.53it/s, acc=0.748, epoch=19, loss=0.859]

epoch:19, idx:1899/10845, loss:0.8591616812662074, acc:0.7476315789473684


 18%|█▊        | 2000/10845 [17:36<1:28:57,  1.66it/s, acc=0.748, epoch=19, loss=0.862]

epoch:19, idx:1999/10845, loss:0.8622638059407473, acc:0.748375


 19%|█▉        | 2100/10845 [18:31<1:24:48,  1.72it/s, acc=0.748, epoch=19, loss=0.863]

epoch:19, idx:2099/10845, loss:0.8629999839266141, acc:0.7478571428571429


 20%|██        | 2200/10845 [19:29<1:06:11,  2.18it/s, acc=0.746, epoch=19, loss=0.873]

epoch:19, idx:2199/10845, loss:0.8726661431789399, acc:0.7459090909090909


 21%|██        | 2300/10845 [20:23<59:14,  2.40it/s, acc=0.746, epoch=19, loss=0.871]  

epoch:19, idx:2299/10845, loss:0.8713492175807124, acc:0.7456521739130435


 22%|██▏       | 2400/10845 [21:15<1:12:01,  1.95it/s, acc=0.747, epoch=19, loss=0.866]

epoch:19, idx:2399/10845, loss:0.8662639492998521, acc:0.7467708333333334


 23%|██▎       | 2500/10845 [22:05<1:05:54,  2.11it/s, acc=0.746, epoch=19, loss=0.866]

epoch:19, idx:2499/10845, loss:0.8662492598295212, acc:0.7462


 24%|██▍       | 2600/10845 [23:04<1:31:21,  1.50it/s, acc=0.747, epoch=19, loss=0.865]

epoch:19, idx:2599/10845, loss:0.8652575876621099, acc:0.7466346153846154


 25%|██▍       | 2700/10845 [23:55<1:30:42,  1.50it/s, acc=0.747, epoch=19, loss=0.864]

epoch:19, idx:2699/10845, loss:0.8638689155931826, acc:0.7465740740740741


 26%|██▌       | 2800/10845 [24:50<1:14:15,  1.81it/s, acc=0.747, epoch=19, loss=0.863]

epoch:19, idx:2799/10845, loss:0.8634168682779585, acc:0.7467857142857143


 27%|██▋       | 2900/10845 [25:38<43:01,  3.08it/s, acc=0.747, epoch=19, loss=0.865]  

epoch:19, idx:2899/10845, loss:0.8645881045275721, acc:0.7471551724137931


 28%|██▊       | 3000/10845 [26:59<1:10:28,  1.86it/s, acc=0.747, epoch=19, loss=0.869]

epoch:19, idx:2999/10845, loss:0.8686851816177368, acc:0.7465


 29%|██▊       | 3100/10845 [27:55<1:05:48,  1.96it/s, acc=0.745, epoch=19, loss=0.873]

epoch:19, idx:3099/10845, loss:0.8732518880021187, acc:0.7454032258064516


 30%|██▉       | 3200/10845 [28:56<1:28:20,  1.44it/s, acc=0.744, epoch=19, loss=0.877]

epoch:19, idx:3199/10845, loss:0.876965266391635, acc:0.74375


 30%|███       | 3300/10845 [30:13<1:06:59,  1.88it/s, acc=0.744, epoch=19, loss=0.878]

epoch:19, idx:3299/10845, loss:0.8779864560293429, acc:0.7437121212121212


 31%|███▏      | 3400/10845 [31:09<1:14:45,  1.66it/s, acc=0.743, epoch=19, loss=0.878]

epoch:19, idx:3399/10845, loss:0.8782146469635121, acc:0.743014705882353


 32%|███▏      | 3500/10845 [32:06<57:16,  2.14it/s, acc=0.743, epoch=19, loss=0.881]  

epoch:19, idx:3499/10845, loss:0.8806206945862088, acc:0.7426428571428572


 33%|███▎      | 3600/10845 [33:04<1:28:07,  1.37it/s, acc=0.743, epoch=19, loss=0.88] 

epoch:19, idx:3599/10845, loss:0.8800804734892316, acc:0.7425694444444444


 34%|███▍      | 3700/10845 [33:58<1:09:07,  1.72it/s, acc=0.742, epoch=19, loss=0.88] 

epoch:19, idx:3699/10845, loss:0.880347283498661, acc:0.7420945945945946


 35%|███▌      | 3800/10845 [34:52<1:03:13,  1.86it/s, acc=0.742, epoch=19, loss=0.88] 

epoch:19, idx:3799/10845, loss:0.8802577465929483, acc:0.7421052631578947


 36%|███▌      | 3900/10845 [35:55<1:02:13,  1.86it/s, acc=0.741, epoch=19, loss=0.882]

epoch:19, idx:3899/10845, loss:0.8817519672558858, acc:0.741474358974359


 37%|███▋      | 4000/10845 [37:13<1:27:26,  1.30it/s, acc=0.741, epoch=19, loss=0.881]

epoch:19, idx:3999/10845, loss:0.8811055730879307, acc:0.7409375


 38%|███▊      | 4100/10845 [40:10<1:00:08,  1.87it/s, acc=0.74, epoch=19, loss=0.882]  

epoch:19, idx:4099/10845, loss:0.8821981198322483, acc:0.7401829268292683


 39%|███▊      | 4200/10845 [41:29<1:11:24,  1.55it/s, acc=0.74, epoch=19, loss=0.883] 

epoch:19, idx:4199/10845, loss:0.8828743280257497, acc:0.7396428571428572


 40%|███▉      | 4300/10845 [42:36<1:15:14,  1.45it/s, acc=0.74, epoch=19, loss=0.882]

epoch:19, idx:4299/10845, loss:0.8822890733979469, acc:0.7398255813953488


 41%|████      | 4400/10845 [43:35<56:35,  1.90it/s, acc=0.74, epoch=19, loss=0.882]   

epoch:19, idx:4399/10845, loss:0.8819566773284565, acc:0.7403409090909091


 41%|████▏     | 4500/10845 [44:35<1:08:39,  1.54it/s, acc=0.741, epoch=19, loss=0.881]

epoch:19, idx:4499/10845, loss:0.8805889857874977, acc:0.7407777777777778


 42%|████▏     | 4600/10845 [45:36<52:59,  1.96it/s, acc=0.741, epoch=19, loss=0.877]  

epoch:19, idx:4599/10845, loss:0.8773882625802704, acc:0.74125


 43%|████▎     | 4700/10845 [46:32<57:11,  1.79it/s, acc=0.742, epoch=19, loss=0.876]  

epoch:19, idx:4699/10845, loss:0.8761656921467883, acc:0.7415425531914893


 44%|████▍     | 4800/10845 [47:53<55:25,  1.82it/s, acc=0.741, epoch=19, loss=0.876]  

epoch:19, idx:4799/10845, loss:0.8764597815896074, acc:0.7414583333333333


 45%|████▌     | 4900/10845 [48:50<1:13:33,  1.35it/s, acc=0.742, epoch=19, loss=0.875]

epoch:19, idx:4899/10845, loss:0.8749552934601599, acc:0.7418877551020409


 46%|████▌     | 5000/10845 [49:54<1:06:31,  1.46it/s, acc=0.741, epoch=19, loss=0.877]

epoch:19, idx:4999/10845, loss:0.8772803732484579, acc:0.7412


 47%|████▋     | 5100/10845 [50:56<56:24,  1.70it/s, acc=0.741, epoch=19, loss=0.88]   

epoch:19, idx:5099/10845, loss:0.8799401984115441, acc:0.740686274509804


 48%|████▊     | 5200/10845 [51:59<1:07:11,  1.40it/s, acc=0.74, epoch=19, loss=0.881] 

epoch:19, idx:5199/10845, loss:0.8808370898979214, acc:0.7402403846153847


 49%|████▉     | 5300/10845 [53:02<1:09:13,  1.34it/s, acc=0.74, epoch=19, loss=0.882]

epoch:19, idx:5299/10845, loss:0.8817126702334521, acc:0.7402358490566038


 50%|████▉     | 5400/10845 [54:07<45:01,  2.02it/s, acc=0.741, epoch=19, loss=0.88]   

epoch:19, idx:5399/10845, loss:0.8804970815750184, acc:0.7405555555555555


 51%|█████     | 5500/10845 [55:10<50:47,  1.75it/s, acc=0.74, epoch=19, loss=0.881]   

epoch:19, idx:5499/10845, loss:0.8810402509163726, acc:0.7400909090909091


 52%|█████▏    | 5600/10845 [56:11<48:47,  1.79it/s, acc=0.74, epoch=19, loss=0.88]   

epoch:19, idx:5599/10845, loss:0.8799409292079509, acc:0.7403125


 53%|█████▎    | 5700/10845 [57:17<1:10:07,  1.22it/s, acc=0.74, epoch=19, loss=0.881]

epoch:19, idx:5699/10845, loss:0.8806846520435392, acc:0.7400438596491228


 53%|█████▎    | 5800/10845 [58:18<1:07:52,  1.24it/s, acc=0.74, epoch=19, loss=0.88] 

epoch:19, idx:5799/10845, loss:0.8802769914860356, acc:0.7395689655172414


 54%|█████▍    | 5900/10845 [59:17<53:25,  1.54it/s, acc=0.739, epoch=19, loss=0.882]  

epoch:19, idx:5899/10845, loss:0.8822884380893182, acc:0.7391525423728813


 55%|█████▌    | 6000/10845 [1:00:16<45:56,  1.76it/s, acc=0.74, epoch=19, loss=0.881] 

epoch:19, idx:5999/10845, loss:0.8811693519329031, acc:0.7398333333333333


 56%|█████▌    | 6100/10845 [1:01:15<44:31,  1.78it/s, acc=0.74, epoch=19, loss=0.882]  

epoch:19, idx:6099/10845, loss:0.8820994709920688, acc:0.7397131147540984


 57%|█████▋    | 6200/10845 [1:02:19<50:33,  1.53it/s, acc=0.74, epoch=19, loss=0.882]  

epoch:19, idx:6199/10845, loss:0.8817669751158645, acc:0.7401612903225806


 58%|█████▊    | 6300/10845 [1:03:23<49:06,  1.54it/s, acc=0.74, epoch=19, loss=0.882]  

epoch:19, idx:6299/10845, loss:0.8821036791020915, acc:0.74


 59%|█████▉    | 6400/10845 [1:04:28<43:02,  1.72it/s, acc=0.74, epoch=19, loss=0.883]  

epoch:19, idx:6399/10845, loss:0.8825641838437878, acc:0.7400390625


 60%|█████▉    | 6500/10845 [1:05:31<43:25,  1.67it/s, acc=0.74, epoch=19, loss=0.883]  

epoch:19, idx:6499/10845, loss:0.8830612102082143, acc:0.74


 61%|██████    | 6600/10845 [1:06:26<43:43,  1.62it/s, acc=0.74, epoch=19, loss=0.883]

epoch:19, idx:6599/10845, loss:0.8831775737147439, acc:0.7401136363636364


 62%|██████▏   | 6700/10845 [1:07:23<37:55,  1.82it/s, acc=0.74, epoch=19, loss=0.885]

epoch:19, idx:6699/10845, loss:0.8848258917611926, acc:0.739589552238806


 63%|██████▎   | 6800/10845 [1:08:17<35:29,  1.90it/s, acc=0.74, epoch=19, loss=0.885]

epoch:19, idx:6799/10845, loss:0.8851636794625837, acc:0.7395220588235294


 64%|██████▎   | 6900/10845 [1:09:13<33:19,  1.97it/s, acc=0.74, epoch=19, loss=0.884] 

epoch:19, idx:6899/10845, loss:0.8842642262340455, acc:0.7395652173913043


 65%|██████▍   | 7000/10845 [1:10:11<28:19,  2.26it/s, acc=0.74, epoch=19, loss=0.883] 

epoch:19, idx:6999/10845, loss:0.8829059227343117, acc:0.73975


 65%|██████▌   | 7100/10845 [1:11:55<47:14,  1.32it/s, acc=0.74, epoch=19, loss=0.883]  

epoch:19, idx:7099/10845, loss:0.8829541858128259, acc:0.7396126760563381


 66%|██████▋   | 7200/10845 [1:12:50<38:48,  1.57it/s, acc=0.74, epoch=19, loss=0.882]

epoch:19, idx:7199/10845, loss:0.8821154425355295, acc:0.7396527777777778


 67%|██████▋   | 7300/10845 [1:13:50<37:22,  1.58it/s, acc=0.74, epoch=19, loss=0.881]

epoch:19, idx:7299/10845, loss:0.8808273620372765, acc:0.7402739726027397


 68%|██████▊   | 7400/10845 [1:14:47<33:36,  1.71it/s, acc=0.74, epoch=19, loss=0.882]

epoch:19, idx:7399/10845, loss:0.8821765161023752, acc:0.7397972972972973


 69%|██████▉   | 7500/10845 [1:15:41<24:39,  2.26it/s, acc=0.74, epoch=19, loss=0.882]

epoch:19, idx:7499/10845, loss:0.8816543721258641, acc:0.7400666666666667


 70%|███████   | 7600/10845 [1:16:45<32:50,  1.65it/s, acc=0.74, epoch=19, loss=0.883]  

epoch:19, idx:7599/10845, loss:0.882740758788821, acc:0.7401315789473685


 71%|███████   | 7700/10845 [1:17:36<23:47,  2.20it/s, acc=0.74, epoch=19, loss=0.883]

epoch:19, idx:7699/10845, loss:0.8826502586475441, acc:0.74


 72%|███████▏  | 7800/10845 [1:18:31<30:23,  1.67it/s, acc=0.74, epoch=19, loss=0.882]

epoch:19, idx:7799/10845, loss:0.8824696916017013, acc:0.7399679487179487


 73%|███████▎  | 7900/10845 [1:19:31<22:17,  2.20it/s, acc=0.74, epoch=19, loss=0.883] 

epoch:19, idx:7899/10845, loss:0.8829916664901414, acc:0.7396202531645569


 74%|███████▍  | 8001/10845 [1:20:31<24:26,  1.94it/s, acc=0.74, epoch=19, loss=0.883] 

epoch:19, idx:7999/10845, loss:0.8828292775731533, acc:0.7395625


 75%|███████▍  | 8100/10845 [1:21:29<32:24,  1.41it/s, acc=0.739, epoch=19, loss=0.884]

epoch:19, idx:8099/10845, loss:0.8835100532737044, acc:0.7394135802469136


 76%|███████▌  | 8200/10845 [1:22:27<21:51,  2.02it/s, acc=0.739, epoch=19, loss=0.884]

epoch:19, idx:8199/10845, loss:0.8844418994446354, acc:0.739329268292683


 77%|███████▋  | 8300/10845 [1:23:27<18:43,  2.27it/s, acc=0.739, epoch=19, loss=0.885]

epoch:19, idx:8299/10845, loss:0.8847603916601244, acc:0.7393674698795181


 77%|███████▋  | 8400/10845 [1:24:22<34:10,  1.19it/s, acc=0.74, epoch=19, loss=0.883] 

epoch:19, idx:8399/10845, loss:0.8827386805681245, acc:0.7395535714285715


 78%|███████▊  | 8500/10845 [1:25:27<26:02,  1.50it/s, acc=0.739, epoch=19, loss=0.884]  

epoch:19, idx:8499/10845, loss:0.8835169308869277, acc:0.7394411764705883


 79%|███████▉  | 8600/10845 [1:26:20<17:47,  2.10it/s, acc=0.74, epoch=19, loss=0.883] 

epoch:19, idx:8599/10845, loss:0.8831732298035261, acc:0.7397674418604652


 80%|████████  | 8700/10845 [1:27:13<13:53,  2.57it/s, acc=0.74, epoch=19, loss=0.882]

epoch:19, idx:8699/10845, loss:0.8823799390474271, acc:0.7400862068965517


 81%|████████  | 8801/10845 [1:28:10<14:10,  2.40it/s, acc=0.74, epoch=19, loss=0.883]

epoch:19, idx:8799/10845, loss:0.8833542074110697, acc:0.7399431818181819


 82%|████████▏ | 8900/10845 [1:29:08<14:19,  2.26it/s, acc=0.74, epoch=19, loss=0.883]

epoch:19, idx:8899/10845, loss:0.8834472576671102, acc:0.7399157303370787


 83%|████████▎ | 9000/10845 [1:30:12<22:25,  1.37it/s, acc=0.74, epoch=19, loss=0.885]

epoch:19, idx:8999/10845, loss:0.8848168362097608, acc:0.7396944444444444


 84%|████████▍ | 9100/10845 [1:31:10<16:30,  1.76it/s, acc=0.74, epoch=19, loss=0.884]

epoch:19, idx:9099/10845, loss:0.8838436770422773, acc:0.7399175824175824


 85%|████████▍ | 9200/10845 [1:32:10<14:47,  1.85it/s, acc=0.74, epoch=19, loss=0.885]

epoch:19, idx:9199/10845, loss:0.8846602156418173, acc:0.7398097826086957


 86%|████████▌ | 9300/10845 [1:33:12<13:48,  1.86it/s, acc=0.74, epoch=19, loss=0.884]

epoch:19, idx:9299/10845, loss:0.884129234858418, acc:0.7399462365591398


 87%|████████▋ | 9400/10845 [1:34:10<12:49,  1.88it/s, acc=0.74, epoch=19, loss=0.884]

epoch:19, idx:9399/10845, loss:0.8844823736634027, acc:0.7397872340425532


 88%|████████▊ | 9500/10845 [1:35:04<08:01,  2.79it/s, acc=0.74, epoch=19, loss=0.885]

epoch:19, idx:9499/10845, loss:0.8845201707435282, acc:0.7399736842105263


 89%|████████▊ | 9600/10845 [1:36:03<12:29,  1.66it/s, acc=0.74, epoch=19, loss=0.884]

epoch:19, idx:9599/10845, loss:0.8842413037999843, acc:0.7401822916666667


 89%|████████▉ | 9700/10845 [1:37:30<09:43,  1.96it/s, acc=0.74, epoch=19, loss=0.885]

epoch:19, idx:9699/10845, loss:0.8847977615938973, acc:0.7401546391752577


 90%|█████████ | 9800/10845 [1:38:27<08:17,  2.10it/s, acc=0.74, epoch=19, loss=0.885]

epoch:19, idx:9799/10845, loss:0.8853467473995928, acc:0.7398979591836735


 91%|█████████▏| 9900/10845 [1:39:31<11:06,  1.42it/s, acc=0.74, epoch=19, loss=0.886]

epoch:19, idx:9899/10845, loss:0.8858574317802083, acc:0.73989898989899


 92%|█████████▏| 10000/10845 [1:40:36<10:43,  1.31it/s, acc=0.74, epoch=19, loss=0.886]

epoch:19, idx:9999/10845, loss:0.886116181486845, acc:0.7395


 93%|█████████▎| 10100/10845 [1:41:35<04:18,  2.89it/s, acc=0.739, epoch=19, loss=0.887]

epoch:19, idx:10099/10845, loss:0.8871910599670788, acc:0.739059405940594


 94%|█████████▍| 10200/10845 [1:42:22<04:32,  2.36it/s, acc=0.739, epoch=19, loss=0.887]

epoch:19, idx:10199/10845, loss:0.887250382958674, acc:0.7392892156862745


 95%|█████████▍| 10300/10845 [1:43:11<05:12,  1.74it/s, acc=0.739, epoch=19, loss=0.886]

epoch:19, idx:10299/10845, loss:0.8864383531947738, acc:0.739490291262136


 96%|█████████▌| 10400/10845 [1:44:07<05:51,  1.27it/s, acc=0.739, epoch=19, loss=0.887]

epoch:19, idx:10399/10845, loss:0.8867375302486695, acc:0.7393509615384616


 97%|█████████▋| 10500/10845 [1:45:05<03:00,  1.91it/s, acc=0.74, epoch=19, loss=0.886] 

epoch:19, idx:10499/10845, loss:0.8856246149369649, acc:0.7395714285714285


 98%|█████████▊| 10600/10845 [1:46:03<01:52,  2.17it/s, acc=0.739, epoch=19, loss=0.887]

epoch:19, idx:10599/10845, loss:0.8865144223031008, acc:0.7393632075471698


 99%|█████████▊| 10700/10845 [1:47:02<01:32,  1.57it/s, acc=0.739, epoch=19, loss=0.886]

epoch:19, idx:10699/10845, loss:0.8864309195380344, acc:0.739392523364486


100%|█████████▉| 10800/10845 [1:47:59<00:22,  2.03it/s, acc=0.739, epoch=19, loss=0.887]

epoch:19, idx:10799/10845, loss:0.8867264249258571, acc:0.7392361111111111


100%|██████████| 10845/10845 [1:48:26<00:00,  2.40it/s, acc=0.739, epoch=19, loss=0.887]


epoch:19, idx:0/1275, loss:0.9527033567428589, acc:0.5
epoch:19, idx:100/1275, loss:1.1890029464617815, acc:0.6311881188118812
epoch:19, idx:200/1275, loss:1.149335439228893, acc:0.6417910447761194
epoch:19, idx:300/1275, loss:1.1650543313881883, acc:0.6445182724252492
epoch:19, idx:400/1275, loss:1.1380918968348135, acc:0.6483790523690773
epoch:19, idx:500/1275, loss:1.1301618152987696, acc:0.6526946107784432
epoch:19, idx:600/1275, loss:1.1376721873854638, acc:0.6522462562396006
epoch:19, idx:700/1275, loss:1.1547850772164518, acc:0.6480028530670471
epoch:19, idx:800/1275, loss:1.1597229337945265, acc:0.6476279650436954
epoch:19, idx:900/1275, loss:1.149274416590372, acc:0.6506659267480577
epoch:19, idx:1000/1275, loss:1.1461276209259128, acc:0.6520979020979021
epoch:19, idx:1100/1275, loss:1.1388415919175265, acc:0.6532697547683923
epoch:19, idx:1200/1275, loss:1.1409439953489169, acc:0.6523730224812656


  1%|          | 100/10845 [00:58<1:32:33,  1.93it/s, acc=0.777, epoch=20, loss=0.687]

epoch:20, idx:99/10845, loss:0.6873631978034973, acc:0.7775


  2%|▏         | 200/10845 [01:55<1:43:16,  1.72it/s, acc=0.767, epoch=20, loss=0.734]

epoch:20, idx:199/10845, loss:0.7344743895530701, acc:0.7675


  3%|▎         | 300/10845 [02:43<1:40:00,  1.76it/s, acc=0.767, epoch=20, loss=0.756]

epoch:20, idx:299/10845, loss:0.7556934171915054, acc:0.7675


  4%|▎         | 400/10845 [03:29<1:45:31,  1.65it/s, acc=0.77, epoch=20, loss=0.775] 

epoch:20, idx:399/10845, loss:0.7751202748715877, acc:0.77


  5%|▍         | 500/10845 [04:20<1:23:49,  2.06it/s, acc=0.766, epoch=20, loss=0.78] 

epoch:20, idx:499/10845, loss:0.7803037590980529, acc:0.7665


  6%|▌         | 600/10845 [05:14<1:52:20,  1.52it/s, acc=0.764, epoch=20, loss=0.785]

epoch:20, idx:599/10845, loss:0.7846653377016385, acc:0.7641666666666667


  6%|▋         | 700/10845 [06:11<1:30:22,  1.87it/s, acc=0.759, epoch=20, loss=0.799]

epoch:20, idx:699/10845, loss:0.798867387005261, acc:0.7592857142857142


  7%|▋         | 800/10845 [07:08<1:24:20,  1.98it/s, acc=0.763, epoch=20, loss=0.788]

epoch:20, idx:799/10845, loss:0.7883651978522539, acc:0.763125


  8%|▊         | 900/10845 [08:02<1:29:26,  1.85it/s, acc=0.76, epoch=20, loss=0.798] 

epoch:20, idx:899/10845, loss:0.7979756750663122, acc:0.7597222222222222


  9%|▉         | 1000/10845 [08:53<1:41:37,  1.61it/s, acc=0.76, epoch=20, loss=0.796]

epoch:20, idx:999/10845, loss:0.7956328992843628, acc:0.7605


 10%|█         | 1100/10845 [09:50<1:34:08,  1.73it/s, acc=0.758, epoch=20, loss=0.811]

epoch:20, idx:1099/10845, loss:0.8113089269941504, acc:0.7579545454545454


 11%|█         | 1200/10845 [10:43<1:18:00,  2.06it/s, acc=0.755, epoch=20, loss=0.817]

epoch:20, idx:1199/10845, loss:0.816688562631607, acc:0.7552083333333334


 12%|█▏        | 1300/10845 [11:38<1:37:47,  1.63it/s, acc=0.755, epoch=20, loss=0.816]

epoch:20, idx:1299/10845, loss:0.8163756946875499, acc:0.7546153846153846


 13%|█▎        | 1400/10845 [12:33<1:58:26,  1.33it/s, acc=0.757, epoch=20, loss=0.808]

epoch:20, idx:1399/10845, loss:0.8083857147608484, acc:0.7569642857142858


 14%|█▍        | 1500/10845 [13:31<1:28:19,  1.76it/s, acc=0.759, epoch=20, loss=0.803]

epoch:20, idx:1499/10845, loss:0.8027134523789088, acc:0.7586666666666667


 15%|█▍        | 1600/10845 [14:28<1:28:24,  1.74it/s, acc=0.757, epoch=20, loss=0.811]

epoch:20, idx:1599/10845, loss:0.8108518261834979, acc:0.7571875


 16%|█▌        | 1700/10845 [15:28<1:21:52,  1.86it/s, acc=0.757, epoch=20, loss=0.817]

epoch:20, idx:1699/10845, loss:0.8170679017375497, acc:0.7566176470588235


 17%|█▋        | 1800/10845 [16:21<1:28:08,  1.71it/s, acc=0.757, epoch=20, loss=0.82] 

epoch:20, idx:1799/10845, loss:0.8200413394305441, acc:0.7566666666666667


 18%|█▊        | 1900/10845 [17:14<1:05:59,  2.26it/s, acc=0.759, epoch=20, loss=0.812]

epoch:20, idx:1899/10845, loss:0.8115186158920589, acc:0.7589473684210526


 18%|█▊        | 2000/10845 [18:08<1:25:03,  1.73it/s, acc=0.759, epoch=20, loss=0.809]

epoch:20, idx:1999/10845, loss:0.8086251522302628, acc:0.759375


 19%|█▉        | 2100/10845 [19:04<1:03:17,  2.30it/s, acc=0.76, epoch=20, loss=0.806] 

epoch:20, idx:2099/10845, loss:0.8057928319204422, acc:0.7598809523809524


 20%|██        | 2200/10845 [19:55<1:17:19,  1.86it/s, acc=0.76, epoch=20, loss=0.805] 

epoch:20, idx:2199/10845, loss:0.8050360502709042, acc:0.7597727272727273


 21%|██        | 2300/10845 [20:47<47:06,  3.02it/s, acc=0.759, epoch=20, loss=0.808]  

epoch:20, idx:2299/10845, loss:0.8076411674592806, acc:0.7592391304347826


 22%|██▏       | 2400/10845 [21:40<1:01:09,  2.30it/s, acc=0.759, epoch=20, loss=0.811]

epoch:20, idx:2399/10845, loss:0.810693474834164, acc:0.75875


 23%|██▎       | 2500/10845 [22:35<1:21:55,  1.70it/s, acc=0.758, epoch=20, loss=0.815]

epoch:20, idx:2499/10845, loss:0.8150585992336273, acc:0.7578


 24%|██▍       | 2600/10845 [23:33<1:04:51,  2.12it/s, acc=0.757, epoch=20, loss=0.823]

epoch:20, idx:2599/10845, loss:0.8227855689021257, acc:0.7568269230769231


 25%|██▍       | 2700/10845 [24:22<1:02:03,  2.19it/s, acc=0.756, epoch=20, loss=0.825]

epoch:20, idx:2699/10845, loss:0.8249907808833652, acc:0.7562962962962962


 26%|██▌       | 2800/10845 [25:18<1:10:19,  1.91it/s, acc=0.756, epoch=20, loss=0.828]

epoch:20, idx:2799/10845, loss:0.8278231982886791, acc:0.7555357142857143


 27%|██▋       | 2900/10845 [26:13<1:14:23,  1.78it/s, acc=0.755, epoch=20, loss=0.83] 

epoch:20, idx:2899/10845, loss:0.8302946460349806, acc:0.755


 28%|██▊       | 3000/10845 [27:09<1:31:26,  1.43it/s, acc=0.755, epoch=20, loss=0.832]

epoch:20, idx:2999/10845, loss:0.8316446301440398, acc:0.75475


 29%|██▊       | 3100/10845 [28:02<52:47,  2.45it/s, acc=0.755, epoch=20, loss=0.832]  

epoch:20, idx:3099/10845, loss:0.8321892782853496, acc:0.7548387096774194


 30%|██▉       | 3200/10845 [29:00<59:58,  2.12it/s, acc=0.755, epoch=20, loss=0.832]  

epoch:20, idx:3199/10845, loss:0.8324522250797599, acc:0.7546875


 30%|███       | 3300/10845 [29:54<1:28:31,  1.42it/s, acc=0.754, epoch=20, loss=0.839]

epoch:20, idx:3299/10845, loss:0.8385990712588484, acc:0.7540909090909091


 31%|███▏      | 3400/10845 [30:47<44:13,  2.81it/s, acc=0.754, epoch=20, loss=0.838]  

epoch:20, idx:3399/10845, loss:0.8376249137433136, acc:0.754264705882353


 32%|███▏      | 3500/10845 [31:40<1:02:12,  1.97it/s, acc=0.754, epoch=20, loss=0.836]

epoch:20, idx:3499/10845, loss:0.8361144762635231, acc:0.7541428571428571


 33%|███▎      | 3600/10845 [32:37<1:17:07,  1.57it/s, acc=0.755, epoch=20, loss=0.836]

epoch:20, idx:3599/10845, loss:0.8357547842048937, acc:0.7545138888888889


 34%|███▍      | 3700/10845 [33:32<1:03:33,  1.87it/s, acc=0.754, epoch=20, loss=0.837]

epoch:20, idx:3699/10845, loss:0.8370869407702136, acc:0.7543918918918919


 35%|███▌      | 3800/10845 [34:27<54:50,  2.14it/s, acc=0.754, epoch=20, loss=0.838]  

epoch:20, idx:3799/10845, loss:0.8383345256275252, acc:0.7538815789473684


 36%|███▌      | 3900/10845 [35:20<1:02:54,  1.84it/s, acc=0.753, epoch=20, loss=0.841]

epoch:20, idx:3899/10845, loss:0.8410011906425158, acc:0.752948717948718


 37%|███▋      | 4000/10845 [36:09<50:28,  2.26it/s, acc=0.753, epoch=20, loss=0.839]  

epoch:20, idx:3999/10845, loss:0.8391420802697539, acc:0.7531875


 38%|███▊      | 4100/10845 [36:55<50:22,  2.23it/s, acc=0.754, epoch=20, loss=0.837]  

epoch:20, idx:4099/10845, loss:0.837149376542103, acc:0.7538414634146341


 39%|███▊      | 4200/10845 [37:46<1:07:40,  1.64it/s, acc=0.753, epoch=20, loss=0.84] 

epoch:20, idx:4199/10845, loss:0.8399993808567524, acc:0.7529166666666667


 40%|███▉      | 4300/10845 [38:39<1:21:01,  1.35it/s, acc=0.752, epoch=20, loss=0.841]

epoch:20, idx:4299/10845, loss:0.8414551834658135, acc:0.751860465116279


 41%|████      | 4400/10845 [39:30<53:41,  2.00it/s, acc=0.751, epoch=20, loss=0.845]  

epoch:20, idx:4399/10845, loss:0.8446377964927392, acc:0.7509659090909091


 41%|████▏     | 4500/10845 [40:25<53:05,  1.99it/s, acc=0.752, epoch=20, loss=0.841]  

epoch:20, idx:4499/10845, loss:0.8411364635825157, acc:0.7516111111111111


 42%|████▏     | 4600/10845 [41:18<1:01:44,  1.69it/s, acc=0.752, epoch=20, loss=0.842]

epoch:20, idx:4599/10845, loss:0.8417883522419826, acc:0.7519565217391304


 43%|████▎     | 4700/10845 [42:10<39:19,  2.60it/s, acc=0.751, epoch=20, loss=0.842]  

epoch:20, idx:4699/10845, loss:0.8422408781089681, acc:0.7514893617021277


 44%|████▍     | 4800/10845 [43:10<43:58,  2.29it/s, acc=0.751, epoch=20, loss=0.842]  

epoch:20, idx:4799/10845, loss:0.8422899888642132, acc:0.7508333333333334


 45%|████▌     | 4900/10845 [44:04<40:01,  2.48it/s, acc=0.751, epoch=20, loss=0.843]  

epoch:20, idx:4899/10845, loss:0.8432832468468315, acc:0.7505612244897959


 46%|████▌     | 5000/10845 [45:02<44:20,  2.20it/s, acc=0.751, epoch=20, loss=0.844]  

epoch:20, idx:4999/10845, loss:0.8436142419695855, acc:0.75075


 47%|████▋     | 5101/10845 [45:50<31:14,  3.06it/s, acc=0.75, epoch=20, loss=0.847]   

epoch:20, idx:5099/10845, loss:0.8468520382923238, acc:0.75


 48%|████▊     | 5200/10845 [46:46<47:28,  1.98it/s, acc=0.75, epoch=20, loss=0.848]  

epoch:20, idx:5199/10845, loss:0.8481632253069143, acc:0.749951923076923


 49%|████▉     | 5300/10845 [47:43<43:13,  2.14it/s, acc=0.749, epoch=20, loss=0.851] 

epoch:20, idx:5299/10845, loss:0.851259914434181, acc:0.7493396226415094


 50%|████▉     | 5400/10845 [48:35<39:28,  2.30it/s, acc=0.749, epoch=20, loss=0.853]  

epoch:20, idx:5399/10845, loss:0.8529730675397096, acc:0.7487037037037036


 51%|█████     | 5500/10845 [49:30<39:57,  2.23it/s, acc=0.749, epoch=20, loss=0.852]  

epoch:20, idx:5499/10845, loss:0.8523241754770279, acc:0.7488636363636364


 52%|█████▏    | 5600/10845 [50:24<55:04,  1.59it/s, acc=0.749, epoch=20, loss=0.852]  

epoch:20, idx:5599/10845, loss:0.8519266292133502, acc:0.7489732142857143


 53%|█████▎    | 5700/10845 [51:15<52:17,  1.64it/s, acc=0.749, epoch=20, loss=0.853]  

epoch:20, idx:5699/10845, loss:0.8534928808923353, acc:0.7490350877192983


 53%|█████▎    | 5800/10845 [52:10<55:40,  1.51it/s, acc=0.749, epoch=20, loss=0.853]  

epoch:20, idx:5799/10845, loss:0.8529272573878025, acc:0.7490948275862069


 54%|█████▍    | 5901/10845 [53:06<46:43,  1.76it/s, acc=0.749, epoch=20, loss=0.854]  

epoch:20, idx:5899/10845, loss:0.8537585751485016, acc:0.7486440677966102


 55%|█████▌    | 6000/10845 [54:06<47:02,  1.72it/s, acc=0.749, epoch=20, loss=0.855]  

epoch:20, idx:5999/10845, loss:0.8550386490325133, acc:0.7485


 56%|█████▌    | 6100/10845 [55:01<41:30,  1.91it/s, acc=0.749, epoch=20, loss=0.855]  

epoch:20, idx:6099/10845, loss:0.855309523695805, acc:0.7485655737704918


 57%|█████▋    | 6200/10845 [55:53<45:56,  1.69it/s, acc=0.748, epoch=20, loss=0.856]  

epoch:20, idx:6199/10845, loss:0.8560154453304506, acc:0.7481854838709677


 58%|█████▊    | 6300/10845 [56:49<43:57,  1.72it/s, acc=0.748, epoch=20, loss=0.856]  

epoch:20, idx:6299/10845, loss:0.8562866327119252, acc:0.7479365079365079


 59%|█████▉    | 6401/10845 [57:43<35:45,  2.07it/s, acc=0.748, epoch=20, loss=0.857]

epoch:20, idx:6399/10845, loss:0.8574308516597375, acc:0.7477734375


 60%|█████▉    | 6500/10845 [58:37<39:44,  1.82it/s, acc=0.748, epoch=20, loss=0.856]  

epoch:20, idx:6499/10845, loss:0.8562649185428253, acc:0.7479615384615385


 61%|██████    | 6600/10845 [59:29<27:42,  2.55it/s, acc=0.748, epoch=20, loss=0.858]

epoch:20, idx:6599/10845, loss:0.8579888479953462, acc:0.7477272727272727


 62%|██████▏   | 6700/10845 [1:00:25<43:06,  1.60it/s, acc=0.747, epoch=20, loss=0.86] 

epoch:20, idx:6699/10845, loss:0.8602467193398903, acc:0.7474626865671642


 63%|██████▎   | 6800/10845 [1:01:21<51:11,  1.32it/s, acc=0.748, epoch=20, loss=0.86] 

epoch:20, idx:6799/10845, loss:0.8598649845272303, acc:0.7477205882352941


 64%|██████▎   | 6900/10845 [1:02:21<49:05,  1.34it/s, acc=0.748, epoch=20, loss=0.86]  

epoch:20, idx:6899/10845, loss:0.8599900656204292, acc:0.7476811594202899


 65%|██████▍   | 7000/10845 [1:03:25<43:36,  1.47it/s, acc=0.747, epoch=20, loss=0.862]  

epoch:20, idx:6999/10845, loss:0.8619640065048422, acc:0.7472142857142857


 65%|██████▌   | 7100/10845 [1:04:18<38:44,  1.61it/s, acc=0.747, epoch=20, loss=0.863]

epoch:20, idx:7099/10845, loss:0.8629254263597474, acc:0.7469366197183098


 66%|██████▋   | 7200/10845 [1:05:14<32:22,  1.88it/s, acc=0.747, epoch=20, loss=0.863]

epoch:20, idx:7199/10845, loss:0.8633257423631019, acc:0.7469791666666666


 67%|██████▋   | 7300/10845 [1:06:10<49:23,  1.20it/s, acc=0.747, epoch=20, loss=0.863]

epoch:20, idx:7299/10845, loss:0.8629078273618058, acc:0.7468835616438356


 68%|██████▊   | 7400/10845 [1:07:05<31:52,  1.80it/s, acc=0.747, epoch=20, loss=0.863]

epoch:20, idx:7399/10845, loss:0.8625256709875287, acc:0.7465202702702702


 69%|██████▉   | 7500/10845 [1:08:01<32:55,  1.69it/s, acc=0.746, epoch=20, loss=0.864]

epoch:20, idx:7499/10845, loss:0.864078978518645, acc:0.7460666666666667


 70%|███████   | 7600/10845 [1:08:55<29:21,  1.84it/s, acc=0.746, epoch=20, loss=0.864]

epoch:20, idx:7599/10845, loss:0.8643296512294757, acc:0.7460197368421052


 71%|███████   | 7700/10845 [1:09:51<34:06,  1.54it/s, acc=0.746, epoch=20, loss=0.863]

epoch:20, idx:7699/10845, loss:0.8630677521190085, acc:0.7461688311688311


 72%|███████▏  | 7800/10845 [1:10:49<30:03,  1.69it/s, acc=0.746, epoch=20, loss=0.864]

epoch:20, idx:7799/10845, loss:0.8635212726661792, acc:0.7457692307692307


 73%|███████▎  | 7900/10845 [1:11:38<23:54,  2.05it/s, acc=0.745, epoch=20, loss=0.865]

epoch:20, idx:7899/10845, loss:0.8652708063585849, acc:0.7452848101265823


 74%|███████▍  | 8000/10845 [1:12:31<21:11,  2.24it/s, acc=0.745, epoch=20, loss=0.864]

epoch:20, idx:7999/10845, loss:0.8639265133179724, acc:0.745375


 75%|███████▍  | 8100/10845 [1:13:27<59:21,  1.30s/it, acc=0.745, epoch=20, loss=0.865]  

epoch:20, idx:8099/10845, loss:0.8651593074864812, acc:0.7452777777777778


 76%|███████▌  | 8200/10845 [1:16:29<40:44,  1.08it/s, acc=0.746, epoch=20, loss=0.865]  

epoch:20, idx:8199/10845, loss:0.8648164720033727, acc:0.7455182926829268


 77%|███████▋  | 8300/10845 [1:17:45<25:27,  1.67it/s, acc=0.745, epoch=20, loss=0.866]

epoch:20, idx:8299/10845, loss:0.8656999727939985, acc:0.745210843373494


 77%|███████▋  | 8400/10845 [1:19:03<31:13,  1.30it/s, acc=0.745, epoch=20, loss=0.866]

epoch:20, idx:8399/10845, loss:0.8660868243553809, acc:0.7453273809523809


 78%|███████▊  | 8500/10845 [1:20:19<27:55,  1.40it/s, acc=0.745, epoch=20, loss=0.866]

epoch:20, idx:8499/10845, loss:0.866348064636483, acc:0.7452647058823529


 79%|███████▉  | 8600/10845 [1:21:38<29:40,  1.26it/s, acc=0.745, epoch=20, loss=0.866]

epoch:20, idx:8599/10845, loss:0.8660170810964218, acc:0.7453779069767442


 80%|████████  | 8700/10845 [1:23:56<30:49,  1.16it/s, acc=0.745, epoch=20, loss=0.867]  

epoch:20, idx:8699/10845, loss:0.8671127419670422, acc:0.7451436781609195


 81%|████████  | 8800/10845 [1:25:14<24:21,  1.40it/s, acc=0.745, epoch=20, loss=0.868]

epoch:20, idx:8799/10845, loss:0.8683009606159546, acc:0.745


 82%|████████▏ | 8900/10845 [1:27:38<19:14,  1.69it/s, acc=0.745, epoch=20, loss=0.868]  

epoch:20, idx:8899/10845, loss:0.8675325220990717, acc:0.7450842696629213


 83%|████████▎ | 9000/10845 [1:28:53<19:10,  1.60it/s, acc=0.745, epoch=20, loss=0.867]

epoch:20, idx:8999/10845, loss:0.8671006492111418, acc:0.7453055555555556


 84%|████████▍ | 9100/10845 [1:30:02<20:06,  1.45it/s, acc=0.745, epoch=20, loss=0.868]

epoch:20, idx:9099/10845, loss:0.8676595848209255, acc:0.7451923076923077


 85%|████████▍ | 9200/10845 [1:31:25<22:28,  1.22it/s, acc=0.745, epoch=20, loss=0.866]  

epoch:20, idx:9199/10845, loss:0.8663039361653121, acc:0.7452989130434783


 86%|████████▌ | 9300/10845 [1:32:33<16:49,  1.53it/s, acc=0.745, epoch=20, loss=0.867]

epoch:20, idx:9299/10845, loss:0.8673091707242433, acc:0.7450806451612904


 87%|████████▋ | 9400/10845 [1:33:45<22:46,  1.06it/s, acc=0.745, epoch=20, loss=0.867]

epoch:20, idx:9399/10845, loss:0.8670233792605552, acc:0.7451595744680851


 88%|████████▊ | 9500/10845 [1:34:57<16:11,  1.38it/s, acc=0.745, epoch=20, loss=0.867]

epoch:20, idx:9499/10845, loss:0.8669144654556324, acc:0.7451052631578947


 89%|████████▊ | 9600/10845 [1:36:10<13:56,  1.49it/s, acc=0.745, epoch=20, loss=0.867]

epoch:20, idx:9599/10845, loss:0.8667988925644508, acc:0.7451041666666667


 89%|████████▉ | 9700/10845 [1:37:21<15:04,  1.27it/s, acc=0.745, epoch=20, loss=0.868]

epoch:20, idx:9699/10845, loss:0.8680786636348852, acc:0.7447164948453608


 90%|█████████ | 9800/10845 [1:38:31<12:11,  1.43it/s, acc=0.744, epoch=20, loss=0.869]

epoch:20, idx:9799/10845, loss:0.8689285553991795, acc:0.7444897959183674


 91%|█████████▏| 9900/10845 [1:39:48<12:26,  1.27it/s, acc=0.744, epoch=20, loss=0.87] 

epoch:20, idx:9899/10845, loss:0.8698202991636113, acc:0.7442676767676768


 92%|█████████▏| 10000/10845 [1:40:57<09:16,  1.52it/s, acc=0.745, epoch=20, loss=0.869]

epoch:20, idx:9999/10845, loss:0.8693549675613642, acc:0.74455


 93%|█████████▎| 10100/10845 [1:42:01<07:43,  1.61it/s, acc=0.745, epoch=20, loss=0.869]

epoch:20, idx:10099/10845, loss:0.869022048525881, acc:0.7445792079207921


 94%|█████████▍| 10200/10845 [1:43:09<09:07,  1.18it/s, acc=0.745, epoch=20, loss=0.868]

epoch:20, idx:10199/10845, loss:0.867830621170647, acc:0.7447549019607843


 95%|█████████▍| 10300/10845 [1:44:19<05:59,  1.52it/s, acc=0.744, epoch=20, loss=0.869]

epoch:20, idx:10299/10845, loss:0.8694025843033513, acc:0.744490291262136


 96%|█████████▌| 10400/10845 [1:45:22<05:13,  1.42it/s, acc=0.744, epoch=20, loss=0.87] 

epoch:20, idx:10399/10845, loss:0.8696612746239855, acc:0.7444471153846154


 97%|█████████▋| 10500/10845 [1:46:34<05:00,  1.15it/s, acc=0.744, epoch=20, loss=0.87] 

epoch:20, idx:10499/10845, loss:0.8698313628520284, acc:0.7442619047619048


 98%|█████████▊| 10600/10845 [1:47:43<03:54,  1.05it/s, acc=0.744, epoch=20, loss=0.871]

epoch:20, idx:10599/10845, loss:0.8710949584673036, acc:0.7439150943396227


 99%|█████████▊| 10700/10845 [1:49:07<05:50,  2.42s/it, acc=0.744, epoch=20, loss=0.871]

epoch:20, idx:10699/10845, loss:0.8712555374497565, acc:0.7440887850467289


100%|█████████▉| 10800/10845 [1:50:44<00:27,  1.62it/s, acc=0.744, epoch=20, loss=0.872]

epoch:20, idx:10799/10845, loss:0.8721555313909495, acc:0.74375


100%|██████████| 10845/10845 [1:51:11<00:00,  2.28it/s, acc=0.744, epoch=20, loss=0.872]


epoch:20, idx:0/1275, loss:0.8798866271972656, acc:0.5
epoch:20, idx:100/1275, loss:1.2238986557072933, acc:0.6287128712871287
epoch:20, idx:200/1275, loss:1.1814059202943867, acc:0.6368159203980099
epoch:20, idx:300/1275, loss:1.1884121979985918, acc:0.6420265780730897
epoch:20, idx:400/1275, loss:1.1589782743382633, acc:0.6502493765586035
epoch:20, idx:500/1275, loss:1.1584404065699396, acc:0.6467065868263473
epoch:20, idx:600/1275, loss:1.1692160318377807, acc:0.6439267886855241
epoch:20, idx:700/1275, loss:1.1827433288692577, acc:0.641583452211127
epoch:20, idx:800/1275, loss:1.185645347156477, acc:0.6432584269662921
epoch:20, idx:900/1275, loss:1.1758689021693747, acc:0.6451165371809101
epoch:20, idx:1000/1275, loss:1.1733743879761729, acc:0.6463536463536463
epoch:20, idx:1100/1275, loss:1.1661921587183084, acc:0.6462306993642144
epoch:20, idx:1200/1275, loss:1.1676480616012481, acc:0.6444629475437136


  1%|          | 101/10845 [01:03<1:14:48,  2.39it/s, acc=0.772, epoch=21, loss=0.81]

epoch:21, idx:99/10845, loss:0.8180898553133011, acc:0.77


  2%|▏         | 200/10845 [02:09<2:07:22,  1.39it/s, acc=0.776, epoch=21, loss=0.777]

epoch:21, idx:199/10845, loss:0.7765131130814552, acc:0.77625


  3%|▎         | 300/10845 [03:12<1:36:29,  1.82it/s, acc=0.777, epoch=21, loss=0.776]

epoch:21, idx:299/10845, loss:0.7759892670313517, acc:0.7775


  4%|▎         | 400/10845 [04:15<2:13:30,  1.30it/s, acc=0.765, epoch=21, loss=0.807]

epoch:21, idx:399/10845, loss:0.8065599486231804, acc:0.765


  5%|▍         | 500/10845 [05:14<1:28:01,  1.96it/s, acc=0.762, epoch=21, loss=0.8]  

epoch:21, idx:499/10845, loss:0.7999930556416511, acc:0.762


  6%|▌         | 600/10845 [06:11<1:37:46,  1.75it/s, acc=0.758, epoch=21, loss=0.81] 

epoch:21, idx:599/10845, loss:0.8098086605966092, acc:0.7583333333333333


  6%|▋         | 700/10845 [07:14<1:33:20,  1.81it/s, acc=0.752, epoch=21, loss=0.824]

epoch:21, idx:699/10845, loss:0.8240892454130309, acc:0.7525


  7%|▋         | 800/10845 [08:16<1:29:07,  1.88it/s, acc=0.754, epoch=21, loss=0.83] 

epoch:21, idx:799/10845, loss:0.8300659516826272, acc:0.75375


  8%|▊         | 900/10845 [09:11<1:33:30,  1.77it/s, acc=0.747, epoch=21, loss=0.848]

epoch:21, idx:899/10845, loss:0.847617306345039, acc:0.7466666666666667


  9%|▉         | 1000/10845 [10:15<2:08:57,  1.27it/s, acc=0.75, epoch=21, loss=0.836]

epoch:21, idx:999/10845, loss:0.8361088933646679, acc:0.7505


 10%|█         | 1100/10845 [11:18<1:42:18,  1.59it/s, acc=0.752, epoch=21, loss=0.832]

epoch:21, idx:1099/10845, loss:0.8321271675012328, acc:0.7515909090909091


 11%|█         | 1200/10845 [12:14<1:17:33,  2.07it/s, acc=0.753, epoch=21, loss=0.833]

epoch:21, idx:1199/10845, loss:0.8329677200565736, acc:0.753125


 12%|█▏        | 1300/10845 [13:16<1:39:46,  1.59it/s, acc=0.753, epoch=21, loss=0.84] 

epoch:21, idx:1299/10845, loss:0.8403734597334495, acc:0.7528846153846154


 13%|█▎        | 1400/10845 [14:14<1:49:18,  1.44it/s, acc=0.755, epoch=21, loss=0.836]

epoch:21, idx:1399/10845, loss:0.8364775251064982, acc:0.7548214285714285


 14%|█▍        | 1500/10845 [15:13<1:16:35,  2.03it/s, acc=0.754, epoch=21, loss=0.841]

epoch:21, idx:1499/10845, loss:0.8409298379421234, acc:0.7543333333333333


 15%|█▍        | 1600/10845 [16:11<1:18:15,  1.97it/s, acc=0.754, epoch=21, loss=0.842]

epoch:21, idx:1599/10845, loss:0.8419688307121396, acc:0.75390625


 16%|█▌        | 1700/10845 [17:06<1:13:03,  2.09it/s, acc=0.752, epoch=21, loss=0.846]

epoch:21, idx:1699/10845, loss:0.8460344111218172, acc:0.7516176470588235


 17%|█▋        | 1800/10845 [18:06<2:25:45,  1.03it/s, acc=0.753, epoch=21, loss=0.84] 

epoch:21, idx:1799/10845, loss:0.8404681271645758, acc:0.7527777777777778


 18%|█▊        | 1900/10845 [19:05<2:19:27,  1.07it/s, acc=0.752, epoch=21, loss=0.845]

epoch:21, idx:1899/10845, loss:0.8451132074469014, acc:0.7521052631578947


 18%|█▊        | 2000/10845 [20:05<1:52:31,  1.31it/s, acc=0.75, epoch=21, loss=0.853] 

epoch:21, idx:1999/10845, loss:0.853290566533804, acc:0.750375


 19%|█▉        | 2100/10845 [21:02<1:46:12,  1.37it/s, acc=0.749, epoch=21, loss=0.861]

epoch:21, idx:2099/10845, loss:0.8610149162156241, acc:0.7485714285714286


 20%|██        | 2200/10845 [22:00<1:18:04,  1.85it/s, acc=0.748, epoch=21, loss=0.865]

epoch:21, idx:2199/10845, loss:0.8654932408170266, acc:0.7475


 21%|██        | 2300/10845 [22:54<1:06:37,  2.14it/s, acc=0.748, epoch=21, loss=0.865]

epoch:21, idx:2299/10845, loss:0.8649859132714893, acc:0.7484782608695653


 22%|██▏       | 2400/10845 [23:51<1:28:52,  1.58it/s, acc=0.75, epoch=21, loss=0.865] 

epoch:21, idx:2399/10845, loss:0.8646086320529381, acc:0.7495833333333334


 23%|██▎       | 2500/10845 [24:46<1:33:05,  1.49it/s, acc=0.751, epoch=21, loss=0.862]

epoch:21, idx:2499/10845, loss:0.8624950679302216, acc:0.7509


 24%|██▍       | 2600/10845 [25:44<1:06:02,  2.08it/s, acc=0.751, epoch=21, loss=0.864]

epoch:21, idx:2599/10845, loss:0.863894669849139, acc:0.75125


 25%|██▍       | 2701/10845 [26:38<1:01:02,  2.22it/s, acc=0.752, epoch=21, loss=0.862]

epoch:21, idx:2699/10845, loss:0.8624014553758833, acc:0.7516666666666667


 26%|██▌       | 2800/10845 [27:37<1:30:43,  1.48it/s, acc=0.752, epoch=21, loss=0.859]

epoch:21, idx:2799/10845, loss:0.8586944781669549, acc:0.7519642857142858


 27%|██▋       | 2900/10845 [28:33<1:06:47,  1.98it/s, acc=0.753, epoch=21, loss=0.856]

epoch:21, idx:2899/10845, loss:0.856247983307674, acc:0.7525862068965518


 28%|██▊       | 3000/10845 [29:30<58:32,  2.23it/s, acc=0.753, epoch=21, loss=0.854]  

epoch:21, idx:2999/10845, loss:0.8536371850570043, acc:0.7533333333333333


 29%|██▊       | 3100/10845 [30:29<59:01,  2.19it/s, acc=0.753, epoch=21, loss=0.853]  

epoch:21, idx:3099/10845, loss:0.8527720972991759, acc:0.7530645161290322


 30%|██▉       | 3200/10845 [31:23<57:48,  2.20it/s, acc=0.754, epoch=21, loss=0.849]  

epoch:21, idx:3199/10845, loss:0.8486572429351509, acc:0.75375


 30%|███       | 3300/10845 [32:21<1:23:25,  1.51it/s, acc=0.754, epoch=21, loss=0.848]

epoch:21, idx:3299/10845, loss:0.8477161484776121, acc:0.7543181818181818


 31%|███▏      | 3400/10845 [33:15<1:23:08,  1.49it/s, acc=0.755, epoch=21, loss=0.846]

epoch:21, idx:3399/10845, loss:0.8459675849711193, acc:0.7548529411764706


 32%|███▏      | 3500/10845 [34:09<1:10:15,  1.74it/s, acc=0.754, epoch=21, loss=0.848]

epoch:21, idx:3499/10845, loss:0.8481313066652842, acc:0.7543571428571428


 33%|███▎      | 3600/10845 [35:06<1:23:11,  1.45it/s, acc=0.754, epoch=21, loss=0.851]

epoch:21, idx:3599/10845, loss:0.8507655642761125, acc:0.7536111111111111


 34%|███▍      | 3700/10845 [36:03<56:03,  2.12it/s, acc=0.753, epoch=21, loss=0.853]  

epoch:21, idx:3699/10845, loss:0.853312943207251, acc:0.7529054054054054


 35%|███▌      | 3800/10845 [36:59<1:05:03,  1.80it/s, acc=0.752, epoch=21, loss=0.853]

epoch:21, idx:3799/10845, loss:0.8534015856447973, acc:0.7521710526315789


 36%|███▌      | 3900/10845 [37:55<55:44,  2.08it/s, acc=0.752, epoch=21, loss=0.852]  

epoch:21, idx:3899/10845, loss:0.8523287850312697, acc:0.7525


 37%|███▋      | 4000/10845 [38:50<1:07:00,  1.70it/s, acc=0.753, epoch=21, loss=0.851]

epoch:21, idx:3999/10845, loss:0.8511748579144478, acc:0.75275


 38%|███▊      | 4100/10845 [39:51<1:16:01,  1.48it/s, acc=0.752, epoch=21, loss=0.854]

epoch:21, idx:4099/10845, loss:0.8539041179418564, acc:0.7520121951219512


 39%|███▊      | 4200/10845 [40:53<1:18:57,  1.40it/s, acc=0.752, epoch=21, loss=0.854]

epoch:21, idx:4199/10845, loss:0.8537403460201763, acc:0.7519047619047619


 40%|███▉      | 4300/10845 [41:53<1:12:34,  1.50it/s, acc=0.752, epoch=21, loss=0.854]

epoch:21, idx:4299/10845, loss:0.8536963551682095, acc:0.7519767441860465


 41%|████      | 4400/10845 [42:52<1:02:30,  1.72it/s, acc=0.752, epoch=21, loss=0.853]

epoch:21, idx:4399/10845, loss:0.8526335966857996, acc:0.7518181818181818


 41%|████▏     | 4500/10845 [43:53<1:07:50,  1.56it/s, acc=0.752, epoch=21, loss=0.853]

epoch:21, idx:4499/10845, loss:0.8527244627078374, acc:0.7517777777777778


 42%|████▏     | 4600/10845 [44:57<1:13:34,  1.41it/s, acc=0.751, epoch=21, loss=0.856]

epoch:21, idx:4599/10845, loss:0.8558130374162094, acc:0.7511413043478261


 43%|████▎     | 4700/10845 [45:56<56:44,  1.81it/s, acc=0.751, epoch=21, loss=0.858]  

epoch:21, idx:4699/10845, loss:0.8575173966745113, acc:0.7510106382978723


 44%|████▍     | 4800/10845 [46:52<55:19,  1.82it/s, acc=0.751, epoch=21, loss=0.858]  

epoch:21, idx:4799/10845, loss:0.8575264743901789, acc:0.7511458333333333


 45%|████▌     | 4900/10845 [47:54<54:17,  1.83it/s, acc=0.751, epoch=21, loss=0.861]  

epoch:21, idx:4899/10845, loss:0.8613064336959196, acc:0.7505612244897959


 46%|████▌     | 5000/10845 [48:56<1:12:01,  1.35it/s, acc=0.749, epoch=21, loss=0.865]

epoch:21, idx:4999/10845, loss:0.8649974806964398, acc:0.7494


 47%|████▋     | 5100/10845 [49:55<57:37,  1.66it/s, acc=0.749, epoch=21, loss=0.866]  

epoch:21, idx:5099/10845, loss:0.8660862632826263, acc:0.7492156862745099


 48%|████▊     | 5200/10845 [50:54<53:14,  1.77it/s, acc=0.749, epoch=21, loss=0.866]  

epoch:21, idx:5199/10845, loss:0.8659312318494686, acc:0.7488942307692308


 49%|████▉     | 5300/10845 [51:53<49:57,  1.85it/s, acc=0.749, epoch=21, loss=0.866]  

epoch:21, idx:5299/10845, loss:0.8660584003520462, acc:0.7485849056603774


 50%|████▉     | 5400/10845 [52:53<40:08,  2.26it/s, acc=0.749, epoch=21, loss=0.867]  

epoch:21, idx:5399/10845, loss:0.8671784198725665, acc:0.7485185185185185


 51%|█████     | 5500/10845 [53:52<35:07,  2.54it/s, acc=0.749, epoch=21, loss=0.865]  

epoch:21, idx:5499/10845, loss:0.864749259352684, acc:0.7489090909090909


 52%|█████▏    | 5600/10845 [54:44<48:14,  1.81it/s, acc=0.75, epoch=21, loss=0.862]   

epoch:21, idx:5599/10845, loss:0.8622477565280029, acc:0.7496428571428572


 53%|█████▎    | 5700/10845 [55:44<53:07,  1.61it/s, acc=0.749, epoch=21, loss=0.864]  

epoch:21, idx:5699/10845, loss:0.8637303018883655, acc:0.7493421052631579


 53%|█████▎    | 5800/10845 [56:44<40:30,  2.08it/s, acc=0.749, epoch=21, loss=0.864]  

epoch:21, idx:5799/10845, loss:0.8636815764677935, acc:0.7491379310344828


 54%|█████▍    | 5900/10845 [57:40<35:16,  2.34it/s, acc=0.749, epoch=21, loss=0.863]  

epoch:21, idx:5899/10845, loss:0.8629135735863346, acc:0.7489406779661016


 55%|█████▌    | 6000/10845 [58:39<1:01:29,  1.31it/s, acc=0.749, epoch=21, loss=0.862]

epoch:21, idx:5999/10845, loss:0.8617605585654576, acc:0.749


 56%|█████▌    | 6100/10845 [59:39<1:02:26,  1.27it/s, acc=0.749, epoch=21, loss=0.863]

epoch:21, idx:6099/10845, loss:0.8625302823938307, acc:0.7486885245901639


 57%|█████▋    | 6200/10845 [1:00:35<59:31,  1.30it/s, acc=0.748, epoch=21, loss=0.864]  

epoch:21, idx:6199/10845, loss:0.8640072650774833, acc:0.7480241935483871


 58%|█████▊    | 6300/10845 [1:01:37<40:55,  1.85it/s, acc=0.748, epoch=21, loss=0.864]  

epoch:21, idx:6299/10845, loss:0.8639731095518385, acc:0.7478571428571429


 59%|█████▉    | 6400/10845 [1:02:36<43:22,  1.71it/s, acc=0.748, epoch=21, loss=0.863]  

epoch:21, idx:6399/10845, loss:0.8628543256036937, acc:0.7478515625


 60%|█████▉    | 6501/10845 [1:03:33<32:52,  2.20it/s, acc=0.748, epoch=21, loss=0.864]  

epoch:21, idx:6499/10845, loss:0.8636595410567064, acc:0.7478461538461538


 61%|██████    | 6600/10845 [1:04:27<38:40,  1.83it/s, acc=0.748, epoch=21, loss=0.864]

epoch:21, idx:6599/10845, loss:0.863757277618755, acc:0.7479166666666667


 62%|██████▏   | 6700/10845 [1:05:25<44:59,  1.54it/s, acc=0.748, epoch=21, loss=0.863]  

epoch:21, idx:6699/10845, loss:0.8633128676307735, acc:0.7477238805970149


 63%|██████▎   | 6800/10845 [1:06:26<38:39,  1.74it/s, acc=0.747, epoch=21, loss=0.864]  

epoch:21, idx:6799/10845, loss:0.8637400183344589, acc:0.747389705882353


 64%|██████▎   | 6900/10845 [1:07:23<39:18,  1.67it/s, acc=0.747, epoch=21, loss=0.866]  

epoch:21, idx:6899/10845, loss:0.865622953262882, acc:0.7474637681159421


 65%|██████▍   | 7000/10845 [1:08:22<42:59,  1.49it/s, acc=0.747, epoch=21, loss=0.867]

epoch:21, idx:6999/10845, loss:0.8672810184700148, acc:0.7468571428571429


 65%|██████▌   | 7100/10845 [1:09:20<39:10,  1.59it/s, acc=0.747, epoch=21, loss=0.867]  

epoch:21, idx:7099/10845, loss:0.86664477086403, acc:0.7467253521126761


 66%|██████▋   | 7200/10845 [1:10:19<28:57,  2.10it/s, acc=0.746, epoch=21, loss=0.868]  

epoch:21, idx:7199/10845, loss:0.8676999450143841, acc:0.7461111111111111


 67%|██████▋   | 7300/10845 [1:11:18<32:09,  1.84it/s, acc=0.746, epoch=21, loss=0.866]

epoch:21, idx:7299/10845, loss:0.8659146403531506, acc:0.7463356164383562


 68%|██████▊   | 7401/10845 [1:12:19<35:16,  1.63it/s, acc=0.746, epoch=21, loss=0.868]

epoch:21, idx:7399/10845, loss:0.868383292861887, acc:0.7459121621621622


 69%|██████▉   | 7500/10845 [1:13:16<33:25,  1.67it/s, acc=0.746, epoch=21, loss=0.867]

epoch:21, idx:7499/10845, loss:0.8670186305522919, acc:0.7458666666666667


 70%|███████   | 7600/10845 [1:14:16<26:15,  2.06it/s, acc=0.746, epoch=21, loss=0.866]

epoch:21, idx:7599/10845, loss:0.8658713849359437, acc:0.7461842105263158


 71%|███████   | 7700/10845 [1:15:15<22:50,  2.30it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:7699/10845, loss:0.86536143962439, acc:0.7463311688311688


 72%|███████▏  | 7800/10845 [1:16:13<20:39,  2.46it/s, acc=0.747, epoch=21, loss=0.864]

epoch:21, idx:7799/10845, loss:0.8643362363638022, acc:0.7465384615384615


 73%|███████▎  | 7900/10845 [1:17:06<29:12,  1.68it/s, acc=0.747, epoch=21, loss=0.864]

epoch:21, idx:7899/10845, loss:0.8638470778812336, acc:0.7467721518987341


 74%|███████▍  | 8000/10845 [1:18:08<26:07,  1.82it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:7999/10845, loss:0.8653213452473283, acc:0.74628125


 75%|███████▍  | 8100/10845 [1:19:04<30:18,  1.51it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:8099/10845, loss:0.8647746844056212, acc:0.7464197530864197


 76%|███████▌  | 8200/10845 [1:20:05<21:36,  2.04it/s, acc=0.747, epoch=21, loss=0.863]

epoch:21, idx:8199/10845, loss:0.8630773323774338, acc:0.7467987804878049


 77%|███████▋  | 8300/10845 [1:21:04<28:32,  1.49it/s, acc=0.747, epoch=21, loss=0.863]

epoch:21, idx:8299/10845, loss:0.8631829141421491, acc:0.7466265060240964


 77%|███████▋  | 8400/10845 [1:22:02<27:17,  1.49it/s, acc=0.747, epoch=21, loss=0.862]

epoch:21, idx:8399/10845, loss:0.8615207454065482, acc:0.7470833333333333


 78%|███████▊  | 8500/10845 [1:22:59<20:17,  1.93it/s, acc=0.747, epoch=21, loss=0.861]

epoch:21, idx:8499/10845, loss:0.8608547628346611, acc:0.7470294117647058


 79%|███████▉  | 8600/10845 [1:23:56<22:06,  1.69it/s, acc=0.747, epoch=21, loss=0.86] 

epoch:21, idx:8599/10845, loss:0.8596690155184546, acc:0.747296511627907


 80%|████████  | 8700/10845 [1:24:52<17:37,  2.03it/s, acc=0.747, epoch=21, loss=0.86] 

epoch:21, idx:8699/10845, loss:0.8599541116377403, acc:0.7472701149425287


 81%|████████  | 8800/10845 [1:25:52<22:28,  1.52it/s, acc=0.748, epoch=21, loss=0.86] 

epoch:21, idx:8799/10845, loss:0.8599209561334414, acc:0.7475


 82%|████████▏ | 8900/10845 [1:26:48<17:58,  1.80it/s, acc=0.747, epoch=21, loss=0.861]

epoch:21, idx:8899/10845, loss:0.8607687722498112, acc:0.7472752808988764


 83%|████████▎ | 9000/10845 [1:27:46<18:45,  1.64it/s, acc=0.747, epoch=21, loss=0.861]

epoch:21, idx:8999/10845, loss:0.8607782338791423, acc:0.7473055555555556


 84%|████████▍ | 9100/10845 [1:28:42<16:00,  1.82it/s, acc=0.747, epoch=21, loss=0.862]

epoch:21, idx:9099/10845, loss:0.8615109916702731, acc:0.7470879120879121


 85%|████████▍ | 9200/10845 [1:29:43<15:46,  1.74it/s, acc=0.747, epoch=21, loss=0.86] 

epoch:21, idx:9199/10845, loss:0.860430483829392, acc:0.7474728260869565


 86%|████████▌ | 9300/10845 [1:30:39<12:41,  2.03it/s, acc=0.747, epoch=21, loss=0.861]

epoch:21, idx:9299/10845, loss:0.8613469242969508, acc:0.7468817204301075


 87%|████████▋ | 9400/10845 [1:31:37<14:05,  1.71it/s, acc=0.747, epoch=21, loss=0.863]

epoch:21, idx:9399/10845, loss:0.8629208610016614, acc:0.7467553191489362


 88%|████████▊ | 9500/10845 [1:32:32<13:52,  1.62it/s, acc=0.746, epoch=21, loss=0.864]

epoch:21, idx:9499/10845, loss:0.8644698131131499, acc:0.7463947368421052


 89%|████████▊ | 9601/10845 [1:33:32<10:10,  2.04it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:9599/10845, loss:0.8645189518217619, acc:0.7462760416666666


 89%|████████▉ | 9700/10845 [1:34:30<11:10,  1.71it/s, acc=0.746, epoch=21, loss=0.866]

epoch:21, idx:9699/10845, loss:0.8658901172615203, acc:0.7459536082474226


 90%|█████████ | 9800/10845 [1:35:26<07:40,  2.27it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:9799/10845, loss:0.8647373880096236, acc:0.7460969387755102


 91%|█████████▏| 9900/10845 [1:36:22<09:15,  1.70it/s, acc=0.746, epoch=21, loss=0.864]

epoch:21, idx:9899/10845, loss:0.8639089527319778, acc:0.7460858585858586


 92%|█████████▏| 10000/10845 [1:37:18<08:55,  1.58it/s, acc=0.746, epoch=21, loss=0.864]

epoch:21, idx:9999/10845, loss:0.863722086726129, acc:0.745875


 93%|█████████▎| 10100/10845 [1:38:16<06:14,  1.99it/s, acc=0.746, epoch=21, loss=0.864]

epoch:21, idx:10099/10845, loss:0.863671716627213, acc:0.745990099009901


 94%|█████████▍| 10200/10845 [1:39:13<06:42,  1.60it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:10199/10845, loss:0.8647800546869928, acc:0.7457843137254901


 95%|█████████▍| 10300/10845 [1:40:10<05:50,  1.55it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:10299/10845, loss:0.8648004445158741, acc:0.7456796116504855


 96%|█████████▌| 10400/10845 [1:41:09<04:50,  1.53it/s, acc=0.746, epoch=21, loss=0.865]

epoch:21, idx:10399/10845, loss:0.8651506148808851, acc:0.7455528846153846


 97%|█████████▋| 10500/10845 [1:42:02<02:51,  2.01it/s, acc=0.745, epoch=21, loss=0.865]

epoch:21, idx:10499/10845, loss:0.8654098052652109, acc:0.7453809523809524


 98%|█████████▊| 10600/10845 [1:42:58<03:21,  1.22it/s, acc=0.745, epoch=21, loss=0.865]

epoch:21, idx:10599/10845, loss:0.8650380172945981, acc:0.7454009433962264


 99%|█████████▊| 10700/10845 [1:43:53<01:32,  1.57it/s, acc=0.745, epoch=21, loss=0.865]

epoch:21, idx:10699/10845, loss:0.865426479729815, acc:0.7449532710280374


100%|█████████▉| 10800/10845 [1:44:49<00:22,  1.98it/s, acc=0.745, epoch=21, loss=0.864]

epoch:21, idx:10799/10845, loss:0.8644250645474704, acc:0.7451851851851852


100%|██████████| 10845/10845 [1:45:15<00:00,  1.87it/s, acc=0.745, epoch=21, loss=0.865]


epoch:21, idx:0/1275, loss:1.1931732892990112, acc:0.5
epoch:21, idx:100/1275, loss:1.2373506367796718, acc:0.6188118811881188
epoch:21, idx:200/1275, loss:1.1674648600431223, acc:0.6393034825870647
epoch:21, idx:300/1275, loss:1.1794638788185245, acc:0.6411960132890365
epoch:21, idx:400/1275, loss:1.151351435523378, acc:0.6471321695760599
epoch:21, idx:500/1275, loss:1.145800833454627, acc:0.6452095808383234
epoch:21, idx:600/1275, loss:1.1542570802217316, acc:0.6460066555740432
epoch:21, idx:700/1275, loss:1.1697595064208104, acc:0.6440798858773181
epoch:21, idx:800/1275, loss:1.175976907492577, acc:0.6441947565543071
epoch:21, idx:900/1275, loss:1.1635404518918113, acc:0.6476137624861266
epoch:21, idx:1000/1275, loss:1.1631577625260368, acc:0.6486013986013986
epoch:21, idx:1100/1275, loss:1.156066803040015, acc:0.650772025431426
epoch:21, idx:1200/1275, loss:1.1571574844388939, acc:0.6490424646128227


  1%|          | 100/10845 [00:46<1:41:19,  1.77it/s, acc=0.76, epoch=22, loss=0.78] 

epoch:22, idx:99/10845, loss:0.7802146279811859, acc:0.76


  2%|▏         | 201/10845 [01:40<1:13:20,  2.42it/s, acc=0.761, epoch=22, loss=0.779]

epoch:22, idx:199/10845, loss:0.7832261529564858, acc:0.76


  3%|▎         | 300/10845 [02:31<1:40:06,  1.76it/s, acc=0.76, epoch=22, loss=0.796] 

epoch:22, idx:299/10845, loss:0.7963757228851318, acc:0.76


  4%|▎         | 400/10845 [03:24<1:20:30,  2.16it/s, acc=0.767, epoch=22, loss=0.8]  

epoch:22, idx:399/10845, loss:0.8003105272352695, acc:0.7675


  5%|▍         | 500/10845 [04:16<1:33:04,  1.85it/s, acc=0.764, epoch=22, loss=0.8]  

epoch:22, idx:499/10845, loss:0.7995090876817703, acc:0.764


  6%|▌         | 601/10845 [05:10<1:16:24,  2.23it/s, acc=0.765, epoch=22, loss=0.798]

epoch:22, idx:599/10845, loss:0.7989010641972224, acc:0.765


  6%|▋         | 700/10845 [06:03<2:17:31,  1.23it/s, acc=0.763, epoch=22, loss=0.812]

epoch:22, idx:699/10845, loss:0.8124010939257486, acc:0.7628571428571429


  7%|▋         | 800/10845 [06:58<2:07:41,  1.31it/s, acc=0.766, epoch=22, loss=0.796]

epoch:22, idx:799/10845, loss:0.7957900170981884, acc:0.76625


  8%|▊         | 900/10845 [07:47<1:20:24,  2.06it/s, acc=0.766, epoch=22, loss=0.795]

epoch:22, idx:899/10845, loss:0.7954039056433572, acc:0.7663888888888889


  9%|▉         | 1000/10845 [08:42<1:04:51,  2.53it/s, acc=0.767, epoch=22, loss=0.793]

epoch:22, idx:999/10845, loss:0.7928117575645447, acc:0.76675


 10%|█         | 1100/10845 [09:33<1:05:52,  2.47it/s, acc=0.764, epoch=22, loss=0.802]

epoch:22, idx:1099/10845, loss:0.8021016693657095, acc:0.7643181818181818


 11%|█         | 1200/10845 [10:30<1:28:25,  1.82it/s, acc=0.765, epoch=22, loss=0.801]

epoch:22, idx:1199/10845, loss:0.8013103234022856, acc:0.7645833333333333


 12%|█▏        | 1300/10845 [11:24<1:41:41,  1.56it/s, acc=0.764, epoch=22, loss=0.801]

epoch:22, idx:1299/10845, loss:0.8012506140424656, acc:0.7642307692307693


 13%|█▎        | 1400/10845 [12:19<1:19:23,  1.98it/s, acc=0.764, epoch=22, loss=0.8]  

epoch:22, idx:1399/10845, loss:0.8004539707515921, acc:0.7641071428571429


 14%|█▍        | 1500/10845 [13:13<1:08:11,  2.28it/s, acc=0.764, epoch=22, loss=0.803]

epoch:22, idx:1499/10845, loss:0.803369632422924, acc:0.7638333333333334


 15%|█▍        | 1600/10845 [14:07<1:08:33,  2.25it/s, acc=0.762, epoch=22, loss=0.809]

epoch:22, idx:1599/10845, loss:0.8092856105230749, acc:0.76203125


 16%|█▌        | 1700/10845 [15:03<1:19:14,  1.92it/s, acc=0.762, epoch=22, loss=0.804]

epoch:22, idx:1699/10845, loss:0.8040408699828036, acc:0.7620588235294118


 17%|█▋        | 1800/10845 [16:00<1:17:08,  1.95it/s, acc=0.762, epoch=22, loss=0.806]

epoch:22, idx:1799/10845, loss:0.8055712320241664, acc:0.7623611111111112


 18%|█▊        | 1900/10845 [16:51<1:06:14,  2.25it/s, acc=0.763, epoch=22, loss=0.803]

epoch:22, idx:1899/10845, loss:0.8029456566352593, acc:0.7627631578947368


 18%|█▊        | 2000/10845 [17:46<1:17:39,  1.90it/s, acc=0.762, epoch=22, loss=0.806]

epoch:22, idx:1999/10845, loss:0.8061268687099218, acc:0.761875


 19%|█▉        | 2100/10845 [18:37<1:22:33,  1.77it/s, acc=0.761, epoch=22, loss=0.812]

epoch:22, idx:2099/10845, loss:0.8117700424222719, acc:0.7611904761904762


 20%|██        | 2200/10845 [19:33<1:26:56,  1.66it/s, acc=0.762, epoch=22, loss=0.809]

epoch:22, idx:2199/10845, loss:0.8087653310326013, acc:0.7618181818181818


 21%|██        | 2300/10845 [20:27<52:34,  2.71it/s, acc=0.762, epoch=22, loss=0.808]  

epoch:22, idx:2299/10845, loss:0.8079408687223559, acc:0.7625


 22%|██▏       | 2400/10845 [21:27<1:14:34,  1.89it/s, acc=0.761, epoch=22, loss=0.811]

epoch:22, idx:2399/10845, loss:0.810707108862698, acc:0.7609375


 23%|██▎       | 2500/10845 [22:22<1:22:10,  1.69it/s, acc=0.761, epoch=22, loss=0.809]

epoch:22, idx:2499/10845, loss:0.8092973097920417, acc:0.7608


 24%|██▍       | 2600/10845 [23:18<1:12:00,  1.91it/s, acc=0.76, epoch=22, loss=0.806] 

epoch:22, idx:2599/10845, loss:0.8059403128921986, acc:0.7602884615384615


 25%|██▍       | 2700/10845 [24:12<59:16,  2.29it/s, acc=0.761, epoch=22, loss=0.805]  

epoch:22, idx:2699/10845, loss:0.8049862455549064, acc:0.7609259259259259


 26%|██▌       | 2800/10845 [25:03<57:47,  2.32it/s, acc=0.76, epoch=22, loss=0.808]   

epoch:22, idx:2799/10845, loss:0.8081168800485986, acc:0.7604464285714285


 27%|██▋       | 2900/10845 [25:56<1:27:07,  1.52it/s, acc=0.76, epoch=22, loss=0.807] 

epoch:22, idx:2899/10845, loss:0.8066639745543743, acc:0.7603448275862069


 28%|██▊       | 3000/10845 [26:53<1:15:22,  1.73it/s, acc=0.76, epoch=22, loss=0.807] 

epoch:22, idx:2999/10845, loss:0.8066324381728966, acc:0.76025


 29%|██▊       | 3100/10845 [27:45<1:06:43,  1.93it/s, acc=0.759, epoch=22, loss=0.809]

epoch:22, idx:3099/10845, loss:0.8089323741678268, acc:0.7590322580645161


 30%|██▉       | 3200/10845 [28:33<1:01:48,  2.06it/s, acc=0.76, epoch=22, loss=0.807] 

epoch:22, idx:3199/10845, loss:0.8068244208116084, acc:0.75984375


 30%|███       | 3300/10845 [29:28<1:19:49,  1.58it/s, acc=0.76, epoch=22, loss=0.807]

epoch:22, idx:3299/10845, loss:0.8066150453686715, acc:0.7598484848484849


 31%|███▏      | 3400/10845 [30:20<1:12:17,  1.72it/s, acc=0.76, epoch=22, loss=0.806]

epoch:22, idx:3399/10845, loss:0.8062182985947413, acc:0.76


 32%|███▏      | 3500/10845 [31:15<1:18:59,  1.55it/s, acc=0.76, epoch=22, loss=0.805] 

epoch:22, idx:3499/10845, loss:0.8045274953075817, acc:0.7605


 33%|███▎      | 3600/10845 [32:09<1:24:45,  1.42it/s, acc=0.761, epoch=22, loss=0.807]

epoch:22, idx:3599/10845, loss:0.8065524783482154, acc:0.760625


 34%|███▍      | 3700/10845 [33:04<57:36,  2.07it/s, acc=0.76, epoch=22, loss=0.808]   

epoch:22, idx:3699/10845, loss:0.8078218282960556, acc:0.7600675675675675


 35%|███▌      | 3800/10845 [33:56<1:11:58,  1.63it/s, acc=0.76, epoch=22, loss=0.808]

epoch:22, idx:3799/10845, loss:0.8078872701917824, acc:0.760328947368421


 36%|███▌      | 3900/10845 [34:55<1:02:00,  1.87it/s, acc=0.76, epoch=22, loss=0.811] 

epoch:22, idx:3899/10845, loss:0.8106787508344039, acc:0.7600641025641026


 37%|███▋      | 4000/10845 [35:47<46:14,  2.47it/s, acc=0.76, epoch=22, loss=0.811]   

epoch:22, idx:3999/10845, loss:0.8114613158628344, acc:0.759625


 38%|███▊      | 4100/10845 [36:38<59:31,  1.89it/s, acc=0.758, epoch=22, loss=0.814]  

epoch:22, idx:4099/10845, loss:0.8137441299819365, acc:0.7584756097560975


 39%|███▊      | 4200/10845 [37:35<1:02:24,  1.77it/s, acc=0.758, epoch=22, loss=0.814]

epoch:22, idx:4199/10845, loss:0.8135234154902753, acc:0.7582738095238095


 40%|███▉      | 4300/10845 [38:28<58:03,  1.88it/s, acc=0.758, epoch=22, loss=0.812]  

epoch:22, idx:4299/10845, loss:0.8123381964481154, acc:0.7580232558139535


 41%|████      | 4400/10845 [39:21<1:03:22,  1.69it/s, acc=0.758, epoch=22, loss=0.812]

epoch:22, idx:4399/10845, loss:0.8119354052909395, acc:0.7576704545454546


 41%|████▏     | 4500/10845 [40:20<1:11:27,  1.48it/s, acc=0.757, epoch=22, loss=0.813]

epoch:22, idx:4499/10845, loss:0.8133215810259183, acc:0.7569444444444444


 42%|████▏     | 4600/10845 [41:14<55:00,  1.89it/s, acc=0.758, epoch=22, loss=0.811]  

epoch:22, idx:4599/10845, loss:0.8108696598397649, acc:0.757554347826087


 43%|████▎     | 4700/10845 [42:03<51:17,  2.00it/s, acc=0.758, epoch=22, loss=0.812]  

epoch:22, idx:4699/10845, loss:0.8123134654189678, acc:0.7576595744680851


 44%|████▍     | 4800/10845 [42:49<47:26,  2.12it/s, acc=0.757, epoch=22, loss=0.814]  

epoch:22, idx:4799/10845, loss:0.8143649823963642, acc:0.75734375


 45%|████▌     | 4900/10845 [43:35<1:01:13,  1.62it/s, acc=0.757, epoch=22, loss=0.815]

epoch:22, idx:4899/10845, loss:0.8146188226524664, acc:0.7570918367346938


 46%|████▌     | 5000/10845 [44:20<47:36,  2.05it/s, acc=0.757, epoch=22, loss=0.817]  

epoch:22, idx:4999/10845, loss:0.8170362397551536, acc:0.7568


 47%|████▋     | 5100/10845 [45:08<51:26,  1.86it/s, acc=0.757, epoch=22, loss=0.816]  

epoch:22, idx:5099/10845, loss:0.8156249184000726, acc:0.7568627450980392


 48%|████▊     | 5200/10845 [45:49<35:06,  2.68it/s, acc=0.757, epoch=22, loss=0.817]

epoch:22, idx:5199/10845, loss:0.8168357744239844, acc:0.7572596153846154


 49%|████▉     | 5300/10845 [46:34<32:10,  2.87it/s, acc=0.757, epoch=22, loss=0.816]

epoch:22, idx:5299/10845, loss:0.8156566103224484, acc:0.756933962264151


 50%|████▉     | 5400/10845 [47:20<48:23,  1.88it/s, acc=0.757, epoch=22, loss=0.816]

epoch:22, idx:5399/10845, loss:0.8157777029827789, acc:0.7567592592592592


 51%|█████     | 5500/10845 [48:05<36:52,  2.42it/s, acc=0.757, epoch=22, loss=0.815]  

epoch:22, idx:5499/10845, loss:0.8145307123444298, acc:0.7568181818181818


 52%|█████▏    | 5600/10845 [48:55<38:07,  2.29it/s, acc=0.756, epoch=22, loss=0.818]  

epoch:22, idx:5599/10845, loss:0.8175191456452012, acc:0.7562946428571429


 53%|█████▎    | 5700/10845 [49:48<52:32,  1.63it/s, acc=0.756, epoch=22, loss=0.82]   

epoch:22, idx:5699/10845, loss:0.8197845947585608, acc:0.7555263157894737


 53%|█████▎    | 5800/10845 [50:42<52:06,  1.61it/s, acc=0.755, epoch=22, loss=0.821]  

epoch:22, idx:5799/10845, loss:0.8212169654461844, acc:0.7554310344827586


 54%|█████▍    | 5900/10845 [51:36<42:53,  1.92it/s, acc=0.755, epoch=22, loss=0.822]  

epoch:22, idx:5899/10845, loss:0.8223211396902295, acc:0.7550847457627119


 55%|█████▌    | 6000/10845 [52:29<29:18,  2.76it/s, acc=0.755, epoch=22, loss=0.822]  

epoch:22, idx:5999/10845, loss:0.8223920408437649, acc:0.7549583333333333


 56%|█████▌    | 6100/10845 [53:26<35:35,  2.22it/s, acc=0.755, epoch=22, loss=0.821]  

epoch:22, idx:6099/10845, loss:0.8205143405423789, acc:0.7550819672131147


 57%|█████▋    | 6200/10845 [54:18<38:18,  2.02it/s, acc=0.755, epoch=22, loss=0.82]  

epoch:22, idx:6199/10845, loss:0.8197690554732276, acc:0.7551209677419355


 58%|█████▊    | 6300/10845 [55:11<41:39,  1.82it/s, acc=0.755, epoch=22, loss=0.82] 

epoch:22, idx:6299/10845, loss:0.8203143364995245, acc:0.7549206349206349


 59%|█████▉    | 6400/10845 [56:01<47:06,  1.57it/s, acc=0.754, epoch=22, loss=0.822]

epoch:22, idx:6399/10845, loss:0.822149331565015, acc:0.75421875


 60%|█████▉    | 6500/10845 [56:53<41:02,  1.76it/s, acc=0.754, epoch=22, loss=0.822]

epoch:22, idx:6499/10845, loss:0.8220138011391346, acc:0.7542307692307693


 61%|██████    | 6601/10845 [57:47<24:30,  2.89it/s, acc=0.754, epoch=22, loss=0.824]

epoch:22, idx:6599/10845, loss:0.8237366198183912, acc:0.7542424242424243


 62%|██████▏   | 6700/10845 [58:39<34:21,  2.01it/s, acc=0.754, epoch=22, loss=0.825]

epoch:22, idx:6699/10845, loss:0.8254683541317485, acc:0.7539925373134329


 63%|██████▎   | 6800/10845 [59:30<32:35,  2.07it/s, acc=0.754, epoch=22, loss=0.825]

epoch:22, idx:6799/10845, loss:0.8253724132609718, acc:0.7541544117647059


 64%|██████▎   | 6900/10845 [1:00:22<34:42,  1.89it/s, acc=0.754, epoch=22, loss=0.826]

epoch:22, idx:6899/10845, loss:0.825517417801463, acc:0.7542753623188406


 65%|██████▍   | 7000/10845 [1:01:18<34:18,  1.87it/s, acc=0.754, epoch=22, loss=0.826]

epoch:22, idx:6999/10845, loss:0.8264189785931791, acc:0.7537857142857143


 65%|██████▌   | 7100/10845 [1:02:09<32:53,  1.90it/s, acc=0.754, epoch=22, loss=0.827]

epoch:22, idx:7099/10845, loss:0.8270343045743418, acc:0.7537676056338029


 66%|██████▋   | 7200/10845 [1:03:03<23:44,  2.56it/s, acc=0.754, epoch=22, loss=0.827]

epoch:22, idx:7199/10845, loss:0.8267437512634529, acc:0.7537152777777778


 67%|██████▋   | 7300/10845 [1:03:55<35:40,  1.66it/s, acc=0.753, epoch=22, loss=0.828]

epoch:22, idx:7299/10845, loss:0.8276276584806508, acc:0.7534931506849315


 68%|██████▊   | 7400/10845 [1:04:50<28:02,  2.05it/s, acc=0.753, epoch=22, loss=0.83] 

epoch:22, idx:7399/10845, loss:0.8299158302634149, acc:0.7528378378378379


 69%|██████▉   | 7500/10845 [1:05:40<28:36,  1.95it/s, acc=0.753, epoch=22, loss=0.83] 

epoch:22, idx:7499/10845, loss:0.8299788718104363, acc:0.7528666666666667


 70%|███████   | 7600/10845 [1:06:35<30:34,  1.77it/s, acc=0.752, epoch=22, loss=0.832]

epoch:22, idx:7599/10845, loss:0.8318791861949782, acc:0.7524013157894737


 71%|███████   | 7700/10845 [1:07:24<34:22,  1.53it/s, acc=0.752, epoch=22, loss=0.833]

epoch:22, idx:7699/10845, loss:0.8329333641660678, acc:0.7519155844155844


 72%|███████▏  | 7800/10845 [1:08:16<24:20,  2.08it/s, acc=0.752, epoch=22, loss=0.834]

epoch:22, idx:7799/10845, loss:0.8337197867074074, acc:0.7518910256410256


 73%|███████▎  | 7900/10845 [1:09:06<21:22,  2.30it/s, acc=0.752, epoch=22, loss=0.834]

epoch:22, idx:7899/10845, loss:0.8336881381763688, acc:0.7522468354430379


 74%|███████▍  | 8000/10845 [1:10:00<23:39,  2.00it/s, acc=0.752, epoch=22, loss=0.834]

epoch:22, idx:7999/10845, loss:0.8337795201353729, acc:0.75228125


 75%|███████▍  | 8100/10845 [1:10:54<31:04,  1.47it/s, acc=0.752, epoch=22, loss=0.835]

epoch:22, idx:8099/10845, loss:0.835199410955847, acc:0.7515740740740741


 76%|███████▌  | 8200/10845 [1:11:45<30:44,  1.43it/s, acc=0.751, epoch=22, loss=0.836]

epoch:22, idx:8199/10845, loss:0.8357943579227459, acc:0.7513719512195122


 77%|███████▋  | 8300/10845 [1:12:39<23:34,  1.80it/s, acc=0.751, epoch=22, loss=0.838]

epoch:22, idx:8299/10845, loss:0.838436739972557, acc:0.7505722891566265


 77%|███████▋  | 8400/10845 [1:13:28<21:16,  1.92it/s, acc=0.751, epoch=22, loss=0.838]

epoch:22, idx:8399/10845, loss:0.8381860706813279, acc:0.7508333333333334


 78%|███████▊  | 8500/10845 [1:14:21<19:37,  1.99it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:8499/10845, loss:0.8400134056000148, acc:0.7506764705882353


 79%|███████▉  | 8600/10845 [1:15:14<19:34,  1.91it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:8599/10845, loss:0.8399961731219014, acc:0.7506686046511628


 80%|████████  | 8700/10845 [1:16:06<22:20,  1.60it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:8699/10845, loss:0.8398108491438558, acc:0.750919540229885


 81%|████████  | 8800/10845 [1:16:56<15:16,  2.23it/s, acc=0.751, epoch=22, loss=0.838]

epoch:22, idx:8799/10845, loss:0.8376943028548902, acc:0.7514772727272727


 82%|████████▏ | 8900/10845 [1:17:50<17:06,  1.89it/s, acc=0.751, epoch=22, loss=0.839]

epoch:22, idx:8899/10845, loss:0.8393331529149849, acc:0.7512359550561798


 83%|████████▎ | 9000/10845 [1:18:44<15:51,  1.94it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:8999/10845, loss:0.8399859479831324, acc:0.7508888888888889


 84%|████████▍ | 9100/10845 [1:19:35<19:11,  1.51it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:9099/10845, loss:0.8399833656503604, acc:0.7508241758241758


 85%|████████▍ | 9200/10845 [1:20:33<12:49,  2.14it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:9199/10845, loss:0.8410256823864968, acc:0.7507880434782609


 86%|████████▌ | 9300/10845 [1:21:24<12:52,  2.00it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:9299/10845, loss:0.8406310606675763, acc:0.7508333333333334


 87%|████████▋ | 9400/10845 [1:22:16<11:28,  2.10it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:9399/10845, loss:0.8404482535827668, acc:0.7508510638297873


 88%|████████▊ | 9500/10845 [1:23:15<13:56,  1.61it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:9499/10845, loss:0.8395373272723273, acc:0.7509210526315789


 89%|████████▊ | 9600/10845 [1:24:11<08:28,  2.45it/s, acc=0.751, epoch=22, loss=0.838]

epoch:22, idx:9599/10845, loss:0.8381401667635267, acc:0.7513020833333334


 89%|████████▉ | 9700/10845 [1:25:04<10:18,  1.85it/s, acc=0.751, epoch=22, loss=0.839]

epoch:22, idx:9699/10845, loss:0.838649180244232, acc:0.7512113402061855


 90%|█████████ | 9800/10845 [1:25:59<10:29,  1.66it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:9799/10845, loss:0.8406067829731168, acc:0.7509183673469387


 91%|█████████▏| 9900/10845 [1:26:54<10:17,  1.53it/s, acc=0.751, epoch=22, loss=0.84] 

epoch:22, idx:9899/10845, loss:0.8400694615807798, acc:0.7511111111111111


 92%|█████████▏| 10000/10845 [1:27:48<06:44,  2.09it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:9999/10845, loss:0.8406452390387654, acc:0.75085


 93%|█████████▎| 10100/10845 [1:28:39<05:07,  2.42it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:10099/10845, loss:0.840628689357845, acc:0.7507425742574257


 94%|█████████▍| 10200/10845 [1:29:31<05:24,  1.99it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:10199/10845, loss:0.8409626619327887, acc:0.7509313725490196


 95%|█████████▍| 10300/10845 [1:30:27<05:59,  1.51it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:10299/10845, loss:0.8409677939348429, acc:0.7508980582524272


 96%|█████████▌| 10400/10845 [1:31:20<04:37,  1.60it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:10399/10845, loss:0.8407364582972457, acc:0.7508413461538461


 97%|█████████▋| 10500/10845 [1:32:16<03:46,  1.52it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:10499/10845, loss:0.8410670469502608, acc:0.7508571428571429


 98%|█████████▊| 10600/10845 [1:33:09<02:13,  1.83it/s, acc=0.751, epoch=22, loss=0.841]

epoch:22, idx:10599/10845, loss:0.8412102073782457, acc:0.751061320754717


 99%|█████████▊| 10700/10845 [1:34:05<01:01,  2.36it/s, acc=0.751, epoch=22, loss=0.842]

epoch:22, idx:10699/10845, loss:0.8422829384238364, acc:0.7508878504672897


100%|█████████▉| 10800/10845 [1:35:00<00:20,  2.22it/s, acc=0.751, epoch=22, loss=0.844]

epoch:22, idx:10799/10845, loss:0.8436343805781669, acc:0.7506481481481482


100%|██████████| 10845/10845 [1:35:25<00:00,  2.27it/s, acc=0.751, epoch=22, loss=0.844]


epoch:22, idx:0/1275, loss:0.8689100742340088, acc:0.5
epoch:22, idx:100/1275, loss:1.2106701814302123, acc:0.6386138613861386
epoch:22, idx:200/1275, loss:1.1649218409215634, acc:0.6417910447761194
epoch:22, idx:300/1275, loss:1.1806734287461569, acc:0.6420265780730897
epoch:22, idx:400/1275, loss:1.160965956803272, acc:0.6440149625935162
epoch:22, idx:500/1275, loss:1.1528147399544477, acc:0.6432135728542914
epoch:22, idx:600/1275, loss:1.1619663849448205, acc:0.6405990016638935
epoch:22, idx:700/1275, loss:1.176277280874157, acc:0.6383737517831669
epoch:22, idx:800/1275, loss:1.182901872528924, acc:0.6370162297128589
epoch:22, idx:900/1275, loss:1.168785699199757, acc:0.6403995560488346
epoch:22, idx:1000/1275, loss:1.16927273504503, acc:0.6401098901098901
epoch:22, idx:1100/1275, loss:1.1587128657627712, acc:0.6421435059037239
epoch:22, idx:1200/1275, loss:1.1579598556648782, acc:0.6427976686094921


  1%|          | 100/10845 [00:51<1:33:49,  1.91it/s, acc=0.79, epoch=23, loss=0.728]

epoch:23, idx:99/10845, loss:0.7282053858041764, acc:0.79


  2%|▏         | 200/10845 [01:48<1:15:50,  2.34it/s, acc=0.791, epoch=23, loss=0.708]

epoch:23, idx:199/10845, loss:0.7084872508049012, acc:0.79125


  3%|▎         | 300/10845 [02:41<1:26:45,  2.03it/s, acc=0.783, epoch=23, loss=0.745]

epoch:23, idx:299/10845, loss:0.7449991297721863, acc:0.7833333333333333


  4%|▎         | 400/10845 [03:32<1:17:00,  2.26it/s, acc=0.783, epoch=23, loss=0.755]

epoch:23, idx:399/10845, loss:0.7549012492597104, acc:0.783125


  5%|▍         | 500/10845 [04:24<1:18:48,  2.19it/s, acc=0.783, epoch=23, loss=0.75] 

epoch:23, idx:499/10845, loss:0.7500837086439133, acc:0.783


  6%|▌         | 600/10845 [05:20<1:33:06,  1.83it/s, acc=0.782, epoch=23, loss=0.76] 

epoch:23, idx:599/10845, loss:0.7601213641961415, acc:0.7825


  6%|▋         | 700/10845 [06:16<1:17:42,  2.18it/s, acc=0.777, epoch=23, loss=0.788]

epoch:23, idx:699/10845, loss:0.7875520923307964, acc:0.7767857142857143


  7%|▋         | 800/10845 [07:11<1:27:01,  1.92it/s, acc=0.777, epoch=23, loss=0.784]

epoch:23, idx:799/10845, loss:0.7844894440472125, acc:0.7765625


  8%|▊         | 900/10845 [08:04<1:28:03,  1.88it/s, acc=0.781, epoch=23, loss=0.768]

epoch:23, idx:899/10845, loss:0.7681264470352067, acc:0.7808333333333334


  9%|▉         | 1000/10845 [08:58<1:21:12,  2.02it/s, acc=0.781, epoch=23, loss=0.771]

epoch:23, idx:999/10845, loss:0.7713829282820225, acc:0.78125


 10%|█         | 1100/10845 [09:48<52:28,  3.10it/s, acc=0.781, epoch=23, loss=0.772]  

epoch:23, idx:1099/10845, loss:0.7715953965349631, acc:0.7811363636363636


 11%|█         | 1200/10845 [10:40<1:10:56,  2.27it/s, acc=0.778, epoch=23, loss=0.777]

epoch:23, idx:1199/10845, loss:0.7769824519008398, acc:0.7779166666666667


 12%|█▏        | 1300/10845 [11:36<1:10:12,  2.27it/s, acc=0.778, epoch=23, loss=0.776]

epoch:23, idx:1299/10845, loss:0.7762806667043612, acc:0.7780769230769231


 13%|█▎        | 1400/10845 [12:27<1:28:40,  1.78it/s, acc=0.777, epoch=23, loss=0.778]

epoch:23, idx:1399/10845, loss:0.7782247142067977, acc:0.7769642857142857


 14%|█▍        | 1500/10845 [13:23<1:53:24,  1.37it/s, acc=0.774, epoch=23, loss=0.788]

epoch:23, idx:1499/10845, loss:0.787847454170386, acc:0.7743333333333333


 15%|█▍        | 1600/10845 [14:21<1:12:05,  2.14it/s, acc=0.773, epoch=23, loss=0.786]

epoch:23, idx:1599/10845, loss:0.7862797985039651, acc:0.7734375


 16%|█▌        | 1700/10845 [15:13<1:17:25,  1.97it/s, acc=0.772, epoch=23, loss=0.784]

epoch:23, idx:1699/10845, loss:0.7844228113223525, acc:0.7723529411764706


 17%|█▋        | 1800/10845 [16:08<1:13:43,  2.04it/s, acc=0.77, epoch=23, loss=0.792] 

epoch:23, idx:1799/10845, loss:0.7920649059779115, acc:0.7695833333333333


 18%|█▊        | 1900/10845 [16:57<1:32:00,  1.62it/s, acc=0.769, epoch=23, loss=0.793]

epoch:23, idx:1899/10845, loss:0.7929539003811384, acc:0.7689473684210526


 18%|█▊        | 2000/10845 [17:53<1:25:01,  1.73it/s, acc=0.769, epoch=23, loss=0.796]

epoch:23, idx:1999/10845, loss:0.7961893446743489, acc:0.768875


 19%|█▉        | 2100/10845 [18:40<55:03,  2.65it/s, acc=0.768, epoch=23, loss=0.803]  

epoch:23, idx:2099/10845, loss:0.8030761319682712, acc:0.7679761904761905


 20%|██        | 2200/10845 [19:37<1:23:58,  1.72it/s, acc=0.769, epoch=23, loss=0.799]

epoch:23, idx:2199/10845, loss:0.7989713728834282, acc:0.76875


 21%|██        | 2300/10845 [20:30<1:33:08,  1.53it/s, acc=0.769, epoch=23, loss=0.799]

epoch:23, idx:2299/10845, loss:0.7987101303105769, acc:0.7691304347826087


 22%|██▏       | 2400/10845 [21:22<1:38:14,  1.43it/s, acc=0.77, epoch=23, loss=0.798] 

epoch:23, idx:2399/10845, loss:0.7976543205355604, acc:0.7697916666666667


 23%|██▎       | 2500/10845 [22:15<1:33:42,  1.48it/s, acc=0.77, epoch=23, loss=0.793] 

epoch:23, idx:2499/10845, loss:0.7930678346514702, acc:0.7701


 24%|██▍       | 2600/10845 [23:10<1:12:25,  1.90it/s, acc=0.769, epoch=23, loss=0.796]

epoch:23, idx:2599/10845, loss:0.7962718593271879, acc:0.7693269230769231


 25%|██▍       | 2700/10845 [24:04<1:24:10,  1.61it/s, acc=0.769, epoch=23, loss=0.798]

epoch:23, idx:2699/10845, loss:0.7975866334416248, acc:0.7693518518518518


 26%|██▌       | 2800/10845 [24:58<1:24:03,  1.60it/s, acc=0.769, epoch=23, loss=0.8]  

epoch:23, idx:2799/10845, loss:0.8001727388905627, acc:0.7690178571428572


 27%|██▋       | 2900/10845 [25:51<59:00,  2.24it/s, acc=0.769, epoch=23, loss=0.802]  

epoch:23, idx:2899/10845, loss:0.80167750015341, acc:0.7687068965517241


 28%|██▊       | 3000/10845 [26:44<56:05,  2.33it/s, acc=0.769, epoch=23, loss=0.803]  

epoch:23, idx:2999/10845, loss:0.8030385928750038, acc:0.7688333333333334


 29%|██▊       | 3100/10845 [27:33<1:11:31,  1.80it/s, acc=0.767, epoch=23, loss=0.809]

epoch:23, idx:3099/10845, loss:0.8088838804537251, acc:0.7674193548387097


 30%|██▉       | 3200/10845 [28:24<1:08:53,  1.85it/s, acc=0.768, epoch=23, loss=0.806]

epoch:23, idx:3199/10845, loss:0.8056033430248499, acc:0.76796875


 30%|███       | 3301/10845 [29:16<49:51,  2.52it/s, acc=0.768, epoch=23, loss=0.806]  

epoch:23, idx:3299/10845, loss:0.8049520041725853, acc:0.7678030303030303


 31%|███▏      | 3400/10845 [30:14<1:09:28,  1.79it/s, acc=0.768, epoch=23, loss=0.804]

epoch:23, idx:3399/10845, loss:0.8039608556382797, acc:0.7677941176470588


 32%|███▏      | 3500/10845 [31:10<1:02:27,  1.96it/s, acc=0.767, epoch=23, loss=0.804]

epoch:23, idx:3499/10845, loss:0.8039629050152642, acc:0.7672857142857142


 33%|███▎      | 3600/10845 [32:02<57:00,  2.12it/s, acc=0.768, epoch=23, loss=0.804]  

epoch:23, idx:3599/10845, loss:0.8044533286492029, acc:0.7680555555555556


 34%|███▍      | 3700/10845 [32:55<1:01:42,  1.93it/s, acc=0.768, epoch=23, loss=0.803]

epoch:23, idx:3699/10845, loss:0.8034021773692724, acc:0.7677027027027027


 35%|███▌      | 3800/10845 [33:49<52:56,  2.22it/s, acc=0.767, epoch=23, loss=0.803]  

epoch:23, idx:3799/10845, loss:0.8029152520549925, acc:0.7674342105263158


 36%|███▌      | 3900/10845 [34:46<1:09:12,  1.67it/s, acc=0.767, epoch=23, loss=0.805]

epoch:23, idx:3899/10845, loss:0.8049839000518505, acc:0.7669871794871795


 37%|███▋      | 4000/10845 [35:40<1:04:23,  1.77it/s, acc=0.766, epoch=23, loss=0.805]

epoch:23, idx:3999/10845, loss:0.8054718461632728, acc:0.7661875


 38%|███▊      | 4100/10845 [36:33<1:05:37,  1.71it/s, acc=0.766, epoch=23, loss=0.806]

epoch:23, idx:4099/10845, loss:0.8055562319842781, acc:0.766280487804878


 39%|███▊      | 4200/10845 [37:28<56:39,  1.95it/s, acc=0.767, epoch=23, loss=0.804]  

epoch:23, idx:4199/10845, loss:0.8037624392622993, acc:0.7667261904761905


 40%|███▉      | 4301/10845 [38:27<49:06,  2.22it/s, acc=0.766, epoch=23, loss=0.804]  

epoch:23, idx:4299/10845, loss:0.8040970936902734, acc:0.7662209302325581


 41%|████      | 4400/10845 [39:21<55:18,  1.94it/s, acc=0.766, epoch=23, loss=0.806]  

epoch:23, idx:4399/10845, loss:0.8064366699077866, acc:0.7657386363636364


 41%|████▏     | 4500/10845 [40:16<55:53,  1.89it/s, acc=0.765, epoch=23, loss=0.809]  

epoch:23, idx:4499/10845, loss:0.8091350290113025, acc:0.7650555555555556


 42%|████▏     | 4600/10845 [41:10<1:03:55,  1.63it/s, acc=0.765, epoch=23, loss=0.81] 

epoch:23, idx:4599/10845, loss:0.809654309036939, acc:0.7652173913043478


 43%|████▎     | 4700/10845 [42:03<42:32,  2.41it/s, acc=0.765, epoch=23, loss=0.81]   

epoch:23, idx:4699/10845, loss:0.8102158802747726, acc:0.7651063829787234


 44%|████▍     | 4800/10845 [42:58<1:04:56,  1.55it/s, acc=0.765, epoch=23, loss=0.813]

epoch:23, idx:4799/10845, loss:0.8129073360313972, acc:0.7645833333333333


 45%|████▌     | 4900/10845 [43:50<42:06,  2.35it/s, acc=0.765, epoch=23, loss=0.811]  

epoch:23, idx:4899/10845, loss:0.811117150625404, acc:0.7653061224489796


 46%|████▌     | 5000/10845 [44:41<52:39,  1.85it/s, acc=0.765, epoch=23, loss=0.812]  

epoch:23, idx:4999/10845, loss:0.812058996462822, acc:0.76485


 47%|████▋     | 5100/10845 [45:32<55:54,  1.71it/s, acc=0.765, epoch=23, loss=0.81]   

epoch:23, idx:5099/10845, loss:0.8102971559879827, acc:0.7650490196078431


 48%|████▊     | 5200/10845 [46:30<58:09,  1.62it/s, acc=0.765, epoch=23, loss=0.812]  

epoch:23, idx:5199/10845, loss:0.8124269034885443, acc:0.7645673076923077


 49%|████▉     | 5300/10845 [47:18<47:53,  1.93it/s, acc=0.764, epoch=23, loss=0.814]  

epoch:23, idx:5299/10845, loss:0.8141412389728259, acc:0.7641981132075472


 50%|████▉     | 5400/10845 [48:13<45:54,  1.98it/s, acc=0.765, epoch=23, loss=0.814]  

epoch:23, idx:5399/10845, loss:0.8139994810024898, acc:0.765


 51%|█████     | 5500/10845 [49:12<39:36,  2.25it/s, acc=0.765, epoch=23, loss=0.813]  

epoch:23, idx:5499/10845, loss:0.8133797675587914, acc:0.7646818181818181


 52%|█████▏    | 5600/10845 [50:06<48:35,  1.80it/s, acc=0.764, epoch=23, loss=0.815]  

epoch:23, idx:5599/10845, loss:0.8152972550690174, acc:0.7640178571428572


 53%|█████▎    | 5700/10845 [50:58<34:33,  2.48it/s, acc=0.764, epoch=23, loss=0.816]  

epoch:23, idx:5699/10845, loss:0.8157945622268477, acc:0.7637719298245614


 53%|█████▎    | 5800/10845 [51:59<58:49,  1.43it/s, acc=0.764, epoch=23, loss=0.816]  

epoch:23, idx:5799/10845, loss:0.8158936181664467, acc:0.7635775862068965


 54%|█████▍    | 5900/10845 [52:55<49:15,  1.67it/s, acc=0.763, epoch=23, loss=0.816]  

epoch:23, idx:5899/10845, loss:0.8164292714050261, acc:0.7633898305084745


 55%|█████▌    | 6000/10845 [53:50<35:20,  2.28it/s, acc=0.763, epoch=23, loss=0.816]  

epoch:23, idx:5999/10845, loss:0.8157895643313726, acc:0.7634583333333333


 56%|█████▌    | 6100/10845 [54:45<42:39,  1.85it/s, acc=0.763, epoch=23, loss=0.816]  

epoch:23, idx:6099/10845, loss:0.8162634925666402, acc:0.763483606557377


 57%|█████▋    | 6200/10845 [55:39<38:02,  2.03it/s, acc=0.763, epoch=23, loss=0.816]  

epoch:23, idx:6199/10845, loss:0.8161521842114386, acc:0.7633064516129032


 58%|█████▊    | 6300/10845 [56:32<38:10,  1.98it/s, acc=0.763, epoch=23, loss=0.817]

epoch:23, idx:6299/10845, loss:0.817257568978128, acc:0.7630555555555556


 59%|█████▉    | 6400/10845 [57:28<38:39,  1.92it/s, acc=0.763, epoch=23, loss=0.818]  

epoch:23, idx:6399/10845, loss:0.8177034525386989, acc:0.76328125


 60%|█████▉    | 6500/10845 [58:20<29:16,  2.47it/s, acc=0.763, epoch=23, loss=0.82] 

epoch:23, idx:6499/10845, loss:0.8195340221019891, acc:0.7629615384615385


 61%|██████    | 6600/10845 [59:09<36:12,  1.95it/s, acc=0.763, epoch=23, loss=0.82] 

epoch:23, idx:6599/10845, loss:0.8203853337602182, acc:0.7629166666666667


 62%|██████▏   | 6700/10845 [1:00:06<26:23,  2.62it/s, acc=0.763, epoch=23, loss=0.82]

epoch:23, idx:6699/10845, loss:0.8195461010843961, acc:0.7630597014925373


 63%|██████▎   | 6800/10845 [1:01:04<38:16,  1.76it/s, acc=0.763, epoch=23, loss=0.818]  

epoch:23, idx:6799/10845, loss:0.8182665825503714, acc:0.7633455882352941


 64%|██████▎   | 6900/10845 [1:02:00<35:34,  1.85it/s, acc=0.763, epoch=23, loss=0.818]  

epoch:23, idx:6899/10845, loss:0.8181832851361537, acc:0.7630434782608696


 65%|██████▍   | 7000/10845 [1:02:56<28:10,  2.27it/s, acc=0.763, epoch=23, loss=0.818]

epoch:23, idx:6999/10845, loss:0.8179113535114697, acc:0.7632857142857142


 65%|██████▌   | 7100/10845 [1:03:54<41:27,  1.51it/s, acc=0.763, epoch=23, loss=0.818]

epoch:23, idx:7099/10845, loss:0.8184235768083116, acc:0.7627464788732394


 66%|██████▋   | 7200/10845 [1:04:52<25:50,  2.35it/s, acc=0.763, epoch=23, loss=0.819]

epoch:23, idx:7199/10845, loss:0.8186688345836268, acc:0.7627083333333333


 67%|██████▋   | 7300/10845 [1:05:48<31:17,  1.89it/s, acc=0.762, epoch=23, loss=0.82] 

epoch:23, idx:7299/10845, loss:0.8203554073016938, acc:0.762431506849315


 68%|██████▊   | 7400/10845 [1:06:42<28:36,  2.01it/s, acc=0.762, epoch=23, loss=0.822]

epoch:23, idx:7399/10845, loss:0.8217652470034522, acc:0.7620270270270271


 69%|██████▉   | 7500/10845 [1:07:37<35:15,  1.58it/s, acc=0.762, epoch=23, loss=0.824]

epoch:23, idx:7499/10845, loss:0.8238398435751597, acc:0.7617333333333334


 70%|███████   | 7600/10845 [1:08:31<31:19,  1.73it/s, acc=0.761, epoch=23, loss=0.827]

epoch:23, idx:7599/10845, loss:0.8265015916526317, acc:0.760953947368421


 71%|███████   | 7700/10845 [1:09:27<27:15,  1.92it/s, acc=0.761, epoch=23, loss=0.827]

epoch:23, idx:7699/10845, loss:0.8273449776234565, acc:0.7606493506493507


 72%|███████▏  | 7800/10845 [1:10:20<25:49,  1.96it/s, acc=0.761, epoch=23, loss=0.827]

epoch:23, idx:7799/10845, loss:0.8270071597817616, acc:0.7608653846153847


 73%|███████▎  | 7900/10845 [1:11:10<19:24,  2.53it/s, acc=0.761, epoch=23, loss=0.827]

epoch:23, idx:7899/10845, loss:0.8266915162228331, acc:0.7608227848101266


 74%|███████▍  | 8000/10845 [1:12:05<23:29,  2.02it/s, acc=0.761, epoch=23, loss=0.826]

epoch:23, idx:7999/10845, loss:0.8262874425798654, acc:0.76065625


 75%|███████▍  | 8100/10845 [1:12:59<20:25,  2.24it/s, acc=0.761, epoch=23, loss=0.825]

epoch:23, idx:8099/10845, loss:0.8249839095568952, acc:0.7608641975308642


 76%|███████▌  | 8200/10845 [1:13:55<29:13,  1.51it/s, acc=0.76, epoch=23, loss=0.827] 

epoch:23, idx:8199/10845, loss:0.8273729192919848, acc:0.7604268292682926


 77%|███████▋  | 8300/10845 [1:14:51<23:12,  1.83it/s, acc=0.76, epoch=23, loss=0.829] 

epoch:23, idx:8299/10845, loss:0.8289389783287623, acc:0.7603614457831326


 77%|███████▋  | 8400/10845 [1:15:40<12:36,  3.23it/s, acc=0.761, epoch=23, loss=0.828]

epoch:23, idx:8399/10845, loss:0.8282571211741084, acc:0.760625


 78%|███████▊  | 8500/10845 [1:16:35<18:21,  2.13it/s, acc=0.761, epoch=23, loss=0.828]

epoch:23, idx:8499/10845, loss:0.828049395487589, acc:0.7605882352941177


 79%|███████▉  | 8600/10845 [1:17:24<14:14,  2.63it/s, acc=0.76, epoch=23, loss=0.828] 

epoch:23, idx:8599/10845, loss:0.8283296920913596, acc:0.7604651162790698


 80%|████████  | 8700/10845 [1:18:16<16:46,  2.13it/s, acc=0.76, epoch=23, loss=0.828]

epoch:23, idx:8699/10845, loss:0.827725647595422, acc:0.7603735632183908


 81%|████████  | 8800/10845 [1:19:15<21:44,  1.57it/s, acc=0.76, epoch=23, loss=0.829]

epoch:23, idx:8799/10845, loss:0.829005752466619, acc:0.7602272727272728


 82%|████████▏ | 8900/10845 [1:20:10<19:31,  1.66it/s, acc=0.761, epoch=23, loss=0.828]

epoch:23, idx:8899/10845, loss:0.8276490379685766, acc:0.7605337078651685


 83%|████████▎ | 9000/10845 [1:21:01<14:47,  2.08it/s, acc=0.76, epoch=23, loss=0.829] 

epoch:23, idx:8999/10845, loss:0.8292879414988888, acc:0.7600555555555556


 84%|████████▍ | 9100/10845 [1:21:58<13:00,  2.24it/s, acc=0.76, epoch=23, loss=0.83] 

epoch:23, idx:9099/10845, loss:0.8299843055075341, acc:0.7597527472527472


 85%|████████▍ | 9200/10845 [1:22:49<12:06,  2.26it/s, acc=0.76, epoch=23, loss=0.83] 

epoch:23, idx:9199/10845, loss:0.8300402561737144, acc:0.7597282608695652


 86%|████████▌ | 9300/10845 [1:23:47<18:42,  1.38it/s, acc=0.76, epoch=23, loss=0.832]

epoch:23, idx:9299/10845, loss:0.8315620788899801, acc:0.7595161290322581


 87%|████████▋ | 9400/10845 [1:24:44<14:59,  1.61it/s, acc=0.759, epoch=23, loss=0.832]

epoch:23, idx:9399/10845, loss:0.8315890079990347, acc:0.7592553191489362


 88%|████████▊ | 9500/10845 [1:25:37<09:48,  2.29it/s, acc=0.759, epoch=23, loss=0.833]

epoch:23, idx:9499/10845, loss:0.8326307843170668, acc:0.7591315789473684


 89%|████████▊ | 9600/10845 [1:26:32<09:52,  2.10it/s, acc=0.759, epoch=23, loss=0.833]

epoch:23, idx:9599/10845, loss:0.8330305765445034, acc:0.75875


 89%|████████▉ | 9700/10845 [1:27:26<06:47,  2.81it/s, acc=0.758, epoch=23, loss=0.835]

epoch:23, idx:9699/10845, loss:0.834545466863003, acc:0.7584536082474227


 90%|█████████ | 9800/10845 [1:28:24<12:53,  1.35it/s, acc=0.758, epoch=23, loss=0.837]

epoch:23, idx:9799/10845, loss:0.8367118162585765, acc:0.7578061224489796


 91%|█████████▏| 9900/10845 [1:29:23<06:55,  2.27it/s, acc=0.757, epoch=23, loss=0.838]

epoch:23, idx:9899/10845, loss:0.8375008107556237, acc:0.7574242424242424


 92%|█████████▏| 10000/10845 [1:30:17<09:40,  1.45it/s, acc=0.757, epoch=23, loss=0.838]

epoch:23, idx:9999/10845, loss:0.8384316014230251, acc:0.75705


 93%|█████████▎| 10100/10845 [1:31:12<06:36,  1.88it/s, acc=0.757, epoch=23, loss=0.839]

epoch:23, idx:10099/10845, loss:0.8389335307154325, acc:0.7569554455445544


 94%|█████████▍| 10200/10845 [1:32:06<05:34,  1.93it/s, acc=0.757, epoch=23, loss=0.839]

epoch:23, idx:10199/10845, loss:0.8394734699819603, acc:0.7567647058823529


 95%|█████████▍| 10300/10845 [1:32:56<04:30,  2.02it/s, acc=0.757, epoch=23, loss=0.84] 

epoch:23, idx:10299/10845, loss:0.8398234062808231, acc:0.7565533980582524


 96%|█████████▌| 10400/10845 [1:33:52<03:46,  1.97it/s, acc=0.757, epoch=23, loss=0.839]

epoch:23, idx:10399/10845, loss:0.8390279766802604, acc:0.7566586538461538


 97%|█████████▋| 10500/10845 [1:34:42<03:09,  1.82it/s, acc=0.757, epoch=23, loss=0.838]

epoch:23, idx:10499/10845, loss:0.8382666802633376, acc:0.7566190476190476


 98%|█████████▊| 10600/10845 [1:35:36<02:29,  1.64it/s, acc=0.756, epoch=23, loss=0.84] 

epoch:23, idx:10599/10845, loss:0.8396036758062975, acc:0.7563207547169811


 99%|█████████▊| 10700/10845 [1:36:32<01:11,  2.04it/s, acc=0.756, epoch=23, loss=0.839]

epoch:23, idx:10699/10845, loss:0.8393988920149402, acc:0.7562383177570093


100%|█████████▉| 10800/10845 [1:37:25<00:24,  1.80it/s, acc=0.756, epoch=23, loss=0.839]

epoch:23, idx:10799/10845, loss:0.8385688169300556, acc:0.7563888888888889


100%|██████████| 10845/10845 [1:37:47<00:00,  3.05it/s, acc=0.756, epoch=23, loss=0.839]


epoch:23, idx:0/1275, loss:0.9500192403793335, acc:0.5
epoch:23, idx:100/1275, loss:1.2398664372982364, acc:0.6336633663366337
epoch:23, idx:200/1275, loss:1.1705800772306338, acc:0.6393034825870647
epoch:23, idx:300/1275, loss:1.1823174612862724, acc:0.6403654485049833
epoch:23, idx:400/1275, loss:1.157224602681443, acc:0.6433915211970075
epoch:23, idx:500/1275, loss:1.1540376780156842, acc:0.6397205588822356
epoch:23, idx:600/1275, loss:1.1581220570796738, acc:0.6364392678868552
epoch:23, idx:700/1275, loss:1.1750721140271756, acc:0.6362339514978602
epoch:23, idx:800/1275, loss:1.1800166612930512, acc:0.6379525593008739
epoch:23, idx:900/1275, loss:1.1680634335659188, acc:0.6398446170921198
epoch:23, idx:1000/1275, loss:1.1663124861119392, acc:0.6411088911088911
epoch:23, idx:1100/1275, loss:1.1560027412291984, acc:0.6441871026339692
epoch:23, idx:1200/1275, loss:1.157236151279359, acc:0.6436303080766028


  1%|          | 100/10845 [00:51<1:30:34,  1.98it/s, acc=0.748, epoch=24, loss=0.924]

epoch:24, idx:99/10845, loss:0.9236437273025513, acc:0.7475


  2%|▏         | 200/10845 [01:41<1:48:07,  1.64it/s, acc=0.771, epoch=24, loss=0.832]

epoch:24, idx:199/10845, loss:0.8316575375199318, acc:0.77125


  3%|▎         | 300/10845 [02:33<1:35:12,  1.85it/s, acc=0.765, epoch=24, loss=0.846]

epoch:24, idx:299/10845, loss:0.8464468534787496, acc:0.765


  4%|▎         | 400/10845 [03:28<1:21:40,  2.13it/s, acc=0.764, epoch=24, loss=0.832]

epoch:24, idx:399/10845, loss:0.8317272515594959, acc:0.76375


  5%|▍         | 500/10845 [04:19<1:05:35,  2.63it/s, acc=0.761, epoch=24, loss=0.845]

epoch:24, idx:499/10845, loss:0.8445894153118133, acc:0.761


  6%|▌         | 600/10845 [05:15<1:51:46,  1.53it/s, acc=0.762, epoch=24, loss=0.838]

epoch:24, idx:599/10845, loss:0.8377551316221555, acc:0.7625


  6%|▋         | 700/10845 [06:06<1:05:49,  2.57it/s, acc=0.765, epoch=24, loss=0.825]

epoch:24, idx:699/10845, loss:0.8253289517334529, acc:0.7653571428571428


  7%|▋         | 800/10845 [07:03<1:50:47,  1.51it/s, acc=0.766, epoch=24, loss=0.81] 

epoch:24, idx:799/10845, loss:0.8095707372575999, acc:0.76625


  8%|▊         | 900/10845 [07:56<1:17:55,  2.13it/s, acc=0.761, epoch=24, loss=0.826]

epoch:24, idx:899/10845, loss:0.8255505157179303, acc:0.7608333333333334


  9%|▉         | 1000/10845 [08:53<1:20:47,  2.03it/s, acc=0.761, epoch=24, loss=0.827]

epoch:24, idx:999/10845, loss:0.827269559442997, acc:0.761


 10%|█         | 1100/10845 [09:45<1:45:37,  1.54it/s, acc=0.758, epoch=24, loss=0.834]

epoch:24, idx:1099/10845, loss:0.833585532524369, acc:0.7579545454545454


 11%|█         | 1200/10845 [10:39<1:19:04,  2.03it/s, acc=0.762, epoch=24, loss=0.823]

epoch:24, idx:1199/10845, loss:0.8231841303408146, acc:0.7620833333333333


 12%|█▏        | 1300/10845 [11:31<1:41:15,  1.57it/s, acc=0.763, epoch=24, loss=0.821]

epoch:24, idx:1299/10845, loss:0.8209722959078275, acc:0.7628846153846154


 13%|█▎        | 1400/10845 [12:24<1:13:07,  2.15it/s, acc=0.762, epoch=24, loss=0.826]

epoch:24, idx:1399/10845, loss:0.8255918402331216, acc:0.7617857142857143


 14%|█▍        | 1500/10845 [13:17<1:25:43,  1.82it/s, acc=0.762, epoch=24, loss=0.823]

epoch:24, idx:1499/10845, loss:0.8228990406592687, acc:0.7623333333333333


 15%|█▍        | 1600/10845 [14:13<1:15:48,  2.03it/s, acc=0.759, epoch=24, loss=0.83] 

epoch:24, idx:1599/10845, loss:0.8301716883853077, acc:0.759375


 16%|█▌        | 1700/10845 [15:08<58:11,  2.62it/s, acc=0.761, epoch=24, loss=0.822]  

epoch:24, idx:1699/10845, loss:0.8217628694281859, acc:0.7610294117647058


 17%|█▋        | 1800/10845 [15:59<1:36:23,  1.56it/s, acc=0.762, epoch=24, loss=0.816]

epoch:24, idx:1799/10845, loss:0.8163019508785672, acc:0.7623611111111112


 18%|█▊        | 1900/10845 [16:54<1:03:17,  2.36it/s, acc=0.763, epoch=24, loss=0.815]

epoch:24, idx:1899/10845, loss:0.8154456580940046, acc:0.7626315789473684


 18%|█▊        | 2000/10845 [17:45<1:09:49,  2.11it/s, acc=0.763, epoch=24, loss=0.819]

epoch:24, idx:1999/10845, loss:0.8185395876765251, acc:0.763


 19%|█▉        | 2100/10845 [18:43<1:33:30,  1.56it/s, acc=0.763, epoch=24, loss=0.817]

epoch:24, idx:2099/10845, loss:0.8171259270395551, acc:0.763095238095238


 20%|██        | 2200/10845 [19:36<1:37:22,  1.48it/s, acc=0.764, epoch=24, loss=0.81] 

epoch:24, idx:2199/10845, loss:0.8101026808131825, acc:0.7638636363636364


 21%|██        | 2300/10845 [20:29<1:04:58,  2.19it/s, acc=0.764, epoch=24, loss=0.814]

epoch:24, idx:2299/10845, loss:0.8138848179060479, acc:0.7635869565217391


 22%|██▏       | 2400/10845 [21:25<1:36:40,  1.46it/s, acc=0.763, epoch=24, loss=0.809]

epoch:24, idx:2399/10845, loss:0.8093553257733583, acc:0.763125


 23%|██▎       | 2500/10845 [22:19<1:17:57,  1.78it/s, acc=0.762, epoch=24, loss=0.813]

epoch:24, idx:2499/10845, loss:0.81291724152565, acc:0.7617


 24%|██▍       | 2600/10845 [23:12<1:08:11,  2.02it/s, acc=0.762, epoch=24, loss=0.818]

epoch:24, idx:2599/10845, loss:0.8178728235914157, acc:0.7616346153846154


 25%|██▍       | 2700/10845 [24:06<1:19:10,  1.71it/s, acc=0.761, epoch=24, loss=0.816]

epoch:24, idx:2699/10845, loss:0.8157411492091639, acc:0.7609259259259259


 26%|██▌       | 2800/10845 [25:02<1:15:21,  1.78it/s, acc=0.761, epoch=24, loss=0.817]

epoch:24, idx:2799/10845, loss:0.8168440840286868, acc:0.76125


 27%|██▋       | 2900/10845 [25:55<1:20:54,  1.64it/s, acc=0.761, epoch=24, loss=0.818]

epoch:24, idx:2899/10845, loss:0.8180676383396674, acc:0.7612931034482758


 28%|██▊       | 3000/10845 [26:45<45:37,  2.87it/s, acc=0.76, epoch=24, loss=0.822]   

epoch:24, idx:2999/10845, loss:0.8219167489409447, acc:0.7601666666666667


 29%|██▊       | 3100/10845 [27:40<1:30:27,  1.43it/s, acc=0.76, epoch=24, loss=0.82]  

epoch:24, idx:3099/10845, loss:0.8198493426076827, acc:0.7604032258064516


 30%|██▉       | 3200/10845 [28:39<1:12:55,  1.75it/s, acc=0.76, epoch=24, loss=0.819] 

epoch:24, idx:3199/10845, loss:0.819176169410348, acc:0.759765625


 30%|███       | 3300/10845 [29:34<45:30,  2.76it/s, acc=0.76, epoch=24, loss=0.818]   

epoch:24, idx:3299/10845, loss:0.8181374771667249, acc:0.7602272727272728


 31%|███▏      | 3400/10845 [30:31<56:57,  2.18it/s, acc=0.761, epoch=24, loss=0.817]  

epoch:24, idx:3399/10845, loss:0.8174415969498017, acc:0.7605147058823529


 32%|███▏      | 3500/10845 [31:26<1:08:16,  1.79it/s, acc=0.761, epoch=24, loss=0.815]

epoch:24, idx:3499/10845, loss:0.8148233541250229, acc:0.761


 33%|███▎      | 3600/10845 [32:16<47:19,  2.55it/s, acc=0.76, epoch=24, loss=0.818]   

epoch:24, idx:3599/10845, loss:0.817925403068463, acc:0.76


 34%|███▍      | 3700/10845 [33:06<38:38,  3.08it/s, acc=0.76, epoch=24, loss=0.819]   

epoch:24, idx:3699/10845, loss:0.8192087282683398, acc:0.759527027027027


 35%|███▌      | 3800/10845 [33:59<1:07:44,  1.73it/s, acc=0.76, epoch=24, loss=0.818] 

epoch:24, idx:3799/10845, loss:0.8184108023737606, acc:0.7598026315789473


 36%|███▌      | 3900/10845 [34:58<1:08:48,  1.68it/s, acc=0.76, epoch=24, loss=0.817] 

epoch:24, idx:3899/10845, loss:0.8171349091254748, acc:0.7598076923076923


 37%|███▋      | 4000/10845 [35:50<57:09,  2.00it/s, acc=0.76, epoch=24, loss=0.818]  

epoch:24, idx:3999/10845, loss:0.8175119633972645, acc:0.7595625


 38%|███▊      | 4100/10845 [36:42<1:11:56,  1.56it/s, acc=0.76, epoch=24, loss=0.817] 

epoch:24, idx:4099/10845, loss:0.8172049053994621, acc:0.760060975609756


 39%|███▊      | 4200/10845 [37:34<48:18,  2.29it/s, acc=0.76, epoch=24, loss=0.819]  

epoch:24, idx:4199/10845, loss:0.8190491421307836, acc:0.7603571428571428


 40%|███▉      | 4301/10845 [38:25<40:36,  2.69it/s, acc=0.761, epoch=24, loss=0.818]  

epoch:24, idx:4299/10845, loss:0.8182470458884572, acc:0.7605813953488372


 41%|████      | 4400/10845 [39:19<56:20,  1.91it/s, acc=0.761, epoch=24, loss=0.819]  

epoch:24, idx:4399/10845, loss:0.8187877614118836, acc:0.7610227272727272


 41%|████▏     | 4500/10845 [40:13<45:39,  2.32it/s, acc=0.761, epoch=24, loss=0.818]  

epoch:24, idx:4499/10845, loss:0.8182491015328301, acc:0.7611111111111111


 42%|████▏     | 4600/10845 [41:03<57:22,  1.81it/s, acc=0.761, epoch=24, loss=0.817]  

epoch:24, idx:4599/10845, loss:0.8169135391453038, acc:0.7614130434782609


 43%|████▎     | 4700/10845 [41:57<53:32,  1.91it/s, acc=0.761, epoch=24, loss=0.817]  

epoch:24, idx:4699/10845, loss:0.8171737583520564, acc:0.7612234042553192


 44%|████▍     | 4800/10845 [42:49<55:22,  1.82it/s, acc=0.761, epoch=24, loss=0.817]  

epoch:24, idx:4799/10845, loss:0.816585884814461, acc:0.7613541666666667


 45%|████▌     | 4900/10845 [43:41<51:05,  1.94it/s, acc=0.762, epoch=24, loss=0.818]  

epoch:24, idx:4899/10845, loss:0.8182588866292214, acc:0.7616836734693877


 46%|████▌     | 5001/10845 [44:36<39:44,  2.45it/s, acc=0.762, epoch=24, loss=0.818]  

epoch:24, idx:4999/10845, loss:0.817682884979248, acc:0.7616


 47%|████▋     | 5100/10845 [45:27<51:26,  1.86it/s, acc=0.761, epoch=24, loss=0.819]  

epoch:24, idx:5099/10845, loss:0.818683860208474, acc:0.7613235294117647


 48%|████▊     | 5200/10845 [46:27<54:39,  1.72it/s, acc=0.761, epoch=24, loss=0.821]  

epoch:24, idx:5199/10845, loss:0.8207595972602184, acc:0.7609134615384615


 49%|████▉     | 5300/10845 [47:20<47:12,  1.96it/s, acc=0.76, epoch=24, loss=0.822]   

epoch:24, idx:5299/10845, loss:0.8215465929598178, acc:0.7602830188679245


 50%|████▉     | 5400/10845 [48:14<56:40,  1.60it/s, acc=0.76, epoch=24, loss=0.824]  

epoch:24, idx:5399/10845, loss:0.823681805950624, acc:0.7597222222222222


 51%|█████     | 5500/10845 [49:06<45:03,  1.98it/s, acc=0.76, epoch=24, loss=0.822]  

epoch:24, idx:5499/10845, loss:0.8222379860877991, acc:0.7599545454545454


 52%|█████▏    | 5600/10845 [50:00<53:53,  1.62it/s, acc=0.76, epoch=24, loss=0.823]   

epoch:24, idx:5599/10845, loss:0.8227186067295926, acc:0.7596875


 53%|█████▎    | 5700/10845 [50:51<46:20,  1.85it/s, acc=0.759, epoch=24, loss=0.824]  

epoch:24, idx:5699/10845, loss:0.8237972524866723, acc:0.7593421052631579


 53%|█████▎    | 5800/10845 [51:43<50:51,  1.65it/s, acc=0.76, epoch=24, loss=0.823]   

epoch:24, idx:5799/10845, loss:0.8233693454954131, acc:0.7596551724137931


 54%|█████▍    | 5900/10845 [52:34<34:49,  2.37it/s, acc=0.76, epoch=24, loss=0.822]  

epoch:24, idx:5899/10845, loss:0.8223630979758197, acc:0.7603389830508475


 55%|█████▌    | 6000/10845 [53:28<53:56,  1.50it/s, acc=0.76, epoch=24, loss=0.821]   

epoch:24, idx:5999/10845, loss:0.821216157351931, acc:0.7604166666666666


 56%|█████▌    | 6100/10845 [54:19<47:52,  1.65it/s, acc=0.761, epoch=24, loss=0.82]

epoch:24, idx:6099/10845, loss:0.8202222483519648, acc:0.7605737704918033


 57%|█████▋    | 6200/10845 [55:15<40:46,  1.90it/s, acc=0.76, epoch=24, loss=0.823] 

epoch:24, idx:6199/10845, loss:0.8229384703838056, acc:0.760241935483871


 58%|█████▊    | 6300/10845 [56:06<39:37,  1.91it/s, acc=0.76, epoch=24, loss=0.822]

epoch:24, idx:6299/10845, loss:0.8221091527503634, acc:0.7600396825396826


 59%|█████▉    | 6400/10845 [57:02<36:13,  2.05it/s, acc=0.76, epoch=24, loss=0.823]

epoch:24, idx:6399/10845, loss:0.8229811598919332, acc:0.7598828125


 60%|█████▉    | 6500/10845 [57:55<31:39,  2.29it/s, acc=0.76, epoch=24, loss=0.824]

epoch:24, idx:6499/10845, loss:0.8236708836372082, acc:0.7600769230769231


 61%|██████    | 6600/10845 [58:53<41:53,  1.69it/s, acc=0.76, epoch=24, loss=0.822]  

epoch:24, idx:6599/10845, loss:0.8217949857043497, acc:0.7603030303030303


 62%|██████▏   | 6700/10845 [59:49<53:48,  1.28it/s, acc=0.76, epoch=24, loss=0.823]

epoch:24, idx:6699/10845, loss:0.8233790308148113, acc:0.7601492537313432


 63%|██████▎   | 6800/10845 [1:00:42<31:46,  2.12it/s, acc=0.761, epoch=24, loss=0.822]

epoch:24, idx:6799/10845, loss:0.8217086750181283, acc:0.7607352941176471


 64%|██████▎   | 6900/10845 [1:01:39<47:31,  1.38it/s, acc=0.761, epoch=24, loss=0.821]

epoch:24, idx:6899/10845, loss:0.821483120356781, acc:0.7607246376811594


 65%|██████▍   | 7000/10845 [1:02:36<32:27,  1.97it/s, acc=0.761, epoch=24, loss=0.821]

epoch:24, idx:6999/10845, loss:0.8208245812909943, acc:0.7605714285714286


 65%|██████▌   | 7100/10845 [1:03:30<31:03,  2.01it/s, acc=0.761, epoch=24, loss=0.82] 

epoch:24, idx:7099/10845, loss:0.8201203009150397, acc:0.7608098591549296


 66%|██████▋   | 7200/10845 [1:04:23<28:17,  2.15it/s, acc=0.761, epoch=24, loss=0.822] 

epoch:24, idx:7199/10845, loss:0.8215850548487571, acc:0.7606597222222222


 67%|██████▋   | 7300/10845 [1:05:15<25:53,  2.28it/s, acc=0.761, epoch=24, loss=0.821]

epoch:24, idx:7299/10845, loss:0.8205410763947931, acc:0.7611643835616438


 68%|██████▊   | 7400/10845 [1:06:09<36:41,  1.56it/s, acc=0.761, epoch=24, loss=0.821]

epoch:24, idx:7399/10845, loss:0.8214911782701273, acc:0.7609121621621622


 69%|██████▉   | 7500/10845 [1:07:06<25:39,  2.17it/s, acc=0.76, epoch=24, loss=0.821] 

epoch:24, idx:7499/10845, loss:0.8211464470187823, acc:0.7604666666666666


 70%|███████   | 7600/10845 [1:08:03<30:50,  1.75it/s, acc=0.761, epoch=24, loss=0.821]

epoch:24, idx:7599/10845, loss:0.8210001621238495, acc:0.7606578947368421


 71%|███████   | 7700/10845 [1:08:56<19:23,  2.70it/s, acc=0.761, epoch=24, loss=0.822]

epoch:24, idx:7699/10845, loss:0.8215483853453166, acc:0.7607467532467532


 72%|███████▏  | 7800/10845 [1:09:50<26:20,  1.93it/s, acc=0.761, epoch=24, loss=0.822]

epoch:24, idx:7799/10845, loss:0.8219780383851284, acc:0.7608333333333334


 73%|███████▎  | 7900/10845 [1:10:39<22:10,  2.21it/s, acc=0.761, epoch=24, loss=0.822]

epoch:24, idx:7899/10845, loss:0.8223276225864132, acc:0.7611392405063291


 74%|███████▍  | 8000/10845 [1:11:34<34:39,  1.37it/s, acc=0.761, epoch=24, loss=0.821]

epoch:24, idx:7999/10845, loss:0.82119571692124, acc:0.76140625


 75%|███████▍  | 8100/10845 [1:12:29<27:32,  1.66it/s, acc=0.762, epoch=24, loss=0.822]

epoch:24, idx:8099/10845, loss:0.8223061855155744, acc:0.7615432098765432


 76%|███████▌  | 8200/10845 [1:13:18<27:12,  1.62it/s, acc=0.762, epoch=24, loss=0.822]

epoch:24, idx:8199/10845, loss:0.8222325708844312, acc:0.7618292682926829


 77%|███████▋  | 8300/10845 [1:14:12<20:03,  2.12it/s, acc=0.762, epoch=24, loss=0.823]

epoch:24, idx:8299/10845, loss:0.8234228289881385, acc:0.7616265060240964


 77%|███████▋  | 8400/10845 [1:15:02<19:44,  2.06it/s, acc=0.762, epoch=24, loss=0.822]

epoch:24, idx:8399/10845, loss:0.8220453998162633, acc:0.7622023809523809


 78%|███████▊  | 8500/10845 [1:15:59<33:12,  1.18it/s, acc=0.762, epoch=24, loss=0.822]

epoch:24, idx:8499/10845, loss:0.8223921187765458, acc:0.7622941176470588


 79%|███████▉  | 8600/10845 [1:16:52<20:19,  1.84it/s, acc=0.763, epoch=24, loss=0.821]

epoch:24, idx:8599/10845, loss:0.8210181521814923, acc:0.7627616279069768


 80%|████████  | 8700/10845 [1:17:45<19:17,  1.85it/s, acc=0.762, epoch=24, loss=0.823]

epoch:24, idx:8699/10845, loss:0.8231189995318994, acc:0.7621551724137932


 81%|████████  | 8800/10845 [1:18:41<16:38,  2.05it/s, acc=0.762, epoch=24, loss=0.824]

epoch:24, idx:8799/10845, loss:0.8244327230311252, acc:0.7617613636363636


 82%|████████▏ | 8900/10845 [1:19:34<15:40,  2.07it/s, acc=0.762, epoch=24, loss=0.824]

epoch:24, idx:8899/10845, loss:0.8237706875834572, acc:0.7618539325842697


 83%|████████▎ | 9000/10845 [1:20:29<19:01,  1.62it/s, acc=0.761, epoch=24, loss=0.825]

epoch:24, idx:8999/10845, loss:0.8250607651538319, acc:0.7614444444444445


 84%|████████▍ | 9100/10845 [1:21:22<15:58,  1.82it/s, acc=0.761, epoch=24, loss=0.826]

epoch:24, idx:9099/10845, loss:0.8264611494213671, acc:0.7611538461538462


 85%|████████▍ | 9200/10845 [1:22:15<15:44,  1.74it/s, acc=0.761, epoch=24, loss=0.827]

epoch:24, idx:9199/10845, loss:0.8268027758727903, acc:0.7613043478260869


 86%|████████▌ | 9300/10845 [1:23:10<12:36,  2.04it/s, acc=0.761, epoch=24, loss=0.827]

epoch:24, idx:9299/10845, loss:0.8272872248195833, acc:0.7611290322580645


 87%|████████▋ | 9400/10845 [1:24:01<09:13,  2.61it/s, acc=0.761, epoch=24, loss=0.827]

epoch:24, idx:9399/10845, loss:0.826612439675534, acc:0.7611436170212766


 88%|████████▊ | 9500/10845 [1:25:01<13:57,  1.61it/s, acc=0.761, epoch=24, loss=0.827]

epoch:24, idx:9499/10845, loss:0.8266567156001141, acc:0.7611578947368421


 89%|████████▊ | 9600/10845 [1:25:50<11:05,  1.87it/s, acc=0.761, epoch=24, loss=0.826]

epoch:24, idx:9599/10845, loss:0.8263693670804302, acc:0.76140625


 89%|████████▉ | 9700/10845 [1:26:44<11:13,  1.70it/s, acc=0.761, epoch=24, loss=0.827]

epoch:24, idx:9699/10845, loss:0.8267572357267449, acc:0.7613659793814433


 90%|█████████ | 9800/10845 [1:27:33<07:15,  2.40it/s, acc=0.761, epoch=24, loss=0.827]

epoch:24, idx:9799/10845, loss:0.827017880000022, acc:0.7610714285714286


 91%|█████████▏| 9900/10845 [1:28:28<09:03,  1.74it/s, acc=0.761, epoch=24, loss=0.828]

epoch:24, idx:9899/10845, loss:0.8283642434983542, acc:0.7605555555555555


 92%|█████████▏| 10000/10845 [1:29:23<10:31,  1.34it/s, acc=0.761, epoch=24, loss=0.828]

epoch:24, idx:9999/10845, loss:0.8276875306457281, acc:0.760575


 93%|█████████▎| 10100/10845 [1:30:17<06:49,  1.82it/s, acc=0.76, epoch=24, loss=0.83]  

epoch:24, idx:10099/10845, loss:0.8297685386373265, acc:0.7601237623762376


 94%|█████████▍| 10200/10845 [1:31:12<04:32,  2.36it/s, acc=0.76, epoch=24, loss=0.831]

epoch:24, idx:10199/10845, loss:0.8305099505799658, acc:0.7599509803921569


 95%|█████████▍| 10301/10845 [1:32:04<03:29,  2.60it/s, acc=0.76, epoch=24, loss=0.831]

epoch:24, idx:10299/10845, loss:0.8307251103323641, acc:0.7597572815533981


 96%|█████████▌| 10400/10845 [1:33:01<03:58,  1.86it/s, acc=0.759, epoch=24, loss=0.832]

epoch:24, idx:10399/10845, loss:0.8315375079644414, acc:0.7593509615384615


 97%|█████████▋| 10500/10845 [1:33:55<02:32,  2.26it/s, acc=0.759, epoch=24, loss=0.831]

epoch:24, idx:10499/10845, loss:0.8307809810439746, acc:0.7594047619047619


 98%|█████████▊| 10600/10845 [1:34:51<02:12,  1.84it/s, acc=0.76, epoch=24, loss=0.83]  

epoch:24, idx:10599/10845, loss:0.8296581043640398, acc:0.7597641509433962


 99%|█████████▊| 10700/10845 [1:35:47<01:36,  1.51it/s, acc=0.76, epoch=24, loss=0.829]

epoch:24, idx:10699/10845, loss:0.8293244649734453, acc:0.7598130841121495


100%|█████████▉| 10800/10845 [1:36:38<00:18,  2.38it/s, acc=0.76, epoch=24, loss=0.83]  

epoch:24, idx:10799/10845, loss:0.8299086163800071, acc:0.7596527777777777


100%|██████████| 10845/10845 [1:37:03<00:00,  1.93it/s, acc=0.759, epoch=24, loss=0.831]


epoch:24, idx:0/1275, loss:1.1064412593841553, acc:0.5
epoch:24, idx:100/1275, loss:1.2159697932772118, acc:0.6410891089108911
epoch:24, idx:200/1275, loss:1.1558422742791437, acc:0.6368159203980099
epoch:24, idx:300/1275, loss:1.1615528963332953, acc:0.6411960132890365
epoch:24, idx:400/1275, loss:1.1418526471404364, acc:0.6458852867830424
epoch:24, idx:500/1275, loss:1.1327275276303053, acc:0.6482035928143712
epoch:24, idx:600/1275, loss:1.1355987785263189, acc:0.641846921797005
epoch:24, idx:700/1275, loss:1.146986534949206, acc:0.6398002853067047
epoch:24, idx:800/1275, loss:1.156043854396739, acc:0.6382646691635455
epoch:24, idx:900/1275, loss:1.1467803858228847, acc:0.6409544950055494
epoch:24, idx:1000/1275, loss:1.1426034635239906, acc:0.6438561438561439
epoch:24, idx:1100/1275, loss:1.135569311455095, acc:0.6444141689373297
epoch:24, idx:1200/1275, loss:1.1357382931975302, acc:0.6452955870108243


  1%|          | 100/10845 [00:57<1:40:59,  1.77it/s, acc=0.75, epoch=25, loss=0.793]

epoch:25, idx:99/10845, loss:0.7934859764575958, acc:0.75


  2%|▏         | 200/10845 [01:54<1:14:55,  2.37it/s, acc=0.77, epoch=25, loss=0.788] 

epoch:25, idx:199/10845, loss:0.7880495667457581, acc:0.77


  3%|▎         | 300/10845 [02:49<57:30,  3.06it/s, acc=0.775, epoch=25, loss=0.759]  

epoch:25, idx:299/10845, loss:0.7587792058785756, acc:0.775


  4%|▎         | 400/10845 [03:44<1:10:23,  2.47it/s, acc=0.761, epoch=25, loss=0.797]

epoch:25, idx:399/10845, loss:0.7972394862771034, acc:0.76125


  5%|▍         | 500/10845 [04:36<1:43:01,  1.67it/s, acc=0.756, epoch=25, loss=0.827]

epoch:25, idx:499/10845, loss:0.8270529203414917, acc:0.756


  6%|▌         | 600/10845 [05:30<1:28:24,  1.93it/s, acc=0.759, epoch=25, loss=0.815]

epoch:25, idx:599/10845, loss:0.8146889548500379, acc:0.7591666666666667


  6%|▋         | 700/10845 [06:23<1:13:37,  2.30it/s, acc=0.764, epoch=25, loss=0.793]

epoch:25, idx:699/10845, loss:0.7930380089793887, acc:0.7639285714285714


  7%|▋         | 800/10845 [07:19<1:38:10,  1.71it/s, acc=0.766, epoch=25, loss=0.789]

epoch:25, idx:799/10845, loss:0.7893949784338474, acc:0.7659375


  8%|▊         | 900/10845 [08:13<1:21:05,  2.04it/s, acc=0.768, epoch=25, loss=0.77] 

epoch:25, idx:899/10845, loss:0.7702208299107022, acc:0.7683333333333333


  9%|▉         | 1000/10845 [09:03<1:17:34,  2.12it/s, acc=0.769, epoch=25, loss=0.761]

epoch:25, idx:999/10845, loss:0.7612331632375717, acc:0.76925


 10%|█         | 1100/10845 [09:53<1:25:30,  1.90it/s, acc=0.771, epoch=25, loss=0.759]

epoch:25, idx:1099/10845, loss:0.7588700884580613, acc:0.7713636363636364


 11%|█         | 1200/10845 [10:46<1:33:54,  1.71it/s, acc=0.77, epoch=25, loss=0.768] 

epoch:25, idx:1199/10845, loss:0.7681027040878932, acc:0.7704166666666666


 12%|█▏        | 1300/10845 [11:40<1:39:22,  1.60it/s, acc=0.771, epoch=25, loss=0.761]

epoch:25, idx:1299/10845, loss:0.760799494935916, acc:0.7713461538461538


 13%|█▎        | 1400/10845 [12:28<1:16:52,  2.05it/s, acc=0.768, epoch=25, loss=0.768]

epoch:25, idx:1399/10845, loss:0.7682010959088802, acc:0.7683928571428571


 14%|█▍        | 1500/10845 [13:25<1:42:03,  1.53it/s, acc=0.768, epoch=25, loss=0.772]

epoch:25, idx:1499/10845, loss:0.7724743081529936, acc:0.7676666666666667


 15%|█▍        | 1600/10845 [14:20<1:44:32,  1.47it/s, acc=0.769, epoch=25, loss=0.769]

epoch:25, idx:1599/10845, loss:0.7693105345778167, acc:0.76859375


 16%|█▌        | 1700/10845 [15:14<1:12:51,  2.09it/s, acc=0.77, epoch=25, loss=0.765] 

epoch:25, idx:1699/10845, loss:0.7650783777236938, acc:0.7695588235294117


 17%|█▋        | 1800/10845 [16:06<1:29:27,  1.69it/s, acc=0.77, epoch=25, loss=0.773] 

epoch:25, idx:1799/10845, loss:0.7726977661583159, acc:0.7698611111111111


 18%|█▊        | 1900/10845 [17:04<1:23:02,  1.80it/s, acc=0.771, epoch=25, loss=0.77] 

epoch:25, idx:1899/10845, loss:0.7704659323002163, acc:0.7713157894736842


 18%|█▊        | 2000/10845 [17:56<1:53:21,  1.30it/s, acc=0.772, epoch=25, loss=0.77] 

epoch:25, idx:1999/10845, loss:0.769660604327917, acc:0.771625


 19%|█▉        | 2100/10845 [18:49<1:20:25,  1.81it/s, acc=0.769, epoch=25, loss=0.78] 

epoch:25, idx:2099/10845, loss:0.7795628798007965, acc:0.7691666666666667


 20%|██        | 2200/10845 [19:43<58:51,  2.45it/s, acc=0.768, epoch=25, loss=0.784]  

epoch:25, idx:2199/10845, loss:0.7842762950875543, acc:0.7680681818181818


 21%|██        | 2300/10845 [20:37<1:21:27,  1.75it/s, acc=0.766, epoch=25, loss=0.793]

epoch:25, idx:2299/10845, loss:0.7929257634411687, acc:0.7664130434782609


 22%|██▏       | 2400/10845 [21:30<1:20:43,  1.74it/s, acc=0.766, epoch=25, loss=0.794]

epoch:25, idx:2399/10845, loss:0.7943912412722905, acc:0.7659375


 23%|██▎       | 2500/10845 [22:21<1:08:15,  2.04it/s, acc=0.765, epoch=25, loss=0.798]

epoch:25, idx:2499/10845, loss:0.7982110855102539, acc:0.7652


 24%|██▍       | 2600/10845 [23:17<1:20:44,  1.70it/s, acc=0.766, epoch=25, loss=0.794]

epoch:25, idx:2599/10845, loss:0.7944446661839119, acc:0.76625


 25%|██▍       | 2700/10845 [24:12<1:14:20,  1.83it/s, acc=0.768, epoch=25, loss=0.792]

epoch:25, idx:2699/10845, loss:0.7919116935906587, acc:0.7675925925925926


 26%|██▌       | 2800/10845 [25:07<1:15:54,  1.77it/s, acc=0.767, epoch=25, loss=0.796]

epoch:25, idx:2799/10845, loss:0.7963688876799174, acc:0.7665178571428571


 27%|██▋       | 2900/10845 [26:03<59:05,  2.24it/s, acc=0.768, epoch=25, loss=0.795]  

epoch:25, idx:2899/10845, loss:0.7950779632864328, acc:0.7676724137931035


 28%|██▊       | 3000/10845 [26:57<51:28,  2.54it/s, acc=0.768, epoch=25, loss=0.795]  

epoch:25, idx:2999/10845, loss:0.7952847187916438, acc:0.7675833333333333


 29%|██▊       | 3100/10845 [27:51<1:16:51,  1.68it/s, acc=0.766, epoch=25, loss=0.798]

epoch:25, idx:3099/10845, loss:0.7983684083723253, acc:0.7663709677419355


 30%|██▉       | 3200/10845 [28:43<1:27:30,  1.46it/s, acc=0.766, epoch=25, loss=0.799]

epoch:25, idx:3199/10845, loss:0.7986656543053686, acc:0.7659375


 30%|███       | 3300/10845 [29:36<50:41,  2.48it/s, acc=0.766, epoch=25, loss=0.798]  

epoch:25, idx:3299/10845, loss:0.7984164456887679, acc:0.7662121212121212


 31%|███▏      | 3400/10845 [30:33<1:15:59,  1.63it/s, acc=0.766, epoch=25, loss=0.801]

epoch:25, idx:3399/10845, loss:0.8007682755414177, acc:0.7660294117647058


 32%|███▏      | 3500/10845 [31:28<1:06:50,  1.83it/s, acc=0.766, epoch=25, loss=0.801]

epoch:25, idx:3499/10845, loss:0.8009018774373191, acc:0.7662142857142857


 33%|███▎      | 3600/10845 [32:19<56:21,  2.14it/s, acc=0.766, epoch=25, loss=0.803]  

epoch:25, idx:3599/10845, loss:0.803288031866153, acc:0.7656944444444445


 34%|███▍      | 3700/10845 [33:14<1:17:12,  1.54it/s, acc=0.766, epoch=25, loss=0.804]

epoch:25, idx:3699/10845, loss:0.8039400065911783, acc:0.7658783783783784


 35%|███▌      | 3800/10845 [34:06<1:04:33,  1.82it/s, acc=0.766, epoch=25, loss=0.806]

epoch:25, idx:3799/10845, loss:0.8057397255928893, acc:0.7656578947368421


 36%|███▌      | 3900/10845 [35:00<1:25:45,  1.35it/s, acc=0.766, epoch=25, loss=0.804]

epoch:25, idx:3899/10845, loss:0.8039315310502664, acc:0.7657692307692308


 37%|███▋      | 4000/10845 [35:56<1:10:43,  1.61it/s, acc=0.765, epoch=25, loss=0.805]

epoch:25, idx:3999/10845, loss:0.8045369343161582, acc:0.7654375


 38%|███▊      | 4100/10845 [36:52<1:01:47,  1.82it/s, acc=0.766, epoch=25, loss=0.802]

epoch:25, idx:4099/10845, loss:0.8020429598994372, acc:0.7660365853658536


 39%|███▊      | 4200/10845 [37:44<1:03:40,  1.74it/s, acc=0.766, epoch=25, loss=0.801]

epoch:25, idx:4199/10845, loss:0.8009290503462155, acc:0.7664880952380952


 40%|███▉      | 4300/10845 [38:38<55:38,  1.96it/s, acc=0.767, epoch=25, loss=0.801]  

epoch:25, idx:4299/10845, loss:0.801036563765171, acc:0.766860465116279


 41%|████      | 4400/10845 [39:31<44:32,  2.41it/s, acc=0.766, epoch=25, loss=0.8]    

epoch:25, idx:4399/10845, loss:0.8002053274349733, acc:0.7664772727272727


 41%|████▏     | 4500/10845 [40:26<1:04:58,  1.63it/s, acc=0.767, epoch=25, loss=0.8]  

epoch:25, idx:4499/10845, loss:0.7995271764861213, acc:0.7668333333333334


 42%|████▏     | 4600/10845 [41:21<1:01:26,  1.69it/s, acc=0.768, epoch=25, loss=0.798]

epoch:25, idx:4599/10845, loss:0.7979002491546714, acc:0.7676086956521739


 43%|████▎     | 4700/10845 [42:17<36:33,  2.80it/s, acc=0.767, epoch=25, loss=0.8]    

epoch:25, idx:4699/10845, loss:0.800473507959792, acc:0.7670744680851064


 44%|████▍     | 4800/10845 [43:09<56:21,  1.79it/s, acc=0.767, epoch=25, loss=0.803]  

epoch:25, idx:4799/10845, loss:0.8029392264038324, acc:0.7671354166666666


 45%|████▌     | 4900/10845 [44:03<42:25,  2.34it/s, acc=0.767, epoch=25, loss=0.801]  

epoch:25, idx:4899/10845, loss:0.8011512020291114, acc:0.7672959183673469


 46%|████▌     | 5000/10845 [44:56<40:06,  2.43it/s, acc=0.767, epoch=25, loss=0.802]  

epoch:25, idx:4999/10845, loss:0.8021844663023948, acc:0.7673


 47%|████▋     | 5100/10845 [45:54<58:58,  1.62it/s, acc=0.767, epoch=25, loss=0.802]  

epoch:25, idx:5099/10845, loss:0.8021171669165293, acc:0.7672549019607843


 48%|████▊     | 5200/10845 [46:53<47:45,  1.97it/s, acc=0.767, epoch=25, loss=0.805]  

epoch:25, idx:5199/10845, loss:0.8046305206074165, acc:0.7667788461538462


 49%|████▉     | 5300/10845 [47:49<1:12:26,  1.28it/s, acc=0.767, epoch=25, loss=0.802]

epoch:25, idx:5299/10845, loss:0.8021192821185544, acc:0.767311320754717


 50%|████▉     | 5400/10845 [48:39<41:21,  2.19it/s, acc=0.768, epoch=25, loss=0.802]  

epoch:25, idx:5399/10845, loss:0.8016431513042361, acc:0.767824074074074


 51%|█████     | 5500/10845 [49:35<1:00:16,  1.48it/s, acc=0.768, epoch=25, loss=0.802]

epoch:25, idx:5499/10845, loss:0.8020368662151424, acc:0.7675909090909091


 52%|█████▏    | 5600/10845 [50:27<48:31,  1.80it/s, acc=0.768, epoch=25, loss=0.799]  

epoch:25, idx:5599/10845, loss:0.7989468275489552, acc:0.7682589285714285


 53%|█████▎    | 5700/10845 [51:20<34:45,  2.47it/s, acc=0.768, epoch=25, loss=0.799]  

epoch:25, idx:5699/10845, loss:0.7993033953037179, acc:0.7682894736842105


 53%|█████▎    | 5800/10845 [52:18<45:01,  1.87it/s, acc=0.768, epoch=25, loss=0.798]  

epoch:25, idx:5799/10845, loss:0.7983749684588662, acc:0.7683189655172413


 54%|█████▍    | 5900/10845 [53:14<48:49,  1.69it/s, acc=0.768, epoch=25, loss=0.801]  

epoch:25, idx:5899/10845, loss:0.8006365471674224, acc:0.7677966101694915


 55%|█████▌    | 6000/10845 [54:04<47:33,  1.70it/s, acc=0.768, epoch=25, loss=0.801]

epoch:25, idx:5999/10845, loss:0.8007982041041056, acc:0.7677916666666667


 56%|█████▌    | 6100/10845 [54:57<39:15,  2.01it/s, acc=0.767, epoch=25, loss=0.802]  

epoch:25, idx:6099/10845, loss:0.8018486722277813, acc:0.7671311475409837


 57%|█████▋    | 6200/10845 [55:46<33:19,  2.32it/s, acc=0.766, epoch=25, loss=0.805]  

epoch:25, idx:6199/10845, loss:0.8046964572321984, acc:0.7664112903225806


 58%|█████▊    | 6301/10845 [56:40<52:21,  1.45it/s, acc=0.766, epoch=25, loss=0.804]  

epoch:25, idx:6299/10845, loss:0.8043375915856589, acc:0.7662698412698413


 59%|█████▉    | 6400/10845 [57:35<32:46,  2.26it/s, acc=0.766, epoch=25, loss=0.805]

epoch:25, idx:6399/10845, loss:0.8050832563266158, acc:0.76578125


 60%|█████▉    | 6500/10845 [58:30<40:01,  1.81it/s, acc=0.766, epoch=25, loss=0.806]  

epoch:25, idx:6499/10845, loss:0.8056334322507565, acc:0.7656153846153846


 61%|██████    | 6600/10845 [59:23<54:24,  1.30it/s, acc=0.765, epoch=25, loss=0.805]  

epoch:25, idx:6599/10845, loss:0.8052728772524631, acc:0.7654924242424243


 62%|██████▏   | 6700/10845 [1:00:19<27:49,  2.48it/s, acc=0.765, epoch=25, loss=0.807]

epoch:25, idx:6699/10845, loss:0.8073849963608073, acc:0.7649626865671642


 63%|██████▎   | 6800/10845 [1:01:13<46:31,  1.45it/s, acc=0.765, epoch=25, loss=0.807]

epoch:25, idx:6799/10845, loss:0.8069835728669867, acc:0.7652941176470588


 64%|██████▎   | 6900/10845 [1:02:10<37:11,  1.77it/s, acc=0.765, epoch=25, loss=0.809]

epoch:25, idx:6899/10845, loss:0.8088223532058191, acc:0.7646376811594203


 65%|██████▍   | 7000/10845 [1:03:00<30:21,  2.11it/s, acc=0.765, epoch=25, loss=0.809]

epoch:25, idx:6999/10845, loss:0.8090805894689901, acc:0.7646071428571428


 65%|██████▌   | 7100/10845 [1:03:54<30:08,  2.07it/s, acc=0.765, epoch=25, loss=0.809]

epoch:25, idx:7099/10845, loss:0.8090799314833024, acc:0.7647535211267605


 66%|██████▋   | 7200/10845 [1:04:47<43:35,  1.39it/s, acc=0.765, epoch=25, loss=0.808]

epoch:25, idx:7199/10845, loss:0.8083594244759944, acc:0.7648611111111111


 67%|██████▋   | 7300/10845 [1:05:40<26:53,  2.20it/s, acc=0.765, epoch=25, loss=0.808]

epoch:25, idx:7299/10845, loss:0.8082769551709906, acc:0.7646232876712329


 68%|██████▊   | 7400/10845 [1:06:32<33:39,  1.71it/s, acc=0.764, epoch=25, loss=0.81] 

epoch:25, idx:7399/10845, loss:0.8101217792847673, acc:0.7643243243243243


 69%|██████▉   | 7500/10845 [1:07:26<44:30,  1.25it/s, acc=0.764, epoch=25, loss=0.81] 

epoch:25, idx:7499/10845, loss:0.8098837939858436, acc:0.7641666666666667


 70%|███████   | 7600/10845 [1:08:21<31:59,  1.69it/s, acc=0.764, epoch=25, loss=0.81] 

epoch:25, idx:7599/10845, loss:0.8096527154743671, acc:0.7641447368421053


 71%|███████   | 7700/10845 [1:09:13<25:45,  2.03it/s, acc=0.765, epoch=25, loss=0.809]

epoch:25, idx:7699/10845, loss:0.8092817970297553, acc:0.7645454545454545


 72%|███████▏  | 7800/10845 [1:10:08<21:07,  2.40it/s, acc=0.765, epoch=25, loss=0.808]

epoch:25, idx:7799/10845, loss:0.8080999298584767, acc:0.7648076923076923


 73%|███████▎  | 7900/10845 [1:11:07<31:34,  1.55it/s, acc=0.765, epoch=25, loss=0.81] 

epoch:25, idx:7899/10845, loss:0.8095419569106042, acc:0.7649050632911393


 74%|███████▍  | 8000/10845 [1:11:59<22:34,  2.10it/s, acc=0.765, epoch=25, loss=0.808]

epoch:25, idx:7999/10845, loss:0.80842173897475, acc:0.76534375


 75%|███████▍  | 8100/10845 [1:12:54<28:38,  1.60it/s, acc=0.766, epoch=25, loss=0.806]

epoch:25, idx:8099/10845, loss:0.8058192212272574, acc:0.7659876543209877


 76%|███████▌  | 8200/10845 [1:13:46<18:46,  2.35it/s, acc=0.766, epoch=25, loss=0.806]

epoch:25, idx:8199/10845, loss:0.8063205073010631, acc:0.7658231707317074


 77%|███████▋  | 8300/10845 [1:14:40<21:57,  1.93it/s, acc=0.766, epoch=25, loss=0.806]

epoch:25, idx:8299/10845, loss:0.8064707123156053, acc:0.765933734939759


 77%|███████▋  | 8400/10845 [1:15:29<18:32,  2.20it/s, acc=0.766, epoch=25, loss=0.808]

epoch:25, idx:8399/10845, loss:0.807731975735653, acc:0.7656547619047619


 78%|███████▊  | 8500/10845 [1:16:22<18:30,  2.11it/s, acc=0.766, epoch=25, loss=0.808]

epoch:25, idx:8499/10845, loss:0.8078899123458302, acc:0.7656764705882353


 79%|███████▉  | 8600/10845 [1:17:19<18:39,  2.01it/s, acc=0.766, epoch=25, loss=0.808]

epoch:25, idx:8599/10845, loss:0.8078100706948791, acc:0.7655813953488372


 80%|████████  | 8700/10845 [1:18:10<25:28,  1.40it/s, acc=0.765, epoch=25, loss=0.81] 

epoch:25, idx:8699/10845, loss:0.8095201833768823, acc:0.7651724137931034


 81%|████████  | 8800/10845 [1:19:03<15:33,  2.19it/s, acc=0.765, epoch=25, loss=0.81] 

epoch:25, idx:8799/10845, loss:0.809932425970381, acc:0.7651704545454545


 82%|████████▏ | 8900/10845 [1:20:01<15:33,  2.08it/s, acc=0.765, epoch=25, loss=0.811]

epoch:25, idx:8899/10845, loss:0.8109756873899632, acc:0.765056179775281


 83%|████████▎ | 9000/10845 [1:21:02<17:21,  1.77it/s, acc=0.765, epoch=25, loss=0.812]

epoch:25, idx:8999/10845, loss:0.812126145488686, acc:0.7646666666666667


 84%|████████▍ | 9100/10845 [1:21:56<15:57,  1.82it/s, acc=0.765, epoch=25, loss=0.812]

epoch:25, idx:9099/10845, loss:0.8120783393265126, acc:0.7647527472527472


 85%|████████▍ | 9200/10845 [1:22:58<13:48,  1.98it/s, acc=0.765, epoch=25, loss=0.812]

epoch:25, idx:9199/10845, loss:0.812239860056535, acc:0.7645923913043479


 86%|████████▌ | 9300/10845 [1:23:55<16:55,  1.52it/s, acc=0.764, epoch=25, loss=0.814]

epoch:25, idx:9299/10845, loss:0.8140531635476697, acc:0.7641397849462366


 87%|████████▋ | 9400/10845 [1:24:50<11:37,  2.07it/s, acc=0.764, epoch=25, loss=0.815]

epoch:25, idx:9399/10845, loss:0.8146080842550765, acc:0.7638563829787234


 88%|████████▊ | 9500/10845 [1:25:47<13:41,  1.64it/s, acc=0.764, epoch=25, loss=0.815]

epoch:25, idx:9499/10845, loss:0.815272509712922, acc:0.7637894736842106


 89%|████████▊ | 9600/10845 [1:26:43<14:38,  1.42it/s, acc=0.764, epoch=25, loss=0.815]

epoch:25, idx:9599/10845, loss:0.8153943701647222, acc:0.7640625


 89%|████████▉ | 9700/10845 [1:27:34<10:37,  1.80it/s, acc=0.764, epoch=25, loss=0.816]

epoch:25, idx:9699/10845, loss:0.8159440097304964, acc:0.764020618556701


 90%|█████████ | 9800/10845 [1:28:27<07:13,  2.41it/s, acc=0.764, epoch=25, loss=0.817]

epoch:25, idx:9799/10845, loss:0.816756191819298, acc:0.7636224489795919


 91%|█████████▏| 9900/10845 [1:29:24<08:57,  1.76it/s, acc=0.763, epoch=25, loss=0.819]

epoch:25, idx:9899/10845, loss:0.8190564082848906, acc:0.7633080808080808


 92%|█████████▏| 10000/10845 [1:30:16<08:13,  1.71it/s, acc=0.763, epoch=25, loss=0.819]

epoch:25, idx:9999/10845, loss:0.8192440661966801, acc:0.763175


 93%|█████████▎| 10100/10845 [1:31:10<06:07,  2.03it/s, acc=0.763, epoch=25, loss=0.82] 

epoch:25, idx:10099/10845, loss:0.8201702775813565, acc:0.7629455445544554


 94%|█████████▍| 10200/10845 [1:32:01<04:26,  2.42it/s, acc=0.763, epoch=25, loss=0.82] 

epoch:25, idx:10199/10845, loss:0.8196663671264461, acc:0.7630882352941176


 95%|█████████▍| 10300/10845 [1:32:50<04:39,  1.95it/s, acc=0.763, epoch=25, loss=0.819]

epoch:25, idx:10299/10845, loss:0.8193799762992026, acc:0.7632766990291262


 96%|█████████▌| 10400/10845 [1:33:40<03:59,  1.86it/s, acc=0.763, epoch=25, loss=0.819]

epoch:25, idx:10399/10845, loss:0.8190134259026784, acc:0.7634375


 97%|█████████▋| 10500/10845 [1:34:31<02:50,  2.03it/s, acc=0.763, epoch=25, loss=0.819]

epoch:25, idx:10499/10845, loss:0.8189585256576538, acc:0.763452380952381


 98%|█████████▊| 10600/10845 [1:35:28<02:34,  1.58it/s, acc=0.763, epoch=25, loss=0.819]

epoch:25, idx:10599/10845, loss:0.8190856293687281, acc:0.7633490566037736


 99%|█████████▊| 10700/10845 [1:36:20<01:19,  1.82it/s, acc=0.764, epoch=25, loss=0.819]

epoch:25, idx:10699/10845, loss:0.8194577510891674, acc:0.7635747663551402


100%|█████████▉| 10800/10845 [1:37:15<00:22,  1.97it/s, acc=0.763, epoch=25, loss=0.82] 

epoch:25, idx:10799/10845, loss:0.8199679840383707, acc:0.763425925925926


100%|██████████| 10845/10845 [1:37:42<00:00,  1.62it/s, acc=0.763, epoch=25, loss=0.821]


epoch:25, idx:0/1275, loss:1.124009609222412, acc:0.5
epoch:25, idx:100/1275, loss:1.2238420838176614, acc:0.6460396039603961
epoch:25, idx:200/1275, loss:1.1698298555108446, acc:0.6480099502487562
epoch:25, idx:300/1275, loss:1.1734800463499024, acc:0.6528239202657807
epoch:25, idx:400/1275, loss:1.1510258504280129, acc:0.6527431421446384
epoch:25, idx:500/1275, loss:1.1467101562046957, acc:0.6521956087824351
epoch:25, idx:600/1275, loss:1.1515265190462503, acc:0.6497504159733777
epoch:25, idx:700/1275, loss:1.1680083217872532, acc:0.6462196861626248
epoch:25, idx:800/1275, loss:1.177076194691152, acc:0.6429463171036205
epoch:25, idx:900/1275, loss:1.1670219839346925, acc:0.646503884572697
epoch:25, idx:1000/1275, loss:1.1641722313769451, acc:0.6481018981018981
epoch:25, idx:1100/1275, loss:1.1558869913098597, acc:0.6491825613079019
epoch:25, idx:1200/1275, loss:1.155716520711643, acc:0.6498751040799334


  1%|          | 100/10845 [00:51<1:02:59,  2.84it/s, acc=0.777, epoch=26, loss=0.714]

epoch:26, idx:99/10845, loss:0.7141383123397828, acc:0.7775


  2%|▏         | 200/10845 [01:45<1:15:42,  2.34it/s, acc=0.772, epoch=26, loss=0.723]

epoch:26, idx:199/10845, loss:0.7227982100844383, acc:0.7725


  3%|▎         | 300/10845 [02:39<1:37:09,  1.81it/s, acc=0.776, epoch=26, loss=0.747]

epoch:26, idx:299/10845, loss:0.7467933068672816, acc:0.7758333333333334


  4%|▎         | 400/10845 [03:32<1:53:08,  1.54it/s, acc=0.771, epoch=26, loss=0.762]

epoch:26, idx:399/10845, loss:0.7621874895691871, acc:0.770625


  5%|▍         | 500/10845 [04:25<1:22:19,  2.09it/s, acc=0.767, epoch=26, loss=0.769]

epoch:26, idx:499/10845, loss:0.7685547685623169, acc:0.767


  6%|▌         | 600/10845 [05:20<1:32:13,  1.85it/s, acc=0.764, epoch=26, loss=0.779]

epoch:26, idx:599/10845, loss:0.7786056921879451, acc:0.7641666666666667


  6%|▋         | 700/10845 [06:17<2:22:40,  1.19it/s, acc=0.762, epoch=26, loss=0.802]

epoch:26, idx:699/10845, loss:0.8021959857429777, acc:0.7625


  7%|▋         | 800/10845 [07:09<1:18:28,  2.13it/s, acc=0.762, epoch=26, loss=0.8]  

epoch:26, idx:799/10845, loss:0.7996134983003139, acc:0.7625


  8%|▊         | 900/10845 [08:05<1:22:54,  2.00it/s, acc=0.761, epoch=26, loss=0.805]

epoch:26, idx:899/10845, loss:0.8051569918460316, acc:0.7608333333333334


  9%|▉         | 1000/10845 [08:57<1:11:44,  2.29it/s, acc=0.761, epoch=26, loss=0.802]

epoch:26, idx:999/10845, loss:0.8024458757042885, acc:0.76075


 10%|█         | 1100/10845 [09:50<1:03:47,  2.55it/s, acc=0.765, epoch=26, loss=0.79] 

epoch:26, idx:1099/10845, loss:0.7895483643358404, acc:0.7654545454545455


 11%|█         | 1200/10845 [10:43<1:26:23,  1.86it/s, acc=0.768, epoch=26, loss=0.78] 

epoch:26, idx:1199/10845, loss:0.779789567242066, acc:0.7683333333333333


 12%|█▏        | 1300/10845 [11:35<1:39:02,  1.61it/s, acc=0.768, epoch=26, loss=0.784]

epoch:26, idx:1299/10845, loss:0.78426391768914, acc:0.7682692307692308


 13%|█▎        | 1400/10845 [12:29<1:04:33,  2.44it/s, acc=0.771, epoch=26, loss=0.778]

epoch:26, idx:1399/10845, loss:0.7779187236300537, acc:0.7705357142857143


 14%|█▍        | 1500/10845 [13:25<2:00:15,  1.30it/s, acc=0.772, epoch=26, loss=0.779]

epoch:26, idx:1499/10845, loss:0.7786658032139142, acc:0.7721666666666667


 15%|█▍        | 1600/10845 [14:17<1:22:26,  1.87it/s, acc=0.771, epoch=26, loss=0.791]

epoch:26, idx:1599/10845, loss:0.7907177629508078, acc:0.77140625


 16%|█▌        | 1700/10845 [15:10<1:18:45,  1.94it/s, acc=0.771, epoch=26, loss=0.79] 

epoch:26, idx:1699/10845, loss:0.789557527829619, acc:0.7710294117647059


 17%|█▋        | 1800/10845 [16:01<54:01,  2.79it/s, acc=0.77, epoch=26, loss=0.791]   

epoch:26, idx:1799/10845, loss:0.7907376354601648, acc:0.77


 18%|█▊        | 1900/10845 [16:52<1:46:40,  1.40it/s, acc=0.771, epoch=26, loss=0.79] 

epoch:26, idx:1899/10845, loss:0.7900437560520674, acc:0.7707894736842106


 18%|█▊        | 2000/10845 [17:46<1:09:33,  2.12it/s, acc=0.771, epoch=26, loss=0.791]

epoch:26, idx:1999/10845, loss:0.7912174088358879, acc:0.770625


 19%|█▉        | 2100/10845 [18:45<1:43:42,  1.41it/s, acc=0.77, epoch=26, loss=0.788] 

epoch:26, idx:2099/10845, loss:0.7877030724003201, acc:0.7702380952380953


 20%|██        | 2200/10845 [19:40<1:13:13,  1.97it/s, acc=0.771, epoch=26, loss=0.786]

epoch:26, idx:2199/10845, loss:0.7855264548009092, acc:0.7706818181818181


 21%|██        | 2300/10845 [20:35<1:22:27,  1.73it/s, acc=0.77, epoch=26, loss=0.789] 

epoch:26, idx:2299/10845, loss:0.7888302064978558, acc:0.7701086956521739


 22%|██▏       | 2400/10845 [21:30<1:35:13,  1.48it/s, acc=0.771, epoch=26, loss=0.786]

epoch:26, idx:2399/10845, loss:0.7857581786066293, acc:0.7713541666666667


 23%|██▎       | 2500/10845 [22:22<59:47,  2.33it/s, acc=0.773, epoch=26, loss=0.781]  

epoch:26, idx:2499/10845, loss:0.7807818689107895, acc:0.7728


 24%|██▍       | 2600/10845 [23:16<1:04:16,  2.14it/s, acc=0.772, epoch=26, loss=0.785]

epoch:26, idx:2599/10845, loss:0.7853193321594825, acc:0.7722115384615384


 25%|██▍       | 2700/10845 [24:09<1:01:46,  2.20it/s, acc=0.773, epoch=26, loss=0.78] 

epoch:26, idx:2699/10845, loss:0.7798867008310777, acc:0.7726851851851851


 26%|██▌       | 2800/10845 [24:59<1:01:15,  2.19it/s, acc=0.772, epoch=26, loss=0.783]

epoch:26, idx:2799/10845, loss:0.7831396455530609, acc:0.7719642857142858


 27%|██▋       | 2900/10845 [25:50<1:08:01,  1.95it/s, acc=0.772, epoch=26, loss=0.789]

epoch:26, idx:2899/10845, loss:0.7886557606684751, acc:0.771551724137931


 28%|██▊       | 3000/10845 [26:47<1:11:14,  1.84it/s, acc=0.77, epoch=26, loss=0.796] 

epoch:26, idx:2999/10845, loss:0.7960269813040892, acc:0.7699166666666667


 29%|██▊       | 3100/10845 [27:39<1:03:01,  2.05it/s, acc=0.77, epoch=26, loss=0.8]   

epoch:26, idx:3099/10845, loss:0.800350032852542, acc:0.7695967741935484


 30%|██▉       | 3200/10845 [28:36<1:39:51,  1.28it/s, acc=0.769, epoch=26, loss=0.799]

epoch:26, idx:3199/10845, loss:0.7994908705726266, acc:0.769296875


 30%|███       | 3300/10845 [29:31<1:00:44,  2.07it/s, acc=0.769, epoch=26, loss=0.798]

epoch:26, idx:3299/10845, loss:0.7982082941496011, acc:0.7692424242424243


 31%|███▏      | 3400/10845 [30:25<51:11,  2.42it/s, acc=0.769, epoch=26, loss=0.799]  

epoch:26, idx:3399/10845, loss:0.7989101293507744, acc:0.7694117647058824


 32%|███▏      | 3500/10845 [31:19<1:01:28,  1.99it/s, acc=0.769, epoch=26, loss=0.799]

epoch:26, idx:3499/10845, loss:0.7989968626328877, acc:0.7692142857142857


 33%|███▎      | 3600/10845 [32:11<1:09:20,  1.74it/s, acc=0.77, epoch=26, loss=0.795] 

epoch:26, idx:3599/10845, loss:0.7950582284066412, acc:0.7699305555555556


 34%|███▍      | 3700/10845 [33:04<1:09:06,  1.72it/s, acc=0.771, epoch=26, loss=0.793]

epoch:26, idx:3699/10845, loss:0.7934189625688501, acc:0.7708108108108108


 35%|███▌      | 3800/10845 [34:01<1:02:23,  1.88it/s, acc=0.77, epoch=26, loss=0.792] 

epoch:26, idx:3799/10845, loss:0.7924744194589163, acc:0.7702631578947369


 36%|███▌      | 3900/10845 [34:53<51:08,  2.26it/s, acc=0.77, epoch=26, loss=0.791]   

epoch:26, idx:3899/10845, loss:0.7911388201132799, acc:0.7703205128205128


 37%|███▋      | 4001/10845 [35:46<47:40,  2.39it/s, acc=0.77, epoch=26, loss=0.792]   

epoch:26, idx:3999/10845, loss:0.7924988425672054, acc:0.770125


 38%|███▊      | 4100/10845 [36:39<56:41,  1.98it/s, acc=0.771, epoch=26, loss=0.79]   

epoch:26, idx:4099/10845, loss:0.7898016800240772, acc:0.770670731707317


 39%|███▊      | 4200/10845 [37:28<58:25,  1.90it/s, acc=0.771, epoch=26, loss=0.79]   

epoch:26, idx:4199/10845, loss:0.7903599018000421, acc:0.7706547619047619


 40%|███▉      | 4300/10845 [38:24<56:18,  1.94it/s, acc=0.77, epoch=26, loss=0.791]   

epoch:26, idx:4299/10845, loss:0.7911004663622656, acc:0.7702906976744186


 41%|████      | 4400/10845 [39:14<37:51,  2.84it/s, acc=0.771, epoch=26, loss=0.79]   

epoch:26, idx:4399/10845, loss:0.7895847352797335, acc:0.7714772727272727


 41%|████▏     | 4500/10845 [40:09<52:29,  2.01it/s, acc=0.77, epoch=26, loss=0.794]   

epoch:26, idx:4499/10845, loss:0.7940643856525421, acc:0.7701111111111111


 42%|████▏     | 4600/10845 [41:04<57:58,  1.80it/s, acc=0.771, epoch=26, loss=0.793]  

epoch:26, idx:4599/10845, loss:0.7926693236050398, acc:0.7708152173913043


 43%|████▎     | 4700/10845 [41:57<1:02:10,  1.65it/s, acc=0.771, epoch=26, loss=0.791]

epoch:26, idx:4699/10845, loss:0.7914587620598205, acc:0.7706914893617022


 44%|████▍     | 4800/10845 [42:50<48:11,  2.09it/s, acc=0.771, epoch=26, loss=0.79]   

epoch:26, idx:4799/10845, loss:0.7903518876930078, acc:0.7708333333333334


 45%|████▌     | 4900/10845 [43:38<42:18,  2.34it/s, acc=0.771, epoch=26, loss=0.789]  

epoch:26, idx:4899/10845, loss:0.7894592236012828, acc:0.7708163265306123


 46%|████▌     | 5000/10845 [44:32<45:27,  2.14it/s, acc=0.77, epoch=26, loss=0.792]   

epoch:26, idx:4999/10845, loss:0.7918966383218765, acc:0.77025


 47%|████▋     | 5100/10845 [45:28<39:03,  2.45it/s, acc=0.769, epoch=26, loss=0.797]  

epoch:26, idx:5099/10845, loss:0.796687992811203, acc:0.7691176470588236


 48%|████▊     | 5200/10845 [46:23<56:02,  1.68it/s, acc=0.769, epoch=26, loss=0.795]  

epoch:26, idx:5199/10845, loss:0.7954921445250511, acc:0.769375


 49%|████▉     | 5300/10845 [47:16<43:14,  2.14it/s, acc=0.769, epoch=26, loss=0.795]  

epoch:26, idx:5299/10845, loss:0.7951841344361036, acc:0.7694811320754718


 50%|████▉     | 5400/10845 [48:11<42:33,  2.13it/s, acc=0.769, epoch=26, loss=0.798]  

epoch:26, idx:5399/10845, loss:0.797549204925696, acc:0.7687962962962963


 51%|█████     | 5501/10845 [49:06<34:31,  2.58it/s, acc=0.769, epoch=26, loss=0.797]  

epoch:26, idx:5499/10845, loss:0.7971963486996564, acc:0.7687727272727273


 52%|█████▏    | 5600/10845 [50:04<49:34,  1.76it/s, acc=0.768, epoch=26, loss=0.799]  

epoch:26, idx:5599/10845, loss:0.799232120652284, acc:0.7682589285714285


 53%|█████▎    | 5700/10845 [51:01<40:20,  2.13it/s, acc=0.768, epoch=26, loss=0.8]    

epoch:26, idx:5699/10845, loss:0.8004256246173591, acc:0.7682894736842105


 53%|█████▎    | 5800/10845 [51:53<54:31,  1.54it/s, acc=0.769, epoch=26, loss=0.8]  

epoch:26, idx:5799/10845, loss:0.7998832522384052, acc:0.7686206896551724


 54%|█████▍    | 5900/10845 [52:46<48:45,  1.69it/s, acc=0.769, epoch=26, loss=0.8]    

epoch:26, idx:5899/10845, loss:0.7999618184465473, acc:0.7688559322033899


 55%|█████▌    | 6000/10845 [53:43<40:48,  1.98it/s, acc=0.768, epoch=26, loss=0.801]  

epoch:26, idx:5999/10845, loss:0.8009521875182788, acc:0.7684583333333334


 56%|█████▌    | 6100/10845 [54:38<31:13,  2.53it/s, acc=0.769, epoch=26, loss=0.8]    

epoch:26, idx:6099/10845, loss:0.7998940178409951, acc:0.7687295081967213


 57%|█████▋    | 6200/10845 [55:30<35:30,  2.18it/s, acc=0.768, epoch=26, loss=0.801]  

epoch:26, idx:6199/10845, loss:0.8011094336452023, acc:0.7684677419354838


 58%|█████▊    | 6300/10845 [56:17<33:09,  2.28it/s, acc=0.769, epoch=26, loss=0.8]    

epoch:26, idx:6299/10845, loss:0.8003627567158805, acc:0.7689285714285714


 59%|█████▉    | 6400/10845 [57:12<42:47,  1.73it/s, acc=0.769, epoch=26, loss=0.801]

epoch:26, idx:6399/10845, loss:0.8007036747969687, acc:0.7690625


 60%|█████▉    | 6500/10845 [58:05<34:45,  2.08it/s, acc=0.769, epoch=26, loss=0.803]

epoch:26, idx:6499/10845, loss:0.802554382443428, acc:0.7688461538461538


 61%|██████    | 6600/10845 [59:02<56:23,  1.25it/s, acc=0.769, epoch=26, loss=0.802]

epoch:26, idx:6599/10845, loss:0.8024158157543703, acc:0.7694696969696969


 62%|██████▏   | 6700/10845 [1:00:03<44:53,  1.54it/s, acc=0.769, epoch=26, loss=0.803]

epoch:26, idx:6699/10845, loss:0.8030620647588772, acc:0.768955223880597


 63%|██████▎   | 6800/10845 [1:00:55<38:06,  1.77it/s, acc=0.769, epoch=26, loss=0.803]

epoch:26, idx:6799/10845, loss:0.8027225844430573, acc:0.7691544117647059


 64%|██████▎   | 6900/10845 [1:01:47<27:16,  2.41it/s, acc=0.769, epoch=26, loss=0.804]

epoch:26, idx:6899/10845, loss:0.8038512721916904, acc:0.7693115942028985


 65%|██████▍   | 7000/10845 [1:02:42<33:13,  1.93it/s, acc=0.769, epoch=26, loss=0.806]

epoch:26, idx:6999/10845, loss:0.8055575534488474, acc:0.7686428571428572


 65%|██████▌   | 7100/10845 [1:03:38<21:40,  2.88it/s, acc=0.769, epoch=26, loss=0.805]

epoch:26, idx:7099/10845, loss:0.804952924331309, acc:0.768661971830986


 66%|██████▋   | 7200/10845 [1:04:31<33:19,  1.82it/s, acc=0.768, epoch=26, loss=0.805]

epoch:26, idx:7199/10845, loss:0.804952227063477, acc:0.7684027777777778


 67%|██████▋   | 7300/10845 [1:05:23<30:45,  1.92it/s, acc=0.768, epoch=26, loss=0.807]

epoch:26, idx:7299/10845, loss:0.8073814420218337, acc:0.7681164383561644


 68%|██████▊   | 7400/10845 [1:06:14<35:18,  1.63it/s, acc=0.768, epoch=26, loss=0.81] 

epoch:26, idx:7399/10845, loss:0.8103987955281864, acc:0.7675675675675676


 69%|██████▉   | 7500/10845 [1:07:08<34:44,  1.60it/s, acc=0.768, epoch=26, loss=0.81] 

epoch:26, idx:7499/10845, loss:0.8104703342715899, acc:0.7675666666666666


 70%|███████   | 7600/10845 [1:08:03<35:52,  1.51it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:7599/10845, loss:0.8113342850635711, acc:0.7672697368421053


 71%|███████   | 7700/10845 [1:09:00<30:32,  1.72it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:7699/10845, loss:0.8124148677947459, acc:0.7666883116883116


 72%|███████▏  | 7800/10845 [1:09:57<31:33,  1.61it/s, acc=0.766, epoch=26, loss=0.814]

epoch:26, idx:7799/10845, loss:0.8140310273739771, acc:0.7663141025641026


 73%|███████▎  | 7900/10845 [1:10:51<35:58,  1.36it/s, acc=0.767, epoch=26, loss=0.813]

epoch:26, idx:7899/10845, loss:0.8126837948549397, acc:0.7666455696202532


 74%|███████▍  | 8000/10845 [1:11:47<18:18,  2.59it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:7999/10845, loss:0.812403928482905, acc:0.76665625


 75%|███████▍  | 8100/10845 [1:12:37<22:27,  2.04it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:8099/10845, loss:0.8117237831946509, acc:0.7667901234567901


 76%|███████▌  | 8200/10845 [1:13:29<16:47,  2.63it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:8199/10845, loss:0.8112106449084311, acc:0.7669207317073171


 77%|███████▋  | 8300/10845 [1:14:25<17:46,  2.39it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:8299/10845, loss:0.812117102478283, acc:0.7668072289156627


 77%|███████▋  | 8400/10845 [1:15:15<17:40,  2.31it/s, acc=0.767, epoch=26, loss=0.814]

epoch:26, idx:8399/10845, loss:0.8136356086904803, acc:0.7668154761904762


 78%|███████▊  | 8500/10845 [1:16:09<25:38,  1.52it/s, acc=0.767, epoch=26, loss=0.813]

epoch:26, idx:8499/10845, loss:0.8127715903608238, acc:0.7670294117647058


 79%|███████▉  | 8600/10845 [1:17:04<20:58,  1.78it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:8599/10845, loss:0.8123898321546095, acc:0.7671511627906977


 80%|████████  | 8700/10845 [1:17:57<20:17,  1.76it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:8699/10845, loss:0.8121261911094189, acc:0.7670689655172414


 81%|████████  | 8800/10845 [1:18:55<20:38,  1.65it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:8799/10845, loss:0.8116252950460396, acc:0.7671306818181818


 82%|████████▏ | 8900/10845 [1:19:49<13:16,  2.44it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:8899/10845, loss:0.8113815009811621, acc:0.7671629213483147


 83%|████████▎ | 9000/10845 [1:20:45<18:31,  1.66it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:8999/10845, loss:0.8111581621948216, acc:0.7673611111111112


 84%|████████▍ | 9100/10845 [1:21:38<14:37,  1.99it/s, acc=0.768, epoch=26, loss=0.81] 

epoch:26, idx:9099/10845, loss:0.8099627310217736, acc:0.7676648351648352


 85%|████████▍ | 9200/10845 [1:22:32<14:02,  1.95it/s, acc=0.767, epoch=26, loss=0.81]

epoch:26, idx:9199/10845, loss:0.809579053785166, acc:0.7672826086956521


 86%|████████▌ | 9300/10845 [1:23:25<10:07,  2.54it/s, acc=0.767, epoch=26, loss=0.81] 

epoch:26, idx:9299/10845, loss:0.80973385153118, acc:0.7670967741935484


 87%|████████▋ | 9400/10845 [1:24:18<09:57,  2.42it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:9399/10845, loss:0.8109268672209471, acc:0.7670744680851064


 88%|████████▊ | 9500/10845 [1:25:10<15:35,  1.44it/s, acc=0.767, epoch=26, loss=0.81] 

epoch:26, idx:9499/10845, loss:0.8103563675927489, acc:0.7670263157894737


 89%|████████▊ | 9600/10845 [1:26:05<10:39,  1.95it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:9599/10845, loss:0.8105530140968039, acc:0.7671354166666666


 89%|████████▉ | 9700/10845 [1:26:58<12:10,  1.57it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:9699/10845, loss:0.8113981569412443, acc:0.7670103092783506


 90%|█████████ | 9800/10845 [1:27:47<07:53,  2.21it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:9799/10845, loss:0.8108919409753717, acc:0.7673214285714286


 91%|█████████▏| 9900/10845 [1:28:42<06:19,  2.49it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:9899/10845, loss:0.8115022300454703, acc:0.7672222222222222


 92%|█████████▏| 10000/10845 [1:29:37<10:08,  1.39it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:9999/10845, loss:0.8114719766780734, acc:0.7671


 93%|█████████▎| 10100/10845 [1:30:32<07:51,  1.58it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:10099/10845, loss:0.810950054725503, acc:0.7671782178217822


 94%|█████████▍| 10200/10845 [1:31:23<05:48,  1.85it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:10199/10845, loss:0.8117039283569537, acc:0.7671078431372549


 95%|█████████▍| 10300/10845 [1:32:16<05:24,  1.68it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:10299/10845, loss:0.8111207420545296, acc:0.7672815533980583


 96%|█████████▌| 10400/10845 [1:33:12<03:07,  2.38it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:10399/10845, loss:0.8107927932117421, acc:0.7671875


 97%|█████████▋| 10500/10845 [1:34:01<02:29,  2.31it/s, acc=0.767, epoch=26, loss=0.811]

epoch:26, idx:10499/10845, loss:0.811060635267269, acc:0.7672619047619048


 98%|█████████▊| 10600/10845 [1:34:56<02:44,  1.49it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:10599/10845, loss:0.8121072833675821, acc:0.7672405660377358


 99%|█████████▊| 10700/10845 [1:35:46<01:19,  1.84it/s, acc=0.767, epoch=26, loss=0.812]

epoch:26, idx:10699/10845, loss:0.8120236597648848, acc:0.7673130841121495


100%|█████████▉| 10800/10845 [1:36:38<00:20,  2.17it/s, acc=0.767, epoch=26, loss=0.813]

epoch:26, idx:10799/10845, loss:0.8129210903564537, acc:0.7671990740740741


100%|██████████| 10845/10845 [1:37:00<00:00,  2.37it/s, acc=0.767, epoch=26, loss=0.812]


epoch:26, idx:0/1275, loss:1.0391812324523926, acc:0.5
epoch:26, idx:100/1275, loss:1.2450346108710413, acc:0.6386138613861386
epoch:26, idx:200/1275, loss:1.160508882347031, acc:0.6529850746268657
epoch:26, idx:300/1275, loss:1.1618935392544516, acc:0.6495016611295681
epoch:26, idx:400/1275, loss:1.1429838887473889, acc:0.6533665835411472
epoch:26, idx:500/1275, loss:1.1325352049159432, acc:0.653692614770459
epoch:26, idx:600/1275, loss:1.1400544240748427, acc:0.6522462562396006
epoch:26, idx:700/1275, loss:1.157345051006992, acc:0.6490727532097005
epoch:26, idx:800/1275, loss:1.1655559576257188, acc:0.647003745318352
epoch:26, idx:900/1275, loss:1.1549010775925448, acc:0.6509433962264151
epoch:26, idx:1000/1275, loss:1.1513658137052327, acc:0.6520979020979021
epoch:26, idx:1100/1275, loss:1.1443941476505308, acc:0.6525885558583107
epoch:26, idx:1200/1275, loss:1.1432930102802932, acc:0.6523730224812656


  1%|          | 100/10845 [00:56<1:42:14,  1.75it/s, acc=0.787, epoch=27, loss=0.748]

epoch:27, idx:99/10845, loss:0.7475193482637406, acc:0.7875


  2%|▏         | 200/10845 [01:47<1:22:36,  2.15it/s, acc=0.772, epoch=27, loss=0.766]

epoch:27, idx:199/10845, loss:0.7656635628640651, acc:0.7725


  3%|▎         | 300/10845 [02:42<1:58:01,  1.49it/s, acc=0.77, epoch=27, loss=0.811] 

epoch:27, idx:299/10845, loss:0.8105753541986147, acc:0.77


  4%|▎         | 400/10845 [03:34<1:17:46,  2.24it/s, acc=0.773, epoch=27, loss=0.808]

epoch:27, idx:399/10845, loss:0.8083896746486425, acc:0.773125


  5%|▍         | 500/10845 [04:30<1:34:39,  1.82it/s, acc=0.772, epoch=27, loss=0.815]

epoch:27, idx:499/10845, loss:0.8145679433941841, acc:0.772


  6%|▌         | 600/10845 [05:23<1:20:00,  2.13it/s, acc=0.77, epoch=27, loss=0.815] 

epoch:27, idx:599/10845, loss:0.815129235337178, acc:0.77


  6%|▋         | 700/10845 [06:15<1:56:00,  1.46it/s, acc=0.769, epoch=27, loss=0.819]

epoch:27, idx:699/10845, loss:0.8191270387598446, acc:0.7692857142857142


  7%|▋         | 800/10845 [07:09<1:09:04,  2.42it/s, acc=0.773, epoch=27, loss=0.803]

epoch:27, idx:799/10845, loss:0.803483071513474, acc:0.773125


  8%|▊         | 900/10845 [08:01<1:17:42,  2.13it/s, acc=0.772, epoch=27, loss=0.804]

epoch:27, idx:899/10845, loss:0.8035062918066979, acc:0.7716666666666666


  9%|▉         | 1000/10845 [08:54<1:30:02,  1.82it/s, acc=0.771, epoch=27, loss=0.801]

epoch:27, idx:999/10845, loss:0.8012500605285168, acc:0.7715


 10%|█         | 1100/10845 [09:46<1:36:30,  1.68it/s, acc=0.774, epoch=27, loss=0.792]

epoch:27, idx:1099/10845, loss:0.7920602442188697, acc:0.774090909090909


 11%|█         | 1200/10845 [10:34<1:02:45,  2.56it/s, acc=0.775, epoch=27, loss=0.792]

epoch:27, idx:1199/10845, loss:0.7917480399211247, acc:0.7754166666666666


 12%|█▏        | 1300/10845 [11:28<1:23:27,  1.91it/s, acc=0.774, epoch=27, loss=0.793]

epoch:27, idx:1299/10845, loss:0.7932796903756949, acc:0.7744230769230769


 13%|█▎        | 1400/10845 [12:25<1:07:01,  2.35it/s, acc=0.773, epoch=27, loss=0.796]

epoch:27, idx:1399/10845, loss:0.7958103392805372, acc:0.7730357142857143


 14%|█▍        | 1500/10845 [13:14<1:21:08,  1.92it/s, acc=0.777, epoch=27, loss=0.778]

epoch:27, idx:1499/10845, loss:0.7777572684685389, acc:0.777


 15%|█▍        | 1600/10845 [14:08<1:15:50,  2.03it/s, acc=0.777, epoch=27, loss=0.778]

epoch:27, idx:1599/10845, loss:0.7779536622948945, acc:0.77734375


 16%|█▌        | 1700/10845 [15:03<1:04:40,  2.36it/s, acc=0.775, epoch=27, loss=0.788]

epoch:27, idx:1699/10845, loss:0.7883667638897895, acc:0.7754411764705882


 17%|█▋        | 1800/10845 [15:55<1:17:05,  1.96it/s, acc=0.775, epoch=27, loss=0.792]

epoch:27, idx:1799/10845, loss:0.791859624998437, acc:0.7751388888888889


 18%|█▊        | 1900/10845 [16:49<1:11:18,  2.09it/s, acc=0.774, epoch=27, loss=0.793]

epoch:27, idx:1899/10845, loss:0.7926022834683719, acc:0.7743421052631579


 18%|█▊        | 2000/10845 [17:44<1:30:31,  1.63it/s, acc=0.775, epoch=27, loss=0.791]

epoch:27, idx:1999/10845, loss:0.7909473602026701, acc:0.775


 19%|█▉        | 2100/10845 [18:39<1:37:49,  1.49it/s, acc=0.772, epoch=27, loss=0.796]

epoch:27, idx:2099/10845, loss:0.796416014333566, acc:0.7720238095238096


 20%|██        | 2200/10845 [19:30<1:03:50,  2.26it/s, acc=0.771, epoch=27, loss=0.801]

epoch:27, idx:2199/10845, loss:0.8008586196059531, acc:0.7711363636363636


 21%|██        | 2300/10845 [20:21<1:03:59,  2.23it/s, acc=0.771, epoch=27, loss=0.802]

epoch:27, idx:2299/10845, loss:0.8019236800074577, acc:0.7709782608695652


 22%|██▏       | 2400/10845 [21:13<1:09:21,  2.03it/s, acc=0.769, epoch=27, loss=0.807]

epoch:27, idx:2399/10845, loss:0.806791442024211, acc:0.769375


 23%|██▎       | 2500/10845 [22:08<1:20:00,  1.74it/s, acc=0.769, epoch=27, loss=0.811]

epoch:27, idx:2499/10845, loss:0.8111187590003014, acc:0.7686


 24%|██▍       | 2600/10845 [23:05<1:15:38,  1.82it/s, acc=0.769, epoch=27, loss=0.806]

epoch:27, idx:2599/10845, loss:0.8063546894719967, acc:0.7693269230769231


 25%|██▍       | 2700/10845 [23:56<51:01,  2.66it/s, acc=0.77, epoch=27, loss=0.803]   

epoch:27, idx:2699/10845, loss:0.8029835619308331, acc:0.7700925925925926


 26%|██▌       | 2801/10845 [24:49<1:08:58,  1.94it/s, acc=0.77, epoch=27, loss=0.807] 

epoch:27, idx:2799/10845, loss:0.8069185380637646, acc:0.7698214285714285


 27%|██▋       | 2900/10845 [25:45<1:09:50,  1.90it/s, acc=0.772, epoch=27, loss=0.798]

epoch:27, idx:2899/10845, loss:0.7982753010248316, acc:0.7722413793103449


 28%|██▊       | 3000/10845 [26:39<1:07:32,  1.94it/s, acc=0.772, epoch=27, loss=0.801]

epoch:27, idx:2999/10845, loss:0.8006093989014625, acc:0.7720833333333333


 29%|██▊       | 3100/10845 [27:34<1:16:58,  1.68it/s, acc=0.772, epoch=27, loss=0.798]

epoch:27, idx:3099/10845, loss:0.7981039201828741, acc:0.7725


 30%|██▉       | 3200/10845 [28:28<52:58,  2.40it/s, acc=0.772, epoch=27, loss=0.801]  

epoch:27, idx:3199/10845, loss:0.800747888982296, acc:0.772109375


 30%|███       | 3300/10845 [29:23<1:10:04,  1.79it/s, acc=0.772, epoch=27, loss=0.801]

epoch:27, idx:3299/10845, loss:0.8014637810894937, acc:0.7715151515151515


 31%|███▏      | 3400/10845 [30:17<1:05:49,  1.89it/s, acc=0.772, epoch=27, loss=0.8]  

epoch:27, idx:3399/10845, loss:0.7998785831998376, acc:0.7715441176470588


 32%|███▏      | 3500/10845 [31:12<1:11:43,  1.71it/s, acc=0.771, epoch=27, loss=0.803]

epoch:27, idx:3499/10845, loss:0.8030415294510977, acc:0.7709285714285714


 33%|███▎      | 3600/10845 [32:08<1:03:20,  1.91it/s, acc=0.771, epoch=27, loss=0.805]

epoch:27, idx:3599/10845, loss:0.8049464488691754, acc:0.7711805555555555


 34%|███▍      | 3700/10845 [33:06<1:00:53,  1.96it/s, acc=0.77, epoch=27, loss=0.807] 

epoch:27, idx:3699/10845, loss:0.8067845317963007, acc:0.770472972972973


 35%|███▌      | 3800/10845 [33:56<1:07:18,  1.74it/s, acc=0.771, epoch=27, loss=0.805]

epoch:27, idx:3799/10845, loss:0.8051793675046218, acc:0.7706578947368421


 36%|███▌      | 3900/10845 [34:48<1:12:10,  1.60it/s, acc=0.77, epoch=27, loss=0.806] 

epoch:27, idx:3899/10845, loss:0.8063694221086991, acc:0.7703846153846153


 37%|███▋      | 4000/10845 [35:40<58:01,  1.97it/s, acc=0.77, epoch=27, loss=0.808]   

epoch:27, idx:3999/10845, loss:0.8080275540351868, acc:0.7698125


 38%|███▊      | 4100/10845 [36:30<37:26,  3.00it/s, acc=0.77, epoch=27, loss=0.806]   

epoch:27, idx:4099/10845, loss:0.8063216139339819, acc:0.7697560975609756


 39%|███▊      | 4200/10845 [37:21<50:12,  2.21it/s, acc=0.769, epoch=27, loss=0.807]  

epoch:27, idx:4199/10845, loss:0.8066095234240804, acc:0.7694047619047619


 40%|███▉      | 4300/10845 [38:15<1:21:18,  1.34it/s, acc=0.769, epoch=27, loss=0.808]

epoch:27, idx:4299/10845, loss:0.8077447591687358, acc:0.7691860465116279


 41%|████      | 4400/10845 [39:09<1:12:46,  1.48it/s, acc=0.768, epoch=27, loss=0.81] 

epoch:27, idx:4399/10845, loss:0.8101870928704739, acc:0.7679545454545454


 41%|████▏     | 4500/10845 [40:01<56:08,  1.88it/s, acc=0.769, epoch=27, loss=0.805]  

epoch:27, idx:4499/10845, loss:0.8052807942363951, acc:0.7692777777777777


 42%|████▏     | 4600/10845 [40:58<1:20:31,  1.29it/s, acc=0.77, epoch=27, loss=0.801] 

epoch:27, idx:4599/10845, loss:0.801248993990214, acc:0.7701086956521739


 43%|████▎     | 4700/10845 [41:52<1:00:30,  1.69it/s, acc=0.77, epoch=27, loss=0.801]

epoch:27, idx:4699/10845, loss:0.801419947020551, acc:0.7702127659574468


 44%|████▍     | 4800/10845 [42:48<1:02:16,  1.62it/s, acc=0.77, epoch=27, loss=0.8]  

epoch:27, idx:4799/10845, loss:0.7998039841155211, acc:0.7703645833333334


 45%|████▌     | 4900/10845 [43:40<44:27,  2.23it/s, acc=0.771, epoch=27, loss=0.797]  

epoch:27, idx:4899/10845, loss:0.7969914043679529, acc:0.7711224489795918


 46%|████▌     | 5000/10845 [44:34<1:03:10,  1.54it/s, acc=0.771, epoch=27, loss=0.798]

epoch:27, idx:4999/10845, loss:0.7981725749969483, acc:0.77115


 47%|████▋     | 5100/10845 [45:25<52:39,  1.82it/s, acc=0.771, epoch=27, loss=0.797]  

epoch:27, idx:5099/10845, loss:0.7970890259392122, acc:0.7714215686274509


 48%|████▊     | 5200/10845 [46:15<42:46,  2.20it/s, acc=0.771, epoch=27, loss=0.796]  

epoch:27, idx:5199/10845, loss:0.7963593432880365, acc:0.771298076923077


 49%|████▉     | 5300/10845 [47:12<1:07:34,  1.37it/s, acc=0.771, epoch=27, loss=0.796]

epoch:27, idx:5299/10845, loss:0.796300387292538, acc:0.7712735849056603


 50%|████▉     | 5400/10845 [48:03<48:36,  1.87it/s, acc=0.772, epoch=27, loss=0.797]  

epoch:27, idx:5399/10845, loss:0.7966848431582804, acc:0.7716203703703703


 51%|█████     | 5500/10845 [48:55<29:08,  3.06it/s, acc=0.772, epoch=27, loss=0.796]  

epoch:27, idx:5499/10845, loss:0.7955127591544932, acc:0.7718636363636364


 52%|█████▏    | 5600/10845 [49:47<50:38,  1.73it/s, acc=0.772, epoch=27, loss=0.795]  

epoch:27, idx:5599/10845, loss:0.7952898363343307, acc:0.7718303571428572


 53%|█████▎    | 5700/10845 [50:38<41:59,  2.04it/s, acc=0.772, epoch=27, loss=0.796]  

epoch:27, idx:5699/10845, loss:0.7958620005323176, acc:0.7716228070175438


 53%|█████▎    | 5800/10845 [51:32<41:39,  2.02it/s, acc=0.772, epoch=27, loss=0.797]  

epoch:27, idx:5799/10845, loss:0.7968531656470792, acc:0.7717672413793103


 54%|█████▍    | 5900/10845 [52:23<55:04,  1.50it/s, acc=0.772, epoch=27, loss=0.799]  

epoch:27, idx:5899/10845, loss:0.7985781924805399, acc:0.7716101694915254


 55%|█████▌    | 6000/10845 [53:18<47:12,  1.71it/s, acc=0.772, epoch=27, loss=0.798]  

epoch:27, idx:5999/10845, loss:0.7980286319653194, acc:0.7718333333333334


 56%|█████▌    | 6100/10845 [54:10<51:05,  1.55it/s, acc=0.772, epoch=27, loss=0.797]  

epoch:27, idx:6099/10845, loss:0.7967512047193089, acc:0.7718852459016393


 57%|█████▋    | 6200/10845 [55:06<39:04,  1.98it/s, acc=0.771, epoch=27, loss=0.799]  

epoch:27, idx:6199/10845, loss:0.7990800493955612, acc:0.7714516129032258


 58%|█████▊    | 6300/10845 [55:59<37:52,  2.00it/s, acc=0.771, epoch=27, loss=0.8]  

epoch:27, idx:6299/10845, loss:0.8001527908964763, acc:0.7710317460317461


 59%|█████▉    | 6400/10845 [56:53<40:42,  1.82it/s, acc=0.771, epoch=27, loss=0.799]  

epoch:27, idx:6399/10845, loss:0.7990580146946012, acc:0.7706640625


 60%|█████▉    | 6500/10845 [57:47<34:15,  2.11it/s, acc=0.77, epoch=27, loss=0.8]   

epoch:27, idx:6499/10845, loss:0.8004053942240201, acc:0.77


 61%|██████    | 6600/10845 [58:45<44:52,  1.58it/s, acc=0.77, epoch=27, loss=0.801]

epoch:27, idx:6599/10845, loss:0.800834846397241, acc:0.7697348484848485


 62%|██████▏   | 6700/10845 [59:40<48:58,  1.41it/s, acc=0.769, epoch=27, loss=0.801]

epoch:27, idx:6699/10845, loss:0.8010727655620717, acc:0.7692910447761194


 63%|██████▎   | 6800/10845 [1:00:36<34:54,  1.93it/s, acc=0.769, epoch=27, loss=0.801]

epoch:27, idx:6799/10845, loss:0.8011731405380894, acc:0.7688970588235294


 64%|██████▎   | 6900/10845 [1:01:33<34:33,  1.90it/s, acc=0.769, epoch=27, loss=0.801]

epoch:27, idx:6899/10845, loss:0.8008779078937959, acc:0.768731884057971


 65%|██████▍   | 7000/10845 [1:02:26<38:37,  1.66it/s, acc=0.769, epoch=27, loss=0.801]

epoch:27, idx:6999/10845, loss:0.8013122920691967, acc:0.7686785714285714


 65%|██████▌   | 7100/10845 [1:03:19<42:53,  1.46it/s, acc=0.769, epoch=27, loss=0.802]

epoch:27, idx:7099/10845, loss:0.8022555574942642, acc:0.7686267605633803


 66%|██████▋   | 7200/10845 [1:04:12<29:05,  2.09it/s, acc=0.769, epoch=27, loss=0.803]

epoch:27, idx:7199/10845, loss:0.8031055927235219, acc:0.7686458333333334


 67%|██████▋   | 7300/10845 [1:05:05<25:35,  2.31it/s, acc=0.769, epoch=27, loss=0.803]

epoch:27, idx:7299/10845, loss:0.8033142598973562, acc:0.768527397260274


 68%|██████▊   | 7400/10845 [1:06:00<24:20,  2.36it/s, acc=0.768, epoch=27, loss=0.805]

epoch:27, idx:7399/10845, loss:0.8054029787835237, acc:0.7681418918918919


 69%|██████▉   | 7500/10845 [1:06:53<44:52,  1.24it/s, acc=0.768, epoch=27, loss=0.805]

epoch:27, idx:7499/10845, loss:0.8048535648385684, acc:0.7683666666666666


 70%|███████   | 7600/10845 [1:07:48<25:02,  2.16it/s, acc=0.768, epoch=27, loss=0.806]

epoch:27, idx:7599/10845, loss:0.8056650679480089, acc:0.7682894736842105


 71%|███████   | 7700/10845 [1:08:40<30:05,  1.74it/s, acc=0.769, epoch=27, loss=0.804]

epoch:27, idx:7699/10845, loss:0.804493744771976, acc:0.7685064935064935


 72%|███████▏  | 7800/10845 [1:09:30<26:04,  1.95it/s, acc=0.768, epoch=27, loss=0.804]

epoch:27, idx:7799/10845, loss:0.8037215583752363, acc:0.7684294871794872


 73%|███████▎  | 7900/10845 [1:10:21<22:12,  2.21it/s, acc=0.768, epoch=27, loss=0.803]

epoch:27, idx:7899/10845, loss:0.8030632152361206, acc:0.7684177215189874


 74%|███████▍  | 8000/10845 [1:11:15<31:57,  1.48it/s, acc=0.769, epoch=27, loss=0.803]

epoch:27, idx:7999/10845, loss:0.8034462558552623, acc:0.76853125


 75%|███████▍  | 8100/10845 [1:12:05<23:06,  1.98it/s, acc=0.769, epoch=27, loss=0.803]

epoch:27, idx:8099/10845, loss:0.8034395792528435, acc:0.7685185185185185


 76%|███████▌  | 8200/10845 [1:12:54<19:40,  2.24it/s, acc=0.769, epoch=27, loss=0.803]

epoch:27, idx:8199/10845, loss:0.8032888770830341, acc:0.7686890243902439


 77%|███████▋  | 8300/10845 [1:13:45<23:58,  1.77it/s, acc=0.769, epoch=27, loss=0.802]

epoch:27, idx:8299/10845, loss:0.8016936846813524, acc:0.7688855421686747


 77%|███████▋  | 8400/10845 [1:14:38<24:17,  1.68it/s, acc=0.769, epoch=27, loss=0.803]

epoch:27, idx:8399/10845, loss:0.8032214539959317, acc:0.7685416666666667


 78%|███████▊  | 8500/10845 [1:15:33<15:49,  2.47it/s, acc=0.768, epoch=27, loss=0.803]

epoch:27, idx:8499/10845, loss:0.8030796052988838, acc:0.7684411764705882


 79%|███████▉  | 8600/10845 [1:16:26<18:24,  2.03it/s, acc=0.768, epoch=27, loss=0.804]

epoch:27, idx:8599/10845, loss:0.8035829680326373, acc:0.7682848837209302


 80%|████████  | 8700/10845 [1:17:22<22:35,  1.58it/s, acc=0.768, epoch=27, loss=0.804]

epoch:27, idx:8699/10845, loss:0.803916265389015, acc:0.7680747126436782


 81%|████████  | 8800/10845 [1:18:16<17:30,  1.95it/s, acc=0.768, epoch=27, loss=0.805]

epoch:27, idx:8799/10845, loss:0.8047131737592546, acc:0.7679261363636364


 82%|████████▏ | 8900/10845 [1:19:09<18:43,  1.73it/s, acc=0.768, epoch=27, loss=0.806]

epoch:27, idx:8899/10845, loss:0.8058819365836262, acc:0.7675842696629214


 83%|████████▎ | 9000/10845 [1:20:02<13:41,  2.24it/s, acc=0.768, epoch=27, loss=0.805]

epoch:27, idx:8999/10845, loss:0.8046054236094157, acc:0.7678611111111111


 84%|████████▍ | 9100/10845 [1:20:53<13:55,  2.09it/s, acc=0.768, epoch=27, loss=0.804]

epoch:27, idx:9099/10845, loss:0.8044164945398058, acc:0.7677747252747252


 85%|████████▍ | 9200/10845 [1:21:45<13:41,  2.00it/s, acc=0.767, epoch=27, loss=0.806]

epoch:27, idx:9199/10845, loss:0.8063179787151191, acc:0.7672282608695652


 86%|████████▌ | 9300/10845 [1:22:38<18:51,  1.37it/s, acc=0.767, epoch=27, loss=0.807]

epoch:27, idx:9299/10845, loss:0.8070106431745714, acc:0.767258064516129


 87%|████████▋ | 9400/10845 [1:23:30<14:10,  1.70it/s, acc=0.767, epoch=27, loss=0.806]

epoch:27, idx:9399/10845, loss:0.8063349813730158, acc:0.7675


 88%|████████▊ | 9500/10845 [1:24:20<15:49,  1.42it/s, acc=0.768, epoch=27, loss=0.805]

epoch:27, idx:9499/10845, loss:0.805198099155175, acc:0.7676578947368421


 89%|████████▊ | 9600/10845 [1:25:15<11:12,  1.85it/s, acc=0.767, epoch=27, loss=0.806]

epoch:27, idx:9599/10845, loss:0.8064636578907569, acc:0.7673697916666666


 89%|████████▉ | 9700/10845 [1:26:14<13:46,  1.39it/s, acc=0.767, epoch=27, loss=0.807]

epoch:27, idx:9699/10845, loss:0.8065442520564364, acc:0.7672680412371135


 90%|█████████ | 9800/10845 [1:27:06<10:03,  1.73it/s, acc=0.767, epoch=27, loss=0.807]

epoch:27, idx:9799/10845, loss:0.806602805165612, acc:0.7669897959183674


 91%|█████████▏| 9900/10845 [1:27:57<10:37,  1.48it/s, acc=0.767, epoch=27, loss=0.807]

epoch:27, idx:9899/10845, loss:0.8072043184439341, acc:0.767020202020202


 92%|█████████▏| 10000/10845 [1:28:57<08:14,  1.71it/s, acc=0.767, epoch=27, loss=0.808]

epoch:27, idx:9999/10845, loss:0.8082856435596943, acc:0.76665


 93%|█████████▎| 10100/10845 [1:29:52<06:59,  1.77it/s, acc=0.767, epoch=27, loss=0.809]

epoch:27, idx:10099/10845, loss:0.8090870346704332, acc:0.7666831683168317


 94%|█████████▍| 10200/10845 [1:30:45<06:01,  1.78it/s, acc=0.767, epoch=27, loss=0.809]

epoch:27, idx:10199/10845, loss:0.8093661845490044, acc:0.7665196078431372


 95%|█████████▍| 10300/10845 [1:31:33<05:27,  1.66it/s, acc=0.767, epoch=27, loss=0.808]

epoch:27, idx:10299/10845, loss:0.8084989327857796, acc:0.7666019417475728


 96%|█████████▌| 10400/10845 [1:32:25<03:02,  2.43it/s, acc=0.767, epoch=27, loss=0.809]

epoch:27, idx:10399/10845, loss:0.8086399473393193, acc:0.7666105769230769


 97%|█████████▋| 10500/10845 [1:33:17<03:45,  1.53it/s, acc=0.767, epoch=27, loss=0.809]

epoch:27, idx:10499/10845, loss:0.8089285154881931, acc:0.7666428571428572


 98%|█████████▊| 10600/10845 [1:34:12<01:58,  2.07it/s, acc=0.766, epoch=27, loss=0.81] 

epoch:27, idx:10599/10845, loss:0.8096778849301474, acc:0.7662971698113208


 99%|█████████▊| 10700/10845 [1:35:06<01:02,  2.30it/s, acc=0.766, epoch=27, loss=0.81] 

epoch:27, idx:10699/10845, loss:0.8103793485393034, acc:0.7659112149532711


100%|█████████▉| 10800/10845 [1:36:02<00:32,  1.40it/s, acc=0.766, epoch=27, loss=0.811]

epoch:27, idx:10799/10845, loss:0.8106080403013362, acc:0.765787037037037


100%|██████████| 10845/10845 [1:36:26<00:00,  2.43it/s, acc=0.766, epoch=27, loss=0.811]


epoch:27, idx:0/1275, loss:0.9612737894058228, acc:0.75
epoch:27, idx:100/1275, loss:1.2250721047420312, acc:0.6262376237623762
epoch:27, idx:200/1275, loss:1.1608004620419212, acc:0.6405472636815921
epoch:27, idx:300/1275, loss:1.164603127989658, acc:0.6420265780730897
epoch:27, idx:400/1275, loss:1.144075209363143, acc:0.6458852867830424
epoch:27, idx:500/1275, loss:1.1376265323090695, acc:0.6477045908183633
epoch:27, idx:600/1275, loss:1.143660143885557, acc:0.6480865224625624
epoch:27, idx:700/1275, loss:1.1595579382356325, acc:0.6462196861626248
epoch:27, idx:800/1275, loss:1.165670670894499, acc:0.6457553058676654
epoch:27, idx:900/1275, loss:1.1568978768871574, acc:0.6501109877913429
epoch:27, idx:1000/1275, loss:1.1550920425237834, acc:0.6518481518481518
epoch:27, idx:1100/1275, loss:1.1490527079064232, acc:0.6525885558583107
epoch:27, idx:1200/1275, loss:1.1472674611704634, acc:0.6525811823480433


  1%|          | 100/10845 [00:49<52:51,  3.39it/s, acc=0.815, epoch=28, loss=0.58]  

epoch:28, idx:99/10845, loss:0.5799345195293426, acc:0.815


  2%|▏         | 200/10845 [01:47<1:32:59,  1.91it/s, acc=0.787, epoch=28, loss=0.716]

epoch:28, idx:199/10845, loss:0.7157929411530495, acc:0.7875


  3%|▎         | 300/10845 [02:38<1:20:27,  2.18it/s, acc=0.777, epoch=28, loss=0.751]

epoch:28, idx:299/10845, loss:0.751201346317927, acc:0.7766666666666666


  4%|▎         | 400/10845 [03:30<1:27:02,  2.00it/s, acc=0.778, epoch=28, loss=0.744]

epoch:28, idx:399/10845, loss:0.7440135024487973, acc:0.778125


  5%|▍         | 500/10845 [04:26<1:24:57,  2.03it/s, acc=0.773, epoch=28, loss=0.758]

epoch:28, idx:499/10845, loss:0.7582690390348434, acc:0.7735


  6%|▌         | 600/10845 [05:17<1:31:34,  1.86it/s, acc=0.775, epoch=28, loss=0.772]

epoch:28, idx:599/10845, loss:0.7722690990567207, acc:0.7745833333333333


  6%|▋         | 700/10845 [06:11<1:39:01,  1.71it/s, acc=0.776, epoch=28, loss=0.77] 

epoch:28, idx:699/10845, loss:0.7702302031857626, acc:0.7757142857142857


  7%|▋         | 801/10845 [07:04<50:18,  3.33it/s, acc=0.776, epoch=28, loss=0.772]  

epoch:28, idx:799/10845, loss:0.7720753817260265, acc:0.775625


  8%|▊         | 900/10845 [07:50<1:19:48,  2.08it/s, acc=0.778, epoch=28, loss=0.772]

epoch:28, idx:899/10845, loss:0.7717419953478707, acc:0.7777777777777778


  9%|▉         | 1001/10845 [08:35<53:05,  3.09it/s, acc=0.78, epoch=28, loss=0.768]  

epoch:28, idx:999/10845, loss:0.7687005959749221, acc:0.78


 10%|█         | 1100/10845 [09:20<1:26:14,  1.88it/s, acc=0.776, epoch=28, loss=0.776]

epoch:28, idx:1099/10845, loss:0.7763876503435048, acc:0.7763636363636364


 11%|█         | 1200/10845 [10:08<1:28:16,  1.82it/s, acc=0.777, epoch=28, loss=0.769]

epoch:28, idx:1199/10845, loss:0.7688512710978588, acc:0.7766666666666666


 12%|█▏        | 1300/10845 [11:02<1:42:58,  1.54it/s, acc=0.777, epoch=28, loss=0.765]

epoch:28, idx:1299/10845, loss:0.7652335496819936, acc:0.7773076923076923


 13%|█▎        | 1400/10845 [13:11<3:15:41,  1.24s/it, acc=0.777, epoch=28, loss=0.769] 

epoch:28, idx:1399/10845, loss:0.7686007197627, acc:0.7767857142857143


 14%|█▍        | 1500/10845 [14:54<2:36:59,  1.01s/it, acc=0.778, epoch=28, loss=0.765]

epoch:28, idx:1499/10845, loss:0.7649511624375979, acc:0.778


 15%|█▍        | 1600/10845 [16:31<2:19:48,  1.10it/s, acc=0.779, epoch=28, loss=0.766]

epoch:28, idx:1599/10845, loss:0.7657498441077769, acc:0.7790625


 16%|█▌        | 1700/10845 [18:11<2:21:48,  1.07it/s, acc=0.783, epoch=28, loss=0.759]

epoch:28, idx:1699/10845, loss:0.7591282387165462, acc:0.7827941176470589


 17%|█▋        | 1800/10845 [19:39<1:51:20,  1.35it/s, acc=0.785, epoch=28, loss=0.75] 

epoch:28, idx:1799/10845, loss:0.749740366190672, acc:0.7848611111111111


 18%|█▊        | 1900/10845 [20:57<1:46:21,  1.40it/s, acc=0.783, epoch=28, loss=0.755]

epoch:28, idx:1899/10845, loss:0.7545685432929742, acc:0.7832894736842105


 18%|█▊        | 2000/10845 [22:23<2:45:11,  1.12s/it, acc=0.781, epoch=28, loss=0.758]

epoch:28, idx:1999/10845, loss:0.7581962450891733, acc:0.78125


 19%|█▉        | 2100/10845 [23:47<2:06:01,  1.16it/s, acc=0.781, epoch=28, loss=0.758]

epoch:28, idx:2099/10845, loss:0.7581718688351767, acc:0.7810714285714285


 20%|██        | 2200/10845 [25:08<2:06:16,  1.14it/s, acc=0.781, epoch=28, loss=0.76] 

epoch:28, idx:2199/10845, loss:0.7595786473696882, acc:0.7813636363636364


 21%|██        | 2300/10845 [26:28<1:28:16,  1.61it/s, acc=0.781, epoch=28, loss=0.764]

epoch:28, idx:2299/10845, loss:0.7636114114782084, acc:0.7808695652173913


 22%|██▏       | 2400/10845 [27:47<1:36:16,  1.46it/s, acc=0.78, epoch=28, loss=0.763] 

epoch:28, idx:2399/10845, loss:0.7625530764708917, acc:0.7804166666666666


 23%|██▎       | 2500/10845 [29:08<1:37:23,  1.43it/s, acc=0.779, epoch=28, loss=0.765]

epoch:28, idx:2499/10845, loss:0.7652601605176925, acc:0.7794


 24%|██▍       | 2600/10845 [30:25<1:29:42,  1.53it/s, acc=0.779, epoch=28, loss=0.768]

epoch:28, idx:2599/10845, loss:0.7677365400011723, acc:0.7786538461538461


 25%|██▍       | 2700/10845 [31:33<1:17:29,  1.75it/s, acc=0.777, epoch=28, loss=0.773]

epoch:28, idx:2699/10845, loss:0.773060449163119, acc:0.7773148148148148


 26%|██▌       | 2800/10845 [33:09<2:31:18,  1.13s/it, acc=0.777, epoch=28, loss=0.774] 

epoch:28, idx:2799/10845, loss:0.7744092781628881, acc:0.7766964285714286


 27%|██▋       | 2900/10845 [35:07<1:23:23,  1.59it/s, acc=0.776, epoch=28, loss=0.775] 

epoch:28, idx:2899/10845, loss:0.7745651435029918, acc:0.7762931034482758


 28%|██▊       | 3000/10845 [36:40<2:03:34,  1.06it/s, acc=0.776, epoch=28, loss=0.776]

epoch:28, idx:2999/10845, loss:0.7761831662654877, acc:0.7764166666666666


 29%|██▊       | 3100/10845 [38:16<1:52:47,  1.14it/s, acc=0.777, epoch=28, loss=0.775]

epoch:28, idx:3099/10845, loss:0.7749856734660364, acc:0.7771774193548387


 30%|██▉       | 3200/10845 [39:43<1:37:16,  1.31it/s, acc=0.778, epoch=28, loss=0.769]

epoch:28, idx:3199/10845, loss:0.7691821138560772, acc:0.77828125


 30%|███       | 3300/10845 [41:15<2:24:27,  1.15s/it, acc=0.778, epoch=28, loss=0.769]

epoch:28, idx:3299/10845, loss:0.7690956598159039, acc:0.7782575757575757


 31%|███▏      | 3400/10845 [42:45<1:37:23,  1.27it/s, acc=0.778, epoch=28, loss=0.768]

epoch:28, idx:3399/10845, loss:0.7682779153304942, acc:0.7783823529411765


 32%|███▏      | 3500/10845 [44:09<1:18:29,  1.56it/s, acc=0.779, epoch=28, loss=0.765]

epoch:28, idx:3499/10845, loss:0.7652537023680551, acc:0.7788571428571428


 33%|███▎      | 3600/10845 [45:38<1:54:27,  1.06it/s, acc=0.778, epoch=28, loss=0.766]

epoch:28, idx:3599/10845, loss:0.7657718193862173, acc:0.7781944444444444


 34%|███▍      | 3700/10845 [46:57<1:39:54,  1.19it/s, acc=0.777, epoch=28, loss=0.767]

epoch:28, idx:3699/10845, loss:0.7670983832591289, acc:0.7771621621621622


 35%|███▌      | 3800/10845 [48:40<1:23:35,  1.40it/s, acc=0.777, epoch=28, loss=0.766]

epoch:28, idx:3799/10845, loss:0.7658329671307614, acc:0.7770394736842106


 36%|███▌      | 3900/10845 [50:04<1:38:16,  1.18it/s, acc=0.779, epoch=28, loss=0.759]

epoch:28, idx:3899/10845, loss:0.759096162449091, acc:0.778525641025641


 37%|███▋      | 4000/10845 [51:23<1:20:25,  1.42it/s, acc=0.778, epoch=28, loss=0.759]

epoch:28, idx:3999/10845, loss:0.7593137344419956, acc:0.778375


 38%|███▊      | 4100/10845 [52:39<1:11:03,  1.58it/s, acc=0.778, epoch=28, loss=0.762]

epoch:28, idx:4099/10845, loss:0.7621192974579043, acc:0.7777439024390244


 39%|███▊      | 4200/10845 [53:57<1:24:41,  1.31it/s, acc=0.777, epoch=28, loss=0.766]

epoch:28, idx:4199/10845, loss:0.7657928869553975, acc:0.7767857142857143


 40%|███▉      | 4300/10845 [55:14<1:24:15,  1.29it/s, acc=0.777, epoch=28, loss=0.766]

epoch:28, idx:4299/10845, loss:0.7663108310034109, acc:0.776860465116279


 41%|████      | 4400/10845 [56:37<1:22:38,  1.30it/s, acc=0.777, epoch=28, loss=0.769]

epoch:28, idx:4399/10845, loss:0.7690999478643591, acc:0.7765340909090909


 41%|████▏     | 4500/10845 [57:53<1:26:59,  1.22it/s, acc=0.777, epoch=28, loss=0.768]

epoch:28, idx:4499/10845, loss:0.7678117274973127, acc:0.7766666666666666


 42%|████▏     | 4600/10845 [59:09<1:30:01,  1.16it/s, acc=0.776, epoch=28, loss=0.769]

epoch:28, idx:4599/10845, loss:0.7687697138216184, acc:0.7761956521739131


 43%|████▎     | 4700/10845 [1:00:23<1:14:14,  1.38it/s, acc=0.776, epoch=28, loss=0.768]

epoch:28, idx:4699/10845, loss:0.7683140135001629, acc:0.7762234042553191


 44%|████▍     | 4800/10845 [1:01:35<1:00:23,  1.67it/s, acc=0.776, epoch=28, loss=0.769]

epoch:28, idx:4799/10845, loss:0.7694101188269754, acc:0.7758854166666667


 45%|████▌     | 4900/10845 [1:02:47<1:14:34,  1.33it/s, acc=0.776, epoch=28, loss=0.769]

epoch:28, idx:4899/10845, loss:0.7689186384544081, acc:0.7762755102040816


 46%|████▌     | 5000/10845 [1:03:59<1:15:23,  1.29it/s, acc=0.776, epoch=28, loss=0.769]

epoch:28, idx:4999/10845, loss:0.7690710250794888, acc:0.7765


 47%|████▋     | 5100/10845 [1:05:08<1:01:11,  1.56it/s, acc=0.776, epoch=28, loss=0.771]

epoch:28, idx:5099/10845, loss:0.7708608678100156, acc:0.7757843137254902


 48%|████▊     | 5200/10845 [1:06:20<1:05:49,  1.43it/s, acc=0.776, epoch=28, loss=0.769]

epoch:28, idx:5199/10845, loss:0.7694328216234079, acc:0.7760576923076923


 49%|████▉     | 5300/10845 [1:07:25<45:28,  2.03it/s, acc=0.776, epoch=28, loss=0.771]  

epoch:28, idx:5299/10845, loss:0.7708202830969163, acc:0.7756603773584906


 50%|████▉     | 5400/10845 [1:08:39<1:29:21,  1.02it/s, acc=0.775, epoch=28, loss=0.773]

epoch:28, idx:5399/10845, loss:0.7728879985378848, acc:0.7749074074074074


 51%|█████     | 5500/10845 [1:09:50<52:59,  1.68it/s, acc=0.775, epoch=28, loss=0.77]   

epoch:28, idx:5499/10845, loss:0.7703931224400347, acc:0.7753181818181818


 52%|█████▏    | 5600/10845 [1:11:00<55:34,  1.57it/s, acc=0.774, epoch=28, loss=0.773]  

epoch:28, idx:5599/10845, loss:0.7731710461154581, acc:0.774375


 53%|█████▎    | 5700/10845 [1:12:10<39:02,  2.20it/s, acc=0.774, epoch=28, loss=0.774]  

epoch:28, idx:5699/10845, loss:0.7736175007935155, acc:0.7741228070175439


 53%|█████▎    | 5800/10845 [1:13:16<47:05,  1.79it/s, acc=0.774, epoch=28, loss=0.772]  

epoch:28, idx:5799/10845, loss:0.7721182268344122, acc:0.774353448275862


 54%|█████▍    | 5900/10845 [1:14:30<57:56,  1.42it/s, acc=0.775, epoch=28, loss=0.769]  

epoch:28, idx:5899/10845, loss:0.769198721657365, acc:0.7752118644067797


 55%|█████▌    | 6000/10845 [1:15:33<50:29,  1.60it/s, acc=0.776, epoch=28, loss=0.768]  

epoch:28, idx:5999/10845, loss:0.7680973132451375, acc:0.7755416666666667


 56%|█████▌    | 6100/10845 [1:16:37<56:17,  1.41it/s, acc=0.776, epoch=28, loss=0.769]  

epoch:28, idx:6099/10845, loss:0.7688397717573604, acc:0.7758196721311476


 57%|█████▋    | 6200/10845 [1:17:41<55:58,  1.38it/s, acc=0.776, epoch=28, loss=0.769]  

epoch:28, idx:6199/10845, loss:0.768631312443364, acc:0.7761693548387096


 58%|█████▊    | 6300/10845 [1:18:47<59:54,  1.26it/s, acc=0.776, epoch=28, loss=0.768]  

epoch:28, idx:6299/10845, loss:0.7675656671467281, acc:0.7761507936507936


 59%|█████▉    | 6400/10845 [1:19:54<54:35,  1.36it/s, acc=0.776, epoch=28, loss=0.767]  

epoch:28, idx:6399/10845, loss:0.7671332480665296, acc:0.776171875


 60%|█████▉    | 6500/10845 [1:21:05<41:57,  1.73it/s, acc=0.777, epoch=28, loss=0.765]  

epoch:28, idx:6499/10845, loss:0.7651374920239815, acc:0.7765384615384615


 61%|██████    | 6600/10845 [1:22:09<53:35,  1.32it/s, acc=0.777, epoch=28, loss=0.765]  

epoch:28, idx:6599/10845, loss:0.7649418720241749, acc:0.7765909090909091


 62%|██████▏   | 6700/10845 [1:23:16<59:27,  1.16it/s, acc=0.776, epoch=28, loss=0.767]  

epoch:28, idx:6699/10845, loss:0.7671677669304521, acc:0.7764925373134328


 63%|██████▎   | 6800/10845 [1:24:20<58:46,  1.15it/s, acc=0.776, epoch=28, loss=0.77] 

epoch:28, idx:6799/10845, loss:0.7702532869051485, acc:0.7758823529411765


 64%|██████▎   | 6900/10845 [1:25:25<32:01,  2.05it/s, acc=0.776, epoch=28, loss=0.77]  

epoch:28, idx:6899/10845, loss:0.7704328930896261, acc:0.7759057971014492


 65%|██████▍   | 7000/10845 [1:26:30<41:40,  1.54it/s, acc=0.776, epoch=28, loss=0.772]

epoch:28, idx:6999/10845, loss:0.7723258380549295, acc:0.7757857142857143


 65%|██████▌   | 7100/10845 [1:27:35<34:05,  1.83it/s, acc=0.776, epoch=28, loss=0.771]

epoch:28, idx:7099/10845, loss:0.7713947923334552, acc:0.7759507042253521


 66%|██████▋   | 7200/10845 [1:28:44<36:48,  1.65it/s, acc=0.776, epoch=28, loss=0.773]  

epoch:28, idx:7199/10845, loss:0.7729785832100444, acc:0.7757291666666667


 67%|██████▋   | 7300/10845 [1:29:48<39:57,  1.48it/s, acc=0.775, epoch=28, loss=0.775]

epoch:28, idx:7299/10845, loss:0.7745370852130733, acc:0.7754109589041096


 68%|██████▊   | 7400/10845 [1:30:53<40:13,  1.43it/s, acc=0.775, epoch=28, loss=0.775]

epoch:28, idx:7399/10845, loss:0.7746335373657781, acc:0.7751013513513514


 69%|██████▉   | 7500/10845 [1:31:59<1:03:37,  1.14s/it, acc=0.775, epoch=28, loss=0.775]

epoch:28, idx:7499/10845, loss:0.7748453129887581, acc:0.7748666666666667


 70%|███████   | 7600/10845 [1:32:59<21:00,  2.57it/s, acc=0.775, epoch=28, loss=0.775]  

epoch:28, idx:7599/10845, loss:0.7753487222171144, acc:0.7745065789473684


 71%|███████   | 7700/10845 [1:33:59<38:06,  1.38it/s, acc=0.775, epoch=28, loss=0.774]

epoch:28, idx:7699/10845, loss:0.7737950554722315, acc:0.775064935064935


 72%|███████▏  | 7800/10845 [1:35:02<45:19,  1.12it/s, acc=0.775, epoch=28, loss=0.774]

epoch:28, idx:7799/10845, loss:0.7737645042553926, acc:0.7753205128205128


 73%|███████▎  | 7900/10845 [1:36:02<32:17,  1.52it/s, acc=0.775, epoch=28, loss=0.774]

epoch:28, idx:7899/10845, loss:0.7744510157123397, acc:0.7751582278481013


 74%|███████▍  | 8000/10845 [1:37:04<33:18,  1.42it/s, acc=0.775, epoch=28, loss=0.774]

epoch:28, idx:7999/10845, loss:0.7738185498118401, acc:0.7754375


 75%|███████▍  | 8100/10845 [1:38:10<30:50,  1.48it/s, acc=0.775, epoch=28, loss=0.775]

epoch:28, idx:8099/10845, loss:0.7747627633810044, acc:0.7754012345679012


 76%|███████▌  | 8200/10845 [1:39:10<26:14,  1.68it/s, acc=0.775, epoch=28, loss=0.776]

epoch:28, idx:8199/10845, loss:0.7759356907010079, acc:0.7749085365853658


 77%|███████▋  | 8300/10845 [1:40:11<24:42,  1.72it/s, acc=0.775, epoch=28, loss=0.776]

epoch:28, idx:8299/10845, loss:0.7761212431522737, acc:0.7749397590361445


 77%|███████▋  | 8400/10845 [1:41:15<34:21,  1.19it/s, acc=0.774, epoch=28, loss=0.78] 

epoch:28, idx:8399/10845, loss:0.7797180556328523, acc:0.7741666666666667


 78%|███████▊  | 8500/10845 [1:42:14<27:40,  1.41it/s, acc=0.774, epoch=28, loss=0.78] 

epoch:28, idx:8499/10845, loss:0.780145110761418, acc:0.774235294117647


 79%|███████▉  | 8600/10845 [1:43:16<25:13,  1.48it/s, acc=0.774, epoch=28, loss=0.78] 

epoch:28, idx:8599/10845, loss:0.7795549779645232, acc:0.7744476744186046


 80%|████████  | 8700/10845 [1:44:14<15:26,  2.32it/s, acc=0.775, epoch=28, loss=0.78] 

epoch:28, idx:8699/10845, loss:0.7796128739228194, acc:0.7745114942528736


 81%|████████  | 8800/10845 [1:45:15<20:26,  1.67it/s, acc=0.775, epoch=28, loss=0.78] 

epoch:28, idx:8799/10845, loss:0.7796797392381863, acc:0.7745170454545455


 82%|████████▏ | 8900/10845 [1:46:17<20:37,  1.57it/s, acc=0.774, epoch=28, loss=0.781]

epoch:28, idx:8899/10845, loss:0.7809617498416579, acc:0.7742977528089887


 83%|████████▎ | 9001/10845 [1:47:20<13:19,  2.31it/s, acc=0.774, epoch=28, loss=0.781]

epoch:28, idx:8999/10845, loss:0.7810832589798503, acc:0.7744444444444445


 84%|████████▍ | 9100/10845 [1:48:26<16:56,  1.72it/s, acc=0.774, epoch=28, loss=0.784]

epoch:28, idx:9099/10845, loss:0.7837652372724407, acc:0.7737912087912088


 85%|████████▍ | 9200/10845 [1:49:25<17:44,  1.55it/s, acc=0.774, epoch=28, loss=0.784]

epoch:28, idx:9199/10845, loss:0.7836507406636425, acc:0.7736141304347826


 86%|████████▌ | 9300/10845 [1:50:27<15:34,  1.65it/s, acc=0.773, epoch=28, loss=0.784]

epoch:28, idx:9299/10845, loss:0.7844952973627275, acc:0.7733870967741936


 87%|████████▋ | 9400/10845 [1:51:21<12:30,  1.93it/s, acc=0.773, epoch=28, loss=0.785]

epoch:28, idx:9399/10845, loss:0.7854416556878293, acc:0.7733244680851064


 88%|████████▊ | 9500/10845 [1:52:14<15:25,  1.45it/s, acc=0.773, epoch=28, loss=0.787]

epoch:28, idx:9499/10845, loss:0.7869462713316867, acc:0.773


 89%|████████▊ | 9600/10845 [1:53:09<10:14,  2.03it/s, acc=0.773, epoch=28, loss=0.786]

epoch:28, idx:9599/10845, loss:0.7862524473729233, acc:0.7730729166666667


 89%|████████▉ | 9700/10845 [1:54:10<10:41,  1.78it/s, acc=0.773, epoch=28, loss=0.788]

epoch:28, idx:9699/10845, loss:0.7875178369664654, acc:0.7726030927835051


 90%|█████████ | 9800/10845 [1:55:14<12:18,  1.42it/s, acc=0.772, epoch=28, loss=0.788]

epoch:28, idx:9799/10845, loss:0.7881295550295285, acc:0.7722959183673469


 91%|█████████▏| 9900/10845 [1:56:16<11:02,  1.43it/s, acc=0.772, epoch=28, loss=0.789]

epoch:28, idx:9899/10845, loss:0.7886385416803938, acc:0.7718939393939394


 92%|█████████▏| 10000/10845 [1:57:13<06:30,  2.16it/s, acc=0.772, epoch=28, loss=0.788]

epoch:28, idx:9999/10845, loss:0.787724257621169, acc:0.771975


 93%|█████████▎| 10100/10845 [1:58:11<05:51,  2.12it/s, acc=0.772, epoch=28, loss=0.788]

epoch:28, idx:10099/10845, loss:0.7877712873037499, acc:0.7722524752475247


 94%|█████████▍| 10200/10845 [1:59:10<05:19,  2.02it/s, acc=0.772, epoch=28, loss=0.789]

epoch:28, idx:10199/10845, loss:0.7885873158100773, acc:0.7717647058823529


 95%|█████████▍| 10300/10845 [2:00:08<06:05,  1.49it/s, acc=0.772, epoch=28, loss=0.789]

epoch:28, idx:10299/10845, loss:0.7888228645226331, acc:0.7717233009708738


 96%|█████████▌| 10400/10845 [2:01:09<04:21,  1.70it/s, acc=0.771, epoch=28, loss=0.79] 

epoch:28, idx:10399/10845, loss:0.7900825487335141, acc:0.77125


 97%|█████████▋| 10500/10845 [2:02:11<03:30,  1.64it/s, acc=0.771, epoch=28, loss=0.79] 

epoch:28, idx:10499/10845, loss:0.7900480549023265, acc:0.771452380952381


 98%|█████████▊| 10600/10845 [2:03:11<02:28,  1.65it/s, acc=0.771, epoch=28, loss=0.79] 

epoch:28, idx:10599/10845, loss:0.7902796456414574, acc:0.77125


 99%|█████████▊| 10700/10845 [2:04:14<01:19,  1.83it/s, acc=0.771, epoch=28, loss=0.791]

epoch:28, idx:10699/10845, loss:0.7907711470266369, acc:0.7713785046728971


100%|█████████▉| 10800/10845 [2:05:18<00:26,  1.70it/s, acc=0.771, epoch=28, loss=0.791]

epoch:28, idx:10799/10845, loss:0.7907616694961433, acc:0.77125


100%|██████████| 10845/10845 [2:05:44<00:00,  1.82it/s, acc=0.772, epoch=28, loss=0.79] 


epoch:28, idx:0/1275, loss:1.0525988340377808, acc:0.5
epoch:28, idx:100/1275, loss:1.2443531792942841, acc:0.6262376237623762
epoch:28, idx:200/1275, loss:1.1801534038574542, acc:0.6355721393034826
epoch:28, idx:300/1275, loss:1.1840425257468936, acc:0.6378737541528239
epoch:28, idx:400/1275, loss:1.1632304049638145, acc:0.6446384039900249
epoch:28, idx:500/1275, loss:1.1569381806545866, acc:0.6452095808383234
epoch:28, idx:600/1275, loss:1.163086656474829, acc:0.6426788685524126
epoch:28, idx:700/1275, loss:1.1803227736589401, acc:0.6422967189728959
epoch:28, idx:800/1275, loss:1.1885904786217674, acc:0.6410736579275905
epoch:28, idx:900/1275, loss:1.1764625525567163, acc:0.6451165371809101
epoch:28, idx:1000/1275, loss:1.1730949012609153, acc:0.6463536463536463
epoch:28, idx:1100/1275, loss:1.1671907667080779, acc:0.6478201634877384
epoch:28, idx:1200/1275, loss:1.1664008690008614, acc:0.6482098251457119


  1%|          | 100/10845 [00:56<1:40:15,  1.79it/s, acc=0.785, epoch=29, loss=0.639]

epoch:29, idx:99/10845, loss:0.6394108361005784, acc:0.785


  2%|▏         | 200/10845 [01:53<1:38:56,  1.79it/s, acc=0.789, epoch=29, loss=0.678]

epoch:29, idx:199/10845, loss:0.6755096930265426, acc:0.78875


  3%|▎         | 300/10845 [02:48<1:05:55,  2.67it/s, acc=0.798, epoch=29, loss=0.668]

epoch:29, idx:299/10845, loss:0.6679980982343355, acc:0.7983333333333333


  4%|▎         | 400/10845 [03:43<1:37:54,  1.78it/s, acc=0.797, epoch=29, loss=0.669]

epoch:29, idx:399/10845, loss:0.6693347129225731, acc:0.796875


  5%|▍         | 500/10845 [04:38<1:21:20,  2.12it/s, acc=0.795, epoch=29, loss=0.685]

epoch:29, idx:499/10845, loss:0.6849879057407379, acc:0.7955


  6%|▌         | 600/10845 [05:34<1:16:04,  2.24it/s, acc=0.794, epoch=29, loss=0.679]

epoch:29, idx:599/10845, loss:0.678932009935379, acc:0.79375


  6%|▋         | 700/10845 [06:29<1:25:04,  1.99it/s, acc=0.791, epoch=29, loss=0.694]

epoch:29, idx:699/10845, loss:0.6940433063677379, acc:0.7914285714285715


  7%|▋         | 800/10845 [07:28<1:46:04,  1.58it/s, acc=0.791, epoch=29, loss=0.702]

epoch:29, idx:799/10845, loss:0.7020941129326821, acc:0.79125


  8%|▊         | 900/10845 [08:28<1:47:36,  1.54it/s, acc=0.79, epoch=29, loss=0.705] 

epoch:29, idx:899/10845, loss:0.704983019762569, acc:0.7897222222222222


  9%|▉         | 1000/10845 [09:26<1:32:27,  1.77it/s, acc=0.789, epoch=29, loss=0.716]

epoch:29, idx:999/10845, loss:0.7162301380038262, acc:0.789


 10%|█         | 1100/10845 [10:16<1:14:08,  2.19it/s, acc=0.786, epoch=29, loss=0.719]

epoch:29, idx:1099/10845, loss:0.7186831497062336, acc:0.7863636363636364


 11%|█         | 1200/10845 [10:56<54:02,  2.97it/s, acc=0.786, epoch=29, loss=0.725]  

epoch:29, idx:1199/10845, loss:0.7246421332657337, acc:0.785625


 12%|█▏        | 1300/10845 [11:38<1:06:55,  2.38it/s, acc=0.784, epoch=29, loss=0.725]

epoch:29, idx:1299/10845, loss:0.7245118645979808, acc:0.7844230769230769


 13%|█▎        | 1400/10845 [12:25<1:25:25,  1.84it/s, acc=0.782, epoch=29, loss=0.734]

epoch:29, idx:1399/10845, loss:0.7336341816612653, acc:0.7819642857142857


 14%|█▍        | 1500/10845 [13:16<1:19:42,  1.95it/s, acc=0.78, epoch=29, loss=0.744] 

epoch:29, idx:1499/10845, loss:0.7442317572832108, acc:0.7798333333333334


 15%|█▍        | 1600/10845 [14:11<1:08:34,  2.25it/s, acc=0.78, epoch=29, loss=0.742] 

epoch:29, idx:1599/10845, loss:0.7424835688248277, acc:0.77953125


 16%|█▌        | 1700/10845 [15:04<1:39:06,  1.54it/s, acc=0.78, epoch=29, loss=0.741] 

epoch:29, idx:1699/10845, loss:0.7414904917338315, acc:0.7797058823529411


 17%|█▋        | 1800/10845 [16:02<1:31:08,  1.65it/s, acc=0.779, epoch=29, loss=0.741]

epoch:29, idx:1799/10845, loss:0.7410164823714229, acc:0.7793055555555556


 18%|█▊        | 1900/10845 [16:59<2:00:29,  1.24it/s, acc=0.78, epoch=29, loss=0.739] 

epoch:29, idx:1899/10845, loss:0.7386430942149539, acc:0.7797368421052632


 18%|█▊        | 2000/10845 [17:49<1:01:17,  2.40it/s, acc=0.781, epoch=29, loss=0.737]

epoch:29, idx:1999/10845, loss:0.7365164809599519, acc:0.78075


 19%|█▉        | 2100/10845 [18:49<1:18:46,  1.85it/s, acc=0.78, epoch=29, loss=0.744] 

epoch:29, idx:2099/10845, loss:0.7440059459564232, acc:0.7801190476190476


 20%|██        | 2200/10845 [19:42<1:27:32,  1.65it/s, acc=0.778, epoch=29, loss=0.749]

epoch:29, idx:2199/10845, loss:0.7492021315273913, acc:0.7781818181818182


 21%|██        | 2300/10845 [20:30<1:06:21,  2.15it/s, acc=0.778, epoch=29, loss=0.748]

epoch:29, idx:2299/10845, loss:0.7482907230504181, acc:0.7783695652173913


 22%|██▏       | 2400/10845 [21:29<1:07:49,  2.08it/s, acc=0.778, epoch=29, loss=0.751]

epoch:29, idx:2399/10845, loss:0.7509215754829347, acc:0.7777083333333333


 23%|██▎       | 2500/10845 [22:23<58:42,  2.37it/s, acc=0.777, epoch=29, loss=0.751]  

epoch:29, idx:2499/10845, loss:0.7505127719700336, acc:0.7774


 24%|██▍       | 2600/10845 [23:20<1:18:19,  1.75it/s, acc=0.778, epoch=29, loss=0.748]

epoch:29, idx:2599/10845, loss:0.7480957824966082, acc:0.7781730769230769


 25%|██▍       | 2700/10845 [24:13<1:07:59,  2.00it/s, acc=0.777, epoch=29, loss=0.75] 

epoch:29, idx:2699/10845, loss:0.7504597329817436, acc:0.7772222222222223


 26%|██▌       | 2800/10845 [24:59<45:49,  2.93it/s, acc=0.776, epoch=29, loss=0.753]  

epoch:29, idx:2799/10845, loss:0.7528090079714145, acc:0.7761607142857143


 27%|██▋       | 2901/10845 [25:40<49:32,  2.67it/s, acc=0.775, epoch=29, loss=0.757]  

epoch:29, idx:2899/10845, loss:0.7574362300747427, acc:0.7751724137931034


 28%|██▊       | 3000/10845 [26:24<46:18,  2.82it/s, acc=0.776, epoch=29, loss=0.756]  

epoch:29, idx:2999/10845, loss:0.7558619199146827, acc:0.77575


 29%|██▊       | 3100/10845 [27:06<54:38,  2.36it/s, acc=0.776, epoch=29, loss=0.755]  

epoch:29, idx:3099/10845, loss:0.7548043351932879, acc:0.7758870967741935


 30%|██▉       | 3200/10845 [27:56<1:13:43,  1.73it/s, acc=0.776, epoch=29, loss=0.756]

epoch:29, idx:3199/10845, loss:0.7561750552384183, acc:0.775625


 30%|███       | 3300/10845 [28:50<54:16,  2.32it/s, acc=0.776, epoch=29, loss=0.76]   

epoch:29, idx:3299/10845, loss:0.7600992667088002, acc:0.7757575757575758


 31%|███▏      | 3400/10845 [29:50<59:01,  2.10it/s, acc=0.777, epoch=29, loss=0.753]  

epoch:29, idx:3399/10845, loss:0.7533303023918586, acc:0.7770588235294118


 32%|███▏      | 3500/10845 [30:47<1:12:24,  1.69it/s, acc=0.776, epoch=29, loss=0.756]

epoch:29, idx:3499/10845, loss:0.7560274208656379, acc:0.7757142857142857


 33%|███▎      | 3600/10845 [31:44<1:02:07,  1.94it/s, acc=0.775, epoch=29, loss=0.758]

epoch:29, idx:3599/10845, loss:0.758433992072112, acc:0.7754861111111111


 34%|███▍      | 3700/10845 [32:40<1:03:04,  1.89it/s, acc=0.776, epoch=29, loss=0.758]

epoch:29, idx:3699/10845, loss:0.7584730493257175, acc:0.7758108108108108


 35%|███▌      | 3800/10845 [33:34<59:45,  1.96it/s, acc=0.775, epoch=29, loss=0.762]  

epoch:29, idx:3799/10845, loss:0.7623071888755811, acc:0.775


 36%|███▌      | 3900/10845 [34:26<1:14:16,  1.56it/s, acc=0.774, epoch=29, loss=0.763]

epoch:29, idx:3899/10845, loss:0.7634413132491784, acc:0.7744230769230769


 37%|███▋      | 4000/10845 [35:24<1:06:10,  1.72it/s, acc=0.774, epoch=29, loss=0.763]

epoch:29, idx:3999/10845, loss:0.7625803337283432, acc:0.7741875


 38%|███▊      | 4100/10845 [36:15<51:41,  2.17it/s, acc=0.774, epoch=29, loss=0.764]  

epoch:29, idx:4099/10845, loss:0.7637671257482797, acc:0.7735365853658537


 39%|███▊      | 4200/10845 [37:12<1:16:06,  1.46it/s, acc=0.773, epoch=29, loss=0.764]

epoch:29, idx:4199/10845, loss:0.7636919233309372, acc:0.7733333333333333


 40%|███▉      | 4301/10845 [38:05<47:02,  2.32it/s, acc=0.774, epoch=29, loss=0.762]  

epoch:29, idx:4299/10845, loss:0.7621823257311832, acc:0.7743604651162791


 41%|████      | 4400/10845 [38:58<42:52,  2.51it/s, acc=0.775, epoch=29, loss=0.763]  

epoch:29, idx:4399/10845, loss:0.762663976716047, acc:0.7747727272727273


 41%|████▏     | 4500/10845 [39:56<59:33,  1.78it/s, acc=0.774, epoch=29, loss=0.763]  

epoch:29, idx:4499/10845, loss:0.7633946445816093, acc:0.7740555555555556


 42%|████▏     | 4600/10845 [40:51<42:38,  2.44it/s, acc=0.775, epoch=29, loss=0.761]  

epoch:29, idx:4599/10845, loss:0.7614983104525701, acc:0.7746739130434782


 43%|████▎     | 4700/10845 [41:45<38:24,  2.67it/s, acc=0.775, epoch=29, loss=0.762]  

epoch:29, idx:4699/10845, loss:0.7617145975251147, acc:0.7748936170212766


 44%|████▍     | 4800/10845 [42:38<1:02:16,  1.62it/s, acc=0.775, epoch=29, loss=0.763]

epoch:29, idx:4799/10845, loss:0.7625218977437666, acc:0.77453125


 45%|████▌     | 4900/10845 [43:31<57:39,  1.72it/s, acc=0.775, epoch=29, loss=0.76]   

epoch:29, idx:4899/10845, loss:0.7604457613673745, acc:0.774795918367347


 46%|████▌     | 5000/10845 [44:26<38:47,  2.51it/s, acc=0.775, epoch=29, loss=0.761]  

epoch:29, idx:4999/10845, loss:0.7613305643111468, acc:0.7749


 47%|████▋     | 5100/10845 [45:24<56:39,  1.69it/s, acc=0.775, epoch=29, loss=0.761]  

epoch:29, idx:5099/10845, loss:0.7608081882636921, acc:0.7749019607843137


 48%|████▊     | 5200/10845 [46:20<40:31,  2.32it/s, acc=0.775, epoch=29, loss=0.764]  

epoch:29, idx:5199/10845, loss:0.7637976131043756, acc:0.7748076923076923


 49%|████▉     | 5300/10845 [47:17<48:53,  1.89it/s, acc=0.775, epoch=29, loss=0.764]  

epoch:29, idx:5299/10845, loss:0.7635713072852144, acc:0.7749528301886792


 50%|████▉     | 5400/10845 [48:12<1:00:23,  1.50it/s, acc=0.775, epoch=29, loss=0.763]

epoch:29, idx:5399/10845, loss:0.7634475830556066, acc:0.775


 51%|█████     | 5500/10845 [49:10<39:49,  2.24it/s, acc=0.775, epoch=29, loss=0.763]  

epoch:29, idx:5499/10845, loss:0.7629289419894869, acc:0.7751363636363636


 52%|█████▏    | 5600/10845 [49:59<41:48,  2.09it/s, acc=0.775, epoch=29, loss=0.764]  

epoch:29, idx:5599/10845, loss:0.7636846174007016, acc:0.7752232142857143


 53%|█████▎    | 5700/10845 [50:54<55:16,  1.55it/s, acc=0.776, epoch=29, loss=0.761]  

epoch:29, idx:5699/10845, loss:0.7610065385568561, acc:0.7758771929824562


 53%|█████▎    | 5800/10845 [51:52<47:36,  1.77it/s, acc=0.776, epoch=29, loss=0.761]  

epoch:29, idx:5799/10845, loss:0.7613569125764329, acc:0.7758189655172414


 54%|█████▍    | 5900/10845 [52:45<35:33,  2.32it/s, acc=0.776, epoch=29, loss=0.762]  

epoch:29, idx:5899/10845, loss:0.7624100417227058, acc:0.7759322033898305


 55%|█████▌    | 6000/10845 [53:40<54:04,  1.49it/s, acc=0.776, epoch=29, loss=0.764]  

epoch:29, idx:5999/10845, loss:0.7638968692248066, acc:0.7759166666666667


 56%|█████▌    | 6100/10845 [54:35<34:35,  2.29it/s, acc=0.776, epoch=29, loss=0.763]  

epoch:29, idx:6099/10845, loss:0.7634513526641932, acc:0.7757786885245902


 57%|█████▋    | 6200/10845 [55:32<43:17,  1.79it/s, acc=0.776, epoch=29, loss=0.763]  

epoch:29, idx:6199/10845, loss:0.7627761986827658, acc:0.7759677419354839


 58%|█████▊    | 6300/10845 [56:21<36:50,  2.06it/s, acc=0.776, epoch=29, loss=0.763]

epoch:29, idx:6299/10845, loss:0.7630452659371353, acc:0.7761111111111111


 59%|█████▉    | 6400/10845 [57:15<30:11,  2.45it/s, acc=0.776, epoch=29, loss=0.764]  

epoch:29, idx:6399/10845, loss:0.7637022889195941, acc:0.77625


 60%|█████▉    | 6500/10845 [58:09<39:50,  1.82it/s, acc=0.776, epoch=29, loss=0.764]  

epoch:29, idx:6499/10845, loss:0.7643627008910363, acc:0.7758846153846154


 61%|██████    | 6600/10845 [59:07<29:27,  2.40it/s, acc=0.776, epoch=29, loss=0.763]  

epoch:29, idx:6599/10845, loss:0.7632849333841693, acc:0.7763257575757576


 62%|██████▏   | 6700/10845 [59:52<27:13,  2.54it/s, acc=0.776, epoch=29, loss=0.765]

epoch:29, idx:6699/10845, loss:0.765427837654281, acc:0.7759701492537313


 63%|██████▎   | 6800/10845 [1:00:37<31:46,  2.12it/s, acc=0.776, epoch=29, loss=0.765]

epoch:29, idx:6799/10845, loss:0.7651818204342442, acc:0.77625


 64%|██████▎   | 6900/10845 [1:01:22<26:38,  2.47it/s, acc=0.776, epoch=29, loss=0.769]

epoch:29, idx:6899/10845, loss:0.7687853453535101, acc:0.7759057971014492


 65%|██████▍   | 7000/10845 [1:02:03<26:22,  2.43it/s, acc=0.776, epoch=29, loss=0.77] 

epoch:29, idx:6999/10845, loss:0.7704222418133702, acc:0.7755714285714286


 65%|██████▌   | 7100/10845 [1:02:47<30:57,  2.02it/s, acc=0.776, epoch=29, loss=0.769]

epoch:29, idx:7099/10845, loss:0.7693561373189302, acc:0.7758450704225353


 66%|██████▋   | 7200/10845 [1:03:28<32:47,  1.85it/s, acc=0.776, epoch=29, loss=0.769]

epoch:29, idx:7199/10845, loss:0.7691901216304137, acc:0.7757291666666667


 67%|██████▋   | 7301/10845 [1:04:08<15:48,  3.74it/s, acc=0.775, epoch=29, loss=0.77] 

epoch:29, idx:7299/10845, loss:0.7700873078773283, acc:0.7753767123287671


 68%|██████▊   | 7400/10845 [1:04:49<29:03,  1.98it/s, acc=0.775, epoch=29, loss=0.771]

epoch:29, idx:7399/10845, loss:0.770964494337504, acc:0.7751013513513514


 69%|██████▉   | 7500/10845 [1:05:44<27:50,  2.00it/s, acc=0.775, epoch=29, loss=0.77] 

epoch:29, idx:7499/10845, loss:0.76984178677996, acc:0.7752666666666667


 70%|███████   | 7600/10845 [1:06:37<21:42,  2.49it/s, acc=0.775, epoch=29, loss=0.77] 

epoch:29, idx:7599/10845, loss:0.7700104022869154, acc:0.7752302631578948


 71%|███████   | 7701/10845 [1:07:23<15:51,  3.30it/s, acc=0.775, epoch=29, loss=0.771]

epoch:29, idx:7699/10845, loss:0.771444466901677, acc:0.7746753246753246


 72%|███████▏  | 7801/10845 [1:08:03<13:46,  3.69it/s, acc=0.774, epoch=29, loss=0.772]

epoch:29, idx:7799/10845, loss:0.77205111564543, acc:0.7743910256410257


 73%|███████▎  | 7901/10845 [1:08:41<18:31,  2.65it/s, acc=0.774, epoch=29, loss=0.771]

epoch:29, idx:7899/10845, loss:0.7711847131520132, acc:0.7744620253164557


 74%|███████▍  | 8000/10845 [1:09:24<22:56,  2.07it/s, acc=0.775, epoch=29, loss=0.77] 

epoch:29, idx:7999/10845, loss:0.7704258417319506, acc:0.7746875


 75%|███████▍  | 8100/10845 [1:10:08<18:16,  2.50it/s, acc=0.775, epoch=29, loss=0.77] 

epoch:29, idx:8099/10845, loss:0.7699322454116226, acc:0.7749382716049382


 76%|███████▌  | 8201/10845 [1:10:53<16:57,  2.60it/s, acc=0.775, epoch=29, loss=0.771]

epoch:29, idx:8199/10845, loss:0.7711665125282072, acc:0.7746646341463415


 77%|███████▋  | 8300/10845 [1:11:48<33:15,  1.28it/s, acc=0.775, epoch=29, loss=0.771]

epoch:29, idx:8299/10845, loss:0.7714097847008562, acc:0.7748493975903614


 77%|███████▋  | 8400/10845 [1:12:47<21:57,  1.86it/s, acc=0.775, epoch=29, loss=0.773]

epoch:29, idx:8399/10845, loss:0.7725930309845578, acc:0.7749107142857142


 78%|███████▊  | 8500/10845 [1:13:38<23:15,  1.68it/s, acc=0.775, epoch=29, loss=0.773]

epoch:29, idx:8499/10845, loss:0.7731670777710045, acc:0.7749411764705882


 79%|███████▉  | 8600/10845 [1:14:29<16:39,  2.25it/s, acc=0.775, epoch=29, loss=0.774]

epoch:29, idx:8599/10845, loss:0.773609189250788, acc:0.7748837209302326


 80%|████████  | 8700/10845 [1:15:23<17:10,  2.08it/s, acc=0.775, epoch=29, loss=0.774]

epoch:29, idx:8699/10845, loss:0.7741356681938144, acc:0.7747988505747126


 81%|████████  | 8800/10845 [1:16:16<22:06,  1.54it/s, acc=0.775, epoch=29, loss=0.773]

epoch:29, idx:8799/10845, loss:0.7730838840031489, acc:0.7748863636363637


 82%|████████▏ | 8900/10845 [1:17:10<15:33,  2.08it/s, acc=0.775, epoch=29, loss=0.773]

epoch:29, idx:8899/10845, loss:0.7733160763694329, acc:0.7747191011235955


 83%|████████▎ | 9000/10845 [1:18:07<24:06,  1.28it/s, acc=0.775, epoch=29, loss=0.774]

epoch:29, idx:8999/10845, loss:0.7740965813712941, acc:0.7746944444444445


 84%|████████▍ | 9100/10845 [1:19:04<20:04,  1.45it/s, acc=0.775, epoch=29, loss=0.774]

epoch:29, idx:9099/10845, loss:0.7740802941135653, acc:0.7746978021978022


 85%|████████▍ | 9200/10845 [1:19:59<15:05,  1.82it/s, acc=0.775, epoch=29, loss=0.775]

epoch:29, idx:9199/10845, loss:0.774521818962758, acc:0.7747282608695653


 86%|████████▌ | 9300/10845 [1:20:55<14:56,  1.72it/s, acc=0.775, epoch=29, loss=0.774]

epoch:29, idx:9299/10845, loss:0.7742032814714858, acc:0.7748387096774193


 87%|████████▋ | 9400/10845 [1:21:50<10:40,  2.26it/s, acc=0.775, epoch=29, loss=0.775]

epoch:29, idx:9399/10845, loss:0.7753836072426528, acc:0.7747074468085107


 88%|████████▊ | 9500/10845 [1:22:49<11:59,  1.87it/s, acc=0.774, epoch=29, loss=0.777]

epoch:29, idx:9499/10845, loss:0.7767527731453118, acc:0.7742105263157895


 89%|████████▊ | 9600/10845 [1:23:41<09:49,  2.11it/s, acc=0.774, epoch=29, loss=0.776]

epoch:29, idx:9599/10845, loss:0.7757775388766701, acc:0.7743489583333333


 89%|████████▉ | 9700/10845 [1:24:39<11:15,  1.70it/s, acc=0.774, epoch=29, loss=0.778]

epoch:29, idx:9699/10845, loss:0.7775980480353243, acc:0.7741237113402062


 90%|█████████ | 9800/10845 [1:25:33<09:46,  1.78it/s, acc=0.774, epoch=29, loss=0.776]

epoch:29, idx:9799/10845, loss:0.7764949628148152, acc:0.7743622448979591


 91%|█████████▏| 9900/10845 [1:26:24<08:44,  1.80it/s, acc=0.775, epoch=29, loss=0.776]

epoch:29, idx:9899/10845, loss:0.775686032204616, acc:0.7747222222222222


 92%|█████████▏| 10000/10845 [1:27:16<07:54,  1.78it/s, acc=0.775, epoch=29, loss=0.776]

epoch:29, idx:9999/10845, loss:0.7756810189709067, acc:0.77475


 93%|█████████▎| 10101/10845 [1:28:07<04:22,  2.83it/s, acc=0.774, epoch=29, loss=0.777]

epoch:29, idx:10099/10845, loss:0.7772234544880909, acc:0.7744059405940594


 94%|█████████▍| 10200/10845 [1:29:03<06:37,  1.62it/s, acc=0.775, epoch=29, loss=0.776]

epoch:29, idx:10199/10845, loss:0.7764559104907162, acc:0.774607843137255


 95%|█████████▍| 10300/10845 [1:29:59<04:10,  2.18it/s, acc=0.775, epoch=29, loss=0.776]

epoch:29, idx:10299/10845, loss:0.775913594739819, acc:0.7747330097087378


 96%|█████████▌| 10400/10845 [1:30:50<03:21,  2.21it/s, acc=0.775, epoch=29, loss=0.775]

epoch:29, idx:10399/10845, loss:0.7746734308895583, acc:0.7749519230769231


 97%|█████████▋| 10500/10845 [1:31:43<02:40,  2.15it/s, acc=0.775, epoch=29, loss=0.776]

epoch:29, idx:10499/10845, loss:0.7755244549044541, acc:0.7748571428571429


 98%|█████████▊| 10600/10845 [1:32:37<02:34,  1.59it/s, acc=0.775, epoch=29, loss=0.776]

epoch:29, idx:10599/10845, loss:0.7759172558545504, acc:0.7748584905660377


 99%|█████████▊| 10700/10845 [1:33:30<01:35,  1.52it/s, acc=0.775, epoch=29, loss=0.776]

epoch:29, idx:10699/10845, loss:0.7761639661975553, acc:0.7750233644859813


100%|█████████▉| 10800/10845 [1:34:26<00:18,  2.42it/s, acc=0.775, epoch=29, loss=0.777]

epoch:29, idx:10799/10845, loss:0.7771128470243679, acc:0.7746759259259259


100%|██████████| 10845/10845 [1:34:51<00:00,  1.90it/s, acc=0.775, epoch=29, loss=0.777]


epoch:29, idx:0/1275, loss:1.1723648309707642, acc:0.5
epoch:29, idx:100/1275, loss:1.2883905573646621, acc:0.6212871287128713
epoch:29, idx:200/1275, loss:1.1895672898387435, acc:0.6330845771144279
epoch:29, idx:300/1275, loss:1.202682658881444, acc:0.6362126245847176
epoch:29, idx:400/1275, loss:1.1861946688328597, acc:0.6396508728179551
epoch:29, idx:500/1275, loss:1.1823350089990687, acc:0.6427145708582834
epoch:29, idx:600/1275, loss:1.1835148898813372, acc:0.6426788685524126
epoch:29, idx:700/1275, loss:1.1986102678636341, acc:0.641583452211127
epoch:29, idx:800/1275, loss:1.2092184501789631, acc:0.6392009987515606
epoch:29, idx:900/1275, loss:1.1946997644832476, acc:0.6420643729189789
epoch:29, idx:1000/1275, loss:1.1913816314655823, acc:0.6443556443556444
epoch:29, idx:1100/1275, loss:1.1818281177441496, acc:0.6478201634877384
epoch:29, idx:1200/1275, loss:1.1810135027202937, acc:0.6484179850124896


  1%|          | 100/10845 [00:57<1:54:19,  1.57it/s, acc=0.795, epoch=30, loss=0.76]

epoch:30, idx:99/10845, loss:0.7596800380945206, acc:0.795


  2%|▏         | 200/10845 [01:48<1:24:07,  2.11it/s, acc=0.792, epoch=30, loss=0.758]

epoch:30, idx:199/10845, loss:0.758305955529213, acc:0.7925


  3%|▎         | 300/10845 [02:44<1:40:52,  1.74it/s, acc=0.797, epoch=30, loss=0.704]

epoch:30, idx:299/10845, loss:0.7040265015761058, acc:0.7966666666666666


  4%|▎         | 400/10845 [03:37<1:55:39,  1.51it/s, acc=0.79, epoch=30, loss=0.701] 

epoch:30, idx:399/10845, loss:0.7014466977864504, acc:0.79


  5%|▍         | 500/10845 [04:30<1:29:44,  1.92it/s, acc=0.784, epoch=30, loss=0.722]

epoch:30, idx:499/10845, loss:0.7224955143332481, acc:0.7845


  6%|▌         | 600/10845 [05:23<1:02:55,  2.71it/s, acc=0.788, epoch=30, loss=0.711]

epoch:30, idx:599/10845, loss:0.7105518064896266, acc:0.7879166666666667


  6%|▋         | 700/10845 [06:14<1:31:12,  1.85it/s, acc=0.786, epoch=30, loss=0.722]

epoch:30, idx:699/10845, loss:0.7215575076852526, acc:0.7857142857142857


  7%|▋         | 800/10845 [07:09<1:07:32,  2.48it/s, acc=0.784, epoch=30, loss=0.732]

epoch:30, idx:799/10845, loss:0.731655715033412, acc:0.784375


  8%|▊         | 900/10845 [08:04<1:23:57,  1.97it/s, acc=0.784, epoch=30, loss=0.735]

epoch:30, idx:899/10845, loss:0.7351626625988219, acc:0.7844444444444445


  9%|▉         | 1000/10845 [08:56<1:31:09,  1.80it/s, acc=0.788, epoch=30, loss=0.716]

epoch:30, idx:999/10845, loss:0.7156490905284881, acc:0.788


 10%|█         | 1100/10845 [09:48<1:47:16,  1.51it/s, acc=0.786, epoch=30, loss=0.723]

epoch:30, idx:1099/10845, loss:0.7228067135539922, acc:0.7863636363636364


 11%|█         | 1200/10845 [10:41<1:21:56,  1.96it/s, acc=0.787, epoch=30, loss=0.726]

epoch:30, idx:1199/10845, loss:0.726114849622051, acc:0.7866666666666666


 12%|█▏        | 1300/10845 [11:32<1:02:45,  2.53it/s, acc=0.79, epoch=30, loss=0.716] 

epoch:30, idx:1299/10845, loss:0.7156752925423475, acc:0.7903846153846154


 13%|█▎        | 1400/10845 [12:26<1:29:00,  1.77it/s, acc=0.788, epoch=30, loss=0.721]

epoch:30, idx:1399/10845, loss:0.7213428833442075, acc:0.7883928571428571


 14%|█▍        | 1500/10845 [13:20<1:17:39,  2.01it/s, acc=0.789, epoch=30, loss=0.722]

epoch:30, idx:1499/10845, loss:0.7223612758914629, acc:0.7893333333333333


 15%|█▍        | 1600/10845 [14:18<1:05:01,  2.37it/s, acc=0.79, epoch=30, loss=0.718] 

epoch:30, idx:1599/10845, loss:0.7184887817688286, acc:0.78984375


 16%|█▌        | 1700/10845 [15:10<1:12:25,  2.10it/s, acc=0.791, epoch=30, loss=0.718]

epoch:30, idx:1699/10845, loss:0.7183050446124638, acc:0.7905882352941176


 17%|█▋        | 1800/10845 [16:01<48:52,  3.08it/s, acc=0.792, epoch=30, loss=0.717]  

epoch:30, idx:1799/10845, loss:0.7166999392873711, acc:0.7918055555555555


 18%|█▊        | 1900/10845 [16:58<1:26:41,  1.72it/s, acc=0.792, epoch=30, loss=0.717]

epoch:30, idx:1899/10845, loss:0.7167032995820045, acc:0.7917105263157894


 18%|█▊        | 2000/10845 [17:48<1:05:42,  2.24it/s, acc=0.791, epoch=30, loss=0.721]

epoch:30, idx:1999/10845, loss:0.7208728794902564, acc:0.79075


 19%|█▉        | 2100/10845 [18:39<1:07:22,  2.16it/s, acc=0.792, epoch=30, loss=0.723]

epoch:30, idx:2099/10845, loss:0.7228882181786356, acc:0.791547619047619


 20%|██        | 2200/10845 [19:36<1:03:26,  2.27it/s, acc=0.79, epoch=30, loss=0.73]  

epoch:30, idx:2199/10845, loss:0.730280663791028, acc:0.7898863636363637


 21%|██        | 2300/10845 [20:30<1:15:42,  1.88it/s, acc=0.791, epoch=30, loss=0.73] 

epoch:30, idx:2299/10845, loss:0.7295461495933325, acc:0.7908695652173913


 22%|██▏       | 2400/10845 [21:24<1:07:25,  2.09it/s, acc=0.791, epoch=30, loss=0.731]

epoch:30, idx:2399/10845, loss:0.7312815624848008, acc:0.7909375


 23%|██▎       | 2500/10845 [22:18<58:33,  2.37it/s, acc=0.789, epoch=30, loss=0.738]  

epoch:30, idx:2499/10845, loss:0.7376708144307137, acc:0.7894


 24%|██▍       | 2600/10845 [23:10<1:05:35,  2.10it/s, acc=0.79, epoch=30, loss=0.735] 

epoch:30, idx:2599/10845, loss:0.7349501385711706, acc:0.7896153846153846


 25%|██▍       | 2700/10845 [24:04<1:04:30,  2.10it/s, acc=0.789, epoch=30, loss=0.737]

epoch:30, idx:2699/10845, loss:0.7366808231671651, acc:0.7894444444444444


 26%|██▌       | 2800/10845 [24:57<1:13:23,  1.83it/s, acc=0.789, epoch=30, loss=0.738]

epoch:30, idx:2799/10845, loss:0.7381416417019708, acc:0.7891071428571429


 27%|██▋       | 2900/10845 [25:46<49:57,  2.65it/s, acc=0.788, epoch=30, loss=0.738]  

epoch:30, idx:2899/10845, loss:0.7378440240744887, acc:0.788448275862069


 28%|██▊       | 3000/10845 [26:43<1:14:13,  1.76it/s, acc=0.788, epoch=30, loss=0.741]

epoch:30, idx:2999/10845, loss:0.7407499449451764, acc:0.7879166666666667


 29%|██▊       | 3100/10845 [27:32<1:05:00,  1.99it/s, acc=0.787, epoch=30, loss=0.743]

epoch:30, idx:3099/10845, loss:0.7432232047665503, acc:0.787016129032258


 30%|██▉       | 3200/10845 [28:25<53:49,  2.37it/s, acc=0.786, epoch=30, loss=0.744]  

epoch:30, idx:3199/10845, loss:0.7443710598489269, acc:0.786328125


 30%|███       | 3300/10845 [29:15<47:04,  2.67it/s, acc=0.786, epoch=30, loss=0.745]  

epoch:30, idx:3299/10845, loss:0.7454406873339956, acc:0.7863636363636364


 31%|███▏      | 3400/10845 [30:13<1:05:24,  1.90it/s, acc=0.786, epoch=30, loss=0.748]

epoch:30, idx:3399/10845, loss:0.748123898887459, acc:0.7860294117647059


 32%|███▏      | 3500/10845 [31:07<1:10:01,  1.75it/s, acc=0.786, epoch=30, loss=0.753]

epoch:30, idx:3499/10845, loss:0.752639435917139, acc:0.7857857142857143


 33%|███▎      | 3600/10845 [31:58<1:03:09,  1.91it/s, acc=0.784, epoch=30, loss=0.757]

epoch:30, idx:3599/10845, loss:0.7565085129398439, acc:0.784375


 34%|███▍      | 3700/10845 [32:48<52:27,  2.27it/s, acc=0.784, epoch=30, loss=0.757]  

epoch:30, idx:3699/10845, loss:0.7569464349062056, acc:0.784054054054054


 35%|███▌      | 3800/10845 [33:42<58:37,  2.00it/s, acc=0.783, epoch=30, loss=0.755]  

epoch:30, idx:3799/10845, loss:0.7552319800343953, acc:0.7834868421052632


 36%|███▌      | 3900/10845 [34:36<58:14,  1.99it/s, acc=0.783, epoch=30, loss=0.758]  

epoch:30, idx:3899/10845, loss:0.7577735733489196, acc:0.7827564102564103


 37%|███▋      | 4000/10845 [35:28<34:53,  3.27it/s, acc=0.782, epoch=30, loss=0.76]   

epoch:30, idx:3999/10845, loss:0.7598990532122553, acc:0.78225


 38%|███▊      | 4100/10845 [36:24<1:02:55,  1.79it/s, acc=0.782, epoch=30, loss=0.757]

epoch:30, idx:4099/10845, loss:0.7568774254692764, acc:0.7824390243902439


 39%|███▊      | 4200/10845 [37:20<1:02:31,  1.77it/s, acc=0.783, epoch=30, loss=0.754]

epoch:30, idx:4199/10845, loss:0.7539373944451412, acc:0.7828571428571428


 40%|███▉      | 4300/10845 [38:15<1:05:36,  1.66it/s, acc=0.783, epoch=30, loss=0.755]

epoch:30, idx:4299/10845, loss:0.7548079850056837, acc:0.7826744186046511


 41%|████      | 4400/10845 [39:08<1:18:53,  1.36it/s, acc=0.782, epoch=30, loss=0.756]

epoch:30, idx:4399/10845, loss:0.7563885349949653, acc:0.7825


 41%|████▏     | 4500/10845 [39:57<49:12,  2.15it/s, acc=0.783, epoch=30, loss=0.755]  

epoch:30, idx:4499/10845, loss:0.754908946947919, acc:0.7828888888888889


 42%|████▏     | 4600/10845 [40:54<1:01:56,  1.68it/s, acc=0.783, epoch=30, loss=0.754]

epoch:30, idx:4599/10845, loss:0.753756072142202, acc:0.7828260869565218


 43%|████▎     | 4700/10845 [41:47<1:07:34,  1.52it/s, acc=0.783, epoch=30, loss=0.753]

epoch:30, idx:4699/10845, loss:0.7527385683230897, acc:0.7831382978723405


 44%|████▍     | 4800/10845 [42:41<1:02:56,  1.60it/s, acc=0.783, epoch=30, loss=0.752]

epoch:30, idx:4799/10845, loss:0.7521746198926121, acc:0.7827604166666666


 45%|████▌     | 4900/10845 [43:35<51:10,  1.94it/s, acc=0.784, epoch=30, loss=0.75]   

epoch:30, idx:4899/10845, loss:0.7495960856061809, acc:0.7838265306122449


 46%|████▌     | 5000/10845 [44:28<46:24,  2.10it/s, acc=0.783, epoch=30, loss=0.752]  

epoch:30, idx:4999/10845, loss:0.7524842300027609, acc:0.78315


 47%|████▋     | 5101/10845 [45:24<46:40,  2.05it/s, acc=0.783, epoch=30, loss=0.751]  

epoch:30, idx:5099/10845, loss:0.7510017693481025, acc:0.7833823529411764


 48%|████▊     | 5200/10845 [46:17<1:14:55,  1.26it/s, acc=0.783, epoch=30, loss=0.751]

epoch:30, idx:5199/10845, loss:0.7509098372407831, acc:0.7832211538461539


 49%|████▉     | 5300/10845 [47:12<56:16,  1.64it/s, acc=0.783, epoch=30, loss=0.751]  

epoch:30, idx:5299/10845, loss:0.7511840860995481, acc:0.7828301886792453


 50%|████▉     | 5400/10845 [48:02<39:09,  2.32it/s, acc=0.783, epoch=30, loss=0.752]  

epoch:30, idx:5399/10845, loss:0.7517287312365241, acc:0.7827314814814815


 51%|█████     | 5500/10845 [48:58<50:05,  1.78it/s, acc=0.782, epoch=30, loss=0.756]  

epoch:30, idx:5499/10845, loss:0.7557629000788385, acc:0.7819090909090909


 52%|█████▏    | 5600/10845 [49:52<41:34,  2.10it/s, acc=0.782, epoch=30, loss=0.753]  

epoch:30, idx:5599/10845, loss:0.753000749591738, acc:0.7824553571428572


 53%|█████▎    | 5700/10845 [50:44<33:51,  2.53it/s, acc=0.782, epoch=30, loss=0.754]  

epoch:30, idx:5699/10845, loss:0.7543098294290534, acc:0.782061403508772


 53%|█████▎    | 5800/10845 [51:38<52:29,  1.60it/s, acc=0.783, epoch=30, loss=0.753]  

epoch:30, idx:5799/10845, loss:0.7531269495523182, acc:0.7826293103448276


 54%|█████▍    | 5900/10845 [52:32<47:12,  1.75it/s, acc=0.783, epoch=30, loss=0.752]  

epoch:30, idx:5899/10845, loss:0.7522280771828304, acc:0.7827118644067796


 55%|█████▌    | 6000/10845 [53:23<40:32,  1.99it/s, acc=0.782, epoch=30, loss=0.756]

epoch:30, idx:5999/10845, loss:0.7556828419044613, acc:0.7825


 56%|█████▌    | 6100/10845 [54:18<49:56,  1.58it/s, acc=0.782, epoch=30, loss=0.756]  

epoch:30, idx:6099/10845, loss:0.7564990737863252, acc:0.7823360655737704


 57%|█████▋    | 6200/10845 [55:09<50:36,  1.53it/s, acc=0.782, epoch=30, loss=0.756]  

epoch:30, idx:6199/10845, loss:0.7562994350324715, acc:0.7822983870967742


 58%|█████▊    | 6300/10845 [56:00<38:12,  1.98it/s, acc=0.782, epoch=30, loss=0.756]  

epoch:30, idx:6299/10845, loss:0.7556217972130056, acc:0.7825


 59%|█████▉    | 6400/10845 [56:56<41:44,  1.77it/s, acc=0.782, epoch=30, loss=0.757]

epoch:30, idx:6399/10845, loss:0.7573256341996603, acc:0.7824609375


 60%|█████▉    | 6500/10845 [57:51<49:32,  1.46it/s, acc=0.782, epoch=30, loss=0.758]

epoch:30, idx:6499/10845, loss:0.7584984810512799, acc:0.782


 61%|██████    | 6600/10845 [58:44<39:47,  1.78it/s, acc=0.781, epoch=30, loss=0.762]  

epoch:30, idx:6599/10845, loss:0.7616873964373813, acc:0.7810984848484849


 62%|██████▏   | 6700/10845 [59:38<47:26,  1.46it/s, acc=0.781, epoch=30, loss=0.763]

epoch:30, idx:6699/10845, loss:0.7625051280700449, acc:0.7809701492537313


 63%|██████▎   | 6800/10845 [1:00:32<37:24,  1.80it/s, acc=0.78, epoch=30, loss=0.763] 

epoch:30, idx:6799/10845, loss:0.7627583163691795, acc:0.7804779411764706


 64%|██████▎   | 6900/10845 [1:01:27<31:31,  2.09it/s, acc=0.78, epoch=30, loss=0.764]  

epoch:30, idx:6899/10845, loss:0.7638179371300815, acc:0.7798188405797102


 65%|██████▍   | 7000/10845 [1:02:22<35:18,  1.82it/s, acc=0.78, epoch=30, loss=0.764]

epoch:30, idx:6999/10845, loss:0.7637559323800461, acc:0.7798571428571428


 65%|██████▌   | 7100/10845 [1:03:20<43:55,  1.42it/s, acc=0.78, epoch=30, loss=0.761]

epoch:30, idx:7099/10845, loss:0.7612998519544031, acc:0.7802464788732394


 66%|██████▋   | 7200/10845 [1:04:12<33:09,  1.83it/s, acc=0.781, epoch=30, loss=0.76]

epoch:30, idx:7199/10845, loss:0.7595856102949216, acc:0.7806597222222222


 67%|██████▋   | 7300/10845 [1:05:08<28:04,  2.10it/s, acc=0.781, epoch=30, loss=0.758]

epoch:30, idx:7299/10845, loss:0.7577758660769626, acc:0.7809931506849315


 68%|██████▊   | 7400/10845 [1:06:02<25:39,  2.24it/s, acc=0.78, epoch=30, loss=0.76]  

epoch:30, idx:7399/10845, loss:0.7596290913767911, acc:0.780472972972973


 69%|██████▉   | 7500/10845 [1:06:52<30:18,  1.84it/s, acc=0.78, epoch=30, loss=0.76]  

epoch:30, idx:7499/10845, loss:0.7603371021886667, acc:0.7804


 70%|███████   | 7600/10845 [1:07:49<36:32,  1.48it/s, acc=0.78, epoch=30, loss=0.761]

epoch:30, idx:7599/10845, loss:0.760850283909393, acc:0.7803947368421053


 71%|███████   | 7700/10845 [1:08:42<23:06,  2.27it/s, acc=0.78, epoch=30, loss=0.761]

epoch:30, idx:7699/10845, loss:0.7605980218443777, acc:0.780422077922078


 72%|███████▏  | 7800/10845 [1:09:36<30:36,  1.66it/s, acc=0.78, epoch=30, loss=0.761]

epoch:30, idx:7799/10845, loss:0.7607785117607087, acc:0.7802564102564102


 73%|███████▎  | 7900/10845 [1:10:30<25:33,  1.92it/s, acc=0.78, epoch=30, loss=0.761]

epoch:30, idx:7899/10845, loss:0.7613436983412579, acc:0.7800632911392406


 74%|███████▍  | 8000/10845 [1:11:26<25:21,  1.87it/s, acc=0.781, epoch=30, loss=0.759]

epoch:30, idx:7999/10845, loss:0.7591333324518055, acc:0.78053125


 75%|███████▍  | 8100/10845 [1:12:21<24:30,  1.87it/s, acc=0.78, epoch=30, loss=0.76]  

epoch:30, idx:8099/10845, loss:0.7603132161001365, acc:0.7803395061728395


 76%|███████▌  | 8200/10845 [1:13:18<21:13,  2.08it/s, acc=0.781, epoch=30, loss=0.759]

epoch:30, idx:8199/10845, loss:0.7585432441914227, acc:0.780579268292683


 77%|███████▋  | 8300/10845 [1:14:14<29:20,  1.45it/s, acc=0.78, epoch=30, loss=0.76]  

epoch:30, idx:8299/10845, loss:0.759722707355956, acc:0.7803313253012049


 77%|███████▋  | 8400/10845 [1:15:08<23:13,  1.75it/s, acc=0.781, epoch=30, loss=0.759]

epoch:30, idx:8399/10845, loss:0.7585973154558312, acc:0.7805654761904762


 78%|███████▊  | 8500/10845 [1:16:02<20:38,  1.89it/s, acc=0.781, epoch=30, loss=0.759]

epoch:30, idx:8499/10845, loss:0.7585129388430539, acc:0.7805882352941177


 79%|███████▉  | 8600/10845 [1:16:53<17:21,  2.16it/s, acc=0.781, epoch=30, loss=0.759]

epoch:30, idx:8599/10845, loss:0.7592585435440374, acc:0.7805232558139535


 80%|████████  | 8700/10845 [1:17:49<20:34,  1.74it/s, acc=0.78, epoch=30, loss=0.76]  

epoch:30, idx:8699/10845, loss:0.7600418241750235, acc:0.7801436781609196


 81%|████████  | 8800/10845 [1:18:46<29:11,  1.17it/s, acc=0.78, epoch=30, loss=0.759]

epoch:30, idx:8799/10845, loss:0.7594701827317476, acc:0.7803977272727273


 82%|████████▏ | 8900/10845 [1:19:40<18:13,  1.78it/s, acc=0.781, epoch=30, loss=0.76] 

epoch:30, idx:8899/10845, loss:0.759739877387379, acc:0.7805337078651685


 83%|████████▎ | 9000/10845 [1:20:34<13:00,  2.36it/s, acc=0.78, epoch=30, loss=0.762]

epoch:30, idx:8999/10845, loss:0.7618244118955401, acc:0.78025


 84%|████████▍ | 9100/10845 [1:21:25<15:17,  1.90it/s, acc=0.78, epoch=30, loss=0.763]

epoch:30, idx:9099/10845, loss:0.7629920403970467, acc:0.7800824175824176


 85%|████████▍ | 9200/10845 [1:22:20<11:43,  2.34it/s, acc=0.78, epoch=30, loss=0.763]

epoch:30, idx:9199/10845, loss:0.7633886919397375, acc:0.7802445652173913


 86%|████████▌ | 9300/10845 [1:23:13<13:43,  1.88it/s, acc=0.78, epoch=30, loss=0.764]

epoch:30, idx:9299/10845, loss:0.7640654186343634, acc:0.7801075268817205


 87%|████████▋ | 9400/10845 [1:24:04<13:50,  1.74it/s, acc=0.781, epoch=30, loss=0.763]

epoch:30, idx:9399/10845, loss:0.7633208627396442, acc:0.7805319148936171


 88%|████████▊ | 9500/10845 [1:24:57<14:59,  1.50it/s, acc=0.781, epoch=30, loss=0.763]

epoch:30, idx:9499/10845, loss:0.762703019725649, acc:0.7806315789473685


 89%|████████▊ | 9600/10845 [1:25:50<10:09,  2.04it/s, acc=0.78, epoch=30, loss=0.762] 

epoch:30, idx:9599/10845, loss:0.7624253738981982, acc:0.7804947916666667


 89%|████████▉ | 9700/10845 [1:26:45<13:40,  1.40it/s, acc=0.781, epoch=30, loss=0.762]

epoch:30, idx:9699/10845, loss:0.7620364594643878, acc:0.7806958762886598


 90%|█████████ | 9800/10845 [1:27:36<07:16,  2.39it/s, acc=0.78, epoch=30, loss=0.762] 

epoch:30, idx:9799/10845, loss:0.7624548449504133, acc:0.7804336734693877


 91%|█████████▏| 9901/10845 [1:28:32<07:08,  2.20it/s, acc=0.78, epoch=30, loss=0.762]

epoch:30, idx:9899/10845, loss:0.7624858518682345, acc:0.7803282828282828


 92%|█████████▏| 10000/10845 [1:29:28<06:23,  2.20it/s, acc=0.78, epoch=30, loss=0.763]

epoch:30, idx:9999/10845, loss:0.7632685608744622, acc:0.78005


 93%|█████████▎| 10100/10845 [1:30:17<05:34,  2.23it/s, acc=0.78, epoch=30, loss=0.763]

epoch:30, idx:10099/10845, loss:0.7634689358673473, acc:0.7800495049504951


 94%|█████████▍| 10200/10845 [1:31:14<05:34,  1.93it/s, acc=0.78, epoch=30, loss=0.763]

epoch:30, idx:10199/10845, loss:0.762991249759992, acc:0.7801960784313725


 95%|█████████▍| 10300/10845 [1:32:09<06:26,  1.41it/s, acc=0.781, epoch=30, loss=0.761]

epoch:30, idx:10299/10845, loss:0.7613354774701943, acc:0.7805825242718447


 96%|█████████▌| 10400/10845 [1:33:02<03:10,  2.34it/s, acc=0.781, epoch=30, loss=0.762]

epoch:30, idx:10399/10845, loss:0.7616316109265272, acc:0.7805288461538461


 97%|█████████▋| 10500/10845 [1:33:51<02:23,  2.40it/s, acc=0.781, epoch=30, loss=0.762]

epoch:30, idx:10499/10845, loss:0.7615003641900562, acc:0.7806190476190477


 98%|█████████▊| 10600/10845 [1:34:28<01:08,  3.60it/s, acc=0.78, epoch=30, loss=0.763] 

epoch:30, idx:10599/10845, loss:0.7630045818663993, acc:0.7804009433962265


 99%|█████████▊| 10700/10845 [1:35:10<01:11,  2.01it/s, acc=0.781, epoch=30, loss=0.763]

epoch:30, idx:10699/10845, loss:0.7628933498179801, acc:0.78053738317757


100%|█████████▉| 10800/10845 [1:35:50<00:12,  3.65it/s, acc=0.78, epoch=30, loss=0.765] 

epoch:30, idx:10799/10845, loss:0.764668157680167, acc:0.7802777777777777


100%|██████████| 10845/10845 [1:36:07<00:00,  3.40it/s, acc=0.78, epoch=30, loss=0.765]


epoch:30, idx:0/1275, loss:1.1113241910934448, acc:0.5
epoch:30, idx:100/1275, loss:1.2621976927955552, acc:0.6287128712871287
epoch:30, idx:200/1275, loss:1.1892761215938263, acc:0.6393034825870647
epoch:30, idx:300/1275, loss:1.1975441271086469, acc:0.6420265780730897
epoch:30, idx:400/1275, loss:1.1677544600797116, acc:0.6477556109725686
epoch:30, idx:500/1275, loss:1.1633233969915888, acc:0.6501996007984032
epoch:30, idx:600/1275, loss:1.1680445556533514, acc:0.6476705490848585
epoch:30, idx:700/1275, loss:1.1841227561447998, acc:0.644793152639087
epoch:30, idx:800/1275, loss:1.196909579184469, acc:0.6416978776529338
epoch:30, idx:900/1275, loss:1.185605998110692, acc:0.6453940066592675
epoch:30, idx:1000/1275, loss:1.1824867218048065, acc:0.6471028971028971
epoch:30, idx:1100/1275, loss:1.1749781691735708, acc:0.6489554950045413
epoch:30, idx:1200/1275, loss:1.1733156195488896, acc:0.6492506244796004


  1%|          | 100/10845 [00:56<1:20:02,  2.24it/s, acc=0.745, epoch=31, loss=0.779]

epoch:31, idx:99/10845, loss:0.7793326544761657, acc:0.745


  2%|▏         | 200/10845 [01:50<46:26,  3.82it/s, acc=0.757, epoch=31, loss=0.764]  

epoch:31, idx:199/10845, loss:0.7642625418305397, acc:0.7575


  3%|▎         | 301/10845 [02:43<1:16:51,  2.29it/s, acc=0.775, epoch=31, loss=0.725]

epoch:31, idx:299/10845, loss:0.727040730714798, acc:0.7741666666666667


  4%|▎         | 400/10845 [03:39<2:12:20,  1.32it/s, acc=0.783, epoch=31, loss=0.698]

epoch:31, idx:399/10845, loss:0.6983452515304088, acc:0.783125


  5%|▍         | 500/10845 [04:34<1:30:02,  1.91it/s, acc=0.783, epoch=31, loss=0.712]

epoch:31, idx:499/10845, loss:0.7124264295101166, acc:0.783


  6%|▌         | 600/10845 [05:28<1:25:21,  2.00it/s, acc=0.792, epoch=31, loss=0.7]  

epoch:31, idx:599/10845, loss:0.6995469724138578, acc:0.7920833333333334


  6%|▋         | 700/10845 [06:18<1:36:15,  1.76it/s, acc=0.794, epoch=31, loss=0.712]

epoch:31, idx:699/10845, loss:0.7121012065240315, acc:0.7935714285714286


  7%|▋         | 800/10845 [07:06<1:21:03,  2.07it/s, acc=0.794, epoch=31, loss=0.706]

epoch:31, idx:799/10845, loss:0.7060081647336482, acc:0.7940625


  8%|▊         | 900/10845 [07:57<1:11:22,  2.32it/s, acc=0.794, epoch=31, loss=0.71] 

epoch:31, idx:899/10845, loss:0.7101849073171616, acc:0.7936111111111112


  9%|▉         | 1000/10845 [08:51<1:51:40,  1.47it/s, acc=0.79, epoch=31, loss=0.717]

epoch:31, idx:999/10845, loss:0.7173810077309608, acc:0.79


 10%|█         | 1100/10845 [09:44<1:23:54,  1.94it/s, acc=0.788, epoch=31, loss=0.725]

epoch:31, idx:1099/10845, loss:0.7245460217107426, acc:0.7881818181818182


 11%|█         | 1200/10845 [10:34<1:00:17,  2.67it/s, acc=0.787, epoch=31, loss=0.726]

epoch:31, idx:1199/10845, loss:0.7257021517554919, acc:0.7875


 12%|█▏        | 1300/10845 [11:29<1:35:25,  1.67it/s, acc=0.789, epoch=31, loss=0.72] 

epoch:31, idx:1299/10845, loss:0.7198778113035055, acc:0.7892307692307692


 13%|█▎        | 1400/10845 [12:19<1:40:25,  1.57it/s, acc=0.787, epoch=31, loss=0.727]

epoch:31, idx:1399/10845, loss:0.7269134137885911, acc:0.7866071428571428


 14%|█▍        | 1500/10845 [13:14<1:28:16,  1.76it/s, acc=0.786, epoch=31, loss=0.733]

epoch:31, idx:1499/10845, loss:0.7328083260854086, acc:0.7861666666666667


 15%|█▍        | 1600/10845 [14:12<1:06:32,  2.32it/s, acc=0.786, epoch=31, loss=0.734]

epoch:31, idx:1599/10845, loss:0.7344362838566303, acc:0.78578125


 16%|█▌        | 1700/10845 [15:08<1:26:26,  1.76it/s, acc=0.786, epoch=31, loss=0.734]

epoch:31, idx:1699/10845, loss:0.7341807141724754, acc:0.7861764705882353


 17%|█▋        | 1800/10845 [16:03<1:10:01,  2.15it/s, acc=0.784, epoch=31, loss=0.737]

epoch:31, idx:1799/10845, loss:0.7370688924524519, acc:0.7843055555555556


 18%|█▊        | 1900/10845 [16:54<1:25:53,  1.74it/s, acc=0.785, epoch=31, loss=0.734]

epoch:31, idx:1899/10845, loss:0.7342886898235271, acc:0.7846052631578947


 18%|█▊        | 2000/10845 [17:47<52:47,  2.79it/s, acc=0.787, epoch=31, loss=0.731]  

epoch:31, idx:1999/10845, loss:0.7308972144573926, acc:0.786875


 19%|█▉        | 2100/10845 [18:39<1:14:24,  1.96it/s, acc=0.788, epoch=31, loss=0.729]

epoch:31, idx:2099/10845, loss:0.728867479236353, acc:0.7876190476190477


 20%|██        | 2200/10845 [19:32<1:12:35,  1.98it/s, acc=0.787, epoch=31, loss=0.729]

epoch:31, idx:2199/10845, loss:0.7293777650188316, acc:0.7873863636363636


 21%|██        | 2300/10845 [20:30<1:32:34,  1.54it/s, acc=0.789, epoch=31, loss=0.726]

epoch:31, idx:2299/10845, loss:0.7255518539962561, acc:0.7890217391304348


 22%|██▏       | 2400/10845 [21:20<1:36:27,  1.46it/s, acc=0.789, epoch=31, loss=0.724]

epoch:31, idx:2399/10845, loss:0.7240202232574423, acc:0.7894791666666666


 23%|██▎       | 2500/10845 [22:11<1:00:55,  2.28it/s, acc=0.79, epoch=31, loss=0.722] 

epoch:31, idx:2499/10845, loss:0.7224233942747116, acc:0.7897


 24%|██▍       | 2600/10845 [23:06<1:19:20,  1.73it/s, acc=0.789, epoch=31, loss=0.726]

epoch:31, idx:2599/10845, loss:0.725959868476941, acc:0.7890384615384616


 25%|██▍       | 2700/10845 [23:44<50:47,  2.67it/s, acc=0.789, epoch=31, loss=0.728]  

epoch:31, idx:2699/10845, loss:0.7283984295306383, acc:0.7887037037037037


 26%|██▌       | 2801/10845 [24:26<38:55,  3.44it/s, acc=0.788, epoch=31, loss=0.731]  

epoch:31, idx:2799/10845, loss:0.7308064069492476, acc:0.7880357142857143


 27%|██▋       | 2900/10845 [25:03<38:38,  3.43it/s, acc=0.788, epoch=31, loss=0.731]  

epoch:31, idx:2899/10845, loss:0.7313618408400437, acc:0.7881896551724138


 28%|██▊       | 3000/10845 [25:43<41:59,  3.11it/s, acc=0.789, epoch=31, loss=0.731]  

epoch:31, idx:2999/10845, loss:0.7305956596930822, acc:0.789


 29%|██▊       | 3100/10845 [26:22<36:12,  3.56it/s, acc=0.789, epoch=31, loss=0.73]   

epoch:31, idx:3099/10845, loss:0.7301701967562398, acc:0.7893548387096774


 30%|██▉       | 3200/10845 [27:02<59:11,  2.15it/s, acc=0.789, epoch=31, loss=0.728]  

epoch:31, idx:3199/10845, loss:0.7278221142617985, acc:0.789453125


 30%|███       | 3300/10845 [27:52<1:22:01,  1.53it/s, acc=0.79, epoch=31, loss=0.724]

epoch:31, idx:3299/10845, loss:0.724048468538306, acc:0.7903030303030303


 31%|███▏      | 3400/10845 [28:46<1:26:46,  1.43it/s, acc=0.789, epoch=31, loss=0.725]

epoch:31, idx:3399/10845, loss:0.7247635832560413, acc:0.7893382352941176


 32%|███▏      | 3500/10845 [29:44<1:14:29,  1.64it/s, acc=0.789, epoch=31, loss=0.726]

epoch:31, idx:3499/10845, loss:0.7256283592624324, acc:0.7895


 33%|███▎      | 3600/10845 [30:34<55:02,  2.19it/s, acc=0.79, epoch=31, loss=0.725]   

epoch:31, idx:3599/10845, loss:0.7254375784719984, acc:0.7897222222222222


 34%|███▍      | 3700/10845 [31:26<48:45,  2.44it/s, acc=0.79, epoch=31, loss=0.728]   

epoch:31, idx:3699/10845, loss:0.7278523332967951, acc:0.7895945945945946


 35%|███▌      | 3800/10845 [32:18<43:09,  2.72it/s, acc=0.79, epoch=31, loss=0.725]   

epoch:31, idx:3799/10845, loss:0.7252981315278694, acc:0.7897368421052632


 36%|███▌      | 3900/10845 [33:13<1:21:19,  1.42it/s, acc=0.79, epoch=31, loss=0.727] 

epoch:31, idx:3899/10845, loss:0.7271298666451221, acc:0.7896794871794872


 37%|███▋      | 4000/10845 [34:10<1:19:53,  1.43it/s, acc=0.789, epoch=31, loss=0.729]

epoch:31, idx:3999/10845, loss:0.7292525458447635, acc:0.789375


 38%|███▊      | 4100/10845 [35:05<1:13:09,  1.54it/s, acc=0.789, epoch=31, loss=0.729]

epoch:31, idx:4099/10845, loss:0.7285619965522755, acc:0.7893292682926829


 39%|███▊      | 4200/10845 [35:58<46:19,  2.39it/s, acc=0.789, epoch=31, loss=0.729]  

epoch:31, idx:4199/10845, loss:0.7288085134220975, acc:0.7892261904761905


 40%|███▉      | 4300/10845 [36:50<48:04,  2.27it/s, acc=0.789, epoch=31, loss=0.728]  

epoch:31, idx:4299/10845, loss:0.7281898907000243, acc:0.7893023255813953


 41%|████      | 4400/10845 [37:48<50:48,  2.11it/s, acc=0.789, epoch=31, loss=0.729]  

epoch:31, idx:4399/10845, loss:0.7287639670988375, acc:0.7893181818181818


 41%|████▏     | 4500/10845 [38:41<44:45,  2.36it/s, acc=0.789, epoch=31, loss=0.729]  

epoch:31, idx:4499/10845, loss:0.7286647923489412, acc:0.7889444444444444


 42%|████▏     | 4600/10845 [39:36<1:09:29,  1.50it/s, acc=0.789, epoch=31, loss=0.731]

epoch:31, idx:4599/10845, loss:0.7307548107757517, acc:0.788695652173913


 43%|████▎     | 4700/10845 [40:28<43:50,  2.34it/s, acc=0.788, epoch=31, loss=0.734]  

epoch:31, idx:4699/10845, loss:0.7343781925357402, acc:0.7879255319148936


 44%|████▍     | 4800/10845 [41:23<1:22:29,  1.22it/s, acc=0.787, epoch=31, loss=0.736]

epoch:31, idx:4799/10845, loss:0.736030059925591, acc:0.7867708333333333


 45%|████▌     | 4900/10845 [42:14<47:58,  2.07it/s, acc=0.787, epoch=31, loss=0.735]  

epoch:31, idx:4899/10845, loss:0.7345768834589695, acc:0.7873469387755102


 46%|████▌     | 5000/10845 [43:02<56:09,  1.73it/s, acc=0.787, epoch=31, loss=0.735]  

epoch:31, idx:4999/10845, loss:0.7351681866496801, acc:0.7871


 47%|████▋     | 5100/10845 [43:48<42:17,  2.26it/s, acc=0.787, epoch=31, loss=0.737]  

epoch:31, idx:5099/10845, loss:0.7370820609348662, acc:0.7869117647058823


 48%|████▊     | 5200/10845 [44:37<1:03:22,  1.48it/s, acc=0.787, epoch=31, loss=0.737]

epoch:31, idx:5199/10845, loss:0.7369374455253666, acc:0.7871153846153847


 49%|████▉     | 5300/10845 [45:29<1:05:02,  1.42it/s, acc=0.787, epoch=31, loss=0.737]

epoch:31, idx:5299/10845, loss:0.7367846506805915, acc:0.7873584905660377


 50%|████▉     | 5400/10845 [46:20<55:40,  1.63it/s, acc=0.787, epoch=31, loss=0.736]  

epoch:31, idx:5399/10845, loss:0.7362190133636748, acc:0.7875


 51%|█████     | 5500/10845 [47:09<33:16,  2.68it/s, acc=0.787, epoch=31, loss=0.738]  

epoch:31, idx:5499/10845, loss:0.7380827269635417, acc:0.7873181818181818


 52%|█████▏    | 5600/10845 [47:59<36:11,  2.42it/s, acc=0.787, epoch=31, loss=0.738]  

epoch:31, idx:5599/10845, loss:0.7382060402711588, acc:0.7871875


 53%|█████▎    | 5700/10845 [48:50<52:53,  1.62it/s, acc=0.787, epoch=31, loss=0.739]

epoch:31, idx:5699/10845, loss:0.7391100840897936, acc:0.7869298245614035


 53%|█████▎    | 5800/10845 [49:41<52:54,  1.59it/s, acc=0.787, epoch=31, loss=0.741]

epoch:31, idx:5799/10845, loss:0.7408421984905826, acc:0.7865086206896552


 54%|█████▍    | 5900/10845 [50:34<34:37,  2.38it/s, acc=0.786, epoch=31, loss=0.743]  

epoch:31, idx:5899/10845, loss:0.7427744946818231, acc:0.7858050847457627


 55%|█████▌    | 6000/10845 [51:13<44:43,  1.81it/s, acc=0.785, epoch=31, loss=0.744]

epoch:31, idx:5999/10845, loss:0.7440971573566397, acc:0.7852916666666667


 56%|█████▌    | 6100/10845 [52:05<52:56,  1.49it/s, acc=0.785, epoch=31, loss=0.744]  

epoch:31, idx:6099/10845, loss:0.7439299707798684, acc:0.7853688524590164


 57%|█████▋    | 6200/10845 [52:57<39:03,  1.98it/s, acc=0.785, epoch=31, loss=0.745]  

epoch:31, idx:6199/10845, loss:0.7452989700604831, acc:0.7849193548387097


 58%|█████▊    | 6300/10845 [53:50<32:09,  2.36it/s, acc=0.785, epoch=31, loss=0.745]  

epoch:31, idx:6299/10845, loss:0.7449747537786052, acc:0.7845634920634921


 59%|█████▉    | 6400/10845 [54:40<44:20,  1.67it/s, acc=0.785, epoch=31, loss=0.746]

epoch:31, idx:6399/10845, loss:0.7455304203624837, acc:0.78453125


 60%|█████▉    | 6500/10845 [55:33<40:40,  1.78it/s, acc=0.784, epoch=31, loss=0.747]

epoch:31, idx:6499/10845, loss:0.74712847292194, acc:0.7841538461538462


 61%|██████    | 6600/10845 [56:27<32:13,  2.20it/s, acc=0.784, epoch=31, loss=0.747]  

epoch:31, idx:6599/10845, loss:0.7473465705527501, acc:0.7841287878787879


 62%|██████▏   | 6700/10845 [57:20<35:35,  1.94it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:6699/10845, loss:0.7452442789099999, acc:0.7849626865671642


 63%|██████▎   | 6800/10845 [58:13<23:02,  2.93it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:6799/10845, loss:0.7452824865478803, acc:0.7850367647058824


 64%|██████▎   | 6900/10845 [58:47<22:09,  2.97it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:6899/10845, loss:0.7450541086063005, acc:0.785


 65%|██████▍   | 7000/10845 [59:24<12:33,  5.11it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:6999/10845, loss:0.7449843793958425, acc:0.7851785714285714


 65%|██████▌   | 7100/10845 [59:56<16:08,  3.87it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:7099/10845, loss:0.7447220259506098, acc:0.7852112676056338


 66%|██████▋   | 7200/10845 [1:00:31<22:16,  2.73it/s, acc=0.786, epoch=31, loss=0.743]

epoch:31, idx:7199/10845, loss:0.7434936455492345, acc:0.7856597222222222


 67%|██████▋   | 7300/10845 [1:01:21<35:11,  1.68it/s, acc=0.786, epoch=31, loss=0.743]

epoch:31, idx:7299/10845, loss:0.7426201124570958, acc:0.7858219178082192


 68%|██████▊   | 7400/10845 [1:02:10<21:34,  2.66it/s, acc=0.786, epoch=31, loss=0.744]

epoch:31, idx:7399/10845, loss:0.7436459899854821, acc:0.7856418918918919


 69%|██████▉   | 7500/10845 [1:02:44<14:52,  3.75it/s, acc=0.786, epoch=31, loss=0.744]

epoch:31, idx:7499/10845, loss:0.7440499780674775, acc:0.7857


 70%|███████   | 7600/10845 [1:03:17<17:19,  3.12it/s, acc=0.786, epoch=31, loss=0.743]

epoch:31, idx:7599/10845, loss:0.7431234979923619, acc:0.7858223684210527


 71%|███████   | 7700/10845 [1:03:49<19:18,  2.72it/s, acc=0.786, epoch=31, loss=0.743]

epoch:31, idx:7699/10845, loss:0.7434728136987655, acc:0.7856168831168832


 72%|███████▏  | 7800/10845 [1:04:22<14:12,  3.57it/s, acc=0.786, epoch=31, loss=0.743]

epoch:31, idx:7799/10845, loss:0.743225089638279, acc:0.785673076923077


 73%|███████▎  | 7900/10845 [1:04:59<19:02,  2.58it/s, acc=0.785, epoch=31, loss=0.744]

epoch:31, idx:7899/10845, loss:0.7438664520597911, acc:0.7854746835443038


 74%|███████▍  | 8001/10845 [1:05:40<23:49,  1.99it/s, acc=0.785, epoch=31, loss=0.743]

epoch:31, idx:7999/10845, loss:0.7432584090214223, acc:0.7854375


 75%|███████▍  | 8100/10845 [1:06:36<25:13,  1.81it/s, acc=0.786, epoch=31, loss=0.743]

epoch:31, idx:8099/10845, loss:0.7425562083592385, acc:0.7857407407407407


 76%|███████▌  | 8201/10845 [1:07:33<16:35,  2.66it/s, acc=0.785, epoch=31, loss=0.744]

epoch:31, idx:8199/10845, loss:0.7445259490118521, acc:0.785030487804878


 77%|███████▋  | 8300/10845 [1:08:07<11:52,  3.57it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:8299/10845, loss:0.7445934011939779, acc:0.7849096385542169


 77%|███████▋  | 8400/10845 [1:08:54<20:23,  2.00it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:8399/10845, loss:0.7449791460572963, acc:0.7847916666666667


 78%|███████▊  | 8500/10845 [1:09:47<16:40,  2.34it/s, acc=0.785, epoch=31, loss=0.744]

epoch:31, idx:8499/10845, loss:0.7437996450995698, acc:0.7852941176470588


 79%|███████▉  | 8600/10845 [1:10:38<21:08,  1.77it/s, acc=0.785, epoch=31, loss=0.743]

epoch:31, idx:8599/10845, loss:0.7432547984410857, acc:0.785406976744186


 80%|████████  | 8700/10845 [1:11:33<19:35,  1.82it/s, acc=0.785, epoch=31, loss=0.744]

epoch:31, idx:8699/10845, loss:0.7435212640224517, acc:0.7851436781609196


 81%|████████  | 8800/10845 [1:12:25<22:26,  1.52it/s, acc=0.785, epoch=31, loss=0.743]

epoch:31, idx:8799/10845, loss:0.7429347557151182, acc:0.7854261363636363


 82%|████████▏ | 8900/10845 [1:13:18<14:59,  2.16it/s, acc=0.786, epoch=31, loss=0.742]

epoch:31, idx:8899/10845, loss:0.7422985803863306, acc:0.7856460674157303


 83%|████████▎ | 9000/10845 [1:14:09<19:42,  1.56it/s, acc=0.786, epoch=31, loss=0.742]

epoch:31, idx:8999/10845, loss:0.7419727586689923, acc:0.7857777777777778


 84%|████████▍ | 9100/10845 [1:15:01<14:03,  2.07it/s, acc=0.785, epoch=31, loss=0.744]

epoch:31, idx:9099/10845, loss:0.7441089916540371, acc:0.7854395604395604


 85%|████████▍ | 9201/10845 [1:15:55<13:30,  2.03it/s, acc=0.785, epoch=31, loss=0.745]

epoch:31, idx:9199/10845, loss:0.7453639910353914, acc:0.7853804347826087


 86%|████████▌ | 9300/10845 [1:16:49<10:23,  2.48it/s, acc=0.786, epoch=31, loss=0.745]

epoch:31, idx:9299/10845, loss:0.7450722954878884, acc:0.785510752688172


 87%|████████▋ | 9400/10845 [1:17:48<15:06,  1.59it/s, acc=0.785, epoch=31, loss=0.746]

epoch:31, idx:9399/10845, loss:0.7460472994424561, acc:0.7851595744680852


 88%|████████▊ | 9500/10845 [1:18:40<13:21,  1.68it/s, acc=0.785, epoch=31, loss=0.746]

epoch:31, idx:9499/10845, loss:0.7460045697328291, acc:0.7850526315789473


 89%|████████▊ | 9600/10845 [1:19:31<13:49,  1.50it/s, acc=0.785, epoch=31, loss=0.746]

epoch:31, idx:9599/10845, loss:0.7460346579629307, acc:0.7849479166666666


 89%|████████▉ | 9700/10845 [1:20:30<11:29,  1.66it/s, acc=0.785, epoch=31, loss=0.747]

epoch:31, idx:9699/10845, loss:0.7474718611341776, acc:0.7845360824742268


 90%|█████████ | 9800/10845 [1:21:27<09:51,  1.77it/s, acc=0.785, epoch=31, loss=0.747]

epoch:31, idx:9799/10845, loss:0.7468818397500685, acc:0.785


 91%|█████████▏| 9900/10845 [1:22:16<06:44,  2.34it/s, acc=0.785, epoch=31, loss=0.747]

epoch:31, idx:9899/10845, loss:0.7472564507870361, acc:0.7848484848484848


 92%|█████████▏| 10001/10845 [1:23:08<05:32,  2.53it/s, acc=0.785, epoch=31, loss=0.748]

epoch:31, idx:9999/10845, loss:0.7476075321868062, acc:0.785025


 93%|█████████▎| 10100/10845 [1:24:03<05:51,  2.12it/s, acc=0.785, epoch=31, loss=0.749]

epoch:31, idx:10099/10845, loss:0.7488538201330321, acc:0.7848267326732673


 94%|█████████▍| 10200/10845 [1:25:00<07:38,  1.41it/s, acc=0.785, epoch=31, loss=0.75] 

epoch:31, idx:10199/10845, loss:0.7501999342047116, acc:0.7845833333333333


 95%|█████████▍| 10300/10845 [1:25:53<03:21,  2.70it/s, acc=0.784, epoch=31, loss=0.753]

epoch:31, idx:10299/10845, loss:0.7526040324727887, acc:0.7841504854368933


 96%|█████████▌| 10400/10845 [1:26:40<02:46,  2.68it/s, acc=0.784, epoch=31, loss=0.752]

epoch:31, idx:10399/10845, loss:0.751563331979112, acc:0.7844471153846154


 97%|█████████▋| 10501/10845 [1:27:31<03:00,  1.90it/s, acc=0.784, epoch=31, loss=0.75] 

epoch:31, idx:10499/10845, loss:0.7504577069523789, acc:0.7844285714285715


 98%|█████████▊| 10600/10845 [1:28:25<02:46,  1.47it/s, acc=0.785, epoch=31, loss=0.75] 

epoch:31, idx:10599/10845, loss:0.7501364624429986, acc:0.7845047169811321


 99%|█████████▊| 10700/10845 [1:29:16<01:02,  2.32it/s, acc=0.785, epoch=31, loss=0.75] 

epoch:31, idx:10699/10845, loss:0.7500855467684359, acc:0.7845093457943926


100%|█████████▉| 10800/10845 [1:30:05<00:19,  2.31it/s, acc=0.785, epoch=31, loss=0.75] 

epoch:31, idx:10799/10845, loss:0.7500675126675654, acc:0.784537037037037


100%|██████████| 10845/10845 [1:30:27<00:00,  2.12it/s, acc=0.784, epoch=31, loss=0.751]


epoch:31, idx:0/1275, loss:1.1346499919891357, acc:0.5
epoch:31, idx:100/1275, loss:1.2790923378255108, acc:0.6361386138613861
epoch:31, idx:200/1275, loss:1.1975632526388216, acc:0.6442786069651741
epoch:31, idx:300/1275, loss:1.215773663251503, acc:0.6461794019933554
epoch:31, idx:400/1275, loss:1.1847059945215905, acc:0.6533665835411472
epoch:31, idx:500/1275, loss:1.1785975867164824, acc:0.6551896207584831
epoch:31, idx:600/1275, loss:1.1811016773225465, acc:0.6509983361064892
epoch:31, idx:700/1275, loss:1.1981006148198192, acc:0.6476462196861626
epoch:31, idx:800/1275, loss:1.2077805492166573, acc:0.6457553058676654
epoch:31, idx:900/1275, loss:1.1947403012316975, acc:0.6484461709211987
epoch:31, idx:1000/1275, loss:1.1934990355899402, acc:0.6491008991008991
epoch:31, idx:1100/1275, loss:1.185166811639888, acc:0.650772025431426
epoch:31, idx:1200/1275, loss:1.1824225929754164, acc:0.6509159034138218


  1%|          | 100/10845 [00:51<1:31:55,  1.95it/s, acc=0.775, epoch=32, loss=0.775]

epoch:32, idx:99/10845, loss:0.7749779897928238, acc:0.775


  2%|▏         | 200/10845 [01:41<1:18:27,  2.26it/s, acc=0.796, epoch=32, loss=0.7]  

epoch:32, idx:199/10845, loss:0.7004395908117295, acc:0.79625


  3%|▎         | 300/10845 [02:35<1:56:24,  1.51it/s, acc=0.797, epoch=32, loss=0.704]

epoch:32, idx:299/10845, loss:0.7041348034143448, acc:0.7975


  4%|▎         | 400/10845 [03:26<1:46:29,  1.63it/s, acc=0.805, epoch=32, loss=0.671]

epoch:32, idx:399/10845, loss:0.671338657438755, acc:0.805


  5%|▍         | 500/10845 [04:22<1:27:46,  1.96it/s, acc=0.802, epoch=32, loss=0.678]

epoch:32, idx:499/10845, loss:0.6775364257097244, acc:0.8025


  6%|▌         | 600/10845 [05:18<1:21:54,  2.08it/s, acc=0.8, epoch=32, loss=0.681]  

epoch:32, idx:599/10845, loss:0.6811877583463987, acc:0.8004166666666667


  6%|▋         | 700/10845 [06:16<2:05:33,  1.35it/s, acc=0.795, epoch=32, loss=0.71] 

epoch:32, idx:699/10845, loss:0.710316173178809, acc:0.7946428571428571


  7%|▋         | 800/10845 [07:08<1:28:36,  1.89it/s, acc=0.798, epoch=32, loss=0.697]

epoch:32, idx:799/10845, loss:0.6969008343666792, acc:0.7978125


  8%|▊         | 900/10845 [07:59<1:27:58,  1.88it/s, acc=0.797, epoch=32, loss=0.7]  

epoch:32, idx:899/10845, loss:0.7004150233666102, acc:0.7972222222222223


  9%|▉         | 1000/10845 [08:55<1:16:33,  2.14it/s, acc=0.799, epoch=32, loss=0.697]

epoch:32, idx:999/10845, loss:0.6973140867948532, acc:0.7995


 10%|█         | 1100/10845 [09:48<1:11:53,  2.26it/s, acc=0.797, epoch=32, loss=0.706]

epoch:32, idx:1099/10845, loss:0.7057206112146378, acc:0.7970454545454545


 11%|█         | 1200/10845 [10:41<1:14:55,  2.15it/s, acc=0.797, epoch=32, loss=0.707]

epoch:32, idx:1199/10845, loss:0.7074800644318263, acc:0.7970833333333334


 12%|█▏        | 1300/10845 [11:35<1:24:11,  1.89it/s, acc=0.797, epoch=32, loss=0.706]

epoch:32, idx:1299/10845, loss:0.7063720547694426, acc:0.7973076923076923


 13%|█▎        | 1400/10845 [12:28<1:12:34,  2.17it/s, acc=0.795, epoch=32, loss=0.711]

epoch:32, idx:1399/10845, loss:0.7109692069249494, acc:0.7948214285714286


 14%|█▍        | 1500/10845 [13:25<1:40:19,  1.55it/s, acc=0.792, epoch=32, loss=0.717]

epoch:32, idx:1499/10845, loss:0.7170761546492577, acc:0.7916666666666666


 15%|█▍        | 1600/10845 [14:18<1:28:07,  1.75it/s, acc=0.793, epoch=32, loss=0.712]

epoch:32, idx:1599/10845, loss:0.7121976469643414, acc:0.7934375


 16%|█▌        | 1700/10845 [15:10<1:11:13,  2.14it/s, acc=0.794, epoch=32, loss=0.712]

epoch:32, idx:1699/10845, loss:0.7121484505604295, acc:0.794264705882353


 17%|█▋        | 1800/10845 [16:06<1:30:18,  1.67it/s, acc=0.795, epoch=32, loss=0.711]

epoch:32, idx:1799/10845, loss:0.7110650136238998, acc:0.7951388888888888


 18%|█▊        | 1900/10845 [16:57<1:00:30,  2.46it/s, acc=0.794, epoch=32, loss=0.715]

epoch:32, idx:1899/10845, loss:0.7153016951993892, acc:0.7938157894736843


 18%|█▊        | 2001/10845 [17:50<53:00,  2.78it/s, acc=0.793, epoch=32, loss=0.718]  

epoch:32, idx:1999/10845, loss:0.7179491018205881, acc:0.792625


 19%|█▉        | 2100/10845 [18:40<1:11:05,  2.05it/s, acc=0.792, epoch=32, loss=0.718]

epoch:32, idx:2099/10845, loss:0.717838731989974, acc:0.7921428571428571


 20%|██        | 2200/10845 [19:34<1:04:38,  2.23it/s, acc=0.792, epoch=32, loss=0.716]

epoch:32, idx:2199/10845, loss:0.7162317813391035, acc:0.7920454545454545


 21%|██        | 2300/10845 [20:27<1:20:05,  1.78it/s, acc=0.793, epoch=32, loss=0.715]

epoch:32, idx:2299/10845, loss:0.7151640752217044, acc:0.792608695652174


 22%|██▏       | 2400/10845 [21:19<1:03:19,  2.22it/s, acc=0.792, epoch=32, loss=0.717]

epoch:32, idx:2399/10845, loss:0.7173700086896618, acc:0.7923958333333333


 23%|██▎       | 2500/10845 [22:12<54:09,  2.57it/s, acc=0.793, epoch=32, loss=0.713]  

epoch:32, idx:2499/10845, loss:0.712920531475544, acc:0.7928


 24%|██▍       | 2600/10845 [23:04<1:10:28,  1.95it/s, acc=0.793, epoch=32, loss=0.714]

epoch:32, idx:2599/10845, loss:0.7138001139003497, acc:0.7925961538461539


 25%|██▍       | 2701/10845 [23:52<43:40,  3.11it/s, acc=0.794, epoch=32, loss=0.711]  

epoch:32, idx:2699/10845, loss:0.7113168404168553, acc:0.7937037037037037


 26%|██▌       | 2800/10845 [24:40<54:44,  2.45it/s, acc=0.794, epoch=32, loss=0.713]  

epoch:32, idx:2799/10845, loss:0.7128537754182305, acc:0.7936607142857143


 27%|██▋       | 2900/10845 [25:36<1:20:41,  1.64it/s, acc=0.795, epoch=32, loss=0.709]

epoch:32, idx:2899/10845, loss:0.7085788466498769, acc:0.7947413793103448


 28%|██▊       | 3000/10845 [26:28<1:50:17,  1.19it/s, acc=0.795, epoch=32, loss=0.711]

epoch:32, idx:2999/10845, loss:0.7112307044565678, acc:0.7945833333333333


 29%|██▊       | 3100/10845 [27:11<45:59,  2.81it/s, acc=0.794, epoch=32, loss=0.712]  

epoch:32, idx:3099/10845, loss:0.7120669864550714, acc:0.7942741935483871


 30%|██▉       | 3200/10845 [27:53<57:14,  2.23it/s, acc=0.794, epoch=32, loss=0.714]  

epoch:32, idx:3199/10845, loss:0.7135343282390386, acc:0.793515625


 30%|███       | 3300/10845 [28:40<1:29:05,  1.41it/s, acc=0.794, epoch=32, loss=0.715]

epoch:32, idx:3299/10845, loss:0.7152839196360472, acc:0.7935606060606061


 31%|███▏      | 3400/10845 [29:37<1:25:06,  1.46it/s, acc=0.793, epoch=32, loss=0.715]

epoch:32, idx:3399/10845, loss:0.7154659382823636, acc:0.7933823529411764


 32%|███▏      | 3500/10845 [30:34<1:19:07,  1.55it/s, acc=0.793, epoch=32, loss=0.72] 

epoch:32, idx:3499/10845, loss:0.7199070778489113, acc:0.793


 33%|███▎      | 3600/10845 [31:25<56:13,  2.15it/s, acc=0.793, epoch=32, loss=0.72]   

epoch:32, idx:3599/10845, loss:0.7203997598008977, acc:0.7929861111111111


 34%|███▍      | 3700/10845 [32:15<1:08:27,  1.74it/s, acc=0.792, epoch=32, loss=0.726]

epoch:32, idx:3699/10845, loss:0.725775308504298, acc:0.7925


 35%|███▌      | 3800/10845 [33:11<1:06:02,  1.78it/s, acc=0.792, epoch=32, loss=0.73] 

epoch:32, idx:3799/10845, loss:0.7303546060464884, acc:0.7921710526315789


 36%|███▌      | 3900/10845 [34:04<1:08:35,  1.69it/s, acc=0.792, epoch=32, loss=0.734]

epoch:32, idx:3899/10845, loss:0.7342685507887449, acc:0.7915384615384615


 37%|███▋      | 4000/10845 [34:55<48:07,  2.37it/s, acc=0.792, epoch=32, loss=0.732]  

epoch:32, idx:3999/10845, loss:0.7321131292954087, acc:0.7919375


 38%|███▊      | 4100/10845 [35:47<1:27:14,  1.29it/s, acc=0.792, epoch=32, loss=0.731]

epoch:32, idx:4099/10845, loss:0.7313392694476174, acc:0.791829268292683


 39%|███▊      | 4200/10845 [36:43<1:16:41,  1.44it/s, acc=0.791, epoch=32, loss=0.732]

epoch:32, idx:4199/10845, loss:0.7320336174610115, acc:0.7911904761904762


 40%|███▉      | 4300/10845 [37:40<1:32:49,  1.18it/s, acc=0.791, epoch=32, loss=0.732]

epoch:32, idx:4299/10845, loss:0.7317582185115925, acc:0.7911627906976744


 41%|████      | 4400/10845 [38:28<53:31,  2.01it/s, acc=0.792, epoch=32, loss=0.731]  

epoch:32, idx:4399/10845, loss:0.7305883776667443, acc:0.7917045454545455


 41%|████▏     | 4500/10845 [39:20<1:03:37,  1.66it/s, acc=0.792, epoch=32, loss=0.734]

epoch:32, idx:4499/10845, loss:0.7338708165950245, acc:0.7915555555555556


 42%|████▏     | 4600/10845 [40:16<1:10:31,  1.48it/s, acc=0.791, epoch=32, loss=0.734]

epoch:32, idx:4599/10845, loss:0.7337115594032018, acc:0.7913586956521739


 43%|████▎     | 4700/10845 [41:11<1:02:45,  1.63it/s, acc=0.79, epoch=32, loss=0.737] 

epoch:32, idx:4699/10845, loss:0.7369346454232297, acc:0.7901595744680852


 44%|████▍     | 4800/10845 [41:52<45:19,  2.22it/s, acc=0.791, epoch=32, loss=0.733]  

epoch:32, idx:4799/10845, loss:0.7334781281587979, acc:0.7908854166666667


 45%|████▌     | 4900/10845 [42:39<45:49,  2.16it/s, acc=0.791, epoch=32, loss=0.735]  

epoch:32, idx:4899/10845, loss:0.7347247209901713, acc:0.7907142857142857


 46%|████▌     | 5000/10845 [43:27<39:13,  2.48it/s, acc=0.791, epoch=32, loss=0.734]  

epoch:32, idx:4999/10845, loss:0.733910586053133, acc:0.7911


 47%|████▋     | 5101/10845 [44:10<40:09,  2.38it/s, acc=0.791, epoch=32, loss=0.732]  

epoch:32, idx:5099/10845, loss:0.7320322239808008, acc:0.7914705882352941


 48%|████▊     | 5200/10845 [45:03<52:55,  1.78it/s, acc=0.791, epoch=32, loss=0.734]  

epoch:32, idx:5199/10845, loss:0.7337129144771741, acc:0.7908653846153846


 49%|████▉     | 5300/10845 [45:56<37:28,  2.47it/s, acc=0.791, epoch=32, loss=0.736]  

epoch:32, idx:5299/10845, loss:0.7355084441349191, acc:0.7905660377358491


 50%|████▉     | 5400/10845 [46:47<59:58,  1.51it/s, acc=0.791, epoch=32, loss=0.736]  

epoch:32, idx:5399/10845, loss:0.7362959398218879, acc:0.7907407407407407


 51%|█████     | 5500/10845 [47:41<37:50,  2.35it/s, acc=0.79, epoch=32, loss=0.738]  

epoch:32, idx:5499/10845, loss:0.7380243567932736, acc:0.7901818181818182


 52%|█████▏    | 5600/10845 [48:34<51:14,  1.71it/s, acc=0.79, epoch=32, loss=0.736]  

epoch:32, idx:5599/10845, loss:0.7364754205143877, acc:0.7904464285714285


 53%|█████▎    | 5700/10845 [49:21<43:57,  1.95it/s, acc=0.791, epoch=32, loss=0.735]  

epoch:32, idx:5699/10845, loss:0.734776337507524, acc:0.7909649122807018


 53%|█████▎    | 5800/10845 [50:11<33:02,  2.54it/s, acc=0.791, epoch=32, loss=0.735]  

epoch:32, idx:5799/10845, loss:0.7350455046676356, acc:0.7909913793103448


 54%|█████▍    | 5900/10845 [51:03<40:11,  2.05it/s, acc=0.791, epoch=32, loss=0.734]  

epoch:32, idx:5899/10845, loss:0.7344079209535809, acc:0.7911016949152543


 55%|█████▌    | 6000/10845 [51:54<45:25,  1.78it/s, acc=0.791, epoch=32, loss=0.734]  

epoch:32, idx:5999/10845, loss:0.7342055910130342, acc:0.7909583333333333


 56%|█████▌    | 6100/10845 [52:49<38:54,  2.03it/s, acc=0.791, epoch=32, loss=0.734]  

epoch:32, idx:6099/10845, loss:0.7344641831956926, acc:0.7906147540983607


 57%|█████▋    | 6200/10845 [53:49<25:50,  3.00it/s, acc=0.791, epoch=32, loss=0.733]  

epoch:32, idx:6199/10845, loss:0.7327016623846946, acc:0.7909274193548387


 58%|█████▊    | 6300/10845 [54:35<39:13,  1.93it/s, acc=0.79, epoch=32, loss=0.736] 

epoch:32, idx:6299/10845, loss:0.7356842312642506, acc:0.7903571428571429


 59%|█████▉    | 6400/10845 [55:25<36:49,  2.01it/s, acc=0.79, epoch=32, loss=0.736]

epoch:32, idx:6399/10845, loss:0.736220632866025, acc:0.78984375


 60%|█████▉    | 6500/10845 [56:18<42:41,  1.70it/s, acc=0.79, epoch=32, loss=0.737]

epoch:32, idx:6499/10845, loss:0.7367708578751637, acc:0.7896538461538462


 61%|██████    | 6600/10845 [57:08<41:03,  1.72it/s, acc=0.79, epoch=32, loss=0.736]

epoch:32, idx:6599/10845, loss:0.7355768372615178, acc:0.7896969696969697


 62%|██████▏   | 6700/10845 [57:57<27:40,  2.50it/s, acc=0.79, epoch=32, loss=0.735]

epoch:32, idx:6699/10845, loss:0.7346506558158504, acc:0.7899626865671642


 63%|██████▎   | 6800/10845 [58:47<36:51,  1.83it/s, acc=0.791, epoch=32, loss=0.732]

epoch:32, idx:6799/10845, loss:0.7320018059102928, acc:0.790625


 64%|██████▎   | 6900/10845 [59:31<22:46,  2.89it/s, acc=0.79, epoch=32, loss=0.734] 

epoch:32, idx:6899/10845, loss:0.7337633101750112, acc:0.7902173913043479


 65%|██████▍   | 7000/10845 [1:00:22<29:07,  2.20it/s, acc=0.79, epoch=32, loss=0.736]

epoch:32, idx:6999/10845, loss:0.7358806453347206, acc:0.78975


 65%|██████▌   | 7100/10845 [1:01:18<40:45,  1.53it/s, acc=0.79, epoch=32, loss=0.736]

epoch:32, idx:7099/10845, loss:0.736470196322656, acc:0.7898591549295775


 66%|██████▋   | 7200/10845 [1:02:07<28:32,  2.13it/s, acc=0.79, epoch=32, loss=0.735]

epoch:32, idx:7199/10845, loss:0.73530670400295, acc:0.7900694444444445


 67%|██████▋   | 7300/10845 [1:03:02<32:26,  1.82it/s, acc=0.79, epoch=32, loss=0.735]

epoch:32, idx:7299/10845, loss:0.7346768478702193, acc:0.7904109589041096


 68%|██████▊   | 7400/10845 [1:03:53<21:20,  2.69it/s, acc=0.79, epoch=32, loss=0.735] 

epoch:32, idx:7399/10845, loss:0.7351003506336663, acc:0.790304054054054


 69%|██████▉   | 7500/10845 [1:04:45<25:07,  2.22it/s, acc=0.79, epoch=32, loss=0.736]

epoch:32, idx:7499/10845, loss:0.7355716470360756, acc:0.7901333333333334


 70%|███████   | 7600/10845 [1:05:39<34:39,  1.56it/s, acc=0.79, epoch=32, loss=0.736]

epoch:32, idx:7599/10845, loss:0.7362641938854205, acc:0.7897697368421053


 71%|███████   | 7700/10845 [1:06:29<18:12,  2.88it/s, acc=0.789, epoch=32, loss=0.738]

epoch:32, idx:7699/10845, loss:0.737759322535682, acc:0.7894805194805194


 72%|███████▏  | 7800/10845 [1:07:20<21:51,  2.32it/s, acc=0.789, epoch=32, loss=0.74] 

epoch:32, idx:7799/10845, loss:0.7398554993936649, acc:0.7888782051282052


 73%|███████▎  | 7900/10845 [1:08:10<18:24,  2.67it/s, acc=0.789, epoch=32, loss=0.74] 

epoch:32, idx:7899/10845, loss:0.7396802506378934, acc:0.7889873417721519


 74%|███████▍  | 8000/10845 [1:08:57<19:50,  2.39it/s, acc=0.789, epoch=32, loss=0.74] 

epoch:32, idx:7999/10845, loss:0.7399449626766145, acc:0.789


 75%|███████▍  | 8100/10845 [1:09:49<28:21,  1.61it/s, acc=0.789, epoch=32, loss=0.741]

epoch:32, idx:8099/10845, loss:0.7410271047920357, acc:0.7887962962962963


 76%|███████▌  | 8200/10845 [1:10:39<17:33,  2.51it/s, acc=0.789, epoch=32, loss=0.741]

epoch:32, idx:8199/10845, loss:0.7412215265722536, acc:0.7885365853658537


 77%|███████▋  | 8300/10845 [1:11:30<19:11,  2.21it/s, acc=0.789, epoch=32, loss=0.741]

epoch:32, idx:8299/10845, loss:0.7405554481963795, acc:0.7885542168674698


 77%|███████▋  | 8400/10845 [1:12:22<23:56,  1.70it/s, acc=0.788, epoch=32, loss=0.74] 

epoch:32, idx:8399/10845, loss:0.7404263687222487, acc:0.7884821428571429


 78%|███████▊  | 8500/10845 [1:13:14<20:00,  1.95it/s, acc=0.788, epoch=32, loss=0.741]

epoch:32, idx:8499/10845, loss:0.7411865229764405, acc:0.7883235294117648


 79%|███████▉  | 8600/10845 [1:14:00<16:05,  2.32it/s, acc=0.788, epoch=32, loss=0.743]

epoch:32, idx:8599/10845, loss:0.7428761638337097, acc:0.7881395348837209


 80%|████████  | 8700/10845 [1:14:51<13:54,  2.57it/s, acc=0.788, epoch=32, loss=0.744]

epoch:32, idx:8699/10845, loss:0.7436578255223817, acc:0.7877586206896552


 81%|████████  | 8800/10845 [1:15:43<13:23,  2.55it/s, acc=0.788, epoch=32, loss=0.743]

epoch:32, idx:8799/10845, loss:0.7432874018309469, acc:0.7878977272727272


 82%|████████▏ | 8900/10845 [1:16:38<13:51,  2.34it/s, acc=0.788, epoch=32, loss=0.742]

epoch:32, idx:8899/10845, loss:0.7415866212312425, acc:0.7882865168539326


 83%|████████▎ | 9000/10845 [1:17:31<13:20,  2.31it/s, acc=0.788, epoch=32, loss=0.742]

epoch:32, idx:8999/10845, loss:0.7419986008356015, acc:0.7880555555555555


 84%|████████▍ | 9100/10845 [1:18:22<14:53,  1.95it/s, acc=0.788, epoch=32, loss=0.743]

epoch:32, idx:9099/10845, loss:0.7430204254077686, acc:0.7881318681318681


 85%|████████▍ | 9200/10845 [1:19:11<10:11,  2.69it/s, acc=0.788, epoch=32, loss=0.743]

epoch:32, idx:9199/10845, loss:0.7428950659104664, acc:0.7878532608695652


 86%|████████▌ | 9300/10845 [1:20:05<11:23,  2.26it/s, acc=0.788, epoch=32, loss=0.743]

epoch:32, idx:9299/10845, loss:0.7434824843832882, acc:0.7878494623655914


 87%|████████▋ | 9400/10845 [1:20:59<11:27,  2.10it/s, acc=0.788, epoch=32, loss=0.744]

epoch:32, idx:9399/10845, loss:0.7441576370232282, acc:0.7876595744680851


 88%|████████▊ | 9500/10845 [1:21:50<10:35,  2.12it/s, acc=0.787, epoch=32, loss=0.744]

epoch:32, idx:9499/10845, loss:0.7444663020639043, acc:0.7873684210526316


 89%|████████▊ | 9600/10845 [1:22:42<09:41,  2.14it/s, acc=0.787, epoch=32, loss=0.745]

epoch:32, idx:9599/10845, loss:0.7453175819680715, acc:0.7871875


 89%|████████▉ | 9700/10845 [1:23:35<13:18,  1.43it/s, acc=0.787, epoch=32, loss=0.747]

epoch:32, idx:9699/10845, loss:0.7465766245879464, acc:0.7868298969072165


 90%|█████████ | 9800/10845 [1:24:26<10:10,  1.71it/s, acc=0.787, epoch=32, loss=0.747]

epoch:32, idx:9799/10845, loss:0.7472356888606232, acc:0.7866836734693877


 91%|█████████▏| 9900/10845 [1:25:17<07:09,  2.20it/s, acc=0.787, epoch=32, loss=0.746]

epoch:32, idx:9899/10845, loss:0.7457154752705434, acc:0.7872222222222223


 92%|█████████▏| 10000/10845 [1:26:12<07:59,  1.76it/s, acc=0.787, epoch=32, loss=0.746]

epoch:32, idx:9999/10845, loss:0.7463645044222474, acc:0.787025


 93%|█████████▎| 10100/10845 [1:27:04<08:02,  1.54it/s, acc=0.787, epoch=32, loss=0.746]

epoch:32, idx:10099/10845, loss:0.7458403177559376, acc:0.7872524752475247


 94%|█████████▍| 10200/10845 [1:27:55<06:24,  1.68it/s, acc=0.787, epoch=32, loss=0.747]

epoch:32, idx:10199/10845, loss:0.7465816805190315, acc:0.7870833333333334


 95%|█████████▍| 10300/10845 [1:28:47<05:46,  1.57it/s, acc=0.787, epoch=32, loss=0.747]

epoch:32, idx:10299/10845, loss:0.7472277261519317, acc:0.7869174757281553


 96%|█████████▌| 10400/10845 [1:29:36<05:35,  1.33it/s, acc=0.787, epoch=32, loss=0.748]

epoch:32, idx:10399/10845, loss:0.7477452700542143, acc:0.7867067307692308


 97%|█████████▋| 10500/10845 [1:30:26<02:19,  2.48it/s, acc=0.786, epoch=32, loss=0.748]

epoch:32, idx:10499/10845, loss:0.7483536980733985, acc:0.7864523809523809


 98%|█████████▊| 10600/10845 [1:31:19<02:32,  1.60it/s, acc=0.787, epoch=32, loss=0.748]

epoch:32, idx:10599/10845, loss:0.747564739870294, acc:0.7867688679245283


 99%|█████████▊| 10700/10845 [1:32:11<01:20,  1.80it/s, acc=0.787, epoch=32, loss=0.748]

epoch:32, idx:10699/10845, loss:0.7479216606274386, acc:0.7867523364485981


100%|█████████▉| 10800/10845 [1:33:06<00:22,  2.03it/s, acc=0.787, epoch=32, loss=0.748]

epoch:32, idx:10799/10845, loss:0.748499339039403, acc:0.7865277777777778


100%|██████████| 10845/10845 [1:33:27<00:00,  2.79it/s, acc=0.787, epoch=32, loss=0.748]


epoch:32, idx:0/1275, loss:1.0961222648620605, acc:0.5
epoch:32, idx:100/1275, loss:1.2511724307395444, acc:0.6410891089108911
epoch:32, idx:200/1275, loss:1.173474933525816, acc:0.6517412935323383
epoch:32, idx:300/1275, loss:1.201554188597638, acc:0.6511627906976745
epoch:32, idx:400/1275, loss:1.1736448048207528, acc:0.6596009975062345
epoch:32, idx:500/1275, loss:1.167261889892186, acc:0.6586826347305389
epoch:32, idx:600/1275, loss:1.16916556212549, acc:0.6551580698835274
epoch:32, idx:700/1275, loss:1.1892817034445884, acc:0.651925820256776
epoch:32, idx:800/1275, loss:1.1983535595228907, acc:0.650749063670412
epoch:32, idx:900/1275, loss:1.1883486939654102, acc:0.6526082130965594
epoch:32, idx:1000/1275, loss:1.1853155906086081, acc:0.6535964035964036
epoch:32, idx:1100/1275, loss:1.175414269047364, acc:0.655086285195277
epoch:32, idx:1200/1275, loss:1.1732214306861932, acc:0.6559117402164862


  1%|          | 100/10845 [00:50<1:32:50,  1.93it/s, acc=0.77, epoch=33, loss=0.782]

epoch:33, idx:99/10845, loss:0.7824611121416092, acc:0.77


  2%|▏         | 200/10845 [01:43<1:39:07,  1.79it/s, acc=0.775, epoch=33, loss=0.764]

epoch:33, idx:199/10845, loss:0.7640097078680992, acc:0.775


  3%|▎         | 300/10845 [02:33<1:23:14,  2.11it/s, acc=0.779, epoch=33, loss=0.719]

epoch:33, idx:299/10845, loss:0.7194674199819565, acc:0.7791666666666667


  4%|▎         | 400/10845 [03:16<1:25:45,  2.03it/s, acc=0.784, epoch=33, loss=0.704]

epoch:33, idx:399/10845, loss:0.7037611274421215, acc:0.78375


  5%|▍         | 500/10845 [04:08<1:28:04,  1.96it/s, acc=0.786, epoch=33, loss=0.711]

epoch:33, idx:499/10845, loss:0.7106446801424027, acc:0.786


  6%|▌         | 600/10845 [04:59<1:55:54,  1.47it/s, acc=0.787, epoch=33, loss=0.71] 

epoch:33, idx:599/10845, loss:0.7101207028826078, acc:0.7870833333333334


  6%|▋         | 700/10845 [05:52<1:41:29,  1.67it/s, acc=0.788, epoch=33, loss=0.707]

epoch:33, idx:699/10845, loss:0.7072010494981493, acc:0.7878571428571428


  7%|▋         | 800/10845 [06:41<57:21,  2.92it/s, acc=0.789, epoch=33, loss=0.71]   

epoch:33, idx:799/10845, loss:0.7104574985429645, acc:0.78875


  8%|▊         | 900/10845 [07:32<1:39:05,  1.67it/s, acc=0.787, epoch=33, loss=0.718]

epoch:33, idx:899/10845, loss:0.7183232217033704, acc:0.7866666666666666


  9%|▉         | 1000/10845 [08:16<1:22:49,  1.98it/s, acc=0.787, epoch=33, loss=0.723]

epoch:33, idx:999/10845, loss:0.7227462626993656, acc:0.7875


 10%|█         | 1100/10845 [09:05<1:38:12,  1.65it/s, acc=0.789, epoch=33, loss=0.722]

epoch:33, idx:1099/10845, loss:0.7223951331322843, acc:0.7886363636363637


 11%|█         | 1200/10845 [09:55<59:43,  2.69it/s, acc=0.785, epoch=33, loss=0.72]   

epoch:33, idx:1199/10845, loss:0.7204293122639259, acc:0.7854166666666667


 12%|█▏        | 1300/10845 [10:46<1:08:53,  2.31it/s, acc=0.79, epoch=33, loss=0.705] 

epoch:33, idx:1299/10845, loss:0.7054301311878057, acc:0.7896153846153846


 13%|█▎        | 1400/10845 [11:38<1:12:12,  2.18it/s, acc=0.788, epoch=33, loss=0.71] 

epoch:33, idx:1399/10845, loss:0.7100135112660272, acc:0.7880357142857143


 14%|█▍        | 1500/10845 [12:32<1:14:17,  2.10it/s, acc=0.788, epoch=33, loss=0.715]

epoch:33, idx:1499/10845, loss:0.7147353122234344, acc:0.788


 15%|█▍        | 1600/10845 [13:19<1:15:36,  2.04it/s, acc=0.788, epoch=33, loss=0.716]

epoch:33, idx:1599/10845, loss:0.7158854449912906, acc:0.788125


 16%|█▌        | 1700/10845 [14:09<1:29:28,  1.70it/s, acc=0.786, epoch=33, loss=0.723]

epoch:33, idx:1699/10845, loss:0.7231142085440019, acc:0.7860294117647059


 17%|█▋        | 1800/10845 [15:07<1:27:25,  1.72it/s, acc=0.787, epoch=33, loss=0.718]

epoch:33, idx:1799/10845, loss:0.7177055128084289, acc:0.7875


 18%|█▊        | 1900/10845 [16:00<1:21:35,  1.83it/s, acc=0.788, epoch=33, loss=0.719]

epoch:33, idx:1899/10845, loss:0.7187883707410411, acc:0.7882894736842105


 18%|█▊        | 2000/10845 [16:48<57:09,  2.58it/s, acc=0.789, epoch=33, loss=0.717]  

epoch:33, idx:1999/10845, loss:0.7168520481288433, acc:0.78925


 19%|█▉        | 2100/10845 [17:37<1:13:57,  1.97it/s, acc=0.79, epoch=33, loss=0.713] 

epoch:33, idx:2099/10845, loss:0.7125684310424896, acc:0.7898809523809524


 20%|██        | 2200/10845 [18:25<50:11,  2.87it/s, acc=0.79, epoch=33, loss=0.718]   

epoch:33, idx:2199/10845, loss:0.7179364269700917, acc:0.7897727272727273


 21%|██        | 2300/10845 [19:16<1:06:16,  2.15it/s, acc=0.791, epoch=33, loss=0.713]

epoch:33, idx:2299/10845, loss:0.7128383199028346, acc:0.7908695652173913


 22%|██▏       | 2400/10845 [20:08<1:26:50,  1.62it/s, acc=0.791, epoch=33, loss=0.711]

epoch:33, idx:2399/10845, loss:0.7106299076477687, acc:0.7905208333333333


 23%|██▎       | 2500/10845 [21:02<1:41:59,  1.36it/s, acc=0.791, epoch=33, loss=0.708]

epoch:33, idx:2499/10845, loss:0.7083682020425797, acc:0.7914


 24%|██▍       | 2600/10845 [21:51<1:13:24,  1.87it/s, acc=0.792, epoch=33, loss=0.708]

epoch:33, idx:2599/10845, loss:0.7077304798823136, acc:0.7916346153846154


 25%|██▍       | 2700/10845 [22:44<1:08:54,  1.97it/s, acc=0.79, epoch=33, loss=0.717] 

epoch:33, idx:2699/10845, loss:0.7174213180277083, acc:0.7900925925925926


 26%|██▌       | 2800/10845 [23:32<1:17:35,  1.73it/s, acc=0.79, epoch=33, loss=0.715] 

epoch:33, idx:2799/10845, loss:0.7151019572785923, acc:0.7901785714285714


 27%|██▋       | 2900/10845 [24:24<1:21:31,  1.62it/s, acc=0.79, epoch=33, loss=0.717] 

epoch:33, idx:2899/10845, loss:0.7173739864702883, acc:0.7897413793103448


 28%|██▊       | 3000/10845 [25:18<1:24:36,  1.55it/s, acc=0.791, epoch=33, loss=0.712]

epoch:33, idx:2999/10845, loss:0.7120813887317975, acc:0.79075


 29%|██▊       | 3100/10845 [26:11<57:47,  2.23it/s, acc=0.791, epoch=33, loss=0.712]  

epoch:33, idx:3099/10845, loss:0.7120448429930595, acc:0.7906451612903226


 30%|██▉       | 3200/10845 [27:02<1:13:54,  1.72it/s, acc=0.791, epoch=33, loss=0.713]

epoch:33, idx:3199/10845, loss:0.7125087020453066, acc:0.7909375


 30%|███       | 3300/10845 [27:55<27:25,  4.59it/s, acc=0.79, epoch=33, loss=0.716]   

epoch:33, idx:3299/10845, loss:0.7158008549701084, acc:0.7895454545454546


 31%|███▏      | 3400/10845 [28:47<1:03:01,  1.97it/s, acc=0.789, epoch=33, loss=0.719]

epoch:33, idx:3399/10845, loss:0.7190539994923507, acc:0.7891176470588235


 32%|███▏      | 3500/10845 [29:42<1:08:23,  1.79it/s, acc=0.789, epoch=33, loss=0.722]

epoch:33, idx:3499/10845, loss:0.7221951722332409, acc:0.7885714285714286


 33%|███▎      | 3600/10845 [30:37<1:07:40,  1.78it/s, acc=0.789, epoch=33, loss=0.722]

epoch:33, idx:3599/10845, loss:0.7217337889638212, acc:0.7885416666666667


 34%|███▍      | 3700/10845 [31:27<1:03:35,  1.87it/s, acc=0.789, epoch=33, loss=0.724]

epoch:33, idx:3699/10845, loss:0.7235996272596154, acc:0.788581081081081


 35%|███▌      | 3800/10845 [32:16<58:05,  2.02it/s, acc=0.788, epoch=33, loss=0.727]  

epoch:33, idx:3799/10845, loss:0.7268394780551133, acc:0.7878947368421053


 36%|███▌      | 3900/10845 [33:02<50:32,  2.29it/s, acc=0.788, epoch=33, loss=0.725]  

epoch:33, idx:3899/10845, loss:0.7252047102497174, acc:0.7881410256410256


 37%|███▋      | 4000/10845 [33:54<59:12,  1.93it/s, acc=0.788, epoch=33, loss=0.725]  

epoch:33, idx:3999/10845, loss:0.7252326037362218, acc:0.7884375


 38%|███▊      | 4100/10845 [34:44<1:04:57,  1.73it/s, acc=0.789, epoch=33, loss=0.726]

epoch:33, idx:4099/10845, loss:0.725965413749218, acc:0.7887804878048781


 39%|███▊      | 4200/10845 [35:38<1:25:21,  1.30it/s, acc=0.789, epoch=33, loss=0.726]

epoch:33, idx:4199/10845, loss:0.7263894893938587, acc:0.7889285714285714


 40%|███▉      | 4300/10845 [36:32<48:43,  2.24it/s, acc=0.789, epoch=33, loss=0.724]  

epoch:33, idx:4299/10845, loss:0.7243579964513003, acc:0.7894767441860465


 41%|████      | 4400/10845 [37:26<47:22,  2.27it/s, acc=0.79, epoch=33, loss=0.722]   

epoch:33, idx:4399/10845, loss:0.7222782846811143, acc:0.7895454545454546


 41%|████▏     | 4500/10845 [38:12<51:28,  2.05it/s, acc=0.79, epoch=33, loss=0.725]   

epoch:33, idx:4499/10845, loss:0.7245239716768265, acc:0.7898333333333334


 42%|████▏     | 4600/10845 [39:05<51:48,  2.01it/s, acc=0.789, epoch=33, loss=0.726]  

epoch:33, idx:4599/10845, loss:0.726298961095188, acc:0.7891304347826087


 43%|████▎     | 4700/10845 [39:57<44:10,  2.32it/s, acc=0.789, epoch=33, loss=0.726]  

epoch:33, idx:4699/10845, loss:0.725544160997614, acc:0.7888297872340425


 44%|████▍     | 4800/10845 [40:49<49:34,  2.03it/s, acc=0.789, epoch=33, loss=0.725]  

epoch:33, idx:4799/10845, loss:0.7249822376916806, acc:0.7889583333333333


 45%|████▌     | 4900/10845 [41:46<1:00:26,  1.64it/s, acc=0.788, epoch=33, loss=0.726]

epoch:33, idx:4899/10845, loss:0.7258349702066305, acc:0.7882653061224489


 46%|████▌     | 5000/10845 [42:38<49:35,  1.96it/s, acc=0.788, epoch=33, loss=0.728]  

epoch:33, idx:4999/10845, loss:0.7282965034604073, acc:0.7883


 47%|████▋     | 5100/10845 [43:28<47:18,  2.02it/s, acc=0.789, epoch=33, loss=0.726]  

epoch:33, idx:5099/10845, loss:0.7259567255249211, acc:0.7890686274509804


 48%|████▊     | 5200/10845 [44:18<46:40,  2.02it/s, acc=0.788, epoch=33, loss=0.728]  

epoch:33, idx:5199/10845, loss:0.7284238017751621, acc:0.7883173076923077


 49%|████▉     | 5300/10845 [45:10<1:11:40,  1.29it/s, acc=0.788, epoch=33, loss=0.728]

epoch:33, idx:5299/10845, loss:0.7281992513391207, acc:0.788254716981132


 50%|████▉     | 5400/10845 [45:59<43:07,  2.10it/s, acc=0.787, epoch=33, loss=0.729]  

epoch:33, idx:5399/10845, loss:0.729442371306596, acc:0.7875


 51%|█████     | 5500/10845 [46:54<35:49,  2.49it/s, acc=0.788, epoch=33, loss=0.73]   

epoch:33, idx:5499/10845, loss:0.7295864171439951, acc:0.788


 52%|█████▏    | 5600/10845 [47:46<46:08,  1.89it/s, acc=0.788, epoch=33, loss=0.73] 

epoch:33, idx:5599/10845, loss:0.7297878001310996, acc:0.7876785714285715


 53%|█████▎    | 5700/10845 [48:34<1:04:29,  1.33it/s, acc=0.788, epoch=33, loss=0.73]

epoch:33, idx:5699/10845, loss:0.7295999038114882, acc:0.7876754385964913


 53%|█████▎    | 5800/10845 [49:27<34:13,  2.46it/s, acc=0.788, epoch=33, loss=0.73]  

epoch:33, idx:5799/10845, loss:0.7299236069876572, acc:0.787801724137931


 54%|█████▍    | 5900/10845 [50:18<46:22,  1.78it/s, acc=0.788, epoch=33, loss=0.73] 

epoch:33, idx:5899/10845, loss:0.7298725748769308, acc:0.7876271186440678


 55%|█████▌    | 6000/10845 [51:11<45:12,  1.79it/s, acc=0.787, epoch=33, loss=0.731]  

epoch:33, idx:5999/10845, loss:0.7309828585684299, acc:0.787125


 56%|█████▌    | 6100/10845 [52:05<36:11,  2.19it/s, acc=0.788, epoch=33, loss=0.73]   

epoch:33, idx:6099/10845, loss:0.7296494518635703, acc:0.7875819672131148


 57%|█████▋    | 6200/10845 [52:55<17:46,  4.35it/s, acc=0.787, epoch=33, loss=0.732]  

epoch:33, idx:6199/10845, loss:0.7323972532729949, acc:0.7868548387096774


 58%|█████▊    | 6300/10845 [53:47<39:24,  1.92it/s, acc=0.787, epoch=33, loss=0.734]

epoch:33, idx:6299/10845, loss:0.7342111933704406, acc:0.786547619047619


 59%|█████▉    | 6400/10845 [54:39<36:49,  2.01it/s, acc=0.787, epoch=33, loss=0.734]

epoch:33, idx:6399/10845, loss:0.7341811756975949, acc:0.78671875


 60%|█████▉    | 6500/10845 [55:29<34:31,  2.10it/s, acc=0.787, epoch=33, loss=0.732]

epoch:33, idx:6499/10845, loss:0.7322771308422089, acc:0.7872307692307692


 61%|██████    | 6600/10845 [56:20<37:24,  1.89it/s, acc=0.787, epoch=33, loss=0.733]

epoch:33, idx:6599/10845, loss:0.7328276960687203, acc:0.7870454545454545


 62%|██████▏   | 6700/10845 [57:18<22:41,  3.04it/s, acc=0.787, epoch=33, loss=0.733]  

epoch:33, idx:6699/10845, loss:0.7334750815113978, acc:0.7869402985074627


 63%|██████▎   | 6800/10845 [58:10<47:57,  1.41it/s, acc=0.787, epoch=33, loss=0.734]

epoch:33, idx:6799/10845, loss:0.7337675825374967, acc:0.7867647058823529


 64%|██████▎   | 6900/10845 [59:03<35:04,  1.87it/s, acc=0.787, epoch=33, loss=0.735]

epoch:33, idx:6899/10845, loss:0.7347163537187853, acc:0.7868478260869565


 65%|██████▍   | 7000/10845 [1:00:01<32:38,  1.96it/s, acc=0.787, epoch=33, loss=0.736]

epoch:33, idx:6999/10845, loss:0.7357028341208185, acc:0.7868214285714286


 65%|██████▌   | 7100/10845 [1:00:57<25:46,  2.42it/s, acc=0.787, epoch=33, loss=0.735]

epoch:33, idx:7099/10845, loss:0.7351619030220408, acc:0.7870422535211268


 66%|██████▋   | 7200/10845 [1:01:50<29:09,  2.08it/s, acc=0.787, epoch=33, loss=0.736]

epoch:33, idx:7199/10845, loss:0.7361683729332354, acc:0.786875


 67%|██████▋   | 7300/10845 [1:02:43<37:24,  1.58it/s, acc=0.787, epoch=33, loss=0.737]

epoch:33, idx:7299/10845, loss:0.7373082512943712, acc:0.786541095890411


 68%|██████▊   | 7400/10845 [1:03:34<27:05,  2.12it/s, acc=0.787, epoch=33, loss=0.736]

epoch:33, idx:7399/10845, loss:0.7357419757907455, acc:0.7869594594594594


 69%|██████▉   | 7500/10845 [1:04:25<23:08,  2.41it/s, acc=0.787, epoch=33, loss=0.737]

epoch:33, idx:7499/10845, loss:0.7373252456744512, acc:0.7866666666666666


 70%|███████   | 7600/10845 [1:05:18<35:05,  1.54it/s, acc=0.787, epoch=33, loss=0.735]

epoch:33, idx:7599/10845, loss:0.7348178425666533, acc:0.7871710526315789


 71%|███████   | 7700/10845 [1:06:13<24:13,  2.16it/s, acc=0.787, epoch=33, loss=0.736]

epoch:33, idx:7699/10845, loss:0.7358041318206044, acc:0.7868831168831169


 72%|███████▏  | 7800/10845 [1:07:06<20:08,  2.52it/s, acc=0.787, epoch=33, loss=0.737]

epoch:33, idx:7799/10845, loss:0.7367984105360049, acc:0.786923076923077


 73%|███████▎  | 7900/10845 [1:07:54<11:41,  4.20it/s, acc=0.787, epoch=33, loss=0.738]

epoch:33, idx:7899/10845, loss:0.7379262956920304, acc:0.7865189873417722


 74%|███████▍  | 8000/10845 [1:08:50<24:59,  1.90it/s, acc=0.786, epoch=33, loss=0.739]

epoch:33, idx:7999/10845, loss:0.7389240847658366, acc:0.786125


 75%|███████▍  | 8100/10845 [1:09:48<26:06,  1.75it/s, acc=0.786, epoch=33, loss=0.738]

epoch:33, idx:8099/10845, loss:0.7384638259936998, acc:0.7862037037037037


 76%|███████▌  | 8200/10845 [1:10:40<23:05,  1.91it/s, acc=0.786, epoch=33, loss=0.739]

epoch:33, idx:8199/10845, loss:0.7385761475835632, acc:0.7861890243902439


 77%|███████▋  | 8300/10845 [1:11:31<24:47,  1.71it/s, acc=0.786, epoch=33, loss=0.74] 

epoch:33, idx:8299/10845, loss:0.7400751728160554, acc:0.7859939759036144


 77%|███████▋  | 8400/10845 [1:12:24<21:52,  1.86it/s, acc=0.786, epoch=33, loss=0.74] 

epoch:33, idx:8399/10845, loss:0.7402493316697932, acc:0.7859821428571429


 78%|███████▊  | 8500/10845 [1:13:09<21:33,  1.81it/s, acc=0.786, epoch=33, loss=0.739]

epoch:33, idx:8499/10845, loss:0.7391102707999594, acc:0.786264705882353


 79%|███████▉  | 8600/10845 [1:13:59<14:40,  2.55it/s, acc=0.787, epoch=33, loss=0.739]

epoch:33, idx:8599/10845, loss:0.738520566750058, acc:0.7865697674418605


 80%|████████  | 8700/10845 [1:14:53<19:04,  1.87it/s, acc=0.787, epoch=33, loss=0.738]

epoch:33, idx:8699/10845, loss:0.7383399203299791, acc:0.7866091954022989


 81%|████████  | 8800/10845 [1:15:48<19:34,  1.74it/s, acc=0.786, epoch=33, loss=0.739]

epoch:33, idx:8799/10845, loss:0.7393739195625213, acc:0.7863920454545454


 82%|████████▏ | 8900/10845 [1:16:41<16:21,  1.98it/s, acc=0.786, epoch=33, loss=0.739]

epoch:33, idx:8899/10845, loss:0.7390635698280308, acc:0.7864325842696629


 83%|████████▎ | 9000/10845 [1:17:37<18:51,  1.63it/s, acc=0.786, epoch=33, loss=0.739]

epoch:33, idx:8999/10845, loss:0.7390331864141756, acc:0.78625


 84%|████████▍ | 9100/10845 [1:18:25<20:50,  1.40it/s, acc=0.786, epoch=33, loss=0.74] 

epoch:33, idx:9099/10845, loss:0.7401099538786726, acc:0.7859065934065934


 85%|████████▍ | 9200/10845 [1:19:19<14:18,  1.92it/s, acc=0.786, epoch=33, loss=0.743]

epoch:33, idx:9199/10845, loss:0.7427365441085851, acc:0.785516304347826


 86%|████████▌ | 9300/10845 [1:20:09<12:10,  2.11it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:9299/10845, loss:0.7422458538621344, acc:0.7852956989247312


 87%|████████▋ | 9400/10845 [1:21:02<09:25,  2.55it/s, acc=0.786, epoch=33, loss=0.742]

epoch:33, idx:9399/10845, loss:0.7415675099875698, acc:0.7855585106382978


 88%|████████▊ | 9500/10845 [1:21:59<15:20,  1.46it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:9499/10845, loss:0.7417570416817539, acc:0.7853947368421053


 89%|████████▊ | 9600/10845 [1:22:52<07:20,  2.82it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:9599/10845, loss:0.7422634695423767, acc:0.7853125


 89%|████████▉ | 9700/10845 [1:23:40<14:47,  1.29it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:9699/10845, loss:0.742052536313374, acc:0.7854123711340206


 90%|█████████ | 9800/10845 [1:24:31<10:17,  1.69it/s, acc=0.786, epoch=33, loss=0.742]

epoch:33, idx:9799/10845, loss:0.7423780502789483, acc:0.7855867346938775


 91%|█████████▏| 9900/10845 [1:25:20<06:16,  2.51it/s, acc=0.786, epoch=33, loss=0.743]

epoch:33, idx:9899/10845, loss:0.7426290385184264, acc:0.7856818181818181


 92%|█████████▏| 10000/10845 [1:26:13<06:51,  2.05it/s, acc=0.786, epoch=33, loss=0.742]

epoch:33, idx:9999/10845, loss:0.7415454342439771, acc:0.785975


 93%|█████████▎| 10100/10845 [1:27:05<04:58,  2.50it/s, acc=0.786, epoch=33, loss=0.741]

epoch:33, idx:10099/10845, loss:0.7414672141573807, acc:0.7860148514851485


 94%|█████████▍| 10200/10845 [1:27:52<02:50,  3.78it/s, acc=0.786, epoch=33, loss=0.742]

epoch:33, idx:10199/10845, loss:0.7421518670475367, acc:0.7859803921568628


 95%|█████████▍| 10300/10845 [1:28:41<06:15,  1.45it/s, acc=0.786, epoch=33, loss=0.743]

epoch:33, idx:10299/10845, loss:0.7428143649787, acc:0.7856067961165049


 96%|█████████▌| 10400/10845 [1:29:36<03:14,  2.29it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:10399/10845, loss:0.7423377933911979, acc:0.7854086538461539


 97%|█████████▋| 10500/10845 [1:30:24<02:55,  1.97it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:10499/10845, loss:0.7423582220687752, acc:0.7854285714285715


 98%|█████████▊| 10600/10845 [1:31:18<02:15,  1.80it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:10599/10845, loss:0.7424596223445996, acc:0.785495283018868


 99%|█████████▊| 10700/10845 [1:32:09<01:28,  1.64it/s, acc=0.785, epoch=33, loss=0.743]

epoch:33, idx:10699/10845, loss:0.7427421489901075, acc:0.785373831775701


100%|█████████▉| 10800/10845 [1:33:00<00:20,  2.23it/s, acc=0.785, epoch=33, loss=0.742]

epoch:33, idx:10799/10845, loss:0.7421406559604737, acc:0.7853703703703704


100%|██████████| 10845/10845 [1:33:24<00:00,  2.15it/s, acc=0.785, epoch=33, loss=0.743]


epoch:33, idx:0/1275, loss:0.9120296239852905, acc:0.5
epoch:33, idx:100/1275, loss:1.2597069297686663, acc:0.650990099009901
epoch:33, idx:200/1275, loss:1.1817678930747568, acc:0.654228855721393
epoch:33, idx:300/1275, loss:1.206019430263494, acc:0.6519933554817275
epoch:33, idx:400/1275, loss:1.1726189694202451, acc:0.6564837905236908
epoch:33, idx:500/1275, loss:1.1661229101483694, acc:0.6596806387225549
epoch:33, idx:600/1275, loss:1.1707729527636892, acc:0.6555740432612313
epoch:33, idx:700/1275, loss:1.1925607531114923, acc:0.6533523537803139
epoch:33, idx:800/1275, loss:1.2047756210099743, acc:0.650749063670412
epoch:33, idx:900/1275, loss:1.1936189154541321, acc:0.6539955604883463
epoch:33, idx:1000/1275, loss:1.1890413151992547, acc:0.6548451548451548
epoch:33, idx:1100/1275, loss:1.1799095105193724, acc:0.6559945504087193
epoch:33, idx:1200/1275, loss:1.1777719483585978, acc:0.6577851790174855


  1%|          | 100/10845 [00:55<1:40:05,  1.79it/s, acc=0.795, epoch=34, loss=0.675]

epoch:34, idx:99/10845, loss:0.6749297505617142, acc:0.795


  2%|▏         | 200/10845 [01:43<1:17:51,  2.28it/s, acc=0.792, epoch=34, loss=0.7]  

epoch:34, idx:199/10845, loss:0.7004782398045063, acc:0.7925


  3%|▎         | 300/10845 [02:35<1:41:06,  1.74it/s, acc=0.776, epoch=34, loss=0.781]

epoch:34, idx:299/10845, loss:0.7807725728551547, acc:0.7758333333333334


  4%|▎         | 400/10845 [03:26<1:11:46,  2.43it/s, acc=0.785, epoch=34, loss=0.762]

epoch:34, idx:399/10845, loss:0.7615006663650274, acc:0.785


  5%|▍         | 500/10845 [04:18<1:42:27,  1.68it/s, acc=0.783, epoch=34, loss=0.774]

epoch:34, idx:499/10845, loss:0.7740078907608986, acc:0.7835


  6%|▌         | 600/10845 [05:10<1:40:58,  1.69it/s, acc=0.788, epoch=34, loss=0.757]

epoch:34, idx:599/10845, loss:0.7567550248404344, acc:0.7879166666666667


  6%|▋         | 700/10845 [06:01<1:21:54,  2.06it/s, acc=0.784, epoch=34, loss=0.757]

epoch:34, idx:699/10845, loss:0.7565062863060407, acc:0.7839285714285714


  7%|▋         | 800/10845 [06:55<1:13:06,  2.29it/s, acc=0.785, epoch=34, loss=0.756]

epoch:34, idx:799/10845, loss:0.7564960623905063, acc:0.7846875


  8%|▊         | 900/10845 [07:43<1:02:10,  2.67it/s, acc=0.783, epoch=34, loss=0.763]

epoch:34, idx:899/10845, loss:0.7633782138758235, acc:0.7830555555555555


  9%|▉         | 1000/10845 [08:33<53:32,  3.06it/s, acc=0.787, epoch=34, loss=0.748] 

epoch:34, idx:999/10845, loss:0.7475030669569969, acc:0.7875


 10%|█         | 1100/10845 [09:27<1:13:08,  2.22it/s, acc=0.786, epoch=34, loss=0.744]

epoch:34, idx:1099/10845, loss:0.7440872787345539, acc:0.7856818181818181


 11%|█         | 1200/10845 [10:18<1:30:32,  1.78it/s, acc=0.786, epoch=34, loss=0.747]

epoch:34, idx:1199/10845, loss:0.7470207385222117, acc:0.785625


 12%|█▏        | 1300/10845 [11:07<1:17:00,  2.07it/s, acc=0.789, epoch=34, loss=0.741]

epoch:34, idx:1299/10845, loss:0.741283708856656, acc:0.7892307692307692


 13%|█▎        | 1400/10845 [11:56<1:50:01,  1.43it/s, acc=0.789, epoch=34, loss=0.736]

epoch:34, idx:1399/10845, loss:0.7359322639448302, acc:0.7894642857142857


 14%|█▍        | 1500/10845 [12:43<1:22:17,  1.89it/s, acc=0.789, epoch=34, loss=0.737]

epoch:34, idx:1499/10845, loss:0.7371661702394485, acc:0.7895


 15%|█▍        | 1600/10845 [13:33<59:29,  2.59it/s, acc=0.79, epoch=34, loss=0.733]   

epoch:34, idx:1599/10845, loss:0.7332974796742201, acc:0.7903125


 16%|█▌        | 1700/10845 [14:28<1:28:16,  1.73it/s, acc=0.79, epoch=34, loss=0.733] 

epoch:34, idx:1699/10845, loss:0.7326039481513641, acc:0.7901470588235294


 17%|█▋        | 1800/10845 [15:18<58:42,  2.57it/s, acc=0.79, epoch=34, loss=0.736]   

epoch:34, idx:1799/10845, loss:0.7361226886841986, acc:0.7895833333333333


 18%|█▊        | 1900/10845 [16:13<55:55,  2.67it/s, acc=0.788, epoch=34, loss=0.738]  

epoch:34, idx:1899/10845, loss:0.7376762310141012, acc:0.7880263157894737


 18%|█▊        | 2000/10845 [17:03<44:18,  3.33it/s, acc=0.791, epoch=34, loss=0.731]  

epoch:34, idx:1999/10845, loss:0.730658504575491, acc:0.790625


 19%|█▉        | 2100/10845 [17:50<1:03:27,  2.30it/s, acc=0.791, epoch=34, loss=0.726]

epoch:34, idx:2099/10845, loss:0.7257889747903461, acc:0.7911904761904762


 20%|██        | 2200/10845 [18:39<1:01:00,  2.36it/s, acc=0.792, epoch=34, loss=0.72] 

epoch:34, idx:2199/10845, loss:0.7200514823062854, acc:0.7915909090909091


 21%|██        | 2300/10845 [19:32<1:03:21,  2.25it/s, acc=0.791, epoch=34, loss=0.721]

epoch:34, idx:2299/10845, loss:0.7209657198190689, acc:0.7907608695652174


 22%|██▏       | 2400/10845 [20:27<1:14:22,  1.89it/s, acc=0.789, epoch=34, loss=0.726]

epoch:34, idx:2399/10845, loss:0.7260730901857217, acc:0.7890625


 23%|██▎       | 2500/10845 [21:18<1:06:07,  2.10it/s, acc=0.789, epoch=34, loss=0.727]

epoch:34, idx:2499/10845, loss:0.7272691506862641, acc:0.7887


 24%|██▍       | 2600/10845 [22:04<34:33,  3.98it/s, acc=0.789, epoch=34, loss=0.726]  

epoch:34, idx:2599/10845, loss:0.7261612770878352, acc:0.7893269230769231


 25%|██▍       | 2700/10845 [22:54<1:14:28,  1.82it/s, acc=0.79, epoch=34, loss=0.725] 

epoch:34, idx:2699/10845, loss:0.7249421848632671, acc:0.789537037037037


 26%|██▌       | 2800/10845 [23:46<56:45,  2.36it/s, acc=0.79, epoch=34, loss=0.726]   

epoch:34, idx:2799/10845, loss:0.7259506204937185, acc:0.7897321428571429


 27%|██▋       | 2900/10845 [24:41<1:23:49,  1.58it/s, acc=0.79, epoch=34, loss=0.726] 

epoch:34, idx:2899/10845, loss:0.7264662073398459, acc:0.79


 28%|██▊       | 3000/10845 [25:34<59:55,  2.18it/s, acc=0.791, epoch=34, loss=0.721]  

epoch:34, idx:2999/10845, loss:0.7205421022971471, acc:0.7909166666666667


 29%|██▊       | 3100/10845 [26:26<1:06:11,  1.95it/s, acc=0.792, epoch=34, loss=0.718]

epoch:34, idx:3099/10845, loss:0.7184191541133388, acc:0.792016129032258


 30%|██▉       | 3200/10845 [27:12<1:02:25,  2.04it/s, acc=0.792, epoch=34, loss=0.722]

epoch:34, idx:3199/10845, loss:0.7220843462273479, acc:0.792109375


 30%|███       | 3300/10845 [28:07<1:15:45,  1.66it/s, acc=0.791, epoch=34, loss=0.724]

epoch:34, idx:3299/10845, loss:0.7237580145308465, acc:0.7911363636363636


 31%|███▏      | 3400/10845 [29:00<57:39,  2.15it/s, acc=0.791, epoch=34, loss=0.724]  

epoch:34, idx:3399/10845, loss:0.7236470325992388, acc:0.7909558823529412


 32%|███▏      | 3500/10845 [29:52<1:04:02,  1.91it/s, acc=0.791, epoch=34, loss=0.721]

epoch:34, idx:3499/10845, loss:0.72126832062006, acc:0.7908571428571428


 33%|███▎      | 3600/10845 [30:44<1:01:24,  1.97it/s, acc=0.79, epoch=34, loss=0.723] 

epoch:34, idx:3599/10845, loss:0.7234181945936548, acc:0.7903472222222222


 34%|███▍      | 3700/10845 [31:36<47:22,  2.51it/s, acc=0.79, epoch=34, loss=0.724]   

epoch:34, idx:3699/10845, loss:0.7244444060567263, acc:0.7902027027027027


 35%|███▌      | 3800/10845 [32:24<1:00:39,  1.94it/s, acc=0.79, epoch=34, loss=0.723]

epoch:34, idx:3799/10845, loss:0.7233036911252297, acc:0.7901973684210526


 36%|███▌      | 3900/10845 [33:19<1:14:06,  1.56it/s, acc=0.789, epoch=34, loss=0.727]

epoch:34, idx:3899/10845, loss:0.7267255956469438, acc:0.7892307692307692


 37%|███▋      | 4000/10845 [34:10<1:06:49,  1.71it/s, acc=0.789, epoch=34, loss=0.73] 

epoch:34, idx:3999/10845, loss:0.7296698696091771, acc:0.7889375


 38%|███▊      | 4100/10845 [35:02<55:48,  2.01it/s, acc=0.789, epoch=34, loss=0.73]   

epoch:34, idx:4099/10845, loss:0.7297092837458704, acc:0.7887195121951219


 39%|███▊      | 4200/10845 [35:54<50:43,  2.18it/s, acc=0.789, epoch=34, loss=0.728]  

epoch:34, idx:4199/10845, loss:0.7276738971471787, acc:0.7894047619047619


 40%|███▉      | 4300/10845 [36:43<1:14:01,  1.47it/s, acc=0.79, epoch=34, loss=0.725] 

epoch:34, idx:4299/10845, loss:0.7250462310397348, acc:0.7901744186046512


 41%|████      | 4400/10845 [37:28<1:03:49,  1.68it/s, acc=0.79, epoch=34, loss=0.725]

epoch:34, idx:4399/10845, loss:0.7249614348872141, acc:0.7898295454545454


 41%|████▏     | 4500/10845 [38:23<48:12,  2.19it/s, acc=0.789, epoch=34, loss=0.726]  

epoch:34, idx:4499/10845, loss:0.7263711491690742, acc:0.7894444444444444


 42%|████▏     | 4600/10845 [39:15<1:04:36,  1.61it/s, acc=0.789, epoch=34, loss=0.728]

epoch:34, idx:4599/10845, loss:0.727883469151414, acc:0.7894565217391304


 43%|████▎     | 4700/10845 [40:06<1:07:11,  1.52it/s, acc=0.79, epoch=34, loss=0.726] 

epoch:34, idx:4699/10845, loss:0.7260472664046794, acc:0.7898404255319149


 44%|████▍     | 4800/10845 [40:58<42:55,  2.35it/s, acc=0.789, epoch=34, loss=0.725]  

epoch:34, idx:4799/10845, loss:0.7254918706292908, acc:0.7894791666666666


 45%|████▌     | 4900/10845 [41:53<1:14:03,  1.34it/s, acc=0.79, epoch=34, loss=0.723] 

epoch:34, idx:4899/10845, loss:0.722637517111642, acc:0.7901020408163265


 46%|████▌     | 5000/10845 [42:38<43:40,  2.23it/s, acc=0.791, epoch=34, loss=0.721]  

epoch:34, idx:4999/10845, loss:0.7210204187393189, acc:0.791


 47%|████▋     | 5100/10845 [43:32<52:01,  1.84it/s, acc=0.79, epoch=34, loss=0.724]   

epoch:34, idx:5099/10845, loss:0.7236288537348018, acc:0.7903431372549019


 48%|████▊     | 5200/10845 [44:25<50:40,  1.86it/s, acc=0.791, epoch=34, loss=0.722]  

epoch:34, idx:5199/10845, loss:0.7222662093891548, acc:0.7905769230769231


 49%|████▉     | 5300/10845 [45:18<1:10:00,  1.32it/s, acc=0.79, epoch=34, loss=0.723] 

epoch:34, idx:5299/10845, loss:0.7227040648966465, acc:0.7904716981132075


 50%|████▉     | 5400/10845 [46:10<32:29,  2.79it/s, acc=0.79, epoch=34, loss=0.727]  

epoch:34, idx:5399/10845, loss:0.7267070350878768, acc:0.7897685185185185


 51%|█████     | 5500/10845 [47:03<28:59,  3.07it/s, acc=0.79, epoch=34, loss=0.726]  

epoch:34, idx:5499/10845, loss:0.7262030937942592, acc:0.7898181818181819


 52%|█████▏    | 5600/10845 [47:56<35:00,  2.50it/s, acc=0.79, epoch=34, loss=0.725]  

epoch:34, idx:5599/10845, loss:0.7251752775215677, acc:0.7904464285714285


 53%|█████▎    | 5700/10845 [48:50<49:15,  1.74it/s, acc=0.791, epoch=34, loss=0.723]  

epoch:34, idx:5699/10845, loss:0.7228434389254503, acc:0.790921052631579


 53%|█████▎    | 5800/10845 [49:44<59:09,  1.42it/s, acc=0.791, epoch=34, loss=0.726]  

epoch:34, idx:5799/10845, loss:0.725533999024794, acc:0.7906896551724137


 54%|█████▍    | 5900/10845 [50:36<51:04,  1.61it/s, acc=0.791, epoch=34, loss=0.724]

epoch:34, idx:5899/10845, loss:0.7236170111116716, acc:0.7909322033898305


 55%|█████▌    | 6000/10845 [51:28<42:53,  1.88it/s, acc=0.791, epoch=34, loss=0.726]  

epoch:34, idx:5999/10845, loss:0.7256001726637284, acc:0.7906666666666666


 56%|█████▌    | 6100/10845 [52:15<26:04,  3.03it/s, acc=0.79, epoch=34, loss=0.726]  

epoch:34, idx:6099/10845, loss:0.7257117978910931, acc:0.7904098360655738


 57%|█████▋    | 6200/10845 [53:06<42:03,  1.84it/s, acc=0.791, epoch=34, loss=0.726] 

epoch:34, idx:6199/10845, loss:0.7259097065896757, acc:0.7905645161290322


 58%|█████▊    | 6300/10845 [53:58<42:35,  1.78it/s, acc=0.79, epoch=34, loss=0.728] 

epoch:34, idx:6299/10845, loss:0.7283419579598639, acc:0.790079365079365


 59%|█████▉    | 6400/10845 [54:51<37:03,  2.00it/s, acc=0.79, epoch=34, loss=0.729]  

epoch:34, idx:6399/10845, loss:0.7289625745406374, acc:0.790078125


 60%|█████▉    | 6500/10845 [55:46<37:14,  1.94it/s, acc=0.79, epoch=34, loss=0.731]

epoch:34, idx:6499/10845, loss:0.7310589358027165, acc:0.7896538461538462


 61%|██████    | 6600/10845 [56:36<35:14,  2.01it/s, acc=0.789, epoch=34, loss=0.731]

epoch:34, idx:6599/10845, loss:0.7313497166154963, acc:0.7894318181818182


 62%|██████▏   | 6700/10845 [57:18<32:36,  2.12it/s, acc=0.789, epoch=34, loss=0.733]

epoch:34, idx:6699/10845, loss:0.7329586381921128, acc:0.7891417910447761


 63%|██████▎   | 6800/10845 [58:09<38:56,  1.73it/s, acc=0.789, epoch=34, loss=0.733]

epoch:34, idx:6799/10845, loss:0.7326889536968049, acc:0.7890073529411765


 64%|██████▎   | 6900/10845 [59:04<43:36,  1.51it/s, acc=0.79, epoch=34, loss=0.732] 

epoch:34, idx:6899/10845, loss:0.731764390749344, acc:0.7895652173913044


 65%|██████▍   | 7000/10845 [59:56<39:51,  1.61it/s, acc=0.79, epoch=34, loss=0.732]

epoch:34, idx:6999/10845, loss:0.7319084893635341, acc:0.7898928571428572


 65%|██████▌   | 7100/10845 [1:00:46<30:59,  2.01it/s, acc=0.789, epoch=34, loss=0.733]

epoch:34, idx:7099/10845, loss:0.7328590821464297, acc:0.7894718309859154


 66%|██████▋   | 7200/10845 [1:01:44<40:38,  1.49it/s, acc=0.789, epoch=34, loss=0.734]

epoch:34, idx:7199/10845, loss:0.7341192466600074, acc:0.7892708333333334


 67%|██████▋   | 7300/10845 [1:02:36<21:33,  2.74it/s, acc=0.789, epoch=34, loss=0.735]

epoch:34, idx:7299/10845, loss:0.7348316136655743, acc:0.7892465753424658


 68%|██████▊   | 7400/10845 [1:03:25<23:26,  2.45it/s, acc=0.789, epoch=34, loss=0.735]

epoch:34, idx:7399/10845, loss:0.735060976554413, acc:0.7893918918918919


 69%|██████▉   | 7500/10845 [1:04:18<24:33,  2.27it/s, acc=0.789, epoch=34, loss=0.735]

epoch:34, idx:7499/10845, loss:0.7352757801055908, acc:0.7893666666666667


 70%|███████   | 7600/10845 [1:05:08<29:11,  1.85it/s, acc=0.789, epoch=34, loss=0.736]

epoch:34, idx:7599/10845, loss:0.7357605544281633, acc:0.7890131578947368


 71%|███████   | 7700/10845 [1:05:58<24:58,  2.10it/s, acc=0.789, epoch=34, loss=0.739]

epoch:34, idx:7699/10845, loss:0.7385880202519429, acc:0.7888636363636363


 72%|███████▏  | 7800/10845 [1:06:55<23:13,  2.18it/s, acc=0.789, epoch=34, loss=0.737]

epoch:34, idx:7799/10845, loss:0.737141954035331, acc:0.7892628205128205


 73%|███████▎  | 7900/10845 [1:07:44<22:24,  2.19it/s, acc=0.789, epoch=34, loss=0.738]

epoch:34, idx:7899/10845, loss:0.737999215171307, acc:0.7892721518987342


 74%|███████▍  | 8000/10845 [1:08:39<29:54,  1.59it/s, acc=0.789, epoch=34, loss=0.738]

epoch:34, idx:7999/10845, loss:0.738398122496903, acc:0.78903125


 75%|███████▍  | 8100/10845 [1:09:27<24:04,  1.90it/s, acc=0.789, epoch=34, loss=0.737]

epoch:34, idx:8099/10845, loss:0.7374796290456512, acc:0.7892592592592592


 76%|███████▌  | 8200/10845 [1:10:21<26:10,  1.68it/s, acc=0.789, epoch=34, loss=0.737]

epoch:34, idx:8199/10845, loss:0.7365784001641157, acc:0.7894207317073171


 77%|███████▋  | 8300/10845 [1:11:11<16:15,  2.61it/s, acc=0.79, epoch=34, loss=0.736] 

epoch:34, idx:8299/10845, loss:0.7355227639732591, acc:0.7895783132530121


 77%|███████▋  | 8400/10845 [1:12:02<10:45,  3.79it/s, acc=0.79, epoch=34, loss=0.735] 

epoch:34, idx:8399/10845, loss:0.7350572447549729, acc:0.7895238095238095


 78%|███████▊  | 8500/10845 [1:12:50<20:18,  1.93it/s, acc=0.79, epoch=34, loss=0.735] 

epoch:34, idx:8499/10845, loss:0.7350138575750239, acc:0.7895588235294118


 79%|███████▉  | 8600/10845 [1:13:39<18:21,  2.04it/s, acc=0.789, epoch=34, loss=0.736]

epoch:34, idx:8599/10845, loss:0.736076588090076, acc:0.7892732558139535


 80%|████████  | 8700/10845 [1:14:31<16:17,  2.20it/s, acc=0.789, epoch=34, loss=0.737]

epoch:34, idx:8699/10845, loss:0.7366831382903559, acc:0.7891954022988505


 81%|████████  | 8800/10845 [1:15:20<13:41,  2.49it/s, acc=0.789, epoch=34, loss=0.738]

epoch:34, idx:8799/10845, loss:0.7377639002048156, acc:0.7888920454545455


 82%|████████▏ | 8900/10845 [1:16:10<15:53,  2.04it/s, acc=0.789, epoch=34, loss=0.738]

epoch:34, idx:8899/10845, loss:0.7383493625414506, acc:0.7889044943820225


 83%|████████▎ | 9001/10845 [1:17:01<08:49,  3.49it/s, acc=0.788, epoch=34, loss=0.74] 

epoch:34, idx:8999/10845, loss:0.740116243067715, acc:0.7883888888888889


 84%|████████▍ | 9100/10845 [1:17:52<15:29,  1.88it/s, acc=0.789, epoch=34, loss=0.739]

epoch:34, idx:9099/10845, loss:0.7390881799963804, acc:0.7885989010989011


 85%|████████▍ | 9200/10845 [1:18:44<17:52,  1.53it/s, acc=0.788, epoch=34, loss=0.738]

epoch:34, idx:9199/10845, loss:0.7383832024653321, acc:0.7884782608695652


 86%|████████▌ | 9300/10845 [1:19:37<16:02,  1.61it/s, acc=0.789, epoch=34, loss=0.739]

epoch:34, idx:9299/10845, loss:0.7385974383770778, acc:0.7886827956989247


 87%|████████▋ | 9400/10845 [1:20:29<13:05,  1.84it/s, acc=0.788, epoch=34, loss=0.74] 

epoch:34, idx:9399/10845, loss:0.740142681741334, acc:0.7884308510638298


 88%|████████▊ | 9500/10845 [1:21:17<10:04,  2.22it/s, acc=0.788, epoch=34, loss=0.74] 

epoch:34, idx:9499/10845, loss:0.7404441249433317, acc:0.7884473684210527


 88%|████████▊ | 9574/10845 [1:21:58<09:01,  2.35it/s, acc=0.788, epoch=34, loss=0.74] 