### 多级推理模块

0. 由于没有self-attention pooling了，所以再加一层self-attention层
1. 每次更新段落的Summary vectors 
    input: [batch_sise, para_num, para_len, dim]
    query: [batch_size, dim]
    
2. expand -> view -> biSeqAtt -> sum
3. ori san

In [1]:
import os
import torch
import torch.nn as nn
import torchtext
from tensorboardX import SummaryWriter
import random
import numpy as np

from torchtext.data import NestedField, Field, RawField
from model import *
from dataset import DataHandler
%load_ext autoreload

%autoreload 2

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
class Config:
    def __init__(self):
        self.hidden = 50
        self.embedding_dim = 300
        self.lr = 7.5e-4
        self.epochs = 50
        self.fix_length = 256
        
        self.log_dir = './logs'
        self.model_name = 'h_reason'
        self.batch_size = 4
        self.train_data = './data/train_filter.pt'
        self.dev_data = './data/dev_filter.pt'
        
        self.word_vocab = './data/glove_vocab.pt'
        #self.word_vocab = None
        #self.charNGram_vocab = None
        
        self.dropout = 0.2
        self.seed = 1023
        self.steps = 2
        self.memory_type = 1
        
config = Config()
device = torch.device("cuda:0")


In [3]:
torch.cuda.is_available()

True

In [4]:
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)
torch.cuda.manual_seed_all(config.seed)

In [5]:
save_path = config.model_name  + '_lr_'+ str(config.lr)+ '__hidden__' + str(config.hidden) \
            + '_batchsize_' + str(config.batch_size) +  '_p'+ str(config.dropout)+'_steps_'+str(config.steps)+'memory_type_' \
            + str(config.memory_type)
save_path = os.path.join(config.log_dir, save_path)   
print(save_path)
config.save_path = save_path

./logs/h_reason_lr_0.00075__hidden__50_batchsize_4_p0.2_steps_2memory_type_1


### Define Fileds

In [6]:
word_field = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True) # query
multi_word_field = NestedField(word_field) 

word_field_sup = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True, fix_length=config.fix_length)
multi_word_field_sup = NestedField(word_field_sup) 

charNGram_field = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True) # query
multi_charNGram_field = NestedField(charNGram_field) 

charNGram_field_sup = Field(batch_first=True, sequential=True, tokenize="spacy", lower=True, fix_length=config.fix_length)
multi_charNGram_field_sup = NestedField(charNGram_field_sup) 

raw = RawField()
raw.is_target = False

label_field = Field(sequential=False, is_target=True, use_vocab=False)

dict_field = {
    'id': ('id', raw),
    'supports': ('s_glove', multi_word_field_sup), 
    'query': ('q_glove', word_field), 
    'candidates': ('c_glove', multi_word_field),
    'label': ('label', label_field),
    'mentions': ('mentions', raw),
    'para_label': ('para_label', raw)
}

In [7]:
data_handler = DataHandler(config.train_data, config.dev_data, dict_field)

# torch.save(data_handler.trainset.examples, './data/train_example.pt')
# torch.save(data_handler.valset.examples, './data/dev_example.pt')

load examples.pt  :./data/train_filter.pt, ./data/dev_filter.pt


### Build Vocab

In [8]:
if config.word_vocab is not None:
    word_vocab = torch.load(config.word_vocab)
    multi_word_field_sup.vocab = word_vocab
    word_field_sup.vocab = word_vocab
else:
    multi_word_field_sup.build_vocab(data_handler.trainset, data_handler.valset, 
                                 vectors=torchtext.vocab.GloVe(dim=300,name='6B') )

word_field.vocab = multi_word_field_sup.vocab

In [9]:
print(multi_word_field_sup.vocab.vectors.shape)

torch.Size([312667, 300])


multi_word_field_sup.build_vocab(data_handler.trainset, data_handler.valset, 
                         vectors=torchtext.vocab.GloVe(dim=300,name='840B') )
torch.save(multi_word_field_sup.vocab, './data/glove_vocab.pt')

### Get data_iter

In [10]:
train_iter = data_handler.get_train_iter(batch_size=config.batch_size)
val_iter = data_handler.get_val_iter(batch_size=config.batch_size)

In [11]:
for idx, batch in enumerate(val_iter):
    break
batch


[torchtext.data.batch.Batch of size 4]
	[.id]:['WH_dev_0', 'WH_dev_1', 'WH_dev_2', 'WH_dev_3']
	[.s_glove]:[torch.LongTensor of size 4x15x256]
	[.q_glove]:[torch.LongTensor of size 4x11]
	[.c_glove]:[torch.LongTensor of size 4x18x4]
	[.label]:[torch.LongTensor of size 4]
	[.mentions]:[[[[6, 145, 146], [6, 173, 174], [7, 78, 79]], [[3, 50, 53], [5, 28, 31], [13, 1, 4]], [[6, 135, 136], [6, 218, 219], [6, 261, 262], [8, 45, 46], [12, 98, 99]], [[0, 2, 4], [7, 1, 3], [13, 64, 66], [13, 69, 71]], [[0, 36, 38], [10, 1, 3], [13, 75, 77]], [[0, 14, 15], [1, 63, 64], [1, 138, 139], [1, 186, 187], [1, 238, 239], [2, 128, 129], [9, 8, 9], [9, 43, 44], [10, 19, 20], [10, 34, 35], [11, 37, 38], [11, 79, 80], [13, 56, 57]], [[7, 37, 40]], [[6, 180, 181], [12, 101, 102]], [[12, 96, 97]], [[8, 43, 46]], [[6, 169, 172]], [[6, 179, 181]], [[7, 38, 40]], [[6, 147, 148], [6, 182, 183]], [[6, 171, 172], [9, 6, 7], [11, 35, 36], [11, 121, 122]], [[2, 125, 127], [8, 8, 10], [12, 89, 91]], [[1, 0, 2], [1, 1

### Define Model

In [12]:
def generate_mask(x_size, num_turn, dropout_p=0.0, is_training=False):
    if not is_training: dropout_p = 0.0
    new_data = torch.zeros(x_size, num_turn)
    new_data = (1-dropout_p) * (new_data.zero_() + 1)
    for i in range(new_data.size(0)):
        one = random.randint(0, new_data.size(1)-1)
        new_data[i][one] = 1
    mask = 1.0/(1 - dropout_p) * torch.bernoulli(new_data)
    mask.requires_grad = False
    return mask

class SAN(nn.Module):
    def __init__(self, question_dim, support_dim, candidate_dim, num_turn=5, dropout=0.2, memo_dropout=0.4, memory_type=0, device=None):
        super(SAN,self).__init__()
        self.qp_bilinear_attention_word = BilinearSeqAttn(support_dim, question_dim, dropout=dropout)
        self.qp_bilinear_attention_para = BilinearSeqAttn(support_dim, question_dim, dropout=dropout)

        self.candidates_scorer = BilinearSeqAttn(candidate_dim, question_dim, dropout=dropout)        
        self.gru = nn.GRUCell(support_dim, question_dim)
        
        self.num_turn = num_turn
        
        self.dropout = nn.Dropout(p=dropout)
        self.memo_dropout=memo_dropout
        self.device = device
        self.memory_type = memory_type
        
    def forward(self, question_embedding, para_embedding, candidates_embedding, para_length):
        '''
        input:
            question_embedding: [batch_size, hidden_dim]
            para_embedding: [batch_size*para_num, para_length, hidden_dim]
            candidates_embedding: [batch_size, candidates_num, hidden_dim]

        '''
        score_list = []
        batch_size = question_embedding.size(0)
        hidden = question_embedding.size(1)        
        for turn in range(self.num_turn):
            question_embedding_expand = question_embedding.unsqueeze(1).expand(batch_size, para_length, hidden).contiguous()
            question_embedding_expand = question_embedding_expand.view(-1,hidden)    
            
            # update paragraph embedding
            qp_score_word = self.qp_bilinear_attention_word(para_embedding, question_embedding_expand)
            qp_score_word = F.softmax(qp_score_word, 1)
            para_embedding_summary = torch.bmm(qp_score_word.unsqueeze(1), para_embedding).squeeze(1)
            para_embedding_summary = para_embedding_summary.contiguous().view(batch_size, para_length, hidden)
            
            # update question embedding
            qp_score_para = self.qp_bilinear_attention_para(para_embedding_summary, question_embedding)
            qp_score_para = F.softmax(qp_score_para, 1)
            S = torch.bmm(qp_score_para.unsqueeze(1), para_embedding_summary).squeeze(1)
            
            S = self.dropout(question_embedding)
            question_embedding = self.gru(S, question_embedding)
            
            # compute candidates score
            candidates_score = self.candidates_scorer(candidates_embedding, question_embedding)

            score_list.append(candidates_score)
        if self.memory_type == 0:
            mask = generate_mask(batch_size,self.num_turn, self.memo_dropout, self.training)
            mask = mask.to(self.device)
            mask = [m.contiguous() for m in torch.unbind(mask, 1)]

            score_list = [mask[idx].view(batch_size, 1).expand_as(inp) * inp for idx, inp in enumerate(score_list)]
            scores = torch.stack(score_list, 2)
            scores = torch.mean(scores, 2)
        elif self.memory_type == 1:
            scores = torch.stack(score_list, 2)
            scores = torch.mean(scores, 2)
        elif self.memory_type == 2:
            scores = score_list[-1]
            
        return scores
    
    

In [13]:
class SimpleQANet(nn.Module):
    
    def __init__(self, config, word_vectors, device):
        super(SimpleQANet, self).__init__()
        self.config = config
        self.device = device
        
        self.embedding_layer = EmbeddingLayer(word_vectors)
        

        self.rnn = EncoderRNN(config.embedding_dim, config.hidden, 1, True, True, config.dropout, False)
                
            
        self.co_att = CoAttention(config.hidden*2, att_type=2, dropout=config.dropout)
        
        self.linear_1 = nn.Sequential(
                        nn.Linear(config.hidden*4, config.hidden),
                        nn.ReLU()
                    )        
        self.rnn2 =  EncoderRNN(config.hidden, config.hidden, 1, True, True, config.dropout, False)
        
        self.word_att = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        self.word_att_q = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        
        self.pass_att = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
        
        self.c_att = SelfAttention(config.hidden*2, config.hidden*2, config.dropout)
                
        
        #self.fusion = FusionLayer(config.hidden*2, dropout=config.dropout)
        self.max_pooling = PoolingLayer()     
        
        self.fc = nn.Linear(config.hidden*2, config.hidden*4)
        self.san = SAN(config.hidden*2,config.hidden*2,config.hidden*6, num_turn=config.steps, memory_type=config.memory_type, device=device)
        
        self.to(device)
        
    def get_candidate_vectors(self, batch, support_vectors, device):
        batch_size, candidate_num,_ = batch.c_glove.shape
        _,support_num, support_length = batch.s_glove.shape
        hidden = support_vectors.shape[-1]

        masks = []
        for idx, candidate_mentions in enumerate(batch.mentions):
            mask = torch.zeros(candidate_num, support_num, support_length)
            for i in range(len(candidate_mentions)):
                candidate_mention = candidate_mentions[i]
                for mention in candidate_mention:
                    mask[i][mention[0]][mention[1]:mention[2]] = 1
            masks.append(mask)
        masks = torch.stack(masks).to(device)

        support_vectors = support_vectors.view(batch_size,-1,hidden).unsqueeze(1)

        masks = masks.view(batch_size,candidate_num,-1)
        masks_expand = masks.unsqueeze(-1).expand(batch_size, candidate_num, support_length*support_num, hidden)
        
        candidates = support_vectors * masks_expand
        
        candidates_max = candidates.max(-2)[0]
        candidates_mean = torch.mean(candidates,-2)
        candidates_vectors = torch.cat([candidates_max, candidates_mean],-1)    

        return candidates_vectors        
        
    def forward(self, batch, return_label = True):
        if type(batch.q_glove) is tuple:
            q_glove, _ = batch.q_glove
        else:
            q_glove = batch.q_glove
        s_glove = batch.s_glove
        c_glove = batch.c_glove
        
        q_glove = q_glove.to(self.device)
        s_glove = s_glove.to(self.device)
        c_glove = c_glove.to(self.device)        
        
        q_out = self.embedding_layer(q_glove) # [batch_size,qeustion_length, hidden_dim]
        s_out = self.embedding_layer(s_glove) # [batch_szie, support_num, support_length, hidden_dim]
        c_out = self.embedding_layer(c_glove) # [batch_size, candidates_num, candidates_length, hidden_dim]        
        
        batch_size=  s_out.size(0)
        
        s_len = s_out.size(1)
        c_len = c_out.size(1)
        
        s_word_len = s_out.size(2)
        c_word_len = c_out.size(2)
        
        hidden = s_out.size(-1)
        
        s_out = s_out.view(batch_size*s_len, s_word_len, hidden).contiguous()
        c_out = c_out.view(batch_size*c_len, c_word_len, hidden).contiguous()
        
        q_out = self.rnn(q_out) # [batch_size,qeustion_length, hidden_dim]
        c_out = self.rnn(c_out) # [batch_szie * support_num, support_length, hidden_dim]
        s_out = self.rnn(s_out) # [batch_size * candidates_num, candidates_length, hidden_dim] 
        
        # Attention
        
        q_word_len = q_out.size(1)
        q_out_expand = q_out.unsqueeze(1).expand(batch_size, s_len, q_word_len, q_out.size(-1)).contiguous()
        q_out_expand = q_out_expand.view(batch_size*s_len, q_word_len, q_out.size(-1)).contiguous()
        
        s_out_att, q_out_att = self.co_att(s_out, q_out_expand)
        #S_s = self.fusion(s_out, s_out_att)
        #S_q = self.fusion(q_out, q_out_att)
        
        S_s = self.linear_1(s_out_att)
        S_s = self.rnn2(S_s) # [batch_size * para_num, para_length, hidden*2]
        
        
        candidates_vectors = self.get_candidate_vectors(batch, S_s, self.device)
        question_summary = self.word_att_q(q_out)
        
        
        candidates_summary = self.c_att(c_out)        
        candidates_summary = candidates_summary.view(batch_size, c_len, -1)
        
        candidates_summary = torch.cat([candidates_summary, candidates_vectors],-1)
        
        
        
        score = self.san(question_summary, S_s, candidates_summary, s_len)
        
        if return_label:
            label = batch.label.to(self.device)
            return score, label
        return score

#### test model

In [14]:
model = SimpleQANet(config, word_field.vocab.vectors, device)
#score, label= model(batch)
#print(score.shape, label.shape)

In [15]:
from tqdm import tqdm, trange

In [16]:
from utils import AverageMeter

def train(epoch, data_iter, model, criterion, optimizer, batch_size=1):
    losses = AverageMeter()
    acces = AverageMeter()
    model.train()
    #model.embedding_layer.eval()
    with trange(len(data_iter)) as t:
        for idx, batch in enumerate(data_iter):
            score, label, = model(batch)

            loss = criterion(score, label)

            loss = loss / batch_size
            loss.backward()
            if (idx+1)%batch_size == 0 :
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)            
                optimizer.step()
                optimizer.zero_grad()        

            losses.update(loss.item()*batch_size)

            pred = score.argmax(1)
            acc = pred.eq(label).sum().item()  / pred.size(0)
            acces.update(acc)
            
            matrix = {
                'acc':acces.avg,
                'epoch':epoch,
                'loss': losses.avg
            }
            t.set_postfix(matrix)
            t.update()
            if (idx+1) % (batch_size*100) == 0:
                print(f'epoch:{epoch}, idx:{idx}/{len(data_iter)}, loss:{losses.avg}, acc:{acces.avg}')
    return losses.avg, acces.avg

def val(epoch, data_iter, model, criterion):
    losses = AverageMeter()
    acces = AverageMeter()
    model.eval()
    for idx, batch in enumerate(data_iter):
        with torch.no_grad():
            score, label = model(batch)
                    
        loss = criterion(score, label)

        losses.update(loss.item())
        
        pred = score.argmax(1)
        acc = pred.eq(label).sum().item()  / pred.size(0)
        acces.update(acc)
        if idx % 100 == 0:
            print(f'epoch:{epoch}, idx:{idx}/{len(data_iter)}, loss:{losses.avg}, acc:{acces.avg}')
    return losses.avg, acces.avg

In [17]:
optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                             lr=config.lr)

criterion = nn.CrossEntropyLoss()

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs)
#train(0, train_iter, model, criterion, optimizer, batch_size=config.batch_size)
# val(0, val_iter, model,criterion)

In [18]:
cycle_len = 1
cycle_iter = 50

In [19]:
if not os.path.exists(config.save_path):
    os.makedirs(config.save_path)
writer = SummaryWriter(config.save_path)

best_acc = 0.0
for i in range(cycle_len):
    optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()),
                             lr=config.lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=cycle_iter)
    for epoch in range(cycle_iter):
        scheduler.step()
        train_loss, train_acc = train(epoch, train_iter, model, criterion, optimizer, 1)
        val_loss, val_acc = val(epoch, val_iter, model, criterion)
        global_epoch = cycle_iter * i + epoch + 1
        writer.add_scalar('train_loss', train_loss, global_epoch)
        writer.add_scalar('val_loss', val_loss, global_epoch)
        writer.add_scalar('train_acc', train_acc, global_epoch)
        writer.add_scalar('val_acc', val_acc, global_epoch)

        state = {
            'val_acc': val_acc,
            'train_acc': train_acc,
            'epoch': epoch
            ,
            'model': model.state_dict()
        }
        torch.save(state, os.path.join(config.save_path,'lastest.pth'))
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(state, os.path.join(save_path, f'best_epoch{epoch}.pth'))

  alphas = self.softmax(alphas)  # (bsz, sent_len)
  1%|          | 100/10845 [00:22<40:32,  4.42it/s, acc=0.2, epoch=0, loss=2.65]  

epoch:0, idx:99/10845, loss:2.6503313982486727, acc:0.2


  2%|▏         | 200/10845 [00:45<40:02,  4.43it/s, acc=0.253, epoch=0, loss=2.45]

epoch:0, idx:199/10845, loss:2.4519499760866164, acc:0.2525


  3%|▎         | 300/10845 [01:08<41:02,  4.28it/s, acc=0.279, epoch=0, loss=2.36]

epoch:0, idx:299/10845, loss:2.3641386822859447, acc:0.2791666666666667


  4%|▎         | 400/10845 [01:30<38:43,  4.50it/s, acc=0.304, epoch=0, loss=2.28]

epoch:0, idx:399/10845, loss:2.277672953903675, acc:0.30375


  5%|▍         | 500/10845 [01:53<37:59,  4.54it/s, acc=0.308, epoch=0, loss=2.25]

epoch:0, idx:499/10845, loss:2.250548058271408, acc:0.308


  6%|▌         | 600/10845 [02:15<37:19,  4.57it/s, acc=0.324, epoch=0, loss=2.2] 

epoch:0, idx:599/10845, loss:2.200781476100286, acc:0.32375


  6%|▋         | 700/10845 [02:38<39:27,  4.28it/s, acc=0.324, epoch=0, loss=2.19]

epoch:0, idx:699/10845, loss:2.190663893904005, acc:0.3242857142857143


  7%|▋         | 801/10845 [03:01<38:10,  4.38it/s, acc=0.335, epoch=0, loss=2.16]

epoch:0, idx:799/10845, loss:2.158146344125271, acc:0.334375


  8%|▊         | 900/10845 [03:24<38:02,  4.36it/s, acc=0.344, epoch=0, loss=2.12]

epoch:0, idx:899/10845, loss:2.1254733006159463, acc:0.3436111111111111


  9%|▉         | 1000/10845 [03:46<35:40,  4.60it/s, acc=0.348, epoch=0, loss=2.1]

epoch:0, idx:999/10845, loss:2.1040954875946043, acc:0.348


 10%|█         | 1100/10845 [04:09<37:59,  4.27it/s, acc=0.354, epoch=0, loss=2.08]

epoch:0, idx:1099/10845, loss:2.085206900509921, acc:0.35409090909090907


 11%|█         | 1201/10845 [04:33<35:30,  4.53it/s, acc=0.357, epoch=0, loss=2.08]

epoch:0, idx:1199/10845, loss:2.075255636374156, acc:0.356875


 12%|█▏        | 1300/10845 [04:55<34:48,  4.57it/s, acc=0.363, epoch=0, loss=2.06]

epoch:0, idx:1299/10845, loss:2.057098494034547, acc:0.3626923076923077


 13%|█▎        | 1400/10845 [05:18<39:40,  3.97it/s, acc=0.367, epoch=0, loss=2.04]

epoch:0, idx:1399/10845, loss:2.0440261062553953, acc:0.3673214285714286


 14%|█▍        | 1500/10845 [05:41<35:30,  4.39it/s, acc=0.37, epoch=0, loss=2.03] 

epoch:0, idx:1499/10845, loss:2.0315115049680075, acc:0.37033333333333335


 15%|█▍        | 1600/10845 [06:05<35:44,  4.31it/s, acc=0.372, epoch=0, loss=2.02]

epoch:0, idx:1599/10845, loss:2.022582203671336, acc:0.3721875


 16%|█▌        | 1700/10845 [06:28<36:55,  4.13it/s, acc=0.373, epoch=0, loss=2.02]

epoch:0, idx:1699/10845, loss:2.0210276786369437, acc:0.37323529411764705


 17%|█▋        | 1801/10845 [06:51<32:21,  4.66it/s, acc=0.376, epoch=0, loss=2.01]

epoch:0, idx:1799/10845, loss:2.008316434323788, acc:0.3759722222222222


 18%|█▊        | 1900/10845 [07:14<36:36,  4.07it/s, acc=0.381, epoch=0, loss=2]   

epoch:0, idx:1899/10845, loss:1.9993278001797827, acc:0.3807894736842105


 18%|█▊        | 2000/10845 [07:38<35:37,  4.14it/s, acc=0.384, epoch=0, loss=1.99]

epoch:0, idx:1999/10845, loss:1.9883867782056333, acc:0.38425


 19%|█▉        | 2100/10845 [08:01<31:41,  4.60it/s, acc=0.385, epoch=0, loss=1.98]

epoch:0, idx:2099/10845, loss:1.9805373592319944, acc:0.38476190476190475


 20%|██        | 2200/10845 [08:23<34:21,  4.19it/s, acc=0.388, epoch=0, loss=1.97]

epoch:0, idx:2199/10845, loss:1.967126596759666, acc:0.3884090909090909


 21%|██        | 2300/10845 [08:46<31:20,  4.54it/s, acc=0.391, epoch=0, loss=1.96]

epoch:0, idx:2299/10845, loss:1.957124652162842, acc:0.3908695652173913


 22%|██▏       | 2401/10845 [09:09<30:57,  4.54it/s, acc=0.393, epoch=0, loss=1.95]

epoch:0, idx:2399/10845, loss:1.9515569601704676, acc:0.3927083333333333


 23%|██▎       | 2501/10845 [09:32<30:43,  4.53it/s, acc=0.395, epoch=0, loss=1.94]

epoch:0, idx:2499/10845, loss:1.9422326901197433, acc:0.3953


 24%|██▍       | 2600/10845 [09:55<31:54,  4.31it/s, acc=0.396, epoch=0, loss=1.94]

epoch:0, idx:2599/10845, loss:1.938912082016468, acc:0.39625


 25%|██▍       | 2700/10845 [10:18<32:07,  4.22it/s, acc=0.399, epoch=0, loss=1.93]

epoch:0, idx:2699/10845, loss:1.9288936142347477, acc:0.39925925925925926


 26%|██▌       | 2800/10845 [10:40<30:45,  4.36it/s, acc=0.401, epoch=0, loss=1.92]

epoch:0, idx:2799/10845, loss:1.9231106383247034, acc:0.40125


 27%|██▋       | 2900/10845 [11:03<30:44,  4.31it/s, acc=0.404, epoch=0, loss=1.91]

epoch:0, idx:2899/10845, loss:1.9135167454645552, acc:0.40422413793103446


 28%|██▊       | 3000/10845 [11:26<32:27,  4.03it/s, acc=0.404, epoch=0, loss=1.91]

epoch:0, idx:2999/10845, loss:1.9100316511591275, acc:0.40425


 29%|██▊       | 3100/10845 [11:49<29:58,  4.31it/s, acc=0.406, epoch=0, loss=1.9] 

epoch:0, idx:3099/10845, loss:1.9032537527045896, acc:0.40580645161290324


 30%|██▉       | 3201/10845 [12:13<27:15,  4.67it/s, acc=0.407, epoch=0, loss=1.9]

epoch:0, idx:3199/10845, loss:1.897873417865485, acc:0.406796875


 30%|███       | 3300/10845 [12:35<30:01,  4.19it/s, acc=0.408, epoch=0, loss=1.89]

epoch:0, idx:3299/10845, loss:1.8914809402371897, acc:0.4081060606060606


 31%|███▏      | 3400/10845 [12:58<29:28,  4.21it/s, acc=0.409, epoch=0, loss=1.89]

epoch:0, idx:3399/10845, loss:1.885883538249661, acc:0.4091911764705882


 32%|███▏      | 3500/10845 [13:21<29:12,  4.19it/s, acc=0.411, epoch=0, loss=1.88]

epoch:0, idx:3499/10845, loss:1.880843297634806, acc:0.4105714285714286


 33%|███▎      | 3600/10845 [13:44<29:04,  4.15it/s, acc=0.412, epoch=0, loss=1.87]

epoch:0, idx:3599/10845, loss:1.8722259036534363, acc:0.4122222222222222


 34%|███▍      | 3700/10845 [14:07<28:43,  4.15it/s, acc=0.413, epoch=0, loss=1.87]

epoch:0, idx:3699/10845, loss:1.8658510256941254, acc:0.4133783783783784


 35%|███▌      | 3800/10845 [14:30<26:09,  4.49it/s, acc=0.415, epoch=0, loss=1.86]

epoch:0, idx:3799/10845, loss:1.8598939204372857, acc:0.41539473684210526


 36%|███▌      | 3900/10845 [14:53<26:50,  4.31it/s, acc=0.416, epoch=0, loss=1.85]

epoch:0, idx:3899/10845, loss:1.8540109791663977, acc:0.41634615384615387


 37%|███▋      | 4000/10845 [15:16<27:26,  4.16it/s, acc=0.417, epoch=0, loss=1.85]

epoch:0, idx:3999/10845, loss:1.8510743336230517, acc:0.4171875


 38%|███▊      | 4101/10845 [15:40<27:39,  4.06it/s, acc=0.418, epoch=0, loss=1.85]

epoch:0, idx:4099/10845, loss:1.8493784014626247, acc:0.41847560975609754


 39%|███▊      | 4200/10845 [16:02<23:39,  4.68it/s, acc=0.419, epoch=0, loss=1.85]

epoch:0, idx:4199/10845, loss:1.8468406617215702, acc:0.41928571428571426


 40%|███▉      | 4300/10845 [16:25<24:14,  4.50it/s, acc=0.42, epoch=0, loss=1.84] 

epoch:0, idx:4299/10845, loss:1.8415847363444262, acc:0.4199418604651163


 41%|████      | 4400/10845 [16:48<25:55,  4.14it/s, acc=0.421, epoch=0, loss=1.84]

epoch:0, idx:4399/10845, loss:1.8370964054763317, acc:0.4211931818181818


 41%|████▏     | 4500/10845 [17:10<22:41,  4.66it/s, acc=0.422, epoch=0, loss=1.83]

epoch:0, idx:4499/10845, loss:1.831356974562009, acc:0.42233333333333334


 42%|████▏     | 4600/10845 [17:33<25:48,  4.03it/s, acc=0.423, epoch=0, loss=1.83]

epoch:0, idx:4599/10845, loss:1.826669608341611, acc:0.42342391304347826


 43%|████▎     | 4700/10845 [17:56<24:06,  4.25it/s, acc=0.425, epoch=0, loss=1.82]

epoch:0, idx:4699/10845, loss:1.821789271945649, acc:0.4245744680851064


 44%|████▍     | 4800/10845 [18:19<21:58,  4.58it/s, acc=0.425, epoch=0, loss=1.82]

epoch:0, idx:4799/10845, loss:1.819032332735757, acc:0.4253125


 45%|████▌     | 4900/10845 [18:42<22:25,  4.42it/s, acc=0.426, epoch=0, loss=1.81]

epoch:0, idx:4899/10845, loss:1.8144393381172297, acc:0.4260204081632653


 46%|████▌     | 5000/10845 [19:04<21:39,  4.50it/s, acc=0.428, epoch=0, loss=1.81]

epoch:0, idx:4999/10845, loss:1.8107423929095268, acc:0.42755


 47%|████▋     | 5100/10845 [19:27<21:57,  4.36it/s, acc=0.429, epoch=0, loss=1.81]

epoch:0, idx:5099/10845, loss:1.8075449406282575, acc:0.4285294117647059


 48%|████▊     | 5200/10845 [19:50<20:20,  4.63it/s, acc=0.43, epoch=0, loss=1.8]  

epoch:0, idx:5199/10845, loss:1.8033135379621617, acc:0.43


 49%|████▉     | 5301/10845 [20:13<20:09,  4.58it/s, acc=0.43, epoch=0, loss=1.8] 

epoch:0, idx:5299/10845, loss:1.8007902693636013, acc:0.43047169811320757


 50%|████▉     | 5400/10845 [20:35<20:39,  4.39it/s, acc=0.431, epoch=0, loss=1.8]

epoch:0, idx:5399/10845, loss:1.798031431833903, acc:0.43148148148148147


 51%|█████     | 5501/10845 [20:58<19:45,  4.51it/s, acc=0.433, epoch=0, loss=1.79]

epoch:0, idx:5499/10845, loss:1.7944146987741643, acc:0.4325909090909091


 52%|█████▏    | 5601/10845 [21:21<19:24,  4.50it/s, acc=0.433, epoch=0, loss=1.79]

epoch:0, idx:5599/10845, loss:1.7907608805596829, acc:0.43339285714285714


 53%|█████▎    | 5700/10845 [21:44<19:15,  4.45it/s, acc=0.435, epoch=0, loss=1.79]

epoch:0, idx:5699/10845, loss:1.7866209206246493, acc:0.43491228070175436


 53%|█████▎    | 5800/10845 [22:07<19:29,  4.31it/s, acc=0.436, epoch=0, loss=1.78]

epoch:0, idx:5799/10845, loss:1.782445297693384, acc:0.4359051724137931


 54%|█████▍    | 5901/10845 [22:30<19:01,  4.33it/s, acc=0.437, epoch=0, loss=1.78]

epoch:0, idx:5899/10845, loss:1.7794835037094052, acc:0.43703389830508477


 55%|█████▌    | 6000/10845 [22:53<18:45,  4.30it/s, acc=0.438, epoch=0, loss=1.78]

epoch:0, idx:5999/10845, loss:1.7753079692721367, acc:0.438375


 56%|█████▋    | 6101/10845 [23:16<16:57,  4.66it/s, acc=0.44, epoch=0, loss=1.77] 

epoch:0, idx:6099/10845, loss:1.7693284948341181, acc:0.44028688524590165


 57%|█████▋    | 6200/10845 [23:38<17:27,  4.43it/s, acc=0.441, epoch=0, loss=1.77]

epoch:0, idx:6199/10845, loss:1.76517549589757, acc:0.44125


 58%|█████▊    | 6300/10845 [24:01<17:35,  4.31it/s, acc=0.442, epoch=0, loss=1.76]

epoch:0, idx:6299/10845, loss:1.7633351892138285, acc:0.4415079365079365


 59%|█████▉    | 6400/10845 [24:24<17:52,  4.14it/s, acc=0.442, epoch=0, loss=1.76]

epoch:0, idx:6399/10845, loss:1.7609274456463755, acc:0.442109375


 60%|█████▉    | 6500/10845 [24:47<17:24,  4.16it/s, acc=0.443, epoch=0, loss=1.76]

epoch:0, idx:6499/10845, loss:1.7580533245343428, acc:0.44296153846153846


 61%|██████    | 6600/10845 [25:10<16:25,  4.31it/s, acc=0.445, epoch=0, loss=1.75]

epoch:0, idx:6599/10845, loss:1.7521027319178437, acc:0.44450757575757577


 62%|██████▏   | 6700/10845 [25:33<15:25,  4.48it/s, acc=0.445, epoch=0, loss=1.75]

epoch:0, idx:6699/10845, loss:1.750609027396387, acc:0.445


 63%|██████▎   | 6800/10845 [25:56<15:48,  4.27it/s, acc=0.447, epoch=0, loss=1.75]

epoch:0, idx:6799/10845, loss:1.7454143633737285, acc:0.44669117647058826


 64%|██████▎   | 6900/10845 [26:19<14:47,  4.45it/s, acc=0.447, epoch=0, loss=1.74]

epoch:0, idx:6899/10845, loss:1.7431136672565903, acc:0.4469927536231884


 65%|██████▍   | 7000/10845 [26:42<14:12,  4.51it/s, acc=0.448, epoch=0, loss=1.74]

epoch:0, idx:6999/10845, loss:1.7406242113794599, acc:0.44757142857142856


 65%|██████▌   | 7100/10845 [27:05<14:30,  4.30it/s, acc=0.449, epoch=0, loss=1.74]

epoch:0, idx:7099/10845, loss:1.7355449398302696, acc:0.44890845070422536


 66%|██████▋   | 7200/10845 [27:27<13:16,  4.57it/s, acc=0.45, epoch=0, loss=1.73] 

epoch:0, idx:7199/10845, loss:1.7337048661626047, acc:0.44972222222222225


 67%|██████▋   | 7300/10845 [27:50<13:03,  4.52it/s, acc=0.451, epoch=0, loss=1.73]

epoch:0, idx:7299/10845, loss:1.7296199889297355, acc:0.4508904109589041


 68%|██████▊   | 7401/10845 [28:13<13:00,  4.41it/s, acc=0.451, epoch=0, loss=1.73]

epoch:0, idx:7399/10845, loss:1.7273949750935709, acc:0.4513175675675676


 69%|██████▉   | 7500/10845 [28:36<13:18,  4.19it/s, acc=0.452, epoch=0, loss=1.73]

epoch:0, idx:7499/10845, loss:1.725805103723208, acc:0.45203333333333334


 70%|███████   | 7600/10845 [28:59<12:55,  4.19it/s, acc=0.453, epoch=0, loss=1.72]

epoch:0, idx:7599/10845, loss:1.7239087073191217, acc:0.45286184210526315


 71%|███████   | 7701/10845 [29:22<11:11,  4.68it/s, acc=0.454, epoch=0, loss=1.72]

epoch:0, idx:7699/10845, loss:1.720850651984091, acc:0.4539935064935065


 72%|███████▏  | 7800/10845 [29:45<11:30,  4.41it/s, acc=0.454, epoch=0, loss=1.72]

epoch:0, idx:7799/10845, loss:1.7182173225283623, acc:0.4543589743589744


 73%|███████▎  | 7900/10845 [30:09<11:00,  4.46it/s, acc=0.455, epoch=0, loss=1.72]

epoch:0, idx:7899/10845, loss:1.7164005678892136, acc:0.4548101265822785


 74%|███████▍  | 8000/10845 [30:32<10:50,  4.37it/s, acc=0.456, epoch=0, loss=1.71]

epoch:0, idx:7999/10845, loss:1.71486076400429, acc:0.45553125


 75%|███████▍  | 8100/10845 [30:55<10:13,  4.47it/s, acc=0.456, epoch=0, loss=1.71]

epoch:0, idx:8099/10845, loss:1.712912884928562, acc:0.45632716049382716


 76%|███████▌  | 8201/10845 [31:18<10:07,  4.35it/s, acc=0.458, epoch=0, loss=1.71]

epoch:0, idx:8199/10845, loss:1.7090809930897342, acc:0.45759146341463414


 77%|███████▋  | 8300/10845 [31:40<09:50,  4.31it/s, acc=0.459, epoch=0, loss=1.7] 

epoch:0, idx:8299/10845, loss:1.704768248516393, acc:0.4591566265060241


 77%|███████▋  | 8400/10845 [32:04<09:19,  4.37it/s, acc=0.46, epoch=0, loss=1.7]  

epoch:0, idx:8399/10845, loss:1.7046659534034274, acc:0.45961309523809524


 78%|███████▊  | 8500/10845 [32:26<09:02,  4.32it/s, acc=0.46, epoch=0, loss=1.7]

epoch:0, idx:8499/10845, loss:1.7026882956448723, acc:0.46


 79%|███████▉  | 8600/10845 [32:49<08:17,  4.51it/s, acc=0.461, epoch=0, loss=1.7]

epoch:0, idx:8599/10845, loss:1.7003432155071303, acc:0.4608139534883721


 80%|████████  | 8700/10845 [33:13<08:02,  4.45it/s, acc=0.461, epoch=0, loss=1.7]

epoch:0, idx:8699/10845, loss:1.6978733059935187, acc:0.4613793103448276


 81%|████████  | 8800/10845 [33:36<07:55,  4.30it/s, acc=0.462, epoch=0, loss=1.7]

epoch:0, idx:8799/10845, loss:1.6959254004399884, acc:0.4621590909090909


 82%|████████▏ | 8900/10845 [33:59<08:01,  4.04it/s, acc=0.463, epoch=0, loss=1.69]

epoch:0, idx:8899/10845, loss:1.6942919990386855, acc:0.4625


 83%|████████▎ | 9000/10845 [34:22<06:51,  4.48it/s, acc=0.463, epoch=0, loss=1.69]

epoch:0, idx:8999/10845, loss:1.6916594085759586, acc:0.46316666666666667


 84%|████████▍ | 9100/10845 [34:45<06:30,  4.47it/s, acc=0.464, epoch=0, loss=1.69]

epoch:0, idx:9099/10845, loss:1.6900093746185303, acc:0.4638186813186813


 85%|████████▍ | 9200/10845 [35:08<06:33,  4.18it/s, acc=0.464, epoch=0, loss=1.69]

epoch:0, idx:9199/10845, loss:1.6880843660235405, acc:0.4642391304347826


 86%|████████▌ | 9300/10845 [35:31<06:07,  4.21it/s, acc=0.465, epoch=0, loss=1.69]

epoch:0, idx:9299/10845, loss:1.6859055237616263, acc:0.4647311827956989


 87%|████████▋ | 9400/10845 [35:54<05:35,  4.30it/s, acc=0.465, epoch=0, loss=1.68]

epoch:0, idx:9399/10845, loss:1.6833545787220305, acc:0.465186170212766


 88%|████████▊ | 9500/10845 [36:17<04:50,  4.63it/s, acc=0.466, epoch=0, loss=1.68]

epoch:0, idx:9499/10845, loss:1.6808784760487707, acc:0.46628947368421053


 89%|████████▊ | 9600/10845 [36:40<04:52,  4.26it/s, acc=0.467, epoch=0, loss=1.68]

epoch:0, idx:9599/10845, loss:1.6781148241646588, acc:0.46708333333333335


 89%|████████▉ | 9700/10845 [37:04<04:25,  4.31it/s, acc=0.467, epoch=0, loss=1.68]

epoch:0, idx:9699/10845, loss:1.678364240238347, acc:0.4674226804123711


 90%|█████████ | 9800/10845 [37:27<04:13,  4.12it/s, acc=0.468, epoch=0, loss=1.68]

epoch:0, idx:9799/10845, loss:1.6770753148198128, acc:0.4680612244897959


 91%|█████████▏| 9900/10845 [37:50<03:37,  4.34it/s, acc=0.469, epoch=0, loss=1.68]

epoch:0, idx:9899/10845, loss:1.6752317505834078, acc:0.4686111111111111


 92%|█████████▏| 10000/10845 [38:13<03:12,  4.39it/s, acc=0.469, epoch=0, loss=1.67]

epoch:0, idx:9999/10845, loss:1.6734883570969106, acc:0.469225


 93%|█████████▎| 10100/10845 [38:36<02:52,  4.31it/s, acc=0.47, epoch=0, loss=1.67] 

epoch:0, idx:10099/10845, loss:1.6711026642287132, acc:0.4696039603960396


 94%|█████████▍| 10200/10845 [38:59<02:19,  4.63it/s, acc=0.47, epoch=0, loss=1.67]

epoch:0, idx:10199/10845, loss:1.6705614955109709, acc:0.46980392156862744


 95%|█████████▍| 10300/10845 [39:23<02:06,  4.30it/s, acc=0.47, epoch=0, loss=1.67]

epoch:0, idx:10299/10845, loss:1.6688211600583733, acc:0.47019417475728154


 96%|█████████▌| 10400/10845 [39:46<01:44,  4.25it/s, acc=0.471, epoch=0, loss=1.67]

epoch:0, idx:10399/10845, loss:1.6672026787870204, acc:0.47088942307692305


 97%|█████████▋| 10500/10845 [40:09<01:29,  3.87it/s, acc=0.472, epoch=0, loss=1.66]

epoch:0, idx:10499/10845, loss:1.6641258328925996, acc:0.4716666666666667


 98%|█████████▊| 10600/10845 [40:32<00:56,  4.31it/s, acc=0.472, epoch=0, loss=1.66]

epoch:0, idx:10599/10845, loss:1.662232476214193, acc:0.47226415094339624


 99%|█████████▊| 10700/10845 [40:55<00:35,  4.11it/s, acc=0.473, epoch=0, loss=1.66]

epoch:0, idx:10699/10845, loss:1.660325791055911, acc:0.4725233644859813


100%|█████████▉| 10800/10845 [41:18<00:10,  4.27it/s, acc=0.473, epoch=0, loss=1.66]

epoch:0, idx:10799/10845, loss:1.6579692165277622, acc:0.47335648148148146


100%|██████████| 10845/10845 [41:28<00:00,  4.02it/s, acc=0.473, epoch=0, loss=1.66]


epoch:0, idx:0/1275, loss:1.34248685836792, acc:0.5
epoch:0, idx:100/1275, loss:1.5058409624760694, acc:0.5173267326732673
epoch:0, idx:200/1275, loss:1.47698826694963, acc:0.5248756218905473
epoch:0, idx:300/1275, loss:1.449070545129998, acc:0.5257475083056479
epoch:0, idx:400/1275, loss:1.4456346831714126, acc:0.5249376558603491
epoch:0, idx:500/1275, loss:1.4300320010461256, acc:0.5259481037924152
epoch:0, idx:600/1275, loss:1.4357542733781152, acc:0.5220465890183028
epoch:0, idx:700/1275, loss:1.433924675157849, acc:0.5267475035663338
epoch:0, idx:800/1275, loss:1.443411816967263, acc:0.5237203495630461
epoch:0, idx:900/1275, loss:1.4366228509028665, acc:0.5269145394006659
epoch:0, idx:1000/1275, loss:1.4456169474255907, acc:0.5237262737262737
epoch:0, idx:1100/1275, loss:1.4382592998343529, acc:0.5252043596730245
epoch:0, idx:1200/1275, loss:1.4424722042806342, acc:0.5222731057452124


  1%|          | 100/10845 [00:22<38:54,  4.60it/s, acc=0.525, epoch=1, loss=1.36]

epoch:1, idx:99/10845, loss:1.3615957856178285, acc:0.525


  2%|▏         | 200/10845 [00:45<38:57,  4.55it/s, acc=0.537, epoch=1, loss=1.38]

epoch:1, idx:199/10845, loss:1.3800953632593156, acc:0.5375


  3%|▎         | 300/10845 [01:07<38:58,  4.51it/s, acc=0.541, epoch=1, loss=1.38]

epoch:1, idx:299/10845, loss:1.3794469459851584, acc:0.5408333333333334


  4%|▎         | 400/10845 [01:30<39:25,  4.42it/s, acc=0.547, epoch=1, loss=1.38]

epoch:1, idx:399/10845, loss:1.3752350637316704, acc:0.546875


  5%|▍         | 501/10845 [01:52<35:49,  4.81it/s, acc=0.549, epoch=1, loss=1.38]

epoch:1, idx:499/10845, loss:1.386696303844452, acc:0.548


  6%|▌         | 600/10845 [02:15<38:27,  4.44it/s, acc=0.546, epoch=1, loss=1.39]

epoch:1, idx:599/10845, loss:1.3889568110307058, acc:0.5458333333333333


  6%|▋         | 700/10845 [02:38<38:59,  4.34it/s, acc=0.542, epoch=1, loss=1.41]

epoch:1, idx:699/10845, loss:1.4064722745759146, acc:0.5425


  7%|▋         | 801/10845 [03:00<37:21,  4.48it/s, acc=0.546, epoch=1, loss=1.4] 

epoch:1, idx:799/10845, loss:1.403368928357959, acc:0.5459375


  8%|▊         | 900/10845 [03:23<38:21,  4.32it/s, acc=0.544, epoch=1, loss=1.41]

epoch:1, idx:899/10845, loss:1.4068319812085894, acc:0.5444444444444444


  9%|▉         | 1000/10845 [03:46<35:07,  4.67it/s, acc=0.546, epoch=1, loss=1.4]

epoch:1, idx:999/10845, loss:1.3979867767095566, acc:0.5465


 10%|█         | 1101/10845 [04:09<35:28,  4.58it/s, acc=0.546, epoch=1, loss=1.4] 

epoch:1, idx:1099/10845, loss:1.404822924787348, acc:0.5452272727272728


 11%|█         | 1200/10845 [04:31<34:46,  4.62it/s, acc=0.545, epoch=1, loss=1.41]

epoch:1, idx:1199/10845, loss:1.407158228258292, acc:0.545


 12%|█▏        | 1300/10845 [04:54<34:29,  4.61it/s, acc=0.545, epoch=1, loss=1.41]

epoch:1, idx:1299/10845, loss:1.4090303634680235, acc:0.5451923076923076


 13%|█▎        | 1400/10845 [05:16<35:27,  4.44it/s, acc=0.545, epoch=1, loss=1.41]

epoch:1, idx:1399/10845, loss:1.405815060905048, acc:0.545


 14%|█▍        | 1500/10845 [05:39<37:17,  4.18it/s, acc=0.543, epoch=1, loss=1.41]

epoch:1, idx:1499/10845, loss:1.4083557047843933, acc:0.5431666666666667


 15%|█▍        | 1601/10845 [06:02<31:45,  4.85it/s, acc=0.546, epoch=1, loss=1.4] 

epoch:1, idx:1599/10845, loss:1.4026354829967023, acc:0.54546875


 16%|█▌        | 1700/10845 [06:24<34:59,  4.36it/s, acc=0.548, epoch=1, loss=1.4]

epoch:1, idx:1699/10845, loss:1.3983031546718934, acc:0.5477941176470589


 17%|█▋        | 1800/10845 [06:47<33:13,  4.54it/s, acc=0.549, epoch=1, loss=1.39]

epoch:1, idx:1799/10845, loss:1.3927247323923642, acc:0.5490277777777778


 18%|█▊        | 1900/10845 [07:09<31:47,  4.69it/s, acc=0.552, epoch=1, loss=1.39]

epoch:1, idx:1899/10845, loss:1.3867099326535275, acc:0.5517105263157894


 18%|█▊        | 2000/10845 [07:31<32:59,  4.47it/s, acc=0.553, epoch=1, loss=1.38]

epoch:1, idx:1999/10845, loss:1.3833162751197814, acc:0.553375


 19%|█▉        | 2100/10845 [07:54<31:30,  4.63it/s, acc=0.553, epoch=1, loss=1.38]

epoch:1, idx:2099/10845, loss:1.38385621036802, acc:0.5534523809523809


 20%|██        | 2200/10845 [08:17<33:40,  4.28it/s, acc=0.551, epoch=1, loss=1.39]

epoch:1, idx:2199/10845, loss:1.3896973422169685, acc:0.5513636363636364


 21%|██        | 2300/10845 [08:39<32:43,  4.35it/s, acc=0.551, epoch=1, loss=1.39]

epoch:1, idx:2299/10845, loss:1.3885048994551534, acc:0.5507608695652174


 22%|██▏       | 2401/10845 [09:02<30:39,  4.59it/s, acc=0.551, epoch=1, loss=1.39]

epoch:1, idx:2399/10845, loss:1.389624491557479, acc:0.5505208333333333


 23%|██▎       | 2501/10845 [09:25<30:31,  4.55it/s, acc=0.551, epoch=1, loss=1.39]

epoch:1, idx:2499/10845, loss:1.3876589619874955, acc:0.5514


 24%|██▍       | 2600/10845 [09:47<29:49,  4.61it/s, acc=0.552, epoch=1, loss=1.39]

epoch:1, idx:2599/10845, loss:1.3877114746433037, acc:0.5516346153846153


 25%|██▍       | 2700/10845 [10:10<28:09,  4.82it/s, acc=0.552, epoch=1, loss=1.39]

epoch:1, idx:2699/10845, loss:1.3861233086276938, acc:0.5525


 26%|██▌       | 2800/10845 [10:32<30:06,  4.45it/s, acc=0.554, epoch=1, loss=1.38]

epoch:1, idx:2799/10845, loss:1.3825703864651067, acc:0.5536607142857143


 27%|██▋       | 2900/10845 [10:55<30:27,  4.35it/s, acc=0.555, epoch=1, loss=1.38]

epoch:1, idx:2899/10845, loss:1.3797332580130677, acc:0.5549137931034482


 28%|██▊       | 3000/10845 [11:18<29:11,  4.48it/s, acc=0.555, epoch=1, loss=1.38]

epoch:1, idx:2999/10845, loss:1.381227678279082, acc:0.5551666666666667


 29%|██▊       | 3101/10845 [11:40<27:53,  4.63it/s, acc=0.556, epoch=1, loss=1.38]

epoch:1, idx:3099/10845, loss:1.3800910536127706, acc:0.5555645161290322


 30%|██▉       | 3200/10845 [12:03<27:44,  4.59it/s, acc=0.556, epoch=1, loss=1.38]

epoch:1, idx:3199/10845, loss:1.3839212417416276, acc:0.555859375


 30%|███       | 3300/10845 [12:25<28:45,  4.37it/s, acc=0.556, epoch=1, loss=1.38]

epoch:1, idx:3299/10845, loss:1.383426990093607, acc:0.555530303030303


 31%|███▏      | 3400/10845 [12:48<26:36,  4.66it/s, acc=0.556, epoch=1, loss=1.38]

epoch:1, idx:3399/10845, loss:1.3815622241882717, acc:0.5557352941176471


 32%|███▏      | 3500/10845 [13:11<27:39,  4.42it/s, acc=0.555, epoch=1, loss=1.39]

epoch:1, idx:3499/10845, loss:1.385102986420904, acc:0.5548571428571428


 33%|███▎      | 3600/10845 [13:34<27:06,  4.45it/s, acc=0.555, epoch=1, loss=1.38]

epoch:1, idx:3599/10845, loss:1.3835116624501016, acc:0.5547916666666667


 34%|███▍      | 3701/10845 [13:57<26:40,  4.46it/s, acc=0.554, epoch=1, loss=1.39]

epoch:1, idx:3699/10845, loss:1.387156729118244, acc:0.5539864864864865


 35%|███▌      | 3800/10845 [14:20<26:27,  4.44it/s, acc=0.552, epoch=1, loss=1.39]

epoch:1, idx:3799/10845, loss:1.3912281907232185, acc:0.552171052631579


 36%|███▌      | 3900/10845 [14:42<26:17,  4.40it/s, acc=0.552, epoch=1, loss=1.39]

epoch:1, idx:3899/10845, loss:1.3925253564577835, acc:0.5518589743589744


 37%|███▋      | 4000/10845 [15:05<26:17,  4.34it/s, acc=0.552, epoch=1, loss=1.39]

epoch:1, idx:3999/10845, loss:1.391286186248064, acc:0.55225


 38%|███▊      | 4100/10845 [15:27<28:42,  3.92it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:4099/10845, loss:1.3921849965467685, acc:0.5528048780487805


 39%|███▊      | 4200/10845 [15:50<25:11,  4.39it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:4199/10845, loss:1.3929730544487635, acc:0.5526190476190476


 40%|███▉      | 4300/10845 [16:13<25:57,  4.20it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:4299/10845, loss:1.3933474183637042, acc:0.5530232558139535


 41%|████      | 4400/10845 [16:35<23:31,  4.57it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:4399/10845, loss:1.3947809552604502, acc:0.5525568181818182


 41%|████▏     | 4500/10845 [16:58<23:17,  4.54it/s, acc=0.552, epoch=1, loss=1.4] 

epoch:1, idx:4499/10845, loss:1.395388310485416, acc:0.5520555555555555


 47%|████▋     | 5100/10845 [19:14<20:27,  4.68it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:5099/10845, loss:1.3930700525816748, acc:0.5531372549019608


 48%|████▊     | 5201/10845 [19:37<20:42,  4.54it/s, acc=0.554, epoch=1, loss=1.39]

epoch:1, idx:5199/10845, loss:1.391264306283914, acc:0.5538461538461539


 49%|████▉     | 5300/10845 [20:00<21:39,  4.27it/s, acc=0.554, epoch=1, loss=1.39]

epoch:1, idx:5299/10845, loss:1.3909934326612725, acc:0.5536792452830188


 50%|████▉     | 5400/10845 [20:23<20:36,  4.40it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:5399/10845, loss:1.3922051620483398, acc:0.5526851851851852


 51%|█████     | 5501/10845 [20:46<19:20,  4.60it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:5499/10845, loss:1.3910568944107402, acc:0.5528181818181818


 52%|█████▏    | 5600/10845 [21:08<20:20,  4.30it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:5599/10845, loss:1.3916682339140347, acc:0.5525446428571429


 53%|█████▎    | 5700/10845 [21:31<20:27,  4.19it/s, acc=0.553, epoch=1, loss=1.39]

epoch:1, idx:5699/10845, loss:1.389055104046537, acc:0.5534210526315789


 53%|█████▎    | 5800/10845 [21:53<18:30,  4.54it/s, acc=0.554, epoch=1, loss=1.39]

epoch:1, idx:5799/10845, loss:1.3869711472659276, acc:0.5538793103448276


 54%|█████▍    | 5901/10845 [22:16<18:47,  4.39it/s, acc=0.554, epoch=1, loss=1.39]

epoch:1, idx:5899/10845, loss:1.3877286179388983, acc:0.553771186440678


 55%|█████▌    | 6001/10845 [22:39<18:09,  4.45it/s, acc=0.555, epoch=1, loss=1.39]

epoch:1, idx:5999/10845, loss:1.3853278728723526, acc:0.5547083333333334


 56%|█████▋    | 6101/10845 [22:56<12:02,  6.56it/s, acc=0.555, epoch=1, loss=1.39]

epoch:1, idx:6099/10845, loss:1.3856081390771708, acc:0.5549180327868852


 57%|█████▋    | 6201/10845 [23:12<11:34,  6.69it/s, acc=0.555, epoch=1, loss=1.39]

epoch:1, idx:6199/10845, loss:1.3856151050713754, acc:0.5548387096774193


 58%|█████▊    | 6300/10845 [23:27<12:24,  6.10it/s, acc=0.554, epoch=1, loss=1.39]

epoch:1, idx:6299/10845, loss:1.3852710971188924, acc:0.5543650793650794


 59%|█████▉    | 6401/10845 [23:43<11:25,  6.49it/s, acc=0.555, epoch=1, loss=1.38]

epoch:1, idx:6399/10845, loss:1.384164061602205, acc:0.55484375


 60%|█████▉    | 6501/10845 [23:59<12:14,  5.91it/s, acc=0.555, epoch=1, loss=1.38]

epoch:1, idx:6499/10845, loss:1.3840321506170126, acc:0.5547692307692308


 60%|█████▉    | 6502/10845 [23:59<11:54,  6.08it/s, acc=0.555, epoch=1, loss=1.38]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 77%|███████▋  | 8300/10845 [30:43<09:41,  4.38it/s, acc=0.556, epoch=1, loss=1.38]

epoch:1, idx:8299/10845, loss:1.382266814083938, acc:0.5562048192771084


 77%|███████▋  | 8400/10845 [31:08<09:42,  4.20it/s, acc=0.556, epoch=1, loss=1.38]

epoch:1, idx:8399/10845, loss:1.383093399355809, acc:0.5561607142857142


 78%|███████▊  | 8500/10845 [31:32<08:44,  4.47it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:8499/10845, loss:1.381019965333097, acc:0.5568235294117647


 79%|███████▉  | 8600/10845 [31:57<08:55,  4.19it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:8599/10845, loss:1.382808623736681, acc:0.5565116279069767


 80%|████████  | 8700/10845 [32:21<09:32,  3.75it/s, acc=0.556, epoch=1, loss=1.38]

epoch:1, idx:8699/10845, loss:1.38387766709958, acc:0.5562068965517242


 81%|████████  | 8800/10845 [32:45<08:32,  3.99it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:8799/10845, loss:1.3833180484040217, acc:0.5565056818181818


 82%|████████▏ | 8900/10845 [33:09<07:33,  4.29it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:8899/10845, loss:1.382478391106209, acc:0.5566853932584269


 83%|████████▎ | 9000/10845 [33:33<06:59,  4.40it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:8999/10845, loss:1.38172450182173, acc:0.5567222222222222


 84%|████████▍ | 9100/10845 [33:57<06:44,  4.32it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:9099/10845, loss:1.380135427412096, acc:0.5569780219780219


 85%|████████▍ | 9200/10845 [34:21<06:39,  4.11it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:9199/10845, loss:1.379604686446812, acc:0.5573913043478261


 86%|████████▌ | 9300/10845 [34:45<07:24,  3.48it/s, acc=0.557, epoch=1, loss=1.38]

epoch:1, idx:9299/10845, loss:1.3795012131301305, acc:0.5574193548387096


 87%|████████▋ | 9401/10845 [35:10<05:34,  4.31it/s, acc=0.558, epoch=1, loss=1.38]

epoch:1, idx:9399/10845, loss:1.3777350826973611, acc:0.558031914893617


 88%|████████▊ | 9500/10845 [35:34<06:00,  3.73it/s, acc=0.558, epoch=1, loss=1.38]

epoch:1, idx:9499/10845, loss:1.3765531082780738, acc:0.5583157894736842


 89%|████████▊ | 9600/10845 [35:58<05:01,  4.12it/s, acc=0.559, epoch=1, loss=1.38]

epoch:1, idx:9599/10845, loss:1.3752675810952981, acc:0.5585416666666667


 89%|████████▉ | 9700/10845 [36:23<04:32,  4.21it/s, acc=0.558, epoch=1, loss=1.38]

epoch:1, idx:9699/10845, loss:1.375810919594519, acc:0.5582989690721649


 90%|████████▉ | 9724/10845 [36:29<04:23,  4.26it/s, acc=0.558, epoch=1, loss=1.38]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  6%|▋         | 701/10845 [02:46<36:58,  4.57it/s, acc=0.611, epoch=2, loss=1.24]

epoch:2, idx:699/10845, loss:1.2412039480039052, acc:0.6107142857142858


  7%|▋         | 800/10845 [03:09<35:09,  4.76it/s, acc=0.608, epoch=2, loss=1.26]

epoch:2, idx:799/10845, loss:1.2553676369786262, acc:0.6075


  8%|▊         | 900/10845 [03:31<36:51,  4.50it/s, acc=0.604, epoch=2, loss=1.26]

epoch:2, idx:899/10845, loss:1.2622275590896606, acc:0.6041666666666666


  9%|▉         | 1000/10845 [03:53<36:58,  4.44it/s, acc=0.601, epoch=2, loss=1.26]

epoch:2, idx:999/10845, loss:1.2646402627229691, acc:0.6005


 10%|█         | 1100/10845 [04:16<36:20,  4.47it/s, acc=0.598, epoch=2, loss=1.27]

epoch:2, idx:1099/10845, loss:1.2738004819913344, acc:0.5979545454545454


 11%|█         | 1201/10845 [04:39<33:19,  4.82it/s, acc=0.597, epoch=2, loss=1.28]

epoch:2, idx:1199/10845, loss:1.2774552210172017, acc:0.5977083333333333


 12%|█▏        | 1301/10845 [05:01<34:19,  4.63it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:1299/10845, loss:1.2761109753755422, acc:0.5992307692307692


 13%|█▎        | 1400/10845 [05:23<32:49,  4.80it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:1399/10845, loss:1.2759720187527792, acc:0.5989285714285715


 14%|█▍        | 1501/10845 [05:46<34:39,  4.49it/s, acc=0.6, epoch=2, loss=1.28]  

epoch:2, idx:1499/10845, loss:1.2804744483629862, acc:0.5996666666666667


 15%|█▍        | 1600/10845 [06:08<34:09,  4.51it/s, acc=0.598, epoch=2, loss=1.28]

epoch:2, idx:1599/10845, loss:1.2802484848350286, acc:0.5984375


 16%|█▌        | 1700/10845 [06:29<31:37,  4.82it/s, acc=0.599, epoch=2, loss=1.28]

epoch:2, idx:1699/10845, loss:1.2773282108937993, acc:0.5988235294117648


 17%|█▋        | 1801/10845 [06:51<32:46,  4.60it/s, acc=0.599, epoch=2, loss=1.27]

epoch:2, idx:1799/10845, loss:1.2747680301467577, acc:0.59875


 18%|█▊        | 1900/10845 [07:13<32:58,  4.52it/s, acc=0.598, epoch=2, loss=1.28]

epoch:2, idx:1899/10845, loss:1.2757042176158804, acc:0.5981578947368421


 18%|█▊        | 2000/10845 [07:36<32:44,  4.50it/s, acc=0.596, epoch=2, loss=1.28]

epoch:2, idx:1999/10845, loss:1.2772494563758374, acc:0.59625


 19%|█▉        | 2100/10845 [07:58<32:43,  4.45it/s, acc=0.597, epoch=2, loss=1.27]

epoch:2, idx:2099/10845, loss:1.2728139054491407, acc:0.5969047619047619


 20%|█▉        | 2137/10845 [08:06<30:51,  4.70it/s, acc=0.597, epoch=2, loss=1.27]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 36%|███▌      | 3900/10845 [14:37<25:13,  4.59it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:3899/10845, loss:1.2761914721513405, acc:0.5937179487179487


 37%|███▋      | 4000/10845 [14:59<24:39,  4.63it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:3999/10845, loss:1.2756229987442493, acc:0.5939375


 38%|███▊      | 4100/10845 [15:21<25:24,  4.43it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:4099/10845, loss:1.276762434127854, acc:0.5938414634146342


 39%|███▊      | 4200/10845 [15:43<25:19,  4.37it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:4199/10845, loss:1.2761913935343425, acc:0.5944642857142857


 40%|███▉      | 4301/10845 [16:05<23:43,  4.60it/s, acc=0.595, epoch=2, loss=1.28]

epoch:2, idx:4299/10845, loss:1.2769916637830956, acc:0.5946511627906976


 41%|████      | 4401/10845 [16:27<23:58,  4.48it/s, acc=0.595, epoch=2, loss=1.28]

epoch:2, idx:4399/10845, loss:1.275335056863048, acc:0.5950568181818182


 41%|████▏     | 4500/10845 [16:49<22:51,  4.63it/s, acc=0.596, epoch=2, loss=1.27]

epoch:2, idx:4499/10845, loss:1.2739856973224215, acc:0.5955


 42%|████▏     | 4600/10845 [17:12<23:19,  4.46it/s, acc=0.595, epoch=2, loss=1.28]

epoch:2, idx:4599/10845, loss:1.2762563618110574, acc:0.5947282608695652


 43%|████▎     | 4700/10845 [17:34<24:50,  4.12it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:4699/10845, loss:1.2777707461346972, acc:0.594095744680851


 44%|████▍     | 4801/10845 [17:57<21:58,  4.58it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:4799/10845, loss:1.2763665021707615, acc:0.5940625


 45%|████▌     | 4901/10845 [18:19<21:11,  4.67it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:4899/10845, loss:1.2786791569359448, acc:0.5937244897959184


 46%|████▌     | 5000/10845 [18:41<21:21,  4.56it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:4999/10845, loss:1.2780223450183867, acc:0.5938


 47%|████▋     | 5101/10845 [19:03<20:32,  4.66it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:5099/10845, loss:1.277245289297665, acc:0.5938235294117648


 48%|████▊     | 5200/10845 [19:25<21:08,  4.45it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:5199/10845, loss:1.2765777086523864, acc:0.5942307692307692


 49%|████▉     | 5300/10845 [19:47<20:04,  4.60it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:5299/10845, loss:1.2769739375924165, acc:0.5941509433962264


 50%|████▉     | 5387/10845 [20:07<21:28,  4.24it/s, acc=0.594, epoch=2, loss=1.28]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 66%|██████▋   | 7200/10845 [26:50<14:27,  4.20it/s, acc=0.595, epoch=2, loss=1.28]

epoch:2, idx:7199/10845, loss:1.2759564110140005, acc:0.5945138888888889


 67%|██████▋   | 7301/10845 [27:12<12:29,  4.73it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:7299/10845, loss:1.2762359124340423, acc:0.5940753424657534


 68%|██████▊   | 7400/10845 [27:34<12:05,  4.75it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:7399/10845, loss:1.2757554818327363, acc:0.5941891891891892


 69%|██████▉   | 7500/10845 [27:57<12:41,  4.39it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:7499/10845, loss:1.275590486987432, acc:0.5943666666666667


 70%|███████   | 7601/10845 [28:19<11:26,  4.72it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:7599/10845, loss:1.2762557326178803, acc:0.5942763157894737


 71%|███████   | 7700/10845 [28:41<12:11,  4.30it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:7699/10845, loss:1.2774896248904142, acc:0.5936688311688312


 72%|███████▏  | 7801/10845 [29:04<11:18,  4.49it/s, acc=0.593, epoch=2, loss=1.28]

epoch:2, idx:7799/10845, loss:1.2782166797686847, acc:0.5933333333333334


 73%|███████▎  | 7900/10845 [29:25<10:41,  4.59it/s, acc=0.593, epoch=2, loss=1.28]

epoch:2, idx:7899/10845, loss:1.2769833250287213, acc:0.5934810126582278


 74%|███████▍  | 8001/10845 [29:48<10:03,  4.71it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:7999/10845, loss:1.2762987360358238, acc:0.59371875


 75%|███████▍  | 8100/10845 [30:09<09:24,  4.86it/s, acc=0.594, epoch=2, loss=1.27]

epoch:2, idx:8099/10845, loss:1.2748550711001878, acc:0.5940740740740741


 76%|███████▌  | 8200/10845 [30:32<09:21,  4.71it/s, acc=0.594, epoch=2, loss=1.27]

epoch:2, idx:8199/10845, loss:1.274975449515552, acc:0.5941768292682926


 77%|███████▋  | 8300/10845 [30:54<09:12,  4.60it/s, acc=0.594, epoch=2, loss=1.27]

epoch:2, idx:8299/10845, loss:1.2744320628872836, acc:0.5944578313253012


 77%|███████▋  | 8400/10845 [31:16<09:00,  4.53it/s, acc=0.595, epoch=2, loss=1.27]

epoch:2, idx:8399/10845, loss:1.2742642068721, acc:0.5945238095238096


 78%|███████▊  | 8500/10845 [31:38<09:09,  4.27it/s, acc=0.595, epoch=2, loss=1.27]

epoch:2, idx:8499/10845, loss:1.2732853140270008, acc:0.5948235294117648


 79%|███████▉  | 8600/10845 [32:00<08:14,  4.54it/s, acc=0.595, epoch=2, loss=1.27]

epoch:2, idx:8599/10845, loss:1.273715591458387, acc:0.594796511627907


 80%|███████▉  | 8656/10845 [32:13<08:32,  4.27it/s, acc=0.595, epoch=2, loss=1.27]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 97%|█████████▋| 10500/10845 [39:02<01:14,  4.61it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:10499/10845, loss:1.2786164122791517, acc:0.5944761904761905


 98%|█████████▊| 10600/10845 [39:25<00:56,  4.31it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:10599/10845, loss:1.2795351971123579, acc:0.5941037735849056


 99%|█████████▊| 10700/10845 [39:47<00:30,  4.75it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:10699/10845, loss:1.278732944907986, acc:0.5940420560747663


100%|█████████▉| 10801/10845 [40:09<00:09,  4.62it/s, acc=0.594, epoch=2, loss=1.28]

epoch:2, idx:10799/10845, loss:1.278822722448795, acc:0.5941203703703704


100%|██████████| 10845/10845 [40:19<00:00,  4.54it/s, acc=0.594, epoch=2, loss=1.28]


epoch:2, idx:0/1275, loss:0.9604623317718506, acc:0.5
epoch:2, idx:100/1275, loss:1.4203857572952119, acc:0.5668316831683168
epoch:2, idx:200/1275, loss:1.3670176884428185, acc:0.5646766169154229
epoch:2, idx:300/1275, loss:1.3275526714483368, acc:0.5805647840531561
epoch:2, idx:400/1275, loss:1.323820946817089, acc:0.580423940149626
epoch:2, idx:500/1275, loss:1.3038158521442833, acc:0.5828343313373253
epoch:2, idx:600/1275, loss:1.3077107058983675, acc:0.5786189683860233
epoch:2, idx:700/1275, loss:1.3003790679569762, acc:0.5777460770328102
epoch:2, idx:800/1275, loss:1.3115489392096036, acc:0.5767790262172284
epoch:2, idx:900/1275, loss:1.3001992193099263, acc:0.58157602663707
epoch:2, idx:1000/1275, loss:1.3059022515922873, acc:0.5784215784215784
epoch:2, idx:1100/1275, loss:1.2959131887021875, acc:0.5801544050862852
epoch:2, idx:1200/1275, loss:1.294024273914461, acc:0.5788925895087427


  1%|          | 100/10845 [00:22<40:43,  4.40it/s, acc=0.608, epoch=3, loss=1.21]

epoch:3, idx:99/10845, loss:1.2096909737586976, acc:0.6075


  2%|▏         | 200/10845 [00:44<38:01,  4.67it/s, acc=0.59, epoch=3, loss=1.27] 

epoch:3, idx:199/10845, loss:1.2747347795963286, acc:0.59


  3%|▎         | 300/10845 [01:06<38:43,  4.54it/s, acc=0.604, epoch=3, loss=1.24]

epoch:3, idx:299/10845, loss:1.2375436131159465, acc:0.6041666666666666


  4%|▎         | 400/10845 [01:28<36:40,  4.75it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:399/10845, loss:1.211254454255104, acc:0.611875


  5%|▍         | 501/10845 [01:51<37:51,  4.55it/s, acc=0.605, epoch=3, loss=1.23]

epoch:3, idx:499/10845, loss:1.2301212646961213, acc:0.605


  6%|▌         | 600/10845 [02:13<41:46,  4.09it/s, acc=0.608, epoch=3, loss=1.22]

epoch:3, idx:599/10845, loss:1.2201884704828263, acc:0.6075


  6%|▋         | 700/10845 [02:35<38:27,  4.40it/s, acc=0.604, epoch=3, loss=1.22]

epoch:3, idx:699/10845, loss:1.2194133053507124, acc:0.6042857142857143


  7%|▋         | 800/10845 [02:58<37:01,  4.52it/s, acc=0.608, epoch=3, loss=1.21]

epoch:3, idx:799/10845, loss:1.2069407656788826, acc:0.608125


  8%|▊         | 900/10845 [03:21<39:17,  4.22it/s, acc=0.611, epoch=3, loss=1.2] 

epoch:3, idx:899/10845, loss:1.1972162155310313, acc:0.6108333333333333


  9%|▉         | 1000/10845 [03:43<37:45,  4.35it/s, acc=0.616, epoch=3, loss=1.18]

epoch:3, idx:999/10845, loss:1.1838668851852416, acc:0.6155


 10%|▉         | 1052/10845 [03:55<36:41,  4.45it/s, acc=0.617, epoch=3, loss=1.18]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 27%|██▋       | 2900/10845 [10:52<31:30,  4.20it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:2899/10845, loss:1.2135013639104777, acc:0.6106034482758621


 28%|██▊       | 3001/10845 [11:14<28:28,  4.59it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:2999/10845, loss:1.2101500006914139, acc:0.6110833333333333


 29%|██▊       | 3100/10845 [11:37<29:10,  4.43it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:3099/10845, loss:1.215080682500716, acc:0.6105645161290323


 30%|██▉       | 3200/10845 [11:59<29:14,  4.36it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:3199/10845, loss:1.21450864283368, acc:0.610625


 30%|███       | 3300/10845 [12:22<26:22,  4.77it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:3299/10845, loss:1.215316879550616, acc:0.6106818181818182


 31%|███▏      | 3400/10845 [12:44<28:34,  4.34it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:3399/10845, loss:1.215070116677705, acc:0.6106617647058824


 32%|███▏      | 3501/10845 [13:07<27:09,  4.51it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:3499/10845, loss:1.2140113778454917, acc:0.611


 33%|███▎      | 3600/10845 [13:29<26:28,  4.56it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:3599/10845, loss:1.2136221832368108, acc:0.6117361111111111


 34%|███▍      | 3701/10845 [13:52<26:46,  4.45it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:3699/10845, loss:1.2103861554248914, acc:0.612027027027027


 35%|███▌      | 3800/10845 [14:14<27:31,  4.27it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:3799/10845, loss:1.211616894477292, acc:0.61125


 36%|███▌      | 3900/10845 [14:36<26:45,  4.32it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:3899/10845, loss:1.2114114893705417, acc:0.6114102564102564


 37%|███▋      | 4000/10845 [14:58<25:07,  4.54it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:3999/10845, loss:1.2122014974951745, acc:0.6110625


 38%|███▊      | 4100/10845 [15:21<25:17,  4.45it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:4099/10845, loss:1.2115294870225395, acc:0.6116463414634147


 39%|███▊      | 4200/10845 [15:43<25:31,  4.34it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:4199/10845, loss:1.2123206864936011, acc:0.6114285714285714


 40%|███▉      | 4300/10845 [16:06<23:45,  4.59it/s, acc=0.611, epoch=3, loss=1.21]

epoch:3, idx:4299/10845, loss:1.2128730603428775, acc:0.6108139534883721


 40%|███▉      | 4312/10845 [16:09<25:15,  4.31it/s, acc=0.611, epoch=3, loss=1.21]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 56%|█████▌    | 6100/10845 [22:49<18:25,  4.29it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:6099/10845, loss:1.2170405367358785, acc:0.611639344262295


 57%|█████▋    | 6200/10845 [23:11<16:41,  4.64it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:6199/10845, loss:1.2145367610646833, acc:0.6124596774193548


 58%|█████▊    | 6300/10845 [23:33<16:47,  4.51it/s, acc=0.613, epoch=3, loss=1.22]

epoch:3, idx:6299/10845, loss:1.2151077704202562, acc:0.6125396825396825


 59%|█████▉    | 6400/10845 [23:55<17:01,  4.35it/s, acc=0.613, epoch=3, loss=1.22]

epoch:3, idx:6399/10845, loss:1.2151866140402854, acc:0.6126953125


 60%|█████▉    | 6500/10845 [24:18<16:56,  4.27it/s, acc=0.613, epoch=3, loss=1.21]

epoch:3, idx:6499/10845, loss:1.2137070455000951, acc:0.6128076923076923


 61%|██████    | 6601/10845 [24:40<14:51,  4.76it/s, acc=0.613, epoch=3, loss=1.21]

epoch:3, idx:6599/10845, loss:1.2128322565555572, acc:0.6131439393939394


 62%|██████▏   | 6700/10845 [25:03<15:02,  4.59it/s, acc=0.613, epoch=3, loss=1.21]

epoch:3, idx:6699/10845, loss:1.2134111613302088, acc:0.6127985074626866


 63%|██████▎   | 6800/10845 [25:25<15:18,  4.41it/s, acc=0.613, epoch=3, loss=1.21]

epoch:3, idx:6799/10845, loss:1.2131123167626998, acc:0.6129411764705882


 64%|██████▎   | 6901/10845 [25:48<14:41,  4.48it/s, acc=0.613, epoch=3, loss=1.21]

epoch:3, idx:6899/10845, loss:1.2141019580156907, acc:0.6125724637681159


 65%|██████▍   | 7000/10845 [26:11<14:26,  4.44it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:6999/10845, loss:1.2154567625182016, acc:0.6123571428571428


 65%|██████▌   | 7100/10845 [26:33<13:40,  4.57it/s, acc=0.613, epoch=3, loss=1.21]

epoch:3, idx:7099/10845, loss:1.2136692969731881, acc:0.6128169014084507


 66%|██████▋   | 7201/10845 [26:56<13:31,  4.49it/s, acc=0.613, epoch=3, loss=1.21]

epoch:3, idx:7199/10845, loss:1.2125014041198625, acc:0.6132291666666667


 67%|██████▋   | 7301/10845 [27:19<13:07,  4.50it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:7299/10845, loss:1.214179445407162, acc:0.612431506849315


 68%|██████▊   | 7400/10845 [27:41<12:09,  4.72it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:7399/10845, loss:1.2140840957454733, acc:0.6124324324324324


 69%|██████▉   | 7500/10845 [28:03<11:56,  4.67it/s, acc=0.612, epoch=3, loss=1.21]

epoch:3, idx:7499/10845, loss:1.2138785071452458, acc:0.6122333333333333


 70%|██████▉   | 7545/10845 [28:13<11:59,  4.59it/s, acc=0.612, epoch=3, loss=1.21]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 87%|████████▋ | 9400/10845 [35:06<05:32,  4.34it/s, acc=0.613, epoch=3, loss=1.22]

epoch:3, idx:9399/10845, loss:1.2161353755377708, acc:0.612686170212766


 88%|████████▊ | 9501/10845 [35:29<04:43,  4.74it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:9499/10845, loss:1.2172338219441865, acc:0.6121052631578947


 89%|████████▊ | 9600/10845 [35:51<04:27,  4.65it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:9599/10845, loss:1.216940587287148, acc:0.6121614583333334


 89%|████████▉ | 9701/10845 [36:13<04:11,  4.54it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:9699/10845, loss:1.21670364759632, acc:0.612139175257732


 90%|█████████ | 9801/10845 [36:36<03:46,  4.62it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:9799/10845, loss:1.2169163853173353, acc:0.611938775510204


 91%|█████████▏| 9901/10845 [36:58<03:24,  4.61it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:9899/10845, loss:1.2172337937595867, acc:0.611919191919192


 92%|█████████▏| 10001/10845 [37:20<03:11,  4.41it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:9999/10845, loss:1.2173111473083496, acc:0.611775


 93%|█████████▎| 10101/10845 [37:43<02:42,  4.58it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:10099/10845, loss:1.2181036055206071, acc:0.6116336633663366


 94%|█████████▍| 10201/10845 [38:05<02:20,  4.57it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:10199/10845, loss:1.2182983504089655, acc:0.6113235294117647


 95%|█████████▍| 10300/10845 [38:27<02:03,  4.42it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:10299/10845, loss:1.2176308241862694, acc:0.6114805825242718


 96%|█████████▌| 10400/10845 [38:49<01:42,  4.35it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:10399/10845, loss:1.2171834416572864, acc:0.6114182692307693


 97%|█████████▋| 10500/10845 [39:11<01:13,  4.67it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:10499/10845, loss:1.21743840331123, acc:0.6111904761904762


 98%|█████████▊| 10601/10845 [39:33<00:51,  4.70it/s, acc=0.612, epoch=3, loss=1.22]

epoch:3, idx:10599/10845, loss:1.2165662284729615, acc:0.611627358490566


 99%|█████████▊| 10701/10845 [39:55<00:29,  4.80it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:10699/10845, loss:1.2173830766154228, acc:0.6112616822429906


100%|█████████▉| 10800/10845 [40:17<00:10,  4.41it/s, acc=0.611, epoch=3, loss=1.22]

epoch:3, idx:10799/10845, loss:1.2176714203479113, acc:0.6112268518518519


100%|█████████▉| 10809/10845 [40:19<00:07,  4.64it/s, acc=0.611, epoch=3, loss=1.22]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 17%|█▋        | 1800/10845 [06:38<33:12,  4.54it/s, acc=0.626, epoch=4, loss=1.17]

epoch:4, idx:1799/10845, loss:1.1745263586772812, acc:0.6263888888888889


 18%|█▊        | 1900/10845 [07:01<35:03,  4.25it/s, acc=0.627, epoch=4, loss=1.17]

epoch:4, idx:1899/10845, loss:1.1736508691624592, acc:0.6275


 18%|█▊        | 2001/10845 [07:23<30:38,  4.81it/s, acc=0.628, epoch=4, loss=1.17]

epoch:4, idx:1999/10845, loss:1.1734304425418376, acc:0.6275


 19%|█▉        | 2101/10845 [07:45<30:52,  4.72it/s, acc=0.628, epoch=4, loss=1.17]

epoch:4, idx:2099/10845, loss:1.170332717072396, acc:0.6276190476190476


 20%|██        | 2200/10845 [08:06<31:30,  4.57it/s, acc=0.629, epoch=4, loss=1.16]

epoch:4, idx:2199/10845, loss:1.1645809271931649, acc:0.6286363636363637


 21%|██        | 2300/10845 [08:29<33:01,  4.31it/s, acc=0.629, epoch=4, loss=1.16]

epoch:4, idx:2299/10845, loss:1.1626473507932995, acc:0.6290217391304348


 22%|██▏       | 2401/10845 [08:52<30:13,  4.66it/s, acc=0.628, epoch=4, loss=1.16]

epoch:4, idx:2399/10845, loss:1.162640380039811, acc:0.6282291666666666


 23%|██▎       | 2500/10845 [09:13<32:53,  4.23it/s, acc=0.628, epoch=4, loss=1.17]

epoch:4, idx:2499/10845, loss:1.1655638958454133, acc:0.6279


 24%|██▍       | 2600/10845 [09:35<28:55,  4.75it/s, acc=0.627, epoch=4, loss=1.17]

epoch:4, idx:2599/10845, loss:1.1679697539256169, acc:0.6271153846153846


 25%|██▍       | 2700/10845 [09:58<29:01,  4.68it/s, acc=0.625, epoch=4, loss=1.18]

epoch:4, idx:2699/10845, loss:1.1755674230610882, acc:0.6252777777777778


 26%|██▌       | 2801/10845 [10:20<28:38,  4.68it/s, acc=0.625, epoch=4, loss=1.18]

epoch:4, idx:2799/10845, loss:1.177775968526091, acc:0.6254464285714286


 27%|██▋       | 2901/10845 [10:42<29:17,  4.52it/s, acc=0.627, epoch=4, loss=1.18]

epoch:4, idx:2899/10845, loss:1.1759671976648527, acc:0.6274137931034482


 28%|██▊       | 3000/10845 [11:04<28:52,  4.53it/s, acc=0.627, epoch=4, loss=1.17]

epoch:4, idx:2999/10845, loss:1.174163079917431, acc:0.6274166666666666


 29%|██▊       | 3101/10845 [11:27<29:06,  4.43it/s, acc=0.627, epoch=4, loss=1.18]

epoch:4, idx:3099/10845, loss:1.177413230153822, acc:0.6271774193548387


 30%|██▉       | 3200/10845 [11:49<27:21,  4.66it/s, acc=0.628, epoch=4, loss=1.18]

epoch:4, idx:3199/10845, loss:1.1775902248546481, acc:0.627734375


 30%|██▉       | 3210/10845 [11:51<28:27,  4.47it/s, acc=0.628, epoch=4, loss=1.18]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 46%|████▌     | 5001/10845 [18:27<21:01,  4.63it/s, acc=0.627, epoch=4, loss=1.18]

epoch:4, idx:4999/10845, loss:1.1822149690628052, acc:0.62645


 47%|████▋     | 5100/10845 [18:49<21:04,  4.54it/s, acc=0.626, epoch=4, loss=1.18]

epoch:4, idx:5099/10845, loss:1.184607883013931, acc:0.6255882352941177


 48%|████▊     | 5200/10845 [19:11<20:46,  4.53it/s, acc=0.625, epoch=4, loss=1.18]

epoch:4, idx:5199/10845, loss:1.1834258137299465, acc:0.6254807692307692


 49%|████▉     | 5300/10845 [19:34<19:20,  4.78it/s, acc=0.626, epoch=4, loss=1.18]

epoch:4, idx:5299/10845, loss:1.1834100243730366, acc:0.6255660377358491


 50%|████▉     | 5400/10845 [19:55<20:54,  4.34it/s, acc=0.625, epoch=4, loss=1.18]

epoch:4, idx:5399/10845, loss:1.1834848711336101, acc:0.6254166666666666


 51%|█████     | 5501/10845 [20:17<19:35,  4.54it/s, acc=0.625, epoch=4, loss=1.18]

epoch:4, idx:5499/10845, loss:1.1842604776187378, acc:0.6253636363636363


 52%|█████▏    | 5600/10845 [20:40<19:15,  4.54it/s, acc=0.626, epoch=4, loss=1.18]

epoch:4, idx:5599/10845, loss:1.1832370535816465, acc:0.6256696428571429


 53%|█████▎    | 5700/10845 [21:02<18:12,  4.71it/s, acc=0.626, epoch=4, loss=1.18]

epoch:4, idx:5699/10845, loss:1.184883215322829, acc:0.6256578947368421


 53%|█████▎    | 5801/10845 [21:24<18:06,  4.64it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:5799/10845, loss:1.1850355064046794, acc:0.6255603448275862


 54%|█████▍    | 5900/10845 [21:46<18:51,  4.37it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:5899/10845, loss:1.1864321688474235, acc:0.6252542372881356


 55%|█████▌    | 6001/10845 [22:08<17:17,  4.67it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:5999/10845, loss:1.187881433169047, acc:0.624875


 56%|█████▌    | 6100/10845 [22:30<17:08,  4.61it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:6099/10845, loss:1.1880477729781729, acc:0.6247540983606558


 57%|█████▋    | 6200/10845 [22:52<16:14,  4.77it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:6199/10845, loss:1.1876258896627734, acc:0.6245967741935484


 58%|█████▊    | 6301/10845 [23:15<16:40,  4.54it/s, acc=0.626, epoch=4, loss=1.19]

epoch:4, idx:6299/10845, loss:1.1851337703825935, acc:0.625515873015873


 59%|█████▉    | 6400/10845 [23:37<16:26,  4.50it/s, acc=0.625, epoch=4, loss=1.18]

epoch:4, idx:6399/10845, loss:1.1844054824672638, acc:0.6253515625


 60%|█████▉    | 6480/10845 [23:55<15:53,  4.58it/s, acc=0.625, epoch=4, loss=1.18]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 77%|███████▋  | 8300/10845 [30:38<09:01,  4.70it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8299/10845, loss:1.1857208350839386, acc:0.6247590361445783


 77%|███████▋  | 8400/10845 [31:00<08:47,  4.63it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8399/10845, loss:1.1867113686530364, acc:0.6246130952380953


 78%|███████▊  | 8500/10845 [31:23<08:48,  4.44it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8499/10845, loss:1.1864883645632687, acc:0.6248235294117647


 79%|███████▉  | 8600/10845 [31:45<08:52,  4.22it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8599/10845, loss:1.1872847985597543, acc:0.6247674418604651


 80%|████████  | 8700/10845 [32:08<07:50,  4.56it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8699/10845, loss:1.186852644770995, acc:0.6249137931034483


 81%|████████  | 8800/10845 [32:30<07:23,  4.61it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8799/10845, loss:1.188427956991575, acc:0.6246590909090909


 82%|████████▏ | 8900/10845 [32:52<07:09,  4.52it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8899/10845, loss:1.1891123001696018, acc:0.6245224719101123


 83%|████████▎ | 9000/10845 [33:14<06:47,  4.53it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:8999/10845, loss:1.1879111278123327, acc:0.6246944444444444


 84%|████████▍ | 9100/10845 [33:36<06:15,  4.64it/s, acc=0.624, epoch=4, loss=1.19]

epoch:4, idx:9099/10845, loss:1.1885466896505146, acc:0.624423076923077


 85%|████████▍ | 9200/10845 [33:58<05:46,  4.74it/s, acc=0.624, epoch=4, loss=1.19]

epoch:4, idx:9199/10845, loss:1.1892439681615519, acc:0.6242934782608696


 86%|████████▌ | 9300/10845 [34:21<05:46,  4.46it/s, acc=0.624, epoch=4, loss=1.19]

epoch:4, idx:9299/10845, loss:1.188631487437474, acc:0.6244086021505376


 87%|████████▋ | 9401/10845 [34:43<05:08,  4.68it/s, acc=0.624, epoch=4, loss=1.19]

epoch:4, idx:9399/10845, loss:1.1884273411175037, acc:0.6243085106382978


 88%|████████▊ | 9500/10845 [35:05<04:45,  4.71it/s, acc=0.624, epoch=4, loss=1.19]

epoch:4, idx:9499/10845, loss:1.1881653297261188, acc:0.6243421052631579


 89%|████████▊ | 9600/10845 [35:27<04:34,  4.54it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:9599/10845, loss:1.187290170683215, acc:0.6245833333333334


 89%|████████▉ | 9700/10845 [35:50<04:27,  4.28it/s, acc=0.625, epoch=4, loss=1.19]

epoch:4, idx:9699/10845, loss:1.1881345510175547, acc:0.6247938144329896


 90%|████████▉ | 9748/10845 [36:01<04:08,  4.42it/s, acc=0.625, epoch=4, loss=1.19]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  6%|▋         | 700/10845 [02:35<36:28,  4.64it/s, acc=0.651, epoch=5, loss=1.12]

epoch:5, idx:699/10845, loss:1.1216940257378987, acc:0.6510714285714285


  7%|▋         | 800/10845 [02:57<39:26,  4.25it/s, acc=0.645, epoch=5, loss=1.15]

epoch:5, idx:799/10845, loss:1.1543812852352857, acc:0.6446875


  8%|▊         | 900/10845 [03:20<36:10,  4.58it/s, acc=0.642, epoch=5, loss=1.16]

epoch:5, idx:899/10845, loss:1.1554295985566245, acc:0.6422222222222222


  9%|▉         | 1000/10845 [03:42<38:24,  4.27it/s, acc=0.643, epoch=5, loss=1.16]

epoch:5, idx:999/10845, loss:1.1566993460059165, acc:0.64325


 10%|█         | 1101/10845 [04:05<35:50,  4.53it/s, acc=0.646, epoch=5, loss=1.15]

epoch:5, idx:1099/10845, loss:1.1456692154299128, acc:0.6456818181818181


 11%|█         | 1200/10845 [04:27<36:29,  4.41it/s, acc=0.646, epoch=5, loss=1.15]

epoch:5, idx:1199/10845, loss:1.1461101632813613, acc:0.6458333333333334


 12%|█▏        | 1300/10845 [04:49<36:09,  4.40it/s, acc=0.643, epoch=5, loss=1.15]

epoch:5, idx:1299/10845, loss:1.1504010063409806, acc:0.6426923076923077


 13%|█▎        | 1401/10845 [05:12<34:50,  4.52it/s, acc=0.642, epoch=5, loss=1.15]

epoch:5, idx:1399/10845, loss:1.1511274375660079, acc:0.6417857142857143


 14%|█▍        | 1501/10845 [05:34<35:09,  4.43it/s, acc=0.642, epoch=5, loss=1.14]

epoch:5, idx:1499/10845, loss:1.145107048948606, acc:0.6416666666666667


 15%|█▍        | 1600/10845 [05:56<32:35,  4.73it/s, acc=0.642, epoch=5, loss=1.14]

epoch:5, idx:1599/10845, loss:1.1385552263632417, acc:0.641875


 16%|█▌        | 1701/10845 [06:19<32:46,  4.65it/s, acc=0.639, epoch=5, loss=1.14]

epoch:5, idx:1699/10845, loss:1.1370020524193258, acc:0.6389705882352941


 17%|█▋        | 1801/10845 [06:40<31:38,  4.76it/s, acc=0.639, epoch=5, loss=1.14]

epoch:5, idx:1799/10845, loss:1.1389297137657801, acc:0.6391666666666667


 18%|█▊        | 1901/10845 [07:03<32:00,  4.66it/s, acc=0.64, epoch=5, loss=1.14] 

epoch:5, idx:1899/10845, loss:1.1415807126070323, acc:0.6396052631578948


 18%|█▊        | 2000/10845 [07:24<34:01,  4.33it/s, acc=0.641, epoch=5, loss=1.14]

epoch:5, idx:1999/10845, loss:1.1383791865110398, acc:0.640875


 19%|█▉        | 2101/10845 [07:47<31:26,  4.64it/s, acc=0.639, epoch=5, loss=1.14]

epoch:5, idx:2099/10845, loss:1.1422718293326242, acc:0.6386904761904761


 20%|█▉        | 2155/10845 [07:59<30:32,  4.74it/s, acc=0.64, epoch=5, loss=1.14] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 37%|███▋      | 4000/10845 [14:51<25:39,  4.45it/s, acc=0.637, epoch=5, loss=1.14]

epoch:5, idx:3999/10845, loss:1.1441860493421554, acc:0.636625


 38%|███▊      | 4100/10845 [15:13<25:22,  4.43it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:4099/10845, loss:1.1411745133632567, acc:0.6378048780487805


 39%|███▊      | 4201/10845 [15:36<23:41,  4.67it/s, acc=0.637, epoch=5, loss=1.14]

epoch:5, idx:4199/10845, loss:1.1445528795037951, acc:0.6372023809523809


 40%|███▉      | 4300/10845 [15:57<23:40,  4.61it/s, acc=0.636, epoch=5, loss=1.15]

epoch:5, idx:4299/10845, loss:1.1452870902627013, acc:0.6363372093023256


 41%|████      | 4400/10845 [16:19<22:42,  4.73it/s, acc=0.637, epoch=5, loss=1.14]

epoch:5, idx:4399/10845, loss:1.1449978454817424, acc:0.6365909090909091


 41%|████▏     | 4500/10845 [16:42<23:08,  4.57it/s, acc=0.637, epoch=5, loss=1.14]

epoch:5, idx:4499/10845, loss:1.1447496223979525, acc:0.6367222222222222


 42%|████▏     | 4600/10845 [17:04<23:55,  4.35it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:4599/10845, loss:1.1429988993509956, acc:0.6375


 43%|████▎     | 4701/10845 [17:27<21:12,  4.83it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:4699/10845, loss:1.1422253047659041, acc:0.6379255319148937


 44%|████▍     | 4800/10845 [17:48<22:04,  4.56it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:4799/10845, loss:1.142146603713433, acc:0.6378645833333333


 45%|████▌     | 4900/10845 [18:11<21:10,  4.68it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:4899/10845, loss:1.1421425298282077, acc:0.6380102040816327


 46%|████▌     | 5000/10845 [18:33<23:38,  4.12it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:4999/10845, loss:1.1423073005437852, acc:0.6382


 47%|████▋     | 5101/10845 [18:55<20:11,  4.74it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:5099/10845, loss:1.1429104704249138, acc:0.6377941176470588


 48%|████▊     | 5201/10845 [19:18<21:31,  4.37it/s, acc=0.638, epoch=5, loss=1.14]

epoch:5, idx:5199/10845, loss:1.1433471994445874, acc:0.6377403846153846


 49%|████▉     | 5300/10845 [19:40<21:22,  4.32it/s, acc=0.637, epoch=5, loss=1.15]

epoch:5, idx:5299/10845, loss:1.1460035997066857, acc:0.6370283018867925


 50%|████▉     | 5400/10845 [20:02<20:06,  4.51it/s, acc=0.638, epoch=5, loss=1.15]

epoch:5, idx:5399/10845, loss:1.145358890582014, acc:0.6377314814814815


 50%|████▉     | 5405/10845 [20:03<21:06,  4.30it/s, acc=0.638, epoch=5, loss=1.15]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 66%|██████▋   | 7200/10845 [26:44<13:20,  4.55it/s, acc=0.635, epoch=5, loss=1.15]

epoch:5, idx:7199/10845, loss:1.1522116093751458, acc:0.6349652777777778


 67%|██████▋   | 7300/10845 [27:07<12:29,  4.73it/s, acc=0.635, epoch=5, loss=1.15]

epoch:5, idx:7299/10845, loss:1.1523125820208902, acc:0.6351369863013698


 68%|██████▊   | 7401/10845 [27:29<12:20,  4.65it/s, acc=0.634, epoch=5, loss=1.15]

epoch:5, idx:7399/10845, loss:1.1539922074127842, acc:0.6343581081081081


 69%|██████▉   | 7500/10845 [27:52<12:39,  4.41it/s, acc=0.634, epoch=5, loss=1.15]

epoch:5, idx:7499/10845, loss:1.154167097322146, acc:0.6342


 70%|███████   | 7600/10845 [28:14<11:23,  4.75it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:7599/10845, loss:1.1551804357531823, acc:0.6340131578947369


 71%|███████   | 7701/10845 [28:37<11:40,  4.49it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:7699/10845, loss:1.1572591092601998, acc:0.6337337662337662


 72%|███████▏  | 7801/10845 [28:59<11:02,  4.59it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:7799/10845, loss:1.1564597854476708, acc:0.6339743589743589


 73%|███████▎  | 7901/10845 [29:21<10:46,  4.56it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:7899/10845, loss:1.1558457315043558, acc:0.6338607594936709


 74%|███████▍  | 8000/10845 [29:43<10:21,  4.58it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:7999/10845, loss:1.1559205590486525, acc:0.63359375


 75%|███████▍  | 8101/10845 [30:05<10:03,  4.55it/s, acc=0.633, epoch=5, loss=1.16]

epoch:5, idx:8099/10845, loss:1.1572983292444252, acc:0.6334567901234568


 76%|███████▌  | 8200/10845 [30:27<10:04,  4.38it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:8199/10845, loss:1.1563723871475313, acc:0.6335060975609756


 77%|███████▋  | 8300/10845 [30:49<09:32,  4.44it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:8299/10845, loss:1.1555014442392142, acc:0.6338253012048193


 77%|███████▋  | 8400/10845 [31:12<08:41,  4.69it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:8399/10845, loss:1.155509318354584, acc:0.6340476190476191


 78%|███████▊  | 8501/10845 [31:34<08:42,  4.49it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:8499/10845, loss:1.155954218864441, acc:0.6338823529411765


 79%|███████▉  | 8601/10845 [31:56<07:59,  4.68it/s, acc=0.634, epoch=5, loss=1.16]

epoch:5, idx:8599/10845, loss:1.1557439788552217, acc:0.6342151162790698


 80%|███████▉  | 8661/10845 [32:10<09:08,  3.98it/s, acc=0.634, epoch=5, loss=1.16]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 97%|█████████▋| 10500/10845 [38:57<01:12,  4.75it/s, acc=0.635, epoch=5, loss=1.16]

epoch:5, idx:10499/10845, loss:1.1579062309265136, acc:0.6347857142857143


 98%|█████████▊| 10600/10845 [39:20<00:53,  4.55it/s, acc=0.635, epoch=5, loss=1.16]

epoch:5, idx:10599/10845, loss:1.1568075945129934, acc:0.6351179245283018


 99%|█████████▊| 10700/10845 [39:42<00:31,  4.63it/s, acc=0.635, epoch=5, loss=1.16]

epoch:5, idx:10699/10845, loss:1.1560727817321492, acc:0.6352570093457944


100%|█████████▉| 10800/10845 [40:05<00:10,  4.40it/s, acc=0.635, epoch=5, loss=1.16]

epoch:5, idx:10799/10845, loss:1.1551858889504716, acc:0.6354861111111111


100%|██████████| 10845/10845 [40:14<00:00,  4.26it/s, acc=0.635, epoch=5, loss=1.16]


epoch:5, idx:0/1275, loss:1.169534683227539, acc:0.75
epoch:5, idx:100/1275, loss:1.2969307816854798, acc:0.6287128712871287
epoch:5, idx:200/1275, loss:1.2119672956751353, acc:0.6343283582089553
epoch:5, idx:300/1275, loss:1.1956967753033305, acc:0.6370431893687708
epoch:5, idx:400/1275, loss:1.1974129415212427, acc:0.6309226932668329
epoch:5, idx:500/1275, loss:1.179724811317916, acc:0.6312375249500998
epoch:5, idx:600/1275, loss:1.1932461918292943, acc:0.6239600665557404
epoch:5, idx:700/1275, loss:1.192115967181882, acc:0.6205420827389444
epoch:5, idx:800/1275, loss:1.2061727408612712, acc:0.6132958801498127
epoch:5, idx:900/1275, loss:1.1917534791404478, acc:0.6195893451720311
epoch:5, idx:1000/1275, loss:1.197084459867868, acc:0.6156343656343657
epoch:5, idx:1100/1275, loss:1.1849546494211098, acc:0.6205722070844687
epoch:5, idx:1200/1275, loss:1.1853324278109676, acc:0.6192756036636137


  1%|          | 101/10845 [00:22<39:45,  4.50it/s, acc=0.668, epoch=6, loss=1.01]

epoch:6, idx:99/10845, loss:1.0139923334121703, acc:0.6675


  2%|▏         | 200/10845 [00:44<41:11,  4.31it/s, acc=0.671, epoch=6, loss=1.04]

epoch:6, idx:199/10845, loss:1.0420734882354736, acc:0.67125


  3%|▎         | 300/10845 [01:07<39:01,  4.50it/s, acc=0.67, epoch=6, loss=1.05] 

epoch:6, idx:299/10845, loss:1.0511773216724396, acc:0.67


  4%|▎         | 400/10845 [01:29<39:06,  4.45it/s, acc=0.666, epoch=6, loss=1.05]

epoch:6, idx:399/10845, loss:1.0547434036433696, acc:0.665625


  5%|▍         | 500/10845 [01:52<38:59,  4.42it/s, acc=0.665, epoch=6, loss=1.06]

epoch:6, idx:499/10845, loss:1.0545647472143174, acc:0.665


  6%|▌         | 601/10845 [02:14<36:17,  4.70it/s, acc=0.661, epoch=6, loss=1.06]

epoch:6, idx:599/10845, loss:1.0548630369702976, acc:0.66125


  6%|▋         | 701/10845 [02:37<36:06,  4.68it/s, acc=0.658, epoch=6, loss=1.07]

epoch:6, idx:699/10845, loss:1.074701868891716, acc:0.6582142857142858


  7%|▋         | 801/10845 [02:59<38:08,  4.39it/s, acc=0.655, epoch=6, loss=1.09]

epoch:6, idx:799/10845, loss:1.0947886038571597, acc:0.655


  8%|▊         | 900/10845 [03:21<36:56,  4.49it/s, acc=0.651, epoch=6, loss=1.1] 

epoch:6, idx:899/10845, loss:1.1026474630170398, acc:0.6508333333333334


  9%|▉         | 1000/10845 [03:43<34:56,  4.70it/s, acc=0.65, epoch=6, loss=1.11]

epoch:6, idx:999/10845, loss:1.1084932398200036, acc:0.65025


 10%|▉         | 1072/10845 [04:00<37:59,  4.29it/s, acc=0.651, epoch=6, loss=1.11]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 25%|██▍       | 2700/10845 [09:59<29:06,  4.66it/s, acc=0.654, epoch=6, loss=1.1]

epoch:6, idx:2699/10845, loss:1.098881936735577, acc:0.6538888888888889


 26%|██▌       | 2800/10845 [10:22<28:26,  4.71it/s, acc=0.653, epoch=6, loss=1.1]

epoch:6, idx:2799/10845, loss:1.1024004704611643, acc:0.6532142857142857


 27%|██▋       | 2901/10845 [10:44<28:21,  4.67it/s, acc=0.653, epoch=6, loss=1.1]

epoch:6, idx:2899/10845, loss:1.100229479937718, acc:0.6529310344827586


 28%|██▊       | 3000/10845 [11:06<30:08,  4.34it/s, acc=0.653, epoch=6, loss=1.1]

epoch:6, idx:2999/10845, loss:1.0982133994102479, acc:0.6533333333333333


 29%|██▊       | 3101/10845 [11:28<28:52,  4.47it/s, acc=0.654, epoch=6, loss=1.09]

epoch:6, idx:3099/10845, loss:1.0944150947370836, acc:0.6542741935483871


 30%|██▉       | 3200/10845 [11:50<28:55,  4.41it/s, acc=0.655, epoch=6, loss=1.09]

epoch:6, idx:3199/10845, loss:1.091623818911612, acc:0.654765625


 30%|███       | 3300/10845 [12:12<28:09,  4.47it/s, acc=0.655, epoch=6, loss=1.1] 

epoch:6, idx:3299/10845, loss:1.0951532514167555, acc:0.6546969696969697


 31%|███▏      | 3401/10845 [12:34<26:36,  4.66it/s, acc=0.655, epoch=6, loss=1.1] 

epoch:6, idx:3399/10845, loss:1.0954238474018434, acc:0.6547058823529411


 32%|███▏      | 3500/10845 [12:56<26:06,  4.69it/s, acc=0.655, epoch=6, loss=1.1]

epoch:6, idx:3499/10845, loss:1.097298788854054, acc:0.6547142857142857


 33%|███▎      | 3600/10845 [13:18<25:18,  4.77it/s, acc=0.654, epoch=6, loss=1.1]

epoch:6, idx:3599/10845, loss:1.1002215383450191, acc:0.6538194444444444


 34%|███▍      | 3700/10845 [13:40<25:41,  4.63it/s, acc=0.653, epoch=6, loss=1.1]

epoch:6, idx:3699/10845, loss:1.1015207660842585, acc:0.6530405405405405


 35%|███▌      | 3800/10845 [14:03<27:54,  4.21it/s, acc=0.652, epoch=6, loss=1.11]

epoch:6, idx:3799/10845, loss:1.105499340640871, acc:0.6515131578947368


 36%|███▌      | 3900/10845 [14:25<25:30,  4.54it/s, acc=0.651, epoch=6, loss=1.1] 

epoch:6, idx:3899/10845, loss:1.1049198403419593, acc:0.651474358974359


 37%|███▋      | 4001/10845 [14:47<23:50,  4.78it/s, acc=0.652, epoch=6, loss=1.1] 

epoch:6, idx:3999/10845, loss:1.1044703058302403, acc:0.6518125


 38%|███▊      | 4100/10845 [15:09<25:10,  4.47it/s, acc=0.652, epoch=6, loss=1.1]

epoch:6, idx:4099/10845, loss:1.1043438110409713, acc:0.6517073170731708


 38%|███▊      | 4128/10845 [15:15<25:36,  4.37it/s, acc=0.651, epoch=6, loss=1.11]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 54%|█████▍    | 5901/10845 [21:49<17:56,  4.59it/s, acc=0.651, epoch=6, loss=1.11]

epoch:6, idx:5899/10845, loss:1.1080354907351024, acc:0.6509745762711865


 55%|█████▌    | 6000/10845 [22:12<19:52,  4.06it/s, acc=0.65, epoch=6, loss=1.11] 

epoch:6, idx:5999/10845, loss:1.1091643667618434, acc:0.6505


 56%|█████▌    | 6100/10845 [22:34<17:59,  4.40it/s, acc=0.65, epoch=6, loss=1.11]

epoch:6, idx:6099/10845, loss:1.112498813828484, acc:0.6501229508196721


 57%|█████▋    | 6200/10845 [22:56<16:40,  4.64it/s, acc=0.65, epoch=6, loss=1.11] 

epoch:6, idx:6199/10845, loss:1.1110513238656905, acc:0.650483870967742


 58%|█████▊    | 6300/10845 [23:18<17:13,  4.40it/s, acc=0.65, epoch=6, loss=1.11] 

epoch:6, idx:6299/10845, loss:1.112429819154361, acc:0.65


 59%|█████▉    | 6401/10845 [23:40<15:39,  4.73it/s, acc=0.65, epoch=6, loss=1.11]

epoch:6, idx:6399/10845, loss:1.1140450399834663, acc:0.64984375


 60%|█████▉    | 6501/10845 [24:02<15:44,  4.60it/s, acc=0.65, epoch=6, loss=1.12]

epoch:6, idx:6499/10845, loss:1.1151794258906291, acc:0.649576923076923


 61%|██████    | 6600/10845 [24:24<16:24,  4.31it/s, acc=0.65, epoch=6, loss=1.11] 

epoch:6, idx:6599/10845, loss:1.1137595068415005, acc:0.6499621212121212


 62%|██████▏   | 6700/10845 [24:46<14:46,  4.67it/s, acc=0.65, epoch=6, loss=1.11]

epoch:6, idx:6699/10845, loss:1.11435755840878, acc:0.6498880597014925


 63%|██████▎   | 6800/10845 [25:09<15:30,  4.35it/s, acc=0.65, epoch=6, loss=1.12]

epoch:6, idx:6799/10845, loss:1.115185266969835, acc:0.6497794117647059


 64%|██████▎   | 6901/10845 [25:31<13:56,  4.71it/s, acc=0.65, epoch=6, loss=1.11]

epoch:6, idx:6899/10845, loss:1.1146951202119606, acc:0.65


 65%|██████▍   | 7001/10845 [25:53<13:15,  4.83it/s, acc=0.65, epoch=6, loss=1.12]

epoch:6, idx:6999/10845, loss:1.1154318697537695, acc:0.6498571428571429


 65%|██████▌   | 7100/10845 [26:15<14:49,  4.21it/s, acc=0.65, epoch=6, loss=1.12]

epoch:6, idx:7099/10845, loss:1.1173583956251683, acc:0.649612676056338


 66%|██████▋   | 7201/10845 [26:38<13:27,  4.51it/s, acc=0.65, epoch=6, loss=1.12]

epoch:6, idx:7199/10845, loss:1.1167403314014275, acc:0.6500694444444445


 67%|██████▋   | 7301/10845 [27:00<12:03,  4.90it/s, acc=0.65, epoch=6, loss=1.12]

epoch:6, idx:7299/10845, loss:1.1168775844737275, acc:0.6496575342465754


 68%|██████▊   | 7368/10845 [27:15<12:21,  4.69it/s, acc=0.65, epoch=6, loss=1.12]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 85%|████████▍ | 9200/10845 [34:01<05:44,  4.77it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9199/10845, loss:1.1275175441348035, acc:0.6474728260869566


 86%|████████▌ | 9301/10845 [34:23<05:23,  4.77it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9299/10845, loss:1.1272144283786896, acc:0.6474731182795699


 87%|████████▋ | 9401/10845 [34:44<05:11,  4.63it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9399/10845, loss:1.12792260667111, acc:0.6474468085106383


 88%|████████▊ | 9500/10845 [35:06<05:01,  4.46it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9499/10845, loss:1.128734751701355, acc:0.6470526315789473


 89%|████████▊ | 9600/10845 [35:28<04:31,  4.58it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9599/10845, loss:1.129198376921316, acc:0.646953125


 89%|████████▉ | 9700/10845 [35:50<04:15,  4.48it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9699/10845, loss:1.1306977314924456, acc:0.6466494845360825


 90%|█████████ | 9801/10845 [36:12<03:40,  4.73it/s, acc=0.646, epoch=6, loss=1.13]

epoch:6, idx:9799/10845, loss:1.130547085124619, acc:0.6464030612244898


 91%|█████████▏| 9900/10845 [36:35<03:25,  4.59it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9899/10845, loss:1.1295974212343043, acc:0.6466161616161616


 92%|█████████▏| 10001/10845 [36:57<03:09,  4.46it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:9999/10845, loss:1.1287804505050183, acc:0.646975


 93%|█████████▎| 10100/10845 [37:19<02:43,  4.55it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:10099/10845, loss:1.1291194216744735, acc:0.647029702970297


 94%|█████████▍| 10200/10845 [37:41<02:35,  4.15it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:10199/10845, loss:1.1282562001546224, acc:0.6472549019607843


 95%|█████████▍| 10300/10845 [38:03<02:02,  4.44it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:10299/10845, loss:1.1293285180006212, acc:0.6469417475728155


 96%|█████████▌| 10400/10845 [38:25<01:37,  4.56it/s, acc=0.647, epoch=6, loss=1.13]

epoch:6, idx:10399/10845, loss:1.1308620731933758, acc:0.6465625


 97%|█████████▋| 10500/10845 [38:48<01:18,  4.42it/s, acc=0.646, epoch=6, loss=1.13]

epoch:6, idx:10499/10845, loss:1.1317258181969325, acc:0.6463333333333333


 98%|█████████▊| 10600/10845 [39:10<00:53,  4.59it/s, acc=0.646, epoch=6, loss=1.13]

epoch:6, idx:10599/10845, loss:1.130785005952952, acc:0.6464858490566038


 98%|█████████▊| 10609/10845 [39:12<00:50,  4.66it/s, acc=0.646, epoch=6, loss=1.13]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 15%|█▍        | 1601/10845 [05:54<33:36,  4.58it/s, acc=0.673, epoch=7, loss=1.05]

epoch:7, idx:1599/10845, loss:1.0533532133325934, acc:0.67265625


 16%|█▌        | 1700/10845 [06:16<33:59,  4.48it/s, acc=0.674, epoch=7, loss=1.05]

epoch:7, idx:1699/10845, loss:1.051370814162142, acc:0.6738235294117647


 17%|█▋        | 1801/10845 [06:38<32:09,  4.69it/s, acc=0.673, epoch=7, loss=1.06]

epoch:7, idx:1799/10845, loss:1.0554989273349444, acc:0.6730555555555555


 18%|█▊        | 1900/10845 [07:00<32:31,  4.58it/s, acc=0.671, epoch=7, loss=1.06]

epoch:7, idx:1899/10845, loss:1.0623846957244372, acc:0.6705263157894736


 18%|█▊        | 2000/10845 [07:23<31:29,  4.68it/s, acc=0.668, epoch=7, loss=1.07]

epoch:7, idx:1999/10845, loss:1.065270959943533, acc:0.66825


 19%|█▉        | 2100/10845 [07:45<32:16,  4.52it/s, acc=0.666, epoch=7, loss=1.07]

epoch:7, idx:2099/10845, loss:1.0702926780780155, acc:0.6660714285714285


 20%|██        | 2200/10845 [08:08<31:47,  4.53it/s, acc=0.667, epoch=7, loss=1.06]

epoch:7, idx:2199/10845, loss:1.0648432232845912, acc:0.6672727272727272


 21%|██        | 2300/10845 [08:30<32:38,  4.36it/s, acc=0.667, epoch=7, loss=1.07]

epoch:7, idx:2299/10845, loss:1.065151045581569, acc:0.6672826086956521


 22%|██▏       | 2401/10845 [08:52<29:57,  4.70it/s, acc=0.665, epoch=7, loss=1.07]

epoch:7, idx:2399/10845, loss:1.071758247166872, acc:0.6647916666666667


 23%|██▎       | 2501/10845 [09:15<29:47,  4.67it/s, acc=0.666, epoch=7, loss=1.07]

epoch:7, idx:2499/10845, loss:1.0688689195632934, acc:0.6656


 24%|██▍       | 2600/10845 [09:37<32:59,  4.16it/s, acc=0.666, epoch=7, loss=1.07]

epoch:7, idx:2599/10845, loss:1.0697251026447003, acc:0.6657692307692308


 25%|██▍       | 2701/10845 [09:59<28:52,  4.70it/s, acc=0.667, epoch=7, loss=1.06]

epoch:7, idx:2699/10845, loss:1.0651642408635882, acc:0.667037037037037


 26%|██▌       | 2801/10845 [10:22<28:44,  4.67it/s, acc=0.665, epoch=7, loss=1.07]

epoch:7, idx:2799/10845, loss:1.0666209072300366, acc:0.6653571428571429


 27%|██▋       | 2901/10845 [10:44<29:25,  4.50it/s, acc=0.665, epoch=7, loss=1.07]

epoch:7, idx:2899/10845, loss:1.0669840531513608, acc:0.6650862068965517


 28%|██▊       | 3000/10845 [11:07<28:56,  4.52it/s, acc=0.664, epoch=7, loss=1.07]

epoch:7, idx:2999/10845, loss:1.0688639343182247, acc:0.664


 28%|██▊       | 3007/10845 [11:08<29:08,  4.48it/s, acc=0.664, epoch=7, loss=1.07]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 44%|████▍     | 4800/10845 [17:48<24:22,  4.13it/s, acc=0.659, epoch=7, loss=1.09]

epoch:7, idx:4799/10845, loss:1.0892770447209477, acc:0.6588541666666666


 45%|████▌     | 4900/10845 [18:10<22:32,  4.40it/s, acc=0.659, epoch=7, loss=1.09]

epoch:7, idx:4899/10845, loss:1.088152384672846, acc:0.6589795918367347


 46%|████▌     | 5001/10845 [18:32<20:13,  4.81it/s, acc=0.659, epoch=7, loss=1.09]

epoch:7, idx:4999/10845, loss:1.0868439276576043, acc:0.65895


 47%|████▋     | 5100/10845 [18:55<23:59,  3.99it/s, acc=0.659, epoch=7, loss=1.09]

epoch:7, idx:5099/10845, loss:1.0865315960552178, acc:0.6590686274509804


 48%|████▊     | 5200/10845 [19:17<21:05,  4.46it/s, acc=0.659, epoch=7, loss=1.09]

epoch:7, idx:5199/10845, loss:1.086769457814785, acc:0.6587019230769231


 49%|████▉     | 5301/10845 [19:40<21:03,  4.39it/s, acc=0.659, epoch=7, loss=1.09]

epoch:7, idx:5299/10845, loss:1.0874672653427664, acc:0.6587735849056604


 50%|████▉     | 5400/10845 [20:02<20:03,  4.52it/s, acc=0.658, epoch=7, loss=1.09]

epoch:7, idx:5399/10845, loss:1.0883430917285106, acc:0.6582407407407408


 51%|█████     | 5500/10845 [20:25<20:42,  4.30it/s, acc=0.658, epoch=7, loss=1.09]

epoch:7, idx:5499/10845, loss:1.088148769996383, acc:0.6580454545454546


 52%|█████▏    | 5601/10845 [20:47<17:57,  4.87it/s, acc=0.659, epoch=7, loss=1.09]

epoch:7, idx:5599/10845, loss:1.0859416660772903, acc:0.6586160714285715


 53%|█████▎    | 5700/10845 [21:09<18:35,  4.61it/s, acc=0.658, epoch=7, loss=1.09]

epoch:7, idx:5699/10845, loss:1.08936210141893, acc:0.6578947368421053


 53%|█████▎    | 5801/10845 [21:32<19:13,  4.37it/s, acc=0.658, epoch=7, loss=1.09]

epoch:7, idx:5799/10845, loss:1.0893442529953759, acc:0.6576724137931035


 54%|█████▍    | 5900/10845 [21:54<17:27,  4.72it/s, acc=0.658, epoch=7, loss=1.09]

epoch:7, idx:5899/10845, loss:1.0899648691537016, acc:0.6575847457627119


 55%|█████▌    | 6000/10845 [22:16<17:54,  4.51it/s, acc=0.658, epoch=7, loss=1.09]

epoch:7, idx:5999/10845, loss:1.0882311840951442, acc:0.658


 56%|█████▌    | 6100/10845 [22:38<17:35,  4.50it/s, acc=0.657, epoch=7, loss=1.09]

epoch:7, idx:6099/10845, loss:1.0914926924177857, acc:0.657172131147541


 57%|█████▋    | 6200/10845 [23:01<17:10,  4.51it/s, acc=0.657, epoch=7, loss=1.09]

epoch:7, idx:6199/10845, loss:1.0926766323947137, acc:0.6571370967741935


 58%|█████▊    | 6250/10845 [23:12<18:12,  4.20it/s, acc=0.657, epoch=7, loss=1.09]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 75%|███████▍  | 8100/10845 [30:03<09:41,  4.72it/s, acc=0.655, epoch=7, loss=1.1] 

epoch:7, idx:8099/10845, loss:1.1036085945956502, acc:0.655216049382716


 76%|███████▌  | 8200/10845 [30:25<10:06,  4.36it/s, acc=0.655, epoch=7, loss=1.1]

epoch:7, idx:8199/10845, loss:1.1037618728018388, acc:0.6553963414634146


 77%|███████▋  | 8301/10845 [30:47<09:17,  4.56it/s, acc=0.655, epoch=7, loss=1.1]

epoch:7, idx:8299/10845, loss:1.1038541338601744, acc:0.6552710843373494


 77%|███████▋  | 8400/10845 [31:09<08:56,  4.56it/s, acc=0.656, epoch=7, loss=1.1]

epoch:7, idx:8399/10845, loss:1.1030281927968775, acc:0.6555654761904762


 78%|███████▊  | 8501/10845 [31:32<08:31,  4.58it/s, acc=0.655, epoch=7, loss=1.1]

epoch:7, idx:8499/10845, loss:1.1032587643020293, acc:0.6554705882352941


 79%|███████▉  | 8600/10845 [31:53<08:15,  4.53it/s, acc=0.656, epoch=7, loss=1.1]

epoch:7, idx:8599/10845, loss:1.1032222392323405, acc:0.6555232558139535


 80%|████████  | 8700/10845 [32:16<07:41,  4.65it/s, acc=0.655, epoch=7, loss=1.1]

epoch:7, idx:8699/10845, loss:1.103908740549252, acc:0.6552011494252874


 81%|████████  | 8800/10845 [32:38<08:06,  4.21it/s, acc=0.655, epoch=7, loss=1.1] 

epoch:7, idx:8799/10845, loss:1.1047860558398745, acc:0.6549147727272727


 82%|████████▏ | 8900/10845 [33:00<07:06,  4.56it/s, acc=0.655, epoch=7, loss=1.11]

epoch:7, idx:8899/10845, loss:1.1061878656202488, acc:0.6547471910112359


 83%|████████▎ | 9000/10845 [33:22<06:44,  4.56it/s, acc=0.655, epoch=7, loss=1.11]

epoch:7, idx:8999/10845, loss:1.1060326441062822, acc:0.6545277777777778


 84%|████████▍ | 9101/10845 [33:45<06:17,  4.63it/s, acc=0.654, epoch=7, loss=1.11]

epoch:7, idx:9099/10845, loss:1.107348104433699, acc:0.6542032967032967


 85%|████████▍ | 9200/10845 [34:07<05:45,  4.77it/s, acc=0.654, epoch=7, loss=1.11]

epoch:7, idx:9199/10845, loss:1.1077552591652975, acc:0.6537771739130435


 86%|████████▌ | 9300/10845 [34:29<05:35,  4.61it/s, acc=0.653, epoch=7, loss=1.11]

epoch:7, idx:9299/10845, loss:1.1098637804228773, acc:0.6532258064516129


 87%|████████▋ | 9401/10845 [34:51<05:19,  4.52it/s, acc=0.653, epoch=7, loss=1.11]

epoch:7, idx:9399/10845, loss:1.1096747123751234, acc:0.6533510638297872


 88%|████████▊ | 9498/10845 [35:13<04:36,  4.86it/s, acc=0.653, epoch=7, loss=1.11]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  5%|▍         | 501/10845 [01:50<38:20,  4.50it/s, acc=0.671, epoch=8, loss=1.05]

epoch:8, idx:499/10845, loss:1.0471004365086556, acc:0.671


  6%|▌         | 600/10845 [02:12<37:21,  4.57it/s, acc=0.665, epoch=8, loss=1.07]

epoch:8, idx:599/10845, loss:1.0709766120215256, acc:0.6654166666666667


  6%|▋         | 700/10845 [02:34<37:31,  4.51it/s, acc=0.666, epoch=8, loss=1.05]

epoch:8, idx:699/10845, loss:1.0540811472705431, acc:0.6664285714285715


  7%|▋         | 800/10845 [02:56<36:48,  4.55it/s, acc=0.668, epoch=8, loss=1.05]

epoch:8, idx:799/10845, loss:1.0514278816059233, acc:0.6678125


  8%|▊         | 900/10845 [03:18<35:08,  4.72it/s, acc=0.668, epoch=8, loss=1.05]

epoch:8, idx:899/10845, loss:1.0476687397228346, acc:0.6680555555555555


  9%|▉         | 1001/10845 [03:40<35:56,  4.57it/s, acc=0.672, epoch=8, loss=1.03]

epoch:8, idx:999/10845, loss:1.033237333446741, acc:0.67175


 10%|█         | 1100/10845 [04:03<34:32,  4.70it/s, acc=0.672, epoch=8, loss=1.04]

epoch:8, idx:1099/10845, loss:1.0369154299660162, acc:0.6720454545454545


 11%|█         | 1201/10845 [04:24<34:26,  4.67it/s, acc=0.67, epoch=8, loss=1.04] 

epoch:8, idx:1199/10845, loss:1.0414371116707721, acc:0.67


 12%|█▏        | 1300/10845 [04:46<33:51,  4.70it/s, acc=0.67, epoch=8, loss=1.05] 

epoch:8, idx:1299/10845, loss:1.0457607451539772, acc:0.6696153846153846


 13%|█▎        | 1400/10845 [05:09<36:26,  4.32it/s, acc=0.67, epoch=8, loss=1.05] 

epoch:8, idx:1399/10845, loss:1.0505357505381108, acc:0.6694642857142857


 14%|█▍        | 1500/10845 [05:31<32:47,  4.75it/s, acc=0.67, epoch=8, loss=1.05] 

epoch:8, idx:1499/10845, loss:1.0545851468046505, acc:0.67


 15%|█▍        | 1600/10845 [05:53<36:35,  4.21it/s, acc=0.672, epoch=8, loss=1.05]

epoch:8, idx:1599/10845, loss:1.049648238476366, acc:0.67203125


 16%|█▌        | 1700/10845 [06:16<33:44,  4.52it/s, acc=0.671, epoch=8, loss=1.06]

epoch:8, idx:1699/10845, loss:1.0600994926165133, acc:0.6708823529411765


 17%|█▋        | 1801/10845 [06:38<33:00,  4.57it/s, acc=0.67, epoch=8, loss=1.06] 

epoch:8, idx:1799/10845, loss:1.0626509292754862, acc:0.67


 17%|█▋        | 1897/10845 [07:00<33:24,  4.47it/s, acc=0.669, epoch=8, loss=1.06]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 34%|███▍      | 3701/10845 [13:40<26:23,  4.51it/s, acc=0.663, epoch=8, loss=1.08]

epoch:8, idx:3699/10845, loss:1.0787905223385708, acc:0.6626351351351352


 35%|███▌      | 3800/10845 [14:02<24:59,  4.70it/s, acc=0.663, epoch=8, loss=1.08]

epoch:8, idx:3799/10845, loss:1.0793600865806403, acc:0.6626973684210526


 36%|███▌      | 3900/10845 [14:25<24:22,  4.75it/s, acc=0.663, epoch=8, loss=1.08]

epoch:8, idx:3899/10845, loss:1.0788274080401812, acc:0.6626282051282051


 37%|███▋      | 4001/10845 [14:48<23:24,  4.87it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:3999/10845, loss:1.0818425854817033, acc:0.6616875


 38%|███▊      | 4101/10845 [15:10<23:52,  4.71it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4099/10845, loss:1.0809330464935885, acc:0.6617073170731708


 39%|███▊      | 4200/10845 [15:32<24:09,  4.59it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4199/10845, loss:1.0798836523365407, acc:0.6623809523809524


 40%|███▉      | 4300/10845 [15:54<24:55,  4.38it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4299/10845, loss:1.0809610699290453, acc:0.662093023255814


 41%|████      | 4401/10845 [16:17<22:57,  4.68it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4399/10845, loss:1.0808382856913588, acc:0.6618181818181819


 41%|████▏     | 4500/10845 [16:38<24:00,  4.40it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4499/10845, loss:1.0788091557621955, acc:0.6619444444444444


 42%|████▏     | 4600/10845 [17:00<22:03,  4.72it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4599/10845, loss:1.0789989256923613, acc:0.6616847826086957


 43%|████▎     | 4700/10845 [17:23<21:31,  4.76it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4699/10845, loss:1.0766796710706772, acc:0.6622872340425532


 44%|████▍     | 4800/10845 [17:45<22:26,  4.49it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4799/10845, loss:1.0779836860857903, acc:0.6619270833333334


 45%|████▌     | 4900/10845 [18:08<21:06,  4.69it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4899/10845, loss:1.0804605860916936, acc:0.6617857142857143


 46%|████▌     | 5000/10845 [18:30<20:21,  4.79it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:4999/10845, loss:1.0785985165178775, acc:0.6624


 47%|████▋     | 5100/10845 [18:53<19:58,  4.80it/s, acc=0.662, epoch=8, loss=1.08]

epoch:8, idx:5099/10845, loss:1.0783772195145196, acc:0.6622549019607843


 47%|████▋     | 5135/10845 [19:00<20:13,  4.70it/s, acc=0.662, epoch=8, loss=1.08]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 64%|██████▎   | 6901/10845 [25:34<14:06,  4.66it/s, acc=0.658, epoch=8, loss=1.09]

epoch:8, idx:6899/10845, loss:1.0865487241917762, acc:0.6577898550724638


 65%|██████▍   | 7000/10845 [25:56<14:50,  4.32it/s, acc=0.657, epoch=8, loss=1.09]

epoch:8, idx:6999/10845, loss:1.0865212770700454, acc:0.6573214285714286


 65%|██████▌   | 7101/10845 [26:18<13:46,  4.53it/s, acc=0.657, epoch=8, loss=1.09]

epoch:8, idx:7099/10845, loss:1.0873613077570015, acc:0.6567957746478873


 66%|██████▋   | 7200/10845 [26:40<13:17,  4.57it/s, acc=0.656, epoch=8, loss=1.09]

epoch:8, idx:7199/10845, loss:1.0890183466424546, acc:0.65625


 67%|██████▋   | 7300/10845 [27:02<13:25,  4.40it/s, acc=0.657, epoch=8, loss=1.09]

epoch:8, idx:7299/10845, loss:1.0878265466347132, acc:0.6565753424657534


 68%|██████▊   | 7401/10845 [27:25<12:42,  4.52it/s, acc=0.656, epoch=8, loss=1.09]

epoch:8, idx:7399/10845, loss:1.0882781176228782, acc:0.6564864864864864


 69%|██████▉   | 7501/10845 [27:47<11:12,  4.97it/s, acc=0.656, epoch=8, loss=1.09]

epoch:8, idx:7499/10845, loss:1.0886408009767532, acc:0.6563666666666667


 70%|███████   | 7601/10845 [28:09<11:39,  4.64it/s, acc=0.656, epoch=8, loss=1.09]

epoch:8, idx:7599/10845, loss:1.0888966389941541, acc:0.6564144736842106


 71%|███████   | 7701/10845 [28:31<11:31,  4.55it/s, acc=0.657, epoch=8, loss=1.09]

epoch:8, idx:7699/10845, loss:1.0881618324425313, acc:0.656525974025974


 72%|███████▏  | 7801/10845 [28:53<10:35,  4.79it/s, acc=0.656, epoch=8, loss=1.09]

epoch:8, idx:7799/10845, loss:1.089616353076238, acc:0.6557051282051282


 73%|███████▎  | 7901/10845 [29:15<07:07,  6.88it/s, acc=0.656, epoch=8, loss=1.09]

epoch:8, idx:7899/10845, loss:1.0907520106397097, acc:0.6556329113924051


 74%|███████▍  | 8001/10845 [29:37<10:22,  4.57it/s, acc=0.655, epoch=8, loss=1.09]

epoch:8, idx:7999/10845, loss:1.0910323976352811, acc:0.65540625


 75%|███████▍  | 8101/10845 [29:59<10:12,  4.48it/s, acc=0.656, epoch=8, loss=1.09]

epoch:8, idx:8099/10845, loss:1.0917323672403525, acc:0.6555555555555556


 76%|███████▌  | 8200/10845 [30:21<09:49,  4.49it/s, acc=0.655, epoch=8, loss=1.09]

epoch:8, idx:8199/10845, loss:1.0917674355463285, acc:0.6554268292682927


 77%|███████▋  | 8300/10845 [30:43<09:40,  4.38it/s, acc=0.655, epoch=8, loss=1.09]

epoch:8, idx:8299/10845, loss:1.093388880857502, acc:0.6548795180722892


 77%|███████▋  | 8393/10845 [31:04<08:44,  4.67it/s, acc=0.655, epoch=8, loss=1.09]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 91%|█████████▏| 9901/10845 [36:38<03:20,  4.71it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:9899/10845, loss:1.0968489704288618, acc:0.6548232323232324


 92%|█████████▏| 10000/10845 [37:00<03:07,  4.50it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:9999/10845, loss:1.0963283340275287, acc:0.655


 93%|█████████▎| 10101/10845 [37:23<02:37,  4.71it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:10099/10845, loss:1.0962980893283787, acc:0.6548019801980198


 94%|█████████▍| 10200/10845 [37:45<02:18,  4.66it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:10199/10845, loss:1.0966854214960453, acc:0.6548774509803922


 95%|█████████▍| 10301/10845 [38:07<02:00,  4.52it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:10299/10845, loss:1.0974197999539885, acc:0.6546844660194174


 96%|█████████▌| 10400/10845 [38:29<01:37,  4.58it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:10399/10845, loss:1.0977164069964336, acc:0.6546875


 97%|█████████▋| 10500/10845 [38:51<01:18,  4.41it/s, acc=0.654, epoch=8, loss=1.1]

epoch:8, idx:10499/10845, loss:1.0986613052345457, acc:0.6543809523809524


 98%|█████████▊| 10600/10845 [39:13<00:52,  4.64it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:10599/10845, loss:1.0976296207927307, acc:0.6548820754716981


 99%|█████████▊| 10700/10845 [39:35<00:32,  4.47it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:10699/10845, loss:1.0969094040572087, acc:0.655


100%|█████████▉| 10800/10845 [39:57<00:10,  4.47it/s, acc=0.655, epoch=8, loss=1.1]

epoch:8, idx:10799/10845, loss:1.0968018849690755, acc:0.6548379629629629


100%|██████████| 10845/10845 [40:07<00:00,  4.66it/s, acc=0.655, epoch=8, loss=1.1]


epoch:8, idx:0/1275, loss:1.7566571235656738, acc:0.5
epoch:8, idx:100/1275, loss:1.3642096661105014, acc:0.6064356435643564
epoch:8, idx:200/1275, loss:1.2534027538489347, acc:0.6131840796019901
epoch:8, idx:300/1275, loss:1.2096650473699222, acc:0.6270764119601329
epoch:8, idx:400/1275, loss:1.2005733765866096, acc:0.6334164588528678
epoch:8, idx:500/1275, loss:1.1812323617364118, acc:0.6387225548902196
epoch:8, idx:600/1275, loss:1.1905935092297648, acc:0.6351913477537438
epoch:8, idx:700/1275, loss:1.1918416288541829, acc:0.6326676176890157
epoch:8, idx:800/1275, loss:1.2119525959726873, acc:0.6282771535580525
epoch:8, idx:900/1275, loss:1.1959413595654724, acc:0.6323529411764706
epoch:8, idx:1000/1275, loss:1.1943512247516201, acc:0.6326173826173827
epoch:8, idx:1100/1275, loss:1.1875011583115165, acc:0.6366939146230699
epoch:8, idx:1200/1275, loss:1.1852852622237828, acc:0.6328059950041632


  1%|          | 100/10845 [00:22<40:24,  4.43it/s, acc=0.676, epoch=9, loss=1.01]

epoch:9, idx:99/10845, loss:1.0050785517692566, acc:0.675


  2%|▏         | 200/10845 [00:44<41:14,  4.30it/s, acc=0.67, epoch=9, loss=1.09] 

epoch:9, idx:199/10845, loss:1.094494292140007, acc:0.67


  3%|▎         | 300/10845 [01:06<35:54,  4.89it/s, acc=0.679, epoch=9, loss=1.07]

epoch:9, idx:299/10845, loss:1.0733740111192067, acc:0.6791666666666667


  4%|▎         | 401/10845 [01:29<38:02,  4.58it/s, acc=0.695, epoch=9, loss=1.02]

epoch:9, idx:399/10845, loss:1.0155757892131805, acc:0.695625


  5%|▍         | 500/10845 [01:50<40:51,  4.22it/s, acc=0.692, epoch=9, loss=1]   

epoch:9, idx:499/10845, loss:1.0021399247646332, acc:0.6915


  5%|▍         | 512/10845 [01:53<38:00,  4.53it/s, acc=0.691, epoch=9, loss=0.999]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 22%|██▏       | 2401/10845 [08:52<30:18,  4.64it/s, acc=0.669, epoch=9, loss=1.05]

epoch:9, idx:2399/10845, loss:1.0536202319463095, acc:0.669375


 23%|██▎       | 2500/10845 [09:14<28:49,  4.82it/s, acc=0.669, epoch=9, loss=1.05]

epoch:9, idx:2499/10845, loss:1.0520657205104829, acc:0.6695


 24%|██▍       | 2601/10845 [09:36<28:41,  4.79it/s, acc=0.67, epoch=9, loss=1.05] 

epoch:9, idx:2599/10845, loss:1.0538616175376452, acc:0.6698076923076923


 25%|██▍       | 2700/10845 [09:58<29:45,  4.56it/s, acc=0.671, epoch=9, loss=1.05]

epoch:9, idx:2699/10845, loss:1.0492074259122213, acc:0.6706481481481481


 26%|██▌       | 2801/10845 [10:20<28:21,  4.73it/s, acc=0.671, epoch=9, loss=1.05]

epoch:9, idx:2799/10845, loss:1.0511821007302828, acc:0.67125


 27%|██▋       | 2900/10845 [10:42<31:06,  4.26it/s, acc=0.672, epoch=9, loss=1.05]

epoch:9, idx:2899/10845, loss:1.0484548926353454, acc:0.6724137931034483


 28%|██▊       | 3001/10845 [11:04<27:14,  4.80it/s, acc=0.671, epoch=9, loss=1.05]

epoch:9, idx:2999/10845, loss:1.0500035809278487, acc:0.6715


 29%|██▊       | 3100/10845 [11:26<30:07,  4.28it/s, acc=0.672, epoch=9, loss=1.05]

epoch:9, idx:3099/10845, loss:1.0480876438463889, acc:0.672258064516129


 30%|██▉       | 3200/10845 [11:48<28:14,  4.51it/s, acc=0.673, epoch=9, loss=1.05]

epoch:9, idx:3199/10845, loss:1.046194406822324, acc:0.672890625


 30%|███       | 3301/10845 [12:10<27:28,  4.58it/s, acc=0.673, epoch=9, loss=1.05]

epoch:9, idx:3299/10845, loss:1.04547760515502, acc:0.673030303030303


 31%|███▏      | 3400/10845 [12:32<25:25,  4.88it/s, acc=0.672, epoch=9, loss=1.05]

epoch:9, idx:3399/10845, loss:1.0488882635270849, acc:0.6719852941176471


 32%|███▏      | 3500/10845 [12:54<27:42,  4.42it/s, acc=0.672, epoch=9, loss=1.05]

epoch:9, idx:3499/10845, loss:1.0497657527242388, acc:0.6724285714285714


 33%|███▎      | 3601/10845 [13:16<26:06,  4.63it/s, acc=0.673, epoch=9, loss=1.05]

epoch:9, idx:3599/10845, loss:1.0471205969320403, acc:0.6729861111111111


 34%|███▍      | 3700/10845 [13:38<27:00,  4.41it/s, acc=0.672, epoch=9, loss=1.05]

epoch:9, idx:3699/10845, loss:1.051359725256224, acc:0.6724324324324324


 35%|███▌      | 3801/10845 [14:01<25:10,  4.66it/s, acc=0.673, epoch=9, loss=1.05]

epoch:9, idx:3799/10845, loss:1.0506211192356913, acc:0.6729605263157895


 35%|███▌      | 3807/10845 [14:02<27:14,  4.30it/s, acc=0.673, epoch=9, loss=1.05]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 53%|█████▎    | 5701/10845 [21:01<19:31,  4.39it/s, acc=0.667, epoch=9, loss=1.07]

epoch:9, idx:5699/10845, loss:1.0655855177578173, acc:0.6667543859649123


 53%|█████▎    | 5801/10845 [21:23<18:14,  4.61it/s, acc=0.667, epoch=9, loss=1.06]

epoch:9, idx:5799/10845, loss:1.0644211842890443, acc:0.6667241379310345


 54%|█████▍    | 5901/10845 [21:45<17:24,  4.74it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:5899/10845, loss:1.0653397598913161, acc:0.6660593220338983


 55%|█████▌    | 6000/10845 [22:07<17:27,  4.63it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:5999/10845, loss:1.0665668983856837, acc:0.6659166666666667


 56%|█████▌    | 6100/10845 [22:30<15:54,  4.97it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6099/10845, loss:1.0663178379222995, acc:0.6659016393442623


 57%|█████▋    | 6200/10845 [22:52<16:30,  4.69it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6199/10845, loss:1.0666514662004287, acc:0.6661693548387096


 58%|█████▊    | 6301/10845 [23:14<16:47,  4.51it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6299/10845, loss:1.0686099429168399, acc:0.665952380952381


 59%|█████▉    | 6401/10845 [23:36<16:11,  4.57it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6399/10845, loss:1.0681987524218857, acc:0.66609375


 60%|█████▉    | 6500/10845 [23:58<15:13,  4.76it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6499/10845, loss:1.069042878646117, acc:0.6661153846153847


 61%|██████    | 6601/10845 [24:21<15:53,  4.45it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6599/10845, loss:1.0687280292583234, acc:0.6661742424242424


 62%|██████▏   | 6701/10845 [24:43<15:27,  4.47it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6699/10845, loss:1.0699220375338596, acc:0.6657462686567164


 63%|██████▎   | 6800/10845 [25:06<15:02,  4.48it/s, acc=0.665, epoch=9, loss=1.07]

epoch:9, idx:6799/10845, loss:1.070054202868658, acc:0.6652573529411765


 64%|██████▎   | 6900/10845 [25:28<15:03,  4.36it/s, acc=0.665, epoch=9, loss=1.07]

epoch:9, idx:6899/10845, loss:1.0698496194680531, acc:0.6653623188405797


 65%|██████▍   | 7000/10845 [25:50<13:43,  4.67it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:6999/10845, loss:1.0682586817485946, acc:0.6656071428571428


 65%|██████▌   | 7100/10845 [26:12<13:36,  4.59it/s, acc=0.666, epoch=9, loss=1.07]

epoch:9, idx:7099/10845, loss:1.0674494565120884, acc:0.6658450704225352


 66%|██████▌   | 7113/10845 [26:16<13:50,  4.49it/s, acc=0.666, epoch=9, loss=1.07]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 83%|████████▎ | 9001/10845 [33:12<06:23,  4.81it/s, acc=0.665, epoch=9, loss=1.07]

epoch:9, idx:8999/10845, loss:1.069877776649263, acc:0.6646666666666666


 84%|████████▍ | 9100/10845 [33:34<07:00,  4.15it/s, acc=0.665, epoch=9, loss=1.07]

epoch:9, idx:9099/10845, loss:1.0695839141489385, acc:0.664945054945055


 85%|████████▍ | 9201/10845 [33:57<05:53,  4.66it/s, acc=0.665, epoch=9, loss=1.07]

epoch:9, idx:9199/10845, loss:1.0701636325146842, acc:0.6646195652173913


 86%|████████▌ | 9300/10845 [34:19<05:26,  4.73it/s, acc=0.665, epoch=9, loss=1.07]

epoch:9, idx:9299/10845, loss:1.0705656949422693, acc:0.6645430107526882


 87%|████████▋ | 9401/10845 [34:42<05:02,  4.77it/s, acc=0.665, epoch=9, loss=1.07]

epoch:9, idx:9399/10845, loss:1.0706134239917107, acc:0.6646010638297872


 88%|████████▊ | 9501/10845 [35:04<04:49,  4.65it/s, acc=0.664, epoch=9, loss=1.07]

epoch:9, idx:9499/10845, loss:1.0714285725982566, acc:0.6641842105263158


 89%|████████▊ | 9600/10845 [35:26<04:37,  4.49it/s, acc=0.664, epoch=9, loss=1.07]

epoch:9, idx:9599/10845, loss:1.0718930394823352, acc:0.6643229166666667


 89%|████████▉ | 9700/10845 [35:48<04:11,  4.56it/s, acc=0.664, epoch=9, loss=1.07]

epoch:9, idx:9699/10845, loss:1.0719997026379575, acc:0.6640463917525773


 90%|█████████ | 9800/10845 [36:11<03:53,  4.48it/s, acc=0.664, epoch=9, loss=1.07]

epoch:9, idx:9799/10845, loss:1.0725633708798155, acc:0.6642091836734694


 91%|█████████▏| 9900/10845 [36:33<03:26,  4.57it/s, acc=0.664, epoch=9, loss=1.07]

epoch:9, idx:9899/10845, loss:1.0732362494685432, acc:0.6639646464646465


 92%|█████████▏| 10000/10845 [36:55<03:17,  4.28it/s, acc=0.663, epoch=9, loss=1.07]

epoch:9, idx:9999/10845, loss:1.0748692757368088, acc:0.663375


 93%|█████████▎| 10100/10845 [37:18<02:41,  4.61it/s, acc=0.663, epoch=9, loss=1.08]

epoch:9, idx:10099/10845, loss:1.0759777845958671, acc:0.6627722772277228


 94%|█████████▍| 10200/10845 [37:40<02:28,  4.34it/s, acc=0.663, epoch=9, loss=1.08]

epoch:9, idx:10199/10845, loss:1.0762511920695212, acc:0.6627450980392157


 95%|█████████▍| 10301/10845 [38:03<02:03,  4.39it/s, acc=0.663, epoch=9, loss=1.08]

epoch:9, idx:10299/10845, loss:1.0767683805308295, acc:0.6625485436893204


 96%|█████████▌| 10401/10845 [38:25<01:31,  4.86it/s, acc=0.663, epoch=9, loss=1.08]

epoch:9, idx:10399/10845, loss:1.0780691335063715, acc:0.6625961538461539


 96%|█████████▌| 10429/10845 [38:31<01:36,  4.32it/s, acc=0.663, epoch=9, loss=1.08]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 11%|█         | 1200/10845 [04:24<33:14,  4.83it/s, acc=0.668, epoch=10, loss=1.05]

epoch:10, idx:1199/10845, loss:1.045608966698249, acc:0.6683333333333333


 12%|█▏        | 1301/10845 [04:46<36:09,  4.40it/s, acc=0.67, epoch=10, loss=1.04] 

epoch:10, idx:1299/10845, loss:1.0370194471340912, acc:0.6698076923076923


 13%|█▎        | 1400/10845 [05:08<34:42,  4.53it/s, acc=0.671, epoch=10, loss=1.03]

epoch:10, idx:1399/10845, loss:1.0347275745442936, acc:0.6707142857142857


 14%|█▍        | 1501/10845 [05:30<33:38,  4.63it/s, acc=0.669, epoch=10, loss=1.05]

epoch:10, idx:1499/10845, loss:1.045906259338061, acc:0.6688333333333333


 15%|█▍        | 1600/10845 [05:52<33:42,  4.57it/s, acc=0.67, epoch=10, loss=1.04] 

epoch:10, idx:1599/10845, loss:1.0437377955392002, acc:0.6703125


 16%|█▌        | 1701/10845 [06:14<31:02,  4.91it/s, acc=0.671, epoch=10, loss=1.04]

epoch:10, idx:1699/10845, loss:1.0444591580769595, acc:0.6707352941176471


 17%|█▋        | 1800/10845 [06:36<32:20,  4.66it/s, acc=0.672, epoch=10, loss=1.04]

epoch:10, idx:1799/10845, loss:1.042490120894379, acc:0.6720833333333334


 18%|█▊        | 1900/10845 [06:58<32:11,  4.63it/s, acc=0.672, epoch=10, loss=1.05]

epoch:10, idx:1899/10845, loss:1.045648404014738, acc:0.6725


 18%|█▊        | 2001/10845 [07:21<31:17,  4.71it/s, acc=0.674, epoch=10, loss=1.04]

epoch:10, idx:1999/10845, loss:1.0434307881295681, acc:0.674


 19%|█▉        | 2100/10845 [07:43<32:34,  4.47it/s, acc=0.672, epoch=10, loss=1.05]

epoch:10, idx:2099/10845, loss:1.049187647388095, acc:0.6716666666666666


 20%|██        | 2200/10845 [08:05<33:20,  4.32it/s, acc=0.67, epoch=10, loss=1.05] 

epoch:10, idx:2199/10845, loss:1.049569355249405, acc:0.6704545454545454


 21%|██        | 2300/10845 [08:27<34:30,  4.13it/s, acc=0.671, epoch=10, loss=1.05]

epoch:10, idx:2299/10845, loss:1.0490410727003345, acc:0.6710869565217391


 22%|██▏       | 2401/10845 [08:49<30:47,  4.57it/s, acc=0.67, epoch=10, loss=1.05] 

epoch:10, idx:2399/10845, loss:1.0494124750047922, acc:0.6702083333333333


 23%|██▎       | 2501/10845 [09:12<31:06,  4.47it/s, acc=0.67, epoch=10, loss=1.05] 

epoch:10, idx:2499/10845, loss:1.054625195288658, acc:0.6697


 24%|██▍       | 2600/10845 [09:34<28:52,  4.76it/s, acc=0.67, epoch=10, loss=1.06] 

epoch:10, idx:2599/10845, loss:1.0568071112953699, acc:0.6698076923076923


 25%|██▍       | 2691/10845 [09:54<28:40,  4.74it/s, acc=0.67, epoch=10, loss=1.06] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 42%|████▏     | 4601/10845 [16:56<21:15,  4.89it/s, acc=0.668, epoch=10, loss=1.07]

epoch:10, idx:4599/10845, loss:1.0665069866309995, acc:0.6675


 43%|████▎     | 4701/10845 [17:18<21:18,  4.80it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:4699/10845, loss:1.0677720213570494, acc:0.6667553191489362


 44%|████▍     | 4800/10845 [17:40<23:03,  4.37it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:4799/10845, loss:1.0671238051975767, acc:0.66671875


 45%|████▌     | 4900/10845 [18:02<20:45,  4.77it/s, acc=0.666, epoch=10, loss=1.07]

epoch:10, idx:4899/10845, loss:1.068383374250665, acc:0.6661734693877551


 46%|████▌     | 5000/10845 [18:25<22:24,  4.35it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:4999/10845, loss:1.0682832046389579, acc:0.66675


 47%|████▋     | 5100/10845 [18:47<21:12,  4.52it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5099/10845, loss:1.0688695718377244, acc:0.6668627450980392


 48%|████▊     | 5200/10845 [19:09<19:39,  4.78it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5199/10845, loss:1.0679003791740307, acc:0.6670192307692308


 49%|████▉     | 5300/10845 [19:31<19:20,  4.78it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5299/10845, loss:1.0672466135362408, acc:0.6671698113207547


 50%|████▉     | 5400/10845 [19:53<21:14,  4.27it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5399/10845, loss:1.0684037892134102, acc:0.6665740740740741


 51%|█████     | 5500/10845 [20:16<20:26,  4.36it/s, acc=0.666, epoch=10, loss=1.07]

epoch:10, idx:5499/10845, loss:1.070834132823077, acc:0.6661363636363636


 52%|█████▏    | 5601/10845 [20:38<19:31,  4.48it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5599/10845, loss:1.070124769423689, acc:0.6665178571428572


 53%|█████▎    | 5700/10845 [21:00<19:21,  4.43it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5699/10845, loss:1.070523007208841, acc:0.666578947368421


 53%|█████▎    | 5801/10845 [21:22<17:56,  4.69it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5799/10845, loss:1.0687896262160663, acc:0.6671551724137931


 54%|█████▍    | 5900/10845 [21:44<17:44,  4.65it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:5899/10845, loss:1.0678050571579045, acc:0.6673728813559322


 55%|█████▌    | 6000/10845 [22:07<17:41,  4.56it/s, acc=0.667, epoch=10, loss=1.07]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 72%|███████▏  | 7800/10845 [28:45<11:24,  4.45it/s, acc=0.668, epoch=10, loss=1.07]

epoch:10, idx:7799/10845, loss:1.0712608678677142, acc:0.6675320512820513


 73%|███████▎  | 7900/10845 [29:07<10:11,  4.81it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:7899/10845, loss:1.0731230927117263, acc:0.666993670886076


 74%|███████▍  | 8000/10845 [29:29<10:52,  4.36it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:7999/10845, loss:1.0731355156898499, acc:0.666875


 75%|███████▍  | 8100/10845 [29:51<10:13,  4.47it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8099/10845, loss:1.0724166909853616, acc:0.667037037037037


 76%|███████▌  | 8201/10845 [30:14<09:27,  4.66it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8199/10845, loss:1.0729126456743334, acc:0.6670121951219512


 77%|███████▋  | 8301/10845 [30:36<09:29,  4.47it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8299/10845, loss:1.072430864012385, acc:0.6668975903614458


 77%|███████▋  | 8400/10845 [30:58<08:49,  4.62it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8399/10845, loss:1.0729245753373418, acc:0.6669047619047619


 78%|███████▊  | 8501/10845 [31:21<08:24,  4.65it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8499/10845, loss:1.0724590289873235, acc:0.6667941176470589


 79%|███████▉  | 8601/10845 [31:43<07:53,  4.74it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8599/10845, loss:1.0718344881506854, acc:0.6668895348837209


 80%|████████  | 8701/10845 [32:05<07:33,  4.73it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8699/10845, loss:1.0728176824251812, acc:0.6667241379310345


 81%|████████  | 8800/10845 [32:27<07:36,  4.48it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8799/10845, loss:1.0735392207855528, acc:0.6665340909090909


 82%|████████▏ | 8900/10845 [32:49<07:37,  4.25it/s, acc=0.666, epoch=10, loss=1.07]

epoch:10, idx:8899/10845, loss:1.0732054323091935, acc:0.6663764044943821


 83%|████████▎ | 9000/10845 [33:12<06:47,  4.53it/s, acc=0.667, epoch=10, loss=1.07]

epoch:10, idx:8999/10845, loss:1.0726741954949166, acc:0.6666388888888889


 84%|████████▍ | 9100/10845 [33:34<06:28,  4.49it/s, acc=0.666, epoch=10, loss=1.07]

epoch:10, idx:9099/10845, loss:1.0738911809698566, acc:0.6664835164835164


 85%|████████▍ | 9201/10845 [33:56<06:04,  4.51it/s, acc=0.666, epoch=10, loss=1.07]

epoch:10, idx:9199/10845, loss:1.0743800635376701, acc:0.6660054347826087


 86%|████████▌ | 9289/10845 [34:16<06:07,  4.23it/s, acc=0.666, epoch=10, loss=1.08]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  3%|▎         | 300/10845 [01:06<41:07,  4.27it/s, acc=0.673, epoch=11, loss=1.05]

epoch:11, idx:299/10845, loss:1.0548685359954835, acc:0.6733333333333333


  4%|▎         | 401/10845 [01:28<37:26,  4.65it/s, acc=0.673, epoch=11, loss=1.05]

epoch:11, idx:399/10845, loss:1.0496203196048737, acc:0.6725


  5%|▍         | 500/10845 [01:50<38:57,  4.43it/s, acc=0.676, epoch=11, loss=1.04]

epoch:11, idx:499/10845, loss:1.0358793164491653, acc:0.6765


  6%|▌         | 600/10845 [02:12<39:57,  4.27it/s, acc=0.68, epoch=11, loss=1.03] 

epoch:11, idx:599/10845, loss:1.0320558499296506, acc:0.68


  6%|▋         | 700/10845 [02:34<37:44,  4.48it/s, acc=0.678, epoch=11, loss=1.04]

epoch:11, idx:699/10845, loss:1.0394519973652703, acc:0.6782142857142858


  7%|▋         | 800/10845 [02:57<39:52,  4.20it/s, acc=0.675, epoch=11, loss=1.05]

epoch:11, idx:799/10845, loss:1.051618608906865, acc:0.6746875


  8%|▊         | 900/10845 [03:19<36:29,  4.54it/s, acc=0.678, epoch=11, loss=1.05]

epoch:11, idx:899/10845, loss:1.046761438647906, acc:0.6777777777777778


  9%|▉         | 1000/10845 [03:41<38:48,  4.23it/s, acc=0.674, epoch=11, loss=1.05]

epoch:11, idx:999/10845, loss:1.0494058013558387, acc:0.67375


 10%|█         | 1101/10845 [04:04<35:21,  4.59it/s, acc=0.675, epoch=11, loss=1.05]

epoch:11, idx:1099/10845, loss:1.0503227026354183, acc:0.675


 11%|█         | 1200/10845 [04:26<36:33,  4.40it/s, acc=0.675, epoch=11, loss=1.05]

epoch:11, idx:1199/10845, loss:1.050651590526104, acc:0.675


 12%|█▏        | 1300/10845 [04:48<35:50,  4.44it/s, acc=0.677, epoch=11, loss=1.05]

epoch:11, idx:1299/10845, loss:1.0473815334760226, acc:0.676923076923077


 13%|█▎        | 1401/10845 [05:10<33:32,  4.69it/s, acc=0.675, epoch=11, loss=1.05]

epoch:11, idx:1399/10845, loss:1.049919392807143, acc:0.6748214285714286


 17%|█▋        | 1800/10845 [06:38<33:52,  4.45it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:1799/10845, loss:1.0145844476090538, acc:0.685


 18%|█▊        | 1900/10845 [07:00<33:40,  4.43it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:1899/10845, loss:1.00937592155055, acc:0.6864473684210526


 18%|█▊        | 2000/10845 [07:23<31:54,  4.62it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:1999/10845, loss:1.0138509422540665, acc:0.685375


 19%|█▉        | 2100/10845 [07:45<32:09,  4.53it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:2099/10845, loss:1.009569372903733, acc:0.6855952380952381


 20%|██        | 2201/10845 [08:07<31:15,  4.61it/s, acc=0.687, epoch=15, loss=1]   

epoch:15, idx:2199/10845, loss:1.0046996332840485, acc:0.6867045454545454


 21%|██        | 2300/10845 [08:29<32:55,  4.33it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:2299/10845, loss:1.0080419870822326, acc:0.6861956521739131


 22%|██▏       | 2400/10845 [08:51<30:12,  4.66it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:2399/10845, loss:1.011820613120993, acc:0.6851041666666666


 23%|██▎       | 2500/10845 [09:13<29:52,  4.66it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:2499/10845, loss:1.0085887471914292, acc:0.6859


 24%|██▍       | 2600/10845 [09:35<30:42,  4.47it/s, acc=0.687, epoch=15, loss=1.01]

epoch:15, idx:2599/10845, loss:1.0088143911499243, acc:0.6865384615384615


 25%|██▍       | 2700/10845 [09:58<28:44,  4.72it/s, acc=0.688, epoch=15, loss=1]   

epoch:15, idx:2699/10845, loss:1.0041136763051703, acc:0.6876851851851852


 26%|██▌       | 2800/10845 [10:20<28:00,  4.79it/s, acc=0.687, epoch=15, loss=1.01]

epoch:15, idx:2799/10845, loss:1.0088248610283648, acc:0.6866964285714285


 27%|██▋       | 2900/10845 [10:42<29:42,  4.46it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:2899/10845, loss:1.0131159546251955, acc:0.6857758620689656


 28%|██▊       | 3001/10845 [11:04<27:56,  4.68it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:2999/10845, loss:1.0098436618447304, acc:0.6856666666666666


 29%|██▊       | 3101/10845 [11:26<28:27,  4.54it/s, acc=0.684, epoch=15, loss=1.01]

epoch:15, idx:3099/10845, loss:1.0141987414321592, acc:0.6840322580645162


 30%|██▉       | 3200/10845 [11:48<27:35,  4.62it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:3199/10845, loss:1.0164293457753957, acc:0.683828125


 30%|███       | 3301/10845 [12:10<26:09,  4.81it/s, acc=0.684, epoch=15, loss=1.01]

epoch:15, idx:3299/10845, loss:1.015189917141741, acc:0.6840909090909091


 31%|███▏      | 3400/10845 [12:32<26:56,  4.60it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:3399/10845, loss:1.0152014862965135, acc:0.6845588235294118


 32%|███▏      | 3500/10845 [12:54<26:34,  4.61it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:3499/10845, loss:1.0103514560461044, acc:0.6860714285714286


 33%|███▎      | 3600/10845 [13:16<28:13,  4.28it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:3599/10845, loss:1.0103697919680013, acc:0.6861805555555556


 34%|███▍      | 3700/10845 [13:38<26:55,  4.42it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:3699/10845, loss:1.0123260048756728, acc:0.6852702702702703


 35%|███▌      | 3800/10845 [14:01<26:21,  4.45it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:3799/10845, loss:1.0174707312176101, acc:0.6841447368421053


 36%|███▌      | 3900/10845 [14:23<24:35,  4.71it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:3899/10845, loss:1.0129718459722323, acc:0.6852564102564103


 37%|███▋      | 4001/10845 [14:45<23:16,  4.90it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:3999/10845, loss:1.012371722176671, acc:0.68525


 38%|███▊      | 4100/10845 [15:07<24:44,  4.54it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:4099/10845, loss:1.0154028041042933, acc:0.6840243902439025


 39%|███▊      | 4201/10845 [15:30<24:20,  4.55it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:4199/10845, loss:1.0136210193094752, acc:0.6851785714285714


 40%|███▉      | 4301/10845 [15:52<24:07,  4.52it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:4299/10845, loss:1.0150646722178127, acc:0.6845348837209302


 41%|████      | 4400/10845 [16:15<23:32,  4.56it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:4399/10845, loss:1.0154320865192197, acc:0.6849431818181818


 42%|████▏     | 4501/10845 [16:37<21:55,  4.82it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:4499/10845, loss:1.014442147956954, acc:0.6854444444444444


 42%|████▏     | 4600/10845 [16:59<24:02,  4.33it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:4599/10845, loss:1.014105456639891, acc:0.6855978260869565


 43%|████▎     | 4701/10845 [17:21<22:15,  4.60it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:4699/10845, loss:1.0140363200547848, acc:0.6859042553191489


 44%|████▍     | 4801/10845 [17:44<22:27,  4.49it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:4799/10845, loss:1.014060489796102, acc:0.6861458333333333


 45%|████▌     | 4901/10845 [18:06<21:59,  4.50it/s, acc=0.686, epoch=15, loss=1.02]

epoch:15, idx:4899/10845, loss:1.015330207700632, acc:0.6856632653061224


 46%|████▌     | 5000/10845 [18:29<20:23,  4.78it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:4999/10845, loss:1.016156606042385, acc:0.68525


 47%|████▋     | 5101/10845 [18:51<20:22,  4.70it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5099/10845, loss:1.0181071508748858, acc:0.685


 48%|████▊     | 5200/10845 [19:13<20:26,  4.60it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5199/10845, loss:1.016891605819647, acc:0.6853846153846154


 49%|████▉     | 5300/10845 [19:35<20:39,  4.47it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5299/10845, loss:1.0168849210356767, acc:0.6849056603773584


 50%|████▉     | 5401/10845 [19:58<19:59,  4.54it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5399/10845, loss:1.0162141149242718, acc:0.6850462962962963


 51%|█████     | 5500/10845 [20:20<19:06,  4.66it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5499/10845, loss:1.0173516707311978, acc:0.6847727272727273


 52%|█████▏    | 5600/10845 [20:43<20:08,  4.34it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5599/10845, loss:1.0168368502706289, acc:0.6847767857142857


 53%|█████▎    | 5700/10845 [21:05<19:32,  4.39it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5699/10845, loss:1.016143331977359, acc:0.6848684210526316


 53%|█████▎    | 5801/10845 [21:27<18:22,  4.57it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5799/10845, loss:1.0156587561759456, acc:0.6850431034482759


 54%|█████▍    | 5901/10845 [21:50<17:54,  4.60it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:5899/10845, loss:1.015974826580387, acc:0.6851271186440678


 55%|█████▌    | 6000/10845 [22:12<17:39,  4.57it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:5999/10845, loss:1.0137656843960285, acc:0.6855416666666667


 56%|█████▌    | 6100/10845 [22:33<18:20,  4.31it/s, acc=0.686, epoch=15, loss=1.01]

epoch:15, idx:6099/10845, loss:1.0133301360587605, acc:0.6857377049180328


 57%|█████▋    | 6200/10845 [22:56<17:44,  4.36it/s, acc=0.685, epoch=15, loss=1.01]

epoch:15, idx:6199/10845, loss:1.0146614728916077, acc:0.6854838709677419


 58%|█████▊    | 6300/10845 [23:18<16:22,  4.63it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:6299/10845, loss:1.0166479167011049, acc:0.6849603174603175


 59%|█████▉    | 6400/10845 [23:40<17:36,  4.21it/s, acc=0.685, epoch=15, loss=1.02]

epoch:15, idx:6399/10845, loss:1.0178184087295086, acc:0.6848828125


 60%|█████▉    | 6500/10845 [24:03<16:06,  4.50it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:6499/10845, loss:1.0199411442371515, acc:0.6844615384615385


 61%|██████    | 6601/10845 [24:25<15:16,  4.63it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:6599/10845, loss:1.0223297675902194, acc:0.6838636363636363


 62%|██████▏   | 6700/10845 [24:47<15:18,  4.51it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:6699/10845, loss:1.0231351070172752, acc:0.683768656716418


 63%|██████▎   | 6800/10845 [25:09<15:35,  4.32it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:6799/10845, loss:1.0235164361403268, acc:0.6835661764705883


 64%|██████▎   | 6900/10845 [25:31<14:38,  4.49it/s, acc=0.683, epoch=15, loss=1.02]

epoch:15, idx:6899/10845, loss:1.0238494017245112, acc:0.6831884057971015


 65%|██████▍   | 7000/10845 [25:54<14:49,  4.32it/s, acc=0.683, epoch=15, loss=1.02]

epoch:15, idx:6999/10845, loss:1.024256533733436, acc:0.6832857142857143


 65%|██████▌   | 7100/10845 [26:16<14:43,  4.24it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:7099/10845, loss:1.0229871397371024, acc:0.6837323943661971


 66%|██████▋   | 7201/10845 [26:39<12:51,  4.72it/s, acc=0.683, epoch=15, loss=1.02]

epoch:15, idx:7199/10845, loss:1.0239218838098976, acc:0.6834375


 67%|██████▋   | 7300/10845 [27:01<13:07,  4.50it/s, acc=0.683, epoch=15, loss=1.02]

epoch:15, idx:7299/10845, loss:1.0242906531487426, acc:0.6831849315068493


 68%|██████▊   | 7401/10845 [27:23<12:52,  4.46it/s, acc=0.684, epoch=15, loss=1.02]

epoch:15, idx:7399/10845, loss:1.0242429290671606, acc:0.6835810810810811


 69%|██████▉   | 7500/10845 [27:45<11:53,  4.69it/s, acc=0.683, epoch=15, loss=1.03]

epoch:15, idx:7499/10845, loss:1.0252398346980414, acc:0.6832


 70%|███████   | 7601/10845 [28:07<11:18,  4.78it/s, acc=0.683, epoch=15, loss=1.02]

epoch:15, idx:7599/10845, loss:1.0245471703927769, acc:0.6830592105263158


 71%|███████   | 7700/10845 [28:29<11:29,  4.56it/s, acc=0.683, epoch=15, loss=1.02]

epoch:15, idx:7699/10845, loss:1.0248413621063357, acc:0.6827922077922078


 72%|███████▏  | 7800/10845 [28:52<11:35,  4.38it/s, acc=0.683, epoch=15, loss=1.03]

epoch:15, idx:7799/10845, loss:1.0250230354911243, acc:0.682724358974359


 73%|███████▎  | 7900/10845 [29:14<10:26,  4.70it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:7899/10845, loss:1.0256406375199933, acc:0.6819620253164557


 74%|███████▍  | 8001/10845 [29:36<10:27,  4.54it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:7999/10845, loss:1.0270008819922805, acc:0.68171875


 75%|███████▍  | 8100/10845 [29:59<10:13,  4.47it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:8099/10845, loss:1.0261772611626871, acc:0.6819444444444445


 76%|███████▌  | 8200/10845 [30:21<09:51,  4.47it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:8199/10845, loss:1.0250867993002986, acc:0.6821646341463414


 77%|███████▋  | 8300/10845 [30:44<09:18,  4.56it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:8299/10845, loss:1.0253002357267471, acc:0.6821686746987952


 77%|███████▋  | 8401/10845 [31:06<08:40,  4.70it/s, acc=0.683, epoch=15, loss=1.02]

epoch:15, idx:8399/10845, loss:1.0245056777128152, acc:0.6825297619047619


 78%|███████▊  | 8501/10845 [31:27<05:34,  7.00it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:8499/10845, loss:1.0252968100449618, acc:0.6821764705882353


 79%|███████▉  | 8601/10845 [31:44<08:11,  4.56it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:8599/10845, loss:1.0255212711248287, acc:0.681889534883721


 80%|████████  | 8700/10845 [32:06<07:52,  4.54it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:8699/10845, loss:1.0255527669465405, acc:0.6820114942528736


 81%|████████  | 8800/10845 [32:28<07:11,  4.74it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:8799/10845, loss:1.0252120826935227, acc:0.6819318181818181


 82%|████████▏ | 8900/10845 [32:50<07:08,  4.54it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:8899/10845, loss:1.0273915158028013, acc:0.6814044943820224


 83%|████████▎ | 9000/10845 [33:12<07:02,  4.37it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:8999/10845, loss:1.027282418999407, acc:0.6813611111111111


 84%|████████▍ | 9100/10845 [33:34<06:58,  4.17it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9099/10845, loss:1.0283155862905167, acc:0.6810439560439561


 85%|████████▍ | 9200/10845 [33:56<06:13,  4.41it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9199/10845, loss:1.0280331981506037, acc:0.6813586956521739


 86%|████████▌ | 9300/10845 [34:18<05:27,  4.72it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9299/10845, loss:1.0276244427119532, acc:0.6811827956989247


 87%|████████▋ | 9400/10845 [34:41<05:53,  4.09it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9399/10845, loss:1.0273266505497567, acc:0.6811436170212766


 88%|████████▊ | 9500/10845 [35:05<05:42,  3.92it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9499/10845, loss:1.0273983728320975, acc:0.681078947368421


 89%|████████▊ | 9600/10845 [35:29<04:42,  4.41it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9599/10845, loss:1.0280583909961085, acc:0.6809635416666666


 89%|████████▉ | 9700/10845 [35:52<04:36,  4.14it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9699/10845, loss:1.0285461066985868, acc:0.6809536082474227


 90%|█████████ | 9800/10845 [36:16<03:58,  4.39it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9799/10845, loss:1.0294632364779102, acc:0.6808928571428572


 91%|█████████▏| 9900/10845 [36:40<03:28,  4.53it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9899/10845, loss:1.0294373739006544, acc:0.6808080808080809


 92%|█████████▏| 10000/10845 [37:04<03:17,  4.28it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:9999/10845, loss:1.0305627393722534, acc:0.680775


 93%|█████████▎| 10100/10845 [37:28<02:48,  4.42it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:10099/10845, loss:1.0300969772409685, acc:0.6809653465346535


 94%|█████████▍| 10200/10845 [37:51<02:45,  3.89it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:10199/10845, loss:1.0294437659604876, acc:0.6812009803921568


 95%|█████████▍| 10300/10845 [38:13<02:11,  4.15it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:10299/10845, loss:1.0292808441745425, acc:0.6811165048543689


 96%|█████████▌| 10400/10845 [38:37<01:44,  4.26it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:10399/10845, loss:1.02881483361125, acc:0.6811538461538461


 97%|█████████▋| 10500/10845 [39:01<01:21,  4.25it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:10499/10845, loss:1.0278865487689064, acc:0.6813809523809524


 98%|█████████▊| 10600/10845 [39:24<00:55,  4.44it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:10599/10845, loss:1.0267832489845887, acc:0.6815330188679245


 99%|█████████▊| 10700/10845 [39:48<00:32,  4.45it/s, acc=0.681, epoch=15, loss=1.03]

epoch:15, idx:10699/10845, loss:1.0270233383134146, acc:0.6814953271028037


100%|█████████▉| 10800/10845 [40:12<00:10,  4.26it/s, acc=0.682, epoch=15, loss=1.03]

epoch:15, idx:10799/10845, loss:1.0268769313891728, acc:0.6817129629629629


100%|██████████| 10845/10845 [40:23<00:00,  4.18it/s, acc=0.681, epoch=15, loss=1.03]


epoch:15, idx:0/1275, loss:1.062164306640625, acc:0.75
epoch:15, idx:100/1275, loss:1.4379819897141788, acc:0.6212871287128713
epoch:15, idx:200/1275, loss:1.28985982540235, acc:0.6355721393034826
epoch:15, idx:300/1275, loss:1.236820562337324, acc:0.6486710963455149
epoch:15, idx:400/1275, loss:1.2255035267209173, acc:0.6527431421446384
epoch:15, idx:500/1275, loss:1.2164095594973383, acc:0.6492015968063872
epoch:15, idx:600/1275, loss:1.2292854488391844, acc:0.6422628951747088
epoch:15, idx:700/1275, loss:1.2257808756386162, acc:0.6390870185449358
epoch:15, idx:800/1275, loss:1.2393111835853585, acc:0.6363920099875156
epoch:15, idx:900/1275, loss:1.2276240578634492, acc:0.6381798002219756
epoch:15, idx:1000/1275, loss:1.2220198508504625, acc:0.6376123876123876
epoch:15, idx:1100/1275, loss:1.2092844033977532, acc:0.638283378746594
epoch:15, idx:1200/1275, loss:1.2076742695233507, acc:0.6353039134054954


  1%|          | 100/10845 [00:24<39:29,  4.53it/s, acc=0.688, epoch=16, loss=1.01] 

epoch:16, idx:99/10845, loss:1.014587562084198, acc:0.6875


  2%|▏         | 200/10845 [00:47<43:54,  4.04it/s, acc=0.69, epoch=16, loss=1.02]  

epoch:16, idx:199/10845, loss:1.0158239215612412, acc:0.69


  3%|▎         | 300/10845 [01:11<41:35,  4.23it/s, acc=0.665, epoch=16, loss=1.07]

epoch:16, idx:299/10845, loss:1.0699316573143005, acc:0.665


  4%|▎         | 400/10845 [01:36<41:38,  4.18it/s, acc=0.671, epoch=16, loss=1.04]

epoch:16, idx:399/10845, loss:1.0412734192609787, acc:0.670625


  5%|▍         | 500/10845 [02:00<40:18,  4.28it/s, acc=0.669, epoch=16, loss=1.04]

epoch:16, idx:499/10845, loss:1.0402933630943298, acc:0.669


  6%|▌         | 600/10845 [02:24<42:36,  4.01it/s, acc=0.673, epoch=16, loss=1.03]

epoch:16, idx:599/10845, loss:1.0257703417539596, acc:0.6729166666666667


  6%|▋         | 700/10845 [02:48<43:56,  3.85it/s, acc=0.673, epoch=16, loss=1.04]

epoch:16, idx:699/10845, loss:1.0358800300530024, acc:0.6732142857142858


  7%|▋         | 800/10845 [03:12<36:07,  4.63it/s, acc=0.674, epoch=16, loss=1.05]

epoch:16, idx:799/10845, loss:1.0452379035204649, acc:0.67375


  8%|▊         | 901/10845 [03:36<39:05,  4.24it/s, acc=0.678, epoch=16, loss=1.04]

epoch:16, idx:899/10845, loss:1.0390724406639735, acc:0.6783333333333333


  9%|▉         | 1000/10845 [04:00<37:02,  4.43it/s, acc=0.681, epoch=16, loss=1.03]

epoch:16, idx:999/10845, loss:1.0333270011544227, acc:0.68125


 10%|█         | 1100/10845 [04:24<39:23,  4.12it/s, acc=0.679, epoch=16, loss=1.04]

epoch:16, idx:1099/10845, loss:1.0384833898869428, acc:0.6786363636363636


 11%|█         | 1200/10845 [04:48<37:44,  4.26it/s, acc=0.677, epoch=16, loss=1.04]

epoch:16, idx:1199/10845, loss:1.0412282532950243, acc:0.6775


 12%|█▏        | 1301/10845 [05:11<35:07,  4.53it/s, acc=0.68, epoch=16, loss=1.03] 

epoch:16, idx:1299/10845, loss:1.0255400945131596, acc:0.6803846153846154


 13%|█▎        | 1400/10845 [05:34<37:00,  4.25it/s, acc=0.684, epoch=16, loss=1.01]

epoch:16, idx:1399/10845, loss:1.0142906729238375, acc:0.6839285714285714


 14%|█▍        | 1500/10845 [05:59<36:16,  4.29it/s, acc=0.684, epoch=16, loss=1.01]

epoch:16, idx:1499/10845, loss:1.0108449720144272, acc:0.6843333333333333


 15%|█▍        | 1600/10845 [06:23<33:57,  4.54it/s, acc=0.685, epoch=16, loss=1.01]

epoch:16, idx:1599/10845, loss:1.0090240606293082, acc:0.6853125


 16%|█▌        | 1700/10845 [06:47<41:22,  3.68it/s, acc=0.685, epoch=16, loss=1.01]

epoch:16, idx:1699/10845, loss:1.0123783184850916, acc:0.6851470588235294


 17%|█▋        | 1800/10845 [07:10<35:45,  4.22it/s, acc=0.687, epoch=16, loss=1.01]

epoch:16, idx:1799/10845, loss:1.0103785131706131, acc:0.6868055555555556


 18%|█▊        | 1900/10845 [07:34<35:20,  4.22it/s, acc=0.69, epoch=16, loss=1]    

epoch:16, idx:1899/10845, loss:1.0011231158595335, acc:0.6898684210526316


 18%|█▊        | 2000/10845 [07:58<35:04,  4.20it/s, acc=0.689, epoch=16, loss=1.01]

epoch:16, idx:1999/10845, loss:1.0069338271319865, acc:0.68875


 19%|█▉        | 2100/10845 [08:22<36:05,  4.04it/s, acc=0.688, epoch=16, loss=1.01]

epoch:16, idx:2099/10845, loss:1.009558983672233, acc:0.6879761904761905


 20%|██        | 2200/10845 [08:45<33:32,  4.30it/s, acc=0.686, epoch=16, loss=1.01]

epoch:16, idx:2199/10845, loss:1.0127181845632467, acc:0.6857954545454545


 21%|██        | 2300/10845 [09:10<37:59,  3.75it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:2299/10845, loss:1.0171051334039025, acc:0.6848913043478261


 22%|██▏       | 2400/10845 [09:34<33:15,  4.23it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:2399/10845, loss:1.020953463787834, acc:0.68375


 23%|██▎       | 2500/10845 [09:57<33:30,  4.15it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:2499/10845, loss:1.0172235186338425, acc:0.6845


 24%|██▍       | 2601/10845 [10:21<30:14,  4.54it/s, acc=0.686, epoch=16, loss=1.01]

epoch:16, idx:2599/10845, loss:1.0132961169343728, acc:0.6861538461538461


 25%|██▍       | 2700/10845 [10:45<34:06,  3.98it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:2699/10845, loss:1.018996059121909, acc:0.6856481481481481


 26%|██▌       | 2801/10845 [11:09<29:40,  4.52it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:2799/10845, loss:1.0201436667995794, acc:0.6855357142857142


 27%|██▋       | 2900/10845 [11:32<28:30,  4.65it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:2899/10845, loss:1.0243302867535886, acc:0.6843103448275862


 28%|██▊       | 3000/10845 [11:55<32:30,  4.02it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:2999/10845, loss:1.0209048589070637, acc:0.6848333333333333


 29%|██▊       | 3101/10845 [12:17<29:18,  4.40it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:3099/10845, loss:1.0174391381971297, acc:0.6857258064516129


 30%|██▉       | 3201/10845 [12:40<28:26,  4.48it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:3199/10845, loss:1.016267386712134, acc:0.685234375


 30%|███       | 3301/10845 [13:02<28:06,  4.47it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:3299/10845, loss:1.017312128832846, acc:0.6847727272727273


 31%|███▏      | 3400/10845 [13:24<26:30,  4.68it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:3399/10845, loss:1.0205881449054268, acc:0.6836029411764706


 32%|███▏      | 3501/10845 [13:46<25:54,  4.72it/s, acc=0.683, epoch=16, loss=1.02]

epoch:16, idx:3499/10845, loss:1.023261554139001, acc:0.6827142857142857


 33%|███▎      | 3600/10845 [14:08<26:51,  4.49it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:3599/10845, loss:1.0192163483632934, acc:0.6836111111111111


 34%|███▍      | 3700/10845 [14:30<27:30,  4.33it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:3699/10845, loss:1.0178271791741655, acc:0.6839864864864865


 35%|███▌      | 3800/10845 [14:53<25:23,  4.62it/s, acc=0.685, epoch=16, loss=1.01]

epoch:16, idx:3799/10845, loss:1.0138523109963065, acc:0.685


 36%|███▌      | 3901/10845 [15:15<25:53,  4.47it/s, acc=0.685, epoch=16, loss=1.01]

epoch:16, idx:3899/10845, loss:1.0127764247625302, acc:0.6855128205128205


 37%|███▋      | 4000/10845 [15:38<24:26,  4.67it/s, acc=0.685, epoch=16, loss=1.01]

epoch:16, idx:3999/10845, loss:1.0127086004316808, acc:0.685375


 38%|███▊      | 4100/10845 [16:00<26:20,  4.27it/s, acc=0.686, epoch=16, loss=1.01]

epoch:16, idx:4099/10845, loss:1.0136031514551582, acc:0.6856707317073171


 39%|███▊      | 4201/10845 [16:23<25:01,  4.43it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:4199/10845, loss:1.0151062725271498, acc:0.6852380952380952


 40%|███▉      | 4300/10845 [16:45<23:45,  4.59it/s, acc=0.686, epoch=16, loss=1.01]

epoch:16, idx:4299/10845, loss:1.0119122137025345, acc:0.6861046511627907


 41%|████      | 4400/10845 [17:07<25:04,  4.28it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:4399/10845, loss:1.0158351929892193, acc:0.685625


 41%|████▏     | 4500/10845 [17:30<23:52,  4.43it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:4499/10845, loss:1.0151975198586782, acc:0.686


 42%|████▏     | 4601/10845 [17:53<22:06,  4.71it/s, acc=0.686, epoch=16, loss=1.01]

epoch:16, idx:4599/10845, loss:1.0147335570791494, acc:0.6859782608695653


 43%|████▎     | 4700/10845 [18:15<22:03,  4.64it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:4699/10845, loss:1.0160168996770331, acc:0.685531914893617


 44%|████▍     | 4801/10845 [18:37<21:07,  4.77it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:4799/10845, loss:1.0167244355628888, acc:0.6851041666666666


 45%|████▌     | 4900/10845 [18:59<22:46,  4.35it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:4899/10845, loss:1.0163499996370198, acc:0.6853571428571429


 46%|████▌     | 5000/10845 [19:21<21:17,  4.58it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:4999/10845, loss:1.0174393803596498, acc:0.68515


 47%|████▋     | 5100/10845 [19:43<20:53,  4.58it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:5099/10845, loss:1.016127518158333, acc:0.6855392156862745


 48%|████▊     | 5200/10845 [20:06<20:56,  4.49it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5199/10845, loss:1.0178601563435334, acc:0.6854807692307693


 49%|████▉     | 5300/10845 [20:28<19:35,  4.72it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5299/10845, loss:1.0194093294638509, acc:0.6850471698113207


 50%|████▉     | 5400/10845 [20:51<20:32,  4.42it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5399/10845, loss:1.0188752137069348, acc:0.6854166666666667


 51%|█████     | 5500/10845 [21:13<20:09,  4.42it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5499/10845, loss:1.019470479553396, acc:0.6853181818181818


 52%|█████▏    | 5600/10845 [21:35<19:39,  4.45it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:5599/10845, loss:1.0192016654355185, acc:0.685625


 53%|█████▎    | 5700/10845 [21:57<19:17,  4.44it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5699/10845, loss:1.0223928497757828, acc:0.685219298245614


 53%|█████▎    | 5800/10845 [22:19<17:58,  4.68it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5799/10845, loss:1.0216379056100187, acc:0.6851724137931035


 54%|█████▍    | 5901/10845 [22:41<17:37,  4.67it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5899/10845, loss:1.0213165813987537, acc:0.6851271186440678


 55%|█████▌    | 6000/10845 [23:04<17:19,  4.66it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:5999/10845, loss:1.0212409449219704, acc:0.6852083333333333


 56%|█████▌    | 6100/10845 [23:26<18:07,  4.36it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:6099/10845, loss:1.0201494931392983, acc:0.6851639344262295


 57%|█████▋    | 6200/10845 [23:48<16:33,  4.67it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:6199/10845, loss:1.018803309846309, acc:0.6854838709677419


 58%|█████▊    | 6301/10845 [24:11<16:30,  4.59it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:6299/10845, loss:1.0194429406192567, acc:0.6850793650793651


 59%|█████▉    | 6400/10845 [24:33<17:54,  4.14it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:6399/10845, loss:1.0183246936183423, acc:0.685625


 60%|█████▉    | 6500/10845 [24:55<16:38,  4.35it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:6499/10845, loss:1.0192419818823155, acc:0.6857307692307693


 61%|██████    | 6601/10845 [25:17<15:45,  4.49it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:6599/10845, loss:1.0186696834004287, acc:0.6856060606060606


 62%|██████▏   | 6700/10845 [25:39<14:45,  4.68it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:6699/10845, loss:1.0175310626581533, acc:0.6857835820895523


 63%|██████▎   | 6800/10845 [26:02<16:40,  4.04it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:6799/10845, loss:1.0180445403824834, acc:0.6856617647058824


 64%|██████▎   | 6900/10845 [26:25<14:25,  4.56it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:6899/10845, loss:1.0165242559408796, acc:0.6859782608695653


 65%|██████▍   | 7000/10845 [26:47<13:57,  4.59it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:6999/10845, loss:1.0164372458543096, acc:0.6856785714285715


 65%|██████▌   | 7100/10845 [27:10<14:06,  4.42it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:7099/10845, loss:1.017555901609676, acc:0.685387323943662


 66%|██████▋   | 7201/10845 [27:32<12:57,  4.69it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:7199/10845, loss:1.0167952980266677, acc:0.6855555555555556


 67%|██████▋   | 7301/10845 [27:54<12:23,  4.77it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:7299/10845, loss:1.016228714846585, acc:0.6852397260273972


 68%|██████▊   | 7400/10845 [28:16<13:19,  4.31it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:7399/10845, loss:1.0163060602867926, acc:0.685304054054054


 69%|██████▉   | 7501/10845 [28:39<12:12,  4.57it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:7499/10845, loss:1.0166804658730826, acc:0.6853666666666667


 70%|███████   | 7600/10845 [29:01<11:46,  4.59it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:7599/10845, loss:1.0169536612614205, acc:0.6853947368421053


 71%|███████   | 7700/10845 [29:23<10:57,  4.78it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:7699/10845, loss:1.0166763286544132, acc:0.6854545454545454


 72%|███████▏  | 7800/10845 [29:46<10:18,  4.93it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:7799/10845, loss:1.0172311325791554, acc:0.6856089743589744


 73%|███████▎  | 7900/10845 [30:08<10:52,  4.51it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:7899/10845, loss:1.0170652368777915, acc:0.6855696202531646


 74%|███████▍  | 8000/10845 [30:30<10:52,  4.36it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:7999/10845, loss:1.0178313016667961, acc:0.68528125


 75%|███████▍  | 8100/10845 [30:52<09:48,  4.67it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:8099/10845, loss:1.0190608904906262, acc:0.6851851851851852


 76%|███████▌  | 8200/10845 [31:14<10:36,  4.16it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8199/10845, loss:1.0179399625629912, acc:0.6856707317073171


 77%|███████▋  | 8300/10845 [31:37<09:06,  4.65it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8299/10845, loss:1.017609434953655, acc:0.6860542168674699


 77%|███████▋  | 8400/10845 [31:59<09:07,  4.46it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8399/10845, loss:1.01829539408996, acc:0.6861011904761904


 78%|███████▊  | 8500/10845 [32:21<08:24,  4.65it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8499/10845, loss:1.0174924810984556, acc:0.6861176470588235


 79%|███████▉  | 8601/10845 [32:44<07:57,  4.70it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8599/10845, loss:1.019284783262153, acc:0.6857267441860465


 80%|████████  | 8700/10845 [33:06<07:38,  4.67it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8699/10845, loss:1.01852162756454, acc:0.6857183908045977


 81%|████████  | 8800/10845 [33:28<07:41,  4.43it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8799/10845, loss:1.01781596662646, acc:0.6859090909090909


 82%|████████▏ | 8900/10845 [33:50<07:32,  4.30it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8899/10845, loss:1.0181015161516962, acc:0.6858988764044944


 83%|████████▎ | 9000/10845 [34:13<06:41,  4.59it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:8999/10845, loss:1.0175515745414627, acc:0.6864722222222223


 84%|████████▍ | 9100/10845 [34:35<06:36,  4.40it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:9099/10845, loss:1.0187710191587824, acc:0.6862637362637363


 85%|████████▍ | 9201/10845 [34:58<06:08,  4.47it/s, acc=0.686, epoch=16, loss=1.02]

epoch:16, idx:9199/10845, loss:1.0194071355397287, acc:0.6857065217391304


 86%|████████▌ | 9300/10845 [35:20<05:50,  4.41it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:9299/10845, loss:1.020494751872555, acc:0.6854032258064516


 87%|████████▋ | 9400/10845 [35:42<05:33,  4.33it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:9399/10845, loss:1.020407446679917, acc:0.6850797872340425


 88%|████████▊ | 9500/10845 [36:04<05:02,  4.45it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:9499/10845, loss:1.0208059597580057, acc:0.6847105263157894


 89%|████████▊ | 9601/10845 [36:27<04:21,  4.75it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:9599/10845, loss:1.0223235751626392, acc:0.6840625


 89%|████████▉ | 9700/10845 [36:49<04:04,  4.69it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:9699/10845, loss:1.0222636283488618, acc:0.684020618556701


 90%|█████████ | 9800/10845 [37:11<03:51,  4.52it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:9799/10845, loss:1.0224768724672648, acc:0.683954081632653


 91%|█████████▏| 9900/10845 [37:33<03:33,  4.42it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:9899/10845, loss:1.021609391630298, acc:0.6839646464646465


 92%|█████████▏| 10000/10845 [37:56<03:08,  4.48it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:9999/10845, loss:1.0215861642062665, acc:0.683975


 93%|█████████▎| 10101/10845 [38:18<02:45,  4.49it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:10099/10845, loss:1.0196706831868332, acc:0.6846287128712871


 94%|█████████▍| 10200/10845 [38:40<02:21,  4.56it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:10199/10845, loss:1.0185845458507539, acc:0.6848529411764706


 95%|█████████▍| 10301/10845 [39:03<01:56,  4.65it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:10299/10845, loss:1.0201101794983576, acc:0.6846116504854369


 96%|█████████▌| 10400/10845 [39:25<01:47,  4.15it/s, acc=0.685, epoch=16, loss=1.02]

epoch:16, idx:10399/10845, loss:1.020223438086418, acc:0.6846875


 97%|█████████▋| 10500/10845 [39:47<01:18,  4.39it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:10499/10845, loss:1.0211166894662949, acc:0.6842380952380952


 98%|█████████▊| 10601/10845 [40:10<00:52,  4.63it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:10599/10845, loss:1.0210101609072595, acc:0.6841981132075472


 99%|█████████▊| 10700/10845 [40:33<00:33,  4.38it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:10699/10845, loss:1.0220942535244415, acc:0.6838084112149533


100%|█████████▉| 10801/10845 [40:55<00:09,  4.47it/s, acc=0.684, epoch=16, loss=1.02]

epoch:16, idx:10799/10845, loss:1.0213658570581012, acc:0.684050925925926


100%|██████████| 10845/10845 [41:05<00:00,  4.20it/s, acc=0.684, epoch=16, loss=1.02]


epoch:16, idx:0/1275, loss:1.0539827346801758, acc:0.5
epoch:16, idx:100/1275, loss:1.440791115902438, acc:0.6039603960396039
epoch:16, idx:200/1275, loss:1.2939055695462582, acc:0.6231343283582089
epoch:16, idx:300/1275, loss:1.2322999500357035, acc:0.6428571428571429
epoch:16, idx:400/1275, loss:1.2241371855176892, acc:0.6452618453865336
epoch:16, idx:500/1275, loss:1.1955620951043393, acc:0.6511976047904192
epoch:16, idx:600/1275, loss:1.201776547063012, acc:0.6447587354409318
epoch:16, idx:700/1275, loss:1.202823104089745, acc:0.640870185449358
epoch:16, idx:800/1275, loss:1.2231737882754627, acc:0.6382646691635455
epoch:16, idx:900/1275, loss:1.2127764992391097, acc:0.6412319644839067
epoch:16, idx:1000/1275, loss:1.2122515816312212, acc:0.6413586413586414
epoch:16, idx:1100/1275, loss:1.2014593717512707, acc:0.6416893732970027
epoch:16, idx:1200/1275, loss:1.19848637289052, acc:0.6369691923397169


  1%|          | 101/10845 [00:22<38:49,  4.61it/s, acc=0.688, epoch=17, loss=0.933]

epoch:17, idx:99/10845, loss:0.9323565888404847, acc:0.69


  2%|▏         | 201/10845 [00:44<38:00,  4.67it/s, acc=0.684, epoch=17, loss=0.963]

epoch:17, idx:199/10845, loss:0.9650726646184922, acc:0.68375


  3%|▎         | 300/10845 [01:06<40:32,  4.34it/s, acc=0.674, epoch=17, loss=0.984]

epoch:17, idx:299/10845, loss:0.9837753029664358, acc:0.6741666666666667


  4%|▎         | 400/10845 [01:28<42:23,  4.11it/s, acc=0.69, epoch=17, loss=0.955] 

epoch:17, idx:399/10845, loss:0.9547887535393238, acc:0.69


  5%|▍         | 500/10845 [01:51<36:47,  4.69it/s, acc=0.689, epoch=17, loss=0.974]

epoch:17, idx:499/10845, loss:0.9738133527040481, acc:0.689


  6%|▌         | 600/10845 [02:13<40:50,  4.18it/s, acc=0.692, epoch=17, loss=0.967]

epoch:17, idx:599/10845, loss:0.9674392293890317, acc:0.6916666666666667


  6%|▋         | 700/10845 [02:35<37:06,  4.56it/s, acc=0.698, epoch=17, loss=0.952]

epoch:17, idx:699/10845, loss:0.9523958490576063, acc:0.6975


  7%|▋         | 800/10845 [02:58<36:17,  4.61it/s, acc=0.698, epoch=17, loss=0.959]

epoch:17, idx:799/10845, loss:0.9589651075005531, acc:0.6975


  8%|▊         | 900/10845 [03:20<37:54,  4.37it/s, acc=0.696, epoch=17, loss=0.966]

epoch:17, idx:899/10845, loss:0.9662134524186452, acc:0.6955555555555556


  9%|▉         | 1001/10845 [03:42<36:18,  4.52it/s, acc=0.695, epoch=17, loss=0.977]

epoch:17, idx:999/10845, loss:0.9772327234745025, acc:0.695


 10%|█         | 1100/10845 [04:05<36:04,  4.50it/s, acc=0.692, epoch=17, loss=0.982]

epoch:17, idx:1099/10845, loss:0.9820728693225167, acc:0.6922727272727273


 11%|█         | 1200/10845 [04:27<35:13,  4.56it/s, acc=0.692, epoch=17, loss=0.985]

epoch:17, idx:1199/10845, loss:0.9860233681400618, acc:0.69125


 12%|█▏        | 1300/10845 [04:50<35:54,  4.43it/s, acc=0.689, epoch=17, loss=0.988]

epoch:17, idx:1299/10845, loss:0.9875473282887386, acc:0.6890384615384615


 13%|█▎        | 1400/10845 [05:12<33:44,  4.66it/s, acc=0.691, epoch=17, loss=0.985]

epoch:17, idx:1399/10845, loss:0.9845466525214059, acc:0.6905357142857143


 14%|█▍        | 1500/10845 [05:34<34:31,  4.51it/s, acc=0.693, epoch=17, loss=0.975]

epoch:17, idx:1499/10845, loss:0.9747079413731893, acc:0.6931666666666667


 15%|█▍        | 1601/10845 [05:57<33:43,  4.57it/s, acc=0.695, epoch=17, loss=0.977]

epoch:17, idx:1599/10845, loss:0.9777655415982008, acc:0.69453125


 16%|█▌        | 1700/10845 [06:19<32:04,  4.75it/s, acc=0.698, epoch=17, loss=0.97] 

epoch:17, idx:1699/10845, loss:0.9700177459155812, acc:0.6976470588235294


 17%|█▋        | 1800/10845 [06:41<33:21,  4.52it/s, acc=0.7, epoch=17, loss=0.96]   

epoch:17, idx:1799/10845, loss:0.9600205334027608, acc:0.7002777777777778


 18%|█▊        | 1900/10845 [07:03<34:24,  4.33it/s, acc=0.698, epoch=17, loss=0.964]

epoch:17, idx:1899/10845, loss:0.964279958668508, acc:0.6980263157894737


 18%|█▊        | 2001/10845 [07:26<32:58,  4.47it/s, acc=0.7, epoch=17, loss=0.965]  

epoch:17, idx:1999/10845, loss:0.9648785784840583, acc:0.6995


 19%|█▉        | 2100/10845 [07:49<33:55,  4.30it/s, acc=0.699, epoch=17, loss=0.968]

epoch:17, idx:2099/10845, loss:0.9682121635051001, acc:0.6994047619047619


 20%|██        | 2200/10845 [08:11<30:43,  4.69it/s, acc=0.699, epoch=17, loss=0.968]

epoch:17, idx:2199/10845, loss:0.9678455972671509, acc:0.6992045454545455


 21%|██        | 2300/10845 [08:33<30:05,  4.73it/s, acc=0.699, epoch=17, loss=0.969]

epoch:17, idx:2299/10845, loss:0.9689893560305886, acc:0.6989130434782609


 22%|██▏       | 2400/10845 [08:55<29:45,  4.73it/s, acc=0.699, epoch=17, loss=0.969]

epoch:17, idx:2399/10845, loss:0.9685254842042923, acc:0.6986458333333333


 23%|██▎       | 2500/10845 [09:17<29:42,  4.68it/s, acc=0.699, epoch=17, loss=0.971]

epoch:17, idx:2499/10845, loss:0.9709570892333984, acc:0.6985


 24%|██▍       | 2600/10845 [09:39<30:12,  4.55it/s, acc=0.697, epoch=17, loss=0.977]

epoch:17, idx:2599/10845, loss:0.977183139370038, acc:0.6973076923076923


 25%|██▍       | 2701/10845 [10:02<29:34,  4.59it/s, acc=0.696, epoch=17, loss=0.98] 

epoch:17, idx:2699/10845, loss:0.9800642024146186, acc:0.6960185185185185


 26%|██▌       | 2801/10845 [10:24<27:46,  4.83it/s, acc=0.696, epoch=17, loss=0.978]

epoch:17, idx:2799/10845, loss:0.9778076041596276, acc:0.6964285714285714


 27%|██▋       | 2900/10845 [10:46<29:48,  4.44it/s, acc=0.696, epoch=17, loss=0.98] 

epoch:17, idx:2899/10845, loss:0.9797660343400363, acc:0.6962068965517242


 28%|██▊       | 3000/10845 [11:08<28:24,  4.60it/s, acc=0.696, epoch=17, loss=0.983]

epoch:17, idx:2999/10845, loss:0.9827539169788361, acc:0.6961666666666667


 29%|██▊       | 3100/10845 [11:30<27:40,  4.67it/s, acc=0.696, epoch=17, loss=0.983]

epoch:17, idx:3099/10845, loss:0.9825174086709176, acc:0.6962096774193548


 30%|██▉       | 3201/10845 [11:53<27:52,  4.57it/s, acc=0.697, epoch=17, loss=0.978]

epoch:17, idx:3199/10845, loss:0.9781451616436243, acc:0.696953125


 30%|███       | 3300/10845 [12:15<28:43,  4.38it/s, acc=0.698, epoch=17, loss=0.977]

epoch:17, idx:3299/10845, loss:0.9766732415466598, acc:0.6983333333333334


 31%|███▏      | 3401/10845 [12:37<26:31,  4.68it/s, acc=0.698, epoch=17, loss=0.977]

epoch:17, idx:3399/10845, loss:0.977175040648264, acc:0.6977941176470588


 32%|███▏      | 3500/10845 [12:59<29:09,  4.20it/s, acc=0.698, epoch=17, loss=0.978]

epoch:17, idx:3499/10845, loss:0.9784626280750547, acc:0.6976428571428571


 33%|███▎      | 3600/10845 [13:22<27:18,  4.42it/s, acc=0.698, epoch=17, loss=0.974]

epoch:17, idx:3599/10845, loss:0.973921184639136, acc:0.6984722222222223


 34%|███▍      | 3701/10845 [13:44<25:03,  4.75it/s, acc=0.699, epoch=17, loss=0.974]

epoch:17, idx:3699/10845, loss:0.9738069996640489, acc:0.6990540540540541


 35%|███▌      | 3800/10845 [14:06<27:57,  4.20it/s, acc=0.697, epoch=17, loss=0.978]

epoch:17, idx:3799/10845, loss:0.9783163436149296, acc:0.6973684210526315


 36%|███▌      | 3901/10845 [14:29<24:29,  4.72it/s, acc=0.698, epoch=17, loss=0.978]

epoch:17, idx:3899/10845, loss:0.9776884524333171, acc:0.6975641025641026


 37%|███▋      | 4000/10845 [14:51<25:23,  4.49it/s, acc=0.698, epoch=17, loss=0.977]

epoch:17, idx:3999/10845, loss:0.9772226420640946, acc:0.6976875


 38%|███▊      | 4101/10845 [15:13<23:22,  4.81it/s, acc=0.697, epoch=17, loss=0.976]

epoch:17, idx:4099/10845, loss:0.9762109642639393, acc:0.6973780487804878


 39%|███▊      | 4201/10845 [15:35<24:04,  4.60it/s, acc=0.697, epoch=17, loss=0.976]

epoch:17, idx:4199/10845, loss:0.9760182157158852, acc:0.6970238095238095


 40%|███▉      | 4300/10845 [15:57<24:35,  4.44it/s, acc=0.696, epoch=17, loss=0.977]

epoch:17, idx:4299/10845, loss:0.9770908181473267, acc:0.6962209302325582


 41%|████      | 4401/10845 [16:19<23:07,  4.65it/s, acc=0.696, epoch=17, loss=0.979]

epoch:17, idx:4399/10845, loss:0.9779479014060714, acc:0.6956818181818182


 41%|████▏     | 4500/10845 [16:41<24:00,  4.40it/s, acc=0.695, epoch=17, loss=0.981]

epoch:17, idx:4499/10845, loss:0.9807191796567705, acc:0.6951111111111111


 42%|████▏     | 4600/10845 [17:04<22:56,  4.54it/s, acc=0.694, epoch=17, loss=0.983]

epoch:17, idx:4599/10845, loss:0.9825881074563316, acc:0.6941847826086956


 43%|████▎     | 4700/10845 [17:26<23:04,  4.44it/s, acc=0.694, epoch=17, loss=0.981]

epoch:17, idx:4699/10845, loss:0.9808444315068265, acc:0.6944148936170212


 44%|████▍     | 4800/10845 [17:49<21:04,  4.78it/s, acc=0.694, epoch=17, loss=0.98] 

epoch:17, idx:4799/10845, loss:0.9802851125101248, acc:0.6941666666666667


 45%|████▌     | 4900/10845 [18:11<22:03,  4.49it/s, acc=0.694, epoch=17, loss=0.981]

epoch:17, idx:4899/10845, loss:0.981067527216308, acc:0.6938775510204082


 46%|████▌     | 5001/10845 [18:33<21:31,  4.52it/s, acc=0.694, epoch=17, loss=0.98] 

epoch:17, idx:4999/10845, loss:0.9802392213106156, acc:0.6939


 47%|████▋     | 5100/10845 [18:55<20:39,  4.63it/s, acc=0.694, epoch=17, loss=0.981]

epoch:17, idx:5099/10845, loss:0.9812999271879009, acc:0.6935294117647058


 48%|████▊     | 5200/10845 [19:17<20:50,  4.52it/s, acc=0.694, epoch=17, loss=0.98] 

epoch:17, idx:5199/10845, loss:0.9799932164412278, acc:0.6936057692307692


 49%|████▉     | 5300/10845 [19:40<21:33,  4.29it/s, acc=0.692, epoch=17, loss=0.984]

epoch:17, idx:5299/10845, loss:0.9836706699065443, acc:0.6923584905660377


 50%|████▉     | 5400/10845 [20:03<21:39,  4.19it/s, acc=0.693, epoch=17, loss=0.983]

epoch:17, idx:5399/10845, loss:0.9825497111788503, acc:0.6925


 51%|█████     | 5501/10845 [20:25<20:11,  4.41it/s, acc=0.692, epoch=17, loss=0.983]

epoch:17, idx:5499/10845, loss:0.9828177264170214, acc:0.6923181818181818


 52%|█████▏    | 5600/10845 [20:47<19:25,  4.50it/s, acc=0.691, epoch=17, loss=0.987]

epoch:17, idx:5599/10845, loss:0.98718462820564, acc:0.6913839285714286


 53%|█████▎    | 5700/10845 [21:10<18:08,  4.73it/s, acc=0.692, epoch=17, loss=0.988]

epoch:17, idx:5699/10845, loss:0.9878781236054605, acc:0.6915350877192983


 53%|█████▎    | 5801/10845 [21:33<19:00,  4.42it/s, acc=0.691, epoch=17, loss=0.989]

epoch:17, idx:5799/10845, loss:0.9895016051160879, acc:0.6910775862068965


 54%|█████▍    | 5901/10845 [21:55<18:19,  4.50it/s, acc=0.691, epoch=17, loss=0.988]

epoch:17, idx:5899/10845, loss:0.9878441477225999, acc:0.6914830508474576


 55%|█████▌    | 6001/10845 [22:17<17:10,  4.70it/s, acc=0.691, epoch=17, loss=0.988]

epoch:17, idx:5999/10845, loss:0.9885662719607353, acc:0.6913333333333334


 56%|█████▌    | 6100/10845 [22:39<18:09,  4.36it/s, acc=0.691, epoch=17, loss=0.99] 

epoch:17, idx:6099/10845, loss:0.9901267651456301, acc:0.6906967213114754


 57%|█████▋    | 6201/10845 [23:01<16:00,  4.84it/s, acc=0.691, epoch=17, loss=0.988]

epoch:17, idx:6199/10845, loss:0.988341926903494, acc:0.69125


 58%|█████▊    | 6301/10845 [23:24<17:07,  4.42it/s, acc=0.691, epoch=17, loss=0.99] 

epoch:17, idx:6299/10845, loss:0.9898300666184653, acc:0.6912301587301587


 59%|█████▉    | 6400/10845 [23:47<16:20,  4.53it/s, acc=0.691, epoch=17, loss=0.991]

epoch:17, idx:6399/10845, loss:0.9905305851530284, acc:0.69140625


 60%|█████▉    | 6500/10845 [24:09<16:54,  4.28it/s, acc=0.691, epoch=17, loss=0.992]

epoch:17, idx:6499/10845, loss:0.9921776566963929, acc:0.6906923076923077


 61%|██████    | 6601/10845 [24:32<14:46,  4.79it/s, acc=0.69, epoch=17, loss=0.994] 

epoch:17, idx:6599/10845, loss:0.9941292763027277, acc:0.6899621212121212


 62%|██████▏   | 6700/10845 [24:54<15:23,  4.49it/s, acc=0.69, epoch=17, loss=0.995]

epoch:17, idx:6699/10845, loss:0.9945121266681757, acc:0.6900373134328358


 63%|██████▎   | 6800/10845 [25:17<15:50,  4.25it/s, acc=0.69, epoch=17, loss=0.995]

epoch:17, idx:6799/10845, loss:0.9951249152860221, acc:0.6899632352941176


 64%|██████▎   | 6900/10845 [25:39<14:43,  4.47it/s, acc=0.69, epoch=17, loss=0.995]

epoch:17, idx:6899/10845, loss:0.9952549527088801, acc:0.6903985507246376


 65%|██████▍   | 7001/10845 [26:01<14:04,  4.55it/s, acc=0.69, epoch=17, loss=0.995] 

epoch:17, idx:6999/10845, loss:0.9952305122358458, acc:0.6904285714285714


 65%|██████▌   | 7100/10845 [26:23<14:10,  4.40it/s, acc=0.69, epoch=17, loss=0.998] 

epoch:17, idx:7099/10845, loss:0.9975555190989669, acc:0.6896830985915493


 66%|██████▋   | 7201/10845 [26:46<12:46,  4.75it/s, acc=0.69, epoch=17, loss=0.998] 

epoch:17, idx:7199/10845, loss:0.9980455962734089, acc:0.6897222222222222


 67%|██████▋   | 7300/10845 [27:07<12:28,  4.74it/s, acc=0.69, epoch=17, loss=0.997]

epoch:17, idx:7299/10845, loss:0.9965944249091083, acc:0.6901027397260274


 68%|██████▊   | 7400/10845 [27:29<13:03,  4.40it/s, acc=0.69, epoch=17, loss=0.996]

epoch:17, idx:7399/10845, loss:0.9958830139846415, acc:0.6897972972972973


 69%|██████▉   | 7500/10845 [27:52<12:24,  4.50it/s, acc=0.69, epoch=17, loss=0.996] 

epoch:17, idx:7499/10845, loss:0.9963182239929835, acc:0.6896


 70%|███████   | 7600/10845 [28:15<12:34,  4.30it/s, acc=0.689, epoch=17, loss=0.996]

epoch:17, idx:7599/10845, loss:0.9964323536737969, acc:0.6892434210526316


 71%|███████   | 7700/10845 [28:37<11:20,  4.62it/s, acc=0.689, epoch=17, loss=0.998]

epoch:17, idx:7699/10845, loss:0.9975090536591295, acc:0.6894805194805195


 72%|███████▏  | 7801/10845 [28:59<10:41,  4.75it/s, acc=0.689, epoch=17, loss=0.997]

epoch:17, idx:7799/10845, loss:0.9965747792522113, acc:0.6895192307692307


 73%|███████▎  | 7900/10845 [29:21<10:49,  4.53it/s, acc=0.689, epoch=17, loss=0.998]

epoch:17, idx:7899/10845, loss:0.9981492685894423, acc:0.6894620253164557


 74%|███████▍  | 8000/10845 [29:43<11:19,  4.19it/s, acc=0.69, epoch=17, loss=0.997] 

epoch:17, idx:7999/10845, loss:0.9970097682327033, acc:0.6896875


 75%|███████▍  | 8100/10845 [30:05<10:16,  4.45it/s, acc=0.69, epoch=17, loss=0.997]

epoch:17, idx:8099/10845, loss:0.9974775381588642, acc:0.6898456790123457


 76%|███████▌  | 8200/10845 [30:27<09:30,  4.63it/s, acc=0.69, epoch=17, loss=0.999]

epoch:17, idx:8199/10845, loss:0.9985214875965583, acc:0.6897865853658537


 77%|███████▋  | 8300/10845 [30:49<09:07,  4.65it/s, acc=0.69, epoch=17, loss=1]     

epoch:17, idx:8299/10845, loss:0.999812993371343, acc:0.6898192771084337


 77%|███████▋  | 8400/10845 [31:12<09:11,  4.43it/s, acc=0.69, epoch=17, loss=1]    

epoch:17, idx:8399/10845, loss:0.9995003751629875, acc:0.6897916666666667


 78%|███████▊  | 8500/10845 [31:34<08:35,  4.55it/s, acc=0.69, epoch=17, loss=0.999]

epoch:17, idx:8499/10845, loss:0.9990446045959697, acc:0.6899117647058823


 79%|███████▉  | 8600/10845 [31:56<08:42,  4.30it/s, acc=0.69, epoch=17, loss=0.999]

epoch:17, idx:8599/10845, loss:0.9987722231620966, acc:0.6897093023255814


 80%|████████  | 8700/10845 [32:18<07:55,  4.51it/s, acc=0.69, epoch=17, loss=0.999] 

epoch:17, idx:8699/10845, loss:0.9985489214631332, acc:0.6896264367816092


 81%|████████  | 8800/10845 [32:40<07:31,  4.53it/s, acc=0.689, epoch=17, loss=1]    

epoch:17, idx:8799/10845, loss:0.9997570111060684, acc:0.6894034090909091


 82%|████████▏ | 8901/10845 [33:03<07:04,  4.58it/s, acc=0.689, epoch=17, loss=1]

epoch:17, idx:8899/10845, loss:1.0001249689801355, acc:0.6891573033707865


 83%|████████▎ | 9000/10845 [33:24<06:39,  4.62it/s, acc=0.689, epoch=17, loss=1]

epoch:17, idx:8999/10845, loss:1.0002370236913363, acc:0.6888888888888889


 84%|████████▍ | 9101/10845 [33:47<06:25,  4.52it/s, acc=0.689, epoch=17, loss=1]

epoch:17, idx:9099/10845, loss:1.0005322145171218, acc:0.6885989010989011


 85%|████████▍ | 9201/10845 [34:09<05:58,  4.58it/s, acc=0.689, epoch=17, loss=1]

epoch:17, idx:9199/10845, loss:1.0007167470390381, acc:0.6887771739130435


 86%|████████▌ | 9300/10845 [34:31<05:36,  4.60it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:9299/10845, loss:1.0031830209878183, acc:0.6884408602150538


 87%|████████▋ | 9400/10845 [34:53<05:34,  4.32it/s, acc=0.689, epoch=17, loss=1]

epoch:17, idx:9399/10845, loss:1.0030974963561017, acc:0.6885106382978723


 88%|████████▊ | 9500/10845 [35:15<04:43,  4.74it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:9499/10845, loss:1.003399872121058, acc:0.6884210526315789


 89%|████████▊ | 9600/10845 [35:38<04:43,  4.40it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:9599/10845, loss:1.0044432926612596, acc:0.6879947916666667


 89%|████████▉ | 9700/10845 [36:00<04:09,  4.58it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:9699/10845, loss:1.004282978361415, acc:0.6878350515463918


 90%|█████████ | 9800/10845 [36:22<03:37,  4.80it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:9799/10845, loss:1.004470825456843, acc:0.6877551020408164


 91%|█████████▏| 9900/10845 [36:44<03:35,  4.38it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:9899/10845, loss:1.0033736064458134, acc:0.6881060606060606


 92%|█████████▏| 10000/10845 [37:06<03:20,  4.22it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:9999/10845, loss:1.003851952278614, acc:0.688175


 93%|█████████▎| 10100/10845 [37:29<02:45,  4.49it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:10099/10845, loss:1.003515893397945, acc:0.6883415841584158


 94%|█████████▍| 10201/10845 [37:51<02:14,  4.80it/s, acc=0.688, epoch=17, loss=1]   

epoch:17, idx:10199/10845, loss:1.004786416257129, acc:0.6880882352941177


 95%|█████████▍| 10300/10845 [38:13<02:04,  4.37it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:10299/10845, loss:1.003648583298748, acc:0.6882281553398059


 96%|█████████▌| 10400/10845 [38:35<01:39,  4.48it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:10399/10845, loss:1.0040766214178158, acc:0.6883173076923077


 97%|█████████▋| 10500/10845 [38:57<01:16,  4.51it/s, acc=0.688, epoch=17, loss=1]

epoch:17, idx:10499/10845, loss:1.0044458321276166, acc:0.6883333333333334


 98%|█████████▊| 10601/10845 [39:20<00:52,  4.64it/s, acc=0.688, epoch=17, loss=1]   

epoch:17, idx:10599/10845, loss:1.004361789586409, acc:0.6882075471698114


 99%|█████████▊| 10700/10845 [39:42<00:31,  4.65it/s, acc=0.688, epoch=17, loss=1]   

epoch:17, idx:10699/10845, loss:1.0049800954243848, acc:0.6879205607476635


100%|█████████▉| 10800/10845 [40:05<00:09,  4.52it/s, acc=0.688, epoch=17, loss=1.01]

epoch:17, idx:10799/10845, loss:1.0050105931471895, acc:0.6878472222222223


100%|██████████| 10845/10845 [40:15<00:00,  4.44it/s, acc=0.688, epoch=17, loss=1.01]


epoch:17, idx:0/1275, loss:1.48115873336792, acc:0.5
epoch:17, idx:100/1275, loss:1.4462868057855285, acc:0.6089108910891089
epoch:17, idx:200/1275, loss:1.3119573172052108, acc:0.6343283582089553
epoch:17, idx:300/1275, loss:1.2520216544997256, acc:0.6511627906976745
epoch:17, idx:400/1275, loss:1.2356791942196892, acc:0.6546134663341646
epoch:17, idx:500/1275, loss:1.2172464639126896, acc:0.6501996007984032
epoch:17, idx:600/1275, loss:1.2296428023876247, acc:0.6422628951747088
epoch:17, idx:700/1275, loss:1.2367540495882021, acc:0.6401569186875892
epoch:17, idx:800/1275, loss:1.2460537249973502, acc:0.6373283395755306
epoch:17, idx:900/1275, loss:1.2325433386814317, acc:0.6420643729189789
epoch:17, idx:1000/1275, loss:1.2273301431587287, acc:0.6431068931068931
epoch:17, idx:1100/1275, loss:1.2179116856283105, acc:0.6435059037238874
epoch:17, idx:1200/1275, loss:1.2120435909863614, acc:0.6411323896752706


  1%|          | 100/10845 [00:21<37:17,  4.80it/s, acc=0.69, epoch=18, loss=0.979]

epoch:18, idx:99/10845, loss:0.9792371773719788, acc:0.69


  2%|▏         | 200/10845 [00:44<37:43,  4.70it/s, acc=0.699, epoch=18, loss=0.956]

epoch:18, idx:199/10845, loss:0.9564208716154099, acc:0.69875


  3%|▎         | 300/10845 [01:07<39:02,  4.50it/s, acc=0.699, epoch=18, loss=0.945]

epoch:18, idx:299/10845, loss:0.9427672505378724, acc:0.6991666666666667


  4%|▎         | 400/10845 [01:29<38:55,  4.47it/s, acc=0.691, epoch=18, loss=0.974]

epoch:18, idx:399/10845, loss:0.9742946302890778, acc:0.690625


  5%|▍         | 500/10845 [01:51<39:06,  4.41it/s, acc=0.693, epoch=18, loss=0.97] 

epoch:18, idx:499/10845, loss:0.9702818207740783, acc:0.693


  6%|▌         | 600/10845 [02:14<38:57,  4.38it/s, acc=0.699, epoch=18, loss=0.96] 

epoch:18, idx:599/10845, loss:0.9603589347998301, acc:0.6991666666666667


  6%|▋         | 700/10845 [02:36<37:09,  4.55it/s, acc=0.704, epoch=18, loss=0.942]

epoch:18, idx:699/10845, loss:0.9424009226049695, acc:0.7039285714285715


  7%|▋         | 800/10845 [02:59<35:04,  4.77it/s, acc=0.704, epoch=18, loss=0.945]

epoch:18, idx:799/10845, loss:0.9449312626570463, acc:0.704375


  8%|▊         | 900/10845 [03:21<35:23,  4.68it/s, acc=0.701, epoch=18, loss=0.958]

epoch:18, idx:899/10845, loss:0.9576183980703354, acc:0.7008333333333333


  9%|▉         | 1000/10845 [03:43<36:29,  4.50it/s, acc=0.706, epoch=18, loss=0.948]

epoch:18, idx:999/10845, loss:0.9479124779701233, acc:0.7055


 10%|█         | 1100/10845 [04:06<36:11,  4.49it/s, acc=0.702, epoch=18, loss=0.956]

epoch:18, idx:1099/10845, loss:0.956165490692312, acc:0.7022727272727273


 11%|█         | 1201/10845 [04:29<35:51,  4.48it/s, acc=0.703, epoch=18, loss=0.956]

epoch:18, idx:1199/10845, loss:0.9557244399189949, acc:0.7029166666666666


 12%|█▏        | 1301/10845 [04:51<34:09,  4.66it/s, acc=0.7, epoch=18, loss=0.956]  

epoch:18, idx:1299/10845, loss:0.9562421972018021, acc:0.7003846153846154


 13%|█▎        | 1401/10845 [05:13<34:45,  4.53it/s, acc=0.699, epoch=18, loss=0.958]

epoch:18, idx:1399/10845, loss:0.9579349031618664, acc:0.6992857142857143


 14%|█▍        | 1500/10845 [05:35<34:53,  4.46it/s, acc=0.701, epoch=18, loss=0.96] 

epoch:18, idx:1499/10845, loss:0.9600782950719198, acc:0.7005


 15%|█▍        | 1601/10845 [05:58<33:15,  4.63it/s, acc=0.698, epoch=18, loss=0.969]

epoch:18, idx:1599/10845, loss:0.9692465440928936, acc:0.69828125


 16%|█▌        | 1700/10845 [06:21<33:32,  4.54it/s, acc=0.697, epoch=18, loss=0.974]

epoch:18, idx:1699/10845, loss:0.9737665741934496, acc:0.6972058823529412


 17%|█▋        | 1800/10845 [06:44<32:20,  4.66it/s, acc=0.697, epoch=18, loss=0.976]

epoch:18, idx:1799/10845, loss:0.9762017624576886, acc:0.6970833333333334


 18%|█▊        | 1901/10845 [07:07<31:28,  4.74it/s, acc=0.698, epoch=18, loss=0.977]

epoch:18, idx:1899/10845, loss:0.9760934895277024, acc:0.6978947368421052


 18%|█▊        | 2000/10845 [07:28<33:30,  4.40it/s, acc=0.698, epoch=18, loss=0.974]

epoch:18, idx:1999/10845, loss:0.9738167307078839, acc:0.698375


 19%|█▉        | 2100/10845 [07:51<34:20,  4.24it/s, acc=0.698, epoch=18, loss=0.974]

epoch:18, idx:2099/10845, loss:0.9744669540439334, acc:0.6982142857142857


 20%|██        | 2201/10845 [08:14<33:22,  4.32it/s, acc=0.699, epoch=18, loss=0.973]

epoch:18, idx:2199/10845, loss:0.9733346314592795, acc:0.6988636363636364


 21%|██        | 2300/10845 [08:36<31:30,  4.52it/s, acc=0.699, epoch=18, loss=0.973]

epoch:18, idx:2299/10845, loss:0.9725886173611102, acc:0.6989130434782609


 22%|██▏       | 2400/10845 [08:59<32:25,  4.34it/s, acc=0.698, epoch=18, loss=0.973]

epoch:18, idx:2399/10845, loss:0.9730254812290271, acc:0.6984375


 23%|██▎       | 2500/10845 [09:22<32:47,  4.24it/s, acc=0.697, epoch=18, loss=0.977]

epoch:18, idx:2499/10845, loss:0.9772534492731094, acc:0.6966


 24%|██▍       | 2600/10845 [09:44<31:00,  4.43it/s, acc=0.696, epoch=18, loss=0.976]

epoch:18, idx:2599/10845, loss:0.9756656969739841, acc:0.6964423076923076


 25%|██▍       | 2701/10845 [10:07<29:44,  4.56it/s, acc=0.697, epoch=18, loss=0.975]

epoch:18, idx:2699/10845, loss:0.9755150514399564, acc:0.6969444444444445


 26%|██▌       | 2800/10845 [10:29<29:19,  4.57it/s, acc=0.697, epoch=18, loss=0.972]

epoch:18, idx:2799/10845, loss:0.9716610533211912, acc:0.6974107142857143


 27%|██▋       | 2900/10845 [10:51<29:37,  4.47it/s, acc=0.698, epoch=18, loss=0.973]

epoch:18, idx:2899/10845, loss:0.9725454050713572, acc:0.6978448275862069


 28%|██▊       | 3000/10845 [11:14<29:35,  4.42it/s, acc=0.698, epoch=18, loss=0.972]

epoch:18, idx:2999/10845, loss:0.9723954821228981, acc:0.69775


 29%|██▊       | 3100/10845 [11:36<28:27,  4.53it/s, acc=0.698, epoch=18, loss=0.973]

epoch:18, idx:3099/10845, loss:0.9729249819824772, acc:0.697983870967742


 30%|██▉       | 3200/10845 [11:59<29:13,  4.36it/s, acc=0.699, epoch=18, loss=0.968]

epoch:18, idx:3199/10845, loss:0.9678721412830055, acc:0.6990625


 30%|███       | 3300/10845 [12:21<27:59,  4.49it/s, acc=0.698, epoch=18, loss=0.971]

epoch:18, idx:3299/10845, loss:0.9705749041564536, acc:0.6978787878787879


 31%|███▏      | 3401/10845 [12:44<27:36,  4.49it/s, acc=0.697, epoch=18, loss=0.975]

epoch:18, idx:3399/10845, loss:0.975016356654027, acc:0.6968382352941176


 32%|███▏      | 3500/10845 [13:06<26:07,  4.68it/s, acc=0.696, epoch=18, loss=0.977]

epoch:18, idx:3499/10845, loss:0.9767264828511647, acc:0.6957142857142857


 33%|███▎      | 3600/10845 [13:28<26:46,  4.51it/s, acc=0.695, epoch=18, loss=0.977]

epoch:18, idx:3599/10845, loss:0.9774992270436552, acc:0.6948611111111112


 34%|███▍      | 3700/10845 [13:51<27:49,  4.28it/s, acc=0.695, epoch=18, loss=0.975]

epoch:18, idx:3699/10845, loss:0.9753140837276304, acc:0.6954054054054054


 35%|███▌      | 3801/10845 [14:14<26:54,  4.36it/s, acc=0.695, epoch=18, loss=0.977]

epoch:18, idx:3799/10845, loss:0.9767523636159144, acc:0.6948684210526316


 36%|███▌      | 3900/10845 [14:36<25:56,  4.46it/s, acc=0.695, epoch=18, loss=0.977]

epoch:18, idx:3899/10845, loss:0.9771614978099481, acc:0.6951282051282052


 37%|███▋      | 4000/10845 [14:59<26:06,  4.37it/s, acc=0.695, epoch=18, loss=0.978]

epoch:18, idx:3999/10845, loss:0.9777721368521451, acc:0.6945625


 38%|███▊      | 4101/10845 [15:22<24:43,  4.55it/s, acc=0.694, epoch=18, loss=0.979]

epoch:18, idx:4099/10845, loss:0.978496899110515, acc:0.694390243902439


 39%|███▊      | 4200/10845 [15:44<24:58,  4.44it/s, acc=0.694, epoch=18, loss=0.981]

epoch:18, idx:4199/10845, loss:0.9805623338619868, acc:0.694047619047619


 40%|███▉      | 4300/10845 [16:07<23:03,  4.73it/s, acc=0.694, epoch=18, loss=0.979]

epoch:18, idx:4299/10845, loss:0.9791905005310857, acc:0.694360465116279


 41%|████      | 4400/10845 [16:29<23:02,  4.66it/s, acc=0.695, epoch=18, loss=0.978]

epoch:18, idx:4399/10845, loss:0.9783052265102213, acc:0.6951136363636363


 42%|████▏     | 4501/10845 [16:52<23:13,  4.55it/s, acc=0.694, epoch=18, loss=0.98] 

epoch:18, idx:4499/10845, loss:0.9806083039575153, acc:0.6941666666666667


 42%|████▏     | 4600/10845 [17:14<22:59,  4.53it/s, acc=0.695, epoch=18, loss=0.98] 

epoch:18, idx:4599/10845, loss:0.9799416228351385, acc:0.6946195652173913


 43%|████▎     | 4700/10845 [17:36<21:50,  4.69it/s, acc=0.694, epoch=18, loss=0.984]

epoch:18, idx:4699/10845, loss:0.9841155210581232, acc:0.6938297872340425


 44%|████▍     | 4801/10845 [17:59<22:06,  4.56it/s, acc=0.694, epoch=18, loss=0.984]

epoch:18, idx:4799/10845, loss:0.9837025067086022, acc:0.69390625


 45%|████▌     | 4900/10845 [18:21<21:46,  4.55it/s, acc=0.694, epoch=18, loss=0.984]

epoch:18, idx:4899/10845, loss:0.9842879976909987, acc:0.6938775510204082


 46%|████▌     | 5000/10845 [18:43<20:58,  4.65it/s, acc=0.694, epoch=18, loss=0.985]

epoch:18, idx:4999/10845, loss:0.9849315711379051, acc:0.69395


 47%|████▋     | 5101/10845 [19:06<20:30,  4.67it/s, acc=0.693, epoch=18, loss=0.988]

epoch:18, idx:5099/10845, loss:0.9874665279598797, acc:0.6932352941176471


 48%|████▊     | 5200/10845 [19:28<21:54,  4.29it/s, acc=0.693, epoch=18, loss=0.988]

epoch:18, idx:5199/10845, loss:0.9875722304559671, acc:0.6932692307692307


 49%|████▉     | 5300/10845 [19:50<20:45,  4.45it/s, acc=0.693, epoch=18, loss=0.987]

epoch:18, idx:5299/10845, loss:0.9867410766965938, acc:0.6932075471698114


 50%|████▉     | 5400/10845 [20:12<20:48,  4.36it/s, acc=0.693, epoch=18, loss=0.986]

epoch:18, idx:5399/10845, loss:0.985773115809317, acc:0.6932870370370371


 51%|█████     | 5500/10845 [20:34<18:48,  4.74it/s, acc=0.693, epoch=18, loss=0.985]

epoch:18, idx:5499/10845, loss:0.9853992738181895, acc:0.6933636363636364


 52%|█████▏    | 5600/10845 [20:57<19:52,  4.40it/s, acc=0.693, epoch=18, loss=0.985]

epoch:18, idx:5599/10845, loss:0.9854052830274616, acc:0.6933035714285715


 53%|█████▎    | 5701/10845 [21:19<18:23,  4.66it/s, acc=0.693, epoch=18, loss=0.986]

epoch:18, idx:5699/10845, loss:0.9856619243559085, acc:0.6933333333333334


 53%|█████▎    | 5801/10845 [21:42<17:52,  4.71it/s, acc=0.693, epoch=18, loss=0.986]

epoch:18, idx:5799/10845, loss:0.9863679303485772, acc:0.6926724137931034


 54%|█████▍    | 5901/10845 [22:04<18:21,  4.49it/s, acc=0.693, epoch=18, loss=0.984]

epoch:18, idx:5899/10845, loss:0.9842569291692669, acc:0.693135593220339


 55%|█████▌    | 6001/10845 [22:26<17:06,  4.72it/s, acc=0.693, epoch=18, loss=0.985]

epoch:18, idx:5999/10845, loss:0.9852652149697145, acc:0.6934583333333333


 56%|█████▋    | 6101/10845 [22:49<17:00,  4.65it/s, acc=0.694, epoch=18, loss=0.984]

epoch:18, idx:6099/10845, loss:0.9843376548856986, acc:0.6935655737704918


 57%|█████▋    | 6201/10845 [23:11<16:34,  4.67it/s, acc=0.693, epoch=18, loss=0.987]

epoch:18, idx:6199/10845, loss:0.9869630846765733, acc:0.692741935483871


 58%|█████▊    | 6300/10845 [23:34<16:53,  4.49it/s, acc=0.693, epoch=18, loss=0.986]

epoch:18, idx:6299/10845, loss:0.9864959936387955, acc:0.6930952380952381


 59%|█████▉    | 6400/10845 [23:56<15:22,  4.82it/s, acc=0.693, epoch=18, loss=0.985]

epoch:18, idx:6399/10845, loss:0.9853422311041504, acc:0.6933984375


 60%|█████▉    | 6501/10845 [24:18<16:42,  4.33it/s, acc=0.694, epoch=18, loss=0.983]

epoch:18, idx:6499/10845, loss:0.9834845772798245, acc:0.6941538461538461


 61%|██████    | 6600/10845 [24:40<14:56,  4.74it/s, acc=0.694, epoch=18, loss=0.984]

epoch:18, idx:6599/10845, loss:0.9843671468622757, acc:0.6940151515151515


 62%|██████▏   | 6700/10845 [25:02<16:16,  4.24it/s, acc=0.694, epoch=18, loss=0.984]

epoch:18, idx:6699/10845, loss:0.9841030082151071, acc:0.6940298507462687


 63%|██████▎   | 6801/10845 [25:25<15:37,  4.31it/s, acc=0.694, epoch=18, loss=0.985]

epoch:18, idx:6799/10845, loss:0.9848415200061658, acc:0.6941544117647059


 64%|██████▎   | 6900/10845 [25:47<14:30,  4.53it/s, acc=0.694, epoch=18, loss=0.985]

epoch:18, idx:6899/10845, loss:0.9849252999260806, acc:0.6938768115942029


 65%|██████▍   | 7000/10845 [26:09<14:27,  4.43it/s, acc=0.694, epoch=18, loss=0.985]

epoch:18, idx:6999/10845, loss:0.9851693988953318, acc:0.6939642857142857


 65%|██████▌   | 7100/10845 [26:32<14:11,  4.40it/s, acc=0.694, epoch=18, loss=0.986]

epoch:18, idx:7099/10845, loss:0.9856655473860216, acc:0.6935915492957746


 66%|██████▋   | 7201/10845 [26:55<13:44,  4.42it/s, acc=0.693, epoch=18, loss=0.987]

epoch:18, idx:7199/10845, loss:0.9873410219864713, acc:0.6929861111111111


 67%|██████▋   | 7300/10845 [27:17<13:22,  4.42it/s, acc=0.693, epoch=18, loss=0.986]

epoch:18, idx:7299/10845, loss:0.9861130006917536, acc:0.6931164383561644


 68%|██████▊   | 7400/10845 [27:40<13:35,  4.22it/s, acc=0.693, epoch=18, loss=0.986]

epoch:18, idx:7399/10845, loss:0.9862467960489762, acc:0.6926351351351351


 69%|██████▉   | 7500/10845 [28:03<12:05,  4.61it/s, acc=0.693, epoch=18, loss=0.987]

epoch:18, idx:7499/10845, loss:0.9865942415475846, acc:0.6929


 70%|███████   | 7600/10845 [28:25<12:48,  4.22it/s, acc=0.693, epoch=18, loss=0.986]

epoch:18, idx:7599/10845, loss:0.9861591544668925, acc:0.6930263157894737


 71%|███████   | 7700/10845 [28:47<11:46,  4.45it/s, acc=0.693, epoch=18, loss=0.988]

epoch:18, idx:7699/10845, loss:0.9882598756273071, acc:0.6926623376623376


 72%|███████▏  | 7801/10845 [29:10<11:17,  4.50it/s, acc=0.692, epoch=18, loss=0.988]

epoch:18, idx:7799/10845, loss:0.988540651179277, acc:0.6923076923076923


 73%|███████▎  | 7900/10845 [29:32<11:13,  4.37it/s, acc=0.692, epoch=18, loss=0.989]

epoch:18, idx:7899/10845, loss:0.9890862000365801, acc:0.6920253164556962


 74%|███████▍  | 8000/10845 [29:54<10:34,  4.48it/s, acc=0.692, epoch=18, loss=0.989]

epoch:18, idx:7999/10845, loss:0.9885111035481096, acc:0.6921875


 75%|███████▍  | 8101/10845 [30:17<09:52,  4.63it/s, acc=0.692, epoch=18, loss=0.988]

epoch:18, idx:8099/10845, loss:0.988448640556983, acc:0.6920061728395062


 76%|███████▌  | 8201/10845 [30:39<10:06,  4.36it/s, acc=0.692, epoch=18, loss=0.989]

epoch:18, idx:8199/10845, loss:0.9884170070145188, acc:0.6915243902439024


 77%|███████▋  | 8301/10845 [31:02<09:12,  4.60it/s, acc=0.692, epoch=18, loss=0.988]

epoch:18, idx:8299/10845, loss:0.988203692328499, acc:0.6916867469879519


 77%|███████▋  | 8400/10845 [31:24<08:37,  4.72it/s, acc=0.691, epoch=18, loss=0.991]

epoch:18, idx:8399/10845, loss:0.9912212415891034, acc:0.6908333333333333


 78%|███████▊  | 8501/10845 [31:47<08:08,  4.80it/s, acc=0.69, epoch=18, loss=0.993] 

epoch:18, idx:8499/10845, loss:0.9930403756744721, acc:0.6904411764705882


 79%|███████▉  | 8600/10845 [32:09<08:15,  4.53it/s, acc=0.691, epoch=18, loss=0.992]

epoch:18, idx:8599/10845, loss:0.992237339886122, acc:0.6905232558139535


 80%|████████  | 8701/10845 [32:31<08:12,  4.35it/s, acc=0.691, epoch=18, loss=0.991]

epoch:18, idx:8699/10845, loss:0.9909181876360685, acc:0.6906321839080459


 81%|████████  | 8800/10845 [32:53<07:45,  4.39it/s, acc=0.69, epoch=18, loss=0.991] 

epoch:18, idx:8799/10845, loss:0.9914492767710578, acc:0.6903977272727273


 82%|████████▏ | 8900/10845 [33:16<07:30,  4.32it/s, acc=0.691, epoch=18, loss=0.989]

epoch:18, idx:8899/10845, loss:0.9890534585982226, acc:0.6909550561797753


 83%|████████▎ | 9000/10845 [33:38<07:10,  4.29it/s, acc=0.691, epoch=18, loss=0.989]

epoch:18, idx:8999/10845, loss:0.9888919982976384, acc:0.6911944444444444


 84%|████████▍ | 9101/10845 [34:00<06:26,  4.52it/s, acc=0.692, epoch=18, loss=0.987]

epoch:18, idx:9099/10845, loss:0.9869618727021165, acc:0.6916758241758242


 85%|████████▍ | 9200/10845 [34:22<06:25,  4.27it/s, acc=0.692, epoch=18, loss=0.986]

epoch:18, idx:9199/10845, loss:0.986388322043678, acc:0.6918478260869565


 86%|████████▌ | 9300/10845 [34:44<05:23,  4.78it/s, acc=0.692, epoch=18, loss=0.987]

epoch:18, idx:9299/10845, loss:0.9871112002416323, acc:0.6918279569892473


 87%|████████▋ | 9400/10845 [35:07<05:35,  4.31it/s, acc=0.692, epoch=18, loss=0.987]

epoch:18, idx:9399/10845, loss:0.9871680175180131, acc:0.691968085106383


 88%|████████▊ | 9500/10845 [35:30<05:02,  4.45it/s, acc=0.692, epoch=18, loss=0.986]

epoch:18, idx:9499/10845, loss:0.9859115869685223, acc:0.6921052631578948


 89%|████████▊ | 9601/10845 [35:53<04:25,  4.68it/s, acc=0.692, epoch=18, loss=0.987]

epoch:18, idx:9599/10845, loss:0.9874871896766126, acc:0.6917447916666667


 89%|████████▉ | 9701/10845 [36:15<04:07,  4.61it/s, acc=0.692, epoch=18, loss=0.988]

epoch:18, idx:9699/10845, loss:0.9877872429864922, acc:0.6915979381443299


 90%|█████████ | 9800/10845 [36:37<03:38,  4.78it/s, acc=0.692, epoch=18, loss=0.987]

epoch:18, idx:9799/10845, loss:0.9868720662289736, acc:0.6917602040816326


 91%|█████████▏| 9901/10845 [37:00<03:30,  4.48it/s, acc=0.692, epoch=18, loss=0.986]

epoch:18, idx:9899/10845, loss:0.9856525894186714, acc:0.6919444444444445


 92%|█████████▏| 10000/10845 [37:22<03:24,  4.13it/s, acc=0.692, epoch=18, loss=0.987]

epoch:18, idx:9999/10845, loss:0.9866200580060482, acc:0.69195


 93%|█████████▎| 10101/10845 [37:44<02:43,  4.55it/s, acc=0.692, epoch=18, loss=0.986]

epoch:18, idx:10099/10845, loss:0.9861578302985371, acc:0.6919801980198019


 94%|█████████▍| 10200/10845 [38:07<02:30,  4.28it/s, acc=0.692, epoch=18, loss=0.986]

epoch:18, idx:10199/10845, loss:0.9864418831000141, acc:0.6919117647058823


 95%|█████████▍| 10301/10845 [38:30<01:56,  4.67it/s, acc=0.692, epoch=18, loss=0.988]

epoch:18, idx:10299/10845, loss:0.9875059015831901, acc:0.6916504854368932


 96%|█████████▌| 10400/10845 [38:52<01:38,  4.51it/s, acc=0.691, epoch=18, loss=0.988]

epoch:18, idx:10399/10845, loss:0.9881977418924753, acc:0.6914903846153846


 97%|█████████▋| 10501/10845 [39:14<01:12,  4.75it/s, acc=0.691, epoch=18, loss=0.989]

epoch:18, idx:10499/10845, loss:0.9888269003062021, acc:0.6914523809523809


 98%|█████████▊| 10600/10845 [39:36<00:49,  4.91it/s, acc=0.692, epoch=18, loss=0.988]

epoch:18, idx:10599/10845, loss:0.98846029640931, acc:0.6916037735849057


 99%|█████████▊| 10700/10845 [39:59<00:32,  4.44it/s, acc=0.691, epoch=18, loss=0.99] 

epoch:18, idx:10699/10845, loss:0.9901052039193216, acc:0.691285046728972


100%|█████████▉| 10800/10845 [40:21<00:10,  4.29it/s, acc=0.692, epoch=18, loss=0.99] 

epoch:18, idx:10799/10845, loss:0.9897086839598638, acc:0.6916203703703704


100%|██████████| 10845/10845 [40:31<00:00,  4.54it/s, acc=0.692, epoch=18, loss=0.99]


epoch:18, idx:0/1275, loss:1.1971087455749512, acc:0.75
epoch:18, idx:100/1275, loss:1.4315383564127553, acc:0.6262376237623762
epoch:18, idx:200/1275, loss:1.2894177608822115, acc:0.6318407960199005
epoch:18, idx:300/1275, loss:1.242874026298523, acc:0.6470099667774086
epoch:18, idx:400/1275, loss:1.2373203125380519, acc:0.6496259351620948
epoch:18, idx:500/1275, loss:1.2183769598692478, acc:0.6482035928143712
epoch:18, idx:600/1275, loss:1.2394331965390935, acc:0.6435108153078203
epoch:18, idx:700/1275, loss:1.2389549030556999, acc:0.6437232524964337
epoch:18, idx:800/1275, loss:1.2627213626318656, acc:0.6401373283395755
epoch:18, idx:900/1275, loss:1.2513351710337512, acc:0.6409544950055494
epoch:18, idx:1000/1275, loss:1.2560597537876248, acc:0.6376123876123876
epoch:18, idx:1100/1275, loss:1.244636806436932, acc:0.6394187102633969
epoch:18, idx:1200/1275, loss:1.2382808946550736, acc:0.6353039134054954


  1%|          | 100/10845 [00:22<40:04,  4.47it/s, acc=0.698, epoch=19, loss=0.958]

epoch:19, idx:99/10845, loss:0.9581971490383148, acc:0.6975


  2%|▏         | 200/10845 [00:44<38:47,  4.57it/s, acc=0.703, epoch=19, loss=0.958]

epoch:19, idx:199/10845, loss:0.9577943903207778, acc:0.7025


  3%|▎         | 300/10845 [01:06<39:31,  4.45it/s, acc=0.706, epoch=19, loss=0.93] 

epoch:19, idx:299/10845, loss:0.9303625233968099, acc:0.7058333333333333


  4%|▎         | 400/10845 [01:28<39:38,  4.39it/s, acc=0.701, epoch=19, loss=0.931]

epoch:19, idx:399/10845, loss:0.9306974717974663, acc:0.70125


  5%|▍         | 500/10845 [01:50<36:13,  4.76it/s, acc=0.7, epoch=19, loss=0.926]  

epoch:19, idx:499/10845, loss:0.9263205969333649, acc:0.7


  6%|▌         | 600/10845 [02:12<39:17,  4.35it/s, acc=0.702, epoch=19, loss=0.924]

epoch:19, idx:599/10845, loss:0.924391798377037, acc:0.7020833333333333


  6%|▋         | 700/10845 [02:35<36:33,  4.63it/s, acc=0.703, epoch=19, loss=0.924]

epoch:19, idx:699/10845, loss:0.925415882723672, acc:0.7025


  7%|▋         | 800/10845 [02:56<35:37,  4.70it/s, acc=0.704, epoch=19, loss=0.922]

epoch:19, idx:799/10845, loss:0.9232950095832347, acc:0.70375


  8%|▊         | 900/10845 [03:19<37:01,  4.48it/s, acc=0.71, epoch=19, loss=0.918] 

epoch:19, idx:899/10845, loss:0.9175122311380175, acc:0.7097222222222223


  9%|▉         | 1000/10845 [03:41<35:34,  4.61it/s, acc=0.704, epoch=19, loss=0.929]

epoch:19, idx:999/10845, loss:0.9292820880413055, acc:0.70425


 10%|█         | 1100/10845 [04:03<38:05,  4.26it/s, acc=0.704, epoch=19, loss=0.937]

epoch:19, idx:1099/10845, loss:0.9380569206584584, acc:0.7036363636363636


 11%|█         | 1200/10845 [04:25<36:05,  4.45it/s, acc=0.705, epoch=19, loss=0.937]

epoch:19, idx:1199/10845, loss:0.9367359414696693, acc:0.7045833333333333


 12%|█▏        | 1301/10845 [04:47<35:16,  4.51it/s, acc=0.702, epoch=19, loss=0.944]

epoch:19, idx:1299/10845, loss:0.9447642369453724, acc:0.7019230769230769


 13%|█▎        | 1401/10845 [05:10<34:07,  4.61it/s, acc=0.702, epoch=19, loss=0.955]

epoch:19, idx:1399/10845, loss:0.9557611000537872, acc:0.7017857142857142


 14%|█▍        | 1500/10845 [05:32<35:17,  4.41it/s, acc=0.7, epoch=19, loss=0.956]  

epoch:19, idx:1499/10845, loss:0.955515412012736, acc:0.6998333333333333


 15%|█▍        | 1600/10845 [05:54<33:02,  4.66it/s, acc=0.699, epoch=19, loss=0.96] 

epoch:19, idx:1599/10845, loss:0.9600754424929618, acc:0.699375


 16%|█▌        | 1701/10845 [06:16<33:29,  4.55it/s, acc=0.701, epoch=19, loss=0.957]

epoch:19, idx:1699/10845, loss:0.9563908830109764, acc:0.7016176470588236


 17%|█▋        | 1801/10845 [06:38<32:21,  4.66it/s, acc=0.702, epoch=19, loss=0.957]

epoch:19, idx:1799/10845, loss:0.9564172580507067, acc:0.7023611111111111


 18%|█▊        | 1900/10845 [07:00<34:02,  4.38it/s, acc=0.703, epoch=19, loss=0.952]

epoch:19, idx:1899/10845, loss:0.9518448352813721, acc:0.7028947368421052


 18%|█▊        | 2001/10845 [07:22<32:04,  4.59it/s, acc=0.703, epoch=19, loss=0.954]

epoch:19, idx:1999/10845, loss:0.9538593401312828, acc:0.702875


 19%|█▉        | 2101/10845 [07:44<32:15,  4.52it/s, acc=0.702, epoch=19, loss=0.958]

epoch:19, idx:2099/10845, loss:0.9577086545172192, acc:0.7022619047619048


 20%|██        | 2200/10845 [08:06<32:13,  4.47it/s, acc=0.702, epoch=19, loss=0.96] 

epoch:19, idx:2199/10845, loss:0.9596855563467199, acc:0.7020454545454545


 21%|██        | 2300/10845 [08:28<31:56,  4.46it/s, acc=0.703, epoch=19, loss=0.959]

epoch:19, idx:2299/10845, loss:0.9586886244753132, acc:0.7025


 22%|██▏       | 2400/10845 [08:51<31:25,  4.48it/s, acc=0.704, epoch=19, loss=0.953]

epoch:19, idx:2399/10845, loss:0.953562299311161, acc:0.7036458333333333


 23%|██▎       | 2501/10845 [09:13<29:51,  4.66it/s, acc=0.703, epoch=19, loss=0.961]

epoch:19, idx:2499/10845, loss:0.9608457705974579, acc:0.7025


 24%|██▍       | 2600/10845 [09:35<31:16,  4.39it/s, acc=0.702, epoch=19, loss=0.963]

epoch:19, idx:2599/10845, loss:0.9628220405486914, acc:0.7021153846153846


 25%|██▍       | 2700/10845 [09:57<29:10,  4.65it/s, acc=0.702, epoch=19, loss=0.965]

epoch:19, idx:2699/10845, loss:0.9644717333051893, acc:0.7021296296296297


 26%|██▌       | 2800/10845 [10:19<30:19,  4.42it/s, acc=0.702, epoch=19, loss=0.965]

epoch:19, idx:2799/10845, loss:0.9648776243839945, acc:0.7023214285714285


 27%|██▋       | 2900/10845 [10:42<27:20,  4.84it/s, acc=0.702, epoch=19, loss=0.967]

epoch:19, idx:2899/10845, loss:0.9669371871290535, acc:0.7020689655172414


 28%|██▊       | 3000/10845 [11:04<28:43,  4.55it/s, acc=0.701, epoch=19, loss=0.971]

epoch:19, idx:2999/10845, loss:0.9706621684233347, acc:0.7009166666666666


 29%|██▊       | 3100/10845 [11:27<27:52,  4.63it/s, acc=0.701, epoch=19, loss=0.97] 

epoch:19, idx:3099/10845, loss:0.9704525753375023, acc:0.7006451612903226


 30%|██▉       | 3201/10845 [11:49<27:42,  4.60it/s, acc=0.7, epoch=19, loss=0.968]  

epoch:19, idx:3199/10845, loss:0.9683603423461318, acc:0.700390625


 30%|███       | 3300/10845 [12:11<28:23,  4.43it/s, acc=0.699, epoch=19, loss=0.971]

epoch:19, idx:3299/10845, loss:0.9708024634014476, acc:0.6992424242424242


 31%|███▏      | 3400/10845 [12:33<26:44,  4.64it/s, acc=0.699, epoch=19, loss=0.971]

epoch:19, idx:3399/10845, loss:0.9712497149144902, acc:0.6988235294117647


 32%|███▏      | 3501/10845 [12:55<27:09,  4.51it/s, acc=0.699, epoch=19, loss=0.97] 

epoch:19, idx:3499/10845, loss:0.9705164542198181, acc:0.6991428571428572


 33%|███▎      | 3600/10845 [13:17<27:27,  4.40it/s, acc=0.699, epoch=19, loss=0.97] 

epoch:19, idx:3599/10845, loss:0.9699528054396311, acc:0.699375


 34%|███▍      | 3700/10845 [13:39<26:54,  4.42it/s, acc=0.701, epoch=19, loss=0.965]

epoch:19, idx:3699/10845, loss:0.9651873401049021, acc:0.7008783783783784


 35%|███▌      | 3800/10845 [14:01<26:47,  4.38it/s, acc=0.701, epoch=19, loss=0.968]

epoch:19, idx:3799/10845, loss:0.9682373199651115, acc:0.7005921052631578


 36%|███▌      | 3900/10845 [14:23<26:52,  4.31it/s, acc=0.701, epoch=19, loss=0.966]

epoch:19, idx:3899/10845, loss:0.9660355374446282, acc:0.7008974358974359


 37%|███▋      | 4001/10845 [14:45<25:11,  4.53it/s, acc=0.701, epoch=19, loss=0.967]

epoch:19, idx:3999/10845, loss:0.9673888723552228, acc:0.700875


 38%|███▊      | 4101/10845 [15:07<25:20,  4.43it/s, acc=0.7, epoch=19, loss=0.97]   

epoch:19, idx:4099/10845, loss:0.9697301274683418, acc:0.6997560975609756


 39%|███▊      | 4201/10845 [15:29<22:38,  4.89it/s, acc=0.7, epoch=19, loss=0.971]  

epoch:19, idx:4199/10845, loss:0.9708231578838258, acc:0.6995833333333333


 40%|███▉      | 4300/10845 [15:51<22:50,  4.77it/s, acc=0.701, epoch=19, loss=0.969]

epoch:19, idx:4299/10845, loss:0.9686846123739731, acc:0.700813953488372


 41%|████      | 4400/10845 [16:14<22:03,  4.87it/s, acc=0.7, epoch=19, loss=0.971]  

epoch:19, idx:4399/10845, loss:0.9710753455758094, acc:0.6997159090909091


 41%|████▏     | 4500/10845 [16:36<22:45,  4.65it/s, acc=0.699, epoch=19, loss=0.973]

epoch:19, idx:4499/10845, loss:0.9726670461628172, acc:0.6991111111111111


 42%|████▏     | 4600/10845 [16:58<23:40,  4.40it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:4599/10845, loss:0.9731795048324958, acc:0.6983152173913043


 43%|████▎     | 4701/10845 [17:21<21:32,  4.75it/s, acc=0.698, epoch=19, loss=0.975]

epoch:19, idx:4699/10845, loss:0.9747625720627764, acc:0.6978191489361703


 44%|████▍     | 4800/10845 [17:42<25:20,  3.98it/s, acc=0.698, epoch=19, loss=0.974]

epoch:19, idx:4799/10845, loss:0.9742699105168382, acc:0.6978645833333333


 45%|████▌     | 4901/10845 [18:05<20:56,  4.73it/s, acc=0.698, epoch=19, loss=0.974]

epoch:19, idx:4899/10845, loss:0.973826097894688, acc:0.6977040816326531


 46%|████▌     | 5001/10845 [18:27<21:04,  4.62it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:4999/10845, loss:0.9728945137619972, acc:0.69805


 47%|████▋     | 5100/10845 [18:49<22:57,  4.17it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:5099/10845, loss:0.972588872220002, acc:0.6976470588235294


 48%|████▊     | 5200/10845 [19:11<21:09,  4.45it/s, acc=0.697, epoch=19, loss=0.973]

epoch:19, idx:5199/10845, loss:0.972837132112338, acc:0.6973076923076923


 49%|████▉     | 5300/10845 [19:33<20:01,  4.62it/s, acc=0.697, epoch=19, loss=0.973]

epoch:19, idx:5299/10845, loss:0.9734627071079218, acc:0.6970283018867924


 50%|████▉     | 5400/10845 [19:56<20:31,  4.42it/s, acc=0.697, epoch=19, loss=0.974]

epoch:19, idx:5399/10845, loss:0.974187200985573, acc:0.6973148148148148


 51%|█████     | 5501/10845 [20:18<18:50,  4.73it/s, acc=0.698, epoch=19, loss=0.974]

epoch:19, idx:5499/10845, loss:0.9745020330385729, acc:0.6976818181818182


 52%|█████▏    | 5600/10845 [20:40<20:05,  4.35it/s, acc=0.698, epoch=19, loss=0.974]

epoch:19, idx:5599/10845, loss:0.9740858482675893, acc:0.6977232142857143


 53%|█████▎    | 5701/10845 [21:02<18:35,  4.61it/s, acc=0.698, epoch=19, loss=0.974]

epoch:19, idx:5699/10845, loss:0.9740613335266448, acc:0.697938596491228


 53%|█████▎    | 5800/10845 [21:24<18:02,  4.66it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:5799/10845, loss:0.9729531791086855, acc:0.6984051724137931


 54%|█████▍    | 5901/10845 [21:46<17:07,  4.81it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:5899/10845, loss:0.9732707954665362, acc:0.698135593220339


 55%|█████▌    | 6000/10845 [22:08<18:10,  4.44it/s, acc=0.699, epoch=19, loss=0.974]

epoch:19, idx:5999/10845, loss:0.9737382002870242, acc:0.6985


 56%|█████▌    | 6100/10845 [22:31<17:37,  4.49it/s, acc=0.698, epoch=19, loss=0.975]

epoch:19, idx:6099/10845, loss:0.9751443574663068, acc:0.698032786885246


 57%|█████▋    | 6201/10845 [22:53<16:34,  4.67it/s, acc=0.698, epoch=19, loss=0.974]

epoch:19, idx:6199/10845, loss:0.9738697598441954, acc:0.6983064516129033


 58%|█████▊    | 6300/10845 [23:16<17:46,  4.26it/s, acc=0.698, epoch=19, loss=0.974]

epoch:19, idx:6299/10845, loss:0.9744389857186212, acc:0.697936507936508


 59%|█████▉    | 6401/10845 [23:38<15:14,  4.86it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:6399/10845, loss:0.9730096854269504, acc:0.698203125


 60%|█████▉    | 6500/10845 [24:00<16:31,  4.38it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:6499/10845, loss:0.9726900456685287, acc:0.6979230769230769


 61%|██████    | 6600/10845 [24:22<16:06,  4.39it/s, acc=0.698, epoch=19, loss=0.973]

epoch:19, idx:6599/10845, loss:0.9725069769765391, acc:0.6981818181818182


 62%|██████▏   | 6700/10845 [24:44<16:03,  4.30it/s, acc=0.698, epoch=19, loss=0.972]

epoch:19, idx:6699/10845, loss:0.9721601998361189, acc:0.6980970149253731


 63%|██████▎   | 6800/10845 [25:07<14:47,  4.56it/s, acc=0.697, epoch=19, loss=0.974]

epoch:19, idx:6799/10845, loss:0.974315279345302, acc:0.6972058823529412


 64%|██████▎   | 6900/10845 [25:29<14:37,  4.49it/s, acc=0.697, epoch=19, loss=0.974]

epoch:19, idx:6899/10845, loss:0.9736239247823107, acc:0.6973188405797102


 65%|██████▍   | 7000/10845 [25:51<14:12,  4.51it/s, acc=0.697, epoch=19, loss=0.973]

epoch:19, idx:6999/10845, loss:0.9729169254899025, acc:0.6971071428571428


 65%|██████▌   | 7101/10845 [26:14<13:28,  4.63it/s, acc=0.697, epoch=19, loss=0.974]

epoch:19, idx:7099/10845, loss:0.9738521118315173, acc:0.6969014084507043


 66%|██████▋   | 7201/10845 [26:36<13:50,  4.39it/s, acc=0.697, epoch=19, loss=0.974]

epoch:19, idx:7199/10845, loss:0.9735251343995333, acc:0.6971180555555555


 67%|██████▋   | 7300/10845 [26:58<13:08,  4.49it/s, acc=0.697, epoch=19, loss=0.973]

epoch:19, idx:7299/10845, loss:0.9726510952841746, acc:0.6970890410958904


 68%|██████▊   | 7401/10845 [27:21<12:31,  4.58it/s, acc=0.697, epoch=19, loss=0.973]

epoch:19, idx:7399/10845, loss:0.9731205515362121, acc:0.6967905405405406


 69%|██████▉   | 7500/10845 [27:43<12:14,  4.56it/s, acc=0.696, epoch=19, loss=0.975]

epoch:19, idx:7499/10845, loss:0.9749672315835953, acc:0.6963


 70%|███████   | 7600/10845 [28:05<11:38,  4.65it/s, acc=0.696, epoch=19, loss=0.977]

epoch:19, idx:7599/10845, loss:0.9767566240069113, acc:0.695921052631579


 71%|███████   | 7701/10845 [28:28<11:01,  4.75it/s, acc=0.696, epoch=19, loss=0.977]

epoch:19, idx:7699/10845, loss:0.9770593302513098, acc:0.6957467532467533


 72%|███████▏  | 7801/10845 [28:50<10:44,  4.72it/s, acc=0.696, epoch=19, loss=0.977]

epoch:19, idx:7799/10845, loss:0.9767696082057097, acc:0.6956089743589744


 73%|███████▎  | 7900/10845 [29:12<11:05,  4.42it/s, acc=0.696, epoch=19, loss=0.978]

epoch:19, idx:7899/10845, loss:0.9776213702069053, acc:0.6955063291139241


 74%|███████▍  | 8000/10845 [29:34<10:17,  4.60it/s, acc=0.696, epoch=19, loss=0.976]

epoch:19, idx:7999/10845, loss:0.9757799343466759, acc:0.69584375


 75%|███████▍  | 8101/10845 [29:57<09:40,  4.73it/s, acc=0.696, epoch=19, loss=0.977]

epoch:19, idx:8099/10845, loss:0.9771204079080511, acc:0.695679012345679


 76%|███████▌  | 8200/10845 [30:19<09:19,  4.73it/s, acc=0.696, epoch=19, loss=0.977]

epoch:19, idx:8199/10845, loss:0.9770582196189136, acc:0.6957317073170731


 77%|███████▋  | 8300/10845 [30:41<09:10,  4.62it/s, acc=0.696, epoch=19, loss=0.976]

epoch:19, idx:8299/10845, loss:0.975886061019208, acc:0.6959337349397591


 77%|███████▋  | 8400/10845 [31:03<09:11,  4.43it/s, acc=0.696, epoch=19, loss=0.976]

epoch:19, idx:8399/10845, loss:0.9764574881252789, acc:0.6958333333333333


 78%|███████▊  | 8501/10845 [31:26<08:35,  4.55it/s, acc=0.695, epoch=19, loss=0.978]

epoch:19, idx:8499/10845, loss:0.9783879242504344, acc:0.6954411764705882


 79%|███████▉  | 8601/10845 [31:48<08:29,  4.40it/s, acc=0.695, epoch=19, loss=0.979]

epoch:19, idx:8599/10845, loss:0.9792081261928691, acc:0.6951453488372094


 80%|████████  | 8700/10845 [32:10<08:03,  4.43it/s, acc=0.695, epoch=19, loss=0.98] 

epoch:19, idx:8699/10845, loss:0.9795190165919819, acc:0.6951436781609195


 81%|████████  | 8800/10845 [32:32<07:39,  4.46it/s, acc=0.695, epoch=19, loss=0.98] 

epoch:19, idx:8799/10845, loss:0.9796788106045939, acc:0.6950284090909091


 82%|████████▏ | 8900/10845 [32:55<06:57,  4.65it/s, acc=0.695, epoch=19, loss=0.981]

epoch:19, idx:8899/10845, loss:0.980590082286449, acc:0.6949719101123596


 83%|████████▎ | 9001/10845 [33:17<06:33,  4.69it/s, acc=0.695, epoch=19, loss=0.98] 

epoch:19, idx:8999/10845, loss:0.9804835581382115, acc:0.6949166666666666


 84%|████████▍ | 9100/10845 [33:39<07:07,  4.08it/s, acc=0.695, epoch=19, loss=0.981]

epoch:19, idx:9099/10845, loss:0.9812883971287654, acc:0.6948901098901099


 85%|████████▍ | 9200/10845 [34:01<06:25,  4.27it/s, acc=0.695, epoch=19, loss=0.981]

epoch:19, idx:9199/10845, loss:0.9810169840506885, acc:0.6948369565217392


 86%|████████▌ | 9300/10845 [34:23<05:45,  4.48it/s, acc=0.695, epoch=19, loss=0.982]

epoch:19, idx:9299/10845, loss:0.9818240563972022, acc:0.6946236559139785


 87%|████████▋ | 9401/10845 [34:46<05:26,  4.42it/s, acc=0.695, epoch=19, loss=0.982]

epoch:19, idx:9399/10845, loss:0.9823897030886184, acc:0.6945212765957447


 88%|████████▊ | 9500/10845 [35:08<05:02,  4.44it/s, acc=0.695, epoch=19, loss=0.982]

epoch:19, idx:9499/10845, loss:0.9820527756841559, acc:0.6945789473684211


 89%|████████▊ | 9600/10845 [35:30<04:43,  4.39it/s, acc=0.694, epoch=19, loss=0.983]

epoch:19, idx:9599/10845, loss:0.9832340421775977, acc:0.694296875


 89%|████████▉ | 9700/10845 [35:52<04:27,  4.28it/s, acc=0.694, epoch=19, loss=0.984]

epoch:19, idx:9699/10845, loss:0.9835961951054248, acc:0.6942525773195877


 90%|█████████ | 9801/10845 [36:15<03:48,  4.57it/s, acc=0.694, epoch=19, loss=0.983]

epoch:19, idx:9799/10845, loss:0.9830567634713893, acc:0.6941836734693878


 91%|█████████▏| 9900/10845 [36:36<03:25,  4.61it/s, acc=0.694, epoch=19, loss=0.984]

epoch:19, idx:9899/10845, loss:0.9840323017703162, acc:0.6940151515151515


 92%|█████████▏| 10000/10845 [36:59<03:11,  4.41it/s, acc=0.694, epoch=19, loss=0.985]

epoch:19, idx:9999/10845, loss:0.984595619893074, acc:0.693925


 93%|█████████▎| 10100/10845 [37:21<02:46,  4.46it/s, acc=0.694, epoch=19, loss=0.985]

epoch:19, idx:10099/10845, loss:0.9849840234293796, acc:0.6936386138613861


 94%|█████████▍| 10200/10845 [37:43<02:18,  4.67it/s, acc=0.694, epoch=19, loss=0.986]

epoch:19, idx:10199/10845, loss:0.9856880508976824, acc:0.6936029411764706


 95%|█████████▍| 10301/10845 [38:05<01:53,  4.78it/s, acc=0.693, epoch=19, loss=0.987]

epoch:19, idx:10299/10845, loss:0.9866056042909622, acc:0.6931310679611651


 96%|█████████▌| 10400/10845 [38:27<01:35,  4.68it/s, acc=0.693, epoch=19, loss=0.986]

epoch:19, idx:10399/10845, loss:0.9863056671332855, acc:0.693173076923077


 97%|█████████▋| 10501/10845 [38:49<01:11,  4.81it/s, acc=0.693, epoch=19, loss=0.986]

epoch:19, idx:10499/10845, loss:0.986488782349087, acc:0.6928333333333333


 98%|█████████▊| 10600/10845 [39:12<00:54,  4.47it/s, acc=0.693, epoch=19, loss=0.988]

epoch:19, idx:10599/10845, loss:0.9879658456905833, acc:0.6925


 99%|█████████▊| 10700/10845 [39:34<00:32,  4.43it/s, acc=0.693, epoch=19, loss=0.988]

epoch:19, idx:10699/10845, loss:0.9878670223294017, acc:0.6925


100%|█████████▉| 10800/10845 [39:57<00:10,  4.16it/s, acc=0.693, epoch=19, loss=0.987]

epoch:19, idx:10799/10845, loss:0.9868339929095021, acc:0.6930092592592593


100%|██████████| 10845/10845 [40:07<00:00,  4.44it/s, acc=0.693, epoch=19, loss=0.987]


epoch:19, idx:0/1275, loss:1.2155952453613281, acc:0.75
epoch:19, idx:100/1275, loss:1.3786039399628591, acc:0.6188118811881188
epoch:19, idx:200/1275, loss:1.2561056370758892, acc:0.6467661691542289
epoch:19, idx:300/1275, loss:1.2238537215711272, acc:0.6569767441860465
epoch:19, idx:400/1275, loss:1.2308105185739417, acc:0.6571072319201995
epoch:19, idx:500/1275, loss:1.2025775790452482, acc:0.6596806387225549
epoch:19, idx:600/1275, loss:1.2121736610590321, acc:0.6534941763727121
epoch:19, idx:700/1275, loss:1.2203914228077453, acc:0.6512125534950072
epoch:19, idx:800/1275, loss:1.240351980545101, acc:0.647003745318352
epoch:19, idx:900/1275, loss:1.2282264412574049, acc:0.6490011098779135
epoch:19, idx:1000/1275, loss:1.2283789695916951, acc:0.6493506493506493
epoch:19, idx:1100/1275, loss:1.214760468289811, acc:0.6512261580381471
epoch:19, idx:1200/1275, loss:1.2071834839353157, acc:0.6465445462114904


  1%|          | 100/10845 [00:21<39:45,  4.50it/s, acc=0.765, epoch=20, loss=0.776]

epoch:20, idx:99/10845, loss:0.7760083246231079, acc:0.765


  2%|▏         | 200/10845 [00:44<38:04,  4.66it/s, acc=0.741, epoch=20, loss=0.82] 

epoch:20, idx:199/10845, loss:0.8200859946012496, acc:0.74125


  3%|▎         | 300/10845 [01:06<36:01,  4.88it/s, acc=0.726, epoch=20, loss=0.851]

epoch:20, idx:299/10845, loss:0.8491679489612579, acc:0.7266666666666667


  4%|▎         | 400/10845 [01:27<37:54,  4.59it/s, acc=0.73, epoch=20, loss=0.851] 

epoch:20, idx:399/10845, loss:0.8512504133582115, acc:0.73


  5%|▍         | 501/10845 [01:50<37:43,  4.57it/s, acc=0.73, epoch=20, loss=0.86]  

epoch:20, idx:499/10845, loss:0.8614214100837707, acc:0.729


  6%|▌         | 601/10845 [02:12<34:21,  4.97it/s, acc=0.732, epoch=20, loss=0.854]

epoch:20, idx:599/10845, loss:0.8538489663600921, acc:0.7316666666666667


  6%|▋         | 700/10845 [02:34<37:09,  4.55it/s, acc=0.729, epoch=20, loss=0.864]

epoch:20, idx:699/10845, loss:0.8640114980084556, acc:0.7292857142857143


  7%|▋         | 801/10845 [02:56<38:54,  4.30it/s, acc=0.728, epoch=20, loss=0.876]

epoch:20, idx:799/10845, loss:0.8768955527245998, acc:0.7278125


  8%|▊         | 900/10845 [03:18<38:05,  4.35it/s, acc=0.726, epoch=20, loss=0.889]

epoch:20, idx:899/10845, loss:0.8894501003954146, acc:0.7258333333333333


  9%|▉         | 1000/10845 [03:41<36:55,  4.44it/s, acc=0.724, epoch=20, loss=0.894]

epoch:20, idx:999/10845, loss:0.8940868357419968, acc:0.72375


 10%|█         | 1100/10845 [04:03<38:31,  4.22it/s, acc=0.718, epoch=20, loss=0.907]

epoch:20, idx:1099/10845, loss:0.9073068033565175, acc:0.7184090909090909


 11%|█         | 1200/10845 [04:25<34:32,  4.65it/s, acc=0.718, epoch=20, loss=0.913]

epoch:20, idx:1199/10845, loss:0.9133707413077354, acc:0.7175


 12%|█▏        | 1301/10845 [04:48<34:31,  4.61it/s, acc=0.713, epoch=20, loss=0.928]

epoch:20, idx:1299/10845, loss:0.9266986538813664, acc:0.7132692307692308


 13%|█▎        | 1400/10845 [05:10<34:49,  4.52it/s, acc=0.711, epoch=20, loss=0.932]

epoch:20, idx:1399/10845, loss:0.9317288514545986, acc:0.7105357142857143


 14%|█▍        | 1500/10845 [05:32<33:09,  4.70it/s, acc=0.711, epoch=20, loss=0.929]

epoch:20, idx:1499/10845, loss:0.9285244388580323, acc:0.7108333333333333


 15%|█▍        | 1600/10845 [05:54<34:20,  4.49it/s, acc=0.708, epoch=20, loss=0.931]

epoch:20, idx:1599/10845, loss:0.9313952541351318, acc:0.708125


 16%|█▌        | 1700/10845 [06:16<32:53,  4.63it/s, acc=0.706, epoch=20, loss=0.938]

epoch:20, idx:1699/10845, loss:0.9378677443897023, acc:0.705735294117647


 17%|█▋        | 1800/10845 [06:38<33:03,  4.56it/s, acc=0.705, epoch=20, loss=0.941]

epoch:20, idx:1799/10845, loss:0.9411531329154968, acc:0.705


 18%|█▊        | 1900/10845 [07:00<32:07,  4.64it/s, acc=0.705, epoch=20, loss=0.942]

epoch:20, idx:1899/10845, loss:0.9422140814128674, acc:0.7052631578947368


 18%|█▊        | 2000/10845 [07:23<32:43,  4.51it/s, acc=0.707, epoch=20, loss=0.938]

epoch:20, idx:1999/10845, loss:0.9378800912499428, acc:0.706625


 19%|█▉        | 2100/10845 [07:45<33:05,  4.40it/s, acc=0.707, epoch=20, loss=0.933]

epoch:20, idx:2099/10845, loss:0.9332510949884142, acc:0.7073809523809523


 20%|██        | 2200/10845 [08:07<30:43,  4.69it/s, acc=0.706, epoch=20, loss=0.935]

epoch:20, idx:2199/10845, loss:0.9348334941538897, acc:0.7064772727272727


 21%|██        | 2301/10845 [08:30<31:55,  4.46it/s, acc=0.706, epoch=20, loss=0.941]

epoch:20, idx:2299/10845, loss:0.9410670681621718, acc:0.7058695652173913


 22%|██▏       | 2401/10845 [08:52<30:31,  4.61it/s, acc=0.706, epoch=20, loss=0.94] 

epoch:20, idx:2399/10845, loss:0.9403424463669459, acc:0.7059375


 23%|██▎       | 2501/10845 [09:14<30:47,  4.52it/s, acc=0.708, epoch=20, loss=0.934]

epoch:20, idx:2499/10845, loss:0.9335253131389618, acc:0.7084


 24%|██▍       | 2601/10845 [09:37<31:15,  4.39it/s, acc=0.708, epoch=20, loss=0.943]

epoch:20, idx:2599/10845, loss:0.9424160480499267, acc:0.7077884615384615


 25%|██▍       | 2701/10845 [10:00<29:04,  4.67it/s, acc=0.706, epoch=20, loss=0.947]

epoch:20, idx:2699/10845, loss:0.9472995225146965, acc:0.7063888888888888


 26%|██▌       | 2800/10845 [10:22<29:27,  4.55it/s, acc=0.706, epoch=20, loss=0.947]

epoch:20, idx:2799/10845, loss:0.9465130081347056, acc:0.7061607142857143


 27%|██▋       | 2900/10845 [10:44<31:27,  4.21it/s, acc=0.705, epoch=20, loss=0.949]

epoch:20, idx:2899/10845, loss:0.9490173926024601, acc:0.7049137931034483


 28%|██▊       | 3000/10845 [11:06<28:05,  4.65it/s, acc=0.705, epoch=20, loss=0.95] 

epoch:20, idx:2999/10845, loss:0.9503193473815919, acc:0.7045


 29%|██▊       | 3100/10845 [11:28<30:27,  4.24it/s, acc=0.704, epoch=20, loss=0.951]

epoch:20, idx:3099/10845, loss:0.9506472408771515, acc:0.7041935483870968


 30%|██▉       | 3201/10845 [11:51<28:31,  4.46it/s, acc=0.704, epoch=20, loss=0.951]

epoch:20, idx:3199/10845, loss:0.9508923037350178, acc:0.70421875


 30%|███       | 3300/10845 [12:13<27:41,  4.54it/s, acc=0.704, epoch=20, loss=0.949]

epoch:20, idx:3299/10845, loss:0.9485976817752376, acc:0.7042424242424242


 31%|███▏      | 3401/10845 [12:35<26:51,  4.62it/s, acc=0.704, epoch=20, loss=0.95] 

epoch:20, idx:3399/10845, loss:0.949362383519902, acc:0.7038970588235294


 32%|███▏      | 3500/10845 [12:57<27:47,  4.40it/s, acc=0.703, epoch=20, loss=0.95] 

epoch:20, idx:3499/10845, loss:0.949764957019261, acc:0.7032857142857143


 33%|███▎      | 3601/10845 [13:19<26:35,  4.54it/s, acc=0.703, epoch=20, loss=0.95] 

epoch:20, idx:3599/10845, loss:0.9504862568444676, acc:0.7032638888888889


 34%|███▍      | 3700/10845 [13:42<27:58,  4.26it/s, acc=0.703, epoch=20, loss=0.952]

epoch:20, idx:3699/10845, loss:0.9518138971522048, acc:0.7029054054054054


 35%|███▌      | 3801/10845 [14:04<28:24,  4.13it/s, acc=0.703, epoch=20, loss=0.95] 

epoch:20, idx:3799/10845, loss:0.9500838945413891, acc:0.703421052631579


 36%|███▌      | 3901/10845 [14:26<24:40,  4.69it/s, acc=0.703, epoch=20, loss=0.951]

epoch:20, idx:3899/10845, loss:0.9508817927959637, acc:0.703076923076923


 37%|███▋      | 4000/10845 [14:48<24:38,  4.63it/s, acc=0.704, epoch=20, loss=0.949]

epoch:20, idx:3999/10845, loss:0.9491371481716633, acc:0.7035625


 38%|███▊      | 4100/10845 [15:10<25:42,  4.37it/s, acc=0.703, epoch=20, loss=0.953]

epoch:20, idx:4099/10845, loss:0.9531203570017001, acc:0.7033536585365854


 39%|███▊      | 4200/10845 [15:33<25:19,  4.37it/s, acc=0.703, epoch=20, loss=0.954]

epoch:20, idx:4199/10845, loss:0.953995962767374, acc:0.7028571428571428


 40%|███▉      | 4300/10845 [15:55<24:29,  4.45it/s, acc=0.703, epoch=20, loss=0.956]

epoch:20, idx:4299/10845, loss:0.9559048589024433, acc:0.7029651162790698


 41%|████      | 4400/10845 [16:18<24:38,  4.36it/s, acc=0.703, epoch=20, loss=0.955]

epoch:20, idx:4399/10845, loss:0.9552341798625209, acc:0.7031818181818181


 41%|████▏     | 4500/10845 [16:40<22:51,  4.63it/s, acc=0.703, epoch=20, loss=0.955]

epoch:20, idx:4499/10845, loss:0.9548450419505438, acc:0.7026111111111111


 42%|████▏     | 4600/10845 [17:02<24:08,  4.31it/s, acc=0.703, epoch=20, loss=0.955]

epoch:20, idx:4599/10845, loss:0.955191075037355, acc:0.7029347826086957


 43%|████▎     | 4701/10845 [17:25<22:49,  4.49it/s, acc=0.703, epoch=20, loss=0.955]

epoch:20, idx:4699/10845, loss:0.9544118704313928, acc:0.703031914893617


 44%|████▍     | 4801/10845 [17:47<21:42,  4.64it/s, acc=0.702, epoch=20, loss=0.957]

epoch:20, idx:4799/10845, loss:0.9575559776897232, acc:0.7021354166666667


 45%|████▌     | 4901/10845 [18:09<21:41,  4.57it/s, acc=0.702, epoch=20, loss=0.957]

epoch:20, idx:4899/10845, loss:0.9570200418331185, acc:0.7016326530612245


 46%|████▌     | 5001/10845 [18:31<20:45,  4.69it/s, acc=0.701, epoch=20, loss=0.957]

epoch:20, idx:4999/10845, loss:0.9575151585221291, acc:0.70145


 47%|████▋     | 5101/10845 [18:54<20:44,  4.62it/s, acc=0.701, epoch=20, loss=0.959]

epoch:20, idx:5099/10845, loss:0.9591442252257291, acc:0.7008333333333333


 48%|████▊     | 5201/10845 [19:16<20:28,  4.60it/s, acc=0.701, epoch=20, loss=0.959]

epoch:20, idx:5199/10845, loss:0.95876444918605, acc:0.7013942307692308


 49%|████▉     | 5300/10845 [19:38<20:53,  4.42it/s, acc=0.701, epoch=20, loss=0.962]

epoch:20, idx:5299/10845, loss:0.9618090728656301, acc:0.7006132075471698


 50%|████▉     | 5401/10845 [20:01<20:08,  4.50it/s, acc=0.701, epoch=20, loss=0.959]

epoch:20, idx:5399/10845, loss:0.9591472998923726, acc:0.7008333333333333


 51%|█████     | 5501/10845 [20:23<19:14,  4.63it/s, acc=0.701, epoch=20, loss=0.958]

epoch:20, idx:5499/10845, loss:0.9580216339718212, acc:0.7012727272727273


 52%|█████▏    | 5600/10845 [20:45<19:58,  4.38it/s, acc=0.701, epoch=20, loss=0.957]

epoch:20, idx:5599/10845, loss:0.9573333254669394, acc:0.7013839285714286


 53%|█████▎    | 5700/10845 [21:07<18:32,  4.62it/s, acc=0.702, epoch=20, loss=0.956]

epoch:20, idx:5699/10845, loss:0.9562291451504356, acc:0.7016228070175439


 53%|█████▎    | 5800/10845 [21:29<19:40,  4.27it/s, acc=0.701, epoch=20, loss=0.958]

epoch:20, idx:5799/10845, loss:0.9576157865853145, acc:0.7013793103448276


 54%|█████▍    | 5901/10845 [21:52<17:58,  4.59it/s, acc=0.701, epoch=20, loss=0.959]

epoch:20, idx:5899/10845, loss:0.9591184814299567, acc:0.7008898305084745


 55%|█████▌    | 6001/10845 [22:14<17:07,  4.71it/s, acc=0.701, epoch=20, loss=0.959]

epoch:20, idx:5999/10845, loss:0.9591638341546058, acc:0.7009166666666666


 56%|█████▋    | 6101/10845 [22:36<17:18,  4.57it/s, acc=0.701, epoch=20, loss=0.96] 

epoch:20, idx:6099/10845, loss:0.9602855931540004, acc:0.7005327868852459


 57%|█████▋    | 6200/10845 [22:58<16:05,  4.81it/s, acc=0.701, epoch=20, loss=0.96] 

epoch:20, idx:6199/10845, loss:0.9597810907133164, acc:0.7005241935483871


 58%|█████▊    | 6300/10845 [23:20<17:18,  4.38it/s, acc=0.7, epoch=20, loss=0.961] 

epoch:20, idx:6299/10845, loss:0.9610823297689831, acc:0.7002777777777778


 59%|█████▉    | 6400/10845 [23:42<15:57,  4.64it/s, acc=0.7, epoch=20, loss=0.96]  

epoch:20, idx:6399/10845, loss:0.9602162103541195, acc:0.70046875


 60%|█████▉    | 6500/10845 [24:04<16:03,  4.51it/s, acc=0.701, epoch=20, loss=0.959]

epoch:20, idx:6499/10845, loss:0.9587278481079982, acc:0.7008846153846154


 61%|██████    | 6600/10845 [24:26<16:20,  4.33it/s, acc=0.7, epoch=20, loss=0.96]   

epoch:20, idx:6599/10845, loss:0.9597446022972916, acc:0.7003409090909091


 62%|██████▏   | 6701/10845 [24:49<15:31,  4.45it/s, acc=0.7, epoch=20, loss=0.96] 

epoch:20, idx:6699/10845, loss:0.9605445608363222, acc:0.7002238805970149


 63%|██████▎   | 6801/10845 [25:11<15:32,  4.34it/s, acc=0.7, epoch=20, loss=0.96] 

epoch:20, idx:6799/10845, loss:0.9605368265246643, acc:0.7002573529411765


 64%|██████▎   | 6901/10845 [25:33<14:45,  4.45it/s, acc=0.7, epoch=20, loss=0.961]

epoch:20, idx:6899/10845, loss:0.9607594160325286, acc:0.7002173913043478


 65%|██████▍   | 7000/10845 [25:56<14:47,  4.33it/s, acc=0.7, epoch=20, loss=0.961]

epoch:20, idx:6999/10845, loss:0.9606933265328407, acc:0.70025


 65%|██████▌   | 7101/10845 [26:18<13:23,  4.66it/s, acc=0.7, epoch=20, loss=0.961]  

epoch:20, idx:7099/10845, loss:0.9607592787960886, acc:0.7004577464788733


 66%|██████▋   | 7201/10845 [26:40<13:01,  4.66it/s, acc=0.7, epoch=20, loss=0.961]  

epoch:20, idx:7199/10845, loss:0.9607961942338281, acc:0.7001388888888889


 67%|██████▋   | 7300/10845 [27:02<13:36,  4.34it/s, acc=0.7, epoch=20, loss=0.961]

epoch:20, idx:7299/10845, loss:0.9608320152677902, acc:0.7001712328767123


 68%|██████▊   | 7400/10845 [27:24<12:50,  4.47it/s, acc=0.7, epoch=20, loss=0.96]  

epoch:20, idx:7399/10845, loss:0.9603602457610336, acc:0.7002702702702702


 69%|██████▉   | 7501/10845 [27:47<11:25,  4.88it/s, acc=0.7, epoch=20, loss=0.96]   

epoch:20, idx:7499/10845, loss:0.9598877689441045, acc:0.7000333333333333


 70%|███████   | 7600/10845 [28:09<11:27,  4.72it/s, acc=0.7, epoch=20, loss=0.959]

epoch:20, idx:7599/10845, loss:0.959393999207961, acc:0.7001973684210526


 71%|███████   | 7700/10845 [28:31<11:46,  4.45it/s, acc=0.7, epoch=20, loss=0.959]

epoch:20, idx:7699/10845, loss:0.9592193598638882, acc:0.7003896103896103


 72%|███████▏  | 7800/10845 [28:54<11:06,  4.57it/s, acc=0.7, epoch=20, loss=0.96] 

epoch:20, idx:7799/10845, loss:0.9599588926098286, acc:0.7000641025641026


 73%|███████▎  | 7900/10845 [29:16<10:46,  4.55it/s, acc=0.7, epoch=20, loss=0.96] 

epoch:20, idx:7899/10845, loss:0.9600248967318595, acc:0.7000632911392405


 74%|███████▍  | 8001/10845 [29:38<09:47,  4.84it/s, acc=0.7, epoch=20, loss=0.96] 

epoch:20, idx:7999/10845, loss:0.9597007187828421, acc:0.700125


 75%|███████▍  | 8101/10845 [30:01<09:49,  4.65it/s, acc=0.699, epoch=20, loss=0.963]

epoch:20, idx:8099/10845, loss:0.9627076655626297, acc:0.6993518518518519


 76%|███████▌  | 8201/10845 [30:23<09:42,  4.54it/s, acc=0.699, epoch=20, loss=0.963]

epoch:20, idx:8199/10845, loss:0.9630608160248617, acc:0.6993292682926829


 77%|███████▋  | 8300/10845 [30:45<08:55,  4.76it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:8299/10845, loss:0.9641821556708899, acc:0.6992469879518072


 77%|███████▋  | 8400/10845 [31:07<08:40,  4.70it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:8399/10845, loss:0.9640300742714178, acc:0.6993154761904762


 78%|███████▊  | 8501/10845 [31:30<08:53,  4.39it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:8499/10845, loss:0.9640332289934158, acc:0.6993823529411765


 79%|███████▉  | 8601/10845 [31:51<08:08,  4.60it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:8599/10845, loss:0.9641336941788363, acc:0.6989244186046512


 80%|████████  | 8700/10845 [32:13<08:08,  4.39it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:8699/10845, loss:0.9638944212664133, acc:0.6990229885057472


 81%|████████  | 8800/10845 [32:36<07:51,  4.34it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:8799/10845, loss:0.9642529445683414, acc:0.6990340909090909


 82%|████████▏ | 8901/10845 [32:59<06:52,  4.72it/s, acc=0.699, epoch=20, loss=0.963]

epoch:20, idx:8899/10845, loss:0.9632838578639406, acc:0.6992977528089888


 83%|████████▎ | 9000/10845 [33:21<07:07,  4.32it/s, acc=0.699, epoch=20, loss=0.963]

epoch:20, idx:8999/10845, loss:0.963126598086622, acc:0.6992777777777778


 84%|████████▍ | 9100/10845 [33:43<05:54,  4.92it/s, acc=0.7, epoch=20, loss=0.962]  

epoch:20, idx:9099/10845, loss:0.9620357584363811, acc:0.6996428571428571


 85%|████████▍ | 9200/10845 [34:05<05:56,  4.62it/s, acc=0.699, epoch=20, loss=0.963]

epoch:20, idx:9199/10845, loss:0.9630567993094091, acc:0.6994565217391304


 86%|████████▌ | 9300/10845 [34:27<05:59,  4.29it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:9299/10845, loss:0.9641952677247345, acc:0.6991666666666667


 87%|████████▋ | 9400/10845 [34:50<05:29,  4.39it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:9399/10845, loss:0.9640643830565696, acc:0.6991223404255319


 88%|████████▊ | 9500/10845 [35:12<05:13,  4.29it/s, acc=0.699, epoch=20, loss=0.964]

epoch:20, idx:9499/10845, loss:0.964058180520409, acc:0.6991052631578948


 89%|████████▊ | 9600/10845 [35:35<04:28,  4.64it/s, acc=0.699, epoch=20, loss=0.965]

epoch:20, idx:9599/10845, loss:0.9645850925023357, acc:0.6991145833333333


 89%|████████▉ | 9700/10845 [35:57<04:29,  4.25it/s, acc=0.699, epoch=20, loss=0.965]

epoch:20, idx:9699/10845, loss:0.9646731083048987, acc:0.6990463917525773


 90%|█████████ | 9800/10845 [36:19<03:55,  4.43it/s, acc=0.699, epoch=20, loss=0.965]

epoch:20, idx:9799/10845, loss:0.9647204619281146, acc:0.6991836734693877


 91%|█████████▏| 9900/10845 [36:41<03:25,  4.60it/s, acc=0.699, epoch=20, loss=0.967]

epoch:20, idx:9899/10845, loss:0.9665534891985884, acc:0.6988636363636364


 92%|█████████▏| 10000/10845 [37:03<03:05,  4.56it/s, acc=0.699, epoch=20, loss=0.966]

epoch:20, idx:9999/10845, loss:0.9663841657280922, acc:0.698625


 93%|█████████▎| 10101/10845 [37:26<02:46,  4.48it/s, acc=0.699, epoch=20, loss=0.967]

epoch:20, idx:10099/10845, loss:0.9670081345633705, acc:0.6986138613861386


 94%|█████████▍| 10201/10845 [37:48<02:22,  4.51it/s, acc=0.699, epoch=20, loss=0.967]

epoch:20, idx:10199/10845, loss:0.9668719834556767, acc:0.6986029411764706


 95%|█████████▍| 10300/10845 [38:10<01:57,  4.65it/s, acc=0.699, epoch=20, loss=0.967]

epoch:20, idx:10299/10845, loss:0.9666201590855145, acc:0.6986165048543689


 96%|█████████▌| 10401/10845 [38:33<01:39,  4.48it/s, acc=0.698, epoch=20, loss=0.968]

epoch:20, idx:10399/10845, loss:0.9679477393569854, acc:0.698173076923077


 97%|█████████▋| 10501/10845 [38:55<01:18,  4.36it/s, acc=0.698, epoch=20, loss=0.968]

epoch:20, idx:10499/10845, loss:0.9678066805771419, acc:0.6981904761904761


 98%|█████████▊| 10600/10845 [39:17<00:52,  4.63it/s, acc=0.698, epoch=20, loss=0.968]

epoch:20, idx:10599/10845, loss:0.968220675508931, acc:0.6980660377358491


 99%|█████████▊| 10700/10845 [39:39<00:31,  4.58it/s, acc=0.698, epoch=20, loss=0.969]

epoch:20, idx:10699/10845, loss:0.9687519239599459, acc:0.6982242990654206


100%|█████████▉| 10800/10845 [40:02<00:09,  4.65it/s, acc=0.698, epoch=20, loss=0.969]

epoch:20, idx:10799/10845, loss:0.9688608030809297, acc:0.6980787037037037


100%|██████████| 10845/10845 [40:12<00:00,  4.71it/s, acc=0.698, epoch=20, loss=0.969]


epoch:20, idx:0/1275, loss:1.312317132949829, acc:0.5
epoch:20, idx:100/1275, loss:1.3460183497702722, acc:0.6212871287128713
epoch:20, idx:200/1275, loss:1.2640809656968757, acc:0.6417910447761194
epoch:20, idx:300/1275, loss:1.217012892529814, acc:0.6511627906976745
epoch:20, idx:400/1275, loss:1.2162255884108697, acc:0.6514962593516209
epoch:20, idx:500/1275, loss:1.19829112993267, acc:0.6497005988023952
epoch:20, idx:600/1275, loss:1.211397970774011, acc:0.6447587354409318
epoch:20, idx:700/1275, loss:1.2179068644954882, acc:0.644793152639087
epoch:20, idx:800/1275, loss:1.2357463217555509, acc:0.6426342072409488
epoch:20, idx:900/1275, loss:1.221441132246985, acc:0.6431742508324084
epoch:20, idx:1000/1275, loss:1.226997543762733, acc:0.6398601398601399
epoch:20, idx:1100/1275, loss:1.2154887151328355, acc:0.6416893732970027
epoch:20, idx:1200/1275, loss:1.2110140094352106, acc:0.638218151540383


  1%|          | 100/10845 [00:22<38:39,  4.63it/s, acc=0.713, epoch=21, loss=0.885]

epoch:21, idx:99/10845, loss:0.8849712038040161, acc:0.7125


  2%|▏         | 200/10845 [00:45<44:56,  3.95it/s, acc=0.719, epoch=21, loss=0.87] 

epoch:21, idx:199/10845, loss:0.8700363689661026, acc:0.71875


  3%|▎         | 300/10845 [01:07<38:57,  4.51it/s, acc=0.701, epoch=21, loss=0.937]

epoch:21, idx:299/10845, loss:0.9368745406468709, acc:0.7008333333333333


  4%|▎         | 401/10845 [01:29<36:44,  4.74it/s, acc=0.697, epoch=21, loss=0.966]

epoch:21, idx:399/10845, loss:0.9661300879716873, acc:0.696875


  5%|▍         | 501/10845 [01:51<37:06,  4.65it/s, acc=0.698, epoch=21, loss=0.954]

epoch:21, idx:499/10845, loss:0.9475458805561066, acc:0.699


  6%|▌         | 600/10845 [02:13<41:02,  4.16it/s, acc=0.701, epoch=21, loss=0.952]

epoch:21, idx:599/10845, loss:0.9517747868100802, acc:0.7008333333333333


  6%|▋         | 700/10845 [02:35<35:07,  4.81it/s, acc=0.697, epoch=21, loss=0.957]

epoch:21, idx:699/10845, loss:0.9569994585003172, acc:0.6971428571428572


  7%|▋         | 800/10845 [02:58<40:43,  4.11it/s, acc=0.694, epoch=21, loss=0.963]

epoch:21, idx:799/10845, loss:0.9629206185787916, acc:0.69375


  8%|▊         | 900/10845 [03:20<36:33,  4.53it/s, acc=0.698, epoch=21, loss=0.962]

epoch:21, idx:899/10845, loss:0.9615133510033289, acc:0.6975


  9%|▉         | 1000/10845 [03:43<36:28,  4.50it/s, acc=0.699, epoch=21, loss=0.963]

epoch:21, idx:999/10845, loss:0.9634181995987892, acc:0.699


 10%|█         | 1100/10845 [04:05<35:51,  4.53it/s, acc=0.701, epoch=21, loss=0.959]

epoch:21, idx:1099/10845, loss:0.958512524745681, acc:0.7011363636363637


 11%|█         | 1200/10845 [04:26<35:55,  4.47it/s, acc=0.701, epoch=21, loss=0.958]

epoch:21, idx:1199/10845, loss:0.9583197300632794, acc:0.7010416666666667


 12%|█▏        | 1301/10845 [04:49<36:46,  4.33it/s, acc=0.705, epoch=21, loss=0.947]

epoch:21, idx:1299/10845, loss:0.946976795563331, acc:0.7046153846153846


 13%|█▎        | 1400/10845 [05:11<35:08,  4.48it/s, acc=0.704, epoch=21, loss=0.951]

epoch:21, idx:1399/10845, loss:0.9506770370687757, acc:0.7044642857142858


 14%|█▍        | 1500/10845 [05:33<33:56,  4.59it/s, acc=0.706, epoch=21, loss=0.947]

epoch:21, idx:1499/10845, loss:0.9473222396373748, acc:0.7056666666666667


 15%|█▍        | 1600/10845 [05:55<35:05,  4.39it/s, acc=0.703, epoch=21, loss=0.954]

epoch:21, idx:1599/10845, loss:0.953751228749752, acc:0.70328125


 16%|█▌        | 1700/10845 [06:17<31:18,  4.87it/s, acc=0.702, epoch=21, loss=0.96] 

epoch:21, idx:1699/10845, loss:0.9594588006243986, acc:0.7025


 17%|█▋        | 1800/10845 [06:39<34:39,  4.35it/s, acc=0.702, epoch=21, loss=0.96] 

epoch:21, idx:1799/10845, loss:0.9602500970496072, acc:0.7020833333333333


 18%|█▊        | 1900/10845 [07:04<46:46,  3.19it/s, acc=0.703, epoch=21, loss=0.959]

epoch:21, idx:1899/10845, loss:0.9585117328794379, acc:0.7028947368421052


 18%|█▊        | 2000/10845 [07:26<34:20,  4.29it/s, acc=0.705, epoch=21, loss=0.953]

epoch:21, idx:1999/10845, loss:0.9525145149230957, acc:0.7045


 19%|█▉        | 2101/10845 [07:48<32:14,  4.52it/s, acc=0.705, epoch=21, loss=0.952]

epoch:21, idx:2099/10845, loss:0.9528399239267622, acc:0.7047619047619048


 20%|██        | 2200/10845 [08:10<30:42,  4.69it/s, acc=0.704, epoch=21, loss=0.955]

epoch:21, idx:2199/10845, loss:0.9550505911220204, acc:0.7039772727272727


 21%|██        | 2300/10845 [08:33<31:31,  4.52it/s, acc=0.704, epoch=21, loss=0.954]

epoch:21, idx:2299/10845, loss:0.9538603199046591, acc:0.7043478260869566


 22%|██▏       | 2400/10845 [08:55<30:28,  4.62it/s, acc=0.703, epoch=21, loss=0.958]

epoch:21, idx:2399/10845, loss:0.958176089078188, acc:0.7033333333333334


 23%|██▎       | 2500/10845 [09:17<29:48,  4.67it/s, acc=0.703, epoch=21, loss=0.961]

epoch:21, idx:2499/10845, loss:0.9605855695724488, acc:0.7029


 24%|██▍       | 2601/10845 [09:39<29:56,  4.59it/s, acc=0.703, epoch=21, loss=0.961]

epoch:21, idx:2599/10845, loss:0.9608419831211751, acc:0.703076923076923


 25%|██▍       | 2700/10845 [10:01<30:06,  4.51it/s, acc=0.702, epoch=21, loss=0.964]

epoch:21, idx:2699/10845, loss:0.9642279793377276, acc:0.7018518518518518


 26%|██▌       | 2801/10845 [10:23<29:03,  4.61it/s, acc=0.702, epoch=21, loss=0.962]

epoch:21, idx:2799/10845, loss:0.9624197569276606, acc:0.701875


 27%|██▋       | 2900/10845 [10:45<28:01,  4.72it/s, acc=0.702, epoch=21, loss=0.963]

epoch:21, idx:2899/10845, loss:0.9628102657507206, acc:0.7020689655172414


 28%|██▊       | 3001/10845 [11:08<28:58,  4.51it/s, acc=0.702, epoch=21, loss=0.963]

epoch:21, idx:2999/10845, loss:0.9635929045677185, acc:0.7019166666666666


 29%|██▊       | 3100/10845 [11:30<28:12,  4.58it/s, acc=0.702, epoch=21, loss=0.963]

epoch:21, idx:3099/10845, loss:0.9634900477624708, acc:0.7016129032258065


 30%|██▉       | 3200/10845 [11:52<28:07,  4.53it/s, acc=0.702, epoch=21, loss=0.961]

epoch:21, idx:3199/10845, loss:0.9607788886874914, acc:0.7021875


 30%|███       | 3301/10845 [12:15<27:21,  4.60it/s, acc=0.701, epoch=21, loss=0.965]

epoch:21, idx:3299/10845, loss:0.9656254749587088, acc:0.7008333333333333


 31%|███▏      | 3400/10845 [12:37<25:58,  4.78it/s, acc=0.702, epoch=21, loss=0.962]

epoch:21, idx:3399/10845, loss:0.9624078658047844, acc:0.7018382352941176


 32%|███▏      | 3500/10845 [12:59<28:15,  4.33it/s, acc=0.703, epoch=21, loss=0.963]

epoch:21, idx:3499/10845, loss:0.963386767574719, acc:0.7025


 33%|███▎      | 3600/10845 [13:21<29:26,  4.10it/s, acc=0.703, epoch=21, loss=0.961]

epoch:21, idx:3599/10845, loss:0.9612725951770942, acc:0.7027083333333334


 34%|███▍      | 3701/10845 [13:44<27:04,  4.40it/s, acc=0.703, epoch=21, loss=0.96] 

epoch:21, idx:3699/10845, loss:0.9606378439310435, acc:0.7033108108108108


 35%|███▌      | 3800/10845 [14:06<26:11,  4.48it/s, acc=0.704, epoch=21, loss=0.958]

epoch:21, idx:3799/10845, loss:0.9582418213392558, acc:0.7035526315789473


 36%|███▌      | 3900/10845 [14:28<27:38,  4.19it/s, acc=0.704, epoch=21, loss=0.959]

epoch:21, idx:3899/10845, loss:0.9587912906133211, acc:0.7040384615384615


 37%|███▋      | 4000/10845 [14:51<25:48,  4.42it/s, acc=0.705, epoch=21, loss=0.957]

epoch:21, idx:3999/10845, loss:0.9570043911933899, acc:0.704875


 38%|███▊      | 4100/10845 [15:13<23:57,  4.69it/s, acc=0.705, epoch=21, loss=0.955]

epoch:21, idx:4099/10845, loss:0.955007840743879, acc:0.7049390243902439


 39%|███▊      | 4200/10845 [15:35<24:04,  4.60it/s, acc=0.706, epoch=21, loss=0.955]

epoch:21, idx:4199/10845, loss:0.9545360087213062, acc:0.7057142857142857


 40%|███▉      | 4300/10845 [15:58<24:31,  4.45it/s, acc=0.705, epoch=21, loss=0.956]

epoch:21, idx:4299/10845, loss:0.956305113088253, acc:0.7047674418604651


 41%|████      | 4400/10845 [16:20<22:42,  4.73it/s, acc=0.705, epoch=21, loss=0.957]

epoch:21, idx:4399/10845, loss:0.9566473828662525, acc:0.704659090909091


 42%|████▏     | 4501/10845 [16:42<22:12,  4.76it/s, acc=0.705, epoch=21, loss=0.954]

epoch:21, idx:4499/10845, loss:0.9543959007263184, acc:0.7052777777777778


 42%|████▏     | 4600/10845 [17:04<24:40,  4.22it/s, acc=0.706, epoch=21, loss=0.952]

epoch:21, idx:4599/10845, loss:0.9517684374684873, acc:0.7064130434782608


 43%|████▎     | 4700/10845 [17:26<24:16,  4.22it/s, acc=0.707, epoch=21, loss=0.95] 

epoch:21, idx:4699/10845, loss:0.9502732446599514, acc:0.7067021276595745


 44%|████▍     | 4800/10845 [17:48<21:14,  4.74it/s, acc=0.706, epoch=21, loss=0.954]

epoch:21, idx:4799/10845, loss:0.9537339267631372, acc:0.7060416666666667


 45%|████▌     | 4900/10845 [18:11<21:54,  4.52it/s, acc=0.706, epoch=21, loss=0.956]

epoch:21, idx:4899/10845, loss:0.9560816201141903, acc:0.7055102040816327


 46%|████▌     | 5000/10845 [18:33<21:00,  4.64it/s, acc=0.705, epoch=21, loss=0.957]

epoch:21, idx:4999/10845, loss:0.9566196115493775, acc:0.705


 47%|████▋     | 5101/10845 [18:56<21:37,  4.43it/s, acc=0.705, epoch=21, loss=0.956]

epoch:21, idx:5099/10845, loss:0.9564884461842331, acc:0.7053921568627451


 48%|████▊     | 5201/10845 [19:18<20:16,  4.64it/s, acc=0.705, epoch=21, loss=0.957]

epoch:21, idx:5199/10845, loss:0.956611958512893, acc:0.7054326923076923


 49%|████▉     | 5301/10845 [19:40<21:02,  4.39it/s, acc=0.705, epoch=21, loss=0.955]

epoch:21, idx:5299/10845, loss:0.9550700496502642, acc:0.7054245283018868


 50%|████▉     | 5401/10845 [20:02<20:33,  4.41it/s, acc=0.705, epoch=21, loss=0.956]

epoch:21, idx:5399/10845, loss:0.9557982054242381, acc:0.7046759259259259


 51%|█████     | 5500/10845 [20:25<19:19,  4.61it/s, acc=0.705, epoch=21, loss=0.954]

epoch:21, idx:5499/10845, loss:0.9540351508747448, acc:0.7050909090909091


 52%|█████▏    | 5600/10845 [20:47<19:02,  4.59it/s, acc=0.704, epoch=21, loss=0.958]

epoch:21, idx:5599/10845, loss:0.9583241963812283, acc:0.7040625


 53%|█████▎    | 5700/10845 [21:09<19:55,  4.31it/s, acc=0.704, epoch=21, loss=0.957]

epoch:21, idx:5699/10845, loss:0.9573400527970832, acc:0.7041666666666667


 53%|█████▎    | 5800/10845 [21:31<18:21,  4.58it/s, acc=0.704, epoch=21, loss=0.958]

epoch:21, idx:5799/10845, loss:0.9581373399085013, acc:0.7039655172413793


 54%|█████▍    | 5901/10845 [21:53<18:49,  4.38it/s, acc=0.704, epoch=21, loss=0.958]

epoch:21, idx:5899/10845, loss:0.9580564224517951, acc:0.7040254237288136


 55%|█████▌    | 6001/10845 [22:15<18:01,  4.48it/s, acc=0.704, epoch=21, loss=0.957]

epoch:21, idx:5999/10845, loss:0.9570160918235778, acc:0.703875


 56%|█████▋    | 6101/10845 [22:37<17:28,  4.53it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:6099/10845, loss:0.9571119538291556, acc:0.7034426229508197


 57%|█████▋    | 6200/10845 [22:59<16:50,  4.60it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:6199/10845, loss:0.9571004951192487, acc:0.7031854838709677


 58%|█████▊    | 6301/10845 [23:22<16:47,  4.51it/s, acc=0.703, epoch=21, loss=0.956]

epoch:21, idx:6299/10845, loss:0.9561537512143453, acc:0.7032539682539682


 59%|█████▉    | 6400/10845 [23:44<15:39,  4.73it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:6399/10845, loss:0.9570514618605376, acc:0.7033203125


 60%|█████▉    | 6500/10845 [24:06<16:24,  4.41it/s, acc=0.703, epoch=21, loss=0.959]

epoch:21, idx:6499/10845, loss:0.9588322479724884, acc:0.7030384615384615


 61%|██████    | 6600/10845 [24:29<15:34,  4.54it/s, acc=0.703, epoch=21, loss=0.958]

epoch:21, idx:6599/10845, loss:0.9583804212916981, acc:0.7031818181818181


 62%|██████▏   | 6700/10845 [24:51<15:24,  4.48it/s, acc=0.703, epoch=21, loss=0.958]

epoch:21, idx:6699/10845, loss:0.9575229251384735, acc:0.7030597014925373


 63%|██████▎   | 6800/10845 [25:13<14:15,  4.73it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:6799/10845, loss:0.9572020719857777, acc:0.703125


 64%|██████▎   | 6901/10845 [25:35<14:30,  4.53it/s, acc=0.703, epoch=21, loss=0.956]

epoch:21, idx:6899/10845, loss:0.9562518949957861, acc:0.7032971014492754


 65%|██████▍   | 7001/10845 [25:58<14:06,  4.54it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:6999/10845, loss:0.9568468208483287, acc:0.7029285714285715


 65%|██████▌   | 7100/10845 [26:20<14:20,  4.35it/s, acc=0.703, epoch=21, loss=0.956]

epoch:21, idx:7099/10845, loss:0.9563350575742587, acc:0.703274647887324


 66%|██████▋   | 7200/10845 [26:42<12:38,  4.80it/s, acc=0.703, epoch=21, loss=0.956]

epoch:21, idx:7199/10845, loss:0.956167480531666, acc:0.7034722222222223


 67%|██████▋   | 7301/10845 [27:04<12:30,  4.72it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:7299/10845, loss:0.9573569710654756, acc:0.7030821917808219


 68%|██████▊   | 7400/10845 [27:26<12:20,  4.65it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:7399/10845, loss:0.9574882990403756, acc:0.7029054054054054


 69%|██████▉   | 7500/10845 [27:49<11:42,  4.76it/s, acc=0.703, epoch=21, loss=0.957]

epoch:21, idx:7499/10845, loss:0.9569229369123777, acc:0.7030666666666666


 70%|███████   | 7600/10845 [28:11<11:44,  4.61it/s, acc=0.703, epoch=21, loss=0.958]

epoch:21, idx:7599/10845, loss:0.9575977265481886, acc:0.7027302631578948


 71%|███████   | 7700/10845 [28:33<12:07,  4.32it/s, acc=0.702, epoch=21, loss=0.959]

epoch:21, idx:7699/10845, loss:0.9591943018428691, acc:0.7022077922077922


 72%|███████▏  | 7800/10845 [28:55<10:58,  4.63it/s, acc=0.702, epoch=21, loss=0.959]

epoch:21, idx:7799/10845, loss:0.9587341121335824, acc:0.702275641025641


 73%|███████▎  | 7901/10845 [29:18<10:15,  4.78it/s, acc=0.702, epoch=21, loss=0.959]

epoch:21, idx:7899/10845, loss:0.9591376747021192, acc:0.7022784810126582


 74%|███████▍  | 8000/10845 [29:39<09:53,  4.79it/s, acc=0.703, epoch=21, loss=0.959]

epoch:21, idx:7999/10845, loss:0.9585398383699357, acc:0.70253125


 75%|███████▍  | 8100/10845 [30:01<10:05,  4.54it/s, acc=0.703, epoch=21, loss=0.958]

epoch:21, idx:8099/10845, loss:0.9577229265482338, acc:0.7026234567901235


 76%|███████▌  | 8201/10845 [30:24<09:51,  4.47it/s, acc=0.703, epoch=21, loss=0.959]

epoch:21, idx:8199/10845, loss:0.9585243703352242, acc:0.7026219512195122


 77%|███████▋  | 8300/10845 [30:45<08:57,  4.73it/s, acc=0.703, epoch=21, loss=0.96] 

epoch:21, idx:8299/10845, loss:0.9601131170162236, acc:0.7025602409638554


 77%|███████▋  | 8400/10845 [31:08<09:28,  4.30it/s, acc=0.702, epoch=21, loss=0.96] 

epoch:21, idx:8399/10845, loss:0.960045385825492, acc:0.7022619047619048


 78%|███████▊  | 8501/10845 [31:30<08:32,  4.58it/s, acc=0.702, epoch=21, loss=0.961]

epoch:21, idx:8499/10845, loss:0.9605826842890066, acc:0.7022058823529411


 79%|███████▉  | 8601/10845 [31:52<08:37,  4.33it/s, acc=0.702, epoch=21, loss=0.961]

epoch:21, idx:8599/10845, loss:0.9607886685985465, acc:0.7020930232558139


 80%|████████  | 8701/10845 [32:14<07:50,  4.56it/s, acc=0.702, epoch=21, loss=0.96] 

epoch:21, idx:8699/10845, loss:0.9605589442383284, acc:0.7021551724137931


 81%|████████  | 8800/10845 [32:37<07:54,  4.31it/s, acc=0.702, epoch=21, loss=0.96] 

epoch:21, idx:8799/10845, loss:0.9604457706043666, acc:0.7023579545454546


 82%|████████▏ | 8901/10845 [32:59<07:09,  4.53it/s, acc=0.702, epoch=21, loss=0.962]

epoch:21, idx:8899/10845, loss:0.961536133567269, acc:0.7017977528089888


 83%|████████▎ | 9001/10845 [33:22<06:47,  4.53it/s, acc=0.702, epoch=21, loss=0.962]

epoch:21, idx:8999/10845, loss:0.9619128172496955, acc:0.7015555555555556


 84%|████████▍ | 9100/10845 [33:44<06:29,  4.48it/s, acc=0.701, epoch=21, loss=0.963]

epoch:21, idx:9099/10845, loss:0.9630290909881121, acc:0.7013461538461538


 85%|████████▍ | 9200/10845 [34:06<06:08,  4.46it/s, acc=0.701, epoch=21, loss=0.962]

epoch:21, idx:9199/10845, loss:0.9620438251644373, acc:0.7013858695652174


 86%|████████▌ | 9300/10845 [34:29<05:31,  4.66it/s, acc=0.701, epoch=21, loss=0.963]

epoch:21, idx:9299/10845, loss:0.9627958296543808, acc:0.7012903225806452


 87%|████████▋ | 9400/10845 [34:51<05:08,  4.69it/s, acc=0.702, epoch=21, loss=0.962]

epoch:21, idx:9399/10845, loss:0.9620612153823072, acc:0.7016489361702127


 88%|████████▊ | 9500/10845 [35:14<04:53,  4.58it/s, acc=0.702, epoch=21, loss=0.961]

epoch:21, idx:9499/10845, loss:0.9614000353907284, acc:0.7018947368421052


 89%|████████▊ | 9600/10845 [35:36<04:34,  4.53it/s, acc=0.702, epoch=21, loss=0.963]

epoch:21, idx:9599/10845, loss:0.962513541749989, acc:0.7018489583333334


 89%|████████▉ | 9700/10845 [35:58<04:21,  4.38it/s, acc=0.702, epoch=21, loss=0.963]

epoch:21, idx:9699/10845, loss:0.9632314910931685, acc:0.7017525773195876


 90%|█████████ | 9801/10845 [36:21<03:51,  4.50it/s, acc=0.701, epoch=21, loss=0.964]

epoch:21, idx:9799/10845, loss:0.963744924016753, acc:0.7015051020408163


 91%|█████████▏| 9900/10845 [36:43<03:30,  4.49it/s, acc=0.701, epoch=21, loss=0.965]

epoch:21, idx:9899/10845, loss:0.9650026637345853, acc:0.701060606060606


 92%|█████████▏| 10001/10845 [37:05<03:09,  4.45it/s, acc=0.701, epoch=21, loss=0.964]

epoch:21, idx:9999/10845, loss:0.9643195611804724, acc:0.7012


 93%|█████████▎| 10100/10845 [37:27<02:45,  4.49it/s, acc=0.702, epoch=21, loss=0.964]

epoch:21, idx:10099/10845, loss:0.9636250261121457, acc:0.7015346534653465


 94%|█████████▍| 10200/10845 [37:49<02:19,  4.62it/s, acc=0.701, epoch=21, loss=0.964]

epoch:21, idx:10199/10845, loss:0.9640172790166209, acc:0.7014460784313725


 95%|█████████▍| 10300/10845 [38:12<02:03,  4.40it/s, acc=0.701, epoch=21, loss=0.966]

epoch:21, idx:10299/10845, loss:0.9658027288144074, acc:0.7011407766990291


 96%|█████████▌| 10401/10845 [38:34<01:33,  4.74it/s, acc=0.701, epoch=21, loss=0.966]

epoch:21, idx:10399/10845, loss:0.9654702120073713, acc:0.7012019230769231


 97%|█████████▋| 10500/10845 [38:56<01:16,  4.53it/s, acc=0.701, epoch=21, loss=0.965]

epoch:21, idx:10499/10845, loss:0.9654376447626523, acc:0.701


 98%|█████████▊| 10600/10845 [39:18<00:56,  4.36it/s, acc=0.701, epoch=21, loss=0.966]

epoch:21, idx:10599/10845, loss:0.966129558949538, acc:0.7007547169811321


 99%|█████████▊| 10701/10845 [39:41<00:30,  4.77it/s, acc=0.7, epoch=21, loss=0.966]  

epoch:21, idx:10699/10845, loss:0.9662727088766677, acc:0.7004906542056075


100%|█████████▉| 10800/10845 [40:02<00:10,  4.49it/s, acc=0.701, epoch=21, loss=0.966]

epoch:21, idx:10799/10845, loss:0.9663729896120451, acc:0.7006481481481481


100%|██████████| 10845/10845 [40:12<00:00,  4.47it/s, acc=0.701, epoch=21, loss=0.966]


epoch:21, idx:0/1275, loss:1.3453624248504639, acc:0.5
epoch:21, idx:100/1275, loss:1.379122273756726, acc:0.6212871287128713
epoch:21, idx:200/1275, loss:1.2766206472074215, acc:0.6305970149253731
epoch:21, idx:300/1275, loss:1.240567302387022, acc:0.6453488372093024
epoch:21, idx:400/1275, loss:1.2250585960331106, acc:0.6477556109725686
epoch:21, idx:500/1275, loss:1.1936849607916886, acc:0.6526946107784432
epoch:21, idx:600/1275, loss:1.2074919046459103, acc:0.64891846921797
epoch:21, idx:700/1275, loss:1.2107419384359124, acc:0.6483594864479315
epoch:21, idx:800/1275, loss:1.2287863376881747, acc:0.6454431960049938
epoch:21, idx:900/1275, loss:1.2202516261004448, acc:0.646503884572697
epoch:21, idx:1000/1275, loss:1.2216761696946015, acc:0.6451048951048951
epoch:21, idx:1100/1275, loss:1.209523156163478, acc:0.6469118982742961
epoch:21, idx:1200/1275, loss:1.2059320001975384, acc:0.6430058284762697


  1%|          | 100/10845 [00:22<42:02,  4.26it/s, acc=0.685, epoch=22, loss=1]    

epoch:22, idx:99/10845, loss:1.0044278657436372, acc:0.685


  2%|▏         | 200/10845 [00:44<40:59,  4.33it/s, acc=0.718, epoch=22, loss=0.89] 

epoch:22, idx:199/10845, loss:0.8897649699449539, acc:0.7175


  3%|▎         | 300/10845 [01:07<40:16,  4.36it/s, acc=0.721, epoch=22, loss=0.874]

epoch:22, idx:299/10845, loss:0.8737226883570354, acc:0.7208333333333333


  4%|▎         | 400/10845 [01:29<37:35,  4.63it/s, acc=0.714, epoch=22, loss=0.908]

epoch:22, idx:399/10845, loss:0.9080311885476112, acc:0.714375


  5%|▍         | 501/10845 [01:51<38:43,  4.45it/s, acc=0.718, epoch=22, loss=0.91] 

epoch:22, idx:499/10845, loss:0.9106499633789062, acc:0.718


  6%|▌         | 600/10845 [02:13<36:21,  4.70it/s, acc=0.711, epoch=22, loss=0.927]

epoch:22, idx:599/10845, loss:0.92654634197553, acc:0.7108333333333333


  6%|▋         | 701/10845 [02:36<36:49,  4.59it/s, acc=0.714, epoch=22, loss=0.926]

epoch:22, idx:699/10845, loss:0.927067095211574, acc:0.7139285714285715


  7%|▋         | 800/10845 [02:58<37:09,  4.50it/s, acc=0.711, epoch=22, loss=0.929]

epoch:22, idx:799/10845, loss:0.9292885547876358, acc:0.7109375


  8%|▊         | 900/10845 [03:20<40:00,  4.14it/s, acc=0.713, epoch=22, loss=0.929]

epoch:22, idx:899/10845, loss:0.9289999080366559, acc:0.7130555555555556


  9%|▉         | 1000/10845 [03:43<34:30,  4.75it/s, acc=0.714, epoch=22, loss=0.921]

epoch:22, idx:999/10845, loss:0.9207292352318763, acc:0.714


 10%|█         | 1100/10845 [04:05<38:26,  4.23it/s, acc=0.713, epoch=22, loss=0.92] 

epoch:22, idx:1099/10845, loss:0.9200730227882212, acc:0.7131818181818181


 11%|█         | 1200/10845 [04:27<34:41,  4.63it/s, acc=0.709, epoch=22, loss=0.924]

epoch:22, idx:1199/10845, loss:0.9237823481857776, acc:0.7089583333333334


 12%|█▏        | 1301/10845 [04:49<34:02,  4.67it/s, acc=0.709, epoch=22, loss=0.921]

epoch:22, idx:1299/10845, loss:0.9213920531823085, acc:0.7094230769230769


 13%|█▎        | 1400/10845 [05:11<37:39,  4.18it/s, acc=0.711, epoch=22, loss=0.921]

epoch:22, idx:1399/10845, loss:0.9213782912492752, acc:0.7107142857142857


 14%|█▍        | 1501/10845 [05:34<32:13,  4.83it/s, acc=0.711, epoch=22, loss=0.928]

epoch:22, idx:1499/10845, loss:0.9284126919905344, acc:0.7103333333333334


 15%|█▍        | 1600/10845 [05:56<35:08,  4.39it/s, acc=0.709, epoch=22, loss=0.931]

epoch:22, idx:1599/10845, loss:0.9307194238901139, acc:0.70921875


 16%|█▌        | 1701/10845 [06:18<34:45,  4.38it/s, acc=0.711, epoch=22, loss=0.922]

epoch:22, idx:1699/10845, loss:0.9217730245169471, acc:0.7110294117647059


 17%|█▋        | 1801/10845 [06:41<33:41,  4.47it/s, acc=0.71, epoch=22, loss=0.925] 

epoch:22, idx:1799/10845, loss:0.9240630127323999, acc:0.7104166666666667


 18%|█▊        | 1900/10845 [07:03<34:10,  4.36it/s, acc=0.71, epoch=22, loss=0.924] 

epoch:22, idx:1899/10845, loss:0.9239417578672108, acc:0.7101315789473684


 18%|█▊        | 2001/10845 [07:25<33:22,  4.42it/s, acc=0.711, epoch=22, loss=0.924]

epoch:22, idx:1999/10845, loss:0.9241162327528, acc:0.711125


 19%|█▉        | 2100/10845 [07:47<31:12,  4.67it/s, acc=0.713, epoch=22, loss=0.919]

epoch:22, idx:2099/10845, loss:0.9185962338106973, acc:0.7132142857142857


 20%|██        | 2200/10845 [08:09<35:00,  4.11it/s, acc=0.711, epoch=22, loss=0.922]

epoch:22, idx:2199/10845, loss:0.9223592229593884, acc:0.71125


 21%|██        | 2300/10845 [08:31<30:05,  4.73it/s, acc=0.712, epoch=22, loss=0.924]

epoch:22, idx:2299/10845, loss:0.9239155313502188, acc:0.7120652173913044


 22%|██▏       | 2400/10845 [08:53<29:57,  4.70it/s, acc=0.713, epoch=22, loss=0.921]

epoch:22, idx:2399/10845, loss:0.9214900547514359, acc:0.7132291666666667


 23%|██▎       | 2500/10845 [09:16<31:32,  4.41it/s, acc=0.712, epoch=22, loss=0.923]

epoch:22, idx:2499/10845, loss:0.9229556425333023, acc:0.7124


 24%|██▍       | 2601/10845 [09:38<31:14,  4.40it/s, acc=0.712, epoch=22, loss=0.923]

epoch:22, idx:2599/10845, loss:0.9235595874832226, acc:0.7119230769230769


 25%|██▍       | 2700/10845 [10:01<32:12,  4.21it/s, acc=0.712, epoch=22, loss=0.922]

epoch:22, idx:2699/10845, loss:0.9221119051730191, acc:0.7117592592592593


 26%|██▌       | 2800/10845 [10:23<29:35,  4.53it/s, acc=0.712, epoch=22, loss=0.922]

epoch:22, idx:2799/10845, loss:0.9215479871843543, acc:0.7121428571428572


 27%|██▋       | 2901/10845 [10:45<27:32,  4.81it/s, acc=0.713, epoch=22, loss=0.92] 

epoch:22, idx:2899/10845, loss:0.9204934632161568, acc:0.7126724137931034


 28%|██▊       | 3001/10845 [11:07<28:07,  4.65it/s, acc=0.712, epoch=22, loss=0.923]

epoch:22, idx:2999/10845, loss:0.9230720509489377, acc:0.712


 29%|██▊       | 3100/10845 [11:29<28:37,  4.51it/s, acc=0.713, epoch=22, loss=0.92] 

epoch:22, idx:3099/10845, loss:0.9195514945253249, acc:0.7129032258064516


 30%|██▉       | 3200/10845 [11:51<27:46,  4.59it/s, acc=0.713, epoch=22, loss=0.921]

epoch:22, idx:3199/10845, loss:0.9205749061144889, acc:0.712734375


 30%|███       | 3301/10845 [12:14<27:27,  4.58it/s, acc=0.713, epoch=22, loss=0.92] 

epoch:22, idx:3299/10845, loss:0.9197363763325143, acc:0.7131060606060606


 31%|███▏      | 3400/10845 [12:36<28:30,  4.35it/s, acc=0.714, epoch=22, loss=0.918]

epoch:22, idx:3399/10845, loss:0.9176466938502649, acc:0.7138235294117647


 32%|███▏      | 3500/10845 [12:57<25:24,  4.82it/s, acc=0.714, epoch=22, loss=0.919]

epoch:22, idx:3499/10845, loss:0.9186185322659356, acc:0.7138571428571429


 33%|███▎      | 3600/10845 [13:20<26:12,  4.61it/s, acc=0.713, epoch=22, loss=0.923]

epoch:22, idx:3599/10845, loss:0.9230762837827206, acc:0.7132638888888889


 34%|███▍      | 3700/10845 [13:42<26:47,  4.45it/s, acc=0.713, epoch=22, loss=0.925]

epoch:22, idx:3699/10845, loss:0.9252700324799563, acc:0.7125


 35%|███▌      | 3800/10845 [14:04<24:44,  4.74it/s, acc=0.712, epoch=22, loss=0.928]

epoch:22, idx:3799/10845, loss:0.9280512250567737, acc:0.7122368421052632


 36%|███▌      | 3901/10845 [14:27<25:51,  4.48it/s, acc=0.712, epoch=22, loss=0.928]

epoch:22, idx:3899/10845, loss:0.928062646923921, acc:0.7122435897435897


 37%|███▋      | 4001/10845 [14:49<24:34,  4.64it/s, acc=0.712, epoch=22, loss=0.927]

epoch:22, idx:3999/10845, loss:0.9275203919857741, acc:0.712375


 38%|███▊      | 4100/10845 [15:11<24:35,  4.57it/s, acc=0.712, epoch=22, loss=0.926]

epoch:22, idx:4099/10845, loss:0.9264093274459606, acc:0.7121951219512195


 39%|███▊      | 4200/10845 [15:33<24:55,  4.44it/s, acc=0.712, epoch=22, loss=0.924]

epoch:22, idx:4199/10845, loss:0.9243595913620222, acc:0.7122619047619048


 40%|███▉      | 4300/10845 [15:56<24:57,  4.37it/s, acc=0.712, epoch=22, loss=0.925]

epoch:22, idx:4299/10845, loss:0.9254833525142004, acc:0.7119186046511627


 41%|████      | 4400/10845 [16:18<24:46,  4.34it/s, acc=0.713, epoch=22, loss=0.922]

epoch:22, idx:4399/10845, loss:0.9221875731105155, acc:0.7127840909090909


 42%|████▏     | 4501/10845 [16:41<22:49,  4.63it/s, acc=0.712, epoch=22, loss=0.925]

epoch:22, idx:4499/10845, loss:0.9252644430134032, acc:0.7117777777777777


 42%|████▏     | 4600/10845 [17:03<23:07,  4.50it/s, acc=0.711, epoch=22, loss=0.929]

epoch:22, idx:4599/10845, loss:0.9292336756638858, acc:0.7110326086956522


 43%|████▎     | 4700/10845 [17:25<24:02,  4.26it/s, acc=0.711, epoch=22, loss=0.93] 

epoch:22, idx:4699/10845, loss:0.9299043026503097, acc:0.7112765957446808


 44%|████▍     | 4800/10845 [17:48<22:50,  4.41it/s, acc=0.711, epoch=22, loss=0.932]

epoch:22, idx:4799/10845, loss:0.9318345778187116, acc:0.710625


 45%|████▌     | 4901/10845 [18:10<21:00,  4.72it/s, acc=0.71, epoch=22, loss=0.933] 

epoch:22, idx:4899/10845, loss:0.932824638571058, acc:0.7099489795918368


 46%|████▌     | 5001/10845 [18:32<21:20,  4.56it/s, acc=0.71, epoch=22, loss=0.933]

epoch:22, idx:4999/10845, loss:0.9326956631422043, acc:0.71


 47%|████▋     | 5100/10845 [18:54<21:43,  4.41it/s, acc=0.71, epoch=22, loss=0.933] 

epoch:22, idx:5099/10845, loss:0.9325574660067465, acc:0.7102941176470589


 48%|████▊     | 5201/10845 [19:17<21:19,  4.41it/s, acc=0.71, epoch=22, loss=0.935]

epoch:22, idx:5199/10845, loss:0.9348079560811703, acc:0.7099519230769231


 49%|████▉     | 5300/10845 [19:39<20:36,  4.48it/s, acc=0.71, epoch=22, loss=0.936]

epoch:22, idx:5299/10845, loss:0.9362566824904028, acc:0.709622641509434


 50%|████▉     | 5400/10845 [20:01<18:35,  4.88it/s, acc=0.71, epoch=22, loss=0.935] 

epoch:22, idx:5399/10845, loss:0.9348230924429717, acc:0.7097222222222223


 51%|█████     | 5500/10845 [20:23<20:20,  4.38it/s, acc=0.71, epoch=22, loss=0.934]

epoch:22, idx:5499/10845, loss:0.9335260640707883, acc:0.7100909090909091


 52%|█████▏    | 5600/10845 [20:46<19:16,  4.54it/s, acc=0.71, epoch=22, loss=0.935]

epoch:22, idx:5599/10845, loss:0.9353827351544585, acc:0.7095982142857142


 53%|█████▎    | 5700/10845 [21:08<20:30,  4.18it/s, acc=0.709, epoch=22, loss=0.935]

epoch:22, idx:5699/10845, loss:0.934768523207882, acc:0.7094298245614035


 53%|█████▎    | 5800/10845 [21:30<18:33,  4.53it/s, acc=0.709, epoch=22, loss=0.934]

epoch:22, idx:5799/10845, loss:0.9341480159348455, acc:0.7093103448275863


 54%|█████▍    | 5900/10845 [21:53<18:32,  4.44it/s, acc=0.71, epoch=22, loss=0.933] 

epoch:22, idx:5899/10845, loss:0.9327174847004778, acc:0.7096186440677966


 55%|█████▌    | 6000/10845 [22:15<18:15,  4.42it/s, acc=0.709, epoch=22, loss=0.933]

epoch:22, idx:5999/10845, loss:0.9325195608536402, acc:0.7094583333333333


 56%|█████▌    | 6100/10845 [22:37<17:57,  4.40it/s, acc=0.71, epoch=22, loss=0.93]  

epoch:22, idx:6099/10845, loss:0.930355727985257, acc:0.7100409836065574


 57%|█████▋    | 6200/10845 [22:59<17:53,  4.33it/s, acc=0.709, epoch=22, loss=0.932]

epoch:22, idx:6199/10845, loss:0.9316520883191016, acc:0.7094354838709678


 58%|█████▊    | 6300/10845 [23:21<17:44,  4.27it/s, acc=0.709, epoch=22, loss=0.933]

epoch:22, idx:6299/10845, loss:0.9327674788520449, acc:0.7093253968253969


 59%|█████▉    | 6400/10845 [23:43<15:10,  4.88it/s, acc=0.71, epoch=22, loss=0.931] 

epoch:22, idx:6399/10845, loss:0.9311065136641264, acc:0.7099609375


 60%|█████▉    | 6500/10845 [24:05<16:06,  4.50it/s, acc=0.71, epoch=22, loss=0.932]

epoch:22, idx:6499/10845, loss:0.9316026026285612, acc:0.710076923076923


 61%|██████    | 6601/10845 [24:28<15:31,  4.55it/s, acc=0.71, epoch=22, loss=0.931]

epoch:22, idx:6599/10845, loss:0.9309658605402166, acc:0.7100757575757576


 62%|██████▏   | 6701/10845 [24:50<14:56,  4.62it/s, acc=0.71, epoch=22, loss=0.932]

epoch:22, idx:6699/10845, loss:0.9323106795816279, acc:0.7098507462686567


 63%|██████▎   | 6800/10845 [25:12<14:09,  4.76it/s, acc=0.71, epoch=22, loss=0.93] 

epoch:22, idx:6799/10845, loss:0.9300083679486724, acc:0.7104411764705882


 64%|██████▎   | 6900/10845 [25:34<13:59,  4.70it/s, acc=0.71, epoch=22, loss=0.931]

epoch:22, idx:6899/10845, loss:0.9310974211105402, acc:0.7101086956521739


 65%|██████▍   | 7000/10845 [25:57<15:00,  4.27it/s, acc=0.71, epoch=22, loss=0.933]

epoch:22, idx:6999/10845, loss:0.9331119214807237, acc:0.7098928571428571


 65%|██████▌   | 7100/10845 [26:19<14:09,  4.41it/s, acc=0.71, epoch=22, loss=0.934]

epoch:22, idx:7099/10845, loss:0.9331876615571304, acc:0.7096478873239437


 66%|██████▋   | 7200/10845 [26:41<13:36,  4.46it/s, acc=0.71, epoch=22, loss=0.932]

epoch:22, idx:7199/10845, loss:0.9323335104684035, acc:0.7098263888888889


 67%|██████▋   | 7300/10845 [27:03<13:06,  4.51it/s, acc=0.71, epoch=22, loss=0.933]

epoch:22, idx:7299/10845, loss:0.9327988931005948, acc:0.7098972602739726


 68%|██████▊   | 7400/10845 [27:25<12:26,  4.62it/s, acc=0.71, epoch=22, loss=0.934]

epoch:22, idx:7399/10845, loss:0.933569063681203, acc:0.7097297297297297


 69%|██████▉   | 7501/10845 [27:48<12:43,  4.38it/s, acc=0.71, epoch=22, loss=0.935]

epoch:22, idx:7499/10845, loss:0.9346120631774266, acc:0.7096666666666667


 70%|███████   | 7600/10845 [28:10<12:21,  4.38it/s, acc=0.709, epoch=22, loss=0.936]

epoch:22, idx:7599/10845, loss:0.9364158235019759, acc:0.7094407894736842


 71%|███████   | 7700/10845 [28:32<11:52,  4.41it/s, acc=0.709, epoch=22, loss=0.937]

epoch:22, idx:7699/10845, loss:0.9373971359373687, acc:0.7090584415584416


 72%|███████▏  | 7800/10845 [28:54<11:18,  4.49it/s, acc=0.71, epoch=22, loss=0.936] 

epoch:22, idx:7799/10845, loss:0.9358328366814516, acc:0.7095833333333333


 73%|███████▎  | 7900/10845 [29:17<11:18,  4.34it/s, acc=0.71, epoch=22, loss=0.934] 

epoch:22, idx:7899/10845, loss:0.9343365208483949, acc:0.7095569620253165


 74%|███████▍  | 8001/10845 [29:39<09:38,  4.91it/s, acc=0.71, epoch=22, loss=0.936] 

epoch:22, idx:7999/10845, loss:0.935467951245606, acc:0.70959375


 75%|███████▍  | 8100/10845 [30:01<10:44,  4.26it/s, acc=0.709, epoch=22, loss=0.936]

epoch:22, idx:8099/10845, loss:0.9361787032639539, acc:0.7094444444444444


 76%|███████▌  | 8201/10845 [30:24<09:38,  4.57it/s, acc=0.709, epoch=22, loss=0.937]

epoch:22, idx:8199/10845, loss:0.9370838277514388, acc:0.709390243902439


 77%|███████▋  | 8300/10845 [30:46<09:26,  4.49it/s, acc=0.71, epoch=22, loss=0.936] 

epoch:22, idx:8299/10845, loss:0.9359822566250721, acc:0.709578313253012


 77%|███████▋  | 8401/10845 [31:08<08:55,  4.57it/s, acc=0.709, epoch=22, loss=0.938]

epoch:22, idx:8399/10845, loss:0.937892496344589, acc:0.7089880952380953


 78%|███████▊  | 8500/10845 [31:30<08:31,  4.59it/s, acc=0.709, epoch=22, loss=0.938]

epoch:22, idx:8499/10845, loss:0.9377245873703676, acc:0.7089705882352941


 79%|███████▉  | 8600/10845 [31:53<08:18,  4.51it/s, acc=0.709, epoch=22, loss=0.94] 

epoch:22, idx:8599/10845, loss:0.9397022036896195, acc:0.7086046511627907


 80%|████████  | 8701/10845 [32:15<07:21,  4.86it/s, acc=0.709, epoch=22, loss=0.939]

epoch:22, idx:8699/10845, loss:0.9386795727822972, acc:0.7088793103448275


 81%|████████  | 8801/10845 [32:37<07:42,  4.42it/s, acc=0.709, epoch=22, loss=0.94] 

epoch:22, idx:8799/10845, loss:0.9400414757295088, acc:0.7086079545454546


 82%|████████▏ | 8901/10845 [32:59<07:29,  4.32it/s, acc=0.708, epoch=22, loss=0.941]

epoch:22, idx:8899/10845, loss:0.9405941487430187, acc:0.7083426966292135


 83%|████████▎ | 9000/10845 [33:21<06:44,  4.56it/s, acc=0.709, epoch=22, loss=0.939]

epoch:22, idx:8999/10845, loss:0.9393962588177787, acc:0.7086388888888889


 84%|████████▍ | 9100/10845 [33:44<06:16,  4.63it/s, acc=0.709, epoch=22, loss=0.939]

epoch:22, idx:9099/10845, loss:0.9387474563488594, acc:0.7087912087912088


 85%|████████▍ | 9200/10845 [34:06<06:46,  4.05it/s, acc=0.709, epoch=22, loss=0.939]

epoch:22, idx:9199/10845, loss:0.9392020982115165, acc:0.7085054347826087


 86%|████████▌ | 9300/10845 [34:28<06:05,  4.23it/s, acc=0.708, epoch=22, loss=0.94] 

epoch:22, idx:9299/10845, loss:0.9401930428704908, acc:0.707983870967742


 87%|████████▋ | 9400/10845 [34:50<05:11,  4.63it/s, acc=0.708, epoch=22, loss=0.94] 

epoch:22, idx:9399/10845, loss:0.9400129674089716, acc:0.708218085106383


 88%|████████▊ | 9500/10845 [35:12<04:49,  4.64it/s, acc=0.709, epoch=22, loss=0.94] 

epoch:22, idx:9499/10845, loss:0.9397073951645901, acc:0.7085526315789473


 89%|████████▊ | 9600/10845 [35:34<04:27,  4.66it/s, acc=0.708, epoch=22, loss=0.94] 

epoch:22, idx:9599/10845, loss:0.9404205169528723, acc:0.7084635416666667


 89%|████████▉ | 9701/10845 [35:56<04:10,  4.57it/s, acc=0.708, epoch=22, loss=0.94] 

epoch:22, idx:9699/10845, loss:0.9397630364747391, acc:0.7082989690721649


 90%|█████████ | 9801/10845 [36:18<03:43,  4.66it/s, acc=0.708, epoch=22, loss=0.941]

epoch:22, idx:9799/10845, loss:0.9409356829828146, acc:0.7081887755102041


 91%|█████████▏| 9901/10845 [36:40<03:16,  4.80it/s, acc=0.708, epoch=22, loss=0.941]

epoch:22, idx:9899/10845, loss:0.940839223921901, acc:0.7081313131313132


 92%|█████████▏| 10000/10845 [37:02<03:00,  4.67it/s, acc=0.708, epoch=22, loss=0.94]

epoch:22, idx:9999/10845, loss:0.9404067833662033, acc:0.708275


 93%|█████████▎| 10101/10845 [37:25<02:46,  4.46it/s, acc=0.708, epoch=22, loss=0.94] 

epoch:22, idx:10099/10845, loss:0.9401159082544912, acc:0.7082673267326732


 94%|█████████▍| 10201/10845 [37:47<02:22,  4.52it/s, acc=0.708, epoch=22, loss=0.942]

epoch:22, idx:10199/10845, loss:0.941977835262523, acc:0.707671568627451


 95%|█████████▍| 10301/10845 [38:10<01:55,  4.71it/s, acc=0.707, epoch=22, loss=0.943]

epoch:22, idx:10299/10845, loss:0.9432794770106528, acc:0.7072572815533981


 96%|█████████▌| 10400/10845 [38:31<01:37,  4.58it/s, acc=0.707, epoch=22, loss=0.943]

epoch:22, idx:10399/10845, loss:0.9430483026229418, acc:0.7071634615384615


 97%|█████████▋| 10501/10845 [38:54<01:15,  4.56it/s, acc=0.707, epoch=22, loss=0.943]

epoch:22, idx:10499/10845, loss:0.9426710611411503, acc:0.7074047619047619


 98%|█████████▊| 10600/10845 [39:16<00:52,  4.63it/s, acc=0.707, epoch=22, loss=0.942]

epoch:22, idx:10599/10845, loss:0.9421339894803065, acc:0.7074292452830189


 99%|█████████▊| 10700/10845 [39:38<00:35,  4.12it/s, acc=0.707, epoch=22, loss=0.942]

epoch:22, idx:10699/10845, loss:0.941740905258143, acc:0.7074299065420561


100%|█████████▉| 10801/10845 [40:00<00:10,  4.36it/s, acc=0.707, epoch=22, loss=0.943]

epoch:22, idx:10799/10845, loss:0.9425177030927605, acc:0.7072222222222222


100%|██████████| 10845/10845 [40:10<00:00,  4.47it/s, acc=0.707, epoch=22, loss=0.942]


epoch:22, idx:0/1275, loss:1.1881961822509766, acc:0.5
epoch:22, idx:100/1275, loss:1.357934579400733, acc:0.6287128712871287
epoch:22, idx:200/1275, loss:1.2301246114631197, acc:0.6393034825870647
epoch:22, idx:300/1275, loss:1.19518912650422, acc:0.6503322259136213
epoch:22, idx:400/1275, loss:1.1860757196633298, acc:0.6571072319201995
epoch:22, idx:500/1275, loss:1.161248590536936, acc:0.656686626746507
epoch:22, idx:600/1275, loss:1.182967484394048, acc:0.6522462562396006
epoch:22, idx:700/1275, loss:1.1832511704760509, acc:0.6529957203994294
epoch:22, idx:800/1275, loss:1.1959781096817999, acc:0.6513732833957553
epoch:22, idx:900/1275, loss:1.1880973210345362, acc:0.6526082130965594
epoch:22, idx:1000/1275, loss:1.1953740340846402, acc:0.6515984015984015
epoch:22, idx:1100/1275, loss:1.18505450474144, acc:0.65236148955495
epoch:22, idx:1200/1275, loss:1.1831676125724946, acc:0.6511240632805995


  1%|          | 101/10845 [00:21<38:20,  4.67it/s, acc=0.743, epoch=23, loss=0.857]

epoch:23, idx:99/10845, loss:0.8546874463558197, acc:0.7475


  2%|▏         | 200/10845 [00:44<39:34,  4.48it/s, acc=0.724, epoch=23, loss=0.901]

epoch:23, idx:199/10845, loss:0.9012961250543594, acc:0.72375


  3%|▎         | 300/10845 [01:06<39:24,  4.46it/s, acc=0.708, epoch=23, loss=0.915]

epoch:23, idx:299/10845, loss:0.914745565255483, acc:0.7083333333333334


  4%|▎         | 401/10845 [01:29<37:17,  4.67it/s, acc=0.709, epoch=23, loss=0.917]

epoch:23, idx:399/10845, loss:0.9186243909597397, acc:0.70875


  5%|▍         | 500/10845 [01:51<38:59,  4.42it/s, acc=0.719, epoch=23, loss=0.893]

epoch:23, idx:499/10845, loss:0.8927343429327012, acc:0.719


  6%|▌         | 600/10845 [02:13<38:58,  4.38it/s, acc=0.723, epoch=23, loss=0.875]

epoch:23, idx:599/10845, loss:0.8750355697671572, acc:0.7229166666666667


  6%|▋         | 701/10845 [02:36<35:46,  4.73it/s, acc=0.723, epoch=23, loss=0.873]

epoch:23, idx:699/10845, loss:0.8735639309031623, acc:0.7221428571428572


  7%|▋         | 801/10845 [02:58<36:10,  4.63it/s, acc=0.725, epoch=23, loss=0.879]

epoch:23, idx:799/10845, loss:0.8792004349082708, acc:0.7253125


  8%|▊         | 901/10845 [03:20<36:46,  4.51it/s, acc=0.723, epoch=23, loss=0.883]

epoch:23, idx:899/10845, loss:0.8830027774307463, acc:0.7233333333333334


  9%|▉         | 1001/10845 [03:42<34:52,  4.70it/s, acc=0.722, epoch=23, loss=0.893]

epoch:23, idx:999/10845, loss:0.8929204370379448, acc:0.72175


 10%|█         | 1101/10845 [04:04<35:24,  4.59it/s, acc=0.718, epoch=23, loss=0.9]  

epoch:23, idx:1099/10845, loss:0.8992117713256316, acc:0.7181818181818181


 11%|█         | 1200/10845 [04:27<36:13,  4.44it/s, acc=0.716, epoch=23, loss=0.9]  

epoch:23, idx:1199/10845, loss:0.8996753358344237, acc:0.7164583333333333


 12%|█▏        | 1300/10845 [04:49<35:47,  4.44it/s, acc=0.715, epoch=23, loss=0.908]

epoch:23, idx:1299/10845, loss:0.9082750484118095, acc:0.7146153846153847


 13%|█▎        | 1400/10845 [05:12<33:02,  4.76it/s, acc=0.711, epoch=23, loss=0.915]

epoch:23, idx:1399/10845, loss:0.9153878767575536, acc:0.7105357142857143


 14%|█▍        | 1501/10845 [05:34<34:35,  4.50it/s, acc=0.712, epoch=23, loss=0.913]

epoch:23, idx:1499/10845, loss:0.9129639923175176, acc:0.712


 15%|█▍        | 1600/10845 [05:56<34:05,  4.52it/s, acc=0.712, epoch=23, loss=0.913]

epoch:23, idx:1599/10845, loss:0.9132924456521869, acc:0.71203125


 16%|█▌        | 1700/10845 [06:19<32:59,  4.62it/s, acc=0.712, epoch=23, loss=0.919]

epoch:23, idx:1699/10845, loss:0.9192873378711588, acc:0.711764705882353


 17%|█▋        | 1801/10845 [06:41<32:28,  4.64it/s, acc=0.712, epoch=23, loss=0.918]

epoch:23, idx:1799/10845, loss:0.9182909073763423, acc:0.7123611111111111


 18%|█▊        | 1900/10845 [07:04<32:30,  4.59it/s, acc=0.712, epoch=23, loss=0.919]

epoch:23, idx:1899/10845, loss:0.9188195706354945, acc:0.7121052631578947


 18%|█▊        | 2000/10845 [07:26<32:41,  4.51it/s, acc=0.713, epoch=23, loss=0.915]

epoch:23, idx:1999/10845, loss:0.9148578475415706, acc:0.712625


 19%|█▉        | 2100/10845 [07:48<32:51,  4.44it/s, acc=0.713, epoch=23, loss=0.914]

epoch:23, idx:2099/10845, loss:0.9136817026989801, acc:0.7132142857142857


 20%|██        | 2201/10845 [08:11<31:03,  4.64it/s, acc=0.711, epoch=23, loss=0.921]

epoch:23, idx:2199/10845, loss:0.9211683122949167, acc:0.7110227272727273


 21%|██        | 2300/10845 [08:33<30:25,  4.68it/s, acc=0.711, epoch=23, loss=0.923]

epoch:23, idx:2299/10845, loss:0.923431300987368, acc:0.7108695652173913


 22%|██▏       | 2401/10845 [08:56<31:07,  4.52it/s, acc=0.71, epoch=23, loss=0.931] 

epoch:23, idx:2399/10845, loss:0.9310620963325104, acc:0.7098958333333333


 23%|██▎       | 2500/10845 [09:18<31:19,  4.44it/s, acc=0.709, epoch=23, loss=0.935]

epoch:23, idx:2499/10845, loss:0.9349291878223419, acc:0.7091


 24%|██▍       | 2600/10845 [09:40<29:46,  4.62it/s, acc=0.708, epoch=23, loss=0.933]

epoch:23, idx:2599/10845, loss:0.9334817878558086, acc:0.7083653846153846


 25%|██▍       | 2700/10845 [10:02<31:59,  4.24it/s, acc=0.709, epoch=23, loss=0.933]

epoch:23, idx:2699/10845, loss:0.9327369077117355, acc:0.7090740740740741


 26%|██▌       | 2801/10845 [10:25<28:29,  4.71it/s, acc=0.71, epoch=23, loss=0.928] 

epoch:23, idx:2799/10845, loss:0.9280286267825535, acc:0.7105357142857143


 27%|██▋       | 2900/10845 [10:47<29:09,  4.54it/s, acc=0.712, epoch=23, loss=0.928]

epoch:23, idx:2899/10845, loss:0.9278624237405843, acc:0.711551724137931


 28%|██▊       | 3001/10845 [11:09<27:35,  4.74it/s, acc=0.711, epoch=23, loss=0.929]

epoch:23, idx:2999/10845, loss:0.9282519975105922, acc:0.71125


 29%|██▊       | 3100/10845 [11:31<29:48,  4.33it/s, acc=0.712, epoch=23, loss=0.924]

epoch:23, idx:3099/10845, loss:0.9244515652425828, acc:0.7118548387096775


 30%|██▉       | 3201/10845 [11:54<27:52,  4.57it/s, acc=0.713, epoch=23, loss=0.921]

epoch:23, idx:3199/10845, loss:0.9213833990693092, acc:0.71296875


 30%|███       | 3300/10845 [12:16<29:15,  4.30it/s, acc=0.713, epoch=23, loss=0.921]

epoch:23, idx:3299/10845, loss:0.9214723845322926, acc:0.713030303030303


 31%|███▏      | 3400/10845 [12:38<28:10,  4.40it/s, acc=0.714, epoch=23, loss=0.919]

epoch:23, idx:3399/10845, loss:0.9192454849621828, acc:0.7141176470588235


 32%|███▏      | 3500/10845 [13:01<25:19,  4.83it/s, acc=0.714, epoch=23, loss=0.92] 

epoch:23, idx:3499/10845, loss:0.9195827432700566, acc:0.7142142857142857


 33%|███▎      | 3600/10845 [13:23<26:34,  4.54it/s, acc=0.715, epoch=23, loss=0.915]

epoch:23, idx:3599/10845, loss:0.914513379169835, acc:0.7152777777777778


 34%|███▍      | 3701/10845 [13:45<25:56,  4.59it/s, acc=0.716, epoch=23, loss=0.914]

epoch:23, idx:3699/10845, loss:0.9142411185760756, acc:0.715945945945946


 35%|███▌      | 3801/10845 [14:07<24:24,  4.81it/s, acc=0.716, epoch=23, loss=0.916]

epoch:23, idx:3799/10845, loss:0.9161782631905455, acc:0.7159868421052632


 36%|███▌      | 3900/10845 [14:29<25:02,  4.62it/s, acc=0.716, epoch=23, loss=0.917]

epoch:23, idx:3899/10845, loss:0.9165497518044252, acc:0.7162820512820512


 37%|███▋      | 4000/10845 [14:51<25:29,  4.47it/s, acc=0.715, epoch=23, loss=0.919]

epoch:23, idx:3999/10845, loss:0.9188449241667986, acc:0.7154375


 38%|███▊      | 4101/10845 [15:13<25:52,  4.34it/s, acc=0.716, epoch=23, loss=0.918]

epoch:23, idx:4099/10845, loss:0.9177079321843822, acc:0.7162195121951219


 39%|███▊      | 4201/10845 [15:36<24:05,  4.60it/s, acc=0.716, epoch=23, loss=0.92] 

epoch:23, idx:4199/10845, loss:0.9201946958048003, acc:0.7158928571428571


 40%|███▉      | 4300/10845 [15:58<23:39,  4.61it/s, acc=0.716, epoch=23, loss=0.92] 

epoch:23, idx:4299/10845, loss:0.9202793836455012, acc:0.7158720930232558


 41%|████      | 4401/10845 [16:20<22:42,  4.73it/s, acc=0.715, epoch=23, loss=0.922]

epoch:23, idx:4399/10845, loss:0.9214830802110109, acc:0.7151704545454546


 41%|████▏     | 4500/10845 [16:43<23:57,  4.41it/s, acc=0.715, epoch=23, loss=0.923]

epoch:23, idx:4499/10845, loss:0.9229730398257573, acc:0.715


 42%|████▏     | 4601/10845 [17:05<21:47,  4.77it/s, acc=0.715, epoch=23, loss=0.925]

epoch:23, idx:4599/10845, loss:0.9253353243418362, acc:0.7147826086956521


 43%|████▎     | 4700/10845 [17:27<22:03,  4.64it/s, acc=0.715, epoch=23, loss=0.926]

epoch:23, idx:4699/10845, loss:0.9259251644129449, acc:0.7145212765957447


 44%|████▍     | 4800/10845 [17:49<21:04,  4.78it/s, acc=0.715, epoch=23, loss=0.924]

epoch:23, idx:4799/10845, loss:0.9237507561221718, acc:0.7153125


 45%|████▌     | 4900/10845 [18:11<23:35,  4.20it/s, acc=0.715, epoch=23, loss=0.926]

epoch:23, idx:4899/10845, loss:0.9258342476402011, acc:0.7148979591836735


 46%|████▌     | 5000/10845 [18:34<21:50,  4.46it/s, acc=0.715, epoch=23, loss=0.926]

epoch:23, idx:4999/10845, loss:0.925823456966877, acc:0.71485


 47%|████▋     | 5101/10845 [18:56<20:29,  4.67it/s, acc=0.715, epoch=23, loss=0.927]

epoch:23, idx:5099/10845, loss:0.9271058219203762, acc:0.715


 48%|████▊     | 5201/10845 [19:19<20:52,  4.50it/s, acc=0.715, epoch=23, loss=0.928]

epoch:23, idx:5199/10845, loss:0.9275606823999148, acc:0.7149038461538462


 49%|████▉     | 5300/10845 [19:41<20:07,  4.59it/s, acc=0.715, epoch=23, loss=0.929]

epoch:23, idx:5299/10845, loss:0.9287187710123243, acc:0.7146698113207547


 50%|████▉     | 5401/10845 [20:03<20:05,  4.52it/s, acc=0.715, epoch=23, loss=0.927]

epoch:23, idx:5399/10845, loss:0.927328287296825, acc:0.715


 51%|█████     | 5501/10845 [20:26<19:16,  4.62it/s, acc=0.715, epoch=23, loss=0.927]

epoch:23, idx:5499/10845, loss:0.9275408998185938, acc:0.7151363636363637


 52%|█████▏    | 5601/10845 [20:48<18:22,  4.76it/s, acc=0.715, epoch=23, loss=0.928]

epoch:23, idx:5599/10845, loss:0.9283144085960728, acc:0.7153125


 53%|█████▎    | 5700/10845 [21:10<19:40,  4.36it/s, acc=0.715, epoch=23, loss=0.931]

epoch:23, idx:5699/10845, loss:0.93053687068454, acc:0.7146052631578947


 53%|█████▎    | 5801/10845 [21:32<18:03,  4.66it/s, acc=0.715, epoch=23, loss=0.932]

epoch:23, idx:5799/10845, loss:0.931952978701427, acc:0.7148275862068966


 54%|█████▍    | 5900/10845 [21:54<17:38,  4.67it/s, acc=0.715, epoch=23, loss=0.933]

epoch:23, idx:5899/10845, loss:0.9329384588387053, acc:0.7145762711864406


 55%|█████▌    | 6001/10845 [22:17<17:41,  4.56it/s, acc=0.715, epoch=23, loss=0.931]

epoch:23, idx:5999/10845, loss:0.9309716526865959, acc:0.7146666666666667


 56%|█████▌    | 6100/10845 [22:39<17:27,  4.53it/s, acc=0.714, epoch=23, loss=0.932]

epoch:23, idx:6099/10845, loss:0.932303010186211, acc:0.7139754098360656


 57%|█████▋    | 6201/10845 [23:02<16:33,  4.68it/s, acc=0.714, epoch=23, loss=0.933]

epoch:23, idx:6199/10845, loss:0.9327710307605805, acc:0.7137096774193549


 58%|█████▊    | 6300/10845 [23:24<16:36,  4.56it/s, acc=0.714, epoch=23, loss=0.931]

epoch:23, idx:6299/10845, loss:0.93114883946994, acc:0.7139285714285715


 59%|█████▉    | 6401/10845 [23:46<16:10,  4.58it/s, acc=0.714, epoch=23, loss=0.931]

epoch:23, idx:6399/10845, loss:0.9307015895098448, acc:0.71390625


 60%|█████▉    | 6500/10845 [24:08<15:21,  4.71it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:6499/10845, loss:0.9306145316270682, acc:0.7134230769230769


 61%|██████    | 6601/10845 [24:30<15:47,  4.48it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:6599/10845, loss:0.929927754763401, acc:0.713030303030303


 62%|██████▏   | 6701/10845 [24:52<15:32,  4.44it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:6699/10845, loss:0.9298579280056171, acc:0.7133208955223881


 63%|██████▎   | 6800/10845 [25:14<15:05,  4.46it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:6799/10845, loss:0.9295915068598355, acc:0.7131985294117648


 64%|██████▎   | 6900/10845 [25:37<14:32,  4.52it/s, acc=0.713, epoch=23, loss=0.929]

epoch:23, idx:6899/10845, loss:0.9294440837701162, acc:0.7132971014492754


 65%|██████▍   | 7000/10845 [25:59<13:49,  4.64it/s, acc=0.714, epoch=23, loss=0.929]

epoch:23, idx:6999/10845, loss:0.9292007114972387, acc:0.7135714285714285


 65%|██████▌   | 7100/10845 [26:21<14:14,  4.38it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:7099/10845, loss:0.929522532971812, acc:0.7133802816901409


 66%|██████▋   | 7201/10845 [26:44<13:02,  4.66it/s, acc=0.713, epoch=23, loss=0.93]

epoch:23, idx:7199/10845, loss:0.9300669688897001, acc:0.7131597222222222


 67%|██████▋   | 7300/10845 [27:06<13:51,  4.26it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:7299/10845, loss:0.9304558388092746, acc:0.7130479452054794


 68%|██████▊   | 7401/10845 [27:29<13:10,  4.36it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:7399/10845, loss:0.9309073242384034, acc:0.7130743243243243


 69%|██████▉   | 7500/10845 [27:51<12:38,  4.41it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:7499/10845, loss:0.9296348196744919, acc:0.7132333333333334


 70%|███████   | 7601/10845 [28:13<11:29,  4.70it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:7599/10845, loss:0.9299284861354451, acc:0.7134868421052631


 71%|███████   | 7701/10845 [28:35<11:42,  4.48it/s, acc=0.714, epoch=23, loss=0.928]

epoch:23, idx:7699/10845, loss:0.928408405339563, acc:0.7137662337662337


 72%|███████▏  | 7801/10845 [28:58<10:54,  4.65it/s, acc=0.713, epoch=23, loss=0.933]

epoch:23, idx:7799/10845, loss:0.9324135825037956, acc:0.712948717948718


 73%|███████▎  | 7900/10845 [29:20<10:59,  4.46it/s, acc=0.713, epoch=23, loss=0.933]

epoch:23, idx:7899/10845, loss:0.9328606423018854, acc:0.7128481012658228


 74%|███████▍  | 8000/10845 [29:42<11:18,  4.20it/s, acc=0.713, epoch=23, loss=0.934]

epoch:23, idx:7999/10845, loss:0.933754608489573, acc:0.71275


 75%|███████▍  | 8100/10845 [30:04<10:23,  4.41it/s, acc=0.713, epoch=23, loss=0.932]

epoch:23, idx:8099/10845, loss:0.9322333268545292, acc:0.7128395061728395


 76%|███████▌  | 8200/10845 [30:27<10:13,  4.31it/s, acc=0.713, epoch=23, loss=0.933]

epoch:23, idx:8199/10845, loss:0.9325948977179643, acc:0.7128048780487805


 77%|███████▋  | 8300/10845 [30:49<09:15,  4.59it/s, acc=0.712, epoch=23, loss=0.933]

epoch:23, idx:8299/10845, loss:0.9334581737345959, acc:0.712289156626506


 77%|███████▋  | 8401/10845 [31:11<08:23,  4.86it/s, acc=0.712, epoch=23, loss=0.933]

epoch:23, idx:8399/10845, loss:0.9331729454653603, acc:0.7123214285714285


 78%|███████▊  | 8501/10845 [31:34<07:59,  4.88it/s, acc=0.712, epoch=23, loss=0.935]

epoch:23, idx:8499/10845, loss:0.9347392279961529, acc:0.7119705882352941


 79%|███████▉  | 8600/10845 [31:56<08:45,  4.27it/s, acc=0.712, epoch=23, loss=0.934]

epoch:23, idx:8599/10845, loss:0.9342554920773174, acc:0.711889534883721


 80%|████████  | 8701/10845 [32:18<07:28,  4.78it/s, acc=0.712, epoch=23, loss=0.934]

epoch:23, idx:8699/10845, loss:0.9344405529279819, acc:0.7120402298850574


 81%|████████  | 8800/10845 [32:40<07:29,  4.54it/s, acc=0.713, epoch=23, loss=0.933]

epoch:23, idx:8799/10845, loss:0.9325798174467954, acc:0.7125852272727272


 82%|████████▏ | 8900/10845 [33:02<07:04,  4.59it/s, acc=0.713, epoch=23, loss=0.932]

epoch:23, idx:8899/10845, loss:0.9318857241346595, acc:0.7125280898876405


 83%|████████▎ | 9000/10845 [33:24<06:55,  4.44it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:8999/10845, loss:0.9312365317874485, acc:0.7125


 84%|████████▍ | 9101/10845 [33:46<06:05,  4.77it/s, acc=0.713, epoch=23, loss=0.93] 

epoch:23, idx:9099/10845, loss:0.9305224241529192, acc:0.7126098901098901


 85%|████████▍ | 9200/10845 [34:09<06:05,  4.50it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:9199/10845, loss:0.9307740009867627, acc:0.7127445652173913


 86%|████████▌ | 9300/10845 [34:31<05:56,  4.34it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:9299/10845, loss:0.9304874733571084, acc:0.7128494623655914


 87%|████████▋ | 9401/10845 [34:54<05:24,  4.45it/s, acc=0.712, epoch=23, loss=0.932]

epoch:23, idx:9399/10845, loss:0.9315599675444847, acc:0.7124734042553191


 88%|████████▊ | 9500/10845 [35:16<05:04,  4.42it/s, acc=0.713, epoch=23, loss=0.932]

epoch:23, idx:9499/10845, loss:0.931808128589078, acc:0.7128157894736842


 89%|████████▊ | 9601/10845 [35:38<04:30,  4.59it/s, acc=0.713, epoch=23, loss=0.932]

epoch:23, idx:9599/10845, loss:0.9321430576282242, acc:0.7129166666666666


 89%|████████▉ | 9700/10845 [36:00<03:59,  4.79it/s, acc=0.713, epoch=23, loss=0.932]

epoch:23, idx:9699/10845, loss:0.9316768503250535, acc:0.7130670103092783


 90%|█████████ | 9801/10845 [36:23<03:49,  4.55it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:9799/10845, loss:0.9306143225638234, acc:0.7131632653061224


 91%|█████████▏| 9900/10845 [36:45<03:20,  4.71it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:9899/10845, loss:0.9309252236166385, acc:0.7133585858585859


 92%|█████████▏| 10000/10845 [37:07<03:11,  4.42it/s, acc=0.714, epoch=23, loss=0.931]

epoch:23, idx:9999/10845, loss:0.9309093436777591, acc:0.713575


 93%|█████████▎| 10101/10845 [37:29<02:50,  4.37it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:10099/10845, loss:0.9307094926586246, acc:0.7133168316831683


 94%|█████████▍| 10200/10845 [37:51<02:22,  4.51it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:10199/10845, loss:0.931298521038364, acc:0.713406862745098


 95%|█████████▍| 10300/10845 [38:13<02:05,  4.36it/s, acc=0.713, epoch=23, loss=0.931]

epoch:23, idx:10299/10845, loss:0.9311653560978695, acc:0.7134708737864077


 96%|█████████▌| 10400/10845 [38:36<01:33,  4.78it/s, acc=0.713, epoch=23, loss=0.932]

epoch:23, idx:10399/10845, loss:0.9323281906831723, acc:0.7132211538461538


 97%|█████████▋| 10501/10845 [38:58<01:18,  4.37it/s, acc=0.713, epoch=23, loss=0.933]

epoch:23, idx:10499/10845, loss:0.933334636864208, acc:0.713


 98%|█████████▊| 10600/10845 [39:21<00:52,  4.63it/s, acc=0.713, epoch=23, loss=0.935]

epoch:23, idx:10599/10845, loss:0.934657250535938, acc:0.7128537735849056


 99%|█████████▊| 10700/10845 [39:43<00:32,  4.41it/s, acc=0.713, epoch=23, loss=0.934]

epoch:23, idx:10699/10845, loss:0.9341232923313836, acc:0.7127803738317757


100%|█████████▉| 10800/10845 [40:05<00:09,  4.73it/s, acc=0.713, epoch=23, loss=0.934]

epoch:23, idx:10799/10845, loss:0.9338102832933267, acc:0.7126388888888889


100%|██████████| 10845/10845 [40:14<00:00,  4.49it/s, acc=0.713, epoch=23, loss=0.934]


epoch:23, idx:0/1275, loss:1.4215993881225586, acc:0.5
epoch:23, idx:100/1275, loss:1.4122146757522431, acc:0.6262376237623762
epoch:23, idx:200/1275, loss:1.294112690645664, acc:0.6393034825870647
epoch:23, idx:300/1275, loss:1.253195740851849, acc:0.654485049833887
epoch:23, idx:400/1275, loss:1.24207890925562, acc:0.6614713216957606
epoch:23, idx:500/1275, loss:1.2160381942452072, acc:0.6606786427145709
epoch:23, idx:600/1275, loss:1.2390309373868285, acc:0.6518302828618968
epoch:23, idx:700/1275, loss:1.2435151983089692, acc:0.6515691868758916
epoch:23, idx:800/1275, loss:1.2726872979329618, acc:0.6438826466916354
epoch:23, idx:900/1275, loss:1.2641269344336186, acc:0.6453940066592675
epoch:23, idx:1000/1275, loss:1.2687664629815223, acc:0.6438561438561439
epoch:23, idx:1100/1275, loss:1.2592505572818822, acc:0.6475930971843779
epoch:23, idx:1200/1275, loss:1.257803015466733, acc:0.6432139883430474


  1%|          | 100/10845 [00:22<38:03,  4.71it/s, acc=0.748, epoch=24, loss=0.839]

epoch:24, idx:99/10845, loss:0.8448368752002716, acc:0.745


  2%|▏         | 200/10845 [00:44<40:57,  4.33it/s, acc=0.73, epoch=24, loss=0.866] 

epoch:24, idx:199/10845, loss:0.8655527001619339, acc:0.73


  3%|▎         | 300/10845 [01:06<38:56,  4.51it/s, acc=0.727, epoch=24, loss=0.869]

epoch:24, idx:299/10845, loss:0.8690541954835256, acc:0.7266666666666667


  4%|▎         | 400/10845 [01:28<36:56,  4.71it/s, acc=0.723, epoch=24, loss=0.875]

epoch:24, idx:399/10845, loss:0.8746084669232368, acc:0.7225


  5%|▍         | 500/10845 [01:50<37:47,  4.56it/s, acc=0.721, epoch=24, loss=0.874]

epoch:24, idx:499/10845, loss:0.8735369029045105, acc:0.7205


  6%|▌         | 601/10845 [02:13<36:46,  4.64it/s, acc=0.726, epoch=24, loss=0.862]

epoch:24, idx:599/10845, loss:0.8622517548004787, acc:0.72625


  6%|▋         | 700/10845 [02:35<38:00,  4.45it/s, acc=0.727, epoch=24, loss=0.858]

epoch:24, idx:699/10845, loss:0.8575015342235566, acc:0.7267857142857143


  7%|▋         | 801/10845 [02:57<36:40,  4.56it/s, acc=0.727, epoch=24, loss=0.861]

epoch:24, idx:799/10845, loss:0.8622340932488441, acc:0.726875


  8%|▊         | 901/10845 [03:20<36:06,  4.59it/s, acc=0.728, epoch=24, loss=0.86] 

epoch:24, idx:899/10845, loss:0.8603906856642829, acc:0.7275


  9%|▉         | 1000/10845 [03:42<36:51,  4.45it/s, acc=0.727, epoch=24, loss=0.858]

epoch:24, idx:999/10845, loss:0.8575895130634308, acc:0.7265


 10%|█         | 1101/10845 [04:04<35:50,  4.53it/s, acc=0.724, epoch=24, loss=0.864]

epoch:24, idx:1099/10845, loss:0.8645768665183674, acc:0.7245454545454545


 11%|█         | 1200/10845 [04:25<33:54,  4.74it/s, acc=0.72, epoch=24, loss=0.879] 

epoch:24, idx:1199/10845, loss:0.8794177062312762, acc:0.7197916666666667


 12%|█▏        | 1301/10845 [04:48<34:27,  4.62it/s, acc=0.719, epoch=24, loss=0.876]

epoch:24, idx:1299/10845, loss:0.8755637278006627, acc:0.7198076923076923


 13%|█▎        | 1400/10845 [05:10<37:26,  4.20it/s, acc=0.719, epoch=24, loss=0.877]

epoch:24, idx:1399/10845, loss:0.8767436128854752, acc:0.71875


 14%|█▍        | 1501/10845 [05:32<33:32,  4.64it/s, acc=0.717, epoch=24, loss=0.889]

epoch:24, idx:1499/10845, loss:0.8892859510580698, acc:0.7171666666666666


 15%|█▍        | 1601/10845 [05:55<34:09,  4.51it/s, acc=0.716, epoch=24, loss=0.893]

epoch:24, idx:1599/10845, loss:0.8911347460746765, acc:0.71640625


 16%|█▌        | 1700/10845 [06:17<32:36,  4.67it/s, acc=0.714, epoch=24, loss=0.901]

epoch:24, idx:1699/10845, loss:0.9010468344828662, acc:0.7142647058823529


 17%|█▋        | 1801/10845 [06:39<32:06,  4.70it/s, acc=0.716, epoch=24, loss=0.896]

epoch:24, idx:1799/10845, loss:0.8963343506720332, acc:0.7156944444444444


 18%|█▊        | 1900/10845 [07:01<31:50,  4.68it/s, acc=0.717, epoch=24, loss=0.895]

epoch:24, idx:1899/10845, loss:0.8949793800868486, acc:0.7165789473684211


 18%|█▊        | 2000/10845 [07:23<36:12,  4.07it/s, acc=0.718, epoch=24, loss=0.893]

epoch:24, idx:1999/10845, loss:0.892580824881792, acc:0.7175


 19%|█▉        | 2100/10845 [07:46<35:21,  4.12it/s, acc=0.718, epoch=24, loss=0.898]

epoch:24, idx:2099/10845, loss:0.8984684994958696, acc:0.7177380952380953


 20%|██        | 2200/10845 [08:08<30:49,  4.67it/s, acc=0.719, epoch=24, loss=0.896]

epoch:24, idx:2199/10845, loss:0.895545183067972, acc:0.7185227272727273


 21%|██        | 2300/10845 [08:30<30:34,  4.66it/s, acc=0.719, epoch=24, loss=0.894]

epoch:24, idx:2299/10845, loss:0.893590161567149, acc:0.7191304347826087


 22%|██▏       | 2400/10845 [08:52<31:09,  4.52it/s, acc=0.72, epoch=24, loss=0.891] 

epoch:24, idx:2399/10845, loss:0.890577368264397, acc:0.72


 23%|██▎       | 2501/10845 [09:15<31:45,  4.38it/s, acc=0.719, epoch=24, loss=0.894]

epoch:24, idx:2499/10845, loss:0.8941682795286179, acc:0.7193


 24%|██▍       | 2600/10845 [09:37<30:33,  4.50it/s, acc=0.719, epoch=24, loss=0.897]

epoch:24, idx:2599/10845, loss:0.8968108012355291, acc:0.7188461538461538


 25%|██▍       | 2700/10845 [09:59<29:45,  4.56it/s, acc=0.72, epoch=24, loss=0.895] 

epoch:24, idx:2699/10845, loss:0.8949845683353919, acc:0.72


 26%|██▌       | 2800/10845 [10:22<33:26,  4.01it/s, acc=0.719, epoch=24, loss=0.896]

epoch:24, idx:2799/10845, loss:0.8957041097325938, acc:0.7189285714285715


 27%|██▋       | 2900/10845 [10:44<29:43,  4.45it/s, acc=0.719, epoch=24, loss=0.895]

epoch:24, idx:2899/10845, loss:0.8954793649500814, acc:0.7187931034482758


 28%|██▊       | 3000/10845 [11:06<30:18,  4.31it/s, acc=0.72, epoch=24, loss=0.894] 

epoch:24, idx:2999/10845, loss:0.8942584658662478, acc:0.71975


 29%|██▊       | 3100/10845 [11:29<27:44,  4.65it/s, acc=0.719, epoch=24, loss=0.896]

epoch:24, idx:3099/10845, loss:0.8967184575719218, acc:0.7193548387096774


 30%|██▉       | 3200/10845 [11:50<28:07,  4.53it/s, acc=0.72, epoch=24, loss=0.897] 

epoch:24, idx:3199/10845, loss:0.8965859318338335, acc:0.7196875


 30%|███       | 3300/10845 [12:12<27:39,  4.55it/s, acc=0.721, epoch=24, loss=0.893]

epoch:24, idx:3299/10845, loss:0.8926119642366063, acc:0.7212878787878788


 31%|███▏      | 3400/10845 [12:35<27:47,  4.47it/s, acc=0.722, epoch=24, loss=0.894]

epoch:24, idx:3399/10845, loss:0.8937456894096206, acc:0.721764705882353


 32%|███▏      | 3500/10845 [12:57<27:44,  4.41it/s, acc=0.722, epoch=24, loss=0.893]

epoch:24, idx:3499/10845, loss:0.8929246618236815, acc:0.7220714285714286


 33%|███▎      | 3600/10845 [13:19<26:40,  4.53it/s, acc=0.723, epoch=24, loss=0.89] 

epoch:24, idx:3599/10845, loss:0.8904823169277774, acc:0.7225


 34%|███▍      | 3700/10845 [13:42<25:46,  4.62it/s, acc=0.722, epoch=24, loss=0.889]

epoch:24, idx:3699/10845, loss:0.8892291418120668, acc:0.7223648648648648


 35%|███▌      | 3801/10845 [14:05<25:32,  4.60it/s, acc=0.722, epoch=24, loss=0.892]

epoch:24, idx:3799/10845, loss:0.8922499777925642, acc:0.7216447368421053


 36%|███▌      | 3901/10845 [14:27<25:17,  4.57it/s, acc=0.723, epoch=24, loss=0.889]

epoch:24, idx:3899/10845, loss:0.8896552834296838, acc:0.7226923076923077


 37%|███▋      | 4001/10845 [14:49<27:01,  4.22it/s, acc=0.722, epoch=24, loss=0.892]

epoch:24, idx:3999/10845, loss:0.8920778830498457, acc:0.7220625


 38%|███▊      | 4100/10845 [15:11<24:19,  4.62it/s, acc=0.722, epoch=24, loss=0.891]

epoch:24, idx:4099/10845, loss:0.8911506143285007, acc:0.7223780487804878


 39%|███▊      | 4200/10845 [15:33<23:54,  4.63it/s, acc=0.722, epoch=24, loss=0.892]

epoch:24, idx:4199/10845, loss:0.8920269644402322, acc:0.7220238095238095


 40%|███▉      | 4300/10845 [15:55<23:56,  4.56it/s, acc=0.722, epoch=24, loss=0.891]

epoch:24, idx:4299/10845, loss:0.8908101094462151, acc:0.7219186046511628


 41%|████      | 4400/10845 [16:18<22:31,  4.77it/s, acc=0.721, epoch=24, loss=0.893]

epoch:24, idx:4399/10845, loss:0.8928946859321811, acc:0.7211363636363637


 41%|████▏     | 4500/10845 [16:40<23:50,  4.43it/s, acc=0.721, epoch=24, loss=0.892]

epoch:24, idx:4499/10845, loss:0.8920221137338215, acc:0.7212777777777778


 42%|████▏     | 4600/10845 [17:03<21:55,  4.75it/s, acc=0.721, epoch=24, loss=0.892]

epoch:24, idx:4599/10845, loss:0.8921288486537726, acc:0.7208152173913044


 43%|████▎     | 4700/10845 [17:25<22:02,  4.65it/s, acc=0.721, epoch=24, loss=0.891]

epoch:24, idx:4699/10845, loss:0.8910132647702035, acc:0.7209042553191489


 44%|████▍     | 4800/10845 [17:47<23:08,  4.35it/s, acc=0.72, epoch=24, loss=0.893] 

epoch:24, idx:4799/10845, loss:0.8925869767243664, acc:0.7203645833333333


 45%|████▌     | 4901/10845 [18:10<20:56,  4.73it/s, acc=0.721, epoch=24, loss=0.892]

epoch:24, idx:4899/10845, loss:0.89172794272705, acc:0.7206632653061225


 46%|████▌     | 5001/10845 [18:32<21:12,  4.59it/s, acc=0.721, epoch=24, loss=0.891]

epoch:24, idx:4999/10845, loss:0.8907867869734765, acc:0.7212


 47%|████▋     | 5101/10845 [18:55<22:18,  4.29it/s, acc=0.721, epoch=24, loss=0.893]

epoch:24, idx:5099/10845, loss:0.8928052223313089, acc:0.7208333333333333


 48%|████▊     | 5200/10845 [19:17<20:13,  4.65it/s, acc=0.72, epoch=24, loss=0.894] 

epoch:24, idx:5199/10845, loss:0.893746110120645, acc:0.7202403846153846


 49%|████▉     | 5300/10845 [19:39<21:28,  4.30it/s, acc=0.72, epoch=24, loss=0.896] 

epoch:24, idx:5299/10845, loss:0.8957684952470492, acc:0.719622641509434


 50%|████▉     | 5400/10845 [20:01<19:14,  4.72it/s, acc=0.72, epoch=24, loss=0.895] 

epoch:24, idx:5399/10845, loss:0.8952696585986349, acc:0.7200925925925926


 51%|█████     | 5501/10845 [20:24<19:58,  4.46it/s, acc=0.72, epoch=24, loss=0.897]

epoch:24, idx:5499/10845, loss:0.896946500507268, acc:0.7197272727272728


 52%|█████▏    | 5600/10845 [20:46<19:12,  4.55it/s, acc=0.72, epoch=24, loss=0.898]

epoch:24, idx:5599/10845, loss:0.8982036291382143, acc:0.7198660714285714


 53%|█████▎    | 5700/10845 [21:08<19:28,  4.40it/s, acc=0.72, epoch=24, loss=0.9]  

epoch:24, idx:5699/10845, loss:0.8998416203469561, acc:0.719561403508772


 53%|█████▎    | 5801/10845 [21:31<18:53,  4.45it/s, acc=0.719, epoch=24, loss=0.902]

epoch:24, idx:5799/10845, loss:0.9015498512675022, acc:0.7190517241379311


 54%|█████▍    | 5900/10845 [21:53<18:20,  4.49it/s, acc=0.719, epoch=24, loss=0.9]  

epoch:24, idx:5899/10845, loss:0.8999098176370233, acc:0.7192796610169492


 55%|█████▌    | 6001/10845 [22:15<17:11,  4.70it/s, acc=0.719, epoch=24, loss=0.901]

epoch:24, idx:5999/10845, loss:0.9007852097849051, acc:0.7192083333333333


 56%|█████▌    | 6100/10845 [22:37<16:51,  4.69it/s, acc=0.719, epoch=24, loss=0.901]

epoch:24, idx:6099/10845, loss:0.9006551405934037, acc:0.7190163934426229


 57%|█████▋    | 6200/10845 [23:00<17:10,  4.51it/s, acc=0.72, epoch=24, loss=0.898] 

epoch:24, idx:6199/10845, loss:0.8983038527830954, acc:0.7195564516129033


 58%|█████▊    | 6301/10845 [23:22<16:25,  4.61it/s, acc=0.72, epoch=24, loss=0.899] 

epoch:24, idx:6299/10845, loss:0.8990158600371981, acc:0.7195238095238096


 59%|█████▉    | 6400/10845 [23:44<17:07,  4.33it/s, acc=0.719, epoch=24, loss=0.9]  

epoch:24, idx:6399/10845, loss:0.9003805676568299, acc:0.7189453125


 60%|█████▉    | 6500/10845 [24:06<16:23,  4.42it/s, acc=0.719, epoch=24, loss=0.9]  

epoch:24, idx:6499/10845, loss:0.9003902039252795, acc:0.7188461538461538


 61%|██████    | 6601/10845 [24:29<14:54,  4.74it/s, acc=0.719, epoch=24, loss=0.898]

epoch:24, idx:6599/10845, loss:0.8982607898116112, acc:0.7192424242424242


 62%|██████▏   | 6700/10845 [24:51<15:25,  4.48it/s, acc=0.718, epoch=24, loss=0.901]

epoch:24, idx:6699/10845, loss:0.9007587793602873, acc:0.7183955223880597


 63%|██████▎   | 6800/10845 [25:13<14:56,  4.51it/s, acc=0.719, epoch=24, loss=0.901]

epoch:24, idx:6799/10845, loss:0.9005755209309213, acc:0.7186397058823529


 64%|██████▎   | 6901/10845 [25:36<14:23,  4.57it/s, acc=0.719, epoch=24, loss=0.901]

epoch:24, idx:6899/10845, loss:0.9010797391335169, acc:0.7185507246376811


 65%|██████▍   | 7001/10845 [25:58<13:29,  4.75it/s, acc=0.718, epoch=24, loss=0.902]

epoch:24, idx:6999/10845, loss:0.902054977612836, acc:0.7184642857142857


 65%|██████▌   | 7100/10845 [26:20<15:02,  4.15it/s, acc=0.718, epoch=24, loss=0.903]

epoch:24, idx:7099/10845, loss:0.902669442262448, acc:0.7183802816901409


 66%|██████▋   | 7201/10845 [26:42<13:14,  4.59it/s, acc=0.718, epoch=24, loss=0.904]

epoch:24, idx:7199/10845, loss:0.9038250182486243, acc:0.718125


 67%|██████▋   | 7301/10845 [27:05<14:07,  4.18it/s, acc=0.719, epoch=24, loss=0.903]

epoch:24, idx:7299/10845, loss:0.9028571358200622, acc:0.7185616438356165


 68%|██████▊   | 7400/10845 [27:27<12:40,  4.53it/s, acc=0.719, epoch=24, loss=0.902]

epoch:24, idx:7399/10845, loss:0.9024943495038393, acc:0.7188851351351352


 69%|██████▉   | 7500/10845 [27:49<13:04,  4.26it/s, acc=0.718, epoch=24, loss=0.905]

epoch:24, idx:7499/10845, loss:0.9048502611716588, acc:0.7183666666666667


 70%|███████   | 7601/10845 [28:12<11:55,  4.54it/s, acc=0.718, epoch=24, loss=0.905]

epoch:24, idx:7599/10845, loss:0.9046925373061707, acc:0.7182894736842105


 71%|███████   | 7701/10845 [28:34<12:01,  4.36it/s, acc=0.718, epoch=24, loss=0.905]

epoch:24, idx:7699/10845, loss:0.905212901389444, acc:0.7180194805194805


 72%|███████▏  | 7800/10845 [28:56<11:34,  4.38it/s, acc=0.718, epoch=24, loss=0.906]

epoch:24, idx:7799/10845, loss:0.9061208901114953, acc:0.7179807692307693


 73%|███████▎  | 7900/10845 [29:19<11:02,  4.44it/s, acc=0.718, epoch=24, loss=0.907]

epoch:24, idx:7899/10845, loss:0.9074621881488003, acc:0.7176898734177215


 74%|███████▍  | 8000/10845 [29:41<10:09,  4.67it/s, acc=0.718, epoch=24, loss=0.907]

epoch:24, idx:7999/10845, loss:0.9074370584860444, acc:0.71759375


 75%|███████▍  | 8100/10845 [30:04<10:08,  4.51it/s, acc=0.717, epoch=24, loss=0.908]

epoch:24, idx:8099/10845, loss:0.9084744158129633, acc:0.7171604938271605


 76%|███████▌  | 8200/10845 [30:26<10:24,  4.23it/s, acc=0.717, epoch=24, loss=0.91] 

epoch:24, idx:8199/10845, loss:0.9099171792079763, acc:0.716859756097561


 77%|███████▋  | 8301/10845 [30:49<09:43,  4.36it/s, acc=0.717, epoch=24, loss=0.909]

epoch:24, idx:8299/10845, loss:0.9089858832919454, acc:0.7168674698795181


 77%|███████▋  | 8400/10845 [31:11<09:31,  4.27it/s, acc=0.717, epoch=24, loss=0.91] 

epoch:24, idx:8399/10845, loss:0.9097199080032962, acc:0.7167261904761905


 78%|███████▊  | 8501/10845 [31:33<08:44,  4.47it/s, acc=0.716, epoch=24, loss=0.912]

epoch:24, idx:8499/10845, loss:0.91190857277197, acc:0.7160882352941177


 79%|███████▉  | 8600/10845 [31:55<08:10,  4.57it/s, acc=0.716, epoch=24, loss=0.912]

epoch:24, idx:8599/10845, loss:0.9118741946858029, acc:0.71625


 80%|████████  | 8701/10845 [32:17<07:42,  4.64it/s, acc=0.716, epoch=24, loss=0.913]

epoch:24, idx:8699/10845, loss:0.9132577092483126, acc:0.7157471264367816


 81%|████████  | 8801/10845 [32:39<07:15,  4.69it/s, acc=0.715, epoch=24, loss=0.914]

epoch:24, idx:8799/10845, loss:0.9136580009893938, acc:0.7154829545454545


 82%|████████▏ | 8900/10845 [33:02<07:35,  4.27it/s, acc=0.715, epoch=24, loss=0.917]

epoch:24, idx:8899/10845, loss:0.9170884182346001, acc:0.7148033707865169


 83%|████████▎ | 9000/10845 [33:24<06:53,  4.46it/s, acc=0.714, epoch=24, loss=0.92] 

epoch:24, idx:8999/10845, loss:0.9195133077038659, acc:0.7141944444444445


 84%|████████▍ | 9100/10845 [33:46<06:25,  4.53it/s, acc=0.714, epoch=24, loss=0.919]

epoch:24, idx:9099/10845, loss:0.919289642729602, acc:0.7140384615384615


 85%|████████▍ | 9201/10845 [34:09<05:55,  4.62it/s, acc=0.714, epoch=24, loss=0.919]

epoch:24, idx:9199/10845, loss:0.9186708120289057, acc:0.7143478260869566


 86%|████████▌ | 9301/10845 [34:31<05:23,  4.77it/s, acc=0.714, epoch=24, loss=0.918]

epoch:24, idx:9299/10845, loss:0.9175590729841622, acc:0.7144086021505376


 87%|████████▋ | 9400/10845 [34:53<05:44,  4.19it/s, acc=0.714, epoch=24, loss=0.919]

epoch:24, idx:9399/10845, loss:0.9189470658911035, acc:0.7141223404255319


 88%|████████▊ | 9501/10845 [35:16<04:57,  4.51it/s, acc=0.714, epoch=24, loss=0.921]

epoch:24, idx:9499/10845, loss:0.9206592132668746, acc:0.7136052631578947


 89%|████████▊ | 9600/10845 [35:37<04:36,  4.50it/s, acc=0.714, epoch=24, loss=0.921]

epoch:24, idx:9599/10845, loss:0.920670267107586, acc:0.713515625


 89%|████████▉ | 9701/10845 [36:00<04:02,  4.72it/s, acc=0.713, epoch=24, loss=0.921]

epoch:24, idx:9699/10845, loss:0.9210537078577219, acc:0.7134020618556701


 90%|█████████ | 9800/10845 [36:22<04:01,  4.33it/s, acc=0.713, epoch=24, loss=0.921]

epoch:24, idx:9799/10845, loss:0.9214202472263453, acc:0.7132908163265306


 91%|█████████▏| 9900/10845 [36:44<03:23,  4.64it/s, acc=0.714, epoch=24, loss=0.921]

epoch:24, idx:9899/10845, loss:0.9205941805092975, acc:0.7136111111111111


 92%|█████████▏| 10000/10845 [37:07<03:07,  4.52it/s, acc=0.714, epoch=24, loss=0.921]

epoch:24, idx:9999/10845, loss:0.9206682876229286, acc:0.71365


 93%|█████████▎| 10101/10845 [37:29<02:33,  4.84it/s, acc=0.714, epoch=24, loss=0.921]

epoch:24, idx:10099/10845, loss:0.9212183526718971, acc:0.7135643564356435


 94%|█████████▍| 10200/10845 [37:50<02:17,  4.68it/s, acc=0.713, epoch=24, loss=0.921]

epoch:24, idx:10199/10845, loss:0.9209541032594794, acc:0.7134313725490196


 95%|█████████▍| 10301/10845 [38:13<01:57,  4.65it/s, acc=0.713, epoch=24, loss=0.922]

epoch:24, idx:10299/10845, loss:0.9224621975306169, acc:0.7133737864077669


 96%|█████████▌| 10400/10845 [38:35<01:37,  4.56it/s, acc=0.713, epoch=24, loss=0.922]

epoch:24, idx:10399/10845, loss:0.921781607144154, acc:0.7134375


 97%|█████████▋| 10501/10845 [38:57<01:16,  4.48it/s, acc=0.713, epoch=24, loss=0.922]

epoch:24, idx:10499/10845, loss:0.9221589404287792, acc:0.7132857142857143


 98%|█████████▊| 10601/10845 [39:19<00:51,  4.73it/s, acc=0.713, epoch=24, loss=0.922]

epoch:24, idx:10599/10845, loss:0.9218925710219257, acc:0.7133962264150944


 99%|█████████▊| 10701/10845 [39:41<00:31,  4.59it/s, acc=0.713, epoch=24, loss=0.922]

epoch:24, idx:10699/10845, loss:0.9223049456939519, acc:0.713411214953271


100%|█████████▉| 10801/10845 [40:03<00:09,  4.66it/s, acc=0.713, epoch=24, loss=0.924]

epoch:24, idx:10799/10845, loss:0.9238210105233722, acc:0.7129861111111111


100%|██████████| 10845/10845 [40:13<00:00,  4.51it/s, acc=0.713, epoch=24, loss=0.924]


epoch:24, idx:0/1275, loss:1.6104426383972168, acc:0.5
epoch:24, idx:100/1275, loss:1.3821592472567417, acc:0.655940594059406
epoch:24, idx:200/1275, loss:1.2788629994463565, acc:0.6517412935323383
epoch:24, idx:300/1275, loss:1.2498802833778913, acc:0.6528239202657807
epoch:24, idx:400/1275, loss:1.232513304363165, acc:0.6533665835411472
epoch:24, idx:500/1275, loss:1.2101943090766252, acc:0.6511976047904192
epoch:24, idx:600/1275, loss:1.2222703901582868, acc:0.6430948419301165
epoch:24, idx:700/1275, loss:1.2223503276727, acc:0.6458630527817404
epoch:24, idx:800/1275, loss:1.2416841485229473, acc:0.6429463171036205
epoch:24, idx:900/1275, loss:1.2281584483008008, acc:0.6459489456159823
epoch:24, idx:1000/1275, loss:1.2320019793915344, acc:0.6436063936063936
epoch:24, idx:1100/1275, loss:1.2198377262993794, acc:0.6469118982742961
epoch:24, idx:1200/1275, loss:1.2159953692076506, acc:0.6461282264779351


  1%|          | 100/10845 [00:22<40:01,  4.47it/s, acc=0.73, epoch=25, loss=0.918]

epoch:25, idx:99/10845, loss:0.917763659954071, acc:0.73


  2%|▏         | 201/10845 [00:44<37:38,  4.71it/s, acc=0.708, epoch=25, loss=0.986]

epoch:25, idx:199/10845, loss:0.9903934371471405, acc:0.70625


  3%|▎         | 301/10845 [01:06<38:44,  4.54it/s, acc=0.726, epoch=25, loss=0.907]

epoch:25, idx:299/10845, loss:0.9058416215578715, acc:0.7258333333333333


  4%|▎         | 400/10845 [01:28<36:08,  4.82it/s, acc=0.726, epoch=25, loss=0.91] 

epoch:25, idx:399/10845, loss:0.9103294989466667, acc:0.725625


  5%|▍         | 500/10845 [01:50<36:19,  4.75it/s, acc=0.725, epoch=25, loss=0.905]

epoch:25, idx:499/10845, loss:0.9046194500923157, acc:0.7245


  6%|▌         | 601/10845 [02:12<38:30,  4.43it/s, acc=0.731, epoch=25, loss=0.897]

epoch:25, idx:599/10845, loss:0.8969810541470845, acc:0.73125


  6%|▋         | 700/10845 [02:34<39:07,  4.32it/s, acc=0.729, epoch=25, loss=0.916]

epoch:25, idx:699/10845, loss:0.9164513199669975, acc:0.7292857142857143


  7%|▋         | 801/10845 [02:56<35:03,  4.78it/s, acc=0.726, epoch=25, loss=0.917]

epoch:25, idx:799/10845, loss:0.9174793907999992, acc:0.725625


  8%|▊         | 901/10845 [03:18<35:42,  4.64it/s, acc=0.725, epoch=25, loss=0.908]

epoch:25, idx:899/10845, loss:0.9071320501301023, acc:0.7255555555555555


  9%|▉         | 1001/10845 [03:40<34:19,  4.78it/s, acc=0.72, epoch=25, loss=0.914]

epoch:25, idx:999/10845, loss:0.9133333392739296, acc:0.72025


 10%|█         | 1100/10845 [04:02<40:27,  4.02it/s, acc=0.719, epoch=25, loss=0.913]

epoch:25, idx:1099/10845, loss:0.91340962220322, acc:0.7186363636363636


 11%|█         | 1200/10845 [04:25<35:18,  4.55it/s, acc=0.719, epoch=25, loss=0.922]

epoch:25, idx:1199/10845, loss:0.9215423000355562, acc:0.71875


 12%|█▏        | 1300/10845 [04:47<33:44,  4.71it/s, acc=0.719, epoch=25, loss=0.915]

epoch:25, idx:1299/10845, loss:0.9148945111953295, acc:0.7194230769230769


 13%|█▎        | 1400/10845 [05:10<37:50,  4.16it/s, acc=0.72, epoch=25, loss=0.914] 

epoch:25, idx:1399/10845, loss:0.914649980536529, acc:0.7203571428571428


 14%|█▍        | 1501/10845 [05:32<34:20,  4.54it/s, acc=0.717, epoch=25, loss=0.927]

epoch:25, idx:1499/10845, loss:0.9269821922381719, acc:0.7173333333333334


 15%|█▍        | 1600/10845 [05:54<35:36,  4.33it/s, acc=0.718, epoch=25, loss=0.927]

epoch:25, idx:1599/10845, loss:0.9269796342030168, acc:0.71796875


 16%|█▌        | 1701/10845 [06:16<33:50,  4.50it/s, acc=0.717, epoch=25, loss=0.925]

epoch:25, idx:1699/10845, loss:0.9250365165051292, acc:0.7169117647058824


 17%|█▋        | 1800/10845 [06:38<32:09,  4.69it/s, acc=0.718, epoch=25, loss=0.92] 

epoch:25, idx:1799/10845, loss:0.9196474301152759, acc:0.7181944444444445


 18%|█▊        | 1900/10845 [07:00<33:37,  4.43it/s, acc=0.718, epoch=25, loss=0.918]

epoch:25, idx:1899/10845, loss:0.9184599486777657, acc:0.718421052631579


 18%|█▊        | 2000/10845 [07:23<33:38,  4.38it/s, acc=0.719, epoch=25, loss=0.916]

epoch:25, idx:1999/10845, loss:0.9158271836042404, acc:0.719375


 19%|█▉        | 2100/10845 [07:45<32:52,  4.43it/s, acc=0.721, epoch=25, loss=0.91] 

epoch:25, idx:2099/10845, loss:0.9098083364963532, acc:0.7205952380952381


 20%|██        | 2200/10845 [08:07<31:18,  4.60it/s, acc=0.721, epoch=25, loss=0.906]

epoch:25, idx:2199/10845, loss:0.9061944163387472, acc:0.7210227272727273


 21%|██        | 2301/10845 [08:29<29:29,  4.83it/s, acc=0.72, epoch=25, loss=0.906] 

epoch:25, idx:2299/10845, loss:0.9064411215678505, acc:0.7202173913043478


 22%|██▏       | 2401/10845 [08:51<30:17,  4.64it/s, acc=0.721, epoch=25, loss=0.903]

epoch:25, idx:2399/10845, loss:0.903020905405283, acc:0.7207291666666666


 23%|██▎       | 2501/10845 [09:14<31:03,  4.48it/s, acc=0.72, epoch=25, loss=0.902] 

epoch:25, idx:2499/10845, loss:0.9015110089302063, acc:0.7203


 24%|██▍       | 2600/10845 [09:36<30:17,  4.54it/s, acc=0.721, epoch=25, loss=0.899]

epoch:25, idx:2599/10845, loss:0.8987224438098761, acc:0.7213461538461539


 25%|██▍       | 2700/10845 [09:58<29:34,  4.59it/s, acc=0.722, epoch=25, loss=0.901]

epoch:25, idx:2699/10845, loss:0.9009578352725064, acc:0.721574074074074


 26%|██▌       | 2800/10845 [10:20<28:22,  4.72it/s, acc=0.721, epoch=25, loss=0.905]

epoch:25, idx:2799/10845, loss:0.9045048800323691, acc:0.7211607142857143


 27%|██▋       | 2901/10845 [10:43<29:22,  4.51it/s, acc=0.721, epoch=25, loss=0.905]

epoch:25, idx:2899/10845, loss:0.9053401991622201, acc:0.7207758620689655


 28%|██▊       | 3000/10845 [11:05<27:59,  4.67it/s, acc=0.72, epoch=25, loss=0.907] 

epoch:25, idx:2999/10845, loss:0.9069353256026904, acc:0.7201666666666666


 29%|██▊       | 3100/10845 [11:27<28:43,  4.49it/s, acc=0.72, epoch=25, loss=0.909] 

epoch:25, idx:3099/10845, loss:0.9089894133037136, acc:0.719758064516129


 30%|██▉       | 3200/10845 [11:49<29:25,  4.33it/s, acc=0.72, epoch=25, loss=0.906] 

epoch:25, idx:3199/10845, loss:0.9060809662379324, acc:0.7203125


 30%|███       | 3300/10845 [12:11<27:49,  4.52it/s, acc=0.72, epoch=25, loss=0.906]

epoch:25, idx:3299/10845, loss:0.9055930970654343, acc:0.7198484848484848


 31%|███▏      | 3400/10845 [12:34<26:00,  4.77it/s, acc=0.72, epoch=25, loss=0.908] 

epoch:25, idx:3399/10845, loss:0.9085674229790183, acc:0.7194852941176471


 32%|███▏      | 3500/10845 [12:56<25:57,  4.72it/s, acc=0.72, epoch=25, loss=0.907]

epoch:25, idx:3499/10845, loss:0.9070366773945945, acc:0.7197857142857143


 33%|███▎      | 3601/10845 [13:18<28:07,  4.29it/s, acc=0.718, epoch=25, loss=0.914]

epoch:25, idx:3599/10845, loss:0.9137202069825596, acc:0.718125


 34%|███▍      | 3701/10845 [13:40<25:28,  4.67it/s, acc=0.718, epoch=25, loss=0.913]

epoch:25, idx:3699/10845, loss:0.9134228955249528, acc:0.7183783783783784


 35%|███▌      | 3800/10845 [14:03<24:59,  4.70it/s, acc=0.72, epoch=25, loss=0.911] 

epoch:25, idx:3799/10845, loss:0.9108563854662995, acc:0.7196710526315789


 36%|███▌      | 3901/10845 [14:25<27:37,  4.19it/s, acc=0.721, epoch=25, loss=0.909]

epoch:25, idx:3899/10845, loss:0.9088488411750549, acc:0.7205769230769231


 37%|███▋      | 4000/10845 [14:47<25:18,  4.51it/s, acc=0.721, epoch=25, loss=0.91] 

epoch:25, idx:3999/10845, loss:0.9099467293918133, acc:0.7205


 38%|███▊      | 4100/10845 [15:09<24:31,  4.58it/s, acc=0.722, epoch=25, loss=0.905]

epoch:25, idx:4099/10845, loss:0.9052999208903895, acc:0.7217682926829269


 39%|███▊      | 4200/10845 [15:32<25:36,  4.32it/s, acc=0.721, epoch=25, loss=0.907]

epoch:25, idx:4199/10845, loss:0.9072705077273505, acc:0.72125


 40%|███▉      | 4301/10845 [15:54<22:37,  4.82it/s, acc=0.721, epoch=25, loss=0.906]

epoch:25, idx:4299/10845, loss:0.9056061159039652, acc:0.7213953488372093


 41%|████      | 4400/10845 [16:16<24:58,  4.30it/s, acc=0.722, epoch=25, loss=0.905]

epoch:25, idx:4399/10845, loss:0.905158167576248, acc:0.7217613636363637


 41%|████▏     | 4500/10845 [16:38<21:49,  4.84it/s, acc=0.723, epoch=25, loss=0.902]

epoch:25, idx:4499/10845, loss:0.9025804042948616, acc:0.7225


 42%|████▏     | 4600/10845 [17:00<24:14,  4.29it/s, acc=0.723, epoch=25, loss=0.902]

epoch:25, idx:4599/10845, loss:0.9017538606990939, acc:0.7226630434782608


 43%|████▎     | 4700/10845 [17:22<23:06,  4.43it/s, acc=0.721, epoch=25, loss=0.906]

epoch:25, idx:4699/10845, loss:0.9060409503667912, acc:0.7214893617021276


 44%|████▍     | 4800/10845 [17:44<22:34,  4.46it/s, acc=0.722, epoch=25, loss=0.904]

epoch:25, idx:4799/10845, loss:0.9044679096216957, acc:0.72171875


 45%|████▌     | 4901/10845 [18:07<21:00,  4.72it/s, acc=0.721, epoch=25, loss=0.906]

epoch:25, idx:4899/10845, loss:0.9054085404410654, acc:0.7212755102040816


 46%|████▌     | 5001/10845 [18:29<20:05,  4.85it/s, acc=0.722, epoch=25, loss=0.904]

epoch:25, idx:4999/10845, loss:0.9042919234395027, acc:0.7224


 47%|████▋     | 5100/10845 [18:51<21:25,  4.47it/s, acc=0.722, epoch=25, loss=0.905]

epoch:25, idx:5099/10845, loss:0.9048554765126284, acc:0.722156862745098


 48%|████▊     | 5201/10845 [19:13<21:01,  4.47it/s, acc=0.722, epoch=25, loss=0.903]

epoch:25, idx:5199/10845, loss:0.903172914397258, acc:0.7219230769230769


 49%|████▉     | 5300/10845 [19:36<21:28,  4.30it/s, acc=0.722, epoch=25, loss=0.904]

epoch:25, idx:5299/10845, loss:0.9043484076801336, acc:0.7217924528301887


 50%|████▉     | 5400/10845 [19:58<21:16,  4.26it/s, acc=0.722, epoch=25, loss=0.904]

epoch:25, idx:5399/10845, loss:0.9035657757189539, acc:0.721574074074074


 51%|█████     | 5500/10845 [20:20<20:35,  4.32it/s, acc=0.721, epoch=25, loss=0.904]

epoch:25, idx:5499/10845, loss:0.9037071502317082, acc:0.7214090909090909


 52%|█████▏    | 5600/10845 [20:43<21:04,  4.15it/s, acc=0.721, epoch=25, loss=0.904]

epoch:25, idx:5599/10845, loss:0.9038408413210085, acc:0.7212053571428572


 53%|█████▎    | 5700/10845 [21:05<18:51,  4.55it/s, acc=0.721, epoch=25, loss=0.905]

epoch:25, idx:5699/10845, loss:0.905418067574501, acc:0.7213157894736842


 53%|█████▎    | 5800/10845 [21:28<19:14,  4.37it/s, acc=0.721, epoch=25, loss=0.906]

epoch:25, idx:5799/10845, loss:0.906062643394388, acc:0.7206034482758621


 54%|█████▍    | 5901/10845 [21:50<17:33,  4.69it/s, acc=0.721, epoch=25, loss=0.904]

epoch:25, idx:5899/10845, loss:0.9037972670187384, acc:0.7211016949152542


 55%|█████▌    | 6000/10845 [22:12<17:43,  4.56it/s, acc=0.721, epoch=25, loss=0.904]

epoch:25, idx:5999/10845, loss:0.9043887582421303, acc:0.7210833333333333


 56%|█████▌    | 6100/10845 [22:34<16:31,  4.78it/s, acc=0.721, epoch=25, loss=0.905]

epoch:25, idx:6099/10845, loss:0.9050469075070053, acc:0.7213934426229508


 57%|█████▋    | 6200/10845 [22:57<17:19,  4.47it/s, acc=0.721, epoch=25, loss=0.906]

epoch:25, idx:6199/10845, loss:0.905528470258559, acc:0.7214919354838709


 58%|█████▊    | 6300/10845 [23:19<17:16,  4.38it/s, acc=0.721, epoch=25, loss=0.907]

epoch:25, idx:6299/10845, loss:0.9067772287414188, acc:0.7209920634920635


 59%|█████▉    | 6401/10845 [23:41<15:50,  4.68it/s, acc=0.721, epoch=25, loss=0.906]

epoch:25, idx:6399/10845, loss:0.9060085730999708, acc:0.7211328125


 60%|█████▉    | 6501/10845 [24:04<15:21,  4.72it/s, acc=0.721, epoch=25, loss=0.907]

epoch:25, idx:6499/10845, loss:0.907013349542251, acc:0.7209615384615384


 61%|██████    | 6600/10845 [24:26<15:33,  4.55it/s, acc=0.72, epoch=25, loss=0.91]  

epoch:25, idx:6599/10845, loss:0.9095704497622721, acc:0.7203030303030303


 62%|██████▏   | 6701/10845 [24:49<15:25,  4.48it/s, acc=0.72, epoch=25, loss=0.912]

epoch:25, idx:6699/10845, loss:0.9119774585428523, acc:0.7202238805970149


 63%|██████▎   | 6800/10845 [25:11<14:45,  4.57it/s, acc=0.721, epoch=25, loss=0.909]

epoch:25, idx:6799/10845, loss:0.9094673260257524, acc:0.7209926470588235


 64%|██████▎   | 6900/10845 [25:32<13:55,  4.72it/s, acc=0.721, epoch=25, loss=0.908]

epoch:25, idx:6899/10845, loss:0.9075657543151275, acc:0.7214492753623188


 65%|██████▍   | 7001/10845 [25:55<14:42,  4.36it/s, acc=0.722, epoch=25, loss=0.907]

epoch:25, idx:6999/10845, loss:0.9067668996623585, acc:0.7215714285714285


 65%|██████▌   | 7101/10845 [26:17<14:24,  4.33it/s, acc=0.721, epoch=25, loss=0.908]

epoch:25, idx:7099/10845, loss:0.9078157413593481, acc:0.721338028169014


 66%|██████▋   | 7201/10845 [26:39<12:49,  4.74it/s, acc=0.722, epoch=25, loss=0.907]

epoch:25, idx:7199/10845, loss:0.906553710690803, acc:0.7215972222222222


 67%|██████▋   | 7301/10845 [27:01<12:59,  4.55it/s, acc=0.721, epoch=25, loss=0.907]

epoch:25, idx:7299/10845, loss:0.9070675551972978, acc:0.721472602739726


 68%|██████▊   | 7401/10845 [27:23<12:14,  4.69it/s, acc=0.721, epoch=25, loss=0.909]

epoch:25, idx:7399/10845, loss:0.9088725802624548, acc:0.7213175675675676


 69%|██████▉   | 7501/10845 [27:46<11:12,  4.97it/s, acc=0.721, epoch=25, loss=0.909]

epoch:25, idx:7499/10845, loss:0.9090457672993342, acc:0.7211


 70%|███████   | 7600/10845 [28:08<12:56,  4.18it/s, acc=0.721, epoch=25, loss=0.909]

epoch:25, idx:7599/10845, loss:0.9086034924027167, acc:0.7213157894736842


 71%|███████   | 7700/10845 [28:30<11:00,  4.76it/s, acc=0.722, epoch=25, loss=0.907]

epoch:25, idx:7699/10845, loss:0.907484666287125, acc:0.7216883116883117


 72%|███████▏  | 7801/10845 [28:53<10:54,  4.65it/s, acc=0.722, epoch=25, loss=0.907]

epoch:25, idx:7799/10845, loss:0.9070318689789527, acc:0.7217307692307692


 73%|███████▎  | 7901/10845 [29:15<10:07,  4.84it/s, acc=0.722, epoch=25, loss=0.908]

epoch:25, idx:7899/10845, loss:0.9080080077693432, acc:0.7217405063291139


 74%|███████▍  | 8000/10845 [29:37<10:35,  4.47it/s, acc=0.721, epoch=25, loss=0.91] 

epoch:25, idx:7999/10845, loss:0.9099317854866386, acc:0.72134375


 75%|███████▍  | 8100/10845 [30:00<10:24,  4.39it/s, acc=0.721, epoch=25, loss=0.911]

epoch:25, idx:8099/10845, loss:0.9110638914652812, acc:0.7208641975308642


 76%|███████▌  | 8201/10845 [30:22<09:22,  4.70it/s, acc=0.721, epoch=25, loss=0.911]

epoch:25, idx:8199/10845, loss:0.9113170510312406, acc:0.7207926829268293


 77%|███████▋  | 8300/10845 [30:44<09:33,  4.43it/s, acc=0.721, epoch=25, loss=0.911]

epoch:25, idx:8299/10845, loss:0.910660589796951, acc:0.7209638554216867


 77%|███████▋  | 8401/10845 [31:07<08:33,  4.76it/s, acc=0.721, epoch=25, loss=0.91] 

epoch:25, idx:8399/10845, loss:0.9100593432996954, acc:0.7210714285714286


 78%|███████▊  | 8500/10845 [31:29<09:01,  4.33it/s, acc=0.721, epoch=25, loss=0.91] 

epoch:25, idx:8499/10845, loss:0.910492681685616, acc:0.7210588235294118


 79%|███████▉  | 8600/10845 [31:51<08:04,  4.63it/s, acc=0.72, epoch=25, loss=0.912] 

epoch:25, idx:8599/10845, loss:0.9119036289425784, acc:0.7203488372093023


 80%|████████  | 8700/10845 [32:13<07:58,  4.48it/s, acc=0.72, epoch=25, loss=0.913]

epoch:25, idx:8699/10845, loss:0.9125112880098408, acc:0.7202011494252873


 81%|████████  | 8801/10845 [32:36<07:54,  4.31it/s, acc=0.72, epoch=25, loss=0.913]

epoch:25, idx:8799/10845, loss:0.9127975752543319, acc:0.7200852272727273


 82%|████████▏ | 8900/10845 [32:58<06:45,  4.80it/s, acc=0.72, epoch=25, loss=0.914]

epoch:25, idx:8899/10845, loss:0.91355785332369, acc:0.7198595505617977


 83%|████████▎ | 9000/10845 [33:20<06:32,  4.70it/s, acc=0.72, epoch=25, loss=0.914]

epoch:25, idx:8999/10845, loss:0.9135920303397709, acc:0.7199166666666666


 84%|████████▍ | 9100/10845 [33:42<06:22,  4.57it/s, acc=0.72, epoch=25, loss=0.913]

epoch:25, idx:9099/10845, loss:0.9131204321096231, acc:0.72


 85%|████████▍ | 9200/10845 [34:04<06:02,  4.53it/s, acc=0.72, epoch=25, loss=0.914]

epoch:25, idx:9199/10845, loss:0.9140266876246618, acc:0.7197282608695652


 86%|████████▌ | 9300/10845 [34:27<05:27,  4.72it/s, acc=0.72, epoch=25, loss=0.914]

epoch:25, idx:9299/10845, loss:0.9136020333792574, acc:0.7197043010752688


 87%|████████▋ | 9401/10845 [34:49<05:18,  4.54it/s, acc=0.72, epoch=25, loss=0.913]

epoch:25, idx:9399/10845, loss:0.9129829442120613, acc:0.7195478723404255


 88%|████████▊ | 9500/10845 [35:12<05:18,  4.22it/s, acc=0.72, epoch=25, loss=0.914] 

epoch:25, idx:9499/10845, loss:0.9135418718488593, acc:0.7195


 89%|████████▊ | 9600/10845 [35:34<04:24,  4.70it/s, acc=0.719, epoch=25, loss=0.914]

epoch:25, idx:9599/10845, loss:0.9139963719000419, acc:0.7193489583333333


 89%|████████▉ | 9700/10845 [35:56<04:32,  4.20it/s, acc=0.719, epoch=25, loss=0.915]

epoch:25, idx:9699/10845, loss:0.9154928967510302, acc:0.7185051546391753


 90%|█████████ | 9800/10845 [36:18<03:42,  4.70it/s, acc=0.718, epoch=25, loss=0.916]

epoch:25, idx:9799/10845, loss:0.915747600368091, acc:0.7183928571428572


 91%|█████████▏| 9900/10845 [36:40<03:27,  4.55it/s, acc=0.719, epoch=25, loss=0.915]

epoch:25, idx:9899/10845, loss:0.9150678186946445, acc:0.718560606060606


 92%|█████████▏| 10001/10845 [37:03<03:05,  4.55it/s, acc=0.719, epoch=25, loss=0.915]

epoch:25, idx:9999/10845, loss:0.9147125547766686, acc:0.7187


 93%|█████████▎| 10100/10845 [37:25<02:49,  4.40it/s, acc=0.719, epoch=25, loss=0.914]

epoch:25, idx:10099/10845, loss:0.9139722759534817, acc:0.7188861386138614


 94%|█████████▍| 10200/10845 [37:47<02:18,  4.66it/s, acc=0.719, epoch=25, loss=0.915]

epoch:25, idx:10199/10845, loss:0.9145699779134171, acc:0.7187745098039215


 95%|█████████▍| 10300/10845 [38:10<02:05,  4.35it/s, acc=0.719, epoch=25, loss=0.914]

epoch:25, idx:10299/10845, loss:0.9143141757226685, acc:0.7189805825242719


 96%|█████████▌| 10401/10845 [38:32<01:41,  4.35it/s, acc=0.719, epoch=25, loss=0.914]

epoch:25, idx:10399/10845, loss:0.9142284148473006, acc:0.7188701923076923


 97%|█████████▋| 10500/10845 [38:54<01:18,  4.37it/s, acc=0.719, epoch=25, loss=0.915]

epoch:25, idx:10499/10845, loss:0.9146098087742215, acc:0.719


 98%|█████████▊| 10600/10845 [39:16<00:53,  4.62it/s, acc=0.719, epoch=25, loss=0.914]

epoch:25, idx:10599/10845, loss:0.914297120964752, acc:0.7190094339622641


 99%|█████████▊| 10701/10845 [39:39<00:29,  4.81it/s, acc=0.719, epoch=25, loss=0.915]

epoch:25, idx:10699/10845, loss:0.9146000684421753, acc:0.7189018691588785


100%|█████████▉| 10801/10845 [40:01<00:09,  4.62it/s, acc=0.719, epoch=25, loss=0.914]

epoch:25, idx:10799/10845, loss:0.9138784020863198, acc:0.7191898148148148


100%|██████████| 10845/10845 [40:11<00:00,  4.50it/s, acc=0.719, epoch=25, loss=0.915]


epoch:25, idx:0/1275, loss:1.3331942558288574, acc:0.5
epoch:25, idx:100/1275, loss:1.4373323409864218, acc:0.6262376237623762
epoch:25, idx:200/1275, loss:1.318566543545889, acc:0.6417910447761194
epoch:25, idx:300/1275, loss:1.276375036302991, acc:0.6503322259136213
epoch:25, idx:400/1275, loss:1.259675296762043, acc:0.6577306733167082
epoch:25, idx:500/1275, loss:1.2239910628267392, acc:0.6576846307385229
epoch:25, idx:600/1275, loss:1.238682677860863, acc:0.6497504159733777
epoch:25, idx:700/1275, loss:1.247311745832718, acc:0.6490727532097005
epoch:25, idx:800/1275, loss:1.266162850734744, acc:0.6435705368289638
epoch:25, idx:900/1275, loss:1.2570737362172575, acc:0.6456714761376249
epoch:25, idx:1000/1275, loss:1.2606932167763953, acc:0.6431068931068931
epoch:25, idx:1100/1275, loss:1.2430450736556022, acc:0.6475930971843779
epoch:25, idx:1200/1275, loss:1.2393736663607138, acc:0.6461282264779351


  1%|          | 100/10845 [00:21<40:23,  4.43it/s, acc=0.75, epoch=26, loss=0.832] 

epoch:26, idx:99/10845, loss:0.8317730689048767, acc:0.75


  2%|▏         | 200/10845 [00:44<38:31,  4.61it/s, acc=0.741, epoch=26, loss=0.875]

epoch:26, idx:199/10845, loss:0.8748188745975495, acc:0.74125


  3%|▎         | 301/10845 [01:06<36:45,  4.78it/s, acc=0.733, epoch=26, loss=0.875]

epoch:26, idx:299/10845, loss:0.8779529122511546, acc:0.7325


  4%|▎         | 400/10845 [01:29<39:33,  4.40it/s, acc=0.726, epoch=26, loss=0.857]

epoch:26, idx:399/10845, loss:0.8572171103954315, acc:0.725625


  5%|▍         | 501/10845 [01:51<39:30,  4.36it/s, acc=0.728, epoch=26, loss=0.852]

epoch:26, idx:499/10845, loss:0.852854202747345, acc:0.7275


  6%|▌         | 600/10845 [02:14<38:25,  4.44it/s, acc=0.729, epoch=26, loss=0.852]

epoch:26, idx:599/10845, loss:0.8524943415323893, acc:0.7291666666666666


  6%|▋         | 700/10845 [02:36<36:45,  4.60it/s, acc=0.727, epoch=26, loss=0.857]

epoch:26, idx:699/10845, loss:0.8573820655686515, acc:0.7267857142857143


  7%|▋         | 800/10845 [02:58<40:48,  4.10it/s, acc=0.729, epoch=26, loss=0.855]

epoch:26, idx:799/10845, loss:0.8551358518004417, acc:0.72875


  8%|▊         | 900/10845 [03:20<36:05,  4.59it/s, acc=0.728, epoch=26, loss=0.854]

epoch:26, idx:899/10845, loss:0.8543397030565474, acc:0.7277777777777777


  9%|▉         | 1000/10845 [03:42<36:41,  4.47it/s, acc=0.726, epoch=26, loss=0.864]

epoch:26, idx:999/10845, loss:0.8642819398641586, acc:0.726


 10%|█         | 1100/10845 [04:04<34:48,  4.67it/s, acc=0.725, epoch=26, loss=0.866]

epoch:26, idx:1099/10845, loss:0.8656171618808399, acc:0.7252272727272727


 11%|█         | 1200/10845 [04:27<38:12,  4.21it/s, acc=0.729, epoch=26, loss=0.866]

epoch:26, idx:1199/10845, loss:0.8651591235399246, acc:0.729375


 12%|█▏        | 1300/10845 [04:49<37:03,  4.29it/s, acc=0.733, epoch=26, loss=0.856]

epoch:26, idx:1299/10845, loss:0.8562730860251647, acc:0.7326923076923076


 13%|█▎        | 1400/10845 [05:11<34:57,  4.50it/s, acc=0.733, epoch=26, loss=0.864]

epoch:26, idx:1399/10845, loss:0.8635820161019053, acc:0.7326785714285714


 14%|█▍        | 1500/10845 [05:34<35:01,  4.45it/s, acc=0.729, epoch=26, loss=0.875]

epoch:26, idx:1499/10845, loss:0.8748323626120885, acc:0.729


 15%|█▍        | 1600/10845 [05:56<34:13,  4.50it/s, acc=0.728, epoch=26, loss=0.873]

epoch:26, idx:1599/10845, loss:0.8732502410188318, acc:0.7284375


 16%|█▌        | 1700/10845 [06:18<32:57,  4.63it/s, acc=0.728, epoch=26, loss=0.877]

epoch:26, idx:1699/10845, loss:0.8771879374630311, acc:0.7283823529411765


 17%|█▋        | 1800/10845 [06:40<33:16,  4.53it/s, acc=0.728, epoch=26, loss=0.879]

epoch:26, idx:1799/10845, loss:0.8786917358967993, acc:0.7281944444444445


 18%|█▊        | 1901/10845 [07:03<32:28,  4.59it/s, acc=0.729, epoch=26, loss=0.873]

epoch:26, idx:1899/10845, loss:0.8729527289930142, acc:0.729078947368421


 18%|█▊        | 2000/10845 [07:25<33:04,  4.46it/s, acc=0.728, epoch=26, loss=0.875]

epoch:26, idx:1999/10845, loss:0.8750044936835766, acc:0.72825


 19%|█▉        | 2100/10845 [07:47<30:40,  4.75it/s, acc=0.729, epoch=26, loss=0.872]

epoch:26, idx:2099/10845, loss:0.8723326136668523, acc:0.7285714285714285


 20%|██        | 2201/10845 [08:09<30:38,  4.70it/s, acc=0.727, epoch=26, loss=0.879]

epoch:26, idx:2199/10845, loss:0.8782514680244706, acc:0.7268181818181818


 21%|██        | 2300/10845 [08:31<32:16,  4.41it/s, acc=0.726, epoch=26, loss=0.885]

epoch:26, idx:2299/10845, loss:0.8847572596695112, acc:0.726304347826087


 22%|██▏       | 2400/10845 [08:53<30:18,  4.64it/s, acc=0.724, epoch=26, loss=0.89] 

epoch:26, idx:2399/10845, loss:0.8899891656637192, acc:0.7242708333333333


 23%|██▎       | 2500/10845 [09:16<31:02,  4.48it/s, acc=0.723, epoch=26, loss=0.889]

epoch:26, idx:2499/10845, loss:0.8892792109966278, acc:0.7234


 24%|██▍       | 2600/10845 [09:38<30:48,  4.46it/s, acc=0.724, epoch=26, loss=0.893]

epoch:26, idx:2599/10845, loss:0.8929321249173238, acc:0.7236538461538462


 25%|██▍       | 2700/10845 [10:01<31:11,  4.35it/s, acc=0.722, epoch=26, loss=0.899]

epoch:26, idx:2699/10845, loss:0.8985085161085482, acc:0.7216666666666667


 26%|██▌       | 2801/10845 [10:23<30:34,  4.38it/s, acc=0.721, epoch=26, loss=0.904]

epoch:26, idx:2799/10845, loss:0.9038156009571893, acc:0.7208928571428571


 27%|██▋       | 2900/10845 [10:45<31:03,  4.26it/s, acc=0.721, epoch=26, loss=0.903]

epoch:26, idx:2899/10845, loss:0.9027331855790368, acc:0.7205172413793104


 28%|██▊       | 3001/10845 [11:08<28:13,  4.63it/s, acc=0.721, epoch=26, loss=0.901]

epoch:26, idx:2999/10845, loss:0.9009965262413024, acc:0.7208333333333333


 29%|██▊       | 3100/10845 [11:30<27:39,  4.67it/s, acc=0.72, epoch=26, loss=0.901] 

epoch:26, idx:3099/10845, loss:0.9013295843908864, acc:0.7204032258064517


 30%|██▉       | 3200/10845 [11:52<27:28,  4.64it/s, acc=0.721, epoch=26, loss=0.899]

epoch:26, idx:3199/10845, loss:0.898525204025209, acc:0.721015625


 30%|███       | 3300/10845 [12:14<27:26,  4.58it/s, acc=0.722, epoch=26, loss=0.898]

epoch:26, idx:3299/10845, loss:0.8981302505189722, acc:0.7217424242424243


 31%|███▏      | 3401/10845 [12:36<26:14,  4.73it/s, acc=0.722, epoch=26, loss=0.895]

epoch:26, idx:3399/10845, loss:0.8949969944533179, acc:0.7215441176470588


 32%|███▏      | 3501/10845 [12:59<26:05,  4.69it/s, acc=0.722, epoch=26, loss=0.896]

epoch:26, idx:3499/10845, loss:0.8958847371510097, acc:0.7215


 33%|███▎      | 3600/10845 [13:20<26:47,  4.51it/s, acc=0.722, epoch=26, loss=0.895]

epoch:26, idx:3599/10845, loss:0.8954860193530718, acc:0.7222222222222222


 34%|███▍      | 3700/10845 [13:43<27:35,  4.32it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:3699/10845, loss:0.8967449350614806, acc:0.7225


 35%|███▌      | 3800/10845 [14:05<27:14,  4.31it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:3799/10845, loss:0.8981368362276178, acc:0.7226973684210526


 36%|███▌      | 3900/10845 [14:27<25:19,  4.57it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:3899/10845, loss:0.8978471917372484, acc:0.7228846153846153


 37%|███▋      | 4001/10845 [14:50<24:49,  4.59it/s, acc=0.724, epoch=26, loss=0.895]

epoch:26, idx:3999/10845, loss:0.895553819090128, acc:0.72375


 38%|███▊      | 4101/10845 [15:12<23:54,  4.70it/s, acc=0.724, epoch=26, loss=0.895]

epoch:26, idx:4099/10845, loss:0.8952035206992451, acc:0.7235365853658536


 39%|███▊      | 4200/10845 [15:34<24:07,  4.59it/s, acc=0.723, epoch=26, loss=0.896]

epoch:26, idx:4199/10845, loss:0.8960734692925498, acc:0.7231547619047619


 40%|███▉      | 4300/10845 [15:56<22:09,  4.92it/s, acc=0.724, epoch=26, loss=0.892]

epoch:26, idx:4299/10845, loss:0.8922292982423028, acc:0.7240116279069767


 41%|████      | 4400/10845 [16:18<23:51,  4.50it/s, acc=0.723, epoch=26, loss=0.896]

epoch:26, idx:4399/10845, loss:0.8958175033330917, acc:0.7230681818181818


 41%|████▏     | 4500/10845 [16:40<21:24,  4.94it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:4499/10845, loss:0.8968963637616899, acc:0.7225555555555555


 42%|████▏     | 4600/10845 [17:03<22:16,  4.67it/s, acc=0.723, epoch=26, loss=0.896]

epoch:26, idx:4599/10845, loss:0.8964669870552809, acc:0.7228260869565217


 43%|████▎     | 4701/10845 [17:25<21:36,  4.74it/s, acc=0.722, epoch=26, loss=0.898]

epoch:26, idx:4699/10845, loss:0.8982369899496119, acc:0.7221808510638298


 44%|████▍     | 4801/10845 [17:47<21:58,  4.58it/s, acc=0.723, epoch=26, loss=0.896]

epoch:26, idx:4799/10845, loss:0.8964155937979619, acc:0.7228645833333334


 45%|████▌     | 4900/10845 [18:08<21:25,  4.62it/s, acc=0.723, epoch=26, loss=0.896]

epoch:26, idx:4899/10845, loss:0.8960612889212005, acc:0.7227040816326531


 46%|████▌     | 5000/10845 [18:31<23:05,  4.22it/s, acc=0.723, epoch=26, loss=0.894]

epoch:26, idx:4999/10845, loss:0.8944306475400925, acc:0.72325


 47%|████▋     | 5100/10845 [18:53<21:44,  4.40it/s, acc=0.724, epoch=26, loss=0.894]

epoch:26, idx:5099/10845, loss:0.8944594358462913, acc:0.7235294117647059


 48%|████▊     | 5200/10845 [19:15<20:35,  4.57it/s, acc=0.724, epoch=26, loss=0.894]

epoch:26, idx:5199/10845, loss:0.8940574029546517, acc:0.7235576923076923


 49%|████▉     | 5300/10845 [19:38<20:27,  4.52it/s, acc=0.724, epoch=26, loss=0.894]

epoch:26, idx:5299/10845, loss:0.8937658622804677, acc:0.7236320754716982


 50%|████▉     | 5401/10845 [20:00<18:41,  4.85it/s, acc=0.724, epoch=26, loss=0.894]

epoch:26, idx:5399/10845, loss:0.8937694008924343, acc:0.7236574074074074


 51%|█████     | 5500/10845 [20:22<18:35,  4.79it/s, acc=0.724, epoch=26, loss=0.892]

epoch:26, idx:5499/10845, loss:0.8922460396614942, acc:0.7238181818181818


 52%|█████▏    | 5601/10845 [20:44<20:11,  4.33it/s, acc=0.724, epoch=26, loss=0.891]

epoch:26, idx:5599/10845, loss:0.8908171167863267, acc:0.7241964285714285


 53%|█████▎    | 5701/10845 [21:06<20:11,  4.25it/s, acc=0.724, epoch=26, loss=0.892]

epoch:26, idx:5699/10845, loss:0.8918939688226633, acc:0.7242105263157895


 53%|█████▎    | 5800/10845 [21:28<19:44,  4.26it/s, acc=0.724, epoch=26, loss=0.893]

epoch:26, idx:5799/10845, loss:0.8932812257573522, acc:0.7242241379310345


 54%|█████▍    | 5900/10845 [21:50<18:43,  4.40it/s, acc=0.724, epoch=26, loss=0.893]

epoch:26, idx:5899/10845, loss:0.8931099453719996, acc:0.7239830508474576


 55%|█████▌    | 6001/10845 [22:12<17:27,  4.63it/s, acc=0.724, epoch=26, loss=0.892]

epoch:26, idx:5999/10845, loss:0.8917880358000596, acc:0.724375


 56%|█████▋    | 6101/10845 [22:35<17:23,  4.55it/s, acc=0.725, epoch=26, loss=0.89] 

epoch:26, idx:6099/10845, loss:0.8901317468138992, acc:0.7245901639344262


 57%|█████▋    | 6200/10845 [22:57<18:02,  4.29it/s, acc=0.724, epoch=26, loss=0.889]

epoch:26, idx:6199/10845, loss:0.8890964462391792, acc:0.7243951612903226


 58%|█████▊    | 6300/10845 [23:19<17:55,  4.22it/s, acc=0.724, epoch=26, loss=0.89] 

epoch:26, idx:6299/10845, loss:0.8900064574442212, acc:0.7237698412698412


 59%|█████▉    | 6401/10845 [23:41<15:31,  4.77it/s, acc=0.724, epoch=26, loss=0.89] 

epoch:26, idx:6399/10845, loss:0.8899182434659452, acc:0.72375


 60%|█████▉    | 6501/10845 [24:04<15:09,  4.78it/s, acc=0.724, epoch=26, loss=0.89] 

epoch:26, idx:6499/10845, loss:0.8897519700618891, acc:0.7238461538461538


 61%|██████    | 6600/10845 [24:26<15:46,  4.49it/s, acc=0.723, epoch=26, loss=0.892]

epoch:26, idx:6599/10845, loss:0.8919322067860401, acc:0.7231818181818181


 62%|██████▏   | 6701/10845 [24:48<14:48,  4.66it/s, acc=0.723, epoch=26, loss=0.891]

epoch:26, idx:6699/10845, loss:0.8912255761160779, acc:0.7234328358208956


 63%|██████▎   | 6800/10845 [25:10<14:09,  4.76it/s, acc=0.724, epoch=26, loss=0.89] 

epoch:26, idx:6799/10845, loss:0.8902091587816968, acc:0.7239705882352941


 64%|██████▎   | 6900/10845 [25:33<14:48,  4.44it/s, acc=0.724, epoch=26, loss=0.89] 

epoch:26, idx:6899/10845, loss:0.8904371476346168, acc:0.7236956521739131


 65%|██████▍   | 7001/10845 [25:55<13:29,  4.75it/s, acc=0.724, epoch=26, loss=0.89] 

epoch:26, idx:6999/10845, loss:0.8900967267921993, acc:0.7240714285714286


 65%|██████▌   | 7101/10845 [26:17<13:22,  4.67it/s, acc=0.724, epoch=26, loss=0.891]

epoch:26, idx:7099/10845, loss:0.8909930388524498, acc:0.7238732394366197


 66%|██████▋   | 7200/10845 [26:40<13:38,  4.46it/s, acc=0.724, epoch=26, loss=0.891]

epoch:26, idx:7199/10845, loss:0.8914146109587616, acc:0.7236458333333333


 67%|██████▋   | 7300/10845 [27:02<12:36,  4.69it/s, acc=0.724, epoch=26, loss=0.892]

epoch:26, idx:7299/10845, loss:0.8918835050929083, acc:0.723595890410959


 68%|██████▊   | 7400/10845 [27:24<13:18,  4.32it/s, acc=0.723, epoch=26, loss=0.894]

epoch:26, idx:7399/10845, loss:0.8942735690522838, acc:0.7231418918918919


 69%|██████▉   | 7501/10845 [27:46<12:30,  4.45it/s, acc=0.723, epoch=26, loss=0.894]

epoch:26, idx:7499/10845, loss:0.8942327223936717, acc:0.7229666666666666


 70%|███████   | 7600/10845 [28:09<11:54,  4.54it/s, acc=0.723, epoch=26, loss=0.895]

epoch:26, idx:7599/10845, loss:0.8947144388995673, acc:0.7230263157894737


 71%|███████   | 7701/10845 [28:31<10:51,  4.83it/s, acc=0.723, epoch=26, loss=0.896]

epoch:26, idx:7699/10845, loss:0.896027167989062, acc:0.7229545454545454


 72%|███████▏  | 7801/10845 [28:53<10:27,  4.85it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:7799/10845, loss:0.8965777472349313, acc:0.7228525641025642


 73%|███████▎  | 7900/10845 [29:15<11:10,  4.39it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:7899/10845, loss:0.89744651417189, acc:0.722626582278481


 74%|███████▍  | 8000/10845 [29:37<10:13,  4.64it/s, acc=0.722, epoch=26, loss=0.898]

epoch:26, idx:7999/10845, loss:0.8981728516966104, acc:0.72246875


 75%|███████▍  | 8101/10845 [30:00<09:33,  4.78it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:8099/10845, loss:0.8978422673322536, acc:0.7225617283950617


 76%|███████▌  | 8200/10845 [30:22<10:04,  4.37it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:8199/10845, loss:0.8971473473310471, acc:0.7227439024390244


 77%|███████▋  | 8300/10845 [30:44<09:14,  4.59it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:8299/10845, loss:0.8970115759430162, acc:0.7227409638554216


 77%|███████▋  | 8400/10845 [31:06<08:54,  4.57it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:8399/10845, loss:0.8966385582373255, acc:0.7227976190476191


 78%|███████▊  | 8500/10845 [31:28<08:33,  4.57it/s, acc=0.723, epoch=26, loss=0.896]

epoch:26, idx:8499/10845, loss:0.8963499896245845, acc:0.722735294117647


 79%|███████▉  | 8600/10845 [31:51<08:38,  4.33it/s, acc=0.722, epoch=26, loss=0.898]

epoch:26, idx:8599/10845, loss:0.8980592425063599, acc:0.7223546511627907


 80%|████████  | 8700/10845 [32:13<07:57,  4.50it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:8699/10845, loss:0.8972817580042214, acc:0.7225862068965517


 81%|████████  | 8801/10845 [32:35<07:48,  4.36it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:8799/10845, loss:0.8980225574157454, acc:0.7226420454545455


 82%|████████▏ | 8900/10845 [32:57<06:33,  4.95it/s, acc=0.723, epoch=26, loss=0.897]

epoch:26, idx:8899/10845, loss:0.8970490121573544, acc:0.7229775280898877


 83%|████████▎ | 9001/10845 [33:20<06:39,  4.62it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:8999/10845, loss:0.8976486281951268, acc:0.723


 84%|████████▍ | 9101/10845 [33:42<06:21,  4.57it/s, acc=0.723, epoch=26, loss=0.9]  

epoch:26, idx:9099/10845, loss:0.8996953683109074, acc:0.7224725274725274


 85%|████████▍ | 9200/10845 [34:04<06:05,  4.51it/s, acc=0.723, epoch=26, loss=0.899]

epoch:26, idx:9199/10845, loss:0.8992595217668492, acc:0.7226630434782608


 86%|████████▌ | 9300/10845 [34:26<05:43,  4.50it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:9299/10845, loss:0.8984286856907671, acc:0.7228494623655914


 87%|████████▋ | 9401/10845 [34:48<05:02,  4.78it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:9399/10845, loss:0.8978895377161655, acc:0.7230053191489362


 88%|████████▊ | 9500/10845 [35:10<04:42,  4.76it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:9499/10845, loss:0.8981529713743611, acc:0.7230526315789474


 89%|████████▊ | 9601/10845 [35:32<04:23,  4.72it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:9599/10845, loss:0.8981538990450402, acc:0.7228645833333334


 89%|████████▉ | 9700/10845 [35:54<04:04,  4.68it/s, acc=0.723, epoch=26, loss=0.899]

epoch:26, idx:9699/10845, loss:0.8988670510176531, acc:0.7226546391752577


 90%|█████████ | 9801/10845 [36:16<03:48,  4.57it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:9799/10845, loss:0.8978788981693132, acc:0.7229591836734693


 91%|█████████▏| 9901/10845 [36:39<03:29,  4.51it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:9899/10845, loss:0.8981954511307707, acc:0.7231313131313132


 92%|█████████▏| 10000/10845 [37:01<03:10,  4.44it/s, acc=0.723, epoch=26, loss=0.898]

epoch:26, idx:9999/10845, loss:0.898031527787447, acc:0.72325


 93%|█████████▎| 10100/10845 [37:23<02:46,  4.46it/s, acc=0.723, epoch=26, loss=0.899]

epoch:26, idx:10099/10845, loss:0.8986887151652043, acc:0.7233168316831683


 94%|█████████▍| 10201/10845 [37:46<02:17,  4.67it/s, acc=0.723, epoch=26, loss=0.9]  

epoch:26, idx:10199/10845, loss:0.9000439993832625, acc:0.7230882352941177


 95%|█████████▍| 10300/10845 [38:07<01:54,  4.74it/s, acc=0.723, epoch=26, loss=0.901]

epoch:26, idx:10299/10845, loss:0.9011120673348603, acc:0.7228640776699029


 96%|█████████▌| 10400/10845 [38:30<01:42,  4.36it/s, acc=0.723, epoch=26, loss=0.901]

epoch:26, idx:10399/10845, loss:0.9007160032311311, acc:0.7230288461538461


 97%|█████████▋| 10501/10845 [38:52<01:12,  4.73it/s, acc=0.723, epoch=26, loss=0.901]

epoch:26, idx:10499/10845, loss:0.9007129453307107, acc:0.7228809523809524


 98%|█████████▊| 10601/10845 [39:15<00:52,  4.69it/s, acc=0.723, epoch=26, loss=0.899]

epoch:26, idx:10599/10845, loss:0.8993782529347347, acc:0.7232075471698113


 99%|█████████▊| 10700/10845 [39:36<00:31,  4.61it/s, acc=0.723, epoch=26, loss=0.9]  

epoch:26, idx:10699/10845, loss:0.8996728662631222, acc:0.7230841121495327


100%|█████████▉| 10800/10845 [39:58<00:09,  4.68it/s, acc=0.723, epoch=26, loss=0.901]

epoch:26, idx:10799/10845, loss:0.9009045191329939, acc:0.7227777777777777


100%|██████████| 10845/10845 [40:08<00:00,  4.60it/s, acc=0.723, epoch=26, loss=0.902]


epoch:26, idx:0/1275, loss:1.3928327560424805, acc:0.5
epoch:26, idx:100/1275, loss:1.4250972418501826, acc:0.6410891089108911
epoch:26, idx:200/1275, loss:1.317460498109979, acc:0.650497512437811
epoch:26, idx:300/1275, loss:1.2771603528447326, acc:0.6586378737541528
epoch:26, idx:400/1275, loss:1.2530566092738487, acc:0.6639650872817955
epoch:26, idx:500/1275, loss:1.2245380356164273, acc:0.6646706586826348
epoch:26, idx:600/1275, loss:1.2340642151340668, acc:0.6576539101497504
epoch:26, idx:700/1275, loss:1.2421125199927412, acc:0.6562054208273894
epoch:26, idx:800/1275, loss:1.2583617783813144, acc:0.651685393258427
epoch:26, idx:900/1275, loss:1.2447240224953628, acc:0.6548279689234184
epoch:26, idx:1000/1275, loss:1.2506386985431066, acc:0.6520979020979021
epoch:26, idx:1100/1275, loss:1.2367756932459562, acc:0.6566757493188011
epoch:26, idx:1200/1275, loss:1.23187702278809, acc:0.6548709408825978


  1%|          | 101/10845 [00:21<40:10,  4.46it/s, acc=0.765, epoch=27, loss=0.723]

epoch:27, idx:99/10845, loss:0.7230418384075165, acc:0.765


  2%|▏         | 200/10845 [00:44<40:39,  4.36it/s, acc=0.735, epoch=27, loss=0.857]

epoch:27, idx:199/10845, loss:0.8572151362895966, acc:0.735


  3%|▎         | 300/10845 [01:06<37:44,  4.66it/s, acc=0.745, epoch=27, loss=0.843]

epoch:27, idx:299/10845, loss:0.8433027589321136, acc:0.745


  4%|▎         | 400/10845 [01:28<36:05,  4.82it/s, acc=0.741, epoch=27, loss=0.836]

epoch:27, idx:399/10845, loss:0.8362505532801151, acc:0.74125


  5%|▍         | 500/10845 [01:51<38:21,  4.49it/s, acc=0.741, epoch=27, loss=0.842]

epoch:27, idx:499/10845, loss:0.842805853009224, acc:0.7405


  6%|▌         | 600/10845 [02:13<42:33,  4.01it/s, acc=0.733, epoch=27, loss=0.864]

epoch:27, idx:599/10845, loss:0.8635602324207624, acc:0.7333333333333333


  6%|▋         | 700/10845 [02:36<36:23,  4.65it/s, acc=0.732, epoch=27, loss=0.867]

epoch:27, idx:699/10845, loss:0.8665369881050927, acc:0.7317857142857143


  7%|▋         | 801/10845 [02:58<35:59,  4.65it/s, acc=0.727, epoch=27, loss=0.877]

epoch:27, idx:799/10845, loss:0.876413004770875, acc:0.7265625


  8%|▊         | 900/10845 [03:20<36:22,  4.56it/s, acc=0.726, epoch=27, loss=0.88] 

epoch:27, idx:899/10845, loss:0.8798802861240175, acc:0.7258333333333333


  9%|▉         | 1001/10845 [03:42<35:00,  4.69it/s, acc=0.727, epoch=27, loss=0.873]

epoch:27, idx:999/10845, loss:0.8736438122391701, acc:0.7265


 10%|█         | 1101/10845 [04:05<34:15,  4.74it/s, acc=0.728, epoch=27, loss=0.868]

epoch:27, idx:1099/10845, loss:0.8681119766560468, acc:0.7286363636363636


 11%|█         | 1201/10845 [04:27<35:10,  4.57it/s, acc=0.728, epoch=27, loss=0.868]

epoch:27, idx:1199/10845, loss:0.8660222361485164, acc:0.7285416666666666


 12%|█▏        | 1300/10845 [04:49<34:50,  4.56it/s, acc=0.729, epoch=27, loss=0.862]

epoch:27, idx:1299/10845, loss:0.8620049317066486, acc:0.7286538461538462


 13%|█▎        | 1400/10845 [05:11<33:46,  4.66it/s, acc=0.728, epoch=27, loss=0.867]

epoch:27, idx:1399/10845, loss:0.8665569051674434, acc:0.7280357142857142


 14%|█▍        | 1500/10845 [05:34<35:34,  4.38it/s, acc=0.728, epoch=27, loss=0.872]

epoch:27, idx:1499/10845, loss:0.872145261446635, acc:0.7275


 15%|█▍        | 1601/10845 [05:56<33:48,  4.56it/s, acc=0.725, epoch=27, loss=0.882]

epoch:27, idx:1599/10845, loss:0.8820541258901358, acc:0.725


 16%|█▌        | 1700/10845 [06:18<32:18,  4.72it/s, acc=0.725, epoch=27, loss=0.882]

epoch:27, idx:1699/10845, loss:0.8821622497193954, acc:0.7251470588235294


 17%|█▋        | 1800/10845 [06:40<32:13,  4.68it/s, acc=0.725, epoch=27, loss=0.881]

epoch:27, idx:1799/10845, loss:0.8806127058135138, acc:0.7254166666666667


 18%|█▊        | 1900/10845 [07:02<32:06,  4.64it/s, acc=0.725, epoch=27, loss=0.877]

epoch:27, idx:1899/10845, loss:0.8773177094208566, acc:0.725


 18%|█▊        | 2001/10845 [07:24<34:04,  4.33it/s, acc=0.726, epoch=27, loss=0.876]

epoch:27, idx:1999/10845, loss:0.876899736225605, acc:0.7255


 19%|█▉        | 2101/10845 [07:47<31:00,  4.70it/s, acc=0.727, epoch=27, loss=0.876]

epoch:27, idx:2099/10845, loss:0.875475418652807, acc:0.7266666666666667


 20%|██        | 2200/10845 [08:09<31:08,  4.63it/s, acc=0.727, epoch=27, loss=0.877]

epoch:27, idx:2199/10845, loss:0.8771002231673761, acc:0.7272727272727273


 21%|██        | 2300/10845 [08:31<30:08,  4.73it/s, acc=0.727, epoch=27, loss=0.876]

epoch:27, idx:2299/10845, loss:0.8764297730508058, acc:0.7268478260869565


 22%|██▏       | 2400/10845 [08:53<29:46,  4.73it/s, acc=0.726, epoch=27, loss=0.879]

epoch:27, idx:2399/10845, loss:0.8793255872527759, acc:0.7258333333333333


 23%|██▎       | 2500/10845 [09:15<29:41,  4.68it/s, acc=0.727, epoch=27, loss=0.875]

epoch:27, idx:2499/10845, loss:0.8751375291347504, acc:0.7269


 24%|██▍       | 2600/10845 [09:37<29:27,  4.67it/s, acc=0.727, epoch=27, loss=0.873]

epoch:27, idx:2599/10845, loss:0.873498797416687, acc:0.7272115384615384


 25%|██▍       | 2701/10845 [09:59<29:12,  4.65it/s, acc=0.728, epoch=27, loss=0.873]

epoch:27, idx:2699/10845, loss:0.8735421975453694, acc:0.7276851851851852


 26%|██▌       | 2801/10845 [10:22<29:43,  4.51it/s, acc=0.727, epoch=27, loss=0.878]

epoch:27, idx:2799/10845, loss:0.8785684041891779, acc:0.7270535714285714


 27%|██▋       | 2900/10845 [10:44<29:39,  4.46it/s, acc=0.726, epoch=27, loss=0.88] 

epoch:27, idx:2899/10845, loss:0.8805085685335357, acc:0.7256896551724138


 28%|██▊       | 3001/10845 [11:06<27:48,  4.70it/s, acc=0.726, epoch=27, loss=0.884]

epoch:27, idx:2999/10845, loss:0.8839989258448283, acc:0.7255


 29%|██▊       | 3100/10845 [11:28<28:03,  4.60it/s, acc=0.725, epoch=27, loss=0.888]

epoch:27, idx:3099/10845, loss:0.8875230227939545, acc:0.725


 30%|██▉       | 3200/10845 [11:50<27:05,  4.70it/s, acc=0.725, epoch=27, loss=0.888]

epoch:27, idx:3199/10845, loss:0.8881397440098227, acc:0.724765625


 30%|███       | 3300/10845 [12:12<26:38,  4.72it/s, acc=0.724, epoch=27, loss=0.891]

epoch:27, idx:3299/10845, loss:0.8905015020117615, acc:0.723939393939394


 31%|███▏      | 3400/10845 [12:35<26:41,  4.65it/s, acc=0.723, epoch=27, loss=0.893]

epoch:27, idx:3399/10845, loss:0.893290605071713, acc:0.7232352941176471


 32%|███▏      | 3501/10845 [12:57<25:43,  4.76it/s, acc=0.723, epoch=27, loss=0.895]

epoch:27, idx:3499/10845, loss:0.8953730788401195, acc:0.7224285714285714


 33%|███▎      | 3600/10845 [13:18<26:23,  4.58it/s, acc=0.722, epoch=27, loss=0.899]

epoch:27, idx:3599/10845, loss:0.8986523098912504, acc:0.7217361111111111


 34%|███▍      | 3700/10845 [13:41<26:41,  4.46it/s, acc=0.723, epoch=27, loss=0.894]

epoch:27, idx:3699/10845, loss:0.8940463707737021, acc:0.7227702702702703


 35%|███▌      | 3800/10845 [14:03<26:39,  4.40it/s, acc=0.724, epoch=27, loss=0.892]

epoch:27, idx:3799/10845, loss:0.8916432878061344, acc:0.7238157894736842


 36%|███▌      | 3900/10845 [14:25<25:58,  4.45it/s, acc=0.724, epoch=27, loss=0.892]

epoch:27, idx:3899/10845, loss:0.89175003104485, acc:0.7235897435897436


 37%|███▋      | 4000/10845 [14:48<29:46,  3.83it/s, acc=0.723, epoch=27, loss=0.893]

epoch:27, idx:3999/10845, loss:0.8932085982933641, acc:0.7229375


 38%|███▊      | 4100/10845 [15:10<24:39,  4.56it/s, acc=0.723, epoch=27, loss=0.892]

epoch:27, idx:4099/10845, loss:0.8919461087846174, acc:0.7232926829268292


 39%|███▊      | 4200/10845 [15:32<23:02,  4.81it/s, acc=0.724, epoch=27, loss=0.893]

epoch:27, idx:4199/10845, loss:0.8930706543085121, acc:0.7236309523809524


 40%|███▉      | 4301/10845 [15:55<23:28,  4.65it/s, acc=0.724, epoch=27, loss=0.892]

epoch:27, idx:4299/10845, loss:0.8920754579125448, acc:0.7240697674418605


 41%|████      | 4401/10845 [16:17<23:04,  4.66it/s, acc=0.724, epoch=27, loss=0.892]

epoch:27, idx:4399/10845, loss:0.8921500147540461, acc:0.7240909090909091


 41%|████▏     | 4500/10845 [16:39<22:47,  4.64it/s, acc=0.724, epoch=27, loss=0.895]

epoch:27, idx:4499/10845, loss:0.8946887140075366, acc:0.7236666666666667


 42%|████▏     | 4600/10845 [17:02<22:44,  4.58it/s, acc=0.724, epoch=27, loss=0.896]

epoch:27, idx:4599/10845, loss:0.8955367941506531, acc:0.7236413043478261


 43%|████▎     | 4700/10845 [17:24<22:49,  4.49it/s, acc=0.723, epoch=27, loss=0.897]

epoch:27, idx:4699/10845, loss:0.8973951909960585, acc:0.7232446808510639


 44%|████▍     | 4801/10845 [17:46<20:54,  4.82it/s, acc=0.724, epoch=27, loss=0.897]

epoch:27, idx:4799/10845, loss:0.8971499957454702, acc:0.72375


 45%|████▌     | 4900/10845 [18:08<21:34,  4.59it/s, acc=0.724, epoch=27, loss=0.893]

epoch:27, idx:4899/10845, loss:0.8934174721642416, acc:0.7241836734693877


 46%|████▌     | 5000/10845 [18:30<21:47,  4.47it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:4999/10845, loss:0.8939235135376453, acc:0.72435


 47%|████▋     | 5101/10845 [18:53<20:53,  4.58it/s, acc=0.724, epoch=27, loss=0.893]

epoch:27, idx:5099/10845, loss:0.8931080072358543, acc:0.7242647058823529


 48%|████▊     | 5201/10845 [19:15<21:16,  4.42it/s, acc=0.724, epoch=27, loss=0.893]

epoch:27, idx:5199/10845, loss:0.8929542943204825, acc:0.724423076923077


 49%|████▉     | 5300/10845 [19:37<21:06,  4.38it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:5299/10845, loss:0.8935869570554428, acc:0.7241037735849056


 50%|████▉     | 5400/10845 [19:59<20:56,  4.33it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:5399/10845, loss:0.8939110457069344, acc:0.7239351851851852


 51%|█████     | 5500/10845 [20:21<18:48,  4.74it/s, acc=0.723, epoch=27, loss=0.896]

epoch:27, idx:5499/10845, loss:0.8960638317249038, acc:0.7232272727272727


 52%|█████▏    | 5600/10845 [20:43<19:00,  4.60it/s, acc=0.723, epoch=27, loss=0.896]

epoch:27, idx:5599/10845, loss:0.8955064098909498, acc:0.7230357142857143


 53%|█████▎    | 5700/10845 [21:05<18:56,  4.53it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:5699/10845, loss:0.8940012158584176, acc:0.7237719298245614


 53%|█████▎    | 5801/10845 [21:28<18:40,  4.50it/s, acc=0.724, epoch=27, loss=0.895]

epoch:27, idx:5799/10845, loss:0.8946642567731183, acc:0.72375


 54%|█████▍    | 5901/10845 [21:50<17:27,  4.72it/s, acc=0.724, epoch=27, loss=0.893]

epoch:27, idx:5899/10845, loss:0.8932442423398211, acc:0.7240254237288135


 55%|█████▌    | 6000/10845 [22:12<17:45,  4.55it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:5999/10845, loss:0.8935269334067901, acc:0.7240416666666667


 56%|█████▌    | 6100/10845 [22:34<18:52,  4.19it/s, acc=0.724, epoch=27, loss=0.895]

epoch:27, idx:6099/10845, loss:0.8946062350419701, acc:0.7239344262295082


 57%|█████▋    | 6200/10845 [22:56<17:04,  4.54it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:6199/10845, loss:0.8935048475428935, acc:0.7240725806451613


 58%|█████▊    | 6300/10845 [23:19<17:02,  4.45it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:6299/10845, loss:0.8914282029441425, acc:0.7246031746031746


 59%|█████▉    | 6401/10845 [23:41<16:23,  4.52it/s, acc=0.724, epoch=27, loss=0.892]

epoch:27, idx:6399/10845, loss:0.8919912682147697, acc:0.724375


 60%|█████▉    | 6500/10845 [24:03<15:50,  4.57it/s, acc=0.725, epoch=27, loss=0.89] 

epoch:27, idx:6499/10845, loss:0.8899378476005334, acc:0.7248461538461538


 61%|██████    | 6601/10845 [24:25<15:43,  4.50it/s, acc=0.725, epoch=27, loss=0.89] 

epoch:27, idx:6599/10845, loss:0.89009088113904, acc:0.7251136363636363


 62%|██████▏   | 6700/10845 [24:47<14:38,  4.72it/s, acc=0.725, epoch=27, loss=0.89] 

epoch:27, idx:6699/10845, loss:0.8904245761481684, acc:0.725


 63%|██████▎   | 6800/10845 [25:09<14:57,  4.51it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:6799/10845, loss:0.8909827570222757, acc:0.7248161764705883


 64%|██████▎   | 6900/10845 [25:31<14:38,  4.49it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:6899/10845, loss:0.8916454632601876, acc:0.7247826086956521


 65%|██████▍   | 7000/10845 [25:53<14:01,  4.57it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:6999/10845, loss:0.8914102601621832, acc:0.7248571428571429


 65%|██████▌   | 7101/10845 [26:15<13:09,  4.74it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:7099/10845, loss:0.8917955942514917, acc:0.7247887323943661


 66%|██████▋   | 7200/10845 [26:37<12:47,  4.75it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:7199/10845, loss:0.891698339941601, acc:0.7248611111111111


 67%|██████▋   | 7300/10845 [26:59<12:39,  4.67it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:7299/10845, loss:0.8908691043518994, acc:0.7248287671232877


 68%|██████▊   | 7400/10845 [27:21<12:48,  4.48it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:7399/10845, loss:0.8923468402712732, acc:0.7247635135135135


 69%|██████▉   | 7501/10845 [27:44<12:17,  4.54it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:7499/10845, loss:0.891132490726312, acc:0.7252666666666666


 70%|███████   | 7600/10845 [28:06<12:53,  4.19it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:7599/10845, loss:0.8915556112676859, acc:0.7250986842105264


 71%|███████   | 7700/10845 [28:28<11:45,  4.46it/s, acc=0.726, epoch=27, loss=0.89] 

epoch:27, idx:7699/10845, loss:0.8900778873051916, acc:0.7256168831168831


 72%|███████▏  | 7801/10845 [28:50<10:39,  4.76it/s, acc=0.726, epoch=27, loss=0.889]

epoch:27, idx:7799/10845, loss:0.8890494159207895, acc:0.725801282051282


 73%|███████▎  | 7901/10845 [29:12<10:27,  4.69it/s, acc=0.726, epoch=27, loss=0.889]

epoch:27, idx:7899/10845, loss:0.8889243700949452, acc:0.7259493670886076


 74%|███████▍  | 8000/10845 [29:35<10:44,  4.41it/s, acc=0.726, epoch=27, loss=0.889]

epoch:27, idx:7999/10845, loss:0.8889663750641048, acc:0.72603125


 75%|███████▍  | 8100/10845 [29:57<09:36,  4.76it/s, acc=0.726, epoch=27, loss=0.89] 

epoch:27, idx:8099/10845, loss:0.8899967012398037, acc:0.7258333333333333


 76%|███████▌  | 8200/10845 [30:19<09:11,  4.79it/s, acc=0.726, epoch=27, loss=0.891]

epoch:27, idx:8199/10845, loss:0.8913667984234124, acc:0.7255182926829268


 77%|███████▋  | 8300/10845 [30:41<09:30,  4.46it/s, acc=0.726, epoch=27, loss=0.891]

epoch:27, idx:8299/10845, loss:0.8909275890222514, acc:0.725512048192771


 77%|███████▋  | 8400/10845 [31:03<09:52,  4.13it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:8399/10845, loss:0.8912064461871272, acc:0.7253571428571428


 78%|███████▊  | 8500/10845 [31:25<08:16,  4.72it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:8499/10845, loss:0.8906497231195954, acc:0.7254705882352941


 79%|███████▉  | 8600/10845 [31:48<08:11,  4.57it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:8599/10845, loss:0.8909158236169538, acc:0.7253779069767442


 80%|████████  | 8701/10845 [32:10<07:38,  4.67it/s, acc=0.726, epoch=27, loss=0.891]

epoch:27, idx:8699/10845, loss:0.8907227328967774, acc:0.7255459770114943


 81%|████████  | 8800/10845 [32:32<07:43,  4.41it/s, acc=0.726, epoch=27, loss=0.891]

epoch:27, idx:8799/10845, loss:0.8905711785331368, acc:0.7255113636363636


 82%|████████▏ | 8900/10845 [32:54<07:07,  4.55it/s, acc=0.725, epoch=27, loss=0.89] 

epoch:27, idx:8899/10845, loss:0.8900460053528293, acc:0.7254494382022472


 83%|████████▎ | 9000/10845 [33:16<06:49,  4.50it/s, acc=0.725, epoch=27, loss=0.89]

epoch:27, idx:8999/10845, loss:0.8898509122100141, acc:0.7251944444444445


 84%|████████▍ | 9100/10845 [33:39<06:43,  4.33it/s, acc=0.725, epoch=27, loss=0.89] 

epoch:27, idx:9099/10845, loss:0.8903250913927844, acc:0.7251648351648352


 85%|████████▍ | 9200/10845 [34:01<05:52,  4.67it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:9199/10845, loss:0.8909425855298405, acc:0.725


 86%|████████▌ | 9300/10845 [34:24<06:19,  4.07it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:9299/10845, loss:0.890652614649265, acc:0.7249462365591398


 87%|████████▋ | 9400/10845 [34:46<05:05,  4.73it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:9399/10845, loss:0.8913519913433714, acc:0.725


 88%|████████▊ | 9500/10845 [35:08<05:03,  4.43it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:9499/10845, loss:0.8912472151549239, acc:0.7251315789473685


 89%|████████▊ | 9601/10845 [35:31<04:38,  4.47it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:9599/10845, loss:0.8911865722977866, acc:0.725234375


 89%|████████▉ | 9700/10845 [35:53<04:05,  4.66it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:9699/10845, loss:0.892086228755946, acc:0.7249484536082474


 90%|█████████ | 9800/10845 [36:15<03:45,  4.64it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:9799/10845, loss:0.891660117625582, acc:0.7251275510204082


 91%|█████████▏| 9900/10845 [36:36<03:22,  4.66it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:9899/10845, loss:0.8916127789231262, acc:0.7251262626262627


 92%|█████████▏| 10000/10845 [36:58<02:57,  4.75it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:9999/10845, loss:0.8911177888959646, acc:0.7251


 93%|█████████▎| 10100/10845 [37:20<02:35,  4.78it/s, acc=0.725, epoch=27, loss=0.891]

epoch:27, idx:10099/10845, loss:0.8910408118072123, acc:0.7252227722772278


 94%|█████████▍| 10200/10845 [37:43<02:29,  4.33it/s, acc=0.725, epoch=27, loss=0.89] 

epoch:27, idx:10199/10845, loss:0.890181224086121, acc:0.7252941176470589


 95%|█████████▍| 10300/10845 [38:05<01:58,  4.60it/s, acc=0.725, epoch=27, loss=0.892]

epoch:27, idx:10299/10845, loss:0.8917990408215708, acc:0.7249757281553398


 96%|█████████▌| 10400/10845 [38:27<01:34,  4.71it/s, acc=0.725, epoch=27, loss=0.893]

epoch:27, idx:10399/10845, loss:0.8929083580323137, acc:0.7247355769230769


 97%|█████████▋| 10500/10845 [38:49<01:14,  4.62it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:10499/10845, loss:0.8935537796673321, acc:0.7244047619047619


 98%|█████████▊| 10600/10845 [39:11<00:52,  4.69it/s, acc=0.724, epoch=27, loss=0.893]

epoch:27, idx:10599/10845, loss:0.8931182592179415, acc:0.7242452830188679


 99%|█████████▊| 10701/10845 [39:34<00:31,  4.51it/s, acc=0.724, epoch=27, loss=0.893]

epoch:27, idx:10699/10845, loss:0.8933470888978967, acc:0.7239252336448598


100%|█████████▉| 10801/10845 [39:56<00:09,  4.52it/s, acc=0.724, epoch=27, loss=0.894]

epoch:27, idx:10799/10845, loss:0.893834136136704, acc:0.7238425925925925


100%|██████████| 10845/10845 [40:06<00:00,  4.67it/s, acc=0.724, epoch=27, loss=0.894]


epoch:27, idx:0/1275, loss:1.2732963562011719, acc:0.75
epoch:27, idx:100/1275, loss:1.321790087341082, acc:0.6584158415841584
epoch:27, idx:200/1275, loss:1.213954726261879, acc:0.6629353233830846
epoch:27, idx:300/1275, loss:1.1938517355047586, acc:0.6677740863787376
epoch:27, idx:400/1275, loss:1.1782209293503416, acc:0.669576059850374
epoch:27, idx:500/1275, loss:1.1582258216397252, acc:0.6681636726546906
epoch:27, idx:600/1275, loss:1.1758514041710217, acc:0.6593178036605657
epoch:27, idx:700/1275, loss:1.1845647103436152, acc:0.6590584878744651
epoch:27, idx:800/1275, loss:1.2030972337603718, acc:0.6569912609238452
epoch:27, idx:900/1275, loss:1.192537313163876, acc:0.6592674805771365
epoch:27, idx:1000/1275, loss:1.1946316868156106, acc:0.6573426573426573
epoch:27, idx:1100/1275, loss:1.179537818498984, acc:0.6614441416893733
epoch:27, idx:1200/1275, loss:1.175105451544953, acc:0.6613238967527061


  1%|          | 101/10845 [00:21<39:10,  4.57it/s, acc=0.748, epoch=28, loss=0.806]

epoch:28, idx:99/10845, loss:0.8075693345069885, acc:0.75


  2%|▏         | 200/10845 [00:44<40:23,  4.39it/s, acc=0.723, epoch=28, loss=0.888]

epoch:28, idx:199/10845, loss:0.888002963066101, acc:0.7225


  3%|▎         | 300/10845 [01:06<38:23,  4.58it/s, acc=0.733, epoch=28, loss=0.862]

epoch:28, idx:299/10845, loss:0.861696885029475, acc:0.7333333333333333


  4%|▎         | 401/10845 [01:28<37:20,  4.66it/s, acc=0.734, epoch=28, loss=0.852]

epoch:28, idx:399/10845, loss:0.8511419679224491, acc:0.734375


  5%|▍         | 500/10845 [01:50<37:20,  4.62it/s, acc=0.734, epoch=28, loss=0.857]

epoch:28, idx:499/10845, loss:0.8571195396184921, acc:0.7335


  6%|▌         | 600/10845 [02:12<36:24,  4.69it/s, acc=0.738, epoch=28, loss=0.853]

epoch:28, idx:599/10845, loss:0.8527996533115705, acc:0.7383333333333333


  6%|▋         | 701/10845 [02:35<37:12,  4.54it/s, acc=0.737, epoch=28, loss=0.852]

epoch:28, idx:699/10845, loss:0.8526890132256917, acc:0.7367857142857143


  7%|▋         | 801/10845 [02:57<35:54,  4.66it/s, acc=0.734, epoch=28, loss=0.86] 

epoch:28, idx:799/10845, loss:0.8607569652050734, acc:0.734375


  8%|▊         | 901/10845 [03:20<35:17,  4.70it/s, acc=0.731, epoch=28, loss=0.869]

epoch:28, idx:899/10845, loss:0.8695817142724991, acc:0.7311111111111112


  9%|▉         | 1000/10845 [03:42<35:17,  4.65it/s, acc=0.732, epoch=28, loss=0.871]

epoch:28, idx:999/10845, loss:0.8709353275895119, acc:0.73175


 10%|█         | 1100/10845 [04:04<39:01,  4.16it/s, acc=0.73, epoch=28, loss=0.872] 

epoch:28, idx:1099/10845, loss:0.872484994855794, acc:0.7295454545454545


 11%|█         | 1200/10845 [04:26<34:33,  4.65it/s, acc=0.733, epoch=28, loss=0.861]

epoch:28, idx:1199/10845, loss:0.8612033549447854, acc:0.7329166666666667


 12%|█▏        | 1301/10845 [04:49<32:30,  4.89it/s, acc=0.732, epoch=28, loss=0.863]

epoch:28, idx:1299/10845, loss:0.8636864114724673, acc:0.7313461538461539


 13%|█▎        | 1400/10845 [05:11<32:25,  4.85it/s, acc=0.732, epoch=28, loss=0.868]

epoch:28, idx:1399/10845, loss:0.8677874166624887, acc:0.7321428571428571


 14%|█▍        | 1500/10845 [05:33<36:47,  4.23it/s, acc=0.732, epoch=28, loss=0.866]

epoch:28, idx:1499/10845, loss:0.8662160959243774, acc:0.732


 15%|█▍        | 1600/10845 [05:55<33:33,  4.59it/s, acc=0.734, epoch=28, loss=0.86] 

epoch:28, idx:1599/10845, loss:0.859613875746727, acc:0.73421875


 16%|█▌        | 1700/10845 [06:17<33:18,  4.58it/s, acc=0.734, epoch=28, loss=0.864]

epoch:28, idx:1699/10845, loss:0.864464086785036, acc:0.7341176470588235


 17%|█▋        | 1800/10845 [06:40<35:53,  4.20it/s, acc=0.734, epoch=28, loss=0.862]

epoch:28, idx:1799/10845, loss:0.8624538264671961, acc:0.73375


 18%|█▊        | 1901/10845 [07:02<31:30,  4.73it/s, acc=0.736, epoch=28, loss=0.855]

epoch:28, idx:1899/10845, loss:0.8547686915648611, acc:0.7359210526315789


 18%|█▊        | 2000/10845 [07:25<31:51,  4.63it/s, acc=0.737, epoch=28, loss=0.852]

epoch:28, idx:1999/10845, loss:0.8518945843577385, acc:0.736625


 19%|█▉        | 2100/10845 [07:47<33:25,  4.36it/s, acc=0.735, epoch=28, loss=0.86] 

epoch:28, idx:2099/10845, loss:0.8601405757949466, acc:0.7352380952380952


 20%|██        | 2200/10845 [08:09<32:01,  4.50it/s, acc=0.735, epoch=28, loss=0.861]

epoch:28, idx:2199/10845, loss:0.8607196170091629, acc:0.7348863636363636


 21%|██        | 2300/10845 [08:31<31:57,  4.46it/s, acc=0.733, epoch=28, loss=0.864]

epoch:28, idx:2299/10845, loss:0.8641292645101962, acc:0.7330434782608696


 22%|██▏       | 2401/10845 [08:54<30:23,  4.63it/s, acc=0.735, epoch=28, loss=0.862]

epoch:28, idx:2399/10845, loss:0.8623394750555357, acc:0.7345833333333334


 23%|██▎       | 2500/10845 [09:16<30:34,  4.55it/s, acc=0.736, epoch=28, loss=0.861]

epoch:28, idx:2499/10845, loss:0.8607021058559418, acc:0.7355


 24%|██▍       | 2600/10845 [09:38<31:07,  4.41it/s, acc=0.736, epoch=28, loss=0.857]

epoch:28, idx:2599/10845, loss:0.8571333226790795, acc:0.7355769230769231


 25%|██▍       | 2701/10845 [10:01<30:24,  4.46it/s, acc=0.734, epoch=28, loss=0.866]

epoch:28, idx:2699/10845, loss:0.8657746014330122, acc:0.7339814814814815


 26%|██▌       | 2801/10845 [10:23<28:52,  4.64it/s, acc=0.734, epoch=28, loss=0.868]

epoch:28, idx:2799/10845, loss:0.8681442775257996, acc:0.73375


 27%|██▋       | 2900/10845 [10:44<29:05,  4.55it/s, acc=0.733, epoch=28, loss=0.87] 

epoch:28, idx:2899/10845, loss:0.8703434632769946, acc:0.733103448275862


 28%|██▊       | 3001/10845 [11:07<28:27,  4.59it/s, acc=0.733, epoch=28, loss=0.87] 

epoch:28, idx:2999/10845, loss:0.8694796298940977, acc:0.7334166666666667


 29%|██▊       | 3100/10845 [11:29<28:48,  4.48it/s, acc=0.733, epoch=28, loss=0.87] 

epoch:28, idx:3099/10845, loss:0.8702472904612941, acc:0.7331451612903226


 30%|██▉       | 3200/10845 [11:51<27:12,  4.68it/s, acc=0.732, epoch=28, loss=0.873]

epoch:28, idx:3199/10845, loss:0.8732565333135426, acc:0.73171875


 30%|███       | 3300/10845 [12:13<27:05,  4.64it/s, acc=0.732, epoch=28, loss=0.872]

epoch:28, idx:3299/10845, loss:0.8720705480286569, acc:0.7319696969696969


 31%|███▏      | 3401/10845 [12:36<26:59,  4.60it/s, acc=0.732, epoch=28, loss=0.873]

epoch:28, idx:3399/10845, loss:0.8730801850907943, acc:0.731985294117647


 32%|███▏      | 3500/10845 [12:58<28:02,  4.37it/s, acc=0.732, epoch=28, loss=0.871]

epoch:28, idx:3499/10845, loss:0.8710988908495222, acc:0.7322857142857143


 33%|███▎      | 3600/10845 [13:20<25:40,  4.70it/s, acc=0.732, epoch=28, loss=0.873]

epoch:28, idx:3599/10845, loss:0.8730639389488433, acc:0.7316666666666667


 34%|███▍      | 3700/10845 [13:42<26:50,  4.44it/s, acc=0.732, epoch=28, loss=0.871]

epoch:28, idx:3699/10845, loss:0.8708883137316317, acc:0.7322297297297298


 35%|███▌      | 3801/10845 [14:04<25:33,  4.59it/s, acc=0.732, epoch=28, loss=0.872]

epoch:28, idx:3799/10845, loss:0.8717637457031953, acc:0.7317105263157895


 36%|███▌      | 3901/10845 [14:26<25:43,  4.50it/s, acc=0.732, epoch=28, loss=0.87] 

epoch:28, idx:3899/10845, loss:0.8699649140468011, acc:0.7317307692307692


 37%|███▋      | 4001/10845 [14:48<24:27,  4.66it/s, acc=0.731, epoch=28, loss=0.872]

epoch:28, idx:3999/10845, loss:0.8720933720469475, acc:0.7309375


 38%|███▊      | 4100/10845 [15:10<24:51,  4.52it/s, acc=0.73, epoch=28, loss=0.872] 

epoch:28, idx:4099/10845, loss:0.8724465926682077, acc:0.7304268292682927


 39%|███▊      | 4200/10845 [15:33<24:11,  4.58it/s, acc=0.73, epoch=28, loss=0.875] 

epoch:28, idx:4199/10845, loss:0.8753787713675272, acc:0.73


 40%|███▉      | 4301/10845 [15:55<23:18,  4.68it/s, acc=0.731, epoch=28, loss=0.874]

epoch:28, idx:4299/10845, loss:0.8738488511983739, acc:0.7307558139534883


 41%|████      | 4400/10845 [16:17<24:01,  4.47it/s, acc=0.731, epoch=28, loss=0.874]

epoch:28, idx:4399/10845, loss:0.8735905469005758, acc:0.730909090909091


 42%|████▏     | 4501/10845 [16:39<21:56,  4.82it/s, acc=0.731, epoch=28, loss=0.874]

epoch:28, idx:4499/10845, loss:0.8735767249796126, acc:0.7308333333333333


 42%|████▏     | 4600/10845 [17:01<23:09,  4.49it/s, acc=0.73, epoch=28, loss=0.875] 

epoch:28, idx:4599/10845, loss:0.8752048930914506, acc:0.7303260869565218


 43%|████▎     | 4701/10845 [17:24<21:43,  4.71it/s, acc=0.73, epoch=28, loss=0.877] 

epoch:28, idx:4699/10845, loss:0.8774238277242539, acc:0.7296808510638297


 44%|████▍     | 4800/10845 [17:46<22:25,  4.49it/s, acc=0.73, epoch=28, loss=0.874]

epoch:28, idx:4799/10845, loss:0.8741397609313329, acc:0.73


 45%|████▌     | 4901/10845 [18:08<20:58,  4.72it/s, acc=0.729, epoch=28, loss=0.875]

epoch:28, idx:4899/10845, loss:0.8752485540570045, acc:0.7293877551020408


 46%|████▌     | 5000/10845 [18:30<21:50,  4.46it/s, acc=0.729, epoch=28, loss=0.877]

epoch:28, idx:4999/10845, loss:0.876535015642643, acc:0.7291


 47%|████▋     | 5100/10845 [18:53<21:56,  4.36it/s, acc=0.729, epoch=28, loss=0.878]

epoch:28, idx:5099/10845, loss:0.877641715687864, acc:0.7291176470588235


 48%|████▊     | 5201/10845 [19:15<19:51,  4.74it/s, acc=0.729, epoch=28, loss=0.878]

epoch:28, idx:5199/10845, loss:0.878028648277888, acc:0.7291346153846154


 49%|████▉     | 5300/10845 [19:37<19:29,  4.74it/s, acc=0.73, epoch=28, loss=0.877] 

epoch:28, idx:5299/10845, loss:0.877178815726964, acc:0.729622641509434


 50%|████▉     | 5401/10845 [20:00<18:56,  4.79it/s, acc=0.73, epoch=28, loss=0.877]

epoch:28, idx:5399/10845, loss:0.8767092621878342, acc:0.7297222222222223


 51%|█████     | 5500/10845 [20:22<20:04,  4.44it/s, acc=0.73, epoch=28, loss=0.876]

epoch:28, idx:5499/10845, loss:0.8763680739077655, acc:0.73


 52%|█████▏    | 5600/10845 [20:45<19:45,  4.42it/s, acc=0.73, epoch=28, loss=0.879]

epoch:28, idx:5599/10845, loss:0.878534040376544, acc:0.7298214285714286


 53%|█████▎    | 5700/10845 [21:07<19:45,  4.34it/s, acc=0.729, epoch=28, loss=0.879]

epoch:28, idx:5699/10845, loss:0.8791146010578724, acc:0.7294736842105263


 53%|█████▎    | 5801/10845 [21:29<17:38,  4.76it/s, acc=0.73, epoch=28, loss=0.878] 

epoch:28, idx:5799/10845, loss:0.8779131896639693, acc:0.7301724137931035


 54%|█████▍    | 5900/10845 [21:51<18:10,  4.54it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:5899/10845, loss:0.8768975895441184, acc:0.730635593220339


 55%|█████▌    | 6000/10845 [22:13<17:44,  4.55it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:5999/10845, loss:0.8769212757746379, acc:0.730625


 56%|█████▌    | 6100/10845 [22:35<17:54,  4.42it/s, acc=0.73, epoch=28, loss=0.877] 

epoch:28, idx:6099/10845, loss:0.8774913932456345, acc:0.7302049180327869


 57%|█████▋    | 6200/10845 [22:57<16:11,  4.78it/s, acc=0.73, epoch=28, loss=0.877]

epoch:28, idx:6199/10845, loss:0.8767815492037804, acc:0.7303629032258064


 58%|█████▊    | 6300/10845 [23:19<16:24,  4.62it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:6299/10845, loss:0.8767674605240897, acc:0.7307539682539682


 59%|█████▉    | 6400/10845 [23:40<16:33,  4.47it/s, acc=0.73, epoch=28, loss=0.878] 

epoch:28, idx:6399/10845, loss:0.877950718505308, acc:0.7304296875


 60%|█████▉    | 6500/10845 [24:02<15:32,  4.66it/s, acc=0.731, epoch=28, loss=0.878]

epoch:28, idx:6499/10845, loss:0.8775575872843082, acc:0.7306153846153847


 61%|██████    | 6600/10845 [24:24<16:36,  4.26it/s, acc=0.73, epoch=28, loss=0.879] 

epoch:28, idx:6599/10845, loss:0.8785634395660776, acc:0.7300757575757576


 62%|██████▏   | 6701/10845 [24:47<15:18,  4.51it/s, acc=0.73, epoch=28, loss=0.879]

epoch:28, idx:6699/10845, loss:0.8795072086444542, acc:0.7299253731343284


 63%|██████▎   | 6801/10845 [25:09<14:42,  4.58it/s, acc=0.73, epoch=28, loss=0.879]

epoch:28, idx:6799/10845, loss:0.8795302138872007, acc:0.7298529411764706


 64%|██████▎   | 6901/10845 [25:31<14:28,  4.54it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:6899/10845, loss:0.8782035365743913, acc:0.7303260869565218


 65%|██████▍   | 7001/10845 [25:53<13:48,  4.64it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:6999/10845, loss:0.8784368591308593, acc:0.7301785714285715


 65%|██████▌   | 7101/10845 [26:15<13:21,  4.67it/s, acc=0.731, epoch=28, loss=0.878]

epoch:28, idx:7099/10845, loss:0.8776692755793182, acc:0.7305985915492957


 66%|██████▋   | 7200/10845 [26:37<13:24,  4.53it/s, acc=0.73, epoch=28, loss=0.879] 

epoch:28, idx:7199/10845, loss:0.8785722333855099, acc:0.7304513888888889


 67%|██████▋   | 7300/10845 [26:59<12:53,  4.58it/s, acc=0.73, epoch=28, loss=0.88]  

epoch:28, idx:7299/10845, loss:0.8799185334084785, acc:0.7302739726027397


 68%|██████▊   | 7400/10845 [27:22<12:17,  4.67it/s, acc=0.731, epoch=28, loss=0.879]

epoch:28, idx:7399/10845, loss:0.8787214359802169, acc:0.7305405405405405


 69%|██████▉   | 7501/10845 [27:44<12:49,  4.35it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:7499/10845, loss:0.8774410362005234, acc:0.7307333333333333


 70%|███████   | 7600/10845 [28:06<11:49,  4.57it/s, acc=0.73, epoch=28, loss=0.878] 

epoch:28, idx:7599/10845, loss:0.8779560787034663, acc:0.7304605263157895


 71%|███████   | 7700/10845 [28:29<11:43,  4.47it/s, acc=0.731, epoch=28, loss=0.876]

epoch:28, idx:7699/10845, loss:0.8761058181524277, acc:0.7307142857142858


 72%|███████▏  | 7801/10845 [28:51<11:00,  4.61it/s, acc=0.731, epoch=28, loss=0.876]

epoch:28, idx:7799/10845, loss:0.8760116414763989, acc:0.7306730769230769


 73%|███████▎  | 7900/10845 [29:13<10:49,  4.54it/s, acc=0.731, epoch=28, loss=0.876]

epoch:28, idx:7899/10845, loss:0.876007250479505, acc:0.7308227848101266


 74%|███████▍  | 8000/10845 [29:35<10:24,  4.55it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:7999/10845, loss:0.8770247150138021, acc:0.7308125


 75%|███████▍  | 8100/10845 [29:58<09:50,  4.65it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:8099/10845, loss:0.8770396777362, acc:0.7305555555555555


 76%|███████▌  | 8200/10845 [30:20<10:06,  4.36it/s, acc=0.73, epoch=28, loss=0.877] 

epoch:28, idx:8199/10845, loss:0.8769802840235756, acc:0.7304268292682927


 77%|███████▋  | 8300/10845 [30:42<09:25,  4.50it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:8299/10845, loss:0.8765485461002372, acc:0.7305421686746988


 77%|███████▋  | 8401/10845 [31:04<08:42,  4.68it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:8399/10845, loss:0.8764575338718437, acc:0.7305654761904762


 78%|███████▊  | 8500/10845 [31:26<09:05,  4.30it/s, acc=0.731, epoch=28, loss=0.877]

epoch:28, idx:8499/10845, loss:0.8771586593950496, acc:0.730735294117647


 79%|███████▉  | 8600/10845 [31:48<07:56,  4.71it/s, acc=0.73, epoch=28, loss=0.878] 

epoch:28, idx:8599/10845, loss:0.8781083727922551, acc:0.7302906976744186


 80%|████████  | 8701/10845 [32:11<07:34,  4.71it/s, acc=0.73, epoch=28, loss=0.877] 

epoch:28, idx:8699/10845, loss:0.876789804018777, acc:0.7303448275862069


 81%|████████  | 8800/10845 [32:33<07:34,  4.50it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:8799/10845, loss:0.8784175692972812, acc:0.7297443181818182


 82%|████████▏ | 8901/10845 [32:55<07:05,  4.57it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:8899/10845, loss:0.8780608088447807, acc:0.7298876404494382


 83%|████████▎ | 9000/10845 [33:17<06:34,  4.68it/s, acc=0.73, epoch=28, loss=0.877]

epoch:28, idx:8999/10845, loss:0.8771369461814562, acc:0.7303611111111111


 84%|████████▍ | 9100/10845 [33:39<06:25,  4.52it/s, acc=0.73, epoch=28, loss=0.877]

epoch:28, idx:9099/10845, loss:0.8771318356872915, acc:0.7300824175824175


 85%|████████▍ | 9200/10845 [34:02<05:58,  4.58it/s, acc=0.73, epoch=28, loss=0.877]

epoch:28, idx:9199/10845, loss:0.8772727814381537, acc:0.7299728260869566


 86%|████████▌ | 9300/10845 [34:24<05:35,  4.61it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:9299/10845, loss:0.8781746248981004, acc:0.7297311827956989


 87%|████████▋ | 9400/10845 [34:47<05:12,  4.62it/s, acc=0.729, epoch=28, loss=0.879]

epoch:28, idx:9399/10845, loss:0.8789521242456233, acc:0.7294414893617022


 88%|████████▊ | 9501/10845 [35:09<04:42,  4.76it/s, acc=0.73, epoch=28, loss=0.878] 

epoch:28, idx:9499/10845, loss:0.8777388069880636, acc:0.7297368421052631


 89%|████████▊ | 9600/10845 [35:31<04:11,  4.96it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:9599/10845, loss:0.8775354868918658, acc:0.7297916666666666


 89%|████████▉ | 9700/10845 [35:53<04:23,  4.35it/s, acc=0.73, epoch=28, loss=0.877]

epoch:28, idx:9699/10845, loss:0.8771730088327349, acc:0.73


 90%|█████████ | 9800/10845 [36:16<03:49,  4.56it/s, acc=0.73, epoch=28, loss=0.876]

epoch:28, idx:9799/10845, loss:0.8762097583376631, acc:0.7303571428571428


 91%|█████████▏| 9900/10845 [36:38<03:17,  4.78it/s, acc=0.73, epoch=28, loss=0.876]

epoch:28, idx:9899/10845, loss:0.8762944513860375, acc:0.7302777777777778


 92%|█████████▏| 10001/10845 [37:00<02:56,  4.79it/s, acc=0.73, epoch=28, loss=0.876]

epoch:28, idx:9999/10845, loss:0.8761166277050972, acc:0.730475


 93%|█████████▎| 10101/10845 [37:22<02:37,  4.72it/s, acc=0.73, epoch=28, loss=0.877] 

epoch:28, idx:10099/10845, loss:0.8770110332021619, acc:0.7302970297029703


 94%|█████████▍| 10200/10845 [37:44<02:23,  4.48it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:10199/10845, loss:0.8777389999931934, acc:0.7299754901960784


 95%|█████████▍| 10301/10845 [38:07<01:55,  4.71it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:10299/10845, loss:0.8776584484854948, acc:0.730121359223301


 96%|█████████▌| 10400/10845 [38:29<01:35,  4.67it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:10399/10845, loss:0.877778871838863, acc:0.7297836538461538


 97%|█████████▋| 10501/10845 [38:51<01:13,  4.67it/s, acc=0.73, epoch=28, loss=0.878]

epoch:28, idx:10499/10845, loss:0.8778676246120816, acc:0.7298095238095238


 98%|█████████▊| 10601/10845 [39:13<00:52,  4.63it/s, acc=0.73, epoch=28, loss=0.879]

epoch:28, idx:10599/10845, loss:0.878953501672115, acc:0.7295518867924529


 99%|█████████▊| 10701/10845 [39:35<00:31,  4.64it/s, acc=0.729, epoch=28, loss=0.88] 

epoch:28, idx:10699/10845, loss:0.8798420871641034, acc:0.7292523364485981


100%|█████████▉| 10800/10845 [39:57<00:10,  4.38it/s, acc=0.729, epoch=28, loss=0.881]

epoch:28, idx:10799/10845, loss:0.8807665041365005, acc:0.7288888888888889


100%|██████████| 10845/10845 [40:07<00:00,  4.71it/s, acc=0.729, epoch=28, loss=0.881]


epoch:28, idx:0/1275, loss:1.1607120037078857, acc:0.75
epoch:28, idx:100/1275, loss:1.4093335312191804, acc:0.6410891089108911
epoch:28, idx:200/1275, loss:1.2943595985868084, acc:0.6492537313432836
epoch:28, idx:300/1275, loss:1.2634371404235942, acc:0.6578073089700996
epoch:28, idx:400/1275, loss:1.245204858351824, acc:0.6652119700748129
epoch:28, idx:500/1275, loss:1.2148394529928943, acc:0.6641716566866267
epoch:28, idx:600/1275, loss:1.2310702271152059, acc:0.6551580698835274
epoch:28, idx:700/1275, loss:1.240137666506366, acc:0.6540656205420827
epoch:28, idx:800/1275, loss:1.2595720707849318, acc:0.6513732833957553
epoch:28, idx:900/1275, loss:1.24758834460467, acc:0.6537180910099889
epoch:28, idx:1000/1275, loss:1.2522402545669815, acc:0.6520979020979021
epoch:28, idx:1100/1275, loss:1.2393274707863484, acc:0.6562216167120799
epoch:28, idx:1200/1275, loss:1.2331524918617356, acc:0.6548709408825978


  1%|          | 100/10845 [00:22<37:30,  4.77it/s, acc=0.752, epoch=29, loss=0.751]

epoch:29, idx:99/10845, loss:0.7505097377300263, acc:0.7525


  2%|▏         | 201/10845 [00:44<36:54,  4.81it/s, acc=0.744, epoch=29, loss=0.767]

epoch:29, idx:199/10845, loss:0.7691153699159622, acc:0.7425


  3%|▎         | 301/10845 [01:06<39:33,  4.44it/s, acc=0.743, epoch=29, loss=0.811]

epoch:29, idx:299/10845, loss:0.8110337102413178, acc:0.7433333333333333


  4%|▎         | 400/10845 [01:28<39:42,  4.38it/s, acc=0.738, epoch=29, loss=0.838]

epoch:29, idx:399/10845, loss:0.8382283234596253, acc:0.738125


  5%|▍         | 501/10845 [01:51<37:21,  4.61it/s, acc=0.743, epoch=29, loss=0.828]

epoch:29, idx:499/10845, loss:0.8254423614740372, acc:0.7425


  6%|▌         | 600/10845 [02:13<39:36,  4.31it/s, acc=0.742, epoch=29, loss=0.824]

epoch:29, idx:599/10845, loss:0.8238067637880643, acc:0.7420833333333333


  6%|▋         | 700/10845 [02:35<37:02,  4.57it/s, acc=0.739, epoch=29, loss=0.829]

epoch:29, idx:699/10845, loss:0.829186264191355, acc:0.7385714285714285


  7%|▋         | 800/10845 [02:58<38:35,  4.34it/s, acc=0.739, epoch=29, loss=0.833]

epoch:29, idx:799/10845, loss:0.8330494090169668, acc:0.73875


  8%|▊         | 901/10845 [03:21<35:07,  4.72it/s, acc=0.739, epoch=29, loss=0.834]

epoch:29, idx:899/10845, loss:0.8338216860426797, acc:0.7386111111111111


  9%|▉         | 1000/10845 [03:42<35:07,  4.67it/s, acc=0.735, epoch=29, loss=0.847]

epoch:29, idx:999/10845, loss:0.8466778333783149, acc:0.735


 10%|█         | 1100/10845 [04:05<34:50,  4.66it/s, acc=0.734, epoch=29, loss=0.85] 

epoch:29, idx:1099/10845, loss:0.8496275678006086, acc:0.7336363636363636


 11%|█         | 1201/10845 [04:27<34:05,  4.71it/s, acc=0.73, epoch=29, loss=0.863] 

epoch:29, idx:1199/10845, loss:0.8625317246218522, acc:0.73


 12%|█▏        | 1301/10845 [04:49<36:16,  4.38it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:1299/10845, loss:0.871391650392459, acc:0.7309615384615384


 13%|█▎        | 1400/10845 [05:11<35:29,  4.44it/s, acc=0.732, epoch=29, loss=0.869]

epoch:29, idx:1399/10845, loss:0.8693552254353251, acc:0.7319642857142857


 14%|█▍        | 1500/10845 [05:34<33:31,  4.65it/s, acc=0.731, epoch=29, loss=0.865]

epoch:29, idx:1499/10845, loss:0.8650658922592799, acc:0.7313333333333333


 15%|█▍        | 1601/10845 [05:56<34:21,  4.48it/s, acc=0.732, epoch=29, loss=0.869]

epoch:29, idx:1599/10845, loss:0.8690774711593986, acc:0.73234375


 16%|█▌        | 1701/10845 [06:18<31:54,  4.78it/s, acc=0.732, epoch=29, loss=0.869]

epoch:29, idx:1699/10845, loss:0.8696253118444891, acc:0.7320588235294118


 17%|█▋        | 1801/10845 [06:40<32:05,  4.70it/s, acc=0.732, epoch=29, loss=0.866]

epoch:29, idx:1799/10845, loss:0.8667273758186235, acc:0.7318055555555556


 18%|█▊        | 1901/10845 [07:03<32:35,  4.57it/s, acc=0.73, epoch=29, loss=0.872] 

epoch:29, idx:1899/10845, loss:0.8718474484431116, acc:0.7303947368421052


 18%|█▊        | 2000/10845 [07:24<32:51,  4.49it/s, acc=0.729, epoch=29, loss=0.878]

epoch:29, idx:1999/10845, loss:0.8779970647990704, acc:0.728625


 19%|█▉        | 2101/10845 [07:47<32:34,  4.47it/s, acc=0.73, epoch=29, loss=0.876] 

epoch:29, idx:2099/10845, loss:0.8765625476837158, acc:0.7303571428571428


 20%|██        | 2200/10845 [08:09<31:26,  4.58it/s, acc=0.728, epoch=29, loss=0.88] 

epoch:29, idx:2199/10845, loss:0.8804864316095006, acc:0.7281818181818182


 21%|██        | 2300/10845 [08:31<29:06,  4.89it/s, acc=0.73, epoch=29, loss=0.875] 

epoch:29, idx:2299/10845, loss:0.8748177907259568, acc:0.7295652173913043


 22%|██▏       | 2400/10845 [08:53<30:14,  4.66it/s, acc=0.729, epoch=29, loss=0.877]

epoch:29, idx:2399/10845, loss:0.8769921736419201, acc:0.7286458333333333


 23%|██▎       | 2501/10845 [09:16<31:31,  4.41it/s, acc=0.73, epoch=29, loss=0.872] 

epoch:29, idx:2499/10845, loss:0.8718238529205322, acc:0.7303


 24%|██▍       | 2600/10845 [09:38<30:00,  4.58it/s, acc=0.731, epoch=29, loss=0.87] 

epoch:29, idx:2599/10845, loss:0.8696860420703888, acc:0.7307692307692307


 25%|██▍       | 2700/10845 [10:00<30:20,  4.47it/s, acc=0.731, epoch=29, loss=0.869]

epoch:29, idx:2699/10845, loss:0.8690481598288925, acc:0.7308333333333333


 26%|██▌       | 2800/10845 [10:22<29:50,  4.49it/s, acc=0.73, epoch=29, loss=0.872] 

epoch:29, idx:2799/10845, loss:0.8721229939375605, acc:0.7300892857142857


 27%|██▋       | 2900/10845 [10:44<29:43,  4.46it/s, acc=0.731, epoch=29, loss=0.872]

epoch:29, idx:2899/10845, loss:0.8717541427653411, acc:0.730948275862069


 28%|██▊       | 3001/10845 [11:06<27:39,  4.73it/s, acc=0.732, epoch=29, loss=0.868]

epoch:29, idx:2999/10845, loss:0.8676326346198717, acc:0.7323333333333333


 29%|██▊       | 3100/10845 [11:28<29:52,  4.32it/s, acc=0.731, epoch=29, loss=0.873]

epoch:29, idx:3099/10845, loss:0.8731293608873121, acc:0.7310483870967742


 30%|██▉       | 3201/10845 [11:51<27:45,  4.59it/s, acc=0.731, epoch=29, loss=0.872]

epoch:29, idx:3199/10845, loss:0.8718847935460508, acc:0.73109375


 30%|███       | 3300/10845 [12:13<28:10,  4.46it/s, acc=0.733, epoch=29, loss=0.866]

epoch:29, idx:3299/10845, loss:0.865994950298107, acc:0.7326515151515152


 31%|███▏      | 3401/10845 [12:36<27:34,  4.50it/s, acc=0.732, epoch=29, loss=0.869]

epoch:29, idx:3399/10845, loss:0.8695272395715994, acc:0.7321323529411765


 32%|███▏      | 3501/10845 [12:58<26:33,  4.61it/s, acc=0.733, epoch=29, loss=0.865]

epoch:29, idx:3499/10845, loss:0.8656613832712173, acc:0.7333571428571428


 33%|███▎      | 3601/10845 [13:20<26:02,  4.64it/s, acc=0.732, epoch=29, loss=0.871]

epoch:29, idx:3599/10845, loss:0.8706921022633711, acc:0.7317361111111111


 34%|███▍      | 3700/10845 [13:42<25:55,  4.59it/s, acc=0.731, epoch=29, loss=0.872]

epoch:29, idx:3699/10845, loss:0.8715383169618813, acc:0.7308783783783783


 35%|███▌      | 3800/10845 [14:04<27:12,  4.32it/s, acc=0.732, epoch=29, loss=0.87] 

epoch:29, idx:3799/10845, loss:0.8697615307412649, acc:0.7316447368421053


 36%|███▌      | 3900/10845 [14:26<26:07,  4.43it/s, acc=0.732, epoch=29, loss=0.87] 

epoch:29, idx:3899/10845, loss:0.8700913931620426, acc:0.7316666666666667


 37%|███▋      | 4000/10845 [14:48<24:42,  4.62it/s, acc=0.731, epoch=29, loss=0.87] 

epoch:29, idx:3999/10845, loss:0.8703724525123835, acc:0.7309375


 38%|███▊      | 4101/10845 [15:10<25:34,  4.40it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:4099/10845, loss:0.8707029786487905, acc:0.7309146341463415


 39%|███▊      | 4201/10845 [15:33<23:39,  4.68it/s, acc=0.733, epoch=29, loss=0.865]

epoch:29, idx:4199/10845, loss:0.8652141331207185, acc:0.7324404761904761


 40%|███▉      | 4300/10845 [15:54<24:19,  4.48it/s, acc=0.732, epoch=29, loss=0.865]

epoch:29, idx:4299/10845, loss:0.8646695084904515, acc:0.7318604651162791


 41%|████      | 4401/10845 [16:17<24:45,  4.34it/s, acc=0.731, epoch=29, loss=0.866]

epoch:29, idx:4399/10845, loss:0.8651984259486198, acc:0.7315909090909091


 42%|████▏     | 4501/10845 [16:39<24:12,  4.37it/s, acc=0.732, epoch=29, loss=0.863]

epoch:29, idx:4499/10845, loss:0.8636089196734958, acc:0.7320555555555556


 42%|████▏     | 4601/10845 [17:01<22:31,  4.62it/s, acc=0.732, epoch=29, loss=0.864]

epoch:29, idx:4599/10845, loss:0.8635645968758542, acc:0.7319565217391304


 43%|████▎     | 4700/10845 [17:23<23:22,  4.38it/s, acc=0.732, epoch=29, loss=0.864]

epoch:29, idx:4699/10845, loss:0.8637744178670518, acc:0.7321808510638298


 44%|████▍     | 4801/10845 [17:46<23:07,  4.36it/s, acc=0.732, epoch=29, loss=0.866]

epoch:29, idx:4799/10845, loss:0.8657529377440611, acc:0.73203125


 45%|████▌     | 4900/10845 [18:08<24:28,  4.05it/s, acc=0.732, epoch=29, loss=0.865]

epoch:29, idx:4899/10845, loss:0.8646964777854024, acc:0.7320918367346939


 46%|████▌     | 5000/10845 [18:30<21:32,  4.52it/s, acc=0.732, epoch=29, loss=0.866]

epoch:29, idx:4999/10845, loss:0.8655444459557533, acc:0.7319


 47%|████▋     | 5100/10845 [18:52<22:15,  4.30it/s, acc=0.732, epoch=29, loss=0.866]

epoch:29, idx:5099/10845, loss:0.8657451618769589, acc:0.7316666666666667


 48%|████▊     | 5200/10845 [19:15<22:10,  4.24it/s, acc=0.732, epoch=29, loss=0.866]

epoch:29, idx:5199/10845, loss:0.8664031121478631, acc:0.7318269230769231


 49%|████▉     | 5300/10845 [19:37<22:15,  4.15it/s, acc=0.731, epoch=29, loss=0.868]

epoch:29, idx:5299/10845, loss:0.86806329258208, acc:0.7313207547169811


 50%|████▉     | 5401/10845 [20:00<19:55,  4.55it/s, acc=0.731, epoch=29, loss=0.868]

epoch:29, idx:5399/10845, loss:0.8675164382214899, acc:0.7313888888888889


 51%|█████     | 5500/10845 [20:22<20:43,  4.30it/s, acc=0.732, epoch=29, loss=0.867]

epoch:29, idx:5499/10845, loss:0.8671631254824725, acc:0.7315454545454545


 52%|█████▏    | 5600/10845 [20:44<19:14,  4.54it/s, acc=0.732, epoch=29, loss=0.867]

epoch:29, idx:5599/10845, loss:0.8671887974334614, acc:0.7317410714285715


 53%|█████▎    | 5700/10845 [21:07<19:20,  4.43it/s, acc=0.732, epoch=29, loss=0.867]

epoch:29, idx:5699/10845, loss:0.8668339493295603, acc:0.7315789473684211


 53%|█████▎    | 5800/10845 [21:29<18:26,  4.56it/s, acc=0.731, epoch=29, loss=0.868]

epoch:29, idx:5799/10845, loss:0.8682092037591441, acc:0.7314655172413793


 54%|█████▍    | 5900/10845 [21:52<18:40,  4.41it/s, acc=0.732, epoch=29, loss=0.869]

epoch:29, idx:5899/10845, loss:0.8687697268845671, acc:0.7317796610169491


 55%|█████▌    | 6000/10845 [22:14<18:20,  4.40it/s, acc=0.731, epoch=29, loss=0.87] 

epoch:29, idx:5999/10845, loss:0.8696505992511908, acc:0.7313333333333333


 56%|█████▋    | 6101/10845 [22:36<16:59,  4.66it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:6099/10845, loss:0.8705091558812095, acc:0.7309426229508197


 57%|█████▋    | 6200/10845 [22:58<16:30,  4.69it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:6199/10845, loss:0.8714951697184193, acc:0.7308064516129033


 58%|█████▊    | 6300/10845 [23:20<17:54,  4.23it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:6299/10845, loss:0.8707556999202759, acc:0.7307142857142858


 59%|█████▉    | 6400/10845 [23:42<16:50,  4.40it/s, acc=0.731, epoch=29, loss=0.87] 

epoch:29, idx:6399/10845, loss:0.8704088199045509, acc:0.7310546875


 60%|█████▉    | 6501/10845 [24:05<15:23,  4.71it/s, acc=0.731, epoch=29, loss=0.87] 

epoch:29, idx:6499/10845, loss:0.8702795187785075, acc:0.7313461538461539


 61%|██████    | 6600/10845 [24:27<15:39,  4.52it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:6599/10845, loss:0.8710969531445792, acc:0.73125


 62%|██████▏   | 6701/10845 [24:49<15:31,  4.45it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:6699/10845, loss:0.8716019428932845, acc:0.7311567164179105


 63%|██████▎   | 6801/10845 [25:11<14:43,  4.58it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:6799/10845, loss:0.8707505296170711, acc:0.7314705882352941


 64%|██████▎   | 6901/10845 [25:33<14:37,  4.49it/s, acc=0.731, epoch=29, loss=0.87] 

epoch:29, idx:6899/10845, loss:0.8699254015822342, acc:0.7313768115942029


 65%|██████▍   | 7001/10845 [25:55<13:29,  4.75it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:6999/10845, loss:0.8706802652393069, acc:0.7312857142857143


 65%|██████▌   | 7100/10845 [26:17<13:28,  4.63it/s, acc=0.731, epoch=29, loss=0.87] 

epoch:29, idx:7099/10845, loss:0.8700362003856982, acc:0.7311971830985915


 66%|██████▋   | 7200/10845 [26:39<13:21,  4.55it/s, acc=0.732, epoch=29, loss=0.869]

epoch:29, idx:7199/10845, loss:0.8694720385637549, acc:0.7317361111111111


 67%|██████▋   | 7301/10845 [27:02<13:10,  4.48it/s, acc=0.732, epoch=29, loss=0.87] 

epoch:29, idx:7299/10845, loss:0.8701817758769205, acc:0.7316095890410959


 68%|██████▊   | 7401/10845 [27:24<12:22,  4.64it/s, acc=0.731, epoch=29, loss=0.871]

epoch:29, idx:7399/10845, loss:0.8715431153210433, acc:0.7313175675675676


 69%|██████▉   | 7500/10845 [27:46<12:40,  4.40it/s, acc=0.732, epoch=29, loss=0.871]

epoch:29, idx:7499/10845, loss:0.8713411784410476, acc:0.7316333333333334


 70%|███████   | 7601/10845 [28:09<11:22,  4.75it/s, acc=0.732, epoch=29, loss=0.873]

epoch:29, idx:7599/10845, loss:0.8728254047428307, acc:0.7315789473684211


 71%|███████   | 7700/10845 [28:31<11:57,  4.38it/s, acc=0.732, epoch=29, loss=0.872]

epoch:29, idx:7699/10845, loss:0.872167089612453, acc:0.7316558441558442


 72%|███████▏  | 7800/10845 [28:53<11:24,  4.45it/s, acc=0.731, epoch=29, loss=0.874]

epoch:29, idx:7799/10845, loss:0.8742293859750797, acc:0.7312179487179488


 73%|███████▎  | 7900/10845 [29:15<10:09,  4.83it/s, acc=0.731, epoch=29, loss=0.875]

epoch:29, idx:7899/10845, loss:0.8748232306407977, acc:0.7312341772151899


 74%|███████▍  | 8001/10845 [29:38<10:12,  4.64it/s, acc=0.731, epoch=29, loss=0.876]

epoch:29, idx:7999/10845, loss:0.8755868738517165, acc:0.7313125


 75%|███████▍  | 8100/10845 [29:59<09:45,  4.69it/s, acc=0.731, epoch=29, loss=0.875]

epoch:29, idx:8099/10845, loss:0.8749941150273806, acc:0.731358024691358


 76%|███████▌  | 8200/10845 [30:22<09:13,  4.78it/s, acc=0.732, epoch=29, loss=0.874]

epoch:29, idx:8199/10845, loss:0.8737389690774243, acc:0.7315853658536585


 77%|███████▋  | 8300/10845 [30:44<09:41,  4.38it/s, acc=0.731, epoch=29, loss=0.874]

epoch:29, idx:8299/10845, loss:0.8744520846691476, acc:0.7314457831325302


 77%|███████▋  | 8400/10845 [31:06<08:49,  4.62it/s, acc=0.731, epoch=29, loss=0.876]

epoch:29, idx:8399/10845, loss:0.8756129204162529, acc:0.7311904761904762


 78%|███████▊  | 8500/10845 [31:29<08:55,  4.38it/s, acc=0.731, epoch=29, loss=0.877]

epoch:29, idx:8499/10845, loss:0.876619121446329, acc:0.731


 79%|███████▉  | 8600/10845 [31:51<08:03,  4.65it/s, acc=0.731, epoch=29, loss=0.878]

epoch:29, idx:8599/10845, loss:0.8778806855997374, acc:0.7306976744186047


 80%|████████  | 8700/10845 [32:13<07:43,  4.63it/s, acc=0.731, epoch=29, loss=0.877]

epoch:29, idx:8699/10845, loss:0.8771546760167198, acc:0.7305747126436781


 81%|████████  | 8801/10845 [32:36<07:13,  4.72it/s, acc=0.73, epoch=29, loss=0.878] 

epoch:29, idx:8799/10845, loss:0.8776960682259365, acc:0.7303125


 82%|████████▏ | 8900/10845 [32:58<07:16,  4.46it/s, acc=0.73, epoch=29, loss=0.879]

epoch:29, idx:8899/10845, loss:0.8787869788220759, acc:0.73


 83%|████████▎ | 9001/10845 [33:20<06:51,  4.48it/s, acc=0.731, epoch=29, loss=0.877]

epoch:29, idx:8999/10845, loss:0.8768293524516954, acc:0.7305


 84%|████████▍ | 9101/10845 [33:43<06:12,  4.68it/s, acc=0.73, epoch=29, loss=0.878] 

epoch:29, idx:9099/10845, loss:0.8781598897753181, acc:0.7304120879120879


 85%|████████▍ | 9200/10845 [34:05<06:01,  4.55it/s, acc=0.73, epoch=29, loss=0.878] 

epoch:29, idx:9199/10845, loss:0.8781540212359118, acc:0.7304891304347826


 86%|████████▌ | 9300/10845 [34:27<05:36,  4.59it/s, acc=0.73, epoch=29, loss=0.878] 

epoch:29, idx:9299/10845, loss:0.8777551534291237, acc:0.7302150537634409


 87%|████████▋ | 9400/10845 [34:50<05:14,  4.59it/s, acc=0.731, epoch=29, loss=0.877]

epoch:29, idx:9399/10845, loss:0.8769723599071199, acc:0.730531914893617


 88%|████████▊ | 9500/10845 [35:12<05:00,  4.48it/s, acc=0.73, epoch=29, loss=0.879] 

epoch:29, idx:9499/10845, loss:0.8788977502960907, acc:0.7301842105263158


 89%|████████▊ | 9600/10845 [35:34<04:22,  4.75it/s, acc=0.73, epoch=29, loss=0.878]

epoch:29, idx:9599/10845, loss:0.8776727388985455, acc:0.7301302083333333


 89%|████████▉ | 9700/10845 [35:57<04:17,  4.44it/s, acc=0.73, epoch=29, loss=0.878]

epoch:29, idx:9699/10845, loss:0.8783227073593238, acc:0.7299484536082474


 90%|█████████ | 9800/10845 [36:19<03:46,  4.62it/s, acc=0.729, epoch=29, loss=0.879]

epoch:29, idx:9799/10845, loss:0.8791667162946293, acc:0.7294897959183674


 91%|█████████▏| 9900/10845 [36:41<03:33,  4.43it/s, acc=0.73, epoch=29, loss=0.879] 

epoch:29, idx:9899/10845, loss:0.8788572650666189, acc:0.729570707070707


 92%|█████████▏| 10000/10845 [37:04<02:57,  4.76it/s, acc=0.73, epoch=29, loss=0.878]

epoch:29, idx:9999/10845, loss:0.8782622627437114, acc:0.729875


 93%|█████████▎| 10100/10845 [37:26<02:34,  4.82it/s, acc=0.73, epoch=29, loss=0.879]

epoch:29, idx:10099/10845, loss:0.8791819902162741, acc:0.7297277227722773


 94%|█████████▍| 10201/10845 [37:48<02:22,  4.51it/s, acc=0.73, epoch=29, loss=0.879]

epoch:29, idx:10199/10845, loss:0.879138623797426, acc:0.729828431372549


 95%|█████████▍| 10300/10845 [38:10<02:09,  4.22it/s, acc=0.73, epoch=29, loss=0.88] 

epoch:29, idx:10299/10845, loss:0.8797823059848211, acc:0.7297087378640776


 96%|█████████▌| 10400/10845 [38:32<01:35,  4.64it/s, acc=0.73, epoch=29, loss=0.879]

epoch:29, idx:10399/10845, loss:0.8794833469333557, acc:0.7295192307692308


 97%|█████████▋| 10501/10845 [38:54<01:14,  4.64it/s, acc=0.73, epoch=29, loss=0.879] 

epoch:29, idx:10499/10845, loss:0.8786933267173312, acc:0.7297380952380952


 98%|█████████▊| 10600/10845 [39:15<00:53,  4.57it/s, acc=0.73, epoch=29, loss=0.878]

epoch:29, idx:10599/10845, loss:0.8779883336964643, acc:0.729811320754717


 99%|█████████▊| 10700/10845 [39:38<00:29,  4.90it/s, acc=0.73, epoch=29, loss=0.878]

epoch:29, idx:10699/10845, loss:0.877784086048046, acc:0.7296728971962617


100%|█████████▉| 10800/10845 [40:00<00:09,  4.62it/s, acc=0.729, epoch=29, loss=0.878]

epoch:29, idx:10799/10845, loss:0.8784793088557543, acc:0.7294907407407407


100%|██████████| 10845/10845 [40:10<00:00,  4.66it/s, acc=0.73, epoch=29, loss=0.878] 


epoch:29, idx:0/1275, loss:1.3623850345611572, acc:0.75
epoch:29, idx:100/1275, loss:1.4085326147551585, acc:0.6435643564356436
epoch:29, idx:200/1275, loss:1.2994230665377717, acc:0.6467661691542289
epoch:29, idx:300/1275, loss:1.266704520513845, acc:0.6495016611295681
epoch:29, idx:400/1275, loss:1.2435257723206594, acc:0.655860349127182
epoch:29, idx:500/1275, loss:1.2143150413345671, acc:0.656686626746507
epoch:29, idx:600/1275, loss:1.2337560717159024, acc:0.6518302828618968
epoch:29, idx:700/1275, loss:1.2362495494126933, acc:0.6512125534950072
epoch:29, idx:800/1275, loss:1.25787045967564, acc:0.648876404494382
epoch:29, idx:900/1275, loss:1.2500422112023526, acc:0.6531631520532741
epoch:29, idx:1000/1275, loss:1.2565300942181827, acc:0.6496003996003996
epoch:29, idx:1100/1275, loss:1.2452281254405437, acc:0.6530426884650318
epoch:29, idx:1200/1275, loss:1.2393525146028581, acc:0.6515403830141548


  1%|          | 101/10845 [00:22<40:08,  4.46it/s, acc=0.715, epoch=30, loss=0.842]

epoch:30, idx:99/10845, loss:0.8490336138010025, acc:0.7125


  2%|▏         | 201/10845 [00:44<37:42,  4.70it/s, acc=0.724, epoch=30, loss=0.851]

epoch:30, idx:199/10845, loss:0.8488232198357583, acc:0.72375


  3%|▎         | 300/10845 [01:07<37:08,  4.73it/s, acc=0.728, epoch=30, loss=0.826]

epoch:30, idx:299/10845, loss:0.8258246523141861, acc:0.7275


  4%|▎         | 401/10845 [01:29<40:07,  4.34it/s, acc=0.741, epoch=30, loss=0.801]

epoch:30, idx:399/10845, loss:0.8029620467126369, acc:0.74125


  5%|▍         | 501/10845 [01:51<37:05,  4.65it/s, acc=0.735, epoch=30, loss=0.818]

epoch:30, idx:499/10845, loss:0.8191619120836258, acc:0.7345


  6%|▌         | 601/10845 [02:14<38:26,  4.44it/s, acc=0.729, epoch=30, loss=0.827]

epoch:30, idx:599/10845, loss:0.8286852930982908, acc:0.72875


  6%|▋         | 700/10845 [02:35<37:15,  4.54it/s, acc=0.734, epoch=30, loss=0.827]

epoch:30, idx:699/10845, loss:0.827218706692968, acc:0.7339285714285714


  7%|▋         | 800/10845 [02:57<35:50,  4.67it/s, acc=0.732, epoch=30, loss=0.837]

epoch:30, idx:799/10845, loss:0.8367268871515989, acc:0.7315625


  8%|▊         | 900/10845 [03:19<37:09,  4.46it/s, acc=0.734, epoch=30, loss=0.828]

epoch:30, idx:899/10845, loss:0.8279331838422351, acc:0.7344444444444445


  9%|▉         | 1000/10845 [03:42<34:57,  4.69it/s, acc=0.734, epoch=30, loss=0.841]

epoch:30, idx:999/10845, loss:0.8406597613692284, acc:0.734


 10%|█         | 1100/10845 [04:04<35:23,  4.59it/s, acc=0.737, epoch=30, loss=0.838]

epoch:30, idx:1099/10845, loss:0.8376278941739689, acc:0.7370454545454546


 11%|█         | 1200/10845 [04:25<37:17,  4.31it/s, acc=0.737, epoch=30, loss=0.84] 

epoch:30, idx:1199/10845, loss:0.8403160342077414, acc:0.7370833333333333


 12%|█▏        | 1300/10845 [04:48<34:46,  4.57it/s, acc=0.736, epoch=30, loss=0.84] 

epoch:30, idx:1299/10845, loss:0.8403420790800682, acc:0.7357692307692307


 13%|█▎        | 1401/10845 [05:10<33:15,  4.73it/s, acc=0.735, epoch=30, loss=0.843]

epoch:30, idx:1399/10845, loss:0.842574893789632, acc:0.7353571428571428


 14%|█▍        | 1500/10845 [05:33<37:29,  4.16it/s, acc=0.737, epoch=30, loss=0.839]

epoch:30, idx:1499/10845, loss:0.839019487897555, acc:0.7365


 15%|█▍        | 1600/10845 [05:55<34:38,  4.45it/s, acc=0.737, epoch=30, loss=0.841]

epoch:30, idx:1599/10845, loss:0.841468325741589, acc:0.736875


 16%|█▌        | 1700/10845 [06:17<36:09,  4.22it/s, acc=0.735, epoch=30, loss=0.844]

epoch:30, idx:1699/10845, loss:0.8435865727242301, acc:0.7354411764705883


 17%|█▋        | 1801/10845 [06:40<32:19,  4.66it/s, acc=0.736, epoch=30, loss=0.842]

epoch:30, idx:1799/10845, loss:0.8426463190052245, acc:0.7354166666666667


 18%|█▊        | 1901/10845 [07:02<31:55,  4.67it/s, acc=0.737, epoch=30, loss=0.84] 

epoch:30, idx:1899/10845, loss:0.8404548521732029, acc:0.736578947368421


 18%|█▊        | 2000/10845 [07:24<34:22,  4.29it/s, acc=0.737, epoch=30, loss=0.84] 

epoch:30, idx:1999/10845, loss:0.8395772153437138, acc:0.737


 19%|█▉        | 2100/10845 [07:46<31:47,  4.58it/s, acc=0.737, epoch=30, loss=0.84] 

epoch:30, idx:2099/10845, loss:0.8402501521507899, acc:0.7366666666666667


 20%|██        | 2200/10845 [08:08<31:21,  4.59it/s, acc=0.739, epoch=30, loss=0.835]

epoch:30, idx:2199/10845, loss:0.8347298455509272, acc:0.7386363636363636


 21%|██        | 2300/10845 [08:31<30:57,  4.60it/s, acc=0.74, epoch=30, loss=0.831] 

epoch:30, idx:2299/10845, loss:0.8308832868544952, acc:0.74


 22%|██▏       | 2401/10845 [08:53<32:47,  4.29it/s, acc=0.741, epoch=30, loss=0.827]

epoch:30, idx:2399/10845, loss:0.8273352238287528, acc:0.7407291666666667


 23%|██▎       | 2500/10845 [09:16<33:27,  4.16it/s, acc=0.74, epoch=30, loss=0.83]  

epoch:30, idx:2499/10845, loss:0.8295611769914627, acc:0.7402


 24%|██▍       | 2600/10845 [09:38<31:26,  4.37it/s, acc=0.741, epoch=30, loss=0.829]

epoch:30, idx:2599/10845, loss:0.82941125949988, acc:0.7408653846153846


 25%|██▍       | 2701/10845 [10:01<29:57,  4.53it/s, acc=0.742, epoch=30, loss=0.828]

epoch:30, idx:2699/10845, loss:0.8286160492676276, acc:0.7416666666666667


 26%|██▌       | 2800/10845 [10:23<29:13,  4.59it/s, acc=0.74, epoch=30, loss=0.834] 

epoch:30, idx:2799/10845, loss:0.8341325105726719, acc:0.7402678571428571


 27%|██▋       | 2900/10845 [10:45<29:16,  4.52it/s, acc=0.738, epoch=30, loss=0.842]

epoch:30, idx:2899/10845, loss:0.8419386266634382, acc:0.738448275862069


 28%|██▊       | 3000/10845 [11:08<29:52,  4.38it/s, acc=0.739, epoch=30, loss=0.841]

epoch:30, idx:2999/10845, loss:0.8409250824650129, acc:0.7386666666666667


 29%|██▊       | 3100/10845 [11:30<28:32,  4.52it/s, acc=0.737, epoch=30, loss=0.846]

epoch:30, idx:3099/10845, loss:0.8460153581826918, acc:0.7372580645161291


 30%|██▉       | 3200/10845 [11:52<29:35,  4.31it/s, acc=0.737, epoch=30, loss=0.847]

epoch:30, idx:3199/10845, loss:0.8472367301397026, acc:0.737265625


 30%|███       | 3300/10845 [12:14<28:06,  4.47it/s, acc=0.736, epoch=30, loss=0.849]

epoch:30, idx:3299/10845, loss:0.8494709845383962, acc:0.7363636363636363


 31%|███▏      | 3400/10845 [12:37<27:25,  4.52it/s, acc=0.737, epoch=30, loss=0.85] 

epoch:30, idx:3399/10845, loss:0.8504067700750687, acc:0.7368382352941176


 32%|███▏      | 3500/10845 [12:59<27:17,  4.49it/s, acc=0.736, epoch=30, loss=0.852]

epoch:30, idx:3499/10845, loss:0.8518374430452075, acc:0.7362857142857143


 33%|███▎      | 3600/10845 [13:21<26:24,  4.57it/s, acc=0.736, epoch=30, loss=0.853]

epoch:30, idx:3599/10845, loss:0.8531740107470088, acc:0.7356944444444444


 34%|███▍      | 3700/10845 [13:43<27:00,  4.41it/s, acc=0.734, epoch=30, loss=0.857]

epoch:30, idx:3699/10845, loss:0.8569018092670956, acc:0.7343918918918919


 35%|███▌      | 3800/10845 [14:05<24:38,  4.76it/s, acc=0.735, epoch=30, loss=0.855]

epoch:30, idx:3799/10845, loss:0.8551130808027168, acc:0.735


 36%|███▌      | 3901/10845 [14:28<23:53,  4.84it/s, acc=0.734, epoch=30, loss=0.859]

epoch:30, idx:3899/10845, loss:0.8586531362166772, acc:0.7336538461538461


 37%|███▋      | 4000/10845 [14:50<25:34,  4.46it/s, acc=0.734, epoch=30, loss=0.857]

epoch:30, idx:3999/10845, loss:0.8570425494611263, acc:0.7336875


 38%|███▊      | 4101/10845 [15:13<23:37,  4.76it/s, acc=0.733, epoch=30, loss=0.857]

epoch:30, idx:4099/10845, loss:0.8569888092831868, acc:0.7334146341463414


 39%|███▊      | 4200/10845 [15:34<23:54,  4.63it/s, acc=0.734, epoch=30, loss=0.855]

epoch:30, idx:4199/10845, loss:0.8550560272023792, acc:0.7338690476190476


 40%|███▉      | 4300/10845 [15:56<22:48,  4.78it/s, acc=0.734, epoch=30, loss=0.854]

epoch:30, idx:4299/10845, loss:0.8539619692259056, acc:0.7344186046511628


 41%|████      | 4400/10845 [16:18<23:56,  4.49it/s, acc=0.734, epoch=30, loss=0.857]

epoch:30, idx:4399/10845, loss:0.8566921228441325, acc:0.7338068181818181


 41%|████▏     | 4500/10845 [16:41<23:40,  4.47it/s, acc=0.734, epoch=30, loss=0.857]

epoch:30, idx:4499/10845, loss:0.8568791812790765, acc:0.7338888888888889


 42%|████▏     | 4600/10845 [17:02<22:51,  4.55it/s, acc=0.735, epoch=30, loss=0.854]

epoch:30, idx:4599/10845, loss:0.854214860641438, acc:0.7347826086956522


 43%|████▎     | 4701/10845 [17:25<22:39,  4.52it/s, acc=0.735, epoch=30, loss=0.853]

epoch:30, idx:4699/10845, loss:0.8535454983660515, acc:0.734468085106383


 44%|████▍     | 4800/10845 [17:47<23:33,  4.28it/s, acc=0.734, epoch=30, loss=0.854]

epoch:30, idx:4799/10845, loss:0.8535588411490123, acc:0.734375


 45%|████▌     | 4900/10845 [18:09<20:52,  4.75it/s, acc=0.734, epoch=30, loss=0.854]

epoch:30, idx:4899/10845, loss:0.8537551787191507, acc:0.7343367346938775


 46%|████▌     | 5000/10845 [18:31<21:51,  4.46it/s, acc=0.735, epoch=30, loss=0.855]

epoch:30, idx:4999/10845, loss:0.8547828431606292, acc:0.73455


 47%|████▋     | 5100/10845 [18:54<23:03,  4.15it/s, acc=0.735, epoch=30, loss=0.854]

epoch:30, idx:5099/10845, loss:0.8536973744747686, acc:0.7346078431372549


 48%|████▊     | 5200/10845 [19:16<21:03,  4.47it/s, acc=0.734, epoch=30, loss=0.853]

epoch:30, idx:5199/10845, loss:0.853013053719814, acc:0.734423076923077


 49%|████▉     | 5300/10845 [19:38<19:28,  4.75it/s, acc=0.735, epoch=30, loss=0.852]

epoch:30, idx:5299/10845, loss:0.8523131481206642, acc:0.7345283018867924


 50%|████▉     | 5400/10845 [20:01<20:44,  4.38it/s, acc=0.734, epoch=30, loss=0.853]

epoch:30, idx:5399/10845, loss:0.8527382357252968, acc:0.734212962962963


 51%|█████     | 5501/10845 [20:23<18:52,  4.72it/s, acc=0.734, epoch=30, loss=0.853]

epoch:30, idx:5499/10845, loss:0.8526292170502923, acc:0.7344090909090909


 52%|█████▏    | 5601/10845 [20:45<19:01,  4.59it/s, acc=0.735, epoch=30, loss=0.851]

epoch:30, idx:5599/10845, loss:0.850733673923782, acc:0.7346875


 53%|█████▎    | 5700/10845 [21:07<18:41,  4.59it/s, acc=0.735, epoch=30, loss=0.851]

epoch:30, idx:5699/10845, loss:0.8514255969461642, acc:0.734561403508772


 53%|█████▎    | 5800/10845 [21:29<18:41,  4.50it/s, acc=0.734, epoch=30, loss=0.852]

epoch:30, idx:5799/10845, loss:0.8516329029613527, acc:0.7342241379310345


 54%|█████▍    | 5900/10845 [21:52<20:02,  4.11it/s, acc=0.734, epoch=30, loss=0.852]

epoch:30, idx:5899/10845, loss:0.8523575268458512, acc:0.734406779661017


 55%|█████▌    | 6000/10845 [22:14<18:24,  4.38it/s, acc=0.734, epoch=30, loss=0.854]

epoch:30, idx:5999/10845, loss:0.8540787739356359, acc:0.7341666666666666


 56%|█████▌    | 6100/10845 [22:36<16:56,  4.67it/s, acc=0.734, epoch=30, loss=0.853]

epoch:30, idx:6099/10845, loss:0.8527810281612834, acc:0.7344262295081967


 57%|█████▋    | 6200/10845 [22:58<16:32,  4.68it/s, acc=0.734, epoch=30, loss=0.853]

epoch:30, idx:6199/10845, loss:0.8533323408711341, acc:0.7343145161290323


 58%|█████▊    | 6301/10845 [23:21<16:33,  4.58it/s, acc=0.734, epoch=30, loss=0.854]

epoch:30, idx:6299/10845, loss:0.8542812452619037, acc:0.7341269841269841


 59%|█████▉    | 6401/10845 [23:43<16:08,  4.59it/s, acc=0.734, epoch=30, loss=0.855]

epoch:30, idx:6399/10845, loss:0.855276621170342, acc:0.7337890625


 60%|█████▉    | 6500/10845 [24:05<15:37,  4.64it/s, acc=0.733, epoch=30, loss=0.855]

epoch:30, idx:6499/10845, loss:0.8549369745437916, acc:0.7333461538461539


 61%|██████    | 6601/10845 [24:27<14:56,  4.73it/s, acc=0.733, epoch=30, loss=0.856]

epoch:30, idx:6599/10845, loss:0.8559487181540691, acc:0.7328787878787879


 62%|██████▏   | 6700/10845 [24:49<15:43,  4.39it/s, acc=0.733, epoch=30, loss=0.855]

epoch:30, idx:6699/10845, loss:0.8548757758425243, acc:0.7331343283582089


 63%|██████▎   | 6801/10845 [25:12<14:48,  4.55it/s, acc=0.733, epoch=30, loss=0.857]

epoch:30, idx:6799/10845, loss:0.8570498044087607, acc:0.7327941176470588


 64%|██████▎   | 6900/10845 [25:33<14:41,  4.47it/s, acc=0.733, epoch=30, loss=0.857]

epoch:30, idx:6899/10845, loss:0.8572412839920625, acc:0.7328985507246377


 65%|██████▍   | 7000/10845 [25:56<14:50,  4.32it/s, acc=0.733, epoch=30, loss=0.859]

epoch:30, idx:6999/10845, loss:0.8585542892983982, acc:0.7327857142857143


 65%|██████▌   | 7101/10845 [26:18<13:28,  4.63it/s, acc=0.733, epoch=30, loss=0.861]

epoch:30, idx:7099/10845, loss:0.8609658332525845, acc:0.7325


 66%|██████▋   | 7201/10845 [26:40<13:40,  4.44it/s, acc=0.733, epoch=30, loss=0.859]

epoch:30, idx:7199/10845, loss:0.8589266044480933, acc:0.7327777777777778


 67%|██████▋   | 7300/10845 [27:02<13:13,  4.47it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:7299/10845, loss:0.8601772442174285, acc:0.7327739726027397


 68%|██████▊   | 7400/10845 [27:24<12:32,  4.58it/s, acc=0.733, epoch=30, loss=0.861]

epoch:30, idx:7399/10845, loss:0.8614866638908515, acc:0.7325337837837838


 69%|██████▉   | 7500/10845 [27:46<12:15,  4.55it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:7499/10845, loss:0.8602697153965632, acc:0.7327


 70%|███████   | 7600/10845 [28:08<11:11,  4.83it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:7599/10845, loss:0.8599715303511996, acc:0.7325657894736842


 71%|███████   | 7700/10845 [28:31<11:25,  4.59it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:7699/10845, loss:0.8600951342845892, acc:0.7326298701298701


 72%|███████▏  | 7800/10845 [28:53<10:37,  4.78it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:7799/10845, loss:0.8596373111697344, acc:0.7325641025641025


 73%|███████▎  | 7901/10845 [29:16<09:59,  4.91it/s, acc=0.733, epoch=30, loss=0.859]

epoch:30, idx:7899/10845, loss:0.8592445214564287, acc:0.732753164556962


 74%|███████▍  | 8000/10845 [29:37<11:07,  4.26it/s, acc=0.733, epoch=30, loss=0.859]

epoch:30, idx:7999/10845, loss:0.8588653399720788, acc:0.73259375


 75%|███████▍  | 8100/10845 [30:00<09:50,  4.65it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:8099/10845, loss:0.8597234866722131, acc:0.7325925925925926


 76%|███████▌  | 8200/10845 [30:22<10:10,  4.33it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:8199/10845, loss:0.8597605466188454, acc:0.7327743902439025


 77%|███████▋  | 8300/10845 [30:45<08:52,  4.78it/s, acc=0.733, epoch=30, loss=0.859]

epoch:30, idx:8299/10845, loss:0.8588592907391399, acc:0.7327710843373494


 77%|███████▋  | 8400/10845 [31:07<08:50,  4.61it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:8399/10845, loss:0.859540252820367, acc:0.7327976190476191


 78%|███████▊  | 8501/10845 [31:29<08:26,  4.63it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:8499/10845, loss:0.8599904830245411, acc:0.7328235294117647


 79%|███████▉  | 8601/10845 [31:52<08:12,  4.55it/s, acc=0.733, epoch=30, loss=0.859]

epoch:30, idx:8599/10845, loss:0.8590135348674863, acc:0.7330813953488372


 80%|████████  | 8700/10845 [32:14<07:46,  4.60it/s, acc=0.732, epoch=30, loss=0.861]

epoch:30, idx:8699/10845, loss:0.8606776641018089, acc:0.7324425287356322


 81%|████████  | 8800/10845 [32:36<07:21,  4.63it/s, acc=0.732, epoch=30, loss=0.861]

epoch:30, idx:8799/10845, loss:0.8608038125796752, acc:0.7322727272727273


 82%|████████▏ | 8901/10845 [32:59<07:02,  4.60it/s, acc=0.732, epoch=30, loss=0.862]

epoch:30, idx:8899/10845, loss:0.8617368697785259, acc:0.731938202247191


 83%|████████▎ | 9001/10845 [33:21<06:46,  4.54it/s, acc=0.732, epoch=30, loss=0.862]

epoch:30, idx:8999/10845, loss:0.8615520578225454, acc:0.7323055555555555


 84%|████████▍ | 9100/10845 [33:42<06:06,  4.77it/s, acc=0.732, epoch=30, loss=0.862]

epoch:30, idx:9099/10845, loss:0.8616264865162608, acc:0.7324175824175824


 85%|████████▍ | 9200/10845 [34:05<05:52,  4.67it/s, acc=0.733, epoch=30, loss=0.86] 

epoch:30, idx:9199/10845, loss:0.8601574108522871, acc:0.7327173913043479


 86%|████████▌ | 9300/10845 [34:27<05:34,  4.61it/s, acc=0.733, epoch=30, loss=0.861]

epoch:30, idx:9299/10845, loss:0.8608447140391154, acc:0.7326344086021506


 87%|████████▋ | 9400/10845 [34:49<05:23,  4.47it/s, acc=0.733, epoch=30, loss=0.861]

epoch:30, idx:9399/10845, loss:0.8611266702667196, acc:0.7325265957446808


 88%|████████▊ | 9500/10845 [35:12<04:56,  4.54it/s, acc=0.733, epoch=30, loss=0.862]

epoch:30, idx:9499/10845, loss:0.8615676880008296, acc:0.7326052631578948


 89%|████████▊ | 9601/10845 [35:34<04:27,  4.64it/s, acc=0.732, epoch=30, loss=0.863]

epoch:30, idx:9599/10845, loss:0.8625043892239531, acc:0.7323697916666667


 89%|████████▉ | 9700/10845 [35:56<04:17,  4.45it/s, acc=0.732, epoch=30, loss=0.862]

epoch:30, idx:9699/10845, loss:0.862104429633347, acc:0.7324742268041237


 90%|█████████ | 9800/10845 [36:18<03:27,  5.04it/s, acc=0.732, epoch=30, loss=0.862]

epoch:30, idx:9799/10845, loss:0.8623208236329409, acc:0.7322704081632653


 91%|█████████▏| 9900/10845 [36:40<03:24,  4.63it/s, acc=0.732, epoch=30, loss=0.862]

epoch:30, idx:9899/10845, loss:0.862371942406953, acc:0.7324747474747475


 92%|█████████▏| 10000/10845 [37:02<03:07,  4.51it/s, acc=0.732, epoch=30, loss=0.865]

epoch:30, idx:9999/10845, loss:0.8647667909145356, acc:0.732


 93%|█████████▎| 10101/10845 [37:24<02:36,  4.74it/s, acc=0.732, epoch=30, loss=0.864]

epoch:30, idx:10099/10845, loss:0.8643963583153074, acc:0.7321039603960396


 94%|█████████▍| 10201/10845 [37:47<02:18,  4.65it/s, acc=0.732, epoch=30, loss=0.864]

epoch:30, idx:10199/10845, loss:0.8641067670139612, acc:0.7322549019607844


 95%|█████████▍| 10301/10845 [38:09<01:57,  4.63it/s, acc=0.732, epoch=30, loss=0.864]

epoch:30, idx:10299/10845, loss:0.863785667974972, acc:0.7323543689320389


 96%|█████████▌| 10400/10845 [38:31<01:38,  4.50it/s, acc=0.733, epoch=30, loss=0.864]

epoch:30, idx:10399/10845, loss:0.8636682325028456, acc:0.7325961538461538


 97%|█████████▋| 10500/10845 [38:53<01:15,  4.60it/s, acc=0.732, epoch=30, loss=0.865]

epoch:30, idx:10499/10845, loss:0.864938294172287, acc:0.7323571428571428


 98%|█████████▊| 10601/10845 [39:15<00:53,  4.56it/s, acc=0.733, epoch=30, loss=0.864]

epoch:30, idx:10599/10845, loss:0.8644053553410296, acc:0.7327122641509434


 99%|█████████▊| 10701/10845 [39:37<00:31,  4.62it/s, acc=0.733, epoch=30, loss=0.864]

epoch:30, idx:10699/10845, loss:0.8635117842660887, acc:0.732873831775701


100%|█████████▉| 10800/10845 [39:59<00:10,  4.18it/s, acc=0.733, epoch=30, loss=0.864]

epoch:30, idx:10799/10845, loss:0.8642758758421297, acc:0.7326388888888888


100%|██████████| 10845/10845 [40:09<00:00,  4.59it/s, acc=0.733, epoch=30, loss=0.864]


epoch:30, idx:0/1275, loss:1.1538467407226562, acc:0.75
epoch:30, idx:100/1275, loss:1.425365429113407, acc:0.6435643564356436
epoch:30, idx:200/1275, loss:1.3080889636011266, acc:0.6517412935323383
epoch:30, idx:300/1275, loss:1.2726699812863753, acc:0.6561461794019934
epoch:30, idx:400/1275, loss:1.2522408901307351, acc:0.6589775561097256
epoch:30, idx:500/1275, loss:1.2141153889739822, acc:0.6601796407185628
epoch:30, idx:600/1275, loss:1.2318634321407946, acc:0.6568219633943427
epoch:30, idx:700/1275, loss:1.2372732880962387, acc:0.6576319543509273
epoch:30, idx:800/1275, loss:1.2522564658362618, acc:0.6585518102372035
epoch:30, idx:900/1275, loss:1.2442131782742372, acc:0.6606548279689234
epoch:30, idx:1000/1275, loss:1.250088884697094, acc:0.6575924075924076
epoch:30, idx:1100/1275, loss:1.2371734989761332, acc:0.6612170753860127
epoch:30, idx:1200/1275, loss:1.2325249275597407, acc:0.6592422980849292


  1%|          | 100/10845 [00:22<40:09,  4.46it/s, acc=0.755, epoch=31, loss=0.806]

epoch:31, idx:99/10845, loss:0.8056053531169891, acc:0.755


  2%|▏         | 200/10845 [00:44<38:17,  4.63it/s, acc=0.744, epoch=31, loss=0.828]

epoch:31, idx:199/10845, loss:0.8282044005393981, acc:0.74375


  3%|▎         | 301/10845 [01:07<37:15,  4.72it/s, acc=0.741, epoch=31, loss=0.846]

epoch:31, idx:299/10845, loss:0.8482895008722942, acc:0.7408333333333333


  4%|▎         | 400/10845 [01:29<40:38,  4.28it/s, acc=0.743, epoch=31, loss=0.825]

epoch:31, idx:399/10845, loss:0.8254041227698327, acc:0.743125


  5%|▍         | 500/10845 [01:51<37:00,  4.66it/s, acc=0.749, epoch=31, loss=0.806]

epoch:31, idx:499/10845, loss:0.8060105946063996, acc:0.7485


  6%|▌         | 600/10845 [02:13<39:31,  4.32it/s, acc=0.752, epoch=31, loss=0.799]

epoch:31, idx:599/10845, loss:0.7989289295673371, acc:0.7525


  6%|▋         | 700/10845 [02:36<39:23,  4.29it/s, acc=0.746, epoch=31, loss=0.818]

epoch:31, idx:699/10845, loss:0.8182227313518524, acc:0.7464285714285714


  7%|▋         | 800/10845 [02:58<40:25,  4.14it/s, acc=0.746, epoch=31, loss=0.83] 

epoch:31, idx:799/10845, loss:0.8299985186755657, acc:0.745625


  8%|▊         | 900/10845 [03:20<35:22,  4.69it/s, acc=0.75, epoch=31, loss=0.821] 

epoch:31, idx:899/10845, loss:0.8209751949045393, acc:0.7502777777777778


  9%|▉         | 1000/10845 [03:42<35:08,  4.67it/s, acc=0.751, epoch=31, loss=0.817]

epoch:31, idx:999/10845, loss:0.8174372096061706, acc:0.751


 10%|█         | 1100/10845 [04:04<38:51,  4.18it/s, acc=0.751, epoch=31, loss=0.817]

epoch:31, idx:1099/10845, loss:0.8174214221130718, acc:0.7506818181818182


 11%|█         | 1200/10845 [04:27<35:44,  4.50it/s, acc=0.749, epoch=31, loss=0.818]

epoch:31, idx:1199/10845, loss:0.8180301487445831, acc:0.7491666666666666


 12%|█▏        | 1300/10845 [04:49<34:24,  4.62it/s, acc=0.747, epoch=31, loss=0.827]

epoch:31, idx:1299/10845, loss:0.8268563144023602, acc:0.7473076923076923


 13%|█▎        | 1401/10845 [05:11<33:24,  4.71it/s, acc=0.747, epoch=31, loss=0.828]

epoch:31, idx:1399/10845, loss:0.8273450374603272, acc:0.7469642857142857


 14%|█▍        | 1500/10845 [05:32<33:49,  4.60it/s, acc=0.745, epoch=31, loss=0.84] 

epoch:31, idx:1499/10845, loss:0.8397084004084269, acc:0.7451666666666666


 15%|█▍        | 1601/10845 [05:54<33:13,  4.64it/s, acc=0.744, epoch=31, loss=0.84] 

epoch:31, idx:1599/10845, loss:0.840339861959219, acc:0.74390625


 16%|█▌        | 1700/10845 [06:17<31:48,  4.79it/s, acc=0.743, epoch=31, loss=0.846]

epoch:31, idx:1699/10845, loss:0.8463301859883701, acc:0.7427941176470588


 17%|█▋        | 1800/10845 [06:39<33:03,  4.56it/s, acc=0.743, epoch=31, loss=0.842]

epoch:31, idx:1799/10845, loss:0.8422748995489544, acc:0.7430555555555556


 18%|█▊        | 1900/10845 [07:01<34:46,  4.29it/s, acc=0.742, epoch=31, loss=0.842]

epoch:31, idx:1899/10845, loss:0.8417438326383891, acc:0.7422368421052632


 18%|█▊        | 2000/10845 [07:23<31:53,  4.62it/s, acc=0.744, epoch=31, loss=0.834]

epoch:31, idx:1999/10845, loss:0.8342840307950974, acc:0.7435


 19%|█▉        | 2101/10845 [07:46<32:51,  4.44it/s, acc=0.742, epoch=31, loss=0.841]

epoch:31, idx:2099/10845, loss:0.8411535017830984, acc:0.7422619047619048


 20%|██        | 2200/10845 [08:08<32:28,  4.44it/s, acc=0.741, epoch=31, loss=0.845]

epoch:31, idx:2199/10845, loss:0.8445445690913634, acc:0.7414772727272727


 21%|██        | 2300/10845 [08:30<30:13,  4.71it/s, acc=0.742, epoch=31, loss=0.847]

epoch:31, idx:2299/10845, loss:0.8465429641889489, acc:0.7415217391304347


 22%|██▏       | 2401/10845 [08:52<29:40,  4.74it/s, acc=0.741, epoch=31, loss=0.85] 

epoch:31, idx:2399/10845, loss:0.8500723332166672, acc:0.74125


 23%|██▎       | 2500/10845 [09:14<29:52,  4.66it/s, acc=0.742, epoch=31, loss=0.851]

epoch:31, idx:2499/10845, loss:0.8511075548171997, acc:0.742


 24%|██▍       | 2600/10845 [09:37<29:35,  4.64it/s, acc=0.743, epoch=31, loss=0.852]

epoch:31, idx:2599/10845, loss:0.8516865384120208, acc:0.7425961538461539


 25%|██▍       | 2700/10845 [09:59<33:03,  4.11it/s, acc=0.743, epoch=31, loss=0.852]

epoch:31, idx:2699/10845, loss:0.8515701652456213, acc:0.7425


 26%|██▌       | 2800/10845 [10:21<31:30,  4.26it/s, acc=0.742, epoch=31, loss=0.852]

epoch:31, idx:2799/10845, loss:0.8521894994803837, acc:0.7416964285714286


 27%|██▋       | 2900/10845 [10:44<29:22,  4.51it/s, acc=0.741, epoch=31, loss=0.854]

epoch:31, idx:2899/10845, loss:0.8543410024766265, acc:0.7413793103448276


 28%|██▊       | 3001/10845 [11:06<27:09,  4.81it/s, acc=0.741, epoch=31, loss=0.854]

epoch:31, idx:2999/10845, loss:0.8541443807085355, acc:0.7405


 29%|██▊       | 3100/10845 [11:28<28:55,  4.46it/s, acc=0.741, epoch=31, loss=0.856]

epoch:31, idx:3099/10845, loss:0.8562208158931425, acc:0.7405645161290323


 30%|██▉       | 3200/10845 [11:50<28:56,  4.40it/s, acc=0.741, epoch=31, loss=0.855]

epoch:31, idx:3199/10845, loss:0.8546634262613952, acc:0.740546875


 30%|███       | 3301/10845 [12:13<26:51,  4.68it/s, acc=0.741, epoch=31, loss=0.856]

epoch:31, idx:3299/10845, loss:0.8561350216468175, acc:0.7406818181818182


 31%|███▏      | 3400/10845 [12:35<26:25,  4.69it/s, acc=0.74, epoch=31, loss=0.858] 

epoch:31, idx:3399/10845, loss:0.8583015383517041, acc:0.7402205882352941


 32%|███▏      | 3500/10845 [12:57<28:52,  4.24it/s, acc=0.739, epoch=31, loss=0.858]

epoch:31, idx:3499/10845, loss:0.8575229673726218, acc:0.7394285714285714


 33%|███▎      | 3600/10845 [13:19<27:16,  4.43it/s, acc=0.739, epoch=31, loss=0.861]

epoch:31, idx:3599/10845, loss:0.8610973339610629, acc:0.7393055555555555


 34%|███▍      | 3701/10845 [13:41<25:57,  4.59it/s, acc=0.739, epoch=31, loss=0.861]

epoch:31, idx:3699/10845, loss:0.8614004250635972, acc:0.7392567567567567


 35%|███▌      | 3800/10845 [14:03<25:45,  4.56it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:3799/10845, loss:0.8621526376040358, acc:0.7394736842105263


 36%|███▌      | 3900/10845 [14:26<26:05,  4.44it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:3899/10845, loss:0.861451635253735, acc:0.7394871794871795


 37%|███▋      | 4000/10845 [14:48<25:44,  4.43it/s, acc=0.74, epoch=31, loss=0.861] 

epoch:31, idx:3999/10845, loss:0.8614062350541353, acc:0.74


 38%|███▊      | 4100/10845 [15:10<26:00,  4.32it/s, acc=0.74, epoch=31, loss=0.859] 

epoch:31, idx:4099/10845, loss:0.8588722223334196, acc:0.7403658536585366


 39%|███▊      | 4200/10845 [15:32<23:37,  4.69it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:4199/10845, loss:0.862280214386327, acc:0.7391666666666666


 40%|███▉      | 4300/10845 [15:54<24:27,  4.46it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:4299/10845, loss:0.8620065867485002, acc:0.7391860465116279


 41%|████      | 4400/10845 [16:17<24:01,  4.47it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:4399/10845, loss:0.8621024183658036, acc:0.7392613636363636


 42%|████▏     | 4501/10845 [16:39<23:31,  4.50it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:4499/10845, loss:0.8618710578680039, acc:0.7392777777777778


 42%|████▏     | 4600/10845 [17:02<22:32,  4.62it/s, acc=0.739, epoch=31, loss=0.866]

epoch:31, idx:4599/10845, loss:0.8659801279850629, acc:0.7385326086956522


 43%|████▎     | 4700/10845 [17:24<22:50,  4.49it/s, acc=0.739, epoch=31, loss=0.864]

epoch:31, idx:4699/10845, loss:0.8640111741740653, acc:0.7388829787234042


 44%|████▍     | 4801/10845 [17:46<22:41,  4.44it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:4799/10845, loss:0.861823723676304, acc:0.73921875


 45%|████▌     | 4900/10845 [18:09<21:25,  4.62it/s, acc=0.739, epoch=31, loss=0.863]

epoch:31, idx:4899/10845, loss:0.8625417784403782, acc:0.7394387755102041


 46%|████▌     | 5001/10845 [18:31<20:54,  4.66it/s, acc=0.739, epoch=31, loss=0.865]

epoch:31, idx:4999/10845, loss:0.8655409905552864, acc:0.73885


 47%|████▋     | 5100/10845 [18:53<20:43,  4.62it/s, acc=0.739, epoch=31, loss=0.865]

epoch:31, idx:5099/10845, loss:0.8647079556479174, acc:0.7386764705882353


 48%|████▊     | 5200/10845 [19:15<19:35,  4.80it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:5199/10845, loss:0.8621520821062418, acc:0.7390384615384615


 49%|████▉     | 5300/10845 [19:38<21:31,  4.29it/s, acc=0.739, epoch=31, loss=0.862]

epoch:31, idx:5299/10845, loss:0.862317539419768, acc:0.7388207547169812


 50%|████▉     | 5400/10845 [20:00<19:55,  4.55it/s, acc=0.739, epoch=31, loss=0.861]

epoch:31, idx:5399/10845, loss:0.8611577667682259, acc:0.739212962962963


 51%|█████     | 5500/10845 [20:22<20:06,  4.43it/s, acc=0.739, epoch=31, loss=0.861]

epoch:31, idx:5499/10845, loss:0.8612710001143542, acc:0.7389545454545454


 52%|█████▏    | 5600/10845 [20:44<18:55,  4.62it/s, acc=0.738, epoch=31, loss=0.864]

epoch:31, idx:5599/10845, loss:0.8635511551690953, acc:0.7380803571428571


 53%|█████▎    | 5700/10845 [21:06<19:30,  4.39it/s, acc=0.738, epoch=31, loss=0.864]

epoch:31, idx:5699/10845, loss:0.8638711947202683, acc:0.7379824561403509


 53%|█████▎    | 5800/10845 [21:28<17:59,  4.67it/s, acc=0.738, epoch=31, loss=0.865]

epoch:31, idx:5799/10845, loss:0.8646118487674614, acc:0.7375862068965517


 54%|█████▍    | 5900/10845 [21:51<18:25,  4.47it/s, acc=0.738, epoch=31, loss=0.864]

epoch:31, idx:5899/10845, loss:0.8643176641403618, acc:0.7380084745762712


 55%|█████▌    | 6001/10845 [22:13<16:29,  4.90it/s, acc=0.738, epoch=31, loss=0.866]

epoch:31, idx:5999/10845, loss:0.8655535520017147, acc:0.7377083333333333


 56%|█████▌    | 6100/10845 [22:35<17:01,  4.65it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:6099/10845, loss:0.8668314528758408, acc:0.7373360655737705


 57%|█████▋    | 6201/10845 [22:58<17:07,  4.52it/s, acc=0.738, epoch=31, loss=0.866]

epoch:31, idx:6199/10845, loss:0.8660712106573967, acc:0.7378225806451613


 58%|█████▊    | 6300/10845 [23:20<16:35,  4.56it/s, acc=0.738, epoch=31, loss=0.863]

epoch:31, idx:6299/10845, loss:0.8630775184669192, acc:0.738452380952381


 59%|█████▉    | 6401/10845 [23:42<15:57,  4.64it/s, acc=0.738, epoch=31, loss=0.863]

epoch:31, idx:6399/10845, loss:0.862733941487968, acc:0.7383984375


 60%|█████▉    | 6500/10845 [24:04<16:17,  4.44it/s, acc=0.738, epoch=31, loss=0.863]

epoch:31, idx:6499/10845, loss:0.8630461267141195, acc:0.7382307692307692


 61%|██████    | 6600/10845 [24:26<16:35,  4.27it/s, acc=0.738, epoch=31, loss=0.862]

epoch:31, idx:6599/10845, loss:0.8620647323854042, acc:0.7384469696969697


 62%|██████▏   | 6700/10845 [24:48<16:15,  4.25it/s, acc=0.738, epoch=31, loss=0.862]

epoch:31, idx:6699/10845, loss:0.8619037384506482, acc:0.7382089552238806


 63%|██████▎   | 6800/10845 [25:10<15:24,  4.37it/s, acc=0.738, epoch=31, loss=0.862]

epoch:31, idx:6799/10845, loss:0.8624791068978169, acc:0.7381617647058824


 64%|██████▎   | 6900/10845 [25:32<14:32,  4.52it/s, acc=0.738, epoch=31, loss=0.863]

epoch:31, idx:6899/10845, loss:0.8633973551919495, acc:0.7382971014492754


 65%|██████▍   | 7000/10845 [25:54<14:38,  4.38it/s, acc=0.738, epoch=31, loss=0.863]

epoch:31, idx:6999/10845, loss:0.8626644882559776, acc:0.7383214285714286


 65%|██████▌   | 7100/10845 [26:16<13:59,  4.46it/s, acc=0.738, epoch=31, loss=0.863]

epoch:31, idx:7099/10845, loss:0.8626601766280725, acc:0.7384507042253521


 66%|██████▋   | 7200/10845 [26:39<13:45,  4.42it/s, acc=0.739, epoch=31, loss=0.863]

epoch:31, idx:7199/10845, loss:0.8629187047564321, acc:0.7385416666666667


 67%|██████▋   | 7301/10845 [27:01<12:21,  4.78it/s, acc=0.738, epoch=31, loss=0.864]

epoch:31, idx:7299/10845, loss:0.8642260951456958, acc:0.7383904109589041


 68%|██████▊   | 7401/10845 [27:23<12:10,  4.71it/s, acc=0.738, epoch=31, loss=0.865]

epoch:31, idx:7399/10845, loss:0.8651745711387815, acc:0.7377702702702703


 69%|██████▉   | 7500/10845 [27:46<12:37,  4.42it/s, acc=0.738, epoch=31, loss=0.866]

epoch:31, idx:7499/10845, loss:0.8658132338523865, acc:0.7377666666666667


 70%|███████   | 7600/10845 [28:08<12:04,  4.48it/s, acc=0.738, epoch=31, loss=0.866]

epoch:31, idx:7599/10845, loss:0.8662249428033829, acc:0.7375328947368421


 71%|███████   | 7700/10845 [28:30<11:55,  4.40it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:7699/10845, loss:0.8667985520269964, acc:0.7372077922077922


 72%|███████▏  | 7800/10845 [28:53<11:12,  4.53it/s, acc=0.737, epoch=31, loss=0.868]

epoch:31, idx:7799/10845, loss:0.8676946433538045, acc:0.7369871794871795


 73%|███████▎  | 7900/10845 [29:15<10:53,  4.51it/s, acc=0.737, epoch=31, loss=0.869]

epoch:31, idx:7899/10845, loss:0.8685736089718493, acc:0.7365822784810127


 74%|███████▍  | 8000/10845 [29:38<10:30,  4.52it/s, acc=0.737, epoch=31, loss=0.868]

epoch:31, idx:7999/10845, loss:0.8683503223359584, acc:0.7365


 75%|███████▍  | 8101/10845 [30:00<10:10,  4.49it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:8099/10845, loss:0.8670832084873576, acc:0.7367901234567901


 76%|███████▌  | 8200/10845 [30:23<09:49,  4.49it/s, acc=0.737, epoch=31, loss=0.865]

epoch:31, idx:8199/10845, loss:0.865365703236766, acc:0.7371646341463415


 77%|███████▋  | 8301/10845 [30:45<09:46,  4.34it/s, acc=0.737, epoch=31, loss=0.864]

epoch:31, idx:8299/10845, loss:0.8643447697737131, acc:0.7370180722891566


 77%|███████▋  | 8401/10845 [31:06<08:29,  4.80it/s, acc=0.737, epoch=31, loss=0.865]

epoch:31, idx:8399/10845, loss:0.8648309755892981, acc:0.7369345238095238


 78%|███████▊  | 8501/10845 [31:28<08:17,  4.71it/s, acc=0.737, epoch=31, loss=0.864]

epoch:31, idx:8499/10845, loss:0.8637055644147537, acc:0.7372647058823529


 79%|███████▉  | 8601/10845 [31:51<08:04,  4.63it/s, acc=0.737, epoch=31, loss=0.864]

epoch:31, idx:8599/10845, loss:0.8639412433463474, acc:0.7373255813953489


 80%|████████  | 8700/10845 [32:12<07:52,  4.54it/s, acc=0.737, epoch=31, loss=0.865]

epoch:31, idx:8699/10845, loss:0.8649620810048334, acc:0.7372988505747127


 81%|████████  | 8801/10845 [32:35<07:18,  4.66it/s, acc=0.738, epoch=31, loss=0.864]

epoch:31, idx:8799/10845, loss:0.8642601941932332, acc:0.7376136363636364


 82%|████████▏ | 8900/10845 [32:57<06:53,  4.70it/s, acc=0.737, epoch=31, loss=0.866]

epoch:31, idx:8899/10845, loss:0.8659919189871027, acc:0.7374438202247191


 83%|████████▎ | 9000/10845 [33:19<06:43,  4.58it/s, acc=0.737, epoch=31, loss=0.865]

epoch:31, idx:8999/10845, loss:0.8653997496763866, acc:0.7373055555555555


 84%|████████▍ | 9100/10845 [33:42<06:20,  4.58it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:9099/10845, loss:0.8668556686548087, acc:0.7368681318681318


 85%|████████▍ | 9200/10845 [34:04<05:51,  4.68it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:9199/10845, loss:0.8667919150059638, acc:0.7365217391304347


 86%|████████▌ | 9301/10845 [34:26<05:27,  4.71it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:9299/10845, loss:0.8666192513960664, acc:0.7366666666666667


 87%|████████▋ | 9400/10845 [34:48<05:16,  4.57it/s, acc=0.736, epoch=31, loss=0.868]

epoch:31, idx:9399/10845, loss:0.868220735731277, acc:0.7362765957446809


 88%|████████▊ | 9501/10845 [35:10<04:48,  4.66it/s, acc=0.736, epoch=31, loss=0.868]

epoch:31, idx:9499/10845, loss:0.8680204908722325, acc:0.7362105263157894


 89%|████████▊ | 9600/10845 [35:32<04:45,  4.36it/s, acc=0.737, epoch=31, loss=0.868]

epoch:31, idx:9599/10845, loss:0.8676789575194319, acc:0.7365364583333334


 89%|████████▉ | 9701/10845 [35:55<04:04,  4.68it/s, acc=0.736, epoch=31, loss=0.868]

epoch:31, idx:9699/10845, loss:0.8678018754044758, acc:0.7364690721649485


 90%|█████████ | 9800/10845 [36:17<04:02,  4.31it/s, acc=0.736, epoch=31, loss=0.868]

epoch:31, idx:9799/10845, loss:0.8676855042515969, acc:0.7363265306122448


 91%|█████████▏| 9901/10845 [36:39<03:25,  4.60it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:9899/10845, loss:0.8672563727215084, acc:0.7366919191919192


 92%|█████████▏| 10000/10845 [37:01<03:03,  4.59it/s, acc=0.737, epoch=31, loss=0.868]

epoch:31, idx:9999/10845, loss:0.8675585529148578, acc:0.73675


 93%|█████████▎| 10101/10845 [37:23<02:41,  4.61it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:10099/10845, loss:0.866833661788761, acc:0.737029702970297


 94%|█████████▍| 10200/10845 [37:45<02:21,  4.55it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:10199/10845, loss:0.8668604321982346, acc:0.7366666666666667


 95%|█████████▍| 10300/10845 [38:08<01:58,  4.59it/s, acc=0.737, epoch=31, loss=0.867]

epoch:31, idx:10299/10845, loss:0.8668576854351655, acc:0.7366504854368932


 96%|█████████▌| 10400/10845 [38:30<01:37,  4.58it/s, acc=0.737, epoch=31, loss=0.866]

epoch:31, idx:10399/10845, loss:0.8662334828021435, acc:0.736610576923077


 97%|█████████▋| 10500/10845 [38:52<01:15,  4.59it/s, acc=0.736, epoch=31, loss=0.867]

epoch:31, idx:10499/10845, loss:0.8665801075526646, acc:0.7362857142857143


 98%|█████████▊| 10601/10845 [39:14<00:52,  4.69it/s, acc=0.736, epoch=31, loss=0.867]

epoch:31, idx:10599/10845, loss:0.8671968631924323, acc:0.7360377358490566


 99%|█████████▊| 10700/10845 [39:36<00:32,  4.44it/s, acc=0.736, epoch=31, loss=0.867]

epoch:31, idx:10699/10845, loss:0.8674687293533967, acc:0.7361214953271028


100%|█████████▉| 10801/10845 [39:58<00:09,  4.72it/s, acc=0.736, epoch=31, loss=0.867]

epoch:31, idx:10799/10845, loss:0.8672011740892022, acc:0.73625


100%|██████████| 10845/10845 [40:08<00:00,  4.43it/s, acc=0.736, epoch=31, loss=0.867]


epoch:31, idx:0/1275, loss:1.1618566513061523, acc:0.5
epoch:31, idx:100/1275, loss:1.4012062018460567, acc:0.6534653465346535
epoch:31, idx:200/1275, loss:1.2818191410297186, acc:0.6529850746268657
epoch:31, idx:300/1275, loss:1.2558743836871809, acc:0.659468438538206
epoch:31, idx:400/1275, loss:1.23790755013278, acc:0.6652119700748129
epoch:31, idx:500/1275, loss:1.2016925353727892, acc:0.6656686626746507
epoch:31, idx:600/1275, loss:1.2218901425748816, acc:0.6589018302828619
epoch:31, idx:700/1275, loss:1.2284362956392612, acc:0.6594151212553495
epoch:31, idx:800/1275, loss:1.2431891508465551, acc:0.6588639200998752
epoch:31, idx:900/1275, loss:1.2395133603690864, acc:0.6623196448390677
epoch:31, idx:1000/1275, loss:1.2430072249351563, acc:0.6588411588411588
epoch:31, idx:1100/1275, loss:1.2296495076529446, acc:0.6623524069028156
epoch:31, idx:1200/1275, loss:1.227040879583478, acc:0.6609075770191507


  1%|          | 101/10845 [00:22<38:49,  4.61it/s, acc=0.787, epoch=32, loss=0.748]

epoch:32, idx:99/10845, loss:0.7538365173339844, acc:0.785


  2%|▏         | 200/10845 [00:44<38:49,  4.57it/s, acc=0.769, epoch=32, loss=0.748]

epoch:32, idx:199/10845, loss:0.747671417593956, acc:0.76875


  3%|▎         | 300/10845 [01:06<40:26,  4.35it/s, acc=0.752, epoch=32, loss=0.79] 

epoch:32, idx:299/10845, loss:0.7901573117574056, acc:0.7525


  4%|▎         | 401/10845 [01:29<38:25,  4.53it/s, acc=0.749, epoch=32, loss=0.804]

epoch:32, idx:399/10845, loss:0.8041218212246894, acc:0.74875


  5%|▍         | 500/10845 [01:51<37:54,  4.55it/s, acc=0.747, epoch=32, loss=0.804]

epoch:32, idx:499/10845, loss:0.8056140571832657, acc:0.7465


  6%|▌         | 601/10845 [02:13<37:46,  4.52it/s, acc=0.746, epoch=32, loss=0.821]

epoch:32, idx:599/10845, loss:0.8219233587384224, acc:0.7454166666666666


  6%|▋         | 701/10845 [02:35<38:36,  4.38it/s, acc=0.744, epoch=32, loss=0.827]

epoch:32, idx:699/10845, loss:0.8283141288587025, acc:0.7439285714285714


  7%|▋         | 800/10845 [02:57<38:32,  4.34it/s, acc=0.74, epoch=32, loss=0.85]  

epoch:32, idx:799/10845, loss:0.8503094821423293, acc:0.7396875


  8%|▊         | 900/10845 [03:19<38:54,  4.26it/s, acc=0.735, epoch=32, loss=0.859]

epoch:32, idx:899/10845, loss:0.8592726202143564, acc:0.7352777777777778


  9%|▉         | 1001/10845 [03:42<34:41,  4.73it/s, acc=0.736, epoch=32, loss=0.855]

epoch:32, idx:999/10845, loss:0.854202526152134, acc:0.73625


 10%|█         | 1100/10845 [04:04<36:44,  4.42it/s, acc=0.735, epoch=32, loss=0.865]

epoch:32, idx:1099/10845, loss:0.8655640523542057, acc:0.7347727272727272


 11%|█         | 1200/10845 [04:26<35:02,  4.59it/s, acc=0.738, epoch=32, loss=0.858]

epoch:32, idx:1199/10845, loss:0.8580030525227388, acc:0.7377083333333333


 12%|█▏        | 1301/10845 [04:48<33:08,  4.80it/s, acc=0.736, epoch=32, loss=0.854]

epoch:32, idx:1299/10845, loss:0.8542954718608122, acc:0.7361538461538462


 13%|█▎        | 1400/10845 [05:10<33:24,  4.71it/s, acc=0.736, epoch=32, loss=0.855]

epoch:32, idx:1399/10845, loss:0.855084311068058, acc:0.73625


 14%|█▍        | 1500/10845 [05:32<37:06,  4.20it/s, acc=0.734, epoch=32, loss=0.86] 

epoch:32, idx:1499/10845, loss:0.8600233770608902, acc:0.7341666666666666


 15%|█▍        | 1600/10845 [05:55<32:17,  4.77it/s, acc=0.732, epoch=32, loss=0.866]

epoch:32, idx:1599/10845, loss:0.8655538315698504, acc:0.73203125


 16%|█▌        | 1701/10845 [06:17<32:59,  4.62it/s, acc=0.733, epoch=32, loss=0.86] 

epoch:32, idx:1699/10845, loss:0.8603526687271454, acc:0.7329411764705882


 17%|█▋        | 1800/10845 [06:39<34:16,  4.40it/s, acc=0.733, epoch=32, loss=0.858]

epoch:32, idx:1799/10845, loss:0.8584509894251824, acc:0.7334722222222222


 18%|█▊        | 1900/10845 [07:01<31:19,  4.76it/s, acc=0.734, epoch=32, loss=0.856]

epoch:32, idx:1899/10845, loss:0.8564502709476571, acc:0.7338157894736842


 18%|█▊        | 2001/10845 [07:24<31:18,  4.71it/s, acc=0.732, epoch=32, loss=0.861]

epoch:32, idx:1999/10845, loss:0.8615362567603588, acc:0.732125


 19%|█▉        | 2101/10845 [07:46<32:58,  4.42it/s, acc=0.731, epoch=32, loss=0.863]

epoch:32, idx:2099/10845, loss:0.8632938683032989, acc:0.7313095238095239


 20%|██        | 2200/10845 [08:08<31:26,  4.58it/s, acc=0.731, epoch=32, loss=0.861]

epoch:32, idx:2199/10845, loss:0.8613099479675292, acc:0.7307954545454546


 21%|██        | 2301/10845 [08:30<30:30,  4.67it/s, acc=0.732, epoch=32, loss=0.862]

epoch:32, idx:2299/10845, loss:0.862107747948688, acc:0.7317391304347826


 22%|██▏       | 2401/10845 [08:52<31:23,  4.48it/s, acc=0.733, epoch=32, loss=0.86] 

epoch:32, idx:2399/10845, loss:0.8600878006716569, acc:0.7323958333333334


 23%|██▎       | 2500/10845 [09:14<31:00,  4.48it/s, acc=0.733, epoch=32, loss=0.857]

epoch:32, idx:2499/10845, loss:0.8566485537052154, acc:0.7328


 24%|██▍       | 2600/10845 [09:37<30:26,  4.51it/s, acc=0.734, epoch=32, loss=0.854]

epoch:32, idx:2599/10845, loss:0.8537359728262974, acc:0.7336538461538461


 25%|██▍       | 2701/10845 [09:59<29:28,  4.60it/s, acc=0.735, epoch=32, loss=0.855]

epoch:32, idx:2699/10845, loss:0.8550174914024494, acc:0.7347222222222223


 26%|██▌       | 2801/10845 [10:21<29:20,  4.57it/s, acc=0.736, epoch=32, loss=0.85] 

epoch:32, idx:2799/10845, loss:0.85052268160241, acc:0.7360714285714286


 27%|██▋       | 2901/10845 [10:43<28:07,  4.71it/s, acc=0.735, epoch=32, loss=0.851]

epoch:32, idx:2899/10845, loss:0.8510778940135035, acc:0.7351724137931035


 28%|██▊       | 3000/10845 [11:05<29:12,  4.48it/s, acc=0.735, epoch=32, loss=0.854]

epoch:32, idx:2999/10845, loss:0.8541250879367193, acc:0.7349166666666667


 29%|██▊       | 3100/10845 [11:28<29:44,  4.34it/s, acc=0.736, epoch=32, loss=0.85] 

epoch:32, idx:3099/10845, loss:0.8503069435781048, acc:0.7361290322580645


 30%|██▉       | 3201/10845 [11:50<27:12,  4.68it/s, acc=0.736, epoch=32, loss=0.852]

epoch:32, idx:3199/10845, loss:0.851604382097721, acc:0.7365625


 30%|███       | 3300/10845 [12:12<28:48,  4.36it/s, acc=0.737, epoch=32, loss=0.85] 

epoch:32, idx:3299/10845, loss:0.8503411469315038, acc:0.7367424242424242


 31%|███▏      | 3401/10845 [12:34<27:06,  4.58it/s, acc=0.736, epoch=32, loss=0.851]

epoch:32, idx:3399/10845, loss:0.8508361816055634, acc:0.7363235294117647


 32%|███▏      | 3500/10845 [12:56<26:56,  4.54it/s, acc=0.737, epoch=32, loss=0.849]

epoch:32, idx:3499/10845, loss:0.8491754759379796, acc:0.7369285714285714


 33%|███▎      | 3600/10845 [13:18<27:41,  4.36it/s, acc=0.737, epoch=32, loss=0.85] 

epoch:32, idx:3599/10845, loss:0.8495333742101987, acc:0.7368055555555556


 34%|███▍      | 3700/10845 [13:40<26:09,  4.55it/s, acc=0.737, epoch=32, loss=0.848]

epoch:32, idx:3699/10845, loss:0.8477222863725714, acc:0.7371621621621621


 35%|███▌      | 3800/10845 [14:02<25:20,  4.63it/s, acc=0.737, epoch=32, loss=0.849]

epoch:32, idx:3799/10845, loss:0.8490324776423605, acc:0.7372368421052632


 36%|███▌      | 3900/10845 [14:24<27:12,  4.26it/s, acc=0.738, epoch=32, loss=0.847]

epoch:32, idx:3899/10845, loss:0.8473882703597729, acc:0.7378205128205129


 37%|███▋      | 4000/10845 [14:47<25:53,  4.40it/s, acc=0.739, epoch=32, loss=0.843]

epoch:32, idx:3999/10845, loss:0.8430715245604515, acc:0.739125


 38%|███▊      | 4101/10845 [15:10<24:02,  4.68it/s, acc=0.74, epoch=32, loss=0.84]  

epoch:32, idx:4099/10845, loss:0.8399151760775868, acc:0.7398170731707318


 39%|███▊      | 4200/10845 [15:32<24:57,  4.44it/s, acc=0.74, epoch=32, loss=0.842]

epoch:32, idx:4199/10845, loss:0.8416032402146431, acc:0.7401190476190476


 40%|███▉      | 4301/10845 [15:54<23:07,  4.72it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:4299/10845, loss:0.8406941398908926, acc:0.7406395348837209


 41%|████      | 4401/10845 [16:16<22:32,  4.76it/s, acc=0.741, epoch=32, loss=0.837]

epoch:32, idx:4399/10845, loss:0.836721967431632, acc:0.7413636363636363


 42%|████▏     | 4501/10845 [16:38<22:37,  4.67it/s, acc=0.742, epoch=32, loss=0.837]

epoch:32, idx:4499/10845, loss:0.8366884392102559, acc:0.7415555555555555


 42%|████▏     | 4600/10845 [17:00<23:19,  4.46it/s, acc=0.742, epoch=32, loss=0.836]

epoch:32, idx:4599/10845, loss:0.8361704721917277, acc:0.7415760869565218


 43%|████▎     | 4700/10845 [17:23<23:09,  4.42it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:4699/10845, loss:0.839912952032495, acc:0.7411170212765957


 44%|████▍     | 4800/10845 [17:45<21:30,  4.68it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:4799/10845, loss:0.8400377511729796, acc:0.7408854166666666


 45%|████▌     | 4901/10845 [18:07<20:50,  4.75it/s, acc=0.74, epoch=32, loss=0.841] 

epoch:32, idx:4899/10845, loss:0.8411363974639348, acc:0.7404591836734694


 46%|████▌     | 5000/10845 [18:29<21:17,  4.57it/s, acc=0.741, epoch=32, loss=0.842]

epoch:32, idx:4999/10845, loss:0.8416594109773636, acc:0.74055


 47%|████▋     | 5101/10845 [18:52<20:38,  4.64it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:5099/10845, loss:0.8408980265785666, acc:0.7405882352941177


 48%|████▊     | 5200/10845 [19:14<21:10,  4.44it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:5199/10845, loss:0.8400009683920787, acc:0.7410096153846154


 49%|████▉     | 5300/10845 [19:36<19:32,  4.73it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:5299/10845, loss:0.8407316064609671, acc:0.7409905660377358


 50%|████▉     | 5401/10845 [19:59<20:02,  4.53it/s, acc=0.742, epoch=32, loss=0.839]

epoch:32, idx:5399/10845, loss:0.8390573707554075, acc:0.742037037037037


 51%|█████     | 5501/10845 [20:21<18:41,  4.76it/s, acc=0.742, epoch=32, loss=0.837]

epoch:32, idx:5499/10845, loss:0.8372499940828844, acc:0.7423636363636363


 52%|█████▏    | 5601/10845 [20:43<18:58,  4.61it/s, acc=0.742, epoch=32, loss=0.836]

epoch:32, idx:5599/10845, loss:0.8365494166953223, acc:0.7423660714285715


 53%|█████▎    | 5700/10845 [21:05<18:05,  4.74it/s, acc=0.743, epoch=32, loss=0.835]

epoch:32, idx:5699/10845, loss:0.8347399746744256, acc:0.7428070175438597


 53%|█████▎    | 5800/10845 [21:28<19:17,  4.36it/s, acc=0.743, epoch=32, loss=0.834]

epoch:32, idx:5799/10845, loss:0.8339975242162573, acc:0.7428879310344828


 54%|█████▍    | 5900/10845 [21:50<18:42,  4.40it/s, acc=0.742, epoch=32, loss=0.835]

epoch:32, idx:5899/10845, loss:0.8355395605200429, acc:0.7423728813559322


 55%|█████▌    | 6000/10845 [22:12<18:00,  4.48it/s, acc=0.742, epoch=32, loss=0.836]

epoch:32, idx:5999/10845, loss:0.8358045348624389, acc:0.7423333333333333


 56%|█████▌    | 6100/10845 [22:34<16:59,  4.65it/s, acc=0.742, epoch=32, loss=0.838]

epoch:32, idx:6099/10845, loss:0.8379734580145508, acc:0.7420901639344263


 57%|█████▋    | 6201/10845 [22:57<16:32,  4.68it/s, acc=0.742, epoch=32, loss=0.838]

epoch:32, idx:6199/10845, loss:0.8384582951664925, acc:0.7418548387096774


 58%|█████▊    | 6301/10845 [23:19<16:13,  4.67it/s, acc=0.742, epoch=32, loss=0.839]

epoch:32, idx:6299/10845, loss:0.8394250744959665, acc:0.7417063492063493


 59%|█████▉    | 6400/10845 [23:41<16:02,  4.62it/s, acc=0.742, epoch=32, loss=0.84] 

epoch:32, idx:6399/10845, loss:0.840154032157734, acc:0.7415234375


 60%|█████▉    | 6500/10845 [24:03<16:11,  4.47it/s, acc=0.742, epoch=32, loss=0.841]

epoch:32, idx:6499/10845, loss:0.8408412287693757, acc:0.7415384615384616


 61%|██████    | 6600/10845 [24:26<15:24,  4.59it/s, acc=0.742, epoch=32, loss=0.841]

epoch:32, idx:6599/10845, loss:0.8407892280004241, acc:0.7415530303030303


 62%|██████▏   | 6700/10845 [24:48<15:49,  4.37it/s, acc=0.742, epoch=32, loss=0.839]

epoch:32, idx:6699/10845, loss:0.839317098473435, acc:0.7420149253731343


 63%|██████▎   | 6800/10845 [25:10<14:59,  4.50it/s, acc=0.742, epoch=32, loss=0.84] 

epoch:32, idx:6799/10845, loss:0.8399529537295594, acc:0.7416911764705882


 64%|██████▎   | 6900/10845 [25:32<14:07,  4.65it/s, acc=0.742, epoch=32, loss=0.84] 

epoch:32, idx:6899/10845, loss:0.8403366342834805, acc:0.7416304347826087


 65%|██████▍   | 7001/10845 [25:55<13:56,  4.60it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:6999/10845, loss:0.8402643455948149, acc:0.7413928571428572


 65%|██████▌   | 7100/10845 [26:17<13:36,  4.59it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:7099/10845, loss:0.8399205848364762, acc:0.7414084507042253


 66%|██████▋   | 7200/10845 [26:40<13:13,  4.60it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:7199/10845, loss:0.8402248186204169, acc:0.7411458333333333


 67%|██████▋   | 7300/10845 [27:02<12:35,  4.69it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:7299/10845, loss:0.8404509898571119, acc:0.7408219178082192


 68%|██████▊   | 7400/10845 [27:24<12:35,  4.56it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:7399/10845, loss:0.8395400792601946, acc:0.7406756756756757


 69%|██████▉   | 7500/10845 [27:46<12:11,  4.57it/s, acc=0.741, epoch=32, loss=0.839]

epoch:32, idx:7499/10845, loss:0.8392747799158097, acc:0.7408333333333333


 70%|███████   | 7600/10845 [28:08<11:29,  4.70it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:7599/10845, loss:0.8407732774474119, acc:0.7408881578947368


 71%|███████   | 7700/10845 [28:30<11:06,  4.72it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:7699/10845, loss:0.8405216320226719, acc:0.741038961038961


 72%|███████▏  | 7800/10845 [28:52<10:42,  4.74it/s, acc=0.741, epoch=32, loss=0.839]

epoch:32, idx:7799/10845, loss:0.8394601813875712, acc:0.7409935897435898


 73%|███████▎  | 7900/10845 [29:14<11:33,  4.25it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:7899/10845, loss:0.8395097369710102, acc:0.7410126582278481


 74%|███████▍  | 8000/10845 [29:36<09:58,  4.76it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:7999/10845, loss:0.8398089936450124, acc:0.74071875


 75%|███████▍  | 8100/10845 [29:59<10:17,  4.44it/s, acc=0.741, epoch=32, loss=0.839]

epoch:32, idx:8099/10845, loss:0.8392643145499405, acc:0.7408333333333333


 76%|███████▌  | 8200/10845 [30:21<09:53,  4.45it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:8199/10845, loss:0.8401893988687817, acc:0.7407621951219512


 77%|███████▋  | 8301/10845 [30:43<09:12,  4.61it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:8299/10845, loss:0.8396350534350039, acc:0.7410240963855421


 77%|███████▋  | 8400/10845 [31:05<08:53,  4.59it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:8399/10845, loss:0.8408549115274634, acc:0.7409821428571428


 78%|███████▊  | 8500/10845 [31:27<08:53,  4.39it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:8499/10845, loss:0.8402061205681632, acc:0.7410588235294118


 79%|███████▉  | 8600/10845 [31:49<08:21,  4.48it/s, acc=0.741, epoch=32, loss=0.842]

epoch:32, idx:8599/10845, loss:0.84150255068097, acc:0.7408720930232559


 80%|████████  | 8700/10845 [32:11<07:30,  4.76it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:8699/10845, loss:0.8402747189656071, acc:0.7411781609195403


 81%|████████  | 8801/10845 [32:34<07:24,  4.60it/s, acc=0.742, epoch=32, loss=0.839]

epoch:32, idx:8799/10845, loss:0.83862962260165, acc:0.7415909090909091


 82%|████████▏ | 8901/10845 [32:56<06:55,  4.68it/s, acc=0.741, epoch=32, loss=0.839]

epoch:32, idx:8899/10845, loss:0.8392416686489341, acc:0.7413483146067416


 83%|████████▎ | 9000/10845 [33:18<06:41,  4.59it/s, acc=0.742, epoch=32, loss=0.839]

epoch:32, idx:8999/10845, loss:0.8386320628258918, acc:0.7415833333333334


 84%|████████▍ | 9100/10845 [33:40<06:49,  4.26it/s, acc=0.741, epoch=32, loss=0.839]

epoch:32, idx:9099/10845, loss:0.838850491289254, acc:0.741401098901099


 85%|████████▍ | 9201/10845 [34:02<05:52,  4.66it/s, acc=0.741, epoch=32, loss=0.839]

epoch:32, idx:9199/10845, loss:0.8393218663140484, acc:0.7414945652173913


 86%|████████▌ | 9300/10845 [34:24<05:20,  4.83it/s, acc=0.742, epoch=32, loss=0.839]

epoch:32, idx:9299/10845, loss:0.8388328165841359, acc:0.7416935483870968


 87%|████████▋ | 9400/10845 [34:46<05:12,  4.62it/s, acc=0.742, epoch=32, loss=0.839]

epoch:32, idx:9399/10845, loss:0.839154351930669, acc:0.7416223404255319


 88%|████████▊ | 9500/10845 [35:08<04:58,  4.51it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:9499/10845, loss:0.8397908818282579, acc:0.7414473684210526


 89%|████████▊ | 9600/10845 [35:31<04:28,  4.63it/s, acc=0.741, epoch=32, loss=0.84] 

epoch:32, idx:9599/10845, loss:0.8396619739073018, acc:0.7414583333333333


 89%|████████▉ | 9701/10845 [35:53<04:06,  4.64it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:9699/10845, loss:0.8411015575632607, acc:0.7412113402061856


 90%|█████████ | 9800/10845 [36:15<03:58,  4.38it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:9799/10845, loss:0.8406914128880112, acc:0.74125


 91%|█████████▏| 9900/10845 [36:37<03:18,  4.76it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:9899/10845, loss:0.8407769476945954, acc:0.7412878787878788


 92%|█████████▏| 10000/10845 [36:59<03:07,  4.51it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:9999/10845, loss:0.8410447948276997, acc:0.741475


 93%|█████████▎| 10100/10845 [37:22<02:46,  4.48it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:10099/10845, loss:0.8408163281183432, acc:0.741410891089109


 94%|█████████▍| 10200/10845 [37:44<02:23,  4.48it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:10199/10845, loss:0.8408430388569832, acc:0.7414460784313726


 95%|█████████▍| 10300/10845 [38:06<01:56,  4.70it/s, acc=0.741, epoch=32, loss=0.842]

epoch:32, idx:10299/10845, loss:0.841892988444532, acc:0.7409951456310679


 96%|█████████▌| 10401/10845 [38:29<01:35,  4.63it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:10399/10845, loss:0.8409075270650479, acc:0.7412019230769231


 97%|█████████▋| 10500/10845 [38:51<01:16,  4.51it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:10499/10845, loss:0.8412011022965113, acc:0.7411666666666666


 98%|█████████▊| 10601/10845 [39:13<00:53,  4.59it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:10599/10845, loss:0.8410550316156081, acc:0.7411556603773585


 99%|█████████▊| 10700/10845 [39:35<00:34,  4.21it/s, acc=0.741, epoch=32, loss=0.842]

epoch:32, idx:10699/10845, loss:0.8415866251136656, acc:0.7410514018691589


100%|█████████▉| 10801/10845 [39:58<00:09,  4.51it/s, acc=0.741, epoch=32, loss=0.841]

epoch:32, idx:10799/10845, loss:0.841514045563009, acc:0.7412268518518519


100%|██████████| 10845/10845 [40:07<00:00,  4.64it/s, acc=0.741, epoch=32, loss=0.841]


epoch:32, idx:0/1275, loss:1.5674591064453125, acc:0.75
epoch:32, idx:100/1275, loss:1.4327654006457564, acc:0.650990099009901
epoch:32, idx:200/1275, loss:1.310775938022196, acc:0.6616915422885572
epoch:32, idx:300/1275, loss:1.285603249389864, acc:0.665282392026578
epoch:32, idx:400/1275, loss:1.2614248516850934, acc:0.6677057356608479
epoch:32, idx:500/1275, loss:1.2295490414082646, acc:0.6641716566866267
epoch:32, idx:600/1275, loss:1.2455984440302095, acc:0.6559900166389351
epoch:32, idx:700/1275, loss:1.251912769424422, acc:0.6554921540656206
epoch:32, idx:800/1275, loss:1.2631575399719077, acc:0.6541822721598003
epoch:32, idx:900/1275, loss:1.257828217433904, acc:0.6578801331853497
epoch:32, idx:1000/1275, loss:1.2624427795291067, acc:0.6558441558441559
epoch:32, idx:1100/1275, loss:1.2490464389919693, acc:0.6600817438692098
epoch:32, idx:1200/1275, loss:1.246743766344358, acc:0.6588259783513739


  1%|          | 100/10845 [00:22<40:07,  4.46it/s, acc=0.728, epoch=33, loss=0.867]

epoch:33, idx:99/10845, loss:0.8672973358631134, acc:0.7275


  2%|▏         | 200/10845 [00:44<40:50,  4.34it/s, acc=0.731, epoch=33, loss=0.845]

epoch:33, idx:199/10845, loss:0.8445929425954819, acc:0.73125


  3%|▎         | 300/10845 [01:07<38:23,  4.58it/s, acc=0.731, epoch=33, loss=0.855]

epoch:33, idx:299/10845, loss:0.8548844214280447, acc:0.7308333333333333


  4%|▎         | 400/10845 [01:29<36:38,  4.75it/s, acc=0.732, epoch=33, loss=0.847]

epoch:33, idx:399/10845, loss:0.8471576902270317, acc:0.731875


  5%|▍         | 500/10845 [01:51<38:46,  4.45it/s, acc=0.736, epoch=33, loss=0.85] 

epoch:33, idx:499/10845, loss:0.8499629728794098, acc:0.736


  6%|▌         | 600/10845 [02:13<36:17,  4.70it/s, acc=0.74, epoch=33, loss=0.829] 

epoch:33, idx:599/10845, loss:0.8285349589586258, acc:0.7395833333333334


  6%|▋         | 700/10845 [02:34<40:40,  4.16it/s, acc=0.739, epoch=33, loss=0.831]

epoch:33, idx:699/10845, loss:0.8321932062080928, acc:0.7389285714285714


  7%|▋         | 801/10845 [02:57<38:25,  4.36it/s, acc=0.738, epoch=33, loss=0.844]

epoch:33, idx:799/10845, loss:0.8444916115701199, acc:0.7384375


  8%|▊         | 900/10845 [03:19<38:40,  4.29it/s, acc=0.74, epoch=33, loss=0.84]  

epoch:33, idx:899/10845, loss:0.839838225973977, acc:0.7402777777777778


  9%|▉         | 1000/10845 [03:41<37:46,  4.34it/s, acc=0.739, epoch=33, loss=0.842]

epoch:33, idx:999/10845, loss:0.8420813553333283, acc:0.73925


 10%|█         | 1100/10845 [04:04<35:47,  4.54it/s, acc=0.741, epoch=33, loss=0.842]

epoch:33, idx:1099/10845, loss:0.8419227803837169, acc:0.740909090909091


 11%|█         | 1201/10845 [04:26<33:55,  4.74it/s, acc=0.739, epoch=33, loss=0.842]

epoch:33, idx:1199/10845, loss:0.8413995415965716, acc:0.73875


 12%|█▏        | 1300/10845 [04:48<33:31,  4.75it/s, acc=0.738, epoch=33, loss=0.845]

epoch:33, idx:1299/10845, loss:0.8454895124068627, acc:0.7380769230769231


 13%|█▎        | 1400/10845 [05:10<33:51,  4.65it/s, acc=0.738, epoch=33, loss=0.844]

epoch:33, idx:1399/10845, loss:0.8439293467998504, acc:0.7380357142857142


 14%|█▍        | 1500/10845 [05:32<34:04,  4.57it/s, acc=0.74, epoch=33, loss=0.833] 

epoch:33, idx:1499/10845, loss:0.8327970592578252, acc:0.7401666666666666


 15%|█▍        | 1600/10845 [05:54<33:56,  4.54it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:1599/10845, loss:0.8355832010135055, acc:0.74125


 16%|█▌        | 1700/10845 [06:15<33:53,  4.50it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:1699/10845, loss:0.8336858674708535, acc:0.7423529411764705


 17%|█▋        | 1800/10845 [06:38<33:53,  4.45it/s, acc=0.742, epoch=33, loss=0.833]

epoch:33, idx:1799/10845, loss:0.8335674316684405, acc:0.7419444444444444


 18%|█▊        | 1901/10845 [07:00<31:00,  4.81it/s, acc=0.743, epoch=33, loss=0.835]

epoch:33, idx:1899/10845, loss:0.8357784164893, acc:0.7425


 18%|█▊        | 2000/10845 [07:22<32:08,  4.59it/s, acc=0.743, epoch=33, loss=0.834]

epoch:33, idx:1999/10845, loss:0.8341072541177272, acc:0.74325


 19%|█▉        | 2100/10845 [07:44<31:06,  4.69it/s, acc=0.743, epoch=33, loss=0.834]

epoch:33, idx:2099/10845, loss:0.8343883662280582, acc:0.7433333333333333


 20%|██        | 2200/10845 [08:06<31:18,  4.60it/s, acc=0.743, epoch=33, loss=0.834]

epoch:33, idx:2199/10845, loss:0.8341229044036432, acc:0.7432954545454545


 21%|██        | 2301/10845 [08:29<32:09,  4.43it/s, acc=0.742, epoch=33, loss=0.839]

epoch:33, idx:2299/10845, loss:0.8390538514956184, acc:0.7423913043478261


 22%|██▏       | 2401/10845 [08:51<31:36,  4.45it/s, acc=0.743, epoch=33, loss=0.84] 

epoch:33, idx:2399/10845, loss:0.8402763759344816, acc:0.7423958333333334


 23%|██▎       | 2500/10845 [09:13<29:30,  4.71it/s, acc=0.742, epoch=33, loss=0.842]

epoch:33, idx:2499/10845, loss:0.8419205946207047, acc:0.7415


 24%|██▍       | 2600/10845 [09:35<28:52,  4.76it/s, acc=0.742, epoch=33, loss=0.839]

epoch:33, idx:2599/10845, loss:0.8391033755586698, acc:0.7417307692307692


 25%|██▍       | 2700/10845 [09:57<29:46,  4.56it/s, acc=0.74, epoch=33, loss=0.841] 

epoch:33, idx:2699/10845, loss:0.8408042400192332, acc:0.7402777777777778


 26%|██▌       | 2800/10845 [10:19<30:42,  4.37it/s, acc=0.741, epoch=33, loss=0.838]

epoch:33, idx:2799/10845, loss:0.8378975389046328, acc:0.7411607142857143


 27%|██▋       | 2901/10845 [10:41<29:38,  4.47it/s, acc=0.739, epoch=33, loss=0.842]

epoch:33, idx:2899/10845, loss:0.8425864863190158, acc:0.7392241379310345


 28%|██▊       | 3000/10845 [11:04<28:34,  4.58it/s, acc=0.739, epoch=33, loss=0.844]

epoch:33, idx:2999/10845, loss:0.8441741401950519, acc:0.7385833333333334


 29%|██▊       | 3100/10845 [11:26<28:37,  4.51it/s, acc=0.739, epoch=33, loss=0.842]

epoch:33, idx:3099/10845, loss:0.8422282594442367, acc:0.7387903225806451


 30%|██▉       | 3201/10845 [11:48<28:14,  4.51it/s, acc=0.739, epoch=33, loss=0.839]

epoch:33, idx:3199/10845, loss:0.8391668996401132, acc:0.73890625


 30%|███       | 3301/10845 [12:11<27:09,  4.63it/s, acc=0.739, epoch=33, loss=0.84] 

epoch:33, idx:3299/10845, loss:0.8401047178290107, acc:0.7386363636363636


 31%|███▏      | 3400/10845 [12:33<26:10,  4.74it/s, acc=0.74, epoch=33, loss=0.837] 

epoch:33, idx:3399/10845, loss:0.8374379079657442, acc:0.7398529411764706


 32%|███▏      | 3500/10845 [12:55<26:58,  4.54it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:3499/10845, loss:0.8352865864719663, acc:0.7408571428571429


 33%|███▎      | 3601/10845 [13:17<26:39,  4.53it/s, acc=0.74, epoch=33, loss=0.835] 

epoch:33, idx:3599/10845, loss:0.8350716217193339, acc:0.7404861111111111


 34%|███▍      | 3700/10845 [13:39<25:55,  4.59it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:3699/10845, loss:0.8356508918871751, acc:0.7408783783783783


 35%|███▌      | 3800/10845 [14:01<26:46,  4.39it/s, acc=0.741, epoch=33, loss=0.834]

epoch:33, idx:3799/10845, loss:0.833623322797449, acc:0.7411184210526316


 36%|███▌      | 3901/10845 [14:24<24:05,  4.80it/s, acc=0.743, epoch=33, loss=0.829]

epoch:33, idx:3899/10845, loss:0.8286757445793885, acc:0.7426923076923077


 37%|███▋      | 4000/10845 [14:46<26:02,  4.38it/s, acc=0.742, epoch=33, loss=0.833]

epoch:33, idx:3999/10845, loss:0.8326978209912776, acc:0.741875


 38%|███▊      | 4100/10845 [15:08<24:52,  4.52it/s, acc=0.742, epoch=33, loss=0.832]

epoch:33, idx:4099/10845, loss:0.8317277080838273, acc:0.7424390243902439


 39%|███▊      | 4200/10845 [15:31<24:44,  4.48it/s, acc=0.743, epoch=33, loss=0.83] 

epoch:33, idx:4199/10845, loss:0.8301402502116703, acc:0.7429166666666667


 40%|███▉      | 4301/10845 [15:54<23:11,  4.70it/s, acc=0.742, epoch=33, loss=0.831]

epoch:33, idx:4299/10845, loss:0.8312624678500863, acc:0.7423837209302325


 41%|████      | 4400/10845 [16:16<25:09,  4.27it/s, acc=0.743, epoch=33, loss=0.832]

epoch:33, idx:4399/10845, loss:0.8316343660517173, acc:0.7425568181818182


 42%|████▏     | 4501/10845 [16:38<21:58,  4.81it/s, acc=0.742, epoch=33, loss=0.83] 

epoch:33, idx:4499/10845, loss:0.8302950699859195, acc:0.7421111111111112


 42%|████▏     | 4600/10845 [17:00<23:29,  4.43it/s, acc=0.742, epoch=33, loss=0.829]

epoch:33, idx:4599/10845, loss:0.8291198219164558, acc:0.7420652173913044


 43%|████▎     | 4700/10845 [17:22<22:05,  4.64it/s, acc=0.743, epoch=33, loss=0.83] 

epoch:33, idx:4699/10845, loss:0.8298588240019819, acc:0.7425


 44%|████▍     | 4800/10845 [17:45<24:52,  4.05it/s, acc=0.742, epoch=33, loss=0.833]

epoch:33, idx:4799/10845, loss:0.8326459191863735, acc:0.7415104166666666


 45%|████▌     | 4900/10845 [18:07<22:10,  4.47it/s, acc=0.741, epoch=33, loss=0.834]

epoch:33, idx:4899/10845, loss:0.8340979040277248, acc:0.7407142857142858


 46%|████▌     | 5000/10845 [18:30<21:17,  4.57it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:4999/10845, loss:0.834842424762249, acc:0.74065


 47%|████▋     | 5100/10845 [18:52<21:19,  4.49it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:5099/10845, loss:0.8356928636513504, acc:0.740735294117647


 48%|████▊     | 5200/10845 [19:14<21:09,  4.45it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:5199/10845, loss:0.8356445996119426, acc:0.7406730769230769


 49%|████▉     | 5300/10845 [19:36<19:53,  4.65it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:5299/10845, loss:0.8352299297980542, acc:0.7406132075471699


 50%|████▉     | 5401/10845 [19:59<19:44,  4.60it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:5399/10845, loss:0.8349538580797337, acc:0.7407870370370371


 51%|█████     | 5500/10845 [20:21<19:33,  4.56it/s, acc=0.741, epoch=33, loss=0.834]

epoch:33, idx:5499/10845, loss:0.8338605874018236, acc:0.7413181818181818


 52%|█████▏    | 5600/10845 [20:44<18:38,  4.69it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:5599/10845, loss:0.8338509622641972, acc:0.7417410714285714


 53%|█████▎    | 5701/10845 [21:06<17:50,  4.80it/s, acc=0.742, epoch=33, loss=0.833]

epoch:33, idx:5699/10845, loss:0.8335782681849965, acc:0.7414912280701754


 53%|█████▎    | 5801/10845 [21:28<18:21,  4.58it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:5799/10845, loss:0.8352628796265043, acc:0.740948275862069


 54%|█████▍    | 5901/10845 [21:50<17:18,  4.76it/s, acc=0.741, epoch=33, loss=0.834]

epoch:33, idx:5899/10845, loss:0.8341863828796451, acc:0.741228813559322


 55%|█████▌    | 6000/10845 [22:12<18:45,  4.30it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:5999/10845, loss:0.8337124500075976, acc:0.7415


 56%|█████▋    | 6101/10845 [22:34<17:22,  4.55it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:6099/10845, loss:0.8343955504112557, acc:0.7409016393442623


 57%|█████▋    | 6201/10845 [22:57<17:05,  4.53it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:6199/10845, loss:0.8346777569478558, acc:0.7408064516129033


 58%|█████▊    | 6300/10845 [23:19<17:42,  4.28it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:6299/10845, loss:0.8351118454857478, acc:0.7405952380952381


 59%|█████▉    | 6400/10845 [23:41<17:33,  4.22it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:6399/10845, loss:0.8358977777138352, acc:0.7408984375


 60%|█████▉    | 6500/10845 [24:03<16:00,  4.52it/s, acc=0.74, epoch=33, loss=0.838] 

epoch:33, idx:6499/10845, loss:0.8381136118815495, acc:0.7402307692307692


 61%|██████    | 6600/10845 [24:26<15:19,  4.62it/s, acc=0.741, epoch=33, loss=0.837]

epoch:33, idx:6599/10845, loss:0.8367923722303275, acc:0.7406818181818182


 62%|██████▏   | 6700/10845 [24:47<14:39,  4.71it/s, acc=0.74, epoch=33, loss=0.838] 

epoch:33, idx:6699/10845, loss:0.8383010402544221, acc:0.7401865671641791


 63%|██████▎   | 6800/10845 [25:09<15:04,  4.47it/s, acc=0.74, epoch=33, loss=0.839] 

epoch:33, idx:6799/10845, loss:0.8390672130093855, acc:0.7401838235294118


 64%|██████▎   | 6900/10845 [25:32<15:11,  4.33it/s, acc=0.74, epoch=33, loss=0.838]

epoch:33, idx:6899/10845, loss:0.8384306775832522, acc:0.740072463768116


 65%|██████▍   | 7000/10845 [25:54<13:10,  4.86it/s, acc=0.741, epoch=33, loss=0.837]

epoch:33, idx:6999/10845, loss:0.8366494648115975, acc:0.7408571428571429


 65%|██████▌   | 7100/10845 [26:16<13:50,  4.51it/s, acc=0.741, epoch=33, loss=0.837]

epoch:33, idx:7099/10845, loss:0.8371049525368381, acc:0.7407042253521127


 66%|██████▋   | 7200/10845 [26:38<13:39,  4.45it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:7199/10845, loss:0.8355544025202593, acc:0.74125


 67%|██████▋   | 7300/10845 [27:00<12:20,  4.79it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:7299/10845, loss:0.835639569775699, acc:0.7411301369863014


 68%|██████▊   | 7400/10845 [27:22<12:53,  4.45it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:7399/10845, loss:0.8339745103024148, acc:0.7418581081081081


 69%|██████▉   | 7500/10845 [27:45<12:48,  4.35it/s, acc=0.742, epoch=33, loss=0.833]

epoch:33, idx:7499/10845, loss:0.833025600194931, acc:0.7422


 70%|███████   | 7600/10845 [28:07<11:11,  4.84it/s, acc=0.743, epoch=33, loss=0.833]

epoch:33, idx:7599/10845, loss:0.8327156893359987, acc:0.7426644736842105


 71%|███████   | 7700/10845 [28:29<13:08,  3.99it/s, acc=0.742, epoch=33, loss=0.833]

epoch:33, idx:7699/10845, loss:0.8332635047219016, acc:0.7422402597402598


 72%|███████▏  | 7800/10845 [28:52<11:52,  4.28it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:7799/10845, loss:0.8336677148861763, acc:0.742275641025641


 73%|███████▎  | 7901/10845 [29:15<10:38,  4.61it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:7899/10845, loss:0.8343538438066651, acc:0.7419620253164557


 74%|███████▍  | 8001/10845 [29:37<10:14,  4.63it/s, acc=0.742, epoch=33, loss=0.833]

epoch:33, idx:7999/10845, loss:0.8334126290231944, acc:0.7421875


 75%|███████▍  | 8100/10845 [29:59<09:58,  4.59it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:8099/10845, loss:0.8337567516167959, acc:0.7419753086419754


 76%|███████▌  | 8200/10845 [30:21<10:14,  4.31it/s, acc=0.742, epoch=33, loss=0.835]

epoch:33, idx:8199/10845, loss:0.8346645882943782, acc:0.7415853658536585


 77%|███████▋  | 8301/10845 [30:44<09:47,  4.33it/s, acc=0.742, epoch=33, loss=0.834]

epoch:33, idx:8299/10845, loss:0.834127717793706, acc:0.7416867469879518


 77%|███████▋  | 8400/10845 [31:06<09:00,  4.52it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:8399/10845, loss:0.8348013809607142, acc:0.7414880952380952


 78%|███████▊  | 8500/10845 [31:28<08:32,  4.57it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:8499/10845, loss:0.8351097937050987, acc:0.7413235294117647


 79%|███████▉  | 8600/10845 [31:50<08:18,  4.50it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:8599/10845, loss:0.8362328039490899, acc:0.7409302325581395


 80%|████████  | 8700/10845 [32:12<07:41,  4.65it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:8699/10845, loss:0.8357053668197544, acc:0.7411781609195403


 81%|████████  | 8800/10845 [32:34<08:07,  4.20it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:8799/10845, loss:0.8358063770288771, acc:0.7413068181818182


 82%|████████▏ | 8900/10845 [32:57<07:22,  4.40it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:8899/10845, loss:0.8352137142218901, acc:0.7412640449438203


 83%|████████▎ | 9000/10845 [33:19<06:45,  4.55it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:8999/10845, loss:0.8353110787736046, acc:0.7413888888888889


 84%|████████▍ | 9100/10845 [33:41<06:31,  4.46it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:9099/10845, loss:0.8355843202884381, acc:0.7414560439560439


 85%|████████▍ | 9201/10845 [34:04<05:37,  4.87it/s, acc=0.741, epoch=33, loss=0.835]

epoch:33, idx:9199/10845, loss:0.8349719433421674, acc:0.7414945652173913


 86%|████████▌ | 9300/10845 [34:26<05:51,  4.40it/s, acc=0.742, epoch=33, loss=0.835]

epoch:33, idx:9299/10845, loss:0.8351396459789686, acc:0.741505376344086


 87%|████████▋ | 9400/10845 [34:48<05:15,  4.58it/s, acc=0.741, epoch=33, loss=0.836]

epoch:33, idx:9399/10845, loss:0.8359542951178044, acc:0.7413031914893617


 88%|████████▊ | 9500/10845 [35:10<05:04,  4.41it/s, acc=0.741, epoch=33, loss=0.837]

epoch:33, idx:9499/10845, loss:0.8367920941428134, acc:0.7410526315789474


 89%|████████▊ | 9601/10845 [35:33<04:16,  4.85it/s, acc=0.741, epoch=33, loss=0.838]

epoch:33, idx:9599/10845, loss:0.8375640066961447, acc:0.74078125


 89%|████████▉ | 9701/10845 [35:55<04:04,  4.67it/s, acc=0.741, epoch=33, loss=0.838]

epoch:33, idx:9699/10845, loss:0.8377577605689924, acc:0.7406185567010309


 90%|█████████ | 9800/10845 [36:17<03:55,  4.45it/s, acc=0.741, epoch=33, loss=0.838]

epoch:33, idx:9799/10845, loss:0.8377230730956915, acc:0.7405102040816327


 91%|█████████▏| 9901/10845 [36:40<03:20,  4.70it/s, acc=0.74, epoch=33, loss=0.839] 

epoch:33, idx:9899/10845, loss:0.838585122004904, acc:0.7403282828282828


 92%|█████████▏| 10000/10845 [37:01<02:56,  4.79it/s, acc=0.74, epoch=33, loss=0.839]

epoch:33, idx:9999/10845, loss:0.8387200510382652, acc:0.740275


 93%|█████████▎| 10100/10845 [37:24<02:47,  4.46it/s, acc=0.741, epoch=33, loss=0.838]

epoch:33, idx:10099/10845, loss:0.8382951944653351, acc:0.7405198019801981


 94%|█████████▍| 10200/10845 [37:46<02:30,  4.29it/s, acc=0.741, epoch=33, loss=0.838]

epoch:33, idx:10199/10845, loss:0.8381457148813733, acc:0.7405882352941177


 95%|█████████▍| 10301/10845 [38:09<02:08,  4.23it/s, acc=0.741, epoch=33, loss=0.839]

epoch:33, idx:10299/10845, loss:0.8385786386600976, acc:0.7406310679611651


 96%|█████████▌| 10400/10845 [38:31<01:44,  4.27it/s, acc=0.741, epoch=33, loss=0.839]

epoch:33, idx:10399/10845, loss:0.8389889220091012, acc:0.7405288461538462


 97%|█████████▋| 10500/10845 [38:53<01:16,  4.50it/s, acc=0.74, epoch=33, loss=0.839] 

epoch:33, idx:10499/10845, loss:0.8389991865385147, acc:0.740452380952381


 98%|█████████▊| 10600/10845 [39:15<00:52,  4.63it/s, acc=0.74, epoch=33, loss=0.839] 

epoch:33, idx:10599/10845, loss:0.8385442282568734, acc:0.7404952830188679


 99%|█████████▊| 10701/10845 [39:37<00:34,  4.22it/s, acc=0.74, epoch=33, loss=0.838] 

epoch:33, idx:10699/10845, loss:0.8384266089836013, acc:0.7404439252336449


100%|█████████▉| 10801/10845 [39:59<00:09,  4.50it/s, acc=0.74, epoch=33, loss=0.838] 

epoch:33, idx:10799/10845, loss:0.8381155475311809, acc:0.7402777777777778


100%|██████████| 10845/10845 [40:09<00:00,  4.30it/s, acc=0.74, epoch=33, loss=0.838]


epoch:33, idx:0/1275, loss:1.6679253578186035, acc:0.75
epoch:33, idx:100/1275, loss:1.4726947422074799, acc:0.6410891089108911
epoch:33, idx:200/1275, loss:1.337192824527399, acc:0.6517412935323383
epoch:33, idx:300/1275, loss:1.2831418524152813, acc:0.6578073089700996
epoch:33, idx:400/1275, loss:1.2667751896411106, acc:0.6614713216957606
epoch:33, idx:500/1275, loss:1.2317478270111921, acc:0.6631736526946108
epoch:33, idx:600/1275, loss:1.2482345904566088, acc:0.6572379367720466
epoch:33, idx:700/1275, loss:1.2516096215615429, acc:0.6579885877318117
epoch:33, idx:800/1275, loss:1.2678296106882607, acc:0.6573033707865169
epoch:33, idx:900/1275, loss:1.2597513904714426, acc:0.660377358490566
epoch:33, idx:1000/1275, loss:1.2653892302846574, acc:0.6585914085914086
epoch:33, idx:1100/1275, loss:1.25118919825359, acc:0.6612170753860127
epoch:33, idx:1200/1275, loss:1.2495225392511544, acc:0.6590341382181515


  1%|          | 100/10845 [00:22<43:10,  4.15it/s, acc=0.693, epoch=34, loss=0.966]

epoch:34, idx:99/10845, loss:0.9663736927509308, acc:0.6925


  2%|▏         | 200/10845 [00:44<40:30,  4.38it/s, acc=0.73, epoch=34, loss=0.869] 

epoch:34, idx:199/10845, loss:0.8689402794837952, acc:0.73


  3%|▎         | 300/10845 [01:06<39:47,  4.42it/s, acc=0.743, epoch=34, loss=0.837]

epoch:34, idx:299/10845, loss:0.836852536201477, acc:0.7433333333333333


  4%|▎         | 400/10845 [01:28<38:55,  4.47it/s, acc=0.747, epoch=34, loss=0.829]

epoch:34, idx:399/10845, loss:0.8307279065251351, acc:0.74625


  5%|▍         | 500/10845 [01:50<39:00,  4.42it/s, acc=0.75, epoch=34, loss=0.82]  

epoch:34, idx:499/10845, loss:0.8201294288635254, acc:0.75


  6%|▌         | 600/10845 [02:12<36:59,  4.62it/s, acc=0.755, epoch=34, loss=0.807]

epoch:34, idx:599/10845, loss:0.806732605099678, acc:0.7545833333333334


  6%|▋         | 701/10845 [02:35<37:46,  4.48it/s, acc=0.752, epoch=34, loss=0.817]

epoch:34, idx:699/10845, loss:0.8173914960452489, acc:0.7521428571428571


  7%|▋         | 800/10845 [02:57<40:11,  4.17it/s, acc=0.752, epoch=34, loss=0.809]

epoch:34, idx:799/10845, loss:0.8094298428297043, acc:0.7521875


  8%|▊         | 900/10845 [03:18<38:19,  4.32it/s, acc=0.746, epoch=34, loss=0.819]

epoch:34, idx:899/10845, loss:0.8189883935451507, acc:0.7458333333333333


  9%|▉         | 1000/10845 [03:40<37:13,  4.41it/s, acc=0.747, epoch=34, loss=0.82]

epoch:34, idx:999/10845, loss:0.8196222077608109, acc:0.7465


 10%|█         | 1101/10845 [04:02<33:44,  4.81it/s, acc=0.75, epoch=34, loss=0.809] 

epoch:34, idx:1099/10845, loss:0.8084361895647916, acc:0.7502272727272727


 11%|█         | 1201/10845 [04:24<34:43,  4.63it/s, acc=0.752, epoch=34, loss=0.803]

epoch:34, idx:1199/10845, loss:0.8035053580005964, acc:0.751875


 12%|█▏        | 1300/10845 [04:46<33:17,  4.78it/s, acc=0.751, epoch=34, loss=0.805]

epoch:34, idx:1299/10845, loss:0.805061253951146, acc:0.7507692307692307


 13%|█▎        | 1400/10845 [05:09<34:31,  4.56it/s, acc=0.751, epoch=34, loss=0.806]

epoch:34, idx:1399/10845, loss:0.8051505702733993, acc:0.75125


 14%|█▍        | 1500/10845 [05:31<35:18,  4.41it/s, acc=0.751, epoch=34, loss=0.808]

epoch:34, idx:1499/10845, loss:0.8075625288486481, acc:0.7511666666666666


 15%|█▍        | 1600/10845 [05:53<33:22,  4.62it/s, acc=0.752, epoch=34, loss=0.807]

epoch:34, idx:1599/10845, loss:0.8076654994487762, acc:0.7515625


 16%|█▌        | 1701/10845 [06:16<32:11,  4.73it/s, acc=0.749, epoch=34, loss=0.813]

epoch:34, idx:1699/10845, loss:0.8127243267087375, acc:0.7494117647058823


 17%|█▋        | 1800/10845 [06:38<32:34,  4.63it/s, acc=0.749, epoch=34, loss=0.814]

epoch:34, idx:1799/10845, loss:0.8139463129970762, acc:0.7491666666666666


 18%|█▊        | 1901/10845 [07:00<32:27,  4.59it/s, acc=0.749, epoch=34, loss=0.819]

epoch:34, idx:1899/10845, loss:0.8190984487533569, acc:0.7485526315789474


 18%|█▊        | 2000/10845 [07:22<32:48,  4.49it/s, acc=0.748, epoch=34, loss=0.816]

epoch:34, idx:1999/10845, loss:0.8162185897827149, acc:0.74825


 19%|█▉        | 2100/10845 [07:44<30:20,  4.80it/s, acc=0.747, epoch=34, loss=0.821]

epoch:34, idx:2099/10845, loss:0.8214121039424623, acc:0.7470238095238095


 20%|██        | 2200/10845 [08:07<32:08,  4.48it/s, acc=0.746, epoch=34, loss=0.823]

epoch:34, idx:2199/10845, loss:0.8229315692457286, acc:0.74625


 21%|██        | 2301/10845 [08:29<31:38,  4.50it/s, acc=0.747, epoch=34, loss=0.821]

epoch:34, idx:2299/10845, loss:0.8205213613354642, acc:0.7466304347826087


 22%|██▏       | 2401/10845 [08:51<28:56,  4.86it/s, acc=0.746, epoch=34, loss=0.824]

epoch:34, idx:2399/10845, loss:0.8240422721455495, acc:0.7461458333333333


 23%|██▎       | 2501/10845 [09:13<28:59,  4.80it/s, acc=0.745, epoch=34, loss=0.83] 

epoch:34, idx:2499/10845, loss:0.8300163580179214, acc:0.7445


 24%|██▍       | 2600/10845 [09:36<29:52,  4.60it/s, acc=0.744, epoch=34, loss=0.831]

epoch:34, idx:2599/10845, loss:0.8307806420096985, acc:0.7440384615384615


 25%|██▍       | 2701/10845 [09:58<30:06,  4.51it/s, acc=0.745, epoch=34, loss=0.83] 

epoch:34, idx:2699/10845, loss:0.8304515991608302, acc:0.7447222222222222


 26%|██▌       | 2801/10845 [10:20<30:43,  4.36it/s, acc=0.746, epoch=34, loss=0.825]

epoch:34, idx:2799/10845, loss:0.8256137039193086, acc:0.745625


 27%|██▋       | 2900/10845 [10:42<28:17,  4.68it/s, acc=0.746, epoch=34, loss=0.827]

epoch:34, idx:2899/10845, loss:0.8273775078715949, acc:0.7456896551724138


 28%|██▊       | 3000/10845 [11:05<30:47,  4.25it/s, acc=0.745, epoch=34, loss=0.829]

epoch:34, idx:2999/10845, loss:0.8291604238549868, acc:0.74525


 29%|██▊       | 3100/10845 [11:27<28:33,  4.52it/s, acc=0.745, epoch=34, loss=0.829]

epoch:34, idx:3099/10845, loss:0.8287040707949669, acc:0.745


 30%|██▉       | 3200/10845 [11:50<28:11,  4.52it/s, acc=0.745, epoch=34, loss=0.827]

epoch:34, idx:3199/10845, loss:0.8265324559994042, acc:0.745


 30%|███       | 3301/10845 [12:12<29:16,  4.29it/s, acc=0.745, epoch=34, loss=0.824]

epoch:34, idx:3299/10845, loss:0.8241874979662173, acc:0.7448484848484849


 31%|███▏      | 3401/10845 [12:35<27:08,  4.57it/s, acc=0.745, epoch=34, loss=0.824]

epoch:34, idx:3399/10845, loss:0.824407505095005, acc:0.7447794117647059


 32%|███▏      | 3500/10845 [12:56<26:10,  4.68it/s, acc=0.744, epoch=34, loss=0.826]

epoch:34, idx:3499/10845, loss:0.8255139389719282, acc:0.7441428571428571


 33%|███▎      | 3600/10845 [13:19<26:20,  4.58it/s, acc=0.744, epoch=34, loss=0.83] 

epoch:34, idx:3599/10845, loss:0.83001995679405, acc:0.7439583333333334


 34%|███▍      | 3700/10845 [13:41<27:28,  4.33it/s, acc=0.744, epoch=34, loss=0.831]

epoch:34, idx:3699/10845, loss:0.8307544611595773, acc:0.7443243243243243


 35%|███▌      | 3800/10845 [14:03<26:51,  4.37it/s, acc=0.745, epoch=34, loss=0.829]

epoch:34, idx:3799/10845, loss:0.8293555714268434, acc:0.745


 36%|███▌      | 3900/10845 [14:25<26:01,  4.45it/s, acc=0.745, epoch=34, loss=0.831]

epoch:34, idx:3899/10845, loss:0.8308961926668118, acc:0.7446153846153846


 37%|███▋      | 4000/10845 [14:47<24:54,  4.58it/s, acc=0.744, epoch=34, loss=0.831]

epoch:34, idx:3999/10845, loss:0.8310649585425853, acc:0.7444375


 38%|███▊      | 4100/10845 [15:10<24:28,  4.59it/s, acc=0.745, epoch=34, loss=0.831]

epoch:34, idx:4099/10845, loss:0.830884209929443, acc:0.7448780487804878


 39%|███▊      | 4200/10845 [15:32<23:05,  4.80it/s, acc=0.745, epoch=34, loss=0.833]

epoch:34, idx:4199/10845, loss:0.8328879515613828, acc:0.7446428571428572


 40%|███▉      | 4301/10845 [15:54<23:07,  4.72it/s, acc=0.745, epoch=34, loss=0.833]

epoch:34, idx:4299/10845, loss:0.8325249409398368, acc:0.7446511627906977


 41%|████      | 4401/10845 [16:17<25:04,  4.28it/s, acc=0.744, epoch=34, loss=0.832]

epoch:34, idx:4399/10845, loss:0.8325406409393658, acc:0.7441477272727273


 41%|████▏     | 4500/10845 [16:39<24:00,  4.41it/s, acc=0.744, epoch=34, loss=0.832]

epoch:34, idx:4499/10845, loss:0.83191060090065, acc:0.7441666666666666


 42%|████▏     | 4600/10845 [17:01<23:26,  4.44it/s, acc=0.744, epoch=34, loss=0.835]

epoch:34, idx:4599/10845, loss:0.8350170888330626, acc:0.7439673913043479


 43%|████▎     | 4700/10845 [17:23<24:37,  4.16it/s, acc=0.744, epoch=34, loss=0.836]

epoch:34, idx:4699/10845, loss:0.8356568480298874, acc:0.7436170212765958


 44%|████▍     | 4801/10845 [17:46<21:38,  4.65it/s, acc=0.744, epoch=34, loss=0.837]

epoch:34, idx:4799/10845, loss:0.8373475759973129, acc:0.7435416666666667


 45%|████▌     | 4900/10845 [18:08<21:30,  4.61it/s, acc=0.743, epoch=34, loss=0.838]

epoch:34, idx:4899/10845, loss:0.8384319878354365, acc:0.7432142857142857


 46%|████▌     | 5000/10845 [18:30<20:49,  4.68it/s, acc=0.744, epoch=34, loss=0.836]

epoch:34, idx:4999/10845, loss:0.8355876234054566, acc:0.7437


 47%|████▋     | 5100/10845 [18:52<21:58,  4.36it/s, acc=0.744, epoch=34, loss=0.835]

epoch:34, idx:5099/10845, loss:0.8354799060728035, acc:0.743921568627451


 48%|████▊     | 5200/10845 [19:14<21:40,  4.34it/s, acc=0.744, epoch=34, loss=0.835]

epoch:34, idx:5199/10845, loss:0.8354221220887624, acc:0.74375


 49%|████▉     | 5300/10845 [19:37<20:25,  4.52it/s, acc=0.744, epoch=34, loss=0.836]

epoch:34, idx:5299/10845, loss:0.836085437423778, acc:0.7441509433962264


 50%|████▉     | 5401/10845 [20:00<19:17,  4.70it/s, acc=0.744, epoch=34, loss=0.835]

epoch:34, idx:5399/10845, loss:0.8348727914580593, acc:0.7444907407407407


 51%|█████     | 5501/10845 [20:22<18:46,  4.75it/s, acc=0.744, epoch=34, loss=0.836]

epoch:34, idx:5499/10845, loss:0.8361843585859645, acc:0.744


 52%|█████▏    | 5600/10845 [20:44<19:12,  4.55it/s, acc=0.744, epoch=34, loss=0.838]

epoch:34, idx:5599/10845, loss:0.8375505649511303, acc:0.7438839285714286


 53%|█████▎    | 5700/10845 [21:07<19:18,  4.44it/s, acc=0.744, epoch=34, loss=0.838]

epoch:34, idx:5699/10845, loss:0.8378073207432764, acc:0.7435964912280701


 53%|█████▎    | 5800/10845 [21:29<19:20,  4.35it/s, acc=0.743, epoch=34, loss=0.838]

epoch:34, idx:5799/10845, loss:0.8378972761076072, acc:0.743103448275862


 54%|█████▍    | 5900/10845 [21:52<17:55,  4.60it/s, acc=0.743, epoch=34, loss=0.84] 

epoch:34, idx:5899/10845, loss:0.8402385173106598, acc:0.7426271186440678


 55%|█████▌    | 6001/10845 [22:14<17:36,  4.58it/s, acc=0.743, epoch=34, loss=0.841]

epoch:34, idx:5999/10845, loss:0.8408545235792796, acc:0.743125


 56%|█████▋    | 6101/10845 [22:37<17:13,  4.59it/s, acc=0.743, epoch=34, loss=0.84] 

epoch:34, idx:6099/10845, loss:0.8401050644819854, acc:0.7430737704918032


 57%|█████▋    | 6201/10845 [22:59<16:01,  4.83it/s, acc=0.744, epoch=34, loss=0.839]

epoch:34, idx:6199/10845, loss:0.8388172993736882, acc:0.7437903225806451


 58%|█████▊    | 6301/10845 [23:21<17:23,  4.35it/s, acc=0.744, epoch=34, loss=0.837]

epoch:34, idx:6299/10845, loss:0.8368004549684979, acc:0.7444047619047619


 59%|█████▉    | 6400/10845 [23:43<17:24,  4.26it/s, acc=0.744, epoch=34, loss=0.835]

epoch:34, idx:6399/10845, loss:0.8351253429427743, acc:0.7444140625


 60%|█████▉    | 6500/10845 [24:06<15:59,  4.53it/s, acc=0.744, epoch=34, loss=0.837]

epoch:34, idx:6499/10845, loss:0.8365230594048133, acc:0.7442692307692308


 61%|██████    | 6600/10845 [24:28<16:31,  4.28it/s, acc=0.744, epoch=34, loss=0.836]

epoch:34, idx:6599/10845, loss:0.8357207351381128, acc:0.744469696969697


 62%|██████▏   | 6700/10845 [24:50<15:21,  4.50it/s, acc=0.744, epoch=34, loss=0.836]

epoch:34, idx:6699/10845, loss:0.8359237212387484, acc:0.7444402985074627


 63%|██████▎   | 6800/10845 [25:12<15:29,  4.35it/s, acc=0.745, epoch=34, loss=0.835]

epoch:34, idx:6799/10845, loss:0.8347972611118766, acc:0.7447794117647059


 64%|██████▎   | 6900/10845 [25:34<15:31,  4.23it/s, acc=0.745, epoch=34, loss=0.833]

epoch:34, idx:6899/10845, loss:0.8334980197747548, acc:0.7451086956521739


 65%|██████▍   | 7000/10845 [25:57<14:00,  4.57it/s, acc=0.745, epoch=34, loss=0.832]

epoch:34, idx:6999/10845, loss:0.8322265883684158, acc:0.7453571428571428


 65%|██████▌   | 7100/10845 [26:19<13:21,  4.67it/s, acc=0.745, epoch=34, loss=0.834]

epoch:34, idx:7099/10845, loss:0.8339787710720384, acc:0.7451408450704226


 66%|██████▋   | 7201/10845 [26:41<13:32,  4.48it/s, acc=0.746, epoch=34, loss=0.834]

epoch:34, idx:7199/10845, loss:0.8339100985229015, acc:0.7455208333333333


 67%|██████▋   | 7300/10845 [27:03<12:45,  4.63it/s, acc=0.745, epoch=34, loss=0.834]

epoch:34, idx:7299/10845, loss:0.8337254384772418, acc:0.7453767123287671


 68%|██████▊   | 7401/10845 [27:26<12:41,  4.52it/s, acc=0.746, epoch=34, loss=0.832]

epoch:34, idx:7399/10845, loss:0.83206359768236, acc:0.745945945945946


 69%|██████▉   | 7501/10845 [27:48<12:16,  4.54it/s, acc=0.746, epoch=34, loss=0.833]

epoch:34, idx:7499/10845, loss:0.8334960405190786, acc:0.7456


 70%|███████   | 7601/10845 [28:11<11:34,  4.67it/s, acc=0.746, epoch=34, loss=0.832]

epoch:34, idx:7599/10845, loss:0.8322070173997628, acc:0.7460526315789474


 71%|███████   | 7701/10845 [28:33<11:45,  4.45it/s, acc=0.746, epoch=34, loss=0.831]

epoch:34, idx:7699/10845, loss:0.8309678747437217, acc:0.7460064935064935


 72%|███████▏  | 7800/10845 [28:55<11:29,  4.42it/s, acc=0.747, epoch=34, loss=0.83] 

epoch:34, idx:7799/10845, loss:0.8296465543600229, acc:0.7465705128205128


 73%|███████▎  | 7901/10845 [29:17<10:17,  4.77it/s, acc=0.747, epoch=34, loss=0.829]

epoch:34, idx:7899/10845, loss:0.8289594033398205, acc:0.7467088607594937


 74%|███████▍  | 8000/10845 [29:39<10:38,  4.46it/s, acc=0.747, epoch=34, loss=0.829]

epoch:34, idx:7999/10845, loss:0.8290377564281225, acc:0.7465625


 75%|███████▍  | 8100/10845 [30:02<09:52,  4.63it/s, acc=0.747, epoch=34, loss=0.829]

epoch:34, idx:8099/10845, loss:0.8289138663845298, acc:0.7465432098765432


 76%|███████▌  | 8200/10845 [30:24<09:21,  4.71it/s, acc=0.747, epoch=34, loss=0.828]

epoch:34, idx:8199/10845, loss:0.8283647895295445, acc:0.7466158536585366


 77%|███████▋  | 8300/10845 [30:46<09:42,  4.37it/s, acc=0.746, epoch=34, loss=0.829]

epoch:34, idx:8299/10845, loss:0.8291431742547506, acc:0.746355421686747


 77%|███████▋  | 8401/10845 [31:08<08:50,  4.60it/s, acc=0.746, epoch=34, loss=0.829]

epoch:34, idx:8399/10845, loss:0.8291607679355713, acc:0.7463095238095238


 78%|███████▊  | 8500/10845 [31:30<08:39,  4.52it/s, acc=0.747, epoch=34, loss=0.827]

epoch:34, idx:8499/10845, loss:0.8270003903613371, acc:0.7467941176470588


 79%|███████▉  | 8600/10845 [31:53<07:56,  4.71it/s, acc=0.747, epoch=34, loss=0.827]

epoch:34, idx:8599/10845, loss:0.8270252391834592, acc:0.7466279069767442


 80%|████████  | 8701/10845 [32:15<08:25,  4.24it/s, acc=0.747, epoch=34, loss=0.826]

epoch:34, idx:8699/10845, loss:0.8259620371837726, acc:0.7468390804597701


 81%|████████  | 8801/10845 [32:37<07:20,  4.65it/s, acc=0.747, epoch=34, loss=0.826]

epoch:34, idx:8799/10845, loss:0.8256395210799846, acc:0.7470454545454546


 82%|████████▏ | 8900/10845 [32:59<07:14,  4.47it/s, acc=0.747, epoch=34, loss=0.826]

epoch:34, idx:8899/10845, loss:0.8259352209126012, acc:0.7471348314606742


 83%|████████▎ | 9001/10845 [33:22<06:46,  4.54it/s, acc=0.747, epoch=34, loss=0.825]

epoch:34, idx:8999/10845, loss:0.8252069500750966, acc:0.7473611111111111


 84%|████████▍ | 9101/10845 [33:44<06:17,  4.62it/s, acc=0.747, epoch=34, loss=0.825]

epoch:34, idx:9099/10845, loss:0.8245668952412658, acc:0.7474725274725275


 85%|████████▍ | 9200/10845 [34:06<06:12,  4.42it/s, acc=0.747, epoch=34, loss=0.825]

epoch:34, idx:9199/10845, loss:0.8245313103302665, acc:0.7474184782608696


 86%|████████▌ | 9300/10845 [34:28<06:13,  4.14it/s, acc=0.748, epoch=34, loss=0.822]

epoch:34, idx:9299/10845, loss:0.8224573832173502, acc:0.7479032258064516


 87%|████████▋ | 9401/10845 [34:50<04:59,  4.82it/s, acc=0.748, epoch=34, loss=0.823]

epoch:34, idx:9399/10845, loss:0.8230669418801653, acc:0.7479787234042553


 88%|████████▊ | 9501/10845 [35:13<05:08,  4.36it/s, acc=0.748, epoch=34, loss=0.823]

epoch:34, idx:9499/10845, loss:0.8232113631650021, acc:0.7480263157894737


 89%|████████▊ | 9600/10845 [35:34<04:16,  4.84it/s, acc=0.748, epoch=34, loss=0.823]

epoch:34, idx:9599/10845, loss:0.8230369969954093, acc:0.748046875


 89%|████████▉ | 9700/10845 [35:57<04:20,  4.40it/s, acc=0.748, epoch=34, loss=0.823]

epoch:34, idx:9699/10845, loss:0.8227502417810184, acc:0.7479639175257732


 90%|█████████ | 9800/10845 [36:19<03:40,  4.74it/s, acc=0.748, epoch=34, loss=0.824]

epoch:34, idx:9799/10845, loss:0.8240368129647508, acc:0.747704081632653


 91%|█████████▏| 9900/10845 [36:41<03:25,  4.59it/s, acc=0.748, epoch=34, loss=0.823]

epoch:34, idx:9899/10845, loss:0.8233933639044714, acc:0.7478535353535354


 92%|█████████▏| 10001/10845 [37:04<03:05,  4.56it/s, acc=0.748, epoch=34, loss=0.824]

epoch:34, idx:9999/10845, loss:0.8239142619490624, acc:0.747825


 93%|█████████▎| 10101/10845 [37:26<02:49,  4.38it/s, acc=0.748, epoch=34, loss=0.824]

epoch:34, idx:10099/10845, loss:0.8238171365119443, acc:0.7478465346534654


 94%|█████████▍| 10201/10845 [37:48<02:22,  4.51it/s, acc=0.748, epoch=34, loss=0.824]

epoch:34, idx:10199/10845, loss:0.8235408043627646, acc:0.7478676470588236


 95%|█████████▍| 10301/10845 [38:11<01:59,  4.55it/s, acc=0.748, epoch=34, loss=0.824]

epoch:34, idx:10299/10845, loss:0.8244404038989428, acc:0.7475970873786407


 96%|█████████▌| 10400/10845 [38:33<01:38,  4.53it/s, acc=0.748, epoch=34, loss=0.824]

epoch:34, idx:10399/10845, loss:0.8241272448232541, acc:0.7477884615384616


 97%|█████████▋| 10500/10845 [38:55<01:15,  4.58it/s, acc=0.748, epoch=34, loss=0.825]

epoch:34, idx:10499/10845, loss:0.8252465547266461, acc:0.7476666666666667


 98%|█████████▊| 10600/10845 [39:17<00:53,  4.56it/s, acc=0.747, epoch=34, loss=0.826]

epoch:34, idx:10599/10845, loss:0.826010324921248, acc:0.747311320754717


 99%|█████████▊| 10701/10845 [39:39<00:31,  4.52it/s, acc=0.747, epoch=34, loss=0.826]

epoch:34, idx:10699/10845, loss:0.8262209471753824, acc:0.7470093457943925


100%|█████████▉| 10801/10845 [40:01<00:09,  4.85it/s, acc=0.747, epoch=34, loss=0.825]

epoch:34, idx:10799/10845, loss:0.8254195366468694, acc:0.7472222222222222


100%|██████████| 10845/10845 [40:10<00:00,  4.33it/s, acc=0.747, epoch=34, loss=0.825]


epoch:34, idx:0/1275, loss:1.5109477043151855, acc:0.5
epoch:34, idx:100/1275, loss:1.5025847937801096, acc:0.6460396039603961
epoch:34, idx:200/1275, loss:1.3616319005169086, acc:0.650497512437811
epoch:34, idx:300/1275, loss:1.318762706363716, acc:0.6528239202657807
epoch:34, idx:400/1275, loss:1.2984248147046478, acc:0.6589775561097256
epoch:34, idx:500/1275, loss:1.2614988190923146, acc:0.6616766467065869
epoch:34, idx:600/1275, loss:1.2794619941473404, acc:0.6534941763727121
epoch:34, idx:700/1275, loss:1.283307333815625, acc:0.6547788873038516
epoch:34, idx:800/1275, loss:1.2991214457820268, acc:0.6538701622971286
epoch:34, idx:900/1275, loss:1.2914425886166876, acc:0.6556603773584906
epoch:34, idx:1000/1275, loss:1.2986797881531311, acc:0.6555944055944056
epoch:34, idx:1100/1275, loss:1.283064541439919, acc:0.6600817438692098
epoch:34, idx:1200/1275, loss:1.2803025938489851, acc:0.6579933388842631


  1%|          | 101/10845 [00:22<36:59,  4.84it/s, acc=0.76, epoch=35, loss=0.812]

epoch:35, idx:99/10845, loss:0.8124796783924103, acc:0.76


  2%|▏         | 200/10845 [00:44<38:05,  4.66it/s, acc=0.744, epoch=35, loss=0.844]

epoch:35, idx:199/10845, loss:0.8438994216918946, acc:0.74375


  3%|▎         | 301/10845 [01:06<37:35,  4.68it/s, acc=0.752, epoch=35, loss=0.809]

epoch:35, idx:299/10845, loss:0.8108397316932678, acc:0.7516666666666667


  4%|▎         | 401/10845 [01:28<37:40,  4.62it/s, acc=0.751, epoch=35, loss=0.801]

epoch:35, idx:399/10845, loss:0.8032612004876136, acc:0.75


  5%|▍         | 501/10845 [01:51<36:21,  4.74it/s, acc=0.751, epoch=35, loss=0.797]

epoch:35, idx:499/10845, loss:0.7978390123844147, acc:0.7505


  6%|▌         | 601/10845 [02:13<39:05,  4.37it/s, acc=0.745, epoch=35, loss=0.816]

epoch:35, idx:599/10845, loss:0.8139114695787429, acc:0.7454166666666666


  6%|▋         | 701/10845 [02:35<37:21,  4.53it/s, acc=0.747, epoch=35, loss=0.81] 

epoch:35, idx:699/10845, loss:0.8104062814371926, acc:0.7467857142857143


  7%|▋         | 800/10845 [02:57<38:58,  4.29it/s, acc=0.744, epoch=35, loss=0.81] 

epoch:35, idx:799/10845, loss:0.8097960324585438, acc:0.744375


  8%|▊         | 900/10845 [03:19<38:14,  4.33it/s, acc=0.747, epoch=35, loss=0.807]

epoch:35, idx:899/10845, loss:0.8073536549011866, acc:0.7472222222222222


  9%|▉         | 1000/10845 [03:42<36:44,  4.47it/s, acc=0.748, epoch=35, loss=0.812]

epoch:35, idx:999/10845, loss:0.812493256866932, acc:0.7475


 10%|█         | 1100/10845 [04:05<34:37,  4.69it/s, acc=0.745, epoch=35, loss=0.822]

epoch:35, idx:1099/10845, loss:0.8219265615398234, acc:0.7454545454545455


 11%|█         | 1200/10845 [04:27<34:04,  4.72it/s, acc=0.746, epoch=35, loss=0.822]

epoch:35, idx:1199/10845, loss:0.8217878988881906, acc:0.7464583333333333


 12%|█▏        | 1300/10845 [04:49<35:52,  4.43it/s, acc=0.747, epoch=35, loss=0.823]

epoch:35, idx:1299/10845, loss:0.8234155635191844, acc:0.7465384615384615


 13%|█▎        | 1400/10845 [05:11<33:19,  4.72it/s, acc=0.749, epoch=35, loss=0.814]

epoch:35, idx:1399/10845, loss:0.813678505718708, acc:0.7494642857142857


 14%|█▍        | 1501/10845 [05:33<32:15,  4.83it/s, acc=0.75, epoch=35, loss=0.814] 

epoch:35, idx:1499/10845, loss:0.8137880055507024, acc:0.7505


 15%|█▍        | 1600/10845 [05:55<33:00,  4.67it/s, acc=0.75, epoch=35, loss=0.819] 

epoch:35, idx:1599/10845, loss:0.8190474008396268, acc:0.7503125


 16%|█▌        | 1701/10845 [06:17<31:55,  4.77it/s, acc=0.751, epoch=35, loss=0.816]

epoch:35, idx:1699/10845, loss:0.8158829903251985, acc:0.7510294117647058


 17%|█▋        | 1801/10845 [06:39<32:43,  4.61it/s, acc=0.752, epoch=35, loss=0.813]

epoch:35, idx:1799/10845, loss:0.8137509844038222, acc:0.7513888888888889


 18%|█▊        | 1900/10845 [07:01<32:24,  4.60it/s, acc=0.753, epoch=35, loss=0.809]

epoch:35, idx:1899/10845, loss:0.8087075947146667, acc:0.7526315789473684


 18%|█▊        | 2000/10845 [07:24<33:33,  4.39it/s, acc=0.752, epoch=35, loss=0.811]

epoch:35, idx:1999/10845, loss:0.8110542322695256, acc:0.751875


 19%|█▉        | 2101/10845 [07:46<30:54,  4.72it/s, acc=0.751, epoch=35, loss=0.815]

epoch:35, idx:2099/10845, loss:0.8146815707286199, acc:0.7508333333333334


 20%|██        | 2201/10845 [08:08<30:37,  4.70it/s, acc=0.751, epoch=35, loss=0.815]

epoch:35, idx:2199/10845, loss:0.8143242183869536, acc:0.7514772727272727


 21%|██        | 2300/10845 [08:30<31:07,  4.58it/s, acc=0.751, epoch=35, loss=0.821]

epoch:35, idx:2299/10845, loss:0.8205886604215787, acc:0.7509782608695652


 22%|██▏       | 2400/10845 [08:52<32:16,  4.36it/s, acc=0.752, epoch=35, loss=0.818]

epoch:35, idx:2399/10845, loss:0.8180544678121805, acc:0.751875


 23%|██▎       | 2500/10845 [09:14<31:13,  4.45it/s, acc=0.752, epoch=35, loss=0.817]

epoch:35, idx:2499/10845, loss:0.8170107532262803, acc:0.7524


 24%|██▍       | 2600/10845 [09:37<31:12,  4.40it/s, acc=0.752, epoch=35, loss=0.819]

epoch:35, idx:2599/10845, loss:0.8193206936350236, acc:0.7519230769230769


 25%|██▍       | 2700/10845 [09:58<29:44,  4.56it/s, acc=0.751, epoch=35, loss=0.819]

epoch:35, idx:2699/10845, loss:0.8192850457518189, acc:0.7511111111111111


 26%|██▌       | 2800/10845 [10:20<29:37,  4.53it/s, acc=0.75, epoch=35, loss=0.822] 

epoch:35, idx:2799/10845, loss:0.8219089521467686, acc:0.75


 27%|██▋       | 2901/10845 [10:43<28:15,  4.69it/s, acc=0.749, epoch=35, loss=0.827]

epoch:35, idx:2899/10845, loss:0.8276252598392553, acc:0.7491379310344828


 28%|██▊       | 3000/10845 [11:05<29:54,  4.37it/s, acc=0.75, epoch=35, loss=0.824] 

epoch:35, idx:2999/10845, loss:0.8236085066199302, acc:0.74975


 29%|██▊       | 3100/10845 [11:27<27:17,  4.73it/s, acc=0.75, epoch=35, loss=0.823] 

epoch:35, idx:3099/10845, loss:0.8234982849897877, acc:0.7504032258064516


 30%|██▉       | 3200/10845 [11:49<28:24,  4.49it/s, acc=0.751, epoch=35, loss=0.823]

epoch:35, idx:3199/10845, loss:0.822807719502598, acc:0.750625


 30%|███       | 3301/10845 [12:11<26:14,  4.79it/s, acc=0.75, epoch=35, loss=0.825] 

epoch:35, idx:3299/10845, loss:0.824644555810726, acc:0.75


 31%|███▏      | 3401/10845 [12:33<25:21,  4.89it/s, acc=0.749, epoch=35, loss=0.826]

epoch:35, idx:3399/10845, loss:0.8254566499941489, acc:0.7492647058823529


 32%|███▏      | 3500/10845 [12:55<28:20,  4.32it/s, acc=0.75, epoch=35, loss=0.824] 

epoch:35, idx:3499/10845, loss:0.8235379881007331, acc:0.7497857142857143


 33%|███▎      | 3600/10845 [13:17<26:47,  4.51it/s, acc=0.75, epoch=35, loss=0.822]

epoch:35, idx:3599/10845, loss:0.8220239873561594, acc:0.75


 34%|███▍      | 3700/10845 [13:40<28:29,  4.18it/s, acc=0.749, epoch=35, loss=0.825]

epoch:35, idx:3699/10845, loss:0.8249590330027246, acc:0.7494594594594595


 35%|███▌      | 3801/10845 [14:02<26:27,  4.44it/s, acc=0.749, epoch=35, loss=0.826]

epoch:35, idx:3799/10845, loss:0.8257152008853461, acc:0.7490131578947369


 36%|███▌      | 3900/10845 [14:24<25:09,  4.60it/s, acc=0.75, epoch=35, loss=0.824] 

epoch:35, idx:3899/10845, loss:0.8238737547856111, acc:0.7496794871794872


 37%|███▋      | 4000/10845 [14:47<25:22,  4.50it/s, acc=0.749, epoch=35, loss=0.825]

epoch:35, idx:3999/10845, loss:0.8256284509748221, acc:0.7493125


 38%|███▊      | 4100/10845 [15:10<25:04,  4.48it/s, acc=0.749, epoch=35, loss=0.827]

epoch:35, idx:4099/10845, loss:0.8268797418983971, acc:0.7492682926829268


 39%|███▊      | 4200/10845 [15:32<24:59,  4.43it/s, acc=0.75, epoch=35, loss=0.826] 

epoch:35, idx:4199/10845, loss:0.8264942423076856, acc:0.7496428571428572


 40%|███▉      | 4300/10845 [15:54<23:29,  4.64it/s, acc=0.75, epoch=35, loss=0.824]

epoch:35, idx:4299/10845, loss:0.8244023774806843, acc:0.750406976744186


 41%|████      | 4400/10845 [16:16<23:02,  4.66it/s, acc=0.75, epoch=35, loss=0.826] 

epoch:35, idx:4399/10845, loss:0.8257850996066224, acc:0.7500568181818181


 41%|████▏     | 4500/10845 [16:39<22:20,  4.73it/s, acc=0.75, epoch=35, loss=0.826] 

epoch:35, idx:4499/10845, loss:0.8258666808737649, acc:0.7499444444444444


 42%|████▏     | 4600/10845 [17:01<25:32,  4.08it/s, acc=0.75, epoch=35, loss=0.823]

epoch:35, idx:4599/10845, loss:0.8226991003622179, acc:0.7502717391304348


 43%|████▎     | 4700/10845 [17:23<22:49,  4.49it/s, acc=0.751, epoch=35, loss=0.821]

epoch:35, idx:4699/10845, loss:0.820876523801621, acc:0.7506914893617022


 44%|████▍     | 4801/10845 [17:46<22:09,  4.55it/s, acc=0.751, epoch=35, loss=0.822]

epoch:35, idx:4799/10845, loss:0.8222626608734329, acc:0.750625


 45%|████▌     | 4901/10845 [18:08<22:27,  4.41it/s, acc=0.751, epoch=35, loss=0.821]

epoch:35, idx:4899/10845, loss:0.8213698914221355, acc:0.750765306122449


 46%|████▌     | 5000/10845 [18:30<21:15,  4.58it/s, acc=0.751, epoch=35, loss=0.822]

epoch:35, idx:4999/10845, loss:0.822288653242588, acc:0.7509


 47%|████▋     | 5100/10845 [18:52<21:06,  4.54it/s, acc=0.75, epoch=35, loss=0.823] 

epoch:35, idx:5099/10845, loss:0.823396304974369, acc:0.750343137254902


 48%|████▊     | 5200/10845 [19:15<21:04,  4.46it/s, acc=0.75, epoch=35, loss=0.823] 

epoch:35, idx:5199/10845, loss:0.82345675983108, acc:0.7502884615384615


 49%|████▉     | 5300/10845 [19:37<20:41,  4.47it/s, acc=0.75, epoch=35, loss=0.823]

epoch:35, idx:5299/10845, loss:0.8233680350263164, acc:0.7501415094339623


 50%|████▉     | 5401/10845 [20:00<19:39,  4.62it/s, acc=0.75, epoch=35, loss=0.823]

epoch:35, idx:5399/10845, loss:0.8230705987082587, acc:0.7497685185185186


 51%|█████     | 5501/10845 [20:22<19:27,  4.58it/s, acc=0.75, epoch=35, loss=0.823]

epoch:35, idx:5499/10845, loss:0.8228395540931008, acc:0.7498181818181818


 52%|█████▏    | 5600/10845 [20:45<18:41,  4.68it/s, acc=0.75, epoch=35, loss=0.821]

epoch:35, idx:5599/10845, loss:0.8214465540860381, acc:0.7500446428571429


 53%|█████▎    | 5700/10845 [21:07<20:26,  4.20it/s, acc=0.75, epoch=35, loss=0.82] 

epoch:35, idx:5699/10845, loss:0.82026479564215, acc:0.7501754385964913


 53%|█████▎    | 5800/10845 [21:29<17:32,  4.79it/s, acc=0.75, epoch=35, loss=0.821]

epoch:35, idx:5799/10845, loss:0.8214861117354755, acc:0.7498275862068966


 54%|█████▍    | 5901/10845 [21:52<17:57,  4.59it/s, acc=0.75, epoch=35, loss=0.821] 

epoch:35, idx:5899/10845, loss:0.8214508879992921, acc:0.7495338983050848


 55%|█████▌    | 6000/10845 [22:14<18:46,  4.30it/s, acc=0.748, epoch=35, loss=0.825]

epoch:35, idx:5999/10845, loss:0.8247809180816015, acc:0.7484166666666666


 56%|█████▋    | 6101/10845 [22:37<16:50,  4.69it/s, acc=0.748, epoch=35, loss=0.827]

epoch:35, idx:6099/10845, loss:0.8267259653279039, acc:0.747827868852459


 57%|█████▋    | 6201/10845 [22:59<16:57,  4.57it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:6199/10845, loss:0.8288442327418635, acc:0.7471370967741936


 58%|█████▊    | 6300/10845 [23:21<16:35,  4.57it/s, acc=0.747, epoch=35, loss=0.83] 

epoch:35, idx:6299/10845, loss:0.8300173993999996, acc:0.7468650793650794


 59%|█████▉    | 6401/10845 [23:44<17:14,  4.30it/s, acc=0.748, epoch=35, loss=0.829]

epoch:35, idx:6399/10845, loss:0.8292265523690731, acc:0.7475


 60%|█████▉    | 6500/10845 [24:06<16:38,  4.35it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:6499/10845, loss:0.8291134920762135, acc:0.7473076923076923


 61%|██████    | 6601/10845 [24:29<15:07,  4.68it/s, acc=0.748, epoch=35, loss=0.829]

epoch:35, idx:6599/10845, loss:0.8286351932991635, acc:0.7476136363636363


 62%|██████▏   | 6701/10845 [24:51<14:01,  4.93it/s, acc=0.747, epoch=35, loss=0.828]

epoch:35, idx:6699/10845, loss:0.8282351235400385, acc:0.7473134328358209


 63%|██████▎   | 6800/10845 [25:13<15:38,  4.31it/s, acc=0.747, epoch=35, loss=0.827]

epoch:35, idx:6799/10845, loss:0.8274693641504821, acc:0.747389705882353


 64%|██████▎   | 6901/10845 [25:36<14:39,  4.49it/s, acc=0.747, epoch=35, loss=0.828]

epoch:35, idx:6899/10845, loss:0.8283520581152128, acc:0.7468840579710145


 65%|██████▍   | 7000/10845 [25:58<13:59,  4.58it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:6999/10845, loss:0.8294724292499679, acc:0.7467142857142857


 65%|██████▌   | 7100/10845 [26:21<14:21,  4.35it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:7099/10845, loss:0.8299237755066912, acc:0.746443661971831


 66%|██████▋   | 7201/10845 [26:43<12:59,  4.68it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:7199/10845, loss:0.8284144372161891, acc:0.7468402777777777


 67%|██████▋   | 7300/10845 [27:05<13:15,  4.45it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:7299/10845, loss:0.8296180942695435, acc:0.746027397260274


 68%|██████▊   | 7401/10845 [27:27<12:03,  4.76it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:7399/10845, loss:0.8301471413228962, acc:0.7460472972972974


 69%|██████▉   | 7501/10845 [27:50<12:07,  4.59it/s, acc=0.746, epoch=35, loss=0.831]

epoch:35, idx:7499/10845, loss:0.8307976571957271, acc:0.7459


 70%|███████   | 7600/10845 [28:12<12:21,  4.38it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:7599/10845, loss:0.82994969269947, acc:0.7458552631578947


 71%|███████   | 7701/10845 [28:35<11:13,  4.67it/s, acc=0.746, epoch=35, loss=0.831]

epoch:35, idx:7699/10845, loss:0.8306699185479771, acc:0.7458441558441559


 72%|███████▏  | 7801/10845 [28:57<11:48,  4.29it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:7799/10845, loss:0.8296820821059055, acc:0.7460576923076923


 73%|███████▎  | 7900/10845 [29:20<11:15,  4.36it/s, acc=0.747, epoch=35, loss=0.828]

epoch:35, idx:7899/10845, loss:0.8284243438968175, acc:0.7465506329113925


 74%|███████▍  | 8000/10845 [29:41<10:44,  4.42it/s, acc=0.747, epoch=35, loss=0.828]

epoch:35, idx:7999/10845, loss:0.8280123478323221, acc:0.7468125


 75%|███████▍  | 8100/10845 [30:04<10:18,  4.44it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:8099/10845, loss:0.8288381866172508, acc:0.7467592592592592


 76%|███████▌  | 8201/10845 [30:26<09:25,  4.68it/s, acc=0.747, epoch=35, loss=0.828]

epoch:35, idx:8199/10845, loss:0.8285453165158992, acc:0.7470731707317073


 77%|███████▋  | 8300/10845 [30:49<10:00,  4.24it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:8299/10845, loss:0.828924734319549, acc:0.7465361445783133


 77%|███████▋  | 8400/10845 [31:11<09:06,  4.47it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:8399/10845, loss:0.8286218475869723, acc:0.7466964285714286


 78%|███████▊  | 8500/10845 [31:34<08:25,  4.64it/s, acc=0.747, epoch=35, loss=0.829]

epoch:35, idx:8499/10845, loss:0.8286819916893454, acc:0.7465588235294117


 79%|███████▉  | 8600/10845 [31:56<08:30,  4.40it/s, acc=0.747, epoch=35, loss=0.827]

epoch:35, idx:8599/10845, loss:0.8266830800300421, acc:0.746860465116279


 80%|████████  | 8700/10845 [32:19<07:42,  4.64it/s, acc=0.747, epoch=35, loss=0.827]

epoch:35, idx:8699/10845, loss:0.8271782403704764, acc:0.7467816091954023


 81%|████████  | 8800/10845 [32:41<07:33,  4.51it/s, acc=0.747, epoch=35, loss=0.827]

epoch:35, idx:8799/10845, loss:0.8267073226923293, acc:0.7471022727272727


 82%|████████▏ | 8900/10845 [33:03<07:11,  4.51it/s, acc=0.747, epoch=35, loss=0.826]

epoch:35, idx:8899/10845, loss:0.8265200420577874, acc:0.7471629213483146


 83%|████████▎ | 9000/10845 [33:25<06:52,  4.48it/s, acc=0.747, epoch=35, loss=0.828]

epoch:35, idx:8999/10845, loss:0.827965655154652, acc:0.7469444444444444


 84%|████████▍ | 9100/10845 [33:47<06:42,  4.34it/s, acc=0.747, epoch=35, loss=0.83] 

epoch:35, idx:9099/10845, loss:0.829564412183814, acc:0.7467032967032967


 85%|████████▍ | 9200/10845 [34:10<05:58,  4.59it/s, acc=0.747, epoch=35, loss=0.83] 

epoch:35, idx:9199/10845, loss:0.8295220031492088, acc:0.7466847826086956


 86%|████████▌ | 9300/10845 [34:32<05:36,  4.59it/s, acc=0.746, epoch=35, loss=0.83]

epoch:35, idx:9299/10845, loss:0.8299422008440058, acc:0.7464516129032258


 87%|████████▋ | 9400/10845 [34:54<05:10,  4.65it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:9399/10845, loss:0.829620019355987, acc:0.7461968085106383


 88%|████████▊ | 9501/10845 [35:17<05:05,  4.40it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:9499/10845, loss:0.8297401623412183, acc:0.7460263157894736


 89%|████████▊ | 9600/10845 [35:39<04:25,  4.70it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:9599/10845, loss:0.8297676565684378, acc:0.746015625


 89%|████████▉ | 9701/10845 [36:01<03:57,  4.81it/s, acc=0.746, epoch=35, loss=0.83] 

epoch:35, idx:9699/10845, loss:0.8304233312545364, acc:0.746159793814433


 90%|█████████ | 9800/10845 [36:24<04:12,  4.14it/s, acc=0.746, epoch=35, loss=0.831]

epoch:35, idx:9799/10845, loss:0.8314374142094534, acc:0.7459438775510204


 91%|█████████▏| 9900/10845 [36:46<03:25,  4.60it/s, acc=0.746, epoch=35, loss=0.831]

epoch:35, idx:9899/10845, loss:0.8311726202326591, acc:0.7462121212121212


 92%|█████████▏| 10000/10845 [37:08<03:07,  4.51it/s, acc=0.746, epoch=35, loss=0.833]

epoch:35, idx:9999/10845, loss:0.8327230561196804, acc:0.7461


 93%|█████████▎| 10101/10845 [37:30<02:42,  4.56it/s, acc=0.746, epoch=35, loss=0.833]

epoch:35, idx:10099/10845, loss:0.8332195926833861, acc:0.745990099009901


 94%|█████████▍| 10200/10845 [37:52<02:27,  4.37it/s, acc=0.746, epoch=35, loss=0.834]

epoch:35, idx:10199/10845, loss:0.8337969054836852, acc:0.745686274509804


 95%|█████████▍| 10300/10845 [38:15<01:59,  4.56it/s, acc=0.746, epoch=35, loss=0.834]

epoch:35, idx:10299/10845, loss:0.8341956492361513, acc:0.7455339805825243


 96%|█████████▌| 10400/10845 [38:37<01:34,  4.73it/s, acc=0.746, epoch=35, loss=0.834]

epoch:35, idx:10399/10845, loss:0.8341344520965448, acc:0.745576923076923


 97%|█████████▋| 10500/10845 [38:59<01:13,  4.69it/s, acc=0.746, epoch=35, loss=0.833]

epoch:35, idx:10499/10845, loss:0.8331612770387105, acc:0.7456428571428572


 98%|█████████▊| 10600/10845 [39:21<00:53,  4.59it/s, acc=0.746, epoch=35, loss=0.833]

epoch:35, idx:10599/10845, loss:0.8329581024343112, acc:0.7458490566037735


 99%|█████████▊| 10700/10845 [39:43<00:31,  4.54it/s, acc=0.746, epoch=35, loss=0.833]

epoch:35, idx:10699/10845, loss:0.8330401409953554, acc:0.7457242990654206


100%|█████████▉| 10801/10845 [40:05<00:09,  4.88it/s, acc=0.746, epoch=35, loss=0.832]

epoch:35, idx:10799/10845, loss:0.8315578752369792, acc:0.7459722222222223


100%|██████████| 10845/10845 [40:15<00:00,  4.64it/s, acc=0.746, epoch=35, loss=0.832]


epoch:35, idx:0/1275, loss:1.6309137344360352, acc:0.5
epoch:35, idx:100/1275, loss:1.4883074288321014, acc:0.655940594059406
epoch:35, idx:200/1275, loss:1.3492509612989663, acc:0.6554726368159204
epoch:35, idx:300/1275, loss:1.302386195160622, acc:0.6586378737541528
epoch:35, idx:400/1275, loss:1.2767830896853212, acc:0.6645885286783042
epoch:35, idx:500/1275, loss:1.2421080140534513, acc:0.6656686626746507
epoch:35, idx:600/1275, loss:1.2594444291563875, acc:0.6572379367720466
epoch:35, idx:700/1275, loss:1.2619300551829427, acc:0.6562054208273894
epoch:35, idx:800/1275, loss:1.2755696623215218, acc:0.6544943820224719
epoch:35, idx:900/1275, loss:1.2683643864738559, acc:0.658157602663707
epoch:35, idx:1000/1275, loss:1.273582863045501, acc:0.6580919080919081
epoch:35, idx:1100/1275, loss:1.258361762161151, acc:0.6628065395095368
epoch:35, idx:1200/1275, loss:1.2570512547878103, acc:0.6609075770191507


  1%|          | 100/10845 [00:22<38:59,  4.59it/s, acc=0.755, epoch=36, loss=0.833]

epoch:36, idx:99/10845, loss:0.8329096341133118, acc:0.755


  2%|▏         | 201/10845 [00:44<38:48,  4.57it/s, acc=0.752, epoch=36, loss=0.808]

epoch:36, idx:199/10845, loss:0.8118999820947647, acc:0.75125


  3%|▎         | 301/10845 [01:06<37:39,  4.67it/s, acc=0.74, epoch=36, loss=0.825] 

epoch:36, idx:299/10845, loss:0.8270494329929352, acc:0.7391666666666666


  4%|▎         | 400/10845 [01:28<40:46,  4.27it/s, acc=0.736, epoch=36, loss=0.854]

epoch:36, idx:399/10845, loss:0.8537858176231384, acc:0.73625


  5%|▍         | 501/10845 [01:51<36:42,  4.70it/s, acc=0.737, epoch=36, loss=0.843]

epoch:36, idx:499/10845, loss:0.8419457831382752, acc:0.737


  6%|▌         | 600/10845 [02:12<35:49,  4.77it/s, acc=0.74, epoch=36, loss=0.83]  

epoch:36, idx:599/10845, loss:0.8300139464934667, acc:0.7395833333333334


  6%|▋         | 701/10845 [02:35<37:18,  4.53it/s, acc=0.744, epoch=36, loss=0.811]

epoch:36, idx:699/10845, loss:0.8112911038739341, acc:0.7435714285714285


  7%|▋         | 800/10845 [02:56<36:18,  4.61it/s, acc=0.743, epoch=36, loss=0.816]

epoch:36, idx:799/10845, loss:0.8155020643770695, acc:0.7425


  8%|▊         | 901/10845 [03:19<35:23,  4.68it/s, acc=0.741, epoch=36, loss=0.82] 

epoch:36, idx:899/10845, loss:0.8210897052950329, acc:0.7408333333333333


  9%|▉         | 1000/10845 [03:41<36:00,  4.56it/s, acc=0.744, epoch=36, loss=0.808]

epoch:36, idx:999/10845, loss:0.8075102272629738, acc:0.74425


 10%|█         | 1100/10845 [04:03<35:46,  4.54it/s, acc=0.746, epoch=36, loss=0.806]

epoch:36, idx:1099/10845, loss:0.8059603987498717, acc:0.7456818181818182


 11%|█         | 1200/10845 [04:26<39:54,  4.03it/s, acc=0.751, epoch=36, loss=0.794]

epoch:36, idx:1199/10845, loss:0.7941087438166141, acc:0.7508333333333334


 12%|█▏        | 1300/10845 [04:48<33:04,  4.81it/s, acc=0.753, epoch=36, loss=0.793]

epoch:36, idx:1299/10845, loss:0.793136136669379, acc:0.7532692307692308


 13%|█▎        | 1401/10845 [05:11<33:39,  4.68it/s, acc=0.753, epoch=36, loss=0.797]

epoch:36, idx:1399/10845, loss:0.7979741960338184, acc:0.7525


 14%|█▍        | 1500/10845 [05:33<34:58,  4.45it/s, acc=0.754, epoch=36, loss=0.795]

epoch:36, idx:1499/10845, loss:0.7948709805409113, acc:0.7536666666666667


 15%|█▍        | 1600/10845 [05:55<33:13,  4.64it/s, acc=0.754, epoch=36, loss=0.792]

epoch:36, idx:1599/10845, loss:0.7918731492385268, acc:0.754375


 16%|█▌        | 1700/10845 [06:17<33:13,  4.59it/s, acc=0.754, epoch=36, loss=0.798]

epoch:36, idx:1699/10845, loss:0.7981559325316373, acc:0.7535294117647059


 17%|█▋        | 1800/10845 [06:39<33:46,  4.46it/s, acc=0.753, epoch=36, loss=0.796]

epoch:36, idx:1799/10845, loss:0.7961910372972488, acc:0.7533333333333333


 18%|█▊        | 1901/10845 [07:02<34:16,  4.35it/s, acc=0.753, epoch=36, loss=0.8]  

epoch:36, idx:1899/10845, loss:0.8005076592219503, acc:0.7531578947368421


 18%|█▊        | 2000/10845 [07:24<31:21,  4.70it/s, acc=0.753, epoch=36, loss=0.797]

epoch:36, idx:1999/10845, loss:0.796635467350483, acc:0.75325


 19%|█▉        | 2100/10845 [07:46<32:42,  4.46it/s, acc=0.753, epoch=36, loss=0.796]

epoch:36, idx:2099/10845, loss:0.7959689978474662, acc:0.7533333333333333


 20%|██        | 2200/10845 [08:08<31:49,  4.53it/s, acc=0.753, epoch=36, loss=0.799]

epoch:36, idx:2199/10845, loss:0.7986091136119583, acc:0.7530681818181818


 21%|██        | 2300/10845 [08:31<30:51,  4.62it/s, acc=0.753, epoch=36, loss=0.802]

epoch:36, idx:2299/10845, loss:0.8019251139008481, acc:0.7528260869565218


 22%|██▏       | 2400/10845 [08:53<31:50,  4.42it/s, acc=0.752, epoch=36, loss=0.803]

epoch:36, idx:2399/10845, loss:0.8034882870564858, acc:0.751875


 23%|██▎       | 2500/10845 [09:15<29:48,  4.67it/s, acc=0.751, epoch=36, loss=0.81] 

epoch:36, idx:2499/10845, loss:0.8095155108690262, acc:0.7507


 24%|██▍       | 2600/10845 [09:37<29:53,  4.60it/s, acc=0.751, epoch=36, loss=0.809]

epoch:36, idx:2599/10845, loss:0.8091837322024199, acc:0.7514423076923077


 25%|██▍       | 2700/10845 [10:00<29:29,  4.60it/s, acc=0.751, epoch=36, loss=0.81] 

epoch:36, idx:2699/10845, loss:0.8103568944886879, acc:0.7508333333333334


 26%|██▌       | 2800/10845 [10:22<29:13,  4.59it/s, acc=0.75, epoch=36, loss=0.815] 

epoch:36, idx:2799/10845, loss:0.8146364530708109, acc:0.75


 27%|██▋       | 2900/10845 [10:44<27:01,  4.90it/s, acc=0.749, epoch=36, loss=0.815]

epoch:36, idx:2899/10845, loss:0.8145174596021916, acc:0.7493965517241379


 28%|██▊       | 3001/10845 [11:07<29:36,  4.41it/s, acc=0.749, epoch=36, loss=0.816]

epoch:36, idx:2999/10845, loss:0.8159565563400586, acc:0.749


 29%|██▊       | 3100/10845 [11:29<26:56,  4.79it/s, acc=0.75, epoch=36, loss=0.814] 

epoch:36, idx:3099/10845, loss:0.8137488394398843, acc:0.7495161290322581


 30%|██▉       | 3200/10845 [11:51<28:01,  4.55it/s, acc=0.749, epoch=36, loss=0.813]

epoch:36, idx:3199/10845, loss:0.8125391518324614, acc:0.749453125


 30%|███       | 3300/10845 [12:14<28:27,  4.42it/s, acc=0.75, epoch=36, loss=0.812] 

epoch:36, idx:3299/10845, loss:0.8118690880139668, acc:0.75


 31%|███▏      | 3401/10845 [12:36<26:14,  4.73it/s, acc=0.75, epoch=36, loss=0.811]

epoch:36, idx:3399/10845, loss:0.8110692823634428, acc:0.7499264705882352


 32%|███▏      | 3501/10845 [12:59<27:01,  4.53it/s, acc=0.75, epoch=36, loss=0.812] 

epoch:36, idx:3499/10845, loss:0.8121291377203805, acc:0.7503571428571428


 33%|███▎      | 3600/10845 [13:21<26:54,  4.49it/s, acc=0.751, epoch=36, loss=0.811]

epoch:36, idx:3599/10845, loss:0.8108427584833569, acc:0.7505555555555555


 34%|███▍      | 3700/10845 [13:43<27:27,  4.34it/s, acc=0.751, epoch=36, loss=0.812]

epoch:36, idx:3699/10845, loss:0.8124784520832268, acc:0.7506756756756757


 35%|███▌      | 3800/10845 [14:05<26:07,  4.50it/s, acc=0.751, epoch=36, loss=0.811]

epoch:36, idx:3799/10845, loss:0.8113361630000566, acc:0.7513815789473685


 36%|███▌      | 3900/10845 [14:27<26:30,  4.37it/s, acc=0.751, epoch=36, loss=0.812]

epoch:36, idx:3899/10845, loss:0.8123438927760491, acc:0.7512820512820513


 37%|███▋      | 4000/10845 [14:50<26:40,  4.28it/s, acc=0.751, epoch=36, loss=0.813]

epoch:36, idx:3999/10845, loss:0.8125107451379299, acc:0.751375


 38%|███▊      | 4100/10845 [15:12<24:32,  4.58it/s, acc=0.751, epoch=36, loss=0.812]

epoch:36, idx:4099/10845, loss:0.8117940399414155, acc:0.7514634146341463


 39%|███▊      | 4200/10845 [15:34<24:11,  4.58it/s, acc=0.751, epoch=36, loss=0.812]

epoch:36, idx:4199/10845, loss:0.8124215648287818, acc:0.7508333333333334


 40%|███▉      | 4300/10845 [15:57<22:47,  4.79it/s, acc=0.751, epoch=36, loss=0.811]

epoch:36, idx:4299/10845, loss:0.8107824451978817, acc:0.7511627906976744


 41%|████      | 4401/10845 [16:19<23:42,  4.53it/s, acc=0.751, epoch=36, loss=0.813]

epoch:36, idx:4399/10845, loss:0.8129493203217333, acc:0.7505681818181819


 41%|████▏     | 4500/10845 [16:41<22:48,  4.64it/s, acc=0.751, epoch=36, loss=0.811]

epoch:36, idx:4499/10845, loss:0.8110013311704, acc:0.7506111111111111


 42%|████▏     | 4600/10845 [17:03<21:46,  4.78it/s, acc=0.751, epoch=36, loss=0.81] 

epoch:36, idx:4599/10845, loss:0.8100351188882537, acc:0.7509782608695652


 43%|████▎     | 4700/10845 [17:25<22:39,  4.52it/s, acc=0.751, epoch=36, loss=0.808]

epoch:36, idx:4699/10845, loss:0.808323457431286, acc:0.7513829787234042


 44%|████▍     | 4801/10845 [17:47<22:40,  4.44it/s, acc=0.751, epoch=36, loss=0.809]

epoch:36, idx:4799/10845, loss:0.8089179078365366, acc:0.7513020833333334


 45%|████▌     | 4900/10845 [18:09<22:14,  4.46it/s, acc=0.751, epoch=36, loss=0.808]

epoch:36, idx:4899/10845, loss:0.8077411522549026, acc:0.7514285714285714


 46%|████▌     | 5000/10845 [18:31<21:07,  4.61it/s, acc=0.752, epoch=36, loss=0.806]

epoch:36, idx:4999/10845, loss:0.8057393200278282, acc:0.7522


 47%|████▋     | 5100/10845 [18:54<21:59,  4.35it/s, acc=0.752, epoch=36, loss=0.805]

epoch:36, idx:5099/10845, loss:0.805158580179308, acc:0.7523529411764706


 48%|████▊     | 5200/10845 [19:16<20:23,  4.61it/s, acc=0.752, epoch=36, loss=0.804]

epoch:36, idx:5199/10845, loss:0.8044816551873317, acc:0.7523557692307692


 49%|████▉     | 5300/10845 [19:38<19:07,  4.83it/s, acc=0.752, epoch=36, loss=0.805]

epoch:36, idx:5299/10845, loss:0.8054130273832465, acc:0.7517924528301887


 50%|████▉     | 5400/10845 [20:00<20:25,  4.44it/s, acc=0.751, epoch=36, loss=0.808]

epoch:36, idx:5399/10845, loss:0.8078702944185999, acc:0.7514351851851852


 51%|█████     | 5500/10845 [20:23<19:26,  4.58it/s, acc=0.751, epoch=36, loss=0.807]

epoch:36, idx:5499/10845, loss:0.8068340388536454, acc:0.7514545454545455


 52%|█████▏    | 5600/10845 [20:45<19:55,  4.39it/s, acc=0.752, epoch=36, loss=0.806]

epoch:36, idx:5599/10845, loss:0.8059749884158373, acc:0.7521428571428571


 53%|█████▎    | 5700/10845 [21:07<20:10,  4.25it/s, acc=0.752, epoch=36, loss=0.807]

epoch:36, idx:5699/10845, loss:0.806744110594716, acc:0.7520614035087719


 53%|█████▎    | 5801/10845 [21:29<19:18,  4.35it/s, acc=0.751, epoch=36, loss=0.808]

epoch:36, idx:5799/10845, loss:0.8083534314406329, acc:0.75125


 54%|█████▍    | 5901/10845 [21:52<18:02,  4.57it/s, acc=0.751, epoch=36, loss=0.807]

epoch:36, idx:5899/10845, loss:0.8070726172701788, acc:0.751228813559322


 55%|█████▌    | 6000/10845 [22:13<17:39,  4.57it/s, acc=0.751, epoch=36, loss=0.809]

epoch:36, idx:5999/10845, loss:0.8090912102758885, acc:0.750625


 56%|█████▋    | 6101/10845 [22:36<16:42,  4.73it/s, acc=0.751, epoch=36, loss=0.808]

epoch:36, idx:6099/10845, loss:0.807648769900447, acc:0.7505737704918033


 57%|█████▋    | 6201/10845 [22:58<16:55,  4.57it/s, acc=0.751, epoch=36, loss=0.809]

epoch:36, idx:6199/10845, loss:0.8092124696989214, acc:0.7505241935483871


 58%|█████▊    | 6301/10845 [23:20<16:53,  4.48it/s, acc=0.75, epoch=36, loss=0.81]  

epoch:36, idx:6299/10845, loss:0.8104776067298556, acc:0.75


 59%|█████▉    | 6400/10845 [23:42<15:36,  4.75it/s, acc=0.751, epoch=36, loss=0.809]

epoch:36, idx:6399/10845, loss:0.8084815898258239, acc:0.7507421875


 60%|█████▉    | 6501/10845 [24:04<15:18,  4.73it/s, acc=0.751, epoch=36, loss=0.808]

epoch:36, idx:6499/10845, loss:0.8075857703960859, acc:0.7509230769230769


 61%|██████    | 6601/10845 [24:27<15:21,  4.61it/s, acc=0.751, epoch=36, loss=0.807]

epoch:36, idx:6599/10845, loss:0.8067355890617226, acc:0.7509469696969697


 62%|██████▏   | 6700/10845 [24:49<15:34,  4.43it/s, acc=0.75, epoch=36, loss=0.808] 

epoch:36, idx:6699/10845, loss:0.8075213790028842, acc:0.7504850746268656


 63%|██████▎   | 6801/10845 [25:11<13:51,  4.86it/s, acc=0.75, epoch=36, loss=0.808] 

epoch:36, idx:6799/10845, loss:0.8082238144471365, acc:0.7500735294117648


 64%|██████▎   | 6900/10845 [25:33<14:23,  4.57it/s, acc=0.751, epoch=36, loss=0.808]

epoch:36, idx:6899/10845, loss:0.807766312006591, acc:0.7505434782608695


 65%|██████▍   | 7000/10845 [25:56<12:49,  5.00it/s, acc=0.75, epoch=36, loss=0.809] 

epoch:36, idx:6999/10845, loss:0.8093654648150717, acc:0.7500714285714286


 65%|██████▌   | 7100/10845 [26:18<13:34,  4.60it/s, acc=0.751, epoch=36, loss=0.807]

epoch:36, idx:7099/10845, loss:0.8073335589909217, acc:0.7506338028169014


 66%|██████▋   | 7200/10845 [26:40<14:10,  4.29it/s, acc=0.75, epoch=36, loss=0.809] 

epoch:36, idx:7199/10845, loss:0.8089885759105285, acc:0.7500694444444445


 67%|██████▋   | 7301/10845 [27:03<12:44,  4.64it/s, acc=0.75, epoch=36, loss=0.809]

epoch:36, idx:7299/10845, loss:0.8091616821534013, acc:0.7501712328767123


 68%|██████▊   | 7400/10845 [27:25<12:48,  4.48it/s, acc=0.75, epoch=36, loss=0.81] 

epoch:36, idx:7399/10845, loss:0.8099419295385077, acc:0.7498648648648648


 69%|██████▉   | 7501/10845 [27:47<12:17,  4.53it/s, acc=0.75, epoch=36, loss=0.811]

epoch:36, idx:7499/10845, loss:0.8107448587814967, acc:0.7499333333333333


 70%|███████   | 7600/10845 [28:10<12:03,  4.48it/s, acc=0.75, epoch=36, loss=0.811]

epoch:36, idx:7599/10845, loss:0.811060355407627, acc:0.7495394736842105


 71%|███████   | 7700/10845 [28:32<10:59,  4.77it/s, acc=0.75, epoch=36, loss=0.812]

epoch:36, idx:7699/10845, loss:0.8116017684766225, acc:0.7496103896103896


 72%|███████▏  | 7800/10845 [28:55<10:45,  4.72it/s, acc=0.749, epoch=36, loss=0.813]

epoch:36, idx:7799/10845, loss:0.8131216232058329, acc:0.7494551282051282


 73%|███████▎  | 7901/10845 [29:17<10:31,  4.66it/s, acc=0.75, epoch=36, loss=0.813] 

epoch:36, idx:7899/10845, loss:0.8133282584253746, acc:0.749746835443038


 74%|███████▍  | 8000/10845 [29:39<10:41,  4.44it/s, acc=0.75, epoch=36, loss=0.812]

epoch:36, idx:7999/10845, loss:0.8123716046288609, acc:0.7500625


 75%|███████▍  | 8100/10845 [30:01<10:23,  4.40it/s, acc=0.75, epoch=36, loss=0.813]

epoch:36, idx:8099/10845, loss:0.8127048551374012, acc:0.750216049382716


 76%|███████▌  | 8200/10845 [30:24<09:33,  4.61it/s, acc=0.75, epoch=36, loss=0.813]

epoch:36, idx:8199/10845, loss:0.8134645814357734, acc:0.7500914634146342


 77%|███████▋  | 8301/10845 [30:46<09:10,  4.62it/s, acc=0.75, epoch=36, loss=0.814]

epoch:36, idx:8299/10845, loss:0.8138577049683375, acc:0.7498192771084338


 77%|███████▋  | 8401/10845 [31:08<08:37,  4.72it/s, acc=0.75, epoch=36, loss=0.814]

epoch:36, idx:8399/10845, loss:0.8139433182775975, acc:0.7501190476190476


 78%|███████▊  | 8500/10845 [31:31<08:40,  4.51it/s, acc=0.75, epoch=36, loss=0.814]

epoch:36, idx:8499/10845, loss:0.814087410681388, acc:0.7502058823529412


 79%|███████▉  | 8600/10845 [31:53<08:40,  4.31it/s, acc=0.75, epoch=36, loss=0.813]

epoch:36, idx:8599/10845, loss:0.8133964673377747, acc:0.7503488372093023


 80%|████████  | 8701/10845 [32:15<07:54,  4.52it/s, acc=0.75, epoch=36, loss=0.813] 

epoch:36, idx:8699/10845, loss:0.813269657698171, acc:0.7503448275862069


 81%|████████  | 8800/10845 [32:37<07:27,  4.57it/s, acc=0.75, epoch=36, loss=0.814]

epoch:36, idx:8799/10845, loss:0.8135375150699508, acc:0.7504261363636363


 82%|████████▏ | 8901/10845 [33:00<07:01,  4.61it/s, acc=0.75, epoch=36, loss=0.813] 

epoch:36, idx:8899/10845, loss:0.8133812369322508, acc:0.7504213483146067


 83%|████████▎ | 9000/10845 [33:22<06:45,  4.55it/s, acc=0.75, epoch=36, loss=0.813] 

epoch:36, idx:8999/10845, loss:0.812844052626027, acc:0.7503333333333333


 84%|████████▍ | 9100/10845 [33:44<06:27,  4.51it/s, acc=0.75, epoch=36, loss=0.813]

epoch:36, idx:9099/10845, loss:0.8127952507825998, acc:0.7504120879120879


 85%|████████▍ | 9200/10845 [34:06<05:53,  4.65it/s, acc=0.75, epoch=36, loss=0.814]

epoch:36, idx:9199/10845, loss:0.8137597986148751, acc:0.7503260869565217


 86%|████████▌ | 9300/10845 [34:29<05:51,  4.40it/s, acc=0.75, epoch=36, loss=0.814]

epoch:36, idx:9299/10845, loss:0.8142402674305823, acc:0.7500537634408602


 87%|████████▋ | 9400/10845 [34:51<05:13,  4.61it/s, acc=0.75, epoch=36, loss=0.814]

epoch:36, idx:9399/10845, loss:0.8141010066296192, acc:0.7498936170212765


 88%|████████▊ | 9500/10845 [35:13<05:18,  4.22it/s, acc=0.749, epoch=36, loss=0.816]

epoch:36, idx:9499/10845, loss:0.8162750749337045, acc:0.749421052631579


 89%|████████▊ | 9600/10845 [35:36<04:31,  4.59it/s, acc=0.749, epoch=36, loss=0.816]

epoch:36, idx:9599/10845, loss:0.8162126083485782, acc:0.7493489583333334


 89%|████████▉ | 9701/10845 [35:58<04:15,  4.48it/s, acc=0.749, epoch=36, loss=0.817]

epoch:36, idx:9699/10845, loss:0.8170240069480287, acc:0.7493041237113403


 90%|█████████ | 9801/10845 [36:20<03:35,  4.85it/s, acc=0.749, epoch=36, loss=0.817]

epoch:36, idx:9799/10845, loss:0.8174776780179569, acc:0.749234693877551


 91%|█████████▏| 9900/10845 [36:43<03:41,  4.27it/s, acc=0.749, epoch=36, loss=0.817]

epoch:36, idx:9899/10845, loss:0.8173581084879962, acc:0.7490909090909091


 92%|█████████▏| 10000/10845 [37:05<03:05,  4.55it/s, acc=0.749, epoch=36, loss=0.818]

epoch:36, idx:9999/10845, loss:0.8184259467661381, acc:0.74895


 93%|█████████▎| 10101/10845 [37:27<02:33,  4.83it/s, acc=0.749, epoch=36, loss=0.819]

epoch:36, idx:10099/10845, loss:0.8190386956458044, acc:0.748960396039604


 94%|█████████▍| 10200/10845 [37:49<02:18,  4.66it/s, acc=0.749, epoch=36, loss=0.819]

epoch:36, idx:10199/10845, loss:0.819220572412014, acc:0.7489950980392157


 95%|█████████▍| 10300/10845 [38:11<02:07,  4.27it/s, acc=0.749, epoch=36, loss=0.819]

epoch:36, idx:10299/10845, loss:0.8185586285648994, acc:0.7491504854368932


 96%|█████████▌| 10400/10845 [38:34<01:41,  4.39it/s, acc=0.749, epoch=36, loss=0.819]

epoch:36, idx:10399/10845, loss:0.8185960588604212, acc:0.7494951923076923


 97%|█████████▋| 10501/10845 [38:56<01:13,  4.67it/s, acc=0.749, epoch=36, loss=0.819]

epoch:36, idx:10499/10845, loss:0.8187727944226492, acc:0.749452380952381


 98%|█████████▊| 10600/10845 [39:18<00:56,  4.31it/s, acc=0.749, epoch=36, loss=0.82] 

epoch:36, idx:10599/10845, loss:0.8195257685038279, acc:0.7493867924528302


 99%|█████████▊| 10701/10845 [39:40<00:30,  4.66it/s, acc=0.75, epoch=36, loss=0.819] 

epoch:36, idx:10699/10845, loss:0.8184896871475416, acc:0.7496028037383178


100%|█████████▉| 10801/10845 [40:02<00:08,  4.90it/s, acc=0.749, epoch=36, loss=0.819]

epoch:36, idx:10799/10845, loss:0.81855889989822, acc:0.7493518518518518


100%|██████████| 10845/10845 [40:11<00:00,  4.57it/s, acc=0.749, epoch=36, loss=0.819]


epoch:36, idx:0/1275, loss:1.53485107421875, acc:0.5
epoch:36, idx:100/1275, loss:1.475453550272649, acc:0.6435643564356436
epoch:36, idx:200/1275, loss:1.3422548112584585, acc:0.6517412935323383
epoch:36, idx:300/1275, loss:1.3053160589002692, acc:0.6561461794019934
epoch:36, idx:400/1275, loss:1.2764362932143365, acc:0.6596009975062345
epoch:36, idx:500/1275, loss:1.2432328086651252, acc:0.6646706586826348
epoch:36, idx:600/1275, loss:1.254453513269218, acc:0.6576539101497504
epoch:36, idx:700/1275, loss:1.2564638628599136, acc:0.6579885877318117
epoch:36, idx:800/1275, loss:1.2728550725810686, acc:0.6557428214731585
epoch:36, idx:900/1275, loss:1.2671037241568444, acc:0.6584350721420644
epoch:36, idx:1000/1275, loss:1.273636114942682, acc:0.6565934065934066
epoch:36, idx:1100/1275, loss:1.2583042438413532, acc:0.6612170753860127
epoch:36, idx:1200/1275, loss:1.2569202646228494, acc:0.6602830974188176


  1%|          | 100/10845 [00:22<40:24,  4.43it/s, acc=0.748, epoch=37, loss=0.765]

epoch:37, idx:99/10845, loss:0.7645160472393036, acc:0.7475


  2%|▏         | 201/10845 [00:45<39:13,  4.52it/s, acc=0.739, epoch=37, loss=0.864]

epoch:37, idx:199/10845, loss:0.8654300183057785, acc:0.73875


  3%|▎         | 301/10845 [01:07<37:13,  4.72it/s, acc=0.747, epoch=37, loss=0.825]

epoch:37, idx:299/10845, loss:0.8244958519935608, acc:0.7483333333333333


  4%|▎         | 400/10845 [01:29<41:36,  4.18it/s, acc=0.752, epoch=37, loss=0.8]  

epoch:37, idx:399/10845, loss:0.7997909811139107, acc:0.7525


  5%|▍         | 500/10845 [01:51<36:04,  4.78it/s, acc=0.754, epoch=37, loss=0.777]

epoch:37, idx:499/10845, loss:0.7766262173652649, acc:0.754


  6%|▌         | 600/10845 [02:13<42:14,  4.04it/s, acc=0.757, epoch=37, loss=0.759]

epoch:37, idx:599/10845, loss:0.7592552711566289, acc:0.7575


  6%|▋         | 700/10845 [02:35<36:48,  4.59it/s, acc=0.755, epoch=37, loss=0.774]

epoch:37, idx:699/10845, loss:0.7742605430739267, acc:0.7546428571428572


  7%|▋         | 800/10845 [02:57<36:08,  4.63it/s, acc=0.758, epoch=37, loss=0.762]

epoch:37, idx:799/10845, loss:0.7624571050703526, acc:0.758125


  8%|▊         | 901/10845 [03:20<36:36,  4.53it/s, acc=0.756, epoch=37, loss=0.773]

epoch:37, idx:899/10845, loss:0.7725326550006867, acc:0.7563888888888889


  9%|▉         | 1000/10845 [03:42<34:07,  4.81it/s, acc=0.756, epoch=37, loss=0.777]

epoch:37, idx:999/10845, loss:0.7774643021821975, acc:0.75575


 10%|█         | 1100/10845 [04:04<34:31,  4.70it/s, acc=0.756, epoch=37, loss=0.772]

epoch:37, idx:1099/10845, loss:0.7723886030912399, acc:0.7563636363636363


 11%|█         | 1201/10845 [04:26<34:42,  4.63it/s, acc=0.754, epoch=37, loss=0.781]

epoch:37, idx:1199/10845, loss:0.781819465359052, acc:0.7541666666666667


 12%|█▏        | 1301/10845 [04:48<36:02,  4.41it/s, acc=0.755, epoch=37, loss=0.781]

epoch:37, idx:1299/10845, loss:0.7799470654817728, acc:0.7551923076923077


 13%|█▎        | 1400/10845 [05:10<34:13,  4.60it/s, acc=0.752, epoch=37, loss=0.783]

epoch:37, idx:1399/10845, loss:0.7821565952471324, acc:0.7526785714285714


 14%|█▍        | 1500/10845 [05:32<32:15,  4.83it/s, acc=0.75, epoch=37, loss=0.784] 

epoch:37, idx:1499/10845, loss:0.7843417584101359, acc:0.75


 15%|█▍        | 1601/10845 [05:55<34:06,  4.52it/s, acc=0.749, epoch=37, loss=0.793]

epoch:37, idx:1599/10845, loss:0.7932822877168655, acc:0.7490625


 16%|█▌        | 1701/10845 [06:17<34:35,  4.40it/s, acc=0.747, epoch=37, loss=0.797]

epoch:37, idx:1699/10845, loss:0.797522405946956, acc:0.7470588235294118


 17%|█▋        | 1801/10845 [06:40<33:13,  4.54it/s, acc=0.747, epoch=37, loss=0.797]

epoch:37, idx:1799/10845, loss:0.7969441511233648, acc:0.7466666666666667


 18%|█▊        | 1901/10845 [07:02<28:12,  5.28it/s, acc=0.747, epoch=37, loss=0.797]

epoch:37, idx:1899/10845, loss:0.7972471568458959, acc:0.7471052631578947


 18%|█▊        | 2001/10845 [07:24<32:12,  4.58it/s, acc=0.748, epoch=37, loss=0.794]

epoch:37, idx:1999/10845, loss:0.7947154780626297, acc:0.747625


 19%|█▉        | 2100/10845 [07:46<33:39,  4.33it/s, acc=0.747, epoch=37, loss=0.794]

epoch:37, idx:2099/10845, loss:0.7938734896410079, acc:0.7473809523809524


 20%|██        | 2201/10845 [08:08<30:19,  4.75it/s, acc=0.749, epoch=37, loss=0.791]

epoch:37, idx:2199/10845, loss:0.790742830471559, acc:0.7490909090909091


 21%|██        | 2300/10845 [08:30<30:38,  4.65it/s, acc=0.75, epoch=37, loss=0.79]  

epoch:37, idx:2299/10845, loss:0.7902873358519181, acc:0.7496739130434783


 22%|██▏       | 2401/10845 [08:53<31:53,  4.41it/s, acc=0.75, epoch=37, loss=0.791]

epoch:37, idx:2399/10845, loss:0.7911657435198625, acc:0.7497916666666666


 23%|██▎       | 2501/10845 [09:15<30:33,  4.55it/s, acc=0.749, epoch=37, loss=0.793]

epoch:37, idx:2499/10845, loss:0.7926923325061798, acc:0.7492


 24%|██▍       | 2600/10845 [09:37<30:53,  4.45it/s, acc=0.75, epoch=37, loss=0.792] 

epoch:37, idx:2599/10845, loss:0.7920543321737876, acc:0.7495192307692308


 25%|██▍       | 2700/10845 [09:59<29:42,  4.57it/s, acc=0.749, epoch=37, loss=0.792]

epoch:37, idx:2699/10845, loss:0.7923559393706145, acc:0.7494444444444445


 26%|██▌       | 2801/10845 [10:22<30:09,  4.45it/s, acc=0.749, epoch=37, loss=0.794]

epoch:37, idx:2799/10845, loss:0.7935366161806243, acc:0.749375


 27%|██▋       | 2900/10845 [10:44<31:16,  4.23it/s, acc=0.75, epoch=37, loss=0.794] 

epoch:37, idx:2899/10845, loss:0.7938707213977287, acc:0.7500862068965517


 28%|██▊       | 3001/10845 [11:06<27:47,  4.70it/s, acc=0.749, epoch=37, loss=0.795]

epoch:37, idx:2999/10845, loss:0.7956618885993958, acc:0.7486666666666667


 29%|██▊       | 3101/10845 [11:28<28:48,  4.48it/s, acc=0.748, epoch=37, loss=0.8]  

epoch:37, idx:3099/10845, loss:0.7998340036023047, acc:0.7479838709677419


 30%|██▉       | 3200/10845 [11:50<28:24,  4.48it/s, acc=0.749, epoch=37, loss=0.799]

epoch:37, idx:3199/10845, loss:0.7991888145916164, acc:0.748828125


 30%|███       | 3300/10845 [12:13<28:57,  4.34it/s, acc=0.749, epoch=37, loss=0.8]  

epoch:37, idx:3299/10845, loss:0.7995929270260261, acc:0.748560606060606


 31%|███▏      | 3400/10845 [12:35<27:49,  4.46it/s, acc=0.749, epoch=37, loss=0.798]

epoch:37, idx:3399/10845, loss:0.7982385224629851, acc:0.7492647058823529


 32%|███▏      | 3500/10845 [12:57<27:02,  4.53it/s, acc=0.75, epoch=37, loss=0.798] 

epoch:37, idx:3499/10845, loss:0.7981950927291598, acc:0.7495


 33%|███▎      | 3600/10845 [13:20<27:05,  4.46it/s, acc=0.75, epoch=37, loss=0.796] 

epoch:37, idx:3599/10845, loss:0.7960642794602447, acc:0.7502777777777778


 34%|███▍      | 3701/10845 [13:42<25:48,  4.61it/s, acc=0.751, epoch=37, loss=0.793]

epoch:37, idx:3699/10845, loss:0.7933135697809426, acc:0.7511486486486486


 35%|███▌      | 3800/10845 [14:04<26:26,  4.44it/s, acc=0.751, epoch=37, loss=0.796]

epoch:37, idx:3799/10845, loss:0.7959169810226089, acc:0.7505263157894737


 36%|███▌      | 3900/10845 [14:26<27:43,  4.17it/s, acc=0.75, epoch=37, loss=0.798] 

epoch:37, idx:3899/10845, loss:0.7984969290556052, acc:0.7496794871794872


 37%|███▋      | 4000/10845 [14:49<25:38,  4.45it/s, acc=0.75, epoch=37, loss=0.797]

epoch:37, idx:3999/10845, loss:0.7969115537256003, acc:0.7500625


 38%|███▊      | 4101/10845 [15:11<23:29,  4.78it/s, acc=0.75, epoch=37, loss=0.798]

epoch:37, idx:4099/10845, loss:0.7977454686600988, acc:0.749939024390244


 39%|███▊      | 4200/10845 [15:33<24:07,  4.59it/s, acc=0.75, epoch=37, loss=0.8]  

epoch:37, idx:4199/10845, loss:0.7998971482259887, acc:0.7501190476190476


 40%|███▉      | 4300/10845 [15:55<24:06,  4.53it/s, acc=0.75, epoch=37, loss=0.801] 

epoch:37, idx:4299/10845, loss:0.8006694890316143, acc:0.749593023255814


 41%|████      | 4400/10845 [16:17<24:27,  4.39it/s, acc=0.749, epoch=37, loss=0.802]

epoch:37, idx:4399/10845, loss:0.8022068888897246, acc:0.7490909090909091


 41%|████▏     | 4500/10845 [16:39<23:21,  4.53it/s, acc=0.749, epoch=37, loss=0.804]

epoch:37, idx:4499/10845, loss:0.8036302303605609, acc:0.7487222222222222


 42%|████▏     | 4600/10845 [17:01<22:11,  4.69it/s, acc=0.749, epoch=37, loss=0.802]

epoch:37, idx:4599/10845, loss:0.8022560947226441, acc:0.7488586956521739


 43%|████▎     | 4701/10845 [17:24<20:36,  4.97it/s, acc=0.749, epoch=37, loss=0.801]

epoch:37, idx:4699/10845, loss:0.8015416266182636, acc:0.7490425531914894


 44%|████▍     | 4801/10845 [17:46<22:04,  4.56it/s, acc=0.748, epoch=37, loss=0.805]

epoch:37, idx:4799/10845, loss:0.8055144957080483, acc:0.74828125


 45%|████▌     | 4900/10845 [18:08<22:04,  4.49it/s, acc=0.749, epoch=37, loss=0.804]

epoch:37, idx:4899/10845, loss:0.8042668411561421, acc:0.7486224489795918


 46%|████▌     | 5001/10845 [18:31<20:24,  4.77it/s, acc=0.749, epoch=37, loss=0.805]

epoch:37, idx:4999/10845, loss:0.8042341383099556, acc:0.7486


 47%|████▋     | 5100/10845 [18:53<20:35,  4.65it/s, acc=0.749, epoch=37, loss=0.803]

epoch:37, idx:5099/10845, loss:0.8029461163516138, acc:0.7488725490196079


 48%|████▊     | 5200/10845 [19:15<22:21,  4.21it/s, acc=0.748, epoch=37, loss=0.808]

epoch:37, idx:5199/10845, loss:0.8083170997064847, acc:0.7478365384615384


 49%|████▉     | 5300/10845 [19:37<19:47,  4.67it/s, acc=0.748, epoch=37, loss=0.809]

epoch:37, idx:5299/10845, loss:0.8086825504842794, acc:0.7476415094339622


 50%|████▉     | 5400/10845 [20:00<20:11,  4.50it/s, acc=0.748, epoch=37, loss=0.806]

epoch:37, idx:5399/10845, loss:0.8064822643995285, acc:0.7483796296296297


 51%|█████     | 5500/10845 [20:22<18:48,  4.74it/s, acc=0.749, epoch=37, loss=0.805]

epoch:37, idx:5499/10845, loss:0.8046616550683975, acc:0.7487272727272727


 52%|█████▏    | 5600/10845 [20:45<20:25,  4.28it/s, acc=0.749, epoch=37, loss=0.804]

epoch:37, idx:5599/10845, loss:0.8039354542642831, acc:0.7490178571428572


 53%|█████▎    | 5700/10845 [21:07<18:23,  4.66it/s, acc=0.75, epoch=37, loss=0.803] 

epoch:37, idx:5699/10845, loss:0.8030177237799293, acc:0.7496491228070176


 53%|█████▎    | 5801/10845 [21:30<18:14,  4.61it/s, acc=0.749, epoch=37, loss=0.804]

epoch:37, idx:5799/10845, loss:0.8033643796834452, acc:0.7489655172413793


 54%|█████▍    | 5900/10845 [21:52<17:43,  4.65it/s, acc=0.75, epoch=37, loss=0.802] 

epoch:37, idx:5899/10845, loss:0.8021517994343225, acc:0.7496610169491525


 55%|█████▌    | 6000/10845 [22:14<16:27,  4.91it/s, acc=0.75, epoch=37, loss=0.802]

epoch:37, idx:5999/10845, loss:0.8021149583160877, acc:0.7497916666666666


 56%|█████▌    | 6100/10845 [22:37<17:44,  4.46it/s, acc=0.75, epoch=37, loss=0.803]

epoch:37, idx:6099/10845, loss:0.8027642312108493, acc:0.7497131147540984


 57%|█████▋    | 6200/10845 [22:59<16:27,  4.70it/s, acc=0.749, epoch=37, loss=0.804]

epoch:37, idx:6199/10845, loss:0.8038314689455494, acc:0.7494354838709677


 58%|█████▊    | 6300/10845 [23:21<16:38,  4.55it/s, acc=0.749, epoch=37, loss=0.806]

epoch:37, idx:6299/10845, loss:0.8055093855138808, acc:0.7490873015873016


 59%|█████▉    | 6400/10845 [23:44<16:10,  4.58it/s, acc=0.749, epoch=37, loss=0.806]

epoch:37, idx:6399/10845, loss:0.8058394929487258, acc:0.7489453125


 60%|█████▉    | 6500/10845 [24:06<15:49,  4.58it/s, acc=0.748, epoch=37, loss=0.807]

epoch:37, idx:6499/10845, loss:0.8065106614461311, acc:0.748423076923077


 61%|██████    | 6600/10845 [24:29<15:35,  4.54it/s, acc=0.749, epoch=37, loss=0.806]

epoch:37, idx:6599/10845, loss:0.8060660786159111, acc:0.7486742424242424


 62%|██████▏   | 6700/10845 [24:51<15:17,  4.52it/s, acc=0.749, epoch=37, loss=0.806]

epoch:37, idx:6699/10845, loss:0.8060987729456888, acc:0.7487686567164179


 63%|██████▎   | 6800/10845 [25:12<15:28,  4.36it/s, acc=0.749, epoch=37, loss=0.806]

epoch:37, idx:6799/10845, loss:0.8055923119537971, acc:0.7488970588235294


 64%|██████▎   | 6900/10845 [25:35<15:13,  4.32it/s, acc=0.749, epoch=37, loss=0.805]

epoch:37, idx:6899/10845, loss:0.8050885782207268, acc:0.7493478260869565


 65%|██████▍   | 7000/10845 [25:57<14:39,  4.37it/s, acc=0.75, epoch=37, loss=0.806] 

epoch:37, idx:6999/10845, loss:0.8057171660491398, acc:0.7495357142857143


 65%|██████▌   | 7100/10845 [26:20<12:51,  4.86it/s, acc=0.75, epoch=37, loss=0.806] 

epoch:37, idx:7099/10845, loss:0.8063372849242788, acc:0.7497183098591549


 66%|██████▋   | 7200/10845 [26:42<13:45,  4.42it/s, acc=0.75, epoch=37, loss=0.806]

epoch:37, idx:7199/10845, loss:0.8060121882458527, acc:0.7496527777777777


 67%|██████▋   | 7300/10845 [27:05<13:40,  4.32it/s, acc=0.75, epoch=37, loss=0.806]

epoch:37, idx:7299/10845, loss:0.8056344231514082, acc:0.7496575342465753


 68%|██████▊   | 7400/10845 [27:27<13:14,  4.34it/s, acc=0.749, epoch=37, loss=0.806]

epoch:37, idx:7399/10845, loss:0.8059148334168099, acc:0.7494256756756756


 69%|██████▉   | 7501/10845 [27:50<11:33,  4.82it/s, acc=0.749, epoch=37, loss=0.806]

epoch:37, idx:7499/10845, loss:0.806300365336736, acc:0.7494666666666666


 70%|███████   | 7601/10845 [28:12<11:11,  4.83it/s, acc=0.75, epoch=37, loss=0.807] 

epoch:37, idx:7599/10845, loss:0.8065802997664402, acc:0.7496381578947369


 71%|███████   | 7700/10845 [28:34<11:00,  4.76it/s, acc=0.75, epoch=37, loss=0.806]

epoch:37, idx:7699/10845, loss:0.8063308504185119, acc:0.7498376623376624


 72%|███████▏  | 7801/10845 [28:57<11:12,  4.53it/s, acc=0.75, epoch=37, loss=0.808] 

epoch:37, idx:7799/10845, loss:0.8084315927059222, acc:0.7495512820512821


 73%|███████▎  | 7901/10845 [29:19<11:01,  4.45it/s, acc=0.749, epoch=37, loss=0.811]

epoch:37, idx:7899/10845, loss:0.8106603592256957, acc:0.7493354430379747


 74%|███████▍  | 8001/10845 [29:41<10:25,  4.55it/s, acc=0.749, epoch=37, loss=0.812]

epoch:37, idx:7999/10845, loss:0.8121713389903307, acc:0.74903125


 75%|███████▍  | 8100/10845 [30:03<09:53,  4.62it/s, acc=0.749, epoch=37, loss=0.812]

epoch:37, idx:8099/10845, loss:0.8115433944743357, acc:0.7494753086419753


 76%|███████▌  | 8201/10845 [30:25<09:41,  4.54it/s, acc=0.749, epoch=37, loss=0.813]

epoch:37, idx:8199/10845, loss:0.8133600209398968, acc:0.7491768292682927


 77%|███████▋  | 8301/10845 [30:47<09:47,  4.33it/s, acc=0.749, epoch=37, loss=0.814]

epoch:37, idx:8299/10845, loss:0.8136786436747355, acc:0.7491265060240964


 77%|███████▋  | 8400/10845 [31:08<08:24,  4.84it/s, acc=0.749, epoch=37, loss=0.813]

epoch:37, idx:8399/10845, loss:0.8126976817846299, acc:0.7493452380952381


 78%|███████▊  | 8500/10845 [31:31<08:33,  4.57it/s, acc=0.749, epoch=37, loss=0.813]

epoch:37, idx:8499/10845, loss:0.8131154864535612, acc:0.7492941176470588


 79%|███████▉  | 8600/10845 [31:53<08:52,  4.22it/s, acc=0.749, epoch=37, loss=0.813]

epoch:37, idx:8599/10845, loss:0.8128088669167008, acc:0.7494767441860465


 80%|████████  | 8700/10845 [32:15<07:40,  4.65it/s, acc=0.75, epoch=37, loss=0.814] 

epoch:37, idx:8699/10845, loss:0.8135690231843926, acc:0.7495402298850574


 81%|████████  | 8801/10845 [32:37<07:44,  4.40it/s, acc=0.75, epoch=37, loss=0.813] 

epoch:37, idx:8799/10845, loss:0.8131110066717321, acc:0.7495454545454545


 82%|████████▏ | 8901/10845 [32:59<07:20,  4.42it/s, acc=0.75, epoch=37, loss=0.813] 

epoch:37, idx:8899/10845, loss:0.8127011115497418, acc:0.7495505617977528


 83%|████████▎ | 9000/10845 [33:21<06:53,  4.46it/s, acc=0.75, epoch=37, loss=0.812] 

epoch:37, idx:8999/10845, loss:0.8124346797333823, acc:0.7495833333333334


 84%|████████▍ | 9101/10845 [33:43<06:34,  4.42it/s, acc=0.75, epoch=37, loss=0.812]

epoch:37, idx:9099/10845, loss:0.8122502037784556, acc:0.7496428571428572


 85%|████████▍ | 9200/10845 [34:05<06:08,  4.47it/s, acc=0.75, epoch=37, loss=0.812]

epoch:37, idx:9199/10845, loss:0.8124599883025108, acc:0.7497282608695652


 86%|████████▌ | 9300/10845 [34:28<06:02,  4.27it/s, acc=0.75, epoch=37, loss=0.812]

epoch:37, idx:9299/10845, loss:0.8117655116447838, acc:0.7499193548387096


 87%|████████▋ | 9401/10845 [34:51<05:08,  4.67it/s, acc=0.75, epoch=37, loss=0.811]

epoch:37, idx:9399/10845, loss:0.8113365765264694, acc:0.7499734042553191


 88%|████████▊ | 9500/10845 [35:12<04:37,  4.84it/s, acc=0.75, epoch=37, loss=0.811]

epoch:37, idx:9499/10845, loss:0.8114643504807824, acc:0.7498684210526316


 89%|████████▊ | 9600/10845 [35:35<04:38,  4.48it/s, acc=0.75, epoch=37, loss=0.811]

epoch:37, idx:9599/10845, loss:0.8108493357338011, acc:0.7499479166666667


 89%|████████▉ | 9700/10845 [35:57<04:09,  4.59it/s, acc=0.75, epoch=37, loss=0.812]

epoch:37, idx:9699/10845, loss:0.8118209751549456, acc:0.7497680412371134


 90%|█████████ | 9801/10845 [36:19<03:42,  4.69it/s, acc=0.75, epoch=37, loss=0.81] 

epoch:37, idx:9799/10845, loss:0.810344196746544, acc:0.7500510204081633


 91%|█████████▏| 9901/10845 [36:40<02:29,  6.34it/s, acc=0.75, epoch=37, loss=0.811]

epoch:37, idx:9899/10845, loss:0.8114171371014431, acc:0.7497979797979798


 92%|█████████▏| 10001/10845 [37:02<02:59,  4.70it/s, acc=0.75, epoch=37, loss=0.811]

epoch:37, idx:9999/10845, loss:0.8109792626082897, acc:0.7497


 93%|█████████▎| 10100/10845 [37:24<02:39,  4.68it/s, acc=0.75, epoch=37, loss=0.811] 

epoch:37, idx:10099/10845, loss:0.8110610512754705, acc:0.7497277227722773


 94%|█████████▍| 10201/10845 [37:46<02:22,  4.51it/s, acc=0.75, epoch=37, loss=0.81] 

epoch:37, idx:10199/10845, loss:0.8100556399600178, acc:0.7498774509803922


 95%|█████████▍| 10300/10845 [38:08<01:57,  4.65it/s, acc=0.75, epoch=37, loss=0.81]

epoch:37, idx:10299/10845, loss:0.8101919796802465, acc:0.7497815533980583


 96%|█████████▌| 10400/10845 [38:30<01:34,  4.69it/s, acc=0.75, epoch=37, loss=0.811]

epoch:37, idx:10399/10845, loss:0.8108325854803508, acc:0.7497115384615385


 97%|█████████▋| 10500/10845 [38:53<01:17,  4.43it/s, acc=0.75, epoch=37, loss=0.811]

epoch:37, idx:10499/10845, loss:0.8113967404762904, acc:0.7496904761904762


 98%|█████████▊| 10600/10845 [39:15<01:00,  4.04it/s, acc=0.749, epoch=37, loss=0.813]

epoch:37, idx:10599/10845, loss:0.8129997523494487, acc:0.7493867924528302


 99%|█████████▊| 10700/10845 [39:37<00:31,  4.55it/s, acc=0.749, epoch=37, loss=0.813]

epoch:37, idx:10699/10845, loss:0.8130352055636522, acc:0.7492757009345794


100%|█████████▉| 10801/10845 [40:00<00:09,  4.71it/s, acc=0.749, epoch=37, loss=0.812]

epoch:37, idx:10799/10845, loss:0.8124172403580613, acc:0.749375


100%|██████████| 10845/10845 [40:09<00:00,  4.84it/s, acc=0.749, epoch=37, loss=0.812]


epoch:37, idx:0/1275, loss:1.4325032234191895, acc:0.5
epoch:37, idx:100/1275, loss:1.454434587223695, acc:0.6485148514851485
epoch:37, idx:200/1275, loss:1.3393656139943138, acc:0.6529850746268657
epoch:37, idx:300/1275, loss:1.3037232489284882, acc:0.6578073089700996
epoch:37, idx:400/1275, loss:1.2791919758789556, acc:0.6645885286783042
epoch:37, idx:500/1275, loss:1.2433326318116482, acc:0.6661676646706587
epoch:37, idx:600/1275, loss:1.2548278322235717, acc:0.6589018302828619
epoch:37, idx:700/1275, loss:1.2596150259148546, acc:0.6604850213980028
epoch:37, idx:800/1275, loss:1.2764076810055756, acc:0.6601123595505618
epoch:37, idx:900/1275, loss:1.2718366831706447, acc:0.6617647058823529
epoch:37, idx:1000/1275, loss:1.276764840155572, acc:0.6605894105894106
epoch:37, idx:1100/1275, loss:1.260918042748544, acc:0.6646230699364214
epoch:37, idx:1200/1275, loss:1.2585885563460515, acc:0.6634054954204829


  1%|          | 100/10845 [00:21<38:35,  4.64it/s, acc=0.745, epoch=38, loss=0.826]

epoch:38, idx:99/10845, loss:0.8257083010673523, acc:0.745


  2%|▏         | 201/10845 [00:44<37:26,  4.74it/s, acc=0.757, epoch=38, loss=0.799]

epoch:38, idx:199/10845, loss:0.7999552232027054, acc:0.75625


  3%|▎         | 300/10845 [01:06<40:36,  4.33it/s, acc=0.759, epoch=38, loss=0.793]

epoch:38, idx:299/10845, loss:0.7943081446488698, acc:0.7591666666666667


  4%|▎         | 401/10845 [01:29<38:08,  4.56it/s, acc=0.746, epoch=38, loss=0.805]

epoch:38, idx:399/10845, loss:0.8049816703796386, acc:0.745625


  5%|▍         | 501/10845 [01:51<36:59,  4.66it/s, acc=0.744, epoch=38, loss=0.801]

epoch:38, idx:499/10845, loss:0.8023282009363174, acc:0.7435


  6%|▌         | 600/10845 [02:11<38:59,  4.38it/s, acc=0.75, epoch=38, loss=0.789] 

epoch:38, idx:599/10845, loss:0.788956051170826, acc:0.7495833333333334


  6%|▋         | 700/10845 [02:34<37:08,  4.55it/s, acc=0.749, epoch=38, loss=0.779]

epoch:38, idx:699/10845, loss:0.7790320616960525, acc:0.7492857142857143


  7%|▋         | 800/10845 [02:56<41:20,  4.05it/s, acc=0.747, epoch=38, loss=0.784]

epoch:38, idx:799/10845, loss:0.7838919032365084, acc:0.746875


  8%|▊         | 900/10845 [03:18<36:11,  4.58it/s, acc=0.747, epoch=38, loss=0.788]

epoch:38, idx:899/10845, loss:0.7879637524154451, acc:0.7466666666666667


  9%|▉         | 1001/10845 [03:41<35:34,  4.61it/s, acc=0.748, epoch=38, loss=0.785]

epoch:38, idx:999/10845, loss:0.7856932345032692, acc:0.748


 10%|█         | 1100/10845 [04:03<36:28,  4.45it/s, acc=0.752, epoch=38, loss=0.774]

epoch:38, idx:1099/10845, loss:0.7741759004918012, acc:0.7518181818181818


 11%|█         | 1200/10845 [04:25<35:11,  4.57it/s, acc=0.75, epoch=38, loss=0.781] 

epoch:38, idx:1199/10845, loss:0.7814637922743957, acc:0.75


 12%|█▏        | 1300/10845 [04:48<37:22,  4.26it/s, acc=0.749, epoch=38, loss=0.783]

epoch:38, idx:1299/10845, loss:0.7833200464340356, acc:0.7488461538461538


 13%|█▎        | 1400/10845 [05:10<34:48,  4.52it/s, acc=0.752, epoch=38, loss=0.776]

epoch:38, idx:1399/10845, loss:0.7758777994343212, acc:0.7525


 14%|█▍        | 1500/10845 [05:32<34:52,  4.47it/s, acc=0.755, epoch=38, loss=0.771]

epoch:38, idx:1499/10845, loss:0.7714312601486842, acc:0.7548333333333334


 15%|█▍        | 1600/10845 [05:54<34:21,  4.49it/s, acc=0.756, epoch=38, loss=0.767]

epoch:38, idx:1599/10845, loss:0.7670144741609692, acc:0.75625


 16%|█▌        | 1701/10845 [06:17<31:45,  4.80it/s, acc=0.755, epoch=38, loss=0.769]

epoch:38, idx:1699/10845, loss:0.768231761841213, acc:0.7552941176470588


 17%|█▋        | 1800/10845 [06:38<31:19,  4.81it/s, acc=0.754, epoch=38, loss=0.772]

epoch:38, idx:1799/10845, loss:0.7714885686172379, acc:0.7538888888888889


 18%|█▊        | 1900/10845 [07:01<34:42,  4.29it/s, acc=0.754, epoch=38, loss=0.77] 

epoch:38, idx:1899/10845, loss:0.7700613731773276, acc:0.7540789473684211


 18%|█▊        | 2000/10845 [07:22<31:48,  4.64it/s, acc=0.756, epoch=38, loss=0.767]

epoch:38, idx:1999/10845, loss:0.7667784512937069, acc:0.755625


 19%|█▉        | 2100/10845 [07:44<31:07,  4.68it/s, acc=0.756, epoch=38, loss=0.764]

epoch:38, idx:2099/10845, loss:0.763697968238876, acc:0.7561904761904762


 20%|██        | 2201/10845 [08:07<31:45,  4.54it/s, acc=0.757, epoch=38, loss=0.76] 

epoch:38, idx:2199/10845, loss:0.7608386380835013, acc:0.7567045454545455


 21%|██        | 2300/10845 [08:29<31:23,  4.54it/s, acc=0.757, epoch=38, loss=0.76] 

epoch:38, idx:2299/10845, loss:0.7598359654519868, acc:0.7570652173913044


 22%|██▏       | 2400/10845 [08:51<32:19,  4.36it/s, acc=0.757, epoch=38, loss=0.767]

epoch:38, idx:2399/10845, loss:0.7669529732316732, acc:0.7567708333333333


 23%|██▎       | 2501/10845 [09:14<29:28,  4.72it/s, acc=0.757, epoch=38, loss=0.765]

epoch:38, idx:2499/10845, loss:0.764994203543663, acc:0.7572


 24%|██▍       | 2600/10845 [09:36<29:20,  4.68it/s, acc=0.758, epoch=38, loss=0.766]

epoch:38, idx:2599/10845, loss:0.7658227507655437, acc:0.7576923076923077


 25%|██▍       | 2701/10845 [09:58<29:25,  4.61it/s, acc=0.758, epoch=38, loss=0.766]

epoch:38, idx:2699/10845, loss:0.766597118973732, acc:0.7575


 26%|██▌       | 2801/10845 [10:20<31:01,  4.32it/s, acc=0.758, epoch=38, loss=0.767]

epoch:38, idx:2799/10845, loss:0.7668951996309417, acc:0.7579464285714286


 27%|██▋       | 2901/10845 [10:42<28:52,  4.59it/s, acc=0.758, epoch=38, loss=0.766]

epoch:38, idx:2899/10845, loss:0.7666178850469918, acc:0.7581034482758621


 28%|██▊       | 3000/10845 [11:04<27:04,  4.83it/s, acc=0.757, epoch=38, loss=0.771]

epoch:38, idx:2999/10845, loss:0.7712840417226156, acc:0.7573333333333333


 29%|██▊       | 3100/10845 [11:26<28:59,  4.45it/s, acc=0.757, epoch=38, loss=0.771]

epoch:38, idx:3099/10845, loss:0.7707239789732041, acc:0.7568548387096774


 30%|██▉       | 3201/10845 [11:49<27:38,  4.61it/s, acc=0.757, epoch=38, loss=0.768]

epoch:38, idx:3199/10845, loss:0.7682625282928348, acc:0.756953125


 30%|███       | 3301/10845 [12:11<28:05,  4.48it/s, acc=0.757, epoch=38, loss=0.771]

epoch:38, idx:3299/10845, loss:0.7716416731747714, acc:0.7568181818181818


 31%|███▏      | 3400/10845 [12:33<27:50,  4.46it/s, acc=0.756, epoch=38, loss=0.773]

epoch:38, idx:3399/10845, loss:0.772774406987078, acc:0.7557352941176471


 32%|███▏      | 3500/10845 [12:55<26:37,  4.60it/s, acc=0.756, epoch=38, loss=0.774]

epoch:38, idx:3499/10845, loss:0.7736507083177566, acc:0.7557142857142857


 33%|███▎      | 3600/10845 [13:17<28:13,  4.28it/s, acc=0.757, epoch=38, loss=0.77] 

epoch:38, idx:3599/10845, loss:0.7697956975797812, acc:0.7565972222222223


 34%|███▍      | 3700/10845 [13:39<27:09,  4.39it/s, acc=0.755, epoch=38, loss=0.776]

epoch:38, idx:3699/10845, loss:0.776295918403445, acc:0.755472972972973


 35%|███▌      | 3801/10845 [14:02<24:27,  4.80it/s, acc=0.755, epoch=38, loss=0.78] 

epoch:38, idx:3799/10845, loss:0.7805006940427579, acc:0.7547368421052632


 36%|███▌      | 3901/10845 [14:24<24:08,  4.79it/s, acc=0.755, epoch=38, loss=0.781]

epoch:38, idx:3899/10845, loss:0.7814051049183577, acc:0.7545512820512821


 37%|███▋      | 4001/10845 [14:46<24:45,  4.61it/s, acc=0.754, epoch=38, loss=0.781]

epoch:38, idx:3999/10845, loss:0.7804585729837418, acc:0.7543125


 38%|███▊      | 4101/10845 [15:08<24:21,  4.61it/s, acc=0.755, epoch=38, loss=0.778]

epoch:38, idx:4099/10845, loss:0.7782516387613808, acc:0.7548780487804878


 39%|███▊      | 4201/10845 [15:31<24:49,  4.46it/s, acc=0.755, epoch=38, loss=0.78] 

epoch:38, idx:4199/10845, loss:0.7796602190392358, acc:0.755297619047619


 40%|███▉      | 4300/10845 [15:53<23:47,  4.59it/s, acc=0.756, epoch=38, loss=0.78] 

epoch:38, idx:4299/10845, loss:0.7799915826320648, acc:0.7555813953488372


 41%|████      | 4400/10845 [16:14<24:22,  4.41it/s, acc=0.755, epoch=38, loss=0.781]

epoch:38, idx:4399/10845, loss:0.7809215570579876, acc:0.7554545454545455


 41%|████▏     | 4500/10845 [16:37<24:26,  4.33it/s, acc=0.755, epoch=38, loss=0.779]

epoch:38, idx:4499/10845, loss:0.7786777183479733, acc:0.7552777777777778


 42%|████▏     | 4600/10845 [16:59<25:07,  4.14it/s, acc=0.755, epoch=38, loss=0.78] 

epoch:38, idx:4599/10845, loss:0.780239590484163, acc:0.7553804347826087


 43%|████▎     | 4701/10845 [17:22<22:06,  4.63it/s, acc=0.755, epoch=38, loss=0.782]

epoch:38, idx:4699/10845, loss:0.7820453581911452, acc:0.7549468085106383


 44%|████▍     | 4800/10845 [17:44<22:22,  4.50it/s, acc=0.755, epoch=38, loss=0.781]

epoch:38, idx:4799/10845, loss:0.7809465558826924, acc:0.7553645833333333


 45%|████▌     | 4900/10845 [18:06<23:51,  4.15it/s, acc=0.756, epoch=38, loss=0.782]

epoch:38, idx:4899/10845, loss:0.781579045130282, acc:0.7558163265306123


 46%|████▌     | 5001/10845 [18:29<20:40,  4.71it/s, acc=0.756, epoch=38, loss=0.783]

epoch:38, idx:4999/10845, loss:0.7827108427762985, acc:0.75595


 47%|████▋     | 5100/10845 [18:51<22:51,  4.19it/s, acc=0.756, epoch=38, loss=0.782]

epoch:38, idx:5099/10845, loss:0.7826044565088609, acc:0.755686274509804


 48%|████▊     | 5200/10845 [19:13<20:43,  4.54it/s, acc=0.756, epoch=38, loss=0.784]

epoch:38, idx:5199/10845, loss:0.7836546933650971, acc:0.755576923076923


 49%|████▉     | 5300/10845 [19:35<20:22,  4.54it/s, acc=0.755, epoch=38, loss=0.785]

epoch:38, idx:5299/10845, loss:0.7854485971972628, acc:0.7553773584905661


 50%|████▉     | 5400/10845 [19:57<20:56,  4.33it/s, acc=0.755, epoch=38, loss=0.786]

epoch:38, idx:5399/10845, loss:0.7862595261026312, acc:0.7552777777777778


 51%|█████     | 5500/10845 [20:19<18:50,  4.73it/s, acc=0.755, epoch=38, loss=0.788]

epoch:38, idx:5499/10845, loss:0.7882724360335956, acc:0.7548636363636364


 52%|█████▏    | 5600/10845 [20:42<19:47,  4.42it/s, acc=0.755, epoch=38, loss=0.788]

epoch:38, idx:5599/10845, loss:0.7878147038604532, acc:0.7548660714285714


 53%|█████▎    | 5701/10845 [21:04<18:39,  4.59it/s, acc=0.756, epoch=38, loss=0.786]

epoch:38, idx:5699/10845, loss:0.7863842270457954, acc:0.7555701754385965


 53%|█████▎    | 5800/10845 [21:27<18:42,  4.49it/s, acc=0.755, epoch=38, loss=0.788]

epoch:38, idx:5799/10845, loss:0.7884796471431338, acc:0.7551293103448276


 54%|█████▍    | 5900/10845 [21:49<18:50,  4.37it/s, acc=0.755, epoch=38, loss=0.79] 

epoch:38, idx:5899/10845, loss:0.7902630327717732, acc:0.7547457627118644


 55%|█████▌    | 6000/10845 [22:11<16:57,  4.76it/s, acc=0.754, epoch=38, loss=0.791]

epoch:38, idx:5999/10845, loss:0.7910845428903898, acc:0.754375


 56%|█████▌    | 6100/10845 [22:33<16:57,  4.66it/s, acc=0.754, epoch=38, loss=0.794]

epoch:38, idx:6099/10845, loss:0.7935480622385369, acc:0.7540573770491803


 57%|█████▋    | 6200/10845 [22:56<18:16,  4.24it/s, acc=0.754, epoch=38, loss=0.794]

epoch:38, idx:6199/10845, loss:0.7941932422211093, acc:0.7539516129032258


 58%|█████▊    | 6301/10845 [23:18<16:15,  4.66it/s, acc=0.754, epoch=38, loss=0.793]

epoch:38, idx:6299/10845, loss:0.7931702642497562, acc:0.7544444444444445


 59%|█████▉    | 6400/10845 [23:40<16:23,  4.52it/s, acc=0.754, epoch=38, loss=0.793]

epoch:38, idx:6399/10845, loss:0.7931675889249891, acc:0.7544140625


 60%|█████▉    | 6500/10845 [24:03<15:59,  4.53it/s, acc=0.754, epoch=38, loss=0.793]

epoch:38, idx:6499/10845, loss:0.7934054166628765, acc:0.7543846153846154


 61%|██████    | 6600/10845 [24:25<15:29,  4.57it/s, acc=0.755, epoch=38, loss=0.793]

epoch:38, idx:6599/10845, loss:0.7928633031790907, acc:0.7546590909090909


 62%|██████▏   | 6701/10845 [24:48<15:29,  4.46it/s, acc=0.755, epoch=38, loss=0.792]

epoch:38, idx:6699/10845, loss:0.7922604480608185, acc:0.7547388059701493


 63%|██████▎   | 6800/10845 [25:09<14:57,  4.51it/s, acc=0.754, epoch=38, loss=0.792]

epoch:38, idx:6799/10845, loss:0.7923183348950218, acc:0.7544117647058823


 64%|██████▎   | 6900/10845 [25:31<15:27,  4.25it/s, acc=0.755, epoch=38, loss=0.792]

epoch:38, idx:6899/10845, loss:0.7922712665709897, acc:0.7545289855072463


 65%|██████▍   | 7001/10845 [25:54<15:05,  4.25it/s, acc=0.755, epoch=38, loss=0.791]

epoch:38, idx:6999/10845, loss:0.7911919902307647, acc:0.7548571428571429


 65%|██████▌   | 7100/10845 [26:15<13:27,  4.64it/s, acc=0.755, epoch=38, loss=0.791]

epoch:38, idx:7099/10845, loss:0.7910943676468353, acc:0.7548239436619718


 66%|██████▋   | 7200/10845 [26:37<12:59,  4.67it/s, acc=0.755, epoch=38, loss=0.79] 

epoch:38, idx:7199/10845, loss:0.7896348981642061, acc:0.7551388888888889


 67%|██████▋   | 7300/10845 [27:00<12:51,  4.60it/s, acc=0.755, epoch=38, loss=0.79] 

epoch:38, idx:7299/10845, loss:0.7900031246959347, acc:0.7547945205479452


 68%|██████▊   | 7401/10845 [27:22<12:58,  4.43it/s, acc=0.755, epoch=38, loss=0.789]

epoch:38, idx:7399/10845, loss:0.7895155938332146, acc:0.7546283783783784


 69%|██████▉   | 7501/10845 [27:45<12:05,  4.61it/s, acc=0.754, epoch=38, loss=0.79] 

epoch:38, idx:7499/10845, loss:0.7899385933478673, acc:0.7545666666666667


 70%|███████   | 7601/10845 [28:07<12:09,  4.44it/s, acc=0.754, epoch=38, loss=0.792]

epoch:38, idx:7599/10845, loss:0.79168857568973, acc:0.7542105263157894


 71%|███████   | 7701/10845 [28:29<11:39,  4.50it/s, acc=0.754, epoch=38, loss=0.793]

epoch:38, idx:7699/10845, loss:0.792473199437191, acc:0.7541233766233766


 72%|███████▏  | 7800/10845 [28:51<11:04,  4.59it/s, acc=0.754, epoch=38, loss=0.792]

epoch:38, idx:7799/10845, loss:0.7920282058150341, acc:0.7540384615384615


 73%|███████▎  | 7900/10845 [29:13<11:01,  4.45it/s, acc=0.754, epoch=38, loss=0.794]

epoch:38, idx:7899/10845, loss:0.7936406645669213, acc:0.7535443037974684


 74%|███████▍  | 8001/10845 [29:35<09:47,  4.84it/s, acc=0.753, epoch=38, loss=0.794]

epoch:38, idx:7999/10845, loss:0.7943365355581045, acc:0.75328125


 75%|███████▍  | 8100/10845 [29:57<10:13,  4.48it/s, acc=0.754, epoch=38, loss=0.794]

epoch:38, idx:8099/10845, loss:0.7937754665627892, acc:0.7535802469135803


 76%|███████▌  | 8200/10845 [30:19<09:38,  4.57it/s, acc=0.754, epoch=38, loss=0.793]

epoch:38, idx:8199/10845, loss:0.7932433354563829, acc:0.7539329268292683


 77%|███████▋  | 8301/10845 [30:41<09:02,  4.69it/s, acc=0.754, epoch=38, loss=0.793]

epoch:38, idx:8299/10845, loss:0.7926679922299212, acc:0.7543373493975903


 77%|███████▋  | 8401/10845 [31:04<08:52,  4.59it/s, acc=0.754, epoch=38, loss=0.792]

epoch:38, idx:8399/10845, loss:0.7923132440447808, acc:0.7544345238095238


 78%|███████▊  | 8500/10845 [31:26<08:48,  4.44it/s, acc=0.754, epoch=38, loss=0.794]

epoch:38, idx:8499/10845, loss:0.79359424607894, acc:0.7540882352941176


 79%|███████▉  | 8601/10845 [31:48<08:03,  4.64it/s, acc=0.754, epoch=38, loss=0.795]

epoch:38, idx:8599/10845, loss:0.7947171139509179, acc:0.753546511627907


 80%|████████  | 8701/10845 [32:11<07:53,  4.53it/s, acc=0.754, epoch=38, loss=0.795]

epoch:38, idx:8699/10845, loss:0.7944908088445664, acc:0.7537931034482759


 81%|████████  | 8801/10845 [32:32<07:13,  4.71it/s, acc=0.753, epoch=38, loss=0.796]

epoch:38, idx:8799/10845, loss:0.7963047405738722, acc:0.7533806818181819


 82%|████████▏ | 8900/10845 [32:54<06:48,  4.76it/s, acc=0.754, epoch=38, loss=0.796]

epoch:38, idx:8899/10845, loss:0.796094387012921, acc:0.753567415730337


 83%|████████▎ | 9000/10845 [33:17<06:41,  4.59it/s, acc=0.753, epoch=38, loss=0.797]

epoch:38, idx:8999/10845, loss:0.7968525529901187, acc:0.7534722222222222


 84%|████████▍ | 9100/10845 [33:39<06:43,  4.32it/s, acc=0.754, epoch=38, loss=0.796]

epoch:38, idx:9099/10845, loss:0.7964291900036099, acc:0.7535714285714286


 85%|████████▍ | 9200/10845 [34:01<06:24,  4.28it/s, acc=0.753, epoch=38, loss=0.796]

epoch:38, idx:9199/10845, loss:0.7963867371723704, acc:0.7534782608695653


 86%|████████▌ | 9301/10845 [34:23<05:35,  4.61it/s, acc=0.754, epoch=38, loss=0.795]

epoch:38, idx:9299/10845, loss:0.795290253697544, acc:0.7537634408602151


 87%|████████▋ | 9401/10845 [34:46<05:36,  4.29it/s, acc=0.754, epoch=38, loss=0.796]

epoch:38, idx:9399/10845, loss:0.7955163283329061, acc:0.75375


 88%|████████▊ | 9500/10845 [35:08<05:05,  4.40it/s, acc=0.754, epoch=38, loss=0.795]

epoch:38, idx:9499/10845, loss:0.7947728109140145, acc:0.7541315789473684


 89%|████████▊ | 9601/10845 [35:30<04:29,  4.62it/s, acc=0.754, epoch=38, loss=0.795]

epoch:38, idx:9599/10845, loss:0.7953425664361566, acc:0.7540885416666666


 89%|████████▉ | 9700/10845 [35:52<04:08,  4.61it/s, acc=0.754, epoch=38, loss=0.797]

epoch:38, idx:9699/10845, loss:0.7973848257390495, acc:0.7536340206185567


 90%|█████████ | 9801/10845 [36:15<03:46,  4.61it/s, acc=0.754, epoch=38, loss=0.798]

epoch:38, idx:9799/10845, loss:0.7983728963896937, acc:0.753545918367347


 91%|█████████▏| 9901/10845 [36:37<03:22,  4.67it/s, acc=0.753, epoch=38, loss=0.799]

epoch:38, idx:9899/10845, loss:0.7992561899200834, acc:0.7532323232323233


 92%|█████████▏| 10001/10845 [36:59<03:03,  4.61it/s, acc=0.753, epoch=38, loss=0.799]

epoch:38, idx:9999/10845, loss:0.7988607738643885, acc:0.75335


 93%|█████████▎| 10101/10845 [37:21<02:48,  4.40it/s, acc=0.753, epoch=38, loss=0.8]  

epoch:38, idx:10099/10845, loss:0.7997443093787325, acc:0.7531188118811881


 94%|█████████▍| 10201/10845 [37:44<02:26,  4.39it/s, acc=0.753, epoch=38, loss=0.799]

epoch:38, idx:10199/10845, loss:0.7993702017822686, acc:0.7532843137254902


 95%|█████████▍| 10300/10845 [38:06<02:01,  4.47it/s, acc=0.753, epoch=38, loss=0.8]  

epoch:38, idx:10299/10845, loss:0.7998355135321618, acc:0.7531067961165049


 96%|█████████▌| 10400/10845 [38:28<01:38,  4.51it/s, acc=0.753, epoch=38, loss=0.8]  

epoch:38, idx:10399/10845, loss:0.7996781222722852, acc:0.7529567307692308


 97%|█████████▋| 10501/10845 [38:50<01:16,  4.52it/s, acc=0.753, epoch=38, loss=0.799]

epoch:38, idx:10499/10845, loss:0.7993020150746618, acc:0.7531666666666667


 98%|█████████▊| 10601/10845 [39:12<00:54,  4.51it/s, acc=0.753, epoch=38, loss=0.8]  

epoch:38, idx:10599/10845, loss:0.7995666378033611, acc:0.7530188679245283


 99%|█████████▊| 10700/10845 [39:35<00:33,  4.37it/s, acc=0.753, epoch=38, loss=0.8]  

epoch:38, idx:10699/10845, loss:0.7997206512204954, acc:0.7530373831775701


100%|█████████▉| 10800/10845 [39:57<00:09,  4.56it/s, acc=0.753, epoch=38, loss=0.799]

epoch:38, idx:10799/10845, loss:0.799334205640135, acc:0.7531481481481481


100%|██████████| 10845/10845 [40:06<00:00,  6.00it/s, acc=0.753, epoch=38, loss=0.8]  


epoch:38, idx:0/1275, loss:1.5545258522033691, acc:0.5
epoch:38, idx:100/1275, loss:1.466147736747666, acc:0.6460396039603961
epoch:38, idx:200/1275, loss:1.3452007847638865, acc:0.650497512437811
epoch:38, idx:300/1275, loss:1.3102267014227833, acc:0.6586378737541528
epoch:38, idx:400/1275, loss:1.2878415397277794, acc:0.6664588528678305
epoch:38, idx:500/1275, loss:1.252915443774469, acc:0.6706586826347305
epoch:38, idx:600/1275, loss:1.2627596634993339, acc:0.6643094841930116
epoch:38, idx:700/1275, loss:1.2707716008906018, acc:0.6636947218259629
epoch:38, idx:800/1275, loss:1.2885296286715104, acc:0.6632334581772784
epoch:38, idx:900/1275, loss:1.281875308822183, acc:0.6656492785793563
epoch:38, idx:1000/1275, loss:1.288547159432174, acc:0.6640859140859141
epoch:38, idx:1100/1275, loss:1.2717419248835593, acc:0.6673478655767484
epoch:38, idx:1200/1275, loss:1.2681049555167867, acc:0.664654454621149


  1%|          | 101/10845 [00:21<40:46,  4.39it/s, acc=0.767, epoch=39, loss=0.702]

epoch:39, idx:99/10845, loss:0.7072481858730316, acc:0.765


  2%|▏         | 201/10845 [00:44<38:15,  4.64it/s, acc=0.761, epoch=39, loss=0.758]

epoch:39, idx:199/10845, loss:0.7583695805072784, acc:0.76125


  3%|▎         | 300/10845 [01:06<39:04,  4.50it/s, acc=0.758, epoch=39, loss=0.766]

epoch:39, idx:299/10845, loss:0.7656953163941701, acc:0.7583333333333333


  4%|▎         | 401/10845 [01:29<39:40,  4.39it/s, acc=0.756, epoch=39, loss=0.79] 

epoch:39, idx:399/10845, loss:0.7919858968257905, acc:0.755


  5%|▍         | 500/10845 [01:51<36:16,  4.75it/s, acc=0.752, epoch=39, loss=0.785]

epoch:39, idx:499/10845, loss:0.78635622549057, acc:0.752


  6%|▌         | 600/10845 [02:13<37:24,  4.56it/s, acc=0.757, epoch=39, loss=0.762]

epoch:39, idx:599/10845, loss:0.7619901192188263, acc:0.7570833333333333


  6%|▋         | 700/10845 [02:35<37:13,  4.54it/s, acc=0.754, epoch=39, loss=0.771]

epoch:39, idx:699/10845, loss:0.7700779833112444, acc:0.7546428571428572


  7%|▋         | 801/10845 [02:57<36:43,  4.56it/s, acc=0.757, epoch=39, loss=0.765]

epoch:39, idx:799/10845, loss:0.7649173529446125, acc:0.756875


  8%|▊         | 901/10845 [03:20<34:44,  4.77it/s, acc=0.756, epoch=39, loss=0.773]

epoch:39, idx:899/10845, loss:0.7732423108153873, acc:0.7555555555555555


  9%|▉         | 1000/10845 [03:42<36:00,  4.56it/s, acc=0.754, epoch=39, loss=0.788]

epoch:39, idx:999/10845, loss:0.7877515649795532, acc:0.75425


 10%|█         | 1100/10845 [04:04<37:30,  4.33it/s, acc=0.757, epoch=39, loss=0.779]

epoch:39, idx:1099/10845, loss:0.7791039795225316, acc:0.7575


 11%|█         | 1201/10845 [04:27<34:53,  4.61it/s, acc=0.758, epoch=39, loss=0.776]

epoch:39, idx:1199/10845, loss:0.7768705080946287, acc:0.7577083333333333


 12%|█▏        | 1300/10845 [04:49<34:33,  4.60it/s, acc=0.757, epoch=39, loss=0.783]

epoch:39, idx:1299/10845, loss:0.7826014209710634, acc:0.7567307692307692


 13%|█▎        | 1400/10845 [05:11<36:09,  4.35it/s, acc=0.756, epoch=39, loss=0.786]

epoch:39, idx:1399/10845, loss:0.7857539335744722, acc:0.7560714285714286


 14%|█▍        | 1500/10845 [05:33<35:01,  4.45it/s, acc=0.756, epoch=39, loss=0.781]

epoch:39, idx:1499/10845, loss:0.7812991054455439, acc:0.7565


 15%|█▍        | 1600/10845 [05:56<32:59,  4.67it/s, acc=0.756, epoch=39, loss=0.786]

epoch:39, idx:1599/10845, loss:0.7857377828657627, acc:0.75609375


 16%|█▌        | 1700/10845 [06:18<33:01,  4.61it/s, acc=0.755, epoch=39, loss=0.794]

epoch:39, idx:1699/10845, loss:0.7937412311049069, acc:0.755


 17%|█▋        | 1801/10845 [06:40<31:45,  4.75it/s, acc=0.756, epoch=39, loss=0.79] 

epoch:39, idx:1799/10845, loss:0.789905584388309, acc:0.7556944444444444


 18%|█▊        | 1900/10845 [07:02<31:52,  4.68it/s, acc=0.753, epoch=39, loss=0.795]

epoch:39, idx:1899/10845, loss:0.7945970607431312, acc:0.7532894736842105


 18%|█▊        | 2001/10845 [07:25<33:33,  4.39it/s, acc=0.753, epoch=39, loss=0.796]

epoch:39, idx:1999/10845, loss:0.7956118669509887, acc:0.7535


 19%|█▉        | 2100/10845 [07:47<30:15,  4.82it/s, acc=0.752, epoch=39, loss=0.801]

epoch:39, idx:2099/10845, loss:0.8013954914183844, acc:0.751547619047619


 20%|██        | 2200/10845 [08:09<32:31,  4.43it/s, acc=0.752, epoch=39, loss=0.801]

epoch:39, idx:2199/10845, loss:0.8009053633971648, acc:0.7522727272727273


 21%|██        | 2300/10845 [08:32<31:12,  4.56it/s, acc=0.752, epoch=39, loss=0.801]

epoch:39, idx:2299/10845, loss:0.8007139798869257, acc:0.7519565217391304


 22%|██▏       | 2401/10845 [08:54<30:41,  4.59it/s, acc=0.752, epoch=39, loss=0.799]

epoch:39, idx:2399/10845, loss:0.7992400012910366, acc:0.7525


 23%|██▎       | 2500/10845 [09:16<29:53,  4.65it/s, acc=0.754, epoch=39, loss=0.792]

epoch:39, idx:2499/10845, loss:0.792212623167038, acc:0.754


 24%|██▍       | 2600/10845 [09:39<31:49,  4.32it/s, acc=0.753, epoch=39, loss=0.791]

epoch:39, idx:2599/10845, loss:0.7909768943603223, acc:0.7533653846153846


 25%|██▍       | 2700/10845 [10:01<31:38,  4.29it/s, acc=0.754, epoch=39, loss=0.792]

epoch:39, idx:2699/10845, loss:0.7916115648216672, acc:0.7537962962962963


 26%|██▌       | 2801/10845 [10:24<29:19,  4.57it/s, acc=0.754, epoch=39, loss=0.788]

epoch:39, idx:2799/10845, loss:0.7884247368574142, acc:0.754375


 27%|██▋       | 2900/10845 [10:46<28:58,  4.57it/s, acc=0.755, epoch=39, loss=0.789]

epoch:39, idx:2899/10845, loss:0.7885147995784365, acc:0.7548275862068966


 28%|██▊       | 3001/10845 [11:08<27:10,  4.81it/s, acc=0.755, epoch=39, loss=0.789]

epoch:39, idx:2999/10845, loss:0.788671875188748, acc:0.7549166666666667


 29%|██▊       | 3100/10845 [11:31<29:14,  4.42it/s, acc=0.755, epoch=39, loss=0.792]

epoch:39, idx:3099/10845, loss:0.7922598494541261, acc:0.7550806451612904


 30%|██▉       | 3200/10845 [11:53<27:38,  4.61it/s, acc=0.755, epoch=39, loss=0.793]

epoch:39, idx:3199/10845, loss:0.7934588668402285, acc:0.754921875


 30%|███       | 3300/10845 [12:16<28:04,  4.48it/s, acc=0.755, epoch=39, loss=0.794]

epoch:39, idx:3299/10845, loss:0.7939639806657126, acc:0.7547727272727273


 31%|███▏      | 3400/10845 [12:38<28:06,  4.41it/s, acc=0.755, epoch=39, loss=0.795]

epoch:39, idx:3399/10845, loss:0.7953078249009217, acc:0.755


 32%|███▏      | 3500/10845 [13:00<26:26,  4.63it/s, acc=0.754, epoch=39, loss=0.797]

epoch:39, idx:3499/10845, loss:0.797194492655141, acc:0.7542142857142857


 33%|███▎      | 3600/10845 [13:23<27:06,  4.45it/s, acc=0.755, epoch=39, loss=0.792]

epoch:39, idx:3599/10845, loss:0.7923384917858574, acc:0.7553472222222222


 34%|███▍      | 3700/10845 [13:45<25:53,  4.60it/s, acc=0.755, epoch=39, loss=0.793]

epoch:39, idx:3699/10845, loss:0.7932566285213909, acc:0.7547972972972973


 35%|███▌      | 3801/10845 [14:07<24:59,  4.70it/s, acc=0.755, epoch=39, loss=0.792]

epoch:39, idx:3799/10845, loss:0.7919816993490646, acc:0.7552631578947369


 36%|███▌      | 3900/10845 [14:29<26:00,  4.45it/s, acc=0.754, epoch=39, loss=0.797]

epoch:39, idx:3899/10845, loss:0.7965146336570764, acc:0.7544871794871795


 37%|███▋      | 4001/10845 [14:52<23:51,  4.78it/s, acc=0.755, epoch=39, loss=0.795]

epoch:39, idx:3999/10845, loss:0.7946843155100942, acc:0.7550625


 38%|███▊      | 4100/10845 [15:14<25:56,  4.33it/s, acc=0.755, epoch=39, loss=0.796]

epoch:39, idx:4099/10845, loss:0.7962726316582865, acc:0.7548170731707317


 39%|███▊      | 4200/10845 [15:36<24:22,  4.54it/s, acc=0.755, epoch=39, loss=0.796]

epoch:39, idx:4199/10845, loss:0.7956872302506651, acc:0.755


 40%|███▉      | 4301/10845 [15:59<23:11,  4.70it/s, acc=0.755, epoch=39, loss=0.798]

epoch:39, idx:4299/10845, loss:0.7977363097875617, acc:0.7547674418604651


 41%|████      | 4400/10845 [16:21<24:38,  4.36it/s, acc=0.755, epoch=39, loss=0.799]

epoch:39, idx:4399/10845, loss:0.7989806856011803, acc:0.7547727272727273


 41%|████▏     | 4500/10845 [16:43<24:13,  4.37it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:4499/10845, loss:0.8011910971469349, acc:0.7542222222222222


 42%|████▏     | 4601/10845 [17:06<23:22,  4.45it/s, acc=0.755, epoch=39, loss=0.801]

epoch:39, idx:4599/10845, loss:0.8012612117049487, acc:0.7544565217391305


 43%|████▎     | 4700/10845 [17:27<22:07,  4.63it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:4699/10845, loss:0.8023387052847984, acc:0.7544148936170213


 44%|████▍     | 4801/10845 [17:50<22:23,  4.50it/s, acc=0.755, epoch=39, loss=0.8]  

epoch:39, idx:4799/10845, loss:0.8000238895602524, acc:0.7548958333333333


 45%|████▌     | 4900/10845 [18:12<22:15,  4.45it/s, acc=0.755, epoch=39, loss=0.799]

epoch:39, idx:4899/10845, loss:0.7994602478949391, acc:0.7553061224489795


 46%|████▌     | 5001/10845 [18:35<20:18,  4.80it/s, acc=0.755, epoch=39, loss=0.799]

epoch:39, idx:4999/10845, loss:0.7990713434159755, acc:0.7555


 47%|████▋     | 5100/10845 [18:56<23:08,  4.14it/s, acc=0.756, epoch=39, loss=0.798]

epoch:39, idx:5099/10845, loss:0.7980267938854647, acc:0.7557843137254902


 48%|████▊     | 5200/10845 [19:19<20:23,  4.61it/s, acc=0.756, epoch=39, loss=0.798]

epoch:39, idx:5199/10845, loss:0.7980710097173086, acc:0.755576923076923


 49%|████▉     | 5300/10845 [19:41<21:24,  4.32it/s, acc=0.755, epoch=39, loss=0.799]

epoch:39, idx:5299/10845, loss:0.7994782357339589, acc:0.7549056603773585


 50%|████▉     | 5400/10845 [20:04<19:45,  4.59it/s, acc=0.754, epoch=39, loss=0.803]

epoch:39, idx:5399/10845, loss:0.8033301758269469, acc:0.7540740740740741


 51%|█████     | 5500/10845 [20:26<18:56,  4.70it/s, acc=0.754, epoch=39, loss=0.804]

epoch:39, idx:5499/10845, loss:0.8042249721451239, acc:0.7544090909090909


 52%|█████▏    | 5600/10845 [20:48<20:02,  4.36it/s, acc=0.755, epoch=39, loss=0.802]

epoch:39, idx:5599/10845, loss:0.8023326712687101, acc:0.7548660714285714


 53%|█████▎    | 5700/10845 [21:10<20:53,  4.10it/s, acc=0.755, epoch=39, loss=0.801]

epoch:39, idx:5699/10845, loss:0.8010896782132617, acc:0.7551315789473684


 53%|█████▎    | 5800/10845 [21:33<19:12,  4.38it/s, acc=0.755, epoch=39, loss=0.802]

epoch:39, idx:5799/10845, loss:0.8023764608081044, acc:0.7545258620689655


 54%|█████▍    | 5901/10845 [21:55<18:49,  4.38it/s, acc=0.755, epoch=39, loss=0.802]

epoch:39, idx:5899/10845, loss:0.8020385081535679, acc:0.7546610169491526


 55%|█████▌    | 6001/10845 [22:17<17:08,  4.71it/s, acc=0.755, epoch=39, loss=0.801]

epoch:39, idx:5999/10845, loss:0.8009317895025014, acc:0.7550416666666667


 56%|█████▌    | 6100/10845 [22:39<17:11,  4.60it/s, acc=0.755, epoch=39, loss=0.799]

epoch:39, idx:6099/10845, loss:0.7993047228846394, acc:0.7552459016393442


 57%|█████▋    | 6200/10845 [23:01<16:45,  4.62it/s, acc=0.755, epoch=39, loss=0.8]  

epoch:39, idx:6199/10845, loss:0.8001829689885339, acc:0.7545967741935484


 58%|█████▊    | 6301/10845 [23:24<16:36,  4.56it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:6299/10845, loss:0.8015866664052009, acc:0.7541666666666667


 59%|█████▉    | 6401/10845 [23:46<15:46,  4.70it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:6399/10845, loss:0.8013048552302644, acc:0.7541796875


 60%|█████▉    | 6500/10845 [24:08<17:22,  4.17it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:6499/10845, loss:0.801799806700303, acc:0.7539230769230769


 61%|██████    | 6600/10845 [24:30<14:53,  4.75it/s, acc=0.754, epoch=39, loss=0.803]

epoch:39, idx:6599/10845, loss:0.8029564330930059, acc:0.75375


 62%|██████▏   | 6700/10845 [24:52<14:10,  4.88it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:6699/10845, loss:0.8017063985787221, acc:0.7539925373134329


 63%|██████▎   | 6800/10845 [25:14<14:09,  4.76it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:6799/10845, loss:0.8012889831162551, acc:0.7541176470588236


 64%|██████▎   | 6900/10845 [25:36<14:21,  4.58it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:6899/10845, loss:0.8021485700425894, acc:0.7540217391304348


 65%|██████▍   | 7000/10845 [25:58<14:20,  4.47it/s, acc=0.754, epoch=39, loss=0.803]

epoch:39, idx:6999/10845, loss:0.8033253999224731, acc:0.7536785714285714


 65%|██████▌   | 7101/10845 [26:21<13:05,  4.77it/s, acc=0.754, epoch=39, loss=0.803]

epoch:39, idx:7099/10845, loss:0.8031620001918833, acc:0.753556338028169


 66%|██████▋   | 7201/10845 [26:43<13:00,  4.67it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:7199/10845, loss:0.8018971591318647, acc:0.7540277777777777


 67%|██████▋   | 7300/10845 [27:05<12:56,  4.57it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:7299/10845, loss:0.8017137132727936, acc:0.7539383561643835


 68%|██████▊   | 7400/10845 [27:27<12:31,  4.58it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:7399/10845, loss:0.8018150409452013, acc:0.7536824324324324


 69%|██████▉   | 7500/10845 [27:50<11:43,  4.75it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:7499/10845, loss:0.800756557349364, acc:0.754


 70%|███████   | 7600/10845 [28:12<11:23,  4.75it/s, acc=0.754, epoch=39, loss=0.8]  

epoch:39, idx:7599/10845, loss:0.8004053730517626, acc:0.7543421052631579


 71%|███████   | 7700/10845 [28:35<11:29,  4.56it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:7699/10845, loss:0.8015844353259384, acc:0.7539285714285714


 72%|███████▏  | 7800/10845 [28:57<11:11,  4.53it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:7799/10845, loss:0.8007573150633237, acc:0.7538461538461538


 73%|███████▎  | 7900/10845 [29:18<10:41,  4.59it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:7899/10845, loss:0.8011477650381342, acc:0.7542721518987342


 74%|███████▍  | 8000/10845 [29:40<10:28,  4.52it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:7999/10845, loss:0.8019333357922733, acc:0.75396875


 75%|███████▍  | 8101/10845 [30:03<09:53,  4.62it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:8099/10845, loss:0.8020376530878338, acc:0.7539814814814815


 76%|███████▌  | 8200/10845 [30:25<10:19,  4.27it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:8199/10845, loss:0.8018623749984474, acc:0.75375


 77%|███████▋  | 8301/10845 [30:48<09:27,  4.48it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:8299/10845, loss:0.8016021005970886, acc:0.7537650602409639


 77%|███████▋  | 8400/10845 [31:10<09:26,  4.32it/s, acc=0.754, epoch=39, loss=0.802]

epoch:39, idx:8399/10845, loss:0.8015252626261541, acc:0.7539583333333333


 78%|███████▊  | 8500/10845 [31:32<08:55,  4.38it/s, acc=0.754, epoch=39, loss=0.8]  

epoch:39, idx:8499/10845, loss:0.8004146050600445, acc:0.754


 79%|███████▉  | 8600/10845 [31:54<08:27,  4.42it/s, acc=0.754, epoch=39, loss=0.8]  

epoch:39, idx:8599/10845, loss:0.8003874438481275, acc:0.753953488372093


 80%|████████  | 8700/10845 [32:16<07:42,  4.64it/s, acc=0.754, epoch=39, loss=0.799]

epoch:39, idx:8699/10845, loss:0.7993641440107905, acc:0.7541954022988506


 81%|████████  | 8801/10845 [32:38<07:34,  4.50it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:8799/10845, loss:0.8006615093519742, acc:0.7538920454545455


 82%|████████▏ | 8900/10845 [33:00<06:59,  4.63it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:8899/10845, loss:0.8009783041242803, acc:0.753623595505618


 83%|████████▎ | 9000/10845 [33:23<06:53,  4.46it/s, acc=0.753, epoch=39, loss=0.801]

epoch:39, idx:8999/10845, loss:0.800979446825054, acc:0.7534444444444445


 84%|████████▍ | 9100/10845 [33:45<06:30,  4.46it/s, acc=0.753, epoch=39, loss=0.801]

epoch:39, idx:9099/10845, loss:0.8014958360463709, acc:0.7532967032967033


 85%|████████▍ | 9201/10845 [34:08<06:03,  4.52it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:9199/10845, loss:0.8008422596849825, acc:0.7535054347826087


 86%|████████▌ | 9301/10845 [34:30<05:32,  4.64it/s, acc=0.754, epoch=39, loss=0.8]  

epoch:39, idx:9299/10845, loss:0.8003405092512408, acc:0.753736559139785


 87%|████████▋ | 9400/10845 [34:52<05:06,  4.72it/s, acc=0.753, epoch=39, loss=0.801]

epoch:39, idx:9399/10845, loss:0.8011585742266888, acc:0.7534042553191489


 88%|████████▊ | 9500/10845 [35:14<05:03,  4.44it/s, acc=0.753, epoch=39, loss=0.801]

epoch:39, idx:9499/10845, loss:0.8014970314659571, acc:0.7533947368421052


 89%|████████▊ | 9600/10845 [35:36<04:42,  4.41it/s, acc=0.754, epoch=39, loss=0.8]  

epoch:39, idx:9599/10845, loss:0.8003185711459567, acc:0.753671875


 89%|████████▉ | 9701/10845 [35:59<04:10,  4.56it/s, acc=0.754, epoch=39, loss=0.801]

epoch:39, idx:9699/10845, loss:0.8006146788504935, acc:0.7537113402061856


 90%|█████████ | 9800/10845 [36:21<03:50,  4.53it/s, acc=0.754, epoch=39, loss=0.8]  

epoch:39, idx:9799/10845, loss:0.7998806965138231, acc:0.7537755102040816


 91%|█████████▏| 9901/10845 [36:43<03:20,  4.72it/s, acc=0.754, epoch=39, loss=0.799]

epoch:39, idx:9899/10845, loss:0.7991204239292579, acc:0.7539393939393939


 92%|█████████▏| 10001/10845 [37:05<02:57,  4.77it/s, acc=0.754, epoch=39, loss=0.799]

epoch:39, idx:9999/10845, loss:0.7988545460492372, acc:0.754025


 93%|█████████▎| 10100/10845 [37:28<02:48,  4.43it/s, acc=0.754, epoch=39, loss=0.8]  

epoch:39, idx:10099/10845, loss:0.7997026483286725, acc:0.7536633663366337


 94%|█████████▍| 10201/10845 [37:50<02:18,  4.66it/s, acc=0.754, epoch=39, loss=0.8]

epoch:39, idx:10199/10845, loss:0.7996222042131658, acc:0.7537009803921568


 95%|█████████▍| 10300/10845 [38:12<02:01,  4.50it/s, acc=0.754, epoch=39, loss=0.799]

epoch:39, idx:10299/10845, loss:0.7986221496948918, acc:0.7540776699029126


 96%|█████████▌| 10401/10845 [38:35<01:37,  4.55it/s, acc=0.754, epoch=39, loss=0.799]

epoch:39, idx:10399/10845, loss:0.7985253569512413, acc:0.7539423076923077


 97%|█████████▋| 10501/10845 [38:57<01:17,  4.46it/s, acc=0.754, epoch=39, loss=0.798]

epoch:39, idx:10499/10845, loss:0.7980004354289599, acc:0.7543095238095238


 98%|█████████▊| 10601/10845 [39:20<00:54,  4.48it/s, acc=0.754, epoch=39, loss=0.797]

epoch:39, idx:10599/10845, loss:0.7973075208433394, acc:0.7544339622641509


 99%|█████████▊| 10700/10845 [39:42<00:32,  4.46it/s, acc=0.754, epoch=39, loss=0.798]

epoch:39, idx:10699/10845, loss:0.7978203744726761, acc:0.7544392523364486


100%|█████████▉| 10800/10845 [40:04<00:10,  4.46it/s, acc=0.754, epoch=39, loss=0.798]

epoch:39, idx:10799/10845, loss:0.7985107013776347, acc:0.7540972222222222


100%|██████████| 10845/10845 [40:14<00:00,  4.73it/s, acc=0.754, epoch=39, loss=0.799]


epoch:39, idx:0/1275, loss:1.6141934394836426, acc:0.5
epoch:39, idx:100/1275, loss:1.4815248758486002, acc:0.6386138613861386
epoch:39, idx:200/1275, loss:1.338501768325692, acc:0.6480099502487562
epoch:39, idx:300/1275, loss:1.3019839022246706, acc:0.6578073089700996
epoch:39, idx:400/1275, loss:1.2799506472827789, acc:0.6670822942643392
epoch:39, idx:500/1275, loss:1.2464608389460399, acc:0.6721556886227545
epoch:39, idx:600/1275, loss:1.2597115263962706, acc:0.6647254575707154
epoch:39, idx:700/1275, loss:1.2697591793520133, acc:0.6644079885877318
epoch:39, idx:800/1275, loss:1.2882450186506789, acc:0.6638576779026217
epoch:39, idx:900/1275, loss:1.2792437887615158, acc:0.6667591564927858
epoch:39, idx:1000/1275, loss:1.2877399398611262, acc:0.6648351648351648
epoch:39, idx:1100/1275, loss:1.271047337286912, acc:0.667574931880109
epoch:39, idx:1200/1275, loss:1.2659306372333625, acc:0.6656952539550375


  1%|          | 101/10845 [00:22<40:24,  4.43it/s, acc=0.748, epoch=40, loss=0.817]

epoch:40, idx:99/10845, loss:0.8225990533828735, acc:0.745


  2%|▏         | 200/10845 [00:44<38:24,  4.62it/s, acc=0.769, epoch=40, loss=0.745]

epoch:40, idx:199/10845, loss:0.744556114077568, acc:0.76875


  3%|▎         | 300/10845 [01:06<39:18,  4.47it/s, acc=0.772, epoch=40, loss=0.75] 

epoch:40, idx:299/10845, loss:0.7496159636974334, acc:0.7725


  4%|▎         | 401/10845 [01:29<39:08,  4.45it/s, acc=0.767, epoch=40, loss=0.765]

epoch:40, idx:399/10845, loss:0.7669608347117901, acc:0.76625


  5%|▍         | 500/10845 [01:50<37:51,  4.55it/s, acc=0.765, epoch=40, loss=0.759]

epoch:40, idx:499/10845, loss:0.7592347263097763, acc:0.7655


  6%|▌         | 600/10845 [02:12<36:25,  4.69it/s, acc=0.765, epoch=40, loss=0.745]

epoch:40, idx:599/10845, loss:0.7448217271765073, acc:0.7645833333333333


  6%|▋         | 700/10845 [02:35<37:43,  4.48it/s, acc=0.758, epoch=40, loss=0.764]

epoch:40, idx:699/10845, loss:0.7642822546618325, acc:0.7582142857142857


  7%|▋         | 800/10845 [02:57<39:18,  4.26it/s, acc=0.757, epoch=40, loss=0.783]

epoch:40, idx:799/10845, loss:0.7824532553553581, acc:0.756875


  8%|▊         | 900/10845 [03:20<35:07,  4.72it/s, acc=0.754, epoch=40, loss=0.798]

epoch:40, idx:899/10845, loss:0.7972523630989923, acc:0.7538888888888889


  9%|▉         | 1001/10845 [03:42<36:47,  4.46it/s, acc=0.753, epoch=40, loss=0.799]

epoch:40, idx:999/10845, loss:0.7988576411008835, acc:0.75375


 10%|█         | 1101/10845 [04:04<35:38,  4.56it/s, acc=0.757, epoch=40, loss=0.785]

epoch:40, idx:1099/10845, loss:0.7858880019187927, acc:0.7568181818181818


 11%|█         | 1201/10845 [04:26<34:14,  4.69it/s, acc=0.755, epoch=40, loss=0.787]

epoch:40, idx:1199/10845, loss:0.7871308949589729, acc:0.755


 12%|█▏        | 1300/10845 [04:48<34:27,  4.62it/s, acc=0.754, epoch=40, loss=0.79] 

epoch:40, idx:1299/10845, loss:0.7901701046870305, acc:0.7538461538461538


 13%|█▎        | 1400/10845 [05:10<34:21,  4.58it/s, acc=0.756, epoch=40, loss=0.783]

epoch:40, idx:1399/10845, loss:0.7831023976206779, acc:0.7557142857142857


 14%|█▍        | 1501/10845 [05:33<34:56,  4.46it/s, acc=0.755, epoch=40, loss=0.788]

epoch:40, idx:1499/10845, loss:0.7881336710055669, acc:0.7546666666666667


 15%|█▍        | 1601/10845 [05:55<34:10,  4.51it/s, acc=0.756, epoch=40, loss=0.787]

epoch:40, idx:1599/10845, loss:0.7871293577179312, acc:0.75578125


 16%|█▌        | 1700/10845 [06:17<33:12,  4.59it/s, acc=0.755, epoch=40, loss=0.79] 

epoch:40, idx:1699/10845, loss:0.7903480276290108, acc:0.7548529411764706


 17%|█▋        | 1801/10845 [06:40<33:17,  4.53it/s, acc=0.755, epoch=40, loss=0.786]

epoch:40, idx:1799/10845, loss:0.7857828572392463, acc:0.7554166666666666


 18%|█▊        | 1900/10845 [07:02<33:23,  4.47it/s, acc=0.757, epoch=40, loss=0.786]

epoch:40, idx:1899/10845, loss:0.7849077715058076, acc:0.7569736842105264


 18%|█▊        | 2001/10845 [07:24<32:26,  4.54it/s, acc=0.756, epoch=40, loss=0.787]

epoch:40, idx:1999/10845, loss:0.787124840438366, acc:0.75575


 19%|█▉        | 2100/10845 [07:46<32:19,  4.51it/s, acc=0.754, epoch=40, loss=0.795]

epoch:40, idx:2099/10845, loss:0.7954992422603425, acc:0.7535714285714286


 20%|██        | 2201/10845 [08:09<32:47,  4.39it/s, acc=0.754, epoch=40, loss=0.794]

epoch:40, idx:2199/10845, loss:0.7934452093731273, acc:0.7544318181818181


 21%|██        | 2300/10845 [08:30<31:34,  4.51it/s, acc=0.753, epoch=40, loss=0.796]

epoch:40, idx:2299/10845, loss:0.796424933568291, acc:0.7528260869565218


 22%|██▏       | 2401/10845 [08:53<30:42,  4.58it/s, acc=0.753, epoch=40, loss=0.797]

epoch:40, idx:2399/10845, loss:0.7971441999077797, acc:0.7529166666666667


 23%|██▎       | 2500/10845 [09:15<31:07,  4.47it/s, acc=0.754, epoch=40, loss=0.793]

epoch:40, idx:2499/10845, loss:0.7926069654941559, acc:0.7545


 24%|██▍       | 2601/10845 [09:38<29:02,  4.73it/s, acc=0.754, epoch=40, loss=0.795]

epoch:40, idx:2599/10845, loss:0.7947143409573115, acc:0.75375


 25%|██▍       | 2700/10845 [10:00<31:03,  4.37it/s, acc=0.755, epoch=40, loss=0.792]

epoch:40, idx:2699/10845, loss:0.7919261063249022, acc:0.7549074074074074


 26%|██▌       | 2801/10845 [10:23<28:34,  4.69it/s, acc=0.754, epoch=40, loss=0.793]

epoch:40, idx:2799/10845, loss:0.792923008693116, acc:0.7544642857142857


 27%|██▋       | 2900/10845 [10:45<29:21,  4.51it/s, acc=0.754, epoch=40, loss=0.793]

epoch:40, idx:2899/10845, loss:0.793245713279165, acc:0.7542241379310345


 28%|██▊       | 3000/10845 [11:07<29:49,  4.38it/s, acc=0.754, epoch=40, loss=0.791]

epoch:40, idx:2999/10845, loss:0.7908963929613432, acc:0.7540833333333333


 29%|██▊       | 3100/10845 [11:29<26:23,  4.89it/s, acc=0.755, epoch=40, loss=0.792]

epoch:40, idx:3099/10845, loss:0.792210225116822, acc:0.7545161290322581


 30%|██▉       | 3200/10845 [11:51<29:29,  4.32it/s, acc=0.755, epoch=40, loss=0.792]

epoch:40, idx:3199/10845, loss:0.7919970452226699, acc:0.7553125


 30%|███       | 3300/10845 [12:13<27:34,  4.56it/s, acc=0.754, epoch=40, loss=0.797]

epoch:40, idx:3299/10845, loss:0.7967297646313003, acc:0.7537878787878788


 31%|███▏      | 3400/10845 [12:35<27:01,  4.59it/s, acc=0.753, epoch=40, loss=0.797]

epoch:40, idx:3399/10845, loss:0.7970911693748306, acc:0.7531617647058824


 32%|███▏      | 3501/10845 [12:57<27:35,  4.44it/s, acc=0.754, epoch=40, loss=0.795]

epoch:40, idx:3499/10845, loss:0.7950904810598919, acc:0.7537142857142857


 33%|███▎      | 3601/10845 [13:20<24:59,  4.83it/s, acc=0.754, epoch=40, loss=0.796]

epoch:40, idx:3599/10845, loss:0.795974906053808, acc:0.7539583333333333


 34%|███▍      | 3700/10845 [13:42<24:31,  4.85it/s, acc=0.754, epoch=40, loss=0.798]

epoch:40, idx:3699/10845, loss:0.79768960997865, acc:0.7543243243243243


 35%|███▌      | 3801/10845 [14:04<24:30,  4.79it/s, acc=0.755, epoch=40, loss=0.795]

epoch:40, idx:3799/10845, loss:0.7946005688842974, acc:0.754671052631579


 36%|███▌      | 3900/10845 [14:26<25:22,  4.56it/s, acc=0.754, epoch=40, loss=0.797]

epoch:40, idx:3899/10845, loss:0.7973935282841706, acc:0.7541025641025642


 37%|███▋      | 4000/10845 [14:48<26:21,  4.33it/s, acc=0.754, epoch=40, loss=0.797]

epoch:40, idx:3999/10845, loss:0.7970017147958278, acc:0.754125


 38%|███▊      | 4100/10845 [15:11<25:00,  4.50it/s, acc=0.755, epoch=40, loss=0.796]

epoch:40, idx:4099/10845, loss:0.7963991835059189, acc:0.7548170731707317


 39%|███▊      | 4200/10845 [15:33<25:01,  4.43it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:4199/10845, loss:0.7934805106832867, acc:0.7555952380952381


 40%|███▉      | 4300/10845 [15:55<24:05,  4.53it/s, acc=0.756, epoch=40, loss=0.791]

epoch:40, idx:4299/10845, loss:0.7910796325983003, acc:0.7560465116279069


 41%|████      | 4401/10845 [16:18<22:58,  4.67it/s, acc=0.756, epoch=40, loss=0.791]

epoch:40, idx:4399/10845, loss:0.7907621255381541, acc:0.7561931818181818


 42%|████▏     | 4501/10845 [16:40<22:55,  4.61it/s, acc=0.756, epoch=40, loss=0.791]

epoch:40, idx:4499/10845, loss:0.7905772257248561, acc:0.7563888888888889


 42%|████▏     | 4601/10845 [17:02<23:14,  4.48it/s, acc=0.756, epoch=40, loss=0.791]

epoch:40, idx:4599/10845, loss:0.7912581399601438, acc:0.7563043478260869


 43%|████▎     | 4701/10845 [17:24<21:55,  4.67it/s, acc=0.757, epoch=40, loss=0.79] 

epoch:40, idx:4699/10845, loss:0.790033881195048, acc:0.7568617021276596


 44%|████▍     | 4800/10845 [17:46<22:41,  4.44it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:4799/10845, loss:0.7933014808471004, acc:0.75625


 45%|████▌     | 4900/10845 [18:09<22:29,  4.41it/s, acc=0.756, epoch=40, loss=0.792]

epoch:40, idx:4899/10845, loss:0.7921427970759722, acc:0.7564285714285715


 46%|████▌     | 5000/10845 [18:31<22:03,  4.42it/s, acc=0.757, epoch=40, loss=0.792]

epoch:40, idx:4999/10845, loss:0.7916000710964203, acc:0.75685


 47%|████▋     | 5100/10845 [18:53<21:27,  4.46it/s, acc=0.757, epoch=40, loss=0.791]

epoch:40, idx:5099/10845, loss:0.7914300001368804, acc:0.7566176470588235


 48%|████▊     | 5201/10845 [19:15<20:11,  4.66it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:5199/10845, loss:0.7930068474778762, acc:0.7561538461538462


 49%|████▉     | 5300/10845 [19:37<21:08,  4.37it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:5299/10845, loss:0.7927951464563046, acc:0.7563207547169811


 50%|████▉     | 5400/10845 [20:00<20:10,  4.50it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:5399/10845, loss:0.7936095828700949, acc:0.7558796296296296


 51%|█████     | 5500/10845 [20:22<18:49,  4.73it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:5499/10845, loss:0.7925795428427783, acc:0.7562727272727273


 52%|█████▏    | 5600/10845 [20:44<20:46,  4.21it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:5599/10845, loss:0.7933079451216118, acc:0.7562053571428572


 53%|█████▎    | 5700/10845 [21:07<18:45,  4.57it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:5699/10845, loss:0.7934511934142364, acc:0.7563157894736842


 53%|█████▎    | 5800/10845 [21:29<18:06,  4.64it/s, acc=0.757, epoch=40, loss=0.791]

epoch:40, idx:5799/10845, loss:0.7908513383310417, acc:0.7570689655172413


 54%|█████▍    | 5900/10845 [21:51<20:18,  4.06it/s, acc=0.757, epoch=40, loss=0.793]

epoch:40, idx:5899/10845, loss:0.7934600923121986, acc:0.7565677966101695


 55%|█████▌    | 6000/10845 [22:14<17:46,  4.55it/s, acc=0.757, epoch=40, loss=0.793]

epoch:40, idx:5999/10845, loss:0.7931660983264446, acc:0.75675


 56%|█████▋    | 6101/10845 [22:36<17:23,  4.55it/s, acc=0.757, epoch=40, loss=0.791]

epoch:40, idx:6099/10845, loss:0.7914743212305132, acc:0.7572540983606557


 57%|█████▋    | 6200/10845 [22:58<17:02,  4.54it/s, acc=0.757, epoch=40, loss=0.792]

epoch:40, idx:6199/10845, loss:0.7923610131490615, acc:0.7570564516129032


 58%|█████▊    | 6300/10845 [23:20<15:51,  4.78it/s, acc=0.757, epoch=40, loss=0.791]

epoch:40, idx:6299/10845, loss:0.7910239449852989, acc:0.7573412698412698


 59%|█████▉    | 6401/10845 [23:42<15:32,  4.77it/s, acc=0.757, epoch=40, loss=0.792]

epoch:40, idx:6399/10845, loss:0.7921926504652947, acc:0.7570703125


 60%|█████▉    | 6500/10845 [24:04<16:43,  4.33it/s, acc=0.757, epoch=40, loss=0.793]

epoch:40, idx:6499/10845, loss:0.7934567290727909, acc:0.756576923076923


 61%|██████    | 6601/10845 [24:26<16:13,  4.36it/s, acc=0.757, epoch=40, loss=0.793]

epoch:40, idx:6599/10845, loss:0.7926432821154594, acc:0.7565909090909091


 62%|██████▏   | 6700/10845 [24:48<15:47,  4.37it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:6699/10845, loss:0.7937986027393769, acc:0.7558955223880597


 63%|██████▎   | 6801/10845 [25:10<14:32,  4.63it/s, acc=0.756, epoch=40, loss=0.793]

epoch:40, idx:6799/10845, loss:0.7927399908795076, acc:0.7561397058823529


 64%|██████▎   | 6901/10845 [25:33<15:04,  4.36it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:6899/10845, loss:0.7944096896959387, acc:0.7556159420289855


 65%|██████▍   | 7000/10845 [25:55<15:16,  4.19it/s, acc=0.756, epoch=40, loss=0.795]

epoch:40, idx:6999/10845, loss:0.7948650475740433, acc:0.7559285714285714


 65%|██████▌   | 7101/10845 [26:17<13:34,  4.60it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:7099/10845, loss:0.7938640837266412, acc:0.756443661971831


 66%|██████▋   | 7200/10845 [26:39<13:36,  4.47it/s, acc=0.757, epoch=40, loss=0.794]

epoch:40, idx:7199/10845, loss:0.7938913979795243, acc:0.7565625


 67%|██████▋   | 7301/10845 [27:01<12:25,  4.75it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:7299/10845, loss:0.7943654276900095, acc:0.7561301369863014


 68%|██████▊   | 7400/10845 [27:23<12:12,  4.70it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:7399/10845, loss:0.7936464368652653, acc:0.75625


 69%|██████▉   | 7500/10845 [27:45<12:51,  4.33it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:7499/10845, loss:0.7944689122835795, acc:0.7564


 70%|███████   | 7600/10845 [28:07<11:19,  4.78it/s, acc=0.756, epoch=40, loss=0.795]

epoch:40, idx:7599/10845, loss:0.7954971387511806, acc:0.755953947368421


 71%|███████   | 7700/10845 [28:29<11:55,  4.40it/s, acc=0.756, epoch=40, loss=0.794]

epoch:40, idx:7699/10845, loss:0.7941895513410692, acc:0.7564285714285715


 72%|███████▏  | 7800/10845 [28:51<12:06,  4.19it/s, acc=0.756, epoch=40, loss=0.796]

epoch:40, idx:7799/10845, loss:0.7959190049079748, acc:0.7560897435897436


 73%|███████▎  | 7901/10845 [29:14<10:56,  4.49it/s, acc=0.756, epoch=40, loss=0.797]

epoch:40, idx:7899/10845, loss:0.7966743276089052, acc:0.7559810126582278


 74%|███████▍  | 8000/10845 [29:35<10:17,  4.61it/s, acc=0.756, epoch=40, loss=0.796]

epoch:40, idx:7999/10845, loss:0.7962399055510759, acc:0.7563125


 75%|███████▍  | 8100/10845 [29:57<10:35,  4.32it/s, acc=0.756, epoch=40, loss=0.797]

epoch:40, idx:8099/10845, loss:0.7968619350592295, acc:0.7562037037037037


 76%|███████▌  | 8200/10845 [30:19<10:05,  4.36it/s, acc=0.756, epoch=40, loss=0.796]

epoch:40, idx:8199/10845, loss:0.796379263168428, acc:0.7561585365853658


 77%|███████▋  | 8300/10845 [30:41<09:27,  4.48it/s, acc=0.756, epoch=40, loss=0.797]

epoch:40, idx:8299/10845, loss:0.7965552082119218, acc:0.756144578313253


 77%|███████▋  | 8401/10845 [31:03<08:32,  4.76it/s, acc=0.756, epoch=40, loss=0.798]

epoch:40, idx:8399/10845, loss:0.7978789544389362, acc:0.7560119047619047


 78%|███████▊  | 8500/10845 [31:26<08:31,  4.59it/s, acc=0.756, epoch=40, loss=0.799]

epoch:40, idx:8499/10845, loss:0.79927011625907, acc:0.7556176470588235


 79%|███████▉  | 8600/10845 [31:48<08:01,  4.66it/s, acc=0.756, epoch=40, loss=0.799]

epoch:40, idx:8599/10845, loss:0.799336580148963, acc:0.7555523255813954


 80%|████████  | 8701/10845 [32:10<08:00,  4.46it/s, acc=0.756, epoch=40, loss=0.798]

epoch:40, idx:8699/10845, loss:0.7981508776442758, acc:0.7558908045977012


 81%|████████  | 8800/10845 [32:32<07:38,  4.46it/s, acc=0.756, epoch=40, loss=0.797]

epoch:40, idx:8799/10845, loss:0.7968594145503911, acc:0.7560511363636364


 82%|████████▏ | 8900/10845 [32:54<07:45,  4.18it/s, acc=0.756, epoch=40, loss=0.796]

epoch:40, idx:8899/10845, loss:0.796198270615567, acc:0.7563202247191011


 83%|████████▎ | 9000/10845 [33:17<07:01,  4.38it/s, acc=0.756, epoch=40, loss=0.796]

epoch:40, idx:8999/10845, loss:0.7963066186110178, acc:0.7563611111111112


 84%|████████▍ | 9100/10845 [33:39<06:05,  4.78it/s, acc=0.757, epoch=40, loss=0.796]

epoch:40, idx:9099/10845, loss:0.7960461089571754, acc:0.7567582417582418


 85%|████████▍ | 9201/10845 [34:01<05:53,  4.65it/s, acc=0.757, epoch=40, loss=0.796]

epoch:40, idx:9199/10845, loss:0.796346207798823, acc:0.756820652173913


 86%|████████▌ | 9300/10845 [34:23<06:05,  4.23it/s, acc=0.757, epoch=40, loss=0.796]

epoch:40, idx:9299/10845, loss:0.7961799237740937, acc:0.7569086021505377


 87%|████████▋ | 9400/10845 [34:46<05:37,  4.29it/s, acc=0.757, epoch=40, loss=0.796]

epoch:40, idx:9399/10845, loss:0.7964081999279083, acc:0.7566755319148936


 88%|████████▊ | 9501/10845 [35:08<04:57,  4.51it/s, acc=0.757, epoch=40, loss=0.797]

epoch:40, idx:9499/10845, loss:0.7969501681767012, acc:0.7566315789473684


 89%|████████▊ | 9600/10845 [35:30<04:38,  4.47it/s, acc=0.757, epoch=40, loss=0.797]

epoch:40, idx:9599/10845, loss:0.7965693544285993, acc:0.7567447916666666


 89%|████████▉ | 9700/10845 [35:53<04:20,  4.40it/s, acc=0.757, epoch=40, loss=0.796]

epoch:40, idx:9699/10845, loss:0.7961922788435651, acc:0.7568298969072165


 90%|█████████ | 9801/10845 [36:15<03:48,  4.58it/s, acc=0.757, epoch=40, loss=0.796]

epoch:40, idx:9799/10845, loss:0.7962215950963448, acc:0.7570663265306122


 91%|█████████▏| 9900/10845 [36:37<03:27,  4.56it/s, acc=0.757, epoch=40, loss=0.795]

epoch:40, idx:9899/10845, loss:0.794852551360323, acc:0.7575


 92%|█████████▏| 10001/10845 [36:59<03:08,  4.47it/s, acc=0.757, epoch=40, loss=0.794]

epoch:40, idx:9999/10845, loss:0.7940939291477204, acc:0.757475


 93%|█████████▎| 10100/10845 [37:21<02:45,  4.51it/s, acc=0.757, epoch=40, loss=0.794]

epoch:40, idx:10099/10845, loss:0.7942527069195663, acc:0.7573514851485148


 94%|█████████▍| 10201/10845 [37:44<02:18,  4.67it/s, acc=0.757, epoch=40, loss=0.796]

epoch:40, idx:10199/10845, loss:0.7955905895139657, acc:0.7569607843137255


 95%|█████████▍| 10300/10845 [38:05<01:58,  4.60it/s, acc=0.757, epoch=40, loss=0.795]

epoch:40, idx:10299/10845, loss:0.7952502911762126, acc:0.7570388349514563


 96%|█████████▌| 10400/10845 [38:28<01:40,  4.44it/s, acc=0.757, epoch=40, loss=0.795]

epoch:40, idx:10399/10845, loss:0.7954444741858886, acc:0.7571394230769231


 97%|█████████▋| 10500/10845 [38:50<01:12,  4.79it/s, acc=0.757, epoch=40, loss=0.795]

epoch:40, idx:10499/10845, loss:0.7950394298122043, acc:0.757095238095238


 98%|█████████▊| 10600/10845 [39:12<00:51,  4.72it/s, acc=0.757, epoch=40, loss=0.794]

epoch:40, idx:10599/10845, loss:0.7942393007031027, acc:0.7573349056603773


 99%|█████████▊| 10700/10845 [39:34<00:30,  4.76it/s, acc=0.757, epoch=40, loss=0.794]

epoch:40, idx:10699/10845, loss:0.7942535724483918, acc:0.7572663551401869


100%|█████████▉| 10800/10845 [39:55<00:09,  4.64it/s, acc=0.757, epoch=40, loss=0.794]

epoch:40, idx:10799/10845, loss:0.7943961079694607, acc:0.7573148148148148


100%|██████████| 10845/10845 [40:05<00:00,  4.76it/s, acc=0.757, epoch=40, loss=0.795]


epoch:40, idx:0/1275, loss:1.553248405456543, acc:0.5
epoch:40, idx:100/1275, loss:1.510820433644965, acc:0.6336633663366337
epoch:40, idx:200/1275, loss:1.3653569316389549, acc:0.6455223880597015
epoch:40, idx:300/1275, loss:1.3248362157035904, acc:0.6553156146179402
epoch:40, idx:400/1275, loss:1.2989978983515218, acc:0.6652119700748129
epoch:40, idx:500/1275, loss:1.2609567049734607, acc:0.6671656686626747
epoch:40, idx:600/1275, loss:1.2725475364040812, acc:0.6613976705490848
epoch:40, idx:700/1275, loss:1.28135100768058, acc:0.6615549215406562
epoch:40, idx:800/1275, loss:1.2993750411472964, acc:0.6601123595505618
epoch:40, idx:900/1275, loss:1.291010427157437, acc:0.6628745837957825
epoch:40, idx:1000/1275, loss:1.3001012366253895, acc:0.6610889110889111
epoch:40, idx:1100/1275, loss:1.2818217873031934, acc:0.6650772025431426
epoch:40, idx:1200/1275, loss:1.2769511658186519, acc:0.6634054954204829


  1%|          | 100/10845 [00:22<42:16,  4.24it/s, acc=0.782, epoch=41, loss=0.692]

epoch:41, idx:99/10845, loss:0.6922022461891174, acc:0.7825


  2%|▏         | 200/10845 [00:44<38:23,  4.62it/s, acc=0.759, epoch=41, loss=0.768]

epoch:41, idx:199/10845, loss:0.7682482627034187, acc:0.75875


  3%|▎         | 301/10845 [01:07<37:49,  4.65it/s, acc=0.782, epoch=41, loss=0.709]

epoch:41, idx:299/10845, loss:0.7100768289963404, acc:0.7816666666666666


  4%|▎         | 400/10845 [01:29<37:59,  4.58it/s, acc=0.764, epoch=41, loss=0.744]

epoch:41, idx:399/10845, loss:0.7456922556459904, acc:0.763125


  5%|▍         | 501/10845 [01:51<36:09,  4.77it/s, acc=0.77, epoch=41, loss=0.738] 

epoch:41, idx:499/10845, loss:0.7392267147302628, acc:0.77


  6%|▌         | 600/10845 [02:13<36:49,  4.64it/s, acc=0.77, epoch=41, loss=0.741] 

epoch:41, idx:599/10845, loss:0.7411941680312156, acc:0.7695833333333333


  6%|▋         | 700/10845 [02:35<37:17,  4.53it/s, acc=0.766, epoch=41, loss=0.759]

epoch:41, idx:699/10845, loss:0.7585812612090792, acc:0.7657142857142857


  7%|▋         | 801/10845 [02:58<36:54,  4.54it/s, acc=0.763, epoch=41, loss=0.779]

epoch:41, idx:799/10845, loss:0.779633231535554, acc:0.7628125


  8%|▊         | 900/10845 [03:20<35:49,  4.63it/s, acc=0.759, epoch=41, loss=0.794]

epoch:41, idx:899/10845, loss:0.7939019934998618, acc:0.7594444444444445


  9%|▉         | 1000/10845 [03:42<37:47,  4.34it/s, acc=0.755, epoch=41, loss=0.802]

epoch:41, idx:999/10845, loss:0.8023988108038902, acc:0.75525


 10%|█         | 1101/10845 [04:05<34:11,  4.75it/s, acc=0.754, epoch=41, loss=0.803]

epoch:41, idx:1099/10845, loss:0.8031506856463172, acc:0.7543181818181818


 11%|█         | 1200/10845 [04:27<33:43,  4.77it/s, acc=0.755, epoch=41, loss=0.8]  

epoch:41, idx:1199/10845, loss:0.8002471054097017, acc:0.7545833333333334


 12%|█▏        | 1301/10845 [04:49<34:35,  4.60it/s, acc=0.753, epoch=41, loss=0.802]

epoch:41, idx:1299/10845, loss:0.8023265320979632, acc:0.7534615384615385


 13%|█▎        | 1400/10845 [05:11<36:30,  4.31it/s, acc=0.754, epoch=41, loss=0.802]

epoch:41, idx:1399/10845, loss:0.8018685661894934, acc:0.7535714285714286


 14%|█▍        | 1500/10845 [05:33<34:50,  4.47it/s, acc=0.753, epoch=41, loss=0.803]

epoch:41, idx:1499/10845, loss:0.8030375841061275, acc:0.7533333333333333


 15%|█▍        | 1600/10845 [05:55<33:57,  4.54it/s, acc=0.756, epoch=41, loss=0.793]

epoch:41, idx:1599/10845, loss:0.7930728251859546, acc:0.755625


 16%|█▌        | 1700/10845 [06:17<37:06,  4.11it/s, acc=0.755, epoch=41, loss=0.799]

epoch:41, idx:1699/10845, loss:0.7988278887201758, acc:0.7547058823529412


 17%|█▋        | 1800/10845 [06:40<37:32,  4.02it/s, acc=0.755, epoch=41, loss=0.799]

epoch:41, idx:1799/10845, loss:0.798545925517877, acc:0.7548611111111111


 18%|█▊        | 1901/10845 [07:03<31:54,  4.67it/s, acc=0.756, epoch=41, loss=0.792]

epoch:41, idx:1899/10845, loss:0.7919990471789712, acc:0.7561842105263158


 18%|█▊        | 2000/10845 [07:25<32:46,  4.50it/s, acc=0.756, epoch=41, loss=0.793]

epoch:41, idx:1999/10845, loss:0.7927175648808479, acc:0.756125


 19%|█▉        | 2101/10845 [07:47<30:33,  4.77it/s, acc=0.756, epoch=41, loss=0.795]

epoch:41, idx:2099/10845, loss:0.7950175311451867, acc:0.7557142857142857


 20%|██        | 2201/10845 [08:09<31:57,  4.51it/s, acc=0.755, epoch=41, loss=0.801]

epoch:41, idx:2199/10845, loss:0.8014918040687388, acc:0.755


 21%|██        | 2300/10845 [08:31<32:26,  4.39it/s, acc=0.756, epoch=41, loss=0.805]

epoch:41, idx:2299/10845, loss:0.8052453115193741, acc:0.7556521739130435


 22%|██▏       | 2400/10845 [08:53<31:38,  4.45it/s, acc=0.755, epoch=41, loss=0.807]

epoch:41, idx:2399/10845, loss:0.8068230266372363, acc:0.7553125


 23%|██▎       | 2500/10845 [09:15<31:36,  4.40it/s, acc=0.756, epoch=41, loss=0.804]

epoch:41, idx:2499/10845, loss:0.8041133617401123, acc:0.7561


 24%|██▍       | 2600/10845 [09:38<30:52,  4.45it/s, acc=0.757, epoch=41, loss=0.802]

epoch:41, idx:2599/10845, loss:0.8017787652749282, acc:0.7570192307692307


 25%|██▍       | 2700/10845 [10:00<30:24,  4.46it/s, acc=0.757, epoch=41, loss=0.799]

epoch:41, idx:2699/10845, loss:0.7990961559613545, acc:0.7573148148148148


 26%|██▌       | 2800/10845 [10:22<30:12,  4.44it/s, acc=0.758, epoch=41, loss=0.802]

epoch:41, idx:2799/10845, loss:0.8016764285734722, acc:0.7575892857142857


 27%|██▋       | 2900/10845 [10:44<29:42,  4.46it/s, acc=0.757, epoch=41, loss=0.805]

epoch:41, idx:2899/10845, loss:0.8048763453960419, acc:0.756896551724138


 28%|██▊       | 3000/10845 [11:06<28:18,  4.62it/s, acc=0.757, epoch=41, loss=0.804]

epoch:41, idx:2999/10845, loss:0.8042413750489553, acc:0.7566666666666667


 29%|██▊       | 3100/10845 [11:28<28:51,  4.47it/s, acc=0.756, epoch=41, loss=0.804]

epoch:41, idx:3099/10845, loss:0.8043980275046441, acc:0.7563709677419355


 30%|██▉       | 3200/10845 [11:50<29:03,  4.38it/s, acc=0.757, epoch=41, loss=0.803]

epoch:41, idx:3199/10845, loss:0.802708246037364, acc:0.756796875


 30%|███       | 3300/10845 [12:13<27:39,  4.55it/s, acc=0.757, epoch=41, loss=0.802]

epoch:41, idx:3299/10845, loss:0.8018069785652738, acc:0.756969696969697


 31%|███▏      | 3401/10845 [12:35<26:24,  4.70it/s, acc=0.757, epoch=41, loss=0.8]  

epoch:41, idx:3399/10845, loss:0.7997207915081698, acc:0.7573529411764706


 32%|███▏      | 3501/10845 [12:57<27:02,  4.53it/s, acc=0.758, epoch=41, loss=0.801]

epoch:41, idx:3499/10845, loss:0.8007622510365078, acc:0.7576428571428572


 33%|███▎      | 3600/10845 [13:19<27:47,  4.35it/s, acc=0.757, epoch=41, loss=0.799]

epoch:41, idx:3599/10845, loss:0.7986702821983231, acc:0.7575


 34%|███▍      | 3700/10845 [13:41<27:17,  4.36it/s, acc=0.758, epoch=41, loss=0.796]

epoch:41, idx:3699/10845, loss:0.7963045295831319, acc:0.7578378378378379


 35%|███▌      | 3800/10845 [14:03<27:18,  4.30it/s, acc=0.757, epoch=41, loss=0.802]

epoch:41, idx:3799/10845, loss:0.8019574410664407, acc:0.756578947368421


 36%|███▌      | 3900/10845 [14:26<24:54,  4.65it/s, acc=0.757, epoch=41, loss=0.8]  

epoch:41, idx:3899/10845, loss:0.8001237551982586, acc:0.7571153846153846


 37%|███▋      | 4001/10845 [14:48<24:20,  4.68it/s, acc=0.757, epoch=41, loss=0.8]  

epoch:41, idx:3999/10845, loss:0.800507638335228, acc:0.756875


 38%|███▊      | 4100/10845 [15:10<24:23,  4.61it/s, acc=0.757, epoch=41, loss=0.798]

epoch:41, idx:4099/10845, loss:0.7975911864711017, acc:0.7573170731707317


 39%|███▊      | 4201/10845 [15:33<22:58,  4.82it/s, acc=0.758, epoch=41, loss=0.796]

epoch:41, idx:4199/10845, loss:0.796076089824949, acc:0.757797619047619


 40%|███▉      | 4300/10845 [15:55<25:20,  4.30it/s, acc=0.758, epoch=41, loss=0.796]

epoch:41, idx:4299/10845, loss:0.7963387534230254, acc:0.7580232558139535


 41%|████      | 4400/10845 [16:18<24:56,  4.31it/s, acc=0.758, epoch=41, loss=0.795]

epoch:41, idx:4399/10845, loss:0.7954389116168022, acc:0.7579545454545454


 42%|████▏     | 4501/10845 [16:40<22:32,  4.69it/s, acc=0.758, epoch=41, loss=0.794]

epoch:41, idx:4499/10845, loss:0.7942908041212294, acc:0.7578333333333334


 42%|████▏     | 4601/10845 [17:02<22:55,  4.54it/s, acc=0.758, epoch=41, loss=0.795]

epoch:41, idx:4599/10845, loss:0.7947780754255211, acc:0.7577717391304348


 43%|████▎     | 4700/10845 [17:25<25:08,  4.07it/s, acc=0.758, epoch=41, loss=0.795]

epoch:41, idx:4699/10845, loss:0.7946981223847004, acc:0.7579787234042553


 44%|████▍     | 4800/10845 [17:47<23:27,  4.29it/s, acc=0.758, epoch=41, loss=0.793]

epoch:41, idx:4799/10845, loss:0.792809663216273, acc:0.7584895833333334


 45%|████▌     | 4900/10845 [18:09<22:41,  4.37it/s, acc=0.759, epoch=41, loss=0.792]

epoch:41, idx:4899/10845, loss:0.792121468952724, acc:0.7587244897959183


 46%|████▌     | 5000/10845 [18:32<21:23,  4.55it/s, acc=0.759, epoch=41, loss=0.791]

epoch:41, idx:4999/10845, loss:0.790862717962265, acc:0.75865


 47%|████▋     | 5101/10845 [18:54<21:11,  4.52it/s, acc=0.759, epoch=41, loss=0.79] 

epoch:41, idx:5099/10845, loss:0.7898148945967356, acc:0.7590686274509804


 48%|████▊     | 5200/10845 [19:16<21:47,  4.32it/s, acc=0.759, epoch=41, loss=0.787]

epoch:41, idx:5199/10845, loss:0.7869808475329326, acc:0.7594711538461538


 49%|████▉     | 5300/10845 [19:38<20:03,  4.61it/s, acc=0.759, epoch=41, loss=0.788]

epoch:41, idx:5299/10845, loss:0.7884680579743295, acc:0.7590566037735849


 50%|████▉     | 5400/10845 [20:00<19:57,  4.55it/s, acc=0.76, epoch=41, loss=0.786] 

epoch:41, idx:5399/10845, loss:0.7863117635029334, acc:0.7595833333333334


 51%|█████     | 5501/10845 [20:23<18:36,  4.79it/s, acc=0.759, epoch=41, loss=0.789]

epoch:41, idx:5499/10845, loss:0.7887013400901448, acc:0.759090909090909


 52%|█████▏    | 5601/10845 [20:46<19:20,  4.52it/s, acc=0.759, epoch=41, loss=0.79] 

epoch:41, idx:5599/10845, loss:0.7899218811733382, acc:0.7585714285714286


 53%|█████▎    | 5700/10845 [21:08<18:37,  4.60it/s, acc=0.758, epoch=41, loss=0.791]

epoch:41, idx:5699/10845, loss:0.7903971921665627, acc:0.7583333333333333


 53%|█████▎    | 5801/10845 [21:30<18:57,  4.43it/s, acc=0.758, epoch=41, loss=0.791]

epoch:41, idx:5799/10845, loss:0.7906821390164309, acc:0.7580603448275862


 54%|█████▍    | 5900/10845 [21:52<17:14,  4.78it/s, acc=0.758, epoch=41, loss=0.791]

epoch:41, idx:5899/10845, loss:0.7909974227416313, acc:0.7579661016949153


 55%|█████▌    | 6001/10845 [22:15<17:43,  4.55it/s, acc=0.758, epoch=41, loss=0.789]

epoch:41, idx:5999/10845, loss:0.7891431564788024, acc:0.7577916666666666


 56%|█████▌    | 6100/10845 [22:37<17:05,  4.63it/s, acc=0.758, epoch=41, loss=0.789]

epoch:41, idx:6099/10845, loss:0.7888432239606732, acc:0.7575819672131148


 57%|█████▋    | 6200/10845 [22:59<16:39,  4.65it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:6199/10845, loss:0.7886002536742918, acc:0.7574596774193548


 58%|█████▊    | 6300/10845 [23:21<17:02,  4.45it/s, acc=0.757, epoch=41, loss=0.791]

epoch:41, idx:6299/10845, loss:0.7909508467099023, acc:0.7571825396825397


 59%|█████▉    | 6400/10845 [23:43<16:46,  4.42it/s, acc=0.757, epoch=41, loss=0.791]

epoch:41, idx:6399/10845, loss:0.7907755256257951, acc:0.756875


 60%|█████▉    | 6500/10845 [24:06<16:08,  4.48it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:6499/10845, loss:0.7901520372720865, acc:0.7572307692307693


 61%|██████    | 6601/10845 [24:28<15:10,  4.66it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:6599/10845, loss:0.7890616422169137, acc:0.7574242424242424


 62%|██████▏   | 6701/10845 [24:51<15:13,  4.54it/s, acc=0.758, epoch=41, loss=0.787]

epoch:41, idx:6699/10845, loss:0.7873406641696816, acc:0.7576865671641791


 63%|██████▎   | 6800/10845 [25:13<15:06,  4.46it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:6799/10845, loss:0.7887842257233227, acc:0.7572426470588235


 64%|██████▎   | 6900/10845 [25:35<14:03,  4.68it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:6899/10845, loss:0.789393511440443, acc:0.7569202898550724


 65%|██████▍   | 7000/10845 [25:57<13:57,  4.59it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:6999/10845, loss:0.7893049176761082, acc:0.7568571428571429


 65%|██████▌   | 7100/10845 [26:20<13:45,  4.54it/s, acc=0.757, epoch=41, loss=0.791]

epoch:41, idx:7099/10845, loss:0.7905887973476463, acc:0.7566549295774648


 66%|██████▋   | 7200/10845 [26:42<13:40,  4.44it/s, acc=0.756, epoch=41, loss=0.792]

epoch:41, idx:7199/10845, loss:0.7915379730694824, acc:0.7563194444444444


 67%|██████▋   | 7300/10845 [27:04<12:44,  4.64it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:7299/10845, loss:0.7894818365247283, acc:0.7568835616438356


 68%|██████▊   | 7400/10845 [27:27<11:59,  4.79it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:7399/10845, loss:0.7903614305483329, acc:0.7569594594594594


 69%|██████▉   | 7501/10845 [27:49<12:52,  4.33it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:7499/10845, loss:0.7895807551542918, acc:0.7571


 70%|███████   | 7600/10845 [28:12<11:14,  4.81it/s, acc=0.757, epoch=41, loss=0.791]

epoch:41, idx:7599/10845, loss:0.7909075580772601, acc:0.7566118421052631


 71%|███████   | 7700/10845 [28:34<12:46,  4.10it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:7699/10845, loss:0.7902244773313597, acc:0.7568181818181818


 72%|███████▏  | 7800/10845 [28:56<11:17,  4.49it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:7799/10845, loss:0.790193405518165, acc:0.757051282051282


 73%|███████▎  | 7901/10845 [29:19<10:26,  4.70it/s, acc=0.757, epoch=41, loss=0.791]

epoch:41, idx:7899/10845, loss:0.7905355353445946, acc:0.7567721518987341


 74%|███████▍  | 8001/10845 [29:41<09:48,  4.83it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:7999/10845, loss:0.7904266992509366, acc:0.7568125


 75%|███████▍  | 8100/10845 [30:03<10:16,  4.45it/s, acc=0.757, epoch=41, loss=0.791]

epoch:41, idx:8099/10845, loss:0.7905433673917511, acc:0.7568827160493827


 76%|███████▌  | 8200/10845 [30:26<10:06,  4.36it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:8199/10845, loss:0.7901150891839004, acc:0.7569207317073171


 77%|███████▋  | 8300/10845 [30:48<09:37,  4.41it/s, acc=0.757, epoch=41, loss=0.79] 

epoch:41, idx:8299/10845, loss:0.7895225824936327, acc:0.757078313253012


 77%|███████▋  | 8401/10845 [31:11<08:34,  4.75it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:8399/10845, loss:0.7885767822748139, acc:0.7571130952380952


 78%|███████▊  | 8501/10845 [31:33<08:43,  4.47it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:8499/10845, loss:0.7886649245724958, acc:0.7571470588235294


 79%|███████▉  | 8600/10845 [31:55<08:15,  4.53it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:8599/10845, loss:0.7892402689609417, acc:0.7569186046511628


 80%|████████  | 8701/10845 [32:18<07:40,  4.66it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:8699/10845, loss:0.7893963485408103, acc:0.7566666666666667


 81%|████████  | 8801/10845 [32:40<07:28,  4.56it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:8799/10845, loss:0.7882235224612735, acc:0.7568465909090909


 82%|████████▏ | 8900/10845 [33:02<07:12,  4.50it/s, acc=0.757, epoch=41, loss=0.787]

epoch:41, idx:8899/10845, loss:0.7870098537742422, acc:0.7570224719101124


 83%|████████▎ | 9001/10845 [33:25<06:27,  4.76it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:8999/10845, loss:0.789139514585336, acc:0.7565833333333334


 84%|████████▍ | 9100/10845 [33:47<06:44,  4.32it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:9099/10845, loss:0.7884404134619367, acc:0.7566758241758241


 85%|████████▍ | 9201/10845 [34:09<05:43,  4.79it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:9199/10845, loss:0.7873841574528943, acc:0.7569021739130435


 86%|████████▌ | 9301/10845 [34:31<05:59,  4.30it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:9299/10845, loss:0.7876926607854905, acc:0.7568548387096774


 87%|████████▋ | 9401/10845 [34:54<05:12,  4.63it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:9399/10845, loss:0.7880748784668902, acc:0.7566755319148936


 88%|████████▊ | 9500/10845 [35:16<05:14,  4.27it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:9499/10845, loss:0.7881334177694823, acc:0.7566578947368421


 89%|████████▊ | 9600/10845 [35:38<04:53,  4.24it/s, acc=0.757, epoch=41, loss=0.789]

epoch:41, idx:9599/10845, loss:0.7887373633558551, acc:0.7565364583333334


 89%|████████▉ | 9700/10845 [36:00<04:09,  4.58it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:9699/10845, loss:0.7881447633271365, acc:0.7567268041237113


 90%|█████████ | 9800/10845 [36:23<03:35,  4.84it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:9799/10845, loss:0.7882868398452292, acc:0.7565816326530612


 91%|█████████▏| 9900/10845 [36:45<03:22,  4.66it/s, acc=0.756, epoch=41, loss=0.788]

epoch:41, idx:9899/10845, loss:0.7882628831718907, acc:0.756439393939394


 92%|█████████▏| 10000/10845 [37:07<03:02,  4.64it/s, acc=0.756, epoch=41, loss=0.789]

epoch:41, idx:9999/10845, loss:0.7885887152791023, acc:0.756375


 93%|█████████▎| 10100/10845 [37:29<02:55,  4.25it/s, acc=0.756, epoch=41, loss=0.789]

epoch:41, idx:10099/10845, loss:0.7893467674869122, acc:0.7562871287128713


 94%|█████████▍| 10200/10845 [37:51<02:20,  4.60it/s, acc=0.756, epoch=41, loss=0.789]

epoch:41, idx:10199/10845, loss:0.7889687026131387, acc:0.75625


 95%|█████████▍| 10301/10845 [38:13<01:59,  4.56it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:10299/10845, loss:0.7881092441197738, acc:0.7566019417475728


 96%|█████████▌| 10400/10845 [38:35<01:32,  4.83it/s, acc=0.757, epoch=41, loss=0.787]

epoch:41, idx:10399/10845, loss:0.7874553620987214, acc:0.7567307692307692


 97%|█████████▋| 10500/10845 [38:57<01:16,  4.51it/s, acc=0.757, epoch=41, loss=0.787]

epoch:41, idx:10499/10845, loss:0.7872039308718273, acc:0.7568095238095238


 98%|█████████▊| 10601/10845 [39:20<00:52,  4.63it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:10599/10845, loss:0.7879233131374953, acc:0.7567924528301887


 99%|█████████▊| 10701/10845 [39:42<00:32,  4.49it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:10699/10845, loss:0.7884693459651181, acc:0.7568691588785047


100%|█████████▉| 10801/10845 [40:04<00:09,  4.43it/s, acc=0.757, epoch=41, loss=0.788]

epoch:41, idx:10799/10845, loss:0.7884179086100173, acc:0.7569212962962963


100%|██████████| 10845/10845 [40:14<00:00,  4.32it/s, acc=0.757, epoch=41, loss=0.788]


epoch:41, idx:0/1275, loss:1.5377497673034668, acc:0.75
epoch:41, idx:100/1275, loss:1.5125444572750886, acc:0.6410891089108911
epoch:41, idx:200/1275, loss:1.366112943312422, acc:0.6554726368159204
epoch:41, idx:300/1275, loss:1.3264877871421485, acc:0.6619601328903655
epoch:41, idx:400/1275, loss:1.3033418242176276, acc:0.6701995012468828
epoch:41, idx:500/1275, loss:1.2652602645451436, acc:0.6721556886227545
epoch:41, idx:600/1275, loss:1.2795919513147007, acc:0.6659733777038269
epoch:41, idx:700/1275, loss:1.2858483204657953, acc:0.6665477888730386
epoch:41, idx:800/1275, loss:1.3065095082502092, acc:0.6651061173533084
epoch:41, idx:900/1275, loss:1.297963126319627, acc:0.667591564927858
epoch:41, idx:1000/1275, loss:1.3074728554779, acc:0.6665834165834166
epoch:41, idx:1100/1275, loss:1.2894121113416825, acc:0.6700726612170754
epoch:41, idx:1200/1275, loss:1.2846925071534467, acc:0.6688176519567027


  1%|          | 100/10845 [00:21<38:33,  4.64it/s, acc=0.73, epoch=42, loss=0.837] 

epoch:42, idx:99/10845, loss:0.843906678557396, acc:0.7275


  2%|▏         | 200/10845 [00:44<39:36,  4.48it/s, acc=0.738, epoch=42, loss=0.827]

epoch:42, idx:199/10845, loss:0.826988585293293, acc:0.7375


  3%|▎         | 300/10845 [01:06<38:23,  4.58it/s, acc=0.743, epoch=42, loss=0.813]

epoch:42, idx:299/10845, loss:0.8132848697900772, acc:0.7425


  4%|▎         | 400/10845 [01:28<38:45,  4.49it/s, acc=0.751, epoch=42, loss=0.809]

epoch:42, idx:399/10845, loss:0.8092117515206337, acc:0.750625


  5%|▍         | 501/10845 [01:50<26:04,  6.61it/s, acc=0.759, epoch=42, loss=0.774]

epoch:42, idx:499/10845, loss:0.7752073315382004, acc:0.759


  6%|▌         | 600/10845 [02:12<36:26,  4.69it/s, acc=0.765, epoch=42, loss=0.76] 

epoch:42, idx:599/10845, loss:0.7599279339114825, acc:0.7645833333333333


  6%|▋         | 700/10845 [02:35<40:55,  4.13it/s, acc=0.765, epoch=42, loss=0.762]

epoch:42, idx:699/10845, loss:0.7621550819703511, acc:0.7646428571428572


  7%|▋         | 800/10845 [02:57<39:59,  4.19it/s, acc=0.766, epoch=42, loss=0.764]

epoch:42, idx:799/10845, loss:0.7637196301668883, acc:0.76625


  8%|▊         | 901/10845 [03:19<34:50,  4.76it/s, acc=0.764, epoch=42, loss=0.773]

epoch:42, idx:899/10845, loss:0.7735878689421548, acc:0.7636111111111111


  9%|▉         | 1001/10845 [03:41<36:47,  4.46it/s, acc=0.763, epoch=42, loss=0.773]

epoch:42, idx:999/10845, loss:0.7732877013087273, acc:0.7635


 10%|█         | 1101/10845 [04:03<37:08,  4.37it/s, acc=0.764, epoch=42, loss=0.771]

epoch:42, idx:1099/10845, loss:0.7716780807213349, acc:0.7638636363636364


 11%|█         | 1200/10845 [04:26<38:19,  4.20it/s, acc=0.763, epoch=42, loss=0.769]

epoch:42, idx:1199/10845, loss:0.7688892396787802, acc:0.763125


 12%|█▏        | 1301/10845 [04:48<36:11,  4.39it/s, acc=0.762, epoch=42, loss=0.775]

epoch:42, idx:1299/10845, loss:0.7750872739920249, acc:0.7623076923076924


 13%|█▎        | 1400/10845 [05:10<34:50,  4.52it/s, acc=0.76, epoch=42, loss=0.783] 

epoch:42, idx:1399/10845, loss:0.7830520454900606, acc:0.7601785714285715


 14%|█▍        | 1500/10845 [05:33<37:11,  4.19it/s, acc=0.76, epoch=42, loss=0.788] 

epoch:42, idx:1499/10845, loss:0.7886086272001267, acc:0.76


 15%|█▍        | 1601/10845 [05:55<34:49,  4.42it/s, acc=0.757, epoch=42, loss=0.792]

epoch:42, idx:1599/10845, loss:0.7914830043166876, acc:0.75734375


 16%|█▌        | 1700/10845 [06:17<36:34,  4.17it/s, acc=0.757, epoch=42, loss=0.791]

epoch:42, idx:1699/10845, loss:0.7911324998210458, acc:0.7572058823529412


 17%|█▋        | 1801/10845 [06:40<34:30,  4.37it/s, acc=0.757, epoch=42, loss=0.794]

epoch:42, idx:1799/10845, loss:0.7944462254974577, acc:0.7566666666666667


 18%|█▊        | 1900/10845 [07:02<32:18,  4.62it/s, acc=0.757, epoch=42, loss=0.795]

epoch:42, idx:1899/10845, loss:0.7945296641399986, acc:0.7572368421052632


 18%|█▊        | 2000/10845 [07:24<32:19,  4.56it/s, acc=0.758, epoch=42, loss=0.792]

epoch:42, idx:1999/10845, loss:0.791847173333168, acc:0.758125


 19%|█▉        | 2100/10845 [07:47<31:51,  4.57it/s, acc=0.758, epoch=42, loss=0.793]

epoch:42, idx:2099/10845, loss:0.7925196323508308, acc:0.7577380952380952


 20%|██        | 2200/10845 [08:09<30:38,  4.70it/s, acc=0.757, epoch=42, loss=0.792]

epoch:42, idx:2199/10845, loss:0.7922499147328463, acc:0.7572727272727273


 21%|██        | 2300/10845 [08:31<32:52,  4.33it/s, acc=0.757, epoch=42, loss=0.791]

epoch:42, idx:2299/10845, loss:0.7908544590680495, acc:0.7572826086956522


 22%|██▏       | 2400/10845 [08:54<28:53,  4.87it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:2399/10845, loss:0.7896184672415256, acc:0.7567708333333333


 23%|██▎       | 2500/10845 [09:16<31:39,  4.39it/s, acc=0.756, epoch=42, loss=0.792]

epoch:42, idx:2499/10845, loss:0.7916101826667785, acc:0.7559


 24%|██▍       | 2600/10845 [09:38<29:01,  4.73it/s, acc=0.757, epoch=42, loss=0.787]

epoch:42, idx:2599/10845, loss:0.7867805556379832, acc:0.7568269230769231


 25%|██▍       | 2701/10845 [10:00<30:06,  4.51it/s, acc=0.756, epoch=42, loss=0.786]

epoch:42, idx:2699/10845, loss:0.7860681887246944, acc:0.7562037037037037


 26%|██▌       | 2801/10845 [10:22<28:47,  4.66it/s, acc=0.756, epoch=42, loss=0.787]

epoch:42, idx:2799/10845, loss:0.78715842630182, acc:0.75625


 27%|██▋       | 2901/10845 [10:45<28:21,  4.67it/s, acc=0.757, epoch=42, loss=0.785]

epoch:42, idx:2899/10845, loss:0.7849453357170368, acc:0.7571551724137932


 28%|██▊       | 3000/10845 [11:07<29:10,  4.48it/s, acc=0.757, epoch=42, loss=0.785]

epoch:42, idx:2999/10845, loss:0.7847298773129782, acc:0.757


 29%|██▊       | 3101/10845 [11:29<27:41,  4.66it/s, acc=0.757, epoch=42, loss=0.785]

epoch:42, idx:3099/10845, loss:0.7856568799095769, acc:0.7564516129032258


 30%|██▉       | 3200/10845 [11:51<27:34,  4.62it/s, acc=0.756, epoch=42, loss=0.784]

epoch:42, idx:3199/10845, loss:0.7844972189888358, acc:0.755625


 30%|███       | 3300/10845 [12:13<28:07,  4.47it/s, acc=0.756, epoch=42, loss=0.784]

epoch:42, idx:3299/10845, loss:0.7843748320593978, acc:0.7557575757575757


 31%|███▏      | 3400/10845 [12:35<27:09,  4.57it/s, acc=0.756, epoch=42, loss=0.787]

epoch:42, idx:3399/10845, loss:0.7866576999776503, acc:0.7555147058823529


 32%|███▏      | 3501/10845 [12:58<27:25,  4.46it/s, acc=0.755, epoch=42, loss=0.79] 

epoch:42, idx:3499/10845, loss:0.7903810551166535, acc:0.7547857142857143


 33%|███▎      | 3601/10845 [13:20<26:03,  4.63it/s, acc=0.755, epoch=42, loss=0.788]

epoch:42, idx:3599/10845, loss:0.7876944518751569, acc:0.7545833333333334


 34%|███▍      | 3700/10845 [13:42<28:55,  4.12it/s, acc=0.755, epoch=42, loss=0.786]

epoch:42, idx:3699/10845, loss:0.7857938115339022, acc:0.754527027027027


 35%|███▌      | 3800/10845 [14:03<25:15,  4.65it/s, acc=0.754, epoch=42, loss=0.786]

epoch:42, idx:3799/10845, loss:0.7857933093685853, acc:0.7544736842105263


 36%|███▌      | 3900/10845 [14:26<27:22,  4.23it/s, acc=0.754, epoch=42, loss=0.787]

epoch:42, idx:3899/10845, loss:0.787490892196313, acc:0.7541025641025642


 37%|███▋      | 4000/10845 [14:48<24:37,  4.63it/s, acc=0.754, epoch=42, loss=0.789]

epoch:42, idx:3999/10845, loss:0.78914554515481, acc:0.7545


 38%|███▊      | 4100/10845 [15:10<25:35,  4.39it/s, acc=0.755, epoch=42, loss=0.787]

epoch:42, idx:4099/10845, loss:0.7872905185164475, acc:0.7547560975609756


 39%|███▊      | 4200/10845 [15:32<23:50,  4.65it/s, acc=0.755, epoch=42, loss=0.789]

epoch:42, idx:4199/10845, loss:0.788746442652884, acc:0.7551190476190476


 40%|███▉      | 4301/10845 [15:55<23:16,  4.68it/s, acc=0.755, epoch=42, loss=0.789]

epoch:42, idx:4299/10845, loss:0.7883590787510539, acc:0.7552325581395349


 41%|████      | 4400/10845 [16:17<23:54,  4.49it/s, acc=0.756, epoch=42, loss=0.786]

epoch:42, idx:4399/10845, loss:0.7863905052705245, acc:0.7560795454545455


 41%|████▏     | 4500/10845 [16:39<23:04,  4.58it/s, acc=0.755, epoch=42, loss=0.79] 

epoch:42, idx:4499/10845, loss:0.7896768983205159, acc:0.7555


 42%|████▏     | 4600/10845 [17:01<26:02,  4.00it/s, acc=0.755, epoch=42, loss=0.789]

epoch:42, idx:4599/10845, loss:0.789363349598387, acc:0.7554891304347826


 43%|████▎     | 4700/10845 [17:23<22:29,  4.55it/s, acc=0.756, epoch=42, loss=0.788]

epoch:42, idx:4699/10845, loss:0.7875845252960286, acc:0.7560106382978723


 44%|████▍     | 4800/10845 [17:45<24:18,  4.14it/s, acc=0.756, epoch=42, loss=0.79] 

epoch:42, idx:4799/10845, loss:0.7897612801442544, acc:0.7558854166666666


 45%|████▌     | 4901/10845 [18:08<21:23,  4.63it/s, acc=0.756, epoch=42, loss=0.792]

epoch:42, idx:4899/10845, loss:0.7926112820420946, acc:0.7555102040816326


 46%|████▌     | 5000/10845 [18:30<21:49,  4.46it/s, acc=0.756, epoch=42, loss=0.792]

epoch:42, idx:4999/10845, loss:0.7921987668275833, acc:0.75575


 47%|████▋     | 5100/10845 [18:52<22:29,  4.26it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:5099/10845, loss:0.7901203748291614, acc:0.7566666666666667


 48%|████▊     | 5200/10845 [19:14<19:37,  4.79it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:5199/10845, loss:0.7896437511535791, acc:0.7565384615384615


 49%|████▉     | 5300/10845 [19:36<21:26,  4.31it/s, acc=0.756, epoch=42, loss=0.79] 

epoch:42, idx:5299/10845, loss:0.7900086325744413, acc:0.7564622641509434


 50%|████▉     | 5400/10845 [19:58<19:50,  4.57it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:5399/10845, loss:0.7898451292294043, acc:0.756712962962963


 51%|█████     | 5500/10845 [20:20<19:20,  4.61it/s, acc=0.756, epoch=42, loss=0.792]

epoch:42, idx:5499/10845, loss:0.7916260166601701, acc:0.7564090909090909


 52%|█████▏    | 5600/10845 [20:42<20:36,  4.24it/s, acc=0.756, epoch=42, loss=0.793]

epoch:42, idx:5599/10845, loss:0.7927841318505151, acc:0.7558482142857142


 53%|█████▎    | 5700/10845 [21:04<18:51,  4.55it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:5699/10845, loss:0.7897069904678746, acc:0.7566666666666667


 53%|█████▎    | 5800/10845 [21:26<18:55,  4.44it/s, acc=0.757, epoch=42, loss=0.788]

epoch:42, idx:5799/10845, loss:0.7882443269573409, acc:0.7570258620689655


 54%|█████▍    | 5900/10845 [21:49<18:45,  4.39it/s, acc=0.757, epoch=42, loss=0.788]

epoch:42, idx:5899/10845, loss:0.787718842898385, acc:0.7574576271186441


 55%|█████▌    | 6000/10845 [22:11<17:50,  4.52it/s, acc=0.758, epoch=42, loss=0.786]

epoch:42, idx:5999/10845, loss:0.7858368503848712, acc:0.7575416666666667


 56%|█████▌    | 6100/10845 [22:33<17:17,  4.57it/s, acc=0.757, epoch=42, loss=0.785]

epoch:42, idx:6099/10845, loss:0.7849196178893574, acc:0.7574590163934426


 57%|█████▋    | 6200/10845 [22:55<18:00,  4.30it/s, acc=0.758, epoch=42, loss=0.784]

epoch:42, idx:6199/10845, loss:0.7844690010912957, acc:0.7578225806451613


 58%|█████▊    | 6300/10845 [23:18<17:02,  4.45it/s, acc=0.758, epoch=42, loss=0.784]

epoch:42, idx:6299/10845, loss:0.7838251420902828, acc:0.7578968253968253


 59%|█████▉    | 6400/10845 [23:40<16:32,  4.48it/s, acc=0.758, epoch=42, loss=0.785]

epoch:42, idx:6399/10845, loss:0.7846270087826998, acc:0.7575390625


 60%|█████▉    | 6500/10845 [24:02<15:57,  4.54it/s, acc=0.758, epoch=42, loss=0.784]

epoch:42, idx:6499/10845, loss:0.7842265105705994, acc:0.7578461538461538


 61%|██████    | 6600/10845 [24:24<15:18,  4.62it/s, acc=0.758, epoch=42, loss=0.784]

epoch:42, idx:6599/10845, loss:0.7838839726285501, acc:0.7577272727272727


 62%|██████▏   | 6701/10845 [24:46<14:49,  4.66it/s, acc=0.758, epoch=42, loss=0.785]

epoch:42, idx:6699/10845, loss:0.7847506973102911, acc:0.7579850746268657


 63%|██████▎   | 6801/10845 [25:09<14:12,  4.75it/s, acc=0.758, epoch=42, loss=0.784]

epoch:42, idx:6799/10845, loss:0.7844818357246763, acc:0.7579779411764705


 64%|██████▎   | 6900/10845 [25:31<15:52,  4.14it/s, acc=0.758, epoch=42, loss=0.785]

epoch:42, idx:6899/10845, loss:0.7846200547270152, acc:0.7578985507246376


 65%|██████▍   | 7000/10845 [25:53<13:15,  4.83it/s, acc=0.758, epoch=42, loss=0.785]

epoch:42, idx:6999/10845, loss:0.7853326964122909, acc:0.7575357142857143


 65%|██████▌   | 7101/10845 [26:16<13:35,  4.59it/s, acc=0.757, epoch=42, loss=0.786]

epoch:42, idx:7099/10845, loss:0.7862974840066802, acc:0.7573591549295775


 66%|██████▋   | 7200/10845 [26:38<13:48,  4.40it/s, acc=0.757, epoch=42, loss=0.787]

epoch:42, idx:7199/10845, loss:0.7873297496388356, acc:0.7573958333333334


 67%|██████▋   | 7301/10845 [27:01<12:42,  4.65it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:7299/10845, loss:0.7887987010527964, acc:0.7573630136986301


 68%|██████▊   | 7401/10845 [27:23<12:39,  4.53it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:7399/10845, loss:0.788523206316136, acc:0.7573986486486487


 69%|██████▉   | 7500/10845 [27:45<12:58,  4.29it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:7499/10845, loss:0.7890547620058059, acc:0.7574


 70%|███████   | 7601/10845 [28:07<11:28,  4.71it/s, acc=0.757, epoch=42, loss=0.788]

epoch:42, idx:7599/10845, loss:0.7879244997548429, acc:0.757203947368421


 71%|███████   | 7700/10845 [28:30<11:10,  4.69it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:7699/10845, loss:0.7890359611402858, acc:0.7571428571428571


 72%|███████▏  | 7801/10845 [28:52<10:42,  4.74it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:7799/10845, loss:0.7897868932745395, acc:0.7566346153846154


 73%|███████▎  | 7901/10845 [29:14<10:58,  4.47it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:7899/10845, loss:0.788705959342703, acc:0.7567721518987341


 74%|███████▍  | 8000/10845 [29:36<11:28,  4.13it/s, acc=0.757, epoch=42, loss=0.788]

epoch:42, idx:7999/10845, loss:0.788233578979969, acc:0.756625


 75%|███████▍  | 8100/10845 [29:58<10:16,  4.45it/s, acc=0.756, epoch=42, loss=0.789]

epoch:42, idx:8099/10845, loss:0.78885443855215, acc:0.7563888888888889


 76%|███████▌  | 8200/10845 [30:21<09:02,  4.88it/s, acc=0.756, epoch=42, loss=0.79] 

epoch:42, idx:8199/10845, loss:0.790213314396579, acc:0.756219512195122


 77%|███████▋  | 8301/10845 [30:43<09:38,  4.39it/s, acc=0.756, epoch=42, loss=0.791]

epoch:42, idx:8299/10845, loss:0.7911334334080478, acc:0.7560542168674699


 77%|███████▋  | 8400/10845 [31:05<09:14,  4.41it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:8399/10845, loss:0.790078726879188, acc:0.756547619047619


 78%|███████▊  | 8500/10845 [31:27<08:51,  4.42it/s, acc=0.757, epoch=42, loss=0.79]

epoch:42, idx:8499/10845, loss:0.7900029868799098, acc:0.7567647058823529


 79%|███████▉  | 8600/10845 [31:50<08:30,  4.40it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:8599/10845, loss:0.7889327397596004, acc:0.7570058139534884


 80%|████████  | 8700/10845 [32:12<08:02,  4.45it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:8699/10845, loss:0.7886527566115061, acc:0.757183908045977


 81%|████████  | 8800/10845 [32:35<08:19,  4.09it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:8799/10845, loss:0.788675771464001, acc:0.7571306818181818


 82%|████████▏ | 8901/10845 [32:57<07:29,  4.33it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:8899/10845, loss:0.78957897643025, acc:0.7569101123595505


 83%|████████▎ | 9001/10845 [33:19<06:28,  4.75it/s, acc=0.757, epoch=42, loss=0.789]

epoch:42, idx:8999/10845, loss:0.789462168123987, acc:0.7571666666666667


 84%|████████▍ | 9100/10845 [33:42<06:31,  4.46it/s, acc=0.757, epoch=42, loss=0.79] 

epoch:42, idx:9099/10845, loss:0.7897035243092002, acc:0.7571153846153846


 85%|████████▍ | 9200/10845 [34:04<05:54,  4.64it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:9199/10845, loss:0.788363534250985, acc:0.757554347826087


 86%|████████▌ | 9300/10845 [34:26<05:50,  4.41it/s, acc=0.758, epoch=42, loss=0.789]

epoch:42, idx:9299/10845, loss:0.7886459434160622, acc:0.7575537634408602


 87%|████████▋ | 9400/10845 [34:49<05:18,  4.53it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:9399/10845, loss:0.7884999265188867, acc:0.7576063829787234


 88%|████████▊ | 9500/10845 [35:11<04:57,  4.52it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:9499/10845, loss:0.7875272084411822, acc:0.7579210526315789


 89%|████████▊ | 9600/10845 [35:33<04:29,  4.63it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:9599/10845, loss:0.787639224914213, acc:0.7576041666666666


 89%|████████▉ | 9700/10845 [35:56<04:16,  4.46it/s, acc=0.758, epoch=42, loss=0.787]

epoch:42, idx:9699/10845, loss:0.7866268545696415, acc:0.7579896907216495


 90%|█████████ | 9801/10845 [36:19<03:48,  4.57it/s, acc=0.758, epoch=42, loss=0.787]

epoch:42, idx:9799/10845, loss:0.7867768688956086, acc:0.7581377551020408


 91%|█████████▏| 9900/10845 [36:41<03:30,  4.50it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:9899/10845, loss:0.7881113701878172, acc:0.7578535353535354


 92%|█████████▏| 10000/10845 [37:03<03:00,  4.69it/s, acc=0.758, epoch=42, loss=0.787]

epoch:42, idx:9999/10845, loss:0.7874666099071502, acc:0.757975


 93%|█████████▎| 10100/10845 [37:25<02:59,  4.16it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:10099/10845, loss:0.7883039106590913, acc:0.7579207920792079


 94%|█████████▍| 10200/10845 [37:47<02:26,  4.40it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:10199/10845, loss:0.7878086359594383, acc:0.7581862745098039


 95%|█████████▍| 10300/10845 [38:10<01:59,  4.58it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:10299/10845, loss:0.7876158087809109, acc:0.7580582524271845


 96%|█████████▌| 10401/10845 [38:32<01:36,  4.61it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:10399/10845, loss:0.7881829746984519, acc:0.7578125


 97%|█████████▋| 10500/10845 [38:54<01:15,  4.55it/s, acc=0.758, epoch=42, loss=0.788]

epoch:42, idx:10499/10845, loss:0.7883246403421674, acc:0.757547619047619


 98%|█████████▊| 10601/10845 [39:16<00:50,  4.84it/s, acc=0.758, epoch=42, loss=0.787]

epoch:42, idx:10599/10845, loss:0.7875239397219892, acc:0.7579481132075472


 99%|█████████▊| 10700/10845 [39:38<00:30,  4.73it/s, acc=0.758, epoch=42, loss=0.787]

epoch:42, idx:10699/10845, loss:0.7871265838302185, acc:0.7578971962616823


100%|█████████▉| 10800/10845 [40:00<00:09,  4.57it/s, acc=0.758, epoch=42, loss=0.787]

epoch:42, idx:10799/10845, loss:0.7870581931869189, acc:0.757962962962963


100%|██████████| 10845/10845 [40:10<00:00,  4.75it/s, acc=0.758, epoch=42, loss=0.787]


epoch:42, idx:0/1275, loss:1.530947208404541, acc:0.75
epoch:42, idx:100/1275, loss:1.5024957609648752, acc:0.6435643564356436
epoch:42, idx:200/1275, loss:1.3604385737162918, acc:0.6592039800995025
epoch:42, idx:300/1275, loss:1.320211787754515, acc:0.6644518272425249
epoch:42, idx:400/1275, loss:1.2996938818708026, acc:0.6708229426433915
epoch:42, idx:500/1275, loss:1.2603281035632667, acc:0.6746506986027944
epoch:42, idx:600/1275, loss:1.2750490242351906, acc:0.6676372712146422
epoch:42, idx:700/1275, loss:1.2814572036861522, acc:0.6672610556348074
epoch:42, idx:800/1275, loss:1.3015342706002844, acc:0.6644818976279651
epoch:42, idx:900/1275, loss:1.2946799418637809, acc:0.6667591564927858
epoch:42, idx:1000/1275, loss:1.3024334415093763, acc:0.6648351648351648
epoch:42, idx:1100/1275, loss:1.2846158147940951, acc:0.6684831970935513
epoch:42, idx:1200/1275, loss:1.2804681446331923, acc:0.6677768526228143


  1%|          | 100/10845 [00:22<40:10,  4.46it/s, acc=0.77, epoch=43, loss=0.686]

epoch:43, idx:99/10845, loss:0.6861118757724762, acc:0.77


  2%|▏         | 201/10845 [00:44<37:20,  4.75it/s, acc=0.772, epoch=43, loss=0.686]

epoch:43, idx:199/10845, loss:0.6874749371409417, acc:0.7725


  3%|▎         | 301/10845 [01:06<37:53,  4.64it/s, acc=0.773, epoch=43, loss=0.729]

epoch:43, idx:299/10845, loss:0.7296015892426173, acc:0.7733333333333333


  4%|▎         | 400/10845 [01:28<39:20,  4.42it/s, acc=0.773, epoch=43, loss=0.717]

epoch:43, idx:399/10845, loss:0.7167881394922734, acc:0.773125


  5%|▍         | 501/10845 [01:51<36:37,  4.71it/s, acc=0.77, epoch=43, loss=0.729] 

epoch:43, idx:499/10845, loss:0.7304124237298966, acc:0.7695


  6%|▌         | 601/10845 [02:13<36:24,  4.69it/s, acc=0.769, epoch=43, loss=0.743]

epoch:43, idx:599/10845, loss:0.7432941827178001, acc:0.76875


  6%|▋         | 700/10845 [02:35<40:17,  4.20it/s, acc=0.769, epoch=43, loss=0.753]

epoch:43, idx:699/10845, loss:0.754178654210908, acc:0.7682142857142857


  7%|▋         | 800/10845 [02:57<35:57,  4.66it/s, acc=0.771, epoch=43, loss=0.747]

epoch:43, idx:799/10845, loss:0.7470398772507906, acc:0.77125


  8%|▊         | 900/10845 [03:20<35:06,  4.72it/s, acc=0.776, epoch=43, loss=0.736]

epoch:43, idx:899/10845, loss:0.7361233347654342, acc:0.7758333333333334


  9%|▉         | 1001/10845 [03:42<36:24,  4.51it/s, acc=0.774, epoch=43, loss=0.75] 

epoch:43, idx:999/10845, loss:0.7491558614373207, acc:0.77425


 10%|█         | 1101/10845 [04:04<35:17,  4.60it/s, acc=0.773, epoch=43, loss=0.75] 

epoch:43, idx:1099/10845, loss:0.7490460567582737, acc:0.7736363636363637


 11%|█         | 1200/10845 [04:26<38:49,  4.14it/s, acc=0.773, epoch=43, loss=0.748]

epoch:43, idx:1199/10845, loss:0.7478433227042357, acc:0.7729166666666667


 12%|█▏        | 1301/10845 [04:49<34:04,  4.67it/s, acc=0.773, epoch=43, loss=0.741]

epoch:43, idx:1299/10845, loss:0.7412680116525063, acc:0.7732692307692308


 13%|█▎        | 1401/10845 [05:11<36:53,  4.27it/s, acc=0.776, epoch=43, loss=0.733]

epoch:43, idx:1399/10845, loss:0.73256298239742, acc:0.7757142857142857


 14%|█▍        | 1500/10845 [05:33<33:49,  4.60it/s, acc=0.776, epoch=43, loss=0.735]

epoch:43, idx:1499/10845, loss:0.7350831056038538, acc:0.7761666666666667


 15%|█▍        | 1600/10845 [05:55<35:12,  4.38it/s, acc=0.774, epoch=43, loss=0.739]

epoch:43, idx:1599/10845, loss:0.7389120377041399, acc:0.77390625


 16%|█▌        | 1700/10845 [06:18<33:49,  4.50it/s, acc=0.774, epoch=43, loss=0.742]

epoch:43, idx:1699/10845, loss:0.7423221599529771, acc:0.7736764705882353


 17%|█▋        | 1800/10845 [06:40<33:41,  4.47it/s, acc=0.771, epoch=43, loss=0.755]

epoch:43, idx:1799/10845, loss:0.7554979467557537, acc:0.77125


 18%|█▊        | 1900/10845 [07:03<34:11,  4.36it/s, acc=0.772, epoch=43, loss=0.756]

epoch:43, idx:1899/10845, loss:0.7551118333245579, acc:0.7718421052631579


 18%|█▊        | 2000/10845 [07:25<35:00,  4.21it/s, acc=0.771, epoch=43, loss=0.757]

epoch:43, idx:1999/10845, loss:0.7569888942986727, acc:0.770625


 19%|█▉        | 2100/10845 [07:48<34:04,  4.28it/s, acc=0.772, epoch=43, loss=0.75] 

epoch:43, idx:2099/10845, loss:0.7504769627678962, acc:0.7720238095238096


 20%|██        | 2201/10845 [08:10<30:45,  4.68it/s, acc=0.772, epoch=43, loss=0.75] 

epoch:43, idx:2199/10845, loss:0.7506246880374171, acc:0.772159090909091


 21%|██        | 2300/10845 [08:32<33:46,  4.22it/s, acc=0.77, epoch=43, loss=0.759] 

epoch:43, idx:2299/10845, loss:0.758636100590229, acc:0.7702173913043479


 22%|██▏       | 2400/10845 [08:55<32:38,  4.31it/s, acc=0.771, epoch=43, loss=0.755]

epoch:43, idx:2399/10845, loss:0.7554754004751643, acc:0.7704166666666666


 23%|██▎       | 2501/10845 [09:17<30:43,  4.53it/s, acc=0.77, epoch=43, loss=0.755] 

epoch:43, idx:2499/10845, loss:0.7552380116343498, acc:0.7704


 24%|██▍       | 2600/10845 [09:39<29:39,  4.63it/s, acc=0.769, epoch=43, loss=0.756]

epoch:43, idx:2599/10845, loss:0.7555186283932283, acc:0.7693269230769231


 25%|██▍       | 2700/10845 [10:01<31:06,  4.36it/s, acc=0.77, epoch=43, loss=0.758] 

epoch:43, idx:2699/10845, loss:0.7581231546733115, acc:0.7698148148148148


 26%|██▌       | 2800/10845 [10:24<29:21,  4.57it/s, acc=0.77, epoch=43, loss=0.759] 

epoch:43, idx:2799/10845, loss:0.759322889521718, acc:0.7695535714285714


 27%|██▋       | 2901/10845 [10:46<28:37,  4.63it/s, acc=0.77, epoch=43, loss=0.759] 

epoch:43, idx:2899/10845, loss:0.7591211643198441, acc:0.7696551724137931


 28%|██▊       | 3000/10845 [11:08<30:27,  4.29it/s, acc=0.768, epoch=43, loss=0.761]

epoch:43, idx:2999/10845, loss:0.7606234957675139, acc:0.7684166666666666


 29%|██▊       | 3100/10845 [11:30<27:57,  4.62it/s, acc=0.767, epoch=43, loss=0.764]

epoch:43, idx:3099/10845, loss:0.7635156211449254, acc:0.767016129032258


 30%|██▉       | 3200/10845 [11:53<27:21,  4.66it/s, acc=0.767, epoch=43, loss=0.761]

epoch:43, idx:3199/10845, loss:0.7607967408653349, acc:0.767421875


 30%|███       | 3300/10845 [12:15<29:11,  4.31it/s, acc=0.767, epoch=43, loss=0.763]

epoch:43, idx:3299/10845, loss:0.7630437036265026, acc:0.7665151515151515


 31%|███▏      | 3400/10845 [12:37<27:37,  4.49it/s, acc=0.766, epoch=43, loss=0.763]

epoch:43, idx:3399/10845, loss:0.7630728955216267, acc:0.7660294117647058


 32%|███▏      | 3500/10845 [13:00<28:08,  4.35it/s, acc=0.766, epoch=43, loss=0.764]

epoch:43, idx:3499/10845, loss:0.7636650022523743, acc:0.7658571428571429


 33%|███▎      | 3601/10845 [13:22<27:02,  4.46it/s, acc=0.765, epoch=43, loss=0.765]

epoch:43, idx:3599/10845, loss:0.7645359988096687, acc:0.7652777777777777


 34%|███▍      | 3700/10845 [13:45<27:15,  4.37it/s, acc=0.764, epoch=43, loss=0.767]

epoch:43, idx:3699/10845, loss:0.7671648086808823, acc:0.7641216216216217


 35%|███▌      | 3800/10845 [14:07<25:52,  4.54it/s, acc=0.764, epoch=43, loss=0.768]

epoch:43, idx:3799/10845, loss:0.7680794226260562, acc:0.7642763157894736


 36%|███▌      | 3900/10845 [14:29<26:27,  4.37it/s, acc=0.764, epoch=43, loss=0.771]

epoch:43, idx:3899/10845, loss:0.7706648539044918, acc:0.7636538461538461


 37%|███▋      | 4000/10845 [14:52<25:38,  4.45it/s, acc=0.764, epoch=43, loss=0.771]

epoch:43, idx:3999/10845, loss:0.7713384948149323, acc:0.7636875


 38%|███▊      | 4100/10845 [15:14<23:59,  4.68it/s, acc=0.763, epoch=43, loss=0.774]

epoch:43, idx:4099/10845, loss:0.7741880269530342, acc:0.7634756097560975


 39%|███▊      | 4201/10845 [15:36<23:42,  4.67it/s, acc=0.764, epoch=43, loss=0.772]

epoch:43, idx:4199/10845, loss:0.7722999995024431, acc:0.7636309523809524


 40%|███▉      | 4300/10845 [15:59<24:07,  4.52it/s, acc=0.763, epoch=43, loss=0.773]

epoch:43, idx:4299/10845, loss:0.7726848054140113, acc:0.7630232558139535


 41%|████      | 4401/10845 [16:21<23:07,  4.64it/s, acc=0.762, epoch=43, loss=0.773]

epoch:43, idx:4399/10845, loss:0.7735869136926803, acc:0.7619886363636363


 42%|████▏     | 4501/10845 [16:43<23:38,  4.47it/s, acc=0.762, epoch=43, loss=0.775]

epoch:43, idx:4499/10845, loss:0.7750235249797504, acc:0.7617222222222222


 42%|████▏     | 4601/10845 [17:05<23:07,  4.50it/s, acc=0.762, epoch=43, loss=0.773]

epoch:43, idx:4599/10845, loss:0.7732390353537124, acc:0.7617934782608695


 43%|████▎     | 4700/10845 [17:27<23:53,  4.29it/s, acc=0.761, epoch=43, loss=0.775]

epoch:43, idx:4699/10845, loss:0.7750119374470508, acc:0.7612765957446809


 44%|████▍     | 4800/10845 [17:49<24:05,  4.18it/s, acc=0.762, epoch=43, loss=0.774]

epoch:43, idx:4799/10845, loss:0.7741451299749315, acc:0.7617708333333333


 45%|████▌     | 4900/10845 [18:11<22:46,  4.35it/s, acc=0.762, epoch=43, loss=0.775]

epoch:43, idx:4899/10845, loss:0.7751042373387181, acc:0.7619387755102041


 46%|████▌     | 5001/10845 [18:33<20:59,  4.64it/s, acc=0.762, epoch=43, loss=0.775]

epoch:43, idx:4999/10845, loss:0.7751120058596134, acc:0.76165


 47%|████▋     | 5100/10845 [18:55<20:51,  4.59it/s, acc=0.762, epoch=43, loss=0.775]

epoch:43, idx:5099/10845, loss:0.7751248642802239, acc:0.7620098039215686


 48%|████▊     | 5200/10845 [19:17<21:34,  4.36it/s, acc=0.762, epoch=43, loss=0.776]

epoch:43, idx:5199/10845, loss:0.7764455279765221, acc:0.7615865384615385


 49%|████▉     | 5300/10845 [19:39<20:25,  4.52it/s, acc=0.761, epoch=43, loss=0.776]

epoch:43, idx:5299/10845, loss:0.7755519676377188, acc:0.7614622641509434


 50%|████▉     | 5401/10845 [20:02<19:32,  4.64it/s, acc=0.761, epoch=43, loss=0.777]

epoch:43, idx:5399/10845, loss:0.776974524607261, acc:0.7607407407407407


 51%|█████     | 5500/10845 [20:24<21:00,  4.24it/s, acc=0.761, epoch=43, loss=0.778]

epoch:43, idx:5499/10845, loss:0.7784370991804384, acc:0.7606363636363637


 52%|█████▏    | 5601/10845 [20:47<18:26,  4.74it/s, acc=0.761, epoch=43, loss=0.778]

epoch:43, idx:5599/10845, loss:0.7778009472840599, acc:0.7610267857142857


 53%|█████▎    | 5701/10845 [21:09<17:58,  4.77it/s, acc=0.761, epoch=43, loss=0.776]

epoch:43, idx:5699/10845, loss:0.7762985595642474, acc:0.761140350877193


 53%|█████▎    | 5800/10845 [21:31<20:30,  4.10it/s, acc=0.761, epoch=43, loss=0.777]

epoch:43, idx:5799/10845, loss:0.7765752701255781, acc:0.7614655172413793


 54%|█████▍    | 5901/10845 [21:54<17:18,  4.76it/s, acc=0.761, epoch=43, loss=0.776]

epoch:43, idx:5899/10845, loss:0.7761806670639475, acc:0.7615677966101695


 55%|█████▌    | 6000/10845 [22:16<16:22,  4.93it/s, acc=0.762, epoch=43, loss=0.774]

epoch:43, idx:5999/10845, loss:0.7743622682839632, acc:0.762125


 56%|█████▌    | 6100/10845 [22:38<16:51,  4.69it/s, acc=0.762, epoch=43, loss=0.774]

epoch:43, idx:6099/10845, loss:0.7737470768465371, acc:0.7624590163934426


 57%|█████▋    | 6201/10845 [23:01<17:08,  4.52it/s, acc=0.763, epoch=43, loss=0.772]

epoch:43, idx:6199/10845, loss:0.771760148593495, acc:0.7629838709677419


 58%|█████▊    | 6300/10845 [23:23<16:35,  4.56it/s, acc=0.762, epoch=43, loss=0.773]

epoch:43, idx:6299/10845, loss:0.7729020681788051, acc:0.7623412698412698


 59%|█████▉    | 6400/10845 [23:45<16:14,  4.56it/s, acc=0.762, epoch=43, loss=0.773]

epoch:43, idx:6399/10845, loss:0.7730175584321841, acc:0.7623046875


 60%|█████▉    | 6501/10845 [24:07<16:00,  4.52it/s, acc=0.762, epoch=43, loss=0.772]

epoch:43, idx:6499/10845, loss:0.7718167311549187, acc:0.7623846153846154


 61%|██████    | 6601/10845 [24:29<15:04,  4.69it/s, acc=0.762, epoch=43, loss=0.774]

epoch:43, idx:6599/10845, loss:0.7745271538920475, acc:0.761969696969697


 62%|██████▏   | 6700/10845 [24:51<15:59,  4.32it/s, acc=0.762, epoch=43, loss=0.774]

epoch:43, idx:6699/10845, loss:0.7736159559639533, acc:0.7621641791044776


 63%|██████▎   | 6801/10845 [25:14<15:20,  4.39it/s, acc=0.762, epoch=43, loss=0.774]

epoch:43, idx:6799/10845, loss:0.7739720142019145, acc:0.7621691176470589


 64%|██████▎   | 6901/10845 [25:36<13:54,  4.73it/s, acc=0.762, epoch=43, loss=0.774]

epoch:43, idx:6899/10845, loss:0.7742370101809501, acc:0.7623913043478261


 65%|██████▍   | 7001/10845 [25:58<13:39,  4.69it/s, acc=0.762, epoch=43, loss=0.776]

epoch:43, idx:6999/10845, loss:0.7754917209957327, acc:0.7615357142857143


 65%|██████▌   | 7100/10845 [26:20<13:42,  4.55it/s, acc=0.761, epoch=43, loss=0.778]

epoch:43, idx:7099/10845, loss:0.7779857765578888, acc:0.7610915492957746


 66%|██████▋   | 7200/10845 [26:42<13:18,  4.56it/s, acc=0.761, epoch=43, loss=0.778]

epoch:43, idx:7199/10845, loss:0.7779749233317044, acc:0.7607638888888889


 67%|██████▋   | 7301/10845 [27:04<13:04,  4.52it/s, acc=0.761, epoch=43, loss=0.778]

epoch:43, idx:7299/10845, loss:0.7780721626502194, acc:0.7607191780821918


 68%|██████▊   | 7400/10845 [27:26<12:28,  4.61it/s, acc=0.761, epoch=43, loss=0.779]

epoch:43, idx:7399/10845, loss:0.7785668896460856, acc:0.7606756756756756


 69%|██████▉   | 7501/10845 [27:49<11:49,  4.71it/s, acc=0.76, epoch=43, loss=0.78]  

epoch:43, idx:7499/10845, loss:0.7801994567831357, acc:0.7602333333333333


 70%|███████   | 7600/10845 [28:11<13:04,  4.14it/s, acc=0.76, epoch=43, loss=0.78] 

epoch:43, idx:7599/10845, loss:0.7797981898996391, acc:0.7602302631578948


 71%|███████   | 7700/10845 [28:33<11:38,  4.50it/s, acc=0.76, epoch=43, loss=0.78] 

epoch:43, idx:7699/10845, loss:0.7803584761704717, acc:0.7601298701298701


 72%|███████▏  | 7800/10845 [28:55<11:47,  4.30it/s, acc=0.76, epoch=43, loss=0.78] 

epoch:43, idx:7799/10845, loss:0.7798840337647841, acc:0.760224358974359


 73%|███████▎  | 7901/10845 [29:18<10:30,  4.67it/s, acc=0.761, epoch=43, loss=0.779]

epoch:43, idx:7899/10845, loss:0.7789371880209899, acc:0.760759493670886


 74%|███████▍  | 8000/10845 [29:40<10:40,  4.44it/s, acc=0.76, epoch=43, loss=0.78]  

epoch:43, idx:7999/10845, loss:0.7804010524563492, acc:0.76021875


 75%|███████▍  | 8101/10845 [30:02<09:42,  4.71it/s, acc=0.761, epoch=43, loss=0.78] 

epoch:43, idx:8099/10845, loss:0.7797703768404913, acc:0.7608024691358025


 76%|███████▌  | 8200/10845 [30:24<10:36,  4.15it/s, acc=0.761, epoch=43, loss=0.779]

epoch:43, idx:8199/10845, loss:0.7793751422334008, acc:0.7610670731707317


 77%|███████▋  | 8301/10845 [30:46<09:11,  4.61it/s, acc=0.761, epoch=43, loss=0.78] 

epoch:43, idx:8299/10845, loss:0.7800867455802768, acc:0.760722891566265


 77%|███████▋  | 8400/10845 [31:08<09:25,  4.32it/s, acc=0.761, epoch=43, loss=0.778]

epoch:43, idx:8399/10845, loss:0.7779339329933836, acc:0.7610119047619047


 78%|███████▊  | 8501/10845 [31:31<08:38,  4.52it/s, acc=0.761, epoch=43, loss=0.779]

epoch:43, idx:8499/10845, loss:0.7786622803737135, acc:0.7608823529411765


 79%|███████▉  | 8600/10845 [31:53<09:36,  3.90it/s, acc=0.761, epoch=43, loss=0.78] 

epoch:43, idx:8599/10845, loss:0.7797643489234669, acc:0.760843023255814


 80%|████████  | 8701/10845 [32:15<08:09,  4.38it/s, acc=0.761, epoch=43, loss=0.781]

epoch:43, idx:8699/10845, loss:0.7805020728638803, acc:0.7606896551724138


 81%|████████  | 8800/10845 [32:38<07:23,  4.61it/s, acc=0.761, epoch=43, loss=0.78] 

epoch:43, idx:8799/10845, loss:0.7803590479120612, acc:0.7606818181818182


 82%|████████▏ | 8900/10845 [33:00<07:22,  4.40it/s, acc=0.76, epoch=43, loss=0.781] 

epoch:43, idx:8899/10845, loss:0.7810999492212628, acc:0.7603651685393259


 83%|████████▎ | 9000/10845 [33:22<06:46,  4.54it/s, acc=0.76, epoch=43, loss=0.782]

epoch:43, idx:8999/10845, loss:0.7822791049910917, acc:0.7599722222222223


 84%|████████▍ | 9101/10845 [33:44<06:18,  4.60it/s, acc=0.76, epoch=43, loss=0.782]

epoch:43, idx:9099/10845, loss:0.7816870574669523, acc:0.7599175824175824


 85%|████████▍ | 9200/10845 [34:06<06:43,  4.08it/s, acc=0.76, epoch=43, loss=0.782]

epoch:43, idx:9199/10845, loss:0.78150134983594, acc:0.7598369565217391


 86%|████████▌ | 9300/10845 [34:28<05:37,  4.58it/s, acc=0.76, epoch=43, loss=0.782]

epoch:43, idx:9299/10845, loss:0.7815081025355606, acc:0.7595967741935484


 87%|████████▋ | 9401/10845 [34:51<05:19,  4.51it/s, acc=0.759, epoch=43, loss=0.783]

epoch:43, idx:9399/10845, loss:0.7829349512527598, acc:0.7592287234042553


 88%|████████▊ | 9500/10845 [35:12<04:46,  4.69it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:9499/10845, loss:0.7835671780893677, acc:0.7589736842105264


 89%|████████▊ | 9600/10845 [35:35<04:33,  4.56it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:9599/10845, loss:0.7840305896320691, acc:0.7586979166666666


 89%|████████▉ | 9700/10845 [35:56<04:08,  4.61it/s, acc=0.759, epoch=43, loss=0.783]

epoch:43, idx:9699/10845, loss:0.7834832191313665, acc:0.7586597938144329


 90%|█████████ | 9800/10845 [36:19<03:57,  4.41it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:9799/10845, loss:0.783595396301576, acc:0.7588010204081632


 91%|█████████▏| 9900/10845 [36:41<03:40,  4.29it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:9899/10845, loss:0.783760613115749, acc:0.7587121212121212


 92%|█████████▏| 10001/10845 [37:04<03:01,  4.65it/s, acc=0.759, epoch=43, loss=0.783]

epoch:43, idx:9999/10845, loss:0.7831452054053545, acc:0.7588


 93%|█████████▎| 10100/10845 [37:26<02:47,  4.44it/s, acc=0.759, epoch=43, loss=0.783]

epoch:43, idx:10099/10845, loss:0.7833618556125329, acc:0.7589108910891089


 94%|█████████▍| 10200/10845 [37:49<02:18,  4.66it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:10199/10845, loss:0.783592498398879, acc:0.7586764705882353


 95%|█████████▍| 10300/10845 [38:10<01:57,  4.65it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:10299/10845, loss:0.7839250691683547, acc:0.758883495145631


 96%|█████████▌| 10400/10845 [38:32<01:37,  4.54it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:10399/10845, loss:0.7843288491580349, acc:0.7587980769230769


 97%|█████████▋| 10500/10845 [38:54<01:15,  4.58it/s, acc=0.759, epoch=43, loss=0.785]

epoch:43, idx:10499/10845, loss:0.7844953540308135, acc:0.7586428571428572


 98%|█████████▊| 10600/10845 [39:16<00:55,  4.43it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:10599/10845, loss:0.7836665336956393, acc:0.7588679245283019


 99%|█████████▊| 10700/10845 [39:38<00:32,  4.43it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:10699/10845, loss:0.7836419998736025, acc:0.7588084112149532


100%|█████████▉| 10800/10845 [40:01<00:09,  4.56it/s, acc=0.759, epoch=43, loss=0.784]

epoch:43, idx:10799/10845, loss:0.7838897341765739, acc:0.7588657407407408


100%|██████████| 10845/10845 [40:11<00:00,  4.35it/s, acc=0.759, epoch=43, loss=0.784]


epoch:43, idx:0/1275, loss:1.5940613746643066, acc:0.75
epoch:43, idx:100/1275, loss:1.5088272909126659, acc:0.6435643564356436
epoch:43, idx:200/1275, loss:1.3640024964489155, acc:0.6517412935323383
epoch:43, idx:300/1275, loss:1.3240254598598544, acc:0.6602990033222591
epoch:43, idx:400/1275, loss:1.3017681397107475, acc:0.6683291770573566
epoch:43, idx:500/1275, loss:1.2619990272198371, acc:0.6726546906187625
epoch:43, idx:600/1275, loss:1.2757107662083504, acc:0.6655574043261231
epoch:43, idx:700/1275, loss:1.2830243591915353, acc:0.6654778887303852
epoch:43, idx:800/1275, loss:1.3035533602318068, acc:0.6632334581772784
epoch:43, idx:900/1275, loss:1.2972133843668558, acc:0.6664816870144284
epoch:43, idx:1000/1275, loss:1.3061557232916772, acc:0.6650849150849151
epoch:43, idx:1100/1275, loss:1.2885349276496756, acc:0.6691643960036331
epoch:43, idx:1200/1275, loss:1.2847465233044462, acc:0.6677768526228143


  1%|          | 101/10845 [00:23<38:36,  4.64it/s, acc=0.74, epoch=44, loss=0.921] 

epoch:44, idx:99/10845, loss:0.8905061411857605, acc:0.7425


  2%|▏         | 200/10845 [00:45<37:29,  4.73it/s, acc=0.762, epoch=44, loss=0.822]

epoch:44, idx:199/10845, loss:0.8219247108697891, acc:0.7625


  3%|▎         | 300/10845 [01:07<37:42,  4.66it/s, acc=0.777, epoch=44, loss=0.778]

epoch:44, idx:299/10845, loss:0.7782954200108846, acc:0.7766666666666666


  4%|▎         | 400/10845 [01:30<39:49,  4.37it/s, acc=0.778, epoch=44, loss=0.783]

epoch:44, idx:399/10845, loss:0.7833429738879204, acc:0.778125


  5%|▍         | 501/10845 [01:52<36:27,  4.73it/s, acc=0.78, epoch=44, loss=0.777] 

epoch:44, idx:499/10845, loss:0.7712962217330933, acc:0.7805


  6%|▌         | 600/10845 [02:14<37:55,  4.50it/s, acc=0.773, epoch=44, loss=0.797]

epoch:44, idx:599/10845, loss:0.7966523755590121, acc:0.7733333333333333


  6%|▋         | 701/10845 [02:36<28:21,  5.96it/s, acc=0.775, epoch=44, loss=0.786]

epoch:44, idx:699/10845, loss:0.7866618790796824, acc:0.775


  7%|▋         | 800/10845 [02:58<40:13,  4.16it/s, acc=0.774, epoch=44, loss=0.785]

epoch:44, idx:799/10845, loss:0.7853875678032637, acc:0.7740625


  8%|▊         | 900/10845 [03:20<36:25,  4.55it/s, acc=0.77, epoch=44, loss=0.803] 

epoch:44, idx:899/10845, loss:0.803484275009897, acc:0.7702777777777777


  9%|▉         | 1000/10845 [03:42<38:12,  4.29it/s, acc=0.769, epoch=44, loss=0.802]

epoch:44, idx:999/10845, loss:0.8019616378545761, acc:0.76875


 10%|█         | 1100/10845 [04:04<36:07,  4.50it/s, acc=0.767, epoch=44, loss=0.804]

epoch:44, idx:1099/10845, loss:0.8035996478254145, acc:0.7672727272727272


 11%|█         | 1200/10845 [04:26<33:38,  4.78it/s, acc=0.765, epoch=44, loss=0.81] 

epoch:44, idx:1199/10845, loss:0.8097003474831581, acc:0.765


 12%|█▏        | 1300/10845 [04:49<34:07,  4.66it/s, acc=0.766, epoch=44, loss=0.802]

epoch:44, idx:1299/10845, loss:0.8028310280579787, acc:0.765576923076923


 13%|█▎        | 1401/10845 [05:11<34:26,  4.57it/s, acc=0.763, epoch=44, loss=0.802]

epoch:44, idx:1399/10845, loss:0.8018506446055004, acc:0.7632142857142857


 14%|█▍        | 1500/10845 [05:33<34:19,  4.54it/s, acc=0.762, epoch=44, loss=0.804]

epoch:44, idx:1499/10845, loss:0.8038075177669525, acc:0.7625


 15%|█▍        | 1600/10845 [05:55<32:12,  4.78it/s, acc=0.763, epoch=44, loss=0.798]

epoch:44, idx:1599/10845, loss:0.798107450120151, acc:0.7628125


 16%|█▌        | 1700/10845 [06:17<33:00,  4.62it/s, acc=0.763, epoch=44, loss=0.798]

epoch:44, idx:1699/10845, loss:0.7984540847820394, acc:0.7626470588235295


 17%|█▋        | 1800/10845 [06:39<33:36,  4.49it/s, acc=0.762, epoch=44, loss=0.799]

epoch:44, idx:1799/10845, loss:0.7985741601056523, acc:0.7620833333333333


 18%|█▊        | 1901/10845 [07:01<33:48,  4.41it/s, acc=0.763, epoch=44, loss=0.794]

epoch:44, idx:1899/10845, loss:0.7946554586448168, acc:0.7625


 18%|█▊        | 2001/10845 [07:23<31:28,  4.68it/s, acc=0.763, epoch=44, loss=0.794]

epoch:44, idx:1999/10845, loss:0.7933617732226849, acc:0.762875


 19%|█▉        | 2100/10845 [07:45<31:54,  4.57it/s, acc=0.762, epoch=44, loss=0.796]

epoch:44, idx:2099/10845, loss:0.7957276086864017, acc:0.7617857142857143


 20%|██        | 2200/10845 [08:08<32:39,  4.41it/s, acc=0.762, epoch=44, loss=0.794]

epoch:44, idx:2199/10845, loss:0.7939680702307007, acc:0.7618181818181818


 21%|██        | 2301/10845 [08:30<31:01,  4.59it/s, acc=0.762, epoch=44, loss=0.794]

epoch:44, idx:2299/10845, loss:0.7943189475069875, acc:0.7615217391304347


 22%|██▏       | 2400/10845 [08:52<32:40,  4.31it/s, acc=0.762, epoch=44, loss=0.794]

epoch:44, idx:2399/10845, loss:0.7942892601837714, acc:0.761875


 23%|██▎       | 2501/10845 [09:14<29:51,  4.66it/s, acc=0.761, epoch=44, loss=0.793]

epoch:44, idx:2499/10845, loss:0.792831090092659, acc:0.7615


 24%|██▍       | 2600/10845 [09:36<29:55,  4.59it/s, acc=0.763, epoch=44, loss=0.786]

epoch:44, idx:2599/10845, loss:0.7861929102356617, acc:0.7629807692307692


 25%|██▍       | 2701/10845 [09:58<29:11,  4.65it/s, acc=0.763, epoch=44, loss=0.786]

epoch:44, idx:2699/10845, loss:0.7850101872064449, acc:0.7635185185185185


 26%|██▌       | 2801/10845 [10:21<29:25,  4.56it/s, acc=0.763, epoch=44, loss=0.783]

epoch:44, idx:2799/10845, loss:0.7825343246757984, acc:0.7628571428571429


 27%|██▋       | 2900/10845 [10:43<28:54,  4.58it/s, acc=0.762, epoch=44, loss=0.783]

epoch:44, idx:2899/10845, loss:0.7830309003180471, acc:0.7623275862068966


 28%|██▊       | 3001/10845 [11:06<29:43,  4.40it/s, acc=0.762, epoch=44, loss=0.786]

epoch:44, idx:2999/10845, loss:0.7865614437460899, acc:0.7616666666666667


 29%|██▊       | 3101/10845 [11:28<28:22,  4.55it/s, acc=0.763, epoch=44, loss=0.783]

epoch:44, idx:3099/10845, loss:0.7836812191047976, acc:0.7628225806451613


 30%|██▉       | 3200/10845 [11:50<28:39,  4.45it/s, acc=0.762, epoch=44, loss=0.786]

epoch:44, idx:3199/10845, loss:0.7858486567996442, acc:0.761796875


 30%|███       | 3300/10845 [12:12<28:43,  4.38it/s, acc=0.761, epoch=44, loss=0.787]

epoch:44, idx:3299/10845, loss:0.787440808596033, acc:0.7613636363636364


 31%|███▏      | 3400/10845 [12:35<26:17,  4.72it/s, acc=0.76, epoch=44, loss=0.792] 

epoch:44, idx:3399/10845, loss:0.79197718793855, acc:0.76


 32%|███▏      | 3500/10845 [12:57<28:15,  4.33it/s, acc=0.759, epoch=44, loss=0.792]

epoch:44, idx:3499/10845, loss:0.7916118292297636, acc:0.7595714285714286


 33%|███▎      | 3601/10845 [13:19<26:47,  4.51it/s, acc=0.759, epoch=44, loss=0.794]

epoch:44, idx:3599/10845, loss:0.7938332420421971, acc:0.7591666666666667


 34%|███▍      | 3701/10845 [13:41<25:34,  4.66it/s, acc=0.759, epoch=44, loss=0.794]

epoch:44, idx:3699/10845, loss:0.7941139173990971, acc:0.7589189189189189


 35%|███▌      | 3800/10845 [14:04<26:17,  4.46it/s, acc=0.759, epoch=44, loss=0.796]

epoch:44, idx:3799/10845, loss:0.7955305051333026, acc:0.75875


 36%|███▌      | 3900/10845 [14:26<25:50,  4.48it/s, acc=0.759, epoch=44, loss=0.794]

epoch:44, idx:3899/10845, loss:0.7936086910810226, acc:0.7588461538461538


 37%|███▋      | 4000/10845 [14:48<24:45,  4.61it/s, acc=0.758, epoch=44, loss=0.795]

epoch:44, idx:3999/10845, loss:0.7953776332139969, acc:0.758


 38%|███▊      | 4100/10845 [15:10<24:47,  4.53it/s, acc=0.758, epoch=44, loss=0.795]

epoch:44, idx:4099/10845, loss:0.7949570707286276, acc:0.7582317073170731


 39%|███▊      | 4200/10845 [15:32<24:14,  4.57it/s, acc=0.758, epoch=44, loss=0.796]

epoch:44, idx:4199/10845, loss:0.7957005356606983, acc:0.7581547619047619


 40%|███▉      | 4301/10845 [15:55<23:04,  4.73it/s, acc=0.758, epoch=44, loss=0.794]

epoch:44, idx:4299/10845, loss:0.7941979396620462, acc:0.7580232558139535


 41%|████      | 4400/10845 [16:17<23:39,  4.54it/s, acc=0.757, epoch=44, loss=0.798]

epoch:44, idx:4399/10845, loss:0.7975953505526889, acc:0.7573863636363637


 42%|████▏     | 4501/10845 [16:40<22:26,  4.71it/s, acc=0.757, epoch=44, loss=0.8]  

epoch:44, idx:4499/10845, loss:0.7997923855516645, acc:0.7572222222222222


 42%|████▏     | 4600/10845 [17:02<22:48,  4.56it/s, acc=0.758, epoch=44, loss=0.797]

epoch:44, idx:4599/10845, loss:0.7970243999491567, acc:0.7576630434782609


 43%|████▎     | 4701/10845 [17:24<21:49,  4.69it/s, acc=0.757, epoch=44, loss=0.796]

epoch:44, idx:4699/10845, loss:0.7960125990370487, acc:0.7573404255319149


 44%|████▍     | 4800/10845 [17:46<20:55,  4.82it/s, acc=0.757, epoch=44, loss=0.796]

epoch:44, idx:4799/10845, loss:0.7959049384544293, acc:0.7572395833333333


 45%|████▌     | 4900/10845 [18:08<21:26,  4.62it/s, acc=0.758, epoch=44, loss=0.793]

epoch:44, idx:4899/10845, loss:0.7928564314939538, acc:0.7579591836734694


 46%|████▌     | 5000/10845 [18:30<21:07,  4.61it/s, acc=0.758, epoch=44, loss=0.793]

epoch:44, idx:4999/10845, loss:0.7926468163251876, acc:0.75785


 47%|████▋     | 5100/10845 [18:52<21:34,  4.44it/s, acc=0.758, epoch=44, loss=0.794]

epoch:44, idx:5099/10845, loss:0.7939430848991169, acc:0.7581372549019608


 48%|████▊     | 5200/10845 [19:15<20:39,  4.55it/s, acc=0.758, epoch=44, loss=0.793]

epoch:44, idx:5199/10845, loss:0.7932258704533943, acc:0.7580288461538461


 49%|████▉     | 5300/10845 [19:37<21:10,  4.36it/s, acc=0.759, epoch=44, loss=0.79] 

epoch:44, idx:5299/10845, loss:0.7897907778452028, acc:0.7587264150943396


 50%|████▉     | 5401/10845 [19:59<19:52,  4.56it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:5399/10845, loss:0.7891863834195667, acc:0.7593981481481481


 51%|█████     | 5501/10845 [20:21<19:40,  4.53it/s, acc=0.76, epoch=44, loss=0.786] 

epoch:44, idx:5499/10845, loss:0.7856707693230022, acc:0.7603181818181818


 52%|█████▏    | 5601/10845 [20:43<20:31,  4.26it/s, acc=0.76, epoch=44, loss=0.787]

epoch:44, idx:5599/10845, loss:0.7872933390097959, acc:0.76


 53%|█████▎    | 5700/10845 [21:06<20:53,  4.10it/s, acc=0.76, epoch=44, loss=0.788]

epoch:44, idx:5699/10845, loss:0.7882864757797174, acc:0.759780701754386


 53%|█████▎    | 5801/10845 [21:28<17:12,  4.89it/s, acc=0.76, epoch=44, loss=0.788]

epoch:44, idx:5799/10845, loss:0.7876802162466378, acc:0.7598275862068965


 54%|█████▍    | 5901/10845 [21:50<17:43,  4.65it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:5899/10845, loss:0.7885658461158559, acc:0.7592372881355932


 55%|█████▌    | 6000/10845 [22:12<18:43,  4.31it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:5999/10845, loss:0.7892045265634855, acc:0.7592083333333334


 56%|█████▌    | 6100/10845 [22:35<16:34,  4.77it/s, acc=0.76, epoch=44, loss=0.787] 

epoch:44, idx:6099/10845, loss:0.7870224931787272, acc:0.76


 57%|█████▋    | 6200/10845 [22:56<16:35,  4.66it/s, acc=0.76, epoch=44, loss=0.788]

epoch:44, idx:6199/10845, loss:0.7880917014998774, acc:0.7600806451612904


 58%|█████▊    | 6301/10845 [23:19<15:59,  4.74it/s, acc=0.76, epoch=44, loss=0.787]

epoch:44, idx:6299/10845, loss:0.7874364963410393, acc:0.7601190476190476


 59%|█████▉    | 6400/10845 [23:41<16:06,  4.60it/s, acc=0.76, epoch=44, loss=0.787]

epoch:44, idx:6399/10845, loss:0.7872966452687978, acc:0.7598046875


 60%|█████▉    | 6500/10845 [24:03<16:25,  4.41it/s, acc=0.76, epoch=44, loss=0.787]

epoch:44, idx:6499/10845, loss:0.7870956825293027, acc:0.7597692307692308


 61%|██████    | 6601/10845 [24:26<16:22,  4.32it/s, acc=0.76, epoch=44, loss=0.787]

epoch:44, idx:6599/10845, loss:0.7865513622760772, acc:0.7598484848484849


 62%|██████▏   | 6700/10845 [24:48<15:56,  4.33it/s, acc=0.76, epoch=44, loss=0.787]

epoch:44, idx:6699/10845, loss:0.7871920282449295, acc:0.7597761194029851


 63%|██████▎   | 6801/10845 [25:10<14:12,  4.74it/s, acc=0.76, epoch=44, loss=0.787]

epoch:44, idx:6799/10845, loss:0.7871996796657057, acc:0.7596691176470588


 64%|██████▎   | 6900/10845 [25:32<14:11,  4.63it/s, acc=0.76, epoch=44, loss=0.786]

epoch:44, idx:6899/10845, loss:0.7862635997585629, acc:0.7597101449275362


 65%|██████▍   | 7000/10845 [25:54<13:38,  4.69it/s, acc=0.76, epoch=44, loss=0.786] 

epoch:44, idx:6999/10845, loss:0.7863826141527721, acc:0.7596071428571428


 65%|██████▌   | 7100/10845 [26:16<13:10,  4.74it/s, acc=0.759, epoch=44, loss=0.788]

epoch:44, idx:7099/10845, loss:0.7877910295002897, acc:0.7591197183098591


 66%|██████▋   | 7201/10845 [26:39<13:01,  4.66it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:7199/10845, loss:0.7891801069014602, acc:0.7588888888888888


 67%|██████▋   | 7300/10845 [27:01<12:26,  4.75it/s, acc=0.759, epoch=44, loss=0.79] 

epoch:44, idx:7299/10845, loss:0.7900952645687208, acc:0.7588013698630137


 68%|██████▊   | 7400/10845 [27:23<13:05,  4.38it/s, acc=0.759, epoch=44, loss=0.791]

epoch:44, idx:7399/10845, loss:0.7909144103688163, acc:0.7585810810810811


 69%|██████▉   | 7500/10845 [27:46<12:35,  4.43it/s, acc=0.759, epoch=44, loss=0.79] 

epoch:44, idx:7499/10845, loss:0.7895520382563274, acc:0.7588333333333334


 70%|███████   | 7600/10845 [28:08<11:36,  4.66it/s, acc=0.759, epoch=44, loss=0.79] 

epoch:44, idx:7599/10845, loss:0.7895645973870629, acc:0.7589802631578947


 71%|███████   | 7700/10845 [28:31<11:53,  4.41it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:7699/10845, loss:0.7886996289971587, acc:0.759025974025974


 72%|███████▏  | 7800/10845 [28:53<11:11,  4.53it/s, acc=0.759, epoch=44, loss=0.788]

epoch:44, idx:7799/10845, loss:0.7880609764196934, acc:0.7591346153846154


 73%|███████▎  | 7900/10845 [29:16<11:26,  4.29it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:7899/10845, loss:0.7889846814735026, acc:0.7588291139240506


 74%|███████▍  | 8000/10845 [29:38<10:17,  4.61it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:7999/10845, loss:0.7886677625328302, acc:0.759125


 75%|███████▍  | 8100/10845 [30:00<10:20,  4.42it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:8099/10845, loss:0.7893244000128757, acc:0.7589506172839506


 76%|███████▌  | 8200/10845 [30:22<09:25,  4.68it/s, acc=0.758, epoch=44, loss=0.79] 

epoch:44, idx:8199/10845, loss:0.7903853087454308, acc:0.7583841463414634


 77%|███████▋  | 8300/10845 [30:45<09:44,  4.35it/s, acc=0.758, epoch=44, loss=0.79] 

epoch:44, idx:8299/10845, loss:0.7901763727291521, acc:0.7584036144578313


 77%|███████▋  | 8400/10845 [31:07<09:17,  4.38it/s, acc=0.759, epoch=44, loss=0.789]

epoch:44, idx:8399/10845, loss:0.7890379033911796, acc:0.7586011904761905


 78%|███████▊  | 8501/10845 [31:30<08:28,  4.61it/s, acc=0.758, epoch=44, loss=0.789]

epoch:44, idx:8499/10845, loss:0.7890636567087734, acc:0.7582058823529412


 79%|███████▉  | 8600/10845 [31:52<08:28,  4.42it/s, acc=0.758, epoch=44, loss=0.789]

epoch:44, idx:8599/10845, loss:0.7894594908037851, acc:0.7583430232558139


 80%|████████  | 8701/10845 [32:15<08:30,  4.20it/s, acc=0.758, epoch=44, loss=0.79] 

epoch:44, idx:8699/10845, loss:0.7903708317362029, acc:0.7582758620689655


 81%|████████  | 8800/10845 [32:37<07:28,  4.56it/s, acc=0.758, epoch=44, loss=0.79] 

epoch:44, idx:8799/10845, loss:0.78961113889109, acc:0.7582102272727272


 82%|████████▏ | 8900/10845 [32:59<07:10,  4.52it/s, acc=0.758, epoch=44, loss=0.789]

epoch:44, idx:8899/10845, loss:0.7893002231201429, acc:0.7582584269662921


 83%|████████▎ | 9000/10845 [33:21<06:55,  4.44it/s, acc=0.758, epoch=44, loss=0.789]

epoch:44, idx:8999/10845, loss:0.7893392706712087, acc:0.7583888888888889


 84%|████████▍ | 9101/10845 [33:43<06:14,  4.66it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:9099/10845, loss:0.7868358443464551, acc:0.7586263736263736


 85%|████████▍ | 9201/10845 [34:06<06:06,  4.49it/s, acc=0.759, epoch=44, loss=0.786]

epoch:44, idx:9199/10845, loss:0.7860035968085994, acc:0.7589130434782608


 86%|████████▌ | 9301/10845 [34:28<05:41,  4.52it/s, acc=0.759, epoch=44, loss=0.786]

epoch:44, idx:9299/10845, loss:0.7855021805532517, acc:0.758978494623656


 87%|████████▋ | 9400/10845 [34:50<05:17,  4.55it/s, acc=0.759, epoch=44, loss=0.785]

epoch:44, idx:9399/10845, loss:0.7853559874600553, acc:0.7589627659574468


 88%|████████▊ | 9500/10845 [35:13<05:10,  4.33it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:9499/10845, loss:0.786915094977931, acc:0.7585263157894737


 89%|████████▊ | 9601/10845 [35:35<04:34,  4.53it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:9599/10845, loss:0.7868484172224999, acc:0.758515625


 89%|████████▉ | 9701/10845 [35:58<04:08,  4.60it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:9699/10845, loss:0.787494032677916, acc:0.7585824742268041


 90%|█████████ | 9800/10845 [36:20<03:47,  4.60it/s, acc=0.759, epoch=44, loss=0.788]

epoch:44, idx:9799/10845, loss:0.7876221431761372, acc:0.7585204081632653


 91%|█████████▏| 9901/10845 [36:42<03:23,  4.65it/s, acc=0.759, epoch=44, loss=0.786]

epoch:44, idx:9899/10845, loss:0.7864667991736922, acc:0.7590656565656566


 92%|█████████▏| 10000/10845 [37:04<03:12,  4.38it/s, acc=0.759, epoch=44, loss=0.786]

epoch:44, idx:9999/10845, loss:0.785717534762621, acc:0.7591


 93%|█████████▎| 10100/10845 [37:26<02:46,  4.47it/s, acc=0.759, epoch=44, loss=0.785]

epoch:44, idx:10099/10845, loss:0.7851655551643655, acc:0.7592326732673267


 94%|█████████▍| 10200/10845 [37:49<02:25,  4.44it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:10199/10845, loss:0.7865083440787652, acc:0.7589950980392157


 95%|█████████▍| 10300/10845 [38:11<02:00,  4.52it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:10299/10845, loss:0.7869440957122636, acc:0.7589077669902913


 96%|█████████▌| 10401/10845 [38:34<01:42,  4.35it/s, acc=0.759, epoch=44, loss=0.788]

epoch:44, idx:10399/10845, loss:0.7876220933978374, acc:0.7587259615384615


 97%|█████████▋| 10501/10845 [38:56<01:15,  4.55it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:10499/10845, loss:0.7869365435895466, acc:0.7589047619047619


 98%|█████████▊| 10601/10845 [39:18<00:57,  4.21it/s, acc=0.759, epoch=44, loss=0.787]

epoch:44, idx:10599/10845, loss:0.7866406359650054, acc:0.7590801886792453


 99%|█████████▊| 10701/10845 [39:41<00:31,  4.57it/s, acc=0.759, epoch=44, loss=0.786]

epoch:44, idx:10699/10845, loss:0.7860434543417993, acc:0.7593925233644859


100%|█████████▉| 10800/10845 [40:03<00:10,  4.50it/s, acc=0.759, epoch=44, loss=0.786]

epoch:44, idx:10799/10845, loss:0.7864121339387364, acc:0.759375


100%|██████████| 10845/10845 [40:13<00:00,  4.65it/s, acc=0.759, epoch=44, loss=0.786]


epoch:44, idx:0/1275, loss:1.568497657775879, acc:0.75
epoch:44, idx:100/1275, loss:1.5099111028236918, acc:0.6460396039603961
epoch:44, idx:200/1275, loss:1.3656076026793142, acc:0.6592039800995025
epoch:44, idx:300/1275, loss:1.3252836366824534, acc:0.6661129568106312
epoch:44, idx:400/1275, loss:1.3025355621466315, acc:0.6726932668329177
epoch:44, idx:500/1275, loss:1.2626415652905159, acc:0.6756487025948104
epoch:44, idx:600/1275, loss:1.2763182821369012, acc:0.6684692179700499
epoch:44, idx:700/1275, loss:1.282685946635955, acc:0.6690442225392297
epoch:44, idx:800/1275, loss:1.302541451061263, acc:0.6660424469413233
epoch:44, idx:900/1275, loss:1.2955688811036512, acc:0.6681465038845728
epoch:44, idx:1000/1275, loss:1.304715564796379, acc:0.6663336663336663
epoch:44, idx:1100/1275, loss:1.2868110461196067, acc:0.6700726612170754
epoch:44, idx:1200/1275, loss:1.2829106377125978, acc:0.6681931723563697


  1%|          | 101/10845 [00:22<38:37,  4.64it/s, acc=0.743, epoch=45, loss=0.762]

epoch:45, idx:99/10845, loss:0.7673470699787139, acc:0.74


  2%|▏         | 200/10845 [00:44<37:30,  4.73it/s, acc=0.76, epoch=45, loss=0.722] 

epoch:45, idx:199/10845, loss:0.7222677588462829, acc:0.76


  3%|▎         | 301/10845 [01:06<38:03,  4.62it/s, acc=0.777, epoch=45, loss=0.718]

epoch:45, idx:299/10845, loss:0.718381157318751, acc:0.7766666666666666


  4%|▎         | 400/10845 [01:28<37:33,  4.63it/s, acc=0.778, epoch=45, loss=0.712]

epoch:45, idx:399/10845, loss:0.7115794828534127, acc:0.778125


  5%|▍         | 500/10845 [01:51<39:43,  4.34it/s, acc=0.774, epoch=45, loss=0.723]

epoch:45, idx:499/10845, loss:0.7231829867362977, acc:0.7745


  6%|▌         | 601/10845 [02:13<38:27,  4.44it/s, acc=0.77, epoch=45, loss=0.732] 

epoch:45, idx:599/10845, loss:0.7314434711138408, acc:0.7695833333333333


  6%|▋         | 700/10845 [02:35<37:30,  4.51it/s, acc=0.774, epoch=45, loss=0.731]

epoch:45, idx:699/10845, loss:0.7311174542563302, acc:0.7739285714285714


  7%|▋         | 800/10845 [02:58<36:55,  4.53it/s, acc=0.772, epoch=45, loss=0.762]

epoch:45, idx:799/10845, loss:0.7619531074166298, acc:0.7725


  8%|▊         | 901/10845 [03:21<37:29,  4.42it/s, acc=0.768, epoch=45, loss=0.771]

epoch:45, idx:899/10845, loss:0.7722988998889924, acc:0.7677777777777778


  9%|▉         | 1001/10845 [03:43<37:07,  4.42it/s, acc=0.765, epoch=45, loss=0.773]

epoch:45, idx:999/10845, loss:0.7737544959783554, acc:0.76525


 10%|█         | 1100/10845 [04:05<35:57,  4.52it/s, acc=0.765, epoch=45, loss=0.775]

epoch:45, idx:1099/10845, loss:0.7750090133060109, acc:0.7654545454545455


 11%|█         | 1200/10845 [04:27<36:02,  4.46it/s, acc=0.765, epoch=45, loss=0.773]

epoch:45, idx:1199/10845, loss:0.7726796277364095, acc:0.765


 12%|█▏        | 1300/10845 [04:50<36:38,  4.34it/s, acc=0.762, epoch=45, loss=0.78] 

epoch:45, idx:1299/10845, loss:0.7802610446856572, acc:0.7623076923076924


 13%|█▎        | 1401/10845 [05:12<34:16,  4.59it/s, acc=0.763, epoch=45, loss=0.775]

epoch:45, idx:1399/10845, loss:0.77480232494218, acc:0.7628571428571429


 14%|█▍        | 1501/10845 [05:34<33:37,  4.63it/s, acc=0.76, epoch=45, loss=0.78]  

epoch:45, idx:1499/10845, loss:0.7801025394598643, acc:0.7603333333333333


 15%|█▍        | 1600/10845 [05:57<34:10,  4.51it/s, acc=0.76, epoch=45, loss=0.78]  

epoch:45, idx:1599/10845, loss:0.7800847196578979, acc:0.7603125


 16%|█▌        | 1700/10845 [06:19<33:03,  4.61it/s, acc=0.76, epoch=45, loss=0.779] 

epoch:45, idx:1699/10845, loss:0.7792498365570517, acc:0.7601470588235294


 17%|█▋        | 1800/10845 [06:41<33:58,  4.44it/s, acc=0.761, epoch=45, loss=0.773]

epoch:45, idx:1799/10845, loss:0.7728011856476465, acc:0.7613888888888889


 18%|█▊        | 1900/10845 [07:03<35:22,  4.21it/s, acc=0.762, epoch=45, loss=0.769]

epoch:45, idx:1899/10845, loss:0.7689493429033379, acc:0.7625


 18%|█▊        | 2000/10845 [07:26<33:28,  4.40it/s, acc=0.764, epoch=45, loss=0.767]

epoch:45, idx:1999/10845, loss:0.7669260308742524, acc:0.764


 19%|█▉        | 2100/10845 [07:48<33:03,  4.41it/s, acc=0.764, epoch=45, loss=0.767]

epoch:45, idx:2099/10845, loss:0.7669630567800431, acc:0.7640476190476191


 20%|██        | 2200/10845 [08:10<32:24,  4.45it/s, acc=0.764, epoch=45, loss=0.766]

epoch:45, idx:2199/10845, loss:0.765818904177709, acc:0.7644318181818182


 21%|██        | 2301/10845 [08:32<31:17,  4.55it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:2299/10845, loss:0.767004404793615, acc:0.7654347826086957


 22%|██▏       | 2401/10845 [08:55<31:06,  4.52it/s, acc=0.766, epoch=45, loss=0.765]

epoch:45, idx:2399/10845, loss:0.7651405934989453, acc:0.7661458333333333


 23%|██▎       | 2500/10845 [09:16<30:37,  4.54it/s, acc=0.766, epoch=45, loss=0.768]

epoch:45, idx:2499/10845, loss:0.7677831501483917, acc:0.7658


 24%|██▍       | 2600/10845 [09:39<31:34,  4.35it/s, acc=0.766, epoch=45, loss=0.766]

epoch:45, idx:2599/10845, loss:0.7656339860420961, acc:0.7663461538461539


 25%|██▍       | 2700/10845 [10:02<30:43,  4.42it/s, acc=0.767, epoch=45, loss=0.765]

epoch:45, idx:2699/10845, loss:0.7648111792846962, acc:0.7667592592592593


 26%|██▌       | 2801/10845 [10:24<29:39,  4.52it/s, acc=0.767, epoch=45, loss=0.763]

epoch:45, idx:2799/10845, loss:0.7628830514209611, acc:0.7670535714285714


 27%|██▋       | 2900/10845 [10:46<27:57,  4.74it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:2899/10845, loss:0.7638040394618594, acc:0.7663793103448275


 28%|██▊       | 3001/10845 [11:09<28:54,  4.52it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:2999/10845, loss:0.76647729297479, acc:0.7648333333333334


 29%|██▊       | 3100/10845 [11:31<28:49,  4.48it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:3099/10845, loss:0.7681799099137706, acc:0.7645161290322581


 30%|██▉       | 3200/10845 [11:53<27:46,  4.59it/s, acc=0.764, epoch=45, loss=0.77] 

epoch:45, idx:3199/10845, loss:0.7696671490743756, acc:0.76421875


 30%|███       | 3300/10845 [12:16<26:02,  4.83it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:3299/10845, loss:0.7684288323648049, acc:0.7646212121212121


 31%|███▏      | 3401/10845 [12:38<27:53,  4.45it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:3399/10845, loss:0.7670542941373937, acc:0.7647794117647059


 32%|███▏      | 3500/10845 [13:00<27:14,  4.49it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:3499/10845, loss:0.767776268516268, acc:0.7647142857142857


 33%|███▎      | 3601/10845 [13:22<26:20,  4.58it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:3599/10845, loss:0.7682694261603885, acc:0.7647222222222222


 34%|███▍      | 3701/10845 [13:44<25:44,  4.62it/s, acc=0.764, epoch=45, loss=0.77] 

epoch:45, idx:3699/10845, loss:0.7705665929897412, acc:0.7637837837837838


 35%|███▌      | 3800/10845 [14:06<27:18,  4.30it/s, acc=0.764, epoch=45, loss=0.772]

epoch:45, idx:3799/10845, loss:0.7724603206860392, acc:0.7636842105263157


 36%|███▌      | 3901/10845 [14:29<25:38,  4.51it/s, acc=0.765, epoch=45, loss=0.771]

epoch:45, idx:3899/10845, loss:0.7709175993846012, acc:0.7646794871794872


 37%|███▋      | 4000/10845 [14:51<25:57,  4.39it/s, acc=0.765, epoch=45, loss=0.771]

epoch:45, idx:3999/10845, loss:0.770691754758358, acc:0.765125


 38%|███▊      | 4100/10845 [15:13<25:12,  4.46it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:4099/10845, loss:0.7692376361242155, acc:0.765


 39%|███▊      | 4200/10845 [15:35<23:27,  4.72it/s, acc=0.765, epoch=45, loss=0.771]

epoch:45, idx:4199/10845, loss:0.7709442520993096, acc:0.7645833333333333


 40%|███▉      | 4301/10845 [15:58<23:32,  4.63it/s, acc=0.764, epoch=45, loss=0.772]

epoch:45, idx:4299/10845, loss:0.7715599134633707, acc:0.7640116279069767


 41%|████      | 4400/10845 [16:20<24:54,  4.31it/s, acc=0.765, epoch=45, loss=0.77] 

epoch:45, idx:4399/10845, loss:0.770111572417346, acc:0.7648295454545454


 41%|████▏     | 4500/10845 [16:42<23:17,  4.54it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:4499/10845, loss:0.7691953125794728, acc:0.7646111111111111


 42%|████▏     | 4601/10845 [17:04<22:52,  4.55it/s, acc=0.764, epoch=45, loss=0.77] 

epoch:45, idx:4599/10845, loss:0.7705126917103062, acc:0.7639673913043479


 43%|████▎     | 4700/10845 [17:27<22:02,  4.64it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:4699/10845, loss:0.7670724700613225, acc:0.7653191489361703


 44%|████▍     | 4800/10845 [17:50<23:43,  4.25it/s, acc=0.766, epoch=45, loss=0.765]

epoch:45, idx:4799/10845, loss:0.7651245319843292, acc:0.7659375


 45%|████▌     | 4900/10845 [18:12<21:33,  4.60it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:4899/10845, loss:0.7637483059630102, acc:0.7661224489795918


 46%|████▌     | 5000/10845 [18:34<21:24,  4.55it/s, acc=0.765, epoch=45, loss=0.766]

epoch:45, idx:4999/10845, loss:0.7657491203308106, acc:0.76515


 47%|████▋     | 5101/10845 [18:57<21:48,  4.39it/s, acc=0.765, epoch=45, loss=0.766]

epoch:45, idx:5099/10845, loss:0.7661829195536819, acc:0.7650980392156863


 48%|████▊     | 5200/10845 [19:19<20:10,  4.66it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:5199/10845, loss:0.7669456058740616, acc:0.7649038461538461


 49%|████▉     | 5300/10845 [19:41<20:26,  4.52it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:5299/10845, loss:0.7689418968164696, acc:0.7647169811320754


 50%|████▉     | 5400/10845 [20:04<19:57,  4.55it/s, acc=0.765, epoch=45, loss=0.77] 

epoch:45, idx:5399/10845, loss:0.7698256912496355, acc:0.764537037037037


 51%|█████     | 5500/10845 [20:26<19:56,  4.47it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:5499/10845, loss:0.7688136172294616, acc:0.7646818181818181


 52%|█████▏    | 5601/10845 [20:48<18:04,  4.84it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:5599/10845, loss:0.7673596499221665, acc:0.7652678571428572


 53%|█████▎    | 5701/10845 [21:11<18:51,  4.55it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:5699/10845, loss:0.7691681666227809, acc:0.7652631578947369


 53%|█████▎    | 5800/10845 [21:33<18:23,  4.57it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:5799/10845, loss:0.7685396752994637, acc:0.7653448275862069


 54%|█████▍    | 5900/10845 [21:55<17:47,  4.63it/s, acc=0.766, epoch=45, loss=0.768]

epoch:45, idx:5899/10845, loss:0.7676424603846114, acc:0.7655508474576271


 55%|█████▌    | 6000/10845 [22:17<17:32,  4.60it/s, acc=0.766, epoch=45, loss=0.766]

epoch:45, idx:5999/10845, loss:0.7664459633727868, acc:0.7658333333333334


 56%|█████▋    | 6101/10845 [22:39<16:53,  4.68it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:6099/10845, loss:0.7692152584478503, acc:0.7652868852459016


 57%|█████▋    | 6201/10845 [23:01<17:13,  4.49it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:6199/10845, loss:0.7676712310025768, acc:0.7654435483870968


 58%|█████▊    | 6300/10845 [23:23<16:26,  4.61it/s, acc=0.766, epoch=45, loss=0.768]

epoch:45, idx:6299/10845, loss:0.7675059289686263, acc:0.765515873015873


 59%|█████▉    | 6400/10845 [23:46<16:39,  4.45it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:6399/10845, loss:0.7676288951095194, acc:0.7653125


 60%|█████▉    | 6500/10845 [24:08<15:15,  4.75it/s, acc=0.766, epoch=45, loss=0.768]

epoch:45, idx:6499/10845, loss:0.7675385461862271, acc:0.765576923076923


 61%|██████    | 6600/10845 [24:30<16:06,  4.39it/s, acc=0.766, epoch=45, loss=0.768]

epoch:45, idx:6599/10845, loss:0.7679025896480589, acc:0.7657954545454545


 62%|██████▏   | 6700/10845 [24:53<15:10,  4.55it/s, acc=0.766, epoch=45, loss=0.768]

epoch:45, idx:6699/10845, loss:0.768010983137942, acc:0.7655223880597015


 63%|██████▎   | 6800/10845 [25:15<15:19,  4.40it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:6799/10845, loss:0.7682406266121303, acc:0.7653676470588235


 64%|██████▎   | 6901/10845 [25:38<14:11,  4.63it/s, acc=0.765, epoch=45, loss=0.77] 

epoch:45, idx:6899/10845, loss:0.7703131322843441, acc:0.764927536231884


 65%|██████▍   | 7000/10845 [26:00<14:27,  4.43it/s, acc=0.765, epoch=45, loss=0.77] 

epoch:45, idx:6999/10845, loss:0.7703114521248, acc:0.7652857142857142


 65%|██████▌   | 7100/10845 [26:22<14:09,  4.41it/s, acc=0.765, epoch=45, loss=0.771]

epoch:45, idx:7099/10845, loss:0.7713895563928175, acc:0.7648239436619718


 66%|██████▋   | 7200/10845 [26:45<13:48,  4.40it/s, acc=0.765, epoch=45, loss=0.771]

epoch:45, idx:7199/10845, loss:0.7711096001995934, acc:0.7647569444444444


 67%|██████▋   | 7300/10845 [27:07<12:39,  4.67it/s, acc=0.764, epoch=45, loss=0.772]

epoch:45, idx:7299/10845, loss:0.772374709305698, acc:0.7637671232876713


 68%|██████▊   | 7400/10845 [27:30<12:39,  4.54it/s, acc=0.764, epoch=45, loss=0.772]

epoch:45, idx:7399/10845, loss:0.7720416323719798, acc:0.7641216216216217


 69%|██████▉   | 7500/10845 [27:52<12:35,  4.43it/s, acc=0.764, epoch=45, loss=0.772]

epoch:45, idx:7499/10845, loss:0.7719269499142964, acc:0.7641666666666667


 70%|███████   | 7601/10845 [28:14<12:15,  4.41it/s, acc=0.764, epoch=45, loss=0.772]

epoch:45, idx:7599/10845, loss:0.7715885606015983, acc:0.7642434210526315


 71%|███████   | 7700/10845 [28:36<11:29,  4.56it/s, acc=0.764, epoch=45, loss=0.771]

epoch:45, idx:7699/10845, loss:0.770809459477276, acc:0.7644805194805194


 72%|███████▏  | 7801/10845 [28:59<10:55,  4.65it/s, acc=0.765, epoch=45, loss=0.77] 

epoch:45, idx:7799/10845, loss:0.7704511378743709, acc:0.7645512820512821


 73%|███████▎  | 7901/10845 [29:21<10:13,  4.80it/s, acc=0.765, epoch=45, loss=0.77] 

epoch:45, idx:7899/10845, loss:0.7696647491560706, acc:0.764746835443038


 74%|███████▍  | 8000/10845 [29:43<10:14,  4.63it/s, acc=0.765, epoch=45, loss=0.77] 

epoch:45, idx:7999/10845, loss:0.7698453486636281, acc:0.7646875


 75%|███████▍  | 8101/10845 [30:05<09:15,  4.94it/s, acc=0.765, epoch=45, loss=0.769]

epoch:45, idx:8099/10845, loss:0.7688854340785816, acc:0.7651543209876543


 76%|███████▌  | 8200/10845 [30:27<09:09,  4.81it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:8199/10845, loss:0.7676452493594914, acc:0.7653048780487804


 77%|███████▋  | 8300/10845 [30:49<09:09,  4.63it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:8299/10845, loss:0.7668148522850978, acc:0.7654819277108433


 77%|███████▋  | 8400/10845 [31:12<09:51,  4.14it/s, acc=0.766, epoch=45, loss=0.766]

epoch:45, idx:8399/10845, loss:0.7665307896974541, acc:0.7655357142857143


 78%|███████▊  | 8500/10845 [31:34<08:36,  4.54it/s, acc=0.766, epoch=45, loss=0.765]

epoch:45, idx:8499/10845, loss:0.7645146804767496, acc:0.766


 79%|███████▉  | 8600/10845 [31:56<08:27,  4.42it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:8599/10845, loss:0.7638838748807131, acc:0.7663081395348837


 80%|████████  | 8701/10845 [32:17<07:42,  4.64it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:8699/10845, loss:0.7644033304644727, acc:0.7663218390804598


 81%|████████  | 8800/10845 [32:39<07:18,  4.66it/s, acc=0.767, epoch=45, loss=0.762]

epoch:45, idx:8799/10845, loss:0.7623856295848435, acc:0.7665909090909091


 82%|████████▏ | 8900/10845 [33:01<06:40,  4.85it/s, acc=0.767, epoch=45, loss=0.762]

epoch:45, idx:8899/10845, loss:0.7619276780291889, acc:0.7667134831460675


 83%|████████▎ | 9001/10845 [33:23<04:09,  7.40it/s, acc=0.767, epoch=45, loss=0.762]

epoch:45, idx:8999/10845, loss:0.7623281283974648, acc:0.7666111111111111


 84%|████████▍ | 9101/10845 [33:45<06:21,  4.57it/s, acc=0.766, epoch=45, loss=0.762]

epoch:45, idx:9099/10845, loss:0.7622343072917436, acc:0.7664560439560439


 85%|████████▍ | 9201/10845 [34:07<05:39,  4.84it/s, acc=0.766, epoch=45, loss=0.763]

epoch:45, idx:9199/10845, loss:0.763086507624906, acc:0.7661413043478261


 86%|████████▌ | 9301/10845 [34:29<05:22,  4.79it/s, acc=0.766, epoch=45, loss=0.763]

epoch:45, idx:9299/10845, loss:0.7630865742378338, acc:0.765994623655914


 87%|████████▋ | 9400/10845 [34:51<05:21,  4.49it/s, acc=0.766, epoch=45, loss=0.763]

epoch:45, idx:9399/10845, loss:0.762625717894828, acc:0.7660372340425532


 88%|████████▊ | 9501/10845 [35:14<04:47,  4.68it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:9499/10845, loss:0.763633476502017, acc:0.7658947368421053


 89%|████████▊ | 9600/10845 [35:36<04:39,  4.46it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:9599/10845, loss:0.7636843926273287, acc:0.7658072916666666


 89%|████████▉ | 9700/10845 [35:58<04:08,  4.61it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:9699/10845, loss:0.7642115440872527, acc:0.7657216494845361


 90%|█████████ | 9801/10845 [36:20<03:41,  4.71it/s, acc=0.766, epoch=45, loss=0.764]

epoch:45, idx:9799/10845, loss:0.7638008009110179, acc:0.7657397959183674


 91%|█████████▏| 9901/10845 [36:42<03:24,  4.62it/s, acc=0.765, epoch=45, loss=0.764]

epoch:45, idx:9899/10845, loss:0.7640341214880799, acc:0.7654545454545455


 92%|█████████▏| 10000/10845 [37:04<03:01,  4.66it/s, acc=0.765, epoch=45, loss=0.765]

epoch:45, idx:9999/10845, loss:0.7646671837508678, acc:0.7653


 93%|█████████▎| 10100/10845 [37:26<02:49,  4.39it/s, acc=0.765, epoch=45, loss=0.765]

epoch:45, idx:10099/10845, loss:0.7647274767231234, acc:0.7654702970297029


 94%|█████████▍| 10200/10845 [37:48<02:21,  4.55it/s, acc=0.766, epoch=45, loss=0.765]

epoch:45, idx:10199/10845, loss:0.7647997431603133, acc:0.7655392156862745


 95%|█████████▍| 10300/10845 [38:10<02:00,  4.53it/s, acc=0.765, epoch=45, loss=0.766]

epoch:45, idx:10299/10845, loss:0.7657753119943211, acc:0.7652912621359224


 96%|█████████▌| 10401/10845 [38:33<01:36,  4.61it/s, acc=0.765, epoch=45, loss=0.766]

epoch:45, idx:10399/10845, loss:0.7657855906795997, acc:0.7652163461538461


 97%|█████████▋| 10501/10845 [38:55<01:11,  4.81it/s, acc=0.765, epoch=45, loss=0.766]

epoch:45, idx:10499/10845, loss:0.7656083902915318, acc:0.7653333333333333


 98%|█████████▊| 10600/10845 [39:17<00:56,  4.31it/s, acc=0.765, epoch=45, loss=0.766]

epoch:45, idx:10599/10845, loss:0.766248150912096, acc:0.765306603773585


 99%|█████████▊| 10701/10845 [39:40<00:30,  4.70it/s, acc=0.765, epoch=45, loss=0.767]

epoch:45, idx:10699/10845, loss:0.7671241010070962, acc:0.7650934579439252


100%|█████████▉| 10800/10845 [40:02<00:09,  4.80it/s, acc=0.765, epoch=45, loss=0.768]

epoch:45, idx:10799/10845, loss:0.7679318907801752, acc:0.7648379629629629


100%|██████████| 10845/10845 [40:12<00:00,  4.39it/s, acc=0.765, epoch=45, loss=0.768]


epoch:45, idx:0/1275, loss:1.5801773071289062, acc:0.75
epoch:45, idx:100/1275, loss:1.5115451187190443, acc:0.6460396039603961
epoch:45, idx:200/1275, loss:1.3664487108069272, acc:0.6579601990049752
epoch:45, idx:300/1275, loss:1.3274812322122314, acc:0.6644518272425249
epoch:45, idx:400/1275, loss:1.3059962066331707, acc:0.6714463840399002
epoch:45, idx:500/1275, loss:1.2651215189707254, acc:0.6761477045908184
epoch:45, idx:600/1275, loss:1.2781536422434345, acc:0.6684692179700499
epoch:45, idx:700/1275, loss:1.2854218685337888, acc:0.6690442225392297
epoch:45, idx:800/1275, loss:1.3057627151074926, acc:0.6660424469413233
epoch:45, idx:900/1275, loss:1.2987835051349212, acc:0.6681465038845728
epoch:45, idx:1000/1275, loss:1.307642700074317, acc:0.6670829170829171
epoch:45, idx:1100/1275, loss:1.289640713560917, acc:0.6705267938237965
epoch:45, idx:1200/1275, loss:1.2859270104162897, acc:0.668609492089925


  1%|          | 101/10845 [00:22<40:09,  4.46it/s, acc=0.755, epoch=46, loss=0.839]

epoch:46, idx:99/10845, loss:0.8231324195861817, acc:0.755


  2%|▏         | 200/10845 [00:44<38:38,  4.59it/s, acc=0.746, epoch=46, loss=0.832]

epoch:46, idx:199/10845, loss:0.8320822161436081, acc:0.74625


  3%|▎         | 301/10845 [01:07<37:46,  4.65it/s, acc=0.75, epoch=46, loss=0.802] 

epoch:46, idx:299/10845, loss:0.8019409676392873, acc:0.75


  4%|▎         | 400/10845 [01:29<41:20,  4.21it/s, acc=0.751, epoch=46, loss=0.8]  

epoch:46, idx:399/10845, loss:0.7999696183204651, acc:0.75125


  5%|▍         | 500/10845 [01:51<40:41,  4.24it/s, acc=0.752, epoch=46, loss=0.799]

epoch:46, idx:499/10845, loss:0.7994660711288453, acc:0.752


  6%|▌         | 600/10845 [02:14<37:23,  4.57it/s, acc=0.761, epoch=46, loss=0.778]

epoch:46, idx:599/10845, loss:0.7776915887991588, acc:0.7608333333333334


  6%|▋         | 701/10845 [02:36<36:41,  4.61it/s, acc=0.762, epoch=46, loss=0.764]

epoch:46, idx:699/10845, loss:0.761424354485103, acc:0.7621428571428571


  7%|▋         | 800/10845 [02:58<39:19,  4.26it/s, acc=0.76, epoch=46, loss=0.776] 

epoch:46, idx:799/10845, loss:0.7761026532948017, acc:0.7603125


  8%|▊         | 900/10845 [03:20<36:30,  4.54it/s, acc=0.762, epoch=46, loss=0.778]

epoch:46, idx:899/10845, loss:0.7781876368655098, acc:0.7616666666666667


  9%|▉         | 1000/10845 [03:42<35:29,  4.62it/s, acc=0.763, epoch=46, loss=0.782]

epoch:46, idx:999/10845, loss:0.7815942359566689, acc:0.763


 10%|█         | 1101/10845 [04:05<36:23,  4.46it/s, acc=0.761, epoch=46, loss=0.787]

epoch:46, idx:1099/10845, loss:0.7857462364435196, acc:0.7611363636363636


 11%|█         | 1200/10845 [04:27<34:32,  4.65it/s, acc=0.761, epoch=46, loss=0.784]

epoch:46, idx:1199/10845, loss:0.7838528369367123, acc:0.760625


 12%|█▏        | 1301/10845 [04:50<35:17,  4.51it/s, acc=0.758, epoch=46, loss=0.783]

epoch:46, idx:1299/10845, loss:0.7831580632008039, acc:0.7586538461538461


 13%|█▎        | 1400/10845 [05:12<36:16,  4.34it/s, acc=0.758, epoch=46, loss=0.782]

epoch:46, idx:1399/10845, loss:0.7823680993063109, acc:0.7576785714285714


 14%|█▍        | 1500/10845 [05:34<33:22,  4.67it/s, acc=0.759, epoch=46, loss=0.776]

epoch:46, idx:1499/10845, loss:0.7758173277775446, acc:0.7595


 15%|█▍        | 1600/10845 [05:56<33:57,  4.54it/s, acc=0.76, epoch=46, loss=0.771] 

epoch:46, idx:1599/10845, loss:0.7710702273622155, acc:0.76046875


 16%|█▌        | 1700/10845 [06:18<31:34,  4.83it/s, acc=0.761, epoch=46, loss=0.776]

epoch:46, idx:1699/10845, loss:0.7759590391201131, acc:0.7607352941176471


 17%|█▋        | 1800/10845 [06:40<32:36,  4.62it/s, acc=0.763, epoch=46, loss=0.774]

epoch:46, idx:1799/10845, loss:0.7739292196763886, acc:0.7626388888888889


 18%|█▊        | 1900/10845 [07:03<32:33,  4.58it/s, acc=0.763, epoch=46, loss=0.768]

epoch:46, idx:1899/10845, loss:0.7684729250167546, acc:0.763421052631579


 18%|█▊        | 2001/10845 [07:24<18:51,  7.82it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:1999/10845, loss:0.7735695818960666, acc:0.762625


 19%|█▉        | 2101/10845 [07:45<30:14,  4.82it/s, acc=0.762, epoch=46, loss=0.772]

epoch:46, idx:2099/10845, loss:0.7721661272219249, acc:0.7623809523809524


 20%|██        | 2200/10845 [08:07<32:37,  4.42it/s, acc=0.765, epoch=46, loss=0.767]

epoch:46, idx:2199/10845, loss:0.7669494834542274, acc:0.7645454545454545


 21%|██        | 2300/10845 [08:29<30:45,  4.63it/s, acc=0.765, epoch=46, loss=0.764]

epoch:46, idx:2299/10845, loss:0.7644126886647681, acc:0.765


 22%|██▏       | 2400/10845 [08:52<31:38,  4.45it/s, acc=0.765, epoch=46, loss=0.763]

epoch:46, idx:2399/10845, loss:0.7629091707120339, acc:0.7653125


 23%|██▎       | 2501/10845 [09:14<30:04,  4.62it/s, acc=0.765, epoch=46, loss=0.762]

epoch:46, idx:2499/10845, loss:0.7622623412847519, acc:0.7652


 24%|██▍       | 2600/10845 [09:36<30:00,  4.58it/s, acc=0.766, epoch=46, loss=0.764]

epoch:46, idx:2599/10845, loss:0.7638371796562121, acc:0.7658653846153847


 25%|██▍       | 2700/10845 [09:58<32:49,  4.13it/s, acc=0.765, epoch=46, loss=0.768]

epoch:46, idx:2699/10845, loss:0.7680145384867986, acc:0.764537037037037


 26%|██▌       | 2801/10845 [10:21<29:16,  4.58it/s, acc=0.765, epoch=46, loss=0.768]

epoch:46, idx:2799/10845, loss:0.7679791819410665, acc:0.7652678571428572


 27%|██▋       | 2900/10845 [10:43<29:21,  4.51it/s, acc=0.766, epoch=46, loss=0.769]

epoch:46, idx:2899/10845, loss:0.7689445409075967, acc:0.7656896551724138


 28%|██▊       | 3001/10845 [11:05<29:27,  4.44it/s, acc=0.766, epoch=46, loss=0.771]

epoch:46, idx:2999/10845, loss:0.7711129576166471, acc:0.7655833333333333


 29%|██▊       | 3100/10845 [11:27<27:40,  4.66it/s, acc=0.765, epoch=46, loss=0.775]

epoch:46, idx:3099/10845, loss:0.7750444921562749, acc:0.7653225806451613


 30%|██▉       | 3201/10845 [11:50<27:10,  4.69it/s, acc=0.765, epoch=46, loss=0.775]

epoch:46, idx:3199/10845, loss:0.7752946230955422, acc:0.7646875


 30%|███       | 3300/10845 [12:12<27:52,  4.51it/s, acc=0.765, epoch=46, loss=0.775]

epoch:46, idx:3299/10845, loss:0.7752040976647174, acc:0.765


 31%|███▏      | 3401/10845 [12:35<26:29,  4.68it/s, acc=0.765, epoch=46, loss=0.774]

epoch:46, idx:3399/10845, loss:0.7736574909441611, acc:0.7646323529411765


 32%|███▏      | 3501/10845 [12:57<26:47,  4.57it/s, acc=0.764, epoch=46, loss=0.773]

epoch:46, idx:3499/10845, loss:0.7731904632874897, acc:0.7637857142857143


 33%|███▎      | 3601/10845 [13:19<28:14,  4.28it/s, acc=0.764, epoch=46, loss=0.773]

epoch:46, idx:3599/10845, loss:0.7727108559509118, acc:0.7636805555555556


 34%|███▍      | 3700/10845 [13:41<24:53,  4.78it/s, acc=0.763, epoch=46, loss=0.776]

epoch:46, idx:3699/10845, loss:0.7759967959732623, acc:0.7628378378378379


 35%|███▌      | 3800/10845 [14:03<25:32,  4.60it/s, acc=0.763, epoch=46, loss=0.776]

epoch:46, idx:3799/10845, loss:0.7756943189627246, acc:0.7628947368421053


 36%|███▌      | 3901/10845 [14:25<25:16,  4.58it/s, acc=0.762, epoch=46, loss=0.777]

epoch:46, idx:3899/10845, loss:0.7771411272348502, acc:0.7616025641025641


 37%|███▋      | 4000/10845 [14:47<26:16,  4.34it/s, acc=0.761, epoch=46, loss=0.779]

epoch:46, idx:3999/10845, loss:0.7786015933901071, acc:0.761125


 38%|███▊      | 4100/10845 [15:10<25:05,  4.48it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:4099/10845, loss:0.7767835673907908, acc:0.7609756097560976


 39%|███▊      | 4201/10845 [15:32<24:21,  4.55it/s, acc=0.761, epoch=46, loss=0.778]

epoch:46, idx:4199/10845, loss:0.7780836986785843, acc:0.76125


 40%|███▉      | 4301/10845 [15:54<23:35,  4.62it/s, acc=0.762, epoch=46, loss=0.777]

epoch:46, idx:4299/10845, loss:0.7767721118344817, acc:0.7615116279069768


 41%|████      | 4400/10845 [16:17<25:03,  4.29it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:4399/10845, loss:0.7773413519832221, acc:0.7614772727272727


 41%|████▏     | 4500/10845 [16:39<23:15,  4.55it/s, acc=0.762, epoch=46, loss=0.775]

epoch:46, idx:4499/10845, loss:0.774901557803154, acc:0.7617777777777778


 42%|████▏     | 4600/10845 [17:01<24:02,  4.33it/s, acc=0.762, epoch=46, loss=0.776]

epoch:46, idx:4599/10845, loss:0.7758677411468132, acc:0.7616304347826087


 43%|████▎     | 4701/10845 [17:24<22:19,  4.59it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:4699/10845, loss:0.7745628844803952, acc:0.7617021276595745


 44%|████▍     | 4800/10845 [17:46<22:51,  4.41it/s, acc=0.762, epoch=46, loss=0.775]

epoch:46, idx:4799/10845, loss:0.7752056606486439, acc:0.7615104166666666


 45%|████▌     | 4900/10845 [18:08<21:40,  4.57it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:4899/10845, loss:0.7737378144629148, acc:0.7619897959183674


 46%|████▌     | 5000/10845 [18:30<22:00,  4.43it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:4999/10845, loss:0.7736288018107415, acc:0.76235


 47%|████▋     | 5101/10845 [18:53<20:27,  4.68it/s, acc=0.762, epoch=46, loss=0.773]

epoch:46, idx:5099/10845, loss:0.7732282278935114, acc:0.7622549019607843


 48%|████▊     | 5200/10845 [19:15<21:50,  4.31it/s, acc=0.762, epoch=46, loss=0.773]

epoch:46, idx:5199/10845, loss:0.7731589066179899, acc:0.7623557692307692


 49%|████▉     | 5300/10845 [19:37<19:34,  4.72it/s, acc=0.763, epoch=46, loss=0.771]

epoch:46, idx:5299/10845, loss:0.7714639178649435, acc:0.7627830188679245


 50%|████▉     | 5400/10845 [20:00<19:38,  4.62it/s, acc=0.763, epoch=46, loss=0.772]

epoch:46, idx:5399/10845, loss:0.7721475965557275, acc:0.7626851851851851


 51%|█████     | 5500/10845 [20:23<21:07,  4.22it/s, acc=0.763, epoch=46, loss=0.771]

epoch:46, idx:5499/10845, loss:0.7712421906102788, acc:0.763090909090909


 52%|█████▏    | 5600/10845 [20:45<18:37,  4.69it/s, acc=0.764, epoch=46, loss=0.771]

epoch:46, idx:5599/10845, loss:0.770973120576569, acc:0.7636160714285715


 53%|█████▎    | 5700/10845 [21:08<19:15,  4.45it/s, acc=0.764, epoch=46, loss=0.77] 

epoch:46, idx:5699/10845, loss:0.7703891868235773, acc:0.763640350877193


 53%|█████▎    | 5800/10845 [21:30<19:33,  4.30it/s, acc=0.763, epoch=46, loss=0.772]

epoch:46, idx:5799/10845, loss:0.7718680575181698, acc:0.7631034482758621


 54%|█████▍    | 5901/10845 [21:53<18:13,  4.52it/s, acc=0.763, epoch=46, loss=0.772]

epoch:46, idx:5899/10845, loss:0.7722346353732933, acc:0.7630508474576271


 55%|█████▌    | 6000/10845 [22:15<18:04,  4.47it/s, acc=0.763, epoch=46, loss=0.773]

epoch:46, idx:5999/10845, loss:0.7725115306774775, acc:0.7631666666666667


 56%|█████▌    | 6100/10845 [22:38<17:14,  4.59it/s, acc=0.763, epoch=46, loss=0.772]

epoch:46, idx:6099/10845, loss:0.7719055349123283, acc:0.7631557377049181


 57%|█████▋    | 6200/10845 [23:01<17:57,  4.31it/s, acc=0.763, epoch=46, loss=0.773]

epoch:46, idx:6199/10845, loss:0.7729406055423521, acc:0.7629032258064516


 58%|█████▊    | 6300/10845 [23:23<16:59,  4.46it/s, acc=0.762, epoch=46, loss=0.772]

epoch:46, idx:6299/10845, loss:0.7720525560776392, acc:0.7625


 59%|█████▉    | 6401/10845 [23:46<16:27,  4.50it/s, acc=0.762, epoch=46, loss=0.773]

epoch:46, idx:6399/10845, loss:0.773267741529271, acc:0.76234375


 60%|█████▉    | 6500/10845 [24:08<16:17,  4.44it/s, acc=0.762, epoch=46, loss=0.772]

epoch:46, idx:6499/10845, loss:0.7724877617267462, acc:0.7624230769230769


 61%|██████    | 6600/10845 [24:30<15:40,  4.52it/s, acc=0.763, epoch=46, loss=0.771]

epoch:46, idx:6599/10845, loss:0.7709539073615364, acc:0.762689393939394


 62%|██████▏   | 6700/10845 [24:53<14:53,  4.64it/s, acc=0.763, epoch=46, loss=0.771]

epoch:46, idx:6699/10845, loss:0.7705716349206754, acc:0.7627611940298508


 63%|██████▎   | 6800/10845 [25:15<15:52,  4.25it/s, acc=0.762, epoch=46, loss=0.773]

epoch:46, idx:6799/10845, loss:0.7733937239208643, acc:0.7620220588235294


 64%|██████▎   | 6900/10845 [25:38<14:33,  4.52it/s, acc=0.762, epoch=46, loss=0.773]

epoch:46, idx:6899/10845, loss:0.7729869987567266, acc:0.7619202898550724


 65%|██████▍   | 7000/10845 [26:00<14:28,  4.43it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:6999/10845, loss:0.773519767326968, acc:0.762


 65%|██████▌   | 7100/10845 [26:23<13:45,  4.54it/s, acc=0.763, epoch=46, loss=0.772]

epoch:46, idx:7099/10845, loss:0.772516415899908, acc:0.7625704225352112


 66%|██████▋   | 7201/10845 [26:46<12:46,  4.75it/s, acc=0.763, epoch=46, loss=0.771]

epoch:46, idx:7199/10845, loss:0.7711396609163946, acc:0.7628125


 67%|██████▋   | 7300/10845 [27:07<10:54,  5.42it/s, acc=0.763, epoch=46, loss=0.771]

epoch:46, idx:7299/10845, loss:0.7705042829987121, acc:0.7628767123287671


 68%|██████▊   | 7400/10845 [27:29<12:12,  4.70it/s, acc=0.763, epoch=46, loss=0.769]

epoch:46, idx:7399/10845, loss:0.7694664777855615, acc:0.7629391891891892


 69%|██████▉   | 7500/10845 [27:52<12:21,  4.51it/s, acc=0.763, epoch=46, loss=0.771]

epoch:46, idx:7499/10845, loss:0.7707600054979324, acc:0.7627


 70%|███████   | 7600/10845 [28:14<11:50,  4.57it/s, acc=0.763, epoch=46, loss=0.772]

epoch:46, idx:7599/10845, loss:0.7723520637264377, acc:0.7626644736842105


 71%|███████   | 7700/10845 [28:37<12:21,  4.24it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:7699/10845, loss:0.7738560969101919, acc:0.7621753246753247


 72%|███████▏  | 7800/10845 [28:59<10:44,  4.72it/s, acc=0.762, epoch=46, loss=0.773]

epoch:46, idx:7799/10845, loss:0.7728149314492176, acc:0.7623717948717949


 73%|███████▎  | 7901/10845 [29:22<10:39,  4.61it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:7899/10845, loss:0.7743889321933819, acc:0.761993670886076


 74%|███████▍  | 8000/10845 [29:44<10:36,  4.47it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:7999/10845, loss:0.774102111235261, acc:0.76215625


 75%|███████▍  | 8100/10845 [30:06<10:11,  4.49it/s, acc=0.762, epoch=46, loss=0.775]

epoch:46, idx:8099/10845, loss:0.7748483126398957, acc:0.7620370370370371


 76%|███████▌  | 8201/10845 [30:29<09:41,  4.54it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:8199/10845, loss:0.7739164173457681, acc:0.7620426829268293


 77%|███████▋  | 8300/10845 [30:52<09:35,  4.42it/s, acc=0.762, epoch=46, loss=0.774]

epoch:46, idx:8299/10845, loss:0.7737756575302905, acc:0.7621084337349397


 77%|███████▋  | 8401/10845 [31:14<09:13,  4.41it/s, acc=0.762, epoch=46, loss=0.776]

epoch:46, idx:8399/10845, loss:0.7762093496748379, acc:0.7615178571428571


 78%|███████▊  | 8500/10845 [31:36<09:36,  4.06it/s, acc=0.762, epoch=46, loss=0.775]

epoch:46, idx:8499/10845, loss:0.7752509744658189, acc:0.7617352941176471


 79%|███████▉  | 8600/10845 [31:59<08:28,  4.41it/s, acc=0.762, epoch=46, loss=0.775]

epoch:46, idx:8599/10845, loss:0.7752955224971438, acc:0.7615697674418604


 80%|████████  | 8701/10845 [32:22<07:39,  4.66it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:8699/10845, loss:0.7771125849841655, acc:0.761264367816092


 81%|████████  | 8800/10845 [32:44<07:53,  4.32it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:8799/10845, loss:0.7768789515779777, acc:0.7611931818181819


 82%|████████▏ | 8900/10845 [33:07<07:21,  4.40it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:8899/10845, loss:0.7771811447183737, acc:0.7609831460674157


 83%|████████▎ | 9000/10845 [33:30<07:07,  4.32it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:8999/10845, loss:0.7772040102018251, acc:0.7610277777777777


 84%|████████▍ | 9100/10845 [33:53<06:19,  4.59it/s, acc=0.761, epoch=46, loss=0.776]

epoch:46, idx:9099/10845, loss:0.7765100544167088, acc:0.7610989010989011


 85%|████████▍ | 9200/10845 [34:15<06:03,  4.52it/s, acc=0.761, epoch=46, loss=0.776]

epoch:46, idx:9199/10845, loss:0.7757924549508354, acc:0.7610597826086957


 86%|████████▌ | 9301/10845 [34:38<05:40,  4.53it/s, acc=0.761, epoch=46, loss=0.776]

epoch:46, idx:9299/10845, loss:0.7759046311820706, acc:0.7610752688172043


 87%|████████▋ | 9400/10845 [35:00<05:09,  4.67it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:9399/10845, loss:0.7774971924468558, acc:0.7606914893617022


 88%|████████▊ | 9500/10845 [35:23<05:26,  4.12it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:9499/10845, loss:0.7773704972549489, acc:0.7605526315789474


 89%|████████▊ | 9600/10845 [35:45<04:30,  4.61it/s, acc=0.761, epoch=46, loss=0.777]

epoch:46, idx:9599/10845, loss:0.7771594210496793, acc:0.760625


 89%|████████▉ | 9701/10845 [36:08<04:02,  4.71it/s, acc=0.76, epoch=46, loss=0.778] 

epoch:46, idx:9699/10845, loss:0.7778986058069258, acc:0.7603092783505154


 90%|█████████ | 9800/10845 [36:30<03:54,  4.46it/s, acc=0.761, epoch=46, loss=0.778]

epoch:46, idx:9799/10845, loss:0.777578615001878, acc:0.7605102040816326


 91%|█████████▏| 9900/10845 [36:52<03:26,  4.57it/s, acc=0.76, epoch=46, loss=0.778] 

epoch:46, idx:9899/10845, loss:0.7783620525399844, acc:0.7603282828282828


 92%|█████████▏| 10000/10845 [37:14<03:02,  4.64it/s, acc=0.76, epoch=46, loss=0.778]

epoch:46, idx:9999/10845, loss:0.7782089937359095, acc:0.76015


 93%|█████████▎| 10101/10845 [37:37<02:35,  4.79it/s, acc=0.76, epoch=46, loss=0.778]

epoch:46, idx:10099/10845, loss:0.7779696059728612, acc:0.7602722772277227


 94%|█████████▍| 10200/10845 [38:00<02:31,  4.26it/s, acc=0.76, epoch=46, loss=0.778]

epoch:46, idx:10199/10845, loss:0.7784722127546282, acc:0.7602941176470588


 95%|█████████▍| 10300/10845 [38:22<01:56,  4.67it/s, acc=0.76, epoch=46, loss=0.778] 

epoch:46, idx:10299/10845, loss:0.7781651544947069, acc:0.7602669902912621


 96%|█████████▌| 10401/10845 [38:45<01:40,  4.43it/s, acc=0.76, epoch=46, loss=0.778]

epoch:46, idx:10399/10845, loss:0.7784374627499626, acc:0.7600721153846154


 97%|█████████▋| 10501/10845 [39:07<01:13,  4.70it/s, acc=0.76, epoch=46, loss=0.779]

epoch:46, idx:10499/10845, loss:0.7789158072954132, acc:0.760095238095238


 98%|█████████▊| 10601/10845 [39:30<00:52,  4.62it/s, acc=0.76, epoch=46, loss=0.778]

epoch:46, idx:10599/10845, loss:0.7785453072274631, acc:0.7602830188679245


 99%|█████████▊| 10700/10845 [39:52<00:31,  4.60it/s, acc=0.76, epoch=46, loss=0.779]

epoch:46, idx:10699/10845, loss:0.7794058803559464, acc:0.7601168224299065


100%|█████████▉| 10800/10845 [40:14<00:09,  4.55it/s, acc=0.76, epoch=46, loss=0.779]

epoch:46, idx:10799/10845, loss:0.7789227962963007, acc:0.760162037037037


100%|██████████| 10845/10845 [40:25<00:00,  4.30it/s, acc=0.76, epoch=46, loss=0.779]


epoch:46, idx:0/1275, loss:1.6151270866394043, acc:0.75
epoch:46, idx:100/1275, loss:1.514422343509032, acc:0.6410891089108911
epoch:46, idx:200/1275, loss:1.3692652965659526, acc:0.654228855721393
epoch:46, idx:300/1275, loss:1.3305291903771435, acc:0.6619601328903655
epoch:46, idx:400/1275, loss:1.3092372857424386, acc:0.6701995012468828
epoch:46, idx:500/1275, loss:1.2685678359753119, acc:0.6741516966067864
epoch:46, idx:600/1275, loss:1.2814251559347956, acc:0.6672212978369384
epoch:46, idx:700/1275, loss:1.288972962429792, acc:0.6679743223965763
epoch:46, idx:800/1275, loss:1.3098954731754298, acc:0.6644818976279651
epoch:46, idx:900/1275, loss:1.3027849525510935, acc:0.6667591564927858
epoch:46, idx:1000/1275, loss:1.3117034647252772, acc:0.6655844155844156
epoch:46, idx:1100/1275, loss:1.293729704905379, acc:0.6691643960036331
epoch:46, idx:1200/1275, loss:1.290000108953916, acc:0.6677768526228143


  1%|          | 100/10845 [00:22<44:14,  4.05it/s, acc=0.76, epoch=47, loss=0.813]

epoch:47, idx:99/10845, loss:0.813351936340332, acc:0.76


  2%|▏         | 200/10845 [00:44<39:21,  4.51it/s, acc=0.762, epoch=47, loss=0.809]

epoch:47, idx:199/10845, loss:0.8089204394817352, acc:0.7625


  3%|▎         | 300/10845 [01:07<37:58,  4.63it/s, acc=0.755, epoch=47, loss=0.797]

epoch:47, idx:299/10845, loss:0.7970999224980673, acc:0.755


  4%|▎         | 400/10845 [01:29<40:14,  4.33it/s, acc=0.758, epoch=47, loss=0.786]

epoch:47, idx:399/10845, loss:0.7858678036928177, acc:0.758125


  5%|▍         | 501/10845 [01:51<37:26,  4.60it/s, acc=0.761, epoch=47, loss=0.791]

epoch:47, idx:499/10845, loss:0.7918444559574127, acc:0.7615


  6%|▌         | 600/10845 [02:13<38:44,  4.41it/s, acc=0.758, epoch=47, loss=0.806]

epoch:47, idx:599/10845, loss:0.8063564985990525, acc:0.7583333333333333


  6%|▋         | 700/10845 [02:35<37:00,  4.57it/s, acc=0.757, epoch=47, loss=0.822]

epoch:47, idx:699/10845, loss:0.8220747896603176, acc:0.7571428571428571


  7%|▋         | 800/10845 [02:58<36:12,  4.62it/s, acc=0.757, epoch=47, loss=0.805]

epoch:47, idx:799/10845, loss:0.8052662014961243, acc:0.756875


  8%|▊         | 900/10845 [03:20<36:31,  4.54it/s, acc=0.761, epoch=47, loss=0.792]

epoch:47, idx:899/10845, loss:0.7924084957440695, acc:0.7605555555555555


  9%|▉         | 1001/10845 [03:42<34:56,  4.69it/s, acc=0.757, epoch=47, loss=0.8] 

epoch:47, idx:999/10845, loss:0.8004163483381271, acc:0.757


 10%|█         | 1100/10845 [04:04<36:28,  4.45it/s, acc=0.755, epoch=47, loss=0.803]

epoch:47, idx:1099/10845, loss:0.8025288028066808, acc:0.7554545454545455


 11%|█         | 1201/10845 [04:26<34:12,  4.70it/s, acc=0.758, epoch=47, loss=0.797]

epoch:47, idx:1199/10845, loss:0.7962644721070925, acc:0.758125


 12%|█▏        | 1300/10845 [04:48<34:21,  4.63it/s, acc=0.759, epoch=47, loss=0.789]

epoch:47, idx:1299/10845, loss:0.7891850296809123, acc:0.7590384615384616


 13%|█▎        | 1400/10845 [05:11<34:54,  4.51it/s, acc=0.759, epoch=47, loss=0.781]

epoch:47, idx:1399/10845, loss:0.7808731328163828, acc:0.7592857142857142


 14%|█▍        | 1500/10845 [05:34<35:43,  4.36it/s, acc=0.757, epoch=47, loss=0.787]

epoch:47, idx:1499/10845, loss:0.7870075953404109, acc:0.7575


 15%|█▍        | 1601/10845 [05:56<34:39,  4.44it/s, acc=0.756, epoch=47, loss=0.795]

epoch:47, idx:1599/10845, loss:0.7953766409680247, acc:0.75625


 16%|█▌        | 1700/10845 [06:18<35:25,  4.30it/s, acc=0.754, epoch=47, loss=0.8]  

epoch:47, idx:1699/10845, loss:0.7996751776512931, acc:0.7541176470588236


 17%|█▋        | 1800/10845 [06:40<33:30,  4.50it/s, acc=0.755, epoch=47, loss=0.796]

epoch:47, idx:1799/10845, loss:0.7959310096171167, acc:0.7548611111111111


 18%|█▊        | 1900/10845 [07:03<34:06,  4.37it/s, acc=0.754, epoch=47, loss=0.797]

epoch:47, idx:1899/10845, loss:0.7966997036494706, acc:0.7540789473684211


 18%|█▊        | 2000/10845 [07:26<31:51,  4.63it/s, acc=0.755, epoch=47, loss=0.794]

epoch:47, idx:1999/10845, loss:0.794102849572897, acc:0.754875


 19%|█▉        | 2100/10845 [07:48<33:14,  4.38it/s, acc=0.755, epoch=47, loss=0.795]

epoch:47, idx:2099/10845, loss:0.7945389271917798, acc:0.7547619047619047


 20%|██        | 2200/10845 [08:10<33:50,  4.26it/s, acc=0.755, epoch=47, loss=0.793]

epoch:47, idx:2199/10845, loss:0.7930026189305566, acc:0.7553409090909091


 21%|██        | 2300/10845 [08:32<32:09,  4.43it/s, acc=0.756, epoch=47, loss=0.794]

epoch:47, idx:2299/10845, loss:0.7938242969305619, acc:0.7558695652173913


 22%|██▏       | 2400/10845 [08:54<30:06,  4.68it/s, acc=0.755, epoch=47, loss=0.798]

epoch:47, idx:2399/10845, loss:0.7981513067086538, acc:0.7547916666666666


 23%|██▎       | 2500/10845 [09:17<31:22,  4.43it/s, acc=0.756, epoch=47, loss=0.793]

epoch:47, idx:2499/10845, loss:0.7932928337097168, acc:0.7565


 24%|██▍       | 2601/10845 [09:39<28:25,  4.83it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:2599/10845, loss:0.7917208815996464, acc:0.7573076923076923


 25%|██▍       | 2701/10845 [10:02<30:10,  4.50it/s, acc=0.757, epoch=47, loss=0.789]

epoch:47, idx:2699/10845, loss:0.7895746844344669, acc:0.7573148148148148


 26%|██▌       | 2801/10845 [10:24<29:24,  4.56it/s, acc=0.757, epoch=47, loss=0.793]

epoch:47, idx:2799/10845, loss:0.7933594986370631, acc:0.7565178571428571


 27%|██▋       | 2900/10845 [10:46<28:51,  4.59it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:2899/10845, loss:0.7922033803627408, acc:0.7572413793103449


 28%|██▊       | 3000/10845 [11:08<28:50,  4.53it/s, acc=0.757, epoch=47, loss=0.79] 

epoch:47, idx:2999/10845, loss:0.7903304149707159, acc:0.7571666666666667


 29%|██▊       | 3100/10845 [11:30<30:05,  4.29it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:3099/10845, loss:0.7920385089612776, acc:0.7566935483870968


 30%|██▉       | 3201/10845 [11:52<27:45,  4.59it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:3199/10845, loss:0.792199282720685, acc:0.756640625


 30%|███       | 3300/10845 [12:15<27:47,  4.53it/s, acc=0.757, epoch=47, loss=0.791]

epoch:47, idx:3299/10845, loss:0.7914568228252006, acc:0.7574242424242424


 31%|███▏      | 3401/10845 [12:37<28:25,  4.37it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:3399/10845, loss:0.7926786204471308, acc:0.7567647058823529


 32%|███▏      | 3500/10845 [13:00<28:20,  4.32it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:3499/10845, loss:0.7927149055515017, acc:0.7572857142857143


 33%|███▎      | 3601/10845 [13:22<25:41,  4.70it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:3599/10845, loss:0.791637202815877, acc:0.7570833333333333


 34%|███▍      | 3701/10845 [13:44<25:09,  4.73it/s, acc=0.757, epoch=47, loss=0.795]

epoch:47, idx:3699/10845, loss:0.7943489078412185, acc:0.7568243243243243


 35%|███▌      | 3800/10845 [14:06<25:29,  4.61it/s, acc=0.757, epoch=47, loss=0.794]

epoch:47, idx:3799/10845, loss:0.7938857216740909, acc:0.7568421052631579


 36%|███▌      | 3900/10845 [14:28<25:06,  4.61it/s, acc=0.758, epoch=47, loss=0.79] 

epoch:47, idx:3899/10845, loss:0.7898134704278066, acc:0.7576923076923077


 37%|███▋      | 4000/10845 [14:50<25:03,  4.55it/s, acc=0.757, epoch=47, loss=0.793]

epoch:47, idx:3999/10845, loss:0.7925902761369944, acc:0.756875


 38%|███▊      | 4100/10845 [15:13<24:07,  4.66it/s, acc=0.756, epoch=47, loss=0.794]

epoch:47, idx:4099/10845, loss:0.7939882160541488, acc:0.7559756097560976


 39%|███▊      | 4200/10845 [15:35<25:41,  4.31it/s, acc=0.756, epoch=47, loss=0.796]

epoch:47, idx:4199/10845, loss:0.7961816789564632, acc:0.7557738095238096


 40%|███▉      | 4300/10845 [15:58<25:27,  4.28it/s, acc=0.756, epoch=47, loss=0.795]

epoch:47, idx:4299/10845, loss:0.7953089665257653, acc:0.7558139534883721


 41%|████      | 4400/10845 [16:20<23:25,  4.59it/s, acc=0.756, epoch=47, loss=0.795]

epoch:47, idx:4399/10845, loss:0.7951771947199648, acc:0.7559659090909091


 42%|████▏     | 4501/10845 [16:42<23:28,  4.50it/s, acc=0.756, epoch=47, loss=0.795]

epoch:47, idx:4499/10845, loss:0.7945471687051985, acc:0.7565


 42%|████▏     | 4601/10845 [17:04<22:33,  4.61it/s, acc=0.757, epoch=47, loss=0.794]

epoch:47, idx:4599/10845, loss:0.7935864881588065, acc:0.7567934782608695


 43%|████▎     | 4700/10845 [17:26<21:35,  4.74it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:4699/10845, loss:0.7921079623952825, acc:0.7569148936170212


 44%|████▍     | 4800/10845 [17:49<22:24,  4.49it/s, acc=0.757, epoch=47, loss=0.792]

epoch:47, idx:4799/10845, loss:0.7920945668965578, acc:0.7569791666666666


 45%|████▌     | 4900/10845 [18:11<21:36,  4.58it/s, acc=0.757, epoch=47, loss=0.79] 

epoch:47, idx:4899/10845, loss:0.7902162693957894, acc:0.7571938775510204


 46%|████▌     | 5000/10845 [18:33<20:43,  4.70it/s, acc=0.756, epoch=47, loss=0.791]

epoch:47, idx:4999/10845, loss:0.7909695851325989, acc:0.75645


 47%|████▋     | 5100/10845 [18:55<20:42,  4.62it/s, acc=0.756, epoch=47, loss=0.791]

epoch:47, idx:5099/10845, loss:0.791112335733339, acc:0.7563235294117647


 48%|████▊     | 5201/10845 [19:17<21:04,  4.46it/s, acc=0.757, epoch=47, loss=0.788]

epoch:47, idx:5199/10845, loss:0.7879843275363628, acc:0.756875


 49%|████▉     | 5301/10845 [19:39<21:30,  4.30it/s, acc=0.757, epoch=47, loss=0.786]

epoch:47, idx:5299/10845, loss:0.7859421840928635, acc:0.7568867924528302


 50%|████▉     | 5400/10845 [20:01<20:14,  4.49it/s, acc=0.757, epoch=47, loss=0.784]

epoch:47, idx:5399/10845, loss:0.7836633704547529, acc:0.7572222222222222


 51%|█████     | 5500/10845 [20:23<19:34,  4.55it/s, acc=0.757, epoch=47, loss=0.784]

epoch:47, idx:5499/10845, loss:0.7843855560042642, acc:0.7570454545454546


 52%|█████▏    | 5600/10845 [20:46<20:43,  4.22it/s, acc=0.758, epoch=47, loss=0.784]

epoch:47, idx:5599/10845, loss:0.783948830217123, acc:0.7577232142857143


 53%|█████▎    | 5701/10845 [21:09<18:44,  4.57it/s, acc=0.758, epoch=47, loss=0.784]

epoch:47, idx:5699/10845, loss:0.7841208212626608, acc:0.7577631578947368


 53%|█████▎    | 5800/10845 [21:30<19:57,  4.21it/s, acc=0.758, epoch=47, loss=0.784]

epoch:47, idx:5799/10845, loss:0.7838714625146882, acc:0.7579310344827587


 54%|█████▍    | 5900/10845 [21:53<17:05,  4.82it/s, acc=0.758, epoch=47, loss=0.784]

epoch:47, idx:5899/10845, loss:0.7836615507734024, acc:0.7584322033898305


 55%|█████▌    | 6001/10845 [22:15<16:38,  4.85it/s, acc=0.758, epoch=47, loss=0.786]

epoch:47, idx:5999/10845, loss:0.7857221199323734, acc:0.758


 56%|█████▌    | 6100/10845 [22:37<16:33,  4.77it/s, acc=0.758, epoch=47, loss=0.785]

epoch:47, idx:6099/10845, loss:0.7854899631025362, acc:0.7576639344262295


 57%|█████▋    | 6200/10845 [23:00<17:40,  4.38it/s, acc=0.757, epoch=47, loss=0.786]

epoch:47, idx:6199/10845, loss:0.7855828545987606, acc:0.7574596774193548


 58%|█████▊    | 6300/10845 [23:22<16:57,  4.47it/s, acc=0.757, epoch=47, loss=0.786]

epoch:47, idx:6299/10845, loss:0.7858352410083725, acc:0.7574603174603175


 59%|█████▉    | 6401/10845 [23:45<15:49,  4.68it/s, acc=0.758, epoch=47, loss=0.786]

epoch:47, idx:6399/10845, loss:0.7851882631937042, acc:0.7579296875


 60%|█████▉    | 6500/10845 [24:07<16:02,  4.51it/s, acc=0.758, epoch=47, loss=0.784]

epoch:47, idx:6499/10845, loss:0.7842685773235101, acc:0.758


 61%|██████    | 6601/10845 [24:29<15:52,  4.46it/s, acc=0.758, epoch=47, loss=0.784]

epoch:47, idx:6599/10845, loss:0.7843634761689288, acc:0.7578787878787879


 62%|██████▏   | 6701/10845 [24:52<15:35,  4.43it/s, acc=0.759, epoch=47, loss=0.783]

epoch:47, idx:6699/10845, loss:0.7830268247314354, acc:0.7585074626865672


 63%|██████▎   | 6800/10845 [25:14<14:23,  4.69it/s, acc=0.759, epoch=47, loss=0.783]

epoch:47, idx:6799/10845, loss:0.7829373868642484, acc:0.7586764705882353


 64%|██████▎   | 6901/10845 [25:37<14:33,  4.52it/s, acc=0.759, epoch=47, loss=0.784]

epoch:47, idx:6899/10845, loss:0.7837466710762702, acc:0.758768115942029


 65%|██████▍   | 7000/10845 [25:59<14:28,  4.43it/s, acc=0.759, epoch=47, loss=0.784]

epoch:47, idx:6999/10845, loss:0.7836223276002067, acc:0.7587857142857143


 65%|██████▌   | 7101/10845 [26:21<13:30,  4.62it/s, acc=0.758, epoch=47, loss=0.784]

epoch:47, idx:7099/10845, loss:0.7843605346746848, acc:0.7584507042253521


 66%|██████▋   | 7200/10845 [26:43<14:08,  4.30it/s, acc=0.759, epoch=47, loss=0.784]

epoch:47, idx:7199/10845, loss:0.7840894455214341, acc:0.7588541666666667


 67%|██████▋   | 7300/10845 [27:06<13:06,  4.50it/s, acc=0.759, epoch=47, loss=0.784]

epoch:47, idx:7299/10845, loss:0.7836490269229837, acc:0.7590068493150685


 68%|██████▊   | 7400/10845 [27:28<12:28,  4.60it/s, acc=0.759, epoch=47, loss=0.783]

epoch:47, idx:7399/10845, loss:0.7832315962701231, acc:0.7589189189189189


 69%|██████▉   | 7500/10845 [27:51<12:51,  4.33it/s, acc=0.76, epoch=47, loss=0.779] 

epoch:47, idx:7499/10845, loss:0.7790397374153137, acc:0.7600333333333333


 70%|███████   | 7601/10845 [28:13<11:57,  4.52it/s, acc=0.76, epoch=47, loss=0.779]

epoch:47, idx:7599/10845, loss:0.779121477227462, acc:0.7598355263157894


 71%|███████   | 7701/10845 [28:35<11:47,  4.45it/s, acc=0.76, epoch=47, loss=0.778] 

epoch:47, idx:7699/10845, loss:0.7780454031368355, acc:0.7602597402597403


 72%|███████▏  | 7800/10845 [28:57<11:31,  4.41it/s, acc=0.76, epoch=47, loss=0.777] 

epoch:47, idx:7799/10845, loss:0.7771661015198781, acc:0.7603525641025641


 73%|███████▎  | 7901/10845 [29:20<10:30,  4.67it/s, acc=0.76, epoch=47, loss=0.779]

epoch:47, idx:7899/10845, loss:0.7785212748412844, acc:0.7599367088607595


 74%|███████▍  | 8000/10845 [29:42<11:00,  4.31it/s, acc=0.76, epoch=47, loss=0.779]

epoch:47, idx:7999/10845, loss:0.7790178583413363, acc:0.759625


 75%|███████▍  | 8100/10845 [30:04<10:14,  4.47it/s, acc=0.759, epoch=47, loss=0.78] 

epoch:47, idx:8099/10845, loss:0.7795737562797688, acc:0.7593827160493827


 76%|███████▌  | 8200/10845 [30:27<09:46,  4.51it/s, acc=0.76, epoch=47, loss=0.778] 

epoch:47, idx:8199/10845, loss:0.7780149197723807, acc:0.7596036585365854


 77%|███████▋  | 8300/10845 [30:49<10:09,  4.18it/s, acc=0.76, epoch=47, loss=0.777] 

epoch:47, idx:8299/10845, loss:0.7772642874430461, acc:0.7598493975903614


 77%|███████▋  | 8400/10845 [31:12<09:09,  4.45it/s, acc=0.76, epoch=47, loss=0.778]

epoch:47, idx:8399/10845, loss:0.7779047135299161, acc:0.7597916666666666


 78%|███████▊  | 8500/10845 [31:34<08:16,  4.72it/s, acc=0.76, epoch=47, loss=0.778]

epoch:47, idx:8499/10845, loss:0.7776875140035854, acc:0.7600588235294118


 79%|███████▉  | 8601/10845 [31:57<08:23,  4.45it/s, acc=0.76, epoch=47, loss=0.779]

epoch:47, idx:8599/10845, loss:0.779355839872083, acc:0.7596220930232558


 80%|████████  | 8700/10845 [32:19<07:49,  4.56it/s, acc=0.76, epoch=47, loss=0.779]

epoch:47, idx:8699/10845, loss:0.7785783717481569, acc:0.7599137931034483


 81%|████████  | 8800/10845 [32:41<07:14,  4.71it/s, acc=0.76, epoch=47, loss=0.779]

epoch:47, idx:8799/10845, loss:0.7787316118248484, acc:0.7601988636363637


 82%|████████▏ | 8901/10845 [33:04<06:48,  4.76it/s, acc=0.76, epoch=47, loss=0.78] 

epoch:47, idx:8899/10845, loss:0.7797921758086493, acc:0.7598033707865168


 83%|████████▎ | 9000/10845 [33:26<06:30,  4.72it/s, acc=0.76, epoch=47, loss=0.781]

epoch:47, idx:8999/10845, loss:0.780766803642114, acc:0.7598055555555555


 84%|████████▍ | 9101/10845 [33:48<06:18,  4.61it/s, acc=0.76, epoch=47, loss=0.781]

epoch:47, idx:9099/10845, loss:0.7808575836053261, acc:0.7599450549450549


 85%|████████▍ | 9201/10845 [34:10<05:51,  4.68it/s, acc=0.76, epoch=47, loss=0.781]

epoch:47, idx:9199/10845, loss:0.7810710200602594, acc:0.7599456521739131


 86%|████████▌ | 9300/10845 [34:32<05:33,  4.63it/s, acc=0.76, epoch=47, loss=0.78] 

epoch:47, idx:9299/10845, loss:0.7798838151462616, acc:0.7601612903225806


 87%|████████▋ | 9400/10845 [34:54<05:07,  4.70it/s, acc=0.76, epoch=47, loss=0.778]

epoch:47, idx:9399/10845, loss:0.7783180535473722, acc:0.760345744680851


 88%|████████▊ | 9500/10845 [35:16<05:08,  4.35it/s, acc=0.761, epoch=47, loss=0.778]

epoch:47, idx:9499/10845, loss:0.7779814438192468, acc:0.7606578947368421


 89%|████████▊ | 9600/10845 [35:38<04:39,  4.46it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:9599/10845, loss:0.7772197353964051, acc:0.76109375


 89%|████████▉ | 9700/10845 [36:00<04:00,  4.77it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:9699/10845, loss:0.7766223007624912, acc:0.7613144329896907


 90%|█████████ | 9800/10845 [36:23<04:08,  4.20it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:9799/10845, loss:0.7767832994704343, acc:0.761454081632653


 91%|█████████▏| 9900/10845 [36:45<03:26,  4.57it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:9899/10845, loss:0.7772142654597157, acc:0.7611868686868687


 92%|█████████▏| 10000/10845 [37:07<03:21,  4.20it/s, acc=0.762, epoch=47, loss=0.776]

epoch:47, idx:9999/10845, loss:0.7763075405240059, acc:0.761625


 93%|█████████▎| 10100/10845 [37:30<02:45,  4.49it/s, acc=0.762, epoch=47, loss=0.776]

epoch:47, idx:10099/10845, loss:0.7759932601215815, acc:0.7617079207920792


 94%|█████████▍| 10201/10845 [37:52<02:18,  4.66it/s, acc=0.762, epoch=47, loss=0.776]

epoch:47, idx:10199/10845, loss:0.7760367432061364, acc:0.7615441176470589


 95%|█████████▍| 10300/10845 [38:14<02:02,  4.44it/s, acc=0.762, epoch=47, loss=0.776]

epoch:47, idx:10299/10845, loss:0.7759053569511303, acc:0.7616504854368932


 96%|█████████▌| 10401/10845 [38:36<01:37,  4.56it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:10399/10845, loss:0.7772906581484355, acc:0.7611778846153846


 97%|█████████▋| 10500/10845 [38:59<01:18,  4.42it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:10499/10845, loss:0.777439001299086, acc:0.7609761904761905


 98%|█████████▊| 10600/10845 [39:21<00:56,  4.30it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:10599/10845, loss:0.7773210187565606, acc:0.7609198113207547


 99%|█████████▊| 10700/10845 [39:44<00:35,  4.10it/s, acc=0.761, epoch=47, loss=0.778]

epoch:47, idx:10699/10845, loss:0.7783741092236243, acc:0.7608177570093458


100%|█████████▉| 10800/10845 [40:03<00:09,  4.59it/s, acc=0.761, epoch=47, loss=0.777]

epoch:47, idx:10799/10845, loss:0.7774814218061942, acc:0.7612037037037037


100%|██████████| 10845/10845 [40:13<00:00,  4.47it/s, acc=0.761, epoch=47, loss=0.777]


epoch:47, idx:0/1275, loss:1.6152749061584473, acc:0.75
epoch:47, idx:100/1275, loss:1.5167160164011586, acc:0.6435643564356436
epoch:47, idx:200/1275, loss:1.3720129129305407, acc:0.6567164179104478
epoch:47, idx:300/1275, loss:1.3328338097100243, acc:0.6627906976744186
epoch:47, idx:400/1275, loss:1.3116670406964652, acc:0.6714463840399002
epoch:47, idx:500/1275, loss:1.2703543085300042, acc:0.6761477045908184
epoch:47, idx:600/1275, loss:1.2833434646815904, acc:0.6688851913477537
epoch:47, idx:700/1275, loss:1.2908280188278873, acc:0.6694008559201141
epoch:47, idx:800/1275, loss:1.3115787614002061, acc:0.6660424469413233
epoch:47, idx:900/1275, loss:1.304455219773156, acc:0.6681465038845728
epoch:47, idx:1000/1275, loss:1.313443592676035, acc:0.6673326673326674
epoch:47, idx:1100/1275, loss:1.2954730151134874, acc:0.6707538601271571
epoch:47, idx:1200/1275, loss:1.2917370134646649, acc:0.6694421315570358


  1%|          | 101/10845 [00:21<38:08,  4.69it/s, acc=0.775, epoch=48, loss=0.736]

epoch:48, idx:99/10845, loss:0.7385921680927277, acc:0.775


  2%|▏         | 200/10845 [00:44<40:34,  4.37it/s, acc=0.764, epoch=48, loss=0.769]

epoch:48, idx:199/10845, loss:0.7690023416280747, acc:0.76375


  3%|▎         | 300/10845 [01:06<37:50,  4.64it/s, acc=0.762, epoch=48, loss=0.79] 

epoch:48, idx:299/10845, loss:0.7902380192279815, acc:0.7625


  4%|▎         | 400/10845 [01:28<38:36,  4.51it/s, acc=0.759, epoch=48, loss=0.807]

epoch:48, idx:399/10845, loss:0.8072252377867699, acc:0.759375


  5%|▍         | 500/10845 [01:50<36:44,  4.69it/s, acc=0.759, epoch=48, loss=0.811]

epoch:48, idx:499/10845, loss:0.8106524896621704, acc:0.759


  6%|▌         | 600/10845 [02:13<41:54,  4.07it/s, acc=0.761, epoch=48, loss=0.805]

epoch:48, idx:599/10845, loss:0.8054069888591766, acc:0.76125


  6%|▋         | 700/10845 [02:35<41:25,  4.08it/s, acc=0.764, epoch=48, loss=0.794]

epoch:48, idx:699/10845, loss:0.794010454245976, acc:0.7635714285714286


  7%|▋         | 800/10845 [02:57<35:12,  4.75it/s, acc=0.765, epoch=48, loss=0.785]

epoch:48, idx:799/10845, loss:0.7854379791021348, acc:0.7646875


  8%|▊         | 901/10845 [03:19<35:08,  4.72it/s, acc=0.765, epoch=48, loss=0.784]

epoch:48, idx:899/10845, loss:0.7845672663052877, acc:0.7647222222222222


  9%|▉         | 1000/10845 [03:42<35:45,  4.59it/s, acc=0.764, epoch=48, loss=0.782]

epoch:48, idx:999/10845, loss:0.7816024872064591, acc:0.7645


 10%|█         | 1100/10845 [04:04<36:23,  4.46it/s, acc=0.766, epoch=48, loss=0.785]

epoch:48, idx:1099/10845, loss:0.7825553343512796, acc:0.7659090909090909


 11%|█         | 1200/10845 [04:26<34:52,  4.61it/s, acc=0.765, epoch=48, loss=0.785]

epoch:48, idx:1199/10845, loss:0.784666917026043, acc:0.765


 12%|█▏        | 1300/10845 [04:48<35:00,  4.54it/s, acc=0.767, epoch=48, loss=0.775]

epoch:48, idx:1299/10845, loss:0.774593278994927, acc:0.7675


 13%|█▎        | 1400/10845 [05:11<33:05,  4.76it/s, acc=0.766, epoch=48, loss=0.771]

epoch:48, idx:1399/10845, loss:0.7707133380004337, acc:0.7664285714285715


 14%|█▍        | 1500/10845 [05:33<32:40,  4.77it/s, acc=0.763, epoch=48, loss=0.781]

epoch:48, idx:1499/10845, loss:0.7811305158933004, acc:0.7635


 15%|█▍        | 1601/10845 [05:56<32:57,  4.67it/s, acc=0.762, epoch=48, loss=0.786]

epoch:48, idx:1599/10845, loss:0.7854261758178472, acc:0.76265625


 16%|█▌        | 1701/10845 [06:19<34:24,  4.43it/s, acc=0.764, epoch=48, loss=0.778]

epoch:48, idx:1699/10845, loss:0.7777620015424841, acc:0.764264705882353


 17%|█▋        | 1800/10845 [06:41<34:24,  4.38it/s, acc=0.765, epoch=48, loss=0.774]

epoch:48, idx:1799/10845, loss:0.774438293642468, acc:0.7654166666666666


 18%|█▊        | 1900/10845 [07:03<33:54,  4.40it/s, acc=0.767, epoch=48, loss=0.769]

epoch:48, idx:1899/10845, loss:0.7688413673325589, acc:0.7667105263157895


 18%|█▊        | 2001/10845 [07:25<32:21,  4.55it/s, acc=0.766, epoch=48, loss=0.773]

epoch:48, idx:1999/10845, loss:0.7731962253451348, acc:0.765625


 19%|█▉        | 2100/10845 [07:47<34:53,  4.18it/s, acc=0.767, epoch=48, loss=0.772]

epoch:48, idx:2099/10845, loss:0.7718492518720173, acc:0.7669047619047619


 20%|██        | 2200/10845 [08:09<31:36,  4.56it/s, acc=0.765, epoch=48, loss=0.778]

epoch:48, idx:2199/10845, loss:0.7781784355098551, acc:0.765


 21%|██        | 2301/10845 [08:32<33:01,  4.31it/s, acc=0.766, epoch=48, loss=0.777]

epoch:48, idx:2299/10845, loss:0.7767547628672227, acc:0.7658695652173914


 22%|██▏       | 2401/10845 [08:54<30:29,  4.62it/s, acc=0.764, epoch=48, loss=0.783]

epoch:48, idx:2399/10845, loss:0.7831695442398389, acc:0.764375


 23%|██▎       | 2500/10845 [09:16<31:01,  4.48it/s, acc=0.764, epoch=48, loss=0.78] 

epoch:48, idx:2499/10845, loss:0.7795172669887542, acc:0.7643


 24%|██▍       | 2600/10845 [09:39<30:11,  4.55it/s, acc=0.766, epoch=48, loss=0.777]

epoch:48, idx:2599/10845, loss:0.7765910616746315, acc:0.7658653846153847


 25%|██▍       | 2701/10845 [10:01<29:19,  4.63it/s, acc=0.766, epoch=48, loss=0.774]

epoch:48, idx:2699/10845, loss:0.7738896804827231, acc:0.7662962962962963


 26%|██▌       | 2800/10845 [10:23<29:00,  4.62it/s, acc=0.767, epoch=48, loss=0.771]

epoch:48, idx:2799/10845, loss:0.7713197206173624, acc:0.7669642857142858


 27%|██▋       | 2900/10845 [10:45<31:16,  4.23it/s, acc=0.767, epoch=48, loss=0.77] 

epoch:48, idx:2899/10845, loss:0.7701766816089893, acc:0.7670689655172414


 28%|██▊       | 3000/10845 [11:08<29:38,  4.41it/s, acc=0.767, epoch=48, loss=0.772]

epoch:48, idx:2999/10845, loss:0.7715931305090586, acc:0.7669166666666667


 29%|██▊       | 3100/10845 [11:30<27:08,  4.76it/s, acc=0.767, epoch=48, loss=0.772]

epoch:48, idx:3099/10845, loss:0.772379037276391, acc:0.766774193548387


 30%|██▉       | 3200/10845 [11:53<28:19,  4.50it/s, acc=0.766, epoch=48, loss=0.774]

epoch:48, idx:3199/10845, loss:0.7740169254131615, acc:0.766328125


 30%|███       | 3300/10845 [12:15<27:27,  4.58it/s, acc=0.766, epoch=48, loss=0.775]

epoch:48, idx:3299/10845, loss:0.7748089244690808, acc:0.765530303030303


 31%|███▏      | 3400/10845 [12:37<27:31,  4.51it/s, acc=0.766, epoch=48, loss=0.773]

epoch:48, idx:3399/10845, loss:0.7730060205214164, acc:0.7661029411764706


 32%|███▏      | 3500/10845 [12:59<28:02,  4.36it/s, acc=0.767, epoch=48, loss=0.771]

epoch:48, idx:3499/10845, loss:0.770619947041784, acc:0.7665714285714286


 33%|███▎      | 3601/10845 [13:21<26:13,  4.61it/s, acc=0.767, epoch=48, loss=0.768]

epoch:48, idx:3599/10845, loss:0.7684837207032574, acc:0.7666666666666667


 34%|███▍      | 3701/10845 [13:43<25:37,  4.65it/s, acc=0.766, epoch=48, loss=0.769]

epoch:48, idx:3699/10845, loss:0.7692401715065982, acc:0.7661486486486486


 35%|███▌      | 3800/10845 [14:05<25:53,  4.53it/s, acc=0.765, epoch=48, loss=0.772]

epoch:48, idx:3799/10845, loss:0.7720851701968594, acc:0.765


 36%|███▌      | 3901/10845 [14:28<26:41,  4.33it/s, acc=0.765, epoch=48, loss=0.773]

epoch:48, idx:3899/10845, loss:0.7733100211161833, acc:0.7650641025641025


 37%|███▋      | 4000/10845 [14:51<25:21,  4.50it/s, acc=0.764, epoch=48, loss=0.777]

epoch:48, idx:3999/10845, loss:0.7766966259330511, acc:0.763875


 38%|███▊      | 4101/10845 [15:13<24:51,  4.52it/s, acc=0.764, epoch=48, loss=0.776]

epoch:48, idx:4099/10845, loss:0.7754022230026199, acc:0.7644512195121951


 39%|███▊      | 4201/10845 [15:35<24:32,  4.51it/s, acc=0.764, epoch=48, loss=0.778]

epoch:48, idx:4199/10845, loss:0.7776099543628239, acc:0.7641071428571429


 40%|███▉      | 4300/10845 [15:58<24:39,  4.42it/s, acc=0.764, epoch=48, loss=0.777]

epoch:48, idx:4299/10845, loss:0.7770969988578974, acc:0.7640116279069767


 41%|████      | 4400/10845 [16:20<26:47,  4.01it/s, acc=0.764, epoch=48, loss=0.778]

epoch:48, idx:4399/10845, loss:0.7775615932182832, acc:0.7638636363636364


 41%|████▏     | 4500/10845 [16:42<23:11,  4.56it/s, acc=0.764, epoch=48, loss=0.778]

epoch:48, idx:4499/10845, loss:0.7776860522958967, acc:0.7638888888888888


 42%|████▏     | 4600/10845 [17:04<22:32,  4.62it/s, acc=0.763, epoch=48, loss=0.778]

epoch:48, idx:4599/10845, loss:0.778367135213769, acc:0.7633695652173913


 43%|████▎     | 4701/10845 [17:27<22:19,  4.59it/s, acc=0.763, epoch=48, loss=0.78] 

epoch:48, idx:4699/10845, loss:0.7800869550603501, acc:0.7629787234042553


 44%|████▍     | 4800/10845 [17:49<25:12,  4.00it/s, acc=0.763, epoch=48, loss=0.78] 

epoch:48, idx:4799/10845, loss:0.7801423473407825, acc:0.7630208333333334


 45%|████▌     | 4901/10845 [18:11<21:20,  4.64it/s, acc=0.763, epoch=48, loss=0.779]

epoch:48, idx:4899/10845, loss:0.7788796728971054, acc:0.7633163265306122


 46%|████▌     | 5000/10845 [18:33<20:52,  4.67it/s, acc=0.764, epoch=48, loss=0.778]

epoch:48, idx:4999/10845, loss:0.7780439667463303, acc:0.7637


 47%|████▋     | 5100/10845 [18:56<21:22,  4.48it/s, acc=0.764, epoch=48, loss=0.778]

epoch:48, idx:5099/10845, loss:0.7784334171052073, acc:0.7636764705882353


 48%|████▊     | 5201/10845 [19:18<20:32,  4.58it/s, acc=0.763, epoch=48, loss=0.78] 

epoch:48, idx:5199/10845, loss:0.7802136637614323, acc:0.7631730769230769


 49%|████▉     | 5301/10845 [19:41<20:55,  4.42it/s, acc=0.763, epoch=48, loss=0.779]

epoch:48, idx:5299/10845, loss:0.7787674728204619, acc:0.7633490566037736


 50%|████▉     | 5401/10845 [20:03<20:06,  4.51it/s, acc=0.764, epoch=48, loss=0.777]

epoch:48, idx:5399/10845, loss:0.7771815863141307, acc:0.7635185185185185


 51%|█████     | 5501/10845 [20:25<19:40,  4.53it/s, acc=0.764, epoch=48, loss=0.777]

epoch:48, idx:5499/10845, loss:0.7773910169818185, acc:0.764


 52%|█████▏    | 5601/10845 [20:47<18:22,  4.76it/s, acc=0.764, epoch=48, loss=0.777]

epoch:48, idx:5599/10845, loss:0.77705703326634, acc:0.7642410714285715


 53%|█████▎    | 5700/10845 [21:09<18:44,  4.57it/s, acc=0.764, epoch=48, loss=0.777]

epoch:48, idx:5699/10845, loss:0.7772921882805072, acc:0.764298245614035


 53%|█████▎    | 5800/10845 [21:31<18:41,  4.50it/s, acc=0.765, epoch=48, loss=0.774]

epoch:48, idx:5799/10845, loss:0.7738694608005984, acc:0.7653017241379311


 54%|█████▍    | 5900/10845 [21:53<18:01,  4.57it/s, acc=0.766, epoch=48, loss=0.772]

epoch:48, idx:5899/10845, loss:0.7724653982712051, acc:0.7656779661016949


 55%|█████▌    | 6000/10845 [22:15<18:01,  4.48it/s, acc=0.766, epoch=48, loss=0.772]

epoch:48, idx:5999/10845, loss:0.7721535584529241, acc:0.7657083333333333


 56%|█████▋    | 6101/10845 [22:38<17:44,  4.46it/s, acc=0.766, epoch=48, loss=0.771]

epoch:48, idx:6099/10845, loss:0.7705845904741131, acc:0.7661065573770491


 57%|█████▋    | 6201/10845 [23:00<17:18,  4.47it/s, acc=0.766, epoch=48, loss=0.769]

epoch:48, idx:6199/10845, loss:0.7687570054684916, acc:0.7663709677419355


 58%|█████▊    | 6301/10845 [23:21<15:31,  4.88it/s, acc=0.767, epoch=48, loss=0.768]

epoch:48, idx:6299/10845, loss:0.7677909695345257, acc:0.7666269841269842


 59%|█████▉    | 6401/10845 [23:44<16:43,  4.43it/s, acc=0.766, epoch=48, loss=0.768]

epoch:48, idx:6399/10845, loss:0.7682792897708715, acc:0.7662890625


 60%|█████▉    | 6501/10845 [24:06<15:17,  4.74it/s, acc=0.766, epoch=48, loss=0.769]

epoch:48, idx:6499/10845, loss:0.7687150139441857, acc:0.7661538461538462


 61%|██████    | 6600/10845 [24:28<16:13,  4.36it/s, acc=0.766, epoch=48, loss=0.769]

epoch:48, idx:6599/10845, loss:0.7689545433268402, acc:0.7659848484848485


 62%|██████▏   | 6700/10845 [24:50<15:24,  4.49it/s, acc=0.766, epoch=48, loss=0.769]

epoch:48, idx:6699/10845, loss:0.7686980753870153, acc:0.765634328358209


 63%|██████▎   | 6801/10845 [25:13<15:00,  4.49it/s, acc=0.766, epoch=48, loss=0.769]

epoch:48, idx:6799/10845, loss:0.7689963616869029, acc:0.7655514705882352


 64%|██████▎   | 6901/10845 [25:35<14:21,  4.58it/s, acc=0.766, epoch=48, loss=0.77] 

epoch:48, idx:6899/10845, loss:0.770287433223448, acc:0.7655434782608695


 65%|██████▍   | 7000/10845 [25:57<14:49,  4.32it/s, acc=0.766, epoch=48, loss=0.769]

epoch:48, idx:6999/10845, loss:0.7693886280230113, acc:0.7656071428571428


 65%|██████▌   | 7101/10845 [26:20<13:54,  4.49it/s, acc=0.765, epoch=48, loss=0.77] 

epoch:48, idx:7099/10845, loss:0.7696901643024364, acc:0.7652464788732395


 66%|██████▋   | 7200/10845 [26:42<13:04,  4.65it/s, acc=0.765, epoch=48, loss=0.77] 

epoch:48, idx:7199/10845, loss:0.7700638834718201, acc:0.7652777777777777


 67%|██████▋   | 7300/10845 [27:04<12:22,  4.77it/s, acc=0.765, epoch=48, loss=0.771]

epoch:48, idx:7299/10845, loss:0.7712263951889456, acc:0.7646575342465753


 68%|██████▊   | 7401/10845 [27:26<12:18,  4.67it/s, acc=0.765, epoch=48, loss=0.771]

epoch:48, idx:7399/10845, loss:0.7714247254101005, acc:0.7646283783783784


 69%|██████▉   | 7501/10845 [27:49<12:08,  4.59it/s, acc=0.764, epoch=48, loss=0.772]

epoch:48, idx:7499/10845, loss:0.7716397093931834, acc:0.7644333333333333


 70%|███████   | 7600/10845 [28:11<11:49,  4.57it/s, acc=0.764, epoch=48, loss=0.775]

epoch:48, idx:7599/10845, loss:0.7748182731082565, acc:0.7636513157894737


 71%|███████   | 7701/10845 [28:33<10:46,  4.86it/s, acc=0.764, epoch=48, loss=0.775]

epoch:48, idx:7699/10845, loss:0.7752826179157604, acc:0.7637662337662338


 72%|███████▏  | 7800/10845 [28:56<11:38,  4.36it/s, acc=0.763, epoch=48, loss=0.776]

epoch:48, idx:7799/10845, loss:0.7764391768437165, acc:0.7634615384615384


 73%|███████▎  | 7900/10845 [29:18<10:45,  4.56it/s, acc=0.764, epoch=48, loss=0.775]

epoch:48, idx:7899/10845, loss:0.7745184843298755, acc:0.7637974683544304


 74%|███████▍  | 8000/10845 [29:40<11:11,  4.24it/s, acc=0.764, epoch=48, loss=0.774]

epoch:48, idx:7999/10845, loss:0.7743893544971943, acc:0.7638125


 75%|███████▍  | 8101/10845 [30:03<10:09,  4.50it/s, acc=0.764, epoch=48, loss=0.774]

epoch:48, idx:8099/10845, loss:0.7736507858170404, acc:0.7637654320987655


 76%|███████▌  | 8201/10845 [30:25<09:21,  4.71it/s, acc=0.764, epoch=48, loss=0.775]

epoch:48, idx:8199/10845, loss:0.7747009263969049, acc:0.7637195121951219


 77%|███████▋  | 8301/10845 [30:47<08:57,  4.73it/s, acc=0.764, epoch=48, loss=0.774]

epoch:48, idx:8299/10845, loss:0.7741319009482143, acc:0.7637048192771084


 77%|███████▋  | 8400/10845 [31:09<09:38,  4.23it/s, acc=0.764, epoch=48, loss=0.776]

epoch:48, idx:8399/10845, loss:0.7761268310390768, acc:0.7636011904761905


 78%|███████▊  | 8501/10845 [31:32<08:33,  4.56it/s, acc=0.763, epoch=48, loss=0.776]

epoch:48, idx:8499/10845, loss:0.7761247475077124, acc:0.7632941176470588


 79%|███████▉  | 8600/10845 [31:54<07:49,  4.78it/s, acc=0.763, epoch=48, loss=0.776]

epoch:48, idx:8599/10845, loss:0.7757614152999811, acc:0.7634302325581396


 80%|████████  | 8700/10845 [32:16<07:56,  4.50it/s, acc=0.763, epoch=48, loss=0.777]

epoch:48, idx:8699/10845, loss:0.7774692434689094, acc:0.7627298850574713


 81%|████████  | 8800/10845 [32:39<07:14,  4.70it/s, acc=0.763, epoch=48, loss=0.778]

epoch:48, idx:8799/10845, loss:0.777970113131133, acc:0.7626988636363636


 82%|████████▏ | 8900/10845 [33:01<06:59,  4.63it/s, acc=0.763, epoch=48, loss=0.778]

epoch:48, idx:8899/10845, loss:0.7781910693176677, acc:0.7627247191011236


 83%|████████▎ | 9000/10845 [33:23<07:05,  4.33it/s, acc=0.763, epoch=48, loss=0.778]

epoch:48, idx:8999/10845, loss:0.7776199147370126, acc:0.7628333333333334


 84%|████████▍ | 9100/10845 [33:45<06:14,  4.66it/s, acc=0.763, epoch=48, loss=0.778]

epoch:48, idx:9099/10845, loss:0.7778633689159875, acc:0.7630494505494505


 85%|████████▍ | 9201/10845 [34:08<05:53,  4.65it/s, acc=0.763, epoch=48, loss=0.777]

epoch:48, idx:9199/10845, loss:0.7772154805776865, acc:0.763070652173913


 86%|████████▌ | 9300/10845 [34:30<05:33,  4.63it/s, acc=0.764, epoch=48, loss=0.775]

epoch:48, idx:9299/10845, loss:0.7754673840345875, acc:0.7635752688172043


 87%|████████▋ | 9400/10845 [34:52<05:31,  4.36it/s, acc=0.763, epoch=48, loss=0.777]

epoch:48, idx:9399/10845, loss:0.7766194414902241, acc:0.7631914893617021


 88%|████████▊ | 9500/10845 [35:15<04:39,  4.81it/s, acc=0.763, epoch=48, loss=0.778]

epoch:48, idx:9499/10845, loss:0.7776300088418158, acc:0.7631842105263158


 89%|████████▊ | 9600/10845 [35:37<04:41,  4.42it/s, acc=0.763, epoch=48, loss=0.777]

epoch:48, idx:9599/10845, loss:0.7774258786067366, acc:0.7633072916666667


 89%|████████▉ | 9700/10845 [35:59<04:08,  4.61it/s, acc=0.763, epoch=48, loss=0.777]

epoch:48, idx:9699/10845, loss:0.7772212801643253, acc:0.7629896907216495


 90%|█████████ | 9800/10845 [36:22<03:42,  4.69it/s, acc=0.763, epoch=48, loss=0.777]

epoch:48, idx:9799/10845, loss:0.7769483919776216, acc:0.7629591836734694


 91%|█████████▏| 9901/10845 [36:44<03:23,  4.65it/s, acc=0.763, epoch=48, loss=0.776]

epoch:48, idx:9899/10845, loss:0.7763737207831758, acc:0.7631060606060606


 92%|█████████▏| 10000/10845 [37:06<03:10,  4.43it/s, acc=0.763, epoch=48, loss=0.775]

epoch:48, idx:9999/10845, loss:0.7753942430973053, acc:0.763275


 93%|█████████▎| 10100/10845 [37:28<02:42,  4.58it/s, acc=0.763, epoch=48, loss=0.775]

epoch:48, idx:10099/10845, loss:0.7746974827983591, acc:0.762970297029703


 94%|█████████▍| 10200/10845 [37:50<02:23,  4.50it/s, acc=0.763, epoch=48, loss=0.774]

epoch:48, idx:10199/10845, loss:0.7743829856140941, acc:0.7630637254901961


 95%|█████████▍| 10301/10845 [38:13<01:54,  4.74it/s, acc=0.763, epoch=48, loss=0.774]

epoch:48, idx:10299/10845, loss:0.7737155172836433, acc:0.7632524271844661


 96%|█████████▌| 10400/10845 [38:35<01:36,  4.60it/s, acc=0.763, epoch=48, loss=0.773]

epoch:48, idx:10399/10845, loss:0.7733059819902365, acc:0.763125


 97%|█████████▋| 10500/10845 [38:57<01:20,  4.31it/s, acc=0.763, epoch=48, loss=0.773]

epoch:48, idx:10499/10845, loss:0.7729556725422542, acc:0.7631904761904762


 98%|█████████▊| 10600/10845 [39:19<00:57,  4.30it/s, acc=0.763, epoch=48, loss=0.773]

epoch:48, idx:10599/10845, loss:0.7729015966350178, acc:0.7632311320754717


 99%|█████████▊| 10700/10845 [39:41<00:31,  4.58it/s, acc=0.763, epoch=48, loss=0.772]

epoch:48, idx:10699/10845, loss:0.7723896710950637, acc:0.763481308411215


100%|█████████▉| 10800/10845 [40:03<00:10,  4.43it/s, acc=0.763, epoch=48, loss=0.773]

epoch:48, idx:10799/10845, loss:0.7734544223381413, acc:0.763125


100%|██████████| 10845/10845 [40:14<00:00,  4.45it/s, acc=0.763, epoch=48, loss=0.774]


epoch:48, idx:0/1275, loss:1.6163206100463867, acc:0.75
epoch:48, idx:100/1275, loss:1.5172274938904413, acc:0.6410891089108911
epoch:48, idx:200/1275, loss:1.3724314027757787, acc:0.654228855721393
epoch:48, idx:300/1275, loss:1.3333549123269774, acc:0.6611295681063123
epoch:48, idx:400/1275, loss:1.3121346288785674, acc:0.6701995012468828
epoch:48, idx:500/1275, loss:1.2705911650153214, acc:0.6746506986027944
epoch:48, idx:600/1275, loss:1.2835369346145782, acc:0.6676372712146422
epoch:48, idx:700/1275, loss:1.290934426298155, acc:0.6679743223965763
epoch:48, idx:800/1275, loss:1.3116007372085818, acc:0.6651061173533084
epoch:48, idx:900/1275, loss:1.3045619921996512, acc:0.6673140954495006
epoch:48, idx:1000/1275, loss:1.3135631118978297, acc:0.6663336663336663
epoch:48, idx:1100/1275, loss:1.295658140914425, acc:0.6698455949137148
epoch:48, idx:1200/1275, loss:1.29192243994126, acc:0.668609492089925


  1%|          | 101/10845 [00:22<37:52,  4.73it/s, acc=0.797, epoch=49, loss=0.672]

epoch:49, idx:99/10845, loss:0.6745191884040832, acc:0.7975


  2%|▏         | 200/10845 [00:44<39:42,  4.47it/s, acc=0.777, epoch=49, loss=0.716]

epoch:49, idx:199/10845, loss:0.7162718909978867, acc:0.7775


  3%|▎         | 300/10845 [01:07<42:28,  4.14it/s, acc=0.758, epoch=49, loss=0.755]

epoch:49, idx:299/10845, loss:0.7545560642083486, acc:0.7583333333333333


  4%|▎         | 401/10845 [01:29<38:40,  4.50it/s, acc=0.765, epoch=49, loss=0.754]

epoch:49, idx:399/10845, loss:0.7553836706280709, acc:0.764375


  5%|▍         | 500/10845 [01:52<38:57,  4.42it/s, acc=0.757, epoch=49, loss=0.777]

epoch:49, idx:499/10845, loss:0.7772794208526611, acc:0.7575


  6%|▌         | 601/10845 [02:14<36:35,  4.67it/s, acc=0.761, epoch=49, loss=0.771]

epoch:49, idx:599/10845, loss:0.7721295462052027, acc:0.7604166666666666


  6%|▋         | 700/10845 [02:36<38:11,  4.43it/s, acc=0.762, epoch=49, loss=0.775]

epoch:49, idx:699/10845, loss:0.7753320130280086, acc:0.7621428571428571


  7%|▋         | 800/10845 [02:58<35:15,  4.75it/s, acc=0.765, epoch=49, loss=0.772]

epoch:49, idx:799/10845, loss:0.7718572697043419, acc:0.765


  8%|▊         | 900/10845 [03:20<35:28,  4.67it/s, acc=0.765, epoch=49, loss=0.765]

epoch:49, idx:899/10845, loss:0.7650534069538116, acc:0.765


  9%|▉         | 1000/10845 [03:42<32:31,  5.04it/s, acc=0.763, epoch=49, loss=0.769]

epoch:49, idx:999/10845, loss:0.7691408689022065, acc:0.7635


 10%|█         | 1100/10845 [04:05<36:49,  4.41it/s, acc=0.763, epoch=49, loss=0.765]

epoch:49, idx:1099/10845, loss:0.7653467027707533, acc:0.7634090909090909


 11%|█         | 1200/10845 [04:26<34:50,  4.61it/s, acc=0.767, epoch=49, loss=0.759]

epoch:49, idx:1199/10845, loss:0.7591888103882471, acc:0.7666666666666667


 12%|█▏        | 1300/10845 [04:49<38:34,  4.12it/s, acc=0.769, epoch=49, loss=0.754]

epoch:49, idx:1299/10845, loss:0.7540836247114034, acc:0.7690384615384616


 13%|█▎        | 1401/10845 [05:12<33:15,  4.73it/s, acc=0.771, epoch=49, loss=0.748]

epoch:49, idx:1399/10845, loss:0.7480135154724121, acc:0.7705357142857143


 14%|█▍        | 1500/10845 [05:34<33:38,  4.63it/s, acc=0.768, epoch=49, loss=0.751]

epoch:49, idx:1499/10845, loss:0.7512595330079397, acc:0.768


 15%|█▍        | 1600/10845 [05:56<34:11,  4.51it/s, acc=0.768, epoch=49, loss=0.755]

epoch:49, idx:1599/10845, loss:0.7547552959620952, acc:0.7678125


 16%|█▌        | 1700/10845 [06:18<31:23,  4.85it/s, acc=0.767, epoch=49, loss=0.762]

epoch:49, idx:1699/10845, loss:0.7622326939947465, acc:0.7670588235294118


 17%|█▋        | 1800/10845 [06:41<32:58,  4.57it/s, acc=0.766, epoch=49, loss=0.765]

epoch:49, idx:1799/10845, loss:0.764818567832311, acc:0.7663888888888889


 18%|█▊        | 1901/10845 [07:03<31:38,  4.71it/s, acc=0.766, epoch=49, loss=0.769]

epoch:49, idx:1899/10845, loss:0.7680579630952132, acc:0.7657894736842106


 18%|█▊        | 2001/10845 [07:25<32:31,  4.53it/s, acc=0.766, epoch=49, loss=0.77] 

epoch:49, idx:1999/10845, loss:0.770243657618761, acc:0.76575


 19%|█▉        | 2100/10845 [07:47<31:08,  4.68it/s, acc=0.765, epoch=49, loss=0.772]

epoch:49, idx:2099/10845, loss:0.7718707700854256, acc:0.7648809523809523


 20%|██        | 2201/10845 [08:10<31:16,  4.61it/s, acc=0.766, epoch=49, loss=0.766]

epoch:49, idx:2199/10845, loss:0.7664651194756681, acc:0.7657954545454545


 21%|██        | 2301/10845 [08:32<30:32,  4.66it/s, acc=0.766, epoch=49, loss=0.766]

epoch:49, idx:2299/10845, loss:0.7660787415763606, acc:0.7656521739130435


 22%|██▏       | 2400/10845 [08:53<32:17,  4.36it/s, acc=0.766, epoch=49, loss=0.766]

epoch:49, idx:2399/10845, loss:0.7657762275077402, acc:0.76625


 23%|██▎       | 2500/10845 [09:15<28:42,  4.85it/s, acc=0.765, epoch=49, loss=0.765]

epoch:49, idx:2499/10845, loss:0.7647059834897518, acc:0.765


 24%|██▍       | 2600/10845 [09:37<30:56,  4.44it/s, acc=0.765, epoch=49, loss=0.766]

epoch:49, idx:2599/10845, loss:0.7660834544839767, acc:0.7648076923076923


 25%|██▍       | 2700/10845 [10:00<30:10,  4.50it/s, acc=0.764, epoch=49, loss=0.769]

epoch:49, idx:2699/10845, loss:0.7687157229968795, acc:0.7638888888888888


 26%|██▌       | 2801/10845 [10:22<30:09,  4.45it/s, acc=0.763, epoch=49, loss=0.769]

epoch:49, idx:2799/10845, loss:0.769743788449892, acc:0.7632142857142857


 27%|██▋       | 2900/10845 [10:44<31:41,  4.18it/s, acc=0.763, epoch=49, loss=0.773]

epoch:49, idx:2899/10845, loss:0.773294199165599, acc:0.7631034482758621


 28%|██▊       | 3001/10845 [11:07<28:09,  4.64it/s, acc=0.763, epoch=49, loss=0.771]

epoch:49, idx:2999/10845, loss:0.7713551496018966, acc:0.7631666666666667


 29%|██▊       | 3101/10845 [11:29<28:05,  4.60it/s, acc=0.762, epoch=49, loss=0.773]

epoch:49, idx:3099/10845, loss:0.7729991386926943, acc:0.7625


 30%|██▉       | 3201/10845 [11:52<27:16,  4.67it/s, acc=0.761, epoch=49, loss=0.778]

epoch:49, idx:3199/10845, loss:0.7784132954617963, acc:0.76109375


 30%|███       | 3300/10845 [12:14<28:04,  4.48it/s, acc=0.761, epoch=49, loss=0.784]

epoch:49, idx:3299/10845, loss:0.7836653724357937, acc:0.760530303030303


 31%|███▏      | 3400/10845 [12:36<27:23,  4.53it/s, acc=0.76, epoch=49, loss=0.785] 

epoch:49, idx:3399/10845, loss:0.7851595898367026, acc:0.7595588235294117


 32%|███▏      | 3500/10845 [12:59<26:47,  4.57it/s, acc=0.759, epoch=49, loss=0.786]

epoch:49, idx:3499/10845, loss:0.7860544351637363, acc:0.7591428571428571


 33%|███▎      | 3601/10845 [13:21<26:12,  4.61it/s, acc=0.76, epoch=49, loss=0.783] 

epoch:49, idx:3599/10845, loss:0.7834100719748271, acc:0.7601388888888889


 34%|███▍      | 3700/10845 [13:43<25:12,  4.72it/s, acc=0.761, epoch=49, loss=0.781]

epoch:49, idx:3699/10845, loss:0.7806692626468233, acc:0.7606756756756756


 35%|███▌      | 3800/10845 [14:05<27:26,  4.28it/s, acc=0.761, epoch=49, loss=0.778]

epoch:49, idx:3799/10845, loss:0.7777289556006067, acc:0.7607894736842106


 36%|███▌      | 3901/10845 [14:28<25:32,  4.53it/s, acc=0.761, epoch=49, loss=0.777]

epoch:49, idx:3899/10845, loss:0.7770322719407388, acc:0.7605128205128205


 37%|███▋      | 4000/10845 [14:50<27:01,  4.22it/s, acc=0.76, epoch=49, loss=0.778] 

epoch:49, idx:3999/10845, loss:0.7781021643094719, acc:0.760375


 38%|███▊      | 4100/10845 [15:12<24:16,  4.63it/s, acc=0.76, epoch=49, loss=0.779] 

epoch:49, idx:4099/10845, loss:0.7794438153178227, acc:0.7603658536585366


 39%|███▊      | 4200/10845 [15:34<23:37,  4.69it/s, acc=0.76, epoch=49, loss=0.779] 

epoch:49, idx:4199/10845, loss:0.7792403073076691, acc:0.7604761904761905


 40%|███▉      | 4301/10845 [15:57<23:48,  4.58it/s, acc=0.761, epoch=49, loss=0.777]

epoch:49, idx:4299/10845, loss:0.7769942558747391, acc:0.7608720930232559


 41%|████      | 4400/10845 [16:19<24:10,  4.44it/s, acc=0.761, epoch=49, loss=0.775]

epoch:49, idx:4399/10845, loss:0.7754302302646366, acc:0.76125


 42%|████▏     | 4501/10845 [16:41<23:03,  4.59it/s, acc=0.761, epoch=49, loss=0.776]

epoch:49, idx:4499/10845, loss:0.7762048322988881, acc:0.7608888888888888


 42%|████▏     | 4601/10845 [17:04<23:29,  4.43it/s, acc=0.761, epoch=49, loss=0.776]

epoch:49, idx:4599/10845, loss:0.7757413382731054, acc:0.7610869565217391


 43%|████▎     | 4700/10845 [17:26<23:10,  4.42it/s, acc=0.761, epoch=49, loss=0.776]

epoch:49, idx:4699/10845, loss:0.7755563219525713, acc:0.7610106382978723


 44%|████▍     | 4800/10845 [17:49<23:37,  4.26it/s, acc=0.761, epoch=49, loss=0.777]

epoch:49, idx:4799/10845, loss:0.7770977427779386, acc:0.7611458333333333


 45%|████▌     | 4901/10845 [18:12<22:45,  4.35it/s, acc=0.762, epoch=49, loss=0.775]

epoch:49, idx:4899/10845, loss:0.7751576716528863, acc:0.7616326530612245


 46%|████▌     | 5001/10845 [18:34<20:09,  4.83it/s, acc=0.761, epoch=49, loss=0.778]

epoch:49, idx:4999/10845, loss:0.7777748284548521, acc:0.7609


 47%|████▋     | 5101/10845 [18:56<20:46,  4.61it/s, acc=0.761, epoch=49, loss=0.776]

epoch:49, idx:5099/10845, loss:0.7762740860002882, acc:0.7610294117647058


 48%|████▊     | 5200/10845 [19:19<21:36,  4.35it/s, acc=0.761, epoch=49, loss=0.776]

epoch:49, idx:5199/10845, loss:0.7762077717683636, acc:0.760625


 49%|████▉     | 5300/10845 [19:41<22:01,  4.20it/s, acc=0.76, epoch=49, loss=0.776] 

epoch:49, idx:5299/10845, loss:0.7759415726925967, acc:0.7604716981132076


 50%|████▉     | 5401/10845 [20:03<19:57,  4.55it/s, acc=0.761, epoch=49, loss=0.775]

epoch:49, idx:5399/10845, loss:0.7752274792586212, acc:0.7609259259259259


 51%|█████     | 5500/10845 [20:25<20:06,  4.43it/s, acc=0.761, epoch=49, loss=0.773]

epoch:49, idx:5499/10845, loss:0.7727012681175361, acc:0.7615


 52%|█████▏    | 5600/10845 [20:47<19:41,  4.44it/s, acc=0.762, epoch=49, loss=0.772]

epoch:49, idx:5599/10845, loss:0.7716667968966067, acc:0.7616964285714286


 53%|█████▎    | 5701/10845 [21:10<18:50,  4.55it/s, acc=0.762, epoch=49, loss=0.772]

epoch:49, idx:5699/10845, loss:0.7717148591002875, acc:0.7617543859649123


 53%|█████▎    | 5801/10845 [21:32<18:10,  4.63it/s, acc=0.762, epoch=49, loss=0.77] 

epoch:49, idx:5799/10845, loss:0.769752751349889, acc:0.7620258620689655


 54%|█████▍    | 5900/10845 [21:54<18:47,  4.39it/s, acc=0.762, epoch=49, loss=0.769]

epoch:49, idx:5899/10845, loss:0.7694660306860834, acc:0.7624152542372882


 55%|█████▌    | 6001/10845 [22:17<17:11,  4.70it/s, acc=0.763, epoch=49, loss=0.768]

epoch:49, idx:5999/10845, loss:0.7685479485467076, acc:0.76275


 56%|█████▋    | 6101/10845 [22:39<16:49,  4.70it/s, acc=0.763, epoch=49, loss=0.767]

epoch:49, idx:6099/10845, loss:0.7671232831746828, acc:0.7631967213114754


 57%|█████▋    | 6200/10845 [23:01<17:10,  4.51it/s, acc=0.762, epoch=49, loss=0.769]

epoch:49, idx:6199/10845, loss:0.7691167644219052, acc:0.7624596774193548


 58%|█████▊    | 6300/10845 [23:24<16:31,  4.58it/s, acc=0.763, epoch=49, loss=0.768]

epoch:49, idx:6299/10845, loss:0.7681875398494895, acc:0.7626984126984127


 59%|█████▉    | 6401/10845 [23:46<16:26,  4.51it/s, acc=0.763, epoch=49, loss=0.768]

epoch:49, idx:6399/10845, loss:0.7679455063003116, acc:0.762734375


 60%|█████▉    | 6500/10845 [24:09<17:16,  4.19it/s, acc=0.763, epoch=49, loss=0.769]

epoch:49, idx:6499/10845, loss:0.7686065945327282, acc:0.7627307692307692


 61%|██████    | 6601/10845 [24:31<16:00,  4.42it/s, acc=0.762, epoch=49, loss=0.77] 

epoch:49, idx:6599/10845, loss:0.7700270126692274, acc:0.762159090909091


 62%|██████▏   | 6700/10845 [24:53<15:20,  4.50it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:6699/10845, loss:0.7708294435112334, acc:0.7620522388059702


 63%|██████▎   | 6800/10845 [25:15<14:47,  4.56it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:6799/10845, loss:0.7706115202057887, acc:0.7620955882352941


 64%|██████▎   | 6901/10845 [25:38<14:04,  4.67it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:6899/10845, loss:0.7714961800432724, acc:0.7618840579710144


 65%|██████▍   | 7000/10845 [26:00<14:27,  4.43it/s, acc=0.761, epoch=49, loss=0.773]

epoch:49, idx:6999/10845, loss:0.772708012876766, acc:0.7613928571428571


 65%|██████▌   | 7101/10845 [26:22<13:41,  4.56it/s, acc=0.761, epoch=49, loss=0.774]

epoch:49, idx:7099/10845, loss:0.7732661509157066, acc:0.7614084507042254


 66%|██████▋   | 7200/10845 [26:44<13:06,  4.63it/s, acc=0.761, epoch=49, loss=0.774]

epoch:49, idx:7199/10845, loss:0.774034804749406, acc:0.7612152777777778


 67%|██████▋   | 7300/10845 [27:07<13:35,  4.35it/s, acc=0.761, epoch=49, loss=0.773]

epoch:49, idx:7299/10845, loss:0.7730179098513845, acc:0.7613698630136986


 68%|██████▊   | 7401/10845 [27:29<12:27,  4.61it/s, acc=0.761, epoch=49, loss=0.774]

epoch:49, idx:7399/10845, loss:0.7737468182456655, acc:0.7610472972972973


 69%|██████▉   | 7501/10845 [27:51<11:50,  4.71it/s, acc=0.761, epoch=49, loss=0.775]

epoch:49, idx:7499/10845, loss:0.7743486328979333, acc:0.7610333333333333


 70%|███████   | 7600/10845 [28:13<12:08,  4.45it/s, acc=0.761, epoch=49, loss=0.774]

epoch:49, idx:7599/10845, loss:0.7737637332532751, acc:0.7610855263157895


 71%|███████   | 7700/10845 [28:35<11:58,  4.38it/s, acc=0.761, epoch=49, loss=0.772]

epoch:49, idx:7699/10845, loss:0.771849291446534, acc:0.7613311688311688


 72%|███████▏  | 7800/10845 [28:57<10:59,  4.62it/s, acc=0.761, epoch=49, loss=0.772]

epoch:49, idx:7799/10845, loss:0.7720980211079885, acc:0.7611538461538462


 73%|███████▎  | 7900/10845 [29:19<10:52,  4.52it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:7899/10845, loss:0.7714108870734897, acc:0.7615189873417721


 74%|███████▍  | 8000/10845 [29:42<10:06,  4.69it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:7999/10845, loss:0.7709332214128226, acc:0.7616875


 75%|███████▍  | 8101/10845 [30:04<10:03,  4.54it/s, acc=0.762, epoch=49, loss=0.77] 

epoch:49, idx:8099/10845, loss:0.7699882736746911, acc:0.7616666666666667


 76%|███████▌  | 8200/10845 [30:27<09:34,  4.60it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:8199/10845, loss:0.7708951352264096, acc:0.7617378048780488


 77%|███████▋  | 8301/10845 [30:49<09:23,  4.52it/s, acc=0.761, epoch=49, loss=0.772]

epoch:49, idx:8299/10845, loss:0.7721878047000212, acc:0.7612048192771085


 77%|███████▋  | 8400/10845 [31:11<09:00,  4.52it/s, acc=0.761, epoch=49, loss=0.772]

epoch:49, idx:8399/10845, loss:0.7716312383026595, acc:0.7613988095238096


 78%|███████▊  | 8500/10845 [31:33<09:04,  4.30it/s, acc=0.762, epoch=49, loss=0.77] 

epoch:49, idx:8499/10845, loss:0.7698461668894572, acc:0.7618529411764706


 79%|███████▉  | 8600/10845 [31:55<07:46,  4.81it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:8599/10845, loss:0.7707664311053448, acc:0.7615697674418604


 80%|████████  | 8700/10845 [32:18<08:12,  4.36it/s, acc=0.761, epoch=49, loss=0.771]

epoch:49, idx:8699/10845, loss:0.7714552487158912, acc:0.761235632183908


 81%|████████  | 8800/10845 [32:40<07:31,  4.53it/s, acc=0.761, epoch=49, loss=0.771]

epoch:49, idx:8799/10845, loss:0.7713358258100396, acc:0.7611647727272727


 82%|████████▏ | 8901/10845 [33:03<07:22,  4.39it/s, acc=0.761, epoch=49, loss=0.77] 

epoch:49, idx:8899/10845, loss:0.7702841921318113, acc:0.7613483146067416


 83%|████████▎ | 9000/10845 [33:25<06:42,  4.58it/s, acc=0.762, epoch=49, loss=0.77]

epoch:49, idx:8999/10845, loss:0.7701741814861695, acc:0.7616388888888889


 84%|████████▍ | 9100/10845 [33:47<06:15,  4.65it/s, acc=0.761, epoch=49, loss=0.772]

epoch:49, idx:9099/10845, loss:0.7720633376414304, acc:0.7612362637362637


 85%|████████▍ | 9200/10845 [34:10<06:12,  4.42it/s, acc=0.761, epoch=49, loss=0.772]

epoch:49, idx:9199/10845, loss:0.7724216424625205, acc:0.7614402173913043


 86%|████████▌ | 9300/10845 [34:32<05:45,  4.47it/s, acc=0.762, epoch=49, loss=0.772]

epoch:49, idx:9299/10845, loss:0.7720386573431953, acc:0.7615322580645161


 87%|████████▋ | 9400/10845 [34:55<05:00,  4.81it/s, acc=0.761, epoch=49, loss=0.773]

epoch:49, idx:9399/10845, loss:0.7729990074212881, acc:0.761436170212766


 88%|████████▊ | 9501/10845 [35:17<04:45,  4.70it/s, acc=0.762, epoch=49, loss=0.772]

epoch:49, idx:9499/10845, loss:0.7717594880665604, acc:0.7616315789473684


 89%|████████▊ | 9601/10845 [35:39<04:14,  4.88it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:9599/10845, loss:0.7712046004692092, acc:0.7616145833333333


 89%|████████▉ | 9701/10845 [36:01<04:02,  4.72it/s, acc=0.761, epoch=49, loss=0.773]

epoch:49, idx:9699/10845, loss:0.772666330151644, acc:0.7614690721649484


 90%|█████████ | 9800/10845 [36:23<04:01,  4.33it/s, acc=0.762, epoch=49, loss=0.772]

epoch:49, idx:9799/10845, loss:0.7724724465654212, acc:0.7616071428571428


 91%|█████████▏| 9900/10845 [36:46<03:18,  4.75it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:9899/10845, loss:0.7705737882957916, acc:0.7621717171717172


 92%|█████████▏| 10000/10845 [37:08<03:06,  4.54it/s, acc=0.762, epoch=49, loss=0.77]

epoch:49, idx:9999/10845, loss:0.7703865492984653, acc:0.762025


 93%|█████████▎| 10100/10845 [37:30<02:47,  4.46it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:10099/10845, loss:0.7710263345899558, acc:0.7620792079207921


 94%|█████████▍| 10200/10845 [37:52<02:27,  4.37it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:10199/10845, loss:0.7711511412657359, acc:0.7622549019607843


 95%|█████████▍| 10300/10845 [38:15<02:08,  4.25it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:10299/10845, loss:0.7706019121104652, acc:0.762378640776699


 96%|█████████▌| 10400/10845 [38:37<01:35,  4.67it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:10399/10845, loss:0.7707436265266285, acc:0.7624038461538462


 97%|█████████▋| 10500/10845 [38:59<01:21,  4.24it/s, acc=0.762, epoch=49, loss=0.77] 

epoch:49, idx:10499/10845, loss:0.7702926104451929, acc:0.7624285714285715


 98%|█████████▊| 10600/10845 [39:22<00:54,  4.48it/s, acc=0.762, epoch=49, loss=0.772]

epoch:49, idx:10599/10845, loss:0.7715387830236611, acc:0.762122641509434


 99%|█████████▊| 10700/10845 [39:44<00:31,  4.55it/s, acc=0.762, epoch=49, loss=0.771]

epoch:49, idx:10699/10845, loss:0.7714255177682249, acc:0.7619626168224299


100%|█████████▉| 10801/10845 [40:06<00:09,  4.41it/s, acc=0.762, epoch=49, loss=0.772]

epoch:49, idx:10799/10845, loss:0.7721518124405433, acc:0.7616203703703703


100%|██████████| 10845/10845 [40:16<00:00,  4.75it/s, acc=0.762, epoch=49, loss=0.772]


epoch:49, idx:0/1275, loss:1.616520881652832, acc:0.75
epoch:49, idx:100/1275, loss:1.5167323339103471, acc:0.6435643564356436
epoch:49, idx:200/1275, loss:1.371985256968446, acc:0.6554726368159204
epoch:49, idx:300/1275, loss:1.3329225445902624, acc:0.6619601328903655
epoch:49, idx:400/1275, loss:1.311747905322144, acc:0.6708229426433915
epoch:49, idx:500/1275, loss:1.270241354040043, acc:0.6751497005988024
epoch:49, idx:600/1275, loss:1.2831507586004731, acc:0.668053244592346
epoch:49, idx:700/1275, loss:1.2905667345125904, acc:0.6683309557774608
epoch:49, idx:800/1275, loss:1.311224917868401, acc:0.66541822721598
epoch:49, idx:900/1275, loss:1.3041877559366555, acc:0.667591564927858
epoch:49, idx:1000/1275, loss:1.3131638856439085, acc:0.6665834165834166
epoch:49, idx:1100/1275, loss:1.2952651260226127, acc:0.6700726612170754
epoch:49, idx:1200/1275, loss:1.2915049827565361, acc:0.6688176519567027


In [20]:
best_acc

0.6690196078431373