In [1]:
import os
import torch
import torch.nn as nn
import torchtext
from tensorboardX import SummaryWriter
import random
import numpy as np
from pytorch_pretrained_bert import BertModel
from pytorch_pretrained_bert.optimization import BertAdam
from tqdm import tqdm
from model import *
from utils import AverageMeter

# Re-import edited project modules (e.g. `model`, `utils`) automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Expose only physical GPU 2 to this process; inside the process it is addressed as cuda:0.
# NOTE(review): this only takes effect if it is set before CUDA is first initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = '2'

03/12/2019 20:39:23 - INFO - summarizer.preprocessing.cleaner -   'pattern' package not found; tag filters are not available for English


In [2]:
class Config:
    """Hyperparameters and file locations for the BERT fine-tuning run.

    Calling ``Config()`` yields the defaults below. Any default can be
    replaced per run with a keyword argument, e.g. ``Config(lr=3e-5, epochs=3)``,
    instead of editing the class body.

    Raises:
        TypeError: if a keyword does not name one of the options defined here
            (guards against silently ignored typos such as ``learning_rate=``).
    """

    def __init__(self, **overrides):
        # --- optimisation ---
        self.lr = 1e-5                          # peak learning rate given to the optimizer
        self.warmup_proportion = 0.1            # fraction of total steps used for LR warmup
        self.epochs = 5                         # passes over the training data
        self.max_seq_length = 128               # recorded in the run name; matches the *_128.pt data files
        self.gradient_accumulation_steps = 16   # micro-batches accumulated per optimizer step

        # --- logging ---
        self.log_dir = './logs'
        self.model_name = 'bert_finetune'

        # --- pre-tokenised datasets ---
        self.train_bert_path = './data/train_data_bert_128.pt'
        self.dev_bert_path = './data/dev_data_bert_128.pt'

        # --- pretrained encoder ---
        self.bert_type = 'base'
        self.bert_path = './bert-base-uncased/'
        self.bert_dim = 768                     # hidden size used to size the attention/pooling heads

        self.dropout = 0.2
        self.seed = 1023

        # Apply per-run overrides last so they win over the defaults.
        for key, value in overrides.items():
            if not hasattr(self, key):
                raise TypeError(f"Config got an unknown option {key!r}")
            setattr(self, key, value)

        
# Single shared configuration object for the whole notebook.
config = Config()
# First CUDA device visible to this process.
# NOTE(review): there is no CPU fallback; cells that move tensors to `device` need a GPU.
device = torch.device("cuda:0")


In [3]:
# Seed every RNG used in this notebook (Python, NumPy, PyTorch CPU, PyTorch CUDA)
# from the single configured value so that runs are repeatable.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(config.seed)

In [4]:
# The run directory name spells out the key hyperparameters, so runs with
# different settings land in different folders under the log directory.
save_path = os.path.join(
    config.log_dir,
    f"{config.model_name}_lr_{config.lr}_dropout_{config.dropout}"
    f"_maxSeqLen_{config.max_seq_length}_bertType_{config.bert_type}"
    f"__epochs_{config.epochs}",
)
print(save_path)

# Keep the resolved path on the config so later cells can reuse it.
config.save_path = save_path

./logs/bert_finetune_lr_1e-05_dropout_0.2_maxSeqLen_128_bertType_base__epochs_5


In [5]:
# Create the run directory if needed. exist_ok=True makes this idempotent and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(config.save_path, exist_ok=True)
# TensorBoard event writer for this run; scalars are logged under config.save_path.
writer = SummaryWriter(config.save_path)

In [6]:
class BertForWiki(nn.Module):
    """Scores candidate answers against passages and a question with one shared BERT.

    Choices, passages and the question are each encoded by the same BERT model.
    The passages are co-attended with the question, pooled twice into a single
    summary, and compared with the pooled choice encodings by a bilinear
    attention layer that produces the scores.
    """

    def __init__(self, config, device):
        """Build the encoder and heads, then move the whole module to ``device``.

        config: object providing ``bert_path``, ``bert_dim`` and ``dropout``.
        device: torch device that inputs are moved to in ``forward``.
        """
        super().__init__()

        self.device = device
        self.bert_model = BertModel.from_pretrained(config.bert_path)

        hidden, drop = config.bert_dim, config.dropout

        # Passage <-> question co-attention.
        self.pq_w_attn = CoAttention(hidden_dims=hidden, att_type=0, dropout=drop)

        # Self-attention poolers. Original author's note: only a single
        # pooling strategy is used, for simplicity.
        self.pooling_1 = SelfAttention(hidden, hidden, drop)
        self.pooling_2 = SelfAttention(hidden, hidden, drop)
        self.pooling_3 = SelfAttention(hidden, hidden, drop)

        # Bilinear scorer between the pooled choices and the passage summary.
        self.linear_last = BilinearSeqAttn(hidden, hidden, dropout=drop)

        self.to(device)

    def _on_device(self, features):
        """Move every tensor of an (input_ids, input_mask, segment_ids) triple to self.device."""
        return tuple(t.to(self.device) for t in features)

    def _last_layer(self, input_ids, input_mask, segment_ids):
        """Run BERT and return the last entry of its first output (the encoder layers)."""
        all_encoder_layers, _ = self.bert_model(input_ids, segment_ids, input_mask)
        return all_encoder_layers[-1]

    def forward(self, choices, passages, question, max_batch =32):
        """Return the score of each choice for one example.

        choices, passages, question: each an iterable of three tensors in the
            order (input_ids, input_mask, segment_ids).
        max_batch: passages beyond this many rows are dropped before encoding.

        Shape notes below are carried over from the original author's comments
        and are not checked here.
        """
        c_emb = self._last_layer(*self._on_device(choices))

        p_ids, p_mask, p_seg = self._on_device(passages)
        if p_ids.size(0) > max_batch:
            # Cap the number of passages that go through BERT.
            p_ids, p_mask, p_seg = p_ids[:max_batch], p_mask[:max_batch], p_seg[:max_batch]
        p_emb = self._last_layer(p_ids, p_mask, p_seg)

        q_emb = self._last_layer(*self._on_device(question))

        # Repeat the question encoding along dim 0 to pair it with every passage.
        q_emb_p = q_emb.expand(p_emb.size(0), -1, -1)
        p_attn_out, _ = self.pq_w_attn(p_emb, q_emb_p)

        # p_len * bert_hidden (author's note)
        p_summarys = self.pooling_1(p_attn_out)

        # 1 * bert_hidden (author's note)
        p_final = self.pooling_2(p_summarys.unsqueeze(0))

        # 1 * c_len * hidden (author's note)
        c_final = self.pooling_3(c_emb).unsqueeze(0)

        # 1 * c_len (author's note)
        return self.linear_last(c_final, p_final)

In [7]:
# Build the model: the constructor loads BERT from config.bert_path and moves everything to `device`.
model = BertForWiki(config, device)

In [8]:
# Load the pre-tokenised train/dev sets.
# NOTE: torch.load unpickles the file, so only load .pt files from a trusted source.
# Each record is used later as a dict with 'choices', 'passages', 'question' and 'label' keys.
train_data = torch.load(config.train_bert_path)
dev_data = torch.load(config.dev_bert_path)

In [9]:
# Peek at one dev example to check the record structure.
dev_data[0]

{'choices': (tensor([[  101,  5118,   102,     0,     0],
          [  101, 11068,  1997,  9192,   102],
          [  101,  2605,   102,     0,     0],
          [  101,  2446, 11078,   102,     0],
          [  101,  2446,  3400,   102,     0],
          [  101,  2762,   102,     0,     0],
          [  101,  4151,  3142,  3400,   102],
          [  101,  3304,   102,     0,     0],
          [  101,  2900,   102,     0,     0],
          [  101,  2983,  1997,  2605,   102],
          [  101,  2983,  1997, 13019,   102],
          [  101,  2642,  3304,   102,     0],
          [  101,  3142,  3400,   102,     0],
          [  101,  3607,   102,     0,     0],
          [  101, 13019,   102,     0,     0],
          [  101,  2142,  2983,   102,     0],
          [  101, 20695,  3072,   102,     0],
          [  101,  2088,   102,     0,     0]]), tensor([[1, 1, 1, 0, 0],
          [1, 1, 1, 1, 1],
          [1, 1, 1, 0, 0],
          [1, 1, 1, 1, 0],
          [1, 1, 1, 1, 0],
        

In [10]:
# Collect (name, parameter) pairs, leaving out BERT's pooler. Per the original
# author's note, the pooler is unused, so its gradients stay None, which breaks apex.
param_optimizer = [
    (name, param)
    for name, param in model.named_parameters()
    if 'pooler' not in name
]

# Parameters whose names contain any of these substrings get no weight decay.
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01,
    },
    {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0,
    },
]

# Total optimizer steps: whole accumulation windows per epoch, times epochs.
t_total = (len(train_data) // config.gradient_accumulation_steps) * config.epochs
print(t_total)

optimizer = BertAdam(
    optimizer_grouped_parameters,
    lr=config.lr,
    warmup=config.warmup_proportion,
    t_total=t_total,
)
criterion = torch.nn.CrossEntropyLoss()

13665


In [11]:
def val(data_iter, model, criterion):
    """Evaluate ``model`` on ``data_iter`` and return ``(mean loss, mean accuracy)``.

    data_iter: iterable of dicts with 'choices', 'passages', 'question' and
        'label' entries.
    model: called as ``model(choices, passages, question)``; it is switched to
        eval mode and left in that mode.
    criterion: loss callable taking ``(score, label)``.

    Both means are taken over examples, as tracked by ``AverageMeter``.
    """
    loss_meter, acc_meter = AverageMeter(), AverageMeter()
    model.eval()

    for item in tqdm(data_iter):
        target = item['label'].to(device)

        # No gradients are needed for evaluation.
        with torch.no_grad():
            score = model(item['choices'], item['passages'], item['question'])
            loss_meter.update(criterion(score, target).item())

        pred = score.argmax(1)
        n_correct = pred.eq(target).sum().item()
        acc_meter.update(n_correct / pred.size(0))

    return loss_meter.avg, acc_meter.avg

def warmup_linear(x, warmup=0.002):
    """Learning-rate multiplier: linear ramp-up, then linear decay.

    x: training progress, presumably the fraction of total steps completed.
    warmup: progress value below which the ramp-up applies.

    Returns ``x / warmup`` while ``x < warmup``, otherwise ``1.0 - x``.
    """
    still_warming_up = x < warmup
    return x / warmup if still_warming_up else 1.0 - x

In [12]:
# Training loop with gradient accumulation.
# `global_step` counts optimizer updates, not micro-batches.
global_step = 0
log_every = 20  # optimizer steps between training-metric log lines

for ep in range(config.epochs):
    random.shuffle(train_data)
    model.train()
    # Drop gradients left over from an incomplete accumulation window at the
    # end of the previous epoch, so the first update of this epoch is built
    # from exactly `gradient_accumulation_steps` micro-batches.
    optimizer.zero_grad()
    losses = AverageMeter()
    acces = AverageMeter()

    for step, item in enumerate(tqdm(train_data)):
        choices, passages, question = item['choices'], item['passages'], item['question']
        label = item['label'].to(device)
        score = model(choices, passages, question)

        loss = criterion(score, label)

        # Record the unscaled loss so train_loss is on the same scale as val_loss.
        losses.update(loss.item())
        pred = score.argmax(1)
        acc = pred.eq(label).sum().item() / pred.size(0)
        acces.update(acc)

        # Scale only the backward pass, so the accumulated gradient is the mean
        # over the accumulation window.
        (loss / config.gradient_accumulation_steps).backward()

        if (step + 1) % config.gradient_accumulation_steps == 0:
            # BertAdam was built with `warmup` and `t_total`, so it applies the
            # warmup/decay schedule itself; no manual LR update is needed here.
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1

            # Log only right after an optimizer update. Checked outside this
            # branch, the condition would stay true for every micro-batch of the
            # window and flood the output.
            if global_step % log_every == 0:
                writer.add_scalar('train_loss', losses.avg, global_step)
                writer.add_scalar('train_acc', acces.avg, global_step)
                print(f'step:{global_step}, train_loss:{losses.avg}, acc:{acces.avg}')

    print('eval on dev set')
    val_loss, val_acc = val(dev_data, model, criterion)
    print(f'{val_loss}, {val_acc}')
    # Validation metrics are indexed by epoch rather than by optimizer step.
    writer.add_scalar('val_loss', val_loss, ep)
    writer.add_scalar('val_acc', val_acc, ep)

  alphas = self.softmax(alphas)  # (bsz, sent_len)
  0%|          | 1/43738 [00:00<5:57:20,  2.04it/s]

step:0, train_loss:0.29659658670425415, acc:0.0


  0%|          | 2/43738 [00:01<6:32:33,  1.86it/s]

step:0, train_loss:0.29428306221961975, acc:0.0


  0%|          | 3/43738 [00:01<5:27:35,  2.23it/s]

step:0, train_loss:0.33481648564338684, acc:0.0


  0%|          | 4/43738 [00:01<4:57:40,  2.45it/s]

step:0, train_loss:0.29897554963827133, acc:0.0


  0%|          | 5/43738 [00:02<4:50:39,  2.51it/s]

step:0, train_loss:0.2586246311664581, acc:0.2


  0%|          | 6/43738 [00:02<5:12:01,  2.34it/s]

step:0, train_loss:0.23456765214602152, acc:0.16666666666666666


  0%|          | 7/43738 [00:02<5:02:37,  2.41it/s]

step:0, train_loss:0.20914574818951742, acc:0.2857142857142857


  0%|          | 8/43738 [00:03<4:24:46,  2.75it/s]

step:0, train_loss:0.19974397867918015, acc:0.25


  0%|          | 9/43738 [00:03<4:16:07,  2.85it/s]

step:0, train_loss:0.20164375007152557, acc:0.2222222222222222


  0%|          | 10/43738 [00:03<4:37:53,  2.62it/s]

step:0, train_loss:0.19171634912490845, acc:0.2


  0%|          | 11/43738 [00:04<4:57:15,  2.45it/s]

step:0, train_loss:0.19877753474495627, acc:0.18181818181818182


  0%|          | 12/43738 [00:05<6:33:50,  1.85it/s]

step:0, train_loss:0.19684898108243942, acc:0.16666666666666666


  0%|          | 13/43738 [00:05<5:47:53,  2.09it/s]

step:0, train_loss:0.18300520686002877, acc:0.23076923076923078


  0%|          | 14/43738 [00:05<5:15:28,  2.31it/s]

step:0, train_loss:0.17563040341649735, acc:0.2857142857142857


  0%|          | 15/43738 [00:06<6:32:25,  1.86it/s]

step:0, train_loss:0.17603636781374613, acc:0.26666666666666666


  1%|          | 320/43738 [02:19<5:16:24,  2.29it/s]

step:20, train_loss:0.2231011846801266, acc:0.1


  1%|          | 321/43738 [02:19<5:23:59,  2.23it/s]

step:20, train_loss:0.2228550752421777, acc:0.09968847352024922


  1%|          | 322/43738 [02:19<4:39:59,  2.58it/s]

step:20, train_loss:0.22231085548721116, acc:0.09937888198757763


  1%|          | 323/43738 [02:20<4:06:23,  2.94it/s]

step:20, train_loss:0.22188819980612112, acc:0.09907120743034056


  1%|          | 324/43738 [02:20<4:35:25,  2.63it/s]

step:20, train_loss:0.22242706769180517, acc:0.09876543209876543


  1%|          | 325/43738 [02:21<6:03:45,  1.99it/s]

step:20, train_loss:0.222668324559927, acc:0.09846153846153846


  1%|          | 326/43738 [02:21<5:33:17,  2.17it/s]

step:20, train_loss:0.22296791879083117, acc:0.09815950920245399


  1%|          | 327/43738 [02:22<5:20:19,  2.26it/s]

step:20, train_loss:0.22229697000497342, acc:0.10091743119266056


  1%|          | 328/43738 [02:22<6:11:48,  1.95it/s]

step:20, train_loss:0.2226653177541022, acc:0.10060975609756098


  1%|          | 329/43738 [02:23<5:32:32,  2.18it/s]

step:20, train_loss:0.22248081019357707, acc:0.10030395136778116


  1%|          | 330/43738 [02:23<4:43:08,  2.56it/s]

step:20, train_loss:0.22194984657972147, acc:0.1


  1%|          | 331/43738 [02:23<4:37:11,  2.61it/s]

step:20, train_loss:0.22182342994204102, acc:0.09969788519637462


  1%|          | 332/43738 [02:24<4:36:08,  2.62it/s]

step:20, train_loss:0.2216554996725845, acc:0.09939759036144578


  1%|          | 333/43738 [02:24<4:48:08,  2.51it/s]

step:20, train_loss:0.2223450059893432, acc:0.0990990990990991


  1%|          | 334/43738 [02:25<5:40:13,  2.13it/s]

step:20, train_loss:0.22224156610935392, acc:0.09880239520958084


  1%|          | 335/43738 [02:25<5:12:10,  2.32it/s]

step:20, train_loss:0.2231762925969131, acc:0.09850746268656717


  1%|▏         | 640/43738 [04:42<6:26:35,  1.86it/s]

step:40, train_loss:0.22252581194334198, acc:0.10625


  1%|▏         | 641/43738 [04:43<6:10:49,  1.94it/s]

step:40, train_loss:0.22254771770918147, acc:0.1060842433697348


  1%|▏         | 642/43738 [04:43<5:08:32,  2.33it/s]

step:40, train_loss:0.22229631182177695, acc:0.1059190031152648


  1%|▏         | 643/43738 [04:43<5:15:57,  2.27it/s]

step:40, train_loss:0.22245333861403202, acc:0.1057542768273717


  1%|▏         | 644/43738 [04:44<6:04:13,  1.97it/s]

step:40, train_loss:0.22250338460706312, acc:0.10559006211180125


  1%|▏         | 645/43738 [04:44<5:23:53,  2.22it/s]

step:40, train_loss:0.22250583423373774, acc:0.10542635658914729


  1%|▏         | 646/43738 [04:44<4:44:38,  2.52it/s]

step:40, train_loss:0.2227778732055804, acc:0.10526315789473684


  1%|▏         | 647/43738 [04:45<4:44:56,  2.52it/s]

step:40, train_loss:0.22258873647467523, acc:0.10510046367851623


  1%|▏         | 648/43738 [04:45<4:16:55,  2.80it/s]

step:40, train_loss:0.2225249563947458, acc:0.10493827160493827


  1%|▏         | 649/43738 [04:45<4:10:40,  2.86it/s]

step:40, train_loss:0.22233119287538142, acc:0.10631741140215717


  1%|▏         | 650/43738 [04:46<4:20:53,  2.75it/s]

step:40, train_loss:0.22227222194751867, acc:0.10615384615384615


  1%|▏         | 651/43738 [04:46<4:10:17,  2.87it/s]

step:40, train_loss:0.222391982000613, acc:0.10599078341013825


  1%|▏         | 652/43738 [04:47<4:31:26,  2.65it/s]

step:40, train_loss:0.22258290712167422, acc:0.1058282208588957


  1%|▏         | 653/43738 [04:47<5:17:06,  2.26it/s]

step:40, train_loss:0.2224974119479541, acc:0.10566615620214395


  1%|▏         | 654/43738 [04:48<4:52:59,  2.45it/s]

step:40, train_loss:0.2222184422611051, acc:0.10703363914373089


  1%|▏         | 655/43738 [04:48<5:24:36,  2.21it/s]

step:40, train_loss:0.22257881314415512, acc:0.10687022900763359


  2%|▏         | 960/43738 [07:06<4:16:20,  2.78it/s]

step:60, train_loss:0.2161518528650049, acc:0.11875


  2%|▏         | 961/43738 [07:06<4:23:57,  2.70it/s]

step:60, train_loss:0.21625270464813237, acc:0.1186264308012487


  2%|▏         | 962/43738 [07:07<4:43:01,  2.52it/s]

step:60, train_loss:0.21661529238306745, acc:0.11850311850311851


  2%|▏         | 963/43738 [07:08<6:06:33,  1.94it/s]

step:60, train_loss:0.2167461668184706, acc:0.11838006230529595


  2%|▏         | 964/43738 [07:08<6:26:51,  1.84it/s]

step:60, train_loss:0.21671057956803327, acc:0.11825726141078838


  2%|▏         | 965/43738 [07:09<6:54:22,  1.72it/s]

step:60, train_loss:0.21667162805911483, acc:0.11813471502590674


  2%|▏         | 966/43738 [07:09<6:13:05,  1.91it/s]

step:60, train_loss:0.21672995945951645, acc:0.11801242236024845


  2%|▏         | 967/43738 [07:10<6:00:53,  1.98it/s]

step:60, train_loss:0.21676650932087585, acc:0.11789038262668046


  2%|▏         | 968/43738 [07:10<5:24:32,  2.20it/s]

step:60, train_loss:0.21658249626374873, acc:0.11880165289256199


  2%|▏         | 969/43738 [07:10<5:03:13,  2.35it/s]

step:60, train_loss:0.21676502165925698, acc:0.11867905056759546


  2%|▏         | 970/43738 [07:11<5:21:01,  2.22it/s]

step:60, train_loss:0.21664327203881803, acc:0.11958762886597939


  2%|▏         | 971/43738 [07:11<5:39:01,  2.10it/s]

step:60, train_loss:0.21668188035081454, acc:0.11946446961894953


  2%|▏         | 972/43738 [07:12<5:33:25,  2.14it/s]

step:60, train_loss:0.21671348688127512, acc:0.11934156378600823


  2%|▏         | 973/43738 [07:12<5:26:26,  2.18it/s]

step:60, train_loss:0.2166486153894038, acc:0.11921891058581706


  2%|▏         | 974/43738 [07:13<4:45:07,  2.50it/s]

step:60, train_loss:0.21651224469769845, acc:0.11909650924024641


  2%|▏         | 975/43738 [07:13<6:08:44,  1.93it/s]

step:60, train_loss:0.21661868656866062, acc:0.11897435897435897


  3%|▎         | 1280/43738 [09:29<4:59:54,  2.36it/s]

step:80, train_loss:0.21271182479431444, acc:0.13125


  3%|▎         | 1281/43738 [09:29<4:25:56,  2.66it/s]

step:80, train_loss:0.21260644477202464, acc:0.13114754098360656


  3%|▎         | 1282/43738 [09:30<5:53:22,  2.00it/s]

step:80, train_loss:0.2126547700399315, acc:0.1310452418096724


  3%|▎         | 1283/43738 [09:31<6:22:34,  1.85it/s]

step:80, train_loss:0.21264008144337745, acc:0.1309431021044427


  3%|▎         | 1284/43738 [09:31<6:00:01,  1.97it/s]

step:80, train_loss:0.21264646969760284, acc:0.1308411214953271


  3%|▎         | 1285/43738 [09:32<6:08:33,  1.92it/s]

step:80, train_loss:0.21276889936563628, acc:0.13073929961089495


  3%|▎         | 1286/43738 [09:32<5:38:48,  2.09it/s]

step:80, train_loss:0.21277523422316819, acc:0.13063763608087092


  3%|▎         | 1287/43738 [09:32<5:21:03,  2.20it/s]

step:80, train_loss:0.21275747525480393, acc:0.13053613053613053


  3%|▎         | 1288/43738 [09:33<5:00:58,  2.35it/s]

step:80, train_loss:0.21279031828787723, acc:0.13043478260869565


  3%|▎         | 1289/43738 [09:33<4:46:06,  2.47it/s]

step:80, train_loss:0.21266267225298022, acc:0.13033359193173003


  3%|▎         | 1290/43738 [09:34<6:06:44,  1.93it/s]

step:80, train_loss:0.21258596475597097, acc:0.1310077519379845


  3%|▎         | 1291/43738 [09:34<5:06:29,  2.31it/s]

step:80, train_loss:0.2124694357063451, acc:0.13090627420604184


  3%|▎         | 1292/43738 [09:35<5:31:17,  2.14it/s]

step:80, train_loss:0.21253512890024787, acc:0.13080495356037153


  3%|▎         | 1293/43738 [09:35<4:44:38,  2.49it/s]

step:80, train_loss:0.21247809730385186, acc:0.13070378963650425


  3%|▎         | 1294/43738 [09:35<4:12:41,  2.80it/s]

step:80, train_loss:0.21231559561270433, acc:0.13137557959814528


  3%|▎         | 1295/43738 [09:35<3:53:55,  3.02it/s]

step:80, train_loss:0.21234303718346484, acc:0.13127413127413126


  4%|▎         | 1600/43738 [11:54<6:05:20,  1.92it/s]

step:100, train_loss:0.20926430968596832, acc:0.13875


  4%|▎         | 1601/43738 [11:55<5:23:43,  2.17it/s]

step:100, train_loss:0.20921982077446571, acc:0.1386633354153654


  4%|▎         | 1602/43738 [11:55<5:47:51,  2.02it/s]

step:100, train_loss:0.20919940630921607, acc:0.13857677902621723


  4%|▎         | 1603/43738 [11:56<5:20:33,  2.19it/s]

step:100, train_loss:0.2091198879263886, acc:0.13849033063006863


  4%|▎         | 1604/43738 [11:56<5:12:05,  2.25it/s]

step:100, train_loss:0.20911266955754898, acc:0.13840399002493767


  4%|▎         | 1605/43738 [11:57<4:54:50,  2.38it/s]

step:100, train_loss:0.2091155838003713, acc:0.1383177570093458


  4%|▎         | 1606/43738 [11:57<4:13:21,  2.77it/s]

step:100, train_loss:0.20908436097539868, acc:0.1382316313823163


  4%|▎         | 1607/43738 [11:57<4:20:34,  2.69it/s]

step:100, train_loss:0.20902526773675795, acc:0.13814561294337274


  4%|▎         | 1608/43738 [11:58<4:59:34,  2.34it/s]

step:100, train_loss:0.20891886012685434, acc:0.138681592039801


  4%|▎         | 1609/43738 [11:58<4:25:56,  2.64it/s]

step:100, train_loss:0.20906334242136945, acc:0.13859540087010566


  4%|▎         | 1610/43738 [11:58<4:32:54,  2.57it/s]

step:100, train_loss:0.20911666131751924, acc:0.13850931677018632


  4%|▎         | 1611/43738 [11:59<4:31:19,  2.59it/s]

step:100, train_loss:0.20906881224982612, acc:0.13842333954065797


  4%|▎         | 1612/43738 [11:59<4:48:21,  2.43it/s]

step:100, train_loss:0.2091517051495265, acc:0.13833746898263027


  4%|▎         | 1613/43738 [12:00<4:38:52,  2.52it/s]

step:100, train_loss:0.2091019882858654, acc:0.13825170489770613


  4%|▎         | 1614/43738 [12:00<4:15:20,  2.75it/s]

step:100, train_loss:0.20932211373330242, acc:0.13816604708798016


  4%|▎         | 1615/43738 [12:00<5:05:30,  2.30it/s]

step:100, train_loss:0.2094178888638253, acc:0.13808049535603714


  4%|▍         | 1920/43738 [14:31<5:31:57,  2.10it/s]

step:120, train_loss:0.20425479455043388, acc:0.14583333333333334


  4%|▍         | 1921/43738 [14:32<5:18:04,  2.19it/s]

step:120, train_loss:0.20436872022465466, acc:0.14575741801145237


  4%|▍         | 1922/43738 [14:32<5:21:57,  2.16it/s]

step:120, train_loss:0.20431653402334332, acc:0.14568158168574402


  4%|▍         | 1923/43738 [14:33<4:43:13,  2.46it/s]

step:120, train_loss:0.20421754786838184, acc:0.14612584503380135


  4%|▍         | 1924/43738 [14:33<4:19:53,  2.68it/s]

step:120, train_loss:0.20415510679341042, acc:0.14604989604989604


  4%|▍         | 1925/43738 [14:33<4:27:18,  2.61it/s]

step:120, train_loss:0.2041337468860937, acc:0.14597402597402598


  4%|▍         | 1926/43738 [14:34<4:21:06,  2.67it/s]

step:120, train_loss:0.20406219305851142, acc:0.14641744548286603


  4%|▍         | 1927/43738 [14:34<4:23:42,  2.64it/s]

step:120, train_loss:0.20400913873096307, acc:0.14634146341463414


  4%|▍         | 1928/43738 [14:34<4:41:11,  2.48it/s]

step:120, train_loss:0.20407835750333883, acc:0.1462655601659751


  4%|▍         | 1929/43738 [14:35<4:53:03,  2.38it/s]

step:120, train_loss:0.20410714125728543, acc:0.14618973561430793


  4%|▍         | 1930/43738 [14:35<4:50:26,  2.40it/s]

step:120, train_loss:0.20409511896658572, acc:0.1461139896373057


  4%|▍         | 1931/43738 [14:36<4:24:24,  2.64it/s]

step:120, train_loss:0.2041003404493607, acc:0.14603832211289489


  4%|▍         | 1932/43738 [14:36<4:27:15,  2.61it/s]

step:120, train_loss:0.20407488651264663, acc:0.14596273291925466


  4%|▍         | 1933/43738 [14:36<4:05:37,  2.84it/s]

step:120, train_loss:0.20400569101410596, acc:0.14588722193481635


  4%|▍         | 1934/43738 [14:37<3:50:35,  3.02it/s]

step:120, train_loss:0.20393856608170646, acc:0.14632885211995864


  4%|▍         | 1935/43738 [14:37<3:40:18,  3.16it/s]

step:120, train_loss:0.20387132583169185, acc:0.1462532299741602


  5%|▌         | 2240/43738 [16:57<4:47:05,  2.41it/s]

step:140, train_loss:0.199294301610228, acc:0.153125


  5%|▌         | 2241/43738 [16:57<4:43:24,  2.44it/s]

step:140, train_loss:0.19927600809640597, acc:0.1530566711289603


  5%|▌         | 2242/43738 [16:57<4:50:32,  2.38it/s]

step:140, train_loss:0.19929884189590696, acc:0.15298840321141838


  5%|▌         | 2243/43738 [16:58<5:34:13,  2.07it/s]

step:140, train_loss:0.1992956330117197, acc:0.15292019616584931


  5%|▌         | 2244/43738 [16:58<5:07:54,  2.25it/s]

step:140, train_loss:0.19931524891272243, acc:0.15285204991087345


  5%|▌         | 2245/43738 [16:59<4:43:21,  2.44it/s]

step:140, train_loss:0.1992412339428228, acc:0.15322939866369711


  5%|▌         | 2246/43738 [16:59<5:26:10,  2.12it/s]

step:140, train_loss:0.19922894398208693, acc:0.15316117542297417


  5%|▌         | 2247/43738 [17:00<5:21:41,  2.15it/s]

step:140, train_loss:0.19927078778159188, acc:0.15309301290609703


  5%|▌         | 2248/43738 [17:00<4:49:41,  2.39it/s]

step:140, train_loss:0.19920473383605775, acc:0.1534697508896797


  5%|▌         | 2249/43738 [17:01<5:29:16,  2.10it/s]

step:140, train_loss:0.19924171695851367, acc:0.15340151178301467


  5%|▌         | 2250/43738 [17:01<5:00:44,  2.30it/s]

step:140, train_loss:0.1991862291417395, acc:0.15333333333333332


  5%|▌         | 2251/43738 [17:01<4:56:15,  2.33it/s]

step:140, train_loss:0.19919262836920615, acc:0.15326521545979566


  5%|▌         | 2252/43738 [17:02<4:54:09,  2.35it/s]

step:140, train_loss:0.19918722083569185, acc:0.15319715808170514


  5%|▌         | 2253/43738 [17:03<6:04:44,  1.90it/s]

step:140, train_loss:0.199113913696103, acc:0.15357301375943186


  5%|▌         | 2254/43738 [17:03<5:36:51,  2.05it/s]

step:140, train_loss:0.19905316376715268, acc:0.15394853593611357


  5%|▌         | 2255/43738 [17:03<4:49:14,  2.39it/s]

step:140, train_loss:0.1990170249245656, acc:0.15388026607538802


  6%|▌         | 2560/43738 [19:20<5:23:33,  2.12it/s]

step:160, train_loss:0.19533719122346155, acc:0.163671875


  6%|▌         | 2561/43738 [19:21<6:33:03,  1.75it/s]

step:160, train_loss:0.19534396529366793, acc:0.1636079656384225


  6%|▌         | 2562/43738 [19:21<5:57:32,  1.92it/s]

step:160, train_loss:0.19531715966575355, acc:0.16354410616705697


  6%|▌         | 2563/43738 [19:22<5:06:04,  2.24it/s]

step:160, train_loss:0.19524839397135016, acc:0.16387046429964885


  6%|▌         | 2564/43738 [19:22<4:22:12,  2.62it/s]

step:160, train_loss:0.19519394953699212, acc:0.16380655226209048


  6%|▌         | 2565/43738 [19:22<4:10:25,  2.74it/s]

step:160, train_loss:0.19517816585401476, acc:0.16374269005847952


  6%|▌         | 2566/43738 [19:23<5:40:17,  2.02it/s]

step:160, train_loss:0.19520584467970145, acc:0.1636788776305534


  6%|▌         | 2567/43738 [19:23<5:30:06,  2.08it/s]

step:160, train_loss:0.19517912333067486, acc:0.16361511492014025


  6%|▌         | 2568/43738 [19:24<5:06:20,  2.24it/s]

step:160, train_loss:0.19514547911847252, acc:0.16355140186915887


  6%|▌         | 2569/43738 [19:24<5:40:07,  2.02it/s]

step:160, train_loss:0.19512501818022074, acc:0.16348773841961853


  6%|▌         | 2570/43738 [19:25<5:44:43,  1.99it/s]

step:160, train_loss:0.1950936087687112, acc:0.16342412451361868


  6%|▌         | 2571/43738 [19:25<5:31:38,  2.07it/s]

step:160, train_loss:0.19501967799073888, acc:0.16374951380785688


  6%|▌         | 2572/43738 [19:26<5:26:38,  2.10it/s]

step:160, train_loss:0.195009198134731, acc:0.16368584758942456


  6%|▌         | 2573/43738 [19:26<5:23:35,  2.12it/s]

step:160, train_loss:0.19494260776850617, acc:0.16401088223863194


  6%|▌         | 2574/43738 [19:27<6:30:38,  1.76it/s]

step:160, train_loss:0.19498566531085226, acc:0.16394716394716394


  6%|▌         | 2575/43738 [19:27<5:53:41,  1.94it/s]

step:160, train_loss:0.19497997617774002, acc:0.16388349514563108


  7%|▋         | 2880/43738 [21:47<5:09:02,  2.20it/s]

step:180, train_loss:0.19149468771429384, acc:0.171875


  7%|▋         | 2881/43738 [21:47<4:23:53,  2.58it/s]

step:180, train_loss:0.19146203266284423, acc:0.17181534189517528


  7%|▋         | 2882/43738 [21:48<5:44:49,  1.97it/s]

step:180, train_loss:0.1915284182174625, acc:0.1717557251908397


  7%|▋         | 2883/43738 [21:48<5:07:02,  2.22it/s]

step:180, train_loss:0.1915237674292725, acc:0.1716961498439126


  7%|▋         | 2884/43738 [21:49<5:18:35,  2.14it/s]

step:180, train_loss:0.19150574512344729, acc:0.1716366158113731


  7%|▋         | 2885/43738 [21:49<4:47:48,  2.37it/s]

step:180, train_loss:0.19153627044395374, acc:0.17157712305025996


  7%|▋         | 2886/43738 [21:49<4:08:59,  2.73it/s]

step:180, train_loss:0.1914813797111763, acc:0.17186417186417186


  7%|▋         | 2887/43738 [21:50<5:10:01,  2.20it/s]

step:180, train_loss:0.19150442401955958, acc:0.17180464149636301


  7%|▋         | 2888/43738 [21:51<6:19:27,  1.79it/s]

step:180, train_loss:0.19154408591429553, acc:0.17174515235457063


  7%|▋         | 2889/43738 [21:51<6:42:34,  1.69it/s]

step:180, train_loss:0.19157628197483745, acc:0.17168570439598477


  7%|▋         | 2890/43738 [21:52<6:19:53,  1.79it/s]

step:180, train_loss:0.19156388864477059, acc:0.17162629757785466


  7%|▋         | 2891/43738 [21:52<5:21:30,  2.12it/s]

step:180, train_loss:0.19150556204671415, acc:0.17191283292978207


  7%|▋         | 2892/43738 [21:53<5:03:15,  2.24it/s]

step:180, train_loss:0.19145660123275465, acc:0.17219917012448133


  7%|▋         | 2893/43738 [21:53<4:19:04,  2.63it/s]

step:180, train_loss:0.19142942507384295, acc:0.17213964742481852


  7%|▋         | 2894/43738 [21:53<4:18:39,  2.63it/s]

step:180, train_loss:0.19138235855519672, acc:0.17242570836212853


  7%|▋         | 2895/43738 [21:54<4:39:19,  2.44it/s]

step:180, train_loss:0.19133508504249336, acc:0.17271157167530224


  7%|▋         | 3200/43738 [24:10<7:24:06,  1.52it/s]

step:200, train_loss:0.18771828520439157, acc:0.1834375


  7%|▋         | 3201/43738 [24:11<7:53:10,  1.43it/s]

step:200, train_loss:0.1877880112295816, acc:0.18338019368947203


  7%|▋         | 3202/43738 [24:11<6:53:53,  1.63it/s]

step:200, train_loss:0.18780416109083484, acc:0.18332292317301688


  7%|▋         | 3203/43738 [24:12<5:45:24,  1.96it/s]

step:200, train_loss:0.18780431978887485, acc:0.18326568841710897


  7%|▋         | 3204/43738 [24:12<4:51:26,  2.32it/s]

step:200, train_loss:0.18777979107507645, acc:0.18320848938826467


  7%|▋         | 3205/43738 [24:12<5:01:50,  2.24it/s]

step:200, train_loss:0.18779711674836388, acc:0.18315132605304213


  7%|▋         | 3206/43738 [24:13<5:55:05,  1.90it/s]

step:200, train_loss:0.18781815103802213, acc:0.18309419837804117


  7%|▋         | 3207/43738 [24:13<4:55:44,  2.28it/s]

step:200, train_loss:0.1877841881471551, acc:0.18303710632990333


  7%|▋         | 3208/43738 [24:14<4:48:49,  2.34it/s]

step:200, train_loss:0.1877498844267012, acc:0.18298004987531172


  7%|▋         | 3209/43738 [24:14<4:53:32,  2.30it/s]

step:200, train_loss:0.18774252223864085, acc:0.18292302898099097


  7%|▋         | 3210/43738 [24:14<4:29:50,  2.50it/s]

step:200, train_loss:0.1877155786627509, acc:0.18286604361370717


  7%|▋         | 3211/43738 [24:15<4:36:43,  2.44it/s]

step:200, train_loss:0.1877433587001548, acc:0.18280909374026783


  7%|▋         | 3212/43738 [24:15<4:53:19,  2.30it/s]

step:200, train_loss:0.18774554255752793, acc:0.1827521793275218


  7%|▋         | 3213/43738 [24:16<6:08:01,  1.84it/s]

step:200, train_loss:0.1878004231581646, acc:0.18269530034235917


  7%|▋         | 3214/43738 [24:17<6:22:30,  1.77it/s]

step:200, train_loss:0.1878409806738782, acc:0.18263845675171125


  7%|▋         | 3215/43738 [24:17<6:16:46,  1.79it/s]

step:200, train_loss:0.18778689886837283, acc:0.1828926905132193


  8%|▊         | 3520/43738 [26:37<4:20:02,  2.58it/s]

step:220, train_loss:0.18503208982907712, acc:0.190625


  8%|▊         | 3521/43738 [26:37<3:56:18,  2.84it/s]

step:220, train_loss:0.18502722100753605, acc:0.19057086055097983


  8%|▊         | 3522/43738 [26:38<4:37:38,  2.41it/s]

step:220, train_loss:0.1850496803972662, acc:0.19051675184554231


  8%|▊         | 3523/43738 [26:38<4:00:55,  2.78it/s]

step:220, train_loss:0.1850576197730233, acc:0.1904626738575078


  8%|▊         | 3525/43738 [26:38<3:05:17,  3.62it/s]

step:220, train_loss:0.18510144279994098, acc:0.19040862656072643
step:220, train_loss:0.18510937459060117, acc:0.19035460992907802


  8%|▊         | 3526/43738 [26:39<3:42:44,  3.01it/s]

step:220, train_loss:0.18509164166004702, acc:0.19030062393647193


  8%|▊         | 3527/43738 [26:39<3:34:04,  3.13it/s]

step:220, train_loss:0.18504932456423281, acc:0.190530195633683


  8%|▊         | 3528/43738 [26:39<3:37:39,  3.08it/s]

step:220, train_loss:0.18500399227548958, acc:0.19075963718820863


  8%|▊         | 3529/43738 [26:40<4:04:03,  2.75it/s]

step:220, train_loss:0.18501650475919063, acc:0.1907055823179371


  8%|▊         | 3530/43738 [26:40<4:11:05,  2.67it/s]

step:220, train_loss:0.18501091075582277, acc:0.1906515580736544


  8%|▊         | 3531/43738 [26:41<4:13:39,  2.64it/s]

step:220, train_loss:0.18503252236842233, acc:0.19059756442934014


  8%|▊         | 3532/43738 [26:41<3:51:13,  2.90it/s]

step:220, train_loss:0.18503476746385866, acc:0.1905436013590034


  8%|▊         | 3533/43738 [26:41<4:36:26,  2.42it/s]

step:220, train_loss:0.1850423745802682, acc:0.1904896688366827


  8%|▊         | 3534/43738 [26:42<5:10:19,  2.16it/s]

step:220, train_loss:0.18506322070793105, acc:0.19043576683644595


  8%|▊         | 3535/43738 [26:43<5:45:37,  1.94it/s]

step:220, train_loss:0.18509197428992447, acc:0.1903818953323904


  9%|▉         | 3840/43738 [29:02<4:19:00,  2.57it/s]

step:240, train_loss:0.1828141286703006, acc:0.19322916666666667


  9%|▉         | 3841/43738 [29:02<4:16:31,  2.59it/s]

step:240, train_loss:0.1827827367245539, acc:0.19317885967196044


  9%|▉         | 3842/43738 [29:03<4:39:18,  2.38it/s]

step:240, train_loss:0.18274419022345154, acc:0.19338885996876626


  9%|▉         | 3843/43738 [29:03<4:33:43,  2.43it/s]

step:240, train_loss:0.18275328519348916, acc:0.1933385376008327


  9%|▉         | 3844/43738 [29:04<4:44:58,  2.33it/s]

step:240, train_loss:0.18277045584587345, acc:0.19328824141519252


  9%|▉         | 3845/43738 [29:04<4:53:29,  2.27it/s]

step:240, train_loss:0.18279180740907314, acc:0.19323797139141743


  9%|▉         | 3846/43738 [29:05<5:24:15,  2.05it/s]

step:240, train_loss:0.18275832949193882, acc:0.1934477379095164


  9%|▉         | 3847/43738 [29:05<5:35:07,  1.98it/s]

step:240, train_loss:0.18277893672539403, acc:0.1933974525604367


  9%|▉         | 3848/43738 [29:06<4:59:09,  2.22it/s]

step:240, train_loss:0.18278476934965454, acc:0.19334719334719336


  9%|▉         | 3849/43738 [29:06<5:31:34,  2.00it/s]

step:240, train_loss:0.18275836012222135, acc:0.19355676799168614


  9%|▉         | 3850/43738 [29:07<5:27:46,  2.03it/s]

step:240, train_loss:0.18277383250516974, acc:0.19350649350649352


  9%|▉         | 3851/43738 [29:07<6:29:44,  1.71it/s]

step:240, train_loss:0.18276356281034292, acc:0.19345624513113477


  9%|▉         | 3852/43738 [29:08<5:40:09,  1.95it/s]

step:240, train_loss:0.18275432266651093, acc:0.1934060228452752


  9%|▉         | 3853/43738 [29:08<5:12:03,  2.13it/s]

step:240, train_loss:0.18274096627971081, acc:0.1933558266286011


  9%|▉         | 3854/43738 [29:09<5:34:56,  1.98it/s]

step:240, train_loss:0.18272962069389476, acc:0.19330565646081993


  9%|▉         | 3855/43738 [29:09<5:14:34,  2.11it/s]

step:240, train_loss:0.18272682981941266, acc:0.1932555123216602


 10%|▉         | 4160/43738 [31:28<4:19:30,  2.54it/s]

step:260, train_loss:0.18000266832358858, acc:0.19951923076923078


 10%|▉         | 4161/43738 [31:29<4:13:50,  2.60it/s]

step:260, train_loss:0.17997783341094076, acc:0.19947128094208122


 10%|▉         | 4162/43738 [31:29<4:24:08,  2.50it/s]

step:260, train_loss:0.179935315346463, acc:0.199663623258049


 10%|▉         | 4163/43738 [31:30<5:17:00,  2.08it/s]

step:260, train_loss:0.17993639248741847, acc:0.19961566178236848


 10%|▉         | 4164/43738 [31:30<4:46:50,  2.30it/s]

step:260, train_loss:0.17998212707519942, acc:0.19956772334293948


 10%|▉         | 4165/43738 [31:31<4:49:49,  2.28it/s]

step:260, train_loss:0.17994368857970194, acc:0.19975990396158463


 10%|▉         | 4166/43738 [31:31<4:51:41,  2.26it/s]

step:260, train_loss:0.17993722388466513, acc:0.19971195391262603


 10%|▉         | 4167/43738 [31:31<4:22:18,  2.51it/s]

step:260, train_loss:0.1799007295885103, acc:0.19990400767938565


 10%|▉         | 4168/43738 [31:32<5:01:52,  2.18it/s]

step:260, train_loss:0.17987948321544514, acc:0.20009596928982726


 10%|▉         | 4169/43738 [31:32<5:06:28,  2.15it/s]

step:260, train_loss:0.1798749168735296, acc:0.20004797313504438


 10%|▉         | 4170/43738 [31:33<5:31:22,  1.99it/s]

step:260, train_loss:0.17987034420267292, acc:0.2


 10%|▉         | 4171/43738 [31:33<5:38:49,  1.95it/s]

step:260, train_loss:0.17987203431635912, acc:0.19995204986813714


 10%|▉         | 4172/43738 [31:34<5:12:59,  2.11it/s]

step:260, train_loss:0.17984306364578498, acc:0.200143815915628


 10%|▉         | 4173/43738 [31:35<6:19:08,  1.74it/s]

step:260, train_loss:0.17985520738977318, acc:0.2000958543014618


 10%|▉         | 4174/43738 [31:35<6:32:52,  1.68it/s]

step:260, train_loss:0.1798684836758986, acc:0.2000479156684236


 10%|▉         | 4175/43738 [31:36<6:11:17,  1.78it/s]

step:260, train_loss:0.17984833147895415, acc:0.2


 10%|█         | 4480/43738 [33:47<5:42:13,  1.91it/s]

step:280, train_loss:0.17736683868242836, acc:0.20625


 10%|█         | 4481/43738 [33:48<5:56:23,  1.84it/s]

step:280, train_loss:0.17735833564603398, acc:0.20642713679982147


 10%|█         | 4482/43738 [33:48<6:02:31,  1.80it/s]

step:280, train_loss:0.17733014388109022, acc:0.2066041945560018


 10%|█         | 4483/43738 [33:49<6:42:23,  1.63it/s]

step:280, train_loss:0.17732670992458485, acc:0.2065581084095472


 10%|█         | 4484/43738 [33:50<7:12:01,  1.51it/s]

step:280, train_loss:0.17736751811682097, acc:0.2065120428189117


 10%|█         | 4485/43738 [33:50<5:42:37,  1.91it/s]

step:280, train_loss:0.1773742925048128, acc:0.2064659977703456


 10%|█         | 4486/43738 [33:50<4:48:21,  2.27it/s]

step:280, train_loss:0.1773555492845767, acc:0.20641997325011147


 10%|█         | 4487/43738 [33:51<4:24:37,  2.47it/s]

step:280, train_loss:0.17733629229532505, acc:0.2065968353019835


 10%|█         | 4488/43738 [33:51<3:53:54,  2.80it/s]

step:280, train_loss:0.17733665638394303, acc:0.20655080213903743


 10%|█         | 4489/43738 [33:51<4:15:45,  2.56it/s]

step:280, train_loss:0.17735936368372035, acc:0.2065047894854088


 10%|█         | 4490/43738 [33:52<4:03:01,  2.69it/s]

step:280, train_loss:0.1773532790735982, acc:0.20645879732739422


 10%|█         | 4491/43738 [33:52<4:11:20,  2.60it/s]

step:280, train_loss:0.17737863278359653, acc:0.20641282565130262


 10%|█         | 4492/43738 [33:53<4:07:04,  2.65it/s]

step:280, train_loss:0.17734794007853305, acc:0.20658949243098843


 10%|█         | 4493/43738 [33:53<4:26:29,  2.45it/s]

step:280, train_loss:0.1773207192462072, acc:0.20676608056977522


 10%|█         | 4494/43738 [33:53<3:58:45,  2.74it/s]

step:280, train_loss:0.17731522060488278, acc:0.2067200712060525


 10%|█         | 4495/43738 [33:54<4:31:10,  2.41it/s]

step:280, train_loss:0.17729474572310466, acc:0.20667408231368187


 11%|█         | 4800/43738 [36:12<5:55:46,  1.82it/s]

step:300, train_loss:0.17548445793028805, acc:0.21041666666666667


 11%|█         | 4801/43738 [36:12<5:30:36,  1.96it/s]

step:300, train_loss:0.17549125461459317, acc:0.2103728389918767


 11%|█         | 4802/43738 [36:13<5:31:16,  1.96it/s]

step:300, train_loss:0.17550311900559123, acc:0.21032902957101207


 11%|█         | 4803/43738 [36:13<5:12:53,  2.07it/s]

step:300, train_loss:0.17550150560060573, acc:0.21028523839267124


 11%|█         | 4804/43738 [36:14<5:30:15,  1.96it/s]

step:300, train_loss:0.17546780533212541, acc:0.2104496253122398


 11%|█         | 4805/43738 [36:14<5:43:20,  1.89it/s]

step:300, train_loss:0.1754524862062783, acc:0.21040582726326743


 11%|█         | 4806/43738 [36:15<6:15:15,  1.73it/s]

step:300, train_loss:0.17547471277634533, acc:0.2103620474406991


 11%|█         | 4807/43738 [36:15<5:11:54,  2.08it/s]

step:300, train_loss:0.1754620889179383, acc:0.21031828583315998


 11%|█         | 4808/43738 [36:15<4:53:49,  2.21it/s]

step:300, train_loss:0.17547223314542373, acc:0.21027454242928453


 11%|█         | 4809/43738 [36:16<4:57:42,  2.18it/s]

step:300, train_loss:0.1754629507407265, acc:0.21023081721771678


 11%|█         | 4810/43738 [36:16<4:21:02,  2.49it/s]

step:300, train_loss:0.17544902554425662, acc:0.2101871101871102


 11%|█         | 4811/43738 [36:16<3:47:17,  2.85it/s]

step:300, train_loss:0.17542531124507416, acc:0.21014342132612762


 11%|█         | 4812/43738 [36:17<4:06:04,  2.64it/s]

step:300, train_loss:0.17542525477006185, acc:0.21009975062344138


 11%|█         | 4813/43738 [36:17<4:16:14,  2.53it/s]

step:300, train_loss:0.17540267668088788, acc:0.21026386868896738


 11%|█         | 4814/43738 [36:18<5:34:02,  1.94it/s]

step:300, train_loss:0.17540068401312583, acc:0.21022019110926465


 11%|█         | 4815/43738 [36:19<5:57:56,  1.81it/s]

step:300, train_loss:0.17543677386841305, acc:0.21017653167185876


 12%|█▏        | 5120/43738 [38:38<4:35:02,  2.34it/s]

step:320, train_loss:0.17390809290468495, acc:0.2134765625


 12%|█▏        | 5121/43738 [38:38<4:25:23,  2.43it/s]

step:320, train_loss:0.17388647894480144, acc:0.21363015036125757


 12%|█▏        | 5122/43738 [38:38<3:51:29,  2.78it/s]

step:320, train_loss:0.17388085302894435, acc:0.21358844201483795


 12%|█▏        | 5123/43738 [38:39<3:42:47,  2.89it/s]

step:320, train_loss:0.17388649134701814, acc:0.21354674995120046


 12%|█▏        | 5124/43738 [38:39<3:20:09,  3.22it/s]

step:320, train_loss:0.17386137805596483, acc:0.21350507416081185


 12%|█▏        | 5125/43738 [38:39<4:08:37,  2.59it/s]

step:320, train_loss:0.17388376258445404, acc:0.21346341463414634


 12%|█▏        | 5126/43738 [38:40<4:19:58,  2.48it/s]

step:320, train_loss:0.17389056294193725, acc:0.21342177136168552


 12%|█▏        | 5127/43738 [38:40<3:59:55,  2.68it/s]

step:320, train_loss:0.1738764397622042, acc:0.21338014433391847


 12%|█▏        | 5128/43738 [38:41<4:03:35,  2.64it/s]

step:320, train_loss:0.17389201166061213, acc:0.21333853354134166


 12%|█▏        | 5129/43738 [38:41<3:59:18,  2.69it/s]

step:320, train_loss:0.17387154749600076, acc:0.21349190875414312


 12%|█▏        | 5130/43738 [38:42<4:48:50,  2.23it/s]

step:320, train_loss:0.17387141872140202, acc:0.2134502923976608


 12%|█▏        | 5131/43738 [38:42<4:25:01,  2.43it/s]

step:320, train_loss:0.1738722398988318, acc:0.21340869226271683


 12%|█▏        | 5132/43738 [38:42<3:53:57,  2.75it/s]

step:320, train_loss:0.17386379460910326, acc:0.21336710833982853


 12%|█▏        | 5133/43738 [38:43<4:23:37,  2.44it/s]

step:320, train_loss:0.17385765933758274, acc:0.21332554061952075


 12%|█▏        | 5134/43738 [38:43<4:37:03,  2.32it/s]

step:320, train_loss:0.17383709127838237, acc:0.21347876899104012


 12%|█▏        | 5135/43738 [38:43<4:15:57,  2.51it/s]

step:320, train_loss:0.17385047416732316, acc:0.21343719571567674


 12%|█▏        | 5440/43738 [40:58<4:29:40,  2.37it/s]

step:340, train_loss:0.1722201294004181, acc:0.21875


 12%|█▏        | 5441/43738 [40:59<4:10:53,  2.54it/s]

step:340, train_loss:0.17219225971429256, acc:0.2188935857379158


 12%|█▏        | 5442/43738 [40:59<3:46:40,  2.82it/s]

step:340, train_loss:0.17219436815757583, acc:0.21885336273428888


 12%|█▏        | 5443/43738 [40:59<3:42:34,  2.87it/s]

step:340, train_loss:0.1721789998767787, acc:0.2188131545103803


 12%|█▏        | 5444/43738 [41:00<3:22:55,  3.15it/s]

step:340, train_loss:0.1721592818766721, acc:0.21895664952240998


 12%|█▏        | 5445/43738 [41:00<3:48:21,  2.79it/s]

step:340, train_loss:0.17214973114411233, acc:0.21910009182736456


 12%|█▏        | 5446/43738 [41:01<4:27:31,  2.39it/s]

step:340, train_loss:0.17214817039141336, acc:0.21905986044803524


 12%|█▏        | 5447/43738 [41:01<3:51:00,  2.76it/s]

step:340, train_loss:0.17211816596732943, acc:0.21920323113640536


 12%|█▏        | 5448/43738 [41:01<3:43:52,  2.85it/s]

step:340, train_loss:0.1721233098148269, acc:0.21916299559471367


 12%|█▏        | 5449/43738 [41:02<4:05:09,  2.60it/s]

step:340, train_loss:0.17210568225035672, acc:0.21930629473297852


 12%|█▏        | 5450/43738 [41:02<5:22:12,  1.98it/s]

step:340, train_loss:0.172108482354013, acc:0.21926605504587157


 12%|█▏        | 5451/43738 [41:03<5:06:36,  2.08it/s]

step:340, train_loss:0.17210071674483848, acc:0.21922583012291322


 12%|█▏        | 5452/43738 [41:03<4:45:31,  2.23it/s]

step:340, train_loss:0.17211843477323774, acc:0.21918561995597946


 12%|█▏        | 5453/43738 [41:04<5:28:18,  1.94it/s]

step:340, train_loss:0.17213159560934002, acc:0.21914542453695213


 12%|█▏        | 5454/43738 [41:04<5:36:15,  1.90it/s]

step:340, train_loss:0.17211289765784735, acc:0.2192885955262193


 12%|█▏        | 5455/43738 [41:05<5:24:11,  1.97it/s]

step:340, train_loss:0.1721502309341127, acc:0.21924839596700274


 13%|█▎        | 5760/43738 [43:28<5:43:55,  1.84it/s]

step:360, train_loss:0.170740428322986, acc:0.22118055555555555


 13%|█▎        | 5761/43738 [43:28<4:47:22,  2.20it/s]

step:360, train_loss:0.17073532399841718, acc:0.22114216281895505


 13%|█▎        | 5762/43738 [43:29<4:46:04,  2.21it/s]

step:360, train_loss:0.17074333015120247, acc:0.2211037834085387


 13%|█▎        | 5763/43738 [43:29<4:06:52,  2.56it/s]

step:360, train_loss:0.17072323165238765, acc:0.22123893805309736


 13%|█▎        | 5764/43738 [43:30<5:15:32,  2.01it/s]

step:360, train_loss:0.17071230172786034, acc:0.22120055517002082


 13%|█▎        | 5765/43738 [43:30<6:03:42,  1.74it/s]

step:360, train_loss:0.17070852713671522, acc:0.22116218560277537


 13%|█▎        | 5766/43738 [43:31<6:43:24,  1.57it/s]

step:360, train_loss:0.17069816956768363, acc:0.22129725979882067


 13%|█▎        | 5767/43738 [43:32<6:20:09,  1.66it/s]

step:360, train_loss:0.17069363742809207, acc:0.22125888676955088


 13%|█▎        | 5768/43738 [43:32<5:40:02,  1.86it/s]

step:360, train_loss:0.17069833790716307, acc:0.22122052704576978


 13%|█▎        | 5769/43738 [43:33<5:20:06,  1.98it/s]

step:360, train_loss:0.17071984623769332, acc:0.22118218062055817


 13%|█▎        | 5770/43738 [43:33<4:27:20,  2.37it/s]

step:360, train_loss:0.1707029817737655, acc:0.22114384748700172


 13%|█▎        | 5771/43738 [43:33<4:49:31,  2.19it/s]

step:360, train_loss:0.1706931092543568, acc:0.22127880783226478


 13%|█▎        | 5772/43738 [43:34<4:07:10,  2.56it/s]

step:360, train_loss:0.17068099313708762, acc:0.22124047124047125


 13%|█▎        | 5773/43738 [43:34<4:23:47,  2.40it/s]

step:360, train_loss:0.17066795153767755, acc:0.22120214793001905


 13%|█▎        | 5774/43738 [43:34<3:49:03,  2.76it/s]

step:360, train_loss:0.17064297917003962, acc:0.22133702805680638


 13%|█▎        | 5775/43738 [43:35<3:41:02,  2.86it/s]

step:360, train_loss:0.17063270492265525, acc:0.2212987012987013


 14%|█▍        | 6080/43738 [45:58<6:22:42,  1.64it/s]

step:380, train_loss:0.1690884237839782, acc:0.22598684210526315


 14%|█▍        | 6081/43738 [45:59<5:54:05,  1.77it/s]

step:380, train_loss:0.16908504355067375, acc:0.2259496793290577


 14%|█▍        | 6082/43738 [45:59<5:57:16,  1.76it/s]

step:380, train_loss:0.16910953784229946, acc:0.2259125287734298


 14%|█▍        | 6083/43738 [46:00<5:11:55,  2.01it/s]

step:380, train_loss:0.16909917376506947, acc:0.22587539043235247


 14%|█▍        | 6084/43738 [46:00<4:27:11,  2.35it/s]

step:380, train_loss:0.1690719864128894, acc:0.2260026298487837


 14%|█▍        | 6085/43738 [46:00<4:35:15,  2.28it/s]

step:380, train_loss:0.16907013833844972, acc:0.22596548890714874


 14%|█▍        | 6086/43738 [46:01<4:00:04,  2.61it/s]

step:380, train_loss:0.16905916555544542, acc:0.22609267170555372


 14%|█▍        | 6087/43738 [46:01<4:46:17,  2.19it/s]

step:380, train_loss:0.16907161376922522, acc:0.22605552817479876


 14%|█▍        | 6088/43738 [46:02<4:41:23,  2.23it/s]

step:380, train_loss:0.16907892637717373, acc:0.22601839684625494


 14%|█▍        | 6089/43738 [46:02<4:16:48,  2.44it/s]

step:380, train_loss:0.16907474634896502, acc:0.22598127771391033


 14%|█▍        | 6090/43738 [46:02<4:08:41,  2.52it/s]

step:380, train_loss:0.1691036159952962, acc:0.225944170771757


 14%|█▍        | 6091/43738 [46:03<4:03:05,  2.58it/s]

step:380, train_loss:0.16907917750730073, acc:0.22607125266787062


 14%|█▍        | 6092/43738 [46:03<4:16:46,  2.44it/s]

step:380, train_loss:0.16907126875506354, acc:0.22603414313854234


 14%|█▍        | 6093/43738 [46:03<3:46:56,  2.76it/s]

step:380, train_loss:0.16905564936968512, acc:0.2259970457902511


 14%|█▍        | 6094/43738 [46:04<3:47:12,  2.76it/s]

step:380, train_loss:0.16903159688901606, acc:0.2261240564489662


 14%|█▍        | 6095/43738 [46:04<3:54:34,  2.67it/s]

step:380, train_loss:0.16901437957042262, acc:0.22608695652173913


 15%|█▍        | 6400/43738 [48:23<5:17:34,  1.96it/s]

step:400, train_loss:0.16732904359643727, acc:0.23


 15%|█▍        | 6401/43738 [48:24<4:49:09,  2.15it/s]

step:400, train_loss:0.16732332400412067, acc:0.22996406811435713


 15%|█▍        | 6402/43738 [48:24<5:11:58,  1.99it/s]

step:400, train_loss:0.16731192279169618, acc:0.22992814745392065


 15%|█▍        | 6403/43738 [48:25<4:52:41,  2.13it/s]

step:400, train_loss:0.16731334030550388, acc:0.2298922380134312


 15%|█▍        | 6404/43738 [48:25<4:07:00,  2.52it/s]

step:400, train_loss:0.16728760297474907, acc:0.23001249219237976


 15%|█▍        | 6405/43738 [48:25<4:06:49,  2.52it/s]

step:400, train_loss:0.16729949798662533, acc:0.22997658079625294


 15%|█▍        | 6406/43738 [48:26<3:59:44,  2.60it/s]

step:400, train_loss:0.1673024292975153, acc:0.22994068061192632


 15%|█▍        | 6407/43738 [48:26<3:46:54,  2.74it/s]

step:400, train_loss:0.16730013246277636, acc:0.22990479163415015


 15%|█▍        | 6408/43738 [48:27<4:07:49,  2.51it/s]

step:400, train_loss:0.1673083897109522, acc:0.2298689138576779


 15%|█▍        | 6409/43738 [48:27<3:52:59,  2.67it/s]

step:400, train_loss:0.16732071412221977, acc:0.22983304727726633


 15%|█▍        | 6410/43738 [48:27<4:09:37,  2.49it/s]

step:400, train_loss:0.16731847815986362, acc:0.2297971918876755


 15%|█▍        | 6411/43738 [48:28<4:22:50,  2.37it/s]

step:400, train_loss:0.16733139301324823, acc:0.22976134768366868


 15%|█▍        | 6412/43738 [48:28<3:49:14,  2.71it/s]

step:400, train_loss:0.1673100713486272, acc:0.22988147223955085


 15%|█▍        | 6413/43738 [48:28<4:06:09,  2.53it/s]

step:400, train_loss:0.16731582685690602, acc:0.22984562607204118


 15%|█▍        | 6414/43738 [48:29<5:19:31,  1.95it/s]

step:400, train_loss:0.1673171810526212, acc:0.2298097910820081


 15%|█▍        | 6415/43738 [48:30<5:13:01,  1.99it/s]

step:400, train_loss:0.1673132639358347, acc:0.22977396726422447


 15%|█▌        | 6720/43738 [50:54<5:33:50,  1.85it/s]

step:420, train_loss:0.16565019740377293, acc:0.23288690476190477


 15%|█▌        | 6721/43738 [50:54<5:22:19,  1.91it/s]

step:420, train_loss:0.16565142444806336, acc:0.23285225412884988


 15%|█▌        | 6722/43738 [50:54<4:56:33,  2.08it/s]

step:420, train_loss:0.1656373344626479, acc:0.232966379053853


 15%|█▌        | 6723/43738 [50:55<4:56:12,  2.08it/s]

step:420, train_loss:0.16563157263299455, acc:0.23308047002826118


 15%|█▌        | 6724/43738 [50:55<5:09:41,  1.99it/s]

step:420, train_loss:0.16562907556119663, acc:0.23304580606781677


 15%|█▌        | 6725/43738 [50:56<6:14:07,  1.65it/s]

step:420, train_loss:0.16562515354874546, acc:0.23301115241635686


 15%|█▌        | 6726/43738 [50:57<6:02:25,  1.70it/s]

step:420, train_loss:0.16562301044472402, acc:0.23297650906928338


 15%|█▌        | 6727/43738 [50:58<6:31:57,  1.57it/s]

step:420, train_loss:0.16562211323824785, acc:0.2329418760220009


 15%|█▌        | 6728/43738 [50:58<5:33:50,  1.85it/s]

step:420, train_loss:0.1656118682583234, acc:0.23305588585017836


 15%|█▌        | 6729/43738 [50:58<4:58:14,  2.07it/s]

step:420, train_loss:0.16561026867803091, acc:0.2330212513003418


 15%|█▌        | 6730/43738 [50:59<4:43:41,  2.17it/s]

step:420, train_loss:0.16561102631019586, acc:0.23298662704309064


 15%|█▌        | 6731/43738 [50:59<4:36:26,  2.23it/s]

step:420, train_loss:0.16559844696102158, acc:0.233100579408706


 15%|█▌        | 6732/43738 [51:00<4:31:29,  2.27it/s]

step:420, train_loss:0.16559123298249706, acc:0.23306595365418895


 15%|█▌        | 6733/43738 [51:00<4:23:10,  2.34it/s]

step:420, train_loss:0.16559505599782454, acc:0.23303133818505867


 15%|█▌        | 6734/43738 [51:00<3:47:12,  2.71it/s]

step:420, train_loss:0.16557203971360815, acc:0.23314523314523314


 15%|█▌        | 6735/43738 [51:01<4:17:18,  2.40it/s]

step:420, train_loss:0.16555515502202003, acc:0.23325909428359318


 16%|█▌        | 7040/43738 [53:27<6:09:04,  1.66it/s]

step:440, train_loss:0.16449516677421344, acc:0.2346590909090909


 16%|█▌        | 7041/43738 [53:27<5:41:55,  1.79it/s]

step:440, train_loss:0.16449385743646805, acc:0.23462576338588267


 16%|█▌        | 7042/43738 [53:28<5:55:26,  1.72it/s]

step:440, train_loss:0.16449568629286918, acc:0.2345924453280318


 16%|█▌        | 7043/43738 [53:29<5:51:32,  1.74it/s]

step:440, train_loss:0.16448908798201806, acc:0.23455913673150647


 16%|█▌        | 7044/43738 [53:29<6:00:05,  1.70it/s]

step:440, train_loss:0.16451104285910356, acc:0.2345258375922771


 16%|█▌        | 7045/43738 [53:30<6:06:03,  1.67it/s]

step:440, train_loss:0.1645032515459495, acc:0.23449254790631655


 16%|█▌        | 7046/43738 [53:30<5:38:06,  1.81it/s]

step:440, train_loss:0.16451063241437264, acc:0.23445926766959976


 16%|█▌        | 7047/43738 [53:31<5:42:13,  1.79it/s]

step:440, train_loss:0.16450517338410373, acc:0.23442599687810417


 16%|█▌        | 7048/43738 [53:32<6:17:40,  1.62it/s]

step:440, train_loss:0.16452865806382724, acc:0.23439273552780931


 16%|█▌        | 7049/43738 [53:32<5:42:40,  1.78it/s]

step:440, train_loss:0.1645100262411563, acc:0.23450134770889489


 16%|█▌        | 7050/43738 [53:32<5:24:16,  1.89it/s]

step:440, train_loss:0.1645178096017186, acc:0.23446808510638298


 16%|█▌        | 7051/43738 [53:33<5:31:35,  1.84it/s]

step:440, train_loss:0.16450941990706328, acc:0.2344348319387321


 16%|█▌        | 7052/43738 [53:34<5:33:19,  1.83it/s]

step:440, train_loss:0.16450777331969219, acc:0.23440158820192852


 16%|█▌        | 7053/43738 [53:34<4:59:06,  2.04it/s]

step:440, train_loss:0.16450878324407164, acc:0.23436835389196087


 16%|█▌        | 7054/43738 [53:35<6:00:06,  1.70it/s]

step:440, train_loss:0.1644991816963585, acc:0.2344768925432379


 16%|█▌        | 7055/43738 [53:35<6:02:41,  1.69it/s]

step:440, train_loss:0.16451850118129405, acc:0.23444365698086464


 17%|█▋        | 7360/43738 [56:00<4:32:23,  2.23it/s]

step:460, train_loss:0.1626160595752362, acc:0.240625


 17%|█▋        | 7361/43738 [56:01<4:19:38,  2.34it/s]

step:460, train_loss:0.162616829583224, acc:0.24059231082733323


 17%|█▋        | 7362/43738 [56:01<4:42:07,  2.15it/s]

step:460, train_loss:0.16262915341694745, acc:0.24055963053518065


 17%|█▋        | 7363/43738 [56:02<4:43:48,  2.14it/s]

step:460, train_loss:0.16265896372978364, acc:0.24052695911992394


 17%|█▋        | 7364/43738 [56:02<4:33:44,  2.21it/s]

step:460, train_loss:0.16266328986170223, acc:0.24049429657794677


 17%|█▋        | 7365/43738 [56:03<4:24:55,  2.29it/s]

step:460, train_loss:0.16265623250869352, acc:0.24046164290563476


 17%|█▋        | 7366/43738 [56:03<3:50:57,  2.62it/s]

step:460, train_loss:0.16263495873222508, acc:0.24056475699158295


 17%|█▋        | 7367/43738 [56:03<4:16:02,  2.37it/s]

step:460, train_loss:0.16263986591028345, acc:0.24053210261979097


 17%|█▋        | 7368/43738 [56:04<4:04:47,  2.48it/s]

step:460, train_loss:0.16261957652390183, acc:0.24063517915309446


 17%|█▋        | 7369/43738 [56:04<3:53:03,  2.60it/s]

step:460, train_loss:0.16260442038876552, acc:0.24060252408739313


 17%|█▋        | 7370/43738 [56:04<4:15:25,  2.37it/s]

step:460, train_loss:0.16258897034131026, acc:0.2407055630936228


 17%|█▋        | 7371/43738 [56:05<4:16:41,  2.36it/s]

step:460, train_loss:0.1625842387463469, acc:0.240672907339574


 17%|█▋        | 7372/43738 [56:05<4:21:26,  2.32it/s]

step:460, train_loss:0.1625725837227382, acc:0.24077590884427563


 17%|█▋        | 7373/43738 [56:06<4:12:53,  2.40it/s]

step:460, train_loss:0.1625613236765289, acc:0.24074325240743252


 17%|█▋        | 7374/43738 [56:06<4:09:23,  2.43it/s]

step:460, train_loss:0.16254434842128934, acc:0.24084621643612694


 17%|█▋        | 7375/43738 [56:06<3:54:06,  2.59it/s]

step:460, train_loss:0.16252804271488497, acc:0.24094915254237287


 18%|█▊        | 7680/43738 [58:26<4:57:30,  2.02it/s]

step:480, train_loss:0.16097367736920204, acc:0.24596354166666667


 18%|█▊        | 7681/43738 [58:27<4:44:02,  2.12it/s]

step:480, train_loss:0.16096410486476015, acc:0.2460617107147507


 18%|█▊        | 7682/43738 [58:27<4:04:26,  2.46it/s]

step:480, train_loss:0.16094771903965369, acc:0.2461598542046342


 18%|█▊        | 7683/43738 [58:27<4:17:45,  2.33it/s]

step:480, train_loss:0.16095059278844642, acc:0.24612781465573344


 18%|█▊        | 7684/43738 [58:28<4:00:06,  2.50it/s]

step:480, train_loss:0.1609676549371314, acc:0.24609578344612182


 18%|█▊        | 7685/43738 [58:29<5:29:18,  1.82it/s]

step:480, train_loss:0.1609701668900282, acc:0.24606376057254392


 18%|█▊        | 7686/43738 [58:29<5:16:39,  1.90it/s]

step:480, train_loss:0.16095265653611443, acc:0.24616185271922977


 18%|█▊        | 7687/43738 [58:30<5:26:52,  1.84it/s]

step:480, train_loss:0.16099445230643278, acc:0.24612982958241186


 18%|█▊        | 7688/43738 [58:30<5:09:36,  1.94it/s]

step:480, train_loss:0.16099923379831554, acc:0.2460978147762747


 18%|█▊        | 7689/43738 [58:31<5:24:20,  1.85it/s]

step:480, train_loss:0.1609998427213412, acc:0.24606580829756797


 18%|█▊        | 7690/43738 [58:31<4:35:29,  2.18it/s]

step:480, train_loss:0.16097936903442026, acc:0.24616384915474643


 18%|█▊        | 7691/43738 [58:32<4:48:00,  2.09it/s]

step:480, train_loss:0.16097645383839088, acc:0.24613184241321023


 18%|█▊        | 7692/43738 [58:32<5:30:58,  1.82it/s]

step:480, train_loss:0.16098255068578918, acc:0.24609984399375975


 18%|█▊        | 7693/43738 [58:33<4:44:15,  2.11it/s]

step:480, train_loss:0.16096587968754986, acc:0.24619784219420252


 18%|█▊        | 7694/43738 [58:33<4:03:32,  2.47it/s]

step:480, train_loss:0.16094499093447862, acc:0.24629581492071745


 18%|█▊        | 7695/43738 [58:33<4:11:49,  2.39it/s]

step:480, train_loss:0.16094403850433056, acc:0.24626380766731643


 18%|█▊        | 8000/43738 [1:00:52<5:30:19,  1.80it/s]

step:500, train_loss:0.15970846757939217, acc:0.250625


 18%|█▊        | 8001/43738 [1:00:53<4:35:20,  2.16it/s]

step:500, train_loss:0.15971553402844613, acc:0.25059367579052616


 18%|█▊        | 8002/43738 [1:00:53<3:59:30,  2.49it/s]

step:500, train_loss:0.15973123069404868, acc:0.2505623594101475


 18%|█▊        | 8003/43738 [1:00:53<4:06:06,  2.42it/s]

step:500, train_loss:0.15972870710533973, acc:0.250531050855929


 18%|█▊        | 8004/43738 [1:00:54<5:06:36,  1.94it/s]

step:500, train_loss:0.15973325524341514, acc:0.2504997501249375


 18%|█▊        | 8005/43738 [1:00:54<4:18:51,  2.30it/s]

step:500, train_loss:0.15972768070634996, acc:0.25059337913803875


 18%|█▊        | 8006/43738 [1:00:55<3:46:39,  2.63it/s]

step:500, train_loss:0.15971260219751102, acc:0.2506869847614289


 18%|█▊        | 8007/43738 [1:00:55<3:23:07,  2.93it/s]

step:500, train_loss:0.15970256505708888, acc:0.25065567628325214


 18%|█▊        | 8008/43738 [1:00:55<3:05:52,  3.20it/s]

step:500, train_loss:0.1597221669285468, acc:0.2506243756243756


 18%|█▊        | 8009/43738 [1:00:56<3:36:21,  2.75it/s]

step:500, train_loss:0.1597297683828367, acc:0.25059308278187037


 18%|█▊        | 8010/43738 [1:00:56<4:34:35,  2.17it/s]

step:500, train_loss:0.1597329578242102, acc:0.250561797752809


 18%|█▊        | 8011/43738 [1:00:57<4:25:20,  2.24it/s]

step:500, train_loss:0.15972665825211765, acc:0.250655348895269


 18%|█▊        | 8012/43738 [1:00:57<4:26:55,  2.23it/s]

step:500, train_loss:0.15972938941143625, acc:0.25062406390414377


 18%|█▊        | 8013/43738 [1:00:58<4:25:29,  2.24it/s]

step:500, train_loss:0.15973072110834985, acc:0.2505927867215774


 18%|█▊        | 8014/43738 [1:00:58<3:54:02,  2.54it/s]

step:500, train_loss:0.15975385178772233, acc:0.2505615173446469


 18%|█▊        | 8015/43738 [1:00:58<3:41:44,  2.69it/s]

step:500, train_loss:0.15973604610666567, acc:0.25065502183406113


 19%|█▉        | 8320/43738 [1:03:20<3:54:41,  2.52it/s]

step:520, train_loss:0.15809443537118217, acc:0.2561298076923077


 19%|█▉        | 8321/43738 [1:03:20<3:40:07,  2.68it/s]

step:520, train_loss:0.15807881853079644, acc:0.25621920442254537


 19%|█▉        | 8322/43738 [1:03:21<3:58:07,  2.48it/s]

step:520, train_loss:0.15808144140028965, acc:0.2561884162460947


 19%|█▉        | 8323/43738 [1:03:21<3:48:02,  2.59it/s]

step:520, train_loss:0.1580753668085439, acc:0.2561576354679803


 19%|█▉        | 8324/43738 [1:03:22<4:40:01,  2.11it/s]

step:520, train_loss:0.1580705783062648, acc:0.2561268620855358


 19%|█▉        | 8325/43738 [1:03:23<5:36:26,  1.75it/s]

step:520, train_loss:0.1580821037987791, acc:0.25609609609609607


 19%|█▉        | 8326/43738 [1:03:23<4:42:42,  2.09it/s]

step:520, train_loss:0.15807217687838318, acc:0.2561854431900072


 19%|█▉        | 8327/43738 [1:03:23<4:25:52,  2.22it/s]

step:520, train_loss:0.1580628003737558, acc:0.25615467755494176


 19%|█▉        | 8328/43738 [1:03:24<3:50:01,  2.57it/s]

step:520, train_loss:0.15805117977535393, acc:0.2561239193083574


 19%|█▉        | 8329/43738 [1:03:24<4:43:46,  2.08it/s]

step:520, train_loss:0.15803313642973849, acc:0.25621323088005765


 19%|█▉        | 8330/43738 [1:03:25<4:57:32,  1.98it/s]

step:520, train_loss:0.15803414197859234, acc:0.25618247298919566


 19%|█▉        | 8331/43738 [1:03:25<5:04:47,  1.94it/s]

step:520, train_loss:0.15804559603357746, acc:0.25615172248229506


 19%|█▉        | 8332/43738 [1:03:26<4:21:44,  2.25it/s]

step:520, train_loss:0.1580293909878542, acc:0.25624099855976956


 19%|█▉        | 8333/43738 [1:03:26<4:03:11,  2.43it/s]

step:520, train_loss:0.15801045946954026, acc:0.2563302532101284


 19%|█▉        | 8334/43738 [1:03:26<4:13:26,  2.33it/s]

step:520, train_loss:0.15799328973183038, acc:0.2564194864410847


 19%|█▉        | 8335/43738 [1:03:27<4:10:26,  2.36it/s]

step:520, train_loss:0.15799567925201627, acc:0.2563887222555489


 20%|█▉        | 8640/43738 [1:05:52<4:51:42,  2.01it/s]

step:540, train_loss:0.15684651962435425, acc:0.26087962962962963


 20%|█▉        | 8641/43738 [1:05:53<4:30:41,  2.16it/s]

step:540, train_loss:0.156834829070603, acc:0.26096516606874204


 20%|█▉        | 8642/43738 [1:05:54<5:25:00,  1.80it/s]

step:540, train_loss:0.15684262878838182, acc:0.2609349687572321


 20%|█▉        | 8643/43738 [1:05:54<4:32:43,  2.14it/s]

step:540, train_loss:0.15684393271731997, acc:0.2609047784334143


 20%|█▉        | 8644/43738 [1:05:54<4:27:37,  2.19it/s]

step:540, train_loss:0.1568392785486408, acc:0.2608745950948635


 20%|█▉        | 8645/43738 [1:05:55<4:10:11,  2.34it/s]

step:540, train_loss:0.15684243715904503, acc:0.2608444187391556


 20%|█▉        | 8646/43738 [1:05:55<3:54:51,  2.49it/s]

step:540, train_loss:0.15683813256277837, acc:0.2608142493638677


 20%|█▉        | 8647/43738 [1:05:56<4:46:56,  2.04it/s]

step:540, train_loss:0.1568296914893857, acc:0.260899734011796


 20%|█▉        | 8648/43738 [1:05:56<4:30:32,  2.16it/s]

step:540, train_loss:0.15683472340813356, acc:0.2608695652173913


 20%|█▉        | 8649/43738 [1:05:57<5:01:11,  1.94it/s]

step:540, train_loss:0.15683581009349226, acc:0.2608394033992369


 20%|█▉        | 8650/43738 [1:05:57<4:28:43,  2.18it/s]

step:540, train_loss:0.15681971964491276, acc:0.2609248554913295


 20%|█▉        | 8651/43738 [1:05:57<3:56:57,  2.47it/s]

step:540, train_loss:0.15680559707285663, acc:0.2610102878279968


 20%|█▉        | 8652/43738 [1:05:58<3:44:04,  2.61it/s]

step:540, train_loss:0.156788519106413, acc:0.26109570041608876


 20%|█▉        | 8653/43738 [1:05:58<4:42:03,  2.07it/s]

step:540, train_loss:0.15678380289250207, acc:0.26106552640702646


 20%|█▉        | 8654/43738 [1:05:59<4:40:03,  2.09it/s]

step:540, train_loss:0.15678359463283717, acc:0.26103535937138894


 20%|█▉        | 8655/43738 [1:05:59<4:12:33,  2.32it/s]

step:540, train_loss:0.15677525680055204, acc:0.2610051993067591


 20%|██        | 8960/43738 [1:08:17<4:57:53,  1.95it/s]

step:560, train_loss:0.15583955043990563, acc:0.2638392857142857


 20%|██        | 8961/43738 [1:08:17<5:02:18,  1.92it/s]

step:560, train_loss:0.155837170795142, acc:0.26380984265148977


 20%|██        | 8962/43738 [1:08:18<4:18:43,  2.24it/s]

step:560, train_loss:0.15582009704231659, acc:0.2638919883954474


 20%|██        | 8963/43738 [1:08:18<4:11:16,  2.31it/s]

step:560, train_loss:0.15581711174321602, acc:0.2638625460225371


 20%|██        | 8964/43738 [1:08:18<3:59:25,  2.42it/s]

step:560, train_loss:0.15581333617669085, acc:0.2638331102186524


 20%|██        | 8965/43738 [1:08:19<3:50:41,  2.51it/s]

step:560, train_loss:0.15581394064526682, acc:0.26380368098159507


 20%|██        | 8966/43738 [1:08:19<4:25:54,  2.18it/s]

step:560, train_loss:0.15581748712902327, acc:0.26377425830916795


 21%|██        | 8967/43738 [1:08:20<4:01:33,  2.40it/s]

step:560, train_loss:0.15582428464998274, acc:0.26374484219917477


 21%|██        | 8968/43738 [1:08:20<4:10:13,  2.32it/s]

step:560, train_loss:0.15582317378781238, acc:0.26371543264942016


 21%|██        | 8969/43738 [1:08:21<4:04:49,  2.37it/s]

step:560, train_loss:0.15581174736478573, acc:0.2637975248076709


 21%|██        | 8970/43738 [1:08:21<4:23:23,  2.20it/s]

step:560, train_loss:0.15582073050345865, acc:0.263768115942029


 21%|██        | 8971/43738 [1:08:21<3:44:16,  2.58it/s]

step:560, train_loss:0.15580699636311904, acc:0.26385018392598375


 21%|██        | 8972/43738 [1:08:22<4:00:48,  2.41it/s]

step:560, train_loss:0.15580844792999224, acc:0.2638207757467677


 21%|██        | 8973/43738 [1:08:22<4:29:05,  2.15it/s]

step:560, train_loss:0.15583658480317938, acc:0.2637913741223671


 21%|██        | 8974/43738 [1:08:23<4:28:09,  2.16it/s]

step:560, train_loss:0.15584894781844347, acc:0.2637619790505906


 21%|██        | 8975/43738 [1:08:23<4:17:14,  2.25it/s]

step:560, train_loss:0.1558337844618237, acc:0.2638440111420613


 21%|██        | 9280/43738 [1:10:43<5:27:38,  1.75it/s]

step:580, train_loss:0.15490558539203417, acc:0.2658405172413793


 21%|██        | 9281/43738 [1:10:43<5:08:09,  1.86it/s]

step:580, train_loss:0.1548948834059004, acc:0.2659196207305247


 21%|██        | 9282/43738 [1:10:43<4:16:53,  2.24it/s]

step:580, train_loss:0.15487905165392185, acc:0.26599870717517776


 21%|██        | 9283/43738 [1:10:44<3:56:16,  2.43it/s]

step:580, train_loss:0.15486530241571125, acc:0.2660777765808467


 21%|██        | 9284/43738 [1:10:44<3:26:06,  2.79it/s]

step:580, train_loss:0.15485480608243749, acc:0.2661568289530375


 21%|██        | 9285/43738 [1:10:44<3:36:30,  2.65it/s]

step:580, train_loss:0.15485743303900407, acc:0.2661281637049004


 21%|██        | 9286/43738 [1:10:45<3:36:20,  2.65it/s]

step:580, train_loss:0.15485020120602014, acc:0.26609950463062676


 21%|██        | 9287/43738 [1:10:45<4:29:20,  2.13it/s]

step:580, train_loss:0.15484909255434348, acc:0.2661785291267363


 21%|██        | 9288/43738 [1:10:46<3:51:30,  2.48it/s]

step:580, train_loss:0.15484197613374184, acc:0.2661498708010336


 21%|██        | 9289/43738 [1:10:46<3:59:11,  2.40it/s]

step:580, train_loss:0.15485208193937386, acc:0.26612121864571


 21%|██        | 9290/43738 [1:10:47<4:27:16,  2.15it/s]

step:580, train_loss:0.154850016915432, acc:0.2660925726587729


 21%|██        | 9291/43738 [1:10:47<4:10:23,  2.29it/s]

step:580, train_loss:0.1548413750940121, acc:0.2661715638790227


 21%|██        | 9292/43738 [1:10:47<3:40:00,  2.61it/s]

step:580, train_loss:0.15483909831977002, acc:0.26614291863969003


 21%|██        | 9293/43738 [1:10:48<3:37:43,  2.64it/s]

step:580, train_loss:0.15483073721377855, acc:0.26622188744216074


 21%|██        | 9294/43738 [1:10:48<3:35:07,  2.67it/s]

step:580, train_loss:0.15483239233791332, acc:0.26619324295244245


 21%|██▏       | 9295/43738 [1:10:48<3:51:20,  2.48it/s]

step:580, train_loss:0.15481814372259886, acc:0.26627218934911245


 22%|██▏       | 9600/43738 [1:13:08<4:51:01,  1.96it/s]

step:600, train_loss:0.1538524107820831, acc:0.26927083333333335


 22%|██▏       | 9601/43738 [1:13:08<5:08:21,  1.85it/s]

step:600, train_loss:0.15385424736764328, acc:0.26924278720966566


 22%|██▏       | 9602/43738 [1:13:08<4:32:09,  2.09it/s]

step:600, train_loss:0.15386247159542005, acc:0.2692147469277234


 22%|██▏       | 9603/43738 [1:13:09<5:08:11,  1.85it/s]

step:600, train_loss:0.15385304680154555, acc:0.26929084661043423


 22%|██▏       | 9604/43738 [1:13:09<4:31:38,  2.09it/s]

step:600, train_loss:0.15383849264537308, acc:0.26936693044564763


 22%|██▏       | 9605/43738 [1:13:10<4:11:47,  2.26it/s]

step:600, train_loss:0.1538313778986596, acc:0.2694429984383134


 22%|██▏       | 9606/43738 [1:13:10<4:15:13,  2.23it/s]

step:600, train_loss:0.15383080150632794, acc:0.26941494899021445


 22%|██▏       | 9607/43738 [1:13:11<4:44:33,  2.00it/s]

step:600, train_loss:0.15383112451526842, acc:0.2693869053814927


 22%|██▏       | 9608/43738 [1:13:11<4:50:34,  1.96it/s]

step:600, train_loss:0.153821224546973, acc:0.2694629475437136


 22%|██▏       | 9609/43738 [1:13:12<5:29:17,  1.73it/s]

step:600, train_loss:0.15383100387924484, acc:0.2694349047767718


 22%|██▏       | 9610/43738 [1:13:12<4:35:27,  2.06it/s]

step:600, train_loss:0.15381770702820616, acc:0.2695109261186264


 22%|██▏       | 9611/43738 [1:13:13<3:52:41,  2.44it/s]

step:600, train_loss:0.15380292364435508, acc:0.2695869316408282


 22%|██▏       | 9612/43738 [1:13:13<3:54:38,  2.42it/s]

step:600, train_loss:0.15379032282668878, acc:0.2696629213483146


 22%|██▏       | 9613/43738 [1:13:14<4:12:35,  2.25it/s]

step:600, train_loss:0.15379685546466246, acc:0.269634869447623


 22%|██▏       | 9614/43738 [1:13:14<3:51:11,  2.46it/s]

step:600, train_loss:0.1537982312782165, acc:0.2696068233825671


 22%|██▏       | 9615/43738 [1:13:14<3:35:47,  2.64it/s]

step:600, train_loss:0.15380015766037253, acc:0.26957878315132605


 23%|██▎       | 9920/43738 [1:15:35<4:47:24,  1.96it/s]

step:620, train_loss:0.15298989894912843, acc:0.2724798387096774


 23%|██▎       | 9921/43738 [1:15:36<4:28:05,  2.10it/s]

step:620, train_loss:0.1529823369044784, acc:0.2724523737526459


 23%|██▎       | 9922/43738 [1:15:36<3:48:46,  2.46it/s]

step:620, train_loss:0.1529669480398702, acc:0.2725257004636162


 23%|██▎       | 9923/43738 [1:15:37<4:32:55,  2.07it/s]

step:620, train_loss:0.15297135101973397, acc:0.27249823642043736


 23%|██▎       | 9924/43738 [1:15:37<4:17:54,  2.19it/s]

step:620, train_loss:0.15297199939803957, acc:0.2724707779121322


 23%|██▎       | 9925/43738 [1:15:38<3:57:39,  2.37it/s]

step:620, train_loss:0.15296681164156573, acc:0.2724433249370277


 23%|██▎       | 9926/43738 [1:15:38<4:01:07,  2.34it/s]

step:620, train_loss:0.1529674411518857, acc:0.27241587749345153


 23%|██▎       | 9927/43738 [1:15:38<4:08:49,  2.26it/s]

step:620, train_loss:0.15296918576127796, acc:0.272388435579732


 23%|██▎       | 9928/43738 [1:15:39<4:07:33,  2.28it/s]

step:620, train_loss:0.15296553129159465, acc:0.2724617244157937


 23%|██▎       | 9929/43738 [1:15:40<4:53:25,  1.92it/s]

step:620, train_loss:0.15296623372168366, acc:0.2724342834122268


 23%|██▎       | 9930/43738 [1:15:40<4:07:55,  2.27it/s]

step:620, train_loss:0.1529644370930672, acc:0.27240684793554887


 23%|██▎       | 9931/43738 [1:15:41<4:59:58,  1.88it/s]

step:620, train_loss:0.15296445383739637, acc:0.2723794179840902


 23%|██▎       | 9932/43738 [1:15:41<4:45:58,  1.97it/s]

step:620, train_loss:0.1529567731083807, acc:0.27235199355618206


 23%|██▎       | 9933/43738 [1:15:42<5:04:44,  1.85it/s]

step:620, train_loss:0.15294928481615305, acc:0.2724252491694352


 23%|██▎       | 9934/43738 [1:15:42<4:29:28,  2.09it/s]

step:620, train_loss:0.15294809868469159, acc:0.2723978256492853


 23%|██▎       | 9935/43738 [1:15:42<4:24:56,  2.13it/s]

step:620, train_loss:0.15293876263676537, acc:0.2723704076497232


 23%|██▎       | 10240/43738 [1:17:58<4:25:18,  2.10it/s]

step:640, train_loss:0.15188055770532002, acc:0.27578125


 23%|██▎       | 10241/43738 [1:17:59<4:41:30,  1.98it/s]

step:640, train_loss:0.15188395144559488, acc:0.27575432086710283


 23%|██▎       | 10242/43738 [1:17:59<4:12:40,  2.21it/s]

step:640, train_loss:0.1518791057654132, acc:0.2757273969927749


 23%|██▎       | 10243/43738 [1:18:00<4:16:21,  2.18it/s]

step:640, train_loss:0.15186772251226105, acc:0.27579810602362587


 23%|██▎       | 10244/43738 [1:18:00<3:38:35,  2.55it/s]

step:640, train_loss:0.15186483821775926, acc:0.2757711831315892


 23%|██▎       | 10245/43738 [1:18:00<3:33:01,  2.62it/s]

step:640, train_loss:0.15185798938259615, acc:0.27574426549536357


 23%|██▎       | 10246/43738 [1:18:01<3:32:44,  2.62it/s]

step:640, train_loss:0.15185377847579185, acc:0.2757173531134101


 23%|██▎       | 10247/43738 [1:18:01<3:32:20,  2.63it/s]

step:640, train_loss:0.15184630141706953, acc:0.2756904459841905


 23%|██▎       | 10248/43738 [1:18:01<3:28:59,  2.67it/s]

step:640, train_loss:0.15183345812936958, acc:0.27576112412177983


 23%|██▎       | 10249/43738 [1:18:02<4:00:04,  2.32it/s]

step:640, train_loss:0.15183521145636059, acc:0.2757342179724851


 23%|██▎       | 10250/43738 [1:18:02<3:42:55,  2.50it/s]

step:640, train_loss:0.15182071377419037, acc:0.2758048780487805


 23%|██▎       | 10251/43738 [1:18:03<4:15:07,  2.19it/s]

step:640, train_loss:0.15181007188645282, acc:0.27587552433908885


 23%|██▎       | 10252/43738 [1:18:03<4:19:53,  2.15it/s]

step:640, train_loss:0.1518059735526361, acc:0.2758486149044089


 23%|██▎       | 10253/43738 [1:18:03<3:41:01,  2.53it/s]

step:640, train_loss:0.15179404655242282, acc:0.2759192431483468


 23%|██▎       | 10254/43738 [1:18:04<4:10:19,  2.23it/s]

step:640, train_loss:0.1518131759551242, acc:0.2758923346986542


 23%|██▎       | 10255/43738 [1:18:04<3:36:58,  2.57it/s]

step:640, train_loss:0.15180313294151604, acc:0.2759629449049244


 24%|██▍       | 10560/43738 [1:20:27<4:30:17,  2.05it/s]

step:660, train_loss:0.1510106725668541, acc:0.2785984848484849


 24%|██▍       | 10561/43738 [1:20:27<4:46:07,  1.93it/s]

step:660, train_loss:0.15100934058324028, acc:0.27857210491430734


 24%|██▍       | 10562/43738 [1:20:28<4:27:17,  2.07it/s]

step:660, train_loss:0.15099785490934547, acc:0.27864040901344445


 24%|██▍       | 10563/43738 [1:20:28<5:01:57,  1.83it/s]

step:660, train_loss:0.1510029693093511, acc:0.2786140301050838


 24%|██▍       | 10564/43738 [1:20:29<4:41:12,  1.97it/s]

step:660, train_loss:0.1509999126300493, acc:0.2785876561908368


 24%|██▍       | 10565/43738 [1:20:29<3:55:46,  2.34it/s]

step:660, train_loss:0.15098943789108588, acc:0.27865593942262185


 24%|██▍       | 10566/43738 [1:20:29<3:59:18,  2.31it/s]

step:660, train_loss:0.1509972230784098, acc:0.2786295665341662


 24%|██▍       | 10567/43738 [1:20:30<3:24:42,  2.70it/s]

step:660, train_loss:0.1509879615669582, acc:0.27860319863726696


 24%|██▍       | 10568/43738 [1:20:30<3:35:32,  2.56it/s]

step:660, train_loss:0.15097572896490608, acc:0.27867146101438306


 24%|██▍       | 10569/43738 [1:20:30<3:15:37,  2.83it/s]

step:660, train_loss:0.15096538520287703, acc:0.2787397104740278


 24%|██▍       | 10570/43738 [1:20:31<3:12:54,  2.87it/s]

step:660, train_loss:0.15095686210062395, acc:0.27880794701986755


 24%|██▍       | 10571/43738 [1:20:31<3:10:02,  2.91it/s]

step:660, train_loss:0.15095851548316244, acc:0.27878157222590105


 24%|██▍       | 10572/43738 [1:20:31<3:24:16,  2.71it/s]

step:660, train_loss:0.15094839896757897, acc:0.27884979190314035


 24%|██▍       | 10573/43738 [1:20:32<3:32:01,  2.61it/s]

step:660, train_loss:0.150939957300396, acc:0.2789179986758725


 24%|██▍       | 10574/43738 [1:20:32<4:06:33,  2.24it/s]

step:660, train_loss:0.15094097543968554, acc:0.2788916209570645


 24%|██▍       | 10575/43738 [1:20:33<4:06:09,  2.25it/s]

step:660, train_loss:0.15094838134019997, acc:0.27886524822695036


 25%|██▍       | 10880/43738 [1:22:52<3:39:45,  2.49it/s]

step:680, train_loss:0.15005959427833607, acc:0.28161764705882353


 25%|██▍       | 10881/43738 [1:22:52<3:18:48,  2.75it/s]

step:680, train_loss:0.15006481981756498, acc:0.2815917654627332


 25%|██▍       | 10882/43738 [1:22:53<3:15:24,  2.80it/s]

step:680, train_loss:0.15008203820163948, acc:0.2815658886234148


 25%|██▍       | 10883/43738 [1:22:53<2:56:35,  3.10it/s]

step:680, train_loss:0.15006851770417393, acc:0.2816319029679316


 25%|██▍       | 10884/43738 [1:22:54<4:03:29,  2.25it/s]

step:680, train_loss:0.15006307315783968, acc:0.28160602719588385


 25%|██▍       | 10885/43738 [1:22:55<4:57:01,  1.84it/s]

step:680, train_loss:0.15006443346976187, acc:0.2815801561782269


 25%|██▍       | 10886/43738 [1:22:55<4:56:48,  1.84it/s]

step:680, train_loss:0.15007876551000424, acc:0.28155428991365056


 25%|██▍       | 10887/43738 [1:22:56<4:41:36,  1.94it/s]

step:680, train_loss:0.15007673291132345, acc:0.281528428400845


 25%|██▍       | 10888/43738 [1:22:56<5:31:57,  1.65it/s]

step:680, train_loss:0.15007823686787003, acc:0.2815025716385011


 25%|██▍       | 10889/43738 [1:22:57<5:26:06,  1.68it/s]

step:680, train_loss:0.15007212045781096, acc:0.28147671962530996


 25%|██▍       | 10890/43738 [1:22:57<4:44:59,  1.92it/s]

step:680, train_loss:0.15007480364036566, acc:0.28145087235996324


 25%|██▍       | 10891/43738 [1:22:58<4:19:09,  2.11it/s]

step:680, train_loss:0.15007891446097785, acc:0.28142502984115325


 25%|██▍       | 10892/43738 [1:22:58<3:46:26,  2.42it/s]

step:680, train_loss:0.15007319370728886, acc:0.2813991920675725


 25%|██▍       | 10893/43738 [1:22:58<3:46:05,  2.42it/s]

step:680, train_loss:0.15007375015745875, acc:0.2813733590379143


 25%|██▍       | 10894/43738 [1:22:59<4:10:54,  2.18it/s]

step:680, train_loss:0.15006599072603172, acc:0.28143932439875163


 25%|██▍       | 10895/43738 [1:22:59<3:41:15,  2.47it/s]

step:680, train_loss:0.15005225033635056, acc:0.2815052776502983


 26%|██▌       | 11200/43738 [1:25:23<4:50:49,  1.86it/s]

step:700, train_loss:0.14926951783534864, acc:0.284375


 26%|██▌       | 11201/43738 [1:25:24<4:43:58,  1.91it/s]

step:700, train_loss:0.14926212000710828, acc:0.2844388893848763


 26%|██▌       | 11202/43738 [1:25:24<4:49:53,  1.87it/s]

step:700, train_loss:0.14925026659322094, acc:0.2845027673629709


 26%|██▌       | 11203/43738 [1:25:25<4:23:52,  2.05it/s]

step:700, train_loss:0.14923812372057074, acc:0.2845666339373382


 26%|██▌       | 11204/43738 [1:25:25<4:22:12,  2.07it/s]

step:700, train_loss:0.14924084202014967, acc:0.28454123527311675


 26%|██▌       | 11205/43738 [1:25:26<4:34:32,  1.97it/s]

step:700, train_loss:0.14924799127364022, acc:0.2845158411423472


 26%|██▌       | 11206/43738 [1:25:27<5:34:59,  1.62it/s]

step:700, train_loss:0.14925569709260678, acc:0.2844904515438158


 26%|██▌       | 11207/43738 [1:25:27<4:57:01,  1.83it/s]

step:700, train_loss:0.1492454799575596, acc:0.2845542964218792


 26%|██▌       | 11208/43738 [1:25:28<5:20:57,  1.69it/s]

step:700, train_loss:0.14924626714966688, acc:0.2845289079229122


 26%|██▌       | 11209/43738 [1:25:28<5:35:14,  1.62it/s]

step:700, train_loss:0.1492579328572592, acc:0.28450352395396555


 26%|██▌       | 11210/43738 [1:25:29<5:06:51,  1.77it/s]

step:700, train_loss:0.14926416593413797, acc:0.2844781445138269


 26%|██▌       | 11211/43738 [1:25:30<5:25:09,  1.67it/s]

step:700, train_loss:0.14926187441624836, acc:0.28445276960128446


 26%|██▌       | 11212/43738 [1:25:30<5:08:10,  1.76it/s]

step:700, train_loss:0.14926263781391275, acc:0.28442739921512666


 26%|██▌       | 11213/43738 [1:25:30<4:37:59,  1.95it/s]

step:700, train_loss:0.14924988950946247, acc:0.28449121555337553


 26%|██▌       | 11214/43738 [1:25:31<5:20:38,  1.69it/s]

step:700, train_loss:0.14924611308889077, acc:0.28446584626359905


 26%|██▌       | 11215/43738 [1:25:32<5:02:20,  1.79it/s]

step:700, train_loss:0.1492379185582138, acc:0.2845296477931342


 26%|██▋       | 11520/43738 [1:27:53<4:23:30,  2.04it/s]

step:720, train_loss:0.14842724975262628, acc:0.2872395833333333


 26%|██▋       | 11521/43738 [1:27:53<3:55:51,  2.28it/s]

step:720, train_loss:0.14842035980268442, acc:0.2873014495269508


 26%|██▋       | 11522/43738 [1:27:54<4:14:10,  2.11it/s]

step:720, train_loss:0.1484253959350958, acc:0.2872765144940115


 26%|██▋       | 11523/43738 [1:27:54<3:35:23,  2.49it/s]

step:720, train_loss:0.14841465875007975, acc:0.2873383667447713


 26%|██▋       | 11524/43738 [1:27:55<4:08:07,  2.16it/s]

step:720, train_loss:0.14843107579131234, acc:0.2873134328358209


 26%|██▋       | 11525/43738 [1:27:55<4:25:33,  2.02it/s]

step:720, train_loss:0.14842226469800646, acc:0.2873752711496746


 26%|██▋       | 11526/43738 [1:27:55<3:57:06,  2.26it/s]

step:720, train_loss:0.14841349549964614, acc:0.28743709873329865


 26%|██▋       | 11527/43738 [1:27:56<4:53:54,  1.83it/s]

step:720, train_loss:0.14841148850690156, acc:0.28741216274833


 26%|██▋       | 11528/43738 [1:27:57<5:35:45,  1.60it/s]

step:720, train_loss:0.14840427353812147, acc:0.2874739764052741


 26%|██▋       | 11529/43738 [1:27:57<4:58:50,  1.80it/s]

step:720, train_loss:0.14839762453671376, acc:0.28753577933905805


 26%|██▋       | 11530/43738 [1:27:58<4:27:56,  2.00it/s]

step:720, train_loss:0.14838767018559554, acc:0.2875975715524718


 26%|██▋       | 11531/43738 [1:27:58<3:45:18,  2.38it/s]

step:720, train_loss:0.1483803388492082, acc:0.2876593530483046


 26%|██▋       | 11532/43738 [1:27:59<4:13:20,  2.12it/s]

step:720, train_loss:0.14837757028209764, acc:0.28772112382934445


 26%|██▋       | 11533/43738 [1:27:59<5:06:46,  1.75it/s]

step:720, train_loss:0.14838887332879672, acc:0.2876961761900633


 26%|██▋       | 11534/43738 [1:28:00<4:52:07,  1.84it/s]

step:720, train_loss:0.14839060874598767, acc:0.2876712328767123


 26%|██▋       | 11535/43738 [1:28:00<4:18:49,  2.07it/s]

step:720, train_loss:0.14839266805135137, acc:0.2876462938881664


 27%|██▋       | 11840/43738 [1:30:22<4:23:18,  2.02it/s]

step:740, train_loss:0.14764034783147753, acc:0.2900337837837838


 27%|██▋       | 11841/43738 [1:30:22<4:09:48,  2.13it/s]

step:740, train_loss:0.14766726459638435, acc:0.29000928975593276


 27%|██▋       | 11842/43738 [1:30:23<4:24:26,  2.01it/s]

step:740, train_loss:0.14766800832517157, acc:0.2899847998648877


 27%|██▋       | 11843/43738 [1:30:23<3:49:43,  2.31it/s]

step:740, train_loss:0.14766565563113723, acc:0.2899603141096006


 27%|██▋       | 11844/43738 [1:30:23<4:01:37,  2.20it/s]

step:740, train_loss:0.1476582114683295, acc:0.29002026342451875


 27%|██▋       | 11845/43738 [1:30:24<3:28:56,  2.54it/s]

step:740, train_loss:0.14765822271391707, acc:0.2899957788096243


 27%|██▋       | 11846/43738 [1:30:24<3:34:36,  2.48it/s]

step:740, train_loss:0.1476468824351534, acc:0.29005571500928584


 27%|██▋       | 11847/43738 [1:30:24<3:09:34,  2.80it/s]

step:740, train_loss:0.1476397865560854, acc:0.29003123153540983


 27%|██▋       | 11848/43738 [1:30:25<3:38:57,  2.43it/s]

step:740, train_loss:0.14763910216120005, acc:0.2900067521944632


 27%|██▋       | 11849/43738 [1:30:25<3:26:02,  2.58it/s]

step:740, train_loss:0.14763994754784113, acc:0.28998227698539963


 27%|██▋       | 11850/43738 [1:30:26<3:49:10,  2.32it/s]

step:740, train_loss:0.14764284423906365, acc:0.289957805907173


 27%|██▋       | 11851/43738 [1:30:26<4:16:21,  2.07it/s]

step:740, train_loss:0.14766133984104082, acc:0.2899333389587377


 27%|██▋       | 11852/43738 [1:30:27<5:06:19,  1.73it/s]

step:740, train_loss:0.1476628996202479, acc:0.28990887613904825


 27%|██▋       | 11853/43738 [1:30:28<5:31:59,  1.60it/s]

step:740, train_loss:0.14766252810769967, acc:0.28988441744705984


 27%|██▋       | 11854/43738 [1:30:29<6:06:21,  1.45it/s]

step:740, train_loss:0.14766884576068326, acc:0.28985996288172766


 27%|██▋       | 11855/43738 [1:30:29<5:26:14,  1.63it/s]

step:740, train_loss:0.1476687415097623, acc:0.2898355124420076


 28%|██▊       | 12160/43738 [1:32:49<4:28:16,  1.96it/s]

step:760, train_loss:0.14676792326798643, acc:0.2926809210526316


 28%|██▊       | 12161/43738 [1:32:49<4:17:46,  2.04it/s]

step:760, train_loss:0.14675916175941567, acc:0.29273908395691145


 28%|██▊       | 12162/43738 [1:32:50<3:42:43,  2.36it/s]

step:760, train_loss:0.14675004763775215, acc:0.29279723729649726


 28%|██▊       | 12163/43738 [1:32:50<4:36:25,  1.90it/s]

step:760, train_loss:0.14673921306263507, acc:0.29285538107374826


 28%|██▊       | 12164/43738 [1:32:51<4:13:06,  2.08it/s]

step:760, train_loss:0.14673164970853012, acc:0.2928313054916146


 28%|██▊       | 12165/43738 [1:32:51<4:19:16,  2.03it/s]

step:760, train_loss:0.14672298438218, acc:0.29288943690916563


 28%|██▊       | 12166/43738 [1:32:52<4:29:50,  1.95it/s]

step:760, train_loss:0.1467141164841098, acc:0.29294755877034356


 28%|██▊       | 12167/43738 [1:32:52<4:01:43,  2.18it/s]

step:760, train_loss:0.14670312847667405, acc:0.29300567107750475


 28%|██▊       | 12168/43738 [1:32:53<4:13:25,  2.08it/s]

step:760, train_loss:0.14671296829057645, acc:0.2929815910585141


 28%|██▊       | 12169/43738 [1:32:53<3:52:43,  2.26it/s]

step:760, train_loss:0.14670496020813478, acc:0.2930396910181609


 28%|██▊       | 12170/43738 [1:32:53<3:54:13,  2.25it/s]

step:760, train_loss:0.14670575573028977, acc:0.29301561216105176


 28%|██▊       | 12171/43738 [1:32:54<3:46:30,  2.32it/s]

step:760, train_loss:0.14670572251505012, acc:0.29299153726070165


 28%|██▊       | 12172/43738 [1:32:54<3:58:19,  2.21it/s]

step:760, train_loss:0.14669911991803547, acc:0.29304962208347024


 28%|██▊       | 12173/43738 [1:32:55<3:59:29,  2.20it/s]

step:760, train_loss:0.14670350449592245, acc:0.2930255483446973


 28%|██▊       | 12174/43738 [1:32:55<3:28:25,  2.52it/s]

step:760, train_loss:0.14669671754634664, acc:0.29308362083127976


 28%|██▊       | 12175/43738 [1:32:55<3:17:30,  2.66it/s]

step:760, train_loss:0.1466991913128186, acc:0.2930595482546201


 29%|██▊       | 12480/43738 [1:35:16<5:28:56,  1.58it/s]

step:780, train_loss:0.14617056313955357, acc:0.2948717948717949


 29%|██▊       | 12481/43738 [1:35:16<4:26:45,  1.95it/s]

step:780, train_loss:0.14616445128825345, acc:0.29484816921721013


 29%|██▊       | 12482/43738 [1:35:17<4:20:04,  2.00it/s]

step:780, train_loss:0.14615430985032962, acc:0.2949046627143086


 29%|██▊       | 12483/43738 [1:35:17<3:43:14,  2.33it/s]

step:780, train_loss:0.14615598243112274, acc:0.29488103821196826


 29%|██▊       | 12484/43738 [1:35:17<3:12:45,  2.70it/s]

step:780, train_loss:0.14615397289727838, acc:0.2948574174943928


 29%|██▊       | 12485/43738 [1:35:18<2:55:09,  2.97it/s]

step:780, train_loss:0.1461430735667929, acc:0.2949138966760112


 29%|██▊       | 12486/43738 [1:35:18<3:48:03,  2.28it/s]

step:780, train_loss:0.14613813338772408, acc:0.2949703668108281


 29%|██▊       | 12487/43738 [1:35:19<4:00:34,  2.17it/s]

step:780, train_loss:0.14612947212910682, acc:0.29502682790101703


 29%|██▊       | 12488/43738 [1:35:19<3:48:41,  2.28it/s]

step:780, train_loss:0.14612836573474686, acc:0.295003203074952


 29%|██▊       | 12489/43738 [1:35:20<5:15:36,  1.65it/s]

step:780, train_loss:0.14613641865487723, acc:0.2949795820321883


 29%|██▊       | 12490/43738 [1:35:21<5:10:14,  1.68it/s]

step:780, train_loss:0.1461328132049367, acc:0.29495596477181746


 29%|██▊       | 12491/43738 [1:35:21<5:06:27,  1.70it/s]

step:780, train_loss:0.14612581806997826, acc:0.2950124089344328


 29%|██▊       | 12492/43738 [1:35:22<4:34:46,  1.90it/s]

step:780, train_loss:0.1461224994415159, acc:0.29498879282740953


 29%|██▊       | 12493/43738 [1:35:22<4:00:47,  2.16it/s]

step:780, train_loss:0.14611634367921797, acc:0.29504522532618266


 29%|██▊       | 12494/43738 [1:35:23<4:48:04,  1.81it/s]

step:780, train_loss:0.1461136328535855, acc:0.295021610372979


 29%|██▊       | 12495/43738 [1:35:24<5:18:39,  1.63it/s]

step:780, train_loss:0.14611319990548613, acc:0.2949979991996799


 29%|██▉       | 12800/43738 [1:37:43<3:46:32,  2.28it/s]

step:800, train_loss:0.1453224962819172, acc:0.298125


 29%|██▉       | 12801/43738 [1:37:43<3:15:31,  2.64it/s]

step:800, train_loss:0.14531115192856028, acc:0.29817982970080464


 29%|██▉       | 12802/43738 [1:37:44<3:18:06,  2.60it/s]

step:800, train_loss:0.1453030890532526, acc:0.2982346508358069


 29%|██▉       | 12803/43738 [1:37:44<3:16:35,  2.62it/s]

step:800, train_loss:0.14530675596419065, acc:0.2982113567132703


 29%|██▉       | 12804/43738 [1:37:45<3:41:45,  2.32it/s]

step:800, train_loss:0.1453033596645963, acc:0.29818806622930333


 29%|██▉       | 12805/43738 [1:37:45<3:40:34,  2.34it/s]

step:800, train_loss:0.14530013935736688, acc:0.29816477938305347


 29%|██▉       | 12806/43738 [1:37:45<3:13:37,  2.66it/s]

step:800, train_loss:0.14529155005066074, acc:0.29821958456973297


 29%|██▉       | 12807/43738 [1:37:46<3:21:21,  2.56it/s]

step:800, train_loss:0.1452817300526503, acc:0.2982743811977825


 29%|██▉       | 12808/43738 [1:37:46<3:25:37,  2.51it/s]

step:800, train_loss:0.14527054006839166, acc:0.2983291692692068


 29%|██▉       | 12809/43738 [1:37:47<4:24:40,  1.95it/s]

step:800, train_loss:0.14527077843239095, acc:0.2983058786790538


 29%|██▉       | 12810/43738 [1:37:48<4:35:10,  1.87it/s]

step:800, train_loss:0.1452818278342895, acc:0.29828259172521465


 29%|██▉       | 12811/43738 [1:37:48<4:07:44,  2.08it/s]

step:800, train_loss:0.14529003018849992, acc:0.2982593084068379


 29%|██▉       | 12812/43738 [1:37:48<3:58:10,  2.16it/s]

step:800, train_loss:0.145287276273315, acc:0.29823602872307214


 29%|██▉       | 12813/43738 [1:37:49<4:08:58,  2.07it/s]

step:800, train_loss:0.1452867587948598, acc:0.2982127526730664


 29%|██▉       | 12814/43738 [1:37:49<4:02:24,  2.13it/s]

step:800, train_loss:0.14529024069603258, acc:0.29818948025597003


 29%|██▉       | 12815/43738 [1:37:50<3:55:24,  2.19it/s]

step:800, train_loss:0.1452803265534864, acc:0.2982442450253609


 30%|██▉       | 13120/43738 [1:40:10<4:02:35,  2.10it/s]

step:820, train_loss:0.14442769258028465, acc:0.30121951219512194


 30%|██▉       | 13121/43738 [1:40:11<3:38:37,  2.33it/s]

step:820, train_loss:0.14441870391743433, acc:0.30127276884383813


 30%|███       | 13122/43738 [1:40:11<3:29:50,  2.43it/s]

step:820, train_loss:0.1444082581266034, acc:0.3013260173754001


 30%|███       | 13123/43738 [1:40:12<4:02:48,  2.10it/s]

step:820, train_loss:0.14441570333951764, acc:0.3013030557037263


 30%|███       | 13124/43738 [1:40:12<3:51:51,  2.20it/s]

step:820, train_loss:0.14441296421815528, acc:0.30128009753124046


 30%|███       | 13125/43738 [1:40:12<3:21:36,  2.53it/s]

step:820, train_loss:0.14440363865370773, acc:0.30133333333333334


 30%|███       | 13126/43738 [1:40:13<3:11:47,  2.66it/s]

step:820, train_loss:0.14439292169606519, acc:0.301386561023922


 30%|███       | 13127/43738 [1:40:13<2:55:12,  2.91it/s]

step:820, train_loss:0.14438575451911187, acc:0.3013636017368782


 30%|███       | 13128/43738 [1:40:13<2:51:37,  2.97it/s]

step:820, train_loss:0.1443841474513928, acc:0.3013406459475929


 30%|███       | 13129/43738 [1:40:14<2:55:07,  2.91it/s]

step:820, train_loss:0.14437533015844023, acc:0.30139386091857717


 30%|███       | 13130/43738 [1:40:14<3:12:14,  2.65it/s]

step:820, train_loss:0.14436933327021734, acc:0.30144706778370145


 30%|███       | 13131/43738 [1:40:14<3:17:35,  2.58it/s]

step:820, train_loss:0.14436265594770814, acc:0.3015002665448176


 30%|███       | 13132/43738 [1:40:15<3:27:32,  2.46it/s]

step:820, train_loss:0.14436277389082489, acc:0.30147730734084677


 30%|███       | 13133/43738 [1:40:16<4:23:57,  1.93it/s]

step:820, train_loss:0.14435262839470855, acc:0.30153049569786033


 30%|███       | 13134/43738 [1:40:16<3:55:59,  2.16it/s]

step:820, train_loss:0.14434316609845949, acc:0.30158367595553526


 30%|███       | 13135/43738 [1:40:16<3:23:44,  2.50it/s]

step:820, train_loss:0.1443341236335021, acc:0.30163684811572133


 31%|███       | 13440/43738 [1:42:34<3:10:53,  2.65it/s]

step:840, train_loss:0.14371506191580358, acc:0.3034970238095238


 31%|███       | 13441/43738 [1:42:35<3:39:09,  2.30it/s]

step:840, train_loss:0.14370844569098323, acc:0.3034744438657838


 31%|███       | 13442/43738 [1:42:35<3:09:41,  2.66it/s]

step:840, train_loss:0.14370563882465648, acc:0.3034518672816545


 31%|███       | 13443/43738 [1:42:35<2:48:07,  3.00it/s]

step:840, train_loss:0.143713199819752, acc:0.30342929405638625


 31%|███       | 13444/43738 [1:42:36<2:37:55,  3.20it/s]

step:840, train_loss:0.1437029832664373, acc:0.30348110681344836


 31%|███       | 13445/43738 [1:42:36<2:38:58,  3.18it/s]

step:840, train_loss:0.14369505701743368, acc:0.3035329118631461


 31%|███       | 13446/43738 [1:42:37<3:08:09,  2.68it/s]

step:840, train_loss:0.1437023831002597, acc:0.30351033764688384


 31%|███       | 13447/43738 [1:42:37<4:10:28,  2.02it/s]

step:840, train_loss:0.14369354653139513, acc:0.30356213281772887


 31%|███       | 13448/43738 [1:42:38<3:59:25,  2.11it/s]

step:840, train_loss:0.14368716440142854, acc:0.3036139202855443


 31%|███       | 13449/43738 [1:42:38<3:37:10,  2.32it/s]

step:840, train_loss:0.14367802843789787, acc:0.30366570005204846


 31%|███       | 13450/43738 [1:42:39<4:08:38,  2.03it/s]

step:840, train_loss:0.1436724652015179, acc:0.30364312267657995


 31%|███       | 13451/43738 [1:42:40<5:04:29,  1.66it/s]

step:840, train_loss:0.14366433678710783, acc:0.3036948925730429


 31%|███       | 13452/43738 [1:42:40<5:06:27,  1.65it/s]

step:840, train_loss:0.14367518846753485, acc:0.3036723163841808


 31%|███       | 13453/43738 [1:42:41<5:04:08,  1.66it/s]

step:840, train_loss:0.14367406814048028, acc:0.30364974355162416


 31%|███       | 13454/43738 [1:42:41<4:53:57,  1.72it/s]

step:840, train_loss:0.1436706510380719, acc:0.30362717407462464


 31%|███       | 13455/43738 [1:42:42<4:55:25,  1.71it/s]

step:840, train_loss:0.14368057461318628, acc:0.30360460795243405


 31%|███▏      | 13760/43738 [1:45:00<3:50:59,  2.16it/s]

step:860, train_loss:0.1429575532906626, acc:0.30603197674418603


 31%|███▏      | 13761/43738 [1:45:00<3:22:27,  2.47it/s]

step:860, train_loss:0.14296419781816816, acc:0.3060097376644139


 31%|███▏      | 13762/43738 [1:45:01<3:54:39,  2.13it/s]

step:860, train_loss:0.1429721363896966, acc:0.30598750181659645


 31%|███▏      | 13763/43738 [1:45:01<3:27:40,  2.41it/s]

step:860, train_loss:0.14297971416917715, acc:0.3059652692000291


 31%|███▏      | 13764/43738 [1:45:02<3:37:28,  2.30it/s]

step:860, train_loss:0.14297570657723196, acc:0.3060156931124673


 31%|███▏      | 13765/43738 [1:45:02<3:17:02,  2.54it/s]

step:860, train_loss:0.1429689163873599, acc:0.30599346167816927


 31%|███▏      | 13766/43738 [1:45:03<3:44:50,  2.22it/s]

step:860, train_loss:0.14296220402386683, acc:0.30604387621676593


 31%|███▏      | 13767/43738 [1:45:03<3:28:25,  2.40it/s]

step:860, train_loss:0.14296114616902741, acc:0.30602164596498876


 31%|███▏      | 13768/43738 [1:45:03<3:22:14,  2.47it/s]

step:860, train_loss:0.14296155440638741, acc:0.3059994189424753


 31%|███▏      | 13769/43738 [1:45:04<3:48:26,  2.19it/s]

step:860, train_loss:0.14295527050639797, acc:0.30604982206405695


 31%|███▏      | 13770/43738 [1:45:04<3:53:33,  2.14it/s]

step:860, train_loss:0.14295631465488767, acc:0.30602759622367465


 31%|███▏      | 13771/43738 [1:45:05<3:51:04,  2.16it/s]

step:860, train_loss:0.1429465668219909, acc:0.30607798997894126


 31%|███▏      | 13772/43738 [1:45:05<3:23:10,  2.46it/s]

step:860, train_loss:0.14293684232526466, acc:0.30612837641591634


 31%|███▏      | 13773/43738 [1:45:05<3:20:06,  2.50it/s]

step:860, train_loss:0.14294088228278554, acc:0.306106149713207


 31%|███▏      | 13774/43738 [1:45:06<3:38:33,  2.28it/s]

step:860, train_loss:0.1429386464051674, acc:0.3060839262378394


 31%|███▏      | 13775/43738 [1:45:06<3:42:16,  2.25it/s]

step:860, train_loss:0.14293111403079106, acc:0.3061343012704174


 32%|███▏      | 14080/43738 [1:47:34<3:58:26,  2.07it/s]

step:880, train_loss:0.142366975186023, acc:0.3080965909090909


 32%|███▏      | 14081/43738 [1:47:34<4:33:23,  1.81it/s]

step:880, train_loss:0.1423684120501736, acc:0.3080747106029401


 32%|███▏      | 14083/43738 [1:47:35<3:24:37,  2.42it/s]

step:880, train_loss:0.1423728382416969, acc:0.30805283340434597
step:880, train_loss:0.14236585136841381, acc:0.30803095931264646


 32%|███▏      | 14084/43738 [1:47:36<4:15:51,  1.93it/s]

step:880, train_loss:0.14236783681530024, acc:0.30800908832717977


 32%|███▏      | 14085/43738 [1:47:36<4:29:14,  1.84it/s]

step:880, train_loss:0.14237272722956248, acc:0.3079872204472843


 32%|███▏      | 14086/43738 [1:47:37<4:13:25,  1.95it/s]

step:880, train_loss:0.14237530944022436, acc:0.3079653556722987


 32%|███▏      | 14087/43738 [1:47:37<3:54:57,  2.10it/s]

step:880, train_loss:0.1423701573150532, acc:0.30801448143678567


 32%|███▏      | 14088/43738 [1:47:38<3:25:13,  2.41it/s]

step:880, train_loss:0.14236745515053512, acc:0.30799261783077797


 32%|███▏      | 14089/43738 [1:47:38<3:02:50,  2.70it/s]

step:880, train_loss:0.14236284835201893, acc:0.3079707573284122


 32%|███▏      | 14090/43738 [1:47:38<2:48:38,  2.93it/s]

step:880, train_loss:0.14235438659197128, acc:0.3080198722498226


 32%|███▏      | 14091/43738 [1:47:39<3:04:54,  2.67it/s]

step:880, train_loss:0.1423534279504218, acc:0.3079980129160457


 32%|███▏      | 14092/43738 [1:47:39<3:16:00,  2.52it/s]

step:880, train_loss:0.14234371694621792, acc:0.3080471189327278


 32%|███▏      | 14093/43738 [1:47:39<3:05:27,  2.66it/s]

step:880, train_loss:0.14234296878152908, acc:0.30802526076775705


 32%|███▏      | 14094/43738 [1:47:40<3:31:26,  2.34it/s]

step:880, train_loss:0.14233347734984925, acc:0.308074357882787


 32%|███▏      | 14095/43738 [1:47:40<3:37:51,  2.27it/s]

step:880, train_loss:0.14233998338397735, acc:0.30805250088683933


 33%|███▎      | 14400/43738 [1:49:52<4:10:45,  1.95it/s]

step:900, train_loss:0.1417176322983404, acc:0.3096527777777778


 33%|███▎      | 14401/43738 [1:49:52<3:59:54,  2.04it/s]

step:900, train_loss:0.14171339840431874, acc:0.30970071522810916


 33%|███▎      | 14402/43738 [1:49:52<3:20:35,  2.44it/s]

step:900, train_loss:0.1417037611572813, acc:0.3097486460213859


 33%|███▎      | 14403/43738 [1:49:53<3:38:35,  2.24it/s]

step:900, train_loss:0.14170263290211102, acc:0.30972714017912933


 33%|███▎      | 14404/43738 [1:49:53<3:45:51,  2.16it/s]

step:900, train_loss:0.1417059238255057, acc:0.30970563732296585


 33%|███▎      | 14405/43738 [1:49:54<3:34:37,  2.28it/s]

step:900, train_loss:0.14169815098136446, acc:0.3097535577924332


 33%|███▎      | 14406/43738 [1:49:54<3:38:22,  2.24it/s]

step:900, train_loss:0.14169005373532215, acc:0.3098014716090518


 33%|███▎      | 14407/43738 [1:49:54<3:03:20,  2.67it/s]

step:900, train_loss:0.14168128844447356, acc:0.309849378774207


 33%|███▎      | 14408/43738 [1:49:55<3:06:54,  2.62it/s]

step:900, train_loss:0.14167369328980986, acc:0.30989727928928373


 33%|███▎      | 14409/43738 [1:49:55<2:44:49,  2.97it/s]

step:900, train_loss:0.14166809103996547, acc:0.30987577208689016


 33%|███▎      | 14410/43738 [1:49:56<3:18:53,  2.46it/s]

step:900, train_loss:0.14166886507653906, acc:0.30985426786953507


 33%|███▎      | 14411/43738 [1:49:56<2:53:53,  2.81it/s]

step:900, train_loss:0.1416622951817638, acc:0.30983276663659703


 33%|███▎      | 14412/43738 [1:49:56<2:56:49,  2.76it/s]

step:900, train_loss:0.14165708742371816, acc:0.3098806550097141


 33%|███▎      | 14413/43738 [1:49:57<3:06:58,  2.61it/s]

step:900, train_loss:0.14165082242034932, acc:0.3099285367376674


 33%|███▎      | 14414/43738 [1:49:57<3:51:18,  2.11it/s]

step:900, train_loss:0.14164817193675092, acc:0.3099070348272513


 33%|███▎      | 14415/43738 [1:49:58<4:15:00,  1.92it/s]

step:900, train_loss:0.1416429094473089, acc:0.30988553590010404


 34%|███▎      | 14720/43738 [1:52:12<4:06:37,  1.96it/s]

step:920, train_loss:0.1411507977026284, acc:0.31161684782608695


 34%|███▎      | 14721/43738 [1:52:12<4:12:49,  1.91it/s]

step:920, train_loss:0.14115759743349673, acc:0.3115956796413287


 34%|███▎      | 14722/43738 [1:52:13<4:03:16,  1.99it/s]

step:920, train_loss:0.14116039857243354, acc:0.31157451433229183


 34%|███▎      | 14723/43738 [1:52:13<4:05:42,  1.97it/s]

step:920, train_loss:0.1411581462426487, acc:0.3115533518983903


 34%|███▎      | 14724/43738 [1:52:14<3:44:44,  2.15it/s]

step:920, train_loss:0.14115375820555762, acc:0.3116001086661233


 34%|███▎      | 14725/43738 [1:52:14<4:03:08,  1.99it/s]

step:920, train_loss:0.14115761145623837, acc:0.31157894736842107


 34%|███▎      | 14726/43738 [1:52:15<3:30:04,  2.30it/s]

step:920, train_loss:0.14115439952344014, acc:0.31155778894472363


 34%|███▎      | 14727/43738 [1:52:15<3:30:14,  2.30it/s]

step:920, train_loss:0.1411560827744734, acc:0.3115366333944456


 34%|███▎      | 14728/43738 [1:52:16<3:27:52,  2.33it/s]

step:920, train_loss:0.14114944557553924, acc:0.3115833785985877


 34%|███▎      | 14729/43738 [1:52:16<2:59:37,  2.69it/s]

step:920, train_loss:0.14114522945800076, acc:0.3115622241835834


 34%|███▎      | 14730/43738 [1:52:16<3:24:18,  2.37it/s]

step:920, train_loss:0.14113970489850883, acc:0.31160896130346233


 34%|███▎      | 14731/43738 [1:52:17<3:21:21,  2.40it/s]

step:920, train_loss:0.14113753890456018, acc:0.3115878080238952


 34%|███▎      | 14732/43738 [1:52:17<3:40:33,  2.19it/s]

step:920, train_loss:0.1411317366815705, acc:0.3116345370621776


 34%|███▎      | 14733/43738 [1:52:18<3:19:35,  2.42it/s]

step:920, train_loss:0.1411277820723606, acc:0.31161338491821083


 34%|███▎      | 14734/43738 [1:52:18<3:15:35,  2.47it/s]

step:920, train_loss:0.14112775851698273, acc:0.3115922356454459


 34%|███▎      | 14735/43738 [1:52:18<3:14:56,  2.48it/s]

step:920, train_loss:0.1411186542394545, acc:0.31163895486935866


 34%|███▍      | 15040/43738 [1:54:42<3:56:19,  2.02it/s]

step:940, train_loss:0.1403662588624142, acc:0.31449468085106386


 34%|███▍      | 15041/43738 [1:54:43<3:41:27,  2.16it/s]

step:940, train_loss:0.14036601485609662, acc:0.31447377169071206


 34%|███▍      | 15042/43738 [1:54:43<3:28:48,  2.29it/s]

step:940, train_loss:0.14035809791353368, acc:0.3145193458316713


 34%|███▍      | 15043/43738 [1:54:43<3:11:02,  2.50it/s]

step:940, train_loss:0.14034910924989313, acc:0.3145649139134481


 34%|███▍      | 15044/43738 [1:54:44<3:17:07,  2.43it/s]

step:940, train_loss:0.14034161403684653, acc:0.3146104759372507


 34%|███▍      | 15045/43738 [1:54:44<3:35:42,  2.22it/s]

step:940, train_loss:0.14033632177303235, acc:0.31458956463941506


 34%|███▍      | 15046/43738 [1:54:45<3:08:40,  2.53it/s]

step:940, train_loss:0.14033241298469676, acc:0.3146351189684966


 34%|███▍      | 15047/43738 [1:54:45<4:00:53,  1.99it/s]

step:940, train_loss:0.1403243335479454, acc:0.3146806672426397


 34%|███▍      | 15048/43738 [1:54:46<3:23:04,  2.35it/s]

step:940, train_loss:0.14031628889582354, acc:0.3147262094630516


 34%|███▍      | 15049/43738 [1:54:46<3:22:33,  2.36it/s]

step:940, train_loss:0.14032860297348607, acc:0.314705296032959


 34%|███▍      | 15050/43738 [1:54:46<2:54:18,  2.74it/s]

step:940, train_loss:0.14031927936927197, acc:0.31475083056478403


 34%|███▍      | 15051/43738 [1:54:47<2:53:10,  2.76it/s]

step:940, train_loss:0.14032469577510204, acc:0.3147299182778553


 34%|███▍      | 15052/43738 [1:54:47<3:17:28,  2.42it/s]

step:940, train_loss:0.1403227706435543, acc:0.31477544512357164


 34%|███▍      | 15053/43738 [1:54:48<3:46:45,  2.11it/s]

step:940, train_loss:0.14032584139370877, acc:0.3147545339799376


 34%|███▍      | 15054/43738 [1:54:48<3:36:52,  2.20it/s]

step:940, train_loss:0.1403198841781023, acc:0.31480005314202203


 34%|███▍      | 15055/43738 [1:54:49<4:24:57,  1.80it/s]

step:940, train_loss:0.14032417939943884, acc:0.3147791431418134


 35%|███▌      | 15360/43738 [1:57:08<3:48:53,  2.07it/s]

step:960, train_loss:0.13990356934831236, acc:0.31640625


 35%|███▌      | 15361/43738 [1:57:09<3:49:21,  2.06it/s]

step:960, train_loss:0.1399029682724598, acc:0.31638565197578283


 35%|███▌      | 15362/43738 [1:57:09<4:01:49,  1.96it/s]

step:960, train_loss:0.13990647590194583, acc:0.3163650566332509


 35%|███▌      | 15363/43738 [1:57:10<4:32:08,  1.74it/s]

step:960, train_loss:0.13990516547157614, acc:0.3163444639718805


 35%|███▌      | 15364/43738 [1:57:11<4:31:43,  1.74it/s]

step:960, train_loss:0.1399022762265002, acc:0.31638896120801874


 35%|███▌      | 15365/43738 [1:57:11<4:48:17,  1.64it/s]

step:960, train_loss:0.1398948589998434, acc:0.31643345265213146


 35%|███▌      | 15366/43738 [1:57:12<4:06:53,  1.92it/s]

step:960, train_loss:0.1398894626294931, acc:0.3164779383053495


 35%|███▌      | 15367/43738 [1:57:12<3:23:17,  2.33it/s]

step:960, train_loss:0.13988072991399464, acc:0.3165224181688033


 35%|███▌      | 15368/43738 [1:57:12<3:10:16,  2.49it/s]

step:960, train_loss:0.13987370162939708, acc:0.3165668922436231


 35%|███▌      | 15369/43738 [1:57:12<3:04:29,  2.56it/s]

step:960, train_loss:0.1398665946571089, acc:0.3166113605309389


 35%|███▌      | 15370/43738 [1:57:13<3:15:49,  2.41it/s]

step:960, train_loss:0.13987143628745394, acc:0.316590761223162


 35%|███▌      | 15371/43738 [1:57:13<3:08:14,  2.51it/s]

step:960, train_loss:0.13987693159334844, acc:0.31657016459566717


 35%|███▌      | 15372/43738 [1:57:14<3:32:33,  2.22it/s]

step:960, train_loss:0.13988303095698662, acc:0.3165495706479313


 35%|███▌      | 15373/43738 [1:57:14<3:02:39,  2.59it/s]

step:960, train_loss:0.13987584228777877, acc:0.3165940284915111


 35%|███▌      | 15374/43738 [1:57:14<2:40:22,  2.95it/s]

step:960, train_loss:0.1398695631482669, acc:0.3166384805515806


 35%|███▌      | 15375/43738 [1:57:15<3:03:14,  2.58it/s]

step:960, train_loss:0.13987159948378264, acc:0.3166178861788618


 36%|███▌      | 15680/43738 [1:59:35<3:12:44,  2.43it/s]

step:980, train_loss:0.13940450243440605, acc:0.317984693877551


 36%|███▌      | 15681/43738 [1:59:36<3:08:50,  2.48it/s]

step:980, train_loss:0.1394143357926909, acc:0.31796441553472354


 36%|███▌      | 15682/43738 [1:59:36<2:57:06,  2.64it/s]

step:980, train_loss:0.1394127032649137, acc:0.31794413977808955


 36%|███▌      | 15683/43738 [1:59:36<2:37:04,  2.98it/s]

step:980, train_loss:0.13940683112219887, acc:0.3179876299177453


 36%|███▌      | 15684/43738 [1:59:37<2:37:33,  2.97it/s]

step:980, train_loss:0.13940274206070366, acc:0.3180311145116042


 36%|███▌      | 15685/43738 [1:59:37<3:28:31,  2.24it/s]

step:980, train_loss:0.13939504182304224, acc:0.3180745935607268


 36%|███▌      | 15686/43738 [1:59:38<3:00:40,  2.59it/s]

step:980, train_loss:0.1393904467333608, acc:0.31805431595052913


 36%|███▌      | 15687/43738 [1:59:38<2:51:42,  2.72it/s]

step:980, train_loss:0.13939212022625252, acc:0.31803404092560716


 36%|███▌      | 15688/43738 [1:59:38<2:35:16,  3.01it/s]

step:980, train_loss:0.1393898362615618, acc:0.3180137684854666


 36%|███▌      | 15689/43738 [1:59:39<2:44:45,  2.84it/s]

step:980, train_loss:0.13938261340871383, acc:0.31805723755497484


 36%|███▌      | 15690/43738 [1:59:39<3:16:20,  2.38it/s]

step:980, train_loss:0.13938171866963225, acc:0.31803696622052263


 36%|███▌      | 15691/43738 [1:59:40<3:03:19,  2.55it/s]

step:980, train_loss:0.13938311454613755, acc:0.3180166974698872


 36%|███▌      | 15692/43738 [1:59:40<2:40:56,  2.90it/s]

step:980, train_loss:0.13937425934950756, acc:0.3180601580423146


 36%|███▌      | 15693/43738 [1:59:40<3:29:35,  2.23it/s]

step:980, train_loss:0.13938037788971921, acc:0.31803989039699226


 36%|███▌      | 15694/43738 [1:59:41<3:16:32,  2.38it/s]

step:980, train_loss:0.13938027640456613, acc:0.31801962533452277


 36%|███▌      | 15695/43738 [1:59:41<3:15:45,  2.39it/s]

step:980, train_loss:0.13937681739311583, acc:0.3179993628544122


 37%|███▋      | 16000/43738 [2:01:55<2:59:29,  2.58it/s]

step:1000, train_loss:0.1389108849718923, acc:0.32


 37%|███▋      | 16001/43738 [2:01:56<3:53:56,  1.98it/s]

step:1000, train_loss:0.1389051268146462, acc:0.320042497343916


 37%|███▋      | 16002/43738 [2:01:56<3:51:11,  2.00it/s]

step:1000, train_loss:0.13889819836389844, acc:0.32008498937632796


 37%|███▋      | 16003/43738 [2:01:57<3:15:33,  2.36it/s]

step:1000, train_loss:0.13889146708133487, acc:0.32012747609823156


 37%|███▋      | 16004/43738 [2:01:57<3:23:08,  2.28it/s]

step:1000, train_loss:0.1388991134602708, acc:0.32010747313171706


 37%|███▋      | 16005/43738 [2:01:58<4:10:00,  1.85it/s]

step:1000, train_loss:0.13889754977755625, acc:0.32014995313964384


 37%|███▋      | 16006/43738 [2:01:59<4:21:51,  1.77it/s]

step:1000, train_loss:0.1388980716786915, acc:0.32012995126827437


 37%|███▋      | 16007/43738 [2:01:59<3:56:39,  1.95it/s]

step:1000, train_loss:0.13888996277359747, acc:0.32017242456425316


 37%|███▋      | 16008/43738 [2:02:00<3:56:17,  1.96it/s]

step:1000, train_loss:0.13888765119501503, acc:0.32021489255372315


 37%|███▋      | 16009/43738 [2:02:00<3:35:55,  2.14it/s]

step:1000, train_loss:0.13888742594957312, acc:0.32019489037416454


 37%|███▋      | 16010/43738 [2:02:01<4:19:07,  1.78it/s]

step:1000, train_loss:0.13889507056974104, acc:0.32017489069331667


 37%|███▋      | 16011/43738 [2:02:01<3:35:34,  2.14it/s]

step:1000, train_loss:0.13889626154172122, acc:0.3201548935107114


 37%|███▋      | 16012/43738 [2:02:01<3:05:28,  2.49it/s]

step:1000, train_loss:0.13889457417569076, acc:0.3201348988258806


 37%|███▋      | 16013/43738 [2:02:02<3:13:09,  2.39it/s]

step:1000, train_loss:0.13889540549252866, acc:0.32011490663835634


 37%|███▋      | 16014/43738 [2:02:02<3:44:51,  2.05it/s]

step:1000, train_loss:0.13889570743516788, acc:0.3200949169476708


 37%|███▋      | 16015/43738 [2:02:03<3:21:11,  2.30it/s]

step:1000, train_loss:0.13889210783275358, acc:0.3200749297533562


 37%|███▋      | 16320/43738 [2:04:21<3:00:14,  2.54it/s]

step:1020, train_loss:0.13843092449543454, acc:0.32138480392156865


 37%|███▋      | 16321/43738 [2:04:22<2:55:00,  2.61it/s]

step:1020, train_loss:0.13842282019102153, acc:0.3214263831873047


 37%|███▋      | 16322/43738 [2:04:22<3:01:04,  2.52it/s]

step:1020, train_loss:0.13842902870002136, acc:0.32140669035657393


 37%|███▋      | 16323/43738 [2:04:22<2:41:25,  2.83it/s]

step:1020, train_loss:0.1384311190264231, acc:0.32138699993873676


 37%|███▋      | 16324/43738 [2:04:23<3:41:00,  2.07it/s]

step:1020, train_loss:0.13843513636346969, acc:0.32136731193334966


 37%|███▋      | 16325/43738 [2:04:24<4:09:48,  1.83it/s]

step:1020, train_loss:0.13843906062672443, acc:0.3213476263399694


 37%|███▋      | 16326/43738 [2:04:24<3:55:09,  1.94it/s]

step:1020, train_loss:0.13844552073911917, acc:0.32132794315815266


 37%|███▋      | 16327/43738 [2:04:25<4:26:05,  1.72it/s]

step:1020, train_loss:0.13844406711918747, acc:0.32130826238745636


 37%|███▋      | 16328/43738 [2:04:25<4:15:34,  1.79it/s]

step:1020, train_loss:0.13843759418994864, acc:0.3213498285154336


 37%|███▋      | 16329/43738 [2:04:26<3:46:51,  2.01it/s]

step:1020, train_loss:0.13842966781462598, acc:0.3213913895523302


 37%|███▋      | 16330/43738 [2:04:26<3:27:12,  2.20it/s]

step:1020, train_loss:0.13842577749351018, acc:0.3213717085119412


 37%|███▋      | 16331/43738 [2:04:26<3:13:08,  2.37it/s]

step:1020, train_loss:0.13841860040440054, acc:0.32141326311922114


 37%|███▋      | 16332/43738 [2:04:27<3:03:20,  2.49it/s]

step:1020, train_loss:0.13841792412048284, acc:0.3213935831496449


 37%|███▋      | 16333/43738 [2:04:27<2:39:35,  2.86it/s]

step:1020, train_loss:0.13841087480116548, acc:0.3214351313292108


 37%|███▋      | 16334/43738 [2:04:27<2:35:02,  2.95it/s]

step:1020, train_loss:0.13841404571008864, acc:0.32141545243051306


 37%|███▋      | 16335/43738 [2:04:28<3:36:56,  2.11it/s]

step:1020, train_loss:0.1384129006072827, acc:0.3213957759412305


 38%|███▊      | 16640/43738 [2:06:47<2:54:44,  2.58it/s]

step:1040, train_loss:0.13785297461583224, acc:0.3239182692307692


 38%|███▊      | 16641/43738 [2:06:47<3:16:46,  2.30it/s]

step:1040, train_loss:0.13784826779244347, acc:0.32395889670091943


 38%|███▊      | 16642/43738 [2:06:48<3:53:46,  1.93it/s]

step:1040, train_loss:0.13785174402807543, acc:0.32393943035692824


 38%|███▊      | 16643/43738 [2:06:48<3:51:01,  1.95it/s]

step:1040, train_loss:0.13784548666542185, acc:0.3239800516733762


 38%|███▊      | 16644/43738 [2:06:49<3:17:00,  2.29it/s]

step:1040, train_loss:0.13783800071379998, acc:0.32402066810862773


 38%|███▊      | 16645/43738 [2:06:49<3:00:52,  2.50it/s]

step:1040, train_loss:0.1378311161377246, acc:0.32406127966356263


 38%|███▊      | 16646/43738 [2:06:49<2:49:18,  2.67it/s]

step:1040, train_loss:0.13782374814673942, acc:0.32410188633906045


 38%|███▊      | 16647/43738 [2:06:50<2:48:46,  2.68it/s]

step:1040, train_loss:0.1378210993955887, acc:0.3241424881360005


 38%|███▊      | 16648/43738 [2:06:50<3:46:16,  2.00it/s]

step:1040, train_loss:0.1378167785046639, acc:0.3241830850552619


 38%|███▊      | 16649/43738 [2:06:51<3:25:32,  2.20it/s]

step:1040, train_loss:0.1378102005385183, acc:0.3242236770977236


 38%|███▊      | 16650/43738 [2:06:51<3:00:34,  2.50it/s]

step:1040, train_loss:0.1378058632767662, acc:0.32426426426426425


 38%|███▊      | 16651/43738 [2:06:52<3:16:13,  2.30it/s]

step:1040, train_loss:0.13779922381302742, acc:0.3243048465557624


 38%|███▊      | 16652/43738 [2:06:52<3:14:18,  2.32it/s]

step:1040, train_loss:0.1377940173378475, acc:0.32434542397309635


 38%|███▊      | 16653/43738 [2:06:52<3:00:46,  2.50it/s]

step:1040, train_loss:0.1377897379687507, acc:0.32432594727676695


 38%|███▊      | 16654/43738 [2:06:53<3:35:59,  2.09it/s]

step:1040, train_loss:0.13779529523505507, acc:0.3243064729194188


 38%|███▊      | 16655/43738 [2:06:54<4:17:11,  1.76it/s]

step:1040, train_loss:0.13779885912065673, acc:0.32428700090063045


 39%|███▉      | 16960/43738 [2:09:19<3:14:01,  2.30it/s]

step:1060, train_loss:0.1371631038292435, acc:0.326061320754717


 39%|███▉      | 16961/43738 [2:09:19<2:57:12,  2.52it/s]

step:1060, train_loss:0.13715642854757604, acc:0.3261010553623018


 39%|███▉      | 16962/43738 [2:09:19<3:03:25,  2.43it/s]

step:1060, train_loss:0.1371488520159638, acc:0.32614078528475415


 39%|███▉      | 16963/43738 [2:09:20<3:22:15,  2.21it/s]

step:1060, train_loss:0.1371461455584933, acc:0.32612155868655307


 39%|███▉      | 16964/43738 [2:09:20<3:10:32,  2.34it/s]

step:1060, train_loss:0.13714699247743492, acc:0.32610233435510494


 39%|███▉      | 16965/43738 [2:09:21<3:21:52,  2.21it/s]

step:1060, train_loss:0.13714422155722145, acc:0.32614205717653993


 39%|███▉      | 16966/43738 [2:09:21<2:56:25,  2.53it/s]

step:1060, train_loss:0.1371365634908106, acc:0.32618177531533654


 39%|███▉      | 16967/43738 [2:09:22<3:17:57,  2.25it/s]

step:1060, train_loss:0.13713956138454048, acc:0.32616255083397183


 39%|███▉      | 16968/43738 [2:09:22<3:06:30,  2.39it/s]

step:1060, train_loss:0.1371320212352363, acc:0.3262022630834512


 39%|███▉      | 16969/43738 [2:09:22<2:56:45,  2.52it/s]

step:1060, train_loss:0.13712675975485383, acc:0.3262419706523661


 39%|███▉      | 16970/43738 [2:09:23<2:45:42,  2.69it/s]

step:1060, train_loss:0.1371195234181201, acc:0.3262816735415439


 39%|███▉      | 16971/43738 [2:09:23<3:25:53,  2.17it/s]

step:1060, train_loss:0.13711383124000273, acc:0.3263213717518119


 39%|███▉      | 16972/43738 [2:09:24<3:24:16,  2.18it/s]

step:1060, train_loss:0.1371186039940879, acc:0.3263021447089324


 39%|███▉      | 16973/43738 [2:09:25<4:05:48,  1.81it/s]

step:1060, train_loss:0.137113030095259, acc:0.32634183703529135


 39%|███▉      | 16974/43738 [2:09:25<4:26:52,  1.67it/s]

step:1060, train_loss:0.137117509536109, acc:0.32632261105219745


 39%|███▉      | 16975/43738 [2:09:26<3:42:40,  2.00it/s]

step:1060, train_loss:0.13710944733434474, acc:0.3263622974963181


 40%|███▉      | 17280/43738 [2:11:52<3:18:19,  2.22it/s]

step:1080, train_loss:0.13653360928245395, acc:0.32829861111111114


 40%|███▉      | 17281/43738 [2:11:52<3:07:44,  2.35it/s]

step:1080, train_loss:0.13652760867950572, acc:0.3283374804698802


 40%|███▉      | 17282/43738 [2:11:52<2:46:45,  2.64it/s]

step:1080, train_loss:0.13654704140028676, acc:0.3283184816572156


 40%|███▉      | 17283/43738 [2:11:53<3:42:19,  1.98it/s]

step:1080, train_loss:0.13654697098052615, acc:0.32829948504310597


 40%|███▉      | 17284/43738 [2:11:53<3:16:24,  2.24it/s]

step:1080, train_loss:0.13654175449893388, acc:0.32828049062716963


 40%|███▉      | 17285/43738 [2:11:54<3:10:09,  2.32it/s]

step:1080, train_loss:0.13653879937967078, acc:0.3283193520393405


 40%|███▉      | 17286/43738 [2:11:54<3:10:23,  2.32it/s]

step:1080, train_loss:0.13653378428230772, acc:0.3283582089552239


 40%|███▉      | 17287/43738 [2:11:55<3:14:20,  2.27it/s]

step:1080, train_loss:0.1365314442154348, acc:0.3283392144385955


 40%|███▉      | 17288/43738 [2:11:55<3:15:50,  2.25it/s]

step:1080, train_loss:0.13653143042951488, acc:0.32832022211938916


 40%|███▉      | 17289/43738 [2:11:56<3:05:45,  2.37it/s]

step:1080, train_loss:0.13652541525401662, acc:0.3283590722424663


 40%|███▉      | 17290/43738 [2:11:56<3:07:43,  2.35it/s]

step:1080, train_loss:0.1365302306357675, acc:0.3283400809716599


 40%|███▉      | 17291/43738 [2:11:56<3:04:52,  2.38it/s]

step:1080, train_loss:0.13652253093347144, acc:0.32837892545254754


 40%|███▉      | 17292/43738 [2:11:57<3:04:40,  2.39it/s]

step:1080, train_loss:0.13651490061002072, acc:0.3284177654406662


 40%|███▉      | 17293/43738 [2:11:57<2:42:04,  2.72it/s]

step:1080, train_loss:0.13653377604102063, acc:0.3283987740704331


 40%|███▉      | 17294/43738 [2:11:58<3:01:22,  2.43it/s]

step:1080, train_loss:0.13653418765685632, acc:0.3283797848964959


 40%|███▉      | 17295/43738 [2:11:58<2:48:52,  2.61it/s]

step:1080, train_loss:0.13652829650562692, acc:0.3284186180977161


 40%|████      | 17600/43738 [2:14:18<2:52:56,  2.52it/s]

step:1100, train_loss:0.1360579907567262, acc:0.3302272727272727


 40%|████      | 17601/43738 [2:14:18<2:35:46,  2.80it/s]

step:1100, train_loss:0.1360513773627301, acc:0.33026532583375945


 40%|████      | 17602/43738 [2:14:18<2:36:46,  2.78it/s]

step:1100, train_loss:0.13605173314985375, acc:0.33024656289058063


 40%|████      | 17603/43738 [2:14:19<2:49:05,  2.58it/s]

step:1100, train_loss:0.13604403611790222, acc:0.33028461057774244


 40%|████      | 17604/43738 [2:14:19<2:57:26,  2.45it/s]

step:1100, train_loss:0.1360478269036308, acc:0.33026584867075665


 40%|████      | 17605/43738 [2:14:20<3:06:23,  2.34it/s]

step:1100, train_loss:0.1360475632719034, acc:0.3302470888952002


 40%|████      | 17606/43738 [2:14:20<2:57:37,  2.45it/s]

step:1100, train_loss:0.13604046564474945, acc:0.33028513006929455


 40%|████      | 17607/43738 [2:14:20<2:39:58,  2.72it/s]

step:1100, train_loss:0.1360327734370823, acc:0.33032316692224684


 40%|████      | 17608/43738 [2:14:21<2:34:57,  2.81it/s]

step:1100, train_loss:0.13603176216472385, acc:0.33036119945479325


 40%|████      | 17609/43738 [2:14:21<3:06:17,  2.34it/s]

step:1100, train_loss:0.13602556062809712, acc:0.33039922766766994


 40%|████      | 17610/43738 [2:14:22<3:11:05,  2.28it/s]

step:1100, train_loss:0.13602964369447307, acc:0.33038046564452017


 40%|████      | 17611/43738 [2:14:22<3:08:47,  2.31it/s]

step:1100, train_loss:0.13603000577224875, acc:0.3303617057520868


 40%|████      | 17612/43738 [2:14:23<2:47:52,  2.59it/s]

step:1100, train_loss:0.1360244800578152, acc:0.330399727458551


 40%|████      | 17613/43738 [2:14:23<2:54:26,  2.50it/s]

step:1100, train_loss:0.1360225176525239, acc:0.33038096860273664


 40%|████      | 17614/43738 [2:14:23<3:01:01,  2.41it/s]

step:1100, train_loss:0.13603163313913508, acc:0.3303622118769161


 40%|████      | 17615/43738 [2:14:24<2:53:45,  2.51it/s]

step:1100, train_loss:0.13602730911411615, acc:0.33040022707919386


 41%|████      | 17920/43738 [2:16:43<2:36:48,  2.74it/s]

step:1120, train_loss:0.1354592363045429, acc:0.33309151785714286


 41%|████      | 17921/43738 [2:16:44<3:05:54,  2.31it/s]

step:1120, train_loss:0.13545523727018757, acc:0.3331287316555996


 41%|████      | 17922/43738 [2:16:44<3:06:21,  2.31it/s]

step:1120, train_loss:0.13546279055497518, acc:0.33311014395714766


 41%|████      | 17923/43738 [2:16:45<2:57:12,  2.43it/s]

step:1120, train_loss:0.13546102268472926, acc:0.33309155833286835


 41%|████      | 17924/43738 [2:16:45<2:50:19,  2.53it/s]

step:1120, train_loss:0.13546222887208426, acc:0.33307297478241465


 41%|████      | 17925/43738 [2:16:45<2:32:03,  2.83it/s]

step:1120, train_loss:0.1354600646982643, acc:0.3331101813110181


 41%|████      | 17926/43738 [2:16:46<3:15:17,  2.20it/s]

step:1120, train_loss:0.13546097520604702, acc:0.3330915987950463


 41%|████      | 17927/43738 [2:16:46<2:49:14,  2.54it/s]

step:1120, train_loss:0.1354587897520045, acc:0.33307301835220615


 41%|████      | 17928/43738 [2:16:47<2:59:48,  2.39it/s]

step:1120, train_loss:0.13546042824015364, acc:0.33305443998215084


 41%|████      | 17929/43738 [2:16:47<2:36:30,  2.75it/s]

step:1120, train_loss:0.13545808355049654, acc:0.3330358636845334


 41%|████      | 17930/43738 [2:16:48<3:21:56,  2.13it/s]

step:1120, train_loss:0.13545622931656107, acc:0.33301728945900727


 41%|████      | 17931/43738 [2:16:48<3:27:34,  2.07it/s]

step:1120, train_loss:0.1354504119960808, acc:0.33305448664324355


 41%|████      | 17932/43738 [2:16:49<3:49:15,  1.88it/s]

step:1120, train_loss:0.13545639618318916, acc:0.3330359134508142


 41%|████      | 17933/43738 [2:16:49<3:42:38,  1.93it/s]

step:1120, train_loss:0.13545470874291568, acc:0.3330173423297831


 41%|████      | 17934/43738 [2:16:50<3:55:25,  1.83it/s]

step:1120, train_loss:0.1354514769319586, acc:0.3329987732798037


 41%|████      | 17935/43738 [2:16:50<3:17:42,  2.18it/s]

step:1120, train_loss:0.13544507494342115, acc:0.33303596320044604


 42%|████▏     | 18240/43738 [2:19:06<2:53:48,  2.45it/s]

step:1140, train_loss:0.13491607817479012, acc:0.33514254385964914


 42%|████▏     | 18241/43738 [2:19:07<3:25:30,  2.07it/s]

step:1140, train_loss:0.13491860314127094, acc:0.335124170823968


 42%|████▏     | 18242/43738 [2:19:07<2:57:56,  2.39it/s]

step:1140, train_loss:0.1349123563825043, acc:0.3351606183532507


 42%|████▏     | 18243/43738 [2:19:08<2:57:56,  2.39it/s]

step:1140, train_loss:0.13491747176745966, acc:0.3351422463410623


 42%|████▏     | 18244/43738 [2:19:08<2:35:14,  2.74it/s]

step:1140, train_loss:0.13491199795852715, acc:0.33517868888401664


 42%|████▏     | 18245/43738 [2:19:09<3:30:00,  2.02it/s]

step:1140, train_loss:0.13490465778869626, acc:0.3352151274321732


 42%|████▏     | 18246/43738 [2:19:10<3:56:53,  1.79it/s]

step:1140, train_loss:0.13489800714766612, acc:0.3352515619861888


 42%|████▏     | 18247/43738 [2:19:10<3:42:45,  1.91it/s]

step:1140, train_loss:0.13489437798523826, acc:0.33528799254672


 42%|████▏     | 18248/43738 [2:19:11<3:54:58,  1.81it/s]

step:1140, train_loss:0.1348950003573975, acc:0.3352696185883384


 42%|████▏     | 18249/43738 [2:19:11<3:16:38,  2.16it/s]

step:1140, train_loss:0.13488913093932248, acc:0.33530604416680365


 42%|████▏     | 18250/43738 [2:19:11<3:33:28,  1.99it/s]

step:1140, train_loss:0.13488949334671343, acc:0.3352876712328767


 42%|████▏     | 18251/43738 [2:19:12<4:06:42,  1.72it/s]

step:1140, train_loss:0.1348866307180671, acc:0.33526930031231167


 42%|████▏     | 18252/43738 [2:19:13<4:06:51,  1.72it/s]

step:1140, train_loss:0.1348798497985637, acc:0.33530571992110453


 42%|████▏     | 18253/43738 [2:19:13<3:34:43,  1.98it/s]

step:1140, train_loss:0.13487447708746467, acc:0.3353421355393634


 42%|████▏     | 18254/43738 [2:19:14<3:54:43,  1.81it/s]

step:1140, train_loss:0.13487779643259773, acc:0.33532376465432234


 42%|████▏     | 18255/43738 [2:19:14<3:42:18,  1.91it/s]

step:1140, train_loss:0.13487900539739733, acc:0.33530539578197754


 42%|████▏     | 18560/43738 [2:21:30<3:07:36,  2.24it/s]

step:1160, train_loss:0.13434368943392788, acc:0.33696120689655173


 42%|████▏     | 18561/43738 [2:21:31<3:20:40,  2.09it/s]

step:1160, train_loss:0.1343386611810421, acc:0.3369969290447713


 42%|████▏     | 18562/43738 [2:21:31<3:36:25,  1.94it/s]

step:1160, train_loss:0.13433758678424568, acc:0.33697877383902597


 42%|████▏     | 18563/43738 [2:21:32<3:32:38,  1.97it/s]

step:1160, train_loss:0.13433658571741214, acc:0.3369606205893444


 42%|████▏     | 18564/43738 [2:21:32<3:19:49,  2.10it/s]

step:1160, train_loss:0.13433561187655166, acc:0.3369424692954105


 42%|████▏     | 18565/43738 [2:21:33<3:08:03,  2.23it/s]

step:1160, train_loss:0.1343386461387974, acc:0.33692431995690814


 42%|████▏     | 18566/43738 [2:21:33<2:56:45,  2.37it/s]

step:1160, train_loss:0.1343354811293237, acc:0.3369061725735215


 42%|████▏     | 18567/43738 [2:21:33<3:05:53,  2.26it/s]

step:1160, train_loss:0.13433366312503137, acc:0.33688802714493454


 42%|████▏     | 18568/43738 [2:21:34<3:10:58,  2.20it/s]

step:1160, train_loss:0.13433331386459704, acc:0.33686988367083154


 42%|████▏     | 18569/43738 [2:21:34<3:03:17,  2.29it/s]

step:1160, train_loss:0.13433966828492072, acc:0.33685174215089664


 42%|████▏     | 18570/43738 [2:21:35<2:55:04,  2.40it/s]

step:1160, train_loss:0.13433840127735713, acc:0.3368336025848142


 42%|████▏     | 18571/43738 [2:21:35<2:47:56,  2.50it/s]

step:1160, train_loss:0.13433431671429205, acc:0.33686931236874695


 42%|████▏     | 18572/43738 [2:21:35<2:42:58,  2.57it/s]

step:1160, train_loss:0.13433161050921574, acc:0.3368511738100366


 42%|████▏     | 18573/43738 [2:21:36<2:45:09,  2.54it/s]

step:1160, train_loss:0.13432475966262414, acc:0.33688687880256285


 42%|████▏     | 18574/43738 [2:21:36<2:30:15,  2.79it/s]

step:1160, train_loss:0.13432592980889624, acc:0.33686874125121136


 42%|████▏     | 18575/43738 [2:21:36<2:17:17,  3.05it/s]

step:1160, train_loss:0.13431898443462695, acc:0.3369044414535666


 43%|████▎     | 18880/43738 [2:23:53<4:53:29,  1.41it/s]

step:1180, train_loss:0.13383419955804415, acc:0.3385063559322034


 43%|████▎     | 18881/43738 [2:23:54<4:15:26,  1.62it/s]

step:1180, train_loss:0.13383085226201044, acc:0.3385413908161644


 43%|████▎     | 18882/43738 [2:23:54<4:43:45,  1.46it/s]

step:1180, train_loss:0.1338287705287046, acc:0.3385234614977227


 43%|████▎     | 18883/43738 [2:23:55<4:14:53,  1.63it/s]

step:1180, train_loss:0.13382841843362242, acc:0.33850553407827144


 43%|████▎     | 18884/43738 [2:23:56<4:29:30,  1.54it/s]

step:1180, train_loss:0.1338223339849722, acc:0.33854056343994915


 43%|████▎     | 18885/43738 [2:23:56<4:18:46,  1.60it/s]

step:1180, train_loss:0.1338162646455083, acc:0.33857558909187185


 43%|████▎     | 18886/43738 [2:23:57<4:33:10,  1.52it/s]

step:1180, train_loss:0.13381195409466293, acc:0.3386106110346288


 43%|████▎     | 18887/43738 [2:23:58<5:20:10,  1.29it/s]

step:1180, train_loss:0.1338140694108435, acc:0.33859268279769156


 43%|████▎     | 18888/43738 [2:23:59<5:12:20,  1.33it/s]

step:1180, train_loss:0.1338151684817844, acc:0.3385747564591275


 43%|████▎     | 18889/43738 [2:23:59<4:52:53,  1.41it/s]

step:1180, train_loss:0.13381231722758588, acc:0.3385568320186352


 43%|████▎     | 18890/43738 [2:24:00<4:57:30,  1.39it/s]

step:1180, train_loss:0.13380524164678093, acc:0.3385918475383801


 43%|████▎     | 18891/43738 [2:24:01<4:40:10,  1.48it/s]

step:1180, train_loss:0.13380513310694353, acc:0.3385739240908369


 43%|████▎     | 18892/43738 [2:24:01<3:52:29,  1.78it/s]

step:1180, train_loss:0.13380319070486693, acc:0.338556002540758


 43%|████▎     | 18893/43738 [2:24:01<3:24:44,  2.02it/s]

step:1180, train_loss:0.13380153083505697, acc:0.33853808288784204


 43%|████▎     | 18894/43738 [2:24:02<3:11:53,  2.16it/s]

step:1180, train_loss:0.1337979241832582, acc:0.33857309198687413


 43%|████▎     | 18895/43738 [2:24:02<2:48:33,  2.46it/s]

step:1180, train_loss:0.13379465958986347, acc:0.33855517332627677


 44%|████▍     | 19200/43738 [2:26:17<3:18:49,  2.06it/s]

step:1200, train_loss:0.1333341133307844, acc:0.3407291666666667


 44%|████▍     | 19201/43738 [2:26:18<2:47:39,  2.44it/s]

step:1200, train_loss:0.13332775205011574, acc:0.3407635019009427


 44%|████▍     | 19202/43738 [2:26:18<2:44:05,  2.49it/s]

step:1200, train_loss:0.13332567082267185, acc:0.34074575565045306


 44%|████▍     | 19203/43738 [2:26:18<2:52:56,  2.36it/s]

step:1200, train_loss:0.13332287089323344, acc:0.3407280112482425


 44%|████▍     | 19204/43738 [2:26:19<2:31:51,  2.69it/s]

step:1200, train_loss:0.1333204967225755, acc:0.3407102686940221


 44%|████▍     | 19205/43738 [2:26:19<2:43:06,  2.51it/s]

step:1200, train_loss:0.13331966092603975, acc:0.34069252798750327


 44%|████▍     | 19206/43738 [2:26:19<2:33:53,  2.66it/s]

step:1200, train_loss:0.13331556893497437, acc:0.3407268561907737


 44%|████▍     | 19207/43738 [2:26:20<2:55:15,  2.33it/s]

step:1200, train_loss:0.13331314173979064, acc:0.3407091164679544


 44%|████▍     | 19208/43738 [2:26:20<2:55:06,  2.33it/s]

step:1200, train_loss:0.1333086108808731, acc:0.34074344023323616


 44%|████▍     | 19209/43738 [2:26:21<2:53:23,  2.36it/s]

step:1200, train_loss:0.13330347963088465, acc:0.3407777604248009


 44%|████▍     | 19210/43738 [2:26:22<3:33:07,  1.92it/s]

step:1200, train_loss:0.13330466848827707, acc:0.3407600208224883


 44%|████▍     | 19211/43738 [2:26:22<2:58:39,  2.29it/s]

step:1200, train_loss:0.13329878950830062, acc:0.3407943365780022


 44%|████▍     | 19212/43738 [2:26:22<2:36:24,  2.61it/s]

step:1200, train_loss:0.1332919482524601, acc:0.3408286487611909


 44%|████▍     | 19213/43738 [2:26:23<2:41:10,  2.54it/s]

step:1200, train_loss:0.1332890476578439, acc:0.3408629573726123


 44%|████▍     | 19214/43738 [2:26:23<2:34:18,  2.65it/s]

step:1200, train_loss:0.13329113631254227, acc:0.3408452170292495


 44%|████▍     | 19215/43738 [2:26:24<3:18:13,  2.06it/s]

step:1200, train_loss:0.13329376343282953, acc:0.3408274785323966


 45%|████▍     | 19520/43738 [2:28:39<3:09:15,  2.13it/s]

step:1220, train_loss:0.13282355976185597, acc:0.3430327868852459


 45%|████▍     | 19521/43738 [2:28:40<3:20:24,  2.01it/s]

step:1220, train_loss:0.13282254709417135, acc:0.34306644126837765


 45%|████▍     | 19522/43738 [2:28:41<4:04:22,  1.65it/s]

step:1220, train_loss:0.1328185535247704, acc:0.34310009220366766


 45%|████▍     | 19523/43738 [2:28:41<3:42:02,  1.82it/s]

step:1220, train_loss:0.13281196561230202, acc:0.34313373969164573


 45%|████▍     | 19524/43738 [2:28:42<3:32:57,  1.89it/s]

step:1220, train_loss:0.13282053021105364, acc:0.3431161647203442


 45%|████▍     | 19525/43738 [2:28:42<3:13:49,  2.08it/s]

step:1220, train_loss:0.13281707096856873, acc:0.3430985915492958


 45%|████▍     | 19526/43738 [2:28:42<3:11:46,  2.10it/s]

step:1220, train_loss:0.13281351612871037, acc:0.3430810201782239


 45%|████▍     | 19527/43738 [2:28:43<3:04:48,  2.18it/s]

step:1220, train_loss:0.1328156700533742, acc:0.34306345060685206


 45%|████▍     | 19528/43738 [2:28:43<3:15:34,  2.06it/s]

step:1220, train_loss:0.13281614474575207, acc:0.3430458828349037


 45%|████▍     | 19529/43738 [2:28:44<3:01:31,  2.22it/s]

step:1220, train_loss:0.1328093843919969, acc:0.34307952276102205


 45%|████▍     | 19530/43738 [2:28:44<2:53:26,  2.33it/s]

step:1220, train_loss:0.13280353619902124, acc:0.3431131592421915


 45%|████▍     | 19531/43738 [2:28:44<2:31:55,  2.66it/s]

step:1220, train_loss:0.13280246548528138, acc:0.3430955916235728


 45%|████▍     | 19532/43738 [2:28:45<2:14:53,  2.99it/s]

step:1220, train_loss:0.13279902037557292, acc:0.34307802580380914


 45%|████▍     | 19533/43738 [2:28:45<3:03:20,  2.20it/s]

step:1220, train_loss:0.13280042912966036, acc:0.3430604617826243


 45%|████▍     | 19534/43738 [2:28:46<2:48:02,  2.40it/s]

step:1220, train_loss:0.1327960401976513, acc:0.3430940923517969


 45%|████▍     | 19535/43738 [2:28:46<2:37:27,  2.56it/s]

step:1220, train_loss:0.1327955303056966, acc:0.34307652930637317


 45%|████▌     | 19840/43738 [2:31:05<3:10:49,  2.09it/s]

step:1240, train_loss:0.1323670007379761, acc:0.34430443548387096


 45%|████▌     | 19841/43738 [2:31:06<2:45:06,  2.41it/s]

step:1240, train_loss:0.1323673018246082, acc:0.3442870823043193


 45%|████▌     | 19842/43738 [2:31:06<2:35:42,  2.56it/s]

step:1240, train_loss:0.13236296967114783, acc:0.34426973087390383


 45%|████▌     | 19843/43738 [2:31:06<2:37:30,  2.53it/s]

step:1240, train_loss:0.13236276818854678, acc:0.34425238119236


 45%|████▌     | 19844/43738 [2:31:07<2:34:35,  2.58it/s]

step:1240, train_loss:0.13237114929348867, acc:0.3442350332594235


 45%|████▌     | 19845/43738 [2:31:07<2:39:48,  2.49it/s]

step:1240, train_loss:0.1323716840695037, acc:0.3442176870748299


 45%|████▌     | 19846/43738 [2:31:08<2:41:39,  2.46it/s]

step:1240, train_loss:0.13236543391999256, acc:0.3442507306258188


 45%|████▌     | 19847/43738 [2:31:08<2:31:07,  2.63it/s]

step:1240, train_loss:0.1323599744730652, acc:0.3442837708469794


 45%|████▌     | 19848/43738 [2:31:09<2:53:27,  2.30it/s]

step:1240, train_loss:0.13236205179874677, acc:0.3442664248286981


 45%|████▌     | 19849/43738 [2:31:09<2:46:37,  2.39it/s]

step:1240, train_loss:0.13235793698415732, acc:0.3442994609300217


 45%|████▌     | 19850/43738 [2:31:09<2:26:28,  2.72it/s]

step:1240, train_loss:0.132353302413523, acc:0.3443324937027708


 45%|████▌     | 19851/43738 [2:31:10<2:38:42,  2.51it/s]

step:1240, train_loss:0.1323485487961461, acc:0.3443655231474485


 45%|████▌     | 19852/43738 [2:31:10<2:19:08,  2.86it/s]

step:1240, train_loss:0.13234252689720913, acc:0.3443985492645577


 45%|████▌     | 19853/43738 [2:31:11<2:56:50,  2.25it/s]

step:1240, train_loss:0.13233973585061803, acc:0.34443157205460134


 45%|████▌     | 19854/43738 [2:31:11<3:32:48,  1.87it/s]

step:1240, train_loss:0.13234439904695008, acc:0.3444142238339881


 45%|████▌     | 19855/43738 [2:31:12<3:22:44,  1.96it/s]

step:1240, train_loss:0.1323484509422754, acc:0.34439687736086627


 46%|████▌     | 20160/43738 [2:33:36<2:58:31,  2.20it/s]

step:1260, train_loss:0.1318220144200802, acc:0.34652777777777777


 46%|████▌     | 20161/43738 [2:33:36<2:49:12,  2.32it/s]

step:1260, train_loss:0.13181676899874442, acc:0.34656019046674275


 46%|████▌     | 20162/43738 [2:33:36<2:25:35,  2.70it/s]

step:1260, train_loss:0.13181416566446574, acc:0.34654300168634067


 46%|████▌     | 20163/43738 [2:33:37<2:13:14,  2.95it/s]

step:1260, train_loss:0.13181256272105435, acc:0.34652581461092097


 46%|████▌     | 20164/43738 [2:33:38<3:06:01,  2.11it/s]

step:1260, train_loss:0.1318131033534615, acc:0.3465086292402301


 46%|████▌     | 20165/43738 [2:33:38<3:20:00,  1.96it/s]

step:1260, train_loss:0.13181486890450186, acc:0.3464914455740144


 46%|████▌     | 20166/43738 [2:33:39<3:19:32,  1.97it/s]

step:1260, train_loss:0.13181823189019382, acc:0.3464742636120202


 46%|████▌     | 20167/43738 [2:33:39<2:59:44,  2.19it/s]

step:1260, train_loss:0.13181744703206383, acc:0.34650666931125107


 46%|████▌     | 20168/43738 [2:33:40<3:30:09,  1.87it/s]

step:1260, train_loss:0.1318220793935145, acc:0.34648948829829435


 46%|████▌     | 20169/43738 [2:33:40<3:19:03,  1.97it/s]

step:1260, train_loss:0.13181842681518033, acc:0.3464723089890426


 46%|████▌     | 20170/43738 [2:33:40<2:46:40,  2.36it/s]

step:1260, train_loss:0.1318169004749972, acc:0.34645513138324247


 46%|████▌     | 20171/43738 [2:33:41<2:24:06,  2.73it/s]

step:1260, train_loss:0.13181610597705, acc:0.34643795548064055


 46%|████▌     | 20172/43738 [2:33:41<2:33:27,  2.56it/s]

step:1260, train_loss:0.1318111848042031, acc:0.34647035494745193


 46%|████▌     | 20173/43738 [2:33:41<2:22:24,  2.76it/s]

step:1260, train_loss:0.1318047610786035, acc:0.34650275120210183


 46%|████▌     | 20174/43738 [2:33:42<2:27:12,  2.67it/s]

step:1260, train_loss:0.13180575688464521, acc:0.3464855754932091


 46%|████▌     | 20175/43738 [2:33:42<2:26:39,  2.68it/s]

step:1260, train_loss:0.13180747210718682, acc:0.3464684014869889


 47%|████▋     | 20480/43738 [2:36:07<2:56:35,  2.20it/s]

step:1280, train_loss:0.13139850376269385, acc:0.347802734375


 47%|████▋     | 20481/43738 [2:36:08<3:21:53,  1.92it/s]

step:1280, train_loss:0.13139301346414275, acc:0.3478345783897271


 47%|████▋     | 20482/43738 [2:36:08<3:35:34,  1.80it/s]

step:1280, train_loss:0.13138677082929864, acc:0.34786641929499074


 47%|████▋     | 20483/43738 [2:36:09<3:08:04,  2.06it/s]

step:1280, train_loss:0.13138163085191784, acc:0.3478982570912464


 47%|████▋     | 20484/43738 [2:36:09<2:40:54,  2.41it/s]

step:1280, train_loss:0.13137814132517664, acc:0.3479300917789494


 47%|████▋     | 20485/43738 [2:36:10<2:56:13,  2.20it/s]

step:1280, train_loss:0.13137255466066033, acc:0.347961923358555


 47%|████▋     | 20486/43738 [2:36:10<2:43:11,  2.37it/s]

step:1280, train_loss:0.13136972691226442, acc:0.3479937518305184


 47%|████▋     | 20487/43738 [2:36:11<3:01:24,  2.14it/s]

step:1280, train_loss:0.13136744053565522, acc:0.3480255771952946


 47%|████▋     | 20488/43738 [2:36:11<2:33:49,  2.52it/s]

step:1280, train_loss:0.13136266768295993, acc:0.3480573994533385


 47%|████▋     | 20489/43738 [2:36:11<2:38:38,  2.44it/s]

step:1280, train_loss:0.13136975630446762, acc:0.3480404119283518


 47%|████▋     | 20490/43738 [2:36:12<2:28:20,  2.61it/s]

step:1280, train_loss:0.13136875713389565, acc:0.3480234260614934


 47%|████▋     | 20491/43738 [2:36:12<2:27:02,  2.64it/s]

step:1280, train_loss:0.1313661987838792, acc:0.34805524376555563


 47%|████▋     | 20492/43738 [2:36:12<2:35:11,  2.50it/s]

step:1280, train_loss:0.13136703332426383, acc:0.3480382588327152


 47%|████▋     | 20493/43738 [2:36:13<2:50:31,  2.27it/s]

step:1280, train_loss:0.13137191803268378, acc:0.34802127555750745


 47%|████▋     | 20494/43738 [2:36:13<2:30:29,  2.57it/s]

step:1280, train_loss:0.1313697277930743, acc:0.3480042939396897


 47%|████▋     | 20495/43738 [2:36:14<3:17:45,  1.96it/s]

step:1280, train_loss:0.13137113908012701, acc:0.3479873139790193


 48%|████▊     | 20800/43738 [2:38:35<3:38:23,  1.75it/s]

step:1300, train_loss:0.13094753960998262, acc:0.34918269230769233


 48%|████▊     | 20801/43738 [2:38:36<3:42:54,  1.71it/s]

step:1300, train_loss:0.13094172536713278, acc:0.34921398009711074


 48%|████▊     | 20802/43738 [2:38:36<3:14:01,  1.97it/s]

step:1300, train_loss:0.13094141840948756, acc:0.34919719257763676


 48%|████▊     | 20803/43738 [2:38:37<3:41:26,  1.73it/s]

step:1300, train_loss:0.1309447196840388, acc:0.3491804066721146


 48%|████▊     | 20804/43738 [2:38:37<3:13:35,  1.97it/s]

step:1300, train_loss:0.13093850168433033, acc:0.3492116900596039


 48%|████▊     | 20805/43738 [2:38:38<3:01:23,  2.11it/s]

step:1300, train_loss:0.1309380867916667, acc:0.3491949050708964


 48%|████▊     | 20806/43738 [2:38:38<2:44:17,  2.33it/s]

step:1300, train_loss:0.13093606304032862, acc:0.3491781216956647


 48%|████▊     | 20807/43738 [2:38:39<3:18:14,  1.93it/s]

step:1300, train_loss:0.13093553774739922, acc:0.34916133993367615


 48%|████▊     | 20808/43738 [2:38:39<3:00:23,  2.12it/s]

step:1300, train_loss:0.13093044245961166, acc:0.3491926182237601


 48%|████▊     | 20809/43738 [2:38:39<2:43:00,  2.34it/s]

step:1300, train_loss:0.1309241799379796, acc:0.3492238935076169


 48%|████▊     | 20810/43738 [2:38:40<3:15:12,  1.96it/s]

step:1300, train_loss:0.1309263420850221, acc:0.3492071119654013


 48%|████▊     | 20811/43738 [2:38:41<3:24:06,  1.87it/s]

step:1300, train_loss:0.1309207772985473, acc:0.3492383835471626


 48%|████▊     | 20812/43738 [2:38:41<3:31:17,  1.81it/s]

step:1300, train_loss:0.13091448967823197, acc:0.34926965212377475


 48%|████▊     | 20813/43738 [2:38:42<3:16:53,  1.94it/s]

step:1300, train_loss:0.13091413077404304, acc:0.34925287080190265


 48%|████▊     | 20814/43738 [2:38:42<3:08:10,  2.03it/s]

step:1300, train_loss:0.13091075427601517, acc:0.3492360910925339


 48%|████▊     | 20815/43738 [2:38:42<2:37:26,  2.43it/s]

step:1300, train_loss:0.13091113884695518, acc:0.34921931299543596


 48%|████▊     | 21120/43738 [2:41:07<3:33:44,  1.76it/s]

step:1320, train_loss:0.1305786227716567, acc:0.350094696969697


 48%|████▊     | 21121/43738 [2:41:07<3:11:17,  1.97it/s]

step:1320, train_loss:0.13057576747816063, acc:0.35012546754415036


 48%|████▊     | 21122/43738 [2:41:08<2:50:19,  2.21it/s]

step:1320, train_loss:0.13056972371376419, acc:0.3501562352049995


 48%|████▊     | 21123/43738 [2:41:08<2:29:52,  2.51it/s]

step:1320, train_loss:0.13056411328913156, acc:0.35018699995265823


 48%|████▊     | 21124/43738 [2:41:09<3:16:45,  1.92it/s]

step:1320, train_loss:0.13056234135997036, acc:0.35017042226850975


 48%|████▊     | 21125/43738 [2:41:09<3:14:41,  1.94it/s]

step:1320, train_loss:0.13056931495075783, acc:0.35015384615384615


 48%|████▊     | 21126/43738 [2:41:10<3:13:13,  1.95it/s]

step:1320, train_loss:0.1305672227303886, acc:0.3501846066458392


 48%|████▊     | 21127/43738 [2:41:10<2:46:26,  2.26it/s]

step:1320, train_loss:0.13056189016900022, acc:0.35021536422587213


 48%|████▊     | 21128/43738 [2:41:11<2:39:24,  2.36it/s]

step:1320, train_loss:0.13055577444544592, acc:0.3502461188943582


 48%|████▊     | 21129/43738 [2:41:11<2:35:42,  2.42it/s]

step:1320, train_loss:0.130551552202436, acc:0.3502768706517109


 48%|████▊     | 21130/43738 [2:41:12<2:56:22,  2.14it/s]

step:1320, train_loss:0.1305455094928845, acc:0.3503076194983436


 48%|████▊     | 21131/43738 [2:41:12<2:55:24,  2.15it/s]

step:1320, train_loss:0.13054754197019997, acc:0.35029104159765273


 48%|████▊     | 21132/43738 [2:41:12<2:31:11,  2.49it/s]

step:1320, train_loss:0.13055063307178866, acc:0.35027446526594735


 48%|████▊     | 21133/43738 [2:41:13<2:50:17,  2.21it/s]

step:1320, train_loss:0.13054912867664378, acc:0.35025789050300477


 48%|████▊     | 21134/43738 [2:41:13<2:29:27,  2.52it/s]

step:1320, train_loss:0.13054297255124608, acc:0.350288634427936


 48%|████▊     | 21135/43738 [2:41:13<2:14:37,  2.80it/s]

step:1320, train_loss:0.13053861399625621, acc:0.350319375443577


 49%|████▉     | 21440/43738 [2:43:32<3:17:16,  1.88it/s]

step:1340, train_loss:0.13015542173292957, acc:0.3516791044776119


 49%|████▉     | 21441/43738 [2:43:33<3:48:25,  1.63it/s]

step:1340, train_loss:0.13015528673185908, acc:0.35166270229933305


 49%|████▉     | 21442/43738 [2:43:33<3:16:09,  1.89it/s]

step:1340, train_loss:0.1301509968549402, acc:0.3516929390915027


 49%|████▉     | 21443/43738 [2:43:34<3:46:05,  1.64it/s]

step:1340, train_loss:0.13015425916439707, acc:0.35167653779788277


 49%|████▉     | 21444/43738 [2:43:34<3:37:13,  1.71it/s]

step:1340, train_loss:0.13015046903731642, acc:0.35170677112479015


 49%|████▉     | 21445/43738 [2:43:35<3:34:46,  1.73it/s]

step:1340, train_loss:0.1301543432738228, acc:0.3516903707157846


 49%|████▉     | 21446/43738 [2:43:35<3:14:02,  1.91it/s]

step:1340, train_loss:0.13014920337158611, acc:0.3517206005781964


 49%|████▉     | 21447/43738 [2:43:35<2:42:19,  2.29it/s]

step:1340, train_loss:0.1301433878659227, acc:0.3517508276215788


 49%|████▉     | 21448/43738 [2:43:36<2:17:27,  2.70it/s]

step:1340, train_loss:0.1301423802923688, acc:0.3517344274524431


 49%|████▉     | 21449/43738 [2:43:36<2:23:40,  2.59it/s]

step:1340, train_loss:0.1301414181032919, acc:0.3517646510326822


 49%|████▉     | 21450/43738 [2:43:36<2:21:55,  2.62it/s]

step:1340, train_loss:0.13014187444511635, acc:0.35174825174825175


 49%|████▉     | 21451/43738 [2:43:37<2:10:46,  2.84it/s]

step:1340, train_loss:0.1301411967487295, acc:0.35173185399282086


 49%|████▉     | 21452/43738 [2:43:37<2:37:03,  2.37it/s]

step:1340, train_loss:0.13014146996687323, acc:0.35171545776617563


 49%|████▉     | 21453/43738 [2:43:38<2:33:43,  2.42it/s]

step:1340, train_loss:0.13014517043426074, acc:0.3516990630681024


 49%|████▉     | 21454/43738 [2:43:38<2:14:49,  2.75it/s]

step:1340, train_loss:0.13014300860697894, acc:0.35168266989838726


 49%|████▉     | 21455/43738 [2:43:38<2:23:04,  2.60it/s]

step:1340, train_loss:0.13014363144046723, acc:0.35166627825681657


 50%|████▉     | 21760/43738 [2:45:56<2:49:19,  2.16it/s]

step:1360, train_loss:0.12968472537055203, acc:0.3531709558823529


 50%|████▉     | 21761/43738 [2:45:57<2:58:33,  2.05it/s]

step:1360, train_loss:0.12968863290093727, acc:0.35315472634529665


 50%|████▉     | 21762/43738 [2:45:57<3:31:34,  1.73it/s]

step:1360, train_loss:0.12969753587103328, acc:0.3531384982997886


 50%|████▉     | 21763/43738 [2:45:58<3:29:48,  1.75it/s]

step:1360, train_loss:0.12969357586550412, acc:0.3531682212930203


 50%|████▉     | 21764/43738 [2:45:58<3:02:13,  2.01it/s]

step:1360, train_loss:0.1296891019777132, acc:0.35319794155486123


 50%|████▉     | 21765/43738 [2:45:59<2:55:18,  2.09it/s]

step:1360, train_loss:0.12970037393880857, acc:0.35318171376062485


 50%|████▉     | 21766/43738 [2:45:59<3:09:08,  1.94it/s]

step:1360, train_loss:0.12970104074779734, acc:0.35316548745750254


 50%|████▉     | 21767/43738 [2:46:00<2:49:49,  2.16it/s]

step:1360, train_loss:0.12969510557120428, acc:0.35319520374879404


 50%|████▉     | 21768/43738 [2:46:00<2:54:32,  2.10it/s]

step:1360, train_loss:0.12969104572957366, acc:0.3532249173098126


 50%|████▉     | 21769/43738 [2:46:01<2:44:17,  2.23it/s]

step:1360, train_loss:0.12968516248037534, acc:0.35325462814093433


 50%|████▉     | 21770/43738 [2:46:01<2:40:42,  2.28it/s]

step:1360, train_loss:0.1296805367951589, acc:0.3532843362425356


 50%|████▉     | 21771/43738 [2:46:01<2:30:57,  2.43it/s]

step:1360, train_loss:0.12967628340820925, acc:0.3533140416149924


 50%|████▉     | 21772/43738 [2:46:02<2:22:54,  2.56it/s]

step:1360, train_loss:0.12967131623848002, acc:0.35334374425868087


 50%|████▉     | 21773/43738 [2:46:02<2:38:09,  2.31it/s]

step:1360, train_loss:0.12967131043643326, acc:0.35332751573049187


 50%|████▉     | 21774/43738 [2:46:03<2:17:55,  2.65it/s]

step:1360, train_loss:0.12966582591648362, acc:0.35335721502709655


 50%|████▉     | 21775/43738 [2:46:03<2:06:12,  2.90it/s]

step:1360, train_loss:0.1296606969177923, acc:0.3533869115958668


 50%|█████     | 22080/43738 [2:48:19<2:54:30,  2.07it/s]

step:1380, train_loss:0.12923545149179036, acc:0.35466485507246376


 50%|█████     | 22081/43738 [2:48:19<2:54:32,  2.07it/s]

step:1380, train_loss:0.12922968023862963, acc:0.35469408088401794


 50%|█████     | 22082/43738 [2:48:19<2:32:27,  2.37it/s]

step:1380, train_loss:0.12922410267634174, acc:0.35472330404854635


 50%|█████     | 22083/43738 [2:48:20<2:36:23,  2.31it/s]

step:1380, train_loss:0.1292219036685137, acc:0.35475252456640854


 50%|█████     | 22084/43738 [2:48:20<2:13:23,  2.71it/s]

step:1380, train_loss:0.12922215178448085, acc:0.3547364607860895


 50%|█████     | 22085/43738 [2:48:21<2:38:32,  2.28it/s]

step:1380, train_loss:0.1292168742325561, acc:0.3547656780620331


 50%|█████     | 22086/43738 [2:48:21<2:58:56,  2.02it/s]

step:1380, train_loss:0.12921492005743956, acc:0.3547496151408132


 50%|█████     | 22087/43738 [2:48:22<3:24:12,  1.77it/s]

step:1380, train_loss:0.1292118140970348, acc:0.3547335536741069


 51%|█████     | 22088/43738 [2:48:23<3:16:49,  1.83it/s]

step:1380, train_loss:0.12921327917905537, acc:0.3547174936617168


 51%|█████     | 22089/43738 [2:48:23<3:24:17,  1.77it/s]

step:1380, train_loss:0.12921378816789947, acc:0.35470143510344515


 51%|█████     | 22090/43738 [2:48:24<3:22:40,  1.78it/s]

step:1380, train_loss:0.12920833920278768, acc:0.35473064735174287


 51%|█████     | 22091/43738 [2:48:24<3:28:04,  1.73it/s]

step:1380, train_loss:0.1292061009757537, acc:0.3547598569553212


 51%|█████     | 22092/43738 [2:48:25<3:09:41,  1.90it/s]

step:1380, train_loss:0.1292033085747978, acc:0.3547437986601485


 51%|█████     | 22093/43738 [2:48:25<2:49:29,  2.13it/s]

step:1380, train_loss:0.12921873201163275, acc:0.3547277418186756


 51%|█████     | 22094/43738 [2:48:25<2:23:49,  2.51it/s]

step:1380, train_loss:0.1292130526267911, acc:0.35475694758758036


 51%|█████     | 22095/43738 [2:48:26<2:16:13,  2.65it/s]

step:1380, train_loss:0.12920792197680517, acc:0.354786150712831


 51%|█████     | 22400/43738 [2:50:37<2:09:50,  2.74it/s]

step:1400, train_loss:0.12873739796512934, acc:0.35625


 51%|█████     | 22401/43738 [2:50:38<2:14:46,  2.64it/s]

step:1400, train_loss:0.1287393072318473, acc:0.356234096692112


 51%|█████     | 22402/43738 [2:50:38<2:14:03,  2.65it/s]

step:1400, train_loss:0.12873371708310258, acc:0.35626283367556466


 51%|█████     | 22403/43738 [2:50:39<2:56:54,  2.01it/s]

step:1400, train_loss:0.12874089616796672, acc:0.35624693121456946


 51%|█████     | 22404/43738 [2:50:39<2:50:18,  2.09it/s]

step:1400, train_loss:0.12873649976120555, acc:0.35627566505981073


 51%|█████     | 22405/43738 [2:50:40<2:41:43,  2.20it/s]

step:1400, train_loss:0.12873594453665396, acc:0.35625976344565946


 51%|█████     | 22406/43738 [2:50:40<2:48:04,  2.12it/s]

step:1400, train_loss:0.12873390429595027, acc:0.35624386325091495


 51%|█████     | 22407/43738 [2:50:41<3:01:27,  1.96it/s]

step:1400, train_loss:0.12874208182599145, acc:0.3562279644753872


 51%|█████     | 22408/43738 [2:50:41<2:52:54,  2.06it/s]

step:1400, train_loss:0.12873975590032244, acc:0.3562120671188861


 51%|█████     | 22409/43738 [2:50:42<3:02:50,  1.94it/s]

step:1400, train_loss:0.12873981606634904, acc:0.35624079610870635


 51%|█████     | 22410/43738 [2:50:42<3:34:18,  1.66it/s]

step:1400, train_loss:0.12874283226962668, acc:0.3562248995983936


 51%|█████     | 22411/43738 [2:50:43<2:58:10,  1.99it/s]

step:1400, train_loss:0.12873972529292754, acc:0.35620900450671544


 51%|█████     | 22412/43738 [2:50:43<2:34:12,  2.30it/s]

step:1400, train_loss:0.12873772973096556, acc:0.35619311083348204


 51%|█████     | 22413/43738 [2:50:43<2:31:31,  2.35it/s]

step:1400, train_loss:0.12873615864156782, acc:0.35617721857850354


 51%|█████     | 22414/43738 [2:50:44<2:54:43,  2.03it/s]

step:1400, train_loss:0.12873668848484213, acc:0.3561613277415901


 51%|█████     | 22415/43738 [2:50:44<2:31:06,  2.35it/s]

step:1400, train_loss:0.1287310377598172, acc:0.35619005130492976


 52%|█████▏    | 22720/43738 [2:53:06<2:04:54,  2.80it/s]

step:1420, train_loss:0.12832450429691913, acc:0.3576144366197183


 52%|█████▏    | 22721/43738 [2:53:06<2:09:07,  2.71it/s]

step:1420, train_loss:0.1283207265595919, acc:0.357642709387791


 52%|█████▏    | 22722/43738 [2:53:07<2:14:23,  2.61it/s]

step:1420, train_loss:0.1283214729350376, acc:0.357626969456914


 52%|█████▏    | 22723/43738 [2:53:07<2:20:55,  2.49it/s]

step:1420, train_loss:0.1283184398222839, acc:0.3576552391849668


 52%|█████▏    | 22724/43738 [2:53:08<2:15:40,  2.58it/s]

step:1420, train_loss:0.12831663697212484, acc:0.35763950008801265


 52%|█████▏    | 22725/43738 [2:53:08<2:07:50,  2.74it/s]

step:1420, train_loss:0.1283177391946323, acc:0.3576237623762376


 52%|█████▏    | 22726/43738 [2:53:08<2:13:33,  2.62it/s]

step:1420, train_loss:0.12831677674773204, acc:0.3576080260494588


 52%|█████▏    | 22727/43738 [2:53:09<2:30:14,  2.33it/s]

step:1420, train_loss:0.12832104549399456, acc:0.3575922911074933


 52%|█████▏    | 22728/43738 [2:53:09<2:11:20,  2.67it/s]

step:1420, train_loss:0.1283182612095404, acc:0.35762055614220345


 52%|█████▏    | 22729/43738 [2:53:10<2:07:42,  2.74it/s]

step:1420, train_loss:0.12831442129146434, acc:0.3576488186897796


 52%|█████▏    | 22730/43738 [2:53:10<2:45:44,  2.11it/s]

step:1420, train_loss:0.1283126814258208, acc:0.35767707875054994


 52%|█████▏    | 22731/43738 [2:53:11<2:20:39,  2.49it/s]

step:1420, train_loss:0.12830976713108116, acc:0.3576613435396595


 52%|█████▏    | 22732/43738 [2:53:11<2:04:54,  2.80it/s]

step:1420, train_loss:0.12830507713581665, acc:0.35768960056308285


 52%|█████▏    | 22733/43738 [2:53:11<2:07:42,  2.74it/s]

step:1420, train_loss:0.12830257414977764, acc:0.3576738661857212


 52%|█████▏    | 22734/43738 [2:53:12<2:07:24,  2.75it/s]

step:1420, train_loss:0.1282994042666053, acc:0.357702120172429


 52%|█████▏    | 22735/43738 [2:53:12<2:04:18,  2.82it/s]

step:1420, train_loss:0.12829584842923653, acc:0.3576863866285463


 53%|█████▎    | 23040/43738 [2:55:27<3:13:19,  1.78it/s]

step:1440, train_loss:0.1278820749550205, acc:0.3589409722222222


 53%|█████▎    | 23041/43738 [2:55:28<3:03:24,  1.88it/s]

step:1440, train_loss:0.12787737711139158, acc:0.35896879475717197


 53%|█████▎    | 23042/43738 [2:55:28<2:42:08,  2.13it/s]

step:1440, train_loss:0.12787487467244454, acc:0.3589532158666782


 53%|█████▎    | 23043/43738 [2:55:28<2:21:39,  2.43it/s]

step:1440, train_loss:0.12786941441144098, acc:0.35898103545545285


 53%|█████▎    | 23044/43738 [2:55:29<2:34:01,  2.24it/s]

step:1440, train_loss:0.1278639182657356, acc:0.3590088526297518


 53%|█████▎    | 23045/43738 [2:55:29<2:29:10,  2.31it/s]

step:1440, train_loss:0.12786270437052313, acc:0.35899327402907355


 53%|█████▎    | 23046/43738 [2:55:30<2:12:25,  2.60it/s]

step:1440, train_loss:0.1278581513759785, acc:0.35902108825826606


 53%|█████▎    | 23047/43738 [2:55:30<2:32:12,  2.27it/s]

step:1440, train_loss:0.12785344009633837, acc:0.35904890007376233


 53%|█████▎    | 23048/43738 [2:55:31<2:47:22,  2.06it/s]

step:1440, train_loss:0.1278512824426423, acc:0.3590333217632766


 53%|█████▎    | 23049/43738 [2:55:31<2:59:10,  1.92it/s]

step:1440, train_loss:0.12785752424915955, acc:0.35901774480454685


 53%|█████▎    | 23050/43738 [2:55:32<3:05:14,  1.86it/s]

step:1440, train_loss:0.1278547794893973, acc:0.3590455531453362


 53%|█████▎    | 23051/43738 [2:55:33<3:11:50,  1.80it/s]

step:1440, train_loss:0.12784996126734577, acc:0.3590733590733591


 53%|█████▎    | 23052/43738 [2:55:33<2:42:41,  2.12it/s]

step:1440, train_loss:0.1278506279452051, acc:0.3590577824049974


 53%|█████▎    | 23053/43738 [2:55:33<2:40:36,  2.15it/s]

step:1440, train_loss:0.12784551554741705, acc:0.3590855853901878


 53%|█████▎    | 23054/43738 [2:55:34<3:15:14,  1.77it/s]

step:1440, train_loss:0.12784086337482636, acc:0.3591133859633903


 53%|█████▎    | 23055/43738 [2:55:34<2:52:12,  2.00it/s]

step:1440, train_loss:0.12783531922758395, acc:0.3591411841249187


 53%|█████▎    | 23360/43738 [2:57:53<2:06:57,  2.68it/s]

step:1460, train_loss:0.12750407258153437, acc:0.3605308219178082


 53%|█████▎    | 23361/43738 [2:57:54<2:10:41,  2.60it/s]

step:1460, train_loss:0.1274994995774583, acc:0.3605581952827362


 53%|█████▎    | 23362/43738 [2:57:54<2:10:01,  2.61it/s]

step:1460, train_loss:0.1274967053524772, acc:0.36058556630425476


 53%|█████▎    | 23363/43738 [2:57:55<2:14:08,  2.53it/s]

step:1460, train_loss:0.1274962115278422, acc:0.3606129349826649


 53%|█████▎    | 23364/43738 [2:57:55<2:01:12,  2.80it/s]

step:1460, train_loss:0.12749080014639597, acc:0.3606403013182674


 53%|█████▎    | 23365/43738 [2:57:55<2:03:41,  2.75it/s]

step:1460, train_loss:0.12749235355927546, acc:0.36062486625294243


 53%|█████▎    | 23366/43738 [2:57:56<2:29:42,  2.27it/s]

step:1460, train_loss:0.1274906809335756, acc:0.3606094325087734


 53%|█████▎    | 23367/43738 [2:57:56<2:17:37,  2.47it/s]

step:1460, train_loss:0.1274916467294621, acc:0.3605940000855908


 53%|█████▎    | 23368/43738 [2:57:57<2:32:15,  2.23it/s]

step:1460, train_loss:0.12749652821954047, acc:0.36057856898322493


 53%|█████▎    | 23369/43738 [2:57:57<2:53:10,  1.96it/s]

step:1460, train_loss:0.1275023773336115, acc:0.3605631392015063


 53%|█████▎    | 23370/43738 [2:57:58<3:23:52,  1.67it/s]

step:1460, train_loss:0.1275040737152767, acc:0.3605477107402653


 53%|█████▎    | 23371/43738 [2:57:59<3:43:39,  1.52it/s]

step:1460, train_loss:0.12750601285772328, acc:0.3605322835993325


 53%|█████▎    | 23372/43738 [2:57:59<3:05:57,  1.83it/s]

step:1460, train_loss:0.12750086350113848, acc:0.36055964401848367


 53%|█████▎    | 23373/43738 [2:58:00<3:04:48,  1.84it/s]

step:1460, train_loss:0.12749570778950775, acc:0.36058700209643607


 53%|█████▎    | 23374/43738 [2:58:00<2:57:27,  1.91it/s]

step:1460, train_loss:0.127490258335207, acc:0.3606143578334902


 53%|█████▎    | 23375/43738 [2:58:01<2:29:31,  2.27it/s]

step:1460, train_loss:0.12748482710234155, acc:0.36064171122994654


 54%|█████▍    | 23680/43738 [3:00:16<2:53:12,  1.93it/s]

step:1480, train_loss:0.1271676517154966, acc:0.36186655405405405


 54%|█████▍    | 23681/43738 [3:00:17<2:45:12,  2.02it/s]

step:1480, train_loss:0.1271640312948373, acc:0.36189350111904056


 54%|█████▍    | 23682/43738 [3:00:17<2:40:26,  2.08it/s]

step:1480, train_loss:0.12715887518180108, acc:0.36192044590828476


 54%|█████▍    | 23683/43738 [3:00:17<2:16:51,  2.44it/s]

step:1480, train_loss:0.127153760068259, acc:0.3619473884220749


 54%|█████▍    | 23684/43738 [3:00:18<2:14:27,  2.49it/s]

step:1480, train_loss:0.12715186462193717, acc:0.361932106063165


 54%|█████▍    | 23685/43738 [3:00:18<2:01:14,  2.76it/s]

step:1480, train_loss:0.12714666929437618, acc:0.3619590458095841


 54%|█████▍    | 23686/43738 [3:00:18<1:51:37,  2.99it/s]

step:1480, train_loss:0.1271441844879766, acc:0.36194376424892344


 54%|█████▍    | 23687/43738 [3:00:19<1:41:26,  3.29it/s]

step:1480, train_loss:0.12714027070098652, acc:0.361970701228522


 54%|█████▍    | 23688/43738 [3:00:19<2:02:05,  2.74it/s]

step:1480, train_loss:0.12713522566203606, acc:0.36199763593380613


 54%|█████▍    | 23689/43738 [3:00:19<1:58:07,  2.83it/s]

step:1480, train_loss:0.1271339319032219, acc:0.36198235467938705


 54%|█████▍    | 23690/43738 [3:00:20<2:18:05,  2.42it/s]

step:1480, train_loss:0.12713296946008737, acc:0.36200928661882653


 54%|█████▍    | 23691/43738 [3:00:20<2:24:20,  2.31it/s]

step:1480, train_loss:0.12713618624839695, acc:0.3619940061626778


 54%|█████▍    | 23692/43738 [3:00:21<3:02:28,  1.83it/s]

step:1480, train_loss:0.12713267801551684, acc:0.36202093533682256


 54%|█████▍    | 23693/43738 [3:00:22<3:21:33,  1.66it/s]

step:1480, train_loss:0.12713810194410463, acc:0.3620056556788925


 54%|█████▍    | 23694/43738 [3:00:23<3:09:41,  1.76it/s]

step:1480, train_loss:0.12713984739899017, acc:0.3619903773107116


 54%|█████▍    | 23695/43738 [3:00:23<3:05:46,  1.80it/s]

step:1480, train_loss:0.12713479737456532, acc:0.3620173032285292


 55%|█████▍    | 24000/43738 [3:02:43<2:26:02,  2.25it/s]

step:1500, train_loss:0.12681225420376996, acc:0.36320833333333336


 55%|█████▍    | 24001/43738 [3:02:44<3:01:19,  1.81it/s]

step:1500, train_loss:0.12680775931060792, acc:0.36323486521394943


 55%|█████▍    | 24002/43738 [3:02:44<2:39:35,  2.06it/s]

step:1500, train_loss:0.12680265042515906, acc:0.3632613948837597


 55%|█████▍    | 24003/43738 [3:02:45<2:18:30,  2.37it/s]

step:1500, train_loss:0.1267974580382682, acc:0.36328792234304047


 55%|█████▍    | 24004/43738 [3:02:45<2:00:25,  2.73it/s]

step:1500, train_loss:0.1267947809200279, acc:0.36327278786868855


 55%|█████▍    | 24005/43738 [3:02:45<2:03:45,  2.66it/s]

step:1500, train_loss:0.12679181359868863, acc:0.36325765465528015


 55%|█████▍    | 24006/43738 [3:02:46<2:09:04,  2.55it/s]

step:1500, train_loss:0.12678847633350096, acc:0.36328417895526116


 55%|█████▍    | 24007/43738 [3:02:46<1:53:20,  2.90it/s]

step:1500, train_loss:0.12678645077142145, acc:0.3633107010455284


 55%|█████▍    | 24008/43738 [3:02:46<2:08:22,  2.56it/s]

step:1500, train_loss:0.12678489210879254, acc:0.36329556814395203


 55%|█████▍    | 24009/43738 [3:02:47<2:11:49,  2.49it/s]

step:1500, train_loss:0.12678494794108208, acc:0.36328043650297803


 55%|█████▍    | 24010/43738 [3:02:47<2:04:03,  2.65it/s]

step:1500, train_loss:0.12678138434183234, acc:0.36330695543523533


 55%|█████▍    | 24011/43738 [3:02:48<2:14:09,  2.45it/s]

step:1500, train_loss:0.12678088742917173, acc:0.36329182458040066


 55%|█████▍    | 24012/43738 [3:02:48<2:11:26,  2.50it/s]

step:1500, train_loss:0.1267778015810544, acc:0.3632766949858404


 55%|█████▍    | 24013/43738 [3:02:48<1:58:18,  2.78it/s]

step:1500, train_loss:0.12677611818624157, acc:0.36326156665139714


 55%|█████▍    | 24014/43738 [3:02:49<1:54:58,  2.86it/s]

step:1500, train_loss:0.12677084932746235, acc:0.36328808195219453


 55%|█████▍    | 24015/43738 [3:02:49<2:22:29,  2.31it/s]

step:1500, train_loss:0.12676822093656046, acc:0.3633145950447637


 56%|█████▌    | 24320/43738 [3:05:07<1:48:53,  2.97it/s]

step:1520, train_loss:0.126395125016849, acc:0.3652138157894737


 56%|█████▌    | 24321/43738 [3:05:07<1:46:57,  3.03it/s]

step:1520, train_loss:0.12639665135119307, acc:0.3651987993914724


 56%|█████▌    | 24322/43738 [3:05:08<1:58:25,  2.73it/s]

step:1520, train_loss:0.12639515547716404, acc:0.36522489926815227


 56%|█████▌    | 24323/43738 [3:05:08<2:28:59,  2.17it/s]

step:1520, train_loss:0.12639841805200838, acc:0.3652098836492209


 56%|█████▌    | 24324/43738 [3:05:09<2:26:30,  2.21it/s]

step:1520, train_loss:0.12639573653325537, acc:0.3652359809241901


 56%|█████▌    | 24325/43738 [3:05:09<2:27:57,  2.19it/s]

step:1520, train_loss:0.12639973293568296, acc:0.36522096608427546


 56%|█████▌    | 24326/43738 [3:05:10<2:37:56,  2.05it/s]

step:1520, train_loss:0.12639835736328936, acc:0.36520595247882925


 56%|█████▌    | 24327/43738 [3:05:10<2:35:55,  2.07it/s]

step:1520, train_loss:0.1263949382734626, acc:0.36523204669708553


 56%|█████▌    | 24328/43738 [3:05:11<2:27:53,  2.19it/s]

step:1520, train_loss:0.1263907933307905, acc:0.3652581387701414


 56%|█████▌    | 24329/43738 [3:05:11<2:48:11,  1.92it/s]

step:1520, train_loss:0.12638630424248087, acc:0.3652842286982613


 56%|█████▌    | 24330/43738 [3:05:12<2:29:23,  2.17it/s]

step:1520, train_loss:0.12638998630268994, acc:0.36526921496095355


 56%|█████▌    | 24331/43738 [3:05:12<2:08:34,  2.52it/s]

step:1520, train_loss:0.12638506971541572, acc:0.3652953022892606


 56%|█████▌    | 24332/43738 [3:05:12<1:54:07,  2.83it/s]

step:1520, train_loss:0.12638002134309934, acc:0.3653213874732862


 56%|█████▌    | 24333/43738 [3:05:12<1:44:07,  3.11it/s]

step:1520, train_loss:0.12637487030588598, acc:0.3653474705132947


 56%|█████▌    | 24334/43738 [3:05:13<2:30:03,  2.16it/s]

step:1520, train_loss:0.1263777697237986, acc:0.36533245664502345


 56%|█████▌    | 24335/43738 [3:05:14<2:27:00,  2.20it/s]

step:1520, train_loss:0.1263824871341068, acc:0.3653174440106842


 56%|█████▋    | 24640/43738 [3:07:31<2:22:36,  2.23it/s]

step:1540, train_loss:0.12598289344408775, acc:0.3672077922077922


 56%|█████▋    | 24641/43738 [3:07:32<2:03:55,  2.57it/s]

step:1540, train_loss:0.12598067905487723, acc:0.3671928898989489


 56%|█████▋    | 24642/43738 [3:07:32<2:28:23,  2.14it/s]

step:1540, train_loss:0.12597742246707846, acc:0.3672185699212726


 56%|█████▋    | 24643/43738 [3:07:33<2:21:24,  2.25it/s]

step:1540, train_loss:0.12598002099986536, acc:0.3672036683845311


 56%|█████▋    | 24644/43738 [3:07:33<2:28:35,  2.14it/s]

step:1540, train_loss:0.1259754714873675, acc:0.36722934588540823


 56%|█████▋    | 24645/43738 [3:07:33<2:10:06,  2.45it/s]

step:1540, train_loss:0.1259748447130364, acc:0.36721444512071416


 56%|█████▋    | 24646/43738 [3:07:34<2:15:11,  2.35it/s]

step:1540, train_loss:0.12596978780771675, acc:0.36724012010062485


 56%|█████▋    | 24647/43738 [3:07:34<2:00:36,  2.64it/s]

step:1540, train_loss:0.12596853101271935, acc:0.3672657929971193


 56%|█████▋    | 24648/43738 [3:07:35<2:14:39,  2.36it/s]

step:1540, train_loss:0.1259656007921304, acc:0.36729146381045114


 56%|█████▋    | 24649/43738 [3:07:35<1:57:56,  2.70it/s]

step:1540, train_loss:0.12596053626420306, acc:0.3673171325408739


 56%|█████▋    | 24650/43738 [3:07:35<1:53:43,  2.80it/s]

step:1540, train_loss:0.12595709968526436, acc:0.367342799188641


 56%|█████▋    | 24651/43738 [3:07:36<1:50:30,  2.88it/s]

step:1540, train_loss:0.1259541239522849, acc:0.3673278974483794


 56%|█████▋    | 24652/43738 [3:07:36<1:57:03,  2.72it/s]

step:1540, train_loss:0.1259507061334275, acc:0.367353561577154


 56%|█████▋    | 24653/43738 [3:07:37<2:12:23,  2.40it/s]

step:1540, train_loss:0.12595208310709732, acc:0.36733866060925646


 56%|█████▋    | 24654/43738 [3:07:37<2:03:00,  2.59it/s]

step:1540, train_loss:0.12594765108675982, acc:0.36736432221951815


 56%|█████▋    | 24655/43738 [3:07:37<2:14:38,  2.36it/s]

step:1540, train_loss:0.12594303372562568, acc:0.3673899817481241


 57%|█████▋    | 24960/43738 [3:09:58<2:26:41,  2.13it/s]

step:1560, train_loss:0.1257017364515217, acc:0.3683894230769231


 57%|█████▋    | 24961/43738 [3:09:59<2:54:59,  1.79it/s]

step:1560, train_loss:0.12569882222308956, acc:0.36841472697407956


 57%|█████▋    | 24962/43738 [3:09:59<2:34:41,  2.02it/s]

step:1560, train_loss:0.12569908205072158, acc:0.36839996795128593


 57%|█████▋    | 24963/43738 [3:09:59<2:16:13,  2.30it/s]

step:1560, train_loss:0.12570082295583587, acc:0.3683852101109642


 57%|█████▋    | 24964/43738 [3:09:59<2:04:40,  2.51it/s]

step:1560, train_loss:0.12569902359261112, acc:0.36837045345297226


 57%|█████▋    | 24965/43738 [3:10:00<2:33:36,  2.04it/s]

step:1560, train_loss:0.12570151212134112, acc:0.36835569797716805


 57%|█████▋    | 24966/43738 [3:10:01<2:42:46,  1.92it/s]

step:1560, train_loss:0.12569882711146088, acc:0.3683809981574942


 57%|█████▋    | 24967/43738 [3:10:01<2:16:17,  2.30it/s]

step:1560, train_loss:0.12569536313699367, acc:0.3684062963111307


 57%|█████▋    | 24968/43738 [3:10:01<2:14:50,  2.32it/s]

step:1560, train_loss:0.12569484997197053, acc:0.3684315924383211


 57%|█████▋    | 24969/43738 [3:10:02<1:58:49,  2.63it/s]

step:1560, train_loss:0.1257004132758971, acc:0.36841683687772836


 57%|█████▋    | 24970/43738 [3:10:02<2:08:13,  2.44it/s]

step:1560, train_loss:0.12569700105592838, acc:0.368442130556668


 57%|█████▋    | 24971/43738 [3:10:03<2:25:32,  2.15it/s]

step:1560, train_loss:0.1256969567140988, acc:0.3684273757558768


 57%|█████▋    | 24972/43738 [3:10:03<2:12:06,  2.37it/s]

step:1560, train_loss:0.12569347141414206, acc:0.36845266698702545


 57%|█████▋    | 24973/43738 [3:10:04<2:29:43,  2.09it/s]

step:1560, train_loss:0.1256995502584618, acc:0.36843791294598166


 57%|█████▋    | 24974/43738 [3:10:04<2:09:20,  2.42it/s]

step:1560, train_loss:0.12569947067619935, acc:0.36842316008648995


 57%|█████▋    | 24975/43738 [3:10:04<2:05:44,  2.49it/s]

step:1560, train_loss:0.12570010993829225, acc:0.3684084084084084


 58%|█████▊    | 25280/43738 [3:12:20<2:34:34,  1.99it/s]

step:1580, train_loss:0.12541693024555783, acc:0.36954113924050636


 58%|█████▊    | 25281/43738 [3:12:20<2:22:35,  2.16it/s]

step:1580, train_loss:0.12541574426277197, acc:0.36952652189391244


 58%|█████▊    | 25282/43738 [3:12:20<2:08:24,  2.40it/s]

step:1580, train_loss:0.1254109345136057, acc:0.3695514595364291


 58%|█████▊    | 25283/43738 [3:12:21<2:12:40,  2.32it/s]

step:1580, train_loss:0.1254063461739902, acc:0.36957639520626506


 58%|█████▊    | 25284/43738 [3:12:21<2:35:31,  1.98it/s]

step:1580, train_loss:0.12540456305802491, acc:0.3696013289036545


 58%|█████▊    | 25285/43738 [3:12:22<2:42:17,  1.90it/s]

step:1580, train_loss:0.12540504950961903, acc:0.3695867114890251


 58%|█████▊    | 25286/43738 [3:12:23<2:41:22,  1.91it/s]

step:1580, train_loss:0.12540482732850136, acc:0.3695720952305624


 58%|█████▊    | 25287/43738 [3:12:23<2:37:29,  1.95it/s]

step:1580, train_loss:0.1254061943693487, acc:0.36955748012812906


 58%|█████▊    | 25288/43738 [3:12:23<2:32:51,  2.01it/s]

step:1580, train_loss:0.12540293932049845, acc:0.3695824106295476


 58%|█████▊    | 25289/43738 [3:12:24<2:47:45,  1.83it/s]

step:1580, train_loss:0.12540396395531228, acc:0.3695677962750603


 58%|█████▊    | 25290/43738 [3:12:24<2:29:04,  2.06it/s]

step:1580, train_loss:0.1254017286385074, acc:0.36959272439699487


 58%|█████▊    | 25291/43738 [3:12:25<2:25:24,  2.11it/s]

step:1580, train_loss:0.1254015758410351, acc:0.36957811079039976


 58%|█████▊    | 25292/43738 [3:12:25<2:06:21,  2.43it/s]

step:1580, train_loss:0.12539737056019148, acc:0.3696030365332912


 58%|█████▊    | 25293/43738 [3:12:26<2:07:00,  2.42it/s]

step:1580, train_loss:0.12540044012864923, acc:0.3695884236745345


 58%|█████▊    | 25294/43738 [3:12:26<2:27:28,  2.08it/s]

step:1580, train_loss:0.12539814609059338, acc:0.3696133470388234


 58%|█████▊    | 25295/43738 [3:12:26<2:06:43,  2.43it/s]

step:1580, train_loss:0.12539401809744055, acc:0.36963826843249653


 59%|█████▊    | 25600/43738 [3:14:45<2:16:33,  2.21it/s]

step:1600, train_loss:0.12513756858703373, acc:0.3708203125


 59%|█████▊    | 25601/43738 [3:14:46<2:25:12,  2.08it/s]

step:1600, train_loss:0.12514863771502774, acc:0.3708058278973478


 59%|█████▊    | 25602/43738 [3:14:46<2:11:10,  2.30it/s]

step:1600, train_loss:0.1251447093640132, acc:0.3708304038746973


 59%|█████▊    | 25603/43738 [3:14:47<2:15:32,  2.23it/s]

step:1600, train_loss:0.1251459872744665, acc:0.3708159200093739


 59%|█████▊    | 25604/43738 [3:14:47<2:03:37,  2.44it/s]

step:1600, train_loss:0.1251415736801639, acc:0.3708404936728636


 59%|█████▊    | 25605/43738 [3:14:48<2:38:03,  1.91it/s]

step:1600, train_loss:0.12514097861673953, acc:0.37082601054481545


 59%|█████▊    | 25606/43738 [3:14:48<2:27:07,  2.05it/s]

step:1600, train_loss:0.1251433006228499, acc:0.37081152854799654


 59%|█████▊    | 25607/43738 [3:14:48<2:07:28,  2.37it/s]

step:1600, train_loss:0.12514005302818806, acc:0.37083609950404184


 59%|█████▊    | 25608/43738 [3:14:49<2:41:24,  1.87it/s]

step:1600, train_loss:0.125138901810898, acc:0.3708606685410809


 59%|█████▊    | 25609/43738 [3:14:50<3:02:58,  1.65it/s]

step:1600, train_loss:0.12513801004017797, acc:0.3708461868874224


 59%|█████▊    | 25610/43738 [3:14:50<2:54:08,  1.73it/s]

step:1600, train_loss:0.12513599325313665, acc:0.3708317063647013


 59%|█████▊    | 25611/43738 [3:14:51<2:56:55,  1.71it/s]

step:1600, train_loss:0.1251347842416819, acc:0.37081722697278513


 59%|█████▊    | 25612/43738 [3:14:51<2:36:39,  1.93it/s]

step:1600, train_loss:0.12513268671199482, acc:0.37080274871154145


 59%|█████▊    | 25613/43738 [3:14:52<2:40:28,  1.88it/s]

step:1600, train_loss:0.12513386803818352, acc:0.3707882715808379


 59%|█████▊    | 25614/43738 [3:14:52<2:31:36,  1.99it/s]

step:1600, train_loss:0.12513171938444206, acc:0.37081283672991333


 59%|█████▊    | 25615/43738 [3:14:53<2:08:44,  2.35it/s]

step:1600, train_loss:0.12512687646369247, acc:0.3708373999609604


 59%|█████▉    | 25920/43738 [3:17:13<2:52:08,  1.73it/s]

step:1620, train_loss:0.12476668254492825, acc:0.3724922839506173


 59%|█████▉    | 25921/43738 [3:17:14<2:36:45,  1.89it/s]

step:1620, train_loss:0.12477662147266304, acc:0.3724779136607384


 59%|█████▉    | 25922/43738 [3:17:14<2:17:19,  2.16it/s]

step:1620, train_loss:0.12477746780692824, acc:0.3724635444795926


 59%|█████▉    | 25923/43738 [3:17:14<2:14:43,  2.20it/s]

step:1620, train_loss:0.12477317044142285, acc:0.3724877521891756


 59%|█████▉    | 25924/43738 [3:17:15<2:08:45,  2.31it/s]

step:1620, train_loss:0.12477497086213832, acc:0.3724733837370776


 59%|█████▉    | 25925/43738 [3:17:15<2:23:42,  2.07it/s]

step:1620, train_loss:0.124772872028526, acc:0.3724975891996143


 59%|█████▉    | 25926/43738 [3:17:16<2:12:39,  2.24it/s]

step:1620, train_loss:0.1247734067163418, acc:0.3724832214765101


 59%|█████▉    | 25927/43738 [3:17:16<2:08:04,  2.32it/s]

step:1620, train_loss:0.12477365267453824, acc:0.37246885486172715


 59%|█████▉    | 25928/43738 [3:17:16<1:54:47,  2.59it/s]

step:1620, train_loss:0.12476893945984568, acc:0.37249305769824126


 59%|█████▉    | 25929/43738 [3:17:17<2:17:38,  2.16it/s]

step:1620, train_loss:0.12476449648586163, acc:0.3725172586679008


 59%|█████▉    | 25930/43738 [3:17:17<2:00:13,  2.47it/s]

step:1620, train_loss:0.1247610178355682, acc:0.37254145777092174


 59%|█████▉    | 25931/43738 [3:17:17<1:45:10,  2.82it/s]

step:1620, train_loss:0.1247563104120522, acc:0.37256565500752


 59%|█████▉    | 25932/43738 [3:17:18<1:34:45,  3.13it/s]

step:1620, train_loss:0.12476087094928705, acc:0.37255128798395803


 59%|█████▉    | 25933/43738 [3:17:18<1:41:48,  2.91it/s]

step:1620, train_loss:0.12475737781191983, acc:0.3725754829753596


 59%|█████▉    | 25934/43738 [3:17:19<2:15:18,  2.19it/s]

step:1620, train_loss:0.12475290446833256, acc:0.37259967610087147


 59%|█████▉    | 25935/43738 [3:17:19<2:06:46,  2.34it/s]

step:1620, train_loss:0.1247492335842509, acc:0.37262386736070946


 60%|█████▉    | 26240/43738 [3:19:39<2:19:40,  2.09it/s]

step:1640, train_loss:0.12436461335226634, acc:0.37378048780487805


 60%|█████▉    | 26241/43738 [3:19:39<2:13:14,  2.19it/s]

step:1640, train_loss:0.12436910972673457, acc:0.3737662436644945


 60%|█████▉    | 26242/43738 [3:19:39<1:54:04,  2.56it/s]

step:1640, train_loss:0.12436528738601112, acc:0.3737901074613215


 60%|██████    | 26243/43738 [3:19:40<1:48:30,  2.69it/s]

step:1640, train_loss:0.12436059903816889, acc:0.3738139694394696


 60%|██████    | 26244/43738 [3:19:40<1:54:38,  2.54it/s]

step:1640, train_loss:0.12437694536527666, acc:0.3737997256515775


 60%|██████    | 26245/43738 [3:19:40<1:46:24,  2.74it/s]

step:1640, train_loss:0.12437445071500527, acc:0.3738235854448466


 60%|██████    | 26246/43738 [3:19:41<1:36:49,  3.01it/s]

step:1640, train_loss:0.12438234404954883, acc:0.3738093423759811


 60%|██████    | 26247/43738 [3:19:41<1:47:28,  2.71it/s]

step:1640, train_loss:0.12437895041934008, acc:0.37383319998476017


 60%|██████    | 26248/43738 [3:19:42<1:53:28,  2.57it/s]

step:1640, train_loss:0.12437661225009917, acc:0.3738570557756781


 60%|██████    | 26249/43738 [3:19:42<1:47:30,  2.71it/s]

step:1640, train_loss:0.12437226405303986, acc:0.37388090974894284


 60%|██████    | 26250/43738 [3:19:42<1:50:25,  2.64it/s]

step:1640, train_loss:0.12437896423877377, acc:0.3738666666666667


 60%|██████    | 26251/43738 [3:19:43<1:45:11,  2.77it/s]

step:1640, train_loss:0.12437497094498311, acc:0.3738905184564398


 60%|██████    | 26252/43738 [3:19:43<1:55:15,  2.53it/s]

step:1640, train_loss:0.12437154599180583, acc:0.37391436842907205


 60%|██████    | 26253/43738 [3:19:44<2:12:57,  2.19it/s]

step:1640, train_loss:0.12437456103532911, acc:0.37390012569992


 60%|██████    | 26254/43738 [3:19:44<2:07:47,  2.28it/s]

step:1640, train_loss:0.1243761140891871, acc:0.37388588405576295


 60%|██████    | 26255/43738 [3:19:44<2:00:44,  2.41it/s]

step:1640, train_loss:0.12437543914932023, acc:0.37387164349647684


 61%|██████    | 26560/43738 [3:22:04<3:13:08,  1.48it/s]

step:1660, train_loss:0.12406417978261289, acc:0.37484939759036146


 61%|██████    | 26561/43738 [3:22:04<2:38:08,  1.81it/s]

step:1660, train_loss:0.12406123719897987, acc:0.3748729340009789


 61%|██████    | 26562/43738 [3:22:05<2:26:12,  1.96it/s]

step:1660, train_loss:0.12405711119372825, acc:0.3748964686394097


 61%|██████    | 26563/43738 [3:22:05<2:28:25,  1.93it/s]

step:1660, train_loss:0.12405939205488924, acc:0.37488235515566765


 61%|██████    | 26564/43738 [3:22:06<2:41:50,  1.77it/s]

step:1660, train_loss:0.12406108439976438, acc:0.37486824273452796


 61%|██████    | 26565/43738 [3:22:06<2:26:47,  1.95it/s]

step:1660, train_loss:0.12405667081548907, acc:0.3748917748917749


 61%|██████    | 26566/43738 [3:22:07<3:01:14,  1.58it/s]

step:1660, train_loss:0.12405571403050056, acc:0.37487766317849885


 61%|██████    | 26567/43738 [3:22:07<2:42:55,  1.76it/s]

step:1660, train_loss:0.1240569660258502, acc:0.3748635525275718


 61%|██████    | 26568/43738 [3:22:08<2:30:55,  1.90it/s]

step:1660, train_loss:0.12405879878619495, acc:0.37484944293887384


 61%|██████    | 26569/43738 [3:22:09<2:36:13,  1.83it/s]

step:1660, train_loss:0.12405641875401037, acc:0.37487297226090555


 61%|██████    | 26570/43738 [3:22:09<2:26:44,  1.95it/s]

step:1660, train_loss:0.12406221975456413, acc:0.3748588633797516


 61%|██████    | 26571/43738 [3:22:09<2:02:54,  2.33it/s]

step:1660, train_loss:0.12405966892031646, acc:0.37484475556057356


 61%|██████    | 26572/43738 [3:22:10<1:53:33,  2.52it/s]

step:1660, train_loss:0.12405525238849598, acc:0.37486828240252895


 61%|██████    | 26573/43738 [3:22:10<1:55:05,  2.49it/s]

step:1660, train_loss:0.1240554219324192, acc:0.37485417529070864


 61%|██████    | 26574/43738 [3:22:10<2:03:26,  2.32it/s]

step:1660, train_loss:0.12405248257579216, acc:0.3748777000075261


 61%|██████    | 26575/43738 [3:22:11<2:00:21,  2.38it/s]

step:1660, train_loss:0.12405009191973917, acc:0.37490122295390405


 61%|██████▏   | 26880/43738 [3:24:27<2:16:10,  2.06it/s]

step:1680, train_loss:0.1236815971311793, acc:0.3765625


 61%|██████▏   | 26881/43738 [3:24:27<2:28:12,  1.90it/s]

step:1680, train_loss:0.12368223275210823, acc:0.37658569249655893


 61%|██████▏   | 26882/43738 [3:24:28<2:20:16,  2.00it/s]

step:1680, train_loss:0.12367954808101653, acc:0.376608883267614


 61%|██████▏   | 26883/43738 [3:24:28<2:11:10,  2.14it/s]

step:1680, train_loss:0.12368046283484366, acc:0.3765948740839936


 61%|██████▏   | 26884/43738 [3:24:29<2:02:15,  2.30it/s]

step:1680, train_loss:0.12367861259613616, acc:0.37661806278827553


 61%|██████▏   | 26885/43738 [3:24:29<1:55:49,  2.42it/s]

step:1680, train_loss:0.12367953663374594, acc:0.37660405430537475


 61%|██████▏   | 26886/43738 [3:24:29<1:52:54,  2.49it/s]

step:1680, train_loss:0.12367551322256544, acc:0.37662724094324185


 61%|██████▏   | 26887/43738 [3:24:30<1:38:16,  2.86it/s]

step:1680, train_loss:0.1236748287483381, acc:0.3766132331610072


 61%|██████▏   | 26888/43738 [3:24:30<1:42:18,  2.74it/s]

step:1680, train_loss:0.12367513201353496, acc:0.37659922642070814


 61%|██████▏   | 26889/43738 [3:24:30<1:57:52,  2.38it/s]

step:1680, train_loss:0.1236771210109248, acc:0.37658522072222844


 61%|██████▏   | 26890/43738 [3:24:31<2:18:15,  2.03it/s]

step:1680, train_loss:0.12367289380302736, acc:0.3766084046113797


 61%|██████▏   | 26891/43738 [3:24:32<2:28:07,  1.90it/s]

step:1680, train_loss:0.12367163744501013, acc:0.3765943996132535


 61%|██████▏   | 26892/43738 [3:24:32<2:16:58,  2.05it/s]

step:1680, train_loss:0.1236730379152861, acc:0.3765803956567009


 61%|██████▏   | 26893/43738 [3:24:33<2:37:22,  1.78it/s]

step:1680, train_loss:0.1236712688316307, acc:0.3766035771390325


 61%|██████▏   | 26894/43738 [3:24:33<2:36:32,  1.79it/s]

step:1680, train_loss:0.1236702248572633, acc:0.37662675689744923


 61%|██████▏   | 26895/43738 [3:24:34<2:29:47,  1.87it/s]

step:1680, train_loss:0.1236670473764502, acc:0.37664993493214355


 62%|██████▏   | 27200/43738 [3:26:51<1:55:07,  2.39it/s]

step:1700, train_loss:0.12340219765179132, acc:0.37768382352941177


 62%|██████▏   | 27201/43738 [3:26:51<1:56:48,  2.36it/s]

step:1700, train_loss:0.12339859231427946, acc:0.3777067019594868


 62%|██████▏   | 27202/43738 [3:26:52<1:42:22,  2.69it/s]

step:1700, train_loss:0.12339514934935612, acc:0.377729578707448


 62%|██████▏   | 27203/43738 [3:26:52<1:35:18,  2.89it/s]

step:1700, train_loss:0.12339161912864209, acc:0.3777524537734809


 62%|██████▏   | 27204/43738 [3:26:52<1:33:31,  2.95it/s]

step:1700, train_loss:0.1233878809346376, acc:0.3777753271577709


 62%|██████▏   | 27205/43738 [3:26:53<1:30:36,  3.04it/s]

step:1700, train_loss:0.12339133766196192, acc:0.37776144091159714


 62%|██████▏   | 27206/43738 [3:26:53<1:40:31,  2.74it/s]

step:1700, train_loss:0.12339130153160399, acc:0.3777475556862457


 62%|██████▏   | 27207/43738 [3:26:54<2:00:17,  2.29it/s]

step:1700, train_loss:0.12338953509685974, acc:0.37773367148160397


 62%|██████▏   | 27208/43738 [3:26:54<1:55:02,  2.39it/s]

step:1700, train_loss:0.12338595182324248, acc:0.3777565421934725


 62%|██████▏   | 27209/43738 [3:26:54<2:00:39,  2.28it/s]

step:1700, train_loss:0.12338159979255747, acc:0.37777941122422726


 62%|██████▏   | 27210/43738 [3:26:55<1:44:56,  2.62it/s]

step:1700, train_loss:0.12337732844397682, acc:0.37780227857405363


 62%|██████▏   | 27211/43738 [3:26:55<1:45:58,  2.60it/s]

step:1700, train_loss:0.12337450356399356, acc:0.37782514424313696


 62%|██████▏   | 27212/43738 [3:26:56<2:06:31,  2.18it/s]

step:1700, train_loss:0.12338153111699689, acc:0.3778112597383507


 62%|██████▏   | 27213/43738 [3:26:56<1:50:39,  2.49it/s]

step:1700, train_loss:0.12337733102603124, acc:0.37783412339690586


 62%|██████▏   | 27214/43738 [3:26:56<1:43:20,  2.66it/s]

step:1700, train_loss:0.12337356119157208, acc:0.37785698537517454


 62%|██████▏   | 27215/43738 [3:26:57<1:38:37,  2.79it/s]

step:1700, train_loss:0.123369780568885, acc:0.37787984567334193


 63%|██████▎   | 27520/43738 [3:29:18<2:17:22,  1.97it/s]

step:1720, train_loss:0.12302768055023382, acc:0.3793604651162791


 63%|██████▎   | 27521/43738 [3:29:19<2:08:45,  2.10it/s]

step:1720, train_loss:0.12302998037865555, acc:0.3793466807165437


 63%|██████▎   | 27522/43738 [3:29:19<2:03:21,  2.19it/s]

step:1720, train_loss:0.12302813205278418, acc:0.3793692318872175


 63%|██████▎   | 27523/43738 [3:29:19<1:46:14,  2.54it/s]

step:1720, train_loss:0.12302674538818774, acc:0.37935544817062095


 63%|██████▎   | 27524/43738 [3:29:20<2:18:20,  1.95it/s]

step:1720, train_loss:0.12302685113471634, acc:0.3793416654556024


 63%|██████▎   | 27525/43738 [3:29:21<2:43:41,  1.65it/s]

step:1720, train_loss:0.12303092308680838, acc:0.3793278837420527


 63%|██████▎   | 27526/43738 [3:29:22<2:35:39,  1.74it/s]

step:1720, train_loss:0.12302783732110673, acc:0.3793504323185352


 63%|██████▎   | 27527/43738 [3:29:22<2:11:03,  2.06it/s]

step:1720, train_loss:0.12302478083326876, acc:0.37937297925672975


 63%|██████▎   | 27528/43738 [3:29:22<2:17:09,  1.97it/s]

step:1720, train_loss:0.1230328581540176, acc:0.379359197907585


 63%|██████▎   | 27529/43738 [3:29:23<2:10:41,  2.07it/s]

step:1720, train_loss:0.12303156484032261, acc:0.37934541755966433


 63%|██████▎   | 27530/43738 [3:29:23<1:50:03,  2.45it/s]

step:1720, train_loss:0.1230271040881106, acc:0.37936796222302943


 63%|██████▎   | 27531/43738 [3:29:24<2:05:20,  2.16it/s]

step:1720, train_loss:0.12302703157941174, acc:0.37935418255784387


 63%|██████▎   | 27532/43738 [3:29:24<1:59:55,  2.25it/s]

step:1720, train_loss:0.12302736868309545, acc:0.37934040389365103


 63%|██████▎   | 27533/43738 [3:29:25<2:07:58,  2.11it/s]

step:1720, train_loss:0.12302689849392731, acc:0.37932662623034175


 63%|██████▎   | 27534/43738 [3:29:25<2:09:28,  2.09it/s]

step:1720, train_loss:0.12302504354321982, acc:0.3793128495678071


 63%|██████▎   | 27535/43738 [3:29:25<1:51:05,  2.43it/s]

step:1720, train_loss:0.12302825185878725, acc:0.3792990739059379


 64%|██████▎   | 27840/43738 [3:31:46<1:54:29,  2.31it/s]

step:1740, train_loss:0.12275931438633224, acc:0.3800646551724138


 64%|██████▎   | 27841/43738 [3:31:46<1:55:35,  2.29it/s]

step:1740, train_loss:0.12276121779297133, acc:0.3800510039150893


 64%|██████▎   | 27842/43738 [3:31:47<1:52:23,  2.36it/s]

step:1740, train_loss:0.12276057309437179, acc:0.38003735363838803


 64%|██████▎   | 27843/43738 [3:31:47<1:44:33,  2.53it/s]

step:1740, train_loss:0.12275686148455504, acc:0.38005962001221133


 64%|██████▎   | 27844/43738 [3:31:47<1:50:23,  2.40it/s]

step:1740, train_loss:0.12275876503001891, acc:0.3800459704065508


 64%|██████▎   | 27845/43738 [3:31:48<2:19:38,  1.90it/s]

step:1740, train_loss:0.12275591475160309, acc:0.3800682348716107


 64%|██████▎   | 27846/43738 [3:31:49<2:35:12,  1.71it/s]

step:1740, train_loss:0.1227538598166768, acc:0.38009049773755654


 64%|██████▎   | 27847/43738 [3:31:50<2:32:37,  1.74it/s]

step:1740, train_loss:0.12275258687154583, acc:0.38011275900456065


 64%|██████▎   | 27848/43738 [3:31:50<2:17:00,  1.93it/s]

step:1740, train_loss:0.12274825666467687, acc:0.3801350186727952


 64%|██████▎   | 27849/43738 [3:31:50<2:13:20,  1.99it/s]

step:1740, train_loss:0.12274405332525334, acc:0.3801572767424324


 64%|██████▎   | 27850/43738 [3:31:51<2:18:02,  1.92it/s]

step:1740, train_loss:0.12274285425450444, acc:0.3801436265709156


 64%|██████▎   | 27851/43738 [3:31:51<2:04:05,  2.13it/s]

step:1740, train_loss:0.12273934797745452, acc:0.3801658827331155


 64%|██████▎   | 27852/43738 [3:31:52<2:15:21,  1.96it/s]

step:1740, train_loss:0.12273655917221857, acc:0.380188137297142


 64%|██████▎   | 27853/43738 [3:31:52<1:53:02,  2.34it/s]

step:1740, train_loss:0.12273396050608144, acc:0.3801744874878828


 64%|██████▎   | 27854/43738 [3:31:53<2:20:00,  1.89it/s]

step:1740, train_loss:0.12273174215058964, acc:0.380196740145042


 64%|██████▎   | 27855/43738 [3:31:53<2:10:29,  2.03it/s]

step:1740, train_loss:0.12272773627883905, acc:0.38021899120445163


 64%|██████▍   | 28160/43738 [3:34:13<2:16:02,  1.91it/s]

step:1760, train_loss:0.12244636576010247, acc:0.38117897727272726


 64%|██████▍   | 28161/43738 [3:34:13<2:34:47,  1.68it/s]

step:1760, train_loss:0.12244340727161786, acc:0.38120095167075035


 64%|██████▍   | 28162/43738 [3:34:14<2:09:40,  2.00it/s]

step:1760, train_loss:0.12243906288364712, acc:0.38122292450820255


 64%|██████▍   | 28163/43738 [3:34:14<2:20:56,  1.84it/s]

step:1760, train_loss:0.12243756905458178, acc:0.38124489578525017


 64%|██████▍   | 28164/43738 [3:34:15<2:21:29,  1.83it/s]

step:1760, train_loss:0.12243362062639415, acc:0.38126686550205935


 64%|██████▍   | 28165/43738 [3:34:16<2:30:06,  1.73it/s]

step:1760, train_loss:0.12243296634600971, acc:0.3812533285993254


 64%|██████▍   | 28166/43738 [3:34:16<2:14:22,  1.93it/s]

step:1760, train_loss:0.12243269874976154, acc:0.3812397926578144


 64%|██████▍   | 28167/43738 [3:34:16<2:06:39,  2.05it/s]

step:1760, train_loss:0.12243306890221856, acc:0.38122625767742396


 64%|██████▍   | 28168/43738 [3:34:17<2:14:55,  1.92it/s]

step:1760, train_loss:0.12243304673501139, acc:0.3812127236580517


 64%|██████▍   | 28169/43738 [3:34:17<2:12:19,  1.96it/s]

step:1760, train_loss:0.12242978940714894, acc:0.38123469061734533


 64%|██████▍   | 28170/43738 [3:34:18<2:12:28,  1.96it/s]

step:1760, train_loss:0.12242571820409444, acc:0.3812566560170394


 64%|██████▍   | 28171/43738 [3:34:18<2:06:39,  2.05it/s]

step:1760, train_loss:0.12242188767900747, acc:0.38127861985730005


 64%|██████▍   | 28172/43738 [3:34:19<2:00:39,  2.15it/s]

step:1760, train_loss:0.12241780740191363, acc:0.3813005821382933


 64%|██████▍   | 28173/43738 [3:34:19<2:06:45,  2.05it/s]

step:1760, train_loss:0.12241368400961683, acc:0.3813225428601853


 64%|██████▍   | 28174/43738 [3:34:20<2:04:07,  2.09it/s]

step:1760, train_loss:0.12241235832285971, acc:0.38130900830552994


 64%|██████▍   | 28175/43738 [3:34:20<2:10:53,  1.98it/s]

step:1760, train_loss:0.12240831840237977, acc:0.3813309671694765


 65%|██████▌   | 28480/43738 [3:36:42<1:46:43,  2.38it/s]

step:1780, train_loss:0.12216562517961838, acc:0.3821629213483146


 65%|██████▌   | 28481/43738 [3:36:42<1:39:15,  2.56it/s]

step:1780, train_loss:0.12216190672889814, acc:0.38218461430427303


 65%|██████▌   | 28482/43738 [3:36:43<1:43:10,  2.46it/s]

step:1780, train_loss:0.12216012991804488, acc:0.3822063057369567


 65%|██████▌   | 28483/43738 [3:36:43<2:03:03,  2.07it/s]

step:1780, train_loss:0.1221630911043653, acc:0.38219288698521925


 65%|██████▌   | 28484/43738 [3:36:44<1:55:09,  2.21it/s]

step:1780, train_loss:0.12216233220488945, acc:0.3821794691756776


 65%|██████▌   | 28485/43738 [3:36:44<1:48:09,  2.35it/s]

step:1780, train_loss:0.12216123545073863, acc:0.3821660523082324


 65%|██████▌   | 28486/43738 [3:36:44<1:53:47,  2.23it/s]

step:1780, train_loss:0.12216181674054022, acc:0.3821526363827845


 65%|██████▌   | 28487/43738 [3:36:45<1:51:59,  2.27it/s]

step:1780, train_loss:0.12215975074099306, acc:0.3821743251307614


 65%|██████▌   | 28488/43738 [3:36:45<1:50:42,  2.30it/s]

step:1780, train_loss:0.12216052115786082, acc:0.3821609098567818


 65%|██████▌   | 28489/43738 [3:36:46<1:43:45,  2.45it/s]

step:1780, train_loss:0.12215789162017976, acc:0.38218259679174416


 65%|██████▌   | 28490/43738 [3:36:46<1:38:21,  2.58it/s]

step:1780, train_loss:0.12215506241448633, acc:0.3822042822042822


 65%|██████▌   | 28491/43738 [3:36:46<1:38:08,  2.59it/s]

step:1780, train_loss:0.12215562193712656, acc:0.3821908672914254


 65%|██████▌   | 28492/43738 [3:36:47<1:38:17,  2.59it/s]

step:1780, train_loss:0.12215235656248694, acc:0.3822125508914783


 65%|██████▌   | 28493/43738 [3:36:47<1:38:20,  2.58it/s]

step:1780, train_loss:0.12215025936256761, acc:0.38223423296950126


 65%|██████▌   | 28494/43738 [3:36:48<1:43:55,  2.44it/s]

step:1780, train_loss:0.1221466093185086, acc:0.3822559135256545


 65%|██████▌   | 28495/43738 [3:36:48<1:53:49,  2.23it/s]

step:1780, train_loss:0.12214619537675264, acc:0.38224249868397964


 66%|██████▌   | 28800/43738 [3:39:12<1:54:04,  2.18it/s]

step:1800, train_loss:0.12184709414823601, acc:0.3835763888888889


 66%|██████▌   | 28801/43738 [3:39:13<2:15:01,  1.84it/s]

step:1800, train_loss:0.12185070905887493, acc:0.38356307072671086


 66%|██████▌   | 28802/43738 [3:39:14<2:04:03,  2.01it/s]

step:1800, train_loss:0.12185543200852729, acc:0.383549753489341


 66%|██████▌   | 28803/43738 [3:39:14<1:57:53,  2.11it/s]

step:1800, train_loss:0.12185699750238278, acc:0.383536437176683


 66%|██████▌   | 28804/43738 [3:39:15<1:58:27,  2.10it/s]

step:1800, train_loss:0.12185822684576388, acc:0.38352312178864045


 66%|██████▌   | 28805/43738 [3:39:15<1:51:01,  2.24it/s]

step:1800, train_loss:0.12185539613873816, acc:0.38354452352022217


 66%|██████▌   | 28806/43738 [3:39:15<1:55:26,  2.16it/s]

step:1800, train_loss:0.12185122958908046, acc:0.3835659237658821


 66%|██████▌   | 28807/43738 [3:39:16<2:04:05,  2.01it/s]

step:1800, train_loss:0.12185048165362566, acc:0.383552608740931


 66%|██████▌   | 28808/43738 [3:39:16<1:59:14,  2.09it/s]

step:1800, train_loss:0.12185699194374265, acc:0.38353929464037767


 66%|██████▌   | 28809/43738 [3:39:17<1:53:29,  2.19it/s]

step:1800, train_loss:0.12185382370813233, acc:0.38356069283904337


 66%|██████▌   | 28810/43738 [3:39:17<1:52:54,  2.20it/s]

step:1800, train_loss:0.1218527540250004, acc:0.383547379382159


 66%|██████▌   | 28811/43738 [3:39:18<1:50:33,  2.25it/s]

step:1800, train_loss:0.12185338764791499, acc:0.3835340668494672


 66%|██████▌   | 28812/43738 [3:39:18<2:02:48,  2.03it/s]

step:1800, train_loss:0.12184948570840551, acc:0.38355546300152715


 66%|██████▌   | 28813/43738 [3:39:19<1:56:15,  2.14it/s]

step:1800, train_loss:0.12185247494786697, acc:0.38354215111234513


 66%|██████▌   | 28814/43738 [3:39:19<1:59:25,  2.08it/s]

step:1800, train_loss:0.12185273125768122, acc:0.3835288401471507


 66%|██████▌   | 28815/43738 [3:39:20<1:56:59,  2.13it/s]

step:1800, train_loss:0.12185144183206077, acc:0.38351553010584766


 67%|██████▋   | 29120/43738 [3:41:39<1:37:08,  2.51it/s]

step:1820, train_loss:0.12153906473840159, acc:0.38427197802197804


 67%|██████▋   | 29121/43738 [3:41:39<1:31:26,  2.66it/s]

step:1820, train_loss:0.12154222871193794, acc:0.38425878232203564


 67%|██████▋   | 29122/43738 [3:41:39<1:28:54,  2.74it/s]

step:1820, train_loss:0.121542931848574, acc:0.3842455875283291


 67%|██████▋   | 29123/43738 [3:41:40<1:48:36,  2.24it/s]

step:1820, train_loss:0.1215423575984151, acc:0.38423239364076506


 67%|██████▋   | 29124/43738 [3:41:40<1:43:49,  2.35it/s]

step:1820, train_loss:0.12154234065960232, acc:0.3842192006592501


 67%|██████▋   | 29125/43738 [3:41:41<1:29:54,  2.71it/s]

step:1820, train_loss:0.12153998666117992, acc:0.38420600858369097


 67%|██████▋   | 29126/43738 [3:41:41<1:22:30,  2.95it/s]

step:1820, train_loss:0.1215411712551807, acc:0.38419281741399436


 67%|██████▋   | 29127/43738 [3:41:41<1:32:49,  2.62it/s]

step:1820, train_loss:0.12154020915854803, acc:0.3841796271500669


 67%|██████▋   | 29128/43738 [3:41:42<1:23:03,  2.93it/s]

step:1820, train_loss:0.12153735233953573, acc:0.3842007690195001


 67%|██████▋   | 29129/43738 [3:41:42<1:27:10,  2.79it/s]

step:1820, train_loss:0.12153851995867229, acc:0.3841875793882385


 67%|██████▋   | 29130/43738 [3:41:43<1:58:34,  2.05it/s]

step:1820, train_loss:0.12155091197302602, acc:0.3841743906625472


 67%|██████▋   | 29131/43738 [3:41:43<1:47:42,  2.26it/s]

step:1820, train_loss:0.12155390271622261, acc:0.3841612028423329


 67%|██████▋   | 29132/43738 [3:41:43<1:32:22,  2.64it/s]

step:1820, train_loss:0.12155482616145476, acc:0.3841480159275024


 67%|██████▋   | 29133/43738 [3:41:44<1:38:42,  2.47it/s]

step:1820, train_loss:0.12155181963912516, acc:0.3841691552534926


 67%|██████▋   | 29134/43738 [3:41:44<1:49:50,  2.22it/s]

step:1820, train_loss:0.12155358372428246, acc:0.38415596897096177


 67%|██████▋   | 29135/43738 [3:41:45<1:36:19,  2.53it/s]

step:1820, train_loss:0.12154949112641164, acc:0.38417710657285054


 67%|██████▋   | 29440/43738 [3:44:03<1:40:16,  2.38it/s]

step:1840, train_loss:0.12122613263268471, acc:0.38559782608695653


 67%|██████▋   | 29441/43738 [3:44:04<1:44:52,  2.27it/s]

step:1840, train_loss:0.12122306129081166, acc:0.38561869501715296


 67%|██████▋   | 29442/43738 [3:44:05<2:09:12,  1.84it/s]

step:1840, train_loss:0.12122213823218467, acc:0.3856055974458257


 67%|██████▋   | 29443/43738 [3:44:05<1:56:08,  2.05it/s]

step:1840, train_loss:0.1212181245077828, acc:0.38562646469449446


 67%|██████▋   | 29444/43738 [3:44:06<2:02:37,  1.94it/s]

step:1840, train_loss:0.12121580851381687, acc:0.3856473305257438


 67%|██████▋   | 29445/43738 [3:44:06<2:14:44,  1.77it/s]

step:1840, train_loss:0.12121775923790645, acc:0.38563423331635255


 67%|██████▋   | 29446/43738 [3:44:07<2:29:37,  1.59it/s]

step:1840, train_loss:0.12121629722422578, acc:0.38562113699653605


 67%|██████▋   | 29447/43738 [3:44:07<2:13:53,  1.78it/s]

step:1840, train_loss:0.12121371896380008, acc:0.38564200088294226


 67%|██████▋   | 29448/43738 [3:44:08<2:05:25,  1.90it/s]

step:1840, train_loss:0.12121454958412652, acc:0.3856289051888074


 67%|██████▋   | 29449/43738 [3:44:09<2:20:05,  1.70it/s]

step:1840, train_loss:0.12121288689874529, acc:0.3856497673944786


 67%|██████▋   | 29450/43738 [3:44:09<2:20:42,  1.69it/s]

step:1840, train_loss:0.12120958530442523, acc:0.3856706281833616


 67%|██████▋   | 29451/43738 [3:44:10<2:16:23,  1.75it/s]

step:1840, train_loss:0.12121068049015016, acc:0.38565753285117654


 67%|██████▋   | 29452/43738 [3:44:10<1:54:13,  2.08it/s]

step:1840, train_loss:0.12121144105242367, acc:0.3856444384082575


 67%|██████▋   | 29453/43738 [3:44:11<2:16:50,  1.74it/s]

step:1840, train_loss:0.12121069926261577, acc:0.38563134485451395


 67%|██████▋   | 29454/43738 [3:44:11<2:19:25,  1.71it/s]

step:1840, train_loss:0.12121087345486689, acc:0.38561825218985535


 67%|██████▋   | 29455/43738 [3:44:12<2:34:12,  1.54it/s]

step:1840, train_loss:0.12120954045783462, acc:0.38560516041419113


 68%|██████▊   | 29760/43738 [3:46:37<2:36:21,  1.49it/s]

step:1860, train_loss:0.12092748610220291, acc:0.38679435483870966


 68%|██████▊   | 29761/43738 [3:46:37<2:17:42,  1.69it/s]

step:1860, train_loss:0.12093071378092908, acc:0.38678135815328785


 68%|██████▊   | 29762/43738 [3:46:38<1:59:51,  1.94it/s]

step:1860, train_loss:0.12092677787025005, acc:0.3868019622337209


 68%|██████▊   | 29763/43738 [3:46:38<1:47:14,  2.17it/s]

step:1860, train_loss:0.12092337696561412, acc:0.3868225649296106


 68%|██████▊   | 29764/43738 [3:46:38<1:31:40,  2.54it/s]

step:1860, train_loss:0.12092184754572496, acc:0.3868095686063701


 68%|██████▊   | 29765/43738 [3:46:39<1:31:19,  2.55it/s]

step:1860, train_loss:0.1209225681442424, acc:0.3867965731563917


 68%|██████▊   | 29766/43738 [3:46:39<1:40:22,  2.32it/s]

step:1860, train_loss:0.12091889235618608, acc:0.38681717395686355


 68%|██████▊   | 29767/43738 [3:46:39<1:29:00,  2.62it/s]

step:1860, train_loss:0.12091486387503787, acc:0.3868377733731985


 68%|██████▊   | 29768/43738 [3:46:40<1:43:39,  2.25it/s]

step:1860, train_loss:0.12091832350580702, acc:0.3868247782854071


 68%|██████▊   | 29769/43738 [3:46:40<1:45:33,  2.21it/s]

step:1860, train_loss:0.12091721632068443, acc:0.38681178407067757


 68%|██████▊   | 29770/43738 [3:46:41<1:45:35,  2.20it/s]

step:1860, train_loss:0.12091477079285166, acc:0.3868323815922069


 68%|██████▊   | 29771/43738 [3:46:42<1:56:10,  2.00it/s]

step:1860, train_loss:0.12091739552900793, acc:0.38681938799502874


 68%|██████▊   | 29772/43738 [3:46:42<1:51:36,  2.09it/s]

step:1860, train_loss:0.12091874412861538, acc:0.3868063952707242


 68%|██████▊   | 29773/43738 [3:46:42<1:33:57,  2.48it/s]

step:1860, train_loss:0.12091469406336974, acc:0.3868269908977933


 68%|██████▊   | 29774/43738 [3:46:43<1:55:45,  2.01it/s]

step:1860, train_loss:0.12091496673612226, acc:0.3868139987908914


 68%|██████▊   | 29775/43738 [3:46:43<1:37:08,  2.40it/s]

step:1860, train_loss:0.12091396091246688, acc:0.38680100755667507


 69%|██████▉   | 30080/43738 [3:48:59<1:52:18,  2.03it/s]

step:1880, train_loss:0.12060911442816763, acc:0.38789893617021276


 69%|██████▉   | 30081/43738 [3:48:59<1:48:14,  2.10it/s]

step:1880, train_loss:0.12060891769793758, acc:0.3878860410225724


 69%|██████▉   | 30082/43738 [3:49:00<2:10:28,  1.74it/s]

step:1880, train_loss:0.12060505669400583, acc:0.38790638920284554


 69%|██████▉   | 30083/43738 [3:49:00<1:50:34,  2.06it/s]

step:1880, train_loss:0.12060647599389718, acc:0.3878934946647608


 69%|██████▉   | 30084/43738 [3:49:00<1:46:31,  2.14it/s]

step:1880, train_loss:0.12060247018735952, acc:0.38791384124451533


 69%|██████▉   | 30085/43738 [3:49:01<1:53:04,  2.01it/s]

step:1880, train_loss:0.12060345644870024, acc:0.38790094731593816


 69%|██████▉   | 30086/43738 [3:49:02<1:52:05,  2.03it/s]

step:1880, train_loss:0.12060341328483322, acc:0.3878880542444991


 69%|██████▉   | 30087/43738 [3:49:02<1:56:12,  1.96it/s]

step:1880, train_loss:0.12060434462892229, acc:0.3878751620301127


 69%|██████▉   | 30088/43738 [3:49:02<1:38:20,  2.31it/s]

step:1880, train_loss:0.12060150581777554, acc:0.38789550651422494


 69%|██████▉   | 30089/43738 [3:49:03<1:43:23,  2.20it/s]

step:1880, train_loss:0.12060219772706339, acc:0.387882614909103


 69%|██████▉   | 30090/43738 [3:49:04<2:06:32,  1.80it/s]

step:1880, train_loss:0.1206001678843559, acc:0.3879029577932868


 69%|██████▉   | 30091/43738 [3:49:04<1:53:59,  2.00it/s]

step:1880, train_loss:0.12059664922107528, acc:0.38792329932537967


 69%|██████▉   | 30092/43738 [3:49:05<1:56:12,  1.96it/s]

step:1880, train_loss:0.12059440009506682, acc:0.3879104080818822


 69%|██████▉   | 30093/43738 [3:49:05<1:58:55,  1.91it/s]

step:1880, train_loss:0.12059312629797742, acc:0.3879307480144884


 69%|██████▉   | 30094/43738 [3:49:05<1:48:09,  2.10it/s]

step:1880, train_loss:0.12058914573222336, acc:0.3879510865953346


 69%|██████▉   | 30095/43738 [3:49:06<1:39:55,  2.28it/s]

step:1880, train_loss:0.12058517621608633, acc:0.3879714238245556


 70%|██████▉   | 30400/43738 [3:51:29<1:49:13,  2.04it/s]

step:1900, train_loss:0.12035470535767696, acc:0.3886842105263158


 70%|██████▉   | 30401/43738 [3:51:30<1:53:13,  1.96it/s]

step:1900, train_loss:0.12035236574814838, acc:0.38870431893687707


 70%|██████▉   | 30402/43738 [3:51:30<1:41:12,  2.20it/s]

step:1900, train_loss:0.12035335489835206, acc:0.3886915334517466


 70%|██████▉   | 30403/43738 [3:51:31<2:03:23,  1.80it/s]

step:1900, train_loss:0.12035348125438329, acc:0.38867874880768344


 70%|██████▉   | 30404/43738 [3:51:31<1:47:43,  2.06it/s]

step:1900, train_loss:0.12034953888106055, acc:0.38869885541376137


 70%|██████▉   | 30405/43738 [3:51:32<1:42:05,  2.18it/s]

step:1900, train_loss:0.12034596699267258, acc:0.38871896069725376


 70%|██████▉   | 30406/43738 [3:51:32<1:47:16,  2.07it/s]

step:1900, train_loss:0.12034554965668912, acc:0.3887061764125502


 70%|██████▉   | 30407/43738 [3:51:33<2:07:52,  1.74it/s]

step:1900, train_loss:0.12034260951080131, acc:0.38872628013286414


 70%|██████▉   | 30408/43738 [3:51:34<2:01:55,  1.82it/s]

step:1900, train_loss:0.1203419433609199, acc:0.3887463825309129


 70%|██████▉   | 30409/43738 [3:51:34<2:10:50,  1.70it/s]

step:1900, train_loss:0.12034629887046468, acc:0.38873359860567597


 70%|██████▉   | 30410/43738 [3:51:35<2:14:41,  1.65it/s]

step:1900, train_loss:0.12034806261698251, acc:0.38872081552121013


 70%|██████▉   | 30411/43738 [3:51:36<2:28:44,  1.49it/s]

step:1900, train_loss:0.12034757377071297, acc:0.38870803327743253


 70%|██████▉   | 30412/43738 [3:51:36<2:28:16,  1.50it/s]

step:1900, train_loss:0.1203494830376224, acc:0.3886952518742602


 70%|██████▉   | 30413/43738 [3:51:37<2:00:56,  1.84it/s]

step:1900, train_loss:0.12034782870785037, acc:0.38868247131161016


 70%|██████▉   | 30414/43738 [3:51:37<2:18:49,  1.60it/s]

step:1900, train_loss:0.12034668144725973, acc:0.388702571184323


 70%|██████▉   | 30415/43738 [3:51:38<1:55:12,  1.93it/s]

step:1900, train_loss:0.12034551499226004, acc:0.38868979122143676


 70%|███████   | 30720/43738 [3:53:56<1:34:52,  2.29it/s]

step:1920, train_loss:0.1200778815041647, acc:0.38984375


 70%|███████   | 30721/43738 [3:53:56<1:27:45,  2.47it/s]

step:1920, train_loss:0.1200761858602257, acc:0.38986361121057256


 70%|███████   | 30722/43738 [3:53:57<1:54:33,  1.89it/s]

step:1920, train_loss:0.1200754817634188, acc:0.3898834711281818


 70%|███████   | 30723/43738 [3:53:57<1:41:38,  2.13it/s]

step:1920, train_loss:0.12007317201156624, acc:0.38990332975295383


 70%|███████   | 30724/43738 [3:53:58<1:36:14,  2.25it/s]

step:1920, train_loss:0.12007409257237782, acc:0.3898906392396823


 70%|███████   | 30725/43738 [3:53:58<1:56:10,  1.87it/s]

step:1920, train_loss:0.12007074928822427, acc:0.38991049633848657


 70%|███████   | 30726/43738 [3:53:59<1:46:05,  2.04it/s]

step:1920, train_loss:0.12006685513333457, acc:0.3899303521447634


 70%|███████   | 30727/43738 [3:53:59<1:30:53,  2.39it/s]

step:1920, train_loss:0.1200633555932388, acc:0.389950206658639


 70%|███████   | 30728/43738 [3:54:00<1:55:41,  1.87it/s]

step:1920, train_loss:0.12006554803258587, acc:0.38993751627180423


 70%|███████   | 30729/43738 [3:54:00<1:49:49,  1.97it/s]

step:1920, train_loss:0.12006168464600565, acc:0.38995736926030783


 70%|███████   | 30730/43738 [3:54:01<2:10:59,  1.66it/s]

step:1920, train_loss:0.12006254255368355, acc:0.38994467946631955


 70%|███████   | 30731/43738 [3:54:02<2:16:19,  1.59it/s]

step:1920, train_loss:0.12006910248364747, acc:0.389931990498194


 70%|███████   | 30732/43738 [3:54:02<1:53:44,  1.91it/s]

step:1920, train_loss:0.12006581093434884, acc:0.38995184172849146


 70%|███████   | 30733/43738 [3:54:03<1:46:00,  2.04it/s]

step:1920, train_loss:0.12006262744151627, acc:0.3899716916669378


 70%|███████   | 30734/43738 [3:54:03<1:27:17,  2.48it/s]

step:1920, train_loss:0.12006599498538433, acc:0.389959003058502


 70%|███████   | 30735/43738 [3:54:03<1:21:10,  2.67it/s]

step:1920, train_loss:0.12006398939391767, acc:0.38994631527574425


 71%|███████   | 31040/43738 [3:56:27<1:52:46,  1.88it/s]

step:1940, train_loss:0.11977270181440024, acc:0.39107603092783505


 71%|███████   | 31041/43738 [3:56:28<1:46:57,  1.98it/s]

step:1940, train_loss:0.11977171063013595, acc:0.39106343223478623


 71%|███████   | 31042/43738 [3:56:28<1:41:35,  2.08it/s]

step:1940, train_loss:0.11977208147068127, acc:0.3910508343534566


 71%|███████   | 31043/43738 [3:56:29<1:35:41,  2.21it/s]

step:1940, train_loss:0.11976823310968153, acc:0.39107045066520635


 71%|███████   | 31044/43738 [3:56:29<1:28:26,  2.39it/s]

step:1940, train_loss:0.11976804502352928, acc:0.3910578533694112


 71%|███████   | 31045/43738 [3:56:29<1:19:04,  2.68it/s]

step:1940, train_loss:0.11977440905846894, acc:0.39104525688516667


 71%|███████   | 31046/43738 [3:56:29<1:10:57,  2.98it/s]

step:1940, train_loss:0.11977206130073824, acc:0.39106487148102814


 71%|███████   | 31047/43738 [3:56:30<1:21:54,  2.58it/s]

step:1940, train_loss:0.11977162435072879, acc:0.39105227558218186


 71%|███████   | 31048/43738 [3:56:30<1:13:05,  2.89it/s]

step:1940, train_loss:0.11976817191195614, acc:0.39107188868848236


 71%|███████   | 31049/43738 [3:56:31<1:07:17,  3.14it/s]

step:1940, train_loss:0.11976533899769644, acc:0.39109150053141806


 71%|███████   | 31050/43738 [3:56:31<1:13:44,  2.87it/s]

step:1940, train_loss:0.11976685180492654, acc:0.3910789049919485


 71%|███████   | 31051/43738 [3:56:31<1:18:07,  2.71it/s]

step:1940, train_loss:0.11976415632117086, acc:0.39109851534572154


 71%|███████   | 31052/43738 [3:56:32<1:11:51,  2.94it/s]

step:1940, train_loss:0.11976130222466612, acc:0.39111812443642924


 71%|███████   | 31053/43738 [3:56:32<1:24:39,  2.50it/s]

step:1940, train_loss:0.11976087159682623, acc:0.39110552925643255


 71%|███████   | 31054/43738 [3:56:33<1:22:08,  2.57it/s]

step:1940, train_loss:0.1197572708958378, acc:0.39112513685837574


 71%|███████   | 31055/43738 [3:56:33<1:45:18,  2.01it/s]

step:1940, train_loss:0.11976247814013055, acc:0.39111254226372566


 72%|███████▏  | 31360/43738 [3:58:53<1:53:44,  1.81it/s]

step:1960, train_loss:0.1194425205647611, acc:0.392155612244898


 72%|███████▏  | 31361/43738 [3:58:54<2:11:13,  1.57it/s]

step:1960, train_loss:0.11943873186773467, acc:0.3921749944198208


 72%|███████▏  | 31362/43738 [3:58:54<2:12:50,  1.55it/s]

step:1960, train_loss:0.11943841598439708, acc:0.3921624896371405


 72%|███████▏  | 31363/43738 [3:58:55<2:03:35,  1.67it/s]

step:1960, train_loss:0.11943909743046026, acc:0.3921499856518828


 72%|███████▏  | 31364/43738 [3:58:55<1:59:46,  1.72it/s]

step:1960, train_loss:0.11944083693286486, acc:0.39213748246397145


 72%|███████▏  | 31365/43738 [3:58:56<1:46:21,  1.94it/s]

step:1960, train_loss:0.11943939978017964, acc:0.39212498007333013


 72%|███████▏  | 31366/43738 [3:58:56<1:48:53,  1.89it/s]

step:1960, train_loss:0.11943659813553295, acc:0.3921443601351782


 72%|███████▏  | 31367/43738 [3:58:57<1:41:26,  2.03it/s]

step:1960, train_loss:0.1194329117952623, acc:0.3921637389613288


 72%|███████▏  | 31368/43738 [3:58:57<1:37:17,  2.12it/s]

step:1960, train_loss:0.11943272530318506, acc:0.3921512369293548


 72%|███████▏  | 31369/43738 [3:58:57<1:28:11,  2.34it/s]

step:1960, train_loss:0.11942901694929706, acc:0.3921706143007428


 72%|███████▏  | 31370/43738 [3:58:58<1:35:16,  2.16it/s]

step:1960, train_loss:0.11943333527272347, acc:0.3921581128466688


 72%|███████▏  | 31371/43738 [3:58:58<1:33:43,  2.20it/s]

step:1960, train_loss:0.11943480952702018, acc:0.39214561218960187


 72%|███████▏  | 31372/43738 [3:58:59<1:38:38,  2.09it/s]

step:1960, train_loss:0.11944143249450752, acc:0.39213311232946574


 72%|███████▏  | 31373/43738 [3:58:59<1:42:00,  2.02it/s]

step:1960, train_loss:0.11944917159762132, acc:0.3921206132661843


 72%|███████▏  | 31374/43738 [3:59:00<1:26:44,  2.38it/s]

step:1960, train_loss:0.11944538936444911, acc:0.39213998852553067


 72%|███████▏  | 31375/43738 [3:59:00<1:28:41,  2.32it/s]

step:1960, train_loss:0.11944881297622004, acc:0.39212749003984065


 72%|███████▏  | 31680/43738 [4:01:14<1:56:51,  1.72it/s]

step:1980, train_loss:0.1191287639438395, acc:0.3933712121212121


 72%|███████▏  | 31681/43738 [4:01:14<1:35:41,  2.10it/s]

step:1980, train_loss:0.11913110484847715, acc:0.3933587954925665


 72%|███████▏  | 31682/43738 [4:01:15<1:53:25,  1.77it/s]

step:1980, train_loss:0.11912910150107538, acc:0.3933779433116596


 72%|███████▏  | 31683/43738 [4:01:16<1:47:32,  1.87it/s]

step:1980, train_loss:0.1191266587476156, acc:0.3933970899220402


 72%|███████▏  | 31684/43738 [4:01:16<1:34:53,  2.12it/s]

step:1980, train_loss:0.11912465592978141, acc:0.3933846736523166


 72%|███████▏  | 31685/43738 [4:01:16<1:34:27,  2.13it/s]

step:1980, train_loss:0.11912881895052725, acc:0.39337225816632476


 72%|███████▏  | 31686/43738 [4:01:17<1:44:36,  1.92it/s]

step:1980, train_loss:0.11913239001587214, acc:0.3933598434639904


 72%|███████▏  | 31687/43738 [4:01:17<1:29:30,  2.24it/s]

step:1980, train_loss:0.11912873596407207, acc:0.3933789882286111


 72%|███████▏  | 31688/43738 [4:01:18<1:26:54,  2.31it/s]

step:1980, train_loss:0.11912657215062648, acc:0.3933981317849028


 72%|███████▏  | 31689/43738 [4:01:18<1:26:01,  2.33it/s]

step:1980, train_loss:0.11912578875134662, acc:0.39338571744138345


 72%|███████▏  | 31690/43738 [4:01:19<1:27:13,  2.30it/s]

step:1980, train_loss:0.11912449090118309, acc:0.39340485957715365


 72%|███████▏  | 31691/43738 [4:01:19<1:20:31,  2.49it/s]

step:1980, train_loss:0.11912083859687703, acc:0.3934240005048752


 72%|███████▏  | 31692/43738 [4:01:19<1:11:11,  2.82it/s]

step:1980, train_loss:0.11911719757944308, acc:0.39344314022466237


 72%|███████▏  | 31693/43738 [4:01:20<1:14:53,  2.68it/s]

step:1980, train_loss:0.11911735579193909, acc:0.39343072602782947


 72%|███████▏  | 31694/43738 [4:01:20<1:28:16,  2.27it/s]

step:1980, train_loss:0.11911817379374054, acc:0.393418312614375


 72%|███████▏  | 31695/43738 [4:01:21<1:34:59,  2.11it/s]

step:1980, train_loss:0.11911897700966423, acc:0.39340589998422465


 73%|███████▎  | 32000/43738 [4:03:44<1:12:10,  2.71it/s]

step:2000, train_loss:0.11893305602432293, acc:0.39403125


 73%|███████▎  | 32001/43738 [4:03:45<1:03:55,  3.06it/s]

step:2000, train_loss:0.11893039898920345, acc:0.3940501859316896


 73%|███████▎  | 32002/43738 [4:03:45<1:17:15,  2.53it/s]

step:2000, train_loss:0.11893004409274659, acc:0.3940378726329604


 73%|███████▎  | 32003/43738 [4:03:46<1:19:24,  2.46it/s]

step:2000, train_loss:0.11893245313872246, acc:0.3940255601037403


 73%|███████▎  | 32004/43738 [4:03:46<1:09:56,  2.80it/s]

step:2000, train_loss:0.11892919213888369, acc:0.3940444944381952


 73%|███████▎  | 32005/43738 [4:03:47<1:31:31,  2.14it/s]

step:2000, train_loss:0.11893088014783346, acc:0.39403218247148886


 73%|███████▎  | 32006/43738 [4:03:47<1:43:06,  1.90it/s]

step:2000, train_loss:0.11892992572824425, acc:0.3940511154158595


 73%|███████▎  | 32007/43738 [4:03:48<1:45:00,  1.86it/s]

step:2000, train_loss:0.11892937377669553, acc:0.39407004717717997


 73%|███████▎  | 32008/43738 [4:03:48<1:38:13,  1.99it/s]

step:2000, train_loss:0.1189283800554291, acc:0.39405773556610846


 73%|███████▎  | 32009/43738 [4:03:49<1:50:31,  1.77it/s]

step:2000, train_loss:0.11893659867057721, acc:0.39404542472429627


 73%|███████▎  | 32010/43738 [4:03:49<1:33:10,  2.10it/s]

step:2000, train_loss:0.11893369526705677, acc:0.39406435488909713


 73%|███████▎  | 32011/43738 [4:03:50<1:25:25,  2.29it/s]

step:2000, train_loss:0.1189342625621999, acc:0.3940520446096654


 73%|███████▎  | 32012/43738 [4:03:50<1:26:42,  2.25it/s]

step:2000, train_loss:0.11893125139638905, acc:0.39407097338498065


 73%|███████▎  | 32013/43738 [4:03:50<1:24:40,  2.31it/s]

step:2000, train_loss:0.11893146653708783, acc:0.39405866366788495


 73%|███████▎  | 32014/43738 [4:03:51<1:20:51,  2.42it/s]

step:2000, train_loss:0.11893072185180704, acc:0.3940463547198101


 73%|███████▎  | 32015/43738 [4:03:51<1:25:46,  2.28it/s]

step:2000, train_loss:0.11893155245149728, acc:0.3940340465406841


 74%|███████▍  | 32320/43738 [4:06:12<1:10:50,  2.69it/s]

step:2020, train_loss:0.1186615762306458, acc:0.39492574257425744


 74%|███████▍  | 32321/43738 [4:06:13<1:32:15,  2.06it/s]

step:2020, train_loss:0.11866300884002959, acc:0.3949135237152316


 74%|███████▍  | 32322/43738 [4:06:14<1:38:14,  1.94it/s]

step:2020, train_loss:0.11865949907222928, acc:0.3949322442918136


 74%|███████▍  | 32323/43738 [4:06:14<1:34:56,  2.00it/s]

step:2020, train_loss:0.11865824565965374, acc:0.3949509637100517


 74%|███████▍  | 32324/43738 [4:06:15<1:36:18,  1.98it/s]

step:2020, train_loss:0.11865920653373768, acc:0.3949387452048014


 74%|███████▍  | 32325/43738 [4:06:15<1:30:18,  2.11it/s]

step:2020, train_loss:0.11866062622807395, acc:0.39492652745552975


 74%|███████▍  | 32326/43738 [4:06:15<1:17:39,  2.45it/s]

step:2020, train_loss:0.11865729172686892, acc:0.39494524531337005


 74%|███████▍  | 32327/43738 [4:06:16<1:14:28,  2.55it/s]

step:2020, train_loss:0.11865812308690574, acc:0.3949330281189099


 74%|███████▍  | 32328/43738 [4:06:16<1:16:12,  2.50it/s]

step:2020, train_loss:0.11865612388299687, acc:0.3949517446176689


 74%|███████▍  | 32329/43738 [4:06:16<1:06:34,  2.86it/s]

step:2020, train_loss:0.1186537109786594, acc:0.3949704599585511


 74%|███████▍  | 32330/43738 [4:06:17<1:12:23,  2.63it/s]

step:2020, train_loss:0.11865818276516482, acc:0.3949582431178472


 74%|███████▍  | 32331/43738 [4:06:17<1:16:51,  2.47it/s]

step:2020, train_loss:0.11865520975612479, acc:0.3949769570999969


 74%|███████▍  | 32332/43738 [4:06:18<1:28:29,  2.15it/s]

step:2020, train_loss:0.11865396081909954, acc:0.39499566992453294


 74%|███████▍  | 32333/43738 [4:06:18<1:25:29,  2.22it/s]

step:2020, train_loss:0.11865029412679153, acc:0.3950143815915628


 74%|███████▍  | 32334/43738 [4:06:18<1:13:28,  2.59it/s]

step:2020, train_loss:0.11864668721051197, acc:0.3950330921011938


 74%|███████▍  | 32335/43738 [4:06:19<1:22:45,  2.30it/s]

step:2020, train_loss:0.11864309567517649, acc:0.3950518014535333


 75%|███████▍  | 32640/43738 [4:08:41<1:46:49,  1.73it/s]

step:2040, train_loss:0.11837476848283596, acc:0.39598651960784315


 75%|███████▍  | 32641/43738 [4:08:41<1:39:32,  1.86it/s]

step:2040, train_loss:0.11837468126156152, acc:0.3959743880395821


 75%|███████▍  | 32642/43738 [4:08:42<1:26:11,  2.15it/s]

step:2040, train_loss:0.11837224406125085, acc:0.39599289259236564


 75%|███████▍  | 32643/43738 [4:08:42<1:30:54,  2.03it/s]

step:2040, train_loss:0.11837171662057087, acc:0.39598076157215945


 75%|███████▍  | 32644/43738 [4:08:42<1:17:46,  2.38it/s]

step:2040, train_loss:0.11836989850273695, acc:0.3959992647959809


 75%|███████▍  | 32645/43738 [4:08:43<1:29:42,  2.06it/s]

step:2040, train_loss:0.11836907930544564, acc:0.3959871343237862


 75%|███████▍  | 32646/43738 [4:08:44<1:35:48,  1.93it/s]

step:2040, train_loss:0.11836744987221237, acc:0.3959750045947436


 75%|███████▍  | 32647/43738 [4:08:44<1:46:09,  1.74it/s]

step:2040, train_loss:0.11837401977254988, acc:0.3959628756087849


 75%|███████▍  | 32648/43738 [4:08:45<1:34:44,  1.95it/s]

step:2040, train_loss:0.11837277823945913, acc:0.3959813771134526


 75%|███████▍  | 32649/43738 [4:08:45<1:31:56,  2.01it/s]

step:2040, train_loss:0.11836959967801984, acc:0.3959998774847622


 75%|███████▍  | 32650/43738 [4:08:46<1:27:40,  2.11it/s]

step:2040, train_loss:0.11836995166883785, acc:0.3959877488514548


 75%|███████▍  | 32651/43738 [4:08:46<1:32:47,  1.99it/s]

step:2040, train_loss:0.11837015118334782, acc:0.3959756209610732


 75%|███████▍  | 32652/43738 [4:08:47<1:26:04,  2.15it/s]

step:2040, train_loss:0.11836678201885549, acc:0.39599411980889376


 75%|███████▍  | 32653/43738 [4:08:47<1:32:29,  2.00it/s]

step:2040, train_loss:0.11836874575383427, acc:0.3959819924662359


 75%|███████▍  | 32654/43738 [4:08:48<1:29:46,  2.06it/s]

step:2040, train_loss:0.1183659544679632, acc:0.3960004899859129


 75%|███████▍  | 32655/43738 [4:08:48<1:32:05,  2.01it/s]

step:2040, train_loss:0.11837066710964016, acc:0.39598836319093556


 75%|███████▌  | 32960/43738 [4:11:08<1:26:07,  2.09it/s]

step:2060, train_loss:0.11811569282821995, acc:0.3972997572815534


 75%|███████▌  | 32961/43738 [4:11:08<1:14:54,  2.40it/s]

step:2060, train_loss:0.11811219136277218, acc:0.39731804253511727


 75%|███████▌  | 32962/43738 [4:11:09<1:25:33,  2.10it/s]

step:2060, train_loss:0.11811053708253415, acc:0.3973363266792064


 75%|███████▌  | 32963/43738 [4:11:09<1:19:55,  2.25it/s]

step:2060, train_loss:0.11811048383789555, acc:0.39732427266935655


 75%|███████▌  | 32964/43738 [4:11:09<1:16:40,  2.34it/s]

step:2060, train_loss:0.11810914403690209, acc:0.3973425555151074


 75%|███████▌  | 32965/43738 [4:11:10<1:06:43,  2.69it/s]

step:2060, train_loss:0.11811986798449287, acc:0.39733050204762627


 75%|███████▌  | 32966/43738 [4:11:10<1:04:58,  2.76it/s]

step:2060, train_loss:0.1181163447249021, acc:0.3973487835952193


 75%|███████▌  | 32967/43738 [4:11:10<58:55,  3.05it/s]  

step:2060, train_loss:0.11811901034465618, acc:0.39733673067006403


 75%|███████▌  | 32968/43738 [4:11:11<1:03:12,  2.84it/s]

step:2060, train_loss:0.11811620232885402, acc:0.3973550109196797


 75%|███████▌  | 32969/43738 [4:11:11<1:24:17,  2.13it/s]

step:2060, train_loss:0.11811742878533128, acc:0.3973429585368073


 75%|███████▌  | 32970/43738 [4:11:12<1:30:05,  1.99it/s]

step:2060, train_loss:0.11811799941643426, acc:0.397361237488626


 75%|███████▌  | 32971/43738 [4:11:13<1:39:32,  1.80it/s]

step:2060, train_loss:0.11812048041174963, acc:0.3973491856479937


 75%|███████▌  | 32972/43738 [4:11:13<1:23:23,  2.15it/s]

step:2060, train_loss:0.11811788769477594, acc:0.3973674633021958


 75%|███████▌  | 32973/43738 [4:11:13<1:18:01,  2.30it/s]

step:2060, train_loss:0.11811550513769983, acc:0.3973857398477542


 75%|███████▌  | 32974/43738 [4:11:14<1:20:24,  2.23it/s]

step:2060, train_loss:0.11811386080655091, acc:0.3973736883605265


 75%|███████▌  | 32975/43738 [4:11:14<1:26:46,  2.07it/s]

step:2060, train_loss:0.11811669442717011, acc:0.39736163760424564


 76%|███████▌  | 33280/43738 [4:13:36<1:18:28,  2.22it/s]

step:2080, train_loss:0.11790954642099878, acc:0.39774639423076924


 76%|███████▌  | 33281/43738 [4:13:36<1:14:04,  2.35it/s]

step:2080, train_loss:0.11790789878458618, acc:0.3977344430756287


 76%|███████▌  | 33282/43738 [4:13:37<1:04:00,  2.72it/s]

step:2080, train_loss:0.11790565022786235, acc:0.3977525389099213


 76%|███████▌  | 33283/43738 [4:13:37<1:01:56,  2.81it/s]

step:2080, train_loss:0.11790236352213848, acc:0.3977706336568218


 76%|███████▌  | 33284/43738 [4:13:37<1:10:20,  2.48it/s]

step:2080, train_loss:0.11790136075010815, acc:0.39775868285061894


 76%|███████▌  | 33285/43738 [4:13:38<1:24:26,  2.06it/s]

step:2080, train_loss:0.11790436752983488, acc:0.3977467327625056


 76%|███████▌  | 33286/43738 [4:13:38<1:11:40,  2.43it/s]

step:2080, train_loss:0.11790626778102503, acc:0.39773478339241725


 76%|███████▌  | 33287/43738 [4:13:39<1:04:31,  2.70it/s]

step:2080, train_loss:0.11790509918489339, acc:0.3977528764983327


 76%|███████▌  | 33288/43738 [4:13:39<57:30,  3.03it/s]  

step:2080, train_loss:0.11791332028047655, acc:0.3977409276616198


 76%|███████▌  | 33289/43738 [4:13:40<1:21:16,  2.14it/s]

step:2080, train_loss:0.11791420168893985, acc:0.3977289795427919


 76%|███████▌  | 33290/43738 [4:13:40<1:18:50,  2.21it/s]

step:2080, train_loss:0.11791269307947908, acc:0.39771703214178433


 76%|███████▌  | 33291/43738 [4:13:40<1:14:01,  2.35it/s]

step:2080, train_loss:0.1179091528467853, acc:0.3977351236069809


 76%|███████▌  | 33292/43738 [4:13:41<1:11:42,  2.43it/s]

step:2080, train_loss:0.11791129295185014, acc:0.3977231767391566


 76%|███████▌  | 33293/43738 [4:13:41<1:20:38,  2.16it/s]

step:2080, train_loss:0.11791543780701723, acc:0.3977112305890127


 76%|███████▌  | 33294/43738 [4:13:42<1:25:05,  2.05it/s]

step:2080, train_loss:0.11791349808421789, acc:0.397729320598306


 76%|███████▌  | 33295/43738 [4:13:42<1:12:30,  2.40it/s]

step:2080, train_loss:0.11791044496880568, acc:0.3977474095209491


 77%|███████▋  | 33600/43738 [4:16:00<1:06:08,  2.55it/s]

step:2100, train_loss:0.11769620036995765, acc:0.39875


 77%|███████▋  | 33601/43738 [4:16:01<1:10:39,  2.39it/s]

step:2100, train_loss:0.11769579929217089, acc:0.3987381327936669


 77%|███████▋  | 33602/43738 [4:16:02<1:29:37,  1.88it/s]

step:2100, train_loss:0.11769870830225691, acc:0.39872626629367297


 77%|███████▋  | 33603/43738 [4:16:02<1:19:58,  2.11it/s]

step:2100, train_loss:0.11769735976241973, acc:0.3987441597476416


 77%|███████▋  | 33604/43738 [4:16:02<1:08:08,  2.48it/s]

step:2100, train_loss:0.11769434788290224, acc:0.3987620521366504


 77%|███████▋  | 33605/43738 [4:16:03<1:15:00,  2.25it/s]

step:2100, train_loss:0.11769352366400977, acc:0.39877994346079454


 77%|███████▋  | 33606/43738 [4:16:03<1:09:06,  2.44it/s]

step:2100, train_loss:0.11769027940276966, acc:0.398797833720169


 77%|███████▋  | 33607/43738 [4:16:03<1:01:00,  2.77it/s]

step:2100, train_loss:0.11768742285746062, acc:0.39881572291486894


 77%|███████▋  | 33608/43738 [4:16:04<1:10:41,  2.39it/s]

step:2100, train_loss:0.11768411582820727, acc:0.39883361104498927


 77%|███████▋  | 33609/43738 [4:16:04<1:03:06,  2.67it/s]

step:2100, train_loss:0.11768298879146802, acc:0.39882174417566724


 77%|███████▋  | 33610/43738 [4:16:05<1:00:40,  2.78it/s]

step:2100, train_loss:0.11768133970966795, acc:0.3988098780124963


 77%|███████▋  | 33611/43738 [4:16:05<1:07:11,  2.51it/s]

step:2100, train_loss:0.1176779360248206, acc:0.39882776471988335


 77%|███████▋  | 33612/43738 [4:16:05<1:08:00,  2.48it/s]

step:2100, train_loss:0.11767672863704176, acc:0.3988456503629656


 77%|███████▋  | 33613/43738 [4:16:06<1:05:55,  2.56it/s]

step:2100, train_loss:0.11767450298901147, acc:0.39886353494183796


 77%|███████▋  | 33614/43738 [4:16:07<1:29:09,  1.89it/s]

step:2100, train_loss:0.11767453134873204, acc:0.3988516689474624


 77%|███████▋  | 33615/43738 [4:16:07<1:31:32,  1.84it/s]

step:2100, train_loss:0.11767187858225704, acc:0.3988695522832069


 78%|███████▊  | 33920/43738 [4:18:24<1:04:37,  2.53it/s]

step:2120, train_loss:0.11742859061702571, acc:0.39964622641509434


 78%|███████▊  | 33921/43738 [4:18:25<57:35,  2.84it/s]  

step:2120, train_loss:0.11742701709274601, acc:0.3996344447392471


 78%|███████▊  | 33922/43738 [4:18:25<1:06:04,  2.48it/s]

step:2120, train_loss:0.11742598727468195, acc:0.3996521431519368


 78%|███████▊  | 33923/43738 [4:18:26<1:05:32,  2.50it/s]

step:2120, train_loss:0.11742536369488295, acc:0.3996403619962857


 78%|███████▊  | 33924/43738 [4:18:26<1:08:06,  2.40it/s]

step:2120, train_loss:0.11742830311638756, acc:0.3996285815351963


 78%|███████▊  | 33925/43738 [4:18:26<59:16,  2.76it/s]  

step:2120, train_loss:0.11742498038890958, acc:0.39964627855563745


 78%|███████▊  | 33926/43738 [4:18:27<1:04:25,  2.54it/s]

step:2120, train_loss:0.1174215222280203, acc:0.3996639745328067


 78%|███████▊  | 33927/43738 [4:18:27<1:14:21,  2.20it/s]

step:2120, train_loss:0.11742037510302926, acc:0.39965219441742567


 78%|███████▊  | 33928/43738 [4:18:28<1:21:35,  2.00it/s]

step:2120, train_loss:0.1174234466430979, acc:0.3996404149964631


 78%|███████▊  | 33929/43738 [4:18:28<1:13:03,  2.24it/s]

step:2120, train_loss:0.11742208422811118, acc:0.3996286362698576


 78%|███████▊  | 33930/43738 [4:18:29<1:10:22,  2.32it/s]

step:2120, train_loss:0.11742083145868815, acc:0.39964633068081346


 78%|███████▊  | 33931/43738 [4:18:29<1:01:04,  2.68it/s]

step:2120, train_loss:0.1174189758451123, acc:0.3996345524741387


 78%|███████▊  | 33932/43738 [4:18:29<55:04,  2.97it/s]  

step:2120, train_loss:0.11741552017919593, acc:0.3996522456678062


 78%|███████▊  | 33933/43738 [4:18:30<1:00:03,  2.72it/s]

step:2120, train_loss:0.11741368702033467, acc:0.39966993781864263


 78%|███████▊  | 33934/43738 [4:18:30<55:17,  2.96it/s]  

step:2120, train_loss:0.11741060154983982, acc:0.39968762892674015


 78%|███████▊  | 33935/43738 [4:18:30<57:15,  2.85it/s]

step:2120, train_loss:0.11740928118374779, acc:0.39967585089141006


 78%|███████▊  | 34240/43738 [4:20:51<1:10:11,  2.26it/s]

step:2140, train_loss:0.11719666723011853, acc:0.40029205607476637


 78%|███████▊  | 34241/43738 [4:20:51<1:01:00,  2.59it/s]

step:2140, train_loss:0.11719337525972025, acc:0.4003095703980608


 78%|███████▊  | 34242/43738 [4:20:52<1:08:29,  2.31it/s]

step:2140, train_loss:0.11719120000382227, acc:0.4003270836983821


 78%|███████▊  | 34243/43738 [4:20:52<1:09:13,  2.29it/s]

step:2140, train_loss:0.11719083565891979, acc:0.40031539292702156


 78%|███████▊  | 34244/43738 [4:20:53<1:20:55,  1.96it/s]

step:2140, train_loss:0.11718833978628329, acc:0.40033290503445856


 78%|███████▊  | 34245/43738 [4:20:53<1:09:27,  2.28it/s]

step:2140, train_loss:0.11718547784417584, acc:0.40035041611914146


 78%|███████▊  | 34246/43738 [4:20:54<1:15:57,  2.08it/s]

step:2140, train_loss:0.11718302458145773, acc:0.40036792618115985


 78%|███████▊  | 34247/43738 [4:20:54<1:04:35,  2.45it/s]

step:2140, train_loss:0.11717991163430926, acc:0.40038543522060327


 78%|███████▊  | 34248/43738 [4:20:54<1:00:27,  2.62it/s]

step:2140, train_loss:0.1171766518178791, acc:0.4004029432375613


 78%|███████▊  | 34249/43738 [4:20:55<1:06:11,  2.39it/s]

step:2140, train_loss:0.11717392833536226, acc:0.4004204502321236


 78%|███████▊  | 34250/43738 [4:20:55<1:19:49,  1.98it/s]

step:2140, train_loss:0.11717279691467815, acc:0.40043795620437955


 78%|███████▊  | 34251/43738 [4:20:56<1:14:43,  2.12it/s]

step:2140, train_loss:0.11717116252938346, acc:0.4004554611544188


 78%|███████▊  | 34252/43738 [4:20:56<1:12:49,  2.17it/s]

step:2140, train_loss:0.11716887902013445, acc:0.40047296508233093


 78%|███████▊  | 34253/43738 [4:20:57<1:01:33,  2.57it/s]

step:2140, train_loss:0.11716648515210494, acc:0.4004904679882054


 78%|███████▊  | 34254/43738 [4:20:57<1:07:47,  2.33it/s]

step:2140, train_loss:0.11716382220066039, acc:0.4005079698721317


 78%|███████▊  | 34255/43738 [4:20:58<1:18:13,  2.02it/s]

step:2140, train_loss:0.11716186163665107, acc:0.40052547073419936


 79%|███████▉  | 34560/43738 [4:23:11<1:03:36,  2.40it/s]

step:2160, train_loss:0.11690333267690213, acc:0.4015914351851852


 79%|███████▉  | 34561/43738 [4:23:11<55:59,  2.73it/s]  

step:2160, train_loss:0.11690364626877878, acc:0.40157981539886


 79%|███████▉  | 34562/43738 [4:23:12<1:15:23,  2.03it/s]

step:2160, train_loss:0.11690292735807122, acc:0.40156819628493723


 79%|███████▉  | 34563/43738 [4:23:13<1:25:28,  1.79it/s]

step:2160, train_loss:0.11690469945735615, acc:0.4015565778433585


 79%|███████▉  | 34564/43738 [4:23:13<1:27:50,  1.74it/s]

step:2160, train_loss:0.11690757338384837, acc:0.4015449600740655


 79%|███████▉  | 34565/43738 [4:23:14<1:12:10,  2.12it/s]

step:2160, train_loss:0.11691100615853578, acc:0.40153334297699983


 79%|███████▉  | 34566/43738 [4:23:14<1:15:25,  2.03it/s]

step:2160, train_loss:0.11691370335846009, acc:0.4015217265521032


 79%|███████▉  | 34567/43738 [4:23:14<1:08:08,  2.24it/s]

step:2160, train_loss:0.11691435060700892, acc:0.40151011079931725


 79%|███████▉  | 34568/43738 [4:23:15<1:07:16,  2.27it/s]

step:2160, train_loss:0.11691543509184023, acc:0.40149849571858365


 79%|███████▉  | 34569/43738 [4:23:15<1:12:34,  2.11it/s]

step:2160, train_loss:0.11691352699919119, acc:0.40151580896178657


 79%|███████▉  | 34570/43738 [4:23:16<1:05:53,  2.32it/s]

step:2160, train_loss:0.11691434298109565, acc:0.40150419438819784


 79%|███████▉  | 34571/43738 [4:23:16<58:44,  2.60it/s]  

step:2160, train_loss:0.1169114177662609, acc:0.40152150646495616


 79%|███████▉  | 34572/43738 [4:23:16<59:53,  2.55it/s]

step:2160, train_loss:0.11690847904950766, acc:0.40153881754020593


 79%|███████▉  | 34573/43738 [4:23:17<52:22,  2.92it/s]

step:2160, train_loss:0.11690559213896587, acc:0.4015561276140341


 79%|███████▉  | 34574/43738 [4:23:17<48:49,  3.13it/s]

step:2160, train_loss:0.1169056453220335, acc:0.4015445132180251


 79%|███████▉  | 34575/43738 [4:23:17<52:51,  2.89it/s]

step:2160, train_loss:0.11690306093012238, acc:0.40156182212581343


 80%|███████▉  | 34880/43738 [4:25:44<1:25:40,  1.72it/s]

step:2180, train_loss:0.11668649712430267, acc:0.40243692660550456


 80%|███████▉  | 34881/43738 [4:25:45<1:20:14,  1.84it/s]

step:2180, train_loss:0.11668368510491794, acc:0.4024540580831972


 80%|███████▉  | 34882/43738 [4:25:45<1:20:32,  1.83it/s]

step:2180, train_loss:0.11668164037376685, acc:0.40247118857863656


 80%|███████▉  | 34883/43738 [4:25:46<1:18:22,  1.88it/s]

step:2180, train_loss:0.11667849752423493, acc:0.4024883180919072


 80%|███████▉  | 34884/43738 [4:25:46<1:21:34,  1.81it/s]

step:2180, train_loss:0.11667522122841677, acc:0.4025054466230937


 80%|███████▉  | 34885/43738 [4:25:47<1:27:20,  1.69it/s]

step:2180, train_loss:0.11667434982313854, acc:0.4025225741722803


 80%|███████▉  | 34886/43738 [4:25:48<1:23:02,  1.78it/s]

step:2180, train_loss:0.11667150513650473, acc:0.4025397007395517


 80%|███████▉  | 34887/43738 [4:25:48<1:19:48,  1.85it/s]

step:2180, train_loss:0.11666900059537086, acc:0.4025568263249921


 80%|███████▉  | 34888/43738 [4:25:49<1:28:22,  1.67it/s]

step:2180, train_loss:0.11667004443089803, acc:0.40254528777803256


 80%|███████▉  | 34889/43738 [4:25:50<1:43:07,  1.43it/s]

step:2180, train_loss:0.11666728419626794, acc:0.40256241222161715


 80%|███████▉  | 34890/43738 [4:25:50<1:35:52,  1.54it/s]

step:2180, train_loss:0.11666577803506661, acc:0.4025508741759817


 80%|███████▉  | 34891/43738 [4:25:51<1:30:03,  1.64it/s]

step:2180, train_loss:0.11666261698212824, acc:0.4025679974778596


 80%|███████▉  | 34892/43738 [4:25:51<1:25:03,  1.73it/s]

step:2180, train_loss:0.11666012372570647, acc:0.4025851197982345


 80%|███████▉  | 34893/43738 [4:25:52<1:21:26,  1.81it/s]

step:2180, train_loss:0.1166569741134006, acc:0.40260224113719084


 80%|███████▉  | 34894/43738 [4:25:52<1:17:13,  1.91it/s]

step:2180, train_loss:0.11665365125138819, acc:0.40261936149481287


 80%|███████▉  | 34895/43738 [4:25:53<1:16:00,  1.94it/s]

step:2180, train_loss:0.1166513392109956, acc:0.402636480871185


 80%|████████  | 35200/43738 [4:28:18<1:15:31,  1.88it/s]

step:2200, train_loss:0.11648179694644527, acc:0.40357954545454544


 80%|████████  | 35201/43738 [4:28:18<1:12:43,  1.96it/s]

step:2200, train_loss:0.11648103406885148, acc:0.4035680804522599


 80%|████████  | 35202/43738 [4:28:19<1:14:57,  1.90it/s]

step:2200, train_loss:0.11647848571170172, acc:0.4035850235782058


 80%|████████  | 35203/43738 [4:28:19<1:15:59,  1.87it/s]

step:2200, train_loss:0.11647935008112514, acc:0.40357355907167003


 80%|████████  | 35204/43738 [4:28:20<1:14:10,  1.92it/s]

step:2200, train_loss:0.1164788448074164, acc:0.4035620952164527


 80%|████████  | 35205/43738 [4:28:20<1:04:13,  2.21it/s]

step:2200, train_loss:0.11647841799043417, acc:0.40355063201249824


 80%|████████  | 35206/43738 [4:28:20<57:00,  2.49it/s]  

step:2200, train_loss:0.11647633412527603, acc:0.40356757370902685


 80%|████████  | 35207/43738 [4:28:21<1:00:58,  2.33it/s]

step:2200, train_loss:0.11647893962106881, acc:0.4035561110006533


 80%|████████  | 35208/43738 [4:28:21<59:51,  2.38it/s]  

step:2200, train_loss:0.11647917596334754, acc:0.40354464894342196


 80%|████████  | 35209/43738 [4:28:22<1:01:36,  2.31it/s]

step:2200, train_loss:0.11647806107103728, acc:0.4035331875372774


 81%|████████  | 35210/43738 [4:28:22<58:27,  2.43it/s]  

step:2200, train_loss:0.1164769168017874, acc:0.403550127804601


 81%|████████  | 35211/43738 [4:28:23<58:55,  2.41it/s]

step:2200, train_loss:0.1164765385183182, acc:0.4035386668938684


 81%|████████  | 35212/43738 [4:28:23<1:06:09,  2.15it/s]

step:2200, train_loss:0.11647400499413978, acc:0.4035556060433943


 81%|████████  | 35213/43738 [4:28:23<56:51,  2.50it/s]  

step:2200, train_loss:0.11647076098783614, acc:0.40357254423082384


 81%|████████  | 35214/43738 [4:28:24<50:27,  2.82it/s]

step:2200, train_loss:0.11646784950115323, acc:0.403589481456239


 81%|████████  | 35215/43738 [4:28:24<54:15,  2.62it/s]

step:2200, train_loss:0.11646506342553435, acc:0.4036064177197217


 81%|████████  | 35520/43738 [4:30:43<1:12:30,  1.89it/s]

step:2220, train_loss:0.11617862433163774, acc:0.4047578828828829


 81%|████████  | 35521/43738 [4:30:44<1:00:58,  2.25it/s]

step:2220, train_loss:0.11617607996366011, acc:0.4047746403535937


 81%|████████  | 35522/43738 [4:30:44<1:01:58,  2.21it/s]

step:2220, train_loss:0.11617790560267184, acc:0.4047632453127639


 81%|████████  | 35523/43738 [4:30:45<1:02:36,  2.19it/s]

step:2220, train_loss:0.116178677440594, acc:0.40475185091349264


 81%|████████  | 35524/43738 [4:30:45<1:01:16,  2.23it/s]

step:2220, train_loss:0.1161761208601617, acc:0.4047686071388357


 81%|████████  | 35525/43738 [4:30:45<52:45,  2.59it/s]  

step:2220, train_loss:0.11617405176293923, acc:0.4047853624208304


 81%|████████  | 35526/43738 [4:30:46<47:15,  2.90it/s]

step:2220, train_loss:0.11617390140086914, acc:0.4047739683612002


 81%|████████  | 35527/43738 [4:30:46<54:23,  2.52it/s]

step:2220, train_loss:0.11617213332545501, acc:0.4047907225490472


 81%|████████  | 35528/43738 [4:30:46<53:46,  2.54it/s]

step:2220, train_loss:0.11617367500639682, acc:0.4047793289799595


 81%|████████  | 35529/43738 [4:30:47<1:05:07,  2.10it/s]

step:2220, train_loss:0.11617423419676218, acc:0.40476793605223904


 81%|████████  | 35530/43738 [4:30:48<1:06:29,  2.06it/s]

step:2220, train_loss:0.11617421916668559, acc:0.4047565437658317


 81%|████████  | 35531/43738 [4:30:48<59:50,  2.29it/s]  

step:2220, train_loss:0.11617473366926441, acc:0.40474515212068335


 81%|████████  | 35532/43738 [4:30:48<59:39,  2.29it/s]

step:2220, train_loss:0.11617233326321554, acc:0.40476190476190477


 81%|████████  | 35533/43738 [4:30:49<54:27,  2.51it/s]

step:2220, train_loss:0.11616906504314439, acc:0.40477865646019195


 81%|████████  | 35534/43738 [4:30:49<50:09,  2.73it/s]

step:2220, train_loss:0.11616615365093987, acc:0.40479540721562446


 81%|████████  | 35535/43738 [4:30:50<1:01:14,  2.23it/s]

step:2220, train_loss:0.11616723622140901, acc:0.4047840157591107


 82%|████████▏ | 35840/43738 [4:33:15<1:14:02,  1.78it/s]

step:2240, train_loss:0.1159673029050424, acc:0.4056919642857143


 82%|████████▏ | 35841/43738 [4:33:15<1:08:31,  1.92it/s]

step:2240, train_loss:0.11597120107726552, acc:0.4056806450712871


 82%|████████▏ | 35842/43738 [4:33:16<58:41,  2.24it/s]  

step:2240, train_loss:0.11597179594068321, acc:0.4056693264884772


 82%|████████▏ | 35843/43738 [4:33:16<56:41,  2.32it/s]

step:2240, train_loss:0.11596977470187982, acc:0.40568590798761267


 82%|████████▏ | 35844/43738 [4:33:16<52:21,  2.51it/s]

step:2240, train_loss:0.11596768967887545, acc:0.40570248856154445


 82%|████████▏ | 35845/43738 [4:33:17<45:52,  2.87it/s]

step:2240, train_loss:0.11596494274701713, acc:0.40571906821035014


 82%|████████▏ | 35846/43738 [4:33:17<44:36,  2.95it/s]

step:2240, train_loss:0.115962057996747, acc:0.405735646934107


 82%|████████▏ | 35847/43738 [4:33:17<41:49,  3.14it/s]

step:2240, train_loss:0.11596196336812595, acc:0.4057243283956816


 82%|████████▏ | 35848/43738 [4:33:18<44:15,  2.97it/s]

step:2240, train_loss:0.11596099093117052, acc:0.4057409060477572


 82%|████████▏ | 35849/43738 [4:33:18<45:53,  2.87it/s]

step:2240, train_loss:0.11595918046237769, acc:0.4057295879940863


 82%|████████▏ | 35850/43738 [4:33:18<47:35,  2.76it/s]

step:2240, train_loss:0.11595639283133785, acc:0.40574616457461643


 82%|████████▏ | 35851/43738 [4:33:19<52:00,  2.53it/s]

step:2240, train_loss:0.11595392361578426, acc:0.40576274023039804


 82%|████████▏ | 35852/43738 [4:33:19<50:35,  2.60it/s]

step:2240, train_loss:0.11595165901040275, acc:0.40577931496150843


 82%|████████▏ | 35853/43738 [4:33:20<52:29,  2.50it/s]

step:2240, train_loss:0.11594849368751498, acc:0.40579588876802497


 82%|████████▏ | 35854/43738 [4:33:20<55:17,  2.38it/s]

step:2240, train_loss:0.11594775958850673, acc:0.4058124616500251


 82%|████████▏ | 35855/43738 [4:33:21<55:07,  2.38it/s]

step:2240, train_loss:0.11595372872710612, acc:0.40580114349463114


 83%|████████▎ | 36160/43738 [4:35:36<58:20,  2.17it/s]  

step:2260, train_loss:0.11569978004428205, acc:0.40696902654867256


 83%|████████▎ | 36161/43738 [4:35:36<55:56,  2.26it/s]

step:2260, train_loss:0.11569661136544888, acc:0.40698542628798984


 83%|████████▎ | 36162/43738 [4:35:37<1:04:21,  1.96it/s]

step:2260, train_loss:0.11569600876707346, acc:0.40697417178253414


 83%|████████▎ | 36163/43738 [4:35:38<1:06:57,  1.89it/s]

step:2260, train_loss:0.115698892113655, acc:0.40696291789951056


 83%|████████▎ | 36164/43738 [4:35:38<1:09:15,  1.82it/s]

step:2260, train_loss:0.11570045521134048, acc:0.4069516646388674


 83%|████████▎ | 36165/43738 [4:35:39<1:08:04,  1.85it/s]

step:2260, train_loss:0.11570355067505872, acc:0.40694041200055303


 83%|████████▎ | 36166/43738 [4:35:39<1:00:11,  2.10it/s]

step:2260, train_loss:0.11570275684466262, acc:0.40695681026378366


 83%|████████▎ | 36167/43738 [4:35:39<58:04,  2.17it/s]  

step:2260, train_loss:0.11570138205131333, acc:0.40694555810545524


 83%|████████▎ | 36168/43738 [4:35:40<59:15,  2.13it/s]

step:2260, train_loss:0.11570030611837777, acc:0.40696195531961954


 83%|████████▎ | 36169/43738 [4:35:40<58:13,  2.17it/s]

step:2260, train_loss:0.11570289134229038, acc:0.40695070364123975


 83%|████████▎ | 36170/43738 [4:35:41<57:28,  2.19it/s]

step:2260, train_loss:0.11570321963188628, acc:0.4069394525850152


 83%|████████▎ | 36171/43738 [4:35:41<51:58,  2.43it/s]

step:2260, train_loss:0.11570011356694788, acc:0.4069558486080009


 83%|████████▎ | 36172/43738 [4:35:41<45:15,  2.79it/s]

step:2260, train_loss:0.11569819767969011, acc:0.4069445980316267


 83%|████████▎ | 36173/43738 [4:35:42<49:59,  2.52it/s]

step:2260, train_loss:0.11569778496880635, acc:0.4069609930058331


 83%|████████▎ | 36174/43738 [4:35:42<55:48,  2.26it/s]

step:2260, train_loss:0.11569774195612444, acc:0.40697738707358877


 83%|████████▎ | 36175/43738 [4:35:43<54:20,  2.32it/s]

step:2260, train_loss:0.11569669442569396, acc:0.4069661368348307


 83%|████████▎ | 36480/43738 [4:38:05<57:03,  2.12it/s]  

step:2280, train_loss:0.11553509728531289, acc:0.40737390350877195


 83%|████████▎ | 36481/43738 [4:38:06<1:04:10,  1.88it/s]

step:2280, train_loss:0.1155348685382818, acc:0.40736273676708423


 83%|████████▎ | 36482/43738 [4:38:06<1:05:08,  1.86it/s]

step:2280, train_loss:0.11553668547034295, acc:0.4073515706375747


 83%|████████▎ | 36483/43738 [4:38:07<57:21,  2.11it/s]  

step:2280, train_loss:0.11553432736456064, acc:0.40736781514678067


 83%|████████▎ | 36484/43738 [4:38:07<53:26,  2.26it/s]

step:2280, train_loss:0.11553229512192024, acc:0.4073840587654862


 83%|████████▎ | 36485/43738 [4:38:08<51:16,  2.36it/s]

step:2280, train_loss:0.11553314390485314, acc:0.4073728929697136


 83%|████████▎ | 36486/43738 [4:38:08<52:52,  2.29it/s]

step:2280, train_loss:0.11553569257108684, acc:0.4073617277860001


 83%|████████▎ | 36487/43738 [4:38:08<50:50,  2.38it/s]

step:2280, train_loss:0.11553416700408117, acc:0.40737797023597444


 83%|████████▎ | 36488/43738 [4:38:09<52:02,  2.32it/s]

step:2280, train_loss:0.11553331368157894, acc:0.4073942117956588


 83%|████████▎ | 36489/43738 [4:38:09<46:16,  2.61it/s]

step:2280, train_loss:0.1155304297833788, acc:0.40741045246512647


 83%|████████▎ | 36490/43738 [4:38:10<57:23,  2.10it/s]

step:2280, train_loss:0.11553540305025581, acc:0.40739928747602083


 83%|████████▎ | 36491/43738 [4:38:10<1:02:19,  1.94it/s]

step:2280, train_loss:0.11553321666771171, acc:0.40741552711627527


 83%|████████▎ | 36492/43738 [4:38:11<58:19,  2.07it/s]  

step:2280, train_loss:0.11553646880452223, acc:0.40740436260002194


 83%|████████▎ | 36493/43738 [4:38:11<1:04:01,  1.89it/s]

step:2280, train_loss:0.11553605100823995, acc:0.40742060121119117


 83%|████████▎ | 36494/43738 [4:38:12<1:07:39,  1.78it/s]

step:2280, train_loss:0.11553591779419684, acc:0.40740943716775363


 83%|████████▎ | 36495/43738 [4:38:12<57:32,  2.10it/s]  

step:2280, train_loss:0.11553373102452008, acc:0.40742567474996577


 84%|████████▍ | 36800/43738 [4:40:30<47:59,  2.41it/s]  

step:2300, train_loss:0.11532312350097844, acc:0.40817934782608695


 84%|████████▍ | 36801/43738 [4:40:31<57:29,  2.01it/s]

step:2300, train_loss:0.11532057886003312, acc:0.4081954294720252


 84%|████████▍ | 36802/43738 [4:40:32<53:14,  2.17it/s]

step:2300, train_loss:0.11531905070082928, acc:0.4082115102440085


 84%|████████▍ | 36803/43738 [4:40:32<1:04:54,  1.78it/s]

step:2300, train_loss:0.11532067765291945, acc:0.40820041844414856


 84%|████████▍ | 36804/43738 [4:40:33<1:04:23,  1.79it/s]

step:2300, train_loss:0.11532212273530651, acc:0.4081893272470384


 84%|████████▍ | 36805/43738 [4:40:34<1:07:20,  1.72it/s]

step:2300, train_loss:0.11532383875689574, acc:0.4081782366526287


 84%|████████▍ | 36806/43738 [4:40:34<1:14:50,  1.54it/s]

step:2300, train_loss:0.11532695230492773, acc:0.4081671466608705


 84%|████████▍ | 36807/43738 [4:40:35<1:01:48,  1.87it/s]

step:2300, train_loss:0.11532534939571511, acc:0.4081832260167903


 84%|████████▍ | 36808/43738 [4:40:35<57:24,  2.01it/s]  

step:2300, train_loss:0.11532252718238505, acc:0.408199304499022


 84%|████████▍ | 36809/43738 [4:40:35<55:33,  2.08it/s]

step:2300, train_loss:0.11532169292066363, acc:0.4081882148387623


 84%|████████▍ | 36810/43738 [4:40:36<1:03:07,  1.83it/s]

step:2300, train_loss:0.11533931666212713, acc:0.40817712578103776


 84%|████████▍ | 36811/43738 [4:40:37<1:02:57,  1.83it/s]

step:2300, train_loss:0.11534312204388363, acc:0.40816603732579937


 84%|████████▍ | 36812/43738 [4:40:37<52:49,  2.19it/s]  

step:2300, train_loss:0.11534010359214558, acc:0.40818211452787134


 84%|████████▍ | 36813/43738 [4:40:37<48:28,  2.38it/s]

step:2300, train_loss:0.11533710598824226, acc:0.4081981908564909


 84%|████████▍ | 36814/43738 [4:40:38<56:06,  2.06it/s]

step:2300, train_loss:0.11533583061287264, acc:0.40821426631172925


 84%|████████▍ | 36815/43738 [4:40:38<50:51,  2.27it/s]

step:2300, train_loss:0.11534192171677106, acc:0.4082031780524243


 85%|████████▍ | 37120/43738 [4:42:57<47:59,  2.30it/s]  

step:2320, train_loss:0.11516824714127159, acc:0.4087823275862069


 85%|████████▍ | 37121/43738 [4:42:57<47:47,  2.31it/s]

step:2320, train_loss:0.11516574626019868, acc:0.4087982543573718


 85%|████████▍ | 37122/43738 [4:42:58<47:11,  2.34it/s]

step:2320, train_loss:0.11516336136946928, acc:0.40881418027045957


 85%|████████▍ | 37123/43738 [4:42:58<44:45,  2.46it/s]

step:2320, train_loss:0.11516240533076576, acc:0.4088031678474261


 85%|████████▍ | 37124/43738 [4:42:58<43:16,  2.55it/s]

step:2320, train_loss:0.11515999890193017, acc:0.40881909277017564


 85%|████████▍ | 37125/43738 [4:42:59<56:28,  1.95it/s]

step:2320, train_loss:0.11515965820470886, acc:0.40883501683501683


 85%|████████▍ | 37126/43738 [4:43:00<53:35,  2.06it/s]

step:2320, train_loss:0.11515825887672533, acc:0.40882400474061303


 85%|████████▍ | 37127/43738 [4:43:00<57:07,  1.93it/s]

step:2320, train_loss:0.11515597844762561, acc:0.40883992781533657


 85%|████████▍ | 37128/43738 [4:43:01<56:28,  1.95it/s]

step:2320, train_loss:0.11515762389779649, acc:0.40882891618185735


 85%|████████▍ | 37129/43738 [4:43:01<51:13,  2.15it/s]

step:2320, train_loss:0.11515474044812782, acc:0.4088448382665841


 85%|████████▍ | 37130/43738 [4:43:01<50:02,  2.20it/s]

step:2320, train_loss:0.11515430249773669, acc:0.4088338270939941


 85%|████████▍ | 37131/43738 [4:43:02<54:21,  2.03it/s]

step:2320, train_loss:0.11515125913875762, acc:0.4088497481888449


 85%|████████▍ | 37132/43738 [4:43:03<53:31,  2.06it/s]

step:2320, train_loss:0.11515067872195976, acc:0.4088387374771087


 85%|████████▍ | 37133/43738 [4:43:03<45:58,  2.39it/s]

step:2320, train_loss:0.11514769726448226, acc:0.4088546575822045


 85%|████████▍ | 37134/43738 [4:43:03<47:21,  2.32it/s]

step:2320, train_loss:0.1151471501625924, acc:0.4088436473312867


 85%|████████▍ | 37135/43738 [4:43:04<42:02,  2.62it/s]

step:2320, train_loss:0.11514405317259692, acc:0.40885956644674837


 86%|████████▌ | 37440/43738 [4:45:21<1:00:23,  1.74it/s]

step:2340, train_loss:0.11491584387031448, acc:0.4098290598290598


 86%|████████▌ | 37441/43738 [4:45:21<50:35,  2.07it/s]  

step:2340, train_loss:0.1149129839053283, acc:0.409844822520766


 86%|████████▌ | 37442/43738 [4:45:21<50:11,  2.09it/s]

step:2340, train_loss:0.11491013778441235, acc:0.40986058437049305


 86%|████████▌ | 37443/43738 [4:45:22<48:26,  2.17it/s]

step:2340, train_loss:0.11490995252484579, acc:0.40987634537830836


 86%|████████▌ | 37444/43738 [4:45:22<41:36,  2.52it/s]

step:2340, train_loss:0.11491249390094381, acc:0.4098653989958338


 86%|████████▌ | 37445/43738 [4:45:22<37:17,  2.81it/s]

step:2340, train_loss:0.11490945629180273, acc:0.4098811590332488


 86%|████████▌ | 37446/43738 [4:45:23<38:21,  2.73it/s]

step:2340, train_loss:0.1149073548649269, acc:0.40989691822891633


 86%|████████▌ | 37447/43738 [4:45:23<34:40,  3.02it/s]

step:2340, train_loss:0.11490430846501828, acc:0.4099126765829038


 86%|████████▌ | 37448/43738 [4:45:24<47:58,  2.19it/s]

step:2340, train_loss:0.11490261849093027, acc:0.4099284340952788


 86%|████████▌ | 37449/43738 [4:45:24<45:17,  2.31it/s]

step:2340, train_loss:0.11489979137974259, acc:0.40994419076610855


 86%|████████▌ | 37450/43738 [4:45:25<52:48,  1.98it/s]

step:2340, train_loss:0.11489864199668924, acc:0.4099599465954606


 86%|████████▌ | 37451/43738 [4:45:25<47:04,  2.23it/s]

step:2340, train_loss:0.11489763575795496, acc:0.40994900002670154


 86%|████████▌ | 37452/43738 [4:45:26<50:11,  2.09it/s]

step:2340, train_loss:0.11489899706974041, acc:0.4099380540425077


 86%|████████▌ | 37453/43738 [4:45:26<42:59,  2.44it/s]

step:2340, train_loss:0.11489593080235613, acc:0.409953808773663


 86%|████████▌ | 37454/43738 [4:45:26<43:24,  2.41it/s]

step:2340, train_loss:0.11489516349404902, acc:0.40994286324558127


 86%|████████▌ | 37455/43738 [4:45:27<43:37,  2.40it/s]

step:2340, train_loss:0.11489819418339747, acc:0.40993191830196235


 86%|████████▋ | 37760/43738 [4:47:49<46:21,  2.15it/s]  

step:2360, train_loss:0.11475658005680296, acc:0.4103548728813559


 86%|████████▋ | 37761/43738 [4:47:50<49:29,  2.01it/s]

step:2360, train_loss:0.11475621400000785, acc:0.4103440057201875


 86%|████████▋ | 37762/43738 [4:47:50<42:35,  2.34it/s]

step:2360, train_loss:0.11475355407815391, acc:0.41035962078279753


 86%|████████▋ | 37763/43738 [4:47:50<39:34,  2.52it/s]

step:2360, train_loss:0.11475751184098787, acc:0.4103487540714456


 86%|████████▋ | 37764/43738 [4:47:51<44:34,  2.23it/s]

step:2360, train_loss:0.11475678417777482, acc:0.41033788793560005


 86%|████████▋ | 37765/43738 [4:47:51<47:31,  2.10it/s]

step:2360, train_loss:0.11475422855369125, acc:0.410353501919767


 86%|████████▋ | 37766/43738 [4:47:52<41:16,  2.41it/s]

step:2360, train_loss:0.11475242137008418, acc:0.4103691150770534


 86%|████████▋ | 37767/43738 [4:47:52<47:17,  2.10it/s]

step:2360, train_loss:0.11475386149345704, acc:0.41035824926523157


 86%|████████▋ | 37768/43738 [4:47:53<43:24,  2.29it/s]

step:2360, train_loss:0.11475385024823134, acc:0.4103738614700275


 86%|████████▋ | 37769/43738 [4:47:53<44:06,  2.26it/s]

step:2360, train_loss:0.11475226295142896, acc:0.41038947284810295


 86%|████████▋ | 37770/43738 [4:47:53<37:50,  2.63it/s]

step:2360, train_loss:0.11474951256792168, acc:0.4104050833995234


 86%|████████▋ | 37771/43738 [4:47:54<43:44,  2.27it/s]

step:2360, train_loss:0.11474883671817004, acc:0.41042069312435464


 86%|████████▋ | 37772/43738 [4:47:54<36:52,  2.70it/s]

step:2360, train_loss:0.11474597335085715, acc:0.4104363020226623


 86%|████████▋ | 37773/43738 [4:47:54<36:27,  2.73it/s]

step:2360, train_loss:0.11474569898537362, acc:0.41042543615810234


 86%|████████▋ | 37774/43738 [4:47:55<36:44,  2.71it/s]

step:2360, train_loss:0.11474569765340224, acc:0.4104145708688516


 86%|████████▋ | 37775/43738 [4:47:55<39:03,  2.54it/s]

step:2360, train_loss:0.11474653409377736, acc:0.4104037061548643


 87%|████████▋ | 38080/43738 [4:50:16<1:00:11,  1.57it/s]

step:2380, train_loss:0.11451805092464719, acc:0.4114233193277311


 87%|████████▋ | 38081/43738 [4:50:16<51:24,  1.83it/s]  

step:2380, train_loss:0.11451700000938361, acc:0.41141251542764107


 87%|████████▋ | 38082/43738 [4:50:16<45:42,  2.06it/s]

step:2380, train_loss:0.11451556480367177, acc:0.41142797121999897


 87%|████████▋ | 38083/43738 [4:50:17<42:44,  2.21it/s]

step:2380, train_loss:0.11451377851471119, acc:0.411443426200667


 87%|████████▋ | 38084/43738 [4:50:17<40:28,  2.33it/s]

step:2380, train_loss:0.11451314791979034, acc:0.41143262262367397


 87%|████████▋ | 38085/43738 [4:50:18<50:16,  1.87it/s]

step:2380, train_loss:0.11451186099225454, acc:0.41142181961402124


 87%|████████▋ | 38086/43738 [4:50:18<47:44,  1.97it/s]

step:2380, train_loss:0.11451010959409569, acc:0.4114372735388332


 87%|████████▋ | 38087/43738 [4:50:19<43:41,  2.16it/s]

step:2380, train_loss:0.11450751985472568, acc:0.4114527266521385


 87%|████████▋ | 38088/43738 [4:50:19<39:32,  2.38it/s]

step:2380, train_loss:0.11450578760429193, acc:0.4114681789540013


 87%|████████▋ | 38089/43738 [4:50:20<47:14,  1.99it/s]

step:2380, train_loss:0.1145040663379332, acc:0.4114573761453438


 87%|████████▋ | 38090/43738 [4:50:21<52:50,  1.78it/s]

step:2380, train_loss:0.11450144442138124, acc:0.41147282751378317


 87%|████████▋ | 38091/43738 [4:50:21<55:20,  1.70it/s]

step:2380, train_loss:0.11449885512061876, acc:0.4114882780709354


 87%|████████▋ | 38092/43738 [4:50:22<49:42,  1.89it/s]

step:2380, train_loss:0.11450149491378406, acc:0.41147747558542475


 87%|████████▋ | 38093/43738 [4:50:22<42:05,  2.24it/s]

step:2380, train_loss:0.11449854744479528, acc:0.41149292520935604


 87%|████████▋ | 38094/43738 [4:50:22<39:07,  2.40it/s]

step:2380, train_loss:0.11449557770937949, acc:0.41150837402215573


 87%|████████▋ | 38095/43738 [4:50:22<36:50,  2.55it/s]

step:2380, train_loss:0.11449364153459679, acc:0.41152382202388765


 88%|████████▊ | 38400/43738 [4:52:43<43:55,  2.03it/s]  

step:2400, train_loss:0.11430649804395633, acc:0.4125


 88%|████████▊ | 38401/43738 [4:52:44<47:47,  1.86it/s]

step:2400, train_loss:0.11430486986238281, acc:0.41251529908075313


 88%|████████▊ | 38402/43738 [4:52:44<43:46,  2.03it/s]

step:2400, train_loss:0.11430255420324334, acc:0.4125305973647206


 88%|████████▊ | 38403/43738 [4:52:44<42:32,  2.09it/s]

step:2400, train_loss:0.11430209463899804, acc:0.4125198552196443


 88%|████████▊ | 38404/43738 [4:52:45<40:25,  2.20it/s]

step:2400, train_loss:0.11430097380727648, acc:0.41250911363399645


 88%|████████▊ | 38405/43738 [4:52:45<35:37,  2.50it/s]

step:2400, train_loss:0.11429799850642527, acc:0.4125244108839995


 88%|████████▊ | 38406/43738 [4:52:46<37:59,  2.34it/s]

step:2400, train_loss:0.11429551713743166, acc:0.4125397073373952


 88%|████████▊ | 38407/43738 [4:52:46<47:52,  1.86it/s]

step:2400, train_loss:0.11429855547658871, acc:0.4125289660738928


 88%|████████▊ | 38408/43738 [4:52:47<50:37,  1.76it/s]

step:2400, train_loss:0.1142986119702107, acc:0.4125442616121641


 88%|████████▊ | 38409/43738 [4:52:47<41:38,  2.13it/s]

step:2400, train_loss:0.11429947088525942, acc:0.4125335207893983


 88%|████████▊ | 38410/43738 [4:52:48<45:03,  1.97it/s]

step:2400, train_loss:0.11429694909993195, acc:0.412548815412653


 88%|████████▊ | 38411/43738 [4:52:48<42:12,  2.10it/s]

step:2400, train_loss:0.11429438567685249, acc:0.41256410923954073


 88%|████████▊ | 38412/43738 [4:52:49<44:54,  1.98it/s]

step:2400, train_loss:0.11429234991475754, acc:0.4125794022701239


 88%|████████▊ | 38413/43738 [4:52:49<44:14,  2.01it/s]

step:2400, train_loss:0.11429239532589724, acc:0.4125686616510036


 88%|████████▊ | 38414/43738 [4:52:50<43:56,  2.02it/s]

step:2400, train_loss:0.11429510940098594, acc:0.41255792159108656


 88%|████████▊ | 38415/43738 [4:52:50<42:54,  2.07it/s]

step:2400, train_loss:0.114295090414679, acc:0.4125471820903293


 89%|████████▊ | 38720/43738 [4:55:09<36:25,  2.30it/s]

step:2420, train_loss:0.11409642468393406, acc:0.41366219008264465


 89%|████████▊ | 38721/43738 [4:55:09<37:23,  2.24it/s]

step:2420, train_loss:0.11409595672972937, acc:0.41367733271351464


 89%|████████▊ | 38722/43738 [4:55:10<33:11,  2.52it/s]

step:2420, train_loss:0.11409342461366573, acc:0.4136924745622643


 89%|████████▊ | 38723/43738 [4:55:10<30:01,  2.78it/s]

step:2420, train_loss:0.11409223883629674, acc:0.4136817911835343


 89%|████████▊ | 38724/43738 [4:55:11<35:56,  2.33it/s]

step:2420, train_loss:0.11409310950850572, acc:0.4136711083565747


 89%|████████▊ | 38725/43738 [4:55:11<40:27,  2.07it/s]

step:2420, train_loss:0.11409355422766093, acc:0.41368624919302777


 89%|████████▊ | 38726/43738 [4:55:12<42:49,  1.95it/s]

step:2420, train_loss:0.11409153151726951, acc:0.41370138924753397


 89%|████████▊ | 38727/43738 [4:55:12<42:59,  1.94it/s]

step:2420, train_loss:0.11409121976834272, acc:0.41369070674206626


 89%|████████▊ | 38728/43738 [4:55:13<45:28,  1.84it/s]

step:2420, train_loss:0.11409150477338496, acc:0.4136800247882669


 89%|████████▊ | 38729/43738 [4:55:13<38:49,  2.15it/s]

step:2420, train_loss:0.11408892684507412, acc:0.4136951638307212


 89%|████████▊ | 38730/43738 [4:55:14<36:39,  2.28it/s]

step:2420, train_loss:0.11408849653536797, acc:0.4136844823134521


 89%|████████▊ | 38731/43738 [4:55:14<32:27,  2.57it/s]

step:2420, train_loss:0.11408737854569279, acc:0.4136738013477576


 89%|████████▊ | 38732/43738 [4:55:14<33:21,  2.50it/s]

step:2420, train_loss:0.11408463100295332, acc:0.4136889393782919


 89%|████████▊ | 38733/43738 [4:55:15<33:55,  2.46it/s]

step:2420, train_loss:0.11408692609724858, acc:0.41367825884904347


 89%|████████▊ | 38734/43738 [4:55:15<43:42,  1.91it/s]

step:2420, train_loss:0.11408839334111745, acc:0.41366757887127587


 89%|████████▊ | 38735/43738 [4:55:16<50:52,  1.64it/s]

step:2420, train_loss:0.11409168740967131, acc:0.41365689944494644


 89%|████████▉ | 39040/43738 [4:57:30<34:02,  2.30it/s]

step:2440, train_loss:0.11389698642722124, acc:0.4149077868852459


 89%|████████▉ | 39041/43738 [4:57:31<38:07,  2.05it/s]

step:2440, train_loss:0.11389556107973232, acc:0.4149227734945314


 89%|████████▉ | 39042/43738 [4:57:32<43:18,  1.81it/s]

step:2440, train_loss:0.11389349561638312, acc:0.4149377593360996


 89%|████████▉ | 39043/43738 [4:57:32<35:49,  2.18it/s]

step:2440, train_loss:0.11389058005070789, acc:0.41495274441000946


 89%|████████▉ | 39044/43738 [4:57:32<31:32,  2.48it/s]

step:2440, train_loss:0.11388969480347953, acc:0.41496772871632004


 89%|████████▉ | 39045/43738 [4:57:32<28:20,  2.76it/s]

step:2440, train_loss:0.11388685491654055, acc:0.41498271225509026


 89%|████████▉ | 39046/43738 [4:57:33<32:29,  2.41it/s]

step:2440, train_loss:0.11388724867895879, acc:0.4149720842083696


 89%|████████▉ | 39047/43738 [4:57:34<38:24,  2.04it/s]

step:2440, train_loss:0.11388747884388024, acc:0.41496145670602097


 89%|████████▉ | 39048/43738 [4:57:34<41:33,  1.88it/s]

step:2440, train_loss:0.11388751384648617, acc:0.41495082974800246


 89%|████████▉ | 39049/43738 [4:57:35<47:31,  1.64it/s]

step:2440, train_loss:0.11389026615545772, acc:0.41494020333427234


 89%|████████▉ | 39050/43738 [4:57:36<51:52,  1.51it/s]

step:2440, train_loss:0.11388930518686748, acc:0.41492957746478876


 89%|████████▉ | 39051/43738 [4:57:36<42:15,  1.85it/s]

step:2440, train_loss:0.11388644384266705, acc:0.4149445596783693


 89%|████████▉ | 39052/43738 [4:57:36<34:42,  2.25it/s]

step:2440, train_loss:0.11388414543356581, acc:0.41495954112465433


 89%|████████▉ | 39053/43738 [4:57:37<31:16,  2.50it/s]

step:2440, train_loss:0.1138849591745479, acc:0.4149489155762682


 89%|████████▉ | 39054/43738 [4:57:37<32:19,  2.41it/s]

step:2440, train_loss:0.11388568680356902, acc:0.41493829057202847


 89%|████████▉ | 39055/43738 [4:57:37<30:05,  2.59it/s]

step:2440, train_loss:0.11388636634075279, acc:0.41492766611189347


 90%|████████▉ | 39360/43738 [4:59:57<36:09,  2.02it/s]

step:2460, train_loss:0.11366296323668926, acc:0.4159298780487805


 90%|████████▉ | 39361/43738 [4:59:58<30:55,  2.36it/s]

step:2460, train_loss:0.11366106608706986, acc:0.41594471685170603


 90%|████████▉ | 39362/43738 [4:59:58<26:23,  2.76it/s]

step:2460, train_loss:0.11366221570055406, acc:0.41593414968751585


 90%|████████▉ | 39363/43738 [4:59:58<25:51,  2.82it/s]

step:2460, train_loss:0.1136593293898935, acc:0.4159489876279755


 90%|████████▉ | 39364/43738 [4:59:59<27:40,  2.63it/s]

step:2460, train_loss:0.11365721015151883, acc:0.41596382481455135


 90%|█████████ | 39365/43738 [4:59:59<27:49,  2.62it/s]

step:2460, train_loss:0.11365507436006608, acc:0.41597866124730093


 90%|█████████ | 39366/43738 [4:59:59<28:55,  2.52it/s]

step:2460, train_loss:0.11365236878279776, acc:0.41599349692628157


 90%|█████████ | 39367/43738 [5:00:00<25:14,  2.89it/s]

step:2460, train_loss:0.11365418954542998, acc:0.4159829298651154


 90%|█████████ | 39368/43738 [5:00:00<26:42,  2.73it/s]

step:2460, train_loss:0.11365797996148311, acc:0.4159723633407844


 90%|█████████ | 39369/43738 [5:00:00<25:57,  2.81it/s]

step:2460, train_loss:0.11365569978341655, acc:0.41598719804922657


 90%|█████████ | 39370/43738 [5:00:01<22:46,  3.20it/s]

step:2460, train_loss:0.11365317371150907, acc:0.416002032004064


 90%|█████████ | 39371/43738 [5:00:01<20:54,  3.48it/s]

step:2460, train_loss:0.11365042139865353, acc:0.4160168652053542


 90%|█████████ | 39372/43738 [5:00:01<26:12,  2.78it/s]

step:2460, train_loss:0.11365440288103258, acc:0.41600629889261403


 90%|█████████ | 39373/43738 [5:00:02<30:53,  2.35it/s]

step:2460, train_loss:0.11365166447736098, acc:0.4160211312320626


 90%|█████████ | 39374/43738 [5:00:03<36:33,  1.99it/s]

step:2460, train_loss:0.1136523245767401, acc:0.4160105653476914


 90%|█████████ | 39375/43738 [5:00:03<30:43,  2.37it/s]

step:2460, train_loss:0.11364946201868052, acc:0.4160253968253968


 91%|█████████ | 39680/43738 [5:02:20<32:20,  2.09it/s]

step:2480, train_loss:0.11343221493258104, acc:0.41696068548387094


 91%|█████████ | 39681/43738 [5:02:21<30:39,  2.21it/s]

step:2480, train_loss:0.11343004061745213, acc:0.4169753786446914


 91%|█████████ | 39682/43738 [5:02:22<38:42,  1.75it/s]

step:2480, train_loss:0.11343016690549948, acc:0.4169648707222418


 91%|█████████ | 39683/43738 [5:02:22<34:26,  1.96it/s]

step:2480, train_loss:0.11342815865582787, acc:0.4169795630370688


 91%|█████████ | 39684/43738 [5:02:22<34:08,  1.98it/s]

step:2480, train_loss:0.11342693768475194, acc:0.4169942546114303


 91%|█████████ | 39685/43738 [5:02:23<37:57,  1.78it/s]

step:2480, train_loss:0.11342895088628684, acc:0.41698374700768553


 91%|█████████ | 39686/43738 [5:02:24<37:35,  1.80it/s]

step:2480, train_loss:0.11342973276881628, acc:0.4169732399334778


 91%|█████████ | 39687/43738 [5:02:24<33:33,  2.01it/s]

step:2480, train_loss:0.11342784266350445, acc:0.41698793055660544


 91%|█████████ | 39688/43738 [5:02:24<28:50,  2.34it/s]

step:2480, train_loss:0.11342498754249822, acc:0.41700262043942754


 91%|█████████ | 39689/43738 [5:02:25<35:00,  1.93it/s]

step:2480, train_loss:0.11342594374055329, acc:0.4169921136838923


 91%|█████████ | 39690/43738 [5:02:25<29:31,  2.29it/s]

step:2480, train_loss:0.11342462443092476, acc:0.41698160745779794


 91%|█████████ | 39691/43738 [5:02:26<29:41,  2.27it/s]

step:2480, train_loss:0.11342437706016262, acc:0.4169711017611045


 91%|█████████ | 39692/43738 [5:02:26<27:08,  2.49it/s]

step:2480, train_loss:0.11342397005554973, acc:0.41696059659377205


 91%|█████████ | 39693/43738 [5:02:27<34:46,  1.94it/s]

step:2480, train_loss:0.11342342509276597, acc:0.41695009195576044


 91%|█████████ | 39694/43738 [5:02:28<40:20,  1.67it/s]

step:2480, train_loss:0.11342308847450264, acc:0.416964780571371


 91%|█████████ | 39695/43738 [5:02:28<37:00,  1.82it/s]

step:2480, train_loss:0.11342137516938817, acc:0.4169794684469077


 91%|█████████▏| 40000/43738 [5:04:48<31:50,  1.96it/s]

step:2500, train_loss:0.11322642570428834, acc:0.417775


 91%|█████████▏| 40001/43738 [5:04:48<27:47,  2.24it/s]

step:2500, train_loss:0.11322360015758241, acc:0.4177895552611185


 91%|█████████▏| 40002/43738 [5:04:49<28:28,  2.19it/s]

step:2500, train_loss:0.11322225585761196, acc:0.41777911104444776


 91%|█████████▏| 40003/43738 [5:04:49<24:34,  2.53it/s]

step:2500, train_loss:0.11321988198433856, acc:0.4177936654750894


 91%|█████████▏| 40004/43738 [5:04:50<31:31,  1.97it/s]

step:2500, train_loss:0.11321928430054277, acc:0.4178082191780822


 91%|█████████▏| 40005/43738 [5:04:50<27:02,  2.30it/s]

step:2500, train_loss:0.11321773385677066, acc:0.4178227721534808


 91%|█████████▏| 40006/43738 [5:04:51<33:41,  1.85it/s]

step:2500, train_loss:0.1132165508511014, acc:0.4178373244013398


 91%|█████████▏| 40007/43738 [5:04:52<37:50,  1.64it/s]

step:2500, train_loss:0.11321633098862892, acc:0.4178518759217137


 91%|█████████▏| 40008/43738 [5:04:52<34:30,  1.80it/s]

step:2500, train_loss:0.11321724869822893, acc:0.4178414317136573


 91%|█████████▏| 40009/43738 [5:04:52<28:33,  2.18it/s]

step:2500, train_loss:0.11322090246006894, acc:0.41783098802769375


 91%|█████████▏| 40010/43738 [5:04:53<29:49,  2.08it/s]

step:2500, train_loss:0.11322065674262281, acc:0.41782054486378406


 91%|█████████▏| 40011/43738 [5:04:54<35:59,  1.73it/s]

step:2500, train_loss:0.11322151090802855, acc:0.41781010222188897


 91%|█████████▏| 40012/43738 [5:04:54<32:50,  1.89it/s]

step:2500, train_loss:0.11321990160385582, acc:0.41782465260421875


 91%|█████████▏| 40013/43738 [5:04:55<31:19,  1.98it/s]

step:2500, train_loss:0.11322200350625486, acc:0.417814210381626


 91%|█████████▏| 40014/43738 [5:04:55<30:44,  2.02it/s]

step:2500, train_loss:0.11322183921064279, acc:0.41780376868096164


 91%|█████████▏| 40015/43738 [5:04:55<26:34,  2.34it/s]

step:2500, train_loss:0.11322439850248353, acc:0.4177933275021867


 92%|█████████▏| 40320/43738 [5:07:10<31:34,  1.80it/s]

step:2520, train_loss:0.1129788934464141, acc:0.41865079365079366


 92%|█████████▏| 40321/43738 [5:07:11<29:40,  1.92it/s]

step:2520, train_loss:0.11297884373435196, acc:0.4186404107040996


 92%|█████████▏| 40322/43738 [5:07:11<25:34,  2.23it/s]

step:2520, train_loss:0.11297784761624273, acc:0.41865482862953224


 92%|█████████▏| 40323/43738 [5:07:11<25:00,  2.28it/s]

step:2520, train_loss:0.11297866375516182, acc:0.4186444460977606


 92%|█████████▏| 40324/43738 [5:07:12<24:22,  2.33it/s]

step:2520, train_loss:0.11297756821465849, acc:0.41863406408094433


 92%|█████████▏| 40325/43738 [5:07:12<27:45,  2.05it/s]

step:2520, train_loss:0.11297736807229014, acc:0.41862368257904525


 92%|█████████▏| 40326/43738 [5:07:13<25:51,  2.20it/s]

step:2520, train_loss:0.11297461799000139, acc:0.4186380994891633


 92%|█████████▏| 40327/43738 [5:07:13<22:37,  2.51it/s]

step:2520, train_loss:0.11297388341940971, acc:0.418652515684281


 92%|█████████▏| 40328/43738 [5:07:14<25:29,  2.23it/s]

step:2520, train_loss:0.11297673191375984, acc:0.41864213449712356


 92%|█████████▏| 40329/43738 [5:07:14<25:09,  2.26it/s]

step:2520, train_loss:0.11297399440941107, acc:0.41865654987725953


 92%|█████████▏| 40330/43738 [5:07:15<29:54,  1.90it/s]

step:2520, train_loss:0.11297298174972693, acc:0.41867096454252417


 92%|█████████▏| 40331/43738 [5:07:15<27:57,  2.03it/s]

step:2520, train_loss:0.112972399470359, acc:0.4186605836701297


 92%|█████████▏| 40332/43738 [5:07:16<26:05,  2.17it/s]

step:2520, train_loss:0.11297201674272651, acc:0.4186502033125062


 92%|█████████▏| 40333/43738 [5:07:16<24:45,  2.29it/s]

step:2520, train_loss:0.11297426668628005, acc:0.4186398234696154


 92%|█████████▏| 40334/43738 [5:07:16<26:09,  2.17it/s]

step:2520, train_loss:0.11297515728735173, acc:0.41862944414141917


 92%|█████████▏| 40335/43738 [5:07:17<25:11,  2.25it/s]

step:2520, train_loss:0.11297700998418289, acc:0.418619065327879


 93%|█████████▎| 40640/43738 [5:09:39<27:00,  1.91it/s]

step:2540, train_loss:0.11285437217829286, acc:0.4191683070866142


 93%|█████████▎| 40641/43738 [5:09:39<25:20,  2.04it/s]

step:2540, train_loss:0.1128525212527746, acc:0.41918259885337467


 93%|█████████▎| 40642/43738 [5:09:40<25:40,  2.01it/s]

step:2540, train_loss:0.11285262785517992, acc:0.41917228482850255


 93%|█████████▎| 40643/43738 [5:09:41<28:47,  1.79it/s]

step:2540, train_loss:0.11284994168039685, acc:0.4191865757941097


 93%|█████████▎| 40644/43738 [5:09:41<25:39,  2.01it/s]

step:2540, train_loss:0.11285035775056228, acc:0.4191762621789194


 93%|█████████▎| 40645/43738 [5:09:41<24:40,  2.09it/s]

step:2540, train_loss:0.11285040235769492, acc:0.41916594907122645


 93%|█████████▎| 40646/43738 [5:09:42<23:56,  2.15it/s]

step:2540, train_loss:0.1128477902499423, acc:0.4191802391379226


 93%|█████████▎| 40647/43738 [5:09:42<20:21,  2.53it/s]

step:2540, train_loss:0.11284559542404414, acc:0.4191945285014884


 93%|█████████▎| 40648/43738 [5:09:43<25:19,  2.03it/s]

step:2540, train_loss:0.11284816684078602, acc:0.4191842157055698


 93%|█████████▎| 40649/43738 [5:09:43<24:18,  2.12it/s]

step:2540, train_loss:0.1128454997117209, acc:0.4191985042682477


 93%|█████████▎| 40650/43738 [5:09:43<21:08,  2.43it/s]

step:2540, train_loss:0.11284272546730849, acc:0.4192127921279213


 93%|█████████▎| 40651/43738 [5:09:44<27:40,  1.86it/s]

step:2540, train_loss:0.1128458994490569, acc:0.4192024796437972


 93%|█████████▎| 40652/43738 [5:09:45<31:20,  1.64it/s]

step:2540, train_loss:0.11284584209574436, acc:0.41919216766702744


 93%|█████████▎| 40653/43738 [5:09:46<33:24,  1.54it/s]

step:2540, train_loss:0.11285169605308998, acc:0.4191818561975746


 93%|█████████▎| 40654/43738 [5:09:46<30:36,  1.68it/s]

step:2540, train_loss:0.11284895421042901, acc:0.41919614306095343


 93%|█████████▎| 40655/43738 [5:09:47<25:34,  2.01it/s]

step:2540, train_loss:0.112847230153915, acc:0.419210429221498


 94%|█████████▎| 40960/43738 [5:12:06<23:15,  1.99it/s]

step:2560, train_loss:0.11264345703054168, acc:0.42001953125


 94%|█████████▎| 40961/43738 [5:12:07<23:54,  1.94it/s]

step:2560, train_loss:0.11264452537644798, acc:0.4200092771172579


 94%|█████████▎| 40962/43738 [5:12:07<21:03,  2.20it/s]

step:2560, train_loss:0.11264227006480801, acc:0.4200234363556467


 94%|█████████▎| 40963/43738 [5:12:08<21:26,  2.16it/s]

step:2560, train_loss:0.11264225900915172, acc:0.42001318262822546


 94%|█████████▎| 40964/43738 [5:12:08<24:23,  1.90it/s]

step:2560, train_loss:0.1126440554750006, acc:0.42000292940142564


 94%|█████████▎| 40965/43738 [5:12:09<22:00,  2.10it/s]

step:2560, train_loss:0.11264243345146728, acc:0.4200170877578421


 94%|█████████▎| 40966/43738 [5:12:09<18:56,  2.44it/s]

step:2560, train_loss:0.11263968470201761, acc:0.4200312454230337


 94%|█████████▎| 40967/43738 [5:12:09<21:15,  2.17it/s]

step:2560, train_loss:0.11264160767445204, acc:0.4200209925061635


 94%|█████████▎| 40968/43738 [5:12:10<23:27,  1.97it/s]

step:2560, train_loss:0.11264207110124527, acc:0.4200107400898262


 94%|█████████▎| 40969/43738 [5:12:10<20:35,  2.24it/s]

step:2560, train_loss:0.11264009842434536, acc:0.4200248968732456


 94%|█████████▎| 40970/43738 [5:12:11<18:04,  2.55it/s]

step:2560, train_loss:0.11263872521776622, acc:0.4200146448620942


 94%|█████████▎| 40971/43738 [5:12:11<18:58,  2.43it/s]

step:2560, train_loss:0.11263726405867837, acc:0.4200288008591443


 94%|█████████▎| 40972/43738 [5:12:12<21:18,  2.16it/s]

step:2560, train_loss:0.1126406958839564, acc:0.42001854925314847


 94%|█████████▎| 40973/43738 [5:12:12<18:38,  2.47it/s]

step:2560, train_loss:0.11263890292795145, acc:0.4200327044639153


 94%|█████████▎| 40974/43738 [5:12:13<24:02,  1.92it/s]

step:2560, train_loss:0.11263733301464184, acc:0.4200468589837458


 94%|█████████▎| 40975/43738 [5:12:13<23:20,  1.97it/s]

step:2560, train_loss:0.11263562995012356, acc:0.42006101281269065


 94%|█████████▍| 41280/43738 [5:14:32<14:48,  2.77it/s]

step:2580, train_loss:0.11245949390761413, acc:0.4208817829457364


 94%|█████████▍| 41281/43738 [5:14:32<13:41,  2.99it/s]

step:2580, train_loss:0.11245917134413597, acc:0.4208715874130956


 94%|█████████▍| 41282/43738 [5:14:32<12:39,  3.23it/s]

step:2580, train_loss:0.11245653406772949, acc:0.4208856160069764


 94%|█████████▍| 41283/43738 [5:14:33<15:54,  2.57it/s]

step:2580, train_loss:0.11245581001623531, acc:0.42089964392122664


 94%|█████████▍| 41284/43738 [5:14:33<16:38,  2.46it/s]

step:2580, train_loss:0.11245334332265976, acc:0.42091367115589573


 94%|█████████▍| 41285/43738 [5:14:34<18:24,  2.22it/s]

step:2580, train_loss:0.11245333716742684, acc:0.42092769771103306


 94%|█████████▍| 41286/43738 [5:14:34<17:35,  2.32it/s]

step:2580, train_loss:0.1124524513785685, acc:0.42091750230102215


 94%|█████████▍| 41287/43738 [5:14:34<14:55,  2.74it/s]

step:2580, train_loss:0.11244972793185555, acc:0.4209315280839005


 94%|█████████▍| 41288/43738 [5:14:35<15:03,  2.71it/s]

step:2580, train_loss:0.1124491804515076, acc:0.4209455531873668


 94%|█████████▍| 41289/43738 [5:14:35<15:10,  2.69it/s]

step:2580, train_loss:0.11244818045859128, acc:0.42093535808568866


 94%|█████████▍| 41290/43738 [5:14:36<19:37,  2.08it/s]

step:2580, train_loss:0.11244595210247858, acc:0.42094938241705016


 94%|█████████▍| 41291/43738 [5:14:36<18:21,  2.22it/s]

step:2580, train_loss:0.11244439963139731, acc:0.4209634060691192


 94%|█████████▍| 41292/43738 [5:14:37<20:22,  2.00it/s]

step:2580, train_loss:0.11244180848091212, acc:0.4209774290419452


 94%|█████████▍| 41293/43738 [5:14:37<18:36,  2.19it/s]

step:2580, train_loss:0.11243957761688182, acc:0.42099145133557747


 94%|█████████▍| 41294/43738 [5:14:38<18:19,  2.22it/s]

step:2580, train_loss:0.11243842192190658, acc:0.4210054729500654


 94%|█████████▍| 41295/43738 [5:14:38<15:52,  2.56it/s]

step:2580, train_loss:0.112437469499091, acc:0.4209952778786778


 95%|█████████▌| 41600/43738 [5:16:59<19:49,  1.80it/s]

step:2600, train_loss:0.11226831728002988, acc:0.42149038461538463


 95%|█████████▌| 41601/43738 [5:16:59<19:50,  1.80it/s]

step:2600, train_loss:0.11226846864094116, acc:0.4214802528785366


 95%|█████████▌| 41602/43738 [5:17:00<17:55,  1.99it/s]

step:2600, train_loss:0.11226906435162977, acc:0.42147012162876785


 95%|█████████▌| 41603/43738 [5:17:00<15:05,  2.36it/s]

step:2600, train_loss:0.11226653503053415, acc:0.42148402759416387


 95%|█████████▌| 41604/43738 [5:17:00<13:11,  2.70it/s]

step:2600, train_loss:0.11226412822737389, acc:0.4214979328910682


 95%|█████████▌| 41605/43738 [5:17:00<13:43,  2.59it/s]

step:2600, train_loss:0.11227317539366116, acc:0.4214878019468814


 95%|█████████▌| 41606/43738 [5:17:01<14:27,  2.46it/s]

step:2600, train_loss:0.11227538413023831, acc:0.421477671489689


 95%|█████████▌| 41607/43738 [5:17:01<13:29,  2.63it/s]

step:2600, train_loss:0.11227524157411864, acc:0.42146754151945587


 95%|█████████▌| 41608/43738 [5:17:02<15:41,  2.26it/s]

step:2600, train_loss:0.11227805106475064, acc:0.4214574120361469


 95%|█████████▌| 41609/43738 [5:17:02<14:20,  2.47it/s]

step:2600, train_loss:0.11227621734256617, acc:0.42147131630176166


 95%|█████████▌| 41610/43738 [5:17:03<14:26,  2.46it/s]

step:2600, train_loss:0.11227368152326071, acc:0.42148521989906274


 95%|█████████▌| 41611/43738 [5:17:03<18:29,  1.92it/s]

step:2600, train_loss:0.11227157736129548, acc:0.4214991228280983


 95%|█████████▌| 41612/43738 [5:17:04<17:13,  2.06it/s]

step:2600, train_loss:0.11227000242239074, acc:0.4214889935595501


 95%|█████████▌| 41613/43738 [5:17:04<15:30,  2.28it/s]

step:2600, train_loss:0.11227052451759656, acc:0.42147886477783386


 95%|█████████▌| 41614/43738 [5:17:04<13:39,  2.59it/s]

step:2600, train_loss:0.11226942308464158, acc:0.4214687364829144


 95%|█████████▌| 41615/43738 [5:17:05<12:01,  2.94it/s]

step:2600, train_loss:0.11226753239389023, acc:0.4214826384717049


 96%|█████████▌| 41920/43738 [5:19:31<14:49,  2.04it/s]

step:2620, train_loss:0.11202709106394733, acc:0.42251908396946564


 96%|█████████▌| 41921/43738 [5:19:32<17:13,  1.76it/s]

step:2620, train_loss:0.11202754170923611, acc:0.4225090050332769


 96%|█████████▌| 41922/43738 [5:19:32<14:59,  2.02it/s]

step:2620, train_loss:0.1120251102009867, acc:0.4225227804016984


 96%|█████████▌| 41923/43738 [5:19:33<12:38,  2.39it/s]

step:2620, train_loss:0.11202246898042488, acc:0.4225365551129452


 96%|█████████▌| 41924/43738 [5:19:33<11:47,  2.57it/s]

step:2620, train_loss:0.11202221315145908, acc:0.4225264764812518


 96%|█████████▌| 41925/43738 [5:19:33<13:36,  2.22it/s]

step:2620, train_loss:0.11202216600195748, acc:0.4225402504472272


 96%|█████████▌| 41926/43738 [5:19:34<11:57,  2.53it/s]

step:2620, train_loss:0.1120197138694506, acc:0.42255402375614176


 96%|█████████▌| 41927/43738 [5:19:34<14:22,  2.10it/s]

step:2620, train_loss:0.11201709256454072, acc:0.42256779640804254


 96%|█████████▌| 41928/43738 [5:19:35<13:13,  2.28it/s]

step:2620, train_loss:0.11201490561661841, acc:0.4225815684029765


 96%|█████████▌| 41929/43738 [5:19:35<12:22,  2.44it/s]

step:2620, train_loss:0.11201381774209139, acc:0.4225953397409907


 96%|█████████▌| 41930/43738 [5:19:35<10:53,  2.77it/s]

step:2620, train_loss:0.11201284026249973, acc:0.42258526114953493


 96%|█████████▌| 41931/43738 [5:19:36<11:25,  2.64it/s]

step:2620, train_loss:0.1120102684422424, acc:0.4225990317426248


 96%|█████████▌| 41932/43738 [5:19:36<10:14,  2.94it/s]

step:2620, train_loss:0.11200773468722874, acc:0.4226128016789087


 96%|█████████▌| 41933/43738 [5:19:36<09:25,  3.19it/s]

step:2620, train_loss:0.1120074786410752, acc:0.42260272339207783


 96%|█████████▌| 41934/43738 [5:19:37<09:27,  3.18it/s]

step:2620, train_loss:0.1120105027571338, acc:0.42259264558592075


 96%|█████████▌| 41935/43738 [5:19:37<10:20,  2.91it/s]

step:2620, train_loss:0.11201019541118912, acc:0.422582568260403


 97%|█████████▋| 42240/43738 [5:21:56<10:19,  2.42it/s]

step:2640, train_loss:0.11189998796665206, acc:0.4229403409090909


 97%|█████████▋| 42241/43738 [5:21:57<10:30,  2.37it/s]

step:2640, train_loss:0.11190434089456762, acc:0.4229303283539689


 97%|█████████▋| 42242/43738 [5:21:57<09:09,  2.72it/s]

step:2640, train_loss:0.11190170128212666, acc:0.42294398939444156


 97%|█████████▋| 42243/43738 [5:21:58<09:55,  2.51it/s]

step:2640, train_loss:0.11190210244550952, acc:0.4229339772269962


 97%|█████████▋| 42244/43738 [5:21:58<08:55,  2.79it/s]

step:2640, train_loss:0.11190554365324253, acc:0.42292396553356687


 97%|█████████▋| 42245/43738 [5:21:58<07:59,  3.11it/s]

step:2640, train_loss:0.11190310925944129, acc:0.42293762575452715


 97%|█████████▋| 42246/43738 [5:21:58<08:24,  2.96it/s]

step:2640, train_loss:0.11190108174756366, acc:0.4229512853287885


 97%|█████████▋| 42247/43738 [5:21:59<07:33,  3.29it/s]

step:2640, train_loss:0.11189843301811232, acc:0.4229649442563969


 97%|█████████▋| 42248/43738 [5:21:59<09:17,  2.67it/s]

step:2640, train_loss:0.1118968597378592, acc:0.4229786025373982


 97%|█████████▋| 42249/43738 [5:22:00<09:47,  2.53it/s]

step:2640, train_loss:0.11189719304252735, acc:0.4229685909725674


 97%|█████████▋| 42250/43738 [5:22:00<10:54,  2.27it/s]

step:2640, train_loss:0.11189895003738216, acc:0.4229585798816568


 97%|█████████▋| 42251/43738 [5:22:01<13:28,  1.84it/s]

step:2640, train_loss:0.11189677049473867, acc:0.4229722373434948


 97%|█████████▋| 42252/43738 [5:22:01<12:24,  1.99it/s]

step:2640, train_loss:0.11189478373279396, acc:0.4229858941588564


 97%|█████████▋| 42253/43738 [5:22:02<11:47,  2.10it/s]

step:2640, train_loss:0.11189224836691504, acc:0.42299955032778735


 97%|█████████▋| 42254/43738 [5:22:03<13:36,  1.82it/s]

step:2640, train_loss:0.11189226169340066, acc:0.4230132058503337


 97%|█████████▋| 42255/43738 [5:22:03<11:53,  2.08it/s]

step:2640, train_loss:0.11189010500225957, acc:0.4230268607265412


 97%|█████████▋| 42560/43738 [5:24:18<10:21,  1.90it/s]

step:2660, train_loss:0.11167046968089243, acc:0.4239896616541353


 97%|█████████▋| 42561/43738 [5:24:19<09:22,  2.09it/s]

step:2660, train_loss:0.11166948003968634, acc:0.42397969972510047


 97%|█████████▋| 42562/43738 [5:24:20<11:07,  1.76it/s]

step:2660, train_loss:0.11166765934656482, acc:0.42399323340068606


 97%|█████████▋| 42563/43738 [5:24:20<12:22,  1.58it/s]

step:2660, train_loss:0.11166904377805857, acc:0.42398327185583723


 97%|█████████▋| 42564/43738 [5:24:21<10:07,  1.93it/s]

step:2660, train_loss:0.11166666662683923, acc:0.42399680481157787


 97%|█████████▋| 42565/43738 [5:24:21<09:38,  2.03it/s]

step:2660, train_loss:0.11166467470299495, acc:0.42401033713144604


 97%|█████████▋| 42566/43738 [5:24:22<11:15,  1.74it/s]

step:2660, train_loss:0.11166605576139636, acc:0.42400037588685807


 97%|█████████▋| 42567/43738 [5:24:22<09:07,  2.14it/s]

step:2660, train_loss:0.11166370954171077, acc:0.42401390748702045


 97%|█████████▋| 42568/43738 [5:24:22<07:58,  2.44it/s]

step:2660, train_loss:0.11166108688791375, acc:0.4240274384514189


 97%|█████████▋| 42569/43738 [5:24:23<09:29,  2.05it/s]

step:2660, train_loss:0.11165975422245239, acc:0.42404096878009817


 97%|█████████▋| 42570/43738 [5:24:23<08:41,  2.24it/s]

step:2660, train_loss:0.11165802027063386, acc:0.4240544984731031


 97%|█████████▋| 42571/43738 [5:24:24<08:49,  2.21it/s]

step:2660, train_loss:0.11165762046584005, acc:0.42404453736111436


 97%|█████████▋| 42572/43738 [5:24:24<08:28,  2.29it/s]

step:2660, train_loss:0.11165695855142027, acc:0.42403457671709105


 97%|█████████▋| 42573/43738 [5:24:25<08:10,  2.37it/s]

step:2660, train_loss:0.11165672434538781, acc:0.42402461654100015


 97%|█████████▋| 42574/43738 [5:24:25<07:16,  2.67it/s]

step:2660, train_loss:0.11165427111063016, acc:0.42403814534692535


 97%|█████████▋| 42575/43738 [5:24:25<07:41,  2.52it/s]

step:2660, train_loss:0.11165272823134699, acc:0.42405167351732237


 98%|█████████▊| 42880/43738 [5:26:42<06:33,  2.18it/s]

step:2680, train_loss:0.11144443092449846, acc:0.4248134328358209


 98%|█████████▊| 42881/43738 [5:26:42<07:28,  1.91it/s]

step:2680, train_loss:0.11144867478217091, acc:0.42480352603717264


 98%|█████████▊| 42882/43738 [5:26:43<06:35,  2.16it/s]

step:2680, train_loss:0.1114461595939138, acc:0.4248169395084184


 98%|█████████▊| 42883/43738 [5:26:43<07:25,  1.92it/s]

step:2680, train_loss:0.11144719307658552, acc:0.4248070330900357


 98%|█████████▊| 42884/43738 [5:26:44<07:04,  2.01it/s]

step:2680, train_loss:0.11145295804879338, acc:0.4247971271336629


 98%|█████████▊| 42885/43738 [5:26:44<06:14,  2.28it/s]

step:2680, train_loss:0.11145214992690548, acc:0.4247872216392678


 98%|█████████▊| 42886/43738 [5:26:44<05:17,  2.68it/s]

step:2680, train_loss:0.11144967503424043, acc:0.424800634239612


 98%|█████████▊| 42887/43738 [5:26:45<06:21,  2.23it/s]

step:2680, train_loss:0.11145040233053961, acc:0.42479072912537597


 98%|█████████▊| 42888/43738 [5:26:45<05:29,  2.58it/s]

step:2680, train_loss:0.11144807869482468, acc:0.4248041410184667


 98%|█████████▊| 42889/43738 [5:26:45<05:12,  2.72it/s]

step:2680, train_loss:0.11144762610451042, acc:0.42479423628436197


 98%|█████████▊| 42890/43738 [5:26:46<05:01,  2.82it/s]

step:2680, train_loss:0.11144765184711468, acc:0.424784332012124


 98%|█████████▊| 42891/43738 [5:26:46<04:30,  3.13it/s]

step:2680, train_loss:0.11144505352828786, acc:0.4247977431162715


 98%|█████████▊| 42892/43738 [5:26:47<05:48,  2.42it/s]

step:2680, train_loss:0.11144441734197065, acc:0.424811153595076


 98%|█████████▊| 42893/43738 [5:26:47<04:55,  2.86it/s]

step:2680, train_loss:0.1114418192259593, acc:0.4248245634485813


 98%|█████████▊| 42894/43738 [5:26:47<05:20,  2.63it/s]

step:2680, train_loss:0.11144140754763525, acc:0.42481465939292207


 98%|█████████▊| 42895/43738 [5:26:47<04:38,  3.03it/s]

step:2680, train_loss:0.11143889990478995, acc:0.4248280685394568


 99%|█████████▉| 43200/43738 [5:29:02<04:53,  1.83it/s]

step:2700, train_loss:0.11125909894981878, acc:0.42564814814814816


 99%|█████████▉| 43201/43738 [5:29:02<04:05,  2.19it/s]

step:2700, train_loss:0.11125654575583421, acc:0.42566144302215225


 99%|█████████▉| 43202/43738 [5:29:03<03:49,  2.34it/s]

step:2700, train_loss:0.11125740005628637, acc:0.4256515902041572


 99%|█████████▉| 43203/43738 [5:29:03<03:43,  2.39it/s]

step:2700, train_loss:0.11125678303369257, acc:0.42566488438302896


 99%|█████████▉| 43204/43738 [5:29:04<04:30,  1.98it/s]

step:2700, train_loss:0.11125667758865512, acc:0.4256550319414869


 99%|█████████▉| 43205/43738 [5:29:04<03:50,  2.32it/s]

step:2700, train_loss:0.11125519186019726, acc:0.42564517995602363


 99%|█████████▉| 43206/43738 [5:29:05<03:54,  2.27it/s]

step:2700, train_loss:0.1112553819903575, acc:0.4256353284266074


 99%|█████████▉| 43207/43738 [5:29:05<03:26,  2.57it/s]

step:2700, train_loss:0.1112575651195232, acc:0.42562547735320666


 99%|█████████▉| 43208/43738 [5:29:05<03:25,  2.57it/s]

step:2700, train_loss:0.11125972061180285, acc:0.4256156267357897


 99%|█████████▉| 43209/43738 [5:29:05<03:04,  2.87it/s]

step:2700, train_loss:0.11126127363481138, acc:0.4256057765743248


 99%|█████████▉| 43210/43738 [5:29:06<04:01,  2.19it/s]

step:2700, train_loss:0.11125999416832066, acc:0.42561906965980095


 99%|█████████▉| 43211/43738 [5:29:07<03:52,  2.27it/s]

step:2700, train_loss:0.11125865357286964, acc:0.4256323621300132


 99%|█████████▉| 43212/43738 [5:29:07<04:09,  2.11it/s]

step:2700, train_loss:0.11126091270509945, acc:0.42562251226511155


 99%|█████████▉| 43213/43738 [5:29:07<03:42,  2.36it/s]

step:2700, train_loss:0.11126230252572487, acc:0.42561266285608496


 99%|█████████▉| 43214/43738 [5:29:08<03:55,  2.22it/s]

step:2700, train_loss:0.11126140541619134, acc:0.42560281390290183


 99%|█████████▉| 43215/43738 [5:29:08<03:44,  2.33it/s]

step:2700, train_loss:0.11125996698242137, acc:0.42561610551891704


100%|█████████▉| 43520/43738 [5:31:27<01:55,  1.89it/s]

step:2720, train_loss:0.11105013961044793, acc:0.4266314338235294


100%|█████████▉| 43521/43738 [5:31:28<01:48,  1.99it/s]

step:2720, train_loss:0.11104894242117236, acc:0.42664460834999196


100%|█████████▉| 43522/43738 [5:31:28<01:36,  2.24it/s]

step:2720, train_loss:0.11105346636729257, acc:0.4266348053857819


100%|█████████▉| 43523/43738 [5:31:28<01:22,  2.59it/s]

step:2720, train_loss:0.11105244749462825, acc:0.4266250028720447


100%|█████████▉| 43524/43738 [5:31:29<01:31,  2.33it/s]

step:2720, train_loss:0.11105004138964278, acc:0.42663817663817666


100%|█████████▉| 43525/43738 [5:31:29<01:28,  2.41it/s]

step:2720, train_loss:0.1110474902591075, acc:0.42665134979896613


100%|█████████▉| 43526/43738 [5:31:30<01:35,  2.22it/s]

step:2720, train_loss:0.11104558920907175, acc:0.4266645223544548


100%|█████████▉| 43527/43738 [5:31:30<01:26,  2.44it/s]

step:2720, train_loss:0.11104402945976687, acc:0.42667769430468444


100%|█████████▉| 43528/43738 [5:31:30<01:20,  2.60it/s]

step:2720, train_loss:0.11104149734425538, acc:0.42669086564969677


100%|█████████▉| 43529/43738 [5:31:31<01:34,  2.21it/s]

step:2720, train_loss:0.11104115916134476, acc:0.4266810631992465


100%|█████████▉| 43530/43738 [5:31:31<01:26,  2.42it/s]

step:2720, train_loss:0.11103956918072784, acc:0.4266942338617046


100%|█████████▉| 43531/43738 [5:31:32<01:20,  2.57it/s]

step:2720, train_loss:0.11104128230136383, acc:0.4266844317842457


100%|█████████▉| 43532/43738 [5:31:32<01:18,  2.62it/s]

step:2720, train_loss:0.11104130800586266, acc:0.4266746301571258


100%|█████████▉| 43533/43738 [5:31:32<01:12,  2.83it/s]

step:2720, train_loss:0.11103950890722336, acc:0.4266878000597248


100%|█████████▉| 43534/43738 [5:31:33<01:41,  2.01it/s]

step:2720, train_loss:0.11103925820074781, acc:0.4266779988055313


100%|█████████▉| 43535/43738 [5:31:33<01:28,  2.30it/s]

step:2720, train_loss:0.11103733186392407, acc:0.42669116802572643


100%|██████████| 43738/43738 [5:33:04<00:00,  2.52it/s]
  0%|          | 1/5129 [00:00<13:44,  6.22it/s]

eval on dev set


100%|██████████| 5129/5129 [13:51<00:00,  5.84it/s]
  0%|          | 0/43738 [00:00<?, ?it/s]

1.4611059231946903, 0.5006823942288945


  0%|          | 112/43738 [00:52<5:38:13,  2.15it/s]

step:2740, train_loss:0.08877913816000468, acc:0.5625


  0%|          | 113/43738 [00:52<5:21:26,  2.26it/s]

step:2740, train_loss:0.08844496589154005, acc:0.5663716814159292


  0%|          | 114/43738 [00:53<5:41:59,  2.13it/s]

step:2740, train_loss:0.08870967728363578, acc:0.5614035087719298


  0%|          | 115/43738 [00:53<5:37:43,  2.15it/s]

step:2740, train_loss:0.08874971396087304, acc:0.5565217391304348


  0%|          | 116/43738 [00:54<6:11:00,  1.96it/s]

step:2740, train_loss:0.08946875273250043, acc:0.5517241379310345


  0%|          | 117/43738 [00:54<6:05:37,  1.99it/s]

step:2740, train_loss:0.09034750065328474, acc:0.5470085470085471


  0%|          | 118/43738 [00:55<5:27:23,  2.22it/s]

step:2740, train_loss:0.08972667459935202, acc:0.5508474576271186


  0%|          | 119/43738 [00:55<5:29:45,  2.20it/s]

step:2740, train_loss:0.08972842834044654, acc:0.5462184873949579


  0%|          | 120/43738 [00:55<4:43:11,  2.57it/s]

step:2740, train_loss:0.09002930527397741, acc:0.5416666666666666


  0%|          | 121/43738 [00:56<4:51:37,  2.49it/s]

step:2740, train_loss:0.08960602517924772, acc:0.5454545454545454


  0%|          | 122/43738 [00:56<5:07:12,  2.37it/s]

step:2740, train_loss:0.08958249642193074, acc:0.5491803278688525


  0%|          | 123/43738 [00:57<4:54:12,  2.47it/s]

step:2740, train_loss:0.08923764377680977, acc:0.5528455284552846


  0%|          | 124/43738 [00:57<4:26:24,  2.73it/s]

step:2740, train_loss:0.08893109511794342, acc:0.5564516129032258


  0%|          | 125/43738 [00:57<4:05:43,  2.96it/s]

step:2740, train_loss:0.08924303426593543, acc:0.552


  0%|          | 126/43738 [00:58<4:50:22,  2.50it/s]

step:2740, train_loss:0.088556936712906, acc:0.5555555555555556


  0%|          | 127/43738 [00:58<5:26:04,  2.23it/s]

step:2740, train_loss:0.08924892582145967, acc:0.5511811023622047


  1%|          | 432/43738 [03:09<5:26:25,  2.21it/s]

step:2760, train_loss:0.07855842099417243, acc:0.5671296296296297


  1%|          | 433/43738 [03:09<4:57:14,  2.43it/s]

step:2760, train_loss:0.0791120840522609, acc:0.5658198614318707


  1%|          | 434/43738 [03:10<4:36:19,  2.61it/s]

step:2760, train_loss:0.07903474885090533, acc:0.5668202764976958


  1%|          | 435/43738 [03:10<5:19:31,  2.26it/s]

step:2760, train_loss:0.07916389429599217, acc:0.5655172413793104


  1%|          | 436/43738 [03:11<6:23:19,  1.88it/s]

step:2760, train_loss:0.07939492777703382, acc:0.5642201834862385


  1%|          | 437/43738 [03:12<7:04:24,  1.70it/s]

step:2760, train_loss:0.07924120163851901, acc:0.5652173913043478


  1%|          | 438/43738 [03:12<5:48:49,  2.07it/s]

step:2760, train_loss:0.0792674313216432, acc:0.5639269406392694


  1%|          | 439/43738 [03:12<5:35:21,  2.15it/s]

step:2760, train_loss:0.07921797749830371, acc:0.5649202733485194


  1%|          | 440/43738 [03:13<5:29:01,  2.19it/s]

step:2760, train_loss:0.07907316196942701, acc:0.5659090909090909


  1%|          | 441/43738 [03:13<5:30:39,  2.18it/s]

step:2760, train_loss:0.07933752471792285, acc:0.564625850340136


  1%|          | 442/43738 [03:14<5:32:59,  2.17it/s]

step:2760, train_loss:0.07957565868631579, acc:0.5633484162895928


  1%|          | 443/43738 [03:14<5:26:24,  2.21it/s]

step:2760, train_loss:0.07966385583201965, acc:0.5620767494356659


  1%|          | 444/43738 [03:14<4:46:14,  2.52it/s]

step:2760, train_loss:0.07954490991825289, acc:0.5630630630630631


  1%|          | 445/43738 [03:15<5:57:16,  2.02it/s]

step:2760, train_loss:0.07969517341051041, acc:0.5617977528089888


  1%|          | 446/43738 [03:15<5:34:02,  2.16it/s]

step:2760, train_loss:0.0796490405329123, acc:0.5627802690582959


  1%|          | 447/43738 [03:16<5:11:42,  2.31it/s]

step:2760, train_loss:0.07971105024414409, acc:0.5615212527964206


  2%|▏         | 752/43738 [05:35<5:24:48,  2.21it/s]

step:2780, train_loss:0.08190543551376941, acc:0.5558510638297872


  2%|▏         | 753/43738 [05:35<5:00:53,  2.38it/s]

step:2780, train_loss:0.08185621231341683, acc:0.5564409030544488


  2%|▏         | 754/43738 [05:36<5:30:43,  2.17it/s]

step:2780, train_loss:0.08202009191625413, acc:0.5557029177718833


  2%|▏         | 755/43738 [05:36<5:16:54,  2.26it/s]

step:2780, train_loss:0.08201582779219707, acc:0.5549668874172186


  2%|▏         | 756/43738 [05:37<5:46:15,  2.07it/s]

step:2780, train_loss:0.08253087446391681, acc:0.5542328042328042


  2%|▏         | 757/43738 [05:37<5:47:59,  2.06it/s]

step:2780, train_loss:0.08249433962702062, acc:0.5548216644649934


  2%|▏         | 758/43738 [05:38<6:19:27,  1.89it/s]

step:2780, train_loss:0.08267689112998977, acc:0.554089709762533


  2%|▏         | 759/43738 [05:38<5:50:22,  2.04it/s]

step:2780, train_loss:0.08275079491642723, acc:0.5533596837944664


  2%|▏         | 760/43738 [05:39<5:48:10,  2.06it/s]

step:2780, train_loss:0.08278922391626493, acc:0.5526315789473685


  2%|▏         | 761/43738 [05:39<5:03:14,  2.36it/s]

step:2780, train_loss:0.08272961999073077, acc:0.5532194480946123


  2%|▏         | 762/43738 [05:39<4:26:08,  2.69it/s]

step:2780, train_loss:0.08262105246094054, acc:0.5538057742782152


  2%|▏         | 763/43738 [05:40<5:09:28,  2.31it/s]

step:2780, train_loss:0.08262654716212443, acc:0.5543905635648755


  2%|▏         | 764/43738 [05:40<4:58:32,  2.40it/s]

step:2780, train_loss:0.08256696711760014, acc:0.5549738219895288


  2%|▏         | 765/43738 [05:40<4:20:05,  2.75it/s]

step:2780, train_loss:0.08256318603331844, acc:0.5542483660130719


  2%|▏         | 766/43738 [05:41<4:39:27,  2.56it/s]

step:2780, train_loss:0.08250277762802187, acc:0.5548302872062664


  2%|▏         | 767/43738 [05:41<4:06:58,  2.90it/s]

step:2780, train_loss:0.08248730333196545, acc:0.5541069100391134


  2%|▏         | 1072/43738 [08:02<5:07:40,  2.31it/s]

step:2800, train_loss:0.08406415511723678, acc:0.5438432835820896


  2%|▏         | 1073/43738 [08:02<5:30:57,  2.15it/s]

step:2800, train_loss:0.08398745904953496, acc:0.5442684063373718


  2%|▏         | 1074/43738 [08:03<5:53:21,  2.01it/s]

step:2800, train_loss:0.08397752680170595, acc:0.5437616387337058


  2%|▏         | 1075/43738 [08:03<5:06:39,  2.32it/s]

step:2800, train_loss:0.08396279498804794, acc:0.5432558139534883


  2%|▏         | 1076/43738 [08:04<4:45:32,  2.49it/s]

step:2800, train_loss:0.08400589380228068, acc:0.5427509293680297


  2%|▏         | 1077/43738 [08:04<4:55:17,  2.41it/s]

step:2800, train_loss:0.08401135846621383, acc:0.542246982358403


  2%|▏         | 1078/43738 [08:04<4:35:36,  2.58it/s]

step:2800, train_loss:0.08398159046861273, acc:0.5426716141001855


  2%|▏         | 1079/43738 [08:05<4:47:23,  2.47it/s]

step:2800, train_loss:0.08401111561927575, acc:0.5421686746987951


  2%|▏         | 1080/43738 [08:05<4:42:29,  2.52it/s]

step:2800, train_loss:0.08424815922777203, acc:0.5416666666666666


  2%|▏         | 1081/43738 [08:05<4:35:37,  2.58it/s]

step:2800, train_loss:0.08417292258587634, acc:0.5420906567992599


  2%|▏         | 1082/43738 [08:06<4:38:03,  2.56it/s]

step:2800, train_loss:0.08421331912136246, acc:0.5415896487985212


  2%|▏         | 1083/43738 [08:06<4:11:31,  2.83it/s]

step:2800, train_loss:0.08423682213017059, acc:0.541089566020314


  2%|▏         | 1084/43738 [08:06<3:46:49,  3.13it/s]

step:2800, train_loss:0.0841838134477616, acc:0.5415129151291513


  2%|▏         | 1085/43738 [08:07<4:14:33,  2.79it/s]

step:2800, train_loss:0.08410854137755827, acc:0.5419354838709678


  2%|▏         | 1086/43738 [08:07<4:40:10,  2.54it/s]

step:2800, train_loss:0.08412967174327683, acc:0.5414364640883977


  2%|▏         | 1087/43738 [08:08<4:28:05,  2.65it/s]

step:2800, train_loss:0.08420028007034942, acc:0.5409383624655014


  3%|▎         | 1392/43738 [10:28<6:08:58,  1.91it/s]

step:2820, train_loss:0.082665508452083, acc:0.5531609195402298


  3%|▎         | 1393/43738 [10:28<5:10:53,  2.27it/s]

step:2820, train_loss:0.08281147466071234, acc:0.5527638190954773


  3%|▎         | 1394/43738 [10:29<5:53:20,  2.00it/s]

step:2820, train_loss:0.08276329588223853, acc:0.5530846484935438


  3%|▎         | 1395/43738 [10:29<5:37:29,  2.09it/s]

step:2820, train_loss:0.08277367295724036, acc:0.5526881720430108


  3%|▎         | 1396/43738 [10:30<5:25:47,  2.17it/s]

step:2820, train_loss:0.08279006443564169, acc:0.5530085959885387


  3%|▎         | 1397/43738 [10:30<5:04:56,  2.31it/s]

step:2820, train_loss:0.08282216141765715, acc:0.5526127415891195


  3%|▎         | 1398/43738 [10:31<4:58:20,  2.37it/s]

step:2820, train_loss:0.08276964593458873, acc:0.5529327610872675


  3%|▎         | 1399/43738 [10:31<5:44:13,  2.05it/s]

step:2820, train_loss:0.08278275618271062, acc:0.5525375268048606


  3%|▎         | 1400/43738 [10:32<5:34:48,  2.11it/s]

step:2820, train_loss:0.08281354425175648, acc:0.5521428571428572


  3%|▎         | 1401/43738 [10:32<4:52:18,  2.41it/s]

step:2820, train_loss:0.08280684810622378, acc:0.5517487508922199


  3%|▎         | 1402/43738 [10:32<4:23:27,  2.68it/s]

step:2820, train_loss:0.08276605321733432, acc:0.5520684736091298


  3%|▎         | 1403/43738 [10:33<4:27:57,  2.63it/s]

step:2820, train_loss:0.08277436583691243, acc:0.5516749821810406


  3%|▎         | 1404/43738 [10:33<4:10:24,  2.82it/s]

step:2820, train_loss:0.0827378152476284, acc:0.551994301994302


  3%|▎         | 1405/43738 [10:33<4:22:26,  2.69it/s]

step:2820, train_loss:0.08267974052478781, acc:0.5523131672597865


  3%|▎         | 1406/43738 [10:34<5:03:51,  2.32it/s]

step:2820, train_loss:0.08265163558389552, acc:0.5526315789473685


  3%|▎         | 1407/43738 [10:34<5:40:15,  2.07it/s]

step:2820, train_loss:0.0826997864635975, acc:0.5522388059701493


  4%|▍         | 1712/43738 [12:54<4:30:48,  2.59it/s]

step:2840, train_loss:0.08290338590793517, acc:0.554322429906542


  4%|▍         | 1713/43738 [12:55<5:33:23,  2.10it/s]

step:2840, train_loss:0.08293387267423383, acc:0.5545826036193812


  4%|▍         | 1714/43738 [12:55<6:02:01,  1.93it/s]

step:2840, train_loss:0.08288614151219223, acc:0.5548424737456242


  4%|▍         | 1715/43738 [12:56<6:19:04,  1.85it/s]

step:2840, train_loss:0.08293040285444256, acc:0.5545189504373178


  4%|▍         | 1716/43738 [12:56<5:40:00,  2.06it/s]

step:2840, train_loss:0.08289091354309953, acc:0.5547785547785548


  4%|▍         | 1717/43738 [12:57<5:28:02,  2.13it/s]

step:2840, train_loss:0.08284293355675301, acc:0.5550378567268491


  4%|▍         | 1718/43738 [12:57<5:16:28,  2.21it/s]

step:2840, train_loss:0.08281651316772376, acc:0.5552968568102444


  4%|▍         | 1719/43738 [12:58<5:11:10,  2.25it/s]

step:2840, train_loss:0.08289839831547792, acc:0.5549738219895288


  4%|▍         | 1720/43738 [12:58<4:28:20,  2.61it/s]

step:2840, train_loss:0.0828502266443413, acc:0.5552325581395349


  4%|▍         | 1721/43738 [12:58<4:40:29,  2.50it/s]

step:2840, train_loss:0.08285027554636563, acc:0.5549099360836722


  4%|▍         | 1722/43738 [12:59<5:27:10,  2.14it/s]

step:2840, train_loss:0.08296835781770713, acc:0.5545876887340302


  4%|▍         | 1723/43738 [12:59<4:54:09,  2.38it/s]

step:2840, train_loss:0.08301348811412367, acc:0.5542658154381892


  4%|▍         | 1724/43738 [13:00<4:55:10,  2.37it/s]

step:2840, train_loss:0.08298454776959599, acc:0.5545243619489559


  4%|▍         | 1725/43738 [13:00<4:52:11,  2.40it/s]

step:2840, train_loss:0.08294141540631139, acc:0.5547826086956522


  4%|▍         | 1726/43738 [13:00<4:20:51,  2.68it/s]

step:2840, train_loss:0.08295774521705584, acc:0.5544611819235226


  4%|▍         | 1727/43738 [13:01<5:28:56,  2.13it/s]

step:2840, train_loss:0.0830618339951299, acc:0.554140127388535


  5%|▍         | 2032/43738 [15:20<4:56:24,  2.35it/s]

step:2860, train_loss:0.0840683058273484, acc:0.5501968503937008


  5%|▍         | 2033/43738 [15:20<4:23:03,  2.64it/s]

step:2860, train_loss:0.08404114570877406, acc:0.5504181013280866


  5%|▍         | 2034/43738 [15:20<4:43:47,  2.45it/s]

step:2860, train_loss:0.08403485014532956, acc:0.5501474926253688


  5%|▍         | 2035/43738 [15:21<4:50:24,  2.39it/s]

step:2860, train_loss:0.08404050632348485, acc:0.5498771498771499


  5%|▍         | 2036/43738 [15:21<4:39:18,  2.49it/s]

step:2860, train_loss:0.08400065927976795, acc:0.550098231827112


  5%|▍         | 2037/43738 [15:21<4:07:21,  2.81it/s]

step:2860, train_loss:0.08399590325398804, acc:0.5498281786941581


  5%|▍         | 2038/43738 [15:22<4:44:06,  2.45it/s]

step:2860, train_loss:0.08401463533593666, acc:0.549558390578999


  5%|▍         | 2039/43738 [15:23<5:56:46,  1.95it/s]

step:2860, train_loss:0.08406075554258245, acc:0.5492888670917117


  5%|▍         | 2040/43738 [15:23<5:29:45,  2.11it/s]

step:2860, train_loss:0.08403263811559038, acc:0.5495098039215687


  5%|▍         | 2041/43738 [15:23<4:42:23,  2.46it/s]

step:2860, train_loss:0.08399153237988204, acc:0.5497305242528172


  5%|▍         | 2042/43738 [15:24<5:55:38,  1.95it/s]

step:2860, train_loss:0.08395778148093108, acc:0.5499510284035259


  5%|▍         | 2043/43738 [15:24<4:57:39,  2.33it/s]

step:2860, train_loss:0.08398202708271016, acc:0.5496818404307391


  5%|▍         | 2044/43738 [15:25<4:35:59,  2.52it/s]

step:2860, train_loss:0.08397087222628716, acc:0.549412915851272


  5%|▍         | 2045/43738 [15:25<4:25:11,  2.62it/s]

step:2860, train_loss:0.08394876718182404, acc:0.5496332518337408


  5%|▍         | 2046/43738 [15:25<4:08:26,  2.80it/s]

step:2860, train_loss:0.08398895827625588, acc:0.5493646138807429


  5%|▍         | 2047/43738 [15:26<5:16:03,  2.20it/s]

step:2860, train_loss:0.08396051906740919, acc:0.5495847581827064


  5%|▌         | 2352/43738 [17:52<5:14:33,  2.19it/s]

step:2880, train_loss:0.083487993518674, acc:0.5497448979591837


  5%|▌         | 2353/43738 [17:52<4:43:37,  2.43it/s]

step:2880, train_loss:0.08346666341263677, acc:0.5499362515937102


  5%|▌         | 2354/43738 [17:53<4:06:13,  2.80it/s]

step:2880, train_loss:0.0834663572142618, acc:0.5497026338147833


  5%|▌         | 2355/43738 [17:53<4:34:17,  2.51it/s]

step:2880, train_loss:0.08346943456401273, acc:0.5494692144373673


  5%|▌         | 2356/43738 [17:54<4:50:57,  2.37it/s]

step:2880, train_loss:0.0835923817495593, acc:0.5492359932088285


  5%|▌         | 2357/43738 [17:54<5:39:54,  2.03it/s]

step:2880, train_loss:0.08358284823838397, acc:0.5490029698769623


  5%|▌         | 2358/43738 [17:55<5:08:39,  2.23it/s]

step:2880, train_loss:0.08361055824313288, acc:0.5487701441899915


  5%|▌         | 2359/43738 [17:55<5:02:44,  2.28it/s]

step:2880, train_loss:0.08364524034155618, acc:0.5485375158965663


  5%|▌         | 2360/43738 [17:55<4:24:36,  2.61it/s]

step:2880, train_loss:0.08360985740133801, acc:0.548728813559322


  5%|▌         | 2361/43738 [17:56<4:13:00,  2.73it/s]

step:2880, train_loss:0.08361980930026064, acc:0.5484963998305803


  5%|▌         | 2362/43738 [17:56<5:36:14,  2.05it/s]

step:2880, train_loss:0.08366060960440223, acc:0.548264182895851


  5%|▌         | 2363/43738 [17:57<6:38:24,  1.73it/s]

step:2880, train_loss:0.08363268379115811, acc:0.5484553533643673


  5%|▌         | 2364/43738 [17:57<5:44:57,  2.00it/s]

step:2880, train_loss:0.08360651491016134, acc:0.5486463620981388


  5%|▌         | 2365/43738 [17:58<5:17:57,  2.17it/s]

step:2880, train_loss:0.08362255748304741, acc:0.548414376321353


  5%|▌         | 2366/43738 [17:58<4:57:46,  2.32it/s]

step:2880, train_loss:0.0835908489687408, acc:0.5486052409129332


  5%|▌         | 2367/43738 [17:59<5:14:58,  2.19it/s]

step:2880, train_loss:0.08360084793802385, acc:0.5487959442332065


  6%|▌         | 2672/43738 [20:17<7:32:23,  1.51it/s]

step:2900, train_loss:0.08349503351325008, acc:0.5497754491017964


  6%|▌         | 2673/43738 [20:18<6:37:44,  1.72it/s]

step:2900, train_loss:0.08355838314456666, acc:0.5495697717919941


  6%|▌         | 2674/43738 [20:18<5:26:04,  2.10it/s]

step:2900, train_loss:0.08353192051890461, acc:0.5497382198952879


  6%|▌         | 2675/43738 [20:18<5:02:44,  2.26it/s]

step:2900, train_loss:0.08352249419310122, acc:0.5499065420560748


  6%|▌         | 2676/43738 [20:19<5:33:30,  2.05it/s]

step:2900, train_loss:0.08353672265983934, acc:0.5497010463378177


  6%|▌         | 2677/43738 [20:19<5:08:13,  2.22it/s]

step:2900, train_loss:0.08354019105923997, acc:0.5494957041464326


  6%|▌         | 2678/43738 [20:19<4:31:42,  2.52it/s]

step:2900, train_loss:0.08350947431401771, acc:0.549663928304705


  6%|▌         | 2679/43738 [20:20<5:48:22,  1.96it/s]

step:2900, train_loss:0.08353756880450673, acc:0.5494587532661441


  6%|▌         | 2680/43738 [20:20<4:55:10,  2.32it/s]

step:2900, train_loss:0.08351340057311403, acc:0.5496268656716418


  6%|▌         | 2681/43738 [20:21<6:11:28,  1.84it/s]

step:2900, train_loss:0.08352187586870898, acc:0.5494218575158523


  6%|▌         | 2682/43738 [20:22<5:16:15,  2.16it/s]

step:2900, train_loss:0.08349440614082142, acc:0.5495898583146905


  6%|▌         | 2683/43738 [20:22<4:50:42,  2.35it/s]

step:2900, train_loss:0.08349107772933612, acc:0.5493850167722698


  6%|▌         | 2684/43738 [20:22<5:04:48,  2.24it/s]

step:2900, train_loss:0.08346546511280738, acc:0.5495529061102832


  6%|▌         | 2685/43738 [20:23<4:23:56,  2.59it/s]

step:2900, train_loss:0.0834357647895787, acc:0.5497206703910614


  6%|▌         | 2686/43738 [20:23<5:01:17,  2.27it/s]

step:2900, train_loss:0.08341554005461542, acc:0.5498883097542815


  6%|▌         | 2687/43738 [20:23<4:13:18,  2.70it/s]

step:2900, train_loss:0.08338450434568567, acc:0.550055824339412


  7%|▋         | 2992/43738 [22:42<5:35:27,  2.02it/s]

step:2920, train_loss:0.08314580206375555, acc:0.5504679144385026


  7%|▋         | 2993/43738 [22:42<5:40:58,  1.99it/s]

step:2920, train_loss:0.08314517218563341, acc:0.5502839959906448


  7%|▋         | 2994/43738 [22:43<5:13:57,  2.16it/s]

step:2920, train_loss:0.08314814951103856, acc:0.5501002004008017


  7%|▋         | 2995/43738 [22:43<4:35:34,  2.46it/s]

step:2920, train_loss:0.083121791352088, acc:0.5502504173622704


  7%|▋         | 2996/43738 [22:43<4:20:46,  2.60it/s]

step:2920, train_loss:0.08319366251243833, acc:0.5500667556742324


  7%|▋         | 2997/43738 [22:44<5:20:32,  2.12it/s]

step:2920, train_loss:0.08316918842573807, acc:0.5502168835502169


  7%|▋         | 2998/43738 [22:44<5:11:38,  2.18it/s]

step:2920, train_loss:0.0831434693830866, acc:0.5503669112741828


  7%|▋         | 2999/43738 [22:45<5:24:09,  2.09it/s]

step:2920, train_loss:0.08318345085424661, acc:0.5501833944648216


  7%|▋         | 3000/43738 [22:45<5:02:22,  2.25it/s]

step:2920, train_loss:0.08323450517850385, acc:0.55


  7%|▋         | 3001/43738 [22:46<4:55:20,  2.30it/s]

step:2920, train_loss:0.08327820065097451, acc:0.5498167277574142


  7%|▋         | 3002/43738 [22:46<4:18:35,  2.63it/s]

step:2920, train_loss:0.08325076707742185, acc:0.549966688874084


  7%|▋         | 3003/43738 [22:46<3:54:53,  2.89it/s]

step:2920, train_loss:0.0832243885032132, acc:0.5501165501165501


  7%|▋         | 3004/43738 [22:47<4:00:30,  2.82it/s]

step:2920, train_loss:0.08324236665414377, acc:0.5499334221038615


  7%|▋         | 3005/43738 [22:47<5:06:22,  2.22it/s]

step:2920, train_loss:0.08327842813534135, acc:0.5497504159733777


  7%|▋         | 3006/43738 [22:48<5:03:25,  2.24it/s]

step:2920, train_loss:0.08328389580707693, acc:0.5495675316034597


  7%|▋         | 3007/43738 [22:48<4:29:11,  2.52it/s]

step:2920, train_loss:0.08330939280075239, acc:0.5493847688726305


  8%|▊         | 3312/43738 [25:09<4:31:00,  2.49it/s]

step:2940, train_loss:0.08356123440710905, acc:0.5498188405797102


  8%|▊         | 3313/43738 [25:10<4:25:19,  2.54it/s]

step:2940, train_loss:0.08354683620388918, acc:0.5499547238152732


  8%|▊         | 3314/43738 [25:10<4:25:52,  2.53it/s]

step:2940, train_loss:0.08357283518128403, acc:0.5497887748943875


  8%|▊         | 3315/43738 [25:11<4:03:23,  2.77it/s]

step:2940, train_loss:0.08357232249579652, acc:0.5496229260935144


  8%|▊         | 3316/43738 [25:11<3:59:46,  2.81it/s]

step:2940, train_loss:0.08355482020289332, acc:0.5497587454764777


  8%|▊         | 3317/43738 [25:11<4:17:50,  2.61it/s]

step:2940, train_loss:0.08354049463413349, acc:0.549894482966536


  8%|▊         | 3318/43738 [25:12<4:24:25,  2.55it/s]

step:2940, train_loss:0.08354282383565963, acc:0.5497287522603979


  8%|▊         | 3319/43738 [25:12<4:17:35,  2.62it/s]

step:2940, train_loss:0.08351858308994844, acc:0.5498644169930702


  8%|▊         | 3320/43738 [25:13<4:30:33,  2.49it/s]

step:2940, train_loss:0.08352852943221861, acc:0.5496987951807228


  8%|▊         | 3321/43738 [25:13<4:15:09,  2.64it/s]

step:2940, train_loss:0.08350666399405555, acc:0.5498343872327612


  8%|▊         | 3322/43738 [25:13<4:05:52,  2.74it/s]

step:2940, train_loss:0.08351086910283415, acc:0.5496688741721855


  8%|▊         | 3323/43738 [25:13<3:40:23,  3.06it/s]

step:2940, train_loss:0.08349622138668333, acc:0.5498043936202227


  8%|▊         | 3324/43738 [25:14<4:00:02,  2.81it/s]

step:2940, train_loss:0.08349071009060836, acc:0.5499398315282792


  8%|▊         | 3325/43738 [25:14<4:09:47,  2.70it/s]

step:2940, train_loss:0.08351380783289013, acc:0.5497744360902256


  8%|▊         | 3326/43738 [25:15<4:09:37,  2.70it/s]

step:2940, train_loss:0.0834938956711642, acc:0.5499098015634396


  8%|▊         | 3327/43738 [25:15<4:18:22,  2.61it/s]

step:2940, train_loss:0.08348560790916029, acc:0.5500450856627592


  8%|▊         | 3632/43738 [27:30<5:17:57,  2.10it/s]

step:2960, train_loss:0.0830545753073046, acc:0.5514867841409692


  8%|▊         | 3633/43738 [27:30<5:03:19,  2.20it/s]

step:2960, train_loss:0.08303524273765375, acc:0.5516102394715111


  8%|▊         | 3634/43738 [27:30<4:21:47,  2.55it/s]

step:2960, train_loss:0.08303661046327804, acc:0.5514584479911943


  8%|▊         | 3635/43738 [27:31<5:04:21,  2.20it/s]

step:2960, train_loss:0.0830864952195806, acc:0.5513067400275103


  8%|▊         | 3636/43738 [27:31<5:19:03,  2.09it/s]

step:2960, train_loss:0.08306368295646903, acc:0.5514301430143014


  8%|▊         | 3637/43738 [27:32<5:00:44,  2.22it/s]

step:2960, train_loss:0.08304089743026936, acc:0.5515534781413253


  8%|▊         | 3638/43738 [27:32<4:56:21,  2.26it/s]

step:2960, train_loss:0.08302332697757299, acc:0.5516767454645409


  8%|▊         | 3639/43738 [27:33<5:09:44,  2.16it/s]

step:2960, train_loss:0.0830072085763554, acc:0.5517999450398461


  8%|▊         | 3640/43738 [27:34<6:05:02,  1.83it/s]

step:2960, train_loss:0.08303436831096865, acc:0.5516483516483517


  8%|▊         | 3641/43738 [27:34<5:45:25,  1.93it/s]

step:2960, train_loss:0.0830148285704159, acc:0.5517714913485307


  8%|▊         | 3642/43738 [27:34<5:42:22,  1.95it/s]

step:2960, train_loss:0.08300995502805628, acc:0.5516199890170236


  8%|▊         | 3643/43738 [27:35<5:23:22,  2.07it/s]

step:2960, train_loss:0.08300320670493942, acc:0.5517430688992588


  8%|▊         | 3644/43738 [27:35<5:10:03,  2.16it/s]

step:2960, train_loss:0.08298912153910075, acc:0.5518660812294183


  8%|▊         | 3645/43738 [27:36<5:42:31,  1.95it/s]

step:2960, train_loss:0.08298763377499027, acc:0.5517146776406036


  8%|▊         | 3646/43738 [27:36<4:55:18,  2.26it/s]

step:2960, train_loss:0.08300256149413754, acc:0.5515633571036752


  8%|▊         | 3647/43738 [27:37<6:07:05,  1.82it/s]

step:2960, train_loss:0.08300714690213275, acc:0.5514121195503153


  9%|▉         | 3952/43738 [30:02<6:07:52,  1.80it/s]

step:2980, train_loss:0.08292609228714436, acc:0.5523785425101214


  9%|▉         | 3953/43738 [30:02<6:20:12,  1.74it/s]

step:2980, train_loss:0.08292543155145239, acc:0.5524917783961548


  9%|▉         | 3954/43738 [30:03<7:12:16,  1.53it/s]

step:2980, train_loss:0.08293173967008988, acc:0.5523520485584219


  9%|▉         | 3955/43738 [30:03<5:56:08,  1.86it/s]

step:2980, train_loss:0.08291504242413003, acc:0.5524652338811631


  9%|▉         | 3956/43738 [30:04<4:57:00,  2.23it/s]

step:2980, train_loss:0.08290263701041539, acc:0.5525783619817998


  9%|▉         | 3957/43738 [30:04<4:18:24,  2.57it/s]

step:2980, train_loss:0.0828912648720127, acc:0.5526914329037149


  9%|▉         | 3958/43738 [30:05<5:29:02,  2.01it/s]

step:2980, train_loss:0.08289375149624263, acc:0.5525517938352703


  9%|▉         | 3959/43738 [30:05<5:24:17,  2.04it/s]

step:2980, train_loss:0.0828754287533163, acc:0.5526648143470574


  9%|▉         | 3960/43738 [30:05<4:53:24,  2.26it/s]

step:2980, train_loss:0.08286643168758753, acc:0.5527777777777778


  9%|▉         | 3961/43738 [30:06<4:51:29,  2.27it/s]

step:2980, train_loss:0.08287559095061518, acc:0.5526382226710427


  9%|▉         | 3962/43738 [30:07<5:55:58,  1.86it/s]

step:2980, train_loss:0.08291521670081059, acc:0.5524987380111055


  9%|▉         | 3963/43738 [30:07<5:04:55,  2.17it/s]

step:2980, train_loss:0.08289917482808667, acc:0.5526116578349735


  9%|▉         | 3964/43738 [30:07<4:49:41,  2.29it/s]

step:2980, train_loss:0.08293512982095055, acc:0.5524722502522704


  9%|▉         | 3965/43738 [30:08<5:10:19,  2.14it/s]

step:2980, train_loss:0.0829654864952923, acc:0.5523329129886507


  9%|▉         | 3966/43738 [30:08<4:24:21,  2.51it/s]

step:2980, train_loss:0.08294837585399649, acc:0.5524457892082703


  9%|▉         | 3967/43738 [30:08<3:51:05,  2.87it/s]

step:2980, train_loss:0.08292750207291337, acc:0.5525586085202924


 10%|▉         | 4272/43738 [32:25<5:30:58,  1.99it/s]

step:3000, train_loss:0.08251290122586469, acc:0.5536048689138576


 10%|▉         | 4273/43738 [32:25<5:20:23,  2.05it/s]

step:3000, train_loss:0.08249689762262002, acc:0.5537093377018488


 10%|▉         | 4274/43738 [32:26<4:47:33,  2.29it/s]

step:3000, train_loss:0.0824925710523971, acc:0.5535797847449696


 10%|▉         | 4275/43738 [32:26<5:15:59,  2.08it/s]

step:3000, train_loss:0.0824748424848144, acc:0.5536842105263158


 10%|▉         | 4276/43738 [32:27<4:28:12,  2.45it/s]

step:3000, train_loss:0.08245561959313055, acc:0.5537885874649204


 10%|▉         | 4277/43738 [32:27<3:53:06,  2.82it/s]

step:3000, train_loss:0.08243874433654924, acc:0.5538929155950433


 10%|▉         | 4278/43738 [32:27<4:15:44,  2.57it/s]

step:3000, train_loss:0.08242341671066344, acc:0.5539971949509116


 10%|▉         | 4279/43738 [32:28<4:09:43,  2.63it/s]

step:3000, train_loss:0.08242272154185004, acc:0.5541014255667212


 10%|▉         | 4280/43738 [32:28<4:07:29,  2.66it/s]

step:3000, train_loss:0.08240389434522002, acc:0.5542056074766355


 10%|▉         | 4281/43738 [32:29<5:26:58,  2.01it/s]

step:3000, train_loss:0.08239826742609761, acc:0.5543097407147862


 10%|▉         | 4282/43738 [32:29<4:34:32,  2.40it/s]

step:3000, train_loss:0.08239653323256821, acc:0.5544138253152733


 10%|▉         | 4283/43738 [32:29<4:26:43,  2.47it/s]

step:3000, train_loss:0.08240613844438384, acc:0.5542843801074013


 10%|▉         | 4284/43738 [32:30<4:09:16,  2.64it/s]

step:3000, train_loss:0.0823969015328699, acc:0.5543884220354809


 10%|▉         | 4285/43738 [32:30<3:41:09,  2.97it/s]

step:3000, train_loss:0.08237844154597591, acc:0.5544924154025671


 10%|▉         | 4286/43738 [32:30<3:23:42,  3.23it/s]

step:3000, train_loss:0.0823645895767925, acc:0.5545963602426505


 10%|▉         | 4287/43738 [32:31<3:37:19,  3.03it/s]

step:3000, train_loss:0.08234791366178627, acc:0.5547002565896898


 10%|█         | 4592/43738 [34:49<5:57:02,  1.83it/s]

step:3020, train_loss:0.08270358097565757, acc:0.5533536585365854


 11%|█         | 4593/43738 [34:50<5:46:09,  1.88it/s]

step:3020, train_loss:0.08270910481837215, acc:0.5534509035488787


 11%|█         | 4594/43738 [34:50<5:52:45,  1.85it/s]

step:3020, train_loss:0.08271851387971103, acc:0.5533304309969526


 11%|█         | 4595/43738 [34:51<6:26:56,  1.69it/s]

step:3020, train_loss:0.08274399531450097, acc:0.5532100108813928


 11%|█         | 4596/43738 [34:51<5:25:18,  2.01it/s]

step:3020, train_loss:0.08272875657269305, acc:0.553307223672759


 11%|█         | 4597/43738 [34:52<4:55:44,  2.21it/s]

step:3020, train_loss:0.08271547988726353, acc:0.5534043941701109


 11%|█         | 4598/43738 [34:52<5:06:24,  2.13it/s]

step:3020, train_loss:0.0827303902656246, acc:0.553284036537625


 11%|█         | 4599/43738 [34:52<4:47:49,  2.27it/s]

step:3020, train_loss:0.08271246483775725, acc:0.553381169819526


 11%|█         | 4600/43738 [34:53<5:27:55,  1.99it/s]

step:3020, train_loss:0.08276721818419426, acc:0.5532608695652174


 11%|█         | 4601/43738 [34:54<6:02:50,  1.80it/s]

step:3020, train_loss:0.08279376852007946, acc:0.5531406216039991


 11%|█         | 4602/43738 [34:54<6:36:45,  1.64it/s]

step:3020, train_loss:0.0827760243269299, acc:0.5532377227292482


 11%|█         | 4603/43738 [34:55<5:41:41,  1.91it/s]

step:3020, train_loss:0.08275817044072678, acc:0.5533347816641321


 11%|█         | 4604/43738 [34:55<5:15:05,  2.07it/s]

step:3020, train_loss:0.08274843008890856, acc:0.5534317984361424


 11%|█         | 4605/43738 [34:56<5:20:23,  2.04it/s]

step:3020, train_loss:0.08274631372555113, acc:0.5535287730727471


 11%|█         | 4606/43738 [34:57<6:29:24,  1.67it/s]

step:3020, train_loss:0.08272903779575622, acc:0.5536257056013895


 11%|█         | 4607/43738 [34:57<6:01:50,  1.80it/s]

step:3020, train_loss:0.08271433227498273, acc:0.5537225960494899


 11%|█         | 4912/43738 [37:14<4:00:06,  2.70it/s]

step:3040, train_loss:0.08207634080637519, acc:0.5574104234527687


 11%|█         | 4913/43738 [37:14<3:37:11,  2.98it/s]

step:3040, train_loss:0.08206093548672197, acc:0.5575005088540607


 11%|█         | 4914/43738 [37:14<4:01:57,  2.67it/s]

step:3040, train_loss:0.08209414051637999, acc:0.5573870573870574


 11%|█         | 4915/43738 [37:15<4:16:16,  2.52it/s]

step:3040, train_loss:0.08208952194918469, acc:0.5574771108850458


 11%|█         | 4916/43738 [37:16<4:58:29,  2.17it/s]

step:3040, train_loss:0.08209940953174936, acc:0.5573637103336045


 11%|█         | 4917/43738 [37:16<4:55:30,  2.19it/s]

step:3040, train_loss:0.08208384477912076, acc:0.5574537319503763


 11%|█         | 4918/43738 [37:16<4:11:06,  2.58it/s]

step:3040, train_loss:0.08206884723887707, acc:0.557543716958113


 11%|█         | 4919/43738 [37:16<3:48:05,  2.84it/s]

step:3040, train_loss:0.08207378149195466, acc:0.5574303720268348


 11%|█         | 4920/43738 [37:17<4:40:53,  2.30it/s]

step:3040, train_loss:0.08210725804926436, acc:0.5573170731707318


 11%|█▏        | 4921/43738 [37:18<5:07:31,  2.10it/s]

step:3040, train_loss:0.08212650582008905, acc:0.5572038203617151


 11%|█▏        | 4922/43738 [37:18<6:01:51,  1.79it/s]

step:3040, train_loss:0.08211975705176064, acc:0.5572937830150345


 11%|█▏        | 4923/43738 [37:19<5:02:32,  2.14it/s]

step:3040, train_loss:0.08210335649532605, acc:0.557383709120455


 11%|█▏        | 4924/43738 [37:19<4:33:33,  2.36it/s]

step:3040, train_loss:0.08209345944309814, acc:0.5574735987002437


 11%|█▏        | 4925/43738 [37:19<3:57:17,  2.73it/s]

step:3040, train_loss:0.0820814108019482, acc:0.5575634517766498


 11%|█▏        | 4926/43738 [37:20<5:04:51,  2.12it/s]

step:3040, train_loss:0.08206716870031527, acc:0.5576532683719042


 11%|█▏        | 4927/43738 [37:20<5:04:02,  2.13it/s]

step:3040, train_loss:0.08205066324489381, acc:0.55774304850822


 12%|█▏        | 5232/43738 [39:33<4:11:39,  2.55it/s]

step:3060, train_loss:0.08241721770051906, acc:0.5556192660550459


 12%|█▏        | 5233/43738 [39:33<4:34:06,  2.34it/s]

step:3060, train_loss:0.08241987444727393, acc:0.5555130900057329


 12%|█▏        | 5234/43738 [39:33<4:14:45,  2.52it/s]

step:3060, train_loss:0.08241016345064495, acc:0.5555980129919755


 12%|█▏        | 5235/43738 [39:34<4:46:08,  2.24it/s]

step:3060, train_loss:0.08240653696500429, acc:0.5556829035339064


 12%|█▏        | 5236/43738 [39:34<4:26:10,  2.41it/s]

step:3060, train_loss:0.08239144951558373, acc:0.5557677616501145


 12%|█▏        | 5237/43738 [39:35<4:56:47,  2.16it/s]

step:3060, train_loss:0.08237591677910007, acc:0.5558525873591751


 12%|█▏        | 5238/43738 [39:35<5:12:23,  2.05it/s]

step:3060, train_loss:0.08237442250614273, acc:0.5559373806796487


 12%|█▏        | 5239/43738 [39:36<4:48:05,  2.23it/s]

step:3060, train_loss:0.0823587186336536, acc:0.5560221416300821


 12%|█▏        | 5240/43738 [39:37<5:24:51,  1.98it/s]

step:3060, train_loss:0.0823557170619055, acc:0.5561068702290076


 12%|█▏        | 5241/43738 [39:37<5:37:30,  1.90it/s]

step:3060, train_loss:0.08234639597582971, acc:0.5561915664949437


 12%|█▏        | 5242/43738 [39:37<5:15:16,  2.04it/s]

step:3060, train_loss:0.08234646476197638, acc:0.5560854635635254


 12%|█▏        | 5243/43738 [39:38<5:14:49,  2.04it/s]

step:3060, train_loss:0.08235057711241064, acc:0.5561701316040435


 12%|█▏        | 5244/43738 [39:38<4:49:59,  2.21it/s]

step:3060, train_loss:0.08233514580722945, acc:0.5562547673531655


 12%|█▏        | 5245/43738 [39:39<4:11:05,  2.56it/s]

step:3060, train_loss:0.08233215863834419, acc:0.5563393708293612


 12%|█▏        | 5246/43738 [39:39<3:48:11,  2.81it/s]

step:3060, train_loss:0.08232565780471042, acc:0.5564239420510866


 12%|█▏        | 5247/43738 [39:39<4:39:48,  2.29it/s]

step:3060, train_loss:0.08236000419637879, acc:0.5563178959405375


 13%|█▎        | 5552/43738 [42:05<4:45:09,  2.23it/s]

step:3080, train_loss:0.08250908374340216, acc:0.5554755043227666


 13%|█▎        | 5553/43738 [42:05<4:32:53,  2.33it/s]

step:3080, train_loss:0.0824944059926113, acc:0.5555555555555556


 13%|█▎        | 5554/43738 [42:06<4:32:47,  2.33it/s]

step:3080, train_loss:0.08248217803407683, acc:0.5556355779618293


 13%|█▎        | 5555/43738 [42:06<5:07:34,  2.07it/s]

step:3080, train_loss:0.0825044123175165, acc:0.5555355535553556


 13%|█▎        | 5556/43738 [42:07<5:18:04,  2.00it/s]

step:3080, train_loss:0.0824938597008684, acc:0.5556155507559395


 13%|█▎        | 5557/43738 [42:08<6:14:19,  1.70it/s]

step:3080, train_loss:0.08251549356651768, acc:0.5555155659528522


 13%|█▎        | 5558/43738 [42:08<6:06:45,  1.74it/s]

step:3080, train_loss:0.08250374823684714, acc:0.5555955379632962


 13%|█▎        | 5559/43738 [42:08<5:00:57,  2.11it/s]

step:3080, train_loss:0.08249105855762963, acc:0.555675481201655


 13%|█▎        | 5560/43738 [42:09<4:14:50,  2.50it/s]

step:3080, train_loss:0.08250245744415446, acc:0.5555755395683454


 13%|█▎        | 5561/43738 [42:09<4:59:22,  2.13it/s]

step:3080, train_loss:0.08251316310665409, acc:0.5554756338787987


 13%|█▎        | 5562/43738 [42:10<4:55:00,  2.16it/s]

step:3080, train_loss:0.08250547465430638, acc:0.5555555555555556


 13%|█▎        | 5563/43738 [42:10<4:35:07,  2.31it/s]

step:3080, train_loss:0.0824968044642171, acc:0.5556354484990114


 13%|█▎        | 5564/43738 [42:11<5:14:10,  2.03it/s]

step:3080, train_loss:0.08248658046302496, acc:0.5557153127246586


 13%|█▎        | 5565/43738 [42:11<5:19:33,  1.99it/s]

step:3080, train_loss:0.08250568339096798, acc:0.5556154537286613


 13%|█▎        | 5566/43738 [42:12<5:25:49,  1.95it/s]

step:3080, train_loss:0.08249678955370274, acc:0.5556952928494431


 13%|█▎        | 5567/43738 [42:12<4:52:48,  2.17it/s]

step:3080, train_loss:0.08252191203932643, acc:0.5555954733249506


 13%|█▎        | 5872/43738 [44:24<5:49:04,  1.81it/s]

step:3100, train_loss:0.08228634261441195, acc:0.555858310626703


 13%|█▎        | 5873/43738 [44:24<6:11:45,  1.70it/s]

step:3100, train_loss:0.08228434901519008, acc:0.5557636642261196


 13%|█▎        | 5874/43738 [44:25<5:47:21,  1.82it/s]

step:3100, train_loss:0.08228506527248788, acc:0.555839291794348


 13%|█▎        | 5875/43738 [44:26<6:02:52,  1.74it/s]

step:3100, train_loss:0.08228711518463976, acc:0.5557446808510639


 13%|█▎        | 5876/43738 [44:26<6:41:37,  1.57it/s]

step:3100, train_loss:0.08230528082400651, acc:0.5556501021102791


 13%|█▎        | 5877/43738 [44:27<5:55:42,  1.77it/s]

step:3100, train_loss:0.08231227862077108, acc:0.5555555555555556


 13%|█▎        | 5878/43738 [44:27<5:34:03,  1.89it/s]

step:3100, train_loss:0.0823135055531506, acc:0.555631167063627


 13%|█▎        | 5879/43738 [44:28<5:25:15,  1.94it/s]

step:3100, train_loss:0.08233526729653118, acc:0.5555366558938595


 13%|█▎        | 5880/43738 [44:28<6:19:14,  1.66it/s]

step:3100, train_loss:0.08233658681778264, acc:0.5556122448979591


 13%|█▎        | 5881/43738 [44:29<5:24:55,  1.94it/s]

step:3100, train_loss:0.0823414808906881, acc:0.55551776908689


 13%|█▎        | 5882/43738 [44:29<4:28:19,  2.35it/s]

step:3100, train_loss:0.08234245072462563, acc:0.555423325399524


 13%|█▎        | 5883/43738 [44:30<5:04:10,  2.07it/s]

step:3100, train_loss:0.0823294201471864, acc:0.5554988951215366


 13%|█▎        | 5884/43738 [44:30<4:38:45,  2.26it/s]

step:3100, train_loss:0.08235741746421363, acc:0.5554044867437118


 13%|█▎        | 5885/43738 [44:30<4:17:04,  2.45it/s]

step:3100, train_loss:0.08234343276436883, acc:0.5554800339847069


 13%|█▎        | 5886/43738 [44:31<4:01:56,  2.61it/s]

step:3100, train_loss:0.08234137850099371, acc:0.5553856608902481


 13%|█▎        | 5887/43738 [44:31<3:55:20,  2.68it/s]

step:3100, train_loss:0.08233570698951842, acc:0.5554611856633259


 14%|█▍        | 6192/43738 [46:43<5:11:23,  2.01it/s]

step:3120, train_loss:0.08245245061669228, acc:0.5549095607235142


 14%|█▍        | 6193/43738 [46:44<4:39:23,  2.24it/s]

step:3120, train_loss:0.0824489772643061, acc:0.5548199580171161


 14%|█▍        | 6194/43738 [46:44<5:08:31,  2.03it/s]

step:3120, train_loss:0.08243984858128064, acc:0.5548918308040038


 14%|█▍        | 6195/43738 [46:45<4:47:04,  2.18it/s]

step:3120, train_loss:0.08244046607845718, acc:0.5548022598870056


 14%|█▍        | 6196/43738 [46:45<4:57:51,  2.10it/s]

step:3120, train_loss:0.08244397386609151, acc:0.5547127178825049


 14%|█▍        | 6197/43738 [46:46<5:00:29,  2.08it/s]

step:3120, train_loss:0.08244443751912502, acc:0.5546232047765047


 14%|█▍        | 6198/43738 [46:46<4:55:00,  2.12it/s]

step:3120, train_loss:0.08245302364238608, acc:0.5545337205550177


 14%|█▍        | 6199/43738 [46:47<5:32:29,  1.88it/s]

step:3120, train_loss:0.0824601762965121, acc:0.5544442652040652


 14%|█▍        | 6200/43738 [46:47<5:03:05,  2.06it/s]

step:3120, train_loss:0.08245066258584097, acc:0.5545161290322581


 14%|█▍        | 6201/43738 [46:47<4:57:07,  2.11it/s]

step:3120, train_loss:0.08247019556836906, acc:0.5544267053701016


 14%|█▍        | 6202/43738 [46:48<5:27:56,  1.91it/s]

step:3120, train_loss:0.08247488977498707, acc:0.5543373105449855


 14%|█▍        | 6203/43738 [46:49<5:01:24,  2.08it/s]

step:3120, train_loss:0.08247048086020008, acc:0.554409156859584


 14%|█▍        | 6204/43738 [46:49<5:27:48,  1.91it/s]

step:3120, train_loss:0.08249104465891811, acc:0.5543197936814958


 14%|█▍        | 6205/43738 [46:50<6:27:16,  1.62it/s]

step:3120, train_loss:0.08249597439699446, acc:0.5542304593070104


 14%|█▍        | 6206/43738 [46:50<5:41:41,  1.83it/s]

step:3120, train_loss:0.08248306181778868, acc:0.5543022881082823


 14%|█▍        | 6207/43738 [46:51<5:09:13,  2.02it/s]

step:3120, train_loss:0.08251522784178539, acc:0.5542129853391332


 15%|█▍        | 6512/43738 [49:14<4:48:40,  2.15it/s]

step:3140, train_loss:0.08235888167564777, acc:0.5568181818181818


 15%|█▍        | 6513/43738 [49:14<4:29:57,  2.30it/s]

step:3140, train_loss:0.08235782183773145, acc:0.5568862275449101


 15%|█▍        | 6514/43738 [49:15<4:17:31,  2.41it/s]

step:3140, train_loss:0.0823476314654141, acc:0.5569542523794904


 15%|█▍        | 6515/43738 [49:15<4:22:15,  2.37it/s]

step:3140, train_loss:0.0823354332810836, acc:0.5570222563315426


 15%|█▍        | 6516/43738 [49:15<3:48:39,  2.71it/s]

step:3140, train_loss:0.08232312458715431, acc:0.5570902394106814


 15%|█▍        | 6517/43738 [49:16<4:29:10,  2.30it/s]

step:3140, train_loss:0.08233623987408206, acc:0.557004756789934


 15%|█▍        | 6518/43738 [49:17<4:54:19,  2.11it/s]

step:3140, train_loss:0.08232735903457589, acc:0.5570727216937711


 15%|█▍        | 6519/43738 [49:17<5:52:22,  1.76it/s]

step:3140, train_loss:0.08233648189497983, acc:0.5569872679858874


 15%|█▍        | 6520/43738 [49:18<5:26:44,  1.90it/s]

step:3140, train_loss:0.08232767508756063, acc:0.5570552147239264


 15%|█▍        | 6521/43738 [49:18<5:12:05,  1.99it/s]

step:3140, train_loss:0.08231550737711617, acc:0.5571231406226039


 15%|█▍        | 6522/43738 [49:19<5:35:19,  1.85it/s]

step:3140, train_loss:0.08232133844187099, acc:0.5570377184912604


 15%|█▍        | 6523/43738 [49:19<4:56:32,  2.09it/s]

step:3140, train_loss:0.08231058113885216, acc:0.5571056262455926


 15%|█▍        | 6524/43738 [49:20<5:02:33,  2.05it/s]

step:3140, train_loss:0.0823099564421688, acc:0.5570202329858982


 15%|█▍        | 6525/43738 [49:20<5:07:34,  2.02it/s]

step:3140, train_loss:0.08230224831316128, acc:0.557088122605364


 15%|█▍        | 6526/43738 [49:21<4:39:19,  2.22it/s]

step:3140, train_loss:0.08230146686670127, acc:0.5570027581979773


 15%|█▍        | 6527/43738 [49:21<4:24:49,  2.34it/s]

step:3140, train_loss:0.08229075010100015, acc:0.5570706296920485


 16%|█▌        | 6832/43738 [51:38<3:51:33,  2.66it/s]

step:3160, train_loss:0.08221641519417627, acc:0.5570843091334895


 16%|█▌        | 6833/43738 [51:38<4:05:56,  2.50it/s]

step:3160, train_loss:0.08220494643308324, acc:0.5571491292258159


 16%|█▌        | 6834/43738 [51:39<4:08:08,  2.48it/s]

step:3160, train_loss:0.08220323988801938, acc:0.5570676031606673


 16%|█▌        | 6835/43738 [51:39<4:07:11,  2.49it/s]

step:3160, train_loss:0.08219673537953921, acc:0.5571324067300658


 16%|█▌        | 6836/43738 [51:40<4:13:46,  2.42it/s]

step:3160, train_loss:0.08219209233261739, acc:0.5571971913399649


 16%|█▌        | 6837/43738 [51:40<5:28:46,  1.87it/s]

step:3160, train_loss:0.08222400521488094, acc:0.5571156940178441


 16%|█▌        | 6838/43738 [51:41<6:36:59,  1.55it/s]

step:3160, train_loss:0.08222825876728762, acc:0.5570342205323194


 16%|█▌        | 6839/43738 [51:42<5:29:20,  1.87it/s]

step:3160, train_loss:0.08222621622850812, acc:0.5569527708729346


 16%|█▌        | 6840/43738 [51:42<4:53:20,  2.10it/s]

step:3160, train_loss:0.08221427563838123, acc:0.5570175438596491


 16%|█▌        | 6841/43738 [51:42<4:27:25,  2.30it/s]

step:3160, train_loss:0.08220229495090652, acc:0.5570822979096623


 16%|█▌        | 6842/43738 [51:43<5:08:25,  1.99it/s]

step:3160, train_loss:0.08221270779472693, acc:0.5570008769365683


 16%|█▌        | 6843/43738 [51:43<4:48:44,  2.13it/s]

step:3160, train_loss:0.08221877445000848, acc:0.5569194797603391


 16%|█▌        | 6844/43738 [51:44<4:37:58,  2.21it/s]

step:3160, train_loss:0.08220708347120438, acc:0.5569842197545295


 16%|█▌        | 6845/43738 [51:44<4:37:21,  2.22it/s]

step:3160, train_loss:0.08219910569430668, acc:0.5570489408327246


 16%|█▌        | 6846/43738 [51:45<4:30:20,  2.27it/s]

step:3160, train_loss:0.08219348485570864, acc:0.5571136430032135


 16%|█▌        | 6847/43738 [51:45<4:23:03,  2.34it/s]

step:3160, train_loss:0.08218600942301978, acc:0.5571783262742807


 16%|█▋        | 7152/43738 [54:08<5:05:36,  2.00it/s]

step:3180, train_loss:0.08219941647248703, acc:0.5576062639821029


 16%|█▋        | 7153/43738 [54:08<4:35:42,  2.21it/s]

step:3180, train_loss:0.08219212497066879, acc:0.5576681112819796


 16%|█▋        | 7154/43738 [54:08<4:30:16,  2.26it/s]

step:3180, train_loss:0.08218961737198495, acc:0.5575901593514118


 16%|█▋        | 7155/43738 [54:09<3:55:04,  2.59it/s]

step:3180, train_loss:0.08218089323078401, acc:0.5576519916142557


 16%|█▋        | 7156/43738 [54:09<3:28:09,  2.93it/s]

step:3180, train_loss:0.08217030728139435, acc:0.5577138065958636


 16%|█▋        | 7157/43738 [54:09<3:40:59,  2.76it/s]

step:3180, train_loss:0.08216698183758282, acc:0.5577756043034792


 16%|█▋        | 7158/43738 [54:10<4:04:54,  2.49it/s]

step:3180, train_loss:0.08215638449353015, acc:0.557837384744342


 16%|█▋        | 7159/43738 [54:10<3:55:46,  2.59it/s]

step:3180, train_loss:0.08214516581195935, acc:0.557899147925688


 16%|█▋        | 7160/43738 [54:11<4:34:39,  2.22it/s]

step:3180, train_loss:0.0821403891890451, acc:0.5579608938547486


 16%|█▋        | 7161/43738 [54:12<5:40:17,  1.79it/s]

step:3180, train_loss:0.08216340905843698, acc:0.557882977237816


 16%|█▋        | 7162/43738 [54:12<6:29:17,  1.57it/s]

step:3180, train_loss:0.08216554007211722, acc:0.5578050823792237


 16%|█▋        | 7163/43738 [54:13<6:02:00,  1.68it/s]

step:3180, train_loss:0.08218114763519532, acc:0.557727209269859


 16%|█▋        | 7164/43738 [54:14<6:41:34,  1.52it/s]

step:3180, train_loss:0.08219411834833566, acc:0.5576493579006142


 16%|█▋        | 7165/43738 [54:14<6:24:52,  1.58it/s]

step:3180, train_loss:0.08221430195160295, acc:0.5575715282623867


 16%|█▋        | 7166/43738 [54:15<6:43:48,  1.51it/s]

step:3180, train_loss:0.0822179547418913, acc:0.5574937203460787


 16%|█▋        | 7167/43738 [54:15<6:02:12,  1.68it/s]

step:3180, train_loss:0.08221832652338239, acc:0.5575554625366262


 17%|█▋        | 7473/43738 [56:29<3:43:15,  2.71it/s]

step:3200, train_loss:0.08234421622939127, acc:0.5568790149892934
step:3200, train_loss:0.08233929920549989, acc:0.5569383112538472


 17%|█▋        | 7474/43738 [56:30<3:23:01,  2.98it/s]

step:3200, train_loss:0.08232865160555145, acc:0.556997591651057


 17%|█▋        | 7475/43738 [56:30<3:29:44,  2.88it/s]

step:3200, train_loss:0.08233469016151503, acc:0.556923076923077


 17%|█▋        | 7476/43738 [56:30<3:35:28,  2.80it/s]

step:3200, train_loss:0.08233615490116314, acc:0.556848582129481


 17%|█▋        | 7477/43738 [56:31<3:51:08,  2.61it/s]

step:3200, train_loss:0.08233437893102535, acc:0.556774107262271


 17%|█▋        | 7478/43738 [56:31<3:39:43,  2.75it/s]

step:3200, train_loss:0.08232344774835929, acc:0.5568333779085317


 17%|█▋        | 7479/43738 [56:31<3:19:05,  3.04it/s]

step:3200, train_loss:0.08231745204717031, acc:0.5568926327049071


 17%|█▋        | 7480/43738 [56:32<3:05:27,  3.26it/s]

step:3200, train_loss:0.08230644967381308, acc:0.5569518716577541


 17%|█▋        | 7481/43738 [56:32<3:33:45,  2.83it/s]

step:3200, train_loss:0.08231634517555435, acc:0.5568774228044379


 17%|█▋        | 7482/43738 [56:32<3:29:00,  2.89it/s]

step:3200, train_loss:0.08231073907687501, acc:0.5569366479550922


 17%|█▋        | 7483/43738 [56:33<3:11:22,  3.16it/s]

step:3200, train_loss:0.08232277517033067, acc:0.5568622210343445


 17%|█▋        | 7484/43738 [56:33<3:15:12,  3.10it/s]

step:3200, train_loss:0.08231540306787588, acc:0.5569214323890967


 17%|█▋        | 7485/43738 [56:34<4:06:09,  2.45it/s]

step:3200, train_loss:0.08231254502876308, acc:0.5569806279225117


 17%|█▋        | 7486/43738 [56:34<4:01:42,  2.50it/s]

step:3200, train_loss:0.08230233555989801, acc:0.5570398076409298


 17%|█▋        | 7487/43738 [56:35<4:57:11,  2.03it/s]

step:3200, train_loss:0.08229139792399119, acc:0.5570989715506879


 18%|█▊        | 7792/43738 [58:52<4:45:27,  2.10it/s]

step:3220, train_loss:0.08222584616802306, acc:0.5564681724845996


 18%|█▊        | 7793/43738 [58:52<4:23:46,  2.27it/s]

step:3220, train_loss:0.08222061162293894, acc:0.556525086616194


 18%|█▊        | 7794/43738 [58:52<4:02:14,  2.47it/s]

step:3220, train_loss:0.08222102113368478, acc:0.5565819861431871


 18%|█▊        | 7795/43738 [58:53<4:14:48,  2.35it/s]

step:3220, train_loss:0.08223544757709718, acc:0.5565105837075048


 18%|█▊        | 7796/43738 [58:53<4:15:13,  2.35it/s]

step:3220, train_loss:0.08224497965367585, acc:0.5564391995895331


 18%|█▊        | 7797/43738 [58:54<4:47:42,  2.08it/s]

step:3220, train_loss:0.08223446377760016, acc:0.5564960882390663


 18%|█▊        | 7798/43738 [58:54<4:38:16,  2.15it/s]

step:3220, train_loss:0.08224859647714833, acc:0.556424724288279


 18%|█▊        | 7799/43738 [58:54<4:02:04,  2.47it/s]

step:3220, train_loss:0.08224223280313561, acc:0.5564816002051545


 18%|█▊        | 7800/43738 [58:55<4:15:09,  2.35it/s]

step:3220, train_loss:0.08228339510825791, acc:0.5564102564102564


 18%|█▊        | 7801/43738 [58:55<3:45:56,  2.65it/s]

step:3220, train_loss:0.08227290494450065, acc:0.5564671196000512


 18%|█▊        | 7802/43738 [58:56<3:42:10,  2.70it/s]

step:3220, train_loss:0.08227511152879312, acc:0.5563957959497565


 18%|█▊        | 7803/43738 [58:56<3:22:32,  2.96it/s]

step:3220, train_loss:0.08226520148839561, acc:0.5564526464180444


 18%|█▊        | 7804/43738 [58:56<3:59:29,  2.50it/s]

step:3220, train_loss:0.0822601363400275, acc:0.5565094823167607


 18%|█▊        | 7805/43738 [58:57<4:38:17,  2.15it/s]

step:3220, train_loss:0.08225330885840236, acc:0.5565663036515055


 18%|█▊        | 7806/43738 [58:57<4:13:15,  2.36it/s]

step:3220, train_loss:0.08224741907781445, acc:0.556623110427876


 18%|█▊        | 7807/43738 [58:58<3:44:52,  2.66it/s]

step:3220, train_loss:0.08223828555571809, acc:0.5566799026514666


 19%|█▊        | 8112/43738 [1:01:23<4:53:28,  2.02it/s]

step:3240, train_loss:0.08213015324916456, acc:0.5562130177514792


 19%|█▊        | 8113/43738 [1:01:23<4:13:21,  2.34it/s]

step:3240, train_loss:0.08212384894257871, acc:0.5562677184765191


 19%|█▊        | 8114/43738 [1:01:23<3:44:27,  2.65it/s]

step:3240, train_loss:0.08211384108065538, acc:0.5563224057185112


 19%|█▊        | 8115/43738 [1:01:24<3:52:16,  2.56it/s]

step:3240, train_loss:0.08211959474750904, acc:0.5562538508934073


 19%|█▊        | 8116/43738 [1:01:24<4:52:00,  2.03it/s]

step:3240, train_loss:0.08211012868990954, acc:0.5563085263676688


 19%|█▊        | 8117/43738 [1:01:25<4:24:55,  2.24it/s]

step:3240, train_loss:0.08210461007972451, acc:0.5563631883700875


 19%|█▊        | 8118/43738 [1:01:25<3:53:40,  2.54it/s]

step:3240, train_loss:0.08209485552205517, acc:0.5564178369056418


 19%|█▊        | 8119/43738 [1:01:25<3:43:20,  2.66it/s]

step:3240, train_loss:0.08208740535166813, acc:0.5564724719793078


 19%|█▊        | 8120/43738 [1:01:25<3:17:44,  3.00it/s]

step:3240, train_loss:0.08208386643686913, acc:0.5564039408866995


 19%|█▊        | 8121/43738 [1:01:26<3:59:21,  2.48it/s]

step:3240, train_loss:0.08208763900862123, acc:0.5563354266715922


 19%|█▊        | 8122/43738 [1:01:26<3:55:10,  2.52it/s]

step:3240, train_loss:0.0821100128834573, acc:0.5562669293277518


 19%|█▊        | 8123/43738 [1:01:27<4:07:39,  2.40it/s]

step:3240, train_loss:0.08210151766837862, acc:0.5563215560753416


 19%|█▊        | 8124/43738 [1:01:27<3:34:48,  2.76it/s]

step:3240, train_loss:0.0820970866922057, acc:0.5563761693746923


 19%|█▊        | 8125/43738 [1:01:28<4:48:36,  2.06it/s]

step:3240, train_loss:0.0821006166301596, acc:0.5563076923076923


 19%|█▊        | 8126/43738 [1:01:28<4:10:12,  2.37it/s]

step:3240, train_loss:0.08209974928921068, acc:0.5562392320945114


 19%|█▊        | 8127/43738 [1:01:29<5:05:38,  1.94it/s]

step:3240, train_loss:0.08211788964251164, acc:0.5561707887289282


 19%|█▉        | 8432/43738 [1:03:57<4:53:26,  2.01it/s]

step:3260, train_loss:0.08189215952491928, acc:0.5578747628083491


 19%|█▉        | 8433/43738 [1:03:57<4:29:56,  2.18it/s]

step:3260, train_loss:0.08188257554739568, acc:0.5579271907980553


 19%|█▉        | 8434/43738 [1:03:57<4:29:59,  2.18it/s]

step:3260, train_loss:0.0818791760991912, acc:0.5579796063552288


 19%|█▉        | 8435/43738 [1:03:58<4:28:02,  2.20it/s]

step:3260, train_loss:0.08188685506681492, acc:0.5579134558387671


 19%|█▉        | 8436/43738 [1:03:58<3:51:31,  2.54it/s]

step:3260, train_loss:0.0818787836293279, acc:0.5579658605974396


 19%|█▉        | 8437/43738 [1:03:58<3:29:47,  2.80it/s]

step:3260, train_loss:0.08188410046297989, acc:0.5578997273912528


 19%|█▉        | 8438/43738 [1:03:59<4:45:37,  2.06it/s]

step:3260, train_loss:0.08187475774048981, acc:0.5579521213557715


 19%|█▉        | 8439/43738 [1:04:00<4:28:42,  2.19it/s]

step:3260, train_loss:0.08188346927824948, acc:0.5578860054508828


 19%|█▉        | 8440/43738 [1:04:00<4:47:47,  2.04it/s]

step:3260, train_loss:0.08191679395512226, acc:0.5578199052132702


 19%|█▉        | 8441/43738 [1:04:01<4:34:30,  2.14it/s]

step:3260, train_loss:0.08190716119032154, acc:0.5578722900130316


 19%|█▉        | 8442/43738 [1:04:01<4:21:49,  2.25it/s]

step:3260, train_loss:0.08189881772702536, acc:0.5579246624022743


 19%|█▉        | 8443/43738 [1:04:01<4:14:07,  2.31it/s]

step:3260, train_loss:0.08190297208071927, acc:0.5578585810730783


 19%|█▉        | 8444/43738 [1:04:02<4:19:48,  2.26it/s]

step:3260, train_loss:0.08190037379544558, acc:0.5577925153955471


 19%|█▉        | 8445/43738 [1:04:02<4:11:40,  2.34it/s]

step:3260, train_loss:0.08190017538935761, acc:0.5577264653641207


 19%|█▉        | 8446/43738 [1:04:02<3:38:04,  2.70it/s]

step:3260, train_loss:0.08190791702119259, acc:0.5576604309732418


 19%|█▉        | 8447/43738 [1:04:03<3:17:51,  2.97it/s]

step:3260, train_loss:0.0819274907323506, acc:0.5575944122173553


 20%|██        | 8752/43738 [1:06:21<5:42:29,  1.70it/s]

step:3280, train_loss:0.08230327076678179, acc:0.5564442413162706


 20%|██        | 8753/43738 [1:06:21<5:49:14,  1.67it/s]

step:3280, train_loss:0.08229413309655666, acc:0.5564949160287901


 20%|██        | 8754/43738 [1:06:22<6:33:24,  1.48it/s]

step:3280, train_loss:0.0823012463008618, acc:0.5564313456705506


 20%|██        | 8755/43738 [1:06:23<5:39:22,  1.72it/s]

step:3280, train_loss:0.08230605549508119, acc:0.5563677898343804


 20%|██        | 8756/43738 [1:06:23<5:29:34,  1.77it/s]

step:3280, train_loss:0.08229860449007585, acc:0.5564184559159433


 20%|██        | 8757/43738 [1:06:24<4:55:48,  1.97it/s]

step:3280, train_loss:0.08229704277052283, acc:0.5564691104259449


 20%|██        | 8758/43738 [1:06:24<4:08:23,  2.35it/s]

step:3280, train_loss:0.08229176527698104, acc:0.5565197533683489


 20%|██        | 8759/43738 [1:06:24<3:43:05,  2.61it/s]

step:3280, train_loss:0.08228265375555031, acc:0.5565703847471173


 20%|██        | 8760/43738 [1:06:25<3:55:17,  2.48it/s]

step:3280, train_loss:0.08227326104772403, acc:0.55662100456621


 20%|██        | 8761/43738 [1:06:25<3:50:15,  2.53it/s]

step:3280, train_loss:0.08226390507942913, acc:0.5566716128295857


 20%|██        | 8762/43738 [1:06:25<3:28:48,  2.79it/s]

step:3280, train_loss:0.08225579208151543, acc:0.5567222095412007


 20%|██        | 8763/43738 [1:06:26<4:11:29,  2.32it/s]

step:3280, train_loss:0.08226682488772084, acc:0.5566586785347484


 20%|██        | 8764/43738 [1:06:26<3:58:38,  2.44it/s]

step:3280, train_loss:0.08225800679800521, acc:0.5567092651757188


 20%|██        | 8765/43738 [1:06:26<3:30:24,  2.77it/s]

step:3280, train_loss:0.0822488487952817, acc:0.5567598402738163


 20%|██        | 8766/43738 [1:06:27<3:11:27,  3.04it/s]

step:3280, train_loss:0.08224258649974721, acc:0.5568104038329911


 20%|██        | 8767/43738 [1:06:27<3:55:37,  2.47it/s]

step:3280, train_loss:0.08224808690034453, acc:0.5567468917531653


 21%|██        | 9072/43738 [1:08:48<4:37:35,  2.08it/s]

step:3300, train_loss:0.0819912470940601, acc:0.558531746031746


 21%|██        | 9073/43738 [1:08:48<5:29:48,  1.75it/s]

step:3300, train_loss:0.08199636080953675, acc:0.5584701862669459


 21%|██        | 9074/43738 [1:08:49<4:43:19,  2.04it/s]

step:3300, train_loss:0.08198826777113727, acc:0.5585188450517964


 21%|██        | 9075/43738 [1:08:49<4:39:21,  2.07it/s]

step:3300, train_loss:0.08199076509243647, acc:0.5585674931129476


 21%|██        | 9076/43738 [1:08:49<4:01:16,  2.39it/s]

step:3300, train_loss:0.08199355604642863, acc:0.5585059497576025


 21%|██        | 9077/43738 [1:08:50<4:20:14,  2.22it/s]

step:3300, train_loss:0.08198493474313916, acc:0.5585545885204363


 21%|██        | 9078/43738 [1:08:50<4:04:35,  2.36it/s]

step:3300, train_loss:0.08199658301295833, acc:0.5584930601454065


 21%|██        | 9079/43738 [1:08:50<3:35:40,  2.68it/s]

step:3300, train_loss:0.08198755446760596, acc:0.5585416896133936


 21%|██        | 9080/43738 [1:08:51<3:27:40,  2.78it/s]

step:3300, train_loss:0.08198080653314338, acc:0.5585903083700441


 21%|██        | 9081/43738 [1:08:51<3:22:29,  2.85it/s]

step:3300, train_loss:0.0819721149852924, acc:0.5586389164188966


 21%|██        | 9082/43738 [1:08:52<4:04:45,  2.36it/s]

step:3300, train_loss:0.08198841088251352, acc:0.5585774058577406


 21%|██        | 9083/43738 [1:08:52<3:36:38,  2.67it/s]

step:3300, train_loss:0.08199155965452137, acc:0.5585159088406914


 21%|██        | 9084/43738 [1:08:52<3:18:39,  2.91it/s]

step:3300, train_loss:0.08198698087917608, acc:0.5585645090268604


 21%|██        | 9085/43738 [1:08:53<3:06:55,  3.09it/s]

step:3300, train_loss:0.08197975249539566, acc:0.5586130985140341


 21%|██        | 9086/43738 [1:08:53<2:59:27,  3.22it/s]

step:3300, train_loss:0.0819725412238051, acc:0.5586616773057451


 21%|██        | 9087/43738 [1:08:53<2:48:14,  3.43it/s]

step:3300, train_loss:0.08196451755750214, acc:0.5587102454055244


 21%|██▏       | 9392/43738 [1:11:05<4:54:03,  1.95it/s]

step:3320, train_loss:0.08182476923240223, acc:0.5596252129471891


 21%|██▏       | 9393/43738 [1:11:06<4:34:38,  2.08it/s]

step:3320, train_loss:0.08182213065356811, acc:0.5596720962418823


 21%|██▏       | 9394/43738 [1:11:06<4:58:53,  1.92it/s]

step:3320, train_loss:0.08182137805572037, acc:0.5597189695550351


 21%|██▏       | 9395/43738 [1:11:07<4:50:09,  1.97it/s]

step:3320, train_loss:0.08183839885240268, acc:0.5596593932943055


 21%|██▏       | 9396/43738 [1:11:07<4:06:35,  2.32it/s]

step:3320, train_loss:0.08183282101404817, acc:0.55970625798212


 21%|██▏       | 9397/43738 [1:11:07<3:58:09,  2.40it/s]

step:3320, train_loss:0.08184268339974524, acc:0.5596466957539641


 21%|██▏       | 9398/43738 [1:11:08<4:28:21,  2.13it/s]

step:3320, train_loss:0.08183925697696989, acc:0.5596935518195361


 21%|██▏       | 9399/43738 [1:11:08<4:03:39,  2.35it/s]

step:3320, train_loss:0.08184809598849463, acc:0.5596340036174061


 21%|██▏       | 9400/43738 [1:11:09<3:59:41,  2.39it/s]

step:3320, train_loss:0.08184146012533135, acc:0.5596808510638298


 21%|██▏       | 9401/43738 [1:11:09<3:43:06,  2.56it/s]

step:3320, train_loss:0.08183322391636984, acc:0.559727688543772


 21%|██▏       | 9402/43738 [1:11:09<3:32:30,  2.69it/s]

step:3320, train_loss:0.08182457969492145, acc:0.5597745160604127


 21%|██▏       | 9403/43738 [1:11:10<3:35:02,  2.66it/s]

step:3320, train_loss:0.08185004129845061, acc:0.5597149845793895


 22%|██▏       | 9404/43738 [1:11:10<3:36:06,  2.65it/s]

step:3320, train_loss:0.08184183820241582, acc:0.5597618034878775


 22%|██▏       | 9405/43738 [1:11:11<4:08:33,  2.30it/s]

step:3320, train_loss:0.08183328063302867, acc:0.5598086124401914


 22%|██▏       | 9406/43738 [1:11:11<4:22:15,  2.18it/s]

step:3320, train_loss:0.08184175273765197, acc:0.5597490963214969


 22%|██▏       | 9407/43738 [1:11:12<3:59:57,  2.38it/s]

step:3320, train_loss:0.08185817095323912, acc:0.5596895928563835


 22%|██▏       | 9712/43738 [1:13:31<4:08:38,  2.28it/s]

step:3340, train_loss:0.08166390641733672, acc:0.5594110378912686


 22%|██▏       | 9713/43738 [1:13:32<5:13:00,  1.81it/s]

step:3340, train_loss:0.08165556462059607, acc:0.5594563986409966


 22%|██▏       | 9714/43738 [1:13:32<4:40:01,  2.03it/s]

step:3340, train_loss:0.08164983976108571, acc:0.5595017500514721


 22%|██▏       | 9715/43738 [1:13:33<4:29:06,  2.11it/s]

step:3340, train_loss:0.08164934792538014, acc:0.5594441585177561


 22%|██▏       | 9716/43738 [1:13:33<3:57:06,  2.39it/s]

step:3340, train_loss:0.08164242557379248, acc:0.5594895018526143


 22%|██▏       | 9717/43738 [1:13:34<4:42:12,  2.01it/s]

step:3340, train_loss:0.0816343609364863, acc:0.5595348358546877


 22%|██▏       | 9718/43738 [1:13:34<4:42:07,  2.01it/s]

step:3340, train_loss:0.08164223375412692, acc:0.5594772586952048


 22%|██▏       | 9719/43738 [1:13:34<4:17:48,  2.20it/s]

step:3340, train_loss:0.08163391419073265, acc:0.5595225846280482


 22%|██▏       | 9720/43738 [1:13:35<4:13:01,  2.24it/s]

step:3340, train_loss:0.08163559549358657, acc:0.5594650205761317


 22%|██▏       | 9721/43738 [1:13:35<3:52:52,  2.43it/s]

step:3340, train_loss:0.08162772274524177, acc:0.559510338442547


 22%|██▏       | 9722/43738 [1:13:36<3:56:18,  2.40it/s]

step:3340, train_loss:0.08163029570194641, acc:0.5594527874922856


 22%|██▏       | 9723/43738 [1:13:36<4:59:07,  1.90it/s]

step:3340, train_loss:0.0816220844733732, acc:0.5594980972950735


 22%|██▏       | 9724/43738 [1:13:37<4:45:03,  1.99it/s]

step:3340, train_loss:0.08161981904731407, acc:0.5595433977786919


 22%|██▏       | 9725/43738 [1:13:37<4:31:27,  2.09it/s]

step:3340, train_loss:0.08161530444543967, acc:0.5595886889460154


 22%|██▏       | 9726/43738 [1:13:38<4:05:19,  2.31it/s]

step:3340, train_loss:0.08160725843373612, acc:0.5596339707999177


 22%|██▏       | 9727/43738 [1:13:38<3:45:26,  2.51it/s]

step:3340, train_loss:0.08160284215450617, acc:0.5596792433432713


 23%|██▎       | 10032/43738 [1:15:55<4:34:34,  2.05it/s]

step:3360, train_loss:0.08152920590856284, acc:0.560207336523126


 23%|██▎       | 10033/43738 [1:15:56<5:55:56,  1.58it/s]

step:3360, train_loss:0.08153633262410044, acc:0.5601515000498355


 23%|██▎       | 10034/43738 [1:15:57<5:31:58,  1.69it/s]

step:3360, train_loss:0.0815443784081363, acc:0.5600956747059996


 23%|██▎       | 10035/43738 [1:15:57<5:33:48,  1.68it/s]

step:3360, train_loss:0.0815370868183995, acc:0.5601395117090184


 23%|██▎       | 10036/43738 [1:15:58<5:09:31,  1.81it/s]

step:3360, train_loss:0.08154037910900061, acc:0.560083698684735


 23%|██▎       | 10037/43738 [1:15:58<5:16:01,  1.78it/s]

step:3360, train_loss:0.08153244758149404, acc:0.5601275281458603


 23%|██▎       | 10038/43738 [1:15:59<5:54:48,  1.58it/s]

step:3360, train_loss:0.08153789843763527, acc:0.5600717274357442


 23%|██▎       | 10039/43738 [1:16:00<5:50:41,  1.60it/s]

step:3360, train_loss:0.08155279478109312, acc:0.5600159378424145


 23%|██▎       | 10040/43738 [1:16:00<4:47:12,  1.96it/s]

step:3360, train_loss:0.08154526269617023, acc:0.5600597609561753


 23%|██▎       | 10041/43738 [1:16:00<3:59:40,  2.34it/s]

step:3360, train_loss:0.08155208472269034, acc:0.5600039836669655


 23%|██▎       | 10042/43738 [1:16:01<4:29:16,  2.09it/s]

step:3360, train_loss:0.08154761135720678, acc:0.5600477992431786


 23%|██▎       | 10043/43738 [1:16:01<4:03:10,  2.31it/s]

step:3360, train_loss:0.0815432946028902, acc:0.5600916060937967


 23%|██▎       | 10044/43738 [1:16:02<4:20:52,  2.15it/s]

step:3360, train_loss:0.08153594057441824, acc:0.5601354042214257


 23%|██▎       | 10045/43738 [1:16:02<4:10:17,  2.24it/s]

step:3360, train_loss:0.08153747799471943, acc:0.5600796416127427


 23%|██▎       | 10046/43738 [1:16:02<3:59:18,  2.35it/s]

step:3360, train_loss:0.081530155086308, acc:0.5601234322118256


 23%|██▎       | 10047/43738 [1:16:03<3:59:51,  2.34it/s]

step:3360, train_loss:0.08154336362406851, acc:0.560067681895093


 24%|██▎       | 10352/43738 [1:18:24<6:29:32,  1.43it/s]

step:3380, train_loss:0.08145564082145998, acc:0.5599884080370943


 24%|██▎       | 10353/43738 [1:18:24<5:42:57,  1.62it/s]

step:3380, train_loss:0.08144973847119168, acc:0.5600309089152903


 24%|██▎       | 10354/43738 [1:18:24<4:44:24,  1.96it/s]

step:3380, train_loss:0.08144256190786388, acc:0.5600734015839289


 24%|██▎       | 10355/43738 [1:18:25<4:28:08,  2.07it/s]

step:3380, train_loss:0.08143765777049862, acc:0.5601158860453888


 24%|██▎       | 10356/43738 [1:18:25<3:46:51,  2.45it/s]

step:3380, train_loss:0.08143868114659396, acc:0.5600617999227501


 24%|██▎       | 10357/43738 [1:18:25<3:16:57,  2.82it/s]

step:3380, train_loss:0.08143317770836656, acc:0.5601042773003766


 24%|██▎       | 10358/43738 [1:18:26<4:04:22,  2.28it/s]

step:3380, train_loss:0.08142644418314379, acc:0.5601467464761537


 24%|██▎       | 10359/43738 [1:18:26<3:52:15,  2.40it/s]

step:3380, train_loss:0.08142232299626778, acc:0.5601892074524568


 24%|██▎       | 10360/43738 [1:18:26<3:21:33,  2.76it/s]

step:3380, train_loss:0.08141647982992602, acc:0.5602316602316603


 24%|██▎       | 10361/43738 [1:18:27<3:06:08,  2.99it/s]

step:3380, train_loss:0.08140990812520628, acc:0.5602741048161375


 24%|██▎       | 10362/43738 [1:18:27<3:10:33,  2.92it/s]

step:3380, train_loss:0.08141936992042954, acc:0.5602200347423277


 24%|██▎       | 10363/43738 [1:18:27<3:28:34,  2.67it/s]

step:3380, train_loss:0.0814170359131119, acc:0.5602624722570684


 24%|██▎       | 10364/43738 [1:18:28<4:03:39,  2.28it/s]

step:3380, train_loss:0.0814157938202494, acc:0.5602084137398687


 24%|██▎       | 10365/43738 [1:18:28<3:45:09,  2.47it/s]

step:3380, train_loss:0.08141706472981448, acc:0.5601543656536421


 24%|██▎       | 10366/43738 [1:18:29<3:16:12,  2.83it/s]

step:3380, train_loss:0.08142293498128182, acc:0.5601003279953695


 24%|██▎       | 10367/43738 [1:18:29<3:26:37,  2.69it/s]

step:3380, train_loss:0.0814409458228278, acc:0.5600463007620333


 24%|██▍       | 10672/43738 [1:20:48<4:21:17,  2.11it/s]

step:3400, train_loss:0.08158633293330977, acc:0.5586581709145427


 24%|██▍       | 10673/43738 [1:20:48<4:10:35,  2.20it/s]

step:3400, train_loss:0.08158681917060984, acc:0.5586058277897499


 24%|██▍       | 10674/43738 [1:20:49<4:46:27,  1.92it/s]

step:3400, train_loss:0.08159479648121645, acc:0.5585534944725501


 24%|██▍       | 10675/43738 [1:20:50<5:02:44,  1.82it/s]

step:3400, train_loss:0.08158918064730504, acc:0.5585948477751757


 24%|██▍       | 10676/43738 [1:20:50<5:14:52,  1.75it/s]

step:3400, train_loss:0.08159894455190493, acc:0.558542525290371


 24%|██▍       | 10677/43738 [1:20:51<5:51:11,  1.57it/s]

step:3400, train_loss:0.08161581671584058, acc:0.5584902126065374


 24%|██▍       | 10678/43738 [1:20:51<4:55:41,  1.86it/s]

step:3400, train_loss:0.0816083088433012, acc:0.5585315602172691


 24%|██▍       | 10679/43738 [1:20:52<4:55:03,  1.87it/s]

step:3400, train_loss:0.08162080232359646, acc:0.5584792583575241


 24%|██▍       | 10680/43738 [1:20:52<4:23:52,  2.09it/s]

step:3400, train_loss:0.0816249014553952, acc:0.5584269662921348


 24%|██▍       | 10681/43738 [1:20:53<3:51:01,  2.38it/s]

step:3400, train_loss:0.08161761008966568, acc:0.5584683082108417


 24%|██▍       | 10682/43738 [1:20:53<4:17:03,  2.14it/s]

step:3400, train_loss:0.0816244372263023, acc:0.5584160269612433


 24%|██▍       | 10683/43738 [1:20:54<4:57:09,  1.85it/s]

step:3400, train_loss:0.08163788159533203, acc:0.5583637554993915


 24%|██▍       | 10684/43738 [1:20:54<4:01:11,  2.28it/s]

step:3400, train_loss:0.08163381239212836, acc:0.5584050917259453


 24%|██▍       | 10685/43738 [1:20:55<4:26:15,  2.07it/s]

step:3400, train_loss:0.08164135578609144, acc:0.5583528310715957


 24%|██▍       | 10686/43738 [1:20:55<3:47:57,  2.42it/s]

step:3400, train_loss:0.0816342032933623, acc:0.5583941605839416


 24%|██▍       | 10687/43738 [1:20:56<4:09:55,  2.20it/s]

step:3400, train_loss:0.08163052155811566, acc:0.5584354823617479


 25%|██▌       | 10992/43738 [1:23:20<5:58:05,  1.52it/s]

step:3420, train_loss:0.0816209874402591, acc:0.5592248908296943


 25%|██▌       | 10993/43738 [1:23:20<4:54:57,  1.85it/s]

step:3420, train_loss:0.08161416180397171, acc:0.5592649868097881


 25%|██▌       | 10994/43738 [1:23:20<4:21:43,  2.09it/s]

step:3420, train_loss:0.08160938400336619, acc:0.559305075495725


 25%|██▌       | 10995/43738 [1:23:21<4:08:12,  2.20it/s]

step:3420, train_loss:0.08160486649706508, acc:0.5593451568894953


 25%|██▌       | 10996/43738 [1:23:21<4:35:52,  1.98it/s]

step:3420, train_loss:0.08160973100947627, acc:0.5592942888323027


 25%|██▌       | 10997/43738 [1:23:22<5:01:52,  1.81it/s]

step:3420, train_loss:0.08160341650205462, acc:0.5593343639174321


 25%|██▌       | 10998/43738 [1:23:23<4:47:52,  1.90it/s]

step:3420, train_loss:0.08159600538476738, acc:0.5593744317148572


 25%|██▌       | 10999/43738 [1:23:23<5:30:44,  1.65it/s]

step:3420, train_loss:0.08160574108569373, acc:0.5593235748704428


 25%|██▌       | 11000/43738 [1:23:24<4:55:15,  1.85it/s]

step:3420, train_loss:0.08161255066631765, acc:0.5592727272727273


 25%|██▌       | 11001/43738 [1:23:24<4:01:04,  2.26it/s]

step:3420, train_loss:0.08160629856851567, acc:0.5593127897463867


 25%|██▌       | 11002/43738 [1:23:25<4:44:39,  1.92it/s]

step:3420, train_loss:0.08161727240557208, acc:0.5592619523722959


 25%|██▌       | 11003/43738 [1:23:25<4:13:48,  2.15it/s]

step:3420, train_loss:0.08160986165516565, acc:0.5593020085431246


 25%|██▌       | 11004/43738 [1:23:25<3:42:16,  2.45it/s]

step:3420, train_loss:0.08161568358907406, acc:0.559251181388586


 25%|██▌       | 11005/43738 [1:23:26<3:43:52,  2.44it/s]

step:3420, train_loss:0.08161616563275588, acc:0.5592003634711494


 25%|██▌       | 11006/43738 [1:23:26<4:02:42,  2.25it/s]

step:3420, train_loss:0.08160876430186462, acc:0.5592404143194621


 25%|██▌       | 11007/43738 [1:23:27<3:51:52,  2.35it/s]

step:3420, train_loss:0.0816047038288985, acc:0.5592804578904333


 26%|██▌       | 11312/43738 [1:25:46<4:26:33,  2.03it/s]

step:3440, train_loss:0.08143657214807433, acc:0.5603783592644979


 26%|██▌       | 11313/43738 [1:25:46<4:35:36,  1.96it/s]

step:3440, train_loss:0.08143053864927567, acc:0.5604172191284363


 26%|██▌       | 11314/43738 [1:25:47<5:18:10,  1.70it/s]

step:3440, train_loss:0.081442213177055, acc:0.5603676860526781


 26%|██▌       | 11315/43738 [1:25:47<4:41:53,  1.92it/s]

step:3440, train_loss:0.08143525302125235, acc:0.5604065399911622


 26%|██▌       | 11316/43738 [1:25:48<4:25:01,  2.04it/s]

step:3440, train_loss:0.08143084742786828, acc:0.5604453870625663


 26%|██▌       | 11317/43738 [1:25:48<4:36:13,  1.96it/s]

step:3440, train_loss:0.0814244317917527, acc:0.5604842272687108


 26%|██▌       | 11318/43738 [1:25:49<4:34:55,  1.97it/s]

step:3440, train_loss:0.08142734891969432, acc:0.560434705778406


 26%|██▌       | 11319/43738 [1:25:49<4:16:06,  2.11it/s]

step:3440, train_loss:0.0814212888115053, acc:0.5604735400653768


 26%|██▌       | 11320/43738 [1:25:50<4:26:57,  2.02it/s]

step:3440, train_loss:0.08143851316087963, acc:0.5604240282685512


 26%|██▌       | 11321/43738 [1:25:50<4:20:10,  2.08it/s]

step:3440, train_loss:0.08143455550196556, acc:0.5604628566381061


 26%|██▌       | 11322/43738 [1:25:51<5:12:34,  1.73it/s]

step:3440, train_loss:0.08143688316743622, acc:0.5604133545310016


 26%|██▌       | 11323/43738 [1:25:52<5:25:11,  1.66it/s]

step:3440, train_loss:0.08144201844380125, acc:0.5603638611675351


 26%|██▌       | 11324/43738 [1:25:52<4:42:00,  1.92it/s]

step:3440, train_loss:0.08144180770982737, acc:0.5603143765453903


 26%|██▌       | 11325/43738 [1:25:52<4:16:39,  2.10it/s]

step:3440, train_loss:0.08143800248588778, acc:0.5603532008830022


 26%|██▌       | 11326/43738 [1:25:53<4:22:23,  2.06it/s]

step:3440, train_loss:0.081444195832868, acc:0.5603037259403143


 26%|██▌       | 11327/43738 [1:25:53<4:13:42,  2.13it/s]

step:3440, train_loss:0.0814511248814245, acc:0.5602542597333804


 27%|██▋       | 11632/43738 [1:28:18<3:26:43,  2.59it/s]

step:3460, train_loss:0.08146604865456561, acc:0.5598349381017882


 27%|██▋       | 11633/43738 [1:28:19<3:18:00,  2.70it/s]

step:3460, train_loss:0.08145992586570497, acc:0.5598727757242328


 27%|██▋       | 11634/43738 [1:28:19<3:02:27,  2.93it/s]

step:3460, train_loss:0.08145965337440994, acc:0.5598246518824136


 27%|██▋       | 11635/43738 [1:28:19<3:00:21,  2.97it/s]

step:3460, train_loss:0.08147254466442079, acc:0.5597765363128492


 27%|██▋       | 11636/43738 [1:28:20<3:09:32,  2.82it/s]

step:3460, train_loss:0.08146926823841041, acc:0.5598143691990375


 27%|██▋       | 11637/43738 [1:28:20<3:04:51,  2.89it/s]

step:3460, train_loss:0.08146935494923367, acc:0.5597662627825041


 27%|██▋       | 11638/43738 [1:28:21<3:48:22,  2.34it/s]

step:3460, train_loss:0.0814717164262319, acc:0.5597181646330984


 27%|██▋       | 11639/43738 [1:28:21<3:20:43,  2.67it/s]

step:3460, train_loss:0.08147119826061223, acc:0.5597559927828851


 27%|██▋       | 11640/43738 [1:28:21<3:33:07,  2.51it/s]

step:3460, train_loss:0.0814662852343898, acc:0.5597938144329897


 27%|██▋       | 11641/43738 [1:28:22<3:54:02,  2.29it/s]

step:3460, train_loss:0.08147609473516657, acc:0.5597457263121725


 27%|██▋       | 11642/43738 [1:28:22<3:33:40,  2.50it/s]

step:3460, train_loss:0.08147380578278271, acc:0.5597835423466758


 27%|██▋       | 11644/43738 [1:28:23<2:39:18,  3.36it/s]

step:3460, train_loss:0.0814712619354562, acc:0.559821351885253
step:3460, train_loss:0.08146562136145549, acc:0.5598591549295775


 27%|██▋       | 11645/43738 [1:28:23<3:13:28,  2.76it/s]

step:3460, train_loss:0.08146322511742651, acc:0.5598969514813225


 27%|██▋       | 11646/43738 [1:28:24<4:01:07,  2.22it/s]

step:3460, train_loss:0.08145742420324223, acc:0.5599347415421604


 27%|██▋       | 11647/43738 [1:28:24<3:49:22,  2.33it/s]

step:3460, train_loss:0.0814504880412528, acc:0.5599725251137632


 27%|██▋       | 11952/43738 [1:30:41<3:42:05,  2.39it/s]

step:3480, train_loss:0.08156396282308367, acc:0.5589022757697456


 27%|██▋       | 11953/43738 [1:30:41<3:56:28,  2.24it/s]

step:3480, train_loss:0.08156224773297951, acc:0.5589391784489249


 27%|██▋       | 11954/43738 [1:30:42<4:42:57,  1.87it/s]

step:3480, train_loss:0.08155614013444301, acc:0.5589760749539903


 27%|██▋       | 11955/43738 [1:30:42<4:01:41,  2.19it/s]

step:3480, train_loss:0.08155018081803837, acc:0.559012965286491


 27%|██▋       | 11956/43738 [1:30:43<4:17:47,  2.05it/s]

step:3480, train_loss:0.08155420300862225, acc:0.5589662094345935


 27%|██▋       | 11957/43738 [1:30:44<4:48:02,  1.84it/s]

step:3480, train_loss:0.08156627242338237, acc:0.558919461403362


 27%|██▋       | 11958/43738 [1:30:45<5:30:25,  1.60it/s]

step:3480, train_loss:0.08157562289096212, acc:0.5588727211908346


 27%|██▋       | 11959/43738 [1:30:45<4:32:59,  1.94it/s]

step:3480, train_loss:0.081570304016692, acc:0.5589096078267414


 27%|██▋       | 11960/43738 [1:30:45<3:55:16,  2.25it/s]

step:3480, train_loss:0.0815634838755209, acc:0.5589464882943144


 27%|██▋       | 11961/43738 [1:30:46<4:18:17,  2.05it/s]

step:3480, train_loss:0.08156835926643438, acc:0.5588997575453557


 27%|██▋       | 11962/43738 [1:30:46<3:56:27,  2.24it/s]

step:3480, train_loss:0.08156188035667725, acc:0.5589366326701221


 27%|██▋       | 11963/43738 [1:30:47<4:37:57,  1.91it/s]

step:3480, train_loss:0.08156339100516377, acc:0.5589735016300259


 27%|██▋       | 11964/43738 [1:30:47<4:31:34,  1.95it/s]

step:3480, train_loss:0.08157105033962728, acc:0.558926780341023


 27%|██▋       | 11965/43738 [1:30:48<4:07:44,  2.14it/s]

step:3480, train_loss:0.08157205263479052, acc:0.5588800668616799


 27%|██▋       | 11966/43738 [1:30:48<3:51:20,  2.29it/s]

step:3480, train_loss:0.08157093003317696, acc:0.5588333611900385


 27%|██▋       | 11967/43738 [1:30:48<3:57:28,  2.23it/s]

step:3480, train_loss:0.0815646377835186, acc:0.5588702264560875


 28%|██▊       | 12272/43738 [1:33:12<5:08:43,  1.70it/s]

step:3500, train_loss:0.08158365221401898, acc:0.5596479791395046


 28%|██▊       | 12273/43738 [1:33:13<4:27:34,  1.96it/s]

step:3500, train_loss:0.08157706185771454, acc:0.5596838588772102


 28%|██▊       | 12274/43738 [1:33:13<4:03:42,  2.15it/s]

step:3500, train_loss:0.08157047623471123, acc:0.5597197327684537


 28%|██▊       | 12275/43738 [1:33:13<3:52:14,  2.26it/s]

step:3500, train_loss:0.08157347169041808, acc:0.5596741344195519


 28%|██▊       | 12276/43738 [1:33:14<3:18:14,  2.65it/s]

step:3500, train_loss:0.08156877879842771, acc:0.5597100032583904


 28%|██▊       | 12277/43738 [1:33:14<2:56:47,  2.97it/s]

step:3500, train_loss:0.0815623291795955, acc:0.5597458662539708


 28%|██▊       | 12278/43738 [1:33:14<3:15:02,  2.69it/s]

step:3500, train_loss:0.08156533322874392, acc:0.5597002769180648


 28%|██▊       | 12279/43738 [1:33:15<3:15:33,  2.68it/s]

step:3500, train_loss:0.081578579233835, acc:0.5596546950077368


 28%|██▊       | 12280/43738 [1:33:15<3:13:44,  2.71it/s]

step:3500, train_loss:0.08158113063203667, acc:0.5596091205211726


 28%|██▊       | 12281/43738 [1:33:15<3:25:27,  2.55it/s]

step:3500, train_loss:0.0815845635515053, acc:0.559563553456559


 28%|██▊       | 12282/43738 [1:33:16<3:21:34,  2.60it/s]

step:3500, train_loss:0.08158618165251176, acc:0.5595179938120827


 28%|██▊       | 12283/43738 [1:33:17<4:23:52,  1.99it/s]

step:3500, train_loss:0.08158383331543671, acc:0.5595538549214362


 28%|██▊       | 12284/43738 [1:33:17<3:56:13,  2.22it/s]

step:3500, train_loss:0.08158328612066448, acc:0.5595897101921198


 28%|██▊       | 12285/43738 [1:33:17<3:57:30,  2.21it/s]

step:3500, train_loss:0.08159419304087999, acc:0.5595441595441596


 28%|██▊       | 12286/43738 [1:33:18<3:48:00,  2.30it/s]

step:3500, train_loss:0.08160350417735385, acc:0.5594986163112485


 28%|██▊       | 12287/43738 [1:33:18<4:03:35,  2.15it/s]

step:3500, train_loss:0.08160372400014755, acc:0.5595344673231871


 29%|██▉       | 12592/43738 [1:35:43<4:17:08,  2.02it/s]

step:3520, train_loss:0.08168078963092125, acc:0.5588468869123253


 29%|██▉       | 12593/43738 [1:35:43<4:17:01,  2.02it/s]

step:3520, train_loss:0.08167839317560102, acc:0.5588819185261653


 29%|██▉       | 12594/43738 [1:35:44<4:07:07,  2.10it/s]

step:3520, train_loss:0.08168044639070053, acc:0.5589169445767826


 29%|██▉       | 12595/43738 [1:35:44<3:34:03,  2.42it/s]

step:3520, train_loss:0.08167474719999102, acc:0.5589519650655022


 29%|██▉       | 12596/43738 [1:35:44<3:12:45,  2.69it/s]

step:3520, train_loss:0.08167934486419447, acc:0.5589075897110194


 29%|██▉       | 12597/43738 [1:35:45<3:37:17,  2.39it/s]

step:3520, train_loss:0.08167634507124374, acc:0.5589426053822338


 29%|██▉       | 12598/43738 [1:35:46<4:13:52,  2.04it/s]

step:3520, train_loss:0.08166989391319183, acc:0.558977615494523


 29%|██▉       | 12599/43738 [1:35:46<4:25:07,  1.96it/s]

step:3520, train_loss:0.08167222752586398, acc:0.5589332486705294


 29%|██▉       | 12600/43738 [1:35:46<3:58:36,  2.18it/s]

step:3520, train_loss:0.08166875078290191, acc:0.558968253968254


 29%|██▉       | 12601/43738 [1:35:47<3:41:01,  2.35it/s]

step:3520, train_loss:0.08167952480717999, acc:0.5589238949289739


 29%|██▉       | 12602/43738 [1:35:47<4:07:39,  2.10it/s]

step:3520, train_loss:0.08169002578728995, acc:0.5588795429296937


 29%|██▉       | 12603/43738 [1:35:48<4:27:03,  1.94it/s]

step:3520, train_loss:0.08169311191457138, acc:0.5588351979687376


 29%|██▉       | 12604/43738 [1:35:48<4:18:39,  2.01it/s]

step:3520, train_loss:0.08169129646576784, acc:0.5588701999365281


 29%|██▉       | 12605/43738 [1:35:49<4:01:15,  2.15it/s]

step:3520, train_loss:0.08168613071833619, acc:0.5589051963506545


 29%|██▉       | 12606/43738 [1:35:49<3:43:16,  2.32it/s]

step:3520, train_loss:0.08167989319381545, acc:0.5589401872124385


 29%|██▉       | 12607/43738 [1:35:50<4:04:35,  2.12it/s]

step:3520, train_loss:0.08167950765378727, acc:0.5589751725232014


 30%|██▉       | 12912/43738 [1:38:01<4:39:14,  1.84it/s]

step:3540, train_loss:0.0814734903763361, acc:0.5594795539033457


 30%|██▉       | 12913/43738 [1:38:01<4:16:24,  2.00it/s]

step:3540, train_loss:0.08147269641275852, acc:0.5594362270580036


 30%|██▉       | 12914/43738 [1:38:02<4:39:16,  1.84it/s]

step:3540, train_loss:0.08147718986968229, acc:0.5593929069227195


 30%|██▉       | 12915/43738 [1:38:02<4:24:19,  1.94it/s]

step:3540, train_loss:0.08147929732809821, acc:0.559427022841657


 30%|██▉       | 12916/43738 [1:38:02<3:54:25,  2.19it/s]

step:3540, train_loss:0.08148054400926431, acc:0.5593837101269743


 30%|██▉       | 12917/43738 [1:38:03<4:13:51,  2.02it/s]

step:3540, train_loss:0.0814765038233562, acc:0.5594178214755748


 30%|██▉       | 12918/43738 [1:38:03<4:27:17,  1.92it/s]

step:3540, train_loss:0.08148094511154975, acc:0.559374516178975


 30%|██▉       | 12919/43738 [1:38:04<3:57:13,  2.17it/s]

step:3540, train_loss:0.08147570735435429, acc:0.5594086229584333


 30%|██▉       | 12920/43738 [1:38:04<3:50:10,  2.23it/s]

step:3540, train_loss:0.08147570500365674, acc:0.5593653250773993


 30%|██▉       | 12921/43738 [1:38:05<3:48:29,  2.25it/s]

step:3540, train_loss:0.08147542257469076, acc:0.559322033898305


 30%|██▉       | 12922/43738 [1:38:05<4:14:10,  2.02it/s]

step:3540, train_loss:0.08147101634554615, acc:0.5593561368209256


 30%|██▉       | 12923/43738 [1:38:06<3:41:17,  2.32it/s]

step:3540, train_loss:0.08146761982330898, acc:0.5593902344656814


 30%|██▉       | 12924/43738 [1:38:06<4:02:37,  2.12it/s]

step:3540, train_loss:0.08146998518961698, acc:0.5594243268337976


 30%|██▉       | 12925/43738 [1:38:06<3:41:51,  2.31it/s]

step:3540, train_loss:0.08146396860205596, acc:0.5594584139264991


 30%|██▉       | 12926/43738 [1:38:07<3:45:04,  2.28it/s]

step:3540, train_loss:0.08147017954646497, acc:0.5594151322915055


 30%|██▉       | 12927/43738 [1:38:07<3:13:24,  2.66it/s]

step:3540, train_loss:0.0814650223831045, acc:0.5594492148216911


 30%|███       | 13232/43738 [1:40:31<4:09:21,  2.04it/s]

step:3560, train_loss:0.08167595837145494, acc:0.5587968561064087


 30%|███       | 13233/43738 [1:40:31<3:43:52,  2.27it/s]

step:3560, train_loss:0.08166980581815854, acc:0.5588301972341873


 30%|███       | 13234/43738 [1:40:31<3:14:42,  2.61it/s]

step:3560, train_loss:0.08167250996918221, acc:0.5587879703793259


 30%|███       | 13235/43738 [1:40:32<3:23:16,  2.50it/s]

step:3560, train_loss:0.08166724766563613, acc:0.5588213071401587


 30%|███       | 13236/43738 [1:40:32<3:32:49,  2.39it/s]

step:3560, train_loss:0.0816650732717615, acc:0.558854638863705


 30%|███       | 13237/43738 [1:40:33<3:46:51,  2.24it/s]

step:3560, train_loss:0.08166891788358864, acc:0.5588124197325678


 30%|███       | 13238/43738 [1:40:33<3:51:06,  2.20it/s]

step:3560, train_loss:0.08166353311288553, acc:0.5588457470917056


 30%|███       | 13239/43738 [1:40:34<3:42:18,  2.29it/s]

step:3560, train_loss:0.08166145156064308, acc:0.5588790694161191


 30%|███       | 13240/43738 [1:40:34<4:04:09,  2.08it/s]

step:3560, train_loss:0.08166607593653444, acc:0.5588368580060423


 30%|███       | 13241/43738 [1:40:34<3:31:54,  2.40it/s]

step:3560, train_loss:0.08166279504533712, acc:0.5588701759685825


 30%|███       | 13242/43738 [1:40:35<3:04:12,  2.76it/s]

step:3560, train_loss:0.08165813252420588, acc:0.5589034888989579


 30%|███       | 13243/43738 [1:40:35<3:25:45,  2.47it/s]

step:3560, train_loss:0.08165720612103977, acc:0.5589367967983085


 30%|███       | 13244/43738 [1:40:36<3:23:08,  2.50it/s]

step:3560, train_loss:0.08165372661513384, acc:0.5589700996677741


 30%|███       | 13245/43738 [1:40:36<3:38:33,  2.33it/s]

step:3560, train_loss:0.08165436803127532, acc:0.5590033975084938


 30%|███       | 13246/43738 [1:40:37<4:11:55,  2.02it/s]

step:3560, train_loss:0.08165453944932527, acc:0.5589611958327042


 30%|███       | 13247/43738 [1:40:37<4:00:27,  2.11it/s]

step:3560, train_loss:0.0816483967396273, acc:0.5589944893183362


 31%|███       | 13552/43738 [1:42:55<3:47:19,  2.21it/s]

step:3580, train_loss:0.08162069775999892, acc:0.5585153482880756


 31%|███       | 13553/43738 [1:42:55<3:37:07,  2.32it/s]

step:3580, train_loss:0.08161467807755524, acc:0.5585479229690843


 31%|███       | 13554/43738 [1:42:56<4:00:45,  2.09it/s]

step:3580, train_loss:0.08162796100350171, acc:0.5585067138851999


 31%|███       | 13555/43738 [1:42:56<3:26:08,  2.44it/s]

step:3580, train_loss:0.08163707865990749, acc:0.5584655108815935


 31%|███       | 13556/43738 [1:42:57<3:00:17,  2.79it/s]

step:3580, train_loss:0.08163213732114397, acc:0.5584980820300973


 31%|███       | 13557/43738 [1:42:57<3:00:25,  2.79it/s]

step:3580, train_loss:0.08162733187416982, acc:0.5585306483735339


 31%|███       | 13558/43738 [1:42:57<2:46:18,  3.02it/s]

step:3580, train_loss:0.08164072317832329, acc:0.5584894527216404


 31%|███       | 13559/43738 [1:42:57<2:46:03,  3.03it/s]

step:3580, train_loss:0.08163473868978834, acc:0.5585220148978538


 31%|███       | 13560/43738 [1:42:58<3:00:46,  2.78it/s]

step:3580, train_loss:0.0816403238443712, acc:0.5584808259587021


 31%|███       | 13561/43738 [1:42:58<3:15:49,  2.57it/s]

step:3580, train_loss:0.0816343274555908, acc:0.5585133839687338


 31%|███       | 13562/43738 [1:42:59<3:45:12,  2.23it/s]

step:3580, train_loss:0.08162879564069493, acc:0.5585459371774074


 31%|███       | 13563/43738 [1:42:59<3:38:00,  2.31it/s]

step:3580, train_loss:0.08162388332536234, acc:0.5585784855857848


 31%|███       | 13564/43738 [1:43:00<3:31:13,  2.38it/s]

step:3580, train_loss:0.08162075990335133, acc:0.5586110291949278


 31%|███       | 13565/43738 [1:43:00<3:49:46,  2.19it/s]

step:3580, train_loss:0.08162208987403197, acc:0.5585698488757833


 31%|███       | 13566/43738 [1:43:01<3:41:17,  2.27it/s]

step:3580, train_loss:0.08161645210687722, acc:0.5586023883237505


 31%|███       | 13567/43738 [1:43:01<3:23:50,  2.47it/s]

step:3580, train_loss:0.08161079519094315, acc:0.5586349229748655


 32%|███▏      | 13872/43738 [1:45:21<4:15:09,  1.95it/s]

step:3600, train_loss:0.08149563285465332, acc:0.5590397923875432


 32%|███▏      | 13873/43738 [1:45:21<4:05:10,  2.03it/s]

step:3600, train_loss:0.08149107262635137, acc:0.5590715778851005


 32%|███▏      | 13874/43738 [1:45:21<3:27:30,  2.40it/s]

step:3600, train_loss:0.08148522214164783, acc:0.5591033588006343


 32%|███▏      | 13875/43738 [1:45:22<3:27:49,  2.39it/s]

step:3600, train_loss:0.08147956115271694, acc:0.5591351351351351


 32%|███▏      | 13876/43738 [1:45:22<3:22:03,  2.46it/s]

step:3600, train_loss:0.08147979746027512, acc:0.5590948400115306


 32%|███▏      | 13877/43738 [1:45:22<3:21:19,  2.47it/s]

step:3600, train_loss:0.08147393071875769, acc:0.5591266123801975


 32%|███▏      | 13878/43738 [1:45:23<3:47:34,  2.19it/s]

step:3600, train_loss:0.08147865379744945, acc:0.5590863236777633


 32%|███▏      | 13879/43738 [1:45:23<3:27:22,  2.40it/s]

step:3600, train_loss:0.08147331367699115, acc:0.559118092081562


 32%|███▏      | 13880/43738 [1:45:24<3:27:04,  2.40it/s]

step:3600, train_loss:0.08146896855579387, acc:0.559149855907781


 32%|███▏      | 13881/43738 [1:45:25<4:30:33,  1.84it/s]

step:3600, train_loss:0.08146534036143273, acc:0.5591816151574094


 32%|███▏      | 13882/43738 [1:45:25<5:07:05,  1.62it/s]

step:3600, train_loss:0.0814727813194486, acc:0.5591413341017144


 32%|███▏      | 13883/43738 [1:45:26<4:14:43,  1.95it/s]

step:3600, train_loss:0.08146691286929124, acc:0.5591730893899013


 32%|███▏      | 13884/43738 [1:45:26<3:57:17,  2.10it/s]

step:3600, train_loss:0.08147067897145296, acc:0.5591328147507922


 32%|███▏      | 13885/43738 [1:45:27<3:52:35,  2.14it/s]

step:3600, train_loss:0.08146509291974913, acc:0.559164566078502


 32%|███▏      | 13886/43738 [1:45:27<3:54:01,  2.13it/s]

step:3600, train_loss:0.08146637711435309, acc:0.5591242978539536


 32%|███▏      | 13887/43738 [1:45:27<3:48:15,  2.18it/s]

step:3600, train_loss:0.08147142408300828, acc:0.5590840354288183


 32%|███▏      | 14192/43738 [1:47:46<3:42:10,  2.22it/s]

step:3620, train_loss:0.08149344795819742, acc:0.5587655016910936


 32%|███▏      | 14193/43738 [1:47:46<3:10:19,  2.59it/s]

step:3620, train_loss:0.08148771683730398, acc:0.5587965898682449


 32%|███▏      | 14194/43738 [1:47:46<2:45:20,  2.98it/s]

step:3620, train_loss:0.081506530627166, acc:0.5587572213611385


 32%|███▏      | 14195/43738 [1:47:47<2:54:11,  2.83it/s]

step:3620, train_loss:0.08150134381094955, acc:0.5587883057414582


 32%|███▏      | 14196/43738 [1:47:48<3:40:08,  2.24it/s]

step:3620, train_loss:0.08149683880595267, acc:0.5588193857424627


 32%|███▏      | 14197/43738 [1:47:48<3:27:28,  2.37it/s]

step:3620, train_loss:0.08149146040920532, acc:0.5588504613650771


 32%|███▏      | 14198/43738 [1:47:48<3:42:39,  2.21it/s]

step:3620, train_loss:0.08148610590464189, acc:0.5588815326102268


 32%|███▏      | 14199/43738 [1:47:49<3:41:17,  2.22it/s]

step:3620, train_loss:0.08148794287181395, acc:0.5588421719839425


 32%|███▏      | 14200/43738 [1:47:49<3:12:41,  2.55it/s]

step:3620, train_loss:0.08148611552638664, acc:0.5588028169014084


 32%|███▏      | 14201/43738 [1:47:49<3:05:14,  2.66it/s]

step:3620, train_loss:0.08149138378555301, acc:0.5587634673614534


 32%|███▏      | 14202/43738 [1:47:50<2:52:21,  2.86it/s]

step:3620, train_loss:0.0815008085882903, acc:0.5587241233629067


 32%|███▏      | 14203/43738 [1:47:50<3:07:49,  2.62it/s]

step:3620, train_loss:0.08149714835963275, acc:0.5587551925649511


 32%|███▏      | 14204/43738 [1:47:51<3:09:07,  2.60it/s]

step:3620, train_loss:0.08149411869145345, acc:0.5587862573922838


 32%|███▏      | 14205/43738 [1:47:51<2:53:46,  2.83it/s]

step:3620, train_loss:0.08149994530801756, acc:0.5587469200985569


 32%|███▏      | 14206/43738 [1:47:51<3:07:24,  2.63it/s]

step:3620, train_loss:0.08151996581763621, acc:0.5587075883429536


 32%|███▏      | 14207/43738 [1:47:52<4:07:24,  1.99it/s]

step:3620, train_loss:0.08152985148700821, acc:0.5586682621243049


 33%|███▎      | 14512/43738 [1:50:14<2:46:40,  2.92it/s]

step:3640, train_loss:0.08140799346259253, acc:0.5592613009922822


 33%|███▎      | 14513/43738 [1:50:14<2:33:42,  3.17it/s]

step:3640, train_loss:0.08140238504463293, acc:0.5592916695376559


 33%|███▎      | 14514/43738 [1:50:14<2:27:43,  3.30it/s]

step:3640, train_loss:0.08139679607574726, acc:0.559322033898305


 33%|███▎      | 14515/43738 [1:50:14<2:43:45,  2.97it/s]

step:3640, train_loss:0.08139152865024535, acc:0.5593523940750947


 33%|███▎      | 14516/43738 [1:50:15<2:49:16,  2.88it/s]

step:3640, train_loss:0.08139918442974109, acc:0.5593138605676495


 33%|███▎      | 14517/43738 [1:50:15<2:51:16,  2.84it/s]

step:3640, train_loss:0.08139683782912155, acc:0.5593442171247502


 33%|███▎      | 14518/43738 [1:50:16<2:50:53,  2.85it/s]

step:3640, train_loss:0.08139872694972787, acc:0.5593056894889104


 33%|███▎      | 14519/43738 [1:50:16<3:18:42,  2.45it/s]

step:3640, train_loss:0.08139313838196698, acc:0.5593360424271644


 33%|███▎      | 14520/43738 [1:50:16<3:05:57,  2.62it/s]

step:3640, train_loss:0.08139094006706808, acc:0.559366391184573


 33%|███▎      | 14521/43738 [1:50:17<3:22:44,  2.40it/s]

step:3640, train_loss:0.08139774571448198, acc:0.5593278699814063


 33%|███▎      | 14522/43738 [1:50:18<4:23:14,  1.85it/s]

step:3640, train_loss:0.08139753503241347, acc:0.559358215121884


 33%|███▎      | 14523/43738 [1:50:18<4:00:33,  2.02it/s]

step:3640, train_loss:0.08140886538154415, acc:0.5593196997865455


 33%|███▎      | 14524/43738 [1:50:19<4:32:26,  1.79it/s]

step:3640, train_loss:0.08140367206886247, acc:0.5593500413109336


 33%|███▎      | 14525/43738 [1:50:19<3:47:44,  2.14it/s]

step:3640, train_loss:0.08140700254152511, acc:0.5593115318416523


 33%|███▎      | 14526/43738 [1:50:20<3:54:30,  2.08it/s]

step:3640, train_loss:0.0814072459954142, acc:0.5592730276745147


 33%|███▎      | 14527/43738 [1:50:20<4:49:36,  1.68it/s]

step:3640, train_loss:0.0814178088684189, acc:0.5592345288084257


 34%|███▍      | 14832/43738 [1:52:46<4:12:03,  1.91it/s]

step:3660, train_loss:0.0813653424499928, acc:0.5591963322545846


 34%|███▍      | 14833/43738 [1:52:46<4:21:27,  1.84it/s]

step:3660, train_loss:0.0813680683828092, acc:0.5591586327782647


 34%|███▍      | 14834/43738 [1:52:47<4:09:35,  1.93it/s]

step:3660, train_loss:0.08136269043817848, acc:0.5591883510853445


 34%|███▍      | 14835/43738 [1:52:47<3:43:02,  2.16it/s]

step:3660, train_loss:0.08138207801534343, acc:0.5591506572295247


 34%|███▍      | 14836/43738 [1:52:47<3:23:03,  2.37it/s]

step:3660, train_loss:0.08137934526601555, acc:0.5591803720679428


 34%|███▍      | 14837/43738 [1:52:48<3:23:16,  2.37it/s]

step:3660, train_loss:0.08137397979604247, acc:0.5592100829008559


 34%|███▍      | 14838/43738 [1:52:48<3:02:07,  2.64it/s]

step:3660, train_loss:0.08136866802179371, acc:0.559239789729074


 34%|███▍      | 14839/43738 [1:52:49<3:14:16,  2.48it/s]

step:3660, train_loss:0.08137045228007071, acc:0.5592021025675584


 34%|███▍      | 14840/43738 [1:52:49<3:21:07,  2.39it/s]

step:3660, train_loss:0.08137144219451753, acc:0.5592318059299192


 34%|███▍      | 14841/43738 [1:52:49<2:56:32,  2.73it/s]

step:3660, train_loss:0.08136680470329753, acc:0.559261505289401


 34%|███▍      | 14842/43738 [1:52:49<2:37:41,  3.05it/s]

step:3660, train_loss:0.08136132836508676, acc:0.5592912006468131


 34%|███▍      | 14843/43738 [1:52:50<3:24:00,  2.36it/s]

step:3660, train_loss:0.0813587737035865, acc:0.5593208920029643


 34%|███▍      | 14844/43738 [1:52:50<2:59:48,  2.68it/s]

step:3660, train_loss:0.08135579866990986, acc:0.5593505793586634


 34%|███▍      | 14845/43738 [1:52:51<3:28:54,  2.31it/s]

step:3660, train_loss:0.08135654382811845, acc:0.5593802627147187


 34%|███▍      | 14846/43738 [1:52:52<4:20:13,  1.85it/s]

step:3660, train_loss:0.081353188616544, acc:0.5594099420719386


 34%|███▍      | 14847/43738 [1:52:52<4:03:39,  1.98it/s]

step:3660, train_loss:0.08135401215478448, acc:0.5594396174311309


 35%|███▍      | 15152/43738 [1:55:04<3:02:02,  2.62it/s]

step:3680, train_loss:0.08113818548216012, acc:0.5606520591341078


 35%|███▍      | 15153/43738 [1:55:04<2:47:29,  2.84it/s]

step:3680, train_loss:0.08113539558223327, acc:0.5606810532567809


 35%|███▍      | 15154/43738 [1:55:05<3:22:16,  2.36it/s]

step:3680, train_loss:0.0811301025841419, acc:0.5607100435528574


 35%|███▍      | 15155/43738 [1:55:05<3:15:21,  2.44it/s]

step:3680, train_loss:0.08113315686200512, acc:0.560673045199604


 35%|███▍      | 15156/43738 [1:55:06<2:55:05,  2.72it/s]

step:3680, train_loss:0.08113230494003615, acc:0.5606360517286884


 35%|███▍      | 15157/43738 [1:55:06<3:02:26,  2.61it/s]

step:3680, train_loss:0.08112901741146573, acc:0.5606650392557894


 35%|███▍      | 15158/43738 [1:55:07<3:59:55,  1.99it/s]

step:3680, train_loss:0.08112635857146289, acc:0.5606940229581739


 35%|███▍      | 15159/43738 [1:55:07<4:06:50,  1.93it/s]

step:3680, train_loss:0.08112115421409614, acc:0.5607230028365987


 35%|███▍      | 15160/43738 [1:55:08<3:31:10,  2.26it/s]

step:3680, train_loss:0.08111597080339492, acc:0.5607519788918206


 35%|███▍      | 15161/43738 [1:55:08<3:20:24,  2.38it/s]

step:3680, train_loss:0.08111525334245788, acc:0.560780951124596


 35%|███▍      | 15162/43738 [1:55:08<3:23:11,  2.34it/s]

step:3680, train_loss:0.08111570068785261, acc:0.5607439651760981


 35%|███▍      | 15163/43738 [1:55:09<3:25:37,  2.32it/s]

step:3680, train_loss:0.08111635806312267, acc:0.5607069841060476


 35%|███▍      | 15164/43738 [1:55:09<3:46:33,  2.10it/s]

step:3680, train_loss:0.08111401142833508, acc:0.5607359535742548


 35%|███▍      | 15165/43738 [1:55:10<3:33:32,  2.23it/s]

step:3680, train_loss:0.08110867122441077, acc:0.5607649192218925


 35%|███▍      | 15166/43738 [1:55:10<3:30:37,  2.26it/s]

step:3680, train_loss:0.08110676110621506, acc:0.5607938810497165


 35%|███▍      | 15167/43738 [1:55:11<3:35:03,  2.21it/s]

step:3680, train_loss:0.08110164762287467, acc:0.5608228390584822


 35%|███▌      | 15472/43738 [1:57:31<3:00:51,  2.60it/s]

step:3700, train_loss:0.08110441107152208, acc:0.5607549120992761


 35%|███▌      | 15473/43738 [1:57:31<3:24:03,  2.31it/s]

step:3700, train_loss:0.08111179262613018, acc:0.5607186712337621


 35%|███▌      | 15474/43738 [1:57:32<3:34:55,  2.19it/s]

step:3700, train_loss:0.08111381447579007, acc:0.5606824350523458


 35%|███▌      | 15475/43738 [1:57:33<4:17:31,  1.83it/s]

step:3700, train_loss:0.08110987416395589, acc:0.5607108239095315


 35%|███▌      | 15476/43738 [1:57:33<3:55:17,  2.00it/s]

step:3700, train_loss:0.0811107762498723, acc:0.5606745929180666


 35%|███▌      | 15477/43738 [1:57:33<3:45:32,  2.09it/s]

step:3700, train_loss:0.08110987856774761, acc:0.5606383666085158


 35%|███▌      | 15478/43738 [1:57:34<3:30:03,  2.24it/s]

step:3700, train_loss:0.08110463905086021, acc:0.5606667528104406


 35%|███▌      | 15479/43738 [1:57:34<2:59:52,  2.62it/s]

step:3700, train_loss:0.0810994025444676, acc:0.5606951353446605


 35%|███▌      | 15480/43738 [1:57:34<2:56:36,  2.67it/s]

step:3700, train_loss:0.08109417381096728, acc:0.5607235142118863


 35%|███▌      | 15481/43738 [1:57:35<2:58:37,  2.64it/s]

step:3700, train_loss:0.08109052609614531, acc:0.5607518894128286


 35%|███▌      | 15482/43738 [1:57:35<2:44:34,  2.86it/s]

step:3700, train_loss:0.08108774448950593, acc:0.5607802609481979


 35%|███▌      | 15483/43738 [1:57:36<3:25:02,  2.30it/s]

step:3700, train_loss:0.08108315231559717, acc:0.5608086288187044


 35%|███▌      | 15484/43738 [1:57:36<3:25:35,  2.29it/s]

step:3700, train_loss:0.08108286761840221, acc:0.5607724102299148


 35%|███▌      | 15485/43738 [1:57:36<3:02:42,  2.58it/s]

step:3700, train_loss:0.08107816157944489, acc:0.5608007749434937


 35%|███▌      | 15486/43738 [1:57:37<3:15:40,  2.41it/s]

step:3700, train_loss:0.08107981505378366, acc:0.560764561539455


 35%|███▌      | 15487/43738 [1:57:38<4:02:04,  1.95it/s]

step:3700, train_loss:0.08107987987024443, acc:0.5607283528120359


 36%|███▌      | 15792/43738 [1:59:59<3:17:51,  2.35it/s]

step:3720, train_loss:0.08109573584496288, acc:0.5606003039513677


 36%|███▌      | 15793/43738 [2:00:00<3:32:39,  2.19it/s]

step:3720, train_loss:0.08109126028859764, acc:0.5606281263851073


 36%|███▌      | 15794/43738 [2:00:00<3:04:52,  2.52it/s]

step:3720, train_loss:0.08108617525237842, acc:0.5606559452956819


 36%|███▌      | 15795/43738 [2:00:00<2:47:30,  2.78it/s]

step:3720, train_loss:0.08108135708751456, acc:0.5606837606837607


 36%|███▌      | 15796/43738 [2:00:00<2:42:16,  2.87it/s]

step:3720, train_loss:0.0810771145914899, acc:0.5607115725500127


 36%|███▌      | 15797/43738 [2:00:01<2:43:36,  2.85it/s]

step:3720, train_loss:0.08107243831802963, acc:0.5607393808951067


 36%|███▌      | 15798/43738 [2:00:01<2:40:02,  2.91it/s]

step:3720, train_loss:0.081072609901404, acc:0.5607671857197114


 36%|███▌      | 15799/43738 [2:00:02<2:48:24,  2.77it/s]

step:3720, train_loss:0.0810859915263743, acc:0.5607316918792329


 36%|███▌      | 15800/43738 [2:00:02<3:14:44,  2.39it/s]

step:3720, train_loss:0.08110023414227623, acc:0.5606962025316455


 36%|███▌      | 15801/43738 [2:00:02<3:14:38,  2.39it/s]

step:3720, train_loss:0.08110193663310653, acc:0.5607240048098222


 36%|███▌      | 15802/43738 [2:00:03<3:35:51,  2.16it/s]

step:3720, train_loss:0.08110473967162637, acc:0.5606885204404506


 36%|███▌      | 15803/43738 [2:00:04<3:57:13,  1.96it/s]

step:3720, train_loss:0.0811126448390574, acc:0.5606530405619187


 36%|███▌      | 15804/43738 [2:00:04<3:23:52,  2.28it/s]

step:3720, train_loss:0.08110752423995651, acc:0.5606808402935965


 36%|███▌      | 15805/43738 [2:00:05<4:11:21,  1.85it/s]

step:3720, train_loss:0.0811107626381707, acc:0.5606453653906991


 36%|███▌      | 15806/43738 [2:00:05<3:36:16,  2.15it/s]

step:3720, train_loss:0.08110919958485623, acc:0.5606098949765912


 36%|███▌      | 15807/43738 [2:00:06<3:47:04,  2.05it/s]

step:3720, train_loss:0.08110518561319954, acc:0.5606376921617006


 37%|███▋      | 16112/43738 [2:02:20<3:11:01,  2.41it/s]

step:3740, train_loss:0.08101226494257768, acc:0.5610724925521351


 37%|███▋      | 16113/43738 [2:02:21<3:08:46,  2.44it/s]

step:3740, train_loss:0.08101315004494601, acc:0.5610376714454167


 37%|███▋      | 16114/43738 [2:02:21<2:57:37,  2.59it/s]

step:3740, train_loss:0.08100921661142126, acc:0.5610649124984486


 37%|███▋      | 16115/43738 [2:02:21<2:42:25,  2.83it/s]

step:3740, train_loss:0.08102234311644138, acc:0.5610300961836798


 37%|███▋      | 16116/43738 [2:02:22<2:52:55,  2.66it/s]

step:3740, train_loss:0.0810247472717808, acc:0.5609952841896252


 37%|███▋      | 16117/43738 [2:02:22<3:17:47,  2.33it/s]

step:3740, train_loss:0.08102379828892603, acc:0.5610225228020103


 37%|███▋      | 16118/43738 [2:02:23<3:22:00,  2.28it/s]

step:3740, train_loss:0.08102161755921508, acc:0.5610497580344956


 37%|███▋      | 16119/43738 [2:02:23<3:13:19,  2.38it/s]

step:3740, train_loss:0.08103183929845734, acc:0.5610149512997085


 37%|███▋      | 16120/43738 [2:02:24<3:07:24,  2.46it/s]

step:3740, train_loss:0.08103043218681308, acc:0.5610421836228288


 37%|███▋      | 16121/43738 [2:02:24<3:35:27,  2.14it/s]

step:3740, train_loss:0.08103044744869174, acc:0.5610694125674586


 37%|███▋      | 16122/43738 [2:02:25<3:31:08,  2.18it/s]

step:3740, train_loss:0.08102758499008682, acc:0.5610966381342265


 37%|███▋      | 16123/43738 [2:02:25<3:34:07,  2.15it/s]

step:3740, train_loss:0.08102780704614901, acc:0.561123860323761


 37%|███▋      | 16125/43738 [2:02:26<2:36:50,  2.93it/s]

step:3740, train_loss:0.0810268869303277, acc:0.5610890597866535
step:3740, train_loss:0.08102196007199557, acc:0.5611162790697675


 37%|███▋      | 16126/43738 [2:02:26<2:40:10,  2.87it/s]

step:3740, train_loss:0.0810218027405537, acc:0.561081483318864


 37%|███▋      | 16127/43738 [2:02:26<2:41:02,  2.86it/s]

step:3740, train_loss:0.08101821486291017, acc:0.5611086996961617


 38%|███▊      | 16432/43738 [2:04:40<3:13:00,  2.36it/s]

step:3760, train_loss:0.08099557495730646, acc:0.5616480038948394


 38%|███▊      | 16433/43738 [2:04:40<3:06:43,  2.44it/s]

step:3760, train_loss:0.08099620454925373, acc:0.5616138258382523


 38%|███▊      | 16434/43738 [2:04:41<2:47:09,  2.72it/s]

step:3760, train_loss:0.08099136692338202, acc:0.5616405013995376


 38%|███▊      | 16435/43738 [2:04:41<2:48:58,  2.69it/s]

step:3760, train_loss:0.08098745430265912, acc:0.5616671737146334


 38%|███▊      | 16436/43738 [2:04:41<2:34:34,  2.94it/s]

step:3760, train_loss:0.08098644942980189, acc:0.5616330007301047


 38%|███▊      | 16437/43738 [2:04:42<3:36:13,  2.10it/s]

step:3760, train_loss:0.0809815638736274, acc:0.5616596702561295


 38%|███▊      | 16438/43738 [2:04:42<3:15:41,  2.33it/s]

step:3760, train_loss:0.08097685882336272, acc:0.5616863365372916


 38%|███▊      | 16439/43738 [2:04:43<3:21:45,  2.26it/s]

step:3760, train_loss:0.08097942298424746, acc:0.5616521686233956


 38%|███▊      | 16440/43738 [2:04:43<3:35:39,  2.11it/s]

step:3760, train_loss:0.08097813678600167, acc:0.5616788321167884


 38%|███▊      | 16441/43738 [2:04:44<3:14:14,  2.34it/s]

step:3760, train_loss:0.08097671723059333, acc:0.5616446688157655


 38%|███▊      | 16442/43738 [2:04:44<3:46:50,  2.01it/s]

step:3760, train_loss:0.08098417331551734, acc:0.5616105096703564


 38%|███▊      | 16443/43738 [2:04:45<3:38:17,  2.08it/s]

step:3760, train_loss:0.08097930690432374, acc:0.5616371708325731


 38%|███▊      | 16444/43738 [2:04:46<4:13:49,  1.79it/s]

step:3760, train_loss:0.08098118382510631, acc:0.5616030162977378


 38%|███▊      | 16445/43738 [2:04:46<3:59:49,  1.90it/s]

step:3760, train_loss:0.08098591713682321, acc:0.561568865916692


 38%|███▊      | 16446/43738 [2:04:46<3:39:56,  2.07it/s]

step:3760, train_loss:0.08098238306912421, acc:0.5615955247476591


 38%|███▊      | 16447/43738 [2:04:47<3:05:34,  2.45it/s]

step:3760, train_loss:0.08097850228721389, acc:0.5616221803368395


 38%|███▊      | 16752/43738 [2:07:05<3:49:06,  1.96it/s]

step:3780, train_loss:0.08085674827942678, acc:0.5622015281757402


 38%|███▊      | 16753/43738 [2:07:06<4:25:14,  1.70it/s]

step:3780, train_loss:0.08086312800036796, acc:0.562167969915836


 38%|███▊      | 16754/43738 [2:07:06<4:34:53,  1.64it/s]

step:3780, train_loss:0.08086129252074571, acc:0.5621344156619315


 38%|███▊      | 16755/43738 [2:07:07<3:46:55,  1.98it/s]

step:3780, train_loss:0.0808565210386562, acc:0.5621605490898239


 38%|███▊      | 16756/43738 [2:07:07<3:35:40,  2.09it/s]

step:3780, train_loss:0.08085873475059757, acc:0.5621269992838386


 38%|███▊      | 16757/43738 [2:07:07<3:08:29,  2.39it/s]

step:3780, train_loss:0.08085391027594038, acc:0.5621531300352092


 38%|███▊      | 16758/43738 [2:07:07<2:46:19,  2.70it/s]

step:3780, train_loss:0.08085396173637002, acc:0.5621195846759757


 38%|███▊      | 16759/43738 [2:07:08<2:41:09,  2.79it/s]

step:3780, train_loss:0.08085155069174763, acc:0.5621457127513575


 38%|███▊      | 16760/43738 [2:07:08<2:55:46,  2.56it/s]

step:3780, train_loss:0.08084676935302615, acc:0.5621718377088305


 38%|███▊      | 16761/43738 [2:07:09<3:12:25,  2.34it/s]

step:3780, train_loss:0.08085971920725114, acc:0.562138297237635


 38%|███▊      | 16762/43738 [2:07:09<3:13:51,  2.32it/s]

step:3780, train_loss:0.08086399793416217, acc:0.5621047607684048


 38%|███▊      | 16763/43738 [2:07:10<3:09:41,  2.37it/s]

step:3780, train_loss:0.08086526959351607, acc:0.5620712283004236


 38%|███▊      | 16764/43738 [2:07:10<3:28:15,  2.16it/s]

step:3780, train_loss:0.08086231544508311, acc:0.5620973514674302


 38%|███▊      | 16765/43738 [2:07:10<2:59:35,  2.50it/s]

step:3780, train_loss:0.08085770508346792, acc:0.5621234715180435


 38%|███▊      | 16766/43738 [2:07:11<3:07:22,  2.40it/s]

step:3780, train_loss:0.0808615515128178, acc:0.5620899439341525


 38%|███▊      | 16767/43738 [2:07:12<3:45:43,  1.99it/s]

step:3780, train_loss:0.08086745090016371, acc:0.562056420349496


 39%|███▉      | 17072/43738 [2:09:23<3:36:17,  2.05it/s]

step:3800, train_loss:0.08079269850664803, acc:0.5619728209934396


 39%|███▉      | 17073/43738 [2:09:23<3:21:17,  2.21it/s]

step:3800, train_loss:0.08079068121495922, acc:0.5619984771276284


 39%|███▉      | 17074/43738 [2:09:24<3:19:43,  2.23it/s]

step:3800, train_loss:0.08079199356500982, acc:0.5620241302565304


 39%|███▉      | 17075/43738 [2:09:24<3:25:57,  2.16it/s]

step:3800, train_loss:0.08078726299227751, acc:0.5620497803806735


 39%|███▉      | 17076/43738 [2:09:24<2:52:21,  2.58it/s]

step:3800, train_loss:0.08078254875176827, acc:0.5620754275005856


 39%|███▉      | 17077/43738 [2:09:25<2:55:09,  2.54it/s]

step:3800, train_loss:0.08077969308992321, acc:0.5621010716167946


 39%|███▉      | 17078/43738 [2:09:25<2:40:03,  2.78it/s]

step:3800, train_loss:0.08077812065362447, acc:0.5621267127298278


 39%|███▉      | 17079/43738 [2:09:25<2:45:04,  2.69it/s]

step:3800, train_loss:0.08077507266083248, acc:0.5621523508402131


 39%|███▉      | 17080/43738 [2:09:26<2:56:53,  2.51it/s]

step:3800, train_loss:0.08077485469264382, acc:0.5621194379391101


 39%|███▉      | 17081/43738 [2:09:26<2:51:03,  2.60it/s]

step:3800, train_loss:0.0807721787466895, acc:0.56214507347345


 39%|███▉      | 17082/43738 [2:09:27<2:51:39,  2.59it/s]

step:3800, train_loss:0.08076830879059477, acc:0.5621707060063225


 39%|███▉      | 17083/43738 [2:09:27<2:59:50,  2.47it/s]

step:3800, train_loss:0.08077086983863685, acc:0.562137797810689


 39%|███▉      | 17084/43738 [2:09:28<3:35:57,  2.06it/s]

step:3800, train_loss:0.08076907813683175, acc:0.5621634277686725


 39%|███▉      | 17085/43738 [2:09:28<3:30:00,  2.12it/s]

step:3800, train_loss:0.08077330288851597, acc:0.5621305238513316


 39%|███▉      | 17086/43738 [2:09:28<3:13:12,  2.30it/s]

step:3800, train_loss:0.08077575242836293, acc:0.5620976237855554


 39%|███▉      | 17087/43738 [2:09:29<2:49:31,  2.62it/s]

step:3800, train_loss:0.08077260834092181, acc:0.5621232515947796


 40%|███▉      | 17392/43738 [2:11:46<3:48:57,  1.92it/s]

step:3820, train_loss:0.08076912039014852, acc:0.561982520699172


 40%|███▉      | 17393/43738 [2:11:46<3:23:37,  2.16it/s]

step:3820, train_loss:0.08077518598443384, acc:0.5619502098545391


 40%|███▉      | 17394/43738 [2:11:47<2:58:10,  2.46it/s]

step:3820, train_loss:0.08077057736302248, acc:0.5619753938139589


 40%|███▉      | 17395/43738 [2:11:47<2:53:17,  2.53it/s]

step:3820, train_loss:0.08077292214388006, acc:0.5620005748778385


 40%|███▉      | 17396/43738 [2:11:48<3:13:10,  2.27it/s]

step:3820, train_loss:0.08078193521432075, acc:0.5619682685674868


 40%|███▉      | 17397/43738 [2:11:48<3:18:30,  2.21it/s]

step:3820, train_loss:0.08078114146568349, acc:0.5619934471460597


 40%|███▉      | 17398/43738 [2:11:49<3:16:07,  2.24it/s]

step:3820, train_loss:0.08077712826158889, acc:0.5620186228302104


 40%|███▉      | 17399/43738 [2:11:49<3:12:10,  2.28it/s]

step:3820, train_loss:0.08077453222835997, acc:0.5620437956204379


 40%|███▉      | 17400/43738 [2:11:49<2:49:42,  2.59it/s]

step:3820, train_loss:0.08077134885467273, acc:0.5620689655172414


 40%|███▉      | 17401/43738 [2:11:50<2:46:05,  2.64it/s]

step:3820, train_loss:0.08076671130137442, acc:0.5620941325211195


 40%|███▉      | 17402/43738 [2:11:50<2:47:47,  2.62it/s]

step:3820, train_loss:0.0807639928192713, acc:0.562119296632571


 40%|███▉      | 17403/43738 [2:11:51<3:14:20,  2.26it/s]

step:3820, train_loss:0.08076045142314721, acc:0.5621444578520944


 40%|███▉      | 17404/43738 [2:11:51<3:34:23,  2.05it/s]

step:3820, train_loss:0.08075673987802168, acc:0.5621696161801885


 40%|███▉      | 17405/43738 [2:11:52<3:58:03,  1.84it/s]

step:3820, train_loss:0.08075548157527859, acc:0.5621373168629704


 40%|███▉      | 17406/43738 [2:11:52<4:07:47,  1.77it/s]

step:3820, train_loss:0.08075439600982197, acc:0.5621050212570378


 40%|███▉      | 17407/43738 [2:11:53<3:35:42,  2.03it/s]

step:3820, train_loss:0.08075139205764377, acc:0.5621301775147929


 40%|████      | 17712/43738 [2:14:14<3:39:54,  1.97it/s]

step:3840, train_loss:0.08070043331702813, acc:0.5622741644083108


 40%|████      | 17713/43738 [2:14:15<3:35:59,  2.01it/s]

step:3840, train_loss:0.08069785164865878, acc:0.5622988765313611


 41%|████      | 17714/43738 [2:14:15<3:01:36,  2.39it/s]

step:3840, train_loss:0.08069352498546552, acc:0.5623235858642881


 41%|████      | 17715/43738 [2:14:15<2:39:42,  2.72it/s]

step:3840, train_loss:0.08069979856292145, acc:0.5622918430708439


 41%|████      | 17716/43738 [2:14:15<2:49:22,  2.56it/s]

step:3840, train_loss:0.08071186854234236, acc:0.5622601038609167


 41%|████      | 17717/43738 [2:14:16<2:40:23,  2.70it/s]

step:3840, train_loss:0.08070754711365992, acc:0.5622848111982841


 41%|████      | 17718/43738 [2:14:16<3:09:31,  2.29it/s]

step:3840, train_loss:0.08070373437865551, acc:0.5623095157466983


 41%|████      | 17719/43738 [2:14:17<3:10:40,  2.27it/s]

step:3840, train_loss:0.08069971716222622, acc:0.5623342175066313


 41%|████      | 17720/43738 [2:14:17<3:09:01,  2.29it/s]

step:3840, train_loss:0.08069767021498792, acc:0.5623589164785553


 41%|████      | 17721/43738 [2:14:18<3:31:34,  2.05it/s]

step:3840, train_loss:0.0806974595655139, acc:0.5623836126629422


 41%|████      | 17722/43738 [2:14:18<3:23:32,  2.13it/s]

step:3840, train_loss:0.08070127630360535, acc:0.5623518790204266


 41%|████      | 17723/43738 [2:14:19<3:19:42,  2.17it/s]

step:3840, train_loss:0.0807032527510018, acc:0.5623201489589799


 41%|████      | 17724/43738 [2:14:19<3:10:59,  2.27it/s]

step:3840, train_loss:0.08070283213486107, acc:0.5622884224779959


 41%|████      | 17725/43738 [2:14:20<3:24:45,  2.12it/s]

step:3840, train_loss:0.08069855320115661, acc:0.5623131170662905


 41%|████      | 17726/43738 [2:14:20<3:31:52,  2.05it/s]

step:3840, train_loss:0.08069616243212928, acc:0.562337808868329


 41%|████      | 17727/43738 [2:14:21<3:49:42,  1.89it/s]

step:3840, train_loss:0.0806929808650118, acc:0.5623624978845828


 41%|████      | 18032/43738 [2:16:39<3:11:28,  2.24it/s]

step:3860, train_loss:0.08076074704228269, acc:0.5623336291038155


 41%|████      | 18033/43738 [2:16:40<3:20:58,  2.13it/s]

step:3860, train_loss:0.08076412800859134, acc:0.562302445516553


 41%|████      | 18034/43738 [2:16:40<3:02:34,  2.35it/s]

step:3860, train_loss:0.08075965759918535, acc:0.5623267162027282


 41%|████      | 18035/43738 [2:16:40<3:03:56,  2.33it/s]

step:3860, train_loss:0.08076085395597067, acc:0.5622955364568893


 41%|████      | 18036/43738 [2:16:41<3:05:53,  2.30it/s]

step:3860, train_loss:0.08075679701989431, acc:0.562319804834775


 41%|████      | 18037/43738 [2:16:41<3:01:38,  2.36it/s]

step:3860, train_loss:0.08075372228675962, acc:0.5623440705217054


 41%|████      | 18038/43738 [2:16:42<3:09:01,  2.27it/s]

step:3860, train_loss:0.0807542354874711, acc:0.5623128949994456


 41%|████      | 18039/43738 [2:16:42<2:44:26,  2.60it/s]

step:3860, train_loss:0.08074975944843163, acc:0.5623371583790676


 41%|████      | 18040/43738 [2:16:42<2:41:32,  2.65it/s]

step:3860, train_loss:0.08074781524657995, acc:0.5623614190687362


 41%|████      | 18041/43738 [2:16:43<2:54:41,  2.45it/s]

step:3860, train_loss:0.0807434604533349, acc:0.5623856770688986


 41%|████▏     | 18042/43738 [2:16:43<2:43:54,  2.61it/s]

step:3860, train_loss:0.08074570880135089, acc:0.5623545061523113


 41%|████▏     | 18043/43738 [2:16:44<2:40:28,  2.67it/s]

step:3860, train_loss:0.08074303524679421, acc:0.5623787618466995


 41%|████▏     | 18044/43738 [2:16:44<2:35:03,  2.76it/s]

step:3860, train_loss:0.08075004569861151, acc:0.5623475947683441


 41%|████▏     | 18045/43738 [2:16:44<2:51:29,  2.50it/s]

step:3860, train_loss:0.0807529780011891, acc:0.5623164311443614


 41%|████▏     | 18046/43738 [2:16:45<2:49:13,  2.53it/s]

step:3860, train_loss:0.08075948859264265, acc:0.5622852709741771


 41%|████▏     | 18047/43738 [2:16:45<2:47:44,  2.55it/s]

step:3860, train_loss:0.08075839456194098, acc:0.5623095251288303


 42%|████▏     | 18352/43738 [2:19:03<2:22:30,  2.97it/s]

step:3880, train_loss:0.0807678305103456, acc:0.5616826503923278


 42%|████▏     | 18353/43738 [2:19:04<2:44:29,  2.57it/s]

step:3880, train_loss:0.0807715062221081, acc:0.5616520459870321


 42%|████▏     | 18354/43738 [2:19:04<3:00:20,  2.35it/s]

step:3880, train_loss:0.08077570053605465, acc:0.5616214449166395


 42%|████▏     | 18355/43738 [2:19:05<2:32:09,  2.78it/s]

step:3880, train_loss:0.08077130210010634, acc:0.5616453282484337


 42%|████▏     | 18356/43738 [2:19:05<2:36:40,  2.70it/s]

step:3880, train_loss:0.08077599668648315, acc:0.5616147308781869


 42%|████▏     | 18357/43738 [2:19:05<2:21:26,  2.99it/s]

step:3880, train_loss:0.0807718224842944, acc:0.5616386119736341


 42%|████▏     | 18358/43738 [2:19:06<2:31:36,  2.79it/s]

step:3880, train_loss:0.08076790845522094, acc:0.5616624904673712


 42%|████▏     | 18359/43738 [2:19:06<3:12:37,  2.20it/s]

step:3880, train_loss:0.08076499489828508, acc:0.5616863663598235


 42%|████▏     | 18360/43738 [2:19:07<3:27:07,  2.04it/s]

step:3880, train_loss:0.0807629603126798, acc:0.5617102396514161


 42%|████▏     | 18361/43738 [2:19:07<3:10:14,  2.22it/s]

step:3880, train_loss:0.08075919101155311, acc:0.5617341103425739


 42%|████▏     | 18362/43738 [2:19:08<3:44:09,  1.89it/s]

step:3880, train_loss:0.08076394431758697, acc:0.5617035181352794


 42%|████▏     | 18363/43738 [2:19:09<3:51:41,  1.83it/s]

step:3880, train_loss:0.0807657088489764, acc:0.5616729292599248


 42%|████▏     | 18364/43738 [2:19:09<3:34:47,  1.97it/s]

step:3880, train_loss:0.08076185130805585, acc:0.5616967980832063


 42%|████▏     | 18365/43738 [2:19:09<3:08:37,  2.24it/s]

step:3880, train_loss:0.08075745536156811, acc:0.5617206643071059


 42%|████▏     | 18366/43738 [2:19:10<3:47:49,  1.86it/s]

step:3880, train_loss:0.08075744921839781, acc:0.5617445279320483


 42%|████▏     | 18367/43738 [2:19:10<3:30:11,  2.01it/s]

step:3880, train_loss:0.08075429972068583, acc:0.5617683889584582


 43%|████▎     | 18672/43738 [2:21:30<2:44:03,  2.55it/s]

step:3900, train_loss:0.0808402918821632, acc:0.5612146529562982


 43%|████▎     | 18674/43738 [2:21:31<2:05:04,  3.34it/s]

step:3900, train_loss:0.08083756584854544, acc:0.5612381513415091
step:3900, train_loss:0.08083453274989084, acc:0.5612616472100246


 43%|████▎     | 18675/43738 [2:21:31<1:55:54,  3.60it/s]

step:3900, train_loss:0.08083128147088416, acc:0.561285140562249


 43%|████▎     | 18676/43738 [2:21:31<1:53:26,  3.68it/s]

step:3900, train_loss:0.08083080863470275, acc:0.5612550867423431


 43%|████▎     | 18677/43738 [2:21:32<2:35:37,  2.68it/s]

step:3900, train_loss:0.0808395587671778, acc:0.5612250361407078


 43%|████▎     | 18678/43738 [2:21:32<2:48:46,  2.47it/s]

step:3900, train_loss:0.08083704573540258, acc:0.5612485276796231


 43%|████▎     | 18679/43738 [2:21:33<3:35:38,  1.94it/s]

step:3900, train_loss:0.08083667077857525, acc:0.5612720167032497


 43%|████▎     | 18680/43738 [2:21:33<3:01:53,  2.30it/s]

step:3900, train_loss:0.08083245022557312, acc:0.5612955032119914


 43%|████▎     | 18681/43738 [2:21:34<3:05:17,  2.25it/s]

step:3900, train_loss:0.08083090010006709, acc:0.5613189872062524


 43%|████▎     | 18682/43738 [2:21:34<3:08:53,  2.21it/s]

step:3900, train_loss:0.08082659491684178, acc:0.5613424686864361


 43%|████▎     | 18683/43738 [2:21:34<2:44:04,  2.55it/s]

step:3900, train_loss:0.08082857779300599, acc:0.5613124230583953


 43%|████▎     | 18684/43738 [2:21:35<2:37:12,  2.66it/s]

step:3900, train_loss:0.08082704810153156, acc:0.5612823806465425


 43%|████▎     | 18685/43738 [2:21:35<3:20:09,  2.09it/s]

step:3900, train_loss:0.08082674147345247, acc:0.5612523414503613


 43%|████▎     | 18686/43738 [2:21:36<3:01:20,  2.30it/s]

step:3900, train_loss:0.08082695816963688, acc:0.5612223054693354


 43%|████▎     | 18687/43738 [2:21:36<3:02:01,  2.29it/s]

step:3900, train_loss:0.08082519962581358, acc:0.5611922727029486


 43%|████▎     | 18992/43738 [2:23:54<3:38:25,  1.89it/s]

step:3920, train_loss:0.0808051334145824, acc:0.5614995787700084


 43%|████▎     | 18993/43738 [2:23:55<3:26:30,  2.00it/s]

step:3920, train_loss:0.0808055502197387, acc:0.5614700152687833


 43%|████▎     | 18994/43738 [2:23:55<2:55:18,  2.35it/s]

step:3920, train_loss:0.0808039335155875, acc:0.5614931030851847


 43%|████▎     | 18995/43738 [2:23:55<2:54:27,  2.36it/s]

step:3920, train_loss:0.08080736047264814, acc:0.5614635430376415


 43%|████▎     | 18996/43738 [2:23:56<2:56:59,  2.33it/s]

step:3920, train_loss:0.08080812777642189, acc:0.5614339861023373


 43%|████▎     | 18997/43738 [2:23:56<3:07:05,  2.20it/s]

step:3920, train_loss:0.08080838025163725, acc:0.5614570721692899


 43%|████▎     | 18998/43738 [2:23:56<2:41:19,  2.56it/s]

step:3920, train_loss:0.08080491766854367, acc:0.5614801558058743


 43%|████▎     | 18999/43738 [2:23:57<2:37:33,  2.62it/s]

step:3920, train_loss:0.08080129879913191, acc:0.5615032370124743


 43%|████▎     | 19000/43738 [2:23:57<2:45:06,  2.50it/s]

step:3920, train_loss:0.08080036935930165, acc:0.5615263157894737


 43%|████▎     | 19001/43738 [2:23:58<2:34:28,  2.67it/s]

step:3920, train_loss:0.08080128418363111, acc:0.5614967633282458


 43%|████▎     | 19002/43738 [2:23:58<2:34:20,  2.67it/s]

step:3920, train_loss:0.08079907085194825, acc:0.5615198400168403


 43%|████▎     | 19003/43738 [2:23:58<2:32:07,  2.71it/s]

step:3920, train_loss:0.08079482449984228, acc:0.5615429142766931


 43%|████▎     | 19004/43738 [2:23:59<2:26:15,  2.82it/s]

step:3920, train_loss:0.08079306350517904, acc:0.5615659861081878


 43%|████▎     | 19005/43738 [2:23:59<2:30:54,  2.73it/s]

step:3920, train_loss:0.0807922356429928, acc:0.5615890555117075


 43%|████▎     | 19006/43738 [2:24:00<3:20:49,  2.05it/s]

step:3920, train_loss:0.08078920459725024, acc:0.5616121224876355


 43%|████▎     | 19007/43738 [2:24:00<3:00:29,  2.28it/s]

step:3920, train_loss:0.08078833337460625, acc:0.5616351870363551


 44%|████▍     | 19312/43738 [2:26:19<3:43:49,  1.82it/s]

step:3940, train_loss:0.08083741968586151, acc:0.5615679370339686


 44%|████▍     | 19313/43738 [2:26:20<3:16:04,  2.08it/s]

step:3940, train_loss:0.08083471048346695, acc:0.561590638430073


 44%|████▍     | 19314/43738 [2:26:20<2:51:01,  2.38it/s]

step:3940, train_loss:0.08083301975285215, acc:0.5615615615615616


 44%|████▍     | 19315/43738 [2:26:20<2:42:39,  2.50it/s]

step:3940, train_loss:0.08082900339027652, acc:0.5615842609370956


 44%|████▍     | 19316/43738 [2:26:21<2:41:24,  2.52it/s]

step:3940, train_loss:0.08083660378586766, acc:0.5615551874094016


 44%|████▍     | 19317/43738 [2:26:21<2:20:50,  2.89it/s]

step:3940, train_loss:0.08083356440158486, acc:0.561577884764715


 44%|████▍     | 19318/43738 [2:26:21<2:24:38,  2.81it/s]

step:3940, train_loss:0.08083129698898492, acc:0.5616005797701625


 44%|████▍     | 19319/43738 [2:26:22<2:47:28,  2.43it/s]

step:3940, train_loss:0.08083562559349573, acc:0.5615715099125214


 44%|████▍     | 19320/43738 [2:26:22<2:26:14,  2.78it/s]

step:3940, train_loss:0.08083196743901826, acc:0.5615942028985508


 44%|████▍     | 19321/43738 [2:26:22<2:47:20,  2.43it/s]

step:3940, train_loss:0.08083101622032798, acc:0.5616168935355312


 44%|████▍     | 19322/43738 [2:26:23<3:02:06,  2.23it/s]

step:3940, train_loss:0.0808303174994083, acc:0.5616395818238278


 44%|████▍     | 19323/43738 [2:26:23<2:35:33,  2.62it/s]

step:3940, train_loss:0.0808265466825774, acc:0.5616622677638048


 44%|████▍     | 19324/43738 [2:26:23<2:14:07,  3.03it/s]

step:3940, train_loss:0.08082300179069242, acc:0.561684951355827


 44%|████▍     | 19325/43738 [2:26:24<2:07:31,  3.19it/s]

step:3940, train_loss:0.0808209418485129, acc:0.5617076326002587


 44%|████▍     | 19326/43738 [2:26:24<2:26:32,  2.78it/s]

step:3940, train_loss:0.08081810636973248, acc:0.5617303114974646


 44%|████▍     | 19327/43738 [2:26:25<2:49:29,  2.40it/s]

step:3940, train_loss:0.08082400522086597, acc:0.5617012469602111


 45%|████▍     | 19632/43738 [2:28:40<3:02:07,  2.21it/s]

step:3960, train_loss:0.08072476581504757, acc:0.5624490627546862


 45%|████▍     | 19633/43738 [2:28:41<2:48:05,  2.39it/s]

step:3960, train_loss:0.08072807438034302, acc:0.5624204146080579


 45%|████▍     | 19634/43738 [2:28:41<2:41:15,  2.49it/s]

step:3960, train_loss:0.08072697910942755, acc:0.5624427014362839


 45%|████▍     | 19635/43738 [2:28:42<2:46:46,  2.41it/s]

step:3960, train_loss:0.08072822535279968, acc:0.5624140565317036


 45%|████▍     | 19636/43738 [2:28:42<2:54:45,  2.30it/s]

step:3960, train_loss:0.0807249245441183, acc:0.5624363414137299


 45%|████▍     | 19637/43738 [2:28:42<2:47:54,  2.39it/s]

step:3960, train_loss:0.08072802264784103, acc:0.562407699750471


 45%|████▍     | 19638/43738 [2:28:43<2:33:35,  2.62it/s]

step:3960, train_loss:0.0807265101944782, acc:0.5624299826866279


 45%|████▍     | 19639/43738 [2:28:43<3:16:23,  2.05it/s]

step:3960, train_loss:0.08072993536026535, acc:0.5624013442639646


 45%|████▍     | 19640/43738 [2:28:44<3:42:56,  1.80it/s]

step:3960, train_loss:0.08073592555024324, acc:0.5623727087576375


 45%|████▍     | 19641/43738 [2:28:44<3:15:19,  2.06it/s]

step:3960, train_loss:0.08073717973339853, acc:0.5623440761672013


 45%|████▍     | 19642/43738 [2:28:45<3:50:35,  1.74it/s]

step:3960, train_loss:0.0807369148972239, acc:0.5623663578047042


 45%|████▍     | 19643/43738 [2:28:46<4:15:02,  1.57it/s]

step:3960, train_loss:0.08074009839407494, acc:0.562337728452884


 45%|████▍     | 19644/43738 [2:28:46<3:29:09,  1.92it/s]

step:3960, train_loss:0.08074203269793855, acc:0.5623091020158827


 45%|████▍     | 19645/43738 [2:28:47<2:57:17,  2.27it/s]

step:3960, train_loss:0.08074289927415888, acc:0.5622804784932552


 45%|████▍     | 19646/43738 [2:28:47<3:14:55,  2.06it/s]

step:3960, train_loss:0.08074391698664683, acc:0.5623027588313143


 45%|████▍     | 19647/43738 [2:28:47<2:49:04,  2.37it/s]

step:3960, train_loss:0.08074357742679154, acc:0.5622741385453249


 46%|████▌     | 19952/43738 [2:31:13<3:23:24,  1.95it/s]

step:3980, train_loss:0.08075892483981749, acc:0.5619486768243785


 46%|████▌     | 19953/43738 [2:31:14<2:50:24,  2.33it/s]

step:3980, train_loss:0.08075531105639973, acc:0.5619706309828096


 46%|████▌     | 19954/43738 [2:31:14<3:33:31,  1.86it/s]

step:3980, train_loss:0.08075193424358834, acc:0.5619925829407637


 46%|████▌     | 19955/43738 [2:31:15<4:00:50,  1.65it/s]

step:3980, train_loss:0.08074820932175075, acc:0.5620145326985718


 46%|████▌     | 19956/43738 [2:31:15<3:31:55,  1.87it/s]

step:3980, train_loss:0.0807493987902934, acc:0.5619863700140308


 46%|████▌     | 19957/43738 [2:31:16<2:55:48,  2.25it/s]

step:3980, train_loss:0.08074537145119404, acc:0.5620083178834494


 46%|████▌     | 19958/43738 [2:31:16<3:04:22,  2.15it/s]

step:3980, train_loss:0.08074786328540502, acc:0.5619801583324983


 46%|████▌     | 19959/43738 [2:31:17<2:54:10,  2.28it/s]

step:3980, train_loss:0.08074403588913412, acc:0.5620021043138433


 46%|████▌     | 19960/43738 [2:31:17<2:39:52,  2.48it/s]

step:3980, train_loss:0.0807438432777971, acc:0.5619739478957916


 46%|████▌     | 19961/43738 [2:31:18<3:13:06,  2.05it/s]

step:3980, train_loss:0.08074567425165405, acc:0.5619457942988828


 46%|████▌     | 19962/43738 [2:31:18<3:36:15,  1.83it/s]

step:3980, train_loss:0.08074363652441341, acc:0.5619677387035367


 46%|████▌     | 19963/43738 [2:31:19<3:30:40,  1.88it/s]

step:3980, train_loss:0.08074001970160848, acc:0.5619896809096829


 46%|████▌     | 19964/43738 [2:31:19<3:43:36,  1.77it/s]

step:3980, train_loss:0.08074116781838761, acc:0.5619615307553596


 46%|████▌     | 19965/43738 [2:31:20<3:24:34,  1.94it/s]

step:3980, train_loss:0.08073936274493612, acc:0.5619834710743802


 46%|████▌     | 19966/43738 [2:31:20<3:09:05,  2.10it/s]

step:3980, train_loss:0.08074020857546514, acc:0.5619553240508866


 46%|████▌     | 19967/43738 [2:31:21<3:09:58,  2.09it/s]

step:3980, train_loss:0.08073830429133809, acc:0.5619772624830971


 46%|████▋     | 20272/43738 [2:33:40<4:04:33,  1.60it/s]

step:4000, train_loss:0.08062782730346511, acc:0.5625


 46%|████▋     | 20273/43738 [2:33:41<4:21:45,  1.49it/s]

step:4000, train_loss:0.08063118549076366, acc:0.5624722537364968


 46%|████▋     | 20274/43738 [2:33:42<4:38:55,  1.40it/s]

step:4000, train_loss:0.08063695186731439, acc:0.5624445102101213


 46%|████▋     | 20275/43738 [2:33:42<4:01:48,  1.62it/s]

step:4000, train_loss:0.08063297736622121, acc:0.562466091245376


 46%|████▋     | 20276/43738 [2:33:43<4:24:06,  1.48it/s]

step:4000, train_loss:0.08063323481103743, acc:0.5624876701519037


 46%|████▋     | 20277/43738 [2:33:44<3:57:09,  1.65it/s]

step:4000, train_loss:0.08063473806108687, acc:0.5624599299699167


 46%|████▋     | 20278/43738 [2:33:44<3:56:05,  1.66it/s]

step:4000, train_loss:0.08063562893553287, acc:0.5624815070519775


 46%|████▋     | 20279/43738 [2:33:45<3:24:58,  1.91it/s]

step:4000, train_loss:0.08063240722119726, acc:0.5625030820060161


 46%|████▋     | 20280/43738 [2:33:45<3:09:00,  2.07it/s]

step:4000, train_loss:0.08062912449961104, acc:0.5625246548323471


 46%|████▋     | 20281/43738 [2:33:46<3:22:21,  1.93it/s]

step:4000, train_loss:0.08062567341944307, acc:0.5625462255312854


 46%|████▋     | 20282/43738 [2:33:46<2:59:26,  2.18it/s]

step:4000, train_loss:0.08062203697233887, acc:0.5625677941031456


 46%|████▋     | 20283/43738 [2:33:46<2:50:20,  2.29it/s]

step:4000, train_loss:0.08061977295672862, acc:0.5625893605482424


 46%|████▋     | 20284/43738 [2:33:46<2:24:50,  2.70it/s]

step:4000, train_loss:0.08061943472528874, acc:0.5625616249260501


 46%|████▋     | 20285/43738 [2:33:47<2:22:10,  2.75it/s]

step:4000, train_loss:0.08061871193362562, acc:0.5625831895489278


 46%|████▋     | 20286/43738 [2:33:48<3:11:44,  2.04it/s]

step:4000, train_loss:0.08061724144245525, acc:0.5626047520457458


 46%|████▋     | 20287/43738 [2:33:48<2:44:04,  2.38it/s]

step:4000, train_loss:0.08061577765935324, acc:0.5626263124168186


 47%|████▋     | 20592/43738 [2:36:06<3:37:26,  1.77it/s]

step:4020, train_loss:0.08055117413767923, acc:0.5625


 47%|████▋     | 20593/43738 [2:36:07<3:12:33,  2.00it/s]

step:4020, train_loss:0.08055077897702492, acc:0.5625212450832807


 47%|████▋     | 20594/43738 [2:36:07<2:59:57,  2.14it/s]

step:4020, train_loss:0.08055038175965551, acc:0.5624939302709527


 47%|████▋     | 20595/43738 [2:36:08<3:08:56,  2.04it/s]

step:4020, train_loss:0.08055275307335054, acc:0.562466618111192


 47%|████▋     | 20596/43738 [2:36:08<3:24:06,  1.89it/s]

step:4020, train_loss:0.08054893595833547, acc:0.5624878617207225


 47%|████▋     | 20597/43738 [2:36:09<2:55:07,  2.20it/s]

step:4020, train_loss:0.0805453324300006, acc:0.5625091032674662


 47%|████▋     | 20598/43738 [2:36:09<3:13:44,  1.99it/s]

step:4020, train_loss:0.08054204817605022, acc:0.5625303427517234


 47%|████▋     | 20599/43738 [2:36:10<3:00:58,  2.13it/s]

step:4020, train_loss:0.08054516271307381, acc:0.5625030341278703


 47%|████▋     | 20600/43738 [2:36:10<3:17:31,  1.95it/s]

step:4020, train_loss:0.08054511628954025, acc:0.5624757281553399


 47%|████▋     | 20601/43738 [2:36:11<2:50:18,  2.26it/s]

step:4020, train_loss:0.0805412367527625, acc:0.562496966166691


 47%|████▋     | 20602/43738 [2:36:11<3:01:40,  2.12it/s]

step:4020, train_loss:0.0805416885939109, acc:0.562469663139501


 47%|████▋     | 20603/43738 [2:36:12<2:56:37,  2.18it/s]

step:4020, train_loss:0.0805422006854632, acc:0.5624423627627044


 47%|████▋     | 20604/43738 [2:36:12<3:25:47,  1.87it/s]

step:4020, train_loss:0.08054637037910617, acc:0.5624150650359153


 47%|████▋     | 20605/43738 [2:36:13<2:57:12,  2.18it/s]

step:4020, train_loss:0.08054255261353166, acc:0.5624363018684785


 47%|████▋     | 20606/43738 [2:36:13<2:50:00,  2.27it/s]

step:4020, train_loss:0.08053941325776891, acc:0.5624575366398137


 47%|████▋     | 20607/43738 [2:36:13<2:31:16,  2.55it/s]

step:4020, train_loss:0.08053850605636784, acc:0.5624302421507255


 48%|████▊     | 20912/43738 [2:38:34<2:46:01,  2.29it/s]

step:4040, train_loss:0.08062774113793218, acc:0.5621174445294568


 48%|████▊     | 20913/43738 [2:38:34<2:57:04,  2.15it/s]

step:4040, train_loss:0.08062568320219966, acc:0.5621383828240807


 48%|████▊     | 20914/43738 [2:38:35<2:52:17,  2.21it/s]

step:4040, train_loss:0.08063359841060071, acc:0.5621115042555226


 48%|████▊     | 20915/43738 [2:38:35<3:31:39,  1.80it/s]

step:4040, train_loss:0.08063283600357853, acc:0.5620846282572316


 48%|████▊     | 20916/43738 [2:38:36<3:24:23,  1.86it/s]

step:4040, train_loss:0.08063339828800649, acc:0.5620577548288391


 48%|████▊     | 20917/43738 [2:38:36<3:13:55,  1.96it/s]

step:4040, train_loss:0.08063922906403885, acc:0.5620308839699766


 48%|████▊     | 20918/43738 [2:38:37<3:36:39,  1.76it/s]

step:4040, train_loss:0.08063540782370003, acc:0.5620518213978392


 48%|████▊     | 20919/43738 [2:38:37<3:00:44,  2.10it/s]

step:4040, train_loss:0.08063506234198575, acc:0.5620249533916535


 48%|████▊     | 20920/43738 [2:38:38<2:39:00,  2.39it/s]

step:4040, train_loss:0.08063629408647777, acc:0.5619980879541109


 48%|████▊     | 20921/43738 [2:38:38<3:08:58,  2.01it/s]

step:4040, train_loss:0.08063442020573953, acc:0.56201902394723


 48%|████▊     | 20922/43738 [2:38:39<3:22:48,  1.88it/s]

step:4040, train_loss:0.08063354172321574, acc:0.5620399579390115


 48%|████▊     | 20923/43738 [2:38:39<3:31:30,  1.80it/s]

step:4040, train_loss:0.08063703128033092, acc:0.562013095636381


 48%|████▊     | 20924/43738 [2:38:40<3:28:13,  1.83it/s]

step:4040, train_loss:0.08063621550331042, acc:0.5619862359013573


 48%|████▊     | 20925/43738 [2:38:41<3:33:58,  1.78it/s]

step:4040, train_loss:0.08063330889112576, acc:0.5620071684587814


 48%|████▊     | 20926/43738 [2:38:41<3:42:52,  1.71it/s]

step:4040, train_loss:0.08064566101681789, acc:0.5619803115741183


 48%|████▊     | 20927/43738 [2:38:42<3:49:31,  1.66it/s]

step:4040, train_loss:0.0806474833990699, acc:0.5619534572561762


 49%|████▊     | 21232/43738 [2:40:59<2:50:25,  2.20it/s]

step:4060, train_loss:0.0807230095680346, acc:0.5616522230595328


 49%|████▊     | 21233/43738 [2:40:59<2:39:40,  2.35it/s]

step:4060, train_loss:0.08072451989264186, acc:0.5616257712051994


 49%|████▊     | 21234/43738 [2:41:00<2:43:34,  2.29it/s]

step:4060, train_loss:0.08072686519967456, acc:0.5615993218423283


 49%|████▊     | 21235/43738 [2:41:00<2:23:05,  2.62it/s]

step:4060, train_loss:0.08072307466410127, acc:0.5616199670355545


 49%|████▊     | 21236/43738 [2:41:01<2:43:09,  2.30it/s]

step:4060, train_loss:0.0807214146014599, acc:0.5616406102844227


 49%|████▊     | 21237/43738 [2:41:01<2:38:06,  2.37it/s]

step:4060, train_loss:0.08072030277890714, acc:0.5616141639591279


 49%|████▊     | 21238/43738 [2:41:01<2:22:31,  2.63it/s]

step:4060, train_loss:0.0807167015770524, acc:0.5616348055372445


 49%|████▊     | 21239/43738 [2:41:02<2:16:45,  2.74it/s]

step:4060, train_loss:0.08071445237930229, acc:0.5616554451716183


 49%|████▊     | 21240/43738 [2:41:02<2:45:52,  2.26it/s]

step:4060, train_loss:0.08072070964684305, acc:0.5616290018832392


 49%|████▊     | 21241/43738 [2:41:03<3:04:59,  2.03it/s]

step:4060, train_loss:0.08071721996260885, acc:0.5616496398474649


 49%|████▊     | 21242/43738 [2:41:03<3:03:39,  2.04it/s]

step:4060, train_loss:0.08072105290300308, acc:0.5616231993220977


 49%|████▊     | 21243/43738 [2:41:04<2:34:32,  2.43it/s]

step:4060, train_loss:0.08071859510425437, acc:0.5616438356164384


 49%|████▊     | 21244/43738 [2:41:04<2:32:24,  2.46it/s]

step:4060, train_loss:0.08071487499034806, acc:0.5616644699679909


 49%|████▊     | 21245/43738 [2:41:04<2:16:45,  2.74it/s]

step:4060, train_loss:0.08071393947197363, acc:0.5616851023770298


 49%|████▊     | 21246/43738 [2:41:05<2:30:02,  2.50it/s]

step:4060, train_loss:0.08071014891156841, acc:0.5617057328438294


 49%|████▊     | 21247/43738 [2:41:05<2:11:00,  2.86it/s]

step:4060, train_loss:0.08071045380894494, acc:0.5616792959005977


 49%|████▉     | 21552/43738 [2:43:28<3:20:40,  1.84it/s]

step:4080, train_loss:0.08073249361165472, acc:0.5616648106904232


 49%|████▉     | 21553/43738 [2:43:28<2:52:42,  2.14it/s]

step:4080, train_loss:0.08073573868748363, acc:0.5616387509859416


 49%|████▉     | 21554/43738 [2:43:29<3:04:45,  2.00it/s]

step:4080, train_loss:0.08073230721829575, acc:0.5616590888002227


 49%|████▉     | 21555/43738 [2:43:29<2:47:55,  2.20it/s]

step:4080, train_loss:0.08072863600118618, acc:0.5616794247274415


 49%|████▉     | 21556/43738 [2:43:29<2:40:27,  2.30it/s]

step:4080, train_loss:0.08072499163481495, acc:0.5616997587678605


 49%|████▉     | 21557/43738 [2:43:30<2:25:06,  2.55it/s]

step:4080, train_loss:0.08072125454186985, acc:0.5617200909217424


 49%|████▉     | 21558/43738 [2:43:30<2:56:26,  2.10it/s]

step:4080, train_loss:0.08072457860074755, acc:0.5616940346970962


 49%|████▉     | 21559/43738 [2:43:31<2:42:59,  2.27it/s]

step:4080, train_loss:0.08072131721526227, acc:0.5617143652302983


 49%|████▉     | 21560/43738 [2:43:31<2:45:28,  2.23it/s]

step:4080, train_loss:0.08071819524927491, acc:0.561734693877551


 49%|████▉     | 21561/43738 [2:43:32<3:26:09,  1.79it/s]

step:4080, train_loss:0.08072576249074745, acc:0.5617086406010853


 49%|████▉     | 21562/43738 [2:43:33<3:59:03,  1.55it/s]

step:4080, train_loss:0.08072620046186228, acc:0.5617289676282349


 49%|████▉     | 21563/43738 [2:43:33<3:20:51,  1.84it/s]

step:4080, train_loss:0.08072895985339965, acc:0.5617029170338079


 49%|████▉     | 21564/43738 [2:43:33<2:53:50,  2.13it/s]

step:4080, train_loss:0.08072543684319422, acc:0.5617232424411055


 49%|████▉     | 21565/43738 [2:43:34<3:03:47,  2.01it/s]

step:4080, train_loss:0.08072174135289474, acc:0.5617435659633666


 49%|████▉     | 21566/43738 [2:43:34<2:40:14,  2.31it/s]

step:4080, train_loss:0.08071799840599694, acc:0.5617638876008532


 49%|████▉     | 21567/43738 [2:43:35<2:45:29,  2.23it/s]

step:4080, train_loss:0.08071646633597682, acc:0.5617842073538276


 50%|█████     | 21872/43738 [2:45:56<3:10:59,  1.91it/s]

step:4100, train_loss:0.08069806041573316, acc:0.5620427944403804


 50%|█████     | 21873/43738 [2:45:57<3:08:31,  1.93it/s]

step:4100, train_loss:0.08069437670319551, acc:0.5620628171718557


 50%|█████     | 21874/43738 [2:45:57<3:19:46,  1.82it/s]

step:4100, train_loss:0.08069665882621552, acc:0.5620371216969918


 50%|█████     | 21875/43738 [2:45:58<3:03:04,  1.99it/s]

step:4100, train_loss:0.0806967485028665, acc:0.5620114285714286


 50%|█████     | 21876/43738 [2:45:58<3:14:59,  1.87it/s]

step:4100, train_loss:0.0806942061471207, acc:0.5620314499908575


 50%|█████     | 21877/43738 [2:45:59<3:25:30,  1.77it/s]

step:4100, train_loss:0.08070114136188905, acc:0.5620057594734196


 50%|█████     | 21878/43738 [2:45:59<3:03:04,  1.99it/s]

step:4100, train_loss:0.08069756676387731, acc:0.5620257793216931


 50%|█████     | 21879/43738 [2:46:00<3:04:02,  1.98it/s]

step:4100, train_loss:0.08069958141596796, acc:0.5620000914118561


 50%|█████     | 21880/43738 [2:46:00<3:06:42,  1.95it/s]

step:4100, train_loss:0.08070017776906493, acc:0.5619744058500914


 50%|█████     | 21881/43738 [2:46:01<3:21:26,  1.81it/s]

step:4100, train_loss:0.08070133699628546, acc:0.561948722636077


 50%|█████     | 21882/43738 [2:46:02<3:24:59,  1.78it/s]

step:4100, train_loss:0.08070522007937408, acc:0.5619230417694909


 50%|█████     | 21883/43738 [2:46:03<3:56:58,  1.54it/s]

step:4100, train_loss:0.08070774371652868, acc:0.5618973632500114


 50%|█████     | 21884/43738 [2:46:03<3:41:21,  1.65it/s]

step:4100, train_loss:0.08070482400578996, acc:0.5619173825626028


 50%|█████     | 21885/43738 [2:46:03<3:18:59,  1.83it/s]

step:4100, train_loss:0.08070953832121673, acc:0.5618917066483893


 50%|█████     | 21886/43738 [2:46:04<3:49:14,  1.59it/s]

step:4100, train_loss:0.08070853161015669, acc:0.561911724390021


 50%|█████     | 21887/43738 [2:46:05<3:11:10,  1.90it/s]

step:4100, train_loss:0.08070699910492697, acc:0.5618860510805501


 51%|█████     | 22192/43738 [2:48:17<2:16:15,  2.64it/s]

step:4120, train_loss:0.08063477291276848, acc:0.562229632299928


 51%|█████     | 22193/43738 [2:48:18<2:14:30,  2.67it/s]

step:4120, train_loss:0.08063453137771685, acc:0.5622042986527284


 51%|█████     | 22194/43738 [2:48:18<2:06:58,  2.83it/s]

step:4120, train_loss:0.08063815159596247, acc:0.5621789672884563


 51%|█████     | 22195/43738 [2:48:19<2:53:46,  2.07it/s]

step:4120, train_loss:0.08063528416893884, acc:0.5621986933994143


 51%|█████     | 22196/43738 [2:48:19<2:34:41,  2.32it/s]

step:4120, train_loss:0.08063605146305498, acc:0.5621733645701928


 51%|█████     | 22197/43738 [2:48:20<3:07:52,  1.91it/s]

step:4120, train_loss:0.08063699152660288, acc:0.5621930891561923


 51%|█████     | 22198/43738 [2:48:20<3:07:00,  1.92it/s]

step:4120, train_loss:0.08064048040485862, acc:0.562167762861519


 51%|█████     | 22199/43738 [2:48:21<3:10:06,  1.89it/s]

step:4120, train_loss:0.08063698177745124, acc:0.5621874859227893


 51%|█████     | 22200/43738 [2:48:21<3:08:35,  1.90it/s]

step:4120, train_loss:0.08063335689463808, acc:0.5622072072072072


 51%|█████     | 22201/43738 [2:48:22<2:43:15,  2.20it/s]

step:4120, train_loss:0.08063777182728077, acc:0.5621818836989325


 51%|█████     | 22202/43738 [2:48:22<2:26:59,  2.44it/s]

step:4120, train_loss:0.0806396466313783, acc:0.5621565624718494


 51%|█████     | 22203/43738 [2:48:23<2:50:19,  2.11it/s]

step:4120, train_loss:0.08063753027248097, acc:0.5621762824843489


 51%|█████     | 22204/43738 [2:48:23<3:04:27,  1.95it/s]

step:4120, train_loss:0.08063645953032814, acc:0.5621960007205908


 51%|█████     | 22205/43738 [2:48:24<2:47:38,  2.14it/s]

step:4120, train_loss:0.080633161032508, acc:0.5622157171808151


 51%|█████     | 22206/43738 [2:48:25<3:26:24,  1.74it/s]

step:4120, train_loss:0.08063136852311194, acc:0.5622354318652616


 51%|█████     | 22207/43738 [2:48:25<3:03:17,  1.96it/s]

step:4120, train_loss:0.08062788822291783, acc:0.5622551447741703


 51%|█████▏    | 22512/43738 [2:50:52<2:32:16,  2.32it/s]

step:4140, train_loss:0.08061278785390404, acc:0.5626332622601279


 51%|█████▏    | 22513/43738 [2:50:52<2:29:11,  2.37it/s]

step:4140, train_loss:0.08060961600237161, acc:0.5626526895571448


 51%|█████▏    | 22514/43738 [2:50:53<3:21:56,  1.75it/s]

step:4140, train_loss:0.08060616623347312, acc:0.5626721151283646


 51%|█████▏    | 22515/43738 [2:50:53<3:11:59,  1.84it/s]

step:4140, train_loss:0.08060266166087696, acc:0.5626915389740174


 51%|█████▏    | 22516/43738 [2:50:54<3:09:47,  1.86it/s]

step:4140, train_loss:0.08060187262727787, acc:0.5627109610943329


 51%|█████▏    | 22517/43738 [2:50:54<2:57:49,  1.99it/s]

step:4140, train_loss:0.08059840455052522, acc:0.5627303814895412


 51%|█████▏    | 22518/43738 [2:50:55<2:53:36,  2.04it/s]

step:4140, train_loss:0.08060104314570504, acc:0.5627053912425615


 51%|█████▏    | 22519/43738 [2:50:55<3:14:53,  1.81it/s]

step:4140, train_loss:0.08060018847193849, acc:0.5627248101603091


 51%|█████▏    | 22520/43738 [2:50:56<3:03:10,  1.93it/s]

step:4140, train_loss:0.08060195314189561, acc:0.5626998223801065


 51%|█████▏    | 22521/43738 [2:50:56<3:01:00,  1.95it/s]

step:4140, train_loss:0.08059838874158591, acc:0.5627192398206119


 51%|█████▏    | 22522/43738 [2:50:57<3:09:58,  1.86it/s]

step:4140, train_loss:0.08059683822666523, acc:0.5627386555368085


 51%|█████▏    | 22523/43738 [2:50:57<3:09:14,  1.87it/s]

step:4140, train_loss:0.08059328644808952, acc:0.562758069528926


 51%|█████▏    | 22524/43738 [2:50:58<3:02:04,  1.94it/s]

step:4140, train_loss:0.08059450000827664, acc:0.562733084709643


 51%|█████▏    | 22525/43738 [2:50:59<3:30:55,  1.68it/s]

step:4140, train_loss:0.08060925936112935, acc:0.562708102108768


 52%|█████▏    | 22526/43738 [2:50:59<3:10:49,  1.85it/s]

step:4140, train_loss:0.08061291162751963, acc:0.5626831217260055


 52%|█████▏    | 22527/43738 [2:51:00<3:27:45,  1.70it/s]

step:4140, train_loss:0.08060968448599468, acc:0.5627025347360944


 52%|█████▏    | 22832/43738 [2:53:23<2:34:30,  2.26it/s]

step:4160, train_loss:0.08052626476547677, acc:0.5632445690259286


 52%|█████▏    | 22833/43738 [2:53:23<2:13:15,  2.61it/s]

step:4160, train_loss:0.080522741014052, acc:0.5632636972802523


 52%|█████▏    | 22834/43738 [2:53:24<2:15:33,  2.57it/s]

step:4160, train_loss:0.08052539781351624, acc:0.5632390295173864


 52%|█████▏    | 22835/43738 [2:53:24<1:59:08,  2.92it/s]

step:4160, train_loss:0.0805251625504005, acc:0.5632581563389534


 52%|█████▏    | 22836/43738 [2:53:24<1:50:23,  3.16it/s]

step:4160, train_loss:0.080522914465156, acc:0.5632772814853739


 52%|█████▏    | 22837/43738 [2:53:25<2:02:29,  2.84it/s]

step:4160, train_loss:0.08052151582292036, acc:0.5632526163681745


 52%|█████▏    | 22838/43738 [2:53:25<2:15:23,  2.57it/s]

step:4160, train_loss:0.08051988282389058, acc:0.563271740082319


 52%|█████▏    | 22839/43738 [2:53:26<2:57:31,  1.96it/s]

step:4160, train_loss:0.08052100864719051, acc:0.5632470773676606


 52%|█████▏    | 22840/43738 [2:53:27<2:54:40,  1.99it/s]

step:4160, train_loss:0.08052693850855933, acc:0.5632224168126094


 52%|█████▏    | 22841/43738 [2:53:27<3:00:06,  1.93it/s]

step:4160, train_loss:0.08052702325529143, acc:0.5631977584168819


 52%|█████▏    | 22842/43738 [2:53:27<2:42:54,  2.14it/s]

step:4160, train_loss:0.080524818936959, acc:0.5632168811837842


 52%|█████▏    | 22843/43738 [2:53:28<2:18:06,  2.52it/s]

step:4160, train_loss:0.08052213354955201, acc:0.5632360022764086


 52%|█████▏    | 22844/43738 [2:53:28<2:15:28,  2.57it/s]

step:4160, train_loss:0.08051994166537964, acc:0.5632551216949746


 52%|█████▏    | 22845/43738 [2:53:28<2:01:22,  2.87it/s]

step:4160, train_loss:0.08051672109178395, acc:0.5632742394397023


 52%|█████▏    | 22846/43738 [2:53:29<2:20:14,  2.48it/s]

step:4160, train_loss:0.08052051698311008, acc:0.563249584172284


 52%|█████▏    | 22847/43738 [2:53:29<2:13:57,  2.60it/s]

step:4160, train_loss:0.08051876163034989, acc:0.5632687004858405


 53%|█████▎    | 23152/43738 [2:56:00<2:41:01,  2.13it/s]

step:4180, train_loss:0.08045016015880635, acc:0.5639253628196268


 53%|█████▎    | 23153/43738 [2:56:01<2:39:16,  2.15it/s]

step:4180, train_loss:0.0804539306147819, acc:0.5639010063490693


 53%|█████▎    | 23154/43738 [2:56:01<2:19:07,  2.47it/s]

step:4180, train_loss:0.0804531016774282, acc:0.563919841064179


 53%|█████▎    | 23155/43738 [2:56:02<2:41:22,  2.13it/s]

step:4180, train_loss:0.08045243230976444, acc:0.563895486935867


 53%|█████▎    | 23156/43738 [2:56:02<2:42:37,  2.11it/s]

step:4180, train_loss:0.08045202258038185, acc:0.5639143202625669


 53%|█████▎    | 23157/43738 [2:56:03<2:45:13,  2.08it/s]

step:4180, train_loss:0.0804609018656518, acc:0.5638899684760548


 53%|█████▎    | 23158/43738 [2:56:03<2:26:05,  2.35it/s]

step:4180, train_loss:0.08046356034739507, acc:0.5638656187926419


 53%|█████▎    | 23159/43738 [2:56:03<2:24:53,  2.37it/s]

step:4180, train_loss:0.0804623234216043, acc:0.5638412712120557


 53%|█████▎    | 23160/43738 [2:56:04<2:10:57,  2.62it/s]

step:4180, train_loss:0.08045928531088797, acc:0.563860103626943


 53%|█████▎    | 23161/43738 [2:56:04<2:10:04,  2.64it/s]

step:4180, train_loss:0.08045621242056697, acc:0.5638789344156124


 53%|█████▎    | 23162/43738 [2:56:04<1:57:08,  2.93it/s]

step:4180, train_loss:0.08045444018406224, acc:0.5638977635782748


 53%|█████▎    | 23163/43738 [2:56:05<2:01:08,  2.83it/s]

step:4180, train_loss:0.08045242230250743, acc:0.5639165911151405


 53%|█████▎    | 23164/43738 [2:56:05<1:58:39,  2.89it/s]

step:4180, train_loss:0.08045138851054061, acc:0.5639354170264204


 53%|█████▎    | 23165/43738 [2:56:05<2:04:29,  2.75it/s]

step:4180, train_loss:0.0804513423151461, acc:0.563911072739046


 53%|█████▎    | 23166/43738 [2:56:06<2:09:38,  2.64it/s]

step:4180, train_loss:0.08044795283280687, acc:0.5639298972632306


 53%|█████▎    | 23167/43738 [2:56:06<2:35:24,  2.21it/s]

step:4180, train_loss:0.08044472087872874, acc:0.5639487201622998


 54%|█████▎    | 23472/43738 [2:58:36<2:48:25,  2.01it/s]

step:4200, train_loss:0.08046596765537095, acc:0.5636503067484663


 54%|█████▎    | 23473/43738 [2:58:36<2:28:24,  2.28it/s]

step:4200, train_loss:0.08046278280454855, acc:0.5636688961785882


 54%|█████▎    | 23474/43738 [2:58:37<3:06:39,  1.81it/s]

step:4200, train_loss:0.08047199909850883, acc:0.5636448837011161


 54%|█████▎    | 23475/43738 [2:58:38<3:14:33,  1.74it/s]

step:4200, train_loss:0.08047793563832732, acc:0.5636208732694356


 54%|█████▎    | 23476/43738 [2:58:38<2:55:49,  1.92it/s]

step:4200, train_loss:0.0804778475748908, acc:0.5635968648832851


 54%|█████▎    | 23477/43738 [2:58:39<3:30:47,  1.60it/s]

step:4200, train_loss:0.0804771474063314, acc:0.5636154534224986


 54%|█████▎    | 23478/43738 [2:58:40<3:19:52,  1.69it/s]

step:4200, train_loss:0.08047449810329195, acc:0.5636340403782264


 54%|█████▎    | 23479/43738 [2:58:40<3:07:33,  1.80it/s]

step:4200, train_loss:0.08047108065049145, acc:0.5636526257506708


 54%|█████▎    | 23480/43738 [2:58:41<3:00:56,  1.87it/s]

step:4200, train_loss:0.08048138933802548, acc:0.5636286201022147


 54%|█████▎    | 23481/43738 [2:58:41<3:04:09,  1.83it/s]

step:4200, train_loss:0.08048193715029361, acc:0.5636046164984455


 54%|█████▎    | 23482/43738 [2:58:42<2:51:04,  1.97it/s]

step:4200, train_loss:0.08047980496582793, acc:0.5636232007495102


 54%|█████▎    | 23483/43738 [2:58:42<2:28:01,  2.28it/s]

step:4200, train_loss:0.08047821142453494, acc:0.5636417834177916


 54%|█████▎    | 23484/43738 [2:58:43<2:46:17,  2.03it/s]

step:4200, train_loss:0.08047813889061031, acc:0.5636177823198774


 54%|█████▎    | 23485/43738 [2:58:43<3:13:44,  1.74it/s]

step:4200, train_loss:0.08047728213730546, acc:0.5636363636363636


 54%|█████▎    | 23486/43738 [2:58:44<3:01:29,  1.86it/s]

step:4200, train_loss:0.08047396582698121, acc:0.5636549433705186


 54%|█████▎    | 23487/43738 [2:58:45<3:31:30,  1.60it/s]

step:4200, train_loss:0.08047677916365893, acc:0.5636309447779623


 54%|█████▍    | 23792/43738 [3:01:10<2:49:27,  1.96it/s]

step:4220, train_loss:0.08044410903505902, acc:0.5636768661735037


 54%|█████▍    | 23793/43738 [3:01:11<2:44:15,  2.02it/s]

step:4220, train_loss:0.08044100843379609, acc:0.5636952044719035


 54%|█████▍    | 23794/43738 [3:01:11<2:29:47,  2.22it/s]

step:4220, train_loss:0.08044278165150268, acc:0.5636715138270152


 54%|█████▍    | 23795/43738 [3:01:12<3:01:59,  1.83it/s]

step:4220, train_loss:0.08043969325687701, acc:0.5636898508089935


 54%|█████▍    | 23796/43738 [3:01:12<3:03:53,  1.81it/s]

step:4220, train_loss:0.08043879608408994, acc:0.5637081862497899


 54%|█████▍    | 23797/43738 [3:01:13<2:59:48,  1.85it/s]

step:4220, train_loss:0.08043609363873412, acc:0.5637265201495987


 54%|█████▍    | 23798/43738 [3:01:13<2:34:37,  2.15it/s]

step:4220, train_loss:0.08043288631901804, acc:0.5637448525086142


 54%|█████▍    | 23799/43738 [3:01:14<2:45:12,  2.01it/s]

step:4220, train_loss:0.0804295125283039, acc:0.5637631833270306


 54%|█████▍    | 23800/43738 [3:01:14<3:02:38,  1.82it/s]

step:4220, train_loss:0.08042622997202938, acc:0.5637815126050421


 54%|█████▍    | 23801/43738 [3:01:15<3:22:31,  1.64it/s]

step:4220, train_loss:0.08042393119264397, acc:0.5637998403428427


 54%|█████▍    | 23802/43738 [3:01:16<3:04:35,  1.80it/s]

step:4220, train_loss:0.08042076220138522, acc:0.5638181665406269


 54%|█████▍    | 23803/43738 [3:01:16<3:07:30,  1.77it/s]

step:4220, train_loss:0.08042080668200974, acc:0.5637944796874343


 54%|█████▍    | 23804/43738 [3:01:17<3:13:25,  1.72it/s]

step:4220, train_loss:0.08042400842223921, acc:0.5637707948243993


 54%|█████▍    | 23806/43738 [3:01:17<2:22:32,  2.33it/s]

step:4220, train_loss:0.08042302955657644, acc:0.5637471119512707
step:4220, train_loss:0.08041967917879697, acc:0.5637654372847182


 54%|█████▍    | 23807/43738 [3:01:18<2:19:02,  2.39it/s]

step:4220, train_loss:0.08042109291429873, acc:0.5637417566262024


 55%|█████▌    | 24112/43738 [3:03:37<2:17:59,  2.37it/s]

step:4240, train_loss:0.08035363213633322, acc:0.5645321831453218


 55%|█████▌    | 24113/43738 [3:03:37<2:01:13,  2.70it/s]

step:4240, train_loss:0.08035030297172914, acc:0.564550242607722


 55%|█████▌    | 24114/43738 [3:03:38<2:21:56,  2.30it/s]

step:4240, train_loss:0.08036509467633994, acc:0.5645268308866219


 55%|█████▌    | 24115/43738 [3:03:38<2:38:06,  2.07it/s]

step:4240, train_loss:0.08036247983601612, acc:0.564544889073191


 55%|█████▌    | 24116/43738 [3:03:39<3:11:16,  1.71it/s]

step:4240, train_loss:0.08036427855018505, acc:0.5645214795156742


 55%|█████▌    | 24117/43738 [3:03:40<3:06:03,  1.76it/s]

step:4240, train_loss:0.08036705619692061, acc:0.56449807189949


 55%|█████▌    | 24118/43738 [3:03:40<2:57:55,  1.84it/s]

step:4240, train_loss:0.08036759251648662, acc:0.5644746662243967


 55%|█████▌    | 24119/43738 [3:03:41<3:24:03,  1.60it/s]

step:4240, train_loss:0.08037167761936069, acc:0.564451262490153


 55%|█████▌    | 24120/43738 [3:03:42<3:31:42,  1.54it/s]

step:4240, train_loss:0.08036912708485033, acc:0.564469320066335


 55%|█████▌    | 24121/43738 [3:03:42<3:26:28,  1.58it/s]

step:4240, train_loss:0.0803738437574653, acc:0.5644459184942581


 55%|█████▌    | 24122/43738 [3:03:43<3:15:49,  1.67it/s]

step:4240, train_loss:0.08037327283867364, acc:0.5644639747947932


 55%|█████▌    | 24123/43738 [3:03:43<3:12:38,  1.70it/s]

step:4240, train_loss:0.08037553071450823, acc:0.5644405753844879


 55%|█████▌    | 24124/43738 [3:03:44<3:23:43,  1.60it/s]

step:4240, train_loss:0.08037241543505451, acc:0.5644586304095507


 55%|█████▌    | 24126/43738 [3:03:45<2:44:09,  1.99it/s]

step:4240, train_loss:0.08037896832554008, acc:0.5644352331606217
step:4240, train_loss:0.08038060419630359, acc:0.5644118378512808


 55%|█████▌    | 24127/43738 [3:03:45<2:23:12,  2.28it/s]

step:4240, train_loss:0.08037770700094446, acc:0.5644298918224396


 56%|█████▌    | 24432/43738 [3:06:08<3:10:45,  1.69it/s]

step:4260, train_loss:0.08042249792513165, acc:0.5645874263261297


 56%|█████▌    | 24433/43738 [3:06:09<3:13:36,  1.66it/s]

step:4260, train_loss:0.08042114381757211, acc:0.5646052470020055


 56%|█████▌    | 24434/43738 [3:06:10<3:32:13,  1.52it/s]

step:4260, train_loss:0.08041864844840602, acc:0.5646230662192028


 56%|█████▌    | 24435/43738 [3:06:10<3:16:08,  1.64it/s]

step:4260, train_loss:0.08041664278556343, acc:0.5646408839779006


 56%|█████▌    | 24436/43738 [3:06:10<2:43:36,  1.97it/s]

step:4260, train_loss:0.080420496338574, acc:0.5646177770502537


 56%|█████▌    | 24437/43738 [3:06:11<2:53:10,  1.86it/s]

step:4260, train_loss:0.08042666973157052, acc:0.5645946720137497


 56%|█████▌    | 24438/43738 [3:06:11<2:25:39,  2.21it/s]

step:4260, train_loss:0.0804286025670298, acc:0.5645715688681562


 56%|█████▌    | 24439/43738 [3:06:12<2:30:47,  2.13it/s]

step:4260, train_loss:0.08042652847943875, acc:0.5645893858177503


 56%|█████▌    | 24440/43738 [3:06:12<2:22:40,  2.25it/s]

step:4260, train_loss:0.08042732660400771, acc:0.5645662847790507


 56%|█████▌    | 24441/43738 [3:06:13<2:19:21,  2.31it/s]

step:4260, train_loss:0.08042719907837959, acc:0.5645431856307025


 56%|█████▌    | 24442/43738 [3:06:13<2:09:20,  2.49it/s]

step:4260, train_loss:0.08042434730634043, acc:0.5645610015547009


 56%|█████▌    | 24443/43738 [3:06:14<2:48:05,  1.91it/s]

step:4260, train_loss:0.08042681745189931, acc:0.5645379045125394


 56%|█████▌    | 24444/43738 [3:06:15<3:15:57,  1.64it/s]

step:4260, train_loss:0.0804260210954435, acc:0.5645557191948944


 56%|█████▌    | 24445/43738 [3:06:15<2:54:24,  1.84it/s]

step:4260, train_loss:0.08042725336483718, acc:0.5645326242585396


 56%|█████▌    | 24446/43738 [3:06:15<2:39:33,  2.02it/s]

step:4260, train_loss:0.08042412714461411, acc:0.5645504376994191


 56%|█████▌    | 24447/43738 [3:06:16<2:33:01,  2.10it/s]

step:4260, train_loss:0.08042412928722466, acc:0.564527344868491


 57%|█████▋    | 24752/43738 [3:08:48<2:26:45,  2.16it/s]

step:4280, train_loss:0.08042422647052215, acc:0.5644392372333549


 57%|█████▋    | 24753/43738 [3:08:48<2:30:04,  2.11it/s]

step:4280, train_loss:0.0804211857993641, acc:0.5644568335151294


 57%|█████▋    | 24754/43738 [3:08:49<2:21:31,  2.24it/s]

step:4280, train_loss:0.08042315465160337, acc:0.5644340308636988


 57%|█████▋    | 24755/43738 [3:08:49<2:11:51,  2.40it/s]

step:4280, train_loss:0.08042612510851908, acc:0.5644112300545344


 57%|█████▋    | 24756/43738 [3:08:50<2:52:35,  1.83it/s]

step:4280, train_loss:0.08042663632928695, acc:0.5644288253352723


 57%|█████▋    | 24757/43738 [3:08:51<3:20:10,  1.58it/s]

step:4280, train_loss:0.08042742735980979, acc:0.5644464191945713


 57%|█████▋    | 24758/43738 [3:08:51<3:06:31,  1.70it/s]

step:4280, train_loss:0.08043199139608347, acc:0.5644236206478714


 57%|█████▋    | 24759/43738 [3:08:51<2:42:00,  1.95it/s]

step:4280, train_loss:0.080429709866605, acc:0.5644412132961751


 57%|█████▋    | 24760/43738 [3:08:52<2:56:33,  1.79it/s]

step:4280, train_loss:0.08042925055608847, acc:0.5644184168012925


 57%|█████▋    | 24761/43738 [3:08:52<2:36:03,  2.03it/s]

step:4280, train_loss:0.08042630767656887, acc:0.5644360082387626


 57%|█████▋    | 24762/43738 [3:08:53<2:35:32,  2.03it/s]

step:4280, train_loss:0.08043348922618809, acc:0.5644132137953316


 57%|█████▋    | 24763/43738 [3:08:53<2:20:59,  2.24it/s]

step:4280, train_loss:0.08043380666004164, acc:0.5643904211929087


 57%|█████▋    | 24764/43738 [3:08:54<2:10:15,  2.43it/s]

step:4280, train_loss:0.08043546004184195, acc:0.5643676304312712


 57%|█████▋    | 24765/43738 [3:08:54<2:02:40,  2.58it/s]

step:4280, train_loss:0.08043502520511009, acc:0.5643448415101958


 57%|█████▋    | 24766/43738 [3:08:54<1:58:14,  2.67it/s]

step:4280, train_loss:0.08043201345386047, acc:0.5643624323669547


 57%|█████▋    | 24767/43738 [3:08:55<1:55:31,  2.74it/s]

step:4280, train_loss:0.08042919189997488, acc:0.5643800218032059


 57%|█████▋    | 25072/43738 [3:11:24<2:23:30,  2.17it/s]

step:4300, train_loss:0.08042856584084274, acc:0.5642948308870454


 57%|█████▋    | 25073/43738 [3:11:24<2:01:43,  2.56it/s]

step:4300, train_loss:0.08042537355147239, acc:0.5643122083516133


 57%|█████▋    | 25074/43738 [3:11:25<2:25:20,  2.14it/s]

step:4300, train_loss:0.08042283279862338, acc:0.564329584430087


 57%|█████▋    | 25075/43738 [3:11:25<2:03:45,  2.51it/s]

step:4300, train_loss:0.08042008522241947, acc:0.5643469591226321


 57%|█████▋    | 25076/43738 [3:11:26<1:51:48,  2.78it/s]

step:4300, train_loss:0.08042169485323086, acc:0.5643244536608709


 57%|█████▋    | 25077/43738 [3:11:26<2:29:07,  2.09it/s]

step:4300, train_loss:0.08041989780178306, acc:0.5643418271723093


 57%|█████▋    | 25078/43738 [3:11:27<2:22:09,  2.19it/s]

step:4300, train_loss:0.08042130231377694, acc:0.5643193237100247


 57%|█████▋    | 25079/43738 [3:11:27<2:01:27,  2.56it/s]

step:4300, train_loss:0.08041810645038132, acc:0.564336696040512


 57%|█████▋    | 25080/43738 [3:11:27<1:54:50,  2.71it/s]

step:4300, train_loss:0.08041936454670545, acc:0.5643141945773524


 57%|█████▋    | 25081/43738 [3:11:28<2:18:46,  2.24it/s]

step:4300, train_loss:0.08041776422687229, acc:0.5643315657270443


 57%|█████▋    | 25082/43738 [3:11:28<2:21:42,  2.19it/s]

step:4300, train_loss:0.0804202270873708, acc:0.5643090662626585


 57%|█████▋    | 25083/43738 [3:11:29<2:00:50,  2.57it/s]

step:4300, train_loss:0.08041725204290957, acc:0.5643264362317107


 57%|█████▋    | 25084/43738 [3:11:29<1:54:45,  2.71it/s]

step:4300, train_loss:0.08042257671514494, acc:0.5643039387657471


 57%|█████▋    | 25085/43738 [3:11:29<2:14:46,  2.31it/s]

step:4300, train_loss:0.08042799806305762, acc:0.5642814430934822


 57%|█████▋    | 25086/43738 [3:11:30<2:18:07,  2.25it/s]

step:4300, train_loss:0.08042482314358751, acc:0.5642988120864227


 57%|█████▋    | 25087/43738 [3:11:31<2:46:29,  1.87it/s]

step:4300, train_loss:0.08042179095803857, acc:0.5643161796946625


 58%|█████▊    | 25392/43738 [3:13:57<2:47:01,  1.83it/s]

step:4320, train_loss:0.08041902431467235, acc:0.5647448015122873


 58%|█████▊    | 25393/43738 [3:13:57<2:56:57,  1.73it/s]

step:4320, train_loss:0.08043159739880898, acc:0.5647225613358012


 58%|█████▊    | 25394/43738 [3:13:58<3:24:21,  1.50it/s]

step:4320, train_loss:0.08043286779754061, acc:0.5647003229109239


 58%|█████▊    | 25395/43738 [3:13:59<3:14:26,  1.57it/s]

step:4320, train_loss:0.0804319497329295, acc:0.5647174640677298


 58%|█████▊    | 25396/43738 [3:14:00<3:35:31,  1.42it/s]

step:4320, train_loss:0.0804296439168398, acc:0.5647346038746259


 58%|█████▊    | 25397/43738 [3:14:00<3:25:13,  1.49it/s]

step:4320, train_loss:0.08042997517531789, acc:0.5647123676024728


 58%|█████▊    | 25398/43738 [3:14:00<2:55:09,  1.75it/s]

step:4320, train_loss:0.08042681211709277, acc:0.5647295062603355


 58%|█████▊    | 25399/43738 [3:14:01<2:34:18,  1.98it/s]

step:4320, train_loss:0.08042602628339425, acc:0.5647072719398402


 58%|█████▊    | 25400/43738 [3:14:01<2:24:58,  2.11it/s]

step:4320, train_loss:0.08042380851643494, acc:0.5647244094488189


 58%|█████▊    | 25401/43738 [3:14:01<2:06:03,  2.42it/s]

step:4320, train_loss:0.08042235915269585, acc:0.5647415456084406


 58%|█████▊    | 25402/43738 [3:14:02<2:40:40,  1.90it/s]

step:4320, train_loss:0.08042571958912254, acc:0.5647193134398866


 58%|█████▊    | 25403/43738 [3:14:03<2:32:52,  2.00it/s]

step:4320, train_loss:0.08042376138855227, acc:0.5647364484509704


 58%|█████▊    | 25404/43738 [3:14:03<2:23:31,  2.13it/s]

step:4320, train_loss:0.08042322775566546, acc:0.564753582113053


 58%|█████▊    | 25405/43738 [3:14:04<2:20:11,  2.18it/s]

step:4320, train_loss:0.08042620609837839, acc:0.5647313520960441


 58%|█████▊    | 25406/43738 [3:14:04<2:12:48,  2.30it/s]

step:4320, train_loss:0.0804285287531967, acc:0.5647091238290167


 58%|█████▊    | 25407/43738 [3:14:04<2:12:18,  2.31it/s]

step:4320, train_loss:0.08042959744146462, acc:0.5646868973117645


 59%|█████▉    | 25712/43738 [3:16:30<2:02:17,  2.46it/s]

step:4340, train_loss:0.08039898765720997, acc:0.5645612943372744


 59%|█████▉    | 25713/43738 [3:16:30<2:22:24,  2.11it/s]

step:4340, train_loss:0.0804038126681976, acc:0.564539338078015


 59%|█████▉    | 25714/43738 [3:16:31<2:00:49,  2.49it/s]

step:4340, train_loss:0.08040079817903333, acc:0.5645562728474761


 59%|█████▉    | 25715/43738 [3:16:31<1:53:59,  2.64it/s]

step:4340, train_loss:0.0803976825761451, acc:0.564573206299825


 59%|█████▉    | 25716/43738 [3:16:31<2:07:24,  2.36it/s]

step:4340, train_loss:0.08039853280041549, acc:0.5645512521387464


 59%|█████▉    | 25717/43738 [3:16:32<2:00:43,  2.49it/s]

step:4340, train_loss:0.08040003211252574, acc:0.5645292996850333


 59%|█████▉    | 25718/43738 [3:16:32<2:25:08,  2.07it/s]

step:4340, train_loss:0.08040154347034911, acc:0.5645073489384866


 59%|█████▉    | 25719/43738 [3:16:33<2:23:00,  2.10it/s]

step:4340, train_loss:0.08040388886975641, acc:0.5644853998989074


 59%|█████▉    | 25720/43738 [3:16:34<2:42:39,  1.85it/s]

step:4340, train_loss:0.080408014481811, acc:0.5644634525660964


 59%|█████▉    | 25721/43738 [3:16:34<2:33:06,  1.96it/s]

step:4340, train_loss:0.08040736012930048, acc:0.5644803856770733


 59%|█████▉    | 25722/43738 [3:16:34<2:26:37,  2.05it/s]

step:4340, train_loss:0.08040605209729783, acc:0.5644973174714253


 59%|█████▉    | 25723/43738 [3:16:35<2:52:09,  1.74it/s]

step:4340, train_loss:0.08040549895283872, acc:0.564514247949306


 59%|█████▉    | 25724/43738 [3:16:36<2:52:56,  1.74it/s]

step:4340, train_loss:0.08041108481119866, acc:0.5644923029077904


 59%|█████▉    | 25725/43738 [3:16:36<2:26:23,  2.05it/s]

step:4340, train_loss:0.08041336516352357, acc:0.5644703595724004


 59%|█████▉    | 25726/43738 [3:16:37<2:46:58,  1.80it/s]

step:4340, train_loss:0.08041943684016047, acc:0.5644484179429371


 59%|█████▉    | 25727/43738 [3:16:37<2:26:19,  2.05it/s]

step:4340, train_loss:0.08041687367591253, acc:0.5644653476891981


 60%|█████▉    | 26032/43738 [3:19:00<2:27:02,  2.01it/s]

step:4360, train_loss:0.0804472455973459, acc:0.5644207129686539


 60%|█████▉    | 26033/43738 [3:19:01<2:14:33,  2.19it/s]

step:4360, train_loss:0.08044904353880386, acc:0.5643990319978489


 60%|█████▉    | 26034/43738 [3:19:01<2:01:54,  2.42it/s]

step:4360, train_loss:0.08044598422410862, acc:0.5644157640009219


 60%|█████▉    | 26035/43738 [3:19:01<1:59:33,  2.47it/s]

step:4360, train_loss:0.08044502997651695, acc:0.564432494718648


 60%|█████▉    | 26036/43738 [3:19:02<1:42:25,  2.88it/s]

step:4360, train_loss:0.08044413817980686, acc:0.5644492241511753


 60%|█████▉    | 26037/43738 [3:19:02<1:46:46,  2.76it/s]

step:4360, train_loss:0.08044622007025641, acc:0.5644275454161386


 60%|█████▉    | 26038/43738 [3:19:02<1:48:16,  2.72it/s]

step:4360, train_loss:0.08044397535182464, acc:0.5644442737537445


 60%|█████▉    | 26039/43738 [3:19:03<1:51:05,  2.66it/s]

step:4360, train_loss:0.08044362517779763, acc:0.5644610008064825


 60%|█████▉    | 26040/43738 [3:19:03<1:58:13,  2.49it/s]

step:4360, train_loss:0.08044472239864972, acc:0.5644393241167435


 60%|█████▉    | 26041/43738 [3:19:04<2:13:43,  2.21it/s]

step:4360, train_loss:0.08044662849187073, acc:0.5644176490918168


 60%|█████▉    | 26042/43738 [3:19:04<1:55:58,  2.54it/s]

step:4360, train_loss:0.08044966068828961, acc:0.5643959757315107


 60%|█████▉    | 26043/43738 [3:19:05<2:08:05,  2.30it/s]

step:4360, train_loss:0.08044728986088182, acc:0.564412702069654


 60%|█████▉    | 26044/43738 [3:19:05<2:12:30,  2.23it/s]

step:4360, train_loss:0.0804447029604615, acc:0.5644294271233298


 60%|█████▉    | 26045/43738 [3:19:05<1:55:14,  2.56it/s]

step:4360, train_loss:0.0804449137424468, acc:0.5644077558072567


 60%|█████▉    | 26046/43738 [3:19:06<1:45:41,  2.79it/s]

step:4360, train_loss:0.08044296405269832, acc:0.5644244797665668


 60%|█████▉    | 26047/43738 [3:19:06<1:52:29,  2.62it/s]

step:4360, train_loss:0.08044388043610548, acc:0.5644412024417399


 60%|██████    | 26352/43738 [3:21:27<1:43:56,  2.79it/s]

step:4380, train_loss:0.08034566104808241, acc:0.5648907103825137


 60%|██████    | 26353/43738 [3:21:28<2:17:51,  2.10it/s]

step:4380, train_loss:0.08034747587864917, acc:0.5648692748453686


 60%|██████    | 26354/43738 [3:21:29<2:27:17,  1.97it/s]

step:4380, train_loss:0.08034558799593378, acc:0.5648857858389619


 60%|██████    | 26355/43738 [3:21:29<2:40:00,  1.81it/s]

step:4380, train_loss:0.08034398685548429, acc:0.5649022955795864


 60%|██████    | 26356/43738 [3:21:30<2:13:08,  2.18it/s]

step:4380, train_loss:0.0803429976180806, acc:0.5648808620427986


 60%|██████    | 26357/43738 [3:21:30<1:55:25,  2.51it/s]

step:4380, train_loss:0.08034162893633301, acc:0.5648973707174565


 60%|██████    | 26358/43738 [3:21:30<1:49:41,  2.64it/s]

step:4380, train_loss:0.08034554733034305, acc:0.5648759389938539


 60%|██████    | 26359/43738 [3:21:31<2:05:38,  2.31it/s]

step:4380, train_loss:0.08034453841482402, acc:0.5648924466026785


 60%|██████    | 26360/43738 [3:21:31<2:01:09,  2.39it/s]

step:4380, train_loss:0.08034186899350311, acc:0.5649089529590289


 60%|██████    | 26361/43738 [3:21:31<1:45:15,  2.75it/s]

step:4380, train_loss:0.0803393092290839, acc:0.5649254580630477


 60%|██████    | 26362/43738 [3:21:32<1:47:15,  2.70it/s]

step:4380, train_loss:0.08033866150124837, acc:0.5649419619148774


 60%|██████    | 26363/43738 [3:21:32<1:38:44,  2.93it/s]

step:4380, train_loss:0.080335618401117, acc:0.5649584645146607


 60%|██████    | 26364/43738 [3:21:32<1:32:48,  3.12it/s]

step:4380, train_loss:0.08033554835108286, acc:0.5649749658625398


 60%|██████    | 26365/43738 [3:21:33<1:33:57,  3.08it/s]

step:4380, train_loss:0.08033433062432498, acc:0.5649914659586573


 60%|██████    | 26366/43738 [3:21:33<1:36:58,  2.99it/s]

step:4380, train_loss:0.08033205686448681, acc:0.5650079648031556


 60%|██████    | 26367/43738 [3:21:34<1:57:42,  2.46it/s]

step:4380, train_loss:0.08034070917986245, acc:0.5649865362005537


 61%|██████    | 26672/43738 [3:23:55<2:07:08,  2.24it/s]

step:4400, train_loss:0.08034059135376595, acc:0.5650119976004799


 61%|██████    | 26673/43738 [3:23:56<1:59:47,  2.37it/s]

step:4400, train_loss:0.0803422407173828, acc:0.5649908146815131


 61%|██████    | 26674/43738 [3:23:56<1:46:39,  2.67it/s]

step:4400, train_loss:0.08033922968480822, acc:0.5650071230411636


 61%|██████    | 26675/43738 [3:23:56<1:40:58,  2.82it/s]

step:4400, train_loss:0.08033988350617849, acc:0.5649859418931584


 61%|██████    | 26676/43738 [3:23:57<1:48:43,  2.62it/s]

step:4400, train_loss:0.08033717563052455, acc:0.5650022492127755


 61%|██████    | 26677/43738 [3:23:57<1:52:35,  2.53it/s]

step:4400, train_loss:0.08033676757621842, acc:0.5649810698354387


 61%|██████    | 26678/43738 [3:23:58<1:55:21,  2.46it/s]

step:4400, train_loss:0.08033805027881478, acc:0.5649598920458805


 61%|██████    | 26679/43738 [3:23:58<2:14:28,  2.11it/s]

step:4400, train_loss:0.08033985245286904, acc:0.5649387158439222


 61%|██████    | 26680/43738 [3:23:59<2:18:37,  2.05it/s]

step:4400, train_loss:0.08033733006761458, acc:0.5649550224887556


 61%|██████    | 26681/43738 [3:23:59<2:05:19,  2.27it/s]

step:4400, train_loss:0.08033846448492502, acc:0.5649338480566696


 61%|██████    | 26682/43738 [3:24:00<2:25:25,  1.95it/s]

step:4400, train_loss:0.08033554993066999, acc:0.5649501536616446


 61%|██████    | 26683/43738 [3:24:00<2:35:23,  1.83it/s]

step:4400, train_loss:0.08033400641374645, acc:0.5649664580444478


 61%|██████    | 26684/43738 [3:24:01<2:21:02,  2.02it/s]

step:4400, train_loss:0.08033197467056678, acc:0.5649827612052166


 61%|██████    | 26685/43738 [3:24:01<2:18:05,  2.06it/s]

step:4400, train_loss:0.08033196223670337, acc:0.564961588907626


 61%|██████    | 26686/43738 [3:24:02<2:14:30,  2.11it/s]

step:4400, train_loss:0.08032895257501824, acc:0.564977891029004


 61%|██████    | 26687/43738 [3:24:02<2:14:01,  2.12it/s]

step:4400, train_loss:0.08032594280246914, acc:0.5649941919286544


 62%|██████▏   | 26992/43738 [3:26:30<2:14:56,  2.07it/s]

step:4420, train_loss:0.08035327520589332, acc:0.5649451689389449


 62%|██████▏   | 26993/43738 [3:26:31<2:17:38,  2.03it/s]

step:4420, train_loss:0.08035829930519553, acc:0.5649242396176787


 62%|██████▏   | 26994/43738 [3:26:31<1:55:51,  2.41it/s]

step:4420, train_loss:0.08035574687598936, acc:0.5649403571163962


 62%|██████▏   | 26995/43738 [3:26:31<2:05:30,  2.22it/s]

step:4420, train_loss:0.08035380225781708, acc:0.5649564734210039


 62%|██████▏   | 26996/43738 [3:26:32<1:54:14,  2.44it/s]

step:4420, train_loss:0.08035553209938227, acc:0.5649355460068158


 62%|██████▏   | 26997/43738 [3:26:33<2:30:57,  1.85it/s]

step:4420, train_loss:0.08035260231015715, acc:0.5649516612956995


 62%|██████▏   | 26998/43738 [3:26:33<2:50:14,  1.64it/s]

step:4420, train_loss:0.08035001250630137, acc:0.5649677753907697


 62%|██████▏   | 26999/43738 [3:26:34<2:38:32,  1.76it/s]

step:4420, train_loss:0.0803470679034727, acc:0.564983888292159


 62%|██████▏   | 27000/43738 [3:26:34<2:37:59,  1.77it/s]

step:4420, train_loss:0.0803511388237476, acc:0.564962962962963


 62%|██████▏   | 27001/43738 [3:26:35<2:23:18,  1.95it/s]

step:4420, train_loss:0.08035653087929093, acc:0.564942039183734


 62%|██████▏   | 27002/43738 [3:26:35<2:17:22,  2.03it/s]

step:4420, train_loss:0.08035461649313895, acc:0.5649581512480557


 62%|██████▏   | 27003/43738 [3:26:36<2:13:19,  2.09it/s]

step:4420, train_loss:0.0803516710887541, acc:0.5649742621190238


 62%|██████▏   | 27004/43738 [3:26:36<2:15:30,  2.06it/s]

step:4420, train_loss:0.08035238355321804, acc:0.5649533402458895


 62%|██████▏   | 27005/43738 [3:26:36<2:06:52,  2.20it/s]

step:4420, train_loss:0.08035642083085831, acc:0.5649324199222366


 62%|██████▏   | 27006/43738 [3:26:37<2:13:33,  2.09it/s]

step:4420, train_loss:0.08035747668339763, acc:0.564911501147893


 62%|██████▏   | 27007/43738 [3:26:38<2:35:46,  1.79it/s]

step:4420, train_loss:0.08035579568930482, acc:0.5649276113600178


 62%|██████▏   | 27312/43738 [3:29:10<1:52:20,  2.44it/s]

step:4440, train_loss:0.0802994419424636, acc:0.565209431751611


 62%|██████▏   | 27313/43738 [3:29:11<1:38:29,  2.78it/s]

step:4440, train_loss:0.08029654332081336, acc:0.5652253505656647


 62%|██████▏   | 27314/43738 [3:29:11<1:42:28,  2.67it/s]

step:4440, train_loss:0.08029604211730224, acc:0.5652046569524786


 62%|██████▏   | 27315/43738 [3:29:12<1:48:48,  2.52it/s]

step:4440, train_loss:0.08029653312069943, acc:0.5651839648544755


 62%|██████▏   | 27316/43738 [3:29:12<1:45:50,  2.59it/s]

step:4440, train_loss:0.08029365587747901, acc:0.5651998828525406


 62%|██████▏   | 27317/43738 [3:29:12<1:52:09,  2.44it/s]

step:4440, train_loss:0.08029980421428677, acc:0.5651791924442655


 62%|██████▏   | 27318/43738 [3:29:13<1:52:00,  2.44it/s]

step:4440, train_loss:0.08029977813308715, acc:0.5651585035507724


 62%|██████▏   | 27319/43738 [3:29:14<2:21:21,  1.94it/s]

step:4440, train_loss:0.08029697646993547, acc:0.5651744207328233


 62%|██████▏   | 27320/43738 [3:29:14<2:21:10,  1.94it/s]

step:4440, train_loss:0.08029411285735123, acc:0.5651903367496339


 62%|██████▏   | 27321/43738 [3:29:15<2:17:27,  1.99it/s]

step:4440, train_loss:0.08029648412307587, acc:0.5651696497199956


 62%|██████▏   | 27322/43738 [3:29:15<1:57:51,  2.32it/s]

step:4440, train_loss:0.08029356783851825, acc:0.5651855647463583


 62%|██████▏   | 27323/43738 [3:29:15<2:01:54,  2.24it/s]

step:4440, train_loss:0.08029236692036781, acc:0.5652014786077664


 62%|██████▏   | 27324/43738 [3:29:16<1:53:50,  2.40it/s]

step:4440, train_loss:0.08028959486454307, acc:0.5652173913043478


 62%|██████▏   | 27325/43738 [3:29:16<1:58:31,  2.31it/s]

step:4440, train_loss:0.08029038445130655, acc:0.5651967063129003


 62%|██████▏   | 27326/43738 [3:29:17<1:59:18,  2.29it/s]

step:4440, train_loss:0.08028945862560559, acc:0.5652126180194686


 62%|██████▏   | 27327/43738 [3:29:17<1:44:02,  2.63it/s]

step:4440, train_loss:0.08028698236001083, acc:0.565228528561496


 63%|██████▎   | 27632/43738 [3:31:40<1:39:12,  2.71it/s]

step:4460, train_loss:0.08028179786605599, acc:0.564997104806022


 63%|██████▎   | 27633/43738 [3:31:40<1:40:35,  2.67it/s]

step:4460, train_loss:0.08028038157129397, acc:0.5650128469583469


 63%|██████▎   | 27634/43738 [3:31:41<2:00:41,  2.22it/s]

step:4460, train_loss:0.08028150380834562, acc:0.5649924006658464


 63%|██████▎   | 27635/43738 [3:31:42<2:32:42,  1.76it/s]

step:4460, train_loss:0.08028164811557939, acc:0.5649719558530849


 63%|██████▎   | 27636/43738 [3:31:42<2:27:43,  1.82it/s]

step:4460, train_loss:0.08028287042964033, acc:0.5649515125199016


 63%|██████▎   | 27637/43738 [3:31:42<2:00:55,  2.22it/s]

step:4460, train_loss:0.08028146362917508, acc:0.5649672540434925


 63%|██████▎   | 27638/43738 [3:31:43<2:08:05,  2.09it/s]

step:4460, train_loss:0.08027857688216528, acc:0.5649829944279615


 63%|██████▎   | 27639/43738 [3:31:43<2:03:15,  2.18it/s]

step:4460, train_loss:0.08027864137132319, acc:0.5649625529143601


 63%|██████▎   | 27640/43738 [3:31:44<2:05:17,  2.14it/s]

step:4460, train_loss:0.08027734166852446, acc:0.5649782923299566


 63%|██████▎   | 27641/43738 [3:31:44<2:15:28,  1.98it/s]

step:4460, train_loss:0.08027515839553062, acc:0.5649940306067074


 63%|██████▎   | 27642/43738 [3:31:45<2:21:56,  1.89it/s]

step:4460, train_loss:0.08027231378494344, acc:0.5650097677447363


 63%|██████▎   | 27643/43738 [3:31:46<2:26:31,  1.83it/s]

step:4460, train_loss:0.08026987241283262, acc:0.5650255037441667


 63%|██████▎   | 27644/43738 [3:31:46<2:19:36,  1.92it/s]

step:4460, train_loss:0.080269104168627, acc:0.5650050643901028


 63%|██████▎   | 27645/43738 [3:31:47<2:21:43,  1.89it/s]

step:4460, train_loss:0.08027005889273812, acc:0.5649846265147405


 63%|██████▎   | 27646/43738 [3:31:47<2:38:12,  1.70it/s]

step:4460, train_loss:0.0802775813485759, acc:0.5649641901179194


 63%|██████▎   | 27647/43738 [3:31:48<2:41:00,  1.67it/s]

step:4460, train_loss:0.08027493005364197, acc:0.5649799254892032


 64%|██████▍   | 27952/43738 [3:34:14<2:38:02,  1.66it/s]

step:4480, train_loss:0.08023423073268701, acc:0.5656124785346308


 64%|██████▍   | 27953/43738 [3:34:14<2:34:58,  1.70it/s]

step:4480, train_loss:0.08023190937006425, acc:0.5656280184595571


 64%|██████▍   | 27954/43738 [3:34:15<2:37:18,  1.67it/s]

step:4480, train_loss:0.0802314985045299, acc:0.5656435572726622


 64%|██████▍   | 27955/43738 [3:34:15<2:29:49,  1.76it/s]

step:4480, train_loss:0.08022924300058192, acc:0.5656590949740654


 64%|██████▍   | 27956/43738 [3:34:16<2:54:37,  1.51it/s]

step:4480, train_loss:0.08023445475774113, acc:0.5656388610673916


 64%|██████▍   | 27957/43738 [3:34:17<2:38:49,  1.66it/s]

step:4480, train_loss:0.08023159623291642, acc:0.5656543978252316


 64%|██████▍   | 27958/43738 [3:34:17<2:26:57,  1.79it/s]

step:4480, train_loss:0.08022922158791919, acc:0.5656699334716361


 64%|██████▍   | 27959/43738 [3:34:17<2:20:50,  1.87it/s]

step:4480, train_loss:0.08023002465683504, acc:0.565649701348403


 64%|██████▍   | 27960/43738 [3:34:18<2:06:50,  2.07it/s]

step:4480, train_loss:0.08023190974336038, acc:0.5656294706723891


 64%|██████▍   | 27961/43738 [3:34:18<1:51:46,  2.35it/s]

step:4480, train_loss:0.08023738268852833, acc:0.565609241443439


 64%|██████▍   | 27962/43738 [3:34:18<1:44:17,  2.52it/s]

step:4480, train_loss:0.08023483078947927, acc:0.565624776482369


 64%|██████▍   | 27963/43738 [3:34:19<1:58:44,  2.21it/s]

step:4480, train_loss:0.08023257659632435, acc:0.5656403104101849


 64%|██████▍   | 27964/43738 [3:34:20<1:59:01,  2.21it/s]

step:4480, train_loss:0.08022976084112952, acc:0.5656558432270061


 64%|██████▍   | 27965/43738 [3:34:20<1:43:56,  2.53it/s]

step:4480, train_loss:0.08022934920988131, acc:0.5656713749329519


 64%|██████▍   | 27966/43738 [3:34:20<1:48:28,  2.42it/s]

step:4480, train_loss:0.08022998817057304, acc:0.5656511478223557


 64%|██████▍   | 27967/43738 [3:34:20<1:37:04,  2.71it/s]

step:4480, train_loss:0.08022976732021904, acc:0.5656666785854757


 65%|██████▍   | 28272/43738 [3:36:46<2:17:44,  1.87it/s]

step:4500, train_loss:0.08020702544420116, acc:0.5656126202603282


 65%|██████▍   | 28273/43738 [3:36:46<1:56:20,  2.22it/s]

step:4500, train_loss:0.08020426417920436, acc:0.5656279842959714


 65%|██████▍   | 28274/43738 [3:36:46<1:41:29,  2.54it/s]

step:4500, train_loss:0.08020409700252301, acc:0.5656433472448186


 65%|██████▍   | 28275/43738 [3:36:46<1:29:22,  2.88it/s]

step:4500, train_loss:0.08020152114348397, acc:0.565658709106985


 65%|██████▍   | 28276/43738 [3:36:47<1:31:14,  2.82it/s]

step:4500, train_loss:0.08020220770464444, acc:0.5656387042014429


 65%|██████▍   | 28277/43738 [3:36:48<2:08:35,  2.00it/s]

step:4500, train_loss:0.0801997041229901, acc:0.5656540651412809


 65%|██████▍   | 28278/43738 [3:36:48<2:00:13,  2.14it/s]

step:4500, train_loss:0.08019740962468315, acc:0.5656694249946955


 65%|██████▍   | 28279/43738 [3:36:48<1:51:41,  2.31it/s]

step:4500, train_loss:0.08019887653539864, acc:0.5656494218324551


 65%|██████▍   | 28280/43738 [3:36:49<1:36:26,  2.67it/s]

step:4500, train_loss:0.08019604176942391, acc:0.5656647807637907


 65%|██████▍   | 28281/43738 [3:36:49<1:28:29,  2.91it/s]

step:4500, train_loss:0.08019357492343877, acc:0.56568013860896


 65%|██████▍   | 28282/43738 [3:36:49<1:33:32,  2.75it/s]

step:4500, train_loss:0.08019131785447216, acc:0.5656954953680786


 65%|██████▍   | 28283/43738 [3:36:50<2:00:40,  2.13it/s]

step:4500, train_loss:0.08018959729945593, acc:0.5657108510412615


 65%|██████▍   | 28284/43738 [3:36:50<1:56:49,  2.20it/s]

step:4500, train_loss:0.08019237830490844, acc:0.565690849950502


 65%|██████▍   | 28285/43738 [3:36:51<1:55:57,  2.22it/s]

step:4500, train_loss:0.08019285656479722, acc:0.5656708502739968


 65%|██████▍   | 28286/43738 [3:36:51<1:48:25,  2.38it/s]

step:4500, train_loss:0.08019002167261141, acc:0.5656862051898466


 65%|██████▍   | 28287/43738 [3:36:52<2:04:30,  2.07it/s]

step:4500, train_loss:0.08019088763080359, acc:0.5656662070915969


 65%|██████▌   | 28592/43738 [3:39:16<2:24:58,  1.74it/s]

step:4520, train_loss:0.08021061028160005, acc:0.5653679350867375


 65%|██████▌   | 28593/43738 [3:39:16<2:17:43,  1.83it/s]

step:4520, train_loss:0.08020841108986525, acc:0.565383135732522


 65%|██████▌   | 28594/43738 [3:39:17<2:01:57,  2.07it/s]

step:4520, train_loss:0.08020662245474519, acc:0.5653983353151011


 65%|██████▌   | 28595/43738 [3:39:17<2:27:35,  1.71it/s]

step:4520, train_loss:0.08020456745339138, acc:0.5654135338345865


 65%|██████▌   | 28596/43738 [3:39:18<2:48:42,  1.50it/s]

step:4520, train_loss:0.08020223246164579, acc:0.5654287312910896


 65%|██████▌   | 28597/43738 [3:39:19<2:26:39,  1.72it/s]

step:4520, train_loss:0.08020000506360891, acc:0.5654439276847222


 65%|██████▌   | 28598/43738 [3:39:19<2:21:22,  1.78it/s]

step:4520, train_loss:0.0801975321611288, acc:0.5654591230155955


 65%|██████▌   | 28599/43738 [3:39:20<2:21:44,  1.78it/s]

step:4520, train_loss:0.08019946165320346, acc:0.5654393510262596


 65%|██████▌   | 28600/43738 [3:39:20<1:57:58,  2.14it/s]

step:4520, train_loss:0.08019668296346046, acc:0.5654545454545454


 65%|██████▌   | 28601/43738 [3:39:20<1:58:23,  2.13it/s]

step:4520, train_loss:0.08019705072361322, acc:0.5654347750078669


 65%|██████▌   | 28602/43738 [3:39:21<2:01:57,  2.07it/s]

step:4520, train_loss:0.08019811004682374, acc:0.5654150059436404


 65%|██████▌   | 28603/43738 [3:39:21<1:58:20,  2.13it/s]

step:4520, train_loss:0.08019530750593866, acc:0.5654301996294095


 65%|██████▌   | 28604/43738 [3:39:22<2:16:33,  1.85it/s]

step:4520, train_loss:0.08019600699731387, acc:0.5654104321073976


 65%|██████▌   | 28605/43738 [3:39:22<1:55:14,  2.19it/s]

step:4520, train_loss:0.08019419977509354, acc:0.5654256248907533


 65%|██████▌   | 28606/43738 [3:39:23<2:02:50,  2.05it/s]

step:4520, train_loss:0.0802007552287551, acc:0.565405858910718


 65%|██████▌   | 28607/43738 [3:39:23<1:49:05,  2.31it/s]

step:4520, train_loss:0.08020030161683554, acc:0.5653860943125808


 66%|██████▌   | 28912/43738 [3:41:45<1:59:05,  2.07it/s]

step:4540, train_loss:0.08019150506627809, acc:0.5654745434421693


 66%|██████▌   | 28913/43738 [3:41:45<1:47:25,  2.30it/s]

step:4540, train_loss:0.08019104036537046, acc:0.5654549856465949


 66%|██████▌   | 28914/43738 [3:41:46<1:48:13,  2.28it/s]

step:4540, train_loss:0.08019121045700697, acc:0.5654700145258352


 66%|██████▌   | 28915/43738 [3:41:46<1:42:27,  2.41it/s]

step:4540, train_loss:0.08018863448039473, acc:0.5654850423655542


 66%|██████▌   | 28916/43738 [3:41:46<1:30:01,  2.74it/s]

step:4540, train_loss:0.0801887667103362, acc:0.5655000691658597


 66%|██████▌   | 28917/43738 [3:41:47<1:38:31,  2.51it/s]

step:4540, train_loss:0.08019220696315182, acc:0.5654805131929315


 66%|██████▌   | 28918/43738 [3:41:47<1:36:25,  2.56it/s]

step:4540, train_loss:0.08018943608511118, acc:0.5654955391105886


 66%|██████▌   | 28919/43738 [3:41:47<1:41:42,  2.43it/s]

step:4540, train_loss:0.08018760680695886, acc:0.565510563989073


 66%|██████▌   | 28920/43738 [3:41:48<2:08:57,  1.92it/s]

step:4540, train_loss:0.08019669581495328, acc:0.565491009681881


 66%|██████▌   | 28921/43738 [3:41:49<2:12:27,  1.86it/s]

step:4540, train_loss:0.08019624326494494, acc:0.5655060336779503


 66%|██████▌   | 28922/43738 [3:41:49<2:04:39,  1.98it/s]

step:4540, train_loss:0.08020009968320392, acc:0.5654864808796072


 66%|██████▌   | 28923/43738 [3:41:50<2:01:32,  2.03it/s]

step:4540, train_loss:0.08020186075512734, acc:0.565466929433323


 66%|██████▌   | 28924/43738 [3:41:50<2:15:41,  1.82it/s]

step:4540, train_loss:0.08020069636993957, acc:0.5654819527036371


 66%|██████▌   | 28925/43738 [3:41:51<2:25:11,  1.70it/s]

step:4540, train_loss:0.08019900881555922, acc:0.5654969749351771


 66%|██████▌   | 28926/43738 [3:41:52<2:33:42,  1.61it/s]

step:4540, train_loss:0.08020401746211905, acc:0.5654774251538408


 66%|██████▌   | 28927/43738 [3:41:52<2:23:08,  1.72it/s]

step:4540, train_loss:0.08020248567791867, acc:0.5654924465032668


 67%|██████▋   | 29232/43738 [3:44:13<2:30:54,  1.60it/s]

step:4560, train_loss:0.08015200545857709, acc:0.5654761904761905


 67%|██████▋   | 29233/43738 [3:44:13<2:14:35,  1.80it/s]

step:4560, train_loss:0.08015191326179853, acc:0.5654568467143297


 67%|██████▋   | 29234/43738 [3:44:14<2:13:01,  1.82it/s]

step:4560, train_loss:0.08014961054263123, acc:0.5654717110214135


 67%|██████▋   | 29235/43738 [3:44:14<1:52:41,  2.15it/s]

step:4560, train_loss:0.08014815297546277, acc:0.5654865743116128


 67%|██████▋   | 29236/43738 [3:44:15<2:11:50,  1.83it/s]

step:4560, train_loss:0.08015553384766826, acc:0.5654672321795047


 67%|██████▋   | 29237/43738 [3:44:15<1:53:37,  2.13it/s]

step:4560, train_loss:0.08015584315401253, acc:0.5654478913705236


 67%|██████▋   | 29238/43738 [3:44:16<2:14:53,  1.79it/s]

step:4560, train_loss:0.08015941837488019, acc:0.5654285518845338


 67%|██████▋   | 29239/43738 [3:44:16<1:55:36,  2.09it/s]

step:4560, train_loss:0.08016107639241282, acc:0.5654092137213995


 67%|██████▋   | 29240/43738 [3:44:16<1:37:27,  2.48it/s]

step:4560, train_loss:0.0801583398990009, acc:0.5654240766073871


 67%|██████▋   | 29241/43738 [3:44:17<1:55:30,  2.09it/s]

step:4560, train_loss:0.08016644117580035, acc:0.5654047399199754


 67%|██████▋   | 29242/43738 [3:44:18<2:02:42,  1.97it/s]

step:4560, train_loss:0.08016893845601436, acc:0.565385404555092


 67%|██████▋   | 29243/43738 [3:44:18<1:48:40,  2.22it/s]

step:4560, train_loss:0.08016639676169961, acc:0.5654002667304996


 67%|██████▋   | 29244/43738 [3:44:18<1:37:05,  2.49it/s]

step:4560, train_loss:0.08016366528580068, acc:0.5654151278894816


 67%|██████▋   | 29245/43738 [3:44:18<1:26:36,  2.79it/s]

step:4560, train_loss:0.08016371517118337, acc:0.5653957941528467


 67%|██████▋   | 29246/43738 [3:44:19<1:18:21,  3.08it/s]

step:4560, train_loss:0.08016469733386485, acc:0.5653764617383574


 67%|██████▋   | 29247/43738 [3:44:19<1:10:47,  3.41it/s]

step:4560, train_loss:0.08016384404622075, acc:0.5653571306458782


 68%|██████▊   | 29552/43738 [3:46:37<1:33:28,  2.53it/s]

step:4580, train_loss:0.08014849388108493, acc:0.5653086085544126


 68%|██████▊   | 29553/43738 [3:46:37<1:38:20,  2.40it/s]

step:4580, train_loss:0.08014956500074612, acc:0.5652894799174365


 68%|██████▊   | 29554/43738 [3:46:38<1:40:26,  2.35it/s]

step:4580, train_loss:0.08014910027739534, acc:0.5652703525749475


 68%|██████▊   | 29555/43738 [3:46:38<1:40:28,  2.35it/s]

step:4580, train_loss:0.08014784302925684, acc:0.565285061749281


 68%|██████▊   | 29556/43738 [3:46:39<1:51:22,  2.12it/s]

step:4580, train_loss:0.08015148787279726, acc:0.5652659358505887


 68%|██████▊   | 29557/43738 [3:46:39<2:09:34,  1.82it/s]

step:4580, train_loss:0.08015175031447667, acc:0.5652468112460669


 68%|██████▊   | 29558/43738 [3:46:40<2:03:52,  1.91it/s]

step:4580, train_loss:0.08014921640074558, acc:0.5652615197239326


 68%|██████▊   | 29559/43738 [3:46:40<1:45:04,  2.25it/s]

step:4580, train_loss:0.08014771785058451, acc:0.5652762272066038


 68%|██████▊   | 29560/43738 [3:46:40<1:31:29,  2.58it/s]

step:4580, train_loss:0.08016199468019589, acc:0.5652571041948579


 68%|██████▊   | 29561/43738 [3:46:41<1:45:34,  2.24it/s]

step:4580, train_loss:0.08015978259301279, acc:0.5652718108318392


 68%|██████▊   | 29562/43738 [3:46:41<1:31:26,  2.58it/s]

step:4580, train_loss:0.08015758910954854, acc:0.5652865164738515


 68%|██████▊   | 29563/43738 [3:46:42<1:19:52,  2.96it/s]

step:4580, train_loss:0.0801550762702548, acc:0.5653012211209958


 68%|██████▊   | 29564/43738 [3:46:42<1:25:46,  2.75it/s]

step:4580, train_loss:0.08015537949854624, acc:0.5652820998511704


 68%|██████▊   | 29565/43738 [3:46:42<1:24:55,  2.78it/s]

step:4580, train_loss:0.0801535790010616, acc:0.5652968036529681


 68%|██████▊   | 29566/43738 [3:46:43<1:16:26,  3.09it/s]

step:4580, train_loss:0.08015130042364328, acc:0.5653115064601231


 68%|██████▊   | 29567/43738 [3:46:43<1:19:28,  2.97it/s]

step:4580, train_loss:0.0801492529844806, acc:0.5653262082727365


 68%|██████▊   | 29872/43738 [3:49:01<1:50:52,  2.08it/s]

step:4600, train_loss:0.0800912560951208, acc:0.5655798071772897


 68%|██████▊   | 29873/43738 [3:49:02<1:59:55,  1.93it/s]

step:4600, train_loss:0.08009033185738772, acc:0.565594349412513


 68%|██████▊   | 29874/43738 [3:49:02<1:55:38,  2.00it/s]

step:4600, train_loss:0.08009092477376943, acc:0.5655754167503515


 68%|██████▊   | 29875/43738 [3:49:02<1:40:17,  2.30it/s]

step:4600, train_loss:0.0800899234322079, acc:0.5655564853556485


 68%|██████▊   | 29876/43738 [3:49:03<1:59:13,  1.94it/s]

step:4600, train_loss:0.08009101837987716, acc:0.5655375552282769


 68%|██████▊   | 29877/43738 [3:49:04<2:07:27,  1.81it/s]

step:4600, train_loss:0.08009163710686278, acc:0.5655186263681092


 68%|██████▊   | 29878/43738 [3:49:05<2:25:03,  1.59it/s]

step:4600, train_loss:0.08008927074978289, acc:0.5655331682174175


 68%|██████▊   | 29879/43738 [3:49:05<2:33:17,  1.51it/s]

step:4600, train_loss:0.08009095294122742, acc:0.5655142407711101


 68%|██████▊   | 29880/43738 [3:49:06<2:44:18,  1.41it/s]

step:4600, train_loss:0.08009012492844897, acc:0.565528781793842


 68%|██████▊   | 29881/43738 [3:49:07<2:28:52,  1.55it/s]

step:4600, train_loss:0.08009310491967322, acc:0.5655098557611861


 68%|██████▊   | 29882/43738 [3:49:08<2:41:26,  1.43it/s]

step:4600, train_loss:0.08009046071939786, acc:0.5655243959574325


 68%|██████▊   | 29883/43738 [3:49:08<2:24:09,  1.60it/s]

step:4600, train_loss:0.08009338325191699, acc:0.565505471338219


 68%|██████▊   | 29884/43738 [3:49:08<2:06:33,  1.82it/s]

step:4600, train_loss:0.0800911527773759, acc:0.5655200107080712


 68%|██████▊   | 29885/43738 [3:49:09<1:54:14,  2.02it/s]

step:4600, train_loss:0.08009240405743469, acc:0.5655010875020914


 68%|██████▊   | 29886/43738 [3:49:09<1:36:12,  2.40it/s]

step:4600, train_loss:0.08008972688465363, acc:0.5655156260456401


 68%|██████▊   | 29887/43738 [3:49:09<1:40:52,  2.29it/s]

step:4600, train_loss:0.08009133180677899, acc:0.5654967042526852


 69%|██████▉   | 30192/43738 [3:51:32<1:47:23,  2.10it/s]

step:4620, train_loss:0.08000430289692238, acc:0.5658121356650768


 69%|██████▉   | 30193/43738 [3:51:33<2:08:34,  1.76it/s]

step:4620, train_loss:0.08000501372180238, acc:0.5657933958202233


 69%|██████▉   | 30194/43738 [3:51:33<1:59:38,  1.89it/s]

step:4620, train_loss:0.0800116377762431, acc:0.5657746572166655


 69%|██████▉   | 30195/43738 [3:51:34<1:50:39,  2.04it/s]

step:4620, train_loss:0.08001413122580754, acc:0.5657559198542805


 69%|██████▉   | 30196/43738 [3:51:34<1:49:55,  2.05it/s]

step:4620, train_loss:0.0800120906523006, acc:0.5657703007020798


 69%|██████▉   | 30197/43738 [3:51:35<1:38:13,  2.30it/s]

step:4620, train_loss:0.08001099268031468, acc:0.5657515647249727


 69%|██████▉   | 30198/43738 [3:51:35<1:30:05,  2.50it/s]

step:4620, train_loss:0.08000950557875859, acc:0.565765944764554


 69%|██████▉   | 30199/43738 [3:51:35<1:30:57,  2.48it/s]

step:4620, train_loss:0.08000707417641932, acc:0.5657803238517831


 69%|██████▉   | 30200/43738 [3:51:36<1:32:15,  2.45it/s]

step:4620, train_loss:0.08000617762641489, acc:0.565794701986755


 69%|██████▉   | 30201/43738 [3:51:36<1:27:17,  2.58it/s]

step:4620, train_loss:0.08000389235552165, acc:0.5658090791695639


 69%|██████▉   | 30202/43738 [3:51:36<1:27:27,  2.58it/s]

step:4620, train_loss:0.08000125705993706, acc:0.5658234554003047


 69%|██████▉   | 30203/43738 [3:51:37<1:49:55,  2.05it/s]

step:4620, train_loss:0.07999891311094212, acc:0.5658378306790716


 69%|██████▉   | 30204/43738 [3:51:38<1:54:20,  1.97it/s]

step:4620, train_loss:0.08000475295308516, acc:0.5658190968083697


 69%|██████▉   | 30205/43738 [3:51:38<1:50:06,  2.05it/s]

step:4620, train_loss:0.08000452416410199, acc:0.5658334712795895


 69%|██████▉   | 30206/43738 [3:51:39<1:44:58,  2.15it/s]

step:4620, train_loss:0.08000246267052338, acc:0.5658478447990466


 69%|██████▉   | 30207/43738 [3:51:39<1:29:37,  2.52it/s]

step:4620, train_loss:0.07999982188483887, acc:0.5658622173668355


 70%|██████▉   | 30512/43738 [3:53:58<1:41:51,  2.16it/s]

step:4640, train_loss:0.07992530037463066, acc:0.5663017829050865


 70%|██████▉   | 30513/43738 [3:53:58<1:37:20,  2.26it/s]

step:4640, train_loss:0.07992268191438175, acc:0.566315996460525


 70%|██████▉   | 30514/43738 [3:53:59<1:25:44,  2.57it/s]

step:4640, train_loss:0.07992006326267669, acc:0.5663302090843547


 70%|██████▉   | 30515/43738 [3:53:59<1:23:42,  2.63it/s]

step:4640, train_loss:0.07992087705376151, acc:0.5663116500081927


 70%|██████▉   | 30516/43738 [3:53:59<1:36:29,  2.28it/s]

step:4640, train_loss:0.07992493162169843, acc:0.5662930921483812


 70%|██████▉   | 30517/43738 [3:54:00<1:32:11,  2.39it/s]

step:4640, train_loss:0.07992685050748027, acc:0.5662745355048006


 70%|██████▉   | 30518/43738 [3:54:00<1:19:54,  2.76it/s]

step:4640, train_loss:0.07992424969570897, acc:0.5662887476243529


 70%|██████▉   | 30519/43738 [3:54:01<1:25:22,  2.58it/s]

step:4640, train_loss:0.07992165807153452, acc:0.566302958812543


 70%|██████▉   | 30520/43738 [3:54:01<1:28:04,  2.50it/s]

step:4640, train_loss:0.0799195003523013, acc:0.5663171690694626


 70%|██████▉   | 30521/43738 [3:54:01<1:18:24,  2.81it/s]

step:4640, train_loss:0.07991763329980686, acc:0.5663313783952033


 70%|██████▉   | 30522/43738 [3:54:02<1:23:22,  2.64it/s]

step:4640, train_loss:0.079915135291844, acc:0.5663455867898565


 70%|██████▉   | 30523/43738 [3:54:02<1:26:50,  2.54it/s]

step:4640, train_loss:0.07992155943178204, acc:0.5663270320741736


 70%|██████▉   | 30524/43738 [3:54:02<1:16:41,  2.87it/s]

step:4640, train_loss:0.0799198624109293, acc:0.5663412396802516


 70%|██████▉   | 30525/43738 [3:54:03<1:42:33,  2.15it/s]

step:4640, train_loss:0.07991857197003921, acc:0.5663554463554463


 70%|██████▉   | 30526/43738 [3:54:04<1:46:01,  2.08it/s]

step:4640, train_loss:0.07992018961904104, acc:0.5663368931402739


 70%|██████▉   | 30527/43738 [3:54:04<1:28:46,  2.48it/s]

step:4640, train_loss:0.07991761783369268, acc:0.5663510990270908


 70%|███████   | 30832/43738 [3:56:26<1:30:47,  2.37it/s]

step:4660, train_loss:0.07991871698778866, acc:0.5664244940321743


 70%|███████   | 30833/43738 [3:56:26<1:32:48,  2.32it/s]

step:4660, train_loss:0.07991731230080043, acc:0.5664385560924983


 70%|███████   | 30834/43738 [3:56:27<1:37:57,  2.20it/s]

step:4660, train_loss:0.07991870095716701, acc:0.5664201855095025


 70%|███████   | 30835/43738 [3:56:27<1:42:37,  2.10it/s]

step:4660, train_loss:0.07991641947981815, acc:0.5664342467974705


 71%|███████   | 30836/43738 [3:56:28<1:48:21,  1.98it/s]

step:4660, train_loss:0.07992087158873833, acc:0.5664158775457258


 71%|███████   | 30837/43738 [3:56:28<1:33:36,  2.30it/s]

step:4660, train_loss:0.07992220786782377, acc:0.5663975094853585


 71%|███████   | 30838/43738 [3:56:29<1:57:05,  1.84it/s]

step:4660, train_loss:0.07992115837022795, acc:0.5664115701407355


 71%|███████   | 30839/43738 [3:56:29<1:43:01,  2.09it/s]

step:4660, train_loss:0.07991942815346635, acc:0.5664256298842375


 71%|███████   | 30840/43738 [3:56:29<1:26:39,  2.48it/s]

step:4660, train_loss:0.07991683703478845, acc:0.5664396887159533


 71%|███████   | 30841/43738 [3:56:30<1:26:08,  2.50it/s]

step:4660, train_loss:0.07991425565652244, acc:0.5664537466359716


 71%|███████   | 30842/43738 [3:56:30<1:40:01,  2.15it/s]

step:4660, train_loss:0.07991288131747111, acc:0.566467803644381


 71%|███████   | 30843/43738 [3:56:31<1:40:09,  2.15it/s]

step:4660, train_loss:0.07991789179116364, acc:0.566449437473657


 71%|███████   | 30844/43738 [3:56:31<1:37:13,  2.21it/s]

step:4660, train_loss:0.07991595459821069, acc:0.566463493710284


 71%|███████   | 30845/43738 [3:56:32<1:32:57,  2.31it/s]

step:4660, train_loss:0.07991576715617864, acc:0.5664451288701572


 71%|███████   | 30846/43738 [3:56:32<1:43:49,  2.07it/s]

step:4660, train_loss:0.07991381366599848, acc:0.566459184335084


 71%|███████   | 30847/43738 [3:56:32<1:33:16,  2.30it/s]

step:4660, train_loss:0.07991145177265134, acc:0.5664732388887088


 71%|███████   | 31152/43738 [3:58:55<2:08:45,  1.63it/s]

step:4680, train_loss:0.07990969486652005, acc:0.5666088854648177


 71%|███████   | 31153/43738 [3:58:55<1:54:59,  1.82it/s]

step:4680, train_loss:0.07990771659861275, acc:0.5666227971623921


 71%|███████   | 31154/43738 [3:58:56<1:47:47,  1.95it/s]

step:4680, train_loss:0.0799081383599304, acc:0.5666046093599538


 71%|███████   | 31155/43738 [3:58:56<1:31:33,  2.29it/s]

step:4680, train_loss:0.07990686996362434, acc:0.5666185203017172


 71%|███████   | 31156/43738 [3:58:56<1:31:29,  2.29it/s]

step:4680, train_loss:0.07990547083074051, acc:0.5666324303504943


 71%|███████   | 31157/43738 [3:58:57<1:27:53,  2.39it/s]

step:4680, train_loss:0.07990308925714844, acc:0.566646339506371


 71%|███████   | 31158/43738 [3:58:57<1:21:31,  2.57it/s]

step:4680, train_loss:0.07990189426295498, acc:0.5666602477694332


 71%|███████   | 31159/43738 [3:58:57<1:22:47,  2.53it/s]

step:4680, train_loss:0.07990136295669145, acc:0.5666420616836227


 71%|███████   | 31160/43738 [3:58:58<1:19:28,  2.64it/s]

step:4680, train_loss:0.07990040109668321, acc:0.5666559691912708


 71%|███████   | 31161/43738 [3:58:58<1:22:35,  2.54it/s]

step:4680, train_loss:0.07989801635123753, acc:0.5666698758062964


 71%|███████   | 31162/43738 [3:58:59<1:25:53,  2.44it/s]

step:4680, train_loss:0.07989946993263176, acc:0.5666516911623131


 71%|███████   | 31163/43738 [3:58:59<1:26:28,  2.42it/s]

step:4680, train_loss:0.07989799589030436, acc:0.5666655970221095


 71%|███████▏  | 31164/43738 [3:59:00<1:38:08,  2.14it/s]

step:4680, train_loss:0.07990334834028795, acc:0.5666474136824541


 71%|███████▏  | 31165/43738 [3:59:00<1:45:15,  1.99it/s]

step:4680, train_loss:0.07990706481280124, acc:0.5666292315097065


 71%|███████▏  | 31166/43738 [3:59:01<1:57:29,  1.78it/s]

step:4680, train_loss:0.0799128773881331, acc:0.566611050503754


 71%|███████▏  | 31167/43738 [3:59:01<1:55:27,  1.81it/s]

step:4680, train_loss:0.07991429088859829, acc:0.5665928706644848


 72%|███████▏  | 31472/43738 [4:01:19<1:55:49,  1.77it/s]

step:4700, train_loss:0.07995295953882552, acc:0.56605871886121


 72%|███████▏  | 31473/43738 [4:01:19<1:37:59,  2.09it/s]

step:4700, train_loss:0.0799509600119362, acc:0.5660725065929527


 72%|███████▏  | 31474/43738 [4:01:19<1:42:12,  2.00it/s]

step:4700, train_loss:0.07994846170111719, acc:0.5660862934485608


 72%|███████▏  | 31475/43738 [4:01:20<1:40:11,  2.04it/s]

step:4700, train_loss:0.07994682948239114, acc:0.5661000794281176


 72%|███████▏  | 31476/43738 [4:01:20<1:34:54,  2.15it/s]

step:4700, train_loss:0.07994483427726419, acc:0.5661138645317066


 72%|███████▏  | 31477/43738 [4:01:21<1:47:26,  1.90it/s]

step:4700, train_loss:0.07994559641647947, acc:0.5660958795310862


 72%|███████▏  | 31478/43738 [4:01:21<1:37:14,  2.10it/s]

step:4700, train_loss:0.07994358416460096, acc:0.5661096638922422


 72%|███████▏  | 31479/43738 [4:01:22<1:40:20,  2.04it/s]

step:4700, train_loss:0.0799410808205886, acc:0.5661234473776168


 72%|███████▏  | 31480/43738 [4:01:22<1:25:27,  2.39it/s]

step:4700, train_loss:0.07993859894657598, acc:0.5661372299872935


 72%|███████▏  | 31481/43738 [4:01:23<1:33:10,  2.19it/s]

step:4700, train_loss:0.07993616186449563, acc:0.5661510117213557


 72%|███████▏  | 31482/43738 [4:01:23<1:25:21,  2.39it/s]

step:4700, train_loss:0.07993476695969867, acc:0.5661647925798869


 72%|███████▏  | 31483/43738 [4:01:23<1:26:27,  2.36it/s]

step:4700, train_loss:0.0799336177767305, acc:0.5661785725629704


 72%|███████▏  | 31484/43738 [4:01:24<1:39:04,  2.06it/s]

step:4700, train_loss:0.07993141608601324, acc:0.5661923516706898


 72%|███████▏  | 31485/43738 [4:01:24<1:32:32,  2.21it/s]

step:4700, train_loss:0.07993033136653518, acc:0.5662061299031285


 72%|███████▏  | 31486/43738 [4:01:25<1:28:55,  2.30it/s]

step:4700, train_loss:0.0799288180959076, acc:0.5662199072603696


 72%|███████▏  | 31487/43738 [4:01:25<1:36:54,  2.11it/s]

step:4700, train_loss:0.07992965528651984, acc:0.5662336837424969


 73%|███████▎  | 31792/43738 [4:03:45<1:26:27,  2.30it/s]

step:4720, train_loss:0.07984384209253571, acc:0.5669979869149472


 73%|███████▎  | 31793/43738 [4:03:45<1:27:56,  2.26it/s]

step:4720, train_loss:0.07985290245620158, acc:0.566980152863838


 73%|███████▎  | 31794/43738 [4:03:46<1:48:10,  1.84it/s]

step:4720, train_loss:0.07985153714840505, acc:0.5669937724098887


 73%|███████▎  | 31795/43738 [4:03:47<1:43:52,  1.92it/s]

step:4720, train_loss:0.07985326658764941, acc:0.5669759396131467


 73%|███████▎  | 31796/43738 [4:03:47<1:27:30,  2.27it/s]

step:4720, train_loss:0.07985082468035869, acc:0.5669895584350233


 73%|███████▎  | 31797/43738 [4:03:47<1:32:55,  2.14it/s]

step:4720, train_loss:0.07985398588499816, acc:0.5669717268924741


 73%|███████▎  | 31798/43738 [4:03:48<1:51:45,  1.78it/s]

step:4720, train_loss:0.07985498028690019, acc:0.5669538964714762


 73%|███████▎  | 31799/43738 [4:03:49<1:46:44,  1.86it/s]

step:4720, train_loss:0.0798548768531827, acc:0.5669360671719237


 73%|███████▎  | 31800/43738 [4:03:49<1:37:13,  2.05it/s]

step:4720, train_loss:0.07985242616975535, acc:0.5669496855345912


 73%|███████▎  | 31801/43738 [4:03:50<1:48:38,  1.83it/s]

step:4720, train_loss:0.0798509278668731, acc:0.5669633030407849


 73%|███████▎  | 31802/43738 [4:03:50<1:35:47,  2.08it/s]

step:4720, train_loss:0.07985339334882154, acc:0.5669454751273505


 73%|███████▎  | 31803/43738 [4:03:50<1:36:42,  2.06it/s]

step:4720, train_loss:0.07985536429011982, acc:0.5669276483350627


 73%|███████▎  | 31804/43738 [4:03:51<1:27:28,  2.27it/s]

step:4720, train_loss:0.07985533879791669, acc:0.5669412652496542


 73%|███████▎  | 31805/43738 [4:03:51<1:17:05,  2.58it/s]

step:4720, train_loss:0.07985339472331132, acc:0.5669548813079704


 73%|███████▎  | 31806/43738 [4:03:51<1:10:36,  2.82it/s]

step:4720, train_loss:0.07985089857660306, acc:0.5669684965100924


 73%|███████▎  | 31807/43738 [4:03:52<1:11:51,  2.77it/s]

step:4720, train_loss:0.07985114485260009, acc:0.5669506712358915


 73%|███████▎  | 32112/43738 [4:06:18<1:34:08,  2.06it/s]

step:4740, train_loss:0.07988649062867846, acc:0.5665171898355755


 73%|███████▎  | 32113/43738 [4:06:19<1:24:07,  2.30it/s]

step:4740, train_loss:0.07988452867912707, acc:0.5665306885062125


 73%|███████▎  | 32114/43738 [4:06:19<1:17:53,  2.49it/s]

step:4740, train_loss:0.07988813174316783, acc:0.5665130472691038


 73%|███████▎  | 32115/43738 [4:06:19<1:17:49,  2.49it/s]

step:4740, train_loss:0.07988841064717027, acc:0.5664954071306243


 73%|███████▎  | 32116/43738 [4:06:20<1:34:38,  2.05it/s]

step:4740, train_loss:0.07989079542848561, acc:0.5664777680906713


 73%|███████▎  | 32117/43738 [4:06:20<1:27:21,  2.22it/s]

step:4740, train_loss:0.07989099222986402, acc:0.5664601301491422


 73%|███████▎  | 32118/43738 [4:06:21<1:15:47,  2.56it/s]

step:4740, train_loss:0.07988903945863259, acc:0.566473628494925


 73%|███████▎  | 32119/43738 [4:06:21<1:12:23,  2.67it/s]

step:4740, train_loss:0.0798886508877694, acc:0.5664871260001868


 73%|███████▎  | 32120/43738 [4:06:21<1:12:43,  2.66it/s]

step:4740, train_loss:0.07988616663010005, acc:0.5665006226650062


 73%|███████▎  | 32121/43738 [4:06:22<1:41:38,  1.90it/s]

step:4740, train_loss:0.07988739703017768, acc:0.5664829862083995


 73%|███████▎  | 32122/43738 [4:06:23<1:32:25,  2.09it/s]

step:4740, train_loss:0.0798859405952368, acc:0.5664964821617583


 73%|███████▎  | 32123/43738 [4:06:23<1:38:04,  1.97it/s]

step:4740, train_loss:0.07988386984793651, acc:0.5665099772748498


 73%|███████▎  | 32124/43738 [4:06:24<1:35:07,  2.03it/s]

step:4740, train_loss:0.07988498626133118, acc:0.5664923421740754


 73%|███████▎  | 32125/43738 [4:06:24<1:29:43,  2.16it/s]

step:4740, train_loss:0.07988316659012026, acc:0.5665058365758755


 73%|███████▎  | 32126/43738 [4:06:24<1:23:48,  2.31it/s]

step:4740, train_loss:0.07988410877753542, acc:0.5664882027018614


 73%|███████▎  | 32127/43738 [4:06:25<1:22:42,  2.34it/s]

step:4740, train_loss:0.07988732550637775, acc:0.5664705699256077


 74%|███████▍  | 32432/43738 [4:08:43<1:28:45,  2.12it/s]

step:4760, train_loss:0.07984085198995873, acc:0.5668167242229897


 74%|███████▍  | 32433/43738 [4:08:44<1:17:27,  2.43it/s]

step:4760, train_loss:0.0798465515713779, acc:0.5667992476798323


 74%|███████▍  | 32434/43738 [4:08:44<1:19:42,  2.36it/s]

step:4760, train_loss:0.07985709688922656, acc:0.566781772214343


 74%|███████▍  | 32435/43738 [4:08:45<1:33:24,  2.02it/s]

step:4760, train_loss:0.0798555958238182, acc:0.5667951287189764


 74%|███████▍  | 32436/43738 [4:08:45<1:20:43,  2.33it/s]

step:4760, train_loss:0.07985558199797002, acc:0.5667776544580096


 74%|███████▍  | 32437/43738 [4:08:46<1:22:24,  2.29it/s]

step:4760, train_loss:0.07985368601073556, acc:0.5667910102660542


 74%|███████▍  | 32438/43738 [4:08:46<1:27:25,  2.15it/s]

step:4760, train_loss:0.07985626129725595, acc:0.5667735372094457


 74%|███████▍  | 32439/43738 [4:08:47<1:41:39,  1.85it/s]

step:4760, train_loss:0.07985916682755194, acc:0.5667560652301242


 74%|███████▍  | 32440/43738 [4:08:48<1:53:44,  1.66it/s]

step:4760, train_loss:0.07986159895017375, acc:0.5667385943279901


 74%|███████▍  | 32441/43738 [4:08:48<1:45:36,  1.78it/s]

step:4760, train_loss:0.0798594774952746, acc:0.5667519496932893


 74%|███████▍  | 32442/43738 [4:08:49<1:43:10,  1.82it/s]

step:4760, train_loss:0.07985750948335667, acc:0.5667653042352506


 74%|███████▍  | 32443/43738 [4:08:49<1:37:12,  1.94it/s]

step:4760, train_loss:0.07985624459246096, acc:0.56677865795395


 74%|███████▍  | 32444/43738 [4:08:49<1:37:32,  1.93it/s]

step:4760, train_loss:0.0798539287608289, acc:0.5667920108494637


 74%|███████▍  | 32445/43738 [4:08:50<1:29:16,  2.11it/s]

step:4760, train_loss:0.07985164074415839, acc:0.5668053629218678


 74%|███████▍  | 32446/43738 [4:08:50<1:23:58,  2.24it/s]

step:4760, train_loss:0.07985108423777659, acc:0.5667878937311225


 74%|███████▍  | 32447/43738 [4:08:51<1:34:44,  1.99it/s]

step:4760, train_loss:0.07985223341478027, acc:0.5668012451074059


 75%|███████▍  | 32752/43738 [4:11:09<1:16:54,  2.38it/s]

step:4780, train_loss:0.07974469266898108, acc:0.5669272105520273


 75%|███████▍  | 32753/43738 [4:11:09<1:11:38,  2.56it/s]

step:4780, train_loss:0.07974500742991503, acc:0.5669404329374409


 75%|███████▍  | 32754/43738 [4:11:09<1:02:50,  2.91it/s]

step:4780, train_loss:0.07974263267854247, acc:0.5669536545154791


 75%|███████▍  | 32755/43738 [4:11:10<1:16:49,  2.38it/s]

step:4780, train_loss:0.07974020168909743, acc:0.5669668752862158


 75%|███████▍  | 32756/43738 [4:11:10<1:14:46,  2.45it/s]

step:4780, train_loss:0.07973776746716212, acc:0.5669800952497253


 75%|███████▍  | 32757/43738 [4:11:11<1:25:11,  2.15it/s]

step:4780, train_loss:0.07973763329120913, acc:0.5669627865799677


 75%|███████▍  | 32758/43738 [4:11:11<1:26:02,  2.13it/s]

step:4780, train_loss:0.0797403072822558, acc:0.5669454789669699


 75%|███████▍  | 32759/43738 [4:11:12<1:26:26,  2.12it/s]

step:4780, train_loss:0.07974161380899296, acc:0.5669281724106352


 75%|███████▍  | 32760/43738 [4:11:12<1:26:53,  2.11it/s]

step:4780, train_loss:0.07974332696396634, acc:0.5669108669108669


 75%|███████▍  | 32761/43738 [4:11:13<1:20:41,  2.27it/s]

step:4780, train_loss:0.07974377905981882, acc:0.5669240865663441


 75%|███████▍  | 32762/43738 [4:11:13<1:14:09,  2.47it/s]

step:4780, train_loss:0.07974296960573572, acc:0.5669067822477261


 75%|███████▍  | 32763/43738 [4:11:13<1:11:28,  2.56it/s]

step:4780, train_loss:0.0797410055623292, acc:0.5669200012208894


 75%|███████▍  | 32764/43738 [4:11:14<1:19:44,  2.29it/s]

step:4780, train_loss:0.07974617965795756, acc:0.5669026980832621


 75%|███████▍  | 32765/43738 [4:11:15<1:38:28,  1.86it/s]

step:4780, train_loss:0.07975335565628057, acc:0.5668853960018312


 75%|███████▍  | 32766/43738 [4:11:15<1:45:06,  1.74it/s]

step:4780, train_loss:0.07975172990992103, acc:0.5668986144173839


 75%|███████▍  | 32767/43738 [4:11:16<1:44:30,  1.75it/s]

step:4780, train_loss:0.07974956800778239, acc:0.5669118320261238


 76%|███████▌  | 33072/43738 [4:13:33<1:48:41,  1.64it/s]

step:4800, train_loss:0.07968323204083659, acc:0.567428640541848


 76%|███████▌  | 33073/43738 [4:13:33<1:37:12,  1.83it/s]

step:4800, train_loss:0.07968202574730877, acc:0.5674417198318871


 76%|███████▌  | 33074/43738 [4:13:34<1:44:48,  1.70it/s]

step:4800, train_loss:0.07968310571780629, acc:0.5674245631009252


 76%|███████▌  | 33075/43738 [4:13:35<1:44:51,  1.69it/s]

step:4800, train_loss:0.07968298032697683, acc:0.567437641723356


 76%|███████▌  | 33076/43738 [4:13:35<1:27:13,  2.04it/s]

step:4800, train_loss:0.07968112784301308, acc:0.5674507195549643


 76%|███████▌  | 33077/43738 [4:13:36<1:46:58,  1.66it/s]

step:4800, train_loss:0.07967922554538973, acc:0.5674637965958219


 76%|███████▌  | 33078/43738 [4:13:36<1:46:27,  1.67it/s]

step:4800, train_loss:0.07967694291737278, acc:0.5674768728460003


 76%|███████▌  | 33079/43738 [4:13:37<1:44:09,  1.71it/s]

step:4800, train_loss:0.07967908005033224, acc:0.5674597176456362


 76%|███████▌  | 33080/43738 [4:13:37<1:30:24,  1.96it/s]

step:4800, train_loss:0.07967975493214503, acc:0.5674425634824668


 76%|███████▌  | 33081/43738 [4:13:38<1:27:56,  2.02it/s]

step:4800, train_loss:0.07968767929145393, acc:0.5674254103563979


 76%|███████▌  | 33082/43738 [4:13:38<1:28:54,  2.00it/s]

step:4800, train_loss:0.07969547067197985, acc:0.5674082582673358


 76%|███████▌  | 33083/43738 [4:13:39<1:46:23,  1.67it/s]

step:4800, train_loss:0.07969342847871459, acc:0.5674213342199922


 76%|███████▌  | 33084/43738 [4:13:40<1:54:25,  1.55it/s]

step:4800, train_loss:0.0796919651461067, acc:0.5674344093821787


 76%|███████▌  | 33085/43738 [4:13:40<1:50:10,  1.61it/s]

step:4800, train_loss:0.07969122332601467, acc:0.567447483753967


 76%|███████▌  | 33086/43738 [4:13:41<1:58:22,  1.50it/s]

step:4800, train_loss:0.07969679480584055, acc:0.5674303330713897


 76%|███████▌  | 33087/43738 [4:13:42<2:15:28,  1.31it/s]

step:4800, train_loss:0.07969505010609837, acc:0.5674434067760752


 76%|███████▋  | 33392/43738 [4:16:00<1:06:16,  2.60it/s]

step:4820, train_loss:0.07968067340039933, acc:0.5676509343555343


 76%|███████▋  | 33393/43738 [4:16:00<1:04:53,  2.66it/s]

step:4820, train_loss:0.0796786418587007, acc:0.5676638816518432


 76%|███████▋  | 33394/43738 [4:16:01<57:41,  2.99it/s]  

step:4820, train_loss:0.07968498627232336, acc:0.5676468826735341


 76%|███████▋  | 33395/43738 [4:16:01<59:33,  2.89it/s]

step:4820, train_loss:0.07968834571874212, acc:0.5676298847132805


 76%|███████▋  | 33396/43738 [4:16:02<1:06:42,  2.58it/s]

step:4820, train_loss:0.07969102960351063, acc:0.5676128877709905


 76%|███████▋  | 33397/43738 [4:16:02<1:07:31,  2.55it/s]

step:4820, train_loss:0.07968865089217178, acc:0.5676258346558074


 76%|███████▋  | 33398/43738 [4:16:02<1:06:02,  2.61it/s]

step:4820, train_loss:0.07968633662374403, acc:0.5676387807653153


 76%|███████▋  | 33399/43738 [4:16:03<57:19,  3.01it/s]  

step:4820, train_loss:0.07968590292605776, acc:0.5676517260995838


 76%|███████▋  | 33400/43738 [4:16:03<1:03:42,  2.70it/s]

step:4820, train_loss:0.07969201252598651, acc:0.5676347305389221


 76%|███████▋  | 33401/43738 [4:16:03<1:01:51,  2.79it/s]

step:4820, train_loss:0.07969001920137411, acc:0.5676476752193048


 76%|███████▋  | 33402/43738 [4:16:04<1:01:56,  2.78it/s]

step:4820, train_loss:0.07969271608953586, acc:0.5676306807975571


 76%|███████▋  | 33403/43738 [4:16:04<1:01:56,  2.78it/s]

step:4820, train_loss:0.07969466781481713, acc:0.5676136873933479


 76%|███████▋  | 33404/43738 [4:16:05<1:23:35,  2.06it/s]

step:4820, train_loss:0.0796922980329657, acc:0.5676266315411328


 76%|███████▋  | 33405/43738 [4:16:05<1:11:24,  2.41it/s]

step:4820, train_loss:0.07969001156237425, acc:0.567639574913935


 76%|███████▋  | 33406/43738 [4:16:05<1:10:29,  2.44it/s]

step:4820, train_loss:0.07968864200972385, acc:0.5676525175118242


 76%|███████▋  | 33407/43738 [4:16:06<1:25:20,  2.02it/s]

step:4820, train_loss:0.07968751438151717, acc:0.5676654593348699


 77%|███████▋  | 33712/43738 [4:18:27<1:30:53,  1.84it/s]

step:4840, train_loss:0.0797336223493832, acc:0.5676020408163265


 77%|███████▋  | 33713/43738 [4:18:27<1:30:29,  1.85it/s]

step:4840, train_loss:0.07973490425773452, acc:0.5675852045205114


 77%|███████▋  | 33714/43738 [4:18:28<1:32:47,  1.80it/s]

step:4840, train_loss:0.07973270057477957, acc:0.5675980304917838


 77%|███████▋  | 33715/43738 [4:18:29<1:29:04,  1.88it/s]

step:4840, train_loss:0.07973035942889917, acc:0.5676108557022097


 77%|███████▋  | 33716/43738 [4:18:29<1:22:29,  2.02it/s]

step:4840, train_loss:0.07972846228764681, acc:0.5676236801518567


 77%|███████▋  | 33717/43738 [4:18:29<1:18:28,  2.13it/s]

step:4840, train_loss:0.07973118397475853, acc:0.5676068452116143


 77%|███████▋  | 33718/43738 [4:18:30<1:25:01,  1.96it/s]

step:4840, train_loss:0.07973003469422674, acc:0.5676196690195148


 77%|███████▋  | 33719/43738 [4:18:30<1:19:17,  2.11it/s]

step:4840, train_loss:0.07972806425681336, acc:0.5676324920667872


 77%|███████▋  | 33720/43738 [4:18:31<1:16:09,  2.19it/s]

step:4840, train_loss:0.07972882272909051, acc:0.5676156583629893


 77%|███████▋  | 33721/43738 [4:18:31<1:09:10,  2.41it/s]

step:4840, train_loss:0.07972962042366652, acc:0.5675988256576021


 77%|███████▋  | 33722/43738 [4:18:32<1:13:53,  2.26it/s]

step:4840, train_loss:0.0797286242330727, acc:0.5676116481821956


 77%|███████▋  | 33723/43738 [4:18:32<1:30:09,  1.85it/s]

step:4840, train_loss:0.07972702334763385, acc:0.5676244699463274


 77%|███████▋  | 33724/43738 [4:18:33<1:29:23,  1.87it/s]

step:4840, train_loss:0.07972904159602161, acc:0.567607638477049


 77%|███████▋  | 33725/43738 [4:18:33<1:30:52,  1.84it/s]

step:4840, train_loss:0.0797305654506634, acc:0.5675908080059303


 77%|███████▋  | 33726/43738 [4:18:34<1:29:15,  1.87it/s]

step:4840, train_loss:0.07973462534079115, acc:0.5675739785328826


 77%|███████▋  | 33727/43738 [4:18:34<1:21:02,  2.06it/s]

step:4840, train_loss:0.07973351773815643, acc:0.5675867998932606


 78%|███████▊  | 34032/43738 [4:20:53<1:02:18,  2.60it/s]

step:4860, train_loss:0.07975752520944668, acc:0.5673189938881054


 78%|███████▊  | 34033/43738 [4:20:53<58:02,  2.79it/s]  

step:4860, train_loss:0.07975539973220531, acc:0.5673317074604061


 78%|███████▊  | 34034/43738 [4:20:53<51:54,  3.12it/s]

step:4860, train_loss:0.07975449336766417, acc:0.5673444202855967


 78%|███████▊  | 34035/43738 [4:20:54<49:16,  3.28it/s]

step:4860, train_loss:0.07975215211505904, acc:0.5673571323637432


 78%|███████▊  | 34036/43738 [4:20:54<45:54,  3.52it/s]

step:4860, train_loss:0.0797498095488119, acc:0.5673698436949113


 78%|███████▊  | 34037/43738 [4:20:55<1:00:46,  2.66it/s]

step:4860, train_loss:0.07974871683772487, acc:0.5673825542791668


 78%|███████▊  | 34038/43738 [4:20:55<1:01:23,  2.63it/s]

step:4860, train_loss:0.07974717386258723, acc:0.5673952641165756


 78%|███████▊  | 34039/43738 [4:20:56<1:09:12,  2.34it/s]

step:4860, train_loss:0.07974526487134419, acc:0.5674079732072035


 78%|███████▊  | 34040/43738 [4:20:56<59:20,  2.72it/s]  

step:4860, train_loss:0.07974338636497923, acc:0.5674206815511164


 78%|███████▊  | 34041/43738 [4:20:56<56:51,  2.84it/s]

step:4860, train_loss:0.07974298266784241, acc:0.5674333891483799


 78%|███████▊  | 34042/43738 [4:20:56<1:00:21,  2.68it/s]

step:4860, train_loss:0.07974099593405082, acc:0.5674460959990599


 78%|███████▊  | 34043/43738 [4:20:57<1:16:56,  2.10it/s]

step:4860, train_loss:0.07973877862014479, acc:0.5674588021032224


 78%|███████▊  | 34044/43738 [4:20:58<1:15:01,  2.15it/s]

step:4860, train_loss:0.07974172965984715, acc:0.5674421337093174


 78%|███████▊  | 34045/43738 [4:20:58<1:09:50,  2.31it/s]

step:4860, train_loss:0.07974396537899822, acc:0.5674254662946101


 78%|███████▊  | 34046/43738 [4:20:58<1:11:46,  2.25it/s]

step:4860, train_loss:0.07974494499579271, acc:0.5674087998590143


 78%|███████▊  | 34047/43738 [4:20:59<1:05:48,  2.45it/s]

step:4860, train_loss:0.07974447954177523, acc:0.5673921344024436


 79%|███████▊  | 34353/43738 [4:23:18<51:34,  3.03it/s]  

step:4880, train_loss:0.07973951711651397, acc:0.5674196553330229
step:4880, train_loss:0.07974145112942872, acc:0.5674031380083253


 79%|███████▊  | 34354/43738 [4:23:19<1:02:55,  2.49it/s]

step:4880, train_loss:0.07975574251753126, acc:0.5673866216452232


 79%|███████▊  | 34355/43738 [4:23:19<55:37,  2.81it/s]  

step:4880, train_loss:0.07975347581158519, acc:0.5673992140881968


 79%|███████▊  | 34356/43738 [4:23:19<1:01:36,  2.54it/s]

step:4880, train_loss:0.07975124469052465, acc:0.5674118057981139


 79%|███████▊  | 34357/43738 [4:23:20<54:06,  2.89it/s]  

step:4880, train_loss:0.07974893048322977, acc:0.5674243967750385


 79%|███████▊  | 34358/43738 [4:23:20<50:10,  3.12it/s]

step:4880, train_loss:0.07974667046909951, acc:0.5674369870190349


 79%|███████▊  | 34359/43738 [4:23:20<1:04:50,  2.41it/s]

step:4880, train_loss:0.0797479277679815, acc:0.5674204720742746


 79%|███████▊  | 34360/43738 [4:23:21<1:18:13,  2.00it/s]

step:4880, train_loss:0.07975171513842616, acc:0.5674039580908032


 79%|███████▊  | 34361/43738 [4:23:22<1:13:17,  2.13it/s]

step:4880, train_loss:0.07975054680939177, acc:0.5674165478303891


 79%|███████▊  | 34362/43738 [4:23:22<1:01:25,  2.54it/s]

step:4880, train_loss:0.07974971738097658, acc:0.5674291368372039


 79%|███████▊  | 34363/43738 [4:23:22<1:07:33,  2.31it/s]

step:4880, train_loss:0.07975099113801248, acc:0.5674126240433024


 79%|███████▊  | 34364/43738 [4:23:23<1:09:10,  2.26it/s]

step:4880, train_loss:0.07975460278878, acc:0.5673961122104528


 79%|███████▊  | 34365/43738 [4:23:23<1:11:05,  2.20it/s]

step:4880, train_loss:0.07975310513046077, acc:0.5674087007129347


 79%|███████▊  | 34366/43738 [4:23:24<1:10:14,  2.22it/s]

step:4880, train_loss:0.07975176835100788, acc:0.5674212884828028


 79%|███████▊  | 34367/43738 [4:23:24<1:10:29,  2.22it/s]

step:4880, train_loss:0.07975147134993765, acc:0.5674047778392062


 79%|███████▉  | 34672/43738 [4:25:46<1:46:27,  1.42it/s]

step:4900, train_loss:0.07970744032421208, acc:0.5678068758652515


 79%|███████▉  | 34673/43738 [4:25:46<1:43:10,  1.46it/s]

step:4900, train_loss:0.07971074650274271, acc:0.5677904998125343


 79%|███████▉  | 34674/43738 [4:25:47<1:37:11,  1.55it/s]

step:4900, train_loss:0.07971532573965967, acc:0.5677741247043895


 79%|███████▉  | 34675/43738 [4:25:47<1:28:47,  1.70it/s]

step:4900, train_loss:0.0797148595724833, acc:0.5677577505407354


 79%|███████▉  | 34676/43738 [4:25:48<1:34:32,  1.60it/s]

step:4900, train_loss:0.07971294603895335, acc:0.5677702157111547


 79%|███████▉  | 34677/43738 [4:25:49<1:22:35,  1.83it/s]

step:4900, train_loss:0.07972104903866109, acc:0.5677538426046083


 79%|███████▉  | 34678/43738 [4:25:49<1:22:33,  1.83it/s]

step:4900, train_loss:0.07971875206061274, acc:0.5677663071688102


 79%|███████▉  | 34679/43738 [4:25:50<1:16:50,  1.96it/s]

step:4900, train_loss:0.07971645374764962, acc:0.5677787710141584


 79%|███████▉  | 34680/43738 [4:25:50<1:28:30,  1.71it/s]

step:4900, train_loss:0.07972101051000145, acc:0.567762399077278


 79%|███████▉  | 34681/43738 [4:25:51<1:38:10,  1.54it/s]

step:4900, train_loss:0.07972208067450222, acc:0.5677748623165422


 79%|███████▉  | 34682/43738 [4:25:52<1:31:31,  1.65it/s]

step:4900, train_loss:0.07972916972931934, acc:0.56775849143648


 79%|███████▉  | 34683/43738 [4:25:52<1:40:07,  1.51it/s]

step:4900, train_loss:0.07972845016859813, acc:0.5677709540697171


 79%|███████▉  | 34684/43738 [4:25:53<1:27:55,  1.72it/s]

step:4900, train_loss:0.079726194404266, acc:0.5677834159843156


 79%|███████▉  | 34685/43738 [4:25:53<1:33:13,  1.62it/s]

step:4900, train_loss:0.07972477395334676, acc:0.5677958771803373


 79%|███████▉  | 34686/43738 [4:25:54<1:20:22,  1.88it/s]

step:4900, train_loss:0.07972347564445674, acc:0.5678083376578447


 79%|███████▉  | 34687/43738 [4:25:54<1:14:51,  2.01it/s]

step:4900, train_loss:0.07972591846235211, acc:0.5677919681725142


 80%|████████  | 34992/43738 [4:28:17<1:12:21,  2.01it/s]

step:4920, train_loss:0.07970945787905069, acc:0.5678726566072245


 80%|████████  | 34993/43738 [4:28:17<1:08:10,  2.14it/s]

step:4920, train_loss:0.07970871709798226, acc:0.5678850055725431


 80%|████████  | 34994/43738 [4:28:18<1:08:40,  2.12it/s]

step:4920, train_loss:0.07970787197873358, acc:0.5678687775047151


 80%|████████  | 34995/43738 [4:28:18<1:03:20,  2.30it/s]

step:4920, train_loss:0.07970617613557732, acc:0.5678811258751251


 80%|████████  | 34996/43738 [4:28:18<1:03:44,  2.29it/s]

step:4920, train_loss:0.07970806339688012, acc:0.5678934735398331


 80%|████████  | 34997/43738 [4:28:19<54:41,  2.66it/s]  

step:4920, train_loss:0.0797058888176826, acc:0.5679058204988999


 80%|████████  | 34998/43738 [4:28:19<1:04:57,  2.24it/s]

step:4920, train_loss:0.07970606813758171, acc:0.5679181667523858


 80%|████████  | 34999/43738 [4:28:19<56:36,  2.57it/s]  

step:4920, train_loss:0.07970770849844634, acc:0.5679019400554302


 80%|████████  | 35000/43738 [4:28:20<51:18,  2.84it/s]

step:4920, train_loss:0.07970779897179621, acc:0.5678857142857143


 80%|████████  | 35001/43738 [4:28:20<49:44,  2.93it/s]

step:4920, train_loss:0.07970873473762499, acc:0.5678694894431587


 80%|████████  | 35002/43738 [4:28:20<48:09,  3.02it/s]

step:4920, train_loss:0.07970651855670509, acc:0.5678818353236957


 80%|████████  | 35003/43738 [4:28:21<47:33,  3.06it/s]

step:4920, train_loss:0.07970511179844952, acc:0.5678941804988143


 80%|████████  | 35004/43738 [4:28:21<46:40,  3.12it/s]

step:4920, train_loss:0.07970925827280731, acc:0.5678779568049366


 80%|████████  | 35005/43738 [4:28:21<49:15,  2.96it/s]

step:4920, train_loss:0.0797107436966794, acc:0.5678617340379946


 80%|████████  | 35006/43738 [4:28:22<46:06,  3.16it/s]

step:4920, train_loss:0.07971107154714546, acc:0.5678455121979089


 80%|████████  | 35007/43738 [4:28:22<59:27,  2.45it/s]

step:4920, train_loss:0.0797106400870986, acc:0.5678292912846002


 81%|████████  | 35312/43738 [4:30:41<1:12:46,  1.93it/s]

step:4940, train_loss:0.07970231503127218, acc:0.5679938830992297


 81%|████████  | 35313/43738 [4:30:42<1:06:09,  2.12it/s]

step:4940, train_loss:0.07970024129801963, acc:0.5680061167275507


 81%|████████  | 35314/43738 [4:30:42<56:02,  2.51it/s]  

step:4940, train_loss:0.07970314016814012, acc:0.5679900322818145


 81%|████████  | 35315/43738 [4:30:42<1:01:01,  2.30it/s]

step:4940, train_loss:0.07970475335726258, acc:0.5679739487469914


 81%|████████  | 35316/43738 [4:30:43<53:20,  2.63it/s]  

step:4940, train_loss:0.07970273420922355, acc:0.5679861819005549


 81%|████████  | 35317/43738 [4:30:43<47:12,  2.97it/s]

step:4940, train_loss:0.07970075110041962, acc:0.5679984143613557


 81%|████████  | 35318/43738 [4:30:43<43:24,  3.23it/s]

step:4940, train_loss:0.07969896493147148, acc:0.5680106461294524


 81%|████████  | 35319/43738 [4:30:44<55:35,  2.52it/s]

step:4940, train_loss:0.07970071403685255, acc:0.5679945638324981


 81%|████████  | 35320/43738 [4:30:44<50:05,  2.80it/s]

step:4940, train_loss:0.07969982987004924, acc:0.5680067950169876


 81%|████████  | 35321/43738 [4:30:44<51:19,  2.73it/s]

step:4940, train_loss:0.07970587949889557, acc:0.5679907137397016


 81%|████████  | 35322/43738 [4:30:45<1:06:49,  2.10it/s]

step:4940, train_loss:0.07970721428510004, acc:0.5679746333729687


 81%|████████  | 35323/43738 [4:30:45<1:00:13,  2.33it/s]

step:4940, train_loss:0.0797122626546588, acc:0.5679585539167115


 81%|████████  | 35324/43738 [4:30:46<1:01:26,  2.28it/s]

step:4940, train_loss:0.07971240224425452, acc:0.5679424753708526


 81%|████████  | 35325/43738 [4:30:46<57:17,  2.45it/s]  

step:4940, train_loss:0.07971070852522327, acc:0.5679547062986553


 81%|████████  | 35326/43738 [4:30:47<1:04:32,  2.17it/s]

step:4940, train_loss:0.0797093661299099, acc:0.5679669365339977


 81%|████████  | 35327/43738 [4:30:47<1:06:52,  2.10it/s]

step:4940, train_loss:0.0797076612977841, acc:0.5679791660769383


 81%|████████▏ | 35632/43738 [4:33:06<1:14:14,  1.82it/s]

step:4960, train_loss:0.07965714117406623, acc:0.5680006735518635


 81%|████████▏ | 35633/43738 [4:33:07<1:15:44,  1.78it/s]

step:4960, train_loss:0.07965514536763854, acc:0.5680127971262594


 81%|████████▏ | 35634/43738 [4:33:07<1:17:11,  1.75it/s]

step:4960, train_loss:0.07965393585772297, acc:0.5680249200202054


 81%|████████▏ | 35635/43738 [4:33:08<1:11:42,  1.88it/s]

step:4960, train_loss:0.07965170098706653, acc:0.568037042233759


 81%|████████▏ | 35636/43738 [4:33:08<1:10:27,  1.92it/s]

step:4960, train_loss:0.0796537866824905, acc:0.5680211022561454


 81%|████████▏ | 35637/43738 [4:33:09<1:16:51,  1.76it/s]

step:4960, train_loss:0.07965353544564396, acc:0.5680051631731066


 81%|████████▏ | 35638/43738 [4:33:10<1:25:53,  1.57it/s]

step:4960, train_loss:0.07965152827692734, acc:0.5680172849205903


 81%|████████▏ | 35639/43738 [4:33:10<1:11:11,  1.90it/s]

step:4960, train_loss:0.07964949255328833, acc:0.5680294059878224


 81%|████████▏ | 35640/43738 [4:33:10<1:03:20,  2.13it/s]

step:4960, train_loss:0.07964735146089204, acc:0.5680415263748597


 81%|████████▏ | 35641/43738 [4:33:11<1:00:07,  2.24it/s]

step:4960, train_loss:0.07964677828847419, acc:0.5680255885076176


 81%|████████▏ | 35642/43738 [4:33:11<51:16,  2.63it/s]  

step:4960, train_loss:0.07964578118798317, acc:0.5680096515347063


 81%|████████▏ | 35643/43738 [4:33:11<45:07,  2.99it/s]

step:4960, train_loss:0.07964566444156884, acc:0.5679937154560503


 81%|████████▏ | 35644/43738 [4:33:11<41:39,  3.24it/s]

step:4960, train_loss:0.0796462802640936, acc:0.5679777802715744


 81%|████████▏ | 35645/43738 [4:33:12<46:03,  2.93it/s]

step:4960, train_loss:0.07964625630794663, acc:0.5679618459812036


 81%|████████▏ | 35646/43738 [4:33:12<50:47,  2.65it/s]

step:4960, train_loss:0.0796446618796094, acc:0.5679739662234192


 82%|████████▏ | 35647/43738 [4:33:13<54:43,  2.46it/s]

step:4960, train_loss:0.07964395052367224, acc:0.5679860857856202


 82%|████████▏ | 35952/43738 [4:35:39<1:09:35,  1.86it/s]

step:4980, train_loss:0.07960006668699915, acc:0.5682576769025367


 82%|████████▏ | 35953/43738 [4:35:39<59:25,  2.18it/s]  

step:4980, train_loss:0.07959785706522597, acc:0.5682696854226351


 82%|████████▏ | 35954/43738 [4:35:40<59:07,  2.19it/s]

step:4980, train_loss:0.07960631077995696, acc:0.5682538799577238


 82%|████████▏ | 35955/43738 [4:35:40<1:11:18,  1.82it/s]

step:4980, train_loss:0.07960412831453424, acc:0.5682658879154499


 82%|████████▏ | 35956/43738 [4:35:41<1:09:09,  1.88it/s]

step:4980, train_loss:0.07960367625740664, acc:0.5682778952052508


 82%|████████▏ | 35957/43738 [4:35:42<1:13:37,  1.76it/s]

step:4980, train_loss:0.079601489147094, acc:0.5682899018271824


 82%|████████▏ | 35958/43738 [4:35:42<1:03:56,  2.03it/s]

step:4980, train_loss:0.07960636500051826, acc:0.5682740975582624


 82%|████████▏ | 35959/43738 [4:35:42<59:53,  2.16it/s]  

step:4980, train_loss:0.07960797565002818, acc:0.5682582941683584


 82%|████████▏ | 35960/43738 [4:35:43<50:56,  2.55it/s]

step:4980, train_loss:0.07960583819580314, acc:0.5682703003337041


 82%|████████▏ | 35961/43738 [4:35:43<51:35,  2.51it/s]

step:4980, train_loss:0.07960577436588126, acc:0.5682823058313172


 82%|████████▏ | 35962/43738 [4:35:43<45:54,  2.82it/s]

step:4980, train_loss:0.07960519698707823, acc:0.5682943106612536


 82%|████████▏ | 35963/43738 [4:35:44<44:44,  2.90it/s]

step:4980, train_loss:0.07960384016935645, acc:0.5683063148235686


 82%|████████▏ | 35964/43738 [4:35:44<41:14,  3.14it/s]

step:4980, train_loss:0.07960163646425382, acc:0.5683183183183184


 82%|████████▏ | 35965/43738 [4:35:44<43:09,  3.00it/s]

step:4980, train_loss:0.07960090879562108, acc:0.5683303211455581


 82%|████████▏ | 35966/43738 [4:35:45<44:30,  2.91it/s]

step:4980, train_loss:0.07959875412045862, acc:0.568342323305344


 82%|████████▏ | 35967/43738 [4:35:45<48:13,  2.69it/s]

step:4980, train_loss:0.07960008944962581, acc:0.5683265215336281


 83%|████████▎ | 36272/43738 [4:38:04<55:08,  2.26it/s]  

step:5000, train_loss:0.07955592011628539, acc:0.5685652845169828


 83%|████████▎ | 36273/43738 [4:38:04<47:10,  2.64it/s]

step:5000, train_loss:0.0795538561956009, acc:0.5685771786177046


 83%|████████▎ | 36274/43738 [4:38:05<45:03,  2.76it/s]

step:5000, train_loss:0.07955300023578414, acc:0.5685890720626344


 83%|████████▎ | 36275/43738 [4:38:05<48:37,  2.56it/s]

step:5000, train_loss:0.07955176204295834, acc:0.5686009648518263


 83%|████████▎ | 36276/43738 [4:38:06<43:57,  2.83it/s]

step:5000, train_loss:0.07955044410562423, acc:0.5686128569853347


 83%|████████▎ | 36277/43738 [4:38:06<49:54,  2.49it/s]

step:5000, train_loss:0.07955358610895737, acc:0.5685971827879924


 83%|████████▎ | 36278/43738 [4:38:06<48:15,  2.58it/s]

step:5000, train_loss:0.07955264288767908, acc:0.5686090743701416


 83%|████████▎ | 36279/43738 [4:38:07<54:46,  2.27it/s]

step:5000, train_loss:0.07955261821691408, acc:0.5686209652967281


 83%|████████▎ | 36280/43738 [4:38:08<1:15:12,  1.65it/s]

step:5000, train_loss:0.07955092214405979, acc:0.568632855567806


 83%|████████▎ | 36281/43738 [4:38:09<1:26:06,  1.44it/s]

step:5000, train_loss:0.07954976846823347, acc:0.5686447451834293


 83%|████████▎ | 36282/43738 [4:38:10<1:33:22,  1.33it/s]

step:5000, train_loss:0.07955520054949751, acc:0.5686290722672399


 83%|████████▎ | 36283/43738 [4:38:10<1:18:12,  1.59it/s]

step:5000, train_loss:0.07955355282632474, acc:0.5686409613317531


 83%|████████▎ | 36284/43738 [4:38:11<1:19:58,  1.55it/s]

step:5000, train_loss:0.07955823190405877, acc:0.5686252893837505


 83%|████████▎ | 36285/43738 [4:38:12<1:27:40,  1.42it/s]

step:5000, train_loss:0.07955735450653607, acc:0.5686096182995728


 83%|████████▎ | 36286/43738 [4:38:12<1:27:31,  1.42it/s]

step:5000, train_loss:0.07955517435089539, acc:0.5686215069172683


 83%|████████▎ | 36287/43738 [4:38:13<1:21:14,  1.53it/s]

step:5000, train_loss:0.07955483788804853, acc:0.568633394879709


 84%|████████▎ | 36592/43738 [4:40:32<57:52,  2.06it/s]  

step:5020, train_loss:0.07956196616355521, acc:0.5686488850021862


 84%|████████▎ | 36593/43738 [4:40:33<54:04,  2.20it/s]

step:5020, train_loss:0.07955994010358694, acc:0.5686606728062744


 84%|████████▎ | 36594/43738 [4:40:33<55:09,  2.16it/s]

step:5020, train_loss:0.07956017825007336, acc:0.568645133081926


 84%|████████▎ | 36595/43738 [4:40:34<47:27,  2.51it/s]

step:5020, train_loss:0.07955800600925284, acc:0.5686569203443094


 84%|████████▎ | 36596/43738 [4:40:34<41:31,  2.87it/s]

step:5020, train_loss:0.07955595925157379, acc:0.5686687069625096


 84%|████████▎ | 36597/43738 [4:40:34<37:50,  3.14it/s]

step:5020, train_loss:0.07955380229045347, acc:0.5686804929365795


 84%|████████▎ | 36598/43738 [4:40:35<44:46,  2.66it/s]

step:5020, train_loss:0.07955167366364908, acc:0.568692278266572


 84%|████████▎ | 36599/43738 [4:40:35<47:18,  2.51it/s]

step:5020, train_loss:0.07955161692106268, acc:0.5687040629525397


 84%|████████▎ | 36600/43738 [4:40:36<58:54,  2.02it/s]

step:5020, train_loss:0.0795496207645053, acc:0.5687158469945355


 84%|████████▎ | 36601/43738 [4:40:36<1:02:31,  1.90it/s]

step:5020, train_loss:0.07955068491990044, acc:0.568700308734734


 84%|████████▎ | 36602/43738 [4:40:37<1:00:37,  1.96it/s]

step:5020, train_loss:0.07955296882387486, acc:0.5686847713239713


 84%|████████▎ | 36603/43738 [4:40:37<53:20,  2.23it/s]  

step:5020, train_loss:0.07955156545965691, acc:0.5686965549271917


 84%|████████▎ | 36604/43738 [4:40:37<48:43,  2.44it/s]

step:5020, train_loss:0.07955182724620434, acc:0.568681018467927


 84%|████████▎ | 36605/43738 [4:40:38<44:00,  2.70it/s]

step:5020, train_loss:0.07955000602235263, acc:0.5686928015298457


 84%|████████▎ | 36606/43738 [4:40:38<55:06,  2.16it/s]

step:5020, train_loss:0.07954962555851942, acc:0.5686772660219637


 84%|████████▎ | 36607/43738 [4:40:39<54:17,  2.19it/s]

step:5020, train_loss:0.07955296377235883, acc:0.5686617313628541


 84%|████████▍ | 36912/43738 [4:42:55<44:45,  2.54it/s]  

step:5040, train_loss:0.07957606479086972, acc:0.5685143042912874


 84%|████████▍ | 36913/43738 [4:42:56<41:52,  2.72it/s]

step:5040, train_loss:0.0795746690638516, acc:0.568525993552407


 84%|████████▍ | 36914/43738 [4:42:56<38:19,  2.97it/s]

step:5040, train_loss:0.0795725177694997, acc:0.5685376821802026


 84%|████████▍ | 36915/43738 [4:42:56<42:08,  2.70it/s]

step:5040, train_loss:0.07957257180290118, acc:0.568522280915617


 84%|████████▍ | 36916/43738 [4:42:57<52:06,  2.18it/s]

step:5040, train_loss:0.0795704349261318, acc:0.5685339690107271


 84%|████████▍ | 36917/43738 [4:42:57<51:40,  2.20it/s]

step:5040, train_loss:0.07957241258574824, acc:0.5685185686810954


 84%|████████▍ | 36918/43738 [4:42:58<50:52,  2.23it/s]

step:5040, train_loss:0.07957179243622853, acc:0.568503169185763


 84%|████████▍ | 36919/43738 [4:42:58<44:18,  2.56it/s]

step:5040, train_loss:0.07956990961600857, acc:0.568514856848777


 84%|████████▍ | 36920/43738 [4:42:59<58:51,  1.93it/s]

step:5040, train_loss:0.07956970258058145, acc:0.5684994582881907


 84%|████████▍ | 36921/43738 [4:43:00<1:04:52,  1.75it/s]

step:5040, train_loss:0.0795715929054018, acc:0.56848406056174


 84%|████████▍ | 36922/43738 [4:43:00<1:02:04,  1.83it/s]

step:5040, train_loss:0.07957181884686898, acc:0.568468663669357


 84%|████████▍ | 36923/43738 [4:43:00<55:41,  2.04it/s]  

step:5040, train_loss:0.07957132426945958, acc:0.5684532676109741


 84%|████████▍ | 36924/43738 [4:43:01<51:37,  2.20it/s]

step:5040, train_loss:0.07956933609313387, acc:0.5684649550427906


 84%|████████▍ | 36925/43738 [4:43:01<48:39,  2.33it/s]

step:5040, train_loss:0.07956718386394131, acc:0.5684766418415708


 84%|████████▍ | 36926/43738 [4:43:01<44:35,  2.55it/s]

step:5040, train_loss:0.07956677106904067, acc:0.568488328007366


 84%|████████▍ | 36927/43738 [4:43:02<57:33,  1.97it/s]

step:5040, train_loss:0.07956484305295343, acc:0.568500013540228


 85%|████████▌ | 37232/43738 [4:45:29<1:08:43,  1.58it/s]

step:5060, train_loss:0.07959667517955213, acc:0.5681403094112591


 85%|████████▌ | 37233/43738 [4:45:30<1:16:06,  1.42it/s]

step:5060, train_loss:0.07959641072928537, acc:0.5681519082534311


 85%|████████▌ | 37234/43738 [4:45:31<1:05:12,  1.66it/s]

step:5060, train_loss:0.07959593371225489, acc:0.5681366492990277


 85%|████████▌ | 37235/43738 [4:45:31<56:01,  1.93it/s]  

step:5060, train_loss:0.07959379981447101, acc:0.5681482476164899


 85%|████████▌ | 37236/43738 [4:45:31<58:11,  1.86it/s]

step:5060, train_loss:0.07959665616870706, acc:0.5681329895799764


 85%|████████▌ | 37237/43738 [4:45:32<51:00,  2.12it/s]

step:5060, train_loss:0.07959481179648893, acc:0.5681445873727744


 85%|████████▌ | 37238/43738 [4:45:32<1:00:15,  1.80it/s]

step:5060, train_loss:0.07959841528470062, acc:0.5681293302540416


 85%|████████▌ | 37239/43738 [4:45:33<1:00:05,  1.80it/s]

step:5060, train_loss:0.07959745775165374, acc:0.5681409275222213


 85%|████████▌ | 37240/43738 [4:45:33<56:42,  1.91it/s]  

step:5060, train_loss:0.0795982878833929, acc:0.5681256713211601


 85%|████████▌ | 37241/43738 [4:45:34<46:32,  2.33it/s]

step:5060, train_loss:0.07959710946169511, acc:0.5681372680647673


 85%|████████▌ | 37242/43738 [4:45:34<41:48,  2.59it/s]

step:5060, train_loss:0.0795949732510427, acc:0.568148864185597


 85%|████████▌ | 37243/43738 [4:45:34<41:47,  2.59it/s]

step:5060, train_loss:0.07959793191425375, acc:0.568133609000349


 85%|████████▌ | 37244/43738 [4:45:35<40:47,  2.65it/s]

step:5060, train_loss:0.07959683924760488, acc:0.5681452045967136


 85%|████████▌ | 37245/43738 [4:45:35<42:34,  2.54it/s]

step:5060, train_loss:0.07959787050779672, acc:0.5681299503289032


 85%|████████▌ | 37246/43738 [4:45:36<44:31,  2.43it/s]

step:5060, train_loss:0.07959958353935583, acc:0.5681146968802019


 85%|████████▌ | 37247/43738 [4:45:36<46:19,  2.34it/s]

step:5060, train_loss:0.07960498359758257, acc:0.5680994442505437


 86%|████████▌ | 37552/43738 [4:48:04<1:05:37,  1.57it/s]

step:5080, train_loss:0.07959775589573866, acc:0.5679324669791223


 86%|████████▌ | 37553/43738 [4:48:04<1:09:12,  1.49it/s]

step:5080, train_loss:0.07959663006070179, acc:0.56794397251884


 86%|████████▌ | 37554/43738 [4:48:05<1:09:17,  1.49it/s]

step:5080, train_loss:0.079595273161685, acc:0.5679554774458113


 86%|████████▌ | 37555/43738 [4:48:05<1:02:32,  1.65it/s]

step:5080, train_loss:0.07959499784299733, acc:0.5679669817600852


 86%|████████▌ | 37556/43738 [4:48:06<1:04:53,  1.59it/s]

step:5080, train_loss:0.0795979513388148, acc:0.5679518585578869


 86%|████████▌ | 37557/43738 [4:48:07<1:03:26,  1.62it/s]

step:5080, train_loss:0.07959669232377117, acc:0.5679633623558857


 86%|████████▌ | 37558/43738 [4:48:07<54:13,  1.90it/s]  

step:5080, train_loss:0.07959508068070359, acc:0.5679748655412962


 86%|████████▌ | 37559/43738 [4:48:08<54:31,  1.89it/s]

step:5080, train_loss:0.07959869463603356, acc:0.5679597433371496


 86%|████████▌ | 37560/43738 [4:48:08<50:39,  2.03it/s]

step:5080, train_loss:0.07959660554045574, acc:0.5679712460063898


 86%|████████▌ | 37561/43738 [4:48:08<47:29,  2.17it/s]

step:5080, train_loss:0.07959532183829655, acc:0.5679827480631506


 86%|████████▌ | 37562/43738 [4:48:09<48:21,  2.13it/s]

step:5080, train_loss:0.07959347472601576, acc:0.567994249507481


 86%|████████▌ | 37563/43738 [4:48:09<48:12,  2.13it/s]

step:5080, train_loss:0.07959227178911857, acc:0.5680057503394298


 86%|████████▌ | 37564/43738 [4:48:10<47:53,  2.15it/s]

step:5080, train_loss:0.07959135013026555, acc:0.568017250559046


 86%|████████▌ | 37565/43738 [4:48:10<41:44,  2.46it/s]

step:5080, train_loss:0.0795901532423055, acc:0.5680287501663783


 86%|████████▌ | 37566/43738 [4:48:10<40:37,  2.53it/s]

step:5080, train_loss:0.07959032821104937, acc:0.5680402491614758


 86%|████████▌ | 37567/43738 [4:48:11<45:24,  2.27it/s]

step:5080, train_loss:0.07958897280452441, acc:0.5680517475443874


 87%|████████▋ | 37872/43738 [4:50:28<45:39,  2.14it/s]  

step:5100, train_loss:0.07958846467900317, acc:0.5682298267849598


 87%|████████▋ | 37873/43738 [4:50:29<47:26,  2.06it/s]

step:5100, train_loss:0.07959102143094578, acc:0.5682148232249887


 87%|████████▋ | 37874/43738 [4:50:29<47:49,  2.04it/s]

step:5100, train_loss:0.07959566271458161, acc:0.5681998204573058


 87%|████████▋ | 37875/43738 [4:50:30<51:10,  1.91it/s]

step:5100, train_loss:0.07959576004267004, acc:0.5682112211221122


 87%|████████▋ | 37876/43738 [4:50:30<46:12,  2.11it/s]

step:5100, train_loss:0.07959372623282994, acc:0.5682226211849192


 87%|████████▋ | 37877/43738 [4:50:30<40:03,  2.44it/s]

step:5100, train_loss:0.07959390084336412, acc:0.5682076193996357


 87%|████████▋ | 37878/43738 [4:50:31<39:42,  2.46it/s]

step:5100, train_loss:0.0795931899274978, acc:0.5681926184064628


 87%|████████▋ | 37879/43738 [4:50:31<43:52,  2.23it/s]

step:5100, train_loss:0.07959456959368873, acc:0.5681776182053381


 87%|████████▋ | 37880/43738 [4:50:32<46:25,  2.10it/s]

step:5100, train_loss:0.07959306806679431, acc:0.5681890179514255


 87%|████████▋ | 37881/43738 [4:50:32<43:55,  2.22it/s]

step:5100, train_loss:0.07959505428746479, acc:0.5681740186373115


 87%|████████▋ | 37882/43738 [4:50:33<39:27,  2.47it/s]

step:5100, train_loss:0.07959552635332749, acc:0.5681590201150942


 87%|████████▋ | 37883/43738 [4:50:33<37:51,  2.58it/s]

step:5100, train_loss:0.07959542462816946, acc:0.5681440223847108


 87%|████████▋ | 37884/43738 [4:50:34<44:24,  2.20it/s]

step:5100, train_loss:0.07959346596621891, acc:0.5681554218139584


 87%|████████▋ | 37885/43738 [4:50:34<44:48,  2.18it/s]

step:5100, train_loss:0.07959139752020898, acc:0.5681668206414148


 87%|████████▋ | 37886/43738 [4:50:34<41:17,  2.36it/s]

step:5100, train_loss:0.07959410260711527, acc:0.5681518238927308


 87%|████████▋ | 37887/43738 [4:50:35<45:33,  2.14it/s]

step:5100, train_loss:0.07959465082908043, acc:0.5681368279357035


 87%|████████▋ | 38192/43738 [4:52:55<47:32,  1.94it/s]  

step:5120, train_loss:0.07953349084501735, acc:0.5682603686635944


 87%|████████▋ | 38193/43738 [4:52:55<45:17,  2.04it/s]

step:5120, train_loss:0.07953607986404343, acc:0.5682454900112586


 87%|████████▋ | 38194/43738 [4:52:56<45:49,  2.02it/s]

step:5120, train_loss:0.07953673238775438, acc:0.5682306121380322


 87%|████████▋ | 38195/43738 [4:52:56<38:26,  2.40it/s]

step:5120, train_loss:0.07953651082890312, acc:0.5682157350438539


 87%|████████▋ | 38196/43738 [4:52:56<33:45,  2.74it/s]

step:5120, train_loss:0.07953446389369813, acc:0.5682270394805738


 87%|████████▋ | 38197/43738 [4:52:57<36:57,  2.50it/s]

step:5120, train_loss:0.07953361361112563, acc:0.568238343325392


 87%|████████▋ | 38198/43738 [4:52:57<32:58,  2.80it/s]

step:5120, train_loss:0.07953154176750658, acc:0.5682496465783549


 87%|████████▋ | 38199/43738 [4:52:57<33:06,  2.79it/s]

step:5120, train_loss:0.07953012163668073, acc:0.5682609492395089


 87%|████████▋ | 38200/43738 [4:52:58<32:05,  2.88it/s]

step:5120, train_loss:0.07952960178583186, acc:0.5682722513089006


 87%|████████▋ | 38201/43738 [4:52:58<31:25,  2.94it/s]

step:5120, train_loss:0.07952831032966781, acc:0.5682835527865763


 87%|████████▋ | 38202/43738 [4:52:59<34:28,  2.68it/s]

step:5120, train_loss:0.0795263298511497, acc:0.5682948536725826


 87%|████████▋ | 38203/43738 [4:52:59<32:52,  2.81it/s]

step:5120, train_loss:0.07952720968366365, acc:0.568279978012198


 87%|████████▋ | 38204/43738 [4:53:00<44:46,  2.06it/s]

step:5120, train_loss:0.07952551153685788, acc:0.5682912784001676


 87%|████████▋ | 38205/43738 [4:53:00<44:46,  2.06it/s]

step:5120, train_loss:0.07952345421002165, acc:0.5683025781965712


 87%|████████▋ | 38206/43738 [4:53:01<45:32,  2.02it/s]

step:5120, train_loss:0.0795213880716754, acc:0.5683138774014552


 87%|████████▋ | 38207/43738 [4:53:01<46:40,  1.98it/s]

step:5120, train_loss:0.07952328563491835, acc:0.5682990028005339


 88%|████████▊ | 38512/43738 [4:55:19<44:31,  1.96it/s]

step:5140, train_loss:0.07945989922918716, acc:0.5687577897798088


 88%|████████▊ | 38513/43738 [4:55:19<43:39,  1.99it/s]

step:5140, train_loss:0.07946569505404903, acc:0.5687430218367824


 88%|████████▊ | 38514/43738 [4:55:20<37:05,  2.35it/s]

step:5140, train_loss:0.0794645561511405, acc:0.5687542192449498


 88%|████████▊ | 38515/43738 [4:55:20<34:16,  2.54it/s]

step:5140, train_loss:0.07946253343889326, acc:0.5687654160716604


 88%|████████▊ | 38516/43738 [4:55:20<33:52,  2.57it/s]

step:5140, train_loss:0.07946093370648419, acc:0.5687766123169592


 88%|████████▊ | 38517/43738 [4:55:21<36:10,  2.41it/s]

step:5140, train_loss:0.07946297658461797, acc:0.568761845418906


 88%|████████▊ | 38518/43738 [4:55:21<36:21,  2.39it/s]

step:5140, train_loss:0.07946318364990952, acc:0.5687470792876058


 88%|████████▊ | 38519/43738 [4:55:21<35:45,  2.43it/s]

step:5140, train_loss:0.07946780390041942, acc:0.568732313922999


 88%|████████▊ | 38520/43738 [4:55:22<34:46,  2.50it/s]

step:5140, train_loss:0.07947018696821935, acc:0.5687175493250259


 88%|████████▊ | 38521/43738 [4:55:22<34:16,  2.54it/s]

step:5140, train_loss:0.07946930257942095, acc:0.568728745359674


 88%|████████▊ | 38522/43738 [4:55:23<34:12,  2.54it/s]

step:5140, train_loss:0.07946767070049647, acc:0.5687399408130419


 88%|████████▊ | 38523/43738 [4:55:23<38:42,  2.25it/s]

step:5140, train_loss:0.07946642401561296, acc:0.5687511356851751


 88%|████████▊ | 38524/43738 [4:55:24<38:58,  2.23it/s]

step:5140, train_loss:0.07946642040541876, acc:0.5687623299761188


 88%|████████▊ | 38525/43738 [4:55:24<33:26,  2.60it/s]

step:5140, train_loss:0.07946436039383489, acc:0.5687735236859183


 88%|████████▊ | 38526/43738 [4:55:24<33:13,  2.61it/s]

step:5140, train_loss:0.07946461877308739, acc:0.5687587603177076


 88%|████████▊ | 38527/43738 [4:55:25<31:33,  2.75it/s]

step:5140, train_loss:0.07946399336338301, acc:0.5687699535390764


 89%|████████▉ | 38832/43738 [4:57:40<35:54,  2.28it/s]

step:5160, train_loss:0.07944132444287283, acc:0.5687834775442934


 89%|████████▉ | 38833/43738 [4:57:40<34:48,  2.35it/s]

step:5160, train_loss:0.07944368814046396, acc:0.5687688306337394


 89%|████████▉ | 38834/43738 [4:57:41<44:36,  1.83it/s]

step:5160, train_loss:0.07944751136092094, acc:0.5687541844775197


 89%|████████▉ | 38835/43738 [4:57:42<43:02,  1.90it/s]

step:5160, train_loss:0.07944671015798904, acc:0.5687652890433887


 89%|████████▉ | 38836/43738 [4:57:42<39:28,  2.07it/s]

step:5160, train_loss:0.07944905871641157, acc:0.5687506437326192


 89%|████████▉ | 38837/43738 [4:57:42<33:24,  2.44it/s]

step:5160, train_loss:0.07944714718788877, acc:0.5687617478178026


 89%|████████▉ | 38838/43738 [4:57:43<31:46,  2.57it/s]

step:5160, train_loss:0.07944511054707147, acc:0.5687728513311705


 89%|████████▉ | 38839/43738 [4:57:43<37:26,  2.18it/s]

step:5160, train_loss:0.0794467239596169, acc:0.5687582069569247


 89%|████████▉ | 38840/43738 [4:57:44<35:15,  2.32it/s]

step:5160, train_loss:0.07944709845242587, acc:0.5687435633367662


 89%|████████▉ | 38841/43738 [4:57:44<37:36,  2.17it/s]

step:5160, train_loss:0.07944648551503168, acc:0.5687546664606987


 89%|████████▉ | 38842/43738 [4:57:45<36:11,  2.25it/s]

step:5160, train_loss:0.07944446349728247, acc:0.5687657690129242


 89%|████████▉ | 38843/43738 [4:57:45<35:12,  2.32it/s]

step:5160, train_loss:0.07944330902501547, acc:0.5687768709934866


 89%|████████▉ | 38844/43738 [4:57:46<40:57,  1.99it/s]

step:5160, train_loss:0.07944458204573952, acc:0.5687622284007826


 89%|████████▉ | 38845/43738 [4:57:46<42:30,  1.92it/s]

step:5160, train_loss:0.07944732019187911, acc:0.5687475865619771


 89%|████████▉ | 38846/43738 [4:57:47<43:41,  1.87it/s]

step:5160, train_loss:0.0794473666620491, acc:0.5687329454770118


 89%|████████▉ | 38847/43738 [4:57:47<36:24,  2.24it/s]

step:5160, train_loss:0.07944544386007381, acc:0.5687440471593688


 90%|████████▉ | 39152/43738 [5:00:03<39:00,  1.96it/s]

step:5180, train_loss:0.07942790415798884, acc:0.5689875357580712


 90%|████████▉ | 39153/43738 [5:00:03<39:25,  1.94it/s]

step:5180, train_loss:0.07942749727730941, acc:0.5689985441728603


 90%|████████▉ | 39154/43738 [5:00:03<33:22,  2.29it/s]

step:5180, train_loss:0.07943182669483095, acc:0.5689840118506411


 90%|████████▉ | 39155/43738 [5:00:04<33:23,  2.29it/s]

step:5180, train_loss:0.07943251910517012, acc:0.5689694802707189


 90%|████████▉ | 39156/43738 [5:00:05<41:39,  1.83it/s]

step:5180, train_loss:0.07943191768975216, acc:0.5689804883031975


 90%|████████▉ | 39157/43738 [5:00:05<39:16,  1.94it/s]

step:5180, train_loss:0.07942990964487327, acc:0.5689914957734249


 90%|████████▉ | 39158/43738 [5:00:05<35:52,  2.13it/s]

step:5180, train_loss:0.07942807116511506, acc:0.5690025026814444


 90%|████████▉ | 39159/43738 [5:00:06<34:41,  2.20it/s]

step:5180, train_loss:0.07942620749762509, acc:0.569013509027299


 90%|████████▉ | 39160/43738 [5:00:06<29:46,  2.56it/s]

step:5180, train_loss:0.07942836803602894, acc:0.5689989785495404


 90%|████████▉ | 39161/43738 [5:00:06<26:28,  2.88it/s]

step:5180, train_loss:0.07942651847794001, acc:0.5690099844232782


 90%|████████▉ | 39162/43738 [5:00:07<30:22,  2.51it/s]

step:5180, train_loss:0.07942808131477515, acc:0.5690209897349472


 90%|████████▉ | 39163/43738 [5:00:07<32:24,  2.35it/s]

step:5180, train_loss:0.07942842594648333, acc:0.5690064601792508


 90%|████████▉ | 39164/43738 [5:00:08<28:21,  2.69it/s]

step:5180, train_loss:0.07942680948308391, acc:0.5690174650188949


 90%|████████▉ | 39165/43738 [5:00:08<31:46,  2.40it/s]

step:5180, train_loss:0.07943002635635645, acc:0.5690029362951615


 90%|████████▉ | 39166/43738 [5:00:09<33:12,  2.29it/s]

step:5180, train_loss:0.07943107198983909, acc:0.568988408313333


 90%|████████▉ | 39167/43738 [5:00:09<30:30,  2.50it/s]

step:5180, train_loss:0.07943304101467463, acc:0.5689738810733526


 90%|█████████ | 39472/43738 [5:02:31<35:16,  2.02it/s]

step:5200, train_loss:0.07938630722569655, acc:0.5695936359951358


 90%|█████████ | 39473/43738 [5:02:31<39:51,  1.78it/s]

step:5200, train_loss:0.07939045604302684, acc:0.5695792060395714


 90%|█████████ | 39474/43738 [5:02:32<33:11,  2.14it/s]

step:5200, train_loss:0.07938844491199366, acc:0.5695901099457871


 90%|█████████ | 39475/43738 [5:02:32<39:53,  1.78it/s]

step:5200, train_loss:0.07939066541967311, acc:0.5695756808106397


 90%|█████████ | 39476/43738 [5:02:33<36:55,  1.92it/s]

step:5200, train_loss:0.07939031129877572, acc:0.5695865842537238


 90%|█████████ | 39477/43738 [5:02:33<38:54,  1.83it/s]

step:5200, train_loss:0.07939161550930961, acc:0.5695721559389011


 90%|█████████ | 39478/43738 [5:02:34<39:57,  1.78it/s]

step:5200, train_loss:0.07938960710578319, acc:0.5695830589188915


 90%|█████████ | 39479/43738 [5:02:34<37:30,  1.89it/s]

step:5200, train_loss:0.07938864236633973, acc:0.5695939613465386


 90%|█████████ | 39480/43738 [5:02:35<36:53,  1.92it/s]

step:5200, train_loss:0.07939065158010923, acc:0.5695795339412361


 90%|█████████ | 39481/43738 [5:02:35<34:36,  2.05it/s]

step:5200, train_loss:0.07939972795003192, acc:0.5695651072667866


 90%|█████████ | 39482/43738 [5:02:36<35:16,  2.01it/s]

step:5200, train_loss:0.07939785499818532, acc:0.5695760093207031


 90%|█████████ | 39483/43738 [5:02:36<37:37,  1.88it/s]

step:5200, train_loss:0.07940363175456985, acc:0.5695615834663019


 90%|█████████ | 39484/43738 [5:02:37<31:33,  2.25it/s]

step:5200, train_loss:0.07940371730509184, acc:0.5695471583426198


 90%|█████████ | 39485/43738 [5:02:37<30:30,  2.32it/s]

step:5200, train_loss:0.07940475070250164, acc:0.5695327339496011


 90%|█████████ | 39486/43738 [5:02:37<29:04,  2.44it/s]

step:5200, train_loss:0.07940372444413475, acc:0.569543635718989


 90%|█████████ | 39487/43738 [5:02:38<27:11,  2.60it/s]

step:5200, train_loss:0.07940213656544487, acc:0.5695545369362068


 91%|█████████ | 39792/43738 [5:04:53<37:48,  1.74it/s]

step:5220, train_loss:0.07936668036734496, acc:0.5695617209489344


 91%|█████████ | 39793/43738 [5:04:53<34:43,  1.89it/s]

step:5220, train_loss:0.07936683165861441, acc:0.5695474078355489


 91%|█████████ | 39794/43738 [5:04:54<32:28,  2.02it/s]

step:5220, train_loss:0.0793649881281018, acc:0.5695582248580188


 91%|█████████ | 39795/43738 [5:04:54<27:24,  2.40it/s]

step:5220, train_loss:0.0793629983513922, acc:0.5695690413368514


 91%|█████████ | 39796/43738 [5:04:54<27:07,  2.42it/s]

step:5220, train_loss:0.07936255815767701, acc:0.5695798572720876


 91%|█████████ | 39797/43738 [5:04:54<23:17,  2.82it/s]

step:5220, train_loss:0.07936267602197095, acc:0.5695655451415936


 91%|█████████ | 39798/43738 [5:04:55<27:53,  2.35it/s]

step:5220, train_loss:0.07936599671192096, acc:0.5695512337303382


 91%|█████████ | 39799/43738 [5:04:56<34:25,  1.91it/s]

step:5220, train_loss:0.07936400420667342, acc:0.569562049297721


 91%|█████████ | 39800/43738 [5:04:56<35:11,  1.86it/s]

step:5220, train_loss:0.079363477552307, acc:0.5695477386934673


 91%|█████████ | 39801/43738 [5:04:57<32:44,  2.00it/s]

step:5220, train_loss:0.07936320135543305, acc:0.5695334288083214


 91%|█████████ | 39802/43738 [5:04:57<29:05,  2.26it/s]

step:5220, train_loss:0.07936170309040301, acc:0.5695442440078388


 91%|█████████ | 39803/43738 [5:04:58<31:58,  2.05it/s]

step:5220, train_loss:0.07936328777693696, acc:0.569529934929528


 91%|█████████ | 39804/43738 [5:04:58<31:26,  2.09it/s]

step:5220, train_loss:0.07936371045116472, acc:0.569515626570194


 91%|█████████ | 39805/43738 [5:04:58<28:12,  2.32it/s]

step:5220, train_loss:0.07936238518035256, acc:0.569526441401834


 91%|█████████ | 39806/43738 [5:04:59<35:02,  1.87it/s]

step:5220, train_loss:0.07936310505194082, acc:0.5695121338491684


 91%|█████████ | 39807/43738 [5:05:00<30:59,  2.11it/s]

step:5220, train_loss:0.07936238134303214, acc:0.569497827015349


 92%|█████████▏| 40112/43738 [5:07:22<36:11,  1.67it/s]

step:5240, train_loss:0.07931918673194792, acc:0.5699042680494615


 92%|█████████▏| 40113/43738 [5:07:22<31:12,  1.94it/s]

step:5240, train_loss:0.07931906283699487, acc:0.5699149901528183


 92%|█████████▏| 40114/43738 [5:07:23<32:37,  1.85it/s]

step:5240, train_loss:0.07931937930773278, acc:0.569900782769108


 92%|█████████▏| 40115/43738 [5:07:23<32:58,  1.83it/s]

step:5240, train_loss:0.0793205852009874, acc:0.5698865760937305


 92%|█████████▏| 40116/43738 [5:07:24<36:04,  1.67it/s]

step:5240, train_loss:0.07932117242699054, acc:0.5698723701266327


 92%|█████████▏| 40117/43738 [5:07:25<32:48,  1.84it/s]

step:5240, train_loss:0.07932061020037946, acc:0.5698581648677618


 92%|█████████▏| 40118/43738 [5:07:25<29:08,  2.07it/s]

step:5240, train_loss:0.07931950764443704, acc:0.5698688867839873


 92%|█████████▏| 40119/43738 [5:07:25<30:02,  2.01it/s]

step:5240, train_loss:0.07932058115870207, acc:0.5698546823200977


 92%|█████████▏| 40120/43738 [5:07:26<29:34,  2.04it/s]

step:5240, train_loss:0.07931954941184882, acc:0.5698654037886342


 92%|█████████▏| 40121/43738 [5:07:27<33:22,  1.81it/s]

step:5240, train_loss:0.07931775255752906, acc:0.5698761247227138


 92%|█████████▏| 40122/43738 [5:07:27<28:21,  2.13it/s]

step:5240, train_loss:0.07931589357870385, acc:0.5698868451223768


 92%|█████████▏| 40123/43738 [5:07:28<33:23,  1.80it/s]

step:5240, train_loss:0.07931879131863893, acc:0.569872641626997


 92%|█████████▏| 40124/43738 [5:07:28<30:07,  2.00it/s]

step:5240, train_loss:0.07931693326075549, acc:0.5698833615791048


 92%|█████████▏| 40125/43738 [5:07:29<32:32,  1.85it/s]

step:5240, train_loss:0.07931768244906906, acc:0.5698691588785046


 92%|█████████▏| 40126/43738 [5:07:29<31:11,  1.93it/s]

step:5240, train_loss:0.07931688402981808, acc:0.5698798783830933


 92%|█████████▏| 40127/43738 [5:07:30<30:19,  1.99it/s]

step:5240, train_loss:0.07931825658136325, acc:0.5698656764771849


 92%|█████████▏| 40432/43738 [5:09:51<32:13,  1.71it/s]

step:5260, train_loss:0.0792952861428483, acc:0.5699693312227938


 92%|█████████▏| 40433/43738 [5:09:52<35:19,  1.56it/s]

step:5260, train_loss:0.07929534225868592, acc:0.5699552345856108


 92%|█████████▏| 40434/43738 [5:09:53<37:27,  1.47it/s]

step:5260, train_loss:0.0792991552768016, acc:0.5699411386456942


 92%|█████████▏| 40435/43738 [5:09:53<36:23,  1.51it/s]

step:5260, train_loss:0.07930173790794474, acc:0.5699270434029925


 92%|█████████▏| 40436/43738 [5:09:54<31:07,  1.77it/s]

step:5260, train_loss:0.07930022431942153, acc:0.5699376792956771


 92%|█████████▏| 40437/43738 [5:09:54<25:53,  2.13it/s]

step:5260, train_loss:0.07929828556548911, acc:0.5699483146623142


 92%|█████████▏| 40438/43738 [5:09:54<24:01,  2.29it/s]

step:5260, train_loss:0.07930437886326817, acc:0.569934220287848


 92%|█████████▏| 40439/43738 [5:09:55<23:47,  2.31it/s]

step:5260, train_loss:0.07930402580027321, acc:0.5699201266104503


 92%|█████████▏| 40440/43738 [5:09:55<23:37,  2.33it/s]

step:5260, train_loss:0.07930371687013552, acc:0.5699060336300692


 92%|█████████▏| 40441/43738 [5:09:56<25:36,  2.15it/s]

step:5260, train_loss:0.0793053383540791, acc:0.5698919413466531


 92%|█████████▏| 40442/43738 [5:09:56<23:57,  2.29it/s]

step:5260, train_loss:0.0793049693566539, acc:0.5698778497601503


 92%|█████████▏| 40443/43738 [5:09:56<20:52,  2.63it/s]

step:5260, train_loss:0.07930303606431094, acc:0.5698884850283115


 92%|█████████▏| 40444/43738 [5:09:57<27:38,  1.99it/s]

step:5260, train_loss:0.07930194811976564, acc:0.569899119770547


 92%|█████████▏| 40445/43738 [5:09:57<24:44,  2.22it/s]

step:5260, train_loss:0.07930022032426669, acc:0.5699097539868958


 92%|█████████▏| 40446/43738 [5:09:58<28:27,  1.93it/s]

step:5260, train_loss:0.07929830336547879, acc:0.5699203876773971


 92%|█████████▏| 40447/43738 [5:09:58<23:39,  2.32it/s]

step:5260, train_loss:0.07929636300784469, acc:0.5699310208420897


 93%|█████████▎| 40752/43738 [5:12:14<18:18,  2.72it/s]

step:5280, train_loss:0.07932221550249827, acc:0.5695916764821358


 93%|█████████▎| 40753/43738 [5:12:15<24:13,  2.05it/s]

step:5280, train_loss:0.07932247150579595, acc:0.5696022378720585


 93%|█████████▎| 40754/43738 [5:12:15<24:06,  2.06it/s]

step:5280, train_loss:0.0793215783187889, acc:0.5696127987436816


 93%|█████████▎| 40755/43738 [5:12:16<28:36,  1.74it/s]

step:5280, train_loss:0.07932204742565697, acc:0.5696233590970433


 93%|█████████▎| 40756/43738 [5:12:17<26:05,  1.90it/s]

step:5280, train_loss:0.07932081614011606, acc:0.5696339189321817


 93%|█████████▎| 40757/43738 [5:12:17<22:20,  2.22it/s]

step:5280, train_loss:0.07932241032640197, acc:0.5696199425865496


 93%|█████████▎| 40758/43738 [5:12:17<24:52,  2.00it/s]

step:5280, train_loss:0.07932512188335337, acc:0.5696059669267383


 93%|█████████▎| 40759/43738 [5:12:18<26:59,  1.84it/s]

step:5280, train_loss:0.07932657831865284, acc:0.5695919919526976


 93%|█████████▎| 40760/43738 [5:12:18<24:18,  2.04it/s]

step:5280, train_loss:0.07932603714435806, acc:0.5695780176643769


 93%|█████████▎| 40761/43738 [5:12:19<22:24,  2.21it/s]

step:5280, train_loss:0.07932611046781061, acc:0.5695640440617257


 93%|█████████▎| 40762/43738 [5:12:19<21:04,  2.35it/s]

step:5280, train_loss:0.0793284296151403, acc:0.5695500711446936


 93%|█████████▎| 40763/43738 [5:12:20<20:21,  2.44it/s]

step:5280, train_loss:0.07932648363425393, acc:0.5695606309643549


 93%|█████████▎| 40764/43738 [5:12:20<22:27,  2.21it/s]

step:5280, train_loss:0.07932880462857844, acc:0.5695466588166029


 93%|█████████▎| 40765/43738 [5:12:21<24:28,  2.02it/s]

step:5280, train_loss:0.07932858195739549, acc:0.5695572182018889


 93%|█████████▎| 40766/43738 [5:12:21<25:59,  1.91it/s]

step:5280, train_loss:0.07932670477331791, acc:0.5695677770691262


 93%|█████████▎| 40767/43738 [5:12:22<24:15,  2.04it/s]

step:5280, train_loss:0.07932482045682727, acc:0.5695783354183531


 94%|█████████▍| 41072/43738 [5:14:51<27:11,  1.63it/s]

step:5300, train_loss:0.07924953696585647, acc:0.5699990261005065


 94%|█████████▍| 41073/43738 [5:14:52<27:51,  1.59it/s]

step:5300, train_loss:0.07924779382190388, acc:0.5700094952888759


 94%|█████████▍| 41074/43738 [5:14:52<27:01,  1.64it/s]

step:5300, train_loss:0.07924765001669472, acc:0.5700199639674733


 94%|█████████▍| 41075/43738 [5:14:53<25:59,  1.71it/s]

step:5300, train_loss:0.07925194797965279, acc:0.5700060864272672


 94%|█████████▍| 41076/43738 [5:14:54<29:14,  1.52it/s]

step:5300, train_loss:0.07925401544350502, acc:0.5699922095627618


 94%|█████████▍| 41077/43738 [5:14:54<31:13,  1.42it/s]

step:5300, train_loss:0.07925303812422184, acc:0.570002677897607


 94%|█████████▍| 41078/43738 [5:14:55<30:13,  1.47it/s]

step:5300, train_loss:0.07925122223996077, acc:0.5700131457227713


 94%|█████████▍| 41079/43738 [5:14:56<32:37,  1.36it/s]

step:5300, train_loss:0.07925199210880567, acc:0.5699992696998466


 94%|█████████▍| 41080/43738 [5:14:56<31:18,  1.41it/s]

step:5300, train_loss:0.07925817859968226, acc:0.569985394352483


 94%|█████████▍| 41081/43738 [5:14:57<28:15,  1.57it/s]

step:5300, train_loss:0.07925630524147247, acc:0.5699958618339378


 94%|█████████▍| 41082/43738 [5:14:57<24:15,  1.82it/s]

step:5300, train_loss:0.07925612230249338, acc:0.5699819872450221


 94%|█████████▍| 41083/43738 [5:14:58<25:25,  1.74it/s]

step:5300, train_loss:0.07925948897678402, acc:0.5699681133315483


 94%|█████████▍| 41084/43738 [5:14:58<22:00,  2.01it/s]

step:5300, train_loss:0.07925771197114183, acc:0.5699785804692824


 94%|█████████▍| 41085/43738 [5:14:59<21:45,  2.03it/s]

step:5300, train_loss:0.07925838258722423, acc:0.5699647073141049


 94%|█████████▍| 41086/43738 [5:14:59<24:12,  1.83it/s]

step:5300, train_loss:0.0792565326845452, acc:0.5699751740252154


 94%|█████████▍| 41087/43738 [5:15:00<21:56,  2.01it/s]

step:5300, train_loss:0.07925529357240661, acc:0.5699856402268357


 95%|█████████▍| 41392/43738 [5:17:21<19:26,  2.01it/s]

step:5320, train_loss:0.07930220626831211, acc:0.5697719366061075


 95%|█████████▍| 41393/43738 [5:17:22<22:55,  1.70it/s]

step:5320, train_loss:0.07930282008614853, acc:0.5697581716715386


 95%|█████████▍| 41394/43738 [5:17:23<25:30,  1.53it/s]

step:5320, train_loss:0.079302561338875, acc:0.5697444074020389


 95%|█████████▍| 41395/43738 [5:17:23<20:46,  1.88it/s]

step:5320, train_loss:0.07930114410874516, acc:0.5697548013045054


 95%|█████████▍| 41396/43738 [5:17:24<23:59,  1.63it/s]

step:5320, train_loss:0.07930014188733576, acc:0.5697651947048024


 95%|█████████▍| 41397/43738 [5:17:24<19:59,  1.95it/s]

step:5320, train_loss:0.07930161648840905, acc:0.569751431263135


 95%|█████████▍| 41398/43738 [5:17:25<19:01,  2.05it/s]

step:5320, train_loss:0.07930051873543677, acc:0.569761824242717


 95%|█████████▍| 41399/43738 [5:17:25<22:40,  1.72it/s]

step:5320, train_loss:0.07930057448234158, acc:0.5697722167202106


 95%|█████████▍| 41400/43738 [5:17:26<20:25,  1.91it/s]

step:5320, train_loss:0.07930242410680287, acc:0.5697584541062802


 95%|█████████▍| 41401/43738 [5:17:26<17:14,  2.26it/s]

step:5320, train_loss:0.0793005095760579, acc:0.5697688461631362


 95%|█████████▍| 41402/43738 [5:17:26<14:48,  2.63it/s]

step:5320, train_loss:0.07929908004332913, acc:0.5697792377179847


 95%|█████████▍| 41403/43738 [5:17:27<16:06,  2.42it/s]

step:5320, train_loss:0.07929833556573271, acc:0.569789628770862


 95%|█████████▍| 41404/43738 [5:17:27<17:50,  2.18it/s]

step:5320, train_loss:0.07929758065344453, acc:0.5698000193218047


 95%|█████████▍| 41405/43738 [5:17:28<16:14,  2.39it/s]

step:5320, train_loss:0.07929870898622887, acc:0.5697862576983456


 95%|█████████▍| 41406/43738 [5:17:28<16:01,  2.42it/s]

step:5320, train_loss:0.07929804642229572, acc:0.5697966478288171


 95%|█████████▍| 41407/43738 [5:17:28<15:54,  2.44it/s]

step:5320, train_loss:0.07929718028152759, acc:0.5698070374574348


 95%|█████████▌| 41712/43738 [5:19:48<13:17,  2.54it/s]

step:5340, train_loss:0.07927480164016079, acc:0.5700038358266206


 95%|█████████▌| 41713/43738 [5:19:49<14:37,  2.31it/s]

step:5340, train_loss:0.07927315926774586, acc:0.57001414427157


 95%|█████████▌| 41714/43738 [5:19:49<12:31,  2.69it/s]

step:5340, train_loss:0.07927126147492325, acc:0.5700244522222755


 95%|█████████▌| 41715/43738 [5:19:50<16:41,  2.02it/s]

step:5340, train_loss:0.07927311779799884, acc:0.5700107874865157


 95%|█████████▌| 41716/43738 [5:19:50<16:45,  2.01it/s]

step:5340, train_loss:0.07927490665072282, acc:0.5699971234058875


 95%|█████████▌| 41717/43738 [5:19:51<15:57,  2.11it/s]

step:5340, train_loss:0.07927441827471825, acc:0.5699834599803437


 95%|█████████▌| 41718/43738 [5:19:51<14:24,  2.34it/s]

step:5340, train_loss:0.07927256836754557, acc:0.5699937676782204


 95%|█████████▌| 41719/43738 [5:19:52<15:35,  2.16it/s]

step:5340, train_loss:0.07927498860629247, acc:0.5699801049881349


 95%|█████████▌| 41720/43738 [5:19:52<17:23,  1.93it/s]

step:5340, train_loss:0.07927312993344243, acc:0.5699904122722914


 95%|█████████▌| 41721/43738 [5:19:53<16:34,  2.03it/s]

step:5340, train_loss:0.07927183434292817, acc:0.5700007190623427


 95%|█████████▌| 41722/43738 [5:19:53<16:28,  2.04it/s]

step:5340, train_loss:0.07927058436302871, acc:0.5700110253583242


 95%|█████████▌| 41723/43738 [5:19:54<16:44,  2.01it/s]

step:5340, train_loss:0.07927150456594162, acc:0.5699973635644608


 95%|█████████▌| 41724/43738 [5:19:54<17:31,  1.91it/s]

step:5340, train_loss:0.07927057400393586, acc:0.5700076694468411


 95%|█████████▌| 41725/43738 [5:19:55<18:33,  1.81it/s]

step:5340, train_loss:0.07926880298988556, acc:0.5700179748352306


 95%|█████████▌| 41726/43738 [5:19:55<16:38,  2.02it/s]

step:5340, train_loss:0.07926840783383945, acc:0.570028279729665


 95%|█████████▌| 41727/43738 [5:19:56<15:37,  2.14it/s]

step:5340, train_loss:0.07926664036538872, acc:0.5700385841301795


 96%|█████████▌| 42032/43738 [5:22:15<19:45,  1.44it/s]

step:5360, train_loss:0.07923517986368232, acc:0.5701608298439285


 96%|█████████▌| 42033/43738 [5:22:15<17:01,  1.67it/s]

step:5360, train_loss:0.07923521722811111, acc:0.570147265243975


 96%|█████████▌| 42034/43738 [5:22:16<15:18,  1.85it/s]

step:5360, train_loss:0.07923333436624128, acc:0.5701574915544559


 96%|█████████▌| 42035/43738 [5:22:16<14:27,  1.96it/s]

step:5360, train_loss:0.07923419545375672, acc:0.5701439276793149


 96%|█████████▌| 42036/43738 [5:22:16<12:25,  2.28it/s]

step:5360, train_loss:0.0792324214581816, acc:0.5701541535826434


 96%|█████████▌| 42037/43738 [5:22:17<12:11,  2.33it/s]

step:5360, train_loss:0.07923055075548174, acc:0.5701643789994528


 96%|█████████▌| 42038/43738 [5:22:17<11:13,  2.52it/s]

step:5360, train_loss:0.07922868979467795, acc:0.5701746039297778


 96%|█████████▌| 42039/43738 [5:22:18<13:16,  2.13it/s]

step:5360, train_loss:0.07922800058668782, acc:0.570184828373653


 96%|█████████▌| 42040/43738 [5:22:18<12:46,  2.22it/s]

step:5360, train_loss:0.0792325140523157, acc:0.5701712654614652


 96%|█████████▌| 42041/43738 [5:22:18<11:44,  2.41it/s]

step:5360, train_loss:0.07923069218341203, acc:0.5701814894983469


 96%|█████████▌| 42042/43738 [5:22:19<10:20,  2.73it/s]

step:5360, train_loss:0.07922921988838023, acc:0.5701917130488559


 96%|█████████▌| 42043/43738 [5:22:19<11:03,  2.56it/s]

step:5360, train_loss:0.07923050804864964, acc:0.5701781509407036


 96%|█████████▌| 42044/43738 [5:22:19<10:28,  2.70it/s]

step:5360, train_loss:0.07922873950070082, acc:0.5701883740842927


 96%|█████████▌| 42045/43738 [5:22:20<09:26,  2.99it/s]

step:5360, train_loss:0.07922896102254187, acc:0.5701748127006778


 96%|█████████▌| 42046/43738 [5:22:20<08:51,  3.19it/s]

step:5360, train_loss:0.07922751070142185, acc:0.5701850354373781


 96%|█████████▌| 42047/43738 [5:22:20<09:44,  2.89it/s]

step:5360, train_loss:0.07922588929821521, acc:0.5701952576878255


 97%|█████████▋| 42352/43738 [5:24:36<09:13,  2.50it/s]

step:5380, train_loss:0.0791598024920077, acc:0.5705515678126181


 97%|█████████▋| 42353/43738 [5:24:37<08:17,  2.78it/s]

step:5380, train_loss:0.07915793957764691, acc:0.5705617075531839


 97%|█████████▋| 42354/43738 [5:24:37<11:19,  2.04it/s]

step:5380, train_loss:0.07915775500174674, acc:0.5705482362940927


 97%|█████████▋| 42355/43738 [5:24:38<10:55,  2.11it/s]

step:5380, train_loss:0.07915678326203193, acc:0.5705583756345177


 97%|█████████▋| 42356/43738 [5:24:38<09:52,  2.33it/s]

step:5380, train_loss:0.07915498859109887, acc:0.5705685144961753


 97%|█████████▋| 42357/43738 [5:24:39<11:56,  1.93it/s]

step:5380, train_loss:0.07915474514201402, acc:0.570578652879099


 97%|█████████▋| 42358/43738 [5:24:39<10:57,  2.10it/s]

step:5380, train_loss:0.07916030404168244, acc:0.5705651824920912


 97%|█████████▋| 42359/43738 [5:24:40<09:18,  2.47it/s]

step:5380, train_loss:0.07915907969380842, acc:0.5705753204749876


 97%|█████████▋| 42360/43738 [5:24:40<08:09,  2.82it/s]

step:5380, train_loss:0.07915760092279381, acc:0.5705854579792257


 97%|█████████▋| 42361/43738 [5:24:40<07:27,  3.08it/s]

step:5380, train_loss:0.07915575465950339, acc:0.5705955950048394


 97%|█████████▋| 42362/43738 [5:24:40<07:44,  2.97it/s]

step:5380, train_loss:0.0791550311292724, acc:0.5706057315518626


 97%|█████████▋| 42363/43738 [5:24:41<08:18,  2.76it/s]

step:5380, train_loss:0.0791539579480094, acc:0.570615867620329


 97%|█████████▋| 42364/43738 [5:24:41<07:41,  2.98it/s]

step:5380, train_loss:0.07915371770030644, acc:0.5706023982626759


 97%|█████████▋| 42365/43738 [5:24:41<07:37,  3.00it/s]

step:5380, train_loss:0.07915333597800889, acc:0.5706125339313112


 97%|█████████▋| 42366/43738 [5:24:42<10:11,  2.24it/s]

step:5380, train_loss:0.07915368018672195, acc:0.5706226691214653


 97%|█████████▋| 42367/43738 [5:24:43<10:17,  2.22it/s]

step:5380, train_loss:0.07915181877132782, acc:0.570632803833172


 98%|█████████▊| 42672/43738 [5:27:00<07:41,  2.31it/s]

step:5400, train_loss:0.07910408076440296, acc:0.5708661417322834


 98%|█████████▊| 42673/43738 [5:27:00<06:28,  2.74it/s]

step:5400, train_loss:0.07910263355167216, acc:0.5708761980643499


 98%|█████████▊| 42674/43738 [5:27:00<05:55,  2.99it/s]

step:5400, train_loss:0.07910148459026801, acc:0.5708862539251066


 98%|█████████▊| 42675/43738 [5:27:00<06:14,  2.84it/s]

step:5400, train_loss:0.07909963640517363, acc:0.570896309314587


 98%|█████████▊| 42676/43738 [5:27:01<05:30,  3.21it/s]

step:5400, train_loss:0.07909886838397902, acc:0.570906364232824


 98%|█████████▊| 42677/43738 [5:27:01<06:14,  2.83it/s]

step:5400, train_loss:0.07910322080125735, acc:0.5708929868547461


 98%|█████████▊| 42678/43738 [5:27:02<06:48,  2.60it/s]

step:5400, train_loss:0.07910384289234042, acc:0.5708796101035662


 98%|█████████▊| 42679/43738 [5:27:02<06:23,  2.76it/s]

step:5400, train_loss:0.07910214568047892, acc:0.5708896647062959


 98%|█████████▊| 42680/43738 [5:27:02<07:23,  2.39it/s]

step:5400, train_loss:0.07910062750051645, acc:0.5708997188378632


 98%|█████████▊| 42681/43738 [5:27:03<08:09,  2.16it/s]

step:5400, train_loss:0.07910039150689675, acc:0.5709097724983013


 98%|█████████▊| 42682/43738 [5:27:03<07:58,  2.21it/s]

step:5400, train_loss:0.0790986203863344, acc:0.5709198256876435


 98%|█████████▊| 42683/43738 [5:27:04<08:24,  2.09it/s]

step:5400, train_loss:0.0790969864737972, acc:0.5709298784059227


 98%|█████████▊| 42684/43738 [5:27:04<08:43,  2.01it/s]

step:5400, train_loss:0.07909856714930862, acc:0.57091650267079


 98%|█████████▊| 42685/43738 [5:27:05<08:44,  2.01it/s]

step:5400, train_loss:0.07909753020862813, acc:0.5709265549959002


 98%|█████████▊| 42686/43738 [5:27:06<10:27,  1.68it/s]

step:5400, train_loss:0.07909577685330027, acc:0.5709366068500211


 98%|█████████▊| 42687/43738 [5:27:06<08:55,  1.96it/s]

step:5400, train_loss:0.0790948096728384, acc:0.5709466582331858


 98%|█████████▊| 42992/43738 [5:29:29<05:08,  2.42it/s]

step:5420, train_loss:0.07911887877877985, acc:0.5709899516189059


 98%|█████████▊| 42993/43738 [5:29:29<04:46,  2.60it/s]

step:5420, train_loss:0.07912494475577729, acc:0.5709766706207987


 98%|█████████▊| 42994/43738 [5:29:30<04:40,  2.66it/s]

step:5420, train_loss:0.07912336421969264, acc:0.5709866492999023


 98%|█████████▊| 42995/43738 [5:29:30<04:35,  2.70it/s]

step:5420, train_loss:0.0791250660417892, acc:0.570973368996395


 98%|█████████▊| 42996/43738 [5:29:31<05:44,  2.15it/s]

step:5420, train_loss:0.07912324645623534, acc:0.5709833472881198


 98%|█████████▊| 42997/43738 [5:29:31<04:52,  2.53it/s]

step:5420, train_loss:0.07912149166760674, acc:0.5709933251157058


 98%|█████████▊| 42998/43738 [5:29:31<04:34,  2.69it/s]

step:5420, train_loss:0.07912215272134096, acc:0.5709800455835156


 98%|█████████▊| 42999/43738 [5:29:32<04:48,  2.56it/s]

step:5420, train_loss:0.0791203825921033, acc:0.5709900230237912


 98%|█████████▊| 43000/43738 [5:29:32<04:57,  2.48it/s]

step:5420, train_loss:0.07912695437949287, acc:0.5709767441860465


 98%|█████████▊| 43001/43738 [5:29:32<04:47,  2.57it/s]

step:5420, train_loss:0.0791269900828774, acc:0.5709634659659077


 98%|█████████▊| 43002/43738 [5:29:33<04:11,  2.93it/s]

step:5420, train_loss:0.07912515055340788, acc:0.57097344309567


 98%|█████████▊| 43003/43738 [5:29:33<05:01,  2.43it/s]

step:5420, train_loss:0.07912348864322856, acc:0.570983419761412


 98%|█████████▊| 43004/43738 [5:29:34<06:11,  1.98it/s]

step:5420, train_loss:0.0791231068659642, acc:0.5709933959631662


 98%|█████████▊| 43005/43738 [5:29:34<05:12,  2.35it/s]

step:5420, train_loss:0.07912130696133284, acc:0.571003371700965


 98%|█████████▊| 43006/43738 [5:29:35<05:31,  2.21it/s]

step:5420, train_loss:0.07912014298091677, acc:0.5710133469748407


 98%|█████████▊| 43007/43738 [5:29:35<05:58,  2.04it/s]

step:5420, train_loss:0.07911905244199184, acc:0.5710233217848257


 99%|█████████▉| 43312/43738 [5:31:57<03:14,  2.19it/s]

step:5440, train_loss:0.07913164120170596, acc:0.5711350203176949


 99%|█████████▉| 43313/43738 [5:31:57<02:45,  2.57it/s]

step:5440, train_loss:0.07912982236467449, acc:0.5711449218479441


 99%|█████████▉| 43314/43738 [5:31:57<03:14,  2.18it/s]

step:5440, train_loss:0.07912981115371723, acc:0.571131735697465


 99%|█████████▉| 43315/43738 [5:31:58<03:48,  1.85it/s]

step:5440, train_loss:0.0791316925761752, acc:0.5711185501558351


 99%|█████████▉| 43316/43738 [5:31:59<03:40,  1.91it/s]

step:5440, train_loss:0.07913361238108785, acc:0.5711053652230123


 99%|█████████▉| 43317/43738 [5:31:59<03:06,  2.26it/s]

step:5440, train_loss:0.07913200768962421, acc:0.5711152665235358


 99%|█████████▉| 43318/43738 [5:31:59<03:08,  2.23it/s]

step:5440, train_loss:0.0791344741361882, acc:0.5711020822752666


 99%|█████████▉| 43319/43738 [5:32:00<03:12,  2.18it/s]

step:5440, train_loss:0.07913269124723049, acc:0.5711119831944412


 99%|█████████▉| 43320/43738 [5:32:00<02:43,  2.56it/s]

step:5440, train_loss:0.07913142464224629, acc:0.5711218836565097


 99%|█████████▉| 43321/43738 [5:32:00<02:35,  2.68it/s]

step:5440, train_loss:0.07913179051818699, acc:0.5711087001685095


 99%|█████████▉| 43322/43738 [5:32:01<02:38,  2.62it/s]

step:5440, train_loss:0.07913106490759488, acc:0.571118600249296


 99%|█████████▉| 43323/43738 [5:32:01<02:25,  2.84it/s]

step:5440, train_loss:0.07912962708918454, acc:0.5711284998730467


 99%|█████████▉| 43324/43738 [5:32:01<02:12,  3.12it/s]

step:5440, train_loss:0.07912828487841181, acc:0.5711383990397931


 99%|█████████▉| 43325/43738 [5:32:02<02:41,  2.56it/s]

step:5440, train_loss:0.07912971021590455, acc:0.5711252163877669


 99%|█████████▉| 43326/43738 [5:32:03<03:09,  2.17it/s]

step:5440, train_loss:0.07913064908829724, acc:0.571135115173337


 99%|█████████▉| 43327/43738 [5:32:03<02:57,  2.31it/s]

step:5440, train_loss:0.0791289992521837, acc:0.5711450135019733


100%|█████████▉| 43632/43738 [5:34:20<00:46,  2.28it/s]

step:5460, train_loss:0.07908184210146736, acc:0.5714383938393839


100%|█████████▉| 43633/43738 [5:34:20<00:40,  2.56it/s]

step:5460, train_loss:0.07908302099714262, acc:0.571425297366672


100%|█████████▉| 43634/43738 [5:34:21<00:50,  2.08it/s]

step:5460, train_loss:0.07908228650666761, acc:0.571435119402301


100%|█████████▉| 43635/43738 [5:34:21<00:43,  2.37it/s]

step:5460, train_loss:0.07908047461240782, acc:0.5714449409877392


100%|█████████▉| 43636/43738 [5:34:22<00:41,  2.47it/s]

step:5460, train_loss:0.07907927748553295, acc:0.5714547621230177


100%|█████████▉| 43637/43738 [5:34:23<00:52,  1.92it/s]

step:5460, train_loss:0.07907907010031903, acc:0.5714416664756973


100%|█████████▉| 43638/43738 [5:34:23<00:43,  2.31it/s]

step:5460, train_loss:0.07907784689570552, acc:0.5714514872358953


100%|█████████▉| 43639/43738 [5:34:23<00:43,  2.27it/s]

step:5460, train_loss:0.07907729206759513, acc:0.5714613075460024


100%|█████████▉| 43640/43738 [5:34:24<00:45,  2.15it/s]

step:5460, train_loss:0.07907620970234383, acc:0.5714711274060496


100%|█████████▉| 43641/43738 [5:34:24<00:41,  2.35it/s]

step:5460, train_loss:0.07907949072162768, acc:0.5714580325840379


100%|█████████▉| 43642/43738 [5:34:24<00:39,  2.45it/s]

step:5460, train_loss:0.07907958285848674, acc:0.5714449383621282


100%|█████████▉| 43643/43738 [5:34:25<00:43,  2.18it/s]

step:5460, train_loss:0.07907827459882114, acc:0.5714547579222327


100%|█████████▉| 43644/43738 [5:34:25<00:39,  2.36it/s]

step:5460, train_loss:0.07907815767958118, acc:0.571441664375401


100%|█████████▉| 43645/43738 [5:34:26<00:40,  2.28it/s]

step:5460, train_loss:0.07907745409610863, acc:0.5714514835605453


100%|█████████▉| 43646/43738 [5:34:27<00:46,  1.96it/s]

step:5460, train_loss:0.07908022202346354, acc:0.5714383906887229


100%|█████████▉| 43647/43738 [5:34:27<00:42,  2.12it/s]

step:5460, train_loss:0.07908044576676483, acc:0.5714252984168442


100%|██████████| 43738/43738 [5:35:07<00:00,  2.63it/s]
  0%|          | 1/5129 [00:00<13:43,  6.23it/s]

eval on dev set


100%|██████████| 5129/5129 [14:11<00:00,  5.73it/s]
  0%|          | 0/43738 [00:00<?, ?it/s]

1.322256080236458, 0.5507896276077208


  1%|          | 224/43738 [01:41<4:55:10,  2.46it/s]

step:5480, train_loss:0.07526242234078902, acc:0.5892857142857143


  1%|          | 225/43738 [01:41<4:50:49,  2.49it/s]

step:5480, train_loss:0.07499512182341682, acc:0.5911111111111111


  1%|          | 226/43738 [01:42<4:49:23,  2.51it/s]

step:5480, train_loss:0.07555404562602001, acc:0.588495575221239


  1%|          | 227/43738 [01:42<5:54:50,  2.04it/s]

step:5480, train_loss:0.07597246628238241, acc:0.5859030837004405


  1%|          | 228/43738 [01:43<5:42:52,  2.11it/s]

step:5480, train_loss:0.07576994678789847, acc:0.5877192982456141


  1%|          | 229/43738 [01:43<5:50:18,  2.07it/s]

step:5480, train_loss:0.0759518672854489, acc:0.5851528384279476


  1%|          | 230/43738 [01:44<6:43:28,  1.80it/s]

step:5480, train_loss:0.07604985799964355, acc:0.5869565217391305


  1%|          | 231/43738 [01:44<5:44:28,  2.11it/s]

step:5480, train_loss:0.07572686535939252, acc:0.5887445887445888


  1%|          | 232/43738 [01:45<5:43:58,  2.11it/s]

step:5480, train_loss:0.0754119567492784, acc:0.5905172413793104


  1%|          | 233/43738 [01:45<5:43:39,  2.11it/s]

step:5480, train_loss:0.07509950544413323, acc:0.592274678111588


  1%|          | 234/43738 [01:46<5:58:39,  2.02it/s]

step:5480, train_loss:0.07480429137587292, acc:0.594017094017094


  1%|          | 235/43738 [01:46<5:01:13,  2.41it/s]

step:5480, train_loss:0.0750090620539924, acc:0.5914893617021276


  1%|          | 236/43738 [01:47<5:11:54,  2.32it/s]

step:5480, train_loss:0.07478000967905431, acc:0.5932203389830508


  1%|          | 237/43738 [01:47<5:38:54,  2.14it/s]

step:5480, train_loss:0.07512343180550553, acc:0.5907172995780591


  1%|          | 238/43738 [01:48<5:23:12,  2.24it/s]

step:5480, train_loss:0.07544188012815073, acc:0.5882352941176471


  1%|          | 239/43738 [01:48<4:39:23,  2.59it/s]

step:5480, train_loss:0.07530613404905198, acc:0.5899581589958159


  1%|          | 544/43738 [04:13<6:21:44,  1.89it/s]

step:5500, train_loss:0.06997949313047096, acc:0.6139705882352942


  1%|          | 545/43738 [04:14<5:16:43,  2.27it/s]

step:5500, train_loss:0.06985113810183942, acc:0.6146788990825688


  1%|          | 546/43738 [04:14<5:03:36,  2.37it/s]

step:5500, train_loss:0.07017496611066708, acc:0.6135531135531136


  1%|▏         | 547/43738 [04:14<4:22:33,  2.74it/s]

step:5500, train_loss:0.07006826362839919, acc:0.6142595978062158


  1%|▏         | 548/43738 [04:15<5:38:27,  2.13it/s]

step:5500, train_loss:0.07005686822167197, acc:0.6131386861313869


  1%|▏         | 549/43738 [04:16<5:42:04,  2.10it/s]

step:5500, train_loss:0.07008858932392546, acc:0.6120218579234973


  1%|▏         | 550/43738 [04:16<5:41:08,  2.11it/s]

step:5500, train_loss:0.06996700492179529, acc:0.6127272727272727


  1%|▏         | 551/43738 [04:16<5:26:06,  2.21it/s]

step:5500, train_loss:0.06989845201565788, acc:0.6134301270417423


  1%|▏         | 552/43738 [04:17<5:12:53,  2.30it/s]

step:5500, train_loss:0.06984913426846184, acc:0.6141304347826086


  1%|▏         | 553/43738 [04:17<4:38:53,  2.58it/s]

step:5500, train_loss:0.06973474681677186, acc:0.6148282097649186


  1%|▏         | 554/43738 [04:18<4:54:37,  2.44it/s]

step:5500, train_loss:0.06961107063140815, acc:0.6155234657039711


  1%|▏         | 555/43738 [04:18<4:35:30,  2.61it/s]

step:5500, train_loss:0.06952772249660648, acc:0.6162162162162163


  1%|▏         | 556/43738 [04:18<5:04:48,  2.36it/s]

step:5500, train_loss:0.06944406522092872, acc:0.6169064748201439


  1%|▏         | 557/43738 [04:19<4:33:32,  2.63it/s]

step:5500, train_loss:0.06931970686245462, acc:0.6175942549371634


  1%|▏         | 558/43738 [04:19<4:06:38,  2.92it/s]

step:5500, train_loss:0.06920820174615257, acc:0.6182795698924731


  1%|▏         | 559/43738 [04:19<4:30:03,  2.66it/s]

step:5500, train_loss:0.06931940466582055, acc:0.6171735241502684


  2%|▏         | 864/43738 [06:37<7:23:38,  1.61it/s]

step:5520, train_loss:0.07205135187379913, acc:0.6099537037037037


  2%|▏         | 865/43738 [06:37<6:52:52,  1.73it/s]

step:5520, train_loss:0.07216657722379603, acc:0.6092485549132948


  2%|▏         | 866/43738 [06:37<5:41:42,  2.09it/s]

step:5520, train_loss:0.07208338987370458, acc:0.6096997690531177


  2%|▏         | 867/43738 [06:38<5:47:24,  2.06it/s]

step:5520, train_loss:0.0721537584778364, acc:0.6089965397923875


  2%|▏         | 868/43738 [06:38<6:30:37,  1.83it/s]

step:5520, train_loss:0.07214424481147617, acc:0.6094470046082949


  2%|▏         | 869/43738 [06:39<6:34:32,  1.81it/s]

step:5520, train_loss:0.07207149834028082, acc:0.6098964326812428


  2%|▏         | 870/43738 [06:40<6:38:36,  1.79it/s]

step:5520, train_loss:0.07215952382626376, acc:0.6091954022988506


  2%|▏         | 871/43738 [06:40<5:50:50,  2.04it/s]

step:5520, train_loss:0.07208269207613237, acc:0.6096440872560276


  2%|▏         | 872/43738 [06:40<5:04:09,  2.35it/s]

step:5520, train_loss:0.07200724413776514, acc:0.6100917431192661


  2%|▏         | 873/43738 [06:41<5:17:29,  2.25it/s]

step:5520, train_loss:0.07197030942741982, acc:0.6105383734249714


  2%|▏         | 874/43738 [06:41<6:21:58,  1.87it/s]

step:5520, train_loss:0.07195580949435688, acc:0.6098398169336384


  2%|▏         | 875/43738 [06:42<6:39:32,  1.79it/s]

step:5520, train_loss:0.07258165675508124, acc:0.6091428571428571


  2%|▏         | 876/43738 [06:42<5:57:32,  2.00it/s]

step:5520, train_loss:0.07274184217059891, acc:0.6084474885844748


  2%|▏         | 877/43738 [06:43<5:44:03,  2.08it/s]

step:5520, train_loss:0.0726595772281629, acc:0.6088939566704675


  2%|▏         | 878/43738 [06:43<5:10:05,  2.30it/s]

step:5520, train_loss:0.07263258432307554, acc:0.6093394077448747


  2%|▏         | 879/43738 [06:43<4:45:31,  2.50it/s]

step:5520, train_loss:0.07255019257809285, acc:0.6097838452787259


  3%|▎         | 1184/43738 [09:03<6:13:44,  1.90it/s]

step:5540, train_loss:0.07093226484642867, acc:0.6199324324324325


  3%|▎         | 1185/43738 [09:03<6:13:41,  1.90it/s]

step:5540, train_loss:0.07091538429904713, acc:0.620253164556962


  3%|▎         | 1186/43738 [09:04<6:26:53,  1.83it/s]

step:5540, train_loss:0.07085700581026143, acc:0.6205733558178752


  3%|▎         | 1187/43738 [09:04<5:25:50,  2.18it/s]

step:5540, train_loss:0.07082521979851615, acc:0.6208930075821398


  3%|▎         | 1188/43738 [09:05<4:45:26,  2.48it/s]

step:5540, train_loss:0.07080007539293807, acc:0.6212121212121212


  3%|▎         | 1189/43738 [09:05<4:29:15,  2.63it/s]

step:5540, train_loss:0.07077976527778139, acc:0.6206896551724138


  3%|▎         | 1190/43738 [09:05<4:15:51,  2.77it/s]

step:5540, train_loss:0.0707329301969657, acc:0.6210084033613446


  3%|▎         | 1191/43738 [09:06<4:38:43,  2.54it/s]

step:5540, train_loss:0.07098125802833152, acc:0.6204869857262805


  3%|▎         | 1192/43738 [09:06<4:34:42,  2.58it/s]

step:5540, train_loss:0.07093163815423871, acc:0.6208053691275168


  3%|▎         | 1193/43738 [09:07<4:50:07,  2.44it/s]

step:5540, train_loss:0.07088149222877105, acc:0.6211232187761945


  3%|▎         | 1194/43738 [09:07<4:14:49,  2.78it/s]

step:5540, train_loss:0.07083785164574008, acc:0.6214405360134003


  3%|▎         | 1195/43738 [09:08<5:44:52,  2.06it/s]

step:5540, train_loss:0.07078125975698732, acc:0.6217573221757322


  3%|▎         | 1196/43738 [09:08<5:56:16,  1.99it/s]

step:5540, train_loss:0.0707938962165939, acc:0.6212374581939799


  3%|▎         | 1197/43738 [09:08<5:13:21,  2.26it/s]

step:5540, train_loss:0.07074109813526783, acc:0.6215538847117794


  3%|▎         | 1198/43738 [09:09<4:46:02,  2.48it/s]

step:5540, train_loss:0.07070521494833899, acc:0.6218697829716193


  3%|▎         | 1199/43738 [09:09<4:38:45,  2.54it/s]

step:5540, train_loss:0.07066853423435877, acc:0.622185154295246


  3%|▎         | 1504/43738 [11:30<6:14:22,  1.88it/s]

step:5560, train_loss:0.07011404048039657, acc:0.6230053191489362


  3%|▎         | 1505/43738 [11:30<6:21:41,  1.84it/s]

step:5560, train_loss:0.070262227987753, acc:0.6225913621262459


  3%|▎         | 1506/43738 [11:31<5:59:36,  1.96it/s]

step:5560, train_loss:0.07021573531301631, acc:0.6228419654714475


  3%|▎         | 1507/43738 [11:31<5:59:06,  1.96it/s]

step:5560, train_loss:0.0701811934994965, acc:0.6230922362309224


  3%|▎         | 1508/43738 [11:32<6:15:32,  1.87it/s]

step:5560, train_loss:0.07019242643383616, acc:0.6226790450928382


  3%|▎         | 1509/43738 [11:32<5:49:15,  2.02it/s]

step:5560, train_loss:0.07016188513630763, acc:0.6229290921139827


  3%|▎         | 1510/43738 [11:33<5:32:21,  2.12it/s]

step:5560, train_loss:0.07011559595791808, acc:0.6231788079470199


  3%|▎         | 1511/43738 [11:33<5:25:44,  2.16it/s]

step:5560, train_loss:0.07022561004766263, acc:0.6227663798808736


  3%|▎         | 1512/43738 [11:33<5:00:17,  2.34it/s]

step:5560, train_loss:0.07035534932443173, acc:0.6223544973544973


  3%|▎         | 1513/43738 [11:34<4:54:26,  2.39it/s]

step:5560, train_loss:0.07037307209811085, acc:0.6219431592861864


  3%|▎         | 1514/43738 [11:34<4:22:31,  2.68it/s]

step:5560, train_loss:0.07032708298658541, acc:0.6221928665785997


  3%|▎         | 1515/43738 [11:35<4:41:16,  2.50it/s]

step:5560, train_loss:0.07032773165776096, acc:0.6224422442244224


  3%|▎         | 1516/43738 [11:35<4:32:53,  2.58it/s]

step:5560, train_loss:0.07029126521192569, acc:0.6226912928759895


  3%|▎         | 1517/43738 [11:36<5:26:01,  2.16it/s]

step:5560, train_loss:0.07028558458666327, acc:0.6222808174027686


  3%|▎         | 1518/43738 [11:36<5:25:40,  2.16it/s]

step:5560, train_loss:0.07024519714159948, acc:0.6225296442687747


  3%|▎         | 1519/43738 [11:37<5:33:35,  2.11it/s]

step:5560, train_loss:0.07022240145133171, acc:0.6227781435154707


  4%|▍         | 1824/43738 [13:55<5:07:31,  2.27it/s]

step:5580, train_loss:0.0710209645228149, acc:0.618421052631579


  4%|▍         | 1825/43738 [13:55<4:24:08,  2.64it/s]

step:5580, train_loss:0.07098889748929452, acc:0.6186301369863013


  4%|▍         | 1826/43738 [13:56<5:48:37,  2.00it/s]

step:5580, train_loss:0.07110466504909929, acc:0.6182913472070098


  4%|▍         | 1827/43738 [13:57<6:04:22,  1.92it/s]

step:5580, train_loss:0.07130108987799115, acc:0.6179529282977558


  4%|▍         | 1828/43738 [13:57<6:27:00,  1.80it/s]

step:5580, train_loss:0.0712695311004356, acc:0.6181619256017505


  4%|▍         | 1829/43738 [13:58<5:22:52,  2.16it/s]

step:5580, train_loss:0.07123320873162493, acc:0.6183706943685073


  4%|▍         | 1830/43738 [13:58<4:53:25,  2.38it/s]

step:5580, train_loss:0.07119591681107298, acc:0.6185792349726776


  4%|▍         | 1831/43738 [13:58<4:48:57,  2.42it/s]

step:5580, train_loss:0.0711629965155274, acc:0.618787547788094


  4%|▍         | 1832/43738 [13:59<4:59:58,  2.33it/s]

step:5580, train_loss:0.07112428270615627, acc:0.618995633187773


  4%|▍         | 1833/43738 [13:59<4:49:54,  2.41it/s]

step:5580, train_loss:0.07109364445695468, acc:0.6192034915439171


  4%|▍         | 1834/43738 [13:59<4:19:23,  2.69it/s]

step:5580, train_loss:0.07112177093319073, acc:0.61886586695747


  4%|▍         | 1835/43738 [14:00<4:54:14,  2.37it/s]

step:5580, train_loss:0.07111478933109078, acc:0.6190735694822889


  4%|▍         | 1836/43738 [14:00<4:17:13,  2.71it/s]

step:5580, train_loss:0.0711221783208252, acc:0.6187363834422658


  4%|▍         | 1837/43738 [14:01<4:23:17,  2.65it/s]

step:5580, train_loss:0.07114838419407839, acc:0.6183995645073489


  4%|▍         | 1838/43738 [14:01<3:57:18,  2.94it/s]

step:5580, train_loss:0.07128810335726703, acc:0.6180631120783461


  4%|▍         | 1839/43738 [14:01<4:44:37,  2.45it/s]

step:5580, train_loss:0.07131190686731013, acc:0.6177270255573681


  5%|▍         | 2144/43738 [16:15<6:54:47,  1.67it/s]

step:5600, train_loss:0.07085326348153184, acc:0.6194029850746269


  5%|▍         | 2145/43738 [16:15<5:43:11,  2.02it/s]

step:5600, train_loss:0.07085458826754537, acc:0.6195804195804195


  5%|▍         | 2146/43738 [16:16<5:26:08,  2.13it/s]

step:5600, train_loss:0.07083392146151758, acc:0.6197576887232059


  5%|▍         | 2147/43738 [16:16<5:29:59,  2.10it/s]

step:5600, train_loss:0.07081721666292645, acc:0.6199347927340475


  5%|▍         | 2148/43738 [16:17<5:42:30,  2.02it/s]

step:5600, train_loss:0.07080904486777956, acc:0.6201117318435754


  5%|▍         | 2149/43738 [16:17<4:47:34,  2.41it/s]

step:5600, train_loss:0.07078623144958678, acc:0.6202885062819916


  5%|▍         | 2150/43738 [16:17<4:12:17,  2.75it/s]

step:5600, train_loss:0.07076557342520763, acc:0.6204651162790698


  5%|▍         | 2151/43738 [16:18<5:13:42,  2.21it/s]

step:5600, train_loss:0.07074874869630265, acc:0.6206415620641562


  5%|▍         | 2152/43738 [16:18<4:46:32,  2.42it/s]

step:5600, train_loss:0.07077624556147814, acc:0.6203531598513011


  5%|▍         | 2153/43738 [16:18<4:35:51,  2.51it/s]

step:5600, train_loss:0.07078576064890288, acc:0.6200650255457502


  5%|▍         | 2154/43738 [16:19<4:47:32,  2.41it/s]

step:5600, train_loss:0.07079678017877608, acc:0.6197771587743732


  5%|▍         | 2155/43738 [16:19<5:09:21,  2.24it/s]

step:5600, train_loss:0.07079499585639357, acc:0.619953596287703


  5%|▍         | 2156/43738 [16:20<5:25:29,  2.13it/s]

step:5600, train_loss:0.07077325612486078, acc:0.6201298701298701


  5%|▍         | 2157/43738 [16:20<4:36:20,  2.51it/s]

step:5600, train_loss:0.07074063110099885, acc:0.6203059805285118


  5%|▍         | 2158/43738 [16:20<4:20:12,  2.66it/s]

step:5600, train_loss:0.07072102936994383, acc:0.6204819277108434


  5%|▍         | 2159/43738 [16:21<4:25:21,  2.61it/s]

step:5600, train_loss:0.07069698072517624, acc:0.6206577119036591


  6%|▌         | 2464/43738 [18:38<4:46:55,  2.40it/s]

step:5620, train_loss:0.06950081431497963, acc:0.6278409090909091


  6%|▌         | 2465/43738 [18:39<4:49:38,  2.37it/s]

step:5620, train_loss:0.06949322568134614, acc:0.6279918864097364


  6%|▌         | 2466/43738 [18:39<4:30:06,  2.55it/s]

step:5620, train_loss:0.06951170595262135, acc:0.6277372262773723


  6%|▌         | 2467/43738 [18:40<4:37:55,  2.47it/s]

step:5620, train_loss:0.06952134427301364, acc:0.6274827725982975


  6%|▌         | 2468/43738 [18:40<5:40:03,  2.02it/s]

step:5620, train_loss:0.06952668771930183, acc:0.6276337115072933


  6%|▌         | 2469/43738 [18:41<5:14:50,  2.18it/s]

step:5620, train_loss:0.06954609919444094, acc:0.627379505872823


  6%|▌         | 2470/43738 [18:41<5:33:09,  2.06it/s]

step:5620, train_loss:0.06952222710760944, acc:0.6275303643724697


  6%|▌         | 2471/43738 [18:41<4:49:20,  2.38it/s]

step:5620, train_loss:0.06952060951640432, acc:0.6272764063132336


  6%|▌         | 2472/43738 [18:42<4:29:57,  2.55it/s]

step:5620, train_loss:0.06952699515146307, acc:0.6274271844660194


  6%|▌         | 2473/43738 [18:42<3:56:49,  2.90it/s]

step:5620, train_loss:0.06950156354113586, acc:0.6275778406793369


  6%|▌         | 2474/43738 [18:42<3:40:57,  3.11it/s]

step:5620, train_loss:0.06949169793057616, acc:0.627728375101051


  6%|▌         | 2475/43738 [18:43<4:28:25,  2.56it/s]

step:5620, train_loss:0.06948296689226133, acc:0.6278787878787879


  6%|▌         | 2476/43738 [18:43<4:05:38,  2.80it/s]

step:5620, train_loss:0.06949524500087371, acc:0.6276252019386107


  6%|▌         | 2477/43738 [18:43<3:50:21,  2.99it/s]

step:5620, train_loss:0.06946736420370334, acc:0.6277755349212757


  6%|▌         | 2478/43738 [18:44<4:42:06,  2.44it/s]

step:5620, train_loss:0.06944039377606195, acc:0.6279257465698144


  6%|▌         | 2479/43738 [18:45<5:17:28,  2.17it/s]

step:5620, train_loss:0.06941673845651372, acc:0.628075837031061


  6%|▋         | 2784/43738 [21:05<5:33:35,  2.05it/s]

step:5640, train_loss:0.06956647231922473, acc:0.6285919540229885


  6%|▋         | 2785/43738 [21:06<6:20:21,  1.79it/s]

step:5640, train_loss:0.06960887162351757, acc:0.6283662477558348


  6%|▋         | 2786/43738 [21:06<5:39:36,  2.01it/s]

step:5640, train_loss:0.06965022058454653, acc:0.628140703517588


  6%|▋         | 2787/43738 [21:07<5:49:41,  1.95it/s]

step:5640, train_loss:0.06963231685711889, acc:0.6282741298887693


  6%|▋         | 2788/43738 [21:07<5:40:11,  2.01it/s]

step:5640, train_loss:0.06961640181470699, acc:0.6284074605451937


  6%|▋         | 2789/43738 [21:07<4:45:04,  2.39it/s]

step:5640, train_loss:0.06965514758194227, acc:0.6281821441376838


  6%|▋         | 2790/43738 [21:08<4:38:30,  2.45it/s]

step:5640, train_loss:0.06963033744857536, acc:0.6283154121863799


  6%|▋         | 2791/43738 [21:08<4:29:19,  2.53it/s]

step:5640, train_loss:0.06960584806101994, acc:0.6284485847366535


  6%|▋         | 2792/43738 [21:08<4:00:08,  2.84it/s]

step:5640, train_loss:0.06958195012953249, acc:0.6285816618911175


  6%|▋         | 2793/43738 [21:09<4:48:05,  2.37it/s]

step:5640, train_loss:0.06961805576779852, acc:0.6283566058002148


  6%|▋         | 2794/43738 [21:09<4:11:26,  2.71it/s]

step:5640, train_loss:0.0696736206531804, acc:0.6281317108088762


  6%|▋         | 2795/43738 [21:10<3:51:21,  2.95it/s]

step:5640, train_loss:0.06967070757728804, acc:0.627906976744186


  6%|▋         | 2796/43738 [21:10<4:48:18,  2.37it/s]

step:5640, train_loss:0.06965017356910311, acc:0.6280400572246065


  6%|▋         | 2797/43738 [21:11<4:50:55,  2.35it/s]

step:5640, train_loss:0.06975375544552018, acc:0.6278155166249553


  6%|▋         | 2798/43738 [21:11<4:58:00,  2.29it/s]

step:5640, train_loss:0.06977602224702979, acc:0.6275911365260901


  6%|▋         | 2799/43738 [21:12<5:20:35,  2.13it/s]

step:5640, train_loss:0.0698106815303064, acc:0.6273669167559843


  7%|▋         | 3104/43738 [23:30<6:37:07,  1.71it/s]

step:5660, train_loss:0.06923171040702092, acc:0.6272551546391752


  7%|▋         | 3105/43738 [23:30<5:58:30,  1.89it/s]

step:5660, train_loss:0.06923033385436334, acc:0.6273752012882448


  7%|▋         | 3106/43738 [23:30<5:31:22,  2.04it/s]

step:5660, train_loss:0.06921482985311968, acc:0.6274951706374758


  7%|▋         | 3107/43738 [23:31<5:29:58,  2.05it/s]

step:5660, train_loss:0.06920692977444684, acc:0.6276150627615062


  7%|▋         | 3108/43738 [23:31<4:52:52,  2.31it/s]

step:5660, train_loss:0.06921123969349909, acc:0.6274131274131274


  7%|▋         | 3109/43738 [23:32<5:08:36,  2.19it/s]

step:5660, train_loss:0.06919045353378865, acc:0.6275329688002573


  7%|▋         | 3110/43738 [23:32<5:47:12,  1.95it/s]

step:5660, train_loss:0.06919639789094321, acc:0.627331189710611


  7%|▋         | 3111/43738 [23:33<5:29:28,  2.06it/s]

step:5660, train_loss:0.06917440828111211, acc:0.6274509803921569


  7%|▋         | 3112/43738 [23:33<5:28:16,  2.06it/s]

step:5660, train_loss:0.0692366689814606, acc:0.6272493573264781


  7%|▋         | 3113/43738 [23:34<6:00:48,  1.88it/s]

step:5660, train_loss:0.06922608549896463, acc:0.6273690973337617


  7%|▋         | 3114/43738 [23:34<6:09:24,  1.83it/s]

step:5660, train_loss:0.0692047073477631, acc:0.6274887604367373


  7%|▋         | 3115/43738 [23:35<6:24:04,  1.76it/s]

step:5660, train_loss:0.06918819275535912, acc:0.6276083467094703


  7%|▋         | 3116/43738 [23:36<6:59:11,  1.62it/s]

step:5660, train_loss:0.06920749318781631, acc:0.6274069319640565


  7%|▋         | 3117/43738 [23:36<6:44:30,  1.67it/s]

step:5660, train_loss:0.06918529623620873, acc:0.6275264677574591


  7%|▋         | 3118/43738 [23:37<5:53:48,  1.91it/s]

step:5660, train_loss:0.0692050608122232, acc:0.627325208466966


  7%|▋         | 3119/43738 [23:37<5:42:32,  1.98it/s]

step:5660, train_loss:0.0691991555904247, acc:0.6274446938121193


  8%|▊         | 3424/43738 [25:56<5:17:40,  2.12it/s]

step:5680, train_loss:0.06910438000215147, acc:0.6299649532710281


  8%|▊         | 3425/43738 [25:56<4:47:45,  2.33it/s]

step:5680, train_loss:0.06911354829533448, acc:0.6297810218978103


  8%|▊         | 3426/43738 [25:57<5:43:20,  1.96it/s]

step:5680, train_loss:0.06910882535483932, acc:0.6298890834792761


  8%|▊         | 3427/43738 [25:58<5:50:18,  1.92it/s]

step:5680, train_loss:0.06912239446544023, acc:0.6297052815873942


  8%|▊         | 3428/43738 [25:58<5:43:28,  1.96it/s]

step:5680, train_loss:0.069153288591868, acc:0.6295215869311552


  8%|▊         | 3429/43738 [25:58<5:06:13,  2.19it/s]

step:5680, train_loss:0.06915347193116955, acc:0.6293379994167396


  8%|▊         | 3430/43738 [25:59<4:52:18,  2.30it/s]

step:5680, train_loss:0.06915187101886518, acc:0.6294460641399416


  8%|▊         | 3431/43738 [25:59<5:07:20,  2.19it/s]

step:5680, train_loss:0.0691384047751837, acc:0.6295540658700087


  8%|▊         | 3432/43738 [26:00<5:04:32,  2.21it/s]

step:5680, train_loss:0.06917173318555529, acc:0.6293706293706294


  8%|▊         | 3433/43738 [26:00<4:52:14,  2.30it/s]

step:5680, train_loss:0.06915689848521152, acc:0.6294785901543839


  8%|▊         | 3434/43738 [26:01<4:36:49,  2.43it/s]

step:5680, train_loss:0.06913682435208392, acc:0.6295864880605707


  8%|▊         | 3435/43738 [26:01<5:09:57,  2.17it/s]

step:5680, train_loss:0.06919204069927112, acc:0.6294032023289665


  8%|▊         | 3436/43738 [26:02<4:55:37,  2.27it/s]

step:5680, train_loss:0.06917872595989331, acc:0.6295110593713621


  8%|▊         | 3437/43738 [26:02<4:51:39,  2.30it/s]

step:5680, train_loss:0.0691807785559923, acc:0.6293279022403259


  8%|▊         | 3438/43738 [26:02<4:18:31,  2.60it/s]

step:5680, train_loss:0.06916436450073057, acc:0.629435718440954


  8%|▊         | 3439/43738 [26:02<3:55:43,  2.85it/s]

step:5680, train_loss:0.06914502997573532, acc:0.6295434719395173


  9%|▊         | 3744/43738 [28:18<4:54:47,  2.26it/s]

step:5700, train_loss:0.06873842733264797, acc:0.6319444444444444


  9%|▊         | 3745/43738 [28:18<6:06:56,  1.82it/s]

step:5700, train_loss:0.0687682216741749, acc:0.6317757009345795


  9%|▊         | 3746/43738 [28:19<5:35:30,  1.99it/s]

step:5700, train_loss:0.06875125511006308, acc:0.6318739989321943


  9%|▊         | 3747/43738 [28:19<5:26:11,  2.04it/s]

step:5700, train_loss:0.0687419393028448, acc:0.6319722444622364


  9%|▊         | 3748/43738 [28:20<4:43:13,  2.35it/s]

step:5700, train_loss:0.06877257792127424, acc:0.6318036286019211


  9%|▊         | 3749/43738 [28:20<4:37:12,  2.40it/s]

step:5700, train_loss:0.06876455500103244, acc:0.6319018404907976


  9%|▊         | 3750/43738 [28:20<5:00:35,  2.22it/s]

step:5700, train_loss:0.06876690611351902, acc:0.632


  9%|▊         | 3751/43738 [28:21<4:50:28,  2.29it/s]

step:5700, train_loss:0.06875541192366834, acc:0.632098107171421


  9%|▊         | 3752/43738 [28:22<6:05:11,  1.82it/s]

step:5700, train_loss:0.06873941869009659, acc:0.6321961620469083


  9%|▊         | 3753/43738 [28:22<6:11:49,  1.79it/s]

step:5700, train_loss:0.06875515375925959, acc:0.6320277111644018


  9%|▊         | 3754/43738 [28:23<5:39:32,  1.96it/s]

step:5700, train_loss:0.06874572976214363, acc:0.6321257325519446


  9%|▊         | 3755/43738 [28:23<5:02:40,  2.20it/s]

step:5700, train_loss:0.06875012465495338, acc:0.6319573901464713


  9%|▊         | 3756/43738 [28:24<5:24:04,  2.06it/s]

step:5700, train_loss:0.06873262028245943, acc:0.6320553780617678


  9%|▊         | 3757/43738 [28:24<6:33:01,  1.70it/s]

step:5700, train_loss:0.06872657942997765, acc:0.6321533138142135


  9%|▊         | 3758/43738 [28:25<6:37:36,  1.68it/s]

step:5700, train_loss:0.06872049328236518, acc:0.6322511974454497


  9%|▊         | 3759/43738 [28:25<6:02:10,  1.84it/s]

step:5700, train_loss:0.0687027804744716, acc:0.6323490289970737


  9%|▉         | 4064/43738 [30:47<5:03:23,  2.18it/s]

step:5720, train_loss:0.06887482322062577, acc:0.6311515748031497


  9%|▉         | 4065/43738 [30:47<4:21:09,  2.53it/s]

step:5720, train_loss:0.0688581996826171, acc:0.6312423124231242


  9%|▉         | 4066/43738 [30:48<4:31:57,  2.43it/s]

step:5720, train_loss:0.06890043057555716, acc:0.6310870634530251


  9%|▉         | 4067/43738 [30:48<4:22:14,  2.52it/s]

step:5720, train_loss:0.06890656164816286, acc:0.6311777723137447


  9%|▉         | 4068/43738 [30:49<4:06:35,  2.68it/s]

step:5720, train_loss:0.06890568467352842, acc:0.6310226155358899


  9%|▉         | 4069/43738 [30:49<5:15:10,  2.10it/s]

step:5720, train_loss:0.06893373223963592, acc:0.6308675350208897


  9%|▉         | 4070/43738 [30:50<4:58:21,  2.22it/s]

step:5720, train_loss:0.06895517722027907, acc:0.6307125307125308


  9%|▉         | 4071/43738 [30:50<4:21:57,  2.52it/s]

step:5720, train_loss:0.06893840027847575, acc:0.6308032424465734


  9%|▉         | 4072/43738 [30:50<3:48:13,  2.90it/s]

step:5720, train_loss:0.06893501853924734, acc:0.6308939096267191


  9%|▉         | 4073/43738 [30:50<3:24:42,  3.23it/s]

step:5720, train_loss:0.06891810224648645, acc:0.6309845322857844


  9%|▉         | 4074/43738 [30:51<3:37:44,  3.04it/s]

step:5720, train_loss:0.06890137603484843, acc:0.6310751104565537


  9%|▉         | 4075/43738 [30:51<4:20:08,  2.54it/s]

step:5720, train_loss:0.06888554595876485, acc:0.6311656441717791


  9%|▉         | 4076/43738 [30:52<4:05:59,  2.69it/s]

step:5720, train_loss:0.06890132553999173, acc:0.6310107948969578


  9%|▉         | 4077/43738 [30:52<3:37:41,  3.04it/s]

step:5720, train_loss:0.06890799673837819, acc:0.6308560215844984


  9%|▉         | 4078/43738 [30:52<4:00:21,  2.75it/s]

step:5720, train_loss:0.06889406375297746, acc:0.6309465424227563


  9%|▉         | 4079/43738 [30:53<3:53:38,  2.83it/s]

step:5720, train_loss:0.06890862752340636, acc:0.6307918607501839


 10%|█         | 4384/43738 [33:09<4:59:28,  2.19it/s]

step:5740, train_loss:0.06856399423550168, acc:0.6307025547445255


 10%|█         | 4385/43738 [33:10<4:40:29,  2.34it/s]

step:5740, train_loss:0.06856520734034466, acc:0.6305587229190421


 10%|█         | 4386/43738 [33:11<5:51:04,  1.87it/s]

step:5740, train_loss:0.06858770860616747, acc:0.6304149566803465


 10%|█         | 4387/43738 [33:11<4:55:29,  2.22it/s]

step:5740, train_loss:0.06857207568414048, acc:0.6304992021882836


 10%|█         | 4388/43738 [33:11<4:16:39,  2.56it/s]

step:5740, train_loss:0.06855689991063799, acc:0.6305834092980857


 10%|█         | 4389/43738 [33:12<4:17:39,  2.55it/s]

step:5740, train_loss:0.06856135023741583, acc:0.630667578035999


 10%|█         | 4390/43738 [33:12<3:46:08,  2.90it/s]

step:5740, train_loss:0.06854703200242221, acc:0.630751708428246


 10%|█         | 4391/43738 [33:12<4:03:29,  2.69it/s]

step:5740, train_loss:0.06856227542727696, acc:0.6306080619448873


 10%|█         | 4392/43738 [33:13<4:09:10,  2.63it/s]

step:5740, train_loss:0.0685710050126159, acc:0.6306921675774135


 10%|█         | 4393/43738 [33:13<4:06:04,  2.66it/s]

step:5740, train_loss:0.06855540138955196, acc:0.6307762349191897


 10%|█         | 4394/43738 [33:14<4:48:06,  2.28it/s]

step:5740, train_loss:0.06857464035165305, acc:0.630632680928539


 10%|█         | 4395/43738 [33:14<4:59:18,  2.19it/s]

step:5740, train_loss:0.06856353287958562, acc:0.630716723549488


 10%|█         | 4396/43738 [33:15<5:17:50,  2.06it/s]

step:5740, train_loss:0.06858022741450008, acc:0.6305732484076433


 10%|█         | 4397/43738 [33:15<5:02:19,  2.17it/s]

step:5740, train_loss:0.06857580332801297, acc:0.6304298385262679


 10%|█         | 4398/43738 [33:15<4:17:39,  2.54it/s]

step:5740, train_loss:0.06856145100335503, acc:0.6305138699408822


 10%|█         | 4399/43738 [33:15<3:46:58,  2.89it/s]

step:5740, train_loss:0.06854661870425473, acc:0.6305978631507161


 11%|█         | 4704/43738 [35:34<6:59:51,  1.55it/s]

step:5760, train_loss:0.06859234457002136, acc:0.6324404761904762


 11%|█         | 4705/43738 [35:35<5:45:00,  1.89it/s]

step:5760, train_loss:0.06859103273315796, acc:0.6323060573857598


 11%|█         | 4706/43738 [35:35<5:30:09,  1.97it/s]

step:5760, train_loss:0.06862378302424584, acc:0.6321716957076073


 11%|█         | 4707/43738 [35:35<4:54:42,  2.21it/s]

step:5760, train_loss:0.0686184926146134, acc:0.6322498406628426


 11%|█         | 4708/43738 [35:36<5:01:49,  2.16it/s]

step:5760, train_loss:0.06861158637239435, acc:0.6323279524214104


 11%|█         | 4709/43738 [35:37<5:56:52,  1.82it/s]

step:5760, train_loss:0.06860144367308729, acc:0.6324060310044596


 11%|█         | 4710/43738 [35:37<5:04:48,  2.13it/s]

step:5760, train_loss:0.06858829367056098, acc:0.632484076433121


 11%|█         | 4711/43738 [35:37<5:24:18,  2.01it/s]

step:5760, train_loss:0.06857844346063637, acc:0.6325620887285077


 11%|█         | 4712/43738 [35:38<4:51:31,  2.23it/s]

step:5760, train_loss:0.06856390673771255, acc:0.6326400679117148


 11%|█         | 4713/43738 [35:38<4:51:51,  2.23it/s]

step:5760, train_loss:0.06854972236918899, acc:0.6327180140038192


 11%|█         | 4714/43738 [35:39<4:47:16,  2.26it/s]

step:5760, train_loss:0.06855675282176979, acc:0.632583792957149


 11%|█         | 4715/43738 [35:39<5:38:17,  1.92it/s]

step:5760, train_loss:0.06857645675899957, acc:0.6324496288441145


 11%|█         | 4716/43738 [35:40<5:24:12,  2.01it/s]

step:5760, train_loss:0.06856256167229484, acc:0.6325275657336726


 11%|█         | 4717/43738 [35:40<5:32:22,  1.96it/s]

step:5760, train_loss:0.06855118002945201, acc:0.6326054695781217


 11%|█         | 4718/43738 [35:41<5:23:47,  2.01it/s]

step:5760, train_loss:0.06854244662305552, acc:0.632683340398474


 11%|█         | 4719/43738 [35:41<4:39:38,  2.33it/s]

step:5760, train_loss:0.06852839904442895, acc:0.6327611782157236


 11%|█▏        | 5024/43738 [37:57<3:56:31,  2.73it/s]

step:5780, train_loss:0.0684193607836498, acc:0.634952229299363


 11%|█▏        | 5025/43738 [37:58<4:38:02,  2.32it/s]

step:5780, train_loss:0.06840805849878219, acc:0.6350248756218906


 11%|█▏        | 5026/43738 [37:58<4:44:17,  2.27it/s]

step:5780, train_loss:0.06840028586337221, acc:0.6350974930362117


 11%|█▏        | 5027/43738 [37:59<4:58:35,  2.16it/s]

step:5780, train_loss:0.06840744208606464, acc:0.634971155758902


 11%|█▏        | 5028/43738 [37:59<4:33:23,  2.36it/s]

step:5780, train_loss:0.06842546037064592, acc:0.6348448687350835


 11%|█▏        | 5029/43738 [37:59<4:23:55,  2.44it/s]

step:5780, train_loss:0.06841265205394359, acc:0.634917478623981


 12%|█▏        | 5030/43738 [38:00<3:59:37,  2.69it/s]

step:5780, train_loss:0.06844454880011829, acc:0.6347912524850895


 12%|█▏        | 5031/43738 [38:00<4:44:13,  2.27it/s]

step:5780, train_loss:0.06843109472526196, acc:0.6348638441661697


 12%|█▏        | 5032/43738 [38:00<4:07:34,  2.61it/s]

step:5780, train_loss:0.06842698479939377, acc:0.6349364069952306


 12%|█▏        | 5033/43738 [38:01<4:23:34,  2.45it/s]

step:5780, train_loss:0.06842082395762247, acc:0.6350089409894695


 12%|█▏        | 5034/43738 [38:01<3:48:15,  2.83it/s]

step:5780, train_loss:0.06842054460297313, acc:0.6350814461660708


 12%|█▏        | 5035/43738 [38:01<3:43:57,  2.88it/s]

step:5780, train_loss:0.0684115253724833, acc:0.6351539225422046


 12%|█▏        | 5036/43738 [38:02<3:28:59,  3.09it/s]

step:5780, train_loss:0.06843996142541432, acc:0.6350277998411438


 12%|█▏        | 5037/43738 [38:02<3:18:31,  3.25it/s]

step:5780, train_loss:0.06842741715025744, acc:0.635100258090133


 12%|█▏        | 5038/43738 [38:03<4:27:13,  2.41it/s]

step:5780, train_loss:0.06842422606205825, acc:0.6349741961095673


 12%|█▏        | 5039/43738 [38:03<3:50:25,  2.80it/s]

step:5780, train_loss:0.06841066199817161, acc:0.6350466362373487


 12%|█▏        | 5344/43738 [40:17<4:51:38,  2.19it/s]

step:5800, train_loss:0.06814501664338896, acc:0.6347305389221557


 12%|█▏        | 5345/43738 [40:17<4:29:23,  2.38it/s]

step:5800, train_loss:0.06814152872160693, acc:0.634798877455566


 12%|█▏        | 5346/43738 [40:18<4:02:10,  2.64it/s]

step:5800, train_loss:0.0681385150841654, acc:0.6346801346801347


 12%|█▏        | 5347/43738 [40:18<4:26:06,  2.40it/s]

step:5800, train_loss:0.0681299237319165, acc:0.6347484570787357


 12%|█▏        | 5348/43738 [40:18<3:54:21,  2.73it/s]

step:5800, train_loss:0.0681209333650268, acc:0.6348167539267016


 12%|█▏        | 5349/43738 [40:19<4:49:44,  2.21it/s]

step:5800, train_loss:0.06814014681904919, acc:0.6346980744064311


 12%|█▏        | 5350/43738 [40:19<4:57:56,  2.15it/s]

step:5800, train_loss:0.06812783688944372, acc:0.6347663551401869


 12%|█▏        | 5351/43738 [40:20<4:43:43,  2.25it/s]

step:5800, train_loss:0.06811590418731699, acc:0.634834610353205


 12%|█▏        | 5352/43738 [40:20<4:08:14,  2.58it/s]

step:5800, train_loss:0.06810319631946289, acc:0.6349028400597907


 12%|█▏        | 5353/43738 [40:20<3:46:04,  2.83it/s]

step:5800, train_loss:0.06809571491592742, acc:0.6349710442742388


 12%|█▏        | 5354/43738 [40:21<4:08:07,  2.58it/s]

step:5800, train_loss:0.06809857191419581, acc:0.6348524467687711


 12%|█▏        | 5355/43738 [40:21<4:11:43,  2.54it/s]

step:5800, train_loss:0.06808953131764282, acc:0.6349206349206349


 12%|█▏        | 5356/43738 [40:22<4:51:13,  2.20it/s]

step:5800, train_loss:0.06808142379337774, acc:0.6349887976101568


 12%|█▏        | 5357/43738 [40:22<4:11:38,  2.54it/s]

step:5800, train_loss:0.06806871502217154, acc:0.6350569348515961


 12%|█▏        | 5358/43738 [40:22<4:06:02,  2.60it/s]

step:5800, train_loss:0.06806673746137226, acc:0.6349384098544233


 12%|█▏        | 5359/43738 [40:23<3:38:26,  2.93it/s]

step:5800, train_loss:0.06805622819311034, acc:0.6350065310692293


 13%|█▎        | 5664/43738 [42:44<5:06:08,  2.07it/s]

step:5820, train_loss:0.06851301339800989, acc:0.631885593220339


 13%|█▎        | 5665/43738 [42:44<4:35:58,  2.30it/s]

step:5820, train_loss:0.06851010599470757, acc:0.6319505736981466


 13%|█▎        | 5666/43738 [42:45<4:17:41,  2.46it/s]

step:5820, train_loss:0.06850885224612521, acc:0.6318390398870455


 13%|█▎        | 5667/43738 [42:45<3:49:18,  2.77it/s]

step:5820, train_loss:0.06849701444695891, acc:0.6319040056467267


 13%|█▎        | 5668/43738 [42:46<4:49:52,  2.19it/s]

step:5820, train_loss:0.06850175059799532, acc:0.6317925194071983


 13%|█▎        | 5669/43738 [42:46<4:08:06,  2.56it/s]

step:5820, train_loss:0.06850482604977218, acc:0.631681072499559


 13%|█▎        | 5670/43738 [42:46<3:56:18,  2.68it/s]

step:5820, train_loss:0.06849274813023853, acc:0.6317460317460317


 13%|█▎        | 5671/43738 [42:47<4:45:31,  2.22it/s]

step:5820, train_loss:0.0684807141865276, acc:0.6318109680832305


 13%|█▎        | 5672/43738 [42:47<4:39:21,  2.27it/s]

step:5820, train_loss:0.06847574766802884, acc:0.6318758815232722


 13%|█▎        | 5673/43738 [42:48<4:41:07,  2.26it/s]

step:5820, train_loss:0.06846686702344511, acc:0.6319407720782655


 13%|█▎        | 5674/43738 [42:48<4:02:01,  2.62it/s]

step:5820, train_loss:0.06845515487969771, acc:0.6320056397603102


 13%|█▎        | 5675/43738 [42:48<4:01:58,  2.62it/s]

step:5820, train_loss:0.06845499850237241, acc:0.6318942731277533


 13%|█▎        | 5676/43738 [42:48<3:41:02,  2.87it/s]

step:5820, train_loss:0.06845657803285017, acc:0.6317829457364341


 13%|█▎        | 5677/43738 [42:49<3:57:50,  2.67it/s]

step:5820, train_loss:0.06846077337988447, acc:0.6316716575656156


 13%|█▎        | 5678/43738 [42:49<3:37:24,  2.92it/s]

step:5820, train_loss:0.06845374370662445, acc:0.6317365269461078


 13%|█▎        | 5679/43738 [42:50<3:41:49,  2.86it/s]

step:5820, train_loss:0.0684455707307036, acc:0.6318013734812467


 14%|█▎        | 5984/43738 [45:10<4:14:57,  2.47it/s]

step:5840, train_loss:0.06850424346236315, acc:0.6303475935828877


 14%|█▎        | 5985/43738 [45:11<4:17:11,  2.45it/s]

step:5840, train_loss:0.06849281036043299, acc:0.6304093567251462


 14%|█▎        | 5986/43738 [45:11<5:23:19,  1.95it/s]

step:5840, train_loss:0.0685007867466612, acc:0.630304042766455


 14%|█▎        | 5987/43738 [45:12<5:01:13,  2.09it/s]

step:5840, train_loss:0.06849520977246071, acc:0.6303657925505262


 14%|█▎        | 5988/43738 [45:13<5:56:43,  1.76it/s]

step:5840, train_loss:0.0684860328701488, acc:0.6304275217100869


 14%|█▎        | 5989/43738 [45:13<5:27:44,  1.92it/s]

step:5840, train_loss:0.06847659652417214, acc:0.6304892302554683


 14%|█▎        | 5990/43738 [45:13<4:38:08,  2.26it/s]

step:5840, train_loss:0.06849417838286251, acc:0.6303839732888147


 14%|█▎        | 5991/43738 [45:14<4:12:49,  2.49it/s]

step:5840, train_loss:0.06848319798368374, acc:0.6304456685027542


 14%|█▎        | 5992/43738 [45:14<3:48:08,  2.76it/s]

step:5840, train_loss:0.06847213368592968, acc:0.6305073431241656


 14%|█▎        | 5993/43738 [45:14<4:07:36,  2.54it/s]

step:5840, train_loss:0.06846143981499277, acc:0.6305689971633572


 14%|█▎        | 5994/43738 [45:15<5:00:38,  2.09it/s]

step:5840, train_loss:0.06845261529350218, acc:0.6306306306306306


 14%|█▎        | 5995/43738 [45:15<4:37:43,  2.26it/s]

step:5840, train_loss:0.06847124160269355, acc:0.6305254378648875


 14%|█▎        | 5996/43738 [45:16<4:13:27,  2.48it/s]

step:5840, train_loss:0.06846644005828209, acc:0.6305870580386924


 14%|█▎        | 5997/43738 [45:16<3:43:35,  2.81it/s]

step:5840, train_loss:0.06846045600336942, acc:0.6306486576621644


 14%|█▎        | 5998/43738 [45:16<3:37:42,  2.89it/s]

step:5840, train_loss:0.06846858083062658, acc:0.6305435145048349


 14%|█▎        | 5999/43738 [45:17<3:20:07,  3.14it/s]

step:5840, train_loss:0.0684587803906141, acc:0.6306051008501417


 14%|█▍        | 6304/43738 [47:32<4:49:18,  2.16it/s]

step:5860, train_loss:0.06867286191836545, acc:0.6296002538071066


 14%|█▍        | 6305/43738 [47:33<5:59:43,  1.73it/s]

step:5860, train_loss:0.0686624460474571, acc:0.6296590007930214


 14%|█▍        | 6306/43738 [47:34<5:14:20,  1.98it/s]

step:5860, train_loss:0.06865264697880533, acc:0.6297177291468443


 14%|█▍        | 6307/43738 [47:34<4:47:47,  2.17it/s]

step:5860, train_loss:0.06865378266453447, acc:0.629617884889805


 14%|█▍        | 6308/43738 [47:34<4:04:59,  2.55it/s]

step:5860, train_loss:0.06868927599889901, acc:0.6295180722891566


 14%|█▍        | 6309/43738 [47:35<5:22:02,  1.94it/s]

step:5860, train_loss:0.06869664163465475, acc:0.6295767950546838


 14%|█▍        | 6310/43738 [47:35<5:06:23,  2.04it/s]

step:5860, train_loss:0.06868778653972356, acc:0.629635499207607


 14%|█▍        | 6311/43738 [47:36<5:09:22,  2.02it/s]

step:5860, train_loss:0.06869149248796687, acc:0.6296941847567739


 14%|█▍        | 6312/43738 [47:36<4:51:15,  2.14it/s]

step:5860, train_loss:0.06868062144396127, acc:0.6297528517110266


 14%|█▍        | 6313/43738 [47:37<4:45:39,  2.18it/s]

step:5860, train_loss:0.068674204492492, acc:0.6298115000792016


 14%|█▍        | 6314/43738 [47:37<4:20:35,  2.39it/s]

step:5860, train_loss:0.0686648066273724, acc:0.6298701298701299


 14%|█▍        | 6315/43738 [47:37<3:45:39,  2.76it/s]

step:5860, train_loss:0.06865394315190144, acc:0.6299287410926366


 14%|█▍        | 6316/43738 [47:38<3:56:12,  2.64it/s]

step:5860, train_loss:0.0686589853921656, acc:0.62982900569981


 14%|█▍        | 6317/43738 [47:38<4:02:55,  2.57it/s]

step:5860, train_loss:0.06865680612974503, acc:0.6298876048757321


 14%|█▍        | 6318/43738 [47:39<3:57:00,  2.63it/s]

step:5860, train_loss:0.0686710697998636, acc:0.6297879075656854


 14%|█▍        | 6319/43738 [47:39<3:59:55,  2.60it/s]

step:5860, train_loss:0.06866325303959842, acc:0.6298464946985283


 15%|█▌        | 6624/43738 [49:54<5:42:18,  1.81it/s]

step:5880, train_loss:0.06863197264034689, acc:0.6313405797101449


 15%|█▌        | 6625/43738 [49:55<6:22:56,  1.62it/s]

step:5880, train_loss:0.06864950235126505, acc:0.631245283018868


 15%|█▌        | 6626/43738 [49:55<6:45:32,  1.53it/s]

step:5880, train_loss:0.06864100930981584, acc:0.6313009357078176


 15%|█▌        | 6627/43738 [49:56<5:58:15,  1.73it/s]

step:5880, train_loss:0.06863113289761988, acc:0.6313565716010261


 15%|█▌        | 6628/43738 [49:56<5:14:53,  1.96it/s]

step:5880, train_loss:0.06862453121139232, acc:0.6314121907060953


 15%|█▌        | 6629/43738 [49:57<5:08:13,  2.01it/s]

step:5880, train_loss:0.0686231033112015, acc:0.63131694071504


 15%|█▌        | 6630/43738 [49:57<5:01:57,  2.05it/s]

step:5880, train_loss:0.06861284168945979, acc:0.6313725490196078


 15%|█▌        | 6631/43738 [49:57<4:15:28,  2.42it/s]

step:5880, train_loss:0.06862798298190764, acc:0.6312773337354849


 15%|█▌        | 6632/43738 [49:58<4:28:15,  2.31it/s]

step:5880, train_loss:0.0686260529715194, acc:0.6311821471652593


 15%|█▌        | 6633/43738 [49:58<3:59:09,  2.59it/s]

step:5880, train_loss:0.06864369942970025, acc:0.6310869892959445


 15%|█▌        | 6634/43738 [49:58<3:57:13,  2.61it/s]

step:5880, train_loss:0.06866300625444474, acc:0.6309918601145613


 15%|█▌        | 6635/43738 [49:59<4:08:54,  2.48it/s]

step:5880, train_loss:0.06867511463855108, acc:0.6308967596081386


 15%|█▌        | 6636/43738 [49:59<4:41:28,  2.20it/s]

step:5880, train_loss:0.06866659442375239, acc:0.6309523809523809


 15%|█▌        | 6637/43738 [50:00<5:37:05,  1.83it/s]

step:5880, train_loss:0.06866389427553886, acc:0.6310079855356335


 15%|█▌        | 6638/43738 [50:00<4:48:04,  2.15it/s]

step:5880, train_loss:0.06867372308428486, acc:0.630912925579994


 15%|█▌        | 6639/43738 [50:01<5:14:15,  1.97it/s]

step:5880, train_loss:0.06867403966821278, acc:0.6309685193553246


 16%|█▌        | 6944/43738 [52:21<4:47:55,  2.13it/s]

step:5900, train_loss:0.06848552844125076, acc:0.6324884792626728


 16%|█▌        | 6945/43738 [52:21<4:03:22,  2.52it/s]

step:5900, train_loss:0.06847567314148295, acc:0.6325413966882649


 16%|█▌        | 6946/43738 [52:22<5:08:58,  1.98it/s]

step:5900, train_loss:0.06846792225800297, acc:0.6325942988770515


 16%|█▌        | 6947/43738 [52:22<4:27:11,  2.29it/s]

step:5900, train_loss:0.0684641404357766, acc:0.6326471858356125


 16%|█▌        | 6948/43738 [52:23<5:11:14,  1.97it/s]

step:5900, train_loss:0.06849315349699511, acc:0.6325561312607945


 16%|█▌        | 6949/43738 [52:23<4:39:12,  2.20it/s]

step:5900, train_loss:0.06848422629640574, acc:0.6326090084904302


 16%|█▌        | 6950/43738 [52:24<4:02:29,  2.53it/s]

step:5900, train_loss:0.06847529840905206, acc:0.6326618705035971


 16%|█▌        | 6951/43738 [52:24<3:50:41,  2.66it/s]

step:5900, train_loss:0.06847571052018732, acc:0.6327147173068624


 16%|█▌        | 6952/43738 [52:24<4:08:28,  2.47it/s]

step:5900, train_loss:0.06847052825954075, acc:0.6327675489067894


 16%|█▌        | 6953/43738 [52:25<4:00:57,  2.54it/s]

step:5900, train_loss:0.06847236511136723, acc:0.6326765424996404


 16%|█▌        | 6954/43738 [52:25<5:14:18,  1.95it/s]

step:5900, train_loss:0.06851335515735617, acc:0.6325855622663216


 16%|█▌        | 6955/43738 [52:26<5:09:18,  1.98it/s]

step:5900, train_loss:0.0685175100715207, acc:0.6324946081955428


 16%|█▌        | 6956/43738 [52:26<4:22:17,  2.34it/s]

step:5900, train_loss:0.06852385317030107, acc:0.6324036802760207


 16%|█▌        | 6957/43738 [52:27<4:10:35,  2.45it/s]

step:5900, train_loss:0.06851461483602367, acc:0.6324565186143453


 16%|█▌        | 6958/43738 [52:27<3:55:14,  2.61it/s]

step:5900, train_loss:0.06850478743595764, acc:0.6325093417648749


 16%|█▌        | 6959/43738 [52:27<3:31:38,  2.90it/s]

step:5900, train_loss:0.06850839975673223, acc:0.6324184509268573


 17%|█▋        | 7264/43738 [54:46<4:31:23,  2.24it/s]

step:5920, train_loss:0.06830521341544227, acc:0.6320209251101322


 17%|█▋        | 7265/43738 [54:46<4:37:20,  2.19it/s]

step:5920, train_loss:0.0683096522297896, acc:0.631933929800413


 17%|█▋        | 7266/43738 [54:47<4:45:05,  2.13it/s]

step:5920, train_loss:0.06832327255320038, acc:0.6318469584365538


 17%|█▋        | 7267/43738 [54:48<5:47:22,  1.75it/s]

step:5920, train_loss:0.0683296340280704, acc:0.6317600110086693


 17%|█▋        | 7268/43738 [54:48<5:30:13,  1.84it/s]

step:5920, train_loss:0.06832201728329407, acc:0.631810676940011


 17%|█▋        | 7269/43738 [54:48<4:52:41,  2.08it/s]

step:5920, train_loss:0.06831362042265698, acc:0.6318613289310772


 17%|█▋        | 7270/43738 [54:49<4:42:43,  2.15it/s]

step:5920, train_loss:0.06831087721426853, acc:0.6319119669876203


 17%|█▋        | 7271/43738 [54:49<4:53:30,  2.07it/s]

step:5920, train_loss:0.0683014922670905, acc:0.6319625911153899


 17%|█▋        | 7272/43738 [54:50<4:49:14,  2.10it/s]

step:5920, train_loss:0.06830360303736076, acc:0.6318756875687569


 17%|█▋        | 7273/43738 [54:50<4:43:23,  2.14it/s]

step:5920, train_loss:0.0683092751118024, acc:0.631788807919703


 17%|█▋        | 7274/43738 [54:51<4:32:21,  2.23it/s]

step:5920, train_loss:0.06829990001191706, acc:0.6318394281000825


 17%|█▋        | 7275/43738 [54:51<4:22:04,  2.32it/s]

step:5920, train_loss:0.06829577587908166, acc:0.6318900343642612


 17%|█▋        | 7276/43738 [54:51<3:50:55,  2.63it/s]

step:5920, train_loss:0.06829431911048463, acc:0.6319406267179769


 17%|█▋        | 7277/43738 [54:52<4:55:32,  2.06it/s]

step:5920, train_loss:0.0682912560665872, acc:0.6319912051669644


 17%|█▋        | 7278/43738 [54:53<5:17:01,  1.92it/s]

step:5920, train_loss:0.0682820080418858, acc:0.6320417697169552


 17%|█▋        | 7279/43738 [54:53<5:24:03,  1.88it/s]

step:5920, train_loss:0.06827993483161653, acc:0.6319549388652287


 17%|█▋        | 7584/43738 [57:19<6:10:47,  1.63it/s]

step:5940, train_loss:0.06823675072054, acc:0.6319883966244726


 17%|█▋        | 7585/43738 [57:19<4:58:49,  2.02it/s]

step:5940, train_loss:0.06823321050556344, acc:0.6320369149637443


 17%|█▋        | 7586/43738 [57:20<4:32:43,  2.21it/s]

step:5940, train_loss:0.06822807061608531, acc:0.6320854205114685


 17%|█▋        | 7587/43738 [57:20<5:08:53,  1.95it/s]

step:5940, train_loss:0.06822958333767679, acc:0.6321339132727033


 17%|█▋        | 7588/43738 [57:21<4:22:18,  2.30it/s]

step:5940, train_loss:0.0682205917852906, acc:0.6321823932525039


 17%|█▋        | 7589/43738 [57:21<4:08:44,  2.42it/s]

step:5940, train_loss:0.06821616652886701, acc:0.632230860455923


 17%|█▋        | 7590/43738 [57:22<4:58:44,  2.02it/s]

step:5940, train_loss:0.06823219809834864, acc:0.6321475625823452


 17%|█▋        | 7591/43738 [57:22<5:03:05,  1.99it/s]

step:5940, train_loss:0.06826181738771302, acc:0.6320642866552496


 17%|█▋        | 7592/43738 [57:23<5:37:29,  1.79it/s]

step:5940, train_loss:0.06825285943254038, acc:0.6321127502634352


 17%|█▋        | 7593/43738 [57:23<5:17:33,  1.90it/s]

step:5940, train_loss:0.06825071578901129, acc:0.6321612011062822


 17%|█▋        | 7594/43738 [57:24<5:27:24,  1.84it/s]

step:5940, train_loss:0.06826523785751441, acc:0.6320779562812747


 17%|█▋        | 7595/43738 [57:24<5:26:27,  1.85it/s]

step:5940, train_loss:0.06826192310193052, acc:0.6321263989466754


 17%|█▋        | 7596/43738 [57:25<4:50:16,  2.08it/s]

step:5940, train_loss:0.06825366614779334, acc:0.6321748288572933


 17%|█▋        | 7597/43738 [57:25<4:05:54,  2.45it/s]

step:5940, train_loss:0.06824772871789048, acc:0.632223246018165


 17%|█▋        | 7598/43738 [57:25<3:36:30,  2.78it/s]

step:5940, train_loss:0.06824187992819286, acc:0.6322716504343249


 17%|█▋        | 7599/43738 [57:26<3:28:30,  2.89it/s]

step:5940, train_loss:0.0682363073732786, acc:0.6323200421108041


 18%|█▊        | 7904/43738 [59:44<5:23:16,  1.85it/s]

step:5960, train_loss:0.0682490972355853, acc:0.6328441295546559


 18%|█▊        | 7905/43738 [59:45<4:32:04,  2.20it/s]

step:5960, train_loss:0.06824150475298434, acc:0.6328905755850728


 18%|█▊        | 7906/43738 [59:45<4:16:30,  2.33it/s]

step:5960, train_loss:0.06827353979958303, acc:0.6328105236529218


 18%|█▊        | 7907/43738 [59:46<4:53:26,  2.04it/s]

step:5960, train_loss:0.06827252050475677, acc:0.6328569621854053


 18%|█▊        | 7908/43738 [59:46<4:11:19,  2.38it/s]

step:5960, train_loss:0.06826395086298313, acc:0.6329033889731916


 18%|█▊        | 7909/43738 [59:46<3:39:34,  2.72it/s]

step:5960, train_loss:0.0682598937280366, acc:0.6329498040207359


 18%|█▊        | 7910/43738 [59:47<4:15:37,  2.34it/s]

step:5960, train_loss:0.06825676095307252, acc:0.6329962073324905


 18%|█▊        | 7911/43738 [59:48<5:23:33,  1.85it/s]

step:5960, train_loss:0.06826881577268504, acc:0.6329161926431551


 18%|█▊        | 7912/43738 [59:48<5:02:44,  1.97it/s]

step:5960, train_loss:0.0682626745145335, acc:0.6329625884732053


 18%|█▊        | 7913/43738 [59:48<4:19:31,  2.30it/s]

step:5960, train_loss:0.0682548227465064, acc:0.6330089725767724


 18%|█▊        | 7914/43738 [59:49<3:45:14,  2.65it/s]

step:5960, train_loss:0.06824902661917247, acc:0.6330553449583017


 18%|█▊        | 7915/43738 [59:49<4:59:08,  2.00it/s]

step:5960, train_loss:0.06825896461936395, acc:0.6329753632343651


 18%|█▊        | 7916/43738 [59:50<5:36:24,  1.77it/s]

step:5960, train_loss:0.0682859658480248, acc:0.6328954017180394


 18%|█▊        | 7917/43738 [59:51<5:47:18,  1.72it/s]

step:5960, train_loss:0.06830561495246018, acc:0.6328154604016673


 18%|█▊        | 7918/43738 [59:51<6:09:30,  1.62it/s]

step:5960, train_loss:0.06830774970522553, acc:0.6327355392775954


 18%|█▊        | 7919/43738 [59:52<5:41:51,  1.75it/s]

step:5960, train_loss:0.06830718113629113, acc:0.6326556383381741


 19%|█▉        | 8224/43738 [1:02:10<4:26:25,  2.22it/s]

step:5980, train_loss:0.06853900190250795, acc:0.6303501945525292


 19%|█▉        | 8225/43738 [1:02:10<4:02:36,  2.44it/s]

step:5980, train_loss:0.06853078587950238, acc:0.6303951367781155


 19%|█▉        | 8226/43738 [1:02:11<4:43:17,  2.09it/s]

step:5980, train_loss:0.06853743927118147, acc:0.6303185023097496


 19%|█▉        | 8227/43738 [1:02:12<5:11:06,  1.90it/s]

step:5980, train_loss:0.06853016547647761, acc:0.6303634374620153


 19%|█▉        | 8228/43738 [1:02:13<5:59:35,  1.65it/s]

step:5980, train_loss:0.06852708152369062, acc:0.6304083616917842


 19%|█▉        | 8229/43738 [1:02:13<5:24:11,  1.83it/s]

step:5980, train_loss:0.06852063032481746, acc:0.630453275003038


 19%|█▉        | 8230/43738 [1:02:13<4:33:26,  2.16it/s]

step:5980, train_loss:0.06851822290976078, acc:0.630498177399757


 19%|█▉        | 8231/43738 [1:02:14<4:30:54,  2.18it/s]

step:5980, train_loss:0.06850992095870939, acc:0.630543068885919


 19%|█▉        | 8232/43738 [1:02:14<3:58:52,  2.48it/s]

step:5980, train_loss:0.06850178996783506, acc:0.6305879494655005


 19%|█▉        | 8233/43738 [1:02:15<4:30:06,  2.19it/s]

step:5980, train_loss:0.06849971704254748, acc:0.6306328191424754


 19%|█▉        | 8234/43738 [1:02:15<5:05:17,  1.94it/s]

step:5980, train_loss:0.06851080284376847, acc:0.6305562302647559


 19%|█▉        | 8235/43738 [1:02:15<4:18:45,  2.29it/s]

step:5980, train_loss:0.0685185062027953, acc:0.6304796599878567


 19%|█▉        | 8236/43738 [1:02:16<4:52:50,  2.02it/s]

step:5980, train_loss:0.06852087744937352, acc:0.6304031083050025


 19%|█▉        | 8237/43738 [1:02:16<4:43:37,  2.09it/s]

step:5980, train_loss:0.06852483328662103, acc:0.6303265752094209


 19%|█▉        | 8238/43738 [1:02:17<4:16:47,  2.30it/s]

step:5980, train_loss:0.06852729758791797, acc:0.6302500606943433


 19%|█▉        | 8239/43738 [1:02:17<4:48:30,  2.05it/s]

step:5980, train_loss:0.0685237905481866, acc:0.6302949387061536


 20%|█▉        | 8544/43738 [1:04:34<4:31:02,  2.16it/s]

step:6000, train_loss:0.0681350510043803, acc:0.6326076779026217


 20%|█▉        | 8545/43738 [1:04:35<4:10:22,  2.34it/s]

step:6000, train_loss:0.06812852887185253, acc:0.6326506729081334


 20%|█▉        | 8546/43738 [1:04:35<3:59:35,  2.45it/s]

step:6000, train_loss:0.06812063154555291, acc:0.6326936578516265


 20%|█▉        | 8547/43738 [1:04:35<3:36:22,  2.71it/s]

step:6000, train_loss:0.06811266481293465, acc:0.6327366327366327


 20%|█▉        | 8548/43738 [1:04:36<3:55:38,  2.49it/s]

step:6000, train_loss:0.06812480073391959, acc:0.6326626111371081


 20%|█▉        | 8549/43738 [1:04:36<4:05:58,  2.38it/s]

step:6000, train_loss:0.06811835241984382, acc:0.6327055795999532


 20%|█▉        | 8550/43738 [1:04:37<4:40:14,  2.09it/s]

step:6000, train_loss:0.06812364270634219, acc:0.6326315789473684


 20%|█▉        | 8551/43738 [1:04:37<3:56:50,  2.48it/s]

step:6000, train_loss:0.06811580936181938, acc:0.6326745409893579


 20%|█▉        | 8552/43738 [1:04:38<4:25:24,  2.21it/s]

step:6000, train_loss:0.06811441501582764, acc:0.632600561272217


 20%|█▉        | 8553/43738 [1:04:38<3:53:22,  2.51it/s]

step:6000, train_loss:0.0681305429086031, acc:0.6325265988542031


 20%|█▉        | 8554/43738 [1:04:38<3:43:07,  2.63it/s]

step:6000, train_loss:0.0681225829652821, acc:0.632569558101473


 20%|█▉        | 8555/43738 [1:04:38<3:25:57,  2.85it/s]

step:6000, train_loss:0.06811466916122168, acc:0.6326125073056692


 20%|█▉        | 8556/43738 [1:04:39<3:42:16,  2.64it/s]

step:6000, train_loss:0.06811692322819718, acc:0.6325385694249649


 20%|█▉        | 8557/43738 [1:04:39<3:24:56,  2.86it/s]

step:6000, train_loss:0.06810897277287242, acc:0.6325815122122239


 20%|█▉        | 8558/43738 [1:04:40<3:30:42,  2.78it/s]

step:6000, train_loss:0.06810539444254952, acc:0.6326244449637766


 20%|█▉        | 8559/43738 [1:04:40<4:19:15,  2.26it/s]

step:6000, train_loss:0.06810009268695223, acc:0.6326673676831406


 20%|██        | 8864/43738 [1:06:54<4:07:58,  2.34it/s]

step:6020, train_loss:0.06838206701049851, acc:0.6307536101083032


 20%|██        | 8865/43738 [1:06:54<3:55:57,  2.46it/s]

step:6020, train_loss:0.06838285487038291, acc:0.630682459108855


 20%|██        | 8866/43738 [1:06:55<4:03:06,  2.39it/s]

step:6020, train_loss:0.06837883173046527, acc:0.6307241145950824


 20%|██        | 8867/43738 [1:06:55<3:58:23,  2.44it/s]

step:6020, train_loss:0.06838412146754695, acc:0.630652982970565


 20%|██        | 8868/43738 [1:06:56<3:58:52,  2.43it/s]

step:6020, train_loss:0.06837651648365281, acc:0.6306946323861073


 20%|██        | 8869/43738 [1:06:56<3:53:26,  2.49it/s]

step:6020, train_loss:0.06837462431618407, acc:0.6307362724095162


 20%|██        | 8870/43738 [1:06:56<3:42:48,  2.61it/s]

step:6020, train_loss:0.0683809819421941, acc:0.6306651634723788


 20%|██        | 8871/43738 [1:06:57<4:56:59,  1.96it/s]

step:6020, train_loss:0.06838411574404755, acc:0.6305940705670161


 20%|██        | 8872/43738 [1:06:58<5:20:49,  1.81it/s]

step:6020, train_loss:0.06837640886360705, acc:0.6306357078449053


 20%|██        | 8873/43738 [1:06:58<4:28:33,  2.16it/s]

step:6020, train_loss:0.06837061916693517, acc:0.630677335737631


 20%|██        | 8874/43738 [1:06:59<4:44:37,  2.04it/s]

step:6020, train_loss:0.06837028036352298, acc:0.630718954248366


 20%|██        | 8875/43738 [1:06:59<4:14:57,  2.28it/s]

step:6020, train_loss:0.06836462284620641, acc:0.6307605633802816


 20%|██        | 8876/43738 [1:06:59<3:39:43,  2.64it/s]

step:6020, train_loss:0.06835754858258805, acc:0.630802163136548


 20%|██        | 8877/43738 [1:07:00<4:11:19,  2.31it/s]

step:6020, train_loss:0.06836281440412924, acc:0.630731102850062


 20%|██        | 8878/43738 [1:07:00<3:36:15,  2.69it/s]

step:6020, train_loss:0.06835519227921875, acc:0.6307726965532777


 20%|██        | 8879/43738 [1:07:01<4:26:43,  2.18it/s]

step:6020, train_loss:0.06837953918344501, acc:0.6307016555918459


 21%|██        | 9184/43738 [1:09:15<6:14:12,  1.54it/s]

step:6040, train_loss:0.06848729062717798, acc:0.6303353658536586


 21%|██        | 9185/43738 [1:09:15<5:31:12,  1.74it/s]

step:6040, train_loss:0.06848204905527432, acc:0.6303756124115406


 21%|██        | 9186/43738 [1:09:16<5:25:49,  1.77it/s]

step:6040, train_loss:0.06848144034297501, acc:0.6303069888961463


 21%|██        | 9187/43738 [1:09:16<5:38:30,  1.70it/s]

step:6040, train_loss:0.0684747361477611, acc:0.6303472297812126


 21%|██        | 9188/43738 [1:09:17<6:09:16,  1.56it/s]

step:6040, train_loss:0.06849980369955717, acc:0.6302786242925555


 21%|██        | 9189/43738 [1:09:18<6:56:42,  1.38it/s]

step:6040, train_loss:0.06852152329739179, acc:0.6302100337359887


 21%|██        | 9190/43738 [1:09:19<6:44:18,  1.42it/s]

step:6040, train_loss:0.06851454679166363, acc:0.6302502720348204


 21%|██        | 9191/43738 [1:09:19<6:07:20,  1.57it/s]

step:6040, train_loss:0.0685121142976364, acc:0.6302905015776303


 21%|██        | 9192/43738 [1:09:20<6:03:50,  1.58it/s]

step:6040, train_loss:0.068507016295962, acc:0.6303307223672759


 21%|██        | 9193/43738 [1:09:21<6:23:45,  1.50it/s]

step:6040, train_loss:0.06850692179922685, acc:0.6303709344066137


 21%|██        | 9194/43738 [1:09:22<6:56:06,  1.38it/s]

step:6040, train_loss:0.06852066893291248, acc:0.6303023711115945


 21%|██        | 9195/43738 [1:09:22<6:16:08,  1.53it/s]

step:6040, train_loss:0.06852095585318699, acc:0.6302338227297444


 21%|██        | 9196/43738 [1:09:23<6:02:18,  1.59it/s]

step:6040, train_loss:0.06851632307227525, acc:0.6302740321879078


 21%|██        | 9197/43738 [1:09:24<6:52:19,  1.40it/s]

step:6040, train_loss:0.06852223899430797, acc:0.6302055017940633


 21%|██        | 9198/43738 [1:09:24<6:09:47,  1.56it/s]

step:6040, train_loss:0.06852074274127277, acc:0.6302457055881714


 21%|██        | 9199/43738 [1:09:25<6:04:22,  1.58it/s]

step:6040, train_loss:0.06851605519264661, acc:0.630285900641374


 22%|██▏       | 9504/43738 [1:11:47<3:47:45,  2.51it/s]

step:6060, train_loss:0.06881973800019948, acc:0.6289983164983165


 22%|██▏       | 9505/43738 [1:11:47<3:45:32,  2.53it/s]

step:6060, train_loss:0.06881405760318302, acc:0.6290373487638086


 22%|██▏       | 9506/43738 [1:11:47<3:25:46,  2.77it/s]

step:6060, train_loss:0.06880683200216597, acc:0.6290763728171681


 22%|██▏       | 9507/43738 [1:11:48<4:42:51,  2.02it/s]

step:6060, train_loss:0.0688287853383934, acc:0.6290102030083097


 22%|██▏       | 9508/43738 [1:11:49<4:34:36,  2.08it/s]

step:6060, train_loss:0.06882573501820355, acc:0.6290492217080353


 22%|██▏       | 9509/43738 [1:11:49<4:33:26,  2.09it/s]

step:6060, train_loss:0.06882147607720442, acc:0.6290882322010727


 22%|██▏       | 9510/43738 [1:11:49<3:57:11,  2.41it/s]

step:6060, train_loss:0.06882008899520588, acc:0.6291272344900105


 22%|██▏       | 9511/43738 [1:11:50<4:27:00,  2.14it/s]

step:6060, train_loss:0.06882618556302487, acc:0.6290610871622332


 22%|██▏       | 9512/43738 [1:11:50<3:49:50,  2.48it/s]

step:6060, train_loss:0.0688189525326035, acc:0.6291000841042893


 22%|██▏       | 9513/43738 [1:11:51<4:23:56,  2.16it/s]

step:6060, train_loss:0.06881510405478901, acc:0.6291390728476821


 22%|██▏       | 9514/43738 [1:11:51<4:01:00,  2.37it/s]

step:6060, train_loss:0.06881191204647809, acc:0.6291780533949969


 22%|██▏       | 9515/43738 [1:11:52<4:05:36,  2.32it/s]

step:6060, train_loss:0.06882106347952363, acc:0.6291119285338939


 22%|██▏       | 9516/43738 [1:11:52<3:55:50,  2.42it/s]

step:6060, train_loss:0.06881824976746465, acc:0.6291509037410676


 22%|██▏       | 9517/43738 [1:11:52<4:01:52,  2.36it/s]

step:6060, train_loss:0.06881450022294779, acc:0.6291898707575917


 22%|██▏       | 9518/43738 [1:11:53<4:27:02,  2.14it/s]

step:6060, train_loss:0.06881310767211604, acc:0.6292288295860475


 22%|██▏       | 9519/43738 [1:11:53<4:08:29,  2.30it/s]

step:6060, train_loss:0.06881032662533139, acc:0.6292677802290156


 22%|██▏       | 9824/43738 [1:14:11<4:19:40,  2.18it/s]

step:6080, train_loss:0.06891393766345098, acc:0.6289698697068404


 22%|██▏       | 9825/43738 [1:14:11<3:48:51,  2.47it/s]

step:6080, train_loss:0.06891148631692018, acc:0.6289058524173028


 22%|██▏       | 9826/43738 [1:14:12<4:17:33,  2.19it/s]

step:6080, train_loss:0.068905427759607, acc:0.6289436189700793


 22%|██▏       | 9827/43738 [1:14:12<3:53:40,  2.42it/s]

step:6080, train_loss:0.06890344324097285, acc:0.6289813778365727


 22%|██▏       | 9828/43738 [1:14:13<4:57:10,  1.90it/s]

step:6080, train_loss:0.0689092827816455, acc:0.6289173789173789


 22%|██▏       | 9829/43738 [1:14:13<4:19:27,  2.18it/s]

step:6080, train_loss:0.06890963853464739, acc:0.6289551327703734


 22%|██▏       | 9830/43738 [1:14:14<4:15:26,  2.21it/s]

step:6080, train_loss:0.06891276362866763, acc:0.6289928789420143


 22%|██▏       | 9831/43738 [1:14:15<4:55:30,  1.91it/s]

step:6080, train_loss:0.06890586683093562, acc:0.6290306174346455


 22%|██▏       | 9832/43738 [1:14:15<4:56:23,  1.91it/s]

step:6080, train_loss:0.06891475226900848, acc:0.628966639544345


 22%|██▏       | 9833/43738 [1:14:15<4:42:42,  2.00it/s]

step:6080, train_loss:0.068924853330592, acc:0.6289026746669378


 22%|██▏       | 9834/43738 [1:14:16<4:24:24,  2.14it/s]

step:6080, train_loss:0.06892372023000813, acc:0.6288387227984543


 22%|██▏       | 9835/43738 [1:14:16<3:44:59,  2.51it/s]

step:6080, train_loss:0.0689173778237436, acc:0.6288764616166751


 22%|██▏       | 9836/43738 [1:14:16<3:22:45,  2.79it/s]

step:6080, train_loss:0.0689104160281462, acc:0.6289141927612851


 22%|██▏       | 9837/43738 [1:14:17<3:03:10,  3.08it/s]

step:6080, train_loss:0.0689034114799803, acc:0.6289519162346244


 22%|██▏       | 9838/43738 [1:14:17<2:48:11,  3.36it/s]

step:6080, train_loss:0.06889693194878976, acc:0.6289896320390324


 22%|██▏       | 9839/43738 [1:14:17<3:15:54,  2.88it/s]

step:6080, train_loss:0.06889503172006751, acc:0.6290273401768472


 23%|██▎       | 10144/43738 [1:16:42<4:09:06,  2.25it/s]

step:6100, train_loss:0.06918310320006152, acc:0.6279574132492114


 23%|██▎       | 10145/43738 [1:16:42<4:15:20,  2.19it/s]

step:6100, train_loss:0.06920204466979257, acc:0.6278955150320354


 23%|██▎       | 10146/43738 [1:16:43<3:40:23,  2.54it/s]

step:6100, train_loss:0.06919633178018585, acc:0.6279321900256258


 23%|██▎       | 10147/43738 [1:16:43<4:18:06,  2.17it/s]

step:6100, train_loss:0.06918986221612085, acc:0.62796885779048


 23%|██▎       | 10148/43738 [1:16:44<4:30:16,  2.07it/s]

step:6100, train_loss:0.06919297442223282, acc:0.627906976744186


 23%|██▎       | 10149/43738 [1:16:44<3:54:29,  2.39it/s]

step:6100, train_loss:0.06918921511736503, acc:0.6279436397674648


 23%|██▎       | 10150/43738 [1:16:44<3:27:16,  2.70it/s]

step:6100, train_loss:0.06918989881548544, acc:0.6278817733990147


 23%|██▎       | 10151/43738 [1:16:45<3:11:01,  2.93it/s]

step:6100, train_loss:0.06918312527473323, acc:0.6279184316816078


 23%|██▎       | 10152/43738 [1:16:45<3:07:59,  2.98it/s]

step:6100, train_loss:0.06917848662200846, acc:0.6279550827423168


 23%|██▎       | 10153/43738 [1:16:45<2:52:16,  3.25it/s]

step:6100, train_loss:0.06917432578586988, acc:0.6279917265832758


 23%|██▎       | 10154/43738 [1:16:46<3:29:50,  2.67it/s]

step:6100, train_loss:0.06916756323502812, acc:0.628028363206618


 23%|██▎       | 10155/43738 [1:16:46<3:31:10,  2.65it/s]

step:6100, train_loss:0.06916280235350727, acc:0.6280649926144757


 23%|██▎       | 10156/43738 [1:16:47<3:58:26,  2.35it/s]

step:6100, train_loss:0.06917760428450438, acc:0.62800315084679


 23%|██▎       | 10157/43738 [1:16:47<4:09:47,  2.24it/s]

step:6100, train_loss:0.06918706797950065, acc:0.6279413212562764


 23%|██▎       | 10158/43738 [1:16:48<4:38:57,  2.01it/s]

step:6100, train_loss:0.06918155541750083, acc:0.6279779484150423


 23%|██▎       | 10159/43738 [1:16:48<4:12:07,  2.22it/s]

step:6100, train_loss:0.06917775372982779, acc:0.6280145683630278


 24%|██▍       | 10464/43738 [1:19:14<5:55:53,  1.56it/s]

step:6120, train_loss:0.069249029438062, acc:0.6283448012232415


 24%|██▍       | 10465/43738 [1:19:14<4:51:45,  1.90it/s]

step:6120, train_loss:0.0692496795149972, acc:0.6282847587195414


 24%|██▍       | 10466/43738 [1:19:15<4:35:56,  2.01it/s]

step:6120, train_loss:0.06924339130815763, acc:0.6283202751767628


 24%|██▍       | 10467/43738 [1:19:15<4:46:28,  1.94it/s]

step:6120, train_loss:0.06923900958045143, acc:0.6283557848476163


 24%|██▍       | 10468/43738 [1:19:16<4:28:51,  2.06it/s]

step:6120, train_loss:0.0692388931878665, acc:0.6282957585021016


 24%|██▍       | 10469/43738 [1:19:16<5:10:07,  1.79it/s]

step:6120, train_loss:0.06923346759527305, acc:0.6283312637310153


 24%|██▍       | 10470/43738 [1:19:17<4:46:05,  1.94it/s]

step:6120, train_loss:0.06922896820550371, acc:0.6283667621776504


 24%|██▍       | 10471/43738 [1:19:17<4:01:23,  2.30it/s]

step:6120, train_loss:0.06922470219643435, acc:0.62840225384395


 24%|██▍       | 10472/43738 [1:19:17<3:52:11,  2.39it/s]

step:6120, train_loss:0.06924118107196374, acc:0.6283422459893048


 24%|██▍       | 10473/43738 [1:19:18<3:48:04,  2.43it/s]

step:6120, train_loss:0.06923612695606217, acc:0.628377733218753


 24%|██▍       | 10474/43738 [1:19:19<4:50:13,  1.91it/s]

step:6120, train_loss:0.06923315226683373, acc:0.6284132136719496


 24%|██▍       | 10475/43738 [1:19:19<5:31:31,  1.67it/s]

step:6120, train_loss:0.06922965499460208, acc:0.6284486873508354


 24%|██▍       | 10476/43738 [1:19:20<4:41:05,  1.97it/s]

step:6120, train_loss:0.06922739967376887, acc:0.6284841542573502


 24%|██▍       | 10477/43738 [1:19:20<3:55:42,  2.35it/s]

step:6120, train_loss:0.06922837529355491, acc:0.6284241672234419


 24%|██▍       | 10478/43738 [1:19:20<4:10:41,  2.21it/s]

step:6120, train_loss:0.06922186251817411, acc:0.6284596297003245


 24%|██▍       | 10479/43738 [1:19:21<3:36:27,  2.56it/s]

step:6120, train_loss:0.06921532859140611, acc:0.6284950854089131


 25%|██▍       | 10784/43738 [1:21:42<5:05:44,  1.80it/s]

step:6140, train_loss:0.06907981596690424, acc:0.629543768545994


 25%|██▍       | 10785/43738 [1:21:43<5:41:34,  1.61it/s]

step:6140, train_loss:0.06909119398103217, acc:0.6294853963838665


 25%|██▍       | 10786/43738 [1:21:43<5:12:44,  1.76it/s]

step:6140, train_loss:0.06909266146144984, acc:0.6294270350454293


 25%|██▍       | 10787/43738 [1:21:44<4:20:58,  2.10it/s]

step:6140, train_loss:0.06909780894164007, acc:0.6293686845276723


 25%|██▍       | 10788/43738 [1:21:44<4:01:57,  2.27it/s]

step:6140, train_loss:0.06914667974536834, acc:0.6293103448275862


 25%|██▍       | 10789/43738 [1:21:44<4:07:22,  2.22it/s]

step:6140, train_loss:0.06914250467434453, acc:0.6293447029381778


 25%|██▍       | 10790/43738 [1:21:45<4:33:39,  2.01it/s]

step:6140, train_loss:0.06913704323864812, acc:0.6293790546802595


 25%|██▍       | 10791/43738 [1:21:45<4:13:41,  2.16it/s]

step:6140, train_loss:0.06913701088453873, acc:0.6293207302381615


 25%|██▍       | 10792/43738 [1:21:46<4:30:54,  2.03it/s]

step:6140, train_loss:0.0691306925543417, acc:0.6293550778354337


 25%|██▍       | 10793/43738 [1:21:46<3:54:57,  2.34it/s]

step:6140, train_loss:0.06912463743339607, acc:0.6293894190679143


 25%|██▍       | 10794/43738 [1:21:47<3:25:32,  2.67it/s]

step:6140, train_loss:0.06912724658145085, acc:0.6293311098758569


 25%|██▍       | 10795/43738 [1:21:47<3:28:53,  2.63it/s]

step:6140, train_loss:0.06912341878511963, acc:0.629365446966188


 25%|██▍       | 10796/43738 [1:21:47<3:38:06,  2.52it/s]

step:6140, train_loss:0.06911839824684232, acc:0.6293997776954428


 25%|██▍       | 10797/43738 [1:21:48<3:18:16,  2.77it/s]

step:6140, train_loss:0.06912528991701504, acc:0.6293414837454848


 25%|██▍       | 10798/43738 [1:21:48<3:07:43,  2.92it/s]

step:6140, train_loss:0.0691308678189774, acc:0.6292832005927024


 25%|██▍       | 10799/43738 [1:21:48<3:25:42,  2.67it/s]

step:6140, train_loss:0.0691336741426924, acc:0.6293175294008705


 25%|██▌       | 11104/43738 [1:24:10<4:34:03,  1.98it/s]

step:6160, train_loss:0.06913580995635171, acc:0.62914265129683


 25%|██▌       | 11105/43738 [1:24:10<3:48:52,  2.38it/s]

step:6160, train_loss:0.069140566223618, acc:0.6290859972985142


 25%|██▌       | 11106/43738 [1:24:11<3:44:40,  2.42it/s]

step:6160, train_loss:0.06914475406193234, acc:0.6290293535026112


 25%|██▌       | 11107/43738 [1:24:11<4:07:05,  2.20it/s]

step:6160, train_loss:0.06914380834076471, acc:0.6290627532186909


 25%|██▌       | 11108/43738 [1:24:12<3:56:34,  2.30it/s]

step:6160, train_loss:0.06914992116729418, acc:0.62900612171408


 25%|██▌       | 11109/43738 [1:24:12<4:36:15,  1.97it/s]

step:6160, train_loss:0.06915675793560101, acc:0.6289495004050769


 25%|██▌       | 11110/43738 [1:24:13<4:36:42,  1.97it/s]

step:6160, train_loss:0.06915172235647293, acc:0.628982898289829


 25%|██▌       | 11111/43738 [1:24:13<4:25:05,  2.05it/s]

step:6160, train_loss:0.06914991539248645, acc:0.6290162901629016


 25%|██▌       | 11112/43738 [1:24:14<3:50:53,  2.36it/s]

step:6160, train_loss:0.06915079765700052, acc:0.628959683225342


 25%|██▌       | 11113/43738 [1:24:14<3:26:47,  2.63it/s]

step:6160, train_loss:0.06914476875421635, acc:0.6289930711778997


 25%|██▌       | 11114/43738 [1:24:14<3:09:13,  2.87it/s]

step:6160, train_loss:0.06913968922150063, acc:0.6290264531221882


 25%|██▌       | 11115/43738 [1:24:14<3:07:34,  2.90it/s]

step:6160, train_loss:0.06915933726925413, acc:0.6289698605488079


 25%|██▌       | 11116/43738 [1:24:15<3:13:45,  2.81it/s]

step:6160, train_loss:0.0691633764202079, acc:0.6289132781576107


 25%|██▌       | 11117/43738 [1:24:16<4:07:47,  2.19it/s]

step:6160, train_loss:0.06915753283860247, acc:0.6289466582711163


 25%|██▌       | 11118/43738 [1:24:16<3:42:08,  2.45it/s]

step:6160, train_loss:0.06915811863267358, acc:0.6288900881453499


 25%|██▌       | 11119/43738 [1:24:16<4:05:05,  2.22it/s]

step:6160, train_loss:0.06917165794274917, acc:0.6288335281949816


 26%|██▌       | 11424/43738 [1:26:36<3:36:02,  2.49it/s]

step:6180, train_loss:0.06906122234951388, acc:0.6281512605042017


 26%|██▌       | 11425/43738 [1:26:36<3:43:43,  2.41it/s]

step:6180, train_loss:0.06907430508101657, acc:0.6280962800875274


 26%|██▌       | 11426/43738 [1:26:36<3:50:47,  2.33it/s]

step:6180, train_loss:0.06906834495281859, acc:0.628128828986522


 26%|██▌       | 11427/43738 [1:26:37<3:55:48,  2.28it/s]

step:6180, train_loss:0.06906462195739527, acc:0.628161372188676


 26%|██▌       | 11428/43738 [1:26:37<4:04:02,  2.21it/s]

step:6180, train_loss:0.0690688937637898, acc:0.628106405320266


 26%|██▌       | 11429/43738 [1:26:38<4:05:28,  2.19it/s]

step:6180, train_loss:0.06906300147255078, acc:0.6281389447895704


 26%|██▌       | 11430/43738 [1:26:38<3:44:42,  2.40it/s]

step:6180, train_loss:0.0690622426324511, acc:0.6281714785651793


 26%|██▌       | 11431/43738 [1:26:39<3:33:31,  2.52it/s]

step:6180, train_loss:0.06906834664311069, acc:0.6281165252383868


 26%|██▌       | 11432/43738 [1:26:39<3:48:35,  2.36it/s]

step:6180, train_loss:0.06907889156442772, acc:0.6280615815255424


 26%|██▌       | 11433/43738 [1:26:39<3:46:42,  2.37it/s]

step:6180, train_loss:0.06908058170652581, acc:0.6280066474241232


 26%|██▌       | 11434/43738 [1:26:40<3:45:19,  2.39it/s]

step:6180, train_loss:0.06907458191131978, acc:0.6280391813888403


 26%|██▌       | 11435/43738 [1:26:40<3:38:50,  2.46it/s]

step:6180, train_loss:0.06907150482645467, acc:0.6280717096633144


 26%|██▌       | 11436/43738 [1:26:41<3:55:00,  2.29it/s]

step:6180, train_loss:0.06908482687211098, acc:0.6280167890870934


 26%|██▌       | 11437/43738 [1:26:41<4:47:58,  1.87it/s]

step:6180, train_loss:0.06907961122661135, acc:0.628049313631197


 26%|██▌       | 11438/43738 [1:26:42<4:31:33,  1.98it/s]

step:6180, train_loss:0.0690952350977253, acc:0.6279944046161916


 26%|██▌       | 11439/43738 [1:26:42<4:11:44,  2.14it/s]

step:6180, train_loss:0.06909486190272829, acc:0.6279395052015037


 27%|██▋       | 11744/43738 [1:29:00<3:15:38,  2.73it/s]

step:6200, train_loss:0.06915539483806334, acc:0.6270435967302452


 27%|██▋       | 11745/43738 [1:29:00<3:26:50,  2.58it/s]

step:6200, train_loss:0.06915033634631308, acc:0.6270753512132823


 27%|██▋       | 11746/43738 [1:29:01<4:12:59,  2.11it/s]

step:6200, train_loss:0.06914449057419644, acc:0.6271071002894603


 27%|██▋       | 11747/43738 [1:29:01<4:06:30,  2.16it/s]

step:6200, train_loss:0.06914962276242487, acc:0.6270537158423427


 27%|██▋       | 11748/43738 [1:29:02<3:51:42,  2.30it/s]

step:6200, train_loss:0.06914709430423348, acc:0.6270854613551243


 27%|██▋       | 11749/43738 [1:29:02<3:41:16,  2.41it/s]

step:6200, train_loss:0.06915005041743212, acc:0.6270320878372627


 27%|██▋       | 11750/43738 [1:29:02<3:47:07,  2.35it/s]

step:6200, train_loss:0.06917078775178363, acc:0.6269787234042553


 27%|██▋       | 11751/43738 [1:29:03<3:33:48,  2.49it/s]

step:6200, train_loss:0.0691696216538275, acc:0.6269253680537826


 27%|██▋       | 11752/43738 [1:29:04<4:34:11,  1.94it/s]

step:6200, train_loss:0.06917023847880781, acc:0.6268720217835262


 27%|██▋       | 11753/43738 [1:29:04<4:39:02,  1.91it/s]

step:6200, train_loss:0.06917469876019489, acc:0.6269037692504041


 27%|██▋       | 11754/43738 [1:29:05<4:36:58,  1.92it/s]

step:6200, train_loss:0.06918539300759473, acc:0.6268504338948443


 27%|██▋       | 11755/43738 [1:29:05<4:06:09,  2.17it/s]

step:6200, train_loss:0.06919265940181495, acc:0.6267971076137814


 27%|██▋       | 11756/43738 [1:29:05<4:04:23,  2.18it/s]

step:6200, train_loss:0.06918848470034145, acc:0.6268288533514801


 27%|██▋       | 11757/43738 [1:29:06<3:58:31,  2.23it/s]

step:6200, train_loss:0.06918578183716616, acc:0.6268605936888662


 27%|██▋       | 11758/43738 [1:29:06<4:01:05,  2.21it/s]

step:6200, train_loss:0.0691856715115134, acc:0.6268072801496853


 27%|██▋       | 11759/43738 [1:29:07<3:49:31,  2.32it/s]

step:6200, train_loss:0.0691817823809008, acc:0.6268390169232078


 28%|██▊       | 12064/43738 [1:31:24<4:06:40,  2.14it/s]

step:6220, train_loss:0.06931985211732916, acc:0.6264920424403183


 28%|██▊       | 12065/43738 [1:31:24<4:56:29,  1.78it/s]

step:6220, train_loss:0.0693165903296835, acc:0.6265230004144219


 28%|██▊       | 12066/43738 [1:31:25<4:03:55,  2.16it/s]

step:6220, train_loss:0.0693130095343886, acc:0.6265539532570861


 28%|██▊       | 12067/43738 [1:31:25<4:18:11,  2.04it/s]

step:6220, train_loss:0.06930813958456516, acc:0.6265849009695865


 28%|██▊       | 12068/43738 [1:31:25<3:39:51,  2.40it/s]

step:6220, train_loss:0.06930520556097361, acc:0.6266158435531985


 28%|██▊       | 12069/43738 [1:31:26<3:30:18,  2.51it/s]

step:6220, train_loss:0.06929951069050831, acc:0.6266467810091971


 28%|██▊       | 12070/43738 [1:31:26<3:17:08,  2.68it/s]

step:6220, train_loss:0.06929958977970135, acc:0.6265948632974316


 28%|██▊       | 12071/43738 [1:31:26<3:08:12,  2.80it/s]

step:6220, train_loss:0.06930068305936177, acc:0.6265429541877227


 28%|██▊       | 12072/43738 [1:31:27<2:54:05,  3.03it/s]

step:6220, train_loss:0.06929557072302484, acc:0.626573889993373


 28%|██▊       | 12073/43738 [1:31:27<4:00:46,  2.19it/s]

step:6220, train_loss:0.06929047515409353, acc:0.6266048206742317


 28%|██▊       | 12074/43738 [1:31:28<4:10:59,  2.10it/s]

step:6220, train_loss:0.06929160521776019, acc:0.6265529236375683


 28%|██▊       | 12075/43738 [1:31:29<4:22:58,  2.01it/s]

step:6220, train_loss:0.0693027892507763, acc:0.6265010351966873


 28%|██▊       | 12076/43738 [1:31:29<4:15:23,  2.07it/s]

step:6220, train_loss:0.06930054482511994, acc:0.6265319642265651


 28%|██▊       | 12077/43738 [1:31:29<3:42:32,  2.37it/s]

step:6220, train_loss:0.06929877503203674, acc:0.6265628881344705


 28%|██▊       | 12078/43738 [1:31:30<3:19:06,  2.65it/s]

step:6220, train_loss:0.06929304556138122, acc:0.6265938069216758


 28%|██▊       | 12079/43738 [1:31:30<3:31:21,  2.50it/s]

step:6220, train_loss:0.0692924201880928, acc:0.6266247205894527


 28%|██▊       | 12384/43738 [1:33:44<3:12:45,  2.71it/s]

step:6240, train_loss:0.06924866776724004, acc:0.626937984496124


 28%|██▊       | 12385/43738 [1:33:45<3:26:55,  2.53it/s]

step:6240, train_loss:0.06924394579388263, acc:0.626968106580541


 28%|██▊       | 12386/43738 [1:33:45<4:01:37,  2.16it/s]

step:6240, train_loss:0.06925192025804068, acc:0.6269174874858712


 28%|██▊       | 12387/43738 [1:33:46<3:55:38,  2.22it/s]

step:6240, train_loss:0.06925258059505537, acc:0.6268668765641399


 28%|██▊       | 12388/43738 [1:33:46<3:41:37,  2.36it/s]

step:6240, train_loss:0.06924760657829165, acc:0.6268969970939618


 28%|██▊       | 12389/43738 [1:33:47<3:40:50,  2.37it/s]

step:6240, train_loss:0.06924662750294736, acc:0.6268463959964484


 28%|██▊       | 12390/43738 [1:33:47<3:24:53,  2.55it/s]

step:6240, train_loss:0.06925743805424779, acc:0.6267958030669895


 28%|██▊       | 12391/43738 [1:33:47<3:13:23,  2.70it/s]

step:6240, train_loss:0.06925694722208234, acc:0.6268259220401905


 28%|██▊       | 12392/43738 [1:33:48<4:34:12,  1.91it/s]

step:6240, train_loss:0.06925144444997887, acc:0.6268560361523564


 28%|██▊       | 12393/43738 [1:33:49<5:03:11,  1.72it/s]

step:6240, train_loss:0.06925139276332107, acc:0.626805454692165


 28%|██▊       | 12394/43738 [1:33:50<5:20:15,  1.63it/s]

step:6240, train_loss:0.06924622506935521, acc:0.6268355655962563


 28%|██▊       | 12395/43738 [1:33:50<5:04:09,  1.72it/s]

step:6240, train_loss:0.06925695432317543, acc:0.6267849939491731


 28%|██▊       | 12396/43738 [1:33:50<4:34:43,  1.90it/s]

step:6240, train_loss:0.06925153134718996, acc:0.6268151016456922


 28%|██▊       | 12397/43738 [1:33:51<4:24:39,  1.97it/s]

step:6240, train_loss:0.0692539240291803, acc:0.626764539808018


 28%|██▊       | 12398/43738 [1:33:52<4:39:37,  1.87it/s]

step:6240, train_loss:0.06924834730846198, acc:0.6267946442974673


 28%|██▊       | 12399/43738 [1:33:52<4:26:06,  1.96it/s]

step:6240, train_loss:0.0692485046835183, acc:0.6267440922655053


 29%|██▉       | 12704/43738 [1:36:08<3:28:43,  2.48it/s]

step:6260, train_loss:0.06921884514316023, acc:0.6273614609571788


 29%|██▉       | 12705/43738 [1:36:09<4:15:23,  2.03it/s]

step:6260, train_loss:0.06921941513331796, acc:0.6273120818575364


 29%|██▉       | 12706/43738 [1:36:10<4:38:24,  1.86it/s]

step:6260, train_loss:0.0692298441279958, acc:0.627262710530458


 29%|██▉       | 12707/43738 [1:36:10<4:28:01,  1.93it/s]

step:6260, train_loss:0.06923776916968669, acc:0.6272133469741088


 29%|██▉       | 12708/43738 [1:36:11<4:28:00,  1.93it/s]

step:6260, train_loss:0.06923257647377956, acc:0.6272426817752597


 29%|██▉       | 12709/43738 [1:36:11<3:58:35,  2.17it/s]

step:6260, train_loss:0.06922822241221331, acc:0.6272720119600284


 29%|██▉       | 12710/43738 [1:36:11<3:27:01,  2.50it/s]

step:6260, train_loss:0.06923492664937846, acc:0.6272226593233674


 29%|██▉       | 12711/43738 [1:36:12<3:30:54,  2.45it/s]

step:6260, train_loss:0.0692296317328491, acc:0.6272519864684132


 29%|██▉       | 12712/43738 [1:36:12<3:30:12,  2.46it/s]

step:6260, train_loss:0.06922948121442789, acc:0.6272813089993706


 29%|██▉       | 12713/43738 [1:36:12<3:09:34,  2.73it/s]

step:6260, train_loss:0.06922403599942618, acc:0.6273106269173288


 29%|██▉       | 12714/43738 [1:36:12<2:55:14,  2.95it/s]

step:6260, train_loss:0.06922199436111917, acc:0.6273399402233758


 29%|██▉       | 12715/43738 [1:36:13<2:53:43,  2.98it/s]

step:6260, train_loss:0.0692209051824455, acc:0.6272906016515926


 29%|██▉       | 12716/43738 [1:36:13<3:24:56,  2.52it/s]

step:6260, train_loss:0.06921627575245928, acc:0.627319911921988


 29%|██▉       | 12717/43738 [1:36:14<3:29:17,  2.47it/s]

step:6260, train_loss:0.06921124760521379, acc:0.6273492175827632


 29%|██▉       | 12718/43738 [1:36:14<3:16:50,  2.63it/s]

step:6260, train_loss:0.06920601525431645, acc:0.6273785186350055


 29%|██▉       | 12719/43738 [1:36:15<4:01:14,  2.14it/s]

step:6260, train_loss:0.06920059509187834, acc:0.6274078150798019


 30%|██▉       | 13024/43738 [1:38:28<3:10:25,  2.69it/s]

step:6280, train_loss:0.06917180172851874, acc:0.6276873464373465


 30%|██▉       | 13025/43738 [1:38:28<3:16:09,  2.61it/s]

step:6280, train_loss:0.06918249048756511, acc:0.6276391554702495


 30%|██▉       | 13026/43738 [1:38:28<2:55:45,  2.91it/s]

step:6280, train_loss:0.06918028088296493, acc:0.6276677414401965


 30%|██▉       | 13027/43738 [1:38:29<2:55:33,  2.92it/s]

step:6280, train_loss:0.0691753894151617, acc:0.6276963230214171


 30%|██▉       | 13028/43738 [1:38:29<3:32:32,  2.41it/s]

step:6280, train_loss:0.06917299758686339, acc:0.6277249002149217


 30%|██▉       | 13029/43738 [1:38:30<3:19:10,  2.57it/s]

step:6280, train_loss:0.06916839771964846, acc:0.6277534730217208


 30%|██▉       | 13030/43738 [1:38:30<3:24:25,  2.50it/s]

step:6280, train_loss:0.06917104584171886, acc:0.6277052954719877


 30%|██▉       | 13031/43738 [1:38:30<3:00:21,  2.84it/s]

step:6280, train_loss:0.06916578336552981, acc:0.6277338653978973


 30%|██▉       | 13032/43738 [1:38:31<3:01:19,  2.82it/s]

step:6280, train_loss:0.06917600708612834, acc:0.6276856967464702


 30%|██▉       | 13033/43738 [1:38:31<3:20:32,  2.55it/s]

step:6280, train_loss:0.0691709783561121, acc:0.6277142637919129


 30%|██▉       | 13034/43738 [1:38:32<3:21:52,  2.53it/s]

step:6280, train_loss:0.06916567148741036, acc:0.6277428264538898


 30%|██▉       | 13035/43738 [1:38:32<3:21:14,  2.54it/s]

step:6280, train_loss:0.06917060556008628, acc:0.6276946682009973


 30%|██▉       | 13036/43738 [1:38:32<3:02:07,  2.81it/s]

step:6280, train_loss:0.0691865595109956, acc:0.6276465173366064


 30%|██▉       | 13037/43738 [1:38:33<3:14:17,  2.63it/s]

step:6280, train_loss:0.06918171783899021, acc:0.6276750786223825


 30%|██▉       | 13038/43738 [1:38:33<3:41:18,  2.31it/s]

step:6280, train_loss:0.06917931968751076, acc:0.6277036355269213


 30%|██▉       | 13039/43738 [1:38:33<3:20:57,  2.55it/s]

step:6280, train_loss:0.06917422982322566, acc:0.6277321880512309


 31%|███       | 13344/43738 [1:40:52<4:03:19,  2.08it/s]

step:6300, train_loss:0.06921972518163917, acc:0.627248201438849


 31%|███       | 13345/43738 [1:40:52<3:44:12,  2.26it/s]

step:6300, train_loss:0.06921839362512659, acc:0.627201198950918


 31%|███       | 13346/43738 [1:40:53<4:10:28,  2.02it/s]

step:6300, train_loss:0.06921385009319313, acc:0.6272291323242919


 31%|███       | 13347/43738 [1:40:53<4:07:54,  2.04it/s]

step:6300, train_loss:0.06920918590828129, acc:0.6272570615119503


 31%|███       | 13348/43738 [1:40:54<4:08:37,  2.04it/s]

step:6300, train_loss:0.06923183414852281, acc:0.6272100689241834


 31%|███       | 13349/43738 [1:40:54<3:51:25,  2.19it/s]

step:6300, train_loss:0.06923756700472047, acc:0.627163083377032


 31%|███       | 13350/43738 [1:40:55<4:37:06,  1.83it/s]

step:6300, train_loss:0.06924666346548725, acc:0.6271161048689139


 31%|███       | 13351/43738 [1:40:56<5:06:38,  1.65it/s]

step:6300, train_loss:0.06924534058711568, acc:0.627144034154745


 31%|███       | 13352/43738 [1:40:57<5:40:26,  1.49it/s]

step:6300, train_loss:0.06924383126405118, acc:0.6271719592570402


 31%|███       | 13353/43738 [1:40:57<5:16:09,  1.60it/s]

step:6300, train_loss:0.0692423368706913, acc:0.6271998801767393


 31%|███       | 13354/43738 [1:40:58<4:51:33,  1.74it/s]

step:6300, train_loss:0.06923736227294336, acc:0.6272277969147821


 31%|███       | 13355/43738 [1:40:58<4:40:57,  1.80it/s]

step:6300, train_loss:0.06923255033403856, acc:0.6272557094721078


 31%|███       | 13356/43738 [1:40:59<4:16:37,  1.97it/s]

step:6300, train_loss:0.06922739477793524, acc:0.6272836178496556


 31%|███       | 13357/43738 [1:40:59<4:01:01,  2.10it/s]

step:6300, train_loss:0.06922789065256661, acc:0.6272366549374859


 31%|███       | 13358/43738 [1:41:00<5:02:54,  1.67it/s]

step:6300, train_loss:0.06922419692142961, acc:0.6272645605629585


 31%|███       | 13359/43738 [1:41:00<4:08:42,  2.04it/s]

step:6300, train_loss:0.06922579887577018, acc:0.6272176061082416


 31%|███       | 13664/43738 [1:43:21<3:22:55,  2.47it/s]

step:6320, train_loss:0.06923752128916205, acc:0.6269759953161592


 31%|███       | 13665/43738 [1:43:22<4:21:05,  1.92it/s]

step:6320, train_loss:0.06923706610704967, acc:0.6269301134284669


 31%|███       | 13666/43738 [1:43:22<3:59:35,  2.09it/s]

step:6320, train_loss:0.06923205980281388, acc:0.6269574125567101


 31%|███       | 13667/43738 [1:43:22<3:23:20,  2.46it/s]

step:6320, train_loss:0.06922706291708319, acc:0.6269847076900563


 31%|███       | 13668/43738 [1:43:23<3:47:56,  2.20it/s]

step:6320, train_loss:0.06922502984339106, acc:0.6270119988293825


 31%|███▏      | 13669/43738 [1:43:23<3:15:15,  2.57it/s]

step:6320, train_loss:0.06921996599176482, acc:0.6270392859755651


 31%|███▏      | 13670/43738 [1:43:23<3:10:27,  2.63it/s]

step:6320, train_loss:0.06921745731338454, acc:0.6270665691294807


 31%|███▏      | 13671/43738 [1:43:24<3:56:53,  2.12it/s]

step:6320, train_loss:0.0692141928321175, acc:0.627093848292005


 31%|███▏      | 13672/43738 [1:43:24<3:39:07,  2.29it/s]

step:6320, train_loss:0.06922050976496583, acc:0.6270479812755998


 31%|███▏      | 13673/43738 [1:43:25<4:31:26,  1.85it/s]

step:6320, train_loss:0.06922620910462914, acc:0.6270021209683317


 31%|███▏      | 13674/43738 [1:43:26<4:10:42,  2.00it/s]

step:6320, train_loss:0.06922943630748425, acc:0.626956267368729


 31%|███▏      | 13675/43738 [1:43:26<4:48:10,  1.74it/s]

step:6320, train_loss:0.06924623073576702, acc:0.62691042047532


 31%|███▏      | 13676/43738 [1:43:27<4:41:53,  1.78it/s]

step:6320, train_loss:0.06924498546908571, acc:0.6269377010821878


 31%|███▏      | 13677/43738 [1:43:28<5:02:23,  1.66it/s]

step:6320, train_loss:0.06925501710216692, acc:0.6268918622504935


 31%|███▏      | 13678/43738 [1:43:28<4:08:57,  2.01it/s]

step:6320, train_loss:0.0692499987765983, acc:0.6269191402251791


 31%|███▏      | 13679/43738 [1:43:29<4:53:56,  1.70it/s]

step:6320, train_loss:0.06925623243947074, acc:0.6268733094524453


 32%|███▏      | 13984/43738 [1:45:49<4:04:14,  2.03it/s]

step:6340, train_loss:0.06924086047169928, acc:0.6267877574370709


 32%|███▏      | 13985/43738 [1:45:49<3:48:34,  2.17it/s]

step:6340, train_loss:0.06923704440686332, acc:0.6268144440471934


 32%|███▏      | 13986/43738 [1:45:49<3:27:43,  2.39it/s]

step:6340, train_loss:0.0692373881410486, acc:0.6267696267696268


 32%|███▏      | 13987/43738 [1:45:50<3:34:19,  2.31it/s]

step:6340, train_loss:0.06923527248914581, acc:0.6267963108600844


 32%|███▏      | 13988/43738 [1:45:50<3:10:51,  2.60it/s]

step:6340, train_loss:0.0692303975345417, acc:0.6268229911352587


 32%|███▏      | 13989/43738 [1:45:51<3:43:03,  2.22it/s]

step:6340, train_loss:0.06922844594437003, acc:0.6268496675959683


 32%|███▏      | 13990/43738 [1:45:51<4:02:08,  2.05it/s]

step:6340, train_loss:0.06922553398334698, acc:0.6268763402430307


 32%|███▏      | 13991/43738 [1:45:51<3:37:59,  2.27it/s]

step:6340, train_loss:0.0692208921731758, acc:0.6269030090772639


 32%|███▏      | 13992/43738 [1:45:52<3:07:06,  2.65it/s]

step:6340, train_loss:0.0692159532567402, acc:0.6269296740994854


 32%|███▏      | 13993/43738 [1:45:52<2:45:43,  2.99it/s]

step:6340, train_loss:0.06921111954283289, acc:0.6269563353105124


 32%|███▏      | 13994/43738 [1:45:52<3:02:27,  2.72it/s]

step:6340, train_loss:0.06920652539514124, acc:0.626982992711162


 32%|███▏      | 13995/43738 [1:45:53<2:52:54,  2.87it/s]

step:6340, train_loss:0.06920230887302375, acc:0.6270096463022508


 32%|███▏      | 13996/43738 [1:45:53<3:55:31,  2.10it/s]

step:6340, train_loss:0.06921069084184872, acc:0.6269648470991712


 32%|███▏      | 13997/43738 [1:45:54<4:20:14,  1.90it/s]

step:6340, train_loss:0.06920683658327333, acc:0.626991498178181


 32%|███▏      | 13998/43738 [1:45:55<4:05:58,  2.02it/s]

step:6340, train_loss:0.06920699562939392, acc:0.6269467066723817


 32%|███▏      | 13999/43738 [1:45:55<4:50:41,  1.71it/s]

step:6340, train_loss:0.06922404200550453, acc:0.6269019215658261


 33%|███▎      | 14304/43738 [1:48:16<3:15:02,  2.52it/s]

step:6360, train_loss:0.06932400145029467, acc:0.6264681208053692


 33%|███▎      | 14305/43738 [1:48:17<2:53:05,  2.83it/s]

step:6360, train_loss:0.06931916166825935, acc:0.6264942327857392


 33%|███▎      | 14306/43738 [1:48:17<2:41:37,  3.03it/s]

step:6360, train_loss:0.06931432573399066, acc:0.6265203411156158


 33%|███▎      | 14307/43738 [1:48:17<3:04:36,  2.66it/s]

step:6360, train_loss:0.06931436013762442, acc:0.6264765499405885


 33%|███▎      | 14308/43738 [1:48:18<2:52:04,  2.85it/s]

step:6360, train_loss:0.06930959202409688, acc:0.6265026558568633


 33%|███▎      | 14309/43738 [1:48:18<3:20:18,  2.45it/s]

step:6360, train_loss:0.06930831832759916, acc:0.6264588720385771


 33%|███▎      | 14310/43738 [1:48:19<2:57:52,  2.76it/s]

step:6360, train_loss:0.06930348825572477, acc:0.6264849755415793


 33%|███▎      | 14311/43738 [1:48:19<3:00:49,  2.71it/s]

step:6360, train_loss:0.06930342584768326, acc:0.6265110753965482


 33%|███▎      | 14312/43738 [1:48:19<2:53:01,  2.83it/s]

step:6360, train_loss:0.06930191278200572, acc:0.6265371716042482


 33%|███▎      | 14313/43738 [1:48:20<2:35:08,  3.16it/s]

step:6360, train_loss:0.06929717810108209, acc:0.6265632641654441


 33%|███▎      | 14314/43738 [1:48:20<2:43:58,  2.99it/s]

step:6360, train_loss:0.06929559401973225, acc:0.6265893530808998


 33%|███▎      | 14315/43738 [1:48:20<3:14:39,  2.52it/s]

step:6360, train_loss:0.06929473774207556, acc:0.6265455815578065


 33%|███▎      | 14316/43738 [1:48:21<4:02:10,  2.02it/s]

step:6360, train_loss:0.06929041521755498, acc:0.6265716680637049


 33%|███▎      | 14317/43738 [1:48:21<3:34:34,  2.29it/s]

step:6360, train_loss:0.06928599205572018, acc:0.6265977509254732


 33%|███▎      | 14318/43738 [1:48:22<4:16:48,  1.91it/s]

step:6360, train_loss:0.06928374786443615, acc:0.6266238301438748


 33%|███▎      | 14319/43738 [1:48:23<3:57:57,  2.06it/s]

step:6360, train_loss:0.06929944148599862, acc:0.6265800684405336


 33%|███▎      | 14624/43738 [1:50:49<4:02:18,  2.00it/s]

step:6380, train_loss:0.0694653990926015, acc:0.6258889496717724


 33%|███▎      | 14625/43738 [1:50:49<4:29:48,  1.80it/s]

step:6380, train_loss:0.06946928426698964, acc:0.6258461538461538


 33%|███▎      | 14626/43738 [1:50:50<4:26:36,  1.82it/s]

step:6380, train_loss:0.06946466111627046, acc:0.6258717352659647


 33%|███▎      | 14627/43738 [1:50:50<4:17:41,  1.88it/s]

step:6380, train_loss:0.0694657123541283, acc:0.625828946468859


 33%|███▎      | 14628/43738 [1:50:51<3:47:31,  2.13it/s]

step:6380, train_loss:0.06946534376630377, acc:0.6257861635220126


 33%|███▎      | 14629/43738 [1:50:51<3:54:25,  2.07it/s]

step:6380, train_loss:0.06947150290107312, acc:0.6257433864242259


 33%|███▎      | 14630/43738 [1:50:51<3:31:42,  2.29it/s]

step:6380, train_loss:0.06946677448085885, acc:0.6257689678742311


 33%|███▎      | 14631/43738 [1:50:52<4:16:28,  1.89it/s]

step:6380, train_loss:0.06946405570149465, acc:0.6257945458273528


 33%|███▎      | 14632/43738 [1:50:53<4:34:03,  1.77it/s]

step:6380, train_loss:0.0694620728023177, acc:0.6258201202843083


 33%|███▎      | 14633/43738 [1:50:53<4:32:59,  1.78it/s]

step:6380, train_loss:0.06946234003870502, acc:0.6257773525592838


 33%|███▎      | 14634/43738 [1:50:54<4:36:17,  1.76it/s]

step:6380, train_loss:0.06945786269723744, acc:0.6258029246959136


 33%|███▎      | 14635/43738 [1:50:54<3:47:14,  2.13it/s]

step:6380, train_loss:0.06945406824481926, acc:0.6258284933378886


 33%|███▎      | 14636/43738 [1:50:55<4:02:33,  2.00it/s]

step:6380, train_loss:0.06945650412029081, acc:0.6257857338070512


 33%|███▎      | 14637/43738 [1:50:55<4:10:33,  1.94it/s]

step:6380, train_loss:0.06945555764246163, acc:0.625811300129808


 33%|███▎      | 14638/43738 [1:50:56<3:43:43,  2.17it/s]

step:6380, train_loss:0.06946031047199218, acc:0.6257685476157945


 33%|███▎      | 14639/43738 [1:50:56<3:13:05,  2.51it/s]

step:6380, train_loss:0.06945701597526253, acc:0.6257941116196462


 34%|███▍      | 14944/43738 [1:53:14<2:52:56,  2.77it/s]

step:6400, train_loss:0.0694233843919791, acc:0.6261375802997858


 34%|███▍      | 14945/43738 [1:53:14<3:16:54,  2.44it/s]

step:6400, train_loss:0.06942398242811168, acc:0.6261625961860154


 34%|███▍      | 14946/43738 [1:53:14<2:57:40,  2.70it/s]

step:6400, train_loss:0.06941977169794616, acc:0.6261876087247424


 34%|███▍      | 14947/43738 [1:53:15<3:52:34,  2.06it/s]

step:6400, train_loss:0.06942471677493098, acc:0.626145714859169


 34%|███▍      | 14948/43738 [1:53:15<3:16:42,  2.44it/s]

step:6400, train_loss:0.06942486881315309, acc:0.6261707251806262


 34%|███▍      | 14949/43738 [1:53:16<3:19:22,  2.41it/s]

step:6400, train_loss:0.06942424707909364, acc:0.6261957321559971


 34%|███▍      | 14950/43738 [1:53:16<3:07:35,  2.56it/s]

step:6400, train_loss:0.06942075340107651, acc:0.6262207357859532


 34%|███▍      | 14951/43738 [1:53:17<3:17:37,  2.43it/s]

step:6400, train_loss:0.06941616891058229, acc:0.6262457360711658


 34%|███▍      | 14952/43738 [1:53:17<2:55:08,  2.74it/s]

step:6400, train_loss:0.06941473758115949, acc:0.6262707330123061


 34%|███▍      | 14953/43738 [1:53:17<2:36:31,  3.06it/s]

step:6400, train_loss:0.0694101316298588, acc:0.6262957266100448


 34%|███▍      | 14954/43738 [1:53:17<2:25:32,  3.30it/s]

step:6400, train_loss:0.06943636355362577, acc:0.6262538451250501


 34%|███▍      | 14955/43738 [1:53:18<2:48:04,  2.85it/s]

step:6400, train_loss:0.06944693913990778, acc:0.6262119692410565


 34%|███▍      | 14956/43738 [1:53:18<3:11:05,  2.51it/s]

step:6400, train_loss:0.06944995415195698, acc:0.6261700989569403


 34%|███▍      | 14957/43738 [1:53:19<3:30:11,  2.28it/s]

step:6400, train_loss:0.06944813056691065, acc:0.6261950925987831


 34%|███▍      | 14958/43738 [1:53:19<3:55:14,  2.04it/s]

step:6400, train_loss:0.06944907101452955, acc:0.6261532290413157


 34%|███▍      | 14959/43738 [1:53:20<3:46:21,  2.12it/s]

step:6400, train_loss:0.0694453066343218, acc:0.6261782204692827


 35%|███▍      | 15264/43738 [1:55:34<4:11:25,  1.89it/s]

step:6420, train_loss:0.06922820267931575, acc:0.6264412997903563


 35%|███▍      | 15265/43738 [1:55:34<3:31:17,  2.25it/s]

step:6420, train_loss:0.06922581311969786, acc:0.6264657713724205


 35%|███▍      | 15266/43738 [1:55:35<4:12:15,  1.88it/s]

step:6420, train_loss:0.06923101311086353, acc:0.6264247347045723


 35%|███▍      | 15267/43738 [1:55:35<3:53:41,  2.03it/s]

step:6420, train_loss:0.06922679877352848, acc:0.6264492041658479


 35%|███▍      | 15268/43738 [1:55:36<3:56:45,  2.00it/s]

step:6420, train_loss:0.06922226689217392, acc:0.6264736704217972


 35%|███▍      | 15269/43738 [1:55:36<3:50:28,  2.06it/s]

step:6420, train_loss:0.0692183742727414, acc:0.62649813347305


 35%|███▍      | 15270/43738 [1:55:36<3:43:26,  2.12it/s]

step:6420, train_loss:0.06921585410339795, acc:0.6265225933202357


 35%|███▍      | 15271/43738 [1:55:37<3:34:10,  2.22it/s]

step:6420, train_loss:0.06921269968551354, acc:0.626547049963984


 35%|███▍      | 15272/43738 [1:55:37<3:52:17,  2.04it/s]

step:6420, train_loss:0.06921069556261021, acc:0.6265715034049241


 35%|███▍      | 15273/43738 [1:55:38<3:31:52,  2.24it/s]

step:6420, train_loss:0.06920900575146083, acc:0.6265959536436849


 35%|███▍      | 15274/43738 [1:55:38<3:43:39,  2.12it/s]

step:6420, train_loss:0.06921453888240585, acc:0.626554929946314


 35%|███▍      | 15275/43738 [1:55:39<3:36:11,  2.19it/s]

step:6420, train_loss:0.06921007052697271, acc:0.6265793780687398


 35%|███▍      | 15276/43738 [1:55:39<3:07:42,  2.53it/s]

step:6420, train_loss:0.06920668265080909, acc:0.6266038229903116


 35%|███▍      | 15277/43738 [1:55:40<3:59:34,  1.98it/s]

step:6420, train_loss:0.06922189901408575, acc:0.6265628068338025


 35%|███▍      | 15278/43738 [1:55:40<3:35:37,  2.20it/s]

step:6420, train_loss:0.06921765084912508, acc:0.6265872496400052


 35%|███▍      | 15279/43738 [1:55:40<3:26:38,  2.30it/s]

step:6420, train_loss:0.06921475164489588, acc:0.6266116892466784


 36%|███▌      | 15584/43738 [1:58:02<4:09:51,  1.88it/s]

step:6440, train_loss:0.06936076120030463, acc:0.6258341889117043


 36%|███▌      | 15585/43738 [1:58:02<3:40:44,  2.13it/s]

step:6440, train_loss:0.06935854390236518, acc:0.6258581969842798


 36%|███▌      | 15586/43738 [1:58:02<3:20:51,  2.34it/s]

step:6440, train_loss:0.06935925699731939, acc:0.6258180418324137


 36%|███▌      | 15587/43738 [1:58:03<3:24:25,  2.30it/s]

step:6440, train_loss:0.0693581936818851, acc:0.6258420478603964


 36%|███▌      | 15588/43738 [1:58:03<3:37:57,  2.15it/s]

step:6440, train_loss:0.06935974822814092, acc:0.6258018988965871


 36%|███▌      | 15589/43738 [1:58:04<3:11:24,  2.45it/s]

step:6440, train_loss:0.0693613829627483, acc:0.6257617550837129


 36%|███▌      | 15590/43738 [1:58:04<3:54:05,  2.00it/s]

step:6440, train_loss:0.06936327298472751, acc:0.6257216164207825


 36%|███▌      | 15591/43738 [1:58:05<3:58:37,  1.97it/s]

step:6440, train_loss:0.06935916469850346, acc:0.6257456224745045


 36%|███▌      | 15592/43738 [1:58:05<3:22:14,  2.32it/s]

step:6440, train_loss:0.06935472249553455, acc:0.6257696254489482


 36%|███▌      | 15593/43738 [1:58:05<3:16:35,  2.39it/s]

step:6440, train_loss:0.06935030991257493, acc:0.625793625344706


 36%|███▌      | 15594/43738 [1:58:06<3:27:06,  2.26it/s]

step:6440, train_loss:0.06935586793531633, acc:0.6257534949339489


 36%|███▌      | 15595/43738 [1:58:07<3:52:07,  2.02it/s]

step:6440, train_loss:0.06935828962386742, acc:0.625713369669766


 36%|███▌      | 15596/43738 [1:58:07<3:16:04,  2.39it/s]

step:6440, train_loss:0.06935618130492303, acc:0.62573736855604


 36%|███▌      | 15597/43738 [1:58:07<3:00:19,  2.60it/s]

step:6440, train_loss:0.06935191336993858, acc:0.625761364364942


 36%|███▌      | 15598/43738 [1:58:08<3:58:38,  1.97it/s]

step:6440, train_loss:0.06935512307306438, acc:0.6257212463136299


 36%|███▌      | 15599/43738 [1:58:08<3:47:30,  2.06it/s]

step:6440, train_loss:0.06935337754778276, acc:0.6257452400794923


 36%|███▋      | 15904/43738 [2:00:25<4:14:00,  1.83it/s]

step:6460, train_loss:0.06934228080755794, acc:0.6258174044265593


 36%|███▋      | 15905/43738 [2:00:26<4:27:17,  1.74it/s]

step:6460, train_loss:0.06934467235942983, acc:0.6257780572147124


 36%|███▋      | 15906/43738 [2:00:26<3:51:31,  2.00it/s]

step:6460, train_loss:0.06934902946122395, acc:0.6257387149503332


 36%|███▋      | 15907/43738 [2:00:27<4:04:02,  1.90it/s]

step:6460, train_loss:0.06934867372132911, acc:0.6257622430376564


 36%|███▋      | 15908/43738 [2:00:27<4:10:38,  1.85it/s]

step:6460, train_loss:0.0693458604698528, acc:0.62578576816696


 36%|███▋      | 15909/43738 [2:00:28<3:45:42,  2.05it/s]

step:6460, train_loss:0.06934407847594433, acc:0.625809290338802


 36%|███▋      | 15910/43738 [2:00:28<3:14:58,  2.38it/s]

step:6460, train_loss:0.0693446655045843, acc:0.6257699560025142


 36%|███▋      | 15911/43738 [2:00:28<2:56:10,  2.63it/s]

step:6460, train_loss:0.0693403184190825, acc:0.625793476211426


 36%|███▋      | 15912/43738 [2:00:29<2:56:34,  2.63it/s]

step:6460, train_loss:0.06934329167366103, acc:0.6257541478129713


 36%|███▋      | 15913/43738 [2:00:29<2:45:13,  2.81it/s]

step:6460, train_loss:0.06935068639290114, acc:0.6257148243574436


 36%|███▋      | 15914/43738 [2:00:29<2:49:42,  2.73it/s]

step:6460, train_loss:0.06935483236826034, acc:0.6256755058439111


 36%|███▋      | 15915/43738 [2:00:30<2:44:41,  2.82it/s]

step:6460, train_loss:0.06935048511949202, acc:0.6256990260760289


 36%|███▋      | 15916/43738 [2:00:30<3:00:26,  2.57it/s]

step:6460, train_loss:0.06935933115481777, acc:0.6256597134958533


 36%|███▋      | 15917/43738 [2:00:30<3:09:09,  2.45it/s]

step:6460, train_loss:0.0693550829468316, acc:0.6256832317647798


 36%|███▋      | 15918/43738 [2:00:31<2:44:47,  2.81it/s]

step:6460, train_loss:0.06935420837108672, acc:0.6256439251162206


 36%|███▋      | 15919/43738 [2:00:31<2:27:34,  3.14it/s]

step:6460, train_loss:0.06935008701321081, acc:0.6256674414221999


 37%|███▋      | 16224/43738 [2:02:56<3:30:53,  2.17it/s]

step:6480, train_loss:0.0693631169181857, acc:0.6262943786982249


 37%|███▋      | 16225/43738 [2:02:57<3:37:48,  2.11it/s]

step:6480, train_loss:0.06936518570358359, acc:0.6262557781201848


 37%|███▋      | 16226/43738 [2:02:57<3:09:40,  2.42it/s]

step:6480, train_loss:0.06936401743494697, acc:0.6262788117835573


 37%|███▋      | 16227/43738 [2:02:58<3:11:16,  2.40it/s]

step:6480, train_loss:0.0693597635786767, acc:0.626301842607999


 37%|███▋      | 16228/43738 [2:02:58<3:27:27,  2.21it/s]

step:6480, train_loss:0.06935554676957176, acc:0.626324870594035


 37%|███▋      | 16229/43738 [2:02:59<4:10:51,  1.83it/s]

step:6480, train_loss:0.06935788797901843, acc:0.6262862776511183


 37%|███▋      | 16230/43738 [2:02:59<4:10:12,  1.83it/s]

step:6480, train_loss:0.0693734137962728, acc:0.6262476894639556


 37%|███▋      | 16231/43738 [2:03:00<3:32:30,  2.16it/s]

step:6480, train_loss:0.0693692781598964, acc:0.6262707165300967


 37%|███▋      | 16232/43738 [2:03:01<4:16:26,  1.79it/s]

step:6480, train_loss:0.06937697849058866, acc:0.6262321340561853


 37%|███▋      | 16233/43738 [2:03:01<3:55:48,  1.94it/s]

step:6480, train_loss:0.06937274839652263, acc:0.6262551592435163


 37%|███▋      | 16234/43738 [2:03:01<3:35:11,  2.13it/s]

step:6480, train_loss:0.06937063464575305, acc:0.626278181594185


 37%|███▋      | 16235/43738 [2:03:02<3:15:13,  2.35it/s]

step:6480, train_loss:0.06937366028822638, acc:0.62623960578996


 37%|███▋      | 16236/43738 [2:03:02<3:48:13,  2.01it/s]

step:6480, train_loss:0.0693774904148079, acc:0.6262010347376201


 37%|███▋      | 16237/43738 [2:03:03<3:46:32,  2.02it/s]

step:6480, train_loss:0.06938097289650869, acc:0.6261624684362875


 37%|███▋      | 16238/43738 [2:03:03<4:11:44,  1.82it/s]

step:6480, train_loss:0.06937982102493402, acc:0.6261854908239931


 37%|███▋      | 16239/43738 [2:03:04<3:42:36,  2.06it/s]

step:6480, train_loss:0.06937573889869356, acc:0.6262085103762547


 38%|███▊      | 16544/43738 [2:05:21<3:35:21,  2.10it/s]

step:6500, train_loss:0.06931573382056005, acc:0.6267529013539652


 38%|███▊      | 16545/43738 [2:05:22<3:02:33,  2.48it/s]

step:6500, train_loss:0.06932925574742745, acc:0.6267150196433968


 38%|███▊      | 16546/43738 [2:05:22<2:40:31,  2.82it/s]

step:6500, train_loss:0.06932506579721198, acc:0.6267375800797775


 38%|███▊      | 16547/43738 [2:05:23<3:38:42,  2.07it/s]

step:6500, train_loss:0.06932728654354019, acc:0.626699703873814


 38%|███▊      | 16548/43738 [2:05:23<3:48:17,  1.98it/s]

step:6500, train_loss:0.06932332553300453, acc:0.6267222625090645


 38%|███▊      | 16549/43738 [2:05:24<3:44:31,  2.02it/s]

step:6500, train_loss:0.06933078046930279, acc:0.6266843918061514


 38%|███▊      | 16550/43738 [2:05:24<3:39:20,  2.07it/s]

step:6500, train_loss:0.0693266570974835, acc:0.6267069486404834


 38%|███▊      | 16551/43738 [2:05:25<3:52:05,  1.95it/s]

step:6500, train_loss:0.06932328716697043, acc:0.6267295027490786


 38%|███▊      | 16552/43738 [2:05:25<3:49:29,  1.97it/s]

step:6500, train_loss:0.069327507062882, acc:0.6266916384726922


 38%|███▊      | 16553/43738 [2:05:25<3:32:52,  2.13it/s]

step:6500, train_loss:0.06932808769843152, acc:0.6267141907811273


 38%|███▊      | 16554/43738 [2:05:26<4:17:19,  1.76it/s]

step:6500, train_loss:0.06932717164229302, acc:0.6266763320043494


 38%|███▊      | 16555/43738 [2:05:27<3:49:23,  1.97it/s]

step:6500, train_loss:0.06932310086311924, acc:0.626698882512836


 38%|███▊      | 16556/43738 [2:05:27<3:13:15,  2.34it/s]

step:6500, train_loss:0.06932034551452508, acc:0.6267214302971732


 38%|███▊      | 16557/43738 [2:05:27<3:41:05,  2.05it/s]

step:6500, train_loss:0.06933998707228593, acc:0.626683577942864


 38%|███▊      | 16558/43738 [2:05:28<4:12:36,  1.79it/s]

step:6500, train_loss:0.06933598935139348, acc:0.6267061239280106


 38%|███▊      | 16559/43738 [2:05:29<4:12:44,  1.79it/s]

step:6500, train_loss:0.06933288287504875, acc:0.6267286671900477


 39%|███▊      | 16864/43738 [2:07:53<3:37:53,  2.06it/s]

step:6520, train_loss:0.06926348526896783, acc:0.6276091081593927


 39%|███▊      | 16865/43738 [2:07:54<3:47:42,  1.97it/s]

step:6520, train_loss:0.0692768219859961, acc:0.6275718944559739


 39%|███▊      | 16866/43738 [2:07:54<4:01:53,  1.85it/s]

step:6520, train_loss:0.06928946846630328, acc:0.6275346851654215


 39%|███▊      | 16867/43738 [2:07:54<3:30:08,  2.13it/s]

step:6520, train_loss:0.06928540642693087, acc:0.6275567676528132


 39%|███▊      | 16868/43738 [2:07:55<3:14:33,  2.30it/s]

step:6520, train_loss:0.0692958245295062, acc:0.627519563670856


 39%|███▊      | 16869/43738 [2:07:55<2:55:00,  2.56it/s]

step:6520, train_loss:0.06929370915507674, acc:0.6275416444365404


 39%|███▊      | 16870/43738 [2:07:56<3:43:42,  2.00it/s]

step:6520, train_loss:0.06929763476918098, acc:0.6275044457617072


 39%|███▊      | 16871/43738 [2:07:56<3:20:34,  2.23it/s]

step:6520, train_loss:0.06929393375516502, acc:0.6275265248058799


 39%|███▊      | 16872/43738 [2:07:57<3:20:58,  2.23it/s]

step:6520, train_loss:0.06929044187605103, acc:0.6275486012328118


 39%|███▊      | 16873/43738 [2:07:57<4:02:21,  1.85it/s]

step:6520, train_loss:0.06928837129303486, acc:0.627570675042968


 39%|███▊      | 16874/43738 [2:07:58<3:22:20,  2.21it/s]

step:6520, train_loss:0.06928490316620103, acc:0.627592746236814


 39%|███▊      | 16875/43738 [2:07:58<3:10:47,  2.35it/s]

step:6520, train_loss:0.06928143621996666, acc:0.6276148148148148


 39%|███▊      | 16876/43738 [2:07:58<3:15:28,  2.29it/s]

step:6520, train_loss:0.06927843037825018, acc:0.6276368807774354


 39%|███▊      | 16877/43738 [2:07:59<3:14:56,  2.30it/s]

step:6520, train_loss:0.06928540315685935, acc:0.6275996918883688


 39%|███▊      | 16878/43738 [2:07:59<2:59:59,  2.49it/s]

step:6520, train_loss:0.0692816796965859, acc:0.6276217561322431


 39%|███▊      | 16879/43738 [2:07:59<2:39:45,  2.80it/s]

step:6520, train_loss:0.06927769494932354, acc:0.6276438177617157


 39%|███▉      | 17184/43738 [2:10:19<3:12:02,  2.30it/s]

step:6540, train_loss:0.06925701387300899, acc:0.6278514897579144


 39%|███▉      | 17185/43738 [2:10:20<3:58:32,  1.86it/s]

step:6540, train_loss:0.06925830121885686, acc:0.6278149549025313


 39%|███▉      | 17186/43738 [2:10:20<4:26:42,  1.66it/s]

step:6540, train_loss:0.06925468423083492, acc:0.6278366111951589


 39%|███▉      | 17187/43738 [2:10:21<3:41:07,  2.00it/s]

step:6540, train_loss:0.06925143756702577, acc:0.6278582649677081


 39%|███▉      | 17188/43738 [2:10:21<4:22:43,  1.68it/s]

step:6540, train_loss:0.06925665759923161, acc:0.62782173609495


 39%|███▉      | 17189/43738 [2:10:22<3:53:35,  1.89it/s]

step:6540, train_loss:0.06925279526088789, acc:0.6278433882133923


 39%|███▉      | 17190/43738 [2:10:22<3:27:26,  2.13it/s]

step:6540, train_loss:0.06926042660026878, acc:0.6278068644560791


 39%|███▉      | 17191/43738 [2:10:22<3:10:29,  2.32it/s]

step:6540, train_loss:0.06925640850093995, acc:0.627828514920598


 39%|███▉      | 17192/43738 [2:10:23<3:05:15,  2.39it/s]

step:6540, train_loss:0.06925239025279033, acc:0.6278501628664495


 39%|███▉      | 17193/43738 [2:10:23<3:22:14,  2.19it/s]

step:6540, train_loss:0.06924914814584848, acc:0.6278718082940732


 39%|███▉      | 17194/43738 [2:10:24<3:25:23,  2.15it/s]

step:6540, train_loss:0.06925062422081267, acc:0.6278352913807143


 39%|███▉      | 17195/43738 [2:10:24<3:14:10,  2.28it/s]

step:6540, train_loss:0.06924868915340053, acc:0.6278569351555685


 39%|███▉      | 17196/43738 [2:10:25<3:03:22,  2.41it/s]

step:6540, train_loss:0.06924469312066468, acc:0.6278785764131193


 39%|███▉      | 17197/43738 [2:10:25<3:25:43,  2.15it/s]

step:6540, train_loss:0.06924563190361707, acc:0.6278420654765366


 39%|███▉      | 17198/43738 [2:10:25<3:03:39,  2.41it/s]

step:6540, train_loss:0.06924198634482526, acc:0.6278637050819863


 39%|███▉      | 17199/43738 [2:10:26<2:59:41,  2.46it/s]

step:6540, train_loss:0.06923918559425753, acc:0.6278853421710564


 40%|████      | 17504/43738 [2:12:47<4:09:45,  1.75it/s]

step:6560, train_loss:0.06936179172649476, acc:0.6269995429616088


 40%|████      | 17505/43738 [2:12:47<3:27:02,  2.11it/s]

step:6560, train_loss:0.06935796252385931, acc:0.6270208511853756


 40%|████      | 17506/43738 [2:12:48<3:21:16,  2.17it/s]

step:6560, train_loss:0.06936351060421292, acc:0.6269850337027305


 40%|████      | 17507/43738 [2:12:48<3:04:00,  2.38it/s]

step:6560, train_loss:0.06936161764911304, acc:0.6270063403210144


 40%|████      | 17508/43738 [2:12:48<3:13:11,  2.26it/s]

step:6560, train_loss:0.06935795635400323, acc:0.627027644505369


 40%|████      | 17509/43738 [2:12:49<2:47:34,  2.61it/s]

step:6560, train_loss:0.06935411374575352, acc:0.6270489462562111


 40%|████      | 17510/43738 [2:12:49<2:30:56,  2.90it/s]

step:6560, train_loss:0.06935271436690889, acc:0.6270702455739577


 40%|████      | 17511/43738 [2:12:50<3:10:04,  2.30it/s]

step:6560, train_loss:0.06934876896569066, acc:0.6270915424590258


 40%|████      | 17512/43738 [2:12:50<2:45:38,  2.64it/s]

step:6560, train_loss:0.06934482708600584, acc:0.6271128369118318


 40%|████      | 17513/43738 [2:12:50<2:29:00,  2.93it/s]

step:6560, train_loss:0.0693408715330146, acc:0.6271341289327927


 40%|████      | 17514/43738 [2:12:51<3:02:10,  2.40it/s]

step:6560, train_loss:0.06934096493530575, acc:0.627155418522325


 40%|████      | 17515/43738 [2:12:51<3:12:28,  2.27it/s]

step:6560, train_loss:0.06935591911503916, acc:0.6271196117613475


 40%|████      | 17516/43738 [2:12:52<3:15:13,  2.24it/s]

step:6560, train_loss:0.06935323689671447, acc:0.627140899748801


 40%|████      | 17517/43738 [2:12:52<3:49:43,  1.90it/s]

step:6560, train_loss:0.06935234634375527, acc:0.627162185305703


 40%|████      | 17518/43738 [2:12:53<3:22:57,  2.15it/s]

step:6560, train_loss:0.06935344154237422, acc:0.6271263842904441


 40%|████      | 17519/43738 [2:12:54<4:06:38,  1.77it/s]

step:6560, train_loss:0.06935030271370911, acc:0.6271476682459044


 41%|████      | 17824/43738 [2:15:16<3:27:49,  2.08it/s]

step:6580, train_loss:0.0693711836867195, acc:0.6266831238779175


 41%|████      | 17825/43738 [2:15:17<3:28:23,  2.07it/s]

step:6580, train_loss:0.06937126220490321, acc:0.6266479663394109


 41%|████      | 17826/43738 [2:15:17<3:26:22,  2.09it/s]

step:6580, train_loss:0.06937223163335583, acc:0.6266689105800516


 41%|████      | 17827/43738 [2:15:18<3:24:36,  2.11it/s]

step:6580, train_loss:0.06937254321728165, acc:0.6266337577831379


 41%|████      | 17828/43738 [2:15:18<3:20:03,  2.16it/s]

step:6580, train_loss:0.0693876687613375, acc:0.6265986089297734


 41%|████      | 17829/43738 [2:15:19<3:34:46,  2.01it/s]

step:6580, train_loss:0.06939283502574484, acc:0.6265634640192944


 41%|████      | 17830/43738 [2:15:19<3:32:37,  2.03it/s]

step:6580, train_loss:0.06938895902534582, acc:0.6265844083006169


 41%|████      | 17831/43738 [2:15:19<2:58:05,  2.42it/s]

step:6580, train_loss:0.06938707597459415, acc:0.6266053502327408


 41%|████      | 17832/43738 [2:15:20<3:04:48,  2.34it/s]

step:6580, train_loss:0.06939163561222357, acc:0.6265702108568865


 41%|████      | 17833/43738 [2:15:21<3:50:34,  1.87it/s]

step:6580, train_loss:0.06938819600522188, acc:0.6265911512364717


 41%|████      | 17834/43738 [2:15:21<3:18:20,  2.18it/s]

step:6580, train_loss:0.06938430899211387, acc:0.626612089267691


 41%|████      | 17835/43738 [2:15:21<3:01:13,  2.38it/s]

step:6580, train_loss:0.06938947050273536, acc:0.6265769554247267


 41%|████      | 17836/43738 [2:15:22<2:46:56,  2.59it/s]

step:6580, train_loss:0.06938607161057707, acc:0.6265978919040144


 41%|████      | 17837/43738 [2:15:22<3:12:46,  2.24it/s]

step:6580, train_loss:0.06938307497349129, acc:0.6266188260357684


 41%|████      | 17838/43738 [2:15:23<3:08:59,  2.28it/s]

step:6580, train_loss:0.06938043185238053, acc:0.6266397578203835


 41%|████      | 17839/43738 [2:15:23<3:01:20,  2.38it/s]

step:6580, train_loss:0.06937759234045873, acc:0.6266606872582544


 41%|████▏     | 18144/43738 [2:17:44<3:28:21,  2.05it/s]

step:6600, train_loss:0.0694849304420141, acc:0.6259369488536155


 41%|████▏     | 18145/43738 [2:17:44<2:54:15,  2.45it/s]

step:6600, train_loss:0.06948239091315983, acc:0.6259575640672361


 41%|████▏     | 18146/43738 [2:17:45<2:46:04,  2.57it/s]

step:6600, train_loss:0.06947859365259959, acc:0.6259781770087072


 41%|████▏     | 18147/43738 [2:17:45<3:25:47,  2.07it/s]

step:6600, train_loss:0.06948594714902141, acc:0.6259436821513198


 41%|████▏     | 18148/43738 [2:17:46<3:22:08,  2.11it/s]

step:6600, train_loss:0.06948849042040979, acc:0.6259091910954375


 41%|████▏     | 18149/43738 [2:17:47<3:53:18,  1.83it/s]

step:6600, train_loss:0.06948471546697449, acc:0.6259298032949474


 41%|████▏     | 18150/43738 [2:17:47<3:34:11,  1.99it/s]

step:6600, train_loss:0.06948157612701619, acc:0.6259504132231405


 41%|████▏     | 18151/43738 [2:17:47<2:59:33,  2.38it/s]

step:6600, train_loss:0.0694832106281362, acc:0.6259159274971076


 42%|████▏     | 18152/43738 [2:17:48<3:15:34,  2.18it/s]

step:6600, train_loss:0.06948024500002678, acc:0.625936535918907


 42%|████▏     | 18153/43738 [2:17:48<2:51:16,  2.49it/s]

step:6600, train_loss:0.06947648500330238, acc:0.6259571420701813


 42%|████▏     | 18154/43738 [2:17:49<3:10:58,  2.23it/s]

step:6600, train_loss:0.06947600865126234, acc:0.6259777459513055


 42%|████▏     | 18155/43738 [2:17:49<3:15:52,  2.18it/s]

step:6600, train_loss:0.06948409793230366, acc:0.6259432663178188


 42%|████▏     | 18156/43738 [2:17:49<2:49:43,  2.51it/s]

step:6600, train_loss:0.0694823975950913, acc:0.6259638686935448


 42%|████▏     | 18157/43738 [2:17:50<2:32:06,  2.80it/s]

step:6600, train_loss:0.06948253116572016, acc:0.6259293936222944


 42%|████▏     | 18158/43738 [2:17:50<2:35:34,  2.74it/s]

step:6600, train_loss:0.06947947243179227, acc:0.6259499944927855


 42%|████▏     | 18159/43738 [2:17:50<2:29:45,  2.85it/s]

step:6600, train_loss:0.06948153096827926, acc:0.6259155239825982


 42%|████▏     | 18464/43738 [2:20:06<3:14:42,  2.16it/s]

step:6620, train_loss:0.069368218360026, acc:0.6264081455805892


 42%|████▏     | 18465/43738 [2:20:06<3:02:36,  2.31it/s]

step:6620, train_loss:0.06936447545276066, acc:0.626428378012456


 42%|████▏     | 18466/43738 [2:20:06<2:38:04,  2.66it/s]

step:6620, train_loss:0.06936273921619729, acc:0.6264486082530055


 42%|████▏     | 18467/43738 [2:20:07<2:29:03,  2.83it/s]

step:6620, train_loss:0.06936803488277499, acc:0.6264146856554936


 42%|████▏     | 18468/43738 [2:20:07<2:30:30,  2.80it/s]

step:6620, train_loss:0.06936656219328534, acc:0.6264349144466104


 42%|████▏     | 18469/43738 [2:20:08<3:04:36,  2.28it/s]

step:6620, train_loss:0.06936564578503246, acc:0.6264551410471602


 42%|████▏     | 18470/43738 [2:20:08<2:57:02,  2.38it/s]

step:6620, train_loss:0.0693622679894385, acc:0.6264753654574986


 42%|████▏     | 18471/43738 [2:20:08<2:42:08,  2.60it/s]

step:6620, train_loss:0.06936699466906296, acc:0.6264414487575117


 42%|████▏     | 18472/43738 [2:20:09<2:42:08,  2.60it/s]

step:6620, train_loss:0.06937705172975764, acc:0.6264075357297532


 42%|████▏     | 18473/43738 [2:20:09<2:23:13,  2.94it/s]

step:6620, train_loss:0.06937483339056794, acc:0.6264277594326856


 42%|████▏     | 18474/43738 [2:20:09<2:42:06,  2.60it/s]

step:6620, train_loss:0.06937305873255781, acc:0.6264479809461947


 42%|████▏     | 18475/43738 [2:20:10<3:24:38,  2.06it/s]

step:6620, train_loss:0.06937357485299699, acc:0.626468200270636


 42%|████▏     | 18476/43738 [2:20:11<3:22:05,  2.08it/s]

step:6620, train_loss:0.06937557620589069, acc:0.6264342931370427


 42%|████▏     | 18477/43738 [2:20:11<2:52:54,  2.44it/s]

step:6620, train_loss:0.06937243302452388, acc:0.6264545110136926


 42%|████▏     | 18478/43738 [2:20:11<2:41:29,  2.61it/s]

step:6620, train_loss:0.06936963402939342, acc:0.626474726702024


 42%|████▏     | 18479/43738 [2:20:12<3:10:51,  2.21it/s]

step:6620, train_loss:0.06936844024766001, acc:0.6264949402023919


 43%|████▎     | 18784/43738 [2:22:29<2:56:10,  2.36it/s]

step:6640, train_loss:0.06936305959562193, acc:0.6268632879045997


 43%|████▎     | 18785/43738 [2:22:29<2:32:13,  2.73it/s]

step:6640, train_loss:0.06936011375414232, acc:0.6268831514506255


 43%|████▎     | 18786/43738 [2:22:30<2:32:20,  2.73it/s]

step:6640, train_loss:0.06936718064812906, acc:0.6268497817523688


 43%|████▎     | 18787/43738 [2:22:30<2:50:56,  2.43it/s]

step:6640, train_loss:0.0693634889191986, acc:0.6268696439026987


 43%|████▎     | 18788/43738 [2:22:30<2:41:34,  2.57it/s]

step:6640, train_loss:0.06935988498204036, acc:0.6268895039386843


 43%|████▎     | 18789/43738 [2:22:31<2:27:50,  2.81it/s]

step:6640, train_loss:0.0693621979905079, acc:0.6269093618606632


 43%|████▎     | 18790/43738 [2:22:31<2:27:41,  2.82it/s]

step:6640, train_loss:0.06935981188888936, acc:0.6269292176689728


 43%|████▎     | 18791/43738 [2:22:32<3:20:30,  2.07it/s]

step:6640, train_loss:0.0693632436671649, acc:0.6268958543983822


 43%|████▎     | 18792/43738 [2:22:32<2:54:03,  2.39it/s]

step:6640, train_loss:0.06936642930381796, acc:0.6268624946785867


 43%|████▎     | 18793/43738 [2:22:33<3:01:59,  2.28it/s]

step:6640, train_loss:0.06936364788883745, acc:0.6268823498110999


 43%|████▎     | 18794/43738 [2:22:33<2:54:32,  2.38it/s]

step:6640, train_loss:0.06936856272326787, acc:0.626848994359902


 43%|████▎     | 18795/43738 [2:22:33<3:05:39,  2.24it/s]

step:6640, train_loss:0.06937818309414157, acc:0.6268156424581005


 43%|████▎     | 18796/43738 [2:22:34<3:04:38,  2.25it/s]

step:6640, train_loss:0.06939904758251197, acc:0.6267822941051288


 43%|████▎     | 18797/43738 [2:22:35<3:25:13,  2.03it/s]

step:6640, train_loss:0.06939991068988549, acc:0.6267489493004202


 43%|████▎     | 18798/43738 [2:22:35<3:20:40,  2.07it/s]

step:6640, train_loss:0.06940184097989512, acc:0.6267156080434089


 43%|████▎     | 18799/43738 [2:22:36<3:40:34,  1.88it/s]

step:6640, train_loss:0.06939823368555988, acc:0.6267354646523752


 44%|████▎     | 19104/43738 [2:24:50<3:06:08,  2.21it/s]

step:6660, train_loss:0.06932736282675735, acc:0.6268320770519263


 44%|████▎     | 19105/43738 [2:24:51<3:38:05,  1.88it/s]

step:6660, train_loss:0.0693404362760505, acc:0.6267992672075373


 44%|████▎     | 19106/43738 [2:24:52<4:09:41,  1.64it/s]

step:6660, train_loss:0.06934495517453211, acc:0.6267664607976552


 44%|████▎     | 19107/43738 [2:24:52<4:03:11,  1.69it/s]

step:6660, train_loss:0.06934173230872509, acc:0.6267859946616423


 44%|████▎     | 19108/43738 [2:24:53<3:42:23,  1.85it/s]

step:6660, train_loss:0.06934664069734871, acc:0.6267531923801549


 44%|████▎     | 19109/43738 [2:24:53<3:24:48,  2.00it/s]

step:6660, train_loss:0.06934600467082626, acc:0.626772724894029


 44%|████▎     | 19110/43738 [2:24:53<2:54:14,  2.36it/s]

step:6660, train_loss:0.06934518520221075, acc:0.6267922553636839


 44%|████▎     | 19111/43738 [2:24:54<3:41:20,  1.85it/s]

step:6660, train_loss:0.06934688012676139, acc:0.6268117837894407


 44%|████▎     | 19112/43738 [2:24:55<3:40:11,  1.86it/s]

step:6660, train_loss:0.06934824932037306, acc:0.6267789870238594


 44%|████▎     | 19113/43738 [2:24:55<3:23:59,  2.01it/s]

step:6660, train_loss:0.06934599578093399, acc:0.6267985141003506


 44%|████▎     | 19114/43738 [2:24:55<3:15:00,  2.10it/s]

step:6660, train_loss:0.06934596125193286, acc:0.6267657214607094


 44%|████▎     | 19115/43738 [2:24:56<3:51:58,  1.77it/s]

step:6660, train_loss:0.06935072007579358, acc:0.626732932252158


 44%|████▎     | 19116/43738 [2:24:57<3:23:43,  2.01it/s]

step:6660, train_loss:0.06934819805968548, acc:0.6267524586733626


 44%|████▎     | 19117/43738 [2:24:57<3:01:01,  2.27it/s]

step:6660, train_loss:0.06934457099935672, acc:0.6267719830517341


 44%|████▎     | 19118/43738 [2:24:58<3:38:35,  1.88it/s]

step:6660, train_loss:0.06934286265162067, acc:0.6267915053875929


 44%|████▎     | 19119/43738 [2:24:58<3:02:14,  2.25it/s]

step:6660, train_loss:0.06934166721300641, acc:0.626758721690465


 44%|████▍     | 19424/43738 [2:27:19<2:30:38,  2.69it/s]

step:6680, train_loss:0.06938932763536214, acc:0.6264415156507414


 44%|████▍     | 19425/43738 [2:27:20<3:06:14,  2.18it/s]

step:6680, train_loss:0.06938651494054061, acc:0.6264607464607465


 44%|████▍     | 19426/43738 [2:27:20<2:43:12,  2.48it/s]

step:6680, train_loss:0.06938662457737624, acc:0.6264799752908473


 44%|████▍     | 19427/43738 [2:27:21<3:02:12,  2.22it/s]

step:6680, train_loss:0.06938661302389304, acc:0.6264992021413497


 44%|████▍     | 19428/43738 [2:27:21<3:10:21,  2.13it/s]

step:6680, train_loss:0.06938434802071794, acc:0.6265184270125592


 44%|████▍     | 19429/43738 [2:27:21<2:52:19,  2.35it/s]

step:6680, train_loss:0.06938685715432547, acc:0.6264861804519017


 44%|████▍     | 19430/43738 [2:27:22<2:47:36,  2.42it/s]

step:6680, train_loss:0.0693869609795579, acc:0.6264539372104992


 44%|████▍     | 19431/43738 [2:27:22<2:49:13,  2.39it/s]

step:6680, train_loss:0.0693863831420916, acc:0.626421697287839


 44%|████▍     | 19432/43738 [2:27:23<2:52:05,  2.35it/s]

step:6680, train_loss:0.06938930229709188, acc:0.6264409221902018


 44%|████▍     | 19433/43738 [2:27:23<2:51:29,  2.36it/s]

step:6680, train_loss:0.0693911393900807, acc:0.6264086862553389


 44%|████▍     | 19434/43738 [2:27:24<3:31:44,  1.91it/s]

step:6680, train_loss:0.06939756669815758, acc:0.6263764536379541


 44%|████▍     | 19435/43738 [2:27:24<3:17:19,  2.05it/s]

step:6680, train_loss:0.06939484870288239, acc:0.6263956779006946


 44%|████▍     | 19436/43738 [2:27:25<3:54:38,  1.73it/s]

step:6680, train_loss:0.06939258842499711, acc:0.6264149001852233


 44%|████▍     | 19437/43738 [2:27:25<3:36:01,  1.87it/s]

step:6680, train_loss:0.06938903759275974, acc:0.6264341204918454


 44%|████▍     | 19438/43738 [2:27:26<3:09:06,  2.14it/s]

step:6680, train_loss:0.06939470745049037, acc:0.6264018931988887


 44%|████▍     | 19439/43738 [2:27:26<3:04:06,  2.20it/s]

step:6680, train_loss:0.06939114457435189, acc:0.6264211121971295


 45%|████▌     | 19744/43738 [2:29:38<3:32:47,  1.88it/s]

step:6700, train_loss:0.06947422736768817, acc:0.6261649108589952


 45%|████▌     | 19745/43738 [2:29:38<2:58:24,  2.24it/s]

step:6700, train_loss:0.06947298081444386, acc:0.626183844011142


 45%|████▌     | 19746/43738 [2:29:38<2:48:28,  2.37it/s]

step:6700, train_loss:0.06946978957009495, acc:0.6262027752456194


 45%|████▌     | 19747/43738 [2:29:39<2:56:21,  2.27it/s]

step:6700, train_loss:0.06946941435342266, acc:0.6261710639590824


 45%|████▌     | 19748/43738 [2:29:39<2:31:24,  2.64it/s]

step:6700, train_loss:0.0694659086360656, acc:0.6261899939234353


 45%|████▌     | 19749/43738 [2:29:40<3:07:51,  2.13it/s]

step:6700, train_loss:0.06946321263714589, acc:0.6262089219707327


 45%|████▌     | 19750/43738 [2:29:40<3:06:14,  2.15it/s]

step:6700, train_loss:0.06946413750861936, acc:0.6262278481012659


 45%|████▌     | 19751/43738 [2:29:41<2:59:50,  2.22it/s]

step:6700, train_loss:0.06946345009034481, acc:0.6262467723153258


 45%|████▌     | 19752/43738 [2:29:41<3:25:50,  1.94it/s]

step:6700, train_loss:0.06946024737632707, acc:0.6262656946132037


 45%|████▌     | 19753/43738 [2:29:42<3:54:33,  1.70it/s]

step:6700, train_loss:0.0694675530126776, acc:0.6262339897737053


 45%|████▌     | 19754/43738 [2:29:43<3:27:23,  1.93it/s]

step:6700, train_loss:0.06947214488646426, acc:0.6262022881441733


 45%|████▌     | 19755/43738 [2:29:43<3:36:49,  1.84it/s]

step:6700, train_loss:0.06947484929883316, acc:0.6261705897241204


 45%|████▌     | 19756/43738 [2:29:44<4:02:14,  1.65it/s]

step:6700, train_loss:0.06947249350398334, acc:0.626189512046973


 45%|████▌     | 19757/43738 [2:29:44<3:57:36,  1.68it/s]

step:6700, train_loss:0.06947104607277207, acc:0.62620843245432


 45%|████▌     | 19758/43738 [2:29:45<3:54:21,  1.71it/s]

step:6700, train_loss:0.069474367388365, acc:0.6261767385362891


 45%|████▌     | 19759/43738 [2:29:45<3:23:51,  1.96it/s]

step:6700, train_loss:0.06947386212736999, acc:0.626145047826307


 46%|████▌     | 20064/43738 [2:32:13<3:34:33,  1.84it/s]

step:6720, train_loss:0.06945384493239712, acc:0.626146331738437


 46%|████▌     | 20065/43738 [2:32:13<3:00:19,  2.19it/s]

step:6720, train_loss:0.06945056325844261, acc:0.6261649638674308


 46%|████▌     | 20066/43738 [2:32:13<2:49:31,  2.33it/s]

step:6720, train_loss:0.06946077513544806, acc:0.6261337585966311


 46%|████▌     | 20067/43738 [2:32:13<2:41:07,  2.45it/s]

step:6720, train_loss:0.06946710408141786, acc:0.6261025564359396


 46%|████▌     | 20068/43738 [2:32:14<3:15:48,  2.01it/s]

step:6720, train_loss:0.06947807096743303, acc:0.6260713573848914


 46%|████▌     | 20069/43738 [2:32:14<2:43:00,  2.42it/s]

step:6720, train_loss:0.06947551702102957, acc:0.6260899895361004


 46%|████▌     | 20070/43738 [2:32:15<2:41:41,  2.44it/s]

step:6720, train_loss:0.06947214816990503, acc:0.626108619830593


 46%|████▌     | 20071/43738 [2:32:15<2:23:29,  2.75it/s]

step:6720, train_loss:0.06947128718410514, acc:0.6260774251407504


 46%|████▌     | 20072/43738 [2:32:16<2:39:27,  2.47it/s]

step:6720, train_loss:0.06947226677497696, acc:0.626046233559187


 46%|████▌     | 20073/43738 [2:32:16<2:38:35,  2.49it/s]

step:6720, train_loss:0.06946947345576669, acc:0.6260648632491407


 46%|████▌     | 20074/43738 [2:32:16<2:31:00,  2.61it/s]

step:6720, train_loss:0.06946702860200954, acc:0.6260834910829929


 46%|████▌     | 20075/43738 [2:32:17<3:17:30,  2.00it/s]

step:6720, train_loss:0.06946698556465397, acc:0.6261021170610211


 46%|████▌     | 20076/43738 [2:32:18<3:35:48,  1.83it/s]

step:6720, train_loss:0.06946888006330891, acc:0.6260709304642359


 46%|████▌     | 20077/43738 [2:32:18<3:06:22,  2.12it/s]

step:6720, train_loss:0.0694722732606865, acc:0.6260397469741495


 46%|████▌     | 20078/43738 [2:32:18<2:58:16,  2.21it/s]

step:6720, train_loss:0.06947586497835198, acc:0.6260085665902978


 46%|████▌     | 20079/43738 [2:32:19<3:37:05,  1.82it/s]

step:6720, train_loss:0.06948499474936817, acc:0.6259773893122167


 47%|████▋     | 20384/43738 [2:34:40<2:49:04,  2.30it/s]

step:6740, train_loss:0.06944990786625847, acc:0.6257358712715856


 47%|████▋     | 20385/43738 [2:34:40<2:52:48,  2.25it/s]

step:6740, train_loss:0.06944781329923042, acc:0.6257542310522443


 47%|████▋     | 20386/43738 [2:34:41<2:38:16,  2.46it/s]

step:6740, train_loss:0.06944706391843217, acc:0.6257725890316884


 47%|████▋     | 20387/43738 [2:34:41<2:53:21,  2.25it/s]

step:6740, train_loss:0.06944817259013825, acc:0.625790945210183


 47%|████▋     | 20388/43738 [2:34:42<2:50:47,  2.28it/s]

step:6740, train_loss:0.06944945456509465, acc:0.6257602511281146


 47%|████▋     | 20389/43738 [2:34:42<2:37:32,  2.47it/s]

step:6740, train_loss:0.0694526744203411, acc:0.6257295600568934


 47%|████▋     | 20390/43738 [2:34:42<2:28:37,  2.62it/s]

step:6740, train_loss:0.0694506404285867, acc:0.6257479156449239


 47%|████▋     | 20391/43738 [2:34:43<2:11:53,  2.95it/s]

step:6740, train_loss:0.06944772673522243, acc:0.6257662694325928


 47%|████▋     | 20392/43738 [2:34:43<2:01:10,  3.21it/s]

step:6740, train_loss:0.06944436516490016, acc:0.6257846214201648


 47%|████▋     | 20393/43738 [2:34:43<1:52:25,  3.46it/s]

step:6740, train_loss:0.06944139915466822, acc:0.6258029716079047


 47%|████▋     | 20394/43738 [2:34:43<1:46:55,  3.64it/s]

step:6740, train_loss:0.06943964779263012, acc:0.6258213199960773


 47%|████▋     | 20395/43738 [2:34:44<2:02:54,  3.17it/s]

step:6740, train_loss:0.06943664165313537, acc:0.6258396665849473


 47%|████▋     | 20396/43738 [2:34:44<2:03:06,  3.16it/s]

step:6740, train_loss:0.06944133346438255, acc:0.6258089821533634


 47%|████▋     | 20397/43738 [2:34:45<2:28:34,  2.62it/s]

step:6740, train_loss:0.06944534839914973, acc:0.6257783007304996


 47%|████▋     | 20398/43738 [2:34:45<2:23:01,  2.72it/s]

step:6740, train_loss:0.06944206991909496, acc:0.6257966467300716


 47%|████▋     | 20399/43738 [2:34:45<2:20:39,  2.77it/s]

step:6740, train_loss:0.06944505661508325, acc:0.6257659689200451


 47%|████▋     | 20704/43738 [2:37:05<2:09:52,  2.96it/s]

step:6760, train_loss:0.06942062056095133, acc:0.625579598145286


 47%|████▋     | 20705/43738 [2:37:06<2:16:08,  2.82it/s]

step:6760, train_loss:0.06941820262751422, acc:0.6255976817193915


 47%|████▋     | 20706/43738 [2:37:06<2:28:39,  2.58it/s]

step:6760, train_loss:0.06941490980501198, acc:0.625615763546798


 47%|████▋     | 20707/43738 [2:37:06<2:31:55,  2.53it/s]

step:6760, train_loss:0.06941159335952785, acc:0.6256338436277588


 47%|████▋     | 20708/43738 [2:37:07<2:35:59,  2.46it/s]

step:6760, train_loss:0.06940840906083814, acc:0.6256519219625266


 47%|████▋     | 20709/43738 [2:37:07<2:15:28,  2.83it/s]

step:6760, train_loss:0.06940510890372366, acc:0.6256699985513545


 47%|████▋     | 20710/43738 [2:37:08<2:35:31,  2.47it/s]

step:6760, train_loss:0.06940574654066188, acc:0.6256397875422501


 47%|████▋     | 20711/43738 [2:37:08<3:19:24,  1.92it/s]

step:6760, train_loss:0.06940297266493149, acc:0.6256578629713678


 47%|████▋     | 20712/43738 [2:37:09<3:12:47,  1.99it/s]

step:6760, train_loss:0.0694019989003405, acc:0.6256759366550791


 47%|████▋     | 20713/43738 [2:37:09<3:09:56,  2.02it/s]

step:6760, train_loss:0.06939866222200565, acc:0.6256940085936369


 47%|████▋     | 20714/43738 [2:37:10<2:40:14,  2.39it/s]

step:6760, train_loss:0.06939618875594054, acc:0.6257120787872936


 47%|████▋     | 20715/43738 [2:37:10<2:20:04,  2.74it/s]

step:6760, train_loss:0.06939286623910645, acc:0.6257301472363022


 47%|████▋     | 20716/43738 [2:37:10<2:27:20,  2.60it/s]

step:6760, train_loss:0.06938956050222042, acc:0.6257482139409153


 47%|████▋     | 20717/43738 [2:37:11<2:28:36,  2.58it/s]

step:6760, train_loss:0.06938743888487403, acc:0.6257662789013854


 47%|████▋     | 20718/43738 [2:37:11<2:20:57,  2.72it/s]

step:6760, train_loss:0.0693858042205868, acc:0.6257843421179651


 47%|████▋     | 20719/43738 [2:37:11<2:15:29,  2.83it/s]

step:6760, train_loss:0.06938473187170881, acc:0.6258024035909069


 48%|████▊     | 21024/43738 [2:39:31<3:57:38,  1.59it/s]

step:6780, train_loss:0.0693508410854416, acc:0.625951293759513


 48%|████▊     | 21025/43738 [2:39:31<3:31:00,  1.79it/s]

step:6780, train_loss:0.06935087113606042, acc:0.6259690844233056


 48%|████▊     | 21026/43738 [2:39:31<3:05:33,  2.04it/s]

step:6780, train_loss:0.06935028294314023, acc:0.6259868733948445


 48%|████▊     | 21027/43738 [2:39:32<3:13:21,  1.96it/s]

step:6780, train_loss:0.06935289239484109, acc:0.6259571027726256


 48%|████▊     | 21028/43738 [2:39:32<2:59:15,  2.11it/s]

step:6780, train_loss:0.06935553500833304, acc:0.6259273349819289


 48%|████▊     | 21029/43738 [2:39:33<3:27:13,  1.83it/s]

step:6780, train_loss:0.06935743094837883, acc:0.6258975700223501


 48%|████▊     | 21030/43738 [2:39:34<3:33:26,  1.77it/s]

step:6780, train_loss:0.06935898180150306, acc:0.6258678078934855


 48%|████▊     | 21031/43738 [2:39:34<3:13:06,  1.96it/s]

step:6780, train_loss:0.06935630143681638, acc:0.6258855974513813


 48%|████▊     | 21032/43738 [2:39:34<2:41:46,  2.34it/s]

step:6780, train_loss:0.06935328667299412, acc:0.6259033853176112


 48%|████▊     | 21033/43738 [2:39:35<2:38:41,  2.38it/s]

step:6780, train_loss:0.06935692975767958, acc:0.6258736271573242


 48%|████▊     | 21034/43738 [2:39:35<2:32:57,  2.47it/s]

step:6780, train_loss:0.0693568514562597, acc:0.6258914139013027


 48%|████▊     | 21035/43738 [2:39:36<3:16:25,  1.93it/s]

step:6780, train_loss:0.06935455208310937, acc:0.6259091989541241


 48%|████▊     | 21036/43738 [2:39:37<3:26:47,  1.83it/s]

step:6780, train_loss:0.06935872037112867, acc:0.6258794447613615


 48%|████▊     | 21037/43738 [2:39:37<3:13:26,  1.96it/s]

step:6780, train_loss:0.0693554467545741, acc:0.625897228692304


 48%|████▊     | 21038/43738 [2:39:38<3:46:55,  1.67it/s]

step:6780, train_loss:0.06935223850847261, acc:0.6259150109325982


 48%|████▊     | 21039/43738 [2:39:38<3:41:11,  1.71it/s]

step:6780, train_loss:0.06935395682721562, acc:0.6258852607063073


 49%|████▉     | 21344/43738 [2:41:57<2:51:35,  2.18it/s]

step:6800, train_loss:0.06938136047206356, acc:0.6258433283358321


 49%|████▉     | 21345/43738 [2:41:57<2:44:29,  2.27it/s]

step:6800, train_loss:0.06938643583040552, acc:0.6258140079643945


 49%|████▉     | 21346/43738 [2:41:58<2:48:33,  2.21it/s]

step:6800, train_loss:0.06938319506666821, acc:0.6258315375245947


 49%|████▉     | 21347/43738 [2:41:58<2:42:31,  2.30it/s]

step:6800, train_loss:0.06937994674378198, acc:0.6258490654424509


 49%|████▉     | 21348/43738 [2:41:58<2:20:30,  2.66it/s]

step:6800, train_loss:0.06937787536412555, acc:0.6258665917181937


 49%|████▉     | 21349/43738 [2:41:59<2:31:59,  2.46it/s]

step:6800, train_loss:0.06937480073269042, acc:0.625884116352054


 49%|████▉     | 21350/43738 [2:41:59<2:30:02,  2.49it/s]

step:6800, train_loss:0.06937333781445841, acc:0.6259016393442624


 49%|████▉     | 21351/43738 [2:41:59<2:14:31,  2.77it/s]

step:6800, train_loss:0.06937534596706849, acc:0.6258723244812889


 49%|████▉     | 21352/43738 [2:42:00<2:06:41,  2.94it/s]

step:6800, train_loss:0.06937217756882734, acc:0.6258898463844136


 49%|████▉     | 21353/43738 [2:42:01<2:59:39,  2.08it/s]

step:6800, train_loss:0.06937918171611311, acc:0.6258605348194634


 49%|████▉     | 21354/43738 [2:42:01<2:34:50,  2.41it/s]

step:6800, train_loss:0.06937761152975461, acc:0.625878055633605


 49%|████▉     | 21355/43738 [2:42:01<2:28:42,  2.51it/s]

step:6800, train_loss:0.06937701945460287, acc:0.6258955748068368


 49%|████▉     | 21356/43738 [2:42:01<2:13:51,  2.79it/s]

step:6800, train_loss:0.06937884637634396, acc:0.6258662670912156


 49%|████▉     | 21357/43738 [2:42:02<2:09:25,  2.88it/s]

step:6800, train_loss:0.06937574371375495, acc:0.6258837851758206


 49%|████▉     | 21358/43738 [2:42:02<2:06:00,  2.96it/s]

step:6800, train_loss:0.06937273999265227, acc:0.6259013016200019


 49%|████▉     | 21359/43738 [2:42:02<2:04:16,  3.00it/s]

step:6800, train_loss:0.06937381453578288, acc:0.6259188164239898


 50%|████▉     | 21664/43738 [2:44:25<3:15:52,  1.88it/s]

step:6820, train_loss:0.06944338872588081, acc:0.6256000738552437


 50%|████▉     | 21665/43738 [2:44:25<2:42:36,  2.26it/s]

step:6820, train_loss:0.06944136672501781, acc:0.6256173551811678


 50%|████▉     | 21666/43738 [2:44:25<2:23:31,  2.56it/s]

step:6820, train_loss:0.0694381658892682, acc:0.6256346349118435


 50%|████▉     | 21667/43738 [2:44:26<2:29:05,  2.47it/s]

step:6820, train_loss:0.0694367374682125, acc:0.6256519130474916


 50%|████▉     | 21668/43738 [2:44:26<2:26:11,  2.52it/s]

step:6820, train_loss:0.0694390321747784, acc:0.6256230385822411


 50%|████▉     | 21669/43738 [2:44:26<2:09:38,  2.84it/s]

step:6820, train_loss:0.069435851560076, acc:0.6256403156583137


 50%|████▉     | 21670/43738 [2:44:27<2:12:50,  2.77it/s]

step:6820, train_loss:0.06943862170923983, acc:0.6256114443931703


 50%|████▉     | 21671/43738 [2:44:27<2:17:14,  2.68it/s]

step:6820, train_loss:0.06944855045586408, acc:0.6255825757925338


 50%|████▉     | 21672/43738 [2:44:28<2:44:24,  2.24it/s]

step:6820, train_loss:0.06944543494475662, acc:0.6255998523440384


 50%|████▉     | 21673/43738 [2:44:28<2:41:38,  2.28it/s]

step:6820, train_loss:0.06944350771834115, acc:0.6256171273012504


 50%|████▉     | 21674/43738 [2:44:29<2:42:58,  2.26it/s]

step:6820, train_loss:0.06944300679696622, acc:0.625588262434253


 50%|████▉     | 21675/43738 [2:44:29<2:40:58,  2.28it/s]

step:6820, train_loss:0.06944004081532568, acc:0.62560553633218


 50%|████▉     | 21676/43738 [2:44:30<3:09:43,  1.94it/s]

step:6820, train_loss:0.06943718131490224, acc:0.6256228086362797


 50%|████▉     | 21677/43738 [2:44:30<2:57:32,  2.07it/s]

step:6820, train_loss:0.06944706722632267, acc:0.6255939475019606


 50%|████▉     | 21678/43738 [2:44:31<3:31:08,  1.74it/s]

step:6820, train_loss:0.06944389436795291, acc:0.6256112187471169


 50%|████▉     | 21679/43738 [2:44:31<3:03:07,  2.01it/s]

step:6820, train_loss:0.06944069121089026, acc:0.6256284883989114


 50%|█████     | 21984/43738 [2:46:52<3:23:32,  1.78it/s]

step:6840, train_loss:0.06942333649049284, acc:0.6255913391557496


 50%|█████     | 21985/43738 [2:46:52<2:49:40,  2.14it/s]

step:6840, train_loss:0.06942126103748077, acc:0.6256083693427337


 50%|█████     | 21986/43738 [2:46:53<3:26:12,  1.76it/s]

step:6840, train_loss:0.0694243307905731, acc:0.6255799144910398


 50%|█████     | 21987/43738 [2:46:53<2:49:52,  2.13it/s]

step:6840, train_loss:0.06942522863079133, acc:0.62555146222768


 50%|█████     | 21988/43738 [2:46:54<3:05:28,  1.95it/s]

step:6840, train_loss:0.06942460466857112, acc:0.6255684919046752


 50%|█████     | 21989/43738 [2:46:54<2:34:52,  2.34it/s]

step:6840, train_loss:0.06942583661365083, acc:0.625540042748647


 50%|█████     | 21990/43738 [2:46:54<2:46:43,  2.17it/s]

step:6840, train_loss:0.06942572510022665, acc:0.6255115961800819


 50%|█████     | 21991/43738 [2:46:55<2:36:50,  2.31it/s]

step:6840, train_loss:0.06942520384719751, acc:0.6254831521986267


 50%|█████     | 21992/43738 [2:46:56<3:15:59,  1.85it/s]

step:6840, train_loss:0.06942731493047749, acc:0.6254547108039287


 50%|█████     | 21993/43738 [2:46:56<3:23:41,  1.78it/s]

step:6840, train_loss:0.06942646424427981, acc:0.6254717410085027


 50%|█████     | 21994/43738 [2:46:57<3:19:52,  1.81it/s]

step:6840, train_loss:0.06942377782714897, acc:0.625488769664454


 50%|█████     | 21995/43738 [2:46:57<3:03:37,  1.97it/s]

step:6840, train_loss:0.0694211318064698, acc:0.6255057967719936


 50%|█████     | 21996/43738 [2:46:58<3:01:01,  2.00it/s]

step:6840, train_loss:0.06941799334031162, acc:0.6255228223313329


 50%|█████     | 21997/43738 [2:46:58<2:35:48,  2.33it/s]

step:6840, train_loss:0.06941819236285823, acc:0.625494385598036


 50%|█████     | 21998/43738 [2:46:58<2:38:08,  2.29it/s]

step:6840, train_loss:0.06941750528216958, acc:0.6255114101281934


 50%|█████     | 21999/43738 [2:46:59<3:07:03,  1.94it/s]

step:6840, train_loss:0.06941950128663481, acc:0.6254829764989318


 51%|█████     | 22304/43738 [2:49:16<2:22:36,  2.51it/s]

step:6860, train_loss:0.06947373832409047, acc:0.6251345050215208


 51%|█████     | 22305/43738 [2:49:16<2:39:58,  2.23it/s]

step:6860, train_loss:0.06947066138985807, acc:0.6251513113651648


 51%|█████     | 22306/43738 [2:49:17<2:27:23,  2.42it/s]

step:6860, train_loss:0.0694679358146515, acc:0.6251681162019188


 51%|█████     | 22307/43738 [2:49:17<2:10:53,  2.73it/s]

step:6860, train_loss:0.06946482635222624, acc:0.6251849195319855


 51%|█████     | 22308/43738 [2:49:17<1:56:26,  3.07it/s]

step:6860, train_loss:0.06946222279407278, acc:0.6252017213555675


 51%|█████     | 22309/43738 [2:49:17<1:57:51,  3.03it/s]

step:6860, train_loss:0.0694600574455083, acc:0.6252185216728674


 51%|█████     | 22310/43738 [2:49:18<2:10:10,  2.74it/s]

step:6860, train_loss:0.06946312356510902, acc:0.6251904975347378


 51%|█████     | 22311/43738 [2:49:18<2:13:41,  2.67it/s]

step:6860, train_loss:0.06946063960992109, acc:0.6252072968490879


 51%|█████     | 22312/43738 [2:49:19<2:44:18,  2.17it/s]

step:6860, train_loss:0.06945754973499106, acc:0.6252240946575833


 51%|█████     | 22313/43738 [2:49:20<3:00:18,  1.98it/s]

step:6860, train_loss:0.06946079201993414, acc:0.6251960740375566


 51%|█████     | 22314/43738 [2:49:20<2:41:07,  2.22it/s]

step:6860, train_loss:0.06945779102774473, acc:0.6252128708434167


 51%|█████     | 22315/43738 [2:49:20<2:27:49,  2.42it/s]

step:6860, train_loss:0.06945470333689763, acc:0.6252296661438494


 51%|█████     | 22316/43738 [2:49:21<2:20:52,  2.53it/s]

step:6860, train_loss:0.06945247548025948, acc:0.6252464599390571


 51%|█████     | 22317/43738 [2:49:21<2:31:59,  2.35it/s]

step:6860, train_loss:0.06945311318430221, acc:0.6252184433391584


 51%|█████     | 22318/43738 [2:49:22<2:45:35,  2.16it/s]

step:6860, train_loss:0.06946034716357026, acc:0.6251904292499328


 51%|█████     | 22319/43738 [2:49:22<2:25:07,  2.46it/s]

step:6860, train_loss:0.06945749885658714, acc:0.625207222545813


 52%|█████▏    | 22624/43738 [2:51:37<3:34:05,  1.64it/s]

step:6880, train_loss:0.06939486061150904, acc:0.6259282178217822


 52%|█████▏    | 22625/43738 [2:51:38<2:57:39,  1.98it/s]

step:6880, train_loss:0.06939326626908558, acc:0.6259447513812155


 52%|█████▏    | 22626/43738 [2:51:38<3:05:07,  1.90it/s]

step:6880, train_loss:0.06939032118868438, acc:0.6259612834791832


 52%|█████▏    | 22627/43738 [2:51:39<3:03:42,  1.92it/s]

step:6880, train_loss:0.06938923522227276, acc:0.6259778141158793


 52%|█████▏    | 22628/43738 [2:51:39<2:57:27,  1.98it/s]

step:6880, train_loss:0.06938680743751374, acc:0.6259943432914973


 52%|█████▏    | 22629/43738 [2:51:40<2:36:00,  2.26it/s]

step:6880, train_loss:0.0693837413464135, acc:0.626010871006231


 52%|█████▏    | 22630/43738 [2:51:40<2:20:34,  2.50it/s]

step:6880, train_loss:0.06938215484005425, acc:0.626027397260274


 52%|█████▏    | 22631/43738 [2:51:40<2:08:47,  2.73it/s]

step:6880, train_loss:0.06937934632600713, acc:0.62604392205382


 52%|█████▏    | 22632/43738 [2:51:41<2:49:22,  2.08it/s]

step:6880, train_loss:0.06938080262539431, acc:0.6260162601626016


 52%|█████▏    | 22633/43738 [2:51:41<2:38:02,  2.23it/s]

step:6880, train_loss:0.06937939685190302, acc:0.6260327839879821


 52%|█████▏    | 22634/43738 [2:51:42<2:31:48,  2.32it/s]

step:6880, train_loss:0.06938175153261292, acc:0.626005125033136


 52%|█████▏    | 22635/43738 [2:51:42<2:46:56,  2.11it/s]

step:6880, train_loss:0.06938264728197972, acc:0.6259774685222002


 52%|█████▏    | 22636/43738 [2:51:43<2:48:41,  2.08it/s]

step:6880, train_loss:0.0693855985751573, acc:0.6259498144548507


 52%|█████▏    | 22637/43738 [2:51:43<2:30:00,  2.34it/s]

step:6880, train_loss:0.06938261223375293, acc:0.6259663382957106


 52%|█████▏    | 22638/43738 [2:51:44<2:41:59,  2.17it/s]

step:6880, train_loss:0.06938026904999102, acc:0.6259828606767383


 52%|█████▏    | 22639/43738 [2:51:44<2:33:31,  2.29it/s]

step:6880, train_loss:0.06937955430302187, acc:0.6259993815981272


 52%|█████▏    | 22944/43738 [2:54:01<2:22:32,  2.43it/s]

step:6900, train_loss:0.06938848981324407, acc:0.6256101813110181


 52%|█████▏    | 22945/43738 [2:54:01<2:02:53,  2.82it/s]

step:6900, train_loss:0.06938551027996942, acc:0.6256264981477446


 52%|█████▏    | 22946/43738 [2:54:02<2:05:07,  2.77it/s]

step:6900, train_loss:0.0693881965991923, acc:0.6255992329817833


 52%|█████▏    | 22947/43738 [2:54:02<2:33:08,  2.26it/s]

step:6900, train_loss:0.06939256146402538, acc:0.625571970192182


 52%|█████▏    | 22948/43738 [2:54:03<2:21:31,  2.45it/s]

step:6900, train_loss:0.06939362053544067, acc:0.6255882865609204


 52%|█████▏    | 22949/43738 [2:54:03<2:11:51,  2.63it/s]

step:6900, train_loss:0.06939079564172457, acc:0.6256046015076909


 52%|█████▏    | 22950/43738 [2:54:04<2:13:28,  2.60it/s]

step:6900, train_loss:0.06939439265901773, acc:0.6255773420479303


 52%|█████▏    | 22951/43738 [2:54:04<2:55:19,  1.98it/s]

step:6900, train_loss:0.069400302244344, acc:0.6255500849636182


 52%|█████▏    | 22952/43738 [2:54:05<2:35:11,  2.23it/s]

step:6900, train_loss:0.0693987800740786, acc:0.6255663994423144


 52%|█████▏    | 22953/43738 [2:54:05<2:48:25,  2.06it/s]

step:6900, train_loss:0.06939897821557546, acc:0.6255391452097765


 52%|█████▏    | 22954/43738 [2:54:06<2:48:40,  2.05it/s]

step:6900, train_loss:0.06940185165945413, acc:0.6255118933519213


 52%|█████▏    | 22955/43738 [2:54:06<3:19:53,  1.73it/s]

step:6900, train_loss:0.06939892385656682, acc:0.6255282073622305


 52%|█████▏    | 22956/43738 [2:54:07<3:16:10,  1.77it/s]

step:6900, train_loss:0.06939603366681919, acc:0.625544519951211


 52%|█████▏    | 22957/43738 [2:54:07<3:07:23,  1.85it/s]

step:6900, train_loss:0.06939316584011708, acc:0.6255608311190487


 52%|█████▏    | 22958/43738 [2:54:08<2:48:27,  2.06it/s]

step:6900, train_loss:0.06939076741762315, acc:0.6255771408659291


 52%|█████▏    | 22959/43738 [2:54:09<3:13:08,  1.79it/s]

step:6900, train_loss:0.0693895972324828, acc:0.625593449192038


 53%|█████▎    | 23264/43738 [2:56:27<3:27:52,  1.64it/s]

step:6920, train_loss:0.06940583591880507, acc:0.6255588033012379


 53%|█████▎    | 23265/43738 [2:56:27<3:12:30,  1.77it/s]

step:6920, train_loss:0.06940521549645084, acc:0.625531914893617


 53%|█████▎    | 23266/43738 [2:56:28<3:03:07,  1.86it/s]

step:6920, train_loss:0.06940234217253885, acc:0.6255480099716324


 53%|█████▎    | 23267/43738 [2:56:28<2:32:53,  2.23it/s]

step:6920, train_loss:0.0693997819959824, acc:0.6255641036661366


 53%|█████▎    | 23268/43738 [2:56:28<2:20:24,  2.43it/s]

step:6920, train_loss:0.06939820872860314, acc:0.6255801959773079


 53%|█████▎    | 23269/43738 [2:56:28<2:07:54,  2.67it/s]

step:6920, train_loss:0.06940141898462114, acc:0.6255533112725085


 53%|█████▎    | 23270/43738 [2:56:29<2:25:15,  2.35it/s]

step:6920, train_loss:0.0694048146855753, acc:0.6255264288783842


 53%|█████▎    | 23271/43738 [2:56:29<2:15:58,  2.51it/s]

step:6920, train_loss:0.06940310303049058, acc:0.6255425207339608


 53%|█████▎    | 23272/43738 [2:56:30<2:15:33,  2.52it/s]

step:6920, train_loss:0.06940074296278316, acc:0.6255586112066002


 53%|█████▎    | 23273/43738 [2:56:30<2:21:53,  2.40it/s]

step:6920, train_loss:0.06939803503639821, acc:0.6255747002964809


 53%|█████▎    | 23274/43738 [2:56:30<2:13:06,  2.56it/s]

step:6920, train_loss:0.069395140253557, acc:0.6255907880037811


 53%|█████▎    | 23275/43738 [2:56:31<1:58:46,  2.87it/s]

step:6920, train_loss:0.06939236384485603, acc:0.6256068743286788


 53%|█████▎    | 23276/43738 [2:56:31<2:24:40,  2.36it/s]

step:6920, train_loss:0.06939035115244757, acc:0.6256229592713525


 53%|█████▎    | 23277/43738 [2:56:32<3:04:12,  1.85it/s]

step:6920, train_loss:0.06939342873251322, acc:0.6255960819693259


 53%|█████▎    | 23278/43738 [2:56:33<2:52:24,  1.98it/s]

step:6920, train_loss:0.06939329237730459, acc:0.6256121659936421


 53%|█████▎    | 23279/43738 [2:56:33<2:56:16,  1.93it/s]

step:6920, train_loss:0.06940481802764535, acc:0.62558529146441


 54%|█████▍    | 23584/43738 [2:58:53<2:19:09,  2.41it/s]

step:6940, train_loss:0.0694284185546965, acc:0.6256784260515604


 54%|█████▍    | 23585/43738 [2:58:54<2:30:27,  2.23it/s]

step:6940, train_loss:0.06942803491073445, acc:0.6256942972228111


 54%|█████▍    | 23586/43738 [2:58:54<2:36:59,  2.14it/s]

step:6940, train_loss:0.06942997269880646, acc:0.6256677690155177


 54%|█████▍    | 23587/43738 [2:58:55<2:32:14,  2.21it/s]

step:6940, train_loss:0.06943048036040338, acc:0.6256412430576165


 54%|█████▍    | 23588/43738 [2:58:55<2:23:53,  2.33it/s]

step:6940, train_loss:0.06942882714791358, acc:0.6256571137866712


 54%|█████▍    | 23589/43738 [2:58:56<2:12:36,  2.53it/s]

step:6940, train_loss:0.06942699148030393, acc:0.6256729831701217


 54%|█████▍    | 23590/43738 [2:58:56<2:19:40,  2.40it/s]

step:6940, train_loss:0.06942443953578155, acc:0.625688851208139


 54%|█████▍    | 23591/43738 [2:58:56<2:00:38,  2.78it/s]

step:6940, train_loss:0.06942761003668393, acc:0.625662328854224


 54%|█████▍    | 23592/43738 [2:58:57<2:22:30,  2.36it/s]

step:6940, train_loss:0.06943264221980838, acc:0.6256358087487284


 54%|█████▍    | 23593/43738 [2:58:57<2:04:39,  2.69it/s]

step:6940, train_loss:0.06944074261700833, acc:0.6256092908913661


 54%|█████▍    | 23594/43738 [2:58:58<2:27:30,  2.28it/s]

step:6940, train_loss:0.0694381296431611, acc:0.6256251589387132


 54%|█████▍    | 23595/43738 [2:58:58<2:42:20,  2.07it/s]

step:6940, train_loss:0.06943948280501076, acc:0.6255986437804619


 54%|█████▍    | 23596/43738 [2:58:59<2:39:53,  2.10it/s]

step:6940, train_loss:0.06943743447898523, acc:0.6256145109340566


 54%|█████▍    | 23597/43738 [2:58:59<2:47:42,  2.00it/s]

step:6940, train_loss:0.06944348944061472, acc:0.6255879984743823


 54%|█████▍    | 23598/43738 [2:59:00<2:35:38,  2.16it/s]

step:6940, train_loss:0.06944334385928283, acc:0.6255614882617171


 54%|█████▍    | 23599/43738 [2:59:00<2:25:34,  2.31it/s]

step:6940, train_loss:0.06944054759982823, acc:0.6255773549726683


 55%|█████▍    | 23904/43738 [3:01:19<2:37:09,  2.10it/s]

step:6960, train_loss:0.0694317032193648, acc:0.6255856760374833


 55%|█████▍    | 23905/43738 [3:01:20<3:09:20,  1.75it/s]

step:6960, train_loss:0.06943290476177909, acc:0.6255595063794185


 55%|█████▍    | 23906/43738 [3:01:20<2:52:03,  1.92it/s]

step:6960, train_loss:0.06943121432062026, acc:0.6255751694135363


 55%|█████▍    | 23907/43738 [3:01:20<2:39:19,  2.07it/s]

step:6960, train_loss:0.06942976539793781, acc:0.6255908311373238


 55%|█████▍    | 23908/43738 [3:01:21<3:02:47,  1.81it/s]

step:6960, train_loss:0.06942995343585569, acc:0.6256064915509453


 55%|█████▍    | 23909/43738 [3:01:21<2:35:13,  2.13it/s]

step:6960, train_loss:0.06942865358809916, acc:0.6256221506545653


 55%|█████▍    | 23910/43738 [3:01:22<2:10:41,  2.53it/s]

step:6960, train_loss:0.06942714674225019, acc:0.6256378084483479


 55%|█████▍    | 23911/43738 [3:01:22<1:57:05,  2.82it/s]

step:6960, train_loss:0.06942426086329123, acc:0.6256534649324579


 55%|█████▍    | 23912/43738 [3:01:22<1:47:12,  3.08it/s]

step:6960, train_loss:0.06942451661382983, acc:0.625627300100368


 55%|█████▍    | 23913/43738 [3:01:22<1:38:43,  3.35it/s]

step:6960, train_loss:0.06942255652728517, acc:0.6256429557144649


 55%|█████▍    | 23914/43738 [3:01:23<1:32:23,  3.58it/s]

step:6960, train_loss:0.0694230280764413, acc:0.6256167935100778


 55%|█████▍    | 23915/43738 [3:01:23<1:49:10,  3.03it/s]

step:6960, train_loss:0.06942053634866174, acc:0.6256324482542337


 55%|█████▍    | 23916/43738 [3:01:23<1:38:21,  3.36it/s]

step:6960, train_loss:0.06941773525725782, acc:0.6256481016892457


 55%|█████▍    | 23917/43738 [3:01:24<1:44:22,  3.17it/s]

step:6960, train_loss:0.06941518287135941, acc:0.6256637538152778


 55%|█████▍    | 23918/43738 [3:01:24<1:47:52,  3.06it/s]

step:6960, train_loss:0.06941247930955816, acc:0.6256794046324944


 55%|█████▍    | 23919/43738 [3:01:25<2:12:30,  2.49it/s]

step:6960, train_loss:0.06941427899302836, acc:0.6256532463731761


 55%|█████▌    | 24224/43738 [3:03:49<2:40:03,  2.03it/s]

step:6980, train_loss:0.06950890416719079, acc:0.6249587186261559


 55%|█████▌    | 24225/43738 [3:03:49<2:14:25,  2.42it/s]

step:6980, train_loss:0.06950622257230885, acc:0.6249742002063984


 55%|█████▌    | 24226/43738 [3:03:50<2:18:03,  2.36it/s]

step:6980, train_loss:0.06950565129866466, acc:0.6249896805085445


 55%|█████▌    | 24227/43738 [3:03:50<2:11:32,  2.47it/s]

step:6980, train_loss:0.06950302900933031, acc:0.6250051595327527


 55%|█████▌    | 24228/43738 [3:03:50<2:06:27,  2.57it/s]

step:6980, train_loss:0.06950047675167582, acc:0.6250206372791811


 55%|█████▌    | 24229/43738 [3:03:51<2:14:13,  2.42it/s]

step:6980, train_loss:0.06950014861705929, acc:0.625036113747988


 55%|█████▌    | 24230/43738 [3:03:52<2:32:54,  2.13it/s]

step:6980, train_loss:0.06949731893634939, acc:0.6250515889393314


 55%|█████▌    | 24231/43738 [3:03:52<2:19:16,  2.33it/s]

step:6980, train_loss:0.06949580109947956, acc:0.6250670628533697


 55%|█████▌    | 24232/43738 [3:03:52<2:29:32,  2.17it/s]

step:6980, train_loss:0.06949300543820938, acc:0.6250825354902608


 55%|█████▌    | 24233/43738 [3:03:53<2:11:13,  2.48it/s]

step:6980, train_loss:0.06949205172721293, acc:0.625098006850163


 55%|█████▌    | 24234/43738 [3:03:53<2:16:00,  2.39it/s]

step:6980, train_loss:0.06948958303839133, acc:0.6251134769332343


 55%|█████▌    | 24235/43738 [3:03:54<2:12:20,  2.46it/s]

step:6980, train_loss:0.06948792008049827, acc:0.6251289457396327


 55%|█████▌    | 24236/43738 [3:03:54<2:28:45,  2.18it/s]

step:6980, train_loss:0.06948790314399786, acc:0.6251444132695164


 55%|█████▌    | 24237/43738 [3:03:55<2:25:06,  2.24it/s]

step:6980, train_loss:0.06949091417317373, acc:0.6251186202912902


 55%|█████▌    | 24238/43738 [3:03:55<2:07:30,  2.55it/s]

step:6980, train_loss:0.06948901699535592, acc:0.6251340869708721


 55%|█████▌    | 24239/43738 [3:03:55<2:09:58,  2.50it/s]

step:6980, train_loss:0.0694863407077123, acc:0.6251495523742728


 56%|█████▌    | 24544/43738 [3:06:16<2:38:29,  2.02it/s]

step:7000, train_loss:0.06960155771736203, acc:0.6241036505867015


 56%|█████▌    | 24545/43738 [3:06:16<2:38:41,  2.02it/s]

step:7000, train_loss:0.06960015530805619, acc:0.6241189651660216


 56%|█████▌    | 24546/43738 [3:06:17<2:57:38,  1.80it/s]

step:7000, train_loss:0.06960046351892644, acc:0.6240935386621038


 56%|█████▌    | 24547/43738 [3:06:17<2:49:22,  1.89it/s]

step:7000, train_loss:0.06959770550682336, acc:0.6241088524055893


 56%|█████▌    | 24548/43738 [3:06:18<2:52:06,  1.86it/s]

step:7000, train_loss:0.06960305085105507, acc:0.6240834283852045


 56%|█████▌    | 24549/43738 [3:06:18<3:08:40,  1.70it/s]

step:7000, train_loss:0.06960156096700656, acc:0.6240987412929243


 56%|█████▌    | 24550/43738 [3:06:19<2:57:43,  1.80it/s]

step:7000, train_loss:0.06960125555772981, acc:0.6241140529531568


 56%|█████▌    | 24551/43738 [3:06:20<3:21:13,  1.59it/s]

step:7000, train_loss:0.069600787853189, acc:0.6241293633660543


 56%|█████▌    | 24552/43738 [3:06:20<2:43:30,  1.96it/s]

step:7000, train_loss:0.06959807555610878, acc:0.6241446725317693


 56%|█████▌    | 24553/43738 [3:06:21<2:45:14,  1.94it/s]

step:7000, train_loss:0.06959541734881645, acc:0.6241599804504541


 56%|█████▌    | 24554/43738 [3:06:21<2:34:55,  2.06it/s]

step:7000, train_loss:0.06959594396570995, acc:0.6241752871222611


 56%|█████▌    | 24555/43738 [3:06:21<2:35:54,  2.05it/s]

step:7000, train_loss:0.06960115212591034, acc:0.6241498676440643


 56%|█████▌    | 24556/43738 [3:06:22<2:40:06,  2.00it/s]

step:7000, train_loss:0.06959999421827126, acc:0.624165173481023


 56%|█████▌    | 24557/43738 [3:06:22<2:14:31,  2.38it/s]

step:7000, train_loss:0.06959761597411339, acc:0.6241804780714256


 56%|█████▌    | 24558/43738 [3:06:23<2:11:07,  2.44it/s]

step:7000, train_loss:0.06959508290022227, acc:0.6241957814154248


 56%|█████▌    | 24559/43738 [3:06:23<2:26:57,  2.18it/s]

step:7000, train_loss:0.06959805757284356, acc:0.6241703652428845


 57%|█████▋    | 24864/43738 [3:08:42<3:03:05,  1.72it/s]

step:7020, train_loss:0.06960142213408747, acc:0.6237129987129987


 57%|█████▋    | 24865/43738 [3:08:42<2:42:11,  1.94it/s]

step:7020, train_loss:0.0696009343493118, acc:0.6237281319123266


 57%|█████▋    | 24866/43738 [3:08:43<3:11:59,  1.64it/s]

step:7020, train_loss:0.06959911598598541, acc:0.6237432638944744


 57%|█████▋    | 24867/43738 [3:08:43<3:06:33,  1.69it/s]

step:7020, train_loss:0.06960073727054893, acc:0.623718180721438


 57%|█████▋    | 24868/43738 [3:08:44<2:50:44,  1.84it/s]

step:7020, train_loss:0.06960461477798756, acc:0.623693099565707


 57%|█████▋    | 24869/43738 [3:08:44<3:01:54,  1.73it/s]

step:7020, train_loss:0.06960723079093403, acc:0.6236680204270377


 57%|█████▋    | 24870/43738 [3:08:45<2:29:13,  2.11it/s]

step:7020, train_loss:0.06960443798127991, acc:0.6236831523924407


 57%|█████▋    | 24871/43738 [3:08:45<2:06:41,  2.48it/s]

step:7020, train_loss:0.06960552575300846, acc:0.6236580756704596


 57%|█████▋    | 24872/43738 [3:08:45<1:59:28,  2.63it/s]

step:7020, train_loss:0.06960342065738645, acc:0.6236732068189128


 57%|█████▋    | 24873/43738 [3:08:46<1:58:03,  2.66it/s]

step:7020, train_loss:0.06960156599271826, acc:0.6236883367506936


 57%|█████▋    | 24874/43738 [3:08:46<2:05:02,  2.51it/s]

step:7020, train_loss:0.0695987789927122, acc:0.6237034654659483


 57%|█████▋    | 24875/43738 [3:08:46<1:51:13,  2.83it/s]

step:7020, train_loss:0.06959737950021719, acc:0.6237185929648241


 57%|█████▋    | 24876/43738 [3:08:46<1:43:19,  3.04it/s]

step:7020, train_loss:0.06959681530169154, acc:0.6237337192474675


 57%|█████▋    | 24877/43738 [3:08:47<2:04:35,  2.52it/s]

step:7020, train_loss:0.06959831877404035, acc:0.6237086465409817


 57%|█████▋    | 24878/43738 [3:08:47<2:07:26,  2.47it/s]

step:7020, train_loss:0.06959779323865635, acc:0.623723772007396


 57%|█████▋    | 24879/43738 [3:08:48<2:04:56,  2.52it/s]

step:7020, train_loss:0.06959845374070235, acc:0.6236987017163069


 58%|█████▊    | 25184/43738 [3:11:09<2:39:02,  1.94it/s]

step:7040, train_loss:0.06952166427642105, acc:0.6240867217280813


 58%|█████▊    | 25185/43738 [3:11:09<2:16:10,  2.27it/s]

step:7040, train_loss:0.06951894953458695, acc:0.6241016478062339


 58%|█████▊    | 25186/43738 [3:11:10<2:12:29,  2.33it/s]

step:7040, train_loss:0.06951799965050583, acc:0.6241165726991186


 58%|█████▊    | 25187/43738 [3:11:10<2:07:50,  2.42it/s]

step:7040, train_loss:0.0695153125973066, acc:0.6241314964068766


 58%|█████▊    | 25188/43738 [3:11:11<2:06:28,  2.44it/s]

step:7040, train_loss:0.06951302762027407, acc:0.6241464189296491


 58%|█████▊    | 25189/43738 [3:11:11<1:58:47,  2.60it/s]

step:7040, train_loss:0.06951420191606553, acc:0.6241216403985866


 58%|█████▊    | 25190/43738 [3:11:11<1:45:56,  2.92it/s]

step:7040, train_loss:0.0695128479863715, acc:0.6241365621278285


 58%|█████▊    | 25191/43738 [3:11:12<1:58:11,  2.62it/s]

step:7040, train_loss:0.06951053413921839, acc:0.624151482672383


 58%|█████▊    | 25192/43738 [3:11:12<1:52:05,  2.76it/s]

step:7040, train_loss:0.06951405379208335, acc:0.6241267068910765


 58%|█████▊    | 25193/43738 [3:11:12<2:11:13,  2.36it/s]

step:7040, train_loss:0.06951179851811576, acc:0.6241416266423213


 58%|█████▊    | 25194/43738 [3:11:13<2:15:50,  2.28it/s]

step:7040, train_loss:0.06950992505177263, acc:0.6241565452091767


 58%|█████▊    | 25195/43738 [3:11:14<2:38:06,  1.95it/s]

step:7040, train_loss:0.06952314707234557, acc:0.6241317721770192


 58%|█████▊    | 25196/43738 [3:11:14<2:52:48,  1.79it/s]

step:7040, train_loss:0.0695240098360817, acc:0.6241070011112875


 58%|█████▊    | 25197/43738 [3:11:15<2:36:41,  1.97it/s]

step:7040, train_loss:0.06952125491888553, acc:0.6241219192761043


 58%|█████▊    | 25198/43738 [3:11:15<2:33:37,  2.01it/s]

step:7040, train_loss:0.06951964075852851, acc:0.6241368362568458


 58%|█████▊    | 25199/43738 [3:11:16<2:17:30,  2.25it/s]

step:7040, train_loss:0.06952581932191199, acc:0.6241120679392039


 58%|█████▊    | 25504/43738 [3:13:32<2:14:07,  2.27it/s]

step:7060, train_loss:0.0695835462036352, acc:0.6236276662484316


 58%|█████▊    | 25505/43738 [3:13:32<2:20:49,  2.16it/s]

step:7060, train_loss:0.06958466798838342, acc:0.6236032150558714


 58%|█████▊    | 25506/43738 [3:13:33<2:33:46,  1.98it/s]

step:7060, train_loss:0.06958564323040398, acc:0.6235787657806007


 58%|█████▊    | 25507/43738 [3:13:33<2:22:26,  2.13it/s]

step:7060, train_loss:0.06958535544265557, acc:0.6235543184223938


 58%|█████▊    | 25508/43738 [3:13:34<2:40:46,  1.89it/s]

step:7060, train_loss:0.06958372402245186, acc:0.6235690763681982


 58%|█████▊    | 25509/43738 [3:13:34<2:22:52,  2.13it/s]

step:7060, train_loss:0.06958924567423991, acc:0.6235446313065977


 58%|█████▊    | 25510/43738 [3:13:35<2:23:57,  2.11it/s]

step:7060, train_loss:0.06959366010076341, acc:0.6235201881615053


 58%|█████▊    | 25511/43738 [3:13:36<2:54:09,  1.74it/s]

step:7060, train_loss:0.06959100728667303, acc:0.6235349457096938


 58%|█████▊    | 25512/43738 [3:13:36<2:58:32,  1.70it/s]

step:7060, train_loss:0.06959521197566476, acc:0.6235105048604578


 58%|█████▊    | 25513/43738 [3:13:37<2:42:06,  1.87it/s]

step:7060, train_loss:0.06959253863059184, acc:0.6235252616313252


 58%|█████▊    | 25514/43738 [3:13:37<2:33:47,  1.97it/s]

step:7060, train_loss:0.06959305660175875, acc:0.6235008230775261


 58%|█████▊    | 25515/43738 [3:13:37<2:30:37,  2.02it/s]

step:7060, train_loss:0.06959896626739764, acc:0.6234763864393494


 58%|█████▊    | 25516/43738 [3:13:38<2:58:03,  1.71it/s]

step:7060, train_loss:0.06961068204226145, acc:0.62345195171657


 58%|█████▊    | 25517/43738 [3:13:39<2:41:00,  1.89it/s]

step:7060, train_loss:0.06960845246555457, acc:0.6234667084688639


 58%|█████▊    | 25518/43738 [3:13:39<2:22:06,  2.14it/s]

step:7060, train_loss:0.06960722495646243, acc:0.6234814640645818


 58%|█████▊    | 25519/43738 [3:13:40<2:33:47,  1.97it/s]

step:7060, train_loss:0.06960471004301431, acc:0.6234962185038598


 59%|█████▉    | 25824/43738 [3:15:57<2:15:51,  2.20it/s]

step:7080, train_loss:0.06961763052838342, acc:0.6236059479553904


 59%|█████▉    | 25825/43738 [3:15:57<2:07:06,  2.35it/s]

step:7080, train_loss:0.06961764406775496, acc:0.6235818005808326


 59%|█████▉    | 25826/43738 [3:15:58<1:53:22,  2.63it/s]

step:7080, train_loss:0.06961518448990774, acc:0.6235963757453729


 59%|█████▉    | 25827/43738 [3:15:58<1:49:39,  2.72it/s]

step:7080, train_loss:0.0696129635493763, acc:0.6236109497812367


 59%|█████▉    | 25828/43738 [3:15:58<1:44:34,  2.85it/s]

step:7080, train_loss:0.0696103807258198, acc:0.6236255226885551


 59%|█████▉    | 25829/43738 [3:15:59<1:52:39,  2.65it/s]

step:7080, train_loss:0.06960886995231849, acc:0.623640094467459


 59%|█████▉    | 25830/43738 [3:15:59<2:18:18,  2.16it/s]

step:7080, train_loss:0.06960690072078038, acc:0.6236546651180798


 59%|█████▉    | 25831/43738 [3:16:00<1:59:00,  2.51it/s]

step:7080, train_loss:0.06960423010224301, acc:0.6236692346405481


 59%|█████▉    | 25832/43738 [3:16:00<2:08:33,  2.32it/s]

step:7080, train_loss:0.06960598985265232, acc:0.623645091359554


 59%|█████▉    | 25833/43738 [3:16:01<2:07:13,  2.35it/s]

step:7080, train_loss:0.06961580369932434, acc:0.6236209499477413


 59%|█████▉    | 25834/43738 [3:16:01<2:17:51,  2.16it/s]

step:7080, train_loss:0.0696172165467438, acc:0.6235968104048928


 59%|█████▉    | 25835/43738 [3:16:01<2:08:40,  2.32it/s]

step:7080, train_loss:0.06962472144847999, acc:0.6235726727307915


 59%|█████▉    | 25836/43738 [3:16:02<2:01:40,  2.45it/s]

step:7080, train_loss:0.06962214237679296, acc:0.6235872426072148


 59%|█████▉    | 25837/43738 [3:16:02<2:19:27,  2.14it/s]

step:7080, train_loss:0.06962048508190402, acc:0.6236018113558076


 59%|█████▉    | 25838/43738 [3:16:03<2:15:08,  2.21it/s]

step:7080, train_loss:0.069621538558668, acc:0.6235776762907346


 59%|█████▉    | 25839/43738 [3:16:03<2:01:35,  2.45it/s]

step:7080, train_loss:0.0696190881210637, acc:0.6235922442818994


 60%|█████▉    | 26144/43738 [3:18:18<2:22:06,  2.06it/s]

step:7100, train_loss:0.06963040493416461, acc:0.623546511627907


 60%|█████▉    | 26145/43738 [3:18:18<2:16:51,  2.14it/s]

step:7100, train_loss:0.06962838116703968, acc:0.6235609103078983


 60%|█████▉    | 26146/43738 [3:18:19<2:05:13,  2.34it/s]

step:7100, train_loss:0.06963072631632364, acc:0.6235370611183355


 60%|█████▉    | 26147/43738 [3:18:19<2:07:17,  2.30it/s]

step:7100, train_loss:0.0696309287782688, acc:0.6235132137530118


 60%|█████▉    | 26148/43738 [3:18:20<2:37:40,  1.86it/s]

step:7100, train_loss:0.06964227814697785, acc:0.623489368211718


 60%|█████▉    | 26149/43738 [3:18:20<2:46:04,  1.77it/s]

step:7100, train_loss:0.06963988469067303, acc:0.6235037668744503


 60%|█████▉    | 26150/43738 [3:18:21<2:26:24,  2.00it/s]

step:7100, train_loss:0.06963884619737969, acc:0.6235181644359464


 60%|█████▉    | 26151/43738 [3:18:21<2:15:18,  2.17it/s]

step:7100, train_loss:0.06964042246406958, acc:0.6234943214408627


 60%|█████▉    | 26152/43738 [3:18:22<2:24:09,  2.03it/s]

step:7100, train_loss:0.06964166991402677, acc:0.6234704802691955


 60%|█████▉    | 26153/43738 [3:18:22<2:15:57,  2.16it/s]

step:7100, train_loss:0.06964160680453237, acc:0.6234466409207357


 60%|█████▉    | 26154/43738 [3:18:23<2:12:47,  2.21it/s]

step:7100, train_loss:0.0696390721616132, acc:0.6234610384644796


 60%|█████▉    | 26155/43738 [3:18:23<2:10:05,  2.25it/s]

step:7100, train_loss:0.06964286280620964, acc:0.6234372012999426


 60%|█████▉    | 26156/43738 [3:18:23<2:02:56,  2.38it/s]

step:7100, train_loss:0.06964135722355053, acc:0.6234515981036856


 60%|█████▉    | 26157/43738 [3:18:24<2:29:16,  1.96it/s]

step:7100, train_loss:0.06964168490418648, acc:0.6234277631226822


 60%|█████▉    | 26158/43738 [3:18:25<2:58:44,  1.64it/s]

step:7100, train_loss:0.06964336659612319, acc:0.6234039299640646


 60%|█████▉    | 26159/43738 [3:18:25<2:46:00,  1.76it/s]

step:7100, train_loss:0.06964115383707671, acc:0.6234183263886234


 61%|██████    | 26464/43738 [3:20:43<2:16:57,  2.10it/s]

step:7120, train_loss:0.06967724329954629, acc:0.6229217049576784


 61%|██████    | 26465/43738 [3:20:43<2:10:53,  2.20it/s]

step:7120, train_loss:0.06968011312094034, acc:0.6228981673908937


 61%|██████    | 26466/43738 [3:20:44<2:13:22,  2.16it/s]

step:7120, train_loss:0.06968245317402827, acc:0.6228746316028112


 61%|██████    | 26467/43738 [3:20:44<2:23:40,  2.00it/s]

step:7120, train_loss:0.06968445900743274, acc:0.6228510975932293


 61%|██████    | 26468/43738 [3:20:45<2:28:10,  1.94it/s]

step:7120, train_loss:0.06968848900199753, acc:0.6228275653619465


 61%|██████    | 26469/43738 [3:20:45<2:22:02,  2.03it/s]

step:7120, train_loss:0.06968613794378599, acc:0.6228418149533417


 61%|██████    | 26470/43738 [3:20:46<2:50:21,  1.69it/s]

step:7120, train_loss:0.06968491451197861, acc:0.6228560634680771


 61%|██████    | 26471/43738 [3:20:47<2:57:02,  1.63it/s]

step:7120, train_loss:0.06968403662217772, acc:0.6228703109062748


 61%|██████    | 26472/43738 [3:20:47<2:36:44,  1.84it/s]

step:7120, train_loss:0.06968155221969624, acc:0.6228845572680568


 61%|██████    | 26473/43738 [3:20:47<2:22:08,  2.02it/s]

step:7120, train_loss:0.06967894564573382, acc:0.6228988025535451


 61%|██████    | 26474/43738 [3:20:48<2:16:26,  2.11it/s]

step:7120, train_loss:0.06967632626830489, acc:0.6229130467628616


 61%|██████    | 26475/43738 [3:20:49<2:29:58,  1.92it/s]

step:7120, train_loss:0.06968569655573115, acc:0.6228895184135977


 61%|██████    | 26476/43738 [3:20:49<2:30:11,  1.92it/s]

step:7120, train_loss:0.06968421493819667, acc:0.6229037618975676


 61%|██████    | 26477/43738 [3:20:50<2:57:30,  1.62it/s]

step:7120, train_loss:0.06968238459390277, acc:0.6229180043056237


 61%|██████    | 26478/43738 [3:20:50<2:30:55,  1.91it/s]

step:7120, train_loss:0.06968001242017038, acc:0.6229322456378881


 61%|██████    | 26479/43738 [3:20:51<2:34:13,  1.87it/s]

step:7120, train_loss:0.06969402457283297, acc:0.6229087201178293


 61%|██████    | 26784/43738 [3:23:11<2:30:08,  1.88it/s]

step:7140, train_loss:0.06971088016880272, acc:0.6228345280764636


 61%|██████    | 26785/43738 [3:23:12<2:35:24,  1.82it/s]

step:7140, train_loss:0.06971017902092042, acc:0.6228486092962479


 61%|██████    | 26786/43738 [3:23:12<2:27:17,  1.92it/s]

step:7140, train_loss:0.06970857193022764, acc:0.6228626894646457


 61%|██████    | 26787/43738 [3:23:12<2:11:46,  2.14it/s]

step:7140, train_loss:0.0697191301246703, acc:0.6228394370403554


 61%|██████    | 26788/43738 [3:23:13<2:38:29,  1.78it/s]

step:7140, train_loss:0.06971828708576198, acc:0.6228535164999254


 61%|██████    | 26789/43738 [3:23:14<2:50:52,  1.65it/s]

step:7140, train_loss:0.06971778376980027, acc:0.6228675949083579


 61%|██████▏   | 26790/43738 [3:23:14<2:27:27,  1.92it/s]

step:7140, train_loss:0.06972338815199755, acc:0.6228443449048152


 61%|██████▏   | 26791/43738 [3:23:15<2:43:27,  1.73it/s]

step:7140, train_loss:0.06972708098669085, acc:0.6228210966369303


 61%|██████▏   | 26792/43738 [3:23:16<3:01:16,  1.56it/s]

step:7140, train_loss:0.06972519535286581, acc:0.6228351746790086


 61%|██████▏   | 26793/43738 [3:23:16<2:31:04,  1.87it/s]

step:7140, train_loss:0.06972285269063606, acc:0.6228492516702123


 61%|██████▏   | 26794/43738 [3:23:16<2:23:23,  1.97it/s]

step:7140, train_loss:0.06972233526934231, acc:0.6228633276106591


 61%|██████▏   | 26795/43738 [3:23:17<2:26:26,  1.93it/s]

step:7140, train_loss:0.06972279820261582, acc:0.6228400821048703


 61%|██████▏   | 26796/43738 [3:23:17<2:15:05,  2.09it/s]

step:7140, train_loss:0.06973068503212236, acc:0.6228168383340797


 61%|██████▏   | 26797/43738 [3:23:18<1:58:33,  2.38it/s]

step:7140, train_loss:0.0697283884276694, acc:0.6228309139082733


 61%|██████▏   | 26798/43738 [3:23:18<2:18:36,  2.04it/s]

step:7140, train_loss:0.06972580452865348, acc:0.6228449884319726


 61%|██████▏   | 26799/43738 [3:23:19<2:10:40,  2.16it/s]

step:7140, train_loss:0.06972424773872964, acc:0.6228590619052949


 62%|██████▏   | 27104/43738 [3:25:45<2:31:35,  1.83it/s]

step:7160, train_loss:0.06980576905199949, acc:0.6224173553719008


 62%|██████▏   | 27105/43738 [3:25:45<2:08:51,  2.15it/s]

step:7160, train_loss:0.06980650811091285, acc:0.6223943921785648


 62%|██████▏   | 27106/43738 [3:25:46<2:05:10,  2.21it/s]

step:7160, train_loss:0.06980475088025942, acc:0.622408322880543


 62%|██████▏   | 27107/43738 [3:25:46<2:14:20,  2.06it/s]

step:7160, train_loss:0.06981160580496372, acc:0.6223853617146863


 62%|██████▏   | 27108/43738 [3:25:46<1:56:21,  2.38it/s]

step:7160, train_loss:0.06981149351768923, acc:0.6223624022428803


 62%|██████▏   | 27109/43738 [3:25:47<2:01:01,  2.29it/s]

step:7160, train_loss:0.06981492378927848, acc:0.6223394444649378


 62%|██████▏   | 27110/43738 [3:25:47<1:56:24,  2.38it/s]

step:7160, train_loss:0.06982004464932594, acc:0.6223164883806713


 62%|██████▏   | 27111/43738 [3:25:48<2:10:13,  2.13it/s]

step:7160, train_loss:0.0698175243607219, acc:0.6223304193869647


 62%|██████▏   | 27112/43738 [3:25:48<2:07:06,  2.18it/s]

step:7160, train_loss:0.06981983838500269, acc:0.6223074653290056


 62%|██████▏   | 27113/43738 [3:25:49<2:13:02,  2.08it/s]

step:7160, train_loss:0.06981835580270623, acc:0.6223213956404676


 62%|██████▏   | 27114/43738 [3:25:49<2:25:10,  1.91it/s]

step:7160, train_loss:0.0698185881146647, acc:0.6223353249243933


 62%|██████▏   | 27115/43738 [3:25:50<2:24:12,  1.92it/s]

step:7160, train_loss:0.06982201527120499, acc:0.6223123732251521


 62%|██████▏   | 27116/43738 [3:25:51<2:48:58,  1.64it/s]

step:7160, train_loss:0.06982054108710259, acc:0.6223263018144269


 62%|██████▏   | 27117/43738 [3:25:51<2:22:33,  1.94it/s]

step:7160, train_loss:0.06982199259043967, acc:0.6223033521407235


 62%|██████▏   | 27118/43738 [3:25:52<2:42:21,  1.71it/s]

step:7160, train_loss:0.0698270793518849, acc:0.6222804041595988


 62%|██████▏   | 27119/43738 [3:25:53<2:52:51,  1.60it/s]

step:7160, train_loss:0.06983350043968373, acc:0.6222574578708654


 63%|██████▎   | 27424/43738 [3:28:10<1:46:47,  2.55it/s]

step:7180, train_loss:0.06975120735865559, acc:0.6226662777129521


 63%|██████▎   | 27425/43738 [3:28:11<1:52:53,  2.41it/s]

step:7180, train_loss:0.06975048701680642, acc:0.6226800364630811


 63%|██████▎   | 27426/43738 [3:28:11<2:08:18,  2.12it/s]

step:7180, train_loss:0.06974808826945944, acc:0.6226937942098738


 63%|██████▎   | 27427/43738 [3:28:12<2:04:47,  2.18it/s]

step:7180, train_loss:0.06974555072728171, acc:0.62270755095344


 63%|██████▎   | 27428/43738 [3:28:12<2:15:44,  2.00it/s]

step:7180, train_loss:0.06974472610888742, acc:0.6227213066938895


 63%|██████▎   | 27429/43738 [3:28:13<1:53:35,  2.39it/s]

step:7180, train_loss:0.06974231943236667, acc:0.6227350614313318


 63%|██████▎   | 27430/43738 [3:28:13<1:38:50,  2.75it/s]

step:7180, train_loss:0.06974048118463397, acc:0.6227488151658768


 63%|██████▎   | 27431/43738 [3:28:13<1:47:01,  2.54it/s]

step:7180, train_loss:0.06973821131425072, acc:0.6227625678976341


 63%|██████▎   | 27432/43738 [3:28:14<1:39:34,  2.73it/s]

step:7180, train_loss:0.06973573592232687, acc:0.6227763196267133


 63%|██████▎   | 27433/43738 [3:28:14<1:36:11,  2.82it/s]

step:7180, train_loss:0.06973435095449564, acc:0.6227900703532242


 63%|██████▎   | 27434/43738 [3:28:15<2:09:30,  2.10it/s]

step:7180, train_loss:0.06973302676135898, acc:0.6228038200772764


 63%|██████▎   | 27435/43738 [3:28:15<2:07:55,  2.12it/s]

step:7180, train_loss:0.06973458529615362, acc:0.6227811190085657


 63%|██████▎   | 27436/43738 [3:28:16<2:05:43,  2.16it/s]

step:7180, train_loss:0.06973515708595895, acc:0.6227584195946931


 63%|██████▎   | 27437/43738 [3:28:16<2:06:37,  2.15it/s]

step:7180, train_loss:0.06973353651085876, acc:0.6227721689689106


 63%|██████▎   | 27438/43738 [3:28:16<1:59:36,  2.27it/s]

step:7180, train_loss:0.06973158470616206, acc:0.622785917340914


 63%|██████▎   | 27439/43738 [3:28:17<1:45:19,  2.58it/s]

step:7180, train_loss:0.06973500407140457, acc:0.6227632202339736


 63%|██████▎   | 27744/43738 [3:30:39<2:50:13,  1.57it/s]

step:7200, train_loss:0.06967168312759073, acc:0.6231617647058824


 63%|██████▎   | 27745/43738 [3:30:39<2:25:09,  1.84it/s]

step:7200, train_loss:0.06967071651807408, acc:0.6231753469093531


 63%|██████▎   | 27746/43738 [3:30:40<2:25:12,  1.84it/s]

step:7200, train_loss:0.06967445487568039, acc:0.6231528869026166


 63%|██████▎   | 27747/43738 [3:30:40<2:11:45,  2.02it/s]

step:7200, train_loss:0.06967582516839971, acc:0.6231304285147944


 63%|██████▎   | 27748/43738 [3:30:40<2:00:33,  2.21it/s]

step:7200, train_loss:0.06967661725631599, acc:0.6231079717457114


 63%|██████▎   | 27749/43738 [3:30:41<2:25:42,  1.83it/s]

step:7200, train_loss:0.06968132505953951, acc:0.6230855165951926


 63%|██████▎   | 27750/43738 [3:30:41<2:01:21,  2.20it/s]

step:7200, train_loss:0.06967910854480028, acc:0.6230990990990991


 63%|██████▎   | 27751/43738 [3:30:42<1:44:32,  2.55it/s]

step:7200, train_loss:0.06967661556468206, acc:0.6231126806241216


 63%|██████▎   | 27752/43738 [3:30:42<1:32:18,  2.89it/s]

step:7200, train_loss:0.06967859411397653, acc:0.6230902277313347


 63%|██████▎   | 27753/43738 [3:30:42<1:29:55,  2.96it/s]

step:7200, train_loss:0.06967633366463173, acc:0.6231038085972688


 63%|██████▎   | 27754/43738 [3:30:43<1:36:15,  2.77it/s]

step:7200, train_loss:0.06967886185081192, acc:0.6230813576421417


 63%|██████▎   | 27755/43738 [3:30:43<1:25:41,  3.11it/s]

step:7200, train_loss:0.06967635577708528, acc:0.6230949378490362


 63%|██████▎   | 27756/43738 [3:30:43<1:23:30,  3.19it/s]

step:7200, train_loss:0.06967397293776802, acc:0.6231085170773887


 63%|██████▎   | 27757/43738 [3:30:44<1:37:18,  2.74it/s]

step:7200, train_loss:0.06967787604284179, acc:0.6230860683791476


 63%|██████▎   | 27758/43738 [3:30:44<1:36:39,  2.76it/s]

step:7200, train_loss:0.06967609897651009, acc:0.6230996469486274


 63%|██████▎   | 27759/43738 [3:30:44<1:37:54,  2.72it/s]

step:7200, train_loss:0.0696751328355221, acc:0.6231132245397889


 64%|██████▍   | 28064/43738 [3:33:05<1:33:01,  2.81it/s]

step:7220, train_loss:0.06971442747643058, acc:0.6228263968072976


 64%|██████▍   | 28065/43738 [3:33:06<1:54:07,  2.29it/s]

step:7220, train_loss:0.0697137119212193, acc:0.62283983609478


 64%|██████▍   | 28066/43738 [3:33:06<1:47:28,  2.43it/s]

step:7220, train_loss:0.06971167136949348, acc:0.6228532744245706


 64%|██████▍   | 28067/43738 [3:33:06<1:46:05,  2.46it/s]

step:7220, train_loss:0.06971120634884771, acc:0.622866711796772


 64%|██████▍   | 28068/43738 [3:33:07<1:39:47,  2.62it/s]

step:7220, train_loss:0.06971038268388915, acc:0.6228445204503349


 64%|██████▍   | 28069/43738 [3:33:08<2:10:30,  2.00it/s]

step:7220, train_loss:0.06970839203835937, acc:0.6228579571769568


 64%|██████▍   | 28070/43738 [3:33:08<1:56:51,  2.23it/s]

step:7220, train_loss:0.06970825321071115, acc:0.6228357677235483


 64%|██████▍   | 28071/43738 [3:33:08<1:56:38,  2.24it/s]

step:7220, train_loss:0.06971024568314463, acc:0.6228135798510919


 64%|██████▍   | 28072/43738 [3:33:09<2:06:33,  2.06it/s]

step:7220, train_loss:0.06971009144062797, acc:0.6228270162439441


 64%|██████▍   | 28073/43738 [3:33:10<2:21:32,  1.84it/s]

step:7220, train_loss:0.06971660206860669, acc:0.6228048302639547


 64%|██████▍   | 28074/43738 [3:33:10<2:03:47,  2.11it/s]

step:7220, train_loss:0.06971411886925091, acc:0.622818266011256


 64%|██████▍   | 28075/43738 [3:33:11<2:17:22,  1.90it/s]

step:7220, train_loss:0.06971204615179209, acc:0.6228317008014248


 64%|██████▍   | 28076/43738 [3:33:11<2:26:22,  1.78it/s]

step:7220, train_loss:0.06971434716514356, acc:0.6228095170252173


 64%|██████▍   | 28077/43738 [3:33:12<2:12:38,  1.97it/s]

step:7220, train_loss:0.06972428400443645, acc:0.6227873348292197


 64%|██████▍   | 28078/43738 [3:33:12<2:04:46,  2.09it/s]

step:7220, train_loss:0.06972350105371161, acc:0.6228007692855616


 64%|██████▍   | 28079/43738 [3:33:12<2:01:43,  2.14it/s]

step:7220, train_loss:0.06972182703519192, acc:0.6228142027849994


 65%|██████▍   | 28384/43738 [3:35:32<1:37:09,  2.63it/s]

step:7240, train_loss:0.06965992093708095, acc:0.6231327508455468


 65%|██████▍   | 28385/43738 [3:35:33<1:26:52,  2.95it/s]

step:7240, train_loss:0.06965747976719144, acc:0.6231460278316012


 65%|██████▍   | 28386/43738 [3:35:33<1:40:15,  2.55it/s]

step:7240, train_loss:0.0696591610782191, acc:0.6231240752483619


 65%|██████▍   | 28387/43738 [3:35:34<2:01:14,  2.11it/s]

step:7240, train_loss:0.06966094652511347, acc:0.6231021242117871


 65%|██████▍   | 28388/43738 [3:35:34<1:42:03,  2.51it/s]

step:7240, train_loss:0.06965849360701493, acc:0.6231154008736086


 65%|██████▍   | 28389/43738 [3:35:34<1:45:48,  2.42it/s]

step:7240, train_loss:0.06965635345332752, acc:0.6231286766000916


 65%|██████▍   | 28390/43738 [3:35:35<1:50:43,  2.31it/s]

step:7240, train_loss:0.0696598356301835, acc:0.6231067277210285


 65%|██████▍   | 28391/43738 [3:35:35<1:38:01,  2.61it/s]

step:7240, train_loss:0.06965805823203372, acc:0.6231200028177943


 65%|██████▍   | 28392/43738 [3:35:36<1:41:27,  2.52it/s]

step:7240, train_loss:0.06966188975233015, acc:0.6230980557903635


 65%|██████▍   | 28393/43738 [3:35:36<1:28:35,  2.89it/s]

step:7240, train_loss:0.06965943673019366, acc:0.6231113302574578


 65%|██████▍   | 28394/43738 [3:35:36<1:47:52,  2.37it/s]

step:7240, train_loss:0.06965698514088567, acc:0.623124603789533


 65%|██████▍   | 28395/43738 [3:35:37<2:16:20,  1.88it/s]

step:7240, train_loss:0.06965735798523423, acc:0.6231378763866878


 65%|██████▍   | 28396/43738 [3:35:38<2:37:27,  1.62it/s]

step:7240, train_loss:0.06965524804033636, acc:0.6231511480490209


 65%|██████▍   | 28397/43738 [3:35:38<2:18:38,  1.84it/s]

step:7240, train_loss:0.06965589110776499, acc:0.6231292037891326


 65%|██████▍   | 28398/43738 [3:35:39<1:54:32,  2.23it/s]

step:7240, train_loss:0.06965348164922933, acc:0.6231424748221706


 65%|██████▍   | 28399/43738 [3:35:39<1:53:37,  2.25it/s]

step:7240, train_loss:0.06965432102419004, acc:0.6231205324131132


 66%|██████▌   | 28704/43738 [3:38:07<1:44:19,  2.40it/s]

step:7260, train_loss:0.06965018947142694, acc:0.6232929208472687


 66%|██████▌   | 28705/43738 [3:38:07<1:31:26,  2.74it/s]

step:7260, train_loss:0.06964781338765615, acc:0.6233060442431632


 66%|██████▌   | 28706/43738 [3:38:08<2:01:45,  2.06it/s]

step:7260, train_loss:0.0696498932190017, acc:0.6232843308019229


 66%|██████▌   | 28707/43738 [3:38:08<1:59:12,  2.10it/s]

step:7260, train_loss:0.06964966891143778, acc:0.6232974535827499


 66%|██████▌   | 28708/43738 [3:38:09<1:54:49,  2.18it/s]

step:7260, train_loss:0.06965063196378005, acc:0.6232757419534625


 66%|██████▌   | 28709/43738 [3:38:09<1:47:31,  2.33it/s]

step:7260, train_loss:0.06965016586102826, acc:0.6232540318367062


 66%|██████▌   | 28710/43738 [3:38:09<1:46:30,  2.35it/s]

step:7260, train_loss:0.06964785053172894, acc:0.623267154301637


 66%|██████▌   | 28711/43738 [3:38:10<1:41:28,  2.47it/s]

step:7260, train_loss:0.06964787756872401, acc:0.623245445996308


 66%|██████▌   | 28712/43738 [3:38:10<1:37:59,  2.56it/s]

step:7260, train_loss:0.06964676065738508, acc:0.6232585678461967


 66%|██████▌   | 28713/43738 [3:38:11<2:02:19,  2.05it/s]

step:7260, train_loss:0.06964443420276592, acc:0.6232716887820847


 66%|██████▌   | 28714/43738 [3:38:11<1:59:15,  2.10it/s]

step:7260, train_loss:0.06964499035543961, acc:0.6232499825868915


 66%|██████▌   | 28715/43738 [3:38:12<1:53:34,  2.20it/s]

step:7260, train_loss:0.06964364711257219, acc:0.6232631029078879


 66%|██████▌   | 28716/43738 [3:38:12<1:43:19,  2.42it/s]

step:7260, train_loss:0.06964204735022446, acc:0.6232762223150856


 66%|██████▌   | 28717/43738 [3:38:12<1:32:19,  2.71it/s]

step:7260, train_loss:0.06964041344872669, acc:0.6232893408085803


 66%|██████▌   | 28718/43738 [3:38:12<1:23:27,  3.00it/s]

step:7260, train_loss:0.06963850366575584, acc:0.6233024583884672


 66%|██████▌   | 28719/43738 [3:38:13<1:25:40,  2.92it/s]

step:7260, train_loss:0.06963828133192584, acc:0.6232807549009367


 66%|██████▋   | 29024/43738 [3:40:26<1:52:36,  2.18it/s]

step:7280, train_loss:0.06954278684570495, acc:0.6235529217199559


 66%|██████▋   | 29025/43738 [3:40:27<1:48:47,  2.25it/s]

step:7280, train_loss:0.06954049414846185, acc:0.6235658914728682


 66%|██████▋   | 29026/43738 [3:40:27<1:42:36,  2.39it/s]

step:7280, train_loss:0.06953893341196611, acc:0.6235788603321161


 66%|██████▋   | 29027/43738 [3:40:28<1:43:18,  2.37it/s]

step:7280, train_loss:0.06953663038361413, acc:0.6235918282977917


 66%|██████▋   | 29028/43738 [3:40:28<1:56:11,  2.11it/s]

step:7280, train_loss:0.06953848078937447, acc:0.6235703458729502


 66%|██████▋   | 29029/43738 [3:40:28<1:37:57,  2.50it/s]

step:7280, train_loss:0.06954049174830297, acc:0.6235488649281753


 66%|██████▋   | 29030/43738 [3:40:29<1:34:17,  2.60it/s]

step:7280, train_loss:0.06954012864072698, acc:0.6235273854633138


 66%|██████▋   | 29031/43738 [3:40:29<1:22:47,  2.96it/s]

step:7280, train_loss:0.06953804674738512, acc:0.6235403534153147


 66%|██████▋   | 29032/43738 [3:40:29<1:16:29,  3.20it/s]

step:7280, train_loss:0.06953566086970153, acc:0.6235533204739597


 66%|██████▋   | 29033/43738 [3:40:30<1:22:17,  2.98it/s]

step:7280, train_loss:0.06953359942972343, acc:0.6235662866393414


 66%|██████▋   | 29034/43738 [3:40:30<1:30:09,  2.72it/s]

step:7280, train_loss:0.06953120555573077, acc:0.623579251911552


 66%|██████▋   | 29035/43738 [3:40:31<1:41:53,  2.41it/s]

step:7280, train_loss:0.06952892567949617, acc:0.6235922162906836


 66%|██████▋   | 29036/43738 [3:40:31<1:56:48,  2.10it/s]

step:7280, train_loss:0.0695293685681093, acc:0.6236051797768287


 66%|██████▋   | 29037/43738 [3:40:32<1:45:26,  2.32it/s]

step:7280, train_loss:0.06952991745708555, acc:0.6236181423700795


 66%|██████▋   | 29039/43738 [3:40:32<1:25:13,  2.87it/s]

step:7280, train_loss:0.06952761423149523, acc:0.6236311040705282
step:7280, train_loss:0.0695310034494188, acc:0.6236096284307311


 67%|██████▋   | 29344/43738 [3:42:51<2:09:59,  1.85it/s]

step:7300, train_loss:0.06947996900754008, acc:0.6238072519083969


 67%|██████▋   | 29345/43738 [3:42:52<2:08:22,  1.87it/s]

step:7300, train_loss:0.06948209035208605, acc:0.6237859942068495


 67%|██████▋   | 29346/43738 [3:42:53<2:29:19,  1.61it/s]

step:7300, train_loss:0.06948254144295038, acc:0.6237988141484359


 67%|██████▋   | 29347/43738 [3:42:53<2:04:37,  1.92it/s]

step:7300, train_loss:0.06948181251207107, acc:0.6237775581831192


 67%|██████▋   | 29348/43738 [3:42:54<2:01:07,  1.98it/s]

step:7300, train_loss:0.06947999652139415, acc:0.6237903775385035


 67%|██████▋   | 29349/43738 [3:42:54<1:50:15,  2.18it/s]

step:7300, train_loss:0.06947763115200024, acc:0.6238031960203073


 67%|██████▋   | 29350/43738 [3:42:54<1:46:53,  2.24it/s]

step:7300, train_loss:0.0694752685127806, acc:0.62381601362862


 67%|██████▋   | 29351/43738 [3:42:55<1:54:06,  2.10it/s]

step:7300, train_loss:0.06947324933774063, acc:0.6238288303635311


 67%|██████▋   | 29352/43738 [3:42:55<1:41:41,  2.36it/s]

step:7300, train_loss:0.06947089840600809, acc:0.6238416462251295


 67%|██████▋   | 29353/43738 [3:42:56<1:41:55,  2.35it/s]

step:7300, train_loss:0.06946989704519743, acc:0.6238544612135046


 67%|██████▋   | 29354/43738 [3:42:56<1:57:35,  2.04it/s]

step:7300, train_loss:0.06947489407255833, acc:0.6238332084213395


 67%|██████▋   | 29355/43738 [3:42:56<1:42:18,  2.34it/s]

step:7300, train_loss:0.06947885298889263, acc:0.6238119570771589


 67%|██████▋   | 29356/43738 [3:42:57<1:37:24,  2.46it/s]

step:7300, train_loss:0.06947650933668838, acc:0.6238247717672707


 67%|██████▋   | 29357/43738 [3:42:57<1:48:31,  2.21it/s]

step:7300, train_loss:0.0694764124988774, acc:0.6238035221582586


 67%|██████▋   | 29358/43738 [3:42:58<1:41:06,  2.37it/s]

step:7300, train_loss:0.06947442089020409, acc:0.6238163362626882


 67%|██████▋   | 29359/43738 [3:42:58<1:45:17,  2.28it/s]

step:7300, train_loss:0.06947210283946902, acc:0.6238291494941925


 68%|██████▊   | 29664/43738 [3:45:33<1:58:39,  1.98it/s]

step:7320, train_loss:0.06943884973803337, acc:0.6238875404530745


 68%|██████▊   | 29665/43738 [3:45:33<1:40:08,  2.34it/s]

step:7320, train_loss:0.06943656704674446, acc:0.6239002191134333


 68%|██████▊   | 29666/43738 [3:45:34<1:42:00,  2.30it/s]

step:7320, train_loss:0.06943636806900919, acc:0.6239128969190318


 68%|██████▊   | 29667/43738 [3:45:34<1:26:33,  2.71it/s]

step:7320, train_loss:0.06943414572369098, acc:0.6239255738699565


 68%|██████▊   | 29668/43738 [3:45:34<1:28:28,  2.65it/s]

step:7320, train_loss:0.06943445854227805, acc:0.6239045436160172


 68%|██████▊   | 29669/43738 [3:45:35<1:42:36,  2.29it/s]

step:7320, train_loss:0.06943247429782981, acc:0.623917219993933


 68%|██████▊   | 29670/43738 [3:45:35<1:28:55,  2.64it/s]

step:7320, train_loss:0.06943023792961178, acc:0.6239298955173576


 68%|██████▊   | 29671/43738 [3:45:36<1:38:40,  2.38it/s]

step:7320, train_loss:0.06942932296316222, acc:0.6239425701863772


 68%|██████▊   | 29672/43738 [3:45:37<2:03:45,  1.89it/s]

step:7320, train_loss:0.06942771469236883, acc:0.6239552440010785


 68%|██████▊   | 29673/43738 [3:45:37<1:45:12,  2.23it/s]

step:7320, train_loss:0.06943325504631268, acc:0.6239342162909042


 68%|██████▊   | 29674/43738 [3:45:37<1:30:56,  2.58it/s]

step:7320, train_loss:0.0694316574991835, acc:0.6239468895329244


 68%|██████▊   | 29675/43738 [3:45:38<1:33:45,  2.50it/s]

step:7320, train_loss:0.06943291103876319, acc:0.6239258635214827


 68%|██████▊   | 29676/43738 [3:45:38<1:56:10,  2.02it/s]

step:7320, train_loss:0.06943209061837499, acc:0.6239385361908613


 68%|██████▊   | 29677/43738 [3:45:39<1:40:46,  2.33it/s]

step:7320, train_loss:0.06942985474639832, acc:0.6239512080062001


 68%|██████▊   | 29678/43738 [3:45:39<1:49:24,  2.14it/s]

step:7320, train_loss:0.06942778234138187, acc:0.6239638789675854


 68%|██████▊   | 29679/43738 [3:45:39<1:35:35,  2.45it/s]

step:7320, train_loss:0.06942791527541309, acc:0.6239428552174938


 69%|██████▊   | 29984/43738 [3:47:57<1:55:51,  1.98it/s]

step:7340, train_loss:0.06945358800243676, acc:0.6236993062966916


 69%|██████▊   | 29985/43738 [3:47:58<1:43:40,  2.21it/s]

step:7340, train_loss:0.0694516948973006, acc:0.623711855927964


 69%|██████▊   | 29986/43738 [3:47:58<1:51:22,  2.06it/s]

step:7340, train_loss:0.06945838818506941, acc:0.6236910558260521


 69%|██████▊   | 29987/43738 [3:47:59<1:44:01,  2.20it/s]

step:7340, train_loss:0.06946044898106374, acc:0.623670257111415


 69%|██████▊   | 29988/43738 [3:47:59<1:38:38,  2.32it/s]

step:7340, train_loss:0.06946149748937325, acc:0.6236494597839136


 69%|██████▊   | 29989/43738 [3:47:59<1:25:09,  2.69it/s]

step:7340, train_loss:0.06946339672175103, acc:0.6236286638434092


 69%|██████▊   | 29990/43738 [3:48:00<1:17:51,  2.94it/s]

step:7340, train_loss:0.06946115521960004, acc:0.6236412137379126


 69%|██████▊   | 29991/43738 [3:48:00<1:24:24,  2.71it/s]

step:7340, train_loss:0.06946212870438749, acc:0.6236204194591711


 69%|██████▊   | 29992/43738 [3:48:00<1:16:29,  3.00it/s]

step:7340, train_loss:0.06945982731486341, acc:0.6236329687916777


 69%|██████▊   | 29993/43738 [3:48:01<1:16:10,  3.01it/s]

step:7340, train_loss:0.06946078197730535, acc:0.6236121761744408


 69%|██████▊   | 29994/43738 [3:48:01<1:12:06,  3.18it/s]

step:7340, train_loss:0.06946424711373261, acc:0.6235913849436554


 69%|██████▊   | 29995/43738 [3:48:01<1:18:06,  2.93it/s]

step:7340, train_loss:0.06946310305344108, acc:0.6236039339889982


 69%|██████▊   | 29996/43738 [3:48:02<1:13:06,  3.13it/s]

step:7340, train_loss:0.06946463836637014, acc:0.6235831444192559


 69%|██████▊   | 29997/43738 [3:48:02<1:19:18,  2.89it/s]

step:7340, train_loss:0.06946423504947132, acc:0.6235956929026236


 69%|██████▊   | 29998/43738 [3:48:02<1:17:29,  2.96it/s]

step:7340, train_loss:0.0694623765880261, acc:0.6236082405493699


 69%|██████▊   | 29999/43738 [3:48:03<1:11:14,  3.21it/s]

step:7340, train_loss:0.06946019789550985, acc:0.6236207873595786


 69%|██████▉   | 30304/43738 [3:50:22<1:45:08,  2.13it/s]

step:7360, train_loss:0.06942817730169067, acc:0.6236800422386484


 69%|██████▉   | 30305/43738 [3:50:22<1:44:22,  2.14it/s]

step:7360, train_loss:0.06942702141160596, acc:0.6236924599901006


 69%|██████▉   | 30306/43738 [3:50:23<1:50:39,  2.02it/s]

step:7360, train_loss:0.06942594591636925, acc:0.6237048769220617


 69%|██████▉   | 30307/43738 [3:50:23<1:35:53,  2.33it/s]

step:7360, train_loss:0.06942365521727724, acc:0.6237172930346124


 69%|██████▉   | 30308/43738 [3:50:23<1:31:28,  2.45it/s]

step:7360, train_loss:0.06942158649484746, acc:0.6237297083278343


 69%|██████▉   | 30309/43738 [3:50:23<1:22:15,  2.72it/s]

step:7360, train_loss:0.06941958850016018, acc:0.6237421228018081


 69%|██████▉   | 30310/43738 [3:50:24<1:48:50,  2.06it/s]

step:7360, train_loss:0.06942135486678627, acc:0.6237215440448697


 69%|██████▉   | 30311/43738 [3:50:25<2:02:33,  1.83it/s]

step:7360, train_loss:0.06942750269316686, acc:0.6237009666457721


 69%|██████▉   | 30312/43738 [3:50:25<1:45:57,  2.11it/s]

step:7360, train_loss:0.06942549176650159, acc:0.6237133808392715


 69%|██████▉   | 30313/43738 [3:50:26<1:38:23,  2.27it/s]

step:7360, train_loss:0.06942489932533304, acc:0.6237257942137037


 69%|██████▉   | 30314/43738 [3:50:26<1:33:22,  2.40it/s]

step:7360, train_loss:0.06942314361023275, acc:0.6237382067691496


 69%|██████▉   | 30315/43738 [3:50:26<1:37:21,  2.30it/s]

step:7360, train_loss:0.069427400953679, acc:0.6237176315355435


 69%|██████▉   | 30316/43738 [3:50:27<1:37:13,  2.30it/s]

step:7360, train_loss:0.06942598237207499, acc:0.6237300435413643


 69%|██████▉   | 30317/43738 [3:50:27<1:48:47,  2.06it/s]

step:7360, train_loss:0.06943241164889252, acc:0.6237094699343603


 69%|██████▉   | 30318/43738 [3:50:28<1:32:03,  2.43it/s]

step:7360, train_loss:0.06943027486903473, acc:0.623721881390593


 69%|██████▉   | 30319/43738 [3:50:28<1:52:40,  1.99it/s]

step:7360, train_loss:0.06943522298203464, acc:0.623701309409941


 70%|███████   | 30624/43738 [3:52:47<1:31:30,  2.39it/s]

step:7380, train_loss:0.06937856128369749, acc:0.624085684430512


 70%|███████   | 30625/43738 [3:52:47<1:40:12,  2.18it/s]

step:7380, train_loss:0.0693810137727944, acc:0.6240653061224489


 70%|███████   | 30626/43738 [3:52:48<1:25:25,  2.56it/s]

step:7380, train_loss:0.06937876835533571, acc:0.6240775811402076


 70%|███████   | 30627/43738 [3:52:48<1:19:10,  2.76it/s]

step:7380, train_loss:0.06937785500161922, acc:0.6240898553563848


 70%|███████   | 30628/43738 [3:52:49<1:49:10,  2.00it/s]

step:7380, train_loss:0.06937962767736569, acc:0.6240694789081885


 70%|███████   | 30629/43738 [3:52:49<1:58:43,  1.84it/s]

step:7380, train_loss:0.0693810960785792, acc:0.6240491037905254


 70%|███████   | 30630/43738 [3:52:50<1:36:57,  2.25it/s]

step:7380, train_loss:0.06938193175956567, acc:0.6240287300032648


 70%|███████   | 30631/43738 [3:52:50<1:28:19,  2.47it/s]

step:7380, train_loss:0.06937968626406792, acc:0.6240410042114198


 70%|███████   | 30632/43738 [3:52:50<1:17:29,  2.82it/s]

step:7380, train_loss:0.06937949838479199, acc:0.6240206320188039


 70%|███████   | 30633/43738 [3:52:50<1:11:09,  3.07it/s]

step:7380, train_loss:0.06937727021156521, acc:0.6240329056899422


 70%|███████   | 30634/43738 [3:52:51<1:17:35,  2.81it/s]

step:7380, train_loss:0.06937805965241232, acc:0.6240451785597702


 70%|███████   | 30635/43738 [3:52:51<1:10:41,  3.09it/s]

step:7380, train_loss:0.06937594910386759, acc:0.6240574506283663


 70%|███████   | 30636/43738 [3:52:51<1:10:37,  3.09it/s]

step:7380, train_loss:0.06937371721375479, acc:0.6240697218958089


 70%|███████   | 30637/43738 [3:52:52<1:04:46,  3.37it/s]

step:7380, train_loss:0.06937150291328609, acc:0.6240819923621764


 70%|███████   | 30638/43738 [3:52:52<1:13:49,  2.96it/s]

step:7380, train_loss:0.06936933924466476, acc:0.6240942620275475


 70%|███████   | 30639/43738 [3:52:52<1:06:47,  3.27it/s]

step:7380, train_loss:0.06936731900605589, acc:0.6241065308920004


 71%|███████   | 30944/43738 [3:55:10<1:58:43,  1.80it/s]

step:7400, train_loss:0.06932248187592893, acc:0.6244183040330921


 71%|███████   | 30945/43738 [3:55:10<1:51:53,  1.91it/s]

step:7400, train_loss:0.06932336650602752, acc:0.6243981257068993


 71%|███████   | 30946/43738 [3:55:11<1:39:12,  2.15it/s]

step:7400, train_loss:0.06932114713375567, acc:0.6244102630388418


 71%|███████   | 30947/43738 [3:55:11<1:25:27,  2.49it/s]

step:7400, train_loss:0.06931939498146769, acc:0.6244223995863897


 71%|███████   | 30948/43738 [3:55:11<1:35:00,  2.24it/s]

step:7400, train_loss:0.06931937501606376, acc:0.6244345353496187


 71%|███████   | 30949/43738 [3:55:12<1:29:56,  2.37it/s]

step:7400, train_loss:0.06931982716487829, acc:0.6244143591069178


 71%|███████   | 30950/43738 [3:55:12<1:21:13,  2.62it/s]

step:7400, train_loss:0.06931959638712658, acc:0.624394184168013


 71%|███████   | 30951/43738 [3:55:13<1:26:44,  2.46it/s]

step:7400, train_loss:0.06931860175141134, acc:0.6244063196665698


 71%|███████   | 30952/43738 [3:55:13<1:40:59,  2.11it/s]

step:7400, train_loss:0.06931638864174561, acc:0.624418454380977


 71%|███████   | 30953/43738 [3:55:14<1:41:35,  2.10it/s]

step:7400, train_loss:0.06931691114540577, acc:0.624398281265144


 71%|███████   | 30954/43738 [3:55:14<1:33:40,  2.27it/s]

step:7400, train_loss:0.06931716222127643, acc:0.6243781094527363


 71%|███████   | 30955/43738 [3:55:14<1:38:45,  2.16it/s]

step:7400, train_loss:0.06931533457062339, acc:0.624390243902439


 71%|███████   | 30956/43738 [3:55:15<1:46:28,  2.00it/s]

step:7400, train_loss:0.06931368376622701, acc:0.6244023775681613


 71%|███████   | 30957/43738 [3:55:15<1:35:58,  2.22it/s]

step:7400, train_loss:0.06931160415106981, acc:0.624414510449979


 71%|███████   | 30958/43738 [3:55:16<1:43:45,  2.05it/s]

step:7400, train_loss:0.06931381209324607, acc:0.6243943407196847


 71%|███████   | 30959/43738 [3:55:17<2:05:46,  1.69it/s]

step:7400, train_loss:0.06932207637033656, acc:0.6243741722923867


 71%|███████▏  | 31264/43738 [3:57:32<1:57:18,  1.77it/s]

step:7420, train_loss:0.06941262890384906, acc:0.6240724155578301


 71%|███████▏  | 31265/43738 [3:57:32<1:45:23,  1.97it/s]

step:7420, train_loss:0.06941079206338692, acc:0.6240844394690549


 71%|███████▏  | 31266/43738 [3:57:32<1:32:17,  2.25it/s]

step:7420, train_loss:0.06940857647854204, acc:0.624096462611143


 71%|███████▏  | 31267/43738 [3:57:33<1:36:18,  2.16it/s]

step:7420, train_loss:0.0694063694812119, acc:0.6241084849841686


 71%|███████▏  | 31268/43738 [3:57:33<1:30:01,  2.31it/s]

step:7420, train_loss:0.06940715461504095, acc:0.6240885250095944


 71%|███████▏  | 31269/43738 [3:57:34<2:06:40,  1.64it/s]

step:7420, train_loss:0.06941178071610798, acc:0.6240685663116825


 71%|███████▏  | 31270/43738 [3:57:35<2:03:41,  1.68it/s]

step:7420, train_loss:0.06941283626502107, acc:0.6240486088903102


 71%|███████▏  | 31271/43738 [3:57:35<1:53:58,  1.82it/s]

step:7420, train_loss:0.0694106166945851, acc:0.6240606312557961


 71%|███████▏  | 31272/43738 [3:57:36<2:02:39,  1.69it/s]

step:7420, train_loss:0.069408738260249, acc:0.6240726528523919


 72%|███████▏  | 31273/43738 [3:57:37<2:20:05,  1.48it/s]

step:7420, train_loss:0.06940656635890938, acc:0.6240846736801714


 72%|███████▏  | 31274/43738 [3:57:37<2:17:42,  1.51it/s]

step:7420, train_loss:0.06940918047359973, acc:0.6240647182963484


 72%|███████▏  | 31275/43738 [3:57:38<2:17:15,  1.51it/s]

step:7420, train_loss:0.06940700792324662, acc:0.6240767386091127


 72%|███████▏  | 31276/43738 [3:57:39<2:09:28,  1.60it/s]

step:7420, train_loss:0.06940788863949614, acc:0.6240567847550837


 72%|███████▏  | 31277/43738 [3:57:39<1:46:14,  1.95it/s]

step:7420, train_loss:0.069408351368237, acc:0.6240368321769991


 72%|███████▏  | 31278/43738 [3:57:39<1:47:21,  1.93it/s]

step:7420, train_loss:0.06941526407036214, acc:0.6240168808747363


 72%|███████▏  | 31279/43738 [3:57:40<1:45:33,  1.97it/s]

step:7420, train_loss:0.0694148763278865, acc:0.6240289011797052


 72%|███████▏  | 31584/43738 [4:00:00<1:40:44,  2.01it/s]

step:7440, train_loss:0.06940027149408272, acc:0.6239551671732523


 72%|███████▏  | 31585/43738 [4:00:00<1:29:08,  2.27it/s]

step:7440, train_loss:0.06939809355579867, acc:0.6239670729776793


 72%|███████▏  | 31586/43738 [4:00:00<1:40:37,  2.01it/s]

step:7440, train_loss:0.06940234703757339, acc:0.6239473184322168


 72%|███████▏  | 31587/43738 [4:00:01<1:42:03,  1.98it/s]

step:7440, train_loss:0.06940153040909136, acc:0.6239275651375565


 72%|███████▏  | 31588/43738 [4:00:01<1:32:13,  2.20it/s]

step:7440, train_loss:0.06939933465408521, acc:0.6239394706850703


 72%|███████▏  | 31589/43738 [4:00:02<1:43:57,  1.95it/s]

step:7440, train_loss:0.06940550050329682, acc:0.6239197188894868


 72%|███████▏  | 31590/43738 [4:00:02<1:36:06,  2.11it/s]

step:7440, train_loss:0.06940343458522646, acc:0.6239316239316239


 72%|███████▏  | 31591/43738 [4:00:03<1:33:04,  2.18it/s]

step:7440, train_loss:0.06940125030633457, acc:0.6239435282200627


 72%|███████▏  | 31592/43738 [4:00:03<1:23:55,  2.41it/s]

step:7440, train_loss:0.06940103491348885, acc:0.623923778171689


 72%|███████▏  | 31593/43738 [4:00:04<1:44:22,  1.94it/s]

step:7440, train_loss:0.06940431139663472, acc:0.6239040293735955


 72%|███████▏  | 31594/43738 [4:00:04<1:35:43,  2.11it/s]

step:7440, train_loss:0.06940540452349479, acc:0.6238842818256631


 72%|███████▏  | 31595/43738 [4:00:05<1:36:29,  2.10it/s]

step:7440, train_loss:0.0694063473683042, acc:0.6238645355277734


 72%|███████▏  | 31596/43738 [4:00:05<1:35:29,  2.12it/s]

step:7440, train_loss:0.06940439910178534, acc:0.6238764400557033


 72%|███████▏  | 31597/43738 [4:00:05<1:25:22,  2.37it/s]

step:7440, train_loss:0.06940453520787006, acc:0.6238566952558787


 72%|███████▏  | 31598/43738 [4:00:06<1:24:53,  2.38it/s]

step:7440, train_loss:0.06940606472330749, acc:0.6238369517058041


 72%|███████▏  | 31599/43738 [4:00:06<1:26:51,  2.33it/s]

step:7440, train_loss:0.06940390016393023, acc:0.6238488559764549


 73%|███████▎  | 31904/43738 [4:02:25<1:19:39,  2.48it/s]

step:7460, train_loss:0.06942485407649955, acc:0.6236835506519559


 73%|███████▎  | 31905/43738 [4:02:26<1:22:57,  2.38it/s]

step:7460, train_loss:0.06942671544983528, acc:0.623664002507444


 73%|███████▎  | 31906/43738 [4:02:26<1:22:28,  2.39it/s]

step:7460, train_loss:0.06942580924173496, acc:0.6236757976556133


 73%|███████▎  | 31907/43738 [4:02:27<1:11:03,  2.78it/s]

step:7460, train_loss:0.06942372134321466, acc:0.6236875920644372


 73%|███████▎  | 31908/43738 [4:02:27<1:30:55,  2.17it/s]

step:7460, train_loss:0.06942613826454515, acc:0.6236680456311897


 73%|███████▎  | 31909/43738 [4:02:28<1:27:11,  2.26it/s]

step:7460, train_loss:0.06942661601967781, acc:0.6236485004230782


 73%|███████▎  | 31910/43738 [4:02:28<1:19:55,  2.47it/s]

step:7460, train_loss:0.06942713348114075, acc:0.6236289564399875


 73%|███████▎  | 31911/43738 [4:02:28<1:17:11,  2.55it/s]

step:7460, train_loss:0.06942636125695406, acc:0.623640750838269


 73%|███████▎  | 31912/43738 [4:02:29<1:35:08,  2.07it/s]

step:7460, train_loss:0.06943251946335412, acc:0.6236212083228879


 73%|███████▎  | 31913/43738 [4:02:29<1:20:29,  2.45it/s]

step:7460, train_loss:0.0694305639968914, acc:0.6236330022247987


 73%|███████▎  | 31914/43738 [4:02:30<1:20:50,  2.44it/s]

step:7460, train_loss:0.06943114732301398, acc:0.623613461176913


 73%|███████▎  | 31915/43738 [4:02:30<1:17:45,  2.53it/s]

step:7460, train_loss:0.0694289802378161, acc:0.6236252545824847


 73%|███████▎  | 31916/43738 [4:02:30<1:19:32,  2.48it/s]

step:7460, train_loss:0.06942747318397854, acc:0.6236370472490287


 73%|███████▎  | 31917/43738 [4:02:31<1:37:33,  2.02it/s]

step:7460, train_loss:0.06943032806762518, acc:0.6236175079111446


 73%|███████▎  | 31918/43738 [4:02:31<1:22:09,  2.40it/s]

step:7460, train_loss:0.06942815295073558, acc:0.6236293000814588


 73%|███████▎  | 31919/43738 [4:02:32<1:34:02,  2.09it/s]

step:7460, train_loss:0.06942738832111285, acc:0.623641091512892


 74%|███████▎  | 32224/43738 [4:04:48<1:36:49,  1.98it/s]

step:7480, train_loss:0.06937230421418483, acc:0.6237897219463754


 74%|███████▎  | 32225/43738 [4:04:48<1:26:18,  2.22it/s]

step:7480, train_loss:0.06937101757600925, acc:0.6238013964313421


 74%|███████▎  | 32226/43738 [4:04:48<1:14:21,  2.58it/s]

step:7480, train_loss:0.06936887654456815, acc:0.6238130701917706


 74%|███████▎  | 32227/43738 [4:04:49<1:14:45,  2.57it/s]

step:7480, train_loss:0.06936805645485854, acc:0.6238247432277283


 74%|███████▎  | 32228/43738 [4:04:49<1:13:08,  2.62it/s]

step:7480, train_loss:0.06936639428046339, acc:0.6238364155392826


 74%|███████▎  | 32229/43738 [4:04:49<1:08:45,  2.79it/s]

step:7480, train_loss:0.06936702552353806, acc:0.6238170591703125


 74%|███████▎  | 32230/43738 [4:04:50<1:15:55,  2.53it/s]

step:7480, train_loss:0.06937191550843809, acc:0.6237977040024821


 74%|███████▎  | 32231/43738 [4:04:50<1:17:10,  2.49it/s]

step:7480, train_loss:0.06937110672714751, acc:0.6238093760665198


 74%|███████▎  | 32232/43738 [4:04:51<1:08:31,  2.80it/s]

step:7480, train_loss:0.06936908007108057, acc:0.6238210474063043


 74%|███████▎  | 32233/43738 [4:04:51<1:15:34,  2.54it/s]

step:7480, train_loss:0.06936803811171335, acc:0.623832718021903


 74%|███████▎  | 32234/43738 [4:04:51<1:16:49,  2.50it/s]

step:7480, train_loss:0.06936830807992758, acc:0.6238133647701185


 74%|███████▎  | 32235/43738 [4:04:52<1:08:06,  2.81it/s]

step:7480, train_loss:0.06936795937964908, acc:0.6238250348999534


 74%|███████▎  | 32236/43738 [4:04:52<1:15:42,  2.53it/s]

step:7480, train_loss:0.06936655642536023, acc:0.6238367043057451


 74%|███████▎  | 32237/43738 [4:04:53<1:23:37,  2.29it/s]

step:7480, train_loss:0.06936489287492233, acc:0.6238483729875609


 74%|███████▎  | 32238/43738 [4:04:53<1:25:12,  2.25it/s]

step:7480, train_loss:0.06936299431311212, acc:0.6238600409454681


 74%|███████▎  | 32239/43738 [4:04:54<1:18:18,  2.45it/s]

step:7480, train_loss:0.06936115989288004, acc:0.6238717081795341


 74%|███████▍  | 32544/43738 [4:07:07<1:23:35,  2.23it/s]

step:7500, train_loss:0.06931406191915947, acc:0.6243239921337267


 74%|███████▍  | 32545/43738 [4:07:07<1:28:35,  2.11it/s]

step:7500, train_loss:0.06931200676861034, acc:0.6243355354125057


 74%|███████▍  | 32546/43738 [4:07:08<1:36:04,  1.94it/s]

step:7500, train_loss:0.06931000907417412, acc:0.6243470779819332


 74%|███████▍  | 32547/43738 [4:07:08<1:22:33,  2.26it/s]

step:7500, train_loss:0.06930867443797437, acc:0.6243586198420745


 74%|███████▍  | 32548/43738 [4:07:09<1:33:59,  1.98it/s]

step:7500, train_loss:0.0693079631765399, acc:0.6243701609929949


 74%|███████▍  | 32549/43738 [4:07:09<1:24:44,  2.20it/s]

step:7500, train_loss:0.06930590094003285, acc:0.6243817014347599


 74%|███████▍  | 32550/43738 [4:07:09<1:17:51,  2.39it/s]

step:7500, train_loss:0.06930380551652882, acc:0.6243932411674347


 74%|███████▍  | 32551/43738 [4:07:10<1:24:41,  2.20it/s]

step:7500, train_loss:0.06930589909343919, acc:0.6243740591686892


 74%|███████▍  | 32552/43738 [4:07:10<1:25:16,  2.19it/s]

step:7500, train_loss:0.06930384486813795, acc:0.6243855984271319


 74%|███████▍  | 32553/43738 [4:07:11<1:21:21,  2.29it/s]

step:7500, train_loss:0.06930429025333153, acc:0.6243664178416736


 74%|███████▍  | 32554/43738 [4:07:11<1:25:49,  2.17it/s]

step:7500, train_loss:0.06930512788237685, acc:0.6243472384346009


 74%|███████▍  | 32555/43738 [4:07:12<1:20:41,  2.31it/s]

step:7500, train_loss:0.06930303291853461, acc:0.6243587774535402


 74%|███████▍  | 32556/43738 [4:07:12<1:16:33,  2.43it/s]

step:7500, train_loss:0.06930248799158728, acc:0.6243703157636074


 74%|███████▍  | 32557/43738 [4:07:13<1:37:08,  1.92it/s]

step:7500, train_loss:0.06930066203603738, acc:0.6243818533648677


 74%|███████▍  | 32558/43738 [4:07:13<1:31:56,  2.03it/s]

step:7500, train_loss:0.06929918886801914, acc:0.6243933902573868


 74%|███████▍  | 32559/43738 [4:07:14<1:40:02,  1.86it/s]

step:7500, train_loss:0.06929819463400372, acc:0.6244049264412298


 75%|███████▌  | 32864/43738 [4:09:32<1:19:15,  2.29it/s]

step:7520, train_loss:0.06932486047542416, acc:0.6243305744888024


 75%|███████▌  | 32865/43738 [4:09:33<1:39:10,  1.83it/s]

step:7520, train_loss:0.06932902759926042, acc:0.6243115776662103


 75%|███████▌  | 32866/43738 [4:09:34<1:34:02,  1.93it/s]

step:7520, train_loss:0.06932834391348955, acc:0.6242925819996349


 75%|███████▌  | 32867/43738 [4:09:34<1:23:51,  2.16it/s]

step:7520, train_loss:0.06932652488534848, acc:0.624304013143883


 75%|███████▌  | 32868/43738 [4:09:34<1:26:30,  2.09it/s]

step:7520, train_loss:0.06933339663085107, acc:0.6242850188633321


 75%|███████▌  | 32869/43738 [4:09:35<1:27:41,  2.07it/s]

step:7520, train_loss:0.06933146957124695, acc:0.6242964495421217


 75%|███████▌  | 32870/43738 [4:09:35<1:22:39,  2.19it/s]

step:7520, train_loss:0.06933353624617686, acc:0.6242774566473989


 75%|███████▌  | 32871/43738 [4:09:36<1:29:11,  2.03it/s]

step:7520, train_loss:0.0693349170110219, acc:0.6242584649082779


 75%|███████▌  | 32872/43738 [4:09:36<1:30:05,  2.01it/s]

step:7520, train_loss:0.06933431994258253, acc:0.6242698953516671


 75%|███████▌  | 32873/43738 [4:09:37<1:34:12,  1.92it/s]

step:7520, train_loss:0.06933374279923323, acc:0.6242813250996259


 75%|███████▌  | 32874/43738 [4:09:38<1:32:42,  1.95it/s]

step:7520, train_loss:0.06933424469616288, acc:0.6242623349759688


 75%|███████▌  | 32875/43738 [4:09:38<1:38:30,  1.84it/s]

step:7520, train_loss:0.06933322822919272, acc:0.6242737642585552


 75%|███████▌  | 32876/43738 [4:09:39<1:42:44,  1.76it/s]

step:7520, train_loss:0.06933405427835233, acc:0.6242547755201363


 75%|███████▌  | 32877/43738 [4:09:39<1:42:17,  1.77it/s]

step:7520, train_loss:0.06933194544944107, acc:0.6242662043373787


 75%|███████▌  | 32878/43738 [4:09:40<1:47:14,  1.69it/s]

step:7520, train_loss:0.0693318862145842, acc:0.6242472169840014


 75%|███████▌  | 32879/43738 [4:09:41<1:45:12,  1.72it/s]

step:7520, train_loss:0.06932977765603142, acc:0.6242586453359287


 76%|███████▌  | 33184/43738 [4:12:01<1:20:37,  2.18it/s]

step:7540, train_loss:0.0693651123410158, acc:0.6241863548698168


 76%|███████▌  | 33185/43738 [4:12:02<1:19:31,  2.21it/s]

step:7540, train_loss:0.06936581554093892, acc:0.6241675455778213


 76%|███████▌  | 33186/43738 [4:12:02<1:18:23,  2.24it/s]

step:7540, train_loss:0.06937092807515624, acc:0.6241487374193937


 76%|███████▌  | 33187/43738 [4:12:02<1:12:04,  2.44it/s]

step:7540, train_loss:0.06937082793731554, acc:0.6241600626751439


 76%|███████▌  | 33188/43738 [4:12:03<1:03:19,  2.78it/s]

step:7540, train_loss:0.06936911267238323, acc:0.624171387248403


 76%|███████▌  | 33189/43738 [4:12:03<56:39,  3.10it/s]  

step:7540, train_loss:0.06936705622802312, acc:0.6241827111392328


 76%|███████▌  | 33190/43738 [4:12:03<59:39,  2.95it/s]

step:7540, train_loss:0.06936763989993311, acc:0.6241639047905996


 76%|███████▌  | 33191/43738 [4:12:04<1:06:02,  2.66it/s]

step:7540, train_loss:0.0693703296219275, acc:0.624145099575186


 76%|███████▌  | 33192/43738 [4:12:04<59:29,  2.95it/s]  

step:7540, train_loss:0.06937101310584266, acc:0.6241262954928899


 76%|███████▌  | 33193/43738 [4:12:04<1:01:38,  2.85it/s]

step:7540, train_loss:0.06936954016249605, acc:0.6241376193775796


 76%|███████▌  | 33194/43738 [4:12:05<1:14:23,  2.36it/s]

step:7540, train_loss:0.0693712961386761, acc:0.6241188166536121


 76%|███████▌  | 33195/43738 [4:12:05<1:06:08,  2.66it/s]

step:7540, train_loss:0.0693700357097924, acc:0.6241301400813376


 76%|███████▌  | 33196/43738 [4:12:05<58:25,  3.01it/s]  

step:7540, train_loss:0.0693713374263723, acc:0.624111338715508


 76%|███████▌  | 33197/43738 [4:12:06<52:47,  3.33it/s]

step:7540, train_loss:0.06937691083158364, acc:0.624092538482393


 76%|███████▌  | 33198/43738 [4:12:06<58:40,  2.99it/s]

step:7540, train_loss:0.06937498692744845, acc:0.6241038616784144


 76%|███████▌  | 33199/43738 [4:12:07<1:16:44,  2.29it/s]

step:7540, train_loss:0.06937480774027245, acc:0.6240850628030965


 77%|███████▋  | 33504/43738 [4:14:25<1:08:49,  2.48it/s]

step:7560, train_loss:0.069398587868991, acc:0.6238956542502387


 77%|███████▋  | 33505/43738 [4:14:25<1:29:51,  1.90it/s]

step:7560, train_loss:0.06939795151429289, acc:0.6239068795702134


 77%|███████▋  | 33506/43738 [4:14:26<1:23:49,  2.03it/s]

step:7560, train_loss:0.06940403200205578, acc:0.623888258819316


 77%|███████▋  | 33507/43738 [4:14:26<1:20:27,  2.12it/s]

step:7560, train_loss:0.069402011731668, acc:0.6238994836899753


 77%|███████▋  | 33508/43738 [4:14:27<1:13:29,  2.32it/s]

step:7560, train_loss:0.06940253767169814, acc:0.623910707890653


 77%|███████▋  | 33509/43738 [4:14:27<1:10:50,  2.41it/s]

step:7560, train_loss:0.06940252645015217, acc:0.62389208869259


 77%|███████▋  | 33510/43738 [4:14:27<1:05:40,  2.60it/s]

step:7560, train_loss:0.06940224196439142, acc:0.6238734706057893


 77%|███████▋  | 33511/43738 [4:14:27<58:35,  2.91it/s]  

step:7560, train_loss:0.06940157376358404, acc:0.6238548536301513


 77%|███████▋  | 33512/43738 [4:14:28<1:10:52,  2.40it/s]

step:7560, train_loss:0.0694009928093753, acc:0.6238660778228694


 77%|███████▋  | 33513/43738 [4:14:28<1:02:18,  2.73it/s]

step:7560, train_loss:0.06939928349047943, acc:0.6238773013457465


 77%|███████▋  | 33514/43738 [4:14:29<1:22:04,  2.08it/s]

step:7560, train_loss:0.06939945706002781, acc:0.6238586859223011


 77%|███████▋  | 33515/43738 [4:14:30<1:36:34,  1.76it/s]

step:7560, train_loss:0.06940181056551037, acc:0.623840071609727


 77%|███████▋  | 33516/43738 [4:14:30<1:20:22,  2.12it/s]

step:7560, train_loss:0.06939997094926344, acc:0.6238512949039264


 77%|███████▋  | 33517/43738 [4:14:31<1:26:35,  1.97it/s]

step:7560, train_loss:0.06939928511118808, acc:0.6238625175284184


 77%|███████▋  | 33518/43738 [4:14:31<1:39:09,  1.72it/s]

step:7560, train_loss:0.06939913324134055, acc:0.6238439047675876


 77%|███████▋  | 33519/43738 [4:14:32<1:22:25,  2.07it/s]

step:7560, train_loss:0.06940031371258647, acc:0.6238252931173365


 77%|███████▋  | 33824/43738 [4:16:53<1:17:36,  2.13it/s]

step:7580, train_loss:0.06942366876148398, acc:0.6236695837275308


 77%|███████▋  | 33825/43738 [4:16:53<1:22:16,  2.01it/s]

step:7580, train_loss:0.06942186461190789, acc:0.623680709534368


 77%|███████▋  | 33826/43738 [4:16:53<1:07:43,  2.44it/s]

step:7580, train_loss:0.06941981452451594, acc:0.6236918346833796


 77%|███████▋  | 33827/43738 [4:16:54<1:05:07,  2.54it/s]

step:7580, train_loss:0.0694178174522363, acc:0.6237029591746238


 77%|███████▋  | 33828/43738 [4:16:54<1:03:16,  2.61it/s]

step:7580, train_loss:0.06941803128933051, acc:0.6237140830081589


 77%|███████▋  | 33829/43738 [4:16:55<1:12:09,  2.29it/s]

step:7580, train_loss:0.06942137463205525, acc:0.6236956457477313


 77%|███████▋  | 33830/43738 [4:16:55<1:09:52,  2.36it/s]

step:7580, train_loss:0.06942264237043838, acc:0.6236772095772982


 77%|███████▋  | 33831/43738 [4:16:55<1:05:43,  2.51it/s]

step:7580, train_loss:0.0694254146750049, acc:0.6236587744967633


 77%|███████▋  | 33832/43738 [4:16:56<1:17:09,  2.14it/s]

step:7580, train_loss:0.06942636566845498, acc:0.6236403405060298


 77%|███████▋  | 33833/43738 [4:16:56<1:07:51,  2.43it/s]

step:7580, train_loss:0.06942769564959918, acc:0.623621907605001


 77%|███████▋  | 33834/43738 [4:16:57<1:27:59,  1.88it/s]

step:7580, train_loss:0.06942843841470815, acc:0.6236330318614411


 77%|███████▋  | 33835/43738 [4:16:57<1:14:44,  2.21it/s]

step:7580, train_loss:0.06942647752995706, acc:0.6236441554603221


 77%|███████▋  | 33836/43738 [4:16:58<1:18:15,  2.11it/s]

step:7580, train_loss:0.06942695490878253, acc:0.6236257240808606


 77%|███████▋  | 33837/43738 [4:16:59<1:23:39,  1.97it/s]

step:7580, train_loss:0.06942548151388472, acc:0.6236368472382303


 77%|███████▋  | 33838/43738 [4:16:59<1:29:37,  1.84it/s]

step:7580, train_loss:0.06942492887505192, acc:0.6236479697381642


 77%|███████▋  | 33839/43738 [4:17:00<1:21:16,  2.03it/s]

step:7580, train_loss:0.06942309968618447, acc:0.6236590915807204


 78%|███████▊  | 34144/43738 [4:19:20<1:15:35,  2.12it/s]

step:7600, train_loss:0.06935854092678466, acc:0.6240335051546392


 78%|███████▊  | 34145/43738 [4:19:20<1:17:25,  2.06it/s]

step:7600, train_loss:0.06935653022231426, acc:0.6240445160345585


 78%|███████▊  | 34146/43738 [4:19:21<1:11:40,  2.23it/s]

step:7600, train_loss:0.06935449907091272, acc:0.6240555262695484


 78%|███████▊  | 34147/43738 [4:19:21<1:12:25,  2.21it/s]

step:7600, train_loss:0.06935867291830447, acc:0.6240372507101649


 78%|███████▊  | 34148/43738 [4:19:21<1:06:03,  2.42it/s]

step:7600, train_loss:0.06935783283381527, acc:0.6240482605130608


 78%|███████▊  | 34149/43738 [4:19:22<1:05:49,  2.43it/s]

step:7600, train_loss:0.06936005375640711, acc:0.6240299862367858


 78%|███████▊  | 34150/43738 [4:19:22<1:06:51,  2.39it/s]

step:7600, train_loss:0.06936127568329725, acc:0.6240117130307467


 78%|███████▊  | 34151/43738 [4:19:23<58:55,  2.71it/s]  

step:7600, train_loss:0.06936383421262332, acc:0.6239934408948493


 78%|███████▊  | 34152/43738 [4:19:23<1:03:37,  2.51it/s]

step:7600, train_loss:0.06936367730838888, acc:0.6240044506910284


 78%|███████▊  | 34153/43738 [4:19:23<1:01:55,  2.58it/s]

step:7600, train_loss:0.069363379088353, acc:0.6239861798377888


 78%|███████▊  | 34154/43738 [4:19:24<1:17:17,  2.07it/s]

step:7600, train_loss:0.06936639427746907, acc:0.6239679100544592


 78%|███████▊  | 34155/43738 [4:19:25<1:17:06,  2.07it/s]

step:7600, train_loss:0.06936443380648248, acc:0.6239789196310935


 78%|███████▊  | 34156/43738 [4:19:25<1:07:00,  2.38it/s]

step:7600, train_loss:0.06936360065119478, acc:0.6239899285630636


 78%|███████▊  | 34157/43738 [4:19:25<1:05:28,  2.44it/s]

step:7600, train_loss:0.06936333953756624, acc:0.624000936850426


 78%|███████▊  | 34158/43738 [4:19:26<1:23:23,  1.91it/s]

step:7600, train_loss:0.06937238727812083, acc:0.6239826687745185


 78%|███████▊  | 34159/43738 [4:19:26<1:15:21,  2.12it/s]

step:7600, train_loss:0.06937482818067275, acc:0.6239644017682017


 79%|███████▉  | 34464/43738 [4:21:41<1:34:03,  1.64it/s]

step:7620, train_loss:0.06935922878991822, acc:0.6240134633240483


 79%|███████▉  | 34465/43738 [4:21:41<1:19:04,  1.95it/s]

step:7620, train_loss:0.06935728339779638, acc:0.6240243725518642


 79%|███████▉  | 34466/43738 [4:21:42<1:15:13,  2.05it/s]

step:7620, train_loss:0.0693552729245768, acc:0.6240352811466373


 79%|███████▉  | 34467/43738 [4:21:42<1:11:03,  2.17it/s]

step:7620, train_loss:0.06935459070896481, acc:0.6240171758493631


 79%|███████▉  | 34468/43738 [4:21:43<1:27:44,  1.76it/s]

step:7620, train_loss:0.069352815985424, acc:0.6240280840199606


 79%|███████▉  | 34469/43738 [4:21:44<1:35:04,  1.62it/s]

step:7620, train_loss:0.06935081237228277, acc:0.6240389915576315


 79%|███████▉  | 34470/43738 [4:21:44<1:45:25,  1.47it/s]

step:7620, train_loss:0.06934985517380682, acc:0.6240498984624311


 79%|███████▉  | 34471/43738 [4:21:45<1:37:39,  1.58it/s]

step:7620, train_loss:0.06934800830562056, acc:0.6240608047344144


 79%|███████▉  | 34472/43738 [4:21:46<1:47:38,  1.43it/s]

step:7620, train_loss:0.06934840334155151, acc:0.6240427013228127


 79%|███████▉  | 34473/43738 [4:21:46<1:28:22,  1.75it/s]

step:7620, train_loss:0.06934825791136709, acc:0.6240245989615061


 79%|███████▉  | 34474/43738 [4:21:47<1:24:42,  1.82it/s]

step:7620, train_loss:0.06934711145006453, acc:0.6240355050182746


 79%|███████▉  | 34475/43738 [4:21:47<1:17:49,  1.98it/s]

step:7620, train_loss:0.06934511958564543, acc:0.6240464104423495


 79%|███████▉  | 34476/43738 [4:21:48<1:32:46,  1.66it/s]

step:7620, train_loss:0.06934403683872967, acc:0.6240573152337858


 79%|███████▉  | 34477/43738 [4:21:48<1:24:08,  1.83it/s]

step:7620, train_loss:0.06934404501086176, acc:0.6240682193926386


 79%|███████▉  | 34478/43738 [4:21:49<1:18:02,  1.98it/s]

step:7620, train_loss:0.06934348412481595, acc:0.6240791229189628


 79%|███████▉  | 34479/43738 [4:21:49<1:11:14,  2.17it/s]

step:7620, train_loss:0.06934926298587189, acc:0.624061022651469


 80%|███████▉  | 34784/43738 [4:24:15<1:16:30,  1.95it/s]

step:7640, train_loss:0.06933471023626832, acc:0.6241087856485741


 80%|███████▉  | 34785/43738 [4:24:15<1:08:23,  2.18it/s]

step:7640, train_loss:0.06933346456703073, acc:0.6241195917780653


 80%|███████▉  | 34786/43738 [4:24:15<58:35,  2.55it/s]  

step:7640, train_loss:0.06933310550293692, acc:0.6241016500891163


 80%|███████▉  | 34787/43738 [4:24:16<1:17:17,  1.93it/s]

step:7640, train_loss:0.06933164111722623, acc:0.6241124558024549


 80%|███████▉  | 34788/43738 [4:24:17<1:13:05,  2.04it/s]

step:7640, train_loss:0.0693340318610725, acc:0.6240945153501207


 80%|███████▉  | 34789/43738 [4:24:17<1:03:56,  2.33it/s]

step:7640, train_loss:0.06933208170298079, acc:0.624105320647331


 80%|███████▉  | 34790/43738 [4:24:17<1:07:22,  2.21it/s]

step:7640, train_loss:0.06933009135704662, acc:0.6241161253233688


 80%|███████▉  | 34791/43738 [4:24:18<1:22:29,  1.81it/s]

step:7640, train_loss:0.0693336028838587, acc:0.6240981863125521


 80%|███████▉  | 34792/43738 [4:24:19<1:32:09,  1.62it/s]

step:7640, train_loss:0.06933563537263915, acc:0.6240802483329501


 80%|███████▉  | 34793/43738 [4:24:19<1:23:36,  1.78it/s]

step:7640, train_loss:0.06933366078238279, acc:0.6240910527979766


 80%|███████▉  | 34794/43738 [4:24:20<1:18:34,  1.90it/s]

step:7640, train_loss:0.06933536529414643, acc:0.6240731160544921


 80%|███████▉  | 34795/43738 [4:24:20<1:05:28,  2.28it/s]

step:7640, train_loss:0.06933419293202432, acc:0.6240839201034631


 80%|███████▉  | 34796/43738 [4:24:20<1:02:04,  2.40it/s]

step:7640, train_loss:0.06933872870531733, acc:0.6240659845959305


 80%|███████▉  | 34797/43738 [4:24:21<55:07,  2.70it/s]  

step:7640, train_loss:0.06933674993122499, acc:0.6240767882288702


 80%|███████▉  | 34798/43738 [4:24:21<56:24,  2.64it/s]

step:7640, train_loss:0.06934482822140814, acc:0.624058853957124


 80%|███████▉  | 34799/43738 [4:24:21<56:25,  2.64it/s]

step:7640, train_loss:0.06934370655280485, acc:0.6240696571740567


 80%|████████  | 35104/43738 [4:26:42<1:08:07,  2.11it/s]

step:7660, train_loss:0.06933831802509723, acc:0.624059936189608


 80%|████████  | 35105/43738 [4:26:43<1:11:00,  2.03it/s]

step:7660, train_loss:0.06933828834024619, acc:0.6240706452072354


 80%|████████  | 35106/43738 [4:26:43<59:35,  2.41it/s]  

step:7660, train_loss:0.06933636225384156, acc:0.6240813536147667


 80%|████████  | 35107/43738 [4:26:43<1:04:57,  2.21it/s]

step:7660, train_loss:0.06933836811452566, acc:0.6240635770644031


 80%|████████  | 35108/43738 [4:26:44<55:26,  2.59it/s]  

step:7660, train_loss:0.0693378095607391, acc:0.6240458015267175


 80%|████████  | 35109/43738 [4:26:44<1:01:43,  2.33it/s]

step:7660, train_loss:0.06933972035077156, acc:0.6240280270016235


 80%|████████  | 35110/43738 [4:26:44<58:46,  2.45it/s]  

step:7660, train_loss:0.0693377661630642, acc:0.6240387354030191


 80%|████████  | 35111/43738 [4:26:45<51:54,  2.77it/s]

step:7660, train_loss:0.06933653879276154, acc:0.6240494431944404


 80%|████████  | 35112/43738 [4:26:45<55:30,  2.59it/s]

step:7660, train_loss:0.0693435485667123, acc:0.6240316700843017


 80%|████████  | 35113/43738 [4:26:45<49:38,  2.90it/s]

step:7660, train_loss:0.0693427275528469, acc:0.624042377467035


 80%|████████  | 35114/43738 [4:26:46<51:12,  2.81it/s]

step:7660, train_loss:0.0693414554366874, acc:0.6240530842399044


 80%|████████  | 35115/43738 [4:26:46<47:28,  3.03it/s]

step:7660, train_loss:0.06934028414439453, acc:0.6240637904029617


 80%|████████  | 35116/43738 [4:26:47<58:28,  2.46it/s]

step:7660, train_loss:0.06934568749414859, acc:0.6240460189087595


 80%|████████  | 35117/43738 [4:26:47<58:01,  2.48it/s]

step:7660, train_loss:0.06934530646270676, acc:0.6240282484266879


 80%|████████  | 35118/43738 [4:26:48<1:07:17,  2.14it/s]

step:7660, train_loss:0.06934596831020924, acc:0.6240104789566604


 80%|████████  | 35119/43738 [4:26:48<57:30,  2.50it/s]  

step:7660, train_loss:0.06934408804531149, acc:0.6240211851134714


 81%|████████  | 35424/43738 [4:29:10<55:28,  2.50it/s]  

step:7680, train_loss:0.06935248178397965, acc:0.623927280939476


 81%|████████  | 35425/43738 [4:29:10<58:40,  2.36it/s]

step:7680, train_loss:0.06935434012545139, acc:0.623909668313338


 81%|████████  | 35426/43738 [4:29:10<51:22,  2.70it/s]

step:7680, train_loss:0.06935266277906431, acc:0.623920284536781


 81%|████████  | 35427/43738 [4:29:11<51:19,  2.70it/s]

step:7680, train_loss:0.06935070828425677, acc:0.6239309001608943


 81%|████████  | 35428/43738 [4:29:11<47:06,  2.94it/s]

step:7680, train_loss:0.06935377607648482, acc:0.6239132889240149


 81%|████████  | 35429/43738 [4:29:11<43:08,  3.21it/s]

step:7680, train_loss:0.06935207835431795, acc:0.6239239041463208


 81%|████████  | 35430/43738 [4:29:12<1:03:34,  2.18it/s]

step:7680, train_loss:0.06935223577198725, acc:0.6239062941010444


 81%|████████  | 35431/43738 [4:29:12<55:21,  2.50it/s]  

step:7680, train_loss:0.06936480604949744, acc:0.6238886850498151


 81%|████████  | 35432/43738 [4:29:13<1:01:59,  2.23it/s]

step:7680, train_loss:0.06936291201344244, acc:0.6238993000677354


 81%|████████  | 35433/43738 [4:29:13<1:04:46,  2.14it/s]

step:7680, train_loss:0.06936266879803393, acc:0.6238816922078289


 81%|████████  | 35434/43738 [4:29:14<1:10:00,  1.98it/s]

step:7680, train_loss:0.06936164601212559, acc:0.6238923068239544


 81%|████████  | 35435/43738 [4:29:14<1:10:53,  1.95it/s]

step:7680, train_loss:0.06936304555758309, acc:0.6239029208409764


 81%|████████  | 35436/43738 [4:29:15<1:03:27,  2.18it/s]

step:7680, train_loss:0.0693637725559573, acc:0.6238853143695676


 81%|████████  | 35437/43738 [4:29:15<1:01:11,  2.26it/s]

step:7680, train_loss:0.069366701933267, acc:0.6238677088918362


 81%|████████  | 35438/43738 [4:29:16<55:51,  2.48it/s]  

step:7680, train_loss:0.06936675604129894, acc:0.6238783227044415


 81%|████████  | 35439/43738 [4:29:16<59:07,  2.34it/s]

step:7680, train_loss:0.06936656385201888, acc:0.6238889359180564


 82%|████████▏ | 35744/43738 [4:31:37<55:46,  2.39it/s]  

step:7700, train_loss:0.06937011994680818, acc:0.6240487914055506


 82%|████████▏ | 35745/43738 [4:31:37<59:24,  2.24it/s]

step:7700, train_loss:0.06937534972206197, acc:0.6240313330535739


 82%|████████▏ | 35746/43738 [4:31:38<1:13:03,  1.82it/s]

step:7700, train_loss:0.06937765700220827, acc:0.6240138756783976


 82%|████████▏ | 35747/43738 [4:31:39<1:12:04,  1.85it/s]

step:7700, train_loss:0.06938014625740187, acc:0.6239964192799395


 82%|████████▏ | 35748/43738 [4:31:39<1:11:56,  1.85it/s]

step:7700, train_loss:0.06937987844446093, acc:0.6240069374510462


 82%|████████▏ | 35749/43738 [4:31:40<1:08:24,  1.95it/s]

step:7700, train_loss:0.06938073471629998, acc:0.623989482223279


 82%|████████▏ | 35750/43738 [4:31:40<58:27,  2.28it/s]  

step:7700, train_loss:0.06938145691439855, acc:0.623972027972028


 82%|████████▏ | 35751/43738 [4:31:40<51:45,  2.57it/s]

step:7700, train_loss:0.06937963669666766, acc:0.6239825459427708


 82%|████████▏ | 35752/43738 [4:31:41<58:35,  2.27it/s]

step:7700, train_loss:0.06937784400403292, acc:0.6239930633251287


 82%|████████▏ | 35753/43738 [4:31:41<1:04:20,  2.07it/s]

step:7700, train_loss:0.0693775366293758, acc:0.6240035801191508


 82%|████████▏ | 35754/43738 [4:31:42<1:08:34,  1.94it/s]

step:7700, train_loss:0.06937610028618905, acc:0.6240140963248867


 82%|████████▏ | 35755/43738 [4:31:42<1:00:11,  2.21it/s]

step:7700, train_loss:0.06937416067166455, acc:0.6240246119423857


 82%|████████▏ | 35756/43738 [4:31:43<59:08,  2.25it/s]  

step:7700, train_loss:0.06937806721589637, acc:0.6240071596375434


 82%|████████▏ | 35757/43738 [4:31:43<1:01:14,  2.17it/s]

step:7700, train_loss:0.06937620705520026, acc:0.6240176748608665


 82%|████████▏ | 35758/43738 [4:31:44<1:14:59,  1.77it/s]

step:7700, train_loss:0.06937562470102518, acc:0.6240281894960569


 82%|████████▏ | 35759/43738 [4:31:44<1:05:44,  2.02it/s]

step:7700, train_loss:0.0693747509929417, acc:0.6240387035431639


 82%|████████▏ | 36064/43738 [4:33:57<47:31,  2.69it/s]  

step:7720, train_loss:0.06934751894684213, acc:0.6241404170363798


 82%|████████▏ | 36065/43738 [4:33:58<49:41,  2.57it/s]

step:7720, train_loss:0.06934570749978644, acc:0.624150838763344


 82%|████████▏ | 36066/43738 [4:33:58<54:04,  2.36it/s]

step:7720, train_loss:0.06934433337909948, acc:0.6241612599123828


 82%|████████▏ | 36067/43738 [4:33:59<52:51,  2.42it/s]

step:7720, train_loss:0.06935041040479306, acc:0.6241439543072614


 82%|████████▏ | 36068/43738 [4:33:59<54:07,  2.36it/s]

step:7720, train_loss:0.06934956279145378, acc:0.6241543750693135


 82%|████████▏ | 36069/43738 [4:34:00<1:00:51,  2.10it/s]

step:7720, train_loss:0.06935186524441167, acc:0.6241370706146553


 82%|████████▏ | 36070/43738 [4:34:00<52:16,  2.44it/s]  

step:7720, train_loss:0.06935006138639123, acc:0.6241474909897422


 82%|████████▏ | 36071/43738 [4:34:01<53:58,  2.37it/s]

step:7720, train_loss:0.06934858067718477, acc:0.6241579107870588


 82%|████████▏ | 36072/43738 [4:34:01<1:08:19,  1.87it/s]

step:7720, train_loss:0.06935224243741635, acc:0.6241406076735418


 82%|████████▏ | 36073/43738 [4:34:02<1:00:38,  2.11it/s]

step:7720, train_loss:0.06935665480528905, acc:0.6241233055193635


 82%|████████▏ | 36074/43738 [4:34:02<59:40,  2.14it/s]  

step:7720, train_loss:0.06935982517033246, acc:0.6241060043244442


 82%|████████▏ | 36075/43738 [4:34:03<1:00:48,  2.10it/s]

step:7720, train_loss:0.06936197267773794, acc:0.6240887040887041


 82%|████████▏ | 36076/43738 [4:34:03<1:08:43,  1.86it/s]

step:7720, train_loss:0.0693676057961403, acc:0.6240714048120635


 82%|████████▏ | 36077/43738 [4:34:04<58:00,  2.20it/s]  

step:7720, train_loss:0.06936592025496031, acc:0.6240818249854478


 82%|████████▏ | 36078/43738 [4:34:04<57:36,  2.22it/s]

step:7720, train_loss:0.06936607753035667, acc:0.6240645268584734


 82%|████████▏ | 36079/43738 [4:34:04<57:24,  2.22it/s]

step:7720, train_loss:0.06936428534815058, acc:0.6240749466448626


 83%|████████▎ | 36384/43738 [4:36:23<1:07:50,  1.81it/s]

step:7740, train_loss:0.06939111450906091, acc:0.6238731310466139


 83%|████████▎ | 36385/43738 [4:36:23<1:02:47,  1.95it/s]

step:7740, train_loss:0.06938920836002936, acc:0.6238834684622784


 83%|████████▎ | 36386/43738 [4:36:24<55:48,  2.20it/s]  

step:7740, train_loss:0.06938750280531354, acc:0.6238938053097345


 83%|████████▎ | 36387/43738 [4:36:24<52:13,  2.35it/s]

step:7740, train_loss:0.06939152899304818, acc:0.6238766592464342


 83%|████████▎ | 36388/43738 [4:36:24<52:10,  2.35it/s]

step:7740, train_loss:0.06938974107374073, acc:0.6238869957128724


 83%|████████▎ | 36389/43738 [4:36:25<57:07,  2.14it/s]

step:7740, train_loss:0.06938860465301638, acc:0.6238973316112012


 83%|████████▎ | 36390/43738 [4:36:25<55:25,  2.21it/s]

step:7740, train_loss:0.06939204312424384, acc:0.6238801868645232


 83%|████████▎ | 36391/43738 [4:36:26<48:43,  2.51it/s]

step:7740, train_loss:0.06939030326414089, acc:0.6238905223819076


 83%|████████▎ | 36392/43738 [4:36:26<48:35,  2.52it/s]

step:7740, train_loss:0.06938928964760868, acc:0.6239008573312816


 83%|████████▎ | 36393/43738 [4:36:27<53:50,  2.27it/s]

step:7740, train_loss:0.0693874187882923, acc:0.623911191712692


 83%|████████▎ | 36394/43738 [4:36:27<1:02:38,  1.95it/s]

step:7740, train_loss:0.06938626379359253, acc:0.6239215255261856


 83%|████████▎ | 36395/43738 [4:36:28<58:14,  2.10it/s]  

step:7740, train_loss:0.06938868518199214, acc:0.6239043824701195


 83%|████████▎ | 36396/43738 [4:36:28<58:09,  2.10it/s]

step:7740, train_loss:0.0693869333838457, acc:0.6239147159028464


 83%|████████▎ | 36397/43738 [4:36:28<50:29,  2.42it/s]

step:7740, train_loss:0.06938586160183917, acc:0.6239250487677556


 83%|████████▎ | 36398/43738 [4:36:29<45:56,  2.66it/s]

step:7740, train_loss:0.06938481157052197, acc:0.6239353810648937


 83%|████████▎ | 36399/43738 [4:36:29<57:25,  2.13it/s]

step:7740, train_loss:0.06939267355405467, acc:0.6239182395120745


 84%|████████▍ | 36704/43738 [4:38:49<57:12,  2.05it/s]  

step:7760, train_loss:0.06936334751735906, acc:0.6240464254577158


 84%|████████▍ | 36705/43738 [4:38:50<1:00:14,  1.95it/s]

step:7760, train_loss:0.06936148661932375, acc:0.6240566680288789


 84%|████████▍ | 36706/43738 [4:38:50<57:58,  2.02it/s]  

step:7760, train_loss:0.06936090592543512, acc:0.624066910041955


 84%|████████▍ | 36707/43738 [4:38:51<59:48,  1.96it/s]

step:7760, train_loss:0.06935906685551557, acc:0.6240771514969897


 84%|████████▍ | 36708/43738 [4:38:51<50:13,  2.33it/s]

step:7760, train_loss:0.06935906265723284, acc:0.6240873923940286


 84%|████████▍ | 36709/43738 [4:38:51<43:28,  2.69it/s]

step:7760, train_loss:0.06935723050857992, acc:0.6240976327331172


 84%|████████▍ | 36710/43738 [4:38:51<38:34,  3.04it/s]

step:7760, train_loss:0.06935676060184194, acc:0.6240806319803868


 84%|████████▍ | 36711/43738 [4:38:52<41:04,  2.85it/s]

step:7760, train_loss:0.06935487888672366, acc:0.6240908719457383


 84%|████████▍ | 36712/43738 [4:38:52<43:46,  2.67it/s]

step:7760, train_loss:0.0693586512646692, acc:0.6240738723033341


 84%|████████▍ | 36713/43738 [4:38:52<41:53,  2.80it/s]

step:7760, train_loss:0.06935686164140158, acc:0.624084111894969


 84%|████████▍ | 36714/43738 [4:38:53<40:20,  2.90it/s]

step:7760, train_loss:0.06935498601397166, acc:0.6240943509288011


 84%|████████▍ | 36715/43738 [4:38:53<43:59,  2.66it/s]

step:7760, train_loss:0.06935493605385674, acc:0.6240773525806891


 84%|████████▍ | 36716/43738 [4:38:54<48:41,  2.40it/s]

step:7760, train_loss:0.0693544947642346, acc:0.6240875912408759


 84%|████████▍ | 36717/43738 [4:38:54<50:54,  2.30it/s]

step:7760, train_loss:0.06935309233986528, acc:0.6240978293433559


 84%|████████▍ | 36718/43738 [4:38:55<47:57,  2.44it/s]

step:7760, train_loss:0.06935132537364364, acc:0.6241080668881748


 84%|████████▍ | 36719/43738 [4:38:55<49:20,  2.37it/s]

step:7760, train_loss:0.06934966443059337, acc:0.6241183038753779


 85%|████████▍ | 37024/43738 [4:41:14<43:48,  2.55it/s]  

step:7780, train_loss:0.06933459371310575, acc:0.6242977528089888


 85%|████████▍ | 37025/43738 [4:41:14<41:36,  2.69it/s]

step:7780, train_loss:0.06933378684176145, acc:0.6243079000675219


 85%|████████▍ | 37026/43738 [4:41:15<47:05,  2.38it/s]

step:7780, train_loss:0.06933589935938539, acc:0.6242910387295414


 85%|████████▍ | 37027/43738 [4:41:15<53:23,  2.09it/s]

step:7780, train_loss:0.06933495318799081, acc:0.6243011856213033


 85%|████████▍ | 37028/43738 [4:41:15<45:16,  2.47it/s]

step:7780, train_loss:0.06933348752482103, acc:0.6243113319649994


 85%|████████▍ | 37029/43738 [4:41:16<45:44,  2.44it/s]

step:7780, train_loss:0.06933221274239855, acc:0.624321477760674


 85%|████████▍ | 37030/43738 [4:41:16<50:34,  2.21it/s]

step:7780, train_loss:0.06933346346831079, acc:0.6243046178773967


 85%|████████▍ | 37031/43738 [4:41:17<55:23,  2.02it/s]

step:7780, train_loss:0.06933657448520142, acc:0.6242877589047015


 85%|████████▍ | 37032/43738 [4:41:17<53:12,  2.10it/s]

step:7780, train_loss:0.06933475792422407, acc:0.624297904515014


 85%|████████▍ | 37033/43738 [4:41:18<53:37,  2.08it/s]

step:7780, train_loss:0.06933320730662994, acc:0.6243080495774039


 85%|████████▍ | 37034/43738 [4:41:18<46:52,  2.38it/s]

step:7780, train_loss:0.06933254216586289, acc:0.6243181940919156


 85%|████████▍ | 37035/43738 [4:41:18<42:51,  2.61it/s]

step:7780, train_loss:0.06933311203996384, acc:0.6243013365735115


 85%|████████▍ | 37036/43738 [4:41:19<49:30,  2.26it/s]

step:7780, train_loss:0.06933234956111795, acc:0.6243114807214603


 85%|████████▍ | 37037/43738 [4:41:19<43:13,  2.58it/s]

step:7780, train_loss:0.0693304815487756, acc:0.6243216243216243


 85%|████████▍ | 37038/43738 [4:41:20<39:29,  2.83it/s]

step:7780, train_loss:0.06932959802560124, acc:0.6243317673740483


 85%|████████▍ | 37039/43738 [4:41:20<45:59,  2.43it/s]

step:7780, train_loss:0.06932774153194733, acc:0.6243419098787765


 85%|████████▌ | 37344/43738 [4:43:41<46:32,  2.29it/s]  

step:7800, train_loss:0.06931402812109, acc:0.6242769922879178


 85%|████████▌ | 37345/43738 [4:43:41<41:15,  2.58it/s]

step:7800, train_loss:0.06931222897029478, acc:0.6242870531530326


 85%|████████▌ | 37346/43738 [4:43:42<54:05,  1.97it/s]

step:7800, train_loss:0.06931475074865553, acc:0.6242703368499973


 85%|████████▌ | 37347/43738 [4:43:42<45:21,  2.35it/s]

step:7800, train_loss:0.069312911083788, acc:0.6242803973545399


 85%|████████▌ | 37348/43738 [4:43:43<45:03,  2.36it/s]

step:7800, train_loss:0.06931584438474084, acc:0.6242636821248795


 85%|████████▌ | 37349/43738 [4:43:43<45:34,  2.34it/s]

step:7800, train_loss:0.06931555693541117, acc:0.6242737422688693


 85%|████████▌ | 37350/43738 [4:43:44<49:19,  2.16it/s]

step:7800, train_loss:0.06931753321267306, acc:0.6242570281124498


 85%|████████▌ | 37351/43738 [4:43:44<41:57,  2.54it/s]

step:7800, train_loss:0.06931789613144566, acc:0.6242403148510081


 85%|████████▌ | 37352/43738 [4:43:45<46:53,  2.27it/s]

step:7800, train_loss:0.06931647150062213, acc:0.6242503748125937


 85%|████████▌ | 37353/43738 [4:43:45<41:28,  2.57it/s]

step:7800, train_loss:0.06931463832163498, acc:0.6242604342355367


 85%|████████▌ | 37354/43738 [4:43:45<40:46,  2.61it/s]

step:7800, train_loss:0.0693128263738486, acc:0.62427049311988


 85%|████████▌ | 37355/43738 [4:43:46<46:56,  2.27it/s]

step:7800, train_loss:0.06931863037073298, acc:0.6242537812876455


 85%|████████▌ | 37356/43738 [4:43:46<43:02,  2.47it/s]

step:7800, train_loss:0.06931824587061769, acc:0.624263839811543


 85%|████████▌ | 37357/43738 [4:43:47<49:58,  2.13it/s]

step:7800, train_loss:0.06932410639008094, acc:0.6242471290521188


 85%|████████▌ | 37358/43738 [4:43:47<52:49,  2.01it/s]

step:7800, train_loss:0.06932691184937398, acc:0.6242304191873227


 85%|████████▌ | 37359/43738 [4:43:48<51:35,  2.06it/s]

step:7800, train_loss:0.0693250566457117, acc:0.6242404775288418


 86%|████████▌ | 37664/43738 [4:46:09<59:21,  1.71it/s]  

step:7820, train_loss:0.06928144593734227, acc:0.6245220900594732


 86%|████████▌ | 37665/43738 [4:46:10<51:03,  1.98it/s]

step:7820, train_loss:0.06927964340862312, acc:0.6245320589406611


 86%|████████▌ | 37666/43738 [4:46:10<42:42,  2.37it/s]

step:7820, train_loss:0.06927854683860848, acc:0.6245420272925184


 86%|████████▌ | 37667/43738 [4:46:10<41:18,  2.45it/s]

step:7820, train_loss:0.06927833463440364, acc:0.624525446677463


 86%|████████▌ | 37668/43738 [4:46:11<38:29,  2.63it/s]

step:7820, train_loss:0.06927673854853593, acc:0.6245354146755867


 86%|████████▌ | 37669/43738 [4:46:11<36:44,  2.75it/s]

step:7820, train_loss:0.06927622612319043, acc:0.624545382144469


 86%|████████▌ | 37670/43738 [4:46:11<40:01,  2.53it/s]

step:7820, train_loss:0.06927756261540385, acc:0.6245288027608177


 86%|████████▌ | 37671/43738 [4:46:12<39:24,  2.57it/s]

step:7820, train_loss:0.06927622303735355, acc:0.624538769876032


 86%|████████▌ | 37672/43738 [4:46:12<34:26,  2.94it/s]

step:7820, train_loss:0.06927566094427658, acc:0.6245487364620939


 86%|████████▌ | 37673/43738 [4:46:12<38:06,  2.65it/s]

step:7820, train_loss:0.0692850001664997, acc:0.6245321583096648


 86%|████████▌ | 37674/43738 [4:46:13<40:42,  2.48it/s]

step:7820, train_loss:0.06928483431814451, acc:0.6245155810373202


 86%|████████▌ | 37675/43738 [4:46:13<38:15,  2.64it/s]

step:7820, train_loss:0.06928338352071217, acc:0.6245255474452555


 86%|████████▌ | 37676/43738 [4:46:14<38:11,  2.65it/s]

step:7820, train_loss:0.06928382388919573, acc:0.6245089712283682


 86%|████████▌ | 37677/43738 [4:46:14<44:46,  2.26it/s]

step:7820, train_loss:0.0692820628008112, acc:0.6245189372826924


 86%|████████▌ | 37678/43738 [4:46:15<45:54,  2.20it/s]

step:7820, train_loss:0.0692820616819664, acc:0.6245023621211317


 86%|████████▌ | 37679/43738 [4:46:15<41:39,  2.42it/s]

step:7820, train_loss:0.06928285151570192, acc:0.62448578783938


 87%|████████▋ | 37984/43738 [4:48:31<42:51,  2.24it/s]  

step:7840, train_loss:0.06930133009555238, acc:0.6241575400168492


 87%|████████▋ | 37985/43738 [4:48:32<42:00,  2.28it/s]

step:7840, train_loss:0.06929973385988818, acc:0.6241674345136238


 87%|████████▋ | 37986/43738 [4:48:32<40:15,  2.38it/s]

step:7840, train_loss:0.06929911002845313, acc:0.6241773284894435


 87%|████████▋ | 37987/43738 [4:48:32<34:38,  2.77it/s]

step:7840, train_loss:0.06929728622481332, acc:0.6241872219443494


 87%|████████▋ | 37988/43738 [4:48:33<45:41,  2.10it/s]

step:7840, train_loss:0.0692956631851595, acc:0.6241971148783827


 87%|████████▋ | 37989/43738 [4:48:33<41:07,  2.33it/s]

step:7840, train_loss:0.06929445823855174, acc:0.6242070072915844


 87%|████████▋ | 37990/43738 [4:48:34<45:04,  2.13it/s]

step:7840, train_loss:0.06929560349129095, acc:0.6241905764674914


 87%|████████▋ | 37991/43738 [4:48:35<54:06,  1.77it/s]

step:7840, train_loss:0.06929667732870674, acc:0.6241741465083835


 87%|████████▋ | 37992/43738 [4:48:35<49:21,  1.94it/s]

step:7840, train_loss:0.06929719503765154, acc:0.6241577174141925


 87%|████████▋ | 37993/43738 [4:48:36<44:59,  2.13it/s]

step:7840, train_loss:0.06929646469757722, acc:0.6241676098228621


 87%|████████▋ | 37994/43738 [4:48:36<40:42,  2.35it/s]

step:7840, train_loss:0.06929466300190901, acc:0.6241775017107964


 87%|████████▋ | 37995/43738 [4:48:36<45:54,  2.08it/s]

step:7840, train_loss:0.06929611766776968, acc:0.6241610738255033


 87%|████████▋ | 37996/43738 [4:48:37<43:03,  2.22it/s]

step:7840, train_loss:0.06929449739523251, acc:0.6241709653647752


 87%|████████▋ | 37997/43738 [4:48:37<41:57,  2.28it/s]

step:7840, train_loss:0.0692947409180577, acc:0.6241545385161986


 87%|████████▋ | 37998/43738 [4:48:38<38:38,  2.48it/s]

step:7840, train_loss:0.06930023190719531, acc:0.6241381125322385


 87%|████████▋ | 37999/43738 [4:48:38<37:51,  2.53it/s]

step:7840, train_loss:0.06929841716315273, acc:0.6241480038948394


 88%|████████▊ | 38304/43738 [4:51:01<45:29,  1.99it/s]  

step:7860, train_loss:0.06933585774351927, acc:0.6238774018379282


 88%|████████▊ | 38305/43738 [4:51:01<40:27,  2.24it/s]

step:7860, train_loss:0.06933505861165372, acc:0.623887220989427


 88%|████████▊ | 38306/43738 [4:51:01<34:50,  2.60it/s]

step:7860, train_loss:0.06933472616201768, acc:0.6238970396282567


 88%|████████▊ | 38307/43738 [4:51:02<40:19,  2.24it/s]

step:7860, train_loss:0.06933323591258703, acc:0.6239068577544574


 88%|████████▊ | 38308/43738 [4:51:02<44:24,  2.04it/s]

step:7860, train_loss:0.06933160355911172, acc:0.6239166753680694


 88%|████████▊ | 38309/43738 [4:51:03<52:18,  1.73it/s]

step:7860, train_loss:0.0693301734252698, acc:0.6239264924691326


 88%|████████▊ | 38310/43738 [4:51:04<46:03,  1.96it/s]

step:7860, train_loss:0.06933055671877267, acc:0.6239363090576873


 88%|████████▊ | 38311/43738 [4:51:04<45:23,  1.99it/s]

step:7860, train_loss:0.0693288503254308, acc:0.6239461251337736


 88%|████████▊ | 38312/43738 [4:51:05<44:52,  2.02it/s]

step:7860, train_loss:0.06932717813744474, acc:0.6239559406974317


 88%|████████▊ | 38313/43738 [4:51:05<41:40,  2.17it/s]

step:7860, train_loss:0.06932537453175595, acc:0.6239657557487015


 88%|████████▊ | 38314/43738 [4:51:05<36:23,  2.48it/s]

step:7860, train_loss:0.06932389957110179, acc:0.6239755702876233


 88%|████████▊ | 38315/43738 [4:51:05<32:13,  2.80it/s]

step:7860, train_loss:0.06932209062374503, acc:0.6239853843142372


 88%|████████▊ | 38316/43738 [4:51:06<32:24,  2.79it/s]

step:7860, train_loss:0.06932144458037341, acc:0.6239951978285834


 88%|████████▊ | 38317/43738 [4:51:06<33:12,  2.72it/s]

step:7860, train_loss:0.06931967161776127, acc:0.6240050108307018


 88%|████████▊ | 38318/43738 [4:51:07<32:12,  2.81it/s]

step:7860, train_loss:0.0693256264261849, acc:0.6239887259251526


 88%|████████▊ | 38319/43738 [4:51:07<29:24,  3.07it/s]

step:7860, train_loss:0.06932395697318261, acc:0.6239985385839922


 88%|████████▊ | 38624/43738 [4:53:26<39:41,  2.15it/s]

step:7880, train_loss:0.06935734867087272, acc:0.6236795774647887


 88%|████████▊ | 38625/43738 [4:53:27<39:22,  2.16it/s]

step:7880, train_loss:0.06935883963384898, acc:0.6236634304207119


 88%|████████▊ | 38626/43738 [4:53:27<34:52,  2.44it/s]

step:7880, train_loss:0.06935861320612689, acc:0.6236472842127064


 88%|████████▊ | 38627/43738 [4:53:27<33:16,  2.56it/s]

step:7880, train_loss:0.06935681776347448, acc:0.6236570274678334


 88%|████████▊ | 38628/43738 [4:53:28<33:09,  2.57it/s]

step:7880, train_loss:0.06935546347222218, acc:0.6236667702184944


 88%|████████▊ | 38629/43738 [4:53:28<42:41,  1.99it/s]

step:7880, train_loss:0.06935423852094666, acc:0.6236765124647285


 88%|████████▊ | 38630/43738 [4:53:29<42:19,  2.01it/s]

step:7880, train_loss:0.0693524931378062, acc:0.6236862542065752


 88%|████████▊ | 38631/43738 [4:53:29<40:48,  2.09it/s]

step:7880, train_loss:0.06935139221079807, acc:0.6236959954440734


 88%|████████▊ | 38632/43738 [4:53:30<38:15,  2.22it/s]

step:7880, train_loss:0.06935083071821217, acc:0.6237057361772623


 88%|████████▊ | 38633/43738 [4:53:30<35:45,  2.38it/s]

step:7880, train_loss:0.0693503607715954, acc:0.6236895917997567


 88%|████████▊ | 38634/43738 [4:53:30<35:59,  2.36it/s]

step:7880, train_loss:0.06935185383989524, acc:0.6236734482580111


 88%|████████▊ | 38635/43738 [4:53:31<41:00,  2.07it/s]

step:7880, train_loss:0.06935618130367516, acc:0.6236573055519606


 88%|████████▊ | 38636/43738 [4:53:31<34:24,  2.47it/s]

step:7880, train_loss:0.06935444302223086, acc:0.6236670462780827


 88%|████████▊ | 38637/43738 [4:53:32<38:35,  2.20it/s]

step:7880, train_loss:0.06935420856424573, acc:0.623676786499987


 88%|████████▊ | 38638/43738 [4:53:32<41:06,  2.07it/s]

step:7880, train_loss:0.0693525581404522, acc:0.6236865262177131


 88%|████████▊ | 38639/43738 [4:53:33<44:25,  1.91it/s]

step:7880, train_loss:0.06935083278837846, acc:0.6236962654313


 89%|████████▉ | 38944/43738 [4:55:53<33:22,  2.39it/s]

step:7900, train_loss:0.06938040093009566, acc:0.6235363599013969


 89%|████████▉ | 38945/43738 [4:55:54<39:48,  2.01it/s]

step:7900, train_loss:0.06938078714550812, acc:0.623520349210425


 89%|████████▉ | 38946/43738 [4:55:55<44:04,  1.81it/s]

step:7900, train_loss:0.06938039955163282, acc:0.6235300159194782


 89%|████████▉ | 38947/43738 [4:55:55<40:30,  1.97it/s]

step:7900, train_loss:0.06937918867941843, acc:0.6235396821321283


 89%|████████▉ | 38948/43738 [4:55:56<43:11,  1.85it/s]

step:7900, train_loss:0.06937827544593166, acc:0.6235493478484133


 89%|████████▉ | 38949/43738 [4:55:56<36:24,  2.19it/s]

step:7900, train_loss:0.06937649744733705, acc:0.6235590130683715


 89%|████████▉ | 38950/43738 [4:55:57<35:48,  2.23it/s]

step:7900, train_loss:0.06937487272115452, acc:0.6235686777920411


 89%|████████▉ | 38951/43738 [4:55:57<40:30,  1.97it/s]

step:7900, train_loss:0.069375711316739, acc:0.6235526687376447


 89%|████████▉ | 38952/43738 [4:55:58<44:06,  1.81it/s]

step:7900, train_loss:0.06937907032539534, acc:0.6235366605052373


 89%|████████▉ | 38953/43738 [4:55:58<45:52,  1.74it/s]

step:7900, train_loss:0.06937949660652254, acc:0.6235206530947552


 89%|████████▉ | 38954/43738 [4:55:59<38:28,  2.07it/s]

step:7900, train_loss:0.0693777804369821, acc:0.6235303178107512


 89%|████████▉ | 38955/43738 [4:55:59<33:33,  2.38it/s]

step:7900, train_loss:0.06937935204796251, acc:0.6235143113849313


 89%|████████▉ | 38956/43738 [4:56:00<42:48,  1.86it/s]

step:7900, train_loss:0.06937802803660438, acc:0.6235239757675326


 89%|████████▉ | 38957/43738 [4:56:01<49:30,  1.61it/s]

step:7900, train_loss:0.06937851468777127, acc:0.6235079703262572


 89%|████████▉ | 38958/43738 [4:56:01<48:31,  1.64it/s]

step:7900, train_loss:0.0693770145513662, acc:0.6235176343754812


 89%|████████▉ | 38959/43738 [4:56:02<42:57,  1.85it/s]

step:7900, train_loss:0.06937528858881557, acc:0.6235272979285916


 90%|████████▉ | 39264/43738 [4:58:20<36:53,  2.02it/s]

step:7920, train_loss:0.06939987004543308, acc:0.6234718826405868


 90%|████████▉ | 39265/43738 [4:58:20<31:03,  2.40it/s]

step:7920, train_loss:0.06939898374780315, acc:0.6234814720488985


 90%|████████▉ | 39266/43738 [4:58:20<31:59,  2.33it/s]

step:7920, train_loss:0.06940353364278348, acc:0.6234655936433555


 90%|████████▉ | 39267/43738 [4:58:21<30:55,  2.41it/s]

step:7920, train_loss:0.06940181689414554, acc:0.6234751827234064


 90%|████████▉ | 39268/43738 [4:58:21<29:44,  2.51it/s]

step:7920, train_loss:0.0694026780983509, acc:0.6234593052867475


 90%|████████▉ | 39269/43738 [4:58:21<29:08,  2.56it/s]

step:7920, train_loss:0.06940173462823701, acc:0.6234688940385545


 90%|████████▉ | 39270/43738 [4:58:22<27:59,  2.66it/s]

step:7920, train_loss:0.0694031268349352, acc:0.6234784823020117


 90%|████████▉ | 39271/43738 [4:58:22<31:44,  2.35it/s]

step:7920, train_loss:0.06940566281100954, acc:0.6234626059942451


 90%|████████▉ | 39272/43738 [4:58:23<31:53,  2.33it/s]

step:7920, train_loss:0.06940886300111818, acc:0.6234467304950092


 90%|████████▉ | 39273/43738 [4:58:23<35:52,  2.07it/s]

step:7920, train_loss:0.06941181968777405, acc:0.6234308558042421


 90%|████████▉ | 39274/43738 [4:58:24<33:43,  2.21it/s]

step:7920, train_loss:0.06941574338032672, acc:0.6234149819218822


 90%|████████▉ | 39275/43738 [4:58:24<33:33,  2.22it/s]

step:7920, train_loss:0.06941514239868951, acc:0.6234245703373648


 90%|████████▉ | 39276/43738 [4:58:25<32:55,  2.26it/s]

step:7920, train_loss:0.06941352438763258, acc:0.623434158264589


 90%|████████▉ | 39277/43738 [4:58:25<30:17,  2.45it/s]

step:7920, train_loss:0.06941310454389035, acc:0.6234182855106042


 90%|████████▉ | 39278/43738 [4:58:25<32:07,  2.31it/s]

step:7920, train_loss:0.06941200126970316, acc:0.6234278731096288


 90%|████████▉ | 39279/43738 [4:58:26<30:59,  2.40it/s]

step:7920, train_loss:0.06941386781263971, acc:0.6234120013238627


 91%|█████████ | 39584/43738 [5:00:42<34:56,  1.98it/s]

step:7940, train_loss:0.06942214417491119, acc:0.6232821341956346


 91%|█████████ | 39585/43738 [5:00:42<31:08,  2.22it/s]

step:7940, train_loss:0.06942046486232913, acc:0.6232916508778578


 91%|█████████ | 39586/43738 [5:00:42<31:03,  2.23it/s]

step:7940, train_loss:0.06941874502360018, acc:0.6233011670792704


 91%|█████████ | 39587/43738 [5:00:43<28:30,  2.43it/s]

step:7940, train_loss:0.06941710547988497, acc:0.6233106827999091


 91%|█████████ | 39588/43738 [5:00:43<32:02,  2.16it/s]

step:7940, train_loss:0.06941776860975432, acc:0.6233201980398101


 91%|█████████ | 39589/43738 [5:00:44<29:01,  2.38it/s]

step:7940, train_loss:0.06941843775406199, acc:0.623304453257218


 91%|█████████ | 39590/43738 [5:00:44<30:09,  2.29it/s]

step:7940, train_loss:0.06941668539387906, acc:0.6233139681737813


 91%|█████████ | 39591/43738 [5:00:45<33:17,  2.08it/s]

step:7940, train_loss:0.06941751282651856, acc:0.6233234826096841


 91%|█████████ | 39592/43738 [5:00:45<34:31,  2.00it/s]

step:7940, train_loss:0.06941582109894247, acc:0.6233329965649627


 91%|█████████ | 39593/43738 [5:00:46<29:44,  2.32it/s]

step:7940, train_loss:0.0694140699319655, acc:0.6233425100396535


 91%|█████████ | 39594/43738 [5:00:46<35:24,  1.95it/s]

step:7940, train_loss:0.06941295971778612, acc:0.623352023033793


 91%|█████████ | 39595/43738 [5:00:47<39:24,  1.75it/s]

step:7940, train_loss:0.06941660398111592, acc:0.6233362798333123


 91%|█████████ | 39596/43738 [5:00:47<35:14,  1.96it/s]

step:7940, train_loss:0.06941486128533324, acc:0.6233457925042933


 91%|█████████ | 39597/43738 [5:00:48<36:18,  1.90it/s]

step:7940, train_loss:0.06941619187244194, acc:0.6233300502563326


 91%|█████████ | 39598/43738 [5:00:48<31:11,  2.21it/s]

step:7940, train_loss:0.06941460540123279, acc:0.6233395626041719


 91%|█████████ | 39599/43738 [5:00:48<27:44,  2.49it/s]

step:7940, train_loss:0.06941453281783297, acc:0.6233238213086189


 91%|█████████ | 39904/43738 [5:03:09<34:12,  1.87it/s]

step:7960, train_loss:0.06943459580215654, acc:0.6233961507618284


 91%|█████████ | 39905/43738 [5:03:10<29:14,  2.18it/s]

step:7960, train_loss:0.06943421417677284, acc:0.623380528755795


 91%|█████████ | 39906/43738 [5:03:11<35:43,  1.79it/s]

step:7960, train_loss:0.06943583880466682, acc:0.6233649075327018


 91%|█████████ | 39907/43738 [5:03:11<32:02,  1.99it/s]

step:7960, train_loss:0.06943488577921648, acc:0.6233743453529456


 91%|█████████ | 39908/43738 [5:03:11<26:51,  2.38it/s]

step:7960, train_loss:0.06943339958990233, acc:0.6233837827002104


 91%|█████████ | 39909/43738 [5:03:12<27:09,  2.35it/s]

step:7960, train_loss:0.06943170536324468, acc:0.6233932195745321


 91%|█████████ | 39910/43738 [5:03:12<25:18,  2.52it/s]

step:7960, train_loss:0.06943118587431377, acc:0.623377599599098


 91%|█████████▏| 39911/43738 [5:03:12<22:26,  2.84it/s]

step:7960, train_loss:0.06943037365657749, acc:0.6233870361554459


 91%|█████████▏| 39912/43738 [5:03:13<30:59,  2.06it/s]

step:7960, train_loss:0.0694344411619055, acc:0.6233714171176589


 91%|█████████▏| 39913/43738 [5:03:13<30:27,  2.09it/s]

step:7960, train_loss:0.06943639479643955, acc:0.623355798862526


 91%|█████████▏| 39914/43738 [5:03:14<35:43,  1.78it/s]

step:7960, train_loss:0.06944229911721576, acc:0.6233401813899885


 91%|█████████▏| 39915/43738 [5:03:15<31:36,  2.02it/s]

step:7960, train_loss:0.06944360757774311, acc:0.6233245646999874


 91%|█████████▏| 39916/43738 [5:03:15<29:47,  2.14it/s]

step:7960, train_loss:0.06944187162247219, acc:0.6233340014029461


 91%|█████████▏| 39917/43738 [5:03:15<26:02,  2.45it/s]

step:7960, train_loss:0.06944029198884348, acc:0.6233434376330886


 91%|█████████▏| 39918/43738 [5:03:15<23:07,  2.75it/s]

step:7960, train_loss:0.06943864442760621, acc:0.6233528733904504


 91%|█████████▏| 39919/43738 [5:03:16<20:42,  3.07it/s]

step:7960, train_loss:0.06944238499028084, acc:0.6233372579473434


 92%|█████████▏| 40224/43738 [5:05:27<33:08,  1.77it/s]

step:7980, train_loss:0.06940025728624018, acc:0.6235083532219571


 92%|█████████▏| 40225/43738 [5:05:28<33:46,  1.73it/s]

step:7980, train_loss:0.06940161175292067, acc:0.6234928527035426


 92%|█████████▏| 40226/43738 [5:05:28<28:22,  2.06it/s]

step:7980, train_loss:0.06940016583674205, acc:0.6235022124993785


 92%|█████████▏| 40227/43738 [5:05:29<30:48,  1.90it/s]

step:7980, train_loss:0.06940184131213065, acc:0.6234867129042683


 92%|█████████▏| 40228/43738 [5:05:29<26:21,  2.22it/s]

step:7980, train_loss:0.06940604912411658, acc:0.6234712140797455


 92%|█████████▏| 40229/43738 [5:05:29<25:51,  2.26it/s]

step:7980, train_loss:0.06940454262664258, acc:0.6234805737154789


 92%|█████████▏| 40230/43738 [5:05:30<31:27,  1.86it/s]

step:7980, train_loss:0.06940426152769603, acc:0.6234650758140691


 92%|█████████▏| 40231/43738 [5:05:30<27:55,  2.09it/s]

step:7980, train_loss:0.06941107742381393, acc:0.623449578683105


 92%|█████████▏| 40232/43738 [5:05:31<28:04,  2.08it/s]

step:7980, train_loss:0.06941266797959884, acc:0.6234340823225293


 92%|█████████▏| 40233/43738 [5:05:31<23:45,  2.46it/s]

step:7980, train_loss:0.06941817431169993, acc:0.6234185867322845


 92%|█████████▏| 40234/43738 [5:05:31<20:44,  2.81it/s]

step:7980, train_loss:0.06941697536837055, acc:0.6234279465128996


 92%|█████████▏| 40235/43738 [5:05:32<19:14,  3.03it/s]

step:7980, train_loss:0.06941728933498166, acc:0.623437305828259


 92%|█████████▏| 40236/43738 [5:05:32<20:12,  2.89it/s]

step:7980, train_loss:0.06941624883298508, acc:0.6234466646783975


 92%|█████████▏| 40237/43738 [5:05:32<19:46,  2.95it/s]

step:7980, train_loss:0.06941798101636926, acc:0.6234311703158785


 92%|█████████▏| 40238/43738 [5:05:33<23:44,  2.46it/s]

step:7980, train_loss:0.06941788719221502, acc:0.6234405288533227


 92%|█████████▏| 40239/43738 [5:05:33<24:13,  2.41it/s]

step:7980, train_loss:0.06942139724257387, acc:0.6234250354134049


 93%|█████████▎| 40544/43738 [5:07:52<21:09,  2.52it/s]

step:8000, train_loss:0.06941267176194281, acc:0.6232734806629834


 93%|█████████▎| 40545/43738 [5:07:52<23:33,  2.26it/s]

step:8000, train_loss:0.06941243159118667, acc:0.6232581082747565


 93%|█████████▎| 40546/43738 [5:07:53<22:53,  2.32it/s]

step:8000, train_loss:0.06941234376160824, acc:0.6232427366447985


 93%|█████████▎| 40547/43738 [5:07:53<21:04,  2.52it/s]

step:8000, train_loss:0.06941922277165521, acc:0.6232273657730535


 93%|█████████▎| 40548/43738 [5:07:53<21:01,  2.53it/s]

step:8000, train_loss:0.0694200264468362, acc:0.6232119956594653


 93%|█████████▎| 40549/43738 [5:07:54<19:32,  2.72it/s]

step:8000, train_loss:0.06942120563066652, acc:0.6231966263039779


 93%|█████████▎| 40550/43738 [5:07:54<23:56,  2.22it/s]

step:8000, train_loss:0.0694208884721294, acc:0.6231812577065351


 93%|█████████▎| 40551/43738 [5:07:55<22:16,  2.38it/s]

step:8000, train_loss:0.06941925862268201, acc:0.6231905501713891


 93%|█████████▎| 40552/43738 [5:07:55<22:26,  2.37it/s]

step:8000, train_loss:0.06941755214150151, acc:0.6231998421779443


 93%|█████████▎| 40553/43738 [5:07:55<19:43,  2.69it/s]

step:8000, train_loss:0.06941836355348587, acc:0.6231844746381279


 93%|█████████▎| 40554/43738 [5:07:56<25:15,  2.10it/s]

step:8000, train_loss:0.06942081067383961, acc:0.6231691078561917


 93%|█████████▎| 40555/43738 [5:07:56<24:21,  2.18it/s]

step:8000, train_loss:0.06942178154328317, acc:0.6231783997041055


 93%|█████████▎| 40556/43738 [5:07:57<23:19,  2.27it/s]

step:8000, train_loss:0.06942115234182664, acc:0.6231876910937962


 93%|█████████▎| 40557/43738 [5:07:57<19:55,  2.66it/s]

step:8000, train_loss:0.06942007128143168, acc:0.6231969820252977


 93%|█████████▎| 40558/43738 [5:07:57<20:09,  2.63it/s]

step:8000, train_loss:0.06942478323959289, acc:0.6231816164505153


 93%|█████████▎| 40559/43738 [5:07:58<20:25,  2.59it/s]

step:8000, train_loss:0.06942753973311847, acc:0.6231662516334229


 93%|█████████▎| 40864/43738 [5:10:13<22:59,  2.08it/s]

step:8020, train_loss:0.06940076414810088, acc:0.6231157008613939


 93%|█████████▎| 40865/43738 [5:10:13<24:36,  1.95it/s]

step:8020, train_loss:0.06940631277589973, acc:0.6231004527101431


 93%|█████████▎| 40866/43738 [5:10:14<22:40,  2.11it/s]

step:8020, train_loss:0.06940461454059188, acc:0.6231096755248862


 93%|█████████▎| 40867/43738 [5:10:14<21:55,  2.18it/s]

step:8020, train_loss:0.06940416884800252, acc:0.6231188978882717


 93%|█████████▎| 40868/43738 [5:10:15<21:43,  2.20it/s]

step:8020, train_loss:0.0694026607657789, acc:0.6231281198003328


 93%|█████████▎| 40869/43738 [5:10:15<19:59,  2.39it/s]

step:8020, train_loss:0.06940384346191787, acc:0.6231128728376031


 93%|█████████▎| 40870/43738 [5:10:15<17:40,  2.71it/s]

step:8020, train_loss:0.06940244677152461, acc:0.6231220944458038


 93%|█████████▎| 40871/43738 [5:10:16<16:08,  2.96it/s]

step:8020, train_loss:0.06940306818291933, acc:0.6231068483765996


 93%|█████████▎| 40872/43738 [5:10:16<21:56,  2.18it/s]

step:8020, train_loss:0.06940313086788705, acc:0.6231160696809552


 93%|█████████▎| 40873/43738 [5:10:17<21:48,  2.19it/s]

step:8020, train_loss:0.06940250730597992, acc:0.6231252905340934


 93%|█████████▎| 40874/43738 [5:10:17<24:14,  1.97it/s]

step:8020, train_loss:0.0694009626010879, acc:0.6231345109360473


 93%|█████████▎| 40875/43738 [5:10:18<20:50,  2.29it/s]

step:8020, train_loss:0.0693998760715688, acc:0.6231437308868502


 93%|█████████▎| 40876/43738 [5:10:18<17:59,  2.65it/s]

step:8020, train_loss:0.06940166410342768, acc:0.623128486153244


 93%|█████████▎| 40877/43738 [5:10:19<22:43,  2.10it/s]

step:8020, train_loss:0.06940011362740293, acc:0.6231377058003278


 93%|█████████▎| 40878/43738 [5:10:19<26:08,  1.82it/s]

step:8020, train_loss:0.0694001284675834, acc:0.6231469249963305


 93%|█████████▎| 40879/43738 [5:10:20<23:03,  2.07it/s]

step:8020, train_loss:0.06940062579660569, acc:0.6231561437412853


 94%|█████████▍| 41184/43738 [5:12:33<15:34,  2.73it/s]

step:8040, train_loss:0.0694045015122505, acc:0.6230817793317793


 94%|█████████▍| 41185/43738 [5:12:33<17:50,  2.39it/s]

step:8040, train_loss:0.06940556776124768, acc:0.6230666504795436


 94%|█████████▍| 41186/43738 [5:12:34<17:48,  2.39it/s]

step:8040, train_loss:0.06940479453664025, acc:0.6230758024571457


 94%|█████████▍| 41187/43738 [5:12:34<17:16,  2.46it/s]

step:8040, train_loss:0.06940600390300236, acc:0.6230606744846675


 94%|█████████▍| 41188/43738 [5:12:35<18:15,  2.33it/s]

step:8040, train_loss:0.06940560640867505, acc:0.6230698261629601


 94%|█████████▍| 41189/43738 [5:12:35<17:43,  2.40it/s]

step:8040, train_loss:0.06940398383502394, acc:0.6230789773968778


 94%|█████████▍| 41190/43738 [5:12:35<15:52,  2.67it/s]

step:8040, train_loss:0.06940235453467454, acc:0.623088128186453


 94%|█████████▍| 41191/43738 [5:12:36<17:15,  2.46it/s]

step:8040, train_loss:0.06940231454772565, acc:0.6230972785317181


 94%|█████████▍| 41192/43738 [5:12:36<18:11,  2.33it/s]

step:8040, train_loss:0.06940262032413734, acc:0.6231064284327054


 94%|█████████▍| 41193/43738 [5:12:37<16:14,  2.61it/s]

step:8040, train_loss:0.06940255083457865, acc:0.6230913019202292


 94%|█████████▍| 41194/43738 [5:12:37<17:30,  2.42it/s]

step:8040, train_loss:0.06940095667137608, acc:0.6231004515220663


 94%|█████████▍| 41195/43738 [5:12:38<17:59,  2.35it/s]

step:8040, train_loss:0.06940028925621097, acc:0.6231096006796941


 94%|█████████▍| 41196/43738 [5:12:38<22:28,  1.89it/s]

step:8040, train_loss:0.06940018907322101, acc:0.6231187493931449


 94%|█████████▍| 41197/43738 [5:12:39<21:19,  1.99it/s]

step:8040, train_loss:0.0694002705636049, acc:0.623103624050295


 94%|█████████▍| 41198/43738 [5:12:39<18:21,  2.31it/s]

step:8040, train_loss:0.06939948056467359, acc:0.6231127724646828


 94%|█████████▍| 41199/43738 [5:12:39<16:19,  2.59it/s]

step:8040, train_loss:0.06939780158068432, acc:0.6231219204349621


 95%|█████████▍| 41504/43738 [5:14:56<18:01,  2.07it/s]

step:8060, train_loss:0.0693342326441272, acc:0.623506168080185


 95%|█████████▍| 41505/43738 [5:14:57<18:25,  2.02it/s]

step:8060, train_loss:0.06933816704915088, acc:0.623491145645103


 95%|█████████▍| 41506/43738 [5:14:57<21:41,  1.72it/s]

step:8060, train_loss:0.06933732070095508, acc:0.6235002168361201


 95%|█████████▍| 41507/43738 [5:14:58<19:56,  1.87it/s]

step:8060, train_loss:0.06933612693624275, acc:0.6235092875900451


 95%|█████████▍| 41508/43738 [5:14:58<18:28,  2.01it/s]

step:8060, train_loss:0.06933454774748356, acc:0.6235183579069095


 95%|█████████▍| 41509/43738 [5:14:59<17:18,  2.15it/s]

step:8060, train_loss:0.06933311278359724, acc:0.6235274277867451


 95%|█████████▍| 41510/43738 [5:14:59<16:23,  2.27it/s]

step:8060, train_loss:0.069335177234323, acc:0.6235124066490002


 95%|█████████▍| 41511/43738 [5:14:59<14:30,  2.56it/s]

step:8060, train_loss:0.06933368733431032, acc:0.6235214762352148


 95%|█████████▍| 41512/43738 [5:15:00<18:27,  2.01it/s]

step:8060, train_loss:0.06933497840298067, acc:0.6235064559645404


 95%|█████████▍| 41513/43738 [5:15:00<16:33,  2.24it/s]

step:8060, train_loss:0.06933330924691802, acc:0.6235155252571484


 95%|█████████▍| 41514/43738 [5:15:01<14:14,  2.60it/s]

step:8060, train_loss:0.06933215894912675, acc:0.6235245941128295


 95%|█████████▍| 41515/43738 [5:15:01<12:35,  2.94it/s]

step:8060, train_loss:0.06933049042702863, acc:0.6235336625316151


 95%|█████████▍| 41516/43738 [5:15:01<16:11,  2.29it/s]

step:8060, train_loss:0.06932973760488352, acc:0.6235427305135369


 95%|█████████▍| 41517/43738 [5:15:02<16:01,  2.31it/s]

step:8060, train_loss:0.06932894995361771, acc:0.6235517980586266


 95%|█████████▍| 41518/43738 [5:15:03<18:56,  1.95it/s]

step:8060, train_loss:0.06933221389578476, acc:0.6235367792282865


 95%|█████████▍| 41519/43738 [5:15:03<15:58,  2.32it/s]

step:8060, train_loss:0.06933370920299038, acc:0.6235217611214143


 96%|█████████▌| 41824/43738 [5:17:19<15:36,  2.04it/s]

step:8080, train_loss:0.06928292204707243, acc:0.6238523335883703


 96%|█████████▌| 41825/43738 [5:17:19<18:05,  1.76it/s]

step:8080, train_loss:0.0692881763136547, acc:0.6238374178123132


 96%|█████████▌| 41826/43738 [5:17:20<16:32,  1.93it/s]

step:8080, train_loss:0.0692890566518557, acc:0.6238225027494859


 96%|█████████▌| 41827/43738 [5:17:20<15:49,  2.01it/s]

step:8080, train_loss:0.0692880100247892, acc:0.6238314964018457


 96%|█████████▌| 41828/43738 [5:17:20<13:41,  2.32it/s]

step:8080, train_loss:0.06928641999250033, acc:0.6238404896241752


 96%|█████████▌| 41829/43738 [5:17:21<12:13,  2.60it/s]

step:8080, train_loss:0.06928476371575637, acc:0.6238494824165053


 96%|█████████▌| 41830/43738 [5:17:22<16:24,  1.94it/s]

step:8080, train_loss:0.0692880886867742, acc:0.6238345684915133


 96%|█████████▌| 41831/43738 [5:17:22<13:54,  2.28it/s]

step:8080, train_loss:0.06928806288308248, acc:0.6238196552795774


 96%|█████████▌| 41832/43738 [5:17:22<15:35,  2.04it/s]

step:8080, train_loss:0.06928874003699059, acc:0.6238047427806463


 96%|█████████▌| 41833/43738 [5:17:23<18:36,  1.71it/s]

step:8080, train_loss:0.06928944317651552, acc:0.6237898309946692


 96%|█████████▌| 41834/43738 [5:17:24<16:03,  1.98it/s]

step:8080, train_loss:0.06928951442104282, acc:0.6237749199215948


 96%|█████████▌| 41835/43738 [5:17:24<15:26,  2.05it/s]

step:8080, train_loss:0.06928802796205703, acc:0.6237839129915143


 96%|█████████▌| 41836/43738 [5:17:24<14:28,  2.19it/s]

step:8080, train_loss:0.06928637576932932, acc:0.6237929056315136


 96%|█████████▌| 41837/43738 [5:17:25<16:27,  1.93it/s]

step:8080, train_loss:0.06928652868815752, acc:0.6238018978416234


 96%|█████████▌| 41838/43738 [5:17:25<15:00,  2.11it/s]

step:8080, train_loss:0.06928497160862071, acc:0.6238108896218748


 96%|█████████▌| 41839/43738 [5:17:26<15:22,  2.06it/s]

step:8080, train_loss:0.06928489928453474, acc:0.6238198809722986


 96%|█████████▋| 42144/43738 [5:19:43<11:32,  2.30it/s]

step:8100, train_loss:0.06922987696176394, acc:0.6240746013667426


 96%|█████████▋| 42145/43738 [5:19:43<10:38,  2.49it/s]

step:8100, train_loss:0.06922856741207928, acc:0.6240835211768893


 96%|█████████▋| 42146/43738 [5:19:43<09:36,  2.76it/s]

step:8100, train_loss:0.06922692495079726, acc:0.6240924405637546


 96%|█████████▋| 42147/43738 [5:19:44<10:11,  2.60it/s]

step:8100, train_loss:0.06922722984361249, acc:0.6241013595273684


 96%|█████████▋| 42148/43738 [5:19:44<10:27,  2.53it/s]

step:8100, train_loss:0.06922561563761978, acc:0.6241102780677612


 96%|█████████▋| 42149/43738 [5:19:45<11:22,  2.33it/s]

step:8100, train_loss:0.06922400348802021, acc:0.6241191961849629


 96%|█████████▋| 42150/43738 [5:19:45<11:32,  2.29it/s]

step:8100, train_loss:0.0692236740524289, acc:0.6241281138790036


 96%|█████████▋| 42151/43738 [5:19:46<13:28,  1.96it/s]

step:8100, train_loss:0.06922333469435478, acc:0.6241370311499134


 96%|█████████▋| 42152/43738 [5:19:46<12:33,  2.11it/s]

step:8100, train_loss:0.06922268283064534, acc:0.6241459479977225


 96%|█████████▋| 42153/43738 [5:19:47<12:05,  2.19it/s]

step:8100, train_loss:0.06922378915742732, acc:0.6241311413185301


 96%|█████████▋| 42154/43738 [5:19:47<11:05,  2.38it/s]

step:8100, train_loss:0.0692231640676598, acc:0.6241400578830004


 96%|█████████▋| 42155/43738 [5:19:48<13:58,  1.89it/s]

step:8100, train_loss:0.06922403796044051, acc:0.6241252520460207


 96%|█████████▋| 42156/43738 [5:19:48<14:16,  1.85it/s]

step:8100, train_loss:0.06922263420978123, acc:0.6241341683271657


 96%|█████████▋| 42157/43738 [5:19:49<13:59,  1.88it/s]

step:8100, train_loss:0.06922539888776648, acc:0.6241193633323054


 96%|█████████▋| 42158/43738 [5:19:49<12:33,  2.10it/s]

step:8100, train_loss:0.06922422040612303, acc:0.624128279330139


 96%|█████████▋| 42159/43738 [5:19:50<15:52,  1.66it/s]

step:8100, train_loss:0.06922259380938049, acc:0.6241371949050025


 97%|█████████▋| 42464/43738 [5:22:11<10:51,  1.96it/s]

step:8120, train_loss:0.06920151582277696, acc:0.6241993217784476


 97%|█████████▋| 42465/43738 [5:22:12<09:55,  2.14it/s]

step:8120, train_loss:0.06920203537550457, acc:0.6241846226304015


 97%|█████████▋| 42466/43738 [5:22:12<09:51,  2.15it/s]

step:8120, train_loss:0.06920254075176539, acc:0.6241699241746338


 97%|█████████▋| 42467/43738 [5:22:12<09:13,  2.30it/s]

step:8120, train_loss:0.0692009837722175, acc:0.6241787741069537


 97%|█████████▋| 42468/43738 [5:22:13<09:15,  2.28it/s]

step:8120, train_loss:0.06920068962712413, acc:0.6241640764811152


 97%|█████████▋| 42469/43738 [5:22:13<09:06,  2.32it/s]

step:8120, train_loss:0.06919959432259543, acc:0.6241729261343568


 97%|█████████▋| 42470/43738 [5:22:14<11:18,  1.87it/s]

step:8120, train_loss:0.06919938972647192, acc:0.6241582293383565


 97%|█████████▋| 42471/43738 [5:22:14<10:12,  2.07it/s]

step:8120, train_loss:0.06919995717019356, acc:0.6241435332344423


 97%|█████████▋| 42472/43738 [5:22:15<09:11,  2.30it/s]

step:8120, train_loss:0.06919838140063536, acc:0.6241523827462799


 97%|█████████▋| 42473/43738 [5:22:15<10:05,  2.09it/s]

step:8120, train_loss:0.0692045486420056, acc:0.624137687472041


 97%|█████████▋| 42474/43738 [5:22:16<11:11,  1.88it/s]

step:8120, train_loss:0.06920448363820957, acc:0.6241465367048077


 97%|█████████▋| 42475/43738 [5:22:16<09:51,  2.14it/s]

step:8120, train_loss:0.06920290745555091, acc:0.6241553855208947


 97%|█████████▋| 42476/43738 [5:22:17<09:36,  2.19it/s]

step:8120, train_loss:0.0692044311180498, acc:0.624140691213862


 97%|█████████▋| 42477/43738 [5:22:17<08:26,  2.49it/s]

step:8120, train_loss:0.06920380040264185, acc:0.624149539750924


 97%|█████████▋| 42478/43738 [5:22:17<08:41,  2.42it/s]

step:8120, train_loss:0.06920218205422883, acc:0.6241583878713687


 97%|█████████▋| 42479/43738 [5:22:18<11:07,  1.89it/s]

step:8120, train_loss:0.06920532779363797, acc:0.6241436945314155


 98%|█████████▊| 42784/43738 [5:24:38<07:44,  2.05it/s]

step:8140, train_loss:0.06928991581870307, acc:0.6237845923709798


 98%|█████████▊| 42785/43738 [5:24:38<06:56,  2.29it/s]

step:8140, train_loss:0.06929105886243792, acc:0.6237700128549726


 98%|█████████▊| 42786/43738 [5:24:38<05:59,  2.65it/s]

step:8140, train_loss:0.06928962330400447, acc:0.6237788061515449


 98%|█████████▊| 42787/43738 [5:24:39<06:12,  2.55it/s]

step:8140, train_loss:0.06928915948706735, acc:0.6237875990370907


 98%|█████████▊| 42788/43738 [5:24:39<05:22,  2.94it/s]

step:8140, train_loss:0.06928762020793172, acc:0.6237963915116388


 98%|█████████▊| 42789/43738 [5:24:39<05:45,  2.75it/s]

step:8140, train_loss:0.06929001457561476, acc:0.6237818130828017


 98%|█████████▊| 42790/43738 [5:24:40<05:53,  2.68it/s]

step:8140, train_loss:0.0692885542485273, acc:0.6237906052816079


 98%|█████████▊| 42791/43738 [5:24:40<05:39,  2.79it/s]

step:8140, train_loss:0.06929067498099312, acc:0.6237760276693697


 98%|█████████▊| 42792/43738 [5:24:41<06:58,  2.26it/s]

step:8140, train_loss:0.06929004913309918, acc:0.6237848195924471


 98%|█████████▊| 42793/43738 [5:24:41<07:26,  2.12it/s]

step:8140, train_loss:0.06929117206128894, acc:0.6237702427967191


 98%|█████████▊| 42794/43738 [5:24:42<07:00,  2.24it/s]

step:8140, train_loss:0.06928955337613653, acc:0.6237790344440809


 98%|█████████▊| 42795/43738 [5:24:42<07:22,  2.13it/s]

step:8140, train_loss:0.06929126561088529, acc:0.6237644584647739


 98%|█████████▊| 42796/43738 [5:24:43<08:41,  1.81it/s]

step:8140, train_loss:0.06929072337950268, acc:0.6237732498364333


 98%|█████████▊| 42797/43738 [5:24:43<07:20,  2.14it/s]

step:8140, train_loss:0.06929250226494812, acc:0.6237586746734585


 98%|█████████▊| 42798/43738 [5:24:44<06:46,  2.31it/s]

step:8140, train_loss:0.06929102627992052, acc:0.6237674657694284


 98%|█████████▊| 42799/43738 [5:24:44<05:56,  2.63it/s]

step:8140, train_loss:0.06928941900048917, acc:0.62377625645459


 99%|█████████▊| 43104/43738 [5:27:07<04:15,  2.48it/s]

step:8160, train_loss:0.06923813788109225, acc:0.6239792130660727


 99%|█████████▊| 43105/43738 [5:27:08<04:17,  2.45it/s]

step:8160, train_loss:0.06924150638605604, acc:0.6239647372694583


 99%|█████████▊| 43106/43738 [5:27:08<04:42,  2.23it/s]

step:8160, train_loss:0.06924003363192399, acc:0.6239734607711224


 99%|█████████▊| 43107/43738 [5:27:09<04:49,  2.18it/s]

step:8160, train_loss:0.069242600241915, acc:0.6239589857795718


 99%|█████████▊| 43108/43738 [5:27:09<04:11,  2.51it/s]

step:8160, train_loss:0.06924099401916003, acc:0.6239677090099286


 99%|█████████▊| 43109/43738 [5:27:10<03:59,  2.62it/s]

step:8160, train_loss:0.06924281360602295, acc:0.6239532348233547


 99%|█████████▊| 43110/43738 [5:27:10<04:16,  2.45it/s]

step:8160, train_loss:0.06924256002658861, acc:0.623961957782417


 99%|█████████▊| 43111/43738 [5:27:10<04:30,  2.32it/s]

step:8160, train_loss:0.06924215749474633, acc:0.623970680336805


 99%|█████████▊| 43112/43738 [5:27:11<05:30,  1.89it/s]

step:8160, train_loss:0.0692437571528125, acc:0.6239562070885136


 99%|█████████▊| 43113/43738 [5:27:12<05:04,  2.05it/s]

step:8160, train_loss:0.06924231610933085, acc:0.6239649293716513


 99%|█████████▊| 43114/43738 [5:27:12<04:49,  2.16it/s]

step:8160, train_loss:0.06924160937139548, acc:0.623973651250174


 99%|█████████▊| 43115/43738 [5:27:12<04:30,  2.31it/s]

step:8160, train_loss:0.06924000368954028, acc:0.62398237272411


 99%|█████████▊| 43116/43738 [5:27:13<04:17,  2.42it/s]

step:8160, train_loss:0.06924034587053851, acc:0.6239679005473606


 99%|█████████▊| 43117/43738 [5:27:13<04:27,  2.32it/s]

step:8160, train_loss:0.06924330240719236, acc:0.6239534290419092


 99%|█████████▊| 43118/43738 [5:27:14<04:11,  2.46it/s]

step:8160, train_loss:0.06924386475466159, acc:0.6239389582077091


 99%|█████████▊| 43119/43738 [5:27:14<03:44,  2.75it/s]

step:8160, train_loss:0.06924400660957516, acc:0.6239244880447135


 99%|█████████▉| 43424/43738 [5:29:34<03:20,  1.56it/s]

step:8180, train_loss:0.06928649849286153, acc:0.6238715917464996


 99%|█████████▉| 43425/43738 [5:29:35<03:11,  1.64it/s]

step:8180, train_loss:0.06928603617315597, acc:0.6238802533103052


 99%|█████████▉| 43426/43738 [5:29:35<02:51,  1.82it/s]

step:8180, train_loss:0.06928446622118874, acc:0.6238889144751992


 99%|█████████▉| 43427/43738 [5:29:35<02:23,  2.17it/s]

step:8180, train_loss:0.06928521894520533, acc:0.6238745480922007


 99%|█████████▉| 43428/43738 [5:29:36<02:10,  2.38it/s]

step:8180, train_loss:0.06928371505325427, acc:0.6238832089895919


 99%|█████████▉| 43429/43738 [5:29:36<02:00,  2.57it/s]

step:8180, train_loss:0.0692821202150683, acc:0.62389186948813


 99%|█████████▉| 43430/43738 [5:29:36<01:48,  2.83it/s]

step:8180, train_loss:0.06928055663623, acc:0.6239005295878425


 99%|█████████▉| 43431/43738 [5:29:37<01:56,  2.64it/s]

step:8180, train_loss:0.06927938235313912, acc:0.6239091892887568


 99%|█████████▉| 43432/43738 [5:29:37<01:43,  2.97it/s]

step:8180, train_loss:0.06927948908258666, acc:0.6238948240928348


 99%|█████████▉| 43433/43738 [5:29:37<01:49,  2.79it/s]

step:8180, train_loss:0.06927871405141371, acc:0.6239034835263509


 99%|█████████▉| 43434/43738 [5:29:38<01:49,  2.77it/s]

step:8180, train_loss:0.06927773625000626, acc:0.6239121425611273


 99%|█████████▉| 43435/43738 [5:29:38<01:40,  3.03it/s]

step:8180, train_loss:0.06927614541526068, acc:0.6239208011971912


 99%|█████████▉| 43436/43738 [5:29:38<01:42,  2.95it/s]

step:8180, train_loss:0.06927529642800964, acc:0.6239294594345705


 99%|█████████▉| 43437/43738 [5:29:38<01:40,  3.00it/s]

step:8180, train_loss:0.06927619146081884, acc:0.6239150954255589


 99%|█████████▉| 43438/43738 [5:29:39<01:59,  2.50it/s]

step:8180, train_loss:0.06927506433692056, acc:0.6239237533956443


 99%|█████████▉| 43439/43738 [5:29:39<01:45,  2.82it/s]

step:8180, train_loss:0.0692735166960307, acc:0.6239324109671033


100%|██████████| 43738/43738 [5:31:47<00:00,  3.26it/s]
  0%|          | 1/5129 [00:00<13:30,  6.33it/s]

eval on dev set


100%|██████████| 5129/5129 [13:28<00:00,  5.81it/s]
  0%|          | 0/43738 [00:00<?, ?it/s]

1.2851559255678162, 0.5683369077792942


  0%|          | 16/43738 [00:06<5:23:18,  2.25it/s]

step:8200, train_loss:0.07709436397999525, acc:0.5


  0%|          | 17/43738 [00:07<4:57:05,  2.45it/s]

step:8200, train_loss:0.07406612704781924, acc:0.5294117647058824


  0%|          | 18/43738 [00:07<5:12:54,  2.33it/s]

step:8200, train_loss:0.07445391184753841, acc:0.5


  0%|          | 19/43738 [00:08<5:10:40,  2.35it/s]

step:8200, train_loss:0.0724069899634311, acc:0.5263157894736842


  0%|          | 20/43738 [00:08<5:03:03,  2.40it/s]

step:8200, train_loss:0.06879353821277619, acc:0.55


  0%|          | 21/43738 [00:08<4:45:55,  2.55it/s]

step:8200, train_loss:0.06707629561424255, acc:0.5714285714285714


  0%|          | 22/43738 [00:09<4:18:24,  2.82it/s]

step:8200, train_loss:0.06891892477869987, acc:0.5454545454545454


  0%|          | 23/43738 [00:09<4:41:02,  2.59it/s]

step:8200, train_loss:0.06602279809506043, acc:0.5652173913043478


  0%|          | 24/43738 [00:09<4:13:52,  2.87it/s]

step:8200, train_loss:0.06417309027165174, acc:0.5833333333333334


  0%|          | 25/43738 [00:10<4:45:49,  2.55it/s]

step:8200, train_loss:0.06244762808084488, acc:0.6


  0%|          | 26/43738 [00:10<4:54:10,  2.48it/s]

step:8200, train_loss:0.060046034650160715, acc:0.6153846153846154


  0%|          | 27/43738 [00:11<5:16:31,  2.30it/s]

step:8200, train_loss:0.0584145356659536, acc:0.6296296296296297


  0%|          | 28/43738 [00:11<4:29:12,  2.71it/s]

step:8200, train_loss:0.0584442245640925, acc:0.6071428571428571


  0%|          | 29/43738 [00:12<5:08:50,  2.36it/s]

step:8200, train_loss:0.06165093928575516, acc:0.5862068965517241


  0%|          | 30/43738 [00:12<5:27:31,  2.22it/s]

step:8200, train_loss:0.059800100574890776, acc:0.6


  0%|          | 31/43738 [00:12<4:43:43,  2.57it/s]

step:8200, train_loss:0.05811267874894604, acc:0.6129032258064516


  1%|          | 336/43738 [02:26<5:34:32,  2.16it/s]

step:8220, train_loss:0.06219844963163182, acc:0.6815476190476191


  1%|          | 337/43738 [02:26<6:01:07,  2.00it/s]

step:8220, train_loss:0.062286063051818476, acc:0.6795252225519288


  1%|          | 338/43738 [02:27<6:30:03,  1.85it/s]

step:8220, train_loss:0.06258322381877379, acc:0.6775147928994083


  1%|          | 339/43738 [02:28<7:09:47,  1.68it/s]

step:8220, train_loss:0.06260040819584488, acc:0.6784660766961652


  1%|          | 340/43738 [02:28<6:34:32,  1.83it/s]

step:8220, train_loss:0.06255435279214426, acc:0.6794117647058824


  1%|          | 341/43738 [02:29<7:00:35,  1.72it/s]

step:8220, train_loss:0.062502261148793, acc:0.6803519061583577


  1%|          | 342/43738 [02:29<6:20:18,  1.90it/s]

step:8220, train_loss:0.06264304040321177, acc:0.6783625730994152


  1%|          | 343/43738 [02:30<6:26:26,  1.87it/s]

step:8220, train_loss:0.06246290928969274, acc:0.6793002915451894


  1%|          | 344/43738 [02:30<6:36:48,  1.82it/s]

step:8220, train_loss:0.06237924995231117, acc:0.6802325581395349


  1%|          | 345/43738 [02:31<5:38:48,  2.13it/s]

step:8220, train_loss:0.06241527264127913, acc:0.6811594202898551


  1%|          | 346/43738 [02:31<4:55:20,  2.45it/s]

step:8220, train_loss:0.06231505083151678, acc:0.6820809248554913


  1%|          | 347/43738 [02:31<5:11:07,  2.32it/s]

step:8220, train_loss:0.06220436654045577, acc:0.6829971181556196


  1%|          | 348/43738 [02:32<5:23:42,  2.23it/s]

step:8220, train_loss:0.06212940606176211, acc:0.6839080459770115


  1%|          | 349/43738 [02:32<4:38:27,  2.60it/s]

step:8220, train_loss:0.06197313707786209, acc:0.6848137535816619


  1%|          | 350/43738 [02:33<4:37:15,  2.61it/s]

step:8220, train_loss:0.061799681054960406, acc:0.6857142857142857


  1%|          | 351/43738 [02:33<4:41:04,  2.57it/s]

step:8220, train_loss:0.06222065510175209, acc:0.6837606837606838


  1%|▏         | 656/43738 [04:49<5:41:47,  2.10it/s]

step:8240, train_loss:0.06338440421322736, acc:0.6692073170731707


  2%|▏         | 657/43738 [04:49<5:09:46,  2.32it/s]

step:8240, train_loss:0.06347531831009623, acc:0.6681887366818874


  2%|▏         | 658/43738 [04:49<4:27:42,  2.68it/s]

step:8240, train_loss:0.06349624199743205, acc:0.6671732522796353


  2%|▏         | 659/43738 [04:50<5:04:58,  2.35it/s]

step:8240, train_loss:0.0634037052512265, acc:0.6676783004552352


  2%|▏         | 660/43738 [04:50<4:42:08,  2.54it/s]

step:8240, train_loss:0.06330878683824487, acc:0.6681818181818182


  2%|▏         | 661/43738 [04:51<5:19:40,  2.25it/s]

step:8240, train_loss:0.06337180412473065, acc:0.6671709531013615


  2%|▏         | 662/43738 [04:51<5:51:19,  2.04it/s]

step:8240, train_loss:0.06335443701191307, acc:0.6661631419939577


  2%|▏         | 663/43738 [04:52<4:59:44,  2.40it/s]

step:8240, train_loss:0.06340940825417807, acc:0.665158371040724


  2%|▏         | 664/43738 [04:52<4:24:10,  2.72it/s]

step:8240, train_loss:0.06331416996545158, acc:0.6656626506024096


  2%|▏         | 665/43738 [04:52<4:59:29,  2.40it/s]

step:8240, train_loss:0.06330683713715832, acc:0.6646616541353384


  2%|▏         | 666/43738 [04:53<4:51:48,  2.46it/s]

step:8240, train_loss:0.06341843591763864, acc:0.6636636636636637


  2%|▏         | 667/43738 [04:53<5:15:47,  2.27it/s]

step:8240, train_loss:0.06353025433867109, acc:0.6626686656671664


  2%|▏         | 668/43738 [04:54<4:52:04,  2.46it/s]

step:8240, train_loss:0.06343991506299629, acc:0.6631736526946108


  2%|▏         | 669/43738 [04:54<6:14:23,  1.92it/s]

step:8240, train_loss:0.0634499568278485, acc:0.6621823617339312


  2%|▏         | 670/43738 [04:55<6:04:50,  1.97it/s]

step:8240, train_loss:0.06337353618878093, acc:0.6626865671641791


  2%|▏         | 671/43738 [04:55<5:45:17,  2.08it/s]

step:8240, train_loss:0.06348731008499872, acc:0.6616989567809239


  2%|▏         | 976/43738 [07:15<6:21:32,  1.87it/s]

step:8260, train_loss:0.06492130855956214, acc:0.6485655737704918


  2%|▏         | 977/43738 [07:16<6:54:45,  1.72it/s]

step:8260, train_loss:0.06510891574587456, acc:0.6479017400204709


  2%|▏         | 978/43738 [07:16<6:08:10,  1.94it/s]

step:8260, train_loss:0.06518005234487392, acc:0.647239263803681


  2%|▏         | 979/43738 [07:17<5:43:54,  2.07it/s]

step:8260, train_loss:0.06534053035821495, acc:0.6465781409601634


  2%|▏         | 980/43738 [07:17<5:33:29,  2.14it/s]

step:8260, train_loss:0.0654013080776631, acc:0.6459183673469387


  2%|▏         | 981/43738 [07:18<5:54:57,  2.01it/s]

step:8260, train_loss:0.06536060528261027, acc:0.6462793068297655


  2%|▏         | 982/43738 [07:18<5:33:16,  2.14it/s]

step:8260, train_loss:0.06533163759260877, acc:0.6466395112016293


  2%|▏         | 983/43738 [07:19<5:25:55,  2.19it/s]

step:8260, train_loss:0.06535004771983446, acc:0.6459816887080366


  2%|▏         | 984/43738 [07:19<5:44:57,  2.07it/s]

step:8260, train_loss:0.06546666130789829, acc:0.6453252032520326


  2%|▏         | 985/43738 [07:20<5:37:17,  2.11it/s]

step:8260, train_loss:0.06575958607647435, acc:0.6446700507614214


  2%|▏         | 986/43738 [07:20<6:01:50,  1.97it/s]

step:8260, train_loss:0.06575847007442855, acc:0.6440162271805274


  2%|▏         | 987/43738 [07:20<5:06:26,  2.33it/s]

step:8260, train_loss:0.06571242385241935, acc:0.6443768996960486


  2%|▏         | 988/43738 [07:21<6:25:03,  1.85it/s]

step:8260, train_loss:0.06574493479092043, acc:0.6437246963562753


  2%|▏         | 989/43738 [07:22<6:45:09,  1.76it/s]

step:8260, train_loss:0.0656964114739553, acc:0.6440849342770475


  2%|▏         | 990/43738 [07:22<6:03:23,  1.96it/s]

step:8260, train_loss:0.06564619993353545, acc:0.6444444444444445


  2%|▏         | 991/43738 [07:23<6:33:00,  1.81it/s]

step:8260, train_loss:0.06558331068935047, acc:0.644803229061554


  3%|▎         | 1296/43738 [09:45<4:59:48,  2.36it/s]

step:8280, train_loss:0.06529280839018656, acc:0.6419753086419753


  3%|▎         | 1297/43738 [09:46<4:49:35,  2.44it/s]

step:8280, train_loss:0.06528523949415796, acc:0.6422513492675405


  3%|▎         | 1298/43738 [09:46<4:55:38,  2.39it/s]

step:8280, train_loss:0.06527362659610147, acc:0.6425269645608629


  3%|▎         | 1299/43738 [09:47<5:05:18,  2.32it/s]

step:8280, train_loss:0.06522342933394198, acc:0.642802155504234


  3%|▎         | 1300/43738 [09:47<4:26:26,  2.65it/s]

step:8280, train_loss:0.06518482645167611, acc:0.6430769230769231


  3%|▎         | 1301/43738 [09:47<4:46:01,  2.47it/s]

step:8280, train_loss:0.06526755704973995, acc:0.6425826287471176


  3%|▎         | 1302/43738 [09:48<4:44:15,  2.49it/s]

step:8280, train_loss:0.0652296814039999, acc:0.6428571428571429


  3%|▎         | 1303/43738 [09:48<5:05:32,  2.31it/s]

step:8280, train_loss:0.06531143161226621, acc:0.6423637759017652


  3%|▎         | 1304/43738 [09:49<5:57:48,  1.98it/s]

step:8280, train_loss:0.06527882517494712, acc:0.6426380368098159


  3%|▎         | 1305/43738 [09:49<5:54:51,  1.99it/s]

step:8280, train_loss:0.06531045186989923, acc:0.6421455938697318


  3%|▎         | 1306/43738 [09:50<6:10:37,  1.91it/s]

step:8280, train_loss:0.0652813267490112, acc:0.6424196018376723


  3%|▎         | 1307/43738 [09:50<5:08:02,  2.30it/s]

step:8280, train_loss:0.06523196895353142, acc:0.6426931905126243


  3%|▎         | 1308/43738 [09:51<5:11:19,  2.27it/s]

step:8280, train_loss:0.06522587194824478, acc:0.6429663608562691


  3%|▎         | 1309/43738 [09:51<4:48:23,  2.45it/s]

step:8280, train_loss:0.06517609457750047, acc:0.6432391138273491


  3%|▎         | 1310/43738 [09:52<5:20:03,  2.21it/s]

step:8280, train_loss:0.0651300946511826, acc:0.6435114503816793


  3%|▎         | 1311/43738 [09:52<5:54:35,  1.99it/s]

step:8280, train_loss:0.06508839978519643, acc:0.6437833714721587


  4%|▎         | 1616/43738 [12:09<5:56:06,  1.97it/s]

step:8300, train_loss:0.06513344310670699, acc:0.650990099009901


  4%|▎         | 1617/43738 [12:09<5:18:56,  2.20it/s]

step:8300, train_loss:0.06513419976799496, acc:0.6505875077303649


  4%|▎         | 1618/43738 [12:10<5:38:09,  2.08it/s]

step:8300, train_loss:0.06509609110592962, acc:0.6508034610630408


  4%|▎         | 1619/43738 [12:10<5:18:51,  2.20it/s]

step:8300, train_loss:0.06506274752833704, acc:0.6510191476219889


  4%|▎         | 1620/43738 [12:10<4:39:23,  2.51it/s]

step:8300, train_loss:0.06502259573252457, acc:0.6512345679012346


  4%|▎         | 1621/43738 [12:11<4:20:26,  2.70it/s]

step:8300, train_loss:0.06524928294700777, acc:0.6508328192473781


  4%|▎         | 1622/43738 [12:11<4:11:39,  2.79it/s]

step:8300, train_loss:0.06522232133297436, acc:0.6510480887792849


  4%|▎         | 1623/43738 [12:11<4:19:51,  2.70it/s]

step:8300, train_loss:0.06518234980914647, acc:0.6512630930375847


  4%|▎         | 1624/43738 [12:12<4:26:31,  2.63it/s]

step:8300, train_loss:0.06514550685831529, acc:0.6514778325123153


  4%|▎         | 1625/43738 [12:12<4:55:24,  2.38it/s]

step:8300, train_loss:0.06510595052961547, acc:0.6516923076923077


  4%|▎         | 1626/43738 [12:13<5:22:35,  2.18it/s]

step:8300, train_loss:0.06518635249981339, acc:0.6512915129151291


  4%|▎         | 1627/43738 [12:13<4:46:27,  2.45it/s]

step:8300, train_loss:0.06516211150633658, acc:0.6515058389674248


  4%|▎         | 1628/43738 [12:13<4:53:38,  2.39it/s]

step:8300, train_loss:0.06513836449890281, acc:0.6517199017199017


  4%|▎         | 1629/43738 [12:14<4:24:34,  2.65it/s]

step:8300, train_loss:0.06510017118862664, acc:0.6519337016574586


  4%|▎         | 1630/43738 [12:14<4:04:33,  2.87it/s]

step:8300, train_loss:0.06506023576091746, acc:0.6521472392638037


  4%|▎         | 1631/43738 [12:15<4:34:21,  2.56it/s]

step:8300, train_loss:0.06509896421220877, acc:0.6517473942366646


  4%|▍         | 1936/43738 [14:38<6:12:08,  1.87it/s]

step:8320, train_loss:0.06427402852364326, acc:0.6518595041322314


  4%|▍         | 1937/43738 [14:38<5:10:51,  2.24it/s]

step:8320, train_loss:0.06427415590634725, acc:0.6515229736706247


  4%|▍         | 1938/43738 [14:38<4:49:58,  2.40it/s]

step:8320, train_loss:0.0642413702562511, acc:0.651702786377709


  4%|▍         | 1939/43738 [14:39<5:21:15,  2.17it/s]

step:8320, train_loss:0.06430036282080863, acc:0.6513666838576586


  4%|▍         | 1940/43738 [14:40<6:13:57,  1.86it/s]

step:8320, train_loss:0.06427224002209656, acc:0.6515463917525773


  4%|▍         | 1941/43738 [14:40<5:41:18,  2.04it/s]

step:8320, train_loss:0.06424291641763298, acc:0.6517259144770736


  4%|▍         | 1942/43738 [14:40<4:55:08,  2.36it/s]

step:8320, train_loss:0.06423148577570068, acc:0.6519052523171988


  4%|▍         | 1943/43738 [14:41<4:52:19,  2.38it/s]

step:8320, train_loss:0.06422522399760015, acc:0.6520844055584148


  4%|▍         | 1944/43738 [14:41<5:30:04,  2.11it/s]

step:8320, train_loss:0.06419245606954707, acc:0.6522633744855967


  4%|▍         | 1945/43738 [14:41<4:51:10,  2.39it/s]

step:8320, train_loss:0.06426883252818798, acc:0.6519280205655527


  4%|▍         | 1946/43738 [14:42<4:26:29,  2.61it/s]

step:8320, train_loss:0.0642756761633432, acc:0.6515930113052415


  4%|▍         | 1947/43738 [14:42<4:07:36,  2.81it/s]

step:8320, train_loss:0.06425260067352968, acc:0.6517719568567026


  4%|▍         | 1948/43738 [14:42<3:58:40,  2.92it/s]

step:8320, train_loss:0.06421970220622597, acc:0.6519507186858317


  4%|▍         | 1949/43738 [14:43<3:45:16,  3.09it/s]

step:8320, train_loss:0.06418699076078573, acc:0.6521292970754233


  4%|▍         | 1950/43738 [14:43<3:47:47,  3.06it/s]

step:8320, train_loss:0.06421594063333498, acc:0.6517948717948718


  4%|▍         | 1951/43738 [14:44<4:33:58,  2.54it/s]

step:8320, train_loss:0.0642769189587513, acc:0.6514607893388006


  5%|▌         | 2256/43738 [16:59<5:18:15,  2.17it/s]

step:8340, train_loss:0.06413145617328846, acc:0.6493794326241135


  5%|▌         | 2257/43738 [16:59<4:30:51,  2.55it/s]

step:8340, train_loss:0.06410336500088618, acc:0.6495347806823216


  5%|▌         | 2258/43738 [16:59<4:01:53,  2.86it/s]

step:8340, train_loss:0.06409141364760212, acc:0.6496899911426041


  5%|▌         | 2259/43738 [17:00<5:02:24,  2.29it/s]

step:8340, train_loss:0.06406489365087717, acc:0.6498450641876937


  5%|▌         | 2260/43738 [17:01<6:05:29,  1.89it/s]

step:8340, train_loss:0.06407648751757364, acc:0.65


  5%|▌         | 2261/43738 [17:01<6:50:50,  1.68it/s]

step:8340, train_loss:0.06411018242208104, acc:0.6497125165855816


  5%|▌         | 2262/43738 [17:02<6:08:51,  1.87it/s]

step:8340, train_loss:0.06408340200173704, acc:0.649867374005305


  5%|▌         | 2263/43738 [17:02<5:57:29,  1.93it/s]

step:8340, train_loss:0.06409106189782196, acc:0.650022094564737


  5%|▌         | 2264/43738 [17:03<6:18:31,  1.83it/s]

step:8340, train_loss:0.0641475637749211, acc:0.6497349823321554


  5%|▌         | 2265/43738 [17:04<7:13:04,  1.60it/s]

step:8340, train_loss:0.06414917594152883, acc:0.649448123620309


  5%|▌         | 2266/43738 [17:04<5:51:12,  1.97it/s]

step:8340, train_loss:0.06412100480096325, acc:0.649602824360106


  5%|▌         | 2267/43738 [17:04<5:12:31,  2.21it/s]

step:8340, train_loss:0.0642116505049381, acc:0.6493162770180856


  5%|▌         | 2268/43738 [17:05<5:36:38,  2.05it/s]

step:8340, train_loss:0.06418724137190417, acc:0.6494708994708994


  5%|▌         | 2269/43738 [17:05<5:39:50,  2.03it/s]

step:8340, train_loss:0.06416462167287668, acc:0.6496253856324372


  5%|▌         | 2270/43738 [17:06<5:48:34,  1.98it/s]

step:8340, train_loss:0.06415034687661439, acc:0.6497797356828194


  5%|▌         | 2271/43738 [17:06<6:11:25,  1.86it/s]

step:8340, train_loss:0.06420568216545733, acc:0.6494936151475121


  6%|▌         | 2576/43738 [19:27<4:44:19,  2.41it/s]

step:8360, train_loss:0.06378422330907942, acc:0.6541149068322981


  6%|▌         | 2577/43738 [19:28<4:37:57,  2.47it/s]

step:8360, train_loss:0.06375961515581854, acc:0.6542491268917345


  6%|▌         | 2578/43738 [19:28<4:45:48,  2.40it/s]

step:8360, train_loss:0.06373619177117074, acc:0.6543832428238945


  6%|▌         | 2579/43738 [19:28<4:26:04,  2.58it/s]

step:8360, train_loss:0.06371297186527913, acc:0.6545172547499031


  6%|▌         | 2580/43738 [19:29<4:10:56,  2.73it/s]

step:8360, train_loss:0.06373435465522857, acc:0.6542635658914728


  6%|▌         | 2581/43738 [19:29<4:12:54,  2.71it/s]

step:8360, train_loss:0.06371763578250464, acc:0.6543975203409531


  6%|▌         | 2582/43738 [19:29<3:43:21,  3.07it/s]

step:8360, train_loss:0.06369648205971155, acc:0.6545313710302091


  6%|▌         | 2583/43738 [19:29<3:30:34,  3.26it/s]

step:8360, train_loss:0.06367344527380134, acc:0.6546651180797523


  6%|▌         | 2584/43738 [19:30<4:03:06,  2.82it/s]

step:8360, train_loss:0.06366498112833417, acc:0.6547987616099071


  6%|▌         | 2585/43738 [19:30<3:42:00,  3.09it/s]

step:8360, train_loss:0.06364963865319692, acc:0.6549323017408124


  6%|▌         | 2586/43738 [19:31<4:04:21,  2.81it/s]

step:8360, train_loss:0.06368220648391129, acc:0.6546790409899459


  6%|▌         | 2587/43738 [19:31<4:08:23,  2.76it/s]

step:8360, train_loss:0.06365840326111125, acc:0.6548125241592578


  6%|▌         | 2588/43738 [19:31<4:28:21,  2.56it/s]

step:8360, train_loss:0.06363657021488317, acc:0.6549459041731066


  6%|▌         | 2589/43738 [19:32<4:56:20,  2.31it/s]

step:8360, train_loss:0.06371456669161403, acc:0.6546929316338355


  6%|▌         | 2590/43738 [19:33<5:23:03,  2.12it/s]

step:8360, train_loss:0.06372589398367406, acc:0.6544401544401545


  6%|▌         | 2591/43738 [19:33<4:33:10,  2.51it/s]

step:8360, train_loss:0.06371092505064972, acc:0.6545735237360093


  7%|▋         | 2896/43738 [21:55<6:47:23,  1.67it/s]

step:8380, train_loss:0.06334380219233121, acc:0.6598756906077348


  7%|▋         | 2897/43738 [21:55<6:12:11,  1.83it/s]

step:8380, train_loss:0.06335352182008237, acc:0.6596479116327235


  7%|▋         | 2898/43738 [21:56<5:46:23,  1.97it/s]

step:8380, train_loss:0.06333166078371195, acc:0.6597653554175293


  7%|▋         | 2899/43738 [21:56<5:15:38,  2.16it/s]

step:8380, train_loss:0.06331160723860352, acc:0.6598827181786823


  7%|▋         | 2900/43738 [21:56<4:35:52,  2.47it/s]

step:8380, train_loss:0.06328978951825712, acc:0.66


  7%|▋         | 2901/43738 [21:57<4:34:56,  2.48it/s]

step:8380, train_loss:0.06327584482629833, acc:0.6601172009651844


  7%|▋         | 2902/43738 [21:57<4:31:45,  2.50it/s]

step:8380, train_loss:0.06328318187121548, acc:0.6598897312198484


  7%|▋         | 2903/43738 [21:57<4:02:39,  2.80it/s]

step:8380, train_loss:0.06326157166907702, acc:0.6600068894247331


  7%|▋         | 2904/43738 [21:58<3:47:53,  2.99it/s]

step:8380, train_loss:0.06324985977093488, acc:0.6601239669421488


  7%|▋         | 2905/43738 [21:58<3:32:29,  3.20it/s]

step:8380, train_loss:0.0632284343877219, acc:0.6602409638554216


  7%|▋         | 2906/43738 [21:58<4:01:59,  2.81it/s]

step:8380, train_loss:0.06323011545814924, acc:0.6603578802477632


  7%|▋         | 2907/43738 [21:59<4:33:38,  2.49it/s]

step:8380, train_loss:0.06320865206433797, acc:0.6604747162022704


  7%|▋         | 2908/43738 [21:59<4:28:28,  2.53it/s]

step:8380, train_loss:0.06319089940692021, acc:0.6605914718019257


  7%|▋         | 2909/43738 [22:00<4:59:15,  2.27it/s]

step:8380, train_loss:0.06321650029095012, acc:0.6603643863870746


  7%|▋         | 2910/43738 [22:00<4:17:41,  2.64it/s]

step:8380, train_loss:0.0631954322636081, acc:0.6604810996563574


  7%|▋         | 2911/43738 [22:01<5:07:43,  2.21it/s]

step:8380, train_loss:0.06319823296695781, acc:0.6602542081758845


  7%|▋         | 3216/43738 [24:16<6:18:49,  1.78it/s]

step:8400, train_loss:0.06325321503237179, acc:0.6598258706467661


  7%|▋         | 3217/43738 [24:17<6:14:56,  1.80it/s]

step:8400, train_loss:0.06324371588995037, acc:0.6599316133043208


  7%|▋         | 3218/43738 [24:17<5:50:36,  1.93it/s]

step:8400, train_loss:0.06325586430967994, acc:0.6597265382224985


  7%|▋         | 3219/43738 [24:18<6:02:21,  1.86it/s]

step:8400, train_loss:0.06337026781876652, acc:0.6595215905560733


  7%|▋         | 3220/43738 [24:18<5:37:08,  2.00it/s]

step:8400, train_loss:0.06336670543386395, acc:0.6596273291925466


  7%|▋         | 3221/43738 [24:19<5:37:56,  2.00it/s]

step:8400, train_loss:0.06336923456887501, acc:0.6597330021732382


  7%|▋         | 3222/43738 [24:19<5:25:18,  2.08it/s]

step:8400, train_loss:0.0633505264415169, acc:0.65983860955928


  7%|▋         | 3223/43738 [24:20<5:04:31,  2.22it/s]

step:8400, train_loss:0.0633317852475544, acc:0.6599441514117282


  7%|▋         | 3224/43738 [24:20<4:37:29,  2.43it/s]

step:8400, train_loss:0.06335238770134537, acc:0.6597394540942928


  7%|▋         | 3225/43738 [24:20<4:00:26,  2.81it/s]

step:8400, train_loss:0.0633602960235728, acc:0.6595348837209303


  7%|▋         | 3226/43738 [24:21<4:12:55,  2.67it/s]

step:8400, train_loss:0.06334070741025091, acc:0.6596404215747055


  7%|▋         | 3227/43738 [24:21<4:09:58,  2.70it/s]

step:8400, train_loss:0.06332108860274893, acc:0.6597458940192129


  7%|▋         | 3228/43738 [24:21<4:14:46,  2.65it/s]

step:8400, train_loss:0.0633164161155074, acc:0.6598513011152416


  7%|▋         | 3229/43738 [24:22<3:46:06,  2.99it/s]

step:8400, train_loss:0.06329718656959006, acc:0.6599566429235058


  7%|▋         | 3230/43738 [24:22<3:26:10,  3.27it/s]

step:8400, train_loss:0.06327891814140041, acc:0.660061919504644


  7%|▋         | 3231/43738 [24:22<4:00:56,  2.80it/s]

step:8400, train_loss:0.06334425017125751, acc:0.6598576292169607


  8%|▊         | 3536/43738 [26:48<5:57:26,  1.87it/s]

step:8420, train_loss:0.06379031308432996, acc:0.6589366515837104


  8%|▊         | 3537/43738 [26:48<5:29:14,  2.03it/s]

step:8420, train_loss:0.06379167476926931, acc:0.658750353406842


  8%|▊         | 3538/43738 [26:48<5:07:47,  2.18it/s]

step:8420, train_loss:0.0637773569105076, acc:0.6588468061051441


  8%|▊         | 3539/43738 [26:49<4:48:14,  2.32it/s]

step:8420, train_loss:0.06376366814142582, acc:0.6589432042949986


  8%|▊         | 3540/43738 [26:49<4:31:29,  2.47it/s]

step:8420, train_loss:0.06374566504535446, acc:0.6590395480225989


  8%|▊         | 3541/43738 [26:50<5:05:37,  2.19it/s]

step:8420, train_loss:0.06375509009854548, acc:0.6588534312341147


  8%|▊         | 3542/43738 [26:50<5:44:20,  1.95it/s]

step:8420, train_loss:0.06375573998323893, acc:0.6589497459062676


  8%|▊         | 3543/43738 [26:51<5:06:13,  2.19it/s]

step:8420, train_loss:0.06373858106150872, acc:0.659046006209427


  8%|▊         | 3544/43738 [26:51<5:46:48,  1.93it/s]

step:8420, train_loss:0.06373833249396059, acc:0.6591422121896162


  8%|▊         | 3545/43738 [26:52<5:14:13,  2.13it/s]

step:8420, train_loss:0.06378547891431616, acc:0.6589562764456982


  8%|▊         | 3546/43738 [26:52<4:27:36,  2.50it/s]

step:8420, train_loss:0.06376787179709571, acc:0.6590524534686971


  8%|▊         | 3547/43738 [26:52<4:31:32,  2.47it/s]

step:8420, train_loss:0.0637591398156299, acc:0.6591485762616296


  8%|▊         | 3548/43738 [26:53<4:11:42,  2.66it/s]

step:8420, train_loss:0.06374151592315534, acc:0.6592446448703495


  8%|▊         | 3549/43738 [26:53<4:07:29,  2.71it/s]

step:8420, train_loss:0.06374975362128102, acc:0.6590588898281206


  8%|▊         | 3550/43738 [26:54<4:58:38,  2.24it/s]

step:8420, train_loss:0.0637362248505446, acc:0.6591549295774648


  8%|▊         | 3551/43738 [26:54<4:23:45,  2.54it/s]

step:8420, train_loss:0.06374280399585382, acc:0.6589693044212898


  9%|▉         | 3856/43738 [29:11<5:22:42,  2.06it/s]

step:8440, train_loss:0.06342426610793143, acc:0.6600103734439834


  9%|▉         | 3857/43738 [29:11<5:00:54,  2.21it/s]

step:8440, train_loss:0.06340867169523426, acc:0.6600985221674877


  9%|▉         | 3858/43738 [29:12<5:59:16,  1.85it/s]

step:8440, train_loss:0.06341144360504798, acc:0.6599274235355106


  9%|▉         | 3859/43738 [29:12<5:38:54,  1.96it/s]

step:8440, train_loss:0.0634403220556543, acc:0.6597564135786473


  9%|▉         | 3860/43738 [29:13<6:32:27,  1.69it/s]

step:8440, train_loss:0.063453828020566, acc:0.6595854922279792


  9%|▉         | 3861/43738 [29:14<7:16:54,  1.52it/s]

step:8440, train_loss:0.06343831992046922, acc:0.6596736596736597


  9%|▉         | 3862/43738 [29:14<6:12:17,  1.79it/s]

step:8440, train_loss:0.06342235874891479, acc:0.6597617814603832


  9%|▉         | 3863/43738 [29:14<5:06:18,  2.17it/s]

step:8440, train_loss:0.06340626302050038, acc:0.6598498576236086


  9%|▉         | 3864/43738 [29:15<4:20:55,  2.55it/s]

step:8440, train_loss:0.06340004308036841, acc:0.6599378881987578


  9%|▉         | 3865/43738 [29:15<5:41:06,  1.95it/s]

step:8440, train_loss:0.0634565700864676, acc:0.6597671410090556


  9%|▉         | 3866/43738 [29:16<6:04:27,  1.82it/s]

step:8440, train_loss:0.0634543011540592, acc:0.6598551474392137


  9%|▉         | 3867/43738 [29:16<5:34:51,  1.98it/s]

step:8440, train_loss:0.06343808503931254, acc:0.6599431083527282


  9%|▉         | 3868/43738 [29:17<4:45:34,  2.33it/s]

step:8440, train_loss:0.06342168469409058, acc:0.6600310237849017


  9%|▉         | 3869/43738 [29:17<5:10:12,  2.14it/s]

step:8440, train_loss:0.06341656491001943, acc:0.6601188937710003


  9%|▉         | 3870/43738 [29:18<5:01:51,  2.20it/s]

step:8440, train_loss:0.06340551119926746, acc:0.6602067183462532


  9%|▉         | 3871/43738 [29:18<6:04:11,  1.82it/s]

step:8440, train_loss:0.0633941372525296, acc:0.6602944975458538


 10%|▉         | 4176/43738 [31:37<7:39:42,  1.43it/s]

step:8460, train_loss:0.06359215769510601, acc:0.6592432950191571


 10%|▉         | 4177/43738 [31:38<6:29:24,  1.69it/s]

step:8460, train_loss:0.0635781559851889, acc:0.6593248743117069


 10%|▉         | 4178/43738 [31:38<5:58:02,  1.84it/s]

step:8460, train_loss:0.06356766098171956, acc:0.6594064145524174


 10%|▉         | 4179/43738 [31:39<6:12:39,  1.77it/s]

step:8460, train_loss:0.06357308959164254, acc:0.6592486240727446


 10%|▉         | 4180/43738 [31:39<5:48:51,  1.89it/s]

step:8460, train_loss:0.06359506892980328, acc:0.6590909090909091


 10%|▉         | 4181/43738 [31:40<5:30:32,  1.99it/s]

step:8460, train_loss:0.0635956197280718, acc:0.6589332695527386


 10%|▉         | 4182/43738 [31:40<5:02:51,  2.18it/s]

step:8460, train_loss:0.06358969157867814, acc:0.6590148254423721


 10%|▉         | 4183/43738 [31:41<5:26:22,  2.02it/s]

step:8460, train_loss:0.06357484170781562, acc:0.6590963423380349


 10%|▉         | 4184/43738 [31:41<6:07:36,  1.79it/s]

step:8460, train_loss:0.06357207384595039, acc:0.6591778202676865


 10%|▉         | 4185/43738 [31:42<5:25:18,  2.03it/s]

step:8460, train_loss:0.06355688627746697, acc:0.6592592592592592


 10%|▉         | 4186/43738 [31:42<4:37:07,  2.38it/s]

step:8460, train_loss:0.06355365288056017, acc:0.65910176779742


 10%|▉         | 4187/43738 [31:42<4:18:50,  2.55it/s]

step:8460, train_loss:0.06353901655059044, acc:0.6591831860520659


 10%|▉         | 4188/43738 [31:43<4:04:47,  2.69it/s]

step:8460, train_loss:0.06353298746989344, acc:0.6592645654250239


 10%|▉         | 4189/43738 [31:43<5:26:06,  2.02it/s]

step:8460, train_loss:0.06352457765642498, acc:0.6593459059441394


 10%|▉         | 4190/43738 [31:44<6:03:40,  1.81it/s]

step:8460, train_loss:0.06350984889973646, acc:0.6594272076372315


 10%|▉         | 4191/43738 [31:44<5:00:25,  2.19it/s]

step:8460, train_loss:0.06351200733057996, acc:0.6592698639942735


 10%|█         | 4496/43738 [34:05<5:29:46,  1.98it/s]

step:8480, train_loss:0.06353765343165668, acc:0.660364768683274


 10%|█         | 4497/43738 [34:05<5:16:42,  2.06it/s]

step:8480, train_loss:0.06352821385671145, acc:0.6604402935290193


 10%|█         | 4498/43738 [34:06<6:18:06,  1.73it/s]

step:8480, train_loss:0.0635228269899933, acc:0.6605157847932415


 10%|█         | 4499/43738 [34:06<5:54:42,  1.84it/s]

step:8480, train_loss:0.06350873054861333, acc:0.660591242498333


 10%|█         | 4500/43738 [34:07<5:19:26,  2.05it/s]

step:8480, train_loss:0.06349464981054835, acc:0.6606666666666666


 10%|█         | 4501/43738 [34:07<4:35:47,  2.37it/s]

step:8480, train_loss:0.06351157495128078, acc:0.6605198844701178


 10%|█         | 4502/43738 [34:07<4:35:45,  2.37it/s]

step:8480, train_loss:0.06350128490627314, acc:0.6605952909817858


 10%|█         | 4503/43738 [34:08<4:32:22,  2.40it/s]

step:8480, train_loss:0.06351268123495878, acc:0.6604485898290029


 10%|█         | 4504/43738 [34:08<4:14:44,  2.57it/s]

step:8480, train_loss:0.06350481959574863, acc:0.6605239786856127


 10%|█         | 4505/43738 [34:08<4:01:04,  2.71it/s]

step:8480, train_loss:0.06351034499328329, acc:0.660377358490566


 10%|█         | 4506/43738 [34:09<3:38:24,  2.99it/s]

step:8480, train_loss:0.06349681997349037, acc:0.6604527296937417


 10%|█         | 4507/43738 [34:09<4:24:32,  2.47it/s]

step:8480, train_loss:0.06348751483161247, acc:0.6605280674506323


 10%|█         | 4508/43738 [34:09<3:57:53,  2.75it/s]

step:8480, train_loss:0.06349717098405325, acc:0.6603815439219166


 10%|█         | 4509/43738 [34:10<4:31:58,  2.40it/s]

step:8480, train_loss:0.06349131498629303, acc:0.6604568640496784


 10%|█         | 4510/43738 [34:10<4:07:08,  2.65it/s]

step:8480, train_loss:0.06347784690042398, acc:0.6605321507760532


 10%|█         | 4511/43738 [34:11<4:08:15,  2.63it/s]

step:8480, train_loss:0.06346784722632828, acc:0.6606074041232542


 11%|█         | 4816/43738 [36:26<5:15:38,  2.06it/s]

step:8500, train_loss:0.06305788697383011, acc:0.6625830564784053


 11%|█         | 4817/43738 [36:26<4:55:27,  2.20it/s]

step:8500, train_loss:0.06304634441693863, acc:0.6626531035914469


 11%|█         | 4818/43738 [36:27<5:22:38,  2.01it/s]

step:8500, train_loss:0.06308330051974148, acc:0.6625155666251556


 11%|█         | 4819/43738 [36:27<5:32:06,  1.95it/s]

step:8500, train_loss:0.06309573973286643, acc:0.6623780867399875


 11%|█         | 4820/43738 [36:28<5:08:53,  2.10it/s]

step:8500, train_loss:0.0631118336534374, acc:0.6622406639004149


 11%|█         | 4821/43738 [36:28<4:52:15,  2.22it/s]

step:8500, train_loss:0.06312013582732488, acc:0.6621032980709396


 11%|█         | 4822/43738 [36:29<4:38:53,  2.33it/s]

step:8500, train_loss:0.06310769156658837, acc:0.6621733720447947


 11%|█         | 4823/43738 [36:29<3:59:25,  2.71it/s]

step:8500, train_loss:0.06311035567827523, acc:0.6622434169603981


 11%|█         | 4824/43738 [36:29<3:49:48,  2.82it/s]

step:8500, train_loss:0.06312951789732324, acc:0.662106135986733


 11%|█         | 4825/43738 [36:29<3:27:43,  3.12it/s]

step:8500, train_loss:0.06313139158251593, acc:0.6619689119170984


 11%|█         | 4826/43738 [36:30<4:12:43,  2.57it/s]

step:8500, train_loss:0.06318196290632846, acc:0.661831744716121


 11%|█         | 4827/43738 [36:31<4:52:20,  2.22it/s]

step:8500, train_loss:0.06319877586842595, acc:0.6616946343484565


 11%|█         | 4828/43738 [36:31<5:27:42,  1.98it/s]

step:8500, train_loss:0.06321330798805821, acc:0.6615575807787903


 11%|█         | 4829/43738 [36:32<4:55:24,  2.20it/s]

step:8500, train_loss:0.06320767241984451, acc:0.6616276661834748


 11%|█         | 4830/43738 [36:32<4:17:41,  2.52it/s]

step:8500, train_loss:0.06319476643129633, acc:0.6616977225672878


 11%|█         | 4831/43738 [36:32<3:50:27,  2.81it/s]

step:8500, train_loss:0.06318713728356747, acc:0.6617677499482508


 12%|█▏        | 5136/43738 [38:53<4:03:19,  2.64it/s]

step:8520, train_loss:0.06367801908788558, acc:0.6582943925233645


 12%|█▏        | 5137/43738 [38:54<4:27:22,  2.41it/s]

step:8520, train_loss:0.06369066659849675, acc:0.6581662448900136


 12%|█▏        | 5138/43738 [38:54<4:58:29,  2.16it/s]

step:8520, train_loss:0.06371407739788143, acc:0.6580381471389646


 12%|█▏        | 5139/43738 [38:55<5:59:43,  1.79it/s]

step:8520, train_loss:0.06370399983635439, acc:0.6581046896283324


 12%|█▏        | 5140/43738 [38:56<6:15:27,  1.71it/s]

step:8520, train_loss:0.06371356777611038, acc:0.657976653696498


 12%|█▏        | 5141/43738 [38:56<5:22:28,  1.99it/s]

step:8520, train_loss:0.06371135882996093, acc:0.6580431822602606


 12%|█▏        | 5142/43738 [38:56<4:54:17,  2.19it/s]

step:8520, train_loss:0.06370880631736979, acc:0.6581096849474912


 12%|█▏        | 5143/43738 [38:57<4:50:09,  2.22it/s]

step:8520, train_loss:0.06378077490549455, acc:0.6579817227299242


 12%|█▏        | 5144/43738 [38:57<4:10:43,  2.57it/s]

step:8520, train_loss:0.06382066224528392, acc:0.6578538102643857


 12%|█▏        | 5145/43738 [38:58<5:15:36,  2.04it/s]

step:8520, train_loss:0.06382110091536083, acc:0.6577259475218659


 12%|█▏        | 5146/43738 [38:58<4:58:21,  2.16it/s]

step:8520, train_loss:0.0638290624772717, acc:0.6575981344733773


 12%|█▏        | 5147/43738 [38:59<4:50:16,  2.22it/s]

step:8520, train_loss:0.06382869137300642, acc:0.6576646590246745


 12%|█▏        | 5149/43738 [38:59<4:38:02,  2.31it/s]

step:8520, train_loss:0.06384461500056487, acc:0.6575369075369075
step:8520, train_loss:0.06383286515291453, acc:0.6576034181394446


 12%|█▏        | 5150/43738 [39:00<4:17:17,  2.50it/s]

step:8520, train_loss:0.06382263469106282, acc:0.6576699029126214


 12%|█▏        | 5151/43738 [39:00<4:13:49,  2.53it/s]

step:8520, train_loss:0.06381110116189011, acc:0.6577363618714812


 12%|█▏        | 5456/43738 [41:18<6:41:21,  1.59it/s]

step:8540, train_loss:0.06362812602040527, acc:0.6590909090909091


 12%|█▏        | 5457/43738 [41:18<5:44:40,  1.85it/s]

step:8540, train_loss:0.06362042064277049, acc:0.6591533809785597


 12%|█▏        | 5458/43738 [41:19<5:34:02,  1.91it/s]

step:8540, train_loss:0.06363872799385001, acc:0.6590326126786369


 12%|█▏        | 5459/43738 [41:19<4:55:42,  2.16it/s]

step:8540, train_loss:0.0636270848266284, acc:0.6590950723575747


 12%|█▏        | 5460/43738 [41:19<4:36:38,  2.31it/s]

step:8540, train_loss:0.06363800766330081, acc:0.658974358974359


 12%|█▏        | 5461/43738 [41:20<5:47:13,  1.84it/s]

step:8540, train_loss:0.06364043425668464, acc:0.6588536898004028


 12%|█▏        | 5462/43738 [41:21<5:38:46,  1.88it/s]

step:8540, train_loss:0.063653000575481, acc:0.6587330648114244


 12%|█▏        | 5463/43738 [41:21<5:27:00,  1.95it/s]

step:8540, train_loss:0.06364136733588095, acc:0.6587955335896027


 12%|█▏        | 5464/43738 [41:22<5:27:35,  1.95it/s]

step:8540, train_loss:0.06365423296173374, acc:0.6586749633967789


 12%|█▏        | 5465/43738 [41:22<5:42:25,  1.86it/s]

step:8540, train_loss:0.0636703802628314, acc:0.6585544373284538


 12%|█▏        | 5466/43738 [41:22<4:41:22,  2.27it/s]

step:8540, train_loss:0.06366631182629785, acc:0.6586169045005489


 12%|█▏        | 5467/43738 [41:23<4:46:13,  2.23it/s]

step:8540, train_loss:0.06365466684858147, acc:0.6586793488201939


 13%|█▎        | 5468/43738 [41:23<4:36:58,  2.30it/s]

step:8540, train_loss:0.06366641421121862, acc:0.658558888076079


 13%|█▎        | 5469/43738 [41:24<5:47:19,  1.84it/s]

step:8540, train_loss:0.06369216464731127, acc:0.6584384713841653


 13%|█▎        | 5470/43738 [41:25<5:15:48,  2.02it/s]

step:8540, train_loss:0.06368072454146556, acc:0.6585009140767825


 13%|█▎        | 5471/43738 [41:25<4:42:43,  2.26it/s]

step:8540, train_loss:0.06366913660462059, acc:0.6585633339426065


 13%|█▎        | 5776/43738 [43:40<4:08:21,  2.55it/s]

step:8560, train_loss:0.06341774226596618, acc:0.6592797783933518


 13%|█▎        | 5777/43738 [43:40<4:46:41,  2.21it/s]

step:8560, train_loss:0.06343409482491695, acc:0.659165656915354


 13%|█▎        | 5778/43738 [43:41<4:46:01,  2.21it/s]

step:8560, train_loss:0.06342399242551787, acc:0.6592246452059536


 13%|█▎        | 5779/43738 [43:41<4:35:05,  2.30it/s]

step:8560, train_loss:0.06341424131066986, acc:0.659283613081848


 13%|█▎        | 5780/43738 [43:41<4:04:16,  2.59it/s]

step:8560, train_loss:0.06340342881749135, acc:0.6593425605536333


 13%|█▎        | 5781/43738 [43:42<4:22:26,  2.41it/s]

step:8560, train_loss:0.06340262989491303, acc:0.6594014876318975


 13%|█▎        | 5782/43738 [43:42<4:25:54,  2.38it/s]

step:8560, train_loss:0.06339173878910767, acc:0.6594603943272224


 13%|█▎        | 5783/43738 [43:42<3:57:05,  2.67it/s]

step:8560, train_loss:0.06340983902945779, acc:0.6593463600207504


 13%|█▎        | 5784/43738 [43:43<4:01:06,  2.62it/s]

step:8560, train_loss:0.06339896149541221, acc:0.6594052558782849


 13%|█▎        | 5785/43738 [43:43<4:10:16,  2.53it/s]

step:8560, train_loss:0.06343084632968626, acc:0.6592912705272256


 13%|█▎        | 5786/43738 [43:44<3:55:18,  2.69it/s]

step:8560, train_loss:0.06342002999749075, acc:0.6593501555478741


 13%|█▎        | 5787/43738 [43:44<3:28:48,  3.03it/s]

step:8560, train_loss:0.06340917346214137, acc:0.6594090202177294


 13%|█▎        | 5788/43738 [43:44<3:09:18,  3.34it/s]

step:8560, train_loss:0.06343456090978997, acc:0.6592950932964755


 13%|█▎        | 5789/43738 [43:45<4:33:52,  2.31it/s]

step:8560, train_loss:0.06342544611139413, acc:0.6593539471411297


 13%|█▎        | 5790/43738 [43:45<4:04:07,  2.59it/s]

step:8560, train_loss:0.0634174596548337, acc:0.6594127806563039


 13%|█▎        | 5791/43738 [43:45<3:58:15,  2.65it/s]

step:8560, train_loss:0.0634065490413969, acc:0.6594715938525297


 14%|█▍        | 6096/43738 [46:05<6:08:08,  1.70it/s]

step:8580, train_loss:0.06343785247550182, acc:0.6579724409448819


 14%|█▍        | 6097/43738 [46:05<5:42:21,  1.83it/s]

step:8580, train_loss:0.0634384082147259, acc:0.6580285386255535


 14%|█▍        | 6098/43738 [46:06<5:24:45,  1.93it/s]

step:8580, train_loss:0.06343071954663702, acc:0.6580846179075106


 14%|█▍        | 6099/43738 [46:06<4:38:44,  2.25it/s]

step:8580, train_loss:0.063423627469724, acc:0.6581406787998032


 14%|█▍        | 6100/43738 [46:07<4:38:38,  2.25it/s]

step:8580, train_loss:0.06341489115931269, acc:0.6581967213114754


 14%|█▍        | 6101/43738 [46:07<4:32:11,  2.30it/s]

step:8580, train_loss:0.06340454805521886, acc:0.6582527454515653


 14%|█▍        | 6102/43738 [46:07<3:58:16,  2.63it/s]

step:8580, train_loss:0.06339449938178916, acc:0.6583087512291053


 14%|█▍        | 6103/43738 [46:08<4:05:20,  2.56it/s]

step:8580, train_loss:0.0633843359576622, acc:0.6583647386531214


 14%|█▍        | 6104/43738 [46:08<5:19:44,  1.96it/s]

step:8580, train_loss:0.0633758017892758, acc:0.6584207077326344


 14%|█▍        | 6105/43738 [46:09<5:03:02,  2.07it/s]

step:8580, train_loss:0.06338081061003889, acc:0.6583128583128584


 14%|█▍        | 6106/43738 [46:09<5:07:27,  2.04it/s]

step:8580, train_loss:0.06339667939934342, acc:0.6582050442188012


 14%|█▍        | 6107/43738 [46:10<4:58:00,  2.10it/s]

step:8580, train_loss:0.06339119355128048, acc:0.658261011953496


 14%|█▍        | 6108/43738 [46:10<5:04:18,  2.06it/s]

step:8580, train_loss:0.06340796698925102, acc:0.6581532416502947


 14%|█▍        | 6109/43738 [46:11<4:46:48,  2.19it/s]

step:8580, train_loss:0.0634006251911965, acc:0.6582091995416598


 14%|█▍        | 6110/43738 [46:11<4:49:42,  2.16it/s]

step:8580, train_loss:0.06340145423246049, acc:0.65810147299509


 14%|█▍        | 6111/43738 [46:11<4:08:54,  2.52it/s]

step:8580, train_loss:0.06339157475144666, acc:0.658157421044019


 15%|█▍        | 6416/43738 [48:27<5:02:41,  2.06it/s]

step:8600, train_loss:0.06345251721592249, acc:0.6578865336658354


 15%|█▍        | 6417/43738 [48:28<5:32:26,  1.87it/s]

step:8600, train_loss:0.06344375535555308, acc:0.6579398472806608


 15%|█▍        | 6418/43738 [48:28<5:11:28,  2.00it/s]

step:8600, train_loss:0.0634339899444588, acc:0.6579931442817077


 15%|█▍        | 6419/43738 [48:28<5:07:19,  2.02it/s]

step:8600, train_loss:0.06343253610739925, acc:0.6580464246767409


 15%|█▍        | 6420/43738 [48:29<4:52:05,  2.13it/s]

step:8600, train_loss:0.06343485104355118, acc:0.6579439252336449


 15%|█▍        | 6421/43738 [48:29<4:35:21,  2.26it/s]

step:8600, train_loss:0.06342517980054395, acc:0.6579971966983336


 15%|█▍        | 6422/43738 [48:30<4:23:28,  2.36it/s]

step:8600, train_loss:0.06341562614851713, acc:0.6580504515727188


 15%|█▍        | 6423/43738 [48:30<4:05:54,  2.53it/s]

step:8600, train_loss:0.06341625296905115, acc:0.6581036898645493


 15%|█▍        | 6424/43738 [48:31<4:38:52,  2.23it/s]

step:8600, train_loss:0.06343551977684686, acc:0.6580012453300125


 15%|█▍        | 6425/43738 [48:31<4:05:14,  2.54it/s]

step:8600, train_loss:0.06342566370082556, acc:0.6580544747081712


 15%|█▍        | 6426/43738 [48:31<4:09:00,  2.50it/s]

step:8600, train_loss:0.06343346005690982, acc:0.6579520697167756


 15%|█▍        | 6427/43738 [48:32<3:59:30,  2.60it/s]

step:8600, train_loss:0.06343125890142094, acc:0.6578496965925004


 15%|█▍        | 6428/43738 [48:32<4:05:19,  2.53it/s]

step:8600, train_loss:0.06342681221751861, acc:0.6579029247044181


 15%|█▍        | 6429/43738 [48:32<4:08:15,  2.50it/s]

step:8600, train_loss:0.06341695313663764, acc:0.6579561362575829


 15%|█▍        | 6430/43738 [48:33<3:58:36,  2.61it/s]

step:8600, train_loss:0.063407878040524, acc:0.65800933125972


 15%|█▍        | 6431/43738 [48:33<4:47:24,  2.16it/s]

step:8600, train_loss:0.0634185656286495, acc:0.6579070129062354


 15%|█▌        | 6736/43738 [50:48<5:16:41,  1.95it/s]

step:8620, train_loss:0.06339859226934058, acc:0.6582541567695962


 15%|█▌        | 6737/43738 [50:49<4:53:27,  2.10it/s]

step:8620, train_loss:0.06339362274976301, acc:0.6583048834792935


 15%|█▌        | 6738/43738 [50:49<4:53:37,  2.10it/s]

step:8620, train_loss:0.06338441622000746, acc:0.6583555951320866


 15%|█▌        | 6739/43738 [50:50<5:05:06,  2.02it/s]

step:8620, train_loss:0.06337502005463286, acc:0.6584062917346787


 15%|█▌        | 6740/43738 [50:50<5:46:40,  1.78it/s]

step:8620, train_loss:0.06336583465254673, acc:0.6584569732937685


 15%|█▌        | 6741/43738 [50:51<6:28:40,  1.59it/s]

step:8620, train_loss:0.06335907306954669, acc:0.658507639816051


 15%|█▌        | 6742/43738 [50:52<6:27:54,  1.59it/s]

step:8620, train_loss:0.06335301581097004, acc:0.6585582913082172


 15%|█▌        | 6743/43738 [50:52<6:27:55,  1.59it/s]

step:8620, train_loss:0.06336571376836306, acc:0.6584606258341984


 15%|█▌        | 6744/43738 [50:53<5:57:24,  1.73it/s]

step:8620, train_loss:0.06336215729366546, acc:0.6585112692763938


 15%|█▌        | 6745/43738 [50:54<6:27:26,  1.59it/s]

step:8620, train_loss:0.06340428229004215, acc:0.6584136397331356


 15%|█▌        | 6746/43738 [50:54<6:12:12,  1.66it/s]

step:8620, train_loss:0.06340940023730553, acc:0.6583160391343018


 15%|█▌        | 6747/43738 [50:54<5:42:58,  1.80it/s]

step:8620, train_loss:0.06340366740299103, acc:0.6583666814880688


 15%|█▌        | 6748/43738 [50:55<5:20:17,  1.92it/s]

step:8620, train_loss:0.06341327897415815, acc:0.6582691167753408


 15%|█▌        | 6749/43738 [50:56<5:44:07,  1.79it/s]

step:8620, train_loss:0.06345002386188367, acc:0.6581715809749592


 15%|█▌        | 6750/43738 [50:56<6:00:18,  1.71it/s]

step:8620, train_loss:0.06344567554626028, acc:0.6582222222222223


 15%|█▌        | 6751/43738 [50:57<6:31:02,  1.58it/s]

step:8620, train_loss:0.0634553272051408, acc:0.6581247222633684


 16%|█▌        | 7056/43738 [53:14<4:46:58,  2.13it/s]

step:8640, train_loss:0.06339305741759803, acc:0.6575963718820862


 16%|█▌        | 7057/43738 [53:14<4:55:25,  2.07it/s]

step:8640, train_loss:0.06338647092789124, acc:0.6576448915969959


 16%|█▌        | 7058/43738 [53:14<4:09:12,  2.45it/s]

step:8640, train_loss:0.06337805914468887, acc:0.657693397563049


 16%|█▌        | 7059/43738 [53:15<3:43:32,  2.73it/s]

step:8640, train_loss:0.0633727406132182, acc:0.6577418897860887


 16%|█▌        | 7060/43738 [53:15<3:54:49,  2.60it/s]

step:8640, train_loss:0.0633802090242763, acc:0.6576487252124645


 16%|█▌        | 7061/43738 [53:16<4:12:24,  2.42it/s]

step:8640, train_loss:0.06337801046023551, acc:0.6576972100269084


 16%|█▌        | 7062/43738 [53:16<4:13:18,  2.41it/s]

step:8640, train_loss:0.06336904224366866, acc:0.6577456811101671


 16%|█▌        | 7063/43738 [53:17<4:45:36,  2.14it/s]

step:8640, train_loss:0.06337148269766685, acc:0.657652555571287


 16%|█▌        | 7064/43738 [53:17<4:06:15,  2.48it/s]

step:8640, train_loss:0.06336267482418516, acc:0.6577010192525481


 16%|█▌        | 7065/43738 [53:18<5:03:22,  2.01it/s]

step:8640, train_loss:0.06335805996487731, acc:0.6577494692144373


 16%|█▌        | 7066/43738 [53:18<4:37:06,  2.21it/s]

step:8640, train_loss:0.0633750031873812, acc:0.6576563826776111


 16%|█▌        | 7067/43738 [53:19<5:51:38,  1.74it/s]

step:8640, train_loss:0.0633661333877417, acc:0.6577048252440922


 16%|█▌        | 7068/43738 [53:19<5:54:48,  1.72it/s]

step:8640, train_loss:0.06335771385380906, acc:0.6577532541029995


 16%|█▌        | 7069/43738 [53:20<5:07:14,  1.99it/s]

step:8640, train_loss:0.06335037740189425, acc:0.6578016692601499


 16%|█▌        | 7070/43738 [53:20<5:22:29,  1.90it/s]

step:8640, train_loss:0.06337182510928567, acc:0.6577086280056577


 16%|█▌        | 7071/43738 [53:21<4:35:39,  2.22it/s]

step:8640, train_loss:0.06336457059545832, acc:0.6577570357799463


 17%|█▋        | 7376/43738 [55:37<4:03:18,  2.49it/s]

step:8660, train_loss:0.0630710221679033, acc:0.6594360086767896


 17%|█▋        | 7377/43738 [55:37<3:54:41,  2.58it/s]

step:8660, train_loss:0.06306862912837638, acc:0.6594821743256066


 17%|█▋        | 7378/43738 [55:38<3:59:04,  2.53it/s]

step:8660, train_loss:0.0630726631904976, acc:0.6593927893738141


 17%|█▋        | 7379/43738 [55:38<4:31:20,  2.23it/s]

step:8660, train_loss:0.06307090369782158, acc:0.6594389483669874


 17%|█▋        | 7380/43738 [55:39<4:21:06,  2.32it/s]

step:8660, train_loss:0.06306269717667609, acc:0.6594850948509485


 17%|█▋        | 7381/43738 [55:39<3:42:09,  2.73it/s]

step:8660, train_loss:0.06305415602939522, acc:0.6595312288307817


 17%|█▋        | 7382/43738 [55:40<4:00:00,  2.52it/s]

step:8660, train_loss:0.06304594881462032, acc:0.6595773503115687


 17%|█▋        | 7383/43738 [55:40<4:08:22,  2.44it/s]

step:8660, train_loss:0.06304665476595239, acc:0.6596234592983882


 17%|█▋        | 7384/43738 [55:40<3:44:01,  2.70it/s]

step:8660, train_loss:0.06303943127997966, acc:0.6596695557963164


 17%|█▋        | 7385/43738 [55:40<3:22:41,  2.99it/s]

step:8660, train_loss:0.06303749866773373, acc:0.6597156398104266


 17%|█▋        | 7386/43738 [55:41<3:34:15,  2.83it/s]

step:8660, train_loss:0.06304695134437083, acc:0.6596263200649878


 17%|█▋        | 7387/43738 [55:41<3:48:32,  2.65it/s]

step:8660, train_loss:0.06303843852040114, acc:0.6596723974549885


 17%|█▋        | 7388/43738 [55:42<3:49:29,  2.64it/s]

step:8660, train_loss:0.06303530491996272, acc:0.6597184623714131


 17%|█▋        | 7389/43738 [55:42<3:51:40,  2.61it/s]

step:8660, train_loss:0.06302677474891624, acc:0.6597645148193261


 17%|█▋        | 7390/43738 [55:43<4:09:48,  2.43it/s]

step:8660, train_loss:0.0630403136490691, acc:0.6596752368064953


 17%|█▋        | 7391/43738 [55:43<4:06:53,  2.45it/s]

step:8660, train_loss:0.0630319386101188, acc:0.6597212826410499


 18%|█▊        | 7696/43738 [58:06<5:49:33,  1.72it/s]

step:8680, train_loss:0.06317054787050325, acc:0.6599532224532224


 18%|█▊        | 7697/43738 [58:06<5:44:11,  1.75it/s]

step:8680, train_loss:0.06319523772256819, acc:0.6598674808366897


 18%|█▊        | 7698/43738 [58:07<5:26:56,  1.84it/s]

step:8680, train_loss:0.06319219264741008, acc:0.6599116653676279


 18%|█▊        | 7699/43738 [58:07<5:28:35,  1.83it/s]

step:8680, train_loss:0.0631975474388867, acc:0.6598259514222626


 18%|█▊        | 7700/43738 [58:08<5:02:24,  1.99it/s]

step:8680, train_loss:0.06321162629669212, acc:0.6597402597402597


 18%|█▊        | 7701/43738 [58:08<4:37:11,  2.17it/s]

step:8680, train_loss:0.0632045098836183, acc:0.659784443578756


 18%|█▊        | 7702/43738 [58:08<4:35:24,  2.18it/s]

step:8680, train_loss:0.06319746904556738, acc:0.6598286159439106


 18%|█▊        | 7703/43738 [58:09<4:17:37,  2.33it/s]

step:8680, train_loss:0.06318927906645937, acc:0.6598727768401922


 18%|█▊        | 7704/43738 [58:09<3:42:02,  2.70it/s]

step:8680, train_loss:0.06318701611945751, acc:0.6599169262720664


 18%|█▊        | 7705/43738 [58:10<4:47:35,  2.09it/s]

step:8680, train_loss:0.06320440992896549, acc:0.6598312783906555


 18%|█▊        | 7706/43738 [58:10<4:27:42,  2.24it/s]

step:8680, train_loss:0.06322107824847017, acc:0.6597456527381261


 18%|█▊        | 7707/43738 [58:11<5:40:37,  1.76it/s]

step:8680, train_loss:0.06325948072230601, acc:0.6596600493058259


 18%|█▊        | 7708/43738 [58:12<6:37:03,  1.51it/s]

step:8680, train_loss:0.06327806299636266, acc:0.6595744680851063


 18%|█▊        | 7709/43738 [58:13<7:09:14,  1.40it/s]

step:8680, train_loss:0.06327111771585478, acc:0.6596186275781554


 18%|█▊        | 7710/43738 [58:13<6:03:37,  1.65it/s]

step:8680, train_loss:0.06326853648137185, acc:0.659662775616083


 18%|█▊        | 7711/43738 [58:13<5:52:42,  1.70it/s]

step:8680, train_loss:0.06329619188942255, acc:0.6595772273375697


 18%|█▊        | 8016/43738 [1:00:34<5:50:20,  1.70it/s]

step:8700, train_loss:0.06357458198887658, acc:0.6594311377245509


 18%|█▊        | 8017/43738 [1:00:35<5:34:10,  1.78it/s]

step:8700, train_loss:0.06357562223755192, acc:0.6594736185605589


 18%|█▊        | 8018/43738 [1:00:35<4:46:08,  2.08it/s]

step:8700, train_loss:0.06357650900757537, acc:0.6595160888001995


 18%|█▊        | 8019/43738 [1:00:35<4:22:24,  2.27it/s]

step:8700, train_loss:0.06357249283097044, acc:0.6595585484474373


 18%|█▊        | 8020/43738 [1:00:36<4:02:09,  2.46it/s]

step:8700, train_loss:0.06356603775426432, acc:0.6596009975062345


 18%|█▊        | 8021/43738 [1:00:36<4:59:21,  1.99it/s]

step:8700, train_loss:0.06358055724326704, acc:0.659518763246478


 18%|█▊        | 8022/43738 [1:00:37<4:19:16,  2.30it/s]

step:8700, train_loss:0.06357263629908552, acc:0.6595612066816255


 18%|█▊        | 8023/43738 [1:00:37<4:19:38,  2.29it/s]

step:8700, train_loss:0.06356490998589463, acc:0.659603639536333


 18%|█▊        | 8024/43738 [1:00:38<5:23:27,  1.84it/s]

step:8700, train_loss:0.06356035129485947, acc:0.6596460618145563


 18%|█▊        | 8025/43738 [1:00:38<4:56:26,  2.01it/s]

step:8700, train_loss:0.06355930670773077, acc:0.6595638629283489


 18%|█▊        | 8026/43738 [1:00:39<5:23:11,  1.84it/s]

step:8700, train_loss:0.06355967529356027, acc:0.6596062795913282


 18%|█▊        | 8027/43738 [1:00:39<4:33:19,  2.18it/s]

step:8700, train_loss:0.06355783227293559, acc:0.6596486856858104


 18%|█▊        | 8028/43738 [1:00:39<3:57:05,  2.51it/s]

step:8700, train_loss:0.06354991570265757, acc:0.6596910812157449


 18%|█▊        | 8029/43738 [1:00:40<3:45:17,  2.64it/s]

step:8700, train_loss:0.06354200805292856, acc:0.6597334661850791


 18%|█▊        | 8030/43738 [1:00:40<3:39:09,  2.72it/s]

step:8700, train_loss:0.06353586646237476, acc:0.6597758405977584


 18%|█▊        | 8031/43738 [1:00:40<3:47:34,  2.62it/s]

step:8700, train_loss:0.06352796406390947, acc:0.6598182044577263


 19%|█▉        | 8336/43738 [1:03:00<3:44:11,  2.63it/s]

step:8720, train_loss:0.06362279909881739, acc:0.6594289827255279


 19%|█▉        | 8337/43738 [1:03:01<4:39:36,  2.11it/s]

step:8720, train_loss:0.06361577794267556, acc:0.6594698332733597


 19%|█▉        | 8338/43738 [1:03:01<5:08:57,  1.91it/s]

step:8720, train_loss:0.06362152610208122, acc:0.6593907411849365


 19%|█▉        | 8339/43738 [1:03:02<5:22:46,  1.83it/s]

step:8720, train_loss:0.06362514188368949, acc:0.6594315865211656


 19%|█▉        | 8340/43738 [1:03:03<5:50:59,  1.68it/s]

step:8720, train_loss:0.06362351957784393, acc:0.6594724220623501


 19%|█▉        | 8341/43738 [1:03:03<5:30:27,  1.79it/s]

step:8720, train_loss:0.06362875449704286, acc:0.6593933581105383


 19%|█▉        | 8342/43738 [1:03:04<5:28:38,  1.80it/s]

step:8720, train_loss:0.06362972140157543, acc:0.6594341884440182


 19%|█▉        | 8343/43738 [1:03:04<4:58:52,  1.97it/s]

step:8720, train_loss:0.06362209994911501, acc:0.6594750089895721


 19%|█▉        | 8344/43738 [1:03:05<4:58:10,  1.98it/s]

step:8720, train_loss:0.06363525765863923, acc:0.6593959731543624


 19%|█▉        | 8345/43738 [1:03:05<6:10:43,  1.59it/s]

step:8720, train_loss:0.06364608116956694, acc:0.6593169562612343


 19%|█▉        | 8346/43738 [1:03:06<6:44:37,  1.46it/s]

step:8720, train_loss:0.06364036699702831, acc:0.6593577761802061


 19%|█▉        | 8347/43738 [1:03:07<6:31:49,  1.51it/s]

step:8720, train_loss:0.06365190101933473, acc:0.6592787827962142


 19%|█▉        | 8348/43738 [1:03:07<6:20:57,  1.55it/s]

step:8720, train_loss:0.06365844537662296, acc:0.6591998083373263


 19%|█▉        | 8349/43738 [1:03:08<7:01:07,  1.40it/s]

step:8720, train_loss:0.063657229945976, acc:0.6592406276200743


 19%|█▉        | 8350/43738 [1:03:09<6:10:37,  1.59it/s]

step:8720, train_loss:0.06365160450887933, acc:0.6592814371257485


 19%|█▉        | 8351/43738 [1:03:09<5:44:12,  1.71it/s]

step:8720, train_loss:0.06364400308079893, acc:0.6593222368578613


 20%|█▉        | 8656/43738 [1:05:29<3:38:14,  2.68it/s]

step:8740, train_loss:0.06335523027384067, acc:0.6605822550831792


 20%|█▉        | 8657/43738 [1:05:29<3:16:45,  2.97it/s]

step:8740, train_loss:0.06335470621275252, acc:0.6605059489430518


 20%|█▉        | 8658/43738 [1:05:30<3:26:56,  2.83it/s]

step:8740, train_loss:0.06336250211827178, acc:0.6604296604296604


 20%|█▉        | 8659/43738 [1:05:30<3:42:55,  2.62it/s]

step:8740, train_loss:0.06336270263510918, acc:0.6604688763136621


 20%|█▉        | 8660/43738 [1:05:31<4:03:12,  2.40it/s]

step:8740, train_loss:0.06336456543341033, acc:0.6603926096997691


 20%|█▉        | 8661/43738 [1:05:31<3:34:26,  2.73it/s]

step:8740, train_loss:0.06335736477414941, acc:0.6604318208059116


 20%|█▉        | 8662/43738 [1:05:31<3:12:05,  3.04it/s]

step:8740, train_loss:0.06335019165452258, acc:0.6604710228584623


 20%|█▉        | 8663/43738 [1:05:32<3:31:24,  2.77it/s]

step:8740, train_loss:0.06335842403023312, acc:0.6603947824079418


 20%|█▉        | 8664/43738 [1:05:32<3:38:31,  2.68it/s]

step:8740, train_loss:0.06336578238003522, acc:0.6603185595567868


 20%|█▉        | 8665/43738 [1:05:33<3:54:54,  2.49it/s]

step:8740, train_loss:0.06337337986962059, acc:0.6602423542989037


 20%|█▉        | 8666/43738 [1:05:33<3:25:03,  2.85it/s]

step:8740, train_loss:0.06337423881421368, acc:0.6602815601200093


 20%|█▉        | 8667/43738 [1:05:33<3:17:15,  2.96it/s]

step:8740, train_loss:0.06336693417598203, acc:0.6603207568939656


 20%|█▉        | 8668/43738 [1:05:33<3:03:47,  3.18it/s]

step:8740, train_loss:0.06336013857436494, acc:0.660359944623904


 20%|█▉        | 8669/43738 [1:05:34<4:24:03,  2.21it/s]

step:8740, train_loss:0.06335539103944564, acc:0.6603991233129543


 20%|█▉        | 8670/43738 [1:05:34<3:54:51,  2.49it/s]

step:8740, train_loss:0.06334823853651952, acc:0.6604382929642445


 20%|█▉        | 8671/43738 [1:05:35<3:55:04,  2.49it/s]

step:8740, train_loss:0.06335079371047227, acc:0.6603621266289932


 21%|██        | 8976/43738 [1:07:52<4:17:10,  2.25it/s]

step:8760, train_loss:0.06310930258685422, acc:0.6607620320855615


 21%|██        | 8977/43738 [1:07:53<5:16:04,  1.83it/s]

step:8760, train_loss:0.06310245900555744, acc:0.6607998217667372


 21%|██        | 8978/43738 [1:07:54<5:17:31,  1.82it/s]

step:8760, train_loss:0.06309656131858743, acc:0.660837603029628


 21%|██        | 8979/43738 [1:07:54<5:16:56,  1.83it/s]

step:8760, train_loss:0.06308968953740646, acc:0.6608753758770465


 21%|██        | 8980/43738 [1:07:55<6:02:28,  1.60it/s]

step:8760, train_loss:0.06309263082850546, acc:0.6608017817371937


 21%|██        | 8981/43738 [1:07:56<5:19:17,  1.81it/s]

step:8760, train_loss:0.0630946069176306, acc:0.660728203986193


 21%|██        | 8982/43738 [1:07:56<5:10:56,  1.86it/s]

step:8760, train_loss:0.06309598410395978, acc:0.6606546426185704


 21%|██        | 8983/43738 [1:07:56<4:40:39,  2.06it/s]

step:8760, train_loss:0.06309133602849795, acc:0.6606924190136926


 21%|██        | 8984/43738 [1:07:57<4:54:00,  1.97it/s]

step:8760, train_loss:0.06308792370589295, acc:0.6607301869991096


 21%|██        | 8985/43738 [1:07:57<4:06:43,  2.35it/s]

step:8760, train_loss:0.06308091180670501, acc:0.6607679465776294


 21%|██        | 8986/43738 [1:07:57<3:39:53,  2.63it/s]

step:8760, train_loss:0.063082564372459, acc:0.6606944135321612


 21%|██        | 8987/43738 [1:07:58<3:36:57,  2.67it/s]

step:8760, train_loss:0.06307697415344898, acc:0.660732168688105


 21%|██        | 8988/43738 [1:07:58<3:08:06,  3.08it/s]

step:8760, train_loss:0.06307012789869534, acc:0.6607699154428126


 21%|██        | 8989/43738 [1:07:58<3:15:41,  2.96it/s]

step:8760, train_loss:0.06306935629466105, acc:0.6608076537990878


 21%|██        | 8990/43738 [1:07:59<3:13:51,  2.99it/s]

step:8760, train_loss:0.06306237596942726, acc:0.660845383759733


 21%|██        | 8991/43738 [1:07:59<3:02:24,  3.17it/s]

step:8760, train_loss:0.06307051006698017, acc:0.6607718829941052


 21%|██▏       | 9296/43738 [1:10:24<3:40:19,  2.61it/s]

step:8780, train_loss:0.06308227897880646, acc:0.6596385542168675


 21%|██▏       | 9297/43738 [1:10:25<4:50:34,  1.98it/s]

step:8780, train_loss:0.06308946545274054, acc:0.659567602452404


 21%|██▏       | 9298/43738 [1:10:25<4:20:11,  2.21it/s]

step:8780, train_loss:0.06310641232900481, acc:0.6594966659496666


 21%|██▏       | 9299/43738 [1:10:25<3:45:10,  2.55it/s]

step:8780, train_loss:0.06309969163025823, acc:0.6595332831487257


 21%|██▏       | 9300/43738 [1:10:26<3:58:50,  2.40it/s]

step:8780, train_loss:0.06309339644945032, acc:0.6595698924731183


 21%|██▏       | 9301/43738 [1:10:26<3:42:54,  2.57it/s]

step:8780, train_loss:0.06309813645393542, acc:0.6596064939253844


 21%|██▏       | 9302/43738 [1:10:27<3:53:54,  2.45it/s]

step:8780, train_loss:0.06310041529553655, acc:0.6595355837454311


 21%|██▏       | 9303/43738 [1:10:27<4:48:19,  1.99it/s]

step:8780, train_loss:0.06309403057933717, acc:0.6595721810168763


 21%|██▏       | 9304/43738 [1:10:28<4:30:46,  2.12it/s]

step:8780, train_loss:0.06308946056390034, acc:0.6596087704213242


 21%|██▏       | 9305/43738 [1:10:28<4:25:17,  2.16it/s]

step:8780, train_loss:0.06308607176755826, acc:0.6596453519613111


 21%|██▏       | 9306/43738 [1:10:29<4:46:23,  2.00it/s]

step:8780, train_loss:0.06309232174528026, acc:0.6595744680851063


 21%|██▏       | 9307/43738 [1:10:29<4:07:51,  2.32it/s]

step:8780, train_loss:0.06309322698715722, acc:0.6595035994412808


 21%|██▏       | 9308/43738 [1:10:29<3:37:24,  2.64it/s]

step:8780, train_loss:0.06308645541989889, acc:0.6595401804899012


 21%|██▏       | 9309/43738 [1:10:30<3:56:34,  2.43it/s]

step:8780, train_loss:0.06308020929883745, acc:0.6595767536792352


 21%|██▏       | 9310/43738 [1:10:30<4:16:43,  2.24it/s]

step:8780, train_loss:0.06308227890337043, acc:0.6596133190118153


 21%|██▏       | 9311/43738 [1:10:31<4:55:38,  1.94it/s]

step:8780, train_loss:0.06307618230543749, acc:0.6596498764901729


 22%|██▏       | 9616/43738 [1:12:55<4:20:54,  2.18it/s]

step:8800, train_loss:0.06313461747822581, acc:0.6595257903494176


 22%|██▏       | 9617/43738 [1:12:55<3:45:00,  2.53it/s]

step:8800, train_loss:0.06312806293595505, acc:0.6595611937194551


 22%|██▏       | 9618/43738 [1:12:55<3:20:11,  2.84it/s]

step:8800, train_loss:0.06313313893937589, acc:0.6594926180079018


 22%|██▏       | 9619/43738 [1:12:56<3:39:19,  2.59it/s]

step:8800, train_loss:0.06313131343290396, acc:0.659528017465433


 22%|██▏       | 9620/43738 [1:12:56<3:53:01,  2.44it/s]

step:8800, train_loss:0.06313822133748685, acc:0.6594594594594595


 22%|██▏       | 9621/43738 [1:12:57<4:00:34,  2.36it/s]

step:8800, train_loss:0.06313483020536582, acc:0.6594948550046773


 22%|██▏       | 9622/43738 [1:12:57<3:53:42,  2.43it/s]

step:8800, train_loss:0.06313512841871205, acc:0.6595302431926834


 22%|██▏       | 9623/43738 [1:12:58<4:03:10,  2.34it/s]

step:8800, train_loss:0.06313283023668988, acc:0.6595656240257716


 22%|██▏       | 9624/43738 [1:12:58<3:37:21,  2.62it/s]

step:8800, train_loss:0.06314311236535784, acc:0.6594970906068163


 22%|██▏       | 9625/43738 [1:12:58<3:50:22,  2.47it/s]

step:8800, train_loss:0.06315690243630609, acc:0.6594285714285715


 22%|██▏       | 9626/43738 [1:12:59<3:42:27,  2.56it/s]

step:8800, train_loss:0.0631577419695602, acc:0.6593600664865988


 22%|██▏       | 9627/43738 [1:12:59<3:39:27,  2.59it/s]

step:8800, train_loss:0.06315119088800636, acc:0.6593954502960424


 22%|██▏       | 9628/43738 [1:12:59<3:31:02,  2.69it/s]

step:8800, train_loss:0.06314490003559844, acc:0.6594308267552971


 22%|██▏       | 9629/43738 [1:13:00<3:20:26,  2.84it/s]

step:8800, train_loss:0.06314992773596408, acc:0.6593623429224218


 22%|██▏       | 9630/43738 [1:13:00<3:46:34,  2.51it/s]

step:8800, train_loss:0.06314339338968505, acc:0.6593977154724818


 22%|██▏       | 9631/43738 [1:13:01<4:08:29,  2.29it/s]

step:8800, train_loss:0.06314602816377599, acc:0.6593292492991382


 23%|██▎       | 9936/43738 [1:15:19<4:19:48,  2.17it/s]

step:8820, train_loss:0.0632654670181576, acc:0.6594202898550725


 23%|██▎       | 9937/43738 [1:15:20<4:17:16,  2.19it/s]

step:8820, train_loss:0.06327404908436418, acc:0.6593539297574721


 23%|██▎       | 9938/43738 [1:15:20<5:09:46,  1.82it/s]

step:8820, train_loss:0.06326914453830806, acc:0.6593882068826725


 23%|██▎       | 9939/43738 [1:15:21<4:54:26,  1.91it/s]

step:8820, train_loss:0.06326286235565207, acc:0.6594224771103733


 23%|██▎       | 9940/43738 [1:15:21<5:05:45,  1.84it/s]

step:8820, train_loss:0.06325951781426861, acc:0.6594567404426559


 23%|██▎       | 9941/43738 [1:15:22<4:29:17,  2.09it/s]

step:8820, train_loss:0.0632589473357646, acc:0.6594909968816014


 23%|██▎       | 9942/43738 [1:15:22<4:36:00,  2.04it/s]

step:8820, train_loss:0.06325813679796254, acc:0.6595252464292899


 23%|██▎       | 9943/43738 [1:15:23<4:24:31,  2.13it/s]

step:8820, train_loss:0.0632643640240989, acc:0.659458915820175


 23%|██▎       | 9944/43738 [1:15:23<4:53:59,  1.92it/s]

step:8820, train_loss:0.06326631664175011, acc:0.6593925985518906


 23%|██▎       | 9945/43738 [1:15:24<4:24:34,  2.13it/s]

step:8820, train_loss:0.06325996903830328, acc:0.6594268476621418


 23%|██▎       | 9946/43738 [1:15:24<3:57:26,  2.37it/s]

step:8820, train_loss:0.06327653525420193, acc:0.6593605469535492


 23%|██▎       | 9947/43738 [1:15:25<4:04:54,  2.30it/s]

step:8820, train_loss:0.0632713364366568, acc:0.6593947923997185


 23%|██▎       | 9948/43738 [1:15:25<4:25:23,  2.12it/s]

step:8820, train_loss:0.06327769722752243, acc:0.6593285082428629


 23%|██▎       | 9949/43738 [1:15:26<5:21:38,  1.75it/s]

step:8820, train_loss:0.06328422942932409, acc:0.6593627500251281


 23%|██▎       | 9950/43738 [1:15:27<5:52:52,  1.60it/s]

step:8820, train_loss:0.06327850005902401, acc:0.6593969849246231


 23%|██▎       | 9951/43738 [1:15:27<6:02:09,  1.55it/s]

step:8820, train_loss:0.0632723800135255, acc:0.6594312129434228


 23%|██▎       | 10256/43738 [1:17:50<6:17:10,  1.48it/s]

step:8840, train_loss:0.06325456580973746, acc:0.6598088923556942


 23%|██▎       | 10257/43738 [1:17:50<5:10:30,  1.80it/s]

step:8840, train_loss:0.06324992116841645, acc:0.6598420590816028


 23%|██▎       | 10258/43738 [1:17:51<4:16:44,  2.17it/s]

step:8840, train_loss:0.06324375714924306, acc:0.6598752193410021


 23%|██▎       | 10259/43738 [1:17:51<4:29:36,  2.07it/s]

step:8840, train_loss:0.06324909149958316, acc:0.6598108977483186


 23%|██▎       | 10260/43738 [1:17:52<4:41:31,  1.98it/s]

step:8840, train_loss:0.06324632855607755, acc:0.6598440545808967


 23%|██▎       | 10261/43738 [1:17:52<4:38:19,  2.00it/s]

step:8840, train_loss:0.06324609567586031, acc:0.6598772049507845


 23%|██▎       | 10262/43738 [1:17:53<4:15:47,  2.18it/s]

step:8840, train_loss:0.06324427721343344, acc:0.6598129019684272


 23%|██▎       | 10263/43738 [1:17:53<4:12:44,  2.21it/s]

step:8840, train_loss:0.06324233066907212, acc:0.659846048913573


 23%|██▎       | 10264/43738 [1:17:53<3:39:34,  2.54it/s]

step:8840, train_loss:0.06324630134730884, acc:0.6597817614964926


 23%|██▎       | 10265/43738 [1:17:54<3:29:15,  2.67it/s]

step:8840, train_loss:0.0632407954641499, acc:0.6598149050170482


 23%|██▎       | 10266/43738 [1:17:54<4:36:01,  2.02it/s]

step:8840, train_loss:0.06324248502889784, acc:0.6598480420806546


 23%|██▎       | 10267/43738 [1:17:55<3:55:27,  2.37it/s]

step:8840, train_loss:0.06324208630226114, acc:0.6597837732541151


 23%|██▎       | 10268/43738 [1:17:55<3:39:57,  2.54it/s]

step:8840, train_loss:0.06324446323401256, acc:0.6597195169458512


 23%|██▎       | 10269/43738 [1:17:56<4:06:22,  2.26it/s]

step:8840, train_loss:0.06326365150080905, acc:0.6596552731522056


 23%|██▎       | 10270/43738 [1:17:56<4:34:05,  2.04it/s]

step:8840, train_loss:0.06327144882462972, acc:0.6595910418695229


 23%|██▎       | 10271/43738 [1:17:57<4:16:01,  2.18it/s]

step:8840, train_loss:0.06327092250279963, acc:0.6596241845974102


 24%|██▍       | 10576/43738 [1:20:22<5:13:58,  1.76it/s]

step:8860, train_loss:0.06319336365887492, acc:0.659322995461422


 24%|██▍       | 10577/43738 [1:20:22<5:44:26,  1.60it/s]

step:8860, train_loss:0.06319036293828269, acc:0.6593552046894204


 24%|██▍       | 10578/43738 [1:20:23<5:04:08,  1.82it/s]

step:8860, train_loss:0.06319341383415425, acc:0.6592928719984874


 24%|██▍       | 10579/43738 [1:20:23<5:00:26,  1.84it/s]

step:8860, train_loss:0.06318808000130406, acc:0.6593250779846866


 24%|██▍       | 10580/43738 [1:20:24<5:09:07,  1.79it/s]

step:8860, train_loss:0.06320815605090184, acc:0.6592627599243857


 24%|██▍       | 10581/43738 [1:20:24<4:36:38,  2.00it/s]

step:8860, train_loss:0.06321264048283863, acc:0.6592004536433229


 24%|██▍       | 10582/43738 [1:20:25<4:39:48,  1.97it/s]

step:8860, train_loss:0.06322450550569368, acc:0.6591381591381591


 24%|██▍       | 10583/43738 [1:20:25<4:24:53,  2.09it/s]

step:8860, train_loss:0.06322649791296298, acc:0.6590758764055561


 24%|██▍       | 10584/43738 [1:20:26<4:08:54,  2.22it/s]

step:8860, train_loss:0.06322053164450901, acc:0.6591080876795162


 24%|██▍       | 10585/43738 [1:20:26<3:33:15,  2.59it/s]

step:8860, train_loss:0.06321472098131325, acc:0.6591402928672649


 24%|██▍       | 10586/43738 [1:20:26<3:33:22,  2.59it/s]

step:8860, train_loss:0.06321476375900104, acc:0.659078027583601


 24%|██▍       | 10587/43738 [1:20:27<3:40:56,  2.50it/s]

step:8860, train_loss:0.0632227079625773, acc:0.6590157740625295


 24%|██▍       | 10588/43738 [1:20:27<3:13:46,  2.85it/s]

step:8860, train_loss:0.06321721719871586, acc:0.6590479788439743


 24%|██▍       | 10589/43738 [1:20:27<3:21:36,  2.74it/s]

step:8860, train_loss:0.0632139548989485, acc:0.6590801775427331


 24%|██▍       | 10590/43738 [1:20:28<4:27:27,  2.07it/s]

step:8860, train_loss:0.0632182898103821, acc:0.6590179414542021


 24%|██▍       | 10591/43738 [1:20:29<4:52:45,  1.89it/s]

step:8860, train_loss:0.0632283204211471, acc:0.658955717118308


 25%|██▍       | 10896/43738 [1:22:54<4:29:10,  2.03it/s]

step:8880, train_loss:0.0633591763900511, acc:0.6584067547723935


 25%|██▍       | 10897/43738 [1:22:54<4:30:21,  2.02it/s]

step:8880, train_loss:0.06337678966329326, acc:0.6583463338533542


 25%|██▍       | 10898/43738 [1:22:55<3:47:38,  2.40it/s]

step:8880, train_loss:0.06337099333944307, acc:0.6583776839787117


 25%|██▍       | 10899/43738 [1:22:55<4:16:43,  2.13it/s]

step:8880, train_loss:0.06338072706617774, acc:0.6583172768143867


 25%|██▍       | 10900/43738 [1:22:56<5:00:31,  1.82it/s]

step:8880, train_loss:0.06337986212541508, acc:0.658256880733945


 25%|██▍       | 10901/43738 [1:22:56<4:26:36,  2.05it/s]

step:8880, train_loss:0.0633752007934112, acc:0.6582882304375746


 25%|██▍       | 10902/43738 [1:22:57<4:00:01,  2.28it/s]

step:8880, train_loss:0.06336954013896967, acc:0.6583195743900202


 25%|██▍       | 10904/43738 [1:22:57<3:22:48,  2.70it/s]

step:8880, train_loss:0.06336509728614323, acc:0.6583509125928644
step:8880, train_loss:0.06335929734095884, acc:0.6583822450476889


 25%|██▍       | 10905/43738 [1:22:58<3:38:33,  2.50it/s]

step:8880, train_loss:0.06335896565044574, acc:0.6584135717560752


 25%|██▍       | 10906/43738 [1:22:58<3:46:41,  2.41it/s]

step:8880, train_loss:0.06335394629935581, acc:0.6584448927196039


 25%|██▍       | 10907/43738 [1:22:59<4:03:01,  2.25it/s]

step:8880, train_loss:0.06335007622124074, acc:0.6584762079398552


 25%|██▍       | 10908/43738 [1:22:59<4:04:32,  2.24it/s]

step:8880, train_loss:0.06334500224245752, acc:0.6585075174184085


 25%|██▍       | 10909/43738 [1:23:00<5:03:56,  1.80it/s]

step:8880, train_loss:0.06334104299321848, acc:0.6585388211568429


 25%|██▍       | 10910/43738 [1:23:00<4:28:52,  2.03it/s]

step:8880, train_loss:0.06333751585771497, acc:0.6585701191567369


 25%|██▍       | 10911/43738 [1:23:01<4:30:06,  2.03it/s]

step:8880, train_loss:0.06333412075188612, acc:0.6586014114196682


 26%|██▌       | 11216/43738 [1:25:25<4:46:59,  1.89it/s]

step:8900, train_loss:0.06351791471430461, acc:0.6584343794579173


 26%|██▌       | 11217/43738 [1:25:25<4:44:37,  1.90it/s]

step:8900, train_loss:0.06351647349942019, acc:0.6584648301684942


 26%|██▌       | 11218/43738 [1:25:26<4:27:27,  2.03it/s]

step:8900, train_loss:0.06352992999930357, acc:0.6584061330005349


 26%|██▌       | 11219/43738 [1:25:26<4:07:46,  2.19it/s]

step:8900, train_loss:0.06353116789619265, acc:0.6584365808004279


 26%|██▌       | 11220/43738 [1:25:26<4:07:21,  2.19it/s]

step:8900, train_loss:0.063527351652359, acc:0.6584670231729055


 26%|██▌       | 11221/43738 [1:25:27<3:33:59,  2.53it/s]

step:8900, train_loss:0.06352173303228198, acc:0.6584974601194189


 26%|██▌       | 11222/43738 [1:25:27<4:00:36,  2.25it/s]

step:8900, train_loss:0.06351607922692563, acc:0.6585278916414187


 26%|██▌       | 11223/43738 [1:25:28<4:22:22,  2.07it/s]

step:8900, train_loss:0.0635273061439937, acc:0.6584692150049006


 26%|██▌       | 11224/43738 [1:25:28<4:08:39,  2.18it/s]

step:8900, train_loss:0.06352863760025376, acc:0.6584105488239487


 26%|██▌       | 11225/43738 [1:25:29<4:00:17,  2.26it/s]

step:8900, train_loss:0.06352312221162267, acc:0.6584409799554566


 26%|██▌       | 11226/43738 [1:25:29<3:53:32,  2.32it/s]

step:8900, train_loss:0.06352015049575808, acc:0.6584714056654195


 26%|██▌       | 11227/43738 [1:25:30<4:22:52,  2.06it/s]

step:8900, train_loss:0.06351763257096248, acc:0.6585018259552864


 26%|██▌       | 11228/43738 [1:25:30<3:58:10,  2.27it/s]

step:8900, train_loss:0.06351217052807624, acc:0.6585322408265052


 26%|██▌       | 11229/43738 [1:25:30<3:50:10,  2.35it/s]

step:8900, train_loss:0.06351378099789236, acc:0.6584735951554012


 26%|██▌       | 11230/43738 [1:25:31<4:21:38,  2.07it/s]

step:8900, train_loss:0.06352704028108734, acc:0.6584149599287622


 26%|██▌       | 11231/43738 [1:25:31<3:57:30,  2.28it/s]

step:8900, train_loss:0.06352394988824764, acc:0.6584453744101149


 26%|██▋       | 11536/43738 [1:27:54<5:26:33,  1.64it/s]

step:8920, train_loss:0.0634296322839598, acc:0.6596740638002774


 26%|██▋       | 11537/43738 [1:27:54<4:33:21,  1.96it/s]

step:8920, train_loss:0.06342413829471304, acc:0.6597035624512438


 26%|██▋       | 11538/43738 [1:27:54<4:25:18,  2.02it/s]

step:8920, train_loss:0.06342253803440816, acc:0.6597330559889062


 26%|██▋       | 11539/43738 [1:27:55<4:37:37,  1.93it/s]

step:8920, train_loss:0.06342670606897978, acc:0.6596758817921831


 26%|██▋       | 11540/43738 [1:27:55<4:31:16,  1.98it/s]

step:8920, train_loss:0.06343032748476651, acc:0.6596187175043328


 26%|██▋       | 11541/43738 [1:27:56<4:18:25,  2.08it/s]

step:8920, train_loss:0.06342750888484523, acc:0.6596482107269734


 26%|██▋       | 11542/43738 [1:27:56<4:54:11,  1.82it/s]

step:8920, train_loss:0.06342600798847083, acc:0.6596776988390227


 26%|██▋       | 11543/43738 [1:27:57<3:59:09,  2.24it/s]

step:8920, train_loss:0.06342407461367595, acc:0.6597071818418089


 26%|██▋       | 11544/43738 [1:27:57<4:09:10,  2.15it/s]

step:8920, train_loss:0.06342212580718637, acc:0.6597366597366597


 26%|██▋       | 11545/43738 [1:27:58<4:03:29,  2.20it/s]

step:8920, train_loss:0.06342095755251494, acc:0.6597661325249026


 26%|██▋       | 11546/43738 [1:27:58<3:31:07,  2.54it/s]

step:8920, train_loss:0.06341828200112212, acc:0.6597956002078642


 26%|██▋       | 11547/43738 [1:27:59<4:37:13,  1.94it/s]

step:8920, train_loss:0.06342256379260756, acc:0.6597384602061142


 26%|██▋       | 11548/43738 [1:27:59<4:23:19,  2.04it/s]

step:8920, train_loss:0.0634200798779331, acc:0.6597679251818497


 26%|██▋       | 11549/43738 [1:28:00<4:17:59,  2.08it/s]

step:8920, train_loss:0.06343481346007285, acc:0.6597107974716425


 26%|██▋       | 11550/43738 [1:28:00<4:47:45,  1.86it/s]

step:8920, train_loss:0.0634422305146448, acc:0.6596536796536796


 26%|██▋       | 11551/43738 [1:28:01<4:59:06,  1.79it/s]

step:8920, train_loss:0.06344355951153052, acc:0.6596831443165094


 27%|██▋       | 11856/43738 [1:30:33<4:34:48,  1.93it/s]

step:8940, train_loss:0.06350578461619553, acc:0.6595816464237517


 27%|██▋       | 11857/43738 [1:30:34<4:20:36,  2.04it/s]

step:8940, train_loss:0.06350936791809199, acc:0.6596103567512862


 27%|██▋       | 11858/43738 [1:30:34<3:41:40,  2.40it/s]

step:8940, train_loss:0.0635090279747848, acc:0.6596390622364648


 27%|██▋       | 11859/43738 [1:30:34<3:30:20,  2.53it/s]

step:8940, train_loss:0.06350742632208399, acc:0.6595834387385109


 27%|██▋       | 11860/43738 [1:30:34<3:21:34,  2.64it/s]

step:8940, train_loss:0.06350757076944759, acc:0.6596121416526138


 27%|██▋       | 11861/43738 [1:30:35<3:32:11,  2.50it/s]

step:8940, train_loss:0.06350255652816597, acc:0.6596408397268358


 27%|██▋       | 11862/43738 [1:30:35<3:48:40,  2.32it/s]

step:8940, train_loss:0.06350451640090508, acc:0.6595852301466869


 27%|██▋       | 11863/43738 [1:30:36<3:23:56,  2.60it/s]

step:8940, train_loss:0.0634992000590551, acc:0.6596139256511844


 27%|██▋       | 11864/43738 [1:30:36<3:02:27,  2.91it/s]

step:8940, train_loss:0.06349410285313317, acc:0.6596426163182738


 27%|██▋       | 11865/43738 [1:30:36<3:18:39,  2.67it/s]

step:8940, train_loss:0.06348903492411279, acc:0.6596713021491782


 27%|██▋       | 11866/43738 [1:30:37<2:55:14,  3.03it/s]

step:8940, train_loss:0.06349292364181011, acc:0.6596999831451205


 27%|██▋       | 11867/43738 [1:30:37<2:42:32,  3.27it/s]

step:8940, train_loss:0.06348764805441819, acc:0.6597286593073228


 27%|██▋       | 11868/43738 [1:30:37<2:58:05,  2.98it/s]

step:8940, train_loss:0.063483377577943, acc:0.659757330637007


 27%|██▋       | 11869/43738 [1:30:38<3:53:29,  2.27it/s]

step:8940, train_loss:0.06351319632016725, acc:0.6597017440390934


 27%|██▋       | 11870/43738 [1:30:38<3:30:49,  2.52it/s]

step:8940, train_loss:0.06350786144654039, acc:0.6597304128053918


 27%|██▋       | 11871/43738 [1:30:39<3:35:36,  2.46it/s]

step:8940, train_loss:0.06350252746670029, acc:0.6597590767416392


 28%|██▊       | 12176/43738 [1:32:58<5:11:11,  1.69it/s]

step:8960, train_loss:0.06342598113040274, acc:0.6599868593955321


 28%|██▊       | 12177/43738 [1:32:59<5:01:55,  1.74it/s]

step:8960, train_loss:0.06342354367079846, acc:0.6600147819660015


 28%|██▊       | 12178/43738 [1:32:59<4:11:43,  2.09it/s]

step:8960, train_loss:0.06341833643326301, acc:0.6600426999507308


 28%|██▊       | 12179/43738 [1:32:59<3:38:50,  2.40it/s]

step:8960, train_loss:0.06341328825470785, acc:0.6600706133508498


 28%|██▊       | 12180/43738 [1:33:00<4:36:58,  1.90it/s]

step:8960, train_loss:0.06340857007026451, acc:0.6600985221674877


 28%|██▊       | 12181/43738 [1:33:01<5:14:40,  1.67it/s]

step:8960, train_loss:0.06340496492067876, acc:0.6601264264017732


 28%|██▊       | 12182/43738 [1:33:01<4:36:03,  1.91it/s]

step:8960, train_loss:0.06340511971359411, acc:0.660154326054835


 28%|██▊       | 12183/43738 [1:33:02<4:52:16,  1.80it/s]

step:8960, train_loss:0.06340574826231356, acc:0.660182221127801


 28%|██▊       | 12184/43738 [1:33:02<4:13:13,  2.08it/s]

step:8960, train_loss:0.06342824170732661, acc:0.6601280367695338


 28%|██▊       | 12185/43738 [1:33:02<4:00:52,  2.18it/s]

step:8960, train_loss:0.06342502224908654, acc:0.6601559294214198


 28%|██▊       | 12186/43738 [1:33:03<4:16:21,  2.05it/s]

step:8960, train_loss:0.06341994967518715, acc:0.6601838174954866


 28%|██▊       | 12187/43738 [1:33:04<4:33:30,  1.92it/s]

step:8960, train_loss:0.06341486873163228, acc:0.6602117009928612


 28%|██▊       | 12188/43738 [1:33:04<4:09:50,  2.10it/s]

step:8960, train_loss:0.06341008773379646, acc:0.6602395799146702


 28%|██▊       | 12189/43738 [1:33:04<3:31:18,  2.49it/s]

step:8960, train_loss:0.06340606773791398, acc:0.6602674542620396


 28%|██▊       | 12190/43738 [1:33:05<4:04:30,  2.15it/s]

step:8960, train_loss:0.06341192384515094, acc:0.6602132895816243


 28%|██▊       | 12191/43738 [1:33:05<3:40:56,  2.38it/s]

step:8960, train_loss:0.06340814010263265, acc:0.6602411615125913


 29%|██▊       | 12496/43738 [1:35:24<4:40:02,  1.86it/s]

step:8980, train_loss:0.06335898217933666, acc:0.6605313700384123


 29%|██▊       | 12497/43738 [1:35:25<4:17:26,  2.02it/s]

step:8980, train_loss:0.06336013224287536, acc:0.6605585340481716


 29%|██▊       | 12498/43738 [1:35:25<3:59:37,  2.17it/s]

step:8980, train_loss:0.06335584456012663, acc:0.6605856937109937


 29%|██▊       | 12499/43738 [1:35:26<4:49:56,  1.80it/s]

step:8980, train_loss:0.06335901108161085, acc:0.6605328426274102


 29%|██▊       | 12500/43738 [1:35:26<4:14:51,  2.04it/s]

step:8980, train_loss:0.06335713513537776, acc:0.66056


 29%|██▊       | 12501/43738 [1:35:27<3:54:38,  2.22it/s]

step:8980, train_loss:0.06335207017768553, acc:0.6605871530277578


 29%|██▊       | 12502/43738 [1:35:27<4:01:19,  2.16it/s]

step:8980, train_loss:0.06336232904496895, acc:0.6605343145096785


 29%|██▊       | 12503/43738 [1:35:28<3:59:33,  2.17it/s]

step:8980, train_loss:0.06335886170467268, acc:0.6605614652483404


 29%|██▊       | 12504/43738 [1:35:28<3:48:44,  2.28it/s]

step:8980, train_loss:0.06335399542775037, acc:0.6605886116442738


 29%|██▊       | 12505/43738 [1:35:29<3:49:22,  2.27it/s]

step:8980, train_loss:0.06335022221700602, acc:0.6606157536985205


 29%|██▊       | 12506/43738 [1:35:29<4:55:17,  1.76it/s]

step:8980, train_loss:0.06335335702710991, acc:0.660562929793699


 29%|██▊       | 12507/43738 [1:35:30<5:23:16,  1.61it/s]

step:8980, train_loss:0.0633580752011624, acc:0.6605101143359718


 29%|██▊       | 12508/43738 [1:35:31<5:27:12,  1.59it/s]

step:8980, train_loss:0.06335301210818603, acc:0.6605372561560601


 29%|██▊       | 12509/43738 [1:35:31<4:58:37,  1.74it/s]

step:8980, train_loss:0.0633479633357114, acc:0.6605643936365817


 29%|██▊       | 12510/43738 [1:35:32<4:38:07,  1.87it/s]

step:8980, train_loss:0.0633483675813969, acc:0.660511590727418


 29%|██▊       | 12511/43738 [1:35:32<3:59:27,  2.17it/s]

step:8980, train_loss:0.0633435227941869, acc:0.6605387259211893


 29%|██▉       | 12816/43738 [1:37:57<5:58:53,  1.44it/s]

step:9000, train_loss:0.06342249214455084, acc:0.6604244694132334


 29%|██▉       | 12817/43738 [1:37:57<5:02:03,  1.71it/s]

step:9000, train_loss:0.06342338048151328, acc:0.660372942186159


 29%|██▉       | 12818/43738 [1:37:58<4:56:10,  1.74it/s]

step:9000, train_loss:0.06343081358944062, acc:0.6603214229989078


 29%|██▉       | 12819/43738 [1:37:58<4:23:31,  1.96it/s]

step:9000, train_loss:0.0634258754989419, acc:0.6603479210546844


 29%|██▉       | 12820/43738 [1:37:59<4:11:36,  2.05it/s]

step:9000, train_loss:0.06342894418973548, acc:0.6602964118564743


 29%|██▉       | 12821/43738 [1:37:59<4:52:55,  1.76it/s]

step:9000, train_loss:0.06342400603674575, acc:0.6603229077295063


 29%|██▉       | 12822/43738 [1:38:00<4:30:08,  1.91it/s]

step:9000, train_loss:0.06342458566077569, acc:0.660271408516612


 29%|██▉       | 12823/43738 [1:38:00<4:39:14,  1.85it/s]

step:9000, train_loss:0.06341966866351126, acc:0.6602979022069718


 29%|██▉       | 12824/43738 [1:38:01<4:25:33,  1.94it/s]

step:9000, train_loss:0.06341725694366593, acc:0.6603243917654398


 29%|██▉       | 12825/43738 [1:38:01<4:09:04,  2.07it/s]

step:9000, train_loss:0.0634123243780518, acc:0.6603508771929825


 29%|██▉       | 12826/43738 [1:38:02<3:35:47,  2.39it/s]

step:9000, train_loss:0.06342570104676243, acc:0.6602993918602837


 29%|██▉       | 12827/43738 [1:38:02<3:25:46,  2.50it/s]

step:9000, train_loss:0.06342105670467015, acc:0.6603258751071958


 29%|██▉       | 12828/43738 [1:38:03<3:50:43,  2.23it/s]

step:9000, train_loss:0.06342518392664594, acc:0.6602743997505457


 29%|██▉       | 12829/43738 [1:38:03<3:51:07,  2.23it/s]

step:9000, train_loss:0.06342026310297572, acc:0.6603008808168992


 29%|██▉       | 12830/43738 [1:38:03<3:38:22,  2.36it/s]

step:9000, train_loss:0.0634153213160102, acc:0.6603273577552611


 29%|██▉       | 12831/43738 [1:38:04<3:15:32,  2.63it/s]

step:9000, train_loss:0.06341039777983201, acc:0.6603538305665966


 30%|███       | 13136/43738 [1:40:29<3:40:40,  2.31it/s]

step:9020, train_loss:0.06344571214707348, acc:0.6603227771010962


 30%|███       | 13137/43738 [1:40:29<3:15:54,  2.60it/s]

step:9020, train_loss:0.06345014446648423, acc:0.6602725127502473


 30%|███       | 13138/43738 [1:40:30<3:30:22,  2.42it/s]

step:9020, train_loss:0.06344544192959364, acc:0.6602983711371594


 30%|███       | 13139/43738 [1:40:30<3:03:16,  2.78it/s]

step:9020, train_loss:0.06344331400483653, acc:0.6603242255879442


 30%|███       | 13140/43738 [1:40:30<2:46:28,  3.06it/s]

step:9020, train_loss:0.06343849041874668, acc:0.6603500761035007


 30%|███       | 13141/43738 [1:40:30<2:47:08,  3.05it/s]

step:9020, train_loss:0.06344570835931498, acc:0.6602998249752683


 30%|███       | 13142/43738 [1:40:31<3:30:01,  2.43it/s]

step:9020, train_loss:0.06344096186051765, acc:0.6603256734134835


 30%|███       | 13143/43738 [1:40:32<3:52:12,  2.20it/s]

step:9020, train_loss:0.06343616393957863, acc:0.6603515179182835


 30%|███       | 13144/43738 [1:40:32<3:50:02,  2.22it/s]

step:9020, train_loss:0.0634444257685737, acc:0.6603012781497261


 30%|███       | 13145/43738 [1:40:32<3:20:05,  2.55it/s]

step:9020, train_loss:0.06343961349571037, acc:0.6603271205781666


 30%|███       | 13146/43738 [1:40:33<4:03:52,  2.09it/s]

step:9020, train_loss:0.0634354148204872, acc:0.6603529590750038


 30%|███       | 13147/43738 [1:40:33<3:25:55,  2.48it/s]

step:9020, train_loss:0.06343081536150585, acc:0.6603787936411348


 30%|███       | 13148/43738 [1:40:34<3:49:22,  2.22it/s]

step:9020, train_loss:0.06343125301459245, acc:0.6604046242774566


 30%|███       | 13149/43738 [1:40:34<3:49:40,  2.22it/s]

step:9020, train_loss:0.06344185673333151, acc:0.6603543995741121


 30%|███       | 13150/43738 [1:40:35<3:52:06,  2.20it/s]

step:9020, train_loss:0.06344247246864906, acc:0.6603802281368821


 30%|███       | 13151/43738 [1:40:35<3:47:00,  2.25it/s]

step:9020, train_loss:0.06343854747745861, acc:0.6604060527716523


 31%|███       | 13456/43738 [1:43:02<4:11:59,  2.00it/s]

step:9040, train_loss:0.06342888370091886, acc:0.66044887039239


 31%|███       | 13457/43738 [1:43:02<3:29:01,  2.41it/s]

step:9040, train_loss:0.0634255301398022, acc:0.6604741026974809


 31%|███       | 13458/43738 [1:43:02<3:56:56,  2.13it/s]

step:9040, train_loss:0.06342219073147512, acc:0.6604993312527865


 31%|███       | 13459/43738 [1:43:03<3:27:19,  2.43it/s]

step:9040, train_loss:0.06341747916657345, acc:0.6605245560591426


 31%|███       | 13460/43738 [1:43:03<3:00:39,  2.79it/s]

step:9040, train_loss:0.06341312104938338, acc:0.6605497771173848


 31%|███       | 13461/43738 [1:43:04<3:43:45,  2.26it/s]

step:9040, train_loss:0.0634360185311021, acc:0.6605007057425154


 31%|███       | 13462/43738 [1:43:04<3:50:20,  2.19it/s]

step:9040, train_loss:0.06343245736228069, acc:0.6605259248254346


 31%|███       | 13463/43738 [1:43:05<4:37:38,  1.82it/s]

step:9040, train_loss:0.06343686252300688, acc:0.6604768625120702


 31%|███       | 13464/43738 [1:43:05<4:00:15,  2.10it/s]

step:9040, train_loss:0.06343934312389313, acc:0.660427807486631


 31%|███       | 13465/43738 [1:43:05<3:49:26,  2.20it/s]

step:9040, train_loss:0.06343919941732702, acc:0.6603787597474935


 31%|███       | 13466/43738 [1:43:06<4:09:59,  2.02it/s]

step:9040, train_loss:0.06343661353513913, acc:0.6604039803950691


 31%|███       | 13467/43738 [1:43:07<4:26:46,  1.89it/s]

step:9040, train_loss:0.06344428447403437, acc:0.6603549417093636


 31%|███       | 13468/43738 [1:43:07<3:48:54,  2.20it/s]

step:9040, train_loss:0.06343958241266573, acc:0.6603801603801603


 31%|███       | 13469/43738 [1:43:07<3:49:18,  2.20it/s]

step:9040, train_loss:0.06344489829122407, acc:0.660331130744673


 31%|███       | 13470/43738 [1:43:08<3:50:24,  2.19it/s]

step:9040, train_loss:0.06345426956949266, acc:0.6602821083890126


 31%|███       | 13471/43738 [1:43:08<3:23:42,  2.48it/s]

step:9040, train_loss:0.06345425840501887, acc:0.6602330933115582


 31%|███▏      | 13776/43738 [1:45:35<4:34:37,  1.82it/s]

step:9060, train_loss:0.06345518009521171, acc:0.6594802555168409


 31%|███▏      | 13777/43738 [1:45:36<4:43:54,  1.76it/s]

step:9060, train_loss:0.0634713106589326, acc:0.659432387312187


 32%|███▏      | 13778/43738 [1:45:36<3:50:51,  2.16it/s]

step:9060, train_loss:0.06346670438581713, acc:0.6594571055305559


 32%|███▏      | 13779/43738 [1:45:37<3:46:03,  2.21it/s]

step:9060, train_loss:0.06346634685210603, acc:0.6594818201611148


 32%|███▏      | 13780/43738 [1:45:37<3:36:08,  2.31it/s]

step:9060, train_loss:0.06346362964110754, acc:0.6595065312046444


 32%|███▏      | 13781/43738 [1:45:38<3:36:11,  2.31it/s]

step:9060, train_loss:0.06346299975094424, acc:0.6594586749873014


 32%|███▏      | 13782/43738 [1:45:38<3:50:49,  2.16it/s]

step:9060, train_loss:0.06345858794027733, acc:0.65948338412422


 32%|███▏      | 13783/43738 [1:45:39<4:10:48,  1.99it/s]

step:9060, train_loss:0.06346269084063054, acc:0.6594355365305086


 32%|███▏      | 13784/43738 [1:45:39<3:31:55,  2.36it/s]

step:9060, train_loss:0.06345895196687211, acc:0.6594602437608822


 32%|███▏      | 13785/43738 [1:45:39<3:35:38,  2.32it/s]

step:9060, train_loss:0.06347078341948653, acc:0.6594124047878128


 32%|███▏      | 13786/43738 [1:45:40<3:32:57,  2.34it/s]

step:9060, train_loss:0.06346695452345233, acc:0.6594371101117076


 32%|███▏      | 13787/43738 [1:45:40<3:49:03,  2.18it/s]

step:9060, train_loss:0.0634708008915277, acc:0.6593892797562921


 32%|███▏      | 13788/43738 [1:45:41<4:27:35,  1.87it/s]

step:9060, train_loss:0.06346705843663808, acc:0.6594139831737743


 32%|███▏      | 13789/43738 [1:45:41<4:16:39,  1.94it/s]

step:9060, train_loss:0.06346347224380766, acc:0.659438683008195


 32%|███▏      | 13790/43738 [1:45:42<4:10:43,  1.99it/s]

step:9060, train_loss:0.0634667770706365, acc:0.6593908629441624


 32%|███▏      | 13791/43738 [1:45:43<4:34:15,  1.82it/s]

step:9060, train_loss:0.06346899247999258, acc:0.6593430498150968


 32%|███▏      | 14096/43738 [1:48:03<5:03:04,  1.63it/s]

step:9080, train_loss:0.06358066638348069, acc:0.6589103291713961


 32%|███▏      | 14097/43738 [1:48:03<4:42:31,  1.75it/s]

step:9080, train_loss:0.06358041310359801, acc:0.6589345250762574


 32%|███▏      | 14098/43738 [1:48:04<4:30:33,  1.83it/s]

step:9080, train_loss:0.06358006858395675, acc:0.6589587175485885


 32%|███▏      | 14099/43738 [1:48:04<4:16:49,  1.92it/s]

step:9080, train_loss:0.06358424369910785, acc:0.6589119795730194


 32%|███▏      | 14100/43738 [1:48:04<3:56:19,  2.09it/s]

step:9080, train_loss:0.0635798159867099, acc:0.658936170212766


 32%|███▏      | 14101/43738 [1:48:05<3:49:35,  2.15it/s]

step:9080, train_loss:0.06357903952987759, acc:0.6588894404652152


 32%|███▏      | 14102/43738 [1:48:05<3:47:11,  2.17it/s]

step:9080, train_loss:0.06357462415676898, acc:0.6589136292724437


 32%|███▏      | 14103/43738 [1:48:06<3:50:59,  2.14it/s]

step:9080, train_loss:0.06357013604196819, acc:0.6589378146493654


 32%|███▏      | 14104/43738 [1:48:06<4:05:36,  2.01it/s]

step:9080, train_loss:0.06356732527598055, acc:0.6589619965967102


 32%|███▏      | 14105/43738 [1:48:07<3:26:01,  2.40it/s]

step:9080, train_loss:0.06356284272146716, acc:0.6589861751152074


 32%|███▏      | 14106/43738 [1:48:07<3:19:54,  2.47it/s]

step:9080, train_loss:0.0635629845506395, acc:0.6590103502055863


 32%|███▏      | 14107/43738 [1:48:08<3:51:17,  2.14it/s]

step:9080, train_loss:0.06355902548345659, acc:0.6590345218685759


 32%|███▏      | 14108/43738 [1:48:08<3:53:50,  2.11it/s]

step:9080, train_loss:0.06356692088505382, acc:0.658987808335696


 32%|███▏      | 14109/43738 [1:48:08<3:39:51,  2.25it/s]

step:9080, train_loss:0.06356241732318225, acc:0.6590119781699625


 32%|███▏      | 14110/43738 [1:48:09<3:38:16,  2.26it/s]

step:9080, train_loss:0.06355950054008822, acc:0.6590361445783133


 32%|███▏      | 14111/43738 [1:48:10<4:29:45,  1.83it/s]

step:9080, train_loss:0.06355894657732687, acc:0.6590603075614768


 33%|███▎      | 14416/43738 [1:50:33<4:09:44,  1.96it/s]

step:9100, train_loss:0.06337851603937403, acc:0.6596143174250833


 33%|███▎      | 14417/43738 [1:50:34<4:11:37,  1.94it/s]

step:9100, train_loss:0.06337664708759963, acc:0.6596379274467642


 33%|███▎      | 14418/43738 [1:50:34<3:45:09,  2.17it/s]

step:9100, train_loss:0.0633723277405789, acc:0.6596615341933694


 33%|███▎      | 14419/43738 [1:50:34<3:32:28,  2.30it/s]

step:9100, train_loss:0.06336793852658519, acc:0.6596851376655801


 33%|███▎      | 14420/43738 [1:50:35<4:15:34,  1.91it/s]

step:9100, train_loss:0.06337395477599991, acc:0.6596393897364771


 33%|███▎      | 14421/43738 [1:50:36<4:06:29,  1.98it/s]

step:9100, train_loss:0.0633695874471341, acc:0.6596629914707718


 33%|███▎      | 14422/43738 [1:50:36<3:31:33,  2.31it/s]

step:9100, train_loss:0.063367402812063, acc:0.6596865899320482


 33%|███▎      | 14423/43738 [1:50:36<3:18:25,  2.46it/s]

step:9100, train_loss:0.06337155857532853, acc:0.6596408514178742


 33%|███▎      | 14424/43738 [1:50:37<3:07:26,  2.61it/s]

step:9100, train_loss:0.06337001444631613, acc:0.6596644481419855


 33%|███▎      | 14425/43738 [1:50:37<2:48:21,  2.90it/s]

step:9100, train_loss:0.06336867990588005, acc:0.659688041594454


 33%|███▎      | 14426/43738 [1:50:37<3:05:13,  2.64it/s]

step:9100, train_loss:0.06336613547928423, acc:0.65971163177596


 33%|███▎      | 14427/43738 [1:50:38<2:54:26,  2.80it/s]

step:9100, train_loss:0.06336175034717473, acc:0.6597352186871838


 33%|███▎      | 14428/43738 [1:50:38<3:29:05,  2.34it/s]

step:9100, train_loss:0.06335967034720356, acc:0.6597588023288051


 33%|███▎      | 14429/43738 [1:50:39<4:14:07,  1.92it/s]

step:9100, train_loss:0.06335614419018179, acc:0.6597823827015039


 33%|███▎      | 14430/43738 [1:50:39<4:05:51,  1.99it/s]

step:9100, train_loss:0.06335175692807135, acc:0.6598059598059598


 33%|███▎      | 14431/43738 [1:50:40<4:28:48,  1.82it/s]

step:9100, train_loss:0.06334745515413061, acc:0.6598295336428522


 34%|███▎      | 14736/43738 [1:53:00<3:06:15,  2.60it/s]

step:9120, train_loss:0.06321260646752952, acc:0.6602198697068404


 34%|███▎      | 14737/43738 [1:53:01<3:32:10,  2.28it/s]

step:9120, train_loss:0.0632195691246231, acc:0.6601750695528262


 34%|███▎      | 14738/43738 [1:53:01<3:07:45,  2.57it/s]

step:9120, train_loss:0.06321535697395009, acc:0.6601981272899986


 34%|███▎      | 14739/43738 [1:53:01<2:46:44,  2.90it/s]

step:9120, train_loss:0.06321110778556113, acc:0.6602211818983649


 34%|███▎      | 14740/43738 [1:53:01<2:34:55,  3.12it/s]

step:9120, train_loss:0.06320682171155798, acc:0.6602442333785618


 34%|███▎      | 14741/43738 [1:53:02<2:19:46,  3.46it/s]

step:9120, train_loss:0.0632103589668093, acc:0.6601994437283767


 34%|███▎      | 14742/43738 [1:53:02<2:54:09,  2.77it/s]

step:9120, train_loss:0.06322445802323705, acc:0.6601546601546602


 34%|███▎      | 14743/43738 [1:53:03<3:37:13,  2.22it/s]

step:9120, train_loss:0.06322706267854049, acc:0.6601098826561758


 34%|███▎      | 14744/43738 [1:53:03<3:46:41,  2.13it/s]

step:9120, train_loss:0.06322545348668977, acc:0.6601329354313619


 34%|███▎      | 14745/43738 [1:53:03<3:13:43,  2.49it/s]

step:9120, train_loss:0.06322116778806457, acc:0.660155985079688


 34%|███▎      | 14746/43738 [1:53:04<3:10:52,  2.53it/s]

step:9120, train_loss:0.06321688368325198, acc:0.6601790316017904


 34%|███▎      | 14747/43738 [1:53:04<3:19:46,  2.42it/s]

step:9120, train_loss:0.06321874068981717, acc:0.6601342645961891


 34%|███▎      | 14748/43738 [1:53:05<3:31:42,  2.28it/s]

step:9120, train_loss:0.06321447208924716, acc:0.6601573094656903


 34%|███▎      | 14749/43738 [1:53:06<4:12:57,  1.91it/s]

step:9120, train_loss:0.0632185577904223, acc:0.6601125500033901


 34%|███▎      | 14750/43738 [1:53:06<4:19:24,  1.86it/s]

step:9120, train_loss:0.06321693188598443, acc:0.660135593220339


 34%|███▎      | 14751/43738 [1:53:07<4:13:37,  1.90it/s]

step:9120, train_loss:0.06321885855785593, acc:0.660090841298895


 34%|███▍      | 15056/43738 [1:55:31<4:16:09,  1.87it/s]

step:9140, train_loss:0.06315852084903235, acc:0.660135494155154


 34%|███▍      | 15057/43738 [1:55:32<4:16:45,  1.86it/s]

step:9140, train_loss:0.0631544887191271, acc:0.6601580660158066


 34%|███▍      | 15058/43738 [1:55:32<3:33:30,  2.24it/s]

step:9140, train_loss:0.063150489544294, acc:0.6601806348784699


 34%|███▍      | 15059/43738 [1:55:33<3:50:09,  2.08it/s]

step:9140, train_loss:0.06314728064556392, acc:0.6602032007437413


 34%|███▍      | 15060/43738 [1:55:33<3:13:05,  2.48it/s]

step:9140, train_loss:0.0631431641035412, acc:0.6602257636122177


 34%|███▍      | 15061/43738 [1:55:33<3:47:57,  2.10it/s]

step:9140, train_loss:0.06314338842630811, acc:0.6602483234844964


 34%|███▍      | 15062/43738 [1:55:34<3:45:29,  2.12it/s]

step:9140, train_loss:0.06313920041145767, acc:0.6602708803611738


 34%|███▍      | 15063/43738 [1:55:34<3:10:46,  2.51it/s]

step:9140, train_loss:0.0631429519279052, acc:0.6602270464050985


 34%|███▍      | 15064/43738 [1:55:35<3:22:33,  2.36it/s]

step:9140, train_loss:0.06315489091771904, acc:0.6601832182687202


 34%|███▍      | 15065/43738 [1:55:35<2:50:59,  2.79it/s]

step:9140, train_loss:0.06315094545573074, acc:0.6602057749751079


 34%|███▍      | 15066/43738 [1:55:35<2:44:32,  2.90it/s]

step:9140, train_loss:0.06314677034915511, acc:0.66022832868711


 34%|███▍      | 15067/43738 [1:55:36<3:03:46,  2.60it/s]

step:9140, train_loss:0.063147574489648, acc:0.6601845091922746


 34%|███▍      | 15068/43738 [1:55:36<4:02:18,  1.97it/s]

step:9140, train_loss:0.06314821197217403, acc:0.6602070613220069


 34%|███▍      | 15069/43738 [1:55:37<3:46:25,  2.11it/s]

step:9140, train_loss:0.06316809855741654, acc:0.66016324905435


 34%|███▍      | 15070/43738 [1:55:37<4:10:29,  1.91it/s]

step:9140, train_loss:0.06316749474759706, acc:0.6601194426011944


 34%|███▍      | 15071/43738 [1:55:38<3:34:30,  2.23it/s]

step:9140, train_loss:0.06316333540764744, acc:0.660141994559087


 35%|███▌      | 15376/43738 [1:57:58<4:11:19,  1.88it/s]

step:9160, train_loss:0.06312089930001241, acc:0.6605749219562955


 35%|███▌      | 15377/43738 [1:57:59<4:33:57,  1.73it/s]

step:9160, train_loss:0.06311923010070994, acc:0.6605969955127788


 35%|███▌      | 15378/43738 [1:57:59<3:44:15,  2.11it/s]

step:9160, train_loss:0.06312236308745887, acc:0.6605540382364417


 35%|███▌      | 15379/43738 [1:58:00<3:53:44,  2.02it/s]

step:9160, train_loss:0.0631323428637041, acc:0.6605110865465895


 35%|███▌      | 15380/43738 [1:58:00<3:18:14,  2.38it/s]

step:9160, train_loss:0.06312843391508129, acc:0.6605331599479844


 35%|███▌      | 15381/43738 [1:58:00<3:08:01,  2.51it/s]

step:9160, train_loss:0.0631248863633591, acc:0.6605552304791626


 35%|███▌      | 15382/43738 [1:58:01<3:09:12,  2.50it/s]

step:9160, train_loss:0.06312126215488321, acc:0.6605772981406839


 35%|███▌      | 15383/43738 [1:58:01<3:25:52,  2.30it/s]

step:9160, train_loss:0.06312416682865697, acc:0.6605343561073913


 35%|███▌      | 15384/43738 [1:58:02<4:18:05,  1.83it/s]

step:9160, train_loss:0.06312048279538984, acc:0.6605564222568903


 35%|███▌      | 15385/43738 [1:58:03<3:51:06,  2.04it/s]

step:9160, train_loss:0.06311827797377147, acc:0.6605784855378616


 35%|███▌      | 15386/43738 [1:58:03<3:39:50,  2.15it/s]

step:9160, train_loss:0.06311543015913128, acc:0.6606005459508644


 35%|███▌      | 15387/43738 [1:58:03<3:38:54,  2.16it/s]

step:9160, train_loss:0.0631158887164676, acc:0.6605576135698966


 35%|███▌      | 15388/43738 [1:58:04<3:45:17,  2.10it/s]

step:9160, train_loss:0.06311793473596486, acc:0.6605796724720562


 35%|███▌      | 15389/43738 [1:58:04<3:46:50,  2.08it/s]

step:9160, train_loss:0.0631216495532344, acc:0.6605367470270973


 35%|███▌      | 15390/43738 [1:58:05<3:19:59,  2.36it/s]

step:9160, train_loss:0.06311756301948274, acc:0.6605588044184535


 35%|███▌      | 15391/43738 [1:58:05<3:21:28,  2.35it/s]

step:9160, train_loss:0.06311425736717102, acc:0.6605808589435385


 36%|███▌      | 15696/43738 [2:00:23<3:52:44,  2.01it/s]

step:9180, train_loss:0.06312660121964712, acc:0.660486748216106


 36%|███▌      | 15697/43738 [2:00:23<3:20:08,  2.34it/s]

step:9180, train_loss:0.06312276608348286, acc:0.6605083773969548


 36%|███▌      | 15698/43738 [2:00:24<3:15:45,  2.39it/s]

step:9180, train_loss:0.06311943699992549, acc:0.660530003822143


 36%|███▌      | 15699/43738 [2:00:24<3:39:45,  2.13it/s]

step:9180, train_loss:0.06311880079968117, acc:0.6604879291674629


 36%|███▌      | 15700/43738 [2:00:25<3:37:49,  2.15it/s]

step:9180, train_loss:0.06312192326396034, acc:0.6604458598726115


 36%|███▌      | 15701/43738 [2:00:25<3:23:08,  2.30it/s]

step:9180, train_loss:0.06311790309887774, acc:0.6604674861473792


 36%|███▌      | 15702/43738 [2:00:26<3:48:03,  2.05it/s]

step:9180, train_loss:0.06311648821191404, acc:0.6604891096675582


 36%|███▌      | 15703/43738 [2:00:26<3:48:32,  2.04it/s]

step:9180, train_loss:0.06311606368769719, acc:0.6605107304336751


 36%|███▌      | 15704/43738 [2:00:27<3:12:33,  2.43it/s]

step:9180, train_loss:0.06311206042647101, acc:0.6605323484462557


 36%|███▌      | 15705/43738 [2:00:27<2:50:01,  2.75it/s]

step:9180, train_loss:0.06311018518893453, acc:0.6605539637058262


 36%|███▌      | 15706/43738 [2:00:27<2:50:29,  2.74it/s]

step:9180, train_loss:0.0631067540786062, acc:0.6605755762129123


 36%|███▌      | 15707/43738 [2:00:28<3:21:59,  2.31it/s]

step:9180, train_loss:0.06310864719436085, acc:0.6605335200865856


 36%|███▌      | 15708/43738 [2:00:28<3:17:42,  2.36it/s]

step:9180, train_loss:0.0631138956278708, acc:0.6604914693149987


 36%|███▌      | 15709/43738 [2:00:28<2:53:17,  2.70it/s]

step:9180, train_loss:0.06310993206712438, acc:0.6605130816729263


 36%|███▌      | 15710/43738 [2:00:29<3:08:25,  2.48it/s]

step:9180, train_loss:0.06311545207865156, acc:0.660471037555697


 36%|███▌      | 15711/43738 [2:00:29<3:27:35,  2.25it/s]

step:9180, train_loss:0.06311168399383256, acc:0.6604926484628604


 37%|███▋      | 16016/43738 [2:02:45<3:49:40,  2.01it/s]

step:9200, train_loss:0.0631307761889745, acc:0.6604645354645354


 37%|███▋      | 16017/43738 [2:02:45<4:09:56,  1.85it/s]

step:9200, train_loss:0.06312750333068372, acc:0.660485733907723


 37%|███▋      | 16018/43738 [2:02:46<3:49:49,  2.01it/s]

step:9200, train_loss:0.06312897233880714, acc:0.6604444999375703


 37%|███▋      | 16019/43738 [2:02:47<4:32:58,  1.69it/s]

step:9200, train_loss:0.06312995084471554, acc:0.6604032711155503


 37%|███▋      | 16020/43738 [2:02:47<4:08:16,  1.86it/s]

step:9200, train_loss:0.06312648966676933, acc:0.6604244694132334


 37%|███▋      | 16021/43738 [2:02:47<3:28:50,  2.21it/s]

step:9200, train_loss:0.06312351868636042, acc:0.6604456650646027


 37%|███▋      | 16022/43738 [2:02:48<3:34:50,  2.15it/s]

step:9200, train_loss:0.06312947000978124, acc:0.6604044438896517


 37%|███▋      | 16023/43738 [2:02:48<3:05:25,  2.49it/s]

step:9200, train_loss:0.06312584481289979, acc:0.6604256381451663


 37%|███▋      | 16024/43738 [2:02:48<3:14:34,  2.37it/s]

step:9200, train_loss:0.06312515614590175, acc:0.6603844233649526


 37%|███▋      | 16025/43738 [2:02:49<2:49:11,  2.73it/s]

step:9200, train_loss:0.06312221154950519, acc:0.660405616224649


 37%|███▋      | 16026/43738 [2:02:49<3:23:14,  2.27it/s]

step:9200, train_loss:0.0631200756300117, acc:0.6604268064395358


 37%|███▋      | 16027/43738 [2:02:49<2:55:19,  2.63it/s]

step:9200, train_loss:0.06311719308412991, acc:0.6604479940101079


 37%|███▋      | 16028/43738 [2:02:50<2:40:02,  2.89it/s]

step:9200, train_loss:0.06311345957334505, acc:0.6604691789368605


 37%|███▋      | 16029/43738 [2:02:51<3:47:29,  2.03it/s]

step:9200, train_loss:0.06311477407294887, acc:0.6604903612202883


 37%|███▋      | 16030/43738 [2:02:51<4:31:37,  1.70it/s]

step:9200, train_loss:0.06311727408920308, acc:0.6604491578290705


 37%|███▋      | 16031/43738 [2:02:52<3:45:10,  2.05it/s]

step:9200, train_loss:0.06312125927322507, acc:0.660407959578317


 37%|███▋      | 16336/43738 [2:05:07<3:06:21,  2.45it/s]

step:9220, train_loss:0.06307149480131516, acc:0.6601983349657199


 37%|███▋      | 16337/43738 [2:05:08<3:15:50,  2.33it/s]

step:9220, train_loss:0.06307015099662382, acc:0.6602191344800147


 37%|███▋      | 16338/43738 [2:05:08<2:53:51,  2.63it/s]

step:9220, train_loss:0.06307216036441259, acc:0.6601787244460766


 37%|███▋      | 16339/43738 [2:05:09<2:51:32,  2.66it/s]

step:9220, train_loss:0.06306887886493816, acc:0.6601995226146031


 37%|███▋      | 16340/43738 [2:05:09<2:52:13,  2.65it/s]

step:9220, train_loss:0.06306929723207891, acc:0.6601591187270501


 37%|███▋      | 16341/43738 [2:05:09<3:02:22,  2.50it/s]

step:9220, train_loss:0.06308067214906668, acc:0.6601187197845909


 37%|███▋      | 16342/43738 [2:05:10<3:02:10,  2.51it/s]

step:9220, train_loss:0.06307907524896822, acc:0.660139517806878


 37%|███▋      | 16343/43738 [2:05:11<3:44:11,  2.04it/s]

step:9220, train_loss:0.06307902305151979, acc:0.6601603132839748


 37%|███▋      | 16344/43738 [2:05:11<3:28:48,  2.19it/s]

step:9220, train_loss:0.0630769548324532, acc:0.6601811062163485


 37%|███▋      | 16345/43738 [2:05:12<4:14:48,  1.79it/s]

step:9220, train_loss:0.06307484944580785, acc:0.6602018966044662


 37%|███▋      | 16346/43738 [2:05:12<3:33:34,  2.14it/s]

step:9220, train_loss:0.06307203589399336, acc:0.6602226844487948


 37%|███▋      | 16347/43738 [2:05:13<4:07:47,  1.84it/s]

step:9220, train_loss:0.06306926722079784, acc:0.6602434697498012


 37%|███▋      | 16348/43738 [2:05:13<3:43:49,  2.04it/s]

step:9220, train_loss:0.06306750448728132, acc:0.6602642525079521


 37%|███▋      | 16349/43738 [2:05:13<3:35:44,  2.12it/s]

step:9220, train_loss:0.06306531436149228, acc:0.660285032723714


 37%|███▋      | 16350/43738 [2:05:14<4:22:49,  1.74it/s]

step:9220, train_loss:0.06306411727456182, acc:0.6603058103975535


 37%|███▋      | 16351/43738 [2:05:15<4:10:08,  1.82it/s]

step:9220, train_loss:0.06306032550520882, acc:0.6603265855299371


 38%|███▊      | 16656/43738 [2:07:36<3:55:43,  1.91it/s]

step:9240, train_loss:0.06304770101373383, acc:0.6598823246878002


 38%|███▊      | 16657/43738 [2:07:36<3:22:34,  2.23it/s]

step:9240, train_loss:0.06304928333545755, acc:0.6598427087710872


 38%|███▊      | 16658/43738 [2:07:37<3:29:43,  2.15it/s]

step:9240, train_loss:0.06304558302613496, acc:0.65986312882699


 38%|███▊      | 16659/43738 [2:07:37<3:45:03,  2.01it/s]

step:9240, train_loss:0.06304303798449845, acc:0.6598835464313584


 38%|███▊      | 16660/43738 [2:07:38<4:15:38,  1.77it/s]

step:9240, train_loss:0.06305497603578016, acc:0.65984393757503


 38%|███▊      | 16661/43738 [2:07:39<4:01:09,  1.87it/s]

step:9240, train_loss:0.0630573693434579, acc:0.659804333473381


 38%|███▊      | 16662/43738 [2:07:39<3:43:22,  2.02it/s]

step:9240, train_loss:0.06305613191687517, acc:0.6598247509302605


 38%|███▊      | 16663/43738 [2:07:39<3:38:51,  2.06it/s]

step:9240, train_loss:0.06306080279250949, acc:0.6597851527336014


 38%|███▊      | 16664/43738 [2:07:40<3:48:43,  1.97it/s]

step:9240, train_loss:0.0630678302863267, acc:0.6597455592894863


 38%|███▊      | 16665/43738 [2:07:41<4:05:50,  1.84it/s]

step:9240, train_loss:0.0630741866357653, acc:0.6597059705970597


 38%|███▊      | 16666/43738 [2:07:41<3:37:11,  2.08it/s]

step:9240, train_loss:0.0630802816550525, acc:0.6596663866554662


 38%|███▊      | 16667/43738 [2:07:41<3:33:58,  2.11it/s]

step:9240, train_loss:0.06308509790396381, acc:0.6596268074638507


 38%|███▊      | 16668/43738 [2:07:42<3:07:55,  2.40it/s]

step:9240, train_loss:0.06308169862282581, acc:0.6596472282217423


 38%|███▊      | 16669/43738 [2:07:42<2:53:56,  2.59it/s]

step:9240, train_loss:0.0630782075628568, acc:0.6596676465294858


 38%|███▊      | 16670/43738 [2:07:42<2:46:09,  2.72it/s]

step:9240, train_loss:0.0630745143099631, acc:0.6596880623875225


 38%|███▊      | 16671/43738 [2:07:43<3:05:28,  2.43it/s]

step:9240, train_loss:0.06307904631610785, acc:0.659648491392238


 39%|███▉      | 16976/43738 [2:09:59<3:17:16,  2.26it/s]

step:9260, train_loss:0.06308222889428979, acc:0.65934260131951


 39%|███▉      | 16977/43738 [2:09:59<2:48:20,  2.65it/s]

step:9260, train_loss:0.0630785338638552, acc:0.6593626671378925


 39%|███▉      | 16978/43738 [2:10:00<2:53:50,  2.57it/s]

step:9260, train_loss:0.06307763122347029, acc:0.6593827305925315


 39%|███▉      | 16979/43738 [2:10:00<3:01:39,  2.46it/s]

step:9260, train_loss:0.0630857456888651, acc:0.6593438954002002


 39%|███▉      | 16980/43738 [2:10:01<3:10:21,  2.34it/s]

step:9260, train_loss:0.06308203522361804, acc:0.6593639575971731


 39%|███▉      | 16981/43738 [2:10:01<3:43:47,  1.99it/s]

step:9260, train_loss:0.06307895353016801, acc:0.6593840174312466


 39%|███▉      | 16982/43738 [2:10:02<3:34:03,  2.08it/s]

step:9260, train_loss:0.06307900738659578, acc:0.6594040749028383


 39%|███▉      | 16983/43738 [2:10:02<3:35:24,  2.07it/s]

step:9260, train_loss:0.06307771673657259, acc:0.6594241300123653


 39%|███▉      | 16984/43738 [2:10:03<3:38:21,  2.04it/s]

step:9260, train_loss:0.06307423378148455, acc:0.659444182760245


 39%|███▉      | 16985/43738 [2:10:03<3:32:13,  2.10it/s]

step:9260, train_loss:0.06307070045939638, acc:0.6594642331468943


 39%|███▉      | 16986/43738 [2:10:04<3:28:40,  2.14it/s]

step:9260, train_loss:0.06307098303750482, acc:0.6594842811727305


 39%|███▉      | 16987/43738 [2:10:04<4:14:57,  1.75it/s]

step:9260, train_loss:0.06306733162929605, acc:0.6595043268381704


 39%|███▉      | 16988/43738 [2:10:05<3:41:47,  2.01it/s]

step:9260, train_loss:0.06306615214775714, acc:0.6595243701436307


 39%|███▉      | 16989/43738 [2:10:05<3:34:13,  2.08it/s]

step:9260, train_loss:0.0630721080602036, acc:0.6594855494731885


 39%|███▉      | 16990/43738 [2:10:05<3:01:23,  2.46it/s]

step:9260, train_loss:0.06306852861050612, acc:0.6595055915244261


 39%|███▉      | 16991/43738 [2:10:06<3:10:11,  2.34it/s]

step:9260, train_loss:0.06307757187598229, acc:0.6594667765287505


 40%|███▉      | 17296/43738 [2:12:28<2:50:19,  2.59it/s]

step:9280, train_loss:0.06313468084070785, acc:0.6588806660499538


 40%|███▉      | 17297/43738 [2:12:29<3:21:55,  2.18it/s]

step:9280, train_loss:0.06313165388203013, acc:0.6589003873504076


 40%|███▉      | 17298/43738 [2:12:29<2:53:58,  2.53it/s]

step:9280, train_loss:0.06312830831801507, acc:0.6589201063706787


 40%|███▉      | 17299/43738 [2:12:30<2:33:49,  2.86it/s]

step:9280, train_loss:0.06312645158538761, acc:0.6589398231111625


 40%|███▉      | 17300/43738 [2:12:30<2:31:32,  2.91it/s]

step:9280, train_loss:0.06313285920661982, acc:0.6589017341040463


 40%|███▉      | 17301/43738 [2:12:31<3:26:12,  2.14it/s]

step:9280, train_loss:0.06313535678243354, acc:0.6588636495000288


 40%|███▉      | 17302/43738 [2:12:31<3:32:56,  2.07it/s]

step:9280, train_loss:0.0631435347070597, acc:0.658825569298347


 40%|███▉      | 17303/43738 [2:12:32<3:25:26,  2.14it/s]

step:9280, train_loss:0.06314183532587798, acc:0.6588452869444605


 40%|███▉      | 17304/43738 [2:12:32<3:17:38,  2.23it/s]

step:9280, train_loss:0.06315017104712722, acc:0.6588072122052705


 40%|███▉      | 17305/43738 [2:12:32<3:01:44,  2.42it/s]

step:9280, train_loss:0.06315207229214764, acc:0.6587691418665126


 40%|███▉      | 17306/43738 [2:12:33<3:06:06,  2.37it/s]

step:9280, train_loss:0.06314901168712089, acc:0.658788859355137


 40%|███▉      | 17307/43738 [2:12:33<3:52:40,  1.89it/s]

step:9280, train_loss:0.06314539703027111, acc:0.6588085745652048


 40%|███▉      | 17308/43738 [2:12:34<4:11:12,  1.75it/s]

step:9280, train_loss:0.06314295496355776, acc:0.6588282874971112


 40%|███▉      | 17309/43738 [2:12:35<3:53:07,  1.89it/s]

step:9280, train_loss:0.06314362872264985, acc:0.6588479981512508


 40%|███▉      | 17310/43738 [2:12:35<3:17:38,  2.23it/s]

step:9280, train_loss:0.06313998102755079, acc:0.6588677065280185


 40%|███▉      | 17311/43738 [2:12:35<3:29:22,  2.10it/s]

step:9280, train_loss:0.06313816117114081, acc:0.6588874126278089


 40%|████      | 17616/43738 [2:15:01<3:52:58,  1.87it/s]

step:9300, train_loss:0.06320603102765375, acc:0.6586058128973661


 40%|████      | 17617/43738 [2:15:01<3:17:34,  2.20it/s]

step:9300, train_loss:0.0632054959891497, acc:0.6586251915763184


 40%|████      | 17618/43738 [2:15:02<3:45:32,  1.93it/s]

step:9300, train_loss:0.06320694251228115, acc:0.6585878079237144


 40%|████      | 17619/43738 [2:15:02<3:23:22,  2.14it/s]

step:9300, train_loss:0.06320367831637243, acc:0.6586071854248254


 40%|████      | 17620/43738 [2:15:02<2:55:40,  2.48it/s]

step:9300, train_loss:0.06320015971917393, acc:0.6586265607264472


 40%|████      | 17621/43738 [2:15:03<3:09:13,  2.30it/s]

step:9300, train_loss:0.06319664412001544, acc:0.6586459338289541


 40%|████      | 17622/43738 [2:15:04<4:00:08,  1.81it/s]

step:9300, train_loss:0.06319487476652305, acc:0.6586653047327204


 40%|████      | 17623/43738 [2:15:04<4:08:48,  1.75it/s]

step:9300, train_loss:0.06319210782453528, acc:0.6586846734381206


 40%|████      | 17624/43738 [2:15:05<3:44:24,  1.94it/s]

step:9300, train_loss:0.06318852553552502, acc:0.6587040399455288


 40%|████      | 17625/43738 [2:15:05<3:20:41,  2.17it/s]

step:9300, train_loss:0.06318554860549642, acc:0.6587234042553192


 40%|████      | 17626/43738 [2:15:06<3:34:11,  2.03it/s]

step:9300, train_loss:0.06318945348294938, acc:0.6586860319981845


 40%|████      | 17627/43738 [2:15:06<3:50:41,  1.89it/s]

step:9300, train_loss:0.06318629288833877, acc:0.6587053951324673


 40%|████      | 17628/43738 [2:15:07<3:34:38,  2.03it/s]

step:9300, train_loss:0.06318369430829891, acc:0.6587247560698888


 40%|████      | 17629/43738 [2:15:07<3:18:01,  2.20it/s]

step:9300, train_loss:0.0631802081787651, acc:0.6587441148108231


 40%|████      | 17630/43738 [2:15:08<3:25:33,  2.12it/s]

step:9300, train_loss:0.06318365455913726, acc:0.6587067498581962


 40%|████      | 17631/43738 [2:15:08<3:43:54,  1.94it/s]

step:9300, train_loss:0.06319224047173869, acc:0.6586693891441211


 41%|████      | 17936/43738 [2:17:25<4:04:27,  1.76it/s]

step:9320, train_loss:0.06315506289908555, acc:0.6582292595896521


 41%|████      | 17937/43738 [2:17:25<3:56:57,  1.81it/s]

step:9320, train_loss:0.06315167813107722, acc:0.6582483135418409


 41%|████      | 17938/43738 [2:17:26<3:38:03,  1.97it/s]

step:9320, train_loss:0.06316282990377695, acc:0.6582116177946259


 41%|████      | 17939/43738 [2:17:26<3:23:34,  2.11it/s]

step:9320, train_loss:0.0631593250240206, acc:0.6582306706059423


 41%|████      | 17940/43738 [2:17:26<2:59:07,  2.40it/s]

step:9320, train_loss:0.06315633170919907, acc:0.6582497212931996


 41%|████      | 17941/43738 [2:17:27<2:47:37,  2.56it/s]

step:9320, train_loss:0.06315435227820237, acc:0.6582687698567526


 41%|████      | 17942/43738 [2:17:27<2:57:31,  2.42it/s]

step:9320, train_loss:0.06315455596131428, acc:0.6582320811503735


 41%|████      | 17943/43738 [2:17:28<3:17:21,  2.18it/s]

step:9320, train_loss:0.06315404113960135, acc:0.6582511285738171


 41%|████      | 17944/43738 [2:17:28<3:10:42,  2.25it/s]

step:9320, train_loss:0.06315106363918714, acc:0.6582701738742756


 41%|████      | 17945/43738 [2:17:28<2:56:28,  2.44it/s]

step:9320, train_loss:0.06314756074849844, acc:0.6582892170521036


 41%|████      | 17946/43738 [2:17:29<2:34:21,  2.78it/s]

step:9320, train_loss:0.06314407249504952, acc:0.6583082581076563


 41%|████      | 17947/43738 [2:17:29<2:24:38,  2.97it/s]

step:9320, train_loss:0.06314066708626284, acc:0.6583272970412882


 41%|████      | 17948/43738 [2:17:29<2:15:36,  3.17it/s]

step:9320, train_loss:0.06314259214232014, acc:0.6582906173389793


 41%|████      | 17949/43738 [2:17:30<2:16:53,  3.14it/s]

step:9320, train_loss:0.06313911137948022, acc:0.6583096551339908


 41%|████      | 17950/43738 [2:17:30<3:16:26,  2.19it/s]

step:9320, train_loss:0.06313701891771603, acc:0.6583286908077994


 41%|████      | 17951/43738 [2:17:31<3:29:35,  2.05it/s]

step:9320, train_loss:0.06313694535771508, acc:0.6583477243607598


 42%|████▏     | 18256/43738 [2:19:51<3:13:10,  2.20it/s]

step:9340, train_loss:0.06317182447311463, acc:0.6577563540753725


 42%|████▏     | 18257/43738 [2:19:52<3:58:35,  1.78it/s]

step:9340, train_loss:0.06317435245190584, acc:0.6577203264501287


 42%|████▏     | 18258/43738 [2:19:53<4:04:17,  1.74it/s]

step:9340, train_loss:0.06317098619893508, acc:0.6577390732829445


 42%|████▏     | 18259/43738 [2:19:53<4:04:39,  1.74it/s]

step:9340, train_loss:0.06317189886823173, acc:0.6577030505504134


 42%|████▏     | 18260/43738 [2:19:53<3:28:00,  2.04it/s]

step:9340, train_loss:0.06317740331607223, acc:0.6576670317634173


 42%|████▏     | 18261/43738 [2:19:54<3:18:31,  2.14it/s]

step:9340, train_loss:0.06317439215397519, acc:0.6576857784349159


 42%|████▏     | 18262/43738 [2:19:54<3:12:14,  2.21it/s]

step:9340, train_loss:0.06317276656417863, acc:0.6577045230533348


 42%|████▏     | 18263/43738 [2:19:55<3:04:51,  2.30it/s]

step:9340, train_loss:0.06316930766662968, acc:0.6577232656190111


 42%|████▏     | 18264/43738 [2:19:55<3:07:06,  2.27it/s]

step:9340, train_loss:0.06316706280271665, acc:0.6577420061322821


 42%|████▏     | 18265/43738 [2:19:55<2:41:17,  2.63it/s]

step:9340, train_loss:0.063164056512963, acc:0.6577607445934848


 42%|████▏     | 18266/43738 [2:19:56<2:22:57,  2.97it/s]

step:9340, train_loss:0.06316061048191304, acc:0.6577794810029564


 42%|████▏     | 18267/43738 [2:19:56<2:38:23,  2.68it/s]

step:9340, train_loss:0.06317572550065434, acc:0.6577434718344556


 42%|████▏     | 18268/43738 [2:19:56<2:31:05,  2.81it/s]

step:9340, train_loss:0.06317480281291378, acc:0.6577622071381651


 42%|████▏     | 18269/43738 [2:19:57<3:18:59,  2.13it/s]

step:9340, train_loss:0.06317923893546028, acc:0.6577262028572992


 42%|████▏     | 18270/43738 [2:19:58<3:22:01,  2.10it/s]

step:9340, train_loss:0.06317663686814183, acc:0.6577449370552819


 42%|████▏     | 18271/43738 [2:19:58<3:15:15,  2.17it/s]

step:9340, train_loss:0.06318081986057664, acc:0.6577089376607739


 42%|████▏     | 18576/43738 [2:22:23<4:07:02,  1.70it/s]

step:9360, train_loss:0.06312001252865557, acc:0.657730404823428


 42%|████▏     | 18577/43738 [2:22:24<4:24:37,  1.58it/s]

step:9360, train_loss:0.063122267834814, acc:0.6576949991925499


 42%|████▏     | 18578/43738 [2:22:24<3:57:11,  1.77it/s]

step:9360, train_loss:0.06311960856411672, acc:0.6577134244805685


 42%|████▏     | 18579/43738 [2:22:25<4:03:46,  1.72it/s]

step:9360, train_loss:0.0631174354237894, acc:0.6577318477851337


 42%|████▏     | 18580/43738 [2:22:25<3:38:32,  1.92it/s]

step:9360, train_loss:0.0631140887854786, acc:0.6577502691065662


 42%|████▏     | 18581/43738 [2:22:26<3:12:49,  2.17it/s]

step:9360, train_loss:0.06311237083035531, acc:0.657768688445186


 42%|████▏     | 18582/43738 [2:22:26<2:59:39,  2.33it/s]

step:9360, train_loss:0.06311413975102514, acc:0.657733290280917


 42%|████▏     | 18583/43738 [2:22:26<2:51:49,  2.44it/s]

step:9360, train_loss:0.06311253443559381, acc:0.657751708550826


 42%|████▏     | 18584/43738 [2:22:27<2:33:34,  2.73it/s]

step:9360, train_loss:0.06310981716868647, acc:0.6577701248385708


 42%|████▏     | 18585/43738 [2:22:27<2:21:47,  2.96it/s]

step:9360, train_loss:0.06310647056180393, acc:0.6577885391444713


 42%|████▏     | 18586/43738 [2:22:28<2:50:13,  2.46it/s]

step:9360, train_loss:0.06310337671358236, acc:0.6578069514688475


 42%|████▏     | 18587/43738 [2:22:28<2:54:54,  2.40it/s]

step:9360, train_loss:0.063106534432203, acc:0.6577715607682789


 42%|████▏     | 18588/43738 [2:22:28<2:56:54,  2.37it/s]

step:9360, train_loss:0.063105858398364, acc:0.6577899720249624


 43%|████▎     | 18589/43738 [2:22:29<3:41:18,  1.89it/s]

step:9360, train_loss:0.06310360888457774, acc:0.6578083813007692


 43%|████▎     | 18590/43738 [2:22:30<3:41:12,  1.89it/s]

step:9360, train_loss:0.06310053175430674, acc:0.6578267885960194


 43%|████▎     | 18591/43738 [2:22:30<3:25:48,  2.04it/s]

step:9360, train_loss:0.06309894528199075, acc:0.6578451939110322


 43%|████▎     | 18896/43738 [2:24:48<3:16:19,  2.11it/s]

step:9380, train_loss:0.0631160114071024, acc:0.6580228619813717


 43%|████▎     | 18897/43738 [2:24:49<3:44:04,  1.85it/s]

step:9380, train_loss:0.06311404047495711, acc:0.6580409588823622


 43%|████▎     | 18898/43738 [2:24:50<3:50:41,  1.79it/s]

step:9380, train_loss:0.0631109744301034, acc:0.6580590538681342


 43%|████▎     | 18899/43738 [2:24:50<3:34:07,  1.93it/s]

step:9380, train_loss:0.06311217418938186, acc:0.6580771469389914


 43%|████▎     | 18900/43738 [2:24:50<3:09:36,  2.18it/s]

step:9380, train_loss:0.0631088778630568, acc:0.6580952380952381


 43%|████▎     | 18901/43738 [2:24:51<3:37:49,  1.90it/s]

step:9380, train_loss:0.06310607110891962, acc:0.658113327337178


 43%|████▎     | 18902/43738 [2:24:51<3:01:29,  2.28it/s]

step:9380, train_loss:0.06310273252453187, acc:0.6581314146651148


 43%|████▎     | 18903/43738 [2:24:52<3:00:43,  2.29it/s]

step:9380, train_loss:0.06309975573817576, acc:0.6581495000793525


 43%|████▎     | 18904/43738 [2:24:52<3:06:25,  2.22it/s]

step:9380, train_loss:0.06309728550619835, acc:0.6581675835801947


 43%|████▎     | 18905/43738 [2:24:53<3:15:44,  2.11it/s]

step:9380, train_loss:0.06311097030451962, acc:0.6581327691087014


 43%|████▎     | 18906/43738 [2:24:53<3:10:25,  2.17it/s]

step:9380, train_loss:0.06310765814389219, acc:0.6581508515815085


 43%|████▎     | 18907/43738 [2:24:54<3:12:38,  2.15it/s]

step:9380, train_loss:0.063107449324891, acc:0.6581160416776856


 43%|████▎     | 18908/43738 [2:24:54<2:46:36,  2.48it/s]

step:9380, train_loss:0.06310446529309717, acc:0.6581341231224879


 43%|████▎     | 18909/43738 [2:24:54<2:26:13,  2.83it/s]

step:9380, train_loss:0.06310795503410833, acc:0.6580993177851817


 43%|████▎     | 18910/43738 [2:24:55<3:01:31,  2.28it/s]

step:9380, train_loss:0.06310892736941312, acc:0.6580645161290323


 43%|████▎     | 18911/43738 [2:24:55<2:41:03,  2.57it/s]

step:9380, train_loss:0.06310588110478434, acc:0.6580825974300671


 44%|████▍     | 19216/43738 [2:27:21<3:02:36,  2.24it/s]

step:9400, train_loss:0.06306214938999888, acc:0.6579412989175687


 44%|████▍     | 19217/43738 [2:27:22<3:21:02,  2.03it/s]

step:9400, train_loss:0.0630632601067459, acc:0.6579070614560025


 44%|████▍     | 19218/43738 [2:27:22<3:17:12,  2.07it/s]

step:9400, train_loss:0.0630654301085077, acc:0.6578728275574982


 44%|████▍     | 19219/43738 [2:27:23<3:31:56,  1.93it/s]

step:9400, train_loss:0.06306724714972092, acc:0.6578906290649877


 44%|████▍     | 19220/43738 [2:27:23<3:31:31,  1.93it/s]

step:9400, train_loss:0.06307660937926775, acc:0.657856399583767


 44%|████▍     | 19221/43738 [2:27:24<3:35:53,  1.89it/s]

step:9400, train_loss:0.06308368392001829, acc:0.6578221736642215


 44%|████▍     | 19222/43738 [2:27:25<3:47:57,  1.79it/s]

step:9400, train_loss:0.06308096362446268, acc:0.657839975028613


 44%|████▍     | 19223/43738 [2:27:25<3:26:08,  1.98it/s]

step:9400, train_loss:0.06308533072760358, acc:0.6578057535244238


 44%|████▍     | 19224/43738 [2:27:25<3:35:16,  1.90it/s]

step:9400, train_loss:0.0630938999565403, acc:0.6577715355805244


 44%|████▍     | 19225/43738 [2:27:26<3:18:28,  2.06it/s]

step:9400, train_loss:0.06309572084592195, acc:0.6577373211963589


 44%|████▍     | 19226/43738 [2:27:26<3:15:49,  2.09it/s]

step:9400, train_loss:0.06309243905455371, acc:0.6577551232705711


 44%|████▍     | 19227/43738 [2:27:27<2:56:17,  2.32it/s]

step:9400, train_loss:0.0630897891868697, acc:0.6577729234930046


 44%|████▍     | 19228/43738 [2:27:27<2:32:50,  2.67it/s]

step:9400, train_loss:0.06308651160944372, acc:0.6577907218639484


 44%|████▍     | 19229/43738 [2:27:27<2:34:59,  2.64it/s]

step:9400, train_loss:0.0630922506851416, acc:0.6577565135992511


 44%|████▍     | 19230/43738 [2:27:28<2:52:10,  2.37it/s]

step:9400, train_loss:0.06310168400799308, acc:0.6577223088923557


 44%|████▍     | 19231/43738 [2:27:29<3:32:53,  1.92it/s]

step:9400, train_loss:0.0631030491837684, acc:0.6576881077427071


 45%|████▍     | 19536/43738 [2:29:51<2:43:28,  2.47it/s]

step:9420, train_loss:0.06314390655553533, acc:0.6572993447993448


 45%|████▍     | 19537/43738 [2:29:52<3:15:49,  2.06it/s]

step:9420, train_loss:0.06314312472101208, acc:0.6573168859087885


 45%|████▍     | 19538/43738 [2:29:52<3:14:52,  2.07it/s]

step:9420, train_loss:0.06314065561860768, acc:0.6573344252226431


 45%|████▍     | 19539/43738 [2:29:53<3:04:51,  2.18it/s]

step:9420, train_loss:0.06313759675723662, acc:0.6573519627411843


 45%|████▍     | 19540/43738 [2:29:53<2:41:19,  2.50it/s]

step:9420, train_loss:0.0631379669371647, acc:0.6573183213920164


 45%|████▍     | 19541/43738 [2:29:53<2:39:25,  2.53it/s]

step:9420, train_loss:0.06313477977870048, acc:0.6573358579397165


 45%|████▍     | 19542/43738 [2:29:54<3:25:28,  1.96it/s]

step:9420, train_loss:0.0631334643144158, acc:0.6573533926926619


 45%|████▍     | 19543/43738 [2:29:54<3:00:49,  2.23it/s]

step:9420, train_loss:0.06313824537209868, acc:0.657319756434529


 45%|████▍     | 19544/43738 [2:29:55<2:36:13,  2.58it/s]

step:9420, train_loss:0.06313501847477061, acc:0.6573372902169464


 45%|████▍     | 19545/43738 [2:29:55<2:27:40,  2.73it/s]

step:9420, train_loss:0.0631318112177805, acc:0.6573548222051676


 45%|████▍     | 19546/43738 [2:29:56<3:17:37,  2.04it/s]

step:9420, train_loss:0.06313932872466187, acc:0.6573211910365292


 45%|████▍     | 19547/43738 [2:29:56<2:52:04,  2.34it/s]

step:9420, train_loss:0.06313985352147881, acc:0.6573387220545353


 45%|████▍     | 19548/43738 [2:29:56<2:54:41,  2.31it/s]

step:9420, train_loss:0.06313669526972715, acc:0.6573562512789032


 45%|████▍     | 19549/43738 [2:29:57<2:40:37,  2.51it/s]

step:9420, train_loss:0.06313346564568452, acc:0.6573737787099084


 45%|████▍     | 19550/43738 [2:29:57<2:52:22,  2.34it/s]

step:9420, train_loss:0.06313033375532671, acc:0.6573913043478261


 45%|████▍     | 19551/43738 [2:29:58<3:08:29,  2.14it/s]

step:9420, train_loss:0.06312749444543539, acc:0.6574088281929313


 45%|████▌     | 19856/43738 [2:32:28<3:58:43,  1.67it/s]

step:9440, train_loss:0.06319943538304126, acc:0.6567284448025785


 45%|████▌     | 19857/43738 [2:32:29<4:24:29,  1.50it/s]

step:9440, train_loss:0.06320028524727495, acc:0.6567457319836834


 45%|████▌     | 19858/43738 [2:32:29<4:04:40,  1.63it/s]

step:9440, train_loss:0.06320316645909338, acc:0.6567126598851848


 45%|████▌     | 19859/43738 [2:32:30<4:08:49,  1.60it/s]

step:9440, train_loss:0.06320347751983739, acc:0.6566795911173775


 45%|████▌     | 19860/43738 [2:32:30<3:52:17,  1.71it/s]

step:9440, train_loss:0.06321357262618538, acc:0.6566465256797583


 45%|████▌     | 19861/43738 [2:32:31<4:27:57,  1.49it/s]

step:9440, train_loss:0.06321445965638946, acc:0.6566638135038517


 45%|████▌     | 19862/43738 [2:32:32<4:51:55,  1.36it/s]

step:9440, train_loss:0.06321889693247042, acc:0.6566307521901118


 45%|████▌     | 19863/43738 [2:32:33<4:15:29,  1.56it/s]

step:9440, train_loss:0.06321744267474565, acc:0.6566480390676132


 45%|████▌     | 19864/43738 [2:32:33<3:43:22,  1.78it/s]

step:9440, train_loss:0.06322056054411115, acc:0.6566149818767619


 45%|████▌     | 19865/43738 [2:32:33<3:38:30,  1.82it/s]

step:9440, train_loss:0.06321742392806787, acc:0.656632267807702


 45%|████▌     | 19866/43738 [2:32:34<4:12:33,  1.58it/s]

step:9440, train_loss:0.06321881940575827, acc:0.6565992147387496


 45%|████▌     | 19867/43738 [2:32:35<4:26:18,  1.49it/s]

step:9440, train_loss:0.06322535953031623, acc:0.6565661649972316


 45%|████▌     | 19868/43738 [2:32:35<3:52:00,  1.71it/s]

step:9440, train_loss:0.06322231896288423, acc:0.6565834507751158


 45%|████▌     | 19869/43738 [2:32:36<3:10:19,  2.09it/s]

step:9440, train_loss:0.0632191388470399, acc:0.6566007348130253


 45%|████▌     | 19870/43738 [2:32:36<3:19:51,  1.99it/s]

step:9440, train_loss:0.06321664949175461, acc:0.6566180171112229


 45%|████▌     | 19871/43738 [2:32:36<2:48:04,  2.37it/s]

step:9440, train_loss:0.06321364839929002, acc:0.6566352976699713


 46%|████▌     | 20176/43738 [2:34:59<3:31:52,  1.85it/s]

step:9460, train_loss:0.06312524068838003, acc:0.6570182394924663


 46%|████▌     | 20177/43738 [2:35:00<3:07:55,  2.09it/s]

step:9460, train_loss:0.06312213009059425, acc:0.6570352381424394


 46%|████▌     | 20178/43738 [2:35:00<3:03:51,  2.14it/s]

step:9460, train_loss:0.0631232063022269, acc:0.6570026761819804


 46%|████▌     | 20179/43738 [2:35:01<3:18:21,  1.98it/s]

step:9460, train_loss:0.0631212853289032, acc:0.6570196739184301


 46%|████▌     | 20180/43738 [2:35:01<3:09:54,  2.07it/s]

step:9460, train_loss:0.06311833120489249, acc:0.6570366699702676


 46%|████▌     | 20181/43738 [2:35:02<3:35:25,  1.82it/s]

step:9460, train_loss:0.06312076720730646, acc:0.6570041127793469


 46%|████▌     | 20182/43738 [2:35:02<3:09:00,  2.08it/s]

step:9460, train_loss:0.0631206722605937, acc:0.6569715588147854


 46%|████▌     | 20183/43738 [2:35:03<3:32:50,  1.84it/s]

step:9460, train_loss:0.06312471993100623, acc:0.6569390080761036


 46%|████▌     | 20184/43738 [2:35:03<3:14:15,  2.02it/s]

step:9460, train_loss:0.06312814700447432, acc:0.6569064605628221


 46%|████▌     | 20185/43738 [2:35:04<3:36:28,  1.81it/s]

step:9460, train_loss:0.06313543677785816, acc:0.6568739162744612


 46%|████▌     | 20186/43738 [2:35:05<4:00:40,  1.63it/s]

step:9460, train_loss:0.06313374921549147, acc:0.6568909144951947


 46%|████▌     | 20187/43738 [2:35:05<3:59:48,  1.64it/s]

step:9460, train_loss:0.06315447186390746, acc:0.6568583742012186


 46%|████▌     | 20188/43738 [2:35:06<3:25:12,  1.91it/s]

step:9460, train_loss:0.06315300454028824, acc:0.6568753715078264


 46%|████▌     | 20189/43738 [2:35:06<2:48:12,  2.33it/s]

step:9460, train_loss:0.06314987748954623, acc:0.6568923671306157


 46%|████▌     | 20190/43738 [2:35:06<2:56:04,  2.23it/s]

step:9460, train_loss:0.06315255876416453, acc:0.6568598315998019


 46%|████▌     | 20191/43738 [2:35:07<2:34:59,  2.53it/s]

step:9460, train_loss:0.06315045081740206, acc:0.6568768263087514


 47%|████▋     | 20496/43738 [2:37:24<2:53:13,  2.24it/s]

step:9480, train_loss:0.06309148686562875, acc:0.6568110850897736


 47%|████▋     | 20497/43738 [2:37:25<3:07:34,  2.06it/s]

step:9480, train_loss:0.0630961408144301, acc:0.6567790408352442


 47%|████▋     | 20498/43738 [2:37:25<3:05:51,  2.08it/s]

step:9480, train_loss:0.06309339689998669, acc:0.6567957849546298


 47%|████▋     | 20499/43738 [2:37:26<3:11:08,  2.03it/s]

step:9480, train_loss:0.06309038108117272, acc:0.6568125274403629


 47%|████▋     | 20500/43738 [2:37:27<3:25:23,  1.89it/s]

step:9480, train_loss:0.06309243796201049, acc:0.6567804878048781


 47%|████▋     | 20501/43738 [2:37:27<2:57:26,  2.18it/s]

step:9480, train_loss:0.06309481498916865, acc:0.6567484512950588


 47%|████▋     | 20502/43738 [2:37:27<3:02:42,  2.12it/s]

step:9480, train_loss:0.06309176079379437, acc:0.6567651936396449


 47%|████▋     | 20503/43738 [2:37:28<3:20:25,  1.93it/s]

step:9480, train_loss:0.06308904376739136, acc:0.6567819343510706


 47%|████▋     | 20504/43738 [2:37:28<3:09:36,  2.04it/s]

step:9480, train_loss:0.06309257156856994, acc:0.656749902458057


 47%|████▋     | 20505/43738 [2:37:29<2:40:41,  2.41it/s]

step:9480, train_loss:0.06308951773941623, acc:0.6567666422823701


 47%|████▋     | 20506/43738 [2:37:29<3:19:40,  1.94it/s]

step:9480, train_loss:0.06308994211449834, acc:0.6567833804740076


 47%|████▋     | 20507/43738 [2:37:30<2:49:22,  2.29it/s]

step:9480, train_loss:0.06308706581737195, acc:0.6568001170332082


 47%|████▋     | 20508/43738 [2:37:30<2:30:50,  2.57it/s]

step:9480, train_loss:0.06308497794160717, acc:0.6568168519602107


 47%|████▋     | 20509/43738 [2:37:30<2:13:10,  2.91it/s]

step:9480, train_loss:0.06308199926597982, acc:0.6568335852552538


 47%|████▋     | 20510/43738 [2:37:30<2:05:12,  3.09it/s]

step:9480, train_loss:0.06309087572982074, acc:0.6568015602145295


 47%|████▋     | 20511/43738 [2:37:31<2:21:18,  2.74it/s]

step:9480, train_loss:0.06308891963330138, acc:0.6568182926234704


 48%|████▊     | 20816/43738 [2:39:50<3:40:20,  1.73it/s]

step:9500, train_loss:0.06312540447598453, acc:0.6565622598001537


 48%|████▊     | 20817/43738 [2:39:51<3:32:00,  1.80it/s]

step:9500, train_loss:0.06312644838963154, acc:0.6565787577460729


 48%|████▊     | 20818/43738 [2:39:51<4:03:44,  1.57it/s]

step:9500, train_loss:0.06312860275035716, acc:0.6565472187530023


 48%|████▊     | 20819/43738 [2:39:52<3:49:37,  1.66it/s]

step:9500, train_loss:0.06312719132658616, acc:0.6565637158364955


 48%|████▊     | 20820/43738 [2:39:52<3:14:26,  1.96it/s]

step:9500, train_loss:0.06312586756554349, acc:0.6565802113352546


 48%|████▊     | 20821/43738 [2:39:53<2:52:23,  2.22it/s]

step:9500, train_loss:0.06312283575911168, acc:0.6565967052495078


 48%|████▊     | 20822/43738 [2:39:53<3:11:06,  2.00it/s]

step:9500, train_loss:0.06312126743923348, acc:0.6566131975794832


 48%|████▊     | 20823/43738 [2:39:54<3:11:56,  1.99it/s]

step:9500, train_loss:0.06311840056219382, acc:0.6566296883254094


 48%|████▊     | 20824/43738 [2:39:54<2:55:44,  2.17it/s]

step:9500, train_loss:0.06312837398425931, acc:0.6565981559738763


 48%|████▊     | 20825/43738 [2:39:55<2:55:09,  2.18it/s]

step:9500, train_loss:0.06312720091299893, acc:0.6566146458583433


 48%|████▊     | 20826/43738 [2:39:55<2:56:33,  2.16it/s]

step:9500, train_loss:0.06312423275728285, acc:0.656631134159224


 48%|████▊     | 20827/43738 [2:39:56<3:13:20,  1.98it/s]

step:9500, train_loss:0.06312289699338877, acc:0.6566476208767466


 48%|████▊     | 20828/43738 [2:39:56<2:47:38,  2.28it/s]

step:9500, train_loss:0.06312181653621139, acc:0.6566641060111389


 48%|████▊     | 20829/43738 [2:39:56<2:44:48,  2.32it/s]

step:9500, train_loss:0.06312200406920913, acc:0.656680589562629


 48%|████▊     | 20830/43738 [2:39:57<2:36:48,  2.43it/s]

step:9500, train_loss:0.06312204960825477, acc:0.656649063850216


 48%|████▊     | 20831/43738 [2:39:57<2:22:13,  2.68it/s]

step:9500, train_loss:0.06311902032916149, acc:0.6566655465412126


 48%|████▊     | 21136/43738 [2:42:12<3:18:44,  1.90it/s]

step:9520, train_loss:0.06302278722207126, acc:0.657503785011355


 48%|████▊     | 21137/43738 [2:42:13<2:56:39,  2.13it/s]

step:9520, train_loss:0.06301986834024971, acc:0.6575199886455031


 48%|████▊     | 21138/43738 [2:42:13<2:35:25,  2.42it/s]

step:9520, train_loss:0.06301691908140913, acc:0.6575361907465228


 48%|████▊     | 21139/43738 [2:42:13<2:29:41,  2.52it/s]

step:9520, train_loss:0.06301667924024296, acc:0.657505085387199


 48%|████▊     | 21140/43738 [2:42:14<2:12:16,  2.85it/s]

step:9520, train_loss:0.06301372870053669, acc:0.6575212866603595


 48%|████▊     | 21141/43738 [2:42:14<2:25:19,  2.59it/s]

step:9520, train_loss:0.06301213081778342, acc:0.6575374864008325


 48%|████▊     | 21142/43738 [2:42:14<2:09:43,  2.90it/s]

step:9520, train_loss:0.06301294577744444, acc:0.6575063853940024


 48%|████▊     | 21143/43738 [2:42:15<2:01:25,  3.10it/s]

step:9520, train_loss:0.06301672846134627, acc:0.6574752873291396


 48%|████▊     | 21144/43738 [2:42:15<2:26:23,  2.57it/s]

step:9520, train_loss:0.06301412589781168, acc:0.6574914869466515


 48%|████▊     | 21145/43738 [2:42:15<2:21:25,  2.66it/s]

step:9520, train_loss:0.06301529994130663, acc:0.6574603925277843


 48%|████▊     | 21146/43738 [2:42:16<2:23:22,  2.63it/s]

step:9520, train_loss:0.06301655857945511, acc:0.6574765913175069


 48%|████▊     | 21147/43738 [2:42:16<2:49:33,  2.22it/s]

step:9520, train_loss:0.06301378007226949, acc:0.6574927885752117


 48%|████▊     | 21148/43738 [2:42:17<2:57:11,  2.12it/s]

step:9520, train_loss:0.06301146829451196, acc:0.6575089843011159


 48%|████▊     | 21149/43738 [2:42:17<2:33:41,  2.45it/s]

step:9520, train_loss:0.06300849336983429, acc:0.6575251784954371


 48%|████▊     | 21150/43738 [2:42:18<3:16:53,  1.91it/s]

step:9520, train_loss:0.06301306693680116, acc:0.6574940898345154


 48%|████▊     | 21151/43738 [2:42:18<3:11:12,  1.97it/s]

step:9520, train_loss:0.0630189124279887, acc:0.6574630041132807


 49%|████▉     | 21456/43738 [2:44:44<3:33:21,  1.74it/s]

step:9540, train_loss:0.0631186488891562, acc:0.6569258016405667


 49%|████▉     | 21457/43738 [2:44:44<3:16:19,  1.89it/s]

step:9540, train_loss:0.06311705322610288, acc:0.65694179055786


 49%|████▉     | 21458/43738 [2:44:45<3:03:09,  2.03it/s]

step:9540, train_loss:0.06311543960614774, acc:0.6569577779849007


 49%|████▉     | 21459/43738 [2:44:45<3:04:20,  2.01it/s]

step:9540, train_loss:0.06311487030564705, acc:0.6569737639218975


 49%|████▉     | 21460/43738 [2:44:46<3:26:56,  1.79it/s]

step:9540, train_loss:0.06311274347718072, acc:0.6569897483690588


 49%|████▉     | 21461/43738 [2:44:46<3:13:43,  1.92it/s]

step:9540, train_loss:0.06311128601466466, acc:0.6570057313265925


 49%|████▉     | 21462/43738 [2:44:47<2:53:59,  2.13it/s]

step:9540, train_loss:0.06310844424914382, acc:0.6570217127947069


 49%|████▉     | 21463/43738 [2:44:47<3:37:14,  1.71it/s]

step:9540, train_loss:0.06310826348985754, acc:0.6570376927736103


 49%|████▉     | 21464/43738 [2:44:48<3:35:01,  1.73it/s]

step:9540, train_loss:0.06311420865893472, acc:0.6570070816250466


 49%|████▉     | 21465/43738 [2:44:48<3:14:29,  1.91it/s]

step:9540, train_loss:0.06311169112573421, acc:0.6570230607966457


 49%|████▉     | 21466/43738 [2:44:49<3:03:40,  2.02it/s]

step:9540, train_loss:0.06310890784698928, acc:0.6570390384794559


 49%|████▉     | 21467/43738 [2:44:49<2:43:27,  2.27it/s]

step:9540, train_loss:0.06310793101529942, acc:0.6570550146736852


 49%|████▉     | 21468/43738 [2:44:49<2:31:00,  2.46it/s]

step:9540, train_loss:0.06310615091683609, acc:0.6570709893795417


 49%|████▉     | 21469/43738 [2:44:50<2:21:34,  2.62it/s]

step:9540, train_loss:0.06310645621462277, acc:0.6570403838092133


 49%|████▉     | 21470/43738 [2:44:50<2:14:52,  2.75it/s]

step:9540, train_loss:0.06310863189757272, acc:0.6570097810898928


 49%|████▉     | 21471/43738 [2:44:50<2:14:30,  2.76it/s]

step:9540, train_loss:0.06310681684952975, acc:0.6570257556704392


 50%|████▉     | 21776/43738 [2:47:13<3:28:54,  1.75it/s]

step:9560, train_loss:0.06315926958859663, acc:0.6569617927994122


 50%|████▉     | 21777/43738 [2:47:13<3:46:23,  1.62it/s]

step:9560, train_loss:0.06315643337229125, acc:0.6569775451164073


 50%|████▉     | 21778/43738 [2:47:14<3:19:56,  1.83it/s]

step:9560, train_loss:0.06315714789487753, acc:0.6569932959867757


 50%|████▉     | 21779/43738 [2:47:14<3:38:45,  1.67it/s]

step:9560, train_loss:0.0631542850803894, acc:0.6570090454107167


 50%|████▉     | 21780/43738 [2:47:15<3:24:17,  1.79it/s]

step:9560, train_loss:0.06315139646612138, acc:0.6570247933884298


 50%|████▉     | 21781/43738 [2:47:16<3:38:59,  1.67it/s]

step:9560, train_loss:0.06314984984416602, acc:0.6570405399201139


 50%|████▉     | 21782/43738 [2:47:16<3:21:52,  1.81it/s]

step:9560, train_loss:0.06314697742767429, acc:0.6570562850059682


 50%|████▉     | 21783/43738 [2:47:16<2:52:42,  2.12it/s]

step:9560, train_loss:0.06314481035335458, acc:0.657072028646192


 50%|████▉     | 21784/43738 [2:47:17<3:11:36,  1.91it/s]

step:9560, train_loss:0.0631432044491861, acc:0.6570877708409842


 50%|████▉     | 21785/43738 [2:47:17<2:41:32,  2.27it/s]

step:9560, train_loss:0.0631407624442084, acc:0.6571035115905439


 50%|████▉     | 21786/43738 [2:47:18<2:55:30,  2.08it/s]

step:9560, train_loss:0.06313788039517784, acc:0.6571192508950703


 50%|████▉     | 21787/43738 [2:47:18<2:39:31,  2.29it/s]

step:9560, train_loss:0.06313814062591304, acc:0.6570890898242071


 50%|████▉     | 21788/43738 [2:47:18<2:20:25,  2.61it/s]

step:9560, train_loss:0.06313603157138403, acc:0.6571048283458785


 50%|████▉     | 21789/43738 [2:47:19<2:05:17,  2.92it/s]

step:9560, train_loss:0.06313315487629208, acc:0.6571205654229199


 50%|████▉     | 21790/43738 [2:47:19<2:17:54,  2.65it/s]

step:9560, train_loss:0.06313323731869004, acc:0.6570904084442405


 50%|████▉     | 21791/43738 [2:47:19<2:18:43,  2.64it/s]

step:9560, train_loss:0.06313608822870982, acc:0.6570602542333991


 51%|█████     | 22096/43738 [2:49:37<2:45:47,  2.18it/s]

step:9580, train_loss:0.0630407009960052, acc:0.6572230267921796


 51%|█████     | 22097/43738 [2:49:38<2:47:14,  2.16it/s]

step:9580, train_loss:0.06303825089678204, acc:0.6572385391682128


 51%|█████     | 22098/43738 [2:49:38<2:26:30,  2.46it/s]

step:9580, train_loss:0.06303559162690639, acc:0.6572540501402842


 51%|█████     | 22099/43738 [2:49:39<3:06:16,  1.94it/s]

step:9580, train_loss:0.06304057822140847, acc:0.657224308792253


 51%|█████     | 22100/43738 [2:49:39<2:36:16,  2.31it/s]

step:9580, train_loss:0.06303775098189296, acc:0.6572398190045249


 51%|█████     | 22101/43738 [2:49:39<2:50:41,  2.11it/s]

step:9580, train_loss:0.06303491230707334, acc:0.6572553278132212


 51%|█████     | 22102/43738 [2:49:40<2:43:34,  2.20it/s]

step:9580, train_loss:0.06303209047140221, acc:0.6572708352185322


 51%|█████     | 22103/43738 [2:49:40<2:25:35,  2.48it/s]

step:9580, train_loss:0.06302993117497206, acc:0.6572863412206488


 51%|█████     | 22104/43738 [2:49:40<2:12:47,  2.72it/s]

step:9580, train_loss:0.06302708045887565, acc:0.6573018458197611


 51%|█████     | 22105/43738 [2:49:41<2:36:58,  2.30it/s]

step:9580, train_loss:0.06302600445969618, acc:0.6573173490160598


 51%|█████     | 22106/43738 [2:49:41<2:21:37,  2.55it/s]

step:9580, train_loss:0.0630232094764327, acc:0.6573328508097349


 51%|█████     | 22107/43738 [2:49:41<2:05:40,  2.87it/s]

step:9580, train_loss:0.06302042834762699, acc:0.6573483512009771


 51%|█████     | 22108/43738 [2:49:42<2:21:01,  2.56it/s]

step:9580, train_loss:0.06301806653760295, acc:0.6573638501899765


 51%|█████     | 22109/43738 [2:49:42<2:26:07,  2.47it/s]

step:9580, train_loss:0.06301561888439593, acc:0.6573793477769234


 51%|█████     | 22110/43738 [2:49:43<2:26:05,  2.47it/s]

step:9580, train_loss:0.06301843312632725, acc:0.6573496155585707


 51%|█████     | 22111/43738 [2:49:43<2:26:23,  2.46it/s]

step:9580, train_loss:0.06301563898233696, acc:0.6573651123874994


 51%|█████▏    | 22416/43738 [2:52:13<3:59:46,  1.48it/s]

step:9600, train_loss:0.06304379464372824, acc:0.6573429693076374


 51%|█████▏    | 22417/43738 [2:52:13<3:32:55,  1.67it/s]

step:9600, train_loss:0.06304527554512376, acc:0.6573136458937413


 51%|█████▏    | 22418/43738 [2:52:13<3:04:09,  1.93it/s]

step:9600, train_loss:0.063042486858613, acc:0.6573289321081274


 51%|█████▏    | 22419/43738 [2:52:14<3:22:56,  1.75it/s]

step:9600, train_loss:0.06304124505418565, acc:0.6573442169588296


 51%|█████▏    | 22420/43738 [2:52:15<3:39:32,  1.62it/s]

step:9600, train_loss:0.06305189687188498, acc:0.6573148974130241


 51%|█████▏    | 22421/43738 [2:52:15<3:15:15,  1.82it/s]

step:9600, train_loss:0.06304908620279658, acc:0.6573301815262477


 51%|█████▏    | 22422/43738 [2:52:16<3:04:11,  1.93it/s]

step:9600, train_loss:0.06304711410266288, acc:0.6573454642761574


 51%|█████▏    | 22423/43738 [2:52:16<3:19:25,  1.78it/s]

step:9600, train_loss:0.06304938575408052, acc:0.6573161485974223


 51%|█████▏    | 22424/43738 [2:52:17<3:28:46,  1.70it/s]

step:9600, train_loss:0.06305858908798, acc:0.6572868355333571


 51%|█████▏    | 22425/43738 [2:52:18<3:22:22,  1.76it/s]

step:9600, train_loss:0.06305653066497656, acc:0.6573021181716834


 51%|█████▏    | 22426/43738 [2:52:18<3:17:28,  1.80it/s]

step:9600, train_loss:0.06305386244406247, acc:0.6573173994470703


 51%|█████▏    | 22427/43738 [2:52:18<2:58:05,  1.99it/s]

step:9600, train_loss:0.06305107023536304, acc:0.6573326793597004


 51%|█████▏    | 22428/43738 [2:52:19<2:55:11,  2.03it/s]

step:9600, train_loss:0.0630482709619425, acc:0.6573479579097556


 51%|█████▏    | 22429/43738 [2:52:19<2:47:42,  2.12it/s]

step:9600, train_loss:0.06304917798029641, acc:0.6573632350974186


 51%|█████▏    | 22430/43738 [2:52:20<3:01:30,  1.96it/s]

step:9600, train_loss:0.06304729277100878, acc:0.6573785109228711


 51%|█████▏    | 22431/43738 [2:52:20<2:46:38,  2.13it/s]

step:9600, train_loss:0.06304451130582077, acc:0.6573937853862958


 52%|█████▏    | 22736/43738 [2:54:39<2:07:45,  2.74it/s]

step:9620, train_loss:0.06306851818717571, acc:0.6567997888810697


 52%|█████▏    | 22737/43738 [2:54:39<2:04:57,  2.80it/s]

step:9620, train_loss:0.06306792801375964, acc:0.6568148832299776


 52%|█████▏    | 22738/43738 [2:54:39<2:12:53,  2.63it/s]

step:9620, train_loss:0.06306532849871596, acc:0.6568299762512094


 52%|█████▏    | 22739/43738 [2:54:40<1:57:59,  2.97it/s]

step:9620, train_loss:0.0630633311621461, acc:0.6568450679449405


 52%|█████▏    | 22740/43738 [2:54:40<2:09:14,  2.71it/s]

step:9620, train_loss:0.06306121676501447, acc:0.6568601583113457


 52%|█████▏    | 22741/43738 [2:54:40<2:03:31,  2.83it/s]

step:9620, train_loss:0.06306171250064059, acc:0.6568312739105581


 52%|█████▏    | 22742/43738 [2:54:41<1:52:40,  3.11it/s]

step:9620, train_loss:0.06305894534027229, acc:0.6568463635564155


 52%|█████▏    | 22743/43738 [2:54:41<1:45:41,  3.31it/s]

step:9620, train_loss:0.063060710316289, acc:0.6568174823022468


 52%|█████▏    | 22744/43738 [2:54:42<2:37:20,  2.22it/s]

step:9620, train_loss:0.06305798532826241, acc:0.6568325712275765


 52%|█████▏    | 22745/43738 [2:54:42<2:51:43,  2.04it/s]

step:9620, train_loss:0.06305800605312006, acc:0.6568476588261156


 52%|█████▏    | 22746/43738 [2:54:43<2:54:35,  2.00it/s]

step:9620, train_loss:0.06305695169006532, acc:0.6568627450980392


 52%|█████▏    | 22747/43738 [2:54:43<3:08:55,  1.85it/s]

step:9620, train_loss:0.06305438409271223, acc:0.6568778300435222


 52%|█████▏    | 22748/43738 [2:54:44<2:48:03,  2.08it/s]

step:9620, train_loss:0.06305202746759311, acc:0.6568929136627396


 52%|█████▏    | 22749/43738 [2:54:44<2:36:38,  2.23it/s]

step:9620, train_loss:0.06304939671475514, acc:0.6569079959558662


 52%|█████▏    | 22750/43738 [2:54:44<2:27:10,  2.38it/s]

step:9620, train_loss:0.0630483324427828, acc:0.6569230769230769


 52%|█████▏    | 22751/43738 [2:54:45<2:13:04,  2.63it/s]

step:9620, train_loss:0.06305156013544573, acc:0.6568942024526394


 53%|█████▎    | 23056/43738 [2:57:07<3:12:37,  1.79it/s]

step:9640, train_loss:0.06313818796514702, acc:0.6565752949340735


 53%|█████▎    | 23057/43738 [2:57:08<2:42:10,  2.13it/s]

step:9640, train_loss:0.06313643074872205, acc:0.6565901895302945


 53%|█████▎    | 23058/43738 [2:57:08<2:26:37,  2.35it/s]

step:9640, train_loss:0.06313418727663926, acc:0.6566050828345911


 53%|█████▎    | 23059/43738 [2:57:08<2:28:41,  2.32it/s]

step:9640, train_loss:0.06313722456897929, acc:0.6565766078320829


 53%|█████▎    | 23060/43738 [2:57:09<2:18:41,  2.48it/s]

step:9640, train_loss:0.06313448713493183, acc:0.6565915004336513


 53%|█████▎    | 23061/43738 [2:57:09<2:26:23,  2.35it/s]

step:9640, train_loss:0.06313292341329815, acc:0.6566063917436364


 53%|█████▎    | 23062/43738 [2:57:10<2:27:05,  2.34it/s]

step:9640, train_loss:0.06313347484073072, acc:0.6566212817622062


 53%|█████▎    | 23063/43738 [2:57:10<2:45:52,  2.08it/s]

step:9640, train_loss:0.06313180373731785, acc:0.6566361704895287


 53%|█████▎    | 23064/43738 [2:57:11<2:33:38,  2.24it/s]

step:9640, train_loss:0.06313365165663158, acc:0.6566077003121749


 53%|█████▎    | 23065/43738 [2:57:11<2:20:35,  2.45it/s]

step:9640, train_loss:0.06313810815324573, acc:0.6565792326035118


 53%|█████▎    | 23066/43738 [2:57:11<2:08:15,  2.69it/s]

step:9640, train_loss:0.06314098752084274, acc:0.6565507673632186


 53%|█████▎    | 23067/43738 [2:57:12<2:12:01,  2.61it/s]

step:9640, train_loss:0.06314170228759795, acc:0.6565223045909742


 53%|█████▎    | 23068/43738 [2:57:12<2:11:50,  2.61it/s]

step:9640, train_loss:0.06313983537072769, acc:0.6565371943818277


 53%|█████▎    | 23069/43738 [2:57:13<2:45:30,  2.08it/s]

step:9640, train_loss:0.06314187969091631, acc:0.6565520828817895


 53%|█████▎    | 23070/43738 [2:57:13<2:45:37,  2.08it/s]

step:9640, train_loss:0.0631394807319727, acc:0.6565669700910273


 53%|█████▎    | 23071/43738 [2:57:14<2:40:42,  2.14it/s]

step:9640, train_loss:0.06314284537501333, acc:0.6565385115512982


 53%|█████▎    | 23376/43738 [2:59:34<2:59:59,  1.89it/s]

step:9660, train_loss:0.06315077039866535, acc:0.6562286105407256


 53%|█████▎    | 23377/43738 [2:59:34<2:56:20,  1.92it/s]

step:9660, train_loss:0.06314848903550016, acc:0.6562433160799076


 53%|█████▎    | 23378/43738 [2:59:35<2:42:23,  2.09it/s]

step:9660, train_loss:0.06314731754444958, acc:0.6562580203610232


 53%|█████▎    | 23379/43738 [2:59:36<3:11:39,  1.77it/s]

step:9660, train_loss:0.0631462199825314, acc:0.6562727233842337


 53%|█████▎    | 23380/43738 [2:59:36<2:52:09,  1.97it/s]

step:9660, train_loss:0.06314987872487562, acc:0.6562446535500428


 53%|█████▎    | 23381/43738 [2:59:37<2:58:17,  1.90it/s]

step:9660, train_loss:0.0631477098091945, acc:0.6562593558872589


 53%|█████▎    | 23382/43738 [2:59:37<2:51:16,  1.98it/s]

step:9660, train_loss:0.0631451803851531, acc:0.6562740569668977


 53%|█████▎    | 23383/43738 [2:59:37<2:38:58,  2.13it/s]

step:9660, train_loss:0.06314851388234052, acc:0.6562459906769875


 53%|█████▎    | 23384/43738 [2:59:38<2:46:53,  2.03it/s]

step:9660, train_loss:0.06315509229828506, acc:0.6562179267875471


 53%|█████▎    | 23385/43738 [2:59:38<2:21:11,  2.40it/s]

step:9660, train_loss:0.06315425306915597, acc:0.656232627752833


 53%|█████▎    | 23386/43738 [2:59:39<2:14:28,  2.52it/s]

step:9660, train_loss:0.06315322384393865, acc:0.656247327460874


 53%|█████▎    | 23387/43738 [2:59:39<2:02:28,  2.77it/s]

step:9660, train_loss:0.06315077067258881, acc:0.6562620259118314


 53%|█████▎    | 23388/43738 [2:59:39<1:56:23,  2.91it/s]

step:9660, train_loss:0.06314825611471087, acc:0.6562767231058663


 53%|█████▎    | 23389/43738 [2:59:39<1:57:56,  2.88it/s]

step:9660, train_loss:0.06315117791346807, acc:0.6562486639018342


 53%|█████▎    | 23390/43738 [2:59:40<1:49:27,  3.10it/s]

step:9660, train_loss:0.06314897330239877, acc:0.6562633604104318


 53%|█████▎    | 23391/43738 [2:59:40<1:41:51,  3.33it/s]

step:9660, train_loss:0.06314635818532793, acc:0.6562780556624342


 54%|█████▍    | 23696/43738 [3:02:00<2:02:16,  2.73it/s]

step:9680, train_loss:0.06318853836582057, acc:0.6561444969615124


 54%|█████▍    | 23697/43738 [3:02:00<2:05:19,  2.67it/s]

step:9680, train_loss:0.06318674498775577, acc:0.6561590074692999


 54%|█████▍    | 23698/43738 [3:02:01<2:00:01,  2.78it/s]

step:9680, train_loss:0.06318939973125066, acc:0.6561313190986581


 54%|█████▍    | 23699/43738 [3:02:01<2:01:07,  2.76it/s]

step:9680, train_loss:0.06318798712152093, acc:0.6561458289379298


 54%|█████▍    | 23700/43738 [3:02:02<2:06:30,  2.64it/s]

step:9680, train_loss:0.06319834675331146, acc:0.6561181434599156


 54%|█████▍    | 23702/43738 [3:02:02<1:45:16,  3.17it/s]

step:9680, train_loss:0.06319672010477151, acc:0.6561326526306906
step:9680, train_loss:0.0631980455602339, acc:0.656104970044722


 54%|█████▍    | 23703/43738 [3:02:02<1:39:03,  3.37it/s]

step:9680, train_loss:0.06319735652404963, acc:0.6561194785470194


 54%|█████▍    | 23704/43738 [3:02:03<1:42:02,  3.27it/s]

step:9680, train_loss:0.06319623434679134, acc:0.6561339858251772


 54%|█████▍    | 23705/43738 [3:02:03<2:15:40,  2.46it/s]

step:9680, train_loss:0.06320949682436812, acc:0.6561063066863531


 54%|█████▍    | 23706/43738 [3:02:04<2:29:19,  2.24it/s]

step:9680, train_loss:0.06320991706054362, acc:0.6560786298827301


 54%|█████▍    | 23707/43738 [3:02:04<2:14:32,  2.48it/s]

step:9680, train_loss:0.06321124545897638, acc:0.6560509554140127


 54%|█████▍    | 23708/43738 [3:02:05<2:29:12,  2.24it/s]

step:9680, train_loss:0.06321286500748523, acc:0.6560654631348068


 54%|█████▍    | 23709/43738 [3:02:05<2:15:51,  2.46it/s]

step:9680, train_loss:0.06321502676030083, acc:0.6560377915559492


 54%|█████▍    | 23710/43738 [3:02:05<2:18:42,  2.41it/s]

step:9680, train_loss:0.06321392577965759, acc:0.6560522986081821


 54%|█████▍    | 23711/43738 [3:02:06<2:11:32,  2.54it/s]

step:9680, train_loss:0.06321437059716842, acc:0.6560246299186032


 55%|█████▍    | 24016/43738 [3:04:26<2:48:30,  1.95it/s]

step:9700, train_loss:0.06315731694763778, acc:0.6563957361758828


 55%|█████▍    | 24017/43738 [3:04:27<2:57:07,  1.86it/s]

step:9700, train_loss:0.06315906655888706, acc:0.6563684057126202


 55%|█████▍    | 24018/43738 [3:04:28<2:47:34,  1.96it/s]

step:9700, train_loss:0.06315646762575534, acc:0.6563827129652761


 55%|█████▍    | 24019/43738 [3:04:28<2:34:47,  2.12it/s]

step:9700, train_loss:0.06315385248605078, acc:0.656397019026604


 55%|█████▍    | 24020/43738 [3:04:28<2:10:37,  2.52it/s]

step:9700, train_loss:0.0631515697840318, acc:0.6564113238967527


 55%|█████▍    | 24021/43738 [3:04:29<2:10:51,  2.51it/s]

step:9700, train_loss:0.06315570580314343, acc:0.6563839973356647


 55%|█████▍    | 24022/43738 [3:04:29<1:54:17,  2.88it/s]

step:9700, train_loss:0.0631533925416743, acc:0.6563983015569061


 55%|█████▍    | 24023/43738 [3:04:29<2:29:36,  2.20it/s]

step:9700, train_loss:0.06315108409207174, acc:0.6564126045872706


 55%|█████▍    | 24024/43738 [3:04:30<2:45:05,  1.99it/s]

step:9700, train_loss:0.06315799406673185, acc:0.6563852813852814


 55%|█████▍    | 24025/43738 [3:04:30<2:31:51,  2.16it/s]

step:9700, train_loss:0.0631553861757563, acc:0.6563995837669094


 55%|█████▍    | 24026/43738 [3:04:31<2:47:57,  1.96it/s]

step:9700, train_loss:0.06315808160440604, acc:0.6563722633813369


 55%|█████▍    | 24027/43738 [3:04:31<2:20:43,  2.33it/s]

step:9700, train_loss:0.06315599888518804, acc:0.6563865651142464


 55%|█████▍    | 24028/43738 [3:04:32<2:35:20,  2.11it/s]

step:9700, train_loss:0.06315787328708555, acc:0.6563592475445313


 55%|█████▍    | 24029/43738 [3:04:32<2:20:16,  2.34it/s]

step:9700, train_loss:0.06315682542388117, acc:0.6563735486287403


 55%|█████▍    | 24030/43738 [3:04:33<2:22:42,  2.30it/s]

step:9700, train_loss:0.06315422307479152, acc:0.65638784852268


 55%|█████▍    | 24031/43738 [3:04:33<2:12:04,  2.49it/s]

step:9700, train_loss:0.06315160544088515, acc:0.6564021472264991


 56%|█████▌    | 24336/43738 [3:06:52<2:31:57,  2.13it/s]

step:9720, train_loss:0.06319128316494318, acc:0.6561061801446417


 56%|█████▌    | 24337/43738 [3:06:52<2:12:08,  2.45it/s]

step:9720, train_loss:0.06319231604145217, acc:0.656120310638123


 56%|█████▌    | 24338/43738 [3:06:52<1:56:43,  2.77it/s]

step:9720, train_loss:0.06319027460377132, acc:0.6561344399704166


 56%|█████▌    | 24339/43738 [3:06:53<1:48:12,  2.99it/s]

step:9720, train_loss:0.06319083229465555, acc:0.6561485681416657


 56%|█████▌    | 24340/43738 [3:06:53<2:26:36,  2.21it/s]

step:9720, train_loss:0.06319355640754143, acc:0.6561216105176664


 56%|█████▌    | 24341/43738 [3:06:54<2:21:58,  2.28it/s]

step:9720, train_loss:0.06319102496172885, acc:0.6561357380551334


 56%|█████▌    | 24342/43738 [3:06:54<2:39:20,  2.03it/s]

step:9720, train_loss:0.06319180671017902, acc:0.6561087831731164


 56%|█████▌    | 24343/43738 [3:06:55<2:36:38,  2.06it/s]

step:9720, train_loss:0.063198082611627, acc:0.6560818305056895


 56%|█████▌    | 24344/43738 [3:06:56<2:43:41,  1.97it/s]

step:9720, train_loss:0.0632028014149768, acc:0.6560548800525797


 56%|█████▌    | 24345/43738 [3:06:56<2:20:16,  2.30it/s]

step:9720, train_loss:0.06320285907919318, acc:0.656027931813514


 56%|█████▌    | 24346/43738 [3:06:56<2:06:41,  2.55it/s]

step:9720, train_loss:0.0632043862976038, acc:0.6560009857882199


 56%|█████▌    | 24347/43738 [3:06:57<2:50:40,  1.89it/s]

step:9720, train_loss:0.0632026832757569, acc:0.6560151147985378


 56%|█████▌    | 24348/43738 [3:06:58<3:24:59,  1.58it/s]

step:9720, train_loss:0.06320351482133832, acc:0.6560292426482668


 56%|█████▌    | 24349/43738 [3:06:59<3:50:04,  1.40it/s]

step:9720, train_loss:0.06320799649071097, acc:0.6560022998891125


 56%|█████▌    | 24350/43738 [3:07:00<3:58:53,  1.35it/s]

step:9720, train_loss:0.0632145861741055, acc:0.6559753593429158


 56%|█████▌    | 24351/43738 [3:07:00<3:15:18,  1.65it/s]

step:9720, train_loss:0.06321740705793286, acc:0.6559484210094041


 56%|█████▋    | 24656/43738 [3:09:13<2:25:16,  2.19it/s]

step:9740, train_loss:0.0631286127975383, acc:0.6563513951979234


 56%|█████▋    | 24657/43738 [3:09:14<2:33:52,  2.07it/s]

step:9740, train_loss:0.06313143276932603, acc:0.6563247759257006


 56%|█████▋    | 24658/43738 [3:09:14<2:21:16,  2.25it/s]

step:9740, train_loss:0.06312889095155126, acc:0.6563387136020764


 56%|█████▋    | 24659/43738 [3:09:14<2:12:53,  2.39it/s]

step:9740, train_loss:0.06312776274338408, acc:0.656352650148019


 56%|█████▋    | 24660/43738 [3:09:15<2:01:52,  2.61it/s]

step:9740, train_loss:0.06312526871413376, acc:0.6563665855636659


 56%|█████▋    | 24661/43738 [3:09:15<1:45:05,  3.03it/s]

step:9740, train_loss:0.0631241687490537, acc:0.6563805198491546


 56%|█████▋    | 24662/43738 [3:09:15<1:42:39,  3.10it/s]

step:9740, train_loss:0.0631262696386135, acc:0.6563539047927986


 56%|█████▋    | 24663/43738 [3:09:16<1:57:48,  2.70it/s]

step:9740, train_loss:0.06312674604785708, acc:0.6563272918947411


 56%|█████▋    | 24664/43738 [3:09:16<2:30:52,  2.11it/s]

step:9740, train_loss:0.0631241918591651, acc:0.656341226078495


 56%|█████▋    | 24665/43738 [3:09:17<3:00:23,  1.76it/s]

step:9740, train_loss:0.06312654844394178, acc:0.6563146158524225


 56%|█████▋    | 24666/43738 [3:09:17<2:45:20,  1.92it/s]

step:9740, train_loss:0.06312402138742036, acc:0.6563285494202546


 56%|█████▋    | 24667/43738 [3:09:18<2:58:59,  1.78it/s]

step:9740, train_loss:0.06312338695789238, acc:0.6563424818583533


 56%|█████▋    | 24668/43738 [3:09:19<3:02:14,  1.74it/s]

step:9740, train_loss:0.06312087120430135, acc:0.6563564131668559


 56%|█████▋    | 24669/43738 [3:09:19<2:51:31,  1.85it/s]

step:9740, train_loss:0.06312248300631408, acc:0.6563298066399125


 56%|█████▋    | 24670/43738 [3:09:20<2:49:39,  1.87it/s]

step:9740, train_loss:0.0631230389391455, acc:0.6563032022699635


 56%|█████▋    | 24671/43738 [3:09:20<2:51:29,  1.85it/s]

step:9740, train_loss:0.06312124369716178, acc:0.6563171334765514


 57%|█████▋    | 24976/43738 [3:11:53<2:52:22,  1.81it/s]

step:9760, train_loss:0.0631396925772216, acc:0.6560698270339526


 57%|█████▋    | 24977/43738 [3:11:54<2:30:38,  2.08it/s]

step:9760, train_loss:0.06313724493991406, acc:0.6560835969091564


 57%|█████▋    | 24978/43738 [3:11:54<2:56:08,  1.78it/s]

step:9760, train_loss:0.06313680282393853, acc:0.6560973656818


 57%|█████▋    | 24979/43738 [3:11:55<2:36:04,  2.00it/s]

step:9760, train_loss:0.06313481162869018, acc:0.6561111333520157


 57%|█████▋    | 24980/43738 [3:11:55<2:21:49,  2.20it/s]

step:9760, train_loss:0.06313228850807087, acc:0.6561248999199359


 57%|█████▋    | 24981/43738 [3:11:56<2:21:07,  2.22it/s]

step:9760, train_loss:0.06313166610995448, acc:0.6561386653856931


 57%|█████▋    | 24982/43738 [3:11:56<2:07:19,  2.46it/s]

step:9760, train_loss:0.06312969384959508, acc:0.6561524297494196


 57%|█████▋    | 24983/43738 [3:11:56<1:58:38,  2.63it/s]

step:9760, train_loss:0.06312876445323909, acc:0.6561661930112477


 57%|█████▋    | 24984/43738 [3:11:57<1:56:27,  2.68it/s]

step:9760, train_loss:0.06312658888927054, acc:0.6561799551713097


 57%|█████▋    | 24985/43738 [3:11:57<1:41:43,  3.07it/s]

step:9760, train_loss:0.06312407630856878, acc:0.6561937162297379


 57%|█████▋    | 24986/43738 [3:11:57<1:56:05,  2.69it/s]

step:9760, train_loss:0.06312363417356341, acc:0.6562074761866645


 57%|█████▋    | 24987/43738 [3:11:58<2:04:47,  2.50it/s]

step:9760, train_loss:0.06312189238520972, acc:0.656221235042222


 57%|█████▋    | 24988/43738 [3:11:58<2:03:25,  2.53it/s]

step:9760, train_loss:0.06312421775262568, acc:0.6561949735873219


 57%|█████▋    | 24989/43738 [3:11:59<2:39:07,  1.96it/s]

step:9760, train_loss:0.06312198372050068, acc:0.6562087318420105


 57%|█████▋    | 24990/43738 [3:11:59<2:14:18,  2.33it/s]

step:9760, train_loss:0.06312115664665197, acc:0.6562224889955982


 57%|█████▋    | 24991/43738 [3:11:59<1:54:56,  2.72it/s]

step:9760, train_loss:0.06311936204615369, acc:0.6562362450482173


 58%|█████▊    | 25296/43738 [3:14:22<2:42:47,  1.89it/s]

step:9780, train_loss:0.06305789555754673, acc:0.6565464895635673


 58%|█████▊    | 25297/43738 [3:14:22<2:15:37,  2.27it/s]

step:9780, train_loss:0.06305540626797763, acc:0.6565600664110369


 58%|█████▊    | 25298/43738 [3:14:22<2:15:27,  2.27it/s]

step:9780, train_loss:0.06305348201362244, acc:0.656573642185153


 58%|█████▊    | 25299/43738 [3:14:23<1:58:05,  2.60it/s]

step:9780, train_loss:0.06305239515900771, acc:0.6565872168860429


 58%|█████▊    | 25300/43738 [3:14:23<2:03:42,  2.48it/s]

step:9780, train_loss:0.06305302269660855, acc:0.6565612648221344


 58%|█████▊    | 25301/43738 [3:14:23<1:52:56,  2.72it/s]

step:9780, train_loss:0.06305123321284205, acc:0.6565748389391723


 58%|█████▊    | 25302/43738 [3:14:24<1:45:44,  2.91it/s]

step:9780, train_loss:0.06305149363414131, acc:0.6565884119832425


 58%|█████▊    | 25303/43738 [3:14:24<1:58:53,  2.58it/s]

step:9780, train_loss:0.063055195564126, acc:0.6565624629490574


 58%|█████▊    | 25304/43738 [3:14:24<1:44:59,  2.93it/s]

step:9780, train_loss:0.06305274639095572, acc:0.6565760354094214


 58%|█████▊    | 25305/43738 [3:14:25<1:54:35,  2.68it/s]

step:9780, train_loss:0.06305147015718145, acc:0.6565896067970757


 58%|█████▊    | 25306/43738 [3:14:25<1:53:25,  2.71it/s]

step:9780, train_loss:0.06305227297208787, acc:0.6565636607919071


 58%|█████▊    | 25307/43738 [3:14:26<1:47:27,  2.86it/s]

step:9780, train_loss:0.06305081034017507, acc:0.6565772315960011


 58%|█████▊    | 25308/43738 [3:14:26<2:13:37,  2.30it/s]

step:9780, train_loss:0.06304853273392311, acc:0.6565908013276435


 58%|█████▊    | 25309/43738 [3:14:27<2:19:19,  2.20it/s]

step:9780, train_loss:0.06304705843426052, acc:0.6566043699869611


 58%|█████▊    | 25310/43738 [3:14:27<2:11:12,  2.34it/s]

step:9780, train_loss:0.06304861440723042, acc:0.6565784274990123


 58%|█████▊    | 25311/43738 [3:14:28<2:28:09,  2.07it/s]

step:9780, train_loss:0.06305171913732006, acc:0.6565524870609616


 59%|█████▊    | 25616/43738 [3:16:43<2:03:30,  2.45it/s]

step:9800, train_loss:0.0630264613011842, acc:0.6568550905683948


 59%|█████▊    | 25617/43738 [3:16:43<2:02:23,  2.47it/s]

step:9800, train_loss:0.06302410744459912, acc:0.6568684857711676


 59%|█████▊    | 25618/43738 [3:16:44<2:08:00,  2.36it/s]

step:9800, train_loss:0.06302538022976899, acc:0.6568428448746975


 59%|█████▊    | 25619/43738 [3:16:44<2:37:10,  1.92it/s]

step:9800, train_loss:0.06302748844617662, acc:0.6568172059799368


 59%|█████▊    | 25620/43738 [3:16:45<2:32:50,  1.98it/s]

step:9800, train_loss:0.06302557527832606, acc:0.6568306010928961


 59%|█████▊    | 25621/43738 [3:16:45<2:37:50,  1.91it/s]

step:9800, train_loss:0.06302632169688667, acc:0.656804964677413


 59%|█████▊    | 25622/43738 [3:16:46<2:20:17,  2.15it/s]

step:9800, train_loss:0.0630238642732137, acc:0.6568183592225432


 59%|█████▊    | 25623/43738 [3:16:46<2:07:16,  2.37it/s]

step:9800, train_loss:0.06303004780095244, acc:0.656792725285876


 59%|█████▊    | 25624/43738 [3:16:46<2:01:48,  2.48it/s]

step:9800, train_loss:0.06302769929914742, acc:0.6568061192631908


 59%|█████▊    | 25625/43738 [3:16:47<1:54:46,  2.63it/s]

step:9800, train_loss:0.0630313835458035, acc:0.6567804878048781


 59%|█████▊    | 25626/43738 [3:16:47<1:59:31,  2.53it/s]

step:9800, train_loss:0.06302899715818878, acc:0.6567938812143916


 59%|█████▊    | 25627/43738 [3:16:48<2:09:35,  2.33it/s]

step:9800, train_loss:0.063028231559414, acc:0.6568072735786475


 59%|█████▊    | 25628/43738 [3:16:48<2:17:02,  2.20it/s]

step:9800, train_loss:0.06302686491802129, acc:0.656820664897768


 59%|█████▊    | 25629/43738 [3:16:48<2:00:14,  2.51it/s]

step:9800, train_loss:0.06302469309205931, acc:0.6568340551718757


 59%|█████▊    | 25630/43738 [3:16:49<2:19:09,  2.17it/s]

step:9800, train_loss:0.06302229901001508, acc:0.6568474444010924


 59%|█████▊    | 25631/43738 [3:16:49<2:10:11,  2.32it/s]

step:9800, train_loss:0.06302023316183152, acc:0.656860832585541


 59%|█████▉    | 25936/43738 [3:19:14<1:58:41,  2.50it/s]

step:9820, train_loss:0.06303822933374247, acc:0.656886181369525


 59%|█████▉    | 25937/43738 [3:19:14<1:49:52,  2.70it/s]

step:9820, train_loss:0.06304375690104017, acc:0.6568608551490149


 59%|█████▉    | 25938/43738 [3:19:15<1:49:28,  2.71it/s]

step:9820, train_loss:0.06304585525609385, acc:0.6568355308813324


 59%|█████▉    | 25939/43738 [3:19:15<1:57:42,  2.52it/s]

step:9820, train_loss:0.06304571642653449, acc:0.6568487605536065


 59%|█████▉    | 25940/43738 [3:19:16<1:47:53,  2.75it/s]

step:9820, train_loss:0.06304353875615507, acc:0.6568619892058597


 59%|█████▉    | 25941/43738 [3:19:16<1:47:18,  2.76it/s]

step:9820, train_loss:0.0630441016569722, acc:0.6568366678231371


 59%|█████▉    | 25942/43738 [3:19:16<1:58:41,  2.50it/s]

step:9820, train_loss:0.06304180418359881, acc:0.6568498959216714


 59%|█████▉    | 25943/43738 [3:19:17<1:49:57,  2.70it/s]

step:9820, train_loss:0.06303939476304009, acc:0.656863123000424


 59%|█████▉    | 25944/43738 [3:19:17<1:42:24,  2.90it/s]

step:9820, train_loss:0.06303699807968137, acc:0.6568763490595128


 59%|█████▉    | 25945/43738 [3:19:18<2:12:44,  2.23it/s]

step:9820, train_loss:0.06303672089721689, acc:0.6568510310271729


 59%|█████▉    | 25946/43738 [3:19:18<2:18:09,  2.15it/s]

step:9820, train_loss:0.06303658102941961, acc:0.6568642565327989


 59%|█████▉    | 25947/43738 [3:19:19<2:19:00,  2.13it/s]

step:9820, train_loss:0.06303636448658399, acc:0.6568774810190002


 59%|█████▉    | 25948/43738 [3:19:19<2:03:55,  2.39it/s]

step:9820, train_loss:0.06303396229279516, acc:0.6568907044858948


 59%|█████▉    | 25949/43738 [3:19:19<1:58:30,  2.50it/s]

step:9820, train_loss:0.06303728024050718, acc:0.6568653898030753


 59%|█████▉    | 25950/43738 [3:19:20<1:48:10,  2.74it/s]

step:9820, train_loss:0.06304296170915602, acc:0.656840077071291


 59%|█████▉    | 25951/43738 [3:19:20<1:52:13,  2.64it/s]

step:9820, train_loss:0.06304208911094689, acc:0.6568533004508497


 60%|██████    | 26256/43738 [3:21:39<3:02:05,  1.60it/s]

step:9840, train_loss:0.06301469081500494, acc:0.6571831200487508


 60%|██████    | 26257/43738 [3:21:40<2:29:43,  1.95it/s]

step:9840, train_loss:0.0630127960067881, acc:0.6571961762577598


 60%|██████    | 26258/43738 [3:21:40<2:22:35,  2.04it/s]

step:9840, train_loss:0.06301186033714815, acc:0.6572092314723132


 60%|██████    | 26259/43738 [3:21:40<2:12:16,  2.20it/s]

step:9840, train_loss:0.06301136002019651, acc:0.6572222856925245


 60%|██████    | 26260/43738 [3:21:41<2:42:29,  1.79it/s]

step:9840, train_loss:0.06301574812345666, acc:0.6571972581873572


 60%|██████    | 26261/43738 [3:21:42<2:29:41,  1.95it/s]

step:9840, train_loss:0.06301682108322776, acc:0.6571722325882488


 60%|██████    | 26262/43738 [3:21:42<2:32:01,  1.92it/s]

step:9840, train_loss:0.06301568589965202, acc:0.6571852867260681


 60%|██████    | 26264/43738 [3:21:43<2:00:37,  2.41it/s]

step:9840, train_loss:0.0630148379118654, acc:0.6571983398697788
step:9840, train_loss:0.06301506755497037, acc:0.6571733170880293


 60%|██████    | 26265/43738 [3:21:43<2:14:26,  2.17it/s]

step:9840, train_loss:0.06301276847991538, acc:0.6571863696935085


 60%|██████    | 26266/43738 [3:21:44<1:58:33,  2.46it/s]

step:9840, train_loss:0.06301077756410568, acc:0.6571994213051092


 60%|██████    | 26267/43738 [3:21:44<1:48:00,  2.70it/s]

step:9840, train_loss:0.06300838087244573, acc:0.6572124719229452


 60%|██████    | 26268/43738 [3:21:44<1:45:15,  2.77it/s]

step:9840, train_loss:0.06300695248662572, acc:0.6572255215471295


 60%|██████    | 26270/43738 [3:21:45<1:23:37,  3.48it/s]

step:9840, train_loss:0.0630049406033149, acc:0.6572385701777761
step:9840, train_loss:0.06300497889652525, acc:0.6572135515797488


 60%|██████    | 26271/43738 [3:21:45<1:56:02,  2.51it/s]

step:9840, train_loss:0.06300782937132862, acc:0.6571885348863766


 61%|██████    | 26576/43738 [3:24:01<2:22:44,  2.00it/s]

step:9860, train_loss:0.06297923680161277, acc:0.6570590006020469


 61%|██████    | 26577/43738 [3:24:02<2:20:00,  2.04it/s]

step:9860, train_loss:0.0629813859199811, acc:0.6570342777589645


 61%|██████    | 26578/43738 [3:24:02<1:57:33,  2.43it/s]

step:9860, train_loss:0.06297901640701656, acc:0.6570471818797502


 61%|██████    | 26579/43738 [3:24:02<1:58:02,  2.42it/s]

step:9860, train_loss:0.06298006190534657, acc:0.6570224613416608


 61%|██████    | 26580/43738 [3:24:03<1:58:00,  2.42it/s]

step:9860, train_loss:0.06297785381427512, acc:0.6570353649360421


 61%|██████    | 26581/43738 [3:24:03<2:00:06,  2.38it/s]

step:9860, train_loss:0.0629780578492269, acc:0.657048267559535


 61%|██████    | 26582/43738 [3:24:04<1:52:01,  2.55it/s]

step:9860, train_loss:0.06297581208249882, acc:0.6570611692122489


 61%|██████    | 26583/43738 [3:24:04<2:06:12,  2.27it/s]

step:9860, train_loss:0.06297776173813745, acc:0.6570364518677351


 61%|██████    | 26584/43738 [3:24:05<2:19:44,  2.05it/s]

step:9860, train_loss:0.06297731738597728, acc:0.6570117363827866


 61%|██████    | 26585/43738 [3:24:05<2:04:48,  2.29it/s]

step:9860, train_loss:0.0629756509806138, acc:0.6570246379537333


 61%|██████    | 26586/43738 [3:24:06<2:10:51,  2.18it/s]

step:9860, train_loss:0.06298577165447239, acc:0.6569999247724366


 61%|██████    | 26587/43738 [3:24:06<2:30:31,  1.90it/s]

step:9860, train_loss:0.06298493518768725, acc:0.6570128258171287


 61%|██████    | 26588/43738 [3:24:07<2:43:30,  1.75it/s]

step:9860, train_loss:0.06299133213913868, acc:0.6569881149390703


 61%|██████    | 26589/43738 [3:24:07<2:16:07,  2.10it/s]

step:9860, train_loss:0.06298896509691719, acc:0.65700101545752


 61%|██████    | 26590/43738 [3:24:07<1:57:31,  2.43it/s]

step:9860, train_loss:0.06298659840029673, acc:0.6570139150056412


 61%|██████    | 26591/43738 [3:24:08<2:32:18,  1.88it/s]

step:9860, train_loss:0.06299196402927863, acc:0.6569892068745065


 61%|██████▏   | 26896/43738 [3:26:28<2:30:22,  1.87it/s]

step:9880, train_loss:0.06300912514435529, acc:0.6568262938726949


 61%|██████▏   | 26897/43738 [3:26:28<2:32:31,  1.84it/s]

step:9880, train_loss:0.06300764191189423, acc:0.6568390526824552


 61%|██████▏   | 26898/43738 [3:26:29<2:24:42,  1.94it/s]

step:9880, train_loss:0.06300720012427923, acc:0.6568518105435348


 62%|██████▏   | 26899/43738 [3:26:29<2:01:08,  2.32it/s]

step:9880, train_loss:0.06300938739768479, acc:0.6568273913528384


 62%|██████▏   | 26900/43738 [3:26:29<1:43:58,  2.70it/s]

step:9880, train_loss:0.06300704506343381, acc:0.6568401486988847


 62%|██████▏   | 26901/43738 [3:26:29<1:45:46,  2.65it/s]

step:9880, train_loss:0.06300588140315798, acc:0.6568529050964648


 62%|██████▏   | 26902/43738 [3:26:30<1:41:16,  2.77it/s]

step:9880, train_loss:0.06300413574883751, acc:0.6568656605456843


 62%|██████▏   | 26903/43738 [3:26:30<1:49:53,  2.55it/s]

step:9880, train_loss:0.06300323722926839, acc:0.6568784150466491


 62%|██████▏   | 26904/43738 [3:26:30<1:41:22,  2.77it/s]

step:9880, train_loss:0.0630009779810079, acc:0.6568911685994647


 62%|██████▏   | 26905/43738 [3:26:31<1:41:14,  2.77it/s]

step:9880, train_loss:0.06299866479072733, acc:0.6569039212042371


 62%|██████▏   | 26906/43738 [3:26:31<1:44:08,  2.69it/s]

step:9880, train_loss:0.06299931435650838, acc:0.6569166728610719


 62%|██████▏   | 26907/43738 [3:26:32<2:05:40,  2.23it/s]

step:9880, train_loss:0.06299810458379286, acc:0.6569294235700747


 62%|██████▏   | 26908/43738 [3:26:32<2:01:09,  2.32it/s]

step:9880, train_loss:0.06300336606306782, acc:0.6569050096625539


 62%|██████▏   | 26909/43738 [3:26:33<1:59:45,  2.34it/s]

step:9880, train_loss:0.06300154083372447, acc:0.6569177598572968


 62%|██████▏   | 26910/43738 [3:26:33<2:01:22,  2.31it/s]

step:9880, train_loss:0.06300387207203197, acc:0.6568933481976961


 62%|██████▏   | 26911/43738 [3:26:34<2:25:34,  1.93it/s]

step:9880, train_loss:0.06300153184592212, acc:0.6569060978781911


 62%|██████▏   | 27216/43738 [3:28:51<1:52:02,  2.46it/s]

step:9900, train_loss:0.0629539844497983, acc:0.6571502057613169


 62%|██████▏   | 27217/43738 [3:28:51<2:06:58,  2.17it/s]

step:9900, train_loss:0.06295650201985376, acc:0.6571260609178087


 62%|██████▏   | 27218/43738 [3:28:52<1:53:05,  2.43it/s]

step:9900, train_loss:0.06295423348622865, acc:0.65713865824087


 62%|██████▏   | 27219/43738 [3:28:52<1:56:46,  2.36it/s]

step:9900, train_loss:0.06295207959200519, acc:0.6571512546383041


 62%|██████▏   | 27220/43738 [3:28:52<1:46:16,  2.59it/s]

step:9900, train_loss:0.0629540501925375, acc:0.6571271124173402


 62%|██████▏   | 27221/43738 [3:28:53<1:38:51,  2.78it/s]

step:9900, train_loss:0.0629550871215467, acc:0.6571029719701701


 62%|██████▏   | 27222/43738 [3:28:53<1:43:53,  2.65it/s]

step:9900, train_loss:0.06295277677024866, acc:0.6571155682903533


 62%|██████▏   | 27223/43738 [3:28:54<1:42:17,  2.69it/s]

step:9900, train_loss:0.06295047308054098, acc:0.6571281636851192


 62%|██████▏   | 27224/43738 [3:28:54<1:41:27,  2.71it/s]

step:9900, train_loss:0.0629481685296793, acc:0.6571407581545695


 62%|██████▏   | 27225/43738 [3:28:55<2:03:16,  2.23it/s]

step:9900, train_loss:0.06295056648470104, acc:0.6571166207529844


 62%|██████▏   | 27226/43738 [3:28:55<2:10:40,  2.11it/s]

step:9900, train_loss:0.06294857171639419, acc:0.6571292147212223


 62%|██████▏   | 27227/43738 [3:28:56<2:22:05,  1.94it/s]

step:9900, train_loss:0.06294632519400788, acc:0.6571418077643516


 62%|██████▏   | 27228/43738 [3:28:56<2:23:06,  1.92it/s]

step:9900, train_loss:0.06294678909396308, acc:0.6571176729836933


 62%|██████▏   | 27229/43738 [3:28:57<2:09:33,  2.12it/s]

step:9900, train_loss:0.06294473992106539, acc:0.6571302655257263


 62%|██████▏   | 27230/43738 [3:28:57<1:51:56,  2.46it/s]

step:9900, train_loss:0.06294367767732036, acc:0.6571428571428571


 62%|██████▏   | 27231/43738 [3:28:57<1:49:40,  2.51it/s]

step:9900, train_loss:0.06294145367575357, acc:0.6571554478351879


 63%|██████▎   | 27536/43738 [3:31:15<2:04:35,  2.17it/s]

step:9920, train_loss:0.0630733837665891, acc:0.6561955258570599


 63%|██████▎   | 27537/43738 [3:31:15<1:51:10,  2.43it/s]

step:9920, train_loss:0.06307209297669727, acc:0.6562080110396921
step:9920, train_loss:0.06307075748839314, acc:0.6562204953155639


 63%|██████▎   | 27539/43738 [3:31:15<1:34:57,  2.84it/s]

step:9920, train_loss:0.06307066027811403, acc:0.6562329786847744


 63%|██████▎   | 27540/43738 [3:31:16<1:25:15,  3.17it/s]

step:9920, train_loss:0.06306979442089476, acc:0.6562454611474219


 63%|██████▎   | 27541/43738 [3:31:16<1:40:55,  2.67it/s]

step:9920, train_loss:0.06307069099334235, acc:0.6562579427036055


 63%|██████▎   | 27542/43738 [3:31:16<1:41:15,  2.67it/s]

step:9920, train_loss:0.06307245719947467, acc:0.6562341151695592


 63%|██████▎   | 27543/43738 [3:31:17<1:37:17,  2.77it/s]

step:9920, train_loss:0.06307601509667979, acc:0.656210289365719


 63%|██████▎   | 27544/43738 [3:31:17<1:26:31,  3.12it/s]

step:9920, train_loss:0.0630774083597315, acc:0.6561864652918966


 63%|██████▎   | 27545/43738 [3:31:18<1:41:35,  2.66it/s]

step:9920, train_loss:0.0630751293453467, acc:0.6561989471773462


 63%|██████▎   | 27546/43738 [3:31:18<1:59:42,  2.25it/s]

step:9920, train_loss:0.06308160082440335, acc:0.6561751252450446


 63%|██████▎   | 27547/43738 [3:31:19<1:57:39,  2.29it/s]

step:9920, train_loss:0.063080157503258, acc:0.6561876066359313


 63%|██████▎   | 27548/43738 [3:31:19<2:18:34,  1.95it/s]

step:9920, train_loss:0.06308075579791007, acc:0.6562000871206621


 63%|██████▎   | 27549/43738 [3:31:20<2:04:36,  2.17it/s]

step:9920, train_loss:0.06307931826350836, acc:0.6562125666993357


 63%|██████▎   | 27550/43738 [3:31:20<1:53:58,  2.37it/s]

step:9920, train_loss:0.06308771344175361, acc:0.6561887477313975


 63%|██████▎   | 27551/43738 [3:31:21<2:06:39,  2.13it/s]

step:9920, train_loss:0.06308569265354537, acc:0.6562012268157236


 64%|██████▎   | 27856/43738 [3:33:37<2:29:31,  1.77it/s]

step:9940, train_loss:0.06313309110766405, acc:0.6559807581849512


 64%|██████▎   | 27857/43738 [3:33:38<2:33:35,  1.72it/s]

step:9940, train_loss:0.06313086143668967, acc:0.6559931076569624


 64%|██████▎   | 27858/43738 [3:33:38<2:49:50,  1.56it/s]

step:9940, train_loss:0.06313047087874481, acc:0.656005456242372


 64%|██████▎   | 27859/43738 [3:33:39<2:32:48,  1.73it/s]

step:9940, train_loss:0.06313050048707532, acc:0.6559819088983811


 64%|██████▎   | 27860/43738 [3:33:39<2:31:13,  1.75it/s]

step:9940, train_loss:0.06312855648769863, acc:0.6559942569992822


 64%|██████▎   | 27861/43738 [3:33:40<2:22:35,  1.86it/s]

step:9940, train_loss:0.06312664785025693, acc:0.6560066042137755


 64%|██████▎   | 27862/43738 [3:33:40<2:04:15,  2.13it/s]

step:9940, train_loss:0.06312601628820265, acc:0.6560189505419568


 64%|██████▎   | 27863/43738 [3:33:41<2:04:26,  2.13it/s]

step:9940, train_loss:0.0631262323161375, acc:0.6559954060941033


 64%|██████▎   | 27864/43738 [3:33:41<1:59:02,  2.22it/s]

step:9940, train_loss:0.06312491193777445, acc:0.6560077519379846


 64%|██████▎   | 27865/43738 [3:33:41<1:46:07,  2.49it/s]

step:9940, train_loss:0.06312482468048349, acc:0.6559842095819128


 64%|██████▎   | 27866/43738 [3:33:42<1:31:32,  2.89it/s]

step:9940, train_loss:0.06312411302542814, acc:0.6559965549415058


 64%|██████▎   | 27867/43738 [3:33:42<1:48:55,  2.43it/s]

step:9940, train_loss:0.06312609028470528, acc:0.6560088994150788


 64%|██████▎   | 27868/43738 [3:33:43<1:59:21,  2.22it/s]

step:9940, train_loss:0.06313189186947753, acc:0.6559853595521745


 64%|██████▎   | 27869/43738 [3:33:43<1:59:25,  2.21it/s]

step:9940, train_loss:0.06313625798106355, acc:0.6559618213785927


 64%|██████▎   | 27870/43738 [3:33:44<2:01:17,  2.18it/s]

step:9940, train_loss:0.06313400926775982, acc:0.6559741657696447


 64%|██████▎   | 27871/43738 [3:33:44<1:58:27,  2.23it/s]

step:9940, train_loss:0.06313304115708246, acc:0.6559865092748736


 64%|██████▍   | 28176/43738 [3:36:03<1:59:34,  2.17it/s]

step:9960, train_loss:0.06310733782052856, acc:0.6560902896081772


 64%|██████▍   | 28177/43738 [3:36:03<1:42:45,  2.52it/s]

step:9960, train_loss:0.06310512585150828, acc:0.6561024949426838


 64%|██████▍   | 28178/43738 [3:36:04<1:37:16,  2.67it/s]

step:9960, train_loss:0.06310764979382505, acc:0.6560792107317766


 64%|██████▍   | 28179/43738 [3:36:04<1:54:04,  2.27it/s]

step:9960, train_loss:0.06310600584991438, acc:0.6560914155931722


 64%|██████▍   | 28180/43738 [3:36:05<2:20:40,  1.84it/s]

step:9960, train_loss:0.0631062991397912, acc:0.6561036195883605


 64%|██████▍   | 28181/43738 [3:36:06<2:31:25,  1.71it/s]

step:9960, train_loss:0.06310830492140615, acc:0.6560803378162592


 64%|██████▍   | 28182/43738 [3:36:06<2:15:32,  1.91it/s]

step:9960, train_loss:0.06311057219761425, acc:0.656057057696402


 64%|██████▍   | 28183/43738 [3:36:07<2:10:25,  1.99it/s]

step:9960, train_loss:0.06311701385660555, acc:0.656033779228613


 64%|██████▍   | 28184/43738 [3:36:07<2:08:30,  2.02it/s]

step:9960, train_loss:0.06311843150768702, acc:0.6560105024127164


 64%|██████▍   | 28185/43738 [3:36:08<1:55:36,  2.24it/s]

step:9960, train_loss:0.06312076262756117, acc:0.6559872272485364


 64%|██████▍   | 28186/43738 [3:36:08<2:07:26,  2.03it/s]

step:9960, train_loss:0.06312023187893923, acc:0.6559994323422976


 64%|██████▍   | 28187/43738 [3:36:08<1:54:17,  2.27it/s]

step:9960, train_loss:0.0631184279841004, acc:0.65601163657005


 64%|██████▍   | 28188/43738 [3:36:09<1:58:03,  2.20it/s]

step:9960, train_loss:0.06311622511427308, acc:0.656023839931886


 64%|██████▍   | 28189/43738 [3:36:09<2:05:06,  2.07it/s]

step:9960, train_loss:0.06311937098671414, acc:0.6560005675972898


 64%|██████▍   | 28190/43738 [3:36:10<2:00:38,  2.15it/s]

step:9960, train_loss:0.06311853375591583, acc:0.6560127704859879


 64%|██████▍   | 28191/43738 [3:36:10<1:59:35,  2.17it/s]

step:9960, train_loss:0.06311635094546288, acc:0.6560249725089567


 65%|██████▌   | 28496/43738 [3:38:27<2:55:17,  1.45it/s]

step:9980, train_loss:0.06315119904773311, acc:0.6559868051656372


 65%|██████▌   | 28497/43738 [3:38:27<2:55:33,  1.45it/s]

step:9980, train_loss:0.06314902706728427, acc:0.6559988770747798


 65%|██████▌   | 28498/43738 [3:38:28<2:44:43,  1.54it/s]

step:9980, train_loss:0.06314689725473112, acc:0.6560109481367113


 65%|██████▌   | 28499/43738 [3:38:29<2:53:09,  1.47it/s]

step:9980, train_loss:0.06314468249139397, acc:0.6560230183515211


 65%|██████▌   | 28500/43738 [3:38:29<2:23:02,  1.78it/s]

step:9980, train_loss:0.06314247259904279, acc:0.6560350877192982


 65%|██████▌   | 28501/43738 [3:38:29<2:19:30,  1.82it/s]

step:9980, train_loss:0.06314038492622732, acc:0.6560471562401319


 65%|██████▌   | 28502/43738 [3:38:30<2:27:17,  1.72it/s]

step:9980, train_loss:0.06313945260939417, acc:0.6560592239141113


 65%|██████▌   | 28503/43738 [3:38:31<2:19:38,  1.82it/s]

step:9980, train_loss:0.06313768090119734, acc:0.6560712907413254


 65%|██████▌   | 28504/43738 [3:38:31<2:19:25,  1.82it/s]

step:9980, train_loss:0.06313615338159458, acc:0.6560833567218636


 65%|██████▌   | 28505/43738 [3:38:32<2:35:32,  1.63it/s]

step:9980, train_loss:0.06313482157925357, acc:0.6560954218558148


 65%|██████▌   | 28506/43738 [3:38:32<2:22:56,  1.78it/s]

step:9980, train_loss:0.06313261755179723, acc:0.6561074861432681


 65%|██████▌   | 28507/43738 [3:38:33<2:40:37,  1.58it/s]

step:9980, train_loss:0.06313122941287233, acc:0.6561195495843126


 65%|██████▌   | 28508/43738 [3:38:33<2:18:15,  1.84it/s]

step:9980, train_loss:0.06312959258654725, acc:0.6561316121790375


 65%|██████▌   | 28509/43738 [3:38:34<2:05:17,  2.03it/s]

step:9980, train_loss:0.06312861468732843, acc:0.6561436739275317


 65%|██████▌   | 28510/43738 [3:38:35<2:46:07,  1.53it/s]

step:9980, train_loss:0.06313138924715611, acc:0.6561206594177481


 65%|██████▌   | 28511/43738 [3:38:36<2:57:14,  1.43it/s]

step:9980, train_loss:0.0631360817076336, acc:0.6560976465223949


 66%|██████▌   | 28816/43738 [3:40:53<1:53:35,  2.19it/s]

step:10000, train_loss:0.06309680332156276, acc:0.6563020544142143


 66%|██████▌   | 28817/43738 [3:40:53<1:43:40,  2.40it/s]

step:10000, train_loss:0.06309698889275873, acc:0.6563139813304647


 66%|██████▌   | 28818/43738 [3:40:54<1:51:37,  2.23it/s]

step:10000, train_loss:0.0630948133949709, acc:0.6563259074189742


 66%|██████▌   | 28819/43738 [3:40:54<1:36:38,  2.57it/s]

step:10000, train_loss:0.0630926299127327, acc:0.6563378326798293


 66%|██████▌   | 28820/43738 [3:40:54<1:49:18,  2.27it/s]

step:10000, train_loss:0.06309049473831255, acc:0.6563497571131159


 66%|██████▌   | 28821/43738 [3:40:55<1:53:08,  2.20it/s]

step:10000, train_loss:0.06309543839646535, acc:0.6563269837965372


 66%|██████▌   | 28822/43738 [3:40:56<2:06:47,  1.96it/s]

step:10000, train_loss:0.06309944648507994, acc:0.6563042120602318


 66%|██████▌   | 28823/43738 [3:40:56<2:13:56,  1.86it/s]

step:10000, train_loss:0.06309740137855285, acc:0.6563161364188321


 66%|██████▌   | 28824/43738 [3:40:57<2:02:59,  2.02it/s]

step:10000, train_loss:0.06309587147873809, acc:0.6563280599500416


 66%|██████▌   | 28825/43738 [3:40:57<1:53:23,  2.19it/s]

step:10000, train_loss:0.06309928848727017, acc:0.6563052905464007


 66%|██████▌   | 28826/43738 [3:40:57<1:54:15,  2.18it/s]

step:10000, train_loss:0.06309718404507879, acc:0.6563172136265871


 66%|██████▌   | 28827/43738 [3:40:58<1:48:18,  2.29it/s]

step:10000, train_loss:0.06309504699704258, acc:0.6563291358795573


 66%|██████▌   | 28828/43738 [3:40:58<1:41:46,  2.44it/s]

step:10000, train_loss:0.06309418886170208, acc:0.6563410573053975


 66%|██████▌   | 28829/43738 [3:40:59<1:40:58,  2.46it/s]

step:10000, train_loss:0.06309947365662891, acc:0.6563182906101495


 66%|██████▌   | 28830/43738 [3:40:59<1:30:02,  2.76it/s]

step:10000, train_loss:0.06309905862130225, acc:0.6562955254942768


 66%|██████▌   | 28831/43738 [3:40:59<1:22:01,  3.03it/s]

step:10000, train_loss:0.06309688210586721, acc:0.6563074468454094


 67%|██████▋   | 29136/43738 [3:43:19<1:51:52,  2.18it/s]

step:10020, train_loss:0.06311672669855965, acc:0.6562671609006041


 67%|██████▋   | 29137/43738 [3:43:19<2:15:50,  1.79it/s]

step:10020, train_loss:0.06311456946137203, acc:0.6562789580258778


 67%|██████▋   | 29138/43738 [3:43:20<2:08:35,  1.89it/s]

step:10020, train_loss:0.06311246082480873, acc:0.6562907543414098


 67%|██████▋   | 29139/43738 [3:43:20<2:08:19,  1.90it/s]

step:10020, train_loss:0.06311813056882791, acc:0.6562682315796698


 67%|██████▋   | 29140/43738 [3:43:21<2:02:07,  1.99it/s]

step:10020, train_loss:0.06311740615402066, acc:0.656280027453672


 67%|██████▋   | 29141/43738 [3:43:21<2:12:35,  1.83it/s]

step:10020, train_loss:0.06311945546410751, acc:0.6562575066058132


 67%|██████▋   | 29142/43738 [3:43:22<2:05:11,  1.94it/s]

step:10020, train_loss:0.06311785519129845, acc:0.6562693020382953


 67%|██████▋   | 29143/43738 [3:43:22<2:01:20,  2.00it/s]

step:10020, train_loss:0.06311578204593331, acc:0.6562810966612909


 67%|██████▋   | 29144/43738 [3:43:23<2:09:02,  1.88it/s]

step:10020, train_loss:0.06311431859614666, acc:0.6562928904748834


 67%|██████▋   | 29145/43738 [3:43:23<1:49:03,  2.23it/s]

step:10020, train_loss:0.06311215306797387, acc:0.6563046834791559


 67%|██████▋   | 29146/43738 [3:43:24<1:59:00,  2.04it/s]

step:10020, train_loss:0.06311067724518113, acc:0.656316475674192


 67%|██████▋   | 29147/43738 [3:43:24<2:08:27,  1.89it/s]

step:10020, train_loss:0.06311277882482584, acc:0.6562939582118228


 67%|██████▋   | 29148/43738 [3:43:25<1:59:47,  2.03it/s]

step:10020, train_loss:0.06311111461214869, acc:0.6563057499656924


 67%|██████▋   | 29149/43738 [3:43:26<2:20:04,  1.74it/s]

step:10020, train_loss:0.06311692258461372, acc:0.656283234416275


 67%|██████▋   | 29150/43738 [3:43:26<2:05:14,  1.94it/s]

step:10020, train_loss:0.06311910395769084, acc:0.6562607204116638


 67%|██████▋   | 29151/43738 [3:43:27<2:26:14,  1.66it/s]

step:10020, train_loss:0.06311702241132806, acc:0.6562725120922095


 67%|██████▋   | 29456/43738 [3:45:46<2:06:44,  1.88it/s]

step:10040, train_loss:0.06319985824022212, acc:0.6559953829440521


 67%|██████▋   | 29457/43738 [3:45:46<1:47:57,  2.20it/s]

step:10040, train_loss:0.06319833910652756, acc:0.6560070611399668


 67%|██████▋   | 29458/43738 [3:45:46<1:47:19,  2.22it/s]

step:10040, train_loss:0.06319998191921107, acc:0.655984791907122


 67%|██████▋   | 29459/43738 [3:45:47<2:00:36,  1.97it/s]

step:10040, train_loss:0.06320441713130417, acc:0.6559625241861571


 67%|██████▋   | 29460/43738 [3:45:47<1:40:31,  2.37it/s]

step:10040, train_loss:0.06320243486481568, acc:0.6559742023082146


 67%|██████▋   | 29461/43738 [3:45:47<1:30:14,  2.64it/s]

step:10040, train_loss:0.06320031148849097, acc:0.6559858796374869


 67%|██████▋   | 29462/43738 [3:45:48<1:20:03,  2.97it/s]

step:10040, train_loss:0.06319817662321761, acc:0.6559975561740548


 67%|██████▋   | 29463/43738 [3:45:48<1:13:34,  3.23it/s]

step:10040, train_loss:0.06319603788612904, acc:0.6560092319179989


 67%|██████▋   | 29464/43738 [3:45:49<1:40:33,  2.37it/s]

step:10040, train_loss:0.0631941160964019, acc:0.6560209068694


 67%|██████▋   | 29465/43738 [3:45:49<1:33:03,  2.56it/s]

step:10040, train_loss:0.06319287296021835, acc:0.6560325810283387


 67%|██████▋   | 29466/43738 [3:45:50<1:56:32,  2.04it/s]

step:10040, train_loss:0.06319261197615168, acc:0.6560442543948958


 67%|██████▋   | 29467/43738 [3:45:50<1:51:28,  2.13it/s]

step:10040, train_loss:0.06319179566569041, acc:0.6560559269691519


 67%|██████▋   | 29468/43738 [3:45:51<1:51:45,  2.13it/s]

step:10040, train_loss:0.06319036992694566, acc:0.6560675987511877


 67%|██████▋   | 29469/43738 [3:45:51<1:44:09,  2.28it/s]

step:10040, train_loss:0.06319080207379688, acc:0.6560453357765788


 67%|██████▋   | 29470/43738 [3:45:51<1:45:33,  2.25it/s]

step:10040, train_loss:0.06318891082825666, acc:0.6560570071258908


 67%|██████▋   | 29471/43738 [3:45:52<1:37:09,  2.45it/s]

step:10040, train_loss:0.06318678765572583, acc:0.6560686776831461


 68%|██████▊   | 29776/43738 [3:48:16<2:17:33,  1.69it/s]

step:10060, train_loss:0.06322114916959087, acc:0.6558301988178399


 68%|██████▊   | 29777/43738 [3:48:16<2:03:20,  1.89it/s]

step:10060, train_loss:0.06321902631509663, acc:0.6558417570608187


 68%|██████▊   | 29778/43738 [3:48:17<2:20:34,  1.66it/s]

step:10060, train_loss:0.06322102521442655, acc:0.655819732688562


 68%|██████▊   | 29779/43738 [3:48:17<2:05:58,  1.85it/s]

step:10060, train_loss:0.06321934457937134, acc:0.655831290506733


 68%|██████▊   | 29780/43738 [3:48:18<2:12:57,  1.75it/s]

step:10060, train_loss:0.06321728543184009, acc:0.6558428475486904


 68%|██████▊   | 29781/43738 [3:48:18<2:17:00,  1.70it/s]

step:10060, train_loss:0.06321589312859673, acc:0.6558544038145127


 68%|██████▊   | 29782/43738 [3:48:19<1:53:57,  2.04it/s]

step:10060, train_loss:0.06321379170773755, acc:0.6558659593042777


 68%|██████▊   | 29783/43738 [3:48:19<2:13:55,  1.74it/s]

step:10060, train_loss:0.06321733076991938, acc:0.6558439378168754


 68%|██████▊   | 29784/43738 [3:48:20<1:54:08,  2.04it/s]

step:10060, train_loss:0.06321711567505309, acc:0.6558554928820843


 68%|██████▊   | 29785/43738 [3:48:20<1:46:46,  2.18it/s]

step:10060, train_loss:0.06321564993219134, acc:0.655867047171395


 68%|██████▊   | 29786/43738 [3:48:20<1:31:39,  2.54it/s]

step:10060, train_loss:0.06321353381198976, acc:0.6558786006848856


 68%|██████▊   | 29787/43738 [3:48:21<1:38:51,  2.35it/s]

step:10060, train_loss:0.06321585761151237, acc:0.655856581730285


 68%|██████▊   | 29788/43738 [3:48:21<1:41:48,  2.28it/s]

step:10060, train_loss:0.06321499171718568, acc:0.6558681348193903


 68%|██████▊   | 29789/43738 [3:48:22<1:37:37,  2.38it/s]

step:10060, train_loss:0.06321290943971432, acc:0.6558796871328343


 68%|██████▊   | 29790/43738 [3:48:22<1:28:47,  2.62it/s]

step:10060, train_loss:0.06321429635239079, acc:0.655857670359181


 68%|██████▊   | 29791/43738 [3:48:22<1:23:11,  2.79it/s]

step:10060, train_loss:0.06321264260679751, acc:0.65586922224833


 69%|██████▉   | 30096/43738 [3:50:42<2:07:52,  1.78it/s]

step:10080, train_loss:0.06323995965651966, acc:0.6556353003721425


 69%|██████▉   | 30097/43738 [3:50:43<2:25:36,  1.56it/s]

step:10080, train_loss:0.06324245081367962, acc:0.6556135162973054


 69%|██████▉   | 30098/43738 [3:50:43<2:20:40,  1.62it/s]

step:10080, train_loss:0.06324567348905334, acc:0.6555917336700113


 69%|██████▉   | 30099/43738 [3:50:44<2:08:01,  1.78it/s]

step:10080, train_loss:0.06324357464953298, acc:0.6556031761852553


 69%|██████▉   | 30100/43738 [3:50:44<1:49:35,  2.07it/s]

step:10080, train_loss:0.06324420431693606, acc:0.6556146179401994


 69%|██████▉   | 30101/43738 [3:50:45<2:05:29,  1.81it/s]

step:10080, train_loss:0.06324315510969344, acc:0.6556260589349191


 69%|██████▉   | 30102/43738 [3:50:45<1:55:23,  1.97it/s]

step:10080, train_loss:0.06324615065777597, acc:0.6556042787854628


 69%|██████▉   | 30103/43738 [3:50:46<1:51:51,  2.03it/s]

step:10080, train_loss:0.06324477259745666, acc:0.6556157193635186


 69%|██████▉   | 30104/43738 [3:50:47<2:16:34,  1.66it/s]

step:10080, train_loss:0.06324267805331157, acc:0.6556271591815042


 69%|██████▉   | 30105/43738 [3:50:47<2:12:13,  1.72it/s]

step:10080, train_loss:0.06324115149355493, acc:0.6556385982394951


 69%|██████▉   | 30106/43738 [3:50:48<2:14:46,  1.69it/s]

step:10080, train_loss:0.06324121258117123, acc:0.6556168205673287


 69%|██████▉   | 30107/43738 [3:50:48<1:52:54,  2.01it/s]

step:10080, train_loss:0.06323952169857743, acc:0.6556282592088218


 69%|██████▉   | 30108/43738 [3:50:49<2:05:42,  1.81it/s]

step:10080, train_loss:0.06324716301955054, acc:0.6556064833266906


 69%|██████▉   | 30109/43738 [3:50:49<2:05:10,  1.81it/s]

step:10080, train_loss:0.06325055375096074, acc:0.6555847088910293


 69%|██████▉   | 30110/43738 [3:50:50<1:52:37,  2.02it/s]

step:10080, train_loss:0.06325108056475795, acc:0.6555629359016938


 69%|██████▉   | 30111/43738 [3:50:50<1:54:34,  1.98it/s]

step:10080, train_loss:0.0632521444626022, acc:0.65554116435854


 70%|██████▉   | 30416/43738 [3:53:11<1:40:27,  2.21it/s]

step:10100, train_loss:0.06321234210321686, acc:0.6556417674907943


 70%|██████▉   | 30417/43738 [3:53:12<1:25:46,  2.59it/s]

step:10100, train_loss:0.06321026429706271, acc:0.6556530887332741


 70%|██████▉   | 30418/43738 [3:53:12<1:25:33,  2.59it/s]

step:10100, train_loss:0.06320839461232722, acc:0.6556644092313761


 70%|██████▉   | 30419/43738 [3:53:12<1:25:12,  2.61it/s]

step:10100, train_loss:0.06320806753746867, acc:0.6556428547947006


 70%|██████▉   | 30420/43738 [3:53:13<1:21:27,  2.72it/s]

step:10100, train_loss:0.06320606267508443, acc:0.6556541748849442


 70%|██████▉   | 30421/43738 [3:53:13<1:34:04,  2.36it/s]

step:10100, train_loss:0.0632042090491695, acc:0.6556654942309589


 70%|██████▉   | 30422/43738 [3:53:14<2:01:22,  1.83it/s]

step:10100, train_loss:0.06320610642630432, acc:0.6556439418841628


 70%|██████▉   | 30423/43738 [3:53:15<1:55:03,  1.93it/s]

step:10100, train_loss:0.06320407182489936, acc:0.6556552608224041


 70%|██████▉   | 30424/43738 [3:53:15<1:42:01,  2.17it/s]

step:10100, train_loss:0.06320201891886938, acc:0.6556665790165659


 70%|██████▉   | 30425/43738 [3:53:15<1:35:42,  2.32it/s]

step:10100, train_loss:0.0632000387972546, acc:0.6556778964667215


 70%|██████▉   | 30426/43738 [3:53:16<1:58:49,  1.87it/s]

step:10100, train_loss:0.0631989565134635, acc:0.6556892131729442


 70%|██████▉   | 30427/43738 [3:53:17<2:05:41,  1.76it/s]

step:10100, train_loss:0.06319836749939826, acc:0.6557005291353074


 70%|██████▉   | 30428/43738 [3:53:17<1:50:02,  2.02it/s]

step:10100, train_loss:0.0631990497820561, acc:0.6556789798869462


 70%|██████▉   | 30429/43738 [3:53:17<1:33:19,  2.38it/s]

step:10100, train_loss:0.0631991586657519, acc:0.6556574320549475


 70%|██████▉   | 30430/43738 [3:53:18<1:29:24,  2.48it/s]

step:10100, train_loss:0.06319709072258736, acc:0.6556687479461059


 70%|██████▉   | 30431/43738 [3:53:18<1:54:11,  1.94it/s]

step:10100, train_loss:0.06320222991034843, acc:0.6556472018665177


 70%|███████   | 30736/43738 [3:55:39<1:41:31,  2.13it/s]

step:10120, train_loss:0.06320288592227497, acc:0.6555179593961479


 70%|███████   | 30737/43738 [3:55:40<1:47:42,  2.01it/s]

step:10120, train_loss:0.06320083125208778, acc:0.6555291668022253


 70%|███████   | 30738/43738 [3:55:40<1:30:09,  2.40it/s]

step:10120, train_loss:0.06319877734968181, acc:0.6555403734790812


 70%|███████   | 30739/43738 [3:55:40<1:20:18,  2.70it/s]

step:10120, train_loss:0.06320177590786485, acc:0.6555190474641335


 70%|███████   | 30740/43738 [3:55:40<1:12:20,  2.99it/s]

step:10120, train_loss:0.06319972368766878, acc:0.655530253741054


 70%|███████   | 30741/43738 [3:55:41<1:20:25,  2.69it/s]

step:10120, train_loss:0.0631981640329611, acc:0.6555414592888975


 70%|███████   | 30742/43738 [3:55:41<1:20:57,  2.68it/s]

step:10120, train_loss:0.06319693077441046, acc:0.6555526641077354


 70%|███████   | 30743/43738 [3:55:42<1:24:32,  2.56it/s]

step:10120, train_loss:0.06319699506066896, acc:0.6555313404677487


 70%|███████   | 30744/43738 [3:55:42<1:22:24,  2.63it/s]

step:10120, train_loss:0.06319565160250763, acc:0.6555425448868072


 70%|███████   | 30745/43738 [3:55:42<1:15:37,  2.86it/s]

step:10120, train_loss:0.06319653166456003, acc:0.6555212229630835


 70%|███████   | 30746/43738 [3:55:43<1:36:56,  2.23it/s]

step:10120, train_loss:0.0631954948006807, acc:0.6555324269823717


 70%|███████   | 30747/43738 [3:55:44<1:53:17,  1.91it/s]

step:10120, train_loss:0.06319344655527771, acc:0.6555436302728721


 70%|███████   | 30748/43738 [3:55:44<1:48:12,  2.00it/s]

step:10120, train_loss:0.06319175476685311, acc:0.6555548328346559


 70%|███████   | 30749/43738 [3:55:45<1:51:04,  1.95it/s]

step:10120, train_loss:0.06319395476642342, acc:0.6555335132849849


 70%|███████   | 30750/43738 [3:55:45<1:43:01,  2.10it/s]

step:10120, train_loss:0.06319806871461546, acc:0.6555121951219512


 70%|███████   | 30751/43738 [3:55:46<1:58:43,  1.82it/s]

step:10120, train_loss:0.06319853230272272, acc:0.6554908783454196


 71%|███████   | 31056/43738 [3:58:15<1:48:22,  1.95it/s]

step:10140, train_loss:0.06322160814918276, acc:0.6551069036579082


 71%|███████   | 31057/43738 [3:58:15<1:30:30,  2.34it/s]

step:10140, train_loss:0.06321958432117926, acc:0.6551180088224877


 71%|███████   | 31058/43738 [3:58:15<1:17:44,  2.72it/s]

step:10140, train_loss:0.06321808893198985, acc:0.6551291132719428


 71%|███████   | 31059/43738 [3:58:16<1:17:11,  2.74it/s]

step:10140, train_loss:0.06321611288197893, acc:0.6551402170063427


 71%|███████   | 31060/43738 [3:58:16<1:29:05,  2.37it/s]

step:10140, train_loss:0.06321524534101727, acc:0.6551513200257566


 71%|███████   | 31061/43738 [3:58:17<1:17:16,  2.73it/s]

step:10140, train_loss:0.06321750249523556, acc:0.6551302276166253


 71%|███████   | 31062/43738 [3:58:17<1:33:40,  2.26it/s]

step:10140, train_loss:0.06322228653756155, acc:0.6551091365655786


 71%|███████   | 31063/43738 [3:58:18<1:44:22,  2.02it/s]

step:10140, train_loss:0.06322442194029028, acc:0.6551202395132473


 71%|███████   | 31064/43738 [3:58:18<1:33:20,  2.26it/s]

step:10140, train_loss:0.06322242270557654, acc:0.6551313417460726


 71%|███████   | 31065/43738 [3:58:18<1:24:36,  2.50it/s]

step:10140, train_loss:0.06322077333819939, acc:0.6551424432641236


 71%|███████   | 31066/43738 [3:58:19<1:44:57,  2.01it/s]

step:10140, train_loss:0.06322397312715758, acc:0.655121354535505


 71%|███████   | 31067/43738 [3:58:20<1:39:25,  2.12it/s]

step:10140, train_loss:0.06322201012279845, acc:0.655132455660347


 71%|███████   | 31068/43738 [3:58:20<1:45:37,  2.00it/s]

step:10140, train_loss:0.06322640911111337, acc:0.6551113686107892


 71%|███████   | 31069/43738 [3:58:21<1:45:26,  2.00it/s]

step:10140, train_loss:0.0632273402377699, acc:0.6550902829186649


 71%|███████   | 31070/43738 [3:58:21<1:30:35,  2.33it/s]

step:10140, train_loss:0.06322592532576456, acc:0.6551013839716768


 71%|███████   | 31071/43738 [3:58:22<1:44:23,  2.02it/s]

step:10140, train_loss:0.06322533861201417, acc:0.6550802999581603


 72%|███████▏  | 31376/43738 [4:00:33<1:21:21,  2.53it/s]

step:10160, train_loss:0.0632570868303886, acc:0.6551185619581846


 72%|███████▏  | 31377/43738 [4:00:33<1:32:05,  2.24it/s]

step:10160, train_loss:0.06325967904818794, acc:0.6550976830162221


 72%|███████▏  | 31378/43738 [4:00:34<1:26:32,  2.38it/s]

step:10160, train_loss:0.0632584523407954, acc:0.6551086748677417


 72%|███████▏  | 31379/43738 [4:00:34<1:20:06,  2.57it/s]

step:10160, train_loss:0.06325775057494461, acc:0.6551196660186749


 72%|███████▏  | 31380/43738 [4:00:35<1:46:55,  1.93it/s]

step:10160, train_loss:0.06325617550711518, acc:0.6551306564690886


 72%|███████▏  | 31381/43738 [4:00:35<1:46:07,  1.94it/s]

step:10160, train_loss:0.06325848316231668, acc:0.6551097798030655


 72%|███████▏  | 31382/43738 [4:00:36<1:36:17,  2.14it/s]

step:10160, train_loss:0.06325850125294774, acc:0.6550889044675292


 72%|███████▏  | 31383/43738 [4:00:36<1:54:25,  1.80it/s]

step:10160, train_loss:0.06325655333498247, acc:0.655099894847529


 72%|███████▏  | 31384/43738 [4:00:37<1:39:55,  2.06it/s]

step:10160, train_loss:0.06325595364731515, acc:0.6551108845271476


 72%|███████▏  | 31385/43738 [4:00:37<1:26:24,  2.38it/s]

step:10160, train_loss:0.0632577182433505, acc:0.6550900111518241


 72%|███████▏  | 31386/43738 [4:00:37<1:19:18,  2.60it/s]

step:10160, train_loss:0.0632559415912754, acc:0.6551010004460588


 72%|███████▏  | 31387/43738 [4:00:38<1:21:43,  2.52it/s]

step:10160, train_loss:0.06325471372629662, acc:0.6551119890400484


 72%|███████▏  | 31388/43738 [4:00:38<1:23:42,  2.46it/s]

step:10160, train_loss:0.06325299920785177, acc:0.6551229769338601


 72%|███████▏  | 31389/43738 [4:00:39<1:37:56,  2.10it/s]

step:10160, train_loss:0.06325146490098972, acc:0.6551339641275606


 72%|███████▏  | 31390/43738 [4:00:39<1:29:06,  2.31it/s]

step:10160, train_loss:0.06325055976773718, acc:0.6551449506212169


 72%|███████▏  | 31391/43738 [4:00:40<1:38:15,  2.09it/s]

step:10160, train_loss:0.0632486687309588, acc:0.655155936414896


 72%|███████▏  | 31696/43738 [4:02:59<1:46:41,  1.88it/s]

step:10180, train_loss:0.0632474260691583, acc:0.6551299848561333


 72%|███████▏  | 31697/43738 [4:03:00<2:05:02,  1.60it/s]

step:10180, train_loss:0.0632469674916864, acc:0.6551408650660946


 72%|███████▏  | 31698/43738 [4:03:01<2:14:57,  1.49it/s]

step:10180, train_loss:0.06324847288662487, acc:0.655120196857846


 72%|███████▏  | 31699/43738 [4:03:01<1:47:56,  1.86it/s]

step:10180, train_loss:0.06324733131981845, acc:0.6551310766901164


 72%|███████▏  | 31700/43738 [4:03:01<1:39:03,  2.03it/s]

step:10180, train_loss:0.06325046274076364, acc:0.6551104100946372


 72%|███████▏  | 31701/43738 [4:03:02<1:32:54,  2.16it/s]

step:10180, train_loss:0.06325099128628049, acc:0.6550897448030031


 72%|███████▏  | 31702/43738 [4:03:02<1:25:26,  2.35it/s]

step:10180, train_loss:0.0632491224694446, acc:0.6551006245662734


 72%|███████▏  | 31703/43738 [4:03:03<1:39:23,  2.02it/s]

step:10180, train_loss:0.06324720199880406, acc:0.6551115036431884


 72%|███████▏  | 31704/43738 [4:03:03<1:30:47,  2.21it/s]

step:10180, train_loss:0.06324547661980277, acc:0.6551223820338128


 72%|███████▏  | 31705/43738 [4:03:04<1:45:38,  1.90it/s]

step:10180, train_loss:0.06324676709580485, acc:0.655101718971771


 72%|███████▏  | 31706/43738 [4:03:04<1:46:01,  1.89it/s]

step:10180, train_loss:0.06324477668271947, acc:0.6551125969847978


 72%|███████▏  | 31707/43738 [4:03:04<1:35:14,  2.11it/s]

step:10180, train_loss:0.06324278259129262, acc:0.6551234743116662


 72%|███████▏  | 31708/43738 [4:03:05<1:48:48,  1.84it/s]

step:10180, train_loss:0.06324139268332242, acc:0.655134350952441


 72%|███████▏  | 31709/43738 [4:03:06<1:47:34,  1.86it/s]

step:10180, train_loss:0.06324031689503999, acc:0.6551452269071872


 72%|███████▏  | 31710/43738 [4:03:06<1:46:08,  1.89it/s]

step:10180, train_loss:0.06323987088001227, acc:0.6551245663828446


 73%|███████▎  | 31711/43738 [4:03:07<1:44:01,  1.93it/s]

step:10180, train_loss:0.0632404895308255, acc:0.6551354419602031


 73%|███████▎  | 32016/43738 [4:05:27<1:52:35,  1.74it/s]

step:10200, train_loss:0.06321796373391146, acc:0.6550787106446777


 73%|███████▎  | 32017/43738 [4:05:28<2:05:42,  1.55it/s]

step:10200, train_loss:0.0632207573734111, acc:0.6550582503045257


 73%|███████▎  | 32018/43738 [4:05:28<1:51:58,  1.74it/s]

step:10200, train_loss:0.06322509066756961, acc:0.6550377912424261


 73%|███████▎  | 32019/43738 [4:05:29<1:38:27,  1.98it/s]

step:10200, train_loss:0.06322782230232421, acc:0.6550173334582592


 73%|███████▎  | 32020/43738 [4:05:29<1:32:22,  2.11it/s]

step:10200, train_loss:0.06323293194757461, acc:0.654996876951905


 73%|███████▎  | 32021/43738 [4:05:29<1:26:00,  2.27it/s]

step:10200, train_loss:0.06323327438611956, acc:0.6549764217232441


 73%|███████▎  | 32022/43738 [4:05:30<1:35:03,  2.05it/s]

step:10200, train_loss:0.06323369759770597, acc:0.6549559677721566


 73%|███████▎  | 32023/43738 [4:05:30<1:22:14,  2.37it/s]

step:10200, train_loss:0.06323180799065342, acc:0.6549667426537177


 73%|███████▎  | 32024/43738 [4:05:31<1:28:40,  2.20it/s]

step:10200, train_loss:0.06323087077010442, acc:0.6549775168623533


 73%|███████▎  | 32025/43738 [4:05:31<1:35:30,  2.04it/s]

step:10200, train_loss:0.06322909199631796, acc:0.6549882903981264


 73%|███████▎  | 32026/43738 [4:05:32<1:42:55,  1.90it/s]

step:10200, train_loss:0.06322993639861513, acc:0.6549990632611004


 73%|███████▎  | 32027/43738 [4:05:32<1:39:59,  1.95it/s]

step:10200, train_loss:0.0632289330000062, acc:0.655009835451338


 73%|███████▎  | 32028/43738 [4:05:33<1:32:56,  2.10it/s]

step:10200, train_loss:0.06323293587590503, acc:0.6549893842887473


 73%|███████▎  | 32029/43738 [4:05:33<1:25:03,  2.29it/s]

step:10200, train_loss:0.06323198558016185, acc:0.6550001561085267


 73%|███████▎  | 32030/43738 [4:05:33<1:15:24,  2.59it/s]

step:10200, train_loss:0.06323147683031734, acc:0.6549797065251327


 73%|███████▎  | 32031/43738 [4:05:34<1:38:43,  1.98it/s]

step:10200, train_loss:0.0632310308382143, acc:0.6549904779744622


 74%|███████▍  | 32336/43738 [4:07:46<1:25:41,  2.22it/s]

step:10220, train_loss:0.0632695595406677, acc:0.6548119742701632


 74%|███████▍  | 32337/43738 [4:07:46<1:23:35,  2.27it/s]

step:10220, train_loss:0.06326987286076434, acc:0.654791724649782


 74%|███████▍  | 32338/43738 [4:07:47<1:21:28,  2.33it/s]

step:10220, train_loss:0.06326848738348845, acc:0.6548023996536583


 74%|███████▍  | 32339/43738 [4:07:47<1:18:50,  2.41it/s]

step:10220, train_loss:0.06326717324062282, acc:0.6548130739973407


 74%|███████▍  | 32340/43738 [4:07:47<1:24:29,  2.25it/s]

step:10220, train_loss:0.06326787102314761, acc:0.6548237476808906


 74%|███████▍  | 32341/43738 [4:07:48<1:25:52,  2.21it/s]

step:10220, train_loss:0.06327053978653709, acc:0.6548035002009833


 74%|███████▍  | 32342/43738 [4:07:49<1:33:02,  2.04it/s]

step:10220, train_loss:0.06327039374942132, acc:0.6548141735204996


 74%|███████▍  | 32343/43738 [4:07:49<1:19:12,  2.40it/s]

step:10220, train_loss:0.06326995599066577, acc:0.6547939275886591


 74%|███████▍  | 32344/43738 [4:07:49<1:26:38,  2.19it/s]

step:10220, train_loss:0.06326827490763712, acc:0.6548046005441504


 74%|███████▍  | 32345/43738 [4:07:50<1:38:37,  1.93it/s]

step:10220, train_loss:0.06326905698983908, acc:0.6547843561601484


 74%|███████▍  | 32346/43738 [4:07:51<1:42:26,  1.85it/s]

step:10220, train_loss:0.06326720002554866, acc:0.6547950287516231


 74%|███████▍  | 32347/43738 [4:07:51<1:42:48,  1.85it/s]

step:10220, train_loss:0.06326681658199652, acc:0.6548057006832164


 74%|███████▍  | 32348/43738 [4:07:52<1:58:11,  1.61it/s]

step:10220, train_loss:0.06326958191598395, acc:0.6547854581426982


 74%|███████▍  | 32349/43738 [4:07:52<1:35:00,  2.00it/s]

step:10220, train_loss:0.0632731458957571, acc:0.6547652168536895


 74%|███████▍  | 32350/43738 [4:07:52<1:21:22,  2.33it/s]

step:10220, train_loss:0.06327131441133449, acc:0.6547758887171561


 74%|███████▍  | 32351/43738 [4:07:53<1:33:20,  2.03it/s]

step:10220, train_loss:0.06327183336103512, acc:0.654786559920868


 75%|███████▍  | 32656/43738 [4:10:12<1:33:45,  1.97it/s]

step:10240, train_loss:0.06328428723726374, acc:0.6543667319941205


 75%|███████▍  | 32657/43738 [4:10:12<1:22:14,  2.25it/s]

step:10240, train_loss:0.06329484376766721, acc:0.6543466944299844


 75%|███████▍  | 32658/43738 [4:10:13<1:25:34,  2.16it/s]

step:10240, train_loss:0.06329417301370972, acc:0.6543572784616327


 75%|███████▍  | 32659/43738 [4:10:13<1:35:58,  1.92it/s]

step:10240, train_loss:0.06329526244374649, acc:0.6543372424140359


 75%|███████▍  | 32660/43738 [4:10:14<1:35:01,  1.94it/s]

step:10240, train_loss:0.06329663618405121, acc:0.6543172075933864


 75%|███████▍  | 32661/43738 [4:10:14<1:38:03,  1.88it/s]

step:10240, train_loss:0.06329796545588907, acc:0.6542971739995713


 75%|███████▍  | 32662/43738 [4:10:15<1:38:33,  1.87it/s]

step:10240, train_loss:0.06329932621218438, acc:0.6542771416324781


 75%|███████▍  | 32663/43738 [4:10:16<1:53:12,  1.63it/s]

step:10240, train_loss:0.06329843045870204, acc:0.654287726173346


 75%|███████▍  | 32664/43738 [4:10:16<1:32:48,  1.99it/s]

step:10240, train_loss:0.06329649674619933, acc:0.6542983100661278


 75%|███████▍  | 32665/43738 [4:10:16<1:18:46,  2.34it/s]

step:10240, train_loss:0.0632945601910206, acc:0.6543088933108832


 75%|███████▍  | 32666/43738 [4:10:17<1:22:38,  2.23it/s]

step:10240, train_loss:0.0632952073021905, acc:0.6542888630380211


 75%|███████▍  | 32667/43738 [4:10:17<1:32:30,  1.99it/s]

step:10240, train_loss:0.06329609657611282, acc:0.6542994459240212


 75%|███████▍  | 32668/43738 [4:10:18<1:25:11,  2.17it/s]

step:10240, train_loss:0.06329446486822429, acc:0.6543100281621158


 75%|███████▍  | 32669/43738 [4:10:18<1:19:35,  2.32it/s]

step:10240, train_loss:0.06329256673622821, acc:0.6543206097523646


 75%|███████▍  | 32670/43738 [4:10:18<1:11:51,  2.57it/s]

step:10240, train_loss:0.06329087006262367, acc:0.654331190694827


 75%|███████▍  | 32671/43738 [4:10:19<1:15:33,  2.44it/s]

step:10240, train_loss:0.06328917735009984, acc:0.6543417709895626


 75%|███████▌  | 32976/43738 [4:12:38<1:45:01,  1.71it/s]

step:10260, train_loss:0.06331715409976242, acc:0.6541424065987385


 75%|███████▌  | 32977/43738 [4:12:39<1:34:02,  1.91it/s]

step:10260, train_loss:0.06331523706075073, acc:0.6541528944415805


 75%|███████▌  | 32978/43738 [4:12:39<1:33:16,  1.92it/s]

step:10260, train_loss:0.06331340886176832, acc:0.6541633816483716


 75%|███████▌  | 32979/43738 [4:12:39<1:16:44,  2.34it/s]

step:10260, train_loss:0.06331149046798372, acc:0.6541738682191698


 75%|███████▌  | 32980/43738 [4:12:40<1:07:55,  2.64it/s]

step:10260, train_loss:0.06331579094256051, acc:0.6541540327471195


 75%|███████▌  | 32981/43738 [4:12:40<1:02:43,  2.86it/s]

step:10260, train_loss:0.06331648973734978, acc:0.654164518965465


 75%|███████▌  | 32982/43738 [4:12:40<59:00,  3.04it/s]  

step:10260, train_loss:0.0633174902961089, acc:0.6541446849796859


 75%|███████▌  | 32983/43738 [4:12:41<1:23:51,  2.14it/s]

step:10260, train_loss:0.0633212655844068, acc:0.6541248521965861


 75%|███████▌  | 32984/43738 [4:12:41<1:16:26,  2.34it/s]

step:10260, train_loss:0.06332001799787731, acc:0.6541353383458647


 75%|███████▌  | 32985/43738 [4:12:42<1:08:26,  2.62it/s]

step:10260, train_loss:0.06331811799186621, acc:0.65414582385933


 75%|███████▌  | 32986/43738 [4:12:42<1:17:08,  2.32it/s]

step:10260, train_loss:0.06331744219535373, acc:0.65415630873704


 75%|███████▌  | 32987/43738 [4:12:43<1:22:16,  2.18it/s]

step:10260, train_loss:0.06331887034785029, acc:0.6541364780064874


 75%|███████▌  | 32988/43738 [4:12:43<1:17:54,  2.30it/s]

step:10260, train_loss:0.06332051490227356, acc:0.6541166484782345


 75%|███████▌  | 32989/43738 [4:12:44<1:23:01,  2.16it/s]

step:10260, train_loss:0.06331868332203071, acc:0.6541271332868532


 75%|███████▌  | 32990/43738 [4:12:44<1:17:19,  2.32it/s]

step:10260, train_loss:0.06332207571844235, acc:0.6541073052440134


 75%|███████▌  | 32991/43738 [4:12:44<1:14:33,  2.40it/s]

step:10260, train_loss:0.06332027765376874, acc:0.6541177897002213


 76%|███████▌  | 33296/43738 [4:15:04<1:49:39,  1.59it/s]

step:10280, train_loss:0.06327851581465253, acc:0.6546131667467564


 76%|███████▌  | 33297/43738 [4:15:05<1:37:42,  1.78it/s]

step:10280, train_loss:0.063276615452156, acc:0.6546235396582275


 76%|███████▌  | 33298/43738 [4:15:05<1:34:52,  1.83it/s]

step:10280, train_loss:0.06327603519191698, acc:0.6546339119466634


 76%|███████▌  | 33299/43738 [4:15:06<1:27:20,  1.99it/s]

step:10280, train_loss:0.06327568803339495, acc:0.6546442836121205


 76%|███████▌  | 33300/43738 [4:15:06<1:30:23,  1.92it/s]

step:10280, train_loss:0.06327573285991706, acc:0.6546546546546547


 76%|███████▌  | 33301/43738 [4:15:07<1:32:51,  1.87it/s]

step:10280, train_loss:0.06327383771709381, acc:0.6546650250743221


 76%|███████▌  | 33302/43738 [4:15:07<1:29:08,  1.95it/s]

step:10280, train_loss:0.06327590469842283, acc:0.654645366644646


 76%|███████▌  | 33303/43738 [4:15:08<1:32:39,  1.88it/s]

step:10280, train_loss:0.06327780047437845, acc:0.65462570939555


 76%|███████▌  | 33304/43738 [4:15:08<1:33:03,  1.87it/s]

step:10280, train_loss:0.06327607387467152, acc:0.6546360797501801


 76%|███████▌  | 33305/43738 [4:15:09<1:37:52,  1.78it/s]

step:10280, train_loss:0.0632771020170159, acc:0.6546164239603663


 76%|███████▌  | 33306/43738 [4:15:09<1:29:23,  1.95it/s]

step:10280, train_loss:0.06328054665516251, acc:0.6545967693508677


 76%|███████▌  | 33307/43738 [4:15:10<1:28:48,  1.96it/s]

step:10280, train_loss:0.06328256784371009, acc:0.6545771159215781


 76%|███████▌  | 33308/43738 [4:15:11<1:36:30,  1.80it/s]

step:10280, train_loss:0.06328511881878754, acc:0.654557463672391


 76%|███████▌  | 33309/43738 [4:15:11<1:35:53,  1.81it/s]

step:10280, train_loss:0.06328543942014848, acc:0.6545378126032003


 76%|███████▌  | 33310/43738 [4:15:12<1:29:16,  1.95it/s]

step:10280, train_loss:0.06328406469925339, acc:0.65454818372861


 76%|███████▌  | 33311/43738 [4:15:12<1:37:45,  1.78it/s]

step:10280, train_loss:0.06328451740285072, acc:0.6545285341178589


 77%|███████▋  | 33616/43738 [4:17:33<1:50:08,  1.53it/s]

step:10300, train_loss:0.06331310920572254, acc:0.6543907663017611


 77%|███████▋  | 33617/43738 [4:17:34<1:33:47,  1.80it/s]

step:10300, train_loss:0.06331126827929828, acc:0.6544010470892703


 77%|███████▋  | 33618/43738 [4:17:34<1:34:52,  1.78it/s]

step:10300, train_loss:0.06331059651994651, acc:0.6544113272651556


 77%|███████▋  | 33619/43738 [4:17:34<1:20:07,  2.10it/s]

step:10300, train_loss:0.06330946285237181, acc:0.6544216068294715


 77%|███████▋  | 33620/43738 [4:17:35<1:08:56,  2.45it/s]

step:10300, train_loss:0.06331016567918775, acc:0.6544021415823914


 77%|███████▋  | 33621/43738 [4:17:35<1:03:37,  2.65it/s]

step:10300, train_loss:0.06330831958247879, acc:0.6544124208084233


 77%|███████▋  | 33622/43738 [4:17:35<1:06:34,  2.53it/s]

step:10300, train_loss:0.06330657598267668, acc:0.6544226994229968


 77%|███████▋  | 33623/43738 [4:17:36<1:04:09,  2.63it/s]

step:10300, train_loss:0.06330672943680596, acc:0.6544329774261666


 77%|███████▋  | 33624/43738 [4:17:37<1:23:49,  2.01it/s]

step:10300, train_loss:0.06330999107041149, acc:0.6544135141565548


 77%|███████▋  | 33625/43738 [4:17:37<1:30:06,  1.87it/s]

step:10300, train_loss:0.063309939643311, acc:0.6543940520446097


 77%|███████▋  | 33626/43738 [4:17:37<1:13:50,  2.28it/s]

step:10300, train_loss:0.0633104156202693, acc:0.6543745910902278


 77%|███████▋  | 33627/43738 [4:17:38<1:15:49,  2.22it/s]

step:10300, train_loss:0.06331483258993942, acc:0.654355131293306


 77%|███████▋  | 33628/43738 [4:17:38<1:21:02,  2.08it/s]

step:10300, train_loss:0.06331309525647948, acc:0.6543654097775663


 77%|███████▋  | 33629/43738 [4:17:39<1:30:01,  1.87it/s]

step:10300, train_loss:0.06331353754482466, acc:0.6543459514109846


 77%|███████▋  | 33630/43738 [4:17:39<1:17:07,  2.18it/s]

step:10300, train_loss:0.06331186544125986, acc:0.6543562295569432


 77%|███████▋  | 33631/43738 [4:17:40<1:18:44,  2.14it/s]

step:10300, train_loss:0.06330999477711949, acc:0.6543665070916713


 78%|███████▊  | 33936/43738 [4:19:58<1:24:41,  1.93it/s]

step:10320, train_loss:0.06334010942893993, acc:0.6543493635077794


 78%|███████▊  | 33937/43738 [4:19:59<1:16:25,  2.14it/s]

step:10320, train_loss:0.06333994655403204, acc:0.654330082211156


 78%|███████▊  | 33938/43738 [4:19:59<1:21:29,  2.00it/s]

step:10320, train_loss:0.06333859689035387, acc:0.6543402675467028


 78%|███████▊  | 33939/43738 [4:20:00<1:28:36,  1.84it/s]

step:10320, train_loss:0.06334039208478176, acc:0.654320987654321


 78%|███████▊  | 33940/43738 [4:20:00<1:30:12,  1.81it/s]

step:10320, train_loss:0.06333868475251346, acc:0.6543311726576311


 78%|███████▊  | 33941/43738 [4:20:01<1:19:40,  2.05it/s]

step:10320, train_loss:0.06333705799792168, acc:0.6543413570607819


 78%|███████▊  | 33942/43738 [4:20:01<1:15:45,  2.16it/s]

step:10320, train_loss:0.06333543674876908, acc:0.6543515408638265


 78%|███████▊  | 33943/43738 [4:20:01<1:09:15,  2.36it/s]

step:10320, train_loss:0.06333377422371185, acc:0.6543617240668179


 78%|███████▊  | 33944/43738 [4:20:02<1:12:34,  2.25it/s]

step:10320, train_loss:0.06333195395203164, acc:0.654371906669809


 78%|███████▊  | 33945/43738 [4:20:02<1:04:22,  2.54it/s]

step:10320, train_loss:0.06333045292398458, acc:0.6543820886728532


 78%|███████▊  | 33946/43738 [4:20:03<1:00:52,  2.68it/s]

step:10320, train_loss:0.0633297581518558, acc:0.6543922700760031


 78%|███████▊  | 33947/43738 [4:20:03<1:11:24,  2.29it/s]

step:10320, train_loss:0.0633278930744084, acc:0.6544024508793118


 78%|███████▊  | 33948/43738 [4:20:04<1:28:49,  1.84it/s]

step:10320, train_loss:0.06332727971922526, acc:0.6544126310828325


 78%|███████▊  | 33949/43738 [4:20:04<1:25:04,  1.92it/s]

step:10320, train_loss:0.06332644396032597, acc:0.6544228106866181


 78%|███████▊  | 33950/43738 [4:20:05<1:24:23,  1.93it/s]

step:10320, train_loss:0.0633307845453716, acc:0.6544035346097202


 78%|███████▊  | 33951/43738 [4:20:05<1:18:03,  2.09it/s]

step:10320, train_loss:0.06332911346074495, acc:0.6544137138817708


 78%|███████▊  | 34256/43738 [4:22:24<53:33,  2.95it/s]  

step:10340, train_loss:0.06332564897008487, acc:0.6544546940681925


 78%|███████▊  | 34257/43738 [4:22:25<1:14:35,  2.12it/s]

step:10340, train_loss:0.0633238321470331, acc:0.6544647809206877


 78%|███████▊  | 34258/43738 [4:22:25<1:03:24,  2.49it/s]

step:10340, train_loss:0.06332228026145553, acc:0.6544748671843074


 78%|███████▊  | 34259/43738 [4:22:26<1:11:20,  2.21it/s]

step:10340, train_loss:0.06332329961249343, acc:0.6544557634490207


 78%|███████▊  | 34260/43738 [4:22:27<1:14:30,  2.12it/s]

step:10340, train_loss:0.06332181121148564, acc:0.6544658493870403


 78%|███████▊  | 34261/43738 [4:22:27<1:13:09,  2.16it/s]

step:10340, train_loss:0.06332072852288134, acc:0.6544759347362891


 78%|███████▊  | 34262/43738 [4:22:27<1:12:05,  2.19it/s]

step:10340, train_loss:0.0633258086010089, acc:0.6544568326425778


 78%|███████▊  | 34263/43738 [4:22:28<1:23:52,  1.88it/s]

step:10340, train_loss:0.06332818108346407, acc:0.654437731663894


 78%|███████▊  | 34264/43738 [4:22:28<1:13:21,  2.15it/s]

step:10340, train_loss:0.06332644587618977, acc:0.6544478169507355


 78%|███████▊  | 34265/43738 [4:22:29<1:12:40,  2.17it/s]

step:10340, train_loss:0.06332497923582726, acc:0.6544579016489129


 78%|███████▊  | 34266/43738 [4:22:29<1:06:07,  2.39it/s]

step:10340, train_loss:0.06332314849192229, acc:0.6544679857584778


 78%|███████▊  | 34267/43738 [4:22:29<59:49,  2.64it/s]  

step:10340, train_loss:0.06332141945776716, acc:0.6544780692794817


 78%|███████▊  | 34268/43738 [4:22:30<59:09,  2.67it/s]

step:10340, train_loss:0.06332208333236011, acc:0.6544881522119762


 78%|███████▊  | 34269/43738 [4:22:30<59:28,  2.65it/s]

step:10340, train_loss:0.06332117058761066, acc:0.6544982345560127


 78%|███████▊  | 34270/43738 [4:22:31<1:10:29,  2.24it/s]

step:10340, train_loss:0.06331940886431492, acc:0.6545083163116429


 78%|███████▊  | 34271/43738 [4:22:31<1:18:20,  2.01it/s]

step:10340, train_loss:0.0633208812693109, acc:0.6544892182895159


 79%|███████▉  | 34576/43738 [4:24:51<1:12:38,  2.10it/s]

step:10360, train_loss:0.06326241003598301, acc:0.6550497454881999


 79%|███████▉  | 34577/43738 [4:24:51<1:13:06,  2.09it/s]

step:10360, train_loss:0.06326334034065217, acc:0.6550308008213552


 79%|███████▉  | 34578/43738 [4:24:52<1:10:18,  2.17it/s]

step:10360, train_loss:0.0632620737027766, acc:0.655040777372896


 79%|███████▉  | 34579/43738 [4:24:52<1:00:53,  2.51it/s]

step:10360, train_loss:0.06326088062136839, acc:0.6550507533474074


 79%|███████▉  | 34580/43738 [4:24:52<52:24,  2.91it/s]  

step:10360, train_loss:0.06325905129921704, acc:0.6550607287449393


 79%|███████▉  | 34581/43738 [4:24:53<1:09:39,  2.19it/s]

step:10360, train_loss:0.06326080238923007, acc:0.6550417859518233


 79%|███████▉  | 34582/43738 [4:24:53<1:11:35,  2.13it/s]

step:10360, train_loss:0.06326536515452615, acc:0.6550228442542363


 79%|███████▉  | 34583/43738 [4:24:54<1:06:06,  2.31it/s]

step:10360, train_loss:0.06326362124866736, acc:0.6550328195934418


 79%|███████▉  | 34584/43738 [4:24:54<1:04:32,  2.36it/s]

step:10360, train_loss:0.06326378037287216, acc:0.6550427943557715


 79%|███████▉  | 34585/43738 [4:24:55<1:21:15,  1.88it/s]

step:10360, train_loss:0.06326624276706057, acc:0.6550238542720833


 79%|███████▉  | 34586/43738 [4:24:55<1:15:12,  2.03it/s]

step:10360, train_loss:0.06326455641210026, acc:0.6550338287168218


 79%|███████▉  | 34587/43738 [4:24:56<1:07:23,  2.26it/s]

step:10360, train_loss:0.0632627287739253, acc:0.6550438025847862


 79%|███████▉  | 34588/43738 [4:24:56<1:07:28,  2.26it/s]

step:10360, train_loss:0.0632617445429824, acc:0.6550537758760264


 79%|███████▉  | 34589/43738 [4:24:57<1:05:18,  2.33it/s]

step:10360, train_loss:0.06326511807221467, acc:0.6550348376651537


 79%|███████▉  | 34590/43738 [4:24:57<56:59,  2.67it/s]  

step:10360, train_loss:0.06327123998006652, acc:0.6550159005492917


 79%|███████▉  | 34591/43738 [4:24:58<1:16:04,  2.00it/s]

step:10360, train_loss:0.06326964200745519, acc:0.6550258737821977


 80%|███████▉  | 34897/43738 [4:27:17<1:02:34,  2.35it/s]

step:10380, train_loss:0.06334322245095832, acc:0.6545449335167355
step:10380, train_loss:0.06334153384724664, acc:0.6545548327936499


 80%|███████▉  | 34899/43738 [4:27:18<56:38,  2.60it/s]  

step:10380, train_loss:0.06333986677999584, acc:0.654564731503238
step:10380, train_loss:0.06333807088904901, acc:0.6545746296455486


 80%|███████▉  | 34900/43738 [4:27:19<1:13:24,  2.01it/s]

step:10380, train_loss:0.0633396547501303, acc:0.6545558739255014


 80%|███████▉  | 34901/43738 [4:27:19<1:08:06,  2.16it/s]

step:10380, train_loss:0.06334125074972446, acc:0.6545371192802498


 80%|███████▉  | 34902/43738 [4:27:20<58:44,  2.51it/s]  

step:10380, train_loss:0.06333959374274205, acc:0.6545470173629019


 80%|███████▉  | 34903/43738 [4:27:20<1:00:38,  2.43it/s]

step:10380, train_loss:0.06334210462312023, acc:0.6545282640460706


 80%|███████▉  | 34904/43738 [4:27:20<54:32,  2.70it/s]  

step:10380, train_loss:0.06334039830453811, acc:0.6545381618152647


 80%|███████▉  | 34905/43738 [4:27:21<1:12:09,  2.04it/s]

step:10380, train_loss:0.06334556681822241, acc:0.6545194098266724


 80%|███████▉  | 34906/43738 [4:27:21<1:03:47,  2.31it/s]

step:10380, train_loss:0.06334562500236526, acc:0.6545006589125079


 80%|███████▉  | 34907/43738 [4:27:22<1:00:25,  2.44it/s]

step:10380, train_loss:0.06334430821856679, acc:0.654510556621881


 80%|███████▉  | 34908/43738 [4:27:22<1:10:14,  2.10it/s]

step:10380, train_loss:0.06334295415149815, acc:0.6545204537641801


 80%|███████▉  | 34909/43738 [4:27:23<1:05:52,  2.23it/s]

step:10380, train_loss:0.06334148096808379, acc:0.6545303503394541


 80%|███████▉  | 34910/43738 [4:27:24<1:23:39,  1.76it/s]

step:10380, train_loss:0.06334070499285417, acc:0.6545402463477513


 80%|███████▉  | 34911/43738 [4:27:24<1:32:36,  1.59it/s]

step:10380, train_loss:0.06334828644605164, acc:0.654521497522271


 81%|████████  | 35216/43738 [4:29:43<54:31,  2.60it/s]  

step:10400, train_loss:0.0633715266102751, acc:0.6545320308950477


 81%|████████  | 35217/43738 [4:29:44<1:04:09,  2.21it/s]

step:10400, train_loss:0.06337923013142512, acc:0.6545134452111196


 81%|████████  | 35218/43738 [4:29:44<1:18:19,  1.81it/s]

step:10400, train_loss:0.06337768995770464, acc:0.6545232551536146


 81%|████████  | 35219/43738 [4:29:45<1:13:30,  1.93it/s]

step:10400, train_loss:0.06337786827501427, acc:0.6545330645390273


 81%|████████  | 35220/43738 [4:29:45<1:11:18,  1.99it/s]

step:10400, train_loss:0.06337641764877326, acc:0.6545428733674049


 81%|████████  | 35221/43738 [4:29:46<1:15:12,  1.89it/s]

step:10400, train_loss:0.06337494686029349, acc:0.654552681638795


 81%|████████  | 35222/43738 [4:29:46<1:02:37,  2.27it/s]

step:10400, train_loss:0.06337333655498795, acc:0.6545624893532451


 81%|████████  | 35223/43738 [4:29:46<55:22,  2.56it/s]  

step:10400, train_loss:0.06337289333264441, acc:0.6545722965108026


 81%|████████  | 35224/43738 [4:29:47<56:56,  2.49it/s]

step:10400, train_loss:0.06337174045264703, acc:0.6545821031115149


 81%|████████  | 35225/43738 [4:29:47<58:36,  2.42it/s]

step:10400, train_loss:0.06337084643606056, acc:0.6545919091554294


 81%|████████  | 35226/43738 [4:29:48<58:19,  2.43it/s]

step:10400, train_loss:0.06336905341574095, acc:0.6546017146425935


 81%|████████  | 35227/43738 [4:29:48<1:01:06,  2.32it/s]

step:10400, train_loss:0.06336730174983654, acc:0.6546115195730547


 81%|████████  | 35228/43738 [4:29:48<59:08,  2.40it/s]  

step:10400, train_loss:0.0633655539234745, acc:0.6546213239468605


 81%|████████  | 35229/43738 [4:29:49<57:30,  2.47it/s]

step:10400, train_loss:0.06336505205844772, acc:0.6546027420590991


 81%|████████  | 35230/43738 [4:29:49<58:08,  2.44it/s]

step:10400, train_loss:0.06336801414804227, acc:0.6545841612262276


 81%|████████  | 35231/43738 [4:29:50<1:13:11,  1.94it/s]

step:10400, train_loss:0.06336628331164142, acc:0.6545939655417105


 81%|████████  | 35536/43738 [4:32:10<1:19:52,  1.71it/s]

step:10420, train_loss:0.06338785458886426, acc:0.6547444844664565


 81%|████████  | 35537/43738 [4:32:11<1:24:53,  1.61it/s]

step:10420, train_loss:0.06338649196142565, acc:0.6547541998480457


 81%|████████▏ | 35538/43738 [4:32:12<1:18:11,  1.75it/s]

step:10420, train_loss:0.063385195205895, acc:0.6547639146828746


 81%|████████▏ | 35539/43738 [4:32:12<1:06:11,  2.06it/s]

step:10420, train_loss:0.06338372902813544, acc:0.6547736289709896


 81%|████████▏ | 35540/43738 [4:32:12<1:05:11,  2.10it/s]

step:10420, train_loss:0.06338380962582828, acc:0.6547833427124367


 81%|████████▏ | 35541/43738 [4:32:13<1:09:27,  1.97it/s]

step:10420, train_loss:0.06338287037755574, acc:0.654793055907262


 81%|████████▏ | 35542/43738 [4:32:13<1:09:08,  1.98it/s]

step:10420, train_loss:0.063385493012352, acc:0.6547746328287659


 81%|████████▏ | 35543/43738 [4:32:14<1:19:29,  1.72it/s]

step:10420, train_loss:0.0633863752256455, acc:0.6547562107869341


 81%|████████▏ | 35544/43738 [4:32:15<1:21:36,  1.67it/s]

step:10420, train_loss:0.06339122300022881, acc:0.6547377897816791


 81%|████████▏ | 35545/43738 [4:32:15<1:10:23,  1.94it/s]

step:10420, train_loss:0.06339079789101353, acc:0.6547475031650021


 81%|████████▏ | 35546/43738 [4:32:16<1:21:07,  1.68it/s]

step:10420, train_loss:0.06339330643713763, acc:0.6547290834411749


 81%|████████▏ | 35547/43738 [4:32:16<1:14:35,  1.83it/s]

step:10420, train_loss:0.06339413016559838, acc:0.6547106647537063


 81%|████████▏ | 35548/43738 [4:32:17<1:18:28,  1.74it/s]

step:10420, train_loss:0.06339322034295816, acc:0.6547203780803421


 81%|████████▏ | 35549/43738 [4:32:17<1:05:23,  2.09it/s]

step:10420, train_loss:0.06339365506984206, acc:0.6547019606739992


 81%|████████▏ | 35550/43738 [4:32:17<57:02,  2.39it/s]  

step:10420, train_loss:0.06339402619775342, acc:0.6546835443037975


 81%|████████▏ | 35551/43738 [4:32:18<50:36,  2.70it/s]

step:10420, train_loss:0.06339224578234601, acc:0.6546932575736266


 82%|████████▏ | 35856/43738 [4:34:41<54:54,  2.39it/s]  

step:10440, train_loss:0.06346277114603499, acc:0.6543116912092816


 82%|████████▏ | 35857/43738 [4:34:41<51:10,  2.57it/s]

step:10440, train_loss:0.06346194498850716, acc:0.6543213319574979


 82%|████████▏ | 35858/43738 [4:34:41<48:16,  2.72it/s]

step:10440, train_loss:0.06346131397015217, acc:0.654330972167996


 82%|████████▏ | 35859/43738 [4:34:42<58:03,  2.26it/s]

step:10440, train_loss:0.06346004712391312, acc:0.654340611840821


 82%|████████▏ | 35860/43738 [4:34:42<53:02,  2.48it/s]

step:10440, train_loss:0.06345828771494282, acc:0.6543502509760178


 82%|████████▏ | 35861/43738 [4:34:43<47:10,  2.78it/s]

step:10440, train_loss:0.06345653316882376, acc:0.6543598895736316


 82%|████████▏ | 35862/43738 [4:34:43<43:39,  3.01it/s]

step:10440, train_loss:0.06345483664662686, acc:0.654369527633707


 82%|████████▏ | 35863/43738 [4:34:44<1:01:51,  2.12it/s]

step:10440, train_loss:0.06345649552027581, acc:0.6543512812648133


 82%|████████▏ | 35864/43738 [4:34:44<1:00:00,  2.19it/s]

step:10440, train_loss:0.063456076292024, acc:0.6543330359134508


 82%|████████▏ | 35865/43738 [4:34:45<1:12:57,  1.80it/s]

step:10440, train_loss:0.0634551458815534, acc:0.6543426739160741


 82%|████████▏ | 35866/43738 [4:34:45<1:05:11,  2.01it/s]

step:10440, train_loss:0.06345589790045915, acc:0.6543523113812525


 82%|████████▏ | 35867/43738 [4:34:46<55:21,  2.37it/s]  

step:10440, train_loss:0.06345413037885965, acc:0.6543619483090306


 82%|████████▏ | 35868/43738 [4:34:46<51:17,  2.56it/s]

step:10440, train_loss:0.06345242645857713, acc:0.6543715846994536


 82%|████████▏ | 35869/43738 [4:34:46<46:22,  2.83it/s]

step:10440, train_loss:0.06345464306030779, acc:0.6543533413253785


 82%|████████▏ | 35870/43738 [4:34:47<56:16,  2.33it/s]

step:10440, train_loss:0.0634560705824062, acc:0.6543350989684974


 82%|████████▏ | 35871/43738 [4:34:48<1:11:46,  1.83it/s]

step:10440, train_loss:0.06345886169489015, acc:0.6543168576287252


 83%|████████▎ | 36176/43738 [4:37:10<58:11,  2.17it/s]  

step:10460, train_loss:0.0634650546827903, acc:0.6539418398938522


 83%|████████▎ | 36177/43738 [4:37:10<49:30,  2.55it/s]

step:10460, train_loss:0.06346364943136303, acc:0.6539514055891865


 83%|████████▎ | 36178/43738 [4:37:10<46:09,  2.73it/s]

step:10460, train_loss:0.06346332433149664, acc:0.6539609707557079


 83%|████████▎ | 36179/43738 [4:37:11<49:41,  2.54it/s]

step:10460, train_loss:0.06346273973456772, acc:0.6539705353934603


 83%|████████▎ | 36180/43738 [4:37:11<50:01,  2.52it/s]

step:10460, train_loss:0.063475462601197, acc:0.6539524599226092


 83%|████████▎ | 36181/43738 [4:37:11<51:33,  2.44it/s]

step:10460, train_loss:0.06347667253826, acc:0.6539343854509273


 83%|████████▎ | 36182/43738 [4:37:12<46:31,  2.71it/s]

step:10460, train_loss:0.06347491826963891, acc:0.6539439500304018


 83%|████████▎ | 36183/43738 [4:37:12<41:54,  3.00it/s]

step:10460, train_loss:0.06347316978614831, acc:0.6539535140811984


 83%|████████▎ | 36184/43738 [4:37:12<50:42,  2.48it/s]

step:10460, train_loss:0.06347141677295733, acc:0.6539630776033606


 83%|████████▎ | 36185/43738 [4:37:13<52:43,  2.39it/s]

step:10460, train_loss:0.06347354206000831, acc:0.6539450048362582


 83%|████████▎ | 36186/43738 [4:37:14<1:06:33,  1.89it/s]

step:10460, train_loss:0.06347354127212303, acc:0.6539545680649975


 83%|████████▎ | 36187/43738 [4:37:14<55:22,  2.27it/s]  

step:10460, train_loss:0.06347181471346468, acc:0.6539641307651919


 83%|████████▎ | 36188/43738 [4:37:14<53:23,  2.36it/s]

step:10460, train_loss:0.06347016025885, acc:0.6539736929368851


 83%|████████▎ | 36189/43738 [4:37:15<54:46,  2.30it/s]

step:10460, train_loss:0.06346916162684915, acc:0.6539832545801211


 83%|████████▎ | 36190/43738 [4:37:15<50:27,  2.49it/s]

step:10460, train_loss:0.06347000533877499, acc:0.6539651837524177


 83%|████████▎ | 36191/43738 [4:37:16<1:00:20,  2.08it/s]

step:10460, train_loss:0.06346955252646844, acc:0.6539471139233511


 83%|████████▎ | 36496/43738 [4:39:42<47:54,  2.52it/s]  

step:10480, train_loss:0.06350805344348416, acc:0.6536880754055239


 83%|████████▎ | 36497/43738 [4:39:43<54:36,  2.21it/s]

step:10480, train_loss:0.06350842450949923, acc:0.6536701646710689


 83%|████████▎ | 36498/43738 [4:39:43<46:44,  2.58it/s]

step:10480, train_loss:0.06350668688042042, acc:0.6536796536796536


 83%|████████▎ | 36499/43738 [4:39:43<45:46,  2.64it/s]

step:10480, train_loss:0.06350780420352235, acc:0.6536617441573742


 83%|████████▎ | 36500/43738 [4:39:43<40:46,  2.96it/s]

step:10480, train_loss:0.06350607143501809, acc:0.6536712328767124


 83%|████████▎ | 36501/43738 [4:39:44<41:30,  2.91it/s]

step:10480, train_loss:0.06350580905304355, acc:0.653680721076135


 83%|████████▎ | 36502/43738 [4:39:44<44:04,  2.74it/s]

step:10480, train_loss:0.06350777985767123, acc:0.6536628129965482


 83%|████████▎ | 36503/43738 [4:39:45<44:27,  2.71it/s]

step:10480, train_loss:0.06350861186442841, acc:0.6536723009067748


 83%|████████▎ | 36504/43738 [4:39:45<48:29,  2.49it/s]

step:10480, train_loss:0.06351026428221393, acc:0.6536543940390094


 83%|████████▎ | 36505/43738 [4:39:46<54:23,  2.22it/s]

step:10480, train_loss:0.06350852490394052, acc:0.6536638816600465


 83%|████████▎ | 36506/43738 [4:39:46<52:59,  2.27it/s]

step:10480, train_loss:0.06350680946992016, acc:0.6536733687612996


 83%|████████▎ | 36507/43738 [4:39:46<46:33,  2.59it/s]

step:10480, train_loss:0.06350507728867205, acc:0.653682855342811


 83%|████████▎ | 36508/43738 [4:39:47<46:57,  2.57it/s]

step:10480, train_loss:0.06350952884380592, acc:0.6536649501479128


 83%|████████▎ | 36509/43738 [4:39:47<44:27,  2.71it/s]

step:10480, train_loss:0.06350788888861755, acc:0.6536744364403297


 83%|████████▎ | 36510/43738 [4:39:48<47:12,  2.55it/s]

step:10480, train_loss:0.06350701525517698, acc:0.6536839222130923


 83%|████████▎ | 36511/43738 [4:39:48<50:18,  2.39it/s]

step:10480, train_loss:0.0635078501241894, acc:0.6536660184601901


 84%|████████▍ | 36816/43738 [4:42:03<51:57,  2.22it/s]  

step:10500, train_loss:0.06353447931826449, acc:0.6534387222946545


 84%|████████▍ | 36817/43738 [4:42:04<53:01,  2.18it/s]

step:10500, train_loss:0.06353280545985039, acc:0.6534481353722466


 84%|████████▍ | 36818/43738 [4:42:04<48:24,  2.38it/s]

step:10500, train_loss:0.06353164737319476, acc:0.6534575479385083


 84%|████████▍ | 36819/43738 [4:42:04<46:20,  2.49it/s]

step:10500, train_loss:0.06353102432830955, acc:0.6534669599934816


 84%|████████▍ | 36820/43738 [4:42:05<45:35,  2.53it/s]

step:10500, train_loss:0.06352935499481907, acc:0.6534763715372081


 84%|████████▍ | 36821/43738 [4:42:05<48:36,  2.37it/s]

step:10500, train_loss:0.06353431051340974, acc:0.6534586241546944


 84%|████████▍ | 36822/43738 [4:42:05<43:27,  2.65it/s]

step:10500, train_loss:0.06353360903244329, acc:0.6534680354136114


 84%|████████▍ | 36823/43738 [4:42:06<41:57,  2.75it/s]

step:10500, train_loss:0.06354056683013497, acc:0.6534502892214106


 84%|████████▍ | 36824/43738 [4:42:06<37:27,  3.08it/s]

step:10500, train_loss:0.06353891853834212, acc:0.6534597001955247


 84%|████████▍ | 36825/43738 [4:42:06<38:37,  2.98it/s]

step:10500, train_loss:0.06353801831998689, acc:0.65346911065852


 84%|████████▍ | 36826/43738 [4:42:07<38:14,  3.01it/s]

step:10500, train_loss:0.06353829042531015, acc:0.6534513658828002


 84%|████████▍ | 36827/43738 [4:42:07<35:57,  3.20it/s]

step:10500, train_loss:0.06353659050414656, acc:0.6534607760610421


 84%|████████▍ | 36828/43738 [4:42:07<42:46,  2.69it/s]

step:10500, train_loss:0.06353708466857344, acc:0.6534701857282502


 84%|████████▍ | 36829/43738 [4:42:08<47:40,  2.42it/s]

step:10500, train_loss:0.0635377892651904, acc:0.6534524423687854


 84%|████████▍ | 36830/43738 [4:42:09<54:41,  2.11it/s]

step:10500, train_loss:0.0635397377589683, acc:0.6534346999728482


 84%|████████▍ | 36831/43738 [4:42:09<57:25,  2.00it/s]

step:10500, train_loss:0.0635422006158912, acc:0.65341695854036


 85%|████████▍ | 37136/43738 [4:44:28<51:59,  2.12it/s]  

step:10520, train_loss:0.06355716730505437, acc:0.6531128823782852


 85%|████████▍ | 37137/43738 [4:44:28<52:24,  2.10it/s]

step:10520, train_loss:0.06355709916914797, acc:0.6531222231197996


 85%|████████▍ | 37138/43738 [4:44:29<57:44,  1.90it/s]

step:10520, train_loss:0.06355613139104002, acc:0.6531315633582853


 85%|████████▍ | 37139/43738 [4:44:30<1:00:14,  1.83it/s]

step:10520, train_loss:0.06355616469014287, acc:0.6531409030937828


 85%|████████▍ | 37140/43738 [4:44:30<53:02,  2.07it/s]  

step:10520, train_loss:0.0635544773486844, acc:0.6531502423263328


 85%|████████▍ | 37141/43738 [4:44:31<1:03:26,  1.73it/s]

step:10520, train_loss:0.0635537235769391, acc:0.6531595810559758


 85%|████████▍ | 37142/43738 [4:44:31<1:05:32,  1.68it/s]

step:10520, train_loss:0.06355208273121639, acc:0.6531689192827527


 85%|████████▍ | 37143/43738 [4:44:32<56:48,  1.93it/s]  

step:10520, train_loss:0.06355193388646595, acc:0.6531513340333306


 85%|████████▍ | 37144/43738 [4:44:32<52:45,  2.08it/s]

step:10520, train_loss:0.06355137321258908, acc:0.6531606719793237


 85%|████████▍ | 37145/43738 [4:44:33<1:01:05,  1.80it/s]

step:10520, train_loss:0.0635524741982077, acc:0.6531430878987751


 85%|████████▍ | 37147/43738 [4:44:33<43:52,  2.50it/s]  

step:10520, train_loss:0.06355577400577833, acc:0.6531255047649814
step:10520, train_loss:0.06355516573105022, acc:0.6531348426521657


 85%|████████▍ | 37148/43738 [4:44:34<53:14,  2.06it/s]

step:10520, train_loss:0.06355454528630823, acc:0.6531441800366103


 85%|████████▍ | 37149/43738 [4:44:34<51:07,  2.15it/s]

step:10520, train_loss:0.06355283570955132, acc:0.6531535169183558


 85%|████████▍ | 37150/43738 [4:44:35<56:13,  1.95it/s]

step:10520, train_loss:0.06355492919289712, acc:0.6531359353970391


 85%|████████▍ | 37151/43738 [4:44:35<48:22,  2.27it/s]

step:10520, train_loss:0.06355835097091685, acc:0.653118354822212


 86%|████████▌ | 37456/43738 [4:46:53<41:21,  2.53it/s]  

step:10540, train_loss:0.0635399270165271, acc:0.6530862879111491


 86%|████████▌ | 37457/43738 [4:46:53<39:09,  2.67it/s]

step:10540, train_loss:0.06353823074016697, acc:0.6530955495634995


 86%|████████▌ | 37458/43738 [4:46:54<45:42,  2.29it/s]

step:10540, train_loss:0.06353668586214671, acc:0.6531048107213412


 86%|████████▌ | 37459/43738 [4:46:54<39:22,  2.66it/s]

step:10540, train_loss:0.0635380559086122, acc:0.65308737553058


 86%|████████▌ | 37460/43738 [4:46:55<48:51,  2.14it/s]

step:10540, train_loss:0.06353658715980613, acc:0.653096636412173


 86%|████████▌ | 37461/43738 [4:46:55<46:10,  2.27it/s]

step:10540, train_loss:0.06353524990680923, acc:0.653105896799338


 86%|████████▌ | 37462/43738 [4:46:56<51:07,  2.05it/s]

step:10540, train_loss:0.06353626766505055, acc:0.6530884629758155


 86%|████████▌ | 37463/43738 [4:46:56<50:42,  2.06it/s]

step:10540, train_loss:0.06353703664609231, acc:0.6530977230867789


 86%|████████▌ | 37464/43738 [4:46:57<47:55,  2.18it/s]

step:10540, train_loss:0.06353536024264676, acc:0.6531069827033953


 86%|████████▌ | 37465/43738 [4:46:57<40:54,  2.56it/s]

step:10540, train_loss:0.06353372941979227, acc:0.6531162418257039


 86%|████████▌ | 37466/43738 [4:46:57<41:36,  2.51it/s]

step:10540, train_loss:0.06353216793177725, acc:0.6531255004537447


 86%|████████▌ | 37467/43738 [4:46:58<52:36,  1.99it/s]

step:10540, train_loss:0.06353694096631377, acc:0.6531080684335548


 86%|████████▌ | 37468/43738 [4:46:59<54:57,  1.90it/s]

step:10540, train_loss:0.06353823833378529, acc:0.6530906373438667


 86%|████████▌ | 37469/43738 [4:46:59<51:04,  2.05it/s]

step:10540, train_loss:0.0635388330952887, acc:0.6530732071846059


 86%|████████▌ | 37470/43738 [4:47:00<47:50,  2.18it/s]

step:10540, train_loss:0.06353729080406419, acc:0.6530824659727782


 86%|████████▌ | 37471/43738 [4:47:00<43:27,  2.40it/s]

step:10540, train_loss:0.06353596709236758, acc:0.6530917242667663


 86%|████████▋ | 37776/43738 [4:49:22<49:57,  1.99it/s]  

step:10560, train_loss:0.0635228331206261, acc:0.6531660313426514


 86%|████████▋ | 37777/43738 [4:49:22<48:00,  2.07it/s]

step:10560, train_loss:0.06352131239941072, acc:0.6531752124308442


 86%|████████▋ | 37778/43738 [4:49:23<41:01,  2.42it/s]

step:10560, train_loss:0.06351964509913852, acc:0.6531843930329821


 86%|████████▋ | 37779/43738 [4:49:23<38:28,  2.58it/s]

step:10560, train_loss:0.06351931547924289, acc:0.653193573149104


 86%|████████▋ | 37780/43738 [4:49:23<40:22,  2.46it/s]

step:10560, train_loss:0.06351882628685912, acc:0.6532027527792483


 86%|████████▋ | 37781/43738 [4:49:24<38:09,  2.60it/s]

step:10560, train_loss:0.06351883228925922, acc:0.6531854635928112


 86%|████████▋ | 37782/43738 [4:49:24<38:47,  2.56it/s]

step:10560, train_loss:0.06351715299020377, acc:0.6531946429516701


 86%|████████▋ | 37783/43738 [4:49:25<46:04,  2.15it/s]

step:10560, train_loss:0.06351765010916564, acc:0.6532038218246301


 86%|████████▋ | 37784/43738 [4:49:25<43:53,  2.26it/s]

step:10560, train_loss:0.06351828062762513, acc:0.6531865339826382


 86%|████████▋ | 37785/43738 [4:49:25<37:42,  2.63it/s]

step:10560, train_loss:0.06351660646997663, acc:0.6531957125843588


 86%|████████▋ | 37786/43738 [4:49:26<35:56,  2.76it/s]

step:10560, train_loss:0.06351495561987283, acc:0.6532048907002593


 86%|████████▋ | 37787/43738 [4:49:26<37:23,  2.65it/s]

step:10560, train_loss:0.06351344528226266, acc:0.6532140683303782


 86%|████████▋ | 37788/43738 [4:49:27<40:21,  2.46it/s]

step:10560, train_loss:0.06351486153784289, acc:0.6531967820472108


 86%|████████▋ | 37789/43738 [4:49:27<41:01,  2.42it/s]

step:10560, train_loss:0.06351319250991568, acc:0.6532059594061764


 86%|████████▋ | 37790/43738 [4:49:27<40:22,  2.46it/s]

step:10560, train_loss:0.0635115128583608, acc:0.6532151362794391


 86%|████████▋ | 37791/43738 [4:49:28<41:56,  2.36it/s]

step:10560, train_loss:0.06351014273482519, acc:0.6532243126670372


 87%|████████▋ | 38096/43738 [4:51:47<49:38,  1.89it/s]  

step:10580, train_loss:0.06350924711907423, acc:0.6533231835363292


 87%|████████▋ | 38097/43738 [4:51:48<45:33,  2.06it/s]

step:10580, train_loss:0.06350803066480132, acc:0.6533322833818936


 87%|████████▋ | 38098/43738 [4:51:48<51:47,  1.82it/s]

step:10580, train_loss:0.06350734108898755, acc:0.6533413827497506


 87%|████████▋ | 38099/43738 [4:51:49<51:53,  1.81it/s]

step:10580, train_loss:0.06350930060888736, acc:0.6533242342318696


 87%|████████▋ | 38100/43738 [4:51:49<42:56,  2.19it/s]

step:10580, train_loss:0.06350811923621681, acc:0.6533333333333333


 87%|████████▋ | 38101/43738 [4:51:49<39:05,  2.40it/s]

step:10580, train_loss:0.0635077533555364, acc:0.6533424319571665


 87%|████████▋ | 38102/43738 [4:51:50<33:27,  2.81it/s]

step:10580, train_loss:0.0635060868521002, acc:0.6533515301034066


 87%|████████▋ | 38103/43738 [4:51:50<42:00,  2.24it/s]

step:10580, train_loss:0.06350758986615533, acc:0.6533343831194394


 87%|████████▋ | 38104/43738 [4:51:51<48:54,  1.92it/s]

step:10580, train_loss:0.06351155371768918, acc:0.6533172370354818


 87%|████████▋ | 38105/43738 [4:51:51<44:06,  2.13it/s]

step:10580, train_loss:0.06351315752013811, acc:0.6533000918514631


 87%|████████▋ | 38106/43738 [4:51:52<39:00,  2.41it/s]

step:10580, train_loss:0.06351191759232915, acc:0.6533091901537815


 87%|████████▋ | 38107/43738 [4:51:52<38:14,  2.45it/s]

step:10580, train_loss:0.06351050436082085, acc:0.6533182879785866


 87%|████████▋ | 38108/43738 [4:51:53<42:57,  2.18it/s]

step:10580, train_loss:0.06351050066845493, acc:0.6533011441167209


 87%|████████▋ | 38109/43738 [4:51:53<45:45,  2.05it/s]

step:10580, train_loss:0.06351366582518256, acc:0.653284001154583


 87%|████████▋ | 38110/43738 [4:51:54<47:37,  1.97it/s]

step:10580, train_loss:0.06351687102782483, acc:0.6532668590921018


 87%|████████▋ | 38111/43738 [4:51:54<46:25,  2.02it/s]

step:10580, train_loss:0.06351732052433376, acc:0.6532497179292068


 88%|████████▊ | 38416/43738 [4:54:15<47:31,  1.87it/s]  

step:10600, train_loss:0.06354842061225328, acc:0.6532694710537276


 88%|████████▊ | 38417/43738 [4:54:15<48:37,  1.82it/s]

step:10600, train_loss:0.06354848036416953, acc:0.6532784964989458


 88%|████████▊ | 38418/43738 [4:54:16<46:08,  1.92it/s]

step:10600, train_loss:0.06355029179881398, acc:0.6532614920089541


 88%|████████▊ | 38419/43738 [4:54:16<49:19,  1.80it/s]

step:10600, train_loss:0.06354922862181431, acc:0.6532705171920143


 88%|████████▊ | 38420/43738 [4:54:17<40:40,  2.18it/s]

step:10600, train_loss:0.06355233300355427, acc:0.6532535137948985


 88%|████████▊ | 38421/43738 [4:54:18<49:47,  1.78it/s]

step:10600, train_loss:0.063551253108257, acc:0.6532625387158064


 88%|████████▊ | 38422/43738 [4:54:18<47:14,  1.88it/s]

step:10600, train_loss:0.06355356908332455, acc:0.6532455364114309


 88%|████████▊ | 38423/43738 [4:54:18<41:38,  2.13it/s]

step:10600, train_loss:0.06355211414457014, acc:0.6532545610701923


 88%|████████▊ | 38424/43738 [4:54:19<39:50,  2.22it/s]

step:10600, train_loss:0.06355046500897595, acc:0.653263585259213


 88%|████████▊ | 38425/43738 [4:54:20<49:09,  1.80it/s]

step:10600, train_loss:0.06354887345346313, acc:0.6532726089785296


 88%|████████▊ | 38426/43738 [4:54:20<40:56,  2.16it/s]

step:10600, train_loss:0.06354960518373365, acc:0.6532556081819602


 88%|████████▊ | 38427/43738 [4:54:20<39:16,  2.25it/s]

step:10600, train_loss:0.06355430834773365, acc:0.6532386082702266


 88%|████████▊ | 38428/43738 [4:54:21<39:13,  2.26it/s]

step:10600, train_loss:0.06355268202394354, acc:0.6532476319350473


 88%|████████▊ | 38429/43738 [4:54:21<38:35,  2.29it/s]

step:10600, train_loss:0.0635521064312406, acc:0.6532566551302402


 88%|████████▊ | 38430/43738 [4:54:21<33:54,  2.61it/s]

step:10600, train_loss:0.06355264224657153, acc:0.653239656518345


 88%|████████▊ | 38431/43738 [4:54:22<33:15,  2.66it/s]

step:10600, train_loss:0.06355141788490326, acc:0.6532486794514845


 89%|████████▊ | 38736/43738 [4:56:42<48:07,  1.73it/s]

step:10620, train_loss:0.06357143269075431, acc:0.6530617513424205


 89%|████████▊ | 38737/43738 [4:56:43<42:53,  1.94it/s]

step:10620, train_loss:0.06357001705827178, acc:0.6530707075922245


 89%|████████▊ | 38738/43738 [4:56:43<43:32,  1.91it/s]

step:10620, train_loss:0.06356857538857116, acc:0.6530796633796272


 89%|████████▊ | 38739/43738 [4:56:44<40:18,  2.07it/s]

step:10620, train_loss:0.06356922379203477, acc:0.6530628049252691


 89%|████████▊ | 38740/43738 [4:56:44<43:08,  1.93it/s]

step:10620, train_loss:0.06356808106860411, acc:0.6530717604543108


 89%|████████▊ | 38741/43738 [4:56:45<45:02,  1.85it/s]

step:10620, train_loss:0.06356646652842064, acc:0.6530807155210242


 89%|████████▊ | 38742/43738 [4:56:45<40:00,  2.08it/s]

step:10620, train_loss:0.06356867028971297, acc:0.6530638583449486


 89%|████████▊ | 38743/43738 [4:56:46<35:10,  2.37it/s]

step:10620, train_loss:0.0635691732708267, acc:0.6530728131533439


 89%|████████▊ | 38744/43738 [4:56:46<43:14,  1.92it/s]

step:10620, train_loss:0.0635684702059332, acc:0.6530817674994838


 89%|████████▊ | 38745/43738 [4:56:47<38:24,  2.17it/s]

step:10620, train_loss:0.06356685415862882, acc:0.6530907213834043


 89%|████████▊ | 38746/43738 [4:56:47<38:53,  2.14it/s]

step:10620, train_loss:0.0635660259775913, acc:0.6530996748051412


 89%|████████▊ | 38747/43738 [4:56:48<38:51,  2.14it/s]

step:10620, train_loss:0.06356505375833721, acc:0.6531086277647302


 89%|████████▊ | 38748/43738 [4:56:48<44:25,  1.87it/s]

step:10620, train_loss:0.06356366144174147, acc:0.6531175802622071


 89%|████████▊ | 38749/43738 [4:56:49<40:36,  2.05it/s]

step:10620, train_loss:0.06356202385565253, acc:0.6531265322976076


 89%|████████▊ | 38750/43738 [4:56:50<48:23,  1.72it/s]

step:10620, train_loss:0.06356099960820058, acc:0.6531354838709678


 89%|████████▊ | 38751/43738 [4:56:50<44:17,  1.88it/s]

step:10620, train_loss:0.06356085808332394, acc:0.6531186291966659


 89%|████████▉ | 39056/43738 [4:59:08<39:39,  1.97it/s]

step:10640, train_loss:0.06357723956466496, acc:0.6529086439983613


 89%|████████▉ | 39057/43738 [4:59:08<37:41,  2.07it/s]

step:10640, train_loss:0.06357596247776058, acc:0.652917530788335


 89%|████████▉ | 39058/43738 [4:59:09<32:53,  2.37it/s]

step:10640, train_loss:0.06357437079483375, acc:0.6529264171232526


 89%|████████▉ | 39059/43738 [4:59:09<33:42,  2.31it/s]

step:10640, train_loss:0.06357370979360534, acc:0.6529353030031491


 89%|████████▉ | 39060/43738 [4:59:10<42:03,  1.85it/s]

step:10640, train_loss:0.06357489200868494, acc:0.6529185867895545


 89%|████████▉ | 39061/43738 [4:59:11<45:58,  1.70it/s]

step:10640, train_loss:0.06357358882266327, acc:0.6529274724149408


 89%|████████▉ | 39062/43738 [4:59:11<41:10,  1.89it/s]

step:10640, train_loss:0.06357281822341884, acc:0.652936357585377


 89%|████████▉ | 39063/43738 [4:59:11<38:40,  2.01it/s]

step:10640, train_loss:0.06357854024505144, acc:0.6529196426285744


 89%|████████▉ | 39064/43738 [4:59:12<45:49,  1.70it/s]

step:10640, train_loss:0.06357762568109133, acc:0.6529285275445423


 89%|████████▉ | 39065/43738 [4:59:12<38:44,  2.01it/s]

step:10640, train_loss:0.0635760000430838, acc:0.6529374120056316


 89%|████████▉ | 39066/43738 [4:59:13<46:09,  1.69it/s]

step:10640, train_loss:0.06357566385374662, acc:0.6529462960118774


 89%|████████▉ | 39067/43738 [4:59:14<42:32,  1.83it/s]

step:10640, train_loss:0.06357417634063726, acc:0.6529551795633143


 89%|████████▉ | 39068/43738 [4:59:14<47:24,  1.64it/s]

step:10640, train_loss:0.06357310228732221, acc:0.6529640626599775


 89%|████████▉ | 39069/43738 [4:59:15<43:27,  1.79it/s]

step:10640, train_loss:0.06357535239139174, acc:0.6529473495610331


 89%|████████▉ | 39070/43738 [4:59:15<40:27,  1.92it/s]

step:10640, train_loss:0.06357375521396572, acc:0.6529562324033785


 89%|████████▉ | 39071/43738 [4:59:16<37:38,  2.07it/s]

step:10640, train_loss:0.06357262622074586, acc:0.6529651147910215


 90%|█████████ | 39376/43738 [5:01:29<39:34,  1.84it/s]

step:10660, train_loss:0.0635433108528086, acc:0.6530373831775701


 90%|█████████ | 39377/43738 [5:01:30<36:31,  1.99it/s]

step:10660, train_loss:0.0635417101102573, acc:0.6530461944790106


 90%|█████████ | 39378/43738 [5:01:30<33:08,  2.19it/s]

step:10660, train_loss:0.06354030398109732, acc:0.653055005332927


 90%|█████████ | 39379/43738 [5:01:30<31:56,  2.27it/s]

step:10660, train_loss:0.06353997607982552, acc:0.6530638157393535


 90%|█████████ | 39380/43738 [5:01:31<28:37,  2.54it/s]

step:10660, train_loss:0.06353837056650546, acc:0.653072625698324


 90%|█████████ | 39381/43738 [5:01:31<25:07,  2.89it/s]

step:10660, train_loss:0.06353708431266906, acc:0.6530814352098728


 90%|█████████ | 39382/43738 [5:01:31<25:54,  2.80it/s]

step:10660, train_loss:0.0635358559012485, acc:0.6530902442740338


 90%|█████████ | 39383/43738 [5:01:32<30:22,  2.39it/s]

step:10660, train_loss:0.06353570602370485, acc:0.6530990528908412


 90%|█████████ | 39384/43738 [5:01:32<26:09,  2.77it/s]

step:10660, train_loss:0.06353620305198387, acc:0.6530824700385943


 90%|█████████ | 39385/43738 [5:01:33<28:30,  2.54it/s]

step:10660, train_loss:0.06353569115749928, acc:0.6530912784054843


 90%|█████████ | 39386/43738 [5:01:33<25:49,  2.81it/s]

step:10660, train_loss:0.06353409932748331, acc:0.6531000863250901


 90%|█████████ | 39387/43738 [5:01:33<32:08,  2.26it/s]

step:10660, train_loss:0.06353252776619871, acc:0.6531088937974459


 90%|█████████ | 39388/43738 [5:01:34<30:07,  2.41it/s]

step:10660, train_loss:0.06353108268278768, acc:0.6531177008225856


 90%|█████████ | 39389/43738 [5:01:34<28:59,  2.50it/s]

step:10660, train_loss:0.06353133173945912, acc:0.6531011196019193


 90%|█████████ | 39390/43738 [5:01:34<24:55,  2.91it/s]

step:10660, train_loss:0.06353006342214894, acc:0.6531099263772531


 90%|█████████ | 39391/43738 [5:01:35<23:09,  3.13it/s]

step:10660, train_loss:0.06353098662329143, acc:0.6530933461958315


 91%|█████████ | 39696/43738 [5:03:51<21:55,  3.07it/s]

step:10680, train_loss:0.06359382661543876, acc:0.6528113663845224


 91%|█████████ | 39697/43738 [5:03:51<23:39,  2.85it/s]

step:10680, train_loss:0.0635922556195155, acc:0.6528201123510593


 91%|█████████ | 39698/43738 [5:03:51<20:41,  3.25it/s]

step:10680, train_loss:0.06359098474597956, acc:0.6528288578769711


 91%|█████████ | 39699/43738 [5:03:52<23:07,  2.91it/s]

step:10680, train_loss:0.0635894531079684, acc:0.6528376029622912


 91%|█████████ | 39700/43738 [5:03:52<24:12,  2.78it/s]

step:10680, train_loss:0.06359130621188519, acc:0.6528211586901763


 91%|█████████ | 39701/43738 [5:03:53<22:00,  3.06it/s]

step:10680, train_loss:0.06358970572548724, acc:0.6528299035288784


 91%|█████████ | 39702/43738 [5:03:53<22:54,  2.94it/s]

step:10680, train_loss:0.06358847430923979, acc:0.6528386479270566


 91%|█████████ | 39703/43738 [5:03:53<22:23,  3.00it/s]

step:10680, train_loss:0.06358881109515893, acc:0.6528222048711684


 91%|█████████ | 39704/43738 [5:03:53<20:02,  3.35it/s]

step:10680, train_loss:0.06358850458199032, acc:0.6528057626435624


 91%|█████████ | 39705/43738 [5:03:54<19:10,  3.51it/s]

step:10680, train_loss:0.06358690337939146, acc:0.6528145069890442


 91%|█████████ | 39706/43738 [5:03:54<20:51,  3.22it/s]

step:10680, train_loss:0.06358666402602735, acc:0.6528232508940714


 91%|█████████ | 39707/43738 [5:03:54<19:17,  3.48it/s]

step:10680, train_loss:0.06358510077206952, acc:0.6528319943586773


 91%|█████████ | 39708/43738 [5:03:55<27:22,  2.45it/s]

step:10680, train_loss:0.06358726903704766, acc:0.6528155535408482


 91%|█████████ | 39709/43738 [5:03:56<32:04,  2.09it/s]

step:10680, train_loss:0.06358580992505904, acc:0.6528242967589212


 91%|█████████ | 39710/43738 [5:03:56<35:46,  1.88it/s]

step:10680, train_loss:0.06358422531678871, acc:0.6528330395366406


 91%|█████████ | 39711/43738 [5:03:57<35:44,  1.88it/s]

step:10680, train_loss:0.06358653497946418, acc:0.652816599934527


 91%|█████████▏| 40016/43738 [5:06:12<26:54,  2.31it/s]

step:10700, train_loss:0.06349359226397809, acc:0.6531887245101959


 91%|█████████▏| 40017/43738 [5:06:12<26:36,  2.33it/s]

step:10700, train_loss:0.0634956983458315, acc:0.653172401729265


 91%|█████████▏| 40018/43738 [5:06:12<24:41,  2.51it/s]

step:10700, train_loss:0.06349663749165171, acc:0.6531560797641062


 91%|█████████▏| 40019/43738 [5:06:13<30:41,  2.02it/s]

step:10700, train_loss:0.06349608855139789, acc:0.653164746745296


 91%|█████████▏| 40020/43738 [5:06:13<30:12,  2.05it/s]

step:10700, train_loss:0.06349781424758859, acc:0.6531484257871064


 92%|█████████▏| 40021/43738 [5:06:14<26:09,  2.37it/s]

step:10700, train_loss:0.06349658149096131, acc:0.6531570925264236


 92%|█████████▏| 40022/43738 [5:06:14<22:44,  2.72it/s]

step:10700, train_loss:0.06349499686500269, acc:0.653165758832642


 92%|█████████▏| 40023/43738 [5:06:14<20:51,  2.97it/s]

step:10700, train_loss:0.06349657074189236, acc:0.6531494390725333


 92%|█████████▏| 40024/43738 [5:06:15<29:01,  2.13it/s]

step:10700, train_loss:0.0634976264855775, acc:0.6531331201279232


 92%|█████████▏| 40025/43738 [5:06:15<25:11,  2.46it/s]

step:10700, train_loss:0.06349637924213757, acc:0.6531417863835103


 92%|█████████▏| 40026/43738 [5:06:16<23:35,  2.62it/s]

step:10700, train_loss:0.06349493317581074, acc:0.653150452206066


 92%|█████████▏| 40027/43738 [5:06:16<24:17,  2.55it/s]

step:10700, train_loss:0.06349336697325546, acc:0.653159117595623


 92%|█████████▏| 40028/43738 [5:06:16<24:45,  2.50it/s]

step:10700, train_loss:0.06349184044714969, acc:0.6531677825522134


 92%|█████████▏| 40029/43738 [5:06:17<26:00,  2.38it/s]

step:10700, train_loss:0.06349096593437799, acc:0.65317644707587


 92%|█████████▏| 40030/43738 [5:06:18<32:26,  1.91it/s]

step:10700, train_loss:0.06349138681508947, acc:0.6531601299025731


 92%|█████████▏| 40031/43738 [5:06:18<28:05,  2.20it/s]

step:10700, train_loss:0.06349188442630638, acc:0.653143813544503


 92%|█████████▏| 40336/43738 [5:08:44<26:45,  2.12it/s]

step:10720, train_loss:0.06352284088155473, acc:0.6530890519635065


 92%|█████████▏| 40337/43738 [5:08:44<22:28,  2.52it/s]

step:10720, train_loss:0.0635212806580192, acc:0.6530976522795449


 92%|█████████▏| 40338/43738 [5:08:44<22:00,  2.58it/s]

step:10720, train_loss:0.06351972740350817, acc:0.6531062521691705


 92%|█████████▏| 40339/43738 [5:08:45<28:08,  2.01it/s]

step:10720, train_loss:0.0635220229186362, acc:0.6530900617268648


 92%|█████████▏| 40340/43738 [5:08:45<27:47,  2.04it/s]

step:10720, train_loss:0.06352050939125702, acc:0.6530986613782845


 92%|█████████▏| 40341/43738 [5:08:46<26:39,  2.12it/s]

step:10720, train_loss:0.06352058440450774, acc:0.6531072606033563


 92%|█████████▏| 40342/43738 [5:08:47<30:25,  1.86it/s]

step:10720, train_loss:0.06352014264677805, acc:0.6531158594021119


 92%|█████████▏| 40343/43738 [5:08:47<26:35,  2.13it/s]

step:10720, train_loss:0.06351866072132879, acc:0.653124457774583


 92%|█████████▏| 40344/43738 [5:08:47<22:54,  2.47it/s]

step:10720, train_loss:0.06351708642858388, acc:0.6531330557208012


 92%|█████████▏| 40345/43738 [5:08:47<21:38,  2.61it/s]

step:10720, train_loss:0.06351568481767417, acc:0.6531416532407981


 92%|█████████▏| 40346/43738 [5:08:48<19:05,  2.96it/s]

step:10720, train_loss:0.06351489314539684, acc:0.6531502503346056


 92%|█████████▏| 40347/43738 [5:08:48<20:22,  2.77it/s]

step:10720, train_loss:0.06351433859472345, acc:0.6531588470022555


 92%|█████████▏| 40348/43738 [5:08:49<21:46,  2.59it/s]

step:10720, train_loss:0.06351645743321971, acc:0.6531426588678497


 92%|█████████▏| 40349/43738 [5:08:49<20:38,  2.74it/s]

step:10720, train_loss:0.06351770004477661, acc:0.6531264715358497


 92%|█████████▏| 40350/43738 [5:08:49<21:08,  2.67it/s]

step:10720, train_loss:0.06351703788787999, acc:0.6531350681536555


 92%|█████████▏| 40351/43738 [5:08:50<23:16,  2.43it/s]

step:10720, train_loss:0.06351556270047726, acc:0.6531436643453694


 93%|█████████▎| 40656/43738 [5:11:04<21:23,  2.40it/s]

step:10740, train_loss:0.06351607134495747, acc:0.6531877213695395


 93%|█████████▎| 40657/43738 [5:11:04<19:59,  2.57it/s]

step:10740, train_loss:0.06351491273591577, acc:0.6531962515679957


 93%|█████████▎| 40658/43738 [5:11:05<24:17,  2.11it/s]

step:10740, train_loss:0.06351817747791827, acc:0.6531801859412661


 93%|█████████▎| 40659/43738 [5:11:05<22:25,  2.29it/s]

step:10740, train_loss:0.06351724447022353, acc:0.6531887159054576


 93%|█████████▎| 40660/43738 [5:11:05<20:23,  2.52it/s]

step:10740, train_loss:0.06351577451698746, acc:0.6531972454500737


 93%|█████████▎| 40661/43738 [5:11:06<21:02,  2.44it/s]

step:10740, train_loss:0.06351492334226441, acc:0.6532057745751457


 93%|█████████▎| 40662/43738 [5:11:06<20:35,  2.49it/s]

step:10740, train_loss:0.06351749716904091, acc:0.653189710294624


 93%|█████████▎| 40663/43738 [5:11:06<18:05,  2.83it/s]

step:10740, train_loss:0.06351599540401978, acc:0.6531982391855004


 93%|█████████▎| 40664/43738 [5:11:07<16:03,  3.19it/s]

step:10740, train_loss:0.06351616806118546, acc:0.6531821758803856


 93%|█████████▎| 40665/43738 [5:11:07<16:07,  3.17it/s]

step:10740, train_loss:0.06351473102460936, acc:0.6531907045370712


 93%|█████████▎| 40666/43738 [5:11:08<23:27,  2.18it/s]

step:10740, train_loss:0.06351719771663182, acc:0.6531746422072493


 93%|█████████▎| 40667/43738 [5:11:09<28:15,  1.81it/s]

step:10740, train_loss:0.06351683939587101, acc:0.6531585806673715


 93%|█████████▎| 40668/43738 [5:11:09<26:40,  1.92it/s]

step:10740, train_loss:0.06351528362881569, acc:0.6531671092751057


 93%|█████████▎| 40669/43738 [5:11:09<23:07,  2.21it/s]

step:10740, train_loss:0.06351638572171271, acc:0.6531510487103199


 93%|█████████▎| 40670/43738 [5:11:10<20:08,  2.54it/s]

step:10740, train_loss:0.06351611876724829, acc:0.6531349889353332


 93%|█████████▎| 40671/43738 [5:11:10<17:35,  2.91it/s]

step:10740, train_loss:0.06351462268693606, acc:0.6531435174940375


 94%|█████████▎| 40976/43738 [5:13:27<21:28,  2.14it/s]

step:10760, train_loss:0.06349291305953747, acc:0.6532848496680984


 94%|█████████▎| 40977/43738 [5:13:28<21:00,  2.19it/s]

step:10760, train_loss:0.06349400954846082, acc:0.6532689069477999


 94%|█████████▎| 40978/43738 [5:13:28<21:45,  2.11it/s]

step:10760, train_loss:0.06349297032611306, acc:0.6532773683439894


 94%|█████████▎| 40979/43738 [5:13:29<23:09,  1.99it/s]

step:10760, train_loss:0.06349302009400218, acc:0.6532858293272163


 94%|█████████▎| 40980/43738 [5:13:30<24:20,  1.89it/s]

step:10760, train_loss:0.06349663950953212, acc:0.653269887750122


 94%|█████████▎| 40981/43738 [5:13:30<22:16,  2.06it/s]

step:10760, train_loss:0.06349574391111236, acc:0.6532783485029647


 94%|█████████▎| 40982/43738 [5:13:30<19:00,  2.42it/s]

step:10760, train_loss:0.0634944022789293, acc:0.6532868088429067


 94%|█████████▎| 40983/43738 [5:13:31<18:21,  2.50it/s]

step:10760, train_loss:0.06349515974873045, acc:0.6532708684088524


 94%|█████████▎| 40984/43738 [5:13:31<20:58,  2.19it/s]

step:10760, train_loss:0.06349362924341061, acc:0.6532793285184463


 94%|█████████▎| 40985/43738 [5:13:32<22:09,  2.07it/s]

step:10760, train_loss:0.06349428948489345, acc:0.6532633890447724


 94%|█████████▎| 40986/43738 [5:13:32<19:01,  2.41it/s]

step:10760, train_loss:0.06349434223388509, acc:0.6532474503488996


 94%|█████████▎| 40987/43738 [5:13:32<18:44,  2.45it/s]

step:10760, train_loss:0.06349762699588476, acc:0.6532315124307707


 94%|█████████▎| 40988/43738 [5:13:33<18:15,  2.51it/s]

step:10760, train_loss:0.06349624591453916, acc:0.6532399726749293


 94%|█████████▎| 40989/43738 [5:13:33<23:54,  1.92it/s]

step:10760, train_loss:0.06349916484624163, acc:0.6532240357168997


 94%|█████████▎| 40990/43738 [5:13:34<22:30,  2.03it/s]

step:10760, train_loss:0.06349763043167914, acc:0.653232495730666


 94%|█████████▎| 40991/43738 [5:13:35<26:20,  1.74it/s]

step:10760, train_loss:0.06349670248719608, acc:0.6532409553316582


 94%|█████████▍| 41296/43738 [5:15:47<26:54,  1.51it/s]

step:10780, train_loss:0.06346143582470891, acc:0.653525765207284


 94%|█████████▍| 41297/43738 [5:15:48<26:23,  1.54it/s]

step:10780, train_loss:0.06346524864426802, acc:0.65350994018936


 94%|█████████▍| 41298/43738 [5:15:48<23:24,  1.74it/s]

step:10780, train_loss:0.06347645175539389, acc:0.6534941159378178


 94%|█████████▍| 41299/43738 [5:15:49<19:05,  2.13it/s]

step:10780, train_loss:0.06347493439841982, acc:0.6535025061139494


 94%|█████████▍| 41300/43738 [5:15:49<17:59,  2.26it/s]

step:10780, train_loss:0.06347378604900511, acc:0.6535108958837772


 94%|█████████▍| 41301/43738 [5:15:49<17:55,  2.27it/s]

step:10780, train_loss:0.06347265953850972, acc:0.6535192852473306


 94%|█████████▍| 41302/43738 [5:15:50<16:22,  2.48it/s]

step:10780, train_loss:0.06347190534100501, acc:0.653527674204639


 94%|█████████▍| 41303/43738 [5:15:50<15:58,  2.54it/s]

step:10780, train_loss:0.06347040125724744, acc:0.6535360627557321


 94%|█████████▍| 41304/43738 [5:15:50<14:33,  2.79it/s]

step:10780, train_loss:0.06346903150360313, acc:0.6535444509006392


 94%|█████████▍| 41305/43738 [5:15:51<13:43,  2.96it/s]

step:10780, train_loss:0.06347013847170657, acc:0.6535286284953395


 94%|█████████▍| 41306/43738 [5:15:51<17:27,  2.32it/s]

step:10780, train_loss:0.06346898007800003, acc:0.6535370164140802


 94%|█████████▍| 41307/43738 [5:15:52<16:12,  2.50it/s]

step:10780, train_loss:0.06346996037500574, acc:0.6535211949548503


 94%|█████████▍| 41308/43738 [5:15:52<15:59,  2.53it/s]

step:10780, train_loss:0.06347030915104827, acc:0.6535295826474291


 94%|█████████▍| 41309/43738 [5:15:52<13:57,  2.90it/s]

step:10780, train_loss:0.06346880151730976, acc:0.6535379699339127


 94%|█████████▍| 41310/43738 [5:15:53<17:08,  2.36it/s]

step:10780, train_loss:0.06346985711716696, acc:0.653522149600581


 94%|█████████▍| 41311/43738 [5:15:53<16:12,  2.49it/s]

step:10780, train_loss:0.0634725420661078, acc:0.653506330033163


 95%|█████████▌| 41616/43738 [5:18:16<20:48,  1.70it/s]

step:10800, train_loss:0.06349382733444645, acc:0.6533064206074587


 95%|█████████▌| 41617/43738 [5:18:16<19:40,  1.80it/s]

step:10800, train_loss:0.06349446309752717, acc:0.6532907225412692


 95%|█████████▌| 41618/43738 [5:18:16<16:36,  2.13it/s]

step:10800, train_loss:0.06349293905393327, acc:0.6532990532942476


 95%|█████████▌| 41619/43738 [5:18:17<14:17,  2.47it/s]

step:10800, train_loss:0.06349269841014707, acc:0.653307383646892


 95%|█████████▌| 41620/43738 [5:18:17<18:01,  1.96it/s]

step:10800, train_loss:0.06349121072903116, acc:0.6533157135992311


 95%|█████████▌| 41621/43738 [5:18:18<20:03,  1.76it/s]

step:10800, train_loss:0.0634933057139242, acc:0.653300016818433


 95%|█████████▌| 41622/43738 [5:18:19<19:10,  1.84it/s]

step:10800, train_loss:0.0634926129384126, acc:0.6533083465474989


 95%|█████████▌| 41623/43738 [5:18:19<19:56,  1.77it/s]

step:10800, train_loss:0.06349143497943238, acc:0.6533166758763184


 95%|█████████▌| 41624/43738 [5:18:20<20:59,  1.68it/s]

step:10800, train_loss:0.06349155923140491, acc:0.6533009802037286


 95%|█████████▌| 41625/43738 [5:18:20<17:34,  2.00it/s]

step:10800, train_loss:0.06349003479395963, acc:0.6533093093093093


 95%|█████████▌| 41626/43738 [5:18:20<16:35,  2.12it/s]

step:10800, train_loss:0.06348857783472799, acc:0.6533176380147023


 95%|█████████▌| 41627/43738 [5:18:21<14:02,  2.50it/s]

step:10800, train_loss:0.06348911275074846, acc:0.6533019434501646


 95%|█████████▌| 41628/43738 [5:18:22<18:15,  1.93it/s]

step:10800, train_loss:0.06348771286840055, acc:0.6533102719323532


 95%|█████████▌| 41629/43738 [5:18:22<16:22,  2.15it/s]

step:10800, train_loss:0.06348668122141503, acc:0.653318600014413


 95%|█████████▌| 41630/43738 [5:18:22<15:16,  2.30it/s]

step:10800, train_loss:0.06349075038238632, acc:0.6533029065577708


 95%|█████████▌| 41631/43738 [5:18:23<14:29,  2.42it/s]

step:10800, train_loss:0.063491525280304, acc:0.65328721385506


 96%|█████████▌| 41936/43738 [5:20:40<14:37,  2.05it/s]

step:10820, train_loss:0.06346661297948508, acc:0.6535196489889356


 96%|█████████▌| 41937/43738 [5:20:40<12:23,  2.42it/s]

step:10820, train_loss:0.06346510175603486, acc:0.65352791091399


 96%|█████████▌| 41938/43738 [5:20:41<10:53,  2.75it/s]

step:10820, train_loss:0.06346358962967225, acc:0.6535361724450379


 96%|█████████▌| 41939/43738 [5:20:41<11:25,  2.62it/s]

step:10820, train_loss:0.06346208942450655, acc:0.6535444335821073


 96%|█████████▌| 41940/43738 [5:20:41<11:35,  2.59it/s]

step:10820, train_loss:0.06346058121071431, acc:0.6535526943252266


 96%|█████████▌| 41941/43738 [5:20:42<12:10,  2.46it/s]

step:10820, train_loss:0.06345906855312408, acc:0.6535609546744235


 96%|█████████▌| 41942/43738 [5:20:42<13:17,  2.25it/s]

step:10820, train_loss:0.06345812039133451, acc:0.6535692146297267


 96%|█████████▌| 41943/43738 [5:20:43<12:09,  2.46it/s]

step:10820, train_loss:0.06345660898364992, acc:0.6535774741911642


 96%|█████████▌| 41944/43738 [5:20:43<11:11,  2.67it/s]

step:10820, train_loss:0.06345869795814613, acc:0.6535618920465383


 96%|█████████▌| 41945/43738 [5:20:43<09:57,  3.00it/s]

step:10820, train_loss:0.06345722145716358, acc:0.6535701513887233


 96%|█████████▌| 41946/43738 [5:20:44<12:13,  2.44it/s]

step:10820, train_loss:0.06345575351408127, acc:0.6535784103371001


 96%|█████████▌| 41947/43738 [5:20:44<10:54,  2.74it/s]

step:10820, train_loss:0.06345424915451195, acc:0.6535866688916967


 96%|█████████▌| 41948/43738 [5:20:44<11:11,  2.67it/s]

step:10820, train_loss:0.0634527504757392, acc:0.6535949270525413


 96%|█████████▌| 41949/43738 [5:20:45<10:47,  2.76it/s]

step:10820, train_loss:0.06345579807242238, acc:0.6535793463491383


 96%|█████████▌| 41950/43738 [5:20:45<09:50,  3.03it/s]

step:10820, train_loss:0.06345428941687042, acc:0.6535876042908224


 96%|█████████▌| 41951/43738 [5:20:45<10:06,  2.95it/s]

step:10820, train_loss:0.06345673655003166, acc:0.6535720245047794


 97%|█████████▋| 42256/43738 [5:23:03<10:33,  2.34it/s]

step:10840, train_loss:0.06346270039848823, acc:0.653421999242711


 97%|█████████▋| 42257/43738 [5:23:04<10:04,  2.45it/s]

step:10840, train_loss:0.06346140194541887, acc:0.6534302009134582


 97%|█████████▋| 42258/43738 [5:23:04<09:59,  2.47it/s]

step:10840, train_loss:0.0634601147384896, acc:0.6534384021960339


 97%|█████████▋| 42259/43738 [5:23:04<10:06,  2.44it/s]

step:10840, train_loss:0.06345918904675156, acc:0.653446603090466


 97%|█████████▋| 42260/43738 [5:23:05<10:16,  2.40it/s]

step:10840, train_loss:0.0634609182464356, acc:0.6534311405584478


 97%|█████████▋| 42261/43738 [5:23:05<10:13,  2.41it/s]

step:10840, train_loss:0.06345944226029818, acc:0.6534393412366011


 97%|█████████▋| 42262/43738 [5:23:06<09:45,  2.52it/s]

step:10840, train_loss:0.06345976601022765, acc:0.653447541526667


 97%|█████████▋| 42263/43738 [5:23:06<12:20,  1.99it/s]

step:10840, train_loss:0.06346137218961809, acc:0.6534320800700376


 97%|█████████▋| 42264/43738 [5:23:07<12:22,  1.99it/s]

step:10840, train_loss:0.06346021859893564, acc:0.6534402801438577


 97%|█████████▋| 42265/43738 [5:23:07<12:27,  1.97it/s]

step:10840, train_loss:0.06345872768842399, acc:0.6534484798296463


 97%|█████████▋| 42266/43738 [5:23:08<11:54,  2.06it/s]

step:10840, train_loss:0.06345836738407676, acc:0.653456679127431


 97%|█████████▋| 42267/43738 [5:23:08<11:24,  2.15it/s]

step:10840, train_loss:0.06345836268744649, acc:0.6534648780372394


 97%|█████████▋| 42268/43738 [5:23:09<11:46,  2.08it/s]

step:10840, train_loss:0.06346050968621067, acc:0.6534494179994322


 97%|█████████▋| 42269/43738 [5:23:09<13:59,  1.75it/s]

step:10840, train_loss:0.0634681922598928, acc:0.653433958693132


 97%|█████████▋| 42270/43738 [5:23:10<12:10,  2.01it/s]

step:10840, train_loss:0.06346753728977568, acc:0.6534421575585522


 97%|█████████▋| 42271/43738 [5:23:11<13:50,  1.77it/s]

step:10840, train_loss:0.06346693321433072, acc:0.6534503560360531


 97%|█████████▋| 42576/43738 [5:25:34<11:25,  1.70it/s]

step:10860, train_loss:0.0634704774115152, acc:0.6534667418263811


 97%|█████████▋| 42577/43738 [5:25:35<10:27,  1.85it/s]

step:10860, train_loss:0.06347156680842418, acc:0.6534513939450878


 97%|█████████▋| 42578/43738 [5:25:35<09:27,  2.05it/s]

step:10860, train_loss:0.06347097367104342, acc:0.6534595330922073


 97%|█████████▋| 42579/43738 [5:25:36<09:16,  2.08it/s]

step:10860, train_loss:0.0634708334299262, acc:0.6534676718570187


 97%|█████████▋| 42580/43738 [5:25:36<08:15,  2.34it/s]

step:10860, train_loss:0.06347181849483398, acc:0.6534523250352278


 97%|█████████▋| 42581/43738 [5:25:36<07:22,  2.61it/s]

step:10860, train_loss:0.0634706584157137, acc:0.6534604635870459


 97%|█████████▋| 42582/43738 [5:25:37<06:45,  2.85it/s]

step:10860, train_loss:0.06346955490995076, acc:0.6534686017566108


 97%|█████████▋| 42583/43738 [5:25:37<07:05,  2.71it/s]

step:10860, train_loss:0.06347003790321051, acc:0.653453255994176


 97%|█████████▋| 42584/43738 [5:25:37<06:42,  2.87it/s]

step:10860, train_loss:0.06347104602021907, acc:0.6534379109524704


 97%|█████████▋| 42585/43738 [5:25:38<07:19,  2.62it/s]

step:10860, train_loss:0.06347138802862115, acc:0.6534460490783139


 97%|█████████▋| 42586/43738 [5:25:39<09:48,  1.96it/s]

step:10860, train_loss:0.06347572029901581, acc:0.6534307049265017


 97%|█████████▋| 42587/43738 [5:25:39<09:08,  2.10it/s]

step:10860, train_loss:0.06347445970887916, acc:0.6534388428393642


 97%|█████████▋| 42588/43738 [5:25:39<08:46,  2.19it/s]

step:10860, train_loss:0.06347297105569494, acc:0.6534469803700573


 97%|█████████▋| 42589/43738 [5:25:40<08:35,  2.23it/s]

step:10860, train_loss:0.06347149686826145, acc:0.6534551175186081


 97%|█████████▋| 42590/43738 [5:25:40<07:44,  2.47it/s]

step:10860, train_loss:0.06347170064800807, acc:0.6534397745949754


 97%|█████████▋| 42591/43738 [5:25:41<10:06,  1.89it/s]

step:10860, train_loss:0.06347688428525008, acc:0.6534244323918199


 98%|█████████▊| 42896/43738 [5:28:05<05:36,  2.50it/s]

step:10880, train_loss:0.06351659422972038, acc:0.6533010070869079


 98%|█████████▊| 42897/43738 [5:28:06<06:17,  2.23it/s]

step:10880, train_loss:0.06351521536445956, acc:0.6533090892136979


 98%|█████████▊| 42898/43738 [5:28:06<06:08,  2.28it/s]

step:10880, train_loss:0.06351427268314506, acc:0.6533171709636812


 98%|█████████▊| 42899/43738 [5:28:07<05:37,  2.49it/s]

step:10880, train_loss:0.06351284432815579, acc:0.6533252523368843


 98%|█████████▊| 42900/43738 [5:28:07<05:33,  2.51it/s]

step:10880, train_loss:0.06351629853165576, acc:0.6533100233100233


 98%|█████████▊| 42901/43738 [5:28:07<06:05,  2.29it/s]

step:10880, train_loss:0.06351508539122523, acc:0.6533181044730892


 98%|█████████▊| 42902/43738 [5:28:08<05:15,  2.65it/s]

step:10880, train_loss:0.06351551394297793, acc:0.6533028763227822


 98%|█████████▊| 42903/43738 [5:28:08<04:35,  3.03it/s]

step:10880, train_loss:0.06351625218189313, acc:0.6532876488823626


 98%|█████████▊| 42904/43738 [5:28:08<04:13,  3.29it/s]

step:10880, train_loss:0.06351523060506854, acc:0.6532957300018646


 98%|█████████▊| 42905/43738 [5:28:08<04:00,  3.47it/s]

step:10880, train_loss:0.06351375566275504, acc:0.6533038107446685


 98%|█████████▊| 42906/43738 [5:28:09<03:57,  3.50it/s]

step:10880, train_loss:0.06351230848584935, acc:0.6533118911108003


 98%|█████████▊| 42907/43738 [5:28:09<04:31,  3.06it/s]

step:10880, train_loss:0.06351450748533075, acc:0.6532966648798564


 98%|█████████▊| 42908/43738 [5:28:09<04:39,  2.97it/s]

step:10880, train_loss:0.06351484049042838, acc:0.6532814393586278


 98%|█████████▊| 42909/43738 [5:28:10<06:22,  2.17it/s]

step:10880, train_loss:0.06351624887474563, acc:0.6532662145470647


 98%|█████████▊| 42910/43738 [5:28:11<06:31,  2.11it/s]

step:10880, train_loss:0.06351487203564667, acc:0.6532742950361221


 98%|█████████▊| 42911/43738 [5:28:11<06:29,  2.13it/s]

step:10880, train_loss:0.06351346382159952, acc:0.6532823751485634


 99%|█████████▉| 43216/43738 [5:30:25<04:31,  1.93it/s]

step:10900, train_loss:0.06343033701372344, acc:0.6535310995927435


 99%|█████████▉| 43217/43738 [5:30:26<04:32,  1.91it/s]

step:10900, train_loss:0.0634303075293536, acc:0.6535159775088507


 99%|█████████▉| 43218/43738 [5:30:27<05:02,  1.72it/s]

step:10900, train_loss:0.06343592679320784, acc:0.6535008561247628


 99%|█████████▉| 43219/43738 [5:30:27<05:04,  1.71it/s]

step:10900, train_loss:0.06343518174894501, acc:0.6535088734121567


 99%|█████████▉| 43220/43738 [5:30:28<04:13,  2.04it/s]

step:10900, train_loss:0.0634337140221531, acc:0.6535168903285516


 99%|█████████▉| 43221/43738 [5:30:28<03:52,  2.22it/s]

step:10900, train_loss:0.06343287446337512, acc:0.6535249068739734


 99%|█████████▉| 43222/43738 [5:30:28<03:33,  2.42it/s]

step:10900, train_loss:0.0634314477435067, acc:0.6535329230484476


 99%|█████████▉| 43223/43738 [5:30:28<03:05,  2.78it/s]

step:10900, train_loss:0.06343297310266675, acc:0.6535178030215395


 99%|█████████▉| 43224/43738 [5:30:29<02:49,  3.03it/s]

step:10900, train_loss:0.06343153574787073, acc:0.6535258189894503


 99%|█████████▉| 43225/43738 [5:30:29<03:21,  2.54it/s]

step:10900, train_loss:0.06343301856171242, acc:0.6535106998264893


 99%|█████████▉| 43226/43738 [5:30:30<03:21,  2.55it/s]

step:10900, train_loss:0.06343155195119898, acc:0.6535187155878407


 99%|█████████▉| 43227/43738 [5:30:30<03:21,  2.54it/s]

step:10900, train_loss:0.06343009379348277, acc:0.6535267309783237


 99%|█████████▉| 43228/43738 [5:30:30<03:12,  2.65it/s]

step:10900, train_loss:0.06342866065271299, acc:0.6535347459979642


 99%|█████████▉| 43229/43738 [5:30:31<02:56,  2.88it/s]

step:10900, train_loss:0.06342808157620858, acc:0.6535427606467881


 99%|█████████▉| 43230/43738 [5:30:31<02:54,  2.90it/s]

step:10900, train_loss:0.06342718425727023, acc:0.6535507749248207


 99%|█████████▉| 43231/43738 [5:30:31<03:21,  2.52it/s]

step:10900, train_loss:0.06342880702985355, acc:0.6535356572829683


100%|█████████▉| 43536/43738 [5:32:48<02:00,  1.68it/s]

step:10920, train_loss:0.06342414867589895, acc:0.6534132671811834


100%|█████████▉| 43537/43738 [5:32:49<02:00,  1.67it/s]

step:10920, train_loss:0.063438604221775, acc:0.6533982589521556


100%|█████████▉| 43538/43738 [5:32:49<01:42,  1.96it/s]

step:10920, train_loss:0.0634380678338216, acc:0.6534062198539207


100%|█████████▉| 43539/43738 [5:32:50<01:44,  1.90it/s]

step:10920, train_loss:0.06343687773155794, acc:0.6534141803899952


100%|█████████▉| 43540/43738 [5:32:50<01:29,  2.21it/s]

step:10920, train_loss:0.06343543492515362, acc:0.6534221405604043


100%|█████████▉| 43541/43738 [5:32:50<01:19,  2.47it/s]

step:10920, train_loss:0.0634343718554258, acc:0.653430100365173


100%|█████████▉| 43542/43738 [5:32:51<01:23,  2.35it/s]

step:10920, train_loss:0.06343463363714692, acc:0.6534150934729687


100%|█████████▉| 43543/43738 [5:32:51<01:19,  2.46it/s]

step:10920, train_loss:0.06343434216098137, acc:0.6534000872700549


100%|█████████▉| 43544/43738 [5:32:52<01:34,  2.05it/s]

step:10920, train_loss:0.06343288953572328, acc:0.6534080470328862


100%|█████████▉| 43545/43738 [5:32:52<01:20,  2.40it/s]

step:10920, train_loss:0.06343143323349991, acc:0.6534160064301298


100%|█████████▉| 43546/43738 [5:32:53<01:42,  1.88it/s]

step:10920, train_loss:0.06343044578761205, acc:0.6534239654618105


100%|█████████▉| 43547/43738 [5:32:53<01:27,  2.18it/s]

step:10920, train_loss:0.06342912813577424, acc:0.6534319241279537


100%|█████████▉| 43548/43738 [5:32:54<01:24,  2.25it/s]

step:10920, train_loss:0.06342827641660576, acc:0.6534398824285845


100%|█████████▉| 43549/43738 [5:32:54<01:17,  2.44it/s]

step:10920, train_loss:0.06342683896709526, acc:0.6534478403637282


100%|█████████▉| 43550/43738 [5:32:54<01:11,  2.62it/s]

step:10920, train_loss:0.06342587826148026, acc:0.6534557979334099


100%|█████████▉| 43551/43738 [5:32:55<01:11,  2.61it/s]

step:10920, train_loss:0.06342443303373152, acc:0.6534637551376548


100%|██████████| 43738/43738 [5:34:21<00:00,  2.27it/s]
  0%|          | 1/5129 [00:00<13:47,  6.20it/s]

eval on dev set


100%|██████████| 5129/5129 [13:33<00:00,  5.86it/s]
  0%|          | 0/43738 [00:00<?, ?it/s]

1.2921124677663878, 0.5765256385260284


  0%|          | 128/43738 [00:58<5:28:45,  2.21it/s]

step:10940, train_loss:0.06367372958629858, acc:0.6484375


  0%|          | 129/43738 [00:58<4:46:32,  2.54it/s]

step:10940, train_loss:0.0638208327338446, acc:0.6434108527131783


  0%|          | 130/43738 [00:59<5:04:19,  2.39it/s]

step:10940, train_loss:0.06333327435243588, acc:0.6461538461538462


  0%|          | 131/43738 [00:59<4:43:43,  2.56it/s]

step:10940, train_loss:0.06326372007206651, acc:0.648854961832061


  0%|          | 132/43738 [00:59<4:18:50,  2.81it/s]

step:10940, train_loss:0.0628065264388693, acc:0.6515151515151515


  0%|          | 133/43738 [01:00<4:40:22,  2.59it/s]

step:10940, train_loss:0.0627457332146123, acc:0.6541353383458647


  0%|          | 134/43738 [01:00<4:14:40,  2.85it/s]

step:10940, train_loss:0.06365483561391706, acc:0.6492537313432836


  0%|          | 135/43738 [01:00<4:11:21,  2.89it/s]

step:10940, train_loss:0.06343776165611215, acc:0.6518518518518519


  0%|          | 136/43738 [01:01<4:52:13,  2.49it/s]

step:10940, train_loss:0.06359096333001028, acc:0.6470588235294118


  0%|          | 137/43738 [01:01<5:07:36,  2.36it/s]

step:10940, train_loss:0.06376063569455687, acc:0.6423357664233577


  0%|          | 138/43738 [01:02<4:36:32,  2.63it/s]

step:10940, train_loss:0.06438495959762645, acc:0.6376811594202898


  0%|          | 139/43738 [01:02<4:45:13,  2.55it/s]

step:10940, train_loss:0.0647680970217041, acc:0.6330935251798561


  0%|          | 140/43738 [01:02<4:50:19,  2.50it/s]

step:10940, train_loss:0.0643897653291268, acc:0.6357142857142857


  0%|          | 141/43738 [01:03<4:42:22,  2.57it/s]

step:10940, train_loss:0.06397295681138833, acc:0.6382978723404256


  0%|          | 142/43738 [01:03<4:07:23,  2.94it/s]

step:10940, train_loss:0.06352331789708893, acc:0.6408450704225352


  0%|          | 143/43738 [01:04<4:45:13,  2.55it/s]

step:10940, train_loss:0.06320932524448092, acc:0.6433566433566433


  1%|          | 448/43738 [03:21<4:43:28,  2.55it/s]

step:10960, train_loss:0.061032104156246145, acc:0.6651785714285714


  1%|          | 449/43738 [03:22<4:28:46,  2.68it/s]

step:10960, train_loss:0.06090922081191715, acc:0.6659242761692651


  1%|          | 450/43738 [03:22<4:48:07,  2.50it/s]

step:10960, train_loss:0.06100652149981923, acc:0.6644444444444444


  1%|          | 451/43738 [03:23<4:27:45,  2.69it/s]

step:10960, train_loss:0.0611524121493035, acc:0.6629711751662971


  1%|          | 452/43738 [03:23<4:04:20,  2.95it/s]

step:10960, train_loss:0.061045035884707374, acc:0.6637168141592921


  1%|          | 453/43738 [03:23<4:00:16,  3.00it/s]

step:10960, train_loss:0.06094708000093896, acc:0.6644591611479028


  1%|          | 454/43738 [03:24<5:38:41,  2.13it/s]

step:10960, train_loss:0.06121883374774246, acc:0.6629955947136564


  1%|          | 455/43738 [03:24<4:53:55,  2.45it/s]

step:10960, train_loss:0.06108485254776347, acc:0.6637362637362637


  1%|          | 456/43738 [03:24<4:28:24,  2.69it/s]

step:10960, train_loss:0.0612617703399768, acc:0.6622807017543859


  1%|          | 457/43738 [03:25<4:02:00,  2.98it/s]

step:10960, train_loss:0.061156173397662605, acc:0.6630196936542669


  1%|          | 458/43738 [03:25<3:58:48,  3.02it/s]

step:10960, train_loss:0.061022689468410336, acc:0.6637554585152838


  1%|          | 459/43738 [03:25<4:27:33,  2.70it/s]

step:10960, train_loss:0.06118752292006364, acc:0.6623093681917211


  1%|          | 460/43738 [03:26<5:10:06,  2.33it/s]

step:10960, train_loss:0.061105853246281976, acc:0.6630434782608695


  1%|          | 461/43738 [03:26<5:02:55,  2.38it/s]

step:10960, train_loss:0.06098535837132864, acc:0.6637744034707158


  1%|          | 462/43738 [03:27<5:48:25,  2.07it/s]

step:10960, train_loss:0.06086000399381825, acc:0.6645021645021645


  1%|          | 463/43738 [03:28<5:41:35,  2.11it/s]

step:10960, train_loss:0.06106817039216569, acc:0.6630669546436285


  2%|▏         | 768/43738 [05:48<5:35:04,  2.14it/s]

step:10980, train_loss:0.05729141240590252, acc:0.6770833333333334


  2%|▏         | 769/43738 [05:48<4:46:41,  2.50it/s]

step:10980, train_loss:0.057219239002667725, acc:0.6775032509752926


  2%|▏         | 770/43738 [05:49<4:28:24,  2.67it/s]

step:10980, train_loss:0.05714890679659007, acc:0.6779220779220779


  2%|▏         | 771/43738 [05:49<5:00:55,  2.38it/s]

step:10980, train_loss:0.05727647047311106, acc:0.6770428015564203


  2%|▏         | 772/43738 [05:50<5:02:18,  2.37it/s]

step:10980, train_loss:0.05726239167713594, acc:0.677461139896373


  2%|▏         | 773/43738 [05:50<5:06:25,  2.34it/s]

step:10980, train_loss:0.057343103025315804, acc:0.6765847347994826


  2%|▏         | 774/43738 [05:51<5:36:58,  2.12it/s]

step:10980, train_loss:0.05726903138407903, acc:0.6770025839793282


  2%|▏         | 775/43738 [05:51<6:08:56,  1.94it/s]

step:10980, train_loss:0.05719682236352275, acc:0.6774193548387096


  2%|▏         | 776/43738 [05:52<7:08:31,  1.67it/s]

step:10980, train_loss:0.05712384688811972, acc:0.6778350515463918


  2%|▏         | 777/43738 [05:53<6:26:12,  1.85it/s]

step:10980, train_loss:0.057277347614444515, acc:0.676962676962677


  2%|▏         | 778/43738 [05:53<5:32:35,  2.15it/s]

step:10980, train_loss:0.05739965926195018, acc:0.6760925449871465


  2%|▏         | 779/43738 [05:53<5:44:43,  2.08it/s]

step:10980, train_loss:0.05733429909766256, acc:0.6765083440308087


  2%|▏         | 780/43738 [05:54<5:30:50,  2.16it/s]

step:10980, train_loss:0.0572633852226994, acc:0.676923076923077


  2%|▏         | 781/43738 [05:54<4:46:48,  2.50it/s]

step:10980, train_loss:0.05719102594748655, acc:0.677336747759283


  2%|▏         | 782/43738 [05:55<5:03:06,  2.36it/s]

step:10980, train_loss:0.05721701317659729, acc:0.6764705882352942


  2%|▏         | 783/43738 [05:55<5:33:39,  2.15it/s]

step:10980, train_loss:0.057188366088982895, acc:0.6768837803320562


  2%|▏         | 1088/43738 [08:12<5:07:43,  2.31it/s]

step:11000, train_loss:0.058995274648837304, acc:0.6737132352941176


  2%|▏         | 1089/43738 [08:13<5:38:40,  2.10it/s]

step:11000, train_loss:0.05895466560667211, acc:0.6740128558310376


  2%|▏         | 1090/43738 [08:13<5:07:20,  2.31it/s]

step:11000, train_loss:0.058951819636816276, acc:0.6743119266055045


  2%|▏         | 1091/43738 [08:14<5:44:49,  2.06it/s]

step:11000, train_loss:0.05890032669186155, acc:0.6746104491292392


  2%|▏         | 1092/43738 [08:14<5:41:22,  2.08it/s]

step:11000, train_loss:0.05884731998633007, acc:0.674908424908425


  2%|▏         | 1093/43738 [08:15<5:57:25,  1.99it/s]

step:11000, train_loss:0.058840168426997706, acc:0.6752058554437328


  3%|▎         | 1094/43738 [08:16<6:53:55,  1.72it/s]

step:11000, train_loss:0.05886715267473318, acc:0.6745886654478976


  3%|▎         | 1095/43738 [08:16<6:43:49,  1.76it/s]

step:11000, train_loss:0.05883860160430817, acc:0.6748858447488585


  3%|▎         | 1096/43738 [08:17<6:22:44,  1.86it/s]

step:11000, train_loss:0.058872183699867804, acc:0.6751824817518248


  3%|▎         | 1097/43738 [08:17<6:08:42,  1.93it/s]

step:11000, train_loss:0.0588575265350783, acc:0.6754785779398359


  3%|▎         | 1098/43738 [08:18<6:25:04,  1.85it/s]

step:11000, train_loss:0.058925220328900546, acc:0.674863387978142


  3%|▎         | 1099/43738 [08:18<6:02:18,  1.96it/s]

step:11000, train_loss:0.05887348028925248, acc:0.6751592356687898


  3%|▎         | 1100/43738 [08:19<6:04:33,  1.95it/s]

step:11000, train_loss:0.058822026764127344, acc:0.6754545454545454


  3%|▎         | 1101/43738 [08:19<5:08:33,  2.30it/s]

step:11000, train_loss:0.058768984958949035, acc:0.6757493188010899


  3%|▎         | 1102/43738 [08:19<5:07:49,  2.31it/s]

step:11000, train_loss:0.058727843010842044, acc:0.6760435571687841


  3%|▎         | 1103/43738 [08:20<5:45:46,  2.06it/s]

step:11000, train_loss:0.05867937507474304, acc:0.6763372620126926


  3%|▎         | 1408/43738 [10:36<6:59:18,  1.68it/s]

step:11020, train_loss:0.05788303367444314, acc:0.6711647727272727


  3%|▎         | 1409/43738 [10:37<5:48:29,  2.02it/s]

step:11020, train_loss:0.05788167158103395, acc:0.6706884315117104


  3%|▎         | 1410/43738 [10:37<5:13:14,  2.25it/s]

step:11020, train_loss:0.05784421144383596, acc:0.6709219858156028


  3%|▎         | 1411/43738 [10:37<5:00:05,  2.35it/s]

step:11020, train_loss:0.057803321038362915, acc:0.6711552090715804


  3%|▎         | 1412/43738 [10:38<5:45:55,  2.04it/s]

step:11020, train_loss:0.05782663761756829, acc:0.6706798866855525


  3%|▎         | 1413/43738 [10:39<6:13:12,  1.89it/s]

step:11020, train_loss:0.05789006862364925, acc:0.670205237084218


  3%|▎         | 1414/43738 [10:39<6:01:54,  1.95it/s]

step:11020, train_loss:0.057904423128871466, acc:0.6697312588401697


  3%|▎         | 1415/43738 [10:39<5:53:07,  2.00it/s]

step:11020, train_loss:0.057913832353302955, acc:0.6692579505300353


  3%|▎         | 1416/43738 [10:40<5:45:27,  2.04it/s]

step:11020, train_loss:0.05790129845099008, acc:0.6694915254237288


  3%|▎         | 1417/43738 [10:40<5:32:09,  2.12it/s]

step:11020, train_loss:0.05787571991518424, acc:0.6697247706422018


  3%|▎         | 1418/43738 [10:41<5:05:00,  2.31it/s]

step:11020, train_loss:0.0578349463468252, acc:0.6699576868829337


  3%|▎         | 1419/43738 [10:41<4:18:39,  2.73it/s]

step:11020, train_loss:0.05779657925336692, acc:0.6701902748414377


  3%|▎         | 1420/43738 [10:41<5:01:55,  2.34it/s]

step:11020, train_loss:0.05776847630581805, acc:0.6704225352112676


  3%|▎         | 1421/43738 [10:42<4:55:54,  2.38it/s]

step:11020, train_loss:0.057740339003948996, acc:0.6706544686840253


  3%|▎         | 1422/43738 [10:42<5:27:14,  2.16it/s]

step:11020, train_loss:0.05772018222250935, acc:0.6708860759493671


  3%|▎         | 1423/43738 [10:43<5:30:29,  2.13it/s]

step:11020, train_loss:0.057788132492895286, acc:0.6704146170063247


  4%|▍         | 1728/43738 [13:01<5:29:41,  2.12it/s]

step:11040, train_loss:0.05829266011143638, acc:0.6695601851851852


  4%|▍         | 1729/43738 [13:02<5:53:10,  1.98it/s]

step:11040, train_loss:0.05827166909646047, acc:0.6697513013302487


  4%|▍         | 1730/43738 [13:02<4:59:35,  2.34it/s]

step:11040, train_loss:0.05823807476863609, acc:0.6699421965317919


  4%|▍         | 1731/43738 [13:02<5:08:25,  2.27it/s]

step:11040, train_loss:0.05820671746540645, acc:0.6701328711727326


  4%|▍         | 1732/43738 [13:03<5:56:22,  1.96it/s]

step:11040, train_loss:0.058281485997377, acc:0.6697459584295612


  4%|▍         | 1733/43738 [13:04<5:38:09,  2.07it/s]

step:11040, train_loss:0.05825645568099709, acc:0.6699365262550491


  4%|▍         | 1734/43738 [13:04<4:53:30,  2.39it/s]

step:11040, train_loss:0.05823642129697241, acc:0.6701268742791234


  4%|▍         | 1735/43738 [13:04<4:22:57,  2.66it/s]

step:11040, train_loss:0.05820519916380431, acc:0.6703170028818444


  4%|▍         | 1736/43738 [13:05<4:30:39,  2.59it/s]

step:11040, train_loss:0.05817170366419539, acc:0.6705069124423964


  4%|▍         | 1737/43738 [13:05<4:30:00,  2.59it/s]

step:11040, train_loss:0.058141785941880776, acc:0.6706966033390904


  4%|▍         | 1738/43738 [13:05<4:24:43,  2.64it/s]

step:11040, train_loss:0.058111065944551704, acc:0.6708860759493671


  4%|▍         | 1739/43738 [13:06<3:52:54,  3.01it/s]

step:11040, train_loss:0.058077686095675596, acc:0.6710753306497987


  4%|▍         | 1740/43738 [13:06<4:55:27,  2.37it/s]

step:11040, train_loss:0.05818998176422527, acc:0.6706896551724137


  4%|▍         | 1741/43738 [13:06<4:24:05,  2.65it/s]

step:11040, train_loss:0.05815655997826459, acc:0.6708788052843193


  4%|▍         | 1742/43738 [13:07<5:01:36,  2.32it/s]

step:11040, train_loss:0.05826523046284041, acc:0.6704936854190585


  4%|▍         | 1743/43738 [13:07<4:55:01,  2.37it/s]

step:11040, train_loss:0.0582712966082078, acc:0.6706827309236948


  5%|▍         | 2048/43738 [15:23<5:00:52,  2.31it/s]

step:11060, train_loss:0.058724890324128864, acc:0.67138671875


  5%|▍         | 2049/43738 [15:24<6:20:21,  1.83it/s]

step:11060, train_loss:0.05871542891816556, acc:0.6715470961444607


  5%|▍         | 2050/43738 [15:24<5:40:42,  2.04it/s]

step:11060, train_loss:0.0586891048194886, acc:0.6717073170731708


  5%|▍         | 2051/43738 [15:24<5:01:46,  2.30it/s]

step:11060, train_loss:0.05866364770655637, acc:0.6718673817649927


  5%|▍         | 2052/43738 [15:25<5:22:21,  2.16it/s]

step:11060, train_loss:0.058697761360739245, acc:0.6715399610136452


  5%|▍         | 2053/43738 [15:25<5:18:40,  2.18it/s]

step:11060, train_loss:0.05873131267821686, acc:0.6712128592303945


  5%|▍         | 2054/43738 [15:26<4:33:39,  2.54it/s]

step:11060, train_loss:0.058728724311150345, acc:0.6713729308666018


  5%|▍         | 2055/43738 [15:26<4:29:44,  2.58it/s]

step:11060, train_loss:0.05871187431755909, acc:0.6715328467153284


  5%|▍         | 2056/43738 [15:26<4:00:18,  2.89it/s]

step:11060, train_loss:0.05875986435379043, acc:0.6712062256809338


  5%|▍         | 2057/43738 [15:27<4:38:42,  2.49it/s]

step:11060, train_loss:0.058731461423967624, acc:0.6713660670879922


  5%|▍         | 2058/43738 [15:27<4:47:37,  2.42it/s]

step:11060, train_loss:0.058762501819562475, acc:0.6710398445092323


  5%|▍         | 2059/43738 [15:28<4:37:14,  2.51it/s]

step:11060, train_loss:0.05880958215962696, acc:0.6707139388052452


  5%|▍         | 2060/43738 [15:28<5:21:51,  2.16it/s]

step:11060, train_loss:0.05882900499318301, acc:0.670873786407767


  5%|▍         | 2061/43738 [15:29<5:54:38,  1.96it/s]

step:11060, train_loss:0.05881539153021908, acc:0.6710334788937409


  5%|▍         | 2062/43738 [15:29<5:25:47,  2.13it/s]

step:11060, train_loss:0.05879895237615923, acc:0.6711930164888458


  5%|▍         | 2063/43738 [15:30<5:06:39,  2.27it/s]

step:11060, train_loss:0.058771993655447774, acc:0.6713523994183228


  5%|▌         | 2368/43738 [17:52<5:25:06,  2.12it/s]

step:11080, train_loss:0.05836300687204474, acc:0.6773648648648649


  5%|▌         | 2369/43738 [17:53<6:29:20,  1.77it/s]

step:11080, train_loss:0.0583942604059359, acc:0.6770789362600254


  5%|▌         | 2370/43738 [17:54<7:03:58,  1.63it/s]

step:11080, train_loss:0.05839916231908407, acc:0.6767932489451477


  5%|▌         | 2371/43738 [17:54<6:37:49,  1.73it/s]

step:11080, train_loss:0.058390124177560995, acc:0.6769295655841417


  5%|▌         | 2372/43738 [17:55<5:30:55,  2.08it/s]

step:11080, train_loss:0.058366298088356285, acc:0.6770657672849916


  5%|▌         | 2373/43738 [17:55<5:30:10,  2.09it/s]

step:11080, train_loss:0.05834516863135744, acc:0.6772018541930046


  5%|▌         | 2374/43738 [17:56<5:31:38,  2.08it/s]

step:11080, train_loss:0.05843828666563716, acc:0.676916596461668


  5%|▌         | 2375/43738 [17:56<4:44:21,  2.42it/s]

step:11080, train_loss:0.05843292333283707, acc:0.6770526315789474


  5%|▌         | 2376/43738 [17:56<4:09:15,  2.77it/s]

step:11080, train_loss:0.05846054085647148, acc:0.6767676767676768


  5%|▌         | 2377/43738 [17:56<3:50:10,  2.99it/s]

step:11080, train_loss:0.058436111324311216, acc:0.6769036600757257


  5%|▌         | 2378/43738 [17:57<5:12:47,  2.20it/s]

step:11080, train_loss:0.058420229249080066, acc:0.6770395290159799


  5%|▌         | 2379/43738 [17:57<4:30:11,  2.55it/s]

step:11080, train_loss:0.05839567453302553, acc:0.6771752837326608


  5%|▌         | 2380/43738 [17:58<4:42:14,  2.44it/s]

step:11080, train_loss:0.058384380057774986, acc:0.6773109243697479


  5%|▌         | 2381/43738 [17:58<4:57:37,  2.32it/s]

step:11080, train_loss:0.05839035481371765, acc:0.6774464510709786


  5%|▌         | 2382/43738 [17:59<4:50:41,  2.37it/s]

step:11080, train_loss:0.05840673769971216, acc:0.6771620486985727


  5%|▌         | 2383/43738 [17:59<4:12:59,  2.72it/s]

step:11080, train_loss:0.05838223356355734, acc:0.6772975241292488


  6%|▌         | 2688/43738 [20:14<5:20:22,  2.14it/s]

step:11100, train_loss:0.05880664295736573, acc:0.6726190476190477


  6%|▌         | 2689/43738 [20:14<4:36:11,  2.48it/s]

step:11100, train_loss:0.058816280935857376, acc:0.6723689103756043


  6%|▌         | 2690/43738 [20:15<5:32:26,  2.06it/s]

step:11100, train_loss:0.05881886036121407, acc:0.6724907063197026


  6%|▌         | 2691/43738 [20:15<4:45:24,  2.40it/s]

step:11100, train_loss:0.0587970468115971, acc:0.6726124117428465


  6%|▌         | 2692/43738 [20:15<4:59:39,  2.28it/s]

step:11100, train_loss:0.05881119801031141, acc:0.6723625557206538


  6%|▌         | 2693/43738 [20:16<5:36:55,  2.03it/s]

step:11100, train_loss:0.05884039380238002, acc:0.6721128852580764


  6%|▌         | 2694/43738 [20:16<4:47:43,  2.38it/s]

step:11100, train_loss:0.05882960949082732, acc:0.6722345953971789


  6%|▌         | 2695/43738 [20:17<4:13:33,  2.70it/s]

step:11100, train_loss:0.05881312722374783, acc:0.6723562152133581


  6%|▌         | 2696/43738 [20:17<4:58:54,  2.29it/s]

step:11100, train_loss:0.05884723923634047, acc:0.672106824925816


  6%|▌         | 2697/43738 [20:18<6:04:07,  1.88it/s]

step:11100, train_loss:0.05887528091567559, acc:0.6718576195773082


  6%|▌         | 2698/43738 [20:18<5:13:17,  2.18it/s]

step:11100, train_loss:0.058856974275964966, acc:0.6719792438843588


  6%|▌         | 2699/43738 [20:18<4:33:51,  2.50it/s]

step:11100, train_loss:0.05883524089271121, acc:0.6721007780659504


  6%|▌         | 2700/43738 [20:19<4:42:52,  2.42it/s]

step:11100, train_loss:0.05882161811218355, acc:0.6722222222222223


  6%|▌         | 2701/43738 [20:19<4:06:15,  2.78it/s]

step:11100, train_loss:0.058801056335996374, acc:0.6723435764531654


  6%|▌         | 2702/43738 [20:20<4:06:08,  2.78it/s]

step:11100, train_loss:0.05878677505597908, acc:0.6724648408586232


  6%|▌         | 2703/43738 [20:20<4:46:21,  2.39it/s]

step:11100, train_loss:0.05881117152489785, acc:0.6722160562338143


  7%|▋         | 3008/43738 [22:38<7:10:44,  1.58it/s]

step:11120, train_loss:0.058466800374747996, acc:0.6768617021276596


  7%|▋         | 3009/43738 [22:39<8:19:57,  1.36it/s]

step:11120, train_loss:0.05845141026057395, acc:0.6769690927218345


  7%|▋         | 3010/43738 [22:40<7:18:47,  1.55it/s]

step:11120, train_loss:0.05844353072335975, acc:0.6770764119601329


  7%|▋         | 3011/43738 [22:40<7:30:06,  1.51it/s]

step:11120, train_loss:0.05842471737658951, acc:0.6771836599136499


  7%|▋         | 3012/43738 [22:41<7:42:51,  1.47it/s]

step:11120, train_loss:0.0584797782733465, acc:0.6769588313413014


  7%|▋         | 3013/43738 [22:42<7:41:09,  1.47it/s]

step:11120, train_loss:0.05847733431729716, acc:0.6770660471291072


  7%|▋         | 3014/43738 [22:42<7:06:53,  1.59it/s]

step:11120, train_loss:0.058466470751672855, acc:0.677173191771732


  7%|▋         | 3015/43738 [22:43<6:47:40,  1.66it/s]

step:11120, train_loss:0.05845516930360526, acc:0.6772802653399669


  7%|▋         | 3016/43738 [22:44<6:54:11,  1.64it/s]

step:11120, train_loss:0.058435992971562906, acc:0.6773872679045093


  7%|▋         | 3017/43738 [22:44<6:31:42,  1.73it/s]

step:11120, train_loss:0.058419980915194764, acc:0.6774941995359629


  7%|▋         | 3018/43738 [22:44<6:04:38,  1.86it/s]

step:11120, train_loss:0.05840315205054919, acc:0.6776010603048377


  7%|▋         | 3019/43738 [22:45<5:58:37,  1.89it/s]

step:11120, train_loss:0.05838686701526687, acc:0.6777078502815502


  7%|▋         | 3020/43738 [22:45<5:51:54,  1.93it/s]

step:11120, train_loss:0.05836879229003453, acc:0.6778145695364238


  7%|▋         | 3021/43738 [22:46<5:43:06,  1.98it/s]

step:11120, train_loss:0.0583919440217326, acc:0.677590201919894


  7%|▋         | 3022/43738 [22:47<6:23:06,  1.77it/s]

step:11120, train_loss:0.05837262381798286, acc:0.6776968894771674


  7%|▋         | 3023/43738 [22:47<6:14:00,  1.81it/s]

step:11120, train_loss:0.05836579975420366, acc:0.6778035064505458


  8%|▊         | 3328/43738 [25:01<4:53:43,  2.29it/s]

step:11140, train_loss:0.05916355989071375, acc:0.6727764423076923


  8%|▊         | 3329/43738 [25:01<4:15:30,  2.64it/s]

step:11140, train_loss:0.05918973187442347, acc:0.6725743466506459


  8%|▊         | 3330/43738 [25:01<4:49:49,  2.32it/s]

step:11140, train_loss:0.05920293618415905, acc:0.6723723723723724


  8%|▊         | 3331/43738 [25:02<4:48:53,  2.33it/s]

step:11140, train_loss:0.059185728766119175, acc:0.6724707295106575


  8%|▊         | 3332/43738 [25:02<5:09:43,  2.17it/s]

step:11140, train_loss:0.059173190337773476, acc:0.6725690276110444


  8%|▊         | 3333/43738 [25:03<5:11:08,  2.16it/s]

step:11140, train_loss:0.059182288258582655, acc:0.6723672367236724


  8%|▊         | 3334/43738 [25:03<5:02:29,  2.23it/s]

step:11140, train_loss:0.05917045906905127, acc:0.6724655068986203


  8%|▊         | 3335/43738 [25:03<4:22:17,  2.57it/s]

step:11140, train_loss:0.0591554567037706, acc:0.6725637181409295


  8%|▊         | 3336/43738 [25:04<5:21:57,  2.09it/s]

step:11140, train_loss:0.05913805840321988, acc:0.6726618705035972


  8%|▊         | 3337/43738 [25:05<5:17:15,  2.12it/s]

step:11140, train_loss:0.0591517119173877, acc:0.6724602936769554


  8%|▊         | 3338/43738 [25:05<4:41:26,  2.39it/s]

step:11140, train_loss:0.05918048410799182, acc:0.6722588376273217


  8%|▊         | 3339/43738 [25:05<4:30:07,  2.49it/s]

step:11140, train_loss:0.05916473850723346, acc:0.67235699311171


  8%|▊         | 3340/43738 [25:06<4:13:53,  2.65it/s]

step:11140, train_loss:0.05920042229835836, acc:0.6721556886227545


  8%|▊         | 3341/43738 [25:06<4:17:14,  2.62it/s]

step:11140, train_loss:0.05918518381569043, acc:0.6722538162226879


  8%|▊         | 3342/43738 [25:07<5:31:28,  2.03it/s]

step:11140, train_loss:0.059230841087502886, acc:0.6720526630760024


  8%|▊         | 3343/43738 [25:07<5:39:21,  1.98it/s]

step:11140, train_loss:0.0592324600656409, acc:0.6721507627879151


  8%|▊         | 3648/43738 [27:15<5:52:36,  1.89it/s]

step:11160, train_loss:0.059428654157831294, acc:0.6726973684210527


  8%|▊         | 3649/43738 [27:16<6:45:25,  1.65it/s]

step:11160, train_loss:0.05942946235912427, acc:0.6727870649493012


  8%|▊         | 3650/43738 [27:17<7:05:36,  1.57it/s]

step:11160, train_loss:0.05941815811490053, acc:0.6728767123287671


  8%|▊         | 3651/43738 [27:17<6:58:48,  1.60it/s]

step:11160, train_loss:0.05940573099096082, acc:0.6729663105998357


  8%|▊         | 3652/43738 [27:18<5:40:08,  1.96it/s]

step:11160, train_loss:0.0593936596279209, acc:0.6730558598028478


  8%|▊         | 3653/43738 [27:18<4:45:28,  2.34it/s]

step:11160, train_loss:0.059419059067023286, acc:0.6728716123733918


  8%|▊         | 3654/43738 [27:18<4:10:49,  2.66it/s]

step:11160, train_loss:0.05940332835666085, acc:0.6729611384783799


  8%|▊         | 3655/43738 [27:18<3:48:43,  2.92it/s]

step:11160, train_loss:0.059395152271410546, acc:0.6730506155950753


  8%|▊         | 3656/43738 [27:19<4:42:39,  2.36it/s]

step:11160, train_loss:0.059402005651306475, acc:0.6728665207877462


  8%|▊         | 3657/43738 [27:20<5:14:40,  2.12it/s]

step:11160, train_loss:0.05938642562499704, acc:0.6729559748427673


  8%|▊         | 3658/43738 [27:20<5:12:22,  2.14it/s]

step:11160, train_loss:0.059378774157517016, acc:0.6730453799890651


  8%|▊         | 3659/43738 [27:21<5:49:19,  1.91it/s]

step:11160, train_loss:0.0594003228978469, acc:0.6728614375512435


  8%|▊         | 3660/43738 [27:21<5:20:12,  2.09it/s]

step:11160, train_loss:0.05938441734061235, acc:0.6729508196721311


  8%|▊         | 3661/43738 [27:21<4:35:04,  2.43it/s]

step:11160, train_loss:0.0593701128739018, acc:0.6730401529636711


  8%|▊         | 3662/43738 [27:22<5:32:12,  2.01it/s]

step:11160, train_loss:0.059416541112647894, acc:0.6728563626433642


  8%|▊         | 3663/43738 [27:22<5:12:55,  2.13it/s]

step:11160, train_loss:0.05940169847572648, acc:0.672945672945673


  9%|▉         | 3968/43738 [29:40<4:32:08,  2.44it/s]

step:11180, train_loss:0.059236778040001575, acc:0.6716229838709677


  9%|▉         | 3969/43738 [29:41<4:24:46,  2.50it/s]

step:11180, train_loss:0.05922204179069345, acc:0.671705719324767


  9%|▉         | 3970/43738 [29:41<5:18:23,  2.08it/s]

step:11180, train_loss:0.05921043050580984, acc:0.6717884130982368


  9%|▉         | 3971/43738 [29:42<5:44:46,  1.92it/s]

step:11180, train_loss:0.05920347730267734, acc:0.6718710652228658


  9%|▉         | 3972/43738 [29:42<5:20:52,  2.07it/s]

step:11180, train_loss:0.05919099982320505, acc:0.6719536757301108


  9%|▉         | 3973/43738 [29:43<4:34:25,  2.41it/s]

step:11180, train_loss:0.059176326114726535, acc:0.6720362446513969


  9%|▉         | 3974/43738 [29:43<4:05:50,  2.70it/s]

step:11180, train_loss:0.05917462434129397, acc:0.6718671363865123


  9%|▉         | 3975/43738 [29:43<4:04:27,  2.71it/s]

step:11180, train_loss:0.0591597697061469, acc:0.6719496855345912


  9%|▉         | 3976/43738 [29:44<3:55:50,  2.81it/s]

step:11180, train_loss:0.059188598543695986, acc:0.6717806841046278


  9%|▉         | 3977/43738 [29:44<4:17:45,  2.57it/s]

step:11180, train_loss:0.05919302736360065, acc:0.6716117676640684


  9%|▉         | 3978/43738 [29:44<3:48:01,  2.91it/s]

step:11180, train_loss:0.05918446185803364, acc:0.6716943187531422


  9%|▉         | 3979/43738 [29:45<3:29:04,  3.17it/s]

step:11180, train_loss:0.059175455789441035, acc:0.6717768283488313


  9%|▉         | 3980/43738 [29:45<3:17:25,  3.36it/s]

step:11180, train_loss:0.0591710231699817, acc:0.671859296482412


  9%|▉         | 3981/43738 [29:46<4:34:02,  2.42it/s]

step:11180, train_loss:0.059210035932867694, acc:0.6716905300175835


  9%|▉         | 3982/43738 [29:46<4:01:38,  2.74it/s]

step:11180, train_loss:0.05919519822978116, acc:0.6717729784028127


  9%|▉         | 3983/43738 [29:46<4:02:17,  2.73it/s]

step:11180, train_loss:0.059181099058183376, acc:0.6718553853878986


 10%|▉         | 4288/43738 [32:02<5:03:46,  2.16it/s]

step:11200, train_loss:0.05927664951991234, acc:0.6716417910447762


 10%|▉         | 4289/43738 [32:02<4:21:51,  2.51it/s]

step:11200, train_loss:0.05927415615988656, acc:0.6714851946840755


 10%|▉         | 4290/43738 [32:02<5:00:09,  2.19it/s]

step:11200, train_loss:0.0592772472206946, acc:0.6715617715617715


 10%|▉         | 4291/43738 [32:03<4:29:34,  2.44it/s]

step:11200, train_loss:0.0592636428157007, acc:0.6716383127476113


 10%|▉         | 4292/43738 [32:03<3:56:35,  2.78it/s]

step:11200, train_loss:0.05925134403973068, acc:0.6717148182665424


 10%|▉         | 4293/43738 [32:04<5:20:16,  2.05it/s]

step:11200, train_loss:0.05929964518917765, acc:0.6715583508036338


 10%|▉         | 4294/43738 [32:04<4:30:49,  2.43it/s]

step:11200, train_loss:0.059293091593854194, acc:0.671634839310666


 10%|▉         | 4295/43738 [32:04<3:57:39,  2.77it/s]

step:11200, train_loss:0.05928203960201932, acc:0.6717112922002328


 10%|▉         | 4296/43738 [32:05<4:19:02,  2.54it/s]

step:11200, train_loss:0.0592686032834749, acc:0.6717877094972067


 10%|▉         | 4297/43738 [32:05<4:33:21,  2.40it/s]

step:11200, train_loss:0.059279186965380544, acc:0.6716313707237608


 10%|▉         | 4298/43738 [32:05<4:17:14,  2.56it/s]

step:11200, train_loss:0.05927358213560755, acc:0.6717077710563053


 10%|▉         | 4299/43738 [32:06<4:38:19,  2.36it/s]

step:11200, train_loss:0.05933247740066559, acc:0.6715515236101419


 10%|▉         | 4300/43738 [32:07<5:05:52,  2.15it/s]

step:11200, train_loss:0.059355792689115505, acc:0.6713953488372093


 10%|▉         | 4301/43738 [32:07<4:40:11,  2.35it/s]

step:11200, train_loss:0.0593483902079176, acc:0.6714717507556383


 10%|▉         | 4302/43738 [32:07<4:50:28,  2.26it/s]

step:11200, train_loss:0.05937142816152699, acc:0.6713156671315668


 10%|▉         | 4303/43738 [32:08<4:34:21,  2.40it/s]

step:11200, train_loss:0.05937808368189679, acc:0.6711596560539159


 11%|█         | 4608/43738 [34:23<5:25:33,  2.00it/s]

step:11220, train_loss:0.05967214819298129, acc:0.6688368055555556


 11%|█         | 4609/43738 [34:23<5:31:59,  1.96it/s]

step:11220, train_loss:0.05965967426225252, acc:0.6689086569754827


 11%|█         | 4610/43738 [34:24<4:56:38,  2.20it/s]

step:11220, train_loss:0.05966512886312368, acc:0.6687635574837311


 11%|█         | 4611/43738 [34:24<5:11:30,  2.09it/s]

step:11220, train_loss:0.05970002857506085, acc:0.6686185209282152


 11%|█         | 4612/43738 [34:25<4:30:15,  2.41it/s]

step:11220, train_loss:0.059692606615554464, acc:0.6686903729401561


 11%|█         | 4613/43738 [34:25<5:24:25,  2.01it/s]

step:11220, train_loss:0.05968030076302344, acc:0.66876219380013


 11%|█         | 4614/43738 [34:26<5:46:31,  1.88it/s]

step:11220, train_loss:0.05967507544449984, acc:0.6688339835283919


 11%|█         | 4615/43738 [34:26<5:08:21,  2.11it/s]

step:11220, train_loss:0.059662178139891456, acc:0.6689057421451787


 11%|█         | 4616/43738 [34:27<6:10:03,  1.76it/s]

step:11220, train_loss:0.059649765579150166, acc:0.6689774696707106


 11%|█         | 4617/43738 [34:27<5:15:58,  2.06it/s]

step:11220, train_loss:0.059638993679135326, acc:0.6690491661251895


 11%|█         | 4618/43738 [34:28<5:17:08,  2.06it/s]

step:11220, train_loss:0.05967779735120594, acc:0.6689042875703768


 11%|█         | 4619/43738 [34:28<4:35:11,  2.37it/s]

step:11220, train_loss:0.05968316175895665, acc:0.6689759688244209


 11%|█         | 4620/43738 [34:29<5:42:36,  1.90it/s]

step:11220, train_loss:0.0596811322137528, acc:0.669047619047619


 11%|█         | 4621/43738 [34:29<5:04:25,  2.14it/s]

step:11220, train_loss:0.05967431701729838, acc:0.6691192382601169


 11%|█         | 4622/43738 [34:30<4:47:19,  2.27it/s]

step:11220, train_loss:0.0596671339269721, acc:0.6691908264820424


 11%|█         | 4623/43738 [34:30<5:43:27,  1.90it/s]

step:11220, train_loss:0.059660535344115466, acc:0.6692623837335063


 11%|█▏        | 4928/43738 [36:49<4:48:51,  2.24it/s]

step:11240, train_loss:0.05940881900441328, acc:0.6698457792207793


 11%|█▏        | 4929/43738 [36:49<4:02:12,  2.67it/s]

step:11240, train_loss:0.059397122317361654, acc:0.6699127612091702


 11%|█▏        | 4930/43738 [36:49<3:38:12,  2.96it/s]

step:11240, train_loss:0.059385456650425314, acc:0.6699797160243408


 11%|█▏        | 4931/43738 [36:50<3:23:11,  3.18it/s]

step:11240, train_loss:0.05937341786439403, acc:0.670046643682823


 11%|█▏        | 4932/43738 [36:50<3:17:46,  3.27it/s]

step:11240, train_loss:0.05936218890093723, acc:0.6701135442011354


 11%|█▏        | 4933/43738 [36:50<3:22:17,  3.20it/s]

step:11240, train_loss:0.05935063751636441, acc:0.6701804175957835


 11%|█▏        | 4934/43738 [36:51<3:32:05,  3.05it/s]

step:11240, train_loss:0.05934606789177896, acc:0.6702472638832591


 11%|█▏        | 4935/43738 [36:51<3:13:36,  3.34it/s]

step:11240, train_loss:0.05934255227093654, acc:0.6703140830800405


 11%|█▏        | 4936/43738 [36:51<3:08:14,  3.44it/s]

step:11240, train_loss:0.05933056387839374, acc:0.6703808752025932


 11%|█▏        | 4937/43738 [36:51<3:09:49,  3.41it/s]

step:11240, train_loss:0.059318818212787686, acc:0.6704476402673688


 11%|█▏        | 4938/43738 [36:52<3:44:37,  2.88it/s]

step:11240, train_loss:0.05931119187059822, acc:0.670514378290806


 11%|█▏        | 4939/43738 [36:52<3:38:05,  2.97it/s]

step:11240, train_loss:0.05929994988818069, acc:0.6705810892893298


 11%|█▏        | 4940/43738 [36:53<3:49:05,  2.82it/s]

step:11240, train_loss:0.05929643815724549, acc:0.6706477732793522


 11%|█▏        | 4941/43738 [36:53<3:24:47,  3.16it/s]

step:11240, train_loss:0.059286850607020755, acc:0.6707144302772718


 11%|█▏        | 4942/43738 [36:53<4:25:34,  2.43it/s]

step:11240, train_loss:0.059277879386761115, acc:0.6707810602994739


 11%|█▏        | 4943/43738 [36:54<4:09:21,  2.59it/s]

step:11240, train_loss:0.05927863536174981, acc:0.6706453570706049


 12%|█▏        | 5248/43738 [39:12<4:45:14,  2.25it/s]

step:11260, train_loss:0.059544427575197116, acc:0.6686356707317073


 12%|█▏        | 5249/43738 [39:13<4:49:09,  2.22it/s]

step:11260, train_loss:0.05959384061238565, acc:0.6685082872928176


 12%|█▏        | 5250/43738 [39:13<4:27:07,  2.40it/s]

step:11260, train_loss:0.059582825591876394, acc:0.6685714285714286


 12%|█▏        | 5251/43738 [39:14<4:15:44,  2.51it/s]

step:11260, train_loss:0.059575386250321015, acc:0.6686345458007998


 12%|█▏        | 5252/43738 [39:14<5:01:12,  2.13it/s]

step:11260, train_loss:0.05956738074476812, acc:0.6686976389946687


 12%|█▏        | 5253/43738 [39:15<5:28:37,  1.95it/s]

step:11260, train_loss:0.05959833639588965, acc:0.6685703407576623


 12%|█▏        | 5254/43738 [39:15<5:39:18,  1.89it/s]

step:11260, train_loss:0.059629830406630585, acc:0.6684430909783022


 12%|█▏        | 5255/43738 [39:16<5:27:57,  1.96it/s]

step:11260, train_loss:0.05961938057764726, acc:0.6685061845861084


 12%|█▏        | 5256/43738 [39:17<6:08:28,  1.74it/s]

step:11260, train_loss:0.05961168485678041, acc:0.6685692541856926


 12%|█▏        | 5257/43738 [39:17<5:22:12,  1.99it/s]

step:11260, train_loss:0.0596093021039843, acc:0.6684420772303595


 12%|█▏        | 5258/43738 [39:18<6:04:36,  1.76it/s]

step:11260, train_loss:0.05960030232252572, acc:0.6685051350323317


 12%|█▏        | 5259/43738 [39:18<6:16:12,  1.70it/s]

step:11260, train_loss:0.059596888512729984, acc:0.6685681688533942


 12%|█▏        | 5260/43738 [39:19<5:54:46,  1.81it/s]

step:11260, train_loss:0.059585574456267484, acc:0.6686311787072243


 12%|█▏        | 5261/43738 [39:19<5:22:46,  1.99it/s]

step:11260, train_loss:0.05959705919922361, acc:0.668504086675537


 12%|█▏        | 5262/43738 [39:20<5:47:33,  1.85it/s]

step:11260, train_loss:0.05958637674043542, acc:0.6685670847586469


 12%|█▏        | 5263/43738 [39:20<5:53:53,  1.81it/s]

step:11260, train_loss:0.059581647610598684, acc:0.6686300589017671


 13%|█▎        | 5568/43738 [41:39<3:57:32,  2.68it/s]

step:11280, train_loss:0.05969666809411639, acc:0.6681034482758621


 13%|█▎        | 5569/43738 [41:39<4:13:19,  2.51it/s]

step:11280, train_loss:0.05968599365149506, acc:0.6681630454300592


 13%|█▎        | 5570/43738 [41:39<3:41:20,  2.87it/s]

step:11280, train_loss:0.059676758039582195, acc:0.6682226211849193


 13%|█▎        | 5571/43738 [41:40<3:53:57,  2.72it/s]

step:11280, train_loss:0.059666140892849995, acc:0.6682821755519656


 13%|█▎        | 5572/43738 [41:40<3:31:38,  3.01it/s]

step:11280, train_loss:0.059662014920744515, acc:0.6683417085427136


 13%|█▎        | 5573/43738 [41:41<4:51:19,  2.18it/s]

step:11280, train_loss:0.059656723995024405, acc:0.6684012201686704


 13%|█▎        | 5574/43738 [41:41<4:11:56,  2.52it/s]

step:11280, train_loss:0.059647422753893314, acc:0.6684607104413348


 13%|█▎        | 5575/43738 [41:41<3:41:37,  2.87it/s]

step:11280, train_loss:0.059668693583408546, acc:0.6683408071748879


 13%|█▎        | 5576/43738 [41:42<3:35:46,  2.95it/s]

step:11280, train_loss:0.05966390330973115, acc:0.668400286944046


 13%|█▎        | 5577/43738 [41:42<4:08:52,  2.56it/s]

step:11280, train_loss:0.05965466640461473, acc:0.6684597453828223


 13%|█▎        | 5578/43738 [41:43<4:38:48,  2.28it/s]

step:11280, train_loss:0.059655944302353534, acc:0.6683399067766225


 13%|█▎        | 5579/43738 [41:43<5:09:35,  2.05it/s]

step:11280, train_loss:0.059647227204110136, acc:0.6683993547230687


 13%|█▎        | 5580/43738 [41:44<5:45:38,  1.84it/s]

step:11280, train_loss:0.05963768886492377, acc:0.6684587813620072


 13%|█▎        | 5581/43738 [41:44<5:13:40,  2.03it/s]

step:11280, train_loss:0.05962723605798668, acc:0.6685181867048916


 13%|█▎        | 5582/43738 [41:45<4:52:06,  2.18it/s]

step:11280, train_loss:0.05962240228073058, acc:0.6685775707631674


 13%|█▎        | 5583/43738 [41:45<4:44:01,  2.24it/s]

step:11280, train_loss:0.05961272571157159, acc:0.6686369335482716


 13%|█▎        | 5888/43738 [43:58<5:19:09,  1.98it/s]

step:11300, train_loss:0.05925821820330938, acc:0.6696671195652174


 13%|█▎        | 5889/43738 [43:59<5:08:06,  2.05it/s]

step:11300, train_loss:0.059252814115381726, acc:0.6697232127695704


 13%|█▎        | 5890/43738 [43:59<4:21:31,  2.41it/s]

step:11300, train_loss:0.059247331483469855, acc:0.669779286926995


 13%|█▎        | 5891/43738 [44:00<4:52:50,  2.15it/s]

step:11300, train_loss:0.05925087777490153, acc:0.6698353420471906


 13%|█▎        | 5892/43738 [44:00<4:17:54,  2.45it/s]

step:11300, train_loss:0.05924489410867243, acc:0.6698913781398507


 13%|█▎        | 5893/43738 [44:00<4:58:48,  2.11it/s]

step:11300, train_loss:0.059235998943458185, acc:0.6699473952146615


 13%|█▎        | 5894/43738 [44:01<5:11:40,  2.02it/s]

step:11300, train_loss:0.05922657684721672, acc:0.670003393281303


 13%|█▎        | 5895/43738 [44:01<4:54:32,  2.14it/s]

step:11300, train_loss:0.05921938488128784, acc:0.6700593723494487


 13%|█▎        | 5896/43738 [44:02<5:41:40,  1.85it/s]

step:11300, train_loss:0.05923090388668572, acc:0.6699457259158752


 13%|█▎        | 5897/43738 [44:03<6:03:21,  1.74it/s]

step:11300, train_loss:0.0592400225491211, acc:0.669832118026115


 13%|█▎        | 5898/43738 [44:03<6:04:44,  1.73it/s]

step:11300, train_loss:0.05926399269744769, acc:0.6697185486605629


 13%|█▎        | 5899/43738 [44:04<4:54:31,  2.14it/s]

step:11300, train_loss:0.05929118918280548, acc:0.669605017799627


 13%|█▎        | 5900/43738 [44:04<4:19:38,  2.43it/s]

step:11300, train_loss:0.059281152583090434, acc:0.6696610169491526


 13%|█▎        | 5901/43738 [44:04<4:08:23,  2.54it/s]

step:11300, train_loss:0.059303839188627805, acc:0.6695475343162176


 13%|█▎        | 5902/43738 [44:05<4:20:25,  2.42it/s]

step:11300, train_loss:0.059294524283209256, acc:0.6696035242290749


 13%|█▎        | 5903/43738 [44:05<4:20:03,  2.42it/s]

step:11300, train_loss:0.05928450253371332, acc:0.6696594951719465


 14%|█▍        | 6208/43738 [46:20<4:15:35,  2.45it/s]

step:11320, train_loss:0.059238857133151995, acc:0.6699420103092784


 14%|█▍        | 6209/43738 [46:20<3:59:16,  2.61it/s]

step:11320, train_loss:0.05922933610134879, acc:0.6699951683040747


 14%|█▍        | 6210/43738 [46:20<4:18:48,  2.42it/s]

step:11320, train_loss:0.05922106495517008, acc:0.6700483091787439


 14%|█▍        | 6211/43738 [46:21<4:57:43,  2.10it/s]

step:11320, train_loss:0.05921504039867976, acc:0.6701014329415553


 14%|█▍        | 6212/43738 [46:22<5:40:19,  1.84it/s]

step:11320, train_loss:0.059230207127512355, acc:0.6699935608499678


 14%|█▍        | 6213/43738 [46:22<4:42:49,  2.21it/s]

step:11320, train_loss:0.05923015982138661, acc:0.6698857234830194


 14%|█▍        | 6214/43738 [46:22<4:51:21,  2.15it/s]

step:11320, train_loss:0.05922370901964377, acc:0.6699388477631155


 14%|█▍        | 6215/43738 [46:23<5:04:21,  2.05it/s]

step:11320, train_loss:0.05926296610163401, acc:0.6698310539018504


 14%|█▍        | 6216/43738 [46:23<4:54:22,  2.12it/s]

step:11320, train_loss:0.05925462328924161, acc:0.6698841698841699


 14%|█▍        | 6217/43738 [46:24<4:04:00,  2.56it/s]

step:11320, train_loss:0.05924581781162127, acc:0.6699372687791539


 14%|█▍        | 6218/43738 [46:24<4:25:20,  2.36it/s]

step:11320, train_loss:0.059236434140791126, acc:0.6699903505950466


 14%|█▍        | 6219/43738 [46:24<3:57:01,  2.64it/s]

step:11320, train_loss:0.059226910715458776, acc:0.6700434153400868


 14%|█▍        | 6220/43738 [46:25<4:02:04,  2.58it/s]

step:11320, train_loss:0.059241904221457156, acc:0.669935691318328


 14%|█▍        | 6221/43738 [46:25<4:08:34,  2.52it/s]

step:11320, train_loss:0.059236244805722475, acc:0.6699887477897444


 14%|█▍        | 6222/43738 [46:26<4:43:39,  2.20it/s]

step:11320, train_loss:0.05926872338553006, acc:0.6698810671809707


 14%|█▍        | 6223/43738 [46:26<4:06:33,  2.54it/s]

step:11320, train_loss:0.059263905842996394, acc:0.6699341153784348


 15%|█▍        | 6528/43738 [48:42<4:05:22,  2.53it/s]

step:11340, train_loss:0.0589548021655669, acc:0.6712622549019608


 15%|█▍        | 6529/43738 [48:42<3:51:29,  2.68it/s]

step:11340, train_loss:0.058947241290813794, acc:0.6713126052994333


 15%|█▍        | 6530/43738 [48:42<3:32:35,  2.92it/s]

step:11340, train_loss:0.058938252229448894, acc:0.6713629402756508


 15%|█▍        | 6531/43738 [48:43<4:33:31,  2.27it/s]

step:11340, train_loss:0.05892924143463747, acc:0.6714132598376972


 15%|█▍        | 6532/43738 [48:43<4:29:47,  2.30it/s]

step:11340, train_loss:0.05892045381370606, acc:0.6714635639926516


 15%|█▍        | 6533/43738 [48:44<4:42:36,  2.19it/s]

step:11340, train_loss:0.05892804387578227, acc:0.6713607837134548


 15%|█▍        | 6534/43738 [48:44<4:36:38,  2.24it/s]

step:11340, train_loss:0.05893372009369483, acc:0.6712580348943985


 15%|█▍        | 6535/43738 [48:45<3:53:15,  2.66it/s]

step:11340, train_loss:0.058937689952278695, acc:0.6711553175210405


 15%|█▍        | 6536/43738 [48:45<3:45:20,  2.75it/s]

step:11340, train_loss:0.05892867787904704, acc:0.6712056303549572


 15%|█▍        | 6537/43738 [48:45<3:28:42,  2.97it/s]

step:11340, train_loss:0.05895369198527749, acc:0.6711029524246597


 15%|█▍        | 6538/43738 [48:46<3:38:05,  2.84it/s]

step:11340, train_loss:0.0589451916880778, acc:0.6711532578770266


 15%|█▍        | 6539/43738 [48:46<4:36:22,  2.24it/s]

step:11340, train_loss:0.0589400464079227, acc:0.6712035479431105


 15%|█▍        | 6540/43738 [48:47<4:32:48,  2.27it/s]

step:11340, train_loss:0.05893920743709789, acc:0.6712538226299695


 15%|█▍        | 6541/43738 [48:47<4:48:17,  2.15it/s]

step:11340, train_loss:0.058930854938133805, acc:0.6713040819446567


 15%|█▍        | 6542/43738 [48:48<5:32:28,  1.86it/s]

step:11340, train_loss:0.05892238786070514, acc:0.671354325894222


 15%|█▍        | 6543/43738 [48:48<4:43:51,  2.18it/s]

step:11340, train_loss:0.058922438240058786, acc:0.671251719394773


 16%|█▌        | 6848/43738 [51:06<4:08:32,  2.47it/s]

step:11360, train_loss:0.059037489121590486, acc:0.6707067757009346


 16%|█▌        | 6849/43738 [51:07<5:08:43,  1.99it/s]

step:11360, train_loss:0.059029084055525816, acc:0.6707548547233173


 16%|█▌        | 6850/43738 [51:08<4:48:48,  2.13it/s]

step:11360, train_loss:0.05903183253386812, acc:0.6708029197080292


 16%|█▌        | 6851/43738 [51:08<4:38:14,  2.21it/s]

step:11360, train_loss:0.05902389558082453, acc:0.6708509706612173


 16%|█▌        | 6852/43738 [51:08<4:27:54,  2.29it/s]

step:11360, train_loss:0.059018184342005675, acc:0.6708990075890251


 16%|█▌        | 6853/43738 [51:09<4:08:23,  2.47it/s]

step:11360, train_loss:0.05901117542481076, acc:0.6709470304975923


 16%|█▌        | 6854/43738 [51:09<4:30:17,  2.27it/s]

step:11360, train_loss:0.05900284436484894, acc:0.6709950393930552


 16%|█▌        | 6855/43738 [51:10<4:19:16,  2.37it/s]

step:11360, train_loss:0.05900138551647823, acc:0.6708971553610503


 16%|█▌        | 6856/43738 [51:10<4:48:13,  2.13it/s]

step:11360, train_loss:0.05903998584879938, acc:0.6707992998833139


 16%|█▌        | 6857/43738 [51:11<4:21:50,  2.35it/s]

step:11360, train_loss:0.05903143508706676, acc:0.6708473093189441


 16%|█▌        | 6858/43738 [51:11<4:09:23,  2.46it/s]

step:11360, train_loss:0.05902444684107186, acc:0.6708953047535725


 16%|█▌        | 6859/43738 [51:11<4:19:23,  2.37it/s]

step:11360, train_loss:0.05902154823049049, acc:0.6709432861933227


 16%|█▌        | 6860/43738 [51:12<4:21:05,  2.35it/s]

step:11360, train_loss:0.05901308772140576, acc:0.6709912536443149


 16%|█▌        | 6861/43738 [51:12<4:27:32,  2.30it/s]

step:11360, train_loss:0.059008635937539904, acc:0.6710392071126658


 16%|█▌        | 6862/43738 [51:13<4:41:51,  2.18it/s]

step:11360, train_loss:0.059056350609808246, acc:0.6709414164966482


 16%|█▌        | 6863/43738 [51:13<4:31:04,  2.27it/s]

step:11360, train_loss:0.05904788680756759, acc:0.670989363252222


 16%|█▋        | 7168/43738 [53:32<3:53:26,  2.61it/s]

step:11380, train_loss:0.0591933269034988, acc:0.6707589285714286


 16%|█▋        | 7169/43738 [53:32<4:00:17,  2.54it/s]

step:11380, train_loss:0.05918507530406057, acc:0.6708048542335053


 16%|█▋        | 7170/43738 [53:32<4:00:23,  2.54it/s]

step:11380, train_loss:0.05917723896176745, acc:0.6708507670850767


 16%|█▋        | 7171/43738 [53:33<4:02:13,  2.52it/s]

step:11380, train_loss:0.05917191535147176, acc:0.6708966671315019


 16%|█▋        | 7172/43738 [53:33<3:38:50,  2.78it/s]

step:11380, train_loss:0.05917482338824953, acc:0.670803123257111


 16%|█▋        | 7173/43738 [53:33<3:46:55,  2.69it/s]

step:11380, train_loss:0.05916697130481784, acc:0.670849017147637


 16%|█▋        | 7174/43738 [53:34<4:29:43,  2.26it/s]

step:11380, train_loss:0.05917251133444351, acc:0.6707555059938668


 16%|█▋        | 7175/43738 [53:35<4:45:18,  2.14it/s]

step:11380, train_loss:0.05916460222587353, acc:0.670801393728223


 16%|█▋        | 7176/43738 [53:35<4:36:56,  2.20it/s]

step:11380, train_loss:0.05915727240314817, acc:0.6708472686733556


 16%|█▋        | 7177/43738 [53:35<4:02:50,  2.51it/s]

step:11380, train_loss:0.05915022394348025, acc:0.6708931308346106


 16%|█▋        | 7178/43738 [53:36<4:40:18,  2.17it/s]

step:11380, train_loss:0.059169992681008324, acc:0.6707996656450265


 16%|█▋        | 7179/43738 [53:36<4:17:25,  2.37it/s]

step:11380, train_loss:0.05916175109764081, acc:0.6708455216603983


 16%|█▋        | 7180/43738 [53:37<5:24:46,  1.88it/s]

step:11380, train_loss:0.05916083969673905, acc:0.6707520891364902


 16%|█▋        | 7181/43738 [53:37<5:09:00,  1.97it/s]

step:11380, train_loss:0.059171718570029334, acc:0.6706586826347305


 16%|█▋        | 7182/43738 [53:38<6:01:29,  1.69it/s]

step:11380, train_loss:0.05920438690562827, acc:0.6705653021442495


 16%|█▋        | 7183/43738 [53:39<5:04:15,  2.00it/s]

step:11380, train_loss:0.059205337413942585, acc:0.6704719476541835


 17%|█▋        | 7488/43738 [56:00<6:01:15,  1.67it/s]

step:11400, train_loss:0.059055285019482665, acc:0.6714743589743589


 17%|█▋        | 7489/43738 [56:01<5:55:20,  1.70it/s]

step:11400, train_loss:0.05906344145411566, acc:0.671384697556416


 17%|█▋        | 7490/43738 [56:01<5:08:50,  1.96it/s]

step:11400, train_loss:0.05905556431343563, acc:0.6714285714285714


 17%|█▋        | 7491/43738 [56:02<6:53:10,  1.46it/s]

step:11400, train_loss:0.05907018730602636, acc:0.671338940061407


 17%|█▋        | 7492/43738 [56:03<6:27:06,  1.56it/s]

step:11400, train_loss:0.05906873599186568, acc:0.6712493326214629


 17%|█▋        | 7493/43738 [56:03<5:59:10,  1.68it/s]

step:11400, train_loss:0.059061010775350094, acc:0.6712932069931936


 17%|█▋        | 7494/43738 [56:04<5:45:12,  1.75it/s]

step:11400, train_loss:0.059053200661309896, acc:0.6713370696557246


 17%|█▋        | 7495/43738 [56:04<5:31:34,  1.82it/s]

step:11400, train_loss:0.0590453578010102, acc:0.6713809206137425


 17%|█▋        | 7496/43738 [56:05<5:56:49,  1.69it/s]

step:11400, train_loss:0.059058841231046495, acc:0.6712913553895411


 17%|█▋        | 7497/43738 [56:06<6:14:15,  1.61it/s]

step:11400, train_loss:0.059053742271639696, acc:0.6713352007469654


 17%|█▋        | 7498/43738 [56:07<6:19:56,  1.59it/s]

step:11400, train_loss:0.0590519986458711, acc:0.6713790344091758


 17%|█▋        | 7499/43738 [56:07<6:34:09,  1.53it/s]

step:11400, train_loss:0.059047023703646326, acc:0.6714228563808508


 17%|█▋        | 7500/43738 [56:08<6:02:33,  1.67it/s]

step:11400, train_loss:0.05905088543357948, acc:0.6713333333333333


 17%|█▋        | 7501/43738 [56:08<5:36:35,  1.79it/s]

step:11400, train_loss:0.05905409339604803, acc:0.671243834155446


 17%|█▋        | 7502/43738 [56:09<5:03:46,  1.99it/s]

step:11400, train_loss:0.0590522018640139, acc:0.6712876566249


 17%|█▋        | 7503/43738 [56:09<4:18:27,  2.34it/s]

step:11400, train_loss:0.05904669878915236, acc:0.6713314674130347


 18%|█▊        | 7808/43738 [58:23<4:07:09,  2.42it/s]

step:11420, train_loss:0.05909845647979413, acc:0.6716188524590164


 18%|█▊        | 7809/43738 [58:24<4:50:44,  2.06it/s]

step:11420, train_loss:0.05909097572615188, acc:0.6716609040850301


 18%|█▊        | 7810/43738 [58:24<4:08:19,  2.41it/s]

step:11420, train_loss:0.05908341016191107, acc:0.6717029449423816


 18%|█▊        | 7811/43738 [58:25<5:15:42,  1.90it/s]

step:11420, train_loss:0.05907966046328315, acc:0.6717449750352068


 18%|█▊        | 7812/43738 [58:25<4:51:26,  2.05it/s]

step:11420, train_loss:0.059072635519707886, acc:0.6717869943676396


 18%|█▊        | 7813/43738 [58:25<4:39:23,  2.14it/s]

step:11420, train_loss:0.05907112662796669, acc:0.6718290029438116


 18%|█▊        | 7814/43738 [58:26<5:27:38,  1.83it/s]

step:11420, train_loss:0.059073210724884286, acc:0.6717430253391349


 18%|█▊        | 7815/43738 [58:26<4:38:30,  2.15it/s]

step:11420, train_loss:0.05907449486164313, acc:0.6716570697376839


 18%|█▊        | 7816/43738 [58:27<5:39:12,  1.76it/s]

step:11420, train_loss:0.059082379489574684, acc:0.6715711361310133


 18%|█▊        | 7817/43738 [58:28<5:06:14,  1.95it/s]

step:11420, train_loss:0.05908738587490544, acc:0.6716131508251247


 18%|█▊        | 7818/43738 [58:28<5:13:42,  1.91it/s]

step:11420, train_loss:0.05908134903417589, acc:0.6716551547710412


 18%|█▊        | 7819/43738 [58:28<4:25:01,  2.26it/s]

step:11420, train_loss:0.059074729021382784, acc:0.6716971479728866


 18%|█▊        | 7820/43738 [58:29<4:10:12,  2.39it/s]

step:11420, train_loss:0.05906717523794307, acc:0.6717391304347826


 18%|█▊        | 7821/43738 [58:29<4:39:39,  2.14it/s]

step:11420, train_loss:0.05906056132290945, acc:0.671781102160849


 18%|█▊        | 7822/43738 [58:30<4:34:43,  2.18it/s]

step:11420, train_loss:0.059054784291730805, acc:0.6718230631552032


 18%|█▊        | 7823/43738 [58:30<3:58:27,  2.51it/s]

step:11420, train_loss:0.059048452986842165, acc:0.6718650134219609


 19%|█▊        | 8128/43738 [1:00:49<6:06:58,  1.62it/s]

step:11440, train_loss:0.05902195407394375, acc:0.6729822834645669


 19%|█▊        | 8129/43738 [1:00:49<5:47:54,  1.71it/s]

step:11440, train_loss:0.059015831604406564, acc:0.6730225119940952


 19%|█▊        | 8130/43738 [1:00:50<5:43:13,  1.73it/s]

step:11440, train_loss:0.05900882481188407, acc:0.6730627306273063


 19%|█▊        | 8131/43738 [1:00:51<6:13:36,  1.59it/s]

step:11440, train_loss:0.05904203943751066, acc:0.672979953265281


 19%|█▊        | 8132/43738 [1:00:51<5:10:43,  1.91it/s]

step:11440, train_loss:0.05904430029806129, acc:0.6728971962616822


 19%|█▊        | 8133/43738 [1:00:51<5:13:09,  1.89it/s]

step:11440, train_loss:0.05903771048182475, acc:0.672937415467847


 19%|█▊        | 8134/43738 [1:00:52<4:52:01,  2.03it/s]

step:11440, train_loss:0.05903178674139344, acc:0.6729776247848537


 19%|█▊        | 8135/43738 [1:00:52<4:29:41,  2.20it/s]

step:11440, train_loss:0.059024531566311815, acc:0.6730178242163491


 19%|█▊        | 8136/43738 [1:00:52<3:53:43,  2.54it/s]

step:11440, train_loss:0.059017277903641474, acc:0.6730580137659784


 19%|█▊        | 8137/43738 [1:00:53<3:48:39,  2.60it/s]

step:11440, train_loss:0.05903715141498938, acc:0.6729752980213838


 19%|█▊        | 8138/43738 [1:00:53<4:02:58,  2.44it/s]

step:11440, train_loss:0.05904964714980086, acc:0.6728926026050627


 19%|█▊        | 8139/43738 [1:00:53<3:31:18,  2.81it/s]

step:11440, train_loss:0.05904240474241736, acc:0.6729327927263792


 19%|█▊        | 8140/43738 [1:00:54<3:41:34,  2.68it/s]

step:11440, train_loss:0.059037120862582504, acc:0.672972972972973


 19%|█▊        | 8141/43738 [1:00:54<3:22:35,  2.93it/s]

step:11440, train_loss:0.05902990716649202, acc:0.673013143348483


 19%|█▊        | 8142/43738 [1:00:55<3:44:53,  2.64it/s]

step:11440, train_loss:0.059028103287752466, acc:0.6730533038565463


 19%|█▊        | 8143/43738 [1:00:55<3:34:49,  2.76it/s]

step:11440, train_loss:0.059035925956445925, acc:0.6729706496377257


 19%|█▉        | 8448/43738 [1:03:08<4:46:19,  2.05it/s]

step:11460, train_loss:0.05889676307660094, acc:0.6732954545454546


 19%|█▉        | 8449/43738 [1:03:08<4:32:44,  2.16it/s]

step:11460, train_loss:0.05888979230516989, acc:0.6733341223813469


 19%|█▉        | 8450/43738 [1:03:08<3:55:34,  2.50it/s]

step:11460, train_loss:0.05888282881123868, acc:0.6733727810650888


 19%|█▉        | 8451/43738 [1:03:09<5:05:34,  1.92it/s]

step:11460, train_loss:0.058876987313162946, acc:0.673411430599929


 19%|█▉        | 8452/43738 [1:03:09<4:32:07,  2.16it/s]

step:11460, train_loss:0.05887024976029707, acc:0.673450070989115


 19%|█▉        | 8453/43738 [1:03:10<4:58:29,  1.97it/s]

step:11460, train_loss:0.05890463930003254, acc:0.6733704010410505


 19%|█▉        | 8454/43738 [1:03:10<4:56:16,  1.98it/s]

step:11460, train_loss:0.05890397288829268, acc:0.6734090371421813


 19%|█▉        | 8455/43738 [1:03:11<5:51:24,  1.67it/s]

step:11460, train_loss:0.058897274585098185, acc:0.6734476641040804


 19%|█▉        | 8456/43738 [1:03:12<5:13:07,  1.88it/s]

step:11460, train_loss:0.05890358152679038, acc:0.673368022705771


 19%|█▉        | 8457/43738 [1:03:12<4:51:02,  2.02it/s]

step:11460, train_loss:0.0588986096856108, acc:0.6734066453825234


 19%|█▉        | 8458/43738 [1:03:13<5:25:34,  1.81it/s]

step:11460, train_loss:0.05891014956416627, acc:0.673327027666115


 19%|█▉        | 8459/43738 [1:03:13<5:08:06,  1.91it/s]

step:11460, train_loss:0.05891152881343503, acc:0.6732474287740867


 19%|█▉        | 8460/43738 [1:03:13<4:16:47,  2.29it/s]

step:11460, train_loss:0.05891733730848148, acc:0.6731678486997635


 19%|█▉        | 8461/43738 [1:03:14<5:23:48,  1.82it/s]

step:11460, train_loss:0.05893973467206489, acc:0.6730882874364732


 19%|█▉        | 8462/43738 [1:03:15<5:31:43,  1.77it/s]

step:11460, train_loss:0.05895511105274057, acc:0.6730087449775467


 19%|█▉        | 8463/43738 [1:03:15<4:36:25,  2.13it/s]

step:11460, train_loss:0.058948217682728815, acc:0.6730473827248021


 20%|██        | 8768/43738 [1:05:31<3:54:07,  2.49it/s]

step:11480, train_loss:0.05909490016761514, acc:0.6719890510948905


 20%|██        | 8769/43738 [1:05:31<4:04:19,  2.39it/s]

step:11480, train_loss:0.05908994147168936, acc:0.6720264568365835


 20%|██        | 8770/43738 [1:05:32<5:07:16,  1.90it/s]

step:11480, train_loss:0.059089360689423644, acc:0.6719498289623718


 20%|██        | 8771/43738 [1:05:32<4:44:26,  2.05it/s]

step:11480, train_loss:0.05908675095453568, acc:0.6719872306464485


 20%|██        | 8772/43738 [1:05:32<3:58:26,  2.44it/s]

step:11480, train_loss:0.05908001523772386, acc:0.6720246238030095


 20%|██        | 8773/43738 [1:05:33<3:31:01,  2.76it/s]

step:11480, train_loss:0.059080537520268345, acc:0.6720620084349709


 20%|██        | 8774/43738 [1:05:33<3:39:21,  2.66it/s]

step:11480, train_loss:0.059085127491022474, acc:0.6719854114428995


 20%|██        | 8775/43738 [1:05:33<3:17:38,  2.95it/s]

step:11480, train_loss:0.059078472896009426, acc:0.672022792022792


 20%|██        | 8776/43738 [1:05:34<3:38:31,  2.67it/s]

step:11480, train_loss:0.059076065633109784, acc:0.672060164083865


 20%|██        | 8777/43738 [1:05:34<3:51:02,  2.52it/s]

step:11480, train_loss:0.059069897500307667, acc:0.6720975276290304


 20%|██        | 8778/43738 [1:05:35<4:04:20,  2.38it/s]

step:11480, train_loss:0.05906329067764724, acc:0.6721348826611985


 20%|██        | 8779/43738 [1:05:35<4:07:38,  2.35it/s]

step:11480, train_loss:0.059060792927420724, acc:0.6721722291832782


 20%|██        | 8780/43738 [1:05:36<4:09:17,  2.34it/s]

step:11480, train_loss:0.05909456184604852, acc:0.6720956719817768


 20%|██        | 8781/43738 [1:05:36<4:16:45,  2.27it/s]

step:11480, train_loss:0.05908824570028158, acc:0.6721330144630452


 20%|██        | 8782/43738 [1:05:36<4:04:05,  2.39it/s]

step:11480, train_loss:0.0590984759276656, acc:0.6720564791619221


 20%|██        | 8783/43738 [1:05:37<4:06:16,  2.37it/s]

step:11480, train_loss:0.059093501519352, acc:0.672093817602186


 21%|██        | 9088/43738 [1:07:50<4:13:44,  2.28it/s]

step:11500, train_loss:0.05904217496666039, acc:0.6714348591549296


 21%|██        | 9089/43738 [1:07:50<3:41:00,  2.61it/s]

step:11500, train_loss:0.05903738299440302, acc:0.6714710089118715


 21%|██        | 9090/43738 [1:07:51<3:47:43,  2.54it/s]

step:11500, train_loss:0.059031970157096936, acc:0.6715071507150715


 21%|██        | 9091/43738 [1:07:51<3:55:11,  2.46it/s]

step:11500, train_loss:0.0590354854371451, acc:0.6714332856671433


 21%|██        | 9092/43738 [1:07:52<4:53:23,  1.97it/s]

step:11500, train_loss:0.05904360605149798, acc:0.6713594368675759


 21%|██        | 9093/43738 [1:07:52<4:29:56,  2.14it/s]

step:11500, train_loss:0.05903711999484817, acc:0.6713955790168261


 21%|██        | 9094/43738 [1:07:52<3:55:33,  2.45it/s]

step:11500, train_loss:0.05903073968424419, acc:0.671431713217506


 21%|██        | 9095/43738 [1:07:53<3:28:15,  2.77it/s]

step:11500, train_loss:0.05902461531865674, acc:0.6714678394722375


 21%|██        | 9096/43738 [1:07:53<3:23:00,  2.84it/s]

step:11500, train_loss:0.05901988544357253, acc:0.6715039577836411


 21%|██        | 9097/43738 [1:07:54<4:17:48,  2.24it/s]

step:11500, train_loss:0.05901711175471966, acc:0.6715400681543366


 21%|██        | 9098/43738 [1:07:54<4:09:01,  2.32it/s]

step:11500, train_loss:0.059026640589808284, acc:0.6714662563200704


 21%|██        | 9099/43738 [1:07:54<3:33:38,  2.70it/s]

step:11500, train_loss:0.059021300794484574, acc:0.6715023628970217


 21%|██        | 9100/43738 [1:07:54<3:12:17,  3.00it/s]

step:11500, train_loss:0.05901581223062649, acc:0.6715384615384615


 21%|██        | 9101/43738 [1:07:55<2:57:27,  3.25it/s]

step:11500, train_loss:0.059009332931789824, acc:0.6715745522470058


 21%|██        | 9102/43738 [1:07:55<3:10:02,  3.04it/s]

step:11500, train_loss:0.05900285104241816, acc:0.6716106350252692


 21%|██        | 9103/43738 [1:07:55<3:18:18,  2.91it/s]

step:11500, train_loss:0.058996369356002935, acc:0.6716467098758651


 22%|██▏       | 9408/43738 [1:10:17<5:01:20,  1.90it/s]

step:11520, train_loss:0.0590364984415958, acc:0.6715561224489796


 22%|██▏       | 9409/43738 [1:10:17<4:12:47,  2.26it/s]

step:11520, train_loss:0.05903141714983353, acc:0.6715910298650228


 22%|██▏       | 9410/43738 [1:10:17<4:13:18,  2.26it/s]

step:11520, train_loss:0.05904290461438925, acc:0.6715196599362381


 22%|██▏       | 9411/43738 [1:10:18<4:29:53,  2.12it/s]

step:11520, train_loss:0.05904784789805938, acc:0.6714483051747955


 22%|██▏       | 9412/43738 [1:10:19<5:15:24,  1.81it/s]

step:11520, train_loss:0.05907189692518277, acc:0.6713769655758606


 22%|██▏       | 9413/43738 [1:10:19<4:57:25,  1.92it/s]

step:11520, train_loss:0.059065623813515364, acc:0.6714118771911186


 22%|██▏       | 9414/43738 [1:10:20<4:34:05,  2.09it/s]

step:11520, train_loss:0.059059976541061966, acc:0.67144678138942


 22%|██▏       | 9415/43738 [1:10:20<3:50:11,  2.49it/s]

step:11520, train_loss:0.05905380622908894, acc:0.671481678173128


 22%|██▏       | 9416/43738 [1:10:20<3:25:13,  2.79it/s]

step:11520, train_loss:0.05904753465721387, acc:0.671516567544605


 22%|██▏       | 9417/43738 [1:10:21<3:54:00,  2.44it/s]

step:11520, train_loss:0.05905054374872656, acc:0.6715514495062122


 22%|██▏       | 9418/43738 [1:10:21<3:47:47,  2.51it/s]

step:11520, train_loss:0.05904429490467827, acc:0.67158632406031


 22%|██▏       | 9419/43738 [1:10:21<3:51:16,  2.47it/s]

step:11520, train_loss:0.05904239433417262, acc:0.6716211912092579


 22%|██▏       | 9420/43738 [1:10:22<3:33:02,  2.68it/s]

step:11520, train_loss:0.05903636899188748, acc:0.671656050955414


 22%|██▏       | 9421/43738 [1:10:22<3:26:17,  2.77it/s]

step:11520, train_loss:0.059031440409440826, acc:0.6716909033011358


 22%|██▏       | 9422/43738 [1:10:23<4:33:18,  2.09it/s]

step:11520, train_loss:0.059041797505801664, acc:0.6716196136701337


 22%|██▏       | 9423/43738 [1:10:23<3:55:40,  2.43it/s]

step:11520, train_loss:0.05903715324806496, acc:0.671654462485408


 22%|██▏       | 9728/43738 [1:12:41<6:05:54,  1.55it/s]

step:11540, train_loss:0.0590483250866184, acc:0.6716694078947368


 22%|██▏       | 9729/43738 [1:12:42<5:26:48,  1.73it/s]

step:11540, train_loss:0.05906947364040883, acc:0.6716003700277521


 22%|██▏       | 9730/43738 [1:12:42<4:56:16,  1.91it/s]

step:11540, train_loss:0.05908168174326659, acc:0.6715313463514903


 22%|██▏       | 9731/43738 [1:12:43<5:09:55,  1.83it/s]

step:11540, train_loss:0.05908193279736733, acc:0.6715651012228959


 22%|██▏       | 9732/43738 [1:12:43<4:16:33,  2.21it/s]

step:11540, train_loss:0.0590940686885982, acc:0.6714960953555281


 22%|██▏       | 9733/43738 [1:12:43<4:15:56,  2.21it/s]

step:11540, train_loss:0.05909739346970424, acc:0.6715298469125655


 22%|██▏       | 9734/43738 [1:12:44<3:45:52,  2.51it/s]

step:11540, train_loss:0.05909554947176294, acc:0.6715635915348264


 22%|██▏       | 9735/43738 [1:12:44<3:19:19,  2.84it/s]

step:11540, train_loss:0.05908956352577549, acc:0.6715973292244478


 22%|██▏       | 9736/43738 [1:12:45<4:32:26,  2.08it/s]

step:11540, train_loss:0.059086136904607976, acc:0.6716310599835661


 22%|██▏       | 9737/43738 [1:12:45<4:50:24,  1.95it/s]

step:11540, train_loss:0.05909456535837379, acc:0.671562082777036


 22%|██▏       | 9738/43738 [1:12:45<4:06:39,  2.30it/s]

step:11540, train_loss:0.05908850061920328, acc:0.6715958102279729


 22%|██▏       | 9739/43738 [1:12:46<5:08:18,  1.84it/s]

step:11540, train_loss:0.05908463472315738, acc:0.671629530752644


 22%|██▏       | 9740/43738 [1:12:47<5:18:30,  1.78it/s]

step:11540, train_loss:0.05909382247207524, acc:0.6715605749486653


 22%|██▏       | 9741/43738 [1:12:48<5:56:18,  1.59it/s]

step:11540, train_loss:0.05908775689131651, acc:0.6715942921671286


 22%|██▏       | 9742/43738 [1:12:48<5:16:36,  1.79it/s]

step:11540, train_loss:0.05908543337744754, acc:0.6716280024635598


 22%|██▏       | 9743/43738 [1:12:48<4:44:34,  1.99it/s]

step:11540, train_loss:0.0590828564212654, acc:0.6716617058400903


 23%|██▎       | 10048/43738 [1:15:06<5:08:14,  1.82it/s]

step:11560, train_loss:0.05902320101056969, acc:0.6715764331210191


 23%|██▎       | 10049/43738 [1:15:06<4:59:20,  1.88it/s]

step:11560, train_loss:0.05903910970620851, acc:0.6715096029455667


 23%|██▎       | 10050/43738 [1:15:06<4:26:02,  2.11it/s]

step:11560, train_loss:0.059065373004912235, acc:0.6714427860696517


 23%|██▎       | 10051/43738 [1:15:07<4:13:16,  2.22it/s]

step:11560, train_loss:0.059060179128034705, acc:0.6714754750771068


 23%|██▎       | 10052/43738 [1:15:07<4:25:18,  2.12it/s]

step:11560, train_loss:0.05905476602547286, acc:0.671508157580581


 23%|██▎       | 10053/43738 [1:15:08<4:17:21,  2.18it/s]

step:11560, train_loss:0.05904893092424099, acc:0.6715408335820153


 23%|██▎       | 10054/43738 [1:15:08<3:59:13,  2.35it/s]

step:11560, train_loss:0.05904430979955673, acc:0.67157350308335


 23%|██▎       | 10055/43738 [1:15:08<3:36:16,  2.60it/s]

step:11560, train_loss:0.05903850308074586, acc:0.6716061660865241


 23%|██▎       | 10056/43738 [1:15:09<4:02:15,  2.32it/s]

step:11560, train_loss:0.05903263574723066, acc:0.6716388225934765


 23%|██▎       | 10057/43738 [1:15:10<4:31:40,  2.07it/s]

step:11560, train_loss:0.05903497515082594, acc:0.671671472606145


 23%|██▎       | 10058/43738 [1:15:10<4:15:54,  2.19it/s]

step:11560, train_loss:0.059030433466205116, acc:0.6717041161264665


 23%|██▎       | 10059/43738 [1:15:10<4:04:41,  2.29it/s]

step:11560, train_loss:0.059024657208861075, acc:0.6717367531563774


 23%|██▎       | 10060/43738 [1:15:11<3:30:50,  2.66it/s]

step:11560, train_loss:0.05901880761332834, acc:0.6717693836978131


 23%|██▎       | 10061/43738 [1:15:11<4:40:01,  2.00it/s]

step:11560, train_loss:0.059020917329337105, acc:0.671702614054269


 23%|██▎       | 10062/43738 [1:15:12<5:11:24,  1.80it/s]

step:11560, train_loss:0.05903628269278072, acc:0.6716358576823693


 23%|██▎       | 10063/43738 [1:15:13<5:54:55,  1.58it/s]

step:11560, train_loss:0.05904203720661642, acc:0.6715691145781576


 24%|██▎       | 10368/43738 [1:17:29<4:36:49,  2.01it/s]

step:11580, train_loss:0.05903116904243722, acc:0.6725501543209876


 24%|██▎       | 10369/43738 [1:17:30<4:52:03,  1.90it/s]

step:11580, train_loss:0.05904580567792004, acc:0.6724852926993924


 24%|██▎       | 10370/43738 [1:17:30<5:32:10,  1.67it/s]

step:11580, train_loss:0.05904628389319423, acc:0.6724204435872709


 24%|██▎       | 10371/43738 [1:17:31<5:32:03,  1.67it/s]

step:11580, train_loss:0.05904892923144672, acc:0.6724520296981968


 24%|██▎       | 10372/43738 [1:17:32<5:35:41,  1.66it/s]

step:11580, train_loss:0.059059303286315215, acc:0.6723871962977247


 24%|██▎       | 10373/43738 [1:17:32<5:04:02,  1.83it/s]

step:11580, train_loss:0.05905361211043951, acc:0.6724187795237636


 24%|██▎       | 10374/43738 [1:17:32<4:44:48,  1.95it/s]

step:11580, train_loss:0.05904810363517701, acc:0.6724503566608829


 24%|██▎       | 10375/43738 [1:17:33<4:01:14,  2.30it/s]

step:11580, train_loss:0.05904241526496024, acc:0.6724819277108434


 24%|██▎       | 10376/43738 [1:17:33<3:42:00,  2.50it/s]

step:11580, train_loss:0.05904452276764426, acc:0.6724171164225135


 24%|██▎       | 10377/43738 [1:17:34<4:03:45,  2.28it/s]

step:11580, train_loss:0.05904019809246303, acc:0.6724486845909222


 24%|██▎       | 10378/43738 [1:17:34<4:27:06,  2.08it/s]

step:11580, train_loss:0.059049168152673706, acc:0.672383888995953


 24%|██▎       | 10379/43738 [1:17:34<4:10:45,  2.22it/s]

step:11580, train_loss:0.05904382817806874, acc:0.6724154542826862


 24%|██▎       | 10380/43738 [1:17:35<4:09:28,  2.23it/s]

step:11580, train_loss:0.05904345348674081, acc:0.6723506743737958


 24%|██▎       | 10381/43738 [1:17:35<3:35:59,  2.57it/s]

step:11580, train_loss:0.05905505140874658, acc:0.672285906945381


 24%|██▎       | 10382/43738 [1:17:36<3:43:44,  2.48it/s]

step:11580, train_loss:0.05904937509053394, acc:0.6723174725486418


 24%|██▎       | 10383/43738 [1:17:36<3:52:45,  2.39it/s]

step:11580, train_loss:0.05905664153783901, acc:0.672252720793605


 24%|██▍       | 10688/43738 [1:19:54<4:16:31,  2.15it/s]

step:11600, train_loss:0.059123165650697286, acc:0.672436377245509


 24%|██▍       | 10689/43738 [1:19:55<4:11:45,  2.19it/s]

step:11600, train_loss:0.05912941662929121, acc:0.6723734680512676


 24%|██▍       | 10690/43738 [1:19:55<4:34:11,  2.01it/s]

step:11600, train_loss:0.059135072220779406, acc:0.6723105706267539


 24%|██▍       | 10691/43738 [1:19:56<4:22:16,  2.10it/s]

step:11600, train_loss:0.059130793746274474, acc:0.6723412215882518


 24%|██▍       | 10692/43738 [1:19:56<4:46:22,  1.92it/s]

step:11600, train_loss:0.059149985053241434, acc:0.6722783389450057


 24%|██▍       | 10693/43738 [1:19:57<4:20:10,  2.12it/s]

step:11600, train_loss:0.05915749333726813, acc:0.672215468063219


 24%|██▍       | 10694/43738 [1:19:57<4:45:20,  1.93it/s]

step:11600, train_loss:0.059153149261167465, acc:0.6722461193192444


 24%|██▍       | 10695/43738 [1:19:58<3:57:50,  2.32it/s]

step:11600, train_loss:0.059151286469864325, acc:0.6722767648433847


 24%|██▍       | 10696/43738 [1:19:58<3:39:52,  2.50it/s]

step:11600, train_loss:0.05915232071488312, acc:0.6723074046372476


 24%|██▍       | 10697/43738 [1:19:58<3:59:45,  2.30it/s]

step:11600, train_loss:0.059147858890365085, acc:0.6723380387024399


 24%|██▍       | 10698/43738 [1:19:59<4:26:05,  2.07it/s]

step:11600, train_loss:0.05914363052552914, acc:0.6723686670405683


 24%|██▍       | 10699/43738 [1:19:59<4:13:00,  2.18it/s]

step:11600, train_loss:0.0591384568381044, acc:0.6723992896532386


 24%|██▍       | 10700/43738 [1:20:00<4:40:17,  1.96it/s]

step:11600, train_loss:0.0591331893491241, acc:0.672429906542056


 24%|██▍       | 10701/43738 [1:20:00<4:19:35,  2.12it/s]

step:11600, train_loss:0.05912767660576027, acc:0.6724605177086254


 24%|██▍       | 10702/43738 [1:20:01<4:59:55,  1.84it/s]

step:11600, train_loss:0.05912305119564587, acc:0.6724911231545505


 24%|██▍       | 10703/43738 [1:20:01<4:13:21,  2.17it/s]

step:11600, train_loss:0.05911929245145119, acc:0.6725217228814351


 25%|██▌       | 11008/43738 [1:22:22<4:19:27,  2.10it/s]

step:11620, train_loss:0.0591758849966323, acc:0.6720566860465116


 25%|██▌       | 11009/43738 [1:22:23<3:53:20,  2.34it/s]

step:11620, train_loss:0.05918899220309349, acc:0.6719956399309656


 25%|██▌       | 11010/43738 [1:22:23<4:13:44,  2.15it/s]

step:11620, train_loss:0.05918582459243664, acc:0.6720254314259764


 25%|██▌       | 11011/43738 [1:22:23<3:36:05,  2.52it/s]

step:11620, train_loss:0.05918132248156755, acc:0.6720552175097629


 25%|██▌       | 11012/43738 [1:22:24<3:09:53,  2.87it/s]

step:11620, train_loss:0.05918481373849113, acc:0.6719941881583726


 25%|██▌       | 11013/43738 [1:22:24<4:08:27,  2.20it/s]

step:11620, train_loss:0.05918291946424236, acc:0.6720239716698447


 25%|██▌       | 11014/43738 [1:22:25<3:32:11,  2.57it/s]

step:11620, train_loss:0.05917756047349656, acc:0.6720537497730161


 25%|██▌       | 11015/43738 [1:22:25<3:49:57,  2.37it/s]

step:11620, train_loss:0.05917295421429505, acc:0.67208352246936


 25%|██▌       | 11016/43738 [1:22:25<3:20:56,  2.71it/s]

step:11620, train_loss:0.059167590682628904, acc:0.6721132897603486


 25%|██▌       | 11017/43738 [1:22:26<4:30:13,  2.02it/s]

step:11620, train_loss:0.05916785637045968, acc:0.6721430516474539


 25%|██▌       | 11018/43738 [1:22:26<4:09:36,  2.18it/s]

step:11620, train_loss:0.05916609801906735, acc:0.6721728081321474


 25%|██▌       | 11019/43738 [1:22:27<4:06:00,  2.22it/s]

step:11620, train_loss:0.05916239606279813, acc:0.6722025592158998


 25%|██▌       | 11020/43738 [1:22:27<4:11:11,  2.17it/s]

step:11620, train_loss:0.05917780451902726, acc:0.6721415607985481


 25%|██▌       | 11021/43738 [1:22:28<3:57:36,  2.29it/s]

step:11620, train_loss:0.0591756218930606, acc:0.6721713093185736


 25%|██▌       | 11022/43738 [1:22:28<3:27:26,  2.63it/s]

step:11620, train_loss:0.05917700865652439, acc:0.6721103248049356


 25%|██▌       | 11023/43738 [1:22:28<3:38:53,  2.49it/s]

step:11620, train_loss:0.0591721438027613, acc:0.6721400707611358


 26%|██▌       | 11328/43738 [1:24:45<4:02:09,  2.23it/s]

step:11640, train_loss:0.05917156365743681, acc:0.6728460451977402


 26%|██▌       | 11329/43738 [1:24:45<3:53:20,  2.31it/s]

step:11640, train_loss:0.059176629637369665, acc:0.6727866537205403


 26%|██▌       | 11330/43738 [1:24:46<3:23:29,  2.65it/s]

step:11640, train_loss:0.05917143060987904, acc:0.6728155339805825


 26%|██▌       | 11331/43738 [1:24:46<3:30:50,  2.56it/s]

step:11640, train_loss:0.05916629927450877, acc:0.6728444091430589


 26%|██▌       | 11332/43738 [1:24:46<3:11:32,  2.82it/s]

step:11640, train_loss:0.05916768672405837, acc:0.6727850335333568


 26%|██▌       | 11333/43738 [1:24:47<4:14:04,  2.13it/s]

step:11640, train_loss:0.05917708351590365, acc:0.6727256684020119


 26%|██▌       | 11334/43738 [1:24:48<4:52:32,  1.85it/s]

step:11640, train_loss:0.059174676605184995, acc:0.6727545438503617


 26%|██▌       | 11335/43738 [1:24:48<4:20:22,  2.07it/s]

step:11640, train_loss:0.059174642586465605, acc:0.6727834142037935


 26%|██▌       | 11336/43738 [1:24:49<4:03:57,  2.21it/s]

step:11640, train_loss:0.05916942429992569, acc:0.6728122794636556


 26%|██▌       | 11337/43738 [1:24:49<4:01:34,  2.24it/s]

step:11640, train_loss:0.059188322375456716, acc:0.6727529328746582


 26%|██▌       | 11338/43738 [1:24:49<3:48:36,  2.36it/s]

step:11640, train_loss:0.059190184454056015, acc:0.6726935967542776


 26%|██▌       | 11339/43738 [1:24:50<4:29:51,  2.00it/s]

step:11640, train_loss:0.05919640325039596, acc:0.6726342710997443


 26%|██▌       | 11340/43738 [1:24:51<4:21:34,  2.06it/s]

step:11640, train_loss:0.0591974911450493, acc:0.6725749559082892


 26%|██▌       | 11341/43738 [1:24:51<3:52:59,  2.32it/s]

step:11640, train_loss:0.059193176747923515, acc:0.6726038268230314


 26%|██▌       | 11342/43738 [1:24:51<3:26:11,  2.62it/s]

step:11640, train_loss:0.05919409301844907, acc:0.6725445247751719


 26%|██▌       | 11343/43738 [1:24:52<3:49:12,  2.36it/s]

step:11640, train_loss:0.05919191981048152, acc:0.6725733932822004


 27%|██▋       | 11648/43738 [1:27:08<5:00:55,  1.78it/s]

step:11660, train_loss:0.05911407482667878, acc:0.6726476648351648


 27%|██▋       | 11649/43738 [1:27:08<5:02:36,  1.77it/s]

step:11660, train_loss:0.059109164808478026, acc:0.6726757661601854


 27%|██▋       | 11650/43738 [1:27:08<4:14:14,  2.10it/s]

step:11660, train_loss:0.05910409136484437, acc:0.6727038626609442


 27%|██▋       | 11651/43738 [1:27:09<4:18:01,  2.07it/s]

step:11660, train_loss:0.05911344650411369, acc:0.6726461247961548


 27%|██▋       | 11652/43738 [1:27:09<3:54:44,  2.28it/s]

step:11660, train_loss:0.05911284024957505, acc:0.6725883968417439


 27%|██▋       | 11653/43738 [1:27:10<4:33:37,  1.95it/s]

step:11660, train_loss:0.05910798910058231, acc:0.6726164936067965


 27%|██▋       | 11654/43738 [1:27:11<5:00:30,  1.78it/s]

step:11660, train_loss:0.05910317170118558, acc:0.6726445855500257


 27%|██▋       | 11655/43738 [1:27:11<4:21:55,  2.04it/s]

step:11660, train_loss:0.05909815313861274, acc:0.6726726726726727


 27%|██▋       | 11656/43738 [1:27:11<3:55:15,  2.27it/s]

step:11660, train_loss:0.05909308298793486, acc:0.672700754975978


 27%|██▋       | 11657/43738 [1:27:12<4:59:22,  1.79it/s]

step:11660, train_loss:0.059090293759936394, acc:0.6727288324611821


 27%|██▋       | 11658/43738 [1:27:13<4:46:05,  1.87it/s]

step:11660, train_loss:0.05908547443388906, acc:0.6727569051295248


 27%|██▋       | 11659/43738 [1:27:13<5:05:26,  1.75it/s]

step:11660, train_loss:0.05909362486541057, acc:0.6726992023329617


 27%|██▋       | 11660/43738 [1:27:14<5:47:29,  1.54it/s]

step:11660, train_loss:0.05909533031093313, acc:0.6726415094339623


 27%|██▋       | 11661/43738 [1:27:15<5:47:36,  1.54it/s]

step:11660, train_loss:0.05909621424653106, acc:0.672669582368579


 27%|██▋       | 11662/43738 [1:27:15<4:58:08,  1.79it/s]

step:11660, train_loss:0.05909694622192221, acc:0.6726119019036186


 27%|██▋       | 11663/43738 [1:27:15<4:13:45,  2.11it/s]

step:11660, train_loss:0.059098433588006524, acc:0.6726399725628055


 27%|██▋       | 11968/43738 [1:29:30<5:35:55,  1.58it/s]

step:11680, train_loss:0.059165976288821254, acc:0.671875


 27%|██▋       | 11969/43738 [1:29:30<4:33:15,  1.94it/s]

step:11680, train_loss:0.05916109038436928, acc:0.671902414570975


 27%|██▋       | 11970/43738 [1:29:31<4:52:57,  1.81it/s]

step:11680, train_loss:0.059162897382870255, acc:0.6718462823725981


 27%|██▋       | 11971/43738 [1:29:31<5:04:25,  1.74it/s]

step:11680, train_loss:0.059160817617390775, acc:0.6718736947623424


 27%|██▋       | 11972/43738 [1:29:32<4:55:10,  1.79it/s]

step:11680, train_loss:0.059162525078915375, acc:0.671817574340127


 27%|██▋       | 11973/43738 [1:29:32<4:44:01,  1.86it/s]

step:11680, train_loss:0.05915847130861755, acc:0.6718449845485676


 27%|██▋       | 11974/43738 [1:29:33<5:27:38,  1.62it/s]

step:11680, train_loss:0.059174783622711995, acc:0.6717888758977785


 27%|██▋       | 11975/43738 [1:29:33<4:57:09,  1.78it/s]

step:11680, train_loss:0.05917219059853883, acc:0.6718162839248434


 27%|██▋       | 11976/43738 [1:29:34<4:07:41,  2.14it/s]

step:11680, train_loss:0.05916730700115044, acc:0.6718436873747495


 27%|██▋       | 11977/43738 [1:29:34<3:57:04,  2.23it/s]

step:11680, train_loss:0.059164451660525474, acc:0.6718710862486432


 27%|██▋       | 11978/43738 [1:29:35<4:26:45,  1.98it/s]

step:11680, train_loss:0.059171043881564654, acc:0.6718149941559526


 27%|██▋       | 11979/43738 [1:29:35<3:44:32,  2.36it/s]

step:11680, train_loss:0.05917329212142546, acc:0.6717589114283329


 27%|██▋       | 11980/43738 [1:29:35<3:15:03,  2.71it/s]

step:11680, train_loss:0.05916836148090793, acc:0.6717863105175292


 27%|██▋       | 11981/43738 [1:29:36<3:35:16,  2.46it/s]

step:11680, train_loss:0.05917044246889576, acc:0.6717302395459478


 27%|██▋       | 11982/43738 [1:29:36<3:12:52,  2.74it/s]

step:11680, train_loss:0.05916602655271043, acc:0.671757636454682


 27%|██▋       | 11983/43738 [1:29:36<3:00:21,  2.93it/s]

step:11680, train_loss:0.059166019508352985, acc:0.6717850287907869


 28%|██▊       | 12288/43738 [1:31:57<5:06:30,  1.71it/s]

step:11700, train_loss:0.05911116812809306, acc:0.6722819010416666


 28%|██▊       | 12289/43738 [1:31:58<4:35:30,  1.90it/s]

step:11700, train_loss:0.05911524753063041, acc:0.672227195052486


 28%|██▊       | 12290/43738 [1:31:58<4:20:12,  2.01it/s]

step:11700, train_loss:0.05911066717301678, acc:0.672253864930838


 28%|██▊       | 12291/43738 [1:31:59<5:07:29,  1.70it/s]

step:11700, train_loss:0.05911012927549939, acc:0.6722805304694492


 28%|██▊       | 12292/43738 [1:31:59<4:27:51,  1.96it/s]

step:11700, train_loss:0.05911620520260811, acc:0.6722258379433778


 28%|██▊       | 12293/43738 [1:32:00<4:22:35,  2.00it/s]

step:11700, train_loss:0.059111423461588444, acc:0.6722525014235744


 28%|██▊       | 12294/43738 [1:32:00<4:33:59,  1.91it/s]

step:11700, train_loss:0.059118784727968145, acc:0.6721978200748332


 28%|██▊       | 12295/43738 [1:32:01<3:52:01,  2.26it/s]

step:11700, train_loss:0.059119345011648235, acc:0.6721431476209841


 28%|██▊       | 12296/43738 [1:32:01<4:05:04,  2.14it/s]

step:11700, train_loss:0.05912148935359831, acc:0.6721698113207547


 28%|██▊       | 12297/43738 [1:32:02<4:05:57,  2.13it/s]

step:11700, train_loss:0.05911668261974989, acc:0.6721964706839066


 28%|██▊       | 12298/43738 [1:32:02<3:32:15,  2.47it/s]

step:11700, train_loss:0.05911201249621513, acc:0.6722231257114978


 28%|██▊       | 12299/43738 [1:32:02<3:12:27,  2.72it/s]

step:11700, train_loss:0.059107874809396006, acc:0.6722497764045857


 28%|██▊       | 12300/43738 [1:32:02<3:12:35,  2.72it/s]

step:11700, train_loss:0.05910409240121196, acc:0.6722764227642276


 28%|██▊       | 12301/43738 [1:32:03<3:48:58,  2.29it/s]

step:11700, train_loss:0.059101586023114445, acc:0.6723030647914804


 28%|██▊       | 12302/43738 [1:32:04<4:12:41,  2.07it/s]

step:11700, train_loss:0.059107053319601, acc:0.6722484148918875


 28%|██▊       | 12303/43738 [1:32:04<3:36:38,  2.42it/s]

step:11700, train_loss:0.059117590390321176, acc:0.6721937738762903


 29%|██▉       | 12608/43738 [1:34:24<3:05:35,  2.80it/s]

step:11720, train_loss:0.05919025421301135, acc:0.6717163705583756


 29%|██▉       | 12609/43738 [1:34:24<2:45:29,  3.13it/s]

step:11720, train_loss:0.05918559083059817, acc:0.6717424062177809


 29%|██▉       | 12610/43738 [1:34:25<2:56:38,  2.94it/s]

step:11720, train_loss:0.05918294932235521, acc:0.6717684377478191


 29%|██▉       | 12611/43738 [1:34:25<2:40:45,  3.23it/s]

step:11720, train_loss:0.059193010844339956, acc:0.6717151692966458


 29%|██▉       | 12612/43738 [1:34:26<3:17:50,  2.62it/s]

step:11720, train_loss:0.05919053909211562, acc:0.6717411988582302


 29%|██▉       | 12613/43738 [1:34:26<3:08:32,  2.75it/s]

step:11720, train_loss:0.05918949099326738, acc:0.6716879410132403


 29%|██▉       | 12614/43738 [1:34:27<3:55:50,  2.20it/s]

step:11720, train_loss:0.05918489659516246, acc:0.6717139686063105


 29%|██▉       | 12615/43738 [1:34:27<4:19:36,  2.00it/s]

step:11720, train_loss:0.05918681488173607, acc:0.6716607213634562


 29%|██▉       | 12616/43738 [1:34:28<4:06:08,  2.11it/s]

step:11720, train_loss:0.05918393934957192, acc:0.6716867469879518


 29%|██▉       | 12617/43738 [1:34:28<4:28:02,  1.94it/s]

step:11720, train_loss:0.05918957999601803, acc:0.6716335103431877


 29%|██▉       | 12618/43738 [1:34:29<4:13:36,  2.05it/s]

step:11720, train_loss:0.0591864993522087, acc:0.671659533999049


 29%|██▉       | 12619/43738 [1:34:29<3:36:29,  2.40it/s]

step:11720, train_loss:0.05919960059419994, acc:0.6716063079483319


 29%|██▉       | 12620/43738 [1:34:29<3:11:01,  2.71it/s]

step:11720, train_loss:0.05919978865116972, acc:0.6715530903328051


 29%|██▉       | 12621/43738 [1:34:29<2:51:23,  3.03it/s]

step:11720, train_loss:0.05919601592112146, acc:0.6715791141747881


 29%|██▉       | 12622/43738 [1:34:30<3:30:56,  2.46it/s]

step:11720, train_loss:0.059196991179948366, acc:0.6716051338932023


 29%|██▉       | 12623/43738 [1:34:30<3:51:35,  2.24it/s]

step:11720, train_loss:0.05919269141189777, acc:0.6716311494890279


 30%|██▉       | 12928/43738 [1:36:45<4:36:12,  1.86it/s]

step:11740, train_loss:0.059207944491174544, acc:0.6722617574257426


 30%|██▉       | 12929/43738 [1:36:46<5:08:21,  1.67it/s]

step:11740, train_loss:0.05920341576689841, acc:0.6722871065047568


 30%|██▉       | 12930/43738 [1:36:46<4:16:43,  2.00it/s]

step:11740, train_loss:0.05919885050750475, acc:0.6723124516627997


 30%|██▉       | 12931/43738 [1:36:47<4:12:19,  2.03it/s]

step:11740, train_loss:0.05919540415002989, acc:0.6723377929007811


 30%|██▉       | 12932/43738 [1:36:47<4:19:43,  1.98it/s]

step:11740, train_loss:0.059191645911334516, acc:0.6723631302196102


 30%|██▉       | 12933/43738 [1:36:48<3:45:13,  2.28it/s]

step:11740, train_loss:0.05918875839845129, acc:0.6723884636201964


 30%|██▉       | 12934/43738 [1:36:48<4:08:49,  2.06it/s]

step:11740, train_loss:0.059186615700024085, acc:0.6724137931034483


 30%|██▉       | 12935/43738 [1:36:48<3:36:14,  2.37it/s]

step:11740, train_loss:0.059182066418085454, acc:0.6724391186702744


 30%|██▉       | 12936/43738 [1:36:49<3:54:50,  2.19it/s]

step:11740, train_loss:0.059185494764219644, acc:0.672387136672851


 30%|██▉       | 12937/43738 [1:36:50<4:16:29,  2.00it/s]

step:11740, train_loss:0.059181702445396375, acc:0.6724124603849424


 30%|██▉       | 12938/43738 [1:36:50<4:47:52,  1.78it/s]

step:11740, train_loss:0.05919271770920764, acc:0.6723604884835369


 30%|██▉       | 12939/43738 [1:36:51<4:11:13,  2.04it/s]

step:11740, train_loss:0.05918823800942644, acc:0.67238581034083


 30%|██▉       | 12940/43738 [1:36:51<3:45:26,  2.28it/s]

step:11740, train_loss:0.05918367815804394, acc:0.6724111282843895


 30%|██▉       | 12941/43738 [1:36:51<3:16:18,  2.61it/s]

step:11740, train_loss:0.059179158349013085, acc:0.6724364423151225


 30%|██▉       | 12942/43738 [1:36:52<3:24:14,  2.51it/s]

step:11740, train_loss:0.05917458616098271, acc:0.672461752433936


 30%|██▉       | 12943/43738 [1:36:52<2:58:24,  2.88it/s]

step:11740, train_loss:0.05917824694069135, acc:0.6724097968013598


 30%|███       | 13248/43738 [1:39:15<4:14:25,  2.00it/s]

step:11760, train_loss:0.05931542856643683, acc:0.6720259661835749


 30%|███       | 13249/43738 [1:39:15<4:04:27,  2.08it/s]

step:11760, train_loss:0.059311616048809045, acc:0.6720507208091177


 30%|███       | 13250/43738 [1:39:15<3:51:09,  2.20it/s]

step:11760, train_loss:0.059307297115170236, acc:0.6720754716981132


 30%|███       | 13251/43738 [1:39:16<4:43:00,  1.80it/s]

step:11760, train_loss:0.05930639055270477, acc:0.6721002188514075


 30%|███       | 13252/43738 [1:39:17<4:06:03,  2.06it/s]

step:11760, train_loss:0.0593045444959506, acc:0.6721249622698461


 30%|███       | 13253/43738 [1:39:17<3:44:00,  2.27it/s]

step:11760, train_loss:0.05930841272000065, acc:0.6720742473402248


 30%|███       | 13254/43738 [1:39:17<3:16:57,  2.58it/s]

step:11760, train_loss:0.05930400560353696, acc:0.6720989889844575


 30%|███       | 13255/43738 [1:39:18<3:21:44,  2.52it/s]

step:11760, train_loss:0.05930687387477219, acc:0.6720482836665409


 30%|███       | 13256/43738 [1:39:18<3:16:39,  2.58it/s]

step:11760, train_loss:0.059304701005417855, acc:0.6720730235365118


 30%|███       | 13257/43738 [1:39:18<3:03:38,  2.77it/s]

step:11760, train_loss:0.05930024303578675, acc:0.6720977596741344


 30%|███       | 13258/43738 [1:39:19<3:11:36,  2.65it/s]

step:11760, train_loss:0.059306347360933175, acc:0.6720470659224619


 30%|███       | 13259/43738 [1:39:19<2:48:27,  3.02it/s]

step:11760, train_loss:0.05930187561720037, acc:0.6720718002865977


 30%|███       | 13260/43738 [1:39:19<2:35:09,  3.27it/s]

step:11760, train_loss:0.05930577666983521, acc:0.6720211161387633


 30%|███       | 13261/43738 [1:39:20<3:16:35,  2.58it/s]

step:11760, train_loss:0.059301973819222024, acc:0.6720458487293568


 30%|███       | 13262/43738 [1:39:20<3:24:21,  2.49it/s]

step:11760, train_loss:0.059297523594402324, acc:0.672070577590107


 30%|███       | 13263/43738 [1:39:21<3:26:10,  2.46it/s]

step:11760, train_loss:0.05929800426224041, acc:0.6720953027218578


 31%|███       | 13568/43738 [1:41:39<4:11:15,  2.00it/s]

step:11780, train_loss:0.059332133994560554, acc:0.6723172169811321


 31%|███       | 13569/43738 [1:41:40<4:35:53,  1.82it/s]

step:11780, train_loss:0.05933530326024511, acc:0.672267668951286


 31%|███       | 13570/43738 [1:41:40<4:35:34,  1.82it/s]

step:11780, train_loss:0.059332996893438385, acc:0.6722918201915992


 31%|███       | 13571/43738 [1:41:41<4:27:49,  1.88it/s]

step:11780, train_loss:0.05934018740257983, acc:0.6722422813352


 31%|███       | 13572/43738 [1:41:41<5:03:54,  1.65it/s]

step:11780, train_loss:0.05935335042482431, acc:0.6721927497789567


 31%|███       | 13573/43738 [1:41:42<4:08:00,  2.03it/s]

step:11780, train_loss:0.05934902293524602, acc:0.6722169012009136


 31%|███       | 13574/43738 [1:41:42<3:56:24,  2.13it/s]

step:11780, train_loss:0.05934700454309336, acc:0.6722410490643878


 31%|███       | 13575/43738 [1:41:42<3:48:15,  2.20it/s]

step:11780, train_loss:0.0593506169972365, acc:0.6721915285451197


 31%|███       | 13576/43738 [1:41:43<4:31:24,  1.85it/s]

step:11780, train_loss:0.05935845766590562, acc:0.672142015321155


 31%|███       | 13577/43738 [1:41:44<4:18:52,  1.94it/s]

step:11780, train_loss:0.05936432446207818, acc:0.6720925093908816


 31%|███       | 13578/43738 [1:41:44<4:01:30,  2.08it/s]

step:11780, train_loss:0.05936983973721546, acc:0.6720430107526881


 31%|███       | 13579/43738 [1:41:44<4:02:21,  2.07it/s]

step:11780, train_loss:0.05936548806208186, acc:0.6720671625303778


 31%|███       | 13580/43738 [1:41:45<4:15:35,  1.97it/s]

step:11780, train_loss:0.059374436109015764, acc:0.6720176730486009


 31%|███       | 13581/43738 [1:41:45<3:42:56,  2.25it/s]

step:11780, train_loss:0.05937035082860928, acc:0.6720418231352625


 31%|███       | 13582/43738 [1:41:46<3:30:03,  2.39it/s]

step:11780, train_loss:0.05936644432079419, acc:0.6720659696657341


 31%|███       | 13583/43738 [1:41:46<4:00:15,  2.09it/s]

step:11780, train_loss:0.05936707004160711, acc:0.672090112640801


 32%|███▏      | 13888/43738 [1:44:09<3:48:39,  2.18it/s]

step:11800, train_loss:0.059625058062262266, acc:0.6713709677419355


 32%|███▏      | 13889/43738 [1:44:09<3:44:56,  2.21it/s]

step:11800, train_loss:0.05962186318752166, acc:0.6713946288429693


 32%|███▏      | 13890/43738 [1:44:10<3:48:52,  2.17it/s]

step:11800, train_loss:0.059636428503460764, acc:0.6713462922966162


 32%|███▏      | 13891/43738 [1:44:11<4:07:23,  2.01it/s]

step:11800, train_loss:0.0596349050230827, acc:0.6713699517673314


 32%|███▏      | 13892/43738 [1:44:11<4:27:20,  1.86it/s]

step:11800, train_loss:0.05963291205849297, acc:0.6713936078318457


 32%|███▏      | 13893/43738 [1:44:11<3:47:49,  2.18it/s]

step:11800, train_loss:0.059638583070638365, acc:0.6713452817965883


 32%|███▏      | 13894/43738 [1:44:12<3:20:28,  2.48it/s]

step:11800, train_loss:0.05963484085264605, acc:0.6713689362314669


 32%|███▏      | 13895/43738 [1:44:12<3:08:08,  2.64it/s]

step:11800, train_loss:0.05964041061454914, acc:0.6713206189276718


 32%|███▏      | 13896/43738 [1:44:12<3:16:32,  2.53it/s]

step:11800, train_loss:0.05965374603515303, acc:0.671272308578008


 32%|███▏      | 13897/43738 [1:44:13<3:14:49,  2.55it/s]

step:11800, train_loss:0.059651352616014335, acc:0.671295963157516


 32%|███▏      | 13898/43738 [1:44:13<2:56:52,  2.81it/s]

step:11800, train_loss:0.059648774390782415, acc:0.6713196143329976


 32%|███▏      | 13899/43738 [1:44:13<2:44:19,  3.03it/s]

step:11800, train_loss:0.059644484737375984, acc:0.6713432621051875


 32%|███▏      | 13900/43738 [1:44:14<2:29:46,  3.32it/s]

step:11800, train_loss:0.059640849421559596, acc:0.6713669064748201


 32%|███▏      | 13901/43738 [1:44:14<2:51:54,  2.89it/s]

step:11800, train_loss:0.059646005534570154, acc:0.6713186101719301


 32%|███▏      | 13902/43738 [1:44:14<3:02:54,  2.72it/s]

step:11800, train_loss:0.05964173101753227, acc:0.6713422529132499


 32%|███▏      | 13903/43738 [1:44:15<2:48:39,  2.95it/s]

step:11800, train_loss:0.05963874640185099, acc:0.6713658922534704


 32%|███▏      | 14208/43738 [1:46:31<3:36:18,  2.28it/s]

step:11820, train_loss:0.05963976373302947, acc:0.670678490990991


 32%|███▏      | 14209/43738 [1:46:32<4:10:20,  1.97it/s]

step:11820, train_loss:0.059643253721740196, acc:0.6706312900274474


 32%|███▏      | 14210/43738 [1:46:32<4:43:18,  1.74it/s]

step:11820, train_loss:0.05964374185273854, acc:0.6706544686840253


 32%|███▏      | 14211/43738 [1:46:33<4:38:53,  1.76it/s]

step:11820, train_loss:0.059639583341654444, acc:0.6706776440785307


 32%|███▏      | 14212/43738 [1:46:33<3:49:21,  2.15it/s]

step:11820, train_loss:0.05963539132722596, acc:0.6707008162116521


 32%|███▏      | 14213/43738 [1:46:33<3:14:46,  2.53it/s]

step:11820, train_loss:0.05963119707824464, acc:0.6707239850840779


 32%|███▏      | 14214/43738 [1:46:34<2:50:25,  2.89it/s]

step:11820, train_loss:0.05963024135085677, acc:0.6707471506964964


 33%|███▎      | 14215/43738 [1:46:34<2:35:57,  3.15it/s]

step:11820, train_loss:0.05962663401637129, acc:0.6707703130495954


 33%|███▎      | 14216/43738 [1:46:35<3:32:59,  2.31it/s]

step:11820, train_loss:0.05962507010022495, acc:0.670793472144063


 33%|███▎      | 14217/43738 [1:46:35<3:22:58,  2.42it/s]

step:11820, train_loss:0.05963038435015223, acc:0.670746289653232


 33%|███▎      | 14218/43738 [1:46:35<3:20:15,  2.46it/s]

step:11820, train_loss:0.05963517877795878, acc:0.6706991137994092


 33%|███▎      | 14219/43738 [1:46:36<2:56:47,  2.78it/s]

step:11820, train_loss:0.05963110973330592, acc:0.67072227301498


 33%|███▎      | 14220/43738 [1:46:36<3:41:16,  2.22it/s]

step:11820, train_loss:0.059639216801186015, acc:0.670675105485232


 33%|███▎      | 14221/43738 [1:46:36<3:08:47,  2.61it/s]

step:11820, train_loss:0.059635023983586985, acc:0.6706982631319879


 33%|███▎      | 14222/43738 [1:46:37<3:36:55,  2.27it/s]

step:11820, train_loss:0.0596390856776954, acc:0.6706511039234988


 33%|███▎      | 14223/43738 [1:46:37<3:26:22,  2.38it/s]

step:11820, train_loss:0.05963527300424505, acc:0.6706742600014062


 33%|███▎      | 14528/43738 [1:48:58<3:37:54,  2.23it/s]

step:11840, train_loss:0.059629588602603395, acc:0.6709801762114538


 33%|███▎      | 14529/43738 [1:48:58<3:27:32,  2.35it/s]

step:11840, train_loss:0.059632285377965485, acc:0.6709339940808039


 33%|███▎      | 14530/43738 [1:48:58<3:20:20,  2.43it/s]

step:11840, train_loss:0.05962819584203225, acc:0.670956641431521


 33%|███▎      | 14531/43738 [1:48:59<4:13:34,  1.92it/s]

step:11840, train_loss:0.0596245759104752, acc:0.6709792856651298


 33%|███▎      | 14532/43738 [1:48:59<3:37:40,  2.24it/s]

step:11840, train_loss:0.059627267605439044, acc:0.6709331131296449


 33%|███▎      | 14533/43738 [1:49:00<3:07:29,  2.60it/s]

step:11840, train_loss:0.059624176754976735, acc:0.6709557558659602


 33%|███▎      | 14534/43738 [1:49:00<3:16:05,  2.48it/s]

step:11840, train_loss:0.059634530804615914, acc:0.6709095913031512


 33%|███▎      | 14535/43738 [1:49:00<2:54:27,  2.79it/s]

step:11840, train_loss:0.05963340893056689, acc:0.6709322325421396


 33%|███▎      | 14536/43738 [1:49:01<4:01:32,  2.01it/s]

step:11840, train_loss:0.059652178384072836, acc:0.6708860759493671


 33%|███▎      | 14537/43738 [1:49:01<3:24:36,  2.38it/s]

step:11840, train_loss:0.0596498725756513, acc:0.6709087156909954


 33%|███▎      | 14538/43738 [1:49:02<2:56:46,  2.75it/s]

step:11840, train_loss:0.05965259672973728, acc:0.6708625670656211


 33%|███▎      | 14539/43738 [1:49:02<3:23:03,  2.40it/s]

step:11840, train_loss:0.05964871019552916, acc:0.6708852053098563


 33%|███▎      | 14540/43738 [1:49:02<3:06:18,  2.61it/s]

step:11840, train_loss:0.05965060814210288, acc:0.6708390646492435


 33%|███▎      | 14541/43738 [1:49:03<2:56:34,  2.76it/s]

step:11840, train_loss:0.05964700785662262, acc:0.6708617013960525


 33%|███▎      | 14542/43738 [1:49:03<3:13:42,  2.51it/s]

step:11840, train_loss:0.05964419202907539, acc:0.6708843350295696


 33%|███▎      | 14543/43738 [1:49:04<2:49:12,  2.88it/s]

step:11840, train_loss:0.05964986793089308, acc:0.6708382039469161


 34%|███▍      | 14848/43738 [1:51:20<3:25:03,  2.35it/s]

step:11860, train_loss:0.05950779090682286, acc:0.6715382543103449


 34%|███▍      | 14849/43738 [1:51:20<2:59:12,  2.69it/s]

step:11860, train_loss:0.059503955757719114, acc:0.6715603744359889


 34%|███▍      | 14850/43738 [1:51:21<3:00:02,  2.67it/s]

step:11860, train_loss:0.05950411372721236, acc:0.6715824915824916


 34%|███▍      | 14851/43738 [1:51:21<3:05:33,  2.59it/s]

step:11860, train_loss:0.059508819823684576, acc:0.6715372702174938


 34%|███▍      | 14852/43738 [1:51:21<2:55:41,  2.74it/s]

step:11860, train_loss:0.05951080588269854, acc:0.6714920549420953


 34%|███▍      | 14853/43738 [1:51:22<2:54:46,  2.75it/s]

step:11860, train_loss:0.05951038880259921, acc:0.6715141722211001


 34%|███▍      | 14854/43738 [1:51:22<3:42:33,  2.16it/s]

step:11860, train_loss:0.05951864743202873, acc:0.671468964588663


 34%|███▍      | 14855/43738 [1:51:23<3:17:11,  2.44it/s]

step:11860, train_loss:0.05951469896254417, acc:0.6714910804442948


 34%|███▍      | 14856/43738 [1:51:23<2:49:53,  2.83it/s]

step:11860, train_loss:0.0595140667890168, acc:0.6715131933225633


 34%|███▍      | 14857/43738 [1:51:23<2:29:00,  3.23it/s]

step:11860, train_loss:0.05951171353748857, acc:0.6715353032240695


 34%|███▍      | 14858/43738 [1:51:24<2:48:56,  2.85it/s]

step:11860, train_loss:0.05950776286989487, acc:0.6715574101494145


 34%|███▍      | 14859/43738 [1:51:24<3:22:47,  2.37it/s]

step:11860, train_loss:0.059503820501031146, acc:0.6715795140991991


 34%|███▍      | 14860/43738 [1:51:25<3:21:12,  2.39it/s]

step:11860, train_loss:0.05951342755461486, acc:0.6715343203230149


 34%|███▍      | 14861/43738 [1:51:25<3:21:37,  2.39it/s]

step:11860, train_loss:0.05951027909846777, acc:0.6715564228517596


 34%|███▍      | 14862/43738 [1:51:25<3:28:10,  2.31it/s]

step:11860, train_loss:0.05950655150288425, acc:0.6715785224061365


 34%|███▍      | 14863/43738 [1:51:26<3:26:53,  2.33it/s]

step:11860, train_loss:0.05951628710424316, acc:0.6715333378187446


 35%|███▍      | 15168/43738 [1:53:46<4:17:41,  1.85it/s]

step:11880, train_loss:0.05930333376522579, acc:0.6726661392405063


 35%|███▍      | 15169/43738 [1:53:47<4:20:28,  1.83it/s]

step:11880, train_loss:0.05930995208089018, acc:0.6726217944492057


 35%|███▍      | 15170/43738 [1:53:48<4:57:04,  1.60it/s]

step:11880, train_loss:0.05930659175656221, acc:0.6726433750823995


 35%|███▍      | 15171/43738 [1:53:48<4:28:59,  1.77it/s]

step:11880, train_loss:0.059308676779761184, acc:0.6725990376375981


 35%|███▍      | 15172/43738 [1:53:49<3:54:16,  2.03it/s]

step:11880, train_loss:0.0593060964432203, acc:0.6726206169259161


 35%|███▍      | 15173/43738 [1:53:49<3:19:44,  2.38it/s]

step:11880, train_loss:0.05930262413727005, acc:0.6726421933698016


 35%|███▍      | 15174/43738 [1:53:49<3:17:29,  2.41it/s]

step:11880, train_loss:0.059300314987832656, acc:0.6726637669698168


 35%|███▍      | 15175/43738 [1:53:49<2:56:43,  2.69it/s]

step:11880, train_loss:0.05929904433388088, acc:0.6726853377265238


 35%|███▍      | 15176/43738 [1:53:50<3:18:47,  2.39it/s]

step:11880, train_loss:0.05929697397630309, acc:0.672706905640485


 35%|███▍      | 15177/43738 [1:53:50<3:18:15,  2.40it/s]

step:11880, train_loss:0.05929710730172352, acc:0.6727284707122619


 35%|███▍      | 15178/43738 [1:53:51<4:05:43,  1.94it/s]

step:11880, train_loss:0.059293229811536494, acc:0.6727500329424166


 35%|███▍      | 15179/43738 [1:53:52<5:14:17,  1.51it/s]

step:11880, train_loss:0.059289737045425166, acc:0.6727715923315106


 35%|███▍      | 15180/43738 [1:53:53<5:10:06,  1.53it/s]

step:11880, train_loss:0.0592884898870456, acc:0.6727931488801054


 35%|███▍      | 15181/43738 [1:53:53<4:44:43,  1.67it/s]

step:11880, train_loss:0.05928458456056169, acc:0.6728147025887623


 35%|███▍      | 15182/43738 [1:53:54<4:53:15,  1.62it/s]

step:11880, train_loss:0.05929066888554577, acc:0.6727703859834014


 35%|███▍      | 15183/43738 [1:53:54<4:41:46,  1.69it/s]

step:11880, train_loss:0.05929137171586133, acc:0.6727260752157018


 35%|███▌      | 15488/43738 [1:56:14<3:46:56,  2.07it/s]

step:11900, train_loss:0.05918013450412421, acc:0.6736182851239669


 35%|███▌      | 15489/43738 [1:56:15<4:20:43,  1.81it/s]

step:11900, train_loss:0.059176459694794264, acc:0.673639356963006


 35%|███▌      | 15490/43738 [1:56:15<3:57:07,  1.99it/s]

step:11900, train_loss:0.05917509578450149, acc:0.6736604260813428


 35%|███▌      | 15491/43738 [1:56:16<3:57:08,  1.99it/s]

step:11900, train_loss:0.05917843179920125, acc:0.6736169388677297


 35%|███▌      | 15492/43738 [1:56:16<3:54:10,  2.01it/s]

step:11900, train_loss:0.05917560467482462, acc:0.6736380067131422


 35%|███▌      | 15493/43738 [1:56:17<4:13:07,  1.86it/s]

step:11900, train_loss:0.059172429651615295, acc:0.673659071838895


 35%|███▌      | 15494/43738 [1:56:17<4:01:33,  1.95it/s]

step:11900, train_loss:0.05917170757568361, acc:0.6736155931328256


 35%|███▌      | 15495/43738 [1:56:18<3:54:23,  2.01it/s]

step:11900, train_loss:0.059173180525822676, acc:0.6736366569861245


 35%|███▌      | 15496/43738 [1:56:18<3:21:07,  2.34it/s]

step:11900, train_loss:0.059169636880521126, acc:0.6736577181208053


 35%|███▌      | 15497/43738 [1:56:18<2:57:14,  2.66it/s]

step:11900, train_loss:0.05917399639939975, acc:0.6736142479189521


 35%|███▌      | 15498/43738 [1:56:18<2:44:58,  2.85it/s]

step:11900, train_loss:0.05917500225982033, acc:0.6735707833268809


 35%|███▌      | 15499/43738 [1:56:19<2:59:35,  2.62it/s]

step:11900, train_loss:0.059171224084120295, acc:0.6735918446351378


 35%|███▌      | 15500/43738 [1:56:19<3:02:50,  2.57it/s]

step:11900, train_loss:0.05916762880728985, acc:0.6736129032258065


 35%|███▌      | 15501/43738 [1:56:20<3:11:49,  2.45it/s]

step:11900, train_loss:0.0591638889740552, acc:0.6736339590994129


 35%|███▌      | 15502/43738 [1:56:20<2:47:42,  2.81it/s]

step:11900, train_loss:0.059161235084524255, acc:0.673655012256483


 35%|███▌      | 15503/43738 [1:56:20<2:57:51,  2.65it/s]

step:11900, train_loss:0.05916731223684978, acc:0.6736115590530865


 36%|███▌      | 15808/43738 [1:58:40<3:01:42,  2.56it/s]

step:11920, train_loss:0.05896841824475266, acc:0.6744686234817814


 36%|███▌      | 15809/43738 [1:58:41<2:52:20,  2.70it/s]

step:11920, train_loss:0.05896477128293229, acc:0.6744892150041116


 36%|███▌      | 15810/43738 [1:58:41<3:45:43,  2.06it/s]

step:11920, train_loss:0.05896270413389167, acc:0.6745098039215687


 36%|███▌      | 15811/43738 [1:58:42<3:21:08,  2.31it/s]

step:11920, train_loss:0.058961337757357145, acc:0.6745303902346468


 36%|███▌      | 15812/43738 [1:58:42<3:26:55,  2.25it/s]

step:11920, train_loss:0.05896407650125998, acc:0.6744877308373387


 36%|███▌      | 15813/43738 [1:58:43<3:01:49,  2.56it/s]

step:11920, train_loss:0.05896037130704199, acc:0.6745083159425789


 36%|███▌      | 15814/43738 [1:58:43<2:42:14,  2.87it/s]

step:11920, train_loss:0.05895970195262015, acc:0.6745288984444163


 36%|███▌      | 15815/43738 [1:58:43<2:56:03,  2.64it/s]

step:11920, train_loss:0.05895764534587277, acc:0.6745494783433449


 36%|███▌      | 15816/43738 [1:58:44<3:52:34,  2.00it/s]

step:11920, train_loss:0.058961679968012895, acc:0.6745068285280729


 36%|███▌      | 15817/43738 [1:58:44<3:28:42,  2.23it/s]

step:11920, train_loss:0.058960469935349515, acc:0.6745274072200796


 36%|███▌      | 15818/43738 [1:58:45<3:00:45,  2.57it/s]

step:11920, train_loss:0.05895674251777957, acc:0.674547983310153


 36%|███▌      | 15819/43738 [1:58:45<3:28:36,  2.23it/s]

step:11920, train_loss:0.05896076372690687, acc:0.6745053416777294


 36%|███▌      | 15820/43738 [1:58:45<3:11:47,  2.43it/s]

step:11920, train_loss:0.058957037059641, acc:0.6745259165613148


 36%|███▌      | 15821/43738 [1:58:46<4:06:40,  1.89it/s]

step:11920, train_loss:0.05896358109489908, acc:0.6744832817141774


 36%|███▌      | 15822/43738 [1:58:47<3:39:03,  2.12it/s]

step:11920, train_loss:0.05895992483389113, acc:0.6745038553912274


 36%|███▌      | 15823/43738 [1:58:47<3:40:46,  2.11it/s]

step:11920, train_loss:0.05895622776983956, acc:0.6745244264678001


 37%|███▋      | 16128/43738 [2:01:07<4:21:28,  1.76it/s]

step:11940, train_loss:0.058896883650472125, acc:0.6748511904761905


 37%|███▋      | 16129/43738 [2:01:07<3:48:22,  2.01it/s]

step:11940, train_loss:0.05889378002014145, acc:0.6748713497426995


 37%|███▋      | 16130/43738 [2:01:08<4:00:51,  1.91it/s]

step:11940, train_loss:0.05889761893509775, acc:0.6748295102293862


 37%|███▋      | 16131/43738 [2:01:08<3:51:35,  1.99it/s]

step:11940, train_loss:0.05890388852610966, acc:0.6747876759035397


 37%|███▋      | 16132/43738 [2:01:09<3:41:54,  2.07it/s]

step:11940, train_loss:0.058900882399691004, acc:0.674807835358294


 37%|███▋      | 16133/43738 [2:01:09<3:37:23,  2.12it/s]

step:11940, train_loss:0.05889741613882181, acc:0.6748279923138908


 37%|███▋      | 16134/43738 [2:01:10<3:38:00,  2.11it/s]

step:11940, train_loss:0.05890676128496322, acc:0.6747861658609149


 37%|███▋      | 16135/43738 [2:01:10<3:31:08,  2.18it/s]

step:11940, train_loss:0.05890481582192905, acc:0.6748063216609854


 37%|███▋      | 16136/43738 [2:01:11<3:32:32,  2.16it/s]

step:11940, train_loss:0.05890361600993105, acc:0.6748264749628161


 37%|███▋      | 16137/43738 [2:01:11<3:45:07,  2.04it/s]

step:11940, train_loss:0.058900105629193546, acc:0.6748466257668712


 37%|███▋      | 16138/43738 [2:01:12<4:21:38,  1.76it/s]

step:11940, train_loss:0.058897986801727746, acc:0.6748667740736151


 37%|███▋      | 16139/43738 [2:01:12<4:26:19,  1.73it/s]

step:11940, train_loss:0.05889506072759162, acc:0.674886919883512


 37%|███▋      | 16140/43738 [2:01:13<4:07:05,  1.86it/s]

step:11940, train_loss:0.05889212554078702, acc:0.674907063197026


 37%|███▋      | 16141/43738 [2:01:13<4:03:16,  1.89it/s]

step:11940, train_loss:0.05889484974036797, acc:0.6748652499845115


 37%|███▋      | 16142/43738 [2:01:14<3:49:19,  2.01it/s]

step:11940, train_loss:0.05889180999906388, acc:0.6748853921447157


 37%|███▋      | 16143/43738 [2:01:14<3:25:56,  2.23it/s]

step:11940, train_loss:0.05889773966756095, acc:0.674843585454996


 38%|███▊      | 16448/43738 [2:03:33<3:33:06,  2.13it/s]

step:11960, train_loss:0.05900637656126746, acc:0.674489299610895


 38%|███▊      | 16449/43738 [2:03:34<4:00:28,  1.89it/s]

step:11960, train_loss:0.05900855307302932, acc:0.6744482947291629


 38%|███▊      | 16450/43738 [2:03:34<4:08:57,  1.83it/s]

step:11960, train_loss:0.059008917037774826, acc:0.674468085106383


 38%|███▊      | 16451/43738 [2:03:35<3:57:53,  1.91it/s]

step:11960, train_loss:0.05900533447073825, acc:0.6744878730776245


 38%|███▊      | 16452/43738 [2:03:36<4:25:47,  1.71it/s]

step:11960, train_loss:0.059008715470607376, acc:0.674446875759786


 38%|███▊      | 16453/43738 [2:03:36<4:22:56,  1.73it/s]

step:11960, train_loss:0.05901007749645986, acc:0.6744058834255151


 38%|███▊      | 16454/43738 [2:03:36<3:47:47,  2.00it/s]

step:11960, train_loss:0.0590073185789176, acc:0.6744256715692233


 38%|███▊      | 16455/43738 [2:03:37<3:39:28,  2.07it/s]

step:11960, train_loss:0.059014386124855864, acc:0.6743846855059252


 38%|███▊      | 16456/43738 [2:03:37<3:37:41,  2.09it/s]

step:11960, train_loss:0.05901092414175888, acc:0.6744044725328148


 38%|███▊      | 16457/43738 [2:03:38<3:32:50,  2.14it/s]

step:11960, train_loss:0.059007506197229276, acc:0.67442425715501


 38%|███▊      | 16458/43738 [2:03:38<3:28:48,  2.18it/s]

step:11960, train_loss:0.059003972374621606, acc:0.6744440393729493


 38%|███▊      | 16459/43738 [2:03:39<3:32:13,  2.14it/s]

step:11960, train_loss:0.05900923755760155, acc:0.6744030621544443


 38%|███▊      | 16460/43738 [2:03:40<4:15:02,  1.78it/s]

step:11960, train_loss:0.05901285605935478, acc:0.6743620899149453


 38%|███▊      | 16461/43738 [2:03:40<3:31:03,  2.15it/s]

step:11960, train_loss:0.05900927161383295, acc:0.6743818723042343


 38%|███▊      | 16462/43738 [2:03:41<4:22:01,  1.73it/s]

step:11960, train_loss:0.059015192148608356, acc:0.6743409063297291


 38%|███▊      | 16463/43738 [2:03:41<3:48:55,  1.99it/s]

step:11960, train_loss:0.059011904781647424, acc:0.6743606876025026


 38%|███▊      | 16768/43738 [2:06:01<3:48:00,  1.97it/s]

step:11980, train_loss:0.05909417086516837, acc:0.6743797709923665


 38%|███▊      | 16769/43738 [2:06:01<3:23:34,  2.21it/s]

step:11980, train_loss:0.05909070931011598, acc:0.6743991889796649


 38%|███▊      | 16770/43738 [2:06:01<2:55:48,  2.56it/s]

step:11980, train_loss:0.0590875442942598, acc:0.6744186046511628


 38%|███▊      | 16771/43738 [2:06:02<2:59:48,  2.50it/s]

step:11980, train_loss:0.05908404487150487, acc:0.6744380180072744


 38%|███▊      | 16772/43738 [2:06:02<2:39:22,  2.82it/s]

step:11980, train_loss:0.05908109391906002, acc:0.674457429048414


 38%|███▊      | 16773/43738 [2:06:02<3:17:25,  2.28it/s]

step:11980, train_loss:0.05907836270483467, acc:0.6744768377749956


 38%|███▊      | 16774/43738 [2:06:03<3:28:15,  2.16it/s]

step:11980, train_loss:0.059076484429360435, acc:0.6744962441874329


 38%|███▊      | 16775/43738 [2:06:03<3:30:45,  2.13it/s]

step:11980, train_loss:0.059082711469829664, acc:0.6744560357675112


 38%|███▊      | 16776/43738 [2:06:04<4:03:36,  1.84it/s]

step:11980, train_loss:0.05908045735018778, acc:0.6744754411063424


 38%|███▊      | 16777/43738 [2:06:05<3:46:02,  1.99it/s]

step:11980, train_loss:0.05907791027625955, acc:0.6744948441318471


 38%|███▊      | 16778/43738 [2:06:05<3:41:55,  2.02it/s]

step:11980, train_loss:0.05907584822044058, acc:0.6745142448444391


 38%|███▊      | 16779/43738 [2:06:06<3:56:13,  1.90it/s]

step:11980, train_loss:0.05907266791636639, acc:0.6745336432445318


 38%|███▊      | 16780/43738 [2:06:06<4:02:35,  1.85it/s]

step:11980, train_loss:0.05907016396747652, acc:0.6745530393325387


 38%|███▊      | 16781/43738 [2:06:07<3:51:12,  1.94it/s]

step:11980, train_loss:0.05907696188804635, acc:0.6745128419045349


 38%|███▊      | 16782/43738 [2:06:07<4:02:45,  1.85it/s]

step:11980, train_loss:0.059088200180262146, acc:0.6744726492670718


 38%|███▊      | 16783/43738 [2:06:08<4:49:20,  1.55it/s]

step:11980, train_loss:0.05909563139637267, acc:0.6744324614192934


 39%|███▉      | 17088/43738 [2:08:23<3:48:56,  1.94it/s]

step:12000, train_loss:0.05934318767500944, acc:0.6735720973782772


 39%|███▉      | 17089/43738 [2:08:24<4:12:23,  1.76it/s]

step:12000, train_loss:0.05934547738730681, acc:0.6735326818421207


 39%|███▉      | 17090/43738 [2:08:24<4:03:35,  1.82it/s]

step:12000, train_loss:0.05934994255768932, acc:0.6734932709186658


 39%|███▉      | 17091/43738 [2:08:25<4:37:38,  1.60it/s]

step:12000, train_loss:0.059346518012403676, acc:0.6735123749341759


 39%|███▉      | 17092/43738 [2:08:25<3:44:28,  1.98it/s]

step:12000, train_loss:0.05934304597087391, acc:0.6735314767142523


 39%|███▉      | 17093/43738 [2:08:26<3:11:59,  2.31it/s]

step:12000, train_loss:0.05934202370928267, acc:0.6735505762592874


 39%|███▉      | 17094/43738 [2:08:26<4:02:13,  1.83it/s]

step:12000, train_loss:0.05933856240699476, acc:0.6735696735696736


 39%|███▉      | 17095/43738 [2:08:27<3:34:34,  2.07it/s]

step:12000, train_loss:0.05933512839570472, acc:0.6735887686458029


 39%|███▉      | 17096/43738 [2:08:27<3:23:48,  2.18it/s]

step:12000, train_loss:0.05933166911321866, acc:0.6736078614880674


 39%|███▉      | 17097/43738 [2:08:28<3:57:20,  1.87it/s]

step:12000, train_loss:0.05933363432256846, acc:0.6735684623033281


 39%|███▉      | 17098/43738 [2:08:28<3:29:40,  2.12it/s]

step:12000, train_loss:0.05933111687361661, acc:0.6735875540998947


 39%|███▉      | 17099/43738 [2:08:28<2:56:51,  2.51it/s]

step:12000, train_loss:0.05932765123611449, acc:0.6736066436633721


 39%|███▉      | 17100/43738 [2:08:29<3:08:43,  2.35it/s]

step:12000, train_loss:0.05933310319778812, acc:0.6735672514619883


 39%|███▉      | 17101/43738 [2:08:29<3:12:28,  2.31it/s]

step:12000, train_loss:0.05932983209345974, acc:0.6735863399801181


 39%|███▉      | 17102/43738 [2:08:30<3:23:34,  2.18it/s]

step:12000, train_loss:0.059328152460541045, acc:0.6736054262659338


 39%|███▉      | 17103/43738 [2:08:30<2:54:07,  2.55it/s]

step:12000, train_loss:0.05932468452965444, acc:0.673624510319827


 40%|███▉      | 17408/43738 [2:10:51<2:53:44,  2.53it/s]

step:12020, train_loss:0.05939368706196667, acc:0.6728515625


 40%|███▉      | 17409/43738 [2:10:51<2:31:59,  2.89it/s]

step:12020, train_loss:0.05939052903448254, acc:0.6728703544143834


 40%|███▉      | 17410/43738 [2:10:51<2:18:48,  3.16it/s]

step:12020, train_loss:0.059387187403349244, acc:0.6728891441700172


 40%|███▉      | 17411/43738 [2:10:52<2:32:00,  2.89it/s]

step:12020, train_loss:0.05938763553736476, acc:0.67285049681236


 40%|███▉      | 17412/43738 [2:10:52<2:48:20,  2.61it/s]

step:12020, train_loss:0.059388784203836184, acc:0.6728118538938663


 40%|███▉      | 17413/43738 [2:10:53<2:57:26,  2.47it/s]

step:12020, train_loss:0.0593874733283771, acc:0.6728306437718946


 40%|███▉      | 17414/43738 [2:10:53<3:33:24,  2.06it/s]

step:12020, train_loss:0.059385096498294446, acc:0.672849431491903


 40%|███▉      | 17415/43738 [2:10:54<3:24:36,  2.14it/s]

step:12020, train_loss:0.05940170205938433, acc:0.6728107952914154


 40%|███▉      | 17416/43738 [2:10:54<3:51:21,  1.90it/s]

step:12020, train_loss:0.05939844416489634, acc:0.6728295819935691


 40%|███▉      | 17417/43738 [2:10:55<4:05:53,  1.78it/s]

step:12020, train_loss:0.05940325685059028, acc:0.6727909513693517


 40%|███▉      | 17418/43738 [2:10:55<3:46:01,  1.94it/s]

step:12020, train_loss:0.05940224632242799, acc:0.6728097370536227


 40%|███▉      | 17419/43738 [2:10:56<3:39:52,  1.99it/s]

step:12020, train_loss:0.05940545491329841, acc:0.6727711120041334


 40%|███▉      | 17420/43738 [2:10:56<3:50:19,  1.90it/s]

step:12020, train_loss:0.059403011201478964, acc:0.6727898966704937


 40%|███▉      | 17421/43738 [2:10:57<3:30:54,  2.08it/s]

step:12020, train_loss:0.05939960156278351, acc:0.6728086791802996


 40%|███▉      | 17422/43738 [2:10:57<3:14:59,  2.25it/s]

step:12020, train_loss:0.0593961967699453, acc:0.6728274595339226


 40%|███▉      | 17423/43738 [2:10:58<3:46:38,  1.94it/s]

step:12020, train_loss:0.059392816385302825, acc:0.6728462377317339


 41%|████      | 17728/43738 [2:13:15<3:05:56,  2.33it/s]

step:12040, train_loss:0.05931254261119421, acc:0.6730031588447654


 41%|████      | 17729/43738 [2:13:15<3:02:03,  2.38it/s]

step:12040, train_loss:0.05931424130965158, acc:0.6729651982627334


 41%|████      | 17730/43738 [2:13:15<2:42:32,  2.67it/s]

step:12040, train_loss:0.059315206493200205, acc:0.672927241962775


 41%|████      | 17731/43738 [2:13:16<3:16:21,  2.21it/s]

step:12040, train_loss:0.059317479697127945, acc:0.6728892899441656


 41%|████      | 17732/43738 [2:13:17<3:37:49,  1.99it/s]

step:12040, train_loss:0.0593143532090695, acc:0.6729077374238664


 41%|████      | 17733/43738 [2:13:17<3:21:02,  2.16it/s]

step:12040, train_loss:0.059315460766371925, acc:0.6728697907855411


 41%|████      | 17734/43738 [2:13:17<2:52:40,  2.51it/s]

step:12040, train_loss:0.059314967197808065, acc:0.6728318484267509


 41%|████      | 17735/43738 [2:13:18<2:35:50,  2.78it/s]

step:12040, train_loss:0.05931191110360487, acc:0.6728502960248097


 41%|████      | 17736/43738 [2:13:18<3:05:28,  2.34it/s]

step:12040, train_loss:0.059319712274871185, acc:0.6728123590437528


 41%|████      | 17737/43738 [2:13:19<3:09:42,  2.28it/s]

step:12040, train_loss:0.05932298735636753, acc:0.6727744263404183


 41%|████      | 17738/43738 [2:13:19<3:22:49,  2.14it/s]

step:12040, train_loss:0.05931982627236062, acc:0.6727928740556997


 41%|████      | 17739/43738 [2:13:20<3:32:20,  2.04it/s]

step:12040, train_loss:0.05932052845595677, acc:0.6728113196910762


 41%|████      | 17740/43738 [2:13:20<3:14:19,  2.23it/s]

step:12040, train_loss:0.059317251235951315, acc:0.6728297632468997


 41%|████      | 17741/43738 [2:13:20<2:50:17,  2.54it/s]

step:12040, train_loss:0.05931406073754675, acc:0.6728482047235218


 41%|████      | 17742/43738 [2:13:21<3:04:26,  2.35it/s]

step:12040, train_loss:0.05931437129075131, acc:0.6728102806898884


 41%|████      | 17743/43738 [2:13:21<2:42:28,  2.67it/s]

step:12040, train_loss:0.059317641863355475, acc:0.6727723609310714


 41%|████▏     | 18048/43738 [2:15:41<3:11:43,  2.23it/s]

step:12060, train_loss:0.05937073837660769, acc:0.6724290780141844


 41%|████▏     | 18049/43738 [2:15:41<3:04:49,  2.32it/s]

step:12060, train_loss:0.059367454884556095, acc:0.6724472269931853


 41%|████▏     | 18050/43738 [2:15:42<2:49:48,  2.52it/s]

step:12060, train_loss:0.05936417463584158, acc:0.6724653739612189


 41%|████▏     | 18051/43738 [2:15:42<3:38:41,  1.96it/s]

step:12060, train_loss:0.05936528776742357, acc:0.6724835189186195


 41%|████▏     | 18052/43738 [2:15:43<3:49:35,  1.86it/s]

step:12060, train_loss:0.05936509596860887, acc:0.6724462663416796


 41%|████▏     | 18053/43738 [2:15:43<3:26:39,  2.07it/s]

step:12060, train_loss:0.05936185828531799, acc:0.6724644103473107


 41%|████▏     | 18054/43738 [2:15:44<3:23:40,  2.10it/s]

step:12060, train_loss:0.05936313625560576, acc:0.6724271629555777


 41%|████▏     | 18055/43738 [2:15:44<2:54:29,  2.45it/s]

step:12060, train_loss:0.05935984900362371, acc:0.6724453060094157


 41%|████▏     | 18056/43738 [2:15:45<2:50:34,  2.51it/s]

step:12060, train_loss:0.05935918553364379, acc:0.6724080638015064


 41%|████▏     | 18057/43738 [2:15:45<2:28:45,  2.88it/s]

step:12060, train_loss:0.05935647902946513, acc:0.6724262059035278


 41%|████▏     | 18058/43738 [2:15:45<2:24:35,  2.96it/s]

step:12060, train_loss:0.05935321624051437, acc:0.6724443459962344


 41%|████▏     | 18059/43738 [2:15:45<2:22:18,  3.01it/s]

step:12060, train_loss:0.05934995078960688, acc:0.6724624840799601


 41%|████▏     | 18060/43738 [2:15:46<2:42:26,  2.63it/s]

step:12060, train_loss:0.059356984783097165, acc:0.6724252491694352


 41%|████▏     | 18061/43738 [2:15:46<3:08:32,  2.27it/s]

step:12060, train_loss:0.059363117846584386, acc:0.6723880183821493


 41%|████▏     | 18062/43738 [2:15:47<3:04:35,  2.32it/s]

step:12060, train_loss:0.05936106006948177, acc:0.6724061565718082


 41%|████▏     | 18063/43738 [2:15:47<2:38:36,  2.70it/s]

step:12060, train_loss:0.05935777382737695, acc:0.6724242927531418


 42%|████▏     | 18368/43738 [2:18:07<3:27:02,  2.04it/s]

step:12080, train_loss:0.05932093348081836, acc:0.6722560975609756


 42%|████▏     | 18369/43738 [2:18:07<4:05:07,  1.72it/s]

step:12080, train_loss:0.05931854001114127, acc:0.6722739397898634


 42%|████▏     | 18370/43738 [2:18:08<3:32:32,  1.99it/s]

step:12080, train_loss:0.05932607459808141, acc:0.6722373434948286


 42%|████▏     | 18371/43738 [2:18:08<3:03:11,  2.31it/s]

step:12080, train_loss:0.059325072005703326, acc:0.6722551848021338


 42%|████▏     | 18372/43738 [2:18:09<3:16:34,  2.15it/s]

step:12080, train_loss:0.059329807044989635, acc:0.6722185935118659


 42%|████▏     | 18373/43738 [2:18:09<3:59:31,  1.76it/s]

step:12080, train_loss:0.05932664795992564, acc:0.672236433897567


 42%|████▏     | 18374/43738 [2:18:10<3:29:11,  2.02it/s]

step:12080, train_loss:0.05933258783823123, acc:0.6721998476107544


 42%|████▏     | 18375/43738 [2:18:10<3:01:10,  2.33it/s]

step:12080, train_loss:0.059329358951172785, acc:0.67221768707483


 42%|████▏     | 18376/43738 [2:18:11<3:29:36,  2.02it/s]

step:12080, train_loss:0.059326204279920074, acc:0.6722355245973008


 42%|████▏     | 18377/43738 [2:18:11<3:33:41,  1.98it/s]

step:12080, train_loss:0.059323713333009724, acc:0.672253360178484


 42%|████▏     | 18378/43738 [2:18:12<3:19:14,  2.12it/s]

step:12080, train_loss:0.05933029907867864, acc:0.672216780933725


 42%|████▏     | 18379/43738 [2:18:12<3:53:43,  1.81it/s]

step:12080, train_loss:0.05933795317985895, acc:0.6721802056695141


 42%|████▏     | 18380/43738 [2:18:13<3:13:49,  2.18it/s]

step:12080, train_loss:0.05934232157149686, acc:0.6721436343852013


 42%|████▏     | 18381/43738 [2:18:13<2:46:30,  2.54it/s]

step:12080, train_loss:0.05933909313104511, acc:0.6721614710842718


 42%|████▏     | 18382/43738 [2:18:13<3:22:22,  2.09it/s]

step:12080, train_loss:0.059337818450169416, acc:0.6721793058426722


 42%|████▏     | 18383/43738 [2:18:14<3:12:39,  2.19it/s]

step:12080, train_loss:0.05934217077222921, acc:0.6721427405755317


 43%|████▎     | 18688/43738 [2:20:41<2:51:54,  2.43it/s]

step:12100, train_loss:0.05935901416610125, acc:0.6724636130136986


 43%|████▎     | 18689/43738 [2:20:41<2:32:14,  2.74it/s]

step:12100, train_loss:0.05935584089488943, acc:0.6724811386377013


 43%|████▎     | 18690/43738 [2:20:42<2:47:08,  2.50it/s]

step:12100, train_loss:0.05935296593464718, acc:0.6724986623863028


 43%|████▎     | 18691/43738 [2:20:42<2:30:26,  2.77it/s]

step:12100, train_loss:0.05935822814995736, acc:0.6724626825745011


 43%|████▎     | 18692/43738 [2:20:42<2:56:37,  2.36it/s]

step:12100, train_loss:0.05935687213451495, acc:0.6724802054354804


 43%|████▎     | 18693/43738 [2:20:43<3:12:52,  2.16it/s]

step:12100, train_loss:0.05935869453524166, acc:0.6724442304606002


 43%|████▎     | 18694/43738 [2:20:44<3:45:24,  1.85it/s]

step:12100, train_loss:0.05935971638308092, acc:0.672461752433936


 43%|████▎     | 18695/43738 [2:20:44<3:06:31,  2.24it/s]

step:12100, train_loss:0.05935679416842054, acc:0.6724792725327627


 43%|████▎     | 18696/43738 [2:20:45<3:23:11,  2.05it/s]

step:12100, train_loss:0.05935362015809957, acc:0.6724967907573812


 43%|████▎     | 18697/43738 [2:20:45<3:45:46,  1.85it/s]

step:12100, train_loss:0.059350448897371164, acc:0.6725143071080922


 43%|████▎     | 18698/43738 [2:20:46<3:28:29,  2.00it/s]

step:12100, train_loss:0.05934728318783089, acc:0.6725318215851963


 43%|████▎     | 18699/43738 [2:20:46<3:17:16,  2.12it/s]

step:12100, train_loss:0.05934412483873775, acc:0.6725493341889941


 43%|████▎     | 18700/43738 [2:20:47<3:56:28,  1.76it/s]

step:12100, train_loss:0.059341950027964996, acc:0.6725668449197861


 43%|████▎     | 18701/43738 [2:20:47<3:49:27,  1.82it/s]

step:12100, train_loss:0.05935154432967976, acc:0.6725308807015667


 43%|████▎     | 18702/43738 [2:20:48<4:22:00,  1.59it/s]

step:12100, train_loss:0.05936853026011875, acc:0.6724949203293765


 43%|████▎     | 18703/43738 [2:20:49<4:04:19,  1.71it/s]

step:12100, train_loss:0.059372455246036286, acc:0.6724589638025985


 43%|████▎     | 19008/43738 [2:23:01<2:55:53,  2.34it/s]

step:12120, train_loss:0.05932418184186121, acc:0.6727693602693603


 43%|████▎     | 19009/43738 [2:23:01<2:47:46,  2.46it/s]

step:12120, train_loss:0.059329202100644005, acc:0.672733968120364


 43%|████▎     | 19010/43738 [2:23:02<2:49:14,  2.44it/s]

step:12120, train_loss:0.05932813729599536, acc:0.6727511835875855


 43%|████▎     | 19011/43738 [2:23:02<3:12:05,  2.15it/s]

step:12120, train_loss:0.05932519725096231, acc:0.672768397243701


 43%|████▎     | 19012/43738 [2:23:03<3:54:57,  1.75it/s]

step:12120, train_loss:0.059341010211956965, acc:0.6727330107300652


 43%|████▎     | 19013/43738 [2:23:04<3:27:21,  1.99it/s]

step:12120, train_loss:0.05933807414651236, acc:0.6727502235312681


 43%|████▎     | 19014/43738 [2:23:04<2:58:01,  2.31it/s]

step:12120, train_loss:0.05933495565867002, acc:0.6727674345219312


 43%|████▎     | 19015/43738 [2:23:04<2:57:22,  2.32it/s]

step:12120, train_loss:0.05933663869254174, acc:0.6727320536418617


 43%|████▎     | 19016/43738 [2:23:05<2:37:09,  2.62it/s]

step:12120, train_loss:0.05933475652024494, acc:0.6727492637778713


 43%|████▎     | 19018/43738 [2:23:05<2:01:41,  3.39it/s]

step:12120, train_loss:0.05933163782630525, acc:0.672766472103907
step:12120, train_loss:0.05932949468592345, acc:0.6727836786202545


 43%|████▎     | 19019/43738 [2:23:05<1:53:49,  3.62it/s]

step:12120, train_loss:0.05932661270859135, acc:0.6728008833271991


 43%|████▎     | 19020/43738 [2:23:05<1:53:07,  3.64it/s]

step:12120, train_loss:0.05932373883332492, acc:0.6728180862250263


 43%|████▎     | 19021/43738 [2:23:06<2:16:18,  3.02it/s]

step:12120, train_loss:0.05932062329224288, acc:0.6728352873140213


 43%|████▎     | 19022/43738 [2:23:06<2:29:28,  2.76it/s]

step:12120, train_loss:0.05932225567196125, acc:0.6727999158868678


 43%|████▎     | 19023/43738 [2:23:07<3:18:18,  2.08it/s]

step:12120, train_loss:0.0593330117839779, acc:0.6727645481785207


 44%|████▍     | 19328/43738 [2:25:40<3:03:50,  2.21it/s]

step:12140, train_loss:0.05930663100008247, acc:0.6730649834437086


 44%|████▍     | 19329/43738 [2:25:41<2:40:56,  2.53it/s]

step:12140, train_loss:0.059303946448516165, acc:0.6730818976667184


 44%|████▍     | 19330/43738 [2:25:41<2:50:30,  2.39it/s]

step:12140, train_loss:0.059301258028132325, acc:0.6730988101396792


 44%|████▍     | 19331/43738 [2:25:42<2:55:50,  2.31it/s]

step:12140, train_loss:0.059300062666817276, acc:0.6731157208628628


 44%|████▍     | 19332/43738 [2:25:42<2:33:06,  2.66it/s]

step:12140, train_loss:0.05930015054155111, acc:0.6731326298365404


 44%|████▍     | 19333/43738 [2:25:42<2:28:47,  2.73it/s]

step:12140, train_loss:0.059298187158264425, acc:0.6731495370609838


 44%|████▍     | 19334/43738 [2:25:43<2:32:28,  2.67it/s]

step:12140, train_loss:0.05929921876118564, acc:0.6731147201820626


 44%|████▍     | 19335/43738 [2:25:43<2:13:04,  3.06it/s]

step:12140, train_loss:0.059298412945379346, acc:0.6731316265839152


 44%|████▍     | 19336/43738 [2:25:43<2:07:09,  3.20it/s]

step:12140, train_loss:0.059295349053532044, acc:0.6731485312370707


 44%|████▍     | 19337/43738 [2:25:44<3:07:26,  2.17it/s]

step:12140, train_loss:0.059306009216857435, acc:0.6731137198117598


 44%|████▍     | 19338/43738 [2:25:45<3:50:48,  1.76it/s]

step:12140, train_loss:0.0593068048432329, acc:0.6730789119867618


 44%|████▍     | 19339/43738 [2:25:45<3:21:01,  2.02it/s]

step:12140, train_loss:0.05930572051249273, acc:0.6730958167433683


 44%|████▍     | 19340/43738 [2:25:45<2:53:46,  2.34it/s]

step:12140, train_loss:0.059302711555428614, acc:0.6731127197518098


 44%|████▍     | 19341/43738 [2:25:46<3:39:51,  1.85it/s]

step:12140, train_loss:0.05932338104957193, acc:0.6730779173775916


 44%|████▍     | 19342/43738 [2:25:46<3:16:01,  2.07it/s]

step:12140, train_loss:0.05932523357953461, acc:0.673043118602006


 44%|████▍     | 19343/43738 [2:25:47<3:47:07,  1.79it/s]

step:12140, train_loss:0.059323436450782904, acc:0.6730600217132813


 45%|████▍     | 19648/43738 [2:28:09<3:39:25,  1.83it/s]

step:12160, train_loss:0.05928465488221038, acc:0.6730456026058632


 45%|████▍     | 19649/43738 [2:28:09<3:12:00,  2.09it/s]

step:12160, train_loss:0.059283831936904445, acc:0.6730622423533005


 45%|████▍     | 19650/43738 [2:28:10<3:18:12,  2.03it/s]

step:12160, train_loss:0.05929526600616145, acc:0.6730279898218829


 45%|████▍     | 19651/43738 [2:28:10<2:43:54,  2.45it/s]

step:12160, train_loss:0.05929442827882427, acc:0.6730446287720727


 45%|████▍     | 19652/43738 [2:28:11<3:17:00,  2.04it/s]

step:12160, train_loss:0.05929500443805785, acc:0.6730612660289029


 45%|████▍     | 19653/43738 [2:28:11<3:04:51,  2.17it/s]

step:12160, train_loss:0.05929856424730385, acc:0.6730270187757594


 45%|████▍     | 19654/43738 [2:28:11<2:57:23,  2.26it/s]

step:12160, train_loss:0.05930352431474785, acc:0.672992775007632


 45%|████▍     | 19655/43738 [2:28:12<3:07:59,  2.14it/s]

step:12160, train_loss:0.05930692086204343, acc:0.6729585347239888


 45%|████▍     | 19656/43738 [2:28:12<3:21:02,  2.00it/s]

step:12160, train_loss:0.05930635742129651, acc:0.672975172975173


 45%|████▍     | 19657/43738 [2:28:13<3:05:09,  2.17it/s]

step:12160, train_loss:0.05930400816399219, acc:0.6729918095334995


 45%|████▍     | 19658/43738 [2:28:13<3:05:30,  2.16it/s]

step:12160, train_loss:0.05930408051314083, acc:0.6730084443992268


 45%|████▍     | 19659/43738 [2:28:14<3:04:58,  2.17it/s]

step:12160, train_loss:0.05930259628921164, acc:0.6730250775726131


 45%|████▍     | 19660/43738 [2:28:14<3:15:10,  2.06it/s]

step:12160, train_loss:0.059303558911903316, acc:0.6729908443540183


 45%|████▍     | 19661/43738 [2:28:15<3:22:00,  1.99it/s]

step:12160, train_loss:0.05930664673629376, acc:0.6729566146177712


 45%|████▍     | 19662/43738 [2:28:15<3:14:47,  2.06it/s]

step:12160, train_loss:0.05930396523270802, acc:0.6729732478893297


 45%|████▍     | 19663/43738 [2:28:16<3:11:12,  2.10it/s]

step:12160, train_loss:0.059308353055371237, acc:0.6729390225296241


 46%|████▌     | 19968/43738 [2:30:34<3:01:07,  2.19it/s]

step:12180, train_loss:0.0592545746971277, acc:0.673026842948718


 46%|████▌     | 19969/43738 [2:30:34<2:51:54,  2.30it/s]

step:12180, train_loss:0.05925163237021136, acc:0.6730432169863289


 46%|████▌     | 19970/43738 [2:30:35<2:40:52,  2.46it/s]

step:12180, train_loss:0.05924873190153911, acc:0.6730595893840761


 46%|████▌     | 19971/43738 [2:30:35<3:19:56,  1.98it/s]

step:12180, train_loss:0.05924588562747037, acc:0.6730759601422062


 46%|████▌     | 19972/43738 [2:30:36<3:11:16,  2.07it/s]

step:12180, train_loss:0.05925577448175991, acc:0.673042259162828


 46%|████▌     | 19973/43738 [2:30:36<3:13:03,  2.05it/s]

step:12180, train_loss:0.05926227552444183, acc:0.6730085615581034


 46%|████▌     | 19974/43738 [2:30:37<3:22:46,  1.95it/s]

step:12180, train_loss:0.05926166705216202, acc:0.6730249324121358


 46%|████▌     | 19975/43738 [2:30:37<3:00:58,  2.19it/s]

step:12180, train_loss:0.05927073580475445, acc:0.672991239048811


 46%|████▌     | 19976/43738 [2:30:37<2:46:22,  2.38it/s]

step:12180, train_loss:0.05926878655760986, acc:0.6730076091309571


 46%|████▌     | 19977/43738 [2:30:38<2:26:40,  2.70it/s]

step:12180, train_loss:0.059267807244907225, acc:0.6730239775742104


 46%|████▌     | 19978/43738 [2:30:38<2:19:40,  2.83it/s]

step:12180, train_loss:0.05926484061211053, acc:0.6730403443788167


 46%|████▌     | 19979/43738 [2:30:39<2:37:48,  2.51it/s]

step:12180, train_loss:0.05926861651435057, acc:0.6730066569898393


 46%|████▌     | 19980/43738 [2:30:39<2:49:11,  2.34it/s]

step:12180, train_loss:0.059269002976469455, acc:0.672972972972973


 46%|████▌     | 19981/43738 [2:30:39<2:28:20,  2.67it/s]

step:12180, train_loss:0.05926649182470083, acc:0.6729893398728792


 46%|████▌     | 19982/43738 [2:30:40<2:16:57,  2.89it/s]

step:12180, train_loss:0.059265005070373786, acc:0.6730057051346212


 46%|████▌     | 19983/43738 [2:30:40<2:21:28,  2.80it/s]

step:12180, train_loss:0.05926219375661465, acc:0.6730220687584447


 46%|████▋     | 20288/43738 [2:32:59<2:21:42,  2.76it/s]

step:12200, train_loss:0.05922902863587199, acc:0.6729593848580442


 46%|████▋     | 20289/43738 [2:32:59<2:25:57,  2.68it/s]

step:12200, train_loss:0.05922842889859589, acc:0.6729262161762531


 46%|████▋     | 20290/43738 [2:33:00<3:03:35,  2.13it/s]

step:12200, train_loss:0.059225590533945605, acc:0.6729423361261705


 46%|████▋     | 20291/43738 [2:33:00<3:02:58,  2.14it/s]

step:12200, train_loss:0.059222764452386785, acc:0.672958454487211


 46%|████▋     | 20292/43738 [2:33:01<2:39:21,  2.45it/s]

step:12200, train_loss:0.05922995831073425, acc:0.6729252907549773


 46%|████▋     | 20293/43738 [2:33:01<2:32:01,  2.57it/s]

step:12200, train_loss:0.059237354233509905, acc:0.6728921302912334


 46%|████▋     | 20294/43738 [2:33:01<2:53:40,  2.25it/s]

step:12200, train_loss:0.05923567781282223, acc:0.672908248743471


 46%|████▋     | 20295/43738 [2:33:02<2:32:04,  2.57it/s]

step:12200, train_loss:0.05923275913603856, acc:0.6729243656072924


 46%|████▋     | 20296/43738 [2:33:03<3:18:15,  1.97it/s]

step:12200, train_loss:0.059241885783639696, acc:0.6728912100906582


 46%|████▋     | 20297/43738 [2:33:03<3:16:38,  1.99it/s]

step:12200, train_loss:0.05923943177951314, acc:0.6729073262058433


 46%|████▋     | 20298/43738 [2:33:04<3:34:37,  1.82it/s]

step:12200, train_loss:0.05924376892554808, acc:0.6728741747955463


 46%|████▋     | 20299/43738 [2:33:04<3:08:55,  2.07it/s]

step:12200, train_loss:0.059241084454064556, acc:0.6728902901620769


 46%|████▋     | 20300/43738 [2:33:04<3:03:05,  2.13it/s]

step:12200, train_loss:0.05924248320046237, acc:0.6728571428571428


 46%|████▋     | 20301/43738 [2:33:05<3:21:49,  1.94it/s]

step:12200, train_loss:0.05925326948923078, acc:0.6728239988177922


 46%|████▋     | 20302/43738 [2:33:05<2:45:10,  2.36it/s]

step:12200, train_loss:0.05925047611796903, acc:0.6728401142744557


 46%|████▋     | 20303/43738 [2:33:06<3:17:20,  1.98it/s]

step:12200, train_loss:0.05924759248666088, acc:0.6728562281436241


 47%|████▋     | 20609/43738 [2:35:27<2:33:26,  2.51it/s]

step:12220, train_loss:0.05930105410723299, acc:0.6725058229813664
step:12220, train_loss:0.05929819145427711, acc:0.672521713814353


 47%|████▋     | 20610/43738 [2:35:28<2:38:26,  2.43it/s]

step:12220, train_loss:0.05929536523955421, acc:0.6725376031052887


 47%|████▋     | 20611/43738 [2:35:28<2:22:59,  2.70it/s]

step:12220, train_loss:0.059292511518179984, acc:0.6725534908543981


 47%|████▋     | 20612/43738 [2:35:29<3:13:15,  1.99it/s]

step:12220, train_loss:0.059292439081351164, acc:0.6725693770619057


 47%|████▋     | 20613/43738 [2:35:29<2:52:08,  2.24it/s]

step:12220, train_loss:0.05929919678706342, acc:0.6725367486537622


 47%|████▋     | 20614/43738 [2:35:30<2:53:07,  2.23it/s]

step:12220, train_loss:0.05930031909321604, acc:0.6725041234112739


 47%|████▋     | 20615/43738 [2:35:30<2:31:51,  2.54it/s]

step:12220, train_loss:0.05929744895822361, acc:0.6725200097016736


 47%|████▋     | 20616/43738 [2:35:31<2:55:44,  2.19it/s]

step:12220, train_loss:0.05929959848221249, acc:0.6724873884361661


 47%|████▋     | 20617/43738 [2:35:31<2:39:44,  2.41it/s]

step:12220, train_loss:0.05930006887329054, acc:0.6724547703351603


 47%|████▋     | 20618/43738 [2:35:31<2:33:15,  2.51it/s]

step:12220, train_loss:0.05929720647842181, acc:0.6724706567077311


 47%|████▋     | 20619/43738 [2:35:32<2:45:16,  2.33it/s]

step:12220, train_loss:0.05929598458886609, acc:0.6724865415393569


 47%|████▋     | 20620/43738 [2:35:32<2:37:19,  2.45it/s]

step:12220, train_loss:0.05930432639030614, acc:0.6724539282250243


 47%|████▋     | 20621/43738 [2:35:32<2:27:24,  2.61it/s]

step:12220, train_loss:0.0593016901363724, acc:0.6724698123272392


 47%|████▋     | 20622/43738 [2:35:33<2:49:08,  2.28it/s]

step:12220, train_loss:0.05929885062403313, acc:0.6724856948889536


 47%|████▋     | 20623/43738 [2:35:34<3:03:40,  2.10it/s]

step:12220, train_loss:0.05929600239709255, acc:0.6725015759103913


 48%|████▊     | 20928/43738 [2:37:53<3:33:07,  1.78it/s]

step:12240, train_loss:0.059280556177558376, acc:0.6724006116207951


 48%|████▊     | 20929/43738 [2:37:53<3:16:31,  1.93it/s]

step:12240, train_loss:0.05927772522408136, acc:0.6724162645133547


 48%|████▊     | 20930/43738 [2:37:54<3:29:23,  1.82it/s]

step:12240, train_loss:0.05927492945241262, acc:0.6724319159101768


 48%|████▊     | 20931/43738 [2:37:55<3:32:54,  1.79it/s]

step:12240, train_loss:0.05928444619268161, acc:0.6723997897854856


 48%|████▊     | 20932/43738 [2:37:55<3:15:06,  1.95it/s]

step:12240, train_loss:0.05928161817636194, acc:0.6724154404739155


 48%|████▊     | 20933/43738 [2:37:55<2:59:28,  2.12it/s]

step:12240, train_loss:0.059282271753441834, acc:0.672383318205704


 48%|████▊     | 20934/43738 [2:37:56<2:46:53,  2.28it/s]

step:12240, train_loss:0.059281090046719234, acc:0.6723989681857265


 48%|████▊     | 20935/43738 [2:37:56<2:29:17,  2.55it/s]

step:12240, train_loss:0.059280808021079, acc:0.6724146166706473


 48%|████▊     | 20936/43738 [2:37:57<2:36:16,  2.43it/s]

step:12240, train_loss:0.05927797654160122, acc:0.6724302636606801


 48%|████▊     | 20937/43738 [2:37:57<3:05:10,  2.05it/s]

step:12240, train_loss:0.059279041752453646, acc:0.6723981468214166


 48%|████▊     | 20938/43738 [2:37:58<3:04:26,  2.06it/s]

step:12240, train_loss:0.05927636207050786, acc:0.6724137931034483


 48%|████▊     | 20939/43738 [2:37:58<2:37:20,  2.42it/s]

step:12240, train_loss:0.05927597753933556, acc:0.6723816801184392


 48%|████▊     | 20940/43738 [2:37:58<2:37:52,  2.41it/s]

step:12240, train_loss:0.05927504193907083, acc:0.6723973256924546


 48%|████▊     | 20941/43738 [2:37:59<2:42:54,  2.33it/s]

step:12240, train_loss:0.05927222684371336, acc:0.6724129697722172


 48%|████▊     | 20942/43738 [2:37:59<2:35:02,  2.45it/s]

step:12240, train_loss:0.05927142770415095, acc:0.6724286123579409


 48%|████▊     | 20943/43738 [2:38:00<2:36:05,  2.43it/s]

step:12240, train_loss:0.05927078447678976, acc:0.67244425344984


 49%|████▊     | 21248/43738 [2:40:22<3:07:32,  2.00it/s]

step:12260, train_loss:0.05934811083178862, acc:0.6711219879518072


 49%|████▊     | 21249/43738 [2:40:22<3:07:34,  2.00it/s]

step:12260, train_loss:0.05935163926231396, acc:0.6710904042543179


 49%|████▊     | 21250/43738 [2:40:23<2:41:49,  2.32it/s]

step:12260, train_loss:0.05935008162599167, acc:0.6711058823529412


 49%|████▊     | 21251/43738 [2:40:23<3:12:39,  1.95it/s]

step:12260, train_loss:0.05935640267560149, acc:0.67107430238577


 49%|████▊     | 21252/43738 [2:40:23<2:39:43,  2.35it/s]

step:12260, train_loss:0.05935579202545856, acc:0.6710897797854319


 49%|████▊     | 21253/43738 [2:40:24<2:34:27,  2.43it/s]

step:12260, train_loss:0.05935571927474432, acc:0.6710582035477345


 49%|████▊     | 21254/43738 [2:40:24<2:48:58,  2.22it/s]

step:12260, train_loss:0.05935566825583507, acc:0.6710736802484238


 49%|████▊     | 21255/43738 [2:40:25<2:38:59,  2.36it/s]

step:12260, train_loss:0.05935319547528404, acc:0.6710891554928252


 49%|████▊     | 21256/43738 [2:40:25<2:50:17,  2.20it/s]

step:12260, train_loss:0.059353174150189134, acc:0.6711046292811441


 49%|████▊     | 21257/43738 [2:40:26<3:13:15,  1.94it/s]

step:12260, train_loss:0.05936557106802767, acc:0.6710730582866821


 49%|████▊     | 21258/43738 [2:40:26<2:55:41,  2.13it/s]

step:12260, train_loss:0.059362778910877566, acc:0.671088531376423


 49%|████▊     | 21259/43738 [2:40:27<3:11:22,  1.96it/s]

step:12260, train_loss:0.05936791593836749, acc:0.6710569641093184


 49%|████▊     | 21260/43738 [2:40:27<3:01:20,  2.07it/s]

step:12260, train_loss:0.05936703237019692, acc:0.6710724365004703


 49%|████▊     | 21261/43738 [2:40:28<3:33:06,  1.76it/s]

step:12260, train_loss:0.059364392443094514, acc:0.6710879074361507


 49%|████▊     | 21262/43738 [2:40:29<3:23:33,  1.84it/s]

step:12260, train_loss:0.05936162046342173, acc:0.6711033769165647


 49%|████▊     | 21263/43738 [2:40:29<2:58:54,  2.09it/s]

step:12260, train_loss:0.059372474012231925, acc:0.6710718148897146


 49%|████▉     | 21568/43738 [2:42:55<3:40:08,  1.68it/s]

step:12280, train_loss:0.05935751687447973, acc:0.6710867952522255


 49%|████▉     | 21569/43738 [2:42:56<3:50:02,  1.61it/s]

step:12280, train_loss:0.05935634537092307, acc:0.6711020446010478


 49%|████▉     | 21570/43738 [2:42:56<3:36:32,  1.71it/s]

step:12280, train_loss:0.05935359547256549, acc:0.6711172925359296


 49%|████▉     | 21571/43738 [2:42:57<3:36:40,  1.71it/s]

step:12280, train_loss:0.05935377215030532, acc:0.6711325390570674


 49%|████▉     | 21572/43738 [2:42:57<3:01:45,  2.03it/s]

step:12280, train_loss:0.05935102413964761, acc:0.6711477841646579


 49%|████▉     | 21573/43738 [2:42:57<2:54:32,  2.12it/s]

step:12280, train_loss:0.05934871155941208, acc:0.6711630278588977


 49%|████▉     | 21574/43738 [2:42:58<3:28:09,  1.77it/s]

step:12280, train_loss:0.05934840933492784, acc:0.671131918049504


 49%|████▉     | 21575/43738 [2:42:59<3:28:57,  1.77it/s]

step:12280, train_loss:0.05934599047461427, acc:0.6711471610660487


 49%|████▉     | 21576/43738 [2:42:59<2:52:01,  2.15it/s]

step:12280, train_loss:0.05934463225131105, acc:0.6711624026696329


 49%|████▉     | 21578/43738 [2:43:00<2:08:53,  2.87it/s]

step:12280, train_loss:0.05934253069433275, acc:0.6711776428604532
step:12280, train_loss:0.059339926226576405, acc:0.671192881638706


 49%|████▉     | 21579/43738 [2:43:00<2:31:51,  2.43it/s]

step:12280, train_loss:0.05933725368716531, acc:0.6712081190045878


 49%|████▉     | 21580/43738 [2:43:00<2:14:38,  2.74it/s]

step:12280, train_loss:0.05933451491540865, acc:0.6712233549582948


 49%|████▉     | 21581/43738 [2:43:01<2:46:25,  2.22it/s]

step:12280, train_loss:0.059333539540889095, acc:0.6712385895000231


 49%|████▉     | 21582/43738 [2:43:01<2:39:50,  2.31it/s]

step:12280, train_loss:0.05933402716075477, acc:0.6712538226299695


 49%|████▉     | 21583/43738 [2:43:02<2:37:11,  2.35it/s]

step:12280, train_loss:0.05933130048012473, acc:0.6712690543483297


 50%|█████     | 21888/43738 [2:45:25<3:31:19,  1.72it/s]

step:12300, train_loss:0.05941958382324538, acc:0.670641447368421


 50%|█████     | 21889/43738 [2:45:26<3:12:29,  1.89it/s]

step:12300, train_loss:0.059419729420661024, acc:0.6706564941294714


 50%|█████     | 21890/43738 [2:45:26<2:49:13,  2.15it/s]

step:12300, train_loss:0.059417062662468476, acc:0.6706715395157606


 50%|█████     | 21891/43738 [2:45:26<2:50:23,  2.14it/s]

step:12300, train_loss:0.059414465858203765, acc:0.670686583527477


 50%|█████     | 21892/43738 [2:45:27<2:28:46,  2.45it/s]

step:12300, train_loss:0.0594121186251015, acc:0.6707016261648091


 50%|█████     | 21893/43738 [2:45:27<2:18:58,  2.62it/s]

step:12300, train_loss:0.05941737734869143, acc:0.6706709907276298


 50%|█████     | 21894/43738 [2:45:27<2:22:07,  2.56it/s]

step:12300, train_loss:0.05941879394030373, acc:0.6706403580889742


 50%|█████     | 21895/43738 [2:45:28<2:27:59,  2.46it/s]

step:12300, train_loss:0.05941620275818367, acc:0.6706554007764329


 50%|█████     | 21896/43738 [2:45:28<2:25:30,  2.50it/s]

step:12300, train_loss:0.05941349062852024, acc:0.6706704420898795


 50%|█████     | 21897/43738 [2:45:29<2:30:45,  2.41it/s]

step:12300, train_loss:0.059413541673104024, acc:0.6706398136731059


 50%|█████     | 21898/43738 [2:45:29<2:33:10,  2.38it/s]

step:12300, train_loss:0.05941105970139399, acc:0.6706548543245958


 50%|█████     | 21899/43738 [2:45:30<3:01:54,  2.00it/s]

step:12300, train_loss:0.059411616317347204, acc:0.6706242294168684


 50%|█████     | 21900/43738 [2:45:30<2:55:09,  2.08it/s]

step:12300, train_loss:0.05940986135514712, acc:0.6706392694063927


 50%|█████     | 21901/43738 [2:45:31<3:07:49,  1.94it/s]

step:12300, train_loss:0.05940727046614019, acc:0.6706543080224647


 50%|█████     | 21902/43738 [2:45:31<3:07:20,  1.94it/s]

step:12300, train_loss:0.05940456877551076, acc:0.6706693452652726


 50%|█████     | 21903/43738 [2:45:32<3:07:02,  1.95it/s]

step:12300, train_loss:0.05941731053501052, acc:0.6706387252887732


 51%|█████     | 22208/43738 [2:47:58<3:26:01,  1.74it/s]

step:12320, train_loss:0.05944090370705259, acc:0.6708843659942363


 51%|█████     | 22209/43738 [2:47:59<3:15:02,  1.84it/s]

step:12320, train_loss:0.05944053555630119, acc:0.6708541582241434


 51%|█████     | 22210/43738 [2:47:59<3:19:56,  1.79it/s]

step:12320, train_loss:0.05944164685899424, acc:0.6708239531742458


 51%|█████     | 22211/43738 [2:48:00<3:36:54,  1.65it/s]

step:12320, train_loss:0.05943911636032099, acc:0.6708387735806582


 51%|█████     | 22212/43738 [2:48:00<2:58:17,  2.01it/s]

step:12320, train_loss:0.0594382341489024, acc:0.6708535926526202


 51%|█████     | 22213/43738 [2:48:01<2:55:22,  2.05it/s]

step:12320, train_loss:0.05943659921495481, acc:0.670868410390312


 51%|█████     | 22214/43738 [2:48:01<3:27:38,  1.73it/s]

step:12320, train_loss:0.05944527914829077, acc:0.670838210137751


 51%|█████     | 22215/43738 [2:48:02<3:49:34,  1.56it/s]

step:12320, train_loss:0.05946210428573809, acc:0.6708080126040963


 51%|█████     | 22216/43738 [2:48:03<3:32:36,  1.69it/s]

step:12320, train_loss:0.05946056674140812, acc:0.6708228303925099


 51%|█████     | 22217/43738 [2:48:03<3:08:06,  1.91it/s]

step:12320, train_loss:0.05945793013993549, acc:0.6708376468470091


 51%|█████     | 22218/43738 [2:48:04<3:12:57,  1.86it/s]

step:12320, train_loss:0.05946007996533125, acc:0.6708074534161491


 51%|█████     | 22219/43738 [2:48:04<3:38:51,  1.64it/s]

step:12320, train_loss:0.05946907586829137, acc:0.6707772627030919


 51%|█████     | 22220/43738 [2:48:05<4:04:19,  1.47it/s]

step:12320, train_loss:0.05946869478341671, acc:0.6707920792079208


 51%|█████     | 22221/43738 [2:48:06<3:57:42,  1.51it/s]

step:12320, train_loss:0.0594704409066285, acc:0.6707618919040548


 51%|█████     | 22222/43738 [2:48:06<3:15:22,  1.84it/s]

step:12320, train_loss:0.059469251286264374, acc:0.6707767077670777


 51%|█████     | 22223/43738 [2:48:06<2:43:51,  2.19it/s]

step:12320, train_loss:0.05946663009812028, acc:0.6707915222967196


 52%|█████▏    | 22528/43738 [2:50:29<2:15:17,  2.61it/s]

step:12340, train_loss:0.059482477773098126, acc:0.67041015625


 52%|█████▏    | 22529/43738 [2:50:29<2:18:55,  2.54it/s]

step:12340, train_loss:0.05948029035452392, acc:0.6704247858315948


 52%|█████▏    | 22530/43738 [2:50:30<2:35:19,  2.28it/s]

step:12340, train_loss:0.05947914516575088, acc:0.670439414114514


 52%|█████▏    | 22531/43738 [2:50:30<2:13:32,  2.65it/s]

step:12340, train_loss:0.059476517185254124, acc:0.6704540410989304


 52%|█████▏    | 22532/43738 [2:50:30<1:59:51,  2.95it/s]

step:12340, train_loss:0.05947387757304372, acc:0.6704686667850168


 52%|█████▏    | 22533/43738 [2:50:31<1:49:10,  3.24it/s]

step:12340, train_loss:0.05947306925110554, acc:0.6704832911729464


 52%|█████▏    | 22534/43738 [2:50:31<2:39:22,  2.22it/s]

step:12340, train_loss:0.05947744567062428, acc:0.6704535368776072


 52%|█████▏    | 22535/43738 [2:50:32<2:37:43,  2.24it/s]

step:12340, train_loss:0.05947673607808034, acc:0.6704681606390059


 52%|█████▏    | 22536/43738 [2:50:32<2:23:52,  2.46it/s]

step:12340, train_loss:0.059478448492869394, acc:0.670438409655662


 52%|█████▏    | 22537/43738 [2:50:33<3:04:58,  1.91it/s]

step:12340, train_loss:0.059479751729827675, acc:0.6704086613125083


 52%|█████▏    | 22538/43738 [2:50:34<3:09:14,  1.87it/s]

step:12340, train_loss:0.05947952778095868, acc:0.6703789156091934


 52%|█████▏    | 22539/43738 [2:50:34<2:56:07,  2.01it/s]

step:12340, train_loss:0.05947688944204881, acc:0.670393540086073


 52%|█████▏    | 22540/43738 [2:50:35<3:01:54,  1.94it/s]

step:12340, train_loss:0.05947426215545836, acc:0.6704081632653062


 52%|█████▏    | 22541/43738 [2:50:35<2:39:14,  2.22it/s]

step:12340, train_loss:0.05947252900770618, acc:0.6704227851470653


 52%|█████▏    | 22542/43738 [2:50:35<2:18:30,  2.55it/s]

step:12340, train_loss:0.05946989094170498, acc:0.6704374057315233


 52%|█████▏    | 22543/43738 [2:50:36<3:02:18,  1.94it/s]

step:12340, train_loss:0.05947534283402384, acc:0.6704076653506632


 52%|█████▏    | 22848/43738 [2:53:02<3:51:26,  1.50it/s]

step:12360, train_loss:0.05956604557381763, acc:0.6704744397759104


 52%|█████▏    | 22849/43738 [2:53:03<3:51:43,  1.50it/s]

step:12360, train_loss:0.05956922115047025, acc:0.6704450960654733


 52%|█████▏    | 22850/43738 [2:53:03<3:21:01,  1.73it/s]

step:12360, train_loss:0.05957031674464529, acc:0.6704157549234135


 52%|█████▏    | 22851/43738 [2:53:04<3:53:31,  1.49it/s]

step:12360, train_loss:0.0595762925633085, acc:0.6703864163493939


 52%|█████▏    | 22852/43738 [2:53:05<4:07:35,  1.41it/s]

step:12360, train_loss:0.05957556942641717, acc:0.6704008401890426


 52%|█████▏    | 22853/43738 [2:53:06<3:42:44,  1.56it/s]

step:12360, train_loss:0.059572977000086076, acc:0.6704152627663764


 52%|█████▏    | 22854/43738 [2:53:06<3:24:22,  1.70it/s]

step:12360, train_loss:0.059570431156640236, acc:0.6704296840815612


 52%|█████▏    | 22855/43738 [2:53:07<3:24:12,  1.70it/s]

step:12360, train_loss:0.059569134120605285, acc:0.6704441041347626


 52%|█████▏    | 22856/43738 [2:53:07<3:05:59,  1.87it/s]

step:12360, train_loss:0.059567971424566, acc:0.6704585229261463


 52%|█████▏    | 22857/43738 [2:53:07<2:31:22,  2.30it/s]

step:12360, train_loss:0.059565369348233546, acc:0.6704729404558778


 52%|█████▏    | 22859/43738 [2:53:08<2:05:48,  2.77it/s]

step:12360, train_loss:0.05956311966972473, acc:0.6704873567241229
step:12360, train_loss:0.059560514889319786, acc:0.6705017717310469


 52%|█████▏    | 22860/43738 [2:53:08<2:21:10,  2.46it/s]

step:12360, train_loss:0.05956111656672897, acc:0.6705161854768154


 52%|█████▏    | 22861/43738 [2:53:09<2:44:40,  2.11it/s]

step:12360, train_loss:0.05955854281538571, acc:0.670530597961594


 52%|█████▏    | 22862/43738 [2:53:10<2:55:04,  1.99it/s]

step:12360, train_loss:0.059556909703553484, acc:0.670545009185548


 52%|█████▏    | 22863/43738 [2:53:10<2:28:28,  2.34it/s]

step:12360, train_loss:0.05955430961448171, acc:0.6705594191488431


 53%|█████▎    | 23168/43738 [2:55:30<3:08:43,  1.82it/s]

step:12380, train_loss:0.05958734064567695, acc:0.6698031767955801


 53%|█████▎    | 23169/43738 [2:55:31<2:54:35,  1.96it/s]

step:12380, train_loss:0.05958727284745834, acc:0.6698174284604428


 53%|█████▎    | 23170/43738 [2:55:32<3:27:06,  1.66it/s]

step:12380, train_loss:0.05958623980005872, acc:0.669831678895123


 53%|█████▎    | 23171/43738 [2:55:32<3:06:53,  1.83it/s]

step:12380, train_loss:0.05958388639816219, acc:0.6698459280997799


 53%|█████▎    | 23172/43738 [2:55:32<2:40:23,  2.14it/s]

step:12380, train_loss:0.059583429132810506, acc:0.6698601760745727


 53%|█████▎    | 23173/43738 [2:55:33<3:05:54,  1.84it/s]

step:12380, train_loss:0.05958094851275761, acc:0.6698744228196608


 53%|█████▎    | 23174/43738 [2:55:33<2:44:00,  2.09it/s]

step:12380, train_loss:0.05958001457359289, acc:0.6698886683352032


 53%|█████▎    | 23175/43738 [2:55:34<2:20:45,  2.43it/s]

step:12380, train_loss:0.05958236187517125, acc:0.6698597626752967


 53%|█████▎    | 23176/43738 [2:55:34<2:05:09,  2.74it/s]

step:12380, train_loss:0.059579860698454834, acc:0.6698740075940628


 53%|█████▎    | 23177/43738 [2:55:35<2:45:36,  2.07it/s]

step:12380, train_loss:0.0595828279777554, acc:0.6698451050610519


 53%|█████▎    | 23178/43738 [2:55:35<2:33:18,  2.24it/s]

step:12380, train_loss:0.05958037750997254, acc:0.6698593493830356


 53%|█████▎    | 23179/43738 [2:55:36<2:43:47,  2.09it/s]

step:12380, train_loss:0.05958329122188781, acc:0.6698304499762716


 53%|█████▎    | 23180/43738 [2:55:37<3:23:15,  1.69it/s]

step:12380, train_loss:0.05958850620596427, acc:0.6698015530629853


 53%|█████▎    | 23181/43738 [2:55:37<3:39:28,  1.56it/s]

step:12380, train_loss:0.05959133203144706, acc:0.6698157974203011


 53%|█████▎    | 23182/43738 [2:55:38<3:32:17,  1.61it/s]

step:12380, train_loss:0.05958880170436123, acc:0.6698300405487015


 53%|█████▎    | 23183/43738 [2:55:39<3:38:52,  1.57it/s]

step:12380, train_loss:0.05958923972201836, acc:0.6698442824483458


 54%|█████▎    | 23488/43738 [2:58:02<3:30:56,  1.60it/s]

step:12400, train_loss:0.059501388040826234, acc:0.6702145776566758


 54%|█████▎    | 23489/43738 [2:58:03<3:07:26,  1.80it/s]

step:12400, train_loss:0.05949886040792894, acc:0.6702286176508153


 54%|█████▎    | 23490/43738 [2:58:03<3:15:15,  1.73it/s]

step:12400, train_loss:0.05949771976880567, acc:0.670242656449553


 54%|█████▎    | 23491/43738 [2:58:03<2:42:59,  2.07it/s]

step:12400, train_loss:0.059495744481392374, acc:0.6702566940530416


 54%|█████▎    | 23492/43738 [2:58:04<2:34:42,  2.18it/s]

step:12400, train_loss:0.059493216078199546, acc:0.6702707304614337


 54%|█████▎    | 23493/43738 [2:58:04<2:39:00,  2.12it/s]

step:12400, train_loss:0.05949069244185382, acc:0.6702847656748819


 54%|█████▎    | 23494/43738 [2:58:05<2:29:32,  2.26it/s]

step:12400, train_loss:0.05948822109694644, acc:0.6702987996935388


 54%|█████▎    | 23495/43738 [2:58:05<2:37:04,  2.15it/s]

step:12400, train_loss:0.05948905148456707, acc:0.6702702702702703


 54%|█████▎    | 23496/43738 [2:58:06<2:31:39,  2.22it/s]

step:12400, train_loss:0.05949145824491614, acc:0.6702417432754512


 54%|█████▎    | 23497/43738 [2:58:06<2:31:44,  2.22it/s]

step:12400, train_loss:0.05948964999329829, acc:0.6702557773332766


 54%|█████▎    | 23498/43738 [2:58:07<3:05:44,  1.82it/s]

step:12400, train_loss:0.05948918825399381, acc:0.6702698101966125


 54%|█████▎    | 23499/43738 [2:58:07<3:03:51,  1.83it/s]

step:12400, train_loss:0.0594873232398751, acc:0.6702838418656113


 54%|█████▎    | 23500/43738 [2:58:08<3:28:35,  1.62it/s]

step:12400, train_loss:0.05948508032807149, acc:0.6702978723404255


 54%|█████▎    | 23501/43738 [2:58:09<3:23:02,  1.66it/s]

step:12400, train_loss:0.05948272337526644, acc:0.6703119016212076


 54%|█████▎    | 23502/43738 [2:58:09<3:21:18,  1.68it/s]

step:12400, train_loss:0.05948311978661004, acc:0.6702833801378606


 54%|█████▎    | 23503/43738 [2:58:10<3:13:41,  1.74it/s]

step:12400, train_loss:0.05948087472718712, acc:0.6702974088414245


 54%|█████▍    | 23808/43738 [3:00:32<2:42:32,  2.04it/s]

step:12420, train_loss:0.05940000491226226, acc:0.6705309139784946


 54%|█████▍    | 23809/43738 [3:00:33<3:21:46,  1.65it/s]

step:12420, train_loss:0.05939756755766405, acc:0.6705447519845437


 54%|█████▍    | 23810/43738 [3:00:34<3:02:54,  1.82it/s]

step:12420, train_loss:0.05940442867981425, acc:0.6705165896682066


 54%|█████▍    | 23811/43738 [3:00:34<2:50:45,  1.94it/s]

step:12420, train_loss:0.059401941797527474, acc:0.670530427113519


 54%|█████▍    | 23812/43738 [3:00:35<3:04:05,  1.80it/s]

step:12420, train_loss:0.059399840345291714, acc:0.6705442633966068


 54%|█████▍    | 23813/43738 [3:00:35<3:04:26,  1.80it/s]

step:12420, train_loss:0.05939778135948942, acc:0.6705580985176164


 54%|█████▍    | 23814/43738 [3:00:36<2:48:59,  1.97it/s]

step:12420, train_loss:0.05939555460162488, acc:0.6705719324766943


 54%|█████▍    | 23815/43738 [3:00:36<2:47:56,  1.98it/s]

step:12420, train_loss:0.059393188238288416, acc:0.670585765273987


 54%|█████▍    | 23816/43738 [3:00:37<2:53:18,  1.92it/s]

step:12420, train_loss:0.05939593443958674, acc:0.6705576083305341


 54%|█████▍    | 23817/43738 [3:00:37<2:44:15,  2.02it/s]

step:12420, train_loss:0.05939528024395766, acc:0.6705714405676617


 54%|█████▍    | 23818/43738 [3:00:38<3:03:50,  1.81it/s]

step:12420, train_loss:0.059392788171592346, acc:0.670585271643295


 54%|█████▍    | 23819/43738 [3:00:39<3:17:24,  1.68it/s]

step:12420, train_loss:0.05939163187892962, acc:0.6705991015575801


 54%|█████▍    | 23820/43738 [3:00:39<2:41:49,  2.05it/s]

step:12420, train_loss:0.05938916802012884, acc:0.6706129303106633


 54%|█████▍    | 23821/43738 [3:00:39<2:51:13,  1.94it/s]

step:12420, train_loss:0.059387558378443374, acc:0.6706267579026909


 54%|█████▍    | 23822/43738 [3:00:40<3:03:55,  1.80it/s]

step:12420, train_loss:0.05938713016642354, acc:0.6706405843338091


 54%|█████▍    | 23823/43738 [3:00:40<2:45:45,  2.00it/s]

step:12420, train_loss:0.05938688754211682, acc:0.670654409604164


 55%|█████▌    | 24128/43738 [3:03:05<3:17:34,  1.65it/s]

step:12440, train_loss:0.05936586868450281, acc:0.6709631962864722


 55%|█████▌    | 24129/43738 [3:03:06<3:26:59,  1.58it/s]

step:12440, train_loss:0.059371771576110795, acc:0.6709353889510548


 55%|█████▌    | 24130/43738 [3:03:06<3:08:10,  1.74it/s]

step:12440, train_loss:0.0593693902173139, acc:0.6709490261085785


 55%|█████▌    | 24131/43738 [3:03:07<3:18:44,  1.64it/s]

step:12440, train_loss:0.05936787117059169, acc:0.6709626621358419


 55%|█████▌    | 24132/43738 [3:03:07<2:49:27,  1.93it/s]

step:12440, train_loss:0.05936544128568812, acc:0.6709762970329852


 55%|█████▌    | 24133/43738 [3:03:07<2:23:25,  2.28it/s]

step:12440, train_loss:0.05936372761714051, acc:0.6709899308001491


 55%|█████▌    | 24134/43738 [3:03:08<2:19:54,  2.34it/s]

step:12440, train_loss:0.05936813223261817, acc:0.6709621281180078


 55%|█████▌    | 24135/43738 [3:03:08<2:25:08,  2.25it/s]

step:12440, train_loss:0.059368265058295594, acc:0.6709757613424487


 55%|█████▌    | 24136/43738 [3:03:09<2:26:12,  2.23it/s]

step:12440, train_loss:0.059369951373659455, acc:0.6709479615512098


 55%|█████▌    | 24137/43738 [3:03:09<2:21:19,  2.31it/s]

step:12440, train_loss:0.059372434345319476, acc:0.670920164063471


 55%|█████▌    | 24138/43738 [3:03:10<2:31:14,  2.16it/s]

step:12440, train_loss:0.05937841492270889, acc:0.6708923688789461


 55%|█████▌    | 24139/43738 [3:03:10<2:10:09,  2.51it/s]

step:12440, train_loss:0.05938936557799378, acc:0.6708645759973487


 55%|█████▌    | 24140/43738 [3:03:11<2:35:49,  2.10it/s]

step:12440, train_loss:0.05939424729231547, acc:0.6708367854183928


 55%|█████▌    | 24141/43738 [3:03:11<2:15:42,  2.41it/s]

step:12440, train_loss:0.05939700256926228, acc:0.670808997141792


 55%|█████▌    | 24142/43738 [3:03:11<2:14:51,  2.42it/s]

step:12440, train_loss:0.05939603839749233, acc:0.6708226327561926


 55%|█████▌    | 24143/43738 [3:03:12<2:05:20,  2.61it/s]

step:12440, train_loss:0.059397270102453326, acc:0.6707948473677671


 56%|█████▌    | 24448/43738 [3:05:35<2:43:22,  1.97it/s]

step:12460, train_loss:0.0593611583397966, acc:0.6712205497382199


 56%|█████▌    | 24449/43738 [3:05:36<2:47:03,  1.92it/s]

step:12460, train_loss:0.05936018100280806, acc:0.671233997300503


 56%|█████▌    | 24450/43738 [3:05:36<2:34:30,  2.08it/s]

step:12460, train_loss:0.05935870676583292, acc:0.6712474437627812


 56%|█████▌    | 24451/43738 [3:05:37<2:24:14,  2.23it/s]

step:12460, train_loss:0.05935668800898653, acc:0.6712608891251891


 56%|█████▌    | 24452/43738 [3:05:37<2:05:53,  2.55it/s]

step:12460, train_loss:0.059356729927550166, acc:0.6712334369376738


 56%|█████▌    | 24453/43738 [3:05:37<2:05:03,  2.57it/s]

step:12460, train_loss:0.05935590273838488, acc:0.6712468817731976


 56%|█████▌    | 24454/43738 [3:05:38<2:32:30,  2.11it/s]

step:12460, train_loss:0.05936223907042238, acc:0.6712194324036967


 56%|█████▌    | 24455/43738 [3:05:38<2:37:09,  2.05it/s]

step:12460, train_loss:0.059360379479582524, acc:0.6712328767123288


 56%|█████▌    | 24456/43738 [3:05:39<2:12:46,  2.42it/s]

step:12460, train_loss:0.059361014579043324, acc:0.6712054301602879


 56%|█████▌    | 24457/43738 [3:05:39<1:55:36,  2.78it/s]

step:12460, train_loss:0.05935983328974602, acc:0.6712188739420207


 56%|█████▌    | 24458/43738 [3:05:39<1:57:53,  2.73it/s]

step:12460, train_loss:0.05935793396928024, acc:0.6712323166244174


 56%|█████▌    | 24459/43738 [3:05:40<1:59:33,  2.69it/s]

step:12460, train_loss:0.05935841543353073, acc:0.6712048734617114


 56%|█████▌    | 24460/43738 [3:05:40<1:45:23,  3.05it/s]

step:12460, train_loss:0.05935831255470239, acc:0.6711774325429273


 56%|█████▌    | 24461/43738 [3:05:40<1:42:19,  3.14it/s]

step:12460, train_loss:0.059356000261684104, acc:0.6711908752708393


 56%|█████▌    | 24462/43738 [3:05:41<1:46:36,  3.01it/s]

step:12460, train_loss:0.05935372363219124, acc:0.6712043168996812


 56%|█████▌    | 24463/43738 [3:05:41<1:49:54,  2.92it/s]

step:12460, train_loss:0.05935231455543237, acc:0.6712177574295876


 57%|█████▋    | 24768/43738 [3:08:11<3:02:25,  1.73it/s]

step:12480, train_loss:0.0593500616014729, acc:0.671390503875969


 57%|█████▋    | 24769/43738 [3:08:11<2:34:45,  2.04it/s]

step:12480, train_loss:0.05934767084421962, acc:0.6714037708425855


 57%|█████▋    | 24770/43738 [3:08:12<2:50:07,  1.86it/s]

step:12480, train_loss:0.059345291781028285, acc:0.6714170367379895


 57%|█████▋    | 24771/43738 [3:08:12<2:25:46,  2.17it/s]

step:12480, train_loss:0.05934388470132125, acc:0.6714303015623108


 57%|█████▋    | 24772/43738 [3:08:13<2:38:28,  1.99it/s]

step:12480, train_loss:0.05934259137882745, acc:0.671443565315679


 57%|█████▋    | 24773/43738 [3:08:13<2:37:00,  2.01it/s]

step:12480, train_loss:0.059344895561693194, acc:0.671416461470149


 57%|█████▋    | 24774/43738 [3:08:14<2:46:17,  1.90it/s]

step:12480, train_loss:0.05935160110195896, acc:0.6713893598127069


 57%|█████▋    | 24775/43738 [3:08:14<2:53:01,  1.83it/s]

step:12480, train_loss:0.05935092645074221, acc:0.6714026236125126


 57%|█████▋    | 24776/43738 [3:08:15<2:45:30,  1.91it/s]

step:12480, train_loss:0.059351399326996956, acc:0.671415886341621


 57%|█████▋    | 24777/43738 [3:08:15<2:26:23,  2.16it/s]

step:12480, train_loss:0.05935248509433943, acc:0.6713887879888606


 57%|█████▋    | 24778/43738 [3:08:16<2:47:45,  1.88it/s]

step:12480, train_loss:0.05935033655362182, acc:0.6714020502058278


 57%|█████▋    | 24779/43738 [3:08:16<2:21:02,  2.24it/s]

step:12480, train_loss:0.059348219957654, acc:0.6714153113523548


 57%|█████▋    | 24780/43738 [3:08:17<2:48:25,  1.88it/s]

step:12480, train_loss:0.05935100630304195, acc:0.6713882163034706


 57%|█████▋    | 24781/43738 [3:08:17<2:37:26,  2.01it/s]

step:12480, train_loss:0.05935039339658649, acc:0.6714014769379767


 57%|█████▋    | 24782/43738 [3:08:18<2:51:14,  1.85it/s]

step:12480, train_loss:0.0593516548326274, acc:0.6713743846340086


 57%|█████▋    | 24783/43738 [3:08:18<2:42:14,  1.95it/s]

step:12480, train_loss:0.05935554300324668, acc:0.6713472945164024


 57%|█████▋    | 25088/43738 [3:10:46<2:35:27,  2.00it/s]

step:12500, train_loss:0.05928046498049335, acc:0.6717554209183674


 57%|█████▋    | 25089/43738 [3:10:47<2:22:35,  2.18it/s]

step:12500, train_loss:0.059281781672812374, acc:0.6717286460201682


 57%|█████▋    | 25090/43738 [3:10:47<2:08:15,  2.42it/s]

step:12500, train_loss:0.05927948974532841, acc:0.6717417297728179


 57%|█████▋    | 25091/43738 [3:10:47<1:48:33,  2.86it/s]

step:12500, train_loss:0.05927949661466495, acc:0.6717548124825635


 57%|█████▋    | 25092/43738 [3:10:48<1:39:25,  3.13it/s]

step:12500, train_loss:0.05927799910041697, acc:0.6717678941495298


 57%|█████▋    | 25093/43738 [3:10:48<1:45:48,  2.94it/s]

step:12500, train_loss:0.059275644557099307, acc:0.6717809747738414


 57%|█████▋    | 25094/43738 [3:10:48<1:41:39,  3.06it/s]

step:12500, train_loss:0.05927341254903451, acc:0.6717940543556229


 57%|█████▋    | 25095/43738 [3:10:49<1:47:27,  2.89it/s]

step:12500, train_loss:0.05928501275810223, acc:0.6717672843195855


 57%|█████▋    | 25096/43738 [3:10:49<2:06:39,  2.45it/s]

step:12500, train_loss:0.05928669386104466, acc:0.6717405164169589


 57%|█████▋    | 25097/43738 [3:10:49<1:56:12,  2.67it/s]

step:12500, train_loss:0.059284363690894036, acc:0.6717535960473363


 57%|█████▋    | 25098/43738 [3:10:50<2:32:47,  2.03it/s]

step:12500, train_loss:0.05928755106322565, acc:0.6717268308231732


 57%|█████▋    | 25099/43738 [3:10:51<2:28:16,  2.10it/s]

step:12500, train_loss:0.05928708628687033, acc:0.671739909956572


 57%|█████▋    | 25100/43738 [3:10:51<2:07:54,  2.43it/s]

step:12500, train_loss:0.05928503166192185, acc:0.6717529880478088


 57%|█████▋    | 25101/43738 [3:10:51<2:04:28,  2.50it/s]

step:12500, train_loss:0.059287626415215504, acc:0.671726226046771


 57%|█████▋    | 25102/43738 [3:10:52<2:21:24,  2.20it/s]

step:12500, train_loss:0.05928570712329904, acc:0.6717393036411441


 57%|█████▋    | 25103/43738 [3:10:52<2:16:52,  2.27it/s]

step:12500, train_loss:0.05928375986958064, acc:0.6717523801936024


 58%|█████▊    | 25408/43738 [3:13:14<3:00:08,  1.70it/s]

step:12520, train_loss:0.05929766752264969, acc:0.6717962846347607


 58%|█████▊    | 25409/43738 [3:13:15<3:14:55,  1.57it/s]

step:12520, train_loss:0.05929654381059276, acc:0.6718092014640482


 58%|█████▊    | 25410/43738 [3:13:15<3:01:17,  1.69it/s]

step:12520, train_loss:0.059298806515447534, acc:0.6717827626918536


 58%|█████▊    | 25411/43738 [3:13:16<2:41:10,  1.90it/s]

step:12520, train_loss:0.05930332702406517, acc:0.6717563260005509


 58%|█████▊    | 25412/43738 [3:13:16<2:29:39,  2.04it/s]

step:12520, train_loss:0.05930329902125912, acc:0.6717298913898946


 58%|█████▊    | 25413/43738 [3:13:17<2:25:23,  2.10it/s]

step:12520, train_loss:0.059303298973747115, acc:0.6717428087986463


 58%|█████▊    | 25414/43738 [3:13:17<2:37:26,  1.94it/s]

step:12520, train_loss:0.05930150206572333, acc:0.6717557251908397


 58%|█████▊    | 25415/43738 [3:13:18<2:32:57,  2.00it/s]

step:12520, train_loss:0.059300366619850296, acc:0.6717686405665946


 58%|█████▊    | 25416/43738 [3:13:19<2:58:12,  1.71it/s]

step:12520, train_loss:0.059303789387591115, acc:0.671742209631728


 58%|█████▊    | 25417/43738 [3:13:19<2:41:10,  1.89it/s]

step:12520, train_loss:0.059301501930112986, acc:0.671755124522957


 58%|█████▊    | 25418/43738 [3:13:19<2:41:34,  1.89it/s]

step:12520, train_loss:0.0592993183733335, acc:0.6717680383979857


 58%|█████▊    | 25419/43738 [3:13:20<2:16:36,  2.23it/s]

step:12520, train_loss:0.05930136761222426, acc:0.6717416106062394


 58%|█████▊    | 25420/43738 [3:13:20<2:14:07,  2.28it/s]

step:12520, train_loss:0.05929903657503322, acc:0.6717545239968529


 58%|█████▊    | 25421/43738 [3:13:20<2:03:35,  2.47it/s]

step:12520, train_loss:0.05929700684181072, acc:0.6717674363715038


 58%|█████▊    | 25422/43738 [3:13:21<2:18:37,  2.20it/s]

step:12520, train_loss:0.05929563633428667, acc:0.6717803477303124


 58%|█████▊    | 25423/43738 [3:13:21<2:04:32,  2.45it/s]

step:12520, train_loss:0.05929365525307264, acc:0.6717932580733981


 59%|█████▉    | 25728/43738 [3:15:48<3:23:23,  1.48it/s]

step:12540, train_loss:0.05935314294234486, acc:0.6712531094527363


 59%|█████▉    | 25729/43738 [3:15:49<3:41:03,  1.36it/s]

step:12540, train_loss:0.059351698822970644, acc:0.6712658867425861


 59%|█████▉    | 25730/43738 [3:15:49<3:38:34,  1.37it/s]

step:12540, train_loss:0.05934941818832872, acc:0.6712786630392538


 59%|█████▉    | 25731/43738 [3:15:50<3:14:03,  1.55it/s]

step:12540, train_loss:0.059347117230347746, acc:0.6712914383428549


 59%|█████▉    | 25732/43738 [3:15:50<2:51:13,  1.75it/s]

step:12540, train_loss:0.059352798080535445, acc:0.6712653505362972


 59%|█████▉    | 25733/43738 [3:15:51<2:35:45,  1.93it/s]

step:12540, train_loss:0.05935049173433015, acc:0.6712781253643182


 59%|█████▉    | 25734/43738 [3:15:51<2:36:38,  1.92it/s]

step:12540, train_loss:0.05934819951252673, acc:0.6712908991995026


 59%|█████▉    | 25735/43738 [3:15:52<2:35:18,  1.93it/s]

step:12540, train_loss:0.05935602972951826, acc:0.6712648144550224


 59%|█████▉    | 25736/43738 [3:15:52<2:24:01,  2.08it/s]

step:12540, train_loss:0.05935419091156277, acc:0.6712775878147342


 59%|█████▉    | 25737/43738 [3:15:52<2:20:14,  2.14it/s]

step:12540, train_loss:0.059351885219969765, acc:0.6712903601818394


 59%|█████▉    | 25738/43738 [3:15:53<2:41:39,  1.86it/s]

step:12540, train_loss:0.05934963648804208, acc:0.6713031315564535


 59%|█████▉    | 25739/43738 [3:15:54<2:46:01,  1.81it/s]

step:12540, train_loss:0.05935059336693687, acc:0.6713159019386923


 59%|█████▉    | 25740/43738 [3:15:54<2:51:01,  1.75it/s]

step:12540, train_loss:0.059348303655377015, acc:0.6713286713286714


 59%|█████▉    | 25741/43738 [3:15:55<2:34:59,  1.94it/s]

step:12540, train_loss:0.059346083343435475, acc:0.6713414397265064


 59%|█████▉    | 25742/43738 [3:15:56<3:01:45,  1.65it/s]

step:12540, train_loss:0.05934447537561944, acc:0.671354207132313


 59%|█████▉    | 25743/43738 [3:15:56<2:59:28,  1.67it/s]

step:12540, train_loss:0.05934377557480694, acc:0.6713669735462068


 60%|█████▉    | 26048/43738 [3:18:24<2:36:12,  1.89it/s]

step:12560, train_loss:0.05941302650805306, acc:0.6713759213759214


 60%|█████▉    | 26049/43738 [3:18:24<2:30:26,  1.96it/s]

step:12560, train_loss:0.059410918827660135, acc:0.6713885369879842


 60%|█████▉    | 26050/43738 [3:18:25<2:40:05,  1.84it/s]

step:12560, train_loss:0.05940877297048541, acc:0.671401151631478


 60%|█████▉    | 26051/43738 [3:18:25<2:44:54,  1.79it/s]

step:12560, train_loss:0.059409004575253485, acc:0.6714137653065142


 60%|█████▉    | 26052/43738 [3:18:26<2:30:46,  1.96it/s]

step:12560, train_loss:0.05941171485074824, acc:0.6713879932442807


 60%|█████▉    | 26053/43738 [3:18:26<2:32:28,  1.93it/s]

step:12560, train_loss:0.0594127735909245, acc:0.6713622231604806


 60%|█████▉    | 26054/43738 [3:18:27<2:09:25,  2.28it/s]

step:12560, train_loss:0.05941318759330338, acc:0.671336455054886


 60%|█████▉    | 26055/43738 [3:18:27<2:10:12,  2.26it/s]

step:12560, train_loss:0.059413117309621184, acc:0.6713490692765304


 60%|█████▉    | 26056/43738 [3:18:28<2:11:31,  2.24it/s]

step:12560, train_loss:0.059413272723648713, acc:0.671323303653669


 60%|█████▉    | 26057/43738 [3:18:28<2:09:43,  2.27it/s]

step:12560, train_loss:0.05941500861815006, acc:0.671297540008443


 60%|█████▉    | 26058/43738 [3:18:28<2:04:21,  2.37it/s]

step:12560, train_loss:0.05941446667820121, acc:0.6713101542712411


 60%|█████▉    | 26059/43738 [3:18:29<1:49:56,  2.68it/s]

step:12560, train_loss:0.05941277904071776, acc:0.6713227675659081


 60%|█████▉    | 26060/43738 [3:18:29<2:27:02,  2.00it/s]

step:12560, train_loss:0.05941063046017776, acc:0.6713353798925557


 60%|█████▉    | 26061/43738 [3:18:30<2:45:47,  1.78it/s]

step:12560, train_loss:0.05941986962843945, acc:0.6713096197383063


 60%|█████▉    | 26062/43738 [3:18:30<2:29:07,  1.98it/s]

step:12560, train_loss:0.05942075630730792, acc:0.6712838615608933


 60%|█████▉    | 26063/43738 [3:18:31<2:06:44,  2.32it/s]

step:12560, train_loss:0.05941851844399393, acc:0.6712964739285577


 60%|██████    | 26368/43738 [3:20:58<2:05:53,  2.30it/s]

step:12580, train_loss:0.05937575655286654, acc:0.6711544296116505


 60%|██████    | 26369/43738 [3:20:58<2:02:05,  2.37it/s]

step:12580, train_loss:0.05937865836621372, acc:0.6711289772080853


 60%|██████    | 26370/43738 [3:20:59<2:01:20,  2.39it/s]

step:12580, train_loss:0.05937640901626175, acc:0.6711414486158513


 60%|██████    | 26371/43738 [3:20:59<1:56:01,  2.49it/s]

step:12580, train_loss:0.059375951640368435, acc:0.6711539190777748


 60%|██████    | 26372/43738 [3:21:00<2:02:37,  2.36it/s]

step:12580, train_loss:0.05937443265386315, acc:0.6711663885939633


 60%|██████    | 26373/43738 [3:21:00<1:57:03,  2.47it/s]

step:12580, train_loss:0.05937613006256206, acc:0.6711409395973155


 60%|██████    | 26374/43738 [3:21:00<1:59:40,  2.42it/s]

step:12580, train_loss:0.059376321299514805, acc:0.6711154925305225


 60%|██████    | 26375/43738 [3:21:01<2:35:10,  1.86it/s]

step:12580, train_loss:0.05937431774532943, acc:0.671127962085308


 60%|██████    | 26376/43738 [3:21:02<2:27:46,  1.96it/s]

step:12580, train_loss:0.05937207198564551, acc:0.6711404306945709


 60%|██████    | 26377/43738 [3:21:02<2:25:00,  2.00it/s]

step:12580, train_loss:0.05938356995691365, acc:0.671114986541305


 60%|██████    | 26378/43738 [3:21:02<2:13:16,  2.17it/s]

step:12580, train_loss:0.059381381674719534, acc:0.6711274546970961


 60%|██████    | 26379/43738 [3:21:03<2:35:53,  1.86it/s]

step:12580, train_loss:0.05938087831182972, acc:0.671139921907578


 60%|██████    | 26380/43738 [3:21:04<2:54:17,  1.66it/s]

step:12580, train_loss:0.059378829411606264, acc:0.6711523881728583


 60%|██████    | 26381/43738 [3:21:04<2:36:50,  1.84it/s]

step:12580, train_loss:0.05937679591864391, acc:0.6711648534930442


 60%|██████    | 26382/43738 [3:21:05<2:55:53,  1.64it/s]

step:12580, train_loss:0.059376541553234276, acc:0.6711773178682435


 60%|██████    | 26383/43738 [3:21:06<3:00:26,  1.60it/s]

step:12580, train_loss:0.0593746426312994, acc:0.6711897812985634


 61%|██████    | 26688/43738 [3:23:44<2:02:34,  2.32it/s]

step:12600, train_loss:0.05942372444931986, acc:0.6713129496402878


 61%|██████    | 26689/43738 [3:23:45<2:25:40,  1.95it/s]

step:12600, train_loss:0.05942519338051916, acc:0.671287796470456


 61%|██████    | 26690/43738 [3:23:46<2:30:04,  1.89it/s]

step:12600, train_loss:0.05943183686631167, acc:0.6712626451854627


 61%|██████    | 26691/43738 [3:23:46<2:23:14,  1.98it/s]

step:12600, train_loss:0.05943414013716301, acc:0.6712374957850961


 61%|██████    | 26692/43738 [3:23:47<2:31:50,  1.87it/s]

step:12600, train_loss:0.05944447645969488, acc:0.6712123482691443


 61%|██████    | 26694/43738 [3:23:47<1:51:51,  2.54it/s]

step:12600, train_loss:0.05944225057283281, acc:0.6712246656426779
step:12600, train_loss:0.05944226395121661, acc:0.6711995204914962


 61%|██████    | 26695/43738 [3:23:48<1:51:17,  2.55it/s]

step:12600, train_loss:0.05944039533287182, acc:0.6712118374227384


 61%|██████    | 26696/43738 [3:23:48<2:02:15,  2.32it/s]

step:12600, train_loss:0.05944208694265, acc:0.6711866946359005


 61%|██████    | 26697/43738 [3:23:49<2:05:08,  2.27it/s]

step:12600, train_loss:0.059441908646168634, acc:0.6711990111248455


 61%|██████    | 26698/43738 [3:23:49<2:05:49,  2.26it/s]

step:12600, train_loss:0.05944751762956889, acc:0.6711738707019252


 61%|██████    | 26699/43738 [3:23:49<1:48:03,  2.63it/s]

step:12600, train_loss:0.05944590306141935, acc:0.6711861867485673


 61%|██████    | 26700/43738 [3:23:50<2:04:56,  2.27it/s]

step:12600, train_loss:0.0594439478875103, acc:0.6711985018726592


 61%|██████    | 26701/43738 [3:23:50<2:25:58,  1.95it/s]

step:12600, train_loss:0.05944886872919876, acc:0.6711733642934722


 61%|██████    | 26702/43738 [3:23:51<2:19:47,  2.03it/s]

step:12600, train_loss:0.05944800622750682, acc:0.6711856789753576


 61%|██████    | 26703/43738 [3:23:51<2:14:23,  2.11it/s]

step:12600, train_loss:0.059447268862122066, acc:0.6711979927348987


 62%|██████▏   | 27008/43738 [3:26:14<2:08:42,  2.17it/s]

step:12620, train_loss:0.0595067460088284, acc:0.6712085308056872


 62%|██████▏   | 27009/43738 [3:26:14<2:06:52,  2.20it/s]

step:12620, train_loss:0.05950737320922143, acc:0.671183679514236


 62%|██████▏   | 27010/43738 [3:26:15<2:19:48,  1.99it/s]

step:12620, train_loss:0.05950860611606328, acc:0.6711588300629396


 62%|██████▏   | 27011/43738 [3:26:15<2:29:13,  1.87it/s]

step:12620, train_loss:0.05950740653702947, acc:0.6711710044056125


 62%|██████▏   | 27012/43738 [3:26:16<2:15:25,  2.06it/s]

step:12620, train_loss:0.059505404832882344, acc:0.6711831778468829


 62%|██████▏   | 27013/43738 [3:26:16<1:57:37,  2.37it/s]

step:12620, train_loss:0.05950703042411696, acc:0.6711583311738792


 62%|██████▏   | 27014/43738 [3:26:17<2:11:04,  2.13it/s]

step:12620, train_loss:0.05951496073604685, acc:0.671133486340416


 62%|██████▏   | 27015/43738 [3:26:17<2:38:40,  1.76it/s]

step:12620, train_loss:0.059513005638938424, acc:0.6711456598186193


 62%|██████▏   | 27016/43738 [3:26:18<2:28:19,  1.88it/s]

step:12620, train_loss:0.0595136949005434, acc:0.6711578323956174


 62%|██████▏   | 27017/43738 [3:26:18<2:11:19,  2.12it/s]

step:12620, train_loss:0.05951712887197829, acc:0.6711329903394159


 62%|██████▏   | 27018/43738 [3:26:18<2:00:17,  2.32it/s]

step:12620, train_loss:0.0595178626588041, acc:0.6711081501221408


 62%|██████▏   | 27019/43738 [3:26:19<1:52:21,  2.48it/s]

step:12620, train_loss:0.059517738038598576, acc:0.6710833117435878


 62%|██████▏   | 27020/43738 [3:26:19<1:49:15,  2.55it/s]

step:12620, train_loss:0.0595177234874726, acc:0.6710584752035529


 62%|██████▏   | 27021/43738 [3:26:20<2:05:57,  2.21it/s]

step:12620, train_loss:0.059515566825052255, acc:0.6710706487546723


 62%|██████▏   | 27022/43738 [3:26:20<2:07:24,  2.19it/s]

step:12620, train_loss:0.059521820014420905, acc:0.671045814521501


 62%|██████▏   | 27023/43738 [3:26:20<1:48:40,  2.56it/s]

step:12620, train_loss:0.059520627048219976, acc:0.6710579876401583


 62%|██████▏   | 27328/43738 [3:28:45<2:45:09,  1.66it/s]

step:12640, train_loss:0.059422768469884146, acc:0.6718018149882904


 62%|██████▏   | 27329/43738 [3:28:45<2:35:43,  1.76it/s]

step:12640, train_loss:0.05942493276418226, acc:0.6717772329759596


 62%|██████▏   | 27330/43738 [3:28:45<2:18:02,  1.98it/s]

step:12640, train_loss:0.05942297866105375, acc:0.6717892425905598


 62%|██████▏   | 27331/43738 [3:28:46<2:14:44,  2.03it/s]

step:12640, train_loss:0.05943397620620029, acc:0.6717646628370715


 62%|██████▏   | 27332/43738 [3:28:47<2:35:21,  1.76it/s]

step:12640, train_loss:0.05943289661217806, acc:0.6717766720327821


 62%|██████▏   | 27333/43738 [3:28:47<2:13:04,  2.05it/s]

step:12640, train_loss:0.059432326632444556, acc:0.6717886803497604


 62%|██████▏   | 27334/43738 [3:28:47<2:00:38,  2.27it/s]

step:12640, train_loss:0.059430745692414695, acc:0.6718006877881028


 62%|██████▏   | 27335/43738 [3:28:48<1:49:29,  2.50it/s]

step:12640, train_loss:0.0594333695806614, acc:0.671776111212731


 62%|██████▏   | 27336/43738 [3:28:48<1:56:05,  2.35it/s]

step:12640, train_loss:0.05943543118110627, acc:0.6717515364354697


 63%|██████▎   | 27337/43738 [3:28:49<2:00:16,  2.27it/s]

step:12640, train_loss:0.059434892852051134, acc:0.6717635439148407


 63%|██████▎   | 27338/43738 [3:28:49<1:46:33,  2.57it/s]

step:12640, train_loss:0.059432877949858014, acc:0.6717755505157657


 63%|██████▎   | 27339/43738 [3:28:49<1:36:30,  2.83it/s]

step:12640, train_loss:0.059430706409930686, acc:0.6717875562383409


 63%|██████▎   | 27340/43738 [3:28:49<1:30:29,  3.02it/s]

step:12640, train_loss:0.059430540107906306, acc:0.6717995610826628


 63%|██████▎   | 27341/43738 [3:28:50<1:25:41,  3.19it/s]

step:12640, train_loss:0.059428477514346004, acc:0.6718115650488278


 63%|██████▎   | 27342/43738 [3:28:50<1:44:45,  2.61it/s]

step:12640, train_loss:0.0594263142661922, acc:0.6718235681369322


 63%|██████▎   | 27343/43738 [3:28:50<1:33:46,  2.91it/s]

step:12640, train_loss:0.05942595646805144, acc:0.6718355703470724


 63%|██████▎   | 27648/43738 [3:31:15<1:39:40,  2.69it/s]

step:12660, train_loss:0.059556393997885, acc:0.6709707754629629


 63%|██████▎   | 27649/43738 [3:31:15<1:28:22,  3.03it/s]

step:12660, train_loss:0.059554258150438376, acc:0.6709826756844732


 63%|██████▎   | 27650/43738 [3:31:16<1:58:13,  2.27it/s]

step:12660, train_loss:0.05955423899740149, acc:0.6709584086799276


 63%|██████▎   | 27651/43738 [3:31:17<2:44:09,  1.63it/s]

step:12660, train_loss:0.059554116617579894, acc:0.6709703084879389


 63%|██████▎   | 27652/43738 [3:31:17<2:54:00,  1.54it/s]

step:12660, train_loss:0.0595577348206755, acc:0.6709460436858093


 63%|██████▎   | 27653/43738 [3:31:18<2:56:34,  1.52it/s]

step:12660, train_loss:0.059558988915384796, acc:0.6709217806386287


 63%|██████▎   | 27654/43738 [3:31:19<2:59:15,  1.50it/s]

step:12660, train_loss:0.05955910631842606, acc:0.6709336804802198


 63%|██████▎   | 27655/43738 [3:31:20<3:21:26,  1.33it/s]

step:12660, train_loss:0.059557478326037555, acc:0.6709455794612186


 63%|██████▎   | 27656/43738 [3:31:20<3:18:13,  1.35it/s]

step:12660, train_loss:0.05955542215722274, acc:0.6709574775817182


 63%|██████▎   | 27657/43738 [3:31:21<2:48:41,  1.59it/s]

step:12660, train_loss:0.05955654981164712, acc:0.6709332176302564


 63%|██████▎   | 27658/43738 [3:31:21<2:39:16,  1.68it/s]

step:12660, train_loss:0.05955778036675656, acc:0.6709089594330754


 63%|██████▎   | 27659/43738 [3:31:22<2:21:21,  1.90it/s]

step:12660, train_loss:0.05955587543305964, acc:0.6709208575870422


 63%|██████▎   | 27660/43738 [3:31:22<2:11:07,  2.04it/s]

step:12660, train_loss:0.05955795457972003, acc:0.6708966015907447


 63%|██████▎   | 27661/43738 [3:31:22<2:07:43,  2.10it/s]

step:12660, train_loss:0.05956654872077852, acc:0.6708723473482521


 63%|██████▎   | 27662/43738 [3:31:23<2:25:25,  1.84it/s]

step:12660, train_loss:0.05956602943647213, acc:0.6708842455353915


 63%|██████▎   | 27663/43738 [3:31:24<2:24:44,  1.85it/s]

step:12660, train_loss:0.059568661509234705, acc:0.6708599934931135


 64%|██████▍   | 27968/43738 [3:33:48<2:20:51,  1.87it/s]

step:12680, train_loss:0.059663591922836705, acc:0.6703732837528604


 64%|██████▍   | 27969/43738 [3:33:48<2:09:23,  2.03it/s]

step:12680, train_loss:0.05966246520852562, acc:0.6703850691837392


 64%|██████▍   | 27970/43738 [3:33:49<2:05:04,  2.10it/s]

step:12680, train_loss:0.059660502543786464, acc:0.6703968537718985


 64%|██████▍   | 27971/43738 [3:33:49<2:11:21,  2.00it/s]

step:12680, train_loss:0.059660212729732, acc:0.6704086375174287


 64%|██████▍   | 27972/43738 [3:33:50<2:12:16,  1.99it/s]

step:12680, train_loss:0.0596581028127246, acc:0.6704204204204204


 64%|██████▍   | 27973/43738 [3:33:50<2:05:22,  2.10it/s]

step:12680, train_loss:0.05965624844276813, acc:0.6704322024809638


 64%|██████▍   | 27974/43738 [3:33:51<1:47:23,  2.45it/s]

step:12680, train_loss:0.05965413426178661, acc:0.6704439836991493


 64%|██████▍   | 27975/43738 [3:33:51<1:48:15,  2.43it/s]

step:12680, train_loss:0.059655556132354445, acc:0.670420017873101


 64%|██████▍   | 27976/43738 [3:33:51<1:44:11,  2.52it/s]

step:12680, train_loss:0.05965342488141565, acc:0.6704317986845868


 64%|██████▍   | 27977/43738 [3:33:52<1:38:08,  2.68it/s]

step:12680, train_loss:0.05965130073914175, acc:0.6704435786538943


 64%|██████▍   | 27978/43738 [3:33:52<1:33:49,  2.80it/s]

step:12680, train_loss:0.059649466731497285, acc:0.6704553577811138


 64%|██████▍   | 27979/43738 [3:33:52<1:34:23,  2.78it/s]

step:12680, train_loss:0.05966100306682879, acc:0.6704313949748025


 64%|██████▍   | 27980/43738 [3:33:53<1:47:38,  2.44it/s]

step:12680, train_loss:0.05966603723979569, acc:0.6704074338813438


 64%|██████▍   | 27981/43738 [3:33:53<1:45:30,  2.49it/s]

step:12680, train_loss:0.05966392811173927, acc:0.6704192130374182


 64%|██████▍   | 27982/43738 [3:33:53<1:33:49,  2.80it/s]

step:12680, train_loss:0.05966724286208655, acc:0.6703952540919162


 64%|██████▍   | 27983/43738 [3:33:54<1:31:35,  2.87it/s]

step:12680, train_loss:0.059665174109317966, acc:0.6704070328413679


 65%|██████▍   | 28288/43738 [3:36:25<2:49:33,  1.52it/s]

step:12700, train_loss:0.05960950148271967, acc:0.6705670248868778


 65%|██████▍   | 28289/43738 [3:36:26<2:40:29,  1.60it/s]

step:12700, train_loss:0.059615627103659155, acc:0.6705433207253703


 65%|██████▍   | 28290/43738 [3:36:26<2:21:54,  1.81it/s]

step:12700, train_loss:0.05961590545723882, acc:0.6705196182396607


 65%|██████▍   | 28291/43738 [3:36:27<2:26:32,  1.76it/s]

step:12700, train_loss:0.05962005526461666, acc:0.6704959174295713


 65%|██████▍   | 28292/43738 [3:36:27<2:07:56,  2.01it/s]

step:12700, train_loss:0.059621013633241036, acc:0.6704722182949243


 65%|██████▍   | 28293/43738 [3:36:28<2:17:51,  1.87it/s]

step:12700, train_loss:0.059619254737159094, acc:0.6704838652670272


 65%|██████▍   | 28294/43738 [3:36:28<2:05:26,  2.05it/s]

step:12700, train_loss:0.059619285360821256, acc:0.6704601682335477


 65%|██████▍   | 28295/43738 [3:36:28<1:53:51,  2.26it/s]

step:12700, train_loss:0.059618789200070486, acc:0.67047181480827


 65%|██████▍   | 28296/43738 [3:36:29<2:00:13,  2.14it/s]

step:12700, train_loss:0.05961843870248526, acc:0.6704834605597965


 65%|██████▍   | 28297/43738 [3:36:29<2:00:16,  2.14it/s]

step:12700, train_loss:0.05961663177215752, acc:0.6704951054882143


 65%|██████▍   | 28298/43738 [3:36:30<1:59:18,  2.16it/s]

step:12700, train_loss:0.05961589464021507, acc:0.6705067495936109


 65%|██████▍   | 28299/43738 [3:36:30<1:58:21,  2.17it/s]

step:12700, train_loss:0.05961402893062819, acc:0.6705183928760734


 65%|██████▍   | 28300/43738 [3:36:30<1:39:10,  2.59it/s]

step:12700, train_loss:0.0596119625509269, acc:0.670530035335689


 65%|██████▍   | 28301/43738 [3:36:31<1:34:53,  2.71it/s]

step:12700, train_loss:0.05960985636407204, acc:0.6705416769725452


 65%|██████▍   | 28302/43738 [3:36:31<1:25:24,  3.01it/s]

step:12700, train_loss:0.05960776558441612, acc:0.6705533177867289


 65%|██████▍   | 28303/43738 [3:36:31<1:21:39,  3.15it/s]

step:12700, train_loss:0.05960566982218328, acc:0.6705649577783274


 65%|██████▌   | 28608/43738 [3:38:59<1:51:10,  2.27it/s]

step:12720, train_loss:0.05954589637444698, acc:0.6707564317673378


 65%|██████▌   | 28609/43738 [3:39:00<1:58:52,  2.12it/s]

step:12720, train_loss:0.0595440170848525, acc:0.6707679401586913


 65%|██████▌   | 28610/43738 [3:39:00<2:05:08,  2.01it/s]

step:12720, train_loss:0.05954781513336762, acc:0.670744494931842


 65%|██████▌   | 28611/43738 [3:39:01<1:56:25,  2.17it/s]

step:12720, train_loss:0.05954573650172929, acc:0.6707560029359337


 65%|██████▌   | 28612/43738 [3:39:01<1:41:47,  2.48it/s]

step:12720, train_loss:0.059543663582752655, acc:0.6707675101356074


 65%|██████▌   | 28613/43738 [3:39:01<1:41:03,  2.49it/s]

step:12720, train_loss:0.05954159681121326, acc:0.6707790165309475


 65%|██████▌   | 28614/43738 [3:39:02<1:42:07,  2.47it/s]

step:12720, train_loss:0.059539517846175304, acc:0.6707905221220382


 65%|██████▌   | 28615/43738 [3:39:02<2:01:17,  2.08it/s]

step:12720, train_loss:0.05954230838951922, acc:0.6707670802026909


 65%|██████▌   | 28616/43738 [3:39:03<2:16:16,  1.85it/s]

step:12720, train_loss:0.05954676689095643, acc:0.6707436399217221


 65%|██████▌   | 28617/43738 [3:39:04<2:20:28,  1.79it/s]

step:12720, train_loss:0.0595448197559676, acc:0.6707551455428591


 65%|██████▌   | 28618/43738 [3:39:04<2:13:26,  1.89it/s]

step:12720, train_loss:0.0595429364303878, acc:0.6707666503599133


 65%|██████▌   | 28619/43738 [3:39:05<2:04:41,  2.02it/s]

step:12720, train_loss:0.05954539805971804, acc:0.6707432125511024


 65%|██████▌   | 28620/43738 [3:39:05<1:46:18,  2.37it/s]

step:12720, train_loss:0.059543350943181, acc:0.6707547169811321


 65%|██████▌   | 28621/43738 [3:39:05<1:43:12,  2.44it/s]

step:12720, train_loss:0.05954332205059918, acc:0.6707662206072464


 65%|██████▌   | 28622/43738 [3:39:06<1:38:01,  2.57it/s]

step:12720, train_loss:0.05954726849694245, acc:0.6707427852700719


 65%|██████▌   | 28623/43738 [3:39:06<1:43:56,  2.42it/s]

step:12720, train_loss:0.05954740429698115, acc:0.6707542885092408


 66%|██████▌   | 28928/43738 [3:41:32<1:57:06,  2.11it/s]

step:12740, train_loss:0.05955548218216549, acc:0.6710107853982301


 66%|██████▌   | 28929/43738 [3:41:32<1:41:32,  2.43it/s]

step:12740, train_loss:0.05955342465741927, acc:0.6710221576964291


 66%|██████▌   | 28930/43738 [3:41:32<1:35:20,  2.59it/s]

step:12740, train_loss:0.059551670148613296, acc:0.6710335292084342


 66%|██████▌   | 28931/43738 [3:41:33<1:31:31,  2.70it/s]

step:12740, train_loss:0.05955012424833103, acc:0.6710448999343265


 66%|██████▌   | 28932/43738 [3:41:33<1:38:21,  2.51it/s]

step:12740, train_loss:0.059550278016247474, acc:0.6710217060694041


 66%|██████▌   | 28933/43738 [3:41:34<2:06:39,  1.95it/s]

step:12740, train_loss:0.05954831861938331, acc:0.6710330764179311


 66%|██████▌   | 28934/43738 [3:41:35<2:25:42,  1.69it/s]

step:12740, train_loss:0.059547164314473155, acc:0.6710444459805074


 66%|██████▌   | 28935/43738 [3:41:35<2:14:11,  1.84it/s]

step:12740, train_loss:0.05954699743277095, acc:0.6710558147572144


 66%|██████▌   | 28936/43738 [3:41:35<1:51:46,  2.21it/s]

step:12740, train_loss:0.059548927806029425, acc:0.671032623721316


 66%|██████▌   | 28937/43738 [3:41:36<2:07:52,  1.93it/s]

step:12740, train_loss:0.05954737698847065, acc:0.6710439921208142


 66%|██████▌   | 28938/43738 [3:41:37<2:04:13,  1.99it/s]

step:12740, train_loss:0.059550433129801336, acc:0.6710208030962748


 66%|██████▌   | 28939/43738 [3:41:37<1:44:31,  2.36it/s]

step:12740, train_loss:0.059548402593420806, acc:0.6710321711185597


 66%|██████▌   | 28940/43738 [3:41:37<1:46:01,  2.33it/s]

step:12740, train_loss:0.05954679381295411, acc:0.6710435383552177


 66%|██████▌   | 28941/43738 [3:41:38<2:12:18,  1.86it/s]

step:12740, train_loss:0.05954576020563552, acc:0.6710549048063301


 66%|██████▌   | 28942/43738 [3:41:38<1:51:42,  2.21it/s]

step:12740, train_loss:0.05954861828185802, acc:0.6710317186096331


 66%|██████▌   | 28943/43738 [3:41:39<1:36:34,  2.55it/s]

step:12740, train_loss:0.05954656146694036, acc:0.6710430846836887


 67%|██████▋   | 29248/43738 [3:44:06<2:13:04,  1.81it/s]

step:12760, train_loss:0.05958796885368377, acc:0.6708150984682714


 67%|██████▋   | 29249/43738 [3:44:07<2:08:34,  1.88it/s]

step:12760, train_loss:0.05958750209418265, acc:0.6708263530377107


 67%|██████▋   | 29250/43738 [3:44:07<2:07:04,  1.90it/s]

step:12760, train_loss:0.05959022752697078, acc:0.6708034188034188


 67%|██████▋   | 29251/43738 [3:44:08<2:01:03,  1.99it/s]

step:12760, train_loss:0.05959451417883318, acc:0.6707804861372261


 67%|██████▋   | 29252/43738 [3:44:08<2:06:18,  1.91it/s]

step:12760, train_loss:0.059594074049947356, acc:0.6707917407356762


 67%|██████▋   | 29253/43738 [3:44:09<1:50:25,  2.19it/s]

step:12760, train_loss:0.05959208018431045, acc:0.6708029945646601


 67%|██████▋   | 29254/43738 [3:44:09<1:38:32,  2.45it/s]

step:12760, train_loss:0.05959228344462275, acc:0.6707800642647159


 67%|██████▋   | 29255/43738 [3:44:10<2:03:26,  1.96it/s]

step:12760, train_loss:0.05959311194590227, acc:0.6707571355323876


 67%|██████▋   | 29256/43738 [3:44:10<1:55:55,  2.08it/s]

step:12760, train_loss:0.05959147104132619, acc:0.6707683893902106


 67%|██████▋   | 29257/43738 [3:44:10<1:38:02,  2.46it/s]

step:12760, train_loss:0.05958983982323754, acc:0.670779642478723


 67%|██████▋   | 29258/43738 [3:44:11<1:34:30,  2.55it/s]

step:12760, train_loss:0.05959323618766469, acc:0.6707567161118326


 67%|██████▋   | 29259/43738 [3:44:11<1:52:26,  2.15it/s]

step:12760, train_loss:0.059591319057514955, acc:0.6707679688301036


 67%|██████▋   | 29260/43738 [3:44:12<1:48:49,  2.22it/s]

step:12760, train_loss:0.05959067166331259, acc:0.6707792207792208


 67%|██████▋   | 29261/43738 [3:44:12<1:43:41,  2.33it/s]

step:12760, train_loss:0.05959080217373307, acc:0.6707562967772803


 67%|██████▋   | 29262/43738 [3:44:13<1:55:32,  2.09it/s]

step:12760, train_loss:0.059589338640039305, acc:0.6707675483562299


 67%|██████▋   | 29263/43738 [3:44:13<2:21:42,  1.70it/s]

step:12760, train_loss:0.05958912252466226, acc:0.6707787991661825


 68%|██████▊   | 29568/43738 [3:46:44<2:08:30,  1.84it/s]

step:12780, train_loss:0.05956914601222301, acc:0.6707251082251082


 68%|██████▊   | 29569/43738 [3:46:44<1:47:24,  2.20it/s]

step:12780, train_loss:0.059569639699693144, acc:0.6707024248368223


 68%|██████▊   | 29570/43738 [3:46:44<1:32:53,  2.54it/s]

step:12780, train_loss:0.05956836953073536, acc:0.6707135610415962


 68%|██████▊   | 29571/43738 [3:46:44<1:25:48,  2.75it/s]

step:12780, train_loss:0.059566550801149185, acc:0.6707246964931859


 68%|██████▊   | 29572/43738 [3:46:45<1:47:00,  2.21it/s]

step:12780, train_loss:0.059570647393718354, acc:0.6707020154199919


 68%|██████▊   | 29573/43738 [3:46:45<1:31:09,  2.59it/s]

step:12780, train_loss:0.05956863331306872, acc:0.6707131505089101


 68%|██████▊   | 29574/43738 [3:46:46<1:54:32,  2.06it/s]

step:12780, train_loss:0.05956908202812122, acc:0.6706904713599784


 68%|██████▊   | 29575/43738 [3:46:47<1:50:02,  2.15it/s]

step:12780, train_loss:0.05956963422337416, acc:0.6706677937447169


 68%|██████▊   | 29576/43738 [3:46:47<1:39:25,  2.37it/s]

step:12780, train_loss:0.059567999765868275, acc:0.6706789288612388


 68%|██████▊   | 29577/43738 [3:46:47<1:32:30,  2.55it/s]

step:12780, train_loss:0.05956743330772731, acc:0.6706900632248031


 68%|██████▊   | 29578/43738 [3:46:48<1:32:31,  2.55it/s]

step:12780, train_loss:0.05956908284315715, acc:0.6706673879234566


 68%|██████▊   | 29579/43738 [3:46:48<1:22:17,  2.87it/s]

step:12780, train_loss:0.05956752114401901, acc:0.6706785219243382


 68%|██████▊   | 29580/43738 [3:46:48<1:24:51,  2.78it/s]

step:12780, train_loss:0.05957007446772456, acc:0.6706558485463151


 68%|██████▊   | 29581/43738 [3:46:49<1:32:55,  2.54it/s]

step:12780, train_loss:0.05956806458361475, acc:0.6706669821845104


 68%|██████▊   | 29582/43738 [3:46:49<2:00:25,  1.96it/s]

step:12780, train_loss:0.05956814111734027, acc:0.670678115069975


 68%|██████▊   | 29583/43738 [3:46:50<1:58:07,  2.00it/s]

step:12780, train_loss:0.05957063370121813, acc:0.6706554440050029


 68%|██████▊   | 29888/43738 [3:49:14<1:33:06,  2.48it/s]

step:12800, train_loss:0.059602864964105336, acc:0.6705366702355461


 68%|██████▊   | 29889/43738 [3:49:14<1:23:49,  2.75it/s]

step:12800, train_loss:0.05960542274273636, acc:0.6705142360065576


 68%|██████▊   | 29890/43738 [3:49:14<1:24:09,  2.74it/s]

step:12800, train_loss:0.059605071469069563, acc:0.6704918032786885


 68%|██████▊   | 29891/43738 [3:49:15<1:30:53,  2.54it/s]

step:12800, train_loss:0.059606358150696294, acc:0.6704693720517881


 68%|██████▊   | 29892/43738 [3:49:15<1:36:12,  2.40it/s]

step:12800, train_loss:0.059607159736418315, acc:0.6704469423257059


 68%|██████▊   | 29893/43738 [3:49:16<1:30:58,  2.54it/s]

step:12800, train_loss:0.0596056360654747, acc:0.6704579667480681


 68%|██████▊   | 29894/43738 [3:49:16<1:31:37,  2.52it/s]

step:12800, train_loss:0.059604606299655555, acc:0.6704689904328628


 68%|██████▊   | 29895/43738 [3:49:17<1:53:18,  2.04it/s]

step:12800, train_loss:0.05960933171860035, acc:0.6704465629703964


 68%|██████▊   | 29896/43738 [3:49:17<2:14:22,  1.72it/s]

step:12800, train_loss:0.059608085054470836, acc:0.6704575862991704


 68%|██████▊   | 29897/43738 [3:49:18<2:13:05,  1.73it/s]

step:12800, train_loss:0.05960613782110935, acc:0.6704686088905242


 68%|██████▊   | 29898/43738 [3:49:18<2:00:03,  1.92it/s]

step:12800, train_loss:0.059604245846521284, acc:0.6704796307445314


 68%|██████▊   | 29899/43738 [3:49:19<1:48:53,  2.12it/s]

step:12800, train_loss:0.05960749757990638, acc:0.6704572059266196


 68%|██████▊   | 29900/43738 [3:49:19<1:54:46,  2.01it/s]

step:12800, train_loss:0.059605515488519965, acc:0.6704682274247492


 68%|██████▊   | 29901/43738 [3:49:20<2:03:40,  1.86it/s]

step:12800, train_loss:0.05960456179156224, acc:0.6704792481856794


 68%|██████▊   | 29902/43738 [3:49:20<1:42:51,  2.24it/s]

step:12800, train_loss:0.05960262598041038, acc:0.6704902682094843


 68%|██████▊   | 29903/43738 [3:49:21<1:42:53,  2.24it/s]

step:12800, train_loss:0.05960601278192443, acc:0.6704678460355148


 69%|██████▉   | 30208/43738 [3:51:49<1:35:34,  2.36it/s]

step:12820, train_loss:0.0595941647586218, acc:0.6706170550847458


 69%|██████▉   | 30209/43738 [3:51:49<1:32:20,  2.44it/s]

step:12820, train_loss:0.05959219206828223, acc:0.6706279585553974


 69%|██████▉   | 30210/43738 [3:51:49<1:31:29,  2.46it/s]

step:12820, train_loss:0.059590239143940855, acc:0.6706388613042039


 69%|██████▉   | 30211/43738 [3:51:50<1:40:03,  2.25it/s]

step:12820, train_loss:0.05958922783336016, acc:0.6706497633312369


 69%|██████▉   | 30212/43738 [3:51:50<1:41:05,  2.23it/s]

step:12820, train_loss:0.059587270101132156, acc:0.6706606646365683


 69%|██████▉   | 30213/43738 [3:51:51<1:43:21,  2.18it/s]

step:12820, train_loss:0.059586749864024915, acc:0.6706715652202694


 69%|██████▉   | 30214/43738 [3:51:51<1:38:22,  2.29it/s]

step:12820, train_loss:0.05958483874591576, acc:0.6706824650824121


 69%|██████▉   | 30215/43738 [3:51:52<1:33:00,  2.42it/s]

step:12820, train_loss:0.059584787056990794, acc:0.670693364223068


 69%|██████▉   | 30216/43738 [3:51:52<1:35:51,  2.35it/s]

step:12820, train_loss:0.05958596792994165, acc:0.670671167593328


 69%|██████▉   | 30217/43738 [3:51:52<1:35:49,  2.35it/s]

step:12820, train_loss:0.05958560444328471, acc:0.6706489724327366


 69%|██████▉   | 30218/43738 [3:51:53<1:23:35,  2.70it/s]

step:12820, train_loss:0.05958451332194305, acc:0.6706598715997087


 69%|██████▉   | 30219/43738 [3:51:53<1:43:21,  2.18it/s]

step:12820, train_loss:0.05958344785661875, acc:0.6706707700453357


 69%|██████▉   | 30220/43738 [3:51:54<1:47:29,  2.10it/s]

step:12820, train_loss:0.0595839015375604, acc:0.670681667769689


 69%|██████▉   | 30221/43738 [3:51:55<2:10:36,  1.72it/s]

step:12820, train_loss:0.05958355586378239, acc:0.6706925647728401


 69%|██████▉   | 30222/43738 [3:51:55<1:55:17,  1.95it/s]

step:12820, train_loss:0.05958159742522657, acc:0.6707034610548607


 69%|██████▉   | 30223/43738 [3:51:56<2:05:05,  1.80it/s]

step:12820, train_loss:0.059579855746899016, acc:0.6707143566158223


 70%|██████▉   | 30528/43738 [3:54:20<2:05:05,  1.76it/s]

step:12840, train_loss:0.05960738970622612, acc:0.6707940251572327


 70%|██████▉   | 30529/43738 [3:54:20<2:18:00,  1.60it/s]

step:12840, train_loss:0.059610984127434326, acc:0.6707720528022536


 70%|██████▉   | 30530/43738 [3:54:21<2:24:06,  1.53it/s]

step:12840, train_loss:0.0596090936843096, acc:0.670782836554209


 70%|██████▉   | 30531/43738 [3:54:22<2:08:18,  1.72it/s]

step:12840, train_loss:0.05961140048101135, acc:0.670760866005044


 70%|██████▉   | 30532/43738 [3:54:22<2:02:56,  1.79it/s]

step:12840, train_loss:0.059613594515760975, acc:0.6707388968950609


 70%|██████▉   | 30533/43738 [3:54:23<2:22:33,  1.54it/s]

step:12840, train_loss:0.05961772604773693, acc:0.6707169292241182


 70%|██████▉   | 30534/43738 [3:54:23<2:07:10,  1.73it/s]

step:12840, train_loss:0.05961581736199681, acc:0.6707277133687037


 70%|██████▉   | 30535/43738 [3:54:24<1:55:42,  1.90it/s]

step:12840, train_loss:0.05961470473116983, acc:0.6707384968069429


 70%|██████▉   | 30536/43738 [3:54:24<1:58:30,  1.86it/s]

step:12840, train_loss:0.05961280129564744, acc:0.670749279538905


 70%|██████▉   | 30537/43738 [3:54:25<2:13:36,  1.65it/s]

step:12840, train_loss:0.05961278670465271, acc:0.6707600615646593


 70%|██████▉   | 30538/43738 [3:54:26<2:11:05,  1.68it/s]

step:12840, train_loss:0.059610837636674324, acc:0.6707708428842754


 70%|██████▉   | 30539/43738 [3:54:26<2:03:56,  1.77it/s]

step:12840, train_loss:0.059610471775158595, acc:0.6707816234978224


 70%|██████▉   | 30540/43738 [3:54:27<1:55:42,  1.90it/s]

step:12840, train_loss:0.05960878690464706, acc:0.67079240340537


 70%|██████▉   | 30541/43738 [3:54:27<1:51:56,  1.97it/s]

step:12840, train_loss:0.05960691047955932, acc:0.6708031826069873


 70%|██████▉   | 30542/43738 [3:54:28<1:54:55,  1.91it/s]

step:12840, train_loss:0.05960965429159593, acc:0.6707812193045642


 70%|██████▉   | 30543/43738 [3:54:28<2:19:30,  1.58it/s]

step:12840, train_loss:0.05961433194178673, acc:0.67075925744033


 71%|███████   | 30848/43738 [3:56:53<1:31:56,  2.34it/s]

step:12860, train_loss:0.059547771852612265, acc:0.6710321576763485


 71%|███████   | 30849/43738 [3:56:53<1:43:40,  2.07it/s]

step:12860, train_loss:0.059545927011028454, acc:0.6710428214852994


 71%|███████   | 30850/43738 [3:56:54<1:57:33,  1.83it/s]

step:12860, train_loss:0.05954593597707524, acc:0.6710534846029174


 71%|███████   | 30851/43738 [3:56:54<1:46:18,  2.02it/s]

step:12860, train_loss:0.059544154517138854, acc:0.6710641470292698


 71%|███████   | 30852/43738 [3:56:55<1:40:35,  2.13it/s]

step:12860, train_loss:0.05954252066819774, acc:0.6710748087644237


 71%|███████   | 30853/43738 [3:56:55<1:28:11,  2.43it/s]

step:12860, train_loss:0.05954080301974499, acc:0.6710854698084465


 71%|███████   | 30854/43738 [3:56:56<1:23:00,  2.59it/s]

step:12860, train_loss:0.05953950708204499, acc:0.6710961301614053


 71%|███████   | 30855/43738 [3:56:56<1:22:02,  2.62it/s]

step:12860, train_loss:0.05954160643848641, acc:0.6710743801652893


 71%|███████   | 30856/43738 [3:56:56<1:17:42,  2.76it/s]

step:12860, train_loss:0.059548911678882224, acc:0.6710526315789473


 71%|███████   | 30857/43738 [3:56:56<1:10:29,  3.05it/s]

step:12860, train_loss:0.05954768094166712, acc:0.671063291959685


 71%|███████   | 30858/43738 [3:56:57<1:35:23,  2.25it/s]

step:12860, train_loss:0.05954575793024123, acc:0.6710739516494912


 71%|███████   | 30859/43738 [3:56:58<1:40:55,  2.13it/s]

step:12860, train_loss:0.059544750826374206, acc:0.6710846106484332


 71%|███████   | 30860/43738 [3:56:59<2:03:07,  1.74it/s]

step:12860, train_loss:0.05954325769014526, acc:0.6710952689565781


 71%|███████   | 30861/43738 [3:56:59<1:48:47,  1.97it/s]

step:12860, train_loss:0.05954549162853255, acc:0.6710735232170053


 71%|███████   | 30862/43738 [3:57:00<2:02:23,  1.75it/s]

step:12860, train_loss:0.05954978467890425, acc:0.6710517788866567


 71%|███████   | 30863/43738 [3:57:00<1:45:52,  2.03it/s]

step:12860, train_loss:0.0595478552170886, acc:0.6710624372225642


 71%|███████▏  | 31168/43738 [3:59:20<1:32:59,  2.25it/s]

step:12880, train_loss:0.05960041660309836, acc:0.6708803901437371


 71%|███████▏  | 31169/43738 [3:59:20<1:29:57,  2.33it/s]

step:12880, train_loss:0.059598655212961475, acc:0.670890949340691


 71%|███████▏  | 31170/43738 [3:59:20<1:31:42,  2.28it/s]

step:12880, train_loss:0.059597459084371966, acc:0.6709015078601219


 71%|███████▏  | 31171/43738 [3:59:21<1:29:54,  2.33it/s]

step:12880, train_loss:0.05959686037101437, acc:0.6709120657020949


 71%|███████▏  | 31172/43738 [3:59:21<1:37:46,  2.14it/s]

step:12880, train_loss:0.059595950600629236, acc:0.6709226228666753


 71%|███████▏  | 31173/43738 [3:59:22<1:52:12,  1.87it/s]

step:12880, train_loss:0.05959406374423897, acc:0.670933179353928


 71%|███████▏  | 31174/43738 [3:59:23<1:54:02,  1.84it/s]

step:12880, train_loss:0.05959353786148704, acc:0.6709437351639187


 71%|███████▏  | 31175/43738 [3:59:23<1:34:16,  2.22it/s]

step:12880, train_loss:0.059591626303663305, acc:0.6709542902967122


 71%|███████▏  | 31176/43738 [3:59:23<1:27:07,  2.40it/s]

step:12880, train_loss:0.05959024340442657, acc:0.6709648447523736


 71%|███████▏  | 31177/43738 [3:59:24<1:29:00,  2.35it/s]

step:12880, train_loss:0.05958837990539384, acc:0.6709753985309683


 71%|███████▏  | 31178/43738 [3:59:24<1:41:53,  2.05it/s]

step:12880, train_loss:0.05958655489478945, acc:0.6709859516325615


 71%|███████▏  | 31179/43738 [3:59:25<1:41:07,  2.07it/s]

step:12880, train_loss:0.059585280613232657, acc:0.670996504057218


 71%|███████▏  | 31180/43738 [3:59:25<1:38:36,  2.12it/s]

step:12880, train_loss:0.059583741480277314, acc:0.6710070558050032


 71%|███████▏  | 31181/43738 [3:59:26<1:31:51,  2.28it/s]

step:12880, train_loss:0.05958188720531519, acc:0.6710176068759822


 71%|███████▏  | 31182/43738 [3:59:26<1:40:38,  2.08it/s]

step:12880, train_loss:0.059581078202643786, acc:0.67102815727022


 71%|███████▏  | 31183/43738 [3:59:27<1:52:26,  1.86it/s]

step:12880, train_loss:0.05958263087397736, acc:0.6710066382323702


 72%|███████▏  | 31488/43738 [4:01:56<2:03:43,  1.65it/s]

step:12900, train_loss:0.0596098572687487, acc:0.6709540142276422


 72%|███████▏  | 31489/43738 [4:01:57<2:21:05,  1.45it/s]

step:12900, train_loss:0.05960820951339955, acc:0.6709644637810029


 72%|███████▏  | 31490/43738 [4:01:57<2:05:29,  1.63it/s]

step:12900, train_loss:0.059606316939476024, acc:0.6709749126706891


 72%|███████▏  | 31491/43738 [4:01:58<1:54:21,  1.78it/s]

step:12900, train_loss:0.05960458044055225, acc:0.6709853608967642


 72%|███████▏  | 31492/43738 [4:01:58<1:46:39,  1.91it/s]

step:12900, train_loss:0.05960284954013943, acc:0.6709958084592913


 72%|███████▏  | 31493/43738 [4:01:59<1:43:11,  1.98it/s]

step:12900, train_loss:0.059604772940405566, acc:0.6709745022703458


 72%|███████▏  | 31494/43738 [4:01:59<1:40:48,  2.02it/s]

step:12900, train_loss:0.05960292776813775, acc:0.6709849495141932


 72%|███████▏  | 31495/43738 [4:01:59<1:35:46,  2.13it/s]

step:12900, train_loss:0.059601889681771866, acc:0.6709953960946182


 72%|███████▏  | 31496/43738 [4:02:00<1:33:11,  2.19it/s]

step:12900, train_loss:0.059601207671738814, acc:0.671005842011684


 72%|███████▏  | 31497/43738 [4:02:00<1:29:18,  2.28it/s]

step:12900, train_loss:0.05959931541334552, acc:0.6710162872654538


 72%|███████▏  | 31498/43738 [4:02:01<1:44:46,  1.95it/s]

step:12900, train_loss:0.05959850780411474, acc:0.6710267318559908


 72%|███████▏  | 31499/43738 [4:02:02<1:45:27,  1.93it/s]

step:12900, train_loss:0.059598636177599584, acc:0.6710371757833582


 72%|███████▏  | 31500/43738 [4:02:02<1:57:52,  1.73it/s]

step:12900, train_loss:0.059603097902788296, acc:0.6710158730158731


 72%|███████▏  | 31501/43738 [4:02:03<1:53:22,  1.80it/s]

step:12900, train_loss:0.05960171613113771, acc:0.671026316624869


 72%|███████▏  | 31502/43738 [4:02:03<1:49:10,  1.87it/s]

step:12900, train_loss:0.05960066188488074, acc:0.6710367595708209


 72%|███████▏  | 31503/43738 [4:02:04<1:55:24,  1.77it/s]

step:12900, train_loss:0.0596010748576228, acc:0.6710472018537917


 73%|███████▎  | 31808/43738 [4:04:31<1:50:00,  1.81it/s]

step:12920, train_loss:0.059600273289758025, acc:0.6712462273641852


 73%|███████▎  | 31809/43738 [4:04:32<2:02:09,  1.63it/s]

step:12920, train_loss:0.059602497062792276, acc:0.6712251249646326


 73%|███████▎  | 31810/43738 [4:04:32<1:50:58,  1.79it/s]

step:12920, train_loss:0.059605370215188594, acc:0.671204023891858


 73%|███████▎  | 31811/43738 [4:04:33<1:41:45,  1.95it/s]

step:12920, train_loss:0.05960351031611223, acc:0.6712143598126434


 73%|███████▎  | 31812/43738 [4:04:33<1:40:00,  1.99it/s]

step:12920, train_loss:0.05960430242727815, acc:0.6711932604048787


 73%|███████▎  | 31813/43738 [4:04:33<1:33:01,  2.14it/s]

step:12920, train_loss:0.05960363742787663, acc:0.671203596014208


 73%|███████▎  | 31814/43738 [4:04:34<1:30:12,  2.20it/s]

step:12920, train_loss:0.059601803901959814, acc:0.6712139309737851


 73%|███████▎  | 31816/43738 [4:04:34<1:09:03,  2.88it/s]

step:12920, train_loss:0.059600545725305556, acc:0.6712242652836712
step:12920, train_loss:0.059599671109706195, acc:0.6712345989439276


 73%|███████▎  | 31817/43738 [4:04:35<1:02:21,  3.19it/s]

step:12920, train_loss:0.0595978035841197, acc:0.6712449319546154


 73%|███████▎  | 31818/43738 [4:04:35<1:00:52,  3.26it/s]

step:12920, train_loss:0.059601270385825085, acc:0.6712238355647746


 73%|███████▎  | 31819/43738 [4:04:35<1:13:55,  2.69it/s]

step:12920, train_loss:0.05960764699092816, acc:0.6712027405009585


 73%|███████▎  | 31820/43738 [4:04:36<1:14:46,  2.66it/s]

step:12920, train_loss:0.05960597013802947, acc:0.6712130735386549


 73%|███████▎  | 31821/43738 [4:04:36<1:11:58,  2.76it/s]

step:12920, train_loss:0.05960457518266903, acc:0.6712234059269037


 73%|███████▎  | 31822/43738 [4:04:36<1:04:28,  3.08it/s]

step:12920, train_loss:0.05960317885656647, acc:0.6712337376657658


 73%|███████▎  | 31823/43738 [4:04:37<1:00:37,  3.28it/s]

step:12920, train_loss:0.05960130645276798, acc:0.6712440687553027


 73%|███████▎  | 32128/43738 [4:07:04<1:37:25,  1.99it/s]

step:12940, train_loss:0.059584303584850284, acc:0.6713147410358565


 73%|███████▎  | 32129/43738 [4:07:05<1:42:30,  1.89it/s]

step:12940, train_loss:0.05958253844821477, acc:0.6713249712098105


 73%|███████▎  | 32130/43738 [4:07:05<1:29:36,  2.16it/s]

step:12940, train_loss:0.05958069230125764, acc:0.6713352007469654


 73%|███████▎  | 32131/43738 [4:07:05<1:16:08,  2.54it/s]

step:12940, train_loss:0.05958015420795992, acc:0.6713454296473811


 73%|███████▎  | 32132/43738 [4:07:06<1:24:52,  2.28it/s]

step:12940, train_loss:0.0595790086380804, acc:0.6713556579111166


 73%|███████▎  | 32133/43738 [4:07:06<1:20:17,  2.41it/s]

step:12940, train_loss:0.05958239595652609, acc:0.6713347648834531


 73%|███████▎  | 32134/43738 [4:07:07<1:28:57,  2.17it/s]

step:12940, train_loss:0.05958143330742632, acc:0.6713449928424722


 73%|███████▎  | 32135/43738 [4:07:07<1:21:19,  2.38it/s]

step:12940, train_loss:0.059582871131619815, acc:0.6713241014470204


 73%|███████▎  | 32136/43738 [4:07:08<1:20:18,  2.41it/s]

step:12940, train_loss:0.05958108181877046, acc:0.6713343291013194


 73%|███████▎  | 32137/43738 [4:07:08<1:12:05,  2.68it/s]

step:12940, train_loss:0.059581703171404085, acc:0.6713134393378349


 73%|███████▎  | 32138/43738 [4:07:08<1:17:41,  2.49it/s]

step:12940, train_loss:0.05959128308963019, acc:0.6712925508743544


 73%|███████▎  | 32139/43738 [4:07:09<1:37:09,  1.99it/s]

step:12940, train_loss:0.05958963090776372, acc:0.6713027785556489


 73%|███████▎  | 32140/43738 [4:07:10<1:48:55,  1.77it/s]

step:12940, train_loss:0.05959103438923832, acc:0.6712818917237088


 73%|███████▎  | 32141/43738 [4:07:10<1:47:10,  1.80it/s]

step:12940, train_loss:0.05958963750673617, acc:0.6712921191002147


 73%|███████▎  | 32142/43738 [4:07:11<1:38:27,  1.96it/s]

step:12940, train_loss:0.059589215492203, acc:0.6712712338995707


 73%|███████▎  | 32143/43738 [4:07:11<1:22:21,  2.35it/s]

step:12940, train_loss:0.05958856575764597, acc:0.6712814609712846


 74%|███████▍  | 32448/43738 [4:09:42<1:41:45,  1.85it/s]

step:12960, train_loss:0.05960894750025879, acc:0.6712278106508875


 74%|███████▍  | 32449/43738 [4:09:42<1:35:10,  1.98it/s]

step:12960, train_loss:0.0596117338815879, acc:0.6712071250269654


 74%|███████▍  | 32450/43738 [4:09:42<1:24:46,  2.22it/s]

step:12960, train_loss:0.059610094641011, acc:0.6712172573189522


 74%|███████▍  | 32451/43738 [4:09:43<1:43:58,  1.81it/s]

step:12960, train_loss:0.059613884810787, acc:0.6711965732951218


 74%|███████▍  | 32452/43738 [4:09:44<1:43:49,  1.81it/s]

step:12960, train_loss:0.0596167864505179, acc:0.6711758905460372


 74%|███████▍  | 32453/43738 [4:09:44<1:40:53,  1.86it/s]

step:12960, train_loss:0.05961496811038564, acc:0.6711860228638339


 74%|███████▍  | 32454/43738 [4:09:45<1:45:04,  1.79it/s]

step:12960, train_loss:0.05961348815620837, acc:0.6711961545572195


 74%|███████▍  | 32455/43738 [4:09:46<1:54:56,  1.64it/s]

step:12960, train_loss:0.05961419570449429, acc:0.6712062856262517


 74%|███████▍  | 32456/43738 [4:09:46<1:54:53,  1.64it/s]

step:12960, train_loss:0.059612571166204505, acc:0.6712164160709884


 74%|███████▍  | 32457/43738 [4:09:47<1:46:20,  1.77it/s]

step:12960, train_loss:0.059614497676980856, acc:0.6711957358967249


 74%|███████▍  | 32458/43738 [4:09:47<1:28:23,  2.13it/s]

step:12960, train_loss:0.05961266429645934, acc:0.6712058660422701


 74%|███████▍  | 32459/43738 [4:09:47<1:17:42,  2.42it/s]

step:12960, train_loss:0.059610872496357714, acc:0.6712159955636341


 74%|███████▍  | 32460/43738 [4:09:48<1:19:34,  2.36it/s]

step:12960, train_loss:0.05961104481137723, acc:0.6711953173136168


 74%|███████▍  | 32461/43738 [4:09:48<1:19:44,  2.36it/s]

step:12960, train_loss:0.05960933165869324, acc:0.671205446535843


 74%|███████▍  | 32462/43738 [4:09:48<1:16:15,  2.46it/s]

step:12960, train_loss:0.05960764119120961, acc:0.6712155751340029


 74%|███████▍  | 32463/43738 [4:09:49<1:37:03,  1.94it/s]

step:12960, train_loss:0.05960648482103493, acc:0.6712257031081539


 75%|███████▍  | 32768/43738 [4:12:08<1:09:37,  2.63it/s]

step:12980, train_loss:0.05963401953684411, acc:0.67095947265625


 75%|███████▍  | 32769/43738 [4:12:09<1:03:27,  2.88it/s]

step:12980, train_loss:0.05963340648709013, acc:0.670969513869816


 75%|███████▍  | 32770/43738 [4:12:09<1:27:24,  2.09it/s]

step:12980, train_loss:0.0596322732189199, acc:0.6709795544705524


 75%|███████▍  | 32771/43738 [4:12:10<1:31:52,  1.99it/s]

step:12980, train_loss:0.05963565171466193, acc:0.6709590796741021


 75%|███████▍  | 32772/43738 [4:12:10<1:25:06,  2.15it/s]

step:12980, train_loss:0.059633905149458155, acc:0.6709691199804712


 75%|███████▍  | 32773/43738 [4:12:11<1:31:38,  1.99it/s]

step:12980, train_loss:0.059635726214020135, acc:0.6709486467518995


 75%|███████▍  | 32774/43738 [4:12:11<1:16:58,  2.37it/s]

step:12980, train_loss:0.059633906755213115, acc:0.6709586867638982


 75%|███████▍  | 32775/43738 [4:12:11<1:17:34,  2.36it/s]

step:12980, train_loss:0.059632218570841306, acc:0.6709687261632342


 75%|███████▍  | 32776/43738 [4:12:12<1:18:24,  2.33it/s]

step:12980, train_loss:0.05964064951452445, acc:0.6709482548206004


 75%|███████▍  | 32777/43738 [4:12:12<1:19:51,  2.29it/s]

step:12980, train_loss:0.05964484739751778, acc:0.6709277847270952


 75%|███████▍  | 32778/43738 [4:12:13<1:20:26,  2.27it/s]

step:12980, train_loss:0.0596436729625518, acc:0.6709378241503448


 75%|███████▍  | 32779/43738 [4:12:14<1:36:38,  1.89it/s]

step:12980, train_loss:0.05964347355632223, acc:0.6709478629610421


 75%|███████▍  | 32780/43738 [4:12:14<1:29:36,  2.04it/s]

step:12980, train_loss:0.05964502931675914, acc:0.6709273947528981


 75%|███████▍  | 32781/43738 [4:12:14<1:29:34,  2.04it/s]

step:12980, train_loss:0.05964323017500289, acc:0.6709374332692718


 75%|███████▍  | 32782/43738 [4:12:15<1:30:09,  2.03it/s]

step:12980, train_loss:0.059647732878716477, acc:0.6709169666280276


 75%|███████▍  | 32783/43738 [4:12:15<1:24:41,  2.16it/s]

step:12980, train_loss:0.05964868929908936, acc:0.6708965012353963


 76%|███████▌  | 33088/43738 [4:14:36<1:42:24,  1.73it/s]

step:13000, train_loss:0.05960952091555771, acc:0.6710287717601547


 76%|███████▌  | 33089/43738 [4:14:36<1:38:45,  1.80it/s]

step:13000, train_loss:0.059609347597867214, acc:0.6710387137719483


 76%|███████▌  | 33090/43738 [4:14:37<1:46:14,  1.67it/s]

step:13000, train_loss:0.05960756660171414, acc:0.6710486551828347


 76%|███████▌  | 33091/43738 [4:14:37<1:37:08,  1.83it/s]

step:13000, train_loss:0.05960716901128698, acc:0.6710585959928681


 76%|███████▌  | 33092/43738 [4:14:38<1:34:58,  1.87it/s]

step:13000, train_loss:0.05960770186653617, acc:0.671038317418107


 76%|███████▌  | 33093/43738 [4:14:38<1:24:27,  2.10it/s]

step:13000, train_loss:0.05960592426460629, acc:0.6710482579397455


 76%|███████▌  | 33094/43738 [4:14:39<1:14:36,  2.38it/s]

step:13000, train_loss:0.05960413714022731, acc:0.6710581978606394


 76%|███████▌  | 33095/43738 [4:14:39<1:08:42,  2.58it/s]

step:13000, train_loss:0.05960400151740295, acc:0.671068137180843


 76%|███████▌  | 33096/43738 [4:14:39<1:11:36,  2.48it/s]

step:13000, train_loss:0.05960529907562081, acc:0.671047860768673


 76%|███████▌  | 33097/43738 [4:14:40<1:10:50,  2.50it/s]

step:13000, train_loss:0.059603608466019774, acc:0.6710577998005861


 76%|███████▌  | 33098/43738 [4:14:40<1:24:18,  2.10it/s]

step:13000, train_loss:0.05960483405908546, acc:0.6710375249259773


 76%|███████▌  | 33099/43738 [4:14:41<1:28:43,  2.00it/s]

step:13000, train_loss:0.05960643542937377, acc:0.6710172512764736


 76%|███████▌  | 33100/43738 [4:14:41<1:13:14,  2.42it/s]

step:13000, train_loss:0.059604661460463636, acc:0.6710271903323263


 76%|███████▌  | 33101/43738 [4:14:42<1:21:32,  2.17it/s]

step:13000, train_loss:0.059603023153342954, acc:0.6710371287876499


 76%|███████▌  | 33102/43738 [4:14:42<1:10:49,  2.50it/s]

step:13000, train_loss:0.05960138675630961, acc:0.6710470666424989


 76%|███████▌  | 33103/43738 [4:14:43<1:18:29,  2.26it/s]

step:13000, train_loss:0.059602624799880266, acc:0.6710267951545177


 76%|███████▋  | 33408/43738 [4:16:55<1:22:24,  2.09it/s]

step:13020, train_loss:0.059619155076002434, acc:0.6709770114942529


 76%|███████▋  | 33409/43738 [4:16:56<1:28:05,  1.95it/s]

step:13020, train_loss:0.05961828130134314, acc:0.6709868598281901


 76%|███████▋  | 33410/43738 [4:16:56<1:15:52,  2.27it/s]

step:13020, train_loss:0.059616501965713053, acc:0.670996707572583


 76%|███████▋  | 33411/43738 [4:16:57<1:21:10,  2.12it/s]

step:13020, train_loss:0.05961831347356896, acc:0.6709766244649965


 76%|███████▋  | 33412/43738 [4:16:57<1:13:23,  2.34it/s]

step:13020, train_loss:0.059620723875854834, acc:0.6709565425595594


 76%|███████▋  | 33413/43738 [4:16:58<1:08:11,  2.52it/s]

step:13020, train_loss:0.059621257410363726, acc:0.6709364618561637


 76%|███████▋  | 33414/43738 [4:16:58<1:26:51,  1.98it/s]

step:13020, train_loss:0.059624221743011296, acc:0.6709163823547016


 76%|███████▋  | 33415/43738 [4:16:59<1:28:52,  1.94it/s]

step:13020, train_loss:0.05963208592000283, acc:0.6708963040550651


 76%|███████▋  | 33416/43738 [4:17:00<1:46:04,  1.62it/s]

step:13020, train_loss:0.059631051889942124, acc:0.6709061527412018


 76%|███████▋  | 33417/43738 [4:17:00<1:44:01,  1.65it/s]

step:13020, train_loss:0.059636233234138435, acc:0.6708860759493671


 76%|███████▋  | 33418/43738 [4:17:01<1:35:21,  1.80it/s]

step:13020, train_loss:0.05963445068156433, acc:0.6708959243521455


 76%|███████▋  | 33419/43738 [4:17:01<1:44:44,  1.64it/s]

step:13020, train_loss:0.05963648805129353, acc:0.6708758490678955


 76%|███████▋  | 33420/43738 [4:17:02<1:44:50,  1.64it/s]

step:13020, train_loss:0.05963473540856456, acc:0.670885697187313


 76%|███████▋  | 33421/43738 [4:17:02<1:29:53,  1.91it/s]

step:13020, train_loss:0.059633554919969745, acc:0.6708955447173932


 76%|███████▋  | 33422/43738 [4:17:03<1:28:24,  1.94it/s]

step:13020, train_loss:0.05963412030355018, acc:0.6708754712464844


 76%|███████▋  | 33423/43738 [4:17:03<1:23:13,  2.07it/s]

step:13020, train_loss:0.059633898269619165, acc:0.6708853184932532


 77%|███████▋  | 33728/43738 [4:19:22<1:18:04,  2.14it/s]

step:13040, train_loss:0.059628190559722904, acc:0.670748339658444


 77%|███████▋  | 33729/43738 [4:19:23<1:17:17,  2.16it/s]

step:13040, train_loss:0.05962650418330732, acc:0.6707581013371283


 77%|███████▋  | 33730/43738 [4:19:23<1:20:35,  2.07it/s]

step:13040, train_loss:0.05962500975147942, acc:0.6707678624369997


 77%|███████▋  | 33731/43738 [4:19:24<1:25:03,  1.96it/s]

step:13040, train_loss:0.05962331028989559, acc:0.6707776229581097


 77%|███████▋  | 33732/43738 [4:19:24<1:26:49,  1.92it/s]

step:13040, train_loss:0.05962222311899892, acc:0.6707873829005099


 77%|███████▋  | 33733/43738 [4:19:25<1:26:44,  1.92it/s]

step:13040, train_loss:0.05962601833265827, acc:0.6707674977025465


 77%|███████▋  | 33734/43738 [4:19:25<1:23:43,  1.99it/s]

step:13040, train_loss:0.05962599064560146, acc:0.6707772573664552


 77%|███████▋  | 33735/43738 [4:19:26<1:08:59,  2.42it/s]

step:13040, train_loss:0.059624490758809406, acc:0.6707870164517563


 77%|███████▋  | 33736/43738 [4:19:26<1:02:20,  2.67it/s]

step:13040, train_loss:0.05962732292120879, acc:0.6707671330329619


 77%|███████▋  | 33737/43738 [4:19:26<1:03:37,  2.62it/s]

step:13040, train_loss:0.059628864109966276, acc:0.670747250792898


 77%|███████▋  | 33738/43738 [4:19:27<57:53,  2.88it/s]  

step:13040, train_loss:0.05962868848873889, acc:0.6707273697314601


 77%|███████▋  | 33739/43738 [4:19:27<54:15,  3.07it/s]

step:13040, train_loss:0.059630250261054535, acc:0.6707074898485432


 77%|███████▋  | 33740/43738 [4:19:28<1:14:52,  2.23it/s]

step:13040, train_loss:0.05962993358693179, acc:0.6707172495554238


 77%|███████▋  | 33741/43738 [4:19:28<1:16:30,  2.18it/s]

step:13040, train_loss:0.05962910425761705, acc:0.6707270086837972


 77%|███████▋  | 33742/43738 [4:19:29<1:19:24,  2.10it/s]

step:13040, train_loss:0.05963275348608241, acc:0.6707071305791003


 77%|███████▋  | 33743/43738 [4:19:29<1:07:25,  2.47it/s]

step:13040, train_loss:0.059631026482726904, acc:0.6707168894289186


 78%|███████▊  | 34048/43738 [4:21:48<1:17:45,  2.08it/s]

step:13060, train_loss:0.05960183840919566, acc:0.6710526315789473


 78%|███████▊  | 34049/43738 [4:21:49<1:21:12,  1.99it/s]

step:13060, train_loss:0.05960145525137924, acc:0.6710329231401804


 78%|███████▊  | 34050/43738 [4:21:49<1:08:11,  2.37it/s]

step:13060, train_loss:0.0596010866550942, acc:0.6710132158590308


 78%|███████▊  | 34051/43738 [4:21:49<1:03:32,  2.54it/s]

step:13060, train_loss:0.059604060132680235, acc:0.670993509735397


 78%|███████▊  | 34052/43738 [4:21:50<1:09:45,  2.31it/s]

step:13060, train_loss:0.059602338152022295, acc:0.6710031716198754


 78%|███████▊  | 34053/43738 [4:21:50<1:01:23,  2.63it/s]

step:13060, train_loss:0.05960645961279128, acc:0.6709834669485801


 78%|███████▊  | 34054/43738 [4:21:50<1:13:23,  2.20it/s]

step:13060, train_loss:0.05960568888189068, acc:0.6709931285605215


 78%|███████▊  | 34055/43738 [4:21:51<1:10:46,  2.28it/s]

step:13060, train_loss:0.059606967312228434, acc:0.6709734253413596


 78%|███████▊  | 34056/43738 [4:21:51<1:12:57,  2.21it/s]

step:13060, train_loss:0.05960668270781052, acc:0.6709830866807611


 78%|███████▊  | 34057/43738 [4:21:52<1:14:28,  2.17it/s]

step:13060, train_loss:0.05960901094554808, acc:0.6709633849135274


 78%|███████▊  | 34058/43738 [4:21:52<1:14:48,  2.16it/s]

step:13060, train_loss:0.059613911804366405, acc:0.6709436843032474


 78%|███████▊  | 34059/43738 [4:21:53<1:10:53,  2.28it/s]

step:13060, train_loss:0.059612164700683486, acc:0.6709533456648756


 78%|███████▊  | 34060/43738 [4:21:53<1:06:33,  2.42it/s]

step:13060, train_loss:0.059611666184368514, acc:0.6709630064591897


 78%|███████▊  | 34061/43738 [4:21:54<1:17:35,  2.08it/s]

step:13060, train_loss:0.0596114355559898, acc:0.6709433075951968


 78%|███████▊  | 34062/43738 [4:21:54<1:23:41,  1.93it/s]

step:13060, train_loss:0.05961462659721527, acc:0.6709236098878516


 78%|███████▊  | 34063/43738 [4:21:55<1:10:16,  2.29it/s]

step:13060, train_loss:0.059612879735672676, acc:0.6709332707042832


 79%|███████▊  | 34368/43738 [4:24:13<1:08:22,  2.28it/s]

step:13080, train_loss:0.059632508575355916, acc:0.6709439013035382


 79%|███████▊  | 34369/43738 [4:24:13<1:00:25,  2.58it/s]

step:13080, train_loss:0.05963119553613659, acc:0.6709534755157264


 79%|███████▊  | 34370/43738 [4:24:14<1:19:25,  1.97it/s]

step:13080, train_loss:0.05963172358534225, acc:0.6709630491707885


 79%|███████▊  | 34371/43738 [4:24:14<1:07:57,  2.30it/s]

step:13080, train_loss:0.05963057902077005, acc:0.6709726222687731


 79%|███████▊  | 34372/43738 [4:24:14<58:27,  2.67it/s]  

step:13080, train_loss:0.05962884423964554, acc:0.6709821948097289


 79%|███████▊  | 34373/43738 [4:24:15<1:01:42,  2.53it/s]

step:13080, train_loss:0.05962733602520317, acc:0.6709917667937043


 79%|███████▊  | 34374/43738 [4:24:15<1:15:14,  2.07it/s]

step:13080, train_loss:0.059625961330969285, acc:0.6710013382207483


 79%|███████▊  | 34375/43738 [4:24:15<1:02:39,  2.49it/s]

step:13080, train_loss:0.05962432946466926, acc:0.6710109090909091


 79%|███████▊  | 34376/43738 [4:24:16<1:02:35,  2.49it/s]

step:13080, train_loss:0.05962396643741966, acc:0.6710204794042355


 79%|███████▊  | 34377/43738 [4:24:16<1:15:05,  2.08it/s]

step:13080, train_loss:0.059623742685214386, acc:0.6710300491607761


 79%|███████▊  | 34378/43738 [4:24:17<1:17:13,  2.02it/s]

step:13080, train_loss:0.05962274732221907, acc:0.6710396183605795


 79%|███████▊  | 34379/43738 [4:24:17<1:06:40,  2.34it/s]

step:13080, train_loss:0.05962340556685597, acc:0.6710200994793333


 79%|███████▊  | 34380/43738 [4:24:18<1:11:06,  2.19it/s]

step:13080, train_loss:0.05962210293316988, acc:0.6710296684118674


 79%|███████▊  | 34381/43738 [4:24:18<1:16:10,  2.05it/s]

step:13080, train_loss:0.05962121595733811, acc:0.6710392367877607


 79%|███████▊  | 34382/43738 [4:24:19<1:30:49,  1.72it/s]

step:13080, train_loss:0.05961950637952695, acc:0.6710488046070618


 79%|███████▊  | 34383/43738 [4:24:19<1:14:41,  2.09it/s]

step:13080, train_loss:0.05961777241476724, acc:0.6710583718698194


 79%|███████▉  | 34688/43738 [4:26:45<1:49:12,  1.38it/s]

step:13100, train_loss:0.05955010370369537, acc:0.6715290590405905


 79%|███████▉  | 34689/43738 [4:26:45<1:40:38,  1.50it/s]

step:13100, train_loss:0.0595483933813222, acc:0.6715385280636513


 79%|███████▉  | 34690/43738 [4:26:46<1:34:40,  1.59it/s]

step:13100, train_loss:0.059550784601873304, acc:0.6715191697895647


 79%|███████▉  | 34691/43738 [4:26:47<1:34:35,  1.59it/s]

step:13100, train_loss:0.059555009384324654, acc:0.6714998126315183


 79%|███████▉  | 34692/43738 [4:26:47<1:28:13,  1.71it/s]

step:13100, train_loss:0.0595548860038846, acc:0.6714804565894155


 79%|███████▉  | 34693/43738 [4:26:47<1:20:22,  1.88it/s]

step:13100, train_loss:0.05955384640049272, acc:0.6714899259216557


 79%|███████▉  | 34694/43738 [4:26:48<1:08:56,  2.19it/s]

step:13100, train_loss:0.05955242377168739, acc:0.6714993947080187


 79%|███████▉  | 34695/43738 [4:26:48<1:03:20,  2.38it/s]

step:13100, train_loss:0.059550773507272815, acc:0.6715088629485516


 79%|███████▉  | 34696/43738 [4:26:49<1:20:51,  1.86it/s]

step:13100, train_loss:0.059552951814401414, acc:0.671489508877104


 79%|███████▉  | 34697/43738 [4:26:49<1:09:36,  2.16it/s]

step:13100, train_loss:0.05955131607990027, acc:0.6714989768567887


 79%|███████▉  | 34698/43738 [4:26:50<1:12:34,  2.08it/s]

step:13100, train_loss:0.05955884370730294, acc:0.6714796241858321


 79%|███████▉  | 34699/43738 [4:26:50<1:06:13,  2.27it/s]

step:13100, train_loss:0.059557284606978476, acc:0.6714890919046659


 79%|███████▉  | 34700/43738 [4:26:51<1:10:57,  2.12it/s]

step:13100, train_loss:0.05955724078695215, acc:0.6714985590778098


 79%|███████▉  | 34701/43738 [4:26:51<1:15:59,  1.98it/s]

step:13100, train_loss:0.059558244930187854, acc:0.6714792080919858


 79%|███████▉  | 34702/43738 [4:26:52<1:28:54,  1.69it/s]

step:13100, train_loss:0.05956046709766578, acc:0.6714598582214282


 79%|███████▉  | 34703/43738 [4:26:52<1:15:07,  2.00it/s]

step:13100, train_loss:0.05955879441224883, acc:0.6714693254185518


 80%|████████  | 35008/43738 [4:29:14<1:07:58,  2.14it/s]

step:13120, train_loss:0.05959590593140634, acc:0.6715036563071298


 80%|████████  | 35009/43738 [4:29:14<1:01:21,  2.37it/s]

step:13120, train_loss:0.05959422940901495, acc:0.6715130395041276


 80%|████████  | 35010/43738 [4:29:15<1:07:11,  2.16it/s]

step:13120, train_loss:0.059594274741132346, acc:0.6714938588974578


 80%|████████  | 35011/43738 [4:29:16<1:22:19,  1.77it/s]

step:13120, train_loss:0.05959724942631425, acc:0.6714746793864785


 80%|████████  | 35012/43738 [4:29:16<1:15:50,  1.92it/s]

step:13120, train_loss:0.05960117378419813, acc:0.6714555009710956


 80%|████████  | 35013/43738 [4:29:17<1:04:44,  2.25it/s]

step:13120, train_loss:0.05959995696112658, acc:0.671464884471482


 80%|████████  | 35014/43738 [4:29:17<1:07:53,  2.14it/s]

step:13120, train_loss:0.059600660699720004, acc:0.6714457074313132


 80%|████████  | 35015/43738 [4:29:18<1:14:07,  1.96it/s]

step:13120, train_loss:0.059607177746875045, acc:0.6714265314865058


 80%|████████  | 35016/43738 [4:29:18<1:12:10,  2.01it/s]

step:13120, train_loss:0.05961060399007574, acc:0.6714073566369659


 80%|████████  | 35017/43738 [4:29:18<1:00:47,  2.39it/s]

step:13120, train_loss:0.05960900376021095, acc:0.6714167404403576


 80%|████████  | 35018/43738 [4:29:19<55:47,  2.60it/s]  

step:13120, train_loss:0.059608747661757486, acc:0.6713975669655605


 80%|████████  | 35019/43738 [4:29:19<52:15,  2.78it/s]

step:13120, train_loss:0.05961214681992053, acc:0.6713783945857963


 80%|████████  | 35020/43738 [4:29:20<1:02:02,  2.34it/s]

step:13120, train_loss:0.05961198399156486, acc:0.6713592233009709


 80%|████████  | 35021/43738 [4:29:20<58:25,  2.49it/s]  

step:13120, train_loss:0.059612500236307125, acc:0.6713686074069843


 80%|████████  | 35022/43738 [4:29:20<58:23,  2.49it/s]

step:13120, train_loss:0.05961419305316175, acc:0.6713494374964308


 80%|████████  | 35023/43738 [4:29:21<58:44,  2.47it/s]

step:13120, train_loss:0.05961249238287563, acc:0.6713588213459727


 81%|████████  | 35328/43738 [4:31:40<1:08:34,  2.04it/s]

step:13140, train_loss:0.059616427417498584, acc:0.6711956521739131


 81%|████████  | 35329/43738 [4:31:41<1:07:51,  2.07it/s]

step:13140, train_loss:0.05961474323716581, acc:0.6712049590987574


 81%|████████  | 35330/43738 [4:31:41<58:47,  2.38it/s]  

step:13140, train_loss:0.05961305865195287, acc:0.671214265496745


 81%|████████  | 35331/43738 [4:31:42<58:59,  2.38it/s]

step:13140, train_loss:0.05961148838451326, acc:0.6712235713679205


 81%|████████  | 35332/43738 [4:31:42<1:02:55,  2.23it/s]

step:13140, train_loss:0.05961286867172493, acc:0.6712045737575003


 81%|████████  | 35333/43738 [4:31:43<1:12:50,  1.92it/s]

step:13140, train_loss:0.059611190393537905, acc:0.6712138793762206


 81%|████████  | 35334/43738 [4:31:43<1:00:41,  2.31it/s]

step:13140, train_loss:0.05960950466487769, acc:0.6712231844682176


 81%|████████  | 35335/43738 [4:31:43<55:43,  2.51it/s]  

step:13140, train_loss:0.05961071212275684, acc:0.6712041884816754


 81%|████████  | 35336/43738 [4:31:44<57:06,  2.45it/s]

step:13140, train_loss:0.05961183135144831, acc:0.6711851935702966


 81%|████████  | 35337/43738 [4:31:44<57:00,  2.46it/s]

step:13140, train_loss:0.0596107478234943, acc:0.6711944986840989


 81%|████████  | 35338/43738 [4:31:45<55:00,  2.55it/s]

step:13140, train_loss:0.059611837517367365, acc:0.671175505121965


 81%|████████  | 35339/43738 [4:31:45<56:29,  2.48it/s]

step:13140, train_loss:0.059610922070756654, acc:0.6711848099833045


 81%|████████  | 35340/43738 [4:31:45<56:19,  2.48it/s]

step:13140, train_loss:0.05961013353168113, acc:0.6711941143180532


 81%|████████  | 35341/43738 [4:31:46<48:12,  2.90it/s]

step:13140, train_loss:0.05960844686057914, acc:0.6712034181262556


 81%|████████  | 35342/43738 [4:31:46<48:48,  2.87it/s]

step:13140, train_loss:0.05960739043102931, acc:0.6712127214079565


 81%|████████  | 35343/43738 [4:31:47<1:09:46,  2.01it/s]

step:13140, train_loss:0.05961066395585589, acc:0.6711937300172595


 82%|████████▏ | 35648/43738 [4:34:08<1:09:22,  1.94it/s]

step:13160, train_loss:0.059646034212941755, acc:0.6709773339317774


 82%|████████▏ | 35649/43738 [4:34:08<1:01:28,  2.19it/s]

step:13160, train_loss:0.059644658451667255, acc:0.6709865634379646


 82%|████████▏ | 35650/43738 [4:34:09<1:03:32,  2.12it/s]

step:13160, train_loss:0.05964435034444416, acc:0.6709957924263674


 82%|████████▏ | 35651/43738 [4:34:09<1:08:00,  1.98it/s]

step:13160, train_loss:0.05964283043411289, acc:0.6710050208970295


 82%|████████▏ | 35652/43738 [4:34:10<1:09:02,  1.95it/s]

step:13160, train_loss:0.05964122689074229, acc:0.6710142488499944


 82%|████████▏ | 35653/43738 [4:34:10<1:06:14,  2.03it/s]

step:13160, train_loss:0.05964637337015357, acc:0.6709954281547135


 82%|████████▏ | 35654/43738 [4:34:11<56:52,  2.37it/s]  

step:13160, train_loss:0.05964470085069851, acc:0.6710046558590902


 82%|████████▏ | 35655/43738 [4:34:11<57:11,  2.36it/s]

step:13160, train_loss:0.0596440966629978, acc:0.6710138830458561


 82%|████████▏ | 35656/43738 [4:34:11<50:59,  2.64it/s]

step:13160, train_loss:0.059642425148922484, acc:0.671023109715055


 82%|████████▏ | 35657/43738 [4:34:12<49:00,  2.75it/s]

step:13160, train_loss:0.059649207330134756, acc:0.6710042908825756


 82%|████████▏ | 35658/43738 [4:34:12<55:10,  2.44it/s]

step:13160, train_loss:0.05964763060836353, acc:0.67101351730327


 82%|████████▏ | 35659/43738 [4:34:13<51:53,  2.60it/s]

step:13160, train_loss:0.0596460621144804, acc:0.6710227432064836


 82%|████████▏ | 35660/43738 [4:34:13<1:03:04,  2.13it/s]

step:13160, train_loss:0.059645810782339985, acc:0.6710319685922602


 82%|████████▏ | 35661/43738 [4:34:14<1:02:06,  2.17it/s]

step:13160, train_loss:0.059644225607917135, acc:0.6710411934606433


 82%|████████▏ | 35662/43738 [4:34:14<1:08:58,  1.95it/s]

step:13160, train_loss:0.05964479851948911, acc:0.671022376759576


 82%|████████▏ | 35663/43738 [4:34:15<1:14:42,  1.80it/s]

step:13160, train_loss:0.05965057597304163, acc:0.6710035611137594


 82%|████████▏ | 35968/43738 [4:36:34<56:21,  2.30it/s]  

step:13180, train_loss:0.05971463092938875, acc:0.6707629003558719


 82%|████████▏ | 35969/43738 [4:36:35<53:50,  2.40it/s]

step:13180, train_loss:0.059715441930111565, acc:0.6707442519947733


 82%|████████▏ | 35970/43738 [4:36:36<1:06:24,  1.95it/s]

step:13180, train_loss:0.059715472721921105, acc:0.6707534056157909


 82%|████████▏ | 35971/43738 [4:36:36<1:04:07,  2.02it/s]

step:13180, train_loss:0.05971420135989467, acc:0.6707625587278642


 82%|████████▏ | 35972/43738 [4:36:36<54:39,  2.37it/s]  

step:13180, train_loss:0.05971363160703799, acc:0.6707717113310353


 82%|████████▏ | 35973/43738 [4:36:37<1:02:07,  2.08it/s]

step:13180, train_loss:0.05971603162063404, acc:0.670753064798599


 82%|████████▏ | 35974/43738 [4:36:37<52:56,  2.44it/s]  

step:13180, train_loss:0.059714372114487956, acc:0.6707622171568355


 82%|████████▏ | 35975/43738 [4:36:37<52:25,  2.47it/s]

step:13180, train_loss:0.05971663458640263, acc:0.6707435719249479


 82%|████████▏ | 35976/43738 [4:36:38<46:59,  2.75it/s]

step:13180, train_loss:0.059716126026906846, acc:0.6707527240382477


 82%|████████▏ | 35977/43738 [4:36:38<43:17,  2.99it/s]

step:13180, train_loss:0.0597144680173246, acc:0.6707618756427718


 82%|████████▏ | 35978/43738 [4:36:39<51:45,  2.50it/s]

step:13180, train_loss:0.05971295023887438, acc:0.6707710267385625


 82%|████████▏ | 35979/43738 [4:36:39<51:25,  2.51it/s]

step:13180, train_loss:0.05971185607051065, acc:0.6707801773256622


 82%|████████▏ | 35980/43738 [4:36:39<45:37,  2.83it/s]

step:13180, train_loss:0.059711433628460056, acc:0.6707615341856586


 82%|████████▏ | 35981/43738 [4:36:40<52:47,  2.45it/s]

step:13180, train_loss:0.05970980677149036, acc:0.6707706845279453


 82%|████████▏ | 35982/43738 [4:36:40<45:57,  2.81it/s]

step:13180, train_loss:0.05970817904044223, acc:0.6707798343616252


 82%|████████▏ | 35983/43738 [4:36:40<49:00,  2.64it/s]

step:13180, train_loss:0.059708579591064945, acc:0.6707611927854821


 83%|████████▎ | 36288/43738 [4:39:02<57:58,  2.14it/s]  

step:13200, train_loss:0.05967069627141111, acc:0.6710482804232805


 83%|████████▎ | 36289/43738 [4:39:02<50:38,  2.45it/s]

step:13200, train_loss:0.05966967884626276, acc:0.6710573452010251


 83%|████████▎ | 36290/43738 [4:39:02<52:56,  2.34it/s]

step:13200, train_loss:0.059668392906082925, acc:0.6710664094791954


 83%|████████▎ | 36291/43738 [4:39:03<55:36,  2.23it/s]

step:13200, train_loss:0.059668164915539085, acc:0.6710754732578326


 83%|████████▎ | 36292/43738 [4:39:04<1:05:20,  1.90it/s]

step:13200, train_loss:0.059668722017546486, acc:0.6710569822550424


 83%|████████▎ | 36293/43738 [4:39:04<57:50,  2.15it/s]  

step:13200, train_loss:0.05966714842860589, acc:0.6710660457939548


 83%|████████▎ | 36294/43738 [4:39:05<1:09:37,  1.78it/s]

step:13200, train_loss:0.059667023638648294, acc:0.6710475560698738


 83%|████████▎ | 36295/43738 [4:39:05<1:07:26,  1.84it/s]

step:13200, train_loss:0.05966553743834852, acc:0.6710566193690591


 83%|████████▎ | 36296/43738 [4:39:06<1:03:33,  1.95it/s]

step:13200, train_loss:0.05966543434238158, acc:0.671065682168834


 83%|████████▎ | 36297/43738 [4:39:06<1:00:17,  2.06it/s]

step:13200, train_loss:0.05966451297155969, acc:0.6710747444692399


 83%|████████▎ | 36298/43738 [4:39:06<54:28,  2.28it/s]  

step:13200, train_loss:0.05966286973298042, acc:0.6710838062703179


 83%|████████▎ | 36299/43738 [4:39:07<54:09,  2.29it/s]

step:13200, train_loss:0.059664228776266405, acc:0.6710653186038182


 83%|████████▎ | 36300/43738 [4:39:07<48:09,  2.57it/s]

step:13200, train_loss:0.059662689219237584, acc:0.6710743801652893


 83%|████████▎ | 36301/43738 [4:39:07<41:53,  2.96it/s]

step:13200, train_loss:0.059661045716803196, acc:0.6710834412275144


 83%|████████▎ | 36302/43738 [4:39:08<42:16,  2.93it/s]

step:13200, train_loss:0.059659781672503125, acc:0.6710925017905349


 83%|████████▎ | 36303/43738 [4:39:08<43:25,  2.85it/s]

step:13200, train_loss:0.059659922511862924, acc:0.6711015618543922


 84%|████████▎ | 36608/43738 [4:41:25<1:04:38,  1.84it/s]

step:13220, train_loss:0.059692226848861614, acc:0.6708916083916084


 84%|████████▎ | 36609/43738 [4:41:26<1:04:16,  1.85it/s]

step:13220, train_loss:0.05969234834588036, acc:0.6709005982135541


 84%|████████▎ | 36610/43738 [4:41:26<56:11,  2.11it/s]  

step:13220, train_loss:0.059690728118858824, acc:0.6709095875443868


 84%|████████▎ | 36611/43738 [4:41:26<57:39,  2.06it/s]

step:13220, train_loss:0.05969346956296864, acc:0.6708912621889596


 84%|████████▎ | 36612/43738 [4:41:27<51:51,  2.29it/s]

step:13220, train_loss:0.05969184822410672, acc:0.6709002512837321


 84%|████████▎ | 36613/43738 [4:41:27<53:26,  2.22it/s]

step:13220, train_loss:0.059691031982457905, acc:0.6709092398874716


 84%|████████▎ | 36614/43738 [4:41:28<1:04:22,  1.84it/s]

step:13220, train_loss:0.05968957936328807, acc:0.6709182280002185


 84%|████████▎ | 36615/43738 [4:41:28<1:02:07,  1.91it/s]

step:13220, train_loss:0.05968796135729093, acc:0.6709272156220129


 84%|████████▎ | 36616/43738 [4:41:29<1:00:01,  1.98it/s]

step:13220, train_loss:0.059686389600062735, acc:0.6709362027528949


 84%|████████▎ | 36617/43738 [4:41:29<55:17,  2.15it/s]  

step:13220, train_loss:0.05968485892854622, acc:0.6709451893929049


 84%|████████▎ | 36618/43738 [4:41:30<46:46,  2.54it/s]

step:13220, train_loss:0.05968323502436327, acc:0.6709541755420831


 84%|████████▎ | 36619/43738 [4:41:30<55:30,  2.14it/s]

step:13220, train_loss:0.059683728430623764, acc:0.6709358529725006


 84%|████████▎ | 36620/43738 [4:41:30<47:24,  2.50it/s]

step:13220, train_loss:0.059682135568021676, acc:0.6709448388858548


 84%|████████▎ | 36621/43738 [4:41:31<55:29,  2.14it/s]

step:13220, train_loss:0.05968126495612532, acc:0.6709538243084568


 84%|████████▎ | 36622/43738 [4:41:32<56:57,  2.08it/s]

step:13220, train_loss:0.059680671210282546, acc:0.6709628092403473


 84%|████████▎ | 36623/43738 [4:41:32<54:12,  2.19it/s]

step:13220, train_loss:0.059679088563966774, acc:0.6709717936815662


 84%|████████▍ | 36928/43738 [4:43:54<51:09,  2.22it/s]  

step:13240, train_loss:0.059641327525457595, acc:0.6713604852686309


 84%|████████▍ | 36929/43738 [4:43:54<44:29,  2.55it/s]

step:13240, train_loss:0.05963977183832253, acc:0.6713693844945706


 84%|████████▍ | 36930/43738 [4:43:55<40:52,  2.78it/s]

step:13240, train_loss:0.05963815697073989, acc:0.6713782832385594


 84%|████████▍ | 36931/43738 [4:43:55<36:19,  3.12it/s]

step:13240, train_loss:0.05963745552595714, acc:0.6713871815006364


 84%|████████▍ | 36932/43738 [4:43:56<45:40,  2.48it/s]

step:13240, train_loss:0.05964141466188564, acc:0.6713690024910647


 84%|████████▍ | 36933/43738 [4:43:56<46:03,  2.46it/s]

step:13240, train_loss:0.05964317545092872, acc:0.6713508244659248


 84%|████████▍ | 36934/43738 [4:43:56<40:55,  2.77it/s]

step:13240, train_loss:0.05964158806805865, acc:0.6713597227486868


 84%|████████▍ | 36935/43738 [4:43:57<47:33,  2.38it/s]

step:13240, train_loss:0.05964636576553558, acc:0.6713415459591173


 84%|████████▍ | 36936/43738 [4:43:57<42:05,  2.69it/s]

step:13240, train_loss:0.05964539239230516, acc:0.6713504440112628


 84%|████████▍ | 36937/43738 [4:43:57<39:16,  2.89it/s]

step:13240, train_loss:0.059643801303329864, acc:0.6713593415816119


 84%|████████▍ | 36938/43738 [4:43:58<48:17,  2.35it/s]

step:13240, train_loss:0.05964259753544431, acc:0.6713682386702041


 84%|████████▍ | 36939/43738 [4:43:59<1:00:54,  1.86it/s]

step:13240, train_loss:0.05964767916301898, acc:0.6713500636183979


 84%|████████▍ | 36940/43738 [4:43:59<59:53,  1.89it/s]  

step:13240, train_loss:0.05964672571581464, acc:0.6713589604764483


 84%|████████▍ | 36941/43738 [4:44:00<56:35,  2.00it/s]

step:13240, train_loss:0.0596457271815026, acc:0.6713678568528194


 84%|████████▍ | 36942/43738 [4:44:00<52:16,  2.17it/s]

step:13240, train_loss:0.05964594093543961, acc:0.6713496832873153


 84%|████████▍ | 36943/43738 [4:44:00<49:14,  2.30it/s]

step:13240, train_loss:0.05965039757643227, acc:0.6713315107056818


 85%|████████▌ | 37248/43738 [4:46:20<1:00:45,  1.78it/s]

step:13260, train_loss:0.05960976765192873, acc:0.6714991408934707


 85%|████████▌ | 37249/43738 [4:46:21<1:00:23,  1.79it/s]

step:13260, train_loss:0.05960820049342145, acc:0.6715079599452334


 85%|████████▌ | 37250/43738 [4:46:21<1:00:24,  1.79it/s]

step:13260, train_loss:0.05960943678971349, acc:0.6714899328859061


 85%|████████▌ | 37251/43738 [4:46:22<53:12,  2.03it/s]  

step:13260, train_loss:0.05961847695327619, acc:0.6714719067944485


 85%|████████▌ | 37252/43738 [4:46:23<1:02:02,  1.74it/s]

step:13260, train_loss:0.0596202381958628, acc:0.6714538816707828


 85%|████████▌ | 37253/43738 [4:46:23<1:06:12,  1.63it/s]

step:13260, train_loss:0.05962573343816919, acc:0.671435857514831


 85%|████████▌ | 37254/43738 [4:46:24<55:40,  1.94it/s]  

step:13260, train_loss:0.0596257668330913, acc:0.6714178343265153


 85%|████████▌ | 37255/43738 [4:46:24<46:32,  2.32it/s]

step:13260, train_loss:0.059624166378592094, acc:0.6714266541403838


 85%|████████▌ | 37256/43738 [4:46:24<52:46,  2.05it/s]

step:13260, train_loss:0.05962643375566494, acc:0.6714086321666308


 85%|████████▌ | 37257/43738 [4:46:25<48:18,  2.24it/s]

step:13260, train_loss:0.05962495767890805, acc:0.6714174517540328


 85%|████████▌ | 37258/43738 [4:46:25<49:34,  2.18it/s]

step:13260, train_loss:0.0596233629086373, acc:0.6714262708680016


 85%|████████▌ | 37259/43738 [4:46:26<50:15,  2.15it/s]

step:13260, train_loss:0.05962498511407715, acc:0.6714082503556188


 85%|████████▌ | 37260/43738 [4:46:26<48:09,  2.24it/s]

step:13260, train_loss:0.05962420258854854, acc:0.6714170692431563


 85%|████████▌ | 37261/43738 [4:46:26<44:42,  2.41it/s]

step:13260, train_loss:0.0596244902050333, acc:0.6713990499449827


 85%|████████▌ | 37262/43738 [4:46:27<47:14,  2.28it/s]

step:13260, train_loss:0.059624331197140716, acc:0.6714078686060866


 85%|████████▌ | 37263/43738 [4:46:27<47:39,  2.26it/s]

step:13260, train_loss:0.05962497287956595, acc:0.6713898505219655


 86%|████████▌ | 37568/43738 [4:48:41<48:17,  2.13it/s]  

step:13280, train_loss:0.0596140845933774, acc:0.671103066439523


 86%|████████▌ | 37569/43738 [4:48:42<52:00,  1.98it/s]

step:13280, train_loss:0.05961407427195815, acc:0.671085203226064


 86%|████████▌ | 37570/43738 [4:48:42<44:20,  2.32it/s]

step:13280, train_loss:0.05961400207517663, acc:0.671093957945169


 86%|████████▌ | 37571/43738 [4:48:43<50:35,  2.03it/s]

step:13280, train_loss:0.05961252312612025, acc:0.671102712198238


 86%|████████▌ | 37572/43738 [4:48:43<42:12,  2.43it/s]

step:13280, train_loss:0.05961093846833311, acc:0.6711114659853082


 86%|████████▌ | 37573/43738 [4:48:43<37:23,  2.75it/s]

step:13280, train_loss:0.0596109906523945, acc:0.671093604450004


 86%|████████▌ | 37574/43738 [4:48:44<45:09,  2.27it/s]

step:13280, train_loss:0.05961261041390485, acc:0.6710757438654389


 86%|████████▌ | 37575/43738 [4:48:44<41:08,  2.50it/s]

step:13280, train_loss:0.059613057734047854, acc:0.6710578842315369


 86%|████████▌ | 37576/43738 [4:48:45<37:48,  2.72it/s]

step:13280, train_loss:0.05961188594165107, acc:0.671066638279753


 86%|████████▌ | 37577/43738 [4:48:45<40:25,  2.54it/s]

step:13280, train_loss:0.059611471630543096, acc:0.6710753918620432


 86%|████████▌ | 37578/43738 [4:48:45<43:32,  2.36it/s]

step:13280, train_loss:0.05960989994064125, acc:0.6710841449784448


 86%|████████▌ | 37579/43738 [4:48:46<42:01,  2.44it/s]

step:13280, train_loss:0.059608339461385364, acc:0.6710928976289949


 86%|████████▌ | 37580/43738 [4:48:46<40:57,  2.51it/s]

step:13280, train_loss:0.05960675346079569, acc:0.6711016498137307


 86%|████████▌ | 37581/43738 [4:48:47<37:26,  2.74it/s]

step:13280, train_loss:0.059605323640231446, acc:0.6711104015326894


 86%|████████▌ | 37582/43738 [4:48:47<36:44,  2.79it/s]

step:13280, train_loss:0.05960969707610403, acc:0.6710925443031238


 86%|████████▌ | 37583/43738 [4:48:47<40:53,  2.51it/s]

step:13280, train_loss:0.0596081291379421, acc:0.6711012957986323


 87%|████████▋ | 37888/43738 [4:51:03<1:00:22,  1.62it/s]

step:13300, train_loss:0.05957994657244307, acc:0.6713207347972973


 87%|████████▋ | 37889/43738 [4:51:03<1:04:36,  1.51it/s]

step:13300, train_loss:0.059578904043122015, acc:0.6713294095911743


 87%|████████▋ | 37890/43738 [4:51:04<1:02:35,  1.56it/s]

step:13300, train_loss:0.05958478552296281, acc:0.6713116917392452


 87%|████████▋ | 37891/43738 [4:51:05<59:36,  1.63it/s]  

step:13300, train_loss:0.05958322133519364, acc:0.6713203663139004


 87%|████████▋ | 37892/43738 [4:51:05<56:58,  1.71it/s]

step:13300, train_loss:0.05958606340596058, acc:0.671302649635807


 87%|████████▋ | 37893/43738 [4:51:06<1:01:04,  1.60it/s]

step:13300, train_loss:0.059584678962119914, acc:0.6713113239912385


 87%|████████▋ | 37894/43738 [4:51:06<56:38,  1.72it/s]  

step:13300, train_loss:0.05958318738312877, acc:0.6713199978888479


 87%|████████▋ | 37895/43738 [4:51:07<47:01,  2.07it/s]

step:13300, train_loss:0.05958165682899793, acc:0.6713286713286714


 87%|████████▋ | 37896/43738 [4:51:07<42:34,  2.29it/s]

step:13300, train_loss:0.059580437014046375, acc:0.6713373443107452


 87%|████████▋ | 37897/43738 [4:51:07<42:42,  2.28it/s]

step:13300, train_loss:0.05957947907292453, acc:0.6713460168351056


 87%|████████▋ | 37898/43738 [4:51:08<40:16,  2.42it/s]

step:13300, train_loss:0.05957791146464955, acc:0.6713546889017891


 87%|████████▋ | 37899/43738 [4:51:08<36:30,  2.67it/s]

step:13300, train_loss:0.05957633978671715, acc:0.6713633605108315


 87%|████████▋ | 37900/43738 [4:51:09<47:12,  2.06it/s]

step:13300, train_loss:0.05957919456239017, acc:0.6713456464379948


 87%|████████▋ | 37901/43738 [4:51:09<45:08,  2.16it/s]

step:13300, train_loss:0.05957762948791193, acc:0.6713543178280257


 87%|████████▋ | 37902/43738 [4:51:10<43:00,  2.26it/s]

step:13300, train_loss:0.05958050156307279, acc:0.6713366049284998


 87%|████████▋ | 37903/43738 [4:51:10<42:32,  2.29it/s]

step:13300, train_loss:0.05957894469905434, acc:0.6713452760995172


 87%|████████▋ | 38208/43738 [4:53:28<43:08,  2.14it/s]  

step:13320, train_loss:0.05955433850398537, acc:0.671534757118928


 87%|████████▋ | 38209/43738 [4:53:28<37:46,  2.44it/s]

step:13320, train_loss:0.05955303253297424, acc:0.6715433536601324


 87%|████████▋ | 38210/43738 [4:53:28<33:12,  2.77it/s]

step:13320, train_loss:0.05955149019284334, acc:0.671551949751374


 87%|████████▋ | 38211/43738 [4:53:28<34:59,  2.63it/s]

step:13320, train_loss:0.05955142515206639, acc:0.671560545392688


 87%|████████▋ | 38212/43738 [4:53:29<33:34,  2.74it/s]

step:13320, train_loss:0.05955100566831572, acc:0.6715691405841097


 87%|████████▋ | 38213/43738 [4:53:29<31:02,  2.97it/s]

step:13320, train_loss:0.05954980069445746, acc:0.6715777353256746


 87%|████████▋ | 38214/43738 [4:53:30<38:07,  2.42it/s]

step:13320, train_loss:0.05954867013583339, acc:0.6715863296174177


 87%|████████▋ | 38215/43738 [4:53:30<36:44,  2.51it/s]

step:13320, train_loss:0.05954886996732409, acc:0.6715949234593745


 87%|████████▋ | 38216/43738 [4:53:31<46:01,  2.00it/s]

step:13320, train_loss:0.05954739212859178, acc:0.6716035168515805


 87%|████████▋ | 38217/43738 [4:53:31<45:52,  2.01it/s]

step:13320, train_loss:0.05954924531499532, acc:0.6715859434283172


 87%|████████▋ | 38218/43738 [4:53:32<42:41,  2.16it/s]

step:13320, train_loss:0.05954786465281173, acc:0.6715945366057878


 87%|████████▋ | 38219/43738 [4:53:32<36:16,  2.54it/s]

step:13320, train_loss:0.05954944301810372, acc:0.6715769643371098


 87%|████████▋ | 38220/43738 [4:53:32<38:46,  2.37it/s]

step:13320, train_loss:0.05955084636538029, acc:0.6715593929879644


 87%|████████▋ | 38221/43738 [4:53:33<47:47,  1.92it/s]

step:13320, train_loss:0.059554182808062635, acc:0.6715418225582794


 87%|████████▋ | 38222/43738 [4:53:34<44:28,  2.07it/s]

step:13320, train_loss:0.059552637075721754, acc:0.6715504159907907


 87%|████████▋ | 38223/43738 [4:53:34<49:24,  1.86it/s]

step:13320, train_loss:0.0595512075935152, acc:0.6715590089736546


 88%|████████▊ | 38528/43738 [4:55:53<55:20,  1.57it/s]

step:13340, train_loss:0.059590293881296705, acc:0.6714078073089701


 88%|████████▊ | 38529/43738 [4:55:53<50:44,  1.71it/s]

step:13340, train_loss:0.05959044814928066, acc:0.6713903812712503


 88%|████████▊ | 38530/43738 [4:55:54<43:48,  1.98it/s]

step:13340, train_loss:0.05958990192396211, acc:0.6713989099403063


 88%|████████▊ | 38531/43738 [4:55:54<46:33,  1.86it/s]

step:13340, train_loss:0.05958853110815965, acc:0.671407438166671


 88%|████████▊ | 38532/43738 [4:55:55<48:03,  1.81it/s]

step:13340, train_loss:0.05958900024273775, acc:0.6713900134952766


 88%|████████▊ | 38533/43738 [4:55:55<49:46,  1.74it/s]

step:13340, train_loss:0.05958799941043495, acc:0.6713985415098747


 88%|████████▊ | 38534/43738 [4:55:56<53:54,  1.61it/s]

step:13340, train_loss:0.05958871069360618, acc:0.6713811179737375


 88%|████████▊ | 38535/43738 [4:55:57<59:17,  1.46it/s]

step:13340, train_loss:0.05958888841970668, acc:0.6713896457765668


 88%|████████▊ | 38536/43738 [4:55:58<56:16,  1.54it/s]

step:13340, train_loss:0.0595873424193572, acc:0.6713981731368072


 88%|████████▊ | 38537/43738 [4:55:58<54:12,  1.60it/s]

step:13340, train_loss:0.05958737596587482, acc:0.6714067000544931


 88%|████████▊ | 38538/43738 [4:55:59<49:08,  1.76it/s]

step:13340, train_loss:0.059588100290328676, acc:0.6713892781151072


 88%|████████▊ | 38539/43738 [4:55:59<44:10,  1.96it/s]

step:13340, train_loss:0.0595906170737121, acc:0.6713718570798412


 88%|████████▊ | 38540/43738 [4:55:59<42:45,  2.03it/s]

step:13340, train_loss:0.05959260742126494, acc:0.6713544369486248


 88%|████████▊ | 38541/43738 [4:56:00<38:11,  2.27it/s]

step:13340, train_loss:0.05959684720074928, acc:0.6713370177213877


 88%|████████▊ | 38542/43738 [4:56:00<37:52,  2.29it/s]

step:13340, train_loss:0.059597487448956786, acc:0.6713455451196098


 88%|████████▊ | 38543/43738 [4:56:00<33:02,  2.62it/s]

step:13340, train_loss:0.0595968752111562, acc:0.6713540720753444


 89%|████████▉ | 38848/43738 [4:58:21<51:26,  1.58it/s]

step:13360, train_loss:0.059573715774584875, acc:0.6717462932454695


 89%|████████▉ | 38849/43738 [4:58:22<47:29,  1.72it/s]

step:13360, train_loss:0.059573388123411654, acc:0.6717547427218203


 89%|████████▉ | 38850/43738 [4:58:22<39:49,  2.05it/s]

step:13360, train_loss:0.05957186251207365, acc:0.6717631917631918


 89%|████████▉ | 38851/43738 [4:58:22<39:21,  2.07it/s]

step:13360, train_loss:0.059570732867811976, acc:0.6717716403696172


 89%|████████▉ | 38852/43738 [4:58:23<37:14,  2.19it/s]

step:13360, train_loss:0.05956927513820341, acc:0.6717800885411305


 89%|████████▉ | 38853/43738 [4:58:23<39:34,  2.06it/s]

step:13360, train_loss:0.059570076104522515, acc:0.6717885362777649


 89%|████████▉ | 38854/43738 [4:58:24<37:32,  2.17it/s]

step:13360, train_loss:0.059568546013629445, acc:0.6717969835795542


 89%|████████▉ | 38855/43738 [4:58:24<34:13,  2.38it/s]

step:13360, train_loss:0.059570945060139664, acc:0.6717796937331103


 89%|████████▉ | 38856/43738 [4:58:24<29:34,  2.75it/s]

step:13360, train_loss:0.059570015849598174, acc:0.6717881408276714


 89%|████████▉ | 38857/43738 [4:58:25<31:54,  2.55it/s]

step:13360, train_loss:0.05956855078129184, acc:0.671796587487454


 89%|████████▉ | 38858/43738 [4:58:25<30:10,  2.69it/s]

step:13360, train_loss:0.059567019486699305, acc:0.6718050337124917


 89%|████████▉ | 38859/43738 [4:58:26<38:32,  2.11it/s]

step:13360, train_loss:0.059573017302819005, acc:0.6717877454386372


 89%|████████▉ | 38860/43738 [4:58:26<37:50,  2.15it/s]

step:13360, train_loss:0.05957213648907763, acc:0.6717961914565106


 89%|████████▉ | 38861/43738 [4:58:27<36:04,  2.25it/s]

step:13360, train_loss:0.05957065842494739, acc:0.6718046370397056


 89%|████████▉ | 38862/43738 [4:58:27<34:46,  2.34it/s]

step:13360, train_loss:0.059569177328353935, acc:0.6718130821882559


 89%|████████▉ | 38863/43738 [4:58:27<30:48,  2.64it/s]

step:13360, train_loss:0.05956764889481285, acc:0.6718215269021949


 90%|████████▉ | 39168/43738 [5:00:48<33:23,  2.28it/s]

step:13380, train_loss:0.05958171767758442, acc:0.6714920343137255


 90%|████████▉ | 39169/43738 [5:00:48<30:55,  2.46it/s]

step:13380, train_loss:0.05958031824577678, acc:0.6715004212514999


 90%|████████▉ | 39170/43738 [5:00:49<37:27,  2.03it/s]

step:13380, train_loss:0.05957884212485148, acc:0.6715088077610416


 90%|████████▉ | 39171/43738 [5:00:49<40:04,  1.90it/s]

step:13380, train_loss:0.05957732211571412, acc:0.6715171938423834


 90%|████████▉ | 39172/43738 [5:00:50<36:48,  2.07it/s]

step:13380, train_loss:0.05957604071840541, acc:0.671525579495558


 90%|████████▉ | 39173/43738 [5:00:50<31:10,  2.44it/s]

step:13380, train_loss:0.0595745528765022, acc:0.6715339647205983


 90%|████████▉ | 39174/43738 [5:00:51<39:20,  1.93it/s]

step:13380, train_loss:0.05957992302442031, acc:0.6715168223821922


 90%|████████▉ | 39175/43738 [5:00:51<35:15,  2.16it/s]

step:13380, train_loss:0.05957843709585953, acc:0.6715252074026803


 90%|████████▉ | 39176/43738 [5:00:51<32:51,  2.31it/s]

step:13380, train_loss:0.05957696278950383, acc:0.671533591995099


 90%|████████▉ | 39177/43738 [5:00:52<29:05,  2.61it/s]

step:13380, train_loss:0.05957544209502661, acc:0.6715419761594813


 90%|████████▉ | 39178/43738 [5:00:52<26:06,  2.91it/s]

step:13380, train_loss:0.059575355697638624, acc:0.6715503598958599


 90%|████████▉ | 39179/43738 [5:00:52<28:29,  2.67it/s]

step:13380, train_loss:0.05957631148607101, acc:0.6715332193266801


 90%|████████▉ | 39180/43738 [5:00:53<27:30,  2.76it/s]

step:13380, train_loss:0.05957539085955403, acc:0.6715416028586013


 90%|████████▉ | 39181/43738 [5:00:53<27:51,  2.73it/s]

step:13380, train_loss:0.05957566495679298, acc:0.6715244633878665


 90%|████████▉ | 39182/43738 [5:00:53<28:11,  2.69it/s]

step:13380, train_loss:0.05957460293370897, acc:0.6715328467153284


 90%|████████▉ | 39183/43738 [5:00:54<25:59,  2.92it/s]

step:13380, train_loss:0.05957331209455079, acc:0.671541229614884


 90%|█████████ | 39488/43738 [5:03:03<36:12,  1.96it/s]

step:13400, train_loss:0.059522525160048835, acc:0.6716470826580226


 90%|█████████ | 39490/43738 [5:03:03<28:33,  2.48it/s]

step:13400, train_loss:0.05952353380513872, acc:0.6716300741978779
step:13400, train_loss:0.05952202674918952, acc:0.6716383894656875


 90%|█████████ | 39491/43738 [5:03:03<25:14,  2.80it/s]

step:13400, train_loss:0.05952103411715363, acc:0.6716467043123749


 90%|█████████ | 39493/43738 [5:03:04<23:47,  2.97it/s]

step:13400, train_loss:0.05951974332878589, acc:0.6716550187379723
step:13400, train_loss:0.05952226305942982, acc:0.6716380117995594


 90%|█████████ | 39494/43738 [5:03:04<21:05,  3.35it/s]

step:13400, train_loss:0.059522137903878704, acc:0.6716210057223883


 90%|█████████ | 39495/43738 [5:03:05<25:09,  2.81it/s]

step:13400, train_loss:0.05952065791162311, acc:0.6716293201671097


 90%|█████████ | 39496/43738 [5:03:05<25:34,  2.76it/s]

step:13400, train_loss:0.05951994117354662, acc:0.6716376341908041


 90%|█████████ | 39497/43738 [5:03:06<29:37,  2.39it/s]

step:13400, train_loss:0.05952126931319591, acc:0.6716206294148923


 90%|█████████ | 39498/43738 [5:03:06<25:15,  2.80it/s]

step:13400, train_loss:0.05952025762269691, acc:0.6716289432376323


 90%|█████████ | 39499/43738 [5:03:06<26:30,  2.67it/s]

step:13400, train_loss:0.05951875351992787, acc:0.6716372566394085


 90%|█████████ | 39500/43738 [5:03:07<26:30,  2.66it/s]

step:13400, train_loss:0.059520436406546695, acc:0.671620253164557


 90%|█████████ | 39501/43738 [5:03:07<25:15,  2.79it/s]

step:13400, train_loss:0.0595191375678783, acc:0.6716285663654085


 90%|█████████ | 39502/43738 [5:03:08<30:19,  2.33it/s]

step:13400, train_loss:0.05952092190414418, acc:0.6716115639714445


 90%|█████████ | 39503/43738 [5:03:08<28:07,  2.51it/s]

step:13400, train_loss:0.05952144084371112, acc:0.6715945624382959


 91%|█████████ | 39808/43738 [5:05:24<32:37,  2.01it/s]

step:13420, train_loss:0.05958650329965816, acc:0.6712469855305466


 91%|█████████ | 39809/43738 [5:05:25<38:41,  1.69it/s]

step:13420, train_loss:0.05958575725245012, acc:0.671255243789093


 91%|█████████ | 39810/43738 [5:05:26<32:52,  1.99it/s]

step:13420, train_loss:0.0595842889286783, acc:0.6712635016327556


 91%|█████████ | 39811/43738 [5:05:26<28:20,  2.31it/s]

step:13420, train_loss:0.059582998644447195, acc:0.6712717590615659


 91%|█████████ | 39812/43738 [5:05:26<29:04,  2.25it/s]

step:13420, train_loss:0.059581583500739124, acc:0.6712800160755551


 91%|█████████ | 39813/43738 [5:05:27<25:50,  2.53it/s]

step:13420, train_loss:0.05958706889825336, acc:0.6712631552507975


 91%|█████████ | 39814/43738 [5:05:27<29:00,  2.25it/s]

step:13420, train_loss:0.05958578543403677, acc:0.6712714120661074


 91%|█████████ | 39815/43738 [5:05:27<26:03,  2.51it/s]

step:13420, train_loss:0.059585898142291566, acc:0.6712796684666583


 91%|█████████ | 39816/43738 [5:05:28<25:10,  2.60it/s]

step:13420, train_loss:0.05958566478472751, acc:0.6712879244524814


 91%|█████████ | 39817/43738 [5:05:28<28:11,  2.32it/s]

step:13420, train_loss:0.05958911619399814, acc:0.6712710651229374


 91%|█████████ | 39818/43738 [5:05:29<25:15,  2.59it/s]

step:13420, train_loss:0.0595876212267516, acc:0.6712793209101412


 91%|█████████ | 39819/43738 [5:05:29<23:30,  2.78it/s]

step:13420, train_loss:0.059586480789804455, acc:0.6712875762826791


 91%|█████████ | 39820/43738 [5:05:29<23:29,  2.78it/s]

step:13420, train_loss:0.05958517342095881, acc:0.6712958312405827


 91%|█████████ | 39821/43738 [5:05:30<24:20,  2.68it/s]

step:13420, train_loss:0.05958561337986684, acc:0.6713040857838829


 91%|█████████ | 39822/43738 [5:05:30<25:59,  2.51it/s]

step:13420, train_loss:0.05958411976322638, acc:0.6713123399126111


 91%|█████████ | 39823/43738 [5:05:31<28:01,  2.33it/s]

step:13420, train_loss:0.05959207305638795, acc:0.6712954825101072


 92%|█████████▏| 40128/43738 [5:07:50<30:55,  1.95it/s]

step:13440, train_loss:0.05962392318372548, acc:0.6711772328548644


 92%|█████████▏| 40129/43738 [5:07:51<30:29,  1.97it/s]

step:13440, train_loss:0.05962278412968348, acc:0.6711854269979317


 92%|█████████▏| 40130/43738 [5:07:51<32:10,  1.87it/s]

step:13440, train_loss:0.05962169560995397, acc:0.671193620732619


 92%|█████████▏| 40131/43738 [5:07:52<26:48,  2.24it/s]

step:13440, train_loss:0.05962061804547926, acc:0.671201814058957


 92%|█████████▏| 40132/43738 [5:07:52<30:29,  1.97it/s]

step:13440, train_loss:0.059622430477016895, acc:0.671210006976976


 92%|█████████▏| 40133/43738 [5:07:53<26:40,  2.25it/s]

step:13440, train_loss:0.05962384780509516, acc:0.671193282336232


 92%|█████████▏| 40134/43738 [5:07:53<26:48,  2.24it/s]

step:13440, train_loss:0.05962253303447733, acc:0.6712014750585539


 92%|█████████▏| 40135/43738 [5:07:53<23:22,  2.57it/s]

step:13440, train_loss:0.059625265202587416, acc:0.6711847514638096


 92%|█████████▏| 40136/43738 [5:07:54<28:07,  2.13it/s]

step:13440, train_loss:0.059630028052760006, acc:0.6711680287024118


 92%|█████████▏| 40137/43738 [5:07:55<29:07,  2.06it/s]

step:13440, train_loss:0.05962880397586876, acc:0.6711762214415626


 92%|█████████▏| 40138/43738 [5:07:55<27:54,  2.15it/s]

step:13440, train_loss:0.05962918737734963, acc:0.6711594997259455


 92%|█████████▏| 40139/43738 [5:07:55<27:03,  2.22it/s]

step:13440, train_loss:0.05962886953329597, acc:0.671167692269364


 92%|█████████▏| 40140/43738 [5:07:56<26:03,  2.30it/s]

step:13440, train_loss:0.05962738882128086, acc:0.671175884404584


 92%|█████████▏| 40141/43738 [5:07:56<23:03,  2.60it/s]

step:13440, train_loss:0.059626858536048086, acc:0.671184076131636


 92%|█████████▏| 40142/43738 [5:07:56<23:05,  2.59it/s]

step:13440, train_loss:0.059626055733579514, acc:0.6711922674505505


 92%|█████████▏| 40143/43738 [5:07:57<24:04,  2.49it/s]

step:13440, train_loss:0.059627768677844806, acc:0.6711755474179807


 92%|█████████▏| 40448/43738 [5:10:18<35:50,  1.53it/s]

step:13460, train_loss:0.0596038570884724, acc:0.6715535996835443


 92%|█████████▏| 40449/43738 [5:10:18<33:06,  1.66it/s]

step:13460, train_loss:0.05960481388671702, acc:0.6715369972063586


 92%|█████████▏| 40450/43738 [5:10:19<27:55,  1.96it/s]

step:13460, train_loss:0.059605417977452994, acc:0.6715203955500618


 92%|█████████▏| 40451/43738 [5:10:19<31:45,  1.73it/s]

step:13460, train_loss:0.05960817904688276, acc:0.671503794714593


 92%|█████████▏| 40452/43738 [5:10:20<26:39,  2.05it/s]

step:13460, train_loss:0.05960743328701825, acc:0.6715119153564719


 92%|█████████▏| 40453/43738 [5:10:20<23:06,  2.37it/s]

step:13460, train_loss:0.059605963408588335, acc:0.6715200355968655


 92%|█████████▏| 40454/43738 [5:10:20<20:18,  2.70it/s]

step:13460, train_loss:0.05960514601190272, acc:0.6715281554358036


 92%|█████████▏| 40455/43738 [5:10:21<19:38,  2.79it/s]

step:13460, train_loss:0.0596036745688712, acc:0.671536274873316


 92%|█████████▏| 40456/43738 [5:10:21<23:33,  2.32it/s]

step:13460, train_loss:0.059604537834402976, acc:0.6715443939094324


 92%|█████████▏| 40457/43738 [5:10:22<26:27,  2.07it/s]

step:13460, train_loss:0.05960308043740477, acc:0.6715525125441827


 93%|█████████▎| 40458/43738 [5:10:22<25:28,  2.15it/s]

step:13460, train_loss:0.05960168079671823, acc:0.6715606307775965


 93%|█████████▎| 40459/43738 [5:10:23<26:31,  2.06it/s]

step:13460, train_loss:0.05960048575184271, acc:0.6715687486097036


 93%|█████████▎| 40460/43738 [5:10:23<31:44,  1.72it/s]

step:13460, train_loss:0.059599456414212916, acc:0.6715768660405339


 93%|█████████▎| 40461/43738 [5:10:24<28:05,  1.94it/s]

step:13460, train_loss:0.05959883826044831, acc:0.6715849830701169


 93%|█████████▎| 40462/43738 [5:10:25<31:38,  1.73it/s]

step:13460, train_loss:0.05959761311139479, acc:0.6715930996984826


 93%|█████████▎| 40463/43738 [5:10:25<27:30,  1.98it/s]

step:13460, train_loss:0.05959639439501038, acc:0.6716012159256605


 93%|█████████▎| 40768/43738 [5:12:45<23:01,  2.15it/s]

step:13480, train_loss:0.059622692419153384, acc:0.6715315934065934


 93%|█████████▎| 40769/43738 [5:12:46<25:31,  1.94it/s]

step:13480, train_loss:0.059629163589266704, acc:0.6715151217837082


 93%|█████████▎| 40770/43738 [5:12:46<23:48,  2.08it/s]

step:13480, train_loss:0.05962776674285593, acc:0.671523178807947


 93%|█████████▎| 40771/43738 [5:12:47<23:10,  2.13it/s]

step:13480, train_loss:0.059626463984254634, acc:0.6715312354369527


 93%|█████████▎| 40772/43738 [5:12:47<22:10,  2.23it/s]

step:13480, train_loss:0.05962833527097277, acc:0.6715147650348279


 93%|█████████▎| 40773/43738 [5:12:48<21:45,  2.27it/s]

step:13480, train_loss:0.059627338502013495, acc:0.6715228214749958


 93%|█████████▎| 40774/43738 [5:12:48<18:55,  2.61it/s]

step:13480, train_loss:0.05962674252319356, acc:0.6715308775199882


 93%|█████████▎| 40775/43738 [5:12:48<22:19,  2.21it/s]

step:13480, train_loss:0.059627838117245514, acc:0.6715144083384427


 93%|█████████▎| 40776/43738 [5:12:49<20:29,  2.41it/s]

step:13480, train_loss:0.05962714816591756, acc:0.6715224641946242


 93%|█████████▎| 40777/43738 [5:12:49<20:50,  2.37it/s]

step:13480, train_loss:0.05962859313193258, acc:0.6715059960271722


 93%|█████████▎| 40778/43738 [5:12:50<20:29,  2.41it/s]

step:13480, train_loss:0.059628323961794814, acc:0.6715140516945411


 93%|█████████▎| 40779/43738 [5:12:50<20:47,  2.37it/s]

step:13480, train_loss:0.059627780520482676, acc:0.6715221069668211


 93%|█████████▎| 40780/43738 [5:12:50<21:39,  2.28it/s]

step:13480, train_loss:0.05962633123799793, acc:0.6715301618440412


 93%|█████████▎| 40781/43738 [5:12:51<18:28,  2.67it/s]

step:13480, train_loss:0.059626640213067794, acc:0.6715136951031118


 93%|█████████▎| 40782/43738 [5:12:51<16:37,  2.96it/s]

step:13480, train_loss:0.059626650378431977, acc:0.6715217497915748


 93%|█████████▎| 40783/43738 [5:12:51<18:52,  2.61it/s]

step:13480, train_loss:0.05962519068037851, acc:0.6715298040850355


 94%|█████████▍| 41088/43738 [5:15:04<15:42,  2.81it/s]

step:13500, train_loss:0.05962349846095377, acc:0.6716559579439252


 94%|█████████▍| 41089/43738 [5:15:05<18:43,  2.36it/s]

step:13500, train_loss:0.05962755317565715, acc:0.6716396115748741


 94%|█████████▍| 41090/43738 [5:15:05<17:52,  2.47it/s]

step:13500, train_loss:0.059628302161597396, acc:0.6716232660014602


 94%|█████████▍| 41091/43738 [5:15:05<17:14,  2.56it/s]

step:13500, train_loss:0.0596269269628343, acc:0.6716312574529703


 94%|█████████▍| 41092/43738 [5:15:06<17:17,  2.55it/s]

step:13500, train_loss:0.05962662724682724, acc:0.6716392485155261


 94%|█████████▍| 41093/43738 [5:15:06<15:14,  2.89it/s]

step:13500, train_loss:0.05962519441921667, acc:0.6716472391891563


 94%|█████████▍| 41094/43738 [5:15:07<19:36,  2.25it/s]

step:13500, train_loss:0.05962400922861008, acc:0.6716552294738891


 94%|█████████▍| 41096/43738 [5:15:07<15:51,  2.78it/s]

step:13500, train_loss:0.05962726270578354, acc:0.671638885509186
step:13500, train_loss:0.05962581311321408, acc:0.6716468756083317


 94%|█████████▍| 41097/43738 [5:15:08<15:15,  2.89it/s]

step:13500, train_loss:0.059624573511664974, acc:0.6716548653186364


 94%|█████████▍| 41098/43738 [5:15:08<18:03,  2.44it/s]

step:13500, train_loss:0.059623258870371516, acc:0.6716628546401284


 94%|█████████▍| 41099/43738 [5:15:09<18:28,  2.38it/s]

step:13500, train_loss:0.05962253536652451, acc:0.6716708435728364


 94%|█████████▍| 41100/43738 [5:15:09<21:09,  2.08it/s]

step:13500, train_loss:0.05962140290415421, acc:0.6716788321167884


 94%|█████████▍| 41101/43738 [5:15:10<20:50,  2.11it/s]

step:13500, train_loss:0.05962004701391456, acc:0.6716868202720129


 94%|█████████▍| 41102/43738 [5:15:10<22:26,  1.96it/s]

step:13500, train_loss:0.05961924085006303, acc:0.6716948080385383


 94%|█████████▍| 41103/43738 [5:15:11<19:55,  2.20it/s]

step:13500, train_loss:0.05961802048367933, acc:0.671702795416393


 95%|█████████▍| 41408/43738 [5:17:30<17:04,  2.27it/s]

step:13520, train_loss:0.05966565042829459, acc:0.6714403013910355


 95%|█████████▍| 41409/43738 [5:17:30<14:38,  2.65it/s]

step:13520, train_loss:0.05966484294370406, acc:0.6714482358907484


 95%|█████████▍| 41410/43738 [5:17:31<19:21,  2.01it/s]

step:13520, train_loss:0.05966661430247917, acc:0.6714561700072447


 95%|█████████▍| 41411/43738 [5:17:31<16:42,  2.32it/s]

step:13520, train_loss:0.05966517670063597, acc:0.671464103740552


 95%|█████████▍| 41412/43738 [5:17:31<14:47,  2.62it/s]

step:13520, train_loss:0.05966374045325466, acc:0.6714720370906984


 95%|█████████▍| 41413/43738 [5:17:31<14:30,  2.67it/s]

step:13520, train_loss:0.059662376285160836, acc:0.6714799700577113


 95%|█████████▍| 41414/43738 [5:17:32<14:43,  2.63it/s]

step:13520, train_loss:0.05966219265578604, acc:0.6714879026416187


 95%|█████████▍| 41415/43738 [5:17:32<17:59,  2.15it/s]

step:13520, train_loss:0.05966496373279585, acc:0.6714716890015695


 95%|█████████▍| 41416/43738 [5:17:33<21:48,  1.77it/s]

step:13520, train_loss:0.059669140657538686, acc:0.6714554761444852


 95%|█████████▍| 41417/43738 [5:17:34<21:31,  1.80it/s]

step:13520, train_loss:0.05966964321914262, acc:0.6714634087452013


 95%|█████████▍| 41418/43738 [5:17:35<23:23,  1.65it/s]

step:13520, train_loss:0.059669194455598075, acc:0.6714713409628664


 95%|█████████▍| 41419/43738 [5:17:35<20:18,  1.90it/s]

step:13520, train_loss:0.059667754031174206, acc:0.6714792727975084


 95%|█████████▍| 41420/43738 [5:17:35<19:21,  2.00it/s]

step:13520, train_loss:0.059666680984889274, acc:0.671487204249155


 95%|█████████▍| 41421/43738 [5:17:36<20:53,  1.85it/s]

step:13520, train_loss:0.05966539773298463, acc:0.671495135317834


 95%|█████████▍| 41422/43738 [5:17:37<23:43,  1.63it/s]

step:13520, train_loss:0.05966395790241621, acc:0.671503066003573


 95%|█████████▍| 41423/43738 [5:17:37<21:47,  1.77it/s]

step:13520, train_loss:0.059665268539660254, acc:0.6714868551287931


 95%|█████████▌| 41728/43738 [5:19:54<15:31,  2.16it/s]

step:13540, train_loss:0.059668393724559454, acc:0.6716593174846626


 95%|█████████▌| 41729/43738 [5:19:54<13:08,  2.55it/s]

step:13540, train_loss:0.059666970214756025, acc:0.6716671858899087


 95%|█████████▌| 41730/43738 [5:19:55<11:28,  2.92it/s]

step:13540, train_loss:0.05966554142993448, acc:0.6716750539180446


 95%|█████████▌| 41731/43738 [5:19:55<12:20,  2.71it/s]

step:13540, train_loss:0.05966462352693717, acc:0.6716829215690974


 95%|█████████▌| 41732/43738 [5:19:55<13:35,  2.46it/s]

step:13540, train_loss:0.059663197110637486, acc:0.6716907888430941


 95%|█████████▌| 41733/43738 [5:19:56<16:35,  2.01it/s]

step:13540, train_loss:0.059662152392100494, acc:0.6716986557400618


 95%|█████████▌| 41734/43738 [5:19:57<15:37,  2.14it/s]

step:13540, train_loss:0.05966302835098392, acc:0.671682560981454


 95%|█████████▌| 41735/43738 [5:19:57<14:29,  2.30it/s]

step:13540, train_loss:0.05966638487510175, acc:0.6716664669941296


 95%|█████████▌| 41736/43738 [5:19:57<13:14,  2.52it/s]

step:13540, train_loss:0.05966541140208205, acc:0.6716743339083765


 95%|█████████▌| 41737/43738 [5:19:58<12:42,  2.62it/s]

step:13540, train_loss:0.05966471744576213, acc:0.6716822004456477


 95%|█████████▌| 41738/43738 [5:19:58<16:03,  2.08it/s]

step:13540, train_loss:0.05966575357971268, acc:0.6716661076237481


 95%|█████████▌| 41739/43738 [5:19:59<14:06,  2.36it/s]

step:13540, train_loss:0.05966701524029863, acc:0.6716500155729653


 95%|█████████▌| 41740/43738 [5:19:59<12:46,  2.61it/s]

step:13540, train_loss:0.05966650241397621, acc:0.6716578821274557


 95%|█████████▌| 41741/43738 [5:19:59<14:24,  2.31it/s]

step:13540, train_loss:0.05966555546708522, acc:0.6716657483050238


 95%|█████████▌| 41742/43738 [5:20:00<12:46,  2.60it/s]

step:13540, train_loss:0.05966419436883092, acc:0.6716736141056969


 95%|█████████▌| 41743/43738 [5:20:00<13:01,  2.55it/s]

step:13540, train_loss:0.05966297780026837, acc:0.671681479529502


 96%|█████████▌| 42048/43738 [5:22:17<14:27,  1.95it/s]

step:13560, train_loss:0.059667637792871334, acc:0.6715658295281582


 96%|█████████▌| 42049/43738 [5:22:17<12:24,  2.27it/s]

step:13560, train_loss:0.05966860490038492, acc:0.6715498584984185


 96%|█████████▌| 42050/43738 [5:22:17<11:53,  2.37it/s]

step:13560, train_loss:0.059668257403785174, acc:0.6715576694411415


 96%|█████████▌| 42051/43738 [5:22:18<13:23,  2.10it/s]

step:13560, train_loss:0.05966861686263831, acc:0.6715416993650567


 96%|█████████▌| 42052/43738 [5:22:18<12:18,  2.28it/s]

step:13560, train_loss:0.059667249584682, acc:0.6715495101303148


 96%|█████████▌| 42053/43738 [5:22:19<13:35,  2.07it/s]

step:13560, train_loss:0.059666895737881524, acc:0.6715573205241006


 96%|█████████▌| 42054/43738 [5:22:19<13:02,  2.15it/s]

step:13560, train_loss:0.05967159700087855, acc:0.6715413515955676


 96%|█████████▌| 42055/43738 [5:22:19<11:21,  2.47it/s]

step:13560, train_loss:0.059670910405352676, acc:0.6715491618119129


 96%|█████████▌| 42056/43738 [5:22:20<11:53,  2.36it/s]

step:13560, train_loss:0.05967101879095861, acc:0.6715569716568385


 96%|█████████▌| 42057/43738 [5:22:21<13:18,  2.11it/s]

step:13560, train_loss:0.05967267792214988, acc:0.6715410038756925


 96%|█████████▌| 42058/43738 [5:22:21<11:36,  2.41it/s]

step:13560, train_loss:0.05967130261123392, acc:0.6715488135432023


 96%|█████████▌| 42059/43738 [5:22:21<11:51,  2.36it/s]

step:13560, train_loss:0.05966992551764219, acc:0.6715566228393447


 96%|█████████▌| 42060/43738 [5:22:22<13:15,  2.11it/s]

step:13560, train_loss:0.05966851529707224, acc:0.6715644317641465


 96%|█████████▌| 42061/43738 [5:22:22<11:18,  2.47it/s]

step:13560, train_loss:0.059671789127187194, acc:0.671548465324172


 96%|█████████▌| 42062/43738 [5:22:23<11:45,  2.38it/s]

step:13560, train_loss:0.059670598834681156, acc:0.6715562740716086


 96%|█████████▌| 42063/43738 [5:22:23<11:13,  2.49it/s]

step:13560, train_loss:0.05967201325571378, acc:0.6715403085847419


 97%|█████████▋| 42368/43738 [5:24:38<08:42,  2.62it/s]

step:13580, train_loss:0.05964016277273644, acc:0.6716389728096677


 97%|█████████▋| 42369/43738 [5:24:38<09:56,  2.29it/s]

step:13580, train_loss:0.059638757310865924, acc:0.6716467228398121


 97%|█████████▋| 42370/43738 [5:24:39<10:02,  2.27it/s]

step:13580, train_loss:0.05963738091439713, acc:0.6716544725041302


 97%|█████████▋| 42371/43738 [5:24:39<10:11,  2.23it/s]

step:13580, train_loss:0.05963606278538172, acc:0.671662221802648


 97%|█████████▋| 42372/43738 [5:24:40<10:10,  2.24it/s]

step:13580, train_loss:0.05963514616529777, acc:0.6716699707353913


 97%|█████████▋| 42373/43738 [5:24:40<08:41,  2.62it/s]

step:13580, train_loss:0.05963441644928746, acc:0.671677719302386


 97%|█████████▋| 42374/43738 [5:24:40<07:33,  3.01it/s]

step:13580, train_loss:0.05963322563695299, acc:0.6716854675036579


 97%|█████████▋| 42375/43738 [5:24:40<07:46,  2.92it/s]

step:13580, train_loss:0.05963182343855352, acc:0.671693215339233


 97%|█████████▋| 42376/43738 [5:24:41<07:18,  3.10it/s]

step:13580, train_loss:0.05963042965184122, acc:0.6717009628091373


 97%|█████████▋| 42377/43738 [5:24:41<09:03,  2.50it/s]

step:13580, train_loss:0.05963082928005039, acc:0.6716851122070935


 97%|█████████▋| 42378/43738 [5:24:42<08:36,  2.63it/s]

step:13580, train_loss:0.05962961617854564, acc:0.6716928595025721


 97%|█████████▋| 42379/43738 [5:24:42<07:37,  2.97it/s]

step:13580, train_loss:0.05962824950313979, acc:0.6717006064324311


 97%|█████████▋| 42380/43738 [5:24:42<08:16,  2.74it/s]

step:13580, train_loss:0.05962739295989202, acc:0.6717083529966965


 97%|█████████▋| 42381/43738 [5:24:42<07:41,  2.94it/s]

step:13580, train_loss:0.05962848165614541, acc:0.671692503716288


 97%|█████████▋| 42382/43738 [5:24:43<09:18,  2.43it/s]

step:13580, train_loss:0.0596280145406163, acc:0.6717002501061772


 97%|█████████▋| 42383/43738 [5:24:43<08:05,  2.79it/s]

step:13580, train_loss:0.059626607676532335, acc:0.671707996130524


 98%|█████████▊| 42688/43738 [5:26:57<09:10,  1.91it/s]

step:13600, train_loss:0.05960744314920996, acc:0.6720155547226386


 98%|█████████▊| 42689/43738 [5:26:58<09:00,  1.94it/s]

step:13600, train_loss:0.059607255037698155, acc:0.672023237836445


 98%|█████████▊| 42690/43738 [5:26:58<08:42,  2.01it/s]

step:13600, train_loss:0.05960764539535417, acc:0.6720074959006793


 98%|█████████▊| 42691/43738 [5:26:59<08:16,  2.11it/s]

step:13600, train_loss:0.05960741162874363, acc:0.672015178843316


 98%|█████████▊| 42692/43738 [5:26:59<07:40,  2.27it/s]

step:13600, train_loss:0.059606021678472586, acc:0.6720228614260283


 98%|█████████▊| 42693/43738 [5:27:00<07:24,  2.35it/s]

step:13600, train_loss:0.0596046293291596, acc:0.6720305436488417


 98%|█████████▊| 42694/43738 [5:27:00<07:14,  2.40it/s]

step:13600, train_loss:0.05960412357097327, acc:0.6720382255117815


 98%|█████████▊| 42695/43738 [5:27:00<07:14,  2.40it/s]

step:13600, train_loss:0.05960281871332751, acc:0.672045907014873


 98%|█████████▊| 42696/43738 [5:27:01<06:44,  2.57it/s]

step:13600, train_loss:0.05960165397362031, acc:0.6720535881581413


 98%|█████████▊| 42697/43738 [5:27:01<06:54,  2.51it/s]

step:13600, train_loss:0.059601198179884005, acc:0.6720612689416118


 98%|█████████▊| 42698/43738 [5:27:02<06:46,  2.56it/s]

step:13600, train_loss:0.05959993823407641, acc:0.6720689493653098


 98%|█████████▊| 42699/43738 [5:27:02<08:16,  2.09it/s]

step:13600, train_loss:0.05959856919703734, acc:0.6720766294292606


 98%|█████████▊| 42700/43738 [5:27:03<08:39,  2.00it/s]

step:13600, train_loss:0.05959950418635331, acc:0.6720608899297423


 98%|█████████▊| 42701/43738 [5:27:03<07:32,  2.29it/s]

step:13600, train_loss:0.05959811627060544, acc:0.6720685698227208


 98%|█████████▊| 42702/43738 [5:27:04<09:19,  1.85it/s]

step:13600, train_loss:0.059599924441866514, acc:0.6720528312491219


 98%|█████████▊| 42703/43738 [5:27:04<08:36,  2.00it/s]

step:13600, train_loss:0.05959855507347391, acc:0.6720605109711262


 98%|█████████▊| 43008/43738 [5:29:19<07:16,  1.67it/s]

step:13620, train_loss:0.059626583881325926, acc:0.6719912574404762


 98%|█████████▊| 43009/43738 [5:29:20<06:35,  1.84it/s]

step:13620, train_loss:0.05962624245999591, acc:0.6719988839545211


 98%|█████████▊| 43010/43738 [5:29:20<06:18,  1.92it/s]

step:13620, train_loss:0.05962509090948545, acc:0.672006510113927


 98%|█████████▊| 43011/43738 [5:29:21<06:34,  1.84it/s]

step:13620, train_loss:0.059630760920631264, acc:0.6719908860524052


 98%|█████████▊| 43012/43738 [5:29:21<05:29,  2.20it/s]

step:13620, train_loss:0.05962938074938075, acc:0.6719985120431508


 98%|█████████▊| 43013/43738 [5:29:21<05:00,  2.42it/s]

step:13620, train_loss:0.059634203524247954, acc:0.6719828888940553


 98%|█████████▊| 43014/43738 [5:29:22<05:10,  2.33it/s]

step:13620, train_loss:0.05963340415020024, acc:0.6719905147161389


 98%|█████████▊| 43015/43738 [5:29:22<04:56,  2.44it/s]

step:13620, train_loss:0.0596339170636503, acc:0.6719748924793677


 98%|█████████▊| 43016/43738 [5:29:23<05:46,  2.08it/s]

step:13620, train_loss:0.05963770493826112, acc:0.6719592709689418


 98%|█████████▊| 43017/43738 [5:29:23<06:16,  1.92it/s]

step:13620, train_loss:0.05963656899538799, acc:0.6719668968082386


 98%|█████████▊| 43018/43738 [5:29:24<06:46,  1.77it/s]

step:13620, train_loss:0.05963647857023203, acc:0.6719745222929936


 98%|█████████▊| 43019/43738 [5:29:25<06:33,  1.83it/s]

step:13620, train_loss:0.059635126768530916, acc:0.6719821474232316


 98%|█████████▊| 43020/43738 [5:29:25<05:56,  2.01it/s]

step:13620, train_loss:0.059635233862188566, acc:0.6719897721989773


 98%|█████████▊| 43021/43738 [5:29:26<06:18,  1.89it/s]

step:13620, train_loss:0.059636469270734835, acc:0.6719741521582483


 98%|█████████▊| 43022/43738 [5:29:26<06:50,  1.75it/s]

step:13620, train_loss:0.05963645846886302, acc:0.6719585328436614


 98%|█████████▊| 43023/43738 [5:29:27<07:18,  1.63it/s]

step:13620, train_loss:0.05963581856404056, acc:0.6719661576366129


 99%|█████████▉| 43328/43738 [5:31:45<03:16,  2.08it/s]

step:13640, train_loss:0.0596229911821164, acc:0.6719903988183161


 99%|█████████▉| 43329/43738 [5:31:46<02:56,  2.32it/s]

step:13640, train_loss:0.05962174750511211, acc:0.671997969027672


 99%|█████████▉| 43330/43738 [5:31:46<02:57,  2.30it/s]

step:13640, train_loss:0.059620974804849965, acc:0.6720055388876067


 99%|█████████▉| 43331/43738 [5:31:47<02:57,  2.29it/s]

step:13640, train_loss:0.059622837984788346, acc:0.6719900302323971


 99%|█████████▉| 43332/43738 [5:31:47<02:48,  2.40it/s]

step:13640, train_loss:0.05962246850293606, acc:0.6719975999261516


 99%|█████████▉| 43333/43738 [5:31:48<03:00,  2.24it/s]

step:13640, train_loss:0.05962152532709685, acc:0.6720051692705329


 99%|█████████▉| 43334/43738 [5:31:48<03:20,  2.01it/s]

step:13640, train_loss:0.05962357643516381, acc:0.6719896616975124


 99%|█████████▉| 43335/43738 [5:31:48<02:50,  2.36it/s]

step:13640, train_loss:0.0596225782136542, acc:0.6719972308757356


 99%|█████████▉| 43336/43738 [5:31:49<03:21,  1.99it/s]

step:13640, train_loss:0.059623530481103526, acc:0.6719817242015876


 99%|█████████▉| 43337/43738 [5:31:50<03:59,  1.68it/s]

step:13640, train_loss:0.0596263487830359, acc:0.6719662182430718


 99%|█████████▉| 43338/43738 [5:31:50<03:15,  2.04it/s]

step:13640, train_loss:0.05962510677667224, acc:0.6719737874382758


 99%|█████████▉| 43339/43738 [5:31:51<03:14,  2.05it/s]

step:13640, train_loss:0.059626460880221985, acc:0.6719582823784582


 99%|█████████▉| 43340/43738 [5:31:51<02:54,  2.28it/s]

step:13640, train_loss:0.05962662922223841, acc:0.6719658514074758


 99%|█████████▉| 43341/43738 [5:31:51<02:29,  2.65it/s]

step:13640, train_loss:0.05962600341666783, acc:0.6719734200872154


 99%|█████████▉| 43342/43738 [5:31:52<02:48,  2.35it/s]

step:13640, train_loss:0.059626244072946644, acc:0.6719579161090858


 99%|█████████▉| 43343/43738 [5:31:52<02:36,  2.53it/s]

step:13640, train_loss:0.05962508724030953, acc:0.6719654846226611


100%|█████████▉| 43648/43738 [5:34:08<00:47,  1.89it/s]

step:13660, train_loss:0.0596530981846456, acc:0.6718979105571847


100%|█████████▉| 43649/43738 [5:34:09<00:52,  1.68it/s]

step:13660, train_loss:0.0596558933673102, acc:0.6718825173543495


100%|█████████▉| 43650/43738 [5:34:09<00:46,  1.89it/s]

step:13660, train_loss:0.05965626823248204, acc:0.6718671248568155


100%|█████████▉| 43651/43738 [5:34:10<00:47,  1.83it/s]

step:13660, train_loss:0.059656961253946005, acc:0.6718746420471466


100%|█████████▉| 43652/43738 [5:34:11<00:48,  1.76it/s]

step:13660, train_loss:0.059656994881953626, acc:0.6718821588930634


100%|█████████▉| 43653/43738 [5:34:11<00:44,  1.89it/s]

step:13660, train_loss:0.05965628595419834, acc:0.6718896753945891


100%|█████████▉| 43654/43738 [5:34:12<00:46,  1.80it/s]

step:13660, train_loss:0.05965560368671789, acc:0.6718971915517479


100%|█████████▉| 43655/43738 [5:34:12<00:38,  2.16it/s]

step:13660, train_loss:0.059654266706084313, acc:0.6719047073645631


100%|█████████▉| 43656/43738 [5:34:12<00:37,  2.17it/s]

step:13660, train_loss:0.059653372230886674, acc:0.6719122228330584


100%|█████████▉| 43657/43738 [5:34:13<00:34,  2.37it/s]

step:13660, train_loss:0.05965208325192781, acc:0.6719197379572577


100%|█████████▉| 43658/43738 [5:34:13<00:29,  2.74it/s]

step:13660, train_loss:0.05965086053548989, acc:0.6719272527371845


100%|█████████▉| 43659/43738 [5:34:14<00:38,  2.06it/s]

step:13660, train_loss:0.0596499888465756, acc:0.6719347671728624


100%|█████████▉| 43660/43738 [5:34:14<00:35,  2.17it/s]

step:13660, train_loss:0.059649924243253674, acc:0.6719193770041227


100%|█████████▉| 43661/43738 [5:34:14<00:30,  2.52it/s]

step:13660, train_loss:0.05964910742579968, acc:0.6719268912759672


100%|█████████▉| 43662/43738 [5:34:15<00:30,  2.53it/s]

step:13660, train_loss:0.05965206329925817, acc:0.6719115019925793


100%|█████████▉| 43663/43738 [5:34:15<00:35,  2.10it/s]

step:13660, train_loss:0.059652727936777374, acc:0.6718961134141035


100%|██████████| 43738/43738 [5:34:47<00:00,  2.10it/s]
  0%|          | 1/5129 [00:00<16:48,  5.08it/s]

eval on dev set


100%|██████████| 5129/5129 [13:40<00:00,  6.39it/s]

1.293597065150915, 0.5800350945603432





In [16]:
# Display the `avg` attribute of `acces` as the cell's output.
# NOTE(review): presumably `acces` is the AverageMeter tracking training
# accuracy (the value matches the `acc:` figures logged above) -- confirm
# against the training loop.
acces.avg

0.671795692532809

In [14]:
# Sanity check: push one training example through the model, compute the
# loss, and backpropagate it once.
criterion = torch.nn.CrossEntropyLoss()

# Pull the first training example and split out the fields the model takes.
item = train_data[0]
choices = item['choices']
passages = item['passages']
question = item['question']
label = item['label'].to(device)

# Forward pass.
score = model(choices, passages, question)

# The loss is divided by the accumulation step count before backward(),
# so the printed value is the scaled loss.
loss = criterion(score, label) / config.gradient_accumulation_steps
loss.backward()
print(loss)

  alphas = self.softmax(alphas)  # (bsz, sent_len)


tensor(5.9813e-05, device='cuda:0', grad_fn=<DivBackward0>)


In [15]:
# Module-level accumulator: hook() appends to this list on every call.
# Re-running this cell rebinds it to a fresh empty list.
inputs = []

def hook(module, input, output):
    """Append ``input`` to the module-level ``inputs`` list.

    The (module, input, output) signature looks like a PyTorch forward hook
    (``nn.Module.register_forward_hook``) -- presumably that is how it gets
    registered; the registration is not visible here, so confirm.

    Args:
        module: Unused.
        input: Value stored in ``inputs`` as-is. Note that the parameter
            name shadows the builtin ``input`` inside this function.
        output: Unused.

    Returns:
        None (implicitly).
    """
    inputs.append(input)
    
