In [None]:
!pip install transformers

In [10]:
import pandas as pd
import numpy as np
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
from tqdm import tqdm
import random
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import re
seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [None]:
## Reading the full data
full_train_data = pd.read_csv("/content/training.1600000.processed.noemoticon.csv",encoding='latin-1')

In [2]:
# Reading full test data
full_test_data = pd.read_csv('full_testing_data.csv')

In [34]:
## Defining the preprocessing function removing twitter handles ,  website links , digits and double spacing
def preprocessing(tweet):
  temp = tweet.lower()
  temp = re.sub('(@[A-Za-z]+[A-Za-z0-9-_]+)', ' ', temp)
  temp = re.sub(r'https\S+', '', temp)
  temp = re.sub(r'http\S+', '', temp)
  temp = re.sub(r'www\S+', '', temp)
#   temp = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", " ", temp)
  temp = re.sub("[0-9]", " ", temp)
  temp = re.sub("\s\s+", " ", temp)
  return temp

In [14]:
full_test_data.iloc[5,:].tweet_text

'Milton on Bolton Wanderers 2 v 2 Leeds United, sat nav free night.: How Milton manages to write so much about su... http://t.co/MoIoMI4v'

In [4]:
full_test_data.shape

(28418, 4)

In [22]:
full_test_data.iloc[3,:].tweet_text

'Excuse the connectivity of this live stream, from Baba Amr, so many activists using only one Sat Modem. LIVE http://t.co/U283IhZ5 #Homs'

In [36]:
preprocessed_test_tweets = []
labels = []
for i in tqdm(range(full_test_data.shape[0])):
    row_val = full_test_data.iloc[i,:]
    if row_val.label == 1:
        continue
    tweet = preprocessing(row_val.tweet_text)
    preprocessed_test_tweets.append(tweet)
    if row_val.label == 2:
        labels.append(1)
    else:
        labels.append(0)

100%|██████████| 28418/28418 [00:03<00:00, 8207.29it/s]


In [51]:
test_info = {'Polarity':labels,'text':preprocessed_test_tweets}
full_test_data = pd.DataFrame(data=test_info)
full_test_data.to_csv("full_test_data.csv",index=True)

In [56]:
full_test_data.Polarity.value_counts()

1    10552
0     4356
Name: Polarity, dtype: int64

In [None]:
columns = full_train_data.columns

In [None]:
## renaming the columns
full_train_data.rename(columns={columns[0]: 'Polarity', columns[1]: 'tweetID', columns[2]:'date', columns[3]:'query', columns[4]:'user', columns[5]:'text'}, inplace=True)

In [None]:
pre_processed_all_tweets = []
for tweet in tqdm(full_train_data.text):
  tweet = preprocessing(tweet)
  pre_processed_all_tweets.append(tweet)

In [None]:
info = {'Polarity':full_train_data['Polarity'],'text':pre_processed_all_tweets}
full_training_data = pd.DataFrame(data = info)

In [None]:
## Changing the polarity of the positive tweet to 1
full_training_data['Polarity'].mask(full_training_data['Polarity'] == 4, 1, inplace=True)

In [None]:
## Writing to CSV file
full_training_data.to_csv("full_training_data.csv",index=True)

In [41]:
full_training_data = pd.read_csv('full_training_data.csv')

In [48]:
full_training_data.iloc[20,:].text

"one of my friend called me, and asked to meet with her at mid valley i've no time *sigh* "

In [11]:
X_training_data_full = full_training_data.text
Y_training_data_full = full_training_data.Polarity

In [57]:
X_testing_data = full_test_data.text
Y_testing_data = full_test_data.Polarity

In [58]:
# Using the BERT tokenizer to tokenize the values
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                         do_lower_case = True)

In [13]:
#encode train set
encoded_data_train = tokenizer.batch_encode_plus(X_training_data_full,
                                                add_special_tokens = True,
                                                return_attention_mask = True,
                                                pad_to_max_length = True,
                                                max_length = 256,
                                                return_tensors = 'pt')

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [60]:
#encode test set
encoded_data_test = tokenizer.batch_encode_plus(X_testing_data,
                                                add_special_tokens = True,
                                                return_attention_mask = True,
                                                pad_to_max_length = True,
                                                max_length = 256,
                                                return_tensors = 'pt')

In [14]:
#train set
input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(Y_training_data_full)


In [61]:
#test set
input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']
labels_test = torch.tensor(Y_testing_data)

In [62]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                      num_labels = 2,
                                                      output_attentions = False,
                                                      output_hidden_states = False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [16]:
#train set
dataset_train = TensorDataset(input_ids_train, 
                              attention_masks_train,
                              labels_train)

In [63]:
#test set
dataset_test = TensorDataset(input_ids_test, 
                              attention_masks_test,
                              labels_test)

In [17]:
validation_set_size = int(len(dataset_train)*0.2)
train_set_size = int(len(dataset_train) - validation_set_size)

In [18]:
validation_dataset,train_dataset = torch.utils.data.random_split(dataset_train, [validation_set_size, train_set_size], generator=torch.Generator().manual_seed(42))

In [15]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [19]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [20]:
torch.cuda.memory_allocated(device=device)

0

In [21]:
batch_size = 32
#train set
dataloader_train = DataLoader(train_dataset,
                              sampler = RandomSampler(train_dataset),
                              batch_size = batch_size)
dataloader_val = DataLoader(validation_dataset,sampler = RandomSampler(validation_dataset),
                              batch_size = batch_size)

In [64]:
#test set
batch_size = 32
dataloader_test = DataLoader(dataset_test,batch_size = batch_size,shuffle=True,drop_last=True)

In [22]:
optimizer = AdamW(model.parameters(),
                 lr = 1e-5,
                 eps = 1e-8) #2e-5 > 5e-5
                 
epochs = 10

scheduler = get_linear_schedule_with_warmup(optimizer,
                                           num_warmup_steps = 0,
                                           num_training_steps = len(dataloader_train)*epochs)




In [23]:
label_dict = {'Negative' : 0 , 'Positive' : 1}

In [65]:
def evaluate(dataloader_val,model):

    model.cuda()
    #evaluation mode 
    model.eval()
    #tracking variables
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch in tqdm(dataloader_val):
        
        #load into GPU
        batch = tuple(b.cuda() for b in batch)
        
        #define inputs
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2]}

        #compute logits
        with torch.no_grad():        
            outputs = model(**inputs)
        
        #compute loss
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        #compute accuracy
        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
    
    #compute average loss
    loss_val_avg = loss_val_total/len(dataloader_val) 
    
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
            
    return loss_val_avg, predictions, true_vals

In [66]:
def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis = 1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds, average = 'weighted')

In [67]:
def accuracy_per_class(preds, labels):
    label_dict = {'Negative' : 0 , 'Positive' : 1}
    label_dict_inverse = {v: k for k, v in label_dict.items()}
    
    #make prediction
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    
    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy:{len(y_preds[y_preds==label])}/{len(y_true)}\n')
    
    print(f'Total accuracy is : {accuracy_score(labels_flat,preds_flat)}\n')
        
    

In [67]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                      num_labels = 2,
                                                      output_attentions = False,
                                                      output_hidden_states = False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [68]:
model.load_state_dict(torch.load(f'Models/ BERT_ft_epoch3.model'))

<All keys matched successfully>

In [69]:
len(dataloader_test)

465

In [23]:
val_loss, predictions, true_vals = evaluate(dataloader_val,model)

100%|██████████| 10000/10000 [42:00<00:00,  3.97it/s]


In [24]:
print(val_loss)

0.27637956384159623


In [28]:
val_loss, predictions, true_vals = evaluate(dataloader_val,model)

100%|██████████| 10000/10000 [41:56<00:00,  3.97it/s]


In [29]:
print(val_loss)

0.2892338718647137


In [70]:
val_loss, predictions, true_vals = evaluate(dataloader_test,model)

100%|██████████| 465/465 [01:06<00:00,  7.01it/s]


In [None]:
val_f1 = f1_score_func(predictions, true_vals)
print("F1 score is:",val_f1)

In [72]:
accuracy_per_class(predictions, true_vals) # For epoch 3

Class: Negative
Accuracy:3310/4347

Class: Positive
Accuracy:8786/10533

Total accuracy is : 0.8129032258064516



In [None]:
train_loss = []
val_loss_total = []
for epoch in tqdm(range(3, epochs+1)):
    
    model.cuda()
    model.train()
    
    loss_train_total = 0
    
    progress_bar = tqdm(dataloader_train, 
                        desc = 'Epoch {:1d}'.format(epoch), 
                        leave = False, 
                        disable = False)
    counter = 0
    for batch in progress_bar:
        
        model.zero_grad() #set gradient to 0
        batch = tuple(b.cuda() for b in batch)
        
        inputs = {'input_ids': batch[0], 
                  'attention_mask': batch[1], 
                  'labels': batch[2]}        
        outputs = model(**inputs) #unpack the dict straight into inputs
        
        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        
        optimizer.step()
        scheduler.step()
        counter += 1
#         if counter%2000 == 0:
#             print("Epoch: {}/{}...".format(epoch, epochs),
#                     "Step: {}...".format(counter),
#                     "Loss: {}...".format(loss.item()))
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item() / len(batch))})
        
    torch.save(model.state_dict(), f'Models/ BERT_ft_epoch{epoch}.model')
    
    tqdm.write('\n Epoch {epoch}')
    
    loss_train_ave = loss_train_total / len(dataloader_train)
    train_loss.append(loss_train_ave)
    tqdm.write('Training loss: {loss_train_avg}')
    
    val_loss, predictions, true_vals = evaluate(dataloader_val,model)
    val_loss_total.append(val_loss)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (weighted): {val_f1}')
    accuracy_per_class(predictions, true_vals)

  0%|          | 0/8 [00:00<?, ?it/s]
Epoch 3:   0%|          | 0/40000 [00:00<?, ?it/s][A
Epoch 3:   0%|          | 0/40000 [00:00<?, ?it/s, training_loss=0.066][A
Epoch 3:   0%|          | 1/40000 [00:00<10:07:30,  1.10it/s, training_loss=0.066][A
Epoch 3:   0%|          | 1/40000 [00:01<10:07:30,  1.10it/s, training_loss=0.120][A
Epoch 3:   0%|          | 2/40000 [00:01<9:29:18,  1.17it/s, training_loss=0.120] [A
Epoch 3:   0%|          | 2/40000 [00:02<9:29:18,  1.17it/s, training_loss=0.080][A
Epoch 3:   0%|          | 3/40000 [00:02<9:18:06,  1.19it/s, training_loss=0.080][A
Epoch 3:   0%|          | 3/40000 [00:03<9:18:06,  1.19it/s, training_loss=0.133][A
Epoch 3:   0%|          | 4/40000 [00:03<9:12:00,  1.21it/s, training_loss=0.133][A
Epoch 3:   0%|          | 4/40000 [00:04<9:12:00,  1.21it/s, training_loss=0.033][A
Epoch 3:   0%|          | 5/40000 [00:04<9:09:06,  1.21it/s, training_loss=0.033][A
Epoch 3:   0%|          | 5/40000 [00:04<9:09:06,  1.21it/s, trai

Epoch 3:   0%|          | 95/40000 [01:17<9:01:16,  1.23it/s, training_loss=0.044][A
Epoch 3:   0%|          | 95/40000 [01:18<9:01:16,  1.23it/s, training_loss=0.101][A
Epoch 3:   0%|          | 96/40000 [01:18<9:02:08,  1.23it/s, training_loss=0.101][A
Epoch 3:   0%|          | 96/40000 [01:19<9:02:08,  1.23it/s, training_loss=0.092][A
Epoch 3:   0%|          | 97/40000 [01:19<9:01:44,  1.23it/s, training_loss=0.092][A
Epoch 3:   0%|          | 97/40000 [01:19<9:01:44,  1.23it/s, training_loss=0.072][A
Epoch 3:   0%|          | 98/40000 [01:19<9:01:22,  1.23it/s, training_loss=0.072][A
Epoch 3:   0%|          | 98/40000 [01:20<9:01:22,  1.23it/s, training_loss=0.125][A
Epoch 3:   0%|          | 99/40000 [01:20<9:01:36,  1.23it/s, training_loss=0.125][A
Epoch 3:   0%|          | 99/40000 [01:21<9:01:36,  1.23it/s, training_loss=0.027][A
Epoch 3:   0%|          | 100/40000 [01:21<9:00:57,  1.23it/s, training_loss=0.027][A
Epoch 3:   0%|          | 100/40000 [01:22<9:00:57,  

Epoch 3:   0%|          | 189/40000 [02:34<9:00:14,  1.23it/s, training_loss=0.095][A
Epoch 3:   0%|          | 189/40000 [02:34<9:00:14,  1.23it/s, training_loss=0.107][A
Epoch 3:   0%|          | 190/40000 [02:34<8:59:22,  1.23it/s, training_loss=0.107][A
Epoch 3:   0%|          | 190/40000 [02:35<8:59:22,  1.23it/s, training_loss=0.079][A
Epoch 3:   0%|          | 191/40000 [02:35<8:59:40,  1.23it/s, training_loss=0.079][A
Epoch 3:   0%|          | 191/40000 [02:36<8:59:40,  1.23it/s, training_loss=0.094][A
Epoch 3:   0%|          | 192/40000 [02:36<8:59:27,  1.23it/s, training_loss=0.094][A
Epoch 3:   0%|          | 192/40000 [02:37<8:59:27,  1.23it/s, training_loss=0.034][A
Epoch 3:   0%|          | 193/40000 [02:37<8:59:28,  1.23it/s, training_loss=0.034][A
Epoch 3:   0%|          | 193/40000 [02:38<8:59:28,  1.23it/s, training_loss=0.114][A
Epoch 3:   0%|          | 194/40000 [02:38<9:00:21,  1.23it/s, training_loss=0.114][A
Epoch 3:   0%|          | 194/40000 [02:38<

Epoch 3:   1%|          | 283/40000 [03:50<8:57:14,  1.23it/s, training_loss=0.081][A
Epoch 3:   1%|          | 283/40000 [03:51<8:57:14,  1.23it/s, training_loss=0.051][A
Epoch 3:   1%|          | 284/40000 [03:51<8:58:48,  1.23it/s, training_loss=0.051][A
Epoch 3:   1%|          | 284/40000 [03:52<8:58:48,  1.23it/s, training_loss=0.079][A
Epoch 3:   1%|          | 285/40000 [03:52<8:58:44,  1.23it/s, training_loss=0.079][A
Epoch 3:   1%|          | 285/40000 [03:52<8:58:44,  1.23it/s, training_loss=0.113][A
Epoch 3:   1%|          | 286/40000 [03:52<8:58:54,  1.23it/s, training_loss=0.113][A
Epoch 3:   1%|          | 286/40000 [03:53<8:58:54,  1.23it/s, training_loss=0.050][A
Epoch 3:   1%|          | 287/40000 [03:53<8:58:29,  1.23it/s, training_loss=0.050][A
Epoch 3:   1%|          | 287/40000 [03:54<8:58:29,  1.23it/s, training_loss=0.063][A
Epoch 3:   1%|          | 288/40000 [03:54<8:57:44,  1.23it/s, training_loss=0.063][A
Epoch 3:   1%|          | 288/40000 [03:55<

Epoch 3:   1%|          | 377/40000 [05:06<8:57:51,  1.23it/s, training_loss=0.042][A
Epoch 3:   1%|          | 377/40000 [05:07<8:57:51,  1.23it/s, training_loss=0.069][A
Epoch 3:   1%|          | 378/40000 [05:07<8:58:03,  1.23it/s, training_loss=0.069][A
Epoch 3:   1%|          | 378/40000 [05:08<8:58:03,  1.23it/s, training_loss=0.087][A
Epoch 3:   1%|          | 379/40000 [05:08<8:57:41,  1.23it/s, training_loss=0.087][A
Epoch 3:   1%|          | 379/40000 [05:09<8:57:41,  1.23it/s, training_loss=0.033][A
Epoch 3:   1%|          | 380/40000 [05:09<8:57:46,  1.23it/s, training_loss=0.033][A
Epoch 3:   1%|          | 380/40000 [05:10<8:57:46,  1.23it/s, training_loss=0.034][A
Epoch 3:   1%|          | 381/40000 [05:10<8:57:25,  1.23it/s, training_loss=0.034][A
Epoch 3:   1%|          | 381/40000 [05:11<8:57:25,  1.23it/s, training_loss=0.088][A
Epoch 3:   1%|          | 382/40000 [05:11<8:57:30,  1.23it/s, training_loss=0.088][A
Epoch 3:   1%|          | 382/40000 [05:11<

Epoch 3:   1%|          | 471/40000 [06:23<8:56:19,  1.23it/s, training_loss=0.064][A
Epoch 3:   1%|          | 471/40000 [06:24<8:56:19,  1.23it/s, training_loss=0.040][A
Epoch 3:   1%|          | 472/40000 [06:24<8:56:49,  1.23it/s, training_loss=0.040][A
Epoch 3:   1%|          | 472/40000 [06:25<8:56:49,  1.23it/s, training_loss=0.062][A
Epoch 3:   1%|          | 473/40000 [06:25<8:57:01,  1.23it/s, training_loss=0.062][A
Epoch 3:   1%|          | 473/40000 [06:25<8:57:01,  1.23it/s, training_loss=0.042][A
Epoch 3:   1%|          | 474/40000 [06:25<8:56:49,  1.23it/s, training_loss=0.042][A
Epoch 3:   1%|          | 474/40000 [06:26<8:56:49,  1.23it/s, training_loss=0.053][A
Epoch 3:   1%|          | 475/40000 [06:26<8:56:19,  1.23it/s, training_loss=0.053][A
Epoch 3:   1%|          | 475/40000 [06:27<8:56:19,  1.23it/s, training_loss=0.057][A
Epoch 3:   1%|          | 476/40000 [06:27<8:56:35,  1.23it/s, training_loss=0.057][A
Epoch 3:   1%|          | 476/40000 [06:28<

Epoch 3:   1%|▏         | 565/40000 [07:39<8:55:34,  1.23it/s, training_loss=0.107][A
Epoch 3:   1%|▏         | 565/40000 [07:40<8:55:34,  1.23it/s, training_loss=0.068][A
Epoch 3:   1%|▏         | 566/40000 [07:40<8:54:39,  1.23it/s, training_loss=0.068][A
Epoch 3:   1%|▏         | 566/40000 [07:41<8:54:39,  1.23it/s, training_loss=0.080][A
Epoch 3:   1%|▏         | 567/40000 [07:41<8:53:13,  1.23it/s, training_loss=0.080][A
Epoch 3:   1%|▏         | 567/40000 [07:42<8:53:13,  1.23it/s, training_loss=0.088][A
Epoch 3:   1%|▏         | 568/40000 [07:42<8:52:49,  1.23it/s, training_loss=0.088][A
Epoch 3:   1%|▏         | 568/40000 [07:43<8:52:49,  1.23it/s, training_loss=0.040][A
Epoch 3:   1%|▏         | 569/40000 [07:43<8:53:02,  1.23it/s, training_loss=0.040][A
Epoch 3:   1%|▏         | 569/40000 [07:44<8:53:02,  1.23it/s, training_loss=0.126][A
Epoch 3:   1%|▏         | 570/40000 [07:44<8:52:53,  1.23it/s, training_loss=0.126][A
Epoch 3:   1%|▏         | 570/40000 [07:44<

Epoch 3:   2%|▏         | 659/40000 [08:56<8:54:12,  1.23it/s, training_loss=0.074][A
Epoch 3:   2%|▏         | 659/40000 [08:57<8:54:12,  1.23it/s, training_loss=0.061][A
Epoch 3:   2%|▏         | 660/40000 [08:57<8:54:08,  1.23it/s, training_loss=0.061][A
Epoch 3:   2%|▏         | 660/40000 [08:58<8:54:08,  1.23it/s, training_loss=0.064][A
Epoch 3:   2%|▏         | 661/40000 [08:58<8:53:55,  1.23it/s, training_loss=0.064][A
Epoch 3:   2%|▏         | 661/40000 [08:58<8:53:55,  1.23it/s, training_loss=0.126][A
Epoch 3:   2%|▏         | 662/40000 [08:58<8:54:14,  1.23it/s, training_loss=0.126][A
Epoch 3:   2%|▏         | 662/40000 [08:59<8:54:14,  1.23it/s, training_loss=0.068][A
Epoch 3:   2%|▏         | 663/40000 [08:59<8:54:00,  1.23it/s, training_loss=0.068][A
Epoch 3:   2%|▏         | 663/40000 [09:00<8:54:00,  1.23it/s, training_loss=0.072][A
Epoch 3:   2%|▏         | 664/40000 [09:00<8:53:40,  1.23it/s, training_loss=0.072][A
Epoch 3:   2%|▏         | 664/40000 [09:01<

Epoch 3:   2%|▏         | 753/40000 [10:12<8:51:58,  1.23it/s, training_loss=0.086][A
Epoch 3:   2%|▏         | 753/40000 [10:13<8:51:58,  1.23it/s, training_loss=0.155][A
Epoch 3:   2%|▏         | 754/40000 [10:13<8:52:05,  1.23it/s, training_loss=0.155][A
Epoch 3:   2%|▏         | 754/40000 [10:14<8:52:05,  1.23it/s, training_loss=0.122][A
Epoch 3:   2%|▏         | 755/40000 [10:14<8:52:17,  1.23it/s, training_loss=0.122][A
Epoch 3:   2%|▏         | 755/40000 [10:15<8:52:17,  1.23it/s, training_loss=0.093][A
Epoch 3:   2%|▏         | 756/40000 [10:15<8:52:48,  1.23it/s, training_loss=0.093][A
Epoch 3:   2%|▏         | 756/40000 [10:16<8:52:48,  1.23it/s, training_loss=0.043][A
Epoch 3:   2%|▏         | 757/40000 [10:16<8:52:12,  1.23it/s, training_loss=0.043][A
Epoch 3:   2%|▏         | 757/40000 [10:17<8:52:12,  1.23it/s, training_loss=0.085][A
Epoch 3:   2%|▏         | 758/40000 [10:17<8:52:48,  1.23it/s, training_loss=0.085][A
Epoch 3:   2%|▏         | 758/40000 [10:17<

Epoch 3:   2%|▏         | 847/40000 [11:29<8:51:01,  1.23it/s, training_loss=0.053][A
Epoch 3:   2%|▏         | 847/40000 [11:30<8:51:01,  1.23it/s, training_loss=0.109][A
Epoch 3:   2%|▏         | 848/40000 [11:30<8:50:40,  1.23it/s, training_loss=0.109][A
Epoch 3:   2%|▏         | 848/40000 [11:31<8:50:40,  1.23it/s, training_loss=0.094][A
Epoch 3:   2%|▏         | 849/40000 [11:31<8:51:19,  1.23it/s, training_loss=0.094][A
Epoch 3:   2%|▏         | 849/40000 [11:31<8:51:19,  1.23it/s, training_loss=0.050][A
Epoch 3:   2%|▏         | 850/40000 [11:31<8:51:30,  1.23it/s, training_loss=0.050][A
Epoch 3:   2%|▏         | 850/40000 [11:32<8:51:30,  1.23it/s, training_loss=0.118][A
Epoch 3:   2%|▏         | 851/40000 [11:32<8:50:25,  1.23it/s, training_loss=0.118][A
Epoch 3:   2%|▏         | 851/40000 [11:33<8:50:25,  1.23it/s, training_loss=0.075][A
Epoch 3:   2%|▏         | 852/40000 [11:33<8:50:17,  1.23it/s, training_loss=0.075][A
Epoch 3:   2%|▏         | 852/40000 [11:34<

Epoch 3:   2%|▏         | 941/40000 [12:45<8:49:54,  1.23it/s, training_loss=0.053][A
Epoch 3:   2%|▏         | 941/40000 [12:46<8:49:54,  1.23it/s, training_loss=0.132][A
Epoch 3:   2%|▏         | 942/40000 [12:46<8:49:09,  1.23it/s, training_loss=0.132][A
Epoch 3:   2%|▏         | 942/40000 [12:47<8:49:09,  1.23it/s, training_loss=0.147][A
Epoch 3:   2%|▏         | 943/40000 [12:47<8:49:09,  1.23it/s, training_loss=0.147][A
Epoch 3:   2%|▏         | 943/40000 [12:48<8:49:09,  1.23it/s, training_loss=0.170][A
Epoch 3:   2%|▏         | 944/40000 [12:48<8:50:02,  1.23it/s, training_loss=0.170][A
Epoch 3:   2%|▏         | 944/40000 [12:49<8:50:02,  1.23it/s, training_loss=0.119][A
Epoch 3:   2%|▏         | 945/40000 [12:49<8:49:56,  1.23it/s, training_loss=0.119][A
Epoch 3:   2%|▏         | 945/40000 [12:49<8:49:56,  1.23it/s, training_loss=0.077][A
Epoch 3:   2%|▏         | 946/40000 [12:49<8:49:53,  1.23it/s, training_loss=0.077][A
Epoch 3:   2%|▏         | 946/40000 [12:50<

Epoch 3:   3%|▎         | 1034/40000 [14:02<8:47:42,  1.23it/s, training_loss=0.044][A
Epoch 3:   3%|▎         | 1035/40000 [14:02<8:48:06,  1.23it/s, training_loss=0.044][A
Epoch 3:   3%|▎         | 1035/40000 [14:03<8:48:06,  1.23it/s, training_loss=0.065][A
Epoch 3:   3%|▎         | 1036/40000 [14:03<8:48:45,  1.23it/s, training_loss=0.065][A
Epoch 3:   3%|▎         | 1036/40000 [14:03<8:48:45,  1.23it/s, training_loss=0.038][A
Epoch 3:   3%|▎         | 1037/40000 [14:03<8:49:03,  1.23it/s, training_loss=0.038][A
Epoch 3:   3%|▎         | 1037/40000 [14:04<8:49:03,  1.23it/s, training_loss=0.125][A
Epoch 3:   3%|▎         | 1038/40000 [14:04<8:47:51,  1.23it/s, training_loss=0.125][A
Epoch 3:   3%|▎         | 1038/40000 [14:05<8:47:51,  1.23it/s, training_loss=0.053][A
Epoch 3:   3%|▎         | 1039/40000 [14:05<8:48:02,  1.23it/s, training_loss=0.053][A
Epoch 3:   3%|▎         | 1039/40000 [14:06<8:48:02,  1.23it/s, training_loss=0.075][A
Epoch 3:   3%|▎         | 1040/4

Epoch 3:   3%|▎         | 1127/40000 [15:18<8:47:16,  1.23it/s, training_loss=0.044][A
Epoch 3:   3%|▎         | 1128/40000 [15:18<8:47:23,  1.23it/s, training_loss=0.044][A
Epoch 3:   3%|▎         | 1128/40000 [15:18<8:47:23,  1.23it/s, training_loss=0.055][A
Epoch 3:   3%|▎         | 1129/40000 [15:18<8:46:52,  1.23it/s, training_loss=0.055][A
Epoch 3:   3%|▎         | 1129/40000 [15:19<8:46:52,  1.23it/s, training_loss=0.029][A
Epoch 3:   3%|▎         | 1130/40000 [15:19<8:47:12,  1.23it/s, training_loss=0.029][A
Epoch 3:   3%|▎         | 1130/40000 [15:20<8:47:12,  1.23it/s, training_loss=0.040][A
Epoch 3:   3%|▎         | 1131/40000 [15:20<8:47:51,  1.23it/s, training_loss=0.040][A
Epoch 3:   3%|▎         | 1131/40000 [15:21<8:47:51,  1.23it/s, training_loss=0.041][A
Epoch 3:   3%|▎         | 1132/40000 [15:21<8:47:13,  1.23it/s, training_loss=0.041][A
Epoch 3:   3%|▎         | 1132/40000 [15:22<8:47:13,  1.23it/s, training_loss=0.071][A
Epoch 3:   3%|▎         | 1133/4

Epoch 3:   3%|▎         | 1220/40000 [16:33<8:44:26,  1.23it/s, training_loss=0.097][A
Epoch 3:   3%|▎         | 1221/40000 [16:33<8:44:08,  1.23it/s, training_loss=0.097][A
Epoch 3:   3%|▎         | 1221/40000 [16:34<8:44:08,  1.23it/s, training_loss=0.074][A
Epoch 3:   3%|▎         | 1222/40000 [16:34<8:44:02,  1.23it/s, training_loss=0.074][A
Epoch 3:   3%|▎         | 1222/40000 [16:35<8:44:02,  1.23it/s, training_loss=0.029][A
Epoch 3:   3%|▎         | 1223/40000 [16:35<8:44:01,  1.23it/s, training_loss=0.029][A
Epoch 3:   3%|▎         | 1223/40000 [16:36<8:44:01,  1.23it/s, training_loss=0.046][A
Epoch 3:   3%|▎         | 1224/40000 [16:36<8:43:53,  1.23it/s, training_loss=0.046][A
Epoch 3:   3%|▎         | 1224/40000 [16:36<8:43:53,  1.23it/s, training_loss=0.033][A
Epoch 3:   3%|▎         | 1225/40000 [16:36<8:44:27,  1.23it/s, training_loss=0.033][A
Epoch 3:   3%|▎         | 1225/40000 [16:37<8:44:27,  1.23it/s, training_loss=0.050][A
Epoch 3:   3%|▎         | 1226/4

Epoch 3:   3%|▎         | 1313/40000 [17:49<8:47:27,  1.22it/s, training_loss=0.087][A
Epoch 3:   3%|▎         | 1314/40000 [17:49<8:47:20,  1.22it/s, training_loss=0.087][A
Epoch 3:   3%|▎         | 1314/40000 [17:50<8:47:20,  1.22it/s, training_loss=0.141][A
Epoch 3:   3%|▎         | 1315/40000 [17:50<8:46:49,  1.22it/s, training_loss=0.141][A
Epoch 3:   3%|▎         | 1315/40000 [17:50<8:46:49,  1.22it/s, training_loss=0.072][A
Epoch 3:   3%|▎         | 1316/40000 [17:50<8:45:54,  1.23it/s, training_loss=0.072][A
Epoch 3:   3%|▎         | 1316/40000 [17:51<8:45:54,  1.23it/s, training_loss=0.052][A
Epoch 3:   3%|▎         | 1317/40000 [17:51<8:45:23,  1.23it/s, training_loss=0.052][A
Epoch 3:   3%|▎         | 1317/40000 [17:52<8:45:23,  1.23it/s, training_loss=0.050][A
Epoch 3:   3%|▎         | 1318/40000 [17:52<8:45:30,  1.23it/s, training_loss=0.050][A
Epoch 3:   3%|▎         | 1318/40000 [17:53<8:45:30,  1.23it/s, training_loss=0.077][A
Epoch 3:   3%|▎         | 1319/4

Epoch 3:   4%|▎         | 1406/40000 [19:04<8:44:19,  1.23it/s, training_loss=0.087][A
Epoch 3:   4%|▎         | 1407/40000 [19:04<8:43:35,  1.23it/s, training_loss=0.087][A
Epoch 3:   4%|▎         | 1407/40000 [19:05<8:43:35,  1.23it/s, training_loss=0.051][A
Epoch 3:   4%|▎         | 1408/40000 [19:05<8:42:18,  1.23it/s, training_loss=0.051][A
Epoch 3:   4%|▎         | 1408/40000 [19:06<8:42:18,  1.23it/s, training_loss=0.115][A
Epoch 3:   4%|▎         | 1409/40000 [19:06<8:42:15,  1.23it/s, training_loss=0.115][A
Epoch 3:   4%|▎         | 1409/40000 [19:07<8:42:15,  1.23it/s, training_loss=0.064][A
Epoch 3:   4%|▎         | 1410/40000 [19:07<8:42:19,  1.23it/s, training_loss=0.064][A
Epoch 3:   4%|▎         | 1410/40000 [19:08<8:42:19,  1.23it/s, training_loss=0.107][A
Epoch 3:   4%|▎         | 1411/40000 [19:08<8:43:01,  1.23it/s, training_loss=0.107][A
Epoch 3:   4%|▎         | 1411/40000 [19:09<8:43:01,  1.23it/s, training_loss=0.094][A
Epoch 3:   4%|▎         | 1412/4

Epoch 3:   4%|▎         | 1499/40000 [20:20<8:41:30,  1.23it/s, training_loss=0.169][A
Epoch 3:   4%|▍         | 1500/40000 [20:20<8:41:05,  1.23it/s, training_loss=0.169][A
Epoch 3:   4%|▍         | 1500/40000 [20:21<8:41:05,  1.23it/s, training_loss=0.015][A
Epoch 3:   4%|▍         | 1501/40000 [20:21<8:40:17,  1.23it/s, training_loss=0.015][A
Epoch 3:   4%|▍         | 1501/40000 [20:22<8:40:17,  1.23it/s, training_loss=0.074][A
Epoch 3:   4%|▍         | 1502/40000 [20:22<8:41:23,  1.23it/s, training_loss=0.074][A
Epoch 3:   4%|▍         | 1502/40000 [20:23<8:41:23,  1.23it/s, training_loss=0.073][A
Epoch 3:   4%|▍         | 1503/40000 [20:23<8:41:54,  1.23it/s, training_loss=0.073][A
Epoch 3:   4%|▍         | 1503/40000 [20:23<8:41:54,  1.23it/s, training_loss=0.083][A
Epoch 3:   4%|▍         | 1504/40000 [20:23<8:41:44,  1.23it/s, training_loss=0.083][A
Epoch 3:   4%|▍         | 1504/40000 [20:24<8:41:44,  1.23it/s, training_loss=0.040][A
Epoch 3:   4%|▍         | 1505/4

Epoch 3:   4%|▍         | 1592/40000 [21:36<8:40:47,  1.23it/s, training_loss=0.078][A
Epoch 3:   4%|▍         | 1593/40000 [21:36<8:40:45,  1.23it/s, training_loss=0.078][A
Epoch 3:   4%|▍         | 1593/40000 [21:37<8:40:45,  1.23it/s, training_loss=0.083][A
Epoch 3:   4%|▍         | 1594/40000 [21:37<8:39:30,  1.23it/s, training_loss=0.083][A
Epoch 3:   4%|▍         | 1594/40000 [21:37<8:39:30,  1.23it/s, training_loss=0.118][A
Epoch 3:   4%|▍         | 1595/40000 [21:37<8:39:22,  1.23it/s, training_loss=0.118][A
Epoch 3:   4%|▍         | 1595/40000 [21:38<8:39:22,  1.23it/s, training_loss=0.152][A
Epoch 3:   4%|▍         | 1596/40000 [21:38<8:39:47,  1.23it/s, training_loss=0.152][A
Epoch 3:   4%|▍         | 1596/40000 [21:39<8:39:47,  1.23it/s, training_loss=0.147][A
Epoch 3:   4%|▍         | 1597/40000 [21:39<8:39:48,  1.23it/s, training_loss=0.147][A
Epoch 3:   4%|▍         | 1597/40000 [21:40<8:39:48,  1.23it/s, training_loss=0.056][A
Epoch 3:   4%|▍         | 1598/4

Epoch 3:   4%|▍         | 1685/40000 [22:51<8:39:15,  1.23it/s, training_loss=0.065][A
Epoch 3:   4%|▍         | 1686/40000 [22:51<8:39:39,  1.23it/s, training_loss=0.065][A
Epoch 3:   4%|▍         | 1686/40000 [22:52<8:39:39,  1.23it/s, training_loss=0.073][A
Epoch 3:   4%|▍         | 1687/40000 [22:52<8:39:55,  1.23it/s, training_loss=0.073][A
Epoch 3:   4%|▍         | 1687/40000 [22:53<8:39:55,  1.23it/s, training_loss=0.035][A
Epoch 3:   4%|▍         | 1688/40000 [22:53<8:39:00,  1.23it/s, training_loss=0.035][A
Epoch 3:   4%|▍         | 1688/40000 [22:54<8:39:00,  1.23it/s, training_loss=0.114][A
Epoch 3:   4%|▍         | 1689/40000 [22:54<8:39:37,  1.23it/s, training_loss=0.114][A
Epoch 3:   4%|▍         | 1689/40000 [22:55<8:39:37,  1.23it/s, training_loss=0.082][A
Epoch 3:   4%|▍         | 1690/40000 [22:55<8:39:38,  1.23it/s, training_loss=0.082][A
Epoch 3:   4%|▍         | 1690/40000 [22:56<8:39:38,  1.23it/s, training_loss=0.076][A
Epoch 3:   4%|▍         | 1691/4

Epoch 3:   4%|▍         | 1778/40000 [24:07<8:37:31,  1.23it/s, training_loss=0.038][A
Epoch 3:   4%|▍         | 1779/40000 [24:07<8:37:40,  1.23it/s, training_loss=0.038][A
Epoch 3:   4%|▍         | 1779/40000 [24:08<8:37:40,  1.23it/s, training_loss=0.042][A
Epoch 3:   4%|▍         | 1780/40000 [24:08<8:38:01,  1.23it/s, training_loss=0.042][A
Epoch 3:   4%|▍         | 1780/40000 [24:09<8:38:01,  1.23it/s, training_loss=0.100][A
Epoch 3:   4%|▍         | 1781/40000 [24:09<8:38:17,  1.23it/s, training_loss=0.100][A
Epoch 3:   4%|▍         | 1781/40000 [24:10<8:38:17,  1.23it/s, training_loss=0.078][A
Epoch 3:   4%|▍         | 1782/40000 [24:10<8:39:07,  1.23it/s, training_loss=0.078][A
Epoch 3:   4%|▍         | 1782/40000 [24:10<8:39:07,  1.23it/s, training_loss=0.060][A
Epoch 3:   4%|▍         | 1783/40000 [24:10<8:38:01,  1.23it/s, training_loss=0.060][A
Epoch 3:   4%|▍         | 1783/40000 [24:11<8:38:01,  1.23it/s, training_loss=0.074][A
Epoch 3:   4%|▍         | 1784/4

Epoch 3:   5%|▍         | 1871/40000 [25:23<8:36:59,  1.23it/s, training_loss=0.088][A
Epoch 3:   5%|▍         | 1872/40000 [25:23<8:35:46,  1.23it/s, training_loss=0.088][A
Epoch 3:   5%|▍         | 1872/40000 [25:24<8:35:46,  1.23it/s, training_loss=0.068][A
Epoch 3:   5%|▍         | 1873/40000 [25:24<8:36:54,  1.23it/s, training_loss=0.068][A
Epoch 3:   5%|▍         | 1873/40000 [25:24<8:36:54,  1.23it/s, training_loss=0.092][A
Epoch 3:   5%|▍         | 1874/40000 [25:24<8:36:18,  1.23it/s, training_loss=0.092][A
Epoch 3:   5%|▍         | 1874/40000 [25:25<8:36:18,  1.23it/s, training_loss=0.118][A
Epoch 3:   5%|▍         | 1875/40000 [25:25<8:35:55,  1.23it/s, training_loss=0.118][A
Epoch 3:   5%|▍         | 1875/40000 [25:26<8:35:55,  1.23it/s, training_loss=0.090][A
Epoch 3:   5%|▍         | 1876/40000 [25:26<8:36:20,  1.23it/s, training_loss=0.090][A
Epoch 3:   5%|▍         | 1876/40000 [25:27<8:36:20,  1.23it/s, training_loss=0.053][A
Epoch 3:   5%|▍         | 1877/4

Epoch 3:   5%|▍         | 1964/40000 [26:38<8:34:50,  1.23it/s, training_loss=0.161][A
Epoch 3:   5%|▍         | 1965/40000 [26:38<8:35:35,  1.23it/s, training_loss=0.161][A
Epoch 3:   5%|▍         | 1965/40000 [26:39<8:35:35,  1.23it/s, training_loss=0.163][A
Epoch 3:   5%|▍         | 1966/40000 [26:39<8:35:52,  1.23it/s, training_loss=0.163][A
Epoch 3:   5%|▍         | 1966/40000 [26:40<8:35:52,  1.23it/s, training_loss=0.045][A
Epoch 3:   5%|▍         | 1967/40000 [26:40<8:35:56,  1.23it/s, training_loss=0.045][A
Epoch 3:   5%|▍         | 1967/40000 [26:41<8:35:56,  1.23it/s, training_loss=0.097][A
Epoch 3:   5%|▍         | 1968/40000 [26:41<8:36:16,  1.23it/s, training_loss=0.097][A
Epoch 3:   5%|▍         | 1968/40000 [26:42<8:36:16,  1.23it/s, training_loss=0.059][A
Epoch 3:   5%|▍         | 1969/40000 [26:42<8:36:09,  1.23it/s, training_loss=0.059][A
Epoch 3:   5%|▍         | 1969/40000 [26:42<8:36:09,  1.23it/s, training_loss=0.058][A
Epoch 3:   5%|▍         | 1970/4

Epoch 3:   5%|▌         | 2057/40000 [27:54<8:33:03,  1.23it/s, training_loss=0.035][A
Epoch 3:   5%|▌         | 2058/40000 [27:54<8:33:48,  1.23it/s, training_loss=0.035][A
Epoch 3:   5%|▌         | 2058/40000 [27:55<8:33:48,  1.23it/s, training_loss=0.110][A
Epoch 3:   5%|▌         | 2059/40000 [27:55<8:33:06,  1.23it/s, training_loss=0.110][A
Epoch 3:   5%|▌         | 2059/40000 [27:56<8:33:06,  1.23it/s, training_loss=0.162][A
Epoch 3:   5%|▌         | 2060/40000 [27:56<8:34:00,  1.23it/s, training_loss=0.162][A
Epoch 3:   5%|▌         | 2060/40000 [27:56<8:34:00,  1.23it/s, training_loss=0.026][A
Epoch 3:   5%|▌         | 2061/40000 [27:56<8:33:53,  1.23it/s, training_loss=0.026][A
Epoch 3:   5%|▌         | 2061/40000 [27:57<8:33:53,  1.23it/s, training_loss=0.052][A
Epoch 3:   5%|▌         | 2062/40000 [27:57<8:33:25,  1.23it/s, training_loss=0.052][A
Epoch 3:   5%|▌         | 2062/40000 [27:58<8:33:25,  1.23it/s, training_loss=0.035][A
Epoch 3:   5%|▌         | 2063/4

Epoch 3:   5%|▌         | 2150/40000 [29:10<8:33:02,  1.23it/s, training_loss=0.066][A
Epoch 3:   5%|▌         | 2151/40000 [29:10<8:32:48,  1.23it/s, training_loss=0.066][A
Epoch 3:   5%|▌         | 2151/40000 [29:10<8:32:48,  1.23it/s, training_loss=0.060][A
Epoch 3:   5%|▌         | 2152/40000 [29:10<8:32:27,  1.23it/s, training_loss=0.060][A
Epoch 3:   5%|▌         | 2152/40000 [29:11<8:32:27,  1.23it/s, training_loss=0.075][A
Epoch 3:   5%|▌         | 2153/40000 [29:11<8:32:28,  1.23it/s, training_loss=0.075][A
Epoch 3:   5%|▌         | 2153/40000 [29:12<8:32:28,  1.23it/s, training_loss=0.105][A
Epoch 3:   5%|▌         | 2154/40000 [29:12<8:32:10,  1.23it/s, training_loss=0.105][A
Epoch 3:   5%|▌         | 2154/40000 [29:13<8:32:10,  1.23it/s, training_loss=0.096][A
Epoch 3:   5%|▌         | 2155/40000 [29:13<8:32:33,  1.23it/s, training_loss=0.096][A
Epoch 3:   5%|▌         | 2155/40000 [29:14<8:32:33,  1.23it/s, training_loss=0.079][A
Epoch 3:   5%|▌         | 2156/4

Epoch 3:   6%|▌         | 2243/40000 [30:25<8:31:20,  1.23it/s, training_loss=0.040][A
Epoch 3:   6%|▌         | 2244/40000 [30:25<8:31:43,  1.23it/s, training_loss=0.040][A
Epoch 3:   6%|▌         | 2244/40000 [30:26<8:31:43,  1.23it/s, training_loss=0.134][A
Epoch 3:   6%|▌         | 2245/40000 [30:26<8:32:29,  1.23it/s, training_loss=0.134][A
Epoch 3:   6%|▌         | 2245/40000 [30:27<8:32:29,  1.23it/s, training_loss=0.033][A
Epoch 3:   6%|▌         | 2246/40000 [30:27<8:31:44,  1.23it/s, training_loss=0.033][A
Epoch 3:   6%|▌         | 2246/40000 [30:28<8:31:44,  1.23it/s, training_loss=0.151][A
Epoch 3:   6%|▌         | 2247/40000 [30:28<8:32:10,  1.23it/s, training_loss=0.151][A
Epoch 3:   6%|▌         | 2247/40000 [30:28<8:32:10,  1.23it/s, training_loss=0.082][A
Epoch 3:   6%|▌         | 2248/40000 [30:28<8:32:09,  1.23it/s, training_loss=0.082][A
Epoch 3:   6%|▌         | 2248/40000 [30:29<8:32:09,  1.23it/s, training_loss=0.087][A
Epoch 3:   6%|▌         | 2249/4

Epoch 3:   6%|▌         | 2336/40000 [31:41<8:30:05,  1.23it/s, training_loss=0.036][A
Epoch 3:   6%|▌         | 2337/40000 [31:41<8:30:44,  1.23it/s, training_loss=0.036][A
Epoch 3:   6%|▌         | 2337/40000 [31:42<8:30:44,  1.23it/s, training_loss=0.055][A
Epoch 3:   6%|▌         | 2338/40000 [31:42<8:29:54,  1.23it/s, training_loss=0.055][A
Epoch 3:   6%|▌         | 2338/40000 [31:42<8:29:54,  1.23it/s, training_loss=0.125][A
Epoch 3:   6%|▌         | 2339/40000 [31:42<8:29:50,  1.23it/s, training_loss=0.125][A
Epoch 3:   6%|▌         | 2339/40000 [31:43<8:29:50,  1.23it/s, training_loss=0.050][A
Epoch 3:   6%|▌         | 2340/40000 [31:43<8:30:07,  1.23it/s, training_loss=0.050][A
Epoch 3:   6%|▌         | 2340/40000 [31:44<8:30:07,  1.23it/s, training_loss=0.107][A
Epoch 3:   6%|▌         | 2341/40000 [31:44<8:30:17,  1.23it/s, training_loss=0.107][A
Epoch 3:   6%|▌         | 2341/40000 [31:45<8:30:17,  1.23it/s, training_loss=0.138][A
Epoch 3:   6%|▌         | 2342/4

Epoch 3:   6%|▌         | 2429/40000 [32:56<8:28:01,  1.23it/s, training_loss=0.106][A
Epoch 3:   6%|▌         | 2430/40000 [32:56<8:28:56,  1.23it/s, training_loss=0.106][A
Epoch 3:   6%|▌         | 2430/40000 [32:57<8:28:56,  1.23it/s, training_loss=0.128][A
Epoch 3:   6%|▌         | 2431/40000 [32:57<8:28:31,  1.23it/s, training_loss=0.128][A
Epoch 3:   6%|▌         | 2431/40000 [32:58<8:28:31,  1.23it/s, training_loss=0.071][A
Epoch 3:   6%|▌         | 2432/40000 [32:58<8:28:47,  1.23it/s, training_loss=0.071][A
Epoch 3:   6%|▌         | 2432/40000 [32:59<8:28:47,  1.23it/s, training_loss=0.113][A
Epoch 3:   6%|▌         | 2433/40000 [32:59<8:28:33,  1.23it/s, training_loss=0.113][A
Epoch 3:   6%|▌         | 2433/40000 [33:00<8:28:33,  1.23it/s, training_loss=0.069][A
Epoch 3:   6%|▌         | 2434/40000 [33:00<8:28:11,  1.23it/s, training_loss=0.069][A
Epoch 3:   6%|▌         | 2434/40000 [33:00<8:28:11,  1.23it/s, training_loss=0.101][A
Epoch 3:   6%|▌         | 2435/4

Epoch 3:   6%|▋         | 2522/40000 [34:12<8:27:21,  1.23it/s, training_loss=0.072][A
Epoch 3:   6%|▋         | 2523/40000 [34:12<8:27:16,  1.23it/s, training_loss=0.072][A
Epoch 3:   6%|▋         | 2523/40000 [34:13<8:27:16,  1.23it/s, training_loss=0.025][A
Epoch 3:   6%|▋         | 2524/40000 [34:13<8:26:48,  1.23it/s, training_loss=0.025][A
Epoch 3:   6%|▋         | 2524/40000 [34:14<8:26:48,  1.23it/s, training_loss=0.066][A
Epoch 3:   6%|▋         | 2525/40000 [34:14<8:28:19,  1.23it/s, training_loss=0.066][A
Epoch 3:   6%|▋         | 2525/40000 [34:14<8:28:19,  1.23it/s, training_loss=0.098][A
Epoch 3:   6%|▋         | 2526/40000 [34:14<8:28:37,  1.23it/s, training_loss=0.098][A
Epoch 3:   6%|▋         | 2526/40000 [34:15<8:28:37,  1.23it/s, training_loss=0.062][A
Epoch 3:   6%|▋         | 2527/40000 [34:15<8:28:14,  1.23it/s, training_loss=0.062][A
Epoch 3:   6%|▋         | 2527/40000 [34:16<8:28:14,  1.23it/s, training_loss=0.070][A
Epoch 3:   6%|▋         | 2528/4

Epoch 3:   7%|▋         | 2615/40000 [35:27<8:25:36,  1.23it/s, training_loss=0.090][A
Epoch 3:   7%|▋         | 2616/40000 [35:27<8:25:47,  1.23it/s, training_loss=0.090][A
Epoch 3:   7%|▋         | 2616/40000 [35:28<8:25:47,  1.23it/s, training_loss=0.042][A
Epoch 3:   7%|▋         | 2617/40000 [35:28<8:25:25,  1.23it/s, training_loss=0.042][A
Epoch 3:   7%|▋         | 2617/40000 [35:29<8:25:25,  1.23it/s, training_loss=0.046][A
Epoch 3:   7%|▋         | 2618/40000 [35:29<8:24:56,  1.23it/s, training_loss=0.046][A
Epoch 3:   7%|▋         | 2618/40000 [35:30<8:24:56,  1.23it/s, training_loss=0.038][A
Epoch 3:   7%|▋         | 2619/40000 [35:30<8:25:19,  1.23it/s, training_loss=0.038][A
Epoch 3:   7%|▋         | 2619/40000 [35:31<8:25:19,  1.23it/s, training_loss=0.037][A
Epoch 3:   7%|▋         | 2620/40000 [35:31<8:25:07,  1.23it/s, training_loss=0.037][A
Epoch 3:   7%|▋         | 2620/40000 [35:32<8:25:07,  1.23it/s, training_loss=0.054][A
Epoch 3:   7%|▋         | 2621/4

Epoch 3:   7%|▋         | 2708/40000 [36:43<8:24:33,  1.23it/s, training_loss=0.082][A
Epoch 3:   7%|▋         | 2709/40000 [36:43<8:25:26,  1.23it/s, training_loss=0.082][A
Epoch 3:   7%|▋         | 2709/40000 [36:44<8:25:26,  1.23it/s, training_loss=0.073][A
Epoch 3:   7%|▋         | 2710/40000 [36:44<8:25:15,  1.23it/s, training_loss=0.073][A
Epoch 3:   7%|▋         | 2710/40000 [36:45<8:25:15,  1.23it/s, training_loss=0.059][A
Epoch 3:   7%|▋         | 2711/40000 [36:45<8:25:05,  1.23it/s, training_loss=0.059][A
Epoch 3:   7%|▋         | 2711/40000 [36:45<8:25:05,  1.23it/s, training_loss=0.052][A
Epoch 3:   7%|▋         | 2712/40000 [36:45<8:24:52,  1.23it/s, training_loss=0.052][A
Epoch 3:   7%|▋         | 2712/40000 [36:46<8:24:52,  1.23it/s, training_loss=0.073][A
Epoch 3:   7%|▋         | 2713/40000 [36:46<8:24:23,  1.23it/s, training_loss=0.073][A
Epoch 3:   7%|▋         | 2713/40000 [36:47<8:24:23,  1.23it/s, training_loss=0.109][A
Epoch 3:   7%|▋         | 2714/4

Epoch 3:   7%|▋         | 2801/40000 [37:59<8:23:46,  1.23it/s, training_loss=0.057][A
Epoch 3:   7%|▋         | 2802/40000 [37:59<8:23:56,  1.23it/s, training_loss=0.057][A
Epoch 3:   7%|▋         | 2802/40000 [37:59<8:23:56,  1.23it/s, training_loss=0.115][A
Epoch 3:   7%|▋         | 2803/40000 [37:59<8:24:16,  1.23it/s, training_loss=0.115][A
Epoch 3:   7%|▋         | 2803/40000 [38:00<8:24:16,  1.23it/s, training_loss=0.083][A
Epoch 3:   7%|▋         | 2804/40000 [38:00<8:23:07,  1.23it/s, training_loss=0.083][A
Epoch 3:   7%|▋         | 2804/40000 [38:01<8:23:07,  1.23it/s, training_loss=0.102][A
Epoch 3:   7%|▋         | 2805/40000 [38:01<8:22:46,  1.23it/s, training_loss=0.102][A
Epoch 3:   7%|▋         | 2805/40000 [38:02<8:22:46,  1.23it/s, training_loss=0.076][A
Epoch 3:   7%|▋         | 2806/40000 [38:02<8:23:04,  1.23it/s, training_loss=0.076][A
Epoch 3:   7%|▋         | 2806/40000 [38:03<8:23:04,  1.23it/s, training_loss=0.105][A
Epoch 3:   7%|▋         | 2807/4

Epoch 3:   7%|▋         | 2894/40000 [39:14<8:22:22,  1.23it/s, training_loss=0.034][A
Epoch 3:   7%|▋         | 2895/40000 [39:14<8:22:09,  1.23it/s, training_loss=0.034][A
Epoch 3:   7%|▋         | 2895/40000 [39:15<8:22:09,  1.23it/s, training_loss=0.034][A
Epoch 3:   7%|▋         | 2896/40000 [39:15<8:22:11,  1.23it/s, training_loss=0.034][A
Epoch 3:   7%|▋         | 2896/40000 [39:16<8:22:11,  1.23it/s, training_loss=0.064][A
Epoch 3:   7%|▋         | 2897/40000 [39:16<8:22:32,  1.23it/s, training_loss=0.064][A
Epoch 3:   7%|▋         | 2897/40000 [39:17<8:22:32,  1.23it/s, training_loss=0.071][A
Epoch 3:   7%|▋         | 2898/40000 [39:17<8:22:34,  1.23it/s, training_loss=0.071][A
Epoch 3:   7%|▋         | 2898/40000 [39:17<8:22:34,  1.23it/s, training_loss=0.069][A
Epoch 3:   7%|▋         | 2899/40000 [39:17<8:22:09,  1.23it/s, training_loss=0.069][A
Epoch 3:   7%|▋         | 2899/40000 [39:18<8:22:09,  1.23it/s, training_loss=0.103][A
Epoch 3:   7%|▋         | 2900/4

Epoch 3:   7%|▋         | 2987/40000 [40:30<8:20:31,  1.23it/s, training_loss=0.067][A
Epoch 3:   7%|▋         | 2988/40000 [40:30<8:21:25,  1.23it/s, training_loss=0.067][A
Epoch 3:   7%|▋         | 2988/40000 [40:31<8:21:25,  1.23it/s, training_loss=0.035][A
Epoch 3:   7%|▋         | 2989/40000 [40:31<8:20:49,  1.23it/s, training_loss=0.035][A
Epoch 3:   7%|▋         | 2989/40000 [40:31<8:20:49,  1.23it/s, training_loss=0.102][A
Epoch 3:   7%|▋         | 2990/40000 [40:31<8:21:18,  1.23it/s, training_loss=0.102][A
Epoch 3:   7%|▋         | 2990/40000 [40:32<8:21:18,  1.23it/s, training_loss=0.121][A
Epoch 3:   7%|▋         | 2991/40000 [40:32<8:20:50,  1.23it/s, training_loss=0.121][A
Epoch 3:   7%|▋         | 2991/40000 [40:33<8:20:50,  1.23it/s, training_loss=0.056][A
Epoch 3:   7%|▋         | 2992/40000 [40:33<8:20:35,  1.23it/s, training_loss=0.056][A
Epoch 3:   7%|▋         | 2992/40000 [40:34<8:20:35,  1.23it/s, training_loss=0.114][A
Epoch 3:   7%|▋         | 2993/4

Epoch 3:   8%|▊         | 3080/40000 [41:45<8:21:41,  1.23it/s, training_loss=0.079][A
Epoch 3:   8%|▊         | 3081/40000 [41:45<8:21:31,  1.23it/s, training_loss=0.079][A
Epoch 3:   8%|▊         | 3081/40000 [41:46<8:21:31,  1.23it/s, training_loss=0.038][A
Epoch 3:   8%|▊         | 3082/40000 [41:46<8:21:44,  1.23it/s, training_loss=0.038][A
Epoch 3:   8%|▊         | 3082/40000 [41:47<8:21:44,  1.23it/s, training_loss=0.106][A
Epoch 3:   8%|▊         | 3083/40000 [41:47<8:20:51,  1.23it/s, training_loss=0.106][A
Epoch 3:   8%|▊         | 3083/40000 [41:48<8:20:51,  1.23it/s, training_loss=0.101][A
Epoch 3:   8%|▊         | 3084/40000 [41:48<8:20:26,  1.23it/s, training_loss=0.101][A
Epoch 3:   8%|▊         | 3084/40000 [41:49<8:20:26,  1.23it/s, training_loss=0.041][A
Epoch 3:   8%|▊         | 3085/40000 [41:49<8:20:04,  1.23it/s, training_loss=0.041][A
Epoch 3:   8%|▊         | 3085/40000 [41:49<8:20:04,  1.23it/s, training_loss=0.068][A
Epoch 3:   8%|▊         | 3086/4

Epoch 3:   8%|▊         | 3173/40000 [43:01<8:18:23,  1.23it/s, training_loss=0.071][A
Epoch 3:   8%|▊         | 3174/40000 [43:01<8:18:49,  1.23it/s, training_loss=0.071][A
Epoch 3:   8%|▊         | 3174/40000 [43:02<8:18:49,  1.23it/s, training_loss=0.042][A
Epoch 3:   8%|▊         | 3175/40000 [43:02<8:18:46,  1.23it/s, training_loss=0.042][A
Epoch 3:   8%|▊         | 3175/40000 [43:03<8:18:46,  1.23it/s, training_loss=0.056][A
Epoch 3:   8%|▊         | 3176/40000 [43:03<8:18:43,  1.23it/s, training_loss=0.056][A
Epoch 3:   8%|▊         | 3176/40000 [43:03<8:18:43,  1.23it/s, training_loss=0.128][A
Epoch 3:   8%|▊         | 3177/40000 [43:03<8:19:15,  1.23it/s, training_loss=0.128][A
Epoch 3:   8%|▊         | 3177/40000 [43:04<8:19:15,  1.23it/s, training_loss=0.068][A
Epoch 3:   8%|▊         | 3178/40000 [43:04<8:18:48,  1.23it/s, training_loss=0.068][A
Epoch 3:   8%|▊         | 3178/40000 [43:05<8:18:48,  1.23it/s, training_loss=0.069][A
Epoch 3:   8%|▊         | 3179/4

Epoch 3:   8%|▊         | 3266/40000 [44:17<8:18:29,  1.23it/s, training_loss=0.139][A
Epoch 3:   8%|▊         | 3267/40000 [44:17<8:18:13,  1.23it/s, training_loss=0.139][A
Epoch 3:   8%|▊         | 3267/40000 [44:17<8:18:13,  1.23it/s, training_loss=0.074][A
Epoch 3:   8%|▊         | 3268/40000 [44:17<8:17:55,  1.23it/s, training_loss=0.074][A
Epoch 3:   8%|▊         | 3268/40000 [44:18<8:17:55,  1.23it/s, training_loss=0.036][A
Epoch 3:   8%|▊         | 3269/40000 [44:18<8:16:38,  1.23it/s, training_loss=0.036][A
Epoch 3:   8%|▊         | 3269/40000 [44:19<8:16:38,  1.23it/s, training_loss=0.040][A
Epoch 3:   8%|▊         | 3270/40000 [44:19<8:18:08,  1.23it/s, training_loss=0.040][A
Epoch 3:   8%|▊         | 3270/40000 [44:20<8:18:08,  1.23it/s, training_loss=0.138][A
Epoch 3:   8%|▊         | 3271/40000 [44:20<8:17:41,  1.23it/s, training_loss=0.138][A
Epoch 3:   8%|▊         | 3271/40000 [44:21<8:17:41,  1.23it/s, training_loss=0.098][A
Epoch 3:   8%|▊         | 3272/4

Epoch 3:   8%|▊         | 3359/40000 [45:32<8:14:52,  1.23it/s, training_loss=0.061][A
Epoch 3:   8%|▊         | 3360/40000 [45:32<8:14:35,  1.23it/s, training_loss=0.061][A
Epoch 3:   8%|▊         | 3360/40000 [45:33<8:14:35,  1.23it/s, training_loss=0.086][A
Epoch 3:   8%|▊         | 3361/40000 [45:33<8:14:33,  1.23it/s, training_loss=0.086][A
Epoch 3:   8%|▊         | 3361/40000 [45:34<8:14:33,  1.23it/s, training_loss=0.067][A
Epoch 3:   8%|▊         | 3362/40000 [45:34<8:15:44,  1.23it/s, training_loss=0.067][A
Epoch 3:   8%|▊         | 3362/40000 [45:35<8:15:44,  1.23it/s, training_loss=0.035][A
Epoch 3:   8%|▊         | 3363/40000 [45:35<8:15:42,  1.23it/s, training_loss=0.035][A
Epoch 3:   8%|▊         | 3363/40000 [45:35<8:15:42,  1.23it/s, training_loss=0.118][A
Epoch 3:   8%|▊         | 3364/40000 [45:35<8:15:47,  1.23it/s, training_loss=0.118][A
Epoch 3:   8%|▊         | 3364/40000 [45:36<8:15:47,  1.23it/s, training_loss=0.065][A
Epoch 3:   8%|▊         | 3365/4

Epoch 3:   9%|▊         | 3452/40000 [46:48<8:13:42,  1.23it/s, training_loss=0.055][A
Epoch 3:   9%|▊         | 3453/40000 [46:48<8:14:51,  1.23it/s, training_loss=0.055][A
Epoch 3:   9%|▊         | 3453/40000 [46:49<8:14:51,  1.23it/s, training_loss=0.087][A
Epoch 3:   9%|▊         | 3454/40000 [46:49<8:15:24,  1.23it/s, training_loss=0.087][A
Epoch 3:   9%|▊         | 3454/40000 [46:49<8:15:24,  1.23it/s, training_loss=0.056][A
Epoch 3:   9%|▊         | 3455/40000 [46:49<8:14:52,  1.23it/s, training_loss=0.056][A
Epoch 3:   9%|▊         | 3455/40000 [46:50<8:14:52,  1.23it/s, training_loss=0.056][A
Epoch 3:   9%|▊         | 3456/40000 [46:50<8:14:41,  1.23it/s, training_loss=0.056][A
Epoch 3:   9%|▊         | 3456/40000 [46:51<8:14:41,  1.23it/s, training_loss=0.059][A
Epoch 3:   9%|▊         | 3457/40000 [46:51<8:14:46,  1.23it/s, training_loss=0.059][A
Epoch 3:   9%|▊         | 3457/40000 [46:52<8:14:46,  1.23it/s, training_loss=0.063][A
Epoch 3:   9%|▊         | 3458/4

Epoch 3:   9%|▉         | 3545/40000 [48:03<8:13:58,  1.23it/s, training_loss=0.075][A
Epoch 3:   9%|▉         | 3546/40000 [48:03<8:14:13,  1.23it/s, training_loss=0.075][A
Epoch 3:   9%|▉         | 3546/40000 [48:04<8:14:13,  1.23it/s, training_loss=0.042][A
Epoch 3:   9%|▉         | 3547/40000 [48:04<8:14:35,  1.23it/s, training_loss=0.042][A
Epoch 3:   9%|▉         | 3547/40000 [48:05<8:14:35,  1.23it/s, training_loss=0.066][A
Epoch 3:   9%|▉         | 3548/40000 [48:05<8:14:51,  1.23it/s, training_loss=0.066][A
Epoch 3:   9%|▉         | 3548/40000 [48:06<8:14:51,  1.23it/s, training_loss=0.036][A
Epoch 3:   9%|▉         | 3549/40000 [48:06<8:13:28,  1.23it/s, training_loss=0.036][A
Epoch 3:   9%|▉         | 3549/40000 [48:07<8:13:28,  1.23it/s, training_loss=0.106][A
Epoch 3:   9%|▉         | 3550/40000 [48:07<8:13:20,  1.23it/s, training_loss=0.106][A
Epoch 3:   9%|▉         | 3550/40000 [48:07<8:13:20,  1.23it/s, training_loss=0.050][A
Epoch 3:   9%|▉         | 3551/4

Epoch 3:   9%|▉         | 3638/40000 [49:19<8:12:56,  1.23it/s, training_loss=0.058][A
Epoch 3:   9%|▉         | 3639/40000 [49:19<8:12:27,  1.23it/s, training_loss=0.058][A
Epoch 3:   9%|▉         | 3639/40000 [49:20<8:12:27,  1.23it/s, training_loss=0.116][A
Epoch 3:   9%|▉         | 3640/40000 [49:20<8:13:03,  1.23it/s, training_loss=0.116][A
Epoch 3:   9%|▉         | 3640/40000 [49:21<8:13:03,  1.23it/s, training_loss=0.047][A
Epoch 3:   9%|▉         | 3641/40000 [49:21<8:12:32,  1.23it/s, training_loss=0.047][A
Epoch 3:   9%|▉         | 3641/40000 [49:21<8:12:32,  1.23it/s, training_loss=0.066][A
Epoch 3:   9%|▉         | 3642/40000 [49:21<8:12:03,  1.23it/s, training_loss=0.066][A
Epoch 3:   9%|▉         | 3642/40000 [49:22<8:12:03,  1.23it/s, training_loss=0.099][A
Epoch 3:   9%|▉         | 3643/40000 [49:22<8:11:53,  1.23it/s, training_loss=0.099][A
Epoch 3:   9%|▉         | 3643/40000 [49:23<8:11:53,  1.23it/s, training_loss=0.053][A
Epoch 3:   9%|▉         | 3644/4

Epoch 3:   9%|▉         | 3731/40000 [50:35<8:11:36,  1.23it/s, training_loss=0.122][A
Epoch 3:   9%|▉         | 3732/40000 [50:35<8:10:58,  1.23it/s, training_loss=0.122][A
Epoch 3:   9%|▉         | 3732/40000 [50:35<8:10:58,  1.23it/s, training_loss=0.122][A
Epoch 3:   9%|▉         | 3733/40000 [50:35<8:11:57,  1.23it/s, training_loss=0.122][A
Epoch 3:   9%|▉         | 3733/40000 [50:36<8:11:57,  1.23it/s, training_loss=0.148][A
Epoch 3:   9%|▉         | 3734/40000 [50:36<8:12:18,  1.23it/s, training_loss=0.148][A
Epoch 3:   9%|▉         | 3734/40000 [50:37<8:12:18,  1.23it/s, training_loss=0.108][A
Epoch 3:   9%|▉         | 3735/40000 [50:37<8:12:30,  1.23it/s, training_loss=0.108][A
Epoch 3:   9%|▉         | 3735/40000 [50:38<8:12:30,  1.23it/s, training_loss=0.088][A
Epoch 3:   9%|▉         | 3736/40000 [50:38<8:12:12,  1.23it/s, training_loss=0.088][A
Epoch 3:   9%|▉         | 3736/40000 [50:39<8:12:12,  1.23it/s, training_loss=0.036][A
Epoch 3:   9%|▉         | 3737/4

Epoch 3:  10%|▉         | 3824/40000 [51:50<8:09:37,  1.23it/s, training_loss=0.069][A
Epoch 3:  10%|▉         | 3825/40000 [51:50<8:10:30,  1.23it/s, training_loss=0.069][A
Epoch 3:  10%|▉         | 3825/40000 [51:51<8:10:30,  1.23it/s, training_loss=0.107][A
Epoch 3:  10%|▉         | 3826/40000 [51:51<8:09:17,  1.23it/s, training_loss=0.107][A
Epoch 3:  10%|▉         | 3826/40000 [51:52<8:09:17,  1.23it/s, training_loss=0.094][A
Epoch 3:  10%|▉         | 3827/40000 [51:52<8:09:41,  1.23it/s, training_loss=0.094][A
Epoch 3:  10%|▉         | 3827/40000 [51:53<8:09:41,  1.23it/s, training_loss=0.069][A
Epoch 3:  10%|▉         | 3828/40000 [51:53<8:09:57,  1.23it/s, training_loss=0.069][A
Epoch 3:  10%|▉         | 3828/40000 [51:53<8:09:57,  1.23it/s, training_loss=0.087][A
Epoch 3:  10%|▉         | 3829/40000 [51:53<8:09:43,  1.23it/s, training_loss=0.087][A
Epoch 3:  10%|▉         | 3829/40000 [51:54<8:09:43,  1.23it/s, training_loss=0.083][A
Epoch 3:  10%|▉         | 3830/4

Epoch 3:  10%|▉         | 3917/40000 [53:06<8:08:40,  1.23it/s, training_loss=0.160][A
Epoch 3:  10%|▉         | 3918/40000 [53:06<8:09:03,  1.23it/s, training_loss=0.160][A
Epoch 3:  10%|▉         | 3918/40000 [53:07<8:09:03,  1.23it/s, training_loss=0.112][A
Epoch 3:  10%|▉         | 3919/40000 [53:07<8:09:11,  1.23it/s, training_loss=0.112][A
Epoch 3:  10%|▉         | 3919/40000 [53:07<8:09:11,  1.23it/s, training_loss=0.054][A
Epoch 3:  10%|▉         | 3920/40000 [53:07<8:09:34,  1.23it/s, training_loss=0.054][A
Epoch 3:  10%|▉         | 3920/40000 [53:08<8:09:34,  1.23it/s, training_loss=0.079][A
Epoch 3:  10%|▉         | 3921/40000 [53:08<8:09:07,  1.23it/s, training_loss=0.079][A
Epoch 3:  10%|▉         | 3921/40000 [53:09<8:09:07,  1.23it/s, training_loss=0.098][A
Epoch 3:  10%|▉         | 3922/40000 [53:09<8:08:59,  1.23it/s, training_loss=0.098][A
Epoch 3:  10%|▉         | 3922/40000 [53:10<8:08:59,  1.23it/s, training_loss=0.098][A
Epoch 3:  10%|▉         | 3923/4

Epoch 3:  10%|█         | 4010/40000 [54:21<8:07:56,  1.23it/s, training_loss=0.054][A
Epoch 3:  10%|█         | 4011/40000 [54:21<8:08:38,  1.23it/s, training_loss=0.054][A
Epoch 3:  10%|█         | 4011/40000 [54:22<8:08:38,  1.23it/s, training_loss=0.068][A
Epoch 3:  10%|█         | 4012/40000 [54:22<8:08:01,  1.23it/s, training_loss=0.068][A
Epoch 3:  10%|█         | 4012/40000 [54:23<8:08:01,  1.23it/s, training_loss=0.139][A
Epoch 3:  10%|█         | 4013/40000 [54:23<8:07:24,  1.23it/s, training_loss=0.139][A
Epoch 3:  10%|█         | 4013/40000 [54:24<8:07:24,  1.23it/s, training_loss=0.098][A
Epoch 3:  10%|█         | 4014/40000 [54:24<8:06:41,  1.23it/s, training_loss=0.098][A
Epoch 3:  10%|█         | 4014/40000 [54:25<8:06:41,  1.23it/s, training_loss=0.095][A
Epoch 3:  10%|█         | 4015/40000 [54:25<8:07:15,  1.23it/s, training_loss=0.095][A
Epoch 3:  10%|█         | 4015/40000 [54:25<8:07:15,  1.23it/s, training_loss=0.076][A
Epoch 3:  10%|█         | 4016/4

Epoch 3:  10%|█         | 4103/40000 [55:37<8:05:24,  1.23it/s, training_loss=0.145][A
Epoch 3:  10%|█         | 4104/40000 [55:37<8:06:07,  1.23it/s, training_loss=0.145][A
Epoch 3:  10%|█         | 4104/40000 [55:38<8:06:07,  1.23it/s, training_loss=0.029][A
Epoch 3:  10%|█         | 4105/40000 [55:38<8:06:14,  1.23it/s, training_loss=0.029][A
Epoch 3:  10%|█         | 4105/40000 [55:39<8:06:14,  1.23it/s, training_loss=0.125][A
Epoch 3:  10%|█         | 4106/40000 [55:39<8:06:20,  1.23it/s, training_loss=0.125][A
Epoch 3:  10%|█         | 4106/40000 [55:39<8:06:20,  1.23it/s, training_loss=0.205][A
Epoch 3:  10%|█         | 4107/40000 [55:39<8:06:11,  1.23it/s, training_loss=0.205][A
Epoch 3:  10%|█         | 4107/40000 [55:40<8:06:11,  1.23it/s, training_loss=0.054][A
Epoch 3:  10%|█         | 4108/40000 [55:40<8:06:43,  1.23it/s, training_loss=0.054][A
Epoch 3:  10%|█         | 4108/40000 [55:41<8:06:43,  1.23it/s, training_loss=0.092][A
Epoch 3:  10%|█         | 4109/4

Epoch 3:  10%|█         | 4196/40000 [56:53<8:06:04,  1.23it/s, training_loss=0.123][A
Epoch 3:  10%|█         | 4197/40000 [56:53<8:06:02,  1.23it/s, training_loss=0.123][A
Epoch 3:  10%|█         | 4197/40000 [56:53<8:06:02,  1.23it/s, training_loss=0.167][A
Epoch 3:  10%|█         | 4198/40000 [56:53<8:05:34,  1.23it/s, training_loss=0.167][A
Epoch 3:  10%|█         | 4198/40000 [56:54<8:05:34,  1.23it/s, training_loss=0.058][A
Epoch 3:  10%|█         | 4199/40000 [56:54<8:06:18,  1.23it/s, training_loss=0.058][A
Epoch 3:  10%|█         | 4199/40000 [56:55<8:06:18,  1.23it/s, training_loss=0.050][A
Epoch 3:  10%|█         | 4200/40000 [56:55<8:06:06,  1.23it/s, training_loss=0.050][A
Epoch 3:  10%|█         | 4200/40000 [56:56<8:06:06,  1.23it/s, training_loss=0.153][A
Epoch 3:  11%|█         | 4201/40000 [56:56<8:05:55,  1.23it/s, training_loss=0.153][A
Epoch 3:  11%|█         | 4201/40000 [56:57<8:05:55,  1.23it/s, training_loss=0.054][A
Epoch 3:  11%|█         | 4202/4

Epoch 3:  11%|█         | 4289/40000 [58:08<8:04:47,  1.23it/s, training_loss=0.090][A
Epoch 3:  11%|█         | 4290/40000 [58:08<8:04:31,  1.23it/s, training_loss=0.090][A
Epoch 3:  11%|█         | 4290/40000 [58:09<8:04:31,  1.23it/s, training_loss=0.034][A
Epoch 3:  11%|█         | 4291/40000 [58:09<8:04:45,  1.23it/s, training_loss=0.034][A
Epoch 3:  11%|█         | 4291/40000 [58:10<8:04:45,  1.23it/s, training_loss=0.149][A
Epoch 3:  11%|█         | 4292/40000 [58:10<8:04:47,  1.23it/s, training_loss=0.149][A
Epoch 3:  11%|█         | 4292/40000 [58:11<8:04:47,  1.23it/s, training_loss=0.077][A
Epoch 3:  11%|█         | 4293/40000 [58:11<8:04:10,  1.23it/s, training_loss=0.077][A
Epoch 3:  11%|█         | 4293/40000 [58:11<8:04:10,  1.23it/s, training_loss=0.060][A
Epoch 3:  11%|█         | 4294/40000 [58:11<8:04:11,  1.23it/s, training_loss=0.060][A
Epoch 3:  11%|█         | 4294/40000 [58:12<8:04:11,  1.23it/s, training_loss=0.030][A
Epoch 3:  11%|█         | 4295/4

Epoch 3:  11%|█         | 4382/40000 [59:24<8:02:41,  1.23it/s, training_loss=0.068][A
Epoch 3:  11%|█         | 4383/40000 [59:24<8:01:49,  1.23it/s, training_loss=0.068][A
Epoch 3:  11%|█         | 4383/40000 [59:25<8:01:49,  1.23it/s, training_loss=0.155][A
Epoch 3:  11%|█         | 4384/40000 [59:25<8:02:34,  1.23it/s, training_loss=0.155][A
Epoch 3:  11%|█         | 4384/40000 [59:26<8:02:34,  1.23it/s, training_loss=0.149][A
Epoch 3:  11%|█         | 4385/40000 [59:26<8:02:02,  1.23it/s, training_loss=0.149][A
Epoch 3:  11%|█         | 4385/40000 [59:26<8:02:02,  1.23it/s, training_loss=0.089][A
Epoch 3:  11%|█         | 4386/40000 [59:26<8:02:28,  1.23it/s, training_loss=0.089][A
Epoch 3:  11%|█         | 4386/40000 [59:27<8:02:28,  1.23it/s, training_loss=0.055][A
Epoch 3:  11%|█         | 4387/40000 [59:27<8:02:03,  1.23it/s, training_loss=0.055][A
Epoch 3:  11%|█         | 4387/40000 [59:28<8:02:03,  1.23it/s, training_loss=0.120][A
Epoch 3:  11%|█         | 4388/4

Epoch 3:  11%|█         | 4474/40000 [1:00:38<8:02:15,  1.23it/s, training_loss=0.031][A
Epoch 3:  11%|█         | 4474/40000 [1:00:39<8:02:15,  1.23it/s, training_loss=0.045][A
Epoch 3:  11%|█         | 4475/40000 [1:00:39<8:02:03,  1.23it/s, training_loss=0.045][A
Epoch 3:  11%|█         | 4475/40000 [1:00:39<8:02:03,  1.23it/s, training_loss=0.065][A
Epoch 3:  11%|█         | 4476/40000 [1:00:39<8:02:37,  1.23it/s, training_loss=0.065][A
Epoch 3:  11%|█         | 4476/40000 [1:00:40<8:02:37,  1.23it/s, training_loss=0.021][A
Epoch 3:  11%|█         | 4477/40000 [1:00:40<8:02:28,  1.23it/s, training_loss=0.021][A
Epoch 3:  11%|█         | 4477/40000 [1:00:41<8:02:28,  1.23it/s, training_loss=0.095][A
Epoch 3:  11%|█         | 4478/40000 [1:00:41<8:02:21,  1.23it/s, training_loss=0.095][A
Epoch 3:  11%|█         | 4478/40000 [1:00:42<8:02:21,  1.23it/s, training_loss=0.075][A
Epoch 3:  11%|█         | 4479/40000 [1:00:42<8:02:41,  1.23it/s, training_loss=0.075][A
Epoch 3:  

Epoch 3:  11%|█▏        | 4565/40000 [1:01:52<7:58:54,  1.23it/s, training_loss=0.025][A
Epoch 3:  11%|█▏        | 4565/40000 [1:01:53<7:58:54,  1.23it/s, training_loss=0.222][A
Epoch 3:  11%|█▏        | 4566/40000 [1:01:53<7:59:13,  1.23it/s, training_loss=0.222][A
Epoch 3:  11%|█▏        | 4566/40000 [1:01:53<7:59:13,  1.23it/s, training_loss=0.057][A
Epoch 3:  11%|█▏        | 4567/40000 [1:01:53<7:58:15,  1.23it/s, training_loss=0.057][A
Epoch 3:  11%|█▏        | 4567/40000 [1:01:54<7:58:15,  1.23it/s, training_loss=0.117][A
Epoch 3:  11%|█▏        | 4568/40000 [1:01:54<7:59:14,  1.23it/s, training_loss=0.117][A
Epoch 3:  11%|█▏        | 4568/40000 [1:01:55<7:59:14,  1.23it/s, training_loss=0.115][A
Epoch 3:  11%|█▏        | 4569/40000 [1:01:55<8:00:17,  1.23it/s, training_loss=0.115][A
Epoch 3:  11%|█▏        | 4569/40000 [1:01:56<8:00:17,  1.23it/s, training_loss=0.080][A
Epoch 3:  11%|█▏        | 4570/40000 [1:01:56<8:00:35,  1.23it/s, training_loss=0.080][A
Epoch 3:  

Epoch 3:  12%|█▏        | 4656/40000 [1:03:06<7:59:17,  1.23it/s, training_loss=0.131][A
Epoch 3:  12%|█▏        | 4656/40000 [1:03:07<7:59:17,  1.23it/s, training_loss=0.022][A
Epoch 3:  12%|█▏        | 4657/40000 [1:03:07<7:59:18,  1.23it/s, training_loss=0.022][A
Epoch 3:  12%|█▏        | 4657/40000 [1:03:07<7:59:18,  1.23it/s, training_loss=0.079][A
Epoch 3:  12%|█▏        | 4658/40000 [1:03:07<7:59:59,  1.23it/s, training_loss=0.079][A
Epoch 3:  12%|█▏        | 4658/40000 [1:03:08<7:59:59,  1.23it/s, training_loss=0.037][A
Epoch 3:  12%|█▏        | 4659/40000 [1:03:08<8:00:04,  1.23it/s, training_loss=0.037][A
Epoch 3:  12%|█▏        | 4659/40000 [1:03:09<8:00:04,  1.23it/s, training_loss=0.196][A
Epoch 3:  12%|█▏        | 4660/40000 [1:03:09<8:00:00,  1.23it/s, training_loss=0.196][A
Epoch 3:  12%|█▏        | 4660/40000 [1:03:10<8:00:00,  1.23it/s, training_loss=0.087][A
Epoch 3:  12%|█▏        | 4661/40000 [1:03:10<7:59:39,  1.23it/s, training_loss=0.087][A
Epoch 3:  

Epoch 3:  12%|█▏        | 4747/40000 [1:04:20<7:58:19,  1.23it/s, training_loss=0.051][A
Epoch 3:  12%|█▏        | 4747/40000 [1:04:21<7:58:19,  1.23it/s, training_loss=0.080][A
Epoch 3:  12%|█▏        | 4748/40000 [1:04:21<7:58:11,  1.23it/s, training_loss=0.080][A
Epoch 3:  12%|█▏        | 4748/40000 [1:04:22<7:58:11,  1.23it/s, training_loss=0.064][A
Epoch 3:  12%|█▏        | 4749/40000 [1:04:22<7:58:55,  1.23it/s, training_loss=0.064][A
Epoch 3:  12%|█▏        | 4749/40000 [1:04:22<7:58:55,  1.23it/s, training_loss=0.077][A
Epoch 3:  12%|█▏        | 4750/40000 [1:04:22<7:58:31,  1.23it/s, training_loss=0.077][A
Epoch 3:  12%|█▏        | 4750/40000 [1:04:23<7:58:31,  1.23it/s, training_loss=0.045][A
Epoch 3:  12%|█▏        | 4751/40000 [1:04:23<7:58:18,  1.23it/s, training_loss=0.045][A
Epoch 3:  12%|█▏        | 4751/40000 [1:04:24<7:58:18,  1.23it/s, training_loss=0.103][A
Epoch 3:  12%|█▏        | 4752/40000 [1:04:24<7:57:32,  1.23it/s, training_loss=0.103][A
Epoch 3:  

Epoch 3:  12%|█▏        | 4838/40000 [1:05:34<7:57:31,  1.23it/s, training_loss=0.071][A
Epoch 3:  12%|█▏        | 4838/40000 [1:05:35<7:57:31,  1.23it/s, training_loss=0.077][A
Epoch 3:  12%|█▏        | 4839/40000 [1:05:35<7:56:14,  1.23it/s, training_loss=0.077][A
Epoch 3:  12%|█▏        | 4839/40000 [1:05:35<7:56:14,  1.23it/s, training_loss=0.070][A
Epoch 3:  12%|█▏        | 4840/40000 [1:05:35<7:55:57,  1.23it/s, training_loss=0.070][A
Epoch 3:  12%|█▏        | 4840/40000 [1:05:36<7:55:57,  1.23it/s, training_loss=0.036][A
Epoch 3:  12%|█▏        | 4841/40000 [1:05:36<7:56:49,  1.23it/s, training_loss=0.036][A
Epoch 3:  12%|█▏        | 4841/40000 [1:05:37<7:56:49,  1.23it/s, training_loss=0.103][A
Epoch 3:  12%|█▏        | 4842/40000 [1:05:37<7:56:57,  1.23it/s, training_loss=0.103][A
Epoch 3:  12%|█▏        | 4842/40000 [1:05:38<7:56:57,  1.23it/s, training_loss=0.039][A
Epoch 3:  12%|█▏        | 4843/40000 [1:05:38<7:56:35,  1.23it/s, training_loss=0.039][A
Epoch 3:  

Epoch 3:  12%|█▏        | 4929/40000 [1:06:48<7:56:54,  1.23it/s, training_loss=0.053][A
Epoch 3:  12%|█▏        | 4929/40000 [1:06:49<7:56:54,  1.23it/s, training_loss=0.067][A
Epoch 3:  12%|█▏        | 4930/40000 [1:06:49<7:56:39,  1.23it/s, training_loss=0.067][A
Epoch 3:  12%|█▏        | 4930/40000 [1:06:50<7:56:39,  1.23it/s, training_loss=0.068][A
Epoch 3:  12%|█▏        | 4931/40000 [1:06:50<7:55:45,  1.23it/s, training_loss=0.068][A
Epoch 3:  12%|█▏        | 4931/40000 [1:06:50<7:55:45,  1.23it/s, training_loss=0.144][A
Epoch 3:  12%|█▏        | 4932/40000 [1:06:50<7:56:01,  1.23it/s, training_loss=0.144][A
Epoch 3:  12%|█▏        | 4932/40000 [1:06:51<7:56:01,  1.23it/s, training_loss=0.080][A
Epoch 3:  12%|█▏        | 4933/40000 [1:06:51<7:55:45,  1.23it/s, training_loss=0.080][A
Epoch 3:  12%|█▏        | 4933/40000 [1:06:52<7:55:45,  1.23it/s, training_loss=0.081][A
Epoch 3:  12%|█▏        | 4934/40000 [1:06:52<7:55:34,  1.23it/s, training_loss=0.081][A
Epoch 3:  

Epoch 3:  13%|█▎        | 5020/40000 [1:08:02<7:54:50,  1.23it/s, training_loss=0.119][A
Epoch 3:  13%|█▎        | 5020/40000 [1:08:03<7:54:50,  1.23it/s, training_loss=0.085][A
Epoch 3:  13%|█▎        | 5021/40000 [1:08:03<7:53:39,  1.23it/s, training_loss=0.085][A
Epoch 3:  13%|█▎        | 5021/40000 [1:08:04<7:53:39,  1.23it/s, training_loss=0.051][A
Epoch 3:  13%|█▎        | 5022/40000 [1:08:04<7:53:28,  1.23it/s, training_loss=0.051][A
Epoch 3:  13%|█▎        | 5022/40000 [1:08:04<7:53:28,  1.23it/s, training_loss=0.061][A
Epoch 3:  13%|█▎        | 5023/40000 [1:08:04<7:53:36,  1.23it/s, training_loss=0.061][A
Epoch 3:  13%|█▎        | 5023/40000 [1:08:05<7:53:36,  1.23it/s, training_loss=0.096][A
Epoch 3:  13%|█▎        | 5024/40000 [1:08:05<7:54:14,  1.23it/s, training_loss=0.096][A
Epoch 3:  13%|█▎        | 5024/40000 [1:08:06<7:54:14,  1.23it/s, training_loss=0.098][A
Epoch 3:  13%|█▎        | 5025/40000 [1:08:06<7:55:27,  1.23it/s, training_loss=0.098][A
Epoch 3:  

Epoch 3:  13%|█▎        | 5111/40000 [1:09:16<7:52:48,  1.23it/s, training_loss=0.054][A
Epoch 3:  13%|█▎        | 5111/40000 [1:09:17<7:52:48,  1.23it/s, training_loss=0.080][A
Epoch 3:  13%|█▎        | 5112/40000 [1:09:17<7:53:54,  1.23it/s, training_loss=0.080][A
Epoch 3:  13%|█▎        | 5112/40000 [1:09:18<7:53:54,  1.23it/s, training_loss=0.111][A
Epoch 3:  13%|█▎        | 5113/40000 [1:09:18<7:54:33,  1.23it/s, training_loss=0.111][A
Epoch 3:  13%|█▎        | 5113/40000 [1:09:19<7:54:33,  1.23it/s, training_loss=0.047][A
Epoch 3:  13%|█▎        | 5114/40000 [1:09:19<7:54:47,  1.22it/s, training_loss=0.047][A
Epoch 3:  13%|█▎        | 5114/40000 [1:09:19<7:54:47,  1.22it/s, training_loss=0.038][A
Epoch 3:  13%|█▎        | 5115/40000 [1:09:19<7:53:51,  1.23it/s, training_loss=0.038][A
Epoch 3:  13%|█▎        | 5115/40000 [1:09:20<7:53:51,  1.23it/s, training_loss=0.082][A
Epoch 3:  13%|█▎        | 5116/40000 [1:09:20<7:53:25,  1.23it/s, training_loss=0.082][A
Epoch 3:  

Epoch 3:  13%|█▎        | 5202/40000 [1:10:30<7:52:24,  1.23it/s, training_loss=0.056][A
Epoch 3:  13%|█▎        | 5202/40000 [1:10:31<7:52:24,  1.23it/s, training_loss=0.043][A
Epoch 3:  13%|█▎        | 5203/40000 [1:10:31<7:52:43,  1.23it/s, training_loss=0.043][A
Epoch 3:  13%|█▎        | 5203/40000 [1:10:32<7:52:43,  1.23it/s, training_loss=0.035][A
Epoch 3:  13%|█▎        | 5204/40000 [1:10:32<7:52:10,  1.23it/s, training_loss=0.035][A
Epoch 3:  13%|█▎        | 5204/40000 [1:10:33<7:52:10,  1.23it/s, training_loss=0.127][A
Epoch 3:  13%|█▎        | 5205/40000 [1:10:33<7:52:03,  1.23it/s, training_loss=0.127][A
Epoch 3:  13%|█▎        | 5205/40000 [1:10:33<7:52:03,  1.23it/s, training_loss=0.092][A
Epoch 3:  13%|█▎        | 5206/40000 [1:10:33<7:51:52,  1.23it/s, training_loss=0.092][A
Epoch 3:  13%|█▎        | 5206/40000 [1:10:34<7:51:52,  1.23it/s, training_loss=0.035][A
Epoch 3:  13%|█▎        | 5207/40000 [1:10:34<7:50:59,  1.23it/s, training_loss=0.035][A
Epoch 3:  

Epoch 3:  13%|█▎        | 5293/40000 [1:11:44<7:51:06,  1.23it/s, training_loss=0.047][A
Epoch 3:  13%|█▎        | 5293/40000 [1:11:45<7:51:06,  1.23it/s, training_loss=0.034][A
Epoch 3:  13%|█▎        | 5294/40000 [1:11:45<7:51:17,  1.23it/s, training_loss=0.034][A
Epoch 3:  13%|█▎        | 5294/40000 [1:11:46<7:51:17,  1.23it/s, training_loss=0.102][A
Epoch 3:  13%|█▎        | 5295/40000 [1:11:46<7:51:08,  1.23it/s, training_loss=0.102][A
Epoch 3:  13%|█▎        | 5295/40000 [1:11:47<7:51:08,  1.23it/s, training_loss=0.071][A
Epoch 3:  13%|█▎        | 5296/40000 [1:11:47<7:50:13,  1.23it/s, training_loss=0.071][A
Epoch 3:  13%|█▎        | 5296/40000 [1:11:47<7:50:13,  1.23it/s, training_loss=0.086][A
Epoch 3:  13%|█▎        | 5297/40000 [1:11:47<7:51:29,  1.23it/s, training_loss=0.086][A
Epoch 3:  13%|█▎        | 5297/40000 [1:11:48<7:51:29,  1.23it/s, training_loss=0.107][A
Epoch 3:  13%|█▎        | 5298/40000 [1:11:48<7:50:45,  1.23it/s, training_loss=0.107][A
Epoch 3:  

Epoch 3:  13%|█▎        | 5384/40000 [1:12:58<7:49:16,  1.23it/s, training_loss=0.049][A
Epoch 3:  13%|█▎        | 5384/40000 [1:12:59<7:49:16,  1.23it/s, training_loss=0.041][A
Epoch 3:  13%|█▎        | 5385/40000 [1:12:59<7:48:51,  1.23it/s, training_loss=0.041][A
Epoch 3:  13%|█▎        | 5385/40000 [1:13:00<7:48:51,  1.23it/s, training_loss=0.062][A
Epoch 3:  13%|█▎        | 5386/40000 [1:13:00<7:49:24,  1.23it/s, training_loss=0.062][A
Epoch 3:  13%|█▎        | 5386/40000 [1:13:01<7:49:24,  1.23it/s, training_loss=0.085][A
Epoch 3:  13%|█▎        | 5387/40000 [1:13:01<7:49:44,  1.23it/s, training_loss=0.085][A
Epoch 3:  13%|█▎        | 5387/40000 [1:13:01<7:49:44,  1.23it/s, training_loss=0.077][A
Epoch 3:  13%|█▎        | 5388/40000 [1:13:01<7:49:30,  1.23it/s, training_loss=0.077][A
Epoch 3:  13%|█▎        | 5388/40000 [1:13:02<7:49:30,  1.23it/s, training_loss=0.089][A
Epoch 3:  13%|█▎        | 5389/40000 [1:13:02<7:49:59,  1.23it/s, training_loss=0.089][A
Epoch 3:  

Epoch 3:  14%|█▎        | 5475/40000 [1:14:12<7:47:52,  1.23it/s, training_loss=0.024][A
Epoch 3:  14%|█▎        | 5475/40000 [1:14:13<7:47:52,  1.23it/s, training_loss=0.037][A
Epoch 3:  14%|█▎        | 5476/40000 [1:14:13<7:48:33,  1.23it/s, training_loss=0.037][A
Epoch 3:  14%|█▎        | 5476/40000 [1:14:14<7:48:33,  1.23it/s, training_loss=0.058][A
Epoch 3:  14%|█▎        | 5477/40000 [1:14:14<7:47:51,  1.23it/s, training_loss=0.058][A
Epoch 3:  14%|█▎        | 5477/40000 [1:14:15<7:47:51,  1.23it/s, training_loss=0.053][A
Epoch 3:  14%|█▎        | 5478/40000 [1:14:15<7:48:00,  1.23it/s, training_loss=0.053][A
Epoch 3:  14%|█▎        | 5478/40000 [1:14:16<7:48:00,  1.23it/s, training_loss=0.065][A
Epoch 3:  14%|█▎        | 5479/40000 [1:14:16<7:48:30,  1.23it/s, training_loss=0.065][A
Epoch 3:  14%|█▎        | 5479/40000 [1:14:16<7:48:30,  1.23it/s, training_loss=0.052][A
Epoch 3:  14%|█▎        | 5480/40000 [1:14:16<7:48:50,  1.23it/s, training_loss=0.052][A
Epoch 3:  

Epoch 3:  14%|█▍        | 5566/40000 [1:15:26<7:47:53,  1.23it/s, training_loss=0.076][A
Epoch 3:  14%|█▍        | 5566/40000 [1:15:27<7:47:53,  1.23it/s, training_loss=0.077][A
Epoch 3:  14%|█▍        | 5567/40000 [1:15:27<7:48:03,  1.23it/s, training_loss=0.077][A
Epoch 3:  14%|█▍        | 5567/40000 [1:15:28<7:48:03,  1.23it/s, training_loss=0.060][A
Epoch 3:  14%|█▍        | 5568/40000 [1:15:28<7:47:54,  1.23it/s, training_loss=0.060][A
Epoch 3:  14%|█▍        | 5568/40000 [1:15:29<7:47:54,  1.23it/s, training_loss=0.050][A
Epoch 3:  14%|█▍        | 5569/40000 [1:15:29<7:47:41,  1.23it/s, training_loss=0.050][A
Epoch 3:  14%|█▍        | 5569/40000 [1:15:30<7:47:41,  1.23it/s, training_loss=0.069][A
Epoch 3:  14%|█▍        | 5570/40000 [1:15:30<7:47:35,  1.23it/s, training_loss=0.069][A
Epoch 3:  14%|█▍        | 5570/40000 [1:15:30<7:47:35,  1.23it/s, training_loss=0.038][A
Epoch 3:  14%|█▍        | 5571/40000 [1:15:30<7:48:10,  1.23it/s, training_loss=0.038][A
Epoch 3:  

Epoch 3:  14%|█▍        | 5657/40000 [1:16:41<7:45:12,  1.23it/s, training_loss=0.046][A
Epoch 3:  14%|█▍        | 5657/40000 [1:16:41<7:45:12,  1.23it/s, training_loss=0.026][A
Epoch 3:  14%|█▍        | 5658/40000 [1:16:41<7:45:27,  1.23it/s, training_loss=0.026][A
Epoch 3:  14%|█▍        | 5658/40000 [1:16:42<7:45:27,  1.23it/s, training_loss=0.092][A
Epoch 3:  14%|█▍        | 5659/40000 [1:16:42<7:45:41,  1.23it/s, training_loss=0.092][A
Epoch 3:  14%|█▍        | 5659/40000 [1:16:43<7:45:41,  1.23it/s, training_loss=0.049][A
Epoch 3:  14%|█▍        | 5660/40000 [1:16:43<7:45:56,  1.23it/s, training_loss=0.049][A
Epoch 3:  14%|█▍        | 5660/40000 [1:16:44<7:45:56,  1.23it/s, training_loss=0.134][A
Epoch 3:  14%|█▍        | 5661/40000 [1:16:44<7:45:51,  1.23it/s, training_loss=0.134][A
Epoch 3:  14%|█▍        | 5661/40000 [1:16:45<7:45:51,  1.23it/s, training_loss=0.056][A
Epoch 3:  14%|█▍        | 5662/40000 [1:16:45<7:46:24,  1.23it/s, training_loss=0.056][A
Epoch 3:  

Epoch 3:  14%|█▍        | 5748/40000 [1:17:55<7:45:09,  1.23it/s, training_loss=0.051][A
Epoch 3:  14%|█▍        | 5748/40000 [1:17:55<7:45:09,  1.23it/s, training_loss=0.056][A
Epoch 3:  14%|█▍        | 5749/40000 [1:17:55<7:45:04,  1.23it/s, training_loss=0.056][A
Epoch 3:  14%|█▍        | 5749/40000 [1:17:56<7:45:04,  1.23it/s, training_loss=0.105][A
Epoch 3:  14%|█▍        | 5750/40000 [1:17:56<7:44:56,  1.23it/s, training_loss=0.105][A
Epoch 3:  14%|█▍        | 5750/40000 [1:17:57<7:44:56,  1.23it/s, training_loss=0.046][A
Epoch 3:  14%|█▍        | 5751/40000 [1:17:57<7:43:59,  1.23it/s, training_loss=0.046][A
Epoch 3:  14%|█▍        | 5751/40000 [1:17:58<7:43:59,  1.23it/s, training_loss=0.029][A
Epoch 3:  14%|█▍        | 5752/40000 [1:17:58<7:43:59,  1.23it/s, training_loss=0.029][A
Epoch 3:  14%|█▍        | 5752/40000 [1:17:59<7:43:59,  1.23it/s, training_loss=0.030][A
Epoch 3:  14%|█▍        | 5753/40000 [1:17:59<7:43:46,  1.23it/s, training_loss=0.030][A
Epoch 3:  

Epoch 3:  15%|█▍        | 5839/40000 [1:19:09<7:43:56,  1.23it/s, training_loss=0.033][A
Epoch 3:  15%|█▍        | 5839/40000 [1:19:10<7:43:56,  1.23it/s, training_loss=0.071][A
Epoch 3:  15%|█▍        | 5840/40000 [1:19:10<7:43:58,  1.23it/s, training_loss=0.071][A
Epoch 3:  15%|█▍        | 5840/40000 [1:19:10<7:43:58,  1.23it/s, training_loss=0.074][A
Epoch 3:  15%|█▍        | 5841/40000 [1:19:10<7:43:53,  1.23it/s, training_loss=0.074][A
Epoch 3:  15%|█▍        | 5841/40000 [1:19:11<7:43:53,  1.23it/s, training_loss=0.066][A
Epoch 3:  15%|█▍        | 5842/40000 [1:19:11<7:43:35,  1.23it/s, training_loss=0.066][A
Epoch 3:  15%|█▍        | 5842/40000 [1:19:12<7:43:35,  1.23it/s, training_loss=0.090][A
Epoch 3:  15%|█▍        | 5843/40000 [1:19:12<7:43:00,  1.23it/s, training_loss=0.090][A
Epoch 3:  15%|█▍        | 5843/40000 [1:19:13<7:43:00,  1.23it/s, training_loss=0.128][A
Epoch 3:  15%|█▍        | 5844/40000 [1:19:13<7:43:13,  1.23it/s, training_loss=0.128][A
Epoch 3:  

Epoch 3:  15%|█▍        | 5930/40000 [1:20:23<7:42:16,  1.23it/s, training_loss=0.057][A
Epoch 3:  15%|█▍        | 5930/40000 [1:20:24<7:42:16,  1.23it/s, training_loss=0.083][A
Epoch 3:  15%|█▍        | 5931/40000 [1:20:24<7:42:36,  1.23it/s, training_loss=0.083][A
Epoch 3:  15%|█▍        | 5931/40000 [1:20:24<7:42:36,  1.23it/s, training_loss=0.066][A
Epoch 3:  15%|█▍        | 5932/40000 [1:20:24<7:41:52,  1.23it/s, training_loss=0.066][A
Epoch 3:  15%|█▍        | 5932/40000 [1:20:25<7:41:52,  1.23it/s, training_loss=0.063][A
Epoch 3:  15%|█▍        | 5933/40000 [1:20:25<7:41:29,  1.23it/s, training_loss=0.063][A
Epoch 3:  15%|█▍        | 5933/40000 [1:20:26<7:41:29,  1.23it/s, training_loss=0.077][A
Epoch 3:  15%|█▍        | 5934/40000 [1:20:26<7:41:12,  1.23it/s, training_loss=0.077][A
Epoch 3:  15%|█▍        | 5934/40000 [1:20:27<7:41:12,  1.23it/s, training_loss=0.081][A
Epoch 3:  15%|█▍        | 5935/40000 [1:20:27<7:41:39,  1.23it/s, training_loss=0.081][A
Epoch 3:  

Epoch 3:  15%|█▌        | 6021/40000 [1:21:37<7:42:02,  1.23it/s, training_loss=0.060][A
Epoch 3:  15%|█▌        | 6021/40000 [1:21:38<7:42:02,  1.23it/s, training_loss=0.077][A
Epoch 3:  15%|█▌        | 6022/40000 [1:21:38<7:40:15,  1.23it/s, training_loss=0.077][A
Epoch 3:  15%|█▌        | 6022/40000 [1:21:39<7:40:15,  1.23it/s, training_loss=0.069][A
Epoch 3:  15%|█▌        | 6023/40000 [1:21:39<7:41:34,  1.23it/s, training_loss=0.069][A
Epoch 3:  15%|█▌        | 6023/40000 [1:21:39<7:41:34,  1.23it/s, training_loss=0.141][A
Epoch 3:  15%|█▌        | 6024/40000 [1:21:39<7:41:35,  1.23it/s, training_loss=0.141][A
Epoch 3:  15%|█▌        | 6024/40000 [1:21:40<7:41:35,  1.23it/s, training_loss=0.070][A
Epoch 3:  15%|█▌        | 6025/40000 [1:21:40<7:41:17,  1.23it/s, training_loss=0.070][A
Epoch 3:  15%|█▌        | 6025/40000 [1:21:41<7:41:17,  1.23it/s, training_loss=0.093][A
Epoch 3:  15%|█▌        | 6026/40000 [1:21:41<7:41:33,  1.23it/s, training_loss=0.093][A
Epoch 3:  

Epoch 3:  15%|█▌        | 6112/40000 [1:22:51<7:40:23,  1.23it/s, training_loss=0.056][A
Epoch 3:  15%|█▌        | 6112/40000 [1:22:52<7:40:23,  1.23it/s, training_loss=0.179][A
Epoch 3:  15%|█▌        | 6113/40000 [1:22:52<7:40:36,  1.23it/s, training_loss=0.179][A
Epoch 3:  15%|█▌        | 6113/40000 [1:22:53<7:40:36,  1.23it/s, training_loss=0.066][A
Epoch 3:  15%|█▌        | 6114/40000 [1:22:53<7:40:14,  1.23it/s, training_loss=0.066][A
Epoch 3:  15%|█▌        | 6114/40000 [1:22:54<7:40:14,  1.23it/s, training_loss=0.101][A
Epoch 3:  15%|█▌        | 6115/40000 [1:22:54<7:39:28,  1.23it/s, training_loss=0.101][A
Epoch 3:  15%|█▌        | 6115/40000 [1:22:54<7:39:28,  1.23it/s, training_loss=0.047][A
Epoch 3:  15%|█▌        | 6116/40000 [1:22:54<7:40:33,  1.23it/s, training_loss=0.047][A
Epoch 3:  15%|█▌        | 6116/40000 [1:22:55<7:40:33,  1.23it/s, training_loss=0.093][A
Epoch 3:  15%|█▌        | 6117/40000 [1:22:55<7:40:31,  1.23it/s, training_loss=0.093][A
Epoch 3:  

Epoch 3:  16%|█▌        | 6203/40000 [1:24:05<7:39:35,  1.23it/s, training_loss=0.038][A
Epoch 3:  16%|█▌        | 6203/40000 [1:24:06<7:39:35,  1.23it/s, training_loss=0.075][A
Epoch 3:  16%|█▌        | 6204/40000 [1:24:06<7:40:24,  1.22it/s, training_loss=0.075][A
Epoch 3:  16%|█▌        | 6204/40000 [1:24:07<7:40:24,  1.22it/s, training_loss=0.145][A
Epoch 3:  16%|█▌        | 6205/40000 [1:24:07<7:39:12,  1.23it/s, training_loss=0.145][A
Epoch 3:  16%|█▌        | 6205/40000 [1:24:08<7:39:12,  1.23it/s, training_loss=0.083][A
Epoch 3:  16%|█▌        | 6206/40000 [1:24:08<7:38:08,  1.23it/s, training_loss=0.083][A
Epoch 3:  16%|█▌        | 6206/40000 [1:24:08<7:38:08,  1.23it/s, training_loss=0.051][A
Epoch 3:  16%|█▌        | 6207/40000 [1:24:08<7:38:11,  1.23it/s, training_loss=0.051][A
Epoch 3:  16%|█▌        | 6207/40000 [1:24:09<7:38:11,  1.23it/s, training_loss=0.117][A
Epoch 3:  16%|█▌        | 6208/40000 [1:24:09<7:38:50,  1.23it/s, training_loss=0.117][A
Epoch 3:  

Epoch 3:  16%|█▌        | 6294/40000 [1:25:19<7:37:24,  1.23it/s, training_loss=0.033][A
Epoch 3:  16%|█▌        | 6294/40000 [1:25:20<7:37:24,  1.23it/s, training_loss=0.043][A
Epoch 3:  16%|█▌        | 6295/40000 [1:25:20<7:37:14,  1.23it/s, training_loss=0.043][A
Epoch 3:  16%|█▌        | 6295/40000 [1:25:21<7:37:14,  1.23it/s, training_loss=0.019][A
Epoch 3:  16%|█▌        | 6296/40000 [1:25:21<7:37:30,  1.23it/s, training_loss=0.019][A
Epoch 3:  16%|█▌        | 6296/40000 [1:25:22<7:37:30,  1.23it/s, training_loss=0.076][A
Epoch 3:  16%|█▌        | 6297/40000 [1:25:22<7:37:05,  1.23it/s, training_loss=0.076][A
Epoch 3:  16%|█▌        | 6297/40000 [1:25:23<7:37:05,  1.23it/s, training_loss=0.249][A
Epoch 3:  16%|█▌        | 6298/40000 [1:25:23<7:37:33,  1.23it/s, training_loss=0.249][A
Epoch 3:  16%|█▌        | 6298/40000 [1:25:23<7:37:33,  1.23it/s, training_loss=0.061][A
Epoch 3:  16%|█▌        | 6299/40000 [1:25:23<7:38:09,  1.23it/s, training_loss=0.061][A
Epoch 3:  

Epoch 3:  16%|█▌        | 6385/40000 [1:26:33<7:36:45,  1.23it/s, training_loss=0.076][A
Epoch 3:  16%|█▌        | 6385/40000 [1:26:34<7:36:45,  1.23it/s, training_loss=0.112][A
Epoch 3:  16%|█▌        | 6386/40000 [1:26:34<7:36:38,  1.23it/s, training_loss=0.112][A
Epoch 3:  16%|█▌        | 6386/40000 [1:26:35<7:36:38,  1.23it/s, training_loss=0.059][A
Epoch 3:  16%|█▌        | 6387/40000 [1:26:35<7:36:49,  1.23it/s, training_loss=0.059][A
Epoch 3:  16%|█▌        | 6387/40000 [1:26:36<7:36:49,  1.23it/s, training_loss=0.056][A
Epoch 3:  16%|█▌        | 6388/40000 [1:26:36<7:36:37,  1.23it/s, training_loss=0.056][A
Epoch 3:  16%|█▌        | 6388/40000 [1:26:37<7:36:37,  1.23it/s, training_loss=0.053][A
Epoch 3:  16%|█▌        | 6389/40000 [1:26:37<7:35:57,  1.23it/s, training_loss=0.053][A
Epoch 3:  16%|█▌        | 6389/40000 [1:26:37<7:35:57,  1.23it/s, training_loss=0.052][A
Epoch 3:  16%|█▌        | 6390/40000 [1:26:37<7:35:34,  1.23it/s, training_loss=0.052][A
Epoch 3:  

Epoch 3:  16%|█▌        | 6476/40000 [1:27:48<7:35:25,  1.23it/s, training_loss=0.099][A
Epoch 3:  16%|█▌        | 6476/40000 [1:27:48<7:35:25,  1.23it/s, training_loss=0.020][A
Epoch 3:  16%|█▌        | 6477/40000 [1:27:48<7:34:34,  1.23it/s, training_loss=0.020][A
Epoch 3:  16%|█▌        | 6477/40000 [1:27:49<7:34:34,  1.23it/s, training_loss=0.049][A
Epoch 3:  16%|█▌        | 6478/40000 [1:27:49<7:35:17,  1.23it/s, training_loss=0.049][A
Epoch 3:  16%|█▌        | 6478/40000 [1:27:50<7:35:17,  1.23it/s, training_loss=0.077][A
Epoch 3:  16%|█▌        | 6479/40000 [1:27:50<7:35:09,  1.23it/s, training_loss=0.077][A
Epoch 3:  16%|█▌        | 6479/40000 [1:27:51<7:35:09,  1.23it/s, training_loss=0.085][A
Epoch 3:  16%|█▌        | 6480/40000 [1:27:51<7:34:36,  1.23it/s, training_loss=0.085][A
Epoch 3:  16%|█▌        | 6480/40000 [1:27:52<7:34:36,  1.23it/s, training_loss=0.065][A
Epoch 3:  16%|█▌        | 6481/40000 [1:27:52<7:34:21,  1.23it/s, training_loss=0.065][A
Epoch 3:  

Epoch 3:  16%|█▋        | 6567/40000 [1:29:02<7:33:41,  1.23it/s, training_loss=0.065][A
Epoch 3:  16%|█▋        | 6567/40000 [1:29:02<7:33:41,  1.23it/s, training_loss=0.057][A
Epoch 3:  16%|█▋        | 6568/40000 [1:29:02<7:34:08,  1.23it/s, training_loss=0.057][A
Epoch 3:  16%|█▋        | 6568/40000 [1:29:03<7:34:08,  1.23it/s, training_loss=0.090][A
Epoch 3:  16%|█▋        | 6569/40000 [1:29:03<7:34:15,  1.23it/s, training_loss=0.090][A
Epoch 3:  16%|█▋        | 6569/40000 [1:29:04<7:34:15,  1.23it/s, training_loss=0.154][A
Epoch 3:  16%|█▋        | 6570/40000 [1:29:04<7:35:00,  1.22it/s, training_loss=0.154][A
Epoch 3:  16%|█▋        | 6570/40000 [1:29:05<7:35:00,  1.22it/s, training_loss=0.086][A
Epoch 3:  16%|█▋        | 6571/40000 [1:29:05<7:34:20,  1.23it/s, training_loss=0.086][A
Epoch 3:  16%|█▋        | 6571/40000 [1:29:06<7:34:20,  1.23it/s, training_loss=0.036][A
Epoch 3:  16%|█▋        | 6572/40000 [1:29:06<7:33:57,  1.23it/s, training_loss=0.036][A
Epoch 3:  

Epoch 3:  17%|█▋        | 6658/40000 [1:30:16<7:33:22,  1.23it/s, training_loss=0.041][A
Epoch 3:  17%|█▋        | 6658/40000 [1:30:17<7:33:22,  1.23it/s, training_loss=0.034][A
Epoch 3:  17%|█▋        | 6659/40000 [1:30:17<7:33:02,  1.23it/s, training_loss=0.034][A
Epoch 3:  17%|█▋        | 6659/40000 [1:30:17<7:33:02,  1.23it/s, training_loss=0.075][A
Epoch 3:  17%|█▋        | 6660/40000 [1:30:17<7:33:15,  1.23it/s, training_loss=0.075][A
Epoch 3:  17%|█▋        | 6660/40000 [1:30:18<7:33:15,  1.23it/s, training_loss=0.060][A
Epoch 3:  17%|█▋        | 6661/40000 [1:30:18<7:33:06,  1.23it/s, training_loss=0.060][A
Epoch 3:  17%|█▋        | 6661/40000 [1:30:19<7:33:06,  1.23it/s, training_loss=0.086][A
Epoch 3:  17%|█▋        | 6662/40000 [1:30:19<7:32:50,  1.23it/s, training_loss=0.086][A
Epoch 3:  17%|█▋        | 6662/40000 [1:30:20<7:32:50,  1.23it/s, training_loss=0.090][A
Epoch 3:  17%|█▋        | 6663/40000 [1:30:20<7:32:14,  1.23it/s, training_loss=0.090][A
Epoch 3:  

Epoch 3:  17%|█▋        | 6749/40000 [1:31:30<7:31:47,  1.23it/s, training_loss=0.058][A
Epoch 3:  17%|█▋        | 6749/40000 [1:31:31<7:31:47,  1.23it/s, training_loss=0.108][A
Epoch 3:  17%|█▋        | 6750/40000 [1:31:31<7:31:18,  1.23it/s, training_loss=0.108][A
Epoch 3:  17%|█▋        | 6750/40000 [1:31:32<7:31:18,  1.23it/s, training_loss=0.045][A
Epoch 3:  17%|█▋        | 6751/40000 [1:31:32<7:31:20,  1.23it/s, training_loss=0.045][A
Epoch 3:  17%|█▋        | 6751/40000 [1:31:32<7:31:20,  1.23it/s, training_loss=0.017][A
Epoch 3:  17%|█▋        | 6752/40000 [1:31:32<7:31:53,  1.23it/s, training_loss=0.017][A
Epoch 3:  17%|█▋        | 6752/40000 [1:31:33<7:31:53,  1.23it/s, training_loss=0.206][A
Epoch 3:  17%|█▋        | 6753/40000 [1:31:33<7:31:03,  1.23it/s, training_loss=0.206][A
Epoch 3:  17%|█▋        | 6753/40000 [1:31:34<7:31:03,  1.23it/s, training_loss=0.172][A
Epoch 3:  17%|█▋        | 6754/40000 [1:31:34<7:31:19,  1.23it/s, training_loss=0.172][A
Epoch 3:  

Epoch 3:  17%|█▋        | 6840/40000 [1:32:44<7:30:23,  1.23it/s, training_loss=0.111][A
Epoch 3:  17%|█▋        | 6840/40000 [1:32:45<7:30:23,  1.23it/s, training_loss=0.087][A
Epoch 3:  17%|█▋        | 6841/40000 [1:32:45<7:30:29,  1.23it/s, training_loss=0.087][A
Epoch 3:  17%|█▋        | 6841/40000 [1:32:46<7:30:29,  1.23it/s, training_loss=0.097][A
Epoch 3:  17%|█▋        | 6842/40000 [1:32:46<7:30:27,  1.23it/s, training_loss=0.097][A
Epoch 3:  17%|█▋        | 6842/40000 [1:32:47<7:30:27,  1.23it/s, training_loss=0.073][A
Epoch 3:  17%|█▋        | 6843/40000 [1:32:47<7:30:56,  1.23it/s, training_loss=0.073][A
Epoch 3:  17%|█▋        | 6843/40000 [1:32:47<7:30:56,  1.23it/s, training_loss=0.061][A
Epoch 3:  17%|█▋        | 6844/40000 [1:32:47<7:30:01,  1.23it/s, training_loss=0.061][A
Epoch 3:  17%|█▋        | 6844/40000 [1:32:48<7:30:01,  1.23it/s, training_loss=0.073][A
Epoch 3:  17%|█▋        | 6845/40000 [1:32:48<7:29:45,  1.23it/s, training_loss=0.073][A
Epoch 3:  

Epoch 3:  17%|█▋        | 6931/40000 [1:33:58<7:29:21,  1.23it/s, training_loss=0.076][A
Epoch 3:  17%|█▋        | 6931/40000 [1:33:59<7:29:21,  1.23it/s, training_loss=0.091][A
Epoch 3:  17%|█▋        | 6932/40000 [1:33:59<7:28:49,  1.23it/s, training_loss=0.091][A
Epoch 3:  17%|█▋        | 6932/40000 [1:34:00<7:28:49,  1.23it/s, training_loss=0.070][A
Epoch 3:  17%|█▋        | 6933/40000 [1:34:00<7:29:01,  1.23it/s, training_loss=0.070][A
Epoch 3:  17%|█▋        | 6933/40000 [1:34:01<7:29:01,  1.23it/s, training_loss=0.067][A
Epoch 3:  17%|█▋        | 6934/40000 [1:34:01<7:28:32,  1.23it/s, training_loss=0.067][A
Epoch 3:  17%|█▋        | 6934/40000 [1:34:01<7:28:32,  1.23it/s, training_loss=0.118][A
Epoch 3:  17%|█▋        | 6935/40000 [1:34:01<7:28:58,  1.23it/s, training_loss=0.118][A
Epoch 3:  17%|█▋        | 6935/40000 [1:34:02<7:28:58,  1.23it/s, training_loss=0.010][A
Epoch 3:  17%|█▋        | 6936/40000 [1:34:02<7:28:52,  1.23it/s, training_loss=0.010][A
Epoch 3:  

Epoch 3:  18%|█▊        | 7022/40000 [1:35:12<7:27:54,  1.23it/s, training_loss=0.034][A
Epoch 3:  18%|█▊        | 7022/40000 [1:35:13<7:27:54,  1.23it/s, training_loss=0.128][A
Epoch 3:  18%|█▊        | 7023/40000 [1:35:13<7:27:49,  1.23it/s, training_loss=0.128][A
Epoch 3:  18%|█▊        | 7023/40000 [1:35:14<7:27:49,  1.23it/s, training_loss=0.153][A
Epoch 3:  18%|█▊        | 7024/40000 [1:35:14<7:27:36,  1.23it/s, training_loss=0.153][A
Epoch 3:  18%|█▊        | 7024/40000 [1:35:15<7:27:36,  1.23it/s, training_loss=0.071][A
Epoch 3:  18%|█▊        | 7025/40000 [1:35:15<7:27:59,  1.23it/s, training_loss=0.071][A
Epoch 3:  18%|█▊        | 7025/40000 [1:35:16<7:27:59,  1.23it/s, training_loss=0.087][A
Epoch 3:  18%|█▊        | 7026/40000 [1:35:16<7:28:35,  1.23it/s, training_loss=0.087][A
Epoch 3:  18%|█▊        | 7026/40000 [1:35:16<7:28:35,  1.23it/s, training_loss=0.103][A
Epoch 3:  18%|█▊        | 7027/40000 [1:35:16<7:28:35,  1.23it/s, training_loss=0.103][A
Epoch 3:  

Epoch 3:  18%|█▊        | 7113/40000 [1:36:27<7:26:18,  1.23it/s, training_loss=0.055][A
Epoch 3:  18%|█▊        | 7113/40000 [1:36:27<7:26:18,  1.23it/s, training_loss=0.029][A
Epoch 3:  18%|█▊        | 7114/40000 [1:36:27<7:26:11,  1.23it/s, training_loss=0.029][A
Epoch 3:  18%|█▊        | 7114/40000 [1:36:28<7:26:11,  1.23it/s, training_loss=0.071][A
Epoch 3:  18%|█▊        | 7115/40000 [1:36:28<7:26:12,  1.23it/s, training_loss=0.071][A
Epoch 3:  18%|█▊        | 7115/40000 [1:36:29<7:26:12,  1.23it/s, training_loss=0.088][A
Epoch 3:  18%|█▊        | 7116/40000 [1:36:29<7:25:54,  1.23it/s, training_loss=0.088][A
Epoch 3:  18%|█▊        | 7116/40000 [1:36:30<7:25:54,  1.23it/s, training_loss=0.083][A
Epoch 3:  18%|█▊        | 7117/40000 [1:36:30<7:26:23,  1.23it/s, training_loss=0.083][A
Epoch 3:  18%|█▊        | 7117/40000 [1:36:31<7:26:23,  1.23it/s, training_loss=0.140][A
Epoch 3:  18%|█▊        | 7118/40000 [1:36:31<7:26:16,  1.23it/s, training_loss=0.140][A
Epoch 3:  

Epoch 3:  18%|█▊        | 7204/40000 [1:37:41<7:26:08,  1.23it/s, training_loss=0.075][A
Epoch 3:  18%|█▊        | 7204/40000 [1:37:42<7:26:08,  1.23it/s, training_loss=0.078][A
Epoch 3:  18%|█▊        | 7205/40000 [1:37:42<7:25:12,  1.23it/s, training_loss=0.078][A
Epoch 3:  18%|█▊        | 7205/40000 [1:37:42<7:25:12,  1.23it/s, training_loss=0.142][A
Epoch 3:  18%|█▊        | 7206/40000 [1:37:42<7:25:26,  1.23it/s, training_loss=0.142][A
Epoch 3:  18%|█▊        | 7206/40000 [1:37:43<7:25:26,  1.23it/s, training_loss=0.036][A
Epoch 3:  18%|█▊        | 7207/40000 [1:37:43<7:25:05,  1.23it/s, training_loss=0.036][A
Epoch 3:  18%|█▊        | 7207/40000 [1:37:44<7:25:05,  1.23it/s, training_loss=0.136][A
Epoch 3:  18%|█▊        | 7208/40000 [1:37:44<7:25:38,  1.23it/s, training_loss=0.136][A
Epoch 3:  18%|█▊        | 7208/40000 [1:37:45<7:25:38,  1.23it/s, training_loss=0.084][A
Epoch 3:  18%|█▊        | 7209/40000 [1:37:45<7:26:14,  1.22it/s, training_loss=0.084][A
Epoch 3:  

Epoch 3:  18%|█▊        | 7295/40000 [1:38:55<7:25:54,  1.22it/s, training_loss=0.066][A
Epoch 3:  18%|█▊        | 7295/40000 [1:38:56<7:25:54,  1.22it/s, training_loss=0.056][A
Epoch 3:  18%|█▊        | 7296/40000 [1:38:56<7:25:59,  1.22it/s, training_loss=0.056][A
Epoch 3:  18%|█▊        | 7296/40000 [1:38:57<7:25:59,  1.22it/s, training_loss=0.046][A
Epoch 3:  18%|█▊        | 7297/40000 [1:38:57<7:25:14,  1.22it/s, training_loss=0.046][A
Epoch 3:  18%|█▊        | 7297/40000 [1:38:57<7:25:14,  1.22it/s, training_loss=0.090][A
Epoch 3:  18%|█▊        | 7298/40000 [1:38:57<7:25:01,  1.22it/s, training_loss=0.090][A
Epoch 3:  18%|█▊        | 7298/40000 [1:38:58<7:25:01,  1.22it/s, training_loss=0.044][A
Epoch 3:  18%|█▊        | 7299/40000 [1:38:58<7:24:29,  1.23it/s, training_loss=0.044][A
Epoch 3:  18%|█▊        | 7299/40000 [1:38:59<7:24:29,  1.23it/s, training_loss=0.127][A
Epoch 3:  18%|█▊        | 7300/40000 [1:38:59<7:24:37,  1.23it/s, training_loss=0.127][A
Epoch 3:  

Epoch 3:  18%|█▊        | 7386/40000 [1:40:09<7:23:03,  1.23it/s, training_loss=0.104][A
Epoch 3:  18%|█▊        | 7386/40000 [1:40:10<7:23:03,  1.23it/s, training_loss=0.104][A
Epoch 3:  18%|█▊        | 7387/40000 [1:40:10<7:23:48,  1.22it/s, training_loss=0.104][A
Epoch 3:  18%|█▊        | 7387/40000 [1:40:11<7:23:48,  1.22it/s, training_loss=0.099][A
Epoch 3:  18%|█▊        | 7388/40000 [1:40:11<7:23:37,  1.23it/s, training_loss=0.099][A
Epoch 3:  18%|█▊        | 7388/40000 [1:40:12<7:23:37,  1.23it/s, training_loss=0.036][A
Epoch 3:  18%|█▊        | 7389/40000 [1:40:12<7:23:45,  1.22it/s, training_loss=0.036][A
Epoch 3:  18%|█▊        | 7389/40000 [1:40:12<7:23:45,  1.22it/s, training_loss=0.051][A
Epoch 3:  18%|█▊        | 7390/40000 [1:40:12<7:23:29,  1.23it/s, training_loss=0.051][A
Epoch 3:  18%|█▊        | 7390/40000 [1:40:13<7:23:29,  1.23it/s, training_loss=0.080][A
Epoch 3:  18%|█▊        | 7391/40000 [1:40:13<7:24:26,  1.22it/s, training_loss=0.080][A
Epoch 3:  

Epoch 3:  19%|█▊        | 7477/40000 [1:41:23<7:21:02,  1.23it/s, training_loss=0.109][A
Epoch 3:  19%|█▊        | 7477/40000 [1:41:24<7:21:02,  1.23it/s, training_loss=0.046][A
Epoch 3:  19%|█▊        | 7478/40000 [1:41:24<7:20:43,  1.23it/s, training_loss=0.046][A
Epoch 3:  19%|█▊        | 7478/40000 [1:41:25<7:20:43,  1.23it/s, training_loss=0.055][A
Epoch 3:  19%|█▊        | 7479/40000 [1:41:25<7:21:11,  1.23it/s, training_loss=0.055][A
Epoch 3:  19%|█▊        | 7479/40000 [1:41:26<7:21:11,  1.23it/s, training_loss=0.117][A
Epoch 3:  19%|█▊        | 7480/40000 [1:41:26<7:21:21,  1.23it/s, training_loss=0.117][A
Epoch 3:  19%|█▊        | 7480/40000 [1:41:27<7:21:21,  1.23it/s, training_loss=0.065][A
Epoch 3:  19%|█▊        | 7481/40000 [1:41:27<7:21:14,  1.23it/s, training_loss=0.065][A
Epoch 3:  19%|█▊        | 7481/40000 [1:41:28<7:21:14,  1.23it/s, training_loss=0.155][A
Epoch 3:  19%|█▊        | 7482/40000 [1:41:28<7:21:05,  1.23it/s, training_loss=0.155][A
Epoch 3:  

Epoch 3:  19%|█▉        | 7568/40000 [1:42:38<7:20:25,  1.23it/s, training_loss=0.095][A
Epoch 3:  19%|█▉        | 7568/40000 [1:42:39<7:20:25,  1.23it/s, training_loss=0.111][A
Epoch 3:  19%|█▉        | 7569/40000 [1:42:39<7:20:03,  1.23it/s, training_loss=0.111][A
Epoch 3:  19%|█▉        | 7569/40000 [1:42:39<7:20:03,  1.23it/s, training_loss=0.204][A
Epoch 3:  19%|█▉        | 7570/40000 [1:42:39<7:19:02,  1.23it/s, training_loss=0.204][A
Epoch 3:  19%|█▉        | 7570/40000 [1:42:40<7:19:02,  1.23it/s, training_loss=0.035][A
Epoch 3:  19%|█▉        | 7571/40000 [1:42:40<7:19:20,  1.23it/s, training_loss=0.035][A
Epoch 3:  19%|█▉        | 7571/40000 [1:42:41<7:19:20,  1.23it/s, training_loss=0.138][A
Epoch 3:  19%|█▉        | 7572/40000 [1:42:41<7:19:56,  1.23it/s, training_loss=0.138][A
Epoch 3:  19%|█▉        | 7572/40000 [1:42:42<7:19:56,  1.23it/s, training_loss=0.067][A
Epoch 3:  19%|█▉        | 7573/40000 [1:42:42<7:20:07,  1.23it/s, training_loss=0.067][A
Epoch 3:  

Epoch 3:  19%|█▉        | 7659/40000 [1:43:52<7:20:45,  1.22it/s, training_loss=0.025][A
Epoch 3:  19%|█▉        | 7659/40000 [1:43:53<7:20:45,  1.22it/s, training_loss=0.058][A
Epoch 3:  19%|█▉        | 7660/40000 [1:43:53<7:19:43,  1.23it/s, training_loss=0.058][A
Epoch 3:  19%|█▉        | 7660/40000 [1:43:54<7:19:43,  1.23it/s, training_loss=0.037][A
Epoch 3:  19%|█▉        | 7661/40000 [1:43:54<7:20:16,  1.22it/s, training_loss=0.037][A
Epoch 3:  19%|█▉        | 7661/40000 [1:43:54<7:20:16,  1.22it/s, training_loss=0.067][A
Epoch 3:  19%|█▉        | 7662/40000 [1:43:54<7:20:16,  1.22it/s, training_loss=0.067][A
Epoch 3:  19%|█▉        | 7662/40000 [1:43:55<7:20:16,  1.22it/s, training_loss=0.059][A
Epoch 3:  19%|█▉        | 7663/40000 [1:43:55<7:19:50,  1.23it/s, training_loss=0.059][A
Epoch 3:  19%|█▉        | 7663/40000 [1:43:56<7:19:50,  1.23it/s, training_loss=0.069][A
Epoch 3:  19%|█▉        | 7664/40000 [1:43:56<7:19:36,  1.23it/s, training_loss=0.069][A
Epoch 3:  

Epoch 3:  19%|█▉        | 7750/40000 [1:45:06<7:17:30,  1.23it/s, training_loss=0.066][A
Epoch 3:  19%|█▉        | 7750/40000 [1:45:07<7:17:30,  1.23it/s, training_loss=0.092][A
Epoch 3:  19%|█▉        | 7751/40000 [1:45:07<7:17:29,  1.23it/s, training_loss=0.092][A
Epoch 3:  19%|█▉        | 7751/40000 [1:45:08<7:17:29,  1.23it/s, training_loss=0.049][A
Epoch 3:  19%|█▉        | 7752/40000 [1:45:08<7:17:35,  1.23it/s, training_loss=0.049][A
Epoch 3:  19%|█▉        | 7752/40000 [1:45:09<7:17:35,  1.23it/s, training_loss=0.100][A
Epoch 3:  19%|█▉        | 7753/40000 [1:45:09<7:17:46,  1.23it/s, training_loss=0.100][A
Epoch 3:  19%|█▉        | 7753/40000 [1:45:09<7:17:46,  1.23it/s, training_loss=0.028][A
Epoch 3:  19%|█▉        | 7754/40000 [1:45:09<7:17:45,  1.23it/s, training_loss=0.028][A
Epoch 3:  19%|█▉        | 7754/40000 [1:45:10<7:17:45,  1.23it/s, training_loss=0.050][A
Epoch 3:  19%|█▉        | 7755/40000 [1:45:10<7:18:07,  1.23it/s, training_loss=0.050][A
Epoch 3:  

Epoch 3:  20%|█▉        | 7841/40000 [1:46:20<7:17:17,  1.23it/s, training_loss=0.057][A
Epoch 3:  20%|█▉        | 7841/40000 [1:46:21<7:17:17,  1.23it/s, training_loss=0.084][A
Epoch 3:  20%|█▉        | 7842/40000 [1:46:21<7:17:18,  1.23it/s, training_loss=0.084][A
Epoch 3:  20%|█▉        | 7842/40000 [1:46:22<7:17:18,  1.23it/s, training_loss=0.114][A
Epoch 3:  20%|█▉        | 7843/40000 [1:46:22<7:17:20,  1.23it/s, training_loss=0.114][A
Epoch 3:  20%|█▉        | 7843/40000 [1:46:23<7:17:20,  1.23it/s, training_loss=0.145][A
Epoch 3:  20%|█▉        | 7844/40000 [1:46:23<7:17:11,  1.23it/s, training_loss=0.145][A
Epoch 3:  20%|█▉        | 7844/40000 [1:46:24<7:17:11,  1.23it/s, training_loss=0.073][A
Epoch 3:  20%|█▉        | 7845/40000 [1:46:24<7:17:05,  1.23it/s, training_loss=0.073][A
Epoch 3:  20%|█▉        | 7845/40000 [1:46:24<7:17:05,  1.23it/s, training_loss=0.094][A
Epoch 3:  20%|█▉        | 7846/40000 [1:46:24<7:16:40,  1.23it/s, training_loss=0.094][A
Epoch 3:  

Epoch 3:  20%|█▉        | 7932/40000 [1:47:35<7:16:09,  1.23it/s, training_loss=0.024][A
Epoch 3:  20%|█▉        | 7932/40000 [1:47:35<7:16:09,  1.23it/s, training_loss=0.125][A
Epoch 3:  20%|█▉        | 7933/40000 [1:47:35<7:16:18,  1.22it/s, training_loss=0.125][A
Epoch 3:  20%|█▉        | 7933/40000 [1:47:36<7:16:18,  1.22it/s, training_loss=0.064][A
Epoch 3:  20%|█▉        | 7934/40000 [1:47:36<7:16:39,  1.22it/s, training_loss=0.064][A
Epoch 3:  20%|█▉        | 7934/40000 [1:47:37<7:16:39,  1.22it/s, training_loss=0.110][A
Epoch 3:  20%|█▉        | 7935/40000 [1:47:37<7:17:29,  1.22it/s, training_loss=0.110][A
Epoch 3:  20%|█▉        | 7935/40000 [1:47:38<7:17:29,  1.22it/s, training_loss=0.117][A
Epoch 3:  20%|█▉        | 7936/40000 [1:47:38<7:17:29,  1.22it/s, training_loss=0.117][A
Epoch 3:  20%|█▉        | 7936/40000 [1:47:39<7:17:29,  1.22it/s, training_loss=0.085][A
Epoch 3:  20%|█▉        | 7937/40000 [1:47:39<7:16:36,  1.22it/s, training_loss=0.085][A
Epoch 3:  

Epoch 3:  20%|██        | 8023/40000 [1:48:49<7:13:49,  1.23it/s, training_loss=0.218][A
Epoch 3:  20%|██        | 8023/40000 [1:48:49<7:13:49,  1.23it/s, training_loss=0.123][A
Epoch 3:  20%|██        | 8024/40000 [1:48:49<7:13:42,  1.23it/s, training_loss=0.123][A
Epoch 3:  20%|██        | 8024/40000 [1:48:50<7:13:42,  1.23it/s, training_loss=0.110][A
Epoch 3:  20%|██        | 8025/40000 [1:48:50<7:13:38,  1.23it/s, training_loss=0.110][A
Epoch 3:  20%|██        | 8025/40000 [1:48:51<7:13:38,  1.23it/s, training_loss=0.130][A
Epoch 3:  20%|██        | 8026/40000 [1:48:51<7:13:24,  1.23it/s, training_loss=0.130][A
Epoch 3:  20%|██        | 8026/40000 [1:48:52<7:13:24,  1.23it/s, training_loss=0.065][A
Epoch 3:  20%|██        | 8027/40000 [1:48:52<7:14:17,  1.23it/s, training_loss=0.065][A
Epoch 3:  20%|██        | 8027/40000 [1:48:53<7:14:17,  1.23it/s, training_loss=0.073][A
Epoch 3:  20%|██        | 8028/40000 [1:48:53<7:14:20,  1.23it/s, training_loss=0.073][A
Epoch 3:  

Epoch 3:  20%|██        | 8114/40000 [1:50:03<7:12:54,  1.23it/s, training_loss=0.066][A
Epoch 3:  20%|██        | 8114/40000 [1:50:04<7:12:54,  1.23it/s, training_loss=0.108][A
Epoch 3:  20%|██        | 8115/40000 [1:50:04<7:12:47,  1.23it/s, training_loss=0.108][A
Epoch 3:  20%|██        | 8115/40000 [1:50:04<7:12:47,  1.23it/s, training_loss=0.164][A
Epoch 3:  20%|██        | 8116/40000 [1:50:04<7:13:01,  1.23it/s, training_loss=0.164][A
Epoch 3:  20%|██        | 8116/40000 [1:50:05<7:13:01,  1.23it/s, training_loss=0.039][A
Epoch 3:  20%|██        | 8117/40000 [1:50:05<7:13:10,  1.23it/s, training_loss=0.039][A
Epoch 3:  20%|██        | 8117/40000 [1:50:06<7:13:10,  1.23it/s, training_loss=0.073][A
Epoch 3:  20%|██        | 8118/40000 [1:50:06<7:13:11,  1.23it/s, training_loss=0.073][A
Epoch 3:  20%|██        | 8118/40000 [1:50:07<7:13:11,  1.23it/s, training_loss=0.087][A
Epoch 3:  20%|██        | 8119/40000 [1:50:07<7:12:20,  1.23it/s, training_loss=0.087][A
Epoch 3:  

Epoch 3:  21%|██        | 8205/40000 [1:51:17<7:11:28,  1.23it/s, training_loss=0.066][A
Epoch 3:  21%|██        | 8205/40000 [1:51:18<7:11:28,  1.23it/s, training_loss=0.039][A
Epoch 3:  21%|██        | 8206/40000 [1:51:18<7:11:56,  1.23it/s, training_loss=0.039][A
Epoch 3:  21%|██        | 8206/40000 [1:51:19<7:11:56,  1.23it/s, training_loss=0.246][A
Epoch 3:  21%|██        | 8207/40000 [1:51:19<7:11:46,  1.23it/s, training_loss=0.246][A
Epoch 3:  21%|██        | 8207/40000 [1:51:19<7:11:46,  1.23it/s, training_loss=0.053][A
Epoch 3:  21%|██        | 8208/40000 [1:51:19<7:12:14,  1.23it/s, training_loss=0.053][A
Epoch 3:  21%|██        | 8208/40000 [1:51:20<7:12:14,  1.23it/s, training_loss=0.092][A
Epoch 3:  21%|██        | 8209/40000 [1:51:20<7:12:30,  1.23it/s, training_loss=0.092][A
Epoch 3:  21%|██        | 8209/40000 [1:51:21<7:12:30,  1.23it/s, training_loss=0.097][A
Epoch 3:  21%|██        | 8210/40000 [1:51:21<7:12:02,  1.23it/s, training_loss=0.097][A
Epoch 3:  

Epoch 3:  21%|██        | 8296/40000 [1:52:31<7:10:23,  1.23it/s, training_loss=0.042][A
Epoch 3:  21%|██        | 8296/40000 [1:52:32<7:10:23,  1.23it/s, training_loss=0.048][A
Epoch 3:  21%|██        | 8297/40000 [1:52:32<7:11:00,  1.23it/s, training_loss=0.048][A
Epoch 3:  21%|██        | 8297/40000 [1:52:33<7:11:00,  1.23it/s, training_loss=0.092][A
Epoch 3:  21%|██        | 8298/40000 [1:52:33<7:11:11,  1.23it/s, training_loss=0.092][A
Epoch 3:  21%|██        | 8298/40000 [1:52:34<7:11:11,  1.23it/s, training_loss=0.041][A
Epoch 3:  21%|██        | 8299/40000 [1:52:34<7:11:27,  1.22it/s, training_loss=0.041][A
Epoch 3:  21%|██        | 8299/40000 [1:52:34<7:11:27,  1.22it/s, training_loss=0.034][A
Epoch 3:  21%|██        | 8300/40000 [1:52:34<7:11:17,  1.22it/s, training_loss=0.034][A
Epoch 3:  21%|██        | 8300/40000 [1:52:35<7:11:17,  1.22it/s, training_loss=0.047][A
Epoch 3:  21%|██        | 8301/40000 [1:52:35<7:10:59,  1.23it/s, training_loss=0.047][A
Epoch 3:  

Epoch 3:  21%|██        | 8387/40000 [1:53:45<7:08:28,  1.23it/s, training_loss=0.039][A
Epoch 3:  21%|██        | 8387/40000 [1:53:46<7:08:28,  1.23it/s, training_loss=0.036][A
Epoch 3:  21%|██        | 8388/40000 [1:53:46<7:08:48,  1.23it/s, training_loss=0.036][A
Epoch 3:  21%|██        | 8388/40000 [1:53:47<7:08:48,  1.23it/s, training_loss=0.105][A
Epoch 3:  21%|██        | 8389/40000 [1:53:47<7:08:56,  1.23it/s, training_loss=0.105][A
Epoch 3:  21%|██        | 8389/40000 [1:53:48<7:08:56,  1.23it/s, training_loss=0.054][A
Epoch 3:  21%|██        | 8390/40000 [1:53:48<7:09:12,  1.23it/s, training_loss=0.054][A
Epoch 3:  21%|██        | 8390/40000 [1:53:49<7:09:12,  1.23it/s, training_loss=0.082][A
Epoch 3:  21%|██        | 8391/40000 [1:53:49<7:10:05,  1.22it/s, training_loss=0.082][A
Epoch 3:  21%|██        | 8391/40000 [1:53:49<7:10:05,  1.22it/s, training_loss=0.075][A
Epoch 3:  21%|██        | 8392/40000 [1:53:49<7:10:39,  1.22it/s, training_loss=0.075][A
Epoch 3:  

Epoch 3:  21%|██        | 8478/40000 [1:54:59<7:07:35,  1.23it/s, training_loss=0.061][A
Epoch 3:  21%|██        | 8478/40000 [1:55:00<7:07:35,  1.23it/s, training_loss=0.065][A
Epoch 3:  21%|██        | 8479/40000 [1:55:00<7:07:55,  1.23it/s, training_loss=0.065][A
Epoch 3:  21%|██        | 8479/40000 [1:55:01<7:07:55,  1.23it/s, training_loss=0.087][A
Epoch 3:  21%|██        | 8480/40000 [1:55:01<7:07:43,  1.23it/s, training_loss=0.087][A
Epoch 3:  21%|██        | 8480/40000 [1:55:02<7:07:43,  1.23it/s, training_loss=0.089][A
Epoch 3:  21%|██        | 8481/40000 [1:55:02<7:07:24,  1.23it/s, training_loss=0.089][A
Epoch 3:  21%|██        | 8481/40000 [1:55:03<7:07:24,  1.23it/s, training_loss=0.102][A
Epoch 3:  21%|██        | 8482/40000 [1:55:03<7:07:19,  1.23it/s, training_loss=0.102][A
Epoch 3:  21%|██        | 8482/40000 [1:55:04<7:07:19,  1.23it/s, training_loss=0.056][A
Epoch 3:  21%|██        | 8483/40000 [1:55:04<7:07:32,  1.23it/s, training_loss=0.056][A
Epoch 3:  

Epoch 3:  21%|██▏       | 8569/40000 [1:56:14<7:07:20,  1.23it/s, training_loss=0.038][A
Epoch 3:  21%|██▏       | 8569/40000 [1:56:14<7:07:20,  1.23it/s, training_loss=0.051][A
Epoch 3:  21%|██▏       | 8570/40000 [1:56:14<7:07:04,  1.23it/s, training_loss=0.051][A
Epoch 3:  21%|██▏       | 8570/40000 [1:56:15<7:07:04,  1.23it/s, training_loss=0.034][A
Epoch 3:  21%|██▏       | 8571/40000 [1:56:15<7:07:08,  1.23it/s, training_loss=0.034][A
Epoch 3:  21%|██▏       | 8571/40000 [1:56:16<7:07:08,  1.23it/s, training_loss=0.082][A
Epoch 3:  21%|██▏       | 8572/40000 [1:56:16<7:06:46,  1.23it/s, training_loss=0.082][A
Epoch 3:  21%|██▏       | 8572/40000 [1:56:17<7:06:46,  1.23it/s, training_loss=0.055][A
Epoch 3:  21%|██▏       | 8573/40000 [1:56:17<7:06:18,  1.23it/s, training_loss=0.055][A
Epoch 3:  21%|██▏       | 8573/40000 [1:56:18<7:06:18,  1.23it/s, training_loss=0.078][A
Epoch 3:  21%|██▏       | 8574/40000 [1:56:18<7:06:25,  1.23it/s, training_loss=0.078][A
Epoch 3:  

Epoch 3:  22%|██▏       | 8660/40000 [1:57:28<7:05:41,  1.23it/s, training_loss=0.114][A
Epoch 3:  22%|██▏       | 8660/40000 [1:57:29<7:05:41,  1.23it/s, training_loss=0.072][A
Epoch 3:  22%|██▏       | 8661/40000 [1:57:29<7:05:55,  1.23it/s, training_loss=0.072][A
Epoch 3:  22%|██▏       | 8661/40000 [1:57:29<7:05:55,  1.23it/s, training_loss=0.060][A
Epoch 3:  22%|██▏       | 8662/40000 [1:57:29<7:05:36,  1.23it/s, training_loss=0.060][A
Epoch 3:  22%|██▏       | 8662/40000 [1:57:30<7:05:36,  1.23it/s, training_loss=0.038][A
Epoch 3:  22%|██▏       | 8663/40000 [1:57:30<7:05:00,  1.23it/s, training_loss=0.038][A
Epoch 3:  22%|██▏       | 8663/40000 [1:57:31<7:05:00,  1.23it/s, training_loss=0.175][A
Epoch 3:  22%|██▏       | 8664/40000 [1:57:31<7:05:15,  1.23it/s, training_loss=0.175][A
Epoch 3:  22%|██▏       | 8664/40000 [1:57:32<7:05:15,  1.23it/s, training_loss=0.068][A
Epoch 3:  22%|██▏       | 8665/40000 [1:57:32<7:05:18,  1.23it/s, training_loss=0.068][A
Epoch 3:  

Epoch 3:  22%|██▏       | 8751/40000 [1:58:42<7:05:18,  1.22it/s, training_loss=0.134][A
Epoch 3:  22%|██▏       | 8751/40000 [1:58:43<7:05:18,  1.22it/s, training_loss=0.092][A
Epoch 3:  22%|██▏       | 8752/40000 [1:58:43<7:05:36,  1.22it/s, training_loss=0.092][A
Epoch 3:  22%|██▏       | 8752/40000 [1:58:44<7:05:36,  1.22it/s, training_loss=0.051][A
Epoch 3:  22%|██▏       | 8753/40000 [1:58:44<7:04:24,  1.23it/s, training_loss=0.051][A
Epoch 3:  22%|██▏       | 8753/40000 [1:58:44<7:04:24,  1.23it/s, training_loss=0.044][A
Epoch 3:  22%|██▏       | 8754/40000 [1:58:44<7:04:25,  1.23it/s, training_loss=0.044][A
Epoch 3:  22%|██▏       | 8754/40000 [1:58:45<7:04:25,  1.23it/s, training_loss=0.095][A
Epoch 3:  22%|██▏       | 8755/40000 [1:58:45<7:04:26,  1.23it/s, training_loss=0.095][A
Epoch 3:  22%|██▏       | 8755/40000 [1:58:46<7:04:26,  1.23it/s, training_loss=0.019][A
Epoch 3:  22%|██▏       | 8756/40000 [1:58:46<7:04:31,  1.23it/s, training_loss=0.019][A
Epoch 3:  

Epoch 3:  22%|██▏       | 8842/40000 [1:59:56<7:01:22,  1.23it/s, training_loss=0.097][A
Epoch 3:  22%|██▏       | 8842/40000 [1:59:57<7:01:22,  1.23it/s, training_loss=0.044][A
Epoch 3:  22%|██▏       | 8843/40000 [1:59:57<7:01:16,  1.23it/s, training_loss=0.044][A
Epoch 3:  22%|██▏       | 8843/40000 [1:59:58<7:01:16,  1.23it/s, training_loss=0.112][A
Epoch 3:  22%|██▏       | 8844/40000 [1:59:58<7:01:29,  1.23it/s, training_loss=0.112][A
Epoch 3:  22%|██▏       | 8844/40000 [1:59:58<7:01:29,  1.23it/s, training_loss=0.040][A
Epoch 3:  22%|██▏       | 8845/40000 [1:59:58<7:01:23,  1.23it/s, training_loss=0.040][A
Epoch 3:  22%|██▏       | 8845/40000 [1:59:59<7:01:23,  1.23it/s, training_loss=0.049][A
Epoch 3:  22%|██▏       | 8846/40000 [1:59:59<7:01:51,  1.23it/s, training_loss=0.049][A
Epoch 3:  22%|██▏       | 8846/40000 [2:00:00<7:01:51,  1.23it/s, training_loss=0.067][A
Epoch 3:  22%|██▏       | 8847/40000 [2:00:00<7:02:08,  1.23it/s, training_loss=0.067][A
Epoch 3:  

Epoch 3:  22%|██▏       | 8933/40000 [2:01:10<7:00:24,  1.23it/s, training_loss=0.068][A
Epoch 3:  22%|██▏       | 8933/40000 [2:01:11<7:00:24,  1.23it/s, training_loss=0.075][A
Epoch 3:  22%|██▏       | 8934/40000 [2:01:11<7:00:52,  1.23it/s, training_loss=0.075][A
Epoch 3:  22%|██▏       | 8934/40000 [2:01:12<7:00:52,  1.23it/s, training_loss=0.045][A
Epoch 3:  22%|██▏       | 8935/40000 [2:01:12<7:00:27,  1.23it/s, training_loss=0.045][A
Epoch 3:  22%|██▏       | 8935/40000 [2:01:12<7:00:27,  1.23it/s, training_loss=0.126][A
Epoch 3:  22%|██▏       | 8936/40000 [2:01:12<6:59:57,  1.23it/s, training_loss=0.126][A
Epoch 3:  22%|██▏       | 8936/40000 [2:01:13<6:59:57,  1.23it/s, training_loss=0.070][A
Epoch 3:  22%|██▏       | 8937/40000 [2:01:13<7:00:26,  1.23it/s, training_loss=0.070][A
Epoch 3:  22%|██▏       | 8937/40000 [2:01:14<7:00:26,  1.23it/s, training_loss=0.074][A
Epoch 3:  22%|██▏       | 8938/40000 [2:01:14<7:00:28,  1.23it/s, training_loss=0.074][A
Epoch 3:  

Epoch 3:  23%|██▎       | 9024/40000 [2:02:24<7:01:08,  1.23it/s, training_loss=0.106][A
Epoch 3:  23%|██▎       | 9024/40000 [2:02:25<7:01:08,  1.23it/s, training_loss=0.082][A
Epoch 3:  23%|██▎       | 9025/40000 [2:02:25<7:00:52,  1.23it/s, training_loss=0.082][A
Epoch 3:  23%|██▎       | 9025/40000 [2:02:26<7:00:52,  1.23it/s, training_loss=0.041][A
Epoch 3:  23%|██▎       | 9026/40000 [2:02:26<7:00:18,  1.23it/s, training_loss=0.041][A
Epoch 3:  23%|██▎       | 9026/40000 [2:02:27<7:00:18,  1.23it/s, training_loss=0.078][A
Epoch 3:  23%|██▎       | 9027/40000 [2:02:27<6:59:39,  1.23it/s, training_loss=0.078][A
Epoch 3:  23%|██▎       | 9027/40000 [2:02:27<6:59:39,  1.23it/s, training_loss=0.077][A
Epoch 3:  23%|██▎       | 9028/40000 [2:02:27<6:59:20,  1.23it/s, training_loss=0.077][A
Epoch 3:  23%|██▎       | 9028/40000 [2:02:28<6:59:20,  1.23it/s, training_loss=0.107][A
Epoch 3:  23%|██▎       | 9029/40000 [2:02:28<6:59:43,  1.23it/s, training_loss=0.107][A
Epoch 3:  

Epoch 3:  23%|██▎       | 9115/40000 [2:03:38<6:58:45,  1.23it/s, training_loss=0.098][A
Epoch 3:  23%|██▎       | 9115/40000 [2:03:39<6:58:45,  1.23it/s, training_loss=0.065][A
Epoch 3:  23%|██▎       | 9116/40000 [2:03:39<6:59:01,  1.23it/s, training_loss=0.065][A
Epoch 3:  23%|██▎       | 9116/40000 [2:03:40<6:59:01,  1.23it/s, training_loss=0.069][A
Epoch 3:  23%|██▎       | 9117/40000 [2:03:40<6:58:46,  1.23it/s, training_loss=0.069][A
Epoch 3:  23%|██▎       | 9117/40000 [2:03:41<6:58:46,  1.23it/s, training_loss=0.084][A
Epoch 3:  23%|██▎       | 9118/40000 [2:03:41<6:58:51,  1.23it/s, training_loss=0.084][A
Epoch 3:  23%|██▎       | 9118/40000 [2:03:41<6:58:51,  1.23it/s, training_loss=0.062][A
Epoch 3:  23%|██▎       | 9119/40000 [2:03:41<6:58:50,  1.23it/s, training_loss=0.062][A
Epoch 3:  23%|██▎       | 9119/40000 [2:03:42<6:58:50,  1.23it/s, training_loss=0.100][A
Epoch 3:  23%|██▎       | 9120/40000 [2:03:42<6:58:40,  1.23it/s, training_loss=0.100][A
Epoch 3:  

Epoch 3:  23%|██▎       | 9206/40000 [2:04:52<6:57:56,  1.23it/s, training_loss=0.041][A
Epoch 3:  23%|██▎       | 9206/40000 [2:04:53<6:57:56,  1.23it/s, training_loss=0.101][A
Epoch 3:  23%|██▎       | 9207/40000 [2:04:53<6:57:44,  1.23it/s, training_loss=0.101][A
Epoch 3:  23%|██▎       | 9207/40000 [2:04:54<6:57:44,  1.23it/s, training_loss=0.054][A
Epoch 3:  23%|██▎       | 9208/40000 [2:04:54<6:57:49,  1.23it/s, training_loss=0.054][A
Epoch 3:  23%|██▎       | 9208/40000 [2:04:55<6:57:49,  1.23it/s, training_loss=0.101][A
Epoch 3:  23%|██▎       | 9209/40000 [2:04:55<6:57:58,  1.23it/s, training_loss=0.101][A
Epoch 3:  23%|██▎       | 9209/40000 [2:04:55<6:57:58,  1.23it/s, training_loss=0.022][A
Epoch 3:  23%|██▎       | 9210/40000 [2:04:55<6:57:52,  1.23it/s, training_loss=0.022][A
Epoch 3:  23%|██▎       | 9210/40000 [2:04:56<6:57:52,  1.23it/s, training_loss=0.090][A
Epoch 3:  23%|██▎       | 9211/40000 [2:04:56<6:58:11,  1.23it/s, training_loss=0.090][A
Epoch 3:  

Epoch 3:  23%|██▎       | 9297/40000 [2:06:06<6:55:55,  1.23it/s, training_loss=0.068][A
Epoch 3:  23%|██▎       | 9297/40000 [2:06:07<6:55:55,  1.23it/s, training_loss=0.094][A
Epoch 3:  23%|██▎       | 9298/40000 [2:06:07<6:56:00,  1.23it/s, training_loss=0.094][A
Epoch 3:  23%|██▎       | 9298/40000 [2:06:08<6:56:00,  1.23it/s, training_loss=0.111][A
Epoch 3:  23%|██▎       | 9299/40000 [2:06:08<6:55:53,  1.23it/s, training_loss=0.111][A
Epoch 3:  23%|██▎       | 9299/40000 [2:06:09<6:55:53,  1.23it/s, training_loss=0.072][A
Epoch 3:  23%|██▎       | 9300/40000 [2:06:09<6:56:00,  1.23it/s, training_loss=0.072][A
Epoch 3:  23%|██▎       | 9300/40000 [2:06:09<6:56:00,  1.23it/s, training_loss=0.090][A
Epoch 3:  23%|██▎       | 9301/40000 [2:06:09<6:55:49,  1.23it/s, training_loss=0.090][A
Epoch 3:  23%|██▎       | 9301/40000 [2:06:10<6:55:49,  1.23it/s, training_loss=0.102][A
Epoch 3:  23%|██▎       | 9302/40000 [2:06:10<6:56:00,  1.23it/s, training_loss=0.102][A
Epoch 3:  

Epoch 3:  23%|██▎       | 9388/40000 [2:07:20<6:56:46,  1.22it/s, training_loss=0.103][A
Epoch 3:  23%|██▎       | 9388/40000 [2:07:21<6:56:46,  1.22it/s, training_loss=0.061][A
Epoch 3:  23%|██▎       | 9389/40000 [2:07:21<6:56:57,  1.22it/s, training_loss=0.061][A
Epoch 3:  23%|██▎       | 9389/40000 [2:07:22<6:56:57,  1.22it/s, training_loss=0.121][A
Epoch 3:  23%|██▎       | 9390/40000 [2:07:22<6:56:04,  1.23it/s, training_loss=0.121][A
Epoch 3:  23%|██▎       | 9390/40000 [2:07:23<6:56:04,  1.23it/s, training_loss=0.061][A
Epoch 3:  23%|██▎       | 9391/40000 [2:07:23<6:55:47,  1.23it/s, training_loss=0.061][A
Epoch 3:  23%|██▎       | 9391/40000 [2:07:24<6:55:47,  1.23it/s, training_loss=0.183][A
Epoch 3:  23%|██▎       | 9392/40000 [2:07:24<6:56:57,  1.22it/s, training_loss=0.183][A
Epoch 3:  23%|██▎       | 9392/40000 [2:07:24<6:56:57,  1.22it/s, training_loss=0.116][A
Epoch 3:  23%|██▎       | 9393/40000 [2:07:24<6:57:03,  1.22it/s, training_loss=0.116][A
Epoch 3:  

Epoch 3:  24%|██▎       | 9479/40000 [2:08:35<6:54:45,  1.23it/s, training_loss=0.076][A
Epoch 3:  24%|██▎       | 9479/40000 [2:08:35<6:54:45,  1.23it/s, training_loss=0.039][A
Epoch 3:  24%|██▎       | 9480/40000 [2:08:35<6:54:51,  1.23it/s, training_loss=0.039][A
Epoch 3:  24%|██▎       | 9480/40000 [2:08:36<6:54:51,  1.23it/s, training_loss=0.101][A
Epoch 3:  24%|██▎       | 9481/40000 [2:08:36<6:55:10,  1.23it/s, training_loss=0.101][A
Epoch 3:  24%|██▎       | 9481/40000 [2:08:37<6:55:10,  1.23it/s, training_loss=0.117][A
Epoch 3:  24%|██▎       | 9482/40000 [2:08:37<6:54:39,  1.23it/s, training_loss=0.117][A
Epoch 3:  24%|██▎       | 9482/40000 [2:08:38<6:54:39,  1.23it/s, training_loss=0.029][A
Epoch 3:  24%|██▎       | 9483/40000 [2:08:38<6:54:55,  1.23it/s, training_loss=0.029][A
Epoch 3:  24%|██▎       | 9483/40000 [2:08:39<6:54:55,  1.23it/s, training_loss=0.083][A
Epoch 3:  24%|██▎       | 9484/40000 [2:08:39<6:54:20,  1.23it/s, training_loss=0.083][A
Epoch 3:  

Epoch 3:  24%|██▍       | 9570/40000 [2:09:49<6:52:25,  1.23it/s, training_loss=0.062][A
Epoch 3:  24%|██▍       | 9570/40000 [2:09:50<6:52:25,  1.23it/s, training_loss=0.106][A
Epoch 3:  24%|██▍       | 9571/40000 [2:09:50<6:52:40,  1.23it/s, training_loss=0.106][A
Epoch 3:  24%|██▍       | 9571/40000 [2:09:50<6:52:40,  1.23it/s, training_loss=0.136][A
Epoch 3:  24%|██▍       | 9572/40000 [2:09:50<6:52:23,  1.23it/s, training_loss=0.136][A
Epoch 3:  24%|██▍       | 9572/40000 [2:09:51<6:52:23,  1.23it/s, training_loss=0.095][A
Epoch 3:  24%|██▍       | 9573/40000 [2:09:51<6:53:21,  1.23it/s, training_loss=0.095][A
Epoch 3:  24%|██▍       | 9573/40000 [2:09:52<6:53:21,  1.23it/s, training_loss=0.053][A
Epoch 3:  24%|██▍       | 9574/40000 [2:09:52<6:53:41,  1.23it/s, training_loss=0.053][A
Epoch 3:  24%|██▍       | 9574/40000 [2:09:53<6:53:41,  1.23it/s, training_loss=0.052][A
Epoch 3:  24%|██▍       | 9575/40000 [2:09:53<6:53:31,  1.23it/s, training_loss=0.052][A
Epoch 3:  

Epoch 3:  24%|██▍       | 9661/40000 [2:11:03<6:53:43,  1.22it/s, training_loss=0.151][A
Epoch 3:  24%|██▍       | 9661/40000 [2:11:04<6:53:43,  1.22it/s, training_loss=0.054][A
Epoch 3:  24%|██▍       | 9662/40000 [2:11:04<6:53:24,  1.22it/s, training_loss=0.054][A
Epoch 3:  24%|██▍       | 9662/40000 [2:11:05<6:53:24,  1.22it/s, training_loss=0.124][A
Epoch 3:  24%|██▍       | 9663/40000 [2:11:05<6:53:20,  1.22it/s, training_loss=0.124][A
Epoch 3:  24%|██▍       | 9663/40000 [2:11:06<6:53:20,  1.22it/s, training_loss=0.072][A
Epoch 3:  24%|██▍       | 9664/40000 [2:11:06<6:53:28,  1.22it/s, training_loss=0.072][A
Epoch 3:  24%|██▍       | 9664/40000 [2:11:06<6:53:28,  1.22it/s, training_loss=0.060][A
Epoch 3:  24%|██▍       | 9665/40000 [2:11:06<6:53:06,  1.22it/s, training_loss=0.060][A
Epoch 3:  24%|██▍       | 9665/40000 [2:11:07<6:53:06,  1.22it/s, training_loss=0.065][A
Epoch 3:  24%|██▍       | 9666/40000 [2:11:07<6:53:07,  1.22it/s, training_loss=0.065][A
Epoch 3:  

Epoch 3:  24%|██▍       | 9752/40000 [2:12:17<6:51:56,  1.22it/s, training_loss=0.103][A
Epoch 3:  24%|██▍       | 9752/40000 [2:12:18<6:51:56,  1.22it/s, training_loss=0.069][A
Epoch 3:  24%|██▍       | 9753/40000 [2:12:18<6:52:03,  1.22it/s, training_loss=0.069][A
Epoch 3:  24%|██▍       | 9753/40000 [2:12:19<6:52:03,  1.22it/s, training_loss=0.130][A
Epoch 3:  24%|██▍       | 9754/40000 [2:12:19<6:51:42,  1.22it/s, training_loss=0.130][A
Epoch 3:  24%|██▍       | 9754/40000 [2:12:20<6:51:42,  1.22it/s, training_loss=0.153][A
Epoch 3:  24%|██▍       | 9755/40000 [2:12:20<6:51:50,  1.22it/s, training_loss=0.153][A
Epoch 3:  24%|██▍       | 9755/40000 [2:12:21<6:51:50,  1.22it/s, training_loss=0.056][A
Epoch 3:  24%|██▍       | 9756/40000 [2:12:21<6:51:22,  1.23it/s, training_loss=0.056][A
Epoch 3:  24%|██▍       | 9756/40000 [2:12:21<6:51:22,  1.23it/s, training_loss=0.095][A
Epoch 3:  24%|██▍       | 9757/40000 [2:12:21<6:51:53,  1.22it/s, training_loss=0.095][A
Epoch 3:  

Epoch 3:  25%|██▍       | 9843/40000 [2:13:32<6:50:05,  1.23it/s, training_loss=0.064][A
Epoch 3:  25%|██▍       | 9843/40000 [2:13:32<6:50:05,  1.23it/s, training_loss=0.128][A
Epoch 3:  25%|██▍       | 9844/40000 [2:13:32<6:49:50,  1.23it/s, training_loss=0.128][A
Epoch 3:  25%|██▍       | 9844/40000 [2:13:33<6:49:50,  1.23it/s, training_loss=0.031][A
Epoch 3:  25%|██▍       | 9845/40000 [2:13:33<6:50:10,  1.23it/s, training_loss=0.031][A
Epoch 3:  25%|██▍       | 9845/40000 [2:13:34<6:50:10,  1.23it/s, training_loss=0.037][A
Epoch 3:  25%|██▍       | 9846/40000 [2:13:34<6:50:09,  1.23it/s, training_loss=0.037][A
Epoch 3:  25%|██▍       | 9846/40000 [2:13:35<6:50:09,  1.23it/s, training_loss=0.072][A
Epoch 3:  25%|██▍       | 9847/40000 [2:13:35<6:50:48,  1.22it/s, training_loss=0.072][A
Epoch 3:  25%|██▍       | 9847/40000 [2:13:36<6:50:48,  1.22it/s, training_loss=0.127][A
Epoch 3:  25%|██▍       | 9848/40000 [2:13:36<6:51:00,  1.22it/s, training_loss=0.127][A
Epoch 3:  

Epoch 3:  25%|██▍       | 9934/40000 [2:14:46<6:48:23,  1.23it/s, training_loss=0.103][A
Epoch 3:  25%|██▍       | 9934/40000 [2:14:47<6:48:23,  1.23it/s, training_loss=0.178][A
Epoch 3:  25%|██▍       | 9935/40000 [2:14:47<6:48:12,  1.23it/s, training_loss=0.178][A
Epoch 3:  25%|██▍       | 9935/40000 [2:14:48<6:48:12,  1.23it/s, training_loss=0.096][A
Epoch 3:  25%|██▍       | 9936/40000 [2:14:48<6:47:58,  1.23it/s, training_loss=0.096][A
Epoch 3:  25%|██▍       | 9936/40000 [2:14:48<6:47:58,  1.23it/s, training_loss=0.075][A
Epoch 3:  25%|██▍       | 9937/40000 [2:14:48<6:47:50,  1.23it/s, training_loss=0.075][A
Epoch 3:  25%|██▍       | 9937/40000 [2:14:49<6:47:50,  1.23it/s, training_loss=0.199][A
Epoch 3:  25%|██▍       | 9938/40000 [2:14:49<6:47:16,  1.23it/s, training_loss=0.199][A
Epoch 3:  25%|██▍       | 9938/40000 [2:14:50<6:47:16,  1.23it/s, training_loss=0.057][A
Epoch 3:  25%|██▍       | 9939/40000 [2:14:50<6:47:31,  1.23it/s, training_loss=0.057][A
Epoch 3:  

Epoch 3:  25%|██▌       | 10024/40000 [2:16:00<6:47:16,  1.23it/s, training_loss=0.053][A
Epoch 3:  25%|██▌       | 10025/40000 [2:16:00<6:46:57,  1.23it/s, training_loss=0.053][A
Epoch 3:  25%|██▌       | 10025/40000 [2:16:01<6:46:57,  1.23it/s, training_loss=0.105][A
Epoch 3:  25%|██▌       | 10026/40000 [2:16:01<6:47:21,  1.23it/s, training_loss=0.105][A
Epoch 3:  25%|██▌       | 10026/40000 [2:16:02<6:47:21,  1.23it/s, training_loss=0.027][A
Epoch 3:  25%|██▌       | 10027/40000 [2:16:02<6:47:11,  1.23it/s, training_loss=0.027][A
Epoch 3:  25%|██▌       | 10027/40000 [2:16:03<6:47:11,  1.23it/s, training_loss=0.070][A
Epoch 3:  25%|██▌       | 10028/40000 [2:16:03<6:47:26,  1.23it/s, training_loss=0.070][A
Epoch 3:  25%|██▌       | 10028/40000 [2:16:03<6:47:26,  1.23it/s, training_loss=0.105][A
Epoch 3:  25%|██▌       | 10029/40000 [2:16:03<6:47:20,  1.23it/s, training_loss=0.105][A
Epoch 3:  25%|██▌       | 10029/40000 [2:16:04<6:47:20,  1.23it/s, training_loss=0.049][A

Epoch 3:  25%|██▌       | 10114/40000 [2:17:14<6:47:05,  1.22it/s, training_loss=0.083][A
Epoch 3:  25%|██▌       | 10115/40000 [2:17:14<6:47:15,  1.22it/s, training_loss=0.083][A
Epoch 3:  25%|██▌       | 10115/40000 [2:17:14<6:47:15,  1.22it/s, training_loss=0.136][A
Epoch 3:  25%|██▌       | 10116/40000 [2:17:14<6:46:42,  1.22it/s, training_loss=0.136][A
Epoch 3:  25%|██▌       | 10116/40000 [2:17:15<6:46:42,  1.22it/s, training_loss=0.166][A
Epoch 3:  25%|██▌       | 10117/40000 [2:17:15<6:46:44,  1.22it/s, training_loss=0.166][A
Epoch 3:  25%|██▌       | 10117/40000 [2:17:16<6:46:44,  1.22it/s, training_loss=0.120][A
Epoch 3:  25%|██▌       | 10118/40000 [2:17:16<6:46:01,  1.23it/s, training_loss=0.120][A
Epoch 3:  25%|██▌       | 10118/40000 [2:17:17<6:46:01,  1.23it/s, training_loss=0.097][A
Epoch 3:  25%|██▌       | 10119/40000 [2:17:17<6:46:14,  1.23it/s, training_loss=0.097][A
Epoch 3:  25%|██▌       | 10119/40000 [2:17:18<6:46:14,  1.23it/s, training_loss=0.103][A

Epoch 3:  26%|██▌       | 10204/40000 [2:18:27<6:45:22,  1.23it/s, training_loss=0.077][A
Epoch 3:  26%|██▌       | 10205/40000 [2:18:27<6:45:51,  1.22it/s, training_loss=0.077][A
Epoch 3:  26%|██▌       | 10205/40000 [2:18:28<6:45:51,  1.22it/s, training_loss=0.084][A
Epoch 3:  26%|██▌       | 10206/40000 [2:18:28<6:45:37,  1.22it/s, training_loss=0.084][A
Epoch 3:  26%|██▌       | 10206/40000 [2:18:29<6:45:37,  1.22it/s, training_loss=0.043][A
Epoch 3:  26%|██▌       | 10207/40000 [2:18:29<6:45:41,  1.22it/s, training_loss=0.043][A
Epoch 3:  26%|██▌       | 10207/40000 [2:18:30<6:45:41,  1.22it/s, training_loss=0.158][A
Epoch 3:  26%|██▌       | 10208/40000 [2:18:30<6:45:42,  1.22it/s, training_loss=0.158][A
Epoch 3:  26%|██▌       | 10208/40000 [2:18:30<6:45:42,  1.22it/s, training_loss=0.072][A
Epoch 3:  26%|██▌       | 10209/40000 [2:18:30<6:45:00,  1.23it/s, training_loss=0.072][A
Epoch 3:  26%|██▌       | 10209/40000 [2:18:31<6:45:00,  1.23it/s, training_loss=0.047][A

Epoch 3:  26%|██▌       | 10294/40000 [2:19:41<6:43:33,  1.23it/s, training_loss=0.126][A
Epoch 3:  26%|██▌       | 10295/40000 [2:19:41<6:43:37,  1.23it/s, training_loss=0.126][A
Epoch 3:  26%|██▌       | 10295/40000 [2:19:41<6:43:37,  1.23it/s, training_loss=0.093][A
Epoch 3:  26%|██▌       | 10296/40000 [2:19:41<6:43:55,  1.23it/s, training_loss=0.093][A
Epoch 3:  26%|██▌       | 10296/40000 [2:19:42<6:43:55,  1.23it/s, training_loss=0.146][A
Epoch 3:  26%|██▌       | 10297/40000 [2:19:42<6:43:59,  1.23it/s, training_loss=0.146][A
Epoch 3:  26%|██▌       | 10297/40000 [2:19:43<6:43:59,  1.23it/s, training_loss=0.021][A
Epoch 3:  26%|██▌       | 10298/40000 [2:19:43<6:44:07,  1.22it/s, training_loss=0.021][A
Epoch 3:  26%|██▌       | 10298/40000 [2:19:44<6:44:07,  1.22it/s, training_loss=0.089][A
Epoch 3:  26%|██▌       | 10299/40000 [2:19:44<6:43:55,  1.23it/s, training_loss=0.089][A
Epoch 3:  26%|██▌       | 10299/40000 [2:19:45<6:43:55,  1.23it/s, training_loss=0.087][A

Epoch 3:  26%|██▌       | 10384/40000 [2:20:54<6:43:05,  1.22it/s, training_loss=0.107][A
Epoch 3:  26%|██▌       | 10385/40000 [2:20:54<6:43:02,  1.22it/s, training_loss=0.107][A
Epoch 3:  26%|██▌       | 10385/40000 [2:20:55<6:43:02,  1.22it/s, training_loss=0.125][A
Epoch 3:  26%|██▌       | 10386/40000 [2:20:55<6:42:37,  1.23it/s, training_loss=0.125][A
Epoch 3:  26%|██▌       | 10386/40000 [2:20:56<6:42:37,  1.23it/s, training_loss=0.058][A
Epoch 3:  26%|██▌       | 10387/40000 [2:20:56<6:42:10,  1.23it/s, training_loss=0.058][A
Epoch 3:  26%|██▌       | 10387/40000 [2:20:56<6:42:10,  1.23it/s, training_loss=0.094][A
Epoch 3:  26%|██▌       | 10388/40000 [2:20:56<6:41:49,  1.23it/s, training_loss=0.094][A
Epoch 3:  26%|██▌       | 10388/40000 [2:20:57<6:41:49,  1.23it/s, training_loss=0.150][A
Epoch 3:  26%|██▌       | 10389/40000 [2:20:57<6:41:59,  1.23it/s, training_loss=0.150][A
Epoch 3:  26%|██▌       | 10389/40000 [2:20:58<6:41:59,  1.23it/s, training_loss=0.046][A

Epoch 3:  26%|██▌       | 10474/40000 [2:22:07<6:42:34,  1.22it/s, training_loss=0.085][A
Epoch 3:  26%|██▌       | 10475/40000 [2:22:07<6:42:56,  1.22it/s, training_loss=0.085][A
Epoch 3:  26%|██▌       | 10475/40000 [2:22:08<6:42:56,  1.22it/s, training_loss=0.086][A
Epoch 3:  26%|██▌       | 10476/40000 [2:22:08<6:42:11,  1.22it/s, training_loss=0.086][A
Epoch 3:  26%|██▌       | 10476/40000 [2:22:09<6:42:11,  1.22it/s, training_loss=0.122][A
Epoch 3:  26%|██▌       | 10477/40000 [2:22:09<6:41:26,  1.23it/s, training_loss=0.122][A
Epoch 3:  26%|██▌       | 10477/40000 [2:22:10<6:41:26,  1.23it/s, training_loss=0.062][A
Epoch 3:  26%|██▌       | 10478/40000 [2:22:10<6:41:04,  1.23it/s, training_loss=0.062][A
Epoch 3:  26%|██▌       | 10478/40000 [2:22:11<6:41:04,  1.23it/s, training_loss=0.104][A
Epoch 3:  26%|██▌       | 10479/40000 [2:22:11<6:40:30,  1.23it/s, training_loss=0.104][A
Epoch 3:  26%|██▌       | 10479/40000 [2:22:11<6:40:30,  1.23it/s, training_loss=0.074][A

Epoch 3:  26%|██▋       | 10564/40000 [2:23:21<6:40:26,  1.23it/s, training_loss=0.056][A
Epoch 3:  26%|██▋       | 10565/40000 [2:23:21<6:40:15,  1.23it/s, training_loss=0.056][A
Epoch 3:  26%|██▋       | 10565/40000 [2:23:22<6:40:15,  1.23it/s, training_loss=0.100][A
Epoch 3:  26%|██▋       | 10566/40000 [2:23:22<6:40:39,  1.22it/s, training_loss=0.100][A
Epoch 3:  26%|██▋       | 10566/40000 [2:23:23<6:40:39,  1.22it/s, training_loss=0.080][A
Epoch 3:  26%|██▋       | 10567/40000 [2:23:23<6:39:47,  1.23it/s, training_loss=0.080][A
Epoch 3:  26%|██▋       | 10567/40000 [2:23:23<6:39:47,  1.23it/s, training_loss=0.078][A
Epoch 3:  26%|██▋       | 10568/40000 [2:23:23<6:40:01,  1.23it/s, training_loss=0.078][A
Epoch 3:  26%|██▋       | 10568/40000 [2:23:24<6:40:01,  1.23it/s, training_loss=0.101][A
Epoch 3:  26%|██▋       | 10569/40000 [2:23:24<6:40:16,  1.23it/s, training_loss=0.101][A
Epoch 3:  26%|██▋       | 10569/40000 [2:23:25<6:40:16,  1.23it/s, training_loss=0.110][A

Epoch 3:  27%|██▋       | 10654/40000 [2:24:34<6:38:48,  1.23it/s, training_loss=0.081][A
Epoch 3:  27%|██▋       | 10655/40000 [2:24:34<6:39:27,  1.22it/s, training_loss=0.081][A
Epoch 3:  27%|██▋       | 10655/40000 [2:24:35<6:39:27,  1.22it/s, training_loss=0.113][A
Epoch 3:  27%|██▋       | 10656/40000 [2:24:35<6:39:25,  1.22it/s, training_loss=0.113][A
Epoch 3:  27%|██▋       | 10656/40000 [2:24:36<6:39:25,  1.22it/s, training_loss=0.146][A
Epoch 3:  27%|██▋       | 10657/40000 [2:24:36<6:39:50,  1.22it/s, training_loss=0.146][A
Epoch 3:  27%|██▋       | 10657/40000 [2:24:37<6:39:50,  1.22it/s, training_loss=0.057][A
Epoch 3:  27%|██▋       | 10658/40000 [2:24:37<6:40:02,  1.22it/s, training_loss=0.057][A
Epoch 3:  27%|██▋       | 10658/40000 [2:24:38<6:40:02,  1.22it/s, training_loss=0.069][A
Epoch 3:  27%|██▋       | 10659/40000 [2:24:38<6:38:53,  1.23it/s, training_loss=0.069][A
Epoch 3:  27%|██▋       | 10659/40000 [2:24:38<6:38:53,  1.23it/s, training_loss=0.099][A

Epoch 3:  27%|██▋       | 10744/40000 [2:25:48<6:38:20,  1.22it/s, training_loss=0.115][A
Epoch 3:  27%|██▋       | 10745/40000 [2:25:48<6:37:47,  1.23it/s, training_loss=0.115][A
Epoch 3:  27%|██▋       | 10745/40000 [2:25:49<6:37:47,  1.23it/s, training_loss=0.058][A
Epoch 3:  27%|██▋       | 10746/40000 [2:25:49<6:37:35,  1.23it/s, training_loss=0.058][A
Epoch 3:  27%|██▋       | 10746/40000 [2:25:49<6:37:35,  1.23it/s, training_loss=0.094][A
Epoch 3:  27%|██▋       | 10747/40000 [2:25:49<6:37:50,  1.23it/s, training_loss=0.094][A
Epoch 3:  27%|██▋       | 10747/40000 [2:25:50<6:37:50,  1.23it/s, training_loss=0.085][A
Epoch 3:  27%|██▋       | 10748/40000 [2:25:50<6:38:18,  1.22it/s, training_loss=0.085][A
Epoch 3:  27%|██▋       | 10748/40000 [2:25:51<6:38:18,  1.22it/s, training_loss=0.064][A
Epoch 3:  27%|██▋       | 10749/40000 [2:25:51<6:37:21,  1.23it/s, training_loss=0.064][A
Epoch 3:  27%|██▋       | 10749/40000 [2:25:52<6:37:21,  1.23it/s, training_loss=0.085][A

Epoch 3:  27%|██▋       | 10834/40000 [2:27:01<6:36:49,  1.22it/s, training_loss=0.014][A
Epoch 3:  27%|██▋       | 10835/40000 [2:27:01<6:36:17,  1.23it/s, training_loss=0.014][A
Epoch 3:  27%|██▋       | 10835/40000 [2:27:02<6:36:17,  1.23it/s, training_loss=0.138][A
Epoch 3:  27%|██▋       | 10836/40000 [2:27:02<6:36:12,  1.23it/s, training_loss=0.138][A
Epoch 3:  27%|██▋       | 10836/40000 [2:27:03<6:36:12,  1.23it/s, training_loss=0.130][A
Epoch 3:  27%|██▋       | 10837/40000 [2:27:03<6:35:40,  1.23it/s, training_loss=0.130][A
Epoch 3:  27%|██▋       | 10837/40000 [2:27:04<6:35:40,  1.23it/s, training_loss=0.046][A
Epoch 3:  27%|██▋       | 10838/40000 [2:27:04<6:35:44,  1.23it/s, training_loss=0.046][A
Epoch 3:  27%|██▋       | 10838/40000 [2:27:04<6:35:44,  1.23it/s, training_loss=0.097][A
Epoch 3:  27%|██▋       | 10839/40000 [2:27:04<6:35:50,  1.23it/s, training_loss=0.097][A
Epoch 3:  27%|██▋       | 10839/40000 [2:27:05<6:35:50,  1.23it/s, training_loss=0.063][A

Epoch 3:  27%|██▋       | 10924/40000 [2:28:15<6:35:25,  1.23it/s, training_loss=0.026][A
Epoch 3:  27%|██▋       | 10925/40000 [2:28:15<6:35:21,  1.23it/s, training_loss=0.026][A
Epoch 3:  27%|██▋       | 10925/40000 [2:28:15<6:35:21,  1.23it/s, training_loss=0.049][A
Epoch 3:  27%|██▋       | 10926/40000 [2:28:15<6:35:24,  1.23it/s, training_loss=0.049][A
Epoch 3:  27%|██▋       | 10926/40000 [2:28:16<6:35:24,  1.23it/s, training_loss=0.051][A
Epoch 3:  27%|██▋       | 10927/40000 [2:28:16<6:35:29,  1.23it/s, training_loss=0.051][A
Epoch 3:  27%|██▋       | 10927/40000 [2:28:17<6:35:29,  1.23it/s, training_loss=0.046][A
Epoch 3:  27%|██▋       | 10928/40000 [2:28:17<6:35:06,  1.23it/s, training_loss=0.046][A
Epoch 3:  27%|██▋       | 10928/40000 [2:28:18<6:35:06,  1.23it/s, training_loss=0.079][A
Epoch 3:  27%|██▋       | 10929/40000 [2:28:18<6:35:28,  1.23it/s, training_loss=0.079][A
Epoch 3:  27%|██▋       | 10929/40000 [2:28:19<6:35:28,  1.23it/s, training_loss=0.033][A

Epoch 3:  28%|██▊       | 11014/40000 [2:29:28<6:34:53,  1.22it/s, training_loss=0.090][A
Epoch 3:  28%|██▊       | 11015/40000 [2:29:28<6:35:03,  1.22it/s, training_loss=0.090][A
Epoch 3:  28%|██▊       | 11015/40000 [2:29:29<6:35:03,  1.22it/s, training_loss=0.048][A
Epoch 3:  28%|██▊       | 11016/40000 [2:29:29<6:34:43,  1.22it/s, training_loss=0.048][A
Epoch 3:  28%|██▊       | 11016/40000 [2:29:30<6:34:43,  1.22it/s, training_loss=0.072][A
Epoch 3:  28%|██▊       | 11017/40000 [2:29:30<6:35:14,  1.22it/s, training_loss=0.072][A
Epoch 3:  28%|██▊       | 11017/40000 [2:29:30<6:35:14,  1.22it/s, training_loss=0.119][A
Epoch 3:  28%|██▊       | 11018/40000 [2:29:30<6:35:05,  1.22it/s, training_loss=0.119][A
Epoch 3:  28%|██▊       | 11018/40000 [2:29:31<6:35:05,  1.22it/s, training_loss=0.034][A
Epoch 3:  28%|██▊       | 11019/40000 [2:29:31<6:35:11,  1.22it/s, training_loss=0.034][A
Epoch 3:  28%|██▊       | 11019/40000 [2:29:32<6:35:11,  1.22it/s, training_loss=0.103][A

Epoch 3:  28%|██▊       | 11104/40000 [2:30:41<6:33:54,  1.22it/s, training_loss=0.160][A
Epoch 3:  28%|██▊       | 11105/40000 [2:30:41<6:33:08,  1.22it/s, training_loss=0.160][A
Epoch 3:  28%|██▊       | 11105/40000 [2:30:42<6:33:08,  1.22it/s, training_loss=0.046][A
Epoch 3:  28%|██▊       | 11106/40000 [2:30:42<6:33:36,  1.22it/s, training_loss=0.046][A
Epoch 3:  28%|██▊       | 11106/40000 [2:30:43<6:33:36,  1.22it/s, training_loss=0.084][A
Epoch 3:  28%|██▊       | 11107/40000 [2:30:43<6:33:55,  1.22it/s, training_loss=0.084][A
Epoch 3:  28%|██▊       | 11107/40000 [2:30:44<6:33:55,  1.22it/s, training_loss=0.040][A
Epoch 3:  28%|██▊       | 11108/40000 [2:30:44<6:33:54,  1.22it/s, training_loss=0.040][A
Epoch 3:  28%|██▊       | 11108/40000 [2:30:45<6:33:54,  1.22it/s, training_loss=0.078][A
Epoch 3:  28%|██▊       | 11109/40000 [2:30:45<6:33:24,  1.22it/s, training_loss=0.078][A
Epoch 3:  28%|██▊       | 11109/40000 [2:30:46<6:33:24,  1.22it/s, training_loss=0.093][A

Epoch 3:  28%|██▊       | 11194/40000 [2:31:55<6:32:12,  1.22it/s, training_loss=0.122][A
Epoch 3:  28%|██▊       | 11195/40000 [2:31:55<6:31:25,  1.23it/s, training_loss=0.122][A
Epoch 3:  28%|██▊       | 11195/40000 [2:31:56<6:31:25,  1.23it/s, training_loss=0.022][A
Epoch 3:  28%|██▊       | 11196/40000 [2:31:56<6:31:28,  1.23it/s, training_loss=0.022][A
Epoch 3:  28%|██▊       | 11196/40000 [2:31:57<6:31:28,  1.23it/s, training_loss=0.125][A
Epoch 3:  28%|██▊       | 11197/40000 [2:31:57<6:31:39,  1.23it/s, training_loss=0.125][A
Epoch 3:  28%|██▊       | 11197/40000 [2:31:57<6:31:39,  1.23it/s, training_loss=0.114][A
Epoch 3:  28%|██▊       | 11198/40000 [2:31:57<6:31:50,  1.23it/s, training_loss=0.114][A
Epoch 3:  28%|██▊       | 11198/40000 [2:31:58<6:31:50,  1.23it/s, training_loss=0.048][A
Epoch 3:  28%|██▊       | 11199/40000 [2:31:58<6:31:24,  1.23it/s, training_loss=0.048][A
Epoch 3:  28%|██▊       | 11199/40000 [2:31:59<6:31:24,  1.23it/s, training_loss=0.136][A

Epoch 3:  28%|██▊       | 11284/40000 [2:33:08<6:29:50,  1.23it/s, training_loss=0.132][A
Epoch 3:  28%|██▊       | 11285/40000 [2:33:08<6:30:04,  1.23it/s, training_loss=0.132][A
Epoch 3:  28%|██▊       | 11285/40000 [2:33:09<6:30:04,  1.23it/s, training_loss=0.076][A
Epoch 3:  28%|██▊       | 11286/40000 [2:33:09<6:30:34,  1.23it/s, training_loss=0.076][A
Epoch 3:  28%|██▊       | 11286/40000 [2:33:10<6:30:34,  1.23it/s, training_loss=0.020][A
Epoch 3:  28%|██▊       | 11287/40000 [2:33:10<6:31:03,  1.22it/s, training_loss=0.020][A
Epoch 3:  28%|██▊       | 11287/40000 [2:33:11<6:31:03,  1.22it/s, training_loss=0.073][A
Epoch 3:  28%|██▊       | 11288/40000 [2:33:11<6:31:02,  1.22it/s, training_loss=0.073][A
Epoch 3:  28%|██▊       | 11288/40000 [2:33:12<6:31:02,  1.22it/s, training_loss=0.101][A
Epoch 3:  28%|██▊       | 11289/40000 [2:33:12<6:31:26,  1.22it/s, training_loss=0.101][A
Epoch 3:  28%|██▊       | 11289/40000 [2:33:12<6:31:26,  1.22it/s, training_loss=0.090][A

Epoch 3:  28%|██▊       | 11374/40000 [2:34:22<6:29:25,  1.23it/s, training_loss=0.099][A
Epoch 3:  28%|██▊       | 11375/40000 [2:34:22<6:29:19,  1.23it/s, training_loss=0.099][A
Epoch 3:  28%|██▊       | 11375/40000 [2:34:23<6:29:19,  1.23it/s, training_loss=0.038][A
Epoch 3:  28%|██▊       | 11376/40000 [2:34:23<6:28:56,  1.23it/s, training_loss=0.038][A
Epoch 3:  28%|██▊       | 11376/40000 [2:34:23<6:28:56,  1.23it/s, training_loss=0.035][A
Epoch 3:  28%|██▊       | 11377/40000 [2:34:23<6:29:05,  1.23it/s, training_loss=0.035][A
Epoch 3:  28%|██▊       | 11377/40000 [2:34:24<6:29:05,  1.23it/s, training_loss=0.137][A
Epoch 3:  28%|██▊       | 11378/40000 [2:34:24<6:29:03,  1.23it/s, training_loss=0.137][A
Epoch 3:  28%|██▊       | 11378/40000 [2:34:25<6:29:03,  1.23it/s, training_loss=0.062][A
Epoch 3:  28%|██▊       | 11379/40000 [2:34:25<6:28:55,  1.23it/s, training_loss=0.062][A
Epoch 3:  28%|██▊       | 11379/40000 [2:34:26<6:28:55,  1.23it/s, training_loss=0.129][A

Epoch 3:  29%|██▊       | 11464/40000 [2:35:35<6:27:57,  1.23it/s, training_loss=0.044][A
Epoch 3:  29%|██▊       | 11465/40000 [2:35:35<6:28:22,  1.22it/s, training_loss=0.044][A
Epoch 3:  29%|██▊       | 11465/40000 [2:35:36<6:28:22,  1.22it/s, training_loss=0.081][A
Epoch 3:  29%|██▊       | 11466/40000 [2:35:36<6:28:18,  1.22it/s, training_loss=0.081][A
Epoch 3:  29%|██▊       | 11466/40000 [2:35:37<6:28:18,  1.22it/s, training_loss=0.036][A
Epoch 3:  29%|██▊       | 11467/40000 [2:35:37<6:28:21,  1.22it/s, training_loss=0.036][A
Epoch 3:  29%|██▊       | 11467/40000 [2:35:38<6:28:21,  1.22it/s, training_loss=0.072][A
Epoch 3:  29%|██▊       | 11468/40000 [2:35:38<6:28:10,  1.23it/s, training_loss=0.072][A
Epoch 3:  29%|██▊       | 11468/40000 [2:35:39<6:28:10,  1.23it/s, training_loss=0.062][A
Epoch 3:  29%|██▊       | 11469/40000 [2:35:39<6:28:20,  1.22it/s, training_loss=0.062][A
Epoch 3:  29%|██▊       | 11469/40000 [2:35:39<6:28:20,  1.22it/s, training_loss=0.125][A

Epoch 3:  29%|██▉       | 11554/40000 [2:36:49<6:26:56,  1.23it/s, training_loss=0.016][A
Epoch 3:  29%|██▉       | 11555/40000 [2:36:49<6:27:08,  1.22it/s, training_loss=0.016][A
Epoch 3:  29%|██▉       | 11555/40000 [2:36:50<6:27:08,  1.22it/s, training_loss=0.146][A
Epoch 3:  29%|██▉       | 11556/40000 [2:36:50<6:27:34,  1.22it/s, training_loss=0.146][A
Epoch 3:  29%|██▉       | 11556/40000 [2:36:50<6:27:34,  1.22it/s, training_loss=0.079][A
Epoch 3:  29%|██▉       | 11557/40000 [2:36:50<6:27:07,  1.22it/s, training_loss=0.079][A
Epoch 3:  29%|██▉       | 11557/40000 [2:36:51<6:27:07,  1.22it/s, training_loss=0.074][A
Epoch 3:  29%|██▉       | 11558/40000 [2:36:51<6:27:10,  1.22it/s, training_loss=0.074][A
Epoch 3:  29%|██▉       | 11558/40000 [2:36:52<6:27:10,  1.22it/s, training_loss=0.063][A
Epoch 3:  29%|██▉       | 11559/40000 [2:36:52<6:26:13,  1.23it/s, training_loss=0.063][A
Epoch 3:  29%|██▉       | 11559/40000 [2:36:53<6:26:13,  1.23it/s, training_loss=0.071][A

Epoch 3:  29%|██▉       | 11644/40000 [2:38:02<6:25:57,  1.22it/s, training_loss=0.068][A
Epoch 3:  29%|██▉       | 11645/40000 [2:38:02<6:26:08,  1.22it/s, training_loss=0.068][A
Epoch 3:  29%|██▉       | 11645/40000 [2:38:03<6:26:08,  1.22it/s, training_loss=0.040][A
Epoch 3:  29%|██▉       | 11646/40000 [2:38:03<6:26:31,  1.22it/s, training_loss=0.040][A
Epoch 3:  29%|██▉       | 11646/40000 [2:38:04<6:26:31,  1.22it/s, training_loss=0.112][A
Epoch 3:  29%|██▉       | 11647/40000 [2:38:04<6:26:44,  1.22it/s, training_loss=0.112][A
Epoch 3:  29%|██▉       | 11647/40000 [2:38:05<6:26:44,  1.22it/s, training_loss=0.111][A
Epoch 3:  29%|██▉       | 11648/40000 [2:38:05<6:26:29,  1.22it/s, training_loss=0.111][A
Epoch 3:  29%|██▉       | 11648/40000 [2:38:06<6:26:29,  1.22it/s, training_loss=0.051][A
Epoch 3:  29%|██▉       | 11649/40000 [2:38:06<6:26:12,  1.22it/s, training_loss=0.051][A
Epoch 3:  29%|██▉       | 11649/40000 [2:38:06<6:26:12,  1.22it/s, training_loss=0.086][A

Epoch 3:  29%|██▉       | 11734/40000 [2:39:16<6:24:38,  1.22it/s, training_loss=0.103][A
Epoch 3:  29%|██▉       | 11735/40000 [2:39:16<6:24:22,  1.23it/s, training_loss=0.103][A
Epoch 3:  29%|██▉       | 11735/40000 [2:39:17<6:24:22,  1.23it/s, training_loss=0.069][A
Epoch 3:  29%|██▉       | 11736/40000 [2:39:17<6:24:28,  1.23it/s, training_loss=0.069][A
Epoch 3:  29%|██▉       | 11736/40000 [2:39:17<6:24:28,  1.23it/s, training_loss=0.073][A
Epoch 3:  29%|██▉       | 11737/40000 [2:39:17<6:24:52,  1.22it/s, training_loss=0.073][A
Epoch 3:  29%|██▉       | 11737/40000 [2:39:18<6:24:52,  1.22it/s, training_loss=0.036][A
Epoch 3:  29%|██▉       | 11738/40000 [2:39:18<6:25:21,  1.22it/s, training_loss=0.036][A
Epoch 3:  29%|██▉       | 11738/40000 [2:39:19<6:25:21,  1.22it/s, training_loss=0.046][A
Epoch 3:  29%|██▉       | 11739/40000 [2:39:19<6:25:15,  1.22it/s, training_loss=0.046][A
Epoch 3:  29%|██▉       | 11739/40000 [2:39:20<6:25:15,  1.22it/s, training_loss=0.091][A

Epoch 3:  30%|██▉       | 11824/40000 [2:40:29<6:23:21,  1.22it/s, training_loss=0.058][A
Epoch 3:  30%|██▉       | 11825/40000 [2:40:29<6:23:34,  1.22it/s, training_loss=0.058][A
Epoch 3:  30%|██▉       | 11825/40000 [2:40:30<6:23:34,  1.22it/s, training_loss=0.117][A
Epoch 3:  30%|██▉       | 11826/40000 [2:40:30<6:23:10,  1.23it/s, training_loss=0.117][A
Epoch 3:  30%|██▉       | 11826/40000 [2:40:31<6:23:10,  1.23it/s, training_loss=0.061][A
Epoch 3:  30%|██▉       | 11827/40000 [2:40:31<6:23:12,  1.23it/s, training_loss=0.061][A
Epoch 3:  30%|██▉       | 11827/40000 [2:40:32<6:23:12,  1.23it/s, training_loss=0.131][A
Epoch 3:  30%|██▉       | 11828/40000 [2:40:32<6:23:09,  1.23it/s, training_loss=0.131][A
Epoch 3:  30%|██▉       | 11828/40000 [2:40:33<6:23:09,  1.23it/s, training_loss=0.061][A
Epoch 3:  30%|██▉       | 11829/40000 [2:40:33<6:23:05,  1.23it/s, training_loss=0.061][A
Epoch 3:  30%|██▉       | 11829/40000 [2:40:33<6:23:05,  1.23it/s, training_loss=0.077][A

Epoch 3:  30%|██▉       | 11914/40000 [2:41:43<6:22:42,  1.22it/s, training_loss=0.088][A
Epoch 3:  30%|██▉       | 11915/40000 [2:41:43<6:22:52,  1.22it/s, training_loss=0.088][A
Epoch 3:  30%|██▉       | 11915/40000 [2:41:44<6:22:52,  1.22it/s, training_loss=0.023][A
Epoch 3:  30%|██▉       | 11916/40000 [2:41:44<6:22:19,  1.22it/s, training_loss=0.023][A
Epoch 3:  30%|██▉       | 11916/40000 [2:41:45<6:22:19,  1.22it/s, training_loss=0.087][A
Epoch 3:  30%|██▉       | 11917/40000 [2:41:45<6:23:15,  1.22it/s, training_loss=0.087][A
Epoch 3:  30%|██▉       | 11917/40000 [2:41:45<6:23:15,  1.22it/s, training_loss=0.174][A
Epoch 3:  30%|██▉       | 11918/40000 [2:41:45<6:22:41,  1.22it/s, training_loss=0.174][A
Epoch 3:  30%|██▉       | 11918/40000 [2:41:46<6:22:41,  1.22it/s, training_loss=0.053][A
Epoch 3:  30%|██▉       | 11919/40000 [2:41:46<6:22:32,  1.22it/s, training_loss=0.053][A
Epoch 3:  30%|██▉       | 11919/40000 [2:41:47<6:22:32,  1.22it/s, training_loss=0.048][A

Epoch 3:  30%|███       | 12004/40000 [2:42:56<6:21:18,  1.22it/s, training_loss=0.083][A
Epoch 3:  30%|███       | 12005/40000 [2:42:56<6:21:34,  1.22it/s, training_loss=0.083][A
Epoch 3:  30%|███       | 12005/40000 [2:42:57<6:21:34,  1.22it/s, training_loss=0.021][A
Epoch 3:  30%|███       | 12006/40000 [2:42:57<6:21:37,  1.22it/s, training_loss=0.021][A
Epoch 3:  30%|███       | 12006/40000 [2:42:58<6:21:37,  1.22it/s, training_loss=0.074][A
Epoch 3:  30%|███       | 12007/40000 [2:42:58<6:21:30,  1.22it/s, training_loss=0.074][A
Epoch 3:  30%|███       | 12007/40000 [2:42:59<6:21:30,  1.22it/s, training_loss=0.071][A
Epoch 3:  30%|███       | 12008/40000 [2:42:59<6:21:05,  1.22it/s, training_loss=0.071][A
Epoch 3:  30%|███       | 12008/40000 [2:43:00<6:21:05,  1.22it/s, training_loss=0.033][A
Epoch 3:  30%|███       | 12009/40000 [2:43:00<6:21:21,  1.22it/s, training_loss=0.033][A
Epoch 3:  30%|███       | 12009/40000 [2:43:01<6:21:21,  1.22it/s, training_loss=0.058][A

Epoch 3:  30%|███       | 12094/40000 [2:44:10<6:19:22,  1.23it/s, training_loss=0.056][A
Epoch 3:  30%|███       | 12095/40000 [2:44:10<6:19:16,  1.23it/s, training_loss=0.056][A
Epoch 3:  30%|███       | 12095/40000 [2:44:11<6:19:16,  1.23it/s, training_loss=0.135][A
Epoch 3:  30%|███       | 12096/40000 [2:44:11<6:19:19,  1.23it/s, training_loss=0.135][A
Epoch 3:  30%|███       | 12096/40000 [2:44:12<6:19:19,  1.23it/s, training_loss=0.119][A
Epoch 3:  30%|███       | 12097/40000 [2:44:12<6:19:59,  1.22it/s, training_loss=0.119][A
Epoch 3:  30%|███       | 12097/40000 [2:44:12<6:19:59,  1.22it/s, training_loss=0.140][A
Epoch 3:  30%|███       | 12098/40000 [2:44:12<6:20:04,  1.22it/s, training_loss=0.140][A
Epoch 3:  30%|███       | 12098/40000 [2:44:13<6:20:04,  1.22it/s, training_loss=0.130][A
Epoch 3:  30%|███       | 12099/40000 [2:44:13<6:19:54,  1.22it/s, training_loss=0.130][A
Epoch 3:  30%|███       | 12099/40000 [2:44:14<6:19:54,  1.22it/s, training_loss=0.068][A

Epoch 3:  30%|███       | 12184/40000 [2:45:24<6:19:08,  1.22it/s, training_loss=0.136][A
Epoch 3:  30%|███       | 12185/40000 [2:45:24<6:19:01,  1.22it/s, training_loss=0.136][A
Epoch 3:  30%|███       | 12185/40000 [2:45:24<6:19:01,  1.22it/s, training_loss=0.082][A
Epoch 3:  30%|███       | 12186/40000 [2:45:24<6:18:55,  1.22it/s, training_loss=0.082][A
Epoch 3:  30%|███       | 12186/40000 [2:45:25<6:18:55,  1.22it/s, training_loss=0.104][A
Epoch 3:  30%|███       | 12187/40000 [2:45:25<6:18:35,  1.22it/s, training_loss=0.104][A
Epoch 3:  30%|███       | 12187/40000 [2:45:26<6:18:35,  1.22it/s, training_loss=0.048][A
Epoch 3:  30%|███       | 12188/40000 [2:45:26<6:18:15,  1.23it/s, training_loss=0.048][A
Epoch 3:  30%|███       | 12188/40000 [2:45:27<6:18:15,  1.23it/s, training_loss=0.078][A
Epoch 3:  30%|███       | 12189/40000 [2:45:27<6:18:28,  1.22it/s, training_loss=0.078][A
Epoch 3:  30%|███       | 12189/40000 [2:45:28<6:18:28,  1.22it/s, training_loss=0.104][A

Epoch 3:  31%|███       | 12274/40000 [2:46:37<6:17:37,  1.22it/s, training_loss=0.068][A
Epoch 3:  31%|███       | 12275/40000 [2:46:37<6:18:13,  1.22it/s, training_loss=0.068][A
Epoch 3:  31%|███       | 12275/40000 [2:46:38<6:18:13,  1.22it/s, training_loss=0.059][A
Epoch 3:  31%|███       | 12276/40000 [2:46:38<6:17:56,  1.22it/s, training_loss=0.059][A
Epoch 3:  31%|███       | 12276/40000 [2:46:39<6:17:56,  1.22it/s, training_loss=0.120][A
Epoch 3:  31%|███       | 12277/40000 [2:46:39<6:17:50,  1.22it/s, training_loss=0.120][A
Epoch 3:  31%|███       | 12277/40000 [2:46:40<6:17:50,  1.22it/s, training_loss=0.052][A
Epoch 3:  31%|███       | 12278/40000 [2:46:40<6:17:46,  1.22it/s, training_loss=0.052][A
Epoch 3:  31%|███       | 12278/40000 [2:46:40<6:17:46,  1.22it/s, training_loss=0.027][A
Epoch 3:  31%|███       | 12279/40000 [2:46:40<6:18:27,  1.22it/s, training_loss=0.027][A
Epoch 3:  31%|███       | 12279/40000 [2:46:41<6:18:27,  1.22it/s, training_loss=0.051][A

Epoch 3:  31%|███       | 12364/40000 [2:47:51<6:16:13,  1.22it/s, training_loss=0.036][A
Epoch 3:  31%|███       | 12365/40000 [2:47:51<6:16:02,  1.22it/s, training_loss=0.036][A
Epoch 3:  31%|███       | 12365/40000 [2:47:51<6:16:02,  1.22it/s, training_loss=0.137][A
Epoch 3:  31%|███       | 12366/40000 [2:47:51<6:16:01,  1.22it/s, training_loss=0.137][A
Epoch 3:  31%|███       | 12366/40000 [2:47:52<6:16:01,  1.22it/s, training_loss=0.086][A
Epoch 3:  31%|███       | 12367/40000 [2:47:52<6:15:12,  1.23it/s, training_loss=0.086][A
Epoch 3:  31%|███       | 12367/40000 [2:47:53<6:15:12,  1.23it/s, training_loss=0.107][A
Epoch 3:  31%|███       | 12368/40000 [2:47:53<6:15:37,  1.23it/s, training_loss=0.107][A
Epoch 3:  31%|███       | 12368/40000 [2:47:54<6:15:37,  1.23it/s, training_loss=0.148][A
Epoch 3:  31%|███       | 12369/40000 [2:47:54<6:16:14,  1.22it/s, training_loss=0.148][A
Epoch 3:  31%|███       | 12369/40000 [2:47:55<6:16:14,  1.22it/s, training_loss=0.032][A

Epoch 3:  31%|███       | 12454/40000 [2:49:04<6:15:20,  1.22it/s, training_loss=0.074][A
Epoch 3:  31%|███       | 12455/40000 [2:49:04<6:15:14,  1.22it/s, training_loss=0.074][A
Epoch 3:  31%|███       | 12455/40000 [2:49:05<6:15:14,  1.22it/s, training_loss=0.024][A
Epoch 3:  31%|███       | 12456/40000 [2:49:05<6:15:00,  1.22it/s, training_loss=0.024][A
Epoch 3:  31%|███       | 12456/40000 [2:49:06<6:15:00,  1.22it/s, training_loss=0.036][A
Epoch 3:  31%|███       | 12457/40000 [2:49:06<6:15:29,  1.22it/s, training_loss=0.036][A
Epoch 3:  31%|███       | 12457/40000 [2:49:07<6:15:29,  1.22it/s, training_loss=0.031][A
Epoch 3:  31%|███       | 12458/40000 [2:49:07<6:15:19,  1.22it/s, training_loss=0.031][A
Epoch 3:  31%|███       | 12458/40000 [2:49:07<6:15:19,  1.22it/s, training_loss=0.143][A
Epoch 3:  31%|███       | 12459/40000 [2:49:07<6:15:32,  1.22it/s, training_loss=0.143][A
Epoch 3:  31%|███       | 12459/40000 [2:49:08<6:15:32,  1.22it/s, training_loss=0.110][A

Epoch 3:  31%|███▏      | 12544/40000 [2:50:18<6:14:14,  1.22it/s, training_loss=0.111][A
Epoch 3:  31%|███▏      | 12545/40000 [2:50:18<6:14:12,  1.22it/s, training_loss=0.111][A
Epoch 3:  31%|███▏      | 12545/40000 [2:50:19<6:14:12,  1.22it/s, training_loss=0.071][A
Epoch 3:  31%|███▏      | 12546/40000 [2:50:19<6:13:18,  1.23it/s, training_loss=0.071][A
Epoch 3:  31%|███▏      | 12546/40000 [2:50:19<6:13:18,  1.23it/s, training_loss=0.074][A
Epoch 3:  31%|███▏      | 12547/40000 [2:50:19<6:13:21,  1.23it/s, training_loss=0.074][A
Epoch 3:  31%|███▏      | 12547/40000 [2:50:20<6:13:21,  1.23it/s, training_loss=0.037][A
Epoch 3:  31%|███▏      | 12548/40000 [2:50:20<6:13:33,  1.22it/s, training_loss=0.037][A
Epoch 3:  31%|███▏      | 12548/40000 [2:50:21<6:13:33,  1.22it/s, training_loss=0.144][A
Epoch 3:  31%|███▏      | 12549/40000 [2:50:21<6:14:17,  1.22it/s, training_loss=0.144][A
Epoch 3:  31%|███▏      | 12549/40000 [2:50:22<6:14:17,  1.22it/s, training_loss=0.085][A

Epoch 3:  32%|███▏      | 12634/40000 [2:51:31<6:12:23,  1.22it/s, training_loss=0.038][A
Epoch 3:  32%|███▏      | 12635/40000 [2:51:31<6:11:37,  1.23it/s, training_loss=0.038][A
Epoch 3:  32%|███▏      | 12635/40000 [2:51:32<6:11:37,  1.23it/s, training_loss=0.087][A
Epoch 3:  32%|███▏      | 12636/40000 [2:51:32<6:11:20,  1.23it/s, training_loss=0.087][A
Epoch 3:  32%|███▏      | 12636/40000 [2:51:33<6:11:20,  1.23it/s, training_loss=0.067][A
Epoch 3:  32%|███▏      | 12637/40000 [2:51:33<6:12:03,  1.23it/s, training_loss=0.067][A
Epoch 3:  32%|███▏      | 12637/40000 [2:51:34<6:12:03,  1.23it/s, training_loss=0.040][A
Epoch 3:  32%|███▏      | 12638/40000 [2:51:34<6:12:01,  1.23it/s, training_loss=0.040][A
Epoch 3:  32%|███▏      | 12638/40000 [2:51:34<6:12:01,  1.23it/s, training_loss=0.039][A
Epoch 3:  32%|███▏      | 12639/40000 [2:51:34<6:11:35,  1.23it/s, training_loss=0.039][A
Epoch 3:  32%|███▏      | 12639/40000 [2:51:35<6:11:35,  1.23it/s, training_loss=0.078][A

Epoch 3:  32%|███▏      | 12724/40000 [2:52:45<6:11:19,  1.22it/s, training_loss=0.035][A
Epoch 3:  32%|███▏      | 12725/40000 [2:52:45<6:11:25,  1.22it/s, training_loss=0.035][A
Epoch 3:  32%|███▏      | 12725/40000 [2:52:46<6:11:25,  1.22it/s, training_loss=0.057][A
Epoch 3:  32%|███▏      | 12726/40000 [2:52:46<6:11:24,  1.22it/s, training_loss=0.057][A
Epoch 3:  32%|███▏      | 12726/40000 [2:52:46<6:11:24,  1.22it/s, training_loss=0.068][A
Epoch 3:  32%|███▏      | 12727/40000 [2:52:46<6:11:07,  1.22it/s, training_loss=0.068][A
Epoch 3:  32%|███▏      | 12727/40000 [2:52:47<6:11:07,  1.22it/s, training_loss=0.074][A
Epoch 3:  32%|███▏      | 12728/40000 [2:52:47<6:10:47,  1.23it/s, training_loss=0.074][A
Epoch 3:  32%|███▏      | 12728/40000 [2:52:48<6:10:47,  1.23it/s, training_loss=0.090][A
Epoch 3:  32%|███▏      | 12729/40000 [2:52:48<6:11:19,  1.22it/s, training_loss=0.090][A
Epoch 3:  32%|███▏      | 12729/40000 [2:52:49<6:11:19,  1.22it/s, training_loss=0.068][A

Epoch 3:  32%|███▏      | 12814/40000 [2:53:58<6:10:31,  1.22it/s, training_loss=0.042][A
Epoch 3:  32%|███▏      | 12815/40000 [2:53:58<6:10:13,  1.22it/s, training_loss=0.042][A
Epoch 3:  32%|███▏      | 12815/40000 [2:53:59<6:10:13,  1.22it/s, training_loss=0.053][A
Epoch 3:  32%|███▏      | 12816/40000 [2:53:59<6:09:50,  1.23it/s, training_loss=0.053][A
Epoch 3:  32%|███▏      | 12816/40000 [2:54:00<6:09:50,  1.23it/s, training_loss=0.043][A
Epoch 3:  32%|███▏      | 12817/40000 [2:54:00<6:09:29,  1.23it/s, training_loss=0.043][A
Epoch 3:  32%|███▏      | 12817/40000 [2:54:01<6:09:29,  1.23it/s, training_loss=0.157][A
Epoch 3:  32%|███▏      | 12818/40000 [2:54:01<6:10:14,  1.22it/s, training_loss=0.157][A
Epoch 3:  32%|███▏      | 12818/40000 [2:54:02<6:10:14,  1.22it/s, training_loss=0.028][A
Epoch 3:  32%|███▏      | 12819/40000 [2:54:02<6:10:43,  1.22it/s, training_loss=0.028][A
Epoch 3:  32%|███▏      | 12819/40000 [2:54:02<6:10:43,  1.22it/s, training_loss=0.051][A

Epoch 3:  32%|███▏      | 12904/40000 [2:55:12<6:08:26,  1.23it/s, training_loss=0.033][A
Epoch 3:  32%|███▏      | 12905/40000 [2:55:12<6:08:34,  1.23it/s, training_loss=0.033][A
Epoch 3:  32%|███▏      | 12905/40000 [2:55:13<6:08:34,  1.23it/s, training_loss=0.050][A
Epoch 3:  32%|███▏      | 12906/40000 [2:55:13<6:08:53,  1.22it/s, training_loss=0.050][A
Epoch 3:  32%|███▏      | 12906/40000 [2:55:13<6:08:53,  1.22it/s, training_loss=0.081][A
Epoch 3:  32%|███▏      | 12907/40000 [2:55:13<6:08:39,  1.22it/s, training_loss=0.081][A
Epoch 3:  32%|███▏      | 12907/40000 [2:55:14<6:08:39,  1.22it/s, training_loss=0.075][A
Epoch 3:  32%|███▏      | 12908/40000 [2:55:14<6:08:49,  1.22it/s, training_loss=0.075][A
Epoch 3:  32%|███▏      | 12908/40000 [2:55:15<6:08:49,  1.22it/s, training_loss=0.090][A
Epoch 3:  32%|███▏      | 12909/40000 [2:55:15<6:08:36,  1.22it/s, training_loss=0.090][A
Epoch 3:  32%|███▏      | 12909/40000 [2:55:16<6:08:36,  1.22it/s, training_loss=0.103][A

Epoch 3:  32%|███▏      | 12994/40000 [2:56:25<6:08:46,  1.22it/s, training_loss=0.024][A
Epoch 3:  32%|███▏      | 12995/40000 [2:56:25<6:07:55,  1.22it/s, training_loss=0.024][A
Epoch 3:  32%|███▏      | 12995/40000 [2:56:26<6:07:55,  1.22it/s, training_loss=0.069][A
Epoch 3:  32%|███▏      | 12996/40000 [2:56:26<6:07:43,  1.22it/s, training_loss=0.069][A
Epoch 3:  32%|███▏      | 12996/40000 [2:56:27<6:07:43,  1.22it/s, training_loss=0.061][A
Epoch 3:  32%|███▏      | 12997/40000 [2:56:27<6:08:00,  1.22it/s, training_loss=0.061][A
Epoch 3:  32%|███▏      | 12997/40000 [2:56:28<6:08:00,  1.22it/s, training_loss=0.098][A
Epoch 3:  32%|███▏      | 12998/40000 [2:56:28<6:08:05,  1.22it/s, training_loss=0.098][A
Epoch 3:  32%|███▏      | 12998/40000 [2:56:29<6:08:05,  1.22it/s, training_loss=0.044][A
Epoch 3:  32%|███▏      | 12999/40000 [2:56:29<6:08:14,  1.22it/s, training_loss=0.044][A
Epoch 3:  32%|███▏      | 12999/40000 [2:56:30<6:08:14,  1.22it/s, training_loss=0.047][A

Epoch 3:  33%|███▎      | 13084/40000 [2:57:39<6:07:14,  1.22it/s, training_loss=0.061][A
Epoch 3:  33%|███▎      | 13085/40000 [2:57:39<6:07:11,  1.22it/s, training_loss=0.061][A
Epoch 3:  33%|███▎      | 13085/40000 [2:57:40<6:07:11,  1.22it/s, training_loss=0.037][A
Epoch 3:  33%|███▎      | 13086/40000 [2:57:40<6:07:01,  1.22it/s, training_loss=0.037][A
Epoch 3:  33%|███▎      | 13086/40000 [2:57:41<6:07:01,  1.22it/s, training_loss=0.052][A
Epoch 3:  33%|███▎      | 13087/40000 [2:57:41<6:06:51,  1.22it/s, training_loss=0.052][A
Epoch 3:  33%|███▎      | 13087/40000 [2:57:41<6:06:51,  1.22it/s, training_loss=0.081][A
Epoch 3:  33%|███▎      | 13088/40000 [2:57:41<6:06:42,  1.22it/s, training_loss=0.081][A
Epoch 3:  33%|███▎      | 13088/40000 [2:57:42<6:06:42,  1.22it/s, training_loss=0.061][A
Epoch 3:  33%|███▎      | 13089/40000 [2:57:42<6:06:59,  1.22it/s, training_loss=0.061][A
Epoch 3:  33%|███▎      | 13089/40000 [2:57:43<6:06:59,  1.22it/s, training_loss=0.106][A

Epoch 3:  33%|███▎      | 13174/40000 [2:58:53<6:04:44,  1.23it/s, training_loss=0.087][A
Epoch 3:  33%|███▎      | 13175/40000 [2:58:53<6:04:56,  1.23it/s, training_loss=0.087][A
Epoch 3:  33%|███▎      | 13175/40000 [2:58:53<6:04:56,  1.23it/s, training_loss=0.058][A
Epoch 3:  33%|███▎      | 13176/40000 [2:58:53<6:05:27,  1.22it/s, training_loss=0.058][A
Epoch 3:  33%|███▎      | 13176/40000 [2:58:54<6:05:27,  1.22it/s, training_loss=0.054][A
Epoch 3:  33%|███▎      | 13177/40000 [2:58:54<6:05:05,  1.22it/s, training_loss=0.054][A
Epoch 3:  33%|███▎      | 13177/40000 [2:58:55<6:05:05,  1.22it/s, training_loss=0.033][A
Epoch 3:  33%|███▎      | 13178/40000 [2:58:55<6:05:13,  1.22it/s, training_loss=0.033][A
Epoch 3:  33%|███▎      | 13178/40000 [2:58:56<6:05:13,  1.22it/s, training_loss=0.040][A
Epoch 3:  33%|███▎      | 13179/40000 [2:58:56<6:04:34,  1.23it/s, training_loss=0.040][A
Epoch 3:  33%|███▎      | 13179/40000 [2:58:57<6:04:34,  1.23it/s, training_loss=0.118][A

Epoch 3:  33%|███▎      | 13264/40000 [3:00:06<6:04:03,  1.22it/s, training_loss=0.138][A
Epoch 3:  33%|███▎      | 13265/40000 [3:00:06<6:03:16,  1.23it/s, training_loss=0.138][A
Epoch 3:  33%|███▎      | 13265/40000 [3:00:07<6:03:16,  1.23it/s, training_loss=0.051][A
Epoch 3:  33%|███▎      | 13266/40000 [3:00:07<6:03:50,  1.22it/s, training_loss=0.051][A
Epoch 3:  33%|███▎      | 13266/40000 [3:00:08<6:03:50,  1.22it/s, training_loss=0.119][A
Epoch 3:  33%|███▎      | 13267/40000 [3:00:08<6:03:57,  1.22it/s, training_loss=0.119][A
Epoch 3:  33%|███▎      | 13267/40000 [3:00:08<6:03:57,  1.22it/s, training_loss=0.083][A
Epoch 3:  33%|███▎      | 13268/40000 [3:00:08<6:03:33,  1.23it/s, training_loss=0.083][A
Epoch 3:  33%|███▎      | 13268/40000 [3:00:09<6:03:33,  1.23it/s, training_loss=0.080][A
Epoch 3:  33%|███▎      | 13269/40000 [3:00:09<6:03:30,  1.23it/s, training_loss=0.080][A
Epoch 3:  33%|███▎      | 13269/40000 [3:00:10<6:03:30,  1.23it/s, training_loss=0.059][A

Epoch 3:  33%|███▎      | 13354/40000 [3:01:20<6:03:16,  1.22it/s, training_loss=0.035][A
Epoch 3:  33%|███▎      | 13355/40000 [3:01:20<6:03:35,  1.22it/s, training_loss=0.035][A
Epoch 3:  33%|███▎      | 13355/40000 [3:01:20<6:03:35,  1.22it/s, training_loss=0.132][A
Epoch 3:  33%|███▎      | 13356/40000 [3:01:20<6:03:31,  1.22it/s, training_loss=0.132][A
Epoch 3:  33%|███▎      | 13356/40000 [3:01:21<6:03:31,  1.22it/s, training_loss=0.098][A
Epoch 3:  33%|███▎      | 13357/40000 [3:01:21<6:03:30,  1.22it/s, training_loss=0.098][A
Epoch 3:  33%|███▎      | 13357/40000 [3:01:22<6:03:30,  1.22it/s, training_loss=0.087][A
Epoch 3:  33%|███▎      | 13358/40000 [3:01:22<6:03:58,  1.22it/s, training_loss=0.087][A
Epoch 3:  33%|███▎      | 13358/40000 [3:01:23<6:03:58,  1.22it/s, training_loss=0.170][A
Epoch 3:  33%|███▎      | 13359/40000 [3:01:23<6:03:45,  1.22it/s, training_loss=0.170][A
Epoch 3:  33%|███▎      | 13359/40000 [3:01:24<6:03:45,  1.22it/s, training_loss=0.069][A

Epoch 3:  34%|███▎      | 13444/40000 [3:02:33<6:00:41,  1.23it/s, training_loss=0.123][A
Epoch 3:  34%|███▎      | 13445/40000 [3:02:33<6:01:03,  1.23it/s, training_loss=0.123][A
Epoch 3:  34%|███▎      | 13445/40000 [3:02:34<6:01:03,  1.23it/s, training_loss=0.139][A
Epoch 3:  34%|███▎      | 13446/40000 [3:02:34<6:01:00,  1.23it/s, training_loss=0.139][A
Epoch 3:  34%|███▎      | 13446/40000 [3:02:35<6:01:00,  1.23it/s, training_loss=0.072][A
Epoch 3:  34%|███▎      | 13447/40000 [3:02:35<6:00:52,  1.23it/s, training_loss=0.072][A
Epoch 3:  34%|███▎      | 13447/40000 [3:02:35<6:00:52,  1.23it/s, training_loss=0.063][A
Epoch 3:  34%|███▎      | 13448/40000 [3:02:35<6:01:11,  1.23it/s, training_loss=0.063][A
Epoch 3:  34%|███▎      | 13448/40000 [3:02:36<6:01:11,  1.23it/s, training_loss=0.228][A
Epoch 3:  34%|███▎      | 13449/40000 [3:02:36<6:01:40,  1.22it/s, training_loss=0.228][A
Epoch 3:  34%|███▎      | 13449/40000 [3:02:37<6:01:40,  1.22it/s, training_loss=0.084][A

Epoch 3:  34%|███▍      | 13534/40000 [3:03:47<6:00:50,  1.22it/s, training_loss=0.053][A
Epoch 3:  34%|███▍      | 13535/40000 [3:03:47<6:01:01,  1.22it/s, training_loss=0.053][A
Epoch 3:  34%|███▍      | 13535/40000 [3:03:47<6:01:01,  1.22it/s, training_loss=0.146][A
Epoch 3:  34%|███▍      | 13536/40000 [3:03:47<6:01:00,  1.22it/s, training_loss=0.146][A
Epoch 3:  34%|███▍      | 13536/40000 [3:03:48<6:01:00,  1.22it/s, training_loss=0.047][A
Epoch 3:  34%|███▍      | 13537/40000 [3:03:48<6:00:43,  1.22it/s, training_loss=0.047][A
Epoch 3:  34%|███▍      | 13537/40000 [3:03:49<6:00:43,  1.22it/s, training_loss=0.036][A
Epoch 3:  34%|███▍      | 13538/40000 [3:03:49<6:00:34,  1.22it/s, training_loss=0.036][A
Epoch 3:  34%|███▍      | 13538/40000 [3:03:50<6:00:34,  1.22it/s, training_loss=0.103][A
Epoch 3:  34%|███▍      | 13539/40000 [3:03:50<6:00:38,  1.22it/s, training_loss=0.103][A
Epoch 3:  34%|███▍      | 13539/40000 [3:03:51<6:00:38,  1.22it/s, training_loss=0.070][A

Epoch 3:  34%|███▍      | 13624/40000 [3:05:00<5:59:29,  1.22it/s, training_loss=0.046][A
Epoch 3:  34%|███▍      | 13625/40000 [3:05:00<5:58:49,  1.23it/s, training_loss=0.046][A
Epoch 3:  34%|███▍      | 13625/40000 [3:05:01<5:58:49,  1.23it/s, training_loss=0.199][A
Epoch 3:  34%|███▍      | 13626/40000 [3:05:01<5:58:50,  1.22it/s, training_loss=0.199][A
Epoch 3:  34%|███▍      | 13626/40000 [3:05:02<5:58:50,  1.22it/s, training_loss=0.135][A
Epoch 3:  34%|███▍      | 13627/40000 [3:05:02<5:58:56,  1.22it/s, training_loss=0.135][A
Epoch 3:  34%|███▍      | 13627/40000 [3:05:03<5:58:56,  1.22it/s, training_loss=0.030][A
Epoch 3:  34%|███▍      | 13628/40000 [3:05:03<5:58:54,  1.22it/s, training_loss=0.030][A
Epoch 3:  34%|███▍      | 13628/40000 [3:05:03<5:58:54,  1.22it/s, training_loss=0.093][A
Epoch 3:  34%|███▍      | 13629/40000 [3:05:03<5:58:22,  1.23it/s, training_loss=0.093][A
Epoch 3:  34%|███▍      | 13629/40000 [3:05:04<5:58:22,  1.23it/s, training_loss=0.045][A

Epoch 3:  34%|███▍      | 13714/40000 [3:06:14<5:57:51,  1.22it/s, training_loss=0.036][A
Epoch 3:  34%|███▍      | 13715/40000 [3:06:14<5:57:35,  1.23it/s, training_loss=0.036][A
Epoch 3:  34%|███▍      | 13715/40000 [3:06:15<5:57:35,  1.23it/s, training_loss=0.020][A
Epoch 3:  34%|███▍      | 13716/40000 [3:06:15<5:57:58,  1.22it/s, training_loss=0.020][A
Epoch 3:  34%|███▍      | 13716/40000 [3:06:15<5:57:58,  1.22it/s, training_loss=0.058][A
Epoch 3:  34%|███▍      | 13717/40000 [3:06:15<5:57:44,  1.22it/s, training_loss=0.058][A
Epoch 3:  34%|███▍      | 13717/40000 [3:06:16<5:57:44,  1.22it/s, training_loss=0.084][A
Epoch 3:  34%|███▍      | 13718/40000 [3:06:16<5:57:49,  1.22it/s, training_loss=0.084][A
Epoch 3:  34%|███▍      | 13718/40000 [3:06:17<5:57:49,  1.22it/s, training_loss=0.082][A
Epoch 3:  34%|███▍      | 13719/40000 [3:06:17<5:58:23,  1.22it/s, training_loss=0.082][A
Epoch 3:  34%|███▍      | 13719/40000 [3:06:18<5:58:23,  1.22it/s, training_loss=0.094][A

Epoch 3:  35%|███▍      | 13804/40000 [3:07:27<5:56:12,  1.23it/s, training_loss=0.075][A
Epoch 3:  35%|███▍      | 13805/40000 [3:07:27<5:56:30,  1.22it/s, training_loss=0.075][A
Epoch 3:  35%|███▍      | 13805/40000 [3:07:28<5:56:30,  1.22it/s, training_loss=0.090][A
Epoch 3:  35%|███▍      | 13806/40000 [3:07:28<5:56:17,  1.23it/s, training_loss=0.090][A
Epoch 3:  35%|███▍      | 13806/40000 [3:07:29<5:56:17,  1.23it/s, training_loss=0.109][A
Epoch 3:  35%|███▍      | 13807/40000 [3:07:29<5:56:44,  1.22it/s, training_loss=0.109][A
Epoch 3:  35%|███▍      | 13807/40000 [3:07:30<5:56:44,  1.22it/s, training_loss=0.055][A
Epoch 3:  35%|███▍      | 13808/40000 [3:07:30<5:56:56,  1.22it/s, training_loss=0.055][A
Epoch 3:  35%|███▍      | 13808/40000 [3:07:31<5:56:56,  1.22it/s, training_loss=0.115][A
Epoch 3:  35%|███▍      | 13809/40000 [3:07:31<5:56:39,  1.22it/s, training_loss=0.115][A
Epoch 3:  35%|███▍      | 13809/40000 [3:07:31<5:56:39,  1.22it/s, training_loss=0.097][A

Epoch 3:  35%|███▍      | 13894/40000 [3:08:41<5:55:39,  1.22it/s, training_loss=0.111][A
Epoch 3:  35%|███▍      | 13895/40000 [3:08:41<5:56:12,  1.22it/s, training_loss=0.111][A
Epoch 3:  35%|███▍      | 13895/40000 [3:08:42<5:56:12,  1.22it/s, training_loss=0.109][A
Epoch 3:  35%|███▍      | 13896/40000 [3:08:42<5:56:17,  1.22it/s, training_loss=0.109][A
Epoch 3:  35%|███▍      | 13896/40000 [3:08:42<5:56:17,  1.22it/s, training_loss=0.078][A
Epoch 3:  35%|███▍      | 13897/40000 [3:08:42<5:56:04,  1.22it/s, training_loss=0.078][A
Epoch 3:  35%|███▍      | 13897/40000 [3:08:43<5:56:04,  1.22it/s, training_loss=0.068][A
Epoch 3:  35%|███▍      | 13898/40000 [3:08:43<5:56:27,  1.22it/s, training_loss=0.068][A
Epoch 3:  35%|███▍      | 13898/40000 [3:08:44<5:56:27,  1.22it/s, training_loss=0.050][A
Epoch 3:  35%|███▍      | 13899/40000 [3:08:44<5:56:16,  1.22it/s, training_loss=0.050][A
Epoch 3:  35%|███▍      | 13899/40000 [3:08:45<5:56:16,  1.22it/s, training_loss=0.115][A

Epoch 3:  35%|███▍      | 13984/40000 [3:09:54<5:54:29,  1.22it/s, training_loss=0.052][A
Epoch 3:  35%|███▍      | 13985/40000 [3:09:54<5:54:45,  1.22it/s, training_loss=0.052][A
Epoch 3:  35%|███▍      | 13985/40000 [3:09:55<5:54:45,  1.22it/s, training_loss=0.086][A
Epoch 3:  35%|███▍      | 13986/40000 [3:09:55<5:54:58,  1.22it/s, training_loss=0.086][A
Epoch 3:  35%|███▍      | 13986/40000 [3:09:56<5:54:58,  1.22it/s, training_loss=0.059][A
Epoch 3:  35%|███▍      | 13987/40000 [3:09:56<5:54:42,  1.22it/s, training_loss=0.059][A
Epoch 3:  35%|███▍      | 13987/40000 [3:09:57<5:54:42,  1.22it/s, training_loss=0.075][A
Epoch 3:  35%|███▍      | 13988/40000 [3:09:57<5:54:30,  1.22it/s, training_loss=0.075][A
Epoch 3:  35%|███▍      | 13988/40000 [3:09:58<5:54:30,  1.22it/s, training_loss=0.032][A
Epoch 3:  35%|███▍      | 13989/40000 [3:09:58<5:54:25,  1.22it/s, training_loss=0.032][A
Epoch 3:  35%|███▍      | 13989/40000 [3:09:59<5:54:25,  1.22it/s, training_loss=0.079][A

Epoch 3:  35%|███▌      | 14074/40000 [3:11:08<5:53:40,  1.22it/s, training_loss=0.107][A
Epoch 3:  35%|███▌      | 14075/40000 [3:11:08<5:52:54,  1.22it/s, training_loss=0.107][A
Epoch 3:  35%|███▌      | 14075/40000 [3:11:09<5:52:54,  1.22it/s, training_loss=0.115][A
Epoch 3:  35%|███▌      | 14076/40000 [3:11:09<5:52:51,  1.22it/s, training_loss=0.115][A
Epoch 3:  35%|███▌      | 14076/40000 [3:11:10<5:52:51,  1.22it/s, training_loss=0.132][A
Epoch 3:  35%|███▌      | 14077/40000 [3:11:10<5:53:00,  1.22it/s, training_loss=0.132][A
Epoch 3:  35%|███▌      | 14077/40000 [3:11:10<5:53:00,  1.22it/s, training_loss=0.036][A
Epoch 3:  35%|███▌      | 14078/40000 [3:11:10<5:52:21,  1.23it/s, training_loss=0.036][A
Epoch 3:  35%|███▌      | 14078/40000 [3:11:11<5:52:21,  1.23it/s, training_loss=0.088][A
Epoch 3:  35%|███▌      | 14079/40000 [3:11:11<5:52:24,  1.23it/s, training_loss=0.088][A
Epoch 3:  35%|███▌      | 14079/40000 [3:11:12<5:52:24,  1.23it/s, training_loss=0.085][A

Epoch 3:  35%|███▌      | 14164/40000 [3:12:22<5:51:48,  1.22it/s, training_loss=0.053][A
Epoch 3:  35%|███▌      | 14165/40000 [3:12:22<5:52:07,  1.22it/s, training_loss=0.053][A
Epoch 3:  35%|███▌      | 14165/40000 [3:12:22<5:52:07,  1.22it/s, training_loss=0.031][A
Epoch 3:  35%|███▌      | 14166/40000 [3:12:22<5:52:00,  1.22it/s, training_loss=0.031][A
Epoch 3:  35%|███▌      | 14166/40000 [3:12:23<5:52:00,  1.22it/s, training_loss=0.083][A
Epoch 3:  35%|███▌      | 14167/40000 [3:12:23<5:52:17,  1.22it/s, training_loss=0.083][A
Epoch 3:  35%|███▌      | 14167/40000 [3:12:24<5:52:17,  1.22it/s, training_loss=0.072][A
Epoch 3:  35%|███▌      | 14168/40000 [3:12:24<5:52:09,  1.22it/s, training_loss=0.072][A
Epoch 3:  35%|███▌      | 14168/40000 [3:12:25<5:52:09,  1.22it/s, training_loss=0.120][A
Epoch 3:  35%|███▌      | 14169/40000 [3:12:25<5:52:26,  1.22it/s, training_loss=0.120][A
Epoch 3:  35%|███▌      | 14169/40000 [3:12:26<5:52:26,  1.22it/s, training_loss=0.107][A

Epoch 3:  36%|███▌      | 14254/40000 [3:13:35<5:51:21,  1.22it/s, training_loss=0.061][A
Epoch 3:  36%|███▌      | 14255/40000 [3:13:35<5:50:58,  1.22it/s, training_loss=0.061][A
Epoch 3:  36%|███▌      | 14255/40000 [3:13:36<5:50:58,  1.22it/s, training_loss=0.059][A
Epoch 3:  36%|███▌      | 14256/40000 [3:13:36<5:51:18,  1.22it/s, training_loss=0.059][A
Epoch 3:  36%|███▌      | 14256/40000 [3:13:37<5:51:18,  1.22it/s, training_loss=0.119][A
Epoch 3:  36%|███▌      | 14257/40000 [3:13:37<5:51:04,  1.22it/s, training_loss=0.119][A
Epoch 3:  36%|███▌      | 14257/40000 [3:13:38<5:51:04,  1.22it/s, training_loss=0.050][A
Epoch 3:  36%|███▌      | 14258/40000 [3:13:38<5:51:14,  1.22it/s, training_loss=0.050][A
Epoch 3:  36%|███▌      | 14258/40000 [3:13:38<5:51:14,  1.22it/s, training_loss=0.096][A
Epoch 3:  36%|███▌      | 14259/40000 [3:13:38<5:51:06,  1.22it/s, training_loss=0.096][A
Epoch 3:  36%|███▌      | 14259/40000 [3:13:39<5:51:06,  1.22it/s, training_loss=0.040][A

Epoch 3:  36%|███▌      | 14344/40000 [3:14:49<5:49:16,  1.22it/s, training_loss=0.105][A
Epoch 3:  36%|███▌      | 14345/40000 [3:14:49<5:49:13,  1.22it/s, training_loss=0.105][A
Epoch 3:  36%|███▌      | 14345/40000 [3:14:49<5:49:13,  1.22it/s, training_loss=0.069][A
Epoch 3:  36%|███▌      | 14346/40000 [3:14:49<5:49:18,  1.22it/s, training_loss=0.069][A
Epoch 3:  36%|███▌      | 14346/40000 [3:14:50<5:49:18,  1.22it/s, training_loss=0.157][A
Epoch 3:  36%|███▌      | 14347/40000 [3:14:50<5:49:27,  1.22it/s, training_loss=0.157][A
Epoch 3:  36%|███▌      | 14347/40000 [3:14:51<5:49:27,  1.22it/s, training_loss=0.120][A
Epoch 3:  36%|███▌      | 14348/40000 [3:14:51<5:49:22,  1.22it/s, training_loss=0.120][A
Epoch 3:  36%|███▌      | 14348/40000 [3:14:52<5:49:22,  1.22it/s, training_loss=0.041][A
Epoch 3:  36%|███▌      | 14349/40000 [3:14:52<5:49:30,  1.22it/s, training_loss=0.041][A
Epoch 3:  36%|███▌      | 14349/40000 [3:14:53<5:49:30,  1.22it/s, training_loss=0.054][A

Epoch 3:  36%|███▌      | 14434/40000 [3:16:02<5:48:20,  1.22it/s, training_loss=0.154][A
Epoch 3:  36%|███▌      | 14435/40000 [3:16:02<5:48:17,  1.22it/s, training_loss=0.154][A
Epoch 3:  36%|███▌      | 14435/40000 [3:16:03<5:48:17,  1.22it/s, training_loss=0.032][A
Epoch 3:  36%|███▌      | 14436/40000 [3:16:03<5:48:20,  1.22it/s, training_loss=0.032][A
Epoch 3:  36%|███▌      | 14436/40000 [3:16:04<5:48:20,  1.22it/s, training_loss=0.025][A
Epoch 3:  36%|███▌      | 14437/40000 [3:16:04<5:48:03,  1.22it/s, training_loss=0.025][A
Epoch 3:  36%|███▌      | 14437/40000 [3:16:05<5:48:03,  1.22it/s, training_loss=0.060][A
Epoch 3:  36%|███▌      | 14438/40000 [3:16:05<5:48:24,  1.22it/s, training_loss=0.060][A
Epoch 3:  36%|███▌      | 14438/40000 [3:16:06<5:48:24,  1.22it/s, training_loss=0.104][A
Epoch 3:  36%|███▌      | 14439/40000 [3:16:06<5:48:41,  1.22it/s, training_loss=0.104][A
Epoch 3:  36%|███▌      | 14439/40000 [3:16:06<5:48:41,  1.22it/s, training_loss=0.082][A

Epoch 3:  36%|███▋      | 14524/40000 [3:17:16<5:46:42,  1.22it/s, training_loss=0.094][A
Epoch 3:  36%|███▋      | 14525/40000 [3:17:16<5:46:46,  1.22it/s, training_loss=0.094][A
Epoch 3:  36%|███▋      | 14525/40000 [3:17:17<5:46:46,  1.22it/s, training_loss=0.030][A
Epoch 3:  36%|███▋      | 14526/40000 [3:17:17<5:46:54,  1.22it/s, training_loss=0.030][A
Epoch 3:  36%|███▋      | 14526/40000 [3:17:17<5:46:54,  1.22it/s, training_loss=0.108][A
Epoch 3:  36%|███▋      | 14527/40000 [3:17:17<5:47:00,  1.22it/s, training_loss=0.108][A
Epoch 3:  36%|███▋      | 14527/40000 [3:17:18<5:47:00,  1.22it/s, training_loss=0.068][A
Epoch 3:  36%|███▋      | 14528/40000 [3:17:18<5:46:33,  1.23it/s, training_loss=0.068][A
Epoch 3:  36%|███▋      | 14528/40000 [3:17:19<5:46:33,  1.23it/s, training_loss=0.062][A
Epoch 3:  36%|███▋      | 14529/40000 [3:17:19<5:48:28,  1.22it/s, training_loss=0.062][A
Epoch 3:  36%|███▋      | 14529/40000 [3:17:20<5:48:28,  1.22it/s, training_loss=0.063][A

Epoch 3:  37%|███▋      | 14614/40000 [3:18:29<5:45:53,  1.22it/s, training_loss=0.096][A
Epoch 3:  37%|███▋      | 14615/40000 [3:18:29<5:45:19,  1.23it/s, training_loss=0.096][A
Epoch 3:  37%|███▋      | 14615/40000 [3:18:30<5:45:19,  1.23it/s, training_loss=0.078][A
Epoch 3:  37%|███▋      | 14616/40000 [3:18:30<5:45:35,  1.22it/s, training_loss=0.078][A
Epoch 3:  37%|███▋      | 14616/40000 [3:18:31<5:45:35,  1.22it/s, training_loss=0.067][A
Epoch 3:  37%|███▋      | 14617/40000 [3:18:31<5:45:55,  1.22it/s, training_loss=0.067][A
Epoch 3:  37%|███▋      | 14617/40000 [3:18:32<5:45:55,  1.22it/s, training_loss=0.085][A
Epoch 3:  37%|███▋      | 14618/40000 [3:18:32<5:46:15,  1.22it/s, training_loss=0.085][A
Epoch 3:  37%|███▋      | 14618/40000 [3:18:33<5:46:15,  1.22it/s, training_loss=0.046][A
Epoch 3:  37%|███▋      | 14619/40000 [3:18:33<5:46:02,  1.22it/s, training_loss=0.046][A
Epoch 3:  37%|███▋      | 14619/40000 [3:18:33<5:46:02,  1.22it/s, training_loss=0.132][A

Epoch 3:  37%|███▋      | 14704/40000 [3:19:43<5:44:16,  1.22it/s, training_loss=0.059][A
Epoch 3:  37%|███▋      | 14705/40000 [3:19:43<5:44:32,  1.22it/s, training_loss=0.059][A
Epoch 3:  37%|███▋      | 14705/40000 [3:19:44<5:44:32,  1.22it/s, training_loss=0.079][A
Epoch 3:  37%|███▋      | 14706/40000 [3:19:44<5:44:28,  1.22it/s, training_loss=0.079][A
Epoch 3:  37%|███▋      | 14706/40000 [3:19:44<5:44:28,  1.22it/s, training_loss=0.077][A
Epoch 3:  37%|███▋      | 14707/40000 [3:19:44<5:45:00,  1.22it/s, training_loss=0.077][A
Epoch 3:  37%|███▋      | 14707/40000 [3:19:45<5:45:00,  1.22it/s, training_loss=0.065][A
Epoch 3:  37%|███▋      | 14708/40000 [3:19:45<5:44:54,  1.22it/s, training_loss=0.065][A
Epoch 3:  37%|███▋      | 14708/40000 [3:19:46<5:44:54,  1.22it/s, training_loss=0.090][A
Epoch 3:  37%|███▋      | 14709/40000 [3:19:46<5:44:29,  1.22it/s, training_loss=0.090][A
Epoch 3:  37%|███▋      | 14709/40000 [3:19:47<5:44:29,  1.22it/s, training_loss=0.159][A

Epoch 3:  37%|███▋      | 14794/40000 [3:20:56<5:43:41,  1.22it/s, training_loss=0.056][A
Epoch 3:  37%|███▋      | 14795/40000 [3:20:56<5:43:56,  1.22it/s, training_loss=0.056][A
Epoch 3:  37%|███▋      | 14795/40000 [3:20:57<5:43:56,  1.22it/s, training_loss=0.086][A
Epoch 3:  37%|███▋      | 14796/40000 [3:20:57<5:43:43,  1.22it/s, training_loss=0.086][A
Epoch 3:  37%|███▋      | 14796/40000 [3:20:58<5:43:43,  1.22it/s, training_loss=0.070][A
Epoch 3:  37%|███▋      | 14797/40000 [3:20:58<5:43:29,  1.22it/s, training_loss=0.070][A
Epoch 3:  37%|███▋      | 14797/40000 [3:20:59<5:43:29,  1.22it/s, training_loss=0.076][A
Epoch 3:  37%|███▋      | 14798/40000 [3:20:59<5:43:31,  1.22it/s, training_loss=0.076][A
Epoch 3:  37%|███▋      | 14798/40000 [3:21:00<5:43:31,  1.22it/s, training_loss=0.146][A
Epoch 3:  37%|███▋      | 14799/40000 [3:21:00<5:43:00,  1.22it/s, training_loss=0.146][A
Epoch 3:  37%|███▋      | 14799/40000 [3:21:00<5:43:00,  1.22it/s, training_loss=0.038][A

Epoch 3:  37%|███▋      | 14884/40000 [3:22:10<5:41:45,  1.22it/s, training_loss=0.069][A
Epoch 3:  37%|███▋      | 14885/40000 [3:22:10<5:41:06,  1.23it/s, training_loss=0.069][A
Epoch 3:  37%|███▋      | 14885/40000 [3:22:11<5:41:06,  1.23it/s, training_loss=0.038][A
Epoch 3:  37%|███▋      | 14886/40000 [3:22:11<5:41:20,  1.23it/s, training_loss=0.038][A
Epoch 3:  37%|███▋      | 14886/40000 [3:22:11<5:41:20,  1.23it/s, training_loss=0.086][A
Epoch 3:  37%|███▋      | 14887/40000 [3:22:11<5:41:40,  1.22it/s, training_loss=0.086][A
Epoch 3:  37%|███▋      | 14887/40000 [3:22:12<5:41:40,  1.22it/s, training_loss=0.118][A
Epoch 3:  37%|███▋      | 14888/40000 [3:22:12<5:42:12,  1.22it/s, training_loss=0.118][A
Epoch 3:  37%|███▋      | 14888/40000 [3:22:13<5:42:12,  1.22it/s, training_loss=0.046][A
Epoch 3:  37%|███▋      | 14889/40000 [3:22:13<5:41:23,  1.23it/s, training_loss=0.046][A
Epoch 3:  37%|███▋      | 14889/40000 [3:22:14<5:41:23,  1.23it/s, training_loss=0.045][A

Epoch 3:  37%|███▋      | 14974/40000 [3:23:23<5:40:48,  1.22it/s, training_loss=0.164][A
Epoch 3:  37%|███▋      | 14975/40000 [3:23:23<5:40:54,  1.22it/s, training_loss=0.164][A
Epoch 3:  37%|███▋      | 14975/40000 [3:23:24<5:40:54,  1.22it/s, training_loss=0.084][A
Epoch 3:  37%|███▋      | 14976/40000 [3:23:24<5:41:23,  1.22it/s, training_loss=0.084][A
Epoch 3:  37%|███▋      | 14976/40000 [3:23:25<5:41:23,  1.22it/s, training_loss=0.094][A
Epoch 3:  37%|███▋      | 14977/40000 [3:23:25<5:40:50,  1.22it/s, training_loss=0.094][A
Epoch 3:  37%|███▋      | 14977/40000 [3:23:26<5:40:50,  1.22it/s, training_loss=0.066][A
Epoch 3:  37%|███▋      | 14978/40000 [3:23:26<5:41:16,  1.22it/s, training_loss=0.066][A
Epoch 3:  37%|███▋      | 14978/40000 [3:23:27<5:41:16,  1.22it/s, training_loss=0.049][A
Epoch 3:  37%|███▋      | 14979/40000 [3:23:27<5:41:07,  1.22it/s, training_loss=0.049][A
Epoch 3:  37%|███▋      | 14979/40000 [3:23:28<5:41:07,  1.22it/s, training_loss=0.096][A

Epoch 3:  38%|███▊      | 15064/40000 [3:24:37<5:39:13,  1.23it/s, training_loss=0.115][A
Epoch 3:  38%|███▊      | 15065/40000 [3:24:37<5:39:12,  1.23it/s, training_loss=0.115][A
Epoch 3:  38%|███▊      | 15065/40000 [3:24:38<5:39:12,  1.23it/s, training_loss=0.072][A
Epoch 3:  38%|███▊      | 15066/40000 [3:24:38<5:39:31,  1.22it/s, training_loss=0.072][A
Epoch 3:  38%|███▊      | 15066/40000 [3:24:39<5:39:31,  1.22it/s, training_loss=0.049][A
Epoch 3:  38%|███▊      | 15067/40000 [3:24:39<5:39:24,  1.22it/s, training_loss=0.049][A
Epoch 3:  38%|███▊      | 15067/40000 [3:24:39<5:39:24,  1.22it/s, training_loss=0.069][A
Epoch 3:  38%|███▊      | 15068/40000 [3:24:39<5:38:50,  1.23it/s, training_loss=0.069][A
Epoch 3:  38%|███▊      | 15068/40000 [3:24:40<5:38:50,  1.23it/s, training_loss=0.062][A
Epoch 3:  38%|███▊      | 15069/40000 [3:24:40<5:38:40,  1.23it/s, training_loss=0.062][A
Epoch 3:  38%|███▊      | 15069/40000 [3:24:41<5:38:40,  1.23it/s, training_loss=0.130][A

Epoch 3:  38%|███▊      | 15154/40000 [3:25:51<5:37:50,  1.23it/s, training_loss=0.062][A
Epoch 3:  38%|███▊      | 15155/40000 [3:25:51<5:38:01,  1.23it/s, training_loss=0.062][A
Epoch 3:  38%|███▊      | 15155/40000 [3:25:51<5:38:01,  1.23it/s, training_loss=0.065][A
Epoch 3:  38%|███▊      | 15156/40000 [3:25:51<5:38:13,  1.22it/s, training_loss=0.065][A
Epoch 3:  38%|███▊      | 15156/40000 [3:25:52<5:38:13,  1.22it/s, training_loss=0.061][A
Epoch 3:  38%|███▊      | 15157/40000 [3:25:52<5:37:38,  1.23it/s, training_loss=0.061][A
Epoch 3:  38%|███▊      | 15157/40000 [3:25:53<5:37:38,  1.23it/s, training_loss=0.055][A
Epoch 3:  38%|███▊      | 15158/40000 [3:25:53<5:37:40,  1.23it/s, training_loss=0.055][A
Epoch 3:  38%|███▊      | 15158/40000 [3:25:54<5:37:40,  1.23it/s, training_loss=0.133][A
Epoch 3:  38%|███▊      | 15159/40000 [3:25:54<5:38:36,  1.22it/s, training_loss=0.133][A
Epoch 3:  38%|███▊      | 15159/40000 [3:25:55<5:38:36,  1.22it/s, training_loss=0.055][A

Epoch 3:  38%|███▊      | 15244/40000 [3:27:04<5:37:07,  1.22it/s, training_loss=0.104][A
Epoch 3:  38%|███▊      | 15245/40000 [3:27:04<5:37:17,  1.22it/s, training_loss=0.104][A
Epoch 3:  38%|███▊      | 15245/40000 [3:27:05<5:37:17,  1.22it/s, training_loss=0.123][A
Epoch 3:  38%|███▊      | 15246/40000 [3:27:05<5:36:53,  1.22it/s, training_loss=0.123][A
Epoch 3:  38%|███▊      | 15246/40000 [3:27:06<5:36:53,  1.22it/s, training_loss=0.136][A
Epoch 3:  38%|███▊      | 15247/40000 [3:27:06<5:37:07,  1.22it/s, training_loss=0.136][A
Epoch 3:  38%|███▊      | 15247/40000 [3:27:07<5:37:07,  1.22it/s, training_loss=0.130][A
Epoch 3:  38%|███▊      | 15248/40000 [3:27:07<5:36:53,  1.22it/s, training_loss=0.130][A
Epoch 3:  38%|███▊      | 15248/40000 [3:27:07<5:36:53,  1.22it/s, training_loss=0.073][A
Epoch 3:  38%|███▊      | 15249/40000 [3:27:07<5:36:21,  1.23it/s, training_loss=0.073][A
Epoch 3:  38%|███▊      | 15249/40000 [3:27:08<5:36:21,  1.23it/s, training_loss=0.046][A

Epoch 3:  38%|███▊      | 15334/40000 [3:28:18<5:36:11,  1.22it/s, training_loss=0.076][A
Epoch 3:  38%|███▊      | 15335/40000 [3:28:18<5:36:05,  1.22it/s, training_loss=0.076][A
Epoch 3:  38%|███▊      | 15335/40000 [3:28:18<5:36:05,  1.22it/s, training_loss=0.096][A
Epoch 3:  38%|███▊      | 15336/40000 [3:28:18<5:36:41,  1.22it/s, training_loss=0.096][A
Epoch 3:  38%|███▊      | 15336/40000 [3:28:19<5:36:41,  1.22it/s, training_loss=0.113][A
Epoch 3:  38%|███▊      | 15337/40000 [3:28:19<5:36:18,  1.22it/s, training_loss=0.113][A
Epoch 3:  38%|███▊      | 15337/40000 [3:28:20<5:36:18,  1.22it/s, training_loss=0.106][A
Epoch 3:  38%|███▊      | 15338/40000 [3:28:20<5:36:45,  1.22it/s, training_loss=0.106][A
Epoch 3:  38%|███▊      | 15338/40000 [3:28:21<5:36:45,  1.22it/s, training_loss=0.054][A
Epoch 3:  38%|███▊      | 15339/40000 [3:28:21<5:36:19,  1.22it/s, training_loss=0.054][A
Epoch 3:  38%|███▊      | 15339/40000 [3:28:22<5:36:19,  1.22it/s, training_loss=0.098][A

Epoch 3:  39%|███▊      | 15424/40000 [3:29:31<5:35:33,  1.22it/s, training_loss=0.077][A
Epoch 3:  39%|███▊      | 15425/40000 [3:29:31<5:35:40,  1.22it/s, training_loss=0.077][A
Epoch 3:  39%|███▊      | 15425/40000 [3:29:32<5:35:40,  1.22it/s, training_loss=0.082][A
Epoch 3:  39%|███▊      | 15426/40000 [3:29:32<5:35:50,  1.22it/s, training_loss=0.082][A
Epoch 3:  39%|███▊      | 15426/40000 [3:29:33<5:35:50,  1.22it/s, training_loss=0.042][A
Epoch 3:  39%|███▊      | 15427/40000 [3:29:33<5:35:54,  1.22it/s, training_loss=0.042][A
Epoch 3:  39%|███▊      | 15427/40000 [3:29:34<5:35:54,  1.22it/s, training_loss=0.082][A
Epoch 3:  39%|███▊      | 15428/40000 [3:29:34<5:35:17,  1.22it/s, training_loss=0.082][A
Epoch 3:  39%|███▊      | 15428/40000 [3:29:34<5:35:17,  1.22it/s, training_loss=0.040][A
Epoch 3:  39%|███▊      | 15429/40000 [3:29:34<5:35:00,  1.22it/s, training_loss=0.040][A
Epoch 3:  39%|███▊      | 15429/40000 [3:29:35<5:35:00,  1.22it/s, training_loss=0.040][A

Epoch 3:  39%|███▉      | 15514/40000 [3:30:45<5:33:28,  1.22it/s, training_loss=0.060][A
Epoch 3:  39%|███▉      | 15515/40000 [3:30:45<5:33:32,  1.22it/s, training_loss=0.060][A
Epoch 3:  39%|███▉      | 15515/40000 [3:30:46<5:33:32,  1.22it/s, training_loss=0.070][A
Epoch 3:  39%|███▉      | 15516/40000 [3:30:46<5:33:25,  1.22it/s, training_loss=0.070][A
Epoch 3:  39%|███▉      | 15516/40000 [3:30:46<5:33:25,  1.22it/s, training_loss=0.084][A
Epoch 3:  39%|███▉      | 15517/40000 [3:30:46<5:33:29,  1.22it/s, training_loss=0.084][A
Epoch 3:  39%|███▉      | 15517/40000 [3:30:47<5:33:29,  1.22it/s, training_loss=0.082][A
Epoch 3:  39%|███▉      | 15518/40000 [3:30:47<5:33:17,  1.22it/s, training_loss=0.082][A
Epoch 3:  39%|███▉      | 15518/40000 [3:30:48<5:33:17,  1.22it/s, training_loss=0.121][A
Epoch 3:  39%|███▉      | 15519/40000 [3:30:48<5:33:07,  1.22it/s, training_loss=0.121][A
Epoch 3:  39%|███▉      | 15519/40000 [3:30:49<5:33:07,  1.22it/s, training_loss=0.047][A

Epoch 3:  39%|███▉      | 15604/40000 [3:31:58<5:31:25,  1.23it/s, training_loss=0.030][A
Epoch 3:  39%|███▉      | 15605/40000 [3:31:58<5:31:44,  1.23it/s, training_loss=0.030][A
Epoch 3:  39%|███▉      | 15605/40000 [3:31:59<5:31:44,  1.23it/s, training_loss=0.184][A
Epoch 3:  39%|███▉      | 15606/40000 [3:31:59<5:31:21,  1.23it/s, training_loss=0.184][A
Epoch 3:  39%|███▉      | 15606/40000 [3:32:00<5:31:21,  1.23it/s, training_loss=0.140][A
Epoch 3:  39%|███▉      | 15607/40000 [3:32:00<5:31:34,  1.23it/s, training_loss=0.140][A
Epoch 3:  39%|███▉      | 15607/40000 [3:32:01<5:31:34,  1.23it/s, training_loss=0.038][A
Epoch 3:  39%|███▉      | 15608/40000 [3:32:01<5:31:39,  1.23it/s, training_loss=0.038][A
Epoch 3:  39%|███▉      | 15608/40000 [3:32:01<5:31:39,  1.23it/s, training_loss=0.111][A
Epoch 3:  39%|███▉      | 15609/40000 [3:32:01<5:31:45,  1.23it/s, training_loss=0.111][A
Epoch 3:  39%|███▉      | 15609/40000 [3:32:02<5:31:45,  1.23it/s, training_loss=0.081][A

Epoch 3:  39%|███▉      | 15694/40000 [3:33:12<5:30:53,  1.22it/s, training_loss=0.110][A
Epoch 3:  39%|███▉      | 15695/40000 [3:33:12<5:31:13,  1.22it/s, training_loss=0.110][A
Epoch 3:  39%|███▉      | 15695/40000 [3:33:12<5:31:13,  1.22it/s, training_loss=0.069][A
Epoch 3:  39%|███▉      | 15696/40000 [3:33:12<5:31:08,  1.22it/s, training_loss=0.069][A
Epoch 3:  39%|███▉      | 15696/40000 [3:33:13<5:31:08,  1.22it/s, training_loss=0.074][A
Epoch 3:  39%|███▉      | 15697/40000 [3:33:13<5:31:10,  1.22it/s, training_loss=0.074][A
Epoch 3:  39%|███▉      | 15697/40000 [3:33:14<5:31:10,  1.22it/s, training_loss=0.084][A
Epoch 3:  39%|███▉      | 15698/40000 [3:33:14<5:31:17,  1.22it/s, training_loss=0.084][A
Epoch 3:  39%|███▉      | 15698/40000 [3:33:15<5:31:17,  1.22it/s, training_loss=0.067][A
Epoch 3:  39%|███▉      | 15699/40000 [3:33:15<5:31:09,  1.22it/s, training_loss=0.067][A
Epoch 3:  39%|███▉      | 15699/40000 [3:33:16<5:31:09,  1.22it/s, training_loss=0.090][A

Epoch 3:  39%|███▉      | 15784/40000 [3:34:25<5:29:55,  1.22it/s, training_loss=0.125][A
Epoch 3:  39%|███▉      | 15785/40000 [3:34:25<5:30:28,  1.22it/s, training_loss=0.125][A
Epoch 3:  39%|███▉      | 15785/40000 [3:34:26<5:30:28,  1.22it/s, training_loss=0.091][A
Epoch 3:  39%|███▉      | 15786/40000 [3:34:26<5:30:23,  1.22it/s, training_loss=0.091][A
Epoch 3:  39%|███▉      | 15786/40000 [3:34:27<5:30:23,  1.22it/s, training_loss=0.080][A
Epoch 3:  39%|███▉      | 15787/40000 [3:34:27<5:29:57,  1.22it/s, training_loss=0.080][A
Epoch 3:  39%|███▉      | 15787/40000 [3:34:28<5:29:57,  1.22it/s, training_loss=0.063][A
Epoch 3:  39%|███▉      | 15788/40000 [3:34:28<5:30:12,  1.22it/s, training_loss=0.063][A
Epoch 3:  39%|███▉      | 15788/40000 [3:34:28<5:30:12,  1.22it/s, training_loss=0.070][A
Epoch 3:  39%|███▉      | 15789/40000 [3:34:28<5:30:10,  1.22it/s, training_loss=0.070][A
Epoch 3:  39%|███▉      | 15789/40000 [3:34:29<5:30:10,  1.22it/s, training_loss=0.059][A

Epoch 3:  40%|███▉      | 15874/40000 [3:35:39<5:28:36,  1.22it/s, training_loss=0.062][A
Epoch 3:  40%|███▉      | 15875/40000 [3:35:39<5:28:22,  1.22it/s, training_loss=0.062][A
Epoch 3:  40%|███▉      | 15875/40000 [3:35:40<5:28:22,  1.22it/s, training_loss=0.081][A
Epoch 3:  40%|███▉      | 15876/40000 [3:35:40<5:28:07,  1.23it/s, training_loss=0.081][A
Epoch 3:  40%|███▉      | 15876/40000 [3:35:40<5:28:07,  1.23it/s, training_loss=0.054][A
Epoch 3:  40%|███▉      | 15877/40000 [3:35:40<5:28:38,  1.22it/s, training_loss=0.054][A
Epoch 3:  40%|███▉      | 15877/40000 [3:35:41<5:28:38,  1.22it/s, training_loss=0.096][A
Epoch 3:  40%|███▉      | 15878/40000 [3:35:41<5:28:32,  1.22it/s, training_loss=0.096][A
Epoch 3:  40%|███▉      | 15878/40000 [3:35:42<5:28:32,  1.22it/s, training_loss=0.046][A
Epoch 3:  40%|███▉      | 15879/40000 [3:35:42<5:28:19,  1.22it/s, training_loss=0.046][A
Epoch 3:  40%|███▉      | 15879/40000 [3:35:43<5:28:19,  1.22it/s, training_loss=0.091][A

Epoch 3:  40%|███▉      | 15964/40000 [3:36:52<5:27:03,  1.22it/s, training_loss=0.093][A
Epoch 3:  40%|███▉      | 15965/40000 [3:36:52<5:27:07,  1.22it/s, training_loss=0.093][A
Epoch 3:  40%|███▉      | 15965/40000 [3:36:53<5:27:07,  1.22it/s, training_loss=0.138][A
Epoch 3:  40%|███▉      | 15966/40000 [3:36:53<5:28:30,  1.22it/s, training_loss=0.138][A
Epoch 3:  40%|███▉      | 15966/40000 [3:36:54<5:28:30,  1.22it/s, training_loss=0.080][A
Epoch 3:  40%|███▉      | 15967/40000 [3:36:54<5:27:26,  1.22it/s, training_loss=0.080][A
Epoch 3:  40%|███▉      | 15967/40000 [3:36:55<5:27:26,  1.22it/s, training_loss=0.065][A
Epoch 3:  40%|███▉      | 15968/40000 [3:36:55<5:27:49,  1.22it/s, training_loss=0.065][A
Epoch 3:  40%|███▉      | 15968/40000 [3:36:55<5:27:49,  1.22it/s, training_loss=0.160][A
Epoch 3:  40%|███▉      | 15969/40000 [3:36:55<5:27:59,  1.22it/s, training_loss=0.160][A
Epoch 3:  40%|███▉      | 15969/40000 [3:36:56<5:27:59,  1.22it/s, training_loss=0.161][A

Epoch 3:  40%|████      | 16054/40000 [3:38:06<5:25:34,  1.23it/s, training_loss=0.148][A
Epoch 3:  40%|████      | 16055/40000 [3:38:06<5:25:50,  1.22it/s, training_loss=0.148][A
Epoch 3:  40%|████      | 16055/40000 [3:38:07<5:25:50,  1.22it/s, training_loss=0.039][A
Epoch 3:  40%|████      | 16056/40000 [3:38:07<5:25:19,  1.23it/s, training_loss=0.039][A
Epoch 3:  40%|████      | 16056/40000 [3:38:07<5:25:19,  1.23it/s, training_loss=0.062][A
Epoch 3:  40%|████      | 16057/40000 [3:38:07<5:25:20,  1.23it/s, training_loss=0.062][A
Epoch 3:  40%|████      | 16057/40000 [3:38:08<5:25:20,  1.23it/s, training_loss=0.027][A
Epoch 3:  40%|████      | 16058/40000 [3:38:08<5:24:56,  1.23it/s, training_loss=0.027][A
Epoch 3:  40%|████      | 16058/40000 [3:38:09<5:24:56,  1.23it/s, training_loss=0.038][A
Epoch 3:  40%|████      | 16059/40000 [3:38:09<5:25:27,  1.23it/s, training_loss=0.038][A
Epoch 3:  40%|████      | 16059/40000 [3:38:10<5:25:27,  1.23it/s, training_loss=0.039][A

Epoch 3:  40%|████      | 16144/40000 [3:39:19<5:24:53,  1.22it/s, training_loss=0.062][A
Epoch 3:  40%|████      | 16145/40000 [3:39:19<5:24:26,  1.23it/s, training_loss=0.062][A
Epoch 3:  40%|████      | 16145/40000 [3:39:20<5:24:26,  1.23it/s, training_loss=0.072][A
Epoch 3:  40%|████      | 16146/40000 [3:39:20<5:24:11,  1.23it/s, training_loss=0.072][A
Epoch 3:  40%|████      | 16146/40000 [3:39:21<5:24:11,  1.23it/s, training_loss=0.104][A
Epoch 3:  40%|████      | 16147/40000 [3:39:21<5:24:26,  1.23it/s, training_loss=0.104][A
Epoch 3:  40%|████      | 16147/40000 [3:39:22<5:24:26,  1.23it/s, training_loss=0.092][A
Epoch 3:  40%|████      | 16148/40000 [3:39:22<5:24:42,  1.22it/s, training_loss=0.092][A
Epoch 3:  40%|████      | 16148/40000 [3:39:23<5:24:42,  1.22it/s, training_loss=0.118][A
Epoch 3:  40%|████      | 16149/40000 [3:39:23<5:24:31,  1.22it/s, training_loss=0.118][A
Epoch 3:  40%|████      | 16149/40000 [3:39:23<5:24:31,  1.22it/s, training_loss=0.073][A

Epoch 3:  41%|████      | 16234/40000 [3:40:33<5:23:38,  1.22it/s, training_loss=0.109][A
Epoch 3:  41%|████      | 16235/40000 [3:40:33<5:23:41,  1.22it/s, training_loss=0.109][A
Epoch 3:  41%|████      | 16235/40000 [3:40:34<5:23:41,  1.22it/s, training_loss=0.070][A
Epoch 3:  41%|████      | 16236/40000 [3:40:34<5:23:25,  1.22it/s, training_loss=0.070][A
Epoch 3:  41%|████      | 16236/40000 [3:40:34<5:23:25,  1.22it/s, training_loss=0.082][A
Epoch 3:  41%|████      | 16237/40000 [3:40:34<5:23:20,  1.22it/s, training_loss=0.082][A
Epoch 3:  41%|████      | 16237/40000 [3:40:35<5:23:20,  1.22it/s, training_loss=0.040][A
Epoch 3:  41%|████      | 16238/40000 [3:40:35<5:23:25,  1.22it/s, training_loss=0.040][A
Epoch 3:  41%|████      | 16238/40000 [3:40:36<5:23:25,  1.22it/s, training_loss=0.093][A
Epoch 3:  41%|████      | 16239/40000 [3:40:36<5:23:19,  1.22it/s, training_loss=0.093][A
Epoch 3:  41%|████      | 16239/40000 [3:40:37<5:23:19,  1.22it/s, training_loss=0.118][A

Epoch 3:  41%|████      | 16324/40000 [3:41:46<5:23:32,  1.22it/s, training_loss=0.020][A
Epoch 3:  41%|████      | 16325/40000 [3:41:46<5:22:50,  1.22it/s, training_loss=0.020][A
Epoch 3:  41%|████      | 16325/40000 [3:41:47<5:22:50,  1.22it/s, training_loss=0.102][A
Epoch 3:  41%|████      | 16326/40000 [3:41:47<5:22:54,  1.22it/s, training_loss=0.102][A
Epoch 3:  41%|████      | 16326/40000 [3:41:48<5:22:54,  1.22it/s, training_loss=0.051][A
Epoch 3:  41%|████      | 16327/40000 [3:41:48<5:22:19,  1.22it/s, training_loss=0.051][A
Epoch 3:  41%|████      | 16327/40000 [3:41:49<5:22:19,  1.22it/s, training_loss=0.109][A
Epoch 3:  41%|████      | 16328/40000 [3:41:49<5:22:17,  1.22it/s, training_loss=0.109][A
Epoch 3:  41%|████      | 16328/40000 [3:41:50<5:22:17,  1.22it/s, training_loss=0.056][A
Epoch 3:  41%|████      | 16329/40000 [3:41:50<5:21:58,  1.23it/s, training_loss=0.056][A
Epoch 3:  41%|████      | 16329/40000 [3:41:50<5:21:58,  1.23it/s, training_loss=0.040][A

Epoch 3:  41%|████      | 16414/40000 [3:43:00<5:21:08,  1.22it/s, training_loss=0.030][A
Epoch 3:  41%|████      | 16415/40000 [3:43:00<5:21:01,  1.22it/s, training_loss=0.030][A
Epoch 3:  41%|████      | 16415/40000 [3:43:01<5:21:01,  1.22it/s, training_loss=0.034][A
Epoch 3:  41%|████      | 16416/40000 [3:43:01<5:21:10,  1.22it/s, training_loss=0.034][A
Epoch 3:  41%|████      | 16416/40000 [3:43:01<5:21:10,  1.22it/s, training_loss=0.093][A
Epoch 3:  41%|████      | 16417/40000 [3:43:01<5:21:03,  1.22it/s, training_loss=0.093][A
Epoch 3:  41%|████      | 16417/40000 [3:43:02<5:21:03,  1.22it/s, training_loss=0.018][A
Epoch 3:  41%|████      | 16418/40000 [3:43:02<5:21:09,  1.22it/s, training_loss=0.018][A
Epoch 3:  41%|████      | 16418/40000 [3:43:03<5:21:09,  1.22it/s, training_loss=0.128][A
Epoch 3:  41%|████      | 16419/40000 [3:43:03<5:20:49,  1.23it/s, training_loss=0.128][A
Epoch 3:  41%|████      | 16419/40000 [3:43:04<5:20:49,  1.23it/s, training_loss=0.139][A

Epoch 3:  41%|████▏     | 16504/40000 [3:44:13<5:19:26,  1.23it/s, training_loss=0.044][A
Epoch 3:  41%|████▏     | 16505/40000 [3:44:13<5:19:01,  1.23it/s, training_loss=0.044][A
Epoch 3:  41%|████▏     | 16505/40000 [3:44:14<5:19:01,  1.23it/s, training_loss=0.072][A
Epoch 3:  41%|████▏     | 16506/40000 [3:44:14<5:18:59,  1.23it/s, training_loss=0.072][A
Epoch 3:  41%|████▏     | 16506/40000 [3:44:15<5:18:59,  1.23it/s, training_loss=0.132][A
Epoch 3:  41%|████▏     | 16507/40000 [3:44:15<5:19:00,  1.23it/s, training_loss=0.132][A
Epoch 3:  41%|████▏     | 16507/40000 [3:44:16<5:19:00,  1.23it/s, training_loss=0.086][A
Epoch 3:  41%|████▏     | 16508/40000 [3:44:16<5:19:00,  1.23it/s, training_loss=0.086][A
Epoch 3:  41%|████▏     | 16508/40000 [3:44:17<5:19:00,  1.23it/s, training_loss=0.034][A
Epoch 3:  41%|████▏     | 16509/40000 [3:44:17<5:20:22,  1.22it/s, training_loss=0.034][A
Epoch 3:  41%|████▏     | 16509/40000 [3:44:17<5:20:22,  1.22it/s, training_loss=0.025][A

Epoch 3:  41%|████▏     | 16594/40000 [3:45:27<5:18:32,  1.22it/s, training_loss=0.076][A
Epoch 3:  41%|████▏     | 16595/40000 [3:45:27<5:19:11,  1.22it/s, training_loss=0.076][A
Epoch 3:  41%|████▏     | 16595/40000 [3:45:28<5:19:11,  1.22it/s, training_loss=0.104][A
Epoch 3:  41%|████▏     | 16596/40000 [3:45:28<5:19:08,  1.22it/s, training_loss=0.104][A
Epoch 3:  41%|████▏     | 16596/40000 [3:45:28<5:19:08,  1.22it/s, training_loss=0.097][A
Epoch 3:  41%|████▏     | 16597/40000 [3:45:28<5:19:09,  1.22it/s, training_loss=0.097][A
Epoch 3:  41%|████▏     | 16597/40000 [3:45:29<5:19:09,  1.22it/s, training_loss=0.056][A
Epoch 3:  41%|████▏     | 16598/40000 [3:45:29<5:19:04,  1.22it/s, training_loss=0.056][A
Epoch 3:  41%|████▏     | 16598/40000 [3:45:30<5:19:04,  1.22it/s, training_loss=0.047][A
Epoch 3:  41%|████▏     | 16599/40000 [3:45:30<5:18:46,  1.22it/s, training_loss=0.047][A
Epoch 3:  41%|████▏     | 16599/40000 [3:45:31<5:18:46,  1.22it/s, training_loss=0.025][A

Epoch 3:  42%|████▏     | 16684/40000 [3:46:40<5:17:53,  1.22it/s, training_loss=0.051][A
Epoch 3:  42%|████▏     | 16685/40000 [3:46:40<5:18:03,  1.22it/s, training_loss=0.051][A
Epoch 3:  42%|████▏     | 16685/40000 [3:46:41<5:18:03,  1.22it/s, training_loss=0.069][A
Epoch 3:  42%|████▏     | 16686/40000 [3:46:41<5:18:05,  1.22it/s, training_loss=0.069][A
Epoch 3:  42%|████▏     | 16686/40000 [3:46:42<5:18:05,  1.22it/s, training_loss=0.070][A
Epoch 3:  42%|████▏     | 16687/40000 [3:46:42<5:18:09,  1.22it/s, training_loss=0.070][A
Epoch 3:  42%|████▏     | 16687/40000 [3:46:43<5:18:09,  1.22it/s, training_loss=0.022][A
Epoch 3:  42%|████▏     | 16688/40000 [3:46:43<5:18:16,  1.22it/s, training_loss=0.022][A
Epoch 3:  42%|████▏     | 16688/40000 [3:46:44<5:18:16,  1.22it/s, training_loss=0.069][A
Epoch 3:  42%|████▏     | 16689/40000 [3:46:44<5:17:46,  1.22it/s, training_loss=0.069][A
Epoch 3:  42%|████▏     | 16689/40000 [3:46:44<5:17:46,  1.22it/s, training_loss=0.063][A

Epoch 3:  42%|████▏     | 16774/40000 [3:47:54<5:16:55,  1.22it/s, training_loss=0.128][A
Epoch 3:  42%|████▏     | 16775/40000 [3:47:54<5:16:31,  1.22it/s, training_loss=0.128][A
Epoch 3:  42%|████▏     | 16775/40000 [3:47:55<5:16:31,  1.22it/s, training_loss=0.081][A
Epoch 3:  42%|████▏     | 16776/40000 [3:47:55<5:16:19,  1.22it/s, training_loss=0.081][A
Epoch 3:  42%|████▏     | 16776/40000 [3:47:56<5:16:19,  1.22it/s, training_loss=0.117][A
Epoch 3:  42%|████▏     | 16777/40000 [3:47:56<5:15:55,  1.23it/s, training_loss=0.117][A
Epoch 3:  42%|████▏     | 16777/40000 [3:47:56<5:15:55,  1.23it/s, training_loss=0.037][A
Epoch 3:  42%|████▏     | 16778/40000 [3:47:56<5:15:50,  1.23it/s, training_loss=0.037][A
Epoch 3:  42%|████▏     | 16778/40000 [3:47:57<5:15:50,  1.23it/s, training_loss=0.060][A
Epoch 3:  42%|████▏     | 16779/40000 [3:47:57<5:15:44,  1.23it/s, training_loss=0.060][A
Epoch 3:  42%|████▏     | 16779/40000 [3:47:58<5:15:44,  1.23it/s, training_loss=0.113][A

Epoch 3:  42%|████▏     | 16864/40000 [3:49:07<5:15:37,  1.22it/s, training_loss=0.053][A
Epoch 3:  42%|████▏     | 16865/40000 [3:49:07<5:15:38,  1.22it/s, training_loss=0.053][A
Epoch 3:  42%|████▏     | 16865/40000 [3:49:08<5:15:38,  1.22it/s, training_loss=0.076][A
Epoch 3:  42%|████▏     | 16866/40000 [3:49:08<5:15:42,  1.22it/s, training_loss=0.076][A
Epoch 3:  42%|████▏     | 16866/40000 [3:49:09<5:15:42,  1.22it/s, training_loss=0.122][A
Epoch 3:  42%|████▏     | 16867/40000 [3:49:09<5:16:06,  1.22it/s, training_loss=0.122][A
Epoch 3:  42%|████▏     | 16867/40000 [3:49:10<5:16:06,  1.22it/s, training_loss=0.012][A
Epoch 3:  42%|████▏     | 16868/40000 [3:49:10<5:15:09,  1.22it/s, training_loss=0.012][A
Epoch 3:  42%|████▏     | 16868/40000 [3:49:11<5:15:09,  1.22it/s, training_loss=0.064][A
Epoch 3:  42%|████▏     | 16869/40000 [3:49:11<5:15:27,  1.22it/s, training_loss=0.064][A
Epoch 3:  42%|████▏     | 16869/40000 [3:49:12<5:15:27,  1.22it/s, training_loss=0.040][A

Epoch 3:  42%|████▏     | 16954/40000 [3:50:21<5:13:33,  1.22it/s, training_loss=0.054][A
Epoch 3:  42%|████▏     | 16955/40000 [3:50:21<5:13:46,  1.22it/s, training_loss=0.054][A
Epoch 3:  42%|████▏     | 16955/40000 [3:50:22<5:13:46,  1.22it/s, training_loss=0.040][A
Epoch 3:  42%|████▏     | 16956/40000 [3:50:22<5:14:06,  1.22it/s, training_loss=0.040][A
Epoch 3:  42%|████▏     | 16956/40000 [3:50:23<5:14:06,  1.22it/s, training_loss=0.146][A
Epoch 3:  42%|████▏     | 16957/40000 [3:50:23<5:13:45,  1.22it/s, training_loss=0.146][A
Epoch 3:  42%|████▏     | 16957/40000 [3:50:23<5:13:45,  1.22it/s, training_loss=0.091][A
Epoch 3:  42%|████▏     | 16958/40000 [3:50:23<5:13:51,  1.22it/s, training_loss=0.091][A
Epoch 3:  42%|████▏     | 16958/40000 [3:50:24<5:13:51,  1.22it/s, training_loss=0.071][A
Epoch 3:  42%|████▏     | 16959/40000 [3:50:24<5:13:39,  1.22it/s, training_loss=0.071][A
Epoch 3:  42%|████▏     | 16959/40000 [3:50:25<5:13:39,  1.22it/s, training_loss=0.096][A

Epoch 3:  43%|████▎     | 17044/40000 [3:51:35<5:13:16,  1.22it/s, training_loss=0.096][A
Epoch 3:  43%|████▎     | 17045/40000 [3:51:35<5:13:17,  1.22it/s, training_loss=0.096][A
Epoch 3:  43%|████▎     | 17045/40000 [3:51:35<5:13:17,  1.22it/s, training_loss=0.078][A
Epoch 3:  43%|████▎     | 17046/40000 [3:51:35<5:13:27,  1.22it/s, training_loss=0.078][A
Epoch 3:  43%|████▎     | 17046/40000 [3:51:36<5:13:27,  1.22it/s, training_loss=0.058][A
Epoch 3:  43%|████▎     | 17047/40000 [3:51:36<5:13:22,  1.22it/s, training_loss=0.058][A
Epoch 3:  43%|████▎     | 17047/40000 [3:51:37<5:13:22,  1.22it/s, training_loss=0.074][A
Epoch 3:  43%|████▎     | 17048/40000 [3:51:37<5:13:24,  1.22it/s, training_loss=0.074][A
Epoch 3:  43%|████▎     | 17048/40000 [3:51:38<5:13:24,  1.22it/s, training_loss=0.060][A
Epoch 3:  43%|████▎     | 17049/40000 [3:51:38<5:13:08,  1.22it/s, training_loss=0.060][A
Epoch 3:  43%|████▎     | 17049/40000 [3:51:39<5:13:08,  1.22it/s, training_loss=0.124][A

Epoch 3:  43%|████▎     | 17134/40000 [3:52:48<5:10:56,  1.23it/s, training_loss=0.106][A
Epoch 3:  43%|████▎     | 17135/40000 [3:52:48<5:11:10,  1.22it/s, training_loss=0.106][A
Epoch 3:  43%|████▎     | 17135/40000 [3:52:49<5:11:10,  1.22it/s, training_loss=0.126][A
Epoch 3:  43%|████▎     | 17136/40000 [3:52:49<5:10:27,  1.23it/s, training_loss=0.126][A
Epoch 3:  43%|████▎     | 17136/40000 [3:52:50<5:10:27,  1.23it/s, training_loss=0.087][A
Epoch 3:  43%|████▎     | 17137/40000 [3:52:50<5:10:39,  1.23it/s, training_loss=0.087][A
Epoch 3:  43%|████▎     | 17137/40000 [3:52:51<5:10:39,  1.23it/s, training_loss=0.058][A
Epoch 3:  43%|████▎     | 17138/40000 [3:52:51<5:11:06,  1.22it/s, training_loss=0.058][A
Epoch 3:  43%|████▎     | 17138/40000 [3:52:51<5:11:06,  1.22it/s, training_loss=0.149][A
Epoch 3:  43%|████▎     | 17139/40000 [3:52:51<5:10:52,  1.23it/s, training_loss=0.149][A
Epoch 3:  43%|████▎     | 17139/40000 [3:52:52<5:10:52,  1.23it/s, training_loss=0.070][A

Epoch 3:  43%|████▎     | 17224/40000 [3:54:02<5:10:36,  1.22it/s, training_loss=0.123][A
Epoch 3:  43%|████▎     | 17225/40000 [3:54:02<5:10:54,  1.22it/s, training_loss=0.123][A
Epoch 3:  43%|████▎     | 17225/40000 [3:54:03<5:10:54,  1.22it/s, training_loss=0.047][A
Epoch 3:  43%|████▎     | 17226/40000 [3:54:03<5:10:42,  1.22it/s, training_loss=0.047][A
Epoch 3:  43%|████▎     | 17226/40000 [3:54:03<5:10:42,  1.22it/s, training_loss=0.132][A
Epoch 3:  43%|████▎     | 17227/40000 [3:54:03<5:10:52,  1.22it/s, training_loss=0.132][A
Epoch 3:  43%|████▎     | 17227/40000 [3:54:04<5:10:52,  1.22it/s, training_loss=0.112][A
Epoch 3:  43%|████▎     | 17228/40000 [3:54:04<5:10:47,  1.22it/s, training_loss=0.112][A
Epoch 3:  43%|████▎     | 17228/40000 [3:54:05<5:10:47,  1.22it/s, training_loss=0.115][A
Epoch 3:  43%|████▎     | 17229/40000 [3:54:05<5:10:22,  1.22it/s, training_loss=0.115][A
Epoch 3:  43%|████▎     | 17229/40000 [3:54:06<5:10:22,  1.22it/s, training_loss=0.047][A

Epoch 3:  43%|████▎     | 17314/40000 [3:55:15<5:09:17,  1.22it/s, training_loss=0.078][A
Epoch 3:  43%|████▎     | 17315/40000 [3:55:15<5:09:08,  1.22it/s, training_loss=0.078][A
Epoch 3:  43%|████▎     | 17315/40000 [3:55:16<5:09:08,  1.22it/s, training_loss=0.149][A
Epoch 3:  43%|████▎     | 17316/40000 [3:55:16<5:09:17,  1.22it/s, training_loss=0.149][A
Epoch 3:  43%|████▎     | 17316/40000 [3:55:17<5:09:17,  1.22it/s, training_loss=0.170][A
Epoch 3:  43%|████▎     | 17317/40000 [3:55:17<5:08:59,  1.22it/s, training_loss=0.170][A
Epoch 3:  43%|████▎     | 17317/40000 [3:55:18<5:08:59,  1.22it/s, training_loss=0.104][A
Epoch 3:  43%|████▎     | 17318/40000 [3:55:18<5:08:28,  1.23it/s, training_loss=0.104][A
Epoch 3:  43%|████▎     | 17318/40000 [3:55:19<5:08:28,  1.23it/s, training_loss=0.078][A
Epoch 3:  43%|████▎     | 17319/40000 [3:55:19<5:08:08,  1.23it/s, training_loss=0.078][A
Epoch 3:  43%|████▎     | 17319/40000 [3:55:19<5:08:08,  1.23it/s, training_loss=0.073][A

Epoch 3:  44%|████▎     | 17404/40000 [3:56:29<5:07:34,  1.22it/s, training_loss=0.068][A
Epoch 3:  44%|████▎     | 17405/40000 [3:56:29<5:07:38,  1.22it/s, training_loss=0.068][A
Epoch 3:  44%|████▎     | 17405/40000 [3:56:30<5:07:38,  1.22it/s, training_loss=0.043][A
Epoch 3:  44%|████▎     | 17406/40000 [3:56:30<5:07:53,  1.22it/s, training_loss=0.043][A
Epoch 3:  44%|████▎     | 17406/40000 [3:56:31<5:07:53,  1.22it/s, training_loss=0.042][A
Epoch 3:  44%|████▎     | 17407/40000 [3:56:31<5:07:51,  1.22it/s, training_loss=0.042][A
Epoch 3:  44%|████▎     | 17407/40000 [3:56:31<5:07:51,  1.22it/s, training_loss=0.085][A
Epoch 3:  44%|████▎     | 17408/40000 [3:56:31<5:07:55,  1.22it/s, training_loss=0.085][A
Epoch 3:  44%|████▎     | 17408/40000 [3:56:32<5:07:55,  1.22it/s, training_loss=0.060][A
Epoch 3:  44%|████▎     | 17409/40000 [3:56:32<5:08:29,  1.22it/s, training_loss=0.060][A
Epoch 3:  44%|████▎     | 17409/40000 [3:56:33<5:08:29,  1.22it/s, training_loss=0.047][A

Epoch 3:  44%|████▎     | 17494/40000 [3:57:42<5:06:27,  1.22it/s, training_loss=0.105][A
Epoch 3:  44%|████▎     | 17495/40000 [3:57:42<5:06:41,  1.22it/s, training_loss=0.105][A
Epoch 3:  44%|████▎     | 17495/40000 [3:57:43<5:06:41,  1.22it/s, training_loss=0.066][A
Epoch 3:  44%|████▎     | 17496/40000 [3:57:43<5:06:14,  1.22it/s, training_loss=0.066][A
Epoch 3:  44%|████▎     | 17496/40000 [3:57:44<5:06:14,  1.22it/s, training_loss=0.084][A
Epoch 3:  44%|████▎     | 17497/40000 [3:57:44<5:06:22,  1.22it/s, training_loss=0.084][A
Epoch 3:  44%|████▎     | 17497/40000 [3:57:45<5:06:22,  1.22it/s, training_loss=0.046][A
Epoch 3:  44%|████▎     | 17498/40000 [3:57:45<5:06:23,  1.22it/s, training_loss=0.046][A
Epoch 3:  44%|████▎     | 17498/40000 [3:57:46<5:06:23,  1.22it/s, training_loss=0.077][A
Epoch 3:  44%|████▎     | 17499/40000 [3:57:46<5:06:31,  1.22it/s, training_loss=0.077][A
Epoch 3:  44%|████▎     | 17499/40000 [3:57:47<5:06:31,  1.22it/s, training_loss=0.091][A

Epoch 3:  44%|████▍     | 17584/40000 [3:58:56<5:05:46,  1.22it/s, training_loss=0.181][A
Epoch 3:  44%|████▍     | 17585/40000 [3:58:56<5:05:36,  1.22it/s, training_loss=0.181][A
Epoch 3:  44%|████▍     | 17585/40000 [3:58:57<5:05:36,  1.22it/s, training_loss=0.187][A
Epoch 3:  44%|████▍     | 17586/40000 [3:58:57<5:05:31,  1.22it/s, training_loss=0.187][A
Epoch 3:  44%|████▍     | 17586/40000 [3:58:58<5:05:31,  1.22it/s, training_loss=0.085][A
Epoch 3:  44%|████▍     | 17587/40000 [3:58:58<5:05:13,  1.22it/s, training_loss=0.085][A
Epoch 3:  44%|████▍     | 17587/40000 [3:58:58<5:05:13,  1.22it/s, training_loss=0.073][A
Epoch 3:  44%|████▍     | 17588/40000 [3:58:58<5:05:12,  1.22it/s, training_loss=0.073][A
Epoch 3:  44%|████▍     | 17588/40000 [3:58:59<5:05:12,  1.22it/s, training_loss=0.151][A
Epoch 3:  44%|████▍     | 17589/40000 [3:58:59<5:05:04,  1.22it/s, training_loss=0.151][A
Epoch 3:  44%|████▍     | 17589/40000 [3:59:00<5:05:04,  1.22it/s, training_loss=0.090][A

Epoch 3:  44%|████▍     | 17674/40000 [4:00:10<5:04:51,  1.22it/s, training_loss=0.080][A
Epoch 3:  44%|████▍     | 17675/40000 [4:00:10<5:04:33,  1.22it/s, training_loss=0.080][A
Epoch 3:  44%|████▍     | 17675/40000 [4:00:10<5:04:33,  1.22it/s, training_loss=0.128][A
Epoch 3:  44%|████▍     | 17676/40000 [4:00:10<5:04:08,  1.22it/s, training_loss=0.128][A
Epoch 3:  44%|████▍     | 17676/40000 [4:00:11<5:04:08,  1.22it/s, training_loss=0.055][A
Epoch 3:  44%|████▍     | 17677/40000 [4:00:11<5:02:58,  1.23it/s, training_loss=0.055][A
Epoch 3:  44%|████▍     | 17677/40000 [4:00:12<5:02:58,  1.23it/s, training_loss=0.147][A
Epoch 3:  44%|████▍     | 17678/40000 [4:00:12<5:02:40,  1.23it/s, training_loss=0.147][A
Epoch 3:  44%|████▍     | 17678/40000 [4:00:13<5:02:40,  1.23it/s, training_loss=0.108][A
Epoch 3:  44%|████▍     | 17679/40000 [4:00:13<5:03:23,  1.23it/s, training_loss=0.108][A
Epoch 3:  44%|████▍     | 17679/40000 [4:00:14<5:03:23,  1.23it/s, training_loss=0.040][A

Epoch 3:  44%|████▍     | 17764/40000 [4:01:23<5:02:52,  1.22it/s, training_loss=0.117][A
Epoch 3:  44%|████▍     | 17765/40000 [4:01:23<5:02:54,  1.22it/s, training_loss=0.117][A
Epoch 3:  44%|████▍     | 17765/40000 [4:01:24<5:02:54,  1.22it/s, training_loss=0.037][A
Epoch 3:  44%|████▍     | 17766/40000 [4:01:24<5:02:57,  1.22it/s, training_loss=0.037][A
Epoch 3:  44%|████▍     | 17766/40000 [4:01:25<5:02:57,  1.22it/s, training_loss=0.074][A
Epoch 3:  44%|████▍     | 17767/40000 [4:01:25<5:03:15,  1.22it/s, training_loss=0.074][A
Epoch 3:  44%|████▍     | 17767/40000 [4:01:26<5:03:15,  1.22it/s, training_loss=0.049][A
Epoch 3:  44%|████▍     | 17768/40000 [4:01:26<5:03:18,  1.22it/s, training_loss=0.049][A
Epoch 3:  44%|████▍     | 17768/40000 [4:01:26<5:03:18,  1.22it/s, training_loss=0.099][A
Epoch 3:  44%|████▍     | 17769/40000 [4:01:26<5:02:57,  1.22it/s, training_loss=0.099][A
Epoch 3:  44%|████▍     | 17769/40000 [4:01:27<5:02:57,  1.22it/s, training_loss=0.091][A

Epoch 3:  45%|████▍     | 17854/40000 [4:02:37<5:00:48,  1.23it/s, training_loss=0.033][A
Epoch 3:  45%|████▍     | 17855/40000 [4:02:37<5:01:15,  1.23it/s, training_loss=0.033][A
Epoch 3:  45%|████▍     | 17855/40000 [4:02:37<5:01:15,  1.23it/s, training_loss=0.133][A
Epoch 3:  45%|████▍     | 17856/40000 [4:02:37<5:01:10,  1.23it/s, training_loss=0.133][A
Epoch 3:  45%|████▍     | 17856/40000 [4:02:38<5:01:10,  1.23it/s, training_loss=0.139][A
Epoch 3:  45%|████▍     | 17857/40000 [4:02:38<5:01:18,  1.22it/s, training_loss=0.139][A
Epoch 3:  45%|████▍     | 17857/40000 [4:02:39<5:01:18,  1.22it/s, training_loss=0.060][A
Epoch 3:  45%|████▍     | 17858/40000 [4:02:39<5:01:40,  1.22it/s, training_loss=0.060][A
Epoch 3:  45%|████▍     | 17858/40000 [4:02:40<5:01:40,  1.22it/s, training_loss=0.086][A
Epoch 3:  45%|████▍     | 17859/40000 [4:02:40<5:01:35,  1.22it/s, training_loss=0.086][A
Epoch 3:  45%|████▍     | 17859/40000 [4:02:41<5:01:35,  1.22it/s, training_loss=0.056][A

Epoch 3:  45%|████▍     | 17944/40000 [4:03:50<5:00:40,  1.22it/s, training_loss=0.036][A
Epoch 3:  45%|████▍     | 17945/40000 [4:03:50<5:00:37,  1.22it/s, training_loss=0.036][A
Epoch 3:  45%|████▍     | 17945/40000 [4:03:51<5:00:37,  1.22it/s, training_loss=0.056][A
Epoch 3:  45%|████▍     | 17946/40000 [4:03:51<5:00:30,  1.22it/s, training_loss=0.056][A
Epoch 3:  45%|████▍     | 17946/40000 [4:03:52<5:00:30,  1.22it/s, training_loss=0.055][A
Epoch 3:  45%|████▍     | 17947/40000 [4:03:52<5:00:33,  1.22it/s, training_loss=0.055][A
Epoch 3:  45%|████▍     | 17947/40000 [4:03:53<5:00:33,  1.22it/s, training_loss=0.122][A
Epoch 3:  45%|████▍     | 17948/40000 [4:03:53<5:00:24,  1.22it/s, training_loss=0.122][A
Epoch 3:  45%|████▍     | 17948/40000 [4:03:53<5:00:24,  1.22it/s, training_loss=0.068][A
Epoch 3:  45%|████▍     | 17949/40000 [4:03:53<5:00:23,  1.22it/s, training_loss=0.068][A
Epoch 3:  45%|████▍     | 17949/40000 [4:03:54<5:00:23,  1.22it/s, training_loss=0.111][A

Epoch 3:  45%|████▌     | 18034/40000 [4:05:04<4:59:17,  1.22it/s, training_loss=0.144][A
Epoch 3:  45%|████▌     | 18035/40000 [4:05:04<4:58:55,  1.22it/s, training_loss=0.144][A
Epoch 3:  45%|████▌     | 18035/40000 [4:05:04<4:58:55,  1.22it/s, training_loss=0.100][A
Epoch 3:  45%|████▌     | 18036/40000 [4:05:04<4:58:42,  1.23it/s, training_loss=0.100][A
Epoch 3:  45%|████▌     | 18036/40000 [4:05:05<4:58:42,  1.23it/s, training_loss=0.072][A
Epoch 3:  45%|████▌     | 18037/40000 [4:05:05<4:58:36,  1.23it/s, training_loss=0.072][A
Epoch 3:  45%|████▌     | 18037/40000 [4:05:06<4:58:36,  1.23it/s, training_loss=0.096][A
Epoch 3:  45%|████▌     | 18038/40000 [4:05:06<4:58:05,  1.23it/s, training_loss=0.096][A
Epoch 3:  45%|████▌     | 18038/40000 [4:05:07<4:58:05,  1.23it/s, training_loss=0.076][A
Epoch 3:  45%|████▌     | 18039/40000 [4:05:07<4:58:09,  1.23it/s, training_loss=0.076][A
Epoch 3:  45%|████▌     | 18039/40000 [4:05:08<4:58:09,  1.23it/s, training_loss=0.074][A

Epoch 3:  45%|████▌     | 18124/40000 [4:06:17<4:57:42,  1.22it/s, training_loss=0.078][A
Epoch 3:  45%|████▌     | 18125/40000 [4:06:17<4:57:34,  1.23it/s, training_loss=0.078][A
Epoch 3:  45%|████▌     | 18125/40000 [4:06:18<4:57:34,  1.23it/s, training_loss=0.145][A
Epoch 3:  45%|████▌     | 18126/40000 [4:06:18<4:57:46,  1.22it/s, training_loss=0.145][A
Epoch 3:  45%|████▌     | 18126/40000 [4:06:19<4:57:46,  1.22it/s, training_loss=0.039][A
Epoch 3:  45%|████▌     | 18127/40000 [4:06:19<4:57:35,  1.23it/s, training_loss=0.039][A
Epoch 3:  45%|████▌     | 18127/40000 [4:06:20<4:57:35,  1.23it/s, training_loss=0.147][A
Epoch 3:  45%|████▌     | 18128/40000 [4:06:20<4:57:11,  1.23it/s, training_loss=0.147][A
Epoch 3:  45%|████▌     | 18128/40000 [4:06:20<4:57:11,  1.23it/s, training_loss=0.111][A
Epoch 3:  45%|████▌     | 18129/40000 [4:06:20<4:57:37,  1.22it/s, training_loss=0.111][A
Epoch 3:  45%|████▌     | 18129/40000 [4:06:21<4:57:37,  1.22it/s, training_loss=0.201][A

Epoch 3:  46%|████▌     | 18214/40000 [4:07:31<4:57:13,  1.22it/s, training_loss=0.102][A
Epoch 3:  46%|████▌     | 18215/40000 [4:07:31<4:56:30,  1.22it/s, training_loss=0.102][A
Epoch 3:  46%|████▌     | 18215/40000 [4:07:31<4:56:30,  1.22it/s, training_loss=0.150][A
Epoch 3:  46%|████▌     | 18216/40000 [4:07:31<4:56:28,  1.22it/s, training_loss=0.150][A
Epoch 3:  46%|████▌     | 18216/40000 [4:07:32<4:56:28,  1.22it/s, training_loss=0.130][A
Epoch 3:  46%|████▌     | 18217/40000 [4:07:32<4:56:14,  1.23it/s, training_loss=0.130][A
Epoch 3:  46%|████▌     | 18217/40000 [4:07:33<4:56:14,  1.23it/s, training_loss=0.059][A
Epoch 3:  46%|████▌     | 18218/40000 [4:07:33<4:55:55,  1.23it/s, training_loss=0.059][A
Epoch 3:  46%|████▌     | 18218/40000 [4:07:34<4:55:55,  1.23it/s, training_loss=0.079][A
Epoch 3:  46%|████▌     | 18219/40000 [4:07:34<4:55:48,  1.23it/s, training_loss=0.079][A
Epoch 3:  46%|████▌     | 18219/40000 [4:07:35<4:55:48,  1.23it/s, training_loss=0.058][A

Epoch 3:  46%|████▌     | 18304/40000 [4:08:44<4:54:42,  1.23it/s, training_loss=0.048][A
Epoch 3:  46%|████▌     | 18305/40000 [4:08:44<4:54:35,  1.23it/s, training_loss=0.048][A
Epoch 3:  46%|████▌     | 18305/40000 [4:08:45<4:54:35,  1.23it/s, training_loss=0.083][A
Epoch 3:  46%|████▌     | 18306/40000 [4:08:45<4:54:41,  1.23it/s, training_loss=0.083][A
Epoch 3:  46%|████▌     | 18306/40000 [4:08:46<4:54:41,  1.23it/s, training_loss=0.035][A
Epoch 3:  46%|████▌     | 18307/40000 [4:08:46<4:55:18,  1.22it/s, training_loss=0.035][A
Epoch 3:  46%|████▌     | 18307/40000 [4:08:47<4:55:18,  1.22it/s, training_loss=0.047][A
Epoch 3:  46%|████▌     | 18308/40000 [4:08:47<4:55:37,  1.22it/s, training_loss=0.047][A
Epoch 3:  46%|████▌     | 18308/40000 [4:08:47<4:55:37,  1.22it/s, training_loss=0.112][A
Epoch 3:  46%|████▌     | 18309/40000 [4:08:47<4:55:49,  1.22it/s, training_loss=0.112][A
Epoch 3:  46%|████▌     | 18309/40000 [4:08:48<4:55:49,  1.22it/s, training_loss=0.088][A

Epoch 3:  46%|████▌     | 18394/40000 [4:09:58<4:53:45,  1.23it/s, training_loss=0.039][A
Epoch 3:  46%|████▌     | 18395/40000 [4:09:58<4:53:44,  1.23it/s, training_loss=0.039][A
Epoch 3:  46%|████▌     | 18395/40000 [4:09:58<4:53:44,  1.23it/s, training_loss=0.022][A
Epoch 3:  46%|████▌     | 18396/40000 [4:09:58<4:54:09,  1.22it/s, training_loss=0.022][A
Epoch 3:  46%|████▌     | 18396/40000 [4:09:59<4:54:09,  1.22it/s, training_loss=0.072][A
Epoch 3:  46%|████▌     | 18397/40000 [4:09:59<4:54:13,  1.22it/s, training_loss=0.072][A
Epoch 3:  46%|████▌     | 18397/40000 [4:10:00<4:54:13,  1.22it/s, training_loss=0.058][A
Epoch 3:  46%|████▌     | 18398/40000 [4:10:00<4:53:55,  1.22it/s, training_loss=0.058][A
Epoch 3:  46%|████▌     | 18398/40000 [4:10:01<4:53:55,  1.22it/s, training_loss=0.051][A
Epoch 3:  46%|████▌     | 18399/40000 [4:10:01<4:54:23,  1.22it/s, training_loss=0.051][A
Epoch 3:  46%|████▌     | 18399/40000 [4:10:02<4:54:23,  1.22it/s, training_loss=0.168][A

Epoch 3:  46%|████▌     | 18484/40000 [4:11:11<4:52:39,  1.23it/s, training_loss=0.024][A
Epoch 3:  46%|████▌     | 18485/40000 [4:11:11<4:52:07,  1.23it/s, training_loss=0.024][A
Epoch 3:  46%|████▌     | 18485/40000 [4:11:12<4:52:07,  1.23it/s, training_loss=0.017][A
Epoch 3:  46%|████▌     | 18486/40000 [4:11:12<4:51:49,  1.23it/s, training_loss=0.017][A
Epoch 3:  46%|████▌     | 18486/40000 [4:11:13<4:51:49,  1.23it/s, training_loss=0.069][A
Epoch 3:  46%|████▌     | 18487/40000 [4:11:13<4:52:19,  1.23it/s, training_loss=0.069][A
Epoch 3:  46%|████▌     | 18487/40000 [4:11:14<4:52:19,  1.23it/s, training_loss=0.063][A
Epoch 3:  46%|████▌     | 18488/40000 [4:11:14<4:52:09,  1.23it/s, training_loss=0.063][A
Epoch 3:  46%|████▌     | 18488/40000 [4:11:14<4:52:09,  1.23it/s, training_loss=0.108][A
Epoch 3:  46%|████▌     | 18489/40000 [4:11:14<4:52:24,  1.23it/s, training_loss=0.108][A
Epoch 3:  46%|████▌     | 18489/40000 [4:11:15<4:52:24,  1.23it/s, training_loss=0.079][A

Epoch 3:  46%|████▋     | 18574/40000 [4:12:25<4:51:48,  1.22it/s, training_loss=0.177][A
Epoch 3:  46%|████▋     | 18575/40000 [4:12:25<4:52:05,  1.22it/s, training_loss=0.177][A
Epoch 3:  46%|████▋     | 18575/40000 [4:12:25<4:52:05,  1.22it/s, training_loss=0.045][A
Epoch 3:  46%|████▋     | 18576/40000 [4:12:25<4:52:20,  1.22it/s, training_loss=0.045][A
Epoch 3:  46%|████▋     | 18576/40000 [4:12:26<4:52:20,  1.22it/s, training_loss=0.048][A
Epoch 3:  46%|████▋     | 18577/40000 [4:12:26<4:52:04,  1.22it/s, training_loss=0.048][A
Epoch 3:  46%|████▋     | 18577/40000 [4:12:27<4:52:04,  1.22it/s, training_loss=0.082][A
Epoch 3:  46%|████▋     | 18578/40000 [4:12:27<4:51:57,  1.22it/s, training_loss=0.082][A
Epoch 3:  46%|████▋     | 18578/40000 [4:12:28<4:51:57,  1.22it/s, training_loss=0.066][A
Epoch 3:  46%|████▋     | 18579/40000 [4:12:28<4:51:48,  1.22it/s, training_loss=0.066][A
Epoch 3:  46%|████▋     | 18579/40000 [4:12:29<4:51:48,  1.22it/s, training_loss=0.050][A

Epoch 3:  47%|████▋     | 18664/40000 [4:13:38<4:50:22,  1.22it/s, training_loss=0.093][A
Epoch 3:  47%|████▋     | 18665/40000 [4:13:38<4:50:34,  1.22it/s, training_loss=0.093][A
Epoch 3:  47%|████▋     | 18665/40000 [4:13:39<4:50:34,  1.22it/s, training_loss=0.148][A
Epoch 3:  47%|████▋     | 18666/40000 [4:13:39<4:50:31,  1.22it/s, training_loss=0.148][A
Epoch 3:  47%|████▋     | 18666/40000 [4:13:40<4:50:31,  1.22it/s, training_loss=0.108][A
Epoch 3:  47%|████▋     | 18667/40000 [4:13:40<4:50:21,  1.22it/s, training_loss=0.108][A
Epoch 3:  47%|████▋     | 18667/40000 [4:13:41<4:50:21,  1.22it/s, training_loss=0.118][A
Epoch 3:  47%|████▋     | 18668/40000 [4:13:41<4:50:17,  1.22it/s, training_loss=0.118][A
Epoch 3:  47%|████▋     | 18668/40000 [4:13:41<4:50:17,  1.22it/s, training_loss=0.121][A
Epoch 3:  47%|████▋     | 18669/40000 [4:13:41<4:50:45,  1.22it/s, training_loss=0.121][A
Epoch 3:  47%|████▋     | 18669/40000 [4:13:42<4:50:45,  1.22it/s, training_loss=0.051][A

In [15]:
print(device)

cuda:0


In [18]:
torch.cuda.memory_allocated(device=device)

0