In [2]:
!pip install transformers



In [3]:
!pip install sentencepiece

Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/14/67/e42bd1181472c95c8cda79305df848264f2a7f62740995a46945d9797b67/sentencepiece-0.1.95-cp36-cp36m-manylinux2014_x86_64.whl (1.2MB)
[K     |▎                               | 10kB 20.3MB/s eta 0:00:01[K     |▌                               | 20kB 16.6MB/s eta 0:00:01[K     |▉                               | 30kB 13.6MB/s eta 0:00:01[K     |█                               | 40kB 13.1MB/s eta 0:00:01[K     |█▍                              | 51kB 9.3MB/s eta 0:00:01[K     |█▋                              | 61kB 10.0MB/s eta 0:00:01[K     |██                              | 71kB 9.7MB/s eta 0:00:01[K     |██▏                             | 81kB 10.7MB/s eta 0:00:01[K     |██▌                             | 92kB 10.2MB/s eta 0:00:01[K     |██▊                             | 102kB 9.0MB/s eta 0:00:01[K     |███                             | 112kB 9.0MB/s eta 0:00:01[K     |███▎            

In [4]:
import logging
import os
import argparse
import random
from tqdm import tqdm, trange
import csv

import numpy as np
import sys
import matplotlib.pyplot as plt
import sentencepiece
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler


logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO)
logger = logging.getLogger(__name__)

In [5]:
# Arguments
max_seq_length = 256
train_batch_size = 16
eval_batch_size = 8       
learning_rate = 5e-5      
num_train_epochs = 3            # Epochs - less epochs to be used for BERT
warmup_proportion = 0.1         # How to use?
seed = 42                       # Random seed
local_rank = -1
optimize_on_cpu = False          # Whether to perform optimization and keep the optimizer averages on CPU
fp16 = False                    # Whether to use 16-bit float precision instead of 32-bit
loss_scale = 128                # Loss scaling, positive power of 2 values can improve fp16 convergence
gradient_accumulation_steps = 4 
model_checkpoint = False

In [6]:
#FILES : 

task1_train = 'Task1_train.csv'
task1_dev = 'Task1_dev.csv'
task2_train = 'Task2_train.csv'
task2_dev = 'Task2_dev.csv'

**Necessory FUNCTIONS : **


---




In [7]:
def read_examples(input_file):

    with open(input_file, 'r', encoding='utf-8') as f:
        reader = csv.reader(f)
        lines = list(reader)

    examples = [
        {
            "article" : line[0],
            "question" : line[1],

            "options" : [line[2], line[3], line[4], line[5], line[6]],
         
            "label" : int(line[7]) 
         } for line in lines[1:]    # we skip the line with the column names
    ]
    return examples



class InputFeatures(object):
    def __init__(self, features , lm_labels, options, label):
        # We didn't stored tokens in features
        self.choices_features = [
            {
                'input_ids': input_ids,
                'input_mask': input_mask,
                'segment_ids': segment_ids
            }
            for _, input_ids, input_mask, segment_ids in features 
        ]

        self.lm_labels = lm_labels
        self.options = options
        self.label = label


def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Truncates a sequence pair in place !
    Both article and question are truncated from end"""

    # This is a simple heuristic which will always truncate the longer sequence
    # one token at a time. This makes more sense than truncating an equal percent
    # of tokens from each, since if one sequence is very short then each token
    # that's truncated likely contains more information than a longer sequence.
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()


In [12]:
def convert_examples_to_features(examples, tokenizer, max_seq_length):
    """Loads a data file into a list of `InputBatch`s."""
    
    features = []
    num_tokens_article = []
    count = 0
    masking_token_id = tokenizer.mask_token_id

    for example_index, example in enumerate(examples):
        # Article Truncated from beginning and not end!
        choices_features = []

        article_tokens = tokenizer.tokenize(example['article'])
        ques_tokens = tokenizer.tokenize(example['question'].replace("@placeholder", tokenizer.mask_token))

        options = []
        for c in example['options']:
            options.append(tokenizer.convert_tokens_to_ids(tokenizer.tokenize(c))[0])
        
        # Truncate Article now        
        _truncate_seq_pair(article_tokens, ques_tokens, max_seq_length - 1)

        # tokens = ques_tokens + [tokenizer.sep_token] + article_tokens + [tokenizer.sep_token]
        # segment_ids = [0] * (len(ques_tokens) + 1) + [1] * (len(article_tokens) + 1)          # Segment Id's 0 -> Ques and 1 -> Article

        tokens = ques_tokens + [tokenizer.sep_token]
        segment_ids = [0] * (len(ques_tokens) + 1) 

        input_ids = tokenizer.convert_tokens_to_ids(tokens)                                   
        input_mask = [1] * len(input_ids)
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += [tokenizer.pad_token_id] * (len(padding))
        input_mask += padding
        segment_ids += padding

        # Heuristic : Loss calculated only for Masked Token!
        lm_labels = [-100 if t_id != masking_token_id else tokenizer.convert_tokens_to_ids(tokenizer.tokenize(example['options'][example['label']]))[0] for t_id in input_ids]

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length
        assert len(lm_labels) == max_seq_length
        choices_features.append((tokens, input_ids, input_mask, segment_ids))
        # *------------------------------------------------------------------------------------------------------------------*
        # Truncate article from start!
        # article_tokens = tokenizer.tokenize(example['article'])
        # ques_tokens = tokenizer.tokenize(example['question'].replace("@placeholder", "_"))
        # ques_tokens[masked_index] = '[MASK]'

        # choices_features2 = []
        # _truncate_seq_pair2(article_tokens, ques_tokens, max_seq_length - 2)

        # tokens = ques_tokens + ["[SEP]"] + article_tokens + ["[SEP]"]
        # segment_ids = [0] * (len(ques_tokens) + 1) + [1] * (len(article_tokens) + 1)
        # input_ids = tokenizer.convert_tokens_to_ids(tokens)
        # input_mask = [1] * len(input_ids)
        # padding = [0] * (max_seq_length - len(input_ids))
        # input_ids += padding
        # input_mask += padding
        # segment_ids += padding
        # lm_labels = [-1 if t_id != masking_token_id else tokenizer.convert_tokens_to_ids(tokenizer.tokenize(example['options'][example['label']]))[0] for t_id in input_ids]

        # assert len(input_ids) == max_seq_length
        # assert len(input_mask) == max_seq_length
        # assert len(segment_ids) == max_seq_length
        # assert len(lm_labels) == max_seq_length

        # choices_features2.append((tokens, input_ids, input_mask, segment_ids))

        features.append(
            InputFeatures(
                features = choices_features,
                lm_labels = lm_labels,
                options = options,
                label = example['label']
            )
        )
    
    return features



                                                                                                  
                

In [9]:
def copy_optimizer_params_to_model(named_params_model, named_params_optimizer):
    """ Utility function for optimize_on_cpu and 16-bits training.
        Copy the parameters optimized on CPU/RAM back to the model on GPU
    """
    for (name_opti, param_opti), (name_model, param_model) in zip(named_params_optimizer, named_params_model):
        if name_opti != name_model:
            logger.error("name_opti != name_model: {} {}".format(name_opti, name_model))
            raise ValueError
        param_model.data.copy_(param_opti.data)

        
def set_optimizer_params_grad(named_params_optimizer, named_params_model, test_nan=False):
    """ Utility function for optimize_on_cpu and 16-bits training.
        Copy the gradient of the GPU parameters to the CPU/RAMM copy of the model
    """
    is_nan = False
    for (name_opti, param_opti), (name_model, param_model) in zip(named_params_optimizer, named_params_model):
        if name_opti != name_model:
            logger.error("name_opti != name_model: {} {}".format(name_opti, name_model))
            raise ValueError
        if param_model.grad is not None:
            if test_nan and torch.isnan(param_model.grad).sum() > 0:
                is_nan = True
            if param_opti.grad is None:
                param_opti.grad = torch.nn.Parameter(param_opti.data.new().resize_(*param_opti.data.size()))
            param_opti.grad.data.copy_(param_model.grad.data)
        else:
            param_opti.grad = None
    return is_nan



# Define Accuracy check metrics and training utils

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

def select_field(features, field):
    return [
            feature.choices_features[0][field]
        for feature in features
    ]
   
    
def classifiction_metric(preds, labels, label_list):
    """ The Metric of classification, input should be numpy format """

    acc = metrics.accuracy_score(labels, preds)

    labels_list = [i for i in range(len(label_list))]

    report = metrics.classification_report(
        labels, preds, labels=labels_list, target_names=label_list, digits=5, output_dict=True)

    return acc, report

Main Code Begins Now : 

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

train_batch_size = int(train_batch_size / gradient_accumulation_steps)
#Initialise seeds
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if n_gpu > 0:
    torch.cuda.manual_seed_all(seed)


Importing Tokenizer and Model From Hugging face library "Transformers"


Following Models have to be tried : 
Reference link for pretrained models : [Hugging Face Pretrained Models](https://huggingface.co/transformers/pretrained_models.html)

1.   XlNet  
2.   Albert  (Expected to work better)  -> v2 version is better than v1
3.   Roberta (Expected to work poorer)
4.   Electra (No idea about it,  Have to study about it!)


Instructions for saving model : 

1. Please download the saved model file into your computer (size might be big though :) )
2. Report the Results in the docs ->. [Doc Link](https://docs.google.com/document/d/1VPKYoi1VahtQgPCs4zMZVYOhy3xHkrq7OLp5fnClmvM/edit?usp=sharing)





For different models import these respectively : 

1.  XLNetTokenizer, XLNetLMHeadModel
2.  AlbertTokenizer,  AlbertForMaskedLM
3.  RobertaTokenizer, RobertaForMaskedLM
4.  ElectraTokenizer, ElectraForMaskedLM



There might be some errors. If so, Please Contact -> Abhishek Mittal, 180022 :)


In [47]:
from transformers import AlbertTokenizer, AlbertForMaskedLM, BertModel, AdamW        
import torch

tokenizer = AlbertTokenizer.from_pretrained('albert-xxlarge-v2')        # Write the tokenizer to be used
model = AlbertForMaskedLM.from_pretrained('albert-xxlarge-v2')    # Write the model to be used

# tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')        # Write the tokenizer to be used
# model = BertForMaskedLM.from_pretrained('bert-large-uncased')    # Write the model to be used

# bert_model = BertModel.from_pretrained('bert-large-uncased')

# tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')
# model = ElectraForMaskedLM.from_pretrained('google/electra-base-discriminator')


02/15/2021 10:26:02 - INFO - filelock -   Lock 140211453353096 acquired on /root/.cache/huggingface/transformers/57f3073e5921aab4297e8e1a59ec930b5cc22e13e6ecabad692e1bb4f28d71a2.d6110e25022b713452eb83d5bfa8ae64530995a93d8e694fe52e05aa85dd3a7d.lock


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760289.0, style=ProgressStyle(descripti…

02/15/2021 10:26:03 - INFO - filelock -   Lock 140211453353096 released on /root/.cache/huggingface/transformers/57f3073e5921aab4297e8e1a59ec930b5cc22e13e6ecabad692e1bb4f28d71a2.d6110e25022b713452eb83d5bfa8ae64530995a93d8e694fe52e05aa85dd3a7d.lock





02/15/2021 10:26:03 - INFO - filelock -   Lock 140211453351976 acquired on /root/.cache/huggingface/transformers/d5df78f35f3fedf5dab8dafee426bca04736e16e0a70bd647bd60607a39bf577.f2355728e5c0b990d2832c3ab8751836d17900316f0e874141a85d322e48b3fd.lock


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=710.0, style=ProgressStyle(description_…

02/15/2021 10:26:03 - INFO - filelock -   Lock 140211453351976 released on /root/.cache/huggingface/transformers/d5df78f35f3fedf5dab8dafee426bca04736e16e0a70bd647bd60607a39bf577.f2355728e5c0b990d2832c3ab8751836d17900316f0e874141a85d322e48b3fd.lock





02/15/2021 10:26:03 - INFO - filelock -   Lock 140211428071688 acquired on /root/.cache/huggingface/transformers/fa287cec8f8154228201180899218fed4f77fa0fd085bfb8a11cd1e1bef85216.4296b379bf943b455a1427c0758ddc23a147936625e7527e40b9087c28d7ae32.lock


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=892728632.0, style=ProgressStyle(descri…

02/15/2021 10:26:26 - INFO - filelock -   Lock 140211428071688 released on /root/.cache/huggingface/transformers/fa287cec8f8154228201180899218fed4f77fa0fd085bfb8a11cd1e1bef85216.4296b379bf943b455a1427c0758ddc23a147936625e7527e40b9087c28d7ae32.lock





In [50]:
# Load Saved model (if any)
if model_checkpoint :
    values = torch.load(no_cls_task1)
    model.load_state_dict(values['model'])

# Optimisations
if fp16:
    model.half()
if n_gpu > 1:
    model = torch.nn.DataParallel(model)

# Load Model to device (cuda here)
model.to(device)


AlbertForMaskedLM(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=4096, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((4096,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=4096, out_features=4096, bias=True)
                (key): Linear(in_features=4096, out_features=4096, bias=True)
                (value): Linear(in_features=4096, out_features=4096, bias=

In [14]:
if fp16:
    param_optimizer = [(n, param.clone().detach().to('cpu').float().requires_grad_()) \
                        for n, param in model.named_parameters()]
elif optimize_on_cpu:
    param_optimizer = [(n, param.clone().detach().to('cpu').requires_grad_()) \
                        for n, param in model.named_parameters()]
else:
    param_optimizer = list(model.named_parameters())

no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}
    ]


 **Training Code** : 








In [28]:
train_examples = read_examples(task1_train)         # Training Examples
num_train_steps = int(len(train_examples) / train_batch_size / gradient_accumulation_steps * num_train_epochs)
t_total = num_train_steps

# Optimiser is Adam
optimizer = AdamW(optimizer_grouped_parameters,
                         lr = learning_rate)


In [None]:
print(tokenizer)

PreTrainedTokenizer(name_or_path='bert-large-uncased', vocab_size=30522, model_max_len=512, is_fast=False, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})


In [29]:
global_step = 0
train_features = convert_examples_to_features(
        train_examples, tokenizer, max_seq_length
        )

logger.info("***** Running training *****")
logger.info("  Num examples = %d", len(train_examples))
logger.info("  Batch size = %d", train_batch_size)
logger.info("  Num steps = %d", num_train_steps)

# all_options = torch.tensor([f.options for f in train_features], dtype=torch.long)
all_labels = torch.tensor([f.label for f in train_features], dtype=torch.long)
all_lm_labels = torch.tensor([f.lm_labels for f in train_features], dtype=torch.long)

all_input_ids_1 = torch.tensor(select_field(train_features, 'input_ids'), dtype=torch.long)
all_input_mask_1 = torch.tensor(select_field(train_features, 'input_mask'), dtype=torch.long)
all_segment_ids_1 = torch.tensor(select_field(train_features, 'segment_ids'), dtype=torch.long)

train_data = TensorDataset(all_input_ids_1, all_input_mask_1, all_segment_ids_1, all_labels, all_lm_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=train_batch_size)

model.train()

02/15/2021 09:32:51 - INFO - __main__ -   ***** Running training *****
02/15/2021 09:32:51 - INFO - __main__ -     Num examples = 3227
02/15/2021 09:32:51 - INFO - __main__ -     Batch size = 4
02/15/2021 09:32:51 - INFO - __main__ -     Num steps = 605


BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,), eps=1e-12, elementw

In [None]:
!nvidia-smi

Sun Jan 17 10:43:19 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.27.04    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    32W / 250W |   2199MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

TRAIN Loop Snippet : 

In [30]:
correct_ans = 0
wrong_list = []
total_ans = 0

for _ in trange(int(num_train_epochs), desc="Epoch"):
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    
    for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label, lm_label_ids = batch

        outputs = model(input_ids, attention_mask=input_mask, labels=lm_label_ids, token_type_ids = segment_ids)   #TOkens with labels set to -100 are ignored             
        loss = outputs.loss

        if n_gpu > 1:
            loss = loss.mean() # mean() to average on multi-gpu.
        if fp16 and loss_scale != 1.0:
            # rescale loss for fp16 training
            # see https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html
            loss = loss * loss_scale
        if gradient_accumulation_steps > 1:
            loss = loss / gradient_accumulation_steps

        loss.backward()

        tr_loss += loss.item()
        nb_tr_steps += 1
        if (step + 1) % gradient_accumulation_steps == 0:
            if fp16 or optimize_on_cpu:
                if fp16 and loss_scale != 1.0:
                    # scale down gradients for fp16 training
                    for param in model.parameters():
                        if param.grad is not None:
                            param.grad.data = param.grad.data / loss_scale
                is_nan = set_optimizer_params_grad(param_optimizer, model.named_parameters(), test_nan=True)
                if is_nan:
                    logger.info("FP16 TRAINING: Nan in gradients, reducing loss scaling")
                    loss_scale = loss_scale / 2
                    model.zero_grad()
                    continue
                optimizer.step()
                copy_optimizer_params_to_model(model.named_parameters(), param_optimizer)
            else:
                optimizer.step()

            train_loss = tr_loss / nb_tr_steps
            print("\tTraining loss : ", train_loss)

            model.zero_grad()
            global_step += 1
            
          
torch.save({
    'model': model.state_dict()
}, 'saved_file_3.txt')
   


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
Iteration:   0%|          | 0/807 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/807 [00:00<08:07,  1.65it/s][A
Iteration:   0%|          | 2/807 [00:01<07:59,  1.68it/s][A
Iteration:   0%|          | 3/807 [00:01<07:56,  1.69it/s][A
Iteration:   0%|          | 4/807 [00:02<08:23,  1.60it/s][A

	Training loss :  1.2155051082372665



Iteration:   1%|          | 5/807 [00:03<08:09,  1.64it/s][A
Iteration:   1%|          | 6/807 [00:03<08:03,  1.66it/s][A
Iteration:   1%|          | 7/807 [00:04<07:55,  1.68it/s][A
Iteration:   1%|          | 8/807 [00:04<08:21,  1.59it/s][A

	Training loss :  1.2125826105475426



Iteration:   1%|          | 9/807 [00:05<08:07,  1.64it/s][A
Iteration:   1%|          | 10/807 [00:06<08:01,  1.65it/s][A
Iteration:   1%|▏         | 11/807 [00:06<07:55,  1.68it/s][A
Iteration:   1%|▏         | 12/807 [00:07<08:20,  1.59it/s][A

	Training loss :  1.1888723820447922



Iteration:   2%|▏         | 13/807 [00:07<08:07,  1.63it/s][A
Iteration:   2%|▏         | 14/807 [00:08<08:01,  1.65it/s][A
Iteration:   2%|▏         | 15/807 [00:09<07:54,  1.67it/s][A
Iteration:   2%|▏         | 16/807 [00:09<08:19,  1.58it/s][A

	Training loss :  1.2709757946431637



Iteration:   2%|▏         | 17/807 [00:10<08:06,  1.62it/s][A
Iteration:   2%|▏         | 18/807 [00:10<07:59,  1.65it/s][A
Iteration:   2%|▏         | 19/807 [00:11<07:53,  1.66it/s][A
Iteration:   2%|▏         | 20/807 [00:12<08:18,  1.58it/s][A

	Training loss :  1.2403174459934234



Iteration:   3%|▎         | 21/807 [00:12<08:06,  1.62it/s][A
Iteration:   3%|▎         | 22/807 [00:13<08:00,  1.63it/s][A
Iteration:   3%|▎         | 23/807 [00:14<07:53,  1.66it/s][A
Iteration:   3%|▎         | 24/807 [00:14<08:19,  1.57it/s][A

	Training loss :  1.2348821262518566



Iteration:   3%|▎         | 25/807 [00:15<08:05,  1.61it/s][A
Iteration:   3%|▎         | 26/807 [00:15<07:59,  1.63it/s][A
Iteration:   3%|▎         | 27/807 [00:16<07:52,  1.65it/s][A
Iteration:   3%|▎         | 28/807 [00:17<08:17,  1.56it/s][A

	Training loss :  1.2009027600288391



Iteration:   4%|▎         | 29/807 [00:17<08:03,  1.61it/s][A
Iteration:   4%|▎         | 30/807 [00:18<07:59,  1.62it/s][A
Iteration:   4%|▍         | 31/807 [00:19<07:52,  1.64it/s][A
Iteration:   4%|▍         | 32/807 [00:19<08:16,  1.56it/s][A

	Training loss :  1.1947109065949917



Iteration:   4%|▍         | 33/807 [00:20<08:04,  1.60it/s][A
Iteration:   4%|▍         | 34/807 [00:20<07:58,  1.61it/s][A
Iteration:   4%|▍         | 35/807 [00:21<07:50,  1.64it/s][A
Iteration:   4%|▍         | 36/807 [00:22<08:16,  1.55it/s][A

	Training loss :  1.2086400936047237



Iteration:   5%|▍         | 37/807 [00:22<08:02,  1.59it/s][A
Iteration:   5%|▍         | 38/807 [00:23<07:58,  1.61it/s][A
Iteration:   5%|▍         | 39/807 [00:24<07:50,  1.63it/s][A
Iteration:   5%|▍         | 40/807 [00:24<08:15,  1.55it/s][A

	Training loss :  1.202427753061056



Iteration:   5%|▌         | 41/807 [00:25<08:01,  1.59it/s][A
Iteration:   5%|▌         | 42/807 [00:25<07:54,  1.61it/s][A
Iteration:   5%|▌         | 43/807 [00:26<07:45,  1.64it/s][A
Iteration:   5%|▌         | 44/807 [00:27<08:09,  1.56it/s][A

	Training loss :  1.2112522714517333



Iteration:   6%|▌         | 45/807 [00:27<07:56,  1.60it/s][A
Iteration:   6%|▌         | 46/807 [00:28<07:50,  1.62it/s][A
Iteration:   6%|▌         | 47/807 [00:29<07:42,  1.64it/s][A
Iteration:   6%|▌         | 48/807 [00:29<08:07,  1.56it/s][A

	Training loss :  1.2163500276704629



Iteration:   6%|▌         | 49/807 [00:30<07:55,  1.60it/s][A
Iteration:   6%|▌         | 50/807 [00:30<07:49,  1.61it/s][A
Iteration:   6%|▋         | 51/807 [00:31<07:43,  1.63it/s][A
Iteration:   6%|▋         | 52/807 [00:32<08:07,  1.55it/s][A

	Training loss :  1.226617225087606



Iteration:   7%|▋         | 53/807 [00:32<07:53,  1.59it/s][A
Iteration:   7%|▋         | 54/807 [00:33<07:48,  1.61it/s][A
Iteration:   7%|▋         | 55/807 [00:34<07:39,  1.64it/s][A
Iteration:   7%|▋         | 56/807 [00:34<08:04,  1.55it/s][A

	Training loss :  1.2171208315661974



Iteration:   7%|▋         | 57/807 [00:35<07:55,  1.58it/s][A
Iteration:   7%|▋         | 58/807 [00:35<07:49,  1.60it/s][A
Iteration:   7%|▋         | 59/807 [00:36<07:41,  1.62it/s][A
Iteration:   7%|▋         | 60/807 [00:37<08:04,  1.54it/s][A

	Training loss :  1.2115328361590703



Iteration:   8%|▊         | 61/807 [00:37<07:53,  1.58it/s][A
Iteration:   8%|▊         | 62/807 [00:38<07:47,  1.59it/s][A
Iteration:   8%|▊         | 63/807 [00:39<07:39,  1.62it/s][A
Iteration:   8%|▊         | 64/807 [00:39<08:03,  1.54it/s][A

	Training loss :  1.2025420106947422



Iteration:   8%|▊         | 65/807 [00:40<07:51,  1.57it/s][A
Iteration:   8%|▊         | 66/807 [00:41<07:45,  1.59it/s][A
Iteration:   8%|▊         | 67/807 [00:41<07:36,  1.62it/s][A
Iteration:   8%|▊         | 68/807 [00:42<08:00,  1.54it/s][A

	Training loss :  1.1985693100620718



Iteration:   9%|▊         | 69/807 [00:42<07:49,  1.57it/s][A
Iteration:   9%|▊         | 70/807 [00:43<07:42,  1.59it/s][A
Iteration:   9%|▉         | 71/807 [00:44<07:35,  1.62it/s][A
Iteration:   9%|▉         | 72/807 [00:44<07:58,  1.54it/s][A

	Training loss :  1.2082208792368572



Iteration:   9%|▉         | 73/807 [00:45<07:47,  1.57it/s][A
Iteration:   9%|▉         | 74/807 [00:46<07:41,  1.59it/s][A
Iteration:   9%|▉         | 75/807 [00:46<07:34,  1.61it/s][A
Iteration:   9%|▉         | 76/807 [00:47<07:57,  1.53it/s][A

	Training loss :  1.2182007416298515



Iteration:  10%|▉         | 77/807 [00:48<07:46,  1.56it/s][A
Iteration:  10%|▉         | 78/807 [00:48<07:40,  1.58it/s][A
Iteration:  10%|▉         | 79/807 [00:49<07:33,  1.60it/s][A
Iteration:  10%|▉         | 80/807 [00:49<07:57,  1.52it/s][A

	Training loss :  1.220736588537693



Iteration:  10%|█         | 81/807 [00:50<07:44,  1.56it/s][A
Iteration:  10%|█         | 82/807 [00:51<07:40,  1.58it/s][A
Iteration:  10%|█         | 83/807 [00:51<07:33,  1.60it/s][A
Iteration:  10%|█         | 84/807 [00:52<07:54,  1.52it/s][A

	Training loss :  1.2282079756259918



Iteration:  11%|█         | 85/807 [00:53<07:44,  1.55it/s][A
Iteration:  11%|█         | 86/807 [00:53<07:37,  1.58it/s][A
Iteration:  11%|█         | 87/807 [00:54<07:31,  1.60it/s][A
Iteration:  11%|█         | 88/807 [00:55<07:54,  1.52it/s][A

	Training loss :  1.2310641035437584



Iteration:  11%|█         | 89/807 [00:55<07:43,  1.55it/s][A
Iteration:  11%|█         | 90/807 [00:56<07:38,  1.56it/s][A
Iteration:  11%|█▏        | 91/807 [00:56<07:30,  1.59it/s][A
Iteration:  11%|█▏        | 92/807 [00:57<07:52,  1.51it/s][A

	Training loss :  1.2419702741114989



Iteration:  12%|█▏        | 93/807 [00:58<07:41,  1.55it/s][A
Iteration:  12%|█▏        | 94/807 [00:58<07:35,  1.57it/s][A
Iteration:  12%|█▏        | 95/807 [00:59<07:29,  1.58it/s][A
Iteration:  12%|█▏        | 96/807 [01:00<07:50,  1.51it/s][A

	Training loss :  1.2377210191140573



Iteration:  12%|█▏        | 97/807 [01:00<07:38,  1.55it/s][A
Iteration:  12%|█▏        | 98/807 [01:01<07:32,  1.57it/s][A
Iteration:  12%|█▏        | 99/807 [01:02<07:27,  1.58it/s][A
Iteration:  12%|█▏        | 100/807 [01:02<07:48,  1.51it/s][A

	Training loss :  1.2394172763824463



Iteration:  13%|█▎        | 101/807 [01:03<07:39,  1.54it/s][A
Iteration:  13%|█▎        | 102/807 [01:04<07:33,  1.55it/s][A
Iteration:  13%|█▎        | 103/807 [01:04<07:27,  1.57it/s][A
Iteration:  13%|█▎        | 104/807 [01:05<07:48,  1.50it/s][A

	Training loss :  1.2472910651793847



Iteration:  13%|█▎        | 105/807 [01:06<07:36,  1.54it/s][A
Iteration:  13%|█▎        | 106/807 [01:06<07:32,  1.55it/s][A
Iteration:  13%|█▎        | 107/807 [01:07<07:26,  1.57it/s][A
Iteration:  13%|█▎        | 108/807 [01:08<07:45,  1.50it/s][A

	Training loss :  1.2351303503469184



Iteration:  14%|█▎        | 109/807 [01:08<07:36,  1.53it/s][A
Iteration:  14%|█▎        | 110/807 [01:09<07:29,  1.55it/s][A
Iteration:  14%|█▍        | 111/807 [01:09<07:23,  1.57it/s][A
Iteration:  14%|█▍        | 112/807 [01:10<07:44,  1.50it/s][A

	Training loss :  1.229411795469267



Iteration:  14%|█▍        | 113/807 [01:11<07:33,  1.53it/s][A
Iteration:  14%|█▍        | 114/807 [01:11<07:28,  1.54it/s][A
Iteration:  14%|█▍        | 115/807 [01:12<07:22,  1.56it/s][A
Iteration:  14%|█▍        | 116/807 [01:13<07:42,  1.49it/s][A

	Training loss :  1.216185236028556



Iteration:  14%|█▍        | 117/807 [01:13<07:32,  1.53it/s][A
Iteration:  15%|█▍        | 118/807 [01:14<07:26,  1.54it/s][A
Iteration:  15%|█▍        | 119/807 [01:15<07:21,  1.56it/s][A
Iteration:  15%|█▍        | 120/807 [01:15<07:40,  1.49it/s][A

	Training loss :  1.2294888533651829



Iteration:  15%|█▍        | 121/807 [01:16<07:30,  1.52it/s][A
Iteration:  15%|█▌        | 122/807 [01:17<07:24,  1.54it/s][A
Iteration:  15%|█▌        | 123/807 [01:17<07:19,  1.56it/s][A
Iteration:  15%|█▌        | 124/807 [01:18<07:38,  1.49it/s][A

	Training loss :  1.2259388986614443



Iteration:  15%|█▌        | 125/807 [01:19<07:29,  1.52it/s][A
Iteration:  16%|█▌        | 126/807 [01:19<07:23,  1.54it/s][A
Iteration:  16%|█▌        | 127/807 [01:20<07:16,  1.56it/s][A
Iteration:  16%|█▌        | 128/807 [01:21<07:38,  1.48it/s][A

	Training loss :  1.232675202889368



Iteration:  16%|█▌        | 129/807 [01:21<07:27,  1.51it/s][A
Iteration:  16%|█▌        | 130/807 [01:22<07:22,  1.53it/s][A
Iteration:  16%|█▌        | 131/807 [01:23<07:17,  1.55it/s][A
Iteration:  16%|█▋        | 132/807 [01:23<07:37,  1.47it/s][A

	Training loss :  1.234329283011682



Iteration:  16%|█▋        | 133/807 [01:24<07:27,  1.51it/s][A
Iteration:  17%|█▋        | 134/807 [01:25<07:20,  1.53it/s][A
Iteration:  17%|█▋        | 135/807 [01:25<07:15,  1.54it/s][A
Iteration:  17%|█▋        | 136/807 [01:26<07:34,  1.48it/s][A

	Training loss :  1.228623680989532



Iteration:  17%|█▋        | 137/807 [01:27<07:24,  1.51it/s][A
Iteration:  17%|█▋        | 138/807 [01:27<07:19,  1.52it/s][A
Iteration:  17%|█▋        | 139/807 [01:28<07:13,  1.54it/s][A
Iteration:  17%|█▋        | 140/807 [01:29<07:32,  1.47it/s][A

	Training loss :  1.2285093816263335



Iteration:  17%|█▋        | 141/807 [01:29<07:22,  1.51it/s][A
Iteration:  18%|█▊        | 142/807 [01:30<07:17,  1.52it/s][A
Iteration:  18%|█▊        | 143/807 [01:31<07:11,  1.54it/s][A
Iteration:  18%|█▊        | 144/807 [01:31<07:31,  1.47it/s][A

	Training loss :  1.228830077374975



Iteration:  18%|█▊        | 145/807 [01:32<07:21,  1.50it/s][A
Iteration:  18%|█▊        | 146/807 [01:33<07:15,  1.52it/s][A
Iteration:  18%|█▊        | 147/807 [01:33<07:10,  1.53it/s][A
Iteration:  18%|█▊        | 148/807 [01:34<07:30,  1.46it/s][A

	Training loss :  1.2273981166449752



Iteration:  18%|█▊        | 149/807 [01:35<07:19,  1.50it/s][A
Iteration:  19%|█▊        | 150/807 [01:35<07:14,  1.51it/s][A
Iteration:  19%|█▊        | 151/807 [01:36<07:08,  1.53it/s][A
Iteration:  19%|█▉        | 152/807 [01:37<07:28,  1.46it/s][A

	Training loss :  1.222596027153103



Iteration:  19%|█▉        | 153/807 [01:37<07:17,  1.50it/s][A
Iteration:  19%|█▉        | 154/807 [01:38<07:10,  1.52it/s][A
Iteration:  19%|█▉        | 155/807 [01:38<07:03,  1.54it/s][A
Iteration:  19%|█▉        | 156/807 [01:39<07:22,  1.47it/s][A

	Training loss :  1.2267922329214902



Iteration:  19%|█▉        | 157/807 [01:40<07:12,  1.50it/s][A
Iteration:  20%|█▉        | 158/807 [01:41<07:06,  1.52it/s][A
Iteration:  20%|█▉        | 159/807 [01:41<07:00,  1.54it/s][A
Iteration:  20%|█▉        | 160/807 [01:42<07:18,  1.48it/s][A

	Training loss :  1.234203147701919



Iteration:  20%|█▉        | 161/807 [01:43<07:09,  1.50it/s][A
Iteration:  20%|██        | 162/807 [01:43<07:03,  1.52it/s][A
Iteration:  20%|██        | 163/807 [01:44<06:58,  1.54it/s][A
Iteration:  20%|██        | 164/807 [01:45<07:16,  1.47it/s][A

	Training loss :  1.242255404773282



Iteration:  20%|██        | 165/807 [01:45<07:07,  1.50it/s][A
Iteration:  21%|██        | 166/807 [01:46<07:01,  1.52it/s][A
Iteration:  21%|██        | 167/807 [01:46<06:57,  1.53it/s][A
Iteration:  21%|██        | 168/807 [01:47<07:15,  1.47it/s][A

	Training loss :  1.247684990188905



Iteration:  21%|██        | 169/807 [01:48<07:04,  1.50it/s][A
Iteration:  21%|██        | 170/807 [01:48<06:59,  1.52it/s][A
Iteration:  21%|██        | 171/807 [01:49<06:53,  1.54it/s][A
Iteration:  21%|██▏       | 172/807 [01:50<07:11,  1.47it/s][A

	Training loss :  1.2485425976473232



Iteration:  21%|██▏       | 173/807 [01:50<07:03,  1.50it/s][A
Iteration:  22%|██▏       | 174/807 [01:51<06:57,  1.51it/s][A
Iteration:  22%|██▏       | 175/807 [01:52<06:53,  1.53it/s][A
Iteration:  22%|██▏       | 176/807 [01:53<07:12,  1.46it/s][A

	Training loss :  1.2457677909935063



Iteration:  22%|██▏       | 177/807 [01:53<07:01,  1.49it/s][A
Iteration:  22%|██▏       | 178/807 [01:54<06:56,  1.51it/s][A
Iteration:  22%|██▏       | 179/807 [01:54<06:51,  1.53it/s][A
Iteration:  22%|██▏       | 180/807 [01:55<07:08,  1.46it/s][A

	Training loss :  1.246821788781219



Iteration:  22%|██▏       | 181/807 [01:56<06:59,  1.49it/s][A
Iteration:  23%|██▎       | 182/807 [01:56<06:54,  1.51it/s][A
Iteration:  23%|██▎       | 183/807 [01:57<06:49,  1.52it/s][A
Iteration:  23%|██▎       | 184/807 [01:58<07:06,  1.46it/s][A

	Training loss :  1.2476519389968852



Iteration:  23%|██▎       | 185/807 [01:59<06:57,  1.49it/s][A
Iteration:  23%|██▎       | 186/807 [01:59<06:51,  1.51it/s][A
Iteration:  23%|██▎       | 187/807 [02:00<06:47,  1.52it/s][A
Iteration:  23%|██▎       | 188/807 [02:01<07:04,  1.46it/s][A

	Training loss :  1.2460570524030543



Iteration:  23%|██▎       | 189/807 [02:01<06:55,  1.49it/s][A
Iteration:  24%|██▎       | 190/807 [02:02<06:49,  1.51it/s][A
Iteration:  24%|██▎       | 191/807 [02:02<06:44,  1.52it/s][A
Iteration:  24%|██▍       | 192/807 [02:03<07:01,  1.46it/s][A

	Training loss :  1.2502404797511797



Iteration:  24%|██▍       | 193/807 [02:04<06:52,  1.49it/s][A
Iteration:  24%|██▍       | 194/807 [02:05<06:46,  1.51it/s][A
Iteration:  24%|██▍       | 195/807 [02:05<06:42,  1.52it/s][A
Iteration:  24%|██▍       | 196/807 [02:06<06:59,  1.45it/s][A

	Training loss :  1.2465069435384808



Iteration:  24%|██▍       | 197/807 [02:07<06:51,  1.48it/s][A
Iteration:  25%|██▍       | 198/807 [02:07<06:45,  1.50it/s][A
Iteration:  25%|██▍       | 199/807 [02:08<06:41,  1.52it/s][A
Iteration:  25%|██▍       | 200/807 [02:09<06:58,  1.45it/s][A

	Training loss :  1.2502489419281482



Iteration:  25%|██▍       | 201/807 [02:09<06:49,  1.48it/s][A
Iteration:  25%|██▌       | 202/807 [02:10<06:44,  1.50it/s][A
Iteration:  25%|██▌       | 203/807 [02:11<06:39,  1.51it/s][A
Iteration:  25%|██▌       | 204/807 [02:11<06:56,  1.45it/s][A

	Training loss :  1.2523224670805184



Iteration:  25%|██▌       | 205/807 [02:12<06:47,  1.48it/s][A
Iteration:  26%|██▌       | 206/807 [02:13<06:42,  1.49it/s][A
Iteration:  26%|██▌       | 207/807 [02:13<06:37,  1.51it/s][A
Iteration:  26%|██▌       | 208/807 [02:14<06:54,  1.45it/s][A

	Training loss :  1.2548125318896313



Iteration:  26%|██▌       | 209/807 [02:15<06:45,  1.47it/s][A
Iteration:  26%|██▌       | 210/807 [02:15<06:39,  1.49it/s][A
Iteration:  26%|██▌       | 211/807 [02:16<06:35,  1.51it/s][A
Iteration:  26%|██▋       | 212/807 [02:17<06:52,  1.44it/s][A

	Training loss :  1.2486887645890128



Iteration:  26%|██▋       | 213/807 [02:17<06:44,  1.47it/s][A
Iteration:  27%|██▋       | 214/807 [02:18<06:38,  1.49it/s][A
Iteration:  27%|██▋       | 215/807 [02:19<06:33,  1.50it/s][A
Iteration:  27%|██▋       | 216/807 [02:19<06:50,  1.44it/s][A

	Training loss :  1.255165211442444



Iteration:  27%|██▋       | 217/807 [02:20<06:41,  1.47it/s][A
Iteration:  27%|██▋       | 218/807 [02:21<06:35,  1.49it/s][A
Iteration:  27%|██▋       | 219/807 [02:21<06:31,  1.50it/s][A
Iteration:  27%|██▋       | 220/807 [02:22<06:48,  1.44it/s][A

	Training loss :  1.2553552406755362



Iteration:  27%|██▋       | 221/807 [02:23<06:40,  1.46it/s][A
Iteration:  28%|██▊       | 222/807 [02:23<06:34,  1.48it/s][A
Iteration:  28%|██▊       | 223/807 [02:24<06:28,  1.50it/s][A
Iteration:  28%|██▊       | 224/807 [02:25<06:45,  1.44it/s][A

	Training loss :  1.2471176938020758



Iteration:  28%|██▊       | 225/807 [02:26<06:37,  1.46it/s][A
Iteration:  28%|██▊       | 226/807 [02:26<06:30,  1.49it/s][A
Iteration:  28%|██▊       | 227/807 [02:27<06:26,  1.50it/s][A
Iteration:  28%|██▊       | 228/807 [02:28<06:42,  1.44it/s][A

	Training loss :  1.2501469289785938



Iteration:  28%|██▊       | 229/807 [02:28<06:33,  1.47it/s][A
Iteration:  29%|██▊       | 230/807 [02:29<06:27,  1.49it/s][A
Iteration:  29%|██▊       | 231/807 [02:30<06:22,  1.51it/s][A
Iteration:  29%|██▊       | 232/807 [02:30<06:38,  1.44it/s][A

	Training loss :  1.248784115987605



Iteration:  29%|██▉       | 233/807 [02:31<06:31,  1.47it/s][A
Iteration:  29%|██▉       | 234/807 [02:32<06:25,  1.49it/s][A
Iteration:  29%|██▉       | 235/807 [02:32<06:20,  1.50it/s][A
Iteration:  29%|██▉       | 236/807 [02:33<06:36,  1.44it/s][A

	Training loss :  1.250043026597823



Iteration:  29%|██▉       | 237/807 [02:34<06:28,  1.47it/s][A
Iteration:  29%|██▉       | 238/807 [02:34<06:23,  1.48it/s][A
Iteration:  30%|██▉       | 239/807 [02:35<06:19,  1.50it/s][A
Iteration:  30%|██▉       | 240/807 [02:36<06:35,  1.43it/s][A

	Training loss :  1.2522980661441883



Iteration:  30%|██▉       | 241/807 [02:36<06:28,  1.46it/s][A
Iteration:  30%|██▉       | 242/807 [02:37<06:21,  1.48it/s][A
Iteration:  30%|███       | 243/807 [02:38<06:17,  1.50it/s][A
Iteration:  30%|███       | 244/807 [02:38<06:33,  1.43it/s][A

	Training loss :  1.2627543920620543



Iteration:  30%|███       | 245/807 [02:39<06:23,  1.47it/s][A
Iteration:  30%|███       | 246/807 [02:40<06:18,  1.48it/s][A
Iteration:  31%|███       | 247/807 [02:40<06:13,  1.50it/s][A
Iteration:  31%|███       | 248/807 [02:41<06:29,  1.44it/s][A

	Training loss :  1.257175593006034



Iteration:  31%|███       | 249/807 [02:42<06:21,  1.46it/s][A
Iteration:  31%|███       | 250/807 [02:42<06:15,  1.48it/s][A
Iteration:  31%|███       | 251/807 [02:43<06:11,  1.50it/s][A
Iteration:  31%|███       | 252/807 [02:44<06:26,  1.44it/s][A

	Training loss :  1.2575453807200705



Iteration:  31%|███▏      | 253/807 [02:45<06:17,  1.47it/s][A
Iteration:  31%|███▏      | 254/807 [02:45<06:12,  1.48it/s][A
Iteration:  32%|███▏      | 255/807 [02:46<06:07,  1.50it/s][A
Iteration:  32%|███▏      | 256/807 [02:47<06:23,  1.44it/s][A

	Training loss :  1.2595393775263801



Iteration:  32%|███▏      | 257/807 [02:47<06:14,  1.47it/s][A
Iteration:  32%|███▏      | 258/807 [02:48<06:09,  1.49it/s][A
Iteration:  32%|███▏      | 259/807 [02:49<06:05,  1.50it/s][A
Iteration:  32%|███▏      | 260/807 [02:49<06:20,  1.44it/s][A

	Training loss :  1.2611575100284356



Iteration:  32%|███▏      | 261/807 [02:50<06:11,  1.47it/s][A
Iteration:  32%|███▏      | 262/807 [02:51<06:06,  1.49it/s][A
Iteration:  33%|███▎      | 263/807 [02:51<06:01,  1.50it/s][A
Iteration:  33%|███▎      | 264/807 [02:52<06:16,  1.44it/s][A

	Training loss :  1.2665173967905117



Iteration:  33%|███▎      | 265/807 [02:53<06:08,  1.47it/s][A
Iteration:  33%|███▎      | 266/807 [02:53<06:03,  1.49it/s][A
Iteration:  33%|███▎      | 267/807 [02:54<05:58,  1.51it/s][A
Iteration:  33%|███▎      | 268/807 [02:55<06:14,  1.44it/s][A

	Training loss :  1.2682404852847555



Iteration:  33%|███▎      | 269/807 [02:55<06:05,  1.47it/s][A
Iteration:  33%|███▎      | 270/807 [02:56<06:00,  1.49it/s][A
Iteration:  34%|███▎      | 271/807 [02:57<05:55,  1.51it/s][A
Iteration:  34%|███▎      | 272/807 [02:57<06:11,  1.44it/s][A

	Training loss :  1.271383798407281



Iteration:  34%|███▍      | 273/807 [02:58<06:02,  1.47it/s][A
Iteration:  34%|███▍      | 274/807 [02:59<05:57,  1.49it/s][A
Iteration:  34%|███▍      | 275/807 [02:59<05:53,  1.50it/s][A
Iteration:  34%|███▍      | 276/807 [03:00<06:09,  1.44it/s][A

	Training loss :  1.2708002325633299



Iteration:  34%|███▍      | 277/807 [03:01<06:01,  1.47it/s][A
Iteration:  34%|███▍      | 278/807 [03:01<05:56,  1.49it/s][A
Iteration:  35%|███▍      | 279/807 [03:02<05:52,  1.50it/s][A
Iteration:  35%|███▍      | 280/807 [03:03<06:06,  1.44it/s][A

	Training loss :  1.2731627218425274



Iteration:  35%|███▍      | 281/807 [03:04<05:57,  1.47it/s][A
Iteration:  35%|███▍      | 282/807 [03:04<05:52,  1.49it/s][A
Iteration:  35%|███▌      | 283/807 [03:05<05:48,  1.50it/s][A
Iteration:  35%|███▌      | 284/807 [03:06<06:02,  1.44it/s][A

	Training loss :  1.2780622035055094



Iteration:  35%|███▌      | 285/807 [03:06<05:53,  1.48it/s][A
Iteration:  35%|███▌      | 286/807 [03:07<05:49,  1.49it/s][A
Iteration:  36%|███▌      | 287/807 [03:08<05:45,  1.51it/s][A
Iteration:  36%|███▌      | 288/807 [03:08<05:59,  1.44it/s][A

	Training loss :  1.2788292365148664



Iteration:  36%|███▌      | 289/807 [03:09<05:52,  1.47it/s][A
Iteration:  36%|███▌      | 290/807 [03:10<05:46,  1.49it/s][A
Iteration:  36%|███▌      | 291/807 [03:10<05:42,  1.51it/s][A
Iteration:  36%|███▌      | 292/807 [03:11<05:56,  1.44it/s][A

	Training loss :  1.2773253579866397



Iteration:  36%|███▋      | 293/807 [03:12<05:48,  1.47it/s][A
Iteration:  36%|███▋      | 294/807 [03:12<05:43,  1.49it/s][A
Iteration:  37%|███▋      | 295/807 [03:13<05:39,  1.51it/s][A
Iteration:  37%|███▋      | 296/807 [03:14<05:54,  1.44it/s][A

	Training loss :  1.2751381188832425



Iteration:  37%|███▋      | 297/807 [03:14<05:45,  1.47it/s][A
Iteration:  37%|███▋      | 298/807 [03:15<05:41,  1.49it/s][A
Iteration:  37%|███▋      | 299/807 [03:16<05:36,  1.51it/s][A
Iteration:  37%|███▋      | 300/807 [03:16<05:50,  1.45it/s][A

	Training loss :  1.277715658644835



Iteration:  37%|███▋      | 301/807 [03:17<05:43,  1.48it/s][A
Iteration:  37%|███▋      | 302/807 [03:18<05:38,  1.49it/s][A
Iteration:  38%|███▊      | 303/807 [03:18<05:34,  1.51it/s][A
Iteration:  38%|███▊      | 304/807 [03:19<05:48,  1.44it/s][A

	Training loss :  1.2767518914647793



Iteration:  38%|███▊      | 305/807 [03:20<05:40,  1.48it/s][A
Iteration:  38%|███▊      | 306/807 [03:20<05:35,  1.49it/s][A
Iteration:  38%|███▊      | 307/807 [03:21<05:31,  1.51it/s][A
Iteration:  38%|███▊      | 308/807 [03:22<05:45,  1.44it/s][A

	Training loss :  1.277021935330583



Iteration:  38%|███▊      | 309/807 [03:22<05:37,  1.48it/s][A
Iteration:  38%|███▊      | 310/807 [03:23<05:33,  1.49it/s][A
Iteration:  39%|███▊      | 311/807 [03:24<05:28,  1.51it/s][A
Iteration:  39%|███▊      | 312/807 [03:25<05:42,  1.45it/s][A

	Training loss :  1.2801503253479798



Iteration:  39%|███▉      | 313/807 [03:25<05:34,  1.48it/s][A
Iteration:  39%|███▉      | 314/807 [03:26<05:30,  1.49it/s][A
Iteration:  39%|███▉      | 315/807 [03:26<05:26,  1.51it/s][A
Iteration:  39%|███▉      | 316/807 [03:27<05:39,  1.44it/s][A

	Training loss :  1.284926526814322



Iteration:  39%|███▉      | 317/807 [03:28<05:32,  1.47it/s][A
Iteration:  39%|███▉      | 318/807 [03:29<05:28,  1.49it/s][A
Iteration:  40%|███▉      | 319/807 [03:29<05:23,  1.51it/s][A
Iteration:  40%|███▉      | 320/807 [03:30<05:37,  1.44it/s][A

	Training loss :  1.2854827190749347



Iteration:  40%|███▉      | 321/807 [03:31<05:29,  1.47it/s][A
Iteration:  40%|███▉      | 322/807 [03:31<05:24,  1.49it/s][A
Iteration:  40%|████      | 323/807 [03:32<05:21,  1.51it/s][A
Iteration:  40%|████      | 324/807 [03:33<05:34,  1.44it/s][A

	Training loss :  1.2889550691585483



Iteration:  40%|████      | 325/807 [03:33<05:26,  1.47it/s][A
Iteration:  40%|████      | 326/807 [03:34<05:22,  1.49it/s][A
Iteration:  41%|████      | 327/807 [03:35<05:18,  1.51it/s][A
Iteration:  41%|████      | 328/807 [03:35<05:31,  1.45it/s][A

	Training loss :  1.288343272467212



Iteration:  41%|████      | 329/807 [03:36<05:23,  1.48it/s][A
Iteration:  41%|████      | 330/807 [03:37<05:19,  1.49it/s][A
Iteration:  41%|████      | 331/807 [03:37<05:15,  1.51it/s][A
Iteration:  41%|████      | 332/807 [03:38<05:28,  1.45it/s][A

	Training loss :  1.2909418275377837



Iteration:  41%|████▏     | 333/807 [03:39<05:21,  1.47it/s][A
Iteration:  41%|████▏     | 334/807 [03:39<05:16,  1.49it/s][A
Iteration:  42%|████▏     | 335/807 [03:40<05:13,  1.51it/s][A
Iteration:  42%|████▏     | 336/807 [03:41<05:26,  1.44it/s][A

	Training loss :  1.2924543313149894



Iteration:  42%|████▏     | 337/807 [03:41<05:18,  1.47it/s][A
Iteration:  42%|████▏     | 338/807 [03:42<05:13,  1.49it/s][A
Iteration:  42%|████▏     | 339/807 [03:43<05:09,  1.51it/s][A
Iteration:  42%|████▏     | 340/807 [03:43<05:22,  1.45it/s][A

	Training loss :  1.296933683490052



Iteration:  42%|████▏     | 341/807 [03:44<05:16,  1.47it/s][A
Iteration:  42%|████▏     | 342/807 [03:45<05:11,  1.49it/s][A
Iteration:  43%|████▎     | 343/807 [03:45<05:07,  1.51it/s][A
Iteration:  43%|████▎     | 344/807 [03:46<05:20,  1.45it/s][A

	Training loss :  1.299053453342166



Iteration:  43%|████▎     | 345/807 [03:47<05:13,  1.47it/s][A
Iteration:  43%|████▎     | 346/807 [03:47<05:08,  1.49it/s][A
Iteration:  43%|████▎     | 347/807 [03:48<05:04,  1.51it/s][A
Iteration:  43%|████▎     | 348/807 [03:49<05:17,  1.45it/s][A

	Training loss :  1.2993887607222316



Iteration:  43%|████▎     | 349/807 [03:50<05:10,  1.47it/s][A
Iteration:  43%|████▎     | 350/807 [03:50<05:06,  1.49it/s][A
Iteration:  43%|████▎     | 351/807 [03:51<05:02,  1.51it/s][A
Iteration:  44%|████▎     | 352/807 [03:52<05:14,  1.45it/s][A

	Training loss :  1.3000634772364388



Iteration:  44%|████▎     | 353/807 [03:52<05:07,  1.48it/s][A
Iteration:  44%|████▍     | 354/807 [03:53<05:04,  1.49it/s][A
Iteration:  44%|████▍     | 355/807 [03:54<05:00,  1.51it/s][A
Iteration:  44%|████▍     | 356/807 [03:54<05:12,  1.44it/s][A

	Training loss :  1.2993638218286332



Iteration:  44%|████▍     | 357/807 [03:55<05:04,  1.48it/s][A
Iteration:  44%|████▍     | 358/807 [03:56<05:00,  1.49it/s][A
Iteration:  44%|████▍     | 359/807 [03:56<04:56,  1.51it/s][A
Iteration:  45%|████▍     | 360/807 [03:57<05:09,  1.45it/s][A

	Training loss :  1.299513978179958



Iteration:  45%|████▍     | 361/807 [03:58<05:02,  1.47it/s][A
Iteration:  45%|████▍     | 362/807 [03:58<04:58,  1.49it/s][A
Iteration:  45%|████▍     | 363/807 [03:59<04:54,  1.51it/s][A
Iteration:  45%|████▌     | 364/807 [04:00<05:06,  1.45it/s][A

	Training loss :  1.3016190952160855



Iteration:  45%|████▌     | 365/807 [04:00<04:59,  1.48it/s][A
Iteration:  45%|████▌     | 366/807 [04:01<04:55,  1.49it/s][A
Iteration:  45%|████▌     | 367/807 [04:02<04:51,  1.51it/s][A
Iteration:  46%|████▌     | 368/807 [04:02<05:03,  1.45it/s][A

	Training loss :  1.2998838469224132



Iteration:  46%|████▌     | 369/807 [04:03<04:56,  1.48it/s][A
Iteration:  46%|████▌     | 370/807 [04:04<04:52,  1.50it/s][A
Iteration:  46%|████▌     | 371/807 [04:04<04:48,  1.51it/s][A
Iteration:  46%|████▌     | 372/807 [04:05<05:00,  1.45it/s][A

	Training loss :  1.2991408092841026



Iteration:  46%|████▌     | 373/807 [04:06<04:54,  1.47it/s][A
Iteration:  46%|████▋     | 374/807 [04:06<04:50,  1.49it/s][A
Iteration:  46%|████▋     | 375/807 [04:07<04:46,  1.51it/s][A
Iteration:  47%|████▋     | 376/807 [04:08<04:58,  1.44it/s][A

	Training loss :  1.2961726856834077



Iteration:  47%|████▋     | 377/807 [04:08<04:50,  1.48it/s][A
Iteration:  47%|████▋     | 378/807 [04:09<04:47,  1.49it/s][A
Iteration:  47%|████▋     | 379/807 [04:10<04:43,  1.51it/s][A
Iteration:  47%|████▋     | 380/807 [04:10<04:54,  1.45it/s][A

	Training loss :  1.297082958017525



Iteration:  47%|████▋     | 381/807 [04:11<04:49,  1.47it/s][A
Iteration:  47%|████▋     | 382/807 [04:12<04:44,  1.49it/s][A
Iteration:  47%|████▋     | 383/807 [04:12<04:41,  1.50it/s][A
Iteration:  48%|████▊     | 384/807 [04:13<04:54,  1.43it/s][A

	Training loss :  1.3002140114549547



Iteration:  48%|████▊     | 385/807 [04:14<04:47,  1.47it/s][A
Iteration:  48%|████▊     | 386/807 [04:14<04:42,  1.49it/s][A
Iteration:  48%|████▊     | 387/807 [04:15<04:38,  1.51it/s][A
Iteration:  48%|████▊     | 388/807 [04:16<04:50,  1.44it/s][A

	Training loss :  1.3019306557541042



Iteration:  48%|████▊     | 389/807 [04:17<04:44,  1.47it/s][A
Iteration:  48%|████▊     | 390/807 [04:17<04:40,  1.49it/s][A
Iteration:  48%|████▊     | 391/807 [04:18<04:36,  1.50it/s][A
Iteration:  49%|████▊     | 392/807 [04:19<04:47,  1.44it/s][A

	Training loss :  1.304338136725888



Iteration:  49%|████▊     | 393/807 [04:19<04:41,  1.47it/s][A
Iteration:  49%|████▉     | 394/807 [04:20<04:37,  1.49it/s][A
Iteration:  49%|████▉     | 395/807 [04:21<04:33,  1.51it/s][A
Iteration:  49%|████▉     | 396/807 [04:21<04:44,  1.44it/s][A

	Training loss :  1.304596677497782



Iteration:  49%|████▉     | 397/807 [04:22<04:38,  1.47it/s][A
Iteration:  49%|████▉     | 398/807 [04:23<04:34,  1.49it/s][A
Iteration:  49%|████▉     | 399/807 [04:23<04:31,  1.50it/s][A
Iteration:  50%|████▉     | 400/807 [04:24<04:42,  1.44it/s][A

	Training loss :  1.30475710503757



Iteration:  50%|████▉     | 401/807 [04:25<04:36,  1.47it/s][A
Iteration:  50%|████▉     | 402/807 [04:25<04:32,  1.48it/s][A
Iteration:  50%|████▉     | 403/807 [04:26<04:28,  1.50it/s][A
Iteration:  50%|█████     | 404/807 [04:27<04:40,  1.44it/s][A

	Training loss :  1.3065515209985252



Iteration:  50%|█████     | 405/807 [04:27<04:34,  1.47it/s][A
Iteration:  50%|█████     | 406/807 [04:28<04:29,  1.49it/s][A
Iteration:  50%|█████     | 407/807 [04:29<04:26,  1.50it/s][A
Iteration:  51%|█████     | 408/807 [04:29<04:36,  1.44it/s][A

	Training loss :  1.3060112407832754



Iteration:  51%|█████     | 409/807 [04:30<04:30,  1.47it/s][A
Iteration:  51%|█████     | 410/807 [04:31<04:26,  1.49it/s][A
Iteration:  51%|█████     | 411/807 [04:31<04:23,  1.50it/s][A
Iteration:  51%|█████     | 412/807 [04:32<04:34,  1.44it/s][A

	Training loss :  1.3098608312531583



Iteration:  51%|█████     | 413/807 [04:33<04:28,  1.47it/s][A
Iteration:  51%|█████▏    | 414/807 [04:33<04:23,  1.49it/s][A
Iteration:  51%|█████▏    | 415/807 [04:34<04:20,  1.51it/s][A
Iteration:  52%|█████▏    | 416/807 [04:35<04:30,  1.44it/s][A

	Training loss :  1.3098132596709406



Iteration:  52%|█████▏    | 417/807 [04:36<04:24,  1.47it/s][A
Iteration:  52%|█████▏    | 418/807 [04:36<04:20,  1.49it/s][A
Iteration:  52%|█████▏    | 419/807 [04:37<04:18,  1.50it/s][A
Iteration:  52%|█████▏    | 420/807 [04:38<04:29,  1.44it/s][A

	Training loss :  1.3088253740043867



Iteration:  52%|█████▏    | 421/807 [04:38<04:23,  1.47it/s][A
Iteration:  52%|█████▏    | 422/807 [04:39<04:18,  1.49it/s][A
Iteration:  52%|█████▏    | 423/807 [04:40<04:14,  1.51it/s][A
Iteration:  53%|█████▎    | 424/807 [04:40<04:25,  1.44it/s][A

	Training loss :  1.3081911982933305



Iteration:  53%|█████▎    | 425/807 [04:41<04:20,  1.47it/s][A
Iteration:  53%|█████▎    | 426/807 [04:42<04:16,  1.49it/s][A
Iteration:  53%|█████▎    | 427/807 [04:42<04:13,  1.50it/s][A
Iteration:  53%|█████▎    | 428/807 [04:43<04:23,  1.44it/s][A

	Training loss :  1.3067789006038246



Iteration:  53%|█████▎    | 429/807 [04:44<04:18,  1.46it/s][A
Iteration:  53%|█████▎    | 430/807 [04:44<04:13,  1.48it/s][A
Iteration:  53%|█████▎    | 431/807 [04:45<04:11,  1.50it/s][A
Iteration:  54%|█████▎    | 432/807 [04:46<04:21,  1.43it/s][A

	Training loss :  1.3089536718886208



Iteration:  54%|█████▎    | 433/807 [04:46<04:15,  1.46it/s][A
Iteration:  54%|█████▍    | 434/807 [04:47<04:11,  1.48it/s][A
Iteration:  54%|█████▍    | 435/807 [04:48<04:08,  1.50it/s][A
Iteration:  54%|█████▍    | 436/807 [04:48<04:18,  1.44it/s][A

	Training loss :  1.3107218688370985



Iteration:  54%|█████▍    | 437/807 [04:49<04:12,  1.46it/s][A
Iteration:  54%|█████▍    | 438/807 [04:50<04:08,  1.48it/s][A
Iteration:  54%|█████▍    | 439/807 [04:50<04:05,  1.50it/s][A
Iteration:  55%|█████▍    | 440/807 [04:51<04:15,  1.44it/s][A

	Training loss :  1.310832037234848



Iteration:  55%|█████▍    | 441/807 [04:52<04:09,  1.47it/s][A
Iteration:  55%|█████▍    | 442/807 [04:52<04:05,  1.49it/s][A
Iteration:  55%|█████▍    | 443/807 [04:53<04:02,  1.50it/s][A
Iteration:  55%|█████▌    | 444/807 [04:54<04:12,  1.44it/s][A

	Training loss :  1.3088924670675854



Iteration:  55%|█████▌    | 445/807 [04:55<04:07,  1.46it/s][A
Iteration:  55%|█████▌    | 446/807 [04:55<04:03,  1.48it/s][A
Iteration:  55%|█████▌    | 447/807 [04:56<04:00,  1.50it/s][A
Iteration:  56%|█████▌    | 448/807 [04:57<04:09,  1.44it/s][A

	Training loss :  1.3112478773109615



Iteration:  56%|█████▌    | 449/807 [04:57<04:03,  1.47it/s][A
Iteration:  56%|█████▌    | 450/807 [04:58<03:59,  1.49it/s][A
Iteration:  56%|█████▌    | 451/807 [04:59<03:56,  1.50it/s][A
Iteration:  56%|█████▌    | 452/807 [04:59<04:07,  1.43it/s][A

	Training loss :  1.3087904465805114



Iteration:  56%|█████▌    | 453/807 [05:00<04:02,  1.46it/s][A
Iteration:  56%|█████▋    | 454/807 [05:01<03:58,  1.48it/s][A
Iteration:  56%|█████▋    | 455/807 [05:01<03:54,  1.50it/s][A
Iteration:  57%|█████▋    | 456/807 [05:02<04:03,  1.44it/s][A

	Training loss :  1.3064086145084155



Iteration:  57%|█████▋    | 457/807 [05:03<03:58,  1.46it/s][A
Iteration:  57%|█████▋    | 458/807 [05:03<03:55,  1.48it/s][A
Iteration:  57%|█████▋    | 459/807 [05:04<03:51,  1.50it/s][A
Iteration:  57%|█████▋    | 460/807 [05:05<04:00,  1.44it/s][A

	Training loss :  1.3053269355841306



Iteration:  57%|█████▋    | 461/807 [05:05<03:55,  1.47it/s][A
Iteration:  57%|█████▋    | 462/807 [05:06<03:51,  1.49it/s][A
Iteration:  57%|█████▋    | 463/807 [05:07<03:49,  1.50it/s][A
Iteration:  57%|█████▋    | 464/807 [05:07<03:58,  1.44it/s][A

	Training loss :  1.3044010922955027



Iteration:  58%|█████▊    | 465/807 [05:08<03:53,  1.46it/s][A
Iteration:  58%|█████▊    | 466/807 [05:09<03:50,  1.48it/s][A
Iteration:  58%|█████▊    | 467/807 [05:09<03:47,  1.50it/s][A
Iteration:  58%|█████▊    | 468/807 [05:10<03:55,  1.44it/s][A

	Training loss :  1.3024411893680565



Iteration:  58%|█████▊    | 469/807 [05:11<03:50,  1.46it/s][A
Iteration:  58%|█████▊    | 470/807 [05:12<03:46,  1.49it/s][A
Iteration:  58%|█████▊    | 471/807 [05:12<03:44,  1.50it/s][A
Iteration:  58%|█████▊    | 472/807 [05:13<03:53,  1.43it/s][A

	Training loss :  1.3046137735373893



Iteration:  59%|█████▊    | 473/807 [05:14<03:47,  1.47it/s][A
Iteration:  59%|█████▊    | 474/807 [05:14<03:44,  1.49it/s][A
Iteration:  59%|█████▉    | 475/807 [05:15<03:40,  1.50it/s][A
Iteration:  59%|█████▉    | 476/807 [05:16<03:50,  1.43it/s][A

	Training loss :  1.3062209859490395



Iteration:  59%|█████▉    | 477/807 [05:16<03:45,  1.47it/s][A
Iteration:  59%|█████▉    | 478/807 [05:17<03:41,  1.48it/s][A
Iteration:  59%|█████▉    | 479/807 [05:18<03:38,  1.50it/s][A
Iteration:  59%|█████▉    | 480/807 [05:18<03:48,  1.43it/s][A

	Training loss :  1.307753138927122



Iteration:  60%|█████▉    | 481/807 [05:19<03:42,  1.46it/s][A
Iteration:  60%|█████▉    | 482/807 [05:20<03:39,  1.48it/s][A
Iteration:  60%|█████▉    | 483/807 [05:20<03:35,  1.50it/s][A
Iteration:  60%|█████▉    | 484/807 [05:21<03:44,  1.44it/s][A

	Training loss :  1.3076278980482707



Iteration:  60%|██████    | 485/807 [05:22<03:40,  1.46it/s][A
Iteration:  60%|██████    | 486/807 [05:22<03:36,  1.48it/s][A
Iteration:  60%|██████    | 487/807 [05:23<03:33,  1.50it/s][A
Iteration:  60%|██████    | 488/807 [05:24<03:42,  1.43it/s][A

	Training loss :  1.3064379950405145



Iteration:  61%|██████    | 489/807 [05:24<03:37,  1.47it/s][A
Iteration:  61%|██████    | 490/807 [05:25<03:34,  1.48it/s][A
Iteration:  61%|██████    | 491/807 [05:26<03:30,  1.50it/s][A
Iteration:  61%|██████    | 492/807 [05:27<03:39,  1.44it/s][A

	Training loss :  1.3087108609516447



Iteration:  61%|██████    | 493/807 [05:27<03:34,  1.46it/s][A
Iteration:  61%|██████    | 494/807 [05:28<03:30,  1.48it/s][A
Iteration:  61%|██████▏   | 495/807 [05:28<03:28,  1.50it/s][A
Iteration:  61%|██████▏   | 496/807 [05:29<03:36,  1.44it/s][A

	Training loss :  1.3078261788694128



Iteration:  62%|██████▏   | 497/807 [05:30<03:31,  1.47it/s][A
Iteration:  62%|██████▏   | 498/807 [05:31<03:28,  1.49it/s][A
Iteration:  62%|██████▏   | 499/807 [05:31<03:25,  1.50it/s][A
Iteration:  62%|██████▏   | 500/807 [05:32<03:33,  1.44it/s][A

	Training loss :  1.3073180736899377



Iteration:  62%|██████▏   | 501/807 [05:33<03:28,  1.47it/s][A
Iteration:  62%|██████▏   | 502/807 [05:33<03:25,  1.48it/s][A
Iteration:  62%|██████▏   | 503/807 [05:34<03:22,  1.50it/s][A
Iteration:  62%|██████▏   | 504/807 [05:35<03:30,  1.44it/s][A

	Training loss :  1.3069738223557434



Iteration:  63%|██████▎   | 505/807 [05:35<03:26,  1.46it/s][A
Iteration:  63%|██████▎   | 506/807 [05:36<03:22,  1.48it/s][A
Iteration:  63%|██████▎   | 507/807 [05:37<03:20,  1.50it/s][A
Iteration:  63%|██████▎   | 508/807 [05:37<03:28,  1.44it/s][A

	Training loss :  1.3046328648924828



Iteration:  63%|██████▎   | 509/807 [05:38<03:23,  1.46it/s][A
Iteration:  63%|██████▎   | 510/807 [05:39<03:20,  1.48it/s][A
Iteration:  63%|██████▎   | 511/807 [05:39<03:17,  1.50it/s][A
Iteration:  63%|██████▎   | 512/807 [05:40<03:24,  1.44it/s][A

	Training loss :  1.3040741281001829



Iteration:  64%|██████▎   | 513/807 [05:41<03:20,  1.46it/s][A
Iteration:  64%|██████▎   | 514/807 [05:41<03:17,  1.49it/s][A
Iteration:  64%|██████▍   | 515/807 [05:42<03:14,  1.50it/s][A
Iteration:  64%|██████▍   | 516/807 [05:43<03:21,  1.44it/s][A

	Training loss :  1.3052274841786355



Iteration:  64%|██████▍   | 517/807 [05:43<03:17,  1.47it/s][A
Iteration:  64%|██████▍   | 518/807 [05:44<03:14,  1.49it/s][A
Iteration:  64%|██████▍   | 519/807 [05:45<03:12,  1.50it/s][A
Iteration:  64%|██████▍   | 520/807 [05:46<03:20,  1.43it/s][A

	Training loss :  1.3042912784104164



Iteration:  65%|██████▍   | 521/807 [05:46<03:15,  1.46it/s][A
Iteration:  65%|██████▍   | 522/807 [05:47<03:11,  1.48it/s][A
Iteration:  65%|██████▍   | 523/807 [05:48<03:09,  1.50it/s][A
Iteration:  65%|██████▍   | 524/807 [05:48<03:16,  1.44it/s][A

	Training loss :  1.3019455353381069



Iteration:  65%|██████▌   | 525/807 [05:49<03:12,  1.47it/s][A
Iteration:  65%|██████▌   | 526/807 [05:50<03:09,  1.49it/s][A
Iteration:  65%|██████▌   | 527/807 [05:50<03:06,  1.50it/s][A
Iteration:  65%|██████▌   | 528/807 [05:51<03:13,  1.44it/s][A

	Training loss :  1.3026760688446688



Iteration:  66%|██████▌   | 529/807 [05:52<03:10,  1.46it/s][A
Iteration:  66%|██████▌   | 530/807 [05:52<03:06,  1.48it/s][A
Iteration:  66%|██████▌   | 531/807 [05:53<03:04,  1.50it/s][A
Iteration:  66%|██████▌   | 532/807 [05:54<03:11,  1.44it/s][A

	Training loss :  1.3036313980705756



Iteration:  66%|██████▌   | 533/807 [05:54<03:06,  1.47it/s][A
Iteration:  66%|██████▌   | 534/807 [05:55<03:03,  1.49it/s][A
Iteration:  66%|██████▋   | 535/807 [05:56<03:00,  1.50it/s][A
Iteration:  66%|██████▋   | 536/807 [05:56<03:08,  1.44it/s][A

	Training loss :  1.3013628829659811



Iteration:  67%|██████▋   | 537/807 [05:57<03:03,  1.47it/s][A
Iteration:  67%|██████▋   | 538/807 [05:58<03:01,  1.49it/s][A
Iteration:  67%|██████▋   | 539/807 [05:58<02:58,  1.50it/s][A
Iteration:  67%|██████▋   | 540/807 [05:59<03:05,  1.44it/s][A

	Training loss :  1.3019562494423655



Iteration:  67%|██████▋   | 541/807 [06:00<03:00,  1.47it/s][A
Iteration:  67%|██████▋   | 542/807 [06:00<02:58,  1.49it/s][A
Iteration:  67%|██████▋   | 543/807 [06:01<02:55,  1.50it/s][A
Iteration:  67%|██████▋   | 544/807 [06:02<03:02,  1.44it/s][A

	Training loss :  1.2996331702589112



Iteration:  68%|██████▊   | 545/807 [06:03<02:58,  1.47it/s][A
Iteration:  68%|██████▊   | 546/807 [06:03<02:55,  1.49it/s][A
Iteration:  68%|██████▊   | 547/807 [06:04<02:52,  1.51it/s][A
Iteration:  68%|██████▊   | 548/807 [06:05<02:59,  1.44it/s][A

	Training loss :  1.3018323415278519



Iteration:  68%|██████▊   | 549/807 [06:05<02:55,  1.47it/s][A
Iteration:  68%|██████▊   | 550/807 [06:06<02:53,  1.48it/s][A
Iteration:  68%|██████▊   | 551/807 [06:07<02:50,  1.50it/s][A
Iteration:  68%|██████▊   | 552/807 [06:07<02:57,  1.44it/s][A

	Training loss :  1.301722236679516



Iteration:  69%|██████▊   | 553/807 [06:08<02:53,  1.47it/s][A
Iteration:  69%|██████▊   | 554/807 [06:09<02:50,  1.49it/s][A
Iteration:  69%|██████▉   | 555/807 [06:09<02:47,  1.50it/s][A
Iteration:  69%|██████▉   | 556/807 [06:10<02:54,  1.44it/s][A

	Training loss :  1.3005384196587604



Iteration:  69%|██████▉   | 557/807 [06:11<02:50,  1.46it/s][A
Iteration:  69%|██████▉   | 558/807 [06:11<02:47,  1.49it/s][A
Iteration:  69%|██████▉   | 559/807 [06:12<02:45,  1.50it/s][A
Iteration:  69%|██████▉   | 560/807 [06:13<02:51,  1.44it/s][A

	Training loss :  1.2991594186318771



Iteration:  70%|██████▉   | 561/807 [06:13<02:47,  1.47it/s][A
Iteration:  70%|██████▉   | 562/807 [06:14<02:44,  1.48it/s][A
Iteration:  70%|██████▉   | 563/807 [06:15<02:42,  1.50it/s][A
Iteration:  70%|██████▉   | 564/807 [06:15<02:48,  1.44it/s][A

	Training loss :  1.298632189895667



Iteration:  70%|███████   | 565/807 [06:16<02:45,  1.47it/s][A
Iteration:  70%|███████   | 566/807 [06:17<02:41,  1.49it/s][A
Iteration:  70%|███████   | 567/807 [06:17<02:39,  1.50it/s][A
Iteration:  70%|███████   | 568/807 [06:18<02:45,  1.44it/s][A

	Training loss :  1.2985709135905958



Iteration:  71%|███████   | 569/807 [06:19<02:41,  1.47it/s][A
Iteration:  71%|███████   | 570/807 [06:19<02:39,  1.49it/s][A
Iteration:  71%|███████   | 571/807 [06:20<02:37,  1.50it/s][A
Iteration:  71%|███████   | 572/807 [06:21<02:43,  1.44it/s][A

	Training loss :  1.298535056620628



Iteration:  71%|███████   | 573/807 [06:22<02:39,  1.47it/s][A
Iteration:  71%|███████   | 574/807 [06:22<02:36,  1.49it/s][A
Iteration:  71%|███████▏  | 575/807 [06:23<02:34,  1.50it/s][A
Iteration:  71%|███████▏  | 576/807 [06:24<02:40,  1.44it/s][A

	Training loss :  1.2986981412913237



Iteration:  71%|███████▏  | 577/807 [06:24<02:36,  1.47it/s][A
Iteration:  72%|███████▏  | 578/807 [06:25<02:34,  1.49it/s][A
Iteration:  72%|███████▏  | 579/807 [06:26<02:31,  1.50it/s][A
Iteration:  72%|███████▏  | 580/807 [06:26<02:37,  1.44it/s][A

	Training loss :  1.2991364310013838



Iteration:  72%|███████▏  | 581/807 [06:27<02:34,  1.47it/s][A
Iteration:  72%|███████▏  | 582/807 [06:28<02:31,  1.49it/s][A
Iteration:  72%|███████▏  | 583/807 [06:28<02:29,  1.50it/s][A
Iteration:  72%|███████▏  | 584/807 [06:29<02:34,  1.44it/s][A

	Training loss :  1.2996849374309793



Iteration:  72%|███████▏  | 585/807 [06:30<02:31,  1.47it/s][A
Iteration:  73%|███████▎  | 586/807 [06:30<02:28,  1.49it/s][A
Iteration:  73%|███████▎  | 587/807 [06:31<02:26,  1.50it/s][A
Iteration:  73%|███████▎  | 588/807 [06:32<02:32,  1.43it/s][A

	Training loss :  1.3007035313718984



Iteration:  73%|███████▎  | 589/807 [06:32<02:28,  1.47it/s][A
Iteration:  73%|███████▎  | 590/807 [06:33<02:26,  1.48it/s][A
Iteration:  73%|███████▎  | 591/807 [06:34<02:24,  1.50it/s][A
Iteration:  73%|███████▎  | 592/807 [06:34<02:29,  1.44it/s][A

	Training loss :  1.3029357655527625



Iteration:  73%|███████▎  | 593/807 [06:35<02:25,  1.47it/s][A
Iteration:  74%|███████▎  | 594/807 [06:36<02:23,  1.49it/s][A
Iteration:  74%|███████▎  | 595/807 [06:36<02:20,  1.50it/s][A
Iteration:  74%|███████▍  | 596/807 [06:37<02:26,  1.44it/s][A

	Training loss :  1.3024561358678262



Iteration:  74%|███████▍  | 597/807 [06:38<02:22,  1.47it/s][A
Iteration:  74%|███████▍  | 598/807 [06:38<02:20,  1.49it/s][A
Iteration:  74%|███████▍  | 599/807 [06:39<02:18,  1.50it/s][A
Iteration:  74%|███████▍  | 600/807 [06:40<02:23,  1.44it/s][A

	Training loss :  1.3031164039671421



Iteration:  74%|███████▍  | 601/807 [06:41<02:20,  1.47it/s][A
Iteration:  75%|███████▍  | 602/807 [06:41<02:18,  1.49it/s][A
Iteration:  75%|███████▍  | 603/807 [06:42<02:15,  1.50it/s][A
Iteration:  75%|███████▍  | 604/807 [06:43<02:20,  1.44it/s][A

	Training loss :  1.3034883064051337



Iteration:  75%|███████▍  | 605/807 [06:43<02:17,  1.46it/s][A
Iteration:  75%|███████▌  | 606/807 [06:44<02:15,  1.49it/s][A
Iteration:  75%|███████▌  | 607/807 [06:45<02:13,  1.50it/s][A
Iteration:  75%|███████▌  | 608/807 [06:45<02:18,  1.44it/s][A

	Training loss :  1.3042810276444805



Iteration:  75%|███████▌  | 609/807 [06:46<02:14,  1.47it/s][A
Iteration:  76%|███████▌  | 610/807 [06:47<02:12,  1.49it/s][A
Iteration:  76%|███████▌  | 611/807 [06:47<02:10,  1.50it/s][A
Iteration:  76%|███████▌  | 612/807 [06:48<02:15,  1.44it/s][A

	Training loss :  1.302703225875602



Iteration:  76%|███████▌  | 613/807 [06:49<02:12,  1.47it/s][A
Iteration:  76%|███████▌  | 614/807 [06:49<02:09,  1.49it/s][A
Iteration:  76%|███████▌  | 615/807 [06:50<02:07,  1.50it/s][A
Iteration:  76%|███████▋  | 616/807 [06:51<02:12,  1.44it/s][A

	Training loss :  1.304215836592696



Iteration:  76%|███████▋  | 617/807 [06:51<02:09,  1.47it/s][A
Iteration:  77%|███████▋  | 618/807 [06:52<02:07,  1.49it/s][A
Iteration:  77%|███████▋  | 619/807 [06:53<02:05,  1.50it/s][A
Iteration:  77%|███████▋  | 620/807 [06:53<02:09,  1.44it/s][A

	Training loss :  1.304076637712217



Iteration:  77%|███████▋  | 621/807 [06:54<02:06,  1.47it/s][A
Iteration:  77%|███████▋  | 622/807 [06:55<02:04,  1.49it/s][A
Iteration:  77%|███████▋  | 623/807 [06:55<02:02,  1.50it/s][A
Iteration:  77%|███████▋  | 624/807 [06:56<02:07,  1.44it/s][A

	Training loss :  1.3035136468421955



Iteration:  77%|███████▋  | 625/807 [06:57<02:03,  1.47it/s][A
Iteration:  78%|███████▊  | 626/807 [06:57<02:01,  1.49it/s][A
Iteration:  78%|███████▊  | 627/807 [06:58<01:59,  1.51it/s][A
Iteration:  78%|███████▊  | 628/807 [06:59<02:04,  1.44it/s][A

	Training loss :  1.304751152587924



Iteration:  78%|███████▊  | 629/807 [07:00<02:00,  1.47it/s][A
Iteration:  78%|███████▊  | 630/807 [07:00<01:58,  1.49it/s][A
Iteration:  78%|███████▊  | 631/807 [07:01<01:57,  1.50it/s][A
Iteration:  78%|███████▊  | 632/807 [07:02<02:01,  1.44it/s][A

	Training loss :  1.3047903072136113



Iteration:  78%|███████▊  | 633/807 [07:02<01:58,  1.47it/s][A
Iteration:  79%|███████▊  | 634/807 [07:03<01:56,  1.49it/s][A
Iteration:  79%|███████▊  | 635/807 [07:04<01:54,  1.50it/s][A
Iteration:  79%|███████▉  | 636/807 [07:04<01:59,  1.43it/s][A

	Training loss :  1.3061610487928181



Iteration:  79%|███████▉  | 637/807 [07:05<01:56,  1.46it/s][A
Iteration:  79%|███████▉  | 638/807 [07:06<01:53,  1.48it/s][A
Iteration:  79%|███████▉  | 639/807 [07:06<01:52,  1.50it/s][A
Iteration:  79%|███████▉  | 640/807 [07:07<01:56,  1.44it/s][A

	Training loss :  1.3097304285969584



Iteration:  79%|███████▉  | 641/807 [07:08<01:53,  1.47it/s][A
Iteration:  80%|███████▉  | 642/807 [07:08<01:50,  1.49it/s][A
Iteration:  80%|███████▉  | 643/807 [07:09<01:48,  1.51it/s][A
Iteration:  80%|███████▉  | 644/807 [07:10<01:52,  1.44it/s][A

	Training loss :  1.3090532514156763



Iteration:  80%|███████▉  | 645/807 [07:10<01:50,  1.47it/s][A
Iteration:  80%|████████  | 646/807 [07:11<01:48,  1.48it/s][A
Iteration:  80%|████████  | 647/807 [07:12<01:46,  1.50it/s][A
Iteration:  80%|████████  | 648/807 [07:12<01:50,  1.44it/s][A

	Training loss :  1.3066897031241729



Iteration:  80%|████████  | 649/807 [07:13<01:47,  1.47it/s][A
Iteration:  81%|████████  | 650/807 [07:14<01:45,  1.49it/s][A
Iteration:  81%|████████  | 651/807 [07:14<01:43,  1.50it/s][A
Iteration:  81%|████████  | 652/807 [07:15<01:47,  1.44it/s][A

	Training loss :  1.3060539494171464



Iteration:  81%|████████  | 653/807 [07:16<01:44,  1.47it/s][A
Iteration:  81%|████████  | 654/807 [07:16<01:42,  1.49it/s][A
Iteration:  81%|████████  | 655/807 [07:17<01:41,  1.50it/s][A
Iteration:  81%|████████▏ | 656/807 [07:18<01:45,  1.44it/s][A

	Training loss :  1.304644179189714



Iteration:  81%|████████▏ | 657/807 [07:19<01:42,  1.47it/s][A
Iteration:  82%|████████▏ | 658/807 [07:19<01:40,  1.48it/s][A
Iteration:  82%|████████▏ | 659/807 [07:20<01:38,  1.50it/s][A
Iteration:  82%|████████▏ | 660/807 [07:21<01:42,  1.44it/s][A

	Training loss :  1.3046763905521594



Iteration:  82%|████████▏ | 661/807 [07:21<01:39,  1.47it/s][A
Iteration:  82%|████████▏ | 662/807 [07:22<01:37,  1.49it/s][A
Iteration:  82%|████████▏ | 663/807 [07:23<01:35,  1.50it/s][A
Iteration:  82%|████████▏ | 664/807 [07:23<01:39,  1.44it/s][A

	Training loss :  1.3048058584422233



Iteration:  82%|████████▏ | 665/807 [07:24<01:36,  1.47it/s][A
Iteration:  83%|████████▎ | 666/807 [07:25<01:34,  1.49it/s][A
Iteration:  83%|████████▎ | 667/807 [07:25<01:33,  1.50it/s][A
Iteration:  83%|████████▎ | 668/807 [07:26<01:36,  1.44it/s][A

	Training loss :  1.3064971656767195



Iteration:  83%|████████▎ | 669/807 [07:27<01:33,  1.47it/s][A
Iteration:  83%|████████▎ | 670/807 [07:27<01:31,  1.49it/s][A
Iteration:  83%|████████▎ | 671/807 [07:28<01:30,  1.50it/s][A
Iteration:  83%|████████▎ | 672/807 [07:29<01:33,  1.44it/s][A

	Training loss :  1.3050265458989



Iteration:  83%|████████▎ | 673/807 [07:29<01:30,  1.47it/s][A
Iteration:  84%|████████▎ | 674/807 [07:30<01:29,  1.49it/s][A
Iteration:  84%|████████▎ | 675/807 [07:31<01:27,  1.50it/s][A
Iteration:  84%|████████▍ | 676/807 [07:31<01:31,  1.44it/s][A

	Training loss :  1.3043412504905074



Iteration:  84%|████████▍ | 677/807 [07:32<01:28,  1.47it/s][A
Iteration:  84%|████████▍ | 678/807 [07:33<01:27,  1.48it/s][A
Iteration:  84%|████████▍ | 679/807 [07:33<01:25,  1.50it/s][A
Iteration:  84%|████████▍ | 680/807 [07:34<01:28,  1.44it/s][A

	Training loss :  1.3046663683565225



Iteration:  84%|████████▍ | 681/807 [07:35<01:25,  1.47it/s][A
Iteration:  85%|████████▍ | 682/807 [07:35<01:24,  1.49it/s][A
Iteration:  85%|████████▍ | 683/807 [07:36<01:22,  1.50it/s][A
Iteration:  85%|████████▍ | 684/807 [07:37<01:25,  1.44it/s][A

	Training loss :  1.30283712177423



Iteration:  85%|████████▍ | 685/807 [07:38<01:22,  1.47it/s][A
Iteration:  85%|████████▌ | 686/807 [07:38<01:21,  1.49it/s][A
Iteration:  85%|████████▌ | 687/807 [07:39<01:19,  1.51it/s][A
Iteration:  85%|████████▌ | 688/807 [07:40<01:22,  1.44it/s][A

	Training loss :  1.3047762472009243



Iteration:  85%|████████▌ | 689/807 [07:40<01:20,  1.47it/s][A
Iteration:  86%|████████▌ | 690/807 [07:41<01:18,  1.49it/s][A
Iteration:  86%|████████▌ | 691/807 [07:42<01:16,  1.51it/s][A
Iteration:  86%|████████▌ | 692/807 [07:42<01:19,  1.44it/s][A

	Training loss :  1.3066203808250454



Iteration:  86%|████████▌ | 693/807 [07:43<01:17,  1.47it/s][A
Iteration:  86%|████████▌ | 694/807 [07:44<01:15,  1.49it/s][A
Iteration:  86%|████████▌ | 695/807 [07:44<01:14,  1.51it/s][A
Iteration:  86%|████████▌ | 696/807 [07:45<01:16,  1.44it/s][A

	Training loss :  1.3070627847022709



Iteration:  86%|████████▋ | 697/807 [07:46<01:14,  1.47it/s][A
Iteration:  86%|████████▋ | 698/807 [07:46<01:12,  1.49it/s][A
Iteration:  87%|████████▋ | 699/807 [07:47<01:11,  1.51it/s][A
Iteration:  87%|████████▋ | 700/807 [07:48<01:14,  1.45it/s][A

	Training loss :  1.3073323682802065



Iteration:  87%|████████▋ | 701/807 [07:48<01:12,  1.47it/s][A
Iteration:  87%|████████▋ | 702/807 [07:49<01:10,  1.49it/s][A
Iteration:  87%|████████▋ | 703/807 [07:50<01:09,  1.50it/s][A
Iteration:  87%|████████▋ | 704/807 [07:50<01:11,  1.44it/s][A

	Training loss :  1.3063404414040798



Iteration:  87%|████████▋ | 705/807 [07:51<01:09,  1.47it/s][A
Iteration:  87%|████████▋ | 706/807 [07:52<01:07,  1.49it/s][A
Iteration:  88%|████████▊ | 707/807 [07:52<01:06,  1.51it/s][A
Iteration:  88%|████████▊ | 708/807 [07:53<01:08,  1.44it/s][A

	Training loss :  1.3067170988115888



Iteration:  88%|████████▊ | 709/807 [07:54<01:06,  1.47it/s][A
Iteration:  88%|████████▊ | 710/807 [07:54<01:05,  1.49it/s][A
Iteration:  88%|████████▊ | 711/807 [07:55<01:03,  1.51it/s][A
Iteration:  88%|████████▊ | 712/807 [07:56<01:05,  1.45it/s][A

	Training loss :  1.3063643594387542



Iteration:  88%|████████▊ | 713/807 [07:56<01:03,  1.47it/s][A
Iteration:  88%|████████▊ | 714/807 [07:57<01:02,  1.49it/s][A
Iteration:  89%|████████▊ | 715/807 [07:58<01:01,  1.51it/s][A
Iteration:  89%|████████▊ | 716/807 [07:58<01:03,  1.44it/s][A

	Training loss :  1.3067084446382922



Iteration:  89%|████████▉ | 717/807 [07:59<01:01,  1.47it/s][A
Iteration:  89%|████████▉ | 718/807 [08:00<00:59,  1.49it/s][A
Iteration:  89%|████████▉ | 719/807 [08:00<00:58,  1.51it/s][A
Iteration:  89%|████████▉ | 720/807 [08:01<01:00,  1.44it/s][A

	Training loss :  1.305165987999903



Iteration:  89%|████████▉ | 721/807 [08:02<00:58,  1.47it/s][A
Iteration:  89%|████████▉ | 722/807 [08:03<00:56,  1.49it/s][A
Iteration:  90%|████████▉ | 723/807 [08:03<00:55,  1.51it/s][A
Iteration:  90%|████████▉ | 724/807 [08:04<00:57,  1.44it/s][A

	Training loss :  1.3054499266802935



Iteration:  90%|████████▉ | 725/807 [08:05<00:55,  1.47it/s][A
Iteration:  90%|████████▉ | 726/807 [08:05<00:54,  1.49it/s][A
Iteration:  90%|█████████ | 727/807 [08:06<00:53,  1.51it/s][A
Iteration:  90%|█████████ | 728/807 [08:07<00:54,  1.44it/s][A

	Training loss :  1.3056440516733205



Iteration:  90%|█████████ | 729/807 [08:07<00:53,  1.47it/s][A
Iteration:  90%|█████████ | 730/807 [08:08<00:51,  1.49it/s][A
Iteration:  91%|█████████ | 731/807 [08:09<00:50,  1.50it/s][A
Iteration:  91%|█████████ | 732/807 [08:09<00:51,  1.44it/s][A

	Training loss :  1.3055588016337385



Iteration:  91%|█████████ | 733/807 [08:10<00:50,  1.47it/s][A
Iteration:  91%|█████████ | 734/807 [08:11<00:49,  1.49it/s][A
Iteration:  91%|█████████ | 735/807 [08:11<00:47,  1.50it/s][A
Iteration:  91%|█████████ | 736/807 [08:12<00:49,  1.44it/s][A

	Training loss :  1.3066656615828043



Iteration:  91%|█████████▏| 737/807 [08:13<00:47,  1.47it/s][A
Iteration:  91%|█████████▏| 738/807 [08:13<00:46,  1.49it/s][A
Iteration:  92%|█████████▏| 739/807 [08:14<00:45,  1.50it/s][A
Iteration:  92%|█████████▏| 740/807 [08:15<00:46,  1.44it/s][A

	Training loss :  1.3064393275895634



Iteration:  92%|█████████▏| 741/807 [08:15<00:45,  1.46it/s][A
Iteration:  92%|█████████▏| 742/807 [08:16<00:43,  1.49it/s][A
Iteration:  92%|█████████▏| 743/807 [08:17<00:42,  1.50it/s][A
Iteration:  92%|█████████▏| 744/807 [08:17<00:43,  1.44it/s][A

	Training loss :  1.305557734342993



Iteration:  92%|█████████▏| 745/807 [08:18<00:42,  1.47it/s][A
Iteration:  92%|█████████▏| 746/807 [08:19<00:41,  1.49it/s][A
Iteration:  93%|█████████▎| 747/807 [08:19<00:39,  1.50it/s][A
Iteration:  93%|█████████▎| 748/807 [08:20<00:40,  1.44it/s][A

	Training loss :  1.3046445838190655



Iteration:  93%|█████████▎| 749/807 [08:21<00:39,  1.47it/s][A
Iteration:  93%|█████████▎| 750/807 [08:21<00:38,  1.49it/s][A
Iteration:  93%|█████████▎| 751/807 [08:22<00:37,  1.51it/s][A
Iteration:  93%|█████████▎| 752/807 [08:23<00:38,  1.44it/s][A

	Training loss :  1.3056386293486713



Iteration:  93%|█████████▎| 753/807 [08:24<00:36,  1.47it/s][A
Iteration:  93%|█████████▎| 754/807 [08:24<00:35,  1.49it/s][A
Iteration:  94%|█████████▎| 755/807 [08:25<00:34,  1.50it/s][A
Iteration:  94%|█████████▎| 756/807 [08:26<00:35,  1.43it/s][A

	Training loss :  1.305751980888465



Iteration:  94%|█████████▍| 757/807 [08:26<00:34,  1.47it/s][A
Iteration:  94%|█████████▍| 758/807 [08:27<00:32,  1.49it/s][A
Iteration:  94%|█████████▍| 759/807 [08:28<00:31,  1.50it/s][A
Iteration:  94%|█████████▍| 760/807 [08:28<00:32,  1.44it/s][A

	Training loss :  1.305762274053536



Iteration:  94%|█████████▍| 761/807 [08:29<00:31,  1.47it/s][A
Iteration:  94%|█████████▍| 762/807 [08:30<00:30,  1.49it/s][A
Iteration:  95%|█████████▍| 763/807 [08:30<00:29,  1.50it/s][A
Iteration:  95%|█████████▍| 764/807 [08:31<00:29,  1.44it/s][A

	Training loss :  1.3043114075251898



Iteration:  95%|█████████▍| 765/807 [08:32<00:28,  1.47it/s][A
Iteration:  95%|█████████▍| 766/807 [08:32<00:27,  1.49it/s][A
Iteration:  95%|█████████▌| 767/807 [08:33<00:26,  1.50it/s][A
Iteration:  95%|█████████▌| 768/807 [08:34<00:27,  1.44it/s][A

	Training loss :  1.304717860223415



Iteration:  95%|█████████▌| 769/807 [08:34<00:25,  1.47it/s][A
Iteration:  95%|█████████▌| 770/807 [08:35<00:24,  1.49it/s][A
Iteration:  96%|█████████▌| 771/807 [08:36<00:23,  1.51it/s][A
Iteration:  96%|█████████▌| 772/807 [08:36<00:24,  1.44it/s][A

	Training loss :  1.3039243068796984



Iteration:  96%|█████████▌| 773/807 [08:37<00:23,  1.47it/s][A
Iteration:  96%|█████████▌| 774/807 [08:38<00:22,  1.48it/s][A
Iteration:  96%|█████████▌| 775/807 [08:38<00:21,  1.50it/s][A
Iteration:  96%|█████████▌| 776/807 [08:39<00:21,  1.44it/s][A

	Training loss :  1.304222891126404



Iteration:  96%|█████████▋| 777/807 [08:40<00:20,  1.47it/s][A
Iteration:  96%|█████████▋| 778/807 [08:40<00:19,  1.49it/s][A
Iteration:  97%|█████████▋| 779/807 [08:41<00:18,  1.50it/s][A
Iteration:  97%|█████████▋| 780/807 [08:42<00:18,  1.44it/s][A

	Training loss :  1.3048981189345703



Iteration:  97%|█████████▋| 781/807 [08:43<00:17,  1.47it/s][A
Iteration:  97%|█████████▋| 782/807 [08:43<00:16,  1.49it/s][A
Iteration:  97%|█████████▋| 783/807 [08:44<00:15,  1.50it/s][A
Iteration:  97%|█████████▋| 784/807 [08:45<00:15,  1.44it/s][A

	Training loss :  1.3042407348675995



Iteration:  97%|█████████▋| 785/807 [08:45<00:14,  1.47it/s][A
Iteration:  97%|█████████▋| 786/807 [08:46<00:14,  1.49it/s][A
Iteration:  98%|█████████▊| 787/807 [08:47<00:13,  1.51it/s][A
Iteration:  98%|█████████▊| 788/807 [08:47<00:13,  1.44it/s][A

	Training loss :  1.3049224655564666



Iteration:  98%|█████████▊| 789/807 [08:48<00:12,  1.47it/s][A
Iteration:  98%|█████████▊| 790/807 [08:49<00:11,  1.49it/s][A
Iteration:  98%|█████████▊| 791/807 [08:49<00:10,  1.51it/s][A
Iteration:  98%|█████████▊| 792/807 [08:50<00:10,  1.44it/s][A

	Training loss :  1.3042586541010275



Iteration:  98%|█████████▊| 793/807 [08:51<00:09,  1.47it/s][A
Iteration:  98%|█████████▊| 794/807 [08:51<00:08,  1.49it/s][A
Iteration:  99%|█████████▊| 795/807 [08:52<00:07,  1.50it/s][A
Iteration:  99%|█████████▊| 796/807 [08:53<00:07,  1.44it/s][A

	Training loss :  1.3024060272017317



Iteration:  99%|█████████▉| 797/807 [08:53<00:06,  1.47it/s][A
Iteration:  99%|█████████▉| 798/807 [08:54<00:06,  1.49it/s][A
Iteration:  99%|█████████▉| 799/807 [08:55<00:05,  1.51it/s][A
Iteration:  99%|█████████▉| 800/807 [08:55<00:04,  1.44it/s][A

	Training loss :  1.3042149573192001



Iteration:  99%|█████████▉| 801/807 [08:56<00:04,  1.47it/s][A
Iteration:  99%|█████████▉| 802/807 [08:57<00:03,  1.49it/s][A
Iteration: 100%|█████████▉| 803/807 [08:57<00:02,  1.50it/s][A
Iteration: 100%|█████████▉| 804/807 [08:58<00:02,  1.44it/s][A

	Training loss :  1.3041421616477753



Iteration: 100%|█████████▉| 805/807 [08:59<00:01,  1.47it/s][A
Iteration: 100%|█████████▉| 806/807 [08:59<00:00,  1.49it/s][A
Iteration: 100%|██████████| 807/807 [09:00<00:00,  1.49it/s]
Epoch:  33%|███▎      | 1/3 [09:00<18:00, 540.47s/it]
Iteration:   0%|          | 0/807 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/807 [00:00<08:36,  1.56it/s][A
Iteration:   0%|          | 2/807 [00:01<08:38,  1.55it/s][A
Iteration:   0%|          | 3/807 [00:01<08:37,  1.55it/s][A
Iteration:   0%|          | 4/807 [00:02<09:05,  1.47it/s][A

	Training loss :  1.374334454536438



Iteration:   1%|          | 5/807 [00:03<08:57,  1.49it/s][A
Iteration:   1%|          | 6/807 [00:03<08:52,  1.50it/s][A
Iteration:   1%|          | 7/807 [00:04<08:48,  1.51it/s][A
Iteration:   1%|          | 8/807 [00:05<09:12,  1.45it/s][A

	Training loss :  1.299004688858986



Iteration:   1%|          | 9/807 [00:06<09:00,  1.48it/s][A
Iteration:   1%|          | 10/807 [00:06<08:54,  1.49it/s][A
Iteration:   1%|▏         | 11/807 [00:07<08:47,  1.51it/s][A
Iteration:   1%|▏         | 12/807 [00:08<09:11,  1.44it/s][A

	Training loss :  1.2840800732374191



Iteration:   2%|▏         | 13/807 [00:08<09:00,  1.47it/s][A
Iteration:   2%|▏         | 14/807 [00:09<08:52,  1.49it/s][A
Iteration:   2%|▏         | 15/807 [00:10<08:46,  1.50it/s][A
Iteration:   2%|▏         | 16/807 [00:10<09:08,  1.44it/s][A

	Training loss :  1.3466557078063488



Iteration:   2%|▏         | 17/807 [00:11<08:57,  1.47it/s][A
Iteration:   2%|▏         | 18/807 [00:12<08:49,  1.49it/s][A
Iteration:   2%|▏         | 19/807 [00:12<08:42,  1.51it/s][A
Iteration:   2%|▏         | 20/807 [00:13<09:06,  1.44it/s][A

	Training loss :  1.379822090268135



Iteration:   3%|▎         | 21/807 [00:14<08:55,  1.47it/s][A
Iteration:   3%|▎         | 22/807 [00:14<08:48,  1.49it/s][A
Iteration:   3%|▎         | 23/807 [00:15<08:40,  1.51it/s][A
Iteration:   3%|▎         | 24/807 [00:16<09:02,  1.44it/s][A

	Training loss :  1.3836836392680805



Iteration:   3%|▎         | 25/807 [00:16<08:50,  1.47it/s][A
Iteration:   3%|▎         | 26/807 [00:17<08:43,  1.49it/s][A
Iteration:   3%|▎         | 27/807 [00:18<08:37,  1.51it/s][A
Iteration:   3%|▎         | 28/807 [00:18<09:00,  1.44it/s][A

	Training loss :  1.399184837937355



Iteration:   4%|▎         | 29/807 [00:19<08:47,  1.47it/s][A
Iteration:   4%|▎         | 30/807 [00:20<08:41,  1.49it/s][A
Iteration:   4%|▍         | 31/807 [00:20<08:35,  1.51it/s][A
Iteration:   4%|▍         | 32/807 [00:21<08:57,  1.44it/s][A

	Training loss :  1.4125563260167837



Iteration:   4%|▍         | 33/807 [00:22<08:46,  1.47it/s][A
Iteration:   4%|▍         | 34/807 [00:22<08:38,  1.49it/s][A
Iteration:   4%|▍         | 35/807 [00:23<08:32,  1.51it/s][A
Iteration:   4%|▍         | 36/807 [00:24<08:54,  1.44it/s][A

	Training loss :  1.4049747834602992



Iteration:   5%|▍         | 37/807 [00:25<08:42,  1.47it/s][A
Iteration:   5%|▍         | 38/807 [00:25<08:35,  1.49it/s][A
Iteration:   5%|▍         | 39/807 [00:26<08:29,  1.51it/s][A
Iteration:   5%|▍         | 40/807 [00:27<08:50,  1.45it/s][A

	Training loss :  1.3896342858672142



Iteration:   5%|▌         | 41/807 [00:27<08:41,  1.47it/s][A
Iteration:   5%|▌         | 42/807 [00:28<08:33,  1.49it/s][A
Iteration:   5%|▌         | 43/807 [00:29<08:28,  1.50it/s][A
Iteration:   5%|▌         | 44/807 [00:29<08:50,  1.44it/s][A

	Training loss :  1.3551710708574816



Iteration:   6%|▌         | 45/807 [00:30<08:38,  1.47it/s][A
Iteration:   6%|▌         | 46/807 [00:31<08:33,  1.48it/s][A
Iteration:   6%|▌         | 47/807 [00:31<08:25,  1.50it/s][A
Iteration:   6%|▌         | 48/807 [00:32<08:46,  1.44it/s][A

	Training loss :  1.3524086140096188



Iteration:   6%|▌         | 49/807 [00:33<08:35,  1.47it/s][A
Iteration:   6%|▌         | 50/807 [00:33<08:28,  1.49it/s][A
Iteration:   6%|▋         | 51/807 [00:34<08:22,  1.50it/s][A
Iteration:   6%|▋         | 52/807 [00:35<08:43,  1.44it/s][A

	Training loss :  1.3449684599271188



Iteration:   7%|▋         | 53/807 [00:35<08:32,  1.47it/s][A
Iteration:   7%|▋         | 54/807 [00:36<08:25,  1.49it/s][A
Iteration:   7%|▋         | 55/807 [00:37<08:19,  1.51it/s][A
Iteration:   7%|▋         | 56/807 [00:37<08:40,  1.44it/s][A

	Training loss :  1.3366932006818908



Iteration:   7%|▋         | 57/807 [00:38<08:29,  1.47it/s][A
Iteration:   7%|▋         | 58/807 [00:39<08:22,  1.49it/s][A
Iteration:   7%|▋         | 59/807 [00:39<08:17,  1.50it/s][A
Iteration:   7%|▋         | 60/807 [00:40<08:39,  1.44it/s][A

	Training loss :  1.327210729320844



Iteration:   8%|▊         | 61/807 [00:41<08:28,  1.47it/s][A
Iteration:   8%|▊         | 62/807 [00:41<08:20,  1.49it/s][A
Iteration:   8%|▊         | 63/807 [00:42<08:13,  1.51it/s][A
Iteration:   8%|▊         | 64/807 [00:43<08:35,  1.44it/s][A

	Training loss :  1.329697129316628



Iteration:   8%|▊         | 65/807 [00:44<08:25,  1.47it/s][A
Iteration:   8%|▊         | 66/807 [00:44<08:18,  1.49it/s][A
Iteration:   8%|▊         | 67/807 [00:45<08:12,  1.50it/s][A
Iteration:   8%|▊         | 68/807 [00:46<08:32,  1.44it/s][A

	Training loss :  1.31881243604071



Iteration:   9%|▊         | 69/807 [00:46<08:22,  1.47it/s][A
Iteration:   9%|▊         | 70/807 [00:47<08:15,  1.49it/s][A
Iteration:   9%|▉         | 71/807 [00:48<08:10,  1.50it/s][A
Iteration:   9%|▉         | 72/807 [00:48<08:29,  1.44it/s][A

	Training loss :  1.307939608891805



Iteration:   9%|▉         | 73/807 [00:49<08:19,  1.47it/s][A
Iteration:   9%|▉         | 74/807 [00:50<08:12,  1.49it/s][A
Iteration:   9%|▉         | 75/807 [00:50<08:07,  1.50it/s][A
Iteration:   9%|▉         | 76/807 [00:51<08:28,  1.44it/s][A

	Training loss :  1.3187855866394544



Iteration:  10%|▉         | 77/807 [00:52<08:18,  1.47it/s][A
Iteration:  10%|▉         | 78/807 [00:52<08:10,  1.49it/s][A
Iteration:  10%|▉         | 79/807 [00:53<08:04,  1.50it/s][A
Iteration:  10%|▉         | 80/807 [00:54<08:25,  1.44it/s][A

	Training loss :  1.3091867364943028



Iteration:  10%|█         | 81/807 [00:54<08:15,  1.47it/s][A
Iteration:  10%|█         | 82/807 [00:55<08:08,  1.48it/s][A
Iteration:  10%|█         | 83/807 [00:56<08:01,  1.50it/s][A
Iteration:  10%|█         | 84/807 [00:56<08:21,  1.44it/s][A

	Training loss :  1.3089082971924828



Iteration:  11%|█         | 85/807 [00:57<08:11,  1.47it/s][A
Iteration:  11%|█         | 86/807 [00:58<08:03,  1.49it/s][A
Iteration:  11%|█         | 87/807 [00:58<07:57,  1.51it/s][A
Iteration:  11%|█         | 88/807 [00:59<08:17,  1.44it/s][A

	Training loss :  1.3108083741231398



Iteration:  11%|█         | 89/807 [01:00<08:06,  1.48it/s][A
Iteration:  11%|█         | 90/807 [01:00<08:01,  1.49it/s][A
Iteration:  11%|█▏        | 91/807 [01:01<07:55,  1.51it/s][A
Iteration:  11%|█▏        | 92/807 [01:02<08:16,  1.44it/s][A

	Training loss :  1.3010121098031169



Iteration:  12%|█▏        | 93/807 [01:02<08:05,  1.47it/s][A
Iteration:  12%|█▏        | 94/807 [01:03<07:58,  1.49it/s][A
Iteration:  12%|█▏        | 95/807 [01:04<07:53,  1.51it/s][A
Iteration:  12%|█▏        | 96/807 [01:05<08:13,  1.44it/s][A

	Training loss :  1.2992429938167334



Iteration:  12%|█▏        | 97/807 [01:05<08:02,  1.47it/s][A
Iteration:  12%|█▏        | 98/807 [01:06<07:56,  1.49it/s][A
Iteration:  12%|█▏        | 99/807 [01:06<07:51,  1.50it/s][A
Iteration:  12%|█▏        | 100/807 [01:07<08:13,  1.43it/s][A

	Training loss :  1.295921276807785



Iteration:  13%|█▎        | 101/807 [01:08<08:01,  1.47it/s][A
Iteration:  13%|█▎        | 102/807 [01:09<07:54,  1.48it/s][A
Iteration:  13%|█▎        | 103/807 [01:09<07:48,  1.50it/s][A
Iteration:  13%|█▎        | 104/807 [01:10<08:09,  1.44it/s][A

	Training loss :  1.2962170048401906



Iteration:  13%|█▎        | 105/807 [01:11<07:58,  1.47it/s][A
Iteration:  13%|█▎        | 106/807 [01:11<07:51,  1.49it/s][A
Iteration:  13%|█▎        | 107/807 [01:12<07:46,  1.50it/s][A
Iteration:  13%|█▎        | 108/807 [01:13<08:06,  1.44it/s][A

	Training loss :  1.30764142120326



Iteration:  14%|█▎        | 109/807 [01:13<07:55,  1.47it/s][A
Iteration:  14%|█▎        | 110/807 [01:14<07:48,  1.49it/s][A
Iteration:  14%|█▍        | 111/807 [01:15<07:43,  1.50it/s][A
Iteration:  14%|█▍        | 112/807 [01:15<08:02,  1.44it/s][A

	Training loss :  1.318561889231205



Iteration:  14%|█▍        | 113/807 [01:16<07:52,  1.47it/s][A
Iteration:  14%|█▍        | 114/807 [01:17<07:45,  1.49it/s][A
Iteration:  14%|█▍        | 115/807 [01:17<07:41,  1.50it/s][A
Iteration:  14%|█▍        | 116/807 [01:18<08:02,  1.43it/s][A

	Training loss :  1.3101489687788075



Iteration:  14%|█▍        | 117/807 [01:19<07:51,  1.46it/s][A
Iteration:  15%|█▍        | 118/807 [01:19<07:44,  1.48it/s][A
Iteration:  15%|█▍        | 119/807 [01:20<07:37,  1.50it/s][A
Iteration:  15%|█▍        | 120/807 [01:21<07:58,  1.44it/s][A

	Training loss :  1.324732001622518



Iteration:  15%|█▍        | 121/807 [01:21<07:47,  1.47it/s][A
Iteration:  15%|█▌        | 122/807 [01:22<07:40,  1.49it/s][A
Iteration:  15%|█▌        | 123/807 [01:23<07:35,  1.50it/s][A
Iteration:  15%|█▌        | 124/807 [01:24<07:54,  1.44it/s][A

	Training loss :  1.3228356136429695



Iteration:  15%|█▌        | 125/807 [01:24<07:46,  1.46it/s][A
Iteration:  16%|█▌        | 126/807 [01:25<07:39,  1.48it/s][A
Iteration:  16%|█▌        | 127/807 [01:26<07:33,  1.50it/s][A
Iteration:  16%|█▌        | 128/807 [01:26<07:51,  1.44it/s][A

	Training loss :  1.3161124694161117



Iteration:  16%|█▌        | 129/807 [01:27<07:43,  1.46it/s][A
Iteration:  16%|█▌        | 130/807 [01:28<07:35,  1.49it/s][A
Iteration:  16%|█▌        | 131/807 [01:28<07:30,  1.50it/s][A
Iteration:  16%|█▋        | 132/807 [01:29<07:49,  1.44it/s][A

	Training loss :  1.3188920693867134



Iteration:  16%|█▋        | 133/807 [01:30<07:37,  1.47it/s][A
Iteration:  17%|█▋        | 134/807 [01:30<07:32,  1.49it/s][A
Iteration:  17%|█▋        | 135/807 [01:31<07:26,  1.50it/s][A
Iteration:  17%|█▋        | 136/807 [01:32<07:45,  1.44it/s][A

	Training loss :  1.3069545341765179



Iteration:  17%|█▋        | 137/807 [01:32<07:38,  1.46it/s][A
Iteration:  17%|█▋        | 138/807 [01:33<07:31,  1.48it/s][A
Iteration:  17%|█▋        | 139/807 [01:34<07:25,  1.50it/s][A
Iteration:  17%|█▋        | 140/807 [01:34<07:44,  1.44it/s][A

	Training loss :  1.302760939512934



Iteration:  17%|█▋        | 141/807 [01:35<07:33,  1.47it/s][A
Iteration:  18%|█▊        | 142/807 [01:36<07:27,  1.48it/s][A
Iteration:  18%|█▊        | 143/807 [01:36<07:22,  1.50it/s][A
Iteration:  18%|█▊        | 144/807 [01:37<07:41,  1.44it/s][A

	Training loss :  1.2991142272949219



Iteration:  18%|█▊        | 145/807 [01:38<07:31,  1.47it/s][A
Iteration:  18%|█▊        | 146/807 [01:38<07:24,  1.49it/s][A
Iteration:  18%|█▊        | 147/807 [01:39<07:18,  1.50it/s][A
Iteration:  18%|█▊        | 148/807 [01:40<07:37,  1.44it/s][A

	Training loss :  1.29531255163051



Iteration:  18%|█▊        | 149/807 [01:41<07:28,  1.47it/s][A
Iteration:  19%|█▊        | 150/807 [01:41<07:22,  1.48it/s][A
Iteration:  19%|█▊        | 151/807 [01:42<07:17,  1.50it/s][A
Iteration:  19%|█▉        | 152/807 [01:43<07:35,  1.44it/s][A

	Training loss :  1.2893357716108624



Iteration:  19%|█▉        | 153/807 [01:43<07:25,  1.47it/s][A
Iteration:  19%|█▉        | 154/807 [01:44<07:19,  1.49it/s][A
Iteration:  19%|█▉        | 155/807 [01:45<07:13,  1.50it/s][A
Iteration:  19%|█▉        | 156/807 [01:45<07:31,  1.44it/s][A

	Training loss :  1.2933053855712597



Iteration:  19%|█▉        | 157/807 [01:46<07:24,  1.46it/s][A
Iteration:  20%|█▉        | 158/807 [01:47<07:16,  1.49it/s][A
Iteration:  20%|█▉        | 159/807 [01:47<07:11,  1.50it/s][A
Iteration:  20%|█▉        | 160/807 [01:48<07:30,  1.44it/s][A

	Training loss :  1.2986445538699627



Iteration:  20%|█▉        | 161/807 [01:49<07:20,  1.47it/s][A
Iteration:  20%|██        | 162/807 [01:49<07:15,  1.48it/s][A
Iteration:  20%|██        | 163/807 [01:50<07:10,  1.50it/s][A
Iteration:  20%|██        | 164/807 [01:51<07:28,  1.43it/s][A

	Training loss :  1.288180288745136



Iteration:  20%|██        | 165/807 [01:51<07:18,  1.46it/s][A
Iteration:  21%|██        | 166/807 [01:52<07:12,  1.48it/s][A
Iteration:  21%|██        | 167/807 [01:53<07:08,  1.50it/s][A
Iteration:  21%|██        | 168/807 [01:53<07:25,  1.43it/s][A

	Training loss :  1.287109334553991



Iteration:  21%|██        | 169/807 [01:54<07:15,  1.46it/s][A
Iteration:  21%|██        | 170/807 [01:55<07:09,  1.48it/s][A
Iteration:  21%|██        | 171/807 [01:55<07:04,  1.50it/s][A
Iteration:  21%|██▏       | 172/807 [01:56<07:20,  1.44it/s][A

	Training loss :  1.2884319403836892



Iteration:  21%|██▏       | 173/807 [01:57<07:11,  1.47it/s][A
Iteration:  22%|██▏       | 174/807 [01:57<07:05,  1.49it/s][A
Iteration:  22%|██▏       | 175/807 [01:58<07:01,  1.50it/s][A
Iteration:  22%|██▏       | 176/807 [01:59<07:19,  1.44it/s][A

	Training loss :  1.286596917970614



Iteration:  22%|██▏       | 177/807 [02:00<07:10,  1.46it/s][A
Iteration:  22%|██▏       | 178/807 [02:00<07:04,  1.48it/s][A
Iteration:  22%|██▏       | 179/807 [02:01<06:58,  1.50it/s][A
Iteration:  22%|██▏       | 180/807 [02:02<07:17,  1.43it/s][A

	Training loss :  1.2847520305050744



Iteration:  22%|██▏       | 181/807 [02:02<07:07,  1.46it/s][A
Iteration:  23%|██▎       | 182/807 [02:03<07:01,  1.48it/s][A
Iteration:  23%|██▎       | 183/807 [02:04<06:57,  1.50it/s][A
Iteration:  23%|██▎       | 184/807 [02:04<07:13,  1.44it/s][A

	Training loss :  1.285230025001194



Iteration:  23%|██▎       | 185/807 [02:05<07:04,  1.47it/s][A
Iteration:  23%|██▎       | 186/807 [02:06<06:58,  1.48it/s][A
Iteration:  23%|██▎       | 187/807 [02:06<06:53,  1.50it/s][A
Iteration:  23%|██▎       | 188/807 [02:07<07:11,  1.43it/s][A

	Training loss :  1.2859097288009969



Iteration:  23%|██▎       | 189/807 [02:08<07:01,  1.46it/s][A
Iteration:  24%|██▎       | 190/807 [02:08<06:56,  1.48it/s][A
Iteration:  24%|██▎       | 191/807 [02:09<06:51,  1.50it/s][A
Iteration:  24%|██▍       | 192/807 [02:10<07:07,  1.44it/s][A

	Training loss :  1.2874476692328851



Iteration:  24%|██▍       | 193/807 [02:10<06:58,  1.47it/s][A
Iteration:  24%|██▍       | 194/807 [02:11<06:52,  1.49it/s][A
Iteration:  24%|██▍       | 195/807 [02:12<06:48,  1.50it/s][A
Iteration:  24%|██▍       | 196/807 [02:12<07:05,  1.44it/s][A

	Training loss :  1.2931619037170798



Iteration:  24%|██▍       | 197/807 [02:13<06:56,  1.46it/s][A
Iteration:  25%|██▍       | 198/807 [02:14<06:50,  1.48it/s][A
Iteration:  25%|██▍       | 199/807 [02:14<06:46,  1.50it/s][A
Iteration:  25%|██▍       | 200/807 [02:15<07:03,  1.43it/s][A

	Training loss :  1.2938475823402404



Iteration:  25%|██▍       | 201/807 [02:16<06:54,  1.46it/s][A
Iteration:  25%|██▌       | 202/807 [02:17<06:48,  1.48it/s][A
Iteration:  25%|██▌       | 203/807 [02:17<06:43,  1.50it/s][A
Iteration:  25%|██▌       | 204/807 [02:18<07:00,  1.43it/s][A

	Training loss :  1.2880240570680768



Iteration:  25%|██▌       | 205/807 [02:19<06:52,  1.46it/s][A
Iteration:  26%|██▌       | 206/807 [02:19<06:45,  1.48it/s][A
Iteration:  26%|██▌       | 207/807 [02:20<06:40,  1.50it/s][A
Iteration:  26%|██▌       | 208/807 [02:21<06:56,  1.44it/s][A

	Training loss :  1.287132185525619



Iteration:  26%|██▌       | 209/807 [02:21<06:47,  1.47it/s][A
Iteration:  26%|██▌       | 210/807 [02:22<06:42,  1.48it/s][A
Iteration:  26%|██▌       | 211/807 [02:23<06:37,  1.50it/s][A
Iteration:  26%|██▋       | 212/807 [02:23<06:53,  1.44it/s][A

	Training loss :  1.2809563754302151



Iteration:  26%|██▋       | 213/807 [02:24<06:45,  1.47it/s][A
Iteration:  27%|██▋       | 214/807 [02:25<06:39,  1.48it/s][A
Iteration:  27%|██▋       | 215/807 [02:25<06:34,  1.50it/s][A
Iteration:  27%|██▋       | 216/807 [02:26<06:51,  1.44it/s][A

	Training loss :  1.2832343785299196



Iteration:  27%|██▋       | 217/807 [02:27<06:42,  1.47it/s][A
Iteration:  27%|██▋       | 218/807 [02:27<06:36,  1.48it/s][A
Iteration:  27%|██▋       | 219/807 [02:28<06:31,  1.50it/s][A
Iteration:  27%|██▋       | 220/807 [02:29<06:47,  1.44it/s][A

	Training loss :  1.285697939991951



Iteration:  27%|██▋       | 221/807 [02:29<06:38,  1.47it/s][A
Iteration:  28%|██▊       | 222/807 [02:30<06:33,  1.49it/s][A
Iteration:  28%|██▊       | 223/807 [02:31<06:28,  1.51it/s][A
Iteration:  28%|██▊       | 224/807 [02:32<06:43,  1.44it/s][A

	Training loss :  1.2881723985608136



Iteration:  28%|██▊       | 225/807 [02:32<06:37,  1.47it/s][A
Iteration:  28%|██▊       | 226/807 [02:33<06:30,  1.49it/s][A
Iteration:  28%|██▊       | 227/807 [02:33<06:25,  1.50it/s][A
Iteration:  28%|██▊       | 228/807 [02:34<06:41,  1.44it/s][A

	Training loss :  1.2857769061076014



Iteration:  28%|██▊       | 229/807 [02:35<06:32,  1.47it/s][A
Iteration:  29%|██▊       | 230/807 [02:36<06:26,  1.49it/s][A
Iteration:  29%|██▊       | 231/807 [02:36<06:23,  1.50it/s][A
Iteration:  29%|██▊       | 232/807 [02:37<06:40,  1.44it/s][A

	Training loss :  1.287159308020411



Iteration:  29%|██▉       | 233/807 [02:38<06:31,  1.47it/s][A
Iteration:  29%|██▉       | 234/807 [02:38<06:26,  1.48it/s][A
Iteration:  29%|██▉       | 235/807 [02:39<06:22,  1.50it/s][A
Iteration:  29%|██▉       | 236/807 [02:40<06:39,  1.43it/s][A

	Training loss :  1.2865247910810729



Iteration:  29%|██▉       | 237/807 [02:40<06:30,  1.46it/s][A
Iteration:  29%|██▉       | 238/807 [02:41<06:23,  1.48it/s][A
Iteration:  30%|██▉       | 239/807 [02:42<06:18,  1.50it/s][A
Iteration:  30%|██▉       | 240/807 [02:42<06:34,  1.44it/s][A

	Training loss :  1.2827481356759867



Iteration:  30%|██▉       | 241/807 [02:43<06:25,  1.47it/s][A
Iteration:  30%|██▉       | 242/807 [02:44<06:21,  1.48it/s][A
Iteration:  30%|███       | 243/807 [02:44<06:16,  1.50it/s][A
Iteration:  30%|███       | 244/807 [02:45<06:33,  1.43it/s][A

	Training loss :  1.2817059594099638



Iteration:  30%|███       | 245/807 [02:46<06:24,  1.46it/s][A
Iteration:  30%|███       | 246/807 [02:46<06:19,  1.48it/s][A
Iteration:  31%|███       | 247/807 [02:47<06:14,  1.50it/s][A
Iteration:  31%|███       | 248/807 [02:48<06:29,  1.43it/s][A

	Training loss :  1.2771932321690744



Iteration:  31%|███       | 249/807 [02:49<06:21,  1.46it/s][A
Iteration:  31%|███       | 250/807 [02:49<06:16,  1.48it/s][A
Iteration:  31%|███       | 251/807 [02:50<06:10,  1.50it/s][A
Iteration:  31%|███       | 252/807 [02:51<06:25,  1.44it/s][A

	Training loss :  1.276786487017359



Iteration:  31%|███▏      | 253/807 [02:51<06:18,  1.46it/s][A
Iteration:  31%|███▏      | 254/807 [02:52<06:12,  1.48it/s][A
Iteration:  32%|███▏      | 255/807 [02:53<06:08,  1.50it/s][A
Iteration:  32%|███▏      | 256/807 [02:53<06:24,  1.43it/s][A

	Training loss :  1.2728599673137069



Iteration:  32%|███▏      | 257/807 [02:54<06:15,  1.46it/s][A
Iteration:  32%|███▏      | 258/807 [02:55<06:09,  1.48it/s][A
Iteration:  32%|███▏      | 259/807 [02:55<06:04,  1.50it/s][A
Iteration:  32%|███▏      | 260/807 [02:56<06:19,  1.44it/s][A

	Training loss :  1.2735126245480317



Iteration:  32%|███▏      | 261/807 [02:57<06:12,  1.47it/s][A
Iteration:  32%|███▏      | 262/807 [02:57<06:07,  1.48it/s][A
Iteration:  33%|███▎      | 263/807 [02:58<06:02,  1.50it/s][A
Iteration:  33%|███▎      | 264/807 [02:59<06:17,  1.44it/s][A

	Training loss :  1.2710392874750225



Iteration:  33%|███▎      | 265/807 [02:59<06:10,  1.46it/s][A
Iteration:  33%|███▎      | 266/807 [03:00<06:04,  1.48it/s][A
Iteration:  33%|███▎      | 267/807 [03:01<05:59,  1.50it/s][A
Iteration:  33%|███▎      | 268/807 [03:01<06:14,  1.44it/s][A

	Training loss :  1.271642758552708



Iteration:  33%|███▎      | 269/807 [03:02<06:07,  1.47it/s][A
Iteration:  33%|███▎      | 270/807 [03:03<06:01,  1.49it/s][A
Iteration:  34%|███▎      | 271/807 [03:03<05:56,  1.50it/s][A
Iteration:  34%|███▎      | 272/807 [03:04<06:11,  1.44it/s][A

	Training loss :  1.2695029800867332



Iteration:  34%|███▍      | 273/807 [03:05<06:03,  1.47it/s][A
Iteration:  34%|███▍      | 274/807 [03:05<05:59,  1.48it/s][A
Iteration:  34%|███▍      | 275/807 [03:06<05:55,  1.50it/s][A
Iteration:  34%|███▍      | 276/807 [03:07<06:10,  1.43it/s][A

	Training loss :  1.2643089091432267



Iteration:  34%|███▍      | 277/807 [03:08<06:02,  1.46it/s][A
Iteration:  34%|███▍      | 278/807 [03:08<05:56,  1.48it/s][A
Iteration:  35%|███▍      | 279/807 [03:09<05:53,  1.50it/s][A
Iteration:  35%|███▍      | 280/807 [03:10<06:07,  1.44it/s][A

	Training loss :  1.2712461914334978



Iteration:  35%|███▍      | 281/807 [03:10<05:58,  1.47it/s][A
Iteration:  35%|███▍      | 282/807 [03:11<05:53,  1.49it/s][A
Iteration:  35%|███▌      | 283/807 [03:12<05:50,  1.50it/s][A
Iteration:  35%|███▌      | 284/807 [03:12<06:05,  1.43it/s][A

	Training loss :  1.2748640146054013



Iteration:  35%|███▌      | 285/807 [03:13<05:57,  1.46it/s][A
Iteration:  35%|███▌      | 286/807 [03:14<05:52,  1.48it/s][A
Iteration:  36%|███▌      | 287/807 [03:14<05:49,  1.49it/s][A
Iteration:  36%|███▌      | 288/807 [03:15<06:03,  1.43it/s][A

	Training loss :  1.2783266703287761



Iteration:  36%|███▌      | 289/807 [03:16<05:54,  1.46it/s][A
Iteration:  36%|███▌      | 290/807 [03:16<05:49,  1.48it/s][A
Iteration:  36%|███▌      | 291/807 [03:17<05:45,  1.49it/s][A
Iteration:  36%|███▌      | 292/807 [03:18<06:00,  1.43it/s][A

	Training loss :  1.280749058070248



Iteration:  36%|███▋      | 293/807 [03:18<05:51,  1.46it/s][A
Iteration:  36%|███▋      | 294/807 [03:19<05:46,  1.48it/s][A
Iteration:  37%|███▋      | 295/807 [03:20<05:41,  1.50it/s][A
Iteration:  37%|███▋      | 296/807 [03:21<05:55,  1.44it/s][A

	Training loss :  1.283514547992397



Iteration:  37%|███▋      | 297/807 [03:21<05:47,  1.47it/s][A
Iteration:  37%|███▋      | 298/807 [03:22<05:42,  1.49it/s][A
Iteration:  37%|███▋      | 299/807 [03:22<05:37,  1.50it/s][A
Iteration:  37%|███▋      | 300/807 [03:23<05:51,  1.44it/s][A

	Training loss :  1.2868869936466216



Iteration:  37%|███▋      | 301/807 [03:24<05:43,  1.47it/s][A
Iteration:  37%|███▋      | 302/807 [03:25<05:38,  1.49it/s][A
Iteration:  38%|███▊      | 303/807 [03:25<05:35,  1.50it/s][A
Iteration:  38%|███▊      | 304/807 [03:26<05:49,  1.44it/s][A

	Training loss :  1.2925326549693157



Iteration:  38%|███▊      | 305/807 [03:27<05:41,  1.47it/s][A
Iteration:  38%|███▊      | 306/807 [03:27<05:37,  1.48it/s][A
Iteration:  38%|███▊      | 307/807 [03:28<05:32,  1.50it/s][A
Iteration:  38%|███▊      | 308/807 [03:29<05:46,  1.44it/s][A

	Training loss :  1.296292010065797



Iteration:  38%|███▊      | 309/807 [03:29<05:40,  1.46it/s][A
Iteration:  38%|███▊      | 310/807 [03:30<05:34,  1.48it/s][A
Iteration:  39%|███▊      | 311/807 [03:31<05:30,  1.50it/s][A
Iteration:  39%|███▊      | 312/807 [03:31<05:43,  1.44it/s][A

	Training loss :  1.2955558181573184



Iteration:  39%|███▉      | 313/807 [03:32<05:36,  1.47it/s][A
Iteration:  39%|███▉      | 314/807 [03:33<05:32,  1.48it/s][A
Iteration:  39%|███▉      | 315/807 [03:33<05:27,  1.50it/s][A
Iteration:  39%|███▉      | 316/807 [03:34<05:40,  1.44it/s][A

	Training loss :  1.296055841295025



Iteration:  39%|███▉      | 317/807 [03:35<05:33,  1.47it/s][A
Iteration:  39%|███▉      | 318/807 [03:35<05:28,  1.49it/s][A
Iteration:  40%|███▉      | 319/807 [03:36<05:24,  1.50it/s][A
Iteration:  40%|███▉      | 320/807 [03:37<05:38,  1.44it/s][A

	Training loss :  1.2950250865891575



Iteration:  40%|███▉      | 321/807 [03:37<05:30,  1.47it/s][A
Iteration:  40%|███▉      | 322/807 [03:38<05:26,  1.49it/s][A
Iteration:  40%|████      | 323/807 [03:39<05:21,  1.51it/s][A
Iteration:  40%|████      | 324/807 [03:40<05:35,  1.44it/s][A

	Training loss :  1.2926316309122392



Iteration:  40%|████      | 325/807 [03:40<05:28,  1.47it/s][A
Iteration:  40%|████      | 326/807 [03:41<05:23,  1.49it/s][A
Iteration:  41%|████      | 327/807 [03:41<05:19,  1.50it/s][A
Iteration:  41%|████      | 328/807 [03:42<05:33,  1.44it/s][A

	Training loss :  1.2937780698625052



Iteration:  41%|████      | 329/807 [03:43<05:26,  1.46it/s][A
Iteration:  41%|████      | 330/807 [03:44<05:21,  1.48it/s][A
Iteration:  41%|████      | 331/807 [03:44<05:16,  1.50it/s][A
Iteration:  41%|████      | 332/807 [03:45<05:30,  1.44it/s][A

	Training loss :  1.2941569908555732



Iteration:  41%|████▏     | 333/807 [03:46<05:23,  1.47it/s][A
Iteration:  41%|████▏     | 334/807 [03:46<05:18,  1.49it/s][A
Iteration:  42%|████▏     | 335/807 [03:47<05:13,  1.50it/s][A
Iteration:  42%|████▏     | 336/807 [03:48<05:26,  1.44it/s][A

	Training loss :  1.2935667807857196



Iteration:  42%|████▏     | 337/807 [03:48<05:19,  1.47it/s][A
Iteration:  42%|████▏     | 338/807 [03:49<05:14,  1.49it/s][A
Iteration:  42%|████▏     | 339/807 [03:50<05:11,  1.50it/s][A
Iteration:  42%|████▏     | 340/807 [03:50<05:25,  1.44it/s][A

	Training loss :  1.2941507851376253



Iteration:  42%|████▏     | 341/807 [03:51<05:18,  1.46it/s][A
Iteration:  42%|████▏     | 342/807 [03:52<05:13,  1.49it/s][A
Iteration:  43%|████▎     | 343/807 [03:52<05:09,  1.50it/s][A
Iteration:  43%|████▎     | 344/807 [03:53<05:21,  1.44it/s][A

	Training loss :  1.2928640163222025



Iteration:  43%|████▎     | 345/807 [03:54<05:14,  1.47it/s][A
Iteration:  43%|████▎     | 346/807 [03:54<05:10,  1.49it/s][A
Iteration:  43%|████▎     | 347/807 [03:55<05:06,  1.50it/s][A
Iteration:  43%|████▎     | 348/807 [03:56<05:18,  1.44it/s][A

	Training loss :  1.293772379899847



Iteration:  43%|████▎     | 349/807 [03:56<05:11,  1.47it/s][A
Iteration:  43%|████▎     | 350/807 [03:57<05:06,  1.49it/s][A
Iteration:  43%|████▎     | 351/807 [03:58<05:03,  1.50it/s][A
Iteration:  44%|████▎     | 352/807 [03:59<05:16,  1.44it/s][A

	Training loss :  1.2947874570434743



Iteration:  44%|████▎     | 353/807 [03:59<05:09,  1.47it/s][A
Iteration:  44%|████▍     | 354/807 [04:00<05:04,  1.49it/s][A
Iteration:  44%|████▍     | 355/807 [04:00<05:00,  1.50it/s][A
Iteration:  44%|████▍     | 356/807 [04:01<05:12,  1.44it/s][A

	Training loss :  1.2972335447086376



Iteration:  44%|████▍     | 357/807 [04:02<05:05,  1.47it/s][A
Iteration:  44%|████▍     | 358/807 [04:03<05:01,  1.49it/s][A
Iteration:  44%|████▍     | 359/807 [04:03<04:57,  1.50it/s][A
Iteration:  45%|████▍     | 360/807 [04:04<05:09,  1.44it/s][A

	Training loss :  1.2992416987816493



Iteration:  45%|████▍     | 361/807 [04:05<05:02,  1.47it/s][A
Iteration:  45%|████▍     | 362/807 [04:05<04:58,  1.49it/s][A
Iteration:  45%|████▍     | 363/807 [04:06<04:55,  1.50it/s][A
Iteration:  45%|████▌     | 364/807 [04:07<05:06,  1.44it/s][A

	Training loss :  1.297722129540129



Iteration:  45%|████▌     | 365/807 [04:07<05:00,  1.47it/s][A
Iteration:  45%|████▌     | 366/807 [04:08<04:56,  1.49it/s][A
Iteration:  45%|████▌     | 367/807 [04:09<04:51,  1.51it/s][A
Iteration:  46%|████▌     | 368/807 [04:09<05:04,  1.44it/s][A

	Training loss :  1.301251016071309



Iteration:  46%|████▌     | 369/807 [04:10<04:58,  1.47it/s][A
Iteration:  46%|████▌     | 370/807 [04:11<04:54,  1.49it/s][A
Iteration:  46%|████▌     | 371/807 [04:11<04:50,  1.50it/s][A
Iteration:  46%|████▌     | 372/807 [04:12<05:02,  1.44it/s][A

	Training loss :  1.2991036072533617



Iteration:  46%|████▌     | 373/807 [04:13<04:55,  1.47it/s][A
Iteration:  46%|████▋     | 374/807 [04:13<04:51,  1.49it/s][A
Iteration:  46%|████▋     | 375/807 [04:14<04:47,  1.50it/s][A
Iteration:  47%|████▋     | 376/807 [04:15<04:58,  1.44it/s][A

	Training loss :  1.2971883500193029



Iteration:  47%|████▋     | 377/807 [04:15<04:52,  1.47it/s][A
Iteration:  47%|████▋     | 378/807 [04:16<04:47,  1.49it/s][A
Iteration:  47%|████▋     | 379/807 [04:17<04:44,  1.51it/s][A
Iteration:  47%|████▋     | 380/807 [04:17<04:55,  1.44it/s][A

	Training loss :  1.2952917880133579



Iteration:  47%|████▋     | 381/807 [04:18<04:49,  1.47it/s][A
Iteration:  47%|████▋     | 382/807 [04:19<04:44,  1.49it/s][A
Iteration:  47%|████▋     | 383/807 [04:19<04:41,  1.50it/s][A
Iteration:  48%|████▊     | 384/807 [04:20<04:54,  1.44it/s][A

	Training loss :  1.297637466341257



Iteration:  48%|████▊     | 385/807 [04:21<04:47,  1.47it/s][A
Iteration:  48%|████▊     | 386/807 [04:22<04:44,  1.48it/s][A
Iteration:  48%|████▊     | 387/807 [04:22<04:39,  1.50it/s][A
Iteration:  48%|████▊     | 388/807 [04:23<04:51,  1.44it/s][A

	Training loss :  1.2956429787517822



Iteration:  48%|████▊     | 389/807 [04:24<04:44,  1.47it/s][A
Iteration:  48%|████▊     | 390/807 [04:24<04:40,  1.49it/s][A
Iteration:  48%|████▊     | 391/807 [04:25<04:36,  1.51it/s][A
Iteration:  49%|████▊     | 392/807 [04:26<04:48,  1.44it/s][A

	Training loss :  1.2986570721378132



Iteration:  49%|████▊     | 393/807 [04:26<04:42,  1.47it/s][A
Iteration:  49%|████▉     | 394/807 [04:27<04:37,  1.49it/s][A
Iteration:  49%|████▉     | 395/807 [04:28<04:33,  1.50it/s][A
Iteration:  49%|████▉     | 396/807 [04:28<04:44,  1.44it/s][A

	Training loss :  1.299667829515958



Iteration:  49%|████▉     | 397/807 [04:29<04:38,  1.47it/s][A
Iteration:  49%|████▉     | 398/807 [04:30<04:34,  1.49it/s][A
Iteration:  49%|████▉     | 399/807 [04:30<04:31,  1.50it/s][A
Iteration:  50%|████▉     | 400/807 [04:31<04:43,  1.44it/s][A

	Training loss :  1.299700707346201



Iteration:  50%|████▉     | 401/807 [04:32<04:36,  1.47it/s][A
Iteration:  50%|████▉     | 402/807 [04:32<04:31,  1.49it/s][A
Iteration:  50%|████▉     | 403/807 [04:33<04:28,  1.51it/s][A
Iteration:  50%|█████     | 404/807 [04:34<04:38,  1.45it/s][A

	Training loss :  1.3024053955727284



Iteration:  50%|█████     | 405/807 [04:34<04:33,  1.47it/s][A
Iteration:  50%|█████     | 406/807 [04:35<04:29,  1.49it/s][A
Iteration:  50%|█████     | 407/807 [04:36<04:26,  1.50it/s][A
Iteration:  51%|█████     | 408/807 [04:36<04:36,  1.44it/s][A

	Training loss :  1.3013264395150483



Iteration:  51%|█████     | 409/807 [04:37<04:31,  1.47it/s][A
Iteration:  51%|█████     | 410/807 [04:38<04:27,  1.49it/s][A
Iteration:  51%|█████     | 411/807 [04:38<04:23,  1.50it/s][A
Iteration:  51%|█████     | 412/807 [04:39<04:33,  1.44it/s][A

	Training loss :  1.298086120954995



Iteration:  51%|█████     | 413/807 [04:40<04:28,  1.46it/s][A
Iteration:  51%|█████▏    | 414/807 [04:40<04:24,  1.49it/s][A
Iteration:  51%|█████▏    | 415/807 [04:41<04:20,  1.50it/s][A
Iteration:  52%|█████▏    | 416/807 [04:42<04:31,  1.44it/s][A

	Training loss :  1.297999110359412



Iteration:  52%|█████▏    | 417/807 [04:43<04:24,  1.47it/s][A
Iteration:  52%|█████▏    | 418/807 [04:43<04:21,  1.49it/s][A
Iteration:  52%|█████▏    | 419/807 [04:44<04:17,  1.51it/s][A
Iteration:  52%|█████▏    | 420/807 [04:45<04:28,  1.44it/s][A

	Training loss :  1.3001887639363607



Iteration:  52%|█████▏    | 421/807 [04:45<04:23,  1.46it/s][A
Iteration:  52%|█████▏    | 422/807 [04:46<04:18,  1.49it/s][A
Iteration:  52%|█████▏    | 423/807 [04:47<04:15,  1.50it/s][A
Iteration:  53%|█████▎    | 424/807 [04:47<04:25,  1.44it/s][A

	Training loss :  1.2999979164802804



Iteration:  53%|█████▎    | 425/807 [04:48<04:19,  1.47it/s][A
Iteration:  53%|█████▎    | 426/807 [04:49<04:15,  1.49it/s][A
Iteration:  53%|█████▎    | 427/807 [04:49<04:12,  1.51it/s][A
Iteration:  53%|█████▎    | 428/807 [04:50<04:22,  1.44it/s][A

	Training loss :  1.3026104942660466



Iteration:  53%|█████▎    | 429/807 [04:51<04:16,  1.47it/s][A
Iteration:  53%|█████▎    | 430/807 [04:51<04:12,  1.49it/s][A
Iteration:  53%|█████▎    | 431/807 [04:52<04:09,  1.51it/s][A
Iteration:  54%|█████▎    | 432/807 [04:53<04:20,  1.44it/s][A

	Training loss :  1.3016499412004594



Iteration:  54%|█████▎    | 433/807 [04:53<04:13,  1.48it/s][A
Iteration:  54%|█████▍    | 434/807 [04:54<04:10,  1.49it/s][A
Iteration:  54%|█████▍    | 435/807 [04:55<04:06,  1.51it/s][A
Iteration:  54%|█████▍    | 436/807 [04:55<04:17,  1.44it/s][A

	Training loss :  1.3006149079274694



Iteration:  54%|█████▍    | 437/807 [04:56<04:12,  1.47it/s][A
Iteration:  54%|█████▍    | 438/807 [04:57<04:08,  1.49it/s][A
Iteration:  54%|█████▍    | 439/807 [04:57<04:05,  1.50it/s][A
Iteration:  55%|█████▍    | 440/807 [04:58<04:15,  1.44it/s][A

	Training loss :  1.2999270249496806



Iteration:  55%|█████▍    | 441/807 [04:59<04:08,  1.47it/s][A
Iteration:  55%|█████▍    | 442/807 [04:59<04:05,  1.49it/s][A
Iteration:  55%|█████▍    | 443/807 [05:00<04:01,  1.50it/s][A
Iteration:  55%|█████▌    | 444/807 [05:01<04:12,  1.44it/s][A

	Training loss :  1.297108188003033



Iteration:  55%|█████▌    | 445/807 [05:02<04:05,  1.47it/s][A
Iteration:  55%|█████▌    | 446/807 [05:02<04:02,  1.49it/s][A
Iteration:  55%|█████▌    | 447/807 [05:03<03:58,  1.51it/s][A
Iteration:  56%|█████▌    | 448/807 [05:04<04:09,  1.44it/s][A

	Training loss :  1.2992560069209762



Iteration:  56%|█████▌    | 449/807 [05:04<04:04,  1.47it/s][A
Iteration:  56%|█████▌    | 450/807 [05:05<04:00,  1.49it/s][A
Iteration:  56%|█████▌    | 451/807 [05:06<03:57,  1.50it/s][A
Iteration:  56%|█████▌    | 452/807 [05:06<04:06,  1.44it/s][A

	Training loss :  1.2987627658696301



Iteration:  56%|█████▌    | 453/807 [05:07<04:01,  1.46it/s][A
Iteration:  56%|█████▋    | 454/807 [05:08<03:57,  1.48it/s][A
Iteration:  56%|█████▋    | 455/807 [05:08<03:54,  1.50it/s][A
Iteration:  57%|█████▋    | 456/807 [05:09<04:03,  1.44it/s][A

	Training loss :  1.2981121563597728



Iteration:  57%|█████▋    | 457/807 [05:10<03:59,  1.46it/s][A
Iteration:  57%|█████▋    | 458/807 [05:10<03:54,  1.49it/s][A
Iteration:  57%|█████▋    | 459/807 [05:11<03:51,  1.50it/s][A
Iteration:  57%|█████▋    | 460/807 [05:12<04:01,  1.44it/s][A

	Training loss :  1.296143171320791



Iteration:  57%|█████▋    | 461/807 [05:12<03:56,  1.46it/s][A
Iteration:  57%|█████▋    | 462/807 [05:13<03:52,  1.49it/s][A
Iteration:  57%|█████▋    | 463/807 [05:14<03:49,  1.50it/s][A
Iteration:  57%|█████▋    | 464/807 [05:14<03:59,  1.43it/s][A

	Training loss :  1.2953948463345397



Iteration:  58%|█████▊    | 465/807 [05:15<03:53,  1.46it/s][A
Iteration:  58%|█████▊    | 466/807 [05:16<03:49,  1.49it/s][A
Iteration:  58%|█████▊    | 467/807 [05:16<03:46,  1.50it/s][A
Iteration:  58%|█████▊    | 468/807 [05:17<03:56,  1.44it/s][A

	Training loss :  1.2944708545493264



Iteration:  58%|█████▊    | 469/807 [05:18<03:50,  1.47it/s][A
Iteration:  58%|█████▊    | 470/807 [05:18<03:47,  1.48it/s][A
Iteration:  58%|█████▊    | 471/807 [05:19<03:43,  1.50it/s][A
Iteration:  58%|█████▊    | 472/807 [05:20<03:52,  1.44it/s][A

	Training loss :  1.2967595415095152



Iteration:  59%|█████▊    | 473/807 [05:21<03:47,  1.47it/s][A
Iteration:  59%|█████▊    | 474/807 [05:21<03:43,  1.49it/s][A
Iteration:  59%|█████▉    | 475/807 [05:22<03:40,  1.51it/s][A
Iteration:  59%|█████▉    | 476/807 [05:23<03:49,  1.44it/s][A

	Training loss :  1.2954304641535301



Iteration:  59%|█████▉    | 477/807 [05:23<03:45,  1.46it/s][A
Iteration:  59%|█████▉    | 478/807 [05:24<03:41,  1.49it/s][A
Iteration:  59%|█████▉    | 479/807 [05:25<03:38,  1.50it/s][A
Iteration:  59%|█████▉    | 480/807 [05:25<03:46,  1.44it/s][A

	Training loss :  1.2984121022125086



Iteration:  60%|█████▉    | 481/807 [05:26<03:42,  1.47it/s][A
Iteration:  60%|█████▉    | 482/807 [05:27<03:38,  1.49it/s][A
Iteration:  60%|█████▉    | 483/807 [05:27<03:35,  1.50it/s][A
Iteration:  60%|█████▉    | 484/807 [05:28<03:44,  1.44it/s][A

	Training loss :  1.297309165651148



Iteration:  60%|██████    | 485/807 [05:29<03:38,  1.47it/s][A
Iteration:  60%|██████    | 486/807 [05:29<03:36,  1.48it/s][A
Iteration:  60%|██████    | 487/807 [05:30<03:32,  1.50it/s][A
Iteration:  60%|██████    | 488/807 [05:31<03:41,  1.44it/s][A

	Training loss :  1.2964128842119311



Iteration:  61%|██████    | 489/807 [05:31<03:36,  1.47it/s][A
Iteration:  61%|██████    | 490/807 [05:32<03:34,  1.48it/s][A
Iteration:  61%|██████    | 491/807 [05:33<03:31,  1.49it/s][A
Iteration:  61%|██████    | 492/807 [05:33<03:39,  1.43it/s][A

	Training loss :  1.2954925414992542



Iteration:  61%|██████    | 493/807 [05:34<03:33,  1.47it/s][A
Iteration:  61%|██████    | 494/807 [05:35<03:31,  1.48it/s][A
Iteration:  61%|██████▏   | 495/807 [05:35<03:27,  1.50it/s][A
Iteration:  61%|██████▏   | 496/807 [05:36<03:36,  1.43it/s][A

	Training loss :  1.297160200293987



Iteration:  62%|██████▏   | 497/807 [05:37<03:31,  1.46it/s][A
Iteration:  62%|██████▏   | 498/807 [05:37<03:28,  1.48it/s][A
Iteration:  62%|██████▏   | 499/807 [05:38<03:25,  1.50it/s][A
Iteration:  62%|██████▏   | 500/807 [05:39<03:33,  1.44it/s][A

	Training loss :  1.294829437494278



Iteration:  62%|██████▏   | 501/807 [05:40<03:28,  1.46it/s][A
Iteration:  62%|██████▏   | 502/807 [05:40<03:25,  1.49it/s][A
Iteration:  62%|██████▏   | 503/807 [05:41<03:22,  1.50it/s][A
Iteration:  62%|██████▏   | 504/807 [05:42<03:30,  1.44it/s][A

	Training loss :  1.2944377346171274



Iteration:  63%|██████▎   | 505/807 [05:42<03:25,  1.47it/s][A
Iteration:  63%|██████▎   | 506/807 [05:43<03:22,  1.49it/s][A
Iteration:  63%|██████▎   | 507/807 [05:44<03:19,  1.50it/s][A
Iteration:  63%|██████▎   | 508/807 [05:44<03:27,  1.44it/s][A

	Training loss :  1.2963841104601312



Iteration:  63%|██████▎   | 509/807 [05:45<03:23,  1.47it/s][A
Iteration:  63%|██████▎   | 510/807 [05:46<03:19,  1.49it/s][A
Iteration:  63%|██████▎   | 511/807 [05:46<03:16,  1.50it/s][A
Iteration:  63%|██████▎   | 512/807 [05:47<03:24,  1.44it/s][A

	Training loss :  1.29842051374726



Iteration:  64%|██████▎   | 513/807 [05:48<03:19,  1.47it/s][A
Iteration:  64%|██████▎   | 514/807 [05:48<03:17,  1.49it/s][A
Iteration:  64%|██████▍   | 515/807 [05:49<03:14,  1.50it/s][A
Iteration:  64%|██████▍   | 516/807 [05:50<03:21,  1.44it/s][A

	Training loss :  1.2979795408341313



Iteration:  64%|██████▍   | 517/807 [05:50<03:17,  1.47it/s][A
Iteration:  64%|██████▍   | 518/807 [05:51<03:14,  1.49it/s][A
Iteration:  64%|██████▍   | 519/807 [05:52<03:11,  1.51it/s][A
Iteration:  64%|██████▍   | 520/807 [05:52<03:19,  1.44it/s][A

	Training loss :  1.3003057752664273



Iteration:  65%|██████▍   | 521/807 [05:53<03:14,  1.47it/s][A
Iteration:  65%|██████▍   | 522/807 [05:54<03:11,  1.49it/s][A
Iteration:  65%|██████▍   | 523/807 [05:54<03:09,  1.50it/s][A
Iteration:  65%|██████▍   | 524/807 [05:55<03:16,  1.44it/s][A

	Training loss :  1.2986273720064236



Iteration:  65%|██████▌   | 525/807 [05:56<03:11,  1.47it/s][A
Iteration:  65%|██████▌   | 526/807 [05:56<03:09,  1.49it/s][A
Iteration:  65%|██████▌   | 527/807 [05:57<03:06,  1.51it/s][A
Iteration:  65%|██████▌   | 528/807 [05:58<03:13,  1.44it/s][A

	Training loss :  1.2979472296481782



Iteration:  66%|██████▌   | 529/807 [05:59<03:09,  1.47it/s][A
Iteration:  66%|██████▌   | 530/807 [05:59<03:06,  1.49it/s][A
Iteration:  66%|██████▌   | 531/807 [06:00<03:03,  1.50it/s][A
Iteration:  66%|██████▌   | 532/807 [06:01<03:11,  1.44it/s][A

	Training loss :  1.2960283963527894



Iteration:  66%|██████▌   | 533/807 [06:01<03:06,  1.47it/s][A
Iteration:  66%|██████▌   | 534/807 [06:02<03:03,  1.49it/s][A
Iteration:  66%|██████▋   | 535/807 [06:03<03:01,  1.50it/s][A
Iteration:  66%|██████▋   | 536/807 [06:03<03:08,  1.44it/s][A

	Training loss :  1.29778835855758



Iteration:  67%|██████▋   | 537/807 [06:04<03:03,  1.47it/s][A
Iteration:  67%|██████▋   | 538/807 [06:05<03:00,  1.49it/s][A
Iteration:  67%|██████▋   | 539/807 [06:05<02:58,  1.50it/s][A
Iteration:  67%|██████▋   | 540/807 [06:06<03:04,  1.44it/s][A

	Training loss :  1.2981731014119253



Iteration:  67%|██████▋   | 541/807 [06:07<03:01,  1.47it/s][A
Iteration:  67%|██████▋   | 542/807 [06:07<02:58,  1.49it/s][A
Iteration:  67%|██████▋   | 543/807 [06:08<02:56,  1.50it/s][A
Iteration:  67%|██████▋   | 544/807 [06:09<03:02,  1.44it/s][A

	Training loss :  1.2978483468075008



Iteration:  68%|██████▊   | 545/807 [06:09<02:58,  1.47it/s][A
Iteration:  68%|██████▊   | 546/807 [06:10<02:55,  1.49it/s][A
Iteration:  68%|██████▊   | 547/807 [06:11<02:53,  1.50it/s][A
Iteration:  68%|██████▊   | 548/807 [06:11<03:00,  1.43it/s][A

	Training loss :  1.2985589727215523



Iteration:  68%|██████▊   | 549/807 [06:12<02:56,  1.47it/s][A
Iteration:  68%|██████▊   | 550/807 [06:13<02:52,  1.49it/s][A
Iteration:  68%|██████▊   | 551/807 [06:13<02:50,  1.50it/s][A
Iteration:  68%|██████▊   | 552/807 [06:14<02:57,  1.44it/s][A

	Training loss :  1.296534919436427



Iteration:  69%|██████▊   | 553/807 [06:15<02:52,  1.47it/s][A
Iteration:  69%|██████▊   | 554/807 [06:15<02:50,  1.48it/s][A
Iteration:  69%|██████▉   | 555/807 [06:16<02:47,  1.50it/s][A
Iteration:  69%|██████▉   | 556/807 [06:17<02:54,  1.44it/s][A

	Training loss :  1.2973557794265609



Iteration:  69%|██████▉   | 557/807 [06:18<02:50,  1.47it/s][A
Iteration:  69%|██████▉   | 558/807 [06:18<02:47,  1.49it/s][A
Iteration:  69%|██████▉   | 559/807 [06:19<02:45,  1.50it/s][A
Iteration:  69%|██████▉   | 560/807 [06:20<02:51,  1.44it/s][A

	Training loss :  1.2990356822099005



Iteration:  70%|██████▉   | 561/807 [06:20<02:47,  1.47it/s][A
Iteration:  70%|██████▉   | 562/807 [06:21<02:44,  1.49it/s][A
Iteration:  70%|██████▉   | 563/807 [06:22<02:42,  1.51it/s][A
Iteration:  70%|██████▉   | 564/807 [06:22<02:48,  1.44it/s][A

	Training loss :  1.2983483475573518



Iteration:  70%|███████   | 565/807 [06:23<02:44,  1.47it/s][A
Iteration:  70%|███████   | 566/807 [06:24<02:42,  1.48it/s][A
Iteration:  70%|███████   | 567/807 [06:24<02:40,  1.50it/s][A
Iteration:  70%|███████   | 568/807 [06:25<02:46,  1.44it/s][A

	Training loss :  1.2977846609035009



Iteration:  71%|███████   | 569/807 [06:26<02:41,  1.47it/s][A
Iteration:  71%|███████   | 570/807 [06:26<02:39,  1.49it/s][A
Iteration:  71%|███████   | 571/807 [06:27<02:36,  1.51it/s][A
Iteration:  71%|███████   | 572/807 [06:28<02:43,  1.44it/s][A

	Training loss :  1.3017596106429201



Iteration:  71%|███████   | 573/807 [06:28<02:38,  1.48it/s][A
Iteration:  71%|███████   | 574/807 [06:29<02:36,  1.49it/s][A
Iteration:  71%|███████▏  | 575/807 [06:30<02:34,  1.50it/s][A
Iteration:  71%|███████▏  | 576/807 [06:30<02:40,  1.44it/s][A

	Training loss :  1.30220093143483



Iteration:  71%|███████▏  | 577/807 [06:31<02:37,  1.46it/s][A
Iteration:  72%|███████▏  | 578/807 [06:32<02:34,  1.48it/s][A
Iteration:  72%|███████▏  | 579/807 [06:32<02:32,  1.50it/s][A
Iteration:  72%|███████▏  | 580/807 [06:33<02:38,  1.43it/s][A

	Training loss :  1.3025569620831259



Iteration:  72%|███████▏  | 581/807 [06:34<02:34,  1.46it/s][A
Iteration:  72%|███████▏  | 582/807 [06:34<02:31,  1.48it/s][A
Iteration:  72%|███████▏  | 583/807 [06:35<02:29,  1.50it/s][A
Iteration:  72%|███████▏  | 584/807 [06:36<02:35,  1.44it/s][A

	Training loss :  1.302002865668029



Iteration:  72%|███████▏  | 585/807 [06:37<02:31,  1.46it/s][A
Iteration:  73%|███████▎  | 586/807 [06:37<02:29,  1.48it/s][A
Iteration:  73%|███████▎  | 587/807 [06:38<02:26,  1.50it/s][A
Iteration:  73%|███████▎  | 588/807 [06:39<02:32,  1.44it/s][A

	Training loss :  1.2998678422704035



Iteration:  73%|███████▎  | 589/807 [06:39<02:29,  1.46it/s][A
Iteration:  73%|███████▎  | 590/807 [06:40<02:26,  1.48it/s][A
Iteration:  73%|███████▎  | 591/807 [06:41<02:24,  1.50it/s][A
Iteration:  73%|███████▎  | 592/807 [06:41<02:29,  1.43it/s][A

	Training loss :  1.2984696725333058



Iteration:  73%|███████▎  | 593/807 [06:42<02:26,  1.46it/s][A
Iteration:  74%|███████▎  | 594/807 [06:43<02:23,  1.48it/s][A
Iteration:  74%|███████▎  | 595/807 [06:43<02:21,  1.50it/s][A
Iteration:  74%|███████▍  | 596/807 [06:44<02:26,  1.44it/s][A

	Training loss :  1.297079113305815



Iteration:  74%|███████▍  | 597/807 [06:45<02:23,  1.47it/s][A
Iteration:  74%|███████▍  | 598/807 [06:45<02:20,  1.49it/s][A
Iteration:  74%|███████▍  | 599/807 [06:46<02:18,  1.51it/s][A
Iteration:  74%|███████▍  | 600/807 [06:47<02:23,  1.44it/s][A

	Training loss :  1.3005430410305658



Iteration:  74%|███████▍  | 601/807 [06:47<02:20,  1.47it/s][A
Iteration:  75%|███████▍  | 602/807 [06:48<02:17,  1.49it/s][A
Iteration:  75%|███████▍  | 603/807 [06:49<02:15,  1.50it/s][A
Iteration:  75%|███████▍  | 604/807 [06:49<02:21,  1.44it/s][A

	Training loss :  1.3003386284736609



Iteration:  75%|███████▍  | 605/807 [06:50<02:17,  1.47it/s][A
Iteration:  75%|███████▌  | 606/807 [06:51<02:15,  1.48it/s][A
Iteration:  75%|███████▌  | 607/807 [06:51<02:13,  1.50it/s][A
Iteration:  75%|███████▌  | 608/807 [06:52<02:18,  1.44it/s][A

	Training loss :  1.2988351766804331



Iteration:  75%|███████▌  | 609/807 [06:53<02:15,  1.47it/s][A
Iteration:  76%|███████▌  | 610/807 [06:54<02:13,  1.48it/s][A
Iteration:  76%|███████▌  | 611/807 [06:54<02:11,  1.49it/s][A
Iteration:  76%|███████▌  | 612/807 [06:55<02:15,  1.43it/s][A

	Training loss :  1.2977500547186223



Iteration:  76%|███████▌  | 613/807 [06:56<02:12,  1.46it/s][A
Iteration:  76%|███████▌  | 614/807 [06:56<02:10,  1.48it/s][A
Iteration:  76%|███████▌  | 615/807 [06:57<02:08,  1.50it/s][A
Iteration:  76%|███████▋  | 616/807 [06:58<02:12,  1.44it/s][A

	Training loss :  1.299682885698684



Iteration:  76%|███████▋  | 617/807 [06:58<02:09,  1.47it/s][A
Iteration:  77%|███████▋  | 618/807 [06:59<02:07,  1.49it/s][A
Iteration:  77%|███████▋  | 619/807 [07:00<02:04,  1.50it/s][A
Iteration:  77%|███████▋  | 620/807 [07:00<02:09,  1.44it/s][A

	Training loss :  1.3001870360105268



Iteration:  77%|███████▋  | 621/807 [07:01<02:07,  1.46it/s][A
Iteration:  77%|███████▋  | 622/807 [07:02<02:04,  1.49it/s][A
Iteration:  77%|███████▋  | 623/807 [07:02<02:02,  1.50it/s][A
Iteration:  77%|███████▋  | 624/807 [07:03<02:07,  1.44it/s][A

	Training loss :  1.299029556222451



Iteration:  77%|███████▋  | 625/807 [07:04<02:04,  1.47it/s][A
Iteration:  78%|███████▊  | 626/807 [07:04<02:01,  1.49it/s][A
Iteration:  78%|███████▊  | 627/807 [07:05<01:59,  1.50it/s][A
Iteration:  78%|███████▊  | 628/807 [07:06<02:04,  1.44it/s][A

	Training loss :  1.2971796461731007



Iteration:  78%|███████▊  | 629/807 [07:06<02:01,  1.47it/s][A
Iteration:  78%|███████▊  | 630/807 [07:07<01:59,  1.48it/s][A
Iteration:  78%|███████▊  | 631/807 [07:08<01:57,  1.50it/s][A
Iteration:  78%|███████▊  | 632/807 [07:09<02:01,  1.44it/s][A

	Training loss :  1.2946489715500722



Iteration:  78%|███████▊  | 633/807 [07:09<01:58,  1.47it/s][A
Iteration:  79%|███████▊  | 634/807 [07:10<01:56,  1.49it/s][A
Iteration:  79%|███████▊  | 635/807 [07:10<01:54,  1.50it/s][A
Iteration:  79%|███████▉  | 636/807 [07:11<01:58,  1.44it/s][A

	Training loss :  1.295888955863017



Iteration:  79%|███████▉  | 637/807 [07:12<01:55,  1.47it/s][A
Iteration:  79%|███████▉  | 638/807 [07:13<01:54,  1.48it/s][A
Iteration:  79%|███████▉  | 639/807 [07:13<01:52,  1.50it/s][A
Iteration:  79%|███████▉  | 640/807 [07:14<01:56,  1.43it/s][A

	Training loss :  1.2971913545392453



Iteration:  79%|███████▉  | 641/807 [07:15<01:53,  1.47it/s][A
Iteration:  80%|███████▉  | 642/807 [07:15<01:51,  1.48it/s][A
Iteration:  80%|███████▉  | 643/807 [07:16<01:49,  1.50it/s][A
Iteration:  80%|███████▉  | 644/807 [07:17<01:53,  1.44it/s][A

	Training loss :  1.2972114622037603



Iteration:  80%|███████▉  | 645/807 [07:17<01:50,  1.46it/s][A
Iteration:  80%|████████  | 646/807 [07:18<01:48,  1.48it/s][A
Iteration:  80%|████████  | 647/807 [07:19<01:47,  1.49it/s][A
Iteration:  80%|████████  | 648/807 [07:19<01:50,  1.43it/s][A

	Training loss :  1.2975540027758221



Iteration:  80%|████████  | 649/807 [07:20<01:47,  1.46it/s][A
Iteration:  81%|████████  | 650/807 [07:21<01:45,  1.48it/s][A
Iteration:  81%|████████  | 651/807 [07:21<01:43,  1.50it/s][A
Iteration:  81%|████████  | 652/807 [07:22<01:47,  1.44it/s][A

	Training loss :  1.2993573176166031



Iteration:  81%|████████  | 653/807 [07:23<01:44,  1.47it/s][A
Iteration:  81%|████████  | 654/807 [07:23<01:42,  1.49it/s][A
Iteration:  81%|████████  | 655/807 [07:24<01:41,  1.50it/s][A
Iteration:  81%|████████▏ | 656/807 [07:25<01:44,  1.44it/s][A

	Training loss :  1.3001957674760645



Iteration:  81%|████████▏ | 657/807 [07:25<01:42,  1.47it/s][A
Iteration:  82%|████████▏ | 658/807 [07:26<01:39,  1.49it/s][A
Iteration:  82%|████████▏ | 659/807 [07:27<01:38,  1.50it/s][A
Iteration:  82%|████████▏ | 660/807 [07:28<01:41,  1.44it/s][A

	Training loss :  1.3028668359373554



Iteration:  82%|████████▏ | 661/807 [07:28<01:39,  1.46it/s][A
Iteration:  82%|████████▏ | 662/807 [07:29<01:37,  1.48it/s][A
Iteration:  82%|████████▏ | 663/807 [07:29<01:35,  1.50it/s][A
Iteration:  82%|████████▏ | 664/807 [07:30<01:39,  1.44it/s][A

	Training loss :  1.302823136100568



Iteration:  82%|████████▏ | 665/807 [07:31<01:36,  1.47it/s][A
Iteration:  83%|████████▎ | 666/807 [07:32<01:34,  1.48it/s][A
Iteration:  83%|████████▎ | 667/807 [07:32<01:33,  1.50it/s][A
Iteration:  83%|████████▎ | 668/807 [07:33<01:36,  1.44it/s][A

	Training loss :  1.3032117033611514



Iteration:  83%|████████▎ | 669/807 [07:34<01:34,  1.46it/s][A
Iteration:  83%|████████▎ | 670/807 [07:34<01:32,  1.49it/s][A
Iteration:  83%|████████▎ | 671/807 [07:35<01:30,  1.50it/s][A
Iteration:  83%|████████▎ | 672/807 [07:36<01:33,  1.44it/s][A

	Training loss :  1.3023494371168671



Iteration:  83%|████████▎ | 673/807 [07:36<01:31,  1.47it/s][A
Iteration:  84%|████████▎ | 674/807 [07:37<01:29,  1.48it/s][A
Iteration:  84%|████████▎ | 675/807 [07:38<01:27,  1.50it/s][A
Iteration:  84%|████████▍ | 676/807 [07:38<01:30,  1.44it/s][A

	Training loss :  1.3036723183633308



Iteration:  84%|████████▍ | 677/807 [07:39<01:28,  1.47it/s][A
Iteration:  84%|████████▍ | 678/807 [07:40<01:26,  1.49it/s][A
Iteration:  84%|████████▍ | 679/807 [07:40<01:25,  1.50it/s][A
Iteration:  84%|████████▍ | 680/807 [07:41<01:28,  1.43it/s][A

	Training loss :  1.3030056063743198



Iteration:  84%|████████▍ | 681/807 [07:42<01:25,  1.47it/s][A
Iteration:  85%|████████▍ | 682/807 [07:42<01:24,  1.48it/s][A
Iteration:  85%|████████▍ | 683/807 [07:43<01:22,  1.50it/s][A
Iteration:  85%|████████▍ | 684/807 [07:44<01:25,  1.44it/s][A

	Training loss :  1.3036931042957027



Iteration:  85%|████████▍ | 685/807 [07:44<01:23,  1.47it/s][A
Iteration:  85%|████████▌ | 686/807 [07:45<01:21,  1.49it/s][A
Iteration:  85%|████████▌ | 687/807 [07:46<01:19,  1.51it/s][A
Iteration:  85%|████████▌ | 688/807 [07:47<01:22,  1.44it/s][A

	Training loss :  1.3018383640189504



Iteration:  85%|████████▌ | 689/807 [07:47<01:20,  1.47it/s][A
Iteration:  86%|████████▌ | 690/807 [07:48<01:18,  1.49it/s][A
Iteration:  86%|████████▌ | 691/807 [07:48<01:17,  1.50it/s][A
Iteration:  86%|████████▌ | 692/807 [07:49<01:19,  1.44it/s][A

	Training loss :  1.3015232391095575



Iteration:  86%|████████▌ | 693/807 [07:50<01:17,  1.47it/s][A
Iteration:  86%|████████▌ | 694/807 [07:51<01:15,  1.49it/s][A
Iteration:  86%|████████▌ | 695/807 [07:51<01:14,  1.50it/s][A
Iteration:  86%|████████▌ | 696/807 [07:52<01:17,  1.44it/s][A

	Training loss :  1.3002365160433726



Iteration:  86%|████████▋ | 697/807 [07:53<01:15,  1.47it/s][A
Iteration:  86%|████████▋ | 698/807 [07:53<01:13,  1.49it/s][A
Iteration:  87%|████████▋ | 699/807 [07:54<01:11,  1.50it/s][A
Iteration:  87%|████████▋ | 700/807 [07:55<01:14,  1.44it/s][A

	Training loss :  1.3013974088430404



Iteration:  87%|████████▋ | 701/807 [07:55<01:12,  1.47it/s][A
Iteration:  87%|████████▋ | 702/807 [07:56<01:10,  1.48it/s][A
Iteration:  87%|████████▋ | 703/807 [07:57<01:09,  1.50it/s][A
Iteration:  87%|████████▋ | 704/807 [07:57<01:11,  1.43it/s][A

	Training loss :  1.3024567710235715



Iteration:  87%|████████▋ | 705/807 [07:58<01:09,  1.46it/s][A
Iteration:  87%|████████▋ | 706/807 [07:59<01:08,  1.48it/s][A
Iteration:  88%|████████▊ | 707/807 [07:59<01:06,  1.50it/s][A
Iteration:  88%|████████▊ | 708/807 [08:00<01:08,  1.44it/s][A

	Training loss :  1.3012715245539186



Iteration:  88%|████████▊ | 709/807 [08:01<01:06,  1.47it/s][A
Iteration:  88%|████████▊ | 710/807 [08:01<01:05,  1.49it/s][A
Iteration:  88%|████████▊ | 711/807 [08:02<01:03,  1.51it/s][A
Iteration:  88%|████████▊ | 712/807 [08:03<01:05,  1.44it/s][A

	Training loss :  1.3002214405811234



Iteration:  88%|████████▊ | 713/807 [08:03<01:04,  1.47it/s][A
Iteration:  88%|████████▊ | 714/807 [08:04<01:02,  1.49it/s][A
Iteration:  89%|████████▊ | 715/807 [08:05<01:01,  1.50it/s][A
Iteration:  89%|████████▊ | 716/807 [08:06<01:03,  1.44it/s][A

	Training loss :  1.2994822157994328



Iteration:  89%|████████▉ | 717/807 [08:06<01:01,  1.47it/s][A
Iteration:  89%|████████▉ | 718/807 [08:07<00:59,  1.48it/s][A
Iteration:  89%|████████▉ | 719/807 [08:08<00:58,  1.50it/s][A
Iteration:  89%|████████▉ | 720/807 [08:08<01:00,  1.44it/s][A

	Training loss :  1.2985251187450355



Iteration:  89%|████████▉ | 721/807 [08:09<00:58,  1.47it/s][A
Iteration:  89%|████████▉ | 722/807 [08:10<00:57,  1.49it/s][A
Iteration:  90%|████████▉ | 723/807 [08:10<00:55,  1.50it/s][A
Iteration:  90%|████████▉ | 724/807 [08:11<00:57,  1.44it/s][A

	Training loss :  1.296929307775932



Iteration:  90%|████████▉ | 725/807 [08:12<00:55,  1.47it/s][A
Iteration:  90%|████████▉ | 726/807 [08:12<00:54,  1.49it/s][A
Iteration:  90%|█████████ | 727/807 [08:13<00:53,  1.50it/s][A
Iteration:  90%|█████████ | 728/807 [08:14<00:55,  1.43it/s][A

	Training loss :  1.2970956995018892



Iteration:  90%|█████████ | 729/807 [08:14<00:53,  1.47it/s][A
Iteration:  90%|█████████ | 730/807 [08:15<00:51,  1.49it/s][A
Iteration:  91%|█████████ | 731/807 [08:16<00:50,  1.50it/s][A
Iteration:  91%|█████████ | 732/807 [08:16<00:52,  1.44it/s][A

	Training loss :  1.2967320696496574



Iteration:  91%|█████████ | 733/807 [08:17<00:50,  1.47it/s][A
Iteration:  91%|█████████ | 734/807 [08:18<00:49,  1.49it/s][A
Iteration:  91%|█████████ | 735/807 [08:18<00:47,  1.51it/s][A
Iteration:  91%|█████████ | 736/807 [08:19<00:49,  1.44it/s][A

	Training loss :  1.2975676026924148



Iteration:  91%|█████████▏| 737/807 [08:20<00:47,  1.47it/s][A
Iteration:  91%|█████████▏| 738/807 [08:20<00:46,  1.49it/s][A
Iteration:  92%|█████████▏| 739/807 [08:21<00:45,  1.51it/s][A
Iteration:  92%|█████████▏| 740/807 [08:22<00:46,  1.44it/s][A

	Training loss :  1.296214960112765



Iteration:  92%|█████████▏| 741/807 [08:22<00:44,  1.47it/s][A
Iteration:  92%|█████████▏| 742/807 [08:23<00:43,  1.49it/s][A
Iteration:  92%|█████████▏| 743/807 [08:24<00:42,  1.50it/s][A
Iteration:  92%|█████████▏| 744/807 [08:25<00:43,  1.44it/s][A

	Training loss :  1.2958450891397975



Iteration:  92%|█████████▏| 745/807 [08:25<00:42,  1.47it/s][A
Iteration:  92%|█████████▏| 746/807 [08:26<00:40,  1.49it/s][A
Iteration:  93%|█████████▎| 747/807 [08:26<00:39,  1.50it/s][A
Iteration:  93%|█████████▎| 748/807 [08:27<00:41,  1.44it/s][A

	Training loss :  1.2945141676355174



Iteration:  93%|█████████▎| 749/807 [08:28<00:39,  1.47it/s][A
Iteration:  93%|█████████▎| 750/807 [08:29<00:38,  1.49it/s][A
Iteration:  93%|█████████▎| 751/807 [08:29<00:37,  1.50it/s][A
Iteration:  93%|█████████▎| 752/807 [08:30<00:38,  1.44it/s][A

	Training loss :  1.2931898804975952



Iteration:  93%|█████████▎| 753/807 [08:31<00:36,  1.47it/s][A
Iteration:  93%|█████████▎| 754/807 [08:31<00:35,  1.48it/s][A
Iteration:  94%|█████████▎| 755/807 [08:32<00:34,  1.50it/s][A
Iteration:  94%|█████████▎| 756/807 [08:33<00:35,  1.44it/s][A

	Training loss :  1.292885025697095



Iteration:  94%|█████████▍| 757/807 [08:33<00:34,  1.47it/s][A
Iteration:  94%|█████████▍| 758/807 [08:34<00:33,  1.48it/s][A
Iteration:  94%|█████████▍| 759/807 [08:35<00:31,  1.50it/s][A
Iteration:  94%|█████████▍| 760/807 [08:35<00:32,  1.44it/s][A

	Training loss :  1.293486438338694



Iteration:  94%|█████████▍| 761/807 [08:36<00:31,  1.47it/s][A
Iteration:  94%|█████████▍| 762/807 [08:37<00:30,  1.49it/s][A
Iteration:  95%|█████████▍| 763/807 [08:37<00:29,  1.50it/s][A
Iteration:  95%|█████████▍| 764/807 [08:38<00:29,  1.44it/s][A

	Training loss :  1.2969358629347143



Iteration:  95%|█████████▍| 765/807 [08:39<00:28,  1.46it/s][A
Iteration:  95%|█████████▍| 766/807 [08:39<00:27,  1.49it/s][A
Iteration:  95%|█████████▌| 767/807 [08:40<00:26,  1.50it/s][A
Iteration:  95%|█████████▌| 768/807 [08:41<00:27,  1.44it/s][A

	Training loss :  1.297736757939371



Iteration:  95%|█████████▌| 769/807 [08:41<00:25,  1.47it/s][A
Iteration:  95%|█████████▌| 770/807 [08:42<00:24,  1.49it/s][A
Iteration:  96%|█████████▌| 771/807 [08:43<00:23,  1.50it/s][A
Iteration:  96%|█████████▌| 772/807 [08:44<00:24,  1.44it/s][A

	Training loss :  1.299036274186379



Iteration:  96%|█████████▌| 773/807 [08:44<00:23,  1.47it/s][A
Iteration:  96%|█████████▌| 774/807 [08:45<00:22,  1.49it/s][A
Iteration:  96%|█████████▌| 775/807 [08:45<00:21,  1.51it/s][A
Iteration:  96%|█████████▌| 776/807 [08:46<00:21,  1.45it/s][A

	Training loss :  1.2984787534115856



Iteration:  96%|█████████▋| 777/807 [08:47<00:20,  1.48it/s][A
Iteration:  96%|█████████▋| 778/807 [08:48<00:19,  1.49it/s][A
Iteration:  97%|█████████▋| 779/807 [08:48<00:18,  1.50it/s][A
Iteration:  97%|█████████▋| 780/807 [08:49<00:18,  1.44it/s][A

	Training loss :  1.2972133515354916



Iteration:  97%|█████████▋| 781/807 [08:50<00:17,  1.47it/s][A
Iteration:  97%|█████████▋| 782/807 [08:50<00:16,  1.49it/s][A
Iteration:  97%|█████████▋| 783/807 [08:51<00:15,  1.51it/s][A
Iteration:  97%|█████████▋| 784/807 [08:52<00:15,  1.44it/s][A

	Training loss :  1.2983263085919374



Iteration:  97%|█████████▋| 785/807 [08:52<00:14,  1.47it/s][A
Iteration:  97%|█████████▋| 786/807 [08:53<00:14,  1.49it/s][A
Iteration:  98%|█████████▊| 787/807 [08:54<00:13,  1.50it/s][A
Iteration:  98%|█████████▊| 788/807 [08:54<00:13,  1.44it/s][A

	Training loss :  1.2969120155055511



Iteration:  98%|█████████▊| 789/807 [08:55<00:12,  1.46it/s][A
Iteration:  98%|█████████▊| 790/807 [08:56<00:11,  1.49it/s][A
Iteration:  98%|█████████▊| 791/807 [08:56<00:10,  1.50it/s][A
Iteration:  98%|█████████▊| 792/807 [08:57<00:10,  1.44it/s][A

	Training loss :  1.2972370151693773



Iteration:  98%|█████████▊| 793/807 [08:58<00:09,  1.47it/s][A
Iteration:  98%|█████████▊| 794/807 [08:58<00:08,  1.49it/s][A
Iteration:  99%|█████████▊| 795/807 [08:59<00:07,  1.51it/s][A
Iteration:  99%|█████████▊| 796/807 [09:00<00:07,  1.44it/s][A

	Training loss :  1.2960586851490803



Iteration:  99%|█████████▉| 797/807 [09:00<00:06,  1.47it/s][A
Iteration:  99%|█████████▉| 798/807 [09:01<00:06,  1.49it/s][A
Iteration:  99%|█████████▉| 799/807 [09:02<00:05,  1.50it/s][A
Iteration:  99%|█████████▉| 800/807 [09:03<00:04,  1.44it/s][A

	Training loss :  1.2995129146799445



Iteration:  99%|█████████▉| 801/807 [09:03<00:04,  1.47it/s][A
Iteration:  99%|█████████▉| 802/807 [09:04<00:03,  1.49it/s][A
Iteration: 100%|█████████▉| 803/807 [09:04<00:02,  1.50it/s][A
Iteration: 100%|█████████▉| 804/807 [09:05<00:02,  1.44it/s][A

	Training loss :  1.2994454139426572



Iteration: 100%|█████████▉| 805/807 [09:06<00:01,  1.46it/s][A
Iteration: 100%|█████████▉| 806/807 [09:07<00:00,  1.48it/s][A
Iteration: 100%|██████████| 807/807 [09:07<00:00,  1.47it/s]
Epoch:  67%|██████▋   | 2/3 [18:08<09:02, 542.59s/it]
Iteration:   0%|          | 0/807 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/807 [00:00<08:41,  1.54it/s][A
Iteration:   0%|          | 2/807 [00:01<08:41,  1.54it/s][A
Iteration:   0%|          | 3/807 [00:01<08:40,  1.54it/s][A
Iteration:   0%|          | 4/807 [00:02<09:08,  1.46it/s][A

	Training loss :  1.937175691127777



Iteration:   1%|          | 5/807 [00:03<09:00,  1.49it/s][A
Iteration:   1%|          | 6/807 [00:04<08:54,  1.50it/s][A
Iteration:   1%|          | 7/807 [00:04<08:49,  1.51it/s][A
Iteration:   1%|          | 8/807 [00:05<09:11,  1.45it/s][A

	Training loss :  1.582140825688839



Iteration:   1%|          | 9/807 [00:06<09:00,  1.48it/s][A
Iteration:   1%|          | 10/807 [00:06<08:53,  1.49it/s][A
Iteration:   1%|▏         | 11/807 [00:07<08:49,  1.50it/s][A
Iteration:   1%|▏         | 12/807 [00:08<09:15,  1.43it/s][A

	Training loss :  1.4924138536055882



Iteration:   2%|▏         | 13/807 [00:08<09:05,  1.46it/s][A
Iteration:   2%|▏         | 14/807 [00:09<08:56,  1.48it/s][A
Iteration:   2%|▏         | 15/807 [00:10<08:49,  1.50it/s][A
Iteration:   2%|▏         | 16/807 [00:10<09:10,  1.44it/s][A

	Training loss :  1.470871690660715



Iteration:   2%|▏         | 17/807 [00:11<08:59,  1.46it/s][A
Iteration:   2%|▏         | 18/807 [00:12<08:52,  1.48it/s][A
Iteration:   2%|▏         | 19/807 [00:12<08:44,  1.50it/s][A
Iteration:   2%|▏         | 20/807 [00:13<09:06,  1.44it/s][A

	Training loss :  1.4113427072763443



Iteration:   3%|▎         | 21/807 [00:14<08:56,  1.46it/s][A
Iteration:   3%|▎         | 22/807 [00:14<08:48,  1.49it/s][A
Iteration:   3%|▎         | 23/807 [00:15<08:41,  1.50it/s][A
Iteration:   3%|▎         | 24/807 [00:16<09:02,  1.44it/s][A

	Training loss :  1.3812228664755821



Iteration:   3%|▎         | 25/807 [00:16<08:51,  1.47it/s][A
Iteration:   3%|▎         | 26/807 [00:17<08:44,  1.49it/s][A
Iteration:   3%|▎         | 27/807 [00:18<08:39,  1.50it/s][A
Iteration:   3%|▎         | 28/807 [00:19<09:01,  1.44it/s][A

	Training loss :  1.346449343221528



Iteration:   4%|▎         | 29/807 [00:19<08:50,  1.47it/s][A
Iteration:   4%|▎         | 30/807 [00:20<08:44,  1.48it/s][A
Iteration:   4%|▍         | 31/807 [00:20<08:37,  1.50it/s][A
Iteration:   4%|▍         | 32/807 [00:21<08:59,  1.44it/s][A

	Training loss :  1.350021967664361



Iteration:   4%|▍         | 33/807 [00:22<08:49,  1.46it/s][A
Iteration:   4%|▍         | 34/807 [00:23<08:41,  1.48it/s][A
Iteration:   4%|▍         | 35/807 [00:23<08:36,  1.50it/s][A
Iteration:   4%|▍         | 36/807 [00:24<08:57,  1.43it/s][A

	Training loss :  1.3507117943631277



Iteration:   5%|▍         | 37/807 [00:25<08:44,  1.47it/s][A
Iteration:   5%|▍         | 38/807 [00:25<08:38,  1.48it/s][A
Iteration:   5%|▍         | 39/807 [00:26<08:31,  1.50it/s][A
Iteration:   5%|▍         | 40/807 [00:27<08:53,  1.44it/s][A

	Training loss :  1.3401042893528938



Iteration:   5%|▌         | 41/807 [00:27<08:42,  1.47it/s][A
Iteration:   5%|▌         | 42/807 [00:28<08:35,  1.49it/s][A
Iteration:   5%|▌         | 43/807 [00:29<08:29,  1.50it/s][A
Iteration:   5%|▌         | 44/807 [00:29<08:53,  1.43it/s][A

	Training loss :  1.3653362135995517



Iteration:   6%|▌         | 45/807 [00:30<08:39,  1.47it/s][A
Iteration:   6%|▌         | 46/807 [00:31<08:33,  1.48it/s][A
Iteration:   6%|▌         | 47/807 [00:31<08:27,  1.50it/s][A
Iteration:   6%|▌         | 48/807 [00:32<08:50,  1.43it/s][A

	Training loss :  1.3660147450864315



Iteration:   6%|▌         | 49/807 [00:33<08:38,  1.46it/s][A
Iteration:   6%|▌         | 50/807 [00:33<08:31,  1.48it/s][A
Iteration:   6%|▋         | 51/807 [00:34<08:24,  1.50it/s][A
Iteration:   6%|▋         | 52/807 [00:35<08:46,  1.43it/s][A

	Training loss :  1.3475929074562514



Iteration:   7%|▋         | 53/807 [00:35<08:34,  1.47it/s][A
Iteration:   7%|▋         | 54/807 [00:36<08:27,  1.48it/s][A
Iteration:   7%|▋         | 55/807 [00:37<08:21,  1.50it/s][A
Iteration:   7%|▋         | 56/807 [00:38<08:43,  1.43it/s][A

	Training loss :  1.326062879392079



Iteration:   7%|▋         | 57/807 [00:38<08:33,  1.46it/s][A
Iteration:   7%|▋         | 58/807 [00:39<08:25,  1.48it/s][A
Iteration:   7%|▋         | 59/807 [00:40<08:18,  1.50it/s][A
Iteration:   7%|▋         | 60/807 [00:40<08:40,  1.44it/s][A

	Training loss :  1.3355980644623437



Iteration:   8%|▊         | 61/807 [00:41<08:28,  1.47it/s][A
Iteration:   8%|▊         | 62/807 [00:42<08:21,  1.48it/s][A
Iteration:   8%|▊         | 63/807 [00:42<08:16,  1.50it/s][A
Iteration:   8%|▊         | 64/807 [00:43<08:37,  1.44it/s][A

	Training loss :  1.3097493192180991



Iteration:   8%|▊         | 65/807 [00:44<08:27,  1.46it/s][A
Iteration:   8%|▊         | 66/807 [00:44<08:19,  1.48it/s][A
Iteration:   8%|▊         | 67/807 [00:45<08:14,  1.50it/s][A
Iteration:   8%|▊         | 68/807 [00:46<08:35,  1.43it/s][A

	Training loss :  1.3021893580170238



Iteration:   9%|▊         | 69/807 [00:46<08:24,  1.46it/s][A
Iteration:   9%|▊         | 70/807 [00:47<08:17,  1.48it/s][A
Iteration:   9%|▉         | 71/807 [00:48<08:12,  1.49it/s][A
Iteration:   9%|▉         | 72/807 [00:48<08:32,  1.43it/s][A

	Training loss :  1.2937708273530006



Iteration:   9%|▉         | 73/807 [00:49<08:22,  1.46it/s][A
Iteration:   9%|▉         | 74/807 [00:50<08:16,  1.48it/s][A
Iteration:   9%|▉         | 75/807 [00:50<08:09,  1.49it/s][A
Iteration:   9%|▉         | 76/807 [00:51<08:29,  1.44it/s][A

	Training loss :  1.2903218120336533



Iteration:  10%|▉         | 77/807 [00:52<08:18,  1.46it/s][A
Iteration:  10%|▉         | 78/807 [00:52<08:11,  1.48it/s][A
Iteration:  10%|▉         | 79/807 [00:53<08:04,  1.50it/s][A
Iteration:  10%|▉         | 80/807 [00:54<08:24,  1.44it/s][A

	Training loss :  1.2912701152265071



Iteration:  10%|█         | 81/807 [00:55<08:13,  1.47it/s][A
Iteration:  10%|█         | 82/807 [00:55<08:07,  1.49it/s][A
Iteration:  10%|█         | 83/807 [00:56<08:00,  1.51it/s][A
Iteration:  10%|█         | 84/807 [00:57<08:21,  1.44it/s][A

	Training loss :  1.30351561378865



Iteration:  11%|█         | 85/807 [00:57<08:11,  1.47it/s][A
Iteration:  11%|█         | 86/807 [00:58<08:03,  1.49it/s][A
Iteration:  11%|█         | 87/807 [00:59<07:58,  1.50it/s][A
Iteration:  11%|█         | 88/807 [00:59<08:18,  1.44it/s][A

	Training loss :  1.2924352471124043



Iteration:  11%|█         | 89/807 [01:00<08:09,  1.47it/s][A
Iteration:  11%|█         | 90/807 [01:01<08:02,  1.49it/s][A
Iteration:  11%|█▏        | 91/807 [01:01<07:56,  1.50it/s][A
Iteration:  11%|█▏        | 92/807 [01:02<08:15,  1.44it/s][A

	Training loss :  1.2994794256013373



Iteration:  12%|█▏        | 93/807 [01:03<08:07,  1.46it/s][A
Iteration:  12%|█▏        | 94/807 [01:03<08:01,  1.48it/s][A
Iteration:  12%|█▏        | 95/807 [01:04<07:55,  1.50it/s][A
Iteration:  12%|█▏        | 96/807 [01:05<08:14,  1.44it/s][A

	Training loss :  1.3002555376539628



Iteration:  12%|█▏        | 97/807 [01:05<08:03,  1.47it/s][A
Iteration:  12%|█▏        | 98/807 [01:06<07:56,  1.49it/s][A
Iteration:  12%|█▏        | 99/807 [01:07<07:50,  1.50it/s][A
Iteration:  12%|█▏        | 100/807 [01:07<08:11,  1.44it/s][A

	Training loss :  1.2966328924894333



Iteration:  13%|█▎        | 101/807 [01:08<08:01,  1.47it/s][A
Iteration:  13%|█▎        | 102/807 [01:09<07:55,  1.48it/s][A
Iteration:  13%|█▎        | 103/807 [01:09<07:49,  1.50it/s][A
Iteration:  13%|█▎        | 104/807 [01:10<08:09,  1.44it/s][A

	Training loss :  1.3078075526998594



Iteration:  13%|█▎        | 105/807 [01:11<08:00,  1.46it/s][A
Iteration:  13%|█▎        | 106/807 [01:11<07:53,  1.48it/s][A
Iteration:  13%|█▎        | 107/807 [01:12<07:47,  1.50it/s][A
Iteration:  13%|█▎        | 108/807 [01:13<08:06,  1.44it/s][A

	Training loss :  1.3186287587439571



Iteration:  14%|█▎        | 109/807 [01:14<07:57,  1.46it/s][A
Iteration:  14%|█▎        | 110/807 [01:14<07:50,  1.48it/s][A
Iteration:  14%|█▍        | 111/807 [01:15<07:43,  1.50it/s][A
Iteration:  14%|█▍        | 112/807 [01:16<08:03,  1.44it/s][A

	Training loss :  1.3152216954955034



Iteration:  14%|█▍        | 113/807 [01:16<07:53,  1.47it/s][A
Iteration:  14%|█▍        | 114/807 [01:17<07:48,  1.48it/s][A
Iteration:  14%|█▍        | 115/807 [01:18<07:41,  1.50it/s][A
Iteration:  14%|█▍        | 116/807 [01:18<08:02,  1.43it/s][A

	Training loss :  1.3093722015619278



Iteration:  14%|█▍        | 117/807 [01:19<07:53,  1.46it/s][A
Iteration:  15%|█▍        | 118/807 [01:20<07:46,  1.48it/s][A
Iteration:  15%|█▍        | 119/807 [01:20<07:41,  1.49it/s][A
Iteration:  15%|█▍        | 120/807 [01:21<08:00,  1.43it/s][A

	Training loss :  1.315090829630693



Iteration:  15%|█▍        | 121/807 [01:22<07:49,  1.46it/s][A
Iteration:  15%|█▌        | 122/807 [01:22<07:42,  1.48it/s][A
Iteration:  15%|█▌        | 123/807 [01:23<07:35,  1.50it/s][A
Iteration:  15%|█▌        | 124/807 [01:24<07:55,  1.44it/s][A

	Training loss :  1.3163178587152111



Iteration:  15%|█▌        | 125/807 [01:24<07:45,  1.47it/s][A
Iteration:  16%|█▌        | 126/807 [01:25<07:39,  1.48it/s][A
Iteration:  16%|█▌        | 127/807 [01:26<07:34,  1.50it/s][A
Iteration:  16%|█▌        | 128/807 [01:27<07:52,  1.44it/s][A

	Training loss :  1.3154006353579462



Iteration:  16%|█▌        | 129/807 [01:27<07:40,  1.47it/s][A
Iteration:  16%|█▌        | 130/807 [01:28<07:35,  1.49it/s][A
Iteration:  16%|█▌        | 131/807 [01:28<07:28,  1.51it/s][A
Iteration:  16%|█▋        | 132/807 [01:29<07:50,  1.43it/s][A

	Training loss :  1.3092208409851247



Iteration:  16%|█▋        | 133/807 [01:30<07:40,  1.46it/s][A
Iteration:  17%|█▋        | 134/807 [01:31<07:34,  1.48it/s][A
Iteration:  17%|█▋        | 135/807 [01:31<07:29,  1.50it/s][A
Iteration:  17%|█▋        | 136/807 [01:32<07:47,  1.44it/s][A

	Training loss :  1.311061204794575



Iteration:  17%|█▋        | 137/807 [01:33<07:37,  1.46it/s][A
Iteration:  17%|█▋        | 138/807 [01:33<07:29,  1.49it/s][A
Iteration:  17%|█▋        | 139/807 [01:34<07:24,  1.50it/s][A
Iteration:  17%|█▋        | 140/807 [01:35<07:44,  1.44it/s][A

	Training loss :  1.3052058113472802



Iteration:  17%|█▋        | 141/807 [01:35<07:33,  1.47it/s][A
Iteration:  18%|█▊        | 142/807 [01:36<07:27,  1.48it/s][A
Iteration:  18%|█▊        | 143/807 [01:37<07:22,  1.50it/s][A
Iteration:  18%|█▊        | 144/807 [01:37<07:42,  1.43it/s][A

	Training loss :  1.3012823549409707



Iteration:  18%|█▊        | 145/807 [01:38<07:31,  1.47it/s][A
Iteration:  18%|█▊        | 146/807 [01:39<07:25,  1.48it/s][A
Iteration:  18%|█▊        | 147/807 [01:39<07:19,  1.50it/s][A
Iteration:  18%|█▊        | 148/807 [01:40<07:39,  1.44it/s][A

	Training loss :  1.3034060311478537



Iteration:  18%|█▊        | 149/807 [01:41<07:28,  1.47it/s][A
Iteration:  19%|█▊        | 150/807 [01:41<07:22,  1.48it/s][A
Iteration:  19%|█▊        | 151/807 [01:42<07:16,  1.50it/s][A
Iteration:  19%|█▉        | 152/807 [01:43<07:35,  1.44it/s][A

	Training loss :  1.3068357247271036



Iteration:  19%|█▉        | 153/807 [01:43<07:26,  1.47it/s][A
Iteration:  19%|█▉        | 154/807 [01:44<07:19,  1.48it/s][A
Iteration:  19%|█▉        | 155/807 [01:45<07:13,  1.50it/s][A
Iteration:  19%|█▉        | 156/807 [01:46<07:32,  1.44it/s][A

	Training loss :  1.3029233075869389



Iteration:  19%|█▉        | 157/807 [01:46<07:22,  1.47it/s][A
Iteration:  20%|█▉        | 158/807 [01:47<07:16,  1.49it/s][A
Iteration:  20%|█▉        | 159/807 [01:47<07:10,  1.50it/s][A
Iteration:  20%|█▉        | 160/807 [01:48<07:28,  1.44it/s][A

	Training loss :  1.30740828178823



Iteration:  20%|█▉        | 161/807 [01:49<07:19,  1.47it/s][A
Iteration:  20%|██        | 162/807 [01:50<07:13,  1.49it/s][A
Iteration:  20%|██        | 163/807 [01:50<07:08,  1.50it/s][A
Iteration:  20%|██        | 164/807 [01:51<07:26,  1.44it/s][A

	Training loss :  1.3017029137146183



Iteration:  20%|██        | 165/807 [01:52<07:18,  1.46it/s][A
Iteration:  21%|██        | 166/807 [01:52<07:11,  1.48it/s][A
Iteration:  21%|██        | 167/807 [01:53<07:06,  1.50it/s][A
Iteration:  21%|██        | 168/807 [01:54<07:24,  1.44it/s][A

	Training loss :  1.3057546665271123



Iteration:  21%|██        | 169/807 [01:54<07:14,  1.47it/s][A
Iteration:  21%|██        | 170/807 [01:55<07:09,  1.48it/s][A
Iteration:  21%|██        | 171/807 [01:56<07:03,  1.50it/s][A
Iteration:  21%|██▏       | 172/807 [01:56<07:21,  1.44it/s][A

	Training loss :  1.3081920763780905



Iteration:  21%|██▏       | 173/807 [01:57<07:12,  1.47it/s][A
Iteration:  22%|██▏       | 174/807 [01:58<07:05,  1.49it/s][A
Iteration:  22%|██▏       | 175/807 [01:58<07:00,  1.50it/s][A
Iteration:  22%|██▏       | 176/807 [01:59<07:17,  1.44it/s][A

	Training loss :  1.3039629618552597



Iteration:  22%|██▏       | 177/807 [02:00<07:09,  1.47it/s][A
Iteration:  22%|██▏       | 178/807 [02:00<07:03,  1.48it/s][A
Iteration:  22%|██▏       | 179/807 [02:01<06:58,  1.50it/s][A
Iteration:  22%|██▏       | 180/807 [02:02<07:15,  1.44it/s][A

	Training loss :  1.3050311631626552



Iteration:  22%|██▏       | 181/807 [02:02<07:06,  1.47it/s][A
Iteration:  23%|██▎       | 182/807 [02:03<07:00,  1.49it/s][A
Iteration:  23%|██▎       | 183/807 [02:04<06:54,  1.51it/s][A
Iteration:  23%|██▎       | 184/807 [02:05<07:11,  1.44it/s][A

	Training loss :  1.308071367766546



Iteration:  23%|██▎       | 185/807 [02:05<07:04,  1.47it/s][A
Iteration:  23%|██▎       | 186/807 [02:06<06:57,  1.49it/s][A
Iteration:  23%|██▎       | 187/807 [02:07<06:52,  1.50it/s][A
Iteration:  23%|██▎       | 188/807 [02:07<07:11,  1.43it/s][A

	Training loss :  1.3131266375805468



Iteration:  23%|██▎       | 189/807 [02:08<07:01,  1.47it/s][A
Iteration:  24%|██▎       | 190/807 [02:09<06:55,  1.49it/s][A
Iteration:  24%|██▎       | 191/807 [02:09<06:49,  1.50it/s][A
Iteration:  24%|██▍       | 192/807 [02:10<07:06,  1.44it/s][A

	Training loss :  1.3102546253552039



Iteration:  24%|██▍       | 193/807 [02:11<06:56,  1.47it/s][A
Iteration:  24%|██▍       | 194/807 [02:11<06:50,  1.49it/s][A
Iteration:  24%|██▍       | 195/807 [02:12<06:46,  1.51it/s][A
Iteration:  24%|██▍       | 196/807 [02:13<07:02,  1.44it/s][A

	Training loss :  1.3109355851703761



Iteration:  24%|██▍       | 197/807 [02:13<06:54,  1.47it/s][A
Iteration:  25%|██▍       | 198/807 [02:14<06:48,  1.49it/s][A
Iteration:  25%|██▍       | 199/807 [02:15<06:43,  1.51it/s][A
Iteration:  25%|██▍       | 200/807 [02:15<07:01,  1.44it/s][A

	Training loss :  1.308840953707695



Iteration:  25%|██▍       | 201/807 [02:16<06:52,  1.47it/s][A
Iteration:  25%|██▌       | 202/807 [02:17<06:47,  1.49it/s][A
Iteration:  25%|██▌       | 203/807 [02:17<06:42,  1.50it/s][A
Iteration:  25%|██▌       | 204/807 [02:18<07:00,  1.43it/s][A

	Training loss :  1.3098697656509923



Iteration:  25%|██▌       | 205/807 [02:19<06:51,  1.46it/s][A
Iteration:  26%|██▌       | 206/807 [02:19<06:44,  1.48it/s][A
Iteration:  26%|██▌       | 207/807 [02:20<06:40,  1.50it/s][A
Iteration:  26%|██▌       | 208/807 [02:21<06:57,  1.44it/s][A

	Training loss :  1.311662155848283



Iteration:  26%|██▌       | 209/807 [02:21<06:48,  1.46it/s][A
Iteration:  26%|██▌       | 210/807 [02:22<06:42,  1.48it/s][A
Iteration:  26%|██▌       | 211/807 [02:23<06:37,  1.50it/s][A
Iteration:  26%|██▋       | 212/807 [02:24<06:53,  1.44it/s][A

	Training loss :  1.3066917213628877



Iteration:  26%|██▋       | 213/807 [02:24<06:43,  1.47it/s][A
Iteration:  27%|██▋       | 214/807 [02:25<06:38,  1.49it/s][A
Iteration:  27%|██▋       | 215/807 [02:26<06:35,  1.50it/s][A
Iteration:  27%|██▋       | 216/807 [02:26<06:51,  1.44it/s][A

	Training loss :  1.305217347211308



Iteration:  27%|██▋       | 217/807 [02:27<06:42,  1.47it/s][A
Iteration:  27%|██▋       | 218/807 [02:28<06:36,  1.49it/s][A
Iteration:  27%|██▋       | 219/807 [02:28<06:30,  1.50it/s][A
Iteration:  27%|██▋       | 220/807 [02:29<06:48,  1.44it/s][A

	Training loss :  1.304536336660385



Iteration:  27%|██▋       | 221/807 [02:30<06:38,  1.47it/s][A
Iteration:  28%|██▊       | 222/807 [02:30<06:33,  1.49it/s][A
Iteration:  28%|██▊       | 223/807 [02:31<06:28,  1.50it/s][A
Iteration:  28%|██▊       | 224/807 [02:32<06:43,  1.44it/s][A

	Training loss :  1.3052550389298372



Iteration:  28%|██▊       | 225/807 [02:32<06:35,  1.47it/s][A
Iteration:  28%|██▊       | 226/807 [02:33<06:30,  1.49it/s][A
Iteration:  28%|██▊       | 227/807 [02:34<06:26,  1.50it/s][A
Iteration:  28%|██▊       | 228/807 [02:34<06:41,  1.44it/s][A

	Training loss :  1.300987118430305



Iteration:  28%|██▊       | 229/807 [02:35<06:34,  1.46it/s][A
Iteration:  29%|██▊       | 230/807 [02:36<06:28,  1.48it/s][A
Iteration:  29%|██▊       | 231/807 [02:36<06:23,  1.50it/s][A
Iteration:  29%|██▊       | 232/807 [02:37<06:38,  1.44it/s][A

	Training loss :  1.3077290363866707



Iteration:  29%|██▉       | 233/807 [02:38<06:31,  1.47it/s][A
Iteration:  29%|██▉       | 234/807 [02:38<06:25,  1.49it/s][A
Iteration:  29%|██▉       | 235/807 [02:39<06:20,  1.50it/s][A
Iteration:  29%|██▉       | 236/807 [02:40<06:37,  1.44it/s][A

	Training loss :  1.3048896812281365



Iteration:  29%|██▉       | 237/807 [02:40<06:29,  1.46it/s][A
Iteration:  29%|██▉       | 238/807 [02:41<06:23,  1.48it/s][A
Iteration:  30%|██▉       | 239/807 [02:42<06:18,  1.50it/s][A
Iteration:  30%|██▉       | 240/807 [02:43<06:34,  1.44it/s][A

	Training loss :  1.3057503600915272



Iteration:  30%|██▉       | 241/807 [02:43<06:25,  1.47it/s][A
Iteration:  30%|██▉       | 242/807 [02:44<06:19,  1.49it/s][A
Iteration:  30%|███       | 243/807 [02:45<06:14,  1.50it/s][A
Iteration:  30%|███       | 244/807 [02:45<06:30,  1.44it/s][A

	Training loss :  1.3032961497541333



Iteration:  30%|███       | 245/807 [02:46<06:21,  1.47it/s][A
Iteration:  30%|███       | 246/807 [02:47<06:16,  1.49it/s][A
Iteration:  31%|███       | 247/807 [02:47<06:12,  1.50it/s][A
Iteration:  31%|███       | 248/807 [02:48<06:29,  1.43it/s][A

	Training loss :  1.3025900620606639



Iteration:  31%|███       | 249/807 [02:49<06:20,  1.46it/s][A
Iteration:  31%|███       | 250/807 [02:49<06:14,  1.49it/s][A
Iteration:  31%|███       | 251/807 [02:50<06:10,  1.50it/s][A
Iteration:  31%|███       | 252/807 [02:51<06:27,  1.43it/s][A

	Training loss :  1.3024102917739324



Iteration:  31%|███▏      | 253/807 [02:51<06:17,  1.47it/s][A
Iteration:  31%|███▏      | 254/807 [02:52<06:12,  1.48it/s][A
Iteration:  32%|███▏      | 255/807 [02:53<06:07,  1.50it/s][A
Iteration:  32%|███▏      | 256/807 [02:53<06:23,  1.44it/s][A

	Training loss :  1.30294622736983



Iteration:  32%|███▏      | 257/807 [02:54<06:15,  1.46it/s][A
Iteration:  32%|███▏      | 258/807 [02:55<06:09,  1.48it/s][A
Iteration:  32%|███▏      | 259/807 [02:55<06:04,  1.50it/s][A
Iteration:  32%|███▏      | 260/807 [02:56<06:19,  1.44it/s][A

	Training loss :  1.305425368134792



Iteration:  32%|███▏      | 261/807 [02:57<06:13,  1.46it/s][A
Iteration:  32%|███▏      | 262/807 [02:57<06:06,  1.49it/s][A
Iteration:  33%|███▎      | 263/807 [02:58<06:01,  1.50it/s][A
Iteration:  33%|███▎      | 264/807 [02:59<06:16,  1.44it/s][A

	Training loss :  1.3108918544921009



Iteration:  33%|███▎      | 265/807 [02:59<06:09,  1.47it/s][A
Iteration:  33%|███▎      | 266/807 [03:00<06:03,  1.49it/s][A
Iteration:  33%|███▎      | 267/807 [03:01<06:00,  1.50it/s][A
Iteration:  33%|███▎      | 268/807 [03:02<06:15,  1.44it/s][A

	Training loss :  1.309884388944996



Iteration:  33%|███▎      | 269/807 [03:02<06:07,  1.46it/s][A
Iteration:  33%|███▎      | 270/807 [03:03<06:00,  1.49it/s][A
Iteration:  34%|███▎      | 271/807 [03:04<05:57,  1.50it/s][A
Iteration:  34%|███▎      | 272/807 [03:04<06:11,  1.44it/s][A

	Training loss :  1.3091751529889948



Iteration:  34%|███▍      | 273/807 [03:05<06:04,  1.47it/s][A
Iteration:  34%|███▍      | 274/807 [03:06<05:59,  1.48it/s][A
Iteration:  34%|███▍      | 275/807 [03:06<05:54,  1.50it/s][A
Iteration:  34%|███▍      | 276/807 [03:07<06:08,  1.44it/s][A

	Training loss :  1.3077251664970233



Iteration:  34%|███▍      | 277/807 [03:08<06:00,  1.47it/s][A
Iteration:  34%|███▍      | 278/807 [03:08<05:54,  1.49it/s][A
Iteration:  35%|███▍      | 279/807 [03:09<05:50,  1.51it/s][A
Iteration:  35%|███▍      | 280/807 [03:10<06:04,  1.44it/s][A

	Training loss :  1.3086120275514466



Iteration:  35%|███▍      | 281/807 [03:10<05:56,  1.47it/s][A
Iteration:  35%|███▍      | 282/807 [03:11<05:52,  1.49it/s][A
Iteration:  35%|███▌      | 283/807 [03:12<05:49,  1.50it/s][A
Iteration:  35%|███▌      | 284/807 [03:12<06:04,  1.43it/s][A

	Training loss :  1.3081911945007216



Iteration:  35%|███▌      | 285/807 [03:13<05:56,  1.46it/s][A
Iteration:  35%|███▌      | 286/807 [03:14<05:50,  1.49it/s][A
Iteration:  36%|███▌      | 287/807 [03:14<05:45,  1.51it/s][A
Iteration:  36%|███▌      | 288/807 [03:15<05:59,  1.44it/s][A

	Training loss :  1.305001239395804



Iteration:  36%|███▌      | 289/807 [03:16<05:53,  1.47it/s][A
Iteration:  36%|███▌      | 290/807 [03:16<05:48,  1.49it/s][A
Iteration:  36%|███▌      | 291/807 [03:17<05:43,  1.50it/s][A
Iteration:  36%|███▌      | 292/807 [03:18<05:57,  1.44it/s][A

	Training loss :  1.3037166213744307



Iteration:  36%|███▋      | 293/807 [03:18<05:50,  1.47it/s][A
Iteration:  36%|███▋      | 294/807 [03:19<05:45,  1.49it/s][A
Iteration:  37%|███▋      | 295/807 [03:20<05:40,  1.50it/s][A
Iteration:  37%|███▋      | 296/807 [03:21<05:54,  1.44it/s][A

	Training loss :  1.3028165716174487



Iteration:  37%|███▋      | 297/807 [03:21<05:47,  1.47it/s][A
Iteration:  37%|███▋      | 298/807 [03:22<05:42,  1.49it/s][A
Iteration:  37%|███▋      | 299/807 [03:23<05:38,  1.50it/s][A
Iteration:  37%|███▋      | 300/807 [03:23<05:52,  1.44it/s][A

	Training loss :  1.3030320205291113



Iteration:  37%|███▋      | 301/807 [03:24<05:45,  1.46it/s][A
Iteration:  37%|███▋      | 302/807 [03:25<05:39,  1.49it/s][A
Iteration:  38%|███▊      | 303/807 [03:25<05:35,  1.50it/s][A
Iteration:  38%|███▊      | 304/807 [03:26<05:48,  1.44it/s][A

	Training loss :  1.3016324839309643



Iteration:  38%|███▊      | 305/807 [03:27<05:42,  1.47it/s][A
Iteration:  38%|███▊      | 306/807 [03:27<05:37,  1.49it/s][A
Iteration:  38%|███▊      | 307/807 [03:28<05:33,  1.50it/s][A
Iteration:  38%|███▊      | 308/807 [03:29<05:47,  1.44it/s][A

	Training loss :  1.3015662214972756



Iteration:  38%|███▊      | 309/807 [03:29<05:39,  1.47it/s][A
Iteration:  38%|███▊      | 310/807 [03:30<05:34,  1.49it/s][A
Iteration:  39%|███▊      | 311/807 [03:31<05:29,  1.50it/s][A
Iteration:  39%|███▊      | 312/807 [03:31<05:44,  1.44it/s][A

	Training loss :  1.2997554769882789



Iteration:  39%|███▉      | 313/807 [03:32<05:35,  1.47it/s][A
Iteration:  39%|███▉      | 314/807 [03:33<05:30,  1.49it/s][A
Iteration:  39%|███▉      | 315/807 [03:33<05:26,  1.51it/s][A
Iteration:  39%|███▉      | 316/807 [03:34<05:40,  1.44it/s][A

	Training loss :  1.2993716627736636



Iteration:  39%|███▉      | 317/807 [03:35<05:32,  1.47it/s][A
Iteration:  39%|███▉      | 318/807 [03:35<05:28,  1.49it/s][A
Iteration:  40%|███▉      | 319/807 [03:36<05:24,  1.51it/s][A
Iteration:  40%|███▉      | 320/807 [03:37<05:37,  1.44it/s][A

	Training loss :  1.30121367610991



Iteration:  40%|███▉      | 321/807 [03:37<05:31,  1.46it/s][A
Iteration:  40%|███▉      | 322/807 [03:38<05:26,  1.49it/s][A
Iteration:  40%|████      | 323/807 [03:39<05:22,  1.50it/s][A
Iteration:  40%|████      | 324/807 [03:40<05:35,  1.44it/s][A

	Training loss :  1.3052184386753742



Iteration:  40%|████      | 325/807 [03:40<05:27,  1.47it/s][A
Iteration:  40%|████      | 326/807 [03:41<05:24,  1.48it/s][A
Iteration:  41%|████      | 327/807 [03:42<05:19,  1.50it/s][A
Iteration:  41%|████      | 328/807 [03:42<05:32,  1.44it/s][A

	Training loss :  1.3076826977293665



Iteration:  41%|████      | 329/807 [03:43<05:26,  1.47it/s][A
Iteration:  41%|████      | 330/807 [03:44<05:21,  1.48it/s][A
Iteration:  41%|████      | 331/807 [03:44<05:16,  1.50it/s][A
Iteration:  41%|████      | 332/807 [03:45<05:30,  1.44it/s][A

	Training loss :  1.3120414212525608



Iteration:  41%|████▏     | 333/807 [03:46<05:23,  1.47it/s][A
Iteration:  41%|████▏     | 334/807 [03:46<05:18,  1.49it/s][A
Iteration:  42%|████▏     | 335/807 [03:47<05:14,  1.50it/s][A
Iteration:  42%|████▏     | 336/807 [03:48<05:27,  1.44it/s][A

	Training loss :  1.312175877392292



Iteration:  42%|████▏     | 337/807 [03:48<05:20,  1.47it/s][A
Iteration:  42%|████▏     | 338/807 [03:49<05:15,  1.49it/s][A
Iteration:  42%|████▏     | 339/807 [03:50<05:11,  1.50it/s][A
Iteration:  42%|████▏     | 340/807 [03:50<05:23,  1.44it/s][A

	Training loss :  1.314834673615063



Iteration:  42%|████▏     | 341/807 [03:51<05:17,  1.47it/s][A
Iteration:  42%|████▏     | 342/807 [03:52<05:12,  1.49it/s][A
Iteration:  43%|████▎     | 343/807 [03:52<05:09,  1.50it/s][A
Iteration:  43%|████▎     | 344/807 [03:53<05:21,  1.44it/s][A

	Training loss :  1.3139603631787522



Iteration:  43%|████▎     | 345/807 [03:54<05:14,  1.47it/s][A
Iteration:  43%|████▎     | 346/807 [03:54<05:10,  1.48it/s][A
Iteration:  43%|████▎     | 347/807 [03:55<05:05,  1.51it/s][A
Iteration:  43%|████▎     | 348/807 [03:56<05:18,  1.44it/s][A

	Training loss :  1.3140837644366012



Iteration:  43%|████▎     | 349/807 [03:56<05:12,  1.47it/s][A
Iteration:  43%|████▎     | 350/807 [03:57<05:07,  1.49it/s][A
Iteration:  43%|████▎     | 351/807 [03:58<05:03,  1.50it/s][A
Iteration:  44%|████▎     | 352/807 [03:59<05:15,  1.44it/s][A

	Training loss :  1.3133791871368885



Iteration:  44%|████▎     | 353/807 [03:59<05:08,  1.47it/s][A
Iteration:  44%|████▍     | 354/807 [04:00<05:04,  1.49it/s][A
Iteration:  44%|████▍     | 355/807 [04:00<05:00,  1.50it/s][A
Iteration:  44%|████▍     | 356/807 [04:01<05:13,  1.44it/s][A

	Training loss :  1.3134655108612574



Iteration:  44%|████▍     | 357/807 [04:02<05:06,  1.47it/s][A
Iteration:  44%|████▍     | 358/807 [04:03<05:02,  1.48it/s][A
Iteration:  44%|████▍     | 359/807 [04:03<04:58,  1.50it/s][A
Iteration:  45%|████▍     | 360/807 [04:04<05:11,  1.44it/s][A

	Training loss :  1.3182924542162153



Iteration:  45%|████▍     | 361/807 [04:05<05:04,  1.46it/s][A
Iteration:  45%|████▍     | 362/807 [04:05<04:59,  1.48it/s][A
Iteration:  45%|████▍     | 363/807 [04:06<04:55,  1.50it/s][A
Iteration:  45%|████▌     | 364/807 [04:07<05:07,  1.44it/s][A

	Training loss :  1.318926370733387



Iteration:  45%|████▌     | 365/807 [04:07<05:00,  1.47it/s][A
Iteration:  45%|████▌     | 366/807 [04:08<04:56,  1.49it/s][A
Iteration:  45%|████▌     | 367/807 [04:09<04:53,  1.50it/s][A
Iteration:  46%|████▌     | 368/807 [04:09<05:05,  1.44it/s][A

	Training loss :  1.317583113260891



Iteration:  46%|████▌     | 369/807 [04:10<04:58,  1.47it/s][A
Iteration:  46%|████▌     | 370/807 [04:11<04:53,  1.49it/s][A
Iteration:  46%|████▌     | 371/807 [04:11<04:50,  1.50it/s][A
Iteration:  46%|████▌     | 372/807 [04:12<05:02,  1.44it/s][A

	Training loss :  1.3161433034686632



Iteration:  46%|████▌     | 373/807 [04:13<04:55,  1.47it/s][A
Iteration:  46%|████▋     | 374/807 [04:13<04:50,  1.49it/s][A
Iteration:  46%|████▋     | 375/807 [04:14<04:47,  1.50it/s][A
Iteration:  47%|████▋     | 376/807 [04:15<04:59,  1.44it/s][A

	Training loss :  1.3130994370960174



Iteration:  47%|████▋     | 377/807 [04:15<04:52,  1.47it/s][A
Iteration:  47%|████▋     | 378/807 [04:16<04:48,  1.49it/s][A
Iteration:  47%|████▋     | 379/807 [04:17<04:44,  1.50it/s][A
Iteration:  47%|████▋     | 380/807 [04:18<04:56,  1.44it/s][A

	Training loss :  1.31640560893636



Iteration:  47%|████▋     | 381/807 [04:18<04:49,  1.47it/s][A
Iteration:  47%|████▋     | 382/807 [04:19<04:46,  1.49it/s][A
Iteration:  47%|████▋     | 383/807 [04:20<04:41,  1.50it/s][A
Iteration:  48%|████▊     | 384/807 [04:20<04:53,  1.44it/s][A

	Training loss :  1.3145366129465401



Iteration:  48%|████▊     | 385/807 [04:21<04:48,  1.46it/s][A
Iteration:  48%|████▊     | 386/807 [04:22<04:43,  1.48it/s][A
Iteration:  48%|████▊     | 387/807 [04:22<04:39,  1.50it/s][A
Iteration:  48%|████▊     | 388/807 [04:23<04:50,  1.44it/s][A

	Training loss :  1.314452104808129



Iteration:  48%|████▊     | 389/807 [04:24<04:44,  1.47it/s][A
Iteration:  48%|████▊     | 390/807 [04:24<04:40,  1.48it/s][A
Iteration:  48%|████▊     | 391/807 [04:25<04:38,  1.50it/s][A
Iteration:  49%|████▊     | 392/807 [04:26<04:49,  1.43it/s][A

	Training loss :  1.3132324454431632



Iteration:  49%|████▊     | 393/807 [04:26<04:42,  1.46it/s][A
Iteration:  49%|████▉     | 394/807 [04:27<04:37,  1.49it/s][A
Iteration:  49%|████▉     | 395/807 [04:28<04:33,  1.51it/s][A
Iteration:  49%|████▉     | 396/807 [04:28<04:44,  1.44it/s][A

	Training loss :  1.31320368114746



Iteration:  49%|████▉     | 397/807 [04:29<04:39,  1.47it/s][A
Iteration:  49%|████▉     | 398/807 [04:30<04:35,  1.49it/s][A
Iteration:  49%|████▉     | 399/807 [04:30<04:31,  1.50it/s][A
Iteration:  50%|████▉     | 400/807 [04:31<04:42,  1.44it/s][A

	Training loss :  1.3147911466658115



Iteration:  50%|████▉     | 401/807 [04:32<04:36,  1.47it/s][A
Iteration:  50%|████▉     | 402/807 [04:32<04:32,  1.49it/s][A
Iteration:  50%|████▉     | 403/807 [04:33<04:28,  1.50it/s][A
Iteration:  50%|█████     | 404/807 [04:34<04:39,  1.44it/s][A

	Training loss :  1.312369752785947



Iteration:  50%|█████     | 405/807 [04:34<04:33,  1.47it/s][A
Iteration:  50%|█████     | 406/807 [04:35<04:28,  1.49it/s][A
Iteration:  50%|█████     | 407/807 [04:36<04:26,  1.50it/s][A
Iteration:  51%|█████     | 408/807 [04:37<04:37,  1.44it/s][A

	Training loss :  1.309563334227777



Iteration:  51%|█████     | 409/807 [04:37<04:30,  1.47it/s][A
Iteration:  51%|█████     | 410/807 [04:38<04:27,  1.49it/s][A
Iteration:  51%|█████     | 411/807 [04:39<04:23,  1.50it/s][A
Iteration:  51%|█████     | 412/807 [04:39<04:35,  1.43it/s][A

	Training loss :  1.3104292792313308



Iteration:  51%|█████     | 413/807 [04:40<04:28,  1.47it/s][A
Iteration:  51%|█████▏    | 414/807 [04:41<04:24,  1.49it/s][A
Iteration:  51%|█████▏    | 415/807 [04:41<04:21,  1.50it/s][A
Iteration:  52%|█████▏    | 416/807 [04:42<04:31,  1.44it/s][A

	Training loss :  1.3096718256576703



Iteration:  52%|█████▏    | 417/807 [04:43<04:25,  1.47it/s][A
Iteration:  52%|█████▏    | 418/807 [04:43<04:22,  1.48it/s][A
Iteration:  52%|█████▏    | 419/807 [04:44<04:19,  1.50it/s][A
Iteration:  52%|█████▏    | 420/807 [04:45<04:30,  1.43it/s][A

	Training loss :  1.3075896177973065



Iteration:  52%|█████▏    | 421/807 [04:45<04:23,  1.46it/s][A
Iteration:  52%|█████▏    | 422/807 [04:46<04:19,  1.48it/s][A
Iteration:  52%|█████▏    | 423/807 [04:47<04:15,  1.50it/s][A
Iteration:  53%|█████▎    | 424/807 [04:47<04:26,  1.44it/s][A

	Training loss :  1.3082526126560174



Iteration:  53%|█████▎    | 425/807 [04:48<04:21,  1.46it/s][A
Iteration:  53%|█████▎    | 426/807 [04:49<04:17,  1.48it/s][A
Iteration:  53%|█████▎    | 427/807 [04:49<04:13,  1.50it/s][A
Iteration:  53%|█████▎    | 428/807 [04:50<04:23,  1.44it/s][A

	Training loss :  1.3103290943898886



Iteration:  53%|█████▎    | 429/807 [04:51<04:17,  1.47it/s][A
Iteration:  53%|█████▎    | 430/807 [04:51<04:13,  1.49it/s][A
Iteration:  53%|█████▎    | 431/807 [04:52<04:09,  1.51it/s][A
Iteration:  54%|█████▎    | 432/807 [04:53<04:20,  1.44it/s][A

	Training loss :  1.3111834252874057



Iteration:  54%|█████▎    | 433/807 [04:54<04:14,  1.47it/s][A
Iteration:  54%|█████▍    | 434/807 [04:54<04:11,  1.48it/s][A
Iteration:  54%|█████▍    | 435/807 [04:55<04:07,  1.51it/s][A
Iteration:  54%|█████▍    | 436/807 [04:56<04:18,  1.44it/s][A

	Training loss :  1.3103088236183202



Iteration:  54%|█████▍    | 437/807 [04:56<04:12,  1.46it/s][A
Iteration:  54%|█████▍    | 438/807 [04:57<04:08,  1.48it/s][A
Iteration:  54%|█████▍    | 439/807 [04:58<04:04,  1.51it/s][A
Iteration:  55%|█████▍    | 440/807 [04:58<04:14,  1.44it/s][A

	Training loss :  1.3089460067451



Iteration:  55%|█████▍    | 441/807 [04:59<04:09,  1.47it/s][A
Iteration:  55%|█████▍    | 442/807 [05:00<04:04,  1.49it/s][A
Iteration:  55%|█████▍    | 443/807 [05:00<04:01,  1.51it/s][A
Iteration:  55%|█████▌    | 444/807 [05:01<04:12,  1.44it/s][A

	Training loss :  1.3127633207687386



Iteration:  55%|█████▌    | 445/807 [05:02<04:06,  1.47it/s][A
Iteration:  55%|█████▌    | 446/807 [05:02<04:03,  1.48it/s][A
Iteration:  55%|█████▌    | 447/807 [05:03<03:59,  1.50it/s][A
Iteration:  56%|█████▌    | 448/807 [05:04<04:09,  1.44it/s][A

	Training loss :  1.3114356996624597



Iteration:  56%|█████▌    | 449/807 [05:04<04:04,  1.47it/s][A
Iteration:  56%|█████▌    | 450/807 [05:05<04:00,  1.48it/s][A
Iteration:  56%|█████▌    | 451/807 [05:06<03:57,  1.50it/s][A
Iteration:  56%|█████▌    | 452/807 [05:06<04:06,  1.44it/s][A

	Training loss :  1.312574952247396



Iteration:  56%|█████▌    | 453/807 [05:07<04:01,  1.46it/s][A
Iteration:  56%|█████▋    | 454/807 [05:08<03:58,  1.48it/s][A
Iteration:  56%|█████▋    | 455/807 [05:08<03:54,  1.50it/s][A
Iteration:  57%|█████▋    | 456/807 [05:09<04:04,  1.44it/s][A

	Training loss :  1.307813332595846



Iteration:  57%|█████▋    | 457/807 [05:10<03:58,  1.47it/s][A
Iteration:  57%|█████▋    | 458/807 [05:10<03:55,  1.48it/s][A
Iteration:  57%|█████▋    | 459/807 [05:11<03:51,  1.50it/s][A
Iteration:  57%|█████▋    | 460/807 [05:12<04:00,  1.44it/s][A

	Training loss :  1.3044269711427066



Iteration:  57%|█████▋    | 461/807 [05:13<03:55,  1.47it/s][A
Iteration:  57%|█████▋    | 462/807 [05:13<03:51,  1.49it/s][A
Iteration:  57%|█████▋    | 463/807 [05:14<03:48,  1.51it/s][A
Iteration:  57%|█████▋    | 464/807 [05:15<03:57,  1.44it/s][A

	Training loss :  1.304767231039446



Iteration:  58%|█████▊    | 465/807 [05:15<03:51,  1.47it/s][A
Iteration:  58%|█████▊    | 466/807 [05:16<03:48,  1.49it/s][A
Iteration:  58%|█████▊    | 467/807 [05:17<03:46,  1.50it/s][A
Iteration:  58%|█████▊    | 468/807 [05:17<03:56,  1.44it/s][A

	Training loss :  1.307838935436856



Iteration:  58%|█████▊    | 469/807 [05:18<03:50,  1.47it/s][A
Iteration:  58%|█████▊    | 470/807 [05:19<03:46,  1.49it/s][A
Iteration:  58%|█████▊    | 471/807 [05:19<03:44,  1.50it/s][A
Iteration:  58%|█████▊    | 472/807 [05:20<03:52,  1.44it/s][A

	Training loss :  1.306618437612966



Iteration:  59%|█████▊    | 473/807 [05:21<03:47,  1.47it/s][A
Iteration:  59%|█████▊    | 474/807 [05:21<03:43,  1.49it/s][A
Iteration:  59%|█████▉    | 475/807 [05:22<03:40,  1.50it/s][A
Iteration:  59%|█████▉    | 476/807 [05:23<03:50,  1.44it/s][A

	Training loss :  1.306177832937541



Iteration:  59%|█████▉    | 477/807 [05:23<03:44,  1.47it/s][A
Iteration:  59%|█████▉    | 478/807 [05:24<03:41,  1.49it/s][A
Iteration:  59%|█████▉    | 479/807 [05:25<03:38,  1.50it/s][A
Iteration:  59%|█████▉    | 480/807 [05:25<03:46,  1.44it/s][A

	Training loss :  1.306634627468884



Iteration:  60%|█████▉    | 481/807 [05:26<03:42,  1.47it/s][A
Iteration:  60%|█████▉    | 482/807 [05:27<03:38,  1.49it/s][A
Iteration:  60%|█████▉    | 483/807 [05:27<03:35,  1.51it/s][A
Iteration:  60%|█████▉    | 484/807 [05:28<03:44,  1.44it/s][A

	Training loss :  1.3075614790162764



Iteration:  60%|██████    | 485/807 [05:29<03:39,  1.47it/s][A
Iteration:  60%|██████    | 486/807 [05:29<03:36,  1.49it/s][A
Iteration:  60%|██████    | 487/807 [05:30<03:32,  1.50it/s][A
Iteration:  60%|██████    | 488/807 [05:31<03:41,  1.44it/s][A

	Training loss :  1.3070321516057506



Iteration:  61%|██████    | 489/807 [05:32<03:36,  1.47it/s][A
Iteration:  61%|██████    | 490/807 [05:32<03:32,  1.49it/s][A
Iteration:  61%|██████    | 491/807 [05:33<03:30,  1.50it/s][A
Iteration:  61%|██████    | 492/807 [05:34<03:38,  1.44it/s][A

	Training loss :  1.3043866743280637



Iteration:  61%|██████    | 493/807 [05:34<03:33,  1.47it/s][A
Iteration:  61%|██████    | 494/807 [05:35<03:30,  1.49it/s][A
Iteration:  61%|██████▏   | 495/807 [05:36<03:27,  1.50it/s][A
Iteration:  61%|██████▏   | 496/807 [05:36<03:36,  1.44it/s][A

	Training loss :  1.3018421389523052



Iteration:  62%|██████▏   | 497/807 [05:37<03:31,  1.47it/s][A
Iteration:  62%|██████▏   | 498/807 [05:38<03:27,  1.49it/s][A
Iteration:  62%|██████▏   | 499/807 [05:38<03:25,  1.50it/s][A
Iteration:  62%|██████▏   | 500/807 [05:39<03:32,  1.44it/s][A

	Training loss :  1.3051674807667732



Iteration:  62%|██████▏   | 501/807 [05:40<03:27,  1.47it/s][A
Iteration:  62%|██████▏   | 502/807 [05:40<03:24,  1.49it/s][A
Iteration:  62%|██████▏   | 503/807 [05:41<03:22,  1.50it/s][A
Iteration:  62%|██████▏   | 504/807 [05:42<03:31,  1.43it/s][A

	Training loss :  1.304618279553122



Iteration:  63%|██████▎   | 505/807 [05:42<03:26,  1.46it/s][A
Iteration:  63%|██████▎   | 506/807 [05:43<03:22,  1.48it/s][A
Iteration:  63%|██████▎   | 507/807 [05:44<03:19,  1.50it/s][A
Iteration:  63%|██████▎   | 508/807 [05:44<03:28,  1.44it/s][A

	Training loss :  1.3049065477148754



Iteration:  63%|██████▎   | 509/807 [05:45<03:23,  1.47it/s][A
Iteration:  63%|██████▎   | 510/807 [05:46<03:20,  1.48it/s][A
Iteration:  63%|██████▎   | 511/807 [05:46<03:16,  1.50it/s][A
Iteration:  63%|██████▎   | 512/807 [05:47<03:24,  1.44it/s][A

	Training loss :  1.3035365982796066



Iteration:  64%|██████▎   | 513/807 [05:48<03:19,  1.47it/s][A
Iteration:  64%|██████▎   | 514/807 [05:48<03:16,  1.49it/s][A
Iteration:  64%|██████▍   | 515/807 [05:49<03:13,  1.51it/s][A
Iteration:  64%|██████▍   | 516/807 [05:50<03:21,  1.45it/s][A

	Training loss :  1.3033697448613109



Iteration:  64%|██████▍   | 517/807 [05:50<03:16,  1.47it/s][A
Iteration:  64%|██████▍   | 518/807 [05:51<03:13,  1.49it/s][A
Iteration:  64%|██████▍   | 519/807 [05:52<03:11,  1.50it/s][A
Iteration:  64%|██████▍   | 520/807 [05:53<03:19,  1.44it/s][A

	Training loss :  1.3007405285078746



Iteration:  65%|██████▍   | 521/807 [05:53<03:14,  1.47it/s][A
Iteration:  65%|██████▍   | 522/807 [05:54<03:11,  1.49it/s][A
Iteration:  65%|██████▍   | 523/807 [05:55<03:08,  1.51it/s][A
Iteration:  65%|██████▍   | 524/807 [05:55<03:16,  1.44it/s][A

	Training loss :  1.3008070923103632



Iteration:  65%|██████▌   | 525/807 [05:56<03:11,  1.47it/s][A
Iteration:  65%|██████▌   | 526/807 [05:57<03:08,  1.49it/s][A
Iteration:  65%|██████▌   | 527/807 [05:57<03:05,  1.51it/s][A
Iteration:  65%|██████▌   | 528/807 [05:58<03:13,  1.45it/s][A

	Training loss :  1.3004889549743948



Iteration:  66%|██████▌   | 529/807 [05:59<03:08,  1.47it/s][A
Iteration:  66%|██████▌   | 530/807 [05:59<03:05,  1.49it/s][A
Iteration:  66%|██████▌   | 531/807 [06:00<03:03,  1.51it/s][A
Iteration:  66%|██████▌   | 532/807 [06:01<03:10,  1.44it/s][A

	Training loss :  1.2987195140773193



Iteration:  66%|██████▌   | 533/807 [06:01<03:05,  1.48it/s][A
Iteration:  66%|██████▌   | 534/807 [06:02<03:03,  1.49it/s][A
Iteration:  66%|██████▋   | 535/807 [06:03<03:00,  1.51it/s][A
Iteration:  66%|██████▋   | 536/807 [06:03<03:08,  1.44it/s][A

	Training loss :  1.2992437870533609



Iteration:  67%|██████▋   | 537/807 [06:04<03:03,  1.47it/s][A
Iteration:  67%|██████▋   | 538/807 [06:05<03:01,  1.48it/s][A
Iteration:  67%|██████▋   | 539/807 [06:05<02:58,  1.50it/s][A
Iteration:  67%|██████▋   | 540/807 [06:06<03:05,  1.44it/s][A

	Training loss :  1.3001906460633985



Iteration:  67%|██████▋   | 541/807 [06:07<03:01,  1.47it/s][A
Iteration:  67%|██████▋   | 542/807 [06:07<02:57,  1.49it/s][A
Iteration:  67%|██████▋   | 543/807 [06:08<02:55,  1.50it/s][A
Iteration:  67%|██████▋   | 544/807 [06:09<03:02,  1.44it/s][A

	Training loss :  1.3007885092211997



Iteration:  68%|██████▊   | 545/807 [06:09<02:58,  1.47it/s][A
Iteration:  68%|██████▊   | 546/807 [06:10<02:55,  1.49it/s][A
Iteration:  68%|██████▊   | 547/807 [06:11<02:52,  1.50it/s][A
Iteration:  68%|██████▊   | 548/807 [06:12<02:59,  1.44it/s][A

	Training loss :  1.3015346348176908



Iteration:  68%|██████▊   | 549/807 [06:12<02:55,  1.47it/s][A
Iteration:  68%|██████▊   | 550/807 [06:13<02:52,  1.49it/s][A
Iteration:  68%|██████▊   | 551/807 [06:13<02:50,  1.50it/s][A
Iteration:  68%|██████▊   | 552/807 [06:14<02:56,  1.44it/s][A

	Training loss :  1.3039709741546623



Iteration:  69%|██████▊   | 553/807 [06:15<02:51,  1.48it/s][A
Iteration:  69%|██████▊   | 554/807 [06:16<02:50,  1.49it/s][A
Iteration:  69%|██████▉   | 555/807 [06:16<02:47,  1.50it/s][A
Iteration:  69%|██████▉   | 556/807 [06:17<02:54,  1.44it/s][A

	Training loss :  1.3026840216714701



Iteration:  69%|██████▉   | 557/807 [06:18<02:50,  1.47it/s][A
Iteration:  69%|██████▉   | 558/807 [06:18<02:47,  1.48it/s][A
Iteration:  69%|██████▉   | 559/807 [06:19<02:45,  1.50it/s][A
Iteration:  69%|██████▉   | 560/807 [06:20<02:51,  1.44it/s][A

	Training loss :  1.3013200503907034



Iteration:  70%|██████▉   | 561/807 [06:20<02:47,  1.47it/s][A
Iteration:  70%|██████▉   | 562/807 [06:21<02:44,  1.49it/s][A
Iteration:  70%|██████▉   | 563/807 [06:22<02:42,  1.50it/s][A
Iteration:  70%|██████▉   | 564/807 [06:22<02:49,  1.43it/s][A

	Training loss :  1.3016212568321126



Iteration:  70%|███████   | 565/807 [06:23<02:45,  1.47it/s][A
Iteration:  70%|███████   | 566/807 [06:24<02:42,  1.48it/s][A
Iteration:  70%|███████   | 567/807 [06:24<02:40,  1.50it/s][A
Iteration:  70%|███████   | 568/807 [06:25<02:47,  1.43it/s][A

	Training loss :  1.3026015903853194



Iteration:  71%|███████   | 569/807 [06:26<02:43,  1.46it/s][A
Iteration:  71%|███████   | 570/807 [06:26<02:40,  1.48it/s][A
Iteration:  71%|███████   | 571/807 [06:27<02:37,  1.50it/s][A
Iteration:  71%|███████   | 572/807 [06:28<02:43,  1.44it/s][A

	Training loss :  1.3020096234195715



Iteration:  71%|███████   | 573/807 [06:28<02:39,  1.47it/s][A
Iteration:  71%|███████   | 574/807 [06:29<02:37,  1.48it/s][A
Iteration:  71%|███████▏  | 575/807 [06:30<02:34,  1.50it/s][A
Iteration:  71%|███████▏  | 576/807 [06:31<02:40,  1.44it/s][A

	Training loss :  1.3004465046752658



Iteration:  71%|███████▏  | 577/807 [06:31<02:36,  1.47it/s][A
Iteration:  72%|███████▏  | 578/807 [06:32<02:34,  1.49it/s][A
Iteration:  72%|███████▏  | 579/807 [06:32<02:31,  1.50it/s][A
Iteration:  72%|███████▏  | 580/807 [06:33<02:37,  1.44it/s][A

	Training loss :  1.301367328855498



Iteration:  72%|███████▏  | 581/807 [06:34<02:34,  1.46it/s][A
Iteration:  72%|███████▏  | 582/807 [06:35<02:31,  1.48it/s][A
Iteration:  72%|███████▏  | 583/807 [06:35<02:29,  1.49it/s][A
Iteration:  72%|███████▏  | 584/807 [06:36<02:35,  1.43it/s][A

	Training loss :  1.3007739674657175



Iteration:  72%|███████▏  | 585/807 [06:37<02:31,  1.46it/s][A
Iteration:  73%|███████▎  | 586/807 [06:37<02:28,  1.49it/s][A
Iteration:  73%|███████▎  | 587/807 [06:38<02:26,  1.50it/s][A
Iteration:  73%|███████▎  | 588/807 [06:39<02:32,  1.44it/s][A

	Training loss :  1.3013155914995136



Iteration:  73%|███████▎  | 589/807 [06:39<02:28,  1.47it/s][A
Iteration:  73%|███████▎  | 590/807 [06:40<02:25,  1.49it/s][A
Iteration:  73%|███████▎  | 591/807 [06:41<02:23,  1.50it/s][A
Iteration:  73%|███████▎  | 592/807 [06:41<02:29,  1.44it/s][A

	Training loss :  1.301065753289574



Iteration:  73%|███████▎  | 593/807 [06:42<02:25,  1.47it/s][A
Iteration:  74%|███████▎  | 594/807 [06:43<02:23,  1.49it/s][A
Iteration:  74%|███████▎  | 595/807 [06:43<02:21,  1.50it/s][A
Iteration:  74%|███████▍  | 596/807 [06:44<02:26,  1.44it/s][A

	Training loss :  1.301364006062082



Iteration:  74%|███████▍  | 597/807 [06:45<02:23,  1.47it/s][A
Iteration:  74%|███████▍  | 598/807 [06:45<02:20,  1.49it/s][A
Iteration:  74%|███████▍  | 599/807 [06:46<02:18,  1.50it/s][A
Iteration:  74%|███████▍  | 600/807 [06:47<02:23,  1.44it/s][A

	Training loss :  1.3015569760898749



Iteration:  74%|███████▍  | 601/807 [06:48<02:20,  1.47it/s][A
Iteration:  75%|███████▍  | 602/807 [06:48<02:17,  1.49it/s][A
Iteration:  75%|███████▍  | 603/807 [06:49<02:15,  1.50it/s][A
Iteration:  75%|███████▍  | 604/807 [06:50<02:20,  1.44it/s][A

	Training loss :  1.3020204783768843



Iteration:  75%|███████▍  | 605/807 [06:50<02:17,  1.47it/s][A
Iteration:  75%|███████▌  | 606/807 [06:51<02:15,  1.48it/s][A
Iteration:  75%|███████▌  | 607/807 [06:52<02:13,  1.50it/s][A
Iteration:  75%|███████▌  | 608/807 [06:52<02:18,  1.44it/s][A

	Training loss :  1.3051500444447524



Iteration:  75%|███████▌  | 609/807 [06:53<02:14,  1.47it/s][A
Iteration:  76%|███████▌  | 610/807 [06:54<02:12,  1.49it/s][A
Iteration:  76%|███████▌  | 611/807 [06:54<02:10,  1.50it/s][A
Iteration:  76%|███████▌  | 612/807 [06:55<02:15,  1.44it/s][A

	Training loss :  1.3043588943828166



Iteration:  76%|███████▌  | 613/807 [06:56<02:12,  1.46it/s][A
Iteration:  76%|███████▌  | 614/807 [06:56<02:09,  1.49it/s][A
Iteration:  76%|███████▌  | 615/807 [06:57<02:07,  1.50it/s][A
Iteration:  76%|███████▋  | 616/807 [06:58<02:12,  1.44it/s][A

	Training loss :  1.303170280555239



Iteration:  76%|███████▋  | 617/807 [06:58<02:09,  1.47it/s][A
Iteration:  77%|███████▋  | 618/807 [06:59<02:06,  1.49it/s][A
Iteration:  77%|███████▋  | 619/807 [07:00<02:05,  1.50it/s][A
Iteration:  77%|███████▋  | 620/807 [07:00<02:10,  1.44it/s][A

	Training loss :  1.3027372594321929



Iteration:  77%|███████▋  | 621/807 [07:01<02:07,  1.46it/s][A
Iteration:  77%|███████▋  | 622/807 [07:02<02:04,  1.48it/s][A
Iteration:  77%|███████▋  | 623/807 [07:02<02:02,  1.50it/s][A
Iteration:  77%|███████▋  | 624/807 [07:03<02:08,  1.43it/s][A

	Training loss :  1.3041101024032404



Iteration:  77%|███████▋  | 625/807 [07:04<02:04,  1.47it/s][A
Iteration:  78%|███████▊  | 626/807 [07:04<02:02,  1.48it/s][A
Iteration:  78%|███████▊  | 627/807 [07:05<01:59,  1.50it/s][A
Iteration:  78%|███████▊  | 628/807 [07:06<02:04,  1.44it/s][A

	Training loss :  1.3052027789745362



Iteration:  78%|███████▊  | 629/807 [07:07<02:01,  1.47it/s][A
Iteration:  78%|███████▊  | 630/807 [07:07<01:59,  1.48it/s][A
Iteration:  78%|███████▊  | 631/807 [07:08<01:57,  1.50it/s][A
Iteration:  78%|███████▊  | 632/807 [07:09<02:02,  1.43it/s][A

	Training loss :  1.3070994076566607



Iteration:  78%|███████▊  | 633/807 [07:09<01:59,  1.46it/s][A
Iteration:  79%|███████▊  | 634/807 [07:10<01:56,  1.48it/s][A
Iteration:  79%|███████▊  | 635/807 [07:11<01:54,  1.50it/s][A
Iteration:  79%|███████▉  | 636/807 [07:11<01:59,  1.43it/s][A

	Training loss :  1.3076758944669609



Iteration:  79%|███████▉  | 637/807 [07:12<01:56,  1.46it/s][A
Iteration:  79%|███████▉  | 638/807 [07:13<01:54,  1.48it/s][A
Iteration:  79%|███████▉  | 639/807 [07:13<01:52,  1.50it/s][A
Iteration:  79%|███████▉  | 640/807 [07:14<01:56,  1.44it/s][A

	Training loss :  1.3054645471740514



Iteration:  79%|███████▉  | 641/807 [07:15<01:53,  1.46it/s][A
Iteration:  80%|███████▉  | 642/807 [07:15<01:51,  1.48it/s][A
Iteration:  80%|███████▉  | 643/807 [07:16<01:49,  1.49it/s][A
Iteration:  80%|███████▉  | 644/807 [07:17<01:54,  1.42it/s][A

	Training loss :  1.3067778229065563



Iteration:  80%|███████▉  | 645/807 [07:17<01:51,  1.46it/s][A
Iteration:  80%|████████  | 646/807 [07:18<01:49,  1.47it/s][A
Iteration:  80%|████████  | 647/807 [07:19<01:47,  1.49it/s][A
Iteration:  80%|████████  | 648/807 [07:20<01:51,  1.43it/s][A

	Training loss :  1.3063728453384504



Iteration:  80%|████████  | 649/807 [07:20<01:48,  1.46it/s][A
Iteration:  81%|████████  | 650/807 [07:21<01:45,  1.48it/s][A
Iteration:  81%|████████  | 651/807 [07:21<01:44,  1.50it/s][A
Iteration:  81%|████████  | 652/807 [07:22<01:48,  1.43it/s][A

	Training loss :  1.3051649615442826



Iteration:  81%|████████  | 653/807 [07:23<01:44,  1.47it/s][A
Iteration:  81%|████████  | 654/807 [07:24<01:43,  1.48it/s][A
Iteration:  81%|████████  | 655/807 [07:24<01:41,  1.50it/s][A
Iteration:  81%|████████▏ | 656/807 [07:25<01:45,  1.43it/s][A

	Training loss :  1.3053449824815844



Iteration:  81%|████████▏ | 657/807 [07:26<01:42,  1.47it/s][A
Iteration:  82%|████████▏ | 658/807 [07:26<01:40,  1.48it/s][A
Iteration:  82%|████████▏ | 659/807 [07:27<01:38,  1.50it/s][A
Iteration:  82%|████████▏ | 660/807 [07:28<01:42,  1.44it/s][A

	Training loss :  1.305533793568611



Iteration:  82%|████████▏ | 661/807 [07:28<01:39,  1.47it/s][A
Iteration:  82%|████████▏ | 662/807 [07:29<01:37,  1.48it/s][A
Iteration:  82%|████████▏ | 663/807 [07:30<01:36,  1.50it/s][A
Iteration:  82%|████████▏ | 664/807 [07:30<01:39,  1.43it/s][A

	Training loss :  1.3081293317029274



Iteration:  82%|████████▏ | 665/807 [07:31<01:37,  1.46it/s][A
Iteration:  83%|████████▎ | 666/807 [07:32<01:35,  1.48it/s][A
Iteration:  83%|████████▎ | 667/807 [07:32<01:33,  1.50it/s][A
Iteration:  83%|████████▎ | 668/807 [07:33<01:36,  1.44it/s][A

	Training loss :  1.308543874921199



Iteration:  83%|████████▎ | 669/807 [07:34<01:34,  1.47it/s][A
Iteration:  83%|████████▎ | 670/807 [07:34<01:32,  1.48it/s][A
Iteration:  83%|████████▎ | 671/807 [07:35<01:31,  1.49it/s][A
Iteration:  83%|████████▎ | 672/807 [07:36<01:34,  1.43it/s][A

	Training loss :  1.310781853273511



Iteration:  83%|████████▎ | 673/807 [07:37<01:31,  1.46it/s][A
Iteration:  84%|████████▎ | 674/807 [07:37<01:29,  1.48it/s][A
Iteration:  84%|████████▎ | 675/807 [07:38<01:28,  1.50it/s][A
Iteration:  84%|████████▍ | 676/807 [07:39<01:31,  1.44it/s][A

	Training loss :  1.3087691152413217



Iteration:  84%|████████▍ | 677/807 [07:39<01:28,  1.46it/s][A
Iteration:  84%|████████▍ | 678/807 [07:40<01:27,  1.48it/s][A
Iteration:  84%|████████▍ | 679/807 [07:41<01:25,  1.49it/s][A
Iteration:  84%|████████▍ | 680/807 [07:41<01:28,  1.43it/s][A

	Training loss :  1.3090209369273746



Iteration:  84%|████████▍ | 681/807 [07:42<01:26,  1.46it/s][A
Iteration:  85%|████████▍ | 682/807 [07:43<01:24,  1.48it/s][A
Iteration:  85%|████████▍ | 683/807 [07:43<01:22,  1.50it/s][A
Iteration:  85%|████████▍ | 684/807 [07:44<01:26,  1.43it/s][A

	Training loss :  1.3088775499348055



Iteration:  85%|████████▍ | 685/807 [07:45<01:23,  1.46it/s][A
Iteration:  85%|████████▌ | 686/807 [07:45<01:21,  1.48it/s][A
Iteration:  85%|████████▌ | 687/807 [07:46<01:20,  1.50it/s][A
Iteration:  85%|████████▌ | 688/807 [07:47<01:22,  1.44it/s][A

	Training loss :  1.3086105572103068



Iteration:  85%|████████▌ | 689/807 [07:47<01:20,  1.47it/s][A
Iteration:  86%|████████▌ | 690/807 [07:48<01:18,  1.48it/s][A
Iteration:  86%|████████▌ | 691/807 [07:49<01:17,  1.50it/s][A
Iteration:  86%|████████▌ | 692/807 [07:49<01:19,  1.44it/s][A

	Training loss :  1.3098502178929445



Iteration:  86%|████████▌ | 693/807 [07:50<01:17,  1.47it/s][A
Iteration:  86%|████████▌ | 694/807 [07:51<01:15,  1.49it/s][A
Iteration:  86%|████████▌ | 695/807 [07:51<01:14,  1.50it/s][A
Iteration:  86%|████████▌ | 696/807 [07:52<01:17,  1.44it/s][A

	Training loss :  1.3084439093182827



Iteration:  86%|████████▋ | 697/807 [07:53<01:14,  1.47it/s][A
Iteration:  86%|████████▋ | 698/807 [07:53<01:13,  1.48it/s][A
Iteration:  87%|████████▋ | 699/807 [07:54<01:12,  1.50it/s][A
Iteration:  87%|████████▋ | 700/807 [07:55<01:14,  1.44it/s][A

	Training loss :  1.308276965192386



Iteration:  87%|████████▋ | 701/807 [07:56<01:12,  1.47it/s][A
Iteration:  87%|████████▋ | 702/807 [07:56<01:10,  1.49it/s][A
Iteration:  87%|████████▋ | 703/807 [07:57<01:09,  1.50it/s][A
Iteration:  87%|████████▋ | 704/807 [07:58<01:11,  1.43it/s][A

	Training loss :  1.3081646258519455



Iteration:  87%|████████▋ | 705/807 [07:58<01:09,  1.47it/s][A
Iteration:  87%|████████▋ | 706/807 [07:59<01:08,  1.48it/s][A
Iteration:  88%|████████▊ | 707/807 [08:00<01:06,  1.50it/s][A
Iteration:  88%|████████▊ | 708/807 [08:00<01:09,  1.43it/s][A

	Training loss :  1.3100909481277574



Iteration:  88%|████████▊ | 709/807 [08:01<01:07,  1.46it/s][A
Iteration:  88%|████████▊ | 710/807 [08:02<01:05,  1.48it/s][A
Iteration:  88%|████████▊ | 711/807 [08:02<01:04,  1.50it/s][A
Iteration:  88%|████████▊ | 712/807 [08:03<01:06,  1.44it/s][A

	Training loss :  1.3097498361649138



Iteration:  88%|████████▊ | 713/807 [08:04<01:03,  1.47it/s][A
Iteration:  88%|████████▊ | 714/807 [08:04<01:02,  1.48it/s][A
Iteration:  89%|████████▊ | 715/807 [08:05<01:01,  1.50it/s][A
Iteration:  89%|████████▊ | 716/807 [08:06<01:03,  1.43it/s][A

	Training loss :  1.3110273778438568



Iteration:  89%|████████▉ | 717/807 [08:06<01:01,  1.47it/s][A
Iteration:  89%|████████▉ | 718/807 [08:07<00:59,  1.49it/s][A
Iteration:  89%|████████▉ | 719/807 [08:08<00:58,  1.50it/s][A
Iteration:  89%|████████▉ | 720/807 [08:08<01:00,  1.44it/s][A

	Training loss :  1.3094685857494672



Iteration:  89%|████████▉ | 721/807 [08:09<00:58,  1.47it/s][A
Iteration:  89%|████████▉ | 722/807 [08:10<00:57,  1.49it/s][A
Iteration:  90%|████████▉ | 723/807 [08:10<00:55,  1.51it/s][A
Iteration:  90%|████████▉ | 724/807 [08:11<00:57,  1.44it/s][A

	Training loss :  1.3118716942671254



Iteration:  90%|████████▉ | 725/807 [08:12<00:55,  1.47it/s][A
Iteration:  90%|████████▉ | 726/807 [08:13<00:54,  1.48it/s][A
Iteration:  90%|█████████ | 727/807 [08:13<00:53,  1.50it/s][A
Iteration:  90%|█████████ | 728/807 [08:14<00:55,  1.44it/s][A

	Training loss :  1.3132313506288842



Iteration:  90%|█████████ | 729/807 [08:15<00:53,  1.46it/s][A
Iteration:  90%|█████████ | 730/807 [08:15<00:52,  1.48it/s][A
Iteration:  91%|█████████ | 731/807 [08:16<00:50,  1.50it/s][A
Iteration:  91%|█████████ | 732/807 [08:17<00:52,  1.44it/s][A

	Training loss :  1.3140171823280105



Iteration:  91%|█████████ | 733/807 [08:17<00:50,  1.46it/s][A
Iteration:  91%|█████████ | 734/807 [08:18<00:49,  1.48it/s][A
Iteration:  91%|█████████ | 735/807 [08:19<00:48,  1.50it/s][A
Iteration:  91%|█████████ | 736/807 [08:19<00:49,  1.43it/s][A

	Training loss :  1.3139774624420248



Iteration:  91%|█████████▏| 737/807 [08:20<00:47,  1.47it/s][A
Iteration:  91%|█████████▏| 738/807 [08:21<00:46,  1.48it/s][A
Iteration:  92%|█████████▏| 739/807 [08:21<00:45,  1.50it/s][A
Iteration:  92%|█████████▏| 740/807 [08:22<00:46,  1.44it/s][A

	Training loss :  1.3147978603839874



Iteration:  92%|█████████▏| 741/807 [08:23<00:44,  1.47it/s][A
Iteration:  92%|█████████▏| 742/807 [08:23<00:43,  1.49it/s][A
Iteration:  92%|█████████▏| 743/807 [08:24<00:42,  1.51it/s][A
Iteration:  92%|█████████▏| 744/807 [08:25<00:43,  1.44it/s][A

	Training loss :  1.3127544912439522



Iteration:  92%|█████████▏| 745/807 [08:25<00:42,  1.47it/s][A
Iteration:  92%|█████████▏| 746/807 [08:26<00:41,  1.48it/s][A
Iteration:  93%|█████████▎| 747/807 [08:27<00:40,  1.50it/s][A
Iteration:  93%|█████████▎| 748/807 [08:28<00:41,  1.44it/s][A

	Training loss :  1.311903878329272



Iteration:  93%|█████████▎| 749/807 [08:28<00:39,  1.47it/s][A
Iteration:  93%|█████████▎| 750/807 [08:29<00:38,  1.48it/s][A
Iteration:  93%|█████████▎| 751/807 [08:29<00:37,  1.50it/s][A
Iteration:  93%|█████████▎| 752/807 [08:30<00:38,  1.44it/s][A

	Training loss :  1.3105722502508061



Iteration:  93%|█████████▎| 753/807 [08:31<00:36,  1.47it/s][A
Iteration:  93%|█████████▎| 754/807 [08:32<00:35,  1.48it/s][A
Iteration:  94%|█████████▎| 755/807 [08:32<00:34,  1.50it/s][A
Iteration:  94%|█████████▎| 756/807 [08:33<00:35,  1.44it/s][A

	Training loss :  1.3101732248351687



Iteration:  94%|█████████▍| 757/807 [08:34<00:34,  1.47it/s][A
Iteration:  94%|█████████▍| 758/807 [08:34<00:32,  1.49it/s][A
Iteration:  94%|█████████▍| 759/807 [08:35<00:31,  1.50it/s][A
Iteration:  94%|█████████▍| 760/807 [08:36<00:32,  1.44it/s][A

	Training loss :  1.3101480175005762



Iteration:  94%|█████████▍| 761/807 [08:36<00:31,  1.46it/s][A
Iteration:  94%|█████████▍| 762/807 [08:37<00:30,  1.48it/s][A
Iteration:  95%|█████████▍| 763/807 [08:38<00:29,  1.50it/s][A
Iteration:  95%|█████████▍| 764/807 [08:38<00:29,  1.44it/s][A

	Training loss :  1.30840257449924



Iteration:  95%|█████████▍| 765/807 [08:39<00:28,  1.47it/s][A
Iteration:  95%|█████████▍| 766/807 [08:40<00:27,  1.49it/s][A
Iteration:  95%|█████████▌| 767/807 [08:40<00:26,  1.50it/s][A
Iteration:  95%|█████████▌| 768/807 [08:41<00:27,  1.44it/s][A

	Training loss :  1.3081373060898234



Iteration:  95%|█████████▌| 769/807 [08:42<00:25,  1.46it/s][A
Iteration:  95%|█████████▌| 770/807 [08:42<00:24,  1.49it/s][A
Iteration:  96%|█████████▌| 771/807 [08:43<00:23,  1.50it/s][A
Iteration:  96%|█████████▌| 772/807 [08:44<00:24,  1.44it/s][A

	Training loss :  1.30804028049343



Iteration:  96%|█████████▌| 773/807 [08:44<00:23,  1.47it/s][A
Iteration:  96%|█████████▌| 774/807 [08:45<00:22,  1.49it/s][A
Iteration:  96%|█████████▌| 775/807 [08:46<00:21,  1.50it/s][A
Iteration:  96%|█████████▌| 776/807 [08:47<00:21,  1.44it/s][A

	Training loss :  1.3093260577351777



Iteration:  96%|█████████▋| 777/807 [08:47<00:20,  1.46it/s][A
Iteration:  96%|█████████▋| 778/807 [08:48<00:19,  1.49it/s][A
Iteration:  97%|█████████▋| 779/807 [08:48<00:18,  1.50it/s][A
Iteration:  97%|█████████▋| 780/807 [08:49<00:18,  1.44it/s][A

	Training loss :  1.3081070020412788



Iteration:  97%|█████████▋| 781/807 [08:50<00:17,  1.47it/s][A
Iteration:  97%|█████████▋| 782/807 [08:51<00:16,  1.49it/s][A
Iteration:  97%|█████████▋| 783/807 [08:51<00:15,  1.51it/s][A
Iteration:  97%|█████████▋| 784/807 [08:52<00:15,  1.44it/s][A

	Training loss :  1.3082376132358093



Iteration:  97%|█████████▋| 785/807 [08:53<00:14,  1.47it/s][A
Iteration:  97%|█████████▋| 786/807 [08:53<00:14,  1.49it/s][A
Iteration:  98%|█████████▊| 787/807 [08:54<00:13,  1.51it/s][A
Iteration:  98%|█████████▊| 788/807 [08:55<00:13,  1.44it/s][A

	Training loss :  1.3075848817068914



Iteration:  98%|█████████▊| 789/807 [08:55<00:12,  1.47it/s][A
Iteration:  98%|█████████▊| 790/807 [08:56<00:11,  1.49it/s][A
Iteration:  98%|█████████▊| 791/807 [08:57<00:10,  1.51it/s][A
Iteration:  98%|█████████▊| 792/807 [08:57<00:10,  1.44it/s][A

	Training loss :  1.307809083600237



Iteration:  98%|█████████▊| 793/807 [08:58<00:09,  1.47it/s][A
Iteration:  98%|█████████▊| 794/807 [08:59<00:08,  1.49it/s][A
Iteration:  99%|█████████▊| 795/807 [08:59<00:07,  1.51it/s][A
Iteration:  99%|█████████▊| 796/807 [09:00<00:07,  1.45it/s][A

	Training loss :  1.3076351169665255



Iteration:  99%|█████████▉| 797/807 [09:01<00:06,  1.47it/s][A
Iteration:  99%|█████████▉| 798/807 [09:01<00:06,  1.49it/s][A
Iteration:  99%|█████████▉| 799/807 [09:02<00:05,  1.50it/s][A
Iteration:  99%|█████████▉| 800/807 [09:03<00:04,  1.44it/s][A

	Training loss :  1.3074643705785274



Iteration:  99%|█████████▉| 801/807 [09:03<00:04,  1.47it/s][A
Iteration:  99%|█████████▉| 802/807 [09:04<00:03,  1.49it/s][A
Iteration: 100%|█████████▉| 803/807 [09:05<00:02,  1.50it/s][A
Iteration: 100%|█████████▉| 804/807 [09:05<00:02,  1.44it/s][A

	Training loss :  1.3054147024652851



Iteration: 100%|█████████▉| 805/807 [09:06<00:01,  1.47it/s][A
Iteration: 100%|█████████▉| 806/807 [09:07<00:00,  1.49it/s][A
Iteration: 100%|██████████| 807/807 [09:07<00:00,  1.47it/s]
Epoch: 100%|██████████| 3/3 [27:15<00:00, 545.28s/it]


In [None]:
torch.save({
    'model': model.state_dict()
}, 'saved_file_epoch1.txt')


In [51]:
correct_score = 0
wrong_list = []
examples = read_examples('Task_2_dev_full.csv')
model.eval()

AlbertForMaskedLM(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=4096, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((4096,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=4096, out_features=4096, bias=True)
                (key): Linear(in_features=4096, out_features=4096, bias=True)
                (value): Linear(in_features=4096, out_features=4096, bias=

In [None]:
print(["abhi"] + [tokenizer.mask_token] + [tokenizer.sep_token] + [tokenizer.pad_token])

['abhi', '[MASK]', '[SEP]', '[PAD]']


In [None]:

inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")
labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]

inputs = inputs.to('cuda')
labels = labels.to('cuda')

outputs = model(**inputs, labels=labels, output_hidden_states=True)
loss = outputs.loss
logits = outputs.logits

In [None]:
print(inputs)

{'input_ids': tensor([[ 101, 1996, 3007, 1997, 2605, 2003,  103, 1012,  102]],
       device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}


In [None]:
print(logits[0])

tensor([[ -6.0357,  -5.9746,  -6.0352,  ...,  -5.5175,  -5.4489,  -3.6893],
        [-13.0917, -12.5835, -12.3866,  ...,  -8.3100, -11.5729, -10.4926],
        [ -8.5031,  -9.5911,  -9.4720,  ..., -11.5717,  -9.6129,  -7.2501],
        ...,
        [ -0.6591,  -0.7770,  -0.8077,  ...,  -2.6484,  -2.2071,  -1.9790],
        [ -8.3370,  -7.8262,  -8.5830,  ...,  -5.8269,  -8.2611,  -6.9086],
        [ -7.7844,  -7.5628,  -8.2584,  ...,  -8.0199,  -7.6262,  -8.3779]],
       device='cuda:0', grad_fn=<SelectBackward>)


In [None]:
print(outputs.hidden_states[-1][0][-3])

tensor([-0.0051,  0.5623, -0.3249,  ..., -0.3349,  0.2096, -0.2982],
       device='cuda:0', grad_fn=<SelectBackward>)


In [None]:
print(tokenizer.convert_ids_to_tokens(1013))

/


In [None]:
tokenizer.tokenize("_")

['_']

In [None]:
list1 = []
list2 = []

In [52]:
with open('outfile.csv', 'w') as f :
    writer = csv.writer(f, delimiter = ',')
    writer.writerow(['Predicted label', 'Option0','Option1','Option2','Option3','Option4'])
    for idx, example in enumerate(examples) :

        article = example['article']
        ques_tokens = tokenizer.tokenize(example['question'].replace("@placeholder", tokenizer.mask_token))        
        tokenized_article = tokenizer.tokenize(article)

        _truncate_seq_pair(tokenized_article, ques_tokens, max_seq_length - 1)

        # tokens =  ques_tokens + [tokenizer.sep_token] + tokenized_article + [tokenizer.sep_token]

        tokens =  ques_tokens + [tokenizer.sep_token]

        masked_index = tokens.index(tokenizer.mask_token)


        candidates = example['options']
        candidates_ids = []
        for c in candidates:
            candidates_ids.append(tokenizer.convert_tokens_to_ids(tokenizer.tokenize(c))[0])


        indexed_tokens = tokenizer.convert_tokens_to_ids(tokens)        
        segments_ids = [0] * (len(ques_tokens) + 1)
        input_mask = [1] * len(indexed_tokens)

        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(indexed_tokens))
        indexed_tokens += [tokenizer.pad_token_id] * len(padding)
        input_mask += padding
        segments_ids += padding
        

        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        mask_tensors = torch.tensor([input_mask])
        
        mask_tensors = mask_tensors.to(device)
        tokens_tensor = tokens_tensor.to(device)
        segments_tensors = segments_tensors.to(device)


        predictions = model(input_ids = tokens_tensor, attention_mask=mask_tensors, token_type_ids = segments_tensors)
        predictions_candidates = predictions.logits[0, masked_index, candidates_ids]
        answer_idx = torch.argmax(predictions_candidates).item()
        print(answer_idx)
        
        writer.writerow([answer_idx, predictions_candidates[0].item(),predictions_candidates[1].item(),predictions_candidates[2].item(),predictions_candidates[3].item(),predictions_candidates[4].item()])
        print("Correct answer : ", answer_idx, "\tLabel :", example['label'], '\n') 
        if(answer_idx == example['label']):
            correct_score += 1
        else :
            wrong_list.append(idx)



1
Correct answer :  1 	Label : 0 

0
Correct answer :  0 	Label : 0 

3
Correct answer :  3 	Label : 3 

0
Correct answer :  0 	Label : 0 

2
Correct answer :  2 	Label : 2 

2
Correct answer :  2 	Label : 2 

3
Correct answer :  3 	Label : 3 

2
Correct answer :  2 	Label : 2 

3
Correct answer :  3 	Label : 2 

2
Correct answer :  2 	Label : 2 

3
Correct answer :  3 	Label : 3 

3
Correct answer :  3 	Label : 3 

1
Correct answer :  1 	Label : 3 

1
Correct answer :  1 	Label : 1 

2
Correct answer :  2 	Label : 2 

0
Correct answer :  0 	Label : 3 

0
Correct answer :  0 	Label : 2 

3
Correct answer :  3 	Label : 3 

2
Correct answer :  2 	Label : 2 

3
Correct answer :  3 	Label : 3 

2
Correct answer :  2 	Label : 2 

2
Correct answer :  2 	Label : 2 

1
Correct answer :  1 	Label : 1 

3
Correct answer :  3 	Label : 3 

0
Correct answer :  0 	Label : 0 

2
Correct answer :  2 	Label : 2 

4
Correct answer :  4 	Label : 4 

0
Correct answer :  0 	Label : 0 

4
Correct answer :  

In [53]:
accuracy = correct_score / len(examples)  
print("Accuracy :", accuracy)
print("Correct answers :", correct_score) 

print("wrong list items :\n", wrong_list) 


Accuracy : 0.7262044653349001
Correct answers : 618
wrong list items :
 [0, 8, 12, 15, 16, 31, 34, 36, 40, 46, 56, 59, 63, 65, 78, 83, 87, 89, 96, 99, 102, 105, 114, 116, 125, 128, 129, 130, 133, 139, 140, 142, 146, 150, 151, 153, 156, 158, 161, 162, 163, 168, 171, 172, 174, 175, 178, 183, 188, 189, 190, 192, 198, 204, 205, 208, 209, 217, 218, 228, 230, 234, 243, 244, 245, 250, 256, 258, 263, 265, 267, 268, 270, 274, 282, 286, 287, 299, 301, 307, 312, 315, 316, 321, 322, 325, 330, 332, 335, 336, 337, 338, 350, 356, 366, 369, 371, 375, 377, 380, 390, 395, 396, 397, 402, 403, 414, 415, 417, 419, 423, 428, 430, 433, 439, 446, 448, 449, 451, 452, 461, 469, 474, 476, 482, 489, 501, 506, 508, 512, 513, 515, 520, 521, 522, 524, 526, 527, 533, 538, 539, 543, 545, 546, 547, 550, 551, 552, 558, 567, 570, 571, 577, 578, 579, 581, 583, 587, 597, 598, 599, 611, 619, 621, 622, 626, 627, 628, 631, 632, 635, 636, 639, 641, 644, 647, 653, 654, 657, 660, 664, 665, 670, 674, 685, 686, 691, 692, 693, 695,

Material After this is just for practice and is of no use: 


---



---



In [None]:
for i, item in enumerate(list1):
  if(list2[i] != item):
    print("Yes") 

In [None]:
tokenizer.encode("Hello, my dog is very <mask> unusual", add_special_tokens=False)

[17, 11368, 19, 94, 2288, 27, 172, 6, 4395]

In [None]:
# We show how to setup inputs to predict a next token using a bi-directional context.
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)).unsqueeze(0)  # We will predict the masked token

print(input_ids)

perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token i.e <mask> here.

target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)  # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
input_ids=input_ids.to(device)
perm_mask=perm_mask.to(device)
target_mapping=target_mapping.to(device)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
print(np.shape(outputs.logits))
next_token_logits = outputs[0]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
print(next_token_logits)
print(torch.argmax(next_token_logits))
# The same way can the XLNetLMHeadModel be used to be trained by standard auto-regressive language modeling.


tensor([[   17, 11368,    19,    94,  2288,    27,   172,     6]])
torch.Size([1, 1, 32000])
tensor([[[-32.9998, -42.5084, -42.8883,  ..., -38.2738, -41.6116, -38.0217]]],
       device='cuda:0', grad_fn=<AddBackward0>)
tensor(172, device='cuda:0')


In [None]:


input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)).unsqueeze(0)  # We will predict the masked token
labels = torch.tensor(tokenizer.encode("cute", add_special_tokens=False)).unsqueeze(0)

assert labels.shape[0] == 1, 'only one word will be predicted'
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token as is done in standard auto-regressive lm training

target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)  # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)

input_ids=input_ids.to('cuda')
perm_mask=perm_mask.to('cuda')
target_mapping=target_mapping.to('cuda')
labels = labels.to(device)

outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
loss = outputs.loss
next_token_logits = outputs.logits  # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]