In [None]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader,random_split
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models

In [None]:
from google.colab import drive

drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# path configuration
root = 'gdrive/My Drive/project/'
LABELS_PATH = root+'data/MSVD_label_final.csv'
DATA_PATH = root+'data/MSVD/training_data/feat/'
MODEL_PATH = root+'model/'

## Dataset

In [None]:
label_final_df = pd.read_csv(LABELS_PATH)
label_final_df.tail(5)

Unnamed: 0,caption,video_id,sent_len
19714,a man and two women walk across the beach,jLgmCY1fEE8_16_26.avi,9
19715,criss angel talking to three women,jLgmCY1fEE8_16_26.avi,6
19716,chris angel gathering women on the beach,jLgmCY1fEE8_16_26.avi,7
19717,criss angel talks to women on the beach,jLgmCY1fEE8_16_26.avi,8
19718,the man is talking to women,jLgmCY1fEE8_16_26.avi,6


### 1. Load Embedding

In [None]:
embedding = np.load(MODEL_PATH+'MSVD_embedding.npy')
word2idx = pickle.load(open(MODEL_PATH+'MSVD_word2idx.pkl', "rb"))
idx2word = pickle.load(open(MODEL_PATH+'MSVD_idx2word.pkl', "rb"))

In [None]:
vid = 'jLgmCY1fEE8_16_26.avi'

In [None]:
filename = DATA_PATH + f'{vid}.npy'

### 2.Dataset Class for pytorch

In [None]:
class MSVD(Dataset):
    def __init__(self, df, word2idx, DATA_PATH):
        super(MSVD, self).__init__()
        self.df = df
        self.path = DATA_PATH
        self.word2idx = word2idx
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        vid = row['video_id']
        filename = self.path + f'{vid}.npy'
        x = torch.tensor(np.load(filename))
        sentence_emb = [self.word2idx.get(word, self.word2idx['<UNK>']) for word in row['caption'].split(' ')]
        y = torch.zeros(len(sentence_emb)+2)
        y[0], y[-1] = self.word2idx['<START>'], self.word2idx['<END>']
        y[1:-1] = torch.tensor(sentence_emb)
        #true_sentence = row['caption']
        return x.float(), y.long()#, true_sentence

ds = MSVD(label_final_df, word2idx, DATA_PATH)

In [None]:
ds_iter = iter(ds)

In [None]:
x, y = next(ds_iter)
x, y

(tensor([[0.0000, 0.6935, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.5050, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 1.7072, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0081, 1.7673, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.7900, 1.6565, 0.0000,  ..., 0.2572, 0.0000, 0.0000],
         [0.0000, 2.0024, 0.0000,  ..., 0.8096, 0.0000, 0.0000]]),
 tensor([  1,   4,   8, 782, 580,   4,  48,   2]))

In [None]:
[idx2word[word.item()] for word in y]

['<START>', 'a', 'woman', 'goes', 'under', 'a', 'horse', '<END>']

### 3.Dataloaders

In [None]:
def collate_fn(batch):
    """
    padding  text or video
    """
    x_batch = []
    y_batch = []
    for x, y in batch:
        x_batch.append(x)
        y_batch.append(y)
    lengths = list(map(lambda x: x.size(0), x_batch))
    x_batch = torch.nn.utils.rnn.pad_sequence(x_batch,  batch_first=True)
    x_batch = torch.nn.utils.rnn.pack_padded_sequence(x_batch, lengths, batch_first=True, enforce_sorted=False)
    y_batch = torch.nn.utils.rnn.pad_sequence(y_batch, batch_first=True)
    return x_batch, y_batch#, lengths  # PackedSequence, padded tensor

In [None]:
# split train/test
train_proportion = 0.90
train_size = int(train_proportion * len(ds))
validation_size = len(ds) - train_size
print(train_size, validation_size)

17747 1972


In [None]:
train_ds, validation_ds = random_split(ds, [train_size, validation_size])
# dataloaders
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True, collate_fn=collate_fn)
valid_dl = DataLoader(validation_ds, batch_size=256, shuffle=False, collate_fn=collate_fn)

In [None]:
loader = iter(train_dl)

In [None]:
x, y = next(loader)
x, y

(PackedSequence(data=tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.2822, 1.7576],
        [0.0000, 1.6779, 0.0000,  ..., 0.0000, 0.4317, 0.0000],
        [0.0000, 0.0000, 3.9638,  ..., 0.0000, 0.0000, 0.8791],
        ...,
        [1.1512, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0156],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 1.0527, 0.7188],
        [3.0851, 0.0000, 0.0000,  ..., 0.0189, 0.0000, 0.3621]]), batch_sizes=tensor([16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16]), sorted_indices=tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]), unsorted_indices=tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])),
 tensor([[   1,   19,   8

# Model

### 1.Sequence to Sequence Model (Encoder-Decoder)

While image description handles a variable length output sequence of words, video description also has to handle a variable length input sequence of frames. 
Related approaches to video description have resolved variable length input by holistic video representations [29, 28, 11], pooling over frames [39], or sub-sampling on a fixed number of input frames [43]. In contrast, in this work we propose a sequence to sequence

In [None]:
class Encoder(nn.Module):
    '''
    Take sequence of video's resnet50 features as input
    
    note: batch_first=True does not apply to hidden or cell states
    '''
    def __init__(self, input_dim, hidden_dim):
        super(Encoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.rnn = nn.GRU(input_dim, hidden_dim, num_layers=1, batch_first=True)
                
    def forward(self, x):
        '''
        x: PackedSequence
        '''
        outputs, hidden = self.rnn(x) 
        return hidden
    
class Decoder(nn.Module):
    '''
    Decode Hidden State from Encoder to sentence (sequence of texts)
    
    note: batch_first=True does not apply to hidden or cell states
    '''
    def __init__(self, weights, emb_dim, hidden_dim, out_dim):
        super(Decoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.emb_dim = emb_dim
        self.out_dim = out_dim
        
        # layers
        #self.emb = nn.Embedding.from_pretrained(torch.tensor(weights), padding_idx=0, freeze=False)
        self.emb = nn.Embedding(out_dim, emb_dim, padding_idx=0)
        self.rnn = nn.GRU(emb_dim + hidden_dim, hidden_dim, num_layers=1, batch_first=True)
        self.fc_out1 = nn.Linear(emb_dim + hidden_dim * 2, out_dim)
        self.fc_out2 = nn.Linear(out_dim, out_dim)

                
    def forward(self, word_input, encoded_context, hidden):
        '''
        word_input: (batch_size)
        encoded_context: (1, batch_size, hidden_dim)
        hidden: (1, batch_size, hidden_dim)
        '''
        # 1 word at a time
    
        word_input = self.emb(word_input) # dim (batch, emb_dim) 
        emb_input = torch.cat([word_input, encoded_context.squeeze(0)], dim=1)
        output, hidden = self.rnn(emb_input.unsqueeze(1).float(), hidden)
        prediction = F.relu(self.fc_out1(torch.cat([word_input, encoded_context.squeeze(0), hidden.squeeze(0)], dim=1).float()))
        prediction = self.fc_out2(prediction)
        return prediction, hidden 
    

In [None]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
    
    def forward(self, x, y, teacher_forcing_ratio=0.8):
        '''
        x: PackedSequence
        y: (batch_size, sentence_len(padded))
        hidden: (1, batch_size, hidden_dim)
        '''
        batch_size = y.size(0)
        sentence_len = y.size(1)
        vocab_size = self.decoder.out_dim
        
        ##############
        # Initialize #
        ##############
        # tensor for final outputs
        outputs = torch.zeros(batch_size, sentence_len, vocab_size).to(self.device)
        # last hidden state of the encoder is the context
        encoded_context = self.encoder(x) # (1, batch_size, hidden_dim)
        # first hidden state 
        hidden = encoded_context # (1, batch_size, hidden_dim)
        # first input '<START>'
        word_input = y[:, 0] # (batch_size)
        for t in range(1, sentence_len):
            #insert input token embedding, previous hidden state and the context state
            #receive output tensor (predictions) and new hidden state
            output, hidden = self.decoder(word_input, encoded_context, hidden)
            
            #place predictions in a tensor holding predictions for each token
            outputs[:, t, :] = output
            
            #decide if we are going to use teacher forcing or not
            teacher_force = np.random.rand() < teacher_forcing_ratio
            
            #get the highest predicted token from our predictions
            top1 = output.argmax(1) # dim: (batch_size)
            
            #if teacher forcing, use actual next token as next input
            #if not, use predicted token
            word_input = y[:, t] if teacher_force else top1

        return outputs
    

# Training 

Set up the model

In [None]:
N_VOCAB = len(word2idx)
EMB_DIM = 300
INPUT_DIM = 4096 # vgg16 fc dim
HIDDEN_DIM = 512

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
encoder = Encoder(INPUT_DIM, HIDDEN_DIM)
decoder = Decoder(embedding, EMB_DIM, HIDDEN_DIM, N_VOCAB)
model = Seq2Seq(encoder, decoder, device)
model.to(device)

Seq2Seq(
  (encoder): Encoder(
    (rnn): GRU(4096, 512, batch_first=True)
  )
  (decoder): Decoder(
    (emb): Embedding(1504, 300, padding_idx=0)
    (rnn): GRU(812, 512, batch_first=True)
    (fc_out1): Linear(in_features=1324, out_features=1504, bias=True)
    (fc_out2): Linear(in_features=1504, out_features=1504, bias=True)
  )
)

weight initialization with N(0, 0.01)

In [None]:
#for name, param in model.named_parameters():
#    #if name != 'decoder.emb.weight':
#    print(name, param.shape)
#    nn.init.normal_(param.data, mean=0, std=0.01)

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.0005)
loss_func = nn.CrossEntropyLoss(ignore_index=0)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, threshold=0.001, threshold_mode='rel', min_lr=0)

In [None]:
def train_one_epoch(model, dataloader, optimizer, lossFun, backwards=True, print_loss=False):
    
    if backwards == True:
        model.train()
    else:
        model.eval()
    
    total_loss = 0.0
    
    for i, (x, y) in enumerate(tqdm(dataloader)):
        x = x.to(device)
        y = y.to(device)
        out = model(x, y)
        out = out.view(-1, N_VOCAB)
        y = y.view(-1)
        loss = lossFun(out, y)
        
        total_loss += loss.item()
        if i % 10 == 0:
            print(f'Batch loss: {loss.item()}')
        if backwards == True:
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
    
    avg_loss = total_loss / len(dataloader)
    
    if print_loss == True:
        print(avg_loss)
    
    return avg_loss



def evaluate(model, dataloader, lossFun):
    
    model.eval()
    
    total_loss = 0
    
    with torch.no_grad():
    
        for x, y in tqdm(dataloader):
            x = x.to(device)
            y = y.to(device)
            out = model(x, y)
            out = out.view(-1, N_VOCAB)
            y = y.view(-1)
            loss = lossFun(out, y)

            total_loss += loss.item()
        
    return total_loss / len(dataloader)

In [None]:
device

device(type='cuda')

In [None]:
# train 3 more epochs
EPOCHS = 100
best = 100000
train_losses = []
valid_losses = []
for epoch in range(EPOCHS):
    print('Epoch: ', epoch)
    
    train_loss = train_one_epoch(model, train_dl, optimizer, loss_func)
    print('Train Loss: ', train_loss)
    train_losses.append(train_loss)
    
    valid_loss = evaluate(model, valid_dl, loss_func)
    print('Valid Loss: ', valid_loss)
    valid_losses.append(valid_loss)

    if valid_loss < best:
        best = valid_loss
        torch.save(model.state_dict(),  MODEL_PATH+'MSVD_seq2seq_v2.pt')
    scheduler.step(valid_loss)



  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  0




  1%|▏         | 1/70 [01:47<2:04:04, 107.90s/it][A[A

Batch loss: 7.318040370941162




  3%|▎         | 2/70 [03:16<1:55:40, 102.06s/it][A[A

  4%|▍         | 3/70 [04:30<1:44:40, 93.75s/it] [A[A

  6%|▌         | 4/70 [05:30<1:31:56, 83.59s/it][A[A

  7%|▋         | 5/70 [06:27<1:21:53, 75.59s/it][A[A

  9%|▊         | 6/70 [07:12<1:10:48, 66.39s/it][A[A

 10%|█         | 7/70 [07:43<58:26, 55.66s/it]  [A[A

 11%|█▏        | 8/70 [08:13<49:33, 47.96s/it][A[A

 13%|█▎        | 9/70 [08:38<41:46, 41.09s/it][A[A

 14%|█▍        | 10/70 [09:00<35:36, 35.60s/it][A[A

 16%|█▌        | 11/70 [09:20<30:21, 30.87s/it][A[A

Batch loss: 4.474686145782471




 17%|█▋        | 12/70 [09:36<25:19, 26.20s/it][A[A

 19%|█▊        | 13/70 [09:50<21:33, 22.69s/it][A[A

 20%|██        | 14/70 [10:01<17:49, 19.10s/it][A[A

 21%|██▏       | 15/70 [10:13<15:34, 16.99s/it][A[A

 23%|██▎       | 16/70 [10:23<13:30, 15.00s/it][A[A

 24%|██▍       | 17/70 [10:29<10:54, 12.35s/it][A[A

 26%|██▌       | 18/70 [10:42<10:39, 12.30s/it][A[A

 27%|██▋       | 19/70 [10:50<09:23, 11.05s/it][A[A

 29%|██▊       | 20/70 [10:57<08:22, 10.05s/it][A[A

 30%|███       | 21/70 [11:04<07:15,  8.89s/it][A[A

Batch loss: 4.098505020141602




 31%|███▏      | 22/70 [11:09<06:13,  7.79s/it][A[A

 33%|███▎      | 23/70 [11:17<06:07,  7.82s/it][A[A

 34%|███▍      | 24/70 [11:23<05:42,  7.45s/it][A[A

 36%|███▌      | 25/70 [11:27<04:48,  6.41s/it][A[A

 37%|███▋      | 26/70 [11:30<03:54,  5.33s/it][A[A

 39%|███▊      | 27/70 [11:35<03:39,  5.10s/it][A[A

 40%|████      | 28/70 [11:37<03:05,  4.41s/it][A[A

 41%|████▏     | 29/70 [11:41<02:47,  4.10s/it][A[A

 43%|████▎     | 30/70 [11:46<02:52,  4.31s/it][A[A

 44%|████▍     | 31/70 [11:49<02:35,  3.98s/it][A[A

Batch loss: 3.919748306274414




 46%|████▌     | 32/70 [11:52<02:20,  3.70s/it][A[A

 47%|████▋     | 33/70 [11:56<02:27,  3.98s/it][A[A

 49%|████▊     | 34/70 [11:59<02:07,  3.55s/it][A[A

 50%|█████     | 35/70 [12:01<01:52,  3.20s/it][A[A

 51%|█████▏    | 36/70 [12:06<01:58,  3.48s/it][A[A

 53%|█████▎    | 37/70 [12:09<01:54,  3.46s/it][A[A

 54%|█████▍    | 38/70 [12:12<01:46,  3.33s/it][A[A

 56%|█████▌    | 39/70 [12:16<01:52,  3.63s/it][A[A

 57%|█████▋    | 40/70 [12:19<01:43,  3.46s/it][A[A

 59%|█████▊    | 41/70 [12:22<01:31,  3.15s/it][A[A

Batch loss: 3.6797056198120117




 60%|██████    | 42/70 [12:26<01:36,  3.45s/it][A[A

 61%|██████▏   | 43/70 [12:28<01:25,  3.17s/it][A[A

 63%|██████▎   | 44/70 [12:31<01:15,  2.91s/it][A[A

 64%|██████▍   | 45/70 [12:35<01:22,  3.30s/it][A[A

 66%|██████▌   | 46/70 [12:38<01:18,  3.26s/it][A[A

 67%|██████▋   | 47/70 [12:41<01:09,  3.02s/it][A[A

 69%|██████▊   | 48/70 [12:45<01:15,  3.44s/it][A[A

 70%|███████   | 49/70 [12:48<01:06,  3.17s/it][A[A

 71%|███████▏  | 50/70 [12:50<00:58,  2.95s/it][A[A

 73%|███████▎  | 51/70 [12:54<01:04,  3.38s/it][A[A

Batch loss: 3.451209306716919




 74%|███████▍  | 52/70 [12:57<00:55,  3.10s/it][A[A

 76%|███████▌  | 53/70 [12:59<00:48,  2.86s/it][A[A

 77%|███████▋  | 54/70 [13:03<00:51,  3.24s/it][A[A

 79%|███████▊  | 55/70 [13:06<00:45,  3.01s/it][A[A

 80%|████████  | 56/70 [13:08<00:39,  2.82s/it][A[A

 81%|████████▏ | 57/70 [13:12<00:42,  3.24s/it][A[A

 83%|████████▎ | 58/70 [13:15<00:36,  3.04s/it][A[A

 84%|████████▍ | 59/70 [13:17<00:30,  2.81s/it][A[A

 86%|████████▌ | 60/70 [13:21<00:32,  3.25s/it][A[A

 87%|████████▋ | 61/70 [13:24<00:26,  2.99s/it][A[A

Batch loss: 3.318873643875122




 89%|████████▊ | 62/70 [13:26<00:22,  2.80s/it][A[A

 90%|█████████ | 63/70 [13:30<00:22,  3.25s/it][A[A

 91%|█████████▏| 64/70 [13:33<00:18,  3.03s/it][A[A

 93%|█████████▎| 65/70 [13:35<00:14,  2.85s/it][A[A

 94%|█████████▍| 66/70 [13:40<00:13,  3.38s/it][A[A

 96%|█████████▌| 67/70 [13:43<00:09,  3.13s/it][A[A

 97%|█████████▋| 68/70 [13:45<00:05,  2.93s/it][A[A

 99%|█████████▊| 69/70 [13:50<00:03,  3.39s/it][A[A

100%|██████████| 70/70 [13:50<00:00, 11.87s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  3.989929417201451




 12%|█▎        | 1/8 [00:08<00:59,  8.44s/it][A[A

 25%|██▌       | 2/8 [00:11<00:41,  6.91s/it][A[A

 38%|███▊      | 3/8 [00:15<00:29,  5.81s/it][A[A

 50%|█████     | 4/8 [00:17<00:19,  4.76s/it][A[A

 62%|██████▎   | 5/8 [00:21<00:13,  4.64s/it][A[A

 75%|███████▌  | 6/8 [00:24<00:07,  3.97s/it][A[A

 88%|████████▊ | 7/8 [00:26<00:03,  3.46s/it][A[A

100%|██████████| 8/8 [00:29<00:00,  3.69s/it]


Valid Loss:  3.382241040468216




  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  1




  1%|▏         | 1/70 [00:08<09:41,  8.43s/it][A[A

Batch loss: 3.075448513031006




  3%|▎         | 2/70 [00:12<07:54,  6.98s/it][A[A

  4%|▍         | 3/70 [00:15<06:38,  5.95s/it][A[A

  6%|▌         | 4/70 [00:17<05:20,  4.86s/it][A[A

  7%|▋         | 5/70 [00:22<05:12,  4.81s/it][A[A

  9%|▊         | 6/70 [00:25<04:25,  4.15s/it][A[A

 10%|█         | 7/70 [00:27<03:49,  3.65s/it][A[A

 11%|█▏        | 8/70 [00:32<04:07,  3.99s/it][A[A

 13%|█▎        | 9/70 [00:35<03:38,  3.58s/it][A[A

 14%|█▍        | 10/70 [00:37<03:15,  3.27s/it][A[A

 16%|█▌        | 11/70 [00:42<03:38,  3.70s/it][A[A

Batch loss: 3.1157007217407227




 17%|█▋        | 12/70 [00:44<03:16,  3.38s/it][A[A

 19%|█▊        | 13/70 [00:47<02:57,  3.11s/it][A[A

 20%|██        | 14/70 [00:51<03:16,  3.51s/it][A[A

 21%|██▏       | 15/70 [00:54<02:58,  3.24s/it][A[A

 23%|██▎       | 16/70 [00:56<02:41,  2.99s/it][A[A

 24%|██▍       | 17/70 [01:01<03:01,  3.42s/it][A[A

 26%|██▌       | 18/70 [01:03<02:44,  3.15s/it][A[A

 27%|██▋       | 19/70 [01:06<02:28,  2.91s/it][A[A

 29%|██▊       | 20/70 [01:10<02:48,  3.37s/it][A[A

 30%|███       | 21/70 [01:13<02:32,  3.11s/it][A[A

Batch loss: 3.036078453063965




 31%|███▏      | 22/70 [01:15<02:18,  2.88s/it][A[A

 33%|███▎      | 23/70 [01:19<02:36,  3.33s/it][A[A

 34%|███▍      | 24/70 [01:22<02:21,  3.07s/it][A[A

 36%|███▌      | 25/70 [01:24<02:09,  2.87s/it][A[A

 37%|███▋      | 26/70 [01:29<02:26,  3.33s/it][A[A

 39%|███▊      | 27/70 [01:31<02:14,  3.13s/it][A[A

 40%|████      | 28/70 [01:34<02:03,  2.93s/it][A[A

 41%|████▏     | 29/70 [01:38<02:22,  3.47s/it][A[A

 43%|████▎     | 30/70 [01:41<02:08,  3.21s/it][A[A

 44%|████▍     | 31/70 [01:44<01:56,  2.99s/it][A[A

Batch loss: 3.03236722946167




 46%|████▌     | 32/70 [01:48<02:10,  3.42s/it][A[A

 47%|████▋     | 33/70 [01:51<01:57,  3.18s/it][A[A

 49%|████▊     | 34/70 [01:53<01:46,  2.95s/it][A[A

 50%|█████     | 35/70 [01:58<01:59,  3.42s/it][A[A

 51%|█████▏    | 36/70 [02:00<01:46,  3.12s/it][A[A

 53%|█████▎    | 37/70 [02:02<01:35,  2.90s/it][A[A

 54%|█████▍    | 38/70 [02:07<01:45,  3.30s/it][A[A

 56%|█████▌    | 39/70 [02:09<01:34,  3.03s/it][A[A

 57%|█████▋    | 40/70 [02:11<01:24,  2.81s/it][A[A

 59%|█████▊    | 41/70 [02:16<01:34,  3.27s/it][A[A

Batch loss: 2.9595484733581543




 60%|██████    | 42/70 [02:18<01:24,  3.03s/it][A[A

 61%|██████▏   | 43/70 [02:20<01:16,  2.83s/it][A[A

 63%|██████▎   | 44/70 [02:25<01:26,  3.34s/it][A[A

 64%|██████▍   | 45/70 [02:28<01:17,  3.10s/it][A[A

 66%|██████▌   | 46/70 [02:30<01:09,  2.89s/it][A[A

 67%|██████▋   | 47/70 [02:35<01:18,  3.40s/it][A[A

 69%|██████▊   | 48/70 [02:37<01:09,  3.17s/it][A[A

 70%|███████   | 49/70 [02:40<01:02,  2.97s/it][A[A

 71%|███████▏  | 50/70 [02:44<01:10,  3.51s/it][A[A

 73%|███████▎  | 51/70 [02:47<01:01,  3.25s/it][A[A

Batch loss: 3.020170211791992




 74%|███████▍  | 52/70 [02:50<00:54,  3.02s/it][A[A

 76%|███████▌  | 53/70 [02:54<00:59,  3.53s/it][A[A

 77%|███████▋  | 54/70 [02:57<00:52,  3.26s/it][A[A

 79%|███████▊  | 55/70 [02:59<00:45,  3.02s/it][A[A

 80%|████████  | 56/70 [03:04<00:48,  3.48s/it][A[A

 81%|████████▏ | 57/70 [03:06<00:41,  3.19s/it][A[A

 83%|████████▎ | 58/70 [03:09<00:35,  2.95s/it][A[A

 84%|████████▍ | 59/70 [03:13<00:37,  3.45s/it][A[A

 86%|████████▌ | 60/70 [03:16<00:31,  3.15s/it][A[A

 87%|████████▋ | 61/70 [03:18<00:26,  2.90s/it][A[A

Batch loss: 2.8164360523223877




 89%|████████▊ | 62/70 [03:23<00:26,  3.34s/it][A[A

 90%|█████████ | 63/70 [03:25<00:21,  3.09s/it][A[A

 91%|█████████▏| 64/70 [03:27<00:17,  2.86s/it][A[A

 93%|█████████▎| 65/70 [03:32<00:16,  3.33s/it][A[A

 94%|█████████▍| 66/70 [03:34<00:12,  3.08s/it][A[A

 96%|█████████▌| 67/70 [03:37<00:08,  2.86s/it][A[A

 97%|█████████▋| 68/70 [03:41<00:06,  3.36s/it][A[A

 99%|█████████▊| 69/70 [03:44<00:03,  3.13s/it][A[A

100%|██████████| 70/70 [03:45<00:00,  3.22s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  3.0125708511897495




 12%|█▎        | 1/8 [00:08<01:02,  8.90s/it][A[A

 25%|██▌       | 2/8 [00:12<00:43,  7.26s/it][A[A

 38%|███▊      | 3/8 [00:16<00:30,  6.18s/it][A[A

 50%|█████     | 4/8 [00:18<00:20,  5.04s/it][A[A

 62%|██████▎   | 5/8 [00:22<00:14,  4.88s/it][A[A

 75%|███████▌  | 6/8 [00:25<00:08,  4.13s/it][A[A

 88%|████████▊ | 7/8 [00:27<00:03,  3.58s/it][A[A

100%|██████████| 8/8 [00:30<00:00,  3.84s/it]


Valid Loss:  2.8766618967056274




  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  2




  1%|▏         | 1/70 [00:08<10:06,  8.79s/it][A[A

Batch loss: 2.6774487495422363




  3%|▎         | 2/70 [00:12<08:17,  7.32s/it][A[A

  4%|▍         | 3/70 [00:16<06:58,  6.25s/it][A[A

  6%|▌         | 4/70 [00:18<05:36,  5.10s/it][A[A

  7%|▋         | 5/70 [00:23<05:24,  4.99s/it][A[A

  9%|▊         | 6/70 [00:26<04:31,  4.25s/it][A[A

 10%|█         | 7/70 [00:28<03:52,  3.68s/it][A[A

 11%|█▏        | 8/70 [00:33<04:05,  3.96s/it][A[A

 13%|█▎        | 9/70 [00:35<03:34,  3.52s/it][A[A

 14%|█▍        | 10/70 [00:37<03:10,  3.17s/it][A[A

 16%|█▌        | 11/70 [00:42<03:32,  3.60s/it][A[A

Batch loss: 2.606247901916504




 17%|█▋        | 12/70 [00:44<03:09,  3.26s/it][A[A

 19%|█▊        | 13/70 [00:47<02:49,  2.98s/it][A[A

 20%|██        | 14/70 [00:51<03:15,  3.48s/it][A[A

 21%|██▏       | 15/70 [00:54<02:55,  3.20s/it][A[A

 23%|██▎       | 16/70 [00:56<02:39,  2.95s/it][A[A

 24%|██▍       | 17/70 [01:01<03:04,  3.49s/it][A[A

 26%|██▌       | 18/70 [01:04<02:45,  3.19s/it][A[A

 27%|██▋       | 19/70 [01:06<02:31,  2.98s/it][A[A

 29%|██▊       | 20/70 [01:11<02:57,  3.54s/it][A[A

 30%|███       | 21/70 [01:14<02:39,  3.25s/it][A[A

Batch loss: 2.5981953144073486




 31%|███▏      | 22/70 [01:16<02:24,  3.01s/it][A[A

 33%|███▎      | 23/70 [01:21<02:48,  3.58s/it][A[A

 34%|███▍      | 24/70 [01:23<02:30,  3.28s/it][A[A

 36%|███▌      | 25/70 [01:26<02:16,  3.04s/it][A[A

 37%|███▋      | 26/70 [01:31<02:35,  3.54s/it][A[A

 39%|███▊      | 27/70 [01:33<02:18,  3.21s/it][A[A

 40%|████      | 28/70 [01:35<02:04,  2.96s/it][A[A

 41%|████▏     | 29/70 [01:40<02:20,  3.43s/it][A[A

 43%|████▎     | 30/70 [01:42<02:05,  3.13s/it][A[A

 44%|████▍     | 31/70 [01:45<01:53,  2.92s/it][A[A

Batch loss: 2.628159523010254




 46%|████▌     | 32/70 [01:49<02:09,  3.40s/it][A[A

 47%|████▋     | 33/70 [01:52<01:55,  3.12s/it][A[A

 49%|████▊     | 34/70 [01:54<01:44,  2.90s/it][A[A

 50%|█████     | 35/70 [01:59<01:59,  3.40s/it][A[A

 51%|█████▏    | 36/70 [02:01<01:46,  3.12s/it][A[A

 53%|█████▎    | 37/70 [02:04<01:35,  2.90s/it][A[A

 54%|█████▍    | 38/70 [02:08<01:49,  3.41s/it][A[A

 56%|█████▌    | 39/70 [02:11<01:37,  3.14s/it][A[A

 57%|█████▋    | 40/70 [02:13<01:27,  2.93s/it][A[A

 59%|█████▊    | 41/70 [02:17<01:36,  3.34s/it][A[A

Batch loss: 2.506103515625




 60%|██████    | 42/70 [02:20<01:26,  3.10s/it][A[A

 61%|██████▏   | 43/70 [02:22<01:18,  2.91s/it][A[A

 63%|██████▎   | 44/70 [02:27<01:27,  3.38s/it][A[A

 64%|██████▍   | 45/70 [02:29<01:17,  3.12s/it][A[A

 66%|██████▌   | 46/70 [02:32<01:09,  2.91s/it][A[A

 67%|██████▋   | 47/70 [02:36<01:18,  3.40s/it][A[A

 69%|██████▊   | 48/70 [02:39<01:08,  3.09s/it][A[A

 70%|███████   | 49/70 [02:41<01:00,  2.86s/it][A[A

 71%|███████▏  | 50/70 [02:46<01:06,  3.33s/it][A[A

 73%|███████▎  | 51/70 [02:48<00:58,  3.07s/it][A[A

Batch loss: 2.3926234245300293




 74%|███████▍  | 52/70 [02:50<00:51,  2.88s/it][A[A

 76%|███████▌  | 53/70 [02:55<00:58,  3.46s/it][A[A

 77%|███████▋  | 54/70 [02:58<00:50,  3.18s/it][A[A

 79%|███████▊  | 55/70 [03:00<00:44,  2.93s/it][A[A

 80%|████████  | 56/70 [03:05<00:48,  3.45s/it][A[A

 81%|████████▏ | 57/70 [03:07<00:40,  3.15s/it][A[A

 83%|████████▎ | 58/70 [03:10<00:35,  2.92s/it][A[A

 84%|████████▍ | 59/70 [03:14<00:37,  3.38s/it][A[A

 86%|████████▌ | 60/70 [03:17<00:31,  3.12s/it][A[A

 87%|████████▋ | 61/70 [03:19<00:26,  2.94s/it][A[A

Batch loss: 2.3742103576660156




 89%|████████▊ | 62/70 [03:24<00:28,  3.51s/it][A[A

 90%|█████████ | 63/70 [03:27<00:22,  3.23s/it][A[A

 91%|█████████▏| 64/70 [03:29<00:17,  3.00s/it][A[A

 93%|█████████▎| 65/70 [03:34<00:17,  3.56s/it][A[A

 94%|█████████▍| 66/70 [03:36<00:13,  3.27s/it][A[A

 96%|█████████▌| 67/70 [03:39<00:09,  3.02s/it][A[A

 97%|█████████▋| 68/70 [03:44<00:07,  3.54s/it][A[A

 99%|█████████▊| 69/70 [03:46<00:03,  3.22s/it][A[A

100%|██████████| 70/70 [03:47<00:00,  3.25s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  2.5640482459749494




 12%|█▎        | 1/8 [00:08<01:01,  8.82s/it][A[A

 25%|██▌       | 2/8 [00:12<00:42,  7.14s/it][A[A

 38%|███▊      | 3/8 [00:15<00:30,  6.04s/it][A[A

 50%|█████     | 4/8 [00:17<00:19,  4.91s/it][A[A

 62%|██████▎   | 5/8 [00:22<00:14,  4.76s/it][A[A

 75%|███████▌  | 6/8 [00:24<00:08,  4.04s/it][A[A

 88%|████████▊ | 7/8 [00:26<00:03,  3.52s/it][A[A

100%|██████████| 8/8 [00:30<00:00,  3.75s/it]


Valid Loss:  2.54573056101799




  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  3




  1%|▏         | 1/70 [00:08<10:00,  8.70s/it][A[A

Batch loss: 2.361776828765869




  3%|▎         | 2/70 [00:12<08:09,  7.21s/it][A[A

  4%|▍         | 3/70 [00:16<06:54,  6.19s/it][A[A

  6%|▌         | 4/70 [00:18<05:34,  5.06s/it][A[A

  7%|▋         | 5/70 [00:23<05:25,  5.01s/it][A[A

  9%|▊         | 6/70 [00:26<04:33,  4.28s/it][A[A

 10%|█         | 7/70 [00:28<03:55,  3.74s/it][A[A

 11%|█▏        | 8/70 [00:33<04:10,  4.04s/it][A[A

 13%|█▎        | 9/70 [00:35<03:37,  3.56s/it][A[A

 14%|█▍        | 10/70 [00:38<03:11,  3.20s/it][A[A

 16%|█▌        | 11/70 [00:42<03:35,  3.65s/it][A[A

Batch loss: 2.2033088207244873




 17%|█▋        | 12/70 [00:45<03:13,  3.33s/it][A[A

 19%|█▊        | 13/70 [00:47<02:54,  3.07s/it][A[A

 20%|██        | 14/70 [00:52<03:18,  3.55s/it][A[A

 21%|██▏       | 15/70 [00:55<02:59,  3.26s/it][A[A

 23%|██▎       | 16/70 [00:57<02:43,  3.04s/it][A[A

 24%|██▍       | 17/70 [01:02<03:08,  3.55s/it][A[A

 26%|██▌       | 18/70 [01:04<02:48,  3.24s/it][A[A

 27%|██▋       | 19/70 [01:07<02:31,  2.97s/it][A[A

 29%|██▊       | 20/70 [01:11<02:49,  3.39s/it][A[A

 30%|███       | 21/70 [01:14<02:31,  3.10s/it][A[A

Batch loss: 2.1477975845336914




 31%|███▏      | 22/70 [01:16<02:17,  2.86s/it][A[A

 33%|███▎      | 23/70 [01:20<02:31,  3.23s/it][A[A

 34%|███▍      | 24/70 [01:22<02:16,  2.97s/it][A[A

 36%|███▌      | 25/70 [01:25<02:04,  2.77s/it][A[A

 37%|███▋      | 26/70 [01:29<02:22,  3.25s/it][A[A

 39%|███▊      | 27/70 [01:31<02:08,  2.98s/it][A[A

 40%|████      | 28/70 [01:34<01:56,  2.77s/it][A[A

 41%|████▏     | 29/70 [01:38<02:15,  3.30s/it][A[A

 43%|████▎     | 30/70 [01:41<02:01,  3.03s/it][A[A

 44%|████▍     | 31/70 [01:43<01:50,  2.85s/it][A[A

Batch loss: 2.209555149078369




 46%|████▌     | 32/70 [01:48<02:10,  3.42s/it][A[A

 47%|████▋     | 33/70 [01:50<01:57,  3.19s/it][A[A

 49%|████▊     | 34/70 [01:53<01:47,  3.00s/it][A[A

 50%|█████     | 35/70 [01:58<02:02,  3.51s/it][A[A

 51%|█████▏    | 36/70 [02:00<01:50,  3.24s/it][A[A

 53%|█████▎    | 37/70 [02:03<01:39,  3.02s/it][A[A

 54%|█████▍    | 38/70 [02:07<01:52,  3.53s/it][A[A

 56%|█████▌    | 39/70 [02:10<01:39,  3.22s/it][A[A

 57%|█████▋    | 40/70 [02:12<01:28,  2.95s/it][A[A

 59%|█████▊    | 41/70 [02:17<01:40,  3.46s/it][A[A

Batch loss: 2.3454864025115967




 60%|██████    | 42/70 [02:19<01:28,  3.16s/it][A[A

 61%|██████▏   | 43/70 [02:22<01:18,  2.92s/it][A[A

 63%|██████▎   | 44/70 [02:26<01:29,  3.44s/it][A[A

 64%|██████▍   | 45/70 [02:29<01:18,  3.14s/it][A[A

 66%|██████▌   | 46/70 [02:31<01:09,  2.90s/it][A[A

 67%|██████▋   | 47/70 [02:36<01:17,  3.38s/it][A[A

 69%|██████▊   | 48/70 [02:38<01:08,  3.11s/it][A[A

 70%|███████   | 49/70 [02:41<01:00,  2.89s/it][A[A

 71%|███████▏  | 50/70 [02:45<01:07,  3.37s/it][A[A

 73%|███████▎  | 51/70 [02:47<00:58,  3.10s/it][A[A

Batch loss: 2.2294256687164307




 74%|███████▍  | 52/70 [02:50<00:52,  2.89s/it][A[A

 76%|███████▌  | 53/70 [02:55<00:58,  3.45s/it][A[A

 77%|███████▋  | 54/70 [02:57<00:51,  3.20s/it][A[A

 79%|███████▊  | 55/70 [03:00<00:44,  2.99s/it][A[A

 80%|████████  | 56/70 [03:05<00:49,  3.53s/it][A[A

 81%|████████▏ | 57/70 [03:07<00:42,  3.25s/it][A[A

 83%|████████▎ | 58/70 [03:10<00:36,  3.01s/it][A[A

 84%|████████▍ | 59/70 [03:14<00:39,  3.56s/it][A[A

 86%|████████▌ | 60/70 [03:17<00:32,  3.23s/it][A[A

 87%|████████▋ | 61/70 [03:19<00:26,  2.97s/it][A[A

Batch loss: 2.3733742237091064




 89%|████████▊ | 62/70 [03:24<00:27,  3.48s/it][A[A

 90%|█████████ | 63/70 [03:26<00:22,  3.17s/it][A[A

 91%|█████████▏| 64/70 [03:29<00:17,  2.92s/it][A[A

 93%|█████████▎| 65/70 [03:33<00:16,  3.39s/it][A[A

 94%|█████████▍| 66/70 [03:36<00:12,  3.11s/it][A[A

 96%|█████████▌| 67/70 [03:38<00:08,  2.88s/it][A[A

 97%|█████████▋| 68/70 [03:43<00:06,  3.37s/it][A[A

 99%|█████████▊| 69/70 [03:45<00:03,  3.10s/it][A[A

100%|██████████| 70/70 [03:46<00:00,  3.23s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  2.319684485026768




 12%|█▎        | 1/8 [00:08<01:02,  8.86s/it][A[A

 25%|██▌       | 2/8 [00:12<00:43,  7.24s/it][A[A

 38%|███▊      | 3/8 [00:15<00:30,  6.09s/it][A[A

 50%|█████     | 4/8 [00:18<00:19,  4.99s/it][A[A

 62%|██████▎   | 5/8 [00:22<00:14,  4.88s/it][A[A

 75%|███████▌  | 6/8 [00:25<00:08,  4.18s/it][A[A

 88%|████████▊ | 7/8 [00:27<00:03,  3.64s/it][A[A

100%|██████████| 8/8 [00:31<00:00,  3.88s/it]


Valid Loss:  2.4399347007274628




  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  4




  1%|▏         | 1/70 [00:08<10:03,  8.75s/it][A[A

Batch loss: 2.0614006519317627




  3%|▎         | 2/70 [00:12<08:09,  7.19s/it][A[A

  4%|▍         | 3/70 [00:15<06:47,  6.08s/it][A[A

  6%|▌         | 4/70 [00:18<05:27,  4.96s/it][A[A

  7%|▋         | 5/70 [00:22<05:14,  4.85s/it][A[A

  9%|▊         | 6/70 [00:25<04:24,  4.14s/it][A[A

 10%|█         | 7/70 [00:27<03:47,  3.62s/it][A[A

 11%|█▏        | 8/70 [00:32<04:02,  3.91s/it][A[A

 13%|█▎        | 9/70 [00:34<03:32,  3.49s/it][A[A

 14%|█▍        | 10/70 [00:37<03:09,  3.15s/it][A[A

 16%|█▌        | 11/70 [00:41<03:31,  3.59s/it][A[A

Batch loss: 1.983657956123352




 17%|█▋        | 12/70 [00:44<03:09,  3.26s/it][A[A

 19%|█▊        | 13/70 [00:46<02:51,  3.01s/it][A[A

 20%|██        | 14/70 [00:51<03:18,  3.55s/it][A[A

 21%|██▏       | 15/70 [00:53<02:59,  3.26s/it][A[A

 23%|██▎       | 16/70 [00:56<02:42,  3.02s/it][A[A

 24%|██▍       | 17/70 [01:01<03:06,  3.51s/it][A[A

 26%|██▌       | 18/70 [01:03<02:48,  3.24s/it][A[A

 27%|██▋       | 19/70 [01:06<02:34,  3.03s/it][A[A

 29%|██▊       | 20/70 [01:10<02:55,  3.51s/it][A[A

 30%|███       | 21/70 [01:13<02:35,  3.17s/it][A[A

Batch loss: 2.2230007648468018




 31%|███▏      | 22/70 [01:15<02:20,  2.92s/it][A[A

 33%|███▎      | 23/70 [01:19<02:35,  3.32s/it][A[A

 34%|███▍      | 24/70 [01:22<02:19,  3.03s/it][A[A

 36%|███▌      | 25/70 [01:24<02:06,  2.81s/it][A[A

 37%|███▋      | 26/70 [01:28<02:18,  3.16s/it][A[A

 39%|███▊      | 27/70 [01:30<02:06,  2.94s/it][A[A

 40%|████      | 28/70 [01:33<02:01,  2.88s/it][A[A

 41%|████▏     | 29/70 [01:36<01:52,  2.75s/it][A[A

 43%|████▎     | 30/70 [01:38<01:50,  2.75s/it][A[A

 44%|████▍     | 31/70 [01:41<01:44,  2.67s/it][A[A

Batch loss: 2.096762180328369




 46%|████▌     | 32/70 [01:43<01:41,  2.67s/it][A[A

 47%|████▋     | 33/70 [01:46<01:36,  2.60s/it][A[A

 49%|████▊     | 34/70 [01:49<01:35,  2.66s/it][A[A

 50%|█████     | 35/70 [01:51<01:31,  2.62s/it][A[A

 51%|█████▏    | 36/70 [01:54<01:32,  2.71s/it][A[A

 53%|█████▎    | 37/70 [01:57<01:28,  2.67s/it][A[A

 54%|█████▍    | 38/70 [02:00<01:27,  2.73s/it][A[A

 56%|█████▌    | 39/70 [02:02<01:23,  2.68s/it][A[A

 57%|█████▋    | 40/70 [02:05<01:21,  2.73s/it][A[A

 59%|█████▊    | 41/70 [02:08<01:17,  2.67s/it][A[A

Batch loss: 2.5971052646636963




 60%|██████    | 42/70 [02:10<01:16,  2.74s/it][A[A

 61%|██████▏   | 43/70 [02:13<01:12,  2.68s/it][A[A

 63%|██████▎   | 44/70 [02:16<01:11,  2.74s/it][A[A

 64%|██████▍   | 45/70 [02:18<01:06,  2.65s/it][A[A

 66%|██████▌   | 46/70 [02:21<01:04,  2.69s/it][A[A

 67%|██████▋   | 47/70 [02:24<01:00,  2.63s/it][A[A

 69%|██████▊   | 48/70 [02:26<00:59,  2.69s/it][A[A

 70%|███████   | 49/70 [02:29<00:55,  2.64s/it][A[A

 71%|███████▏  | 50/70 [02:32<00:54,  2.71s/it][A[A

 73%|███████▎  | 51/70 [02:34<00:51,  2.69s/it][A[A

Batch loss: 2.237903594970703




 74%|███████▍  | 52/70 [02:37<00:49,  2.77s/it][A[A

 76%|███████▌  | 53/70 [02:41<00:50,  2.98s/it][A[A

 77%|███████▋  | 54/70 [02:44<00:47,  2.96s/it][A[A

 79%|███████▊  | 55/70 [02:46<00:42,  2.84s/it][A[A

 80%|████████  | 56/70 [02:49<00:39,  2.83s/it][A[A

 81%|████████▏ | 57/70 [02:52<00:36,  2.78s/it][A[A

 83%|████████▎ | 58/70 [02:55<00:34,  2.83s/it][A[A

 84%|████████▍ | 59/70 [02:57<00:30,  2.73s/it][A[A

 86%|████████▌ | 60/70 [03:00<00:27,  2.74s/it][A[A

 87%|████████▋ | 61/70 [03:02<00:23,  2.64s/it][A[A

Batch loss: 2.0882177352905273




 89%|████████▊ | 62/70 [03:05<00:21,  2.70s/it][A[A

 90%|█████████ | 63/70 [03:08<00:18,  2.63s/it][A[A

 91%|█████████▏| 64/70 [03:10<00:15,  2.65s/it][A[A

 93%|█████████▎| 65/70 [03:13<00:12,  2.59s/it][A[A

 94%|█████████▍| 66/70 [03:16<00:10,  2.62s/it][A[A

 96%|█████████▌| 67/70 [03:18<00:07,  2.57s/it][A[A

 97%|█████████▋| 68/70 [03:21<00:05,  2.62s/it][A[A

 99%|█████████▊| 69/70 [03:23<00:02,  2.56s/it][A[A

100%|██████████| 70/70 [03:24<00:00,  2.92s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  2.1733323761395047




 12%|█▎        | 1/8 [00:03<00:24,  3.54s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.29s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.99s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.87s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.70s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.66s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.57s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.49s/it]


Valid Loss:  2.371651381254196




  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  5




  1%|▏         | 1/70 [00:03<04:04,  3.55s/it][A[A

Batch loss: 2.1504173278808594




  3%|▎         | 2/70 [00:06<03:48,  3.37s/it][A[A

  4%|▍         | 3/70 [00:08<03:26,  3.08s/it][A[A

  6%|▌         | 4/70 [00:11<03:16,  2.98s/it][A[A

  7%|▋         | 5/70 [00:13<03:00,  2.78s/it][A[A

  9%|▊         | 6/70 [00:16<02:53,  2.71s/it][A[A

 10%|█         | 7/70 [00:18<02:44,  2.61s/it][A[A

 11%|█▏        | 8/70 [00:21<02:40,  2.59s/it][A[A

 13%|█▎        | 9/70 [00:23<02:33,  2.51s/it][A[A

 14%|█▍        | 10/70 [00:26<02:31,  2.53s/it][A[A

 16%|█▌        | 11/70 [00:28<02:25,  2.46s/it][A[A

Batch loss: 1.910562515258789




 17%|█▋        | 12/70 [00:31<02:23,  2.48s/it][A[A

 19%|█▊        | 13/70 [00:33<02:18,  2.44s/it][A[A

 20%|██        | 14/70 [00:36<02:19,  2.49s/it][A[A

 21%|██▏       | 15/70 [00:38<02:14,  2.44s/it][A[A

 23%|██▎       | 16/70 [00:41<02:14,  2.49s/it][A[A

 24%|██▍       | 17/70 [00:43<02:08,  2.43s/it][A[A

 26%|██▌       | 18/70 [00:45<02:09,  2.49s/it][A[A

 27%|██▋       | 19/70 [00:48<02:04,  2.43s/it][A[A

 29%|██▊       | 20/70 [00:50<02:04,  2.49s/it][A[A

 30%|███       | 21/70 [00:53<02:01,  2.49s/it][A[A

Batch loss: 2.1959590911865234




 31%|███▏      | 22/70 [00:56<02:03,  2.57s/it][A[A

 33%|███▎      | 23/70 [00:58<01:59,  2.55s/it][A[A

 34%|███▍      | 24/70 [01:01<01:59,  2.59s/it][A[A

 36%|███▌      | 25/70 [01:03<01:54,  2.55s/it][A[A

 37%|███▋      | 26/70 [01:06<01:54,  2.60s/it][A[A

 39%|███▊      | 27/70 [01:08<01:49,  2.55s/it][A[A

 40%|████      | 28/70 [01:11<01:49,  2.60s/it][A[A

 41%|████▏     | 29/70 [01:14<01:44,  2.55s/it][A[A

 43%|████▎     | 30/70 [01:16<01:43,  2.59s/it][A[A

 44%|████▍     | 31/70 [01:19<01:38,  2.52s/it][A[A

Batch loss: 2.187804698944092




 46%|████▌     | 32/70 [01:21<01:36,  2.55s/it][A[A

 47%|████▋     | 33/70 [01:24<01:32,  2.49s/it][A[A

 49%|████▊     | 34/70 [01:26<01:30,  2.52s/it][A[A

 50%|█████     | 35/70 [01:28<01:26,  2.46s/it][A[A

 51%|█████▏    | 36/70 [01:31<01:25,  2.51s/it][A[A

 53%|█████▎    | 37/70 [01:33<01:20,  2.45s/it][A[A

 54%|█████▍    | 38/70 [01:36<01:19,  2.48s/it][A[A

 56%|█████▌    | 39/70 [01:38<01:15,  2.43s/it][A[A

 57%|█████▋    | 40/70 [01:41<01:14,  2.47s/it][A[A

 59%|█████▊    | 41/70 [01:43<01:10,  2.42s/it][A[A

Batch loss: 2.1499154567718506




 60%|██████    | 42/70 [01:46<01:09,  2.47s/it][A[A

 61%|██████▏   | 43/70 [01:48<01:05,  2.42s/it][A[A

 63%|██████▎   | 44/70 [01:51<01:04,  2.46s/it][A[A

 64%|██████▍   | 45/70 [01:53<01:00,  2.41s/it][A[A

 66%|██████▌   | 46/70 [01:55<00:59,  2.46s/it][A[A

 67%|██████▋   | 47/70 [01:58<00:56,  2.44s/it][A[A

 69%|██████▊   | 48/70 [02:01<00:55,  2.51s/it][A[A

 70%|███████   | 49/70 [02:03<00:52,  2.49s/it][A[A

 71%|███████▏  | 50/70 [02:06<00:50,  2.55s/it][A[A

 73%|███████▎  | 51/70 [02:08<00:47,  2.52s/it][A[A

Batch loss: 2.1943869590759277




 74%|███████▍  | 52/70 [02:11<00:46,  2.57s/it][A[A

 76%|███████▌  | 53/70 [02:13<00:43,  2.53s/it][A[A

 77%|███████▋  | 54/70 [02:16<00:41,  2.58s/it][A[A

 79%|███████▊  | 55/70 [02:18<00:38,  2.54s/it][A[A

 80%|████████  | 56/70 [02:21<00:36,  2.59s/it][A[A

 81%|████████▏ | 57/70 [02:23<00:32,  2.52s/it][A[A

 83%|████████▎ | 58/70 [02:26<00:30,  2.53s/it][A[A

 84%|████████▍ | 59/70 [02:28<00:27,  2.47s/it][A[A

 86%|████████▌ | 60/70 [02:31<00:25,  2.51s/it][A[A

 87%|████████▋ | 61/70 [02:33<00:22,  2.46s/it][A[A

Batch loss: 2.0433175563812256




 89%|████████▊ | 62/70 [02:36<00:19,  2.49s/it][A[A

 90%|█████████ | 63/70 [02:38<00:17,  2.45s/it][A[A

 91%|█████████▏| 64/70 [02:41<00:14,  2.48s/it][A[A

 93%|█████████▎| 65/70 [02:43<00:12,  2.45s/it][A[A

 94%|█████████▍| 66/70 [02:46<00:09,  2.48s/it][A[A

 96%|█████████▌| 67/70 [02:48<00:07,  2.43s/it][A[A

 97%|█████████▋| 68/70 [02:51<00:04,  2.47s/it][A[A

 99%|█████████▊| 69/70 [02:53<00:02,  2.44s/it][A[A

100%|██████████| 70/70 [02:54<00:00,  2.49s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  2.0590263536998203




 12%|█▎        | 1/8 [00:03<00:22,  3.15s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.00s/it][A[A

 38%|███▊      | 3/8 [00:07<00:13,  2.76s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.71s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.59s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.59s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.52s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.42s/it]


Valid Loss:  2.274596780538559




  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  6




  1%|▏         | 1/70 [00:03<04:05,  3.55s/it][A[A

Batch loss: 1.972293496131897




  3%|▎         | 2/70 [00:06<03:48,  3.36s/it][A[A

  4%|▍         | 3/70 [00:08<03:27,  3.10s/it][A[A

  6%|▌         | 4/70 [00:11<03:17,  3.00s/it][A[A

  7%|▋         | 5/70 [00:14<03:03,  2.83s/it][A[A

  9%|▊         | 6/70 [00:16<02:55,  2.75s/it][A[A

 10%|█         | 7/70 [00:19<02:44,  2.61s/it][A[A

 11%|█▏        | 8/70 [00:21<02:42,  2.61s/it][A[A

 13%|█▎        | 9/70 [00:24<02:36,  2.57s/it][A[A

 14%|█▍        | 10/70 [00:26<02:36,  2.60s/it][A[A

 16%|█▌        | 11/70 [00:29<02:31,  2.56s/it][A[A

Batch loss: 2.146818161010742




 17%|█▋        | 12/70 [00:31<02:31,  2.61s/it][A[A

 19%|█▊        | 13/70 [00:34<02:25,  2.55s/it][A[A

 20%|██        | 14/70 [00:37<02:24,  2.58s/it][A[A

 21%|██▏       | 15/70 [00:39<02:19,  2.54s/it][A[A

 23%|██▎       | 16/70 [00:42<02:19,  2.59s/it][A[A

 24%|██▍       | 17/70 [00:44<02:15,  2.55s/it][A[A

 26%|██▌       | 18/70 [00:47<02:14,  2.58s/it][A[A

 27%|██▋       | 19/70 [00:49<02:07,  2.50s/it][A[A

 29%|██▊       | 20/70 [00:52<02:05,  2.52s/it][A[A

 30%|███       | 21/70 [00:54<02:00,  2.46s/it][A[A

Batch loss: 1.8255149126052856




 31%|███▏      | 22/70 [00:57<02:00,  2.52s/it][A[A

 33%|███▎      | 23/70 [00:59<01:56,  2.47s/it][A[A

 34%|███▍      | 24/70 [01:02<01:55,  2.51s/it][A[A

 36%|███▌      | 25/70 [01:04<01:50,  2.45s/it][A[A

 37%|███▋      | 26/70 [01:06<01:49,  2.49s/it][A[A

 39%|███▊      | 27/70 [01:09<01:45,  2.44s/it][A[A

 40%|████      | 28/70 [01:11<01:44,  2.50s/it][A[A

 41%|████▏     | 29/70 [01:14<01:40,  2.45s/it][A[A

 43%|████▎     | 30/70 [01:16<01:39,  2.49s/it][A[A

 44%|████▍     | 31/70 [01:19<01:35,  2.44s/it][A[A

Batch loss: 2.0323777198791504




 46%|████▌     | 32/70 [01:21<01:34,  2.49s/it][A[A

 47%|████▋     | 33/70 [01:24<01:30,  2.45s/it][A[A

 49%|████▊     | 34/70 [01:26<01:29,  2.48s/it][A[A

 50%|█████     | 35/70 [01:29<01:25,  2.45s/it][A[A

 51%|█████▏    | 36/70 [01:31<01:25,  2.52s/it][A[A

 53%|█████▎    | 37/70 [01:34<01:22,  2.50s/it][A[A

 54%|█████▍    | 38/70 [01:36<01:21,  2.55s/it][A[A

 56%|█████▌    | 39/70 [01:39<01:18,  2.53s/it][A[A

 57%|█████▋    | 40/70 [01:42<01:17,  2.58s/it][A[A

 59%|█████▊    | 41/70 [01:44<01:13,  2.55s/it][A[A

Batch loss: 1.9436120986938477




 60%|██████    | 42/70 [01:47<01:12,  2.59s/it][A[A

 61%|██████▏   | 43/70 [01:49<01:08,  2.54s/it][A[A

 63%|██████▎   | 44/70 [01:52<01:07,  2.59s/it][A[A

 64%|██████▍   | 45/70 [01:54<01:03,  2.54s/it][A[A

 66%|██████▌   | 46/70 [01:57<01:00,  2.54s/it][A[A

 67%|██████▋   | 47/70 [01:59<00:57,  2.49s/it][A[A

 69%|██████▊   | 48/70 [02:02<00:55,  2.52s/it][A[A

 70%|███████   | 49/70 [02:04<00:51,  2.47s/it][A[A

 71%|███████▏  | 50/70 [02:07<00:49,  2.49s/it][A[A

 73%|███████▎  | 51/70 [02:09<00:46,  2.44s/it][A[A

Batch loss: 1.935197114944458




 74%|███████▍  | 52/70 [02:12<00:44,  2.49s/it][A[A

 76%|███████▌  | 53/70 [02:14<00:41,  2.43s/it][A[A

 77%|███████▋  | 54/70 [02:16<00:39,  2.47s/it][A[A

 79%|███████▊  | 55/70 [02:19<00:36,  2.42s/it][A[A

 80%|████████  | 56/70 [02:21<00:34,  2.48s/it][A[A

 81%|████████▏ | 57/70 [02:24<00:31,  2.43s/it][A[A

 83%|████████▎ | 58/70 [02:26<00:29,  2.47s/it][A[A

 84%|████████▍ | 59/70 [02:29<00:26,  2.43s/it][A[A

 86%|████████▌ | 60/70 [02:31<00:24,  2.48s/it][A[A

 87%|████████▋ | 61/70 [02:33<00:21,  2.44s/it][A[A

Batch loss: 2.0188586711883545




 89%|████████▊ | 62/70 [02:36<00:20,  2.51s/it][A[A

 90%|█████████ | 63/70 [02:39<00:17,  2.50s/it][A[A

 91%|█████████▏| 64/70 [02:41<00:15,  2.55s/it][A[A

 93%|█████████▎| 65/70 [02:44<00:12,  2.51s/it][A[A

 94%|█████████▍| 66/70 [02:47<00:10,  2.59s/it][A[A

 96%|█████████▌| 67/70 [02:49<00:07,  2.56s/it][A[A

 97%|█████████▋| 68/70 [02:52<00:05,  2.62s/it][A[A

 99%|█████████▊| 69/70 [02:54<00:02,  2.59s/it][A[A

100%|██████████| 70/70 [02:55<00:00,  2.51s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.982649028301239




 12%|█▎        | 1/8 [00:03<00:23,  3.31s/it][A[A

 25%|██▌       | 2/8 [00:06<00:18,  3.14s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.87s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.74s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:07,  2.62s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.56s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.48s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.40s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.2750474214553833
Epoch:  7




  1%|▏         | 1/70 [00:03<03:46,  3.28s/it][A[A

Batch loss: 1.9821178913116455




  3%|▎         | 2/70 [00:06<03:32,  3.13s/it][A[A

  4%|▍         | 3/70 [00:08<03:16,  2.93s/it][A[A

  6%|▌         | 4/70 [00:11<03:05,  2.82s/it][A[A

  7%|▋         | 5/70 [00:13<02:53,  2.68s/it][A[A

  9%|▊         | 6/70 [00:15<02:49,  2.64s/it][A[A

 10%|█         | 7/70 [00:18<02:40,  2.55s/it][A[A

 11%|█▏        | 8/70 [00:20<02:38,  2.56s/it][A[A

 13%|█▎        | 9/70 [00:23<02:32,  2.50s/it][A[A

 14%|█▍        | 10/70 [00:25<02:30,  2.51s/it][A[A

 16%|█▌        | 11/70 [00:28<02:27,  2.51s/it][A[A

Batch loss: 1.8201854228973389




 17%|█▋        | 12/70 [00:30<02:28,  2.56s/it][A[A

 19%|█▊        | 13/70 [00:33<02:24,  2.54s/it][A[A

 20%|██        | 14/70 [00:36<02:24,  2.58s/it][A[A

 21%|██▏       | 15/70 [00:38<02:19,  2.54s/it][A[A

 23%|██▎       | 16/70 [00:41<02:19,  2.58s/it][A[A

 24%|██▍       | 17/70 [00:43<02:14,  2.55s/it][A[A

 26%|██▌       | 18/70 [00:46<02:14,  2.58s/it][A[A

 27%|██▋       | 19/70 [00:48<02:10,  2.55s/it][A[A

 29%|██▊       | 20/70 [00:51<02:09,  2.59s/it][A[A

 30%|███       | 21/70 [00:53<02:03,  2.52s/it][A[A

Batch loss: 1.960526704788208




 31%|███▏      | 22/70 [00:56<02:01,  2.52s/it][A[A

 33%|███▎      | 23/70 [00:58<01:56,  2.48s/it][A[A

 34%|███▍      | 24/70 [01:01<01:55,  2.51s/it][A[A

 36%|███▌      | 25/70 [01:03<01:50,  2.46s/it][A[A

 37%|███▋      | 26/70 [01:06<01:49,  2.50s/it][A[A

 39%|███▊      | 27/70 [01:08<01:45,  2.46s/it][A[A

 40%|████      | 28/70 [01:11<01:45,  2.52s/it][A[A

 41%|████▏     | 29/70 [01:13<01:41,  2.46s/it][A[A

 43%|████▎     | 30/70 [01:16<01:39,  2.49s/it][A[A

 44%|████▍     | 31/70 [01:18<01:35,  2.45s/it][A[A

Batch loss: 1.9180645942687988




 46%|████▌     | 32/70 [01:21<01:34,  2.48s/it][A[A

 47%|████▋     | 33/70 [01:23<01:30,  2.44s/it][A[A

 49%|████▊     | 34/70 [01:26<01:28,  2.47s/it][A[A

 50%|█████     | 35/70 [01:28<01:24,  2.43s/it][A[A

 51%|█████▏    | 36/70 [01:30<01:24,  2.47s/it][A[A

 53%|█████▎    | 37/70 [01:33<01:21,  2.46s/it][A[A

 54%|█████▍    | 38/70 [01:36<01:20,  2.52s/it][A[A

 56%|█████▌    | 39/70 [01:38<01:17,  2.51s/it][A[A

 57%|█████▋    | 40/70 [01:41<01:16,  2.55s/it][A[A

 59%|█████▊    | 41/70 [01:43<01:13,  2.53s/it][A[A

Batch loss: 1.9472187757492065




 60%|██████    | 42/70 [01:46<01:12,  2.58s/it][A[A

 61%|██████▏   | 43/70 [01:48<01:08,  2.54s/it][A[A

 63%|██████▎   | 44/70 [01:51<01:07,  2.58s/it][A[A

 64%|██████▍   | 45/70 [01:53<01:03,  2.54s/it][A[A

 66%|██████▌   | 46/70 [01:56<01:02,  2.58s/it][A[A

 67%|██████▋   | 47/70 [01:58<00:57,  2.52s/it][A[A

 69%|██████▊   | 48/70 [02:01<00:55,  2.53s/it][A[A

 70%|███████   | 49/70 [02:03<00:51,  2.47s/it][A[A

 71%|███████▏  | 50/70 [02:06<00:50,  2.51s/it][A[A

 73%|███████▎  | 51/70 [02:08<00:46,  2.46s/it][A[A

Batch loss: 1.8302291631698608




 74%|███████▍  | 52/70 [02:11<00:44,  2.50s/it][A[A

 76%|███████▌  | 53/70 [02:13<00:41,  2.45s/it][A[A

 77%|███████▋  | 54/70 [02:16<00:39,  2.49s/it][A[A

 79%|███████▊  | 55/70 [02:18<00:36,  2.44s/it][A[A

 80%|████████  | 56/70 [02:21<00:34,  2.48s/it][A[A

 81%|████████▏ | 57/70 [02:23<00:31,  2.44s/it][A[A

 83%|████████▎ | 58/70 [02:26<00:29,  2.47s/it][A[A

 84%|████████▍ | 59/70 [02:28<00:26,  2.43s/it][A[A

 86%|████████▌ | 60/70 [02:31<00:24,  2.48s/it][A[A

 87%|████████▋ | 61/70 [02:33<00:21,  2.44s/it][A[A

Batch loss: 1.9807995557785034




 89%|████████▊ | 62/70 [02:35<00:19,  2.49s/it][A[A

 90%|█████████ | 63/70 [02:38<00:17,  2.45s/it][A[A

 91%|█████████▏| 64/70 [02:41<00:15,  2.53s/it][A[A

 93%|█████████▎| 65/70 [02:43<00:12,  2.50s/it][A[A

 94%|█████████▍| 66/70 [02:46<00:10,  2.57s/it][A[A

 96%|█████████▌| 67/70 [02:48<00:07,  2.53s/it][A[A

 97%|█████████▋| 68/70 [02:51<00:05,  2.58s/it][A[A

 99%|█████████▊| 69/70 [02:53<00:02,  2.54s/it][A[A

100%|██████████| 70/70 [02:54<00:00,  2.49s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.911324644088745




 12%|█▎        | 1/8 [00:03<00:23,  3.31s/it][A[A

 25%|██▌       | 2/8 [00:06<00:18,  3.15s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.89s/it][A[A

 50%|█████     | 4/8 [00:10<00:11,  2.77s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:07,  2.61s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.57s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.50s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.43s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.279944270849228
Epoch:  8




  1%|▏         | 1/70 [00:03<03:57,  3.44s/it][A[A

Batch loss: 1.8110990524291992




  3%|▎         | 2/70 [00:06<03:42,  3.28s/it][A[A

  4%|▍         | 3/70 [00:08<03:20,  2.99s/it][A[A

  6%|▌         | 4/70 [00:11<03:11,  2.90s/it][A[A

  7%|▋         | 5/70 [00:13<02:59,  2.77s/it][A[A

  9%|▊         | 6/70 [00:16<02:56,  2.76s/it][A[A

 10%|█         | 7/70 [00:19<02:47,  2.67s/it][A[A

 11%|█▏        | 8/70 [00:21<02:45,  2.67s/it][A[A

 13%|█▎        | 9/70 [00:24<02:36,  2.57s/it][A[A

 14%|█▍        | 10/70 [00:26<02:34,  2.57s/it][A[A

 16%|█▌        | 11/70 [00:28<02:27,  2.50s/it][A[A

Batch loss: 1.7811729907989502




 17%|█▋        | 12/70 [00:31<02:26,  2.53s/it][A[A

 19%|█▊        | 13/70 [00:33<02:20,  2.46s/it][A[A

 20%|██        | 14/70 [00:36<02:19,  2.49s/it][A[A

 21%|██▏       | 15/70 [00:38<02:15,  2.46s/it][A[A

 23%|██▎       | 16/70 [00:41<02:14,  2.49s/it][A[A

 24%|██▍       | 17/70 [00:43<02:09,  2.45s/it][A[A

 26%|██▌       | 18/70 [00:46<02:14,  2.58s/it][A[A

 27%|██▋       | 19/70 [00:49<02:10,  2.57s/it][A[A

 29%|██▊       | 20/70 [00:51<02:08,  2.58s/it][A[A

 30%|███       | 21/70 [00:54<02:02,  2.51s/it][A[A

Batch loss: 1.7938789129257202




 31%|███▏      | 22/70 [00:56<02:01,  2.52s/it][A[A

 33%|███▎      | 23/70 [00:58<01:56,  2.48s/it][A[A

 34%|███▍      | 24/70 [01:01<01:54,  2.50s/it][A[A

 36%|███▌      | 25/70 [01:03<01:51,  2.48s/it][A[A

 37%|███▋      | 26/70 [01:06<01:51,  2.53s/it][A[A

 39%|███▊      | 27/70 [01:09<01:48,  2.52s/it][A[A

 40%|████      | 28/70 [01:11<01:47,  2.57s/it][A[A

 41%|████▏     | 29/70 [01:14<01:43,  2.53s/it][A[A

 43%|████▎     | 30/70 [01:16<01:42,  2.56s/it][A[A

 44%|████▍     | 31/70 [01:19<01:39,  2.54s/it][A[A

Batch loss: 2.0508902072906494




 46%|████▌     | 32/70 [01:22<01:38,  2.59s/it][A[A

 47%|████▋     | 33/70 [01:24<01:34,  2.55s/it][A[A

 49%|████▊     | 34/70 [01:27<01:33,  2.59s/it][A[A

 50%|█████     | 35/70 [01:29<01:29,  2.57s/it][A[A

 51%|█████▏    | 36/70 [01:32<01:27,  2.58s/it][A[A

 53%|█████▎    | 37/70 [01:34<01:23,  2.52s/it][A[A

 54%|█████▍    | 38/70 [01:37<01:21,  2.53s/it][A[A

 56%|█████▌    | 39/70 [01:39<01:17,  2.51s/it][A[A

 57%|█████▋    | 40/70 [01:42<01:15,  2.52s/it][A[A

 59%|█████▊    | 41/70 [01:44<01:11,  2.46s/it][A[A

Batch loss: 1.9784395694732666




 60%|██████    | 42/70 [01:47<01:10,  2.51s/it][A[A

 61%|██████▏   | 43/70 [01:49<01:06,  2.46s/it][A[A

 63%|██████▎   | 44/70 [01:52<01:04,  2.48s/it][A[A

 64%|██████▍   | 45/70 [01:54<01:01,  2.45s/it][A[A

 66%|██████▌   | 46/70 [01:56<00:59,  2.48s/it][A[A

 67%|██████▋   | 47/70 [01:59<00:56,  2.44s/it][A[A

 69%|██████▊   | 48/70 [02:01<00:54,  2.48s/it][A[A

 70%|███████   | 49/70 [02:04<00:51,  2.44s/it][A[A

 71%|███████▏  | 50/70 [02:06<00:49,  2.48s/it][A[A

 73%|███████▎  | 51/70 [02:09<00:46,  2.45s/it][A[A

Batch loss: 1.8936649560928345




 74%|███████▍  | 52/70 [02:11<00:45,  2.52s/it][A[A

 76%|███████▌  | 53/70 [02:14<00:42,  2.50s/it][A[A

 77%|███████▋  | 54/70 [02:17<00:40,  2.55s/it][A[A

 79%|███████▊  | 55/70 [02:19<00:37,  2.52s/it][A[A

 80%|████████  | 56/70 [02:22<00:36,  2.58s/it][A[A

 81%|████████▏ | 57/70 [02:24<00:33,  2.54s/it][A[A

 83%|████████▎ | 58/70 [02:27<00:31,  2.59s/it][A[A

 84%|████████▍ | 59/70 [02:29<00:28,  2.56s/it][A[A

 86%|████████▌ | 60/70 [02:32<00:25,  2.60s/it][A[A

 87%|████████▋ | 61/70 [02:34<00:23,  2.56s/it][A[A

Batch loss: 1.890252947807312




 89%|████████▊ | 62/70 [02:37<00:20,  2.56s/it][A[A

 90%|█████████ | 63/70 [02:39<00:17,  2.51s/it][A[A

 91%|█████████▏| 64/70 [02:42<00:15,  2.53s/it][A[A

 93%|█████████▎| 65/70 [02:44<00:12,  2.47s/it][A[A

 94%|█████████▍| 66/70 [02:47<00:09,  2.50s/it][A[A

 96%|█████████▌| 67/70 [02:49<00:07,  2.46s/it][A[A

 97%|█████████▋| 68/70 [02:52<00:04,  2.49s/it][A[A

 99%|█████████▊| 69/70 [02:54<00:02,  2.46s/it][A[A

100%|██████████| 70/70 [02:55<00:00,  2.51s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.8871150067874363




 12%|█▎        | 1/8 [00:03<00:22,  3.20s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.04s/it][A[A

 38%|███▊      | 3/8 [00:08<00:13,  2.79s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.68s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.55s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.54s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.45s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.37s/it]


Valid Loss:  2.245361775159836




  0%|          | 0/70 [00:00<?, ?it/s][A[A

Epoch:  9




  1%|▏         | 1/70 [00:03<04:02,  3.52s/it][A[A

Batch loss: 1.728989839553833




  3%|▎         | 2/70 [00:06<03:48,  3.36s/it][A[A

  4%|▍         | 3/70 [00:08<03:26,  3.07s/it][A[A

  6%|▌         | 4/70 [00:11<03:18,  3.00s/it][A[A

  7%|▋         | 5/70 [00:14<03:05,  2.85s/it][A[A

  9%|▊         | 6/70 [00:16<02:59,  2.80s/it][A[A

 10%|█         | 7/70 [00:19<02:50,  2.70s/it][A[A

 11%|█▏        | 8/70 [00:22<02:47,  2.70s/it][A[A

 13%|█▎        | 9/70 [00:24<02:39,  2.62s/it][A[A

 14%|█▍        | 10/70 [00:27<02:37,  2.62s/it][A[A

 16%|█▌        | 11/70 [00:29<02:29,  2.54s/it][A[A

Batch loss: 1.8951959609985352




 17%|█▋        | 12/70 [00:32<02:27,  2.55s/it][A[A

 19%|█▊        | 13/70 [00:34<02:23,  2.51s/it][A[A

 20%|██        | 14/70 [00:37<02:21,  2.53s/it][A[A

 21%|██▏       | 15/70 [00:39<02:16,  2.48s/it][A[A

 23%|██▎       | 16/70 [00:41<02:15,  2.50s/it][A[A

 24%|██▍       | 17/70 [00:44<02:10,  2.46s/it][A[A

 26%|██▌       | 18/70 [00:47<02:11,  2.53s/it][A[A

 27%|██▋       | 19/70 [00:49<02:06,  2.48s/it][A[A

 29%|██▊       | 20/70 [00:51<02:05,  2.51s/it][A[A

 30%|███       | 21/70 [00:54<02:00,  2.46s/it][A[A

Batch loss: 1.6861988306045532




 31%|███▏      | 22/70 [00:56<01:59,  2.49s/it][A[A

 33%|███▎      | 23/70 [00:59<01:55,  2.45s/it][A[A

 34%|███▍      | 24/70 [01:01<01:54,  2.49s/it][A[A

 36%|███▌      | 25/70 [01:04<01:50,  2.45s/it][A[A

 37%|███▋      | 26/70 [01:06<01:49,  2.50s/it][A[A

 39%|███▊      | 27/70 [01:09<01:47,  2.49s/it][A[A

 40%|████      | 28/70 [01:12<01:47,  2.56s/it][A[A

 41%|████▏     | 29/70 [01:14<01:43,  2.54s/it][A[A

 43%|████▎     | 30/70 [01:17<01:43,  2.59s/it][A[A

 44%|████▍     | 31/70 [01:19<01:39,  2.56s/it][A[A

Batch loss: 1.7866477966308594




 46%|████▌     | 32/70 [01:22<01:38,  2.60s/it][A[A

 47%|████▋     | 33/70 [01:24<01:34,  2.56s/it][A[A

 49%|████▊     | 34/70 [01:27<01:33,  2.61s/it][A[A

 50%|█████     | 35/70 [01:30<01:29,  2.57s/it][A[A

 51%|█████▏    | 36/70 [01:32<01:27,  2.59s/it][A[A

 53%|█████▎    | 37/70 [01:35<01:23,  2.52s/it][A[A

 54%|█████▍    | 38/70 [01:37<01:21,  2.56s/it][A[A

 56%|█████▌    | 39/70 [01:40<01:17,  2.51s/it][A[A

 57%|█████▋    | 40/70 [01:42<01:17,  2.57s/it][A[A

 59%|█████▊    | 41/70 [01:45<01:13,  2.54s/it][A[A

Batch loss: 1.7892839908599854




 60%|██████    | 42/70 [01:48<01:12,  2.60s/it][A[A

 61%|██████▏   | 43/70 [01:50<01:09,  2.56s/it][A[A

 63%|██████▎   | 44/70 [01:53<01:07,  2.61s/it][A[A

 64%|██████▍   | 45/70 [01:55<01:04,  2.58s/it][A[A

 66%|██████▌   | 46/70 [01:58<01:02,  2.62s/it][A[A

 67%|██████▋   | 47/70 [02:00<00:59,  2.58s/it][A[A

 69%|██████▊   | 48/70 [02:03<00:57,  2.61s/it][A[A

 70%|███████   | 49/70 [02:05<00:53,  2.55s/it][A[A

 71%|███████▏  | 50/70 [02:08<00:51,  2.55s/it][A[A

 73%|███████▎  | 51/70 [02:10<00:47,  2.49s/it][A[A

Batch loss: 2.0662765502929688




 74%|███████▍  | 52/70 [02:13<00:45,  2.53s/it][A[A

 76%|███████▌  | 53/70 [02:15<00:42,  2.49s/it][A[A

 77%|███████▋  | 54/70 [02:18<00:40,  2.52s/it][A[A

 79%|███████▊  | 55/70 [02:20<00:36,  2.46s/it][A[A

 80%|████████  | 56/70 [02:23<00:34,  2.50s/it][A[A

 81%|████████▏ | 57/70 [02:25<00:31,  2.45s/it][A[A

 83%|████████▎ | 58/70 [02:28<00:29,  2.50s/it][A[A

 84%|████████▍ | 59/70 [02:30<00:26,  2.45s/it][A[A

 86%|████████▌ | 60/70 [02:33<00:24,  2.48s/it][A[A

 87%|████████▋ | 61/70 [02:35<00:21,  2.44s/it][A[A

Batch loss: 2.3024544715881348




 89%|████████▊ | 62/70 [02:38<00:19,  2.49s/it][A[A

 90%|█████████ | 63/70 [02:40<00:17,  2.44s/it][A[A

 91%|█████████▏| 64/70 [02:43<00:14,  2.48s/it][A[A

 93%|█████████▎| 65/70 [02:45<00:12,  2.44s/it][A[A

 94%|█████████▍| 66/70 [02:48<00:10,  2.53s/it][A[A

 96%|█████████▌| 67/70 [02:50<00:07,  2.52s/it][A[A

 97%|█████████▋| 68/70 [02:53<00:05,  2.56s/it][A[A

 99%|█████████▊| 69/70 [02:55<00:02,  2.53s/it][A[A

100%|██████████| 70/70 [02:56<00:00,  2.52s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.8295852388654437




 12%|█▎        | 1/8 [00:03<00:23,  3.37s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.21s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.94s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.85s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.70s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.64s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.53s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.47s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3664931654930115
Epoch:  10




  1%|▏         | 1/70 [00:03<03:53,  3.39s/it][A[A

Batch loss: 1.7523257732391357




  3%|▎         | 2/70 [00:06<03:38,  3.21s/it][A[A

  4%|▍         | 3/70 [00:08<03:19,  2.97s/it][A[A

  6%|▌         | 4/70 [00:11<03:09,  2.86s/it][A[A

  7%|▋         | 5/70 [00:13<02:57,  2.72s/it][A[A

  9%|▊         | 6/70 [00:16<02:52,  2.70s/it][A[A

 10%|█         | 7/70 [00:18<02:43,  2.60s/it][A[A

 11%|█▏        | 8/70 [00:21<02:40,  2.60s/it][A[A

 13%|█▎        | 9/70 [00:23<02:34,  2.53s/it][A[A

 14%|█▍        | 10/70 [00:26<02:32,  2.54s/it][A[A

 16%|█▌        | 11/70 [00:28<02:27,  2.49s/it][A[A

Batch loss: 1.8125141859054565




 17%|█▋        | 12/70 [00:31<02:25,  2.51s/it][A[A

 19%|█▊        | 13/70 [00:33<02:20,  2.47s/it][A[A

 20%|██        | 14/70 [00:36<02:20,  2.51s/it][A[A

 21%|██▏       | 15/70 [00:38<02:18,  2.51s/it][A[A

 23%|██▎       | 16/70 [00:41<02:18,  2.56s/it][A[A

 24%|██▍       | 17/70 [00:43<02:14,  2.53s/it][A[A

 26%|██▌       | 18/70 [00:46<02:14,  2.58s/it][A[A

 27%|██▋       | 19/70 [00:48<02:10,  2.55s/it][A[A

 29%|██▊       | 20/70 [00:51<02:09,  2.59s/it][A[A

 30%|███       | 21/70 [00:54<02:05,  2.57s/it][A[A

Batch loss: 1.6545796394348145




 31%|███▏      | 22/70 [00:56<02:05,  2.60s/it][A[A

 33%|███▎      | 23/70 [00:59<02:00,  2.57s/it][A[A

 34%|███▍      | 24/70 [01:01<01:58,  2.58s/it][A[A

 36%|███▌      | 25/70 [01:04<01:53,  2.51s/it][A[A

 37%|███▋      | 26/70 [01:06<01:51,  2.53s/it][A[A

 39%|███▊      | 27/70 [01:09<01:46,  2.49s/it][A[A

 40%|████      | 28/70 [01:11<01:45,  2.52s/it][A[A

 41%|████▏     | 29/70 [01:14<01:41,  2.48s/it][A[A

 43%|████▎     | 30/70 [01:16<01:40,  2.51s/it][A[A

 44%|████▍     | 31/70 [01:19<01:36,  2.48s/it][A[A

Batch loss: 1.873030185699463




 46%|████▌     | 32/70 [01:21<01:35,  2.51s/it][A[A

 47%|████▋     | 33/70 [01:24<01:31,  2.47s/it][A[A

 49%|████▊     | 34/70 [01:26<01:30,  2.50s/it][A[A

 50%|█████     | 35/70 [01:29<01:26,  2.47s/it][A[A

 51%|█████▏    | 36/70 [01:31<01:24,  2.49s/it][A[A

 53%|█████▎    | 37/70 [01:33<01:20,  2.45s/it][A[A

 54%|█████▍    | 38/70 [01:36<01:19,  2.49s/it][A[A

 56%|█████▌    | 39/70 [01:38<01:15,  2.44s/it][A[A

 57%|█████▋    | 40/70 [01:41<01:14,  2.49s/it][A[A

 59%|█████▊    | 41/70 [01:43<01:12,  2.49s/it][A[A

Batch loss: 1.7849433422088623




 60%|██████    | 42/70 [01:46<01:11,  2.56s/it][A[A

 61%|██████▏   | 43/70 [01:49<01:08,  2.53s/it][A[A

 63%|██████▎   | 44/70 [01:51<01:07,  2.60s/it][A[A

 64%|██████▍   | 45/70 [01:54<01:03,  2.56s/it][A[A

 66%|██████▌   | 46/70 [01:57<01:02,  2.60s/it][A[A

 67%|██████▋   | 47/70 [01:59<00:58,  2.56s/it][A[A

 69%|██████▊   | 48/70 [02:02<00:57,  2.60s/it][A[A

 70%|███████   | 49/70 [02:04<00:53,  2.57s/it][A[A

 71%|███████▏  | 50/70 [02:07<00:51,  2.59s/it][A[A

 73%|███████▎  | 51/70 [02:09<00:48,  2.53s/it][A[A

Batch loss: 1.8278594017028809




 74%|███████▍  | 52/70 [02:12<00:45,  2.55s/it][A[A

 76%|███████▌  | 53/70 [02:14<00:42,  2.51s/it][A[A

 77%|███████▋  | 54/70 [02:17<00:40,  2.53s/it][A[A

 79%|███████▊  | 55/70 [02:19<00:37,  2.48s/it][A[A

 80%|████████  | 56/70 [02:22<00:35,  2.51s/it][A[A

 81%|████████▏ | 57/70 [02:24<00:32,  2.47s/it][A[A

 83%|████████▎ | 58/70 [02:27<00:30,  2.51s/it][A[A

 84%|████████▍ | 59/70 [02:29<00:27,  2.47s/it][A[A

 86%|████████▌ | 60/70 [02:32<00:25,  2.51s/it][A[A

 87%|████████▋ | 61/70 [02:34<00:22,  2.47s/it][A[A

Batch loss: 1.668601632118225




 89%|████████▊ | 62/70 [02:37<00:20,  2.50s/it][A[A

 90%|█████████ | 63/70 [02:39<00:17,  2.46s/it][A[A

 91%|█████████▏| 64/70 [02:42<00:14,  2.49s/it][A[A

 93%|█████████▎| 65/70 [02:44<00:12,  2.44s/it][A[A

 94%|█████████▍| 66/70 [02:46<00:09,  2.48s/it][A[A

 96%|█████████▌| 67/70 [02:49<00:07,  2.49s/it][A[A

 97%|█████████▋| 68/70 [02:52<00:05,  2.54s/it][A[A

 99%|█████████▊| 69/70 [02:54<00:02,  2.52s/it][A[A

100%|██████████| 70/70 [02:55<00:00,  2.51s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.7833437289510454




 12%|█▎        | 1/8 [00:03<00:23,  3.34s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.18s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.91s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.81s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.68s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.66s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.57s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.48s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3207745254039764
Epoch:  11




  1%|▏         | 1/70 [00:03<03:48,  3.31s/it][A[A

Batch loss: 1.7190561294555664




  3%|▎         | 2/70 [00:06<03:35,  3.16s/it][A[A

  4%|▍         | 3/70 [00:08<03:14,  2.90s/it][A[A

  6%|▌         | 4/70 [00:11<03:05,  2.81s/it][A[A

  7%|▋         | 5/70 [00:13<02:53,  2.67s/it][A[A

  9%|▊         | 6/70 [00:15<02:48,  2.64s/it][A[A

 10%|█         | 7/70 [00:18<02:40,  2.55s/it][A[A

 11%|█▏        | 8/70 [00:20<02:38,  2.56s/it][A[A

 13%|█▎        | 9/70 [00:23<02:32,  2.50s/it][A[A

 14%|█▍        | 10/70 [00:25<02:31,  2.52s/it][A[A

 16%|█▌        | 11/70 [00:28<02:25,  2.47s/it][A[A

Batch loss: 2.0783531665802




 17%|█▋        | 12/70 [00:30<02:24,  2.50s/it][A[A

 19%|█▊        | 13/70 [00:33<02:19,  2.46s/it][A[A

 20%|██        | 14/70 [00:35<02:19,  2.49s/it][A[A

 21%|██▏       | 15/70 [00:37<02:15,  2.46s/it][A[A

 23%|██▎       | 16/70 [00:40<02:16,  2.52s/it][A[A

 24%|██▍       | 17/70 [00:43<02:13,  2.51s/it][A[A

 26%|██▌       | 18/70 [00:45<02:14,  2.58s/it][A[A

 27%|██▋       | 19/70 [00:48<02:10,  2.55s/it][A[A

 29%|██▊       | 20/70 [00:51<02:09,  2.60s/it][A[A

 30%|███       | 21/70 [00:53<02:05,  2.57s/it][A[A

Batch loss: 1.6101558208465576




 31%|███▏      | 22/70 [00:56<02:05,  2.61s/it][A[A

 33%|███▎      | 23/70 [00:58<02:00,  2.57s/it][A[A

 34%|███▍      | 24/70 [01:01<01:59,  2.61s/it][A[A

 36%|███▌      | 25/70 [01:03<01:55,  2.56s/it][A[A

 37%|███▋      | 26/70 [01:06<01:53,  2.58s/it][A[A

 39%|███▊      | 27/70 [01:08<01:48,  2.51s/it][A[A

 40%|████      | 28/70 [01:11<01:46,  2.54s/it][A[A

 41%|████▏     | 29/70 [01:14<01:43,  2.53s/it][A[A

 43%|████▎     | 30/70 [01:16<01:43,  2.59s/it][A[A

 44%|████▍     | 31/70 [01:19<01:39,  2.56s/it][A[A

Batch loss: 1.816104769706726




 46%|████▌     | 32/70 [01:21<01:39,  2.61s/it][A[A

 47%|████▋     | 33/70 [01:24<01:35,  2.58s/it][A[A

 49%|████▊     | 34/70 [01:27<01:34,  2.62s/it][A[A

 50%|█████     | 35/70 [01:29<01:30,  2.58s/it][A[A

 51%|█████▏    | 36/70 [01:32<01:28,  2.61s/it][A[A

 53%|█████▎    | 37/70 [01:34<01:25,  2.58s/it][A[A

 54%|█████▍    | 38/70 [01:37<01:23,  2.60s/it][A[A

 56%|█████▌    | 39/70 [01:39<01:18,  2.52s/it][A[A

 57%|█████▋    | 40/70 [01:42<01:15,  2.53s/it][A[A

 59%|█████▊    | 41/70 [01:44<01:12,  2.50s/it][A[A

Batch loss: 1.7841423749923706




 60%|██████    | 42/70 [01:47<01:10,  2.52s/it][A[A

 61%|██████▏   | 43/70 [01:49<01:06,  2.47s/it][A[A

 63%|██████▎   | 44/70 [01:52<01:05,  2.53s/it][A[A

 64%|██████▍   | 45/70 [01:54<01:02,  2.48s/it][A[A

 66%|██████▌   | 46/70 [01:57<01:00,  2.51s/it][A[A

 67%|██████▋   | 47/70 [01:59<00:56,  2.46s/it][A[A

 69%|██████▊   | 48/70 [02:02<00:55,  2.52s/it][A[A

 70%|███████   | 49/70 [02:04<00:52,  2.50s/it][A[A

 71%|███████▏  | 50/70 [02:07<00:51,  2.55s/it][A[A

 73%|███████▎  | 51/70 [02:09<00:47,  2.51s/it][A[A

Batch loss: 1.7315469980239868




 74%|███████▍  | 52/70 [02:12<00:45,  2.53s/it][A[A

 76%|███████▌  | 53/70 [02:14<00:42,  2.49s/it][A[A

 77%|███████▋  | 54/70 [02:17<00:40,  2.53s/it][A[A

 79%|███████▊  | 55/70 [02:20<00:38,  2.54s/it][A[A

 80%|████████  | 56/70 [02:23<00:37,  2.68s/it][A[A

 81%|████████▏ | 57/70 [02:25<00:34,  2.66s/it][A[A

 83%|████████▎ | 58/70 [02:28<00:31,  2.66s/it][A[A

 84%|████████▍ | 59/70 [02:30<00:28,  2.59s/it][A[A

 86%|████████▌ | 60/70 [02:33<00:26,  2.61s/it][A[A

 87%|████████▋ | 61/70 [02:35<00:23,  2.57s/it][A[A

Batch loss: 1.6527600288391113




 89%|████████▊ | 62/70 [02:38<00:20,  2.61s/it][A[A

 90%|█████████ | 63/70 [02:41<00:17,  2.57s/it][A[A

 91%|█████████▏| 64/70 [02:43<00:15,  2.59s/it][A[A

 93%|█████████▎| 65/70 [02:46<00:12,  2.52s/it][A[A

 94%|█████████▍| 66/70 [02:48<00:10,  2.53s/it][A[A

 96%|█████████▌| 67/70 [02:51<00:07,  2.49s/it][A[A

 97%|█████████▋| 68/70 [02:53<00:05,  2.54s/it][A[A

 99%|█████████▊| 69/70 [02:56<00:02,  2.50s/it][A[A

100%|██████████| 70/70 [02:56<00:00,  2.53s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.7560907857758659




 12%|█▎        | 1/8 [00:03<00:21,  3.11s/it][A[A

 25%|██▌       | 2/8 [00:05<00:17,  2.97s/it][A[A

 38%|███▊      | 3/8 [00:07<00:13,  2.74s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.68s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.57s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.55s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.46s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.38s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3361264765262604
Epoch:  12




  1%|▏         | 1/70 [00:03<03:51,  3.35s/it][A[A

Batch loss: 1.573788046836853




  3%|▎         | 2/70 [00:06<03:37,  3.19s/it][A[A

  4%|▍         | 3/70 [00:08<03:17,  2.95s/it][A[A

  6%|▌         | 4/70 [00:11<03:09,  2.88s/it][A[A

  7%|▋         | 5/70 [00:13<02:58,  2.75s/it][A[A

  9%|▊         | 6/70 [00:16<02:55,  2.74s/it][A[A

 10%|█         | 7/70 [00:18<02:46,  2.65s/it][A[A

 11%|█▏        | 8/70 [00:21<02:44,  2.66s/it][A[A

 13%|█▎        | 9/70 [00:23<02:38,  2.59s/it][A[A

 14%|█▍        | 10/70 [00:26<02:37,  2.62s/it][A[A

 16%|█▌        | 11/70 [00:29<02:32,  2.59s/it][A[A

Batch loss: 1.6441516876220703




 17%|█▋        | 12/70 [00:31<02:32,  2.63s/it][A[A

 19%|█▊        | 13/70 [00:34<02:25,  2.55s/it][A[A

 20%|██        | 14/70 [00:36<02:22,  2.54s/it][A[A

 21%|██▏       | 15/70 [00:39<02:16,  2.49s/it][A[A

 23%|██▎       | 16/70 [00:41<02:17,  2.55s/it][A[A

 24%|██▍       | 17/70 [00:44<02:13,  2.51s/it][A[A

 26%|██▌       | 18/70 [00:46<02:11,  2.53s/it][A[A

 27%|██▋       | 19/70 [00:49<02:06,  2.47s/it][A[A

 29%|██▊       | 20/70 [00:51<02:05,  2.51s/it][A[A

 30%|███       | 21/70 [00:54<02:00,  2.47s/it][A[A

Batch loss: 1.8552536964416504




 31%|███▏      | 22/70 [00:56<01:59,  2.50s/it][A[A

 33%|███▎      | 23/70 [00:59<01:55,  2.46s/it][A[A

 34%|███▍      | 24/70 [01:01<01:54,  2.49s/it][A[A

 36%|███▌      | 25/70 [01:04<01:50,  2.46s/it][A[A

 37%|███▋      | 26/70 [01:06<01:49,  2.48s/it][A[A

 39%|███▊      | 27/70 [01:08<01:45,  2.45s/it][A[A

 40%|████      | 28/70 [01:11<01:44,  2.48s/it][A[A

 41%|████▏     | 29/70 [01:13<01:40,  2.46s/it][A[A

 43%|████▎     | 30/70 [01:16<01:41,  2.53s/it][A[A

 44%|████▍     | 31/70 [01:19<01:37,  2.50s/it][A[A

Batch loss: 1.7643451690673828




 46%|████▌     | 32/70 [01:21<01:37,  2.56s/it][A[A

 47%|████▋     | 33/70 [01:24<01:34,  2.55s/it][A[A

 49%|████▊     | 34/70 [01:26<01:33,  2.59s/it][A[A

 50%|█████     | 35/70 [01:29<01:29,  2.55s/it][A[A

 51%|█████▏    | 36/70 [01:32<01:27,  2.59s/it][A[A

 53%|█████▎    | 37/70 [01:34<01:24,  2.56s/it][A[A

 54%|█████▍    | 38/70 [01:37<01:23,  2.61s/it][A[A

 56%|█████▌    | 39/70 [01:39<01:19,  2.56s/it][A[A

 57%|█████▋    | 40/70 [01:42<01:17,  2.59s/it][A[A

 59%|█████▊    | 41/70 [01:44<01:13,  2.52s/it][A[A

Batch loss: 1.6183853149414062




 60%|██████    | 42/70 [01:47<01:11,  2.54s/it][A[A

 61%|██████▏   | 43/70 [01:49<01:07,  2.50s/it][A[A

 63%|██████▎   | 44/70 [01:52<01:05,  2.52s/it][A[A

 64%|██████▍   | 45/70 [01:54<01:01,  2.47s/it][A[A

 66%|██████▌   | 46/70 [01:57<00:59,  2.49s/it][A[A

 67%|██████▋   | 47/70 [01:59<00:56,  2.45s/it][A[A

 69%|██████▊   | 48/70 [02:02<00:54,  2.49s/it][A[A

 70%|███████   | 49/70 [02:04<00:51,  2.45s/it][A[A

 71%|███████▏  | 50/70 [02:07<00:49,  2.47s/it][A[A

 73%|███████▎  | 51/70 [02:09<00:46,  2.44s/it][A[A

Batch loss: 1.7425578832626343




 74%|███████▍  | 52/70 [02:11<00:44,  2.47s/it][A[A

 76%|███████▌  | 53/70 [02:14<00:41,  2.43s/it][A[A

 77%|███████▋  | 54/70 [02:16<00:39,  2.47s/it][A[A

 79%|███████▊  | 55/70 [02:19<00:36,  2.44s/it][A[A

 80%|████████  | 56/70 [02:21<00:35,  2.51s/it][A[A

 81%|████████▏ | 57/70 [02:24<00:32,  2.50s/it][A[A

 83%|████████▎ | 58/70 [02:26<00:30,  2.55s/it][A[A

 84%|████████▍ | 59/70 [02:29<00:27,  2.53s/it][A[A

 86%|████████▌ | 60/70 [02:32<00:25,  2.56s/it][A[A

 87%|████████▋ | 61/70 [02:34<00:22,  2.54s/it][A[A

Batch loss: 1.662169337272644




 89%|████████▊ | 62/70 [02:37<00:20,  2.58s/it][A[A

 90%|█████████ | 63/70 [02:39<00:17,  2.55s/it][A[A

 91%|█████████▏| 64/70 [02:42<00:15,  2.58s/it][A[A

 93%|█████████▎| 65/70 [02:44<00:12,  2.54s/it][A[A

 94%|█████████▍| 66/70 [02:47<00:10,  2.55s/it][A[A

 96%|█████████▌| 67/70 [02:49<00:07,  2.50s/it][A[A

 97%|█████████▋| 68/70 [02:52<00:05,  2.51s/it][A[A

 99%|█████████▊| 69/70 [02:54<00:02,  2.51s/it][A[A

100%|██████████| 70/70 [02:55<00:00,  2.51s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.732808893067496




 12%|█▎        | 1/8 [00:03<00:23,  3.34s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.17s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.91s/it][A[A

 50%|█████     | 4/8 [00:10<00:11,  2.81s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.68s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.65s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.57s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.48s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3088114857673645
Epoch:  13




  1%|▏         | 1/70 [00:03<03:56,  3.43s/it][A[A

Batch loss: 1.689760684967041




  3%|▎         | 2/70 [00:06<03:41,  3.26s/it][A[A

  4%|▍         | 3/70 [00:08<03:18,  2.97s/it][A[A

  6%|▌         | 4/70 [00:11<03:09,  2.87s/it][A[A

  7%|▋         | 5/70 [00:13<02:56,  2.72s/it][A[A

  9%|▊         | 6/70 [00:16<02:52,  2.69s/it][A[A

 10%|█         | 7/70 [00:18<02:43,  2.59s/it][A[A

 11%|█▏        | 8/70 [00:21<02:39,  2.58s/it][A[A

 13%|█▎        | 9/70 [00:23<02:32,  2.51s/it][A[A

 14%|█▍        | 10/70 [00:26<02:31,  2.52s/it][A[A

 16%|█▌        | 11/70 [00:28<02:26,  2.49s/it][A[A

Batch loss: 1.5136127471923828




 17%|█▋        | 12/70 [00:30<02:25,  2.51s/it][A[A

 19%|█▊        | 13/70 [00:33<02:20,  2.46s/it][A[A

 20%|██        | 14/70 [00:35<02:19,  2.50s/it][A[A

 21%|██▏       | 15/70 [00:38<02:25,  2.64s/it][A[A

 23%|██▎       | 16/70 [00:41<02:22,  2.64s/it][A[A

 24%|██▍       | 17/70 [00:43<02:17,  2.59s/it][A[A

 26%|██▌       | 18/70 [00:46<02:17,  2.64s/it][A[A

 27%|██▋       | 19/70 [00:49<02:13,  2.62s/it][A[A

 29%|██▊       | 20/70 [00:52<02:12,  2.64s/it][A[A

 30%|███       | 21/70 [00:54<02:06,  2.59s/it][A[A

Batch loss: 1.6088656187057495




 31%|███▏      | 22/70 [00:57<02:05,  2.61s/it][A[A

 33%|███▎      | 23/70 [00:59<02:01,  2.58s/it][A[A

 34%|███▍      | 24/70 [01:02<02:00,  2.62s/it][A[A

 36%|███▌      | 25/70 [01:04<01:55,  2.57s/it][A[A

 37%|███▋      | 26/70 [01:07<01:54,  2.60s/it][A[A

 39%|███▊      | 27/70 [01:10<01:50,  2.58s/it][A[A

 40%|████      | 28/70 [01:12<01:47,  2.57s/it][A[A

 41%|████▏     | 29/70 [01:14<01:42,  2.50s/it][A[A

 43%|████▎     | 30/70 [01:17<01:41,  2.54s/it][A[A

 44%|████▍     | 31/70 [01:19<01:36,  2.48s/it][A[A

Batch loss: 1.7081183195114136




 46%|████▌     | 32/70 [01:22<01:35,  2.51s/it][A[A

 47%|████▋     | 33/70 [01:24<01:31,  2.46s/it][A[A

 49%|████▊     | 34/70 [01:27<01:29,  2.48s/it][A[A

 50%|█████     | 35/70 [01:29<01:25,  2.44s/it][A[A

 51%|█████▏    | 36/70 [01:32<01:24,  2.48s/it][A[A

 53%|█████▎    | 37/70 [01:34<01:20,  2.45s/it][A[A

 54%|█████▍    | 38/70 [01:37<01:19,  2.48s/it][A[A

 56%|█████▌    | 39/70 [01:39<01:15,  2.44s/it][A[A

 57%|█████▋    | 40/70 [01:42<01:14,  2.48s/it][A[A

 59%|█████▊    | 41/70 [01:44<01:10,  2.44s/it][A[A

Batch loss: 1.6825885772705078




 60%|██████    | 42/70 [01:47<01:09,  2.48s/it][A[A

 61%|██████▏   | 43/70 [01:49<01:06,  2.46s/it][A[A

 63%|██████▎   | 44/70 [01:52<01:05,  2.53s/it][A[A

 64%|██████▍   | 45/70 [01:54<01:03,  2.52s/it][A[A

 66%|██████▌   | 46/70 [01:57<01:01,  2.57s/it][A[A

 67%|██████▋   | 47/70 [01:59<00:58,  2.55s/it][A[A

 69%|██████▊   | 48/70 [02:02<00:56,  2.59s/it][A[A

 70%|███████   | 49/70 [02:04<00:53,  2.55s/it][A[A

 71%|███████▏  | 50/70 [02:07<00:51,  2.59s/it][A[A

 73%|███████▎  | 51/70 [02:10<00:48,  2.56s/it][A[A

Batch loss: 1.6711585521697998




 74%|███████▍  | 52/70 [02:12<00:46,  2.60s/it][A[A

 76%|███████▌  | 53/70 [02:15<00:43,  2.56s/it][A[A

 77%|███████▋  | 54/70 [02:17<00:40,  2.56s/it][A[A

 79%|███████▊  | 55/70 [02:20<00:37,  2.50s/it][A[A

 80%|████████  | 56/70 [02:22<00:35,  2.53s/it][A[A

 81%|████████▏ | 57/70 [02:25<00:32,  2.48s/it][A[A

 83%|████████▎ | 58/70 [02:27<00:29,  2.49s/it][A[A

 84%|████████▍ | 59/70 [02:30<00:27,  2.46s/it][A[A

 86%|████████▌ | 60/70 [02:32<00:24,  2.49s/it][A[A

 87%|████████▋ | 61/70 [02:35<00:22,  2.46s/it][A[A

Batch loss: 1.8004921674728394




 89%|████████▊ | 62/70 [02:37<00:20,  2.51s/it][A[A

 90%|█████████ | 63/70 [02:40<00:17,  2.46s/it][A[A

 91%|█████████▏| 64/70 [02:42<00:15,  2.52s/it][A[A

 93%|█████████▎| 65/70 [02:45<00:12,  2.48s/it][A[A

 94%|█████████▍| 66/70 [02:47<00:10,  2.53s/it][A[A

 96%|█████████▌| 67/70 [02:50<00:07,  2.50s/it][A[A

 97%|█████████▋| 68/70 [02:52<00:05,  2.53s/it][A[A

 99%|█████████▊| 69/70 [02:55<00:02,  2.49s/it][A[A

100%|██████████| 70/70 [02:55<00:00,  2.51s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.663370440687452




 12%|█▎        | 1/8 [00:03<00:23,  3.30s/it][A[A

 25%|██▌       | 2/8 [00:06<00:18,  3.16s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.95s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.85s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.71s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.68s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.59s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.52s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4744738936424255
Epoch:  14




  1%|▏         | 1/70 [00:03<04:08,  3.60s/it][A[A

Batch loss: 1.541420817375183




  3%|▎         | 2/70 [00:06<03:49,  3.38s/it][A[A

  4%|▍         | 3/70 [00:08<03:25,  3.07s/it][A[A

  6%|▌         | 4/70 [00:11<03:13,  2.93s/it][A[A

  7%|▋         | 5/70 [00:13<03:02,  2.82s/it][A[A

  9%|▊         | 6/70 [00:16<02:56,  2.76s/it][A[A

 10%|█         | 7/70 [00:19<02:47,  2.66s/it][A[A

 11%|█▏        | 8/70 [00:21<02:45,  2.66s/it][A[A

 13%|█▎        | 9/70 [00:24<02:37,  2.59s/it][A[A

 14%|█▍        | 10/70 [00:26<02:35,  2.59s/it][A[A

 16%|█▌        | 11/70 [00:29<02:29,  2.53s/it][A[A

Batch loss: 1.6188299655914307




 17%|█▋        | 12/70 [00:31<02:29,  2.57s/it][A[A

 19%|█▊        | 13/70 [00:34<02:23,  2.52s/it][A[A

 20%|██        | 14/70 [00:36<02:23,  2.57s/it][A[A

 21%|██▏       | 15/70 [00:39<02:17,  2.51s/it][A[A

 23%|██▎       | 16/70 [00:41<02:16,  2.52s/it][A[A

 24%|██▍       | 17/70 [00:44<02:10,  2.46s/it][A[A

 26%|██▌       | 18/70 [00:46<02:11,  2.53s/it][A[A

 27%|██▋       | 19/70 [00:49<02:08,  2.51s/it][A[A

 29%|██▊       | 20/70 [00:51<02:08,  2.57s/it][A[A

 30%|███       | 21/70 [00:54<02:04,  2.54s/it][A[A

Batch loss: 1.6762371063232422




 31%|███▏      | 22/70 [00:57<02:04,  2.60s/it][A[A

 33%|███▎      | 23/70 [00:59<02:00,  2.56s/it][A[A

 34%|███▍      | 24/70 [01:02<01:59,  2.60s/it][A[A

 36%|███▌      | 25/70 [01:04<01:54,  2.55s/it][A[A

 37%|███▋      | 26/70 [01:07<01:54,  2.60s/it][A[A

 39%|███▊      | 27/70 [01:09<01:50,  2.57s/it][A[A

 40%|████      | 28/70 [01:12<01:48,  2.59s/it][A[A

 41%|████▏     | 29/70 [01:14<01:43,  2.52s/it][A[A

 43%|████▎     | 30/70 [01:17<01:41,  2.54s/it][A[A

 44%|████▍     | 31/70 [01:20<01:38,  2.52s/it][A[A

Batch loss: 1.837846279144287




 46%|████▌     | 32/70 [01:22<01:37,  2.58s/it][A[A

 47%|████▋     | 33/70 [01:25<01:34,  2.55s/it][A[A

 49%|████▊     | 34/70 [01:27<01:33,  2.60s/it][A[A

 50%|█████     | 35/70 [01:30<01:29,  2.56s/it][A[A

 51%|█████▏    | 36/70 [01:33<01:28,  2.60s/it][A[A

 53%|█████▎    | 37/70 [01:35<01:24,  2.56s/it][A[A

 54%|█████▍    | 38/70 [01:38<01:23,  2.61s/it][A[A

 56%|█████▌    | 39/70 [01:40<01:19,  2.56s/it][A[A

 57%|█████▋    | 40/70 [01:43<01:17,  2.59s/it][A[A

 59%|█████▊    | 41/70 [01:45<01:12,  2.52s/it][A[A

Batch loss: 1.6684107780456543




 60%|██████    | 42/70 [01:48<01:10,  2.54s/it][A[A

 61%|██████▏   | 43/70 [01:50<01:06,  2.48s/it][A[A

 63%|██████▎   | 44/70 [01:53<01:05,  2.52s/it][A[A

 64%|██████▍   | 45/70 [01:55<01:01,  2.46s/it][A[A

 66%|██████▌   | 46/70 [01:58<01:00,  2.51s/it][A[A

 67%|██████▋   | 47/70 [02:00<00:56,  2.47s/it][A[A

 69%|██████▊   | 48/70 [02:03<00:54,  2.49s/it][A[A

 70%|███████   | 49/70 [02:05<00:52,  2.48s/it][A[A

 71%|███████▏  | 50/70 [02:08<00:50,  2.52s/it][A[A

 73%|███████▎  | 51/70 [02:10<00:49,  2.59s/it][A[A

Batch loss: 1.6799325942993164




 74%|███████▍  | 52/70 [02:13<00:45,  2.54s/it][A[A

 76%|███████▌  | 53/70 [02:15<00:43,  2.53s/it][A[A

 77%|███████▋  | 54/70 [02:18<00:41,  2.61s/it][A[A

 79%|███████▊  | 55/70 [02:20<00:37,  2.51s/it][A[A

 80%|████████  | 56/70 [02:23<00:35,  2.53s/it][A[A

 81%|████████▏ | 57/70 [02:26<00:34,  2.63s/it][A[A

 83%|████████▎ | 58/70 [02:28<00:30,  2.58s/it][A[A

 84%|████████▍ | 59/70 [02:31<00:28,  2.59s/it][A[A

 86%|████████▌ | 60/70 [02:34<00:26,  2.69s/it][A[A

 87%|████████▋ | 61/70 [02:36<00:23,  2.62s/it][A[A

Batch loss: 1.587880253791809




 89%|████████▊ | 62/70 [02:39<00:21,  2.66s/it][A[A

 90%|█████████ | 63/70 [02:42<00:19,  2.74s/it][A[A

 91%|█████████▏| 64/70 [02:44<00:15,  2.65s/it][A[A

 93%|█████████▎| 65/70 [02:47<00:13,  2.66s/it][A[A

 94%|█████████▍| 66/70 [02:50<00:10,  2.73s/it][A[A

 96%|█████████▌| 67/70 [02:52<00:07,  2.64s/it][A[A

 97%|█████████▋| 68/70 [02:55<00:05,  2.61s/it][A[A

 99%|█████████▊| 69/70 [02:58<00:02,  2.66s/it][A[A

100%|██████████| 70/70 [02:59<00:00,  2.56s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.6470055052212307




 12%|█▎        | 1/8 [00:03<00:22,  3.20s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.04s/it][A[A

 38%|███▊      | 3/8 [00:08<00:13,  2.79s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.71s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.69s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.54s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.52s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.49s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3253804743289948
Epoch:  15




  1%|▏         | 1/70 [00:03<03:43,  3.25s/it][A[A

Batch loss: 1.6849901676177979




  3%|▎         | 2/70 [00:05<03:30,  3.10s/it][A[A

  4%|▍         | 3/70 [00:08<03:11,  2.86s/it][A[A

  6%|▌         | 4/70 [00:10<03:03,  2.77s/it][A[A

  7%|▋         | 5/70 [00:13<03:03,  2.82s/it][A[A

  9%|▊         | 6/70 [00:16<02:52,  2.70s/it][A[A

 10%|█         | 7/70 [00:18<02:49,  2.70s/it][A[A

 11%|█▏        | 8/70 [00:21<02:51,  2.77s/it][A[A

 13%|█▎        | 9/70 [00:24<02:42,  2.66s/it][A[A

 14%|█▍        | 10/70 [00:27<02:41,  2.69s/it][A[A

 16%|█▌        | 11/70 [00:29<02:42,  2.75s/it][A[A

Batch loss: 1.4568610191345215




 17%|█▋        | 12/70 [00:32<02:34,  2.66s/it][A[A

 19%|█▊        | 13/70 [00:35<02:37,  2.76s/it][A[A

 20%|██        | 14/70 [00:38<02:40,  2.86s/it][A[A

 21%|██▏       | 15/70 [00:40<02:27,  2.69s/it][A[A

 23%|██▎       | 16/70 [00:43<02:23,  2.65s/it][A[A

 24%|██▍       | 17/70 [00:46<02:22,  2.69s/it][A[A

 26%|██▌       | 18/70 [00:48<02:13,  2.57s/it][A[A

 27%|██▋       | 19/70 [00:50<02:10,  2.57s/it][A[A

 29%|██▊       | 20/70 [00:53<02:11,  2.62s/it][A[A

 30%|███       | 21/70 [00:55<02:03,  2.52s/it][A[A

Batch loss: 1.4722363948822021




 31%|███▏      | 22/70 [00:58<02:02,  2.55s/it][A[A

 33%|███▎      | 23/70 [01:01<02:03,  2.62s/it][A[A

 34%|███▍      | 24/70 [01:03<01:55,  2.52s/it][A[A

 36%|███▌      | 25/70 [01:06<01:54,  2.53s/it][A[A

 37%|███▋      | 26/70 [01:08<01:54,  2.60s/it][A[A

 39%|███▊      | 27/70 [01:11<01:48,  2.51s/it][A[A

 40%|████      | 28/70 [01:13<01:45,  2.52s/it][A[A

 41%|████▏     | 29/70 [01:16<01:46,  2.59s/it][A[A

 43%|████▎     | 30/70 [01:18<01:41,  2.53s/it][A[A

 44%|████▍     | 31/70 [01:21<01:40,  2.59s/it][A[A

Batch loss: 1.4867115020751953




 46%|████▌     | 32/70 [01:24<01:41,  2.67s/it][A[A

 47%|████▋     | 33/70 [01:26<01:35,  2.58s/it][A[A

 49%|████▊     | 34/70 [01:29<01:34,  2.62s/it][A[A

 50%|█████     | 35/70 [01:32<01:34,  2.70s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:28,  2.61s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:27,  2.65s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:26,  2.72s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:21,  2.64s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:18,  2.62s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:16,  2.65s/it][A[A

Batch loss: 1.4703561067581177




 60%|██████    | 42/70 [01:50<01:11,  2.55s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:09,  2.59s/it][A[A

 63%|██████▎   | 44/70 [01:56<01:08,  2.64s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:03,  2.54s/it][A[A

 66%|██████▌   | 46/70 [02:00<01:01,  2.57s/it][A[A

 67%|██████▋   | 47/70 [02:03<01:00,  2.64s/it][A[A

 69%|██████▊   | 48/70 [02:06<00:56,  2.55s/it][A[A

 70%|███████   | 49/70 [02:08<00:54,  2.59s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:53,  2.65s/it][A[A

 73%|███████▎  | 51/70 [02:13<00:48,  2.54s/it][A[A

Batch loss: 1.6819636821746826




 74%|███████▍  | 52/70 [02:16<00:46,  2.56s/it][A[A

 76%|███████▌  | 53/70 [02:19<00:45,  2.65s/it][A[A

 77%|███████▋  | 54/70 [02:21<00:41,  2.57s/it][A[A

 79%|███████▊  | 55/70 [02:24<00:39,  2.61s/it][A[A

 80%|████████  | 56/70 [02:27<00:37,  2.69s/it][A[A

 81%|████████▏ | 57/70 [02:29<00:33,  2.61s/it][A[A

 83%|████████▎ | 58/70 [02:32<00:31,  2.63s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:29,  2.69s/it][A[A

 86%|████████▌ | 60/70 [02:37<00:26,  2.61s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:23,  2.63s/it][A[A

Batch loss: 1.4847928285598755




 89%|████████▊ | 62/70 [02:43<00:21,  2.72s/it][A[A

 90%|█████████ | 63/70 [02:45<00:18,  2.62s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:15,  2.65s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.68s/it][A[A

 94%|█████████▍| 66/70 [02:53<00:10,  2.58s/it][A[A

 96%|█████████▌| 67/70 [02:56<00:07,  2.61s/it][A[A

 97%|█████████▋| 68/70 [02:59<00:05,  2.70s/it][A[A

 99%|█████████▊| 69/70 [03:01<00:02,  2.58s/it][A[A

100%|██████████| 70/70 [03:02<00:00,  2.60s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.5757647582462855




 12%|█▎        | 1/8 [00:03<00:23,  3.40s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.20s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.97s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.92s/it][A[A

 62%|██████▎   | 5/8 [00:14<00:08,  2.98s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.77s/it][A[A

 88%|████████▊ | 7/8 [00:19<00:02,  2.76s/it][A[A

100%|██████████| 8/8 [00:21<00:00,  2.70s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3560253381729126
Epoch:  16




  1%|▏         | 1/70 [00:03<03:47,  3.29s/it][A[A

Batch loss: 1.4749294519424438




  3%|▎         | 2/70 [00:06<03:34,  3.15s/it][A[A

  4%|▍         | 3/70 [00:08<03:16,  2.93s/it][A[A

  6%|▌         | 4/70 [00:11<03:08,  2.86s/it][A[A

  7%|▋         | 5/70 [00:14<03:06,  2.87s/it][A[A

  9%|▊         | 6/70 [00:16<02:54,  2.73s/it][A[A

 10%|█         | 7/70 [00:19<02:51,  2.72s/it][A[A

 11%|█▏        | 8/70 [00:22<02:52,  2.78s/it][A[A

 13%|█▎        | 9/70 [00:24<02:42,  2.67s/it][A[A

 14%|█▍        | 10/70 [00:27<02:41,  2.69s/it][A[A

 16%|█▌        | 11/70 [00:30<02:42,  2.76s/it][A[A

Batch loss: 1.442212700843811




 17%|█▋        | 12/70 [00:32<02:31,  2.62s/it][A[A

 19%|█▊        | 13/70 [00:35<02:28,  2.61s/it][A[A

 20%|██        | 14/70 [00:37<02:29,  2.67s/it][A[A

 21%|██▏       | 15/70 [00:40<02:22,  2.59s/it][A[A

 23%|██▎       | 16/70 [00:43<02:22,  2.64s/it][A[A

 24%|██▍       | 17/70 [00:45<02:24,  2.72s/it][A[A

 26%|██▌       | 18/70 [00:48<02:16,  2.62s/it][A[A

 27%|██▋       | 19/70 [00:51<02:15,  2.65s/it][A[A

 29%|██▊       | 20/70 [00:53<02:15,  2.72s/it][A[A

 30%|███       | 21/70 [00:56<02:08,  2.62s/it][A[A

Batch loss: 1.5082393884658813




 31%|███▏      | 22/70 [00:59<02:07,  2.65s/it][A[A

 33%|███▎      | 23/70 [01:01<02:08,  2.73s/it][A[A

 34%|███▍      | 24/70 [01:04<02:02,  2.66s/it][A[A

 36%|███▌      | 25/70 [01:07<02:00,  2.67s/it][A[A

 37%|███▋      | 26/70 [01:09<01:58,  2.70s/it][A[A

 39%|███▊      | 27/70 [01:12<01:52,  2.61s/it][A[A

 40%|████      | 28/70 [01:14<01:49,  2.60s/it][A[A

 41%|████▏     | 29/70 [01:17<01:48,  2.65s/it][A[A

 43%|████▎     | 30/70 [01:19<01:41,  2.53s/it][A[A

 44%|████▍     | 31/70 [01:22<01:39,  2.55s/it][A[A

Batch loss: 1.383756160736084




 46%|████▌     | 32/70 [01:25<01:39,  2.62s/it][A[A

 47%|████▋     | 33/70 [01:27<01:33,  2.52s/it][A[A

 49%|████▊     | 34/70 [01:30<01:30,  2.52s/it][A[A

 50%|█████     | 35/70 [01:32<01:30,  2.59s/it][A[A

 51%|█████▏    | 36/70 [01:35<01:25,  2.50s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:23,  2.54s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:24,  2.65s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:18,  2.53s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:16,  2.55s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:16,  2.65s/it][A[A

Batch loss: 1.5252747535705566




 60%|██████    | 42/70 [01:50<01:12,  2.59s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:10,  2.61s/it][A[A

 63%|██████▎   | 44/70 [01:56<01:09,  2.69s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:05,  2.61s/it][A[A

 66%|██████▌   | 46/70 [02:01<01:03,  2.63s/it][A[A

 67%|██████▋   | 47/70 [02:04<01:02,  2.73s/it][A[A

 69%|██████▊   | 48/70 [02:06<00:58,  2.65s/it][A[A

 70%|███████   | 49/70 [02:09<00:55,  2.67s/it][A[A

 71%|███████▏  | 50/70 [02:12<00:53,  2.69s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:49,  2.59s/it][A[A

Batch loss: 1.6784716844558716




 74%|███████▍  | 52/70 [02:17<00:47,  2.62s/it][A[A

 76%|███████▌  | 53/70 [02:20<00:45,  2.70s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:41,  2.58s/it][A[A

 79%|███████▊  | 55/70 [02:25<00:38,  2.59s/it][A[A

 80%|████████  | 56/70 [02:27<00:37,  2.65s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:33,  2.55s/it][A[A

 83%|████████▎ | 58/70 [02:32<00:31,  2.59s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:29,  2.64s/it][A[A

 86%|████████▌ | 60/70 [02:38<00:26,  2.64s/it][A[A

 87%|████████▋ | 61/70 [02:41<00:23,  2.64s/it][A[A

Batch loss: 1.4357073307037354




 89%|████████▊ | 62/70 [02:43<00:21,  2.71s/it][A[A

 90%|█████████ | 63/70 [02:46<00:18,  2.60s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:15,  2.61s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.72s/it][A[A

 94%|█████████▍| 66/70 [02:54<00:10,  2.65s/it][A[A

 96%|█████████▌| 67/70 [02:57<00:08,  2.69s/it][A[A

 97%|█████████▋| 68/70 [03:00<00:05,  2.78s/it][A[A

 99%|█████████▊| 69/70 [03:02<00:02,  2.70s/it][A[A

100%|██████████| 70/70 [03:03<00:00,  2.62s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4865513954843794




 12%|█▎        | 1/8 [00:03<00:24,  3.55s/it][A[A

 25%|██▌       | 2/8 [00:06<00:20,  3.37s/it][A[A

 38%|███▊      | 3/8 [00:08<00:15,  3.06s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.96s/it][A[A

 62%|██████▎   | 5/8 [00:14<00:08,  2.91s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.71s/it][A[A

 88%|████████▊ | 7/8 [00:19<00:02,  2.69s/it][A[A

100%|██████████| 8/8 [00:21<00:00,  2.66s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3330088555812836
Epoch:  17




  1%|▏         | 1/70 [00:03<03:47,  3.30s/it][A[A

Batch loss: 1.3310731649398804




  3%|▎         | 2/70 [00:06<03:32,  3.13s/it][A[A

  4%|▍         | 3/70 [00:08<03:12,  2.87s/it][A[A

  6%|▌         | 4/70 [00:10<03:04,  2.79s/it][A[A

  7%|▋         | 5/70 [00:13<03:01,  2.79s/it][A[A

  9%|▊         | 6/70 [00:16<02:49,  2.65s/it][A[A

 10%|█         | 7/70 [00:18<02:44,  2.62s/it][A[A

 11%|█▏        | 8/70 [00:21<02:44,  2.66s/it][A[A

 13%|█▎        | 9/70 [00:23<02:35,  2.55s/it][A[A

 14%|█▍        | 10/70 [00:26<02:34,  2.57s/it][A[A

 16%|█▌        | 11/70 [00:28<02:34,  2.62s/it][A[A

Batch loss: 1.6234325170516968




 17%|█▋        | 12/70 [00:31<02:26,  2.53s/it][A[A

 19%|█▊        | 13/70 [00:34<02:27,  2.59s/it][A[A

 20%|██        | 14/70 [00:36<02:29,  2.67s/it][A[A

 21%|██▏       | 15/70 [00:39<02:22,  2.60s/it][A[A

 23%|██▎       | 16/70 [00:41<02:21,  2.63s/it][A[A

 24%|██▍       | 17/70 [00:44<02:23,  2.70s/it][A[A

 26%|██▌       | 18/70 [00:47<02:16,  2.62s/it][A[A

 27%|██▋       | 19/70 [00:50<02:15,  2.66s/it][A[A

 29%|██▊       | 20/70 [00:52<02:16,  2.72s/it][A[A

 30%|███       | 21/70 [00:55<02:09,  2.64s/it][A[A

Batch loss: 1.5006932020187378




 31%|███▏      | 22/70 [00:58<02:06,  2.64s/it][A[A

 33%|███▎      | 23/70 [01:00<02:06,  2.68s/it][A[A

 34%|███▍      | 24/70 [01:03<01:58,  2.57s/it][A[A

 36%|███▌      | 25/70 [01:05<01:56,  2.59s/it][A[A

 37%|███▋      | 26/70 [01:08<01:55,  2.64s/it][A[A

 39%|███▊      | 27/70 [01:10<01:49,  2.54s/it][A[A

 40%|████      | 28/70 [01:13<01:47,  2.56s/it][A[A

 41%|████▏     | 29/70 [01:16<01:47,  2.62s/it][A[A

 43%|████▎     | 30/70 [01:18<01:40,  2.52s/it][A[A

 44%|████▍     | 31/70 [01:21<01:38,  2.54s/it][A[A

Batch loss: 1.969483494758606




 46%|████▌     | 32/70 [01:23<01:39,  2.61s/it][A[A

 47%|████▋     | 33/70 [01:26<01:33,  2.52s/it][A[A

 49%|████▊     | 34/70 [01:28<01:31,  2.54s/it][A[A

 50%|█████     | 35/70 [01:31<01:31,  2.60s/it][A[A

 51%|█████▏    | 36/70 [01:33<01:25,  2.52s/it][A[A

 53%|█████▎    | 37/70 [01:36<01:23,  2.54s/it][A[A

 54%|█████▍    | 38/70 [01:39<01:23,  2.62s/it][A[A

 56%|█████▌    | 39/70 [01:41<01:19,  2.57s/it][A[A

 57%|█████▋    | 40/70 [01:44<01:17,  2.60s/it][A[A

 59%|█████▊    | 41/70 [01:47<01:17,  2.69s/it][A[A

Batch loss: 1.387454628944397




 60%|██████    | 42/70 [01:49<01:12,  2.61s/it][A[A

 61%|██████▏   | 43/70 [01:52<01:11,  2.64s/it][A[A

 63%|██████▎   | 44/70 [01:55<01:10,  2.71s/it][A[A

 64%|██████▍   | 45/70 [01:57<01:05,  2.63s/it][A[A

 66%|██████▌   | 46/70 [02:00<01:03,  2.65s/it][A[A

 67%|██████▋   | 47/70 [02:03<01:02,  2.71s/it][A[A

 69%|██████▊   | 48/70 [02:05<00:57,  2.61s/it][A[A

 70%|███████   | 49/70 [02:08<00:54,  2.59s/it][A[A

 71%|███████▏  | 50/70 [02:10<00:52,  2.63s/it][A[A

 73%|███████▎  | 51/70 [02:13<00:48,  2.56s/it][A[A

Batch loss: 1.4152897596359253




 74%|███████▍  | 52/70 [02:15<00:46,  2.61s/it][A[A

 76%|███████▌  | 53/70 [02:18<00:45,  2.69s/it][A[A

 77%|███████▋  | 54/70 [02:21<00:41,  2.61s/it][A[A

 79%|███████▊  | 55/70 [02:23<00:39,  2.64s/it][A[A

 80%|████████  | 56/70 [02:26<00:37,  2.71s/it][A[A

 81%|████████▏ | 57/70 [02:29<00:34,  2.62s/it][A[A

 83%|████████▎ | 58/70 [02:31<00:31,  2.65s/it][A[A

 84%|████████▍ | 59/70 [02:34<00:29,  2.72s/it][A[A

 86%|████████▌ | 60/70 [02:37<00:26,  2.62s/it][A[A

 87%|████████▋ | 61/70 [02:39<00:23,  2.60s/it][A[A

Batch loss: 1.4177488088607788




 89%|████████▊ | 62/70 [02:42<00:21,  2.66s/it][A[A

 90%|█████████ | 63/70 [02:44<00:17,  2.56s/it][A[A

 91%|█████████▏| 64/70 [02:47<00:15,  2.57s/it][A[A

 93%|█████████▎| 65/70 [02:50<00:13,  2.64s/it][A[A

 94%|█████████▍| 66/70 [02:52<00:10,  2.56s/it][A[A

 96%|█████████▌| 67/70 [02:55<00:07,  2.57s/it][A[A

 97%|█████████▋| 68/70 [02:58<00:05,  2.64s/it][A[A

 99%|█████████▊| 69/70 [03:00<00:02,  2.54s/it][A[A

100%|██████████| 70/70 [03:01<00:00,  2.59s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.473338840688978




 12%|█▎        | 1/8 [00:03<00:22,  3.22s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.05s/it][A[A

 38%|███▊      | 3/8 [00:08<00:13,  2.80s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.72s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.70s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.56s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.58s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.53s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.404620975255966
Epoch:  18




  1%|▏         | 1/70 [00:03<03:56,  3.43s/it][A[A

Batch loss: 1.4626117944717407




  3%|▎         | 2/70 [00:06<03:41,  3.26s/it][A[A

  4%|▍         | 3/70 [00:08<03:21,  3.01s/it][A[A

  6%|▌         | 4/70 [00:11<03:12,  2.92s/it][A[A

  7%|▋         | 5/70 [00:14<03:09,  2.91s/it][A[A

  9%|▊         | 6/70 [00:16<02:56,  2.76s/it][A[A

 10%|█         | 7/70 [00:19<02:52,  2.74s/it][A[A

 11%|█▏        | 8/70 [00:22<02:51,  2.77s/it][A[A

 13%|█▎        | 9/70 [00:24<02:41,  2.65s/it][A[A

 14%|█▍        | 10/70 [00:27<02:37,  2.63s/it][A[A

 16%|█▌        | 11/70 [00:30<02:38,  2.68s/it][A[A

Batch loss: 1.4014209508895874




 17%|█▋        | 12/70 [00:32<02:28,  2.57s/it][A[A

 19%|█▊        | 13/70 [00:34<02:25,  2.56s/it][A[A

 20%|██        | 14/70 [00:37<02:26,  2.61s/it][A[A

 21%|██▏       | 15/70 [00:39<02:18,  2.52s/it][A[A

 23%|██▎       | 16/70 [00:42<02:17,  2.55s/it][A[A

 24%|██▍       | 17/70 [00:45<02:18,  2.61s/it][A[A

 26%|██▌       | 18/70 [00:47<02:10,  2.52s/it][A[A

 27%|██▋       | 19/70 [00:50<02:09,  2.53s/it][A[A

 29%|██▊       | 20/70 [00:52<02:10,  2.61s/it][A[A

 30%|███       | 21/70 [00:55<02:03,  2.51s/it][A[A

Batch loss: 1.9094589948654175




 31%|███▏      | 22/70 [00:57<02:02,  2.55s/it][A[A

 33%|███▎      | 23/70 [01:00<02:02,  2.60s/it][A[A

 34%|███▍      | 24/70 [01:02<01:55,  2.52s/it][A[A

 36%|███▌      | 25/70 [01:05<01:55,  2.57s/it][A[A

 37%|███▋      | 26/70 [01:08<01:57,  2.68s/it][A[A

 39%|███▊      | 27/70 [01:10<01:51,  2.60s/it][A[A

 40%|████      | 28/70 [01:13<01:50,  2.63s/it][A[A

 41%|████▏     | 29/70 [01:16<01:51,  2.71s/it][A[A

 43%|████▎     | 30/70 [01:18<01:45,  2.63s/it][A[A

 44%|████▍     | 31/70 [01:21<01:42,  2.64s/it][A[A

Batch loss: 1.4057501554489136




 46%|████▌     | 32/70 [01:24<01:43,  2.72s/it][A[A

 47%|████▋     | 33/70 [01:27<01:37,  2.64s/it][A[A

 49%|████▊     | 34/70 [01:29<01:34,  2.63s/it][A[A

 50%|█████     | 35/70 [01:32<01:33,  2.67s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:27,  2.57s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:27,  2.66s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:28,  2.76s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:21,  2.64s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:18,  2.63s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:17,  2.66s/it][A[A

Batch loss: 1.407719373703003




 60%|██████    | 42/70 [01:50<01:11,  2.57s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:09,  2.57s/it][A[A

 63%|██████▎   | 44/70 [01:55<01:08,  2.63s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:03,  2.53s/it][A[A

 66%|██████▌   | 46/70 [02:00<01:01,  2.55s/it][A[A

 67%|██████▋   | 47/70 [02:03<01:00,  2.63s/it][A[A

 69%|██████▊   | 48/70 [02:05<00:55,  2.54s/it][A[A

 70%|███████   | 49/70 [02:08<00:54,  2.57s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:53,  2.69s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:50,  2.64s/it][A[A

Batch loss: 1.6044230461120605




 74%|███████▍  | 52/70 [02:16<00:48,  2.67s/it][A[A

 76%|███████▌  | 53/70 [02:19<00:46,  2.75s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:42,  2.66s/it][A[A

 79%|███████▊  | 55/70 [02:25<00:40,  2.69s/it][A[A

 80%|████████  | 56/70 [02:27<00:38,  2.74s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:34,  2.64s/it][A[A

 83%|████████▎ | 58/70 [02:33<00:32,  2.67s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:29,  2.72s/it][A[A

 86%|████████▌ | 60/70 [02:38<00:25,  2.59s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:23,  2.60s/it][A[A

Batch loss: 1.4909837245941162




 89%|████████▊ | 62/70 [02:43<00:21,  2.67s/it][A[A

 90%|█████████ | 63/70 [02:45<00:17,  2.56s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:15,  2.56s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.63s/it][A[A

 94%|█████████▍| 66/70 [02:53<00:10,  2.53s/it][A[A

 96%|█████████▌| 67/70 [02:56<00:07,  2.55s/it][A[A

 97%|█████████▋| 68/70 [02:58<00:05,  2.61s/it][A[A

 99%|█████████▊| 69/70 [03:01<00:02,  2.52s/it][A[A

100%|██████████| 70/70 [03:01<00:00,  2.60s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4873708878244674




 12%|█▎        | 1/8 [00:03<00:22,  3.27s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.09s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.83s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.73s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.74s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.61s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.63s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.55s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3359656631946564
Epoch:  19




  1%|▏         | 1/70 [00:03<04:05,  3.55s/it][A[A

Batch loss: 1.6111457347869873




  3%|▎         | 2/70 [00:06<03:48,  3.36s/it][A[A

  4%|▍         | 3/70 [00:08<03:27,  3.09s/it][A[A

  6%|▌         | 4/70 [00:11<03:16,  2.98s/it][A[A

  7%|▋         | 5/70 [00:14<03:12,  2.97s/it][A[A

  9%|▊         | 6/70 [00:17<02:59,  2.81s/it][A[A

 10%|█         | 7/70 [00:19<02:54,  2.76s/it][A[A

 11%|█▏        | 8/70 [00:22<02:53,  2.81s/it][A[A

 13%|█▎        | 9/70 [00:24<02:42,  2.66s/it][A[A

 14%|█▍        | 10/70 [00:27<02:39,  2.65s/it][A[A

 16%|█▌        | 11/70 [00:30<02:38,  2.69s/it][A[A

Batch loss: 1.4503871202468872




 17%|█▋        | 12/70 [00:32<02:29,  2.59s/it][A[A

 19%|█▊        | 13/70 [00:35<02:26,  2.58s/it][A[A

 20%|██        | 14/70 [00:37<02:27,  2.63s/it][A[A

 21%|██▏       | 15/70 [00:40<02:18,  2.52s/it][A[A

 23%|██▎       | 16/70 [00:42<02:17,  2.55s/it][A[A

 24%|██▍       | 17/70 [00:45<02:18,  2.61s/it][A[A

 26%|██▌       | 18/70 [00:47<02:11,  2.53s/it][A[A

 27%|██▋       | 19/70 [00:50<02:09,  2.55s/it][A[A

 29%|██▊       | 20/70 [00:53<02:10,  2.62s/it][A[A

 30%|███       | 21/70 [00:55<02:03,  2.53s/it][A[A

Batch loss: 1.4255601167678833




 31%|███▏      | 22/70 [00:58<02:02,  2.56s/it][A[A

 33%|███▎      | 23/70 [01:01<02:05,  2.66s/it][A[A

 34%|███▍      | 24/70 [01:03<01:59,  2.60s/it][A[A

 36%|███▌      | 25/70 [01:06<01:58,  2.63s/it][A[A

 37%|███▋      | 26/70 [01:09<01:59,  2.71s/it][A[A

 39%|███▊      | 27/70 [01:11<01:53,  2.63s/it][A[A

 40%|████      | 28/70 [01:14<01:52,  2.67s/it][A[A

 41%|████▏     | 29/70 [01:17<01:53,  2.76s/it][A[A

 43%|████▎     | 30/70 [01:19<01:47,  2.69s/it][A[A

 44%|████▍     | 31/70 [01:22<01:45,  2.70s/it][A[A

Batch loss: 1.43887197971344




 46%|████▌     | 32/70 [01:25<01:45,  2.78s/it][A[A

 47%|████▋     | 33/70 [01:27<01:37,  2.64s/it][A[A

 49%|████▊     | 34/70 [01:30<01:34,  2.63s/it][A[A

 50%|█████     | 35/70 [01:33<01:34,  2.71s/it][A[A

 51%|█████▏    | 36/70 [01:35<01:29,  2.63s/it][A[A

 53%|█████▎    | 37/70 [01:38<01:27,  2.65s/it][A[A

 54%|█████▍    | 38/70 [01:41<01:26,  2.72s/it][A[A

 56%|█████▌    | 39/70 [01:43<01:21,  2.63s/it][A[A

 57%|█████▋    | 40/70 [01:46<01:19,  2.66s/it][A[A

 59%|█████▊    | 41/70 [01:49<01:19,  2.73s/it][A[A

Batch loss: 1.5176832675933838




 60%|██████    | 42/70 [01:51<01:13,  2.64s/it][A[A

 61%|██████▏   | 43/70 [01:54<01:11,  2.66s/it][A[A

 63%|██████▎   | 44/70 [01:57<01:10,  2.70s/it][A[A

 64%|██████▍   | 45/70 [01:59<01:04,  2.59s/it][A[A

 66%|██████▌   | 46/70 [02:02<01:02,  2.58s/it][A[A

 67%|██████▋   | 47/70 [02:05<01:01,  2.67s/it][A[A

 69%|██████▊   | 48/70 [02:07<00:56,  2.56s/it][A[A

 70%|███████   | 49/70 [02:10<00:54,  2.58s/it][A[A

 71%|███████▏  | 50/70 [02:12<00:52,  2.63s/it][A[A

 73%|███████▎  | 51/70 [02:15<00:48,  2.53s/it][A[A

Batch loss: 1.4126570224761963




 74%|███████▍  | 52/70 [02:17<00:46,  2.57s/it][A[A

 76%|███████▌  | 53/70 [02:20<00:44,  2.63s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:40,  2.53s/it][A[A

 79%|███████▊  | 55/70 [02:25<00:38,  2.55s/it][A[A

 80%|████████  | 56/70 [02:28<00:36,  2.62s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:32,  2.53s/it][A[A

 83%|████████▎ | 58/70 [02:33<00:30,  2.55s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:28,  2.61s/it][A[A

 86%|████████▌ | 60/70 [02:38<00:25,  2.55s/it][A[A

 87%|████████▋ | 61/70 [02:41<00:23,  2.60s/it][A[A

Batch loss: 1.4524493217468262




 89%|████████▊ | 62/70 [02:44<00:21,  2.69s/it][A[A

 90%|█████████ | 63/70 [02:46<00:18,  2.62s/it][A[A

 91%|█████████▏| 64/70 [02:49<00:15,  2.65s/it][A[A

 93%|█████████▎| 65/70 [02:52<00:13,  2.72s/it][A[A

 94%|█████████▍| 66/70 [02:54<00:10,  2.63s/it][A[A

 96%|█████████▌| 67/70 [02:57<00:07,  2.66s/it][A[A

 97%|█████████▋| 68/70 [03:00<00:05,  2.73s/it][A[A

 99%|█████████▊| 69/70 [03:02<00:02,  2.65s/it][A[A

100%|██████████| 70/70 [03:03<00:00,  2.62s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.485126144545419




 12%|█▎        | 1/8 [00:03<00:22,  3.22s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.05s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.81s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.73s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.71s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.57s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.54s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.50s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3572834730148315
Epoch:  20




  1%|▏         | 1/70 [00:03<03:46,  3.28s/it][A[A

Batch loss: 1.4080662727355957




  3%|▎         | 2/70 [00:06<03:33,  3.14s/it][A[A

  4%|▍         | 3/70 [00:08<03:12,  2.88s/it][A[A

  6%|▌         | 4/70 [00:10<03:04,  2.80s/it][A[A

  7%|▋         | 5/70 [00:13<03:01,  2.79s/it][A[A

  9%|▊         | 6/70 [00:16<02:50,  2.66s/it][A[A

 10%|█         | 7/70 [00:18<02:45,  2.63s/it][A[A

 11%|█▏        | 8/70 [00:21<02:46,  2.69s/it][A[A

 13%|█▎        | 9/70 [00:23<02:39,  2.62s/it][A[A

 14%|█▍        | 10/70 [00:26<02:39,  2.65s/it][A[A

 16%|█▌        | 11/70 [00:29<02:41,  2.73s/it][A[A

Batch loss: 1.580775260925293




 17%|█▋        | 12/70 [00:32<02:33,  2.65s/it][A[A

 19%|█▊        | 13/70 [00:34<02:32,  2.68s/it][A[A

 20%|██        | 14/70 [00:37<02:33,  2.74s/it][A[A

 21%|██▏       | 15/70 [00:40<02:25,  2.64s/it][A[A

 23%|██▎       | 16/70 [00:42<02:23,  2.66s/it][A[A

 24%|██▍       | 17/70 [00:45<02:25,  2.74s/it][A[A

 26%|██▌       | 18/70 [00:48<02:16,  2.62s/it][A[A

 27%|██▋       | 19/70 [00:50<02:12,  2.61s/it][A[A

 29%|██▊       | 20/70 [00:53<02:13,  2.66s/it][A[A

 30%|███       | 21/70 [00:55<02:05,  2.56s/it][A[A

Batch loss: 1.370369791984558




 31%|███▏      | 22/70 [00:58<02:03,  2.57s/it][A[A

 33%|███▎      | 23/70 [01:01<02:03,  2.62s/it][A[A

 34%|███▍      | 24/70 [01:03<01:56,  2.53s/it][A[A

 36%|███▌      | 25/70 [01:06<01:54,  2.55s/it][A[A

 37%|███▋      | 26/70 [01:08<01:55,  2.62s/it][A[A

 39%|███▊      | 27/70 [01:11<01:48,  2.53s/it][A[A

 40%|████      | 28/70 [01:13<01:46,  2.54s/it][A[A

 41%|████▏     | 29/70 [01:16<01:46,  2.60s/it][A[A

 43%|████▎     | 30/70 [01:18<01:40,  2.52s/it][A[A

 44%|████▍     | 31/70 [01:21<01:38,  2.53s/it][A[A

Batch loss: 1.4309723377227783




 46%|████▌     | 32/70 [01:24<01:38,  2.60s/it][A[A

 47%|████▋     | 33/70 [01:26<01:33,  2.53s/it][A[A

 49%|████▊     | 34/70 [01:29<01:32,  2.58s/it][A[A

 50%|█████     | 35/70 [01:32<01:33,  2.67s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:28,  2.60s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:26,  2.63s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:26,  2.70s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:21,  2.62s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:19,  2.64s/it][A[A

 59%|█████▊    | 41/70 [01:47<01:18,  2.71s/it][A[A

Batch loss: 1.4059257507324219




 60%|██████    | 42/70 [01:50<01:13,  2.63s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:10,  2.61s/it][A[A

 63%|██████▎   | 44/70 [01:55<01:09,  2.66s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:04,  2.57s/it][A[A

 66%|██████▌   | 46/70 [02:00<01:02,  2.59s/it][A[A

 67%|██████▋   | 47/70 [02:03<01:00,  2.64s/it][A[A

 69%|██████▊   | 48/70 [02:05<00:56,  2.55s/it][A[A

 70%|███████   | 49/70 [02:08<00:53,  2.56s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:52,  2.62s/it][A[A

 73%|███████▎  | 51/70 [02:13<00:47,  2.53s/it][A[A

Batch loss: 1.3765356540679932




 74%|███████▍  | 52/70 [02:16<00:46,  2.56s/it][A[A

 76%|███████▌  | 53/70 [02:18<00:44,  2.62s/it][A[A

 77%|███████▋  | 54/70 [02:21<00:40,  2.52s/it][A[A

 79%|███████▊  | 55/70 [02:23<00:38,  2.54s/it][A[A

 80%|████████  | 56/70 [02:26<00:36,  2.62s/it][A[A

 81%|████████▏ | 57/70 [02:28<00:32,  2.53s/it][A[A

 83%|████████▎ | 58/70 [02:31<00:30,  2.55s/it][A[A

 84%|████████▍ | 59/70 [02:34<00:29,  2.66s/it][A[A

 86%|████████▌ | 60/70 [02:36<00:25,  2.59s/it][A[A

 87%|████████▋ | 61/70 [02:39<00:23,  2.62s/it][A[A

Batch loss: 1.4035121202468872




 89%|████████▊ | 62/70 [02:42<00:21,  2.70s/it][A[A

 90%|█████████ | 63/70 [02:44<00:18,  2.62s/it][A[A

 91%|█████████▏| 64/70 [02:47<00:15,  2.65s/it][A[A

 93%|█████████▎| 65/70 [02:50<00:13,  2.72s/it][A[A

 94%|█████████▍| 66/70 [02:52<00:10,  2.63s/it][A[A

 96%|█████████▌| 67/70 [02:55<00:07,  2.66s/it][A[A

 97%|█████████▋| 68/70 [02:58<00:05,  2.71s/it][A[A

 99%|█████████▊| 69/70 [03:00<00:02,  2.59s/it][A[A

100%|██████████| 70/70 [03:01<00:00,  2.59s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4594044395855494




 12%|█▎        | 1/8 [00:03<00:23,  3.31s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.19s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.95s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.85s/it][A[A

 62%|██████▎   | 5/8 [00:14<00:08,  2.83s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.69s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.65s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.62s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3260206282138824
Epoch:  21




  1%|▏         | 1/70 [00:03<03:57,  3.44s/it][A[A

Batch loss: 1.4700183868408203




  3%|▎         | 2/70 [00:06<03:42,  3.28s/it][A[A

  4%|▍         | 3/70 [00:08<03:23,  3.03s/it][A[A

  6%|▌         | 4/70 [00:11<03:11,  2.90s/it][A[A

  7%|▋         | 5/70 [00:14<03:05,  2.85s/it][A[A

  9%|▊         | 6/70 [00:16<02:53,  2.71s/it][A[A

 10%|█         | 7/70 [00:19<02:48,  2.67s/it][A[A

 11%|█▏        | 8/70 [00:21<02:47,  2.70s/it][A[A

 13%|█▎        | 9/70 [00:24<02:37,  2.58s/it][A[A

 14%|█▍        | 10/70 [00:26<02:35,  2.59s/it][A[A

 16%|█▌        | 11/70 [00:29<02:36,  2.65s/it][A[A

Batch loss: 1.6034841537475586




 17%|█▋        | 12/70 [00:31<02:27,  2.55s/it][A[A

 19%|█▊        | 13/70 [00:34<02:25,  2.55s/it][A[A

 20%|██        | 14/70 [00:37<02:26,  2.62s/it][A[A

 21%|██▏       | 15/70 [00:39<02:18,  2.53s/it][A[A

 23%|██▎       | 16/70 [00:42<02:17,  2.54s/it][A[A

 24%|██▍       | 17/70 [00:44<02:17,  2.60s/it][A[A

 26%|██▌       | 18/70 [00:47<02:10,  2.51s/it][A[A

 27%|██▋       | 19/70 [00:49<02:10,  2.56s/it][A[A

 29%|██▊       | 20/70 [00:52<02:13,  2.67s/it][A[A

 30%|███       | 21/70 [00:55<02:07,  2.60s/it][A[A

Batch loss: 1.541537880897522




 31%|███▏      | 22/70 [00:57<02:06,  2.63s/it][A[A

 33%|███▎      | 23/70 [01:00<02:07,  2.71s/it][A[A

 34%|███▍      | 24/70 [01:03<02:00,  2.62s/it][A[A

 36%|███▌      | 25/70 [01:05<01:59,  2.65s/it][A[A

 37%|███▋      | 26/70 [01:08<01:59,  2.72s/it][A[A

 39%|███▊      | 27/70 [01:11<01:53,  2.63s/it][A[A

 40%|████      | 28/70 [01:13<01:51,  2.65s/it][A[A

 41%|████▏     | 29/70 [01:16<01:49,  2.68s/it][A[A

 43%|████▎     | 30/70 [01:18<01:42,  2.56s/it][A[A

 44%|████▍     | 31/70 [01:21<01:40,  2.57s/it][A[A

Batch loss: 1.4845207929611206




 46%|████▌     | 32/70 [01:24<01:40,  2.65s/it][A[A

 47%|████▋     | 33/70 [01:26<01:34,  2.55s/it][A[A

 49%|████▊     | 34/70 [01:29<01:32,  2.57s/it][A[A

 50%|█████     | 35/70 [01:32<01:31,  2.62s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:25,  2.53s/it][A[A

 53%|█████▎    | 37/70 [01:36<01:24,  2.56s/it][A[A

 54%|█████▍    | 38/70 [01:39<01:23,  2.61s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:18,  2.52s/it][A[A

 57%|█████▋    | 40/70 [01:44<01:16,  2.54s/it][A[A

 59%|█████▊    | 41/70 [01:47<01:15,  2.61s/it][A[A

Batch loss: 1.4597713947296143




 60%|██████    | 42/70 [01:49<01:10,  2.53s/it][A[A

 61%|██████▏   | 43/70 [01:52<01:08,  2.55s/it][A[A

 63%|██████▎   | 44/70 [01:55<01:08,  2.62s/it][A[A

 64%|██████▍   | 45/70 [01:57<01:04,  2.56s/it][A[A

 66%|██████▌   | 46/70 [02:00<01:02,  2.59s/it][A[A

 67%|██████▋   | 47/70 [02:03<01:01,  2.68s/it][A[A

 69%|██████▊   | 48/70 [02:05<00:57,  2.60s/it][A[A

 70%|███████   | 49/70 [02:08<00:55,  2.64s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:54,  2.71s/it][A[A

 73%|███████▎  | 51/70 [02:13<00:49,  2.63s/it][A[A

Batch loss: 1.3178317546844482




 74%|███████▍  | 52/70 [02:16<00:47,  2.66s/it][A[A

 76%|███████▌  | 53/70 [02:19<00:46,  2.73s/it][A[A

 77%|███████▋  | 54/70 [02:21<00:41,  2.61s/it][A[A

 79%|███████▊  | 55/70 [02:24<00:38,  2.59s/it][A[A

 80%|████████  | 56/70 [02:26<00:36,  2.64s/it][A[A

 81%|████████▏ | 57/70 [02:29<00:33,  2.55s/it][A[A

 83%|████████▎ | 58/70 [02:31<00:30,  2.56s/it][A[A

 84%|████████▍ | 59/70 [02:34<00:28,  2.61s/it][A[A

 86%|████████▌ | 60/70 [02:36<00:25,  2.52s/it][A[A

 87%|████████▋ | 61/70 [02:39<00:22,  2.53s/it][A[A

Batch loss: 1.8613252639770508




 89%|████████▊ | 62/70 [02:42<00:20,  2.60s/it][A[A

 90%|█████████ | 63/70 [02:44<00:17,  2.52s/it][A[A

 91%|█████████▏| 64/70 [02:47<00:15,  2.63s/it][A[A

 93%|█████████▎| 65/70 [02:50<00:13,  2.73s/it][A[A

 94%|█████████▍| 66/70 [02:52<00:10,  2.61s/it][A[A

 96%|█████████▌| 67/70 [02:55<00:07,  2.59s/it][A[A

 97%|█████████▋| 68/70 [02:57<00:05,  2.65s/it][A[A

 99%|█████████▊| 69/70 [03:00<00:02,  2.54s/it][A[A

100%|██████████| 70/70 [03:01<00:00,  2.59s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.44315014396395




 12%|█▎        | 1/8 [00:03<00:23,  3.37s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.19s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.94s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.84s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.83s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.68s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.66s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.61s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.348337799310684
Epoch:  22




  1%|▏         | 1/70 [00:03<03:59,  3.47s/it][A[A

Batch loss: 1.3181475400924683




  3%|▎         | 2/70 [00:06<03:41,  3.26s/it][A[A

  4%|▍         | 3/70 [00:08<03:19,  2.97s/it][A[A

  6%|▌         | 4/70 [00:11<03:08,  2.86s/it][A[A

  7%|▋         | 5/70 [00:14<03:06,  2.86s/it][A[A

  9%|▊         | 6/70 [00:16<02:52,  2.70s/it][A[A

 10%|█         | 7/70 [00:18<02:47,  2.66s/it][A[A

 11%|█▏        | 8/70 [00:21<02:47,  2.71s/it][A[A

 13%|█▎        | 9/70 [00:24<02:37,  2.59s/it][A[A

 14%|█▍        | 10/70 [00:26<02:35,  2.59s/it][A[A

 16%|█▌        | 11/70 [00:29<02:35,  2.63s/it][A[A

Batch loss: 1.3721277713775635




 17%|█▋        | 12/70 [00:31<02:27,  2.54s/it][A[A

 19%|█▊        | 13/70 [00:34<02:25,  2.55s/it][A[A

 20%|██        | 14/70 [00:37<02:26,  2.61s/it][A[A

 21%|██▏       | 15/70 [00:39<02:19,  2.54s/it][A[A

 23%|██▎       | 16/70 [00:41<02:17,  2.55s/it][A[A

 24%|██▍       | 17/70 [00:44<02:18,  2.62s/it][A[A

 26%|██▌       | 18/70 [00:47<02:13,  2.58s/it][A[A

 27%|██▋       | 19/70 [00:49<02:13,  2.61s/it][A[A

 29%|██▊       | 20/70 [00:52<02:14,  2.69s/it][A[A

 30%|███       | 21/70 [00:55<02:07,  2.61s/it][A[A

Batch loss: 1.3767033815383911




 31%|███▏      | 22/70 [00:57<02:06,  2.64s/it][A[A

 33%|███▎      | 23/70 [01:00<02:07,  2.71s/it][A[A

 34%|███▍      | 24/70 [01:03<02:00,  2.62s/it][A[A

 36%|███▌      | 25/70 [01:05<01:59,  2.66s/it][A[A

 37%|███▋      | 26/70 [01:08<01:59,  2.73s/it][A[A

 39%|███▊      | 27/70 [01:11<01:53,  2.64s/it][A[A

 40%|████      | 28/70 [01:13<01:50,  2.62s/it][A[A

 41%|████▏     | 29/70 [01:16<01:48,  2.66s/it][A[A

 43%|████▎     | 30/70 [01:18<01:43,  2.58s/it][A[A

 44%|████▍     | 31/70 [01:21<01:40,  2.58s/it][A[A

Batch loss: 1.4182528257369995




 46%|████▌     | 32/70 [01:24<01:40,  2.64s/it][A[A

 47%|████▋     | 33/70 [01:26<01:33,  2.54s/it][A[A

 49%|████▊     | 34/70 [01:29<01:32,  2.58s/it][A[A

 50%|█████     | 35/70 [01:32<01:32,  2.64s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:26,  2.56s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:25,  2.59s/it][A[A

 54%|█████▍    | 38/70 [01:39<01:24,  2.65s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:19,  2.55s/it][A[A

 57%|█████▋    | 40/70 [01:44<01:16,  2.56s/it][A[A

 59%|█████▊    | 41/70 [01:47<01:16,  2.62s/it][A[A

Batch loss: 1.6768275499343872




 60%|██████    | 42/70 [01:49<01:11,  2.54s/it][A[A

 61%|██████▏   | 43/70 [01:52<01:09,  2.59s/it][A[A

 63%|██████▎   | 44/70 [01:55<01:09,  2.67s/it][A[A

 64%|██████▍   | 45/70 [01:57<01:05,  2.61s/it][A[A

 66%|██████▌   | 46/70 [02:00<01:03,  2.65s/it][A[A

 67%|██████▋   | 47/70 [02:03<01:02,  2.73s/it][A[A

 69%|██████▊   | 48/70 [02:06<00:58,  2.65s/it][A[A

 70%|███████   | 49/70 [02:08<00:56,  2.69s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:55,  2.76s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:50,  2.66s/it][A[A

Batch loss: 1.6448684930801392




 74%|███████▍  | 52/70 [02:16<00:48,  2.68s/it][A[A

 76%|███████▌  | 53/70 [02:19<00:46,  2.71s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:41,  2.60s/it][A[A

 79%|███████▊  | 55/70 [02:24<00:39,  2.61s/it][A[A

 80%|████████  | 56/70 [02:27<00:37,  2.70s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:34,  2.63s/it][A[A

 83%|████████▎ | 58/70 [02:32<00:31,  2.66s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:30,  2.73s/it][A[A

 86%|████████▌ | 60/70 [02:38<00:26,  2.64s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:24,  2.67s/it][A[A

Batch loss: 1.44356369972229




 89%|████████▊ | 62/70 [02:43<00:21,  2.74s/it][A[A

 90%|█████████ | 63/70 [02:46<00:18,  2.65s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:16,  2.67s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.71s/it][A[A

 94%|█████████▍| 66/70 [02:54<00:10,  2.59s/it][A[A

 96%|█████████▌| 67/70 [02:56<00:07,  2.58s/it][A[A

 97%|█████████▋| 68/70 [02:59<00:05,  2.64s/it][A[A

 99%|█████████▊| 69/70 [03:01<00:02,  2.54s/it][A[A

100%|██████████| 70/70 [03:02<00:00,  2.61s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4470375282423837




 12%|█▎        | 1/8 [00:03<00:22,  3.24s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.07s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.82s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.74s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.70s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.55s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.53s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.49s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3309969007968903
Epoch:  23




  1%|▏         | 1/70 [00:03<03:47,  3.29s/it][A[A

Batch loss: 1.3490707874298096




  3%|▎         | 2/70 [00:06<03:33,  3.14s/it][A[A

  4%|▍         | 3/70 [00:08<03:14,  2.91s/it][A[A

  6%|▌         | 4/70 [00:11<03:08,  2.86s/it][A[A

  7%|▋         | 5/70 [00:14<03:07,  2.88s/it][A[A

  9%|▊         | 6/70 [00:16<02:55,  2.74s/it][A[A

 10%|█         | 7/70 [00:19<02:52,  2.74s/it][A[A

 11%|█▏        | 8/70 [00:22<02:52,  2.78s/it][A[A

 13%|█▎        | 9/70 [00:24<02:42,  2.67s/it][A[A

 14%|█▍        | 10/70 [00:27<02:41,  2.69s/it][A[A

 16%|█▌        | 11/70 [00:30<02:42,  2.75s/it][A[A

Batch loss: 1.4174224138259888




 17%|█▋        | 12/70 [00:32<02:34,  2.67s/it][A[A

 19%|█▊        | 13/70 [00:35<02:31,  2.65s/it][A[A

 20%|██        | 14/70 [00:38<02:30,  2.68s/it][A[A

 21%|██▏       | 15/70 [00:40<02:22,  2.58s/it][A[A

 23%|██▎       | 16/70 [00:43<02:20,  2.59s/it][A[A

 24%|██▍       | 17/70 [00:45<02:20,  2.65s/it][A[A

 26%|██▌       | 18/70 [00:48<02:12,  2.55s/it][A[A

 27%|██▋       | 19/70 [00:50<02:10,  2.56s/it][A[A

 29%|██▊       | 20/70 [00:53<02:10,  2.62s/it][A[A

 30%|███       | 21/70 [00:55<02:04,  2.53s/it][A[A

Batch loss: 1.4012799263000488




 31%|███▏      | 22/70 [00:58<02:03,  2.56s/it][A[A

 33%|███▎      | 23/70 [01:01<02:04,  2.64s/it][A[A

 34%|███▍      | 24/70 [01:03<01:56,  2.54s/it][A[A

 36%|███▌      | 25/70 [01:06<01:55,  2.56s/it][A[A

 37%|███▋      | 26/70 [01:08<01:55,  2.62s/it][A[A

 39%|███▊      | 27/70 [01:11<01:48,  2.52s/it][A[A

 40%|████      | 28/70 [01:13<01:47,  2.55s/it][A[A

 41%|████▏     | 29/70 [01:16<01:48,  2.66s/it][A[A

 43%|████▎     | 30/70 [01:19<01:43,  2.59s/it][A[A

 44%|████▍     | 31/70 [01:21<01:42,  2.63s/it][A[A

Batch loss: 1.6065244674682617




 46%|████▌     | 32/70 [01:24<01:43,  2.72s/it][A[A

 47%|████▋     | 33/70 [01:27<01:37,  2.64s/it][A[A

 49%|████▊     | 34/70 [01:29<01:35,  2.66s/it][A[A

 50%|█████     | 35/70 [01:32<01:36,  2.75s/it][A[A

 51%|█████▏    | 36/70 [01:35<01:30,  2.65s/it][A[A

 53%|█████▎    | 37/70 [01:38<01:28,  2.67s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:27,  2.73s/it][A[A

 56%|█████▌    | 39/70 [01:43<01:21,  2.63s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:18,  2.62s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:17,  2.68s/it][A[A

Batch loss: 1.4756358861923218




 60%|██████    | 42/70 [01:51<01:12,  2.57s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:09,  2.58s/it][A[A

 63%|██████▎   | 44/70 [01:56<01:08,  2.63s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:03,  2.53s/it][A[A

 66%|██████▌   | 46/70 [02:01<01:01,  2.54s/it][A[A

 67%|██████▋   | 47/70 [02:04<01:00,  2.61s/it][A[A

 69%|██████▊   | 48/70 [02:06<00:55,  2.52s/it][A[A

 70%|███████   | 49/70 [02:08<00:53,  2.54s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:52,  2.61s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:47,  2.52s/it][A[A

Batch loss: 1.4287203550338745




 74%|███████▍  | 52/70 [02:16<00:45,  2.55s/it][A[A

 76%|███████▌  | 53/70 [02:19<00:44,  2.62s/it][A[A

 77%|███████▋  | 54/70 [02:21<00:40,  2.55s/it][A[A

 79%|███████▊  | 55/70 [02:24<00:38,  2.59s/it][A[A

 80%|████████  | 56/70 [02:27<00:37,  2.68s/it][A[A

 81%|████████▏ | 57/70 [02:29<00:33,  2.60s/it][A[A

 83%|████████▎ | 58/70 [02:32<00:31,  2.64s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:29,  2.71s/it][A[A

 86%|████████▌ | 60/70 [02:37<00:26,  2.62s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:23,  2.65s/it][A[A

Batch loss: 1.4736700057983398




 89%|████████▊ | 62/70 [02:43<00:21,  2.74s/it][A[A

 90%|█████████ | 63/70 [02:45<00:18,  2.64s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:15,  2.62s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.66s/it][A[A

 94%|█████████▍| 66/70 [02:53<00:10,  2.58s/it][A[A

 96%|█████████▌| 67/70 [02:56<00:07,  2.58s/it][A[A

 97%|█████████▋| 68/70 [02:58<00:05,  2.63s/it][A[A

 99%|█████████▊| 69/70 [03:01<00:02,  2.54s/it][A[A

100%|██████████| 70/70 [03:02<00:00,  2.60s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4582180874688284




 12%|█▎        | 1/8 [00:03<00:22,  3.22s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.06s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.81s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.72s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.71s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.58s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.56s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.51s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3202602863311768
Epoch:  24




  1%|▏         | 1/70 [00:03<03:55,  3.42s/it][A[A

Batch loss: 1.4407607316970825




  3%|▎         | 2/70 [00:06<03:41,  3.26s/it][A[A

  4%|▍         | 3/70 [00:08<03:21,  3.01s/it][A[A

  6%|▌         | 4/70 [00:11<03:13,  2.93s/it][A[A

  7%|▋         | 5/70 [00:14<03:10,  2.92s/it][A[A

  9%|▊         | 6/70 [00:16<02:57,  2.78s/it][A[A

 10%|█         | 7/70 [00:19<02:53,  2.75s/it][A[A

 11%|█▏        | 8/70 [00:22<02:53,  2.80s/it][A[A

 13%|█▎        | 9/70 [00:24<02:43,  2.69s/it][A[A

 14%|█▍        | 10/70 [00:27<02:41,  2.70s/it][A[A

 16%|█▌        | 11/70 [00:30<02:43,  2.76s/it][A[A

Batch loss: 1.357309103012085




 17%|█▋        | 12/70 [00:32<02:33,  2.64s/it][A[A

 19%|█▊        | 13/70 [00:35<02:29,  2.63s/it][A[A

 20%|██        | 14/70 [00:38<02:31,  2.70s/it][A[A

 21%|██▏       | 15/70 [00:40<02:24,  2.62s/it][A[A

 23%|██▎       | 16/70 [00:43<02:23,  2.65s/it][A[A

 24%|██▍       | 17/70 [00:46<02:23,  2.71s/it][A[A

 26%|██▌       | 18/70 [00:48<02:16,  2.62s/it][A[A

 27%|██▋       | 19/70 [00:51<02:14,  2.64s/it][A[A

 29%|██▊       | 20/70 [00:54<02:16,  2.72s/it][A[A

 30%|███       | 21/70 [00:56<02:09,  2.64s/it][A[A

Batch loss: 1.3759872913360596




 31%|███▏      | 22/70 [00:59<02:07,  2.66s/it][A[A

 33%|███▎      | 23/70 [01:02<02:08,  2.73s/it][A[A

 34%|███▍      | 24/70 [01:04<01:59,  2.60s/it][A[A

 36%|███▌      | 25/70 [01:07<01:55,  2.58s/it][A[A

 37%|███▋      | 26/70 [01:10<01:56,  2.65s/it][A[A

 39%|███▊      | 27/70 [01:12<01:49,  2.55s/it][A[A

 40%|████      | 28/70 [01:14<01:47,  2.56s/it][A[A

 41%|████▏     | 29/70 [01:17<01:47,  2.62s/it][A[A

 43%|████▎     | 30/70 [01:20<01:41,  2.54s/it][A[A

 44%|████▍     | 31/70 [01:22<01:39,  2.55s/it][A[A

Batch loss: 1.556496262550354




 46%|████▌     | 32/70 [01:25<01:39,  2.62s/it][A[A

 47%|████▋     | 33/70 [01:27<01:33,  2.53s/it][A[A

 49%|████▊     | 34/70 [01:30<01:31,  2.55s/it][A[A

 50%|█████     | 35/70 [01:33<01:31,  2.62s/it][A[A

 51%|█████▏    | 36/70 [01:35<01:25,  2.53s/it][A[A

 53%|█████▎    | 37/70 [01:38<01:24,  2.55s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:23,  2.61s/it][A[A

 56%|█████▌    | 39/70 [01:43<01:18,  2.53s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:17,  2.59s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:17,  2.69s/it][A[A

Batch loss: 1.3286285400390625




 60%|██████    | 42/70 [01:51<01:13,  2.62s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:11,  2.66s/it][A[A

 63%|██████▎   | 44/70 [01:56<01:10,  2.73s/it][A[A

 64%|██████▍   | 45/70 [01:59<01:05,  2.64s/it][A[A

 66%|██████▌   | 46/70 [02:02<01:03,  2.66s/it][A[A

 67%|██████▋   | 47/70 [02:04<01:03,  2.74s/it][A[A

 69%|██████▊   | 48/70 [02:07<00:58,  2.65s/it][A[A

 70%|███████   | 49/70 [02:10<00:55,  2.65s/it][A[A

 71%|███████▏  | 50/70 [02:12<00:53,  2.68s/it][A[A

 73%|███████▎  | 51/70 [02:15<00:48,  2.58s/it][A[A

Batch loss: 1.7035123109817505




 74%|███████▍  | 52/70 [02:17<00:46,  2.60s/it][A[A

 76%|███████▌  | 53/70 [02:20<00:45,  2.66s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:40,  2.55s/it][A[A

 79%|███████▊  | 55/70 [02:25<00:38,  2.57s/it][A[A

 80%|████████  | 56/70 [02:28<00:36,  2.62s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:32,  2.53s/it][A[A

 83%|████████▎ | 58/70 [02:33<00:30,  2.54s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:28,  2.61s/it][A[A

 86%|████████▌ | 60/70 [02:38<00:25,  2.52s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:22,  2.53s/it][A[A

Batch loss: 1.4108282327651978




 89%|████████▊ | 62/70 [02:43<00:20,  2.61s/it][A[A

 90%|█████████ | 63/70 [02:45<00:17,  2.52s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:15,  2.55s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.66s/it][A[A

 94%|█████████▍| 66/70 [02:53<00:10,  2.58s/it][A[A

 96%|█████████▌| 67/70 [02:56<00:07,  2.62s/it][A[A

 97%|█████████▋| 68/70 [02:59<00:05,  2.70s/it][A[A

 99%|█████████▊| 69/70 [03:01<00:02,  2.62s/it][A[A

100%|██████████| 70/70 [03:02<00:00,  2.61s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.448050139631544




 12%|█▎        | 1/8 [00:03<00:23,  3.43s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.25s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.97s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.88s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.82s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.64s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.60s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.58s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.2682860791683197
Epoch:  25




  1%|▏         | 1/70 [00:03<03:50,  3.34s/it][A[A

Batch loss: 1.3858240842819214




  3%|▎         | 2/70 [00:06<03:36,  3.18s/it][A[A

  4%|▍         | 3/70 [00:08<03:15,  2.92s/it][A[A

  6%|▌         | 4/70 [00:11<03:06,  2.82s/it][A[A

  7%|▋         | 5/70 [00:13<03:02,  2.81s/it][A[A

  9%|▊         | 6/70 [00:16<02:50,  2.66s/it][A[A

 10%|█         | 7/70 [00:18<02:46,  2.64s/it][A[A

 11%|█▏        | 8/70 [00:21<02:45,  2.67s/it][A[A

 13%|█▎        | 9/70 [00:23<02:36,  2.57s/it][A[A

 14%|█▍        | 10/70 [00:26<02:34,  2.58s/it][A[A

 16%|█▌        | 11/70 [00:29<02:35,  2.64s/it][A[A

Batch loss: 1.5298603773117065




 17%|█▋        | 12/70 [00:31<02:27,  2.55s/it][A[A

 19%|█▊        | 13/70 [00:34<02:28,  2.60s/it][A[A

 20%|██        | 14/70 [00:37<02:35,  2.77s/it][A[A

 21%|██▏       | 15/70 [00:40<02:31,  2.75s/it][A[A

 23%|██▎       | 16/70 [00:42<02:28,  2.75s/it][A[A

 24%|██▍       | 17/70 [00:45<02:27,  2.79s/it][A[A

 26%|██▌       | 18/70 [00:48<02:19,  2.69s/it][A[A

 27%|██▋       | 19/70 [00:50<02:17,  2.70s/it][A[A

 29%|██▊       | 20/70 [00:53<02:18,  2.77s/it][A[A

 30%|███       | 21/70 [00:56<02:10,  2.66s/it][A[A

Batch loss: 1.3369743824005127




 31%|███▏      | 22/70 [00:58<02:07,  2.65s/it][A[A

 33%|███▎      | 23/70 [01:01<02:06,  2.68s/it][A[A

 34%|███▍      | 24/70 [01:03<01:58,  2.58s/it][A[A

 36%|███▌      | 25/70 [01:06<01:55,  2.58s/it][A[A

 37%|███▋      | 26/70 [01:09<01:55,  2.62s/it][A[A

 39%|███▊      | 27/70 [01:11<01:49,  2.54s/it][A[A

 40%|████      | 28/70 [01:14<01:47,  2.56s/it][A[A

 41%|████▏     | 29/70 [01:16<01:47,  2.62s/it][A[A

 43%|████▎     | 30/70 [01:19<01:40,  2.51s/it][A[A

 44%|████▍     | 31/70 [01:21<01:38,  2.53s/it][A[A

Batch loss: 1.429396390914917




 46%|████▌     | 32/70 [01:24<01:38,  2.60s/it][A[A

 47%|████▋     | 33/70 [01:26<01:32,  2.51s/it][A[A

 49%|████▊     | 34/70 [01:29<01:31,  2.53s/it][A[A

 50%|█████     | 35/70 [01:32<01:30,  2.60s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:25,  2.51s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:23,  2.54s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:24,  2.66s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:20,  2.60s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:19,  2.67s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:19,  2.75s/it][A[A

Batch loss: 1.3421059846878052




 60%|██████    | 42/70 [01:50<01:14,  2.67s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:12,  2.68s/it][A[A

 63%|██████▎   | 44/70 [01:56<01:11,  2.74s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:06,  2.66s/it][A[A

 66%|██████▌   | 46/70 [02:01<01:04,  2.68s/it][A[A

 67%|██████▋   | 47/70 [02:04<01:03,  2.74s/it][A[A

 69%|██████▊   | 48/70 [02:06<00:57,  2.63s/it][A[A

 70%|███████   | 49/70 [02:09<00:55,  2.62s/it][A[A

 71%|███████▏  | 50/70 [02:12<00:53,  2.69s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:49,  2.60s/it][A[A

Batch loss: 1.529064655303955




 74%|███████▍  | 52/70 [02:17<00:47,  2.62s/it][A[A

 76%|███████▌  | 53/70 [02:20<00:45,  2.66s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:40,  2.56s/it][A[A

 79%|███████▊  | 55/70 [02:25<00:38,  2.57s/it][A[A

 80%|████████  | 56/70 [02:27<00:36,  2.63s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:32,  2.54s/it][A[A

 83%|████████▎ | 58/70 [02:32<00:30,  2.56s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:28,  2.63s/it][A[A

 86%|████████▌ | 60/70 [02:37<00:25,  2.56s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:23,  2.59s/it][A[A

Batch loss: 1.3390718698501587




 89%|████████▊ | 62/70 [02:43<00:21,  2.64s/it][A[A

 90%|█████████ | 63/70 [02:45<00:18,  2.59s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:15,  2.65s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.73s/it][A[A

 94%|█████████▍| 66/70 [02:53<00:10,  2.66s/it][A[A

 96%|█████████▌| 67/70 [02:56<00:08,  2.68s/it][A[A

 97%|█████████▋| 68/70 [02:59<00:05,  2.75s/it][A[A

 99%|█████████▊| 69/70 [03:02<00:02,  2.65s/it][A[A

100%|██████████| 70/70 [03:02<00:00,  2.61s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4387983628681729




 12%|█▎        | 1/8 [00:03<00:23,  3.37s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.19s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.92s/it][A[A

 50%|█████     | 4/8 [00:10<00:11,  2.79s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.77s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.64s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.64s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.58s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.466029793024063
Epoch:  26




  1%|▏         | 1/70 [00:03<04:01,  3.49s/it][A[A

Batch loss: 1.4233254194259644




  3%|▎         | 2/70 [00:06<03:44,  3.31s/it][A[A

  4%|▍         | 3/70 [00:08<03:23,  3.04s/it][A[A

  6%|▌         | 4/70 [00:11<03:14,  2.94s/it][A[A

  7%|▋         | 5/70 [00:14<03:09,  2.92s/it][A[A

  9%|▊         | 6/70 [00:16<02:57,  2.77s/it][A[A

 10%|█         | 7/70 [00:19<02:52,  2.74s/it][A[A

 11%|█▏        | 8/70 [00:22<02:50,  2.75s/it][A[A

 13%|█▎        | 9/70 [00:24<02:40,  2.64s/it][A[A

 14%|█▍        | 10/70 [00:27<02:38,  2.64s/it][A[A

 16%|█▌        | 11/70 [00:30<02:37,  2.68s/it][A[A

Batch loss: 1.3855825662612915




 17%|█▋        | 12/70 [00:32<02:28,  2.57s/it][A[A

 19%|█▊        | 13/70 [00:34<02:27,  2.59s/it][A[A

 20%|██        | 14/70 [00:37<02:27,  2.64s/it][A[A

 21%|██▏       | 15/70 [00:40<02:19,  2.54s/it][A[A

 23%|██▎       | 16/70 [00:42<02:17,  2.55s/it][A[A

 24%|██▍       | 17/70 [00:45<02:18,  2.62s/it][A[A

 26%|██▌       | 18/70 [00:47<02:11,  2.53s/it][A[A

 27%|██▋       | 19/70 [00:50<02:10,  2.55s/it][A[A

 29%|██▊       | 20/70 [00:53<02:10,  2.61s/it][A[A

 30%|███       | 21/70 [00:55<02:03,  2.53s/it][A[A

Batch loss: 1.9051622152328491




 31%|███▏      | 22/70 [00:57<02:01,  2.54s/it][A[A

 33%|███▎      | 23/70 [01:00<02:04,  2.65s/it][A[A

 34%|███▍      | 24/70 [01:03<01:59,  2.59s/it][A[A

 36%|███▌      | 25/70 [01:06<01:58,  2.63s/it][A[A

 37%|███▋      | 26/70 [01:08<01:59,  2.70s/it][A[A

 39%|███▊      | 27/70 [01:11<01:52,  2.62s/it][A[A

 40%|████      | 28/70 [01:14<01:51,  2.65s/it][A[A

 41%|████▏     | 29/70 [01:16<01:51,  2.73s/it][A[A

 43%|████▎     | 30/70 [01:19<01:45,  2.64s/it][A[A

 44%|████▍     | 31/70 [01:22<01:43,  2.66s/it][A[A

Batch loss: 1.527795433998108




 46%|████▌     | 32/70 [01:25<01:43,  2.74s/it][A[A

 47%|████▋     | 33/70 [01:27<01:36,  2.62s/it][A[A

 49%|████▊     | 34/70 [01:29<01:33,  2.60s/it][A[A

 50%|█████     | 35/70 [01:32<01:33,  2.66s/it][A[A

 51%|█████▏    | 36/70 [01:35<01:28,  2.59s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:25,  2.60s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:25,  2.66s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:19,  2.55s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:16,  2.56s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:16,  2.63s/it][A[A

Batch loss: 1.355089545249939




 60%|██████    | 42/70 [01:50<01:11,  2.54s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:09,  2.56s/it][A[A

 63%|██████▎   | 44/70 [01:55<01:07,  2.61s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:03,  2.52s/it][A[A

 66%|██████▌   | 46/70 [02:00<01:01,  2.54s/it][A[A

 67%|██████▋   | 47/70 [02:03<00:59,  2.60s/it][A[A

 69%|██████▊   | 48/70 [02:05<00:56,  2.55s/it][A[A

 70%|███████   | 49/70 [02:08<00:54,  2.61s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:53,  2.68s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:49,  2.60s/it][A[A

Batch loss: 1.3522460460662842




 74%|███████▍  | 52/70 [02:16<00:47,  2.64s/it][A[A

 76%|███████▌  | 53/70 [02:19<00:45,  2.70s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:41,  2.62s/it][A[A

 79%|███████▊  | 55/70 [02:24<00:39,  2.66s/it][A[A

 80%|████████  | 56/70 [02:27<00:38,  2.72s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:34,  2.64s/it][A[A

 83%|████████▎ | 58/70 [02:32<00:31,  2.62s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:29,  2.67s/it][A[A

 86%|████████▌ | 60/70 [02:37<00:25,  2.57s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:23,  2.60s/it][A[A

Batch loss: 1.3753323554992676




 89%|████████▊ | 62/70 [02:43<00:21,  2.64s/it][A[A

 90%|█████████ | 63/70 [02:45<00:17,  2.54s/it][A[A

 91%|█████████▏| 64/70 [02:48<00:15,  2.56s/it][A[A

 93%|█████████▎| 65/70 [02:50<00:13,  2.62s/it][A[A

 94%|█████████▍| 66/70 [02:53<00:10,  2.53s/it][A[A

 96%|█████████▌| 67/70 [02:55<00:07,  2.56s/it][A[A

 97%|█████████▋| 68/70 [02:58<00:05,  2.63s/it][A[A

 99%|█████████▊| 69/70 [03:00<00:02,  2.53s/it][A[A

100%|██████████| 70/70 [03:01<00:00,  2.60s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4578675644738333




 12%|█▎        | 1/8 [00:03<00:22,  3.23s/it][A[A

 25%|██▌       | 2/8 [00:05<00:18,  3.06s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.81s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.74s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.79s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.66s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.64s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.57s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.344821512699127
Epoch:  27




  1%|▏         | 1/70 [00:03<04:02,  3.51s/it][A[A

Batch loss: 1.589340329170227




  3%|▎         | 2/70 [00:06<03:52,  3.42s/it][A[A

  4%|▍         | 3/70 [00:09<03:31,  3.15s/it][A[A

  6%|▌         | 4/70 [00:12<03:20,  3.04s/it][A[A

  7%|▋         | 5/70 [00:14<03:16,  3.02s/it][A[A

  9%|▊         | 6/70 [00:17<03:02,  2.85s/it][A[A

 10%|█         | 7/70 [00:20<02:54,  2.76s/it][A[A

 11%|█▏        | 8/70 [00:22<02:51,  2.77s/it][A[A

 13%|█▎        | 9/70 [00:25<02:40,  2.63s/it][A[A

 14%|█▍        | 10/70 [00:27<02:37,  2.62s/it][A[A

 16%|█▌        | 11/70 [00:30<02:38,  2.68s/it][A[A

Batch loss: 1.4151474237442017




 17%|█▋        | 12/70 [00:32<02:31,  2.61s/it][A[A

 19%|█▊        | 13/70 [00:35<02:28,  2.60s/it][A[A

 20%|██        | 14/70 [00:38<02:28,  2.65s/it][A[A

 21%|██▏       | 15/70 [00:40<02:20,  2.56s/it][A[A

 23%|██▎       | 16/70 [00:43<02:18,  2.57s/it][A[A

 24%|██▍       | 17/70 [00:46<02:19,  2.63s/it][A[A

 26%|██▌       | 18/70 [00:48<02:11,  2.52s/it][A[A

 27%|██▋       | 19/70 [00:50<02:09,  2.54s/it][A[A

 29%|██▊       | 20/70 [00:53<02:10,  2.60s/it][A[A

 30%|███       | 21/70 [00:56<02:04,  2.54s/it][A[A

Batch loss: 1.314375877380371




 31%|███▏      | 22/70 [00:58<02:04,  2.59s/it][A[A

 33%|███▎      | 23/70 [01:01<02:06,  2.69s/it][A[A

 34%|███▍      | 24/70 [01:04<02:00,  2.62s/it][A[A

 36%|███▌      | 25/70 [01:06<01:59,  2.65s/it][A[A

 37%|███▋      | 26/70 [01:09<02:00,  2.75s/it][A[A

 39%|███▊      | 27/70 [01:12<01:55,  2.68s/it][A[A

 40%|████      | 28/70 [01:15<01:53,  2.71s/it][A[A

 41%|████▏     | 29/70 [01:18<01:54,  2.78s/it][A[A

 43%|████▎     | 30/70 [01:20<01:47,  2.68s/it][A[A

 44%|████▍     | 31/70 [01:23<01:44,  2.69s/it][A[A

Batch loss: 1.3993878364562988




 46%|████▌     | 32/70 [01:26<01:44,  2.74s/it][A[A

 47%|████▋     | 33/70 [01:28<01:37,  2.63s/it][A[A

 49%|████▊     | 34/70 [01:31<01:39,  2.76s/it][A[A

 50%|█████     | 35/70 [01:34<01:38,  2.81s/it][A[A

 51%|█████▏    | 36/70 [01:36<01:32,  2.71s/it][A[A

 53%|█████▎    | 37/70 [01:39<01:30,  2.75s/it][A[A

 54%|█████▍    | 38/70 [01:42<01:31,  2.85s/it][A[A

 56%|█████▌    | 39/70 [01:45<01:25,  2.75s/it][A[A

 57%|█████▋    | 40/70 [01:48<01:23,  2.78s/it][A[A

 59%|█████▊    | 41/70 [01:51<01:22,  2.84s/it][A[A

Batch loss: 1.5752310752868652




 60%|██████    | 42/70 [01:53<01:16,  2.74s/it][A[A

 61%|██████▏   | 43/70 [01:56<01:14,  2.75s/it][A[A

 63%|██████▎   | 44/70 [01:59<01:13,  2.81s/it][A[A

 64%|██████▍   | 45/70 [02:01<01:07,  2.71s/it][A[A

 66%|██████▌   | 46/70 [02:04<01:05,  2.72s/it][A[A

 67%|██████▋   | 47/70 [02:07<01:03,  2.75s/it][A[A

 69%|██████▊   | 48/70 [02:09<00:57,  2.63s/it][A[A

 70%|███████   | 49/70 [02:12<00:56,  2.67s/it][A[A

 71%|███████▏  | 50/70 [02:15<00:54,  2.75s/it][A[A

 73%|███████▎  | 51/70 [02:17<00:50,  2.64s/it][A[A

Batch loss: 1.460559368133545




 74%|███████▍  | 52/70 [02:20<00:47,  2.63s/it][A[A

 76%|███████▌  | 53/70 [02:23<00:45,  2.68s/it][A[A

 77%|███████▋  | 54/70 [02:25<00:41,  2.58s/it][A[A

 79%|███████▊  | 55/70 [02:28<00:38,  2.59s/it][A[A

 80%|████████  | 56/70 [02:30<00:36,  2.64s/it][A[A

 81%|████████▏ | 57/70 [02:33<00:33,  2.55s/it][A[A

 83%|████████▎ | 58/70 [02:36<00:31,  2.59s/it][A[A

 84%|████████▍ | 59/70 [02:38<00:29,  2.70s/it][A[A

 86%|████████▌ | 60/70 [02:41<00:26,  2.62s/it][A[A

 87%|████████▋ | 61/70 [02:44<00:23,  2.66s/it][A[A

Batch loss: 1.4200133085250854




 89%|████████▊ | 62/70 [02:47<00:21,  2.74s/it][A[A

 90%|█████████ | 63/70 [02:49<00:18,  2.66s/it][A[A

 91%|█████████▏| 64/70 [02:52<00:16,  2.71s/it][A[A

 93%|█████████▎| 65/70 [02:55<00:14,  2.84s/it][A[A

 94%|█████████▍| 66/70 [02:58<00:11,  2.76s/it][A[A

 96%|█████████▌| 67/70 [03:00<00:08,  2.77s/it][A[A

 97%|█████████▋| 68/70 [03:03<00:05,  2.80s/it][A[A

 99%|█████████▊| 69/70 [03:06<00:02,  2.67s/it][A[A

100%|██████████| 70/70 [03:06<00:00,  2.67s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.456329778262547




 12%|█▎        | 1/8 [00:03<00:23,  3.34s/it][A[A

 25%|██▌       | 2/8 [00:06<00:18,  3.14s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.88s/it][A[A

 50%|█████     | 4/8 [00:10<00:11,  2.76s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.80s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.64s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.60s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.55s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3338322043418884
Epoch:  28




  1%|▏         | 1/70 [00:03<03:55,  3.42s/it][A[A

Batch loss: 1.803203821182251




  3%|▎         | 2/70 [00:06<03:40,  3.24s/it][A[A

  4%|▍         | 3/70 [00:08<03:18,  2.97s/it][A[A

  6%|▌         | 4/70 [00:11<03:08,  2.86s/it][A[A

  7%|▋         | 5/70 [00:14<03:06,  2.87s/it][A[A

  9%|▊         | 6/70 [00:16<02:55,  2.74s/it][A[A

 10%|█         | 7/70 [00:19<02:53,  2.75s/it][A[A

 11%|█▏        | 8/70 [00:22<02:53,  2.80s/it][A[A

 13%|█▎        | 9/70 [00:24<02:44,  2.69s/it][A[A

 14%|█▍        | 10/70 [00:27<02:43,  2.72s/it][A[A

 16%|█▌        | 11/70 [00:30<02:43,  2.78s/it][A[A

Batch loss: 1.3324000835418701




 17%|█▋        | 12/70 [00:32<02:35,  2.68s/it][A[A

 19%|█▊        | 13/70 [00:35<02:34,  2.71s/it][A[A

 20%|██        | 14/70 [00:38<02:34,  2.75s/it][A[A

 21%|██▏       | 15/70 [00:40<02:24,  2.63s/it][A[A

 23%|██▎       | 16/70 [00:43<02:21,  2.62s/it][A[A

 24%|██▍       | 17/70 [00:46<02:22,  2.68s/it][A[A

 26%|██▌       | 18/70 [00:48<02:14,  2.59s/it][A[A

 27%|██▋       | 19/70 [00:51<02:14,  2.63s/it][A[A

 29%|██▊       | 20/70 [00:54<02:14,  2.68s/it][A[A

 30%|███       | 21/70 [00:56<02:06,  2.58s/it][A[A

Batch loss: 1.5959959030151367




 31%|███▏      | 22/70 [00:59<02:04,  2.59s/it][A[A

 33%|███▎      | 23/70 [01:01<02:04,  2.65s/it][A[A

 34%|███▍      | 24/70 [01:04<01:57,  2.56s/it][A[A

 36%|███▌      | 25/70 [01:06<01:55,  2.57s/it][A[A

 37%|███▋      | 26/70 [01:09<01:55,  2.62s/it][A[A

 39%|███▊      | 27/70 [01:11<01:48,  2.53s/it][A[A

 40%|████      | 28/70 [01:14<01:47,  2.56s/it][A[A

 41%|████▏     | 29/70 [01:17<01:47,  2.62s/it][A[A

 43%|████▎     | 30/70 [01:19<01:41,  2.55s/it][A[A

 44%|████▍     | 31/70 [01:22<01:41,  2.60s/it][A[A

Batch loss: 1.5705904960632324




 46%|████▌     | 32/70 [01:25<01:42,  2.70s/it][A[A

 47%|████▋     | 33/70 [01:27<01:37,  2.63s/it][A[A

 49%|████▊     | 34/70 [01:30<01:35,  2.66s/it][A[A

 50%|█████     | 35/70 [01:33<01:35,  2.73s/it][A[A

 51%|█████▏    | 36/70 [01:35<01:30,  2.65s/it][A[A

 53%|█████▎    | 37/70 [01:38<01:28,  2.67s/it][A[A

 54%|█████▍    | 38/70 [01:41<01:27,  2.75s/it][A[A

 56%|█████▌    | 39/70 [01:44<01:24,  2.72s/it][A[A

 57%|█████▋    | 40/70 [01:46<01:21,  2.73s/it][A[A

 59%|█████▊    | 41/70 [01:49<01:19,  2.75s/it][A[A

Batch loss: 1.3347632884979248




 60%|██████    | 42/70 [01:52<01:14,  2.64s/it][A[A

 61%|██████▏   | 43/70 [01:54<01:11,  2.65s/it][A[A

 63%|██████▎   | 44/70 [01:57<01:10,  2.70s/it][A[A

 64%|██████▍   | 45/70 [01:59<01:04,  2.59s/it][A[A

 66%|██████▌   | 46/70 [02:02<01:02,  2.60s/it][A[A

 67%|██████▋   | 47/70 [02:05<01:01,  2.66s/it][A[A

 69%|██████▊   | 48/70 [02:07<00:56,  2.57s/it][A[A

 70%|███████   | 49/70 [02:10<00:54,  2.58s/it][A[A

 71%|███████▏  | 50/70 [02:13<00:52,  2.64s/it][A[A

 73%|███████▎  | 51/70 [02:15<00:48,  2.54s/it][A[A

Batch loss: 1.3477193117141724




 74%|███████▍  | 52/70 [02:17<00:46,  2.56s/it][A[A

 76%|███████▌  | 53/70 [02:20<00:44,  2.62s/it][A[A

 77%|███████▋  | 54/70 [02:23<00:40,  2.54s/it][A[A

 79%|███████▊  | 55/70 [02:25<00:38,  2.58s/it][A[A

 80%|████████  | 56/70 [02:28<00:37,  2.69s/it][A[A

 81%|████████▏ | 57/70 [02:31<00:34,  2.62s/it][A[A

 83%|████████▎ | 58/70 [02:33<00:31,  2.65s/it][A[A

 84%|████████▍ | 59/70 [02:36<00:29,  2.72s/it][A[A

 86%|████████▌ | 60/70 [02:39<00:26,  2.64s/it][A[A

 87%|████████▋ | 61/70 [02:41<00:23,  2.65s/it][A[A

Batch loss: 1.304412841796875




 89%|████████▊ | 62/70 [02:44<00:21,  2.73s/it][A[A

 90%|█████████ | 63/70 [02:47<00:18,  2.64s/it][A[A

 91%|█████████▏| 64/70 [02:49<00:16,  2.67s/it][A[A

 93%|█████████▎| 65/70 [02:52<00:13,  2.70s/it][A[A

 94%|█████████▍| 66/70 [02:55<00:10,  2.59s/it][A[A

 96%|█████████▌| 67/70 [02:57<00:07,  2.59s/it][A[A

 97%|█████████▋| 68/70 [03:00<00:05,  2.68s/it][A[A

 99%|█████████▊| 69/70 [03:02<00:02,  2.61s/it][A[A

100%|██████████| 70/70 [03:03<00:00,  2.63s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4477169684001379




 12%|█▎        | 1/8 [00:03<00:23,  3.39s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.22s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.97s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.86s/it][A[A

 62%|██████▎   | 5/8 [00:14<00:08,  2.85s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.71s/it][A[A

 88%|████████▊ | 7/8 [00:19<00:02,  2.69s/it][A[A

100%|██████████| 8/8 [00:21<00:00,  2.63s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.309884935617447
Epoch:  29




  1%|▏         | 1/70 [00:03<03:51,  3.35s/it][A[A

Batch loss: 1.325279712677002




  3%|▎         | 2/70 [00:06<03:35,  3.17s/it][A[A

  4%|▍         | 3/70 [00:08<03:16,  2.93s/it][A[A

  6%|▌         | 4/70 [00:11<03:07,  2.84s/it][A[A

  7%|▋         | 5/70 [00:13<03:03,  2.82s/it][A[A

  9%|▊         | 6/70 [00:16<02:51,  2.68s/it][A[A

 10%|█         | 7/70 [00:18<02:47,  2.65s/it][A[A

 11%|█▏        | 8/70 [00:21<02:46,  2.69s/it][A[A

 13%|█▎        | 9/70 [00:23<02:38,  2.60s/it][A[A

 14%|█▍        | 10/70 [00:26<02:35,  2.60s/it][A[A

 16%|█▌        | 11/70 [00:29<02:35,  2.64s/it][A[A

Batch loss: 1.3493874073028564




 17%|█▋        | 12/70 [00:31<02:28,  2.56s/it][A[A

 19%|█▊        | 13/70 [00:34<02:26,  2.57s/it][A[A

 20%|██        | 14/70 [00:37<02:27,  2.63s/it][A[A

 21%|██▏       | 15/70 [00:39<02:20,  2.55s/it][A[A

 23%|██▎       | 16/70 [00:42<02:20,  2.61s/it][A[A

 24%|██▍       | 17/70 [00:45<02:22,  2.69s/it][A[A

 26%|██▌       | 18/70 [00:47<02:16,  2.62s/it][A[A

 27%|██▋       | 19/70 [00:50<02:15,  2.65s/it][A[A

 29%|██▊       | 20/70 [00:53<02:16,  2.73s/it][A[A

 30%|███       | 21/70 [00:55<02:09,  2.65s/it][A[A

Batch loss: 1.355228066444397




 31%|███▏      | 22/70 [00:58<02:08,  2.67s/it][A[A

 33%|███▎      | 23/70 [01:01<02:08,  2.74s/it][A[A

 34%|███▍      | 24/70 [01:03<02:02,  2.66s/it][A[A

 36%|███▌      | 25/70 [01:06<01:59,  2.66s/it][A[A

 37%|███▋      | 26/70 [01:09<01:58,  2.69s/it][A[A

 39%|███▊      | 27/70 [01:11<01:51,  2.58s/it][A[A

 40%|████      | 28/70 [01:14<01:49,  2.60s/it][A[A

 41%|████▏     | 29/70 [01:16<01:48,  2.64s/it][A[A

 43%|████▎     | 30/70 [01:19<01:42,  2.56s/it][A[A

 44%|████▍     | 31/70 [01:21<01:40,  2.57s/it][A[A

Batch loss: 1.6454083919525146




 46%|████▌     | 32/70 [01:24<01:40,  2.64s/it][A[A

 47%|████▋     | 33/70 [01:26<01:34,  2.56s/it][A[A

 49%|████▊     | 34/70 [01:29<01:32,  2.56s/it][A[A

 50%|█████     | 35/70 [01:32<01:31,  2.62s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:26,  2.54s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:24,  2.56s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:23,  2.62s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:18,  2.53s/it][A[A

 57%|█████▋    | 40/70 [01:44<01:16,  2.55s/it][A[A

 59%|█████▊    | 41/70 [01:47<01:16,  2.65s/it][A[A

Batch loss: 1.6638951301574707




 60%|██████    | 42/70 [01:50<01:12,  2.59s/it][A[A

 61%|██████▏   | 43/70 [01:52<01:11,  2.63s/it][A[A

 63%|██████▎   | 44/70 [01:55<01:10,  2.71s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:05,  2.63s/it][A[A

 66%|██████▌   | 46/70 [02:01<01:03,  2.65s/it][A[A

 67%|██████▋   | 47/70 [02:03<01:02,  2.73s/it][A[A

 69%|██████▊   | 48/70 [02:06<00:58,  2.64s/it][A[A

 70%|███████   | 49/70 [02:09<00:56,  2.67s/it][A[A

 71%|███████▏  | 50/70 [02:11<00:54,  2.72s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:49,  2.60s/it][A[A

Batch loss: 1.3270118236541748




 74%|███████▍  | 52/70 [02:16<00:46,  2.60s/it][A[A

 76%|███████▌  | 53/70 [02:19<00:45,  2.65s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:41,  2.56s/it][A[A

 79%|███████▊  | 55/70 [02:24<00:38,  2.58s/it][A[A

 80%|████████  | 56/70 [02:27<00:36,  2.64s/it][A[A

 81%|████████▏ | 57/70 [02:29<00:32,  2.54s/it][A[A

 83%|████████▎ | 58/70 [02:32<00:30,  2.55s/it][A[A

 84%|████████▍ | 59/70 [02:35<00:28,  2.63s/it][A[A

 86%|████████▌ | 60/70 [02:37<00:25,  2.55s/it][A[A

 87%|████████▋ | 61/70 [02:40<00:23,  2.56s/it][A[A

Batch loss: 1.8354268074035645




 89%|████████▊ | 62/70 [02:42<00:20,  2.62s/it][A[A

 90%|█████████ | 63/70 [02:45<00:17,  2.54s/it][A[A

 91%|█████████▏| 64/70 [02:47<00:15,  2.56s/it][A[A

 93%|█████████▎| 65/70 [02:50<00:13,  2.62s/it][A[A

 94%|█████████▍| 66/70 [02:52<00:10,  2.56s/it][A[A

 96%|█████████▌| 67/70 [02:55<00:07,  2.62s/it][A[A

 97%|█████████▋| 68/70 [02:58<00:05,  2.70s/it][A[A

 99%|█████████▊| 69/70 [03:01<00:02,  2.62s/it][A[A

100%|██████████| 70/70 [03:01<00:00,  2.60s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4509835822241648




 12%|█▎        | 1/8 [00:03<00:23,  3.40s/it][A[A

 25%|██▌       | 2/8 [00:06<00:19,  3.21s/it][A[A

 38%|███▊      | 3/8 [00:08<00:14,  2.95s/it][A[A

 50%|█████     | 4/8 [00:11<00:11,  2.86s/it][A[A

 62%|██████▎   | 5/8 [00:13<00:08,  2.85s/it][A[A

 75%|███████▌  | 6/8 [00:16<00:05,  2.68s/it][A[A

 88%|████████▊ | 7/8 [00:18<00:02,  2.63s/it][A[A

100%|██████████| 8/8 [00:20<00:00,  2.59s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.427304297685623
Epoch:  30




  1%|▏         | 1/70 [00:03<03:56,  3.42s/it][A[A

Batch loss: 1.3811891078948975




  3%|▎         | 2/70 [00:06<03:38,  3.22s/it][A[A

  4%|▍         | 3/70 [00:08<03:18,  2.96s/it][A[A

  6%|▌         | 4/70 [00:11<03:08,  2.86s/it][A[A

  7%|▋         | 5/70 [00:14<03:06,  2.86s/it][A[A

  9%|▊         | 6/70 [00:16<02:54,  2.73s/it][A[A

 10%|█         | 7/70 [00:19<02:49,  2.70s/it][A[A

 11%|█▏        | 8/70 [00:21<02:49,  2.73s/it][A[A

 13%|█▎        | 9/70 [00:24<02:39,  2.62s/it][A[A

 14%|█▍        | 10/70 [00:26<02:36,  2.61s/it][A[A

 16%|█▌        | 11/70 [00:29<02:37,  2.67s/it][A[A

Batch loss: 1.3060740232467651




 17%|█▋        | 12/70 [00:31<02:28,  2.57s/it][A[A

 19%|█▊        | 13/70 [00:34<02:26,  2.57s/it][A[A

 20%|██        | 14/70 [00:37<02:29,  2.67s/it][A[A

 21%|██▏       | 15/70 [00:39<02:22,  2.60s/it][A[A

 23%|██▎       | 16/70 [00:42<02:22,  2.63s/it][A[A

 24%|██▍       | 17/70 [00:45<02:23,  2.72s/it][A[A

 26%|██▌       | 18/70 [00:47<02:17,  2.64s/it][A[A

 27%|██▋       | 19/70 [00:50<02:17,  2.69s/it][A[A

 29%|██▊       | 20/70 [00:53<02:17,  2.75s/it][A[A

 30%|███       | 21/70 [00:56<02:10,  2.66s/it][A[A

Batch loss: 1.4595056772232056




 31%|███▏      | 22/70 [00:58<02:08,  2.68s/it][A[A

 33%|███▎      | 23/70 [01:01<02:08,  2.73s/it][A[A

 34%|███▍      | 24/70 [01:03<02:00,  2.61s/it][A[A

 36%|███▌      | 25/70 [01:06<01:57,  2.60s/it][A[A

 37%|███▋      | 26/70 [01:09<01:57,  2.68s/it][A[A

 39%|███▊      | 27/70 [01:11<01:50,  2.58s/it][A[A

 40%|████      | 28/70 [01:14<01:48,  2.58s/it][A[A

 41%|████▏     | 29/70 [01:17<01:48,  2.64s/it][A[A

 43%|████▎     | 30/70 [01:19<01:42,  2.55s/it][A[A

 44%|████▍     | 31/70 [01:22<01:40,  2.57s/it][A[A

Batch loss: 1.526351809501648




 46%|████▌     | 32/70 [01:24<01:40,  2.64s/it][A[A

 47%|████▋     | 33/70 [01:27<01:33,  2.54s/it][A[A

 49%|████▊     | 34/70 [01:29<01:32,  2.56s/it][A[A

 50%|█████     | 35/70 [01:32<01:31,  2.61s/it][A[A

 51%|█████▏    | 36/70 [01:34<01:26,  2.54s/it][A[A

 53%|█████▎    | 37/70 [01:37<01:24,  2.56s/it][A[A

 54%|█████▍    | 38/70 [01:40<01:24,  2.63s/it][A[A

 56%|█████▌    | 39/70 [01:42<01:19,  2.57s/it][A[A

 57%|█████▋    | 40/70 [01:45<01:18,  2.61s/it][A[A

 59%|█████▊    | 41/70 [01:48<01:17,  2.69s/it][A[A

Batch loss: 1.3713431358337402




 60%|██████    | 42/70 [01:50<01:13,  2.62s/it][A[A

 61%|██████▏   | 43/70 [01:53<01:11,  2.64s/it][A[A

 63%|██████▎   | 44/70 [01:56<01:10,  2.71s/it][A[A

 64%|██████▍   | 45/70 [01:58<01:05,  2.63s/it][A[A

 66%|██████▌   | 46/70 [02:01<01:03,  2.66s/it][A[A

 67%|██████▋   | 47/70 [02:04<01:02,  2.72s/it][A[A

 69%|██████▊   | 48/70 [02:06<00:57,  2.63s/it][A[A

 70%|███████   | 49/70 [02:09<00:55,  2.62s/it][A[A

 71%|███████▏  | 50/70 [02:12<00:53,  2.66s/it][A[A

 73%|███████▎  | 51/70 [02:14<00:48,  2.58s/it][A[A

Batch loss: 1.3254384994506836




 74%|███████▍  | 52/70 [02:17<00:47,  2.62s/it][A[A

 76%|███████▌  | 53/70 [02:20<00:45,  2.70s/it][A[A

 77%|███████▋  | 54/70 [02:22<00:42,  2.63s/it][A[A

 79%|███████▊  | 55/70 [02:25<00:39,  2.65s/it][A[A

 80%|████████  | 56/70 [02:28<00:38,  2.72s/it][A[A

 81%|████████▏ | 57/70 [02:30<00:34,  2.64s/it][A[A

 83%|████████▎ | 58/70 [02:33<00:31,  2.67s/it][A[A

 84%|████████▍ | 59/70 [02:36<00:30,  2.74s/it][A[A

 86%|████████▌ | 60/70 [02:38<00:26,  2.65s/it][A[A

 87%|████████▋ | 61/70 [02:41<00:23,  2.64s/it][A[A

Batch loss: 1.407860279083252




 89%|████████▊ | 62/70 [02:44<00:21,  2.67s/it][A[A

 90%|█████████ | 63/70 [02:46<00:18,  2.57s/it][A[A

 91%|█████████▏| 64/70 [02:49<00:15,  2.60s/it][A[A

 93%|█████████▎| 65/70 [02:51<00:13,  2.64s/it][A[A

 94%|█████████▍| 66/70 [02:54<00:10,  2.55s/it][A[A

 96%|█████████▌| 67/70 [02:56<00:07,  2.58s/it][A[A

 97%|█████████▋| 68/70 [02:59<00:05,  2.63s/it][A[A

 99%|█████████▊| 69/70 [03:01<00:02,  2.53s/it][A[A

100%|██████████| 70/70 [03:02<00:00,  2.61s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.414207853589739




 12%|█▎        | 1/8 [00:02<00:18,  2.67s/it][A[A

 25%|██▌       | 2/8 [00:05<00:15,  2.58s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.48s/it][A[A

 50%|█████     | 4/8 [00:09<00:10,  2.53s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.45s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:05,  2.51s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.43s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.39s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3109667003154755
Epoch:  31




  1%|▏         | 1/70 [00:02<03:18,  2.88s/it][A[A

Batch loss: 1.4549534320831299




  3%|▎         | 2/70 [00:05<03:10,  2.80s/it][A[A

  4%|▍         | 3/70 [00:07<03:00,  2.69s/it][A[A

  6%|▌         | 4/70 [00:10<03:00,  2.74s/it][A[A

  7%|▋         | 5/70 [00:13<02:52,  2.65s/it][A[A

  9%|▊         | 6/70 [00:16<02:53,  2.71s/it][A[A

 10%|█         | 7/70 [00:18<02:46,  2.64s/it][A[A

 11%|█▏        | 8/70 [00:21<02:47,  2.70s/it][A[A

 13%|█▎        | 9/70 [00:23<02:40,  2.63s/it][A[A

 14%|█▍        | 10/70 [00:26<02:39,  2.65s/it][A[A

 16%|█▌        | 11/70 [00:28<02:30,  2.56s/it][A[A

Batch loss: 1.3195959329605103




 17%|█▋        | 12/70 [00:31<02:31,  2.62s/it][A[A

 19%|█▊        | 13/70 [00:33<02:24,  2.53s/it][A[A

 20%|██        | 14/70 [00:36<02:24,  2.59s/it][A[A

 21%|██▏       | 15/70 [00:39<02:18,  2.51s/it][A[A

 23%|██▎       | 16/70 [00:41<02:19,  2.58s/it][A[A

 24%|██▍       | 17/70 [00:44<02:12,  2.50s/it][A[A

 26%|██▌       | 18/70 [00:46<02:13,  2.57s/it][A[A

 27%|██▋       | 19/70 [00:49<02:07,  2.50s/it][A[A

 29%|██▊       | 20/70 [00:51<02:08,  2.57s/it][A[A

 30%|███       | 21/70 [00:54<02:02,  2.50s/it][A[A

Batch loss: 1.3488541841506958




 31%|███▏      | 22/70 [00:56<02:03,  2.58s/it][A[A

 33%|███▎      | 23/70 [00:59<01:57,  2.50s/it][A[A

 34%|███▍      | 24/70 [01:01<01:57,  2.55s/it][A[A

 36%|███▌      | 25/70 [01:04<01:51,  2.49s/it][A[A

 37%|███▋      | 26/70 [01:07<01:54,  2.60s/it][A[A

 39%|███▊      | 27/70 [01:09<01:49,  2.55s/it][A[A

 40%|████      | 28/70 [01:12<01:50,  2.63s/it][A[A

 41%|████▏     | 29/70 [01:14<01:45,  2.58s/it][A[A

 43%|████▎     | 30/70 [01:17<01:46,  2.67s/it][A[A

 44%|████▍     | 31/70 [01:20<01:41,  2.60s/it][A[A

Batch loss: 1.4676650762557983




 46%|████▌     | 32/70 [01:23<01:41,  2.67s/it][A[A

 47%|████▋     | 33/70 [01:25<01:36,  2.60s/it][A[A

 49%|████▊     | 34/70 [01:28<01:36,  2.68s/it][A[A

 50%|█████     | 35/70 [01:30<01:31,  2.60s/it][A[A

 51%|█████▏    | 36/70 [01:33<01:29,  2.64s/it][A[A

 53%|█████▎    | 37/70 [01:35<01:23,  2.54s/it][A[A

 54%|█████▍    | 38/70 [01:38<01:23,  2.62s/it][A[A

 56%|█████▌    | 39/70 [01:40<01:18,  2.53s/it][A[A

 57%|█████▋    | 40/70 [01:43<01:17,  2.59s/it][A[A

 59%|█████▊    | 41/70 [01:45<01:12,  2.51s/it][A[A

Batch loss: 1.3772649765014648




 60%|██████    | 42/70 [01:48<01:12,  2.59s/it][A[A

 61%|██████▏   | 43/70 [01:51<01:07,  2.51s/it][A[A

 63%|██████▎   | 44/70 [01:53<01:07,  2.58s/it][A[A

 64%|██████▍   | 45/70 [01:56<01:02,  2.52s/it][A[A

 66%|██████▌   | 46/70 [01:58<01:02,  2.59s/it][A[A

 67%|██████▋   | 47/70 [02:01<00:58,  2.52s/it][A[A

 69%|██████▊   | 48/70 [02:04<00:56,  2.59s/it][A[A

 70%|███████   | 49/70 [02:06<00:52,  2.52s/it][A[A

 71%|███████▏  | 50/70 [02:09<00:52,  2.61s/it][A[A

 73%|███████▎  | 51/70 [02:11<00:48,  2.55s/it][A[A

Batch loss: 1.2983472347259521




 74%|███████▍  | 52/70 [02:14<00:47,  2.64s/it][A[A

 76%|███████▌  | 53/70 [02:16<00:43,  2.58s/it][A[A

 77%|███████▋  | 54/70 [02:19<00:42,  2.67s/it][A[A

 79%|███████▊  | 55/70 [02:22<00:39,  2.60s/it][A[A

 80%|████████  | 56/70 [02:25<00:37,  2.69s/it][A[A

 81%|████████▏ | 57/70 [02:27<00:34,  2.62s/it][A[A

 83%|████████▎ | 58/70 [02:30<00:32,  2.69s/it][A[A

 84%|████████▍ | 59/70 [02:32<00:28,  2.61s/it][A[A

 86%|████████▌ | 60/70 [02:35<00:26,  2.69s/it][A[A

 87%|████████▋ | 61/70 [02:38<00:23,  2.58s/it][A[A

Batch loss: 1.3682447671890259




 89%|████████▊ | 62/70 [02:40<00:20,  2.62s/it][A[A

 90%|█████████ | 63/70 [02:43<00:17,  2.53s/it][A[A

 91%|█████████▏| 64/70 [02:46<00:16,  2.71s/it][A[A

 93%|█████████▎| 65/70 [02:48<00:13,  2.62s/it][A[A

 94%|█████████▍| 66/70 [02:51<00:10,  2.65s/it][A[A

 96%|█████████▌| 67/70 [02:53<00:07,  2.56s/it][A[A

 97%|█████████▋| 68/70 [02:56<00:05,  2.59s/it][A[A

 99%|█████████▊| 69/70 [02:58<00:02,  2.52s/it][A[A

100%|██████████| 70/70 [02:59<00:00,  2.57s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.435546224457877




 12%|█▎        | 1/8 [00:02<00:18,  2.65s/it][A[A

 25%|██▌       | 2/8 [00:05<00:16,  2.76s/it][A[A

 38%|███▊      | 3/8 [00:07<00:13,  2.61s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.62s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.51s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.54s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.47s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.47s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.409722536802292
Epoch:  32




  1%|▏         | 1/70 [00:02<03:16,  2.85s/it][A[A

Batch loss: 1.3134775161743164




  3%|▎         | 2/70 [00:05<03:08,  2.77s/it][A[A

  4%|▍         | 3/70 [00:07<02:59,  2.67s/it][A[A

  6%|▌         | 4/70 [00:10<03:00,  2.74s/it][A[A

  7%|▋         | 5/70 [00:13<02:51,  2.64s/it][A[A

  9%|▊         | 6/70 [00:16<02:53,  2.71s/it][A[A

 10%|█         | 7/70 [00:18<02:45,  2.63s/it][A[A

 11%|█▏        | 8/70 [00:21<02:47,  2.70s/it][A[A

 13%|█▎        | 9/70 [00:23<02:39,  2.61s/it][A[A

 14%|█▍        | 10/70 [00:26<02:38,  2.64s/it][A[A

 16%|█▌        | 11/70 [00:28<02:30,  2.54s/it][A[A

Batch loss: 1.360295295715332




 17%|█▋        | 12/70 [00:31<02:32,  2.63s/it][A[A

 19%|█▊        | 13/70 [00:34<02:26,  2.57s/it][A[A

 20%|██        | 14/70 [00:36<02:28,  2.66s/it][A[A

 21%|██▏       | 15/70 [00:39<02:22,  2.59s/it][A[A

 23%|██▎       | 16/70 [00:42<02:24,  2.67s/it][A[A

 24%|██▍       | 17/70 [00:44<02:17,  2.60s/it][A[A

 26%|██▌       | 18/70 [00:47<02:19,  2.68s/it][A[A

 27%|██▋       | 19/70 [00:49<02:13,  2.61s/it][A[A

 29%|██▊       | 20/70 [00:52<02:14,  2.68s/it][A[A

 30%|███       | 21/70 [00:55<02:07,  2.61s/it][A[A

Batch loss: 1.3782777786254883




 31%|███▏      | 22/70 [00:57<02:07,  2.65s/it][A[A

 33%|███▎      | 23/70 [01:00<01:59,  2.55s/it][A[A

 34%|███▍      | 24/70 [01:03<01:59,  2.59s/it][A[A

 36%|███▌      | 25/70 [01:05<01:54,  2.55s/it][A[A

 37%|███▋      | 26/70 [01:08<01:54,  2.60s/it][A[A

 39%|███▊      | 27/70 [01:10<01:48,  2.52s/it][A[A

 40%|████      | 28/70 [01:13<01:47,  2.56s/it][A[A

 41%|████▏     | 29/70 [01:15<01:43,  2.52s/it][A[A

 43%|████▎     | 30/70 [01:18<01:43,  2.58s/it][A[A

 44%|████▍     | 31/70 [01:20<01:38,  2.51s/it][A[A

Batch loss: 1.4464960098266602




 46%|████▌     | 32/70 [01:23<01:38,  2.59s/it][A[A

 47%|████▋     | 33/70 [01:25<01:32,  2.51s/it][A[A

 49%|████▊     | 34/70 [01:28<01:32,  2.57s/it][A[A

 50%|█████     | 35/70 [01:30<01:27,  2.50s/it][A[A

 51%|█████▏    | 36/70 [01:33<01:27,  2.56s/it][A[A

 53%|█████▎    | 37/70 [01:35<01:22,  2.50s/it][A[A

 54%|█████▍    | 38/70 [01:38<01:23,  2.60s/it][A[A

 56%|█████▌    | 39/70 [01:41<01:19,  2.55s/it][A[A

 57%|█████▋    | 40/70 [01:43<01:19,  2.64s/it][A[A

 59%|█████▊    | 41/70 [01:46<01:14,  2.57s/it][A[A

Batch loss: 2.422060489654541




 60%|██████    | 42/70 [01:49<01:14,  2.66s/it][A[A

 61%|██████▏   | 43/70 [01:51<01:10,  2.60s/it][A[A

 63%|██████▎   | 44/70 [01:54<01:09,  2.68s/it][A[A

 64%|██████▍   | 45/70 [01:57<01:05,  2.61s/it][A[A

 66%|██████▌   | 46/70 [01:59<01:04,  2.68s/it][A[A

 67%|██████▋   | 47/70 [02:02<01:00,  2.61s/it][A[A

 69%|██████▊   | 48/70 [02:05<00:58,  2.64s/it][A[A

 70%|███████   | 49/70 [02:07<00:53,  2.55s/it][A[A

 71%|███████▏  | 50/70 [02:10<00:52,  2.60s/it][A[A

 73%|███████▎  | 51/70 [02:12<00:47,  2.52s/it][A[A

Batch loss: 1.3674654960632324




 74%|███████▍  | 52/70 [02:15<00:46,  2.58s/it][A[A

 76%|███████▌  | 53/70 [02:17<00:42,  2.51s/it][A[A

 77%|███████▋  | 54/70 [02:20<00:41,  2.57s/it][A[A

 79%|███████▊  | 55/70 [02:22<00:37,  2.51s/it][A[A

 80%|████████  | 56/70 [02:25<00:36,  2.59s/it][A[A

 81%|████████▏ | 57/70 [02:27<00:32,  2.53s/it][A[A

 83%|████████▎ | 58/70 [02:30<00:31,  2.59s/it][A[A

 84%|████████▍ | 59/70 [02:32<00:27,  2.52s/it][A[A

 86%|████████▌ | 60/70 [02:35<00:25,  2.59s/it][A[A

 87%|████████▋ | 61/70 [02:37<00:22,  2.53s/it][A[A

Batch loss: 1.3448922634124756




 89%|████████▊ | 62/70 [02:40<00:20,  2.58s/it][A[A

 90%|█████████ | 63/70 [02:43<00:17,  2.52s/it][A[A

 91%|█████████▏| 64/70 [02:45<00:15,  2.63s/it][A[A

 93%|█████████▎| 65/70 [02:48<00:12,  2.58s/it][A[A

 94%|█████████▍| 66/70 [02:51<00:10,  2.66s/it][A[A

 96%|█████████▌| 67/70 [02:53<00:07,  2.61s/it][A[A

 97%|█████████▋| 68/70 [02:56<00:05,  2.68s/it][A[A

 99%|█████████▊| 69/70 [02:59<00:02,  2.61s/it][A[A

100%|██████████| 70/70 [02:59<00:00,  2.57s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4463408827781676




 12%|█▎        | 1/8 [00:02<00:19,  2.77s/it][A[A

 25%|██▌       | 2/8 [00:05<00:16,  2.68s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.59s/it][A[A

 50%|█████     | 4/8 [00:10<00:10,  2.60s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.49s/it][A[A

 75%|███████▌  | 6/8 [00:15<00:05,  2.54s/it][A[A

 88%|████████▊ | 7/8 [00:17<00:02,  2.46s/it][A[A

100%|██████████| 8/8 [00:19<00:00,  2.41s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3906972408294678
Epoch:  33




  1%|▏         | 1/70 [00:02<03:09,  2.74s/it][A[A

Batch loss: 1.376636028289795




  3%|▎         | 2/70 [00:05<03:00,  2.66s/it][A[A

  4%|▍         | 3/70 [00:07<02:50,  2.55s/it][A[A

  6%|▌         | 4/70 [00:10<02:51,  2.60s/it][A[A

  7%|▋         | 5/70 [00:12<02:43,  2.51s/it][A[A

  9%|▊         | 6/70 [00:15<02:46,  2.60s/it][A[A

 10%|█         | 7/70 [00:17<02:39,  2.53s/it][A[A

 11%|█▏        | 8/70 [00:20<02:40,  2.59s/it][A[A

 13%|█▎        | 9/70 [00:22<02:33,  2.51s/it][A[A

 14%|█▍        | 10/70 [00:25<02:34,  2.58s/it][A[A

 16%|█▌        | 11/70 [00:27<02:27,  2.50s/it][A[A

Batch loss: 1.3190782070159912




 17%|█▋        | 12/70 [00:30<02:30,  2.60s/it][A[A

 19%|█▊        | 13/70 [00:33<02:26,  2.57s/it][A[A

 20%|██        | 14/70 [00:36<02:29,  2.68s/it][A[A

 21%|██▏       | 15/70 [00:38<02:24,  2.62s/it][A[A

 23%|██▎       | 16/70 [00:41<02:26,  2.71s/it][A[A

 24%|██▍       | 17/70 [00:43<02:19,  2.64s/it][A[A

 26%|██▌       | 18/70 [00:46<02:20,  2.70s/it][A[A

 27%|██▋       | 19/70 [00:49<02:13,  2.63s/it][A[A

 29%|██▊       | 20/70 [00:52<02:14,  2.69s/it][A[A

 30%|███       | 21/70 [00:54<02:08,  2.61s/it][A[A

Batch loss: 1.3543095588684082




 31%|███▏      | 22/70 [00:57<02:07,  2.65s/it][A[A

 33%|███▎      | 23/70 [00:59<02:00,  2.56s/it][A[A

 34%|███▍      | 24/70 [01:02<02:00,  2.62s/it][A[A

 36%|███▌      | 25/70 [01:04<01:54,  2.54s/it][A[A

 37%|███▋      | 26/70 [01:07<01:53,  2.58s/it][A[A

 39%|███▊      | 27/70 [01:09<01:47,  2.51s/it][A[A

 40%|████      | 28/70 [01:12<01:47,  2.57s/it][A[A

 41%|████▏     | 29/70 [01:14<01:42,  2.50s/it][A[A

 43%|████▎     | 30/70 [01:17<01:42,  2.57s/it][A[A

 44%|████▍     | 31/70 [01:19<01:37,  2.50s/it][A[A

Batch loss: 1.3139561414718628




 46%|████▌     | 32/70 [01:22<01:37,  2.56s/it][A[A

 47%|████▋     | 33/70 [01:24<01:32,  2.49s/it][A[A

 49%|████▊     | 34/70 [01:27<01:32,  2.56s/it][A[A

 50%|█████     | 35/70 [01:29<01:27,  2.49s/it][A[A

 51%|█████▏    | 36/70 [01:32<01:26,  2.56s/it][A[A

 53%|█████▎    | 37/70 [01:34<01:22,  2.49s/it][A[A

 54%|█████▍    | 38/70 [01:37<01:23,  2.61s/it][A[A

 56%|█████▌    | 39/70 [01:40<01:19,  2.57s/it][A[A

 57%|█████▋    | 40/70 [01:43<01:19,  2.64s/it][A[A

 59%|█████▊    | 41/70 [01:45<01:14,  2.58s/it][A[A

Batch loss: 1.4441883563995361




 60%|██████    | 42/70 [01:48<01:14,  2.66s/it][A[A

 61%|██████▏   | 43/70 [01:50<01:09,  2.59s/it][A[A

 63%|██████▎   | 44/70 [01:53<01:09,  2.67s/it][A[A

 64%|██████▍   | 45/70 [01:56<01:04,  2.60s/it][A[A

 66%|██████▌   | 46/70 [01:58<01:04,  2.67s/it][A[A

 67%|██████▋   | 47/70 [02:01<00:59,  2.61s/it][A[A

 69%|██████▊   | 48/70 [02:03<00:56,  2.58s/it][A[A

 70%|███████   | 49/70 [02:06<00:51,  2.48s/it][A[A

 71%|███████▏  | 50/70 [02:08<00:49,  2.46s/it][A[A

 73%|███████▎  | 51/70 [02:10<00:45,  2.38s/it][A[A

Batch loss: 1.4301424026489258




 74%|███████▍  | 52/70 [02:13<00:42,  2.38s/it][A[A

 76%|███████▌  | 53/70 [02:15<00:39,  2.33s/it][A[A

 77%|███████▋  | 54/70 [02:17<00:37,  2.35s/it][A[A

 79%|███████▊  | 55/70 [02:20<00:34,  2.31s/it][A[A

 80%|████████  | 56/70 [02:22<00:32,  2.32s/it][A[A

 81%|████████▏ | 57/70 [02:24<00:29,  2.29s/it][A[A

 83%|████████▎ | 58/70 [02:26<00:27,  2.31s/it][A[A

 84%|████████▍ | 59/70 [02:29<00:25,  2.28s/it][A[A

 86%|████████▌ | 60/70 [02:31<00:23,  2.31s/it][A[A

 87%|████████▋ | 61/70 [02:33<00:20,  2.28s/it][A[A

Batch loss: 1.3983536958694458




 89%|████████▊ | 62/70 [02:36<00:18,  2.30s/it][A[A

 90%|█████████ | 63/70 [02:38<00:15,  2.28s/it][A[A

 91%|█████████▏| 64/70 [02:40<00:13,  2.31s/it][A[A

 93%|█████████▎| 65/70 [02:43<00:11,  2.32s/it][A[A

 94%|█████████▍| 66/70 [02:45<00:09,  2.37s/it][A[A

 96%|█████████▌| 67/70 [02:47<00:07,  2.35s/it][A[A

 97%|█████████▋| 68/70 [02:50<00:04,  2.38s/it][A[A

 99%|█████████▊| 69/70 [02:52<00:02,  2.37s/it][A[A

100%|██████████| 70/70 [02:53<00:00,  2.48s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4453389252935136




 12%|█▎        | 1/8 [00:02<00:16,  2.36s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.37s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.33s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.35s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.32s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.33s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.27s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.21s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3078936338424683
Epoch:  34




  1%|▏         | 1/70 [00:02<02:44,  2.38s/it][A[A

Batch loss: 1.5292960405349731




  3%|▎         | 2/70 [00:04<02:43,  2.41s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:39,  2.41s/it][A[A

  7%|▋         | 5/70 [00:11<02:35,  2.38s/it][A[A

  9%|▊         | 6/70 [00:14<02:34,  2.41s/it][A[A

 10%|█         | 7/70 [00:16<02:30,  2.38s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.41s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:24<02:24,  2.41s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.38s/it][A[A

Batch loss: 1.5320942401885986




 17%|█▋        | 12/70 [00:28<02:19,  2.41s/it][A[A

 19%|█▊        | 13/70 [00:31<02:15,  2.38s/it][A[A

 20%|██        | 14/70 [00:33<02:13,  2.39s/it][A[A

 21%|██▏       | 15/70 [00:35<02:10,  2.37s/it][A[A

 23%|██▎       | 16/70 [00:38<02:08,  2.38s/it][A[A

 24%|██▍       | 17/70 [00:40<02:03,  2.33s/it][A[A

 26%|██▌       | 18/70 [00:42<02:02,  2.35s/it][A[A

 27%|██▋       | 19/70 [00:45<01:58,  2.31s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.34s/it][A[A

 30%|███       | 21/70 [00:49<01:52,  2.30s/it][A[A

Batch loss: 1.4376736879348755




 31%|███▏      | 22/70 [00:52<01:54,  2.38s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:56<01:48,  2.35s/it][A[A

 36%|███▌      | 25/70 [00:59<01:46,  2.36s/it][A[A

 37%|███▋      | 26/70 [01:01<01:43,  2.36s/it][A[A

 39%|███▊      | 27/70 [01:03<01:40,  2.33s/it][A[A

 40%|████      | 28/70 [01:06<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:08<01:34,  2.31s/it][A[A

 43%|████▎     | 30/70 [01:11<01:35,  2.39s/it][A[A

 44%|████▍     | 31/70 [01:13<01:32,  2.37s/it][A[A

Batch loss: 1.4236643314361572




 46%|████▌     | 32/70 [01:15<01:32,  2.43s/it][A[A

 47%|████▋     | 33/70 [01:18<01:29,  2.41s/it][A[A

 49%|████▊     | 34/70 [01:20<01:28,  2.45s/it][A[A

 50%|█████     | 35/70 [01:23<01:24,  2.42s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:23,  2.45s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:20,  2.43s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:18,  2.45s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:15,  2.43s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:13,  2.46s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:09,  2.39s/it][A[A

Batch loss: 1.5905523300170898




 60%|██████    | 42/70 [01:40<01:07,  2.42s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:04,  2.38s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:58,  2.34s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.36s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:53,  2.32s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:51,  2.34s/it][A[A

 70%|███████   | 49/70 [01:56<00:48,  2.31s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.33s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:43,  2.30s/it][A[A

Batch loss: 1.431583046913147




 74%|███████▍  | 52/70 [02:03<00:41,  2.32s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.30s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.31s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.30s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.31s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:29,  2.28s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.35s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.35s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.39s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.37s/it][A[A

Batch loss: 1.3992204666137695




 89%|████████▊ | 62/70 [02:26<00:19,  2.40s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.39s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.44s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:12,  2.43s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.46s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.43s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.46s/it][A[A

 99%|█████████▊| 69/70 [02:44<00:02,  2.43s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4478732636996678




 12%|█▎        | 1/8 [00:02<00:16,  2.31s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.32s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.27s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.27s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.24s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.25s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.22s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.15s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.348390370607376
Epoch:  35




  1%|▏         | 1/70 [00:02<02:47,  2.42s/it][A[A

Batch loss: 1.3753172159194946




  3%|▎         | 2/70 [00:04<02:43,  2.41s/it][A[A

  4%|▍         | 3/70 [00:07<02:37,  2.35s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.35s/it][A[A

  7%|▋         | 5/70 [00:11<02:31,  2.33s/it][A[A

  9%|▊         | 6/70 [00:14<02:29,  2.34s/it][A[A

 10%|█         | 7/70 [00:16<02:24,  2.30s/it][A[A

 11%|█▏        | 8/70 [00:18<02:24,  2.34s/it][A[A

 13%|█▎        | 9/70 [00:20<02:22,  2.33s/it][A[A

 14%|█▍        | 10/70 [00:23<02:21,  2.37s/it][A[A

 16%|█▌        | 11/70 [00:25<02:19,  2.36s/it][A[A

Batch loss: 1.4788490533828735




 17%|█▋        | 12/70 [00:28<02:19,  2.40s/it][A[A

 19%|█▊        | 13/70 [00:30<02:15,  2.38s/it][A[A

 20%|██        | 14/70 [00:33<02:14,  2.41s/it][A[A

 21%|██▏       | 15/70 [00:35<02:10,  2.38s/it][A[A

 23%|██▎       | 16/70 [00:37<02:09,  2.40s/it][A[A

 24%|██▍       | 17/70 [00:40<02:06,  2.38s/it][A[A

 26%|██▌       | 18/70 [00:42<02:05,  2.41s/it][A[A

 27%|██▋       | 19/70 [00:44<02:01,  2.39s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.38s/it][A[A

 30%|███       | 21/70 [00:49<01:54,  2.33s/it][A[A

Batch loss: 1.4030835628509521




 31%|███▏      | 22/70 [00:51<01:52,  2.35s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:56<01:48,  2.35s/it][A[A

 36%|███▌      | 25/70 [00:58<01:44,  2.31s/it][A[A

 37%|███▋      | 26/70 [01:01<01:42,  2.32s/it][A[A

 39%|███▊      | 27/70 [01:03<01:38,  2.30s/it][A[A

 40%|████      | 28/70 [01:05<01:37,  2.32s/it][A[A

 41%|████▏     | 29/70 [01:08<01:33,  2.29s/it][A[A

 43%|████▎     | 30/70 [01:10<01:34,  2.36s/it][A[A

 44%|████▍     | 31/70 [01:12<01:31,  2.34s/it][A[A

Batch loss: 1.2990326881408691




 46%|████▌     | 32/70 [01:15<01:29,  2.36s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.32s/it][A[A

 49%|████▊     | 34/70 [01:19<01:23,  2.33s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.30s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:18,  2.32s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:16,  2.31s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:15,  2.35s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:12,  2.34s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:11,  2.39s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:08,  2.36s/it][A[A

Batch loss: 1.463680386543274




 60%|██████    | 42/70 [01:38<01:07,  2.41s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.38s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:02,  2.41s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:59,  2.39s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.41s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:54,  2.39s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.38s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.33s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:44,  2.33s/it][A[A

Batch loss: 1.5873324871063232




 74%|███████▍  | 52/70 [02:02<00:42,  2.38s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.39s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.36s/it][A[A

 80%|████████  | 56/70 [02:11<00:33,  2.39s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:31,  2.40s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:29,  2.43s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.41s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:24,  2.44s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.41s/it][A[A

Batch loss: 1.3321542739868164




 89%|████████▊ | 62/70 [02:26<00:19,  2.41s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.36s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.38s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.35s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.47s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.40s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.40s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.35s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4451638272830418




 12%|█▎        | 1/8 [00:02<00:16,  2.36s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.35s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.29s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.31s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.29s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.30s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.19s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4035450220108032
Epoch:  36




  1%|▏         | 1/70 [00:02<02:52,  2.50s/it][A[A

Batch loss: 1.3126623630523682




  3%|▎         | 2/70 [00:05<02:52,  2.54s/it][A[A

  4%|▍         | 3/70 [00:07<02:47,  2.50s/it][A[A

  6%|▌         | 4/70 [00:10<02:45,  2.50s/it][A[A

  7%|▋         | 5/70 [00:12<02:41,  2.48s/it][A[A

  9%|▊         | 6/70 [00:15<02:40,  2.51s/it][A[A

 10%|█         | 7/70 [00:17<02:35,  2.47s/it][A[A

 11%|█▏        | 8/70 [00:20<02:36,  2.52s/it][A[A

 13%|█▎        | 9/70 [00:22<02:31,  2.48s/it][A[A

 14%|█▍        | 10/70 [00:25<02:30,  2.52s/it][A[A

 16%|█▌        | 11/70 [00:27<02:27,  2.50s/it][A[A

Batch loss: 1.350272297859192




 17%|█▋        | 12/70 [00:29<02:23,  2.47s/it][A[A

 19%|█▊        | 13/70 [00:32<02:18,  2.42s/it][A[A

 20%|██        | 14/70 [00:34<02:16,  2.43s/it][A[A

 21%|██▏       | 15/70 [00:37<02:13,  2.42s/it][A[A

 23%|██▎       | 16/70 [00:39<02:10,  2.41s/it][A[A

 24%|██▍       | 17/70 [00:41<02:05,  2.37s/it][A[A

 26%|██▌       | 18/70 [00:44<02:03,  2.38s/it][A[A

 27%|██▋       | 19/70 [00:46<01:59,  2.35s/it][A[A

 29%|██▊       | 20/70 [00:48<01:59,  2.39s/it][A[A

 30%|███       | 21/70 [00:51<01:54,  2.35s/it][A[A

Batch loss: 1.3257054090499878




 31%|███▏      | 22/70 [00:53<01:53,  2.37s/it][A[A

 33%|███▎      | 23/70 [00:55<01:49,  2.32s/it][A[A

 34%|███▍      | 24/70 [00:58<01:47,  2.34s/it][A[A

 36%|███▌      | 25/70 [01:00<01:44,  2.31s/it][A[A

 37%|███▋      | 26/70 [01:02<01:43,  2.36s/it][A[A

 39%|███▊      | 27/70 [01:05<01:40,  2.34s/it][A[A

 40%|████      | 28/70 [01:07<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:09<01:35,  2.32s/it][A[A

 43%|████▎     | 30/70 [01:12<01:34,  2.37s/it][A[A

 44%|████▍     | 31/70 [01:14<01:32,  2.36s/it][A[A

Batch loss: 1.6225632429122925




 46%|████▌     | 32/70 [01:17<01:31,  2.41s/it][A[A

 47%|████▋     | 33/70 [01:19<01:28,  2.40s/it][A[A

 49%|████▊     | 34/70 [01:21<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:24<01:23,  2.39s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:22,  2.43s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:19,  2.41s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:18,  2.45s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:15,  2.43s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:13,  2.45s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:09,  2.40s/it][A[A

Batch loss: 1.4299957752227783




 60%|██████    | 42/70 [01:41<01:07,  2.41s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:05,  2.42s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:03,  2.44s/it][A[A

 64%|██████▍   | 45/70 [01:48<00:59,  2.39s/it][A[A

 66%|██████▌   | 46/70 [01:51<00:59,  2.49s/it][A[A

 67%|██████▋   | 47/70 [01:53<00:57,  2.48s/it][A[A

 69%|██████▊   | 48/70 [01:56<00:55,  2.54s/it][A[A

 70%|███████   | 49/70 [01:58<00:53,  2.53s/it][A[A

 71%|███████▏  | 50/70 [02:01<00:52,  2.62s/it][A[A

 73%|███████▎  | 51/70 [02:04<00:48,  2.57s/it][A[A

Batch loss: 1.785463571548462




 74%|███████▍  | 52/70 [02:06<00:47,  2.62s/it][A[A

 76%|███████▌  | 53/70 [02:09<00:43,  2.58s/it][A[A

 77%|███████▋  | 54/70 [02:12<00:41,  2.60s/it][A[A

 79%|███████▊  | 55/70 [02:14<00:38,  2.56s/it][A[A

 80%|████████  | 56/70 [02:17<00:36,  2.59s/it][A[A

 81%|████████▏ | 57/70 [02:19<00:32,  2.53s/it][A[A

 83%|████████▎ | 58/70 [02:22<00:30,  2.53s/it][A[A

 84%|████████▍ | 59/70 [02:24<00:27,  2.47s/it][A[A

 86%|████████▌ | 60/70 [02:26<00:24,  2.49s/it][A[A

 87%|████████▋ | 61/70 [02:29<00:21,  2.44s/it][A[A

Batch loss: 1.2937045097351074




 89%|████████▊ | 62/70 [02:31<00:19,  2.46s/it][A[A

 90%|█████████ | 63/70 [02:34<00:16,  2.42s/it][A[A

 91%|█████████▏| 64/70 [02:36<00:14,  2.46s/it][A[A

 93%|█████████▎| 65/70 [02:39<00:12,  2.42s/it][A[A

 94%|█████████▍| 66/70 [02:41<00:09,  2.48s/it][A[A

 96%|█████████▌| 67/70 [02:44<00:07,  2.47s/it][A[A

 97%|█████████▋| 68/70 [02:46<00:05,  2.51s/it][A[A

 99%|█████████▊| 69/70 [02:49<00:02,  2.46s/it][A[A

100%|██████████| 70/70 [02:49<00:00,  2.43s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.483324486868722




 12%|█▎        | 1/8 [00:02<00:16,  2.37s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.36s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.30s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.30s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.26s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.30s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.28s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.20s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4495556950569153
Epoch:  37




  1%|▏         | 1/70 [00:02<02:49,  2.45s/it][A[A

Batch loss: 1.441898226737976




  3%|▎         | 2/70 [00:04<02:47,  2.46s/it][A[A

  4%|▍         | 3/70 [00:07<02:40,  2.39s/it][A[A

  6%|▌         | 4/70 [00:09<02:39,  2.42s/it][A[A

  7%|▋         | 5/70 [00:11<02:35,  2.39s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.45s/it][A[A

 10%|█         | 7/70 [00:17<02:34,  2.46s/it][A[A

 11%|█▏        | 8/70 [00:19<02:38,  2.55s/it][A[A

 13%|█▎        | 9/70 [00:22<02:33,  2.52s/it][A[A

 14%|█▍        | 10/70 [00:24<02:33,  2.56s/it][A[A

 16%|█▌        | 11/70 [00:27<02:29,  2.53s/it][A[A

Batch loss: 1.5185825824737549




 17%|█▋        | 12/70 [00:30<02:30,  2.59s/it][A[A

 19%|█▊        | 13/70 [00:32<02:27,  2.59s/it][A[A

 20%|██        | 14/70 [00:35<02:25,  2.61s/it][A[A

 21%|██▏       | 15/70 [00:37<02:21,  2.57s/it][A[A

 23%|██▎       | 16/70 [00:40<02:18,  2.56s/it][A[A

 24%|██▍       | 17/70 [00:42<02:15,  2.55s/it][A[A

 26%|██▌       | 18/70 [00:45<02:12,  2.56s/it][A[A

 27%|██▋       | 19/70 [00:47<02:08,  2.52s/it][A[A

 29%|██▊       | 20/70 [00:50<02:06,  2.54s/it][A[A

 30%|███       | 21/70 [00:52<02:02,  2.50s/it][A[A

Batch loss: 1.5050864219665527




 31%|███▏      | 22/70 [00:55<02:02,  2.56s/it][A[A

 33%|███▎      | 23/70 [00:57<01:57,  2.50s/it][A[A

 34%|███▍      | 24/70 [01:00<01:55,  2.52s/it][A[A

 36%|███▌      | 25/70 [01:02<01:51,  2.48s/it][A[A

 37%|███▋      | 26/70 [01:05<01:50,  2.50s/it][A[A

 39%|███▊      | 27/70 [01:07<01:44,  2.44s/it][A[A

 40%|████      | 28/70 [01:10<01:41,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:12<01:37,  2.38s/it][A[A

 43%|████▎     | 30/70 [01:14<01:35,  2.40s/it][A[A

 44%|████▍     | 31/70 [01:17<01:32,  2.36s/it][A[A

Batch loss: 1.3468095064163208




 46%|████▌     | 32/70 [01:19<01:30,  2.39s/it][A[A

 47%|████▋     | 33/70 [01:21<01:28,  2.40s/it][A[A

 49%|████▊     | 34/70 [01:24<01:27,  2.43s/it][A[A

 50%|█████     | 35/70 [01:26<01:24,  2.41s/it][A[A

 51%|█████▏    | 36/70 [01:29<01:23,  2.45s/it][A[A

 53%|█████▎    | 37/70 [01:31<01:20,  2.43s/it][A[A

 54%|█████▍    | 38/70 [01:34<01:18,  2.46s/it][A[A

 56%|█████▌    | 39/70 [01:36<01:15,  2.44s/it][A[A

 57%|█████▋    | 40/70 [01:39<01:14,  2.47s/it][A[A

 59%|█████▊    | 41/70 [01:41<01:10,  2.45s/it][A[A

Batch loss: 1.3394616842269897




 60%|██████    | 42/70 [01:44<01:09,  2.47s/it][A[A

 61%|██████▏   | 43/70 [01:46<01:05,  2.44s/it][A[A

 63%|██████▎   | 44/70 [01:48<01:03,  2.43s/it][A[A

 64%|██████▍   | 45/70 [01:51<00:59,  2.38s/it][A[A

 66%|██████▌   | 46/70 [01:53<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:56<00:55,  2.41s/it][A[A

 69%|██████▊   | 48/70 [01:58<00:54,  2.47s/it][A[A

 70%|███████   | 49/70 [02:01<00:51,  2.45s/it][A[A

 71%|███████▏  | 50/70 [02:03<00:49,  2.49s/it][A[A

 73%|███████▎  | 51/70 [02:06<00:46,  2.47s/it][A[A

Batch loss: 1.361783504486084




 74%|███████▍  | 52/70 [02:08<00:45,  2.50s/it][A[A

 76%|███████▌  | 53/70 [02:11<00:42,  2.47s/it][A[A

 77%|███████▋  | 54/70 [02:13<00:39,  2.50s/it][A[A

 79%|███████▊  | 55/70 [02:16<00:36,  2.46s/it][A[A

 80%|████████  | 56/70 [02:18<00:34,  2.47s/it][A[A

 81%|████████▏ | 57/70 [02:20<00:31,  2.41s/it][A[A

 83%|████████▎ | 58/70 [02:23<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:25<00:26,  2.36s/it][A[A

 86%|████████▌ | 60/70 [02:27<00:24,  2.40s/it][A[A

 87%|████████▋ | 61/70 [02:30<00:21,  2.35s/it][A[A

Batch loss: 1.3307216167449951




 89%|████████▊ | 62/70 [02:32<00:18,  2.37s/it][A[A

 90%|█████████ | 63/70 [02:34<00:16,  2.34s/it][A[A

 91%|█████████▏| 64/70 [02:37<00:14,  2.37s/it][A[A

 93%|█████████▎| 65/70 [02:39<00:11,  2.33s/it][A[A

 94%|█████████▍| 66/70 [02:41<00:09,  2.35s/it][A[A

 96%|█████████▌| 67/70 [02:44<00:06,  2.32s/it][A[A

 97%|█████████▋| 68/70 [02:46<00:04,  2.36s/it][A[A

 99%|█████████▊| 69/70 [02:48<00:02,  2.33s/it][A[A

100%|██████████| 70/70 [02:49<00:00,  2.42s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.434111147267478




 12%|█▎        | 1/8 [00:02<00:16,  2.33s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.33s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.28s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.30s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.29s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.34s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.32s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.23s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4009093642234802
Epoch:  38




  1%|▏         | 1/70 [00:02<02:52,  2.50s/it][A[A

Batch loss: 1.4588056802749634




  3%|▎         | 2/70 [00:05<02:50,  2.51s/it][A[A

  4%|▍         | 3/70 [00:07<02:45,  2.47s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.48s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.46s/it][A[A

  9%|▊         | 6/70 [00:14<02:38,  2.47s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.43s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.42s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:24<02:23,  2.40s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.38s/it][A[A

Batch loss: 1.8692713975906372




 17%|█▋        | 12/70 [00:29<02:18,  2.39s/it][A[A

 19%|█▊        | 13/70 [00:31<02:13,  2.35s/it][A[A

 20%|██        | 14/70 [00:33<02:13,  2.38s/it][A[A

 21%|██▏       | 15/70 [00:36<02:08,  2.34s/it][A[A

 23%|██▎       | 16/70 [00:38<02:07,  2.37s/it][A[A

 24%|██▍       | 17/70 [00:40<02:04,  2.34s/it][A[A

 26%|██▌       | 18/70 [00:43<02:04,  2.39s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:50<01:53,  2.32s/it][A[A

Batch loss: 1.3226537704467773




 31%|███▏      | 22/70 [00:52<01:53,  2.37s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.33s/it][A[A

 34%|███▍      | 24/70 [00:57<01:48,  2.36s/it][A[A

 36%|███▌      | 25/70 [00:59<01:46,  2.36s/it][A[A

 37%|███▋      | 26/70 [01:02<01:46,  2.43s/it][A[A

 39%|███▊      | 27/70 [01:04<01:43,  2.40s/it][A[A

 40%|████      | 28/70 [01:07<01:42,  2.44s/it][A[A

 41%|████▏     | 29/70 [01:09<01:39,  2.42s/it][A[A

 43%|████▎     | 30/70 [01:11<01:38,  2.46s/it][A[A

 44%|████▍     | 31/70 [01:14<01:34,  2.42s/it][A[A

Batch loss: 1.4410979747772217




 46%|████▌     | 32/70 [01:16<01:33,  2.45s/it][A[A

 47%|████▋     | 33/70 [01:19<01:29,  2.42s/it][A[A

 49%|████▊     | 34/70 [01:21<01:28,  2.46s/it][A[A

 50%|█████     | 35/70 [01:24<01:24,  2.42s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:22,  2.42s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:18,  2.37s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:16,  2.40s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:13,  2.36s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:11,  2.38s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:08,  2.35s/it][A[A

Batch loss: 1.3139270544052124




 60%|██████    | 42/70 [01:40<01:06,  2.37s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:58,  2.34s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.36s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:53,  2.33s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:51,  2.36s/it][A[A

 70%|███████   | 49/70 [01:56<00:48,  2.32s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:43,  2.31s/it][A[A

Batch loss: 1.3936026096343994




 74%|███████▍  | 52/70 [02:03<00:42,  2.35s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:40,  2.35s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.41s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:35,  2.39s/it][A[A

 80%|████████  | 56/70 [02:13<00:34,  2.43s/it][A[A

 81%|████████▏ | 57/70 [02:16<00:31,  2.41s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.44s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.41s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:24,  2.44s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:21,  2.43s/it][A[A

Batch loss: 1.3353302478790283




 89%|████████▊ | 62/70 [02:28<00:19,  2.46s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.41s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:14,  2.40s/it][A[A

 93%|█████████▎| 65/70 [02:35<00:11,  2.37s/it][A[A

 94%|█████████▍| 66/70 [02:37<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:40<00:07,  2.36s/it][A[A

 97%|█████████▋| 68/70 [02:42<00:04,  2.36s/it][A[A

 99%|█████████▊| 69/70 [02:44<00:02,  2.33s/it][A[A

100%|██████████| 70/70 [02:45<00:00,  2.36s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4017298119408743




 12%|█▎        | 1/8 [00:02<00:16,  2.41s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.39s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.32s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.32s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.26s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.28s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.23s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.17s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3619163930416107
Epoch:  39




  1%|▏         | 1/70 [00:02<02:46,  2.42s/it][A[A

Batch loss: 1.528199553489685




  3%|▎         | 2/70 [00:04<02:45,  2.44s/it][A[A

  4%|▍         | 3/70 [00:07<02:41,  2.40s/it][A[A

  6%|▌         | 4/70 [00:09<02:45,  2.51s/it][A[A

  7%|▋         | 5/70 [00:12<02:42,  2.50s/it][A[A

  9%|▊         | 6/70 [00:15<02:43,  2.55s/it][A[A

 10%|█         | 7/70 [00:17<02:37,  2.50s/it][A[A

 11%|█▏        | 8/70 [00:20<02:35,  2.51s/it][A[A

 13%|█▎        | 9/70 [00:22<02:31,  2.48s/it][A[A

 14%|█▍        | 10/70 [00:25<02:30,  2.51s/it][A[A

 16%|█▌        | 11/70 [00:27<02:26,  2.49s/it][A[A

Batch loss: 1.4597690105438232




 17%|█▋        | 12/70 [00:30<02:27,  2.54s/it][A[A

 19%|█▊        | 13/70 [00:32<02:22,  2.49s/it][A[A

 20%|██        | 14/70 [00:34<02:18,  2.47s/it][A[A

 21%|██▏       | 15/70 [00:37<02:12,  2.40s/it][A[A

 23%|██▎       | 16/70 [00:39<02:10,  2.41s/it][A[A

 24%|██▍       | 17/70 [00:41<02:06,  2.39s/it][A[A

 26%|██▌       | 18/70 [00:44<02:06,  2.44s/it][A[A

 27%|██▋       | 19/70 [00:46<02:03,  2.42s/it][A[A

 29%|██▊       | 20/70 [00:49<02:02,  2.46s/it][A[A

 30%|███       | 21/70 [00:51<01:59,  2.43s/it][A[A

Batch loss: 1.3318108320236206




 31%|███▏      | 22/70 [00:54<01:58,  2.47s/it][A[A

 33%|███▎      | 23/70 [00:56<01:54,  2.43s/it][A[A

 34%|███▍      | 24/70 [00:59<01:53,  2.46s/it][A[A

 36%|███▌      | 25/70 [01:01<01:49,  2.42s/it][A[A

 37%|███▋      | 26/70 [01:04<01:48,  2.47s/it][A[A

 39%|███▊      | 27/70 [01:06<01:44,  2.44s/it][A[A

 40%|████      | 28/70 [01:08<01:42,  2.44s/it][A[A

 41%|████▏     | 29/70 [01:11<01:38,  2.39s/it][A[A

 43%|████▎     | 30/70 [01:13<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:16<01:33,  2.39s/it][A[A

Batch loss: 1.337868571281433




 46%|████▌     | 32/70 [01:18<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:20<01:26,  2.35s/it][A[A

 49%|████▊     | 34/70 [01:23<01:25,  2.37s/it][A[A

 50%|█████     | 35/70 [01:25<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:27<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:30<01:16,  2.32s/it][A[A

 54%|█████▍    | 38/70 [01:32<01:14,  2.33s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:11,  2.31s/it][A[A

 57%|█████▋    | 40/70 [01:37<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:39<01:07,  2.32s/it][A[A

Batch loss: 1.4241846799850464




 60%|██████    | 42/70 [01:41<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:02,  2.32s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:01,  2.35s/it][A[A

 64%|██████▍   | 45/70 [01:48<00:58,  2.35s/it][A[A

 66%|██████▌   | 46/70 [01:51<00:57,  2.39s/it][A[A

 67%|██████▋   | 47/70 [01:53<00:54,  2.37s/it][A[A

 69%|██████▊   | 48/70 [01:56<00:53,  2.41s/it][A[A

 70%|███████   | 49/70 [01:58<00:50,  2.40s/it][A[A

 71%|███████▏  | 50/70 [02:00<00:48,  2.43s/it][A[A

 73%|███████▎  | 51/70 [02:03<00:45,  2.40s/it][A[A

Batch loss: 1.6276848316192627




 74%|███████▍  | 52/70 [02:05<00:44,  2.45s/it][A[A

 76%|███████▌  | 53/70 [02:08<00:41,  2.42s/it][A[A

 77%|███████▋  | 54/70 [02:10<00:39,  2.45s/it][A[A

 79%|███████▊  | 55/70 [02:12<00:35,  2.40s/it][A[A

 80%|████████  | 56/70 [02:15<00:33,  2.40s/it][A[A

 81%|████████▏ | 57/70 [02:17<00:30,  2.35s/it][A[A

 83%|████████▎ | 58/70 [02:20<00:28,  2.38s/it][A[A

 84%|████████▍ | 59/70 [02:22<00:25,  2.34s/it][A[A

 86%|████████▌ | 60/70 [02:24<00:23,  2.36s/it][A[A

 87%|████████▋ | 61/70 [02:26<00:20,  2.32s/it][A[A

Batch loss: 1.4255119562149048




 89%|████████▊ | 62/70 [02:29<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:31<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:13,  2.33s/it][A[A

 93%|█████████▎| 65/70 [02:36<00:11,  2.31s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.35s/it][A[A

 96%|█████████▌| 67/70 [02:40<00:07,  2.34s/it][A[A

 97%|█████████▋| 68/70 [02:43<00:04,  2.37s/it][A[A

 99%|█████████▊| 69/70 [02:45<00:02,  2.36s/it][A[A

100%|██████████| 70/70 [02:46<00:00,  2.38s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4019195454461233




 12%|█▎        | 1/8 [00:02<00:16,  2.37s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.36s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.33s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.36s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.34s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.36s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.33s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.25s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.2690715193748474
Epoch:  40




  1%|▏         | 1/70 [00:02<02:56,  2.56s/it][A[A

Batch loss: 1.4156057834625244




  3%|▎         | 2/70 [00:05<02:53,  2.56s/it][A[A

  4%|▍         | 3/70 [00:07<02:47,  2.49s/it][A[A

  6%|▌         | 4/70 [00:09<02:44,  2.49s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.46s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.45s/it][A[A

 10%|█         | 7/70 [00:17<02:30,  2.39s/it][A[A

 11%|█▏        | 8/70 [00:19<02:27,  2.39s/it][A[A

 13%|█▎        | 9/70 [00:21<02:24,  2.37s/it][A[A

 14%|█▍        | 10/70 [00:24<02:22,  2.38s/it][A[A

 16%|█▌        | 11/70 [00:26<02:18,  2.34s/it][A[A

Batch loss: 1.5747745037078857




 17%|█▋        | 12/70 [00:28<02:16,  2.35s/it][A[A

 19%|█▊        | 13/70 [00:30<02:12,  2.32s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.35s/it][A[A

 21%|██▏       | 15/70 [00:35<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:38<02:06,  2.34s/it][A[A

 24%|██▍       | 17/70 [00:40<02:03,  2.33s/it][A[A

 26%|██▌       | 18/70 [00:42<02:02,  2.35s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:57,  2.35s/it][A[A

 30%|███       | 21/70 [00:49<01:53,  2.31s/it][A[A

Batch loss: 1.5123378038406372




 31%|███▏      | 22/70 [00:52<01:52,  2.34s/it][A[A

 33%|███▎      | 23/70 [00:54<01:48,  2.32s/it][A[A

 34%|███▍      | 24/70 [00:56<01:49,  2.38s/it][A[A

 36%|███▌      | 25/70 [00:59<01:46,  2.37s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.41s/it][A[A

 39%|███▊      | 27/70 [01:04<01:43,  2.40s/it][A[A

 40%|████      | 28/70 [01:06<01:42,  2.44s/it][A[A

 41%|████▏     | 29/70 [01:08<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:11<01:37,  2.44s/it][A[A

 44%|████▍     | 31/70 [01:13<01:34,  2.42s/it][A[A

Batch loss: 1.4355223178863525




 46%|████▌     | 32/70 [01:16<01:33,  2.46s/it][A[A

 47%|████▋     | 33/70 [01:18<01:29,  2.43s/it][A[A

 49%|████▊     | 34/70 [01:21<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:23<01:23,  2.37s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.37s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:18,  2.38s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:16,  2.38s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:12,  2.34s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:07,  2.32s/it][A[A

Batch loss: 1.329727292060852




 60%|██████    | 42/70 [01:39<01:06,  2.37s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:02,  2.33s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:01,  2.35s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:58,  2.32s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:53,  2.31s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.34s/it][A[A

 70%|███████   | 49/70 [01:56<00:48,  2.31s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.34s/it][A[A

Batch loss: 1.3552345037460327




 74%|███████▍  | 52/70 [02:03<00:43,  2.39s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.39s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:39,  2.45s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:36,  2.42s/it][A[A

 80%|████████  | 56/70 [02:13<00:34,  2.45s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.43s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.47s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.45s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:24,  2.48s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:22,  2.47s/it][A[A

Batch loss: 1.4807212352752686




 89%|████████▊ | 62/70 [02:27<00:19,  2.45s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.39s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.39s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.40s/it][A[A

 94%|█████████▍| 66/70 [02:37<00:09,  2.44s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.42s/it][A[A

 97%|█████████▋| 68/70 [02:42<00:04,  2.45s/it][A[A

 99%|█████████▊| 69/70 [02:44<00:02,  2.43s/it][A[A

100%|██████████| 70/70 [02:45<00:00,  2.36s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4579260230064393




 12%|█▎        | 1/8 [00:02<00:17,  2.45s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.45s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.40s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.40s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.36s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.36s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.31s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.25s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.345362037420273
Epoch:  41




  1%|▏         | 1/70 [00:02<02:48,  2.45s/it][A[A

Batch loss: 1.3953068256378174




  3%|▎         | 2/70 [00:04<02:45,  2.44s/it][A[A

  4%|▍         | 3/70 [00:07<02:40,  2.39s/it][A[A

  6%|▌         | 4/70 [00:09<02:38,  2.41s/it][A[A

  7%|▋         | 5/70 [00:11<02:33,  2.37s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:18<02:27,  2.37s/it][A[A

 13%|█▎        | 9/70 [00:21<02:23,  2.35s/it][A[A

 14%|█▍        | 10/70 [00:23<02:22,  2.37s/it][A[A

 16%|█▌        | 11/70 [00:25<02:18,  2.34s/it][A[A

Batch loss: 1.4226173162460327




 17%|█▋        | 12/70 [00:28<02:17,  2.37s/it][A[A

 19%|█▊        | 13/70 [00:30<02:13,  2.34s/it][A[A

 20%|██        | 14/70 [00:33<02:12,  2.36s/it][A[A

 21%|██▏       | 15/70 [00:35<02:09,  2.35s/it][A[A

 23%|██▎       | 16/70 [00:37<02:09,  2.40s/it][A[A

 24%|██▍       | 17/70 [00:40<02:06,  2.39s/it][A[A

 26%|██▌       | 18/70 [00:42<02:06,  2.44s/it][A[A

 27%|██▋       | 19/70 [00:45<02:03,  2.41s/it][A[A

 29%|██▊       | 20/70 [00:47<02:02,  2.46s/it][A[A

 30%|███       | 21/70 [00:50<01:58,  2.43s/it][A[A

Batch loss: 1.3270859718322754




 31%|███▏      | 22/70 [00:52<01:58,  2.46s/it][A[A

 33%|███▎      | 23/70 [00:55<01:54,  2.43s/it][A[A

 34%|███▍      | 24/70 [00:57<01:53,  2.46s/it][A[A

 36%|███▌      | 25/70 [00:59<01:49,  2.44s/it][A[A

 37%|███▋      | 26/70 [01:02<01:46,  2.43s/it][A[A

 39%|███▊      | 27/70 [01:04<01:41,  2.37s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:09<01:36,  2.35s/it][A[A

 43%|████▎     | 30/70 [01:11<01:34,  2.36s/it][A[A

 44%|████▍     | 31/70 [01:13<01:30,  2.32s/it][A[A

Batch loss: 1.5957282781600952




 46%|████▌     | 32/70 [01:16<01:29,  2.35s/it][A[A

 47%|████▋     | 33/70 [01:18<01:26,  2.33s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.35s/it][A[A

 50%|█████     | 35/70 [01:23<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.34s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:16,  2.31s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:14,  2.34s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:11,  2.31s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:09,  2.33s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:07,  2.32s/it][A[A

Batch loss: 1.5281236171722412




 60%|██████    | 42/70 [01:39<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.32s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:02,  2.40s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.39s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:58,  2.44s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:55,  2.43s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:53,  2.45s/it][A[A

 70%|███████   | 49/70 [01:56<00:50,  2.41s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:48,  2.44s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.41s/it][A[A

Batch loss: 1.533118486404419




 74%|███████▍  | 52/70 [02:03<00:44,  2.45s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:40,  2.41s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:39,  2.44s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.38s/it][A[A

 80%|████████  | 56/70 [02:13<00:33,  2.38s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:30,  2.36s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:28,  2.37s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:25,  2.33s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.36s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:20,  2.32s/it][A[A

Batch loss: 1.4163192510604858




 89%|████████▊ | 62/70 [02:27<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.33s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.31s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.31s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.33s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.445423904487065




 12%|█▎        | 1/8 [00:02<00:16,  2.30s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.35s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.32s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.34s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.34s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.31s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.24s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.434317499399185
Epoch:  42




  1%|▏         | 1/70 [00:02<02:55,  2.54s/it][A[A

Batch loss: 1.4873251914978027




  3%|▎         | 2/70 [00:05<02:52,  2.53s/it][A[A

  4%|▍         | 3/70 [00:07<02:46,  2.48s/it][A[A

  6%|▌         | 4/70 [00:09<02:44,  2.49s/it][A[A

  7%|▋         | 5/70 [00:12<02:36,  2.41s/it][A[A

  9%|▊         | 6/70 [00:14<02:33,  2.40s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:19<02:26,  2.36s/it][A[A

 13%|█▎        | 9/70 [00:21<02:22,  2.33s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:26<02:16,  2.31s/it][A[A

Batch loss: 1.500016212463379




 17%|█▋        | 12/70 [00:28<02:16,  2.35s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.31s/it][A[A

 20%|██        | 14/70 [00:33<02:10,  2.33s/it][A[A

 21%|██▏       | 15/70 [00:35<02:06,  2.31s/it][A[A

 23%|██▎       | 16/70 [00:37<02:05,  2.33s/it][A[A

 24%|██▍       | 17/70 [00:39<02:01,  2.30s/it][A[A

 26%|██▌       | 18/70 [00:42<02:01,  2.33s/it][A[A

 27%|██▋       | 19/70 [00:44<01:56,  2.29s/it][A[A

 29%|██▊       | 20/70 [00:46<01:55,  2.32s/it][A[A

 30%|███       | 21/70 [00:49<01:52,  2.30s/it][A[A

Batch loss: 1.3629282712936401




 31%|███▏      | 22/70 [00:51<01:51,  2.33s/it][A[A

 33%|███▎      | 23/70 [00:53<01:49,  2.33s/it][A[A

 34%|███▍      | 24/70 [00:56<01:49,  2.38s/it][A[A

 36%|███▌      | 25/70 [00:58<01:46,  2.38s/it][A[A

 37%|███▋      | 26/70 [01:01<01:46,  2.42s/it][A[A

 39%|███▊      | 27/70 [01:03<01:43,  2.40s/it][A[A

 40%|████      | 28/70 [01:06<01:41,  2.42s/it][A[A

 41%|████▏     | 29/70 [01:08<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:10<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:13<01:33,  2.40s/it][A[A

Batch loss: 1.3394994735717773




 46%|████▌     | 32/70 [01:15<01:32,  2.43s/it][A[A

 47%|████▋     | 33/70 [01:18<01:28,  2.39s/it][A[A

 49%|████▊     | 34/70 [01:20<01:25,  2.39s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.34s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.37s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:17,  2.34s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:15,  2.37s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:12,  2.33s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.31s/it][A[A

Batch loss: 1.3220551013946533




 60%|██████    | 42/70 [01:39<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.31s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:00,  2.34s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:57,  2.30s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:55,  2.33s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:52,  2.30s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:51,  2.32s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.30s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:46,  2.33s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:44,  2.34s/it][A[A

Batch loss: 1.3238511085510254




 74%|███████▍  | 52/70 [02:02<00:42,  2.39s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.39s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.42s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:31,  2.39s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:29,  2.42s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.45s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:25,  2.51s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.44s/it][A[A

Batch loss: 1.4704904556274414




 89%|████████▊ | 62/70 [02:26<00:19,  2.42s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.37s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.41s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.39s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.41s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.37s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.42s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.39s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4598784974643162




 12%|█▎        | 1/8 [00:02<00:16,  2.40s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.41s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.36s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.37s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.32s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.31s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.21s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.273992508649826
Epoch:  43




  1%|▏         | 1/70 [00:02<02:49,  2.46s/it][A[A

Batch loss: 1.337201714515686




  3%|▎         | 2/70 [00:04<02:45,  2.44s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:37,  2.38s/it][A[A

  7%|▋         | 5/70 [00:11<02:31,  2.33s/it][A[A

  9%|▊         | 6/70 [00:14<02:30,  2.35s/it][A[A

 10%|█         | 7/70 [00:16<02:25,  2.31s/it][A[A

 11%|█▏        | 8/70 [00:18<02:24,  2.33s/it][A[A

 13%|█▎        | 9/70 [00:20<02:21,  2.31s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.32s/it][A[A

Batch loss: 1.3635280132293701




 17%|█▋        | 12/70 [00:28<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:12,  2.32s/it][A[A

 20%|██        | 14/70 [00:32<02:10,  2.34s/it][A[A

 21%|██▏       | 15/70 [00:34<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:37<02:08,  2.37s/it][A[A

 24%|██▍       | 17/70 [00:39<02:05,  2.36s/it][A[A

 26%|██▌       | 18/70 [00:42<02:04,  2.40s/it][A[A

 27%|██▋       | 19/70 [00:44<02:01,  2.38s/it][A[A

 29%|██▊       | 20/70 [00:47<02:01,  2.42s/it][A[A

 30%|███       | 21/70 [00:49<01:58,  2.41s/it][A[A

Batch loss: 1.4081774950027466




 31%|███▏      | 22/70 [00:52<01:58,  2.46s/it][A[A

 33%|███▎      | 23/70 [00:54<01:54,  2.44s/it][A[A

 34%|███▍      | 24/70 [00:57<01:53,  2.47s/it][A[A

 36%|███▌      | 25/70 [00:59<01:50,  2.44s/it][A[A

 37%|███▋      | 26/70 [01:01<01:48,  2.46s/it][A[A

 39%|███▊      | 27/70 [01:04<01:42,  2.39s/it][A[A

 40%|████      | 28/70 [01:06<01:40,  2.39s/it][A[A

 41%|████▏     | 29/70 [01:08<01:37,  2.37s/it][A[A

 43%|████▎     | 30/70 [01:11<01:35,  2.38s/it][A[A

 44%|████▍     | 31/70 [01:13<01:30,  2.33s/it][A[A

Batch loss: 1.4464824199676514




 46%|████▌     | 32/70 [01:15<01:29,  2.35s/it][A[A

 47%|████▋     | 33/70 [01:18<01:25,  2.31s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.35s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.32s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.35s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:17,  2.33s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:15,  2.36s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:12,  2.33s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:07,  2.31s/it][A[A

Batch loss: 1.3386701345443726




 60%|██████    | 42/70 [01:39<01:05,  2.33s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.32s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.39s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:53,  2.41s/it][A[A

 70%|███████   | 49/70 [01:55<00:50,  2.39s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.42s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.40s/it][A[A

Batch loss: 1.3164657354354858




 74%|███████▍  | 52/70 [02:03<00:43,  2.43s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.40s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.34s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.35s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.33s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.35s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.33s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.35s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.34s/it][A[A

Batch loss: 1.3375194072723389




 89%|████████▊ | 62/70 [02:26<00:18,  2.35s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.34s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.31s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.30s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.37s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4572946514402116




 12%|█▎        | 1/8 [00:02<00:16,  2.34s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.37s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.34s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.35s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.38s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.35s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.28s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3053761422634125
Epoch:  44




  1%|▏         | 1/70 [00:02<02:57,  2.57s/it][A[A

Batch loss: 1.3973312377929688




  3%|▎         | 2/70 [00:05<02:54,  2.56s/it][A[A

  4%|▍         | 3/70 [00:07<02:48,  2.51s/it][A[A

  6%|▌         | 4/70 [00:10<02:46,  2.53s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.45s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.44s/it][A[A

 10%|█         | 7/70 [00:17<02:30,  2.39s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.41s/it][A[A

 13%|█▎        | 9/70 [00:21<02:24,  2.36s/it][A[A

 14%|█▍        | 10/70 [00:24<02:21,  2.37s/it][A[A

 16%|█▌        | 11/70 [00:26<02:17,  2.33s/it][A[A

Batch loss: 1.3336223363876343




 17%|█▋        | 12/70 [00:28<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:31<02:12,  2.32s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.34s/it][A[A

 21%|██▏       | 15/70 [00:35<02:06,  2.30s/it][A[A

 23%|██▎       | 16/70 [00:37<02:05,  2.33s/it][A[A

 24%|██▍       | 17/70 [00:40<02:01,  2.30s/it][A[A

 26%|██▌       | 18/70 [00:42<02:00,  2.31s/it][A[A

 27%|██▋       | 19/70 [00:44<01:56,  2.29s/it][A[A

 29%|██▊       | 20/70 [00:47<01:55,  2.31s/it][A[A

 30%|███       | 21/70 [00:49<01:52,  2.29s/it][A[A

Batch loss: 1.4639359712600708




 31%|███▏      | 22/70 [00:51<01:52,  2.34s/it][A[A

 33%|███▎      | 23/70 [00:54<01:50,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:56<01:49,  2.39s/it][A[A

 36%|███▌      | 25/70 [00:59<01:46,  2.37s/it][A[A

 37%|███▋      | 26/70 [01:01<01:46,  2.42s/it][A[A

 39%|███▊      | 27/70 [01:03<01:43,  2.40s/it][A[A

 40%|████      | 28/70 [01:06<01:41,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:08<01:38,  2.40s/it][A[A

 43%|████▎     | 30/70 [01:11<01:37,  2.44s/it][A[A

 44%|████▍     | 31/70 [01:13<01:34,  2.41s/it][A[A

Batch loss: 1.3158345222473145




 46%|████▌     | 32/70 [01:16<01:32,  2.44s/it][A[A

 47%|████▋     | 33/70 [01:18<01:27,  2.38s/it][A[A

 49%|████▊     | 34/70 [01:20<01:25,  2.37s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.38s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:18,  2.36s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:17,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:14,  2.39s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:12,  2.43s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:09,  2.39s/it][A[A

Batch loss: 1.5839835405349731




 60%|██████    | 42/70 [01:39<01:07,  2.43s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:04,  2.40s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:02,  2.42s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:59,  2.40s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:58,  2.42s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.37s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:52,  2.37s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.36s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:44,  2.32s/it][A[A

Batch loss: 1.3236191272735596




 74%|███████▍  | 52/70 [02:03<00:42,  2.34s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.30s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:37,  2.32s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.29s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.33s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:30,  2.32s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.34s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.33s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:20,  2.30s/it][A[A

Batch loss: 1.413820505142212




 89%|████████▊ | 62/70 [02:26<00:18,  2.33s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.36s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.39s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.37s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.40s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.38s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4337586726461138




 12%|█▎        | 1/8 [00:02<00:16,  2.38s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.38s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.34s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.37s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.32s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.32s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.28s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.22s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4502122700214386
Epoch:  45




  1%|▏         | 1/70 [00:02<02:54,  2.53s/it][A[A

Batch loss: 1.4482249021530151




  3%|▎         | 2/70 [00:04<02:50,  2.50s/it][A[A

  4%|▍         | 3/70 [00:07<02:42,  2.42s/it][A[A

  6%|▌         | 4/70 [00:09<02:39,  2.41s/it][A[A

  7%|▋         | 5/70 [00:11<02:34,  2.37s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:26,  2.33s/it][A[A

 11%|█▏        | 8/70 [00:18<02:25,  2.35s/it][A[A

 13%|█▎        | 9/70 [00:21<02:22,  2.34s/it][A[A

 14%|█▍        | 10/70 [00:23<02:21,  2.35s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.31s/it][A[A

Batch loss: 1.4252803325653076




 17%|█▋        | 12/70 [00:28<02:15,  2.33s/it][A[A

 19%|█▊        | 13/70 [00:30<02:12,  2.32s/it][A[A

 20%|██        | 14/70 [00:32<02:11,  2.34s/it][A[A

 21%|██▏       | 15/70 [00:35<02:08,  2.33s/it][A[A

 23%|██▎       | 16/70 [00:37<02:08,  2.38s/it][A[A

 24%|██▍       | 17/70 [00:40<02:05,  2.37s/it][A[A

 26%|██▌       | 18/70 [00:42<02:05,  2.41s/it][A[A

 27%|██▋       | 19/70 [00:44<02:01,  2.39s/it][A[A

 29%|██▊       | 20/70 [00:47<02:00,  2.42s/it][A[A

 30%|███       | 21/70 [00:49<01:57,  2.39s/it][A[A

Batch loss: 1.5622998476028442




 31%|███▏      | 22/70 [00:52<01:56,  2.42s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.39s/it][A[A

 34%|███▍      | 24/70 [00:56<01:51,  2.43s/it][A[A

 36%|███▌      | 25/70 [00:59<01:47,  2.40s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.39s/it][A[A

 39%|███▊      | 27/70 [01:03<01:40,  2.35s/it][A[A

 40%|████      | 28/70 [01:06<01:38,  2.36s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.34s/it][A[A

 43%|████▎     | 30/70 [01:10<01:34,  2.35s/it][A[A

 44%|████▍     | 31/70 [01:13<01:30,  2.32s/it][A[A

Batch loss: 1.3804495334625244




 46%|████▌     | 32/70 [01:15<01:29,  2.34s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.32s/it][A[A

 49%|████▊     | 34/70 [01:20<01:23,  2.33s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.34s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:17,  2.34s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:15,  2.37s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:12,  2.32s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:07,  2.32s/it][A[A

Batch loss: 1.7095047235488892




 60%|██████    | 42/70 [01:39<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:03,  2.35s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:03,  2.46s/it][A[A

 64%|██████▍   | 45/70 [01:46<01:01,  2.44s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:59,  2.47s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:56,  2.44s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:54,  2.49s/it][A[A

 70%|███████   | 49/70 [01:56<00:51,  2.47s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:49,  2.49s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:46,  2.46s/it][A[A

Batch loss: 1.3560341596603394




 74%|███████▍  | 52/70 [02:03<00:45,  2.50s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:41,  2.44s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:39,  2.49s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:36,  2.43s/it][A[A

 80%|████████  | 56/70 [02:13<00:34,  2.46s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.41s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.42s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:23,  2.40s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:21,  2.38s/it][A[A

Batch loss: 1.3763618469238281




 89%|████████▊ | 62/70 [02:27<00:19,  2.42s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.40s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.38s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.34s/it][A[A

 94%|█████████▍| 66/70 [02:37<00:09,  2.35s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:06,  2.31s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.33s/it][A[A

 99%|█████████▊| 69/70 [02:44<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.36s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.413502984387534




 12%|█▎        | 1/8 [00:02<00:16,  2.40s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.41s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.35s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.37s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.34s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.35s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.31s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.25s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4595338702201843
Epoch:  46




  1%|▏         | 1/70 [00:02<02:52,  2.50s/it][A[A

Batch loss: 1.3922066688537598




  3%|▎         | 2/70 [00:05<02:50,  2.51s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.45s/it][A[A

  6%|▌         | 4/70 [00:09<02:41,  2.45s/it][A[A

  7%|▋         | 5/70 [00:12<02:34,  2.38s/it][A[A

  9%|▊         | 6/70 [00:14<02:32,  2.38s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.34s/it][A[A

 11%|█▏        | 8/70 [00:18<02:25,  2.35s/it][A[A

 13%|█▎        | 9/70 [00:21<02:21,  2.32s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.32s/it][A[A

Batch loss: 1.5367231369018555




 17%|█▋        | 12/70 [00:28<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.31s/it][A[A

 20%|██        | 14/70 [00:32<02:10,  2.33s/it][A[A

 21%|██▏       | 15/70 [00:35<02:06,  2.30s/it][A[A

 23%|██▎       | 16/70 [00:37<02:05,  2.32s/it][A[A

 24%|██▍       | 17/70 [00:39<02:01,  2.29s/it][A[A

 26%|██▌       | 18/70 [00:42<02:00,  2.32s/it][A[A

 27%|██▋       | 19/70 [00:44<01:57,  2.30s/it][A[A

 29%|██▊       | 20/70 [00:46<01:55,  2.32s/it][A[A

 30%|███       | 21/70 [00:48<01:52,  2.29s/it][A[A

Batch loss: 1.4095178842544556




 31%|███▏      | 22/70 [00:51<01:53,  2.36s/it][A[A

 33%|███▎      | 23/70 [00:53<01:50,  2.35s/it][A[A

 34%|███▍      | 24/70 [00:56<01:50,  2.40s/it][A[A

 36%|███▌      | 25/70 [00:58<01:47,  2.39s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.41s/it][A[A

 39%|███▊      | 27/70 [01:03<01:43,  2.40s/it][A[A

 40%|████      | 28/70 [01:05<01:41,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:08<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:10<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:13<01:33,  2.41s/it][A[A

Batch loss: 1.377847671508789




 46%|████▌     | 32/70 [01:15<01:32,  2.43s/it][A[A

 47%|████▋     | 33/70 [01:17<01:27,  2.37s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.36s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:22,  2.43s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:21,  2.48s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:19,  2.49s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:15,  2.45s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:14,  2.47s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:10,  2.43s/it][A[A

Batch loss: 1.3870530128479004




 60%|██████    | 42/70 [01:39<01:08,  2.45s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:05,  2.42s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:03,  2.44s/it][A[A

 64%|██████▍   | 45/70 [01:47<01:00,  2.40s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:57,  2.41s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.35s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:51,  2.36s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:43,  2.31s/it][A[A

Batch loss: 1.4303196668624878




 74%|███████▍  | 52/70 [02:03<00:41,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.31s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:37,  2.34s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.32s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.35s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.31s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:27,  2.33s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.32s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:20,  2.28s/it][A[A

Batch loss: 1.5760250091552734




 89%|████████▊ | 62/70 [02:26<00:18,  2.33s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.36s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.38s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4274885654449463




 12%|█▎        | 1/8 [00:02<00:17,  2.43s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.42s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.37s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.37s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.30s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.21s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.30454820394516
Epoch:  47




  1%|▏         | 1/70 [00:02<02:43,  2.36s/it][A[A

Batch loss: 1.3490841388702393




  3%|▎         | 2/70 [00:04<02:42,  2.38s/it][A[A

  4%|▍         | 3/70 [00:07<02:36,  2.34s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.35s/it][A[A

  7%|▋         | 5/70 [00:11<02:30,  2.32s/it][A[A

  9%|▊         | 6/70 [00:14<02:29,  2.33s/it][A[A

 10%|█         | 7/70 [00:16<02:25,  2.31s/it][A[A

 11%|█▏        | 8/70 [00:18<02:25,  2.34s/it][A[A

 13%|█▎        | 9/70 [00:20<02:20,  2.31s/it][A[A

 14%|█▍        | 10/70 [00:23<02:19,  2.33s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.32s/it][A[A

Batch loss: 1.4211455583572388




 17%|█▋        | 12/70 [00:27<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.31s/it][A[A

 20%|██        | 14/70 [00:32<02:10,  2.33s/it][A[A

 21%|██▏       | 15/70 [00:34<02:08,  2.34s/it][A[A

 23%|██▎       | 16/70 [00:37<02:08,  2.38s/it][A[A

 24%|██▍       | 17/70 [00:39<02:06,  2.38s/it][A[A

 26%|██▌       | 18/70 [00:42<02:04,  2.40s/it][A[A

 27%|██▋       | 19/70 [00:44<02:01,  2.38s/it][A[A

 29%|██▊       | 20/70 [00:47<02:01,  2.42s/it][A[A

 30%|███       | 21/70 [00:49<01:57,  2.39s/it][A[A

Batch loss: 1.3518835306167603




 31%|███▏      | 22/70 [00:51<01:56,  2.43s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.40s/it][A[A

 34%|███▍      | 24/70 [00:56<01:51,  2.43s/it][A[A

 36%|███▌      | 25/70 [00:59<01:47,  2.39s/it][A[A

 37%|███▋      | 26/70 [01:01<01:44,  2.39s/it][A[A

 39%|███▊      | 27/70 [01:03<01:40,  2.34s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.34s/it][A[A

 43%|████▎     | 30/70 [01:10<01:35,  2.38s/it][A[A

 44%|████▍     | 31/70 [01:13<01:30,  2.33s/it][A[A

Batch loss: 1.3222236633300781




 46%|████▌     | 32/70 [01:15<01:29,  2.35s/it][A[A

 47%|████▋     | 33/70 [01:17<01:26,  2.33s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.35s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.32s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:19,  2.34s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:16,  2.32s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:14,  2.33s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:11,  2.31s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:09,  2.32s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.29s/it][A[A

Batch loss: 1.3615971803665161




 60%|██████    | 42/70 [01:38<01:04,  2.32s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.33s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:01,  2.38s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:58,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:55,  2.39s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:53,  2.43s/it][A[A

 70%|███████   | 49/70 [01:55<00:50,  2.41s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.44s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.40s/it][A[A

Batch loss: 1.4885544776916504




 74%|███████▍  | 52/70 [02:02<00:43,  2.43s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.40s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.36s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.39s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.35s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.37s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.35s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.37s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.34s/it][A[A

Batch loss: 1.3054033517837524




 89%|████████▊ | 62/70 [02:26<00:19,  2.38s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.34s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.35s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.33s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.35s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.33s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.35s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.32s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.33s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4454717363630023




 12%|█▎        | 1/8 [00:02<00:16,  2.38s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.40s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.36s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.34s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.36s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.32s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.25s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.369341254234314
Epoch:  48




  1%|▏         | 1/70 [00:02<02:51,  2.49s/it][A[A

Batch loss: 1.4226901531219482




  3%|▎         | 2/70 [00:04<02:48,  2.48s/it][A[A

  4%|▍         | 3/70 [00:07<02:43,  2.44s/it][A[A

  6%|▌         | 4/70 [00:09<02:41,  2.45s/it][A[A

  7%|▋         | 5/70 [00:11<02:34,  2.37s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.34s/it][A[A

 11%|█▏        | 8/70 [00:19<02:27,  2.38s/it][A[A

 13%|█▎        | 9/70 [00:21<02:24,  2.36s/it][A[A

 14%|█▍        | 10/70 [00:23<02:23,  2.39s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.38s/it][A[A

Batch loss: 1.4046024084091187




 17%|█▋        | 12/70 [00:28<02:20,  2.43s/it][A[A

 19%|█▊        | 13/70 [00:31<02:17,  2.41s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.43s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.39s/it][A[A

 23%|██▎       | 16/70 [00:38<02:11,  2.43s/it][A[A

 24%|██▍       | 17/70 [00:40<02:06,  2.40s/it][A[A

 26%|██▌       | 18/70 [00:43<02:05,  2.41s/it][A[A

 27%|██▋       | 19/70 [00:45<02:01,  2.38s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:50<01:55,  2.35s/it][A[A

Batch loss: 1.3184894323349




 31%|███▏      | 22/70 [00:52<01:53,  2.37s/it][A[A

 33%|███▎      | 23/70 [00:54<01:50,  2.35s/it][A[A

 34%|███▍      | 24/70 [00:57<01:48,  2.36s/it][A[A

 36%|███▌      | 25/70 [00:59<01:44,  2.32s/it][A[A

 37%|███▋      | 26/70 [01:01<01:42,  2.33s/it][A[A

 39%|███▊      | 27/70 [01:04<01:38,  2.30s/it][A[A

 40%|████      | 28/70 [01:06<01:37,  2.31s/it][A[A

 41%|████▏     | 29/70 [01:08<01:33,  2.29s/it][A[A

 43%|████▎     | 30/70 [01:11<01:33,  2.33s/it][A[A

 44%|████▍     | 31/70 [01:13<01:29,  2.31s/it][A[A

Batch loss: 1.3530967235565186




 46%|████▌     | 32/70 [01:15<01:28,  2.34s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.32s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.34s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.32s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.37s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:17,  2.35s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:16,  2.39s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.38s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:09,  2.39s/it][A[A

Batch loss: 1.3364273309707642




 60%|██████    | 42/70 [01:39<01:07,  2.43s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.39s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:02,  2.41s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.38s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:57,  2.41s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.35s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.36s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.32s/it][A[A

Batch loss: 1.3460999727249146




 74%|███████▍  | 52/70 [02:03<00:42,  2.34s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.31s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.33s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.32s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.33s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:29,  2.30s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:27,  2.32s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.32s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.32s/it][A[A

Batch loss: 1.620519995689392




 89%|████████▊ | 62/70 [02:26<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:14,  2.36s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.39s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.37s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.39s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4432203360966274




 12%|█▎        | 1/8 [00:02<00:16,  2.39s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.40s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.35s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.30s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.25s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.20s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.2939982414245605
Epoch:  49




  1%|▏         | 1/70 [00:02<02:45,  2.40s/it][A[A

Batch loss: 1.5890005826950073




  3%|▎         | 2/70 [00:04<02:43,  2.40s/it][A[A

  4%|▍         | 3/70 [00:07<02:37,  2.35s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.36s/it][A[A

  7%|▋         | 5/70 [00:11<02:30,  2.32s/it][A[A

  9%|▊         | 6/70 [00:13<02:29,  2.33s/it][A[A

 10%|█         | 7/70 [00:16<02:25,  2.31s/it][A[A

 11%|█▏        | 8/70 [00:18<02:24,  2.33s/it][A[A

 13%|█▎        | 9/70 [00:20<02:20,  2.30s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.31s/it][A[A

Batch loss: 1.5750190019607544




 17%|█▋        | 12/70 [00:27<02:14,  2.33s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.30s/it][A[A

 20%|██        | 14/70 [00:32<02:10,  2.32s/it][A[A

 21%|██▏       | 15/70 [00:34<02:08,  2.33s/it][A[A

 23%|██▎       | 16/70 [00:37<02:08,  2.38s/it][A[A

 24%|██▍       | 17/70 [00:39<02:05,  2.37s/it][A[A

 26%|██▌       | 18/70 [00:42<02:04,  2.40s/it][A[A

 27%|██▋       | 19/70 [00:44<02:01,  2.38s/it][A[A

 29%|██▊       | 20/70 [00:47<02:00,  2.42s/it][A[A

 30%|███       | 21/70 [00:49<01:57,  2.39s/it][A[A

Batch loss: 1.4495344161987305




 31%|███▏      | 22/70 [00:51<01:56,  2.42s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.38s/it][A[A

 34%|███▍      | 24/70 [00:56<01:50,  2.41s/it][A[A

 36%|███▌      | 25/70 [00:58<01:47,  2.38s/it][A[A

 37%|███▋      | 26/70 [01:01<01:44,  2.38s/it][A[A

 39%|███▊      | 27/70 [01:03<01:40,  2.33s/it][A[A

 40%|████      | 28/70 [01:05<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.32s/it][A[A

 43%|████▎     | 30/70 [01:10<01:33,  2.33s/it][A[A

 44%|████▍     | 31/70 [01:12<01:30,  2.31s/it][A[A

Batch loss: 1.4480220079421997




 46%|████▌     | 32/70 [01:15<01:28,  2.32s/it][A[A

 47%|████▋     | 33/70 [01:17<01:24,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:19<01:23,  2.32s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.31s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:19,  2.33s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:15,  2.29s/it][A[A

 54%|█████▍    | 38/70 [01:28<01:13,  2.31s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:11,  2.29s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:09,  2.32s/it][A[A

 59%|█████▊    | 41/70 [01:35<01:06,  2.29s/it][A[A

Batch loss: 1.4525809288024902




 60%|██████    | 42/70 [01:38<01:04,  2.32s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:02,  2.31s/it][A[A

 63%|██████▎   | 44/70 [01:42<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:58,  2.35s/it][A[A

 66%|██████▌   | 46/70 [01:47<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:54,  2.38s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:53,  2.41s/it][A[A

 70%|███████   | 49/70 [01:54<00:50,  2.39s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:48,  2.41s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:45,  2.38s/it][A[A

Batch loss: 1.4247465133666992




 74%|███████▍  | 52/70 [02:02<00:43,  2.42s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.41s/it][A[A

 77%|███████▋  | 54/70 [02:06<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.35s/it][A[A

 80%|████████  | 56/70 [02:11<00:32,  2.35s/it][A[A

 81%|████████▏ | 57/70 [02:13<00:30,  2.34s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:28,  2.34s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.33s/it][A[A

 86%|████████▌ | 60/70 [02:20<00:23,  2.35s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:21,  2.34s/it][A[A

Batch loss: 1.3162364959716797




 89%|████████▊ | 62/70 [02:25<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:27<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:14,  2.34s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.31s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.34s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:06,  2.31s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.33s/it][A[A

 99%|█████████▊| 69/70 [02:41<00:02,  2.29s/it][A[A

100%|██████████| 70/70 [02:42<00:00,  2.32s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4318386776106697




 12%|█▎        | 1/8 [00:02<00:16,  2.32s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.35s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.32s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.34s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.32s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.30s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.23s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.2986078560352325
Epoch:  50




  1%|▏         | 1/70 [00:02<02:51,  2.48s/it][A[A

Batch loss: 1.450539231300354




  3%|▎         | 2/70 [00:05<02:50,  2.50s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.45s/it][A[A

  6%|▌         | 4/70 [00:09<02:42,  2.46s/it][A[A

  7%|▋         | 5/70 [00:12<02:35,  2.39s/it][A[A

  9%|▊         | 6/70 [00:14<02:33,  2.40s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:19<02:27,  2.38s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.39s/it][A[A

 14%|█▍        | 10/70 [00:24<02:25,  2.43s/it][A[A

 16%|█▌        | 11/70 [00:26<02:22,  2.41s/it][A[A

Batch loss: 1.385022759437561




 17%|█▋        | 12/70 [00:29<02:22,  2.45s/it][A[A

 19%|█▊        | 13/70 [00:31<02:18,  2.42s/it][A[A

 20%|██        | 14/70 [00:33<02:16,  2.44s/it][A[A

 21%|██▏       | 15/70 [00:36<02:12,  2.41s/it][A[A

 23%|██▎       | 16/70 [00:38<02:14,  2.50s/it][A[A

 24%|██▍       | 17/70 [00:41<02:11,  2.48s/it][A[A

 26%|██▌       | 18/70 [00:43<02:09,  2.50s/it][A[A

 27%|██▋       | 19/70 [00:46<02:05,  2.46s/it][A[A

 29%|██▊       | 20/70 [00:48<02:01,  2.43s/it][A[A

 30%|███       | 21/70 [00:50<01:56,  2.37s/it][A[A

Batch loss: 1.342773675918579




 31%|███▏      | 22/70 [00:53<01:54,  2.39s/it][A[A

 33%|███▎      | 23/70 [00:55<01:50,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:57<01:47,  2.35s/it][A[A

 36%|███▌      | 25/70 [01:00<01:44,  2.33s/it][A[A

 37%|███▋      | 26/70 [01:02<01:42,  2.34s/it][A[A

 39%|███▊      | 27/70 [01:04<01:39,  2.31s/it][A[A

 40%|████      | 28/70 [01:07<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:09<01:34,  2.31s/it][A[A

 43%|████▎     | 30/70 [01:11<01:32,  2.32s/it][A[A

 44%|████▍     | 31/70 [01:13<01:29,  2.30s/it][A[A

Batch loss: 1.3958172798156738




 46%|████▌     | 32/70 [01:16<01:28,  2.33s/it][A[A

 47%|████▋     | 33/70 [01:18<01:25,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:20<01:23,  2.32s/it][A[A

 50%|█████     | 35/70 [01:23<01:20,  2.31s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.38s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:18,  2.37s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:16,  2.40s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.37s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:08,  2.38s/it][A[A

Batch loss: 1.4620110988616943




 60%|██████    | 42/70 [01:40<01:07,  2.42s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:04,  2.39s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:02,  2.41s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:59,  2.39s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:58,  2.43s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:55,  2.39s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:52,  2.38s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.35s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:47,  2.37s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:44,  2.33s/it][A[A

Batch loss: 1.4958082437515259




 74%|███████▍  | 52/70 [02:03<00:42,  2.34s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:39,  2.31s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:37,  2.32s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.29s/it][A[A

 80%|████████  | 56/70 [02:13<00:32,  2.31s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:29,  2.28s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:27,  2.31s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.28s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.32s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:20,  2.30s/it][A[A

Batch loss: 1.3301186561584473




 89%|████████▊ | 62/70 [02:26<00:18,  2.33s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.30s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.36s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.34s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.39s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.37s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.40s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.38s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4467396838324411




 12%|█▎        | 1/8 [00:02<00:16,  2.42s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.42s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.37s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.40s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.34s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.33s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.28s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.23s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.373714327812195
Epoch:  51




  1%|▏         | 1/70 [00:02<02:47,  2.42s/it][A[A

Batch loss: 1.4754974842071533




  3%|▎         | 2/70 [00:04<02:45,  2.44s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:38,  2.40s/it][A[A

  7%|▋         | 5/70 [00:11<02:33,  2.36s/it][A[A

  9%|▊         | 6/70 [00:14<02:33,  2.40s/it][A[A

 10%|█         | 7/70 [00:16<02:29,  2.37s/it][A[A

 11%|█▏        | 8/70 [00:19<02:28,  2.40s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:23<02:23,  2.40s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.38s/it][A[A

Batch loss: 1.428554654121399




 17%|█▋        | 12/70 [00:28<02:19,  2.41s/it][A[A

 19%|█▊        | 13/70 [00:30<02:14,  2.36s/it][A[A

 20%|██        | 14/70 [00:33<02:14,  2.40s/it][A[A

 21%|██▏       | 15/70 [00:35<02:12,  2.40s/it][A[A

 23%|██▎       | 16/70 [00:38<02:12,  2.46s/it][A[A

 24%|██▍       | 17/70 [00:40<02:10,  2.46s/it][A[A

 26%|██▌       | 18/70 [00:43<02:10,  2.50s/it][A[A

 27%|██▋       | 19/70 [00:45<02:06,  2.49s/it][A[A

 29%|██▊       | 20/70 [00:48<02:05,  2.52s/it][A[A

 30%|███       | 21/70 [00:50<02:02,  2.49s/it][A[A

Batch loss: 1.4029535055160522




 31%|███▏      | 22/70 [00:53<02:00,  2.52s/it][A[A

 33%|███▎      | 23/70 [00:56<01:57,  2.49s/it][A[A

 34%|███▍      | 24/70 [00:58<01:56,  2.53s/it][A[A

 36%|███▌      | 25/70 [01:01<01:51,  2.49s/it][A[A

 37%|███▋      | 26/70 [01:03<01:49,  2.48s/it][A[A

 39%|███▊      | 27/70 [01:05<01:43,  2.42s/it][A[A

 40%|████      | 28/70 [01:08<01:42,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:10<01:37,  2.38s/it][A[A

 43%|████▎     | 30/70 [01:12<01:35,  2.38s/it][A[A

 44%|████▍     | 31/70 [01:15<01:31,  2.34s/it][A[A

Batch loss: 1.4408361911773682




 46%|████▌     | 32/70 [01:17<01:29,  2.36s/it][A[A

 47%|████▋     | 33/70 [01:19<01:26,  2.35s/it][A[A

 49%|████▊     | 34/70 [01:22<01:25,  2.36s/it][A[A

 50%|█████     | 35/70 [01:24<01:21,  2.32s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:19,  2.34s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:16,  2.30s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:14,  2.32s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:11,  2.30s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:09,  2.32s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:06,  2.30s/it][A[A

Batch loss: 1.3979350328445435




 60%|██████    | 42/70 [01:40<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:02,  2.39s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:59,  2.38s/it][A[A

 66%|██████▌   | 46/70 [01:50<00:57,  2.41s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:55,  2.40s/it][A[A

 69%|██████▊   | 48/70 [01:55<00:53,  2.42s/it][A[A

 70%|███████   | 49/70 [01:57<00:50,  2.40s/it][A[A

 71%|███████▏  | 50/70 [02:00<00:48,  2.43s/it][A[A

 73%|███████▎  | 51/70 [02:02<00:45,  2.41s/it][A[A

Batch loss: 1.3388012647628784




 74%|███████▍  | 52/70 [02:04<00:44,  2.45s/it][A[A

 76%|███████▌  | 53/70 [02:07<00:40,  2.40s/it][A[A

 77%|███████▋  | 54/70 [02:09<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:35,  2.36s/it][A[A

 80%|████████  | 56/70 [02:14<00:33,  2.40s/it][A[A

 81%|████████▏ | 57/70 [02:16<00:31,  2.39s/it][A[A

 83%|████████▎ | 58/70 [02:19<00:29,  2.42s/it][A[A

 84%|████████▍ | 59/70 [02:21<00:26,  2.39s/it][A[A

 86%|████████▌ | 60/70 [02:24<00:24,  2.42s/it][A[A

 87%|████████▋ | 61/70 [02:26<00:21,  2.40s/it][A[A

Batch loss: 1.426317811012268




 89%|████████▊ | 62/70 [02:29<00:19,  2.45s/it][A[A

 90%|█████████ | 63/70 [02:31<00:17,  2.43s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:14,  2.47s/it][A[A

 93%|█████████▎| 65/70 [02:36<00:12,  2.46s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.47s/it][A[A

 96%|█████████▌| 67/70 [02:41<00:07,  2.42s/it][A[A

 97%|█████████▋| 68/70 [02:43<00:04,  2.40s/it][A[A

 99%|█████████▊| 69/70 [02:45<00:02,  2.36s/it][A[A

100%|██████████| 70/70 [02:46<00:00,  2.38s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4484639695712498




 12%|█▎        | 1/8 [00:02<00:16,  2.33s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.32s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.27s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.27s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.27s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.30s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.18s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.495764821767807
Epoch:  52




  1%|▏         | 1/70 [00:02<02:47,  2.42s/it][A[A

Batch loss: 1.3946079015731812




  3%|▎         | 2/70 [00:04<02:44,  2.42s/it][A[A

  4%|▍         | 3/70 [00:07<02:38,  2.37s/it][A[A

  6%|▌         | 4/70 [00:09<02:36,  2.37s/it][A[A

  7%|▋         | 5/70 [00:11<02:31,  2.33s/it][A[A

  9%|▊         | 6/70 [00:14<02:30,  2.34s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:18<02:27,  2.38s/it][A[A

 13%|█▎        | 9/70 [00:21<02:24,  2.37s/it][A[A

 14%|█▍        | 10/70 [00:23<02:24,  2.40s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.39s/it][A[A

Batch loss: 1.3538836240768433




 17%|█▋        | 12/70 [00:28<02:20,  2.42s/it][A[A

 19%|█▊        | 13/70 [00:30<02:16,  2.40s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.42s/it][A[A

 21%|██▏       | 15/70 [00:35<02:12,  2.40s/it][A[A

 23%|██▎       | 16/70 [00:38<02:10,  2.42s/it][A[A

 24%|██▍       | 17/70 [00:40<02:07,  2.41s/it][A[A

 26%|██▌       | 18/70 [00:42<02:04,  2.39s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.35s/it][A[A

 29%|██▊       | 20/70 [00:47<01:57,  2.35s/it][A[A

 30%|███       | 21/70 [00:49<01:54,  2.33s/it][A[A

Batch loss: 1.3256733417510986




 31%|███▏      | 22/70 [00:52<01:52,  2.34s/it][A[A

 33%|███▎      | 23/70 [00:54<01:48,  2.31s/it][A[A

 34%|███▍      | 24/70 [00:56<01:47,  2.34s/it][A[A

 36%|███▌      | 25/70 [00:59<01:44,  2.31s/it][A[A

 37%|███▋      | 26/70 [01:01<01:43,  2.34s/it][A[A

 39%|███▊      | 27/70 [01:03<01:39,  2.31s/it][A[A

 40%|████      | 28/70 [01:06<01:37,  2.33s/it][A[A

 41%|████▏     | 29/70 [01:08<01:34,  2.29s/it][A[A

 43%|████▎     | 30/70 [01:10<01:32,  2.31s/it][A[A

 44%|████▍     | 31/70 [01:12<01:29,  2.30s/it][A[A

Batch loss: 1.499497890472412




 46%|████▌     | 32/70 [01:15<01:28,  2.32s/it][A[A

 47%|████▋     | 33/70 [01:17<01:26,  2.33s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.34s/it][A[A

 50%|█████     | 35/70 [01:22<01:22,  2.34s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:20,  2.38s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:18,  2.38s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:17,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:14,  2.40s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:12,  2.42s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:09,  2.40s/it][A[A

Batch loss: 1.4680155515670776




 60%|██████    | 42/70 [01:39<01:07,  2.43s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.39s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:02,  2.41s/it][A[A

 64%|██████▍   | 45/70 [01:46<01:00,  2.40s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.35s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.35s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:43,  2.31s/it][A[A

Batch loss: 1.4397727251052856




 74%|███████▍  | 52/70 [02:02<00:41,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.30s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.33s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.30s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.32s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:29,  2.30s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:27,  2.32s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.29s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.32s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.30s/it][A[A

Batch loss: 1.7556867599487305




 89%|████████▊ | 62/70 [02:25<00:18,  2.32s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.33s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:14,  2.37s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.39s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.40s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.38s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.33s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4401756508009775




 12%|█▎        | 1/8 [00:02<00:16,  2.39s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.40s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.35s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.32s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.30s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.22s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.388392746448517
Epoch:  53




  1%|▏         | 1/70 [00:02<02:44,  2.38s/it][A[A

Batch loss: 1.421925663948059




  3%|▎         | 2/70 [00:04<02:42,  2.39s/it][A[A

  4%|▍         | 3/70 [00:07<02:37,  2.35s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.35s/it][A[A

  7%|▋         | 5/70 [00:11<02:30,  2.32s/it][A[A

  9%|▊         | 6/70 [00:14<02:29,  2.34s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:18<02:25,  2.35s/it][A[A

 13%|█▎        | 9/70 [00:20<02:21,  2.32s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.31s/it][A[A

Batch loss: 1.4705196619033813




 17%|█▋        | 12/70 [00:28<02:16,  2.35s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.31s/it][A[A

 20%|██        | 14/70 [00:32<02:11,  2.35s/it][A[A

 21%|██▏       | 15/70 [00:35<02:10,  2.38s/it][A[A

 23%|██▎       | 16/70 [00:37<02:12,  2.45s/it][A[A

 24%|██▍       | 17/70 [00:40<02:07,  2.41s/it][A[A

 26%|██▌       | 18/70 [00:42<02:07,  2.44s/it][A[A

 27%|██▋       | 19/70 [00:44<02:03,  2.42s/it][A[A

 29%|██▊       | 20/70 [00:47<02:02,  2.44s/it][A[A

 30%|███       | 21/70 [00:49<01:58,  2.41s/it][A[A

Batch loss: 1.345800518989563




 31%|███▏      | 22/70 [00:52<01:57,  2.44s/it][A[A

 33%|███▎      | 23/70 [00:54<01:53,  2.42s/it][A[A

 34%|███▍      | 24/70 [00:57<01:51,  2.43s/it][A[A

 36%|███▌      | 25/70 [00:59<01:48,  2.40s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.39s/it][A[A

 39%|███▊      | 27/70 [01:04<01:40,  2.34s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:08<01:36,  2.34s/it][A[A

 43%|████▎     | 30/70 [01:11<01:33,  2.34s/it][A[A

 44%|████▍     | 31/70 [01:13<01:29,  2.31s/it][A[A

Batch loss: 1.5304254293441772




 46%|████▌     | 32/70 [01:15<01:29,  2.35s/it][A[A

 47%|████▋     | 33/70 [01:18<01:25,  2.31s/it][A[A

 49%|████▊     | 34/70 [01:20<01:23,  2.33s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.29s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.33s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:15,  2.29s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:14,  2.33s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:11,  2.30s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:10,  2.34s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:07,  2.32s/it][A[A

Batch loss: 1.5299147367477417




 60%|██████    | 42/70 [01:39<01:06,  2.39s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.39s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:02,  2.41s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.39s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:58,  2.43s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:55,  2.41s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:53,  2.43s/it][A[A

 70%|███████   | 49/70 [01:56<00:50,  2.41s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.42s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.42s/it][A[A

Batch loss: 1.531944751739502




 74%|███████▍  | 52/70 [02:03<00:43,  2.43s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.37s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.34s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.39s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.39s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.39s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:24,  2.42s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.39s/it][A[A

Batch loss: 1.440227746963501




 89%|████████▊ | 62/70 [02:27<00:19,  2.41s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.40s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.42s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.40s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.44s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.39s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.34s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.454707474367959




 12%|█▎        | 1/8 [00:02<00:17,  2.50s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.47s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.39s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.35s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.30s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.29s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.20s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.308549463748932
Epoch:  54




  1%|▏         | 1/70 [00:02<02:42,  2.36s/it][A[A

Batch loss: 1.4290486574172974




  3%|▎         | 2/70 [00:04<02:41,  2.38s/it][A[A

  4%|▍         | 3/70 [00:06<02:35,  2.33s/it][A[A

  6%|▌         | 4/70 [00:09<02:34,  2.35s/it][A[A

  7%|▋         | 5/70 [00:11<02:30,  2.31s/it][A[A

  9%|▊         | 6/70 [00:14<02:29,  2.34s/it][A[A

 10%|█         | 7/70 [00:16<02:26,  2.33s/it][A[A

 11%|█▏        | 8/70 [00:18<02:27,  2.38s/it][A[A

 13%|█▎        | 9/70 [00:21<02:24,  2.36s/it][A[A

 14%|█▍        | 10/70 [00:23<02:23,  2.40s/it][A[A

 16%|█▌        | 11/70 [00:25<02:20,  2.38s/it][A[A

Batch loss: 1.3986384868621826




 17%|█▋        | 12/70 [00:28<02:19,  2.41s/it][A[A

 19%|█▊        | 13/70 [00:30<02:15,  2.38s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.41s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.39s/it][A[A

 23%|██▎       | 16/70 [00:38<02:10,  2.41s/it][A[A

 24%|██▍       | 17/70 [00:40<02:07,  2.40s/it][A[A

 26%|██▌       | 18/70 [00:42<02:03,  2.38s/it][A[A

 27%|██▋       | 19/70 [00:44<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:57,  2.35s/it][A[A

 30%|███       | 21/70 [00:49<01:55,  2.35s/it][A[A

Batch loss: 1.4098031520843506




 31%|███▏      | 22/70 [00:52<01:52,  2.35s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:56<01:47,  2.34s/it][A[A

 36%|███▌      | 25/70 [00:58<01:43,  2.31s/it][A[A

 37%|███▋      | 26/70 [01:01<01:42,  2.32s/it][A[A

 39%|███▊      | 27/70 [01:03<01:38,  2.29s/it][A[A

 40%|████      | 28/70 [01:05<01:37,  2.32s/it][A[A

 41%|████▏     | 29/70 [01:08<01:33,  2.29s/it][A[A

 43%|████▎     | 30/70 [01:10<01:32,  2.32s/it][A[A

 44%|████▍     | 31/70 [01:12<01:29,  2.31s/it][A[A

Batch loss: 1.4504742622375488




 46%|████▌     | 32/70 [01:15<01:29,  2.36s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.32s/it][A[A

 49%|████▊     | 34/70 [01:19<01:24,  2.34s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:21,  2.38s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:18,  2.37s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:17,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:13,  2.38s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:09,  2.39s/it][A[A

Batch loss: 1.32618248462677




 60%|██████    | 42/70 [01:39<01:07,  2.42s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.40s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:02,  2.42s/it][A[A

 64%|██████▍   | 45/70 [01:46<01:00,  2.41s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:54,  2.35s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.36s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.35s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:43,  2.31s/it][A[A

Batch loss: 1.5275473594665527




 74%|███████▍  | 52/70 [02:02<00:42,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:39,  2.30s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.32s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.29s/it][A[A

 80%|████████  | 56/70 [02:11<00:32,  2.32s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.31s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:27,  2.33s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.31s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.33s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.30s/it][A[A

Batch loss: 1.4506993293762207




 89%|████████▊ | 62/70 [02:25<00:18,  2.33s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:14,  2.37s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:07,  2.39s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.43s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.41s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.33s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4386108006749834




 12%|█▎        | 1/8 [00:02<00:16,  2.39s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.40s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.35s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.37s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.33s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.32s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.27s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.22s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.2966388761997223
Epoch:  55




  1%|▏         | 1/70 [00:02<02:44,  2.39s/it][A[A

Batch loss: 1.4925167560577393




  3%|▎         | 2/70 [00:04<02:43,  2.40s/it][A[A

  4%|▍         | 3/70 [00:07<02:38,  2.36s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.36s/it][A[A

  7%|▋         | 5/70 [00:11<02:34,  2.38s/it][A[A

  9%|▊         | 6/70 [00:14<02:33,  2.39s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:18<02:26,  2.36s/it][A[A

 13%|█▎        | 9/70 [00:21<02:21,  2.33s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.31s/it][A[A

Batch loss: 1.419745922088623




 17%|█▋        | 12/70 [00:28<02:15,  2.33s/it][A[A

 19%|█▊        | 13/70 [00:30<02:13,  2.33s/it][A[A

 20%|██        | 14/70 [00:33<02:13,  2.39s/it][A[A

 21%|██▏       | 15/70 [00:35<02:10,  2.37s/it][A[A

 23%|██▎       | 16/70 [00:37<02:09,  2.41s/it][A[A

 24%|██▍       | 17/70 [00:40<02:07,  2.40s/it][A[A

 26%|██▌       | 18/70 [00:42<02:07,  2.44s/it][A[A

 27%|██▋       | 19/70 [00:45<02:03,  2.43s/it][A[A

 29%|██▊       | 20/70 [00:47<02:03,  2.47s/it][A[A

 30%|███       | 21/70 [00:50<01:59,  2.44s/it][A[A

Batch loss: 1.3427516222000122




 31%|███▏      | 22/70 [00:52<02:02,  2.55s/it][A[A

 33%|███▎      | 23/70 [00:55<01:56,  2.49s/it][A[A

 34%|███▍      | 24/70 [00:57<01:54,  2.48s/it][A[A

 36%|███▌      | 25/70 [01:00<01:49,  2.43s/it][A[A

 37%|███▋      | 26/70 [01:02<01:46,  2.42s/it][A[A

 39%|███▊      | 27/70 [01:04<01:42,  2.39s/it][A[A

 40%|████      | 28/70 [01:07<01:42,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:09<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:12<01:38,  2.45s/it][A[A

 44%|████▍     | 31/70 [01:14<01:35,  2.45s/it][A[A

Batch loss: 1.3471882343292236




 46%|████▌     | 32/70 [01:17<01:34,  2.48s/it][A[A

 47%|████▋     | 33/70 [01:19<01:30,  2.46s/it][A[A

 49%|████▊     | 34/70 [01:22<01:29,  2.50s/it][A[A

 50%|█████     | 35/70 [01:24<01:26,  2.47s/it][A[A

 51%|█████▏    | 36/70 [01:27<01:25,  2.51s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:22,  2.49s/it][A[A

 54%|█████▍    | 38/70 [01:32<01:19,  2.47s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:15,  2.42s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:39<01:08,  2.38s/it][A[A

Batch loss: 1.5088587999343872




 60%|██████    | 42/70 [01:41<01:06,  2.38s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:01,  2.35s/it][A[A

 64%|██████▍   | 45/70 [01:48<00:58,  2.32s/it][A[A

 66%|██████▌   | 46/70 [01:50<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:53,  2.32s/it][A[A

 69%|██████▊   | 48/70 [01:55<00:51,  2.34s/it][A[A

 70%|███████   | 49/70 [01:57<00:48,  2.30s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:46,  2.33s/it][A[A

 73%|███████▎  | 51/70 [02:02<00:43,  2.31s/it][A[A

Batch loss: 1.5515217781066895




 74%|███████▍  | 52/70 [02:04<00:42,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:39,  2.31s/it][A[A

 77%|███████▋  | 54/70 [02:09<00:37,  2.34s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:34,  2.32s/it][A[A

 80%|████████  | 56/70 [02:14<00:33,  2.38s/it][A[A

 81%|████████▏ | 57/70 [02:16<00:30,  2.38s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:21<00:26,  2.41s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:24,  2.45s/it][A[A

 87%|████████▋ | 61/70 [02:26<00:21,  2.42s/it][A[A

Batch loss: 1.6187390089035034




 89%|████████▊ | 62/70 [02:28<00:19,  2.45s/it][A[A

 90%|█████████ | 63/70 [02:31<00:17,  2.43s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:14,  2.47s/it][A[A

 93%|█████████▎| 65/70 [02:36<00:12,  2.44s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.42s/it][A[A

 96%|█████████▌| 67/70 [02:40<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:43<00:04,  2.37s/it][A[A

 99%|█████████▊| 69/70 [02:45<00:02,  2.34s/it][A[A

100%|██████████| 70/70 [02:46<00:00,  2.37s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4491746817316329




 12%|█▎        | 1/8 [00:02<00:15,  2.28s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.29s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.24s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.26s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.23s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.24s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.20s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.14s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4101874232292175
Epoch:  56




  1%|▏         | 1/70 [00:02<02:44,  2.39s/it][A[A

Batch loss: 1.8621553182601929




  3%|▎         | 2/70 [00:04<02:42,  2.39s/it][A[A

  4%|▍         | 3/70 [00:07<02:38,  2.37s/it][A[A

  6%|▌         | 4/70 [00:09<02:38,  2.40s/it][A[A

  7%|▋         | 5/70 [00:11<02:32,  2.35s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.36s/it][A[A

 11%|█▏        | 8/70 [00:19<02:28,  2.40s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.39s/it][A[A

 14%|█▍        | 10/70 [00:23<02:25,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.40s/it][A[A

Batch loss: 1.605157494544983




 17%|█▋        | 12/70 [00:28<02:20,  2.43s/it][A[A

 19%|█▊        | 13/70 [00:31<02:16,  2.40s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.42s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.40s/it][A[A

 23%|██▎       | 16/70 [00:38<02:10,  2.42s/it][A[A

 24%|██▍       | 17/70 [00:40<02:06,  2.39s/it][A[A

 26%|██▌       | 18/70 [00:43<02:04,  2.38s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:50<01:54,  2.34s/it][A[A

Batch loss: 1.602859616279602




 31%|███▏      | 22/70 [00:52<01:53,  2.35s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.33s/it][A[A

 34%|███▍      | 24/70 [00:57<01:47,  2.34s/it][A[A

 36%|███▌      | 25/70 [00:59<01:44,  2.32s/it][A[A

 37%|███▋      | 26/70 [01:01<01:42,  2.34s/it][A[A

 39%|███▊      | 27/70 [01:03<01:39,  2.31s/it][A[A

 40%|████      | 28/70 [01:06<01:37,  2.33s/it][A[A

 41%|████▏     | 29/70 [01:08<01:34,  2.30s/it][A[A

 43%|████▎     | 30/70 [01:10<01:33,  2.34s/it][A[A

 44%|████▍     | 31/70 [01:13<01:30,  2.31s/it][A[A

Batch loss: 1.3480318784713745




 46%|████▌     | 32/70 [01:15<01:28,  2.34s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.31s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.33s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.34s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:21,  2.39s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:18,  2.37s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:17,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.38s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:09,  2.40s/it][A[A

Batch loss: 1.4014557600021362




 60%|██████    | 42/70 [01:39<01:08,  2.44s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:05,  2.41s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:03,  2.43s/it][A[A

 64%|██████▍   | 45/70 [01:46<01:00,  2.40s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:57,  2.39s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.35s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.38s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.32s/it][A[A

Batch loss: 1.3113995790481567




 74%|███████▍  | 52/70 [02:03<00:42,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.31s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.34s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.33s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.34s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.31s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.34s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.31s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.33s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.30s/it][A[A

Batch loss: 1.398057460784912




 89%|████████▊ | 62/70 [02:26<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.35s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.39s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.39s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.41s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.39s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4631395288876126




 12%|█▎        | 1/8 [00:02<00:17,  2.46s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.46s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.42s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.44s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.38s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.35s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.33s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.27s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4203289449214935
Epoch:  57




  1%|▏         | 1/70 [00:02<02:54,  2.53s/it][A[A

Batch loss: 1.4272321462631226




  3%|▎         | 2/70 [00:05<02:51,  2.52s/it][A[A

  4%|▍         | 3/70 [00:07<02:45,  2.46s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.48s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.45s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.44s/it][A[A

 13%|█▎        | 9/70 [00:21<02:27,  2.42s/it][A[A

 14%|█▍        | 10/70 [00:24<02:24,  2.41s/it][A[A

 16%|█▌        | 11/70 [00:26<02:19,  2.36s/it][A[A

Batch loss: 1.3455582857131958




 17%|█▋        | 12/70 [00:28<02:17,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:31<02:14,  2.36s/it][A[A

 20%|██        | 14/70 [00:33<02:12,  2.37s/it][A[A

 21%|██▏       | 15/70 [00:35<02:08,  2.33s/it][A[A

 23%|██▎       | 16/70 [00:38<02:06,  2.35s/it][A[A

 24%|██▍       | 17/70 [00:40<02:02,  2.31s/it][A[A

 26%|██▌       | 18/70 [00:42<02:01,  2.33s/it][A[A

 27%|██▋       | 19/70 [00:45<01:57,  2.30s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.33s/it][A[A

 30%|███       | 21/70 [00:49<01:52,  2.30s/it][A[A

Batch loss: 1.4022200107574463




 31%|███▏      | 22/70 [00:52<01:51,  2.32s/it][A[A

 33%|███▎      | 23/70 [00:54<01:47,  2.29s/it][A[A

 34%|███▍      | 24/70 [00:56<01:46,  2.32s/it][A[A

 36%|███▌      | 25/70 [00:58<01:43,  2.29s/it][A[A

 37%|███▋      | 26/70 [01:01<01:41,  2.31s/it][A[A

 39%|███▊      | 27/70 [01:03<01:39,  2.30s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.36s/it][A[A

 41%|████▏     | 29/70 [01:08<01:36,  2.37s/it][A[A

 43%|████▎     | 30/70 [01:10<01:35,  2.40s/it][A[A

 44%|████▍     | 31/70 [01:13<01:33,  2.39s/it][A[A

Batch loss: 1.3118329048156738




 46%|████▌     | 32/70 [01:15<01:31,  2.42s/it][A[A

 47%|████▋     | 33/70 [01:18<01:28,  2.39s/it][A[A

 49%|████▊     | 34/70 [01:20<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:22<01:24,  2.40s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:22,  2.43s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:19,  2.41s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:16,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.37s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:11,  2.37s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:08,  2.36s/it][A[A

Batch loss: 1.3446946144104004




 60%|██████    | 42/70 [01:39<01:06,  2.37s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.33s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:01,  2.35s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.31s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:55,  2.33s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:52,  2.30s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.33s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.31s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.33s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.34s/it][A[A

Batch loss: 1.4679560661315918




 74%|███████▍  | 52/70 [02:02<00:43,  2.40s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.35s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.38s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.36s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.41s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:31,  2.39s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.39s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:24,  2.41s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.40s/it][A[A

Batch loss: 1.342091679573059




 89%|████████▊ | 62/70 [02:26<00:19,  2.43s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.40s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.42s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:12,  2.41s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.34s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.34s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.33s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4305487394332885




 12%|█▎        | 1/8 [00:02<00:16,  2.30s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.30s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.26s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.25s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.22s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.24s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.21s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.14s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.43202543258667
Epoch:  58




  1%|▏         | 1/70 [00:02<02:45,  2.40s/it][A[A

Batch loss: 1.515271544456482




  3%|▎         | 2/70 [00:04<02:43,  2.40s/it][A[A

  4%|▍         | 3/70 [00:07<02:40,  2.39s/it][A[A

  6%|▌         | 4/70 [00:09<02:37,  2.39s/it][A[A

  7%|▋         | 5/70 [00:11<02:31,  2.33s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.36s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:19<02:28,  2.40s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:23<02:24,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.39s/it][A[A

Batch loss: 1.8187804222106934




 17%|█▋        | 12/70 [00:28<02:20,  2.42s/it][A[A

 19%|█▊        | 13/70 [00:31<02:16,  2.39s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.42s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.39s/it][A[A

 23%|██▎       | 16/70 [00:38<02:11,  2.43s/it][A[A

 24%|██▍       | 17/70 [00:40<02:08,  2.42s/it][A[A

 26%|██▌       | 18/70 [00:43<02:05,  2.41s/it][A[A

 27%|██▋       | 19/70 [00:45<02:00,  2.37s/it][A[A

 29%|██▊       | 20/70 [00:47<01:59,  2.39s/it][A[A

 30%|███       | 21/70 [00:50<01:55,  2.35s/it][A[A

Batch loss: 1.3317081928253174




 31%|███▏      | 22/70 [00:52<01:53,  2.36s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.33s/it][A[A

 34%|███▍      | 24/70 [00:57<01:47,  2.33s/it][A[A

 36%|███▌      | 25/70 [00:59<01:43,  2.30s/it][A[A

 37%|███▋      | 26/70 [01:01<01:42,  2.32s/it][A[A

 39%|███▊      | 27/70 [01:03<01:38,  2.30s/it][A[A

 40%|████      | 28/70 [01:06<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:08<01:34,  2.32s/it][A[A

 43%|████▎     | 30/70 [01:10<01:33,  2.33s/it][A[A

 44%|████▍     | 31/70 [01:13<01:29,  2.30s/it][A[A

Batch loss: 1.3789324760437012




 46%|████▌     | 32/70 [01:15<01:28,  2.33s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.33s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:21,  2.39s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:18,  2.38s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:17,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:14,  2.39s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:09,  2.41s/it][A[A

Batch loss: 1.437651515007019




 60%|██████    | 42/70 [01:39<01:08,  2.43s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.40s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:03,  2.43s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.37s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:57,  2.38s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.35s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.38s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.35s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.37s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.32s/it][A[A

Batch loss: 1.3703972101211548




 74%|███████▍  | 52/70 [02:03<00:42,  2.35s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.32s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.37s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.32s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.34s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.31s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.33s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.32s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.30s/it][A[A

Batch loss: 1.4398282766342163




 89%|████████▊ | 62/70 [02:26<00:18,  2.33s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.33s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.37s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.39s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.40s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4464877026421683




 12%|█▎        | 1/8 [00:02<00:16,  2.39s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.41s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.36s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.37s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.30s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.31s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.28s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.22s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.409727692604065
Epoch:  59




  1%|▏         | 1/70 [00:02<02:55,  2.54s/it][A[A

Batch loss: 1.466429591178894




  3%|▎         | 2/70 [00:05<02:52,  2.53s/it][A[A

  4%|▍         | 3/70 [00:07<02:46,  2.48s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.48s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.43s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.45s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.43s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.44s/it][A[A

 13%|█▎        | 9/70 [00:21<02:27,  2.42s/it][A[A

 14%|█▍        | 10/70 [00:24<02:25,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.37s/it][A[A

Batch loss: 1.3432728052139282




 17%|█▋        | 12/70 [00:28<02:17,  2.37s/it][A[A

 19%|█▊        | 13/70 [00:31<02:14,  2.35s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.36s/it][A[A

 21%|██▏       | 15/70 [00:35<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:38<02:06,  2.34s/it][A[A

 24%|██▍       | 17/70 [00:40<02:02,  2.31s/it][A[A

 26%|██▌       | 18/70 [00:42<02:01,  2.33s/it][A[A

 27%|██▋       | 19/70 [00:45<01:56,  2.29s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.32s/it][A[A

 30%|███       | 21/70 [00:49<01:52,  2.30s/it][A[A

Batch loss: 1.5260616540908813




 31%|███▏      | 22/70 [00:52<01:51,  2.32s/it][A[A

 33%|███▎      | 23/70 [00:54<01:47,  2.30s/it][A[A

 34%|███▍      | 24/70 [00:56<01:46,  2.31s/it][A[A

 36%|███▌      | 25/70 [00:58<01:43,  2.29s/it][A[A

 37%|███▋      | 26/70 [01:01<01:41,  2.30s/it][A[A

 39%|███▊      | 27/70 [01:03<01:38,  2.29s/it][A[A

 40%|████      | 28/70 [01:06<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:08<01:36,  2.36s/it][A[A

 43%|████▎     | 30/70 [01:10<01:35,  2.38s/it][A[A

 44%|████▍     | 31/70 [01:13<01:32,  2.38s/it][A[A

Batch loss: 1.46651029586792




 46%|████▌     | 32/70 [01:15<01:31,  2.41s/it][A[A

 47%|████▋     | 33/70 [01:18<01:28,  2.40s/it][A[A

 49%|████▊     | 34/70 [01:20<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:22<01:23,  2.39s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:22,  2.42s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:19,  2.40s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:17,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.36s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:11,  2.37s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:07,  2.34s/it][A[A

Batch loss: 1.5712907314300537




 60%|██████    | 42/70 [01:39<01:05,  2.36s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.32s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:00,  2.33s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.30s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:55,  2.33s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:52,  2.30s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:50,  2.32s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.30s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:46,  2.32s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:43,  2.30s/it][A[A

Batch loss: 1.4127075672149658




 74%|███████▍  | 52/70 [02:02<00:41,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:39,  2.32s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.35s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.32s/it][A[A

 80%|████████  | 56/70 [02:11<00:33,  2.37s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.36s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:28,  2.40s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:24,  2.42s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.41s/it][A[A

Batch loss: 1.4267438650131226




 89%|████████▊ | 62/70 [02:26<00:19,  2.44s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.41s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.43s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:12,  2.41s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.41s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.36s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.37s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.35s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4131202135767256




 12%|█▎        | 1/8 [00:02<00:16,  2.33s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.33s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.27s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.27s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.24s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.25s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.22s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.15s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.34883514046669
Epoch:  60




  1%|▏         | 1/70 [00:02<02:43,  2.37s/it][A[A

Batch loss: 1.3788930177688599




  3%|▎         | 2/70 [00:04<02:43,  2.40s/it][A[A

  4%|▍         | 3/70 [00:07<02:38,  2.37s/it][A[A

  6%|▌         | 4/70 [00:09<02:36,  2.37s/it][A[A

  7%|▋         | 5/70 [00:11<02:31,  2.33s/it][A[A

  9%|▊         | 6/70 [00:14<02:29,  2.34s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:19<02:28,  2.40s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:23<02:24,  2.41s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.39s/it][A[A

Batch loss: 1.3473162651062012




 17%|█▋        | 12/70 [00:28<02:20,  2.43s/it][A[A

 19%|█▊        | 13/70 [00:31<02:17,  2.40s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.43s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.40s/it][A[A

 23%|██▎       | 16/70 [00:38<02:10,  2.42s/it][A[A

 24%|██▍       | 17/70 [00:40<02:06,  2.39s/it][A[A

 26%|██▌       | 18/70 [00:42<02:03,  2.38s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:59,  2.40s/it][A[A

 30%|███       | 21/70 [00:50<01:55,  2.35s/it][A[A

Batch loss: 1.3117436170578003




 31%|███▏      | 22/70 [00:52<01:53,  2.37s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.33s/it][A[A

 34%|███▍      | 24/70 [00:57<01:47,  2.34s/it][A[A

 36%|███▌      | 25/70 [00:59<01:44,  2.31s/it][A[A

 37%|███▋      | 26/70 [01:01<01:43,  2.36s/it][A[A

 39%|███▊      | 27/70 [01:04<01:40,  2.34s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.38s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.34s/it][A[A

 43%|████▎     | 30/70 [01:11<01:34,  2.36s/it][A[A

 44%|████▍     | 31/70 [01:13<01:30,  2.33s/it][A[A

Batch loss: 1.7774523496627808




 46%|████▌     | 32/70 [01:15<01:29,  2.35s/it][A[A

 47%|████▋     | 33/70 [01:18<01:25,  2.31s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.33s/it][A[A

 50%|█████     | 35/70 [01:22<01:21,  2.34s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:21,  2.39s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:18,  2.37s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:17,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.38s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:09,  2.39s/it][A[A

Batch loss: 1.479185700416565




 60%|██████    | 42/70 [01:39<01:07,  2.42s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:04,  2.40s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:03,  2.43s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.39s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:57,  2.38s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:53,  2.35s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.37s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.36s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.39s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.37s/it][A[A

Batch loss: 1.4209314584732056




 74%|███████▍  | 52/70 [02:03<00:43,  2.41s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.39s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.42s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.40s/it][A[A

 80%|████████  | 56/70 [02:13<00:33,  2.42s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.40s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.42s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.37s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.38s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.34s/it][A[A

Batch loss: 1.822826623916626




 89%|████████▊ | 62/70 [02:27<00:18,  2.37s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.33s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.35s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.32s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.31s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.32s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4779024532863072




 12%|█▎        | 1/8 [00:02<00:15,  2.27s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.28s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.24s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.26s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.23s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.24s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.23s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.16s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.422999918460846
Epoch:  61




  1%|▏         | 1/70 [00:02<02:53,  2.51s/it][A[A

Batch loss: 1.395824909210205




  3%|▎         | 2/70 [00:05<02:51,  2.52s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.46s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.48s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:37,  2.46s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.44s/it][A[A

 13%|█▎        | 9/70 [00:21<02:27,  2.41s/it][A[A

 14%|█▍        | 10/70 [00:24<02:25,  2.43s/it][A[A

 16%|█▌        | 11/70 [00:26<02:19,  2.36s/it][A[A

Batch loss: 1.3420308828353882




 17%|█▋        | 12/70 [00:28<02:17,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:31<02:13,  2.34s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.35s/it][A[A

 21%|██▏       | 15/70 [00:35<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:38<02:06,  2.34s/it][A[A

 24%|██▍       | 17/70 [00:40<02:02,  2.31s/it][A[A

 26%|██▌       | 18/70 [00:42<02:01,  2.34s/it][A[A

 27%|██▋       | 19/70 [00:45<01:57,  2.31s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.33s/it][A[A

 30%|███       | 21/70 [00:49<01:53,  2.31s/it][A[A

Batch loss: 1.5898408889770508




 31%|███▏      | 22/70 [00:52<01:51,  2.33s/it][A[A

 33%|███▎      | 23/70 [00:54<01:47,  2.30s/it][A[A

 34%|███▍      | 24/70 [00:56<01:46,  2.31s/it][A[A

 36%|███▌      | 25/70 [00:58<01:42,  2.28s/it][A[A

 37%|███▋      | 26/70 [01:01<01:41,  2.31s/it][A[A

 39%|███▊      | 27/70 [01:03<01:39,  2.32s/it][A[A

 40%|████      | 28/70 [01:06<01:40,  2.38s/it][A[A

 41%|████▏     | 29/70 [01:08<01:39,  2.42s/it][A[A

 43%|████▎     | 30/70 [01:11<01:39,  2.48s/it][A[A

 44%|████▍     | 31/70 [01:13<01:35,  2.45s/it][A[A

Batch loss: 1.3289828300476074




 46%|████▌     | 32/70 [01:16<01:33,  2.47s/it][A[A

 47%|████▋     | 33/70 [01:18<01:29,  2.43s/it][A[A

 49%|████▊     | 34/70 [01:21<01:28,  2.45s/it][A[A

 50%|█████     | 35/70 [01:23<01:24,  2.42s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:23,  2.44s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:19,  2.41s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:17,  2.43s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.38s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:11,  2.37s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:07,  2.34s/it][A[A

Batch loss: 1.3477662801742554




 60%|██████    | 42/70 [01:39<01:05,  2.36s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:02,  2.32s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:00,  2.34s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.32s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:53,  2.32s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.34s/it][A[A

 70%|███████   | 49/70 [01:56<00:48,  2.31s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.33s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:43,  2.30s/it][A[A

Batch loss: 1.6231359243392944




 74%|███████▍  | 52/70 [02:03<00:42,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.31s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.32s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.30s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.36s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.35s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.39s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.37s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:24,  2.41s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.39s/it][A[A

Batch loss: 1.3151583671569824




 89%|████████▊ | 62/70 [02:27<00:19,  2.42s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.40s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.42s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.39s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.42s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.37s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.35s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.461558152948107




 12%|█▎        | 1/8 [00:02<00:16,  2.38s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.36s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.29s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.30s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.25s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.26s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.24s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.16s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.281417816877365
Epoch:  62




  1%|▏         | 1/70 [00:02<02:54,  2.53s/it][A[A

Batch loss: 1.320906162261963




  3%|▎         | 2/70 [00:04<02:48,  2.48s/it][A[A

  4%|▍         | 3/70 [00:07<02:41,  2.41s/it][A[A

  6%|▌         | 4/70 [00:09<02:38,  2.40s/it][A[A

  7%|▋         | 5/70 [00:11<02:33,  2.36s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:30,  2.38s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.42s/it][A[A

 13%|█▎        | 9/70 [00:21<02:26,  2.40s/it][A[A

 14%|█▍        | 10/70 [00:23<02:25,  2.43s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.40s/it][A[A

Batch loss: 1.4314032793045044




 17%|█▋        | 12/70 [00:28<02:20,  2.43s/it][A[A

 19%|█▊        | 13/70 [00:31<02:17,  2.41s/it][A[A

 20%|██        | 14/70 [00:33<02:16,  2.44s/it][A[A

 21%|██▏       | 15/70 [00:36<02:12,  2.41s/it][A[A

 23%|██▎       | 16/70 [00:38<02:11,  2.44s/it][A[A

 24%|██▍       | 17/70 [00:40<02:08,  2.42s/it][A[A

 26%|██▌       | 18/70 [00:43<02:05,  2.42s/it][A[A

 27%|██▋       | 19/70 [00:45<02:00,  2.36s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:50<01:56,  2.37s/it][A[A

Batch loss: 1.3581743240356445




 31%|███▏      | 22/70 [00:52<01:53,  2.37s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:57<01:48,  2.35s/it][A[A

 36%|███▌      | 25/70 [00:59<01:44,  2.32s/it][A[A

 37%|███▋      | 26/70 [01:02<01:44,  2.38s/it][A[A

 39%|███▊      | 27/70 [01:04<01:40,  2.34s/it][A[A

 40%|████      | 28/70 [01:06<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.33s/it][A[A

 43%|████▎     | 30/70 [01:11<01:34,  2.35s/it][A[A

 44%|████▍     | 31/70 [01:13<01:31,  2.35s/it][A[A

Batch loss: 1.3524503707885742




 46%|████▌     | 32/70 [01:16<01:29,  2.37s/it][A[A

 47%|████▋     | 33/70 [01:18<01:25,  2.32s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.34s/it][A[A

 50%|█████     | 35/70 [01:23<01:21,  2.34s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:21,  2.39s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:18,  2.38s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:16,  2.40s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.39s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:12,  2.41s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:09,  2.39s/it][A[A

Batch loss: 1.6238511800765991




 60%|██████    | 42/70 [01:40<01:07,  2.42s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:04,  2.39s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:02,  2.42s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:58,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.36s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:53,  2.33s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:51,  2.36s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.35s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.39s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.37s/it][A[A

Batch loss: 1.3308534622192383




 74%|███████▍  | 52/70 [02:03<00:43,  2.40s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:40,  2.38s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.41s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.40s/it][A[A

 80%|████████  | 56/70 [02:13<00:33,  2.43s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.40s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.42s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.40s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.39s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:21,  2.35s/it][A[A

Batch loss: 1.4032522439956665




 89%|████████▊ | 62/70 [02:27<00:19,  2.39s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.34s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.34s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.30s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.32s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:06,  2.29s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.32s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.427229106426239




 12%|█▎        | 1/8 [00:02<00:16,  2.30s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.30s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.27s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.27s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.23s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.25s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.23s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.17s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3921331763267517
Epoch:  63




  1%|▏         | 1/70 [00:02<02:52,  2.50s/it][A[A

Batch loss: 1.324639081954956




  3%|▎         | 2/70 [00:05<02:50,  2.50s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.46s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.47s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.45s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:32,  2.45s/it][A[A

 13%|█▎        | 9/70 [00:21<02:27,  2.41s/it][A[A

 14%|█▍        | 10/70 [00:24<02:25,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.38s/it][A[A

Batch loss: 1.419335961341858




 17%|█▋        | 12/70 [00:29<02:18,  2.39s/it][A[A

 19%|█▊        | 13/70 [00:31<02:15,  2.37s/it][A[A

 20%|██        | 14/70 [00:33<02:13,  2.39s/it][A[A

 21%|██▏       | 15/70 [00:36<02:09,  2.36s/it][A[A

 23%|██▎       | 16/70 [00:38<02:08,  2.38s/it][A[A

 24%|██▍       | 17/70 [00:40<02:04,  2.35s/it][A[A

 26%|██▌       | 18/70 [00:43<02:02,  2.36s/it][A[A

 27%|██▋       | 19/70 [00:45<01:58,  2.33s/it][A[A

 29%|██▊       | 20/70 [00:47<01:57,  2.36s/it][A[A

 30%|███       | 21/70 [00:50<01:54,  2.33s/it][A[A

Batch loss: 1.3997901678085327




 31%|███▏      | 22/70 [00:52<01:52,  2.34s/it][A[A

 33%|███▎      | 23/70 [00:54<01:48,  2.31s/it][A[A

 34%|███▍      | 24/70 [00:57<01:47,  2.33s/it][A[A

 36%|███▌      | 25/70 [00:59<01:43,  2.30s/it][A[A

 37%|███▋      | 26/70 [01:01<01:42,  2.33s/it][A[A

 39%|███▊      | 27/70 [01:03<01:38,  2.30s/it][A[A

 40%|████      | 28/70 [01:06<01:38,  2.35s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.34s/it][A[A

 43%|████▎     | 30/70 [01:11<01:35,  2.39s/it][A[A

 44%|████▍     | 31/70 [01:13<01:32,  2.37s/it][A[A

Batch loss: 1.524659276008606




 46%|████▌     | 32/70 [01:16<01:31,  2.41s/it][A[A

 47%|████▋     | 33/70 [01:18<01:28,  2.39s/it][A[A

 49%|████▊     | 34/70 [01:20<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:23<01:23,  2.40s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:22,  2.43s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:19,  2.41s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:16,  2.40s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:12,  2.35s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:10,  2.36s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:07,  2.33s/it][A[A

Batch loss: 1.3503637313842773




 60%|██████    | 42/70 [01:39<01:05,  2.35s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:01,  2.35s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.31s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:53,  2.31s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.33s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.31s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.33s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.34s/it][A[A

Batch loss: 1.530285120010376




 74%|███████▍  | 52/70 [02:03<00:42,  2.35s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.32s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.33s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.31s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.37s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.35s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.41s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:24,  2.46s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.44s/it][A[A

Batch loss: 1.3796030282974243




 89%|████████▊ | 62/70 [02:27<00:19,  2.47s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.42s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.45s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:12,  2.41s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.43s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.39s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.39s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.35s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4512987971305846




 12%|█▎        | 1/8 [00:02<00:16,  2.29s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.30s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.25s/it][A[A

 50%|█████     | 4/8 [00:08<00:08,  2.25s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.22s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.23s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.24s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.15s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3022722005844116
Epoch:  64




  1%|▏         | 1/70 [00:02<02:44,  2.38s/it][A[A

Batch loss: 1.510135531425476




  3%|▎         | 2/70 [00:04<02:44,  2.42s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:36,  2.38s/it][A[A

  7%|▋         | 5/70 [00:11<02:32,  2.34s/it][A[A

  9%|▊         | 6/70 [00:14<02:30,  2.36s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.36s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.41s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.39s/it][A[A

 14%|█▍        | 10/70 [00:23<02:24,  2.41s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.39s/it][A[A

Batch loss: 1.476747989654541




 17%|█▋        | 12/70 [00:28<02:20,  2.43s/it][A[A

 19%|█▊        | 13/70 [00:31<02:17,  2.41s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.43s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.40s/it][A[A

 23%|██▎       | 16/70 [00:38<02:11,  2.43s/it][A[A

 24%|██▍       | 17/70 [00:40<02:07,  2.40s/it][A[A

 26%|██▌       | 18/70 [00:43<02:04,  2.40s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.35s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:50<01:55,  2.36s/it][A[A

Batch loss: 1.4640166759490967




 31%|███▏      | 22/70 [00:52<01:55,  2.41s/it][A[A

 33%|███▎      | 23/70 [00:55<01:52,  2.39s/it][A[A

 34%|███▍      | 24/70 [00:57<01:52,  2.44s/it][A[A

 36%|███▌      | 25/70 [00:59<01:48,  2.41s/it][A[A

 37%|███▋      | 26/70 [01:02<01:47,  2.44s/it][A[A

 39%|███▊      | 27/70 [01:04<01:43,  2.41s/it][A[A

 40%|████      | 28/70 [01:07<01:42,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:09<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:12<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:14<01:33,  2.39s/it][A[A

Batch loss: 1.327322006225586




 46%|████▌     | 32/70 [01:16<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:19<01:27,  2.35s/it][A[A

 49%|████▊     | 34/70 [01:21<01:25,  2.37s/it][A[A

 50%|█████     | 35/70 [01:23<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:19,  2.34s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:16,  2.31s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:14,  2.33s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:11,  2.30s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:09,  2.33s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:06,  2.30s/it][A[A

Batch loss: 1.438291311264038




 60%|██████    | 42/70 [01:39<01:05,  2.33s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:02,  2.30s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:00,  2.32s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.30s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:55,  2.33s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:52,  2.30s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.33s/it][A[A

 70%|███████   | 49/70 [01:56<00:48,  2.32s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.38s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.37s/it][A[A

Batch loss: 1.4245647192001343




 74%|███████▍  | 52/70 [02:03<00:43,  2.41s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.38s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.41s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.39s/it][A[A

 80%|████████  | 56/70 [02:13<00:33,  2.42s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.40s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:29,  2.43s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.39s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.39s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.34s/it][A[A

Batch loss: 1.5099560022354126




 89%|████████▊ | 62/70 [02:27<00:18,  2.36s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.33s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.34s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.31s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.30s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.33s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4464633073125566




 12%|█▎        | 1/8 [00:02<00:16,  2.32s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.32s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.29s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.29s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.25s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.26s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.25s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.18s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.355035960674286
Epoch:  65




  1%|▏         | 1/70 [00:02<02:53,  2.52s/it][A[A

Batch loss: 1.4322797060012817




  3%|▎         | 2/70 [00:05<02:51,  2.52s/it][A[A

  4%|▍         | 3/70 [00:07<02:45,  2.47s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.48s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:37,  2.46s/it][A[A

 10%|█         | 7/70 [00:17<02:35,  2.47s/it][A[A

 11%|█▏        | 8/70 [00:19<02:36,  2.52s/it][A[A

 13%|█▎        | 9/70 [00:22<02:30,  2.47s/it][A[A

 14%|█▍        | 10/70 [00:24<02:27,  2.47s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.39s/it][A[A

Batch loss: 1.40061354637146




 17%|█▋        | 12/70 [00:29<02:18,  2.39s/it][A[A

 19%|█▊        | 13/70 [00:31<02:16,  2.40s/it][A[A

 20%|██        | 14/70 [00:34<02:13,  2.39s/it][A[A

 21%|██▏       | 15/70 [00:36<02:08,  2.34s/it][A[A

 23%|██▎       | 16/70 [00:38<02:06,  2.35s/it][A[A

 24%|██▍       | 17/70 [00:40<02:02,  2.32s/it][A[A

 26%|██▌       | 18/70 [00:43<02:01,  2.33s/it][A[A

 27%|██▋       | 19/70 [00:45<01:57,  2.31s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.33s/it][A[A

 30%|███       | 21/70 [00:50<01:52,  2.31s/it][A[A

Batch loss: 1.3563822507858276




 31%|███▏      | 22/70 [00:52<01:51,  2.33s/it][A[A

 33%|███▎      | 23/70 [00:54<01:48,  2.30s/it][A[A

 34%|███▍      | 24/70 [00:57<01:48,  2.37s/it][A[A

 36%|███▌      | 25/70 [00:59<01:45,  2.34s/it][A[A

 37%|███▋      | 26/70 [01:01<01:43,  2.36s/it][A[A

 39%|███▊      | 27/70 [01:04<01:41,  2.36s/it][A[A

 40%|████      | 28/70 [01:06<01:40,  2.39s/it][A[A

 41%|████▏     | 29/70 [01:09<01:37,  2.38s/it][A[A

 43%|████▎     | 30/70 [01:11<01:36,  2.40s/it][A[A

 44%|████▍     | 31/70 [01:13<01:32,  2.38s/it][A[A

Batch loss: 1.3082841634750366




 46%|████▌     | 32/70 [01:16<01:32,  2.43s/it][A[A

 47%|████▋     | 33/70 [01:18<01:28,  2.40s/it][A[A

 49%|████▊     | 34/70 [01:21<01:27,  2.44s/it][A[A

 50%|█████     | 35/70 [01:23<01:24,  2.41s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:23,  2.45s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:20,  2.43s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:18,  2.46s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:14,  2.41s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:11,  2.40s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:08,  2.38s/it][A[A

Batch loss: 1.4537274837493896




 60%|██████    | 42/70 [01:40<01:06,  2.38s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:03,  2.35s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:01,  2.36s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:58,  2.33s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:53,  2.33s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:51,  2.35s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:47,  2.36s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:44,  2.32s/it][A[A

Batch loss: 1.6310378313064575




 74%|███████▍  | 52/70 [02:03<00:42,  2.35s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:39,  2.32s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:37,  2.34s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:36,  2.40s/it][A[A

 80%|████████  | 56/70 [02:13<00:34,  2.44s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.41s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.44s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.42s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:24,  2.47s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:21,  2.44s/it][A[A

Batch loss: 1.3515210151672363




 89%|████████▊ | 62/70 [02:28<00:19,  2.46s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.43s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:15,  2.51s/it][A[A

 93%|█████████▎| 65/70 [02:35<00:12,  2.47s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.45s/it][A[A

 96%|█████████▌| 67/70 [02:40<00:07,  2.40s/it][A[A

 97%|█████████▋| 68/70 [02:42<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:45<00:02,  2.40s/it][A[A

100%|██████████| 70/70 [02:45<00:00,  2.37s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4605347190584455




 12%|█▎        | 1/8 [00:02<00:16,  2.34s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.35s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.35s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.33s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.29s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.29s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.25s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.19s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3300631046295166
Epoch:  66




  1%|▏         | 1/70 [00:02<02:43,  2.38s/it][A[A

Batch loss: 1.411348819732666




  3%|▎         | 2/70 [00:04<02:42,  2.38s/it][A[A

  4%|▍         | 3/70 [00:07<02:36,  2.34s/it][A[A

  6%|▌         | 4/70 [00:09<02:34,  2.35s/it][A[A

  7%|▋         | 5/70 [00:11<02:30,  2.32s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.36s/it][A[A

 11%|█▏        | 8/70 [00:18<02:28,  2.40s/it][A[A

 13%|█▎        | 9/70 [00:21<02:26,  2.39s/it][A[A

 14%|█▍        | 10/70 [00:23<02:25,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.40s/it][A[A

Batch loss: 1.5119357109069824




 17%|█▋        | 12/70 [00:28<02:21,  2.43s/it][A[A

 19%|█▊        | 13/70 [00:31<02:17,  2.42s/it][A[A

 20%|██        | 14/70 [00:33<02:16,  2.43s/it][A[A

 21%|██▏       | 15/70 [00:35<02:12,  2.41s/it][A[A

 23%|██▎       | 16/70 [00:38<02:11,  2.43s/it][A[A

 24%|██▍       | 17/70 [00:40<02:05,  2.37s/it][A[A

 26%|██▌       | 18/70 [00:43<02:04,  2.39s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:59,  2.39s/it][A[A

 30%|███       | 21/70 [00:50<01:56,  2.38s/it][A[A

Batch loss: 1.378233790397644




 31%|███▏      | 22/70 [00:52<01:56,  2.42s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.40s/it][A[A

 34%|███▍      | 24/70 [00:57<01:52,  2.44s/it][A[A

 36%|███▌      | 25/70 [00:59<01:48,  2.41s/it][A[A

 37%|███▋      | 26/70 [01:02<01:47,  2.44s/it][A[A

 39%|███▊      | 27/70 [01:04<01:43,  2.41s/it][A[A

 40%|████      | 28/70 [01:07<01:42,  2.44s/it][A[A

 41%|████▏     | 29/70 [01:09<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:12<01:37,  2.44s/it][A[A

 44%|████▍     | 31/70 [01:14<01:32,  2.38s/it][A[A

Batch loss: 1.3432326316833496




 46%|████▌     | 32/70 [01:16<01:30,  2.39s/it][A[A

 47%|████▋     | 33/70 [01:18<01:26,  2.35s/it][A[A

 49%|████▊     | 34/70 [01:21<01:25,  2.37s/it][A[A

 50%|█████     | 35/70 [01:23<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.34s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:16,  2.31s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:14,  2.33s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:11,  2.30s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:09,  2.33s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:06,  2.30s/it][A[A

Batch loss: 1.3578249216079712




 60%|██████    | 42/70 [01:39<01:05,  2.33s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:02,  2.30s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:00,  2.33s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.30s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:55,  2.33s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:52,  2.30s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.38s/it][A[A

 70%|███████   | 49/70 [01:56<00:49,  2.37s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.41s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.38s/it][A[A

Batch loss: 1.4513474702835083




 74%|███████▍  | 52/70 [02:03<00:43,  2.42s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.38s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.41s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.39s/it][A[A

 80%|████████  | 56/70 [02:13<00:33,  2.41s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:31,  2.40s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.43s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.37s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.38s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.35s/it][A[A

Batch loss: 1.3569319248199463




 89%|████████▊ | 62/70 [02:27<00:18,  2.36s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.33s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.34s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.32s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.35s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.31s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.33s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4441811186926705




 12%|█▎        | 1/8 [00:02<00:16,  2.32s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.31s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.26s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.30s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.26s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.28s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.19s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4937208592891693
Epoch:  67




  1%|▏         | 1/70 [00:02<02:53,  2.52s/it][A[A

Batch loss: 1.4936563968658447




  3%|▎         | 2/70 [00:05<02:51,  2.53s/it][A[A

  4%|▍         | 3/70 [00:07<02:45,  2.47s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.47s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.45s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.45s/it][A[A

 13%|█▎        | 9/70 [00:21<02:26,  2.41s/it][A[A

 14%|█▍        | 10/70 [00:24<02:24,  2.40s/it][A[A

 16%|█▌        | 11/70 [00:26<02:18,  2.34s/it][A[A

Batch loss: 1.3312175273895264




 17%|█▋        | 12/70 [00:28<02:16,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:31<02:13,  2.34s/it][A[A

 20%|██        | 14/70 [00:33<02:12,  2.37s/it][A[A

 21%|██▏       | 15/70 [00:35<02:09,  2.35s/it][A[A

 23%|██▎       | 16/70 [00:38<02:08,  2.37s/it][A[A

 24%|██▍       | 17/70 [00:40<02:04,  2.34s/it][A[A

 26%|██▌       | 18/70 [00:43<02:05,  2.41s/it][A[A

 27%|██▋       | 19/70 [00:45<02:00,  2.36s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.36s/it][A[A

 30%|███       | 21/70 [00:50<01:53,  2.32s/it][A[A

Batch loss: 1.4330823421478271




 31%|███▏      | 22/70 [00:52<01:53,  2.36s/it][A[A

 33%|███▎      | 23/70 [00:54<01:49,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:57<01:48,  2.35s/it][A[A

 36%|███▌      | 25/70 [00:59<01:44,  2.31s/it][A[A

 37%|███▋      | 26/70 [01:01<01:42,  2.34s/it][A[A

 39%|███▊      | 27/70 [01:04<01:40,  2.34s/it][A[A

 40%|████      | 28/70 [01:06<01:40,  2.39s/it][A[A

 41%|████▏     | 29/70 [01:08<01:37,  2.38s/it][A[A

 43%|████▎     | 30/70 [01:11<01:36,  2.42s/it][A[A

 44%|████▍     | 31/70 [01:13<01:33,  2.40s/it][A[A

Batch loss: 1.3706860542297363




 46%|████▌     | 32/70 [01:16<01:32,  2.44s/it][A[A

 47%|████▋     | 33/70 [01:18<01:29,  2.42s/it][A[A

 49%|████▊     | 34/70 [01:21<01:28,  2.45s/it][A[A

 50%|█████     | 35/70 [01:23<01:24,  2.42s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:22,  2.44s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:18,  2.39s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:16,  2.39s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:12,  2.34s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:10,  2.36s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:07,  2.32s/it][A[A

Batch loss: 1.3371453285217285




 60%|██████    | 42/70 [01:39<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:02,  2.31s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:00,  2.33s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.31s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:55,  2.33s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:53,  2.32s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.33s/it][A[A

 70%|███████   | 49/70 [01:56<00:48,  2.30s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:43,  2.31s/it][A[A

Batch loss: 1.3296713829040527




 74%|███████▍  | 52/70 [02:03<00:41,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.30s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.32s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.34s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.39s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:30,  2.38s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:29,  2.42s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.40s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:24,  2.44s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.41s/it][A[A

Batch loss: 1.3573671579360962




 89%|████████▊ | 62/70 [02:27<00:19,  2.45s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.42s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.44s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:12,  2.41s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.35s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.36s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.36s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.35s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4512151769229344




 12%|█▎        | 1/8 [00:02<00:16,  2.41s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.42s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.37s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.34s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.36s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.33s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.26s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3325428664684296
Epoch:  68




  1%|▏         | 1/70 [00:02<02:53,  2.52s/it][A[A

Batch loss: 1.3943052291870117




  3%|▎         | 2/70 [00:05<02:50,  2.51s/it][A[A

  4%|▍         | 3/70 [00:07<02:42,  2.42s/it][A[A

  6%|▌         | 4/70 [00:09<02:38,  2.41s/it][A[A

  7%|▋         | 5/70 [00:11<02:34,  2.37s/it][A[A

  9%|▊         | 6/70 [00:14<02:32,  2.38s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.34s/it][A[A

 11%|█▏        | 8/70 [00:18<02:25,  2.34s/it][A[A

 13%|█▎        | 9/70 [00:21<02:21,  2.32s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.31s/it][A[A

Batch loss: 1.338457465171814




 17%|█▋        | 12/70 [00:28<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.30s/it][A[A

 20%|██        | 14/70 [00:32<02:09,  2.32s/it][A[A

 21%|██▏       | 15/70 [00:35<02:06,  2.30s/it][A[A

 23%|██▎       | 16/70 [00:37<02:06,  2.34s/it][A[A

 24%|██▍       | 17/70 [00:39<02:02,  2.31s/it][A[A

 26%|██▌       | 18/70 [00:42<02:01,  2.34s/it][A[A

 27%|██▋       | 19/70 [00:44<01:57,  2.30s/it][A[A

 29%|██▊       | 20/70 [00:46<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:49<01:56,  2.37s/it][A[A

Batch loss: 1.441537618637085




 31%|███▏      | 22/70 [00:51<01:55,  2.42s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.40s/it][A[A

 34%|███▍      | 24/70 [00:56<01:52,  2.44s/it][A[A

 36%|███▌      | 25/70 [00:58<01:48,  2.42s/it][A[A

 37%|███▋      | 26/70 [01:01<01:47,  2.44s/it][A[A

 39%|███▊      | 27/70 [01:03<01:43,  2.41s/it][A[A

 40%|████      | 28/70 [01:06<01:42,  2.44s/it][A[A

 41%|████▏     | 29/70 [01:08<01:39,  2.42s/it][A[A

 43%|████▎     | 30/70 [01:11<01:37,  2.44s/it][A[A

 44%|████▍     | 31/70 [01:13<01:32,  2.38s/it][A[A

Batch loss: 1.3999567031860352




 46%|████▌     | 32/70 [01:15<01:30,  2.38s/it][A[A

 47%|████▋     | 33/70 [01:18<01:27,  2.35s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.36s/it][A[A

 50%|█████     | 35/70 [01:22<01:22,  2.35s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:16,  2.33s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:15,  2.35s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:12,  2.33s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.30s/it][A[A

Batch loss: 1.366574764251709




 60%|██████    | 42/70 [01:39<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.31s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:00,  2.33s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.30s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:55,  2.32s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:53,  2.32s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.38s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.37s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.40s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.39s/it][A[A

Batch loss: 1.4248249530792236




 74%|███████▍  | 52/70 [02:02<00:43,  2.42s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.40s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.43s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:36,  2.41s/it][A[A

 80%|████████  | 56/70 [02:12<00:34,  2.43s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:31,  2.42s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:29,  2.43s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.38s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.39s/it][A[A

Batch loss: 1.377875566482544




 89%|████████▊ | 62/70 [02:26<00:19,  2.43s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.37s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.36s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.32s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.35s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.32s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.35s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.33s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4292914748191834




 12%|█▎        | 1/8 [00:02<00:16,  2.37s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.35s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.30s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.29s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.26s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.29s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.29s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.20s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.300407439470291
Epoch:  69




  1%|▏         | 1/70 [00:02<02:53,  2.51s/it][A[A

Batch loss: 1.575194001197815




  3%|▎         | 2/70 [00:05<02:51,  2.52s/it][A[A

  4%|▍         | 3/70 [00:07<02:45,  2.47s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.48s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:37,  2.46s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.44s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:24<02:22,  2.38s/it][A[A

 16%|█▌        | 11/70 [00:26<02:17,  2.34s/it][A[A

Batch loss: 1.4434597492218018




 17%|█▋        | 12/70 [00:28<02:17,  2.37s/it][A[A

 19%|█▊        | 13/70 [00:31<02:13,  2.34s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.35s/it][A[A

 21%|██▏       | 15/70 [00:35<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:38<02:05,  2.33s/it][A[A

 24%|██▍       | 17/70 [00:40<02:01,  2.30s/it][A[A

 26%|██▌       | 18/70 [00:42<02:00,  2.32s/it][A[A

 27%|██▋       | 19/70 [00:44<01:57,  2.30s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.33s/it][A[A

 30%|███       | 21/70 [00:49<01:52,  2.30s/it][A[A

Batch loss: 1.3951427936553955




 31%|███▏      | 22/70 [00:51<01:51,  2.33s/it][A[A

 33%|███▎      | 23/70 [00:54<01:48,  2.30s/it][A[A

 34%|███▍      | 24/70 [00:56<01:46,  2.32s/it][A[A

 36%|███▌      | 25/70 [00:58<01:43,  2.29s/it][A[A

 37%|███▋      | 26/70 [01:01<01:41,  2.31s/it][A[A

 39%|███▊      | 27/70 [01:03<01:39,  2.32s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:08<01:36,  2.36s/it][A[A

 43%|████▎     | 30/70 [01:10<01:36,  2.40s/it][A[A

 44%|████▍     | 31/70 [01:13<01:32,  2.38s/it][A[A

Batch loss: 1.4286636114120483




 46%|████▌     | 32/70 [01:15<01:32,  2.42s/it][A[A

 47%|████▋     | 33/70 [01:18<01:28,  2.40s/it][A[A

 49%|████▊     | 34/70 [01:20<01:27,  2.43s/it][A[A

 50%|█████     | 35/70 [01:22<01:24,  2.40s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:22,  2.43s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:19,  2.41s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:16,  2.40s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.36s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:11,  2.39s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:08,  2.37s/it][A[A

Batch loss: 1.4177429676055908




 60%|██████    | 42/70 [01:39<01:07,  2.41s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:04,  2.39s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:02,  2.42s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.40s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:58,  2.43s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:55,  2.41s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:53,  2.44s/it][A[A

 70%|███████   | 49/70 [01:56<00:50,  2.41s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:48,  2.44s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.38s/it][A[A

Batch loss: 1.3269602060317993




 74%|███████▍  | 52/70 [02:03<00:42,  2.38s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.34s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:37,  2.35s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.33s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.34s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:29,  2.31s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:27,  2.33s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.33s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:20,  2.30s/it][A[A

Batch loss: 1.4065600633621216




 89%|████████▊ | 62/70 [02:26<00:18,  2.33s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.29s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:13,  2.32s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.30s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.32s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.29s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.32s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.34s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.436508011817932




 12%|█▎        | 1/8 [00:02<00:16,  2.40s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.40s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.35s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.35s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.37s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.34s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.26s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.277928590774536
Epoch:  70




  1%|▏         | 1/70 [00:02<02:53,  2.52s/it][A[A

Batch loss: 1.4738515615463257




  3%|▎         | 2/70 [00:05<02:51,  2.52s/it][A[A

  4%|▍         | 3/70 [00:07<02:43,  2.44s/it][A[A

  6%|▌         | 4/70 [00:09<02:40,  2.43s/it][A[A

  7%|▋         | 5/70 [00:12<02:35,  2.39s/it][A[A

  9%|▊         | 6/70 [00:14<02:32,  2.39s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.36s/it][A[A

 11%|█▏        | 8/70 [00:19<02:26,  2.36s/it][A[A

 13%|█▎        | 9/70 [00:21<02:21,  2.33s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.35s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.32s/it][A[A

Batch loss: 1.6988976001739502




 17%|█▋        | 12/70 [00:28<02:16,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:30<02:12,  2.33s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.35s/it][A[A

 21%|██▏       | 15/70 [00:35<02:06,  2.30s/it][A[A

 23%|██▎       | 16/70 [00:37<02:05,  2.33s/it][A[A

 24%|██▍       | 17/70 [00:39<02:01,  2.30s/it][A[A

 26%|██▌       | 18/70 [00:42<02:00,  2.32s/it][A[A

 27%|██▋       | 19/70 [00:44<01:57,  2.30s/it][A[A

 29%|██▊       | 20/70 [00:46<01:58,  2.36s/it][A[A

 30%|███       | 21/70 [00:49<01:55,  2.35s/it][A[A

Batch loss: 1.51992666721344




 31%|███▏      | 22/70 [00:51<01:54,  2.39s/it][A[A

 33%|███▎      | 23/70 [00:54<01:51,  2.37s/it][A[A

 34%|███▍      | 24/70 [00:56<01:50,  2.41s/it][A[A

 36%|███▌      | 25/70 [00:58<01:47,  2.38s/it][A[A

 37%|███▋      | 26/70 [01:01<01:46,  2.41s/it][A[A

 39%|███▊      | 27/70 [01:03<01:42,  2.38s/it][A[A

 40%|████      | 28/70 [01:06<01:40,  2.40s/it][A[A

 41%|████▏     | 29/70 [01:08<01:37,  2.38s/it][A[A

 43%|████▎     | 30/70 [01:10<01:35,  2.40s/it][A[A

 44%|████▍     | 31/70 [01:13<01:31,  2.34s/it][A[A

Batch loss: 1.4397683143615723




 46%|████▌     | 32/70 [01:15<01:29,  2.35s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.32s/it][A[A

 49%|████▊     | 34/70 [01:20<01:23,  2.32s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.29s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:18,  2.31s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:15,  2.28s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:13,  2.30s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:10,  2.27s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.28s/it][A[A

Batch loss: 1.5804258584976196




 60%|██████    | 42/70 [01:38<01:04,  2.32s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:01,  2.29s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:00,  2.32s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:57,  2.30s/it][A[A

 66%|██████▌   | 46/70 [01:47<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:49<00:53,  2.31s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:52,  2.39s/it][A[A

 70%|███████   | 49/70 [01:54<00:49,  2.38s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:48,  2.41s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:45,  2.39s/it][A[A

Batch loss: 1.5389891862869263




 74%|███████▍  | 52/70 [02:02<00:43,  2.41s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.39s/it][A[A

 77%|███████▋  | 54/70 [02:06<00:38,  2.41s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.38s/it][A[A

 80%|████████  | 56/70 [02:11<00:33,  2.40s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.38s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.36s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.35s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.31s/it][A[A

Batch loss: 1.3153613805770874




 89%|████████▊ | 62/70 [02:25<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:27<00:16,  2.30s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:13,  2.32s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.29s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.31s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:06,  2.28s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.30s/it][A[A

 99%|█████████▊| 69/70 [02:41<00:02,  2.32s/it][A[A

100%|██████████| 70/70 [02:42<00:00,  2.32s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4553480574062891




 12%|█▎        | 1/8 [00:02<00:15,  2.28s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.31s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.26s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.27s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.23s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.24s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.24s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.17s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.406912535429001
Epoch:  71




  1%|▏         | 1/70 [00:02<02:52,  2.50s/it][A[A

Batch loss: 1.4447901248931885




  3%|▎         | 2/70 [00:05<02:50,  2.50s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.45s/it][A[A

  6%|▌         | 4/70 [00:09<02:42,  2.46s/it][A[A

  7%|▋         | 5/70 [00:12<02:36,  2.42s/it][A[A

  9%|▊         | 6/70 [00:14<02:35,  2.42s/it][A[A

 10%|█         | 7/70 [00:16<02:30,  2.39s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.41s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.39s/it][A[A

 14%|█▍        | 10/70 [00:24<02:22,  2.38s/it][A[A

 16%|█▌        | 11/70 [00:26<02:18,  2.34s/it][A[A

Batch loss: 1.5262314081192017




 17%|█▋        | 12/70 [00:28<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:12,  2.32s/it][A[A

 20%|██        | 14/70 [00:33<02:10,  2.33s/it][A[A

 21%|██▏       | 15/70 [00:35<02:06,  2.30s/it][A[A

 23%|██▎       | 16/70 [00:37<02:04,  2.31s/it][A[A

 24%|██▍       | 17/70 [00:40<02:00,  2.28s/it][A[A

 26%|██▌       | 18/70 [00:42<01:59,  2.30s/it][A[A

 27%|██▋       | 19/70 [00:44<01:55,  2.27s/it][A[A

 29%|██▊       | 20/70 [00:46<01:55,  2.32s/it][A[A

 30%|███       | 21/70 [00:49<01:52,  2.29s/it][A[A

Batch loss: 1.3732707500457764




 31%|███▏      | 22/70 [00:51<01:50,  2.31s/it][A[A

 33%|███▎      | 23/70 [00:53<01:46,  2.28s/it][A[A

 34%|███▍      | 24/70 [00:56<01:45,  2.29s/it][A[A

 36%|███▌      | 25/70 [00:58<01:42,  2.27s/it][A[A

 37%|███▋      | 26/70 [01:00<01:40,  2.29s/it][A[A

 39%|███▊      | 27/70 [01:02<01:38,  2.28s/it][A[A

 40%|████      | 28/70 [01:05<01:37,  2.33s/it][A[A

 41%|████▏     | 29/70 [01:07<01:35,  2.33s/it][A[A

 43%|████▎     | 30/70 [01:10<01:34,  2.37s/it][A[A

 44%|████▍     | 31/70 [01:12<01:32,  2.37s/it][A[A

Batch loss: 1.5295798778533936




 46%|████▌     | 32/70 [01:14<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:17<01:27,  2.38s/it][A[A

 49%|████▊     | 34/70 [01:19<01:26,  2.41s/it][A[A

 50%|█████     | 35/70 [01:22<01:23,  2.39s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:21,  2.41s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:18,  2.38s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:15,  2.37s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:11,  2.32s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:09,  2.33s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.31s/it][A[A

Batch loss: 1.3383857011795044




 60%|██████    | 42/70 [01:38<01:06,  2.37s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:03,  2.35s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:01,  2.38s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:58,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.39s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:54,  2.37s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:52,  2.39s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.37s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:48,  2.40s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.39s/it][A[A

Batch loss: 1.4817957878112793




 74%|███████▍  | 52/70 [02:02<00:43,  2.43s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.36s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.34s/it][A[A

 80%|████████  | 56/70 [02:11<00:32,  2.34s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.31s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:27,  2.32s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.31s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.34s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.31s/it][A[A

Batch loss: 1.833542823791504




 89%|████████▊ | 62/70 [02:25<00:18,  2.32s/it][A[A

 90%|█████████ | 63/70 [02:27<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:14,  2.34s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.30s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.32s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:06,  2.29s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.31s/it][A[A

 99%|█████████▊| 69/70 [02:41<00:02,  2.29s/it][A[A

100%|██████████| 70/70 [02:42<00:00,  2.32s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4254226786749704




 12%|█▎        | 1/8 [00:02<00:16,  2.37s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.38s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.33s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.33s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.35s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.33s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.24s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4181784093379974
Epoch:  72




  1%|▏         | 1/70 [00:02<02:52,  2.50s/it][A[A

Batch loss: 1.4152483940124512




  3%|▎         | 2/70 [00:05<02:50,  2.51s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.45s/it][A[A

  6%|▌         | 4/70 [00:09<02:41,  2.44s/it][A[A

  7%|▋         | 5/70 [00:12<02:35,  2.39s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:26,  2.33s/it][A[A

 11%|█▏        | 8/70 [00:19<02:27,  2.38s/it][A[A

 13%|█▎        | 9/70 [00:21<02:23,  2.35s/it][A[A

 14%|█▍        | 10/70 [00:23<02:23,  2.39s/it][A[A

 16%|█▌        | 11/70 [00:26<02:18,  2.35s/it][A[A

Batch loss: 1.409787654876709




 17%|█▋        | 12/70 [00:28<02:17,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:30<02:12,  2.33s/it][A[A

 20%|██        | 14/70 [00:33<02:10,  2.34s/it][A[A

 21%|██▏       | 15/70 [00:35<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:37<02:07,  2.37s/it][A[A

 24%|██▍       | 17/70 [00:40<02:03,  2.33s/it][A[A

 26%|██▌       | 18/70 [00:42<02:02,  2.35s/it][A[A

 27%|██▋       | 19/70 [00:44<01:58,  2.33s/it][A[A

 29%|██▊       | 20/70 [00:47<01:59,  2.38s/it][A[A

 30%|███       | 21/70 [00:49<01:56,  2.38s/it][A[A

Batch loss: 1.8044008016586304




 31%|███▏      | 22/70 [00:52<01:56,  2.42s/it][A[A

 33%|███▎      | 23/70 [00:54<01:53,  2.42s/it][A[A

 34%|███▍      | 24/70 [00:57<01:53,  2.47s/it][A[A

 36%|███▌      | 25/70 [00:59<01:50,  2.46s/it][A[A

 37%|███▋      | 26/70 [01:02<01:50,  2.51s/it][A[A

 39%|███▊      | 27/70 [01:04<01:45,  2.46s/it][A[A

 40%|████      | 28/70 [01:07<01:43,  2.47s/it][A[A

 41%|████▏     | 29/70 [01:09<01:39,  2.42s/it][A[A

 43%|████▎     | 30/70 [01:11<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:14<01:33,  2.40s/it][A[A

Batch loss: 1.3631166219711304




 46%|████▌     | 32/70 [01:16<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:18<01:26,  2.35s/it][A[A

 49%|████▊     | 34/70 [01:21<01:25,  2.38s/it][A[A

 50%|█████     | 35/70 [01:23<01:21,  2.32s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.33s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:15,  2.30s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:13,  2.31s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:10,  2.28s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:08,  2.30s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:06,  2.29s/it][A[A

Batch loss: 1.4684176445007324




 60%|██████    | 42/70 [01:39<01:06,  2.37s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:03,  2.36s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:01,  2.36s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:58,  2.34s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:52,  2.30s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:51,  2.33s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.32s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.37s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.36s/it][A[A

Batch loss: 1.4476765394210815




 74%|███████▍  | 52/70 [02:03<00:43,  2.40s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.41s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.39s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.41s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:30,  2.38s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.37s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:20,  2.33s/it][A[A

Batch loss: 1.3173534870147705




 89%|████████▊ | 62/70 [02:26<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:13,  2.32s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.29s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.32s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.29s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.30s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.27s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4269102709633963




 12%|█▎        | 1/8 [00:02<00:15,  2.26s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.29s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.24s/it][A[A

 50%|█████     | 4/8 [00:08<00:08,  2.25s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.21s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.22s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.20s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.15s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3898858428001404
Epoch:  73




  1%|▏         | 1/70 [00:02<02:51,  2.48s/it][A[A

Batch loss: 1.3387526273727417




  3%|▎         | 2/70 [00:04<02:48,  2.48s/it][A[A

  4%|▍         | 3/70 [00:07<02:43,  2.43s/it][A[A

  6%|▌         | 4/70 [00:09<02:41,  2.44s/it][A[A

  7%|▋         | 5/70 [00:12<02:36,  2.40s/it][A[A

  9%|▊         | 6/70 [00:14<02:35,  2.43s/it][A[A

 10%|█         | 7/70 [00:16<02:30,  2.39s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.41s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:24<02:24,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:18,  2.35s/it][A[A

Batch loss: 1.315431833267212




 17%|█▋        | 12/70 [00:28<02:16,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.31s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.35s/it][A[A

 21%|██▏       | 15/70 [00:35<02:08,  2.34s/it][A[A

 23%|██▎       | 16/70 [00:38<02:08,  2.38s/it][A[A

 24%|██▍       | 17/70 [00:40<02:05,  2.36s/it][A[A

 26%|██▌       | 18/70 [00:42<02:03,  2.38s/it][A[A

 27%|██▋       | 19/70 [00:45<02:00,  2.37s/it][A[A

 29%|██▊       | 20/70 [00:47<02:00,  2.41s/it][A[A

 30%|███       | 21/70 [00:50<01:56,  2.38s/it][A[A

Batch loss: 1.4310524463653564




 31%|███▏      | 22/70 [00:52<01:55,  2.41s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.39s/it][A[A

 34%|███▍      | 24/70 [00:57<01:50,  2.40s/it][A[A

 36%|███▌      | 25/70 [00:59<01:45,  2.34s/it][A[A

 37%|███▋      | 26/70 [01:01<01:43,  2.35s/it][A[A

 39%|███▊      | 27/70 [01:04<01:39,  2.32s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.36s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.32s/it][A[A

 43%|████▎     | 30/70 [01:11<01:33,  2.33s/it][A[A

 44%|████▍     | 31/70 [01:13<01:29,  2.30s/it][A[A

Batch loss: 1.3316084146499634




 46%|████▌     | 32/70 [01:15<01:28,  2.32s/it][A[A

 47%|████▋     | 33/70 [01:17<01:24,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.34s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.30s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.33s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:16,  2.31s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:14,  2.32s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:11,  2.30s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.29s/it][A[A

Batch loss: 1.329623818397522




 60%|██████    | 42/70 [01:38<01:05,  2.34s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.38s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:54,  2.37s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.40s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.37s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.40s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.38s/it][A[A

Batch loss: 1.3534085750579834




 74%|███████▍  | 52/70 [02:02<00:43,  2.40s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:39,  2.35s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.36s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.31s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.33s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:29,  2.30s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:27,  2.32s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.28s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:22,  2.30s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.27s/it][A[A

Batch loss: 1.398532748222351




 89%|████████▊ | 62/70 [02:25<00:18,  2.30s/it][A[A

 90%|█████████ | 63/70 [02:27<00:15,  2.27s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:13,  2.29s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.28s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.30s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:06,  2.28s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.30s/it][A[A

 99%|█████████▊| 69/70 [02:41<00:02,  2.27s/it][A[A

100%|██████████| 70/70 [02:42<00:00,  2.32s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4541312422071184




 12%|█▎        | 1/8 [00:02<00:16,  2.36s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.36s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.32s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.33s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.30s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.31s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.29s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.21s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.334019213914871
Epoch:  74




  1%|▏         | 1/70 [00:02<02:53,  2.52s/it][A[A

Batch loss: 1.2995696067810059




  3%|▎         | 2/70 [00:05<02:51,  2.53s/it][A[A

  4%|▍         | 3/70 [00:07<02:46,  2.48s/it][A[A

  6%|▌         | 4/70 [00:09<02:44,  2.49s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.45s/it][A[A

  9%|▊         | 6/70 [00:14<02:34,  2.42s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.36s/it][A[A

 11%|█▏        | 8/70 [00:19<02:27,  2.37s/it][A[A

 13%|█▎        | 9/70 [00:21<02:21,  2.32s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:26<02:16,  2.32s/it][A[A

Batch loss: 1.986948847770691




 17%|█▋        | 12/70 [00:28<02:16,  2.35s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.31s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.34s/it][A[A

 21%|██▏       | 15/70 [00:35<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:37<02:07,  2.35s/it][A[A

 24%|██▍       | 17/70 [00:40<02:03,  2.32s/it][A[A

 26%|██▌       | 18/70 [00:42<02:01,  2.35s/it][A[A

 27%|██▋       | 19/70 [00:44<01:58,  2.32s/it][A[A

 29%|██▊       | 20/70 [00:47<02:01,  2.44s/it][A[A

 30%|███       | 21/70 [00:49<01:57,  2.39s/it][A[A

Batch loss: 1.3829593658447266




 31%|███▏      | 22/70 [00:52<01:56,  2.42s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.40s/it][A[A

 34%|███▍      | 24/70 [00:57<01:52,  2.44s/it][A[A

 36%|███▌      | 25/70 [00:59<01:49,  2.43s/it][A[A

 37%|███▋      | 26/70 [01:02<01:47,  2.45s/it][A[A

 39%|███▊      | 27/70 [01:04<01:44,  2.44s/it][A[A

 40%|████      | 28/70 [01:06<01:42,  2.45s/it][A[A

 41%|████▏     | 29/70 [01:09<01:40,  2.45s/it][A[A

 43%|████▎     | 30/70 [01:11<01:38,  2.47s/it][A[A

 44%|████▍     | 31/70 [01:14<01:35,  2.44s/it][A[A

Batch loss: 1.4732924699783325




 46%|████▌     | 32/70 [01:16<01:32,  2.44s/it][A[A

 47%|████▋     | 33/70 [01:19<01:28,  2.39s/it][A[A

 49%|████▊     | 34/70 [01:21<01:26,  2.39s/it][A[A

 50%|█████     | 35/70 [01:23<01:23,  2.38s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:21,  2.40s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:17,  2.35s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:15,  2.36s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:11,  2.32s/it][A[A

 57%|█████▋    | 40/70 [01:35<01:10,  2.33s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:06,  2.30s/it][A[A

Batch loss: 1.5234726667404175




 60%|██████    | 42/70 [01:40<01:05,  2.32s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:01,  2.29s/it][A[A

 63%|██████▎   | 44/70 [01:44<00:59,  2.31s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:57,  2.28s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:55,  2.31s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:52,  2.29s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:50,  2.31s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.29s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.34s/it][A[A

Batch loss: 1.3307514190673828




 74%|███████▍  | 52/70 [02:03<00:42,  2.38s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.41s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.38s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.41s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:30,  2.38s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.40s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:24,  2.42s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.36s/it][A[A

Batch loss: 1.3318896293640137




 89%|████████▊ | 62/70 [02:27<00:19,  2.38s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.35s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.38s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.33s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.30s/it][A[A

 97%|█████████▋| 68/70 [02:41<00:04,  2.34s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.31s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4725723794528416




 12%|█▎        | 1/8 [00:02<00:16,  2.39s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.38s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.31s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.30s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.26s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.27s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.24s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.17s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3729454278945923
Epoch:  75




  1%|▏         | 1/70 [00:02<02:54,  2.52s/it][A[A

Batch loss: 1.6255136728286743




  3%|▎         | 2/70 [00:05<02:51,  2.53s/it][A[A

  4%|▍         | 3/70 [00:07<02:46,  2.49s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.48s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.46s/it][A[A

  9%|▊         | 6/70 [00:14<02:37,  2.46s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.44s/it][A[A

 13%|█▎        | 9/70 [00:21<02:26,  2.40s/it][A[A

 14%|█▍        | 10/70 [00:24<02:25,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:21,  2.40s/it][A[A

Batch loss: 1.8570963144302368




 17%|█▋        | 12/70 [00:29<02:19,  2.40s/it][A[A

 19%|█▊        | 13/70 [00:31<02:13,  2.35s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.36s/it][A[A

 21%|██▏       | 15/70 [00:36<02:09,  2.35s/it][A[A

 23%|██▎       | 16/70 [00:38<02:08,  2.39s/it][A[A

 24%|██▍       | 17/70 [00:40<02:05,  2.37s/it][A[A

 26%|██▌       | 18/70 [00:43<02:04,  2.40s/it][A[A

 27%|██▋       | 19/70 [00:45<02:00,  2.37s/it][A[A

 29%|██▊       | 20/70 [00:48<01:59,  2.40s/it][A[A

 30%|███       | 21/70 [00:50<01:56,  2.37s/it][A[A

Batch loss: 1.3215500116348267




 31%|███▏      | 22/70 [00:52<01:55,  2.41s/it][A[A

 33%|███▎      | 23/70 [00:55<01:51,  2.37s/it][A[A

 34%|███▍      | 24/70 [00:57<01:50,  2.39s/it][A[A

 36%|███▌      | 25/70 [00:59<01:46,  2.36s/it][A[A

 37%|███▋      | 26/70 [01:02<01:43,  2.36s/it][A[A

 39%|███▊      | 27/70 [01:04<01:39,  2.32s/it][A[A

 40%|████      | 28/70 [01:06<01:38,  2.34s/it][A[A

 41%|████▏     | 29/70 [01:09<01:34,  2.31s/it][A[A

 43%|████▎     | 30/70 [01:11<01:33,  2.33s/it][A[A

 44%|████▍     | 31/70 [01:13<01:29,  2.30s/it][A[A

Batch loss: 1.6007909774780273




 46%|████▌     | 32/70 [01:16<01:27,  2.32s/it][A[A

 47%|████▋     | 33/70 [01:18<01:24,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:20<01:23,  2.31s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.29s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:18,  2.30s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:15,  2.28s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:13,  2.30s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:10,  2.28s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:09,  2.30s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.28s/it][A[A

Batch loss: 1.6869604587554932




 60%|██████    | 42/70 [01:39<01:04,  2.31s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:02,  2.32s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:01,  2.38s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.38s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.40s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.37s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.41s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.39s/it][A[A

Batch loss: 1.3784191608428955




 74%|███████▍  | 52/70 [02:03<00:43,  2.43s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.40s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.36s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.38s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.35s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.36s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.34s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.37s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.33s/it][A[A

Batch loss: 1.4084290266036987




 89%|████████▊ | 62/70 [02:26<00:18,  2.35s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:13,  2.33s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.30s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.33s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.36s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.32s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.473613212789808




 12%|█▎        | 1/8 [00:02<00:17,  2.48s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.47s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.41s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.42s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.37s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.37s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.34s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.27s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.466361850500107
Epoch:  76




  1%|▏         | 1/70 [00:02<02:50,  2.47s/it][A[A

Batch loss: 1.3371906280517578




  3%|▎         | 2/70 [00:05<02:50,  2.51s/it][A[A

  4%|▍         | 3/70 [00:07<02:45,  2.48s/it][A[A

  6%|▌         | 4/70 [00:10<02:46,  2.53s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.45s/it][A[A

  9%|▊         | 6/70 [00:14<02:36,  2.44s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.44s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.39s/it][A[A

 14%|█▍        | 10/70 [00:24<02:24,  2.40s/it][A[A

 16%|█▌        | 11/70 [00:26<02:19,  2.36s/it][A[A

Batch loss: 1.3275078535079956




 17%|█▋        | 12/70 [00:29<02:17,  2.38s/it][A[A

 19%|█▊        | 13/70 [00:31<02:13,  2.34s/it][A[A

 20%|██        | 14/70 [00:33<02:12,  2.36s/it][A[A

 21%|██▏       | 15/70 [00:35<02:08,  2.34s/it][A[A

 23%|██▎       | 16/70 [00:38<02:07,  2.36s/it][A[A

 24%|██▍       | 17/70 [00:40<02:03,  2.33s/it][A[A

 26%|██▌       | 18/70 [00:43<02:03,  2.37s/it][A[A

 27%|██▋       | 19/70 [00:45<01:58,  2.33s/it][A[A

 29%|██▊       | 20/70 [00:47<01:57,  2.36s/it][A[A

 30%|███       | 21/70 [00:50<01:54,  2.35s/it][A[A

Batch loss: 1.32508385181427




 31%|███▏      | 22/70 [00:52<01:55,  2.40s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.39s/it][A[A

 34%|███▍      | 24/70 [00:57<01:54,  2.49s/it][A[A

 36%|███▌      | 25/70 [01:00<01:52,  2.50s/it][A[A

 37%|███▋      | 26/70 [01:02<01:50,  2.51s/it][A[A

 39%|███▊      | 27/70 [01:05<01:46,  2.47s/it][A[A

 40%|████      | 28/70 [01:07<01:44,  2.48s/it][A[A

 41%|████▏     | 29/70 [01:10<01:40,  2.44s/it][A[A

 43%|████▎     | 30/70 [01:12<01:38,  2.47s/it][A[A

 44%|████▍     | 31/70 [01:14<01:34,  2.43s/it][A[A

Batch loss: 1.3379852771759033




 46%|████▌     | 32/70 [01:17<01:32,  2.42s/it][A[A

 47%|████▋     | 33/70 [01:19<01:28,  2.38s/it][A[A

 49%|████▊     | 34/70 [01:21<01:25,  2.38s/it][A[A

 50%|█████     | 35/70 [01:24<01:23,  2.38s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:21,  2.39s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:17,  2.35s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:15,  2.36s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:12,  2.32s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:07,  2.32s/it][A[A

Batch loss: 1.337725043296814




 60%|██████    | 42/70 [01:40<01:06,  2.36s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:01,  2.35s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:57,  2.32s/it][A[A

 66%|██████▌   | 46/70 [01:50<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:53,  2.31s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:51,  2.33s/it][A[A

 70%|███████   | 49/70 [01:57<00:49,  2.33s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:47,  2.39s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.38s/it][A[A

Batch loss: 1.4095826148986816




 74%|███████▍  | 52/70 [02:04<00:43,  2.42s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:40,  2.41s/it][A[A

 77%|███████▋  | 54/70 [02:09<00:39,  2.45s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:36,  2.44s/it][A[A

 80%|████████  | 56/70 [02:14<00:34,  2.46s/it][A[A

 81%|████████▏ | 57/70 [02:16<00:31,  2.43s/it][A[A

 83%|████████▎ | 58/70 [02:19<00:29,  2.46s/it][A[A

 84%|████████▍ | 59/70 [02:21<00:26,  2.44s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:24,  2.44s/it][A[A

 87%|████████▋ | 61/70 [02:26<00:21,  2.38s/it][A[A

Batch loss: 1.5533965826034546




 89%|████████▊ | 62/70 [02:28<00:19,  2.39s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.37s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:14,  2.41s/it][A[A

 93%|█████████▎| 65/70 [02:35<00:12,  2.41s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.44s/it][A[A

 96%|█████████▌| 67/70 [02:40<00:07,  2.41s/it][A[A

 97%|█████████▋| 68/70 [02:43<00:04,  2.43s/it][A[A

 99%|█████████▊| 69/70 [02:45<00:02,  2.42s/it][A[A

100%|██████████| 70/70 [02:46<00:00,  2.38s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4160585028784616




 12%|█▎        | 1/8 [00:02<00:17,  2.43s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.43s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.38s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.36s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.30s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.26s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.20s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3061273992061615
Epoch:  77




  1%|▏         | 1/70 [00:02<02:48,  2.44s/it][A[A

Batch loss: 1.3728611469268799




  3%|▎         | 2/70 [00:04<02:45,  2.43s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:37,  2.38s/it][A[A

  7%|▋         | 5/70 [00:11<02:32,  2.35s/it][A[A

  9%|▊         | 6/70 [00:14<02:32,  2.38s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.34s/it][A[A

 11%|█▏        | 8/70 [00:18<02:26,  2.37s/it][A[A

 13%|█▎        | 9/70 [00:21<02:22,  2.33s/it][A[A

 14%|█▍        | 10/70 [00:23<02:21,  2.36s/it][A[A

 16%|█▌        | 11/70 [00:25<02:17,  2.33s/it][A[A

Batch loss: 1.374181866645813




 17%|█▋        | 12/70 [00:28<02:17,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:30<02:14,  2.35s/it][A[A

 20%|██        | 14/70 [00:33<02:15,  2.41s/it][A[A

 21%|██▏       | 15/70 [00:35<02:13,  2.42s/it][A[A

 23%|██▎       | 16/70 [00:38<02:11,  2.44s/it][A[A

 24%|██▍       | 17/70 [00:40<02:08,  2.42s/it][A[A

 26%|██▌       | 18/70 [00:42<02:07,  2.45s/it][A[A

 27%|██▋       | 19/70 [00:45<02:03,  2.42s/it][A[A

 29%|██▊       | 20/70 [00:47<02:02,  2.44s/it][A[A

 30%|███       | 21/70 [00:50<01:58,  2.42s/it][A[A

Batch loss: 1.439339280128479




 31%|███▏      | 22/70 [00:52<01:58,  2.47s/it][A[A

 33%|███▎      | 23/70 [00:55<01:54,  2.43s/it][A[A

 34%|███▍      | 24/70 [00:57<01:52,  2.44s/it][A[A

 36%|███▌      | 25/70 [00:59<01:47,  2.38s/it][A[A

 37%|███▋      | 26/70 [01:02<01:44,  2.38s/it][A[A

 39%|███▊      | 27/70 [01:04<01:41,  2.35s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:09<01:35,  2.33s/it][A[A

 43%|████▎     | 30/70 [01:11<01:34,  2.37s/it][A[A

 44%|████▍     | 31/70 [01:13<01:31,  2.34s/it][A[A

Batch loss: 1.3643027544021606




 46%|████▌     | 32/70 [01:16<01:29,  2.36s/it][A[A

 47%|████▋     | 33/70 [01:18<01:26,  2.33s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.35s/it][A[A

 50%|█████     | 35/70 [01:23<01:21,  2.33s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.35s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:16,  2.31s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:14,  2.32s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:11,  2.29s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.28s/it][A[A

Batch loss: 1.3232734203338623




 60%|██████    | 42/70 [01:39<01:05,  2.35s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:03,  2.35s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:01,  2.38s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.38s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:55,  2.39s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:53,  2.42s/it][A[A

 70%|███████   | 49/70 [01:56<00:50,  2.40s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:48,  2.42s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.40s/it][A[A

Batch loss: 1.3737601041793823




 74%|███████▍  | 52/70 [02:03<00:43,  2.41s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:40,  2.35s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:37,  2.35s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:34,  2.32s/it][A[A

 80%|████████  | 56/70 [02:12<00:32,  2.33s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:29,  2.30s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:27,  2.32s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:25,  2.28s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.31s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:20,  2.29s/it][A[A

Batch loss: 1.4746018648147583




 89%|████████▊ | 62/70 [02:26<00:18,  2.31s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.29s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:13,  2.33s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.30s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.30s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.35s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.32s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.43372084072658




 12%|█▎        | 1/8 [00:02<00:16,  2.41s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.42s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.36s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.34s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.36s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.32s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.25s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.357725828886032
Epoch:  78




  1%|▏         | 1/70 [00:02<02:51,  2.49s/it][A[A

Batch loss: 1.4617946147918701




  3%|▎         | 2/70 [00:05<02:49,  2.50s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.46s/it][A[A

  6%|▌         | 4/70 [00:09<02:39,  2.42s/it][A[A

  7%|▋         | 5/70 [00:11<02:33,  2.36s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.36s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:18<02:26,  2.35s/it][A[A

 13%|█▎        | 9/70 [00:21<02:21,  2.32s/it][A[A

 14%|█▍        | 10/70 [00:23<02:19,  2.33s/it][A[A

 16%|█▌        | 11/70 [00:25<02:15,  2.30s/it][A[A

Batch loss: 1.4468116760253906




 17%|█▋        | 12/70 [00:28<02:14,  2.32s/it][A[A

 19%|█▊        | 13/70 [00:30<02:10,  2.30s/it][A[A

 20%|██        | 14/70 [00:32<02:09,  2.31s/it][A[A

 21%|██▏       | 15/70 [00:34<02:05,  2.28s/it][A[A

 23%|██▎       | 16/70 [00:37<02:05,  2.32s/it][A[A

 24%|██▍       | 17/70 [00:39<02:01,  2.30s/it][A[A

 26%|██▌       | 18/70 [00:41<02:00,  2.32s/it][A[A

 27%|██▋       | 19/70 [00:44<01:57,  2.29s/it][A[A

 29%|██▊       | 20/70 [00:46<01:55,  2.31s/it][A[A

 30%|███       | 21/70 [00:48<01:52,  2.30s/it][A[A

Batch loss: 1.4799975156784058




 31%|███▏      | 22/70 [00:51<01:53,  2.36s/it][A[A

 33%|███▎      | 23/70 [00:53<01:49,  2.34s/it][A[A

 34%|███▍      | 24/70 [00:56<01:48,  2.37s/it][A[A

 36%|███▌      | 25/70 [00:58<01:46,  2.36s/it][A[A

 37%|███▋      | 26/70 [01:00<01:45,  2.40s/it][A[A

 39%|███▊      | 27/70 [01:03<01:42,  2.38s/it][A[A

 40%|████      | 28/70 [01:05<01:41,  2.42s/it][A[A

 41%|████▏     | 29/70 [01:08<01:38,  2.40s/it][A[A

 43%|████▎     | 30/70 [01:10<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:12<01:33,  2.41s/it][A[A

Batch loss: 1.3480502367019653




 46%|████▌     | 32/70 [01:15<01:32,  2.43s/it][A[A

 47%|████▋     | 33/70 [01:17<01:27,  2.37s/it][A[A

 49%|████▊     | 34/70 [01:20<01:25,  2.36s/it][A[A

 50%|█████     | 35/70 [01:22<01:22,  2.35s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:19,  2.34s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:15,  2.30s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:14,  2.33s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:11,  2.30s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.28s/it][A[A

Batch loss: 1.3866684436798096




 60%|██████    | 42/70 [01:38<01:05,  2.33s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:01,  2.30s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:00,  2.31s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:57,  2.29s/it][A[A

 66%|██████▌   | 46/70 [01:47<00:55,  2.31s/it][A[A

 67%|██████▋   | 47/70 [01:49<00:52,  2.28s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:50,  2.31s/it][A[A

 70%|███████   | 49/70 [01:54<00:48,  2.30s/it][A[A

 71%|███████▏  | 50/70 [01:56<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:44,  2.35s/it][A[A

Batch loss: 1.7386058568954468




 74%|███████▍  | 52/70 [02:01<00:42,  2.39s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:06<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:08<00:35,  2.37s/it][A[A

 80%|████████  | 56/70 [02:11<00:33,  2.40s/it][A[A

 81%|████████▏ | 57/70 [02:13<00:30,  2.38s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:28,  2.41s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:26,  2.39s/it][A[A

 86%|████████▌ | 60/70 [02:20<00:23,  2.39s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:21,  2.34s/it][A[A

Batch loss: 1.4568828344345093




 89%|████████▊ | 62/70 [02:25<00:18,  2.35s/it][A[A

 90%|█████████ | 63/70 [02:27<00:16,  2.34s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:14,  2.39s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.37s/it][A[A

 94%|█████████▍| 66/70 [02:35<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:07,  2.37s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.38s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.33s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4421229907444546




 12%|█▎        | 1/8 [00:02<00:16,  2.43s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.41s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.36s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.37s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.30s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.28s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.23s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.19s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.400372177362442
Epoch:  79




  1%|▏         | 1/70 [00:02<02:44,  2.39s/it][A[A

Batch loss: 1.326642394065857




  3%|▎         | 2/70 [00:04<02:42,  2.40s/it][A[A

  4%|▍         | 3/70 [00:07<02:37,  2.34s/it][A[A

  6%|▌         | 4/70 [00:09<02:34,  2.34s/it][A[A

  7%|▋         | 5/70 [00:11<02:29,  2.30s/it][A[A

  9%|▊         | 6/70 [00:13<02:28,  2.32s/it][A[A

 10%|█         | 7/70 [00:16<02:24,  2.30s/it][A[A

 11%|█▏        | 8/70 [00:18<02:23,  2.31s/it][A[A

 13%|█▎        | 9/70 [00:20<02:19,  2.28s/it][A[A

 14%|█▍        | 10/70 [00:23<02:17,  2.30s/it][A[A

 16%|█▌        | 11/70 [00:25<02:14,  2.28s/it][A[A

Batch loss: 1.4002656936645508




 17%|█▋        | 12/70 [00:27<02:14,  2.32s/it][A[A

 19%|█▊        | 13/70 [00:29<02:11,  2.30s/it][A[A

 20%|██        | 14/70 [00:32<02:10,  2.33s/it][A[A

 21%|██▏       | 15/70 [00:34<02:08,  2.34s/it][A[A

 23%|██▎       | 16/70 [00:37<02:08,  2.38s/it][A[A

 24%|██▍       | 17/70 [00:39<02:05,  2.36s/it][A[A

 26%|██▌       | 18/70 [00:41<02:04,  2.39s/it][A[A

 27%|██▋       | 19/70 [00:44<02:00,  2.37s/it][A[A

 29%|██▊       | 20/70 [00:46<02:00,  2.40s/it][A[A

 30%|███       | 21/70 [00:49<01:56,  2.38s/it][A[A

Batch loss: 1.3281042575836182




 31%|███▏      | 22/70 [00:51<01:55,  2.41s/it][A[A

 33%|███▎      | 23/70 [00:53<01:52,  2.39s/it][A[A

 34%|███▍      | 24/70 [00:56<01:51,  2.42s/it][A[A

 36%|███▌      | 25/70 [00:58<01:47,  2.40s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.39s/it][A[A

 39%|███▊      | 27/70 [01:03<01:40,  2.33s/it][A[A

 40%|████      | 28/70 [01:05<01:38,  2.36s/it][A[A

 41%|████▏     | 29/70 [01:07<01:34,  2.31s/it][A[A

 43%|████▎     | 30/70 [01:10<01:33,  2.33s/it][A[A

 44%|████▍     | 31/70 [01:12<01:29,  2.30s/it][A[A

Batch loss: 1.6571245193481445




 46%|████▌     | 32/70 [01:14<01:28,  2.33s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:19<01:23,  2.32s/it][A[A

 50%|█████     | 35/70 [01:21<01:20,  2.29s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:18,  2.31s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:15,  2.29s/it][A[A

 54%|█████▍    | 38/70 [01:28<01:13,  2.31s/it][A[A

 56%|█████▌    | 39/70 [01:30<01:11,  2.30s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:35<01:06,  2.29s/it][A[A

Batch loss: 1.3366966247558594




 60%|██████    | 42/70 [01:38<01:05,  2.33s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:42<01:01,  2.38s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:59,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:47<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:49<00:54,  2.38s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:53,  2.41s/it][A[A

 70%|███████   | 49/70 [01:54<00:50,  2.38s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:48,  2.41s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:45,  2.39s/it][A[A

Batch loss: 1.3222023248672485




 74%|███████▍  | 52/70 [02:02<00:43,  2.42s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.40s/it][A[A

 77%|███████▋  | 54/70 [02:06<00:38,  2.39s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.34s/it][A[A

 80%|████████  | 56/70 [02:11<00:32,  2.34s/it][A[A

 81%|████████▏ | 57/70 [02:13<00:30,  2.32s/it][A[A

 83%|████████▎ | 58/70 [02:15<00:27,  2.33s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:20<00:23,  2.31s/it][A[A

 87%|████████▋ | 61/70 [02:22<00:20,  2.29s/it][A[A

Batch loss: 1.4230365753173828




 89%|████████▊ | 62/70 [02:25<00:18,  2.31s/it][A[A

 90%|█████████ | 63/70 [02:27<00:15,  2.28s/it][A[A

 91%|█████████▏| 64/70 [02:29<00:13,  2.31s/it][A[A

 93%|█████████▎| 65/70 [02:31<00:11,  2.29s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.30s/it][A[A

 96%|█████████▌| 67/70 [02:36<00:06,  2.27s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.33s/it][A[A

 99%|█████████▊| 69/70 [02:41<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:41<00:00,  2.31s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4189495631626674




 12%|█▎        | 1/8 [00:02<00:16,  2.31s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.34s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.31s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.33s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.32s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.30s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.23s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4228675961494446
Epoch:  80




  1%|▏         | 1/70 [00:02<02:52,  2.50s/it][A[A

Batch loss: 1.4010103940963745




  3%|▎         | 2/70 [00:05<02:50,  2.51s/it][A[A

  4%|▍         | 3/70 [00:07<02:47,  2.50s/it][A[A

  6%|▌         | 4/70 [00:10<02:46,  2.52s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:34,  2.42s/it][A[A

 10%|█         | 7/70 [00:16<02:30,  2.38s/it][A[A

 11%|█▏        | 8/70 [00:19<02:28,  2.40s/it][A[A

 13%|█▎        | 9/70 [00:21<02:22,  2.34s/it][A[A

 14%|█▍        | 10/70 [00:24<02:21,  2.36s/it][A[A

 16%|█▌        | 11/70 [00:26<02:16,  2.31s/it][A[A

Batch loss: 1.3562766313552856




 17%|█▋        | 12/70 [00:28<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:11,  2.30s/it][A[A

 20%|██        | 14/70 [00:33<02:09,  2.31s/it][A[A

 21%|██▏       | 15/70 [00:35<02:05,  2.28s/it][A[A

 23%|██▎       | 16/70 [00:37<02:05,  2.32s/it][A[A

 24%|██▍       | 17/70 [00:40<02:01,  2.29s/it][A[A

 26%|██▌       | 18/70 [00:42<01:59,  2.31s/it][A[A

 27%|██▋       | 19/70 [00:44<01:56,  2.29s/it][A[A

 29%|██▊       | 20/70 [00:46<01:55,  2.31s/it][A[A

 30%|███       | 21/70 [00:49<01:51,  2.28s/it][A[A

Batch loss: 1.3584147691726685




 31%|███▏      | 22/70 [00:51<01:51,  2.32s/it][A[A

 33%|███▎      | 23/70 [00:53<01:49,  2.33s/it][A[A

 34%|███▍      | 24/70 [00:56<01:49,  2.37s/it][A[A

 36%|███▌      | 25/70 [00:58<01:46,  2.36s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.39s/it][A[A

 39%|███▊      | 27/70 [01:03<01:41,  2.37s/it][A[A

 40%|████      | 28/70 [01:05<01:40,  2.39s/it][A[A

 41%|████▏     | 29/70 [01:08<01:37,  2.37s/it][A[A

 43%|████▎     | 30/70 [01:10<01:35,  2.40s/it][A[A

 44%|████▍     | 31/70 [01:13<01:32,  2.37s/it][A[A

Batch loss: 1.3676620721817017




 46%|████▌     | 32/70 [01:15<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:17<01:26,  2.35s/it][A[A

 49%|████▊     | 34/70 [01:20<01:24,  2.35s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.31s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:17,  2.34s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:16,  2.38s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:13,  2.36s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:11,  2.39s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:08,  2.37s/it][A[A

Batch loss: 1.3957278728485107




 60%|██████    | 42/70 [01:39<01:07,  2.39s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.37s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:02,  2.40s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:59,  2.38s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.40s/it][A[A

 67%|██████▋   | 47/70 [01:51<00:54,  2.37s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.37s/it][A[A

 70%|███████   | 49/70 [01:55<00:48,  2.33s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.36s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:44,  2.32s/it][A[A

Batch loss: 1.4189952611923218




 74%|███████▍  | 52/70 [02:02<00:41,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:38,  2.29s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:37,  2.32s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.28s/it][A[A

 80%|████████  | 56/70 [02:11<00:32,  2.31s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:29,  2.28s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:27,  2.31s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.28s/it][A[A

 86%|████████▌ | 60/70 [02:20<00:23,  2.31s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:20,  2.28s/it][A[A

Batch loss: 1.4142884016036987




 89%|████████▊ | 62/70 [02:25<00:18,  2.31s/it][A[A

 90%|█████████ | 63/70 [02:27<00:15,  2.28s/it][A[A

 91%|█████████▏| 64/70 [02:30<00:13,  2.31s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.32s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.37s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.42s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.42s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.33s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4442000882966177




 12%|█▎        | 1/8 [00:02<00:17,  2.45s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.43s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.37s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.33s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.34s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.31s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.23s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3633486926555634
Epoch:  81




  1%|▏         | 1/70 [00:02<02:45,  2.40s/it][A[A

Batch loss: 1.3504812717437744




  3%|▎         | 2/70 [00:04<02:42,  2.40s/it][A[A

  4%|▍         | 3/70 [00:07<02:37,  2.35s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.35s/it][A[A

  7%|▋         | 5/70 [00:11<02:30,  2.31s/it][A[A

  9%|▊         | 6/70 [00:13<02:28,  2.32s/it][A[A

 10%|█         | 7/70 [00:16<02:24,  2.30s/it][A[A

 11%|█▏        | 8/70 [00:18<02:23,  2.31s/it][A[A

 13%|█▎        | 9/70 [00:20<02:19,  2.29s/it][A[A

 14%|█▍        | 10/70 [00:23<02:18,  2.31s/it][A[A

 16%|█▌        | 11/70 [00:25<02:14,  2.28s/it][A[A

Batch loss: 1.4710875749588013




 17%|█▋        | 12/70 [00:27<02:13,  2.30s/it][A[A

 19%|█▊        | 13/70 [00:29<02:09,  2.28s/it][A[A

 20%|██        | 14/70 [00:32<02:09,  2.31s/it][A[A

 21%|██▏       | 15/70 [00:34<02:05,  2.28s/it][A[A

 23%|██▎       | 16/70 [00:37<02:06,  2.35s/it][A[A

 24%|██▍       | 17/70 [00:39<02:03,  2.34s/it][A[A

 26%|██▌       | 18/70 [00:41<02:03,  2.38s/it][A[A

 27%|██▋       | 19/70 [00:44<02:00,  2.36s/it][A[A

 29%|██▊       | 20/70 [00:46<02:01,  2.42s/it][A[A

 30%|███       | 21/70 [00:49<01:57,  2.41s/it][A[A

Batch loss: 1.3129290342330933




 31%|███▏      | 22/70 [00:51<01:56,  2.43s/it][A[A

 33%|███▎      | 23/70 [00:53<01:52,  2.40s/it][A[A

 34%|███▍      | 24/70 [00:56<01:51,  2.41s/it][A[A

 36%|███▌      | 25/70 [00:58<01:47,  2.39s/it][A[A

 37%|███▋      | 26/70 [01:01<01:46,  2.42s/it][A[A

 39%|███▊      | 27/70 [01:03<01:41,  2.36s/it][A[A

 40%|████      | 28/70 [01:05<01:39,  2.36s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.33s/it][A[A

 43%|████▎     | 30/70 [01:10<01:34,  2.36s/it][A[A

 44%|████▍     | 31/70 [01:12<01:30,  2.32s/it][A[A

Batch loss: 1.3410983085632324




 46%|████▌     | 32/70 [01:15<01:28,  2.34s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:19<01:23,  2.32s/it][A[A

 50%|█████     | 35/70 [01:21<01:20,  2.30s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:18,  2.32s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:15,  2.28s/it][A[A

 54%|█████▍    | 38/70 [01:28<01:13,  2.31s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:10,  2.29s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:35<01:06,  2.28s/it][A[A

Batch loss: 1.5558686256408691




 60%|██████    | 42/70 [01:38<01:05,  2.35s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:02,  2.31s/it][A[A

 63%|██████▎   | 44/70 [01:42<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:58,  2.35s/it][A[A

 66%|██████▌   | 46/70 [01:47<00:57,  2.39s/it][A[A

 67%|██████▋   | 47/70 [01:49<00:54,  2.37s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:52,  2.39s/it][A[A

 70%|███████   | 49/70 [01:54<00:50,  2.38s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:48,  2.41s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:45,  2.38s/it][A[A

Batch loss: 1.3416942358016968




 74%|███████▍  | 52/70 [02:02<00:43,  2.42s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.39s/it][A[A

 77%|███████▋  | 54/70 [02:06<00:38,  2.43s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:35,  2.37s/it][A[A

 80%|████████  | 56/70 [02:11<00:33,  2.37s/it][A[A

 81%|████████▏ | 57/70 [02:13<00:30,  2.33s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:28,  2.35s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.32s/it][A[A

 86%|████████▌ | 60/70 [02:20<00:23,  2.35s/it][A[A

 87%|████████▋ | 61/70 [02:22<00:20,  2.30s/it][A[A

Batch loss: 1.4121081829071045




 89%|████████▊ | 62/70 [02:25<00:18,  2.32s/it][A[A

 90%|█████████ | 63/70 [02:27<00:16,  2.29s/it][A[A

 91%|█████████▏| 64/70 [02:29<00:13,  2.31s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.28s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.29s/it][A[A

 96%|█████████▌| 67/70 [02:36<00:06,  2.27s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.31s/it][A[A

 99%|█████████▊| 69/70 [02:41<00:02,  2.28s/it][A[A

100%|██████████| 70/70 [02:41<00:00,  2.31s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.42382059608187




 12%|█▎        | 1/8 [00:02<00:16,  2.30s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.29s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.27s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.31s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.33s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.41s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.43s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.30s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4026594161987305
Epoch:  82




  1%|▏         | 1/70 [00:02<03:02,  2.64s/it][A[A

Batch loss: 1.4077068567276




  3%|▎         | 2/70 [00:05<02:57,  2.61s/it][A[A

  4%|▍         | 3/70 [00:07<02:50,  2.55s/it][A[A

  6%|▌         | 4/70 [00:10<02:50,  2.58s/it][A[A

  7%|▋         | 5/70 [00:12<02:43,  2.52s/it][A[A

  9%|▊         | 6/70 [00:14<02:37,  2.46s/it][A[A

 10%|█         | 7/70 [00:17<02:30,  2.38s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.41s/it][A[A

 13%|█▎        | 9/70 [00:22<02:26,  2.41s/it][A[A

 14%|█▍        | 10/70 [00:24<02:25,  2.43s/it][A[A

 16%|█▌        | 11/70 [00:26<02:19,  2.36s/it][A[A

Batch loss: 1.4223486185073853




 17%|█▋        | 12/70 [00:29<02:18,  2.38s/it][A[A

 19%|█▊        | 13/70 [00:31<02:13,  2.35s/it][A[A

 20%|██        | 14/70 [00:33<02:14,  2.40s/it][A[A

 21%|██▏       | 15/70 [00:36<02:11,  2.39s/it][A[A

 23%|██▎       | 16/70 [00:38<02:09,  2.39s/it][A[A

 24%|██▍       | 17/70 [00:40<02:04,  2.35s/it][A[A

 26%|██▌       | 18/70 [00:43<02:02,  2.36s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.35s/it][A[A

 29%|██▊       | 20/70 [00:48<01:59,  2.39s/it][A[A

 30%|███       | 21/70 [00:50<01:55,  2.35s/it][A[A

Batch loss: 1.3254189491271973




 31%|███▏      | 22/70 [00:52<01:55,  2.40s/it][A[A

 33%|███▎      | 23/70 [00:55<01:52,  2.38s/it][A[A

 34%|███▍      | 24/70 [00:57<01:51,  2.42s/it][A[A

 36%|███▌      | 25/70 [01:00<01:49,  2.43s/it][A[A

 37%|███▋      | 26/70 [01:02<01:48,  2.47s/it][A[A

 39%|███▊      | 27/70 [01:05<01:45,  2.44s/it][A[A

 40%|████      | 28/70 [01:07<01:43,  2.47s/it][A[A

 41%|████▏     | 29/70 [01:10<01:40,  2.44s/it][A[A

 43%|████▎     | 30/70 [01:12<01:38,  2.46s/it][A[A

 44%|████▍     | 31/70 [01:14<01:34,  2.43s/it][A[A

Batch loss: 1.3936800956726074




 46%|████▌     | 32/70 [01:17<01:33,  2.47s/it][A[A

 47%|████▋     | 33/70 [01:19<01:29,  2.42s/it][A[A

 49%|████▊     | 34/70 [01:22<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:24<01:22,  2.36s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:21,  2.40s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:18,  2.38s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:17,  2.42s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:14,  2.40s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:13,  2.44s/it][A[A

 59%|█████▊    | 41/70 [01:39<01:10,  2.42s/it][A[A

Batch loss: 1.3950746059417725




 60%|██████    | 42/70 [01:41<01:08,  2.46s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:05,  2.44s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:04,  2.48s/it][A[A

 64%|██████▍   | 45/70 [01:48<01:01,  2.47s/it][A[A

 66%|██████▌   | 46/70 [01:51<00:59,  2.49s/it][A[A

 67%|██████▋   | 47/70 [01:53<00:56,  2.44s/it][A[A

 69%|██████▊   | 48/70 [01:56<00:53,  2.43s/it][A[A

 70%|███████   | 49/70 [01:58<00:49,  2.37s/it][A[A

 71%|███████▏  | 50/70 [02:00<00:47,  2.38s/it][A[A

 73%|███████▎  | 51/70 [02:03<00:44,  2.35s/it][A[A

Batch loss: 1.3780244588851929




 74%|███████▍  | 52/70 [02:05<00:42,  2.36s/it][A[A

 76%|███████▌  | 53/70 [02:07<00:39,  2.33s/it][A[A

 77%|███████▋  | 54/70 [02:10<00:37,  2.34s/it][A[A

 79%|███████▊  | 55/70 [02:12<00:34,  2.31s/it][A[A

 80%|████████  | 56/70 [02:14<00:32,  2.34s/it][A[A

 81%|████████▏ | 57/70 [02:17<00:30,  2.31s/it][A[A

 83%|████████▎ | 58/70 [02:19<00:27,  2.33s/it][A[A

 84%|████████▍ | 59/70 [02:21<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:24<00:23,  2.33s/it][A[A

 87%|████████▋ | 61/70 [02:26<00:20,  2.30s/it][A[A

Batch loss: 1.432745337486267




 89%|████████▊ | 62/70 [02:28<00:18,  2.33s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:14,  2.36s/it][A[A

 93%|█████████▎| 65/70 [02:35<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:40<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:43<00:04,  2.41s/it][A[A

 99%|█████████▊| 69/70 [02:45<00:02,  2.43s/it][A[A

100%|██████████| 70/70 [02:46<00:00,  2.38s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4266811285700116




 12%|█▎        | 1/8 [00:02<00:16,  2.42s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.43s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.37s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.38s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.33s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.33s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.27s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.22s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3696733117103577
Epoch:  83




  1%|▏         | 1/70 [00:02<02:50,  2.47s/it][A[A

Batch loss: 1.5204380750656128




  3%|▎         | 2/70 [00:04<02:46,  2.45s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:36,  2.37s/it][A[A

  7%|▋         | 5/70 [00:11<02:31,  2.34s/it][A[A

  9%|▊         | 6/70 [00:14<02:29,  2.34s/it][A[A

 10%|█         | 7/70 [00:16<02:25,  2.31s/it][A[A

 11%|█▏        | 8/70 [00:18<02:23,  2.32s/it][A[A

 13%|█▎        | 9/70 [00:20<02:19,  2.29s/it][A[A

 14%|█▍        | 10/70 [00:23<02:18,  2.31s/it][A[A

 16%|█▌        | 11/70 [00:25<02:14,  2.28s/it][A[A

Batch loss: 1.4003232717514038




 17%|█▋        | 12/70 [00:27<02:14,  2.32s/it][A[A

 19%|█▊        | 13/70 [00:30<02:10,  2.28s/it][A[A

 20%|██        | 14/70 [00:32<02:09,  2.31s/it][A[A

 21%|██▏       | 15/70 [00:34<02:07,  2.32s/it][A[A

 23%|██▎       | 16/70 [00:37<02:07,  2.36s/it][A[A

 24%|██▍       | 17/70 [00:39<02:05,  2.37s/it][A[A

 26%|██▌       | 18/70 [00:42<02:04,  2.40s/it][A[A

 27%|██▋       | 19/70 [00:44<02:01,  2.39s/it][A[A

 29%|██▊       | 20/70 [00:46<02:00,  2.41s/it][A[A

 30%|███       | 21/70 [00:49<01:56,  2.38s/it][A[A

Batch loss: 1.5003082752227783




 31%|███▏      | 22/70 [00:51<01:55,  2.41s/it][A[A

 33%|███▎      | 23/70 [00:54<01:52,  2.39s/it][A[A

 34%|███▍      | 24/70 [00:56<01:51,  2.42s/it][A[A

 36%|███▌      | 25/70 [00:58<01:48,  2.40s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.39s/it][A[A

 39%|███▊      | 27/70 [01:03<01:40,  2.34s/it][A[A

 40%|████      | 28/70 [01:05<01:38,  2.33s/it][A[A

 41%|████▏     | 29/70 [01:08<01:35,  2.32s/it][A[A

 43%|████▎     | 30/70 [01:10<01:33,  2.34s/it][A[A

 44%|████▍     | 31/70 [01:12<01:29,  2.30s/it][A[A

Batch loss: 1.3826664686203003




 46%|████▌     | 32/70 [01:15<01:28,  2.32s/it][A[A

 47%|████▋     | 33/70 [01:17<01:25,  2.30s/it][A[A

 49%|████▊     | 34/70 [01:19<01:23,  2.32s/it][A[A

 50%|█████     | 35/70 [01:21<01:20,  2.30s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:18,  2.32s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:15,  2.29s/it][A[A

 54%|█████▍    | 38/70 [01:28<01:14,  2.31s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:10,  2.28s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:35<01:06,  2.29s/it][A[A

Batch loss: 1.4020971059799194




 60%|██████    | 42/70 [01:38<01:05,  2.33s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:03,  2.44s/it][A[A

 64%|██████▍   | 45/70 [01:45<01:01,  2.46s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:59,  2.49s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:56,  2.45s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:54,  2.48s/it][A[A

 70%|███████   | 49/70 [01:55<00:51,  2.45s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:49,  2.49s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:46,  2.47s/it][A[A

Batch loss: 1.5875306129455566




 74%|███████▍  | 52/70 [02:03<00:45,  2.50s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:42,  2.48s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:39,  2.46s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:35,  2.40s/it][A[A

 80%|████████  | 56/70 [02:12<00:33,  2.41s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:30,  2.37s/it][A[A

 83%|████████▎ | 58/70 [02:17<00:28,  2.37s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:24,  2.42s/it][A[A

 87%|████████▋ | 61/70 [02:24<00:21,  2.38s/it][A[A

Batch loss: 1.3624471426010132




 89%|████████▊ | 62/70 [02:26<00:18,  2.37s/it][A[A

 90%|█████████ | 63/70 [02:29<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.33s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:11,  2.33s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.34s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:06,  2.30s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.32s/it][A[A

 99%|█████████▊| 69/70 [02:42<00:02,  2.30s/it][A[A

100%|██████████| 70/70 [02:43<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4370902385030473




 12%|█▎        | 1/8 [00:02<00:16,  2.34s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.35s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.33s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.34s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.31s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.39s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.38s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.28s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.349591225385666
Epoch:  84




  1%|▏         | 1/70 [00:02<03:04,  2.67s/it][A[A

Batch loss: 1.424541711807251




  3%|▎         | 2/70 [00:05<03:02,  2.69s/it][A[A

  4%|▍         | 3/70 [00:07<02:54,  2.60s/it][A[A

  6%|▌         | 4/70 [00:10<02:51,  2.60s/it][A[A

  7%|▋         | 5/70 [00:12<02:42,  2.50s/it][A[A

  9%|▊         | 6/70 [00:15<02:41,  2.52s/it][A[A

 10%|█         | 7/70 [00:17<02:35,  2.48s/it][A[A

 11%|█▏        | 8/70 [00:20<02:36,  2.52s/it][A[A

 13%|█▎        | 9/70 [00:22<02:32,  2.50s/it][A[A

 14%|█▍        | 10/70 [00:25<02:32,  2.54s/it][A[A

 16%|█▌        | 11/70 [00:27<02:27,  2.50s/it][A[A

Batch loss: 1.7114945650100708




 17%|█▋        | 12/70 [00:30<02:28,  2.55s/it][A[A

 19%|█▊        | 13/70 [00:32<02:23,  2.52s/it][A[A

 20%|██        | 14/70 [00:35<02:22,  2.55s/it][A[A

 21%|██▏       | 15/70 [00:37<02:18,  2.52s/it][A[A

 23%|██▎       | 16/70 [00:40<02:17,  2.55s/it][A[A

 24%|██▍       | 17/70 [00:43<02:14,  2.53s/it][A[A

 26%|██▌       | 18/70 [00:45<02:11,  2.53s/it][A[A

 27%|██▋       | 19/70 [00:47<02:06,  2.47s/it][A[A

 29%|██▊       | 20/70 [00:50<02:05,  2.51s/it][A[A

 30%|███       | 21/70 [00:52<02:00,  2.47s/it][A[A

Batch loss: 1.556442379951477




 31%|███▏      | 22/70 [00:55<02:00,  2.51s/it][A[A

 33%|███▎      | 23/70 [00:57<01:55,  2.47s/it][A[A

 34%|███▍      | 24/70 [01:00<01:53,  2.48s/it][A[A

 36%|███▌      | 25/70 [01:02<01:49,  2.43s/it][A[A

 37%|███▋      | 26/70 [01:05<01:47,  2.45s/it][A[A

 39%|███▊      | 27/70 [01:07<01:43,  2.40s/it][A[A

 40%|████      | 28/70 [01:09<01:42,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:12<01:38,  2.40s/it][A[A

 43%|████▎     | 30/70 [01:14<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:17<01:33,  2.41s/it][A[A

Batch loss: 1.3225167989730835




 46%|████▌     | 32/70 [01:19<01:33,  2.45s/it][A[A

 47%|████▋     | 33/70 [01:22<01:29,  2.42s/it][A[A

 49%|████▊     | 34/70 [01:24<01:29,  2.50s/it][A[A

 50%|█████     | 35/70 [01:27<01:26,  2.48s/it][A[A

 51%|█████▏    | 36/70 [01:29<01:25,  2.52s/it][A[A

 53%|█████▎    | 37/70 [01:32<01:22,  2.50s/it][A[A

 54%|█████▍    | 38/70 [01:34<01:21,  2.54s/it][A[A

 56%|█████▌    | 39/70 [01:37<01:18,  2.52s/it][A[A

 57%|█████▋    | 40/70 [01:39<01:16,  2.55s/it][A[A

 59%|█████▊    | 41/70 [01:42<01:12,  2.51s/it][A[A

Batch loss: 1.475145697593689




 60%|██████    | 42/70 [01:45<01:11,  2.56s/it][A[A

 61%|██████▏   | 43/70 [01:47<01:07,  2.51s/it][A[A

 63%|██████▎   | 44/70 [01:50<01:06,  2.54s/it][A[A

 64%|██████▍   | 45/70 [01:52<01:01,  2.47s/it][A[A

 66%|██████▌   | 46/70 [01:54<00:59,  2.48s/it][A[A

 67%|██████▋   | 47/70 [01:57<00:56,  2.46s/it][A[A

 69%|██████▊   | 48/70 [01:59<00:54,  2.48s/it][A[A

 70%|███████   | 49/70 [02:02<00:50,  2.42s/it][A[A

 71%|███████▏  | 50/70 [02:04<00:48,  2.44s/it][A[A

 73%|███████▎  | 51/70 [02:06<00:45,  2.41s/it][A[A

Batch loss: 1.3769094944000244




 74%|███████▍  | 52/70 [02:09<00:43,  2.44s/it][A[A

 76%|███████▌  | 53/70 [02:11<00:40,  2.40s/it][A[A

 77%|███████▋  | 54/70 [02:14<00:38,  2.43s/it][A[A

 79%|███████▊  | 55/70 [02:16<00:36,  2.40s/it][A[A

 80%|████████  | 56/70 [02:19<00:34,  2.43s/it][A[A

 81%|████████▏ | 57/70 [02:21<00:31,  2.42s/it][A[A

 83%|████████▎ | 58/70 [02:23<00:29,  2.45s/it][A[A

 84%|████████▍ | 59/70 [02:26<00:26,  2.41s/it][A[A

 86%|████████▌ | 60/70 [02:28<00:24,  2.43s/it][A[A

 87%|████████▋ | 61/70 [02:31<00:21,  2.44s/it][A[A

Batch loss: 1.6113189458847046




 89%|████████▊ | 62/70 [02:33<00:19,  2.49s/it][A[A

 90%|█████████ | 63/70 [02:36<00:17,  2.49s/it][A[A

 91%|█████████▏| 64/70 [02:38<00:15,  2.53s/it][A[A

 93%|█████████▎| 65/70 [02:41<00:12,  2.51s/it][A[A

 94%|█████████▍| 66/70 [02:44<00:10,  2.55s/it][A[A

 96%|█████████▌| 67/70 [02:46<00:07,  2.51s/it][A[A

 97%|█████████▋| 68/70 [02:49<00:05,  2.55s/it][A[A

 99%|█████████▊| 69/70 [02:51<00:02,  2.52s/it][A[A

100%|██████████| 70/70 [02:52<00:00,  2.46s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4439577647617885




 12%|█▎        | 1/8 [00:02<00:18,  2.59s/it][A[A

 25%|██▌       | 2/8 [00:05<00:15,  2.55s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.46s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.50s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.43s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.42s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.37s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.31s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.317238748073578
Epoch:  85




  1%|▏         | 1/70 [00:02<02:55,  2.55s/it][A[A

Batch loss: 1.4547127485275269




  3%|▎         | 2/70 [00:05<02:53,  2.55s/it][A[A

  4%|▍         | 3/70 [00:07<02:46,  2.48s/it][A[A

  6%|▌         | 4/70 [00:09<02:44,  2.50s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.45s/it][A[A

  9%|▊         | 6/70 [00:14<02:37,  2.46s/it][A[A

 10%|█         | 7/70 [00:17<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.45s/it][A[A

 13%|█▎        | 9/70 [00:22<02:29,  2.44s/it][A[A

 14%|█▍        | 10/70 [00:24<02:28,  2.47s/it][A[A

 16%|█▌        | 11/70 [00:27<02:25,  2.47s/it][A[A

Batch loss: 1.7813102006912231




 17%|█▋        | 12/70 [00:29<02:26,  2.52s/it][A[A

 19%|█▊        | 13/70 [00:32<02:22,  2.50s/it][A[A

 20%|██        | 14/70 [00:34<02:22,  2.54s/it][A[A

 21%|██▏       | 15/70 [00:37<02:18,  2.51s/it][A[A

 23%|██▎       | 16/70 [00:39<02:17,  2.55s/it][A[A

 24%|██▍       | 17/70 [00:42<02:13,  2.52s/it][A[A

 26%|██▌       | 18/70 [00:44<02:12,  2.55s/it][A[A

 27%|██▋       | 19/70 [00:47<02:08,  2.52s/it][A[A

 29%|██▊       | 20/70 [00:49<02:06,  2.54s/it][A[A

 30%|███       | 21/70 [00:52<02:01,  2.48s/it][A[A

Batch loss: 1.3374892473220825




 31%|███▏      | 22/70 [00:54<01:59,  2.50s/it][A[A

 33%|███▎      | 23/70 [00:57<01:54,  2.44s/it][A[A

 34%|███▍      | 24/70 [00:59<01:53,  2.47s/it][A[A

 36%|███▌      | 25/70 [01:02<01:49,  2.44s/it][A[A

 37%|███▋      | 26/70 [01:04<01:48,  2.46s/it][A[A

 39%|███▊      | 27/70 [01:06<01:43,  2.42s/it][A[A

 40%|████      | 28/70 [01:09<01:42,  2.44s/it][A[A

 41%|████▏     | 29/70 [01:11<01:38,  2.40s/it][A[A

 43%|████▎     | 30/70 [01:14<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:16<01:33,  2.40s/it][A[A

Batch loss: 1.3110374212265015




 46%|████▌     | 32/70 [01:19<01:32,  2.43s/it][A[A

 47%|████▋     | 33/70 [01:21<01:29,  2.42s/it][A[A

 49%|████▊     | 34/70 [01:23<01:28,  2.45s/it][A[A

 50%|█████     | 35/70 [01:26<01:24,  2.42s/it][A[A

 51%|█████▏    | 36/70 [01:28<01:22,  2.43s/it][A[A

 53%|█████▎    | 37/70 [01:31<01:19,  2.42s/it][A[A

 54%|█████▍    | 38/70 [01:33<01:19,  2.50s/it][A[A

 56%|█████▌    | 39/70 [01:36<01:16,  2.48s/it][A[A

 57%|█████▋    | 40/70 [01:38<01:15,  2.53s/it][A[A

 59%|█████▊    | 41/70 [01:41<01:12,  2.50s/it][A[A

Batch loss: 1.417704463005066




 60%|██████    | 42/70 [01:44<01:11,  2.55s/it][A[A

 61%|██████▏   | 43/70 [01:46<01:07,  2.51s/it][A[A

 63%|██████▎   | 44/70 [01:49<01:05,  2.53s/it][A[A

 64%|██████▍   | 45/70 [01:51<01:02,  2.50s/it][A[A

 66%|██████▌   | 46/70 [01:54<01:00,  2.53s/it][A[A

 67%|██████▋   | 47/70 [01:56<00:57,  2.51s/it][A[A

 69%|██████▊   | 48/70 [01:59<00:55,  2.51s/it][A[A

 70%|███████   | 49/70 [02:01<00:51,  2.45s/it][A[A

 71%|███████▏  | 50/70 [02:03<00:49,  2.48s/it][A[A

 73%|███████▎  | 51/70 [02:06<00:46,  2.46s/it][A[A

Batch loss: 1.3342736959457397




 74%|███████▍  | 52/70 [02:08<00:45,  2.53s/it][A[A

 76%|███████▌  | 53/70 [02:11<00:42,  2.50s/it][A[A

 77%|███████▋  | 54/70 [02:14<00:40,  2.55s/it][A[A

 79%|███████▊  | 55/70 [02:16<00:37,  2.52s/it][A[A

 80%|████████  | 56/70 [02:19<00:35,  2.55s/it][A[A

 81%|████████▏ | 57/70 [02:21<00:32,  2.52s/it][A[A

 83%|████████▎ | 58/70 [02:24<00:30,  2.56s/it][A[A

 84%|████████▍ | 59/70 [02:26<00:27,  2.52s/it][A[A

 86%|████████▌ | 60/70 [02:29<00:25,  2.56s/it][A[A

 87%|████████▋ | 61/70 [02:31<00:22,  2.49s/it][A[A

Batch loss: 1.3204132318496704




 89%|████████▊ | 62/70 [02:34<00:19,  2.49s/it][A[A

 90%|█████████ | 63/70 [02:36<00:17,  2.45s/it][A[A

 91%|█████████▏| 64/70 [02:39<00:14,  2.48s/it][A[A

 93%|█████████▎| 65/70 [02:41<00:12,  2.43s/it][A[A

 94%|█████████▍| 66/70 [02:43<00:09,  2.45s/it][A[A

 96%|█████████▌| 67/70 [02:46<00:07,  2.42s/it][A[A

 97%|█████████▋| 68/70 [02:48<00:04,  2.47s/it][A[A

 99%|█████████▊| 69/70 [02:51<00:02,  2.44s/it][A[A

100%|██████████| 70/70 [02:51<00:00,  2.46s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4429280076708113




 12%|█▎        | 1/8 [00:02<00:18,  2.60s/it][A[A

 25%|██▌       | 2/8 [00:05<00:15,  2.59s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.49s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.46s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.39s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.40s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.35s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.30s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.391054719686508
Epoch:  86




  1%|▏         | 1/70 [00:02<03:03,  2.66s/it][A[A

Batch loss: 1.4258921146392822




  3%|▎         | 2/70 [00:05<03:02,  2.68s/it][A[A

  4%|▍         | 3/70 [00:07<02:55,  2.61s/it][A[A

  6%|▌         | 4/70 [00:10<02:53,  2.62s/it][A[A

  7%|▋         | 5/70 [00:12<02:47,  2.58s/it][A[A

  9%|▊         | 6/70 [00:15<02:45,  2.59s/it][A[A

 10%|█         | 7/70 [00:18<02:40,  2.55s/it][A[A

 11%|█▏        | 8/70 [00:20<02:39,  2.57s/it][A[A

 13%|█▎        | 9/70 [00:23<02:35,  2.54s/it][A[A

 14%|█▍        | 10/70 [00:25<02:32,  2.55s/it][A[A

 16%|█▌        | 11/70 [00:27<02:25,  2.47s/it][A[A

Batch loss: 1.6007901430130005




 17%|█▋        | 12/70 [00:30<02:24,  2.49s/it][A[A

 19%|█▊        | 13/70 [00:32<02:20,  2.47s/it][A[A

 20%|██        | 14/70 [00:35<02:19,  2.49s/it][A[A

 21%|██▏       | 15/70 [00:37<02:14,  2.44s/it][A[A

 23%|██▎       | 16/70 [00:40<02:12,  2.46s/it][A[A

 24%|██▍       | 17/70 [00:42<02:08,  2.42s/it][A[A

 26%|██▌       | 18/70 [00:45<02:07,  2.45s/it][A[A

 27%|██▋       | 19/70 [00:47<02:02,  2.41s/it][A[A

 29%|██▊       | 20/70 [00:49<02:01,  2.43s/it][A[A

 30%|███       | 21/70 [00:52<01:57,  2.40s/it][A[A

Batch loss: 1.3027845621109009




 31%|███▏      | 22/70 [00:54<01:56,  2.43s/it][A[A

 33%|███▎      | 23/70 [00:57<01:52,  2.40s/it][A[A

 34%|███▍      | 24/70 [00:59<01:51,  2.42s/it][A[A

 36%|███▌      | 25/70 [01:01<01:47,  2.39s/it][A[A

 37%|███▋      | 26/70 [01:04<01:47,  2.43s/it][A[A

 39%|███▊      | 27/70 [01:06<01:44,  2.44s/it][A[A

 40%|████      | 28/70 [01:09<01:44,  2.50s/it][A[A

 41%|████▏     | 29/70 [01:11<01:42,  2.49s/it][A[A

 43%|████▎     | 30/70 [01:14<01:41,  2.53s/it][A[A

 44%|████▍     | 31/70 [01:17<01:37,  2.49s/it][A[A

Batch loss: 1.4689598083496094




 46%|████▌     | 32/70 [01:19<01:36,  2.54s/it][A[A

 47%|████▋     | 33/70 [01:22<01:32,  2.50s/it][A[A

 49%|████▊     | 34/70 [01:24<01:31,  2.53s/it][A[A

 50%|█████     | 35/70 [01:27<01:27,  2.51s/it][A[A

 51%|█████▏    | 36/70 [01:29<01:26,  2.55s/it][A[A

 53%|█████▎    | 37/70 [01:32<01:23,  2.52s/it][A[A

 54%|█████▍    | 38/70 [01:34<01:21,  2.54s/it][A[A

 56%|█████▌    | 39/70 [01:37<01:16,  2.47s/it][A[A

 57%|█████▋    | 40/70 [01:39<01:14,  2.49s/it][A[A

 59%|█████▊    | 41/70 [01:42<01:10,  2.45s/it][A[A

Batch loss: 1.3449276685714722




 60%|██████    | 42/70 [01:44<01:09,  2.49s/it][A[A

 61%|██████▏   | 43/70 [01:46<01:06,  2.46s/it][A[A

 63%|██████▎   | 44/70 [01:49<01:04,  2.50s/it][A[A

 64%|██████▍   | 45/70 [01:51<01:01,  2.45s/it][A[A

 66%|██████▌   | 46/70 [01:54<00:59,  2.47s/it][A[A

 67%|██████▋   | 47/70 [01:56<00:55,  2.42s/it][A[A

 69%|██████▊   | 48/70 [01:59<00:53,  2.45s/it][A[A

 70%|███████   | 49/70 [02:01<00:50,  2.41s/it][A[A

 71%|███████▏  | 50/70 [02:04<00:48,  2.44s/it][A[A

 73%|███████▎  | 51/70 [02:06<00:45,  2.41s/it][A[A

Batch loss: 1.4619991779327393




 74%|███████▍  | 52/70 [02:08<00:44,  2.45s/it][A[A

 76%|███████▌  | 53/70 [02:11<00:41,  2.46s/it][A[A

 77%|███████▋  | 54/70 [02:14<00:40,  2.50s/it][A[A

 79%|███████▊  | 55/70 [02:16<00:37,  2.48s/it][A[A

 80%|████████  | 56/70 [02:19<00:35,  2.52s/it][A[A

 81%|████████▏ | 57/70 [02:21<00:32,  2.50s/it][A[A

 83%|████████▎ | 58/70 [02:24<00:30,  2.53s/it][A[A

 84%|████████▍ | 59/70 [02:26<00:27,  2.50s/it][A[A

 86%|████████▌ | 60/70 [02:29<00:25,  2.53s/it][A[A

 87%|████████▋ | 61/70 [02:31<00:22,  2.50s/it][A[A

Batch loss: 1.4519625902175903




 89%|████████▊ | 62/70 [02:34<00:20,  2.54s/it][A[A

 90%|█████████ | 63/70 [02:36<00:17,  2.53s/it][A[A

 91%|█████████▏| 64/70 [02:39<00:15,  2.51s/it][A[A

 93%|█████████▎| 65/70 [02:41<00:12,  2.46s/it][A[A

 94%|█████████▍| 66/70 [02:44<00:09,  2.47s/it][A[A

 96%|█████████▌| 67/70 [02:46<00:07,  2.46s/it][A[A

 97%|█████████▋| 68/70 [02:48<00:04,  2.48s/it][A[A

 99%|█████████▊| 69/70 [02:51<00:02,  2.44s/it][A[A

100%|██████████| 70/70 [02:52<00:00,  2.46s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.433905610016414




 12%|█▎        | 1/8 [00:02<00:16,  2.43s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.45s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.38s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.39s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.34s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.36s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.32s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.28s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.32821187376976
Epoch:  87




  1%|▏         | 1/70 [00:02<03:00,  2.62s/it][A[A

Batch loss: 1.499809741973877




  3%|▎         | 2/70 [00:05<02:56,  2.60s/it][A[A

  4%|▍         | 3/70 [00:07<02:49,  2.54s/it][A[A

  6%|▌         | 4/70 [00:10<02:49,  2.57s/it][A[A

  7%|▋         | 5/70 [00:12<02:44,  2.54s/it][A[A

  9%|▊         | 6/70 [00:15<02:44,  2.57s/it][A[A

 10%|█         | 7/70 [00:17<02:37,  2.51s/it][A[A

 11%|█▏        | 8/70 [00:20<02:36,  2.52s/it][A[A

 13%|█▎        | 9/70 [00:22<02:30,  2.48s/it][A[A

 14%|█▍        | 10/70 [00:25<02:30,  2.50s/it][A[A

 16%|█▌        | 11/70 [00:27<02:25,  2.47s/it][A[A

Batch loss: 1.6182364225387573




 17%|█▋        | 12/70 [00:30<02:25,  2.51s/it][A[A

 19%|█▊        | 13/70 [00:32<02:21,  2.49s/it][A[A

 20%|██        | 14/70 [00:35<02:19,  2.49s/it][A[A

 21%|██▏       | 15/70 [00:37<02:13,  2.43s/it][A[A

 23%|██▎       | 16/70 [00:39<02:11,  2.43s/it][A[A

 24%|██▍       | 17/70 [00:42<02:08,  2.42s/it][A[A

 26%|██▌       | 18/70 [00:44<02:06,  2.43s/it][A[A

 27%|██▋       | 19/70 [00:47<02:03,  2.43s/it][A[A

 29%|██▊       | 20/70 [00:49<02:00,  2.42s/it][A[A

 30%|███       | 21/70 [00:51<01:56,  2.38s/it][A[A

Batch loss: 1.4890270233154297




 31%|███▏      | 22/70 [00:54<01:53,  2.37s/it][A[A

 33%|███▎      | 23/70 [00:56<01:50,  2.35s/it][A[A

 34%|███▍      | 24/70 [00:58<01:47,  2.34s/it][A[A

 36%|███▌      | 25/70 [01:01<01:45,  2.35s/it][A[A

 37%|███▋      | 26/70 [01:03<01:45,  2.39s/it][A[A

 39%|███▊      | 27/70 [01:06<01:44,  2.42s/it][A[A

 40%|████      | 28/70 [01:08<01:40,  2.40s/it][A[A

 41%|████▏     | 29/70 [01:10<01:37,  2.37s/it][A[A

 43%|████▎     | 30/70 [01:13<01:34,  2.37s/it][A[A

 44%|████▍     | 31/70 [01:15<01:33,  2.39s/it][A[A

Batch loss: 1.6339727640151978




 46%|████▌     | 32/70 [01:18<01:31,  2.41s/it][A[A

 47%|████▋     | 33/70 [01:20<01:29,  2.41s/it][A[A

 49%|████▊     | 34/70 [01:22<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:25<01:24,  2.41s/it][A[A

 51%|█████▏    | 36/70 [01:27<01:22,  2.41s/it][A[A

 53%|█████▎    | 37/70 [01:30<01:19,  2.40s/it][A[A

 54%|█████▍    | 38/70 [01:32<01:16,  2.41s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:14,  2.41s/it][A[A

 57%|█████▋    | 40/70 [01:37<01:12,  2.42s/it][A[A

 59%|█████▊    | 41/70 [01:39<01:09,  2.39s/it][A[A

Batch loss: 1.3561136722564697




 60%|██████    | 42/70 [01:42<01:06,  2.39s/it][A[A

 61%|██████▏   | 43/70 [01:44<01:03,  2.36s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:49<00:59,  2.39s/it][A[A

 66%|██████▌   | 46/70 [01:51<00:57,  2.41s/it][A[A

 67%|██████▋   | 47/70 [01:54<00:55,  2.41s/it][A[A

 69%|██████▊   | 48/70 [01:56<00:53,  2.43s/it][A[A

 70%|███████   | 49/70 [01:58<00:50,  2.42s/it][A[A

 71%|███████▏  | 50/70 [02:01<00:48,  2.42s/it][A[A

 73%|███████▎  | 51/70 [02:03<00:46,  2.43s/it][A[A

Batch loss: 1.4017821550369263




 74%|███████▍  | 52/70 [02:06<00:43,  2.43s/it][A[A

 76%|███████▌  | 53/70 [02:08<00:41,  2.43s/it][A[A

 77%|███████▋  | 54/70 [02:11<00:38,  2.43s/it][A[A

 79%|███████▊  | 55/70 [02:13<00:36,  2.40s/it][A[A

 80%|████████  | 56/70 [02:15<00:33,  2.38s/it][A[A

 81%|████████▏ | 57/70 [02:18<00:30,  2.35s/it][A[A

 83%|████████▎ | 58/70 [02:20<00:28,  2.36s/it][A[A

 84%|████████▍ | 59/70 [02:22<00:25,  2.34s/it][A[A

 86%|████████▌ | 60/70 [02:24<00:23,  2.33s/it][A[A

 87%|████████▋ | 61/70 [02:27<00:20,  2.32s/it][A[A

Batch loss: 1.9056379795074463




 89%|████████▊ | 62/70 [02:29<00:18,  2.32s/it][A[A

 90%|█████████ | 63/70 [02:31<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:34<00:13,  2.32s/it][A[A

 93%|█████████▎| 65/70 [02:36<00:11,  2.31s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.32s/it][A[A

 96%|█████████▌| 67/70 [02:41<00:06,  2.32s/it][A[A

 97%|█████████▋| 68/70 [02:43<00:04,  2.31s/it][A[A

 99%|█████████▊| 69/70 [02:45<00:02,  2.31s/it][A[A

100%|██████████| 70/70 [02:46<00:00,  2.38s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4585673894201006




 12%|█▎        | 1/8 [00:02<00:16,  2.29s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.28s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.30s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.32s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.32s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.33s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.33s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.23s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.329500049352646
Epoch:  88




  1%|▏         | 1/70 [00:02<02:51,  2.48s/it][A[A

Batch loss: 1.3239701986312866




  3%|▎         | 2/70 [00:04<02:47,  2.47s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.45s/it][A[A

  6%|▌         | 4/70 [00:09<02:41,  2.44s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.45s/it][A[A

  9%|▊         | 6/70 [00:14<02:35,  2.42s/it][A[A

 10%|█         | 7/70 [00:16<02:30,  2.39s/it][A[A

 11%|█▏        | 8/70 [00:19<02:27,  2.38s/it][A[A

 13%|█▎        | 9/70 [00:21<02:24,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:23<02:22,  2.38s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.38s/it][A[A

Batch loss: 1.9206511974334717




 17%|█▋        | 12/70 [00:28<02:17,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:31<02:14,  2.36s/it][A[A

 20%|██        | 14/70 [00:33<02:13,  2.38s/it][A[A

 21%|██▏       | 15/70 [00:35<02:09,  2.35s/it][A[A

 23%|██▎       | 16/70 [00:38<02:06,  2.34s/it][A[A

 24%|██▍       | 17/70 [00:40<02:03,  2.33s/it][A[A

 26%|██▌       | 18/70 [00:42<02:01,  2.33s/it][A[A

 27%|██▋       | 19/70 [00:45<01:58,  2.33s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.32s/it][A[A

 30%|███       | 21/70 [00:49<01:53,  2.32s/it][A[A

Batch loss: 1.3265751600265503




 31%|███▏      | 22/70 [00:52<01:52,  2.35s/it][A[A

 33%|███▎      | 23/70 [00:54<01:51,  2.36s/it][A[A

 34%|███▍      | 24/70 [00:56<01:50,  2.39s/it][A[A

 36%|███▌      | 25/70 [00:59<01:47,  2.39s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.40s/it][A[A

 39%|███▊      | 27/70 [01:04<01:43,  2.40s/it][A[A

 40%|████      | 28/70 [01:06<01:41,  2.41s/it][A[A

 41%|████▏     | 29/70 [01:08<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:11<01:36,  2.41s/it][A[A

 44%|████▍     | 31/70 [01:13<01:33,  2.40s/it][A[A

Batch loss: 1.483860969543457




 46%|████▌     | 32/70 [01:16<01:30,  2.39s/it][A[A

 47%|████▋     | 33/70 [01:18<01:28,  2.38s/it][A[A

 49%|████▊     | 34/70 [01:20<01:25,  2.38s/it][A[A

 50%|█████     | 35/70 [01:23<01:22,  2.35s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:19,  2.34s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:17,  2.35s/it][A[A

 54%|█████▍    | 38/70 [01:30<01:15,  2.35s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:12,  2.32s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:09,  2.31s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:06,  2.29s/it][A[A

Batch loss: 1.4165853261947632




 60%|██████    | 42/70 [01:39<01:04,  2.30s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:01,  2.28s/it][A[A

 63%|██████▎   | 44/70 [01:43<00:59,  2.28s/it][A[A

 64%|██████▍   | 45/70 [01:46<00:58,  2.33s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:53,  2.34s/it][A[A

 69%|██████▊   | 48/70 [01:53<00:52,  2.37s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.37s/it][A[A

 71%|███████▏  | 50/70 [01:58<00:47,  2.39s/it][A[A

 73%|███████▎  | 51/70 [02:00<00:45,  2.41s/it][A[A

Batch loss: 1.572097659111023




 74%|███████▍  | 52/70 [02:03<00:43,  2.44s/it][A[A

 76%|███████▌  | 53/70 [02:05<00:41,  2.45s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:39,  2.45s/it][A[A

 79%|███████▊  | 55/70 [02:10<00:37,  2.48s/it][A[A

 80%|████████  | 56/70 [02:13<00:34,  2.48s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:32,  2.48s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:29,  2.48s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:27,  2.48s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:24,  2.47s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:22,  2.49s/it][A[A

Batch loss: 1.3761212825775146




 89%|████████▊ | 62/70 [02:27<00:19,  2.45s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.42s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.40s/it][A[A

 93%|█████████▎| 65/70 [02:35<00:12,  2.41s/it][A[A

 94%|█████████▍| 66/70 [02:37<00:09,  2.40s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.39s/it][A[A

 97%|█████████▋| 68/70 [02:42<00:04,  2.38s/it][A[A

 99%|█████████▊| 69/70 [02:44<00:02,  2.37s/it][A[A

100%|██████████| 70/70 [02:45<00:00,  2.36s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.436474985735757




 12%|█▎        | 1/8 [00:02<00:15,  2.28s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.29s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.28s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.29s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.28s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.28s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.28s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.18s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.341001808643341
Epoch:  89




  1%|▏         | 1/70 [00:02<02:45,  2.40s/it][A[A

Batch loss: 1.4766558408737183




  3%|▎         | 2/70 [00:04<02:45,  2.44s/it][A[A

  4%|▍         | 3/70 [00:07<02:44,  2.45s/it][A[A

  6%|▌         | 4/70 [00:09<02:42,  2.46s/it][A[A

  7%|▋         | 5/70 [00:12<02:39,  2.46s/it][A[A

  9%|▊         | 6/70 [00:14<02:37,  2.46s/it][A[A

 10%|█         | 7/70 [00:17<02:34,  2.46s/it][A[A

 11%|█▏        | 8/70 [00:19<02:32,  2.47s/it][A[A

 13%|█▎        | 9/70 [00:22<02:31,  2.49s/it][A[A

 14%|█▍        | 10/70 [00:24<02:29,  2.49s/it][A[A

 16%|█▌        | 11/70 [00:27<02:26,  2.49s/it][A[A

Batch loss: 1.331939458847046




 17%|█▋        | 12/70 [00:29<02:22,  2.45s/it][A[A

 19%|█▊        | 13/70 [00:32<02:18,  2.43s/it][A[A

 20%|██        | 14/70 [00:34<02:15,  2.41s/it][A[A

 21%|██▏       | 15/70 [00:36<02:12,  2.41s/it][A[A

 23%|██▎       | 16/70 [00:39<02:11,  2.43s/it][A[A

 24%|██▍       | 17/70 [00:41<02:10,  2.46s/it][A[A

 26%|██▌       | 18/70 [00:44<02:10,  2.50s/it][A[A

 27%|██▋       | 19/70 [00:46<02:07,  2.49s/it][A[A

 29%|██▊       | 20/70 [00:49<02:05,  2.51s/it][A[A

 30%|███       | 21/70 [00:51<02:02,  2.49s/it][A[A

Batch loss: 1.592201590538025




 31%|███▏      | 22/70 [00:54<01:59,  2.49s/it][A[A

 33%|███▎      | 23/70 [00:56<01:56,  2.48s/it][A[A

 34%|███▍      | 24/70 [00:59<01:54,  2.48s/it][A[A

 36%|███▌      | 25/70 [01:01<01:51,  2.48s/it][A[A

 37%|███▋      | 26/70 [01:04<01:49,  2.49s/it][A[A

 39%|███▊      | 27/70 [01:06<01:45,  2.45s/it][A[A

 40%|████      | 28/70 [01:09<01:41,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:11<01:39,  2.42s/it][A[A

 43%|████▎     | 30/70 [01:13<01:36,  2.42s/it][A[A

 44%|████▍     | 31/70 [01:16<01:33,  2.39s/it][A[A

Batch loss: 1.3182772397994995




 46%|████▌     | 32/70 [01:18<01:30,  2.38s/it][A[A

 47%|████▋     | 33/70 [01:20<01:27,  2.37s/it][A[A

 49%|████▊     | 34/70 [01:23<01:25,  2.36s/it][A[A

 50%|█████     | 35/70 [01:25<01:22,  2.35s/it][A[A

 51%|█████▏    | 36/70 [01:27<01:19,  2.35s/it][A[A

 53%|█████▎    | 37/70 [01:30<01:17,  2.35s/it][A[A

 54%|█████▍    | 38/70 [01:32<01:14,  2.34s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:12,  2.35s/it][A[A

 57%|█████▋    | 40/70 [01:37<01:10,  2.35s/it][A[A

 59%|█████▊    | 41/70 [01:39<01:08,  2.35s/it][A[A

Batch loss: 1.3173346519470215




 60%|██████    | 42/70 [01:41<01:06,  2.36s/it][A[A

 61%|██████▏   | 43/70 [01:44<01:04,  2.38s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:02,  2.40s/it][A[A

 64%|██████▍   | 45/70 [01:49<01:01,  2.44s/it][A[A

 66%|██████▌   | 46/70 [01:51<00:58,  2.45s/it][A[A

 67%|██████▋   | 47/70 [01:54<00:56,  2.47s/it][A[A

 69%|██████▊   | 48/70 [01:56<00:54,  2.46s/it][A[A

 70%|███████   | 49/70 [01:59<00:51,  2.45s/it][A[A

 71%|███████▏  | 50/70 [02:01<00:49,  2.46s/it][A[A

 73%|███████▎  | 51/70 [02:04<00:46,  2.45s/it][A[A

Batch loss: 1.4006876945495605




 74%|███████▍  | 52/70 [02:06<00:44,  2.47s/it][A[A

 76%|███████▌  | 53/70 [02:09<00:41,  2.45s/it][A[A

 77%|███████▋  | 54/70 [02:11<00:38,  2.43s/it][A[A

 79%|███████▊  | 55/70 [02:13<00:36,  2.40s/it][A[A

 80%|████████  | 56/70 [02:16<00:33,  2.41s/it][A[A

 81%|████████▏ | 57/70 [02:18<00:31,  2.39s/it][A[A

 83%|████████▎ | 58/70 [02:20<00:28,  2.39s/it][A[A

 84%|████████▍ | 59/70 [02:23<00:26,  2.37s/it][A[A

 86%|████████▌ | 60/70 [02:25<00:23,  2.37s/it][A[A

 87%|████████▋ | 61/70 [02:28<00:21,  2.37s/it][A[A

Batch loss: 1.3176504373550415




 89%|████████▊ | 62/70 [02:30<00:19,  2.40s/it][A[A

 90%|█████████ | 63/70 [02:32<00:16,  2.37s/it][A[A

 91%|█████████▏| 64/70 [02:35<00:14,  2.37s/it][A[A

 93%|█████████▎| 65/70 [02:37<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:39<00:09,  2.36s/it][A[A

 96%|█████████▌| 67/70 [02:42<00:07,  2.37s/it][A[A

 97%|█████████▋| 68/70 [02:44<00:04,  2.36s/it][A[A

 99%|█████████▊| 69/70 [02:46<00:02,  2.36s/it][A[A

100%|██████████| 70/70 [02:47<00:00,  2.40s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4583785363606043




 12%|█▎        | 1/8 [00:02<00:17,  2.46s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.45s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.42s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.43s/it][A[A

 62%|██████▎   | 5/8 [00:12<00:07,  2.41s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.41s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.39s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.30s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.374318689107895
Epoch:  90




  1%|▏         | 1/70 [00:02<02:53,  2.52s/it][A[A

Batch loss: 1.5360954999923706




  3%|▎         | 2/70 [00:05<02:51,  2.52s/it][A[A

  4%|▍         | 3/70 [00:07<02:47,  2.50s/it][A[A

  6%|▌         | 4/70 [00:09<02:43,  2.47s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.44s/it][A[A

  9%|▊         | 6/70 [00:14<02:35,  2.43s/it][A[A

 10%|█         | 7/70 [00:17<02:33,  2.44s/it][A[A

 11%|█▏        | 8/70 [00:19<02:29,  2.42s/it][A[A

 13%|█▎        | 9/70 [00:21<02:26,  2.41s/it][A[A

 14%|█▍        | 10/70 [00:24<02:24,  2.40s/it][A[A

 16%|█▌        | 11/70 [00:26<02:20,  2.38s/it][A[A

Batch loss: 1.465196132659912




 17%|█▋        | 12/70 [00:28<02:17,  2.37s/it][A[A

 19%|█▊        | 13/70 [00:31<02:14,  2.36s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.35s/it][A[A

 21%|██▏       | 15/70 [00:35<02:08,  2.34s/it][A[A

 23%|██▎       | 16/70 [00:38<02:06,  2.34s/it][A[A

 24%|██▍       | 17/70 [00:40<02:03,  2.33s/it][A[A

 26%|██▌       | 18/70 [00:42<02:02,  2.35s/it][A[A

 27%|██▋       | 19/70 [00:45<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:50<01:57,  2.41s/it][A[A

Batch loss: 1.5110015869140625




 31%|███▏      | 22/70 [00:52<01:57,  2.44s/it][A[A

 33%|███▎      | 23/70 [00:55<01:55,  2.46s/it][A[A

 34%|███▍      | 24/70 [00:57<01:53,  2.47s/it][A[A

 36%|███▌      | 25/70 [01:00<01:51,  2.47s/it][A[A

 37%|███▋      | 26/70 [01:02<01:48,  2.47s/it][A[A

 39%|███▊      | 27/70 [01:05<01:46,  2.48s/it][A[A

 40%|████      | 28/70 [01:07<01:43,  2.47s/it][A[A

 41%|████▏     | 29/70 [01:10<01:41,  2.46s/it][A[A

 43%|████▎     | 30/70 [01:12<01:38,  2.46s/it][A[A

 44%|████▍     | 31/70 [01:15<01:36,  2.48s/it][A[A

Batch loss: 1.4704898595809937




 46%|████▌     | 32/70 [01:17<01:33,  2.46s/it][A[A

 47%|████▋     | 33/70 [01:19<01:30,  2.43s/it][A[A

 49%|████▊     | 34/70 [01:22<01:27,  2.43s/it][A[A

 50%|█████     | 35/70 [01:24<01:24,  2.40s/it][A[A

 51%|█████▏    | 36/70 [01:27<01:21,  2.40s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:18,  2.39s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:16,  2.38s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:13,  2.37s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:11,  2.38s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:09,  2.41s/it][A[A

Batch loss: 1.3930789232254028




 60%|██████    | 42/70 [01:41<01:07,  2.40s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:04,  2.39s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:02,  2.39s/it][A[A

 64%|██████▍   | 45/70 [01:48<01:00,  2.41s/it][A[A

 66%|██████▌   | 46/70 [01:50<00:57,  2.39s/it][A[A

 67%|██████▋   | 47/70 [01:53<00:54,  2.39s/it][A[A

 69%|██████▊   | 48/70 [01:55<00:53,  2.42s/it][A[A

 70%|███████   | 49/70 [01:58<00:51,  2.44s/it][A[A

 71%|███████▏  | 50/70 [02:00<00:49,  2.47s/it][A[A

 73%|███████▎  | 51/70 [02:03<00:46,  2.47s/it][A[A

Batch loss: 1.3793476819992065




 74%|███████▍  | 52/70 [02:05<00:44,  2.48s/it][A[A

 76%|███████▌  | 53/70 [02:08<00:42,  2.48s/it][A[A

 77%|███████▋  | 54/70 [02:10<00:39,  2.47s/it][A[A

 79%|███████▊  | 55/70 [02:13<00:37,  2.48s/it][A[A

 80%|████████  | 56/70 [02:15<00:34,  2.48s/it][A[A

 81%|████████▏ | 57/70 [02:18<00:32,  2.50s/it][A[A

 83%|████████▎ | 58/70 [02:20<00:29,  2.49s/it][A[A

 84%|████████▍ | 59/70 [02:23<00:27,  2.45s/it][A[A

 86%|████████▌ | 60/70 [02:25<00:24,  2.41s/it][A[A

 87%|████████▋ | 61/70 [02:27<00:21,  2.41s/it][A[A

Batch loss: 1.3474100828170776




 89%|████████▊ | 62/70 [02:30<00:19,  2.39s/it][A[A

 90%|█████████ | 63/70 [02:32<00:16,  2.36s/it][A[A

 91%|█████████▏| 64/70 [02:34<00:14,  2.37s/it][A[A

 93%|█████████▎| 65/70 [02:37<00:11,  2.36s/it][A[A

 94%|█████████▍| 66/70 [02:39<00:09,  2.36s/it][A[A

 96%|█████████▌| 67/70 [02:41<00:07,  2.38s/it][A[A

 97%|█████████▋| 68/70 [02:44<00:04,  2.38s/it][A[A

 99%|█████████▊| 69/70 [02:46<00:02,  2.39s/it][A[A

100%|██████████| 70/70 [02:47<00:00,  2.39s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4286731668881008




 12%|█▎        | 1/8 [00:02<00:16,  2.38s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.36s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.36s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.33s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.30s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.31s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.33s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.23s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.2734053134918213
Epoch:  91




  1%|▏         | 1/70 [00:02<03:06,  2.70s/it][A[A

Batch loss: 1.3294219970703125




  3%|▎         | 2/70 [00:05<03:06,  2.74s/it][A[A

  4%|▍         | 3/70 [00:08<02:58,  2.67s/it][A[A

  6%|▌         | 4/70 [00:10<02:52,  2.62s/it][A[A

  7%|▋         | 5/70 [00:13<02:47,  2.57s/it][A[A

  9%|▊         | 6/70 [00:15<02:43,  2.55s/it][A[A

 10%|█         | 7/70 [00:17<02:38,  2.52s/it][A[A

 11%|█▏        | 8/70 [00:20<02:35,  2.51s/it][A[A

 13%|█▎        | 9/70 [00:22<02:29,  2.46s/it][A[A

 14%|█▍        | 10/70 [00:25<02:26,  2.44s/it][A[A

 16%|█▌        | 11/70 [00:27<02:21,  2.40s/it][A[A

Batch loss: 1.3957411050796509




 17%|█▋        | 12/70 [00:30<02:21,  2.44s/it][A[A

 19%|█▊        | 13/70 [00:32<02:19,  2.45s/it][A[A

 20%|██        | 14/70 [00:34<02:17,  2.45s/it][A[A

 21%|██▏       | 15/70 [00:37<02:14,  2.45s/it][A[A

 23%|██▎       | 16/70 [00:39<02:13,  2.47s/it][A[A

 24%|██▍       | 17/70 [00:42<02:11,  2.48s/it][A[A

 26%|██▌       | 18/70 [00:44<02:09,  2.48s/it][A[A

 27%|██▋       | 19/70 [00:47<02:07,  2.50s/it][A[A

 29%|██▊       | 20/70 [00:49<02:04,  2.50s/it][A[A

 30%|███       | 21/70 [00:52<02:01,  2.48s/it][A[A

Batch loss: 1.586686372756958




 31%|███▏      | 22/70 [00:54<01:59,  2.48s/it][A[A

 33%|███▎      | 23/70 [00:57<01:55,  2.45s/it][A[A

 34%|███▍      | 24/70 [00:59<01:51,  2.43s/it][A[A

 36%|███▌      | 25/70 [01:02<01:48,  2.42s/it][A[A

 37%|███▋      | 26/70 [01:04<01:46,  2.41s/it][A[A

 39%|███▊      | 27/70 [01:06<01:42,  2.39s/it][A[A

 40%|████      | 28/70 [01:09<01:40,  2.39s/it][A[A

 41%|████▏     | 29/70 [01:11<01:38,  2.41s/it][A[A

 43%|████▎     | 30/70 [01:13<01:35,  2.39s/it][A[A

 44%|████▍     | 31/70 [01:16<01:32,  2.37s/it][A[A

Batch loss: 1.343599796295166




 46%|████▌     | 32/70 [01:18<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:21<01:27,  2.37s/it][A[A

 49%|████▊     | 34/70 [01:23<01:24,  2.36s/it][A[A

 50%|█████     | 35/70 [01:25<01:22,  2.37s/it][A[A

 51%|█████▏    | 36/70 [01:28<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:30<01:17,  2.35s/it][A[A

 54%|█████▍    | 38/70 [01:32<01:15,  2.35s/it][A[A

 56%|█████▌    | 39/70 [01:35<01:13,  2.37s/it][A[A

 57%|█████▋    | 40/70 [01:37<01:12,  2.40s/it][A[A

 59%|█████▊    | 41/70 [01:40<01:10,  2.44s/it][A[A

Batch loss: 1.5904096364974976




 60%|██████    | 42/70 [01:42<01:08,  2.45s/it][A[A

 61%|██████▏   | 43/70 [01:45<01:06,  2.46s/it][A[A

 63%|██████▎   | 44/70 [01:47<01:04,  2.47s/it][A[A

 64%|██████▍   | 45/70 [01:50<01:01,  2.48s/it][A[A

 66%|██████▌   | 46/70 [01:52<00:59,  2.48s/it][A[A

 67%|██████▋   | 47/70 [01:55<00:57,  2.50s/it][A[A

 69%|██████▊   | 48/70 [01:57<00:55,  2.50s/it][A[A

 70%|███████   | 49/70 [02:00<00:52,  2.50s/it][A[A

 71%|███████▏  | 50/70 [02:02<00:49,  2.46s/it][A[A

 73%|███████▎  | 51/70 [02:04<00:46,  2.42s/it][A[A

Batch loss: 1.3283259868621826




 74%|███████▍  | 52/70 [02:07<00:43,  2.41s/it][A[A

 76%|███████▌  | 53/70 [02:09<00:41,  2.42s/it][A[A

 77%|███████▋  | 54/70 [02:12<00:38,  2.40s/it][A[A

 79%|███████▊  | 55/70 [02:14<00:35,  2.38s/it][A[A

 80%|████████  | 56/70 [02:16<00:33,  2.38s/it][A[A

 81%|████████▏ | 57/70 [02:19<00:30,  2.37s/it][A[A

 83%|████████▎ | 58/70 [02:21<00:28,  2.37s/it][A[A

 84%|████████▍ | 59/70 [02:23<00:25,  2.33s/it][A[A

 86%|████████▌ | 60/70 [02:26<00:23,  2.34s/it][A[A

 87%|████████▋ | 61/70 [02:28<00:21,  2.33s/it][A[A

Batch loss: 1.4132064580917358




 89%|████████▊ | 62/70 [02:30<00:18,  2.34s/it][A[A

 90%|█████████ | 63/70 [02:33<00:16,  2.32s/it][A[A

 91%|█████████▏| 64/70 [02:35<00:13,  2.31s/it][A[A

 93%|█████████▎| 65/70 [02:37<00:11,  2.32s/it][A[A

 94%|█████████▍| 66/70 [02:40<00:09,  2.33s/it][A[A

 96%|█████████▌| 67/70 [02:42<00:07,  2.37s/it][A[A

 97%|█████████▋| 68/70 [02:44<00:04,  2.38s/it][A[A

 99%|█████████▊| 69/70 [02:47<00:02,  2.39s/it][A[A

100%|██████████| 70/70 [02:48<00:00,  2.40s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4392867037228176




 12%|█▎        | 1/8 [00:02<00:16,  2.34s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.34s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.33s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.33s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.32s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.32s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.31s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.22s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3482342660427094
Epoch:  92




  1%|▏         | 1/70 [00:02<02:43,  2.37s/it][A[A

Batch loss: 1.2855603694915771




  3%|▎         | 2/70 [00:04<02:40,  2.35s/it][A[A

  4%|▍         | 3/70 [00:06<02:36,  2.34s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.36s/it][A[A

  7%|▋         | 5/70 [00:11<02:32,  2.34s/it][A[A

  9%|▊         | 6/70 [00:13<02:28,  2.32s/it][A[A

 10%|█         | 7/70 [00:16<02:26,  2.32s/it][A[A

 11%|█▏        | 8/70 [00:18<02:23,  2.31s/it][A[A

 13%|█▎        | 9/70 [00:20<02:20,  2.30s/it][A[A

 14%|█▍        | 10/70 [00:23<02:17,  2.29s/it][A[A

 16%|█▌        | 11/70 [00:25<02:14,  2.28s/it][A[A

Batch loss: 1.5281195640563965




 17%|█▋        | 12/70 [00:27<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:12,  2.33s/it][A[A

 20%|██        | 14/70 [00:32<02:10,  2.33s/it][A[A

 21%|██▏       | 15/70 [00:34<02:07,  2.31s/it][A[A

 23%|██▎       | 16/70 [00:37<02:05,  2.32s/it][A[A

 24%|██▍       | 17/70 [00:39<02:02,  2.32s/it][A[A

 26%|██▌       | 18/70 [00:41<02:02,  2.35s/it][A[A

 27%|██▋       | 19/70 [00:44<02:00,  2.36s/it][A[A

 29%|██▊       | 20/70 [00:46<01:58,  2.38s/it][A[A

 30%|███       | 21/70 [00:49<01:56,  2.39s/it][A[A

Batch loss: 1.5244702100753784




 31%|███▏      | 22/70 [00:51<01:55,  2.41s/it][A[A

 33%|███▎      | 23/70 [00:53<01:53,  2.42s/it][A[A

 34%|███▍      | 24/70 [00:56<01:51,  2.43s/it][A[A

 36%|███▌      | 25/70 [00:58<01:48,  2.41s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.40s/it][A[A

 39%|███▊      | 27/70 [01:03<01:44,  2.42s/it][A[A

 40%|████      | 28/70 [01:06<01:42,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:08<01:37,  2.39s/it][A[A

 43%|████▎     | 30/70 [01:10<01:34,  2.36s/it][A[A

 44%|████▍     | 31/70 [01:12<01:31,  2.34s/it][A[A

Batch loss: 1.343641996383667




 46%|████▌     | 32/70 [01:15<01:29,  2.36s/it][A[A

 47%|████▋     | 33/70 [01:17<01:26,  2.34s/it][A[A

 49%|████▊     | 34/70 [01:19<01:23,  2.32s/it][A[A

 50%|█████     | 35/70 [01:22<01:20,  2.30s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:18,  2.31s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:16,  2.30s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:13,  2.31s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:11,  2.29s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:08,  2.30s/it][A[A

 59%|█████▊    | 41/70 [01:35<01:06,  2.29s/it][A[A

Batch loss: 1.3361337184906006




 60%|██████    | 42/70 [01:38<01:05,  2.33s/it][A[A

 61%|██████▏   | 43/70 [01:40<01:02,  2.32s/it][A[A

 63%|██████▎   | 44/70 [01:42<01:00,  2.31s/it][A[A

 64%|██████▍   | 45/70 [01:45<00:57,  2.31s/it][A[A

 66%|██████▌   | 46/70 [01:47<00:56,  2.34s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:54,  2.37s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:53,  2.41s/it][A[A

 70%|███████   | 49/70 [01:55<00:50,  2.42s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:48,  2.42s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:45,  2.42s/it][A[A

Batch loss: 1.4009701013565063




 74%|███████▍  | 52/70 [02:02<00:43,  2.42s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:40,  2.41s/it][A[A

 77%|███████▋  | 54/70 [02:07<00:38,  2.42s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:36,  2.43s/it][A[A

 80%|████████  | 56/70 [02:11<00:33,  2.42s/it][A[A

 81%|████████▏ | 57/70 [02:14<00:31,  2.43s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:28,  2.39s/it][A[A

 84%|████████▍ | 59/70 [02:19<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:21<00:23,  2.40s/it][A[A

 87%|████████▋ | 61/70 [02:23<00:21,  2.40s/it][A[A

Batch loss: 1.4496110677719116




 89%|████████▊ | 62/70 [02:26<00:19,  2.40s/it][A[A

 90%|█████████ | 63/70 [02:28<00:16,  2.41s/it][A[A

 91%|█████████▏| 64/70 [02:31<00:14,  2.41s/it][A[A

 93%|█████████▎| 65/70 [02:33<00:12,  2.41s/it][A[A

 94%|█████████▍| 66/70 [02:36<00:09,  2.41s/it][A[A

 96%|█████████▌| 67/70 [02:38<00:07,  2.40s/it][A[A

 97%|█████████▋| 68/70 [02:40<00:04,  2.42s/it][A[A

 99%|█████████▊| 69/70 [02:43<00:02,  2.41s/it][A[A

100%|██████████| 70/70 [02:44<00:00,  2.34s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4510465553828649




 12%|█▎        | 1/8 [00:02<00:16,  2.30s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.27s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.25s/it][A[A

 50%|█████     | 4/8 [00:08<00:08,  2.25s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.24s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.22s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.21s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.13s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3695745170116425
Epoch:  93




  1%|▏         | 1/70 [00:02<02:40,  2.32s/it][A[A

Batch loss: 1.3178905248641968




  3%|▎         | 2/70 [00:04<02:39,  2.35s/it][A[A

  4%|▍         | 3/70 [00:07<02:36,  2.33s/it][A[A

  6%|▌         | 4/70 [00:09<02:33,  2.32s/it][A[A

  7%|▋         | 5/70 [00:11<02:30,  2.32s/it][A[A

  9%|▊         | 6/70 [00:13<02:28,  2.32s/it][A[A

 10%|█         | 7/70 [00:16<02:25,  2.31s/it][A[A

 11%|█▏        | 8/70 [00:18<02:23,  2.31s/it][A[A

 13%|█▎        | 9/70 [00:20<02:20,  2.30s/it][A[A

 14%|█▍        | 10/70 [00:23<02:17,  2.29s/it][A[A

 16%|█▌        | 11/70 [00:25<02:16,  2.32s/it][A[A

Batch loss: 1.3309499025344849




 17%|█▋        | 12/70 [00:27<02:15,  2.34s/it][A[A

 19%|█▊        | 13/70 [00:30<02:14,  2.36s/it][A[A

 20%|██        | 14/70 [00:32<02:13,  2.39s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.38s/it][A[A

 23%|██▎       | 16/70 [00:37<02:09,  2.39s/it][A[A

 24%|██▍       | 17/70 [00:39<02:06,  2.38s/it][A[A

 26%|██▌       | 18/70 [00:42<02:03,  2.38s/it][A[A

 27%|██▋       | 19/70 [00:44<02:01,  2.37s/it][A[A

 29%|██▊       | 20/70 [00:46<01:58,  2.37s/it][A[A

 30%|███       | 21/70 [00:49<01:59,  2.43s/it][A[A

Batch loss: 1.3476909399032593




 31%|███▏      | 22/70 [00:52<01:57,  2.44s/it][A[A

 33%|███▎      | 23/70 [00:54<01:54,  2.43s/it][A[A

 34%|███▍      | 24/70 [00:56<01:50,  2.41s/it][A[A

 36%|███▌      | 25/70 [00:59<01:48,  2.41s/it][A[A

 37%|███▋      | 26/70 [01:01<01:45,  2.40s/it][A[A

 39%|███▊      | 27/70 [01:03<01:42,  2.38s/it][A[A

 40%|████      | 28/70 [01:06<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:08<01:36,  2.36s/it][A[A

 43%|████▎     | 30/70 [01:10<01:34,  2.36s/it][A[A

 44%|████▍     | 31/70 [01:13<01:31,  2.35s/it][A[A

Batch loss: 1.324389934539795




 46%|████▌     | 32/70 [01:15<01:29,  2.35s/it][A[A

 47%|████▋     | 33/70 [01:17<01:27,  2.37s/it][A[A

 49%|████▊     | 34/70 [01:20<01:25,  2.36s/it][A[A

 50%|█████     | 35/70 [01:22<01:22,  2.36s/it][A[A

 51%|█████▏    | 36/70 [01:25<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:27<01:17,  2.35s/it][A[A

 54%|█████▍    | 38/70 [01:29<01:15,  2.36s/it][A[A

 56%|█████▌    | 39/70 [01:32<01:13,  2.38s/it][A[A

 57%|█████▋    | 40/70 [01:34<01:12,  2.40s/it][A[A

 59%|█████▊    | 41/70 [01:37<01:10,  2.42s/it][A[A

Batch loss: 1.3505245447158813




 60%|██████    | 42/70 [01:39<01:08,  2.45s/it][A[A

 61%|██████▏   | 43/70 [01:42<01:06,  2.46s/it][A[A

 63%|██████▎   | 44/70 [01:44<01:04,  2.47s/it][A[A

 64%|██████▍   | 45/70 [01:47<01:01,  2.47s/it][A[A

 66%|██████▌   | 46/70 [01:49<00:59,  2.48s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:56,  2.46s/it][A[A

 69%|██████▊   | 48/70 [01:54<00:54,  2.47s/it][A[A

 70%|███████   | 49/70 [01:56<00:51,  2.47s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:48,  2.44s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:45,  2.40s/it][A[A

Batch loss: 1.3345980644226074




 74%|███████▍  | 52/70 [02:04<00:43,  2.41s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:40,  2.39s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:38,  2.39s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:35,  2.38s/it][A[A

 80%|████████  | 56/70 [02:13<00:33,  2.38s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:30,  2.37s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:28,  2.38s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.37s/it][A[A

 86%|████████▌ | 60/70 [02:22<00:23,  2.36s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:21,  2.36s/it][A[A

Batch loss: 1.3305529356002808




 89%|████████▊ | 62/70 [02:27<00:18,  2.36s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.36s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.35s/it][A[A

 93%|█████████▎| 65/70 [02:34<00:11,  2.34s/it][A[A

 94%|█████████▍| 66/70 [02:37<00:09,  2.37s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.40s/it][A[A

 97%|█████████▋| 68/70 [02:42<00:04,  2.42s/it][A[A

 99%|█████████▊| 69/70 [02:44<00:02,  2.44s/it][A[A

100%|██████████| 70/70 [02:45<00:00,  2.36s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.433884356703077




 12%|█▎        | 1/8 [00:02<00:16,  2.41s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.40s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.40s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.40s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.39s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.39s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.37s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.27s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.4842498302459717
Epoch:  94




  1%|▏         | 1/70 [00:02<02:44,  2.38s/it][A[A

Batch loss: 1.4039864540100098




  3%|▎         | 2/70 [00:04<02:42,  2.39s/it][A[A

  4%|▍         | 3/70 [00:07<02:41,  2.41s/it][A[A

  6%|▌         | 4/70 [00:09<02:37,  2.38s/it][A[A

  7%|▋         | 5/70 [00:11<02:34,  2.37s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.36s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.35s/it][A[A

 11%|█▏        | 8/70 [00:18<02:25,  2.34s/it][A[A

 13%|█▎        | 9/70 [00:21<02:22,  2.34s/it][A[A

 14%|█▍        | 10/70 [00:23<02:20,  2.34s/it][A[A

 16%|█▌        | 11/70 [00:25<02:18,  2.35s/it][A[A

Batch loss: 1.3616617918014526




 17%|█▋        | 12/70 [00:28<02:16,  2.35s/it][A[A

 19%|█▊        | 13/70 [00:30<02:13,  2.35s/it][A[A

 20%|██        | 14/70 [00:33<02:12,  2.36s/it][A[A

 21%|██▏       | 15/70 [00:35<02:10,  2.37s/it][A[A

 23%|██▎       | 16/70 [00:37<02:07,  2.37s/it][A[A

 24%|██▍       | 17/70 [00:40<02:06,  2.38s/it][A[A

 26%|██▌       | 18/70 [00:42<02:05,  2.42s/it][A[A

 27%|██▋       | 19/70 [00:45<02:03,  2.41s/it][A[A

 29%|██▊       | 20/70 [00:47<02:01,  2.43s/it][A[A

 30%|███       | 21/70 [00:50<01:59,  2.44s/it][A[A

Batch loss: 1.5731070041656494




 31%|███▏      | 22/70 [00:52<01:58,  2.48s/it][A[A

 33%|███▎      | 23/70 [00:55<01:57,  2.49s/it][A[A

 34%|███▍      | 24/70 [00:57<01:55,  2.52s/it][A[A

 36%|███▌      | 25/70 [01:00<01:53,  2.52s/it][A[A

 37%|███▋      | 26/70 [01:02<01:50,  2.51s/it][A[A

 39%|███▊      | 27/70 [01:05<01:47,  2.50s/it][A[A

 40%|████      | 28/70 [01:07<01:45,  2.51s/it][A[A

 41%|████▏     | 29/70 [01:10<01:40,  2.46s/it][A[A

 43%|████▎     | 30/70 [01:12<01:37,  2.43s/it][A[A

 44%|████▍     | 31/70 [01:14<01:33,  2.40s/it][A[A

Batch loss: 1.3589645624160767




 46%|████▌     | 32/70 [01:17<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:19<01:28,  2.39s/it][A[A

 49%|████▊     | 34/70 [01:21<01:25,  2.38s/it][A[A

 50%|█████     | 35/70 [01:24<01:22,  2.36s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:18,  2.38s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:15,  2.36s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:13,  2.38s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:11,  2.37s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:08,  2.36s/it][A[A

Batch loss: 1.6389943361282349




 60%|██████    | 42/70 [01:40<01:05,  2.35s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:03,  2.35s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:01,  2.36s/it][A[A

 64%|██████▍   | 45/70 [01:47<00:59,  2.40s/it][A[A

 66%|██████▌   | 46/70 [01:50<00:58,  2.42s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:55,  2.43s/it][A[A

 69%|██████▊   | 48/70 [01:55<00:53,  2.44s/it][A[A

 70%|███████   | 49/70 [01:57<00:51,  2.45s/it][A[A

 71%|███████▏  | 50/70 [02:00<00:49,  2.46s/it][A[A

 73%|███████▎  | 51/70 [02:02<00:46,  2.46s/it][A[A

Batch loss: 1.3078467845916748




 74%|███████▍  | 52/70 [02:05<00:44,  2.46s/it][A[A

 76%|███████▌  | 53/70 [02:07<00:42,  2.47s/it][A[A

 77%|███████▋  | 54/70 [02:10<00:39,  2.47s/it][A[A

 79%|███████▊  | 55/70 [02:12<00:36,  2.45s/it][A[A

 80%|████████  | 56/70 [02:15<00:34,  2.48s/it][A[A

 81%|████████▏ | 57/70 [02:17<00:32,  2.48s/it][A[A

 83%|████████▎ | 58/70 [02:20<00:29,  2.48s/it][A[A

 84%|████████▍ | 59/70 [02:22<00:27,  2.47s/it][A[A

 86%|████████▌ | 60/70 [02:25<00:24,  2.46s/it][A[A

 87%|████████▋ | 61/70 [02:27<00:22,  2.46s/it][A[A

Batch loss: 1.343947410583496




 89%|████████▊ | 62/70 [02:29<00:19,  2.48s/it][A[A

 90%|█████████ | 63/70 [02:32<00:17,  2.47s/it][A[A

 91%|█████████▏| 64/70 [02:34<00:14,  2.48s/it][A[A

 93%|█████████▎| 65/70 [02:37<00:12,  2.48s/it][A[A

 94%|█████████▍| 66/70 [02:39<00:09,  2.47s/it][A[A

 96%|█████████▌| 67/70 [02:42<00:07,  2.46s/it][A[A

 97%|█████████▋| 68/70 [02:44<00:04,  2.46s/it][A[A

 99%|█████████▊| 69/70 [02:47<00:02,  2.42s/it][A[A

100%|██████████| 70/70 [02:47<00:00,  2.40s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4357851249831064




 12%|█▎        | 1/8 [00:02<00:16,  2.31s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.34s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.32s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.30s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.30s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.29s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.29s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.20s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.466445803642273
Epoch:  95




  1%|▏         | 1/70 [00:02<02:45,  2.39s/it][A[A

Batch loss: 1.4063336849212646




  3%|▎         | 2/70 [00:04<02:42,  2.39s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:36,  2.38s/it][A[A

  7%|▋         | 5/70 [00:11<02:33,  2.36s/it][A[A

  9%|▊         | 6/70 [00:14<02:31,  2.37s/it][A[A

 10%|█         | 7/70 [00:16<02:28,  2.36s/it][A[A

 11%|█▏        | 8/70 [00:18<02:26,  2.36s/it][A[A

 13%|█▎        | 9/70 [00:21<02:25,  2.38s/it][A[A

 14%|█▍        | 10/70 [00:23<02:25,  2.42s/it][A[A

 16%|█▌        | 11/70 [00:26<02:23,  2.42s/it][A[A

Batch loss: 1.4538172483444214




 17%|█▋        | 12/70 [00:28<02:21,  2.45s/it][A[A

 19%|█▊        | 13/70 [00:31<02:19,  2.44s/it][A[A

 20%|██        | 14/70 [00:33<02:17,  2.45s/it][A[A

 21%|██▏       | 15/70 [00:36<02:14,  2.45s/it][A[A

 23%|██▎       | 16/70 [00:38<02:12,  2.45s/it][A[A

 24%|██▍       | 17/70 [00:40<02:09,  2.44s/it][A[A

 26%|██▌       | 18/70 [00:43<02:06,  2.44s/it][A[A

 27%|██▋       | 19/70 [00:45<02:04,  2.44s/it][A[A

 29%|██▊       | 20/70 [00:48<02:00,  2.41s/it][A[A

 30%|███       | 21/70 [00:50<01:57,  2.39s/it][A[A

Batch loss: 1.3848193883895874




 31%|███▏      | 22/70 [00:52<01:54,  2.39s/it][A[A

 33%|███▎      | 23/70 [00:55<01:52,  2.39s/it][A[A

 34%|███▍      | 24/70 [00:57<01:49,  2.38s/it][A[A

 36%|███▌      | 25/70 [01:00<01:46,  2.37s/it][A[A

 37%|███▋      | 26/70 [01:02<01:44,  2.36s/it][A[A

 39%|███▊      | 27/70 [01:04<01:41,  2.36s/it][A[A

 40%|████      | 28/70 [01:07<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:09<01:36,  2.36s/it][A[A

 43%|████▎     | 30/70 [01:11<01:34,  2.36s/it][A[A

 44%|████▍     | 31/70 [01:14<01:31,  2.35s/it][A[A

Batch loss: 1.2965508699417114




 46%|████▌     | 32/70 [01:16<01:30,  2.39s/it][A[A

 47%|████▋     | 33/70 [01:18<01:27,  2.36s/it][A[A

 49%|████▊     | 34/70 [01:21<01:25,  2.37s/it][A[A

 50%|█████     | 35/70 [01:23<01:22,  2.36s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:21,  2.38s/it][A[A

 53%|█████▎    | 37/70 [01:28<01:19,  2.41s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:18,  2.44s/it][A[A

 56%|█████▌    | 39/70 [01:33<01:16,  2.45s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:13,  2.47s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:11,  2.46s/it][A[A

Batch loss: 1.407688021659851




 60%|██████    | 42/70 [01:41<01:09,  2.48s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:06,  2.46s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:04,  2.47s/it][A[A

 64%|██████▍   | 45/70 [01:48<01:01,  2.48s/it][A[A

 66%|██████▌   | 46/70 [01:50<00:59,  2.49s/it][A[A

 67%|██████▋   | 47/70 [01:53<00:56,  2.44s/it][A[A

 69%|██████▊   | 48/70 [01:55<00:53,  2.42s/it][A[A

 70%|███████   | 49/70 [01:57<00:50,  2.39s/it][A[A

 71%|███████▏  | 50/70 [02:00<00:48,  2.40s/it][A[A

 73%|███████▎  | 51/70 [02:02<00:45,  2.39s/it][A[A

Batch loss: 1.3515747785568237




 74%|███████▍  | 52/70 [02:05<00:42,  2.38s/it][A[A

 76%|███████▌  | 53/70 [02:07<00:40,  2.37s/it][A[A

 77%|███████▋  | 54/70 [02:09<00:37,  2.37s/it][A[A

 79%|███████▊  | 55/70 [02:12<00:35,  2.36s/it][A[A

 80%|████████  | 56/70 [02:14<00:33,  2.37s/it][A[A

 81%|████████▏ | 57/70 [02:16<00:30,  2.37s/it][A[A

 83%|████████▎ | 58/70 [02:19<00:28,  2.37s/it][A[A

 84%|████████▍ | 59/70 [02:21<00:25,  2.35s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:23,  2.35s/it][A[A

 87%|████████▋ | 61/70 [02:26<00:21,  2.37s/it][A[A

Batch loss: 1.4296636581420898




 89%|████████▊ | 62/70 [02:28<00:19,  2.40s/it][A[A

 90%|█████████ | 63/70 [02:31<00:16,  2.39s/it][A[A

 91%|█████████▏| 64/70 [02:33<00:14,  2.42s/it][A[A

 93%|█████████▎| 65/70 [02:36<00:12,  2.43s/it][A[A

 94%|█████████▍| 66/70 [02:38<00:09,  2.46s/it][A[A

 96%|█████████▌| 67/70 [02:41<00:07,  2.45s/it][A[A

 97%|█████████▋| 68/70 [02:43<00:04,  2.45s/it][A[A

 99%|█████████▊| 69/70 [02:46<00:02,  2.45s/it][A[A

100%|██████████| 70/70 [02:46<00:00,  2.38s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4358103496687753




 12%|█▎        | 1/8 [00:02<00:16,  2.43s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.41s/it][A[A

 38%|███▊      | 3/8 [00:07<00:12,  2.41s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.41s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:07,  2.38s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.35s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.33s/it][A[A

100%|██████████| 8/8 [00:18<00:00,  2.25s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.32190665602684
Epoch:  96




  1%|▏         | 1/70 [00:02<02:40,  2.33s/it][A[A

Batch loss: 1.343737244606018




  3%|▎         | 2/70 [00:04<02:39,  2.35s/it][A[A

  4%|▍         | 3/70 [00:07<02:37,  2.35s/it][A[A

  6%|▌         | 4/70 [00:09<02:35,  2.35s/it][A[A

  7%|▋         | 5/70 [00:11<02:32,  2.34s/it][A[A

  9%|▊         | 6/70 [00:14<02:30,  2.36s/it][A[A

 10%|█         | 7/70 [00:16<02:27,  2.34s/it][A[A

 11%|█▏        | 8/70 [00:18<02:26,  2.36s/it][A[A

 13%|█▎        | 9/70 [00:21<02:23,  2.35s/it][A[A

 14%|█▍        | 10/70 [00:23<02:23,  2.38s/it][A[A

 16%|█▌        | 11/70 [00:25<02:19,  2.36s/it][A[A

Batch loss: 1.3526958227157593




 17%|█▋        | 12/70 [00:28<02:17,  2.37s/it][A[A

 19%|█▊        | 13/70 [00:30<02:14,  2.36s/it][A[A

 20%|██        | 14/70 [00:33<02:12,  2.37s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.40s/it][A[A

 23%|██▎       | 16/70 [00:37<02:09,  2.40s/it][A[A

 24%|██▍       | 17/70 [00:40<02:08,  2.43s/it][A[A

 26%|██▌       | 18/70 [00:42<02:06,  2.44s/it][A[A

 27%|██▋       | 19/70 [00:45<02:05,  2.45s/it][A[A

 29%|██▊       | 20/70 [00:47<02:03,  2.46s/it][A[A

 30%|███       | 21/70 [00:50<02:00,  2.46s/it][A[A

Batch loss: 1.4524718523025513




 31%|███▏      | 22/70 [00:52<01:58,  2.46s/it][A[A

 33%|███▎      | 23/70 [00:55<01:55,  2.46s/it][A[A

 34%|███▍      | 24/70 [00:57<01:53,  2.47s/it][A[A

 36%|███▌      | 25/70 [01:00<01:49,  2.43s/it][A[A

 37%|███▋      | 26/70 [01:02<01:45,  2.41s/it][A[A

 39%|███▊      | 27/70 [01:04<01:43,  2.41s/it][A[A

 40%|████      | 28/70 [01:07<01:41,  2.43s/it][A[A

 41%|████▏     | 29/70 [01:09<01:40,  2.44s/it][A[A

 43%|████▎     | 30/70 [01:12<01:37,  2.45s/it][A[A

 44%|████▍     | 31/70 [01:14<01:36,  2.47s/it][A[A

Batch loss: 1.397384762763977




 46%|████▌     | 32/70 [01:17<01:33,  2.47s/it][A[A

 47%|████▋     | 33/70 [01:19<01:31,  2.48s/it][A[A

 49%|████▊     | 34/70 [01:22<01:29,  2.47s/it][A[A

 50%|█████     | 35/70 [01:24<01:26,  2.47s/it][A[A

 51%|█████▏    | 36/70 [01:27<01:24,  2.47s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:21,  2.48s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:17,  2.44s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:14,  2.39s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:10,  2.36s/it][A[A

 59%|█████▊    | 41/70 [01:38<01:08,  2.35s/it][A[A

Batch loss: 1.4272005558013916




 60%|██████    | 42/70 [01:41<01:06,  2.36s/it][A[A

 61%|██████▏   | 43/70 [01:43<01:03,  2.34s/it][A[A

 63%|██████▎   | 44/70 [01:45<01:00,  2.34s/it][A[A

 64%|██████▍   | 45/70 [01:48<00:58,  2.33s/it][A[A

 66%|██████▌   | 46/70 [01:50<00:55,  2.33s/it][A[A

 67%|██████▋   | 47/70 [01:52<00:53,  2.31s/it][A[A

 69%|██████▊   | 48/70 [01:55<00:50,  2.31s/it][A[A

 70%|███████   | 49/70 [01:57<00:48,  2.30s/it][A[A

 71%|███████▏  | 50/70 [01:59<00:45,  2.29s/it][A[A

 73%|███████▎  | 51/70 [02:01<00:43,  2.29s/it][A[A

Batch loss: 1.3558824062347412




 74%|███████▍  | 52/70 [02:04<00:41,  2.30s/it][A[A

 76%|███████▌  | 53/70 [02:06<00:38,  2.29s/it][A[A

 77%|███████▋  | 54/70 [02:08<00:36,  2.31s/it][A[A

 79%|███████▊  | 55/70 [02:11<00:34,  2.30s/it][A[A

 80%|████████  | 56/70 [02:13<00:32,  2.33s/it][A[A

 81%|████████▏ | 57/70 [02:15<00:30,  2.34s/it][A[A

 83%|████████▎ | 58/70 [02:18<00:28,  2.36s/it][A[A

 84%|████████▍ | 59/70 [02:20<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:23<00:23,  2.40s/it][A[A

 87%|████████▋ | 61/70 [02:25<00:21,  2.39s/it][A[A

Batch loss: 1.388056993484497




 89%|████████▊ | 62/70 [02:27<00:19,  2.40s/it][A[A

 90%|█████████ | 63/70 [02:30<00:16,  2.39s/it][A[A

 91%|█████████▏| 64/70 [02:32<00:14,  2.40s/it][A[A

 93%|█████████▎| 65/70 [02:35<00:11,  2.39s/it][A[A

 94%|█████████▍| 66/70 [02:37<00:09,  2.41s/it][A[A

 96%|█████████▌| 67/70 [02:39<00:07,  2.36s/it][A[A

 97%|█████████▋| 68/70 [02:42<00:04,  2.34s/it][A[A

 99%|█████████▊| 69/70 [02:44<00:02,  2.34s/it][A[A

100%|██████████| 70/70 [02:45<00:00,  2.36s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4261054396629333




 12%|█▎        | 1/8 [00:02<00:15,  2.25s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.25s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.24s/it][A[A

 50%|█████     | 4/8 [00:08<00:08,  2.24s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.23s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.22s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.21s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.13s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3349991738796234
Epoch:  97




  1%|▏         | 1/70 [00:02<02:42,  2.35s/it][A[A

Batch loss: 1.50785493850708




  3%|▎         | 2/70 [00:04<02:39,  2.35s/it][A[A

  4%|▍         | 3/70 [00:07<02:36,  2.34s/it][A[A

  6%|▌         | 4/70 [00:09<02:33,  2.33s/it][A[A

  7%|▋         | 5/70 [00:11<02:31,  2.33s/it][A[A

  9%|▊         | 6/70 [00:13<02:28,  2.32s/it][A[A

 10%|█         | 7/70 [00:16<02:26,  2.33s/it][A[A

 11%|█▏        | 8/70 [00:18<02:26,  2.36s/it][A[A

 13%|█▎        | 9/70 [00:21<02:24,  2.37s/it][A[A

 14%|█▍        | 10/70 [00:23<02:23,  2.39s/it][A[A

 16%|█▌        | 11/70 [00:25<02:21,  2.40s/it][A[A

Batch loss: 1.4299136400222778




 17%|█▋        | 12/70 [00:28<02:19,  2.40s/it][A[A

 19%|█▊        | 13/70 [00:30<02:16,  2.39s/it][A[A

 20%|██        | 14/70 [00:33<02:13,  2.39s/it][A[A

 21%|██▏       | 15/70 [00:35<02:10,  2.38s/it][A[A

 23%|██▎       | 16/70 [00:37<02:09,  2.39s/it][A[A

 24%|██▍       | 17/70 [00:40<02:06,  2.39s/it][A[A

 26%|██▌       | 18/70 [00:42<02:02,  2.36s/it][A[A

 27%|██▋       | 19/70 [00:44<01:59,  2.34s/it][A[A

 29%|██▊       | 20/70 [00:47<01:56,  2.33s/it][A[A

 30%|███       | 21/70 [00:49<01:55,  2.35s/it][A[A

Batch loss: 1.4171862602233887




 31%|███▏      | 22/70 [00:51<01:52,  2.34s/it][A[A

 33%|███▎      | 23/70 [00:54<01:48,  2.32s/it][A[A

 34%|███▍      | 24/70 [00:56<01:46,  2.31s/it][A[A

 36%|███▌      | 25/70 [00:58<01:43,  2.29s/it][A[A

 37%|███▋      | 26/70 [01:01<01:40,  2.29s/it][A[A

 39%|███▊      | 27/70 [01:03<01:38,  2.30s/it][A[A

 40%|████      | 28/70 [01:05<01:36,  2.29s/it][A[A

 41%|████▏     | 29/70 [01:07<01:33,  2.28s/it][A[A

 43%|████▎     | 30/70 [01:10<01:31,  2.29s/it][A[A

 44%|████▍     | 31/70 [01:12<01:29,  2.28s/it][A[A

Batch loss: 1.3819795846939087




 46%|████▌     | 32/70 [01:14<01:26,  2.29s/it][A[A

 47%|████▋     | 33/70 [01:17<01:24,  2.29s/it][A[A

 49%|████▊     | 34/70 [01:19<01:22,  2.30s/it][A[A

 50%|█████     | 35/70 [01:21<01:22,  2.34s/it][A[A

 51%|█████▏    | 36/70 [01:24<01:20,  2.36s/it][A[A

 53%|█████▎    | 37/70 [01:26<01:18,  2.37s/it][A[A

 54%|█████▍    | 38/70 [01:28<01:16,  2.38s/it][A[A

 56%|█████▌    | 39/70 [01:31<01:14,  2.39s/it][A[A

 57%|█████▋    | 40/70 [01:33<01:11,  2.39s/it][A[A

 59%|█████▊    | 41/70 [01:36<01:09,  2.40s/it][A[A

Batch loss: 1.3101242780685425




 60%|██████    | 42/70 [01:38<01:07,  2.42s/it][A[A

 61%|██████▏   | 43/70 [01:41<01:04,  2.40s/it][A[A

 63%|██████▎   | 44/70 [01:43<01:03,  2.43s/it][A[A

 64%|██████▍   | 45/70 [01:45<01:00,  2.44s/it][A[A

 66%|██████▌   | 46/70 [01:48<00:57,  2.41s/it][A[A

 67%|██████▋   | 47/70 [01:50<00:54,  2.38s/it][A[A

 69%|██████▊   | 48/70 [01:52<00:52,  2.37s/it][A[A

 70%|███████   | 49/70 [01:55<00:49,  2.36s/it][A[A

 71%|███████▏  | 50/70 [01:57<00:46,  2.34s/it][A[A

 73%|███████▎  | 51/70 [01:59<00:44,  2.33s/it][A[A

Batch loss: 1.3159822225570679




 74%|███████▍  | 52/70 [02:02<00:41,  2.33s/it][A[A

 76%|███████▌  | 53/70 [02:04<00:39,  2.32s/it][A[A

 77%|███████▋  | 54/70 [02:06<00:37,  2.32s/it][A[A

 79%|███████▊  | 55/70 [02:09<00:34,  2.31s/it][A[A

 80%|████████  | 56/70 [02:11<00:32,  2.30s/it][A[A

 81%|████████▏ | 57/70 [02:13<00:29,  2.29s/it][A[A

 83%|████████▎ | 58/70 [02:16<00:27,  2.29s/it][A[A

 84%|████████▍ | 59/70 [02:18<00:25,  2.30s/it][A[A

 86%|████████▌ | 60/70 [02:20<00:23,  2.30s/it][A[A

 87%|████████▋ | 61/70 [02:22<00:20,  2.30s/it][A[A

Batch loss: 1.3277900218963623




 89%|████████▊ | 62/70 [02:25<00:18,  2.30s/it][A[A

 90%|█████████ | 63/70 [02:27<00:16,  2.31s/it][A[A

 91%|█████████▏| 64/70 [02:29<00:13,  2.33s/it][A[A

 93%|█████████▎| 65/70 [02:32<00:11,  2.34s/it][A[A

 94%|█████████▍| 66/70 [02:34<00:09,  2.35s/it][A[A

 96%|█████████▌| 67/70 [02:37<00:07,  2.35s/it][A[A

 97%|█████████▋| 68/70 [02:39<00:04,  2.37s/it][A[A

 99%|█████████▊| 69/70 [02:41<00:02,  2.40s/it][A[A

100%|██████████| 70/70 [02:42<00:00,  2.32s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4275836365563528




 12%|█▎        | 1/8 [00:02<00:16,  2.40s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.36s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.33s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.31s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.26s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.23s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.20s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.13s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.327351450920105
Epoch:  98




  1%|▏         | 1/70 [00:02<03:01,  2.64s/it][A[A

Batch loss: 1.5388402938842773




  3%|▎         | 2/70 [00:05<02:54,  2.57s/it][A[A

  4%|▍         | 3/70 [00:07<02:48,  2.51s/it][A[A

  6%|▌         | 4/70 [00:09<02:42,  2.47s/it][A[A

  7%|▋         | 5/70 [00:12<02:38,  2.43s/it][A[A

  9%|▊         | 6/70 [00:14<02:33,  2.40s/it][A[A

 10%|█         | 7/70 [00:16<02:30,  2.40s/it][A[A

 11%|█▏        | 8/70 [00:19<02:27,  2.39s/it][A[A

 13%|█▎        | 9/70 [00:21<02:26,  2.39s/it][A[A

 14%|█▍        | 10/70 [00:23<02:22,  2.38s/it][A[A

 16%|█▌        | 11/70 [00:26<02:19,  2.37s/it][A[A

Batch loss: 1.4481956958770752




 17%|█▋        | 12/70 [00:28<02:17,  2.36s/it][A[A

 19%|█▊        | 13/70 [00:31<02:14,  2.36s/it][A[A

 20%|██        | 14/70 [00:33<02:11,  2.36s/it][A[A

 21%|██▏       | 15/70 [00:35<02:11,  2.39s/it][A[A

 23%|██▎       | 16/70 [00:38<02:09,  2.41s/it][A[A

 24%|██▍       | 17/70 [00:40<02:08,  2.43s/it][A[A

 26%|██▌       | 18/70 [00:43<02:06,  2.44s/it][A[A

 27%|██▋       | 19/70 [00:45<02:05,  2.46s/it][A[A

 29%|██▊       | 20/70 [00:48<02:03,  2.47s/it][A[A

 30%|███       | 21/70 [00:50<02:01,  2.48s/it][A[A

Batch loss: 1.3386517763137817




 31%|███▏      | 22/70 [00:53<01:58,  2.47s/it][A[A

 33%|███▎      | 23/70 [00:55<01:55,  2.47s/it][A[A

 34%|███▍      | 24/70 [00:58<01:53,  2.47s/it][A[A

 36%|███▌      | 25/70 [01:00<01:50,  2.46s/it][A[A

 37%|███▋      | 26/70 [01:02<01:46,  2.43s/it][A[A

 39%|███▊      | 27/70 [01:05<01:43,  2.41s/it][A[A

 40%|████      | 28/70 [01:07<01:41,  2.41s/it][A[A

 41%|████▏     | 29/70 [01:10<01:39,  2.43s/it][A[A

 43%|████▎     | 30/70 [01:12<01:37,  2.44s/it][A[A

 44%|████▍     | 31/70 [01:15<01:35,  2.45s/it][A[A

Batch loss: 1.3884246349334717




 46%|████▌     | 32/70 [01:17<01:33,  2.46s/it][A[A

 47%|████▋     | 33/70 [01:20<01:31,  2.48s/it][A[A

 49%|████▊     | 34/70 [01:22<01:31,  2.54s/it][A[A

 50%|█████     | 35/70 [01:25<01:29,  2.55s/it][A[A

 51%|█████▏    | 36/70 [01:27<01:25,  2.52s/it][A[A

 53%|█████▎    | 37/70 [01:30<01:22,  2.49s/it][A[A

 54%|█████▍    | 38/70 [01:32<01:20,  2.50s/it][A[A

 56%|█████▌    | 39/70 [01:35<01:15,  2.45s/it][A[A

 57%|█████▋    | 40/70 [01:37<01:13,  2.44s/it][A[A

 59%|█████▊    | 41/70 [01:39<01:09,  2.41s/it][A[A

Batch loss: 1.5598474740982056




 60%|██████    | 42/70 [01:42<01:07,  2.40s/it][A[A

 61%|██████▏   | 43/70 [01:44<01:04,  2.38s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:01,  2.37s/it][A[A

 64%|██████▍   | 45/70 [01:49<00:59,  2.36s/it][A[A

 66%|██████▌   | 46/70 [01:51<00:56,  2.36s/it][A[A

 67%|██████▋   | 47/70 [01:53<00:54,  2.36s/it][A[A

 69%|██████▊   | 48/70 [01:56<00:51,  2.35s/it][A[A

 70%|███████   | 49/70 [01:58<00:49,  2.34s/it][A[A

 71%|███████▏  | 50/70 [02:00<00:47,  2.35s/it][A[A

 73%|███████▎  | 51/70 [02:03<00:44,  2.35s/it][A[A

Batch loss: 1.3453757762908936




 74%|███████▍  | 52/70 [02:05<00:42,  2.36s/it][A[A

 76%|███████▌  | 53/70 [02:08<00:40,  2.35s/it][A[A

 77%|███████▋  | 54/70 [02:10<00:37,  2.35s/it][A[A

 79%|███████▊  | 55/70 [02:12<00:35,  2.36s/it][A[A

 80%|████████  | 56/70 [02:15<00:33,  2.40s/it][A[A

 81%|████████▏ | 57/70 [02:17<00:31,  2.42s/it][A[A

 83%|████████▎ | 58/70 [02:20<00:29,  2.45s/it][A[A

 84%|████████▍ | 59/70 [02:22<00:26,  2.45s/it][A[A

 86%|████████▌ | 60/70 [02:25<00:24,  2.46s/it][A[A

 87%|████████▋ | 61/70 [02:27<00:22,  2.45s/it][A[A

Batch loss: 1.3829606771469116




 89%|████████▊ | 62/70 [02:30<00:19,  2.45s/it][A[A

 90%|█████████ | 63/70 [02:32<00:17,  2.46s/it][A[A

 91%|█████████▏| 64/70 [02:35<00:14,  2.46s/it][A[A

 93%|█████████▎| 65/70 [02:37<00:12,  2.45s/it][A[A

 94%|█████████▍| 66/70 [02:39<00:09,  2.42s/it][A[A

 96%|█████████▌| 67/70 [02:42<00:07,  2.39s/it][A[A

 97%|█████████▋| 68/70 [02:44<00:04,  2.38s/it][A[A

 99%|█████████▊| 69/70 [02:46<00:02,  2.38s/it][A[A

100%|██████████| 70/70 [02:47<00:00,  2.39s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4546370148658752




 12%|█▎        | 1/8 [00:02<00:16,  2.29s/it][A[A

 25%|██▌       | 2/8 [00:04<00:13,  2.29s/it][A[A

 38%|███▊      | 3/8 [00:06<00:11,  2.28s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.27s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.28s/it][A[A

 75%|███████▌  | 6/8 [00:13<00:04,  2.28s/it][A[A

 88%|████████▊ | 7/8 [00:15<00:02,  2.27s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.17s/it]


  0%|          | 0/70 [00:00<?, ?it/s][A[A

Valid Loss:  2.3229551017284393
Epoch:  99




  1%|▏         | 1/70 [00:02<02:46,  2.42s/it][A[A

Batch loss: 1.560524582862854




  3%|▎         | 2/70 [00:04<02:43,  2.41s/it][A[A

  4%|▍         | 3/70 [00:07<02:39,  2.38s/it][A[A

  6%|▌         | 4/70 [00:09<02:36,  2.38s/it][A[A

  7%|▋         | 5/70 [00:11<02:33,  2.36s/it][A[A

  9%|▊         | 6/70 [00:14<02:32,  2.39s/it][A[A

 10%|█         | 7/70 [00:16<02:32,  2.42s/it][A[A

 11%|█▏        | 8/70 [00:19<02:31,  2.45s/it][A[A

 13%|█▎        | 9/70 [00:21<02:29,  2.45s/it][A[A

 14%|█▍        | 10/70 [00:24<02:27,  2.46s/it][A[A

 16%|█▌        | 11/70 [00:26<02:25,  2.46s/it][A[A

Batch loss: 1.3627629280090332




 17%|█▋        | 12/70 [00:29<02:23,  2.47s/it][A[A

 19%|█▊        | 13/70 [00:31<02:21,  2.48s/it][A[A

 20%|██        | 14/70 [00:34<02:18,  2.48s/it][A[A

 21%|██▏       | 15/70 [00:36<02:16,  2.47s/it][A[A

 23%|██▎       | 16/70 [00:39<02:13,  2.47s/it][A[A

 24%|██▍       | 17/70 [00:41<02:08,  2.42s/it][A[A

 26%|██▌       | 18/70 [00:43<02:05,  2.40s/it][A[A

 27%|██▋       | 19/70 [00:46<02:01,  2.38s/it][A[A

 29%|██▊       | 20/70 [00:48<01:59,  2.39s/it][A[A

 30%|███       | 21/70 [00:50<01:56,  2.37s/it][A[A

Batch loss: 1.6060140132904053




 31%|███▏      | 22/70 [00:53<01:54,  2.38s/it][A[A

 33%|███▎      | 23/70 [00:55<01:51,  2.37s/it][A[A

 34%|███▍      | 24/70 [00:57<01:48,  2.37s/it][A[A

 36%|███▌      | 25/70 [01:00<01:46,  2.36s/it][A[A

 37%|███▋      | 26/70 [01:02<01:43,  2.36s/it][A[A

 39%|███▊      | 27/70 [01:05<01:41,  2.37s/it][A[A

 40%|████      | 28/70 [01:07<01:39,  2.37s/it][A[A

 41%|████▏     | 29/70 [01:09<01:37,  2.37s/it][A[A

 43%|████▎     | 30/70 [01:12<01:34,  2.37s/it][A[A

 44%|████▍     | 31/70 [01:14<01:32,  2.37s/it][A[A

Batch loss: 1.4141371250152588




 46%|████▌     | 32/70 [01:16<01:31,  2.40s/it][A[A

 47%|████▋     | 33/70 [01:19<01:28,  2.38s/it][A[A

 49%|████▊     | 34/70 [01:21<01:27,  2.42s/it][A[A

 50%|█████     | 35/70 [01:24<01:25,  2.43s/it][A[A

 51%|█████▏    | 36/70 [01:26<01:23,  2.45s/it][A[A

 53%|█████▎    | 37/70 [01:29<01:20,  2.45s/it][A[A

 54%|█████▍    | 38/70 [01:31<01:18,  2.46s/it][A[A

 56%|█████▌    | 39/70 [01:34<01:16,  2.47s/it][A[A

 57%|█████▋    | 40/70 [01:36<01:14,  2.48s/it][A[A

 59%|█████▊    | 41/70 [01:39<01:11,  2.47s/it][A[A

Batch loss: 1.4643256664276123




 60%|██████    | 42/70 [01:41<01:09,  2.48s/it][A[A

 61%|██████▏   | 43/70 [01:44<01:06,  2.48s/it][A[A

 63%|██████▎   | 44/70 [01:46<01:04,  2.49s/it][A[A

 64%|██████▍   | 45/70 [01:48<01:01,  2.45s/it][A[A

 66%|██████▌   | 46/70 [01:51<00:59,  2.46s/it][A[A

 67%|██████▋   | 47/70 [01:53<00:56,  2.45s/it][A[A

 69%|██████▊   | 48/70 [01:56<00:53,  2.43s/it][A[A

 70%|███████   | 49/70 [01:58<00:50,  2.41s/it][A[A

 71%|███████▏  | 50/70 [02:01<00:47,  2.40s/it][A[A

 73%|███████▎  | 51/70 [02:03<00:45,  2.38s/it][A[A

Batch loss: 1.3314951658248901




 74%|███████▍  | 52/70 [02:05<00:42,  2.38s/it][A[A

 76%|███████▌  | 53/70 [02:08<00:40,  2.36s/it][A[A

 77%|███████▋  | 54/70 [02:10<00:37,  2.35s/it][A[A

 79%|███████▊  | 55/70 [02:12<00:35,  2.35s/it][A[A

 80%|████████  | 56/70 [02:15<00:33,  2.36s/it][A[A

 81%|████████▏ | 57/70 [02:17<00:31,  2.40s/it][A[A

 83%|████████▎ | 58/70 [02:19<00:28,  2.39s/it][A[A

 84%|████████▍ | 59/70 [02:22<00:26,  2.38s/it][A[A

 86%|████████▌ | 60/70 [02:24<00:23,  2.36s/it][A[A

 87%|████████▋ | 61/70 [02:27<00:21,  2.39s/it][A[A

Batch loss: 1.3293395042419434




 89%|████████▊ | 62/70 [02:29<00:19,  2.42s/it][A[A

 90%|█████████ | 63/70 [02:32<00:17,  2.43s/it][A[A

 91%|█████████▏| 64/70 [02:34<00:14,  2.44s/it][A[A

 93%|█████████▎| 65/70 [02:36<00:12,  2.44s/it][A[A

 94%|█████████▍| 66/70 [02:39<00:09,  2.46s/it][A[A

 96%|█████████▌| 67/70 [02:41<00:07,  2.44s/it][A[A

 97%|█████████▋| 68/70 [02:44<00:04,  2.45s/it][A[A

 99%|█████████▊| 69/70 [02:46<00:02,  2.45s/it][A[A

100%|██████████| 70/70 [02:47<00:00,  2.39s/it]


  0%|          | 0/8 [00:00<?, ?it/s][A[A

Train Loss:  1.4576427068029132




 12%|█▎        | 1/8 [00:02<00:17,  2.46s/it][A[A

 25%|██▌       | 2/8 [00:04<00:14,  2.44s/it][A[A

 38%|███▊      | 3/8 [00:07<00:11,  2.38s/it][A[A

 50%|█████     | 4/8 [00:09<00:09,  2.35s/it][A[A

 62%|██████▎   | 5/8 [00:11<00:06,  2.33s/it][A[A

 75%|███████▌  | 6/8 [00:14<00:04,  2.35s/it][A[A

 88%|████████▊ | 7/8 [00:16<00:02,  2.35s/it][A[A

100%|██████████| 8/8 [00:17<00:00,  2.24s/it]

Valid Loss:  2.3621239066123962



